Source code for dawsonia.typing
"""
```{note}
{var}`MarkArray` is used as a sentinel value when a table's position is not detected.
```
"""
from __future__ import annotations
import datetime
from enum import Enum
from typing import (
Literal,
NamedTuple,
NewType,
Sequence,
TypedDict,
Union,
)
from numpy.typing import NDArray
from typing_extensions import NotRequired, Required
class MarkArray(Enum):
    """Sentinel enumeration used when a table's position is not detected.

    ``MarkArray.EMPTY`` is the value that may appear in place of an array
    in ``TablePosArrays``.
    """

    EMPTY = 1


class TimeUnits(Enum):
    """Units of the row index (see ``row_idx_unit`` in the preproc types)."""

    HOURS = 1
    DAYS = 2
    NONE = 3


class PreprocMethods(Enum):
    """Available preprocessing methods (see ``method`` in the preproc types)."""

    SCIPY_PROJ = 1
    OPENCV_CONTOURS = 2


# Types related to the OPENCV_CONTOURS method
BBoxTuple = NewType("BBoxTuple", tuple[int, int, int, int])
ClusterLabel = NewType("ClusterLabel", int)

# Can be changed into list[Iterable[int]] if this turns out to be too restrictive.
# NOTE: the array below essentially has the shape <nth_bbox, bbox_vertices>,
# where bbox_vertices=4.
TablePositions = list[list]  # list of list of NDArray | BBoxTuple | Empty
TablePosArrays = list[Union[Literal[MarkArray.EMPTY], NDArray]]

TableSizes = list[list[int]]  # for example: [[5, 8], [5, 9], [], [], []]
TableSizesGeneric = Sequence[Sequence[int]]  # allows both lists and tuples

# Entries of a table's row index: plain integers, times of day or durations.
TableRowIndex = tuple[Union[int, datetime.time, datetime.timedelta], ...]

# NOTE(review): as written these two aliases describe a 1-tuple that holds a
# 1-tuple. If variable-length nesting is intended, the spelling would be
# ``tuple[tuple[str, ...], ...]`` -- confirm against the code producing them.
Prediction = tuple[tuple[str]]
Probability = tuple[tuple[float]]
[docs]
class Preproc(TypedDict):
    """Preprocessing options in dictionary form.

    This is the type of the ``preproc`` entry of {class}`TableFormatYear` and
    {class}`TableFormatFileVersion`. {class}`PreprocConfig` declares the same
    field names together with default values and describes their meaning.
    """

    table_modif: bool
    corr_rotate: bool
    # Both a plain string and the enum member are accepted for the next two
    # keys -- presumably the string form is what is read from the TOML file;
    # verify against the loader.
    row_idx_unit: str | TimeUnits
    method: str | PreprocMethods
    idx_tables_size_verify: list[int]
[docs]
class PreprocConfig(NamedTuple):
    """Default values for preproc. Used in {mod}`dawsonia.image_preproc`

    Parameters
    ----------
    table_modif: bool
        Allow modification of tables, e.g. removing leading columns or rows.
        See {func}`dawsonia.image_preproc.get_pos`.
    corr_rotate: bool
        Correct skewness of books by using the `--page-middle` value and
        rotating the pages in opposite directions. See
        {func}`dawsonia.image_preproc.corr_rotate`
    row_idx_unit: TimeUnits
        Specifies the row index units (i.e. the leading column) so that it can
        be cast into the `datetime.time` type.
    method: PreprocMethods
        Preprocessing method to use. Defaults to ``PreprocMethods.SCIPY_PROJ``.
    idx_tables_size_verify: list[int]
        Indices of tables to verify sizes
    """

    table_modif: bool = True
    corr_rotate: bool = True  # Correction rotation based on page middle
    row_idx_unit: TimeUnits = TimeUnits.HOURS
    method: PreprocMethods = PreprocMethods.SCIPY_PROJ
    # NOTE(review): this default is one list object shared by every instance
    # created without an explicit value; never mutate it in place.
    idx_tables_size_verify: list[int] = [0, 1]
[docs]
class TableFormatYear(TypedDict, total=False):
    """Dictionary items specified under TOML section ``[YEAR]``, where YEAR is a string.

    It has a special meaning when ``[default]`` is used as YEAR. Every key is
    optional (the class is declared with ``total=False``).

    Usage
    -----
    .. code-block:: toml

        [1942]
        version = "0b"

    The section may optionally contain its own preproc and transforms sections

    .. code-block:: toml

        [1942.preproc]
        corr_rotate = false

        [1942.transforms]
        rotate = 90
    """

    version: int | str
    preproc: Preproc
    # NOTE(review): ``Transforms`` is neither defined nor imported in the visible
    # part of this module; the annotation is only resolved lazily -- confirm
    # where the name comes from.
    transforms: Transforms
[docs]
class TableFormatFileVersion(TypedDict):
    """Dictionary items specified under TOML section ``[version.XX]``, where XX is a string.

    ``preproc`` and ``transforms`` are optional; all other keys are required.

    Usage
    -----
    For example:

    .. code-block:: toml

        [version.0b]
        columns = [
            [
                "term_på_baro",
                "barom",
                "torra_term",
                "våta_term",
                "moln_slag_lägre",
                "moln_mängd_lägre",
                "moln_slag_medel",
                "moln_slag_högre"
            ],
            [
                "moln_het_sol_dimma_nederbörd_total",
                "vind_riktning",
                "vind_beaufort",
                "vind_m_sek",
                "sikt",
                "sjögang",
                "maximi_term",
                "minimi_term",
                "nederbörd_mängd",
                "nederbörd_slag"
            ]
        ]
        name_idx = "tid"
        rows = [10:30:00, 8, 14, 19, 21]
        tables = [
            [5, 8],
            [5, 10],
            [3, 1],
            [4, 2],
            [4, 5]
        ]
    """

    name_idx: str  #: Name of the index column
    # NOTE(review): the example above gives ``columns`` as a list of lists of
    # names, while the annotation is a flat tuple of strings -- confirm which
    # shape is intended.
    columns: tuple[str, ...]  #: Column names
    rows: TableRowIndex  #: Row index values (integers, times or timedeltas)
    tables: tuple[tuple[int, int], ...]  #: One pair of integers per table
    preproc: NotRequired[Preproc]
    transforms: NotRequired[Transforms]
[docs]
class TableFormatFile(TypedDict, total=False):
    """Dictionary with the ``default`` and ``version`` sections of a table format.

    ``default`` holds a {class}`TableFormatYear` and ``version`` maps version
    names to {class}`TableFormatFileVersion` items. Both keys are required;
    the class is declared with ``total=False`` only because further keys may
    be present (see the comment below).
    """

    default: Required[TableFormatYear]
    # And other keys with arbitrary years
    version: Required[dict[str, TableFormatFileVersion]]
[docs]
class TableFormatMerged(TableFormatYear, TableFormatFileVersion):  # type:ignore[misc]
    """Dictionary items once default and a specific version are merged.

    Combines the keys of {class}`TableFormatYear` and
    {class}`TableFormatFileVersion`; no additional keys are declared here.
    """
[docs]
class TableFormat(NamedTuple):
    """Specific table format configuration corresponding to a year.

    Some attributes are merged from the `default` configuration
    (see {class}`TableFormatMerged`).
    """

    name_idx: str
    columns: tuple[str, ...]
    rows: TableRowIndex
    tables: tuple[tuple[int, int], ...]
    row_sections: tuple[tuple[int, int], ...]
    col_sections: tuple[tuple[int, int], ...]
    divided_tables: tuple[tuple[int, int], ...]
    # Unlike the dictionary types above, preproc is held as a PreprocConfig tuple.
    preproc: PreprocConfig
    # NOTE(review): ``TransformsConfig`` is neither defined nor imported in the
    # visible part of this module -- confirm where the name comes from.
    transforms: TransformsConfig | None
    version: str
    station: str