Source code for dawsonia.typing

"""

```{note}
{var}`MarkArray` is used as a sentinel value when a table's position is not detected.
```

"""

from __future__ import annotations

import datetime
from enum import Enum
from typing import (
    Literal,
    NamedTuple,
    NewType,
    Sequence,
    TypedDict,
    Union,
)

from numpy.typing import NDArray
from typing_extensions import NotRequired, Required

class MarkArray(Enum):
    """Sentinel used in place of an array when a table's position is not detected."""

    EMPTY = 1


class TimeUnits(Enum):
    """Units in which the row index (the leading column) may be expressed."""

    HOURS = 1
    DAYS = 2
    NONE = 3


class PreprocMethods(Enum):
    """Available preprocessing methods."""

    SCIPY_PROJ = 1
    OPENCV_CONTOURS = 2

# Types related to OPENCV_CONTOURS method
# Distinct (NewType) name for a tuple of exactly four ints describing a bounding box.
# NOTE(review): the order/meaning of the four ints is not visible in this module --
# confirm against the code that builds these tuples.
BBoxTuple = NewType("BBoxTuple", tuple[int, int, int, int])
# Distinct (NewType) name for an int used as a cluster label.
ClusterLabel = NewType("ClusterLabel", int)

# Can be changed into list[Iterable[int]] if this is too restrictive
# NOTE: the array below essentially has a shape <nth_bbox, bbox_vertices>, where bbox_vertices=4
TablePositions = list[list]  # list of list of NDArray | BBoxTuple | Empty
# Each entry is either an array or the MarkArray.EMPTY sentinel
# (the sentinel stands in when a table's position is not detected).
TablePosArrays = list[Union[Literal[MarkArray.EMPTY], NDArray]]
TableSizes = list[list[int]]  # for example: [[5, 8], [5, 9], [], [], []]
TableSizesGeneric = Sequence[Sequence[int]]  # to allow for both list and tuples

# Variable-length tuple whose items are ints, times of day, or durations.
TableRowIndex = tuple[Union[int, datetime.time, datetime.timedelta], ...]

# Nested, variable-length tuples: an outer tuple of inner tuples of values.
# The ``...`` is required on both levels; ``tuple[tuple[str]]`` would only admit
# exactly one inner tuple holding exactly one item.
Prediction = tuple[tuple[str, ...], ...]
Probability = tuple[tuple[float, ...], ...]


class Preproc(TypedDict):
    """Keys of a ``preproc`` section as read from a table-format file.

    The keys mirror the fields of ``PreprocConfig``; ``row_idx_unit`` and
    ``method`` may be given either as plain strings or as the corresponding
    enum members.
    """

    # NOTE(review): every key is required here, yet the TOML example in the
    # ``TableFormatYear`` docstring sets only ``corr_rotate`` -- confirm
    # whether ``total=False`` is intended.
    table_modif: bool
    corr_rotate: bool
    row_idx_unit: str | TimeUnits
    method: str | PreprocMethods
    idx_tables_size_verify: list[int]
class PreprocConfig(NamedTuple):
    """Default values for preproc. Used in {mod}`dawsonia.image_preproc`

    Parameters
    ----------
    table_modif: bool
        Allow modification of tables, e.g. remove leading columns or rows.
        See {func}`dawsonia.image_preproc.get_pos`.
    corr_rotate: bool
        Correct skewness of books by using the `--page-middle` value and
        rotating the pages in opposite directions.
        See {func}`dawsonia.image_preproc.corr_rotate`
    row_idx_unit: TimeUnits
        Specifies the row index units (i.e. the leading column) so that it can
        be cast into `datetime.time` type.
    method: PreprocMethods
        Preprocessing method to use; one of the members of `PreprocMethods`.
    idx_tables_size_verify: list[int]
        Indices of tables to verify sizes

    """

    table_modif: bool = True
    corr_rotate: bool = True  # Correction rotation based on page middle
    row_idx_unit: TimeUnits = TimeUnits.HOURS
    method: PreprocMethods = PreprocMethods.SCIPY_PROJ
    # NOTE: this default is one list object shared by every instance that does
    # not pass its own value; treat it as read-only (never mutate it in place).
    idx_tables_size_verify: list[int] = [0, 1]
class Transforms(TypedDict):
    """Keys of a ``transforms`` section as read from a table-format file."""

    # NOTE(review): presumably an angle in degrees (the TOML example in the
    # ``TableFormatYear`` docstring uses ``rotate = 90``) -- confirm.
    rotate: int
class TransformsConfig(NamedTuple):
    """Immutable transforms settings; ``rotate`` defaults to ``0``."""

    rotate: int = 0
class TableFormatYear(TypedDict, total=False):
    """Dictionary items specified under TOML section ``[YEAR]``, where YEAR is
    a string. It has a special meaning when ``[default]`` is used as YEAR.

    Usage
    -----

    .. code-block:: toml

        [1942]
        version = "0b"

    The section may optionally contain its own preproc and transforms sections

    .. code-block:: toml

        [1942.preproc]
        corr_rotate = false

        [1942.transforms]
        rotate = 90

    """

    # All keys are optional (``total=False``).
    version: int | str
    preproc: Preproc
    transforms: Transforms
class TableFormatFileVersion(TypedDict):
    """Dictionary items specified under TOML section ``[version.XX]``, where XX
    is a string. For example:

    Usage
    -----

    .. code-block:: toml

        [version.0b]
        columns = [
            [
                "term_på_baro",
                "barom",
                "torra_term",
                "våta_term",
                "moln_slag_lägre",
                "moln_mängd_lägre",
                "moln_slag_medel",
                "moln_slag_högre"
            ],
            [
                "moln_het_sol_dimma_nederbörd_total",
                "vind_riktning",
                "vind_beaufort",
                "vind_m_sek",
                "sikt",
                "sjögang",
                "maximi_term",
                "minimi_term",
                "nederbörd_mängd",
                "nederbörd_slag"
            ]
        ]
        name_idx = "tid"
        rows = [10:30:00, 8, 14, 19, 21]
        tables = [
            [5, 8],
            [5, 10],
            [3, 1],
            [4, 2],
            [4, 5]
        ]

    """

    name_idx: str  #: Name of the index column
    # NOTE(review): annotated as a flat tuple of names, while the TOML example
    # above nests the names in two lists -- confirm which shape is intended.
    columns: tuple[str, ...]  #: Names of the columns
    rows: TableRowIndex  #: Row index values
    tables: tuple[tuple[int, int], ...]  #: One pair of ints per table
    preproc: NotRequired[Preproc]  #: Optional ``preproc`` section
    transforms: NotRequired[Transforms]  #: Optional ``transforms`` section
class TableFormatFile(TypedDict, total=False):
    """Top-level layout of a table-format file.

    ``default`` and ``version`` are the two declared keys, both marked
    ``Required``; the class itself is ``total=False``.
    """

    default: Required[TableFormatYear]
    # And other keys with arbitrary years
    version: Required[dict[str, TableFormatFileVersion]]
# NOTE(review): the ``misc`` ignore below is presumably needed because
# ``preproc`` and ``transforms`` are declared in both base classes -- confirm.
class TableFormatMerged(TableFormatYear, TableFormatFileVersion):  # type:ignore[misc]
    """Union of the keys of a year section and a version section, i.e. the
    dictionary obtained once ``default`` and one specific version are merged.
    """
class TableFormat(NamedTuple):
    """Table format configuration resolved for one specific year.

    Part of the attributes come from the `default` configuration after
    merging (see {class}`TableFormatMerged`). No field has a default value, so
    all of them must be supplied.
    """

    # Table layout
    name_idx: str
    columns: tuple[str, ...]
    rows: TableRowIndex
    tables: tuple[tuple[int, int], ...]
    row_sections: tuple[tuple[int, int], ...]
    col_sections: tuple[tuple[int, int], ...]
    divided_tables: tuple[tuple[int, int], ...]

    # Processing settings
    preproc: PreprocConfig
    transforms: TransformsConfig | None

    # Identification
    version: str
    station: str