Source code for static_frame.core.index_datetime

from __future__ import annotations

import datetime
from functools import partial

import numpy as np
import typing_extensions as tp

from static_frame.core.doc_str import doc_inject, doc_update
from static_frame.core.exception import (
    GrowOnlyInvalid,
    InvalidDatetime64Initializer,
    LocInvalid,
)
from static_frame.core.index import INDEX_GO_LEAF_SLOTS, Index, IndexGO, _IndexGOMixin
from static_frame.core.util import (
    DT64_DAY,
    DT64_H,
    DT64_M,
    DT64_MONTH,
    DT64_MS,
    DT64_NS,
    DT64_S,
    DT64_US,
    DT64_YEAR,
    DTYPE_BOOL,
    NAME_DEFAULT,
    TD64_DAY,
    TD64_MONTH,
    TD64_YEAR,
    SortStatus,
    TDateInitializer,
    TDtypeDT64,
    TILocSelector,
    TIndexInitializer,
    TKeyTransform,
    TLabel,
    TLocSelector,
    TName,
    TNDArrayAny,
    TYearInitializer,
    TYearMonthInitializer,
    WarningsSilent,
    key_to_datetime_key,
    to_datetime64,
    to_timedelta64,
)

if tp.TYPE_CHECKING:
    import pandas
    from arraykit import AutoMap

key_to_datetime_key_year = partial(key_to_datetime_key, dtype=DT64_YEAR)

I = tp.TypeVar('I', bound='IndexDatetime')

# -------------------------------------------------------------------------------
# Specialized index for dates


class IndexDatetime(Index[np.datetime64]):
    """
    Derivation of Index to support Datetime operations. Derived classes must define _DTYPE.
    """

    STATIC = True
    _DTYPE: TDtypeDT64  # define in derived class
    __slots__ = ()

    def __init__(
        self,
        labels: TIndexInitializer,
        /,
        *,
        loc_is_iloc: bool = False,
        name: TName = NAME_DEFAULT,
        sort_status: SortStatus = SortStatus.UNKNOWN,
    ) -> None:
        """Initializer.

        {args}
        """
        assert not loc_is_iloc
        # __init__ here leaves out the dtype argument, reducing the signature to arguments relevant for these derived classes
        Index.__init__(
            self,
            labels,
            name=name,
            loc_is_iloc=loc_is_iloc,
            sort_status=sort_status,
        )

    # ---------------------------------------------------------------------------
    # dict like interface

    def __contains__(
        self,
        value: tp.Any,
        /,
    ) -> bool:
        """Return True if value in the labels. Will only return True for an exact match to the type of dates stored within."""
        try:
            v = to_datetime64(value)
        except ValueError:  # cannot be converted to dt64
            return False
        return self._map.__contains__(v)  # type: ignore

    # ---------------------------------------------------------------------------
    # operators

    def _ufunc_binary_operator(
        self,
        *,
        operator: tp.Callable[..., tp.Any],
        other: tp.Any,
        fill_value: tp.Any = np.nan,
    ) -> TNDArrayAny:
        if self._recache:
            self._update_array_cache()

        if operator.__name__ == 'matmul' or operator.__name__ == 'rmatmul':
            raise NotImplementedError('matrix multiplication not supported')

        if isinstance(other, Index):
            other = other.values  # operate on labels to labels
            other_is_array = True
        elif isinstance(other, str):
            # do not pass dtype, as want to coerce to this parsed type, not the type of self
            other = to_datetime64(other)
            other_is_array = False
        elif other.__class__ is np.ndarray:
            other_is_array = True
        else:
            other_is_array = False

        result: TNDArrayAny
        if isinstance(other, np.datetime64):
            # convert labels to other's datetime64 type to enable matching on month, year, etc.
            result = operator(self._labels.astype(other.dtype), other)
        elif isinstance(other, datetime.timedelta):
            result = operator(self._labels, to_timedelta64(other))
        else:  # np.timedelta64 should work fine here
            with WarningsSilent():
                result = operator(self._labels, other)

        # NOTE: remove when min numpy is 1.25
        # NOTE: similar branching as in container_util.apply_binary_operator
        # NOTE: all string will have been converted to dt64, or raise ValueError; comparison to same sized iterables (list, tuple) will result in an array when they are the same size
        if result is False:  # type: ignore  # will never be True
            if (  # type: ignore
                not other_is_array
                and hasattr(other, '__len__')
                and len(other) == len(self)  # pyright: ignore
            ):
                # NOTE: equality comparisons of an array to same sized iterable normally return an array, but with dt64 types they just return False
                result = np.full(self.shape, result, dtype=DTYPE_BOOL)
            elif other_is_array and other.size == 1:  # pyright: ignore
                # elements in arrays of 0 or more dimensions are acceptable; this is what NP does for arithmetic operators when the types are compatible
                result = np.full(self.shape, result, dtype=DTYPE_BOOL)
            else:
                raise ValueError('operands could not be broadcast together')
                # raise on unaligned shapes as is done for arithmetic operators

        result.flags.writeable = False
        return result

    def _loc_to_iloc(
        self,
        key: TLocSelector,
        key_transform: TKeyTransform = key_to_datetime_key,
        partial_selection: bool = False,
    ) -> TILocSelector:
        """
        Specialized for IndexData indices to convert string data representations into np.datetime64 objects as appropriate.
        """
        # not passing self.dtype to key_to_datetime_key so as to allow translation to a foreign datetime; slice comparison will be handled by map_slice_args
        return Index._loc_to_iloc(
            self,
            key=key,
            key_transform=key_transform,
            partial_selection=partial_selection,
        )

    # ---------------------------------------------------------------------------
    def to_pandas(self) -> 'pandas.DatetimeIndex':
        """Return a Pandas Index."""
        import pandas

        return pandas.DatetimeIndex(self.values.copy(), name=self._name)  # pyright: ignore

    # ---------------------------------------------------------------------------

    @doc_inject(selector='searchsorted', label_type='iloc (integer)')
    def iloc_searchsorted(
        self,
        values: tp.Any,
        /,
        *,
        side_left: bool = True,
    ) -> TNDArrayAny:
        """
        {doc}

        Args:
            {values}
            {side_left}
        """
        # permit variable forms of date specification
        return Index.iloc_searchsorted(
            self,
            key_to_datetime_key(values),
            side_left=side_left,
        )


doc_update(IndexDatetime.__init__, selector='index_date_time_init')


# -------------------------------------------------------------------------------
class _IndexDatetimeGOMixin(_IndexGOMixin):
    _DTYPE: TDtypeDT64
    _map: tp.Optional[AutoMap]
    __slots__ = ()  # define in derived class

    def append(
        self,
        value: TLabel,
        /,
    ) -> None:
        """Specialize for fixed-typed indices: convert `value` argument; do not need to resolve_dtype with each addition; self._map is never None"""
        try:
            value = to_datetime64(value, self._DTYPE)  # type: ignore
        except ValueError:
            raise GrowOnlyInvalid() from None

        if self._map is not None:  # if starting from an empty index
            try:
                self._map.add(value)
            except ValueError as e:
                raise KeyError(f'duplicate key append attempted: {value}') from e

        self._sort_status = self._determine_sort_status_from_new_value(
            static_labels=self._labels,  # type: ignore
            mutable_labels=self._labels_mutable,
            prev_status=self._sort_status,
            new_value=value,
            loc_is_iloc=False,
        )
        self._labels_mutable.append(value)
        self._positions_mutable_count += 1
        self._recache = True


# -------------------------------------------------------------------------------
[docs] class IndexYear(IndexDatetime): """A mapping of years (NumPy :obj:`datetime64[Y]`) to positions, immutable and of fixed size.""" STATIC = True _DTYPE = DT64_YEAR __slots__ = ()
[docs] @classmethod def from_date_range( cls: tp.Type[I], start: TDateInitializer, stop: TDateInitializer, step: int = 1, *, name: tp.Optional[TLabel] = None, ) -> I: """ Get an IndexYearMonth instance over a range of dates, where start and stop are inclusive. """ labels = np.arange( to_datetime64(start, DT64_DAY), to_datetime64(stop, DT64_DAY).astype(DT64_YEAR) + TD64_YEAR, np.timedelta64(step, 'Y'), dtype=DT64_YEAR, ) labels.flags.writeable = False return cls( labels, name=name, sort_status=SortStatus.from_range_step(step), )
[docs] @classmethod def from_year_month_range( cls: tp.Type[I], start: TYearMonthInitializer, stop: TYearMonthInitializer, step: int = 1, *, name: tp.Optional[TLabel] = None, ) -> I: """ Get an IndexYearMonth instance over a range of months, where start and end are inclusive. """ labels = np.arange( to_datetime64(start, DT64_MONTH), to_datetime64(stop, DT64_MONTH).astype(DT64_YEAR) + TD64_YEAR, np.timedelta64(step, 'Y'), dtype=DT64_YEAR, ) labels.flags.writeable = False return cls( labels, name=name, sort_status=SortStatus.from_range_step(step), )
[docs] @classmethod def from_year_range( cls: tp.Type[I], start: TYearInitializer, stop: TYearInitializer, step: int = 1, *, name: tp.Optional[TLabel] = None, ) -> I: """ Get an IndexDate instance over a range of years, where start and end are inclusive. """ labels: TNDArrayAny = np.arange( to_datetime64(start, DT64_YEAR), to_datetime64(stop, DT64_YEAR) + TD64_YEAR, step=np.timedelta64(step, 'Y'), ) labels.flags.writeable = False return cls( labels, name=name, sort_status=SortStatus.from_range_step(step), )
# --------------------------------------------------------------------------- # specializations to permit integers as years
[docs] def __contains__( self, value: tp.Any, /, ) -> bool: """Return True if value in the labels. Will only return True for an exact match to the type of dates stored within.""" try: return self._map.__contains__(to_datetime64(value, self._DTYPE)) # type: ignore except InvalidDatetime64Initializer: # if value is a dt64 and not of a the same unit as self._DTYPE, the initializations exception is raised, which means False return False
def _loc_to_iloc( self, key: TLocSelector, key_transform: TKeyTransform = key_to_datetime_key_year, partial_selection: bool = False, ) -> TILocSelector: """ Specialized for IndexData indices to convert string data representations into np.datetime64 objects as appropriate. """ try: return Index._loc_to_iloc( self, key=key, key_transform=key_transform, partial_selection=partial_selection, ) except InvalidDatetime64Initializer as e: raise LocInvalid(e.args[0]) from None # ---------------------------------------------------------------------------
[docs] def to_pandas(self) -> 'pandas.DatetimeIndex': """Return a Pandas Index.""" raise NotImplementedError( 'Pandas does not support a year type, and it is ambiguous if a date proxy should be the first of the year or the last of the year.' )
[docs] class IndexYearGO(_IndexDatetimeGOMixin, IndexYear): _IMMUTABLE_CONSTRUCTOR = IndexYear __slots__ = INDEX_GO_LEAF_SLOTS
IndexYear._MUTABLE_CONSTRUCTOR = IndexYearGO IndexYear._IMMUTABLE_CONSTRUCTOR = IndexYear # -------------------------------------------------------------------------------
[docs] class IndexYearMonth(IndexDatetime): """A mapping of year months (NumPy :obj:`datetime64[M]`) to positions, immutable and of fixed size.""" STATIC = True _DTYPE = DT64_MONTH __slots__ = ()
[docs] @classmethod def from_date_range( cls: tp.Type[I], start: TDateInitializer, stop: TDateInitializer, step: int = 1, *, name: tp.Optional[TLabel] = None, ) -> I: """ Get an IndexYearMonth instance over a range of dates, where start and stop is inclusive. """ labels = np.arange( to_datetime64(start, DT64_DAY), to_datetime64(stop, DT64_DAY).astype(DT64_MONTH) + TD64_MONTH, np.timedelta64(step, 'M'), dtype=DT64_MONTH, ) labels.flags.writeable = False return cls( labels, name=name, sort_status=SortStatus.from_range_step(step), )
[docs] @classmethod def from_year_month_range( cls: tp.Type[I], start: TYearMonthInitializer, stop: TYearMonthInitializer, step: int = 1, *, name: tp.Optional[TLabel] = None, ) -> I: """ Get an IndexYearMonth instance over a range of months, where start and end are inclusive. """ labels = np.arange( to_datetime64(start, DT64_MONTH), to_datetime64(stop, DT64_MONTH) + TD64_MONTH, np.timedelta64(step, 'M'), dtype=DT64_MONTH, ) labels.flags.writeable = False return cls( labels, name=name, sort_status=SortStatus.from_range_step(step), )
[docs] @classmethod def from_year_range( cls: tp.Type[I], start: TYearInitializer, stop: TYearInitializer, step: int = 1, *, name: tp.Optional[TLabel] = None, ) -> I: """ Get an IndexYearMonth instance over a range of years, where start and end are inclusive. """ labels = np.arange( to_datetime64(start, DT64_YEAR), to_datetime64(stop, DT64_YEAR) + TD64_YEAR, step=np.timedelta64(step, 'M'), dtype=DT64_MONTH, ) labels.flags.writeable = False return cls( labels, name=name, sort_status=SortStatus.from_range_step(step), )
# ---------------------------------------------------------------------------
[docs] def to_pandas(self) -> 'pandas.DatetimeIndex': """Return a Pandas Index.""" raise NotImplementedError( 'Pandas does not support a year month type, and it is ambiguous if a date proxy should be the first of the month or the last of the month.' )
[docs] class IndexYearMonthGO(_IndexDatetimeGOMixin, IndexYearMonth): _IMMUTABLE_CONSTRUCTOR = IndexYearMonth __slots__ = INDEX_GO_LEAF_SLOTS
IndexYearMonth._MUTABLE_CONSTRUCTOR = IndexYearMonthGO IndexYearMonth._IMMUTABLE_CONSTRUCTOR = IndexYearMonth # -------------------------------------------------------------------------------
[docs] class IndexDate(IndexDatetime): """A mapping of dates (NumPy :obj:`datetime64[D]`) to positions, immutable and of fixed size.""" STATIC = True _DTYPE = DT64_DAY __slots__ = ()
[docs] @classmethod def from_date_range( cls: tp.Type[I], start: TDateInitializer, stop: TDateInitializer, step: int = 1, *, name: tp.Optional[TLabel] = None, ) -> I: """ Get an IndexDate instance over a range of dates, where start and stop is inclusive. """ labels: TNDArrayAny = np.arange( to_datetime64(start, DT64_DAY), to_datetime64(stop, DT64_DAY) + TD64_DAY, np.timedelta64(step, 'D'), ) labels.flags.writeable = False return cls( labels, name=name, sort_status=SortStatus.from_range_step(step), )
[docs] @classmethod def from_year_month_range( cls: tp.Type[I], start: TYearMonthInitializer, stop: TYearMonthInitializer, step: int = 1, *, name: tp.Optional[TLabel] = None, ) -> I: """ Get an IndexDate instance over a range of months, where start and end are inclusive. """ labels = np.arange( to_datetime64(start, DT64_MONTH), to_datetime64(stop, DT64_MONTH) + TD64_MONTH, step=np.timedelta64(step, 'D'), dtype=DT64_DAY, ) labels.flags.writeable = False return cls( labels, name=name, sort_status=SortStatus.from_range_step(step), )
[docs] @classmethod def from_year_range( cls: tp.Type[I], start: TYearInitializer, stop: TYearInitializer, step: int = 1, *, name: tp.Optional[TLabel] = None, ) -> I: """ Get an IndexDate instance over a range of years, where start and end are inclusive. """ labels = np.arange( to_datetime64(start, DT64_YEAR), to_datetime64(stop, DT64_YEAR) + TD64_YEAR, step=np.timedelta64(step, 'D'), dtype=DT64_DAY, ) labels.flags.writeable = False return cls( labels, name=name, sort_status=SortStatus.from_range_step(step), )
[docs] class IndexDateGO(_IndexDatetimeGOMixin, IndexDate): _IMMUTABLE_CONSTRUCTOR = IndexDate __slots__ = INDEX_GO_LEAF_SLOTS
IndexDate._MUTABLE_CONSTRUCTOR = IndexDateGO IndexDate._IMMUTABLE_CONSTRUCTOR = IndexDate # -------------------------------------------------------------------------------
[docs] class IndexHour(IndexDatetime): """A mapping of hours (NumPy :obj:`datetime64[h]`) to positions, immutable and of fixed size.""" STATIC = True _DTYPE = DT64_H __slots__ = ()
[docs] class IndexHourGO(_IndexDatetimeGOMixin, IndexHour): _IMMUTABLE_CONSTRUCTOR = IndexHour __slots__ = INDEX_GO_LEAF_SLOTS
IndexHour._MUTABLE_CONSTRUCTOR = IndexHourGO IndexHour._IMMUTABLE_CONSTRUCTOR = IndexHour # -------------------------------------------------------------------------------
[docs] class IndexMinute(IndexDatetime): """A mapping of minutes (NumPy :obj:`datetime64[m]`) to positions, immutable and of fixed size.""" STATIC = True _DTYPE = DT64_M __slots__ = ()
[docs] class IndexMinuteGO(_IndexDatetimeGOMixin, IndexMinute): _IMMUTABLE_CONSTRUCTOR = IndexMinute __slots__ = INDEX_GO_LEAF_SLOTS
IndexMinute._MUTABLE_CONSTRUCTOR = IndexMinuteGO IndexMinute._IMMUTABLE_CONSTRUCTOR = IndexMinute # -------------------------------------------------------------------------------
[docs] class IndexSecond(IndexDatetime): """A mapping of seconds (NumPy :obj:`datetime64[s]`) to positions, immutable and of fixed size.""" STATIC = True _DTYPE = DT64_S __slots__ = ()
[docs] class IndexSecondGO(_IndexDatetimeGOMixin, IndexSecond): _IMMUTABLE_CONSTRUCTOR = IndexSecond __slots__ = INDEX_GO_LEAF_SLOTS
IndexSecond._MUTABLE_CONSTRUCTOR = IndexSecondGO IndexSecond._IMMUTABLE_CONSTRUCTOR = IndexSecond # -------------------------------------------------------------------------------
[docs] class IndexMillisecond(IndexDatetime): """A mapping of milliseconds (NumPy :obj:`datetime64[ms]`) to positions, immutable and of fixed size.""" STATIC = True _DTYPE = DT64_MS __slots__ = ()
[docs] class IndexMillisecondGO(_IndexDatetimeGOMixin, IndexMillisecond): _IMMUTABLE_CONSTRUCTOR = IndexMillisecond __slots__ = INDEX_GO_LEAF_SLOTS
IndexMillisecond._MUTABLE_CONSTRUCTOR = IndexMillisecondGO IndexMillisecond._IMMUTABLE_CONSTRUCTOR = IndexMillisecond # -------------------------------------------------------------------------------
[docs] class IndexMicrosecond(IndexDatetime): """A mapping of microseconds (NumPy :obj:`datetime64[us]`) to positions, immutable and of fixed size.""" STATIC = True _DTYPE = DT64_US __slots__ = ()
[docs] class IndexMicrosecondGO(_IndexDatetimeGOMixin, IndexMicrosecond): _IMMUTABLE_CONSTRUCTOR = IndexMicrosecond __slots__ = INDEX_GO_LEAF_SLOTS
IndexMicrosecond._MUTABLE_CONSTRUCTOR = IndexMicrosecondGO IndexMicrosecond._IMMUTABLE_CONSTRUCTOR = IndexMicrosecond # -------------------------------------------------------------------------------
[docs] class IndexNanosecond(IndexDatetime): """A mapping of nanoseconds (NumPy :obj:`datetime64[ns]`) to positions, immutable and of fixed size.""" STATIC = True _DTYPE = DT64_NS __slots__ = ()
[docs] class IndexNanosecondGO(_IndexDatetimeGOMixin, IndexNanosecond): _IMMUTABLE_CONSTRUCTOR = IndexNanosecond __slots__ = INDEX_GO_LEAF_SLOTS
IndexNanosecond._MUTABLE_CONSTRUCTOR = IndexNanosecondGO IndexNanosecond._IMMUTABLE_CONSTRUCTOR = IndexNanosecond # ------------------------------------------------------------------------------- _DTYPE_TO_CLASS: tp.Dict[TDtypeDT64, tp.Type[Index[np.datetime64]]] = { cls._DTYPE: cls for cls in ( IndexYear, IndexYearMonth, IndexDate, IndexHour, IndexMinute, IndexSecond, IndexMillisecond, IndexMicrosecond, IndexNanosecond, ) } def dtype_to_index_cls(static: bool, dtype: TDtypeDT64) -> tp.Type[Index[tp.Any]]: """ Given an the class of the Index from which this is valled, as well as the dtype of the resultant array, return the appropriate Index class. """ resolved_static: tp.Type[Index[tp.Any]] | None = _DTYPE_TO_CLASS.get(dtype) if resolved_static is not None: if static: return resolved_static return resolved_static._MUTABLE_CONSTRUCTOR # type: ignore # if origin is a dt64, and dtype is not a dt64, we can go to Index or IndexGO if static: return Index return IndexGO