
    ˀh                    z    d dl mZ d dlmZ d dlmZ d dlmZ d dlm	Z	 erd dl
mZ d dlmZ e	 G d d	             Zy
)    )annotations)TYPE_CHECKING)unstable)wrap_s)expr_dispatch)Series)PySeriesc                  z    e Zd ZdZdZddZddZddZddZ e	       dd       Z
ddZdd	Zdd
ZddZdddZy)CatNameSpacez)Namespace for categorical related series.catc                &    |j                   | _         y N)_s)selfseriess     u/var/www/html/wine-match-dev/backend/winematch-backend/venv/lib/python3.12/site-packages/polars/series/categorical.py__init__zCatNameSpace.__init__   s    "II    c                     y)aP  
        Get the categories stored in this data type.

        Examples
        --------
        >>> s = pl.Series(["foo", "bar", "foo", "foo", "ham"], dtype=pl.Categorical)
        >>> s.cat.get_categories()
        shape: (3,)
        Series: '' [str]
        [
            "foo"
            "bar"
            "ham"
        ]
        N r   s    r   get_categorieszCatNameSpace.get_categories       r   c                6    | j                   j                         S )a  
        Return whether or not the column is a local categorical.

        Examples
        --------
        Categoricals constructed without a string cache are considered local.

        >>> s = pl.Series(["a", "b", "a"], dtype=pl.Categorical)
        >>> s.cat.is_local()
        True

        Categoricals constructed with a string cache are considered global.

        >>> with pl.StringCache():
        ...     s = pl.Series(["a", "b", "a"], dtype=pl.Categorical)
        >>> s.cat.is_local()
        False
        )r   cat_is_localr   s    r   is_localzCatNameSpace.is_local(   s    & ww##%%r   c                H    t        | j                  j                               S )a  
        Convert a categorical column to its local representation.

        This may change the underlying physical representation of the column.

        See the documentation of :func:`StringCache` for more information on the
        difference between local and global categoricals.

        Examples
        --------
        Compare the global and local representations of a categorical.

        >>> with pl.StringCache():
        ...     _ = pl.Series("x", ["a", "b", "a"], dtype=pl.Categorical)
        ...     s = pl.Series("y", ["c", "b", "d"], dtype=pl.Categorical)
        >>> s.to_physical()
        shape: (3,)
        Series: 'y' [u32]
        [
                2
                1
                3
        ]
        >>> s.cat.to_local().to_physical()
        shape: (3,)
        Series: 'y' [u32]
        [
                0
                1
                2
        ]
        )r   r   cat_to_localr   s    r   to_localzCatNameSpace.to_local=   s    B dgg**,--r   c                6    | j                   j                         S )a  
        Indicate whether the Series uses lexical ordering.

        .. warning::
            This functionality is considered **unstable**. It may be changed
            at any point without it being considered a breaking change.

        Examples
        --------
        >>> s = pl.Series(["b", "a", "b"]).cast(pl.Categorical)
        >>> s.cat.uses_lexical_ordering()
        False
        >>> s = s.cast(pl.Categorical("lexical"))
        >>> s.cat.uses_lexical_ordering()
        True
        )r   cat_uses_lexical_orderingr   s    r   uses_lexical_orderingz"CatNameSpace.uses_lexical_ordering`   s    $ ww0022r   c                     y)u&  
        Return the byte-length of the string representation of each value.

        Returns
        -------
        Series
            Series of data type :class:`UInt32`.

        See Also
        --------
        len_chars

        Notes
        -----
        When working with non-ASCII text, the length in bytes is not the same as the
        length in characters. You may want to use :func:`len_chars` instead.
        Note that :func:`len_bytes` is much more performant (_O(1)_) than
        :func:`len_chars` (_O(n)_).

        Examples
        --------
        >>> s = pl.Series(["Café", "345", "東京", None], dtype=pl.Categorical)
        >>> s.cat.len_bytes()
        shape: (4,)
        Series: '' [u32]
        [
            5
            3
            6
            null
        ]
        Nr   r   s    r   	len_byteszCatNameSpace.len_bytest   r   r   c                     y)u  
        Return the number of characters of the string representation of each value.

        Returns
        -------
        Series
            Series of data type :class:`UInt32`.

        See Also
        --------
        len_bytes

        Notes
        -----
        When working with ASCII text, use :func:`len_bytes` instead to achieve
        equivalent output with much better performance:
        :func:`len_bytes` runs in _O(1)_, while :func:`len_chars` runs in (_O(n)_).

        A character is defined as a `Unicode scalar value`_. A single character is
        represented by a single byte when working with ASCII text, and a maximum of
        4 bytes otherwise.

        .. _Unicode scalar value: https://www.unicode.org/glossary/#unicode_scalar_value

        Examples
        --------
        >>> s = pl.Series(["Café", "345", "東京", None], dtype=pl.Categorical)
        >>> s.cat.len_chars()
        shape: (4,)
        Series: '' [u32]
        [
            4
            3
            2
            null
        ]
        Nr   r   s    r   	len_charszCatNameSpace.len_chars   r   r   c                     y)a{  
        Check if string representations of values start with a substring.

        Parameters
        ----------
        prefix
            Prefix substring.

        See Also
        --------
        contains : Check if the string repr contains a substring that matches a pattern.
        ends_with : Check if string repr ends with a substring.

        Examples
        --------
        >>> s = pl.Series("fruits", ["apple", "mango", None], dtype=pl.Categorical)
        >>> s.cat.starts_with("app")
        shape: (3,)
        Series: 'fruits' [bool]
        [
            true
            false
            null
        ]
        Nr   )r   prefixs     r   starts_withzCatNameSpace.starts_with   r   r   c                     y)az  
        Check if string representations of values end with a substring.

        Parameters
        ----------
        suffix
            Suffix substring.

        See Also
        --------
        contains : Check if the string repr contains a substring that matches a pattern.
        starts_with : Check if string repr starts with a substring.

        Examples
        --------
        >>> s = pl.Series("fruits", ["apple", "mango", None], dtype=pl.Categorical)
        >>> s.cat.ends_with("go")
        shape: (3,)
        Series: 'fruits' [bool]
        [
            false
            true
            null
        ]
        Nr   )r   suffixs     r   	ends_withzCatNameSpace.ends_with   r   r   Nc                     y)aV  
        Extract a substring from the string representation of each string value.

        Parameters
        ----------
        offset
            Start index. Negative indexing is supported.
        length
            Length of the slice. If set to `None` (default), the slice is taken to the
            end of the string.

        Returns
        -------
        Series
            Series of data type :class:`String`.

        Notes
        -----
        Both the `offset` and `length` inputs are defined in terms of the number
        of characters in the (UTF8) string. A character is defined as a
        `Unicode scalar value`_. A single character is represented by a single byte
        when working with ASCII text, and a maximum of 4 bytes otherwise.

        .. _Unicode scalar value: https://www.unicode.org/glossary/#unicode_scalar_value

        Examples
        --------
        >>> s = pl.Series(["pear", None, "papaya", "dragonfruit"], dtype=pl.Categorical)
        >>> s.cat.slice(-3)
        shape: (4,)
        Series: '' [str]
        [
            "ear"
            null
            "aya"
            "uit"
        ]

        Using the optional `length` parameter

        >>> s.cat.slice(4, length=3)
        shape: (4,)
        Series: '' [str]
        [
            ""
            null
            "ya"
            "onf"
        ]
        Nr   )r   offsetlengths      r   slicezCatNameSpace.slice   r   r   )r   r   returnNone)r1   r   )r1   bool)r(   strr1   r   )r+   r4   r1   r   r   )r.   intr/   z
int | Noner1   r   )__name__
__module____qualname____doc__	_accessorr   r   r   r   r   r"   r$   r&   r)   r,   r0   r   r   r   r   r      sP    3I&"&*!.F Z3 3& D%N662r   r   N)
__future__r   typingr   polars._utils.unstabler   polars._utils.wrapr   polars.series.utilsr   polarsr   polars.polarsr	   r   r   r   r   <module>rB      s9    "   + % -& V V Vr   