Λ
    θΛh*  γ                  σX    d dl mZ d dlmZ d dlmZ d dlmZ erd dlm	Z	  G d d«      Z
y)	ι    )Ϊannotations)ΪTYPE_CHECKING)Ϊqualified_type_name)Ϊ	wrap_expr)ΪExprc                  σN    e Zd ZdZdZddZddZddZddZddZ	ddZ
ddd
Zy	)ΪExprCatNameSpacez.Namespace for categorical related expressions.Ϊcatc                σ&    |j                   | _         y ©N)Ϊ_pyexpr)ΪselfΪexprs     ϊs/var/www/html/wine-match-dev/backend/winematch-backend/venv/lib/python3.12/site-packages/polars/expr/categorical.pyΪ__init__zExprCatNameSpace.__init__   s    Ψ||σ    c                σH    t        | j                  j                  «       «      S )u  
        Get the categories stored in this data type.

        Examples
        --------
        >>> df = pl.Series(
        ...     "cats", ["foo", "bar", "foo", "foo", "ham"], dtype=pl.Categorical
        ... ).to_frame()
        >>> df.select(pl.col("cats").cat.get_categories())
        shape: (3, 1)
        ββββββββ
        β cats β
        β ---  β
        β str  β
        ββββββββ‘
        β foo  β
        β bar  β
        β ham  β
        ββββββββ
        )r   r   Ϊcat_get_categories©r   s    r   Ϊget_categorieszExprCatNameSpace.get_categories   s    τ* Χ8Ρ8Σ:Σ;Π;r   c                σH    t        | j                  j                  «       «      S )uΤ  
        Return the byte-length of the string representation of each value.

        Returns
        -------
        Expr
            Expression of data type :class:`UInt32`.

        See Also
        --------
        len_chars

        Notes
        -----
        When working with non-ASCII text, the length in bytes is not the same as the
        length in characters. You may want to use :func:`len_chars` instead.
        Note that :func:`len_bytes` is much more performant (_O(1)_) than
        :func:`len_chars` (_O(n)_).

        Examples
        --------
        >>> df = pl.DataFrame(
        ...     {"a": pl.Series(["CafΓ©", "345", "ζ±δΊ¬", None], dtype=pl.Categorical)}
        ... )
        >>> df.with_columns(
        ...     pl.col("a").cat.len_bytes().alias("n_bytes"),
        ...     pl.col("a").cat.len_chars().alias("n_chars"),
        ... )
        shape: (4, 3)
        ββββββββ¬ββββββββββ¬ββββββββββ
        β a    β n_bytes β n_chars β
        β ---  β ---     β ---     β
        β cat  β u32     β u32     β
        ββββββββͺββββββββββͺββββββββββ‘
        β CafΓ© β 5       β 4       β
        β 345  β 3       β 3       β
        β ζ±δΊ¬ β 6       β 2       β
        β null β null    β null    β
        ββββββββ΄ββββββββββ΄ββββββββββ
        )r   r   Ϊcat_len_bytesr   s    r   Ϊ	len_byteszExprCatNameSpace.len_bytes+   s    τR Χ3Ρ3Σ5Σ6Π6r   c                σH    t        | j                  j                  «       «      S )uΕ  
        Return the number of characters of the string representation of each value.

        Returns
        -------
        Expr
            Expression of data type :class:`UInt32`.

        See Also
        --------
        len_bytes

        Notes
        -----
        When working with ASCII text, use :func:`len_bytes` instead to achieve
        equivalent output with much better performance:
        :func:`len_bytes` runs in _O(1)_, while :func:`len_chars` runs in (_O(n)_).

        A character is defined as a `Unicode scalar value`_. A single character is
        represented by a single byte when working with ASCII text, and a maximum of
        4 bytes otherwise.

        .. _Unicode scalar value: https://www.unicode.org/glossary/#unicode_scalar_value

        Examples
        --------
        >>> df = pl.DataFrame(
        ...     {"a": pl.Series(["CafΓ©", "345", "ζ±δΊ¬", None], dtype=pl.Categorical)}
        ... )
        >>> df.with_columns(
        ...     pl.col("a").cat.len_chars().alias("n_chars"),
        ...     pl.col("a").cat.len_bytes().alias("n_bytes"),
        ... )
        shape: (4, 3)
        ββββββββ¬ββββββββββ¬ββββββββββ
        β a    β n_chars β n_bytes β
        β ---  β ---     β ---     β
        β cat  β u32     β u32     β
        ββββββββͺββββββββββͺββββββββββ‘
        β CafΓ© β 4       β 5       β
        β 345  β 3       β 3       β
        β ζ±δΊ¬ β 2       β 6       β
        β null β null    β null    β
        ββββββββ΄ββββββββββ΄ββββββββββ
        )r   r   Ϊcat_len_charsr   s    r   Ϊ	len_charszExprCatNameSpace.len_charsV   s    τ\ Χ3Ρ3Σ5Σ6Π6r   c                σ    t        |t        «      sdt        |«      }t        |«      t	        | j
                  j                  |«      «      S )uO  
        Check if string representations of values start with a substring.

        Parameters
        ----------
        prefix
            Prefix substring.

        See Also
        --------
        contains : Check if string repr contains a substring that matches a pattern.
        ends_with : Check if string repr end with a substring.

        Notes
        -----
        Whereas `str.starts_with` allows expression inputs, `cat.starts_with` requires
        a literal string value.

        Examples
        --------
        >>> df = pl.DataFrame(
        ...     {"fruits": pl.Series(["apple", "mango", None], dtype=pl.Categorical)}
        ... )
        >>> df.with_columns(
        ...     pl.col("fruits").cat.starts_with("app").alias("has_prefix"),
        ... )
        shape: (3, 2)
        ββββββββββ¬βββββββββββββ
        β fruits β has_prefix β
        β ---    β ---        β
        β cat    β bool       β
        ββββββββββͺβββββββββββββ‘
        β apple  β true       β
        β mango  β false      β
        β null   β null       β
        ββββββββββ΄βββββββββββββ

        Using `starts_with` as a filter condition:

        >>> df.filter(pl.col("fruits").cat.starts_with("app"))
        shape: (1, 1)
        ββββββββββ
        β fruits β
        β ---    β
        β cat    β
        ββββββββββ‘
        β apple  β
        ββββββββββ
        z!'prefix' must be a string; found )Ϊ
isinstanceΪstrr   Ϊ	TypeErrorr   r   Ϊcat_starts_with)r   ΪprefixΪmsgs      r   Ϊstarts_withzExprCatNameSpace.starts_with   sF    τd &€#Τ&Ψ5Τ6IΘ&Σ6QΠ5TΠUCάC.Π άΧ5Ρ5°fΣ=Σ>Π>r   c                σ    t        |t        «      sdt        |«      }t        |«      t	        | j
                  j                  |«      «      S )u(  
        Check if string representations of values end with a substring.

        Parameters
        ----------
        suffix
            Suffix substring.

        See Also
        --------
        contains : Check if string reprs contains a substring that matches a pattern.
        starts_with : Check if string reprs start with a substring.

        Notes
        -----
        Whereas `str.ends_with` allows expression inputs, `cat.ends_with` requires a
        literal string value.

        Examples
        --------
        >>> df = pl.DataFrame(
        ...     {"fruits": pl.Series(["apple", "mango", None], dtype=pl.Categorical)}
        ... )
        >>> df.with_columns(pl.col("fruits").cat.ends_with("go").alias("has_suffix"))
        shape: (3, 2)
        ββββββββββ¬βββββββββββββ
        β fruits β has_suffix β
        β ---    β ---        β
        β cat    β bool       β
        ββββββββββͺβββββββββββββ‘
        β apple  β false      β
        β mango  β true       β
        β null   β null       β
        ββββββββββ΄βββββββββββββ

        Using `ends_with` as a filter condition:

        >>> df.filter(pl.col("fruits").cat.ends_with("go"))
        shape: (1, 1)
        ββββββββββ
        β fruits β
        β ---    β
        β cat    β
        ββββββββββ‘
        β mango  β
        ββββββββββ
        z!'suffix' must be a string; found )r   r   r   r    r   r   Ϊcat_ends_with)r   Ϊsuffixr#   s      r   Ϊ	ends_withzExprCatNameSpace.ends_with½   sF    τ` &€#Τ&Ψ5Τ6IΘ&Σ6QΠ5TΠUCάC.Π άΧ3Ρ3°FΣ;Σ<Π<r   Nc                σL    t        | j                  j                  ||«      «      S )uN	  
        Extract a substring from the string representation of each value.

        Parameters
        ----------
        offset
            Start index. Negative indexing is supported.
        length
            Length of the slice. If set to `None` (default), the slice is taken to the
            end of the string.

        Returns
        -------
        Expr
            Expression of data type :class:`String`.

        Notes
        -----
        Both the `offset` and `length` inputs are defined in terms of the number
        of characters in the (UTF8) string. A character is defined as a
        `Unicode scalar value`_. A single character is represented by a single byte
        when working with ASCII text, and a maximum of 4 bytes otherwise.

        .. _Unicode scalar value: https://www.unicode.org/glossary/#unicode_scalar_value

        Examples
        --------
        >>> df = pl.DataFrame(
        ...     {
        ...         "s": pl.Series(
        ...             ["pear", None, "papaya", "dragonfruit"],
        ...             dtype=pl.Categorical,
        ...         )
        ...     }
        ... )
        >>> df.with_columns(pl.col("s").cat.slice(-3).alias("slice"))
        shape: (4, 2)
        βββββββββββββββ¬ββββββββ
        β s           β slice β
        β ---         β ---   β
        β cat         β str   β
        βββββββββββββββͺββββββββ‘
        β pear        β ear   β
        β null        β null  β
        β papaya      β aya   β
        β dragonfruit β uit   β
        βββββββββββββββ΄ββββββββ

        Using the optional `length` parameter

        >>> df.with_columns(pl.col("s").cat.slice(4, length=3).alias("slice"))
        shape: (4, 2)
        βββββββββββββββ¬ββββββββ
        β s           β slice β
        β ---         β ---   β
        β cat         β str   β
        βββββββββββββββͺββββββββ‘
        β pear        β       β
        β null        β null  β
        β papaya      β ya    β
        β dragonfruit β onf   β
        βββββββββββββββ΄ββββββββ
        )r   r   Ϊ	cat_slice)r   ΪoffsetΪlengths      r   ΪslicezExprCatNameSpace.sliceς   s!    τ@ Χ/Ρ/°ΈΣ?Σ@Π@r   )r   r   ΪreturnΪNone)r.   r   )r"   r   r.   r   )r'   r   r.   r   r   )r+   Ϊintr,   z
int | Noner.   r   )Ϊ__name__Ϊ
__module__Ϊ__qualname__Ϊ__doc__Ϊ	_accessorr   r   r   r   r$   r(   r-   © r   r   r	   r	      s3    Ω8ΰIσ$σ<σ.)7σV.7σ`5?σn3=υj@Ar   r	   N)Ϊ
__future__r   Ϊtypingr   Ϊpolars._utils.variousr   Ϊpolars._utils.wrapr   Ϊpolarsr   r	   r6   r   r   ϊ<module>r<      s%   πέ "ε  ε 5έ (αέχfAς fAr   