
    ˀh'                        d dl mZ d dlZd dlmZ d dlmZ d dlmZm	Z	m
Z
 d dlmZ d dlmZmZ d dlmZ d dlmZ erd dlZd d	lmZ d d
lmZ  G d d      ZddZy)    )annotationsN)partial)perf_counter)TYPE_CHECKINGAnyLiteral)eprintverbose)ComputeError)_scan_pyarrow_dataset_impl)Table)	LazyFramec                  |    e Zd ZdZddd	 	 	 	 	 	 	 	 	 ddZddZddZddd	 	 	 	 	 ddZdd	Zdd
Z	ddZ
ddZy)IcebergDatasetz Dataset interface for PyIceberg.N)snapshot_idiceberg_storage_propertiesc                   d | _         d | _        || _        || _        || _        t        |t              r|| _         y || _        y )N)_metadata_path_table_snapshot_id_iceberg_storage_properties_reader_override
isinstancestr)selfsourcer   r   reader_overrides        u/var/www/html/wine-match-dev/backend/winematch-backend/venv/lib/python3.12/site-packages/polars/io/iceberg/dataset.py__init__zIcebergDataset.__init__   sF     #'+E(GV
 fc""(D DK    c                     y)zName of the reader.iceberg )r   s    r   reader_namezIcebergDataset.reader_name1   s    r    c                V    ddl m}  || j                         j                               S )zFetch the schema of the table.r   schema_to_pyarrow)pyiceberg.io.pyarrowr'   tableschema)r   r'   s     r   r*   zIcebergDataset.schema5   s    : !4!4!677r    )limit
projectionc          
         ddl }|j                  j                  j                         }|rt	        d| d|        | j                         }|dn
t        |      }| j                  }|!|j                  |      d| }t        |      | j                  xs t        j                  d      }	|	r|	dvrd	|	 d
}t        |      |	dk(  rdn|	dk7  rdnd}
g }i }|	dk7  rz|
swddlm}m} |rt	        d       t!               }|j#                  |||      }d}t%        |j'                               D ]  \  }}|j(                  j*                  |j,                  k7  rd|j(                  j*                   }
 n|j.                  rg ||<   |j.                  D ]y  }|j0                  |j2                  k7  rd|j0                   }
 nO|j*                  |j,                  k7  rd|j*                   }
 n%||   j5                  |j6                         |dz  }{ |
r n(|j5                  |j(                  j6                         	 |rt!               |z
  }t	        d|dd       |
s=ddlm} |r't	        dt=        |       d dt=        |       d        ||ddd |f!      S |	dk(  rd"|
 }t?        |      |rt	        d#|
        tA        tB        ||||$      }dd%l"m#}  ||jI                               }tJ        jL                  jO                  ||d&'      }|S )(zConstruct a LazyFrame scan.r   Nz*IcebergDataset: to_dataset_scan(): limit: z, projection: )*ziceberg snapshot ID not found: POLARS_ICEBERG_READER_OVERRIDE)native	pyicebergz-iceberg: unknown value for reader_override: 'z*', expected one of ('native', 'pyiceberg')r1   z"forced reader_override='pyiceberg'r0   z native scans disabled by default)DataFileContent
FileFormatz7IcebergDataset: to_dataset_scan(): begin path expansion)r   r+   selected_fieldsznon-parquet format: z unsupported deletion file type: z"unsupported deletion file format:    z:IcebergDataset: to_dataset_scan(): finish path expansion (z.3fzs))scan_parquetz:IcebergDataset: to_dataset_scan(): native scan_parquet() (z sources), deletion files: z files, z sourcesinsertignoreziceberg-position-delete)missing_columnsextra_columns_deletion_filesz)iceberg reader_override='native' failed: zGIcebergDataset: to_dataset_scan(): fallback to python[pyiceberg] scan: )r   n_rowswith_columnsr&   T)pyarrow)(polars._utils.logging_utilsloggingr
   r	   r)   tupler   snapshot_by_id
ValueErrorr   osgetenvpyiceberg.manifestr2   r3   r   scan	enumerate
plan_filesfilefile_formatPARQUETdelete_filescontentPOSITION_DELETESappend	file_pathpolars.io.parquet.functionsr6   lenr   r   r   r(   r'   r*   plr   _scan_python_function)r   r+   r,   polarsr
   tblr4   r   msgr   fallback_reasonsourcesdeletion_filesr2   r3   
start_timerH   total_deletion_filesi	file_infodeletion_fileelapsedr6   funcr'   arrow_schemalfs                              r   to_dataset_scanzIcebergDataset.to_dataset_scan;   s    	%--''//1 !)l, jjl$.$6&E*<M''"!!+.67}E o% // 
299,4
 6MM#$$NP  S/!
 +- 1
 (* 4 	 /1k)/FPQ%J88'uo  D $%  )$//*; < 99>>--1C1CC.y~~/I/I.JK $ ))(*N1%)2)?)? 2(00O4T4TT B#0#8#8"9!; , "(44
8J8JJ D#0#<#<"=!? , "&q)001H1HI,1,!2$ #y~~778?9B &.:5..5c]">
 @..1'l^ <'';&<H>*+85   (&!:N K	  (=o=NOCs##77F6GI
 &##
 	;(6\\//dD/Q	r    c                    | j                   8| j                  d}t        |      | j                         j                  | _         | j                   S )zFetch the metadata path.1impl error: both metadata_path and table are None)r   r   rD   r)   metadata_location)r   rY   s     r   metadata_pathzIcebergDataset.metadata_path   sG    &{{"I o%"&**,"@"@D"""r    c                   | j                   q| j                  d}t        |      t               rt	        d| j                         ddlm} |j                  | j                  | j                  xs i       | _         | j                   S )z!Fetch the PyIceberg Table object.rh   z;IcebergDataset: construct table from self._metadata_path = r   )StaticTable)ri   
properties)	r   r   rD   r
   r	   pyiceberg.tablerl   from_metadatar   )r   rY   rl   s      r   r)   zIcebergDataset.table   s    ;;""*I o%yUt?R?R>VWX3%33"&"5"5;;Ar 4 DK
 {{r    c           
         | j                         | j                  | j                  | j                  d}t	               r4|d   }|d   }t        |d         }|d   }t        d| d| d| d	|        |S )
N)rj   r   r   r   rj   r   r   r   #IcebergDataset: getstate(): path: '', snapshot_id: '', iceberg_storage_properties: , reader_override: )rj   r   r   r   r
   _redact_dict_valuesr	   r   state	path_reprr   	keys_reprr   s         r   __getstate__zIcebergDataset.__getstate__   s    !//1,,*.*J*J#44	
 9o.I.K+E2N,OPI#$56O# %!!, .//8k :$$3#4	6 r    c           
         t               r4|d   }|d   }t        |d         }|d   }t        d| d| d| d|        t        j	                  | |d   |d   |d   |d   	       y )
Nrj   r   r   r   rq   rr   rs   rt   )r   r   r   )r
   ru   r	   r   r   rv   s         r   __setstate__zIcebergDataset.__setstate__  s    9o.I.K+E2N,OPI#$56O# %!!, .//8k :$$3#4	6 	/"m,',-I'J!"34 	  	
r    )
r   zstr | Tabler   
int | Noner   zdict[str, Any] | Noner   z%Literal['native', 'pyiceberg'] | NonereturnNone)r~   r   )r~   z	pa.schema)r+   r}   r,   zlist[str] | Noner~   r   )r~   r   )r~   dict[str, Any])rw   r   r~   r   )__name__
__module____qualname____doc__r   r$   r*   rf   rj   r)   rz   r|   r#   r    r   r   r      s    * #'<@!!  	!
 %:! ?! 
!48 !'+	W W %	W
 
Wz	#40
r    r   c                    t        | t              r| j                         D ci c]  }|d c}S | dt        |       j                   dS dS c c}w )NREDACTED<z object>r   )r   dictkeystyper   )objks     r   ru   ru   -  s_     c4  !$
+1J+ ? c##$H-
 +s   
A)r   r   r~   r   )
__future__r   rE   	functoolsr   timer   typingr   r   r   polars._reexport	_reexportrU   r?   r	   r
   polars.exceptionsr   polars.io.iceberg._utilsr   r>   parn   r   polars.lazyframe.framer   r   ru   r#   r    r   <module>r      sA    " 	   . .  1 * ?%0V
 V
rr    