
    Jhy                     B   d dl m Z  d dlmZmZ d dlmZ d dlmZ d dlm	Z	m
Z
mZ d dlmZ d dlmZ d dlmZ d dlmZ d d	lmZ d d
lmZ d dlmZ d dlZ G d de	      Z G d de      Z G d de      Z G d de	      Z G d de	      Z G d de      Z G d de	      Zy)    )datetime)ListOptional)File)
UploadFile)	BaseModelFieldcomputed_field)FileResponseSchema)WebCrawlerFiles)get_file_header)WebCrawlerFileType)APIException)statusNc                      e Zd ZU  edd      Zee   ed<    edd      Zee   ed<    edd      Z	ee
   ed<    edd	      Zee   ed
<    edd      Zee   ed<    edd      Zee   ed<    edd      Zee   ed<    edd      Zee   ed<    edd      Zee   ed<    edd      Zee   ed<    edd      Zee   ed<    edd      Zee   ed<    edd      Zee   ed<    edd      Zee
   ed<    edd      Zee
   ed<    edd      Zee   ed<    edd       Zee   ed!<    edd"      Zee   ed#<    edd$      Zee   ed%<    edd&      Zee   ed'<    edd(      Zee   ed)<    edd*      Zee   ed+<    edd,      Zee   ed-<    edd.      Z ee   ed/<    edd0      Z!ee   ed1<    edd2      Z"ee   ed3<    edd      Z#ee   ed4<   y)5WebCrawlerBaseNzv1.0exampleversion750mlbottle_size_default   location_idUSDcurrency_codeFdisable_historydisable_keywordz
source.csvfile_sourcez2024 Historyhistory_nameproduct	page_typeTsearch_bottle_sizesearch_vintagez#product-listselectorvintage_default_nve   	client_id   group_id2bottle_size_indexz3,4description_indexesz5,6history_indexesz7,8keyword_indexes9files_index1	sku_index10	url_index11vintage_index12price_index13tax_status_index,match_input_separatorbottle_size_default_blank)$__name__
__module____qualname__r	   r   r   str__annotations__r   r   intr   r   boolr   r   r   r!   r"   r#   r$   r%   r'   r)   r+   r,   r-   r.   r0   r2   r4   r6   r8   r:   r<   r=        b/var/www/html/wine-match-dev/backend/winematch-backend/src/apps/web_crawler/schemas/web_crawler.pyr   r      sA   "48GXc]8).tW)E#E!&tQ!7K#7#(u#=M8C==&+E5&AOXd^A&+E5&AOXd^A!&t\!BK#B"'n"EL(3-E$T9=Ix}=).ud)CC%*5$%?NHTN?#D/BHhsmB).tU)CC$T37Ix}7#D#6Hhsm6',T3'?x}?).tU)C#C%*4%?OXc]?%*4%?OXc]?!&tS!9K#9$T37Ix}7$T48Ix}8#(t#<M8C=<!&tT!:K#:&+D$&?hsm?+0s+C8C=C05eU0Kx~KrF   r   c                       e Zd ZU  edd      Zeed<    edd      Zeed<    edd      Zeed<    edd	      Z	eed
<    edd      Z
eed<    edd      Zee   ed<   y)WebCrawlerCreate.WCR123r   codeWine Crawlernamer;   	delimiter;output_delimiterIN PROGRESSr   Nhttps://example.comurl)r>   r?   r@   r	   rK   rA   rB   rM   rN   rP   r   rS   r   rE   rF   rG   rI   rI   /   sn    c8,D#,c>2D#23,Is,!#s3c3]3FC3t-BCC#CrF   rI   c                       e Zd ZU  edd      Zee   ed<    edd      Zee   ed<    edd      Z	ee   ed<    edd	      Z
ee   ed
<    edd      Zee   ed<    edd      Zee   ed<   y)WebCrawlerUpdateNrJ   r   rK   rL   rM   r;   rN   rO   rP   rQ   r   rR   rS   )r>   r?   r@   r	   rK   r   rA   rB   rM   rN   rP   r   rS   rE   rF   rG   rU   rU   8   s    h7D(3-7n=D(3-=$T37Ix}7&+D#&>hsm>!$>FHSM>t-BCC#CrF   rU   c                   h    e Zd ZU eed<   dZee   ed<   dZee	   ed<   dZ
ee   ed<   dZee   ed<   y)WebCrawlerFileResponseidN	file_typefile
created_at
updated_at)r>   r?   r@   rC   rB   rY   r   rA   rZ   r   r[   r   r\   rE   rF   rG   rW   rW   A   sC    G#Ix}#)-D(%
&-%)J")%)J")rF   rW   c                   4   e Zd ZU eed<   dZee   ed<   eed<   dZe	ed<   dZ
eed<   dZeed	<   dZeed
<   dZeed<   dZeed<   eed<   dZee   ed<   dZee   ed<   dZee   ed<   dZee   ed<   dZee   ed<   dZee   ed<   dZee   ed<    G d d      Zy)MatchedLogResponserX   NliteralrK   Tprocess_keywordr   total_recordshistory_matchedkeyword_matchedambiguous_matchedunknown_winesr   
crawl_date
date_startdate_endmessagedate_createdlast_updatedexecution_timec                       e Zd ZdZy)MatchedLogResponse.ConfigTN)r>   r?   r@   from_attributesrE   rF   rG   Configrn   \   s    rF   rp   )r>   r?   r@   rC   rB   r_   r   rA   r`   rD   ra   rb   rc   rd   re   rf   r   rg   rh   ri   rj   rk   rl   rp   rE   rF   rG   r^   r^   I   s    G!GXc]!
I OT M3OSOSsM3K%)J")%)J")#'Hhx '!GXc]!'+L(8$+'+L(8$+$(NHSM( rF   r^   c                   0   e Zd ZU eed<   eed<   eed<   eed<   eed<   eed<   dZee   ed<   dZee	e
      ed	<   eed
<   dZee   ed<   dZee   ed<   dZee   ed<    edd      Zee   ed<   dZee   ed<    edd      Zee   ed<   ddiZdZee   ed<   dee   fdZdedee	e      fdZedee	e      fd       Zedee	e      fd       Zedee	e      fd       Zedee	e      fd       Zedee	e      fd       Z edee	e      fd       Z!y)WebCrawlerResponserX   rK   rM   rN   rP   r   NrS   files
is_deletedr[   r\   
deleted_atTr   show_match_run
match_datematched_logsro   latest_master_filereturnc                    | j                   sg S | j                   D cg c]"  }|j                  t        j                  k(  s!|$ }}|sg S t	        |d d      d   }|sg S t        |j                  dd      }t        | dd      }|sg S 	 t        ||	      }t        |      D 	cg c]
  \  }}	|	|d
 c}	}S c c}w # t        $ r}g cY d}~S d}~ww xY wc c}	}w )z
        Returns the headers of the latest MASTER_FILE type source file for the web crawler.
        This method retrieves the headers from the latest MASTER_FILE associated with the web crawler.
        c                     | j                   S )N)r[   )fs    rG   <lambda>z8WebCrawlerResponse._latest_file_header.<locals>.<lambda>   s
     rF   T)keyreverser   pathNrN   r;   )custom_delimiter)column_nameindex)
rs   rY   r   MASTER_FILEsortedgetattrrZ   r   	Exception	enumerate)
selfr}   master_fileslatest_filecsv_pathrN   headerseidxrM   s
             rG   _latest_file_headerz&WebCrawlerResponse._latest_file_headeru   s    
 zzI#'::_a@R@^@^1^__I\/EtTUVWI;++VT:D+s3	I	%hKG FOwEWX	Ts3XX# `  	I	 Ys/   "B7 B7	B< $C<	CCCCindexes_fieldc                 F   | j                         }|sg S t        | |d       }|sg S t        |t              r|j	                  d      n|}|sg S |D cg c]E  }t        |      j                         st        |      t        |      k  s5|t        |         d   G c}S c c}w )Nr;   r   )r   r   
isinstancerA   splitisdigitrC   len)r   r   r   indexesr   s        rG   _map_indexes_to_columnsz*WebCrawlerResponse._map_indexes_to_columns   s    **,I$t4I(27C(@'--$gI<CvSs3xGWGWGY^abe^filmtiu^uC!-0vvvs   B0BBc                 $    | j                  d      S )Nr.   r   r   s    rG   keyword_indexes_from_csvz+WebCrawlerResponse.keyword_indexes_from_csv       ++,=>>rF   c                 $    | j                  d      S )Nr-   r   r   s    rG   history_indexes_from_csvz+WebCrawlerResponse.history_indexes_from_csv   r   rF   c                 $    | j                  d      S )Nr6   r   r   s    rG   vintage_index_from_csvz)WebCrawlerResponse.vintage_index_from_csv   s    ++O<<rF   c                 $    | j                  d      S )Nr4   r   r   s    rG   url_index_from_csvz%WebCrawlerResponse.url_index_from_csv       ++K88rF   c                 $    | j                  d      S )Nr,   r   r   s    rG   description_indexes_from_csvz/WebCrawlerResponse.description_indexes_from_csv   s    ++,ABBrF   c                 $    | j                  d      S )Nr2   r   r   s    rG   sku_index_from_csvz%WebCrawlerResponse.sku_index_from_csv   r   rF   )"r>   r?   r@   rC   rB   rA   rS   r   rs   r   rW   rD   r[   r   r\   ru   r	   rv   rw   rx   r^   model_configry   r   listr   r   r
   r   r   r   r   r   r   rE   rF   rG   rr   rr   `   s   G
I
INKC#48E8D/018%)J")%)J")%)J")%*4%>NHTN>%)J")16t^1TL(-.T%t,L7;!34;YXd^ Y4
wS 
wXd3i=P 
w ?(49*= ? ? ?(49*= ? ? =c(; = = 9HT#Y$7 9 9 ChtCy.A C C 9HT#Y$7 9 9rF   rr   c                       e Zd ZU  edd      Zee   ed<    edd      Zee   ed<    edd      Z	ee   ed<    edd	      Z
ee   ed
<   y)WebCrawlerFilterSchemaNrJ   r   rK   rL   rM   activer   r   r   )r>   r?   r@   r	   rK   r   rA   rB   rM   r   r   rC   rE   rF   rG   r   r      sV    h7D(3-7n=D(3-=!$9FHSM9!&tQ!7K#7rF   r   ) r   typingr   r   fastapir   FastAPIFiler   pydanticr   r	   r
   src.apps.files.schemas.filer   'src.apps.web_crawler.models.web_crawlerr   src.apps.files.models.filesrc.utils.helpers.functionsr   src.utils.enumsr   src.core.exceptionsr   r   sysr   rI   rU   rW   r^   rr   r   rE   rF   rG   <module>r      s     ! '  5 5 : C + 7 . ,  
LY L<D~ DD~ D*Y * .Q9 Q9h8Y 8rF   