
    #hT6                        d dl Z d dlZd dlmc mZ d dlmZmZ d dlm	Z	 d dl
Z
d dlmZmZmZ d dlmZ d dlmZ d dlmZ d dlmZ d d	lmZ d d
lmZ d dlmZ d dlmZm Z  d dl!m"Z"m#Z#m$Z$m%Z% d dl&m'Z( d dlmZ d dl)m*Z* d dl+m,Z, d dl-m.Z. d dl/m0Z0 dZ1dede2fdZ3d*dede2de2de	e#   fdZ4dede"defdZ5dede2de%defdZ6dede2de7fdZ8ded e2d!e2fd"Z9ded#e2fd$Z:d+ded%ede;d&e7def
d'Zd,dede;d(e2fd)Z<y)-    N)datetimetimezone)Optional)HTTPException
UploadFilestatus)FileResponse)SQLAlchemyError)Session)APIException)CDN)File)create_file)
WebCrawlerWebCrawlerFiles)WebCrawlerCreateWebCrawlerFilterSchemaWebCrawlerResponseWebCrawlerUpdate)web_crawler)	constants)get_file_header)QueryPaginator)WebCrawlerFileTypematch_config_csvdb
crawler_idc                    | j                  t              j                  t        j                  |k(  t        j                  dk(        j                         }|s!t        t        i t        j                  d      | j                  t              j                  t        j                  |j                  k(        j                         }|D cg c]  }|j                   }}|D ]K  }| j                  t              j                  t        j                  |d   k(        j                         |d<   M |sg }i |j                  d|i}t        j                   |      S c c}w )NFWeb Crawler not foundmoduleerrorstatus_codemessagefile_idfilefiles)queryr   filterid
is_deletedfirstr   __name__r   HTTP_400_BAD_REQUESTr   web_crawler_idall__dict__r   r   model_validate)r   r   crawlerr'   tr&   response_datas          c/var/www/html/wine-match-dev/backend/winematch-backend/src/apps/web_crawler/services/web_crawler.pyget_web_crawlerr7   !   s)   hhz"))*--:*EzG\G\`eGefllnG33+	
 	
 HH_%,,_-K-Kwzz-YZ^^`E!&'AQZZ'E' Qxx~,,TWWY-GHNNPVQ


M ,,];; (s   
Epageper_pagefiltersc                    |dz
  |z  }| j                  t              j                  t        j                  dk(        }|r~|j	                  d      j                         D ][  \  }}t        t        |      r#|j                  t        t        |      |k(        }9t        t        i t        j                  d|        t        |t        d||d      }|j                         S )	N   FT)exclude_nonezInvalid filter key: r    z/web-crawlers)r(   schemaurloffsetlimituse_orm)r(   r   r)   r+   dictitemshasattrgetattrr   r-   r   r.   r   r   paginate)	r   r8   r9   r:   r@   r(   keyvalue	paginators	            r6   get_web_crawlersrK   9   s    Qh("FHHZ ''
(=(=(FGE!,,D,9??A 		JCz3'WZ%=%FG"# & ; ;23%8	 			 .OFZblpI     r3   returnc           
      :   	 t        di |j                         }| j                  |       | j                          | j	                  |       |S # t
        $ rB}| j                          t        t        i t        j                  dt        |             d }~ww xY w)Nz Database error during creation: r     )r   rC   addcommitrefreshr
   rollbackr   r-   r   r.   str)r   r3   
db_crawleres       r6   create_web_crawlerrW   P   s    
1',,.1

z
		


: 

336s1vh?	
 	

s   AA 	B=BBc           
         | j                  t              j                  t        j                  |k(  t        j                  dk(        j                         }|s!t        t        i t        j                  d      |j                  d      }	 |j                         D ]  \  }}t        ||      st        |||       ! | j                          | j                  |       t!        | |j                        }|S # t"        $ rB}| j%                          t        t        i t        j                  dt'        |             d }~ww xY w)NFr   r    T)exclude_unsetzDatabase error during update: )r(   r   r)   r*   r+   r,   r   r-   r   r.   rC   rD   rE   setattrrQ   rR   r7   r
   rS   rT   )	r   r   r3   rU   update_datarH   rI   web_craelerrV   s	            r6   update_web_crawlerr]   b   s   *%,,Z]]j-H*J_J_chJhiooqJ33+	
 	
 ,,T,2K
%++- 	0JCz3'
C/	0 			


:%b*--8 

334SVH=	
 	

s   "C7 /AC7 7	E =D==Ec           
         | j                  t              j                  t        j                  |k(  t        j                  dk(        j                         }|s!t        t        i t        j                  d      d|_        t        j                         |_        	 | j                          y# t        $ rB}| j                          t        t        i t        j                  dt!        |             d }~ww xY w)NFr   r    TzError during delete: )r(   r   r)   r*   r+   r,   r   r-   r   r.   r   utcnow
deleted_atrQ   r
   rS   rT   )r   r   rU   rV   s       r6   soft_delete_web_crawlerra      s    *%,,Z]]j-H*J_J_chJhiooqJ33+	
 	
 !J$OO-J

		 

263N3NZopstupvowXx
 	

s   B+ +	C64=C11C6match_crawler_idcreated_by_idc           
      V   | j                  t              j                  t        j                  |k(  t        j                  dk(        j                         }|st        dd      |}|j                  }|st        dd      t        j                  |      }|j                  dk7  rt        dd      |j                  }	 t        j                  |      }g }	t               }
|j                  d	      D ]  }t!        |j"                        }|D ]y  }|j$                  s|j&                  s|j&                  j)                         s8|j&                  j)                         ||j$                  <   |
j+                  |j$                         { |
j-                  |j"                  j/                                |s|	j1                  |        |	st        dd
      t3        t5        |
            }t;        j<                  t>        d       |j@                   dtC        jD                          d}t:        jF                  jI                  t>        |      }tK        |ddd      5 }tM        jN                  ||      }|jQ                          |jS                  |	       d d d        | j                  tT              j                  tT        jV                  dk(        j                         }|st        dd      tY        |||d|j                  |tC        jZ                               }| j+                  |       | j]                          | j_                  |       ta        |j                  |j                  tb        jd                        }| j+                  |       | j]                          tC        jZ                         jg                         |_4        | j]                          | j_                  |       |S # t        j6                  $ r}t        ddt9        |             d }~ww xY w# 1 sw Y   xY w)NF  #Web Crawler configuration not foundr#   detail  z"URL not set for this configuration   z/Failed to fetch XML data from the provided URL.z.//*zNo data rows found in XML.zError parsing XML: T)exist_ok-z.csvw zutf-8)modenewlineencoding)
fieldnamesz,CDN with label 'Crawler Files CDN' not foundztext/csv)original_namenamepathmimecdn_idrc   
created_atr/   r%   	file_type)5r(   r   r)   r*   r+   r,   r   r?   requestsgetr#   contentET
fromstringsetfindallrC   attribtagtextstriprP   updatekeysappendsortedlist
ParseErrorrT   osmakedirsMATCH_CONFIG_CSV_PATHrt   r   todayru   joinopencsv
DictWriterwriteheader	writerowsr   	is_activer   r_   rQ   rR   r   r   MASTER_FILEdate
match_date)r   rb   rc   r3   configr?   responsexml_datarootdataall_columnsitem	item_datachildcolumnsrV   csv_file_namecsv_file_pathr&   writercdnnew_fileconfig_filess                          r6   generate_csv_from_xmlr      sD   hhz"))*--;K*KZMbMbfkMklrrtG4YZZF
++C4XYY ||C Hs"4effHT}}X&eLL( 	'DT[[)I /99

0@0@0B+0::+;+;+=Ieii(OOEII./
 t{{//12I&	' C8TUUk*+
 KK%5||nAhnn&6%7t<MGGLL!6FM	m#rG	D 9
 ((3-

s}}4
5
;
;
=C4bcc #vv#??$H FF8IIKJJx #&))X[[\n\z\z{LFF<IIK )..0FIIKJJxOQ == T6I#a&4RSST s>    AO* O* )O* A/O* 46O* 9P*P=PPP(webcrawler_idc           
      h   | j                  t              j                  t        j                  |k(  t        j                  dk(        j                         }|s!t        t        i t        j                  d      |j                  s!t        t        i t        j                  d      	 | j                  t              j                  t        j                  |j                  k(  t        j                  t        j                  k(        j!                  t        j"                  j%                               j                         }| j                  t*              j                  t*        j                  |j                  k(        j                         }|s!t        t        i t        j                  d      |j,                  }t/        ||j0                        }t3        |      D 	cg c]
  \  }}	|	|d	 }
}}	|
S # t&        $ r.}t        di t        j                  dt)        |             d }~ww xY wc c}	}w )
NFzWebCrawler not foundr    z+No CSV file associated with this WebCrawlerr   zError fetching file: File not found)custom_delimiter)column_nameindex)r(   r   r)   r*   r+   r,   r   r-   r   r.   r'   r   r/   rz   r   r   order_byrx   desc	ExceptionrT   r   ru   r   	delimiter	enumerate)r   r   r3   latest_filerV   file_recordcsv_pathheadersidxrt   columns_with_indexess              r6   get_column_indexes_servicer      s   hhz"))*--=*H*J_J_chJhiooqG33*	
 	
 ==33A	
 	

HH_%V..'**<))-?-K-KK Xo005578UW 	" ((4.'';>>(ABHHJK33$	
 	
 H h9J9JKG R[[bQcdICD3?dd5  
 33+CF84	
 	

0 es   'BG4  H.4	H+=)H&&H+upload_file
is_historyc                 
  K   |j                   }| j                  t              j                  t        j                  |k(  t        j
                  dk(        j                         }|st        dd      |j                  }t        j                  t        j                        }|j                         }|j                  d      }	|s||vrt        dd| d|	 d	      |j                  | dd
d      }
|
j!                  d      d   }|s|st        dd      |j                   d|	 }t#        | ||dd       d{   }|st        dd      |s||_        | j'                          |rt(        j*                  nt(        j,                  }t/        ||j                  |      }| j1                  |       	 | j'                          |S 7 # t2        $ r.}| j5                          t        ddt7        |             d}~ww xY ww)zA
    Create a File record and save the actual uploaded file.
    Fre   rf   rg   z%Y-%m-%dri   z!Uploaded file name must contain 'rl   'rn   r<   .r   z%Match date not found in the file name_T)r   r   r   rc   
is_crawlerNi  zFailed to create file recordry   z(Database error during file association: )filenamer(   r   r)   r*   r+   r,   r   coder   nowr   utcr   strftimereplacesplitcreate_file_servicer   rQ   r   HISTORYr   r   rP   r
   rS   rT   )r   r   r   r   rs   r3   crawler_coder   r   	today_strname_without_coder   new_file_namecreated_filerz   web_crawler_filerV   s                    r6   r   r   +  s    
  ((Mhhz"))*--:*EzG\G\`eGefllnG4YZZ<<L <<%DIIKEz*I},C:[\h[iijktjuuv8wxx &--a.@"aH"((-a0Jj4[\\||nAi[1M, L 4RSS'
		 /9"**>P>\>\I&j,//enoFFi
		
 1(  i
6^_bcd_e^f4ghhis7   D:H<G=A7H5G	 H		H )G;;H  Hrowsc           
        K   | j                  t              j                  t        j                  |k(  t        j                  dk(        j                         }|st        dddit        j                  d      | j                  t              j                  t        j                  |k(  t        j                  t        j                  k(        j                  t        j                  j!                               j                         }|st        dddit        j                  d      | j                  t"              j                  t"        j                  |j$                  k(        j                         }|st        dddit        j                  d      |j&                  }t(        j&                  j+                  |      st        ddd	it        j                  d	      	 t-        ||j.                  |
      }|j6                  ||dS # t0        $ r<}t        dddt3        |       it        j4                  dt3        |             d}~ww xY ww)z/
    Preview the first few rows of a file.
    Fr   r   rf   r    r&   z"No file found for this Web Crawlerr   z!File does not exist on the server)r   r   zFailed to read CSV: N)	file_namerecordsr   )r(   r   r)   r*   r+   r,   r   r   HTTP_404_NOT_FOUNDr   r/   rz   r   r   r   rx   r   r   r%   ru   r   existsr   r   r   rT   HTTP_500_INTERNAL_SERVER_ERRORrs   )	r   r   r   r3   	last_filer&   	file_pathr   rV   s	            r6   preview_filer   d  s     hhz"))*--:*EzG\G\`eGefllnG !FG119	
 	
 	!	**j8%%);)G)GG

 
/,,113	4	   ?@118	
 	
 88D>  I,=,=!=>DDFD +,11$	
 	
 		I77>>)$ >?117	
 	

!9w?P?PW[\ ''   
 1#a&:;==*3q6(3	
 	

s*   GI!H	 9I		I7I		II)r<   
   N)F)   )=r   r   xml.etree.ElementTreeetreeElementTreer~   r   r   typingr   r{   fastapir   r   r   fastapi.responsesr	   sqlalchemy.excr
   sqlalchemy.ormr   src.core.exceptionsr   src.apps.files.models.cdnr   src.apps.files.models.filer   src.apps.files.services.filer   r   'src.apps.web_crawler.models.web_crawlerr   r   (src.apps.web_crawler.schemas.web_crawlerr   r   r   r   src.apps.web_crawler.servicesr   web_crawler_service	src.utilsr   src.utils.helpers.functionsr   src.utils.paginationr   src.utils.enumsr   r   intr7   rK   rW   r]   boolra   r   r   rT   r   rO   rL   r6   <module>r      sl   
 	 " " '   5 5 * * " , ) + K O  M ,  7 / .* < <S <0     3  hWmNn  .
7 
-= 
* 
$
7 
 
>N 
S] 
D
 
S 
T 
0Qg Q QS Qh8 7 8 3 8 v6' 6
 6 6Y] 6jn 6r@7 @ @3 @rL   