
    QhL                     `   d dl Z d dlZd dlZd dlZd dlmZ d dlmZ d dlm	Z	m
Z
mZmZ d dlZd dlmZ d dlmZmZ d dlmZmZmZmZmZ d dlmZ d d	lmZ d d
lmZ d dlm Z  d dl!m"Z"m#Z# d dl$m%Z% d dl&m'Z'm(Z( d dl)m*Z* d dl+m,Z,m-Z- d dl.m/Z/ d dl0m1Z1 d dl2m3Z3m4Z4m5Z5m6Z6 d dl7m8Z8 d dl9m:Z: d dl;m<Z< d dl=m>Z> d dl?m@Z@ d dlAmBZB dZCdZDdZEdZFdZG ej                  d      ZI ej                  d ej                        ZK ej                  d!      ZL ej                  d"      ZM ej                  d#      ZNg fd$ed%eeO   fd&ZPd'eOd(eOfd)ZQd$ed*e"d+eOd(eOfd,ZRd$ed-eOd(ee"   fd.ZSd$ed/eOd+e d(eeO   fd0ZTd$ed1e"d+e d(eeO   fd2ZU	 dOd$ed3eOd4eOd(eee	      fd5ZVd6eOd7eOd(eOfd8ZWd9eOd(eOfd:ZXd;eeO   d(eeO   fd<ZYd$ed;eeO   d(eeO   fd=ZZd;eeO   d(eeO   fd>Z[d?eOd(e\fd@Z]d?eOd(eOfdAZ^d9eeO   d(eeO   fdBZ_d$ed/eOdCeOdDe\dEeeO   f
dFZ`dPd$ed/eOdCeOdDe\dGeaf
dHZbd$edIeOfdJZcd$ed4eafdKZdd?eOdz  d(e\fdLZed$edMe/fdNZfy)Q    N)defaultdict)Set)AnyDictListOptional)BeautifulSoup)HTTPExceptionstatus)Integerand_castfuncor_)Session)CDN)File)MatchedType)
WebCrawlerWebCrawlerFiles)
BottleSize)ProducerProducerNoise)get_all_retailers)	WorkGroupWorkGroupWebCrawler)SaveMatchedRequest)Vintage)WineWineDbWineKeyword	WineNoise)	WineMatch)settings)APIException)API_PREFIXES)WebCrawlerFileType)QueryPaginatori  z(?i)(^|\b|[^\w\s]|\s)z(\s|[^\w\s]|\b|$)  z$(^|\b|\W)(17|18|19|20)\d{2}(\W|\b|$)z(^|\b|\W)n\.?v\.?(\W|\b|$)u%   [!@#$%&?()\[\]{}⟨⟩<>“”\",:;*]u#   (?!\+)[./´`‘’'\-‒–—―]+\s+dbcodesc                   K   d|v r.t        | d       d {   }|D cg c]  }|j                   }}| j                  t        j                        j                  t        j                  j                               j                         }|D cg c]  }|d   	 }}g }|D ]  }||v r| j                  t        j                        j                  t              j                  t        j                  |k(        j                         }|D cg c]  }|d   	 }}|s||j                  |       |j                  |        t        t        |            }|S 7 Cc c}w c c}w c c}w w)NallF)r,   	paginatedr   )r   codequeryr   nameorder_byascr/   r   webCrawlerCodejoinfilterextendappendlistset)	r,   r-   	retailersrgroupsgjoined_listretrtss	            f/var/www/html/wine-match-dev/backend/winematch-backend/src/apps/wine/validation/services/validation.pypopulate_retailersrE   /   s9    ~+rUCC	!*+A++ XXinn%..y~~/A/A/CDHHJF"#qad#F#K $&= ,;;<i	#-.	  "%%A1Q4%C%""3's#$  s;'(K7 D+ $ &s?   E,EE,EAE,E"A1E,E'E,<E,E,	file_pathreturnc                    K   yw)Nzutf-8 )rF   s    rD   detect_encodingrJ   P   s      s   crawlermatched_typec                   K   | j                  t              j                  t        j                  |j                  k(  t        j
                  t        j                  k(  t        j                  j                  d             j                  t        j                  j                               j                         }|st        dddit        j                   d      | j                  t"              j                  t"        j                  |j$                  k(        j                         }|st        dddit        j                   d      |j&                  }|S w)N
Validationfilez"No file found for this Web Crawlermoduleerrorstatus_codemessagezFile not found)r2   r   r8   web_crawler_idid	file_typer'   HISTORY
deleted_atis_r4   
created_atdescfirstr%   r   HTTP_404_NOT_FOUNDr   file_idpath)r,   rK   rL   	last_filerO   rF   s         rD   get_matched_file_namerb   W   s    
!	**gjj8%%);)C)CC&&**40


 
/,,113	4	  ?@118	
 	
 88D>  I,=,=!=>DDFD+,11$	
 	
 		Is   EEr1   c                    K   | j                  t              j                  t        j                  |k(        j	                         }|r|S y wN)r2   r   r8   r1   r]   )r,   r1   rK   s      rD   find_webcrawler_by_codere   y   s<     hhz"))*//T*ABHHJGs   A	Aretailer_codec                 l   K   t        | |       d {   }|sg S t        | ||       d {   S 7 7 wrd   )re   load_matched_results)r,   rf   rL   rK   s       rD   load_matched_results_by_coderi      s;     +B>>G	%b'<@@@ ? As   404244
webcrawlerc                   K   g }	 t        | ||       d {   }t        j                  j                  |      s%t	        ddd| it
        j                  d|       t        j                  |t        j                        s%t	        ddd| it
        j                  d|       t        |       d {   }t        |d|      5 }|j                         }|dd  D cg c]#  }|j                         s|j                         % }}d d d        |S 7 7 `c c}w # 1 sw Y   |S xY w# t        $ r}	t        d	|	        Y d }	~	|S d }	~	ww xY ww)
NrN   rO   zFile not found: rP   zNo permission to read file: r>   )encoding   zError loading matched results: )rb   osr`   existsr%   r   r^   accessR_OKrJ   open	readlinesstrip	Exceptionprint)
r,   rj   rL   output_listrF   file_encodingflineslinees
             rD   rh   rh      sX    K5/JMM	ww~~i(#!1)=>"55*9+6	  yyBGG,#!=i[IJ"556ykB	  .i88)S=9 	OQKKME49!"IND4::<NKN	O 3 N" 9 O	O   5/s3445s   ED+ DBD+ 6D7D+ 	D!D7D	DD+ ED+ D+ DD(#D+ &E(D+ +	E4EEEEwine_id
wine_db_idc                    | j                  t        j                  t        j                  t        j                  t        j
                  t        j                  t        j                  t        j                  t        j                  t        j                  t        j                  t        j                  t        j                  t        j                         j#                  t        j$                        j'                  t        j(                        j#                  t*        t*        j,                  t        j.                  k(        j1                  t*        j2                  |k(        j5                  d      j7                         }|rt9        |      S | j                  t        j                  t        j                  t        j                  t        j
                  t        j                  t        j                  t        j                  t        j                  t        j                  t        j                  t        j                  t        j                  t        j                         j#                  t        j$                        j'                  t        j(                        j1                  t        j2                  |k(        j5                  d      j7                         }|r/t9        |      }|j;                  d       |j;                  d       |S y)zp
    Equivalent of Grails getWinePropertiesForMatchValidation
    Returns a list of wine properties or None
    rm   r*   N)r2   r    colortype	sweetnessvarietynoter3   wine_countrywine_regionwine_locationwine_locale	wine_siter   	name_showr!   patternr7   producer	outerjoinwine_keywordr   r~   rV   r8   literallimitr]   r;   r:   )r,   r}   r~   wine	wine_lists        rD   (get_wine_properties_for_match_validationr      s    	LLKKNNKKKK  	
< 
foo		6&&	'	dDOOvyy0	1 
'	(	q	M 	R Dz 	LLKKNNKKKK  	
 
foo		6&&	'	*,	-	q	) 	. J	    r   labelc                 ,    | r	|r|  d| S | xs |xs dS )Nr)   r*   rI   )r   r   s     rD   concat_namer      s(    E1UG$$"u""r   textc                 j    | r0| j                         r t        d|  dd      }|j                         S | S )Nz<p>z</p>zhtml.parser)rt   r	   get_text)r   soups     rD   cleanup_numeric_htmlr     s3    

s4&-}=}}Kr   
input_textc                 b   i }| r| j                         sy | j                         } |j                         D ]h  \  }}t        t	        j
                  |      z   t        z   }t	        j                  |t        |z   t        z   |       } t	        j                  dt        |       } j | j                         S )Nr+   )	rt   loweritemsREGEX_PARSE_PREFIXreescapeREGEX_PARSE_SUFFIXsub	STR_SPACE)r   aliases_mapkvr   s        rD   replace_aliasesr   
  s    K
 Z--/!!#J!!# ;1$ryy|36HHVVGY]Y%>
K
VVFIz:
;
 r   c                    |r|j                         sy |j                         }| j                  t        j                        j                         }|D cg c]  }|d   s	|d   j                          }}dh}|D ]E  }||vst        t        j                  |      z   t        z   }t        j                  |t        |      }G |j                         S c c}w )Nr   r*   )rt   r   r2   r   r3   r/   r   r   r   r   r   r   )r,   r   bottle_sizesbbottle_exclusionsbottler   s          rD   remove_size_infor     s    Z--/!!#J88JOO,002L*6?Q!A$AaDJJL?L?#%$ @**(299V+<<?QQGJ?J@
  @s   
C Cc                     | rn| j                         r^t        j                  t        |       } t        j                  t        |       } t
        j                  t        |       } | j                         } | S rd   )rt   PATTERN_VINTAGE_PARSERr   r   PATTERN_NV_PARSERPATTERN_MULTIPLE_SPACE)r   s    rD   remove_vintagesr   .  sZ    j&&(+//	:F
&**9jA
+//	:F
%%'
r   sc                 F    	 | j                  d       y# t        $ r Y yw xY w)NasciiTF)encodeUnicodeEncodeErrorr   s    rD   is_asciir   7  s(    	 s    	  c                 Z    dj                  d t        j                  d|       D              S )Nr*   c              3   R   K   | ]  }t        j                  |      d k7  s| ! yw)MnN)unicodedatacategory).0cs     rD   	<genexpr>z strip_accents.<locals>.<genexpr>@  s$     aAUAUVWAX\`A`1as   ''NFD)r7   r   	normalizer   s    rD   strip_accentsr   ?  s$    77ak33E1=aaar   c                 6   | r| j                         s| S t        |       st        |       } | j                         } t        j                  t        |       } t        j                  t        |       } t        j                  t        |       } | j                         } | S rd   )
rt   r   r   r   PATTERN_CUSTOM_PUNCT_1r   	STR_BLANKPATTERN_CUSTOM_PUNCT_2r   r   )r   s    rD   cleanupr   C  st    tzz|D>T"::<D!%%i6D!%%i6D!%%i6D::<DKr   matched_filter	is_narrow	wine_infoc                   K   t        |      dkD  r|d   r|d   j                         nd}t        |      dkD  r|d   r|d   j                         nd}t        d| d| d| d| d	| 
       t        | xr |xr |j                         dk(  xr |        t        |xr | xs |j                         d
k7         i }||d<   t        |      dkD  r|d   r|d   j                         nd}||d<   |r
|dk7  r|dd nd|d<   |r
|dk7  r|dd nd|d<   t        |      dkD  r|d   j                         nd|d<   t        |      dkD  r|d   j                         nd}	|	r|	j                         dk(  rd}	|	|d<   t        |      dkD  r|d   j                         nd|d<   t        |      dkD  r|d   j                         nd|d<   t        |      dkD  r|d   j                         nd|d<   t        |      dkD  r|d   j                         nd|d <   t        |      d!kD  r|d!   j                         nd|d"<   t        |      dkD  r|d   j                         nd|d#<   t	        | |d   |d         }
|j                         d$k(  r|
r|
d%   sy|j                         d&k(  r|
r|
d%   ryt        |      d'kD  r|d'   nd}||d(<   t        |      }t        |      }t        | |      }t        |      }t        |      }t        |      }||d)<   |
r}|
d*   |d+<   |
d%   |d,<   |
d   |d-<   |
d.   |d/<   |
d   |d0<   |
d   |d1<   |
d   |d2<   t        |
d!   |
d         |d3<   |
d   |d4<   |
d   |d5<   |
d   |d6<   |
d   |d7<   |
d'   |d8<   |
d   |d9<   g |d:<   |S w);z0
    Equivalent to Groovy parseKeywordMatch
       keyword   NzMatch type: z, Error string: z for wine info: z and retailer: z and is_narrow: historyretailerCoder   r*   wineAlertId?   rV   	   wineDbId   
bottleSize   zn.v.NVvintage   price   	taxStatus   urldescription   historyStringskuzrp wines   znon rp wines
   originalKeywordStringkeywordString   maturitymongoIdr   rm   wineTyper   r   waNotewineNamecountryregionlocationlocalesiter   
wineNoises)lenrt   rv   r   r   r   r   r   r   r   r   )r,   rf   r   r   r   
match_typeerror_stringresultwine_alert_idr   r   keyword_strings               rD   parse_keyword_matchr   S  s6     *-Y!);	!1##%R[J+.y>A+=)A,9Q<%%'TXL	
zl"2<.@PQZP[[jkxjy  zJ  KT  JU  	V 
i-
_J
_:+;+;+=+J
_S_O_`	)
KZJ:+;+;+=+JL  F*F> -0	NQ,>9Q<IaL&&(UWM)F=)6=C;O="%UWF4L.;QT@Tr*Z\F:36y>A3E9Q<--/4F<&))nq&8il  "dG7==?f,F9.1)nq.@il((*dF7O25i.12D)A,,,.$F;,/	NQ,>IaL&&(DF5M47	NQ4FIaL..0DF=7:9~7Jim113PTF?-0^b-@IbM'')dF5M3BtfZFXYD+Td2h/Td2h '*)nr&9Yr]rN&4F"#).9N$^4N%b.9N$^4N^,N$^4N,F? !"Xz Hyq'w!!Wz"1g{ Gy7x(b47;z Gy7x!!Wz7xbv HyF<Ms   N
Noffsetc                   K   g }g }|s||dS t        | |t        j                         d {   }|s||dS |r|||t        z    }nPt	        |      }|t        kD  r<|d t         }|t        z   dz
  t        z  }	t        d|	      D 
cg c]  }
|
t        z   }}
|D ]>  }|j                  d      }t        | ||||       d {   }|s.|j                  |       @ ||dS 7 c c}
w 7 &w)N)offsetsmatchesrm   |)	ri   r   KEYWORDMAX_RECORDS_PER_LOADr   rangesplitr   r:   )r,   rf   r   r   r  
kw_matchedr  output
total_rowstotal_pagesir{   r   matchs                 rD   get_keyword_matchesr    s    JG"z::/M;CVCVWWF"z::*>!>?[
,,112F%(<<q@EYYK9>q+9NOAq//OGO %JJsO	)"m^YXabbe$	% :66' X P cs4   *C(CAC(C!*C(?C& C(C(!C(producer_idc                 d  K   | j                  t        j                        j                  t        t        j
                  t        j                  k(        j                  t        j                  |k(        j                         }|D cg c]  }|d   s	|d   j                          }}|S c c}w w)Nr   )r2   r   noiser7   r   r  rV   r8   r   r/   r   )r,   r  noisesn
noise_lists        rD   producer_noiser    s     
$$%	h11X[[@	A	  K/	0		  )/71!A$!A$**,7J7 8s   BB0
B+B+'	B0c                   K   | j                  t        j                        j                  t        t        j
                  t        j                  k(        j                  t        j                  |k(        j                  t        j                  j                  d             j                  t        j                  dk7        j                         }|D cg c]  }|d   s	|d   j                          }}|S c c}w w)Nr*   r   )r2   r"   r  r7   r    r~   rV   r8   r   isnotr/   r   )r,   r~   r  r  r  s        rD   
wine_noiser    s     
!	fi**fii7	8	*,	-		%%d+	,		2%	&	  )/71!A$!A$**,7J7 8s   CC<
C7C73	C<c                 D   K   t        | xr | j                               S w)z[
    Returns True if the given string is not None, not empty, and not just whitespace.
    )boolrt   r   s    rD   is_not_blankr    s      aggi  s    payloadc           
        K   |j                   xs dj                         dv }d }t        |j                         d {   rn| j	                  t
              j                  t
        j                  |j                  k(        j                         }|s|st        di t        j                  d      | j	                  t              j                  |j                        j                         }|st        di t        j                  d      d }t        |j                         d {   r^| j	                  t               j                  |j                        j                         }|s|st        di t        j                  d	      d }t        |j"                         d {   r^| j	                  t$              j                  |j"                        j                         }|s|st        di t        j                  d
      d }|r*|r(|r&|j                   |j&                   |j&                   }| j	                  t(              j                  ||j*                  |j&                        j                         }|sLt)        ||j*                  |j*                  |j&                        }t-        d|        | j/                  |       d|_
        t        |j0                         d {   r|j0                  j3                         |_        |j6                  |_        |j                   xs dj                         dk(  rd|_        n(|j                   xs dj                         dk(  rd|_        	 t-        d|        | j=                          dddS 7 W7 q7 7 # t>        $ r>}	| jA                          t        di t        j                  dtC        |	             d }	~	ww xY ww)Nr*   )deferinsufficientrN   zWineDB not found.rP   )r1   zRetailer not found.)r3   zVintage not found.zBottle Size not found.)r   r   rU   )r   r3   r   rU   zCreating new Matched: DRAFTr   xr!  zzSaving Matched: zError saving Matched: TzSaved successfully.)successrT   )"historyTyper   r  r   r2   r    r8   r   r]   r%   r   r^   r   	filter_bywineRetailerwineVintager   wineBottleSizer   rV   r#   wineHistoryrv   addvalidationTypeuppervalidation_typewineOriginalKeywordr   r   commitru   rollbackstr)
r,   r  is_xorzwine_dbweb_crawlerr   bottle_sizer   matchedr|   s
             rD   save_matchedr9    sp    ""(b//15NNG G'**+++((6"))&..G<L<L*LMSSUw#26;T;T^q 
 ((:&00g6J6J0KQQSKrv7P7PZo
 	

 G'--...((7#--73F3F-GMMOw#26;T;T^r 
 K'00111hhz*44':P:P4QWWY7#26;T;T^v 
 M7{"++,WZZL8HI 		G<O<O`k`n`n	o	  &'$$++&>>	
 	&wi01
wGN'00111")"8"8">">"@11GO 	!r((*g5 #



#	*	*	,	> #

 	*+
		 (=>>_ ,  / 2> 2  

11,SVH5	
 	

sm   ;O)NC(O)&N'A<O)#N$EO)*N+BO)1N O)O)O)O)	O&(9O!!O&&O))r*   r*   )r   )gloggingrn   r   r   collectionsr   collections.abcr   typingr   r   r   r   chardetbs4r	   fastapir
   r   
sqlalchemyr   r   r   r   r   sqlalchemy.ormr   src.apps.files.models.cdnr   src.apps.files.models.filer   src.apps.match.enumsr   'src.apps.web_crawler.models.web_crawlerr   r   ,src.apps.wine.bottle_size.models.bottle_sizer   &src.apps.wine.producer.models.producerr   r   (src.apps.wine.retailer.services.retailerr   *src.apps.wine.validation.models.validationr   r   +src.apps.wine.validation.schemas.validationr   $src.apps.wine.vintage.models.vintager   src.apps.wine.wine.models.winer   r    r!   r"   %src.apps.wine_match.models.wine_matchr#   src.core.configr$   src.core.exceptionsr%   src.utils.constantsr&   src.utils.enumsr'   src.utils.paginationr(   r  r   r   r   r   compiler   
IGNORECASEr   r   r   r   r3  rE   rJ   rb   re   ri   rh   r   r   r   r   r   r   r  r   r   r   r   intr  r  r  r  r9  rI   r   rD   <module>rW     sl    	 	  #  , ,   ) 5 5 " ) + , O C J F U J 8 O O ; $ , , . / - ) 		 $$KL BJJ<bmmL #$LM #$JK #F+  >@  c BS S G j PS X[ Dg S Xj=Q A7 A3 AVa Afjknfo A7 
 R] bfgjbk @ 79TTT03Td3iTn## #c #c #s s  (3- & hsm  " (3-   bS bS b(3- HSM  [[[ [ 	[
 Cy[|7' 7# 7s 7_c 7mp 7:W 3  c !#* ! !T?7 T?-? T?r   