
    (h)                        d dl Z d dlZd dlZd dlZd dlmZmZ d dlZd dlZ	d dl
Z
d dlmZmZmZmZmZ d dlmZ d dlmZ d dlmZmZ d dlmZmZmZmZmZ d dlmZ d d	l m!Z! d d
l"m#Z# d dl$m%Z% d dl&m'Z' d dl(m)Z) d dlmZ d dl*m+Z+ d dl,m-Z- dede.fdZ/de.de.fdZ0de.de1de.fdZ2de.de3de4de.fdZ5d#de.de.de.fdZ6de.de.fdZ7dede1fdZ8dede9fd Z: ed!      fdedede;fd"Z<y)$    N)ListOptional)FastAPIFileHTTPException
UploadFilestatus)JSONResponse)Session)ProducerProducerKeyword)WineWineDbWineDuplicationWineKeyword	WineNoise)	Separator)APIException)join_pattern)ThreadPoolExecutor)BeautifulSoup)defaultdict)	WordAlias)
BottleSizefilereturnc                     | j                   j                  d      }| j                   j                  d       t        j                  |      }|d   xs dS )Ni   r   encodingzutf-8)r   readseekchardetdetect)r   rawresults      j/var/www/html/wine-match-dev/backend/winematch-backend/src/apps/wine/noise_parser/services/noise_parser.pydetect_encodingr&      sB    
))..
CIINN1^^C F*((    textc                 f    | r.| j                         rt        d|  dd      j                         } | S )z>Cleans up numeric HTML content by parsing and extracting text.z<p>z</p>zhtml.parser)stripr   get_text)r(   s    r%   cleanup_numeric_htmlr,   %   s0    

s4&-}=FFHKr'   
input_textaliases_mapc                 B   | r| j                         sy| j                         } d}d}d}|j                         D ]T  \  }}| t        j                  |       | }t        j
                  || | | |       } t        j
                  d||       } V | j                         S )zEReplaces aliases in the input text based on the provided aliases map.N(?i)(^|\b|\p{Punct}|\s)(\s|\p{Punct}|\b|$) \s+)r*   loweritemsreescapesub)r-   r.   regex_parse_prefixregex_parse_suffix	str_spacekvpatterns           r%   replace_aliasesr?   +   s    Z--/!!#J3/I!!# ;1'(17I6JKVVG	{1#i[%A:N
VVFIz:
;
 r'   bottle_sizesbottle_exclusionsc                     | r| j                         sy| j                         } d}d}d}|D ]9  }||vs| t        j                  |       | }t        j                  |||       } ; | j                         S )zRRemoves size information from the input text based on bottle sizes and exclusions.Nr0   r1   r2   )r*   r4   r6   r7   r8   )r-   r@   rA   r9   r:   r;   bottler>   s           r%   remove_size_inforD   <   s    Z--/!!#J3/I @**+,RYYv->,?@R?STGJ?J@
 r'   r>   c                    d }| r| j                         rd}t        j                  d      }t        j                  d      }t        j                  d      }|rI ||      }|r' ||       }|D ]  }	|	|vs| j                  |	|      }  n/t        j                  |||       } nt        j                  |||       } t        j                  |||       } t        j                  |||       } | j                         } | S )zmRemoves vintage years, NV patterns, and multiple spaces from the input text, with optional reserved patterns.c                 V    t        j                  d      }| r|j                  |       S g S )Nz(17|18|19|20)\d{2})r6   compilefindall)r(   vintage_patterns     r%   extract_all_vintagesz-remove_vintages.<locals>.extract_all_vintagesO   s)    **%:;04&&t,<"<r'   r2   z8(^|\b|\p{Punct}|\s)(17|18|19|20)\d{2}(\s|\p{Punct}|\b|$)z2(?i)(^|\b|\p{Punct}|\s)n\.?v\.?(\s|\p{Punct}|\b|$)r3   )r*   r6   rG   replacer8   )
r-   r>   rJ   r;   pattern_vintage_parserpattern_nv_parserpattern_multiple_spacereservedto_be_removedss
             r%   remove_vintagesrR   M   s    = j&&(	!#,g!hJJ'\]!#F!3+G4H 4Z @& FA(%/%7%79%E
F  VV$:IzR
 6	:NJVV-y*E
VV2IzJ
%%'
r'   c                    | r| j                         rd}d}t        j                  d      }t        j                  d      }t        j                  d      }| j                         s5t	        j
                  d|       j                  dd      j                  d      } | j                         } t        j                  |||       } t        j                  |||       } t        j                  |||       } | j                         } | S )	ziCleans up the input text by removing custom punctuation, normalizing spaces, and converting to lowercase. r2   u%   [!@#$%&?()\[\]{}⟨⟩<>“”\",:;*]u#   (?!\+)[./´`‘’'\-‒–—―]+r3   NFKDasciiignore)
r*   r6   rG   isasciiunicodedata	normalizeencodedecoder4   r8   )r(   	str_blankr;   pattern_custom_punct_1pattern_custom_punct_2rN   s         r%   cleanupr`   k   s    

		!#,T!U!#,R!S!#F!3 ||~((6==gxPWWX_`Dzz|vv,i>vv,i>vv,i>zz|Kr'   dbc                 L  K   	 i }| j                  t              j                  t        j                  dk(        j	                         }|D ]t  }|j
                  r|j
                  j                         nd}|j                  r|j                  j                  d      ng }|D ]  }|j                         }||vs|||<    v t        t        |j                         d             }|S # t        $ r/}t        t        t        |      t         j"                  d      d}~ww xY ww)	z8Maps aliases from the WordAlias model into a dictionary.WINErT   ,c                      t        | d          S )Nr   )len)items    r%   <lambda>zmap_aliases.<locals>.<lambda>   s    Sa\M r'   )keyzError mapping aliases.moduleerrorstatus_codemessageN)queryr   filter
alias_typeallwordr4   aliassplitdictsortedr5   	Exceptionr   __name__strr	   HTTP_500_INTERNAL_SERVER_ERROR)	ra   	alias_mapword_aliases
word_aliasreplacementaliasesrt   alias_loweres	            r%   map_aliasesr      s    
	 xx	*11)2F2F&2PQUUW& 	9J5?__*////1"K5?5E5Ej&&,,S12G  9#kkmi/-8Ik*9		9 	 17QRS	 
a&==,	
 	

s/   D$B4C) 9/C) (D$)	D!2*DD!!D$c                   K   	 t               }i }| j                  t              j                  t        j                  j                  d            j                         }i }g }|D ]  }|j                  r|j                  j                  d      ng }|D ]7  }|j                         }	|j                  ||	<   |	|vs'|j                  |	       9 |j                  r|j                  j                  d      ng }
|
D ]!  }|j                  |j                                # |j                  ||j                  <    |j                  t         d       ||fS # t"        $ r/}t%        t&        t)        |      t*        j,                  d      d}~ww xY ww)zAMaps bottle aliases and returns bottle exclusions and bottle map.Nrd   T)ri   reversezError mapping bottle aliases.rj   )setro   r   rp   
deleted_atis_rr   rt   ru   r4   nameappend	exclusionaddidsortrf   rx   r   ry   rz   r	   r{   )ra   rA   
bottle_mapbottlesr|   r@   rC   r   rt   r   
exclusionsr   r   s                r%   map_bottle_aliasesr      sl    %
E
 ((:&--j.C.C.G.G.MNRRT	 	0F17fll((-2G  5#kkm)/	+&l2 ''4	5 9?8H8H))//4bJ' 9	!%%ioo&789 '-iiJv{{#	0$ 	c40 )++ 
a&==3	
 	

s0   FB5E :BE F	F
*FF

F.c                   K   	 |s!t        t        i t        j                  d      t	        |      }|j                          d {   }t        j                  |      }|j                  d       |j                         j                  |      }|j                         D cg c]#  }|j                         s|j                         % }}g }|rWt        |      dkD  rH| j                  t        j                  t        j                   t"        j$                  j'                  d            j)                  t"        t        j*                  t"        j                  k(        j-                  t        j.                  j1                  d             j3                         }	|	D 
ci c]  }
|
d   |
d    c}
t5        |        d {   t7        |        d {   \  t8        d   j:                  |}fd	}t=               5 }t?        |jA                  ||            }d d d        |S d
diS 7 c c}w c c}
w 7 z7 h# 1 sw Y   |S xY w# tB        $ r/}t        t        tE        |      t        jF                  d      d }~ww xY ww)NzPlease select file to upload.rj   r      wine_db_literal   )ra   TABc                    | r| j                        ng }	j                  t        |      dkD  r|d   nd      }t        |      dkD  r|d   nd}t        |      }t	        |      }t        |      }	 |rt        ||      }nt        |      }t        |      }t	        |      }t        |      dkD  r|d   nd t        |      dkD  r|d   nd ||g dS # t        $ r}t        d|        Y d }~bd }~ww xY w)Nr   rT   r   )r.   )r@   rA   zError removing vintages: )wineDbIddescriptionr>   wineKeywordnoises)
ru   getrf   r,   r?   rD   rR   rx   printr`   )
info	wine_infowkwine_keywordr   r.   bottle_aliases_maprA   	separatorwks_maps
        r%   process_infoz5upload_wines_for_noises_service.<locals>.process_info   s   59DJJy1r	[[Y!1C1L/29~/Ay|r3LA.|U/K]  rC   D;'6|R'H'6|'D  '|4.|U 14I0B	!36y>A3E9Q<4!#/   ! ;5aS9::;s   7C 	C8 C33C8rn   zNo data Foundz#Error processing the uploaded file.)$r   ry   r	   HTTP_400_BAD_REQUESTr&   r   ioBytesIOr    r\   
splitlinesr*   rf   ro   r   r   r>   r   literallabeljoin
wine_db_idrp   r   r   rr   r   r   r   valuer   listmaprx   rz   r{   )ra   r   r   contentbufferr(   linelineswines_for_noiseswksr   output_listr   executorr   r.   r   rA   r   r   s                  @@@@@r%   upload_wines_for_noises_servicer      s'    S
"777	  #4(		#G$A{{}##H-*.//*;L$tzz|LLSZ!^ NN''NN(():;
 fk44		AB..22489  /22r!ube|2G +B/K:LPR:S4S11!%(..IK < $% Q#'\;(O#P Q $# 
 	
A $ M  3/4SHQ $#
  
a&==9	
 	

s   J	AI 
H0AI &H3<H3CI !H81I H=I H?/I I"	I +J	,I /J	0I 3I ?I II 
J	I 	J*JJJ	)N)=r   r6   rY   uuidtypingr   r   r!   polarspl
sqlalchemyfastapir   r   r   r   r	   fastapi.responsesr
   sqlalchemy.ormr   &src.apps.wine.producer.models.producerr   r   src.apps.wine.wine.models.winer   r   r   r   r   src.utils.enumsr   src.core.exceptionsr   src.utils.helpers.functionsr   concurrent.futuresr   bs4r   collectionsr   *src.apps.wine.word_alias.models.word_aliasr   ,src.apps.wine.bottle_size.models.bottle_sizer   rz   r&   r,   rv   r?   r   r   rD   rR   r`   r   tupler   anyr    r'   r%   <module>r      s5   	 	   !    D D * " L  & , 4 1  # " @ C)* ) )s s  $ 3 " D S UX " c S <# # *
' 
d 
:'
 '
U '
V CyW
W

W
 	W
r'   