
    why                         d dl Z d dlZd dlZd dlmZ d dlmZmZ d dlm	Z	 d dl
Z
d dlZ ej                  e      Z G d d      Zy)    N)Decimal)ListOptional)BeautifulSoupc                      e Zd ZdZdZdZdZdZdZdZ	 e
j                  d      Z e
j                  d	      Z e
j                  d
      Z e
j                  d      Z e
j                  d      Z e
j                  d      Zededefd       Zededefd       Zededefd       Zededefd       Zededefd       Zededee   fd       Zed$dedee   defd       Zededee   fd       Zed%dedee   fd       Zed&dedee   fd       Zededefd       Z ededee!   fd       Z"edededed ed!e#d"ee   de#fd#       Z$y)'
Calculator_NVz:1:z:0:10!z$(^|\b|\W)(17|18|19|20)\d{2}(\b|\W|$)z(?i)(^|\b|\W)n\.?v\.?(\b|\W|$)u   [!@#$%&?()\[\]{}<>“”\",:;*]u   [./´`‘’'\\-‒–—―]+z\s+zL(![^&|!()]+_?)|(&![^&|!()]+_?)|(\|![^&|!()]+_?)|(&!\((.*?)\))|(\|!\((.*?)\))textreturnc                    |syt        j                  |      }t        j                  d|      j	                  dd      j                  d      }|j                         }| j                  j                  d|      }| j                  j                  d|      }| j                  j                  d|      j                         S )N NFKDasciiignorezutf-8 )htmlunescapeunicodedata	normalizeencodedecodelowerPATTERN_PUNCTsubPATTERN_CUSTOM_PUNCTPATTERN_MULTISPACEstripclsr   s     Y/var/www/html/wine-match-dev/backend/winematch-backend/src/apps/match/utils/calculator.pycleanupzCalculator.cleanup"   s    }}T"$$VT299'8LSST[\zz|  $$R.''++C6%%))#t4::<<    c                 >    |rt        d| dd      j                  S dS )Nz<p>z</p>zhtml.parserr   )r   r   r"   s     r$   cleanup_numeric_htmlzCalculator.cleanup_numeric_html-   s&    FJ}s4&-}=BBRPRRr&   c                 ^    |r*| j                   j                  d|      j                         S dS )Nr   r   )r    r   r!   r"   s     r$   cleanup_spaceszCalculator.cleanup_spaces1   s+    @Ds%%))#t4::<L"Lr&   patternc                 V    |r&t        j                  dd|      j                  d      S dS )Nz[!()&|]+,r   )rer   r!   )r#   r+   s     r$   normalize_patternzCalculator.normalize_pattern5   s'    ?Frvvk3066s;NBNr&   c                     |sy| j                  |      j                  d      }t        t        |      d       }dj	                  |      S )Nr   r-   c                     t        |        S N)len)ws    r$   <lambda>z7Calculator.normalize_and_sort_pattern.<locals>.<lambda>>   s    3q6' r&   )key)r/   splitsortedsetjoin)r#   r+   wordsuniques       r$   normalize_and_sort_patternz%Calculator.normalize_and_sort_pattern9   sC    %%g.44S9E
(9:xxr&   c                     |sy | j                   j                  |      }|r%t        j                  dd|j	                               S | j
                  j                  |      r| j                  S y )Nz\Dr   )PATTERN_VINTAGEsearchr.   r   group
PATTERN_NVNON_VINTAGEr#   r   ms      r$   parse_vintagezCalculator.parse_vintageA   sY    &&t,66%QWWY//^^""4(??"r&   Nc                 Z   |sy|r| j                  |      ng }| j                  |      }|r|D ]  }||vs|j                  |d      } n| j                  j                  d|      }| j                  j                  d|      }| j
                  j                  d|      j                         S )Nr   r   )extract_all_vintagesreplacer?   r   rB   r    r!   )r#   r   r+   reserved	to_removevs         r$   remove_vintageszCalculator.remove_vintagesL   s    8?3++G4R,,T2	 0H$<<3/D0 &&**35D~~!!#t,%%))#t4::<<r&   c           	          |rL| j                   j                  |      D cg c]'  }t        j                  dd|j	                               ) c}S g S c c}w )Nz[^\d]r   )r?   finditerr.   r   rA   rD   s      r$   rH   zCalculator.extract_all_vintages[   sC    `d9L9L9U9UVZ9[\AxQWWY/\ljll\s   ,Ac                     |sg S | j                  |      }t        j                  d|      }g }|D ]I  }|j                         }|s|j	                  | j
                        r2|s||vs9|j                  |       K |S )Nz[&|()])remove_notsr.   r7   r!   
startswithSTR_NOTappend)r#   r+   allow_duplicatesr;   resultwords         r$   true_positive_wordszCalculator.true_positive_words_   sv    I//'*G, 	(D::<DDOOCKK8#t6'9MM$'		(
 r&   c                    | j                  |      }|sg S t        j                  d|      }g }|D ]V  }|j                         }|s|j	                  d      r(|r|j                  | j                  d      }|j                  |       X |S )Nz&(?=[^()]*\)|[^()]*$)(r   )rQ   r.   r7   r!   rR   rI   WILDCARD_SYMBOLrT   )r#   r+   remove_wildcard
candidatesrV   rW   s         r$   required_wordszCalculator.required_wordsm   s    //'*IXX6@
 	$D::<DDOOC0"<<(;(;R@Dd#	$ r&   c                 B    |r| j                   j                  d|      S dS )Nr   )PATTERN_NOT_REMOVERr   r"   s     r$   rQ   zCalculator.remove_nots|   s"    8<s&&**2t4D"Dr&   c                     dD ]]  }	 t        j                  t         j                  |       t        j                  dd|      }t        t        j                  |            c S  y # t        $ r Y lw xY w)N)zen_US.UTF-8zde_DE.UTF-8z[^\d,.-]r   )locale	setlocaleLC_ALLr.   r   r   atof	Exception)r#   r   loccleaneds       r$   parse_moneyzCalculator.parse_money   si    1 	C  4&&b$7v{{7344		   s   AA$$	A0/A0namenorm_sortednorm_rawlatest_bestrequiredc                 x   r|sy|rt        fd|D              sydx}x}}	d d}
|j                  d      }|j                  d      }|D ci c]  }||j                  |       }}|D ]  }d}|j                  | j                        r5|d d }t        j                  dt        j                  |       d|
      d u}n
d| d|
v rd	}|r| j                  n| j                  }||   }|	|r|ndz  }	|j                  |||      } |	|k  ry|j                  | j                  | j                        j                  | j                  | j                        j                  d
d      j                  dd      }	 t        |      sy	 |	S c c}w # t        $ r"}t        j!                  d|        Y d }~yd }~ww xY w)Nr   c              3   &   K   | ]  }|v  
 y wr2    ).0r4   rj   s     r$   	<genexpr>z'Calculator.get_score.<locals>.<genexpr>   s     <aT	<s   r   r-   Fz\sz\S*T&z and |z or zPattern eval failed: )allr7   countendswithr[   r.   r@   escapeREPLACEMENT_FOUNDREPLACEMENT_NOT_FOUNDrI   STR_TRUE	STR_FALSEevalrf   loggerwarning)r#   rj   r+   rk   rl   rm   rn   found_totalnot_found_totalscorename_wrappedr;   uniquesr4   
word_countrW   foundprefixreplacementrx   pattern_evales    `                    r$   	get_scorezCalculator.get_score   s    7C<8<<0111o4&{s###C(189AaQ'9
9 	@DE}}S001cr		R		&(9':#">MUYYTF!,38#//c>W>WKt$EeU*EoodK?G	@ K  WS**CLL9WS..>WS'"WS&!	 		% & A :8  	NN21#67	s   F	:F 	F9F44F9r2   )F)T)%__name__
__module____qualname__r[   rC   r{   r|   r}   r~   rS   r.   compiler?   rB   r   r   r    r`   classmethodstrr%   r(   r*   r/   r=   r   rF   rM   r   rH   rX   r^   rQ   r   ri   intr   rq   r&   r$   r   r      s   OK!HIG bjj!HIO=>JBJJABM%2::&GH#F+$"**	 =3 =3 = = S S S S M# M# M M O O O O          #   =3 =# =# = = m mS	 m m # $s)   S 49   Es Es E E s x'8   33 3 	3
 3 3 s)3 
3 3r&   r   )r.   r   r   decimalr   typingr   r   bs4r   rb   logging	getLoggerr   r   r   rq   r&   r$   <module>r      s;    	    !   			8	$s sr&   