
    ph                         d dl Z d dlZd dlmZ d dlmZmZmZmZ d dlm	Z	  ej                  e      ZdZdZdZefded	ee   d
edefdZdedefdZefde	ded	ed
edee   f
dZeefde	dedededeeef   f
dZy)    N)datetime)OptionalListDictAny)Sessionzuploads/match_outputszuploads/historyzUTF-8codedatecrawl_out_locationreturnc                 D   | rg|re	 |j                  d      }t        j                  j                  ||  d| d      }t	        d|        t        j                  j                  |      S y# t        $ r+}t        j                  dt        |              Y d}~yd}~ww xY w)a&  
    Check if a valid crawl output file exists for the given code and date.
    
    Args:
        code: The crawl code
        date: The crawl date
        crawl_out_location: Path to the crawl outputs directory
        
    Returns:
        bool: True if the file exists, False otherwise
    %Y-%m-%d-.txtzChecking file: zError checking crawl output: NF)
strftimeospathjoinprintisfile	Exceptionloggererrorstr)r	   r
   r   date_str	file_pathes         e/var/www/html/wine-match-dev/backend/winematch-backend/src/apps/wine_match/services/output_service.pyis_valid_crawl_outputr      s     	C}}Z0H %7D68*D9QRIOI;/077>>),,
   	CLL8QABB	Cs   A#A+ +	B4!BBr   c                     t         S )z
    Simple charset detection for files
    
    Args:
        file_path: Path to the file
        
    Returns:
        str: UTF-8 as default encoding
    )DEFAULT_OUTPUT_ENCODING)r   s    r   detect_charsetr"   +   s
     #"    dbweb_crawlerc           
         g }|r|s|S |j                  d      }|j                   d| d}t        j                  j	                  ||      }t        j                  j                  |      st        d|j                   d| d       |S t        |d      r|j                  r|j                  n
t        |      }t        d|j                   d| d       	 t        |d|	      5 }	t        |	d
       |	D ]&  }
|
j                         }
|
s|j                  |
       ( 	 d
d
d
       t        d|j                   d| dt        |       d       |S # 1 sw Y   3xY w# t        $ r{ t        d|j                   d| d       t        |dd	      5 }	t        |	d
       |	D ]&  }
|
j                         }
|
s|j                  |
       ( 	 d
d
d
       n# 1 sw Y   nxY wY w xY w)a/  
    Load retailer output file content
    
    Args:
        db: Database session
        web_crawler: WebCrawler object
        date: Date of the crawl
        crawl_out_location: Path to the crawl outputs directory
        
    Returns:
        List[str]: List of output lines from retailer file
    r   r   r   [z] File not found.output_encodingz] Loading crawler output...rencodingNz*] UTF-8 decode failed, trying with latin-1latin-1z	] Loaded z lines)r   r	   r   r   r   r   r   hasattrr(   r"   opennextstripappendUnicodeDecodeErrorlen)r$   r%   r
   r   output_listr   	file_namer   file_encodingfilelines              r   load_retailer_outputr9   :   s    Kd }}Z(H##$AhZt4I/;I77>>)$+""#1XJ.?@A 4;;HY3Z_j_z_zK//  AO  PY  AZM	Akq
*E
FG-)S=9 	-Tt -zz|&&t,-	-$ 
Akq
)C4D3EV
LM -	- 	-  	-+""#1XJ.XYZ)S95 	-t -zz|&&t,-	- 	- 	-	-sN   E #$EEE EE 3G$G2G	GG	GGhistory_locationr(   c           
      *   i }|s|S 	 t        |d      r|j                  n|j                  }t        d|j                   d| d       | d}t        j                  j                  ||      }t        j                  j                  |      st        d|j                   d|        |S 	 t        |d|      5 }|D ]K  }	|	j                         j                  d	      }
t        |
      d
kD  s1|
d   }ddlm}  ||
d
         }|||<   M 	 ddd       t        d|j                   d| dt        |              |S # 1 sw Y   2xY w# t        $ r t        d|j                   d| d       t        |dd      5 }|D ]K  }	|	j                         j                  d	      }
t        |
      d
kD  s1|
d   }ddlm}  ||
d
         }|||<   M 	 ddd       n# 1 sw Y   nxY wY w xY w# t        $ r"}t        dt        |              Y d}~|S d}~ww xY w)a-  
    Load history matches
    
    Args:
        db: Database session
        web_crawler: WebCrawler object
        history_location: Path to history files
        output_encoding: Default encoding for history files
        
    Returns:
        Dict[str, str]: Map between wine name and wine ID
    r	   r'   z] Loading history file [z] ...z-history.txtz] History file not found: r)   r*   z||   r   )cleanupNz] z# decode failed, trying with latin-1r,   z ] History record(s) loaded for [z]: zError loading history: )r-   r	   r   r   r   r   r   r.   r0   splitr3   ,src.apps.wine_match.services.matcher_servicer=   r2   r   r   )r$   r%   r:   r(   	historieshistory_namer5   r   r7   r8   partswine_idr=   	wine_namer   s                  r   load_historiesrE   t   s0    I%2+2;+G{''[M]M]+""##;L>OP#nL1	GGLL!19=	ww~~i(Ak&&''A)MN	7i? 	74  7D JJL..t4E5zA~"'( Y$+E!H$5	/6	),7	7, 	+""##CL>QTUXYbUcTdef 5	7 	7 " 
	7Ak&&'r/)::]^_iy9 7T  7D JJL..t4E5zA~"'(X$+E!H$5	/6	),77 7 7
	7  2'Ax0112sx   B G' *E 83D;,D;
E 'G' ;E E 3G$:3G.G	G$G	G$!G' #G$$G' '	H0HH)r   loggingr   typingr   r   r   r   sqlalchemy.ormr   	getLogger__name__r   DEFAULT_CRAWL_OUT_LOCATIONDEFAULT_HISTORY_LOCATIONr!   r   boolr   r"   r9   rE    r#   r   <module>rO      s    	   , , "			8	$ 5 , ! Ys  8H+= SV x| 8#c #c #  3M8W 83 8h 8,/8QUVYQZ8v +C)@:w :S :$':#&:EI#s(^:r#   