import os
import logging
from datetime import datetime
from typing import Optional, List, Dict, Any

from sqlalchemy.orm import Session

logger = logging.getLogger(__name__)

# Default locations and encoding for crawl outputs and history files
DEFAULT_CRAWL_OUT_LOCATION = "uploads/match_outputs"
DEFAULT_HISTORY_LOCATION = "uploads/history"
DEFAULT_OUTPUT_ENCODING = "UTF-8"

def is_valid_crawl_output(code: str, date: Optional[datetime], crawl_out_location: str = DEFAULT_CRAWL_OUT_LOCATION) -> bool:
    """
    Check whether a crawl output file exists for the given code and date.

    Output files are expected at "<crawl_out_location>/<code>-<YYYY-MM-DD>.txt".

    Args:
        code: The crawl code
        date: The crawl date
        crawl_out_location: Path to the crawl outputs directory

    Returns:
        bool: True if the file exists, False otherwise
    """
    if not code or not date:
        return False

    try:
        # Build "<code>-<YYYY-MM-DD>.txt" inside the outputs directory
        date_str = date.strftime("%Y-%m-%d")
        file_path = os.path.join(crawl_out_location, f"{code}-{date_str}.txt")
        logger.debug(f"Checking file: {file_path}")
        return os.path.isfile(file_path)
    except Exception as e:
        logger.error(f"Error checking crawl output: {str(e)}")
        return False
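
# Example usage (hypothetical code and date; the path layout follows
# "<crawl_out_location>/<code>-<YYYY-MM-DD>.txt" as built above):
#
#   is_valid_crawl_output("acme", datetime(2024, 1, 15))
#   # -> True if uploads/match_outputs/acme-2024-01-15.txt exists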

def detect_charset(file_path: str) -> str:
    """
    Placeholder charset detection for files.

    Args:
        file_path: Path to the file

    Returns:
        str: The default encoding (UTF-8)
    """
    # chardet is not in requirements.txt, so default to UTF-8; see the
    # sketch below for what real detection could look like.
    return DEFAULT_OUTPUT_ENCODING
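
# A minimal sketch of real charset detection, assuming the optional chardet
# package (not currently a project dependency). The function name is
# hypothetical; it falls back to the default encoding when chardet is
# not installed:
def detect_charset_with_chardet(file_path: str, sample_size: int = 65536) -> str:
    try:
        import chardet
    except ImportError:
        return DEFAULT_OUTPUT_ENCODING
    # Sample the start of the file; chardet.detect returns a dict such as
    # {'encoding': 'utf-8', 'confidence': 0.99, 'language': ''}
    with open(file_path, 'rb') as file:
        raw = file.read(sample_size)
    result = chardet.detect(raw)
    return result.get('encoding') or DEFAULT_OUTPUT_ENCODING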

def _read_output_lines(file_path: str, encoding: str) -> List[str]:
    """Read non-empty lines from an output file, skipping the header line."""
    lines: List[str] = []
    with open(file_path, 'r', encoding=encoding) as file:
        # Skip the header line
        next(file, None)
        for line in file:
            line = line.strip()
            if line:
                lines.append(line)
    return lines

def load_retailer_output(db: Session, web_crawler: Any, date: datetime,
                         crawl_out_location: str = DEFAULT_CRAWL_OUT_LOCATION) -> List[str]:
    """
    Load retailer output file content.

    Args:
        db: Database session
        web_crawler: WebCrawler object
        date: Date of the crawl
        crawl_out_location: Path to the crawl outputs directory

    Returns:
        List[str]: List of output lines from the retailer file
    """
    output_list: List[str] = []
    if not web_crawler or not date:
        return output_list

    # Output files are named "<code>-<YYYY-MM-DD>.txt"
    date_str = date.strftime("%Y-%m-%d")
    file_name = f"{web_crawler.code}-{date_str}.txt"
    file_path = os.path.join(crawl_out_location, file_name)

    if not os.path.isfile(file_path):
        logger.warning(f"[{web_crawler.code}-{date_str}] File not found.")
        return output_list

    # Prefer the encoding configured on the crawler, then fall back to detection
    file_encoding = getattr(web_crawler, 'output_encoding', None) or detect_charset(file_path)

    logger.info(f"[{web_crawler.code}-{date_str}] Loading crawler output...")

    try:
        output_list = _read_output_lines(file_path, file_encoding)
    except UnicodeDecodeError:
        # Fallback to Latin-1, which can decode any byte value
        logger.warning(f"[{web_crawler.code}-{date_str}] {file_encoding} decode failed, trying with latin-1")
        output_list = _read_output_lines(file_path, 'latin-1')

    logger.info(f"[{web_crawler.code}-{date_str}] Loaded {len(output_list)} lines")

    return output_list
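
# Example usage (hypothetical crawler object; only `code` and the optional
# `output_encoding` attribute are read here):
#
#   from types import SimpleNamespace
#   crawler = SimpleNamespace(code="acme", output_encoding="UTF-8")
#   lines = load_retailer_output(db, crawler, datetime(2024, 1, 15))
#   # reads uploads/match_outputs/acme-2024-01-15.txt, minus the header line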

def _parse_history_file(file_path: str, encoding: str) -> Dict[str, str]:
    """Parse "<wine_id>||<wine_name>" records into a name -> ID map."""
    # Import once here instead of per line inside the loop
    from src.apps.wine_match.services.matcher_service import cleanup

    histories: Dict[str, str] = {}
    with open(file_path, 'r', encoding=encoding) as file:
        for line in file:
            parts = line.strip().split("||")
            if len(parts) > 1:
                wine_id = parts[0]
                # Normalize the wine name with the matcher service's cleanup
                wine_name = cleanup(parts[1])
                histories[wine_name] = wine_id
    return histories

def load_histories(db: Session, web_crawler: Any,
                   history_location: str = DEFAULT_HISTORY_LOCATION,
                   output_encoding: str = DEFAULT_OUTPUT_ENCODING) -> Dict[str, str]:
    """
    Load history matches.

    History files are named "<code>-history.txt" and contain one record per
    line in the form "<wine_id>||<wine_name>" (extra fields, if any,
    are ignored).

    Args:
        db: Database session
        web_crawler: WebCrawler object
        history_location: Path to history files
        output_encoding: Default encoding for history files

    Returns:
        Dict[str, str]: Map between cleaned-up wine name and wine ID
    """
    histories: Dict[str, str] = {}
    if not web_crawler:
        return histories

    try:
        history_name = web_crawler.code
        logger.info(f"[{web_crawler.code}] Loading history file [{history_name}] ...")

        file_name = f"{history_name}-history.txt"
        file_path = os.path.join(history_location, file_name)

        if not os.path.isfile(file_path):
            logger.warning(f"[{web_crawler.code}] History file not found: {file_path}")
            return histories

        try:
            histories = _parse_history_file(file_path, output_encoding)
        except UnicodeDecodeError:
            # Fallback to Latin-1, which can decode any byte value
            logger.warning(f"[{web_crawler.code}] {output_encoding} decode failed, trying with latin-1")
            histories = _parse_history_file(file_path, 'latin-1')

        logger.info(f"[{web_crawler.code}] History record(s) loaded for [{history_name}]: {len(histories)}")
    except Exception as e:
        logger.error(f"Error loading history: {str(e)}")

    return histories
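
# Example usage (hypothetical crawler object; only `code` is read here):
#
#   from types import SimpleNamespace
#   crawler = SimpleNamespace(code="acme")
#   histories = load_histories(db, crawler)
#   # reads uploads/history/acme-history.txt into {cleaned_name: wine_id}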
