import logging
import os
import re
from datetime import datetime
from typing import List, Dict, Optional, Any, Set, Tuple

from sqlalchemy.orm import Session
from fastapi import HTTPException

from src.apps.match.enums import MatchedType, Status, Separator, LineTerminator
from src.apps.web_crawler.models.web_crawler import WebCrawler
from src.apps.match.models.crawl_property import CrawlProperty
from src.utils.enums import WineProperty
from src.apps.wine.wine_log.models.wine_log import MatchedLog
from src.apps.wine.wine.models.wine import Wine, WineKeyword, WineNoise, WineDb
from src.apps.wine.bottle_size.models.bottle_size import BottleSize
from src.apps.wine_match.schemas.matcher_schemas import (
    MatchInputSchema, 
    MatchOutputSchema,
    MatchResultSchema
)
from src.apps.wine_match.services.output_service import load_retailer_output, load_histories
from src.apps.wine_match.services.kw_match import (
    create_kw_match,
    set_match_score,
    get_matches_size,
    get_matches,
    get_first_match,
    get_score
)

logger = logging.getLogger(__name__)

# Constants
# Marker strings for match problems. Their consumers are not visible in this
# part of the file — presumably they end up in the "Errors" output column; TODO confirm.
VINTAGE_MISSING = "vintage missing"
SIZE_MISSING = "size missing"
NOT_FOUND = "notfound"
# Pipe-delimited header row; the column order mirrors the field order that
# format_match_output() writes for each match.
DEFAULT_MATCH_TITLE = "Wine Id|Wine Name|Match|Errors|Bottle Size|Vintage|Price|Tax Status|URL|Retailer Description|Keyword String|History String|SKU"

# Module-level registries of retailer codes whose match is currently running,
# one set per match type. They are mutated by add_to_running_matches(),
# remove_from_running_matches() and clear_running_matches(); a code that is
# absent from its set is reported as terminated by is_terminated_match().
RUNNING_KEYWORD_MATCHES: Set[str] = set()
RUNNING_HISTORY_MATCHES: Set[str] = set()

# Config values (hard-coded here; these would typically come from environment or settings)
OUTPUT_LINE_TERMINATOR = LineTerminator.CRLF
# Fallback file encoding used when the crawler does not specify one.
OUTPUT_ENCODING = "UTF-8"
# Bottle size assumed by parse_input() when none can be parsed and the crawler
# is not configured to leave the size blank.
DEFAULT_BOTTLE_SIZE = "750ml"


# Functions for working with match data


def format_match_output(match_output: MatchOutputSchema, output_line: str, input_delimiter: str) -> str:
    """
    Render one match result as a pipe-delimited output row.

    The row starts with the thirteen match columns (wine id, wine name, match,
    errors, bottle size, vintage, price, tax status, URL, description, keyword
    string, history string, SKU). Missing values are written as empty strings;
    a price of ``None`` is written as an empty string, any other price via
    ``str()``. When ``output_line`` is non-empty, its fields (split on
    ``input_delimiter``) are appended after the match columns, re-joined with
    the pipe delimiter.

    Args:
        match_output: Match output data
        output_line: Original crawler output line, may be empty
        input_delimiter: Delimiter that separates the fields of ``output_line``

    Returns:
        str: The formatted, pipe-delimited row
    """
    pipe = Separator.PIPE.value

    columns = (
        match_output.wine_id or "",
        match_output.wine_name or "",
        match_output.match or "",
        match_output.errors or "",
        match_output.bottle_size or "",
        match_output.vintage or "",
        "" if match_output.price is None else str(match_output.price),
        match_output.tax_status or "",
        match_output.url or "",
        match_output.description or "",
        match_output.keyword_string or "",
        match_output.history_string or "",
        match_output.sku or "",
    )
    formatted = pipe.join(columns)

    # Nothing to append when the crawler line is empty.
    if not output_line:
        return formatted

    crawler_fields = output_line.split(input_delimiter)
    if not crawler_fields:
        return formatted

    return formatted + pipe + pipe.join(crawler_fields)


# Simple scoring mechanism is now implemented in kw_match.py in a function-based approach


# Helper functions for text processing
# Single-character rewrites applied by cleanup() after lower-casing:
# accent folding, separators turned into spaces, '&' spelled out, quotes dropped.
_CLEANUP_TRANSLATION = str.maketrans({
    **dict.fromkeys('éèêë', 'e'),
    **dict.fromkeys('àáâäã', 'a'),
    **dict.fromkeys('ùúûü', 'u'),
    **dict.fromkeys('ìíîï', 'i'),
    **dict.fromkeys('òóôöõ', 'o'),
    'ç': 'c',
    'ñ': 'n',
    **dict.fromkeys('ýÿ', 'y'),
    # Hyphen, en-dash, em-dash and plus act as word separators
    **dict.fromkeys('-–—+', ' '),
    '&': ' and ',
    # Apostrophes, double quotes and backticks are removed outright
    **dict.fromkeys("'\"`", None),
})


def cleanup(text: str) -> str:
    """
    Normalize free text for matching.

    The text is lower-cased, common accented Latin letters are folded to their
    plain form, dashes and plus signs become spaces, ``&`` becomes ``and``,
    quotes are dropped, every remaining character that is neither a word
    character nor whitespace becomes a space, and runs of whitespace collapse
    to a single space. Empty or ``None`` input yields ``""``.
    """
    if not text:
        return ""

    folded = text.lower().translate(_CLEANUP_TRANSLATION)

    # Anything still outside \w / whitespace is treated as a separator
    # (note that \w keeps underscores and non-ASCII letters).
    separated = re.sub(r'[^\w\s]', ' ', folded)

    collapsed = re.sub(r'\s+', ' ', separated)
    return collapsed.strip()


def cleanup_numeric_html(text: str) -> str:
    """
    Replace HTML entities with a single space.

    Named entities (``&name;``) and decimal numeric references (``&#123;``)
    are each replaced by one space. Whitespace is not collapsed here, and
    hexadecimal references such as ``&#x27;`` are left untouched. Empty or
    ``None`` input yields ``""``.
    """
    if not text:
        return ""

    # Named entities first, then decimal numeric references.
    for entity_pattern in (r'&\w+;', r'&#\d+;'):
        text = re.sub(entity_pattern, ' ', text)
    return text


def parse_money(price_string: str) -> float:
    """
    Convert a price string to a float.

    Every character other than a digit or ``.`` is discarded (currency
    symbols, thousands separators, spaces, minus signs) and the remainder is
    parsed as a float. Returns ``0.0`` when the input is empty, contains no
    digits, or the remainder is not a valid number (e.g. two decimal points).
    """
    if not price_string:
        return 0.0

    numeric_text = re.sub(r'[^\d.]', '', price_string)
    if not numeric_text:
        return 0.0

    try:
        return float(numeric_text)
    except ValueError:
        # e.g. "1.2.3" or a lone "."
        return 0.0


def parse_vintage(text: str) -> str:
    """
    Find a vintage in free text.

    Returns the first standalone four-digit year starting with 19 or 20. If
    there is none but the text contains the standalone token ``NV`` (any
    case), returns ``"NV"``. Otherwise, or for empty input, returns ``""``.
    """
    if not text:
        return ""

    year = re.search(r'\b(19\d{2}|20\d{2})\b', text)
    if year is not None:
        return year.group(1)

    # A year takes precedence over a non-vintage marker.
    non_vintage = re.search(r'\bNV\b', text, re.IGNORECASE)
    return "NV" if non_vintage else ""


# Matcher Service Functions
def add_to_running_matches(retailer_code: str, matched_type: MatchedType):
    """
    Register a retailer code as having a match in progress.

    The code is added to the keyword or history registry depending on
    ``matched_type``. An empty code, or a match type other than KEYWORD or
    HISTORY, is ignored.
    """
    if not retailer_code:
        return

    registry = None
    if matched_type == MatchedType.KEYWORD:
        registry = RUNNING_KEYWORD_MATCHES
    elif matched_type == MatchedType.HISTORY:
        registry = RUNNING_HISTORY_MATCHES

    if registry is not None:
        registry.add(retailer_code)


def add_retailer_codes_to_running_matches(retailer_codes: List[str], matched_type: MatchedType):
    """
    Register several retailer codes as running for the given match type.

    Each code is passed to add_to_running_matches(); an empty or ``None``
    list is a no-op.
    """
    for retailer_code in retailer_codes or ():
        add_to_running_matches(retailer_code, matched_type)


def remove_from_running_matches(retailer_code: str, matched_type: MatchedType):
    """
    Unregister a retailer code from the running matches of the given type.

    Removing a code that is not registered is not an error. An empty code, or
    a match type other than KEYWORD or HISTORY, is ignored.
    """
    if not retailer_code:
        return

    registry = None
    if matched_type == MatchedType.KEYWORD:
        registry = RUNNING_KEYWORD_MATCHES
    elif matched_type == MatchedType.HISTORY:
        registry = RUNNING_HISTORY_MATCHES

    if registry is not None:
        registry.discard(retailer_code)


def get_running_matches(matched_type: MatchedType) -> List[str]:
    """
    Return a snapshot list of the retailer codes currently running.

    KEYWORD selects the keyword registry; every other match type selects the
    history registry. The returned list is a copy, in no particular order.
    """
    registry = (
        RUNNING_KEYWORD_MATCHES
        if matched_type == MatchedType.KEYWORD
        else RUNNING_HISTORY_MATCHES
    )
    return list(registry)


def is_terminated_match(retailer_code: str, matched_type: MatchedType) -> bool:
    """
    Tell whether a match is no longer registered as running.

    Returns ``True`` when ``retailer_code`` is absent from the registry for
    ``matched_type``. Returns ``False`` for an empty code or for a match type
    other than KEYWORD or HISTORY.
    """
    if not retailer_code:
        return False

    if matched_type == MatchedType.KEYWORD:
        registry = RUNNING_KEYWORD_MATCHES
    elif matched_type == MatchedType.HISTORY:
        registry = RUNNING_HISTORY_MATCHES
    else:
        return False

    return retailer_code not in registry


def clear_running_matches(matched_type: MatchedType):
    """
    Empty the running-match registry for the given match type.

    KEYWORD clears the keyword registry; every other match type clears the
    history registry.
    """
    registry = (
        RUNNING_KEYWORD_MATCHES
        if matched_type == MatchedType.KEYWORD
        else RUNNING_HISTORY_MATCHES
    )
    registry.clear()


# Keyword Service Functions
def get_bottle_id(db: Session, bottle_size: str) -> Optional[str]:
    """
    Resolve a bottle size string to the ID of a BottleSize record.

    An exact match on ``BottleSize.name`` wins. Otherwise every BottleSize
    record is loaded and the first one whose comma-separated ``alias`` list
    contains ``bottle_size`` (compared case-insensitively, without trimming
    whitespace around the aliases) is used.

    Returns:
        The record ID as a string, or ``None`` when ``bottle_size`` is empty
        or nothing matches.
    """
    if not bottle_size:
        return None

    exact = db.query(BottleSize).filter(BottleSize.name == bottle_size).first()
    if exact:
        return str(exact.id)

    # Fall back to scanning the alias lists of all known sizes.
    alias_hit = next(
        (
            record
            for record in db.query(BottleSize).all()
            if record.alias and bottle_size.lower() in record.alias.lower().split(',')
        ),
        None,
    )
    return None if alias_hit is None else str(alias_hit.id)


def get_bottle_size(db: Session, bottle_id: str) -> Optional[str]:
    """
    Look up the name of the BottleSize record with the given ID.

    Returns ``None`` when ``bottle_id`` is empty, is not an integer string,
    or no such record exists.
    """
    if not bottle_id:
        return None

    try:
        record = db.query(BottleSize).filter(BottleSize.id == int(bottle_id)).first()
        if record:
            return record.name
        return None
    except (ValueError, AttributeError):
        # Non-numeric ID, or a record without the expected attribute.
        return None


def parse_bottle_size(db: Session, text: str) -> Optional[str]:
    """
    Find a known bottle size mentioned in free text.

    The size patterns are tried in order (ml, cl, decimal litres, "liter",
    plain litres). For each pattern only its first occurrence in the text is
    considered; the hit has its whitespace removed and is accepted only if
    get_bottle_id() recognizes it.

    Returns:
        The whitespace-free size string as it appeared in the text, or
        ``None`` when the text is empty or no recognized size is found.
    """
    if not text:
        return None

    size_patterns = (
        r'(\d+\s*ml)',
        r'(\d+\s*cl)',
        r'(\d+\.\d+\s*l)',
        r'(\d+\s*liter)',
        r'(\d+\s*l)',
    )

    for size_pattern in size_patterns:
        found = re.search(size_pattern, text, re.IGNORECASE)
        if found is None:
            continue

        # "750 ml" -> "750ml"
        candidate = re.sub(r'\s+', '', found.group(1).strip())

        if get_bottle_id(db, candidate):
            return candidate

    return None


# Vintage Service Functions
def get_vintage_id(db: Session, vintage: str) -> Optional[str]:
    """
    Encode a vintage string as a three-character vintage ID.

    ``"NV"`` (any case) maps to ``"nv0"``. A four-digit year beginning with 19
    or 20 maps to its last two digits followed by ``"0"`` (``"2015"`` ->
    ``"150"``). Anything else, including empty input, yields ``None``.
    ``db`` is not used.
    """
    if not vintage:
        return None

    if vintage.lower() == "nv":
        return "nv0"

    looks_like_year = re.match(r'^(19|20)\d{2}$', vintage) is not None
    return f"{vintage[-2:]}0" if looks_like_year else None


def get_vintage(db: Session, vintage_id: str) -> Optional[str]:
    """
    Decode a three-character vintage ID into a readable vintage.

    ``"nv0"`` (any case, matching the case-insensitive handling of NV in
    get_vintage_id) decodes to ``"NV"``. An ID made of two ASCII digits
    followed by ``"0"`` decodes to a four-digit year; the century is guessed:
    two-digit values above 30 are read as 19xx, the rest as 20xx. Because the
    ID only stores two digits, years outside 1931-2030 do not round-trip.

    Args:
        db: Database session (not used)
        vintage_id: Vintage ID such as ``"150"`` or ``"nv0"``

    Returns:
        The vintage string, or ``None`` when the ID is empty or malformed.
    """
    if not vintage_id:
        return None

    if vintage_id.lower() == "nv0":
        return "NV"

    if len(vintage_id) == 3 and vintage_id[2] == "0":
        year_digits = vintage_id[:2]
        # Reject IDs like "ab0"; without this check they would be turned
        # into nonsense "years" such as "19ab".
        if not (year_digits.isascii() and year_digits.isdigit()):
            return None
        # Heuristic: two-digit years above 30 belong to the 1900s.
        century = "19" if year_digits > "30" else "20"
        return century + year_digits

    return None


def save_matched_to_file(db: Session, code: str, file_encoding: str, content: str, date: datetime, is_keyword: bool):
    """
    Write match results to ``uploads/match_outputs/<code>-<YYYY-MM-DD>-<type>.txt``.

    ``<type>`` is ``keyword`` or ``history`` depending on ``is_keyword``. The
    target directory is created if needed and an existing file is
    overwritten. Nothing is written when ``code`` or ``content`` is empty.
    Failures while creating the directory or writing the file are logged and
    not re-raised. ``db`` is not used.
    """
    if not (code and content):
        return

    # Output location is hard-coded for now; replace with actual config.
    output_dir = "uploads/match_outputs"

    date_str = date.strftime("%Y-%m-%d")
    suffix = "keyword" if is_keyword else "history"
    file_path = os.path.join(output_dir, f"{code}-{date_str}-{suffix}.txt")

    try:
        os.makedirs(os.path.dirname(file_path), exist_ok=True)
        with open(file_path, 'w', encoding=file_encoding) as handle:
            handle.write(content)
        logger.info(f"Match results saved to {file_path}")
    except Exception as e:
        # Best effort: saving the file must not abort the match run.
        logger.error(f"Error saving match results: {str(e)}")


# Matching Functions
def calculate_match(db: Session, search_text: str) -> tuple:
    """
    Calculate keyword matches for search text

    Every candidate wine ID returned by get_candidate_list() is scored with
    this module's get_score() and the score is recorded in the keyword-match
    accumulator via set_match_score(). A failure while scoring one candidate
    is logged and does not stop the remaining candidates.

    Args:
        db: Database session
        search_text: Text to match against

    Returns:
        The keyword-match accumulator created by create_kw_match() and updated
        by set_match_score() (annotated as a tuple of matches data and
        current score).
    """
    # The name ``get_score`` imported from kw_match at the top of this file is
    # rebound by the six-argument ``def get_score`` defined later in this
    # module, so calling ``get_score(kw_match)`` here would raise TypeError
    # for every candidate. Import the kw_match accessor under its own name.
    # NOTE(review): assumes kw_match.get_score(kw_match) returns the current
    # score of the accumulator — confirm against kw_match.py.
    from src.apps.wine_match.services.kw_match import get_score as get_current_score

    kw_match = create_kw_match()

    # In the original code, this would use parallel processing
    # Here we'll use a simpler sequential approach
    candidates = get_candidate_list(db, search_text)

    for wine_id in candidates:
        try:
            required_words = get_required_words(db, wine_id)
            pattern = get_pattern(db, wine_id)
            normalized_sorted_pattern = get_normalized_sorted_pattern(db, wine_id)
            normalized_pattern = get_normalized_pattern(db, wine_id)
            current_score = get_current_score(kw_match)

            score = get_score(
                search_text,
                pattern,
                normalized_sorted_pattern,
                normalized_pattern,
                current_score,
                required_words,
            )

            kw_match = set_match_score(kw_match, wine_id, score)
        except Exception as e:
            # Best effort per candidate: log and continue with the next one.
            logger.error(f"Error calculating match for {wine_id}: {str(e)}")

    return kw_match


def get_candidate_list(db: Session, search_text: str) -> List[str]:
    """
    Return the wine IDs that are candidates for matching ``search_text``.

    Placeholder: no database lookup is performed yet, so the result is
    always an empty list.
    """
    candidates: List[str] = []
    return candidates


def get_required_words(db: Session, wine_id: str) -> List[str]:
    """
    Return the words that must appear in the search text for ``wine_id``.

    Placeholder: no database lookup is performed yet, so the result is
    always an empty list.
    """
    required: List[str] = []
    return required


def get_pattern(db: Session, wine_id: str) -> str:
    """
    Return the search pattern for ``wine_id``.

    Placeholder: no database lookup is performed yet, so the result is
    always an empty string.
    """
    pattern = ""
    return pattern


def get_normalized_sorted_pattern(db: Session, wine_id: str) -> str:
    """
    Return the normalized, sorted keyword pattern for a wine ID.

    Placeholder: the database is not queried yet, so the result is always an
    empty string. A real implementation is expected to look the pattern up in
    the appropriate table for ``wine_id`` and fall back to ``""`` when the
    stored value is missing.

    Args:
        db: Database session
        wine_id: The ID of the wine to get the pattern for

    Returns:
        str: The normalized and sorted pattern (currently always ``""``)
    """
    pattern = ""
    if wine_id:
        try:
            # The pattern lookup for wine_id goes here once the schema exists;
            # until then the pattern stays empty.
            pattern = ""
        except Exception as e:
            logger.error(f"Error getting normalized sorted pattern: {str(e)}")
            pattern = ""
    return pattern


def get_normalized_pattern(db: Session, wine_id: str) -> str:
    """
    Return the normalized keyword pattern for ``wine_id``.

    Placeholder: no database lookup is performed yet, so the result is
    always an empty string.
    """
    normalized = ""
    return normalized


def get_score(search_text: str, pattern: str, normalized_sorted_pattern: str, 
              normalized_pattern: str, current_score: int, required_words: List[str]) -> int:
    """
    Score how well ``search_text`` matches a wine's patterns.

    Returns 0 if any required word is missing from the search text
    (case-insensitive substring test), 100 if ``pattern`` equals the search
    text ignoring case, 80 if ``normalized_pattern`` equals the cleaned-up
    search text, and 0 otherwise. ``normalized_sorted_pattern`` and
    ``current_score`` are accepted but not used yet.

    NOTE: this definition rebinds the module-level name ``get_score`` that is
    also imported from kw_match at the top of the file.
    """
    # A single missing required word disqualifies the candidate.
    if required_words and any(
        word.lower() not in search_text.lower() for word in required_words
    ):
        return 0

    # Exact (case-insensitive) match
    if pattern and pattern.lower() == search_text.lower():
        return 100

    # Match after normalization
    if normalized_pattern and normalized_pattern == cleanup(search_text):
        return 80

    # TODO: Implement more sophisticated scoring
    return 0


def get_vintage_from_wine_id(db: Session, wine_id: str) -> Optional[str]:
    """
    Decode the vintage embedded in a wine ID.

    Characters 9 to 11 (0-based) of the wine ID are taken as the vintage ID
    and decoded with get_vintage(). Returns ``None`` for an empty wine ID or
    one shorter than 14 characters.
    """
    if wine_id and len(wine_id) >= 14:
        return get_vintage(db, wine_id[9:12])
    return None


def get_bottle_id_from_wine_id(wine_id: str) -> Optional[str]:
    """
    Return the bottle size ID embedded in a wine ID.

    The bottle ID is everything from character 12 (0-based) to the end.
    Returns ``None`` for an empty wine ID or one shorter than 14 characters.
    """
    return wine_id[12:] if wine_id and len(wine_id) >= 14 else None


def set_wine_names(db: Session, outputs: List[MatchOutputSchema]) -> List[MatchOutputSchema]:
    """
    Set wine names for keyword matches

    For every output that is an error-free keyword match and carries a
    ``wine_db_id``, the matching WineDb record is looked up and the output is
    replaced by a copy whose ``wine_name`` is the record's ``label``. Wines
    are fetched in batches of 50 IDs and indexed by ID, so each output is
    resolved with a single dictionary lookup instead of a scan over all
    fetched wines.

    Args:
        db: Database session
        outputs: List of match outputs; updated in place

    Returns:
        List[MatchOutputSchema]: The same list object, with wine names set
        where a wine was found
    """
    if not outputs:
        return outputs

    # Positions of keyword matches without errors
    keyword_matches_indices = [
        i for i, o in enumerate(outputs)
        if o.match == MatchedType.KEYWORD.value and not o.errors
    ]
    if not keyword_matches_indices:
        return outputs

    # Distinct wine DB IDs referenced by those outputs
    unique_ids = list({
        outputs[i].wine_db_id
        for i in keyword_matches_indices
        if outputs[i].wine_db_id
    })
    if not unique_ids:
        return outputs

    # Fetch in batches to keep each IN (...) clause small.
    batch_size = 50
    wines_by_id: Dict[Any, Any] = {}
    for start in range(0, len(unique_ids), batch_size):
        batch = unique_ids[start:start + batch_size]
        for wine in db.query(WineDb).filter(WineDb.id.in_(batch)).all():
            # Keep the first record seen for an ID.
            wines_by_id.setdefault(wine.id, wine)

    for idx in keyword_matches_indices:
        output = outputs[idx]
        if not output.wine_db_id:
            # Outputs without an ID were never part of the query.
            continue
        wine = wines_by_id.get(output.wine_db_id)
        if wine is None:
            continue
        updated_output = output.dict()
        updated_output["wine_name"] = wine.label
        outputs[idx] = MatchOutputSchema(**updated_output)

    return outputs


def _parse_input_field(wine_info: List[str], index: Any) -> str:
    """Return the stripped field at ``index``, or "" when the index is unset or past the end."""
    if index is None:
        return ""
    position = int(index)
    if position >= len(wine_info):
        return ""
    return wine_info[position].strip()


def _parse_input_fields(web_crawler, attribute: str, wine_info: List[str]) -> List[str]:
    """Collect the non-empty, stripped fields addressed by the crawler's index list ``attribute``."""
    indexes = getattr(web_crawler, attribute, None)
    if not indexes:
        return []

    collected = []
    for idx in indexes:
        if idx is not None and idx < len(wine_info):
            text = wine_info[idx].strip()
            if text:
                collected.append(text)
    return collected


def parse_input(db: Session, web_crawler, output_line: str) -> MatchInputSchema:
    """
    Parse crawler output as wine match input

    The line is split on the crawler's output delimiter (``"\\|"`` is read as
    ``|`` and ``"TAB"`` as a tab character) and the individual fields are
    picked by the crawler's configured column indexes:

    * price, SKU, tax status and URL are copied from their columns;
    * description, history text and keyword text are assembled from the
      crawler's index lists (description and keyword parts are joined with a
      space, history parts are concatenated without a separator);
    * vintage is taken from its column when it is ``NV``/``N.V.`` or four
      digits, otherwise parsed from the keyword text, otherwise ``NV`` when
      the crawler defaults to non-vintage;
    * bottle size is taken from its column when it is a known size, otherwise
      parsed from that column or the description, otherwise the default size
      unless the crawler defaults to blank.

    Progress details are emitted through the module logger at DEBUG level.

    Args:
        db: Database session
        web_crawler: Web crawler instance
        output_line: Output line to parse

    Returns:
        MatchInputSchema: Parsed match input; an empty schema when
        ``web_crawler`` or ``output_line`` is missing
    """
    if not web_crawler or not output_line:
        return MatchInputSchema()

    # Determine the separator to use
    separator = web_crawler.output_delimiter if hasattr(web_crawler, 'output_delimiter') else "\\|"

    # Translate the configured notation into the actual separator character
    if separator == "\\|":
        separator = "|"
    elif separator == "TAB":
        separator = "\t"

    wine_info = output_line.split(separator)
    logger.debug("Wine info: %s", wine_info)

    match_input = MatchInputSchema()

    # Vintage column: accept NV (also written "N.V.") or a four-digit year
    vintage = _parse_input_field(wine_info, web_crawler.vintage_index)
    if vintage.upper() == "N.V.":
        vintage = "NV"
    if not (vintage.upper() == "NV" or (len(vintage) == 4 and vintage.isdigit())):
        vintage = ""

    # Size column: only kept as-is when it is a known bottle size
    original_size_info = _parse_input_field(wine_info, web_crawler.bottle_size_index)
    size = original_size_info if get_bottle_id(db, original_size_info) else ""

    match_input.price = parse_money(_parse_input_field(wine_info, web_crawler.price_index))
    match_input.sku = _parse_input_field(wine_info, web_crawler.sku_index)
    match_input.tax_status = _parse_input_field(wine_info, web_crawler.tax_status_index)
    match_input.url = _parse_input_field(wine_info, web_crawler.url_index)

    # Description
    match_input.description = " ".join(
        _parse_input_fields(web_crawler, 'list_description_indexes', wine_info)
    )
    logger.debug("Description: %s", match_input.description)

    # History input
    logger.debug("History indexes: %s", getattr(web_crawler, 'list_history_indexes', 'None'))
    # NOTE(review): history parts are concatenated without a separator, unlike
    # the description and keyword parts — confirm this is intended.
    history_input = "".join(_parse_input_fields(web_crawler, 'list_history_indexes', wine_info))
    match_input.history_text = cleanup(history_input)
    match_input.original_history_text = history_input.strip()
    logger.debug("History input: %s", match_input.history_text)
    logger.debug("Original history input: %s", match_input.original_history_text)

    # Keyword input
    keyword_input = " ".join(_parse_input_fields(web_crawler, 'list_keyword_indexes', wine_info))
    match_input.original_keyword_text = keyword_input.strip()
    logger.debug("Keyword input: %s", match_input.original_keyword_text)

    # The original pipeline also replaced aliases and removed size info
    # around the cleanup step; those steps are not applied here.
    matches_input = cleanup(cleanup_numeric_html(keyword_input))
    match_input.keyword_text = matches_input.strip()
    logger.debug("Keyword text: %s", match_input.keyword_text)

    # Vintage parser - search from name/desc if not found
    if not vintage:
        vintage = parse_vintage(match_input.original_keyword_text)

    # Use NV if config default is NV
    if not vintage and hasattr(web_crawler, 'vintage_default_nv') and web_crawler.vintage_default_nv:
        vintage = "NV"

    match_input.vintage = vintage or ""

    # Bottle size parser - search the size column, then the description
    if not size:
        size = parse_bottle_size(db, original_size_info)
        if not size:
            size = parse_bottle_size(db, match_input.description)

    # Use default bottle size if config doesn't default to blank
    if not size and hasattr(web_crawler, 'bottle_size_default_blank') and not web_crawler.bottle_size_default_blank:
        size = DEFAULT_BOTTLE_SIZE

    match_input.size = size or ""

    bottle_id = get_bottle_id(db, size)
    if bottle_id:
        match_input.bottle_size_id = bottle_id

    return match_input


def execute(db: Session, retailer_code: str, run_keyword: bool, date: datetime = None):
    """
    Run match processing for the retailer identified by ``retailer_code``.

    Looks up the WebCrawler whose ``code`` equals ``retailer_code`` and hands
    it to execute_with_crawler() together with ``run_keyword`` and ``date``.

    Raises:
        RuntimeError: If no web crawler exists for the retailer code.
    """
    crawler_query = db.query(WebCrawler).filter(WebCrawler.code == retailer_code)
    web_crawler = crawler_query.first()

    if web_crawler:
        # Delegate the actual processing to the crawler-based entry point
        execute_with_crawler(db, web_crawler, run_keyword, date)
        return

    raise RuntimeError("Retailer not found")


def _resolve_output_titles(db: Session, web_crawler, input_delimiter: str) -> List[str]:
    """Return the retailer's output column titles, from its configured title or its crawl properties."""
    configured_title = getattr(web_crawler, 'output_title', None)
    if configured_title:
        # The default delimiter is a Java-style escaped pipe; str.split is
        # literal (not regex), so it has to be unescaped before splitting.
        split_on = "|" if input_delimiter == "\\|" else input_delimiter
        return configured_title.split(split_on)

    # No configured title: derive one from the crawl properties flagged for output
    crawl_props = db.query(
        CrawlProperty.wine_property,
        CrawlProperty.custom_label
    ).filter(
        CrawlProperty.web_crawler_id == web_crawler.id,
        CrawlProperty.data_output == True  # noqa: E712 - SQLAlchemy column comparison
    ).order_by(CrawlProperty.property_index).all()

    titles = []
    for wine_property, custom_label in crawl_props:
        if custom_label:
            titles.append(custom_label)
            continue
        try:
            titles.append(WineProperty[wine_property].value)
        except (KeyError, AttributeError) as e:
            logger.warning(f"Invalid wine property: {wine_property}, error: {str(e)}")
            titles.append("Unknown")
    return titles


def _keyword_match_updates(db: Session, match_output: MatchOutputSchema, bottle_id) -> Dict[str, Any]:
    """Run the keyword match for one row and return the MatchOutputSchema fields to overwrite."""
    if not match_output.vintage:
        return {"match": MatchedType.UNKNOWN.value, "errors": VINTAGE_MISSING}

    match_result = calculate_match(db, match_output.keyword_string)
    match_count = get_matches_size(match_result)

    if match_count == 1:  # single match
        updates = {"match": MatchedType.KEYWORD.value}
        if not bottle_id:  # size missing
            updates["errors"] = SIZE_MISSING
            return updates

        wine_db_id = get_first_match(match_result) or ""
        wine_exists = db.query(WineDb).filter(WineDb.id == wine_db_id).count() > 0
        if wine_exists:
            vintage_id = get_vintage_id(db, match_output.vintage) or ""
            updates["wine_db_id"] = wine_db_id
            # The wine id is the concatenation of wine db id, vintage id and bottle id
            updates["wine_id"] = f"{wine_db_id}{vintage_id}{bottle_id}"
        else:  # wine not found
            updates["errors"] = NOT_FOUND
            logger.warning(f"WineDB not found: {wine_db_id}")
        return updates

    if match_count > 1:  # ambiguous
        return {
            "match": MatchedType.KEYWORD.value,
            "errors": MatchedType.AMBIGUOUS.value,
            "wine_id": ",".join(get_matches(match_result)),
        }

    # unknown
    return {"match": MatchedType.UNKNOWN.value, "errors": MatchedType.UNKNOWN.value}


def execute_with_crawler(db: Session, web_crawler, run_keyword: bool, date: Optional[datetime] = None):
    """
    Execute match processing for a WebCrawler instance

    Loads the retailer output for ``date``, tries a history match for every
    line and, when ``run_keyword`` is set, falls back to a keyword match.
    The formatted result is handed to ``save_matched_to_file`` and a
    ``MatchedLog`` row with the per-category counters is stored.

    Errors raised while matching or saving are logged and recorded on the
    match log with status FAILED; they are not re-raised.

    Args:
        db: Database session
        web_crawler: WebCrawler instance of the retailer to process
        run_keyword: Whether lines without a history match get a keyword match
        date: Date of the retailer output to process; defaults to now

    Raises:
        RuntimeError: If ``web_crawler`` is falsy
    """
    if not web_crawler:
        raise RuntimeError("Retailer not found")

    # Default to current date if not provided
    if not date:
        date = datetime.now()

    start_time = datetime.now()

    # Format for logging and filenames
    date_str = date.strftime("%Y-%m-%d")
    code = web_crawler.code
    file_name = f"{code}-{date_str}"
    logger.info(f"[{file_name}] Matching process started")

    # Set up initial counters
    history_matches = 0
    keyword_matches = 0
    ambiguous_matches = 0
    unknown_matches = 0
    raw_wines_size = 0

    # Determine input delimiter and file encoding; a missing or empty
    # setting falls back to the default in both cases
    input_delimiter = getattr(web_crawler, 'output_delimiter', None) or "\\|"
    file_encoding = getattr(web_crawler, 'output_encoding', None) or OUTPUT_ENCODING

    output_titles = _resolve_output_titles(db, web_crawler, input_delimiter)

    status = Status.RUNNING
    message = ""

    try:
        # Load retailer output
        output_list = load_retailer_output(db, web_crawler, date)
        if output_list:
            raw_wines_size = len(output_list)

            # Normalize the history keys once instead of once per wine.
            # setdefault keeps the first entry when two keys normalize to the
            # same text, matching a first-hit scan over the original dict.
            history_lookup = {}
            for key, value in (load_histories(db, web_crawler) or {}).items():
                history_lookup.setdefault(cleanup(key), value)
            logger.debug("[%s] Loaded %d history matches", file_name, len(history_lookup))

            log_interval = max(1, raw_wines_size // 2)

            # results[i] was produced from original_lines[i]. A parallel list
            # is used because the schema object is rebuilt while matching, so
            # its identity cannot serve as a lookup key.
            results = []
            original_lines = []

            for counter, output_line in enumerate(output_list, start=1):
                match_input = parse_input(db, web_crawler, output_line)
                match_output = MatchOutputSchema(
                    wine_id="",
                    wine_db_id="",
                    wine_name="",
                    match=MatchedType.UNKNOWN.value,
                    errors="",
                    bottle_size="",
                    vintage=match_input.vintage,
                    price=match_input.price,
                    tax_status=match_input.tax_status,
                    url=match_input.url,
                    description=match_input.description,
                    keyword_string=match_input.keyword_text,
                    history_string=match_input.original_history_text,
                    sku=match_input.sku
                )
                logger.debug("Match input: %s", match_input)
                bottle_id = match_input.bottle_size_id

                # Field overrides collected for this row, applied in one rebuild below
                updates = {}

                # HISTORY MATCH - compare using the same cleanup as the lookup keys
                history_id = ""
                if match_output.history_string:
                    history_id = history_lookup.get(cleanup(match_output.history_string), "")
                logger.debug("History string: %s, history ID: %s", match_output.history_string, history_id)

                if history_id:
                    updates["wine_id"] = history_id
                    updates["match"] = MatchedType.HISTORY.value
                    updates["wine_name"] = ""

                    # Get vintage from ID
                    if not match_output.vintage:
                        vintage = get_vintage_from_wine_id(db, history_id)
                        if vintage:
                            updates["vintage"] = vintage

                    # Get bottle size from ID
                    # NOTE(review): called without db, unlike get_vintage_from_wine_id -
                    # confirm against the helper's signature
                    history_bottle_id = get_bottle_id_from_wine_id(history_id)
                    if history_bottle_id:
                        bottle_id = history_bottle_id
                elif run_keyword:  # KEYWORD MATCH
                    updates.update(_keyword_match_updates(db, match_output, bottle_id))

                # Set bottle size
                if bottle_id:
                    bottle = get_bottle_size(db, bottle_id)
                    if bottle:
                        updates["bottle_size"] = bottle

                if updates:
                    match_output = MatchOutputSchema(**{**match_output.dict(), **updates})

                if counter % log_interval == 0:
                    logger.info(f"[{file_name}] Processed wines: {counter} of {raw_wines_size}")

                results.append(match_output)
                original_lines.append(output_line)

            logger.info(f"[{file_name}] Processed wines: {len(results)} of {raw_wines_size}")

            # Prepare output
            output_delimiter = Separator.PIPE.value  # Always use PIPE as delimiter for rollup
            line_terminator = OUTPUT_LINE_TERMINATOR.value

            # Build output content: default match columns followed by the retailer's own.
            # DEFAULT_MATCH_TITLE is pipe separated; str.split needs the literal "|".
            output_builder = [
                output_delimiter.join(DEFAULT_MATCH_TITLE.split("|")),
                output_delimiter,
                output_delimiter.join(output_titles),
                line_terminator,
            ]

            # Set wine names and count match types
            set_wine_names(db, results)

            for result, original_line in zip(results, original_lines):
                if result.match == MatchedType.HISTORY.value:
                    history_matches += 1
                elif result.match == MatchedType.KEYWORD.value and not result.errors:
                    keyword_matches += 1
                elif result.errors == MatchedType.AMBIGUOUS.value:
                    ambiguous_matches += 1
                else:
                    unknown_matches += 1

                output_builder.append(format_match_output(result, original_line, input_delimiter))
                output_builder.append(line_terminator)

            # Save output to file
            save_matched_to_file(db, code, file_encoding, "".join(output_builder), date, run_keyword)

        status = Status.SUCCESS

    except Exception as e:
        # Top-level boundary: the failure is recorded on the match log below
        logger.error(f"Error executing match: {str(e)}", exc_info=True)
        status = Status.FAILED
        message = str(e)

    # Record match log
    try:
        match_log = MatchedLog(
            code=code,
            process_keyword=run_keyword,
            status=status,
            message=message,
            date_start=start_time,
            date_end=datetime.now(),
            history_matched=history_matches,
            keyword_matched=keyword_matches,
            ambiguous_matched=ambiguous_matches,
            unknown_wines=unknown_matches
        )
        db.add(match_log)
        db.commit()
    except Exception as e:
        logger.error(f"Error saving match log: {str(e)}")
        db.rollback()

    # Log completion
    end_time = datetime.now()
    execution_time = (end_time - start_time).total_seconds()
    match_name = "Keyword" if run_keyword else "History"

    logger.info(
        f"[{file_name}] {match_name} match done. "
        f"Total data: {raw_wines_size}, History: {history_matches}, "
        f"Keywords: {keyword_matches}, Ambiguous: {ambiguous_matches}, "
        f"Unknowns: {unknown_matches}. "
        f"Execution time: {execution_time} seconds"
    )
