import io
import logging
import re
import unicodedata
import uuid
from collections import defaultdict
from concurrent.futures import ThreadPoolExecutor
from typing import Any, List, Optional

import chardet
import polars as pl
import sqlalchemy
from bs4 import BeautifulSoup
from fastapi import FastAPI, File, HTTPException, UploadFile, status
from fastapi.responses import JSONResponse
from sqlalchemy.orm import Session

from src.apps.wine.producer.models.producer import Producer, ProducerKeyword
from src.apps.wine.wine.models.wine import (
    Wine,
    WineDb,
    WineDuplication,
    WineKeyword,
    WineNoise,
)
from src.utils.enums import Separator
from src.core.exceptions import APIException
from src.utils.helpers.functions import join_pattern
from src.apps.wine.word_alias.models.word_alias import WordAlias
from src.apps.wine.bottle_size.models.bottle_size import BottleSize


def detect_encoding(file: UploadFile) -> str:
    """Guess the text encoding of an uploaded file.

    Reads the first 1024 bytes of the underlying file object, rewinds it to
    the start so later reads see the whole content, and asks ``chardet`` for
    its best guess.

    Args:
        file: The uploaded file; its ``.file`` attribute is read and rewound.

    Returns:
        The encoding name reported by ``chardet``, or ``"utf-8"`` when
        ``chardet`` reports no encoding.
    """
    stream = file.file
    sample = stream.read(1024)
    stream.seek(0)

    guessed = chardet.detect(sample)["encoding"]
    if guessed:
        return guessed
    return "utf-8"

def cleanup_numeric_html(text: str) -> str:
    """Return the plain text of ``text`` after parsing it as an HTML fragment.

    The value is wrapped in a ``<p>`` element, parsed with BeautifulSoup's
    ``html.parser`` and reduced to its text content. Empty, whitespace-only
    or ``None`` input is returned unchanged.
    """
    # Nothing to parse: hand the value back exactly as received.
    if not text or not text.strip():
        return text

    fragment = f"<p>{text}</p>"
    return BeautifulSoup(fragment, "html.parser").get_text()

def replace_aliases(input_text: str, aliases_map: dict) -> Optional[str]:
    """Replace every alias found in the text with its mapped word.

    The text is lowercased, then each key of ``aliases_map`` is replaced, in
    the mapping's iteration order, by its value padded with single spaces.
    A key only matches when it is delimited on both sides by the start/end of
    the text, a word boundary, an ASCII punctuation character or whitespace.
    Whitespace is collapsed after every substitution.

    Args:
        input_text: Text to rewrite.
        aliases_map: Mapping of alias -> replacement text.

    Returns:
        The rewritten, stripped text, or ``None`` when ``input_text`` is
        empty, whitespace-only or ``None``.
    """
    if not input_text or not input_text.strip():
        return None

    # Python's ``re`` module has no \p{Punct} property class (compiling it
    # raises "bad escape \p"), so the ASCII punctuation ranges are spelled out:
    # !-/  :-@  [-`  {-~
    ascii_punct = r"[!-/:-@\[-`{-~]"
    prefix = rf"(?:^|\b|{ascii_punct}|\s)"
    suffix = rf"(?:\s|{ascii_punct}|\b|$)"

    text = input_text.lower()
    for alias, word in aliases_map.items():
        pattern = f"{prefix}{re.escape(alias)}{suffix}"
        padded = f" {word} "
        # A callable replacement inserts the value verbatim; a plain string
        # would be treated as a template and mangle backslashes / "\1".
        text = re.sub(pattern, lambda _m, padded=padded: padded, text, flags=re.IGNORECASE)
        text = re.sub(r"\s+", " ", text)

    return text.strip()

def remove_size_info(input_text: str, bottle_sizes: list, bottle_exclusions: set) -> Optional[str]:
    """Remove bottle-size tokens from the text.

    The text is lowercased and every entry of ``bottle_sizes`` that is not
    listed in ``bottle_exclusions`` is replaced by a single space. An entry
    only matches when it is delimited on both sides by the start/end of the
    text, a word boundary, an ASCII punctuation character or whitespace.

    Args:
        input_text: Text to clean.
        bottle_sizes: Iterable of size tokens to remove; they are processed
            in iteration order (a dict works too, its keys are used).
        bottle_exclusions: Tokens that must be left in place.

    Returns:
        The stripped text, or ``None`` when ``input_text`` is empty,
        whitespace-only or ``None``.
    """
    if not input_text or not input_text.strip():
        return None

    # Python's ``re`` module has no \p{Punct} property class (compiling it
    # raises "bad escape \p"), so the ASCII punctuation ranges are spelled out:
    # !-/  :-@  [-`  {-~
    ascii_punct = r"[!-/:-@\[-`{-~]"
    prefix = rf"(?:^|\b|{ascii_punct}|\s)"
    suffix = rf"(?:\s|{ascii_punct}|\b|$)"

    text = input_text.lower()
    for bottle in bottle_sizes:
        if bottle in bottle_exclusions:
            continue
        pattern = f"{prefix}{re.escape(bottle)}{suffix}"
        text = re.sub(pattern, " ", text, flags=re.IGNORECASE)

    return text.strip()

def remove_vintages(input_text: str, pattern: Optional[str] = None) -> Optional[str]:
    """Strip vintage years and "NV" markers from the text.

    A vintage is a four-digit number starting with 17, 18, 19 or 20.

    * When ``pattern`` contains vintages, those years are reserved: every
      other vintage found in ``input_text`` is replaced by a space wherever
      it occurs, and the reserved years are kept.
    * Otherwise every vintage that is delimited by the start/end of the text,
      a word boundary, ASCII punctuation or whitespace is replaced by a space.

    Afterwards delimited "nv" / "n.v." markers (case-insensitive) are removed
    and whitespace is collapsed and stripped.

    Args:
        input_text: Text to clean.
        pattern: Optional text whose vintages must be preserved.

    Returns:
        The cleaned text. Empty, whitespace-only or ``None`` input is
        returned unchanged.
    """
    if not input_text or not input_text.strip():
        return input_text

    # Python's ``re`` module has no \p{Punct} property class (compiling it
    # raises "bad escape \p"), so the ASCII punctuation ranges are spelled out:
    # !-/  :-@  [-`  {-~
    ascii_punct = r"[!-/:-@\[-`{-~]"
    prefix = rf"(?:^|\b|{ascii_punct}|\s)"
    suffix = rf"(?:\s|{ascii_punct}|\b|$)"

    # Non-capturing group: with a capturing group ``findall`` would return
    # only the century ("19", "20") instead of the full year.
    year = r"(?:17|18|19|20)\d{2}"
    year_re = re.compile(year)
    delimited_year_re = re.compile(f"{prefix}{year}{suffix}")
    nv_re = re.compile(rf"{prefix}n\.?v\.?{suffix}", re.IGNORECASE)

    reserved = set(year_re.findall(pattern)) if pattern else set()
    if reserved:
        for found in year_re.findall(input_text):
            if found not in reserved:
                input_text = input_text.replace(found, " ")
    else:
        input_text = delimited_year_re.sub(" ", input_text)

    input_text = nv_re.sub(" ", input_text)
    return re.sub(r"\s+", " ", input_text).strip()

def cleanup(text: str) -> str:
    """Normalise text to lowercase ASCII words separated by single spaces.

    Steps, in order:

    1. Non-ASCII text is folded to ASCII (NFKD decomposition, then every
       character that is still non-ASCII is dropped).
    2. The text is lowercased.
    3. Bracket/quote/sign punctuation is deleted.
    4. Runs of dots, slashes, apostrophes, backticks and dashes become a
       single space.
    5. Whitespace is collapsed and the result stripped.

    Empty, whitespace-only or ``None`` input is returned unchanged.
    """
    if not text or not text.strip():
        return text

    str_blank = ""
    str_space = " "
    pattern_custom_punct_1 = re.compile(r"[!@#$%&?()\[\]{}⟨⟩<>“”\",:;*]")
    pattern_custom_punct_2 = re.compile(r"(?!\+)[./´`‘’'\-‒–—―]+")
    pattern_multiple_space = re.compile(r"\s+")

    if not text.isascii():
        # Typographic quotes and dashes have no NFKD decomposition, so the
        # ASCII fold below would delete them and glue the neighbouring words
        # together ("Saint–Emilion" -> "saintemilion"). Map them to their
        # ASCII counterparts first so they are turned into spaces in step 4.
        text = text.translate(str.maketrans({
            "\u2018": "'",  # left single quotation mark
            "\u2019": "'",  # right single quotation mark
            "\u2012": "-",  # figure dash
            "\u2013": "-",  # en dash
            "\u2014": "-",  # em dash
            "\u2015": "-",  # horizontal bar
        }))
        # Remove accents: decompose, then drop whatever is still non-ASCII.
        text = unicodedata.normalize('NFKD', text).encode('ascii', 'ignore').decode('ascii')

    text = text.lower()
    text = re.sub(pattern_custom_punct_1, str_blank, text)
    text = re.sub(pattern_custom_punct_2, str_space, text)
    text = re.sub(pattern_multiple_space, str_space, text)
    return text.strip()

async def map_aliases(db: Session) -> dict:
    """Build the alias -> replacement lookup from ``WordAlias`` rows.

    Only rows whose ``alias_type`` is ``'WINE'`` are read. Each row's
    ``alias`` column is treated as a comma-separated list; every entry is
    trimmed and lowercased and mapped to the row's lowercased ``word`` (an
    empty string when ``word`` is unset). When the same alias appears more
    than once, the first occurrence wins.

    Args:
        db: Session used to query ``WordAlias``.

    Returns:
        A dict ordered by alias length, longest first.

    Raises:
        APIException: With status 500 when anything goes wrong while
            querying or building the map.
    """
    try:
        alias_map = {}

        # Fetch all word aliases with alias_type 'WINE'
        word_aliases = db.query(WordAlias).filter(WordAlias.alias_type == 'WINE').all()

        for word_alias in word_aliases:
            if not word_alias.alias:
                continue
            replacement = word_alias.word.lower() if word_alias.word else ""

            for raw_alias in word_alias.alias.split(","):
                alias = raw_alias.strip().lower()
                # Skip blanks produced by "a,,b" or a trailing comma: an empty
                # key would match at every position once it is turned into a
                # search pattern.
                if alias:
                    alias_map.setdefault(alias, replacement)

        # Longest aliases first, so they are seen before any shorter alias
        # they contain when the map is iterated.
        return dict(sorted(alias_map.items(), key=lambda item: -len(item[0])))
    except Exception as e:
        raise APIException(
            module=__name__,
            error=str(e),
            status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
            message="Error mapping aliases."
        ) from e

async def map_bottle_aliases(db: Session) -> tuple:
    """Collect bottle-size aliases and exclusions from ``BottleSize`` rows.

    Only rows whose ``deleted_at`` is NULL are read. The ``alias`` and
    ``exclusion`` columns are treated as comma-separated lists; every entry
    is trimmed and lowercased, and blank entries are ignored.

    Args:
        db: Session used to query ``BottleSize``.

    Returns:
        A ``(bottle_exclusions, alias_map)`` tuple where ``bottle_exclusions``
        is a set of excluded tokens and ``alias_map`` maps each alias to its
        bottle name, ordered by alias length, longest first. When two bottles
        share an alias, the later row wins.

    Raises:
        APIException: With status 500 when anything goes wrong while
            querying or building the result.
    """
    try:
        bottle_exclusions = set()
        alias_map = {}

        # Fetch all bottle sizes from the database
        bottles = db.query(BottleSize).filter(BottleSize.deleted_at.is_(None)).all()

        for bottle in bottles:
            # Process aliases
            if bottle.alias:
                for raw_alias in bottle.alias.split(","):
                    alias = raw_alias.strip().lower()
                    if alias:
                        alias_map[alias] = bottle.name

            # Process exclusions
            if bottle.exclusion:
                for raw_exclusion in bottle.exclusion.split(","):
                    exclusion = raw_exclusion.strip().lower()
                    if exclusion:
                        bottle_exclusions.add(exclusion)

        # The original built a separate length-sorted list and then dropped
        # it. Ordering the returned dict instead gives anyone iterating its
        # keys the longest aliases first. The sort is stable, so aliases of
        # equal length keep their first-seen order.
        alias_map = dict(
            sorted(alias_map.items(), key=lambda item: len(item[0]), reverse=True)
        )

        return bottle_exclusions, alias_map
    except Exception as e:
        raise APIException(
            module=__name__,
            error=str(e),
            status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
            message="Error mapping bottle aliases."
        ) from e

async def upload_wines_for_noises_service(
    db: Session,
    file: UploadFile = File(...)
) -> Any:
    """Parse an uploaded delimited text file into normalised wine keywords.

    Every non-blank line is split on ``Separator['TAB'].value``. The first
    field is looked up against ``WineDb.literal`` to find the pattern of a
    non-deleted ``WineKeyword``; the second field is the description, which is
    normalised (HTML text extraction, alias replacement, bottle-size removal,
    vintage removal, punctuation cleanup, alias replacement again).

    Args:
        db: Session used for the keyword, alias and bottle-size queries.
        file: The uploaded file.

    Returns:
        A list with one dict per line (keys ``wineDbId``, ``description``,
        ``pattern``, ``wineKeyword``, ``noises``), or
        ``{"message": "No data Found"}`` when the file has fewer than two
        non-blank lines.

    Raises:
        APIException: 400 when no file is supplied; 500 for any other
            failure. An ``APIException`` raised while processing is passed
            through unchanged rather than being re-wrapped.
    """
    logger = logging.getLogger(__name__)

    try:
        if not file:
            raise APIException(
                module=__name__,
                error={},
                status_code=status.HTTP_400_BAD_REQUEST,
                message="Please select file to upload.",
            )

        encoding = detect_encoding(file)
        content = await file.read()
        try:
            text = content.decode(encoding)
        except (UnicodeDecodeError, LookupError):
            # The encoding is guessed from the start of the file only, so the
            # guess can be wrong for the rest of it (or name a codec Python
            # does not know). Fall back to lenient UTF-8 instead of failing
            # the whole upload.
            logger.warning(
                "Could not decode upload as %s; falling back to utf-8", encoding
            )
            text = content.decode("utf-8", errors="replace")

        # NOTE(review): strip() also removes leading/trailing separators if
        # the separator is whitespace, so a row with an empty first field
        # would shift its columns - confirm this cannot happen in practice.
        lines = [line.strip() for line in text.splitlines() if line.strip()]

        # NOTE(review): a file needs at least two non-blank lines to be
        # processed, yet the first line is processed like any other. If
        # uploads carry a header row it ends up in the output - confirm.
        if len(lines) <= 1:
            return {
                "message": "No data Found"
            }

        wks = (
            db.query(
                WineKeyword.id,
                WineKeyword.pattern,
                WineDb.literal.label("wine_db_literal"),
            )
            .join(WineDb, WineKeyword.wine_db_id == WineDb.id)
            .filter(WineKeyword.deleted_at.is_(None))
            .all()
        )

        # WineDb literal -> keyword pattern
        wks_map = {literal: pattern for _id, pattern, literal in wks}

        aliases_map = await map_aliases(db)
        bottle_exclusions, bottle_aliases_map = await map_bottle_aliases(db=db)

        separator = Separator['TAB'].value

        def process_info(info):
            """Turn one input line into its result dict."""
            wine_info = info.split(separator) if info else []
            wine_db_id = wine_info[0] if len(wine_info) > 0 else None
            description = wine_info[1] if len(wine_info) > 1 else None

            wk = wks_map.get(wine_db_id if wine_db_id is not None else "")

            wine_keyword = cleanup_numeric_html(description if description is not None else "")
            wine_keyword = replace_aliases(wine_keyword, aliases_map=aliases_map)
            # Only the keys (the alias strings) of the bottle map are used here.
            wine_keyword = remove_size_info(
                wine_keyword,
                bottle_sizes=bottle_aliases_map,
                bottle_exclusions=bottle_exclusions,
            )

            try:
                if wk:
                    wine_keyword = remove_vintages(wine_keyword, wk)
                else:
                    wine_keyword = remove_vintages(wine_keyword)
            except Exception as e:
                # Best effort: keep the keyword as it was before vintage
                # removal and carry on with the remaining steps.
                logger.warning("Error removing vintages: %s", e)

            wine_keyword = cleanup(wine_keyword)
            wine_keyword = replace_aliases(wine_keyword, aliases_map=aliases_map)

            return {
                "wineDbId": wine_db_id,
                "description": description,
                "pattern": wk,
                "wineKeyword": wine_keyword,
                "noises": []
            }

        # executor.map yields results in input order.
        with ThreadPoolExecutor() as executor:
            return list(executor.map(process_info, lines))
    except APIException:
        # Already carries the intended status code and message (e.g. the 400
        # above); re-wrapping it below would turn it into a generic 500.
        raise
    except Exception as e:
        raise APIException(
            module=__name__,
            error=str(e),
            status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
            message="Error processing the uploaded file."
        ) from e

