import logging
import os
import re
import unicodedata
from collections import defaultdict
from collections.abc import Set
from typing import Any, Dict, List, Optional

import chardet
from bs4 import BeautifulSoup
from fastapi import HTTPException, status
from sqlalchemy import Integer, and_, cast, func, or_
from sqlalchemy.orm import Session

from src.apps.files.models.cdn import CDN
from src.apps.files.models.file import File
from src.apps.match.enums import MatchedType
from src.apps.web_crawler.models.web_crawler import WebCrawler, WebCrawlerFiles
from src.apps.wine.bottle_size.models.bottle_size import BottleSize
from src.apps.wine.producer.models.producer import Producer, ProducerNoise
from src.apps.wine.retailer.services.retailer import get_all_retailers
from src.apps.wine.validation.models.validation import WorkGroup, WorkGroupWebCrawler
from src.apps.wine.validation.schemas.validation import SaveMatchedRequest
from src.apps.wine.vintage.models.vintage import Vintage
from src.apps.wine.wine.models.wine import Wine, WineDb, WineKeyword, WineNoise
from src.apps.wine_match.models.wine_match import WineMatch
from src.core.config import settings
from src.core.exceptions import APIException
from src.utils.constants import API_PREFIXES
from src.utils.enums import WebCrawlerFileType
from src.utils.pagination import QueryPaginator

# Page size used by get_keyword_matches when slicing matched-result rows.
MAX_RECORDS_PER_LOAD = 1000
# Boundary fragments placed around an escaped literal (an alias or a bottle-size
# name) so the literal is matched together with its surrounding delimiter.
# The prefix also switches the whole pattern to case-insensitive matching.
REGEX_PARSE_PREFIX = r"(?i)(^|\b|[^\w\s]|\s)"
REGEX_PARSE_SUFFIX = r"(\s|[^\w\s]|\b|$)"
STR_SPACE = " "
STR_BLANK = ""

# Pre-compiled patterns shared by the keyword-string cleanup helpers.
# Four-digit number starting with 17, 18, 19 or 20, delimited on both sides.
PATTERN_VINTAGE_PARSER = re.compile(r"(^|\b|\W)(17|18|19|20)\d{2}(\W|\b|$)")
# "nv" with an optional dot after each letter (e.g. "NV", "n.v."), any case.
PATTERN_NV_PARSER = re.compile(r"(^|\b|\W)n\.?v\.?(\W|\b|$)", re.IGNORECASE)
# Punctuation that cleanup() deletes outright.
PATTERN_CUSTOM_PUNCT_1 = re.compile(r"[!@#$%&?()\[\]{}⟨⟩<>“”\",:;*]")  # remove
# Runs of dots, slashes, quotes/apostrophes and dashes that cleanup() turns into
# one space. The leading (?!\+) lookahead never rejects anything because "+" is
# not part of the character class.
PATTERN_CUSTOM_PUNCT_2 = re.compile(r"(?!\+)[./´`‘’'\-‒–—―]+")  # replace with space
# Any run of whitespace; used to collapse it to a single space.
PATTERN_MULTIPLE_SPACE = re.compile(r"\s+")


async def populate_retailers(db: Session, codes: Optional[List[str]] = None):
    """Expand retailer selectors into a de-duplicated list of web-crawler codes.

    Each entry of ``codes`` is handled as follows:

    * if ``"all"`` is present anywhere, the whole list is replaced by the code
      of every retailer returned by ``get_all_retailers``;
    * an entry equal to a ``WorkGroup`` name is replaced by the web-crawler
      codes linked to that group (a group without links contributes nothing);
    * any other entry is kept unchanged.

    Args:
        db: Active database session.
        codes: Selectors to expand. ``None`` or an empty list yields ``[]``.

    Returns:
        The expanded codes without duplicates, in first-seen order.
    """
    # ``None`` replaces the former ``codes=[]`` default so no list object is
    # shared between calls; copying keeps the caller's list untouched.
    selectors: List[str] = list(codes) if codes else []

    if "all" in selectors:
        retailers = await get_all_retailers(db=db, paginated=False)
        selectors = [r.code for r in retailers]

    if not selectors:
        # Nothing to expand, so the group lookup below would be wasted work.
        return []

    # A set gives constant-time membership tests inside the loop.
    group_names = {row[0] for row in db.query(WorkGroup.name).all()}

    expanded: List[str] = []
    for selector in selectors:
        if selector not in group_names:
            expanded.append(selector)
            continue

        # NOTE(review): the original code marked ``webCrawlerCode`` with
        # "adjust attribute name" -- confirm it matches the model attribute.
        linked = (
            db.query(WorkGroupWebCrawler.webCrawlerCode)
            .join(WorkGroup)
            .filter(WorkGroup.name == selector)
            .all()
        )
        expanded.extend(row[0] for row in linked)

    # dict.fromkeys removes duplicates while keeping a deterministic order.
    return list(dict.fromkeys(expanded))


async def detect_encoding(file_path: str) -> str:
    """Return the text encoding to use when reading ``file_path``.

    The path is currently ignored: the function always answers ``"utf-8"``.
    """
    # NOTE: content sniffing (chardet over the first 1024 bytes, falling back
    # to utf-8) is disabled; every file is treated as UTF-8.
    encoding = "utf-8"
    return encoding


async def get_matched_file_name(db: Session, crawler: WebCrawler, matched_type: str) -> str:
    """Return the stored path of the newest HISTORY file of ``crawler``.

    ``matched_type`` is accepted but not used by the lookup.

    Raises:
        APIException: 404 when the crawler has no non-deleted HISTORY file, or
            when the referenced ``File`` row does not exist.
    """

    def _missing(message: str) -> APIException:
        # Both failure modes share the same 404 shape; only the text differs.
        return APIException(
            module="Validation",
            error={"file": message},
            status_code=status.HTTP_404_NOT_FOUND,
            message=message,
        )

    history_files = db.query(WebCrawlerFiles).filter(
        WebCrawlerFiles.web_crawler_id == crawler.id,
        WebCrawlerFiles.file_type == WebCrawlerFileType.HISTORY,
        WebCrawlerFiles.deleted_at.is_(None),
    )
    newest = history_files.order_by(WebCrawlerFiles.created_at.desc()).first()
    if not newest:
        raise _missing("No file found for this Web Crawler")

    stored_file = db.query(File).filter(File.id == newest.file_id).first()
    if not stored_file:
        raise _missing("File not found")

    return stored_file.path


async def find_webcrawler_by_code(db: Session, code: str) -> Optional[WebCrawler]:
    """Return the first ``WebCrawler`` whose ``code`` equals ``code``, or ``None``."""
    found = db.query(WebCrawler).filter(WebCrawler.code == code).first()
    return found if found else None


async def load_matched_results_by_code(db: Session, retailer_code: str, matched_type: MatchedType) -> List[str]:
    """Load matched-result rows for the web crawler identified by ``retailer_code``.

    Returns an empty list when no web crawler has that code.
    """
    webcrawler = await find_webcrawler_by_code(db, retailer_code)
    if webcrawler:
        return await load_matched_results(db, webcrawler, matched_type)
    return []


async def load_matched_results(db: Session, webcrawler: WebCrawler, matched_type: MatchedType) -> List[str]:
    """Read the data rows of the matched-results file of ``webcrawler``.

    The file path comes from ``get_matched_file_name`` and the encoding from
    ``detect_encoding``. The first line is treated as a header and skipped;
    remaining lines are stripped and blank ones dropped.

    This is deliberately best-effort: any failure (no file registered, file
    missing or unreadable, decode error, database error) is logged with its
    traceback and results in an empty list instead of an exception.

    Args:
        db: Active database session.
        webcrawler: Crawler whose latest file should be read.
        matched_type: Forwarded unchanged to ``get_matched_file_name``.

    Returns:
        The non-blank, stripped data lines, or ``[]`` on any error.
    """
    try:
        file_path = await get_matched_file_name(db, webcrawler, matched_type)
        file_encoding = await detect_encoding(file_path)
        # Opening directly reports missing/unreadable files through OSError,
        # which ends up in the handler below like every other failure.
        with open(file_path, "r", encoding=file_encoding) as f:
            next(f, None)  # Skip header
            return [text for line in f if (text := line.strip())]
    except Exception:
        logging.getLogger(__name__).exception("Error loading matched results")
        return []


def get_wine_properties_for_match_validation(
    db: Session, wine_id: str = "", wine_db_id: str = ""
) -> Optional[List[Any]]:
    """Fetch the wine properties shown during match validation.

    Equivalent of Grails ``getWinePropertiesForMatchValidation``.

    The ``Wine`` table is tried first (``Wine.literal == wine_id``); when it has
    no hit the ``WineDb`` table is tried (``WineDb.literal == wine_db_id``).

    Args:
        db: Active database session.
        wine_id: Literal of the ``Wine`` row to look up.
        wine_db_id: Literal of the ``WineDb`` row used as fallback.

    Returns:
        ``None`` when neither lookup matches, otherwise a 15-item list:
        indexes 0-10 are the ``WineDb`` color, type, sweetness, variety, note,
        name, country, region, location, locale and site; 11 is the producer
        display name; 12 the keyword pattern; 13 the maturity slot; 14 the
        ``Wine`` literal (blank for a ``WineDb``-only hit).
    """
    # Columns shared by both lookups, in the positional order callers index by.
    shared_columns = (
        WineDb.color,
        WineDb.type,
        WineDb.sweetness,
        WineDb.variety,
        WineDb.note,
        WineDb.name,
        WineDb.wine_country,
        WineDb.wine_region,
        WineDb.wine_location,
        WineDb.wine_locale,
        WineDb.wine_site,
        Producer.name_show,
        WineKeyword.pattern,
    )

    # --- First, try Wine table ---
    wine_row = (
        db.query(*shared_columns, Wine.literal)
        .join(WineDb.producer)
        .outerjoin(WineDb.wine_keyword)
        .join(Wine, Wine.wine_db_id == WineDb.id)
        .filter(Wine.literal == wine_id)
        .first()  # first() already limits the query to one row
    )
    if wine_row:
        *properties, wine_literal = wine_row
        # Both branches must return 15 items because the caller reads indexes
        # 13 and 14; returning only the 13 shared columns raised IndexError.
        # NOTE(review): an earlier draft selected ``Wine.maturity`` for slot 13;
        # it is left blank here until that attribute is confirmed on the model.
        return [*properties, "", wine_literal]

    # --- Else, try WineDb table ---
    wine_db_row = (
        db.query(*shared_columns)
        .join(WineDb.producer)
        .outerjoin(WineDb.wine_keyword)
        .filter(WineDb.literal == wine_db_id)
        .first()
    )
    if wine_db_row:
        # Add 2 blanks (maturity, wine literal) to match original Grails behavior
        return [*wine_db_row, "", ""]

    return None


def concat_name(producer: str, label: str) -> str:
    """Join producer and label with one space, tolerating a missing side.

    Returns whichever value is present when the other is empty/``None``, and
    ``""`` when both are missing.
    """
    if not producer:
        return label or ""
    if not label:
        return producer
    return f"{producer} {label}"


def cleanup_numeric_html(text: str) -> str:
    """Parse ``text`` as an HTML fragment and return its plain text content.

    Empty, whitespace-only or ``None`` input is returned unchanged.
    """
    if not text or not text.strip():
        return text

    # Wrapping in <p> gives the parser a single element to read the text from.
    fragment = BeautifulSoup(f"<p>{text}</p>", "html.parser")
    return fragment.get_text()


def replace_aliases(input_text: Optional[str]) -> Optional[str]:
    """Lower-case ``input_text`` and expand every known alias in it.

    Returns ``None`` for empty, whitespace-only or ``None`` input; otherwise
    the lower-cased, alias-expanded and stripped text. The alias table is
    currently empty, so no expansion takes place.
    """
    if not (input_text and input_text.strip()):
        return None

    # alias -> expansion, e.g. "cab": "cabernet sauvignon", "chard": "chardonnay"
    aliases_map: Dict[str, str] = {}

    expanded = input_text.lower()
    for alias, replacement in aliases_map.items():
        alias_regex = f"{REGEX_PARSE_PREFIX}{re.escape(alias)}{REGEX_PARSE_SUFFIX}"
        expanded = re.sub(alias_regex, f"{STR_SPACE}{replacement}{STR_SPACE}", expanded)
        # The substitution pads with spaces, so collapse whitespace afterwards.
        expanded = re.sub(r"\s+", STR_SPACE, expanded)

    return expanded.strip()


def remove_size_info(db: Session, input_text: Optional[str]) -> Optional[str]:
    """Lower-case ``input_text`` and blank out every known bottle-size name.

    Bottle-size names are read from the ``BottleSize`` table on each call and
    each occurrence (with its surrounding delimiter) is replaced by one space.

    Returns ``None`` for empty, whitespace-only or ``None`` input; otherwise
    the stripped result.
    """
    if not (input_text and input_text.strip()):
        return None

    remaining = input_text.lower()

    size_names = [row[0].lower() for row in db.query(BottleSize.name).all() if row[0]]
    excluded_names: Set[str] = {""}
    for size_name in size_names:
        if size_name in excluded_names:
            continue
        size_regex = f"{REGEX_PARSE_PREFIX}{re.escape(size_name)}{REGEX_PARSE_SUFFIX}"
        remaining = re.sub(size_regex, STR_SPACE, remaining)

    return remaining.strip()


def remove_vintages(input_text: Optional[str]) -> Optional[str]:
    """Strip vintage years and "NV" markers from ``input_text``.

    Empty, whitespace-only or ``None`` input is returned unchanged. Otherwise
    year and NV matches are replaced by a space, whitespace runs are collapsed
    and the result is stripped.
    """
    if not input_text or not input_text.strip():
        return input_text

    cleaned = input_text
    # Order matters: whitespace is collapsed only after both removals.
    for pattern in (PATTERN_VINTAGE_PARSER, PATTERN_NV_PARSER, PATTERN_MULTIPLE_SPACE):
        cleaned = pattern.sub(STR_SPACE, cleaned)
    return cleaned.strip()


def is_ascii(s: str) -> bool:
    """Return ``True`` when every character of ``s`` is ASCII (``""`` included)."""
    return s.isascii()


def strip_accents(s: str) -> str:
    """Return ``s`` NFD-normalized with all non-spacing marks (category Mn) removed."""
    kept = []
    for ch in unicodedata.normalize("NFD", s):
        if unicodedata.category(ch) == "Mn":
            continue
        kept.append(ch)
    return "".join(kept)


def cleanup(text: Optional[str]) -> Optional[str]:
    """Normalize ``text`` for keyword comparison.

    Empty, whitespace-only or ``None`` input is returned unchanged. Otherwise
    accents are stripped from non-ASCII text, the text is lower-cased, the
    first punctuation set is deleted, the second is replaced by spaces,
    whitespace runs are collapsed and the result is stripped.
    """
    if not (text and text.strip()):
        return text

    normalized = text if is_ascii(text) else strip_accents(text)
    normalized = normalized.lower()

    for pattern, replacement in (
        (PATTERN_CUSTOM_PUNCT_1, STR_BLANK),
        (PATTERN_CUSTOM_PUNCT_2, STR_SPACE),
        (PATTERN_MULTIPLE_SPACE, STR_SPACE),
    ):
        normalized = pattern.sub(replacement, normalized)

    return normalized.strip()


def _keyword_match_column(
    fields: List[str],
    index: int,
    default: Optional[str] = None,
    *,
    empty_as_default: bool = False,
) -> Optional[str]:
    """Return column ``index`` of a split result row, stripped, or ``default`` when absent."""
    if len(fields) <= index:
        return default
    value = fields[index]
    if empty_as_default and not value:
        return default
    return value.strip()


async def parse_keyword_match(
    db: Session,
    retailer_code: str,
    matched_filter: str,
    is_narrow: bool,
    wine_info: List[str],
):
    """Build the validation payload for one already-split matched-result row.

    Equivalent to Groovy ``parseKeywordMatch``.

    Columns read from ``wine_info``: 0 wine alert id, 2 match type, 3 error
    string, 4 bottle size, 5 vintage, 6 price, 7 tax status, 8 url,
    9 description, 10 keyword string, 11 history string, 12 sku. Missing
    trailing columns become ``None`` (``""`` for the id and keyword string).

    Args:
        db: Active database session.
        retailer_code: Code copied into the result as ``retailerCode``.
        matched_filter: ``"rp wines"`` keeps only rows whose wine has a mongo
            id, ``"non rp wines"`` keeps only rows without one (case
            insensitive); any other value keeps every row.
        is_narrow: Currently only logged; the narrow/keyword validity filter
            is disabled.
        wine_info: The row split into columns.

    Returns:
        A dict describing the match, or ``None`` when ``matched_filter``
        excludes the row.
    """
    match_type = _keyword_match_column(wine_info, 2, "keyword", empty_as_default=True)
    error_string = _keyword_match_column(wine_info, 3, empty_as_default=True)
    logging.getLogger(__name__).debug(
        "Match type: %s, Error string: %s for wine info: %s and retailer: %s and is_narrow: %s",
        match_type,
        error_string,
        wine_info,
        retailer_code,
        is_narrow,
    )

    result: Dict[str, Any] = {}
    result["retailerCode"] = retailer_code

    # Process ID: the first 12 characters identify the wine, the first 9 the
    # wine-db entry; "?" marks a row without a usable id.
    wine_alert_id = _keyword_match_column(wine_info, 0, "", empty_as_default=True)
    has_usable_id = bool(wine_alert_id) and wine_alert_id != "?"
    result["wineAlertId"] = wine_alert_id
    result["id"] = wine_alert_id[:12] if has_usable_id else ""
    result["wineDbId"] = wine_alert_id[:9] if has_usable_id else ""

    result["bottleSize"] = _keyword_match_column(wine_info, 4)

    vintage = _keyword_match_column(wine_info, 5)
    if vintage and vintage.lower() == "n.v.":
        vintage = "NV"
    result["vintage"] = vintage

    result["price"] = _keyword_match_column(wine_info, 6)
    result["taxStatus"] = _keyword_match_column(wine_info, 7)
    result["url"] = _keyword_match_column(wine_info, 8)
    result["description"] = _keyword_match_column(wine_info, 9)

    result["historyString"] = _keyword_match_column(wine_info, 11)
    result["sku"] = _keyword_match_column(wine_info, 12)

    # Fetch wine properties
    wine = get_wine_properties_for_match_validation(db, result["id"], result["wineDbId"])

    # Indexes 13 (maturity) and 14 (mongo id) are read below. Pad a shorter
    # row so a missing trailing column degrades to blank instead of raising
    # IndexError.
    props: List[Any] = list(wine) if wine else []
    if props and len(props) < 15:
        props.extend([""] * (15 - len(props)))

    has_mongo_id = bool(props and props[14])
    filter_name = (matched_filter or "").lower()

    if filter_name == "rp wines" and not has_mongo_id:
        return None

    if filter_name == "non rp wines" and has_mongo_id:
        return None

    # Process keyword string
    keyword_string = wine_info[10] if len(wine_info) > 10 else ""
    result["originalKeywordString"] = keyword_string

    keyword_string = cleanup_numeric_html(keyword_string)
    keyword_string = replace_aliases(keyword_string)  # aliases before cleanup
    keyword_string = remove_size_info(db, keyword_string)
    keyword_string = remove_vintages(keyword_string)
    keyword_string = cleanup(keyword_string)
    keyword_string = replace_aliases(keyword_string)  # aliases after cleanup
    result["keywordString"] = keyword_string

    # Wine properties mapping
    if props:
        result["maturity"] = props[13]
        result["mongoId"] = props[14]
        result["color"] = props[0]
        result["wineType"] = props[1]
        result["sweetness"] = props[2]
        result["variety"] = props[3]
        result["waNote"] = props[4]
        result["wineName"] = concat_name(props[11], props[5])
        result["country"] = props[6]
        result["region"] = props[7]
        result["location"] = props[8]
        result["locale"] = props[9]
        result["site"] = props[10]
        result["pattern"] = props[12]

    result["wineNoises"] = []

    return result


async def get_keyword_matches(db: Session, retailer_code: str, matched_filter: str, is_narrow: bool, offset: int = 0):
    """Return one page of parsed keyword matches for ``retailer_code``.

    Without ``offset`` the first ``MAX_RECORDS_PER_LOAD`` rows are parsed and
    ``offsets`` lists the start index of every further page. With a non-zero
    ``offset`` the page starting there is parsed and ``offsets`` stays empty.

    Returns:
        ``{"offsets": [...], "matches": [...]}``; both lists are empty when no
        retailer code is given or no rows could be loaded.
    """
    matches = []
    page_offsets = []

    rows = await load_matched_results_by_code(db, retailer_code, MatchedType.KEYWORD) if retailer_code else None
    if not rows:
        return {"offsets": page_offsets, "matches": matches}

    # Pagination logic
    if offset:
        page = rows[offset : offset + MAX_RECORDS_PER_LOAD]
    else:
        page = rows[:MAX_RECORDS_PER_LOAD]
        # Start index of every page after the first; empty when one page suffices.
        page_offsets = list(range(MAX_RECORDS_PER_LOAD, len(rows), MAX_RECORDS_PER_LOAD))

    for row in page:
        parsed = await parse_keyword_match(db, retailer_code, matched_filter, is_narrow, row.split("|"))
        if parsed:
            matches.append(parsed)

    return {"offsets": page_offsets, "matches": matches}


async def producer_noise(db: Session, producer_id: str):
    """Return the lower-cased noise strings of the producer whose literal is ``producer_id``.

    Rows whose noise value is empty or NULL are skipped.
    """
    noise_query = db.query(ProducerNoise.noise).join(Producer, ProducerNoise.producer_id == Producer.id)
    rows = noise_query.filter(Producer.literal == producer_id).all()

    lowered = []
    for (noise,) in rows:
        if noise:
            lowered.append(noise.lower())
    return lowered


async def wine_noise(db: Session, wine_db_id: int):
    """Return the lower-cased noise strings of the wine-db entry whose literal is ``wine_db_id``.

    NULL and empty noise values are excluded by the query itself.
    """
    noise_query = db.query(WineNoise.noise).join(WineDb, WineNoise.wine_db_id == WineDb.id)
    rows = noise_query.filter(
        WineDb.literal == wine_db_id,
        WineNoise.noise.isnot(None),
        WineNoise.noise != "",
    ).all()

    lowered = []
    for (noise,) in rows:
        if noise:
            lowered.append(noise.lower())
    return lowered


async def is_not_blank(s: str | None) -> bool:
    """
    Returns True if the given string is not None, not empty, and not just whitespace.
    """
    return bool(s and s.strip())


async def save_matched(db: Session, payload: SaveMatchedRequest):
    """Create or update the ``WineMatch`` row described by ``payload`` and commit it.

    The wine-db entry, vintage and bottle size are looked up only when the
    corresponding payload field is not blank. For history types ``"defer"``
    and ``"insufficient"`` a failed lookup of those three is tolerated and the
    stored ``wine_alert_id`` becomes ``"x"`` or ``"z"`` respectively.

    An existing row is searched by computed wine alert id, description
    (``payload.wineHistory``) and web crawler; when none exists a new one is
    added. The row is always set to status ``"DRAFT"``.

    Args:
        db: Active database session.
        payload: Match data submitted by the validation UI.

    Returns:
        ``{"success": True, "message": "Saved successfully."}``.

    Raises:
        APIException: 404 when the retailer is unknown, or when a required
            wine-db entry, vintage or bottle size is not found; 500 when the
            commit fails (the transaction is rolled back first).
    """
    logger = logging.getLogger(__name__)
    history_type = (payload.historyType or "").lower()
    is_xorz = history_type in ("defer", "insufficient")

    def _not_found(message: str) -> APIException:
        return APIException(module="Validation", error={}, status_code=status.HTTP_404_NOT_FOUND, message=message)

    # Lookup WineDb
    wine_db = None
    if await is_not_blank(payload.wineDbId):
        wine_db = db.query(WineDb).filter(WineDb.literal == payload.wineDbId).first()
        if not wine_db and not is_xorz:
            raise _not_found("WineDB not found.")

    # Lookup WebCrawler (always required)
    web_crawler = db.query(WebCrawler).filter_by(code=payload.wineRetailer).first()
    if not web_crawler:
        raise _not_found("Retailer not found.")

    # Lookup Vintage
    vintage = None
    if await is_not_blank(payload.wineVintage):
        vintage = db.query(Vintage).filter_by(name=payload.wineVintage).first()
        if not vintage and not is_xorz:
            raise _not_found("Vintage not found.")

    # Lookup BottleSize
    bottle_size = None
    if await is_not_blank(payload.wineBottleSize):
        bottle_size = db.query(BottleSize).filter_by(name=payload.wineBottleSize).first()
        if not bottle_size and not is_xorz:
            raise _not_found("Bottle Size not found.")

    # Construct wineAlertId; it stays None unless all three parts resolved.
    wine_alert_id = None
    if wine_db and vintage and bottle_size:
        wine_alert_id = f"{payload.wineDbId}{vintage.id}{bottle_size.id}"

    # Find or create Matched
    matched: Optional[WineMatch] = (
        db.query(WineMatch)
        .filter_by(wine_alert_id=wine_alert_id, description=payload.wineHistory, web_crawler_id=web_crawler.id)
        .first()
    )

    if not matched:
        matched = WineMatch(
            wine_alert_id=wine_alert_id,
            name=payload.wineHistory,
            description=payload.wineHistory,
            web_crawler_id=web_crawler.id,
        )
        logger.debug("Creating new Matched: %s", matched)
        db.add(matched)

    matched.status = "DRAFT"

    if await is_not_blank(payload.validationType):
        matched.validation_type = payload.validationType.upper()

    matched.keyword = payload.wineOriginalKeyword

    # Override wine_alert_id if needed
    if history_type == "defer":
        matched.wine_alert_id = "x"
    elif history_type == "insufficient":
        matched.wine_alert_id = "z"

    try:
        logger.debug("Saving Matched: %s", matched)
        db.commit()
    except Exception as e:
        db.rollback()
        # A failed commit is a server-side error, not a missing resource.
        raise APIException(
            module="Validation",
            error={},
            status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
            message=f"Error saving Matched: {str(e)}",
        ) from e

    return {"success": True, "message": "Saved successfully."}
