import re
import html
import unicodedata
from decimal import Decimal
from typing import List, Optional
from bs4 import BeautifulSoup
import locale
import logging

# Module-level logger; Calculator.get_score uses it to warn when a pattern
# expression cannot be evaluated.
logger = logging.getLogger(__name__)

class Calculator:
    """Text clean-up, vintage extraction and boolean-pattern scoring helpers.

    Patterns are strings of words combined with ``&`` (and), ``|`` (or),
    ``!`` (not) and parentheses.  A word ending in ``WILDCARD_SYMBOL`` matches
    any name token starting with the preceding prefix.
    """

    WILDCARD_SYMBOL = "_"
    NON_VINTAGE = "NV"
    REPLACEMENT_FOUND = ":1:"
    REPLACEMENT_NOT_FOUND = ":0:"
    STR_TRUE = "1"
    STR_FALSE = "0"
    STR_NOT = "!"

    # A four-digit year 1700-2099 delimited by non-word characters.
    PATTERN_VINTAGE = re.compile(r"(^|\b|\W)(17|18|19|20)\d{2}(\b|\W|$)")
    # "nv", "n.v", "n.v." (any case) delimited by non-word characters.
    PATTERN_NV = re.compile(r"(?i)(^|\b|\W)n\.?v\.?(\b|\W|$)")
    # Punctuation that is deleted outright.
    PATTERN_PUNCT = re.compile(r"[!@#$%&?()\[\]{}<>“”\",:;*]")
    # Punctuation that is replaced by a space.  The hyphen is escaped as
    # ``\-``; writing ``\\-`` in a raw string would instead create a range
    # from backslash to U+2012 that swallows every lowercase letter.
    PATTERN_CUSTOM_PUNCT = re.compile(r"[./´`‘’'\-‒–—―]+")
    PATTERN_MULTISPACE = re.compile(r"\s+")

    PATTERN_NOT_REMOVER = re.compile(
        r"(![^&|!()]+_?)"                     # !some_text or !some_text_
        r"|(&![^&|!()]+_?)"                   # &!some_text or &!some_text_
        r"|(\|![^&|!()]+_?)"                  # |!some_text or |!some_text_
        r"|(&!\((.*?)\))"                     # &!(...) expression
        r"|(\|!\((.*?)\))"                    # |!(...) expression
        r"|(!\((.*?)\))"                      # !(...) with no preceding operator
    )

    # Everything that is not a digit, separator or minus sign.
    _PATTERN_NON_MONEY = re.compile(r"[^\d,.-]")
    # A plain decimal number after separators have been normalised.
    _PATTERN_PLAIN_NUMBER = re.compile(r"-?(?:\d+\.?\d*|\.\d+)")
    # The only tokens allowed to reach eval() in get_score.
    _PATTERN_SAFE_BOOL = re.compile(r"(?:[01()\s]|and|or|not)+")

    @classmethod
    def cleanup(cls, text: str) -> str:
        """Return *text* lower-cased, ASCII-folded and stripped of punctuation.

        HTML entities are unescaped first.  Punctuation is handled *before*
        non-ASCII characters are dropped so that typographic quotes and dashes
        are treated like their ASCII counterparts (replaced by a space) rather
        than silently vanishing.  Returns ``""`` for empty/``None`` input.
        """
        if not text:
            return ""
        text = html.unescape(text)
        # NFKD splits accented letters into base letter + combining mark; the
        # marks are discarded by the ASCII encode below.
        text = unicodedata.normalize("NFKD", text).lower()
        text = cls.PATTERN_PUNCT.sub("", text)
        text = cls.PATTERN_CUSTOM_PUNCT.sub(" ", text)
        text = text.encode("ascii", "ignore").decode("ascii")
        return cls.PATTERN_MULTISPACE.sub(" ", text).strip()

    @classmethod
    def cleanup_numeric_html(cls, text: str) -> str:
        """Return the text content of *text* parsed as an HTML fragment."""
        return BeautifulSoup(f"<p>{text}</p>", "html.parser").text if text else ""

    @classmethod
    def cleanup_spaces(cls, text: str) -> str:
        """Collapse whitespace runs to single spaces and trim both ends."""
        return cls.PATTERN_MULTISPACE.sub(" ", text).strip() if text else ""

    @classmethod
    def normalize_pattern(cls, pattern: str) -> str:
        """Return the words of *pattern* as a comma-separated string.

        Every run of operator characters (``! ( ) & |``) becomes one comma;
        leading and trailing commas are removed.
        """
        return re.sub(r"[!()&|]+", ",", pattern).strip(",") if pattern else ""

    @classmethod
    def normalize_and_sort_pattern(cls, pattern: str) -> str:
        """Return the unique words of *pattern*, longest first, comma-joined.

        Words of equal length keep their first-occurrence order, so the
        result is deterministic across interpreter runs.
        """
        if not pattern:
            return ""
        words = cls.normalize_pattern(pattern).split(",")
        # dict.fromkeys dedupes while preserving order (a set would not);
        # sorted() is stable, also with reverse=True.
        unique = sorted(dict.fromkeys(words), key=len, reverse=True)
        return ",".join(unique)

    @classmethod
    def parse_vintage(cls, text: str) -> Optional[str]:
        """Return the first vintage year in *text*, ``NON_VINTAGE`` or ``None``.

        A year takes precedence over an "NV" marker.
        """
        if not text:
            return None
        m = cls.PATTERN_VINTAGE.search(text)
        if m:
            # The match may include a delimiter character; keep digits only.
            return re.sub(r"\D", "", m.group())
        if cls.PATTERN_NV.search(text):
            return cls.NON_VINTAGE
        return None

    @classmethod
    def remove_vintages(cls, text: str, pattern: Optional[str] = None) -> str:
        """Remove vintage years and "NV" markers from *text*.

        Years that also occur in *pattern* are kept.  Only stand-alone years
        (as recognised by ``PATTERN_VINTAGE``) are removed; the same digits
        embedded in a longer number are left untouched.
        """
        if not text:
            return ""
        reserved = set(cls.extract_all_vintages(pattern)) if pattern else set()
        if reserved:
            def drop_unreserved(m):
                if re.sub(r"\D", "", m.group()) in reserved:
                    return m.group()
                # Blank out the digits only, keeping the delimiters.
                return f"{m.group(1)} {m.group(3)}"

            text = cls.PATTERN_VINTAGE.sub(drop_unreserved, text)
        else:
            text = cls.PATTERN_VINTAGE.sub(" ", text)
        text = cls.PATTERN_NV.sub(" ", text)
        return cls.PATTERN_MULTISPACE.sub(" ", text).strip()

    @classmethod
    def extract_all_vintages(cls, text: str) -> List[str]:
        """Return every vintage year found in *text*, in order, as digit strings."""
        if not text:
            return []
        return [re.sub(r"\D", "", m.group()) for m in cls.PATTERN_VINTAGE.finditer(text)]

    @classmethod
    def true_positive_words(cls, pattern: str, allow_duplicates=False) -> List[str]:
        """Return the non-negated words of *pattern* in order of appearance.

        Duplicates are dropped unless *allow_duplicates* is true.
        """
        if not pattern:
            return []
        pattern = cls.remove_nots(pattern)
        result = []
        for word in re.split(r"[&|()]", pattern):
            word = word.strip()
            if not word or word.startswith(cls.STR_NOT):
                continue
            if allow_duplicates or word not in result:
                result.append(word)
        return result

    @staticmethod
    def _split_top_level_and(pattern: str) -> Optional[List[str]]:
        """Split *pattern* on ``&`` outside parentheses.

        Returns ``None`` when a ``|`` occurs outside parentheses, because then
        no single operand is mandatory.
        """
        parts, current, depth = [], [], 0
        for ch in pattern:
            if ch == "(":
                depth += 1
            elif ch == ")":
                depth = max(depth - 1, 0)
            elif depth == 0:
                if ch == "|":
                    return None
                if ch == "&":
                    parts.append("".join(current))
                    current = []
                    continue
            current.append(ch)
        parts.append("".join(current))
        return parts

    @classmethod
    def required_words(cls, pattern: str, remove_wildcard=True) -> List[str]:
        """Return the words that every match of *pattern* must contain.

        These are the plain (non-negated, non-parenthesised) operands of the
        top-level ``&`` chain.  A pattern with a top-level ``|`` has no
        required words.  With *remove_wildcard* the wildcard symbol is
        stripped so the result can be used for substring checks.
        """
        pattern = cls.remove_nots(pattern)
        if not pattern:
            return []
        candidates = cls._split_top_level_and(pattern)
        if candidates is None:
            return []
        result = []
        for word in candidates:
            word = word.strip()
            if not word or word.startswith("("):
                continue
            if remove_wildcard:
                word = word.replace(cls.WILDCARD_SYMBOL, "")
            result.append(word)
        return result

    @classmethod
    def remove_nots(cls, text: str) -> str:
        """Return *text* with negated words and negated groups removed.

        Groups are matched up to the first ``)``, so nested parentheses inside
        a negated group are not fully supported.
        """
        return cls.PATTERN_NOT_REMOVER.sub("", text) if text else ""

    @classmethod
    def parse_money(cls, text: str) -> Optional[Decimal]:
        """Parse a monetary amount written in US or German style.

        The parser is locale-independent (it never touches the process-wide
        locale) and builds the ``Decimal`` from text, so no binary-float noise
        is introduced.  Separator rules:

        * both ``.`` and ``,`` present: the right-most one is the decimal mark;
        * only commas: grouping, unless it is a single comma not followed by
          exactly three digits (``"12,50"`` -> 12.50, ``"1,234"`` -> 1234);
        * only dots: a single dot is the decimal mark, several are grouping.

        Returns ``None`` when *text* is empty, not a string, or not a number.
        """
        if not text or not isinstance(text, str):
            return None
        cleaned = cls._PATTERN_NON_MONEY.sub("", text)
        dot_at, comma_at = cleaned.rfind("."), cleaned.rfind(",")
        if dot_at >= 0 and comma_at >= 0:
            decimal_sep = "." if dot_at > comma_at else ","
        elif comma_at >= 0:
            digits_after = len(cleaned) - comma_at - 1
            lone_decimal = cleaned.count(",") == 1 and digits_after != 3
            decimal_sep = "," if lone_decimal else ""
        else:
            decimal_sep = "." if cleaned.count(".") <= 1 else ""

        for sep in ".,":
            if sep != decimal_sep:
                cleaned = cleaned.replace(sep, "")
        if decimal_sep:
            cleaned = cleaned.replace(decimal_sep, ".")

        if not cls._PATTERN_PLAIN_NUMBER.fullmatch(cleaned):
            return None
        return Decimal(cleaned)

    @classmethod
    def get_score(
        cls,
        name: str,
        pattern: str,
        norm_sorted: str,
        norm_raw: str,
        latest_best: int,
        required: List[str]
    ) -> int:
        """Score how well *name* satisfies the boolean *pattern*.

        Args:
            name: The (already cleaned) name to test.
            pattern: Boolean pattern using ``&``, ``|``, ``!`` and parentheses.
            norm_sorted: Unique pattern words, longest first, comma-separated
                (see ``normalize_and_sort_pattern``).
            norm_raw: All pattern words, comma-separated
                (see ``normalize_pattern``).
            latest_best: Best score so far; a score not exceeding it yields 0.
            required: Words that must occur in *name* as substrings.

        Returns:
            The number of pattern-word occurrences found in *name* when the
            pattern evaluates to true and the score beats *latest_best*;
            otherwise 0.
        """
        if not name or not pattern:
            return 0
        if required and not all(w in name for w in required):
            return 0

        name_wrapped = f" {name} "
        occurrences = {}
        for raw_word in norm_raw.split(","):
            occurrences[raw_word] = occurrences.get(raw_word, 0) + 1

        score = 0
        for word in norm_sorted.split(","):
            if not word:
                # str.replace("", ...) would inject a marker at position 0.
                continue
            if word.endswith(cls.WILDCARD_SYMBOL):
                prefix = word[:-1]
                found = re.search(rf"\s{re.escape(prefix)}\S*", name_wrapped) is not None
            else:
                found = f" {word} " in name_wrapped

            count = occurrences.get(word, 0)
            if found:
                score += count
            replacement = cls.REPLACEMENT_FOUND if found else cls.REPLACEMENT_NOT_FOUND
            # Longest words are substituted first so that shorter words cannot
            # clobber part of a longer one.
            pattern = pattern.replace(word, replacement, count)

        if score <= latest_best:
            return 0

        # Logical evaluation: translate the pattern into a Python expression.
        pattern_eval = (pattern
            .replace(cls.REPLACEMENT_FOUND, cls.STR_TRUE)
            .replace(cls.REPLACEMENT_NOT_FOUND, cls.STR_FALSE)
            .replace("&", " and ")
            .replace("|", " or ")
            .replace(cls.STR_NOT, " not "))

        # SECURITY: eval() is only reached when the expression consists solely
        # of 0/1, and/or/not, parentheses and whitespace, and it runs without
        # builtins.  Anything else is rejected instead of executed.
        if not cls._PATTERN_SAFE_BOOL.fullmatch(pattern_eval):
            logger.warning("Pattern eval failed: %s", "unexpected token in expression")
            return 0

        try:
            matched = eval(pattern_eval, {"__builtins__": {}}, {})
        except Exception as e:
            logger.warning("Pattern eval failed: %s", e)
            return 0

        return score if matched else 0