from datetime import datetime, timezone

from sqlalchemy import Boolean, Column, DateTime, ForeignKey, Integer, String, Text
from sqlalchemy.ext.hybrid import hybrid_property
from sqlalchemy.orm import relationship

from src.core.models.base import Base

from src.utils.enums import WebCrawlerFileType
from sqlalchemy import Enum
from sqlalchemy import Enum as SAEnum
from collections import defaultdict
from src.apps.files.schemas.file import FileResponseSchema

class WebCrawlerFiles(Base):
    """Association row linking a ``WebCrawler`` to a ``File``.

    Each row tags the linked file with an optional ``file_type`` string
    (expected to correspond to a ``WebCrawlerFileType`` value — stored as a
    plain string, not a DB enum).
    """

    __tablename__ = "web_crawler_files"

    id = Column(Integer, primary_key=True, index=True)
    web_crawler_id = Column(Integer, ForeignKey("web_crawlers.id"), nullable=False)
    file_id = Column(Integer, ForeignKey("files.id"), nullable=False)
    # Pass callables so the timestamp is computed per row at INSERT/UPDATE
    # time. The previous code passed datetime.now(timezone.utc) already
    # evaluated, which froze every row's timestamp at module-import time.
    created_at = Column(DateTime, default=lambda: datetime.now(timezone.utc))
    updated_at = Column(
        DateTime,
        default=lambda: datetime.now(timezone.utc),
        onupdate=lambda: datetime.now(timezone.utc),
    )
    deleted_at = Column(DateTime, nullable=True)
    file_type = Column(String(100), nullable=True)
    file = relationship("File")


class WebCrawler(Base):
    """ORM model for a configured web crawler.

    Stores parsing configuration (delimiters, column indexes, selectors),
    matching flags, soft-delete bookkeeping, and a relationship to its
    associated files (``WebCrawlerFiles`` rows).
    """

    __tablename__ = "web_crawlers"

    id = Column(Integer, primary_key=True, index=True)
    version = Column(String(50), nullable=True)
    bottle_size_default = Column(String(50), nullable=True)
    location_id = Column(Integer, ForeignKey("locations.id"), nullable=True)
    code = Column(String(100), nullable=False)
    currency_code = Column(String(10), nullable=True)
    delimiter = Column(String(10), nullable=False)
    disable_history = Column(Boolean, default=False, nullable=True)
    disable_keyword = Column(Boolean, default=False, nullable=True)
    file_source = Column(String(255), nullable=True)
    history_name = Column(String(255), nullable=True)
    name = Column(String(255), nullable=False)
    output_delimiter = Column(String(10), nullable=False)
    page_type = Column(String(50), nullable=True)
    search_bottle_size = Column(Boolean, default=False, nullable=True)
    search_vintage = Column(Boolean, default=False, nullable=True)
    selector = Column(String(255), nullable=True)
    status = Column(String(50), nullable=False)
    url = Column(Text, nullable=False)
    vintage_default_nv = Column(Boolean, default=False, nullable=True)
    client_id = Column(Integer, nullable=True)
    group_id = Column(Integer, nullable=True)
    bottle_size_index = Column(String(50), nullable=True)
    # Comma-separated lists of integer column indexes (parsed by the
    # list_*_indexes properties below).
    description_indexes = Column(String(255), nullable=True)
    history_indexes = Column(String(255), nullable=True)
    keyword_indexes = Column(String(255), nullable=True)
    files_index = Column(String(255), nullable=True)
    sku_index = Column(String(50), nullable=True)
    url_index = Column(String(50), nullable=True)
    vintage_index = Column(String(50), nullable=True)
    price_index = Column(String(50), nullable=True)
    tax_status_index = Column(String(50), nullable=True)
    match_input_separator = Column(String(255), nullable=True)

    bottle_size_default_blank = Column(Boolean, default=False, nullable=True)
    match_date = Column(DateTime, nullable=True)

    is_deleted = Column(Boolean, default=False)
    # Pass callables so timestamps are computed per row at INSERT/UPDATE
    # time. The previous code passed an already-evaluated datetime, which
    # froze every row's timestamp at module-import time.
    created_at = Column(DateTime, default=lambda: datetime.now(timezone.utc))
    updated_at = Column(
        DateTime,
        default=lambda: datetime.now(timezone.utc),
        onupdate=lambda: datetime.now(timezone.utc),
    )
    deleted_at = Column(DateTime, nullable=True)

    files = relationship("WebCrawlerFiles")

    @property
    def list_files_by_type(self):
        """Group this crawler's files by file type.

        Returns a dict mapping ``WebCrawlerFileType.value`` to a list of
        ``FileResponseSchema`` objects; types with no files are omitted.
        """
        result = {}
        for file_type in WebCrawlerFileType:
            # NOTE(review): file_type is a String column, so this comparison
            # relies on WebCrawlerFileType being a str-based enum — confirm,
            # otherwise compare against file_type.value.
            files_of_type = [
                FileResponseSchema.model_validate(assoc.file)
                for assoc in self.files
                if assoc.file_type == file_type
            ]
            if files_of_type:
                result[file_type.value] = files_of_type
        return result

    @staticmethod
    def _parse_index_list(raw):
        """Parse a comma-separated string into a list of non-negative ints.

        Tokens that are blank or not purely digits are skipped; any
        unexpected failure (e.g. a non-string value) yields an empty list.
        Mirrors the Groovy ``getList*Indexes`` helpers.
        """
        if not raw:
            return []
        try:
            return [int(tok) for tok in map(str.strip, raw.split(",")) if tok.isdigit()]
        except Exception:
            return []

    @property
    def list_history_indexes(self):
        """Return ``history_indexes`` as a list of integers."""
        return self._parse_index_list(self.history_indexes)

    @property
    def list_description_indexes(self):
        """Return ``description_indexes`` as a list of integers."""
        return self._parse_index_list(self.description_indexes)

    @property
    def list_keyword_indexes(self):
        """Return ``keyword_indexes`` as a list of integers."""
        return self._parse_index_list(self.keyword_indexes)

    @property
    def matched_logs(self):
        """Return the latest ``MatchedLog`` whose code matches this crawler.

        Opens (and always closes) its own short-lived DB session. Returns
        ``None`` when no log exists or when the query fails.
        """
        import logging

        from sqlalchemy import desc

        from src.apps.wine.wine_log.models.wine_log import MatchedLog
        from src.core.dependencies import get_db

        # Pull a session from the dependency generator; closed in finally.
        db = next(get_db())
        try:
            return (
                db.query(MatchedLog)
                .filter(MatchedLog.code == self.code)
                .order_by(desc(MatchedLog.date_created))
                .first()
            )
        except Exception:
            # Log with traceback instead of print(); stay best-effort and
            # return None rather than propagating a query failure.
            logging.getLogger(__name__).exception(
                "Error retrieving matched logs for crawler code %s", self.code
            )
            return None
        finally:
            db.close()

    @hybrid_property
    def show_match_run(self):
        """Return True when the match run should be shown in the UI.

        True for a non-deleted crawler with a match_date whose latest
        matched log (if any) predates that match_date.
        """
        # NOTE(review): no class-level .expression is defined, so this hybrid
        # only works on instances (a plain @property would suffice) — confirm
        # it is never used inside a SQL query.
        if self.match_date is not None and self.matched_logs is not None:
            return not self.is_deleted and self.matched_logs.last_updated < self.match_date
        return not self.is_deleted and self.match_date is not None
