from datetime import datetime
from typing import List, Optional

from pydantic import BaseModel, Field, computed_field

from src.apps.files.schemas.file import FileResponseSchema
from src.utils.enums import WebCrawlerFileType
from src.utils.helpers.functions import get_file_header

class WebCrawlerBase(BaseModel):
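    """Fields shared by the create/update/read crawler schemas.

    The ``*_index`` / ``*_indexes`` fields hold zero-based column positions
    into the crawler's master CSV file (comma-separated for the plural
    fields), as consumed by ``WebCrawlerResponse._map_indexes_to_columns``.
    """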
    version: Optional[str] = Field(None, examples=["v1.0"])
    bottle_size_default: Optional[str] = Field(None, examples=["750ml"])
    location_id: Optional[int] = Field(None, examples=[1])
    currency_code: Optional[str] = Field(None, examples=["USD"])
    disable_history: Optional[bool] = Field(False, examples=[False])
    disable_keyword: Optional[bool] = Field(False, examples=[False])
    file_source: Optional[str] = Field(None, examples=["source.csv"])
    history_name: Optional[str] = Field(None, examples=["2024 History"])
    page_type: Optional[str] = Field(None, examples=["product"])
    search_bottle_size: Optional[bool] = Field(False, examples=[True])
    search_vintage: Optional[bool] = Field(False, examples=[True])
    selector: Optional[str] = Field(None, examples=["#product-list"])
    vintage_default_nv: Optional[bool] = Field(None, examples=[False])
    client_id: Optional[int] = Field(None, examples=[101])
    group_id: Optional[int] = Field(None, examples=[201])
    bottle_size_index: Optional[str] = Field(None, examples=["2"])
    description_indexes: Optional[str] = Field(None, examples=["3,4"])
    history_indexes: Optional[str] = Field(None, examples=["5,6"])
    keyword_indexes: Optional[str] = Field(None, examples=["7,8"])
    files_index: Optional[str] = Field(None, examples=["9"])
    sku_index: Optional[str] = Field(None, examples=["1"])
    url_index: Optional[str] = Field(None, examples=["10"])
    vintage_index: Optional[str] = Field(None, examples=["11"])
    price_index: Optional[str] = Field(None, examples=["12"])
    tax_status_index: Optional[str] = Field(None, examples=["13"])
    match_input_separator: Optional[str] = Field(None, examples=[","])
    bottle_size_default_blank: Optional[bool] = Field(False, examples=[False])


class WebCrawlerCreate(WebCrawlerBase):
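    """Payload for creating a web crawler; core identifiers are required."""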
    code: str = Field(..., examples=["WCR123"])
    name: str = Field(..., examples=["Wine Crawler"])
    delimiter: str = Field(..., examples=[","])
    output_delimiter: str = Field(..., examples=[";"])
    status: str = Field(..., examples=["IN PROGRESS"])
    url: Optional[str] = Field(None, examples=["https://example.com"])


class WebCrawlerUpdate(WebCrawlerBase):
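    """Partial-update payload; every field is optional."""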
    code: Optional[str] = Field(None, examples=["WCR123"])
    name: Optional[str] = Field(None, examples=["Wine Crawler"])
    delimiter: Optional[str] = Field(None, examples=[","])
    output_delimiter: Optional[str] = Field(None, examples=[";"])
    status: Optional[str] = Field(None, examples=["IN PROGRESS"])
    url: Optional[str] = Field(None, examples=["https://example.com"])


class WebCrawlerFileResponse(BaseModel):
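    """A file attached to a web crawler, together with its type and timestamps."""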
    id: int
    file_type: Optional[str] = None
    file: Optional[FileResponseSchema] = None
    created_at: Optional[datetime] = None
    updated_at: Optional[datetime] = None


class MatchedLogResponse(BaseModel):
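    """Per-run match statistics for a crawl (history/keyword/ambiguous match
    counts, unknown wines, and timing information)."""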
    id: int
    literal: Optional[str] = None
    code: str
    process_keyword: bool = True
    total_records: int = 0
    history_matched: int = 0
    keyword_matched: int = 0
    ambiguous_matched: int = 0
    unknown_wines: int = 0
    status: str
    crawl_date: Optional[datetime] = None
    date_start: Optional[datetime] = None
    date_end: Optional[datetime] = None
    message: Optional[str] = None
    date_created: Optional[datetime] = None
    last_updated: Optional[datetime] = None
    execution_time: Optional[str] = None

    model_config = {"from_attributes": True}


class WebCrawlerResponse(WebCrawlerBase):
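    """Read schema for a web crawler, including attached files and computed
    ``*_from_csv`` fields that resolve stored column indexes against the
    latest master-file header."""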
    id: int
    code: str
    name: str
    delimiter: str
    output_delimiter: str
    status: str
    url: Optional[str] = None
    files: Optional[List[WebCrawlerFileResponse]] = None
    is_deleted: bool
    created_at: Optional[datetime] = None
    updated_at: Optional[datetime] = None
    deleted_at: Optional[datetime] = None
    show_match_run: Optional[bool] = Field(None, examples=[True])
    match_date: Optional[datetime] = None

    matched_logs: Optional[MatchedLogResponse] = None
    latest_master_file: Optional[FileResponseSchema] = None

    model_config = {"from_attributes": True}

    def _latest_file_header(self) -> List[dict]:
        """Return the header of the most recent MASTER_FILE attached to this
        crawler as ``{"column_name": ..., "index": ...}`` dicts, or ``[]`` if
        no readable master file exists."""
        if not self.files:
            return []
        master_files = [f for f in self.files if f.file_type == WebCrawlerFileType.MASTER_FILE]
        if not master_files:
            return []
        # Files missing a created_at timestamp sort as oldest.
        latest_file = max(master_files, key=lambda f: f.created_at or datetime.min)

        csv_path = getattr(latest_file.file, "path", None)
        if not csv_path:
            return []

        try:
            headers = get_file_header(csv_path, custom_delimiter=self.delimiter)
        except Exception:
            return []

        return [{"column_name": name, "index": idx} for idx, name in enumerate(headers)]
    
    def _map_indexes_to_columns(self, indexes_field: str) -> List[str]:
        """Resolve a comma-separated index field (e.g. ``"3,4"``) to the
        corresponding column names in the latest master-file header."""
        headers = self._latest_file_header()
        if not headers:
            return []
        indexes = getattr(self, indexes_field, None)
        if not indexes:
            return []
        if isinstance(indexes, str):
            indexes = [part.strip() for part in indexes.split(",")]
        return [
            headers[int(idx)]["column_name"]
            for idx in indexes
            if str(idx).isdigit() and int(idx) < len(headers)
        ]
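    # Worked example (hypothetical data): with a master-file header of
    # ["sku", "description", "price"], an indexes field of "1,2" resolves to
    # ["description", "price"]; non-numeric or out-of-range entries are
    # silently skipped.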
    
    @computed_field
    @property
    def keyword_indexes_from_csv(self) -> List[str]:
        return self._map_indexes_to_columns("keyword_indexes")

    @computed_field
    @property
    def history_indexes_from_csv(self) -> List[str]:
        return self._map_indexes_to_columns("history_indexes")

    @computed_field
    @property
    def vintage_index_from_csv(self) -> List[str]:
        return self._map_indexes_to_columns("vintage_index")

    @computed_field
    @property
    def url_index_from_csv(self) -> List[str]:
        return self._map_indexes_to_columns("url_index")

    @computed_field
    @property
    def description_indexes_from_csv(self) -> List[str]:
        return self._map_indexes_to_columns("description_indexes")

    @computed_field
    @property
    def sku_index_from_csv(self) -> List[str]:
        return self._map_indexes_to_columns("sku_index")
        

class WebCrawlerFilterSchema(BaseModel):
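    """Optional filters for listing web crawlers."""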
    code: Optional[str] = Field(None, examples=["WCR123"])
    name: Optional[str] = Field(None, examples=["Wine Crawler"])
    status: Optional[str] = Field(None, examples=["active"])
    location_id: Optional[int] = Field(None, examples=[1])
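
# Minimal usage sketch (hypothetical ORM instance ``crawler_row``; the values
# shown are illustrative only):
#
#     response = WebCrawlerResponse.model_validate(crawler_row)
#     response.sku_index_from_csv    # e.g. ["sku"]
#     response.model_dump()          # includes the *_from_csv computed fields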
