from abc import ABC, abstractmethod
from dataclasses import dataclass
import re as regex
import typing

import httpx
from pandas import DataFrame

if typing.TYPE_CHECKING:
    import pandas as pd

from lxml import etree as lxmletree

from travelogues_extraction.getrecords.session import RecordRetriever


class AbstractDataExtractor(ABC):
    """Interface for extractors that fill a target dataframe from records.

    The dataframe handed to the constructor is kept on the instance as
    ``target_dataframe``; concrete subclasses implement :meth:`write` and
    :meth:`get_columns_names_I_work_on`.
    """

    target_dataframe: DataFrame

    def __init__(self, target_dataframe: 'pd.DataFrame'):
        """Remember the dataframe this extractor writes into."""
        self.target_dataframe = target_dataframe

    @abstractmethod
    async def write(self, record: 'RecordRetriever.Record') -> typing.Optional[str]:
        """Process one record; must be implemented by subclasses.

        :param record: the record to process
        :return: an optional string (this base implementation returns None)
        """

    @classmethod
    @abstractmethod
    def get_columns_names_I_work_on(cls) -> list:
        """
        :return: A list of column names, that will be changed by the class
        """
        return list()


class AbstractSingleDataExtractor(AbstractDataExtractor, ABC):
    """Extractor that works on exactly one column, named by ``column``."""

    # Name of the single column this extractor changes; set by subclasses.
    column: str

    @classmethod
    def get_columns_names_I_work_on(cls) -> list:
        """
        :return: a one-element list holding the class attribute ``column``
        """
        column_name = cls.column
        return [column_name]


class AbstractXpathDirectlyToColumn(AbstractSingleDataExtractor):
    """Store the text of the first xpath match in ``column``."""

    # Compiled xpath evaluated against ``record.lxmlelement``; set by subclasses.
    xpath: lxmletree.XPath

    async def write(self, record: 'RecordRetriever.Record') -> typing.Optional[str]:
        """Write the first match's text into the row ``record.ac_number``.

        :param record: record whose ``lxmlelement`` is queried
        :return: the text that was written, or None when the xpath matched nothing
        """
        matches = self.xpath(record.lxmlelement)
        if len(matches) == 0:
            return None

        first_text = matches[0].text
        self.target_dataframe.at[record.ac_number, self.column] = first_text
        return first_text


class AbstractXpathJoinDirectlyToColumn(AbstractSingleDataExtractor):
    """Join the texts of all xpath matches and store them in ``column``."""

    # Separator placed between the texts of the individual matches.
    join_string: str = ' ; '
    # Compiled xpath evaluated against ``record.lxmlelement``; set by subclasses.
    xpath: lxmletree.XPath

    async def write(self, record: 'RecordRetriever.Record') -> typing.Optional[str]:
        """Write the joined match texts into the row ``record.ac_number``.

        :param record: record whose ``lxmlelement`` is queried
        :return: the joined string that was written, or None when there was
            no match carrying text (nothing is written in that case)
        """
        elements = self.xpath(record.lxmlelement)
        # An element without text content has ``text`` set to None, which
        # ``str.join`` rejects with a TypeError, so such elements are skipped.
        texts = [element.text for element in elements if element.text is not None]
        if not texts:
            return None

        result = self.join_string.join(texts)
        self.target_dataframe.at[record.ac_number, self.column] = result
        return result



class AbstractMultifield(AbstractDataExtractor):
    """Combine several sub-values of repeated fields into one column.

    ``primary_xml_path`` selects the repeated elements of a record. For each
    of them every xpath in ``xpath_isgnd_tuples`` is evaluated, and the texts
    found are joined on three levels:

    * texts of one xpath: ``join_string_tertiary_level``
    * xpaths of one primary element: ``join_string_secondary_level``
    * primary elements: ``join_string_primary_level``
    """

    # Name of the single column this extractor changes; set by subclasses.
    column: str

    @classmethod
    def get_columns_names_I_work_on(cls) -> list:
        """
        :return: a one-element list holding the class attribute ``column``
        """
        return [cls.column]

    # Compiled xpath evaluated against ``record.lxmlelement``; set by subclasses.
    primary_xml_path: lxmletree.XPath

    join_string_primary_level = ' .- '
    join_string_secondary_level = ' ; '
    join_string_tertiary_level = ' '

    @dataclass
    class XpathIsGnd:
        """An xpath plus a flag telling whether its texts go through get_gnd_as_uri."""

        xpath: lxmletree.XPath
        isgnd: bool

    # Any number of xpath specifications, evaluated in order for each primary
    # element (``Tuple[X]`` would have declared a tuple of exactly one item).
    xpath_isgnd_tuples: typing.Tuple[XpathIsGnd, ...]
    # Identifier following a literal "(DE-588)" prefix. The quantifier is open
    # ended: the greedy match has no trailing boundary, so an upper bound would
    # silently cut off identifiers longer than that bound.
    # NOTE(review): GND identifiers can presumably be ten characters long - confirm.
    gnd_regex = regex.compile(r'(?<=\(DE-588\))[\d\-X]{8,}')

    def get_gnd_as_uri(self, any_string: str) -> str:
        """Turn a string containing ``(DE-588)<id>`` into a d-nb.info URI.

        :param any_string: text to search for the identifier
        :return: ``http://d-nb.info/gnd/<id>`` when ``gnd_regex`` matches,
            otherwise ``any_string`` unchanged
        """
        found = self.gnd_regex.search(any_string)
        if found is None:
            return any_string  # fallback
        return 'http://d-nb.info/gnd/' + found.group(0)

    async def write(self, record: 'RecordRetriever.Record') -> typing.Optional[str]:
        """Write the three-level joined value into the row ``record.ac_number``.

        :param record: record whose ``lxmlelement`` is queried
        :return: the string that was written, or None when no text was found
            (nothing is written in that case)
        """
        primary_parts = []

        for primary_element in self.primary_xml_path(record.lxmlelement):
            secondary_parts = []
            for xpath_isgnd in self.xpath_isgnd_tuples:
                # Elements without text content have ``text`` set to None; they
                # would break both the regex search and ``str.join``.
                texts = [
                    element.text
                    for element in xpath_isgnd.xpath(primary_element)
                    if element.text is not None
                ]
                if xpath_isgnd.isgnd:
                    texts = [self.get_gnd_as_uri(text) for text in texts]
                if texts:
                    secondary_parts.append(self.join_string_tertiary_level.join(texts))
            if secondary_parts:
                primary_parts.append(self.join_string_secondary_level.join(secondary_parts))

        if not primary_parts:
            return None

        result = self.join_string_primary_level.join(primary_parts)
        self.target_dataframe.at[record.ac_number, self.column] = result
        return result


class AbstractParentAsSecondCast(AbstractDataExtractor):
    """Extractor that falls back to the parent record when the record has no value.

    :meth:`write` first calls :meth:`_write` on the record itself. If that
    returns None, the parent's AC number is read via ``parent_ac_xpath``, the
    parent record is requested over SRU, and :meth:`_write` is called again
    with the parent's XML but the child's ``ac_number``. Problems during the
    fallback are appended to ``self.log`` as dictionaries.
    """

    # Name of the single column this extractor changes; set by subclasses.
    column: str

    @classmethod
    def get_columns_names_I_work_on(cls) -> list:
        """
        :return: a one-element list holding the class attribute ``column``
        """
        return [cls.column]

    # Compiled xpath selecting the element(s) that carry the parent's AC number.
    parent_ac_xpath: lxmletree.XPath

    def __init__(self, *args, **kwargs):
        """Forward all arguments to the base class and set up cache and log."""
        super().__init__(*args, **kwargs)
        # Cache: parent AC number -> value obtained from that parent record.
        # (The attribute name keeps its historic spelling.)
        self.parend_titles = {}
        # Collected problem reports, one dict per incident.
        self.log = []

    @abstractmethod
    async def _write(self, record: 'RecordRetriever.Record') -> typing.Optional[str]:
        """
        the same as the usual write method, just that this one, will be called by the write(), which handles the parent logic
        :param record:
        :return:
        """
        pass

    async def write(self, record: 'RecordRetriever.Record') -> typing.Optional[str]:
        """Write the record's own value, or else the value of its parent record.

        :param record: record to process
        :return: the value returned by ``_write`` (for the record or its
            parent) or taken from the cache; None when no value was obtained
        """
        contents = await self._write(record)
        if contents is not None:
            return contents

        # else: try it with parents!
        ac_parent_elements = self.parent_ac_xpath(record.lxmlelement)
        if len(ac_parent_elements) == 0:
            return None  # and cry

        ac_parent = ac_parent_elements[0].text.replace('(AT-OBV)', '')

        # check if we have requested this ac number already
        if ac_parent in self.parend_titles:
            cached = self.parend_titles[ac_parent]
            self.target_dataframe.at[record.ac_number, self.column] = cached
            return cached  # and smile

        # else: get the data; the client is only opened when a request is
        # really needed, and the body is fully read before it is closed
        async with httpx.AsyncClient() as session:
            parent_response = await session.get(
                url='https://obv-at-oenb.alma.exlibrisgroup.com/view/sru/43ACC_ONB', params={
                    'startRecord': 1,
                    'maximumRecords': 1,
                    'query': f'alma.local_control_field_009={ac_parent}',
                    'version': '1.2',
                    'operation': 'searchRetrieve',
                    'recordSchema': 'marcxml',
                }, timeout=5
            )

        # status_code is an integer; comparing it with the string '200' was
        # always unequal and logged every response as a failure
        if parent_response.status_code != 200:
            self.log.append({
                'status_code': parent_response.status_code,
                'url': parent_response.url,
                'message': parent_response.text,
                'ac_child': record.ac_number,
                'ac_parent': ac_parent,
            })
            return None

        try:
            xml = lxmletree.fromstring(parent_response.content)
        except Exception as exception:
            # best effort: record the problem and give up on this record,
            # instead of continuing with an unbound ``xml``
            self.log.append({
                'ac_child': record.ac_number,
                'ac_parent': ac_parent,
                'xml_error': str(exception),
                'xml': parent_response.text
            })
            return None

        records = RecordRetriever.record_xpath(xml)
        if len(records) == 0:
            self.log.append({
                'issue': 'norecords',
                'ac_child': record.ac_number,
                'ac_parent': ac_parent
            })
            return None

        # parent XML, but the child's AC number, so the child's row is written
        result = await self._write(
            RecordRetriever.Record(lxmlelement=records[0],
                                   ac_number=record.ac_number)
        )
        if result is not None:
            # remember the value so siblings with the same parent skip the request
            # NOTE(review): assumes _write returns the value it stored in
            # ``self.column`` - confirm against the concrete subclasses
            self.parend_titles[ac_parent] = result
        return result





