from abc import ABC, abstractmethod
from dataclasses import dataclass
import re as regex
import typing

from pandas import DataFrame

if typing.TYPE_CHECKING:
    import pandas as pd
    from travelogues_extraction.getrecords.session import RecordRetriever

from lxml import etree as lxmletree


class AbstractDataExtractor(ABC):
    """Base class for extractors that write data taken from a record into a dataframe.

    The dataframe handed to the constructor is kept on the instance as
    ``target_dataframe``. Concrete subclasses have to implement both
    ``write`` and ``get_columns_names_I_work_on``.
    """

    target_dataframe: DataFrame

    def __init__(self, target_dataframe: 'pd.DataFrame'):
        """
        :param target_dataframe: The dataframe this extractor keeps a reference to
        """
        self.target_dataframe = target_dataframe

    @abstractmethod
    async def write(self, record: 'RecordRetriever.Record') -> typing.Optional[str]:
        """
        Process one record. The base implementation does nothing.

        :param record: The record to process
        :return: None in this base implementation
        """

    @classmethod
    @abstractmethod
    def get_columns_names_I_work_on(cls) -> list:
        """
        :return: A list of column names, that will be changed by the class
                 (empty in this base implementation)
        """
        return list()


class AbstractSingleDataExtractor(AbstractDataExtractor, ABC):
    """Extractor base class that works on exactly one dataframe column."""

    # Name of that single column; presumably assigned by concrete subclasses.
    column: str

    @classmethod
    def get_columns_names_I_work_on(cls) -> list:
        """
        :return: A one-element list holding the class attribute ``column``
        """
        only_column = cls.column
        return [only_column]


class AbstractXpathDirectlyToColumn(AbstractSingleDataExtractor):
    """Writes the text of the first XPath match of a record into ``column``."""

    # XPath that is called with ``record.lxmlelement``.
    xpath: lxmletree.XPath

    async def write(self, record: 'RecordRetriever.Record') -> typing.Optional[str]:
        """
        Evaluate ``xpath`` on the record and store the first match's text.

        The value is written to the dataframe cell addressed by
        ``record.ac_number`` (row label) and ``column``.

        :param record: The record whose ``lxmlelement`` is queried
        :return: The text of the first match, or None if nothing matched
        """
        matches = self.xpath(record.lxmlelement)
        if len(matches) == 0:
            # Nothing found: leave the dataframe untouched.
            return None

        first_text = matches[0].text
        self.target_dataframe.at[record.ac_number, self.column] = first_text
        return first_text


class AbstractXpathJoinDirectlyToColumn(AbstractSingleDataExtractor):
    """Writes the joined texts of all XPath matches of a record into ``column``."""

    # Separator placed between the texts of the individual matches.
    join_string: str = ' ; '
    # XPath that is called with ``record.lxmlelement``.
    xpath: lxmletree.XPath

    async def write(self, record: 'RecordRetriever.Record') -> typing.Optional[str]:
        """
        Evaluate ``xpath`` on the record and store all match texts as one string.

        The texts are joined with ``join_string`` and written to the dataframe
        cell addressed by ``record.ac_number`` (row label) and ``column``.

        :param record: The record whose ``lxmlelement`` is queried
        :return: The joined string, or None if no match carried any text
        """
        # An element without text content has ``text is None``; passing that
        # to ``str.join`` would raise TypeError, so such elements are skipped.
        texts = [
            element.text
            for element in self.xpath(record.lxmlelement)
            if element.text is not None
        ]
        if not texts:
            # No usable text: leave the dataframe untouched.
            return None

        result = self.join_string.join(texts)
        self.target_dataframe.at[record.ac_number, self.column] = result
        return result



class AbstractMultifield(AbstractDataExtractor):
    """Flattens repeated, multi-part XML fields of a record into one column.

    ``primary_xml_path`` selects the primary-level elements of a record. On
    each of them every XPath in ``xpath_isgnd_tuples`` is evaluated. The
    resulting texts are joined on three levels:

    * texts found by one XPath are joined with ``join_string_tertiary_level``
    * the strings of all XPaths of one primary element are joined with
      ``join_string_secondary_level``
    * the strings of all primary elements are joined with
      ``join_string_primary_level``
    """

    # Name of the column written by ``write``; presumably set by subclasses.
    column: str

    @classmethod
    def get_columns_names_I_work_on(cls) -> list:
        """
        :return: A one-element list holding the class attribute ``column``
        """
        return [cls.column]

    # XPath that is called with ``record.lxmlelement``.
    primary_xml_path: lxmletree.XPath

    join_string_primary_level = ' .- '
    join_string_secondary_level = ' ; '
    join_string_tertiary_level = ' '

    @dataclass
    class XpathIsGnd:
        """An XPath plus a flag telling whether its texts go through ``get_gnd_as_uri``."""

        xpath: lxmletree.XPath
        isgnd: bool

    # Variadic tuple: any number of XPath/flag pairs, evaluated in order.
    xpath_isgnd_tuples: typing.Tuple[XpathIsGnd, ...]
    # Matches 8 to 9 characters (digits, '-' or 'X') directly after '(DE-588)'.
    gnd_regex = regex.compile(r'(?<=\(DE-588\))[\d\-X]{8,9}')

    def get_gnd_as_uri(self, any_string: str) -> str:
        """
        Turn a string containing a '(DE-588)' prefixed identifier into a URI.

        :param any_string: The string to search with ``gnd_regex``
        :return: 'http://d-nb.info/gnd/' followed by the matched identifier,
                 or ``any_string`` unchanged if the pattern is not found
        """
        found = self.gnd_regex.search(any_string)
        if found is None:
            return any_string  # fallback
        return 'http://d-nb.info/gnd/' + found.group(0)

    async def write(self, record: 'RecordRetriever.Record') -> typing.Optional[str]:
        """
        Build the joined string for the record and store it in ``column``.

        The value is written to the dataframe cell addressed by
        ``record.ac_number`` (row label) and ``column``.

        :param record: The record whose ``lxmlelement`` is queried
        :return: The joined string, or None if no text was found at all
        """
        primary_xml_results = self.primary_xml_path(record.lxmlelement)
        if not primary_xml_results:
            return None

        primary_output_level = []

        for primary_xml_result in primary_xml_results:
            secondary_output_level = []
            for xpath_isgnd_tuple in self.xpath_isgnd_tuples:
                # An element without text content has ``text is None``, which
                # would make the regex search and ``str.join`` raise
                # TypeError, so such elements are skipped.
                tertiary_texts = [
                    element.text
                    for element in xpath_isgnd_tuple.xpath(primary_xml_result)
                    if element.text is not None
                ]
                if not tertiary_texts:
                    continue
                if xpath_isgnd_tuple.isgnd:
                    tertiary_texts = [self.get_gnd_as_uri(text) for text in tertiary_texts]
                secondary_output_level.append(self.join_string_tertiary_level.join(tertiary_texts))
            if secondary_output_level:
                primary_output_level.append(self.join_string_secondary_level.join(secondary_output_level))

        if not primary_output_level:
            # Nothing usable found: leave the dataframe untouched.
            return None

        result = self.join_string_primary_level.join(primary_output_level)
        self.target_dataframe.at[record.ac_number, self.column] = result
        return result

