import re as regex
import typing
from typing import Optional

if typing.TYPE_CHECKING:
    from travelogues_extraction.getrecords.session import RecordRetriever

from urllib import parse

from lxml import etree as lxmletree

from travelogues_extraction.dataextractors.namespaces import namespaces
from travelogues_extraction.dataextractors.abstract import AbstractDataExtractor, AbstractXpathJoinDirectlyToColumn


class VolltextAndBarcode(AbstractDataExtractor):
    """
    Extract the full-text links of a record and the barcodes embedded in them.

    Column 2: Volltext
    Column 2,5: Barcode

    The links are taken from the ``u`` subfields of MARC datafield 856 with
    ``ind1="4"`` and ``ind2="0"``.
    """

    # Separator used when a record carries more than one link / barcode.
    join_string: str = ';\n'
    xpath: lxmletree.XPath = lxmletree.XPath(
        './marc:datafield[@tag="856" and @ind1="4" and @ind2="0"]/marc:subfield[@code="u"]',
        namespaces=namespaces,
    )

    @classmethod
    def get_columns_names_I_work_on(cls) -> list:
        """Return the names of the columns this extractor writes to."""
        return ['Volltext', 'Barcode']

    async def write(self, record: 'RecordRetriever.Record') -> typing.Optional[str]:
        """
        Write the joined links to 'Volltext' and the joined barcodes to 'Barcode'.

        A barcode is the last ``/``-separated segment of the percent-decoded
        link, with every ``+`` removed.

        :param record: record providing ``lxmlelement`` (the XML to query) and
            ``ac_number`` (the row label in the target dataframe)
        :return: the joined links, or None if the record has no non-empty link
        """
        # Filter on the element *text*: comparing the element itself with ''
        # is always true, and a subfield without text (text is None) would
        # make both str.join and parse.unquote raise TypeError.
        urls = [
            element.text
            for element in self.xpath(record.lxmlelement)
            if element.text
        ]
        if not urls:
            return None

        result = self.join_string.join(urls)
        self.target_dataframe.at[record.ac_number, 'Volltext'] = result

        barcodes = [
            parse.unquote(url).split('/')[-1].replace('+', '')
            for url in urls
        ]
        self.target_dataframe.at[record.ac_number, 'Barcode'] = self.join_string.join(barcodes)
        return result



class Ausgabe(AbstractXpathJoinDirectlyToColumn):
    """
    Column 'Ausgabe': subfield ``a`` of the first MARC datafield 250.

    Only the target column and the XPath are declared here; the extraction
    logic is inherited from the base class (defined elsewhere).
    """

    xpath = lxmletree.XPath(
        './marc:datafield[@tag="250"][1]/marc:subfield[@code="a"]',
        namespaces=namespaces,
    )
    column = 'Ausgabe'
    # That is all.


class Verlagsort(AbstractXpathJoinDirectlyToColumn):
    """
    Column 'Verlagsort': subfield ``a`` of the first MARC datafield 264.

    Only the target column and the XPath are declared here; the extraction
    logic is inherited from the base class (defined elsewhere).
    """

    xpath = lxmletree.XPath(
        './marc:datafield[@tag="264"][1]/marc:subfield[@code="a"]',
        namespaces=namespaces,
    )
    column = 'Verlagsort'
    # That is all.


class VerlegerDrucker(AbstractXpathJoinDirectlyToColumn):
    """
    Column 'Verleger und Drucker': subfield ``b`` of every MARC datafield 264
    whose second indicator is "1" or "3".

    Only the target column and the XPath are declared here; the extraction
    logic is inherited from the base class (defined elsewhere).
    """

    xpath = lxmletree.XPath(
        './marc:datafield[@tag="264" and (@ind2="1" or @ind2="3")]/marc:subfield[@code="b"]',
        namespaces=namespaces,
    )
    column = 'Verleger und Drucker'
    # That is all.


class Erscheinungsjahr(AbstractXpathJoinDirectlyToColumn):
    """
    Column 'Erscheinungsjahr': subfield ``c`` of every MARC datafield 264 with
    a blank first indicator and a second indicator of "1" or "3".

    Only the target column and the XPath are declared here; the extraction
    logic is inherited from the base class (defined elsewhere).
    """

    xpath = lxmletree.XPath(
        './marc:datafield[@tag="264" and @ind1=" " and (@ind2="1" or @ind2="3")]/marc:subfield[@code="c"]',
        namespaces=namespaces,
    )
    column = 'Erscheinungsjahr'
    # That is all.


class ErscheinungsjahrSortierform(AbstractXpathJoinDirectlyToColumn):
    """
    Column 'Erscheinungsjahr Sortierform': four characters of MARC
    controlfield 008, starting at its 8th character (XPath positions are
    1-based).
    """

    column = 'Erscheinungsjahr Sortierform'
    xpath = lxmletree.XPath(
        'substring(./marc:controlfield[@tag="008"]/text(), 8, 4)',
        namespaces=namespaces,
    )

    async def write(self, record: 'RecordRetriever.Record') -> typing.Optional[str]:
        """
        Write the extracted substring to this extractor's column.

        The XPath is a ``substring()`` expression, so evaluating it yields a
        single string-like lxml.etree._ElementUnicodeResult rather than a
        list of elements; it is converted to a plain ``str`` before being
        stored.

        :param record: record providing ``lxmlelement`` and ``ac_number``
        :return: the extracted value, or None if the substring is empty
        """
        year = self.xpath(record.lxmlelement)
        if year == '':
            # Nothing to extract: leave the cell untouched.
            return None

        self.target_dataframe.at[record.ac_number, self.column] = str(year)
        return year


class Kollation(AbstractXpathJoinDirectlyToColumn):
    """
    Column 'Kollation': subfield ``a`` of every MARC datafield 300 with both
    indicators blank.

    Only the target column and the XPath are declared here; the extraction
    logic is inherited from the base class (defined elsewhere).
    """

    xpath = lxmletree.XPath(
        './marc:datafield[@tag="300" and @ind1=" " and @ind2=" "]/marc:subfield[@code="a"]',
        namespaces=namespaces,
    )
    column = 'Kollation'
    # That is all.


class Illustrationen(AbstractXpathJoinDirectlyToColumn):
    """
    Columns 'Illustrationen', 'Anzahl Illustrationen' and 'Anzahl Karten'.

    The raw text comes from subfield ``b`` of MARC datafield 300 and is written
    by the inherited ``write``; the two count columns are then filled from
    numbers found in that text.
    """

    # Defined so that the inherited write() can be reused for the raw text.
    column = 'Illustrationen'
    xpath = lxmletree.XPath(
        './marc:datafield[@tag="300"]/marc:subfield[@code="b"]',
        namespaces=namespaces,
    )
    # One pattern per count column, in the same order as the column names
    # after the first one: a number directly preceding "Illustration" /
    # "Karte" (optionally separated by whitespace).
    regexes = [
        regex.compile(r'(\d+)\s*?(?=[Ii]llustration)'),
        regex.compile(r'(\d+)\s*?(?=[Kk]arte)'),
    ]

    @classmethod
    def get_columns_names_I_work_on(cls) -> list:
        """Return the raw-text column followed by the two count columns."""
        return ['Illustrationen', 'Anzahl Illustrationen', 'Anzahl Karten']

    async def write(self, record: 'RecordRetriever.Record') -> typing.Optional[str]:
        """
        Write the raw text via the base class, then the counts parsed from it.

        :param record: record providing ``lxmlelement`` and ``ac_number``
        :return: whatever the inherited ``write`` returned if it is truthy,
            otherwise None
        """
        text: Optional[str] = await super().write(record)
        if not text:
            return None

        count_columns = self.get_columns_names_I_work_on()[1:]
        for count_column, pattern in zip(count_columns, self.regexes):
            found = pattern.search(text)
            if found is None:
                continue
            # Only the first occurrence of each pattern is used.
            self.target_dataframe.at[record.ac_number, count_column] = found.group(1)
        return text


class Format(AbstractXpathJoinDirectlyToColumn):
    """
    Column 'Format': subfield ``c`` of every MARC datafield 300.

    Only the target column and the XPath are declared here; the extraction
    logic is inherited from the base class (defined elsewhere).
    """

    xpath = lxmletree.XPath(
        './marc:datafield[@tag="300"]/marc:subfield[@code="c"]',
        namespaces=namespaces,
    )
    column = 'Format'
    # That is all.


class Anmerkungen(AbstractXpathJoinDirectlyToColumn):
    """
    Distribute the subfields of MARC datafield 500 (both indicators blank)
    over four note columns.

    The subfields are assigned by position: the first ones go, one each, to
    every column but the last; any remaining subfields are joined into the
    last column.
    NOTE(review): the mapping is purely positional -- confirm that the
    subfield order in the data really matches the column order.
    """

    column = 'Anm. zu Illustrationen'
    join_string = ' ; '
    xpath = lxmletree.XPath(
        './marc:datafield[@tag="500" and @ind1=" " and @ind2=" "]/marc:subfield',
        namespaces=namespaces,
    )

    @classmethod
    def get_columns_names_I_work_on(cls) -> list:
        """Return the note columns; the last one collects all overflow subfields."""
        return ['Anm. zu Illustrationen', 'Bibliografie', 'Anmerkungen', 'Anm. zu Kollation']

    async def write(self, record: 'RecordRetriever.Record') -> typing.Optional[str]:
        """
        Write the matched subfields to the note columns.

        :param record: record providing ``lxmlelement`` and ``ac_number``
        :return: the text of the first matched subfield, or None if the
            record has no matching subfield
        """
        fields = self.xpath(record.lxmlelement)
        if not fields:
            return None

        columns = self.get_columns_names_I_work_on()
        positional_columns, overflow_column = columns[:-1], columns[-1]

        field: lxmletree._Element
        for column, field in zip(positional_columns, fields):
            self.target_dataframe.at[record.ac_number, column] = field.text

        # Derive the split point from the column list instead of a literal 3
        # so that both stay in sync if the columns ever change.
        overflow = fields[len(positional_columns):]
        if overflow:
            # A subfield without text has ``text is None``, which str.join
            # rejects with TypeError; skip those.
            self.target_dataframe.at[record.ac_number, overflow_column] = self.join_string.join(
                [e.text for e in overflow if e.text is not None]
            )
        # The return value carries no particular meaning here; the first
        # subfield's text is returned as in the original implementation.
        return fields[0].text


class Sprache(AbstractXpathJoinDirectlyToColumn):
    """
    Column 'Sprache': subfield ``a`` of every MARC datafield 041 with a blank
    second indicator and a first indicator of "1" or blank.

    Only the target column and the XPath are declared here; the extraction
    logic is inherited from the base class (defined elsewhere).
    """

    xpath = lxmletree.XPath(
        './marc:datafield[@tag="041" and @ind2=" " and (@ind1="1" or @ind1=" ")]/marc:subfield[@code="a"]',
        namespaces=namespaces,
    )
    column = 'Sprache'
    # That is all.


class OriginalSprache(AbstractXpathJoinDirectlyToColumn):
    """
    Column 'Originalsprache': subfield ``h`` of every MARC datafield 041 with
    first indicator "1" and a blank second indicator.

    Only the target column and the XPath are declared here; the extraction
    logic is inherited from the base class (defined elsewhere).
    """

    xpath = lxmletree.XPath(
        './marc:datafield[@tag="041" and @ind1="1" and @ind2=" "]/marc:subfield[@code="h"]',
        namespaces=namespaces,
    )
    column = 'Originalsprache'
    # That is all.


class BemerkungenZurSprache(AbstractXpathJoinDirectlyToColumn):
    """
    Column 'Bemerkung zur Sprache': subfield ``a`` of every MARC datafield 546.

    Only the target column and the XPath are declared here; the extraction
    logic is inherited from the base class (defined elsewhere).
    """

    xpath = lxmletree.XPath(
        './marc:datafield[@tag="546"]/marc:subfield[@code="a"]',
        namespaces=namespaces,
    )
    column = 'Bemerkung zur Sprache'


class Standardnummer(AbstractXpathJoinDirectlyToColumn):
    """
    Column 'Standardnummer': subfield ``a`` of every MARC datafield 024 with
    first indicator "7" and a blank second indicator.

    Only the target column and the XPath are declared here; the extraction
    logic is inherited from the base class (defined elsewhere).
    """

    xpath = lxmletree.XPath(
        './marc:datafield[@tag="024" and @ind1="7" and @ind2=" "]/marc:subfield[@code="a"]',
        namespaces=namespaces,
    )
    column = 'Standardnummer'







