import requests
import pathlib
import pandas as pd
import re
import json



def sanitize_barcode_string(barcode):
    """Extract the barcode token from a raw string.

    The token is the first run that starts with an uppercase ``Z`` followed
    by one or more word characters (letters, digits or underscore). Anything
    before or after that run, such as a leading ``+`` or trailing whitespace,
    is dropped.

    Args:
        barcode: Raw barcode text.

    Returns:
        The matched token, e.g. ``'Z123456789X'`` for ``'+Z123456789X '``.

    Raises:
        ValueError: If ``barcode`` contains no such token.
    """
    match = re.search(r'Z\w+', barcode)
    if match is None:
        # re.search returns None on no match; fail with a clear message
        # instead of an AttributeError on NoneType.
        raise ValueError(f'No barcode of the form "Z..." found in {barcode!r}')
    return match.group(0)


def get_image_url(barcode, page_nums, *, timeout=30):
    """Build IIIF image URLs for the given pages and probe each one.

    Every page number is zero-padded to eight characters and inserted into
    the ``iiif.onb.ac.at`` image URL for the sanitized barcode. Each URL is
    then requested once to see whether the image exists.

    Args:
        barcode: Raw barcode text; cleaned with ``sanitize_barcode_string``.
        page_nums: Either a single whole number (``int``, NumPy integer, or a
            float with no fractional part) or a string of comma-separated
            page numbers such as ``'1, 212'``. Spaces in the string are
            ignored.
        timeout: Seconds to wait for the server before giving up on a
            request. Passed through to ``requests.get``.

    Returns:
        A list with one entry per page that answered with HTTP 200 (the URL)
        or HTTP 404 (the string ``'Non-ABO barcode'``). Pages that answer
        with any other status code produce no entry.

    Raises:
        ValueError: If a numeric ``page_nums`` is not a whole number, or the
            barcode cannot be sanitized.
        requests.exceptions.RequestException: On connection errors or when
            ``timeout`` is exceeded.
    """
    barcode = sanitize_barcode_string(barcode)

    if isinstance(page_nums, str):
        pages = page_nums.replace(' ', '').split(',')
    else:
        # Spreadsheet cells often arrive as NumPy integers or floats rather
        # than plain int; accept any numeric value that is a whole number.
        whole = int(page_nums)
        if whole != page_nums:
            raise ValueError(
                f'Page number must be a whole number, got {page_nums!r}')
        pages = [str(whole)]

    url_lis = []
    for page in pages:
        image_url = f'https://iiif.onb.ac.at/images/ABO/{barcode}/{page.zfill(8)}/full/full/0/native.jpg'
        # stream=True fetches only the headers; the full-size image body is
        # never downloaded because only the status code is needed. The
        # context manager releases the connection afterwards.
        with requests.get(image_url, stream=True, timeout=timeout) as resp:
            status = resp.status_code
        if status == 200:
            url_lis.append(image_url)
        elif status == 404:
            url_lis.append('Non-ABO barcode')
    return url_lis


if __name__ == '__main__':
    # Ensure the image directory exists. exist_ok=True replaces the separate
    # exists() check, which could race with another process creating it.
    img_dir = pathlib.Path('img')
    img_dir.mkdir(exist_ok=True)

    # Smoke check for sanitize_barcode_string: prints the extracted token.
    print(sanitize_barcode_string('+Z123456789X \n'))

    # Smoke check for get_image_url (performs live HTTP requests).
    # NOTE(review): judging by the names, the first barcode is expected to
    # resolve and the second to yield 'Non-ABO barcode' -- confirm.
    ABO_barcode = 'Z183423904'
    NonABO_barcode = 'Z173254602'
    print('**** testing get_image_url')
    print(get_image_url(ABO_barcode, '1, 212'))
    print(get_image_url(NonABO_barcode, 1))
    print('****')

    # Read the ground-truth Excel sheet (columns A through H only).
    GT_df = pd.read_excel('../Groundtruth/BE_GT.xlsx', usecols='A:H')

    # Code for creating image URLs (now contained in Excel file)
    # GT_df['Image URLs'] = GT_df.apply(lambda x: get_image_url(x['Strichcode'], x['Seite']), axis=1)
    # GT_df.to_excel('test.xlsx')

    # Create machine-readable .csv file. With pandas' default settings the
    # DataFrame index is written as the first column as well.
    GT_df.to_csv('../Groundtruth/BE_GT.csv')
    # GT_csv = pd.read_csv('../Groundtruth/BE_GT.csv')
