import pandas as pd
import requests
import jsonpath_ng as jp
import pathlib
import re
import sys


def get_raw_data(filepath='akon_postcards_public_domain.csv'):
    """Load the postcard CSV and keep only the id and colour columns.

    Every column is read as ``str``, so values such as ``True``/``False``
    come back as text rather than booleans.

    Args:
        filepath: Path (or file-like buffer) handed to ``pandas.read_csv``.

    Returns:
        A DataFrame with exactly the columns ``akon_id`` and ``color``,
        in that order.
    """
    wanted_columns = ['akon_id', 'color']
    table = pd.read_csv(filepath, dtype=str)
    return table[wanted_columns]


# JSONPath expression, parsed once at import time and reused for every lookup.
# It selects the `@id` of the `resource` of every image on every canvas of
# every sequence in the JSON document it is applied to.
jp_image_link = jp.parse('sequences[*].canvases[*].images[*].resource.@id')


def get_image_link_for_akon_id(akon_id, timeout=30):
    """Return the first image URL listed in the manifest for *akon_id*.

    Fetches the manifest JSON from the ONB IIIF presentation endpoint and
    extracts the image resource ids with the module-level ``jp_image_link``
    expression. If several images are listed, a note is printed to stderr
    and the first one is returned.

    Args:
        akon_id: Identifier interpolated into the manifest URL.
        timeout: Seconds to wait for the server before giving up; without
            a timeout ``requests`` may block indefinitely.

    Returns:
        The first matching image URL.

    Raises:
        requests.HTTPError: If the manifest request returns an error status.
        requests.Timeout: If the server does not respond within *timeout*.
        IndexError: If the manifest contains no image link.
    """
    r = requests.get(
        f'https://iiif.onb.ac.at/presentation/AKON/{akon_id}/manifest',
        timeout=timeout,
    )
    r.raise_for_status()
    links = [m.value for m in jp_image_link.find(r.json())]
    if not links:
        # Same exception type as the bare links[0] would raise, but with
        # enough context to tell which id was at fault.
        raise IndexError(f'No image link found in manifest for id {akon_id}.')
    if len(links) > 1:
        print(f'{len(links)} images found for id {akon_id}.', file=sys.stderr)
    return links[0]


def download_and_save_image(akon_id, directory):
    """Download the image for *akon_id* into ``{directory}/{akon_id}.jpg``.

    Does nothing if the target file already exists, so the function can be
    re-run to resume an interrupted batch.

    Args:
        akon_id: Identifier used both to look up the image link and to name
            the output file.
        directory: Folder the image is written to; created if missing.

    Raises:
        requests.HTTPError: If the image request returns an error status.
            Nothing is written in that case, so a later run retries the id
            instead of treating an error page as an already-downloaded image.
    """
    path = pathlib.Path(f'{directory}/{akon_id}.jpg')
    if path.exists():
        return

    r = requests.get(get_image_link_for_akon_id(akon_id), timeout=30)
    # Check the status before touching the disk: the exists() guard above
    # would otherwise skip a saved error body forever.
    r.raise_for_status()

    path.parent.mkdir(parents=True, exist_ok=True)
    # write_bytes opens, writes and closes the file in one call.
    path.write_bytes(r.content)


def dl_save_low_res(akon_id, directory):
    """Download a reduced-size image for *akon_id* into ``{directory}/{akon_id}.jpg``.

    The image link is looked up via ``get_image_link_for_akon_id`` and every
    ``full/full`` occurrence in it is rewritten to ``full/256,256`` before
    the request is made. Does nothing if the target file already exists.

    Args:
        akon_id: Identifier used both to look up the image link and to name
            the output file.
        directory: Folder the image is written to; created if missing.

    Raises:
        requests.HTTPError: If the image request returns an error status.
            Nothing is written in that case, so a later run retries the id
            instead of treating an error page as an already-downloaded image.
    """
    path = pathlib.Path(f'{directory}/{akon_id}.jpg')
    if path.exists():
        return

    link = get_image_link_for_akon_id(akon_id)
    # Plain substring replacement; the pattern has no regex metacharacters.
    # NOTE(review): presumably 'full/full' is the IIIF region/size pair and
    # '256,256' requests a 256x256 rendition. If the link lacks 'full/full'
    # it is fetched unchanged; confirm that is acceptable.
    low_res_link = link.replace('full/full', 'full/256,256')

    r = requests.get(low_res_link, timeout=30)
    # Check the status before touching the disk: the exists() guard above
    # would otherwise skip a saved error body forever.
    r.raise_for_status()

    path.parent.mkdir(parents=True, exist_ok=True)
    # write_bytes opens, writes and closes the file in one call.
    path.write_bytes(r.content)


if __name__ == '__main__':
    # Script entry point: fetch a low-resolution image for every postcard
    # whose 'color' column is flagged 'True'.
    df = get_raw_data()
    # Columns are loaded as strings, so the flag is compared with the text
    # 'True' rather than a boolean.
    is_color = df['color'] == 'True'
    df_color = df[is_color]
    print(len(df_color))
    # Iterate the id column by label. Integer indexing on a label-indexed
    # row (x[0]) relies on positional fallback, which pandas has deprecated,
    # and apply() would build a result that is never used.
    # For full-resolution files, call download_and_save_image(akon_id, 'imgs')
    # here instead.
    for akon_id in df_color['akon_id']:
        dl_save_low_res(akon_id, 'low_res_imgs')
