From aae7cb2f0689d2e9e01ad6ee6d9399855568fa8f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Philip=20R=C3=B6ggla?= <philip.roeggla@onb.ac.at> Date: Wed, 26 Aug 2020 14:30:12 +0200 Subject: [PATCH] Wrote a click script --- travelogues_extraction/controller/main.py | 6 +- travelogues_extraction/script/script.py | 67 +++++++++++------------ 2 files changed, 36 insertions(+), 37 deletions(-) diff --git a/travelogues_extraction/controller/main.py b/travelogues_extraction/controller/main.py index e3cf2d1..3ca2b52 100644 --- a/travelogues_extraction/controller/main.py +++ b/travelogues_extraction/controller/main.py @@ -43,14 +43,14 @@ class FromAlmaOutputToExcel: sub_log: dict - def __init__(self, alma_output: str, target_output: str, slice: slice): + def __init__(self, alma_output: str, target_output: str, slice: slice, column: str): """ file path :param alma_output: """ self.target_output_string = target_output - input_data = pd.read_excel(alma_output, usecols=['Datensatznummer']) - self.ac_numbers = extract_ac_from_series(input_data['Datensatznummer'][slice]) + input_data = pd.read_excel(alma_output, usecols=[column]) + self.ac_numbers = extract_ac_from_series(input_data[column][slice]) self.target_output_dataframe = pd.DataFrame([], columns=[column for extractor in self.extractors for column in extractor.get_columns_names_I_work_on() diff --git a/travelogues_extraction/script/script.py b/travelogues_extraction/script/script.py index a7c9ab6..f0111c0 100644 --- a/travelogues_extraction/script/script.py +++ b/travelogues_extraction/script/script.py @@ -4,49 +4,48 @@ import json import os import pathlib import re as regex - import typing +import click + from travelogues_extraction.controller.main import FromAlmaOutputToExcel -input_folder = './travelogues_extraction/script/input/' -output_folder = './travelogues_extraction/script/output/' -filter = 'D1' -files = os.listdir(input_folder) -files = [file for file in files if filter in file] -files = [input_folder + file for file in files] +@click.command() +@click.argument('--input-file', help='Use .xlsx file as input', type=click.File) +@click.argument('--ac-column', type=click.STRING, help= +'The column of the input file, where the ac numbers are. The column name is the string in the first row of the column', + ) +@click.argument('--start', default=0, type=click.INT, help='The first record of the input file to extract the data') +@click.argument('--stop', default=None, type=click.INT, help='The last record of the input file to extract the data') +@click.argument('--output-file', help='Generate this .xlsx file', type=click.File) +@click.argument('--log-file', default=None, help='If given, write a csv log that that file', type=click.File) +@click.argument('--deep-log-file', default=None, help='If given, write a deep json log that that file', type=click.File) +def extract_data(input_file: str, output_file: str, ac_column: str, start: typing.Optional[int] = 0, stop: typing.Optional[int]=None, log_file: typing.Optional[str]=None, deep_log_file: typing.Optional[str]=None): + """ + Takes an excel file with a column of ac numbers, gets data from alma, brings the data in excel like shape, and generates an excel file with this data + """ + + loop = asyncio.new_event_loop() + loop.run_until_complete(_extract_data(input_file, output_file, ac_column, start, stop, log_file, deep_log_file)) -async def extract(input_file: str, output_folder: str) -> typing.NoReturn: - input_path = pathlib.Path(input_file) - output_path = pathlib.Path( - output_folder + \ - input_path.name.replace( - 'ALMAoutput', 'script_output_' \ - + datetime.datetime.now().strftime("%Y-%m-%d_%H-%M-%S")) - ) +async def _extract_data(input_file: str, output_file: str, ac_column: str, start: typing.Optional[int] = 0, stop: typing.Optional[int]=None, log_file: typing.Optional[str]=None, deep_log_file: typing.Optional[str]=None) -> typing.NoReturn: + input_file = pathlib.Path(input_file) + output_path = pathlib.Path(output_file) - converter = FromAlmaOutputToExcel(input_file, output_path.absolute(), slice(None, None, 1)) + converter = FromAlmaOutputToExcel(input_file, output_path.absolute(), slice(start, stop, 1), ac_column) await converter.runasync() await converter.close() converter.write() - match = regex.search(r'[dD]1\d', input_file) - name = match.group(0) if match else 'VDXX' - output_log = pathlib.Path( output_folder + 'log/') - output_log.mkdir(parents=True, exist_ok=True) - converter.log.to_csv(output_log.joinpath( - 'log-' + name + datetime.datetime.now().strftime("-%Y-%m-%d-%H:%M:%S") + '.csv' - )) + + if log_file: + converter.log.to_csv(log_file) + # to do unblock - with open( - output_log.joinpath( - 'sub-log-' + name + datetime.datetime.now().strftime("-%Y-%m-%d-%H:%M:%S") + '.json' - ), 'w' - ) as file: - json.dump(converter.sub_log, file) - -loop = asyncio.new_event_loop() -tasks = [loop.create_task(extract(file, output_folder)) for file in files] -waiter = asyncio.wait(tasks) -loop.run_until_complete(waiter) + if deep_log_file: + with open(deep_log_file, 'w') as file: + json.dump(converter.sub_log, file) + +if __name__ == '__main__': + extract_data() -- GitLab