diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index c09a91f8dc92db27ba5fd9e41d569976d3a872ea..8672d64ddbbd28c2fa9c6016f288f2802552f4e9 100644 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -23,8 +23,7 @@ extract_data: script: - 'python -m pip install --upgrade pip' - 'python -m pip install -r requirements.txt' - - 'python -m pip install ipython' - - 'ipython travelogues_extraction/script/script.py' + - 'python script.py --input-file ./test-data/TravelogueD16_ALMAoutput_20200720.xlsx --ac-column Datensatznummer --stop 10 --output-file ./test-output/test-output.xlsx --log-file ./test-output/log.xlsx --deep-log-file test-output/deeplog.json' artifacts: paths: - 'travelogues_extraction/script/output/' diff --git a/travelogues_extraction/script/script.py b/script.py similarity index 66% rename from travelogues_extraction/script/script.py rename to script.py index 45cd6d948aba21ce0e28d379b2bec7ffb91ee146..af2bc6f6f4c3ba770eeb3374d0fc288cde5b11d8 100644 --- a/travelogues_extraction/script/script.py +++ b/script.py @@ -1,9 +1,7 @@ import asyncio -import datetime import json -import os import pathlib -import re as regex +import time import typing import click @@ -12,15 +10,15 @@ from travelogues_extraction.controller.main import FromAlmaOutputToExcel @click.command() -@click.option('--input-file', help='Use .xlsx file as input', type=click.File()) +@click.option('--input-file', help='Use .xlsx file as input', type=click.Path(exists=True)) @click.option('--ac-column', type=click.STRING, help= 'The column of the input file, where the ac numbers are. The column name is the string in the first row of the column', ) @click.option('--start', default=0, type=click.INT, help='The first record of the input file to extract the data') @click.option('--stop', default=None, type=click.INT, help='The last record of the input file to extract the data') -@click.option('--output-file', help='Generate this .xlsx file', type=click.File()) -@click.option('--log-file', default=None, help='If given, write a csv log that that file', type=click.File()) -@click.option('--deep-log-file', default=None, help='If given, write a deep json log that that file', type=click.File()) +@click.option('--output-file', help='Generate this .xlsx file', type=click.Path()) +@click.option('--log-file', default=None, help='If given, write a csv log that that file', type=click.Path()) +@click.option('--deep-log-file', default=None, help='If given, write a deep json log that that file', type=click.Path()) def extract_data(input_file: str, output_file: str, ac_column: str, @@ -37,9 +35,17 @@ def extract_data(input_file: str, loop.run_until_complete(_extract_data(input_file, output_file, ac_column, start, stop, log_file, deep_log_file)) -async def _extract_data(input_file: str, output_file: str, ac_column: str, start: typing.Optional[int] = 0, - stop: typing.Optional[int] = None, log_file: typing.Optional[str] = None, - deep_log_file: typing.Optional[str] = None) -> typing.NoReturn: +async def _extract_data(input_file: str, + output_file: str, + ac_column: str, + start: typing.Optional[int] = 0, + stop: typing.Optional[int] = None, + log_file: typing.Optional[str] = None, + deep_log_file: typing.Optional[str] = None + ) -> typing.NoReturn: + + click.echo('Starting script at {}'.format(time.ctime())) + input_file = pathlib.Path(input_file) output_path = pathlib.Path(output_file) @@ -47,15 +53,20 @@ async def _extract_data(input_file: str, output_file: str, ac_column: str, start await converter.runasync() await converter.close() converter.write() - + click.echo('Wrote data to {} at {}'.format(output_path, time.ctime())) if log_file: converter.log.to_csv(log_file) - - # to do unblock + click.echo('Wrote log to {} at {}'.format(log_file, time.ctime())) + else: + click.echo('No log to {} at {}'.format(log_file, time.ctime())) if deep_log_file: with open(deep_log_file, 'w') as file: json.dump(converter.sub_log, file) + click.echo('Wrote deep-log to {} at {}'.format(deep_log_file, time.ctime())) + else: + click.echo('No deep-log to {} at {}'.format(log_file, time.ctime())) + click.echo('Finished at {}'.format(time.ctime())) if __name__ == '__main__': extract_data() diff --git a/test-data/TravelogueD16_ALMAoutput_20200720.xlsx b/test-data/TravelogueD16_ALMAoutput_20200720.xlsx new file mode 100644 index 0000000000000000000000000000000000000000..87c68f2cd5a1db2633969406f7b07c819aa4180f Binary files /dev/null and b/test-data/TravelogueD16_ALMAoutput_20200720.xlsx differ diff --git a/travelogues_extraction/script/__init__.py b/test-output/.gitkeep similarity index 100% rename from travelogues_extraction/script/__init__.py rename to test-output/.gitkeep diff --git a/test-output/deeplog.json b/test-output/deeplog.json new file mode 100644 index 0000000000000000000000000000000000000000..89f9c4c90fa639336abe5c687be35810ce89ff73 --- /dev/null +++ b/test-output/deeplog.json @@ -0,0 +1 @@ +{"Werktitel": [], "Schlagworte": [], "record_retriever": []} \ No newline at end of file diff --git a/test-output/log.xlsx b/test-output/log.xlsx new file mode 100644 index 0000000000000000000000000000000000000000..52ebe129f92fa04a86510987661e16e07c23532e --- /dev/null +++ b/test-output/log.xlsx @@ -0,0 +1,11 @@ +,time,n +AC09974246,1599467732983066493,5 +AC09974253,1599467732939683609,2 +AC09974254,1599467732884421951,0 +AC11085738,1599467733038739318,9 +AC11085779,1599467732924149997,1 +AC09625464,1599467733025237028,8 +AC11085838,1599467732968183827,4 +AC09697927,1599467732953083509,3 +AC10477539,1599467732997166956,6 +AC10477476,1599467733011180883,7 diff --git a/test-output/test-output.xlsx b/test-output/test-output.xlsx new file mode 100644 index 0000000000000000000000000000000000000000..73381fad48fd20cfb9f26fbb1a31a0e710755ebf Binary files /dev/null and b/test-output/test-output.xlsx differ