def write_manifest(categories, output_directory=None, enabled=None):
    """Write the category manifest beside the generated feed output.

    The manifest is a UTF-8 JSON document of the form
    ``{"categories": [...]}``, indented by two spaces, saved as
    ``manifest.json`` inside the output directory and terminated by a
    newline. Non-ASCII characters are written as-is rather than escaped.

    Args:
        categories: JSON-serialisable collection describing the generated
            feeds; stored under the ``categories`` key.
        output_directory: Directory to write the manifest into. When
            omitted, the module-level ``OUTPUT_DIRECTORY`` setting is used.
        enabled: Switch controlling whether anything is written. When
            omitted, the module-level ``OUTPUT_TO_FILE`` setting is used.

    Returns:
        None. Nothing is written unless the switch is exactly ``True``.
    """
    if enabled is None:
        enabled = OUTPUT_TO_FILE
    # Identity comparison is kept from the original: only a genuine boolean
    # True turns file output on; any other configured value disables it.
    if enabled is not True:
        return

    if output_directory is None:
        output_directory = OUTPUT_DIRECTORY
    target_directory = os.path.normpath(output_directory)
    # exist_ok=True removes the check-then-create race of testing
    # os.path.exists() first and calling makedirs() afterwards.
    os.makedirs(target_directory, exist_ok=True)

    manifest_path = os.path.join(target_directory, 'manifest.json')
    with open(manifest_path, 'w', encoding='utf-8') as mfile:
        json.dump({'categories': categories}, mfile, indent=2, ensure_ascii=False)
        mfile.write('\n')