add manifest.json

This commit is contained in:
Abel Luck 2026-03-29 13:40:03 +02:00
parent b5a66a2665
commit 81bb8afc41

View file

@ -1,7 +1,31 @@
"""Pygea main entry point""" """Pygea main entry point"""
import hashlib import hashlib
import json
import os
from pygea.pangeafeed import PangeaFeed from pygea.pangeafeed import PangeaFeed
from pygea.pexception import PangeaServiceException from pygea.pexception import PangeaServiceException
from pygea import utilities
# Output settings, looked up once at import time under the 'results' configuration key.
# NOTE(review): the value types depend on utilities.get_configuration_variable, which is
# not visible here. write_manifest() tests OUTPUT_TO_FILE with `is not True`, so it
# presumably must be a real bool rather than a truthy string -- confirm.
OUTPUT_TO_FILE = utilities.get_configuration_variable('results', 'output_to_file_p')
OUTPUT_FILE_NAME = utilities.get_configuration_variable('results', 'output_file_name')
OUTPUT_DIRECTORY = utilities.get_configuration_variable('results', 'output_directory')
def write_manifest(categories):
    """Write the category manifest beside the generated feed output.

    Serializes ``{'categories': categories}`` as indented UTF-8 JSON (non-ASCII
    characters are written as-is, not escaped) to ``manifest.json`` inside
    ``OUTPUT_DIRECTORY``, followed by a trailing newline. An existing manifest
    is overwritten. Does nothing unless ``OUTPUT_TO_FILE`` is exactly ``True``.

    Args:
        categories: JSON-serializable value stored under the ``categories``
            key. main() passes a list of dicts with the keys ``name``,
            ``short-hash`` and ``local-path``.

    Raises:
        OSError: if the output directory cannot be created or the manifest
            file cannot be written.
        TypeError: if ``categories`` is not JSON-serializable.
    """
    if OUTPUT_TO_FILE is not True:
        return

    output_directory = os.path.normpath(OUTPUT_DIRECTORY)
    # exist_ok=True replaces the former exists()-then-makedirs() sequence, which
    # could raise FileExistsError if the directory appeared between the two calls.
    os.makedirs(output_directory, exist_ok=True)

    manifest_path = os.path.join(output_directory, 'manifest.json')
    with open(manifest_path, 'w', encoding='utf-8') as mfile:
        json.dump({'categories': categories}, mfile, indent=2, ensure_ascii=False)
        # json.dump emits no final newline; add one so the file ends cleanly.
        mfile.write('\n')
def main(): def main():
# Feeds are generated for a single, specified, domain # Feeds are generated for a single, specified, domain
@ -34,6 +58,7 @@ def main():
# 2. Generate different feeds for each defined category # 2. Generate different feeds for each defined category
try: try:
manifest_categories = []
for cat_tuple in args['categories']: for cat_tuple in args['categories']:
# form new args for each category/query # form new args for each category/query
newargs = { newargs = {
@ -46,7 +71,13 @@ def main():
# put each feed into a different sub-directory # put each feed into a different sub-directory
feed_subdir = hashlib.md5(cat_tuple[0].encode('utf-8')).hexdigest()[:7] feed_subdir = hashlib.md5(cat_tuple[0].encode('utf-8')).hexdigest()[:7]
pf.disgorge(feed_subdir) pf.disgorge(feed_subdir)
manifest_categories.append({
'name': cat_tuple[0],
'short-hash': feed_subdir,
'local-path': os.path.join(feed_subdir, OUTPUT_FILE_NAME).replace(os.sep, '/')
})
print("feed for {} output to sub-directory {}".format(cat_tuple[0], feed_subdir)) print("feed for {} output to sub-directory {}".format(cat_tuple[0], feed_subdir))
write_manifest(manifest_categories)
except PangeaServiceException as error: except PangeaServiceException as error:
print(error) print(error)