Skip to content

ioana.circu - logging #4

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 10 commits into from
Apr 7, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -3,4 +3,4 @@ settings.json
local/
localcache/
*pyc
build/
build/
6 changes: 4 additions & 2 deletions dirconfig
Original file line number Diff line number Diff line change
@@ -1,4 +1,6 @@
# Directory containing new flights to be pushed to Elasticsearch
/home/badc/software/datasets/flight-finder/add_records/
/home/users/icircu/flight-pipeline/add/
# Directory for moving written flights - write DELETE to remove pushed flight records from the local system after pushing.
/home/badc/software/datasets/flight-finder/stac-flightfinder-items/
/home/users/icircu/test-flights/
# Logging File
/home/users/icircu/test-flights/logging
45 changes: 21 additions & 24 deletions flight_update.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
import importlib

import logging
from flightpipe.logger import logger

import argparse

Expand All @@ -18,7 +19,6 @@

settings_file = 'settings.json'


def openConfig():
"""
Function to open configuration file and initialise paths to relevant directories.
Expand All @@ -31,14 +31,15 @@ def openConfig():

if VERB:
print('> (1/6) Opening Config File')
logging.info('> (1/6) Opening Config File')
logger.info('> (1/6) Opening Config File')

f = open('dirconfig','r')
content = f.readlines()
f.close()
try:
return content[1].replace('\n',''), content[3].replace('\n',''), content[5].replace('\n','')
return content[1].replace('\n',''), content[3].replace('\n','')
except IndexError:
logger.error('One or both paths missing from the dirconfig file')
print('Error: One or both paths missing from the dirconfig file - please fill these in')
return '',''

Expand Down Expand Up @@ -73,7 +74,7 @@ def addFlights(rootdir, archive, repush=False):
# ES client to determine array of ids
if VERB:
print('> (2/6) Setting up ES Flight Client')
logging.info('> (2/6) Setting up ES Flight Client')
logger.info('> (2/6) Setting up ES Flight Client')
if repush:
files_list = os.listdir(archive)
fclient = ESFlightClient(archive, settings_file)
Expand All @@ -87,21 +88,21 @@ def addFlights(rootdir, archive, repush=False):
# Push new flights to index
if VERB:
print('> (4/6) Identified {} flights'.format(len(checked_list)))
logging.info('> (4/6) Identified {} flights'.format(len(checked_list)))
logger.info('> (4/6) Identified {} flights'.format(len(checked_list)))
if len(checked_list) > 0:
fclient.push_flights(checked_list)
if VERB:
print('> (5/6) Pushed flights to ES Index')
logging.info('> (5/6) Pushed flights to ES Index')
logger.info('> (5/6) Pushed flights to ES Index')
if not repush:
moveOldFiles(rootdir, archive, checked_list)
if VERB:
print('> (6/6) Removed local files from push directory')
logging.info('> (6/6) Removed local files from push directory')
logger.info('> (6/6) Removed local files from push directory')
else:
if VERB:
print('> Exiting flight pipeline')
logging.info('> Exiting flight pipeline')
logger.info('> Exiting flight pipeline')

# Move old records into an archive directory

Expand Down Expand Up @@ -136,27 +137,19 @@ def main():
REPUSH = False

if args.mode == 'add':
root, archive, log_file = openConfig()

if log_file == '':
print("Error: Please fill in the third directory in dirconfig file")

# Set up logging config
logging.basicConfig(
level=logging.DEBUG, # Capture all levels
format='%(asctime)s - %(levelname)s - %(message)s', # timestamp, level, message
handlers=[
logging.FileHandler(log_file), # Write output to file
logging.StreamHandler() # If logging to console
]
)
logger.debug("Mode set to add")
root, archive = openConfig()

logger.debug("Root directory set to %s", root)
logger.debug("Archive set to %s", archive)

if archive == '':
print('Error: Please fill in second directory in dirconfig file')
logging.error("Error: Second directory in dirconfig file missing")
logger.error("Second directory in dirconfig file missing")
sys.exit()
elif root == '':
print('Error: Please fill in first directory in dirconfig file')
logging.error("Error: First directory in dirconfig file missing")
logger.error("First directory in dirconfig file missing")
sys.exit()
else:
addFlights(root, archive, repush=REPUSH)
Expand All @@ -171,14 +164,18 @@ def main():
"""

elif args.mode == 'update':
logger.debug("Mode set to update")
updateFlights(args.update)

elif args.mode == 'add_moles':
logger.debug("Mode set to add moles")
updateFlights('moles')

elif args.mode == 'reindex':
logger.debug("Mode set to reindex")
reindex(args.new_index)
else:
logger.error("Mode unrecognised - ", args.mode)
print('Error: Mode unrecognised - ', args.mode)
sys.exit()

Expand Down
24 changes: 21 additions & 3 deletions flightpipe/flight_client.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,10 @@
from elasticsearch import Elasticsearch
from elasticsearch.helpers import bulk

from ceda_elastic_py import SimpleClient, gen_id
from flightpipe.logger import logger

from flightpipe.simple_client import SimpleClient, gen_id
from flightpipe.logger import setup_logging

from datetime import datetime

Expand All @@ -22,6 +25,7 @@
urllib3.disable_warnings()

def resolve_link(path, ):
logger.debug("Debug: Resolving link for path %s", path)
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

You don't need the "Debug:" prefix in the message string: the log format already includes the level name (`%(levelname)s`), so it would appear twice in every record.

mpath = str(path)

uuid = None
Expand All @@ -32,26 +36,31 @@ def resolve_link(path, ):
r = json.loads(resp)
if r['results']:
uuid = r['results'][0]['uuid']
logger.debug("Debug: Reslolving link, found uuid %s", str(uuid))
except:
print(f'Unsuccessful link retrieval for {path} - proceeding without')
path = '/'.join(path.split('/')[:-1])

if not uuid:
logger.error("Error: Recursive path search failed for %s", mpath)
print(f'Recursive path search failed for: {mpath}')

return uuid

class ESFlightClient(SimpleClient):
"""
Connects to an elasticsearch instance and exports the
documents to elasticsearch."""
documents to elasticsearch.
"""

def __init__(self, rootdir, es_config='settings.json'):
self.rootdir = rootdir
logger.info("Info: Initialising ES Flight Client")

super().__init__("stac-flightfinder-items", es_config=es_config)
super().__init__("stac-flightfinder-items-test", es_config=es_config)

with open('stac_template.json') as f:
logger.info("Info: Reading stac templace JSON file")
self.required_keys = json.load(f).keys()

def push_flights(self, file_list):
Expand All @@ -63,10 +72,13 @@ def push_flights(self, file_list):
elif isinstance(file_list[0], dict):
flight_list = file_list
else:
logger.error("Error: Flight file not found %s", str(file_list[0]))
raise FileNotFoundError(file_list[0])
logger.info("Info: Flights to be pushed %s", str(flight_list))
self.push_records(flight_list)

def preprocess_records(self, file_list):
logger.debug("Debug: Processing following records - %s", file_list)

def set_defaults(refs):
collection = refs['collection']
Expand Down Expand Up @@ -116,6 +128,7 @@ def set_defaults(refs):
if rq not in source:
missing.append(rq)
if len(missing) > 0:
logger.error("Error: File is missing entries - %s", str(missing))
raise TypeError(f"File {file} is missing entries:{missing}")

source['last_update'] = datetime.strftime(datetime.now(),'%Y-%m-%d %H:%M:%S')
Expand All @@ -126,6 +139,7 @@ def set_defaults(refs):
return records

def obtain_field(self, id, fieldnames):
logger.info("Info: Performing query to obtain the following fields: %s", str(fieldnames))
search = {
"_source": fieldnames,
"query": {
Expand All @@ -140,11 +154,14 @@ def obtain_field(self, id, fieldnames):
body=search)

try:
logger.info("Info: Found following fields: %s", str(resp['hits']['hits'][0]))
return resp['hits']['hits'][0]
except IndexError: # No entry found
logger.error("Error: No entry found.")
return None

def add_field(self, id, data, fieldname):
logger.debug("Debug: Update mapping for id - %s", str(id))
# Update mapping
self.es.update(index=self.index, doc_type='_doc', id=id, body={'doc':{fieldname:data}})

Expand Down Expand Up @@ -227,6 +244,7 @@ def check_ptcode(self, ptcode):
return 100

def reindex(self, new_index):
logger.debug("Debug: Reindex for source %s and destination %s", self.index, new_index)

self.es.reindex({
"source":{
Expand Down
50 changes: 50 additions & 0 deletions flightpipe/logger.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@
import logging

def setup_logging(enable_logging=True, console_logging=True):
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Adding this comment here, but it applies in many places: consider using type hints for function parameters. In this case, `enable_logging: bool = True`, and so on.

If multiple variable types are acceptable, you can use the following:

from typing import Union
...
enable_logging: Union[bool,None] = None

"""
Sets up logging configuration. If `enable_logging` is False, the root logger level is raised to CRITICAL, so only CRITICAL messages are emitted.

:param enable_logging: Flag to enable/disable logging.
"""
file = "dirconfig"

with open(file) as f: # 'r' is default if not specified.
content = [r.strip() for r in f.readlines()] # Removes the '\n' from all lines

log_file = content[5].replace('\n','')

if log_file == '':
print("Error: Please fill in the third directory in dirconfig file")

handlers = [
logging.FileHandler(log_file), # Write output to file
]

if console_logging:
handlers.append(logging.StreamHandler()) # Logs to the console if enabled


if enable_logging:
logging.basicConfig(
level=logging.DEBUG, # Capture all levels
format='%(asctime)s - %(levelname)s - %(message)s',
handlers=handlers
)
else:
# Suppress everything below CRITICAL by raising the root logger level (no null handler is installed; CRITICAL records still get through)
logging.basicConfig(level=logging.CRITICAL)
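# NOTE: NOTSET would not silence logging - on the root logger it lets every record through. To suppress all output, use logging.disable(logging.CRITICAL) instead.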


enable_logging = True

# Set up logging with a flag (True to enable logging, False to disable logging)
setup_logging(enable_logging) # Change to False to disable logging

logger = logging.getLogger(__name__)



__author__ = "Ioana Circu"
__contact__ = "[email protected]"
__copyright__ = "Copyright 2025 United Kingdom Research and Innovation"
Loading