use logging throughout the code

This commit is contained in:
Remy Moll 2025-04-23 14:33:49 +02:00
parent c4913046a7
commit db0f1a2b9c
3 changed files with 14 additions and 10 deletions

View File

@@ -11,6 +11,8 @@ class BaseHandler(ABC):
""" """
logger = logging.getLogger(__name__) logger = logging.getLogger(__name__)
_successful_writes = 0
_failed_writes = 0
def __init__(self, fail_on_error: bool = True, **kwargs): def __init__(self, fail_on_error: bool = True, **kwargs):
""" """
@@ -48,8 +50,10 @@ class BaseHandler(ABC):
success = await self._write_entry(entry, uid) success = await self._write_entry(entry, uid)
if success: if success:
self.logger.debug(f"Successfully wrote entry with UID {uid}") self.logger.debug(f"Successfully wrote entry with UID {uid}")
self._successful_writes += 1
else: else:
self.logger.error(f"Failed to write entry with UID {uid}") self.logger.error(f"Failed to write entry with UID {uid}")
self._failed_writes += 1
if self.fail_on_error: if self.fail_on_error:
raise Exception(f"Failed to write entry with UID {uid}") raise Exception(f"Failed to write entry with UID {uid}")
@@ -58,4 +62,4 @@ class BaseHandler(ABC):
""" """
Closes the handler. This method should be overridden by subclasses if they need to perform any cleanup operations. Closes the handler. This method should be overridden by subclasses if they need to perform any cleanup operations.
""" """
pass self.logger.info(f"Wrote {self._successful_writes+self._failed_writes} entries: {self._successful_writes} successful, {self._failed_writes} failed.")

View File

@@ -52,3 +52,4 @@ class BunnyStorageHandler(BaseHandler):
async def close(self): async def close(self):
await self._session.close() await self._session.close()
await self._connector.close() await self._connector.close()
await super().close()

View File

@@ -12,6 +12,7 @@ import logging
from dotenv import load_dotenv from dotenv import load_dotenv
load_dotenv() load_dotenv()
logger = logging.getLogger(__name__)
class WikivoyageParser: class WikivoyageParser:
def __init__(self): def __init__(self):
self.document_templates = [ self.document_templates = [
@@ -382,11 +383,11 @@ def gather_handler_kwargs(handler_name: str) -> dict:
elif val.lower() in ("true", "false"): elif val.lower() in ("true", "false"):
val = val.lower() == "true" val = val.lower() == "true"
kwargs[param] = val kwargs[param] = val
print(f"Handler kwargs: {kwargs}") logger.debug(f"Handler kwargs: {kwargs}")
return kwargs return kwargs
async def main(): async def main():
logging.basicConfig(level=logging.DEBUG) logging.basicConfig(level=logging.INFO)
# 1. Which handler to load? # 1. Which handler to load?
handler_name = os.getenv("HANDLER") handler_name = os.getenv("HANDLER")
@@ -420,19 +421,17 @@ async def main():
txt_files = list(input_dir.rglob("*.txt")) txt_files = list(input_dir.rglob("*.txt"))
if not txt_files: if not txt_files:
print(f"No .txt files found under {input_dir}") logger.info(f"No .txt files found under {input_dir}")
sys.exit(1) sys.exit(1)
# 7. read concurrency setting # 7. read concurrency setting
try: try:
max_conc = int(os.getenv("MAX_CONCURRENT", "0")) max_conc = int(os.getenv("MAX_CONCURRENT", "0"))
except ValueError: except ValueError:
print("Error: MAX_CONCURRENT must be an integer") raise ValueError("MAX_CONCURRENT must be an integer")
sys.exit(1)
if max_conc < 0: if max_conc < 0:
print("Error: MAX_CONCURRENT must be >= 0") raise ValueError("MAX_CONCURRENT must be >= 0")
sys.exit(1)
# 8. schedule tasks # 8. schedule tasks
if max_conc == 0: if max_conc == 0:
@@ -459,7 +458,7 @@ async def main():
await handler.close() await handler.close()
print("All done.") logger.info("All done.")
if __name__ == "__main__": if __name__ == "__main__":