From db0f1a2b9c10717d9333a476ea74c62a89f0e2f4 Mon Sep 17 00:00:00 2001 From: Remy Moll Date: Wed, 23 Apr 2025 14:33:49 +0200 Subject: [PATCH] use logging throughout the code --- output_handlers/base_handler.py | 6 +++++- output_handlers/bunny_storage.py | 3 ++- transform-documents.py | 15 +++++++-------- 3 files changed, 14 insertions(+), 10 deletions(-) diff --git a/output_handlers/base_handler.py b/output_handlers/base_handler.py index 67d4886..bd8f20b 100644 --- a/output_handlers/base_handler.py +++ b/output_handlers/base_handler.py @@ -11,6 +11,8 @@ class BaseHandler(ABC): """ logger = logging.getLogger(__name__) + _successful_writes = 0 + _failed_writes = 0 def __init__(self, fail_on_error: bool = True, **kwargs): """ @@ -48,8 +50,10 @@ class BaseHandler(ABC): success = await self._write_entry(entry, uid) if success: self.logger.debug(f"Successfully wrote entry with UID {uid}") + self._successful_writes += 1 else: self.logger.error(f"Failed to write entry with UID {uid}") + self._failed_writes += 1 if self.fail_on_error: raise Exception(f"Failed to write entry with UID {uid}") @@ -58,4 +62,4 @@ class BaseHandler(ABC): """ Closes the handler. This method should be overridden by subclasses if they need to perform any cleanup operations. """ - pass + self.logger.info(f"Wrote {self._successful_writes+self._failed_writes} entries: {self._successful_writes} successful, {self._failed_writes} failed.") diff --git a/output_handlers/bunny_storage.py b/output_handlers/bunny_storage.py index 553fbbb..5958d9f 100644 --- a/output_handlers/bunny_storage.py +++ b/output_handlers/bunny_storage.py @@ -51,4 +51,5 @@ class BunnyStorageHandler(BaseHandler): async def close(self): await self._session.close() - await self._connector.close() \ No newline at end of file + await self._connector.close() + await super().close() diff --git a/transform-documents.py b/transform-documents.py index 1a5a9bc..f345803 100644 --- a/transform-documents.py +++ b/transform-documents.py @@ -12,6 +12,7 @@ import logging from dotenv import load_dotenv load_dotenv() +logger = logging.getLogger(__name__) class WikivoyageParser: def __init__(self): self.document_templates = [ @@ -382,11 +383,11 @@ def gather_handler_kwargs(handler_name: str) -> dict: elif val.lower() in ("true", "false"): val = val.lower() == "true" kwargs[param] = val - print(f"Handler kwargs: {kwargs}") + logger.debug(f"Handler kwargs: {kwargs}") return kwargs async def main(): - logging.basicConfig(level=logging.DEBUG) + logging.basicConfig(level=logging.INFO) # 1. Which handler to load? handler_name = os.getenv("HANDLER") @@ -420,19 +421,17 @@ async def main(): txt_files = list(input_dir.rglob("*.txt")) if not txt_files: - print(f"No .txt files found under {input_dir}") + logger.info(f"No .txt files found under {input_dir}") sys.exit(1) # 7. read concurrency setting try: max_conc = int(os.getenv("MAX_CONCURRENT", "0")) except ValueError: - print("Error: MAX_CONCURRENT must be an integer") - sys.exit(1) + raise ValueError("MAX_CONCURRENT must be an integer") if max_conc < 0: - print("Error: MAX_CONCURRENT must be >= 0") - sys.exit(1) + raise ValueError("MAX_CONCURRENT must be >= 0") # 8. schedule tasks if max_conc == 0: @@ -459,7 +458,7 @@ async def main(): await handler.close() - print("All done.") + logger.info("All done.") if __name__ == "__main__":