diff --git a/output_handlers/base_handler.py b/output_handlers/base_handler.py index 67d4886..bd8f20b 100644 --- a/output_handlers/base_handler.py +++ b/output_handlers/base_handler.py @@ -11,6 +11,8 @@ class BaseHandler(ABC): """ logger = logging.getLogger(__name__) + _successful_writes = 0 + _failed_writes = 0 def __init__(self, fail_on_error: bool = True, **kwargs): """ @@ -48,8 +50,10 @@ class BaseHandler(ABC): success = await self._write_entry(entry, uid) if success: self.logger.debug(f"Successfully wrote entry with UID {uid}") + self._successful_writes += 1 else: self.logger.error(f"Failed to write entry with UID {uid}") + self._failed_writes += 1 if self.fail_on_error: raise Exception(f"Failed to write entry with UID {uid}") @@ -58,4 +62,4 @@ class BaseHandler(ABC): """ Closes the handler. This method should be overridden by subclasses if they need to perform any cleanup operations. """ - pass + self.logger.info(f"Wrote {self._successful_writes+self._failed_writes} entries: {self._successful_writes} successful, {self._failed_writes} failed.") diff --git a/output_handlers/bunny_storage.py b/output_handlers/bunny_storage.py index 553fbbb..5958d9f 100644 --- a/output_handlers/bunny_storage.py +++ b/output_handlers/bunny_storage.py @@ -51,4 +51,5 @@ class BunnyStorageHandler(BaseHandler): async def close(self): await self._session.close() - await self._connector.close() \ No newline at end of file + await self._connector.close() + await super().close() diff --git a/transform-documents.py b/transform-documents.py index 0a7508f..758c227 100644 --- a/transform-documents.py +++ b/transform-documents.py @@ -5,9 +5,10 @@ import asyncio import importlib import logging from dotenv import load_dotenv - from parser import WikivoyageParser +logger = logging.getLogger(__name__) + async def process_file( input_file: Path, handler, @@ -43,7 +44,7 @@ def gather_handler_kwargs(handler_name: str) -> dict: elif val.lower() in ("true", "false"): val = val.lower() == "true" kwargs[param] = val - print(f"Handler kwargs: {kwargs}") + logger.debug(f"Handler kwargs: {kwargs}") return kwargs async def main(): @@ -80,19 +81,17 @@ async def main(): txt_files = list(input_dir.rglob("*.txt")) if not txt_files: - print(f"No .txt files found under {input_dir}") + logger.info(f"No .txt files found under {input_dir}") sys.exit(1) # 7. read concurrency setting try: max_conc = int(os.getenv("MAX_CONCURRENT", "0")) except ValueError: - print("Error: MAX_CONCURRENT must be an integer") - sys.exit(1) + raise ValueError("MAX_CONCURRENT must be an integer") if max_conc < 0: - print("Error: MAX_CONCURRENT must be >= 0") - sys.exit(1) + raise ValueError("MAX_CONCURRENT must be >= 0") # 8. schedule tasks if max_conc == 0: @@ -119,7 +118,7 @@ async def main(): await handler.close() - print("All done.") + logger.info("All done.") if __name__ == "__main__":