Merge pull request #14 from bcye/fix/consistent-logging

Uses logging statements instead of printing
This commit is contained in:
Bruce 2025-04-26 13:54:32 +02:00 committed by GitHub
commit e578473c9f
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
3 changed files with 14 additions and 10 deletions

View File

@@ -11,6 +11,8 @@ class BaseHandler(ABC):
""" """
logger = logging.getLogger(__name__) logger = logging.getLogger(__name__)
_successful_writes = 0
_failed_writes = 0
def __init__(self, fail_on_error: bool = True, **kwargs): def __init__(self, fail_on_error: bool = True, **kwargs):
""" """
@@ -48,8 +50,10 @@ class BaseHandler(ABC):
success = await self._write_entry(entry, uid) success = await self._write_entry(entry, uid)
if success: if success:
self.logger.debug(f"Successfully wrote entry with UID {uid}") self.logger.debug(f"Successfully wrote entry with UID {uid}")
self._successful_writes += 1
else: else:
self.logger.error(f"Failed to write entry with UID {uid}") self.logger.error(f"Failed to write entry with UID {uid}")
self._failed_writes += 1
if self.fail_on_error: if self.fail_on_error:
raise Exception(f"Failed to write entry with UID {uid}") raise Exception(f"Failed to write entry with UID {uid}")
@@ -58,4 +62,4 @@ class BaseHandler(ABC):
""" """
Closes the handler. This method should be overridden by subclasses if they need to perform any cleanup operations. Closes the handler. This method should be overridden by subclasses if they need to perform any cleanup operations.
""" """
pass self.logger.info(f"Wrote {self._successful_writes+self._failed_writes} entries: {self._successful_writes} successful, {self._failed_writes} failed.")

View File

@@ -52,3 +52,4 @@ class BunnyStorageHandler(BaseHandler):
async def close(self): async def close(self):
await self._session.close() await self._session.close()
await self._connector.close() await self._connector.close()
await super().close()

View File

@@ -5,9 +5,10 @@ import asyncio
import importlib import importlib
import logging import logging
from dotenv import load_dotenv from dotenv import load_dotenv
from parser import WikivoyageParser from parser import WikivoyageParser
logger = logging.getLogger(__name__)
async def process_file( async def process_file(
input_file: Path, input_file: Path,
handler, handler,
@@ -43,7 +44,7 @@ def gather_handler_kwargs(handler_name: str) -> dict:
elif val.lower() in ("true", "false"): elif val.lower() in ("true", "false"):
val = val.lower() == "true" val = val.lower() == "true"
kwargs[param] = val kwargs[param] = val
print(f"Handler kwargs: {kwargs}") logger.debug(f"Handler kwargs: {kwargs}")
return kwargs return kwargs
async def main(): async def main():
@@ -80,19 +81,17 @@ async def main():
txt_files = list(input_dir.rglob("*.txt")) txt_files = list(input_dir.rglob("*.txt"))
if not txt_files: if not txt_files:
print(f"No .txt files found under {input_dir}") logger.info(f"No .txt files found under {input_dir}")
sys.exit(1) sys.exit(1)
# 7. read concurrency setting # 7. read concurrency setting
try: try:
max_conc = int(os.getenv("MAX_CONCURRENT", "0")) max_conc = int(os.getenv("MAX_CONCURRENT", "0"))
except ValueError: except ValueError:
print("Error: MAX_CONCURRENT must be an integer") raise ValueError("MAX_CONCURRENT must be an integer")
sys.exit(1)
if max_conc < 0: if max_conc < 0:
print("Error: MAX_CONCURRENT must be >= 0") raise ValueError("MAX_CONCURRENT must be >= 0")
sys.exit(1)
# 8. schedule tasks # 8. schedule tasks
if max_conc == 0: if max_conc == 0:
@@ -119,7 +118,7 @@ async def main():
await handler.close() await handler.close()
print("All done.") logger.info("All done.")
if __name__ == "__main__": if __name__ == "__main__":