use logging throughout the code

This commit is contained in:
Remy Moll 2025-04-23 14:33:49 +02:00
parent c4913046a7
commit db0f1a2b9c
3 changed files with 14 additions and 10 deletions

View File

@@ -11,6 +11,8 @@ class BaseHandler(ABC):
"""
logger = logging.getLogger(__name__)
_successful_writes = 0
_failed_writes = 0
def __init__(self, fail_on_error: bool = True, **kwargs):
"""
@@ -48,8 +50,10 @@ class BaseHandler(ABC):
success = await self._write_entry(entry, uid)
if success:
self.logger.debug(f"Successfully wrote entry with UID {uid}")
self._successful_writes += 1
else:
self.logger.error(f"Failed to write entry with UID {uid}")
self._failed_writes += 1
if self.fail_on_error:
raise Exception(f"Failed to write entry with UID {uid}")
@@ -58,4 +62,4 @@ class BaseHandler(ABC):
"""
Closes the handler. This method should be overridden by subclasses if they need to perform any cleanup operations.
"""
pass
self.logger.info(f"Wrote {self._successful_writes+self._failed_writes} entries: {self._successful_writes} successful, {self._failed_writes} failed.")

View File

@@ -52,3 +52,4 @@ class BunnyStorageHandler(BaseHandler):
# Shut this handler down. The three calls are awaited strictly one after
# another: first the session, then the connector, then the parent class.
async def close(self):
await self._session.close()
await self._connector.close()
# Chain to the parent class so its close() logic runs as well.
# NOTE(review): in this commit that appears to be the write-summary log
# line added to BaseHandler — confirm against the full BaseHandler source.
await super().close()

View File

@@ -12,6 +12,7 @@ import logging
from dotenv import load_dotenv
load_dotenv()
logger = logging.getLogger(__name__)
class WikivoyageParser:
def __init__(self):
self.document_templates = [
@@ -382,11 +383,11 @@ def gather_handler_kwargs(handler_name: str) -> dict:
elif val.lower() in ("true", "false"):
val = val.lower() == "true"
kwargs[param] = val
print(f"Handler kwargs: {kwargs}")
logger.debug(f"Handler kwargs: {kwargs}")
return kwargs
async def main():
logging.basicConfig(level=logging.DEBUG)
logging.basicConfig(level=logging.INFO)
# 1. Which handler to load?
handler_name = os.getenv("HANDLER")
@@ -420,19 +421,17 @@ async def main():
txt_files = list(input_dir.rglob("*.txt"))
if not txt_files:
print(f"No .txt files found under {input_dir}")
logger.info(f"No .txt files found under {input_dir}")
sys.exit(1)
# 7. read concurrency setting
try:
max_conc = int(os.getenv("MAX_CONCURRENT", "0"))
except ValueError:
print("Error: MAX_CONCURRENT must be an integer")
sys.exit(1)
raise ValueError("MAX_CONCURRENT must be an integer")
if max_conc < 0:
print("Error: MAX_CONCURRENT must be >= 0")
sys.exit(1)
raise ValueError("MAX_CONCURRENT must be >= 0")
# 8. schedule tasks
if max_conc == 0:
@@ -459,7 +458,7 @@ async def main():
await handler.close()
print("All done.")
logger.info("All done.")
if __name__ == "__main__":