Merge pull request #14 from bcye/fix/consistent-logging

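Uses logging statements instead of printing, logs a summary of successful and failed writes when a handler is closed, and raises ValueError for an invalid MAX_CONCURRENT setting.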
This commit is contained in:
Bruce 2025-04-26 13:54:32 +02:00 committed by GitHub
commit e578473c9f
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
3 changed files with 14 additions and 10 deletions

View File

@ -11,6 +11,8 @@ class BaseHandler(ABC):
"""
logger = logging.getLogger(__name__)
_successful_writes = 0
_failed_writes = 0
def __init__(self, fail_on_error: bool = True, **kwargs):
"""
@ -48,8 +50,10 @@ class BaseHandler(ABC):
success = await self._write_entry(entry, uid)
if success:
self.logger.debug(f"Successfully wrote entry with UID {uid}")
self._successful_writes += 1
else:
self.logger.error(f"Failed to write entry with UID {uid}")
self._failed_writes += 1
if self.fail_on_error:
raise Exception(f"Failed to write entry with UID {uid}")
@ -58,4 +62,4 @@ class BaseHandler(ABC):
"""
Closes the handler. This method should be overridden by subclasses if they need to perform any cleanup operations.
"""
pass
self.logger.info(f"Wrote {self._successful_writes+self._failed_writes} entries: {self._successful_writes} successful, {self._failed_writes} failed.")

View File

@ -51,4 +51,5 @@ class BunnyStorageHandler(BaseHandler):
async def close(self):
await self._session.close()
await self._connector.close()
await self._connector.close()
await super().close()

View File

@ -5,9 +5,10 @@ import asyncio
import importlib
import logging
from dotenv import load_dotenv
from parser import WikivoyageParser
logger = logging.getLogger(__name__)
async def process_file(
input_file: Path,
handler,
@ -43,7 +44,7 @@ def gather_handler_kwargs(handler_name: str) -> dict:
elif val.lower() in ("true", "false"):
val = val.lower() == "true"
kwargs[param] = val
print(f"Handler kwargs: {kwargs}")
logger.debug(f"Handler kwargs: {kwargs}")
return kwargs
async def main():
@ -80,19 +81,17 @@ async def main():
txt_files = list(input_dir.rglob("*.txt"))
if not txt_files:
print(f"No .txt files found under {input_dir}")
logger.info(f"No .txt files found under {input_dir}")
sys.exit(1)
# 7. read concurrency setting
try:
max_conc = int(os.getenv("MAX_CONCURRENT", "0"))
except ValueError:
print("Error: MAX_CONCURRENT must be an integer")
sys.exit(1)
raise ValueError("MAX_CONCURRENT must be an integer")
if max_conc < 0:
print("Error: MAX_CONCURRENT must be >= 0")
sys.exit(1)
raise ValueError("MAX_CONCURRENT must be >= 0")
# 8. schedule tasks
if max_conc == 0:
@ -119,7 +118,7 @@ async def main():
await handler.close()
print("All done.")
logger.info("All done.")
if __name__ == "__main__":