mirror of
https://github.com/bcye/structured-wikivoyage-exports.git
synced 2025-06-07 16:34:04 +00:00
use logging throughout the code
This commit is contained in:
parent
c4913046a7
commit
db0f1a2b9c
@ -11,6 +11,8 @@ class BaseHandler(ABC):
|
||||
"""
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
_successful_writes = 0
|
||||
_failed_writes = 0
|
||||
|
||||
def __init__(self, fail_on_error: bool = True, **kwargs):
|
||||
"""
|
||||
@ -48,8 +50,10 @@ class BaseHandler(ABC):
|
||||
success = await self._write_entry(entry, uid)
|
||||
if success:
|
||||
self.logger.debug(f"Successfully wrote entry with UID {uid}")
|
||||
self._successful_writes += 1
|
||||
else:
|
||||
self.logger.error(f"Failed to write entry with UID {uid}")
|
||||
self._failed_writes += 1
|
||||
if self.fail_on_error:
|
||||
raise Exception(f"Failed to write entry with UID {uid}")
|
||||
|
||||
@ -58,4 +62,4 @@ class BaseHandler(ABC):
|
||||
"""
|
||||
Closes the handler. This method should be overridden by subclasses if they need to perform any cleanup operations.
|
||||
"""
|
||||
pass
|
||||
self.logger.info(f"Wrote {self._successful_writes+self._failed_writes} entries: {self._successful_writes} successful, {self._failed_writes} failed.")
|
||||
|
@ -51,4 +51,5 @@ class BunnyStorageHandler(BaseHandler):
|
||||
|
||||
async def close(self):
|
||||
await self._session.close()
|
||||
await self._connector.close()
|
||||
await self._connector.close()
|
||||
await super().close()
|
||||
|
@ -12,6 +12,7 @@ import logging
|
||||
from dotenv import load_dotenv
|
||||
load_dotenv()
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
class WikivoyageParser:
|
||||
def __init__(self):
|
||||
self.document_templates = [
|
||||
@ -382,11 +383,11 @@ def gather_handler_kwargs(handler_name: str) -> dict:
|
||||
elif val.lower() in ("true", "false"):
|
||||
val = val.lower() == "true"
|
||||
kwargs[param] = val
|
||||
print(f"Handler kwargs: {kwargs}")
|
||||
logger.debug(f"Handler kwargs: {kwargs}")
|
||||
return kwargs
|
||||
|
||||
async def main():
|
||||
logging.basicConfig(level=logging.DEBUG)
|
||||
logging.basicConfig(level=logging.INFO)
|
||||
|
||||
# 1. Which handler to load?
|
||||
handler_name = os.getenv("HANDLER")
|
||||
@ -420,19 +421,17 @@ async def main():
|
||||
txt_files = list(input_dir.rglob("*.txt"))
|
||||
|
||||
if not txt_files:
|
||||
print(f"No .txt files found under {input_dir}")
|
||||
logger.info(f"No .txt files found under {input_dir}")
|
||||
sys.exit(1)
|
||||
|
||||
# 7. read concurrency setting
|
||||
try:
|
||||
max_conc = int(os.getenv("MAX_CONCURRENT", "0"))
|
||||
except ValueError:
|
||||
print("Error: MAX_CONCURRENT must be an integer")
|
||||
sys.exit(1)
|
||||
raise ValueError("MAX_CONCURRENT must be an integer")
|
||||
|
||||
if max_conc < 0:
|
||||
print("Error: MAX_CONCURRENT must be >= 0")
|
||||
sys.exit(1)
|
||||
raise ValueError("MAX_CONCURRENT must be >= 0")
|
||||
|
||||
# 8. schedule tasks
|
||||
if max_conc == 0:
|
||||
@ -459,7 +458,7 @@ async def main():
|
||||
await handler.close()
|
||||
|
||||
|
||||
print("All done.")
|
||||
logger.info("All done.")
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
|
Loading…
x
Reference in New Issue
Block a user