From 3bfd30a073c33fe1056ee0fea013e59a2ba65755 Mon Sep 17 00:00:00 2001
From: Remy Moll
Date: Sat, 12 Apr 2025 01:09:11 +0200
Subject: [PATCH] definition of abstract handler and implementation of handler
 for local filesystem

---
Review note: FileSystemHandler._write_entry now JSON-encodes the entry before
writing it. Previously the dict was passed straight to the text stream, which
rejects it with a TypeError that `except IOError` did not catch. Hunk sizes and
the diffstat below were updated accordingly; the blob ids on the `index` lines
of base_handler.py and filesystm_handler.py are those of the original patch and
were not recomputed.

 output_handlers/__init__.py          |  2 +
 output_handlers/base_handler.py      | 57 ++++++++++++++++++++++++++++
 output_handlers/filesystm_handler.py | 47 +++++++++++++++++++++++
 pyproject.toml                       |  1 +
 uv.lock                              | 12 ++++++
 5 files changed, 119 insertions(+)
 create mode 100644 output_handlers/__init__.py
 create mode 100644 output_handlers/base_handler.py
 create mode 100644 output_handlers/filesystm_handler.py

diff --git a/output_handlers/__init__.py b/output_handlers/__init__.py
new file mode 100644
index 0000000..66ca110
--- /dev/null
+++ b/output_handlers/__init__.py
@@ -0,0 +1,2 @@
+from .base_handler import BaseHandler
+from .filesystm_handler import FileSystemHandler
\ No newline at end of file
diff --git a/output_handlers/base_handler.py b/output_handlers/base_handler.py
new file mode 100644
index 0000000..3574dac
--- /dev/null
+++ b/output_handlers/base_handler.py
@@ -0,0 +1,57 @@
+"""Reference handler for output handlers."""
+from abc import ABC, abstractmethod
+import logging
+
+
+
+class BaseHandler(ABC):
+    """
+    Abstract base class for output handlers. Defines the standardized interface that all output handlers must implement.
+    In particular, it requires the implementation of an asynchronous ("private") method `_write_entry` to write a single entry to the output.
+    """
+
+    logger = logging.getLogger(__name__)
+
+    def __init__(self, fail_on_error: bool = True, **kwargs):
+        """
+        Initializes the BaseHandler with optional parameters.
+
+        Args:
+            fail_on_error (bool): If True, the handler will raise an exception on error. Defaults to True.
+            **kwargs: Additional keyword arguments for specific handler implementations.
+        """
+        self.fail_on_error = fail_on_error
+
+
+    @abstractmethod
+    async def _write_entry(self, entry: dict, uid: str) -> bool:
+        """
+        Asynchronously writes a single entry to the output. This method should gracefully handle any exceptions that may occur during the writing process and simply return False if an error occurs.
+
+        Args:
+            entry (dict): The entry to write (will be JSON-encoded).
+            uid (str): The unique identifier for the entry. The default id provided by wikivoyage is recommended.
+        Returns:
+            bool: True if the entry was written successfully, False otherwise.
+        """
+        pass
+
+
+    async def write_entry(self, entry: dict, uid: str):
+        """
+        Public method to write an entry to the output. It delegates to `_write_entry`, logs the outcome and, if `fail_on_error` is set, raises when the write was reported as failed.
+
+        Args:
+            entry (dict): The entry to write (will be JSON-encoded).
+            uid (str): The unique identifier for the entry. The default id provided by wikivoyage is recommended.
+
+        Raises:
+            Exception: If `_write_entry` returned False and `fail_on_error` is True.
+        """
+        success = await self._write_entry(entry, uid)
+        if success:
+            self.logger.debug(f"Successfully wrote entry with UID {uid}")
+        else:
+            self.logger.error(f"Failed to write entry with UID {uid}")
+            if self.fail_on_error:
+                raise Exception(f"Failed to write entry with UID {uid}")
diff --git a/output_handlers/filesystm_handler.py b/output_handlers/filesystm_handler.py
new file mode 100644
index 0000000..2adeeb7
--- /dev/null
+++ b/output_handlers/filesystm_handler.py
@@ -0,0 +1,47 @@
+"""Handler that writes files to the filesystem."""
+import json
+from pathlib import Path
+import aiofiles
+from .base_handler import BaseHandler
+
+class FileSystemHandler(BaseHandler):
+    """
+    Handler that writes files to the filesystem.
+    """
+    def __init__(self, output_dir: str, **kwargs):
+        """
+        Initializes the FileSystemHandler with the specified output directory.
+
+        Args:
+            output_dir (str): The directory where files will be written.
+            **kwargs: Additional keyword arguments for the BaseHandler.
+        """
+        super().__init__(**kwargs)
+        self.output_dir = Path(output_dir)
+        # Ensure the target directory exists
+        self.output_dir.mkdir(parents=True, exist_ok=True)
+        self.logger.info(f"Output directory set to {self.output_dir}")
+
+
+    async def _write_entry(self, entry: dict, uid: str) -> bool:
+        """
+        Asynchronously writes a single entry to the filesystem as `<uid>.json` inside the output directory.
+
+        Args:
+            entry (dict): The entry to write (will be JSON-encoded).
+            uid (str): The unique identifier for the entry.
+
+        Returns:
+            bool: True if the entry was written successfully, False otherwise.
+        """
+        file_path = self.output_dir / f"{uid}.json"
+        try:
+            # Encode before opening: the text stream only accepts str, and a failed encode must not leave an empty file behind.
+            payload = json.dumps(entry, ensure_ascii=False)
+            async with aiofiles.open(file_path, 'w', encoding='utf-8') as f:
+                await f.write(payload)
+        except (OSError, TypeError, ValueError) as e:
+            # OSError: filesystem failure; TypeError/ValueError: entry is not JSON-serializable.
+            self.logger.error(f"Error writing entry {uid}: {e}")
+            return False
+        return True
diff --git a/pyproject.toml b/pyproject.toml
index b347f6c..c9589f2 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -5,6 +5,7 @@ description = "Add your description here"
 readme = "README.md"
 requires-python = ">=3.12"
 dependencies = [
+    "aiofiles>=24.1.0",
     "mwparserfromhell>=0.6.6",
     "wikitextparser>=0.56.3",
 ]
diff --git a/uv.lock b/uv.lock
index 6d3f2ac..011f9a5 100644
--- a/uv.lock
+++ b/uv.lock
@@ -1,17 +1,29 @@
 version = 1
+revision = 1
 requires-python = ">=3.12"
 
+[[package]]
+name = "aiofiles"
+version = "24.1.0"
+source = { registry = "https://pypi.org/simple" }
+sdist = { url = "https://files.pythonhosted.org/packages/0b/03/a88171e277e8caa88a4c77808c20ebb04ba74cc4681bf1e9416c862de237/aiofiles-24.1.0.tar.gz", hash = "sha256:22a075c9e5a3810f0c2e48f3008c94d68c65d763b9b03857924c99e57355166c", size = 30247 }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/a5/45/30bb92d442636f570cb5651bc661f52b610e2eec3f891a5dc3a4c3667db0/aiofiles-24.1.0-py3-none-any.whl", hash = "sha256:b4ec55f4195e3eb5d7abd1bf7e061763e864dd4954231fb8539a0ef8bb8260e5", size = 15896 },
+]
+
 [[package]]
 name = "mapvoyage-extract"
 version = "0.1.0"
 source = { virtual = "." }
 dependencies = [
+    { name = "aiofiles" },
     { name = "mwparserfromhell" },
     { name = "wikitextparser" },
 ]
 
 [package.metadata]
 requires-dist = [
+    { name = "aiofiles", specifier = ">=24.1.0" },
     { name = "mwparserfromhell", specifier = ">=0.6.6" },
     { name = "wikitextparser", specifier = ">=0.56.3" },
 ]