move code to dedicated src/ folder

This commit is contained in:
2025-05-13 16:51:53 +02:00
parent 38901474c6
commit 10fbef63b3
11 changed files with 98 additions and 2 deletions

View File

@@ -0,0 +1,3 @@
from .base_handler import BaseHandler
from .filesystem import FilesystemHandler
from .bunny_storage import BunnyStorageHandler

View File

@@ -0,0 +1,65 @@
"""Reference handler for output handlers."""
from abc import ABC, abstractmethod
import logging
class BaseHandler(ABC):
    """
    Abstract base class for output handlers.

    Defines the standardized interface that all output handlers must implement.
    Subclasses implement the asynchronous ("private") method `_write_entry`, which
    writes a single entry to the output. Callers use the public `write_entry`
    wrapper, which adds logging, success/failure accounting and the
    `fail_on_error` policy on top of it.
    """

    logger = logging.getLogger(__name__)

    # Class-level fallbacks for subclasses that do not call `super().__init__()`;
    # `__init__` shadows them with per-instance counters.
    _successful_writes = 0
    _failed_writes = 0

    def __init__(self, fail_on_error: bool = True, **kwargs):
        """
        Initializes the BaseHandler with optional parameters.

        Args:
            fail_on_error (bool): If True, the handler will raise an exception on error. Defaults to True.
            **kwargs: Additional keyword arguments for specific handler implementations (ignored here).
        """
        self.fail_on_error = fail_on_error
        # Explicit per-instance counters so handlers never share state.
        self._successful_writes = 0
        self._failed_writes = 0

    @abstractmethod
    async def _write_entry(self, entry: dict, uid: str) -> bool:
        """
        Asynchronously writes a single entry to the output.

        Implementations should gracefully handle any exceptions that may occur
        during the writing process and simply return False if an error occurs.

        Args:
            entry (dict): The entry to write (will be JSON-encoded).
            uid (str): The unique identifier for the entry. The default id provided by wikivoyage is recommended.

        Returns:
            bool: True if the entry was written successfully, False otherwise.
        """

    async def write_entry(self, entry: dict, uid: str):
        """
        Public method to write an entry to the output. It handles exceptions and logs errors.

        A write counts as failed when `_write_entry` returns a falsy value or
        raises. Failures are always logged and counted; they are only surfaced
        to the caller when `fail_on_error` is True.

        Args:
            entry (dict): The entry to write (will be JSON-encoded).
            uid (str): The unique identifier for the entry. The default id provided by wikivoyage is recommended.

        Raises:
            RuntimeError: If `_write_entry` reported failure and `fail_on_error` is True.
            Exception: Whatever `_write_entry` raised, re-raised unchanged, if `fail_on_error` is True.
        """
        try:
            success = await self._write_entry(entry, uid)
        except Exception:
            # A misbehaving implementation raised instead of returning False:
            # account for it like any other failure so the summary stays accurate.
            self._failed_writes += 1
            self.logger.exception("Failed to write entry with UID %s", uid)
            if self.fail_on_error:
                raise
            return

        if success:
            self.logger.debug("Successfully wrote entry with UID %s", uid)
            self._successful_writes += 1
            return

        self.logger.error("Failed to write entry with UID %s", uid)
        self._failed_writes += 1
        if self.fail_on_error:
            # RuntimeError is a subclass of Exception, so existing
            # `except Exception` call sites keep working.
            raise RuntimeError(f"Failed to write entry with UID {uid}")

    async def close(self):
        """
        Closes the handler and logs a summary of the write counters.

        This method should be overridden by subclasses if they need to perform
        any cleanup operations.
        """
        total = self._successful_writes + self._failed_writes
        self.logger.info(
            "Wrote %s entries: %s successful, %s failed.",
            total,
            self._successful_writes,
            self._failed_writes,
        )

View File

@@ -0,0 +1,55 @@
import json
import aiohttp
from .base_handler import BaseHandler
class BunnyStorageHandler(BaseHandler):
    """
    Handler that uploads each entry as a JSON document via HTTP PUT.

    Entries are sent to `https://{region}.bunnycdn.com/{base_path}/{uid}.json`.
    The aiohttp session is created lazily on the first write so that it is
    always constructed inside a running event loop.
    """

    def __init__(
        self,
        region: str,
        base_path: str,
        api_key: str,
        fail_on_error: bool = True,
        keepalive_timeout: int = 75,
    ):
        """
        Initializes the BunnyStorageHandler.

        Args:
            region (str): Host prefix used to build the upload URL.
            base_path (str): Path under which the entries are stored.
            api_key (str): Value sent in the `AccessKey` request header.
            fail_on_error (bool): Passed to the BaseHandler. Defaults to True.
            keepalive_timeout (int): Keep-alive timeout handed to the aiohttp connector. Defaults to 75.
        """
        super().__init__(fail_on_error=fail_on_error)
        self.base_url = f"https://{region}.bunnycdn.com/{base_path}"
        self.headers = {
            "AccessKey": api_key,
            "Content-Type": "application/json",
            "accept": "application/json",
        }

        # initialized later, in a guaranteed async context
        self._connector = None
        self._session = None
        self._keepalive_timeout = keepalive_timeout

    async def setup_connector(self):
        """Creates the connector and session on first use; later calls are no-ops."""
        if self._session is None:
            self._connector = aiohttp.TCPConnector(
                # limit is implicitly set to 100
                keepalive_timeout=self._keepalive_timeout,
            )
            self._session = aiohttp.ClientSession(connector=self._connector)

    async def _write_entry(self, entry: dict, uid: str) -> bool:
        """
        Uploads a single entry as `{uid}.json`.

        Args:
            entry (dict): The entry to upload (JSON-encoded as UTF-8).
            uid (str): The unique identifier for the entry, used as the file name.

        Returns:
            bool: True if the server answered 200, 201 or 204; False on any
            other status or if an exception occurred (both are logged).
        """
        await self.setup_connector()
        payload = json.dumps(entry).encode("utf-8")
        url = f"{self.base_url}/{uid}.json"

        try:
            async with self._session.put(url, data=payload, headers=self.headers) as resp:
                if resp.status in (200, 201, 204):
                    return True
                body = await resp.text()
                self.logger.error(f"Upload failed UID={uid} status={resp.status} body={body}")
                return False
        except Exception:
            self.logger.exception(f"Exception while uploading UID={uid}")
            return False

    async def close(self):
        """
        Closes the HTTP session (if one was ever opened) and logs the summary.

        Safe to call when no entry was written and safe to call repeatedly:
        the session and connector only exist after the first write, so they
        may still be None here.
        """
        # Detach the handles first so a second close() (or a later write,
        # which would lazily create a fresh session) never sees closed objects.
        session, connector = self._session, self._connector
        self._session = None
        self._connector = None

        try:
            if session is not None:
                await session.close()
            # The session owns the connector and normally closes it; only
            # close it explicitly if that did not happen.
            if connector is not None and not connector.closed:
                await connector.close()
        finally:
            # Always emit the write summary, even if closing the session failed.
            await super().close()

View File

@@ -0,0 +1,44 @@
"""Handler that writes files to the filesystem."""
from pathlib import Path
import aiofiles
from .base_handler import BaseHandler
import json
class FilesystemHandler(BaseHandler):
    """
    Handler that writes files to the filesystem.

    Each entry is stored as `<output_dir>/<uid>.json`.
    """

    def __init__(self, output_dir: str, **kwargs):
        """
        Initializes the FileSystemHandler with the specified output directory.

        Args:
            output_dir (str): The directory where files will be written. It is created if missing.
            **kwargs: Additional keyword arguments for the BaseHandler.
        """
        super().__init__(**kwargs)
        self.output_dir = Path(output_dir)
        # Ensure the target directory exists
        self.output_dir.mkdir(parents=True, exist_ok=True)
        self.logger.info(f"Output directory set to {self.output_dir}")

    async def _write_entry(self, entry: dict, uid: str) -> bool:
        """
        Asynchronously writes a single entry to the filesystem.

        Args:
            entry (dict): The entry to write (will be JSON-encoded).
            uid (str): The unique identifier for the entry.

        Returns:
            bool: True if the entry was written successfully, False otherwise.
        """
        try:
            file_path = self.output_dir / f"{uid}.json"
            # Serialize before opening the file so that an entry that cannot
            # be JSON-encoded does not leave an empty file behind.
            payload = json.dumps(entry)
            # Explicit encoding keeps the output independent of the platform default.
            async with aiofiles.open(file_path, "w", encoding="utf-8") as f:
                await f.write(payload)
        except Exception as e:
            # The handler contract is to report any failure (I/O as well as
            # serialization errors) by returning False rather than raising.
            self.logger.error(f"Error writing entry {uid}: {e}")
            return False
        return True