mirror of
https://github.com/bcye/structured-wikivoyage-exports.git
synced 2025-11-02 08:02:44 +00:00
move code to dedicated src/ folder
This commit is contained in:
3
src/output_handlers/__init__.py
Normal file
3
src/output_handlers/__init__.py
Normal file
@@ -0,0 +1,3 @@
|
||||
from .base_handler import BaseHandler
|
||||
from .filesystem import FilesystemHandler
|
||||
from .bunny_storage import BunnyStorageHandler
|
||||
65
src/output_handlers/base_handler.py
Normal file
65
src/output_handlers/base_handler.py
Normal file
@@ -0,0 +1,65 @@
|
||||
"""Reference handler for output handlers."""
|
||||
from abc import ABC, abstractmethod
|
||||
import logging
|
||||
|
||||
|
||||
|
||||
class BaseHandler(ABC):
    """
    Abstract base class for output handlers. Defines the standardized interface that all output handlers must implement.

    Subclasses implement the asynchronous ("private") method `_write_entry` to write a single entry to the output.
    Callers use the public `write_entry`, which adds logging, success/failure counting and the `fail_on_error` policy.
    """

    logger = logging.getLogger(__name__)
    # Class-level defaults only: the first `+=` in `write_entry` rebinds the
    # counter on the instance, so the counts are tracked per handler instance.
    _successful_writes = 0
    _failed_writes = 0

    def __init__(self, fail_on_error: bool = True, **kwargs):
        """
        Initializes the BaseHandler with optional parameters.

        Args:
            fail_on_error (bool): If True, the handler will raise an exception on error. Defaults to True.
            **kwargs: Additional keyword arguments for specific handler implementations (accepted and ignored here).
        """
        self.fail_on_error = fail_on_error

    @abstractmethod
    async def _write_entry(self, entry: dict, uid: str) -> bool:
        """
        Asynchronously writes a single entry to the output. This method should gracefully handle any exceptions that may occur during the writing process and simply return False if an error occurs.

        Args:
            entry (dict): The entry to write (will be JSON-encoded).
            uid (str): The unique identifier for the entry. The default id provided by wikivoyage is recommended.

        Returns:
            bool: True if the entry was written successfully, False otherwise.
        """

    async def write_entry(self, entry: dict, uid: str):
        """
        Public method to write an entry to the output. It handles exceptions and logs errors.

        A failure is either `_write_entry` returning a falsy value or `_write_entry` raising an exception.
        Both are logged and counted as failed writes.

        Args:
            entry (dict): The entry to write (will be JSON-encoded).
            uid (str): The unique identifier for the entry. The default id provided by wikivoyage is recommended.

        Raises:
            Exception: If the write failed and `fail_on_error` is True. When `_write_entry` itself raised,
                that original exception is re-raised unchanged.
        """
        try:
            success = await self._write_entry(entry, uid)
        except Exception:
            # A misbehaving subclass raised instead of returning False: still
            # account for the failure so `fail_on_error=False` is honoured.
            self.logger.exception(f"Failed to write entry with UID {uid}")
            self._failed_writes += 1
            if self.fail_on_error:
                raise
            return

        if success:
            self.logger.debug(f"Successfully wrote entry with UID {uid}")
            self._successful_writes += 1
            return

        self.logger.error(f"Failed to write entry with UID {uid}")
        self._failed_writes += 1
        if self.fail_on_error:
            raise Exception(f"Failed to write entry with UID {uid}")

    async def close(self):
        """
        Closes the handler. This method should be overridden by subclasses if they need to perform any cleanup operations.

        The base implementation only logs a summary of the successful and failed writes.
        """
        self.logger.info(f"Wrote {self._successful_writes+self._failed_writes} entries: {self._successful_writes} successful, {self._failed_writes} failed.")
|
||||
55
src/output_handlers/bunny_storage.py
Normal file
55
src/output_handlers/bunny_storage.py
Normal file
@@ -0,0 +1,55 @@
|
||||
import json
|
||||
import aiohttp
|
||||
from .base_handler import BaseHandler
|
||||
|
||||
class BunnyStorageHandler(BaseHandler):
    """
    Handler that uploads each entry as a JSON document via HTTP PUT.

    Entries are sent to `https://{region}.bunnycdn.com/{base_path}/{uid}.json`, authenticated with the
    `AccessKey` header. The aiohttp session is created lazily on the first write.
    """

    def __init__(
        self,
        region: str,
        base_path: str,
        api_key: str,
        fail_on_error: bool = True,
        keepalive_timeout: int = 75,
    ):
        """
        Initializes the BunnyStorageHandler.

        Args:
            region (str): Host prefix used to build the upload URL.
            base_path (str): Path prefix under which the entries are stored.
            api_key (str): Value sent in the `AccessKey` request header.
            fail_on_error (bool): Forwarded to BaseHandler. Defaults to True.
            keepalive_timeout (int): Keep-alive timeout passed to the aiohttp TCP connector. Defaults to 75.
        """
        super().__init__(fail_on_error=fail_on_error)
        self.base_url = f"https://{region}.bunnycdn.com/{base_path}"
        self.headers = {
            "AccessKey": api_key,
            "Content-Type": "application/json",
            "accept": "application/json",
        }

        # initialized later, in a guaranteed async context
        self._connector = None
        self._session = None
        self._keepalive_timeout = keepalive_timeout

    async def setup_connector(self):
        """Creates the TCP connector and client session if no session exists yet."""
        if self._session is None:
            self._connector = aiohttp.TCPConnector(
                # limit is implicitly set to 100
                keepalive_timeout=self._keepalive_timeout,
            )
            self._session = aiohttp.ClientSession(connector=self._connector)

    async def _write_entry(self, entry: dict, uid: str) -> bool:
        """
        Asynchronously uploads a single entry.

        Args:
            entry (dict): The entry to write (will be JSON-encoded).
            uid (str): The unique identifier for the entry; used as the file name.

        Returns:
            bool: True if the server answered with status 200, 201 or 204, False otherwise
                (including when serialization or the request raised an exception).
        """
        await self.setup_connector()
        url = f"{self.base_url}/{uid}.json"

        try:
            # Serialize inside the try block so that an unserializable entry is
            # reported as a failed write instead of escaping as an exception.
            payload = json.dumps(entry).encode("utf-8")
            async with self._session.put(url, data=payload, headers=self.headers) as resp:
                if resp.status in (200, 201, 204):
                    return True
                body = await resp.text()
                self.logger.error(f"Upload failed UID={uid} status={resp.status} body={body}")
                return False

        except Exception:
            self.logger.exception(f"Exception while uploading UID={uid}")
            return False

    async def close(self):
        """
        Closes the HTTP session and connector (if they were ever created) and logs the write summary.
        """
        # The session only exists after the first write; closing a handler that
        # never wrote anything must not fail on `None`.
        if self._session is not None:
            await self._session.close()
            self._session = None
        if self._connector is not None:
            await self._connector.close()
            self._connector = None
        await super().close()
|
||||
44
src/output_handlers/filesystem.py
Normal file
44
src/output_handlers/filesystem.py
Normal file
@@ -0,0 +1,44 @@
|
||||
"""Handler that writes files to the filesystem."""
|
||||
from pathlib import Path
|
||||
import aiofiles
|
||||
from .base_handler import BaseHandler
|
||||
import json
|
||||
|
||||
class FilesystemHandler(BaseHandler):
    """
    Handler that writes files to the filesystem.

    Each entry is stored as `<output_dir>/<uid>.json`.
    """

    def __init__(self, output_dir: str, **kwargs):
        """
        Initializes the FileSystemHandler with the specified output directory.

        Args:
            output_dir (str): The directory where files will be written. It is created if it does not exist.
            **kwargs: Additional keyword arguments for the BaseHandler.
        """
        super().__init__(**kwargs)
        self.output_dir = Path(output_dir)
        # Ensure the target directory exists
        self.output_dir.mkdir(parents=True, exist_ok=True)
        self.logger.info(f"Output directory set to {self.output_dir}")

    async def _write_entry(self, entry: dict, uid: str) -> bool:
        """
        Asynchronously writes a single entry to the filesystem.

        Args:
            entry (dict): The entry to write (will be JSON-encoded).
            uid (str): The unique identifier for the entry.

        Returns:
            bool: True if the entry was written successfully, False otherwise.
        """
        file_path = self.output_dir / f"{uid}.json"
        try:
            # Serialize before opening the file: opening in 'w' mode truncates,
            # so a serialization error must not leave an empty file behind.
            payload = json.dumps(entry)
            async with aiofiles.open(file_path, 'w', encoding="utf-8") as f:
                await f.write(payload)
            return True
        except (OSError, TypeError, ValueError) as e:
            # OSError covers I/O failures (IOError is its alias); TypeError and
            # ValueError are raised by json.dumps for unserializable or
            # circular data.
            self.logger.error(f"Error writing entry {uid}: {e}")
            return False
|
||||
Reference in New Issue
Block a user