From ee8359491a20c0d5d661ea78eecccd0cd3da48c6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bruce=20R=C3=B6ttgers?= Date: Fri, 16 May 2025 23:30:56 +0200 Subject: [PATCH 1/5] add new csv handler --- src/output_handlers/csv.py | 74 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 74 insertions(+) create mode 100644 src/output_handlers/csv.py diff --git a/src/output_handlers/csv.py b/src/output_handlers/csv.py new file mode 100644 index 0000000..f358386 --- /dev/null +++ b/src/output_handlers/csv.py @@ -0,0 +1,74 @@ +"""CSV output handler for writing entries to a single CSV file with ID and title.""" +import os +import aiofiles +import logging +from .base_handler import BaseHandler + + +class CsvHandler(BaseHandler): + """ + Handler for writing entries to a single CSV file. + This handler extracts only the ID and title (from properties.title) + rather than writing the entire JSON document. + """ + + def __init__( + self, + output_path: str, + fail_on_error: bool = True, + **kwargs + ): + """ + Initializes the CSVHandler. + + Args: + output_path (str): Path to the CSV file to write to. + fail_on_error (bool): If True, the handler will raise an exception on error. + **kwargs: Additional keyword arguments. + """ + super().__init__(fail_on_error=fail_on_error, **kwargs) + self.output_path = output_path + self.logger = logging.getLogger(__name__) + + # Create directory if it doesn't exist + os.makedirs(os.path.dirname(self.output_path), exist_ok=True) + + # Create file with header if it doesn't exist + with open(self.output_path, 'w', encoding='utf-8') as f: + f.write("id,title\n") + + async def _write_entry(self, entry: dict, uid: str) -> bool: + """ + Asynchronously writes a single entry to the CSV file. + + Args: + entry (dict): The entry to write. + uid (str): The unique identifier for the entry. + + Returns: + bool: True if the entry was written successfully, False otherwise. + """ + try: + # Extract title from properties.title + title = "" + if "properties" in entry and "title" in entry["properties"]: + title = entry["properties"]["title"] + + # Escape quotes in title + title = str(title).replace('"', '""') + + # Open file in append mode for each write + async with aiofiles.open(self.output_path, mode='a', encoding='utf-8') as file: + # Write the row + await file.write(f'"{uid}","{title}"\n') + + return True + except Exception as e: + self.logger.error(f"Error writing entry {uid} to CSV: {str(e)}") + return False + + async def close(self): + """ + Performs cleanup and logs statistics. + """ + await super().close() \ No newline at end of file From 22c7d48d3d51ca69969c06be456a55dd7c989e24 Mon Sep 17 00:00:00 2001 From: Remy Moll Date: Wed, 15 Oct 2025 18:55:29 +0200 Subject: [PATCH 2/5] refined handling of a single file object leveraging create and close methods --- src/output_handlers/csv.py | 66 ++++++++++++++++++++------------------ 1 file changed, 35 insertions(+), 31 deletions(-) diff --git a/src/output_handlers/csv.py b/src/output_handlers/csv.py index f358386..f61fafc 100644 --- a/src/output_handlers/csv.py +++ b/src/output_handlers/csv.py @@ -1,7 +1,6 @@ """CSV output handler for writing entries to a single CSV file with ID and title.""" -import os +from pathlib import Path import aiofiles -import logging from .base_handler import BaseHandler @@ -12,30 +11,38 @@ class CsvHandler(BaseHandler): rather than writing the entire JSON document. """ - def __init__( - self, - output_path: str, - fail_on_error: bool = True, - **kwargs - ): + file_writer: any # I believe aiofiles doesn't expose a type for this + + @classmethod + async def create( + cls, + output_path: Path, + **kwargs, + ) -> "CsvHandler": """ Initializes the CSVHandler. Args: output_path (str): Path to the CSV file to write to. - fail_on_error (bool): If True, the handler will raise an exception on error. **kwargs: Additional keyword arguments. """ - super().__init__(fail_on_error=fail_on_error, **kwargs) - self.output_path = output_path - self.logger = logging.getLogger(__name__) - - # Create directory if it doesn't exist - os.makedirs(os.path.dirname(self.output_path), exist_ok=True) - + obj = await super().create(**kwargs) + output_path = Path(output_path) + + # Create the containging directory if it doesn't exist + output_path.parent.mkdir(parents=True, exist_ok=True) + # Create file with header if it doesn't exist - with open(self.output_path, 'w', encoding='utf-8') as f: - f.write("id,title\n") + if not output_path.exists(): + async with aiofiles.open(output_path, mode='w', encoding='utf-8') as file: + await file.write('"id","title"\n') + + # open the file and keep it open for appending + obj.file_writer = await aiofiles.open(output_path, mode='a', encoding='utf-8') + # this has type + + return obj + async def _write_entry(self, entry: dict, uid: str) -> bool: """ @@ -44,31 +51,28 @@ class CsvHandler(BaseHandler): Args: entry (dict): The entry to write. uid (str): The unique identifier for the entry. - + Returns: bool: True if the entry was written successfully, False otherwise. """ try: # Extract title from properties.title - title = "" - if "properties" in entry and "title" in entry["properties"]: - title = entry["properties"]["title"] - + title = entry.get("properties", {}).get("title", "") + # Escape quotes in title title = str(title).replace('"', '""') - - # Open file in append mode for each write - async with aiofiles.open(self.output_path, mode='a', encoding='utf-8') as file: - # Write the row - await file.write(f'"{uid}","{title}"\n') - + + await self.file_writer.write(f'"{uid}","{title}"\n') + return True except Exception as e: - self.logger.error(f"Error writing entry {uid} to CSV: {str(e)}") + self.logger.exception(f"Error writing entry {uid} to CSV.") return False + async def close(self): """ Performs cleanup and logs statistics. """ - await super().close() \ No newline at end of file + await self.file_writer.close() + await super().close() From 6ae83a2ee423395ff154a2379566abc73c3aae82 Mon Sep 17 00:00:00 2001 From: Remy Moll Date: Tue, 21 Oct 2025 13:09:01 +0200 Subject: [PATCH 3/5] remove trailing comment --- src/output_handlers/csv.py | 1 - 1 file changed, 1 deletion(-) diff --git a/src/output_handlers/csv.py b/src/output_handlers/csv.py index f61fafc..d73c0f2 100644 --- a/src/output_handlers/csv.py +++ b/src/output_handlers/csv.py @@ -39,7 +39,6 @@ class CsvHandler(BaseHandler): # open the file and keep it open for appending obj.file_writer = await aiofiles.open(output_path, mode='a', encoding='utf-8') - # this has type return obj From 07eaa62669727814a2bedc0749f952b7ea33cd44 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?R=C3=A9my=20Moll?= <36770313+moll-re@users.noreply.github.com> Date: Tue, 21 Oct 2025 13:23:53 +0200 Subject: [PATCH 4/5] Apply suggestions from code review Co-authored-by: coderabbitai[bot] <136622811+coderabbitai[bot]@users.noreply.github.com> --- src/output_handlers/csv.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/output_handlers/csv.py b/src/output_handlers/csv.py index d73c0f2..ee42e0a 100644 --- a/src/output_handlers/csv.py +++ b/src/output_handlers/csv.py @@ -29,7 +29,7 @@ class CsvHandler(BaseHandler): obj = await super().create(**kwargs) output_path = Path(output_path) - # Create the containging directory if it doesn't exist + # Create the containing directory if it doesn't exist output_path.parent.mkdir(parents=True, exist_ok=True) # Create file with header if it doesn't exist @@ -64,7 +64,7 @@ class CsvHandler(BaseHandler): await self.file_writer.write(f'"{uid}","{title}"\n') return True - except Exception as e: + except Exception: self.logger.exception(f"Error writing entry {uid} to CSV.") return False From 275b1ee41fad82e9363ed267b32303ee06b5f908 Mon Sep 17 00:00:00 2001 From: Remy Moll Date: Tue, 21 Oct 2025 13:30:25 +0200 Subject: [PATCH 5/5] resolve more suggestions --- src/output_handlers/csv.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/output_handlers/csv.py b/src/output_handlers/csv.py index ee42e0a..717b35b 100644 --- a/src/output_handlers/csv.py +++ b/src/output_handlers/csv.py @@ -11,7 +11,7 @@ class CsvHandler(BaseHandler): rather than writing the entire JSON document. """ - file_writer: any # I believe aiofiles doesn't expose a type for this + file_writer: object # I believe aiofiles doesn't expose a type for this @classmethod async def create( @@ -23,7 +23,7 @@ class CsvHandler(BaseHandler): Initializes the CSVHandler. Args: - output_path (str): Path to the CSV file to write to. + output_path (Path): Path to the CSV file to write to. **kwargs: Additional keyword arguments. """ obj = await super().create(**kwargs)