support multiple handlers

This commit is contained in:
Bruce Röttgers
2025-05-16 20:27:59 +02:00
parent f73046bd65
commit 60c13fb9ec
2 changed files with 48 additions and 35 deletions

View File

@@ -9,13 +9,14 @@ class WikiDumpHandler(xml.sax.ContentHandler):
"""
SAX handler that, for each <page> whose <id> is in mappings,
collects the <text> and schedules an async task to parse
and write via the usersupplied handler.
and write via the usersupplied handler(s).
"""
def __init__(self, mappings, handler, max_concurrent):
def __init__(self, mappings, handlers, max_concurrent):
super().__init__()
self.mappings = mappings
self.handler = handler
# Support a single handler or a list of handlers
self.handlers = handlers
self.sem = (
asyncio.Semaphore(max_concurrent) if max_concurrent > 0 else None
)
@@ -98,7 +99,11 @@ class WikiDumpHandler(xml.sax.ContentHandler):
parser = WikivoyageParser()
entry = parser.parse(text)
entry['properties']['title'] = title
await self.handler.write_entry(entry, uid)
# Write to all handlers concurrently
await asyncio.gather(*[
handler.write_entry(entry, uid) for handler in self.handlers
])
async def _bounded_process(self, text: str, uid: str, title: str):
# Only run N at once