mirror of
https://github.com/bcye/structured-wikivoyage-exports.git
synced 2025-11-03 08:32:43 +00:00
support multiple handlers
This commit is contained in:
@@ -9,13 +9,14 @@ class WikiDumpHandler(xml.sax.ContentHandler):
|
||||
"""
|
||||
SAX handler that, for each <page> whose <id> is in mappings,
|
||||
collects the <text> and schedules an async task to parse
|
||||
and write via the user‐supplied handler.
|
||||
and write via the user‐supplied handler(s).
|
||||
"""
|
||||
|
||||
def __init__(self, mappings, handler, max_concurrent):
|
||||
def __init__(self, mappings, handlers, max_concurrent):
|
||||
super().__init__()
|
||||
self.mappings = mappings
|
||||
self.handler = handler
|
||||
# Support a single handler or a list of handlers
|
||||
self.handlers = handlers
|
||||
self.sem = (
|
||||
asyncio.Semaphore(max_concurrent) if max_concurrent > 0 else None
|
||||
)
|
||||
@@ -98,7 +99,11 @@ class WikiDumpHandler(xml.sax.ContentHandler):
|
||||
parser = WikivoyageParser()
|
||||
entry = parser.parse(text)
|
||||
entry['properties']['title'] = title
|
||||
await self.handler.write_entry(entry, uid)
|
||||
|
||||
# Write to all handlers concurrently
|
||||
await asyncio.gather(*[
|
||||
handler.write_entry(entry, uid) for handler in self.handlers
|
||||
])
|
||||
|
||||
async def _bounded_process(self, text: str, uid: str, title: str):
|
||||
# Only run N at once
|
||||
|
||||
Reference in New Issue
Block a user