"""Look up the lead thumbnail image of an English Wikipedia article by title."""

import json
from typing import Optional

import httpx

base_url = "https://en.wikipedia.org/w/api.php"


def best_page_match(title: str) -> Optional[int]:
    """Return the page id of the article whose title equals *title*.

    Runs a ``prefixsearch`` query against the API at ``base_url`` and
    compares every returned title with *title*, ignoring case.

    Args:
        title: Article title to look for.

    Returns:
        The ``pageid`` of the first search result whose title matches
        case-insensitively, or ``None`` if no result matches.

    Raises:
        httpx.HTTPStatusError: If the API answers with a 4xx/5xx status.
    """
    params = {
        "action": "query",
        "format": "json",
        "list": "prefixsearch",
        "pssearch": title,
    }
    response = httpx.get(base_url, params=params)
    # Fail loudly on an HTTP error instead of trying to parse an error body.
    response.raise_for_status()
    results = response.json().get("query", {}).get("prefixsearch", [])

    # casefold() is the caseless-comparison form of lower() (e.g. "ß" == "ss").
    wanted = title.casefold()
    for result in results:
        if result["title"].casefold() == wanted:
            # Kept so the script's existing console output is unchanged.
            print("Matched")
            return result["pageid"]
    return None


def get_image_url(page_id: int) -> Optional[str]:
    """Return the URL of the thumbnail image for the page *page_id*.

    Sends a ``prop=pageimages`` query with ``pithumbsize=500``, i.e. a
    request of the form
    ``?action=query&format=json&prop=pageimages&pageids=<id>&pithumbsize=500``.

    Args:
        page_id: Page id as returned by :func:`best_page_match`.

    Returns:
        The ``thumbnail.source`` value of the page entry, or ``None`` when
        the response holds no such page or the page has no thumbnail.

    Raises:
        httpx.HTTPStatusError: If the API answers with a 4xx/5xx status.
    """
    params = {
        "action": "query",
        "format": "json",
        "prop": "pageimages",
        "pageids": page_id,
        "pithumbsize": 500,
    }
    response = httpx.get(base_url, params=params)
    response.raise_for_status()
    pages = response.json().get("query", {}).get("pages", {})
    # The "pages" mapping is looked up by the page id rendered as a string.
    page = pages.get(str(page_id), {})
    return page.get("thumbnail", {}).get("source")


def get_image_url_from_title(title: str) -> Optional[str]:
    """Return the thumbnail URL for the article titled *title*.

    Combines :func:`best_page_match` and :func:`get_image_url`.

    Args:
        title: Article title, matched case-insensitively.

    Returns:
        The thumbnail URL, or ``None`` if no article title matches or the
        matched article has no thumbnail.
    """
    page_id = best_page_match(title)
    if page_id is None:
        return None
    return get_image_url(page_id)


if __name__ == "__main__":
    # Demo lookups; guarded so importing this module performs no requests.
    print(get_image_url_from_title("kölner dom"))
    print(get_image_url_from_title("grossmünster"))
    print(get_image_url_from_title("eiffel tower"))
    print(get_image_url_from_title("taj mahal"))
    print(get_image_url_from_title("big ben"))