"""Module allowing connexion to overpass api and fectch data from OSM.""" import os import urllib import math import logging import xml.etree.ElementTree as ET from .caching_strategy import get_cache_key, CachingStrategy from ..constants import OSM_CACHE_DIR, OSM_TYPES RESOLUTION = 0.05 class Overpass : """ Overpass class to manage the query building and sending to overpass api. The caching strategy is a part of this class and initialized upon creation of the Overpass object. """ logger = logging.getLogger(__name__) def __init__(self, caching_strategy: str = 'XML', cache_dir: str = OSM_CACHE_DIR) : """ Initialize the Overpass instance with the url, headers and caching strategy. """ self.overpass_url = "https://overpass-api.de/api/interpreter" self.headers = {'User-Agent': 'Mozilla/5.0 (compatible; OverpassQuery/1.0; +http://example.com)',} self.caching_strategy = CachingStrategy.use(caching_strategy, cache_dir=cache_dir) def send_query(self, bbox: tuple, osm_types: OSM_TYPES, selector: str, conditions=[], out='center') -> ET: """ Sends the Overpass QL query to the Overpass API and returns the parsed JSON response. Args: query (str): The Overpass QL query to be sent to the Overpass API. Returns: dict: The parsed JSON response from the Overpass API, or None if the request fails. """ # Determine which grid cells overlap with this bounding box. overlapping_cells = Overpass.get_overlapping_cells(bbox) # Check the cache for any data that overlaps with these cells cell_key_dict = {} for cell in overlapping_cells : for elem in osm_types : key_str = f"{elem}[{selector}]{conditions}({','.join(map(str, cell))})" cell_key_dict[cell] = get_cache_key(key_str) cached_responses = [] hollow_cache_keys = [] # Retrieve the cached data and mark the missing entries as hollow for cell, key in cell_key_dict.items(): cached_data = self.caching_strategy.get(key) if cached_data is not None : cached_responses.append(cached_data) else: # Cache miss: Mark the cache key as hollow self.caching_strategy.set_hollow(key, cell, osm_types, selector, conditions, out) hollow_cache_keys.append(key) # If there is no missing data, return the cached responses if not hollow_cache_keys : self.logger.debug(f'Cache hit.') return self.combine_cached_data(cached_responses) # TODO If there is SOME missing data : hybrid stuff with partial cache # Build the query string in case of needed overpass query query_str = Overpass.build_query(bbox, osm_types, selector, conditions, out) # Prepare the data to be sent as POST request, encoded as bytes data = urllib.parse.urlencode({'data': query_str}).encode('utf-8') try: # Create a Request object with the specified URL, data, and headers request = urllib.request.Request(self.overpass_url, data=data, headers=self.headers) # Send the request and read the response with urllib.request.urlopen(request) as response: # Read and decode the response response_data = response.read().decode('utf-8') root = ET.fromstring(response_data) self.logger.debug(f'Cache miss. Fetching data through Overpass\nQuery = {query_str}') return root except urllib.error.URLError as e: raise ConnectionError(f"Error connecting to Overpass API: {e}") from e def fill_cache(self, xml_string: str) : # Build the query using info from hollow cache entry query_str, cache_key = Overpass.build_query_from_hollow(xml_string) # Prepare the data to be sent as POST request, encoded as bytes data = urllib.parse.urlencode({'data': query_str}).encode('utf-8') try: # Create a Request object with the specified URL, data, and headers request = urllib.request.Request(self.overpass_url, data=data, headers=self.headers) # Send the request and read the response with urllib.request.urlopen(request) as response: # Read and decode the response response_data = response.read().decode('utf-8') root = ET.fromstring(response_data) self.caching_strategy.set(cache_key, root) self.logger.debug(f'Cache set') except urllib.error.URLError as e: raise ConnectionError(f"Error connecting to Overpass API: {e}") from e @staticmethod def build_query(bbox: tuple, osm_types: OSM_TYPES, selector: str, conditions=[], out='center') -> str: """ Constructs a query string for the Overpass API to retrieve OpenStreetMap (OSM) data. Args: bbox (tuple): A tuple representing the geographical search area, typically in the format (lat_min, lon_min, lat_max, lon_max). osm_types (list[str]): A list of OSM element types to search for. Must be one or more of 'Way', 'Node', or 'Relation'. selector (str): The key or tag to filter the OSM elements (e.g., 'amenity', 'highway', etc.). conditions (list, optional): A list of conditions to apply as additional filters for the selected OSM elements. The conditions should be written in the Overpass QL format, and they are combined with '&&' if multiple are provided. Defaults to an empty list. out (str, optional): Specifies the output type, such as 'center', 'body', or 'tags'. Defaults to 'center'. Returns: str: The constructed Overpass QL query string. Notes: - If no conditions are provided, the query will just use the `selector` to filter the OSM elements without additional constraints. """ if not isinstance(conditions, list) : conditions = [conditions] if not isinstance(osm_types, list) : osm_types = [osm_types] query = '(' # convert the bbox to string. bbox_str = f"({','.join(map(str, bbox))})" if conditions : conditions = '(if: ' + ' && '.join(conditions) + ')' else : conditions = '' for elem in osm_types : query += elem + '[' + selector + ']' + conditions + bbox_str + ';' query += ');' + f'out {out};' return query @staticmethod def build_query_from_hollow(xml_string): """Extract variables from an XML string.""" # Parse the XML string into an ElementTree object root = ET.fromstring(xml_string) # Extract values from the XML tree key = root.find('key').text cell = tuple(map(float, root.find('cell').text.strip('()').split(','))) bbox = Overpass.get_bbox_from_grid_cell(cell[0], cell[1]) osm_types = root.find('osm_types').text.split(',') selector = root.find('selector').text conditions = root.find('conditions').text.split(',') if root.find('conditions').text != "none" else [] out = root.find('out').text query_str = Overpass.build_query(bbox, osm_types, selector, conditions, out) return query_str, key @staticmethod def get_grid_cell(lat: float, lon: float): """ Returns the grid cell coordinates for a given latitude and longitude. Each grid cell is 0.05°lat x 0.05°lon resolution in size. """ lat_index = math.floor(lat / RESOLUTION) lon_index = math.floor(lon / RESOLUTION) return (lat_index, lon_index) @staticmethod def get_bbox_from_grid_cell(lat_index: int, lon_index: int): """ Returns the bounding box for a given grid cell index. Each grid cell is resolution x resolution in size. The bounding box is returned as (min_lat, min_lon, max_lat, max_lon). """ # Calculate the southwest (min_lat, min_lon) corner of the bounding box min_lat = round(lat_index * RESOLUTION, 2) min_lon = round(lon_index * RESOLUTION, 2) # Calculate the northeast (max_lat, max_lon) corner of the bounding box max_lat = round((lat_index + 1) * RESOLUTION, 2) max_lon = round((lon_index + 1) * RESOLUTION, 2) return (min_lat, min_lon, max_lat, max_lon) @staticmethod def get_overlapping_cells(query_bbox: tuple): """ Returns a set of all grid cells that overlap with the given bounding box. """ # Extract location from the query bbox lat_min, lon_min, lat_max, lon_max = query_bbox min_lat_cell, min_lon_cell = Overpass.get_grid_cell(lat_min, lon_min) max_lat_cell, max_lon_cell = Overpass.get_grid_cell(lat_max, lon_max) overlapping_cells = set() for lat_idx in range(min_lat_cell, max_lat_cell + 1): for lon_idx in range(min_lon_cell, max_lon_cell + 1): overlapping_cells.add((lat_idx, lon_idx)) return overlapping_cells @staticmethod def combine_cached_data(cached_data_list): """ Combines data from multiple cached responses into a single result. """ combined_data = ET.Element("osm") for cached_data in cached_data_list: for element in cached_data: combined_data.append(element) return combined_data def get_base_info(elem: ET.Element, osm_type: OSM_TYPES, with_name=False) : """ Extracts base information (coordinates, OSM ID, and optionally a name) from an OSM element. This function retrieves the latitude and longitude coordinates, OSM ID, and optionally the name of a given OpenStreetMap (OSM) element. It handles different OSM types (e.g., 'node', 'way') by extracting coordinates either directly or from a center tag, depending on the element type. Args: elem (ET.Element): The XML element representing the OSM entity. osm_type (str): The type of the OSM entity (e.g., 'node', 'way'). If 'node', the coordinates are extracted directly from the element; otherwise, from the 'center' tag. with_name (bool): Whether to extract and return the name of the element. If True, it attempts to find the 'name' tag within the element and return its value. Defaults to False. Returns: tuple: A tuple containing: - osm_id (str): The OSM ID of the element. - coords (tuple): A tuple of (latitude, longitude) coordinates. - name (str, optional): The name of the element if `with_name` is True; otherwise, not included. """ # 1. extract coordinates if osm_type != 'node' : center = elem.find('center') lat = float(center.get('lat')) lon = float(center.get('lon')) else : lat = float(elem.get('lat')) lon = float(elem.get('lon')) coords = tuple((lat, lon)) # 2. Extract OSM id osm_id = elem.get('id') # 3. Extract name if specified and return if with_name : name = elem.find("tag[@k='name']").get('v') if elem.find("tag[@k='name']") is not None else None return osm_id, coords, name else : return osm_id, coords def fill_cache(): overpass = Overpass(caching_strategy='XML', cache_dir=OSM_CACHE_DIR) with os.scandir(OSM_CACHE_DIR) as it: for entry in it: if entry.is_file() and entry.name.startswith('hollow_'): # Read the whole file content as a string with open(entry.path, 'r') as f: xml_string = f.read() # Fill the cache with the query and key overpass.fill_cache(xml_string) # Now delete the file as the cache is filled os.remove(entry.path)