"""Module allowing connexion to overpass api and fectch data from OSM.""" import os import urllib import math import logging import json from typing import List, Tuple from .caching_strategy import get_cache_key, CachingStrategy from ..constants import OSM_CACHE_DIR, OSM_TYPES, BBOX RESOLUTION = 0.05 CELL = Tuple[int, int] class Overpass : """ Overpass class to manage the query building and sending to overpass api. The caching strategy is a part of this class and initialized upon creation of the Overpass object. """ logger = logging.getLogger(__name__) def __init__(self, caching_strategy: str = 'JSON', cache_dir: str = OSM_CACHE_DIR) : """ Initialize the Overpass instance with the url, headers and caching strategy. """ self.overpass_url = "https://overpass-api.de/api/interpreter" self.headers = {'User-Agent': 'Mozilla/5.0 (compatible; OverpassQuery/1.0; +http://example.com)',} self.caching_strategy = CachingStrategy.use(caching_strategy, cache_dir=cache_dir) def send_query(self, bbox: BBOX, osm_types: OSM_TYPES, selector: str, conditions: list=None, out='center') -> List[dict]: """ Sends the Overpass QL query to the Overpass API and returns the parsed json response. Args: bbox (tuple): Bounding box for the query. osm_types (list[str]): List of OSM element types (e.g., 'node', 'way'). selector (str): Key or tag to filter OSM elements (e.g., 'highway'). conditions (list): Optional list of additional filter conditions in Overpass QL format. out (str): Output format ('center', 'body', etc.). Defaults to 'center'. Returns: list: Parsed json response from the Overpass API, or cached data if available. """ # Determine which grid cells overlap with this bounding box. overlapping_cells = Overpass._get_overlapping_cells(bbox) # Retrieve cached data and identify missing cache entries cached_responses, non_cached_cells = self._retrieve_cached_data(overlapping_cells, osm_types, selector, conditions, out) self.logger.info(f'Cache hit for {len(overlapping_cells)-len(non_cached_cells)}/{len(overlapping_cells)} quadrants.') # If there is no missing data, return the cached responses after filtering. if not non_cached_cells : return Overpass._filter_landmarks(cached_responses, bbox) # If there is no cached data, fetch all from Overpass. elif not cached_responses : query_str = Overpass.build_query(bbox, osm_types, selector, conditions, out) return self.fetch_data_from_api(query_str) # Hybrid cache: some data from Overpass, some data from cache. else : # Resize the bbox for smaller search area and build new query string. non_cached_bbox = Overpass._get_non_cached_bbox(non_cached_cells, bbox) query_str = Overpass.build_query(non_cached_bbox, osm_types, selector, conditions, out) non_cached_responses = self.fetch_data_from_api(query_str) return Overpass._filter_landmarks(cached_responses, bbox) + non_cached_responses def fetch_data_from_api(self, query_str: str) -> List[dict]: """ Fetch data from the Overpass API and return the json data. Args: query_str (str): The Overpass query string. Returns: dict: Combined cached and fetched data. """ try: data = urllib.parse.urlencode({'data': query_str}).encode('utf-8') request = urllib.request.Request(self.overpass_url, data=data, headers=self.headers) with urllib.request.urlopen(request) as response: response_data = response.read().decode('utf-8') # Convert the HTTPResponse to a string data = json.loads(response_data) # Load the JSON from the string elements = data.get('elements', []) # self.logger.debug(f'Query = {query_str}') return elements except urllib.error.URLError as e: self.logger.error(f"Error connecting to Overpass API: {e}") raise ConnectionError(f"Error connecting to Overpass API: {e}") from e except Exception as exc : raise Exception(f'An unexpected error occured: {str(exc)}') from exc def fill_cache(self, json_data: dict) : """ Fill cache with data by using a hollow cache entry's information. """ query_str, cache_key = Overpass._build_query_from_hollow(json_data) try: data = urllib.parse.urlencode({'data': query_str}).encode('utf-8') request = urllib.request.Request(self.overpass_url, data=data, headers=self.headers) with urllib.request.urlopen(request) as response: # Convert the HTTPResponse to a string and load data response_data = response.read().decode('utf-8') data = json.loads(response_data) # Get elements and set cache elements = data.get('elements', []) self.caching_strategy.set(cache_key, elements) self.logger.debug(f'Cache set for {cache_key}') except urllib.error.URLError as e: raise ConnectionError(f"Error connecting to Overpass API: {e}") from e except Exception as exc : raise Exception(f'An unexpected error occured: {str(exc)}') from exc @staticmethod def build_query(bbox: BBOX, osm_types: OSM_TYPES, selector: str, conditions: list=None, out='center') -> str: """ Constructs a query string for the Overpass API to retrieve OpenStreetMap (OSM) data. Args: bbox (tuple): A tuple representing the geographical search area, typically in the format (lat_min, lon_min, lat_max, lon_max). osm_types (list[str]): A list of OSM element types to search for. Must be one or more of 'Way', 'Node', or 'Relation'. selector (str): The key or tag to filter the OSM elements (e.g., 'amenity', 'highway', etc.). conditions (list, optional): A list of conditions to apply as additional filters for the selected OSM elements. The conditions should be written in the Overpass QL format, and they are combined with '&&' if multiple are provided. Defaults to an empty list. out (str, optional): Specifies the output type, such as 'center', 'body', or 'tags'. Defaults to 'center'. Returns: str: The constructed Overpass QL query string. Notes: - If no conditions are provided, the query will just use the `selector` to filter the OSM elements without additional constraints. """ query = '[out:json];(' # convert the bbox to string. bbox_str = f"({','.join(map(str, bbox))})" if conditions is not None and len(conditions) > 0: conditions = '(if: ' + ' && '.join(conditions) + ')' else : conditions = '' for elem in osm_types : query += elem + '[' + selector + ']' + conditions + bbox_str + ';' query += ');' + f'out {out};' return query def _retrieve_cached_data(self, overlapping_cells: CELL, osm_types: OSM_TYPES, selector: str, conditions: list, out: str) -> Tuple[List[dict], list[CELL]]: """ Retrieve cached data and identify missing cache quadrants. Args: overlapping_cells (list): Cells to check for cached data. osm_types (list): OSM types (e.g., 'node', 'way'). selector (str): Key or tag to filter OSM elements. conditions (list): Additional conditions to apply. out (str): Output format. Returns: tuple: A tuple containing: - cached_responses (list): List of cached data found. - non_cached_cells (list(tuple)): List of cells with missing data. """ cell_key_dict = {} for cell in overlapping_cells : for elem in osm_types : key_str = f"{elem}[{selector}]{conditions}({','.join(map(str, cell))})" cell_key_dict[cell] = get_cache_key(key_str) cached_responses = [] non_cached_cells = [] # Retrieve the cached data and mark the missing entries as hollow for cell, key in cell_key_dict.items(): cached_data = self.caching_strategy.get(key) if cached_data is not None : cached_responses += cached_data else: self.caching_strategy.set_hollow(key, cell, osm_types, selector, conditions, out) non_cached_cells.append(cell) return cached_responses, non_cached_cells @staticmethod def _build_query_from_hollow(json_data: dict) -> Tuple[str, str]: """ Build query string using information from a hollow cache entry. """ # Extract values from the JSON object key = json_data.get('key') cell = tuple(json_data.get('cell')) bbox = Overpass._get_bbox_from_grid_cell(cell) osm_types = json_data.get('osm_types') selector = json_data.get('selector') conditions = json_data.get('conditions') out = json_data.get('out') query_str = Overpass.build_query(bbox, osm_types, selector, conditions, out) return query_str, key @staticmethod def _get_overlapping_cells(query_bbox: tuple) -> List[CELL]: """ Returns a set of all grid cells that overlap with the given bounding box. """ # Extract location from the query bbox lat_min, lon_min, lat_max, lon_max = query_bbox min_lat_cell, min_lon_cell = Overpass._get_grid_cell(lat_min, lon_min) max_lat_cell, max_lon_cell = Overpass._get_grid_cell(lat_max, lon_max) overlapping_cells = set() for lat_idx in range(min_lat_cell, max_lat_cell + 1): for lon_idx in range(min_lon_cell, max_lon_cell + 1): overlapping_cells.add((lat_idx, lon_idx)) return overlapping_cells @staticmethod def _get_grid_cell(lat: float, lon: float) -> CELL: """ Returns the grid cell coordinates for a given latitude and longitude. Each grid cell is 0.05°lat x 0.05°lon resolution in size. """ lat_index = math.floor(lat / RESOLUTION) lon_index = math.floor(lon / RESOLUTION) return (lat_index, lon_index) @staticmethod def _get_bbox_from_grid_cell(cell: CELL) -> BBOX: """ Returns the bounding box for a given grid cell index. Each grid cell is resolution x resolution in size. The bounding box is returned as (min_lat, min_lon, max_lat, max_lon). """ # Calculate the southwest (min_lat, min_lon) corner of the bounding box min_lat = round(cell[0] * RESOLUTION, 2) min_lon = round(cell[1] * RESOLUTION, 2) # Calculate the northeast (max_lat, max_lon) corner of the bounding box max_lat = round((cell[0] + 1) * RESOLUTION, 2) max_lon = round((cell[1] + 1) * RESOLUTION, 2) return (min_lat, min_lon, max_lat, max_lon) @staticmethod def _get_non_cached_bbox(non_cached_cells: List[CELL], original_bbox: BBOX): """ Calculate the non-cached bounding box by excluding cached cells. Args: non_cached_cells (list): The list of cells that were not found in the cache. original_bbox (tuple): The original bounding box (min_lat, min_lon, max_lat, max_lon). Returns: tuple: The new bounding box that excludes cached cells, or None if all cells are cached. """ if not non_cached_cells: return None # All cells were cached # Initialize the non-cached bounding box with extreme values min_lat, min_lon, max_lat, max_lon = float('inf'), float('inf'), float('-inf'), float('-inf') # Iterate over non-cached cells to find the new bounding box for cell in non_cached_cells: cell_min_lat, cell_min_lon, cell_max_lat, cell_max_lon = Overpass._get_bbox_from_grid_cell(cell) min_lat = min(min_lat, cell_min_lat) min_lon = min(min_lon, cell_min_lon) max_lat = max(max_lat, cell_max_lat) max_lon = max(max_lon, cell_max_lon) # If no update to bounding box, return the original if min_lat == float('inf') or min_lon == float('inf'): return None return (max(min_lat, original_bbox[0]), max(min_lon, original_bbox[1]), min(max_lat, original_bbox[2]), min(max_lon, original_bbox[3])) @staticmethod def _filter_landmarks(elements: List[dict], bbox: BBOX) -> List[dict]: """ Filters elements based on whether their coordinates are inside the given bbox. Args: - elements (list of dict): List of elements containing coordinates. - bbox (tuple): A bounding box defined as (min_lat, min_lon, max_lat, max_lon). Returns: - list: A list of elements whose coordinates are inside the bounding box. """ filtered_elements = [] min_lat, min_lon, max_lat, max_lon = bbox for elem in elements: # Extract coordinates based on the 'type' of element if elem.get('type') != 'node': center = elem.get('center', {}) lat = float(center.get('lat', 0)) lon = float(center.get('lon', 0)) else: lat = float(elem.get('lat', 0)) lon = float(elem.get('lon', 0)) # Check if the coordinates fall within the given bounding box if min_lat <= lat <= max_lat and min_lon <= lon <= max_lon: filtered_elements.append(elem) return filtered_elements def get_base_info(elem: dict, osm_type: OSM_TYPES, with_name=False) : """ Extracts base information (coordinates, OSM ID, and optionally a name) from an OSM element. This function retrieves the latitude and longitude coordinates, OSM ID, and optionally the name of a given OpenStreetMap (OSM) element. It handles different OSM types (e.g., 'node', 'way') by extracting coordinates either directly or from a center tag, depending on the element type. Args: elem (dict): The JSON element representing the OSM entity. osm_type (str): The type of the OSM entity (e.g., 'node', 'way'). If 'node', the coordinates are extracted directly from the element; otherwise, from the 'center' tag. with_name (bool): Whether to extract and return the name of the element. If True, it attempts to find the 'name' tag within the element and return its value. Defaults to False. Returns: tuple: A tuple containing: - osm_id (str): The OSM ID of the element. - coords (tuple): A tuple of (latitude, longitude) coordinates. - name (str, optional): The name of the element if `with_name` is True; otherwise, not included. """ # 1. extract coordinates if osm_type != 'node' : center = elem.get('center') lat = float(center.get('lat')) lon = float(center.get('lon')) else : lat = float(elem.get('lat')) lon = float(elem.get('lon')) coords = tuple((lat, lon)) # 2. Extract OSM id osm_id = elem.get('id') # 3. Extract name if specified and return if with_name : name = elem.get('tags', {}).get('name') return osm_id, coords, name else : return osm_id, coords def fill_cache(): """ Scans the specified cache directory for files starting with 'hollow_' and attempts to load their contents as JSON to fill the cache of the Overpass system. """ overpass = Overpass() with os.scandir(OSM_CACHE_DIR) as it: for entry in it: if entry.is_file() and entry.name.startswith('hollow_'): try : # Read the whole file content as a string with open(entry.path, 'r') as f: # load data and fill the cache with the query and key json_data = json.load(f) overpass.fill_cache(json_data) # Now delete the file as the cache is filled os.remove(entry.path) except Exception as exc : overpass.logger.error(f'An error occured while parsing file {entry.path} as .json file')