anyway/backend/src/overpass/overpass.py
Helldragon67 f48dcf80c2
Some checks failed
Run testing on the backend code / Build (pull_request) Has been cancelled
Build and deploy the backend to staging / Build and push image (pull_request) Successful in 2m7s
Run linting on the backend code / Build (pull_request) Has been cancelled
Build and deploy the backend to staging / Deploy to staging (pull_request) Successful in 24s
better exception
2025-01-28 15:19:03 +01:00

349 lines
14 KiB
Python

"""Module allowing connexion to overpass api and fectch data from OSM."""
import os
import urllib
import math
import logging
import json
from .caching_strategy import get_cache_key, CachingStrategy
from ..constants import OSM_CACHE_DIR, OSM_TYPES
RESOLUTION = 0.05
class Overpass :
"""
Overpass class to manage the query building and sending to overpass api.
The caching strategy is a part of this class and initialized upon creation of the Overpass object.
"""
logger = logging.getLogger(__name__)
def __init__(self, caching_strategy: str = 'JSON', cache_dir: str = OSM_CACHE_DIR) :
"""
Initialize the Overpass instance with the url, headers and caching strategy.
"""
self.overpass_url = "https://overpass-api.de/api/interpreter"
self.headers = {'User-Agent': 'Mozilla/5.0 (compatible; OverpassQuery/1.0; +http://example.com)',}
self.caching_strategy = CachingStrategy.use(caching_strategy, cache_dir=cache_dir)
def send_query(self, bbox: tuple, osm_types: OSM_TYPES,
selector: str, conditions: list=None, out='center'):
"""
Sends the Overpass QL query to the Overpass API and returns the parsed json response.
Args:
bbox (tuple): Bounding box for the query.
osm_types (list[str]): List of OSM element types (e.g., 'node', 'way').
selector (str): Key or tag to filter OSM elements (e.g., 'highway').
conditions (list): Optional list of additional filter conditions in Overpass QL format.
out (str): Output format ('center', 'body', etc.). Defaults to 'center'.
Returns:
dict: Parsed json response from the Overpass API, or cached data if available.
"""
# Determine which grid cells overlap with this bounding box.
overlapping_cells = Overpass._get_overlapping_cells(bbox)
# Retrieve cached data and identify missing cache entries
cached_responses, hollow_cache_keys = self._retrieve_cached_data(overlapping_cells, osm_types, selector, conditions, out)
# If there is no missing data, return the cached responses
if not hollow_cache_keys :
self.logger.info('Cache hit.')
return self._combine_cached_data(cached_responses)
# TODO If there is SOME missing data : hybrid stuff with partial cache
# Missing data: Make a query to Overpass API
query_str = Overpass.build_query(bbox, osm_types, selector, conditions, out)
return self.fetch_data_from_api(query_str)
def fetch_data_from_api(self, query_str: str) -> list:
"""
Fetch data from the Overpass API and return the json data.
Args:
query_str (str): The Overpass query string.
Returns:
dict: Combined cached and fetched data.
"""
try:
data = urllib.parse.urlencode({'data': query_str}).encode('utf-8')
request = urllib.request.Request(self.overpass_url, data=data, headers=self.headers)
with urllib.request.urlopen(request) as response:
response_data = response.read().decode('utf-8') # Convert the HTTPResponse to a string
data = json.loads(response_data) # Load the JSON from the string
elements = data.get('elements', [])
self.logger.info(f'Cache miss. Fetching data through Overpass.')
self.logger.debug(f'Query = {query_str}')
return elements
except urllib.error.URLError as e:
self.logger.error(f"Error connecting to Overpass API: {e}")
raise ConnectionError(f"Error connecting to Overpass API: {e}") from e
except Exception as exc :
raise Exception(f'An unexpected error occured: {str(exc)}') from exc
def fill_cache(self, json_data: dict) :
"""
Fill cache with data by using a hollow cache entry's information.
"""
query_str, cache_key = Overpass._build_query_from_hollow(json_data)
try:
data = urllib.parse.urlencode({'data': query_str}).encode('utf-8')
request = urllib.request.Request(self.overpass_url, data=data, headers=self.headers)
with urllib.request.urlopen(request) as response:
# Convert the HTTPResponse to a string and load data
response_data = response.read().decode('utf-8')
data = json.loads(response_data)
# Get elements and set cache
elements = data.get('elements', [])
self.caching_strategy.set(cache_key, elements)
self.logger.debug(f'Cache set for {cache_key}')
except urllib.error.URLError as e:
raise ConnectionError(f"Error connecting to Overpass API: {e}") from e
except Exception as exc :
raise Exception(f'An unexpected error occured: {str(exc)}') from exc
@staticmethod
def build_query(bbox: tuple, osm_types: OSM_TYPES,
selector: str, conditions: list=None, out='center') -> str:
"""
Constructs a query string for the Overpass API to retrieve OpenStreetMap (OSM) data.
Args:
bbox (tuple): A tuple representing the geographical search area, typically in the format
(lat_min, lon_min, lat_max, lon_max).
osm_types (list[str]): A list of OSM element types to search for. Must be one or more of
'Way', 'Node', or 'Relation'.
selector (str): The key or tag to filter the OSM elements (e.g., 'amenity', 'highway', etc.).
conditions (list, optional): A list of conditions to apply as additional filters for the
selected OSM elements. The conditions should be written in
the Overpass QL format, and they are combined with '&&' if
multiple are provided. Defaults to an empty list.
out (str, optional): Specifies the output type, such as 'center', 'body', or 'tags'.
Defaults to 'center'.
Returns:
str: The constructed Overpass QL query string.
Notes:
- If no conditions are provided, the query will just use the `selector` to filter the OSM
elements without additional constraints.
"""
query = '[out:json];('
# convert the bbox to string.
bbox_str = f"({','.join(map(str, bbox))})"
if conditions is not None and len(conditions) > 0:
conditions = '(if: ' + ' && '.join(conditions) + ')'
else :
conditions = ''
for elem in osm_types :
query += elem + '[' + selector + ']' + conditions + bbox_str + ';'
query += ');' + f'out {out};'
return query
def _retrieve_cached_data(self, overlapping_cells: list, osm_types: OSM_TYPES, selector: str, conditions: list, out: str):
"""
Retrieve cached data and identify missing cache entries.
Args:
overlapping_cells (list): Cells to check for cached data.
osm_types (list): OSM types (e.g., 'node', 'way').
selector (str): Key or tag to filter OSM elements.
conditions (list): Additional conditions to apply.
out (str): Output format.
Returns:
tuple: A tuple containing:
- cached_responses (list): List of cached data found.
- hollow_cache_keys (list): List of keys with missing data.
"""
cell_key_dict = {}
for cell in overlapping_cells :
for elem in osm_types :
key_str = f"{elem}[{selector}]{conditions}({','.join(map(str, cell))})"
cell_key_dict[cell] = get_cache_key(key_str)
cached_responses = []
hollow_cache_keys = []
# Retrieve the cached data and mark the missing entries as hollow
for cell, key in cell_key_dict.items():
cached_data = self.caching_strategy.get(key)
if cached_data is not None :
cached_responses.append(cached_data)
else:
self.caching_strategy.set_hollow(key, cell, osm_types, selector, conditions, out)
hollow_cache_keys.append(key)
return cached_responses, hollow_cache_keys
@staticmethod
def _build_query_from_hollow(json_data: dict):
"""
Build query string using information from a hollow cache entry.
"""
# Extract values from the JSON object
key = json_data.get('key')
cell = tuple(json_data.get('cell'))
bbox = Overpass._get_bbox_from_grid_cell(cell[0], cell[1])
osm_types = json_data.get('osm_types')
selector = json_data.get('selector')
conditions = json_data.get('conditions')
out = json_data.get('out')
query_str = Overpass.build_query(bbox, osm_types, selector, conditions, out)
return query_str, key
@staticmethod
def _get_overlapping_cells(query_bbox: tuple):
"""
Returns a set of all grid cells that overlap with the given bounding box.
"""
# Extract location from the query bbox
lat_min, lon_min, lat_max, lon_max = query_bbox
min_lat_cell, min_lon_cell = Overpass._get_grid_cell(lat_min, lon_min)
max_lat_cell, max_lon_cell = Overpass._get_grid_cell(lat_max, lon_max)
overlapping_cells = set()
for lat_idx in range(min_lat_cell, max_lat_cell + 1):
for lon_idx in range(min_lon_cell, max_lon_cell + 1):
overlapping_cells.add((lat_idx, lon_idx))
return overlapping_cells
@staticmethod
def _get_grid_cell(lat: float, lon: float):
"""
Returns the grid cell coordinates for a given latitude and longitude.
Each grid cell is 0.05°lat x 0.05°lon resolution in size.
"""
lat_index = math.floor(lat / RESOLUTION)
lon_index = math.floor(lon / RESOLUTION)
return (lat_index, lon_index)
@staticmethod
def _get_bbox_from_grid_cell(lat_index: int, lon_index: int):
"""
Returns the bounding box for a given grid cell index.
Each grid cell is resolution x resolution in size.
The bounding box is returned as (min_lat, min_lon, max_lat, max_lon).
"""
# Calculate the southwest (min_lat, min_lon) corner of the bounding box
min_lat = round(lat_index * RESOLUTION, 2)
min_lon = round(lon_index * RESOLUTION, 2)
# Calculate the northeast (max_lat, max_lon) corner of the bounding box
max_lat = round((lat_index + 1) * RESOLUTION, 2)
max_lon = round((lon_index + 1) * RESOLUTION, 2)
return (min_lat, min_lon, max_lat, max_lon)
@staticmethod
def _combine_cached_data(cached_data_list):
"""
Combines data from multiple cached responses into a single result.
"""
combined_data = []
for cached_data in cached_data_list:
for element in cached_data:
combined_data.append(element)
return combined_data
def get_base_info(elem: dict, osm_type: OSM_TYPES, with_name=False) :
"""
Extracts base information (coordinates, OSM ID, and optionally a name) from an OSM element.
This function retrieves the latitude and longitude coordinates, OSM ID, and optionally the name
of a given OpenStreetMap (OSM) element. It handles different OSM types (e.g., 'node', 'way') by
extracting coordinates either directly or from a center tag, depending on the element type.
Args:
elem (dict): The JSON element representing the OSM entity.
osm_type (str): The type of the OSM entity (e.g., 'node', 'way'). If 'node', the coordinates
are extracted directly from the element; otherwise, from the 'center' tag.
with_name (bool): Whether to extract and return the name of the element. If True, it attempts
to find the 'name' tag within the element and return its value. Defaults to False.
Returns:
tuple: A tuple containing:
- osm_id (str): The OSM ID of the element.
- coords (tuple): A tuple of (latitude, longitude) coordinates.
- name (str, optional): The name of the element if `with_name` is True; otherwise, not included.
"""
# 1. extract coordinates
if osm_type != 'node' :
center = elem.get('center')
lat = float(center.get('lat'))
lon = float(center.get('lon'))
else :
lat = float(elem.get('lat'))
lon = float(elem.get('lon'))
coords = tuple((lat, lon))
# 2. Extract OSM id
osm_id = elem.get('id')
# 3. Extract name if specified and return
if with_name :
name = elem.get('tags', {}).get('name')
return osm_id, coords, name
else :
return osm_id, coords
def fill_cache():
overpass = Overpass()
with os.scandir(OSM_CACHE_DIR) as it:
for entry in it:
if entry.is_file() and entry.name.startswith('hollow_'):
# Read the whole file content as a string
with open(entry.path, 'r') as f:
try :
json_data = json.load(f)
except json.decoder.JSONDecodeError as exc :
raise Exception(f'Failed to parse file {f}') from exc
except Exception as exc :
raise Exception(f'An unexpected error occured while parsing file {f}') from exc
# Fill the cache with the query and key
overpass.fill_cache(json_data)
# Now delete the file as the cache is filled
os.remove(entry.path)