better structure
Some checks failed
Build and deploy the backend to staging / Build and push image (pull_request) Successful in 1m45s
Run linting on the backend code / Build (pull_request) Successful in 27s
Run testing on the backend code / Build (pull_request) Failing after 45s
Build and deploy the backend to staging / Deploy to staging (pull_request) Successful in 25s

This commit is contained in:
2025-02-19 15:53:41 +01:00
parent 83be4b7616
commit 05092e55f1
9 changed files with 88 additions and 64 deletions

View File

@@ -0,0 +1,300 @@
"""Find clusters of interest to add more general areas of visit to the tour."""
import logging
from typing import Literal, Tuple
import numpy as np
from sklearn.cluster import DBSCAN
from pydantic import BaseModel
from ..overpass.overpass import Overpass, get_base_info
from ..structs.landmark import Landmark
from ..utils.get_time_distance import get_distance
from ..utils.bbox import create_bbox
# silence the overpass logger
logging.getLogger('Overpass').setLevel(level=logging.CRITICAL)
class Cluster(BaseModel):
""""
A class representing an interesting area for shopping or sightseeing.
It can represent either a general area or a specifc route with start and end point.
The importance represents the number of shops found in this cluster.
Attributes:
type : either a 'street' or 'area' (representing a denser field of shops).
importance : size of the cluster (number of points).
centroid : center of the cluster.
start : if the type is a street it goes from here...
end : ...to here
"""
type: Literal['street', 'area']
importance: int
centroid: Tuple[float, float]
# start: Optional[list] = None # for later use if we want to have streets as well
# end: Optional[list] = None
class ClusterManager:
"""
A manager responsible for clustering points of interest, such as shops or historic sites,
to identify areas worth visiting. It uses the DBSCAN algorithm to detect clusters
based on a set of points retrieved from OpenStreetMap (OSM).
Attributes:
logger (logging.Logger): Logger for capturing relevant events and errors.
valid (bool): Indicates whether clusters were successfully identified.
all_points (list): All points retrieved from OSM, representing locations of interest.
cluster_points (list): Points identified as part of a cluster.
cluster_labels (list): Labels corresponding to the clusters each point belongs to.
cluster_type (Literal['sightseeing', 'shopping']): Type of clustering, either for sightseeing
landmarks or shopping areas.
"""
logger = logging.getLogger(__name__)
# NOTE: all points are in (lat, lon) format
valid: bool # Ensure the manager is valid (ie there are some clusters to be found)
all_points: list
cluster_points: list
cluster_labels: list
cluster_type: Literal['sightseeing', 'shopping']
def __init__(self, bbox: tuple, cluster_type: Literal['sightseeing', 'shopping']) -> None:
"""
Upon intialization, generate the point cloud used for cluster detection.
The points represent bag/clothes shops and general boutiques.
If the first step is successful, it applies the DBSCAN clustering algorithm with different
parameters depending on the size of the city (number of points).
It filters out noise points and keeps only the largest clusters.
A successful initialization updates:
- `self.cluster_points`: The points belonging to clusters.
- `self.cluster_labels`: The labels for the points in clusters.
The method also calls `filter_clusters()` to retain only the largest clusters.
Args:
bbox: The bounding box coordinates (around:radius, center_lat, center_lon).
"""
# Setup the caching in the Overpass class.
self.overpass = Overpass()
self.cluster_type = cluster_type
if cluster_type == 'shopping' :
osm_types = ['node']
sel = '"shop"~"^(bag|boutique|clothes)$"'
out = 'ids center'
elif cluster_type == 'sightseeing' :
osm_types = ['way']
sel = '"historic"~"^(monument|building|yes)$"'
out = 'ids center'
else :
raise NotImplementedError("Please choose only an available option for cluster detection")
# Initialize the points for cluster detection
try:
result = self.overpass.send_query(
bbox = bbox,
osm_types = osm_types,
selector = sel,
out = out
)
except Exception as e:
self.logger.error(f"Error fetching clusters: {e}")
if result is None :
self.logger.debug(f"Found no {cluster_type} clusters, overpass query returned no datapoints.")
self.valid = False
else :
points = []
for elem in result:
osm_type = elem.get('type')
# Get coordinates and append them to the points list
_, coords = get_base_info(elem, osm_type)
if coords is not None :
points.append(coords)
if points :
self.all_points = np.array(points)
# Apply DBSCAN to find clusters. Choose different settings for different cities.
if self.cluster_type == 'shopping' and len(self.all_points) > 200 :
dbscan = DBSCAN(eps=0.00118, min_samples=15, algorithm='kd_tree') # for large cities
elif self.cluster_type == 'sightseeing' :
dbscan = DBSCAN(eps=0.0025, min_samples=15, algorithm='kd_tree') # for historic neighborhoods
else :
dbscan = DBSCAN(eps=0.00075, min_samples=10, algorithm='kd_tree') # for small cities
labels = dbscan.fit_predict(self.all_points)
# Check that there are is least 1 cluster
if len(set(labels)) > 1 :
self.logger.info(f"Found {len(set(labels))} different {cluster_type} clusters.")
# Separate clustered points and noise points
self.cluster_points = self.all_points[labels != -1]
self.cluster_labels = labels[labels != -1]
self.filter_clusters() # ValueError here sometimes. I dont know why. # Filter the clusters to keep only the largest ones.
self.valid = True
else :
self.logger.info(f"Found 0 {cluster_type} clusters.")
self.valid = False
else :
self.logger.debug(f"Detected 0 {cluster_type} clusters.")
self.valid = False
def generate_clusters(self) -> list[Landmark]:
"""
Generate a list of landmarks based on identified clusters.
This method iterates over the different clusters, calculates the centroid
(as the mean of the points within each cluster), and assigns an importance
based on the size of the cluster.
The generated shopping locations are stored in `self.clusters`
as a list of `Cluster` objects, each with:
- `type`: Set to 'area'.
- `centroid`: The calculated centroid of the cluster.
- `importance`: The number of points in the cluster.
"""
if not self.valid :
return [] # Return empty list if no clusters were found
locations = []
# loop through the different clusters
for label in set(self.cluster_labels):
# Extract points belonging to the current cluster
current_cluster = self.cluster_points[self.cluster_labels == label]
# Calculate the centroid as the mean of the points
centroid = np.mean(current_cluster, axis=0)
centroid = tuple((round(centroid[0], 7), round(centroid[1], 7)))
if self.cluster_type == 'shopping' :
score = len(current_cluster)*3
else :
score = len(current_cluster)*15
locations.append(Cluster(
type='area',
centroid=centroid,
importance = score
))
# Transform the locations in landmarks and return the list
cluster_landmarks = []
for cluster in locations :
cluster_landmarks.append(self.create_landmark(cluster))
return cluster_landmarks
def create_landmark(self, cluster: Cluster) -> Landmark:
"""
Create a Landmark object based on the given shopping location.
This method queries the Overpass API for nearby neighborhoods and shopping malls
within a 1000m radius around the shopping location centroid. It selects the closest
result and creates a landmark with the associated details such as name, type, and OSM ID.
Parameters:
shopping_location (Cluster): A Cluster object containing
the centroid and importance of the area.
Returns:
Landmark: A Landmark object containing details such as the name, type,
location, attractiveness, and OSM details.
"""
# Define the bounding box for a given radius around the coordinates
bbox = create_bbox(cluster.centroid, 300)
# Query neighborhoods and shopping malls
selectors = ['"place"~"^(suburb|neighborhood|neighbourhood|quarter|city_block)$"']
if self.cluster_type == 'shopping' :
selectors.append('"shop"="mall"')
new_name = 'Shopping Area'
t = 30
else :
new_name = 'Neighborhood'
t = 20
min_dist = float('inf')
osm_id = 0
osm_type = 'node'
osm_types = ['node', 'way', 'relation']
for sel in selectors :
try:
result = self.overpass.send_query(bbox = bbox,
osm_types = osm_types,
selector = sel,
out = 'ids center tags'
)
except Exception as e:
self.logger.error(f"Error fetching clusters: {e}")
continue
if result is None :
self.logger.error(f"Error fetching clusters: {e}")
continue
for elem in result:
# Get basic info
id, coords, name = get_base_info(elem, elem.get('type'), with_name=True)
if name is None or coords is None :
continue
d = get_distance(cluster.centroid, coords)
if d < min_dist :
min_dist = d
new_name = name # add name
osm_type = elem.get('type') # add type: 'way' or 'relation'
osm_id = id # add OSM id
return Landmark(
name=new_name,
type=self.cluster_type,
location=cluster.centroid, # later: use the fact the we can also recognize streets.
attractiveness=cluster.importance,
n_tags=0,
osm_id=osm_id,
osm_type=osm_type,
duration=t
)
def filter_clusters(self):
"""
Filter clusters to retain only the 5 largest clusters by point count.
This method calculates the size of each cluster and filters out all but the
5 largest clusters. It then updates the cluster points and labels to reflect
only those from the top 5 clusters.
"""
label_counts = np.bincount(self.cluster_labels)
# Step 3: Get the indices (labels) of the 5 largest clusters
top_5_labels = np.argsort(label_counts)[-5:] # Get the largest 5 clusters
# Step 4: Filter points to keep only the points in the top 5 clusters
filtered_cluster_points = []
filtered_cluster_labels = []
for label in top_5_labels:
filtered_cluster_points.append(self.cluster_points[self.cluster_labels == label])
filtered_cluster_labels.append(np.full((label_counts[label],), label)) # Replicate the label
# update the cluster points and labels with the filtered data
self.cluster_points = np.vstack(filtered_cluster_points) # ValueError here
self.cluster_labels = np.concatenate(filtered_cluster_labels)

View File

@@ -0,0 +1,409 @@
"""Module used to import data from OSM and arrange them in categories."""
import logging
import yaml
from ..structs.preferences import Preferences
from ..structs.landmark import Landmark
from ..utils.take_most_important import take_most_important
from .cluster_manager import ClusterManager
from ..overpass.overpass import Overpass, get_base_info
from ..utils.bbox import create_bbox
from ..constants import AMENITY_SELECTORS_PATH, LANDMARK_PARAMETERS_PATH, OPTIMIZER_PARAMETERS_PATH
class LandmarkManager:
"""
Use this to manage landmarks.
Uses the overpass api to fetch landmarks and classify them.
"""
logger = logging.getLogger(__name__)
radius_close_to: int # radius in meters
church_coeff: float # coeff to adjsut score of churches
nature_coeff: float # coeff to adjust score of parks
overall_coeff: float # coeff to adjust weight of tags
n_important: int # number of important landmarks to consider
def __init__(self) -> None:
with AMENITY_SELECTORS_PATH.open('r') as f:
self.amenity_selectors = yaml.safe_load(f)
with LANDMARK_PARAMETERS_PATH.open('r') as f:
parameters = yaml.safe_load(f)
self.max_bbox_side = parameters['max_bbox_side']
self.church_coeff = parameters['church_coeff']
self.nature_coeff = parameters['nature_coeff']
self.overall_coeff = parameters['overall_coeff']
self.tag_exponent = parameters['tag_exponent']
self.image_bonus = parameters['image_bonus']
self.wikipedia_bonus = parameters['wikipedia_bonus']
self.viewpoint_bonus = parameters['viewpoint_bonus']
self.pay_bonus = parameters['pay_bonus']
self.n_important = parameters['N_important']
with OPTIMIZER_PARAMETERS_PATH.open('r') as f:
parameters = yaml.safe_load(f)
self.walking_speed = parameters['average_walking_speed']
self.detour_factor = parameters['detour_factor']
# Setup the caching in the Overpass class.
self.overpass = Overpass()
self.logger.info('LandmakManager successfully initialized.')
def generate_landmarks_list(self, center_coordinates: tuple[float, float], preferences: Preferences) -> tuple[list[Landmark], list[Landmark]]:
"""
Generate and prioritize a list of landmarks based on user preferences.
This method fetches landmarks from various categories (sightseeing, nature, shopping) based on the user's preferences
and current location. It scores and corrects these landmarks, removes duplicates, and then selects the most important
landmarks based on a predefined criterion.
Args:
center_coordinates (tuple[float, float]): The latitude and longitude of the center location around which to search.
preferences (Preferences): The user's preference settings that influence the landmark selection.
Returns:
tuple[list[Landmark], list[Landmark]]:
- A list of all existing landmarks.
- A list of the most important landmarks based on the user's preferences.
"""
self.logger.debug('Starting to fetch landmarks...')
max_walk_dist = int((preferences.max_time_minute/2)/60*self.walking_speed*1000/self.detour_factor)
radius = min(max_walk_dist, int(self.max_bbox_side/2))
# use set to avoid duplicates, this requires some __methods__ to be set in Landmark
all_landmarks = set()
# Create a bbox using the around technique, tuple of strings
bbox = create_bbox(center_coordinates, radius)
# list for sightseeing
if preferences.sightseeing.score != 0:
self.logger.debug('Fetching sightseeing landmarks...')
current_landmarks = self.fetch_landmarks(bbox, self.amenity_selectors['sightseeing'], preferences.sightseeing.type, preferences.sightseeing.score)
all_landmarks.update(current_landmarks)
self.logger.info(f'Found {len(current_landmarks)} sightseeing landmarks')
# special pipeline for historic neighborhoods
neighborhood_manager = ClusterManager(bbox, 'sightseeing')
historic_clusters = neighborhood_manager.generate_clusters()
all_landmarks.update(historic_clusters)
# list for nature
if preferences.nature.score != 0:
self.logger.debug('Fetching nature landmarks...')
current_landmarks = self.fetch_landmarks(bbox, self.amenity_selectors['nature'], preferences.nature.type, preferences.nature.score)
all_landmarks.update(current_landmarks)
self.logger.info(f'Found {len(current_landmarks)} nature landmarks')
# list for shopping
if preferences.shopping.score != 0:
self.logger.debug('Fetching shopping landmarks...')
current_landmarks = self.fetch_landmarks(bbox, self.amenity_selectors['shopping'], preferences.shopping.type, preferences.shopping.score)
self.logger.info(f'Found {len(current_landmarks)} shopping landmarks')
# set time for all shopping activites :
for landmark in current_landmarks :
landmark.duration = 30
all_landmarks.update(current_landmarks)
# special pipeline for shopping malls
shopping_manager = ClusterManager(bbox, 'shopping')
shopping_clusters = shopping_manager.generate_clusters()
all_landmarks.update(shopping_clusters)
landmarks_constrained = take_most_important(all_landmarks, self.n_important)
# self.logger.info(f'All landmarks generated : {len(all_landmarks)} landmarks around {center_coordinates}, and constrained to {len(landmarks_constrained)} most important ones.')
return all_landmarks, landmarks_constrained
def set_landmark_score(self, landmark: Landmark, landmarktype: str, preference_level: int) :
"""
Calculate and set the attractiveness score for a given landmark.
This method evaluates the landmark's attractiveness based on its properties
(number of tags, presence of Wikipedia URL, image, website, and whether it's
a place of worship) and adjusts the score using the user's preference level.
Args:
landmark (Landmark): The landmark object to score.
landmarktype (str): The type of the landmark (currently unused).
preference_level (int): The user's preference level for this landmark type.
"""
score = landmark.n_tags**self.tag_exponent
if landmark.wiki_url :
score *= self.wikipedia_bonus
if landmark.image_url :
score *= self.image_bonus
if landmark.website_url :
score *= self.wikipedia_bonus
if landmark.is_place_of_worship :
score *= self.church_coeff
if landmark.is_viewpoint :
score *= self.viewpoint_bonus
if landmarktype == 'nature' :
score *= self.nature_coeff
landmark.attractiveness = int(score * preference_level * 2)
def fetch_landmarks(self, bbox: tuple, amenity_selector: dict, landmarktype: str, preference_level: int) -> list[Landmark]:
"""
Fetches landmarks of a specified type from OpenStreetMap (OSM) within a bounding box centered on given coordinates.
Args:
bbox (tuple[float, float, float, float]): The bounding box coordinates (around:radius, center_lat, center_lon).
amenity_selector (dict): The Overpass API query selector for the desired landmark type.
landmarktype (str): The type of the landmark (e.g., 'sightseeing', 'nature', 'shopping').
Returns:
list[Landmark]: A list of Landmark objects that were fetched and filtered based on the provided criteria.
Notes:
- Landmarks are fetched using Overpass API queries.
- Selectors are translated from the dictionary to the Overpass query format. (e.g., 'amenity'='place_of_worship')
- Landmarks are filtered based on various conditions including tags and type.
"""
return_list = []
if landmarktype == 'nature' : query_conditions = None
else : query_conditions = ['count_tags()>5']
# caution, when applying a list of selectors, overpass will search for elements that match ALL selectors simultaneously
# we need to split the selectors into separate queries and merge the results
for sel in dict_to_selector_list(amenity_selector):
# self.logger.debug(f"Current selector: {sel}")
osm_types = ['way', 'relation']
if 'viewpoint' in sel :
query_conditions = None
osm_types.append('node')
# Send the overpass query
try:
result = self.overpass.send_query(
bbox = bbox,
osm_types = osm_types,
selector = sel,
conditions = query_conditions, # except for nature....
out = 'ids center tags'
)
except Exception as e:
self.logger.error(f"Error fetching landmarks: {str(e)}")
continue
return_list += self._to_landmarks(result, landmarktype, preference_level)
# self.logger.debug(f"Fetched {len(return_list)} landmarks of type {landmarktype} in {bbox}")
return return_list
def _to_landmarks(self, elements: list, landmarktype, preference_level) -> list[Landmark]:
"""
Parse the Overpass API result and extract landmarks.
This method processes the JSON elements returned by the Overpass API and
extracts landmarks of types 'node', 'way', and 'relation'. It retrieves
relevant information such as name, coordinates, and tags, and converts them
into Landmark objects.
Args:
elements (list): The elements of json response from Overpass API.
elem_type (str): The type of landmark (e.g., node, way, relation).
Returns:
list[Landmark]: A list of Landmark objects extracted from the JSON data.
"""
if elements is None :
return []
landmarks = []
for elem in elements:
osm_type = elem.get('type')
id, coords, name = get_base_info(elem, osm_type, with_name=True)
if name is None or coords is None :
continue
tags = elem.get('tags')
# Convert this to Landmark object
landmark = Landmark(name=name,
type=landmarktype,
location=coords,
osm_id=id,
osm_type=osm_type,
attractiveness=0,
n_tags=len(tags))
# Browse through tags to add information to landmark.
for key, value in tags.items():
# Skip this landmark if not suitable.
if key == 'building:part' and value == 'yes' :
break
if 'disused:' in key :
break
if 'boundary:' in key :
break
if 'shop' in key and landmarktype != 'shopping' :
break
# if value == 'apartments' :
# break
# Fill in the other attributes.
if key == 'image' :
landmark.image_url = value
if key == 'website' :
landmark.website_url = value
if value == 'place_of_worship' :
landmark.is_place_of_worship = True
if key == 'wikipedia' :
landmark.wiki_url = value
if key == 'name:en' :
landmark.name_en = value
if 'building:' in key or 'pay' in key :
landmark.n_tags -= 1
# Set the duration.
if value in ['museum', 'aquarium', 'planetarium'] :
landmark.duration = 60
elif value == 'viewpoint' :
landmark.is_viewpoint = True
landmark.duration = 10
elif value == 'cathedral' :
landmark.is_place_of_worship = False
landmark.duration = 10
landmark.description, landmark.keywords = self.description_and_keywords(tags)
self.set_landmark_score(landmark, landmarktype, preference_level)
landmarks.append(landmark)
continue
return landmarks
def description_and_keywords(self, tags: dict):
"""
"""
# Extract relevant fields
name = tags.get('name')
importance = tags.get('importance', None)
n_visitors = tags.get('tourism:visitors', None)
height = tags.get('height')
place_type = self.get_place_type(tags)
date = self.get_date(tags)
if place_type is None :
return None, None
# Start the description.
if importance is None :
if len(tags.keys()) < 5 :
return None, None
elif len(tags.keys()) < 10 :
description = f"{name} is a well known {place_type}."
elif len(tags.keys()) < 17 :
importance = 'national'
description = f"{name} is a {place_type} of national importance."
else :
importance = 'international'
description = f"{name} is an internationally famous {place_type}."
else :
description = f"{name} is a {place_type} of {importance} importance."
if height is not None and date is not None :
description += f" This {place_type} was constructed in {date} and is ca. {height} meters high."
elif height is not None :
description += f" This {place_type} stands ca. {height} meters tall."
elif date is not None:
description += f" It was constructed in {date}."
# Format the visitor number
if n_visitors is not None :
n_visitors = int(n_visitors)
if n_visitors < 1000000 :
description += f" It welcomes {int(n_visitors/1000)} thousand visitors every year."
else :
description += f" It welcomes {round(n_visitors/1000000, 1)} million visitors every year."
# Set the keywords.
keywords = {"importance": importance,
"height": height,
"place_type": place_type,
"date": date}
return description, keywords
def get_place_type(self, data):
amenity = data.get('amenity', None)
building = data.get('building', None)
historic = data.get('historic', None)
leisure = data.get('leisure')
if historic and historic != "yes":
return historic
if building and building not in ["yes", "civic", "government", "apartments", "residential", "commericial", "industrial", "retail", "religious", "public", "service"]:
return building
if amenity:
return amenity
if leisure:
return leisure
return None
def get_date(self, data):
construction_date = data.get('construction_date', None)
opening_date = data.get('opening_date', None)
start_date = data.get('start_date', None)
year_of_construction = data.get('year_of_construction', None)
# Prioritize based on availability
if construction_date:
return construction_date
if start_date:
return start_date
if year_of_construction:
return year_of_construction
if opening_date:
return opening_date
return None
def dict_to_selector_list(d: dict) -> list:
"""
Convert a dictionary of key-value pairs to a list of Overpass query strings.
Args:
d (dict): A dictionary of key-value pairs representing the selector.
Returns:
list: A list of strings representing the Overpass query selectors.
"""
return_list = []
for key, value in d.items():
if isinstance(value, list):
val = '|'.join(value)
return_list.append(f'{key}~"^({val})$"')
elif isinstance(value, str) and len(value) == 0:
return_list.append(f'{key}')
else:
return_list.append(f'{key}={value}')
return return_list