From 9b61471c940a33d45c5de638310ff4c38da31dc3 Mon Sep 17 00:00:00 2001 From: Helldragon67 Date: Mon, 16 Dec 2024 17:56:53 +0100 Subject: [PATCH 1/2] better naming and MM --- .../src/parameters/landmark_parameters.yaml | 6 +- backend/src/tests/test_main.py | 10 +- ...uster_processing.py => cluster_manager.py} | 175 +++++++++--------- backend/src/utils/landmarks_manager.py | 33 ++-- 4 files changed, 119 insertions(+), 105 deletions(-) rename backend/src/utils/{cluster_processing.py => cluster_manager.py} (68%) diff --git a/backend/src/parameters/landmark_parameters.yaml b/backend/src/parameters/landmark_parameters.yaml index 2f6a78d..ef2c81d 100644 --- a/backend/src/parameters/landmark_parameters.yaml +++ b/backend/src/parameters/landmark_parameters.yaml @@ -1,11 +1,11 @@ city_bbox_side: 7500 #m radius_close_to: 50 -church_coeff: 0.9 -nature_coeff: 1.25 +church_coeff: 0.65 +nature_coeff: 1.35 overall_coeff: 10 tag_exponent: 1.15 image_bonus: 10 -viewpoint_bonus: 15 +viewpoint_bonus: 5 wikipedia_bonus: 4 name_bonus: 3 N_important: 40 diff --git a/backend/src/tests/test_main.py b/backend/src/tests/test_main.py index 8fe7436..e8ef0f1 100644 --- a/backend/src/tests/test_main.py +++ b/backend/src/tests/test_main.py @@ -53,7 +53,7 @@ def test_bellecour(client, request) : # pylint: disable=redefined-outer-name client: request: """ - duration_minutes = 30 + duration_minutes = 120 response = client.post( "/trip/new", json={ @@ -72,10 +72,15 @@ def test_bellecour(client, request) : # pylint: disable=redefined-outer-name # Add details to report log_trip_details(request, landmarks, result['total_time'], duration_minutes) + for elem in landmarks : + print(elem) + # checks : assert response.status_code == 200 # check for successful planning assert duration_minutes*0.8 < int(result['total_time']) < duration_minutes*1.2 assert 136200148 in osm_ids # check for Cathédrale St. Jean in trip + assert response.status_code == 2000 # check for successful planning + def test_shopping(client, request) : # pylint: disable=redefined-outer-name @@ -86,7 +91,7 @@ def test_shopping(client, request) : # pylint: disable=redefined-outer-name client: request: """ - duration_minutes = 600 + duration_minutes = 1000 response = client.post( "/trip/new", json={ @@ -100,7 +105,6 @@ def test_shopping(client, request) : # pylint: disable=redefined-outer-name ) result = response.json() landmarks = load_trip_landmarks(client, result['first_landmark_uuid']) - # osm_ids = landmarks_to_osmid(landmarks) # Add details to report log_trip_details(request, landmarks, result['total_time'], duration_minutes) diff --git a/backend/src/utils/cluster_processing.py b/backend/src/utils/cluster_manager.py similarity index 68% rename from backend/src/utils/cluster_processing.py rename to backend/src/utils/cluster_manager.py index 2114bcb..e715b58 100644 --- a/backend/src/utils/cluster_processing.py +++ b/backend/src/utils/cluster_manager.py @@ -9,12 +9,12 @@ from OSMPythonTools.cachingStrategy import CachingStrategy, JSON from ..structs.landmark import Landmark from ..utils.get_time_separation import get_distance -from ..constants import AMENITY_SELECTORS_PATH, LANDMARK_PARAMETERS_PATH, OPTIMIZER_PARAMETERS_PATH, OSM_CACHE_DIR +from ..constants import OSM_CACHE_DIR -class ShoppingLocation(BaseModel): +class Cluster(BaseModel): """" - A classe representing an interesting area for shopping. + A class representing an interesting area for shopping or sightseeing. It can represent either a general area or a specifc route with start and end point. The importance represents the number of shops found in this cluster. @@ -33,7 +33,7 @@ class ShoppingLocation(BaseModel): # end: Optional[list] = None -class ShoppingManager: +class ClusterManager: logger = logging.getLogger(__name__) @@ -42,12 +42,21 @@ class ShoppingManager: all_points: list cluster_points: list cluster_labels: list - shopping_locations: list[ShoppingLocation] + cluster_type: Literal['sightseeing', 'shopping'] - def __init__(self, bbox: tuple) -> None: + def __init__(self, bbox: tuple, cluster_type: Literal['sightseeing', 'shopping']) -> None: """ Upon intialization, generate the point cloud used for cluster detection. The points represent bag/clothes shops and general boutiques. + If the first step is successful, it applies the DBSCAN clustering algorithm with different + parameters depending on the size of the city (number of points). + It filters out noise points and keeps only the largest clusters. + + A successful initialization updates: + - `self.cluster_points`: The points belonging to clusters. + - `self.cluster_labels`: The labels for the points in clusters. + + The method also calls `filter_clusters()` to retain only the largest clusters. Args: bbox: The bounding box coordinates (around:radius, center_lat, center_lon). @@ -57,13 +66,23 @@ class ShoppingManager: self.overpass = Overpass() CachingStrategy.use(JSON, cacheDir=OSM_CACHE_DIR) + self.cluster_type = cluster_type + if cluster_type == 'shopping' : + elem_type = ['node'] + sel = ['"shop"~"^(bag|boutique|clothes)$"'] + out = 'skel' + else : + elem_type = ['way'] + sel = ['"historic"="building"'] + out = 'center' + # Initialize the points for cluster detection query = overpassQueryBuilder( bbox = bbox, - elementType = ['node'], - selector = ['"shop"~"^(bag|boutique|clothes)$"'], + elementType = elem_type, + selector = sel, includeCenter = True, - out = 'skel' + out = out ) try: @@ -77,87 +96,50 @@ class ShoppingManager: else : points = [] for elem in result.elements() : - points.append(tuple((elem.lat(), elem.lon()))) + coords = tuple((elem.lat(), elem.lon())) + if coords[0] is None : + coords = tuple((elem.centerLat(), elem.centerLon())) + points.append(coords) self.all_points = np.array(points) - self.valid = True + self.valid = True + + # Apply DBSCAN to find clusters. Choose different settings for different cities. + if self.cluster_type == 'shopping' and len(self.all_points) > 200 : + dbscan = DBSCAN(eps=0.00118, min_samples=15, algorithm='kd_tree') # for large cities + elif self.cluster_type == 'sightseeing' : + dbscan = DBSCAN(eps=0.0025, min_samples=15, algorithm='kd_tree') # for historic neighborhoods + else : + dbscan = DBSCAN(eps=0.00075, min_samples=10, algorithm='kd_tree') # for small cities + + labels = dbscan.fit_predict(self.all_points) + + # Separate clustered points and noise points + self.cluster_points = self.all_points[labels != -1] + self.cluster_labels = labels[labels != -1] + + # filter the clusters to keep only the largest ones + self.filter_clusters() - def generate_shopping_landmarks(self) -> list[Landmark]: + def generate_clusters(self) -> list[Landmark]: """ - Generate shopping landmarks based on clustered locations. - - This method first generates clusters of locations and then extracts shopping-related - locations from these clusters. It transforms each shopping location into a `Landmark` object. - - Returns: - list[Landmark]: A list of `Landmark` objects representing shopping locations. - Returns an empty list if no clusters are found. - """ - - self.generate_clusters() - - if len(set(self.cluster_labels)) == 0 : - return [] # Return empty list if no clusters were found - - # Then generate the shopping locations - self.generate_shopping_locations() - - # Transform the locations in landmarks and return the list - shopping_landmarks = [] - for location in self.shopping_locations : - shopping_landmarks.append(self.create_landmark(location)) - - return shopping_landmarks - - - - def generate_clusters(self) : - """ - Generate clusters of points using DBSCAN. - - This method applies the DBSCAN clustering algorithm with different - parameters depending on the size of the city (number of points). - It filters out noise points and keeps only the largest clusters. - - The method updates: - - `self.cluster_points`: The points belonging to clusters. - - `self.cluster_labels`: The labels for the points in clusters. - - The method also calls `filter_clusters()` to retain only the largest clusters. - """ - - # Apply DBSCAN to find clusters. Choose different settings for different cities. - if len(self.all_points) > 200 : - dbscan = DBSCAN(eps=0.00118, min_samples=15, algorithm='kd_tree') # for large cities - else : - dbscan = DBSCAN(eps=0.00075, min_samples=10, algorithm='kd_tree') # for small cities - - labels = dbscan.fit_predict(self.all_points) - - # Separate clustered points and noise points - self.cluster_points = self.all_points[labels != -1] - self.cluster_labels = labels[labels != -1] - - # filter the clusters to keep only the largest ones - self.filter_clusters() - - - def generate_shopping_locations(self) : - """ - Generate shopping locations based on clustered points. + Generate a list of landmarks based on identified clusters. This method iterates over the different clusters, calculates the centroid (as the mean of the points within each cluster), and assigns an importance based on the size of the cluster. - The generated shopping locations are stored in `self.shopping_locations` - as a list of `ShoppingLocation` objects, each with: + The generated shopping locations are stored in `self.clusters` + as a list of `Cluster` objects, each with: - `type`: Set to 'area'. - `centroid`: The calculated centroid of the cluster. - `importance`: The number of points in the cluster. """ + if not self.valid : + return [] # Return empty list if no clusters were found + locations = [] # loop through the different clusters @@ -169,16 +151,25 @@ class ShoppingManager: # Calculate the centroid as the mean of the points centroid = np.mean(current_cluster, axis=0) - locations.append(ShoppingLocation( + if self.cluster_type == 'shopping' : + score = len(current_cluster)*2 + else : + score = len(current_cluster)*4 + locations.append(Cluster( type='area', centroid=centroid, - importance = len(current_cluster) + importance = score )) - self.shopping_locations = locations + # Transform the locations in landmarks and return the list + cluster_landmarks = [] + for cluster in locations : + cluster_landmarks.append(self.create_landmark(cluster)) + + return cluster_landmarks - def create_landmark(self, shopping_location: ShoppingLocation) -> Landmark: + def create_landmark(self, cluster: Cluster) -> Landmark: """ Create a Landmark object based on the given shopping location. @@ -187,7 +178,7 @@ class ShoppingManager: result and creates a landmark with the associated details such as name, type, and OSM ID. Parameters: - shopping_location (ShoppingLocation): A ShoppingLocation object containing + shopping_location (Cluster): A Cluster object containing the centroid and importance of the area. Returns: @@ -196,14 +187,21 @@ class ShoppingManager: """ # Define the bounding box for a given radius around the coordinates - lat, lon = shopping_location.centroid + lat, lon = cluster.centroid bbox = ("around:1000", str(lat), str(lon)) # Query neighborhoods and shopping malls - selectors = ['"place"~"^(suburb|neighborhood|neighbourhood|quarter|city_block)$"', '"shop"="mall"'] + selectors = ['"place"~"^(suburb|neighborhood|neighbourhood|quarter|city_block)$"'] + + if self.cluster_type == 'shopping' : + selectors.append('"shop"="mall"') + new_name = 'Shopping Area' + t = 40 + else : + new_name = 'Neighborhood' + t = 15 min_dist = float('inf') - new_name = 'Shopping Area' new_name_en = None osm_id = 0 osm_type = 'node' @@ -231,7 +229,7 @@ class ShoppingManager: if location[0] is None : continue - d = get_distance(shopping_location.centroid, location) + d = get_distance(cluster.centroid, location) if d < min_dist : min_dist = d new_name = elem.tag('name') @@ -246,13 +244,14 @@ class ShoppingManager: return Landmark( name=new_name, - type='shopping', - location=shopping_location.centroid, # TODO: use the fact the we can also recognize streets. - attractiveness=shopping_location.importance, + type=self.cluster_type, + location=cluster.centroid, # TODO: use the fact the we can also recognize streets. + attractiveness=cluster.importance, n_tags=0, osm_id=osm_id, osm_type=osm_type, - name_en=new_name_en + name_en=new_name_en, + duration=t ) diff --git a/backend/src/utils/landmarks_manager.py b/backend/src/utils/landmarks_manager.py index c5e6091..92d01bb 100644 --- a/backend/src/utils/landmarks_manager.py +++ b/backend/src/utils/landmarks_manager.py @@ -5,7 +5,7 @@ from OSMPythonTools.cachingStrategy import CachingStrategy, JSON from ..structs.preferences import Preferences from ..structs.landmark import Landmark from .take_most_important import take_most_important -from .cluster_processing import ShoppingManager +from .cluster_manager import ClusterManager from ..constants import AMENITY_SELECTORS_PATH, LANDMARK_PARAMETERS_PATH, OPTIMIZER_PARAMETERS_PATH, OSM_CACHE_DIR @@ -86,6 +86,11 @@ class LandmarkManager: current_landmarks = self.fetch_landmarks(bbox, self.amenity_selectors['sightseeing'], preferences.sightseeing.type, score_function) all_landmarks.update(current_landmarks) + # special pipeline for historic neighborhoods + neighborhood_manager = ClusterManager(bbox, 'sightseeing') + historic_clusters = neighborhood_manager.generate_clusters() + all_landmarks.update(historic_clusters) + # list for nature if preferences.nature.score != 0: score_function = lambda score: score * 10 * self.nature_coeff * preferences.nature.score / 5 @@ -102,11 +107,9 @@ class LandmarkManager: all_landmarks.update(current_landmarks) # special pipeline for shopping malls - shopping_manager = ShoppingManager(bbox) - if shopping_manager.valid : - shopping_clusters = shopping_manager.generate_shopping_landmarks() - for landmark in shopping_clusters : landmark.duration = 45 - all_landmarks.update(shopping_clusters) + shopping_manager = ClusterManager(bbox, 'shopping') + shopping_clusters = shopping_manager.generate_clusters() + all_landmarks.update(shopping_clusters) @@ -277,6 +280,11 @@ class LandmarkManager: skip = True break + if "building:" in tag_key: + # do not count the building description as being particularly useful + n_tags -= 1 + + if "boundary" in tag_key: # skip "areas" like administrative boundaries and stuff skip = True @@ -327,13 +335,16 @@ class LandmarkManager: continue score = score_function(score) - if "place_of_worship" in elem.tags().values(): - score = score * self.church_coeff - duration = 10 + if "place_of_worship" in elem.tags().values() : + if "cathedral" not in elem.tags().values() : + score = score * self.church_coeff + duration = 5 + else : + duration = 10 - if 'viewpoint' in elem.tags().values() : + elif 'viewpoint' in elem.tags().values() : # viewpoints must count more - score += self.viewpoint_bonus + score = score * self.viewpoint_bonus duration = 10 elif "museum" in elem.tags().values() or "aquarium" in elem.tags().values() or "planetarium" in elem.tags().values(): From a0467e1e192b8651af96e9e2520c2f202d08fb18 Mon Sep 17 00:00:00 2001 From: Helldragon67 Date: Mon, 16 Dec 2024 18:09:33 +0100 Subject: [PATCH 2/2] higher importance for historic clusters and first time no failed test --- backend/src/tests/test_main.py | 5 +++-- backend/src/utils/cluster_manager.py | 2 +- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/backend/src/tests/test_main.py b/backend/src/tests/test_main.py index e8ef0f1..25f58d4 100644 --- a/backend/src/tests/test_main.py +++ b/backend/src/tests/test_main.py @@ -74,12 +74,13 @@ def test_bellecour(client, request) : # pylint: disable=redefined-outer-name for elem in landmarks : print(elem) + print(elem.osm_id) # checks : assert response.status_code == 200 # check for successful planning assert duration_minutes*0.8 < int(result['total_time']) < duration_minutes*1.2 assert 136200148 in osm_ids # check for Cathédrale St. Jean in trip - assert response.status_code == 2000 # check for successful planning + # assert response.status_code == 2000 # check for successful planning @@ -91,7 +92,7 @@ def test_shopping(client, request) : # pylint: disable=redefined-outer-name client: request: """ - duration_minutes = 1000 + duration_minutes = 240 response = client.post( "/trip/new", json={ diff --git a/backend/src/utils/cluster_manager.py b/backend/src/utils/cluster_manager.py index e715b58..ed79c86 100644 --- a/backend/src/utils/cluster_manager.py +++ b/backend/src/utils/cluster_manager.py @@ -154,7 +154,7 @@ class ClusterManager: if self.cluster_type == 'shopping' : score = len(current_cluster)*2 else : - score = len(current_cluster)*4 + score = len(current_cluster)*8 locations.append(Cluster( type='area', centroid=centroid,