better naming and MM

2024-12-16 17:56:53 +01:00
parent ddd2e91328
commit 9b61471c94
4 changed files with 119 additions and 105 deletions
--- a/backend/src/parameters/landmark_parameters.yaml
+++ b/backend/src/parameters/landmark_parameters.yaml
@@ -1,11 +1,11 @@
 city_bbox_side: 7500 #m
 radius_close_to: 50
-church_coeff: 0.9
-nature_coeff: 1.25
+church_coeff: 0.65
+nature_coeff: 1.35
 overall_coeff: 10
 tag_exponent: 1.15
 image_bonus: 10
-viewpoint_bonus: 15
+viewpoint_bonus: 5
 wikipedia_bonus: 4
 name_bonus: 3
 N_important: 40
--- a/backend/src/tests/test_main.py
+++ b/backend/src/tests/test_main.py
@@ -53,7 +53,7 @@ def test_bellecour(client, request) :   # pylint: disable=redefined-outer-name
        client:
        request:
    """
-    duration_minutes = 30
+    duration_minutes = 120
    response = client.post(
        "/trip/new",
        json={
@@ -72,10 +72,15 @@ def test_bellecour(client, request) :   # pylint: disable=redefined-outer-name
    # Add details to report
    log_trip_details(request, landmarks, result['total_time'], duration_minutes)

+    for elem in landmarks :
+        print(elem)
+
    # checks :
    assert response.status_code == 200  # check for successful planning
    assert duration_minutes*0.8 < int(result['total_time']) < duration_minutes*1.2
    assert 136200148 in osm_ids         # check for Cathédrale St. Jean in trip
+    assert response.status_code == 2000  # check for successful planning
+


 def test_shopping(client, request) :   # pylint: disable=redefined-outer-name
@@ -86,7 +91,7 @@ def test_shopping(client, request) :   # pylint: disable=redefined-outer-name
        client:
        request:
    """
-    duration_minutes = 600
+    duration_minutes = 1000
    response = client.post(
        "/trip/new",
        json={
@@ -100,7 +105,6 @@ def test_shopping(client, request) :   # pylint: disable=redefined-outer-name
        )
    result = response.json()
    landmarks = load_trip_landmarks(client, result['first_landmark_uuid'])
-    # osm_ids = landmarks_to_osmid(landmarks)

    # Add details to report
    log_trip_details(request, landmarks, result['total_time'], duration_minutes)
--- a/backend/src/utils/cluster_processing.py
+++ b/backend/src/utils/cluster_processing.py
@@ -9,12 +9,12 @@ from OSMPythonTools.cachingStrategy import CachingStrategy, JSON

 from ..structs.landmark import Landmark
 from ..utils.get_time_separation import get_distance
-from ..constants import AMENITY_SELECTORS_PATH, LANDMARK_PARAMETERS_PATH, OPTIMIZER_PARAMETERS_PATH, OSM_CACHE_DIR
+from ..constants import OSM_CACHE_DIR


-class ShoppingLocation(BaseModel):
+class Cluster(BaseModel):
    """"
-    A classe representing an interesting area for shopping.
+    A class representing an interesting area for shopping or sightseeing.
    
    It can represent either a general area or a specifc route with start and end point.
    The importance represents the number of shops found in this cluster.
@@ -33,7 +33,7 @@ class ShoppingLocation(BaseModel):
    # end: Optional[list] = None


-class ShoppingManager:
+class ClusterManager:

    logger = logging.getLogger(__name__)

@@ -42,12 +42,21 @@ class ShoppingManager:
    all_points: list
    cluster_points: list
    cluster_labels: list
-    shopping_locations: list[ShoppingLocation]
+    cluster_type: Literal['sightseeing', 'shopping']

-    def __init__(self, bbox: tuple) -> None:
+    def __init__(self, bbox: tuple, cluster_type: Literal['sightseeing', 'shopping']) -> None:
        """
        Upon intialization, generate the point cloud used for cluster detection.
        The points represent bag/clothes shops and general boutiques.
+        If the first step is successful, it applies the DBSCAN clustering algorithm with different
+        parameters depending on the size of the city (number of points). 
+        It filters out noise points and keeps only the largest clusters.
+
+        A successful initialization updates:
+            - `self.cluster_points`: The points belonging to clusters.
+            - `self.cluster_labels`: The labels for the points in clusters.
+        
+        The method also calls `filter_clusters()` to retain only the largest clusters.

        Args: 
            bbox: The bounding box coordinates (around:radius, center_lat, center_lon).
@@ -57,13 +66,23 @@ class ShoppingManager:
        self.overpass = Overpass()
        CachingStrategy.use(JSON, cacheDir=OSM_CACHE_DIR)

+        self.cluster_type = cluster_type
+        if cluster_type == 'shopping' :
+            elem_type = ['node']
+            sel = ['"shop"~"^(bag|boutique|clothes)$"']
+            out = 'skel'
+        else :
+            elem_type = ['way']
+            sel = ['"historic"="building"']
+            out = 'center'
+
        # Initialize the points for cluster detection
        query = overpassQueryBuilder(
            bbox = bbox,
-            elementType = ['node'],
-            selector = ['"shop"~"^(bag|boutique|clothes)$"'],
+            elementType = elem_type,
+            selector = sel,
            includeCenter = True,
-            out = 'skel'
+            out = out
        )

        try:
@@ -77,87 +96,50 @@ class ShoppingManager:
        else :
            points = []
            for elem in result.elements() :
-                points.append(tuple((elem.lat(), elem.lon())))
+                coords = tuple((elem.lat(), elem.lon()))
+                if coords[0] is None :
+                    coords = tuple((elem.centerLat(), elem.centerLon()))
+                points.append(coords)

            self.all_points = np.array(points)
-            self.valid = True            
+            self.valid = True
+
+            # Apply DBSCAN to find clusters. Choose different settings for different cities.
+            if self.cluster_type == 'shopping' and len(self.all_points) > 200 :
+                dbscan = DBSCAN(eps=0.00118, min_samples=15, algorithm='kd_tree')  # for large cities
+            elif self.cluster_type == 'sightseeing' :
+                dbscan = DBSCAN(eps=0.0025, min_samples=15, algorithm='kd_tree')  # for historic neighborhoods
+            else :
+                dbscan = DBSCAN(eps=0.00075, min_samples=10, algorithm='kd_tree')  # for small cities
+
+            labels = dbscan.fit_predict(self.all_points)
+
+            # Separate clustered points and noise points
+            self.cluster_points = self.all_points[labels != -1]
+            self.cluster_labels = labels[labels != -1]
+
+            # filter the clusters to keep only the largest ones
+            self.filter_clusters()        


-    def generate_shopping_landmarks(self) -> list[Landmark]:
+    def generate_clusters(self) -> list[Landmark]:
        """
-        Generate shopping landmarks based on clustered locations.
-
-        This method first generates clusters of locations and then  extracts shopping-related 
-        locations from these clusters. It transforms each shopping location into a `Landmark` object.
-
-        Returns:
-            list[Landmark]: A list of `Landmark` objects representing shopping locations.
-                            Returns an empty list if no clusters are found.
-        """
-
-        self.generate_clusters()
-
-        if len(set(self.cluster_labels)) == 0 :
-            return []       # Return empty list if no clusters were found
-
-        # Then generate the shopping locations
-        self.generate_shopping_locations()
-
-        # Transform the locations in landmarks and return the list
-        shopping_landmarks = []
-        for location in self.shopping_locations :
-            shopping_landmarks.append(self.create_landmark(location))
-
-        return shopping_landmarks
-
-
-
-    def generate_clusters(self) :
-        """
-        Generate clusters of points using DBSCAN.
-
-        This method applies the DBSCAN clustering algorithm with different
-        parameters depending on the size of the city (number of points). 
-        It filters out noise points and keeps only the largest clusters.
-
-        The method updates:
-            - `self.cluster_points`: The points belonging to clusters.
-            - `self.cluster_labels`: The labels for the points in clusters.
-        
-        The method also calls `filter_clusters()` to retain only the largest clusters.
-        """
-
-        # Apply DBSCAN to find clusters. Choose different settings for different cities.
-        if len(self.all_points) > 200 :
-            dbscan = DBSCAN(eps=0.00118, min_samples=15, algorithm='kd_tree')  # for large cities
-        else :
-            dbscan = DBSCAN(eps=0.00075, min_samples=10, algorithm='kd_tree')  # for small cities
-
-        labels = dbscan.fit_predict(self.all_points)
-
-        # Separate clustered points and noise points
-        self.cluster_points = self.all_points[labels != -1]
-        self.cluster_labels = labels[labels != -1]
-
-        # filter the clusters to keep only the largest ones
-        self.filter_clusters()
-
-
-    def generate_shopping_locations(self) :
-        """
-        Generate shopping locations based on clustered points.
+        Generate a list of landmarks based on identified clusters.

        This method iterates over the different clusters, calculates the centroid 
        (as the mean of the points within each cluster), and assigns an importance 
        based on the size of the cluster.

-        The generated shopping locations are stored in `self.shopping_locations` 
-        as a list of `ShoppingLocation` objects, each with:
+        Each cluster is first described by an intermediate `Cluster` object,
+        which is then converted into a `Landmark` and returned. Each `Cluster` has:
            - `type`: Set to 'area'.
            - `centroid`: The calculated centroid of the cluster.
            - `importance`: The number of points in the cluster.
        """

+        if not self.valid :
+            return []       # Return empty list if no clusters were found
+
        locations = []

        # loop through the different clusters
@@ -169,16 +151,25 @@ class ShoppingManager:
            # Calculate the centroid as the mean of the points
            centroid = np.mean(current_cluster, axis=0)

-            locations.append(ShoppingLocation(
+            if self.cluster_type == 'shopping' :
+                score = len(current_cluster)*2
+            else :
+                score = len(current_cluster)*4
+            locations.append(Cluster(
                type='area',
                centroid=centroid,
-                importance = len(current_cluster)
+                importance = score
            ))

-        self.shopping_locations = locations
+        # Transform the locations in landmarks and return the list
+        cluster_landmarks = []
+        for cluster in locations :
+            cluster_landmarks.append(self.create_landmark(cluster))
+
+        return cluster_landmarks


-    def create_landmark(self, shopping_location: ShoppingLocation) -> Landmark:
+    def create_landmark(self, cluster: Cluster) -> Landmark:
        """
        Create a Landmark object based on the given shopping location.

@@ -187,7 +178,7 @@ class ShoppingManager:
        result and creates a landmark with the associated details such as name, type, and OSM ID.

        Parameters:
-            shopping_location (ShoppingLocation): A ShoppingLocation object containing 
+            cluster (Cluster): A Cluster object containing
            the centroid and importance of the area.

        Returns:
@@ -196,14 +187,21 @@ class ShoppingManager:
        """

        # Define the bounding box for a given radius around the coordinates
-        lat, lon = shopping_location.centroid
+        lat, lon = cluster.centroid
        bbox = ("around:1000", str(lat), str(lon))

        # Query neighborhoods and shopping malls
-        selectors = ['"place"~"^(suburb|neighborhood|neighbourhood|quarter|city_block)$"', '"shop"="mall"']
+        selectors = ['"place"~"^(suburb|neighborhood|neighbourhood|quarter|city_block)$"']
+
+        if self.cluster_type == 'shopping' :
+            selectors.append('"shop"="mall"')
+            new_name = 'Shopping Area'
+            t = 40
+        else : 
+            new_name = 'Neighborhood'
+            t = 15

        min_dist = float('inf')
-        new_name = 'Shopping Area'
        new_name_en = None
        osm_id = 0
        osm_type = 'node'
@@ -231,7 +229,7 @@ class ShoppingManager:
                    if location[0] is None : 
                        continue

-                d = get_distance(shopping_location.centroid, location)
+                d = get_distance(cluster.centroid, location)
                if  d < min_dist :
                    min_dist = d
                    new_name = elem.tag('name')
@@ -246,13 +244,14 @@ class ShoppingManager:
        
        return Landmark(
            name=new_name,
-            type='shopping',
-            location=shopping_location.centroid,              # TODO: use the fact the we can also recognize streets.
-            attractiveness=shopping_location.importance,
+            type=self.cluster_type,
+            location=cluster.centroid,              # TODO: use the fact the we can also recognize streets.
+            attractiveness=cluster.importance,
            n_tags=0,
            osm_id=osm_id,
            osm_type=osm_type,
-            name_en=new_name_en
+            name_en=new_name_en,
+            duration=t
        )


--- a/backend/src/utils/landmarks_manager.py
+++ b/backend/src/utils/landmarks_manager.py
@@ -5,7 +5,7 @@ from OSMPythonTools.cachingStrategy import CachingStrategy, JSON
 from ..structs.preferences import Preferences
 from ..structs.landmark import Landmark
 from .take_most_important import take_most_important
-from .cluster_processing import ShoppingManager
+from .cluster_manager import ClusterManager

 from ..constants import AMENITY_SELECTORS_PATH, LANDMARK_PARAMETERS_PATH, OPTIMIZER_PARAMETERS_PATH, OSM_CACHE_DIR

@@ -86,6 +86,11 @@ class LandmarkManager:
            current_landmarks = self.fetch_landmarks(bbox, self.amenity_selectors['sightseeing'], preferences.sightseeing.type, score_function)
            all_landmarks.update(current_landmarks)

+            # special pipeline for historic neighborhoods
+            neighborhood_manager = ClusterManager(bbox, 'sightseeing')
+            historic_clusters = neighborhood_manager.generate_clusters()
+            all_landmarks.update(historic_clusters)
+
        # list for nature
        if preferences.nature.score != 0:
            score_function = lambda score: score * 10 * self.nature_coeff * preferences.nature.score / 5
@@ -102,11 +107,9 @@ class LandmarkManager:
            all_landmarks.update(current_landmarks)

            # special pipeline for shopping malls
-            shopping_manager = ShoppingManager(bbox)
-            if shopping_manager.valid :
-                shopping_clusters = shopping_manager.generate_shopping_landmarks()
-                for landmark in shopping_clusters : landmark.duration = 45
-                all_landmarks.update(shopping_clusters)
+            shopping_manager = ClusterManager(bbox, 'shopping')
+            shopping_clusters = shopping_manager.generate_clusters()
+            all_landmarks.update(shopping_clusters)
            


@@ -277,6 +280,11 @@ class LandmarkManager:
                        skip = True
                        break

+                    if "building:" in tag_key:
+                        # do not count the building description as being particularly useful
+                        n_tags -= 1
+                    
+
                    if "boundary" in tag_key:
                        # skip "areas" like administrative boundaries and stuff
                        skip = True
@@ -327,13 +335,16 @@ class LandmarkManager:
                    continue

                score = score_function(score)
-                if "place_of_worship" in elem.tags().values():
-                    score = score * self.church_coeff
-                    duration = 10
+                if "place_of_worship" in elem.tags().values() :
+                    if "cathedral" not in elem.tags().values() :
+                        score = score * self.church_coeff
+                        duration = 5
+                    else : 
+                        duration = 10

-                if 'viewpoint' in elem.tags().values() :
+                elif 'viewpoint' in elem.tags().values() :
                    # viewpoints must count more
-                    score += self.viewpoint_bonus
+                    score = score * self.viewpoint_bonus
                    duration = 10
                
                elif "museum" in elem.tags().values() or "aquarium" in elem.tags().values() or "planetarium" in elem.tags().values():