massive numpy optimization and more tests

2025-01-14 18:23:58 +01:00
parent 4fae658dbb
commit ecd505a9ce
8 changed files with 1440 additions and 247 deletions
--- a/backend/src/utils/cluster_manager.py
+++ b/backend/src/utils/cluster_manager.py
@@ -12,6 +12,10 @@ from ..utils.get_time_separation import get_distance
 from ..constants import OSM_CACHE_DIR


+# silence the overpass logger
+logging.getLogger('OSMPythonTools').setLevel(level=logging.CRITICAL)
+
+
 class Cluster(BaseModel):
    """"
    A class representing an interesting area for shopping or sightseeing.
@@ -102,7 +106,6 @@ class ClusterManager:
                points.append(coords)

            self.all_points = np.array(points)
-            self.valid = True

            # Apply DBSCAN to find clusters. Choose different settings for different cities.
            if self.cluster_type == 'shopping' and len(self.all_points) > 200 :
@@ -114,12 +117,17 @@ class ClusterManager:

            labels = dbscan.fit_predict(self.all_points)

-            # Separate clustered points and noise points
-            self.cluster_points = self.all_points[labels != -1]
-            self.cluster_labels = labels[labels != -1]
+            # Require more than 2 distinct labels; DBSCAN's noise label (-1), when present, counts as one of them.
+            if len(set(labels)) > 2 :
+                self.logger.debug(f"Found {len(set(labels))} different clusters.")
+                # Separate clustered points and noise points
+                self.cluster_points = self.all_points[labels != -1]
+                self.cluster_labels = labels[labels != -1]
+                self.filter_clusters()      # Filter the clusters to keep only the largest ones. NOTE: this call sometimes raises a ValueError; the cause is not yet known.
+                self.valid = True

-            # filter the clusters to keep only the largest ones
-            self.filter_clusters()        
+            else : 
+                self.valid = False      


    def generate_clusters(self) -> list[Landmark]: