From c5a08125f6a4bd833b29369c348782937cce969f Mon Sep 17 00:00:00 2001 From: Helldragon67 Date: Wed, 15 Jan 2025 07:10:00 +0100 Subject: [PATCH] better clusters --- backend/src/utils/cluster_manager.py | 25 ++++++++++++++++++------- backend/src/utils/landmarks_manager.py | 4 ++-- 2 files changed, 20 insertions(+), 9 deletions(-) diff --git a/backend/src/utils/cluster_manager.py b/backend/src/utils/cluster_manager.py index ed79c86..4164ac3 100644 --- a/backend/src/utils/cluster_manager.py +++ b/backend/src/utils/cluster_manager.py @@ -12,6 +12,10 @@ from ..utils.get_time_separation import get_distance from ..constants import OSM_CACHE_DIR +# silence the overpass logger +logging.getLogger('OSMPythonTools').setLevel(level=logging.CRITICAL) + + class Cluster(BaseModel): """" A class representing an interesting area for shopping or sightseeing. @@ -102,7 +106,6 @@ class ClusterManager: points.append(coords) self.all_points = np.array(points) - self.valid = True # Apply DBSCAN to find clusters. Choose different settings for different cities. if self.cluster_type == 'shopping' and len(self.all_points) > 200 : @@ -114,12 +117,17 @@ class ClusterManager: labels = dbscan.fit_predict(self.all_points) - # Separate clustered points and noise points - self.cluster_points = self.all_points[labels != -1] - self.cluster_labels = labels[labels != -1] + # Check that there are at least 2 different clusters + if len(set(labels)) > 2 : + self.logger.debug(f"Found {len(set(labels))} different clusters.") + # Separate clustered points and noise points + self.cluster_points = self.all_points[labels != -1] + self.cluster_labels = labels[labels != -1] + self.filter_clusters() # ValueError here sometimes. I don't know why. # Filter the clusters to keep only the largest ones.
+ self.valid = True - # filter the clusters to keep only the largest ones - self.filter_clusters() + else : + self.valid = False def generate_clusters(self) -> list[Landmark]: @@ -224,6 +232,9 @@ class ClusterManager: for elem in result.elements(): location = (elem.centerLat(), elem.centerLon()) + # Skip if element is missing a name or a location + if elem.tag('name') is None : + continue if location[0] is None : location = (elem.lat(), elem.lon()) if location[0] is None : @@ -277,6 +288,6 @@ class ClusterManager: filtered_cluster_labels.append(np.full((label_counts[label],), label)) # Replicate the label # update the cluster points and labels with the filtered data - self.cluster_points = np.vstack(filtered_cluster_points) + self.cluster_points = np.vstack(filtered_cluster_points) # ValueError here self.cluster_labels = np.concatenate(filtered_cluster_labels) diff --git a/backend/src/utils/landmarks_manager.py b/backend/src/utils/landmarks_manager.py index 92d01bb..7e83505 100644 --- a/backend/src/utils/landmarks_manager.py +++ b/backend/src/utils/landmarks_manager.py @@ -210,7 +210,7 @@ class LandmarkManager: # caution, when applying a list of selectors, overpass will search for elements that match ALL selectors simultaneously # we need to split the selectors into separate queries and merge the results for sel in dict_to_selector_list(amenity_selector): - self.logger.debug(f"Current selector: {sel}") + # self.logger.debug(f"Current selector: {sel}") # query_conditions = ['count_tags()>5'] # if landmarktype == 'shopping' : # use this later for shopping clusters @@ -232,7 +232,7 @@ class LandmarkManager: includeCenter = True, out = 'center' ) - self.logger.debug(f"Query: {query}") + # self.logger.debug(f"Query: {query}") try: result = self.overpass.query(query)