2024-12-14 15:57:09 +00:00
3 changed files with 181 additions and 17 deletions
--- a/backend/src/sandbox/get_streets.py
+++ b/backend/src/sandbox/get_streets.py
@@ -13,6 +13,7 @@ from math import sin, cos, sqrt, atan2, radians
 EARTH_RADIUS_KM = 6373
 class ShoppingLocation(BaseModel):
    type: Literal['street', 'area']
    importance: int
@@ -21,7 +22,6 @@ class ShoppingLocation(BaseModel):
    end: Optional[list] = None
 # Output to frontend
 class Landmark(BaseModel) :
    # Properties of the landmark
@@ -206,7 +206,7 @@ def create_landmark(shopping_location: ShoppingLocation):
    # CachingStrategy.use(JSON, cacheDir=OSM_CACHE_DIR)
    # Query neighborhoods and shopping malls
-    selectors = ['"place"~"^(suburb|neighborhood|city_block)$"', '"shop"="mall"']
+    selectors = ['"place"~"^(suburb|neighborhood|neighbourhood|quarter|city_block)$"', '"shop"="mall"']
    min_dist = float('inf')
    new_name = 'Shopping Area'
@@ -220,22 +220,22 @@ def create_landmark(shopping_location: ShoppingLocation):
            elementType = ['node', 'way', 'relation'],
            selector = sel,
            includeCenter = True,
-            out = 'body'
+            out = 'center'
        )
        try:
            result = overpass.query(query)
            # print(f'query OK with {len(result.elements())} elements')
        except Exception as e:
            raise Exception("query unsuccessful")
        for elem in result.elements():
-            location = (elem.lat(), elem.lon())
+            location = (elem.centerLat(), elem.centerLon())
            if location[0] is None : 
-                location = (elem.centerLat(), elem.centerLon())
+                location = (elem.lat(), elem.lon())
                if location[0] is None : 
                    # print(f"Fetching coordinates failed with {elem.type()}/{elem.id()}")
                    continue
            # print(f"Distance : {get_distance(shopping_location.centroid, location)}")
@@ -246,14 +246,12 @@ def create_landmark(shopping_location: ShoppingLocation):
                osm_type = elem.type()              # Add type: 'way' or 'relation'
                osm_id = elem.id()                  # Add OSM id 
                # print("closer thing found")
                # add english name if it exists
                try :
                    new_name_en = elem.tag('name:en')
                except:
                    pass 
-
+    
    return Landmark(
        name=new_name,
        type='shopping',
@@ -267,7 +265,7 @@ def create_landmark(shopping_location: ShoppingLocation):
 # Extract points
-points = extract_points('newyork_data.json')
+points = extract_points('vienna_data.json')
 # print(len(points))
@@ -311,9 +309,13 @@ axes[2].set_title('PCA Fitted Lines on Clusters')
 # Create a list of Landmarks for the shopping things
 shopping_landmarks = []
 for loc in locations :
-    axes[2].scatter(loc.centroid[0], loc.centroid[1], color='lime', marker='x', s=200, linewidth=3)
+    axes[2].scatter(loc.centroid[1], loc.centroid[0], color='red', marker='x', s=200, linewidth=3)
    landmark = create_landmark(loc)
    shopping_landmarks.append(landmark)
    axes[2].text(loc.centroid[1], loc.centroid[0], landmark.name, 
             ha='center', va='top', fontsize=6, 
             bbox=dict(facecolor='white', edgecolor='black', boxstyle='round,pad=0.2'),
             zorder=3)
--- a/backend/src/utils/cluster_processing.py
+++ b/backend/src/utils/cluster_processing.py
@@ -0,0 +1,165 @@
 import logging, yaml
 from typing import Optional, Literal
 import numpy as np
 from sklearn.cluster import DBSCAN
 from sklearn.decomposition import PCA
 from pydantic import BaseModel
 from OSMPythonTools.overpass import Overpass, overpassQueryBuilder
 from OSMPythonTools.cachingStrategy import CachingStrategy, JSON
 from ..structs.landmark import Landmark
 from ..structs.preferences import Preferences
 from ..utils.get_time_separation import get_distance
 from ..constants import AMENITY_SELECTORS_PATH, LANDMARK_PARAMETERS_PATH, OPTIMIZER_PARAMETERS_PATH, OSM_CACHE_DIR
 class ShoppingLocation(BaseModel):
    """
    Summary of one shopping cluster, as produced by fit_lines in this module.
    """
    # Either 'street' or 'area'; fit_lines chooses between the two from the fitted line.
    type: Literal['street', 'area']
    # fit_lines stores the number of points of the cluster here.
    importance: int
    # fit_lines stores the PCA mean of the cluster with its two components swapped.
    centroid: tuple
    # Only passed by fit_lines when type is 'street'; left at None otherwise.
    start: Optional[list] = None
    end: Optional[list] = None
 class ShoppingManager:
    """
    Manager meant to build shopping landmarks around a given position.
    NOTE(review): the class looks unfinished - generate_landmarks_list prepares
    a bbox and an empty set but neither queries anything nor returns a value.
    """
    # Logger shared by all instances, named after this module.
    logger = logging.getLogger(__name__)
    def __init__(self) -> None:
        """
        Load the walking parameters from the optimizer parameter file and
        set up the Overpass client with a JSON cache in OSM_CACHE_DIR.
        """
        with OPTIMIZER_PARAMETERS_PATH.open('r') as f:
                parameters = yaml.safe_load(f)
                # presumably km/h, given the *1000/60 conversion below - TODO confirm
                self.walking_speed = parameters['average_walking_speed']
                self.detour_factor = parameters['detour_factor']
        self.overpass = Overpass()
        CachingStrategy.use(JSON, cacheDir=OSM_CACHE_DIR)
    def generate_landmarks_list(self, center_coordinates: tuple[float, float], preferences: Preferences) :
        """
        Prepare the search area around center_coordinates.
        Args:
            center_coordinates: Pair whose two entries are put, in order, into the 'around' bbox.
            preferences: Only preferences.max_time_minute is read here.
        Returns:
            Nothing yet: the function ends after building the bbox.
        """
        # Half of the available time, converted to a distance and shortened by the detour factor.
        max_walk_dist = (preferences.max_time_minute/2)/60*self.walking_speed*1000/self.detour_factor
        # NOTE(review): self.max_bbox_side is never assigned in __init__, so this line
        # raises AttributeError as written - confirm where the value should come from.
        reachable_bbox_side = min(max_walk_dist, self.max_bbox_side)
        # use set to avoid duplicates, this requires some __methods__ to be set in Landmark
        shopping_landmarks = set()
        # Create a bbox using the around technique.
        bbox = tuple((f"around:{reachable_bbox_side/2}", str(center_coordinates[0]), str(center_coordinates[1])))
        # list for sightseeing
 def get_clusters(points: list) -> tuple:
    """
    Apply DBSCAN to find clusters.
    Args:
        points: Coordinate pairs, given as a list or as a 2D array.
    Returns:
        tuple: (clustered_points, clustered_labels) as numpy arrays, holding only
        the points DBSCAN assigned to a cluster (noise, labelled -1, is dropped).
        Both arrays are empty when no points are given.
    """
    # A plain list cannot be indexed with a boolean mask, so always work on an array.
    points = np.asarray(points)
    # DBSCAN refuses empty input; there is nothing to cluster anyway.
    if points.size == 0:
        return points, np.empty(0, dtype=int)
    if len(points) > 400 :
        dbscan = DBSCAN(eps=0.00118, min_samples=15, algorithm='kd_tree')  # for large cities
    else :
        dbscan = DBSCAN(eps=0.00075, min_samples=10, algorithm='kd_tree')  # for small cities
    labels = dbscan.fit_predict(points)
    # Separate clustered points from noise points
    is_clustered = labels != -1
    return points[is_clustered], labels[is_clustered]
 def filter_clusters(cluster_points, cluster_labels, max_clusters: int = 5):
    """
    Remove clusters of less importance.
    Only the `max_clusters` clusters with the most points are kept.
    Args:
        cluster_points: Coordinates, one row per point.
        cluster_labels: Non-negative integer cluster label of each row.
        max_clusters: Number of largest clusters to keep (default 5).
    Returns:
        tuple: (points, labels) of the kept clusters, grouped by cluster and
        ordered from the smallest kept cluster to the largest.
    Raises:
        ValueError: If max_clusters is smaller than 1.
    """
    if max_clusters < 1:
        raise ValueError("max_clusters must be at least 1")
    cluster_points = np.asarray(cluster_points)
    cluster_labels = np.asarray(cluster_labels)
    # Nothing to rank: np.vstack would fail on an empty list of clusters.
    if cluster_labels.size == 0:
        return cluster_points[:0], cluster_labels[:0]
    label_counts = np.bincount(cluster_labels)
    # Stable sort so that clusters of equal size are always ranked the same way.
    kept_labels = np.argsort(label_counts, kind='stable')[-max_clusters:]
    masks = [cluster_labels == label for label in kept_labels]
    # Concatenate the kept clusters into single arrays
    filtered_points = np.vstack([cluster_points[mask] for mask in masks])
    filtered_labels = np.concatenate([cluster_labels[mask] for mask in masks])
    return filtered_points, filtered_labels
 def fit_lines(points, labels):
    """
    Fit lines to identified clusters.
    For each label a one-component PCA gives the main direction of the cluster;
    a segment centred on the cluster mean is laid along it and the cluster is
    stored as a ShoppingLocation of type 'area' or 'street'.
    Args:
        points: Coordinates, one row per point (list or 2D array).
        labels: Cluster label of each row of `points`.
    Returns:
        tuple: (corners, locations) where corners is (xmin, xmax, ymin, ymax)
        over all fitted segments and locations is a list of ShoppingLocation.
    Raises:
        ValueError: If no cluster contains at least two points.
    """
    # Boolean-mask indexing below needs arrays, not plain lists.
    points = np.asarray(points)
    labels = np.asarray(labels)
    all_x = []
    all_y = []
    locations = []
    # np.unique yields each label once, in sorted order.
    for label in np.unique(labels):
        cluster_points = points[labels == label]
        # If there's not enough points, skip
        if len(cluster_points) < 2:
            continue
        # Apply PCA to find the principal component (i.e., the line of best fit)
        pca = PCA(n_components=1)
        pca.fit(cluster_points)
        direction = pca.components_[0]
        centroid = pca.mean_
        # Project the cluster points onto the principal direction (line direction)
        projections = np.dot(cluster_points - centroid, direction)
        # Range of the projections, i.e. the approximate length of the cluster
        cluster_length = projections.max() - projections.min()
        # Sample positions along the line, scaled with the cluster length
        t = np.linspace(-cluster_length / 2.75, cluster_length / 2.75, 10)
        # First and second coordinate of the sampled positions along the line.
        line_x = centroid[0] + t*direction[0]
        line_y = centroid[1] + t*direction[1]
        # Extent of this segment, used for the overall corners
        all_x.append(min(line_x))
        all_x.append(max(line_x))
        all_y.append(min(line_y))
        all_y.append(max(line_y))
        if np.linalg.norm(t) <= 0.0045 :
            loc = ShoppingLocation(
                type='area',
                centroid=tuple((centroid[1], centroid[0])),
                importance = len(cluster_points),
            )
        else :
            # NOTE(review): start/end receive the per-axis coordinate arrays of the
            # segment, not its two end points - confirm this is what consumers expect.
            loc = ShoppingLocation(
                type='street',
                centroid=tuple((centroid[1], centroid[0])),
                importance = len(cluster_points),
                start=line_x,
                end=line_y
            )
        locations.append(loc)
    # Without any fitted segment there are no corners to compute.
    if not locations:
        raise ValueError("No cluster with at least two points to fit a line to")
    corners = (min(all_x), max(all_x), min(all_y), max(all_y))
    return corners, locations
--- a/backend/src/utils/landmarks_manager.py
+++ b/backend/src/utils/landmarks_manager.py
@@ -1,7 +1,4 @@
-import math
+import math, yaml, logging
 import yaml
 import logging
 from OSMPythonTools.overpass import Overpass, overpassQueryBuilder
 from OSMPythonTools.cachingStrategy import CachingStrategy, JSON
@@ -79,7 +76,7 @@ class LandmarkManager:
        # use set to avoid duplicates, this requires some __methods__ to be set in Landmark
        all_landmarks = set()
-        # Create a bbox using the around
+        # Create a bbox using the around technique
        bbox = tuple((f"around:{reachable_bbox_side/2}", str(center_coordinates[0]), str(center_coordinates[1])))
        # list for sightseeing
        if preferences.sightseeing.score != 0:
@@ -219,7 +216,7 @@ class LandmarkManager:
                selector = sel,
                conditions = query_conditions,        # except for nature....
                includeCenter = True,
-                out = 'body'
+                out = 'center'
                )
            self.logger.debug(f"Query: {query}")