Some checks failed
Build and deploy the backend to staging / Build and push image (pull_request) Failing after 2m35s
Build and deploy the backend to staging / Deploy to staging (pull_request) Has been skipped
Run linting on the backend code / Build (pull_request) Failing after 28s
Run testing on the backend code / Build (pull_request) Failing after 1m24s
351 lines
11 KiB
Python
351 lines
11 KiB
Python
# pylint: skip-file
|
|
|
|
import numpy as np
|
|
import json
|
|
import os
|
|
from typing import Optional, Literal
|
|
from sklearn.cluster import DBSCAN
|
|
from sklearn.decomposition import PCA
|
|
import matplotlib.pyplot as plt
|
|
from pydantic import BaseModel
|
|
from OSMPythonTools.overpass import Overpass, overpassQueryBuilder
|
|
from OSMPythonTools.cachingStrategy import CachingStrategy, JSON
|
|
from math import sin, cos, sqrt, atan2, radians
|
|
|
|
|
|
# Earth radius in kilometres. get_distance() multiplies the haversine central
# angle by this value to obtain a distance in km.
# NOTE(review): the commonly quoted mean Earth radius is ~6371 km; 6373 looks
# like the value that came with the borrowed haversine snippet - confirm that
# it is intentional.
EARTH_RADIUS_KM = 6373
|
|
|
|
|
|
class ShoppingLocation(BaseModel):
    """
    A cluster of shops detected by `fit_lines`.

    Short clusters are reported as an 'area', longer ones as a 'street'.
    """
    # Kind of cluster: 'street' or 'area'.
    type: Literal['street', 'area']
    # Weight of the cluster; fit_lines sets it to the number of points in it.
    importance: int
    # Centre of the cluster. fit_lines stores (centroid[1], centroid[0]) of the
    # PCA mean here, and create_landmark unpacks the pair as (lat, lon).
    centroid: tuple
    # Only filled for 'street' locations.
    # NOTE(review): despite the names, fit_lines puts the x-coordinates of the
    # sampled line into `start` and its y-coordinates into `end`, not the two
    # end points of the line - confirm the intended semantics.
    start: Optional[list] = None
    end: Optional[list] = None
|
|
|
|
|
|
# Output to frontend
|
|
class Landmark(BaseModel) :
    """
    Description of a single landmark (the model that is output to the frontend).
    """
    # Properties of the landmark
    # Display name of the landmark.
    name : str
    # Category of the landmark; 'start' and 'finish' are also accepted values.
    type: Literal['sightseeing', 'nature', 'shopping', 'start', 'finish']
    # Coordinates of the landmark. create_landmark passes the centroid of a
    # ShoppingLocation here, which it unpacks as (lat, lon).
    location : tuple
    # OpenStreetMap element type and id (create_landmark fills them from
    # elem.type() and elem.id(), defaulting to 'node' and 0).
    osm_type : str
    osm_id : int
    # Score of the landmark; for shopping landmarks this is the cluster importance.
    attractiveness : int
    # presumably the number of OSM tags on the element - TODO confirm
    # (create_landmark always passes 0).
    n_tags : int
    image_url : Optional[str] = None
    website_url : Optional[str] = None
    description : Optional[str] = None # TODO future
    # presumably the visit duration; the unit is not visible here - TODO confirm.
    duration : Optional[int] = 0
    # English name, taken from the OSM 'name:en' tag by create_landmark.
    name_en : Optional[str] = None

    # Additional properties depending on specific tour
    must_do : Optional[bool] = False
    must_avoid : Optional[bool] = False
    is_secondary : Optional[bool] = False

    # presumably the travel time to, and the identifier of, the next landmark
    # of the tour - verify against the code that builds tours.
    time_to_reach_next : Optional[int] = 0
    next_uuid : Optional[str] = None
|
|
|
|
|
|
def extract_points(filestr: str) -> np.ndarray:
    """
    Extract one 2D point per feature from a GeoJSON file.

    `Point` features contribute their coordinates. `Polygon` features
    contribute the first vertex of their outer ring (a cheap representative
    point, not a true centroid). Features with any other geometry type, or
    without a geometry, are ignored.

    Args:
        filestr (str): Path of the GeoJSON file, resolved relative to the
            directory of this module. An absolute path is used as is.

    Returns :
        np.ndarray: Array of shape (n_points, 2) with the coordinates in the
            component order used by the file; shape (0, 2) if nothing matched.
    """
    # os.path.join keeps absolute paths intact, unlike plain concatenation.
    path = os.path.join(os.path.dirname(__file__), filestr)

    # JSON is UTF-8 by specification; do not rely on the platform default.
    with open(path, 'r', encoding='utf-8') as f:
        geojson = json.load(f)

    points = []
    for feature in geojson['features']:
        # GeoJSON allows "geometry": null for features without a location.
        geometry = feature.get('geometry') or {}
        geometry_type = geometry.get('type')

        if geometry_type == 'Point':
            position = geometry['coordinates']
        elif geometry_type == 'Polygon':
            # First vertex of the outer ring.
            position = geometry['coordinates'][0][0]
        else:
            continue

        # A position may carry an optional third component (altitude); keep
        # the first two so that every row has the same width.
        points.append(position[:2])

    # reshape guarantees a 2D result even when no point was found, so callers
    # can always index columns with points[:, 0] / points[:, 1].
    return np.array(points, dtype=float).reshape(-1, 2)
|
|
|
|
|
|
def get_distance(p1: tuple[float, float], p2: tuple[float, float]) -> float:
    """
    Compute the great-circle distance in kilometres between two locations.

    Uses the haversine formula on a spherical Earth of radius
    EARTH_RADIUS_KM, so that no external library is needed.

    Args:
        p1 (tuple[float, float]): (lat, lon) of the first location, in degrees.
        p2 (tuple[float, float]): (lat, lon) of the second location, in degrees.

    Returns:
        float: Distance between p1 and p2 in kilometres.
    """
    if p1 == p2:
        return 0.0

    lat1, lon1 = radians(p1[0]), radians(p1[1])
    lat2, lon2 = radians(p2[0]), radians(p2[1])

    dlon = lon2 - lon1
    dlat = lat2 - lat1

    a = sin(dlat / 2)**2 + cos(lat1) * cos(lat2) * sin(dlon / 2)**2

    # Floating-point rounding can push `a` marginally outside [0, 1] (e.g. for
    # near-antipodal points), which would make sqrt(1 - a) raise.
    a = min(1.0, max(0.0, a))

    # Central angle between the two points, in radians.
    c = 2 * atan2(sqrt(a), sqrt(1 - a))

    return EARTH_RADIUS_KM * c
|
|
|
|
def filter_clusters(cluster_points, cluster_labels, n_clusters: int = 5):
    """
    Remove clusters of less importance, keeping only the largest ones.

    Args:
        cluster_points (np.ndarray): Points, one row per point.
        cluster_labels (np.ndarray): Non-negative integer cluster label of each
            row of `cluster_points`. Noise labels (-1) must have been removed
            beforehand because np.bincount rejects negative values.
        n_clusters (int): Maximum number of clusters to keep. Defaults to 5.

    Returns:
        tuple[np.ndarray, np.ndarray]: The retained points and their labels,
            grouped by cluster, from the smallest to the largest retained
            cluster. Both are empty when the input is empty.

    Raises:
        ValueError: If `n_clusters` is smaller than 1.
    """
    if n_clusters < 1:
        # A slice of [-0:] would silently keep every cluster.
        raise ValueError("n_clusters must be at least 1")

    cluster_points = np.asarray(cluster_points)
    cluster_labels = np.asarray(cluster_labels)

    # np.vstack cannot stack an empty list, so answer the empty case directly.
    if cluster_labels.size == 0:
        return cluster_points[:0], cluster_labels[:0]

    # label_counts[k] is the number of points carrying label k.
    label_counts = np.bincount(cluster_labels)

    # Ascending sort, so the largest clusters are at the end. A stable sort
    # makes the choice between equally sized clusters deterministic.
    largest_labels = np.argsort(label_counts, kind='stable')[-n_clusters:]

    kept_points = []
    kept_labels = []
    for label in largest_labels:
        # bincount also lists label values that never occur; skip them.
        if label_counts[label] == 0:
            continue
        members = cluster_labels == label
        kept_points.append(cluster_points[members])
        kept_labels.append(cluster_labels[members])

    # Concatenate the retained clusters into single arrays.
    return np.vstack(kept_points), np.concatenate(kept_labels)
|
|
|
|
|
|
def fit_lines(points, labels):
    """
    Fit a line to every cluster and describe each one as a ShoppingLocation.

    For each cluster with at least two points, the first principal component
    gives the direction of the line and the PCA mean gives its centre. The
    line is sampled at 10 offsets `t` whose extent scales with the length of
    the cluster along that direction.

    Args:
        points (np.ndarray): Clustered points, one row per point; column 0 is
            treated as x and column 1 as y.
        labels (np.ndarray): Cluster label of each row of `points`.

    Returns:
        tuple: `(corners, locations)` where `corners` is
            `(xmin, xmax, ymin, ymax)` over all sampled lines and `locations`
            is the list of ShoppingLocation, one per fitted cluster.

    Raises:
        ValueError: If no cluster has at least two points.
    """
    all_x = []
    all_y = []
    locations = []

    for label in set(labels):
        cluster_points = points[labels == label]

        # If there's not enough points, skip
        if len(cluster_points) < 2:
            continue

        # Apply PCA to find the principal component (i.e., the line of best fit)
        pca = PCA(n_components=1)
        pca.fit(cluster_points)

        direction = pca.components_[0]
        centroid = pca.mean_

        # Project the cluster points onto the principal direction to measure
        # the approximate length of the cluster along the line.
        projections = np.dot(cluster_points - centroid, direction)
        cluster_length = projections.max() - projections.min()

        # Offsets along the line, scaled with the cluster length
        t = np.linspace(-cluster_length / 2.75, cluster_length / 2.75, 10)

        # x and y coordinates of the sampled line
        line_x = centroid[0] + t*direction[0]
        line_y = centroid[1] + t*direction[1]

        # Track the extent of every line to derive the bounding box
        all_x.extend((min(line_x), max(line_x)))
        all_y.extend((min(line_y), max(line_y)))

        # The centroid is stored with its two components swapped; create_landmark
        # unpacks it as (lat, lon).
        fields = {
            'type': 'area',
            'centroid': tuple((centroid[1], centroid[0])),
            'importance': len(cluster_points),
        }

        # Short clusters (small norm of the offsets) are plain areas; longer
        # ones are streets and also carry the sampled line. Note that `start`
        # receives the x-coordinates and `end` the y-coordinates of the line.
        if np.linalg.norm(t) > 0.0045:
            fields['type'] = 'street'
            # Hand plain lists to the model: its fields are declared as `list`
            # and are not guaranteed to accept NumPy arrays.
            fields['start'] = line_x.tolist()
            fields['end'] = line_y.tolist()

        locations.append(ShoppingLocation(**fields))

    if not locations:
        # Previously surfaced as "min() arg is an empty sequence".
        raise ValueError("fit_lines needs at least one cluster with two or more points")

    corners = (min(all_x), max(all_x), min(all_y), max(all_y))

    return corners, locations
|
|
|
|
|
|
|
|
def create_landmark(shopping_location: ShoppingLocation, search_radius_m: int = 1000):
    """
    Build a 'shopping' Landmark for a detected shopping location.

    Queries Overpass for neighbourhood-like places and shopping malls around
    the centroid of the location, and names the landmark after the closest
    named element. If nothing suitable is found, the landmark is called
    'Shopping Area' with osm_type 'node' and osm_id 0.

    Args:
        shopping_location (ShoppingLocation): The location to describe; its
            centroid is read as (lat, lon).
        search_radius_m (int): Radius of the Overpass `around` filter.
            Defaults to 1000.

    Returns:
        Landmark: Landmark located at the centroid, whose attractiveness is
            the importance of the shopping location.

    Raises:
        Exception: "query unsuccessful" if an Overpass query fails; the
            original error is attached as its cause.
    """
    # Search area: a circle around the centroid
    lat, lon = shopping_location.centroid
    bbox = (f"around:{search_radius_m}", str(lat), str(lon))

    overpass = Overpass()
    # CachingStrategy.use(JSON, cacheDir=OSM_CACHE_DIR)

    # Query neighborhoods and shopping malls
    selectors = ['"place"~"^(suburb|neighborhood|neighbourhood|quarter|city_block)$"', '"shop"="mall"']

    # Defaults used when no named element is found
    min_dist = float('inf')
    new_name = 'Shopping Area'
    new_name_en = None
    osm_id = 0
    osm_type = 'node'

    for sel in selectors :
        query = overpassQueryBuilder(
            bbox = bbox,
            elementType = ['node', 'way', 'relation'],
            selector = sel,
            includeCenter = True,
            out = 'center'
        )

        try:
            result = overpass.query(query)
        except Exception as e:
            # Keep the original error as the cause so the failure can be diagnosed.
            raise Exception("query unsuccessful") from e

        for elem in result.elements():
            # Landmark.name must be a string, so an element without a name
            # cannot be used to name the landmark.
            name = elem.tag('name')
            if not name :
                continue

            # Prefer the computed centre; fall back to the element's own
            # coordinates when no centre is available.
            location = (elem.centerLat(), elem.centerLon())
            if location[0] is None :
                location = (elem.lat(), elem.lon())
            if location[0] is None :
                continue

            d = get_distance(shopping_location.centroid, location)
            if d < min_dist :
                min_dist = d
                new_name = name
                osm_type = elem.type()      # Add type: 'way' or 'relation'
                osm_id = elem.id()          # Add OSM id

                # Add the English name if it exists. Reset it on failure so a
                # name:en from a previous, farther element is never kept.
                try :
                    new_name_en = elem.tag('name:en')
                except Exception:
                    new_name_en = None

    return Landmark(
        name=new_name,
        type='shopping',
        location=shopping_location.centroid,  # TODO: use the fact the we can also recognize streets.
        attractiveness=shopping_location.importance,
        n_tags=0,
        osm_id=osm_id,
        osm_type=osm_type,
        name_en=new_name_en
    )
|
|
|
|
|
|
# Load the shop coordinates of the city under study
points = extract_points('vienna_data.json')

# print(len(points))

######## One figure with three panels side by side:
######## raw data, DBSCAN result, retained clusters
fig, axes = plt.subplots(1, 3, figsize=(15, 5))

# Panel 0: raw data points
axes[0].scatter(points[:, 0], points[:, 1], color='blue', s=20)
axes[0].set_title('Raw Data')


# Cluster with DBSCAN. The settings depend on the size of the city.
if len(points) <= 400 :
    dbscan = DBSCAN(eps=0.00075, min_samples=10, algorithm='kd_tree')   # for small cities
else :
    dbscan = DBSCAN(eps=0.00118, min_samples=15, algorithm='kd_tree')   # for large cities

labels = dbscan.fit_predict(points)

# DBSCAN marks noise with the label -1; split it from the clustered points
clustered_points = points[labels != -1]
clustered_labels = labels[labels != -1]
noise_points = points[labels == -1]

######## Panel 1: DBSCAN clustering results
axes[1].scatter(clustered_points[:, 0], clustered_points[:, 1], c=clustered_labels, cmap='rainbow', s=20)
axes[1].scatter(noise_points[:, 0], noise_points[:, 1], c='blue', s=7, label='Noise')
axes[1].set_title('DBSCAN Clusters')

# Only the 5 biggest clusters are kept
clustered_points, clustered_labels = filter_clusters(clustered_points, clustered_labels)

# Fit a line to each remaining cluster
corners, locations = fit_lines(clustered_points, clustered_labels)
xmin, xmax, ymin, ymax = corners


######## Panel 2: the retained clusters
axes[2].scatter(clustered_points[:, 0], clustered_points[:, 1], c=clustered_labels, cmap='rainbow', s=30)
axes[2].set_title('PCA Fitted Lines on Clusters')

# Turn every shopping location into a Landmark and mark it on the last panel
shopping_landmarks = []
for loc in locations :
    # The centroid is stored with its components swapped, so index 1 is x here
    axes[2].scatter(loc.centroid[1], loc.centroid[0], color='red', marker='x', s=200, linewidth=3)
    landmark = create_landmark(loc)
    shopping_landmarks.append(landmark)
    axes[2].text(
        loc.centroid[1], loc.centroid[0], landmark.name,
        ha='center', va='top', fontsize=6,
        bbox={'facecolor': 'white', 'edgecolor': 'black', 'boxstyle': 'round,pad=0.2'},
        zorder=3,
    )


####### Plot the detected lines in the final plot (currently disabled) #######
# for loc in locations:
#     if loc.type == 'street' :
#         line_x = loc.start
#         line_y = loc.end
#         axes[2].plot(line_x, line_y, color='lime', linewidth=3)
#     else :


# Same viewport on all three panels: the extent of the fitted lines plus a margin
for ax in axes :
    ax.set_xlim(xmin-0.01, xmax+0.01)
    ax.set_ylim(ymin-0.01, ymax+0.01)


print("\n\n\n")
for landmark in shopping_landmarks :
    print(f"{landmark.name} is a shopping area with a score of {landmark.attractiveness}")


plt.tight_layout()
plt.show()
|