cluster recognition added to backend pipeline
Some checks failed
Build and deploy the backend to staging / Build and push image (pull_request) Successful in 3m0s
Run linting on the backend code / Build (pull_request) Failing after 29s
Run testing on the backend code / Build (pull_request) Failing after 2m9s
Build and deploy the backend to staging / Deploy to staging (pull_request) Successful in 15s
parent d9be7b0707
commit 7f77ecab04
File diff suppressed because one or more lines are too long
@@ -78,6 +78,36 @@ def test_bellecour(client, request) :   # pylint: disable=redefined-outer-name
     assert 136200148 in osm_ids # check for Cathédrale St. Jean in trip
 
 
+def test_shopping(client, request) :   # pylint: disable=redefined-outer-name
+    """
+    Test n°3 : Custom test in Lyon centre to ensure shopping clusters are found.
+
+    Args:
+        client:
+        request:
+    """
+    duration_minutes = 600
+    response = client.post(
+        "/trip/new",
+        json={
+            "preferences": {"sightseeing": {"type": "sightseeing", "score": 0},
+                            "nature": {"type": "nature", "score": 0},
+                            "shopping": {"type": "shopping", "score": 5},
+                            "max_time_minute": duration_minutes,
+                            "detour_tolerance_minute": 0},
+            "start": [45.7576485, 4.8330241]
+        }
+    )
+    result = response.json()
+    landmarks = load_trip_landmarks(client, result['first_landmark_uuid'])
+    osm_ids = landmarks_to_osmid(landmarks)
+
+    # Add details to report
+    log_trip_details(request, landmarks, result['total_time'], duration_minutes)
+
+    # checks :
+    assert response.status_code == 200  # check for successful planning
+    assert duration_minutes*0.8 < int(result['total_time']) < duration_minutes*1.2
+
+
 # def test_new_trip_single_prefs(client):
 #     response = client.post(
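The final assertion accepts any plan whose total time lands within 20% of the requested duration. A minimal sketch of that acceptance window, assuming the 600-minute request used above (the planner output value here is invented purely for illustration):

    # Hedged sketch of the +/-20% acceptance window checked by test_shopping.
    duration_minutes = 600   # requested trip duration, as in the test above
    total_time = 655         # hypothetical planner output, for illustration only

    lower, upper = duration_minutes * 0.8, duration_minutes * 1.2   # 480 .. 720 minutes
    assert lower < total_time < upper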
@@ -1,80 +1,195 @@
-import logging, yaml
-from typing import Optional, Literal
+import logging
+from typing import Literal
 
 import numpy as np
 from sklearn.cluster import DBSCAN
-from sklearn.decomposition import PCA
 from pydantic import BaseModel
 from OSMPythonTools.overpass import Overpass, overpassQueryBuilder
 from OSMPythonTools.cachingStrategy import CachingStrategy, JSON
 
 from ..structs.landmark import Landmark
-from ..structs.preferences import Preferences
 from ..utils.get_time_separation import get_distance
 from ..constants import AMENITY_SELECTORS_PATH, LANDMARK_PARAMETERS_PATH, OPTIMIZER_PARAMETERS_PATH, OSM_CACHE_DIR
 
 
 class ShoppingLocation(BaseModel):
     type: Literal['street', 'area']
     importance: int
     centroid: tuple
-    start: Optional[list] = None
-    end: Optional[list] = None
+    # start: Optional[list] = None   # for later use if we want to have streets as well
+    # end: Optional[list] = None
 
 
 class ShoppingManager:
 
     logger = logging.getLogger(__name__)
 
+    # NOTE: all points are in (lat, lon) format
+    valid: bool   # Ensure the manager is valid (ie there are some clusters to be found)
+    all_points: list
+    cluster_points: list
+    cluster_labels: list
+    shopping_locations: list[ShoppingLocation]
 
-    def __init__(self) -> None:
+    def __init__(self, bbox: tuple) -> None:
+        """
+        Upon initialization, generate the list of shops used for cluster points.
+        """
-        with OPTIMIZER_PARAMETERS_PATH.open('r') as f:
-            parameters = yaml.safe_load(f)
-            self.walking_speed = parameters['average_walking_speed']
-            self.detour_factor = parameters['detour_factor']
 
+        # Initialize overpass and cache
         self.overpass = Overpass()
         CachingStrategy.use(JSON, cacheDir=OSM_CACHE_DIR)
 
+        # Initialize the points for cluster detection
+        query = overpassQueryBuilder(
+            bbox = bbox,
+            elementType = ['node'],
+            selector = ['"shop"~"^(bag|boutique|clothes)$"'],
+            includeCenter = True,
+            out = 'skel'
+        )
 
-    def generate_landmarks_list(self, center_coordinates: tuple[float, float], preferences: Preferences) :
+        try:
+            result = self.overpass.query(query)
+        except Exception as e:
+            self.logger.error(f"Error fetching landmarks: {e}")
 
-        max_walk_dist = (preferences.max_time_minute/2)/60*self.walking_speed*1000/self.detour_factor
-        reachable_bbox_side = min(max_walk_dist, self.max_bbox_side)
-
-        # use set to avoid duplicates, this requires some __methods__ to be set in Landmark
-        shopping_landmarks = set()
-
-        # Create a bbox using the around technique.
-        bbox = tuple((f"around:{reachable_bbox_side/2}", str(center_coordinates[0]), str(center_coordinates[1])))
-        # list for sightseeing
+        if len(result.elements()) > 0 :
+            points = []
+            for elem in result.elements() :
+                points.append(tuple((elem.lat(), elem.lon())))
+
+            self.all_points = np.array(points)
+            self.valid = True
+        else :
+            self.valid = False
 
 
-def get_clusters(points: list) -> tuple:
-    """
-    Apply DBSCAN to find clusters.
-    """
-    if len(points) > 400 :
+    def generate_shopping_landmarks(self) -> list[Landmark]:
+
+        # First generate the clusters
+        self.generate_clusters()
+
+        # Then generate the shopping locations
+        self.generate_shopping_locations()
+
+        # Transform the locations in landmarks and return the list
+        shopping_landmarks = []
+        for location in self.shopping_locations :
+            shopping_landmarks.append(self.create_landmark(location))
+
+        return shopping_landmarks
+
+
+    def generate_clusters(self) :
+
+        # Apply DBSCAN to find clusters. Choose different settings for different cities.
+        if len(self.all_points) > 200 :
             dbscan = DBSCAN(eps=0.00118, min_samples=15, algorithm='kd_tree') # for large cities
         else :
             dbscan = DBSCAN(eps=0.00075, min_samples=10, algorithm='kd_tree') # for small cities
 
-    labels = dbscan.fit_predict(points)
+        labels = dbscan.fit_predict(self.all_points)
 
         # Separate clustered points and noise points
-    clustered_points = points[labels != -1]
-    clustered_labels = labels[labels != -1]
-
-    return clustered_points, clustered_labels
+        self.cluster_points = self.all_points[labels != -1]
+        self.cluster_labels = labels[labels != -1]
+
+        # filter the clusters to keep only the largest ones
+        self.filter_clusters()
+
+
+    def generate_shopping_locations(self) :
+
+        locations = []
+
+        # loop through the different clusters
+        for label in set(self.cluster_labels):
+
+            # Extract points belonging to the current cluster
+            current_cluster = self.cluster_points[self.cluster_labels == label]
+
+            # Calculate the centroid as the mean of the points
+            centroid = np.mean(current_cluster, axis=0)
+
+            locations.append(ShoppingLocation(
+                type='area',
+                centroid=centroid,
+                importance = len(current_cluster)
+            ))
+
+        self.shopping_locations = locations
+
+
+    def create_landmark(self, shopping_location: ShoppingLocation) -> Landmark:
+
+        # Define the bounding box for a given radius around the coordinates
+        lat, lon = shopping_location.centroid
+        bbox = ("around:1000", str(lat), str(lon))
+
+        # Query neighborhoods and shopping malls
+        selectors = ['"place"~"^(suburb|neighborhood|neighbourhood|quarter|city_block)$"', '"shop"="mall"']
+
+        min_dist = float('inf')
+        new_name = 'Shopping Area'
+        new_name_en = None
+        osm_id = 0
+        osm_type = 'node'
+
+        for sel in selectors :
+            query = overpassQueryBuilder(
+                bbox = bbox,
+                elementType = ['node', 'way', 'relation'],
+                selector = sel,
+                includeCenter = True,
+                out = 'center'
+            )
+
+            try:
+                result = self.overpass.query(query)
+            except Exception as e:
+                raise Exception("query unsuccessful")
+
+            for elem in result.elements():
+
+                location = (elem.centerLat(), elem.centerLon())
+
+                if location[0] is None :
+                    location = (elem.lat(), elem.lon())
+                    if location[0] is None :
+                        continue
+
+                d = get_distance(shopping_location.centroid, location)
+                if d < min_dist :
+                    min_dist = d
+                    new_name = elem.tag('name')
+                    osm_type = elem.type()   # Add type: 'way' or 'relation'
+                    osm_id = elem.id()       # Add OSM id
+
+                    # add english name if it exists
+                    try :
+                        new_name_en = elem.tag('name:en')
+                    except:
+                        pass
+
+        return Landmark(
+            name=new_name,
+            type='shopping',
+            location=shopping_location.centroid,   # TODO: use the fact that we can also recognize streets.
+            attractiveness=shopping_location.importance,
+            n_tags=0,
+            osm_id=osm_id,
+            osm_type=osm_type,
+            name_en=new_name_en
+        )
+
+
+    def filter_clusters(self):
-    """
-    Remove clusters of less importance.
-    """
-    label_counts = np.bincount(cluster_labels)
+        """
+        Remove clusters of lesser importance.
+        """
+        label_counts = np.bincount(self.cluster_labels)
 
         # Step 3: Get the indices (labels) of the 5 largest clusters
         top_5_labels = np.argsort(label_counts)[-5:]  # Get the largest 5 clusters
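The eps values above are in degrees, since the points are raw (lat, lon) pairs: one degree of latitude is roughly 111 km, so eps=0.00118 corresponds to a neighbourhood radius on the order of 130 m, and eps=0.00075 to roughly 80 m (less in longitude away from the equator). A self-contained sketch of the same cluster-then-centroid step on synthetic coordinates (the shop positions are invented for illustration):

    import numpy as np
    from sklearn.cluster import DBSCAN

    # Synthetic (lat, lon) shop positions: two tight groups plus one isolated shop.
    points = np.array([
        [45.7640, 4.8350], [45.7641, 4.8352], [45.7642, 4.8349],
        [45.7575, 4.8330], [45.7576, 4.8331], [45.7574, 4.8329],
        [45.7700, 4.8600],   # isolated shop, ends up labelled as noise (-1)
    ])

    dbscan = DBSCAN(eps=0.00075, min_samples=3, algorithm='kd_tree')
    labels = dbscan.fit_predict(points)

    # Drop noise, then reduce each cluster to its centroid,
    # mirroring generate_shopping_locations above.
    cluster_points = points[labels != -1]
    cluster_labels = labels[labels != -1]
    for label in set(cluster_labels):
        current = cluster_points[cluster_labels == label]
        print(label, np.mean(current, axis=0), len(current))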
@@ -84,82 +199,10 @@ def filter_clusters(cluster_points, cluster_labels):
        filtered_cluster_labels = []
 
        for label in top_5_labels:
-        filtered_cluster_points.append(cluster_points[cluster_labels == label])
+            filtered_cluster_points.append(self.cluster_points[self.cluster_labels == label])
            filtered_cluster_labels.append(np.full((label_counts[label],), label))  # Replicate the label
 
-    # Concatenate filtered clusters into a single array
-    return np.vstack(filtered_cluster_points), np.concatenate(filtered_cluster_labels)
+        # update the cluster points and labels with the filtered data
+        self.cluster_points = np.vstack(filtered_cluster_points)
+        self.cluster_labels = np.concatenate(filtered_cluster_labels)
-
-
-def fit_lines(points, labels):
-    """
-    Fit lines to identified clusters.
-    """
-    all_x = []
-    all_y = []
-    lines = []
-    locations = []
-
-    for label in set(labels):
-        cluster_points = points[labels == label]
-
-        # If there's not enough points, skip
-        if len(cluster_points) < 2:
-            continue
-
-        # Apply PCA to find the principal component (i.e., the line of best fit)
-        pca = PCA(n_components=1)
-        pca.fit(cluster_points)
-
-        direction = pca.components_[0]
-        centroid = pca.mean_
-
-        # Project the cluster points onto the principal direction (line direction)
-        projections = np.dot(cluster_points - centroid, direction)
-
-        # Get the range of the projections to find the approximate length of the cluster
-        cluster_length = projections.max() - projections.min()
-
-        # Now adjust `t` so that it scales with the cluster length
-        t = np.linspace(-cluster_length / 2.75, cluster_length / 2.75, 10)
-
-        # Calculate the start and end of the line based on min/max projections
-        start_point = centroid[0] + t*direction[0]
-        end_point = centroid[1] + t*direction[1]
-
-        # Store the line
-        lines.append((start_point, end_point))
-
-        # For visualization, store the points
-        all_x.append(min(start_point))
-        all_x.append(max(start_point))
-        all_y.append(min(end_point))
-        all_y.append(max(end_point))
-
-        if np.linalg.norm(t) <= 0.0045 :
-            loc = ShoppingLocation(
-                type='area',
-                centroid=tuple((centroid[1], centroid[0])),
-                importance = len(cluster_points),
-            )
-        else :
-            loc = ShoppingLocation(
-                type='street',
-                centroid=tuple((centroid[1], centroid[0])),
-                importance = len(cluster_points),
-                start=start_point,
-                end=end_point
-            )
-
-        locations.append(loc)
-
-    xmin = min(all_x)
-    xmax = max(all_x)
-    ymin = min(all_y)
-    ymax = max(all_y)
-    corners = (xmin, xmax, ymin, ymax)
-
-    return corners, locations
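One subtlety in filter_clusters: np.bincount only accepts non-negative integers, so it can only run after generate_clusters has stripped the noise label -1. A toy sketch of the top-k selection it performs (the label array is invented for illustration, and np.isin condenses the per-label loop used in the diff):

    import numpy as np

    cluster_labels = np.array([0, 0, 0, 1, 2, 2, 2, 2, 3, 3])  # noise (-1) already removed
    label_counts = np.bincount(cluster_labels)                  # -> [3, 1, 4, 2]
    top_labels = np.argsort(label_counts)[-2:]                  # two largest clusters: [0, 2]
    keep = np.isin(cluster_labels, top_labels)
    print(cluster_labels[keep])                                 # [0 0 0 2 2 2 2]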
@@ -5,6 +5,7 @@ from OSMPythonTools.cachingStrategy import CachingStrategy, JSON
 from ..structs.preferences import Preferences
 from ..structs.landmark import Landmark
 from .take_most_important import take_most_important
+from .cluster_processing import ShoppingManager
 
 from ..constants import AMENITY_SELECTORS_PATH, LANDMARK_PARAMETERS_PATH, OPTIMIZER_PARAMETERS_PATH, OSM_CACHE_DIR
@@ -94,10 +95,19 @@ class LandmarkManager:
         if preferences.shopping.score != 0:
             score_function = lambda score: score * 10 * preferences.shopping.score / 5
             current_landmarks = self.fetch_landmarks(bbox, self.amenity_selectors['shopping'], preferences.shopping.type, score_function)
 
             # set time for all shopping activities :
             for landmark in current_landmarks : landmark.duration = 30
             all_landmarks.update(current_landmarks)
 
+            # special pipeline for shopping malls
+            shopping_manager = ShoppingManager(bbox)
+            if shopping_manager.valid :
+                shopping_clusters = shopping_manager.generate_shopping_landmarks()
+                for landmark in shopping_clusters : landmark.duration = 45
+                all_landmarks.update(shopping_clusters)
+
 
         landmarks_constrained = take_most_important(all_landmarks, self.N_important)
         self.logger.info(f'Generated {len(all_landmarks)} landmarks around {center_coordinates}, and constrained to {len(landmarks_constrained)} most important ones.')
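The whole feature is driven through the three calls added above: construct the manager for the current bounding box, check valid, then generate landmarks. A hedged usage sketch, assuming bbox takes the ('around:<radius_m>', lat, lon) form used elsewhere in this diff (radius and coordinates here are illustrative):

    from .cluster_processing import ShoppingManager

    # Hypothetical bbox: a 2 km radius around Place Bellecour, Lyon.
    bbox = ("around:2000", "45.7576485", "4.8330241")

    shopping_manager = ShoppingManager(bbox)
    if shopping_manager.valid:          # False when Overpass returned no shops
        landmarks = shopping_manager.generate_shopping_landmarks()
        for landmark in landmarks:
            landmark.duration = 45      # fixed visit time, as set in the pipeline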
@@ -353,7 +363,6 @@ class LandmarkManager:
         return return_list
-
 
 
 def dict_to_selector_list(d: dict) -> list:
     """
     Convert a dictionary of key-value pairs to a list of Overpass query strings.