commit 42b8056e27057500a36d6d114b41b4d90ea3f9e7 Author: Remy Moll Date: Mon Nov 24 20:59:33 2025 +0100 initial working but bare setup diff --git a/README.md b/README.md new file mode 100644 index 0000000..d1bf00f --- /dev/null +++ b/README.md @@ -0,0 +1,18 @@ +# OSM on postgres + +## Key components +- a stable, completely standardized database that can easily be scaled, backed by postgres + postgis + - this is backed by a persistent, node-independent storage claim +- a specialized container (running as cronjob/on demand job) that ingests the data from OSM dumps. + - this is backed by its own storage to prevent excessive re-downloads + + +## Prerequisites +- A storage class provisioner +- A CNPG operator that will provision this. + + +## Results +A well defined database with a table and entries defined in [base/importer.configmap.yaml](base/importer.configmap.yaml) that can be reached from within the cluster. +- Access data can be read from the secret `osm-postgresql-app`. + diff --git a/base/importer.configmap.yaml b/base/importer.configmap.yaml new file mode 100644 index 0000000..01e549f --- /dev/null +++ b/base/importer.configmap.yaml @@ -0,0 +1,205 @@ +apiVersion: v1 +kind: ConfigMap +metadata: + name: importer-config +data: + simple.lua: | + -- This config example file is released into the Public Domain. + + -- This is a very simple Lua config for the Flex output not intended for + -- real-world use. Use it to understand the basic principles of the + -- configuration. After reading and understanding this, have a look at + -- "geometries.lua". + + -- The global variable "osm2pgsql" is used to talk to the main osm2pgsql code. + -- You can, for instance, get the version of osm2pgsql: + print('osm2pgsql version: ' .. osm2pgsql.version) + + -- A place to store the SQL tables we will define shortly. + local tables = {} + + -- Create a new table called "pois" with the given columns. 
When running in + -- "create" mode, this will do the `CREATE TABLE`, when running in "append" + -- mode, this will only declare the table for use. + -- + -- This is a "node table", it can only contain data derived from nodes and will + -- contain a "node_id" column (SQL type INT8) as first column. When running in + -- "append" mode, osm2pgsql will automatically update this table using the node + -- ids. + tables.pois = osm2pgsql.define_node_table('pois', { + { column = 'tags', type = 'jsonb' }, + -- In most cases we'll need a column for the geometry. The default + -- projection is Web Mercator (3857), so this will result in an SQL + -- type `geometry(Point, 3857)`. + { column = 'geom', type = 'point', not_null = true }, + }) + + -- A special table for restaurants to demonstrate that we can have any tables + -- with any columns we want. + tables.restaurants = osm2pgsql.define_node_table('restaurants', { + { column = 'name', type = 'text' }, + { column = 'cuisine', type = 'text' }, + -- We declare all geometry columns as "NOT NULL". If osm2pgsql encounters + -- an invalid geometry (for whatever reason) it will generate a null + -- geometry which will not be written to the database if "not_null" is + -- set. The result is that broken geometries will just be silently + -- ignored. + { column = 'geom', type = 'point', not_null = true }, + }) + + -- This is a "way table", it can only contain data derived from ways and will + -- contain a "way_id" column. When running in "append" mode, osm2pgsql will + -- automatically update this table using the way ids. + tables.ways = osm2pgsql.define_way_table('ways', { + { column = 'tags', type = 'jsonb' }, + { column = 'geom', type = 'linestring', not_null = true }, + }) + + -- This is an "area table", it can contain data derived from ways or relations + -- and will contain an "area_id" column. Way ids will be stored "as is" in the + -- "area_id" column, for relations the negative id will be stored. 
When + -- running in "append" mode, osm2pgsql will automatically update this table + -- using the way/relation ids. + tables.polygons = osm2pgsql.define_area_table('polygons', { + { column = 'type', type = 'text' }, + { column = 'tags', type = 'jsonb' }, + -- The type of the `geom` column is `geometry`, because we need to store + -- polygons AND multipolygons + { column = 'geom', type = 'geometry', not_null = true }, + }) + + -- Debug output: Show definition of tables + for name, dtable in pairs(tables) do + print("\ntable '" .. name .. "':") + print(" name='" .. dtable:name() .. "'") + end + + -- Called for every node in the input. The `object` argument contains all the + -- attributes of the node like `id`, `version`, etc. as well as all tags as a + -- Lua table (`object.tags`). + function osm2pgsql.process_node(object) + if object.tags.amenity == 'restaurant' then + -- Add a row to the SQL table. The keys in the parameter table + -- correspond to the table columns, if one is missing the column will + -- be NULL. The id column will be filled automatically. + tables.restaurants:insert({ + name = object.tags.name, + cuisine = object.tags.cuisine, + geom = object:as_point() + }) + else + tables.pois:insert({ + -- We know `tags` is of type `jsonb` so this will do the + -- right thing. + tags = object.tags, + geom = object:as_point() + }) + end + end + + -- Called for every way in the input. The `object` argument contains the same + -- information as with nodes and additionally a boolean `is_closed` flag and + -- the list of node IDs referenced by the way (`object.nodes`). + function osm2pgsql.process_way(object) + -- Very simple check to decide whether a way is a polygon or not, in a + -- real stylesheet we'd have to also look at the tags... 
+ if object.is_closed then + tables.polygons:insert({ + type = object.type, + tags = object.tags, + geom = object:as_polygon() + }) + else + tables.ways:insert({ + tags = object.tags, + geom = object:as_linestring() + }) + end + end + + -- Called for every relation in the input. The `object` argument contains the + -- same information as with nodes and additionally an array of members + -- (`object.members`). + function osm2pgsql.process_relation(object) + -- Store multipolygons and boundaries as polygons + if object.tags.type == 'multipolygon' or + object.tags.type == 'boundary' then + tables.polygons:insert({ + type = object.type, + tags = object.tags, + geom = object:as_multipolygon() + }) + end + end + + anyway.lua: | + -- This config file mirrors our previous Overpass selectors. They follow an arbitrary selection from https://taginfo.openstreetmap.org + + local tables = {} + + tables.landmark = osm2pgsql.define_node_table('landmark', { + { column = 'type', type = 'text' }, + { column = 'name', type = 'text' }, + { column = 'name_en', type = 'text' }, + { column = 'description', type = 'text' }, + { column = 'tags', type = 'jsonb' }, + { column = 'geom', type = 'point', not_null = true }, + }) + + + + function name_in_list(tag, list) + if tag == nil then + return false + end + for _, v in ipairs(list) do + if tag == v then + return true + end + end + return false + end + + function osm2pgsql.process_node(object) + -- we assign landmarks a type based on their tags. 
so we specify a list of tags to check and assign a type accordingly + + + -- nature landmark + if name_in_list(object.tags.natural, {'geyser', 'hot_spring', 'arch', 'volcano', 'stone'}) or name_in_list(object.tags.tourism, {'alpine_hut', 'viewpoint', 'zoo', 'resort', 'picnic_site'}) or name_in_list(object.tags.water, {'pond', 'lake', 'river', 'basin', 'stream', 'lagoon', 'rapids'}) or name_in_list(object.tags.waterway, {'waterfall', 'river', 'canal', 'dam', 'dock', 'boatyard'}) then + tables.landmark:insert({ + type = 'nature', + name = object.tags.name, + name_en = object.tags['name:en'], + description = object.tags.description, + tags = object.tags, + geom = object:as_point() + }) + return + end + + -- shopping landmark + if name_in_list(object.tags.shop, {'mall', 'department_store', 'clothes', 'shoes', 'jewelry', 'furniture', 'electronics', 'books'}) then + tables.landmark:insert({ + type = 'shopping', + name = object.tags.name, + name_en = object.tags['name:en'], + description = object.tags.description, + tags = object.tags, + geom = object:as_point() + }) + return + end + + -- sightseeing landmark + if name_in_list(object.tags.tourism, {'museum', 'attraction', 'artwork', 'gallery', 'aquarium', 'castle', 'monument', 'memorial'}) or object.tags.historic ~= nil or name_in_list(object.tags.amenity, {'theatre', 'planetarium', 'library', 'place_of_worship', 'fountain', 'townhall'}) or name_in_list(object.tags.bridge, {'aqueduct', 'viaduct', 'suspension', 'boardwalk', 'cantilever', 'abandoned'}) or name_in_list(object.tags.building, {'cathedral', 'church', 'mosque', 'temple', 'synagogue', 'chapel'}) then + tables.landmark:insert({ + type = 'sightseeing', + name = object.tags.name, + name_en = object.tags['name:en'], + description = object.tags.description, + tags = object.tags, + geom = object:as_point() + }) + return + end + end diff --git a/base/importer.cronjob.yaml b/base/importer.cronjob.yaml new file mode 100644 index 0000000..29adac5 --- /dev/null +++ 
b/base/importer.cronjob.yaml @@ -0,0 +1,67 @@ +apiVersion: batch/v1 +kind: CronJob +metadata: + name: importer +spec: + schedule: '0 0 1 1 1' + concurrencyPolicy: Forbid + jobTemplate: + spec: + template: + spec: + containers: + - name: download + image: download + command: ["/bin/sh", "-c"] + args: + - '[ ! -f /data/latest.osm.pbf ] && curl -o /data/latest.osm.pbf http://download.geofabrik.de/europe/switzerland-251123.osm.pbf || echo "File already exists, skipping download"' + volumeMounts: + - name: data + mountPath: /data + + + + - name: importer + image: importer + args: + - --create + - --input-reader=pbf + - --output=flex + - --style=/rules/anyway.lua + - /data/latest.osm.pbf + + # connection details are implicitly loaded from env vars, not set as args + + # Environment Variables + env: + - name: PGHOST + value: "osm-postgresql-r" + - name: PGPORT + value: "5432" + - name: PGDATABASE + value: "osm" + - name: PGUSER + valueFrom: + secretKeyRef: + name: osm-postgresql-app + key: username + - name: PGPASSWORD + valueFrom: + secretKeyRef: + name: osm-postgresql-app + key: password + volumeMounts: + - name: data + mountPath: /data + - name: rules + mountPath: /rules + + restartPolicy: Never + + volumes: + - name: data + persistentVolumeClaim: + claimName: worldfile + - name: rules + configMap: + name: importer-config diff --git a/base/kustomization.yaml b/base/kustomization.yaml new file mode 100644 index 0000000..44c5a16 --- /dev/null +++ b/base/kustomization.yaml @@ -0,0 +1,22 @@ +apiVersion: kustomize.config.k8s.io/v1beta1 +kind: Kustomization + + + +resources: + - namespace.yaml + + - postgres.yaml + + - worldfile.pvc.yaml + - importer.cronjob.yaml + - importer.configmap.yaml + + +images: + - name: importer + newName: iboates/osm2pgsql + newTag: 2.1.0-nightly + - name: download + newName: curlimages/curl + newTag: 8.4.0 diff --git a/base/namespace.yaml b/base/namespace.yaml new file mode 100644 index 0000000..0a074bd --- /dev/null +++ b/base/namespace.yaml @@ 
-0,0 +1,4 @@ +apiVersion: v1 +kind: Namespace +metadata: + name: placeholder diff --git a/base/postgres.yaml b/base/postgres.yaml new file mode 100644 index 0000000..2d26793 --- /dev/null +++ b/base/postgres.yaml @@ -0,0 +1,28 @@ +apiVersion: postgresql.cnpg.io/v1 +kind: Cluster +metadata: + name: osm-postgresql +spec: + instances: 1 + imageName: ghcr.io/cloudnative-pg/postgis:18-3.6-system-trixie + + bootstrap: + initdb: + owner: osm + database: osm + + postInitTemplateSQL: + - CREATE EXTENSION postgis; + # - CREATE EXTENSION postgis_topology; + # - CREATE EXTENSION fuzzystrmatch; + # - CREATE EXTENSION postgis_tiger_geocoder; + # - CREATE EXTENSION hstore; + + + storage: + size: 10Gi + pvcTemplate: + accessModes: + - ReadWriteOnce + storageClassName: nfs-client + volumeMode: Filesystem diff --git a/base/worldfile.pvc.yaml b/base/worldfile.pvc.yaml new file mode 100644 index 0000000..361e095 --- /dev/null +++ b/base/worldfile.pvc.yaml @@ -0,0 +1,11 @@ +kind: PersistentVolumeClaim +apiVersion: v1 +metadata: + name: worldfile +spec: + storageClassName: "nfs-client" + accessModes: + - ReadWriteOnce + resources: + requests: + storage: 10Gi diff --git a/overlays/stg/kustomization.yaml b/overlays/stg/kustomization.yaml new file mode 100644 index 0000000..2a66422 --- /dev/null +++ b/overlays/stg/kustomization.yaml @@ -0,0 +1,8 @@ +apiVersion: kustomize.config.k8s.io/v1beta1 +kind: Kustomization + + +namespace: anydev-osm-stg + +resources: + - ../../base/