From cb0766fde2d4c7768a6b3bf5aaa15a4dbd248b99 Mon Sep 17 00:00:00 2001
From: Remy Moll <me@moll.re>
Date: Mon, 30 Sep 2024 15:22:01 +0200
Subject: [PATCH] play with containerized llms

---
 apps/ollama/backend.deployment.yaml  | 59 ++++++++++++++++++++++++++++++
 apps/ollama/backend.service.yaml     | 12 ++++++
 apps/ollama/frontend.deployment.yaml | 32 ++++++++++++++++
 apps/ollama/frontend.service.yaml    | 12 ++++++
 apps/ollama/ingress.yaml             | 16 ++++++++
 apps/ollama/kustomization.yaml       | 21 +++++++++++
 apps/ollama/namespace.yaml           |  8 ++++
 7 files changed, 160 insertions(+)
 create mode 100644 apps/ollama/backend.deployment.yaml
 create mode 100644 apps/ollama/backend.service.yaml
 create mode 100644 apps/ollama/frontend.deployment.yaml
 create mode 100644 apps/ollama/frontend.service.yaml
 create mode 100644 apps/ollama/ingress.yaml
 create mode 100644 apps/ollama/kustomization.yaml
 create mode 100644 apps/ollama/namespace.yaml

diff --git a/apps/ollama/backend.deployment.yaml b/apps/ollama/backend.deployment.yaml
new file mode 100644
index 0000000..d313e7f
--- /dev/null
+++ b/apps/ollama/backend.deployment.yaml
@@ -0,0 +1,59 @@
+apiVersion: apps/v1
+kind: Deployment
+metadata:
+  name: ollama-rocm
+spec:
+  replicas: 1
+  selector:
+    matchLabels:
+      app: ollama-rocm
+  template:
+    metadata:
+      labels:
+        app: ollama-rocm
+    spec:
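+      # pin the pod to the node labelled gpu=full, i.e. the node that exposes the AMD GPU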
+      nodeSelector:
+        gpu: full
+      containers:
+      - name: ollama
+        image: ollama
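+        # bare image name on purpose; the images field in kustomization.yaml pins the real image and tag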
+        env:
+        - name: HSA_OVERRIDE_GFX_VERSION
+          # allows running on the iGPU as well
+          value: "11.0.0"
+        ports:
+        - containerPort: 11434
+          name: ollama
+        volumeMounts:
+        - name: ollama-data
+          mountPath: /root/.ollama
+        - name: dshm
+          mountPath: /dev/shm
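+        # ROCm needs the host GPU device nodes (/dev/dri, /dev/kfd) inside the container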
+        - name: dri
+          mountPath: /dev/dri/
+        - name: kfd
+          mountPath: /dev/kfd
+        resources:
+          requests:
+            memory: "1Gi"
+            cpu: "1"
+          limits:
+            memory: "16Gi"
+            cpu: "8"
+
+      volumes:
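+      # models live in an emptyDir and are re-pulled after a pod restart; a PVC would persist them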
+      - name: ollama-data
+        emptyDir: {}
+      - name: dri
+        hostPath:
+          path: /dev/dri/
+      - name: dshm
+        emptyDir:
+          medium: Memory
+      - name: kfd
+        hostPath:
+          path: /dev/kfd
diff --git a/apps/ollama/backend.service.yaml b/apps/ollama/backend.service.yaml
new file mode 100644
index 0000000..a9293fd
--- /dev/null
+++ b/apps/ollama/backend.service.yaml
@@ -0,0 +1,12 @@
+apiVersion: v1
+kind: Service
+metadata:
+  name: ollama-service
+spec:
+  selector:
+    app: ollama-rocm
+  ports:
+  - protocol: TCP
+    port: 11434
+    targetPort: 11434
+    name: ollama
\ No newline at end of file
diff --git a/apps/ollama/frontend.deployment.yaml b/apps/ollama/frontend.deployment.yaml
new file mode 100644
index 0000000..9eb86b8
--- /dev/null
+++ b/apps/ollama/frontend.deployment.yaml
@@ -0,0 +1,32 @@
+apiVersion: apps/v1
+kind: Deployment
+metadata:
+  name: ollama-ui
+  labels:
+    app: ollama-ui
+spec:
+  replicas: 1
+  selector:
+    matchLabels:
+      app: ollama-ui
+  template:
+    metadata:
+      labels:
+        app: ollama-ui
+    spec:
+      containers:
+        - name: ollama-ui
+          image: ollama-ui
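+          # bare image name; remapped to ghcr.io/open-webui/open-webui by kustomization.yaml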
+          ports:
+            - containerPort: 8080
+          env:
+            - name: OLLAMA_BASE_URL
+              value: http://ollama-service:11434
+          volumeMounts:
+            - name: ollama-ui-data
+              mountPath: /app/backend/data
+      volumes:
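+        # open-webui state (chats, settings) lives in an emptyDir and is lost when the pod restarts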
+        - name: ollama-ui-data
+          emptyDir: {}
diff --git a/apps/ollama/frontend.service.yaml b/apps/ollama/frontend.service.yaml
new file mode 100644
index 0000000..8a121cb
--- /dev/null
+++ b/apps/ollama/frontend.service.yaml
@@ -0,0 +1,12 @@
+apiVersion: v1
+kind: Service
+metadata:
+  name: ollama-ui-service
+spec:
+  selector:
+    app: ollama-ui
+  ports:
+  - protocol: TCP
+    port: 8080
+    targetPort: 8080
+    name: ollama-ui
\ No newline at end of file
diff --git a/apps/ollama/ingress.yaml b/apps/ollama/ingress.yaml
new file mode 100644
index 0000000..5dc6101
--- /dev/null
+++ b/apps/ollama/ingress.yaml
@@ -0,0 +1,16 @@
+apiVersion: traefik.io/v1alpha1
+kind: IngressRoute
+metadata:
+  name: ollama-ingressroute
+
+spec:
+  entryPoints:
+    - websecure
+  routes:
+    - match: Host(`llm.kluster.moll.re`)
+      kind: Rule
+      services:
+        - name: ollama-ui-service
+          port: 8080
+  tls:
+    certResolver: default-tls
diff --git a/apps/ollama/kustomization.yaml b/apps/ollama/kustomization.yaml
new file mode 100644
index 0000000..06566e5
--- /dev/null
+++ b/apps/ollama/kustomization.yaml
@@ -0,0 +1,21 @@
+apiVersion: kustomize.config.k8s.io/v1beta1
+kind: Kustomization
+
+namespace: ollama
+
+resources:
+  - namespace.yaml
+  - backend.deployment.yaml
+  - backend.service.yaml
+  - frontend.deployment.yaml
+  - frontend.service.yaml
+  - ingress.yaml
+
+images:
+  - name: ollama
+    newName: ollama/ollama
+    newTag: 0.3.6-rocm
+  - name: ollama-ui
+    newName: ghcr.io/open-webui/open-webui
+    newTag: main
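+    # note: "main" is a moving tag; pinning a release tag keeps deployments reproducible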
diff --git a/apps/ollama/namespace.yaml b/apps/ollama/namespace.yaml
new file mode 100644
index 0000000..1178cee
--- /dev/null
+++ b/apps/ollama/namespace.yaml
@@ -0,0 +1,8 @@
+apiVersion: v1
+kind: Namespace
+metadata:
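+  # kustomize's namespace transformer overrides this placeholder with the namespace set in kustomization.yaml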
+  name: placeholder
+  labels:
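+    # privileged is required because the ollama pod mounts host devices (/dev/kfd, /dev/dri) via hostPath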
+    pod-security.kubernetes.io/enforce: privileged