Update example K8s manifests (#40)

2025-02-21 09:41:45 +00:00 · 2023-04-20 09:31:11 -07:00 · 2023-04-20 09:31:11 -07:00 · c905512bb0
commit c905512bb0
parent 1254951fab
2 changed files with 56 additions and 11 deletions
--- a/kubernetes/data-volume.yaml
+++ b/kubernetes/data-volume.yaml
@ -0,0 +1,28 @@
 # Create a PVC containing a model binary, sourced from an arbitrary HTTP server
 # (requires https://github.com/kubevirt/containerized-data-importer)
 apiVersion: cdi.kubevirt.io/v1beta1
 kind: DataVolume
 metadata:
  name: models  # deployment.yaml mounts a claim with this name (claimName: models)
  namespace: local-ai  # same namespace as the Deployment that mounts the volume
 spec:
  contentType: archive  # the source URL below points at a .tar file
  source:
    http:
      url: http://<model_server>/koala-7B-4bit-128g.GGML.tar  # <model_server> is a placeholder
      secretRef: model-secret  # name of the Secret defined below in this file
  pvc:
    accessModes:
    - ReadWriteOnce
    resources:
      requests:
        storage: 5Gi  # NOTE(review): presumably sized for the unpacked model - confirm
 ---
 apiVersion: v1
 kind: Secret  # HTTP credentials for the model server used by the DataVolume above
 metadata:
  name: model-secret  # must match spec.source.http.secretRef in the DataVolume
  namespace: local-ai
 data:
  accessKeyId: <model_server_username_base64_encoded>  # placeholder: base64 of the username
  secretKey: <model_server_password_base64_encoded>  # placeholder: base64 of the password
--- a/kubernetes/deployment.yaml
+++ b/kubernetes/deployment.yaml
@ -1,38 +1,55 @@
 apiVersion: v1
 kind: Namespace  # namespace that the Deployment and Service below are placed in
 metadata:
-  name: llama
+  name: local-ai
 ---
 apiVersion: apps/v1
 kind: Deployment  # runs one local-ai container with the "models" claim mounted at /models
 metadata:
-  name: llama
+  name: local-ai
-  namespace: llama
+  namespace: local-ai
  labels:
-    app: llama
+    app: local-ai
 spec:
  selector:
    matchLabels:
-      app: llama
+      app: local-ai  # must equal the pod template label below
  replicas: 1
  template:
    metadata:
      labels:
-        app: llama
+        app: local-ai  # also the label the Service selector in this file matches on
-      name: llama
+      name: local-ai
    spec:
      containers:
-        - name: llama
+        - name: local-ai
          image: quay.io/go-skynet/local-ai:latest  # NOTE(review): floating tag - consider pinning
          env:
          - name: THREADS  # NOTE(review): presumably the inference thread count - confirm
            value: "14"  # quoted so YAML keeps it a string rather than an integer
          - name: CONTEXT_SIZE  # NOTE(review): presumably the model context length - confirm
            value: "512"  # quoted so YAML keeps it a string rather than an integer
          - name: MODELS_PATH
            value: /models  # same path as the volumeMount below
          volumeMounts:
          - mountPath: /models
            name: models  # refers to the entry under "volumes" below
      volumes:
      - name: models
        persistentVolumeClaim:
          claimName: models  # data-volume.yaml defines a DataVolume with this name
 ---
 apiVersion: v1
 kind: Service
 metadata:
-  name: llama
+  name: local-ai
-  namespace: llama
+  namespace: local-ai
  # If using AWS, you'll need to override the default 60s load balancer idle timeout
  # annotations:
  #   service.beta.kubernetes.io/aws-load-balancer-connection-idle-timeout: "1200"
 spec:
  selector:
-    app: llama
+    app: local-ai
  type: LoadBalancer
  ports:
    - protocol: TCP