mirror of
https://github.com/mudler/LocalAI.git
synced 2025-02-21 09:41:45 +00:00
Update example K8s manifests (#40)
This commit is contained in:
parent
1254951fab
commit
c905512bb0
28
kubernetes/data-volume.yaml
Normal file
28
kubernetes/data-volume.yaml
Normal file
@ -0,0 +1,28 @@
|
|||||||
|
# Create a PVC containing a model binary, sourced from an arbitrary HTTP server
|
||||||
|
# (requires https://github.com/kubevirt/containerized-data-importer)
|
||||||
|
apiVersion: cdi.kubevirt.io/v1beta1
|
||||||
|
kind: DataVolume
|
||||||
|
metadata:
|
||||||
|
name: models
|
||||||
|
namespace: local-ai
|
||||||
|
spec:
|
||||||
|
contentType: archive
|
||||||
|
source:
|
||||||
|
http:
|
||||||
|
url: http://<model_server>/koala-7B-4bit-128g.GGML.tar
|
||||||
|
secretRef: model-secret
|
||||||
|
pvc:
|
||||||
|
accessModes:
|
||||||
|
- ReadWriteOnce
|
||||||
|
resources:
|
||||||
|
requests:
|
||||||
|
storage: 5Gi
|
||||||
|
---
|
||||||
|
apiVersion: v1
|
||||||
|
kind: Secret
|
||||||
|
metadata:
|
||||||
|
name: model-secret
|
||||||
|
namespace: local-ai
|
||||||
|
data:
|
||||||
|
accessKeyId: <model_server_username_base64_encoded>
|
||||||
|
secretKey: <model_server_password_base64_encoded>
|
@ -1,38 +1,55 @@
|
|||||||
apiVersion: v1
|
apiVersion: v1
|
||||||
kind: Namespace
|
kind: Namespace
|
||||||
metadata:
|
metadata:
|
||||||
name: llama
|
name: local-ai
|
||||||
---
|
---
|
||||||
apiVersion: apps/v1
|
apiVersion: apps/v1
|
||||||
kind: Deployment
|
kind: Deployment
|
||||||
metadata:
|
metadata:
|
||||||
name: llama
|
name: local-ai
|
||||||
namespace: llama
|
namespace: local-ai
|
||||||
labels:
|
labels:
|
||||||
app: llama
|
app: local-ai
|
||||||
spec:
|
spec:
|
||||||
selector:
|
selector:
|
||||||
matchLabels:
|
matchLabels:
|
||||||
app: llama
|
app: local-ai
|
||||||
replicas: 1
|
replicas: 1
|
||||||
template:
|
template:
|
||||||
metadata:
|
metadata:
|
||||||
labels:
|
labels:
|
||||||
app: llama
|
app: local-ai
|
||||||
name: llama
|
name: local-ai
|
||||||
spec:
|
spec:
|
||||||
containers:
|
containers:
|
||||||
- name: llama
|
- name: local-ai
|
||||||
image: quay.io/go-skynet/local-ai:latest
|
image: quay.io/go-skynet/local-ai:latest
|
||||||
|
env:
|
||||||
|
- name: THREADS
|
||||||
|
value: "14"
|
||||||
|
- name: CONTEXT_SIZE
|
||||||
|
value: "512"
|
||||||
|
- name: MODELS_PATH
|
||||||
|
value: /models
|
||||||
|
volumeMounts:
|
||||||
|
- mountPath: /models
|
||||||
|
name: models
|
||||||
|
volumes:
|
||||||
|
- name: models
|
||||||
|
persistentVolumeClaim:
|
||||||
|
claimName: models
|
||||||
---
|
---
|
||||||
apiVersion: v1
|
apiVersion: v1
|
||||||
kind: Service
|
kind: Service
|
||||||
metadata:
|
metadata:
|
||||||
name: llama
|
name: local-ai
|
||||||
namespace: llama
|
namespace: local-ai
|
||||||
|
# If using AWS, you'll need to override the default 60s load balancer idle timeout
|
||||||
|
# annotations:
|
||||||
|
# service.beta.kubernetes.io/aws-load-balancer-connection-idle-timeout: "1200"
|
||||||
spec:
|
spec:
|
||||||
selector:
|
selector:
|
||||||
app: llama
|
app: local-ai
|
||||||
type: LoadBalancer
|
type: LoadBalancer
|
||||||
ports:
|
ports:
|
||||||
- protocol: TCP
|
- protocol: TCP
|
||||||
|
Loading…
x
Reference in New Issue
Block a user