# LocalAI/examples/kubernetes/deployment-nvidia.yaml
#
# Kubernetes example: Namespace, PersistentVolumeClaim, Deployment and
# NodePort Service for running LocalAI with an NVIDIA GPU.

# Namespace that the PersistentVolumeClaim, Deployment and Service in this
# file are all created in.
apiVersion: v1
kind: Namespace
metadata:
  name: local-ai
---
# Claim for the storage that the local-ai Deployment mounts at /build/models.
# No storageClassName is set, so the cluster's default StorageClass (if one
# is configured) is used to satisfy the claim.
apiVersion: v1
kind: PersistentVolumeClaim
metadata:
  name: models-pvc
  namespace: local-ai
spec:
  # ReadWriteOnce: the volume can be mounted read-write by a single node.
  accessModes:
    - ReadWriteOnce
  resources:
    requests:
      storage: 50Gi
---
# Single-replica LocalAI Deployment that requests one NVIDIA GPU and keeps
# its models on the models-pvc PersistentVolumeClaim.
apiVersion: apps/v1
kind: Deployment
metadata:
  name: local-ai
  namespace: local-ai
  labels:
    app: local-ai
spec:
  selector:
    matchLabels:
      app: local-ai
  replicas: 1
  # Recreate instead of the default RollingUpdate: the pod holds a
  # ReadWriteOnce volume and a whole GPU, so a surge pod started alongside
  # the old one can block on either resource and stall the rollout.
  strategy:
    type: Recreate
  template:
    metadata:
      labels:
        app: local-ai
      name: local-ai
    spec:
      # A RuntimeClass named "nvidia" must already exist in the cluster.
      runtimeClassName: "nvidia"
      containers:
        - name: local-ai
          # NOTE(review): "master-..." looks like a moving tag; with
          # IfNotPresent a node that already cached it will not re-pull.
          image: quay.io/go-skynet/local-ai:master-cublas-cuda12
          imagePullPolicy: IfNotPresent
          # Handed to the image entrypoint as its arguments; presumably the
          # model to load at startup - confirm against the image.
          args:
            - phi-2
          env:
            # Quoted so the value stays a string rather than a YAML boolean.
            - name: DEBUG
              value: "true"
          resources:
            limits:
              nvidia.com/gpu: 1
          volumeMounts:
            - name: models-volume
              mountPath: /build/models
      volumes:
        - name: models-volume
          persistentVolumeClaim:
            claimName: models-pvc
---
# NodePort Service that forwards TCP port 8080 to port 8080 of the pods
# labelled app=local-ai. No nodePort is pinned, so Kubernetes assigns one
# from the cluster's node-port range.
apiVersion: v1
kind: Service
metadata:
  name: local-ai
  namespace: local-ai
spec:
  selector:
    app: local-ai
  type: NodePort
  ports:
    - protocol: TCP
      targetPort: 8080
      port: 8080