Build images without model

commit bffaf2aa42 (parent d98d1fe55e)
Earthfile
@@ -34,5 +34,14 @@ image:
     ENTRYPOINT [ "/llama-cli" ]
     SAVE IMAGE --push $IMAGE
 
+lite-image:
+    FROM +go-deps
+    ARG IMAGE=alpaca-cli-nomodel
+    COPY +build/llama-cli /llama-cli
+    ENV MODEL_PATH=/model.bin
+    ENTRYPOINT [ "/llama-cli" ]
+    SAVE IMAGE --push $IMAGE-lite
+
 image-all:
     BUILD --platform=linux/amd64 --platform=linux/arm64 +image
+    BUILD --platform=linux/amd64 --platform=linux/arm64 +lite-image
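The new `lite-image` target ships only the `llama-cli` binary, so a model must be supplied at run time at the path set by `ENV MODEL_PATH=/model.bin`. A minimal usage sketch, assuming the image is pushed under the default `$IMAGE` value (`alpaca-cli-nomodel`, which `SAVE IMAGE --push $IMAGE-lite` tags as `alpaca-cli-nomodel-lite`); your registry path may differ:

```bash
# Hypothetical invocation of the lite image: no weights are baked in,
# so mount your own model file at /model.bin (the MODEL_PATH default).
docker run -ti --rm \
  -v $PWD/model.bin:/model.bin \
  alpaca-cli-nomodel-lite \
  --instruction "What's an alpaca?" --topk 10000
```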
README.md
@@ -8,7 +8,7 @@ llama-cli is a straightforward golang CLI interface for [llama.cpp](https://gith
 The `llama-cli` [container images](https://quay.io/repository/go-skynet/llama-cli?tab=tags&tag=latest) come preloaded with the [alpaca.cpp 7B](https://github.com/antimatter15/alpaca.cpp) model, enabling you to start making predictions immediately! To begin, run:
 
 ```
-docker run -ti --rm quay.io/go-skynet/llama-cli:v0.1 --instruction "What's an alpaca?" --topk 10000
+docker run -ti --rm quay.io/go-skynet/llama-cli:v0.2 --instruction "What's an alpaca?" --topk 10000
 ```
 
 You will receive a response like the following:
@@ -53,7 +53,7 @@ This will generate text based on the given model and instruction.
 Example of starting the API with `docker`:
 
 ```bash
-docker run -p 8080:8080 -ti --rm quay.io/go-skynet/llama-cli:v0.1 api
+docker run -p 8080:8080 -ti --rm quay.io/go-skynet/llama-cli:v0.2 api
 ```
 
 And you'll see:
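Once the API is up it is queried over HTTP; the next hunk's header carries the README's `curl ... /predict` call as a (truncated) context line. A hedged reconstruction of such a request follows, where the endpoint, method, and header come from that context line but the JSON field names are assumptions, not something this diff confirms:

```bash
# Hypothetical request against the running API. Endpoint, method, and
# Content-Type come from the README's curl context line; the JSON field
# names are assumed, not confirmed by this diff.
curl --location --request POST 'http://localhost:8080/predict' \
  --header 'Content-Type: application/json' \
  --data-raw '{ "text": "What is an alpaca?", "topK": 10 }'
```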
@@ -102,29 +102,21 @@ curl --location --request POST 'http://localhost:8080/predict' --header 'Content
 ### 13B
 
 ```
-wget -O tokenizer.model https://huggingface.co/decapoda-research/llama-30b-hf/resolve/main/tokenizer.model
-mkdir models
-wget -O models/gml-model-13B-q4_0.bin https://huggingface.co/Pi3141/alpaca-13B-ggml/resolve/main/ggml-model-q4_0.bin
-git clone https://gist.github.com/eiz/828bddec6162a023114ce19146cb2b82
-python 828bddec6162a023114ce19146cb2b82/gistfile1.txt models tokenizer.models
-mv models/gml-model-13B-q4_0.bin.tmp models/gml-model-13B-q4_0.bin
+docker run --name model --entrypoint /models quay.io/go-skynet/models:ggml2-alpaca-13b-v0.2
+docker cp model:/models/model.bin ./
 
 # Use the model with llama-cli
-docker run -v $PWD/models:/models -p 8080:8080 -ti --rm quay.io/go-skynet/llama-cli:master api --model /models/gml-model-13B-q4_0.bin
+docker run -v $PWD:/models -p 8080:8080 -ti --rm quay.io/go-skynet/llama-cli:v0.2 api --model /models/model.bin
 ```
 
 ### 30B
 
 ```
-wget -O tokenizer.model https://huggingface.co/decapoda-research/llama-30b-hf/resolve/main/tokenizer.model
-mkdir models
-wget -O models/ggml-model-30B-q4_0.bin https://huggingface.co/Pi3141/alpaca-30B-ggml/blob/main/ggml-model-q4_0.bin
-git clone https://gist.github.com/eiz/828bddec6162a023114ce19146cb2b82
-python 828bddec6162a023114ce19146cb2b82/gistfile1.txt models tokenizer.models
-mv models/ggml-model-30B-q4_0.bin.tmp models/ggml-model-30B-q4_0.bin
+docker run --name model --entrypoint /models quay.io/go-skynet/models:ggml2-alpaca-30b-v0.2
+docker cp model:/models/model.bin ./
 
 # Use the model with llama-cli
-docker run -v $PWD/models:/models -p 8080:8080 -ti --rm quay.io/go-skynet/llama-cli:master api --model /models/ggml-model-30B-q4_0.bin
+docker run -v $PWD:/models -p 8080:8080 -ti --rm quay.io/go-skynet/llama-cli:v0.2 api --model /models/model.bin
 ```
 
 ### Golang client API
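Both the 13B and 30B sections now share one pattern: the weights are distributed as a `quay.io/go-skynet/models` image, a named container is created from it, and `docker cp` pulls `model.bin` out onto the host. A sketch of the full round trip for 13B, with a `docker rm` cleanup step added here that the diff itself does not include:

```bash
# Extract the 13B weights from the model image into the current directory.
docker run --name model --entrypoint /models quay.io/go-skynet/models:ggml2-alpaca-13b-v0.2
docker cp model:/models/model.bin ./
# The named container is no longer needed once the copy is done
# (cleanup step added here; not part of the diff above).
docker rm model

# Serve the extracted weights through the API.
docker run -v $PWD:/models -p 8080:8080 -ti --rm quay.io/go-skynet/llama-cli:v0.2 api --model /models/model.bin
```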