mirror of
https://github.com/mudler/LocalAI.git
synced 2024-12-20 05:07:54 +00:00
feat: bump llama.cpp, add gguf support (#943)
**Description** This PR syncs up the `llama` backend to use `gguf` (https://github.com/go-skynet/go-llama.cpp/pull/180). It also adds `llama-stable` to the targets so we can still load ggml. It adapts the current tests to use the `llama-stable` backend for ggml and uses a `gguf` model to run tests on the new backend. In order to consume the new version of go-llama.cpp, it also bumps Go to 1.21 (images, pipelines, etc.) --------- Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
This commit is contained in:
parent
704323b805
commit
1120847f72
6
.github/workflows/release.yaml
vendored
6
.github/workflows/release.yaml
vendored
@ -22,6 +22,9 @@ jobs:
|
|||||||
uses: actions/checkout@v3
|
uses: actions/checkout@v3
|
||||||
with:
|
with:
|
||||||
submodules: true
|
submodules: true
|
||||||
|
- uses: actions/setup-go@v4
|
||||||
|
with:
|
||||||
|
go-version: '>=1.21.0'
|
||||||
- name: Dependencies
|
- name: Dependencies
|
||||||
run: |
|
run: |
|
||||||
sudo apt-get update
|
sudo apt-get update
|
||||||
@ -60,6 +63,9 @@ jobs:
|
|||||||
uses: actions/checkout@v3
|
uses: actions/checkout@v3
|
||||||
with:
|
with:
|
||||||
submodules: true
|
submodules: true
|
||||||
|
- uses: actions/setup-go@v4
|
||||||
|
with:
|
||||||
|
go-version: '>=1.21.0'
|
||||||
- name: Build
|
- name: Build
|
||||||
id: build
|
id: build
|
||||||
env:
|
env:
|
||||||
|
4
.github/workflows/test.yml
vendored
4
.github/workflows/test.yml
vendored
@ -18,7 +18,7 @@ jobs:
|
|||||||
runs-on: ubuntu-latest
|
runs-on: ubuntu-latest
|
||||||
strategy:
|
strategy:
|
||||||
matrix:
|
matrix:
|
||||||
go-version: ['1.20.x', 'stable']
|
go-version: ['1.21.x']
|
||||||
steps:
|
steps:
|
||||||
- name: Clone
|
- name: Clone
|
||||||
uses: actions/checkout@v3
|
uses: actions/checkout@v3
|
||||||
@ -63,7 +63,7 @@ jobs:
|
|||||||
runs-on: macOS-latest
|
runs-on: macOS-latest
|
||||||
strategy:
|
strategy:
|
||||||
matrix:
|
matrix:
|
||||||
go-version: ['1.20.x', 'stable']
|
go-version: ['1.21.x']
|
||||||
steps:
|
steps:
|
||||||
- name: Clone
|
- name: Clone
|
||||||
uses: actions/checkout@v3
|
uses: actions/checkout@v3
|
||||||
|
@ -1,4 +1,4 @@
|
|||||||
ARG GO_VERSION=1.20-bullseye
|
ARG GO_VERSION=1.21-bullseye
|
||||||
|
|
||||||
FROM golang:$GO_VERSION as requirements
|
FROM golang:$GO_VERSION as requirements
|
||||||
|
|
||||||
|
11
Makefile
11
Makefile
@ -4,7 +4,7 @@ GOVET=$(GOCMD) vet
|
|||||||
BINARY_NAME=local-ai
|
BINARY_NAME=local-ai
|
||||||
|
|
||||||
# llama.cpp versions
|
# llama.cpp versions
|
||||||
GOLLAMA_VERSION?=f03869d188b72c8a617bea3a36cf8eb43f73445c
|
GOLLAMA_VERSION?=0ef04cde78e5da41de234832d73bb768ced709e7
|
||||||
|
|
||||||
GOLLAMA_STABLE_VERSION?=50cee7712066d9e38306eccadcfbb44ea87df4b7
|
GOLLAMA_STABLE_VERSION?=50cee7712066d9e38306eccadcfbb44ea87df4b7
|
||||||
|
|
||||||
@ -103,7 +103,7 @@ ifeq ($(findstring tts,$(GO_TAGS)),tts)
|
|||||||
OPTIONAL_GRPC+=backend-assets/grpc/piper
|
OPTIONAL_GRPC+=backend-assets/grpc/piper
|
||||||
endif
|
endif
|
||||||
|
|
||||||
GRPC_BACKENDS?=backend-assets/grpc/langchain-huggingface backend-assets/grpc/falcon-ggml backend-assets/grpc/bert-embeddings backend-assets/grpc/falcon backend-assets/grpc/bloomz backend-assets/grpc/llama backend-assets/grpc/gpt4all backend-assets/grpc/dolly backend-assets/grpc/gpt2 backend-assets/grpc/gptj backend-assets/grpc/gptneox backend-assets/grpc/mpt backend-assets/grpc/replit backend-assets/grpc/starcoder backend-assets/grpc/rwkv backend-assets/grpc/whisper $(OPTIONAL_GRPC)
|
GRPC_BACKENDS?=backend-assets/grpc/langchain-huggingface backend-assets/grpc/falcon-ggml backend-assets/grpc/bert-embeddings backend-assets/grpc/falcon backend-assets/grpc/bloomz backend-assets/grpc/llama backend-assets/grpc/llama-stable backend-assets/grpc/gpt4all backend-assets/grpc/dolly backend-assets/grpc/gpt2 backend-assets/grpc/gptj backend-assets/grpc/gptneox backend-assets/grpc/mpt backend-assets/grpc/replit backend-assets/grpc/starcoder backend-assets/grpc/rwkv backend-assets/grpc/whisper $(OPTIONAL_GRPC)
|
||||||
|
|
||||||
.PHONY: all test build vendor
|
.PHONY: all test build vendor
|
||||||
|
|
||||||
@ -302,9 +302,10 @@ test: prepare test-models/testmodel grpcs
|
|||||||
export GO_TAGS="tts stablediffusion"
|
export GO_TAGS="tts stablediffusion"
|
||||||
$(MAKE) prepare-test
|
$(MAKE) prepare-test
|
||||||
HUGGINGFACE_GRPC=$(abspath ./)/extra/grpc/huggingface/huggingface.py TEST_DIR=$(abspath ./)/test-dir/ FIXTURES=$(abspath ./)/tests/fixtures CONFIG_FILE=$(abspath ./)/test-models/config.yaml MODELS_PATH=$(abspath ./)/test-models \
|
HUGGINGFACE_GRPC=$(abspath ./)/extra/grpc/huggingface/huggingface.py TEST_DIR=$(abspath ./)/test-dir/ FIXTURES=$(abspath ./)/tests/fixtures CONFIG_FILE=$(abspath ./)/test-models/config.yaml MODELS_PATH=$(abspath ./)/test-models \
|
||||||
$(GOCMD) run github.com/onsi/ginkgo/v2/ginkgo --label-filter="!gpt4all && !llama" --flake-attempts 5 -v -r ./api ./pkg
|
$(GOCMD) run github.com/onsi/ginkgo/v2/ginkgo --label-filter="!gpt4all && !llama && !llama-gguf" --flake-attempts 5 -v -r ./api ./pkg
|
||||||
$(MAKE) test-gpt4all
|
$(MAKE) test-gpt4all
|
||||||
$(MAKE) test-llama
|
$(MAKE) test-llama
|
||||||
|
$(MAKE) test-llama-gguf
|
||||||
$(MAKE) test-tts
|
$(MAKE) test-tts
|
||||||
$(MAKE) test-stablediffusion
|
$(MAKE) test-stablediffusion
|
||||||
|
|
||||||
@ -316,6 +317,10 @@ test-llama: prepare-test
|
|||||||
TEST_DIR=$(abspath ./)/test-dir/ FIXTURES=$(abspath ./)/tests/fixtures CONFIG_FILE=$(abspath ./)/test-models/config.yaml MODELS_PATH=$(abspath ./)/test-models \
|
TEST_DIR=$(abspath ./)/test-dir/ FIXTURES=$(abspath ./)/tests/fixtures CONFIG_FILE=$(abspath ./)/test-models/config.yaml MODELS_PATH=$(abspath ./)/test-models \
|
||||||
$(GOCMD) run github.com/onsi/ginkgo/v2/ginkgo --label-filter="llama" --flake-attempts 5 -v -r ./api ./pkg
|
$(GOCMD) run github.com/onsi/ginkgo/v2/ginkgo --label-filter="llama" --flake-attempts 5 -v -r ./api ./pkg
|
||||||
|
|
||||||
|
test-llama-gguf: prepare-test
|
||||||
|
TEST_DIR=$(abspath ./)/test-dir/ FIXTURES=$(abspath ./)/tests/fixtures CONFIG_FILE=$(abspath ./)/test-models/config.yaml MODELS_PATH=$(abspath ./)/test-models \
|
||||||
|
$(GOCMD) run github.com/onsi/ginkgo/v2/ginkgo --label-filter="llama-gguf" --flake-attempts 5 -v -r ./api ./pkg
|
||||||
|
|
||||||
test-tts: prepare-test
|
test-tts: prepare-test
|
||||||
TEST_DIR=$(abspath ./)/test-dir/ FIXTURES=$(abspath ./)/tests/fixtures CONFIG_FILE=$(abspath ./)/test-models/config.yaml MODELS_PATH=$(abspath ./)/test-models \
|
TEST_DIR=$(abspath ./)/test-dir/ FIXTURES=$(abspath ./)/tests/fixtures CONFIG_FILE=$(abspath ./)/test-models/config.yaml MODELS_PATH=$(abspath ./)/test-models \
|
||||||
$(GOCMD) run github.com/onsi/ginkgo/v2/ginkgo --label-filter="tts" --flake-attempts 1 -v -r ./api ./pkg
|
$(GOCMD) run github.com/onsi/ginkgo/v2/ginkgo --label-filter="tts" --flake-attempts 1 -v -r ./api ./pkg
|
||||||
|
@ -296,7 +296,7 @@ var _ = Describe("API test", func() {
|
|||||||
response := postModelApplyRequest("http://127.0.0.1:9090/models/apply", modelApplyRequest{
|
response := postModelApplyRequest("http://127.0.0.1:9090/models/apply", modelApplyRequest{
|
||||||
URL: "github:go-skynet/model-gallery/openllama_3b.yaml",
|
URL: "github:go-skynet/model-gallery/openllama_3b.yaml",
|
||||||
Name: "openllama_3b",
|
Name: "openllama_3b",
|
||||||
Overrides: map[string]interface{}{"backend": "llama", "mmap": true, "f16": true, "context_size": 128},
|
Overrides: map[string]interface{}{"backend": "llama-stable", "mmap": true, "f16": true, "context_size": 128},
|
||||||
})
|
})
|
||||||
|
|
||||||
Expect(response["uuid"]).ToNot(BeEmpty(), fmt.Sprint(response))
|
Expect(response["uuid"]).ToNot(BeEmpty(), fmt.Sprint(response))
|
||||||
@ -359,6 +359,76 @@ var _ = Describe("API test", func() {
|
|||||||
Expect(string(resp2.Choices[0].FinishReason)).To(Equal("function_call"), fmt.Sprint(resp2.Choices[0].FinishReason))
|
Expect(string(resp2.Choices[0].FinishReason)).To(Equal("function_call"), fmt.Sprint(resp2.Choices[0].FinishReason))
|
||||||
})
|
})
|
||||||
|
|
||||||
|
It("runs openllama gguf", Label("llama-gguf"), func() {
|
||||||
|
if runtime.GOOS != "linux" {
|
||||||
|
Skip("test supported only on linux")
|
||||||
|
}
|
||||||
|
response := postModelApplyRequest("http://127.0.0.1:9090/models/apply", modelApplyRequest{
|
||||||
|
URL: "github:go-skynet/model-gallery/openllama-3b-gguf.yaml",
|
||||||
|
Name: "openllama_3b_gguf",
|
||||||
|
Overrides: map[string]interface{}{"backend": "llama", "mmap": true, "f16": true, "context_size": 128},
|
||||||
|
})
|
||||||
|
|
||||||
|
Expect(response["uuid"]).ToNot(BeEmpty(), fmt.Sprint(response))
|
||||||
|
|
||||||
|
uuid := response["uuid"].(string)
|
||||||
|
|
||||||
|
Eventually(func() bool {
|
||||||
|
response := getModelStatus("http://127.0.0.1:9090/models/jobs/" + uuid)
|
||||||
|
return response["processed"].(bool)
|
||||||
|
}, "360s", "10s").Should(Equal(true))
|
||||||
|
|
||||||
|
By("testing completion")
|
||||||
|
resp, err := client.CreateCompletion(context.TODO(), openai.CompletionRequest{Model: "openllama_3b_gguf", Prompt: "Count up to five: one, two, three, four, "})
|
||||||
|
Expect(err).ToNot(HaveOccurred())
|
||||||
|
Expect(len(resp.Choices)).To(Equal(1))
|
||||||
|
Expect(resp.Choices[0].Text).To(ContainSubstring("five"))
|
||||||
|
|
||||||
|
By("testing functions")
|
||||||
|
resp2, err := client.CreateChatCompletion(
|
||||||
|
context.TODO(),
|
||||||
|
openai.ChatCompletionRequest{
|
||||||
|
Model: "openllama_3b_gguf",
|
||||||
|
Messages: []openai.ChatCompletionMessage{
|
||||||
|
{
|
||||||
|
Role: "user",
|
||||||
|
Content: "What is the weather like in San Francisco (celsius)?",
|
||||||
|
},
|
||||||
|
},
|
||||||
|
Functions: []openai.FunctionDefinition{
|
||||||
|
openai.FunctionDefinition{
|
||||||
|
Name: "get_current_weather",
|
||||||
|
Description: "Get the current weather",
|
||||||
|
Parameters: jsonschema.Definition{
|
||||||
|
Type: jsonschema.Object,
|
||||||
|
Properties: map[string]jsonschema.Definition{
|
||||||
|
"location": {
|
||||||
|
Type: jsonschema.String,
|
||||||
|
Description: "The city and state, e.g. San Francisco, CA",
|
||||||
|
},
|
||||||
|
"unit": {
|
||||||
|
Type: jsonschema.String,
|
||||||
|
Enum: []string{"celcius", "fahrenheit"},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
Required: []string{"location"},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
})
|
||||||
|
Expect(err).ToNot(HaveOccurred())
|
||||||
|
Expect(len(resp2.Choices)).To(Equal(1))
|
||||||
|
Expect(resp2.Choices[0].Message.FunctionCall).ToNot(BeNil())
|
||||||
|
Expect(resp2.Choices[0].Message.FunctionCall.Name).To(Equal("get_current_weather"), resp2.Choices[0].Message.FunctionCall.Name)
|
||||||
|
|
||||||
|
var res map[string]string
|
||||||
|
err = json.Unmarshal([]byte(resp2.Choices[0].Message.FunctionCall.Arguments), &res)
|
||||||
|
Expect(err).ToNot(HaveOccurred())
|
||||||
|
Expect(res["location"]).To(Equal("San Francisco, California"), fmt.Sprint(res))
|
||||||
|
Expect(res["unit"]).To(Equal("celcius"), fmt.Sprint(res))
|
||||||
|
Expect(string(resp2.Choices[0].FinishReason)).To(Equal("function_call"), fmt.Sprint(resp2.Choices[0].FinishReason))
|
||||||
|
})
|
||||||
|
|
||||||
It("runs gpt4all", Label("gpt4all"), func() {
|
It("runs gpt4all", Label("gpt4all"), func() {
|
||||||
if runtime.GOOS != "linux" {
|
if runtime.GOOS != "linux" {
|
||||||
Skip("test supported only on linux")
|
Skip("test supported only on linux")
|
||||||
|
2
go.mod
2
go.mod
@ -1,6 +1,6 @@
|
|||||||
module github.com/go-skynet/LocalAI
|
module github.com/go-skynet/LocalAI
|
||||||
|
|
||||||
go 1.20
|
go 1.21
|
||||||
|
|
||||||
require (
|
require (
|
||||||
github.com/donomii/go-rwkv.cpp v0.0.0-20230715075832-c898cd0f62df
|
github.com/donomii/go-rwkv.cpp v0.0.0-20230715075832-c898cd0f62df
|
||||||
|
@ -32,14 +32,6 @@ func (llm *LLM) Load(opts *pb.ModelOptions) error {
|
|||||||
llama.WithRopeFreqScale(ropeFreqScale),
|
llama.WithRopeFreqScale(ropeFreqScale),
|
||||||
}
|
}
|
||||||
|
|
||||||
if opts.NGQA != 0 {
|
|
||||||
llamaOpts = append(llamaOpts, llama.WithGQA(int(opts.NGQA)))
|
|
||||||
}
|
|
||||||
|
|
||||||
if opts.RMSNormEps != 0 {
|
|
||||||
llamaOpts = append(llamaOpts, llama.WithRMSNormEPS(opts.RMSNormEps))
|
|
||||||
}
|
|
||||||
|
|
||||||
if opts.ContextSize != 0 {
|
if opts.ContextSize != 0 {
|
||||||
llamaOpts = append(llamaOpts, llama.SetContext(int(opts.ContextSize)))
|
llamaOpts = append(llamaOpts, llama.SetContext(int(opts.ContextSize)))
|
||||||
}
|
}
|
||||||
|
Loading…
Reference in New Issue
Block a user