mirror of
https://github.com/mudler/LocalAI.git
synced 2024-12-24 06:46:39 +00:00
ci: add GPU tests (#1095)
* ci: test GPU Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * ci: show logs Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * Debug * debug Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * split extra/core images Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * split extra/core images Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * consider runner host dir Signed-off-by: Ettore Di Giacinto <mudler@localai.io> --------- Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
This commit is contained in:
parent
45370c212b
commit
432513c3ba
60
.github/workflows/test-gpu.yml
vendored
Normal file
60
.github/workflows/test-gpu.yml
vendored
Normal file
@ -0,0 +1,60 @@
|
|||||||
|
---
|
||||||
|
name: 'GPU tests'
|
||||||
|
|
||||||
|
on:
|
||||||
|
pull_request:
|
||||||
|
push:
|
||||||
|
branches:
|
||||||
|
- master
|
||||||
|
tags:
|
||||||
|
- '*'
|
||||||
|
|
||||||
|
concurrency:
|
||||||
|
group: ci-gpu-tests-${{ github.head_ref || github.ref }}-${{ github.repository }}
|
||||||
|
cancel-in-progress: true
|
||||||
|
|
||||||
|
jobs:
|
||||||
|
ubuntu-latest:
|
||||||
|
runs-on: self-hosted
|
||||||
|
strategy:
|
||||||
|
matrix:
|
||||||
|
go-version: ['1.21.x']
|
||||||
|
steps:
|
||||||
|
- name: Clone
|
||||||
|
uses: actions/checkout@v3
|
||||||
|
with:
|
||||||
|
submodules: true
|
||||||
|
- name: Setup Go ${{ matrix.go-version }}
|
||||||
|
uses: actions/setup-go@v4
|
||||||
|
with:
|
||||||
|
go-version: ${{ matrix.go-version }}
|
||||||
|
# You can test your matrix by printing the current Go version
|
||||||
|
- name: Display Go version
|
||||||
|
run: go version
|
||||||
|
- name: Dependencies
|
||||||
|
run: |
|
||||||
|
sudo apt-get update
|
||||||
|
sudo DEBIAN_FRONTEND=noninteractive apt-get install -y make wget
|
||||||
|
- name: Build
|
||||||
|
run: |
|
||||||
|
if [ ! -e /run/systemd/system ]; then
|
||||||
|
sudo mkdir /run/systemd/system
|
||||||
|
fi
|
||||||
|
make \
|
||||||
|
TEST_DIR="/host/tests/${{ github.head_ref || github.ref }}" \
|
||||||
|
BUILD_TYPE=cublas \
|
||||||
|
prepare-e2e run-e2e-image test-e2e
|
||||||
|
- name: Release space from worker ♻
|
||||||
|
if: always()
|
||||||
|
run: |
|
||||||
|
sudo rm -rf build || true
|
||||||
|
sudo rm -rf bin || true
|
||||||
|
sudo rm -rf dist || true
|
||||||
|
sudo docker logs $(sudo docker ps -q --filter ancestor=localai-tests) > logs.txt
|
||||||
|
sudo cat logs.txt || true
|
||||||
|
sudo rm -rf logs.txt
|
||||||
|
make clean || true
|
||||||
|
make \
|
||||||
|
TEST_DIR="/host/tests/${{ github.head_ref || github.ref }}" \
|
||||||
|
teardown-e2e || true
|
||||||
|
docker system prune -f -a --volumes || true
|
80
Dockerfile
80
Dockerfile
@ -1,6 +1,9 @@
|
|||||||
ARG GO_VERSION=1.21-bullseye
|
ARG GO_VERSION=1.21-bullseye
|
||||||
|
ARG IMAGE_TYPE=extras
|
||||||
|
# extras or core
|
||||||
|
|
||||||
FROM golang:$GO_VERSION as requirements
|
|
||||||
|
FROM golang:$GO_VERSION as requirements-core
|
||||||
|
|
||||||
ARG BUILD_TYPE
|
ARG BUILD_TYPE
|
||||||
ARG CUDA_MAJOR_VERSION=11
|
ARG CUDA_MAJOR_VERSION=11
|
||||||
@ -35,24 +38,6 @@ RUN if [ "${BUILD_TYPE}" = "cublas" ]; then \
|
|||||||
; fi
|
; fi
|
||||||
ENV PATH /usr/local/cuda/bin:${PATH}
|
ENV PATH /usr/local/cuda/bin:${PATH}
|
||||||
|
|
||||||
# Extras requirements
|
|
||||||
COPY extra/requirements.txt /build/extra/requirements.txt
|
|
||||||
ENV PATH="/root/.cargo/bin:${PATH}"
|
|
||||||
RUN pip install --upgrade pip
|
|
||||||
RUN curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh -s -- -y
|
|
||||||
RUN if [ "${TARGETARCH}" = "amd64" ]; then \
|
|
||||||
pip install git+https://github.com/suno-ai/bark.git diffusers invisible_watermark transformers accelerate safetensors;\
|
|
||||||
fi
|
|
||||||
RUN if [ "${BUILD_TYPE}" = "cublas" ] && [ "${TARGETARCH}" = "amd64" ]; then \
|
|
||||||
pip install torch vllm && pip install auto-gptq https://github.com/jllllll/exllama/releases/download/0.0.10/exllama-0.0.10+cu${CUDA_MAJOR_VERSION}${CUDA_MINOR_VERSION}-cp39-cp39-linux_x86_64.whl;\
|
|
||||||
fi
|
|
||||||
RUN pip install -r /build/extra/requirements.txt && rm -rf /build/extra/requirements.txt
|
|
||||||
|
|
||||||
# Vall-e-X
|
|
||||||
RUN git clone https://github.com/Plachtaa/VALL-E-X.git /usr/lib/vall-e-x && cd /usr/lib/vall-e-x && pip install -r requirements.txt
|
|
||||||
|
|
||||||
WORKDIR /build
|
|
||||||
|
|
||||||
# OpenBLAS requirements
|
# OpenBLAS requirements
|
||||||
RUN apt-get install -y libopenblas-dev
|
RUN apt-get install -y libopenblas-dev
|
||||||
|
|
||||||
@ -61,6 +46,8 @@ RUN apt-get install -y libopencv-dev && \
|
|||||||
ln -s /usr/include/opencv4/opencv2 /usr/include/opencv2
|
ln -s /usr/include/opencv4/opencv2 /usr/include/opencv2
|
||||||
|
|
||||||
|
|
||||||
|
WORKDIR /build
|
||||||
|
|
||||||
# piper requirements
|
# piper requirements
|
||||||
# Use pre-compiled Piper phonemization library (includes onnxruntime)
|
# Use pre-compiled Piper phonemization library (includes onnxruntime)
|
||||||
#RUN if echo "${GO_TAGS}" | grep -q "tts"; then \
|
#RUN if echo "${GO_TAGS}" | grep -q "tts"; then \
|
||||||
@ -80,17 +67,40 @@ RUN curl -L "https://github.com/gabime/spdlog/archive/refs/tags/v${SPDLOG_VERSIO
|
|||||||
tar -C "lib/Linux-$(uname -m)/piper_phonemize" -xzvf - && ls -liah /build/lib/Linux-$(uname -m)/piper_phonemize/ && \
|
tar -C "lib/Linux-$(uname -m)/piper_phonemize" -xzvf - && ls -liah /build/lib/Linux-$(uname -m)/piper_phonemize/ && \
|
||||||
cp -rfv /build/lib/Linux-$(uname -m)/piper_phonemize/lib/. /usr/lib/ && \
|
cp -rfv /build/lib/Linux-$(uname -m)/piper_phonemize/lib/. /usr/lib/ && \
|
||||||
ln -s /usr/lib/libpiper_phonemize.so /usr/lib/libpiper_phonemize.so.1 && \
|
ln -s /usr/lib/libpiper_phonemize.so /usr/lib/libpiper_phonemize.so.1 && \
|
||||||
cp -rfv /build/lib/Linux-$(uname -m)/piper_phonemize/include/. /usr/include/
|
cp -rfv /build/lib/Linux-$(uname -m)/piper_phonemize/include/. /usr/include/ && \
|
||||||
|
rm spdlog-${SPDLOG_VERSION} -rf && \
|
||||||
|
rm /build/lib/Linux-$(uname -m)/piper_phonemize -rf
|
||||||
|
|
||||||
|
# Extras requirements
|
||||||
|
FROM requirements-core as requirements-extras
|
||||||
|
|
||||||
|
COPY extra/requirements.txt /build/extra/requirements.txt
|
||||||
|
ENV PATH="/root/.cargo/bin:${PATH}"
|
||||||
|
RUN pip install --upgrade pip
|
||||||
|
RUN curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh -s -- -y
|
||||||
|
RUN if [ "${TARGETARCH}" = "amd64" ]; then \
|
||||||
|
pip install git+https://github.com/suno-ai/bark.git diffusers invisible_watermark transformers accelerate safetensors;\
|
||||||
|
fi
|
||||||
|
RUN if [ "${BUILD_TYPE}" = "cublas" ] && [ "${TARGETARCH}" = "amd64" ]; then \
|
||||||
|
pip install torch vllm && pip install auto-gptq https://github.com/jllllll/exllama/releases/download/0.0.10/exllama-0.0.10+cu${CUDA_MAJOR_VERSION}${CUDA_MINOR_VERSION}-cp39-cp39-linux_x86_64.whl;\
|
||||||
|
fi
|
||||||
|
RUN pip install -r /build/extra/requirements.txt && rm -rf /build/extra/requirements.txt
|
||||||
|
|
||||||
|
# Vall-e-X
|
||||||
|
RUN git clone https://github.com/Plachtaa/VALL-E-X.git /usr/lib/vall-e-x && cd /usr/lib/vall-e-x && pip install -r requirements.txt
|
||||||
|
|
||||||
# \
|
# \
|
||||||
# ; fi
|
# ; fi
|
||||||
|
|
||||||
###################################
|
###################################
|
||||||
###################################
|
###################################
|
||||||
|
|
||||||
FROM requirements as builder
|
FROM requirements-${IMAGE_TYPE} as builder
|
||||||
|
|
||||||
ARG GO_TAGS="stablediffusion tts"
|
ARG GO_TAGS="stablediffusion tts"
|
||||||
|
ARG GRPC_BACKENDS
|
||||||
|
ARG BUILD_GRPC=true
|
||||||
|
ENV GRPC_BACKENDS=${GRPC_BACKENDS}
|
||||||
ENV GO_TAGS=${GO_TAGS}
|
ENV GO_TAGS=${GO_TAGS}
|
||||||
ENV NVIDIA_DRIVER_CAPABILITIES=compute,utility
|
ENV NVIDIA_DRIVER_CAPABILITIES=compute,utility
|
||||||
ENV NVIDIA_REQUIRE_CUDA="cuda>=${CUDA_MAJOR_VERSION}.0"
|
ENV NVIDIA_REQUIRE_CUDA="cuda>=${CUDA_MAJOR_VERSION}.0"
|
||||||
@ -108,10 +118,12 @@ COPY .git .
|
|||||||
# stablediffusion does not tolerate a newer version of abseil, build it first
|
# stablediffusion does not tolerate a newer version of abseil, build it first
|
||||||
RUN GRPC_BACKENDS=backend-assets/grpc/stablediffusion make build
|
RUN GRPC_BACKENDS=backend-assets/grpc/stablediffusion make build
|
||||||
|
|
||||||
RUN git clone --recurse-submodules -b v1.58.0 --depth 1 --shallow-submodules https://github.com/grpc/grpc && \
|
RUN if [ "${BUILD_GRPC}" = "true" ]; then \
|
||||||
|
git clone --recurse-submodules -b v1.58.0 --depth 1 --shallow-submodules https://github.com/grpc/grpc && \
|
||||||
cd grpc && mkdir -p cmake/build && cd cmake/build && cmake -DgRPC_INSTALL=ON \
|
cd grpc && mkdir -p cmake/build && cd cmake/build && cmake -DgRPC_INSTALL=ON \
|
||||||
-DgRPC_BUILD_TESTS=OFF \
|
-DgRPC_BUILD_TESTS=OFF \
|
||||||
../.. && make -j12 install && rm -rf grpc
|
../.. && make -j12 install && rm -rf grpc \
|
||||||
|
; fi
|
||||||
|
|
||||||
# Rebuild with defaults backends
|
# Rebuild with defaults backends
|
||||||
RUN ESPEAK_DATA=/build/lib/Linux-$(uname -m)/piper_phonemize/lib/espeak-ng-data make build
|
RUN ESPEAK_DATA=/build/lib/Linux-$(uname -m)/piper_phonemize/lib/espeak-ng-data make build
|
||||||
@ -119,7 +131,7 @@ RUN ESPEAK_DATA=/build/lib/Linux-$(uname -m)/piper_phonemize/lib/espeak-ng-data
|
|||||||
###################################
|
###################################
|
||||||
###################################
|
###################################
|
||||||
|
|
||||||
FROM requirements
|
FROM requirements-${IMAGE_TYPE}
|
||||||
|
|
||||||
ARG FFMPEG
|
ARG FFMPEG
|
||||||
ARG BUILD_TYPE
|
ARG BUILD_TYPE
|
||||||
@ -129,6 +141,11 @@ ENV BUILD_TYPE=${BUILD_TYPE}
|
|||||||
ENV REBUILD=false
|
ENV REBUILD=false
|
||||||
ENV HEALTHCHECK_ENDPOINT=http://localhost:8080/readyz
|
ENV HEALTHCHECK_ENDPOINT=http://localhost:8080/readyz
|
||||||
|
|
||||||
|
ARG CUDA_MAJOR_VERSION=11
|
||||||
|
ENV NVIDIA_DRIVER_CAPABILITIES=compute,utility
|
||||||
|
ENV NVIDIA_REQUIRE_CUDA="cuda>=${CUDA_MAJOR_VERSION}.0"
|
||||||
|
ENV NVIDIA_VISIBLE_DEVICES=all
|
||||||
|
|
||||||
# Add FFmpeg
|
# Add FFmpeg
|
||||||
RUN if [ "${FFMPEG}" = "true" ]; then \
|
RUN if [ "${FFMPEG}" = "true" ]; then \
|
||||||
apt-get install -y ffmpeg \
|
apt-get install -y ffmpeg \
|
||||||
@ -146,16 +163,19 @@ RUN make prepare-sources
|
|||||||
# Copy the binary
|
# Copy the binary
|
||||||
COPY --from=builder /build/local-ai ./
|
COPY --from=builder /build/local-ai ./
|
||||||
|
|
||||||
# do not let piper rebuild (requires an older version of absl)
|
# do not let stablediffusion rebuild (requires an older version of absl)
|
||||||
COPY --from=builder /build/backend-assets/grpc/piper ./backend-assets/grpc/piper
|
COPY --from=builder /build/backend-assets/grpc/stablediffusion ./backend-assets/grpc/stablediffusion
|
||||||
|
|
||||||
# Copy VALLE-X as it's not a real "lib"
|
# Copy VALLE-X as it's not a real "lib"
|
||||||
RUN cp -rfv /usr/lib/vall-e-x/* ./
|
RUN if [ -d /usr/lib/vall-e-x ]; then \
|
||||||
|
cp -rfv /usr/lib/vall-e-x/* ./ ; \
|
||||||
|
fi
|
||||||
|
|
||||||
# To resolve exllama import error
|
# we also copy exllama libs over to resolve exllama import error
|
||||||
RUN if [ "${BUILD_TYPE}" = "cublas" ] && [ "${TARGETARCH:-$(go env GOARCH)}" = "amd64" ]; then \
|
RUN if [ -d /usr/local/lib/python3.9/dist-packages/exllama ]; then \
|
||||||
cp -rfv /usr/local/lib/python3.9/dist-packages/exllama extra/grpc/exllama/;\
|
cp -rfv /usr/local/lib/python3.9/dist-packages/exllama extra/grpc/exllama/;\
|
||||||
fi
|
fi
|
||||||
|
|
||||||
# Define the health check command
|
# Define the health check command
|
||||||
HEALTHCHECK --interval=1m --timeout=10m --retries=10 \
|
HEALTHCHECK --interval=1m --timeout=10m --retries=10 \
|
||||||
CMD curl -f $HEALTHCHECK_ENDPOINT || exit 1
|
CMD curl -f $HEALTHCHECK_ENDPOINT || exit 1
|
||||||
|
27
Makefile
27
Makefile
@ -47,6 +47,10 @@ CUDA_LIBPATH?=/usr/local/cuda/lib64/
|
|||||||
GO_TAGS?=
|
GO_TAGS?=
|
||||||
BUILD_ID?=git
|
BUILD_ID?=git
|
||||||
|
|
||||||
|
TEST_DIR=/tmp/test
|
||||||
|
|
||||||
|
RANDOM := $(shell bash -c 'echo $$RANDOM')
|
||||||
|
|
||||||
VERSION?=$(shell git describe --always --tags || echo "dev" )
|
VERSION?=$(shell git describe --always --tags || echo "dev" )
|
||||||
# go tool nm ./local-ai | grep Commit
|
# go tool nm ./local-ai | grep Commit
|
||||||
LD_FLAGS?=
|
LD_FLAGS?=
|
||||||
@ -64,6 +68,9 @@ WHITE := $(shell tput -Txterm setaf 7)
|
|||||||
CYAN := $(shell tput -Txterm setaf 6)
|
CYAN := $(shell tput -Txterm setaf 6)
|
||||||
RESET := $(shell tput -Txterm sgr0)
|
RESET := $(shell tput -Txterm sgr0)
|
||||||
|
|
||||||
|
# Default Docker bridge IP
|
||||||
|
E2E_BRIDGE_IP?=172.17.0.1
|
||||||
|
|
||||||
ifndef UNAME_S
|
ifndef UNAME_S
|
||||||
UNAME_S := $(shell uname -s)
|
UNAME_S := $(shell uname -s)
|
||||||
endif
|
endif
|
||||||
@ -329,6 +336,26 @@ test: prepare test-models/testmodel grpcs
|
|||||||
$(MAKE) test-tts
|
$(MAKE) test-tts
|
||||||
$(MAKE) test-stablediffusion
|
$(MAKE) test-stablediffusion
|
||||||
|
|
||||||
|
prepare-e2e:
|
||||||
|
mkdir -p $(TEST_DIR)
|
||||||
|
cp -rfv $(abspath ./tests/e2e-fixtures)/gpu.yaml $(TEST_DIR)/gpu.yaml
|
||||||
|
test -e $(TEST_DIR)/ggllm-test-model.bin || wget -q https://huggingface.co/TheBloke/CodeLlama-7B-Instruct-GGUF/resolve/main/codellama-7b-instruct.Q2_K.gguf -O $(TEST_DIR)/ggllm-test-model.bin
|
||||||
|
docker build --build-arg BUILD_GRPC=true --build-arg GRPC_BACKENDS="$(GRPC_BACKENDS)" --build-arg IMAGE_TYPE=core --build-arg BUILD_TYPE=$(BUILD_TYPE) --build-arg CUDA_MAJOR_VERSION=11 --build-arg CUDA_MINOR_VERSION=7 --build-arg FFMPEG=true -t localai-tests .
|
||||||
|
|
||||||
|
run-e2e-image:
|
||||||
|
ls -liah $(abspath ./tests/e2e-fixtures)
|
||||||
|
docker run -p 5390:8080 -e MODELS_PATH=/models -e THREADS=1 -e DEBUG=true -d --rm -v $(TEST_DIR):/models --gpus all --name e2e-tests-$(RANDOM) localai-tests
|
||||||
|
|
||||||
|
test-e2e:
|
||||||
|
@echo 'Running e2e tests'
|
||||||
|
BUILD_TYPE=$(BUILD_TYPE) \
|
||||||
|
LOCALAI_API=http://$(E2E_BRIDGE_IP):5390/v1 \
|
||||||
|
$(GOCMD) run github.com/onsi/ginkgo/v2/ginkgo --flake-attempts 5 -v -r ./tests/e2e
|
||||||
|
|
||||||
|
teardown-e2e:
|
||||||
|
rm -rf $(TEST_DIR) || true
|
||||||
|
docker stop $$(docker ps -q --filter ancestor=localai-tests)
|
||||||
|
|
||||||
test-gpt4all: prepare-test
|
test-gpt4all: prepare-test
|
||||||
TEST_DIR=$(abspath ./)/test-dir/ FIXTURES=$(abspath ./)/tests/fixtures CONFIG_FILE=$(abspath ./)/test-models/config.yaml MODELS_PATH=$(abspath ./)/test-models \
|
TEST_DIR=$(abspath ./)/test-dir/ FIXTURES=$(abspath ./)/tests/fixtures CONFIG_FILE=$(abspath ./)/test-models/config.yaml MODELS_PATH=$(abspath ./)/test-models \
|
||||||
$(GOCMD) run github.com/onsi/ginkgo/v2/ginkgo --label-filter="gpt4all" --flake-attempts 5 -v -r ./api ./pkg
|
$(GOCMD) run github.com/onsi/ginkgo/v2/ginkgo --label-filter="gpt4all" --flake-attempts 5 -v -r ./api ./pkg
|
||||||
|
17
tests/e2e-fixtures/gpu.yaml
Normal file
17
tests/e2e-fixtures/gpu.yaml
Normal file
@ -0,0 +1,17 @@
|
|||||||
|
context_size: 2048
|
||||||
|
mirostat: 2
|
||||||
|
mirostat_tau: 5.0
|
||||||
|
mirostat_eta: 0.1
|
||||||
|
f16: true
|
||||||
|
threads: 1
|
||||||
|
gpu_layers: 90
|
||||||
|
name: gpt-4
|
||||||
|
mmap: true
|
||||||
|
parameters:
|
||||||
|
model: ggllm-test-model.bin
|
||||||
|
rope_freq_base: 10000
|
||||||
|
max_tokens: 20
|
||||||
|
rope_freq_scale: 1
|
||||||
|
temperature: 0.2
|
||||||
|
top_k: 40
|
||||||
|
top_p: 0.95
|
18
tests/e2e/e2e_suite_test.go
Normal file
18
tests/e2e/e2e_suite_test.go
Normal file
@ -0,0 +1,18 @@
|
|||||||
|
package e2e_test
|
||||||
|
|
||||||
|
import (
|
||||||
|
"os"
|
||||||
|
"testing"
|
||||||
|
|
||||||
|
. "github.com/onsi/ginkgo/v2"
|
||||||
|
. "github.com/onsi/gomega"
|
||||||
|
)
|
||||||
|
|
||||||
|
var (
|
||||||
|
localAIURL = os.Getenv("LOCALAI_API")
|
||||||
|
)
|
||||||
|
|
||||||
|
func TestLocalAI(t *testing.T) {
|
||||||
|
RegisterFailHandler(Fail)
|
||||||
|
RunSpecs(t, "LocalAI E2E test suite")
|
||||||
|
}
|
70
tests/e2e/e2e_test.go
Normal file
70
tests/e2e/e2e_test.go
Normal file
@ -0,0 +1,70 @@
|
|||||||
|
package e2e_test
|
||||||
|
|
||||||
|
import (
|
||||||
|
"context"
|
||||||
|
"fmt"
|
||||||
|
"os"
|
||||||
|
"os/exec"
|
||||||
|
|
||||||
|
. "github.com/onsi/ginkgo/v2"
|
||||||
|
. "github.com/onsi/gomega"
|
||||||
|
|
||||||
|
openaigo "github.com/otiai10/openaigo"
|
||||||
|
"github.com/sashabaranov/go-openai"
|
||||||
|
)
|
||||||
|
|
||||||
|
var _ = Describe("E2E test", func() {
|
||||||
|
var client *openai.Client
|
||||||
|
var client2 *openaigo.Client
|
||||||
|
|
||||||
|
Context("API with ephemeral models", func() {
|
||||||
|
BeforeEach(func() {
|
||||||
|
defaultConfig := openai.DefaultConfig("")
|
||||||
|
defaultConfig.BaseURL = localAIURL
|
||||||
|
|
||||||
|
client2 = openaigo.NewClient("")
|
||||||
|
client2.BaseURL = defaultConfig.BaseURL
|
||||||
|
|
||||||
|
// Wait for API to be ready
|
||||||
|
client = openai.NewClientWithConfig(defaultConfig)
|
||||||
|
Eventually(func() error {
|
||||||
|
_, err := client.ListModels(context.TODO())
|
||||||
|
return err
|
||||||
|
}, "2m").ShouldNot(HaveOccurred())
|
||||||
|
})
|
||||||
|
|
||||||
|
// Check that the GPU was used
|
||||||
|
AfterEach(func() {
|
||||||
|
cmd := exec.Command("/bin/bash", "-xce", "docker logs $(docker ps -q --filter ancestor=localai-tests)")
|
||||||
|
out, err := cmd.CombinedOutput()
|
||||||
|
Expect(err).ToNot(HaveOccurred(), string(out))
|
||||||
|
// Execute docker logs $$(docker ps -q --filter ancestor=localai-tests) as a command and check the output
|
||||||
|
if os.Getenv("BUILD_TYPE") == "cublas" {
|
||||||
|
|
||||||
|
Expect(string(out)).To(ContainSubstring("found 1 CUDA devices"), string(out))
|
||||||
|
Expect(string(out)).To(ContainSubstring("using CUDA for GPU acceleration"), string(out))
|
||||||
|
} else {
|
||||||
|
fmt.Println("Skipping GPU check")
|
||||||
|
Expect(string(out)).To(ContainSubstring("[llama-cpp] Loads OK"), string(out))
|
||||||
|
Expect(string(out)).To(ContainSubstring("llama_model_loader"), string(out))
|
||||||
|
}
|
||||||
|
})
|
||||||
|
|
||||||
|
Context("Generates text", func() {
|
||||||
|
It("streams chat tokens", func() {
|
||||||
|
model := "gpt-4"
|
||||||
|
resp, err := client.CreateChatCompletion(context.TODO(),
|
||||||
|
openai.ChatCompletionRequest{
|
||||||
|
Model: model, Messages: []openai.ChatCompletionMessage{
|
||||||
|
{
|
||||||
|
Role: "user",
|
||||||
|
Content: "How much is 2+2?",
|
||||||
|
},
|
||||||
|
}})
|
||||||
|
Expect(err).ToNot(HaveOccurred())
|
||||||
|
Expect(len(resp.Choices)).To(Equal(1), fmt.Sprint(resp))
|
||||||
|
Expect(resp.Choices[0].Message.Content).To(Or(ContainSubstring("4"), ContainSubstring("four")), fmt.Sprint(resp.Choices[0].Message.Content))
|
||||||
|
})
|
||||||
|
})
|
||||||
|
})
|
||||||
|
})
|
Loading…
Reference in New Issue
Block a user