Add docker-compose

Fixes #14 Signed-off-by: mudler <mudler@c3os.io>
2025-04-05 02:09:25 +00:00 · 2023-04-13 01:13:14 +02:00 · 2023-04-13 01:13:14 +02:00 · 8042e9a2d6
commit 8042e9a2d6
parent 624092cb99
9 changed files with 110 additions and 82 deletions
--- a/.dockerignore
+++ b/.dockerignore
@ -0,0 +1 @@
+models/*.bin
--- a/.env
+++ b/.env
@ -0,0 +1 @@
+THREADS=14
--- a/.github/workflows/image.yml
+++ b/.github/workflows/image.yml
@ -12,68 +12,42 @@ jobs:
  docker:
    runs-on: ubuntu-latest
    steps:
-      - name: Release space from worker
-        run: |
-          echo "Listing top largest packages"
-          pkgs=$(dpkg-query -Wf '${Installed-Size}\t${Package}\t${Status}\n' | awk '$NF == "installed"{print $1 "\t" $2}' | sort -nr)
-          head -n 30 <<< "${pkgs}"
-          echo
-          df -h
-          echo
-          sudo apt-get remove -y '^llvm-.*|^libllvm.*' || true
-          sudo apt-get remove --auto-remove android-sdk-platform-tools || true
-          sudo apt-get purge --auto-remove android-sdk-platform-tools || true
-          sudo rm -rf /usr/local/lib/android
-          sudo apt-get remove -y '^dotnet-.*|^aspnetcore-.*' || true
-          sudo rm -rf /usr/share/dotnet
-          sudo apt-get remove -y '^mono-.*' || true
-          sudo apt-get remove -y '^ghc-.*' || true
-          sudo apt-get remove -y '.*jdk.*|.*jre.*' || true
-          sudo apt-get remove -y 'php.*' || true
-          sudo apt-get remove -y hhvm powershell firefox monodoc-manual msbuild || true
-          sudo apt-get remove -y '^google-.*' || true
-          sudo apt-get remove -y azure-cli || true
-          sudo apt-get remove -y '^mongo.*-.*|^postgresql-.*|^mysql-.*|^mssql-.*' || true
-          sudo apt-get remove -y '^gfortran-.*' || true
-          sudo apt-get autoremove -y
-          sudo apt-get clean
-          echo
-          echo "Listing top largest packages"
-          pkgs=$(dpkg-query -Wf '${Installed-Size}\t${Package}\t${Status}\n' | awk '$NF == "installed"{print $1 "\t" $2}' | sort -nr)
-          head -n 30 <<< "${pkgs}"
-          echo
-          sudo rm -rfv build || true
-          df -h
      - name: Checkout
        uses: actions/checkout@v3
+
      - name: Prepare
        id: prep
        run: |
          DOCKER_IMAGE=quay.io/go-skynet/llama-cli
-          VERSION=latest
+          VERSION=master
          SHORTREF=${GITHUB_SHA::8}
+
          # If this is git tag, use the tag name as a docker tag
          if [[ $GITHUB_REF == refs/tags/* ]]; then
            VERSION=${GITHUB_REF#refs/tags/}
          fi
          TAGS="${DOCKER_IMAGE}:${VERSION},${DOCKER_IMAGE}:${SHORTREF}"
+
          # If the VERSION looks like a version number, assume that
          # this is the most recent version of the image and also
          # tag it 'latest'.
          if [[ $VERSION =~ ^[0-9]{1,3}\.[0-9]{1,3}\.[0-9]{1,3}$ ]]; then
            TAGS="$TAGS,${DOCKER_IMAGE}:latest"
          fi
+
          # Set output parameters.
          echo ::set-output name=tags::${TAGS}
          echo ::set-output name=docker_image::${DOCKER_IMAGE}
-          echo ::set-output name=image::${DOCKER_IMAGE}:${VERSION}
+
      - name: Set up QEMU
        uses: docker/setup-qemu-action@master
        with:
          platforms: all
+
      - name: Set up Docker Buildx
        id: buildx
        uses: docker/setup-buildx-action@master
+
      - name: Login to DockerHub
        if: github.event_name != 'pull_request'
        uses: docker/login-action@v2
@ -81,9 +55,23 @@ jobs:
          registry: quay.io
          username: ${{ secrets.QUAY_USERNAME }}
          password: ${{ secrets.QUAY_PASSWORD }}
-      - uses: earthly/actions/setup-earthly@v1
+      - name: Build PRs
+        if: github.event_name != 'pull_request'
+        uses: docker/build-push-action@v4
+        with:
+          builder: ${{ steps.buildx.outputs.name }}
+          context: .
+          file: ./Dockerfile
+          platforms: linux/amd64,linux/arm64,linux/arm
+          push: true
+          tags: ${{ steps.prep.outputs.tags }}
      - name: Build
-        run: |
-            earthly config "global.conversion_parallelism" "1"
-            earthly config "global.buildkit_max_parallelism" "1"
-            earthly --push +image-all --IMAGE=${{ steps.prep.outputs.image }}
+        if: github.event_name == 'pull_request'
+        uses: docker/build-push-action@v4
+        with:
+          builder: ${{ steps.buildx.outputs.name }}
+          context: .
+          file: ./Dockerfile
+          platforms: linux/amd64
+          push: false
+          tags: ${{ steps.prep.outputs.tags }}
--- a/.gitignore
+++ b/.gitignore
@ -1 +1,2 @@
-llama-cli
+llama-cli
+models/*.bin
--- a/19
+++ b/19
@ -0,0 +1,19 @@
+ARG GO_VERSION=1.20
+ARG DEBIAN_VERSION=11
+
+FROM golang:$GO_VERSION as builder
+
+WORKDIR /build
+RUN git clone --recurse-submodules https://github.com/go-skynet/go-llama.cpp
+RUN cd go-llama.cpp && make libbinding.a
+COPY go.mod ./
+COPY go.sum ./
+RUN go mod download
+RUN apt-get update
+COPY . .
+RUN go mod edit -replace github.com/go-skynet/go-llama.cpp=/build/go-llama.cpp
+RUN C_INCLUDE_PATH=/build/go-llama.cpp LIBRARY_PATH=/build/go-llama.cpp go build -o llama-cli ./
+
+FROM debian:$DEBIAN_VERSION
+COPY --from=builder /build/llama-cli /usr/bin/llama-cli
+ENTRYPOINT [ "/usr/bin/llama-cli" ]
--- a/31
+++ b/31
@ -1,32 +1,5 @@
 VERSION 0.7

-go-deps:
-    ARG GO_VERSION=1.20
-    FROM golang:$GO_VERSION
-    WORKDIR /build
-    COPY go.mod ./
-    COPY go.sum ./
-    RUN go mod download
-    RUN apt-get update
-    SAVE ARTIFACT go.mod AS LOCAL go.mod
-    SAVE ARTIFACT go.sum AS LOCAL go.sum
-
 build:
-    FROM +go-deps
-    WORKDIR /build
-    RUN git clone --recurse-submodules https://github.com/go-skynet/go-llama.cpp
-    RUN cd go-llama.cpp && make libbinding.a
-    COPY . .
-    RUN go mod edit -replace github.com/go-skynet/go-llama.cpp=/build/go-llama.cpp
-    RUN C_INCLUDE_PATH=$GOPATH/src/github.com/go-skynet/go-llama.cpp LIBRARY_PATH=$GOPATH/src/github.com/go-skynet/go-llama.cpp go build -o llama-cli ./
-    SAVE ARTIFACT llama-cli AS LOCAL llama-cli
-
-image:
-    FROM +go-deps
-    ARG IMAGE=alpaca-cli-nomodel
-    COPY +build/llama-cli /llama-cli
-    ENTRYPOINT [ "/llama-cli" ]
-    SAVE IMAGE --push $IMAGE
-
-image-all:
-    BUILD --platform=linux/amd64 --platform=linux/arm64 +image
+    FROM DOCKERFILE -f Dockerfile .
+    SAVE ARTIFACT /usr/bin/llama-cli AS LOCAL llama-cli
--- a/README.md
+++ b/README.md
@ -7,6 +7,47 @@ It is compatible with the models supported by `llama.cpp`. You might need to con

 `llama-cli` doesn't shell-out, it uses https://github.com/go-skynet/go-llama.cpp, which is a golang binding of [llama.cpp](https://github.com/ggerganov/llama.cpp).

+## Usage
+
+You can use `docker-compose`:
+
+```bash
+
+git clone https://github.com/go-skynet/llama-cli
+cd llama-cli
+
+# copy your models to models/
+cp your-model.bin models/
+
+# (optional) Edit the .env file to set the number of concurrent threads used for inference
+# echo "THREADS=14" > .env
+
+# start with docker-compose
+docker compose up -d --build
+
+# Now API is accessible at localhost:8080
+curl http://localhost:8080/v1/models
+# {"object":"list","data":[{"id":"your-model.bin","object":"model"}]}
+curl http://localhost:8080/v1/completions -H "Content-Type: application/json" -d '{
+     "model": "your-model.bin",            
+     "prompt": "A long time ago in a galaxy far, far away",
+     "temperature": 0.7
+   }'
+
+
+```
+
+Note: You can use a use a default template for every model in your model path, by creating a corresponding file with the `.tmpl` suffix next to your model. For instance, if the model is called `foo.bin`, you can create a sibiling file, `foo.bin.tmpl` which will be used as a default prompt, for instance this can be used with alpaca:
+
+```
+Below is an instruction that describes a task. Write a response that appropriately completes the request.
+
+### Instruction:
+{{.Input}}
+
+### Response:
+```
+
 ## Container images

 `llama-cli` comes by default as a container image. You can check out all the available images with corresponding tags [here](https://quay.io/repository/go-skynet/llama-cli?tab=tags&tag=latest)
@ -158,16 +199,6 @@ Below is an instruction that describes a task. Write a response that appropriate
 ### Response:
 ```

-Note: You can use a use a default template for every model in your model path, by creating a corresponding file with the `.tmpl` suffix. For instance, if the model is called `foo.bin`, you can create a sibiling file, `foo.bin.tmpl` which will be used as a default prompt, for instance:
-
-```
-Below is an instruction that describes a task. Write a response that appropriately completes the request.
-
-### Instruction:
-{{.Input}}
-
-### Response:
-```

 ## Using other models

@ -229,9 +260,8 @@ In order to build the `llama-cli` container image locally you can use `docker`:

 ```
 # build the image as "alpaca-image"
-docker run --privileged -v /var/run/docker.sock:/var/run/docker.sock --rm -t -v "$(pwd)":/workspace -v earthly-tmp:/tmp/earthly:rw earthly/earthly:v0.7.2 +image --IMAGE=alpaca-image
-# run the image
-docker run alpaca-image --instruction "What's an alpaca?"
+docker build -t llama-cli .
+docker run llama-cli --instruction "What's an alpaca?"
 ```

 Or build the binary with:
--- a/docker-compose.yaml
+++ b/docker-compose.yaml
@ -0,0 +1,15 @@
+version: '3.6'
+
+services:
+  api:
+    image: quay.io/go-skynet/llama-cli:latest
+    build: .
+    volumes:
+      - ./models:/models
+    ports:
+      - 8080:8080
+    environment:
+      - MODELS_PATH=/models
+      - CONTEXT_SIZE=700
+      - THREADS=$THREADS
+    command: api
--- a/models/.keep
+++ b/models/.keep