Mirror of https://github.com/mudler/LocalAI.git (synced 2024-12-24 06:46:39 +00:00)

commit 09e5d9007b (parent: db926896bd)

feat: embedded model configurations, add popular model examples, refactoring (#1532)

* move downloader out
* separate startup functions for preloading configuration files
* docs: add popular model examples
* shorteners
* Add llava
* Add mistral-openorca
* Better link to build section
* docs: update
* fixup
* Drop code dups
* Minor fixups
* Apply suggestions from code review
* ci: try to cache gRPC build during tests
* ci: do not build all images for tests, just necessary
* ci: cache gRPC also in release pipeline
* fixes
* Update model_preload_test.go

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
Signed-off-by: Ettore Di Giacinto <mudler@users.noreply.github.com>
.github/workflows/image-pr.yml (vendored, new file, 86 lines)

```yaml
---
name: 'build container images tests'

on:
  pull_request:

concurrency:
  group: ci-${{ github.head_ref || github.ref }}-${{ github.repository }}
  cancel-in-progress: true

jobs:
  extras-image-build:
    uses: ./.github/workflows/image_build.yml
    with:
      tag-latest: ${{ matrix.tag-latest }}
      tag-suffix: ${{ matrix.tag-suffix }}
      ffmpeg: ${{ matrix.ffmpeg }}
      image-type: ${{ matrix.image-type }}
      build-type: ${{ matrix.build-type }}
      cuda-major-version: ${{ matrix.cuda-major-version }}
      cuda-minor-version: ${{ matrix.cuda-minor-version }}
      platforms: ${{ matrix.platforms }}
      runs-on: ${{ matrix.runs-on }}
    secrets:
      dockerUsername: ${{ secrets.DOCKERHUB_USERNAME }}
      dockerPassword: ${{ secrets.DOCKERHUB_PASSWORD }}
      quayUsername: ${{ secrets.LOCALAI_REGISTRY_USERNAME }}
      quayPassword: ${{ secrets.LOCALAI_REGISTRY_PASSWORD }}
    strategy:
      # Pushing with all jobs in parallel
      # eats the bandwidth of all the nodes
      max-parallel: ${{ github.event_name != 'pull_request' && 2 || 4 }}
      matrix:
        include:
          - build-type: ''
            platforms: 'linux/amd64'
            tag-latest: 'false'
            tag-suffix: '-ffmpeg'
            ffmpeg: 'true'
            image-type: 'extras'
            runs-on: 'arc-runner-set'
          - build-type: 'cublas'
            cuda-major-version: "12"
            cuda-minor-version: "1"
            platforms: 'linux/amd64'
            tag-latest: 'false'
            tag-suffix: '-cublas-cuda12-ffmpeg'
            ffmpeg: 'true'
            image-type: 'extras'
            runs-on: 'arc-runner-set'
  core-image-build:
    uses: ./.github/workflows/image_build.yml
    with:
      tag-latest: ${{ matrix.tag-latest }}
      tag-suffix: ${{ matrix.tag-suffix }}
      ffmpeg: ${{ matrix.ffmpeg }}
      image-type: ${{ matrix.image-type }}
      build-type: ${{ matrix.build-type }}
      cuda-major-version: ${{ matrix.cuda-major-version }}
      cuda-minor-version: ${{ matrix.cuda-minor-version }}
      platforms: ${{ matrix.platforms }}
      runs-on: ${{ matrix.runs-on }}
    secrets:
      dockerUsername: ${{ secrets.DOCKERHUB_USERNAME }}
      dockerPassword: ${{ secrets.DOCKERHUB_PASSWORD }}
      quayUsername: ${{ secrets.LOCALAI_REGISTRY_USERNAME }}
      quayPassword: ${{ secrets.LOCALAI_REGISTRY_PASSWORD }}
    strategy:
      matrix:
        include:
          - build-type: ''
            platforms: 'linux/amd64'
            tag-latest: 'false'
            tag-suffix: '-ffmpeg-core'
            ffmpeg: 'true'
            image-type: 'core'
            runs-on: 'ubuntu-latest'
          - build-type: 'cublas'
            cuda-major-version: "12"
            cuda-minor-version: "1"
            platforms: 'linux/amd64'
            tag-latest: 'false'
            tag-suffix: '-cublas-cuda12-ffmpeg-core'
            ffmpeg: 'true'
            image-type: 'core'
            runs-on: 'ubuntu-latest'
```
.github/workflows/image.yml (vendored, 1 change)

```diff
@@ -2,7 +2,6 @@
 name: 'build container images'
 
 on:
-  pull_request:
   push:
     branches:
       - master
```
.github/workflows/release.yaml (vendored, 18 changes)

```diff
@@ -34,10 +34,22 @@ jobs:
           sudo apt-get update
           sudo apt-get install build-essential ffmpeg
 
+      - name: Cache grpc
+        id: cache-grpc
+        uses: actions/cache@v3
+        with:
+          path: grpc
+          key: ${{ runner.os }}-grpc
+      - name: Build grpc
+        if: steps.cache-grpc.outputs.cache-hit != 'true'
+        run: |
           git clone --recurse-submodules -b v1.58.0 --depth 1 --shallow-submodules https://github.com/grpc/grpc && \
           cd grpc && mkdir -p cmake/build && cd cmake/build && cmake -DgRPC_INSTALL=ON \
             -DgRPC_BUILD_TESTS=OFF \
-            ../.. && sudo make -j12 install
+            ../.. && sudo make -j12
+      - name: Install gRPC
+        run: |
+          cd grpc && cd cmake/build && sudo make -j12 install
 
       - name: Build
         id: build
```
.github/workflows/test.yml (vendored, 19 changes)

```diff
@@ -86,11 +86,22 @@ jobs:
           sudo cp -rfv sources/go-piper/piper-phonemize/pi/lib/. /usr/lib/ && \
           # Pre-build stable diffusion before we install a newer version of abseil (not compatible with stablediffusion-ncn)
           GO_TAGS="stablediffusion tts" GRPC_BACKENDS=backend-assets/grpc/stablediffusion make build
+      - name: Cache grpc
+        id: cache-grpc
+        uses: actions/cache@v3
+        with:
+          path: grpc
+          key: ${{ runner.os }}-grpc
+      - name: Build grpc
+        if: steps.cache-grpc.outputs.cache-hit != 'true'
+        run: |
           git clone --recurse-submodules -b v1.58.0 --depth 1 --shallow-submodules https://github.com/grpc/grpc && \
           cd grpc && mkdir -p cmake/build && cd cmake/build && cmake -DgRPC_INSTALL=ON \
             -DgRPC_BUILD_TESTS=OFF \
-            ../.. && sudo make -j12 install
+            ../.. && sudo make -j12
+      - name: Install gRPC
+        run: |
+          cd grpc && cd cmake/build && sudo make -j12 install
       - name: Test
         run: |
           GO_TAGS="stablediffusion tts" make test
```
api/api.go (23 changes)

```diff
@@ -5,7 +5,6 @@ import (
 	"errors"
 	"fmt"
 	"os"
-	"path/filepath"
 	"strings"
 
 	config "github.com/go-skynet/LocalAI/api/config"
@@ -17,7 +16,7 @@ import (
 	"github.com/go-skynet/LocalAI/metrics"
 	"github.com/go-skynet/LocalAI/pkg/assets"
 	"github.com/go-skynet/LocalAI/pkg/model"
-	"github.com/go-skynet/LocalAI/pkg/utils"
+	"github.com/go-skynet/LocalAI/pkg/startup"
 
 	"github.com/gofiber/fiber/v2"
 	"github.com/gofiber/fiber/v2/middleware/cors"
@@ -38,25 +37,7 @@ func Startup(opts ...options.AppOption) (*options.Option, *config.ConfigLoader,
 	log.Info().Msgf("Starting LocalAI using %d threads, with models path: %s", options.Threads, options.Loader.ModelPath)
 	log.Info().Msgf("LocalAI version: %s", internal.PrintableVersion())
 
-	modelPath := options.Loader.ModelPath
-	if len(options.ModelsURL) > 0 {
-		for _, url := range options.ModelsURL {
-			if utils.LooksLikeURL(url) {
-				// md5 of model name
-				md5Name := utils.MD5(url)
-
-				// check if file exists
-				if _, err := os.Stat(filepath.Join(modelPath, md5Name)); errors.Is(err, os.ErrNotExist) {
-					err := utils.DownloadFile(url, filepath.Join(modelPath, md5Name)+".yaml", "", func(fileName, current, total string, percent float64) {
-						utils.DisplayDownloadFunction(fileName, current, total, percent)
-					})
-					if err != nil {
-						log.Error().Msgf("error loading model: %s", err.Error())
-					}
-				}
-			}
-		}
-	}
+	startup.PreloadModelsConfigurations(options.Loader.ModelPath, options.ModelsURL...)
 
 	cl := config.NewConfigLoader()
 	if err := cl.LoadConfigs(options.Loader.ModelPath); err != nil {
```
```diff
@@ -16,9 +16,9 @@ import (
 	. "github.com/go-skynet/LocalAI/api"
 	"github.com/go-skynet/LocalAI/api/options"
 	"github.com/go-skynet/LocalAI/metrics"
+	"github.com/go-skynet/LocalAI/pkg/downloader"
 	"github.com/go-skynet/LocalAI/pkg/gallery"
 	"github.com/go-skynet/LocalAI/pkg/model"
-	"github.com/go-skynet/LocalAI/pkg/utils"
 	"github.com/gofiber/fiber/v2"
 	. "github.com/onsi/ginkgo/v2"
 	. "github.com/onsi/gomega"
@@ -61,7 +61,7 @@ func getModelStatus(url string) (response map[string]interface{}) {
 }
 
 func getModels(url string) (response []gallery.GalleryModel) {
-	utils.GetURI(url, func(url string, i []byte) error {
+	downloader.GetURI(url, func(url string, i []byte) error {
 		// Unmarshal YAML data into a struct
 		return json.Unmarshal(i, &response)
 	})
```
```diff
@@ -9,6 +9,7 @@ import (
 	"strings"
 	"sync"
 
+	"github.com/go-skynet/LocalAI/pkg/downloader"
 	"github.com/go-skynet/LocalAI/pkg/utils"
 	"github.com/rs/zerolog/log"
 	"gopkg.in/yaml.v3"
@@ -300,21 +301,21 @@ func (cm *ConfigLoader) Preload(modelPath string) error {
 			// Create file path
 			filePath := filepath.Join(modelPath, file.Filename)
 
-			if err := utils.DownloadFile(file.URI, filePath, file.SHA256, status); err != nil {
+			if err := downloader.DownloadFile(file.URI, filePath, file.SHA256, status); err != nil {
 				return err
 			}
 		}
 
 		modelURL := config.PredictionOptions.Model
-		modelURL = utils.ConvertURL(modelURL)
+		modelURL = downloader.ConvertURL(modelURL)
 
-		if utils.LooksLikeURL(modelURL) {
+		if downloader.LooksLikeURL(modelURL) {
 			// md5 of model name
 			md5Name := utils.MD5(modelURL)
 
 			// check if file exists
 			if _, err := os.Stat(filepath.Join(modelPath, md5Name)); errors.Is(err, os.ErrNotExist) {
-				err := utils.DownloadFile(modelURL, filepath.Join(modelPath, md5Name), "", status)
+				err := downloader.DownloadFile(modelURL, filepath.Join(modelPath, md5Name), "", status)
 				if err != nil {
 					return err
 				}
```
````diff
@@ -9,7 +9,7 @@ weight = 6
 
 In order to define default prompts, model parameters (such as custom default `top_p` or `top_k`), LocalAI can be configured to serve user-defined models with a set of default parameters and templates.
 
-You can create multiple `yaml` files in the models path or specify a single YAML configuration file.
+In order to configure a model, you can create multiple `yaml` files in the models path or specify a single YAML configuration file.
 Consider the following `models` folder in the `example/chatbot-ui`:
 
 ```
@@ -96,6 +96,12 @@ Specifying a `config-file` via CLI allows to declare models in a single file as
 
 See also [chatbot-ui](https://github.com/go-skynet/LocalAI/tree/master/examples/chatbot-ui) as an example on how to use config files.
 
+It is possible to specify a full URL or a short-hand URL to a YAML model configuration file and use it at startup with local-ai, for example to use phi-2:
+
+```
+local-ai github://mudler/LocalAI/examples/configurations/phi-2.yaml@master
+```
+
 ### Full config model file reference
 
 ```yaml
````
````diff
@@ -235,6 +235,14 @@ make GRPC_BACKENDS=backend-assets/grpc/llama-cpp build
 
 By default, all the backends are built.
 
+### Specific llama.cpp version
+
+To build with a specific version of llama.cpp, set `CPPLLAMA_VERSION` to the tag or the wanted SHA:
+
+```
+CPPLLAMA_VERSION=<sha> make build
+```
+
 ### Windows compatibility
 
 Make sure to give enough resources to the running container. See https://github.com/go-skynet/LocalAI/issues/2
````
````diff
@@ -15,11 +15,19 @@ This section contains instruction on how to use LocalAI with GPU acceleration.
 For acceleration for AMD or Metal HW there are no specific container images, see the [build]({{%relref "build/#acceleration" %}})
 {{% /notice %}}
 
-### CUDA
+### CUDA (NVIDIA) acceleration
 
 Requirement: nvidia-container-toolkit (installation instructions [1](https://www.server-world.info/en/note?os=Ubuntu_22.04&p=nvidia&f=2) [2](https://docs.nvidia.com/datacenter/cloud-native/container-toolkit/install-guide.html))
 
-To use CUDA, use the images with the `cublas` tag.
+To check which CUDA version you need, you can either run `nvidia-smi` or `nvcc --version`.
+
+Alternatively, you can also check nvidia-smi with docker:
+
+```
+docker run --runtime=nvidia --rm nvidia/cuda nvidia-smi
+```
+
+To use CUDA, use the images with the `cublas` tag, for example.
 
 The image list is on [quay](https://quay.io/repository/go-skynet/local-ai?tab=tags):
````
```diff
@@ -14,6 +14,8 @@ See also our [How to]({{%relref "howtos" %}}) section for end-to-end guided exam
 
 The easiest way to run LocalAI is by using [`docker compose`](https://docs.docker.com/compose/install/) or with [Docker](https://docs.docker.com/engine/install/) (to build locally, see the [build section]({{%relref "build" %}})).
 
+LocalAI needs at least a model file to work, or a configuration YAML file, or both. You can customize further model defaults and specific settings with a configuration file (see [advanced]({{%relref "advanced" %}})).
+
 {{% notice note %}}
 To run with GPU Acceleration, see [GPU acceleration]({{%relref "features/gpu-acceleration" %}}).
 {{% /notice %}}
```
````diff
@@ -113,8 +115,79 @@ helm install local-ai go-skynet/local-ai -f values.yaml
 
 {{% /tab %}}
 
+{{% tab name="From source" %}}
+
+See the [build section]({{%relref "build" %}}).
+
+{{% /tab %}}
+
 {{< /tabs >}}
 
+### Running popular models (one-click!)
+
+{{% notice note %}}
+
+Note: this feature is currently available only on master builds.
+
+{{% /notice %}}
+
+You can run `local-ai` directly with a model name, and it will download the model and start the API with the model loaded.
+
+#### CPU-only
+
+> You can use these images, which are lighter and do not have Nvidia dependencies
+
+| Model | Docker command |
+| --- | --- |
+| phi-2 | ```docker run -p 8080:8080 -ti --rm quay.io/go-skynet/local-ai:{{< version >}}-ffmpeg-core phi-2``` |
+| llava | ```docker run -p 8080:8080 -ti --rm quay.io/go-skynet/local-ai:{{< version >}}-ffmpeg-core llava``` |
+| mistral-openorca | ```docker run -p 8080:8080 -ti --rm quay.io/go-skynet/local-ai:{{< version >}}-ffmpeg-core mistral-openorca``` |
+
+#### GPU (CUDA 11)
+
+For accelerated images with Nvidia and CUDA 11, use the following images.
+
+> If you do not know which version of CUDA you have available, you can check with `nvidia-smi` or `nvcc --version`
+
+| Model | Docker command |
+| --- | --- |
+| phi-2 | ```docker run -p 8080:8080 --gpus all -ti --rm quay.io/go-skynet/local-ai:{{< version >}}-cublas-cuda11-core phi-2``` |
+| llava | ```docker run -p 8080:8080 --gpus all -ti --rm quay.io/go-skynet/local-ai:{{< version >}}-cublas-cuda11-core llava``` |
+| mistral-openorca | ```docker run -p 8080:8080 --gpus all -ti --rm quay.io/go-skynet/local-ai:{{< version >}}-cublas-cuda11-core mistral-openorca``` |
+
+#### GPU (CUDA 12)
+
+> If you do not know which version of CUDA you have available, you can check with `nvidia-smi` or `nvcc --version`
+
+| Model | Docker command |
+| --- | --- |
+| phi-2 | ```docker run -p 8080:8080 -ti --gpus all --rm quay.io/go-skynet/local-ai:{{< version >}}-cublas-cuda12-core phi-2``` |
+| llava | ```docker run -p 8080:8080 -ti --gpus all --rm quay.io/go-skynet/local-ai:{{< version >}}-cublas-cuda12-core llava``` |
+| mistral-openorca | ```docker run -p 8080:8080 --gpus all -ti --rm quay.io/go-skynet/local-ai:{{< version >}}-cublas-cuda12-core mistral-openorca``` |
+
+{{% notice note %}}
+
+LocalAI can be started (either the container image or the binary) with a list of model config file URLs, or with our short-hand format (e.g. `huggingface://`, `github://`). It works by passing the URLs as arguments or as an environment variable, for example:
+
+```
+local-ai github://owner/repo/file.yaml@branch
+
+# Env
+MODELS="github://owner/repo/file.yaml@branch,github://owner/repo/file.yaml@branch" local-ai
+
+# Args
+local-ai --models github://owner/repo/file.yaml@branch --models github://owner/repo/file.yaml@branch
+```
+
+For example, to start local-ai with phi-2, it is also possible to use a full config file from a gist:
+
+```bash
+./local-ai https://gist.githubusercontent.com/mudler/ad601a0488b497b69ec549150d9edd18/raw/a8a8869ef1bb7e3830bf5c0bae29a0cce991ff8d/phi-2.yaml
+```
+
+The file should be a valid YAML configuration file; for the full syntax see [advanced]({{%relref "advanced" %}}).
+{{% /notice %}}
+
 ### Container images
 
 LocalAI has a set of images to support CUDA, ffmpeg and 'vanilla' (CPU-only). The image list is on [quay](https://quay.io/repository/go-skynet/local-ai?tab=tags):
````
```diff
@@ -131,6 +204,11 @@ Core Images - Smaller images without predownload python dependencies
 {{% /tab %}}
 
 {{% tab name="GPU Images CUDA 11" %}}
+
+Images with Nvidia acceleration support
+
+> If you do not know which version of CUDA you have available, you can check with `nvidia-smi` or `nvcc --version`
+
 - `master-cublas-cuda11`
 - `master-cublas-cuda11-core`
 - `{{< version >}}-cublas-cuda11`
@@ -142,6 +220,11 @@ Core Images - Smaller images without predownload python dependencies
 {{% /tab %}}
 
 {{% tab name="GPU Images CUDA 12" %}}
+
+Images with Nvidia acceleration support
+
+> If you do not know which version of CUDA you have available, you can check with `nvidia-smi` or `nvcc --version`
+
 - `master-cublas-cuda12`
 - `master-cublas-cuda12-core`
 - `{{< version >}}-cublas-cuda12`
```
```diff
@@ -357,10 +440,6 @@ affinity: {}
 </details>
 
 
-### Build from source
-
-See the [build section]({{%relref "build" %}}).
-
 ### Other examples
 
 ![Screenshot from 2023-04-26 23-59-55](https://user-images.githubusercontent.com/2420543/234715439-98d12e03-d3ce-4f94-ab54-2b256808e05e.png)
```
````diff
@@ -167,11 +167,6 @@ curl -H "Content-Type: application/json" -d @- http://localhost:8080/v1/images/
 
 ## img2vid
 
-{{% notice note %}}
-
-Experimental and available only on master builds. See: https://github.com/mudler/LocalAI/pull/1442
-
-{{% /notice %}}
 
 ```yaml
 name: img2vid
@@ -193,12 +188,6 @@ curl -H "Content-Type: application/json" -X POST -d @- http://localhost:8080/v1/
 
 ## txt2vid
 
-{{% notice note %}}
-
-Experimental and available only on master builds. See: https://github.com/mudler/LocalAI/pull/1442
-
-{{% /notice %}}
 
 ```yaml
 name: txt2vid
 parameters:
````
embedded/embedded.go (new file, 53 lines)

```go
package embedded

import (
	"embed"
	"fmt"
	"slices"
	"strings"

	"github.com/go-skynet/LocalAI/pkg/assets"
	"gopkg.in/yaml.v3"
)

var modelShorteners map[string]string

//go:embed model_library.yaml
var modelLibrary []byte

//go:embed models/*
var embeddedModels embed.FS

func ModelShortURL(s string) string {
	if _, ok := modelShorteners[s]; ok {
		s = modelShorteners[s]
	}

	return s
}

func init() {
	yaml.Unmarshal(modelLibrary, &modelShorteners)
}

// ExistsInModelsLibrary checks if a model exists in the embedded models library
func ExistsInModelsLibrary(s string) bool {
	f := fmt.Sprintf("%s.yaml", s)

	a := []string{}

	for _, j := range assets.ListFiles(embeddedModels) {
		a = append(a, strings.TrimPrefix(j, "models/"))
	}

	return slices.Contains(a, f)
}

// ResolveContent returns the content in the embedded model library
func ResolveContent(s string) ([]byte, error) {
	if ExistsInModelsLibrary(s) {
		return embeddedModels.ReadFile(fmt.Sprintf("models/%s.yaml", s))
	}

	return nil, fmt.Errorf("cannot find model %s", s)
}
```
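The package resolves a user-supplied name in two steps: short-hand keys are first expanded through `model_library.yaml`, then full YAML configurations are looked up under the embedded `models/` directory. A minimal usage sketch (the `main` wrapper and the printing are illustrative, not part of the commit):

```go
package main

import (
	"fmt"

	"github.com/go-skynet/LocalAI/embedded"
)

func main() {
	// "phi-2" is a key in model_library.yaml: ModelShortURL expands it to
	// the github:// URL it maps to; unknown names pass through unchanged.
	fmt.Println(embedded.ModelShortURL("phi-2"))

	// "mistral-openorca" ships as embedded/models/mistral-openorca.yaml,
	// so its full configuration can be read straight from the binary.
	if embedded.ExistsInModelsLibrary("mistral-openorca") {
		content, err := embedded.ResolveContent("mistral-openorca")
		if err == nil {
			fmt.Println(string(content))
		}
	}
}
```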
embedded/model_library.yaml (new file, 9 lines)

```yaml
###
###
### This file contains the list of models that are available in the library
### The URLs are automatically expanded when local-ai is called with the key as an argument
###
### For models with an entire YAML file to be embedded, put the file inside the `models`
### directory; it will be automatically available with the file name as key (without the .yaml extension)

phi-2: "github://mudler/LocalAI/examples/configurations/phi-2.yaml@master"
```
embedded/models/llava.yaml (new file, 31 lines)

```yaml
backend: llama-cpp
context_size: 4096
f16: true

gpu_layers: 90
mmap: true
name: llava

roles:
  user: "USER:"
  assistant: "ASSISTANT:"
  system: "SYSTEM:"

mmproj: bakllava-mmproj.gguf
parameters:
  model: bakllava.gguf
  temperature: 0.2
  top_k: 40
  top_p: 0.95

template:
  chat: |
    A chat between a curious human and an artificial intelligence assistant. The assistant gives helpful, detailed, and polite answers to the human's questions.
    {{.Input}}
    ASSISTANT:

download_files:
- filename: bakllava.gguf
  uri: huggingface://mys/ggml_bakllava-1/ggml-model-q4_k.gguf
- filename: bakllava-mmproj.gguf
  uri: huggingface://mys/ggml_bakllava-1/mmproj-model-f16.gguf
```
embedded/models/mistral-openorca.yaml (new file, 23 lines)

```yaml
name: mistral-openorca
mmap: true
parameters:
  model: huggingface://TheBloke/Mistral-7B-OpenOrca-GGUF/mistral-7b-openorca.Q6_K.gguf
  temperature: 0.2
  top_k: 40
  top_p: 0.95
template:
  chat_message: |
    <|im_start|>{{if eq .RoleName "assistant"}}assistant{{else if eq .RoleName "system"}}system{{else if eq .RoleName "user"}}user{{end}}
    {{if .Content}}{{.Content}}{{end}}
    <|im_end|>

  chat: |
    {{.Input}}
    <|im_start|>assistant

  completion: |
    {{.Input}}
context_size: 4096
f16: true
stopwords:
- <|im_end|>
```
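The `chat_message` template above is standard Go `text/template` syntax. A self-contained sketch of how such a template renders a single turn; the `chatMessage` struct is illustrative, LocalAI's real template state carries more fields:

```go
package main

import (
	"os"
	"text/template"
)

// Illustrative struct: the field names mirror what the chat_message
// template consumes (.RoleName, .Content).
type chatMessage struct {
	RoleName string
	Content  string
}

const tmpl = `<|im_start|>{{if eq .RoleName "assistant"}}assistant{{else if eq .RoleName "system"}}system{{else if eq .RoleName "user"}}user{{end}}
{{if .Content}}{{.Content}}{{end}}
<|im_end|>
`

func main() {
	t := template.Must(template.New("chat_message").Parse(tmpl))
	// Prints the ChatML-framed message for a single user turn.
	if err := t.Execute(os.Stdout, chatMessage{RoleName: "user", Content: "Hello!"}); err != nil {
		panic(err)
	}
}
```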
pkg/assets/list.go (new file, 22 lines)

```go
package assets

import (
	"embed"
	"io/fs"
)

func ListFiles(content embed.FS) (files []string) {
	fs.WalkDir(content, ".", func(path string, d fs.DirEntry, err error) error {
		if err != nil {
			return err
		}

		if d.IsDir() {
			return nil
		}

		files = append(files, path)
		return nil
	})
	return
}
```
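`ListFiles` flattens an `embed.FS` into a list of regular file paths, which is what lets `embedded.ExistsInModelsLibrary` enumerate `models/*`. A small sketch with a hypothetical embedded directory:

```go
package main

import (
	"embed"
	"fmt"

	"github.com/go-skynet/LocalAI/pkg/assets"
)

// Hypothetical embedded directory used only for this example.
//
//go:embed testdata/*
var content embed.FS

func main() {
	// Prints every regular file path, e.g. "testdata/a.yaml";
	// directories are skipped by the WalkDir callback.
	for _, f := range assets.ListFiles(content) {
		fmt.Println(f)
	}
}
```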
pkg/downloader/progress.go (new file, 26 lines)

```go
package downloader

import "hash"

type progressWriter struct {
	fileName       string
	total          int64
	written        int64
	downloadStatus func(string, string, string, float64)
	hash           hash.Hash
}

func (pw *progressWriter) Write(p []byte) (n int, err error) {
	n, err = pw.hash.Write(p)
	pw.written += int64(n)

	if pw.total > 0 {
		percentage := float64(pw.written) / float64(pw.total) * 100
		//log.Debug().Msgf("Downloading %s: %s/%s (%.2f%%)", pw.fileName, formatBytes(pw.written), formatBytes(pw.total), percentage)
		pw.downloadStatus(pw.fileName, formatBytes(pw.written), formatBytes(pw.total), percentage)
	} else {
		pw.downloadStatus(pw.fileName, formatBytes(pw.written), "", 0)
	}

	return
}
```
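`progressWriter` is a plain `io.Writer`: each chunk is hashed and counted, then reported through the `downloadStatus` callback. Presumably `DownloadFile` tees the HTTP body through it while copying to the destination file; a self-contained sketch of that wiring (the `progress` type below is an illustrative stand-in for the package's unexported writer):

```go
package main

import (
	"crypto/sha256"
	"fmt"
	"io"
	"strings"
)

// Illustrative stand-in for downloader.progressWriter: hash the stream
// and report how many bytes have flowed through so far.
type progress struct {
	written, total int64
	hasher         io.Writer
}

func (p *progress) Write(b []byte) (int, error) {
	n, err := p.hasher.Write(b)
	p.written += int64(n)
	fmt.Printf("\r%d/%d bytes", p.written, p.total)
	return n, err
}

func main() {
	src := strings.NewReader("pretend this is a model file")
	dst := io.Discard // stands in for the destination *os.File

	pw := &progress{total: int64(src.Len()), hasher: sha256.New()}
	// TeeReader: every byte copied to dst also passes through pw, so the
	// SHA256 and the progress report cost no second read of the body.
	if _, err := io.Copy(dst, io.TeeReader(src, pw)); err != nil {
		panic(err)
	}
	fmt.Println()
}
```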
```diff
@@ -1,10 +1,9 @@
-package utils
+package downloader
 
 import (
-	"crypto/md5"
 	"crypto/sha256"
+	"encoding/base64"
 	"fmt"
-	"hash"
 	"io"
 	"net/http"
 	"os"
@@ -12,9 +11,18 @@ import (
 	"strconv"
 	"strings"
 
+	"github.com/go-skynet/LocalAI/pkg/utils"
 	"github.com/rs/zerolog/log"
 )
 
+const (
+	HuggingFacePrefix = "huggingface://"
+	HTTPPrefix        = "http://"
+	HTTPSPrefix       = "https://"
+	GithubURI         = "github:"
+	GithubURI2        = "github://"
+)
+
 func GetURI(url string, f func(url string, i []byte) error) error {
 	url = ConvertURL(url)
 
@@ -52,14 +60,6 @@ func GetURI(url string, f func(url string, i []byte) error) error {
 	return f(url, body)
 }
 
-const (
-	HuggingFacePrefix = "huggingface://"
-	HTTPPrefix        = "http://"
-	HTTPSPrefix       = "https://"
-	GithubURI         = "github:"
-	GithubURI2        = "github://"
-)
-
 func LooksLikeURL(s string) bool {
 	return strings.HasPrefix(s, HTTPPrefix) ||
 		strings.HasPrefix(s, HTTPSPrefix) ||
@@ -229,10 +229,10 @@ func DownloadFile(url string, filePath, sha string, downloadStatus func(string,
 	}
 
 	log.Info().Msgf("File %q downloaded and verified", filePath)
-	if IsArchive(filePath) {
+	if utils.IsArchive(filePath) {
 		basePath := filepath.Dir(filePath)
 		log.Info().Msgf("File %q is an archive, uncompressing to %s", filePath, basePath)
-		if err := ExtractArchive(filePath, basePath); err != nil {
+		if err := utils.ExtractArchive(filePath, basePath); err != nil {
 			log.Debug().Msgf("Failed decompressing %q: %s", filePath, err.Error())
 			return err
 		}
@@ -241,32 +241,35 @@
 	return nil
 }
 
-type progressWriter struct {
-	fileName       string
-	total          int64
-	written        int64
-	downloadStatus func(string, string, string, float64)
-	hash           hash.Hash
-}
-
-func (pw *progressWriter) Write(p []byte) (n int, err error) {
-	n, err = pw.hash.Write(p)
-	pw.written += int64(n)
-
-	if pw.total > 0 {
-		percentage := float64(pw.written) / float64(pw.total) * 100
-		//log.Debug().Msgf("Downloading %s: %s/%s (%.2f%%)", pw.fileName, formatBytes(pw.written), formatBytes(pw.total), percentage)
-		pw.downloadStatus(pw.fileName, formatBytes(pw.written), formatBytes(pw.total), percentage)
-	} else {
-		pw.downloadStatus(pw.fileName, formatBytes(pw.written), "", 0)
-	}
-
-	return
-}
-
-// MD5 of a string
-func MD5(s string) string {
-	return fmt.Sprintf("%x", md5.Sum([]byte(s)))
-}
+// this function checks if the string is a URL; if it is, it downloads the image in memory,
+// encodes it in base64 and returns the base64 string
+func GetBase64Image(s string) (string, error) {
+	if strings.HasPrefix(s, "http") {
+		// download the image
+		resp, err := http.Get(s)
+		if err != nil {
+			return "", err
+		}
+		defer resp.Body.Close()
+
+		// read the image data into memory
+		data, err := io.ReadAll(resp.Body)
+		if err != nil {
+			return "", err
+		}
+
+		// encode the image data in base64
+		encoded := base64.StdEncoding.EncodeToString(data)
+
+		// return the base64 string
+		return encoded, nil
+	}
+
+	// if the string instead is prefixed with "data:image/jpeg;base64,", drop it
+	if strings.HasPrefix(s, "data:image/jpeg;base64,") {
+		return strings.ReplaceAll(s, "data:image/jpeg;base64,", ""), nil
+	}
+	return "", fmt.Errorf("not valid string")
+}
 
 func formatBytes(bytes int64) string {
```
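A short usage sketch for the relocated helper (the image URL is a placeholder):

```go
package main

import (
	"fmt"

	"github.com/go-skynet/LocalAI/pkg/downloader"
)

func main() {
	// Placeholder URL: the image is fetched over HTTP and returned
	// base64-encoded; "data:image/jpeg;base64," inputs are passed
	// through with the prefix stripped instead.
	b64, err := downloader.GetBase64Image("https://example.com/picture.jpg")
	if err != nil {
		fmt.Println("error:", err)
		return
	}
	fmt.Println(len(b64), "base64 characters")
}
```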
```diff
@@ -1,7 +1,7 @@
-package utils_test
+package downloader_test
 
 import (
-	. "github.com/go-skynet/LocalAI/pkg/utils"
+	. "github.com/go-skynet/LocalAI/pkg/downloader"
 	. "github.com/onsi/ginkgo/v2"
 	. "github.com/onsi/gomega"
 )
```
```diff
@@ -6,7 +6,7 @@ import (
 	"path/filepath"
 	"strings"
 
-	"github.com/go-skynet/LocalAI/pkg/utils"
+	"github.com/go-skynet/LocalAI/pkg/downloader"
 	"github.com/imdario/mergo"
 	"github.com/rs/zerolog/log"
 	"gopkg.in/yaml.v2"
@@ -140,7 +140,7 @@ func AvailableGalleryModels(galleries []Gallery, basePath string) ([]*GalleryMod
 
 func findGalleryURLFromReferenceURL(url string) (string, error) {
 	var refFile string
-	err := utils.GetURI(url, func(url string, d []byte) error {
+	err := downloader.GetURI(url, func(url string, d []byte) error {
 		refFile = string(d)
 		if len(refFile) == 0 {
 			return fmt.Errorf("invalid reference file at url %s: %s", url, d)
@@ -163,7 +163,7 @@ func getGalleryModels(gallery Gallery, basePath string) ([]*GalleryModel, error)
 		}
 	}
 
-	err := utils.GetURI(gallery.URL, func(url string, d []byte) error {
+	err := downloader.GetURI(gallery.URL, func(url string, d []byte) error {
 		return yaml.Unmarshal(d, &models)
 	})
 	if err != nil {
```
```diff
@@ -1,14 +1,11 @@
 package gallery
 
 import (
-	"crypto/sha256"
 	"fmt"
-	"hash"
-	"io"
 	"os"
 	"path/filepath"
-	"strconv"
 
+	"github.com/go-skynet/LocalAI/pkg/downloader"
 	"github.com/go-skynet/LocalAI/pkg/utils"
 	"github.com/imdario/mergo"
 	"github.com/rs/zerolog/log"
@@ -66,7 +63,7 @@ type PromptTemplate struct {
 
 func GetGalleryConfigFromURL(url string) (Config, error) {
 	var config Config
-	err := utils.GetURI(url, func(url string, d []byte) error {
+	err := downloader.GetURI(url, func(url string, d []byte) error {
 		return yaml.Unmarshal(d, &config)
 	})
 	if err != nil {
@@ -114,7 +111,7 @@ func InstallModel(basePath, nameOverride string, config *Config, configOverrides
 		// Create file path
 		filePath := filepath.Join(basePath, file.Filename)
 
-		if err := utils.DownloadFile(file.URI, filePath, file.SHA256, downloadStatus); err != nil {
+		if err := downloader.DownloadFile(file.URI, filePath, file.SHA256, downloadStatus); err != nil {
 			return err
 		}
 	}
@@ -183,54 +180,3 @@ func InstallModel(basePath, nameOverride string, config *Config, configOverrides
 
 	return nil
 }
-
-type progressWriter struct {
-	fileName       string
-	total          int64
-	written        int64
-	downloadStatus func(string, string, string, float64)
-	hash           hash.Hash
-}
-
-func (pw *progressWriter) Write(p []byte) (n int, err error) {
-	n, err = pw.hash.Write(p)
-	pw.written += int64(n)
-
-	if pw.total > 0 {
-		percentage := float64(pw.written) / float64(pw.total) * 100
-		//log.Debug().Msgf("Downloading %s: %s/%s (%.2f%%)", pw.fileName, formatBytes(pw.written), formatBytes(pw.total), percentage)
-		pw.downloadStatus(pw.fileName, formatBytes(pw.written), formatBytes(pw.total), percentage)
-	} else {
-		pw.downloadStatus(pw.fileName, formatBytes(pw.written), "", 0)
-	}
-
-	return
-}
-
-func formatBytes(bytes int64) string {
-	const unit = 1024
-	if bytes < unit {
-		return strconv.FormatInt(bytes, 10) + " B"
-	}
-	div, exp := int64(unit), 0
-	for n := bytes / unit; n >= unit; n /= unit {
-		div *= unit
-		exp++
-	}
-	return fmt.Sprintf("%.1f %ciB", float64(bytes)/float64(div), "KMGTPE"[exp])
-}
-
-func calculateSHA(filePath string) (string, error) {
-	file, err := os.Open(filePath)
-	if err != nil {
-		return "", err
-	}
-	defer file.Close()
-
-	hash := sha256.New()
-	if _, err := io.Copy(hash, file); err != nil {
-		return "", err
-	}
-
-	return fmt.Sprintf("%x", hash.Sum(nil)), nil
-}
```
pkg/startup/model_preload.go (new file, 54 lines)

```go
package startup

import (
	"errors"
	"os"
	"path/filepath"

	"github.com/go-skynet/LocalAI/embedded"
	"github.com/go-skynet/LocalAI/pkg/downloader"
	"github.com/go-skynet/LocalAI/pkg/utils"
	"github.com/rs/zerolog/log"
)

// PreloadModelsConfigurations will preload models from the given list of URLs
// It will download the model if it is not already present in the model path
// It will also try to resolve if the model is an embedded model YAML configuration
func PreloadModelsConfigurations(modelPath string, models ...string) {
	for _, url := range models {
		url = embedded.ModelShortURL(url)

		switch {
		case embedded.ExistsInModelsLibrary(url):
			modelYAML, err := embedded.ResolveContent(url)
			// If we resolve something, just save it to disk and continue
			if err != nil {
				log.Error().Msgf("error loading model: %s", err.Error())
				continue
			}

			log.Debug().Msgf("[startup] resolved embedded model: %s", url)
			md5Name := utils.MD5(url)
			if err := os.WriteFile(filepath.Join(modelPath, md5Name)+".yaml", modelYAML, os.ModePerm); err != nil {
				log.Error().Msgf("error loading model: %s", err.Error())
			}
		case downloader.LooksLikeURL(url):
			log.Debug().Msgf("[startup] resolved model to download: %s", url)

			// md5 of model name
			md5Name := utils.MD5(url)

			// check if file exists
			if _, err := os.Stat(filepath.Join(modelPath, md5Name)); errors.Is(err, os.ErrNotExist) {
				err := downloader.DownloadFile(url, filepath.Join(modelPath, md5Name)+".yaml", "", func(fileName, current, total string, percent float64) {
					utils.DisplayDownloadFunction(fileName, current, total, percent)
				})
				if err != nil {
					log.Error().Msgf("error loading model: %s", err.Error())
				}
			}
		default:
			log.Warn().Msgf("[startup] failed resolving model '%s'", url)
		}
	}
}
```
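The `switch` gives the preloader its resolution order: embedded library first, then plain or short-hand URLs, otherwise a warning. A call-site sketch mixing the supported argument forms (the models path is a placeholder):

```go
package main

import "github.com/go-skynet/LocalAI/pkg/startup"

func main() {
	// Each argument can be an embedded model name, a short-hand key from
	// model_library.yaml, or a github://, huggingface:// or http(s):// URL.
	// "/models" is a placeholder path.
	startup.PreloadModelsConfigurations("/models",
		"mistral-openorca", // embedded YAML, written to /models/<md5>.yaml
		"phi-2",            // short-hand key, expanded and then downloaded
		"https://raw.githubusercontent.com/mudler/LocalAI/master/examples/configurations/phi-2.yaml",
	)
}
```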
pkg/startup/model_preload_test.go (new file, 66 lines)

```go
package startup_test

import (
	"fmt"
	"os"
	"path/filepath"

	. "github.com/go-skynet/LocalAI/pkg/startup"
	"github.com/go-skynet/LocalAI/pkg/utils"

	. "github.com/onsi/ginkgo/v2"
	. "github.com/onsi/gomega"
)

var _ = Describe("Preload test", func() {

	Context("Preloading from strings", func() {
		It("loads from embedded full-urls", func() {
			tmpdir, err := os.MkdirTemp("", "")
			Expect(err).ToNot(HaveOccurred())
			url := "https://raw.githubusercontent.com/mudler/LocalAI/master/examples/configurations/phi-2.yaml"
			fileName := fmt.Sprintf("%s.yaml", utils.MD5(url))

			PreloadModelsConfigurations(tmpdir, url)

			resultFile := filepath.Join(tmpdir, fileName)

			content, err := os.ReadFile(resultFile)
			Expect(err).ToNot(HaveOccurred())

			Expect(string(content)).To(ContainSubstring("name: phi-2"))
		})
		It("loads from embedded short-urls", func() {
			tmpdir, err := os.MkdirTemp("", "")
			Expect(err).ToNot(HaveOccurred())
			url := "phi-2"

			PreloadModelsConfigurations(tmpdir, url)

			entry, err := os.ReadDir(tmpdir)
			Expect(err).ToNot(HaveOccurred())
			Expect(entry).To(HaveLen(1))
			resultFile := entry[0].Name()

			content, err := os.ReadFile(filepath.Join(tmpdir, resultFile))
			Expect(err).ToNot(HaveOccurred())

			Expect(string(content)).To(ContainSubstring("name: phi-2"))
		})
		It("loads from embedded models", func() {
			tmpdir, err := os.MkdirTemp("", "")
			Expect(err).ToNot(HaveOccurred())
			url := "mistral-openorca"
			fileName := fmt.Sprintf("%s.yaml", utils.MD5(url))

			PreloadModelsConfigurations(tmpdir, url)

			resultFile := filepath.Join(tmpdir, fileName)

			content, err := os.ReadFile(resultFile)
			Expect(err).ToNot(HaveOccurred())

			Expect(string(content)).To(ContainSubstring("name: mistral-openorca"))
		})
	})
})
```
pkg/startup/startup_suite_test.go (new file, 13 lines)

```go
package startup_test

import (
	"testing"

	. "github.com/onsi/ginkgo/v2"
	. "github.com/onsi/gomega"
)

func TestStartup(t *testing.T) {
	RegisterFailHandler(Fail)
	RunSpecs(t, "LocalAI startup test")
}
```
pkg/utils/hash.go (new file, 10 lines)

```go
package utils

import (
	"crypto/md5"
	"fmt"
)

func MD5(s string) string {
	return fmt.Sprintf("%x", md5.Sum([]byte(s)))
}
```
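`MD5` stays in `pkg/utils` because both the preloader and the config loader use it to derive deterministic on-disk names for remote configurations; for illustration:

```go
package main

import (
	"fmt"

	"github.com/go-skynet/LocalAI/pkg/utils"
)

func main() {
	url := "github://mudler/LocalAI/examples/configurations/phi-2.yaml@master"
	// A preloaded configuration ends up in the models path under the
	// MD5 of the string it was requested with, plus a .yaml suffix.
	fmt.Println(utils.MD5(url) + ".yaml")
}
```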