feat: Return OpenAI errors and update docs (#80)

Signed-off-by: mudler <mudler@mocaccino.org>
2025-05-22 10:14:21 +00:00 · 2023-04-24 23:42:03 +02:00 · 2023-04-24 23:42:03 +02:00 · 12d83a4184
commit 12d83a4184
parent 045412e8dd
3 changed files with 74 additions and 5 deletions
--- a/README.md
+++ b/README.md
@ -70,6 +70,42 @@ curl http://localhost:8080/v1/completions -H "Content-Type: application/json" -d
   }'
 ```

+### Example: Use GPT4ALL-J model
+
+<details>
+
+```bash
+# Clone LocalAI
+git clone https://github.com/go-skynet/LocalAI
+
+cd LocalAI
+
+# Download gpt4all-j to models/
+wget https://gpt4all.io/models/ggml-gpt4all-j.bin -O models/ggml-gpt4all-j
+
+# Use a template from the examples
+cp -rf prompt-templates/ggml-gpt4all-j.tmpl models/
+
+# (optional) Edit the .env file to set things like context size and threads
+# vim .env
+
+# start with docker-compose
+docker-compose up -d --build
+
+# Now API is accessible at localhost:8080
+curl http://localhost:8080/v1/models
+# {"object":"list","data":[{"id":"ggml-gpt4all-j","object":"model"}]}
+
+curl http://localhost:8080/v1/chat/completions -H "Content-Type: application/json" -d '{
+     "model": "ggml-gpt4all-j",
+     "messages": [{"role": "user", "content": "How are you?"}],
+     "temperature": 0.9 
+   }'
+
+# {"model":"ggml-gpt4all-j","choices":[{"message":{"role":"assistant","content":"I'm doing well, thanks. How about you?"}}]}
+```
+</details>
+
 ## Prompt templates 

 The API doesn't inject a default prompt for talking to the model. You have to use a prompt similar to what's described in the stanford-alpaca docs: https://github.com/tatsu-lab/stanford_alpaca#data-release.
@ -127,6 +163,7 @@ The API takes takes the following parameters:
 | threads      | THREADS              | Number of Physical cores     | The number of threads to use for text generation. |
 | address      | ADDRESS              | :8080         | The address and port to listen on. |
 | context-size | CONTEXT_SIZE         | 512           | Default token context size. |
+| debug | DEBUG         | false           | Enable debug mode. |

 Once the server is running, you can start making requests to it using HTTP, using the OpenAI API. 

@ -136,10 +173,16 @@ Once the server is running, you can start making requests to it using HTTP, usin

 You can check out the [OpenAI API reference](https://platform.openai.com/docs/api-reference/chat/create). 

-Following the list of endpoints/parameters supported.
+The following endpoints and parameters are supported.
+
+Note:
+
+- You can also specify the model as part of the OpenAI token.
+- If only one model is available, the API will use it for all the requests.

 #### Chat completions

+<details>
+
 For example, to generate a chat completion, you can send a POST request to the `/v1/chat/completions` endpoint with the instruction as the request body:

 ```
@ -151,10 +194,12 @@ curl http://localhost:8080/v1/chat/completions -H "Content-Type: application/jso
 ```

 Available additional parameters: `top_p`, `top_k`, `max_tokens`
+</details>

 #### Completions

-For example, to generate a comletion, you can send a POST request to the `/v1/completions` endpoint with the instruction as the request body:
+<details>
+
+For example, to generate a completion, you can send a POST request to the `/v1/completions` endpoint with the instruction as the request body:
 ```
 curl http://localhost:8080/v1/completions -H "Content-Type: application/json" -d '{
     "model": "ggml-koala-7b-model-q4_0-r2.bin",
@ -165,14 +210,19 @@ curl http://localhost:8080/v1/completions -H "Content-Type: application/json" -d

 Available additional parameters: `top_p`, `top_k`, `max_tokens`

+</details>
+
 #### List models

+<details>
 You can list all the models available with:

 ```
 curl http://localhost:8080/v1/models
 ```

+</details>
+
 ## Using other models

 gpt4all (https://github.com/nomic-ai/gpt4all) works as well, however the original model needs to be converted (same applies for old alpaca models, too):
--- a/api/api.go
+++ b/api/api.go
@ -18,6 +18,18 @@ import (
 	"github.com/rs/zerolog/log"
 )

+// APIError provides error information returned by the OpenAI API.
+type APIError struct {
+	Code    any     `json:"code,omitempty"`
+	Message string  `json:"message"`
+	Param   *string `json:"param,omitempty"`
+	Type    string  `json:"type"`
+}
+
+type ErrorResponse struct {
+	Error *APIError `json:"error,omitempty"`
+}
+
 type OpenAIResponse struct {
 	Created int      `json:"created,omitempty"`
 	Object  string   `json:"chat.completion,omitempty"`
@ -395,9 +407,11 @@ func App(loader *model.ModelLoader, threads, ctxSize int, f16 bool, debug, disab
 			}

 			// Send custom error page
-			return ctx.Status(code).JSON(struct {
-				Error string `json:"error"`
-			}{Error: err.Error()})
+			return ctx.Status(code).JSON(
+				ErrorResponse{
+					Error: &APIError{Message: err.Error(), Code: code},
+				},
+			)
 		},
 	})

--- a/api/api_test.go
+++ b/api/api_test.go
@ -49,5 +49,10 @@ var _ = Describe("API test", func() {
 			Expect(len(resp.Choices)).To(Equal(1))
 			Expect(resp.Choices[0].Text).ToNot(BeEmpty())
 		})
+		It("returns errors", func() {
+			_, err := client.CreateCompletion(context.TODO(), openai.CompletionRequest{Model: "foomodel", Prompt: "abcdedfghikl"})
+			Expect(err).To(HaveOccurred())
+			Expect(err.Error()).To(ContainSubstring("error, status code: 500, message: llama: model does not exist"))
+		})
 	})
 })