mirror of
https://github.com/mudler/LocalAI.git
synced 2024-12-18 20:27:57 +00:00
parent
63601fabd1
commit
0b330d90ad
20
README.md
20
README.md
@ -38,7 +38,9 @@ curl http://localhost:8080/v1/completions -H "Content-Type: application/json" -d
|
|||||||
|
|
||||||
```
|
```
|
||||||
|
|
||||||
Note: You can use a default template for every model in your model path by creating a corresponding file with the `.tmpl` suffix next to your model. For instance, if the model is called `foo.bin`, you can create a sibling file, `foo.bin.tmpl`, which will be used as the default prompt. For example, this can be used with alpaca:
|
Note: The API doesn't inject a default prompt for talking to the model, while the CLI does. You have to use a prompt similar to what's described in the stanford-alpaca docs: https://github.com/tatsu-lab/stanford_alpaca#data-release.
|
||||||
|
|
||||||
|
You can use a default template for every model present in your model path by creating a corresponding file with the `.tmpl` suffix next to your model. For instance, if the model is called `foo.bin`, you can create a sibling file, `foo.bin.tmpl`, which will be used as the default prompt. For example, this can be used with alpaca:
|
||||||
|
|
||||||
```
|
```
|
||||||
Below is an instruction that describes a task. Write a response that appropriately completes the request.
|
Below is an instruction that describes a task. Write a response that appropriately completes the request.
|
||||||
@ -187,22 +189,6 @@ You can list all the models available with:
|
|||||||
curl http://localhost:8080/v1/models
|
curl http://localhost:8080/v1/models
|
||||||
```
|
```
|
||||||
|
|
||||||
## Web interface
|
|
||||||
|
|
||||||
A simple web interface is also available (for instance, at http://localhost:8080/), which can be used as a playground.
|
|
||||||
|
|
||||||
Note: The API doesn't inject a template for talking to the instance, while the CLI does. You have to use a prompt similar to what's described in the stanford-alpaca docs: https://github.com/tatsu-lab/stanford_alpaca#data-release, for instance:
|
|
||||||
|
|
||||||
```
|
|
||||||
Below is an instruction that describes a task. Write a response that appropriately completes the request.
|
|
||||||
|
|
||||||
### Instruction:
|
|
||||||
{instruction}
|
|
||||||
|
|
||||||
### Response:
|
|
||||||
```
|
|
||||||
|
|
||||||
|
|
||||||
## Using other models
|
## Using other models
|
||||||
|
|
||||||
gpt4all (https://github.com/nomic-ai/gpt4all) works as well; however, the original model needs to be converted (the same applies to old alpaca models):
|
gpt4all (https://github.com/nomic-ai/gpt4all) works as well; however, the original model needs to be converted (the same applies to old alpaca models):
|
||||||
|
11
api/api.go
11
api/api.go
@ -1,9 +1,7 @@
|
|||||||
package api
|
package api
|
||||||
|
|
||||||
import (
|
import (
|
||||||
"embed"
|
|
||||||
"fmt"
|
"fmt"
|
||||||
"net/http"
|
|
||||||
"strings"
|
"strings"
|
||||||
"sync"
|
"sync"
|
||||||
|
|
||||||
@ -12,7 +10,6 @@ import (
|
|||||||
llama "github.com/go-skynet/go-llama.cpp"
|
llama "github.com/go-skynet/go-llama.cpp"
|
||||||
"github.com/gofiber/fiber/v2"
|
"github.com/gofiber/fiber/v2"
|
||||||
"github.com/gofiber/fiber/v2/middleware/cors"
|
"github.com/gofiber/fiber/v2/middleware/cors"
|
||||||
"github.com/gofiber/fiber/v2/middleware/filesystem"
|
|
||||||
"github.com/gofiber/fiber/v2/middleware/recover"
|
"github.com/gofiber/fiber/v2/middleware/recover"
|
||||||
)
|
)
|
||||||
|
|
||||||
@ -65,9 +62,6 @@ type OpenAIRequest struct {
|
|||||||
IgnoreEOS bool `json:"ignore_eos"`
|
IgnoreEOS bool `json:"ignore_eos"`
|
||||||
}
|
}
|
||||||
|
|
||||||
//go:embed index.html
|
|
||||||
var indexHTML embed.FS
|
|
||||||
|
|
||||||
// https://platform.openai.com/docs/api-reference/completions
|
// https://platform.openai.com/docs/api-reference/completions
|
||||||
func openAIEndpoint(chat bool, loader *model.ModelLoader, threads int, defaultMutex *sync.Mutex, mutexMap *sync.Mutex, mutexes map[string]*sync.Mutex) func(c *fiber.Ctx) error {
|
func openAIEndpoint(chat bool, loader *model.ModelLoader, threads int, defaultMutex *sync.Mutex, mutexMap *sync.Mutex, mutexes map[string]*sync.Mutex) func(c *fiber.Ctx) error {
|
||||||
return func(c *fiber.Ctx) error {
|
return func(c *fiber.Ctx) error {
|
||||||
@ -234,11 +228,6 @@ func Start(loader *model.ModelLoader, listenAddr string, threads int) error {
|
|||||||
})
|
})
|
||||||
})
|
})
|
||||||
|
|
||||||
app.Use("/", filesystem.New(filesystem.Config{
|
|
||||||
Root: http.FS(indexHTML),
|
|
||||||
NotFoundFile: "index.html",
|
|
||||||
}))
|
|
||||||
|
|
||||||
// Start the server
|
// Start the server
|
||||||
app.Listen(listenAddr)
|
app.Listen(listenAddr)
|
||||||
return nil
|
return nil
|
||||||
|
120
api/index.html
120
api/index.html
@ -1,120 +0,0 @@
|
|||||||
<!DOCTYPE html>
|
|
||||||
<html>
|
|
||||||
<head>
|
|
||||||
<title>llama-cli</title>
|
|
||||||
<meta charset="UTF-8">
|
|
||||||
<meta name="viewport" content="width=device-width, initial-scale=1">
|
|
||||||
<link rel="stylesheet" href="https://cdnjs.cloudflare.com/ajax/libs/font-awesome/5.15.3/css/all.min.css" crossorigin="anonymous" referrerpolicy="no-referrer" />
|
|
||||||
<link rel="stylesheet" href="https://stackpath.bootstrapcdn.com/bootstrap/4.3.1/css/bootstrap.min.css">
|
|
||||||
</head>
|
|
||||||
<style>
|
|
||||||
@keyframes rotating {
|
|
||||||
from {
|
|
||||||
transform: rotate(0deg);
|
|
||||||
}
|
|
||||||
to {
|
|
||||||
transform: rotate(360deg);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
.waiting {
|
|
||||||
animation: rotating 1s linear infinite;
|
|
||||||
}
|
|
||||||
|
|
||||||
</style>
|
|
||||||
<body>
|
|
||||||
|
|
||||||
<div class="container mt-5" x-data="{ templates:[
|
|
||||||
{
|
|
||||||
name: 'Alpaca: Instruction without input',
|
|
||||||
text: `Below is an instruction that describes a task. Write a response that appropriately completes the request.
|
|
||||||
|
|
||||||
### Instruction:
|
|
||||||
{{.Instruction}}
|
|
||||||
|
|
||||||
### Response:`,
|
|
||||||
},
|
|
||||||
{
|
|
||||||
name: 'Alpaca: Instruction with input',
|
|
||||||
text: `Below is an instruction that describes a task, paired with an input that provides further context. Write a response that appropriately completes the request.
|
|
||||||
|
|
||||||
### Instruction:
|
|
||||||
{{.Instruction}}
|
|
||||||
|
|
||||||
### Input:
|
|
||||||
{{.Input}}
|
|
||||||
|
|
||||||
### Response:`,
|
|
||||||
}
|
|
||||||
], selectedTemplate: '', selectedTemplateText: '' }">
|
|
||||||
<h1>llama-cli API</h1>
|
|
||||||
<div class="form-group">
|
|
||||||
<label for="inputText">Input Text:</label>
|
|
||||||
<textarea class="form-control" id="inputText" rows="6" placeholder="Your text input here..." x-text="selectedTemplateText"></textarea>
|
|
||||||
</div>
|
|
||||||
<div class="form-group">
|
|
||||||
<label for="templateSelect">Select Template:</label>
|
|
||||||
<select class="form-control" id="templateSelect" x-model="selectedTemplateText">
|
|
||||||
<option value="">None</option>
|
|
||||||
<template x-for="(template, index) in templates" :key="index">
|
|
||||||
<option :value="template.text" x-text="template.name"></option>
|
|
||||||
</template>
|
|
||||||
</select>
|
|
||||||
</div>
|
|
||||||
<div class="form-group">
|
|
||||||
<label for="topP">Top P:</label>
|
|
||||||
<input type="range" step="0.01" min="0" max="1" class="form-control" id="topP" value="0.20" name="topP" onchange="this.nextElementSibling.value = this.value" required>
|
|
||||||
<output>0.20</output>
|
|
||||||
</div>
|
|
||||||
<div class="form-group">
|
|
||||||
<label for="topK">Top K:</label>
|
|
||||||
<input type="number" class="form-control" id="topK" value="10000" name="topK" required>
|
|
||||||
</div>
|
|
||||||
<div class="form-group">
|
|
||||||
<label for="temperature">Temperature:</label>
|
|
||||||
<input type="range" step="0.01" min="0" max="1" value="0.9" class="form-control" id="temperature" name="temperature" onchange="this.nextElementSibling.value = this.value" required>
|
|
||||||
<output>0.9</output>
|
|
||||||
</div>
|
|
||||||
<div class="form-group">
|
|
||||||
<label for="tokens">Tokens:</label>
|
|
||||||
<input type="number" class="form-control" id="tokens" name="tokens" value="128" required>
|
|
||||||
</div>
|
|
||||||
<button class="btn btn-primary" x-on:click="submitRequest()">Submit <i class="fas fa-paper-plane"></i></button>
|
|
||||||
<hr>
|
|
||||||
<div class="form-group">
|
|
||||||
<label for="outputText">Output Text:</label>
|
|
||||||
<textarea class="form-control" id="outputText" rows="5" readonly></textarea>
|
|
||||||
</div>
|
|
||||||
</div>
|
|
||||||
|
|
||||||
<script defer src="https://cdn.jsdelivr.net/npm/alpinejs@3.x.x/dist/cdn.min.js"></script>
|
|
||||||
<script>
|
|
||||||
function submitRequest() {
|
|
||||||
var button = document.querySelector("i.fa-paper-plane");
|
|
||||||
button.classList.add("waiting");
|
|
||||||
var text = document.getElementById("inputText").value;
|
|
||||||
var url = "/predict";
|
|
||||||
var data = {
|
|
||||||
"text": text,
|
|
||||||
"topP": document.getElementById("topP").value,
|
|
||||||
"topK": document.getElementById("topK").value,
|
|
||||||
"temperature": document.getElementById("temperature").value,
|
|
||||||
"tokens": document.getElementById("tokens").value
|
|
||||||
};
|
|
||||||
fetch(url, {
|
|
||||||
method: "POST",
|
|
||||||
headers: {
|
|
||||||
"Content-Type": "application/json"
|
|
||||||
},
|
|
||||||
body: JSON.stringify(data)
|
|
||||||
})
|
|
||||||
.then(response => response.json())
|
|
||||||
.then(data => {
|
|
||||||
document.getElementById("outputText").value = data.prediction;
|
|
||||||
button.classList.remove("waiting");
|
|
||||||
})
|
|
||||||
.catch(error => { console.error(error); button.classList.remove("waiting"); });
|
|
||||||
}
|
|
||||||
</script>
|
|
||||||
</body>
|
|
||||||
</html>
|
|
Loading…
Reference in New Issue
Block a user