module github.com/mudler/LocalAI

go 1.22.0

toolchain go1.22.4

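// Note: the mudler/edgevpn and libp2p dependencies below support the optional
// decentralized, distributed llama.cpp inference mode (PR #2343): starting the
// server with `--p2p` generates a shared network token, and workers join with
// `local-ai worker p2p-llama-cpp-rpc`, passing the token via the TOKEN
// environment variable or the --token flag. The feature is only compiled in
// when building with the "p2p" tag.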
require (
	dario.cat/mergo v1.0.0
	github.com/M0Rf30/go-tiny-dream v0.0.0-20240425104733-c04fa463ace9
	github.com/Masterminds/sprig/v3 v3.2.3
	github.com/alecthomas/kong v0.9.0
	github.com/census-instrumentation/opencensus-proto v0.4.1
	github.com/charmbracelet/glamour v0.7.0
	github.com/chasefleming/elem-go v0.26.0
	github.com/cncf/xds/go v0.0.0-20240423153145-555b57ec207b
	github.com/containerd/containerd v1.7.19
	github.com/donomii/go-rwkv.cpp v0.0.0-20240228065144-661e7ae26d44
	github.com/elliotchance/orderedmap/v2 v2.2.0
	github.com/fsnotify/fsnotify v1.7.0
	github.com/ggerganov/whisper.cpp/bindings/go v0.0.0-20240626202019-c118733a29ad
	github.com/go-audio/wav v1.1.0
	github.com/go-skynet/go-bert.cpp v0.0.0-20231028093757-710044b12454
	github.com/go-skynet/go-llama.cpp v0.0.0-20240314183750-6a8041ef6b46
	github.com/gofiber/fiber/v2 v2.52.5
	github.com/gofiber/swagger v1.0.0
	github.com/gofiber/template/html/v2 v2.1.2
	github.com/gofrs/flock v0.12.1
	github.com/golang/protobuf v1.5.4
	github.com/google/go-containerregistry v0.19.2
	github.com/google/uuid v1.6.0
	github.com/grpc-ecosystem/grpc-gateway v1.5.0
	github.com/hpcloud/tail v1.0.0
	github.com/ipfs/go-log v1.0.5
	github.com/jaypipes/ghw v0.12.0
	github.com/joho/godotenv v1.5.1
	github.com/klauspost/cpuid/v2 v2.2.8
	github.com/libp2p/go-libp2p v0.36.2
	github.com/mholt/archiver/v3 v3.5.1
	github.com/microcosm-cc/bluemonday v1.0.26
	github.com/mudler/edgevpn v0.28.0
	github.com/mudler/go-processmanager v0.0.0-20240820160718-8b802d3ecf82
	github.com/mudler/go-stable-diffusion v0.0.0-20240429204715-4a3cd6aeae6f
	github.com/onsi/ginkgo/v2 v2.20.0
	github.com/onsi/gomega v1.34.1
	github.com/ory/dockertest/v3 v3.10.0
	github.com/otiai10/openaigo v1.7.0
	github.com/phayes/freeport v0.0.0-20220201140144-74d24b5ae9f5
	github.com/prometheus/client_golang v1.20.0
	github.com/rs/zerolog v1.33.0
	github.com/russross/blackfriday v1.6.0
	github.com/sashabaranov/go-openai v1.26.2
	github.com/schollz/progressbar/v3 v3.14.4
	github.com/shirou/gopsutil/v3 v3.24.5
	github.com/stretchr/testify v1.9.0
	github.com/swaggo/swag v1.16.3
	github.com/thxcode/gguf-parser-go v0.1.0
	github.com/tmc/langchaingo v0.1.12
	github.com/valyala/fasthttp v1.55.0
	go.opentelemetry.io/otel v1.28.0
	go.opentelemetry.io/otel/exporters/prometheus v0.50.0
	go.opentelemetry.io/otel/metric v1.28.0
	go.opentelemetry.io/otel/sdk/metric v1.28.0
	google.golang.org/api v0.180.0
	google.golang.org/grpc v1.65.0
	google.golang.org/protobuf v1.34.2
	gopkg.in/yaml.v2 v2.4.0
	gopkg.in/yaml.v3 v3.0.1
	oras.land/oras-go/v2 v2.5.0
)

require (
	cel.dev/expr v0.15.0 // indirect
	cloud.google.com/go/auth v0.4.1 // indirect
	cloud.google.com/go/auth/oauth2adapt v0.2.2 // indirect
	cloud.google.com/go/compute/metadata v0.3.0 // indirect
	github.com/cpuguy83/go-md2man/v2 v2.0.4 // indirect
	github.com/envoyproxy/protoc-gen-validate v1.0.4 // indirect
	github.com/felixge/httpsnoop v1.0.4 // indirect
	github.com/go-task/slim-sprig/v3 v3.0.0 // indirect
	github.com/go-viper/mapstructure/v2 v2.0.0 // indirect
	github.com/google/s2a-go v0.1.7 // indirect
	github.com/googleapis/enterprise-certificate-proxy v0.3.2 // indirect
	github.com/googleapis/gax-go/v2 v2.12.4 // indirect
	github.com/labstack/echo/v4 v4.12.0 // indirect
	github.com/labstack/gommon v0.4.2 // indirect
	github.com/moby/docker-image-spec v1.3.1 // indirect
	github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822 // indirect
	github.com/pion/datachannel v1.5.8 // indirect
	github.com/pion/dtls/v2 v2.2.12 // indirect
	github.com/pion/ice/v2 v2.3.34 // indirect
	github.com/pion/interceptor v0.1.30 // indirect
	github.com/pion/logging v0.2.2 // indirect
	github.com/pion/mdns v0.0.12 // indirect
	github.com/pion/randutil v0.1.0 // indirect
	github.com/pion/rtcp v1.2.14 // indirect
	github.com/pion/rtp v1.8.9 // indirect
	github.com/pion/sctp v1.8.33 // indirect
	github.com/pion/sdp/v3 v3.0.9 // indirect
	github.com/pion/srtp/v2 v2.0.20 // indirect
	github.com/pion/stun v0.6.1 // indirect
	github.com/pion/transport/v2 v2.2.10 // indirect
	github.com/pion/turn/v2 v2.1.6 // indirect
	github.com/pion/webrtc/v3 v3.3.0 // indirect
	github.com/russross/blackfriday/v2 v2.1.0 // indirect
	github.com/shirou/gopsutil/v4 v4.24.7 // indirect
	github.com/urfave/cli/v2 v2.27.4 // indirect
	github.com/valyala/fasttemplate v1.2.2 // indirect
	github.com/wlynxg/anet v0.0.4 // indirect
	github.com/xrash/smetrics v0.0.0-20240521201337-686a1a2994c1 // indirect
	go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.52.0 // indirect
	go.uber.org/mock v0.4.0 // indirect
	golang.org/x/oauth2 v0.21.0 // indirect
	google.golang.org/genproto/googleapis/api v0.0.0-20240617180043-68d350f18fd4 // indirect
)
|
|
|
|
|
|
|
|
require (
|
|
|
|
github.com/Azure/go-ansiterm v0.0.0-20230124172434-306776ec8161 // indirect
|
2024-06-23 08:24:36 +00:00
|
|
|
github.com/KyleBanks/depth v1.2.1 // indirect
|
|
|
|
github.com/Masterminds/goutils v1.1.1 // indirect
|
|
|
|
github.com/Masterminds/semver/v3 v3.2.0 // indirect
|
|
|
|
github.com/Microsoft/go-winio v0.6.2 // indirect
|
2024-07-12 19:54:08 +00:00
|
|
|
github.com/Microsoft/hcsshim v0.11.7 // indirect
|
2024-06-23 08:24:36 +00:00
|
|
|
github.com/Nvveen/Gotty v0.0.0-20120604004816-cd527374f1e5 // indirect
|
|
|
|
github.com/StackExchange/wmi v1.2.1 // indirect
|
|
|
|
github.com/alecthomas/chroma/v2 v2.8.0 // indirect
|
2024-07-12 19:54:08 +00:00
|
|
|
github.com/andybalholm/brotli v1.1.0 // indirect
|
2024-06-23 08:24:36 +00:00
|
|
|
github.com/aymanbagabas/go-osc52/v2 v2.0.1 // indirect
|
|
|
|
github.com/aymerick/douceur v0.2.0 // indirect
|
feat(llama.cpp): Totally decentralized, private, distributed, p2p inference (#2343)
* feat(llama.cpp): Enable decentralized, distributed inference
As https://github.com/mudler/LocalAI/pull/2324 introduced distributed inferencing thanks to
@rgerganov implementation in https://github.com/ggerganov/llama.cpp/pull/6829 in upstream llama.cpp, now
it is possible to distribute the workload to remote llama.cpp gRPC server.
This changeset now uses mudler/edgevpn to establish a secure, distributed network between the nodes using a shared token.
The token is generated automatically when starting the server with the `--p2p` flag, and can be used by starting the workers
with `local-ai worker p2p-llama-cpp-rpc` by passing the token via environment variable (TOKEN) or with args (--token).
As per how mudler/edgevpn works, a network is established between the server and the workers with dht and mdns discovery protocols,
the llama.cpp rpc server is automatically started and exposed to the underlying p2p network so the API server can connect on.
When the HTTP server is started, it will discover the workers in the network and automatically create the port-forwards to the service locally.
Then llama.cpp is configured to use the services.
This feature is behind the "p2p" GO_FLAGS
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
* go mod tidy
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
* ci: add p2p tag
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
* better message
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
---------
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2024-05-20 17:17:59 +00:00
|
|
|
github.com/benbjohnson/clock v1.3.5 // indirect
|
2024-06-23 08:24:36 +00:00
|
|
|
github.com/beorn7/perks v1.0.1 // indirect
|
feat(llama.cpp): Totally decentralized, private, distributed, p2p inference (#2343)
* feat(llama.cpp): Enable decentralized, distributed inference
As https://github.com/mudler/LocalAI/pull/2324 introduced distributed inferencing thanks to
@rgerganov implementation in https://github.com/ggerganov/llama.cpp/pull/6829 in upstream llama.cpp, now
it is possible to distribute the workload to remote llama.cpp gRPC server.
This changeset now uses mudler/edgevpn to establish a secure, distributed network between the nodes using a shared token.
The token is generated automatically when starting the server with the `--p2p` flag, and can be used by starting the workers
with `local-ai worker p2p-llama-cpp-rpc` by passing the token via environment variable (TOKEN) or with args (--token).
As per how mudler/edgevpn works, a network is established between the server and the workers with dht and mdns discovery protocols,
the llama.cpp rpc server is automatically started and exposed to the underlying p2p network so the API server can connect on.
When the HTTP server is started, it will discover the workers in the network and automatically create the port-forwards to the service locally.
Then llama.cpp is configured to use the services.
This feature is behind the "p2p" GO_FLAGS
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
* go mod tidy
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
* ci: add p2p tag
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
* better message
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
---------
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2024-05-20 17:17:59 +00:00
|
|
|
github.com/c-robinson/iplib v1.0.8 // indirect
|
2024-06-23 08:24:36 +00:00
|
|
|
github.com/cenkalti/backoff/v4 v4.3.0 // indirect
|
2024-07-12 19:54:08 +00:00
|
|
|
github.com/cespare/xxhash/v2 v2.3.0 // indirect
|
feat(llama.cpp): Totally decentralized, private, distributed, p2p inference (#2343)
* feat(llama.cpp): Enable decentralized, distributed inference
As https://github.com/mudler/LocalAI/pull/2324 introduced distributed inferencing thanks to
@rgerganov implementation in https://github.com/ggerganov/llama.cpp/pull/6829 in upstream llama.cpp, now
it is possible to distribute the workload to remote llama.cpp gRPC server.
This changeset now uses mudler/edgevpn to establish a secure, distributed network between the nodes using a shared token.
The token is generated automatically when starting the server with the `--p2p` flag, and can be used by starting the workers
with `local-ai worker p2p-llama-cpp-rpc` by passing the token via environment variable (TOKEN) or with args (--token).
As per how mudler/edgevpn works, a network is established between the server and the workers with dht and mdns discovery protocols,
the llama.cpp rpc server is automatically started and exposed to the underlying p2p network so the API server can connect on.
When the HTTP server is started, it will discover the workers in the network and automatically create the port-forwards to the service locally.
Then llama.cpp is configured to use the services.
This feature is behind the "p2p" GO_FLAGS
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
* go mod tidy
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
* ci: add p2p tag
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
* better message
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
---------
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2024-05-20 17:17:59 +00:00
|
|
|
github.com/containerd/cgroups v1.1.0 // indirect
|
2024-06-23 08:24:36 +00:00
|
|
|
github.com/containerd/continuity v0.4.2 // indirect
|
2024-06-22 06:17:41 +00:00
|
|
|
github.com/containerd/errdefs v0.1.0 // indirect
|
|
|
|
github.com/containerd/log v0.1.0 // indirect
|
|
|
|
github.com/containerd/stargz-snapshotter/estargz v0.14.3 // indirect
|
feat(llama.cpp): Totally decentralized, private, distributed, p2p inference (#2343)
* feat(llama.cpp): Enable decentralized, distributed inference
As https://github.com/mudler/LocalAI/pull/2324 introduced distributed inferencing thanks to
@rgerganov implementation in https://github.com/ggerganov/llama.cpp/pull/6829 in upstream llama.cpp, now
it is possible to distribute the workload to remote llama.cpp gRPC server.
This changeset now uses mudler/edgevpn to establish a secure, distributed network between the nodes using a shared token.
The token is generated automatically when starting the server with the `--p2p` flag, and can be used by starting the workers
with `local-ai worker p2p-llama-cpp-rpc` by passing the token via environment variable (TOKEN) or with args (--token).
As per how mudler/edgevpn works, a network is established between the server and the workers with dht and mdns discovery protocols,
the llama.cpp rpc server is automatically started and exposed to the underlying p2p network so the API server can connect on.
When the HTTP server is started, it will discover the workers in the network and automatically create the port-forwards to the service locally.
Then llama.cpp is configured to use the services.
This feature is behind the "p2p" GO_FLAGS
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
* go mod tidy
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
* ci: add p2p tag
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
* better message
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
---------
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2024-05-20 17:17:59 +00:00
|
|
|
github.com/coreos/go-systemd/v22 v22.5.0 // indirect
|
|
|
|
github.com/creachadair/otp v0.4.2 // indirect
|
2024-06-23 08:24:36 +00:00
|
|
|
github.com/davecgh/go-spew v1.1.1 // indirect
|
feat(llama.cpp): Totally decentralized, private, distributed, p2p inference (#2343)
* feat(llama.cpp): Enable decentralized, distributed inference
As https://github.com/mudler/LocalAI/pull/2324 introduced distributed inferencing thanks to
@rgerganov implementation in https://github.com/ggerganov/llama.cpp/pull/6829 in upstream llama.cpp, now
it is possible to distribute the workload to remote llama.cpp gRPC server.
This changeset now uses mudler/edgevpn to establish a secure, distributed network between the nodes using a shared token.
The token is generated automatically when starting the server with the `--p2p` flag, and can be used by starting the workers
with `local-ai worker p2p-llama-cpp-rpc` by passing the token via environment variable (TOKEN) or with args (--token).
As per how mudler/edgevpn works, a network is established between the server and the workers with dht and mdns discovery protocols,
the llama.cpp rpc server is automatically started and exposed to the underlying p2p network so the API server can connect on.
When the HTTP server is started, it will discover the workers in the network and automatically create the port-forwards to the service locally.
Then llama.cpp is configured to use the services.
This feature is behind the "p2p" GO_FLAGS
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
* go mod tidy
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
* ci: add p2p tag
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
* better message
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
---------
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2024-05-20 17:17:59 +00:00
|
|
|
github.com/davidlazar/go-crypto v0.0.0-20200604182044-b73af7476f6c // indirect
|
2024-07-12 19:54:08 +00:00
|
|
|
github.com/decred/dcrd/dcrec/secp256k1/v4 v4.3.0 // indirect
|
|
|
|
github.com/dlclark/regexp2 v1.10.0 // indirect
|
|
|
|
github.com/docker/cli v27.0.3+incompatible // indirect
|
2024-06-22 06:17:41 +00:00
|
|
|
github.com/docker/distribution v2.8.2+incompatible // indirect
|
2024-07-12 19:54:08 +00:00
|
|
|
github.com/docker/docker v27.0.3+incompatible
|
2024-06-22 06:17:41 +00:00
|
|
|
github.com/docker/docker-credential-helpers v0.7.0 // indirect
|
2024-07-12 19:54:08 +00:00
|
|
|
github.com/docker/go-connections v0.5.0 // indirect
|
2024-06-23 08:24:36 +00:00
|
|
|
github.com/docker/go-units v0.5.0 // indirect
|
|
|
|
github.com/dsnet/compress v0.0.2-0.20210315054119-f66993602bf5 // indirect
|
2024-08-20 17:17:35 +00:00
|
|
|
github.com/elastic/gosigar v0.14.3 // indirect
|
2024-07-12 19:54:08 +00:00
|
|
|
github.com/flynn/noise v1.1.0 // indirect
|
feat(llama.cpp): Totally decentralized, private, distributed, p2p inference (#2343)
* feat(llama.cpp): Enable decentralized, distributed inference
As https://github.com/mudler/LocalAI/pull/2324 introduced distributed inferencing thanks to
@rgerganov implementation in https://github.com/ggerganov/llama.cpp/pull/6829 in upstream llama.cpp, now
it is possible to distribute the workload to remote llama.cpp gRPC server.
This changeset now uses mudler/edgevpn to establish a secure, distributed network between the nodes using a shared token.
The token is generated automatically when starting the server with the `--p2p` flag, and can be used by starting the workers
with `local-ai worker p2p-llama-cpp-rpc` by passing the token via environment variable (TOKEN) or with args (--token).
As per how mudler/edgevpn works, a network is established between the server and the workers with dht and mdns discovery protocols,
the llama.cpp rpc server is automatically started and exposed to the underlying p2p network so the API server can connect on.
When the HTTP server is started, it will discover the workers in the network and automatically create the port-forwards to the service locally.
Then llama.cpp is configured to use the services.
This feature is behind the "p2p" GO_FLAGS
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
* go mod tidy
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
* ci: add p2p tag
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
* better message
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
---------
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2024-05-20 17:17:59 +00:00
|
|
|
github.com/francoispqt/gojay v1.2.13 // indirect
|
2024-06-23 08:24:36 +00:00
|
|
|
github.com/ghodss/yaml v1.0.0 // indirect
|
|
|
|
github.com/go-audio/audio v1.0.0 // indirect
|
|
|
|
github.com/go-audio/riff v1.0.0 // indirect
|
2024-07-12 19:54:08 +00:00
|
|
|
github.com/go-logr/logr v1.4.2 // indirect
|
2024-06-23 08:24:36 +00:00
|
|
|
github.com/go-logr/stdr v1.2.2 // indirect
|
2024-08-23 22:27:14 +00:00
|
|
|
github.com/go-ole/go-ole v1.3.0 // indirect
|
2024-06-23 08:24:36 +00:00
|
|
|
github.com/go-openapi/jsonpointer v0.21.0 // indirect
|
|
|
|
github.com/go-openapi/jsonreference v0.21.0 // indirect
|
|
|
|
github.com/go-openapi/spec v0.21.0 // indirect
|
|
|
|
github.com/go-openapi/swag v0.23.0 // indirect
|
feat(llama.cpp): Totally decentralized, private, distributed, p2p inference (#2343)
* feat(llama.cpp): Enable decentralized, distributed inference
As https://github.com/mudler/LocalAI/pull/2324 introduced distributed inferencing thanks to
@rgerganov implementation in https://github.com/ggerganov/llama.cpp/pull/6829 in upstream llama.cpp, now
it is possible to distribute the workload to remote llama.cpp gRPC server.
This changeset now uses mudler/edgevpn to establish a secure, distributed network between the nodes using a shared token.
The token is generated automatically when starting the server with the `--p2p` flag, and can be used by starting the workers
with `local-ai worker p2p-llama-cpp-rpc` by passing the token via environment variable (TOKEN) or with args (--token).
As per how mudler/edgevpn works, a network is established between the server and the workers with dht and mdns discovery protocols,
the llama.cpp rpc server is automatically started and exposed to the underlying p2p network so the API server can connect on.
When the HTTP server is started, it will discover the workers in the network and automatically create the port-forwards to the service locally.
Then llama.cpp is configured to use the services.
This feature is behind the "p2p" GO_FLAGS
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
* go mod tidy
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
* ci: add p2p tag
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
* better message
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
---------
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2024-05-20 17:17:59 +00:00
|
|
|
github.com/godbus/dbus/v5 v5.1.0 // indirect
|
2024-07-12 19:54:08 +00:00
|
|
|
github.com/gofiber/contrib/fiberzerolog v1.0.2
|
2024-06-23 08:24:36 +00:00
|
|
|
github.com/gofiber/template v1.8.3 // indirect
|
|
|
|
github.com/gofiber/utils v1.1.0 // indirect
|
|
|
|
github.com/gogo/protobuf v1.3.2 // indirect
|
2024-06-22 06:17:41 +00:00
|
|
|
github.com/golang/groupcache v0.0.0-20210331224755-41bb18bfe9da // indirect
|
2024-07-12 19:54:08 +00:00
|
|
|
github.com/golang/snappy v0.0.4 // indirect
|
feat(llama.cpp): Totally decentralized, private, distributed, p2p inference (#2343)
* feat(llama.cpp): Enable decentralized, distributed inference
As https://github.com/mudler/LocalAI/pull/2324 introduced distributed inferencing thanks to
@rgerganov implementation in https://github.com/ggerganov/llama.cpp/pull/6829 in upstream llama.cpp, now
it is possible to distribute the workload to remote llama.cpp gRPC server.
This changeset now uses mudler/edgevpn to establish a secure, distributed network between the nodes using a shared token.
The token is generated automatically when starting the server with the `--p2p` flag, and can be used by starting the workers
with `local-ai worker p2p-llama-cpp-rpc` by passing the token via environment variable (TOKEN) or with args (--token).
As per how mudler/edgevpn works, a network is established between the server and the workers with dht and mdns discovery protocols,
the llama.cpp rpc server is automatically started and exposed to the underlying p2p network so the API server can connect on.
When the HTTP server is started, it will discover the workers in the network and automatically create the port-forwards to the service locally.
Then llama.cpp is configured to use the services.
This feature is behind the "p2p" GO_FLAGS
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
* go mod tidy
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
* ci: add p2p tag
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
* better message
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
---------
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2024-05-20 17:17:59 +00:00
|
|
|
github.com/google/btree v1.1.2 // indirect
|
2024-06-23 08:24:36 +00:00
|
|
|
github.com/google/go-cmp v0.6.0 // indirect
|
feat(llama.cpp): Totally decentralized, private, distributed, p2p inference (#2343)
* feat(llama.cpp): Enable decentralized, distributed inference
As https://github.com/mudler/LocalAI/pull/2324 introduced distributed inferencing thanks to
@rgerganov implementation in https://github.com/ggerganov/llama.cpp/pull/6829 in upstream llama.cpp, now
it is possible to distribute the workload to remote llama.cpp gRPC server.
This changeset now uses mudler/edgevpn to establish a secure, distributed network between the nodes using a shared token.
The token is generated automatically when starting the server with the `--p2p` flag, and can be used by starting the workers
with `local-ai worker p2p-llama-cpp-rpc` by passing the token via environment variable (TOKEN) or with args (--token).
As per how mudler/edgevpn works, a network is established between the server and the workers with dht and mdns discovery protocols,
the llama.cpp rpc server is automatically started and exposed to the underlying p2p network so the API server can connect on.
When the HTTP server is started, it will discover the workers in the network and automatically create the port-forwards to the service locally.
Then llama.cpp is configured to use the services.
This feature is behind the "p2p" GO_FLAGS
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
* go mod tidy
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
* ci: add p2p tag
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
* better message
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
---------
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2024-05-20 17:17:59 +00:00
|
|
|
github.com/google/gopacket v1.1.19 // indirect
|
2024-08-11 08:46:17 +00:00
|
|
|
github.com/google/pprof v0.0.0-20240727154555-813a5fbdbec8 // indirect
|
2024-06-23 08:24:36 +00:00
|
|
|
github.com/google/shlex v0.0.0-20191202100458-e7afc7fbc510 // indirect
|
|
|
|
github.com/gorilla/css v1.0.1 // indirect
|
2024-07-13 23:26:17 +00:00
|
|
|
github.com/gorilla/websocket v1.5.3 // indirect
|
feat(llama.cpp): Totally decentralized, private, distributed, p2p inference (#2343)
* feat(llama.cpp): Enable decentralized, distributed inference
As https://github.com/mudler/LocalAI/pull/2324 introduced distributed inferencing thanks to
@rgerganov implementation in https://github.com/ggerganov/llama.cpp/pull/6829 in upstream llama.cpp, now
it is possible to distribute the workload to remote llama.cpp gRPC server.
This changeset now uses mudler/edgevpn to establish a secure, distributed network between the nodes using a shared token.
The token is generated automatically when starting the server with the `--p2p` flag, and can be used by starting the workers
with `local-ai worker p2p-llama-cpp-rpc` by passing the token via environment variable (TOKEN) or with args (--token).
As per how mudler/edgevpn works, a network is established between the server and the workers with dht and mdns discovery protocols,
the llama.cpp rpc server is automatically started and exposed to the underlying p2p network so the API server can connect on.
When the HTTP server is started, it will discover the workers in the network and automatically create the port-forwards to the service locally.
Then llama.cpp is configured to use the services.
This feature is behind the "p2p" GO_FLAGS
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
* go mod tidy
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
* ci: add p2p tag
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
* better message
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
---------
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2024-05-20 17:17:59 +00:00
|
|
|
github.com/hashicorp/errwrap v1.1.0 // indirect
|
|
|
|
github.com/hashicorp/go-multierror v1.1.1 // indirect
|
2024-07-12 19:54:08 +00:00
|
|
|
github.com/hashicorp/golang-lru v1.0.2 // indirect
|
feat(llama.cpp): Totally decentralized, private, distributed, p2p inference (#2343)
* feat(llama.cpp): Enable decentralized, distributed inference
As https://github.com/mudler/LocalAI/pull/2324 introduced distributed inferencing thanks to
@rgerganov implementation in https://github.com/ggerganov/llama.cpp/pull/6829 in upstream llama.cpp, now
it is possible to distribute the workload to remote llama.cpp gRPC server.
This changeset now uses mudler/edgevpn to establish a secure, distributed network between the nodes using a shared token.
The token is generated automatically when starting the server with the `--p2p` flag, and can be used by starting the workers
with `local-ai worker p2p-llama-cpp-rpc` by passing the token via environment variable (TOKEN) or with args (--token).
As per how mudler/edgevpn works, a network is established between the server and the workers with dht and mdns discovery protocols,
the llama.cpp rpc server is automatically started and exposed to the underlying p2p network so the API server can connect on.
When the HTTP server is started, it will discover the workers in the network and automatically create the port-forwards to the service locally.
Then llama.cpp is configured to use the services.
This feature is behind the "p2p" GO_FLAGS
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
* go mod tidy
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
* ci: add p2p tag
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
* better message
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
---------
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2024-05-20 17:17:59 +00:00
|
|
|
github.com/hashicorp/golang-lru/v2 v2.0.7 // indirect
|
2024-06-08 20:13:02 +00:00
|
|
|
github.com/henvic/httpretty v0.1.3 // indirect
|
2024-06-23 08:24:36 +00:00
|
|
|
github.com/huandu/xstrings v1.3.3 // indirect
|
2024-07-12 19:54:08 +00:00
|
|
|
github.com/huin/goupnp v1.3.0 // indirect
|
|
|
|
github.com/imdario/mergo v0.3.16 // indirect
|
2024-08-26 18:19:27 +00:00
|
|
|
github.com/ipfs/boxo v0.21.0 // indirect
|
feat(llama.cpp): Totally decentralized, private, distributed, p2p inference (#2343)
* feat(llama.cpp): Enable decentralized, distributed inference
As https://github.com/mudler/LocalAI/pull/2324 introduced distributed inferencing thanks to
@rgerganov implementation in https://github.com/ggerganov/llama.cpp/pull/6829 in upstream llama.cpp, now
it is possible to distribute the workload to remote llama.cpp gRPC server.
This changeset now uses mudler/edgevpn to establish a secure, distributed network between the nodes using a shared token.
The token is generated automatically when starting the server with the `--p2p` flag, and can be used by starting the workers
with `local-ai worker p2p-llama-cpp-rpc` by passing the token via environment variable (TOKEN) or with args (--token).
As per how mudler/edgevpn works, a network is established between the server and the workers with dht and mdns discovery protocols,
the llama.cpp rpc server is automatically started and exposed to the underlying p2p network so the API server can connect on.
When the HTTP server is started, it will discover the workers in the network and automatically create the port-forwards to the service locally.
Then llama.cpp is configured to use the services.
This feature is behind the "p2p" GO_FLAGS
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
* go mod tidy
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
* ci: add p2p tag
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
* better message
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
---------
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2024-05-20 17:17:59 +00:00
|
|
|
github.com/ipfs/go-cid v0.4.1 // indirect
|
|
|
|
github.com/ipfs/go-datastore v0.6.0 // indirect
|
|
|
|
github.com/ipfs/go-log/v2 v2.5.1 // indirect
|
2024-08-26 18:19:27 +00:00
|
|
|
github.com/ipld/go-ipld-prime v0.21.0 // indirect
|
feat(llama.cpp): Totally decentralized, private, distributed, p2p inference (#2343)
* feat(llama.cpp): Enable decentralized, distributed inference
As https://github.com/mudler/LocalAI/pull/2324 introduced distributed inferencing thanks to
@rgerganov implementation in https://github.com/ggerganov/llama.cpp/pull/6829 in upstream llama.cpp, now
it is possible to distribute the workload to remote llama.cpp gRPC server.
This changeset now uses mudler/edgevpn to establish a secure, distributed network between the nodes using a shared token.
The token is generated automatically when starting the server with the `--p2p` flag, and can be used by starting the workers
with `local-ai worker p2p-llama-cpp-rpc` by passing the token via environment variable (TOKEN) or with args (--token).
As per how mudler/edgevpn works, a network is established between the server and the workers with dht and mdns discovery protocols,
the llama.cpp rpc server is automatically started and exposed to the underlying p2p network so the API server can connect on.
When the HTTP server is started, it will discover the workers in the network and automatically create the port-forwards to the service locally.
Then llama.cpp is configured to use the services.
This feature is behind the "p2p" GO_FLAGS
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
* go mod tidy
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
* ci: add p2p tag
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
* better message
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
---------
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2024-05-20 17:17:59 +00:00
|
|
|
github.com/jackpal/go-nat-pmp v1.0.2 // indirect
|
2024-06-23 08:24:36 +00:00
|
|
|
github.com/jaypipes/pcidb v1.0.0 // indirect
|
feat(llama.cpp): Totally decentralized, private, distributed, p2p inference (#2343)
* feat(llama.cpp): Enable decentralized, distributed inference
As https://github.com/mudler/LocalAI/pull/2324 introduced distributed inferencing thanks to
@rgerganov implementation in https://github.com/ggerganov/llama.cpp/pull/6829 in upstream llama.cpp, now
it is possible to distribute the workload to remote llama.cpp gRPC server.
This changeset now uses mudler/edgevpn to establish a secure, distributed network between the nodes using a shared token.
The token is generated automatically when starting the server with the `--p2p` flag, and can be used by starting the workers
with `local-ai worker p2p-llama-cpp-rpc` by passing the token via environment variable (TOKEN) or with args (--token).
As per how mudler/edgevpn works, a network is established between the server and the workers with dht and mdns discovery protocols,
the llama.cpp rpc server is automatically started and exposed to the underlying p2p network so the API server can connect on.
When the HTTP server is started, it will discover the workers in the network and automatically create the port-forwards to the service locally.
Then llama.cpp is configured to use the services.
This feature is behind the "p2p" GO_FLAGS
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
* go mod tidy
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
* ci: add p2p tag
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
* better message
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
---------
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2024-05-20 17:17:59 +00:00
|
|
|
github.com/jbenet/go-temp-err-catcher v0.1.0 // indirect
|
|
|
|
github.com/jbenet/goprocess v0.1.4 // indirect
|
2024-06-23 08:24:36 +00:00
|
|
|
github.com/josharian/intern v1.0.0 // indirect
|
2024-07-12 19:54:08 +00:00
|
|
|
github.com/klauspost/compress v1.17.9 // indirect
|
2024-06-23 08:24:36 +00:00
|
|
|
github.com/klauspost/pgzip v1.2.5 // indirect
|
feat(llama.cpp): Totally decentralized, private, distributed, p2p inference (#2343)
* feat(llama.cpp): Enable decentralized, distributed inference
As https://github.com/mudler/LocalAI/pull/2324 introduced distributed inferencing thanks to
@rgerganov implementation in https://github.com/ggerganov/llama.cpp/pull/6829 in upstream llama.cpp, now
it is possible to distribute the workload to remote llama.cpp gRPC server.
This changeset now uses mudler/edgevpn to establish a secure, distributed network between the nodes using a shared token.
The token is generated automatically when starting the server with the `--p2p` flag, and can be used by starting the workers
with `local-ai worker p2p-llama-cpp-rpc` by passing the token via environment variable (TOKEN) or with args (--token).
As per how mudler/edgevpn works, a network is established between the server and the workers with dht and mdns discovery protocols,
the llama.cpp rpc server is automatically started and exposed to the underlying p2p network so the API server can connect on.
When the HTTP server is started, it will discover the workers in the network and automatically create the port-forwards to the service locally.
Then llama.cpp is configured to use the services.
This feature is behind the "p2p" GO_FLAGS
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
* go mod tidy
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
* ci: add p2p tag
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
* better message
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
---------
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2024-05-20 17:17:59 +00:00
|
|
|
github.com/koron/go-ssdp v0.0.4 // indirect
|
|
|
|
github.com/libp2p/go-buffer-pool v0.1.0 // indirect
|
|
|
|
github.com/libp2p/go-cidranger v1.1.0 // indirect
|
|
|
|
github.com/libp2p/go-flow-metrics v0.1.0 // indirect
|
2024-07-12 19:54:08 +00:00
|
|
|
github.com/libp2p/go-libp2p-asn-util v0.4.1 // indirect
|
2024-08-26 18:19:27 +00:00
|
|
|
github.com/libp2p/go-libp2p-kad-dht v0.26.1 // indirect
|
feat(llama.cpp): Totally decentralized, private, distributed, p2p inference (#2343)
* feat(llama.cpp): Enable decentralized, distributed inference
As https://github.com/mudler/LocalAI/pull/2324 introduced distributed inferencing thanks to
@rgerganov implementation in https://github.com/ggerganov/llama.cpp/pull/6829 in upstream llama.cpp, now
it is possible to distribute the workload to remote llama.cpp gRPC server.
This changeset now uses mudler/edgevpn to establish a secure, distributed network between the nodes using a shared token.
The token is generated automatically when starting the server with the `--p2p` flag, and can be used by starting the workers
with `local-ai worker p2p-llama-cpp-rpc` by passing the token via environment variable (TOKEN) or with args (--token).
As per how mudler/edgevpn works, a network is established between the server and the workers with dht and mdns discovery protocols,
the llama.cpp rpc server is automatically started and exposed to the underlying p2p network so the API server can connect on.
When the HTTP server is started, it will discover the workers in the network and automatically create the port-forwards to the service locally.
Then llama.cpp is configured to use the services.
This feature is behind the "p2p" GO_FLAGS
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
* go mod tidy
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
* ci: add p2p tag
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
* better message
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
---------
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2024-05-20 17:17:59 +00:00
|
|
|
github.com/libp2p/go-libp2p-kbucket v0.6.3 // indirect
|
2024-08-20 17:17:35 +00:00
|
|
|
github.com/libp2p/go-libp2p-pubsub v0.12.0 // indirect
|
feat(llama.cpp): Totally decentralized, private, distributed, p2p inference (#2343)
* feat(llama.cpp): Enable decentralized, distributed inference
As https://github.com/mudler/LocalAI/pull/2324 introduced distributed inferencing thanks to
@rgerganov implementation in https://github.com/ggerganov/llama.cpp/pull/6829 in upstream llama.cpp, now
it is possible to distribute the workload to remote llama.cpp gRPC server.
This changeset now uses mudler/edgevpn to establish a secure, distributed network between the nodes using a shared token.
The token is generated automatically when starting the server with the `--p2p` flag, and can be used by starting the workers
with `local-ai worker p2p-llama-cpp-rpc` by passing the token via environment variable (TOKEN) or with args (--token).
As per how mudler/edgevpn works, a network is established between the server and the workers with dht and mdns discovery protocols,
the llama.cpp rpc server is automatically started and exposed to the underlying p2p network so the API server can connect on.
When the HTTP server is started, it will discover the workers in the network and automatically create the port-forwards to the service locally.
Then llama.cpp is configured to use the services.
This feature is behind the "p2p" GO_FLAGS
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
* go mod tidy
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
* ci: add p2p tag
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
* better message
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
---------
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2024-05-20 17:17:59 +00:00
|
|
|
github.com/libp2p/go-libp2p-record v0.2.0 // indirect
|
2024-08-26 18:19:27 +00:00
|
|
|
github.com/libp2p/go-libp2p-routing-helpers v0.7.4 // indirect
|
feat(llama.cpp): Totally decentralized, private, distributed, p2p inference (#2343)
* feat(llama.cpp): Enable decentralized, distributed inference
As https://github.com/mudler/LocalAI/pull/2324 introduced distributed inferencing thanks to
@rgerganov implementation in https://github.com/ggerganov/llama.cpp/pull/6829 in upstream llama.cpp, now
it is possible to distribute the workload to remote llama.cpp gRPC server.
This changeset now uses mudler/edgevpn to establish a secure, distributed network between the nodes using a shared token.
The token is generated automatically when starting the server with the `--p2p` flag, and can be used by starting the workers
with `local-ai worker p2p-llama-cpp-rpc` by passing the token via environment variable (TOKEN) or with args (--token).
As per how mudler/edgevpn works, a network is established between the server and the workers with dht and mdns discovery protocols,
the llama.cpp rpc server is automatically started and exposed to the underlying p2p network so the API server can connect on.
When the HTTP server is started, it will discover the workers in the network and automatically create the port-forwards to the service locally.
Then llama.cpp is configured to use the services.
This feature is behind the "p2p" GO_FLAGS
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
* go mod tidy
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
* ci: add p2p tag
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
* better message
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
---------
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2024-05-20 17:17:59 +00:00
|
|
|
github.com/libp2p/go-msgio v0.3.0 // indirect
|
|
|
|
github.com/libp2p/go-nat v0.2.0 // indirect
|
|
|
|
github.com/libp2p/go-netroute v0.2.1 // indirect
|
|
|
|
github.com/libp2p/go-reuseport v0.4.0 // indirect
|
|
|
|
github.com/libp2p/go-yamux/v4 v4.0.1 // indirect
|
|
|
|
github.com/libp2p/zeroconf/v2 v2.2.0 // indirect
|
2024-06-23 08:24:36 +00:00
|
|
|
github.com/lucasb-eyer/go-colorful v1.2.0 // indirect
|
2024-08-23 22:27:14 +00:00
|
|
|
github.com/lufia/plan9stats v0.0.0-20240819163618-b1d8f4d146e7 // indirect
|
2024-06-23 08:24:36 +00:00
|
|
|
github.com/mailru/easyjson v0.7.7 // indirect
|
feat(llama.cpp): Totally decentralized, private, distributed, p2p inference (#2343)
* feat(llama.cpp): Enable decentralized, distributed inference
As https://github.com/mudler/LocalAI/pull/2324 introduced distributed inferencing thanks to
@rgerganov implementation in https://github.com/ggerganov/llama.cpp/pull/6829 in upstream llama.cpp, now
it is possible to distribute the workload to remote llama.cpp gRPC server.
This changeset now uses mudler/edgevpn to establish a secure, distributed network between the nodes using a shared token.
The token is generated automatically when starting the server with the `--p2p` flag, and can be used by starting the workers
with `local-ai worker p2p-llama-cpp-rpc` by passing the token via environment variable (TOKEN) or with args (--token).
As per how mudler/edgevpn works, a network is established between the server and the workers with dht and mdns discovery protocols,
the llama.cpp rpc server is automatically started and exposed to the underlying p2p network so the API server can connect on.
When the HTTP server is started, it will discover the workers in the network and automatically create the port-forwards to the service locally.
Then llama.cpp is configured to use the services.
This feature is behind the "p2p" GO_FLAGS
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
* go mod tidy
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
* ci: add p2p tag
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
* better message
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
---------
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2024-05-20 17:17:59 +00:00
|
|
|
github.com/marten-seemann/tcp v0.0.0-20210406111302-dfbc87cc63fd // indirect
|
2024-06-23 08:24:36 +00:00
|
|
|
github.com/mattn/go-colorable v0.1.13 // indirect
|
|
|
|
github.com/mattn/go-isatty v0.0.20 // indirect
|
|
|
|
github.com/mattn/go-runewidth v0.0.15 // indirect
|
2024-08-20 17:17:35 +00:00
|
|
|
github.com/miekg/dns v1.1.62 // indirect
|
feat(llama.cpp): Totally decentralized, private, distributed, p2p inference (#2343)
* feat(llama.cpp): Enable decentralized, distributed inference
As https://github.com/mudler/LocalAI/pull/2324 introduced distributed inferencing thanks to
@rgerganov implementation in https://github.com/ggerganov/llama.cpp/pull/6829 in upstream llama.cpp, now
it is possible to distribute the workload to remote llama.cpp gRPC server.
This changeset now uses mudler/edgevpn to establish a secure, distributed network between the nodes using a shared token.
The token is generated automatically when starting the server with the `--p2p` flag, and can be used by starting the workers
with `local-ai worker p2p-llama-cpp-rpc` by passing the token via environment variable (TOKEN) or with args (--token).
As per how mudler/edgevpn works, a network is established between the server and the workers with dht and mdns discovery protocols,
the llama.cpp rpc server is automatically started and exposed to the underlying p2p network so the API server can connect on.
When the HTTP server is started, it will discover the workers in the network and automatically create the port-forwards to the service locally.
Then llama.cpp is configured to use the services.
This feature is behind the "p2p" GO_FLAGS
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
* go mod tidy
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
* ci: add p2p tag
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
* better message
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
---------
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2024-05-20 17:17:59 +00:00
|
|
|
github.com/mikioh/tcpinfo v0.0.0-20190314235526-30a79bb1804b // indirect
|
|
|
|
github.com/mikioh/tcpopt v0.0.0-20190314235656-172688c1accc // indirect
|
|
|
|
github.com/minio/sha256-simd v1.0.1 // indirect
|
2024-06-23 08:24:36 +00:00
|
|
|
github.com/mitchellh/colorstring v0.0.0-20190213212951-d06e56a500db // indirect
|
|
|
|
github.com/mitchellh/copystructure v1.2.0 // indirect
|
|
|
|
github.com/mitchellh/go-homedir v1.1.0 // indirect
|
|
|
|
github.com/mitchellh/reflectwalk v1.0.2 // indirect
|
2024-06-22 06:17:41 +00:00
|
|
|
github.com/moby/sys/sequential v0.5.0 // indirect
|
2024-07-12 19:54:08 +00:00
|
|
|
github.com/moby/term v0.5.0 // indirect
|
feat(llama.cpp): Totally decentralized, private, distributed, p2p inference (#2343)
* feat(llama.cpp): Enable decentralized, distributed inference
As https://github.com/mudler/LocalAI/pull/2324 introduced distributed inferencing thanks to
@rgerganov implementation in https://github.com/ggerganov/llama.cpp/pull/6829 in upstream llama.cpp, now
it is possible to distribute the workload to remote llama.cpp gRPC server.
This changeset now uses mudler/edgevpn to establish a secure, distributed network between the nodes using a shared token.
The token is generated automatically when starting the server with the `--p2p` flag, and can be used by starting the workers
with `local-ai worker p2p-llama-cpp-rpc` by passing the token via environment variable (TOKEN) or with args (--token).
As per how mudler/edgevpn works, a network is established between the server and the workers with dht and mdns discovery protocols,
the llama.cpp rpc server is automatically started and exposed to the underlying p2p network so the API server can connect on.
When the HTTP server is started, it will discover the workers in the network and automatically create the port-forwards to the service locally.
Then llama.cpp is configured to use the services.
This feature is behind the "p2p" GO_FLAGS
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
* go mod tidy
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
* ci: add p2p tag
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
* better message
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
---------
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2024-05-20 17:17:59 +00:00
|
|
|
github.com/mr-tron/base58 v1.2.0 // indirect
|
2024-07-12 19:54:08 +00:00
|
|
|
github.com/mudler/go-piper v0.0.0-20240315144837-9d0100873a7d
github.com/mudler/water v0.0.0-20221010214108-8c7313014ce0 // indirect
github.com/muesli/reflow v0.3.0 // indirect
github.com/muesli/termenv v0.15.2 // indirect
github.com/multiformats/go-base32 v0.1.0 // indirect
github.com/multiformats/go-base36 v0.2.0 // indirect
github.com/multiformats/go-multiaddr v0.13.0 // indirect
github.com/multiformats/go-multiaddr-dns v0.3.1 // indirect
github.com/multiformats/go-multiaddr-fmt v0.1.0 // indirect
github.com/multiformats/go-multibase v0.2.0 // indirect
github.com/multiformats/go-multicodec v0.9.0 // indirect
github.com/multiformats/go-multihash v0.2.3 // indirect
github.com/multiformats/go-multistream v0.5.0 // indirect
github.com/multiformats/go-varint v0.0.7 // indirect
github.com/nwaples/rardecode v1.1.0 // indirect
github.com/olekukonko/tablewriter v0.0.5 // indirect
github.com/opencontainers/go-digest v1.0.0 // indirect
github.com/opencontainers/image-spec v1.1.0
github.com/opencontainers/runc v1.1.12 // indirect
github.com/opencontainers/runtime-spec v1.2.0 // indirect
github.com/opentracing/opentracing-go v1.2.0 // indirect
github.com/pbnjay/memory v0.0.0-20210728143218-7b4eea64cf58 // indirect
github.com/peterbourgon/diskv v2.0.1+incompatible // indirect
github.com/philhofer/fwd v1.1.2 // indirect
github.com/pierrec/lz4/v4 v4.1.2 // indirect
github.com/pkg/errors v0.9.1 // indirect
github.com/pkoukk/tiktoken-go v0.1.6 // indirect
github.com/pmezard/go-difflib v1.0.0 // indirect
github.com/polydawn/refmt v0.89.0 // indirect
github.com/power-devops/perfstat v0.0.0-20240221224432-82ca36839d55 // indirect
github.com/prometheus/client_model v0.6.1 // indirect
github.com/prometheus/common v0.55.0 // indirect
github.com/prometheus/procfs v0.15.1 // indirect
github.com/quic-go/qpack v0.4.0 // indirect
github.com/quic-go/quic-go v0.46.0 // indirect
github.com/quic-go/webtransport-go v0.8.0 // indirect
github.com/raulk/go-watchdog v1.3.0 // indirect
github.com/rivo/uniseg v0.4.7 // indirect
github.com/shoenig/go-m1cpu v0.1.6 // indirect
github.com/shopspring/decimal v1.3.1 // indirect
github.com/sirupsen/logrus v1.9.3 // indirect
github.com/smallnest/ringbuffer v0.0.0-20240423223918-bab516b2000b // indirect
github.com/songgao/packets v0.0.0-20160404182456-549a10cd4091 // indirect
github.com/spaolacci/murmur3 v1.1.0 // indirect
github.com/spf13/cast v1.5.0 // indirect
github.com/swaggo/files/v2 v2.0.0 // indirect
github.com/tinylib/msgp v1.1.8 // indirect
github.com/tklauser/go-sysconf v0.3.14 // indirect
github.com/tklauser/numcpus v0.8.0 // indirect
github.com/ulikunitz/xz v0.5.9 // indirect
github.com/valyala/bytebufferpool v1.0.0 // indirect
github.com/valyala/tcplisten v1.0.0 // indirect
github.com/vbatts/tar-split v0.11.3 // indirect
github.com/vishvananda/netlink v1.2.1-beta.2 // indirect
github.com/vishvananda/netns v0.0.0-20211101163701-50045581ed74 // indirect
github.com/whyrusleeping/go-keyspace v0.0.0-20160322163242-5b898ac5add1 // indirect
github.com/xeipuuv/gojsonpointer v0.0.0-20190905194746-02993c407bfb // indirect
github.com/xeipuuv/gojsonreference v0.0.0-20180127040603-bd5ef7bd5415 // indirect
github.com/xeipuuv/gojsonschema v1.2.0 // indirect
github.com/xi2/xz v0.0.0-20171230120015-48954b6210f8 // indirect
github.com/yuin/goldmark v1.5.4 // indirect
github.com/yuin/goldmark-emoji v1.0.2 // indirect
github.com/yusufpapurcu/wmi v1.2.4 // indirect
go.opencensus.io v0.24.0 // indirect
go.opentelemetry.io/otel/sdk v1.28.0 // indirect
go.opentelemetry.io/otel/trace v1.28.0 // indirect
go.uber.org/dig v1.18.0 // indirect
go.uber.org/fx v1.22.2 // indirect
go.uber.org/multierr v1.11.0 // indirect
go.uber.org/zap v1.27.0 // indirect
golang.org/x/crypto v0.26.0 // indirect
golang.org/x/exp v0.0.0-20240808152545-0cdaa3abc0fa // indirect
golang.org/x/mod v0.20.0 // indirect
golang.org/x/net v0.28.0 // indirect
golang.org/x/sync v0.8.0 // indirect
golang.org/x/sys v0.24.0 // indirect
golang.org/x/term v0.23.0 // indirect
golang.org/x/text v0.17.0 // indirect
golang.org/x/tools v0.24.0 // indirect
golang.zx2c4.com/wintun v0.0.0-20211104114900-415007cec224 // indirect
golang.zx2c4.com/wireguard v0.0.0-20220703234212-c31a7b1ab478 // indirect
golang.zx2c4.com/wireguard/windows v0.5.3 // indirect
gonum.org/v1/gonum v0.15.0 // indirect
google.golang.org/genproto/googleapis/rpc v0.0.0-20240617180043-68d350f18fd4 // indirect
gopkg.in/fsnotify.v1 v1.4.7 // indirect
gopkg.in/tomb.v1 v1.0.0-20141024135613-dd632973f1e7 // indirect
howett.net/plist v1.0.0 // indirect
lukechampine.com/blake3 v1.3.0 // indirect
)