mirror of
https://github.com/mudler/LocalAI.git
synced 2025-04-25 21:39:51 +00:00
Some checks failed
Explorer deployment / build-linux (push) Has been cancelled
GPU tests / ubuntu-latest (1.21.x) (push) Has been cancelled
generate and publish intel docker caches / generate_caches (intel/oneapi-basekit:2025.1.0-0-devel-ubuntu22.04, linux/amd64, ubuntu-latest) (push) Has been cancelled
build container images / hipblas-jobs (-aio-gpu-hipblas, rocm/dev-ubuntu-22.04:6.1, hipblas, true, ubuntu:22.04, extras, latest-gpu-hipblas, latest-aio-gpu-hipblas, --jobs=3 --output-sync=target, linux/amd64, arc-runner-set, auto, -hipblas-ffmpeg) (push) Has been cancelled
build container images / hipblas-jobs (rocm/dev-ubuntu-22.04:6.1, hipblas, false, ubuntu:22.04, core, --jobs=3 --output-sync=target, linux/amd64, arc-runner-set, false, -hipblas-core) (push) Has been cancelled
build container images / hipblas-jobs (rocm/dev-ubuntu-22.04:6.1, hipblas, false, ubuntu:22.04, extras, --jobs=3 --output-sync=target, linux/amd64, arc-runner-set, false, -hipblas) (push) Has been cancelled
build container images / hipblas-jobs (rocm/dev-ubuntu-22.04:6.1, hipblas, true, ubuntu:22.04, core, latest-gpu-hipblas-core, --jobs=3 --output-sync=target, linux/amd64, arc-runner-set, false, -hipblas-ffmpeg-core) (push) Has been cancelled
build container images / self-hosted-jobs (-aio-gpu-intel-f16, quay.io/go-skynet/intel-oneapi-base:latest, sycl_f16, true, ubuntu:22.04, extras, latest-gpu-intel-f16, latest-aio-gpu-intel-f16, --jobs=3 --output-sync=target, linux/amd64, arc-runner-set, auto, -sycl-f16-ffmpeg) (push) Has been cancelled
build container images / self-hosted-jobs (-aio-gpu-intel-f32, quay.io/go-skynet/intel-oneapi-base:latest, sycl_f32, true, ubuntu:22.04, extras, latest-gpu-intel-f32, latest-aio-gpu-intel-f32, --jobs=3 --output-sync=target, linux/amd64, arc-runner-set, auto, -sycl-f32-ffmpeg) (push) Has been cancelled
build container images / self-hosted-jobs (-aio-gpu-nvidia-cuda-11, ubuntu:22.04, cublas, 11, 7, true, extras, latest-gpu-nvidia-cuda-11, latest-aio-gpu-nvidia-cuda-11, --jobs=3 --output-sync=target, linux/amd64, arc-runner-set, auto, -cublas-cuda11-ffmpeg) (push) Has been cancelled
build container images / self-hosted-jobs (-aio-gpu-nvidia-cuda-12, ubuntu:22.04, cublas, 12, 0, true, extras, latest-gpu-nvidia-cuda-12, latest-aio-gpu-nvidia-cuda-12, --jobs=3 --output-sync=target, linux/amd64, arc-runner-set, auto, -cublas-cuda12-ffmpeg) (push) Has been cancelled
build container images / self-hosted-jobs (quay.io/go-skynet/intel-oneapi-base:latest, sycl_f16, false, ubuntu:22.04, core, --jobs=3 --output-sync=target, linux/amd64, arc-runner-set, false, -sycl-f16-core) (push) Has been cancelled
build container images / self-hosted-jobs (quay.io/go-skynet/intel-oneapi-base:latest, sycl_f16, true, ubuntu:22.04, core, latest-gpu-intel-f16-core, --jobs=3 --output-sync=target, linux/amd64, arc-runner-set, false, -sycl-f16-ffmpeg-core) (push) Has been cancelled
build container images / self-hosted-jobs (quay.io/go-skynet/intel-oneapi-base:latest, sycl_f32, false, ubuntu:22.04, core, --jobs=3 --output-sync=target, linux/amd64, arc-runner-set, false, -sycl-f32-core) (push) Has been cancelled
build container images / self-hosted-jobs (quay.io/go-skynet/intel-oneapi-base:latest, sycl_f32, true, ubuntu:22.04, core, latest-gpu-intel-f32-core, --jobs=3 --output-sync=target, linux/amd64, arc-runner-set, false, -sycl-f32-ffmpeg-core) (push) Has been cancelled
build container images / self-hosted-jobs (ubuntu:22.04, , , extras, --jobs=3 --output-sync=target, linux/amd64, arc-runner-set, auto, ) (push) Has been cancelled
build container images / self-hosted-jobs (ubuntu:22.04, , true, extras, --jobs=3 --output-sync=target, linux/amd64, arc-runner-set, auto, -ffmpeg) (push) Has been cancelled
build container images / self-hosted-jobs (ubuntu:22.04, cublas, 11, 7, , extras, --jobs=3 --output-sync=target, linux/amd64, arc-runner-set, false, -cublas-cuda11) (push) Has been cancelled
build container images / self-hosted-jobs (ubuntu:22.04, cublas, 12, 0, , extras, --jobs=3 --output-sync=target, linux/amd64, arc-runner-set, false, -cublas-cuda12) (push) Has been cancelled
build container images / core-image-build (-aio-cpu, ubuntu:22.04, , true, core, latest-cpu, latest-aio-cpu, --jobs=4 --output-sync=target, linux/amd64,linux/arm64, arc-runner-set, false, auto, -ffmpeg-core) (push) Has been cancelled
build container images / core-image-build (ubuntu:22.04, cublas, 11, 7, , core, --jobs=4 --output-sync=target, linux/amd64, arc-runner-set, false, false, -cublas-cuda11-core) (push) Has been cancelled
build container images / core-image-build (ubuntu:22.04, cublas, 11, 7, true, core, latest-gpu-nvidia-cuda-12-core, --jobs=4 --output-sync=target, linux/amd64, arc-runner-set, false, false, -cublas-cuda11-ffmpeg-core) (push) Has been cancelled
build container images / core-image-build (ubuntu:22.04, cublas, 12, 0, , core, --jobs=4 --output-sync=target, linux/amd64, arc-runner-set, false, false, -cublas-cuda12-core) (push) Has been cancelled
build container images / core-image-build (ubuntu:22.04, cublas, 12, 0, true, core, latest-gpu-nvidia-cuda-12-core, --jobs=4 --output-sync=target, linux/amd64, arc-runner-set, false, false, -cublas-cuda12-ffmpeg-core) (push) Has been cancelled
build container images / core-image-build (ubuntu:22.04, vulkan, true, core, latest-gpu-vulkan-core, --jobs=4 --output-sync=target, linux/amd64, arc-runner-set, false, false, -vulkan-ffmpeg-core) (push) Has been cancelled
build container images / gh-runner (nvcr.io/nvidia/l4t-jetpack:r36.4.0, cublas, 12, 0, true, core, latest-nvidia-l4t-arm64-core, --jobs=4 --output-sync=target, linux/arm64, ubuntu-24.04-arm, true, false, -nvidia-l4t-arm64-core) (push) Has been cancelled
Security Scan / tests (push) Has been cancelled
Tests extras backends / tests-transformers (push) Has been cancelled
Tests extras backends / tests-rerankers (push) Has been cancelled
Tests extras backends / tests-diffusers (push) Has been cancelled
Tests extras backends / tests-coqui (push) Has been cancelled
tests / tests-linux (1.21.x) (push) Has been cancelled
tests / tests-aio-container (push) Has been cancelled
tests / tests-apple (1.21.x) (push) Has been cancelled
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
485 lines
12 KiB
Go
485 lines
12 KiB
Go
package grpc
|
|
|
|
import (
|
|
"context"
|
|
"fmt"
|
|
"io"
|
|
"sync"
|
|
"time"
|
|
|
|
pb "github.com/mudler/LocalAI/pkg/grpc/proto"
|
|
"google.golang.org/grpc"
|
|
"google.golang.org/grpc/credentials/insecure"
|
|
)
|
|
|
|
// Client is a gRPC client for a single LocalAI backend process.
// Every RPC wrapper dials a fresh connection to address; the embedded
// Mutex guards the busy flag, while opMutex serializes whole RPCs
// whenever parallel is false.
type Client struct {
	address  string // backend gRPC endpoint to dial
	busy     bool   // true while an RPC is in flight; guarded by the embedded Mutex
	parallel bool   // when false, opMutex serializes all RPCs on this client
	sync.Mutex
	opMutex sync.Mutex // serializes operations when parallel is false
	wd      WatchDog   // optional activity tracker; may be nil
}
|
|
|
|
// WatchDog tracks per-address request activity. Mark flags the given
// backend address as actively serving a request and UnMark clears it.
type WatchDog interface {
	Mark(address string)
	UnMark(address string)
}
|
|
|
|
func (c *Client) IsBusy() bool {
|
|
c.Lock()
|
|
defer c.Unlock()
|
|
return c.busy
|
|
}
|
|
|
|
func (c *Client) setBusy(v bool) {
|
|
c.Lock()
|
|
c.busy = v
|
|
c.Unlock()
|
|
}
|
|
|
|
func (c *Client) wdMark() {
|
|
if c.wd != nil {
|
|
c.wd.Mark(c.address)
|
|
}
|
|
}
|
|
|
|
func (c *Client) wdUnMark() {
|
|
if c.wd != nil {
|
|
c.wd.UnMark(c.address)
|
|
}
|
|
}
|
|
|
|
func (c *Client) HealthCheck(ctx context.Context) (bool, error) {
|
|
if !c.parallel {
|
|
c.opMutex.Lock()
|
|
defer c.opMutex.Unlock()
|
|
}
|
|
c.setBusy(true)
|
|
defer c.setBusy(false)
|
|
conn, err := grpc.Dial(c.address, grpc.WithTransportCredentials(insecure.NewCredentials()),
|
|
grpc.WithDefaultCallOptions(
|
|
grpc.MaxCallRecvMsgSize(50*1024*1024), // 50MB
|
|
grpc.MaxCallSendMsgSize(50*1024*1024), // 50MB
|
|
))
|
|
if err != nil {
|
|
return false, err
|
|
}
|
|
defer conn.Close()
|
|
client := pb.NewBackendClient(conn)
|
|
|
|
// The healthcheck call shouldn't take long time
|
|
ctx, cancel := context.WithTimeout(ctx, 10*time.Second)
|
|
defer cancel()
|
|
|
|
res, err := client.Health(ctx, &pb.HealthMessage{})
|
|
if err != nil {
|
|
return false, err
|
|
}
|
|
|
|
if string(res.Message) == "OK" {
|
|
return true, nil
|
|
}
|
|
|
|
return false, fmt.Errorf("health check failed: %s", res.Message)
|
|
}
|
|
|
|
func (c *Client) Embeddings(ctx context.Context, in *pb.PredictOptions, opts ...grpc.CallOption) (*pb.EmbeddingResult, error) {
|
|
if !c.parallel {
|
|
c.opMutex.Lock()
|
|
defer c.opMutex.Unlock()
|
|
}
|
|
c.setBusy(true)
|
|
defer c.setBusy(false)
|
|
c.wdMark()
|
|
defer c.wdUnMark()
|
|
conn, err := grpc.Dial(c.address, grpc.WithTransportCredentials(insecure.NewCredentials()),
|
|
grpc.WithDefaultCallOptions(
|
|
grpc.MaxCallRecvMsgSize(50*1024*1024), // 50MB
|
|
grpc.MaxCallSendMsgSize(50*1024*1024), // 50MB
|
|
))
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
defer conn.Close()
|
|
client := pb.NewBackendClient(conn)
|
|
|
|
return client.Embedding(ctx, in, opts...)
|
|
}
|
|
|
|
func (c *Client) Predict(ctx context.Context, in *pb.PredictOptions, opts ...grpc.CallOption) (*pb.Reply, error) {
|
|
if !c.parallel {
|
|
c.opMutex.Lock()
|
|
defer c.opMutex.Unlock()
|
|
}
|
|
c.setBusy(true)
|
|
defer c.setBusy(false)
|
|
c.wdMark()
|
|
defer c.wdUnMark()
|
|
conn, err := grpc.Dial(c.address, grpc.WithTransportCredentials(insecure.NewCredentials()),
|
|
grpc.WithDefaultCallOptions(
|
|
grpc.MaxCallRecvMsgSize(50*1024*1024), // 50MB
|
|
grpc.MaxCallSendMsgSize(50*1024*1024), // 50MB
|
|
))
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
defer conn.Close()
|
|
client := pb.NewBackendClient(conn)
|
|
|
|
return client.Predict(ctx, in, opts...)
|
|
}
|
|
|
|
func (c *Client) LoadModel(ctx context.Context, in *pb.ModelOptions, opts ...grpc.CallOption) (*pb.Result, error) {
|
|
if !c.parallel {
|
|
c.opMutex.Lock()
|
|
defer c.opMutex.Unlock()
|
|
}
|
|
c.setBusy(true)
|
|
defer c.setBusy(false)
|
|
c.wdMark()
|
|
defer c.wdUnMark()
|
|
conn, err := grpc.Dial(c.address, grpc.WithTransportCredentials(insecure.NewCredentials()),
|
|
grpc.WithDefaultCallOptions(
|
|
grpc.MaxCallRecvMsgSize(50*1024*1024), // 50MB
|
|
grpc.MaxCallSendMsgSize(50*1024*1024), // 50MB
|
|
))
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
defer conn.Close()
|
|
client := pb.NewBackendClient(conn)
|
|
return client.LoadModel(ctx, in, opts...)
|
|
}
|
|
|
|
func (c *Client) PredictStream(ctx context.Context, in *pb.PredictOptions, f func(reply *pb.Reply), opts ...grpc.CallOption) error {
|
|
if !c.parallel {
|
|
c.opMutex.Lock()
|
|
defer c.opMutex.Unlock()
|
|
}
|
|
c.setBusy(true)
|
|
defer c.setBusy(false)
|
|
c.wdMark()
|
|
defer c.wdUnMark()
|
|
conn, err := grpc.Dial(c.address, grpc.WithTransportCredentials(insecure.NewCredentials()),
|
|
grpc.WithDefaultCallOptions(
|
|
grpc.MaxCallRecvMsgSize(50*1024*1024), // 50MB
|
|
grpc.MaxCallSendMsgSize(50*1024*1024), // 50MB
|
|
))
|
|
if err != nil {
|
|
return err
|
|
}
|
|
defer conn.Close()
|
|
client := pb.NewBackendClient(conn)
|
|
|
|
stream, err := client.PredictStream(ctx, in, opts...)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
|
|
for {
|
|
reply, err := stream.Recv()
|
|
if err == io.EOF {
|
|
break
|
|
}
|
|
if err != nil {
|
|
fmt.Println("Error", err)
|
|
|
|
return err
|
|
}
|
|
f(reply)
|
|
}
|
|
|
|
return nil
|
|
}
|
|
|
|
func (c *Client) GenerateImage(ctx context.Context, in *pb.GenerateImageRequest, opts ...grpc.CallOption) (*pb.Result, error) {
|
|
if !c.parallel {
|
|
c.opMutex.Lock()
|
|
defer c.opMutex.Unlock()
|
|
}
|
|
c.setBusy(true)
|
|
defer c.setBusy(false)
|
|
c.wdMark()
|
|
defer c.wdUnMark()
|
|
conn, err := grpc.Dial(c.address, grpc.WithTransportCredentials(insecure.NewCredentials()),
|
|
grpc.WithDefaultCallOptions(
|
|
grpc.MaxCallRecvMsgSize(50*1024*1024), // 50MB
|
|
grpc.MaxCallSendMsgSize(50*1024*1024), // 50MB
|
|
))
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
defer conn.Close()
|
|
client := pb.NewBackendClient(conn)
|
|
return client.GenerateImage(ctx, in, opts...)
|
|
}
|
|
|
|
func (c *Client) TTS(ctx context.Context, in *pb.TTSRequest, opts ...grpc.CallOption) (*pb.Result, error) {
|
|
if !c.parallel {
|
|
c.opMutex.Lock()
|
|
defer c.opMutex.Unlock()
|
|
}
|
|
c.setBusy(true)
|
|
defer c.setBusy(false)
|
|
c.wdMark()
|
|
defer c.wdUnMark()
|
|
conn, err := grpc.Dial(c.address, grpc.WithTransportCredentials(insecure.NewCredentials()),
|
|
grpc.WithDefaultCallOptions(
|
|
grpc.MaxCallRecvMsgSize(50*1024*1024), // 50MB
|
|
grpc.MaxCallSendMsgSize(50*1024*1024), // 50MB
|
|
))
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
defer conn.Close()
|
|
client := pb.NewBackendClient(conn)
|
|
return client.TTS(ctx, in, opts...)
|
|
}
|
|
|
|
func (c *Client) SoundGeneration(ctx context.Context, in *pb.SoundGenerationRequest, opts ...grpc.CallOption) (*pb.Result, error) {
|
|
if !c.parallel {
|
|
c.opMutex.Lock()
|
|
defer c.opMutex.Unlock()
|
|
}
|
|
c.setBusy(true)
|
|
defer c.setBusy(false)
|
|
c.wdMark()
|
|
defer c.wdUnMark()
|
|
conn, err := grpc.Dial(c.address, grpc.WithTransportCredentials(insecure.NewCredentials()),
|
|
grpc.WithDefaultCallOptions(
|
|
grpc.MaxCallRecvMsgSize(50*1024*1024), // 50MB
|
|
grpc.MaxCallSendMsgSize(50*1024*1024), // 50MB
|
|
))
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
defer conn.Close()
|
|
client := pb.NewBackendClient(conn)
|
|
return client.SoundGeneration(ctx, in, opts...)
|
|
}
|
|
|
|
func (c *Client) AudioTranscription(ctx context.Context, in *pb.TranscriptRequest, opts ...grpc.CallOption) (*pb.TranscriptResult, error) {
|
|
if !c.parallel {
|
|
c.opMutex.Lock()
|
|
defer c.opMutex.Unlock()
|
|
}
|
|
c.setBusy(true)
|
|
defer c.setBusy(false)
|
|
c.wdMark()
|
|
defer c.wdUnMark()
|
|
conn, err := grpc.Dial(c.address, grpc.WithTransportCredentials(insecure.NewCredentials()),
|
|
grpc.WithDefaultCallOptions(
|
|
grpc.MaxCallRecvMsgSize(50*1024*1024), // 50MB
|
|
grpc.MaxCallSendMsgSize(50*1024*1024), // 50MB
|
|
))
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
defer conn.Close()
|
|
client := pb.NewBackendClient(conn)
|
|
return client.AudioTranscription(ctx, in, opts...)
|
|
}
|
|
|
|
func (c *Client) TokenizeString(ctx context.Context, in *pb.PredictOptions, opts ...grpc.CallOption) (*pb.TokenizationResponse, error) {
|
|
if !c.parallel {
|
|
c.opMutex.Lock()
|
|
defer c.opMutex.Unlock()
|
|
}
|
|
c.setBusy(true)
|
|
defer c.setBusy(false)
|
|
c.wdMark()
|
|
defer c.wdUnMark()
|
|
conn, err := grpc.Dial(c.address, grpc.WithTransportCredentials(insecure.NewCredentials()),
|
|
grpc.WithDefaultCallOptions(
|
|
grpc.MaxCallRecvMsgSize(50*1024*1024), // 50MB
|
|
grpc.MaxCallSendMsgSize(50*1024*1024), // 50MB
|
|
))
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
defer conn.Close()
|
|
client := pb.NewBackendClient(conn)
|
|
|
|
res, err := client.TokenizeString(ctx, in, opts...)
|
|
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
return res, nil
|
|
}
|
|
|
|
func (c *Client) Status(ctx context.Context) (*pb.StatusResponse, error) {
|
|
if !c.parallel {
|
|
c.opMutex.Lock()
|
|
defer c.opMutex.Unlock()
|
|
}
|
|
c.setBusy(true)
|
|
defer c.setBusy(false)
|
|
conn, err := grpc.Dial(c.address, grpc.WithTransportCredentials(insecure.NewCredentials()),
|
|
grpc.WithDefaultCallOptions(
|
|
grpc.MaxCallRecvMsgSize(50*1024*1024), // 50MB
|
|
grpc.MaxCallSendMsgSize(50*1024*1024), // 50MB
|
|
))
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
defer conn.Close()
|
|
client := pb.NewBackendClient(conn)
|
|
return client.Status(ctx, &pb.HealthMessage{})
|
|
}
|
|
|
|
func (c *Client) StoresSet(ctx context.Context, in *pb.StoresSetOptions, opts ...grpc.CallOption) (*pb.Result, error) {
|
|
if !c.parallel {
|
|
c.opMutex.Lock()
|
|
defer c.opMutex.Unlock()
|
|
}
|
|
c.setBusy(true)
|
|
defer c.setBusy(false)
|
|
c.wdMark()
|
|
defer c.wdUnMark()
|
|
conn, err := grpc.Dial(c.address, grpc.WithTransportCredentials(insecure.NewCredentials()),
|
|
grpc.WithDefaultCallOptions(
|
|
grpc.MaxCallRecvMsgSize(50*1024*1024), // 50MB
|
|
grpc.MaxCallSendMsgSize(50*1024*1024), // 50MB
|
|
))
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
defer conn.Close()
|
|
client := pb.NewBackendClient(conn)
|
|
return client.StoresSet(ctx, in, opts...)
|
|
}
|
|
|
|
func (c *Client) StoresDelete(ctx context.Context, in *pb.StoresDeleteOptions, opts ...grpc.CallOption) (*pb.Result, error) {
|
|
if !c.parallel {
|
|
c.opMutex.Lock()
|
|
defer c.opMutex.Unlock()
|
|
}
|
|
c.wdMark()
|
|
defer c.wdUnMark()
|
|
c.setBusy(true)
|
|
defer c.setBusy(false)
|
|
conn, err := grpc.Dial(c.address, grpc.WithTransportCredentials(insecure.NewCredentials()),
|
|
grpc.WithDefaultCallOptions(
|
|
grpc.MaxCallRecvMsgSize(50*1024*1024), // 50MB
|
|
grpc.MaxCallSendMsgSize(50*1024*1024), // 50MB
|
|
))
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
defer conn.Close()
|
|
client := pb.NewBackendClient(conn)
|
|
return client.StoresDelete(ctx, in, opts...)
|
|
}
|
|
|
|
func (c *Client) StoresGet(ctx context.Context, in *pb.StoresGetOptions, opts ...grpc.CallOption) (*pb.StoresGetResult, error) {
|
|
if !c.parallel {
|
|
c.opMutex.Lock()
|
|
defer c.opMutex.Unlock()
|
|
}
|
|
c.setBusy(true)
|
|
defer c.setBusy(false)
|
|
c.wdMark()
|
|
defer c.wdUnMark()
|
|
conn, err := grpc.Dial(c.address, grpc.WithTransportCredentials(insecure.NewCredentials()),
|
|
grpc.WithDefaultCallOptions(
|
|
grpc.MaxCallRecvMsgSize(50*1024*1024), // 50MB
|
|
grpc.MaxCallSendMsgSize(50*1024*1024), // 50MB
|
|
))
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
defer conn.Close()
|
|
client := pb.NewBackendClient(conn)
|
|
return client.StoresGet(ctx, in, opts...)
|
|
}
|
|
|
|
func (c *Client) StoresFind(ctx context.Context, in *pb.StoresFindOptions, opts ...grpc.CallOption) (*pb.StoresFindResult, error) {
|
|
if !c.parallel {
|
|
c.opMutex.Lock()
|
|
defer c.opMutex.Unlock()
|
|
}
|
|
c.setBusy(true)
|
|
defer c.setBusy(false)
|
|
c.wdMark()
|
|
defer c.wdUnMark()
|
|
conn, err := grpc.Dial(c.address, grpc.WithTransportCredentials(insecure.NewCredentials()),
|
|
grpc.WithDefaultCallOptions(
|
|
grpc.MaxCallRecvMsgSize(50*1024*1024), // 50MB
|
|
grpc.MaxCallSendMsgSize(50*1024*1024), // 50MB
|
|
))
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
defer conn.Close()
|
|
client := pb.NewBackendClient(conn)
|
|
return client.StoresFind(ctx, in, opts...)
|
|
}
|
|
|
|
func (c *Client) Rerank(ctx context.Context, in *pb.RerankRequest, opts ...grpc.CallOption) (*pb.RerankResult, error) {
|
|
if !c.parallel {
|
|
c.opMutex.Lock()
|
|
defer c.opMutex.Unlock()
|
|
}
|
|
c.setBusy(true)
|
|
defer c.setBusy(false)
|
|
c.wdMark()
|
|
defer c.wdUnMark()
|
|
conn, err := grpc.Dial(c.address, grpc.WithTransportCredentials(insecure.NewCredentials()),
|
|
grpc.WithDefaultCallOptions(
|
|
grpc.MaxCallRecvMsgSize(50*1024*1024), // 50MB
|
|
grpc.MaxCallSendMsgSize(50*1024*1024), // 50MB
|
|
))
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
defer conn.Close()
|
|
client := pb.NewBackendClient(conn)
|
|
return client.Rerank(ctx, in, opts...)
|
|
}
|
|
|
|
func (c *Client) GetTokenMetrics(ctx context.Context, in *pb.MetricsRequest, opts ...grpc.CallOption) (*pb.MetricsResponse, error) {
|
|
if !c.parallel {
|
|
c.opMutex.Lock()
|
|
defer c.opMutex.Unlock()
|
|
}
|
|
c.setBusy(true)
|
|
defer c.setBusy(false)
|
|
c.wdMark()
|
|
defer c.wdUnMark()
|
|
conn, err := grpc.Dial(c.address, grpc.WithTransportCredentials(insecure.NewCredentials()),
|
|
grpc.WithDefaultCallOptions(
|
|
grpc.MaxCallRecvMsgSize(50*1024*1024), // 50MB
|
|
grpc.MaxCallSendMsgSize(50*1024*1024), // 50MB
|
|
))
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
defer conn.Close()
|
|
client := pb.NewBackendClient(conn)
|
|
return client.GetMetrics(ctx, in, opts...)
|
|
}
|
|
|
|
func (c *Client) VAD(ctx context.Context, in *pb.VADRequest, opts ...grpc.CallOption) (*pb.VADResponse, error) {
|
|
if !c.parallel {
|
|
c.opMutex.Lock()
|
|
defer c.opMutex.Unlock()
|
|
}
|
|
c.setBusy(true)
|
|
defer c.setBusy(false)
|
|
c.wdMark()
|
|
defer c.wdUnMark()
|
|
conn, err := grpc.Dial(c.address, grpc.WithTransportCredentials(insecure.NewCredentials()),
|
|
grpc.WithDefaultCallOptions(
|
|
grpc.MaxCallRecvMsgSize(50*1024*1024), // 50MB
|
|
grpc.MaxCallSendMsgSize(50*1024*1024), // 50MB
|
|
))
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
defer conn.Close()
|
|
client := pb.NewBackendClient(conn)
|
|
return client.VAD(ctx, in, opts...)
|
|
}
|