feat: tokenization with llama.cpp (#4724)

feat: tokenization

Signed-off-by: shraddhazpy <shraddha@shraddhafive.in>
This commit is contained in:
Shraddha 2025-02-02 23:09:43 +05:30 committed by GitHub
parent 1d6afbd65d
commit 03974a4dd4
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
3 changed files with 19 additions and 9 deletions

View File

@ -2542,6 +2542,18 @@ public:
return grpc::Status::OK;
}
grpc::Status TokenizeString(ServerContext* context, const backend::PredictOptions* request, backend::TokenizationResponse* response){
json data = parse_options(false, request, llama);
std::vector<llama_token> tokens = llama.tokenize(data["prompt"],false);
for (int i=0 ; i< tokens.size(); i++){
response->add_tokens(tokens[i]);
}
return grpc::Status::OK;
}
grpc::Status GetMetrics(ServerContext* context, const backend::MetricsRequest* request, backend::MetricsResponse* response) {
llama_client_slot* active_slot = llama.get_active_slot();

View File

@ -16,12 +16,7 @@ func ModelTokenize(s string, loader *model.ModelLoader, backendConfig config.Bac
opts := ModelOptions(backendConfig, appConfig, model.WithModel(modelFile))
if backendConfig.Backend == "" {
inferenceModel, err = loader.Load(opts...)
} else {
opts = append(opts, model.WithBackendString(backendConfig.Backend))
inferenceModel, err = loader.Load(opts...)
}
inferenceModel, err = loader.Load(opts...)
if err != nil {
return schema.TokenizeResponse{}, err
}
@ -35,6 +30,10 @@ func ModelTokenize(s string, loader *model.ModelLoader, backendConfig config.Bac
return schema.TokenizeResponse{}, err
}
if resp.Tokens == nil {
resp.Tokens = make([]int32, 0)
}
return schema.TokenizeResponse{
Tokens: resp.Tokens,
}, nil

View File

@ -12,6 +12,7 @@ import (
// TokenizeEndpoint exposes a REST API to tokenize the content
// @Summary Tokenize the input.
// @Param request body schema.TokenizeRequest true "Request"
// @Success 200 {object} schema.TokenizeResponse "Response"
// @Router /v1/tokenize [post]
func TokenizeEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, appConfig *config.ApplicationConfig) func(c *fiber.Ctx) error {
@ -51,8 +52,6 @@ func TokenizeEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, app
return err
}
c.JSON(tokenResponse)
return nil
return c.JSON(tokenResponse)
}
}