talk-llama : sync llama.cpp

This commit is contained in:
Georgi Gerganov
2024-09-24 13:22:55 +03:00
parent 234f9bd320
commit fe18c29ab8
14 changed files with 4319 additions and 1214 deletions

View File

@@ -177,7 +177,7 @@ static bool ggml_graph_compute_helper(
int n_threads,
ggml_abort_callback abort_callback,
void * abort_callback_data) {
-    struct ggml_cplan plan = ggml_graph_plan(graph, n_threads);
+    struct ggml_cplan plan = ggml_graph_plan(graph, n_threads, nullptr);
plan.abort_callback = abort_callback;
plan.abort_callback_data = abort_callback_data;
@@ -2894,7 +2894,7 @@ static bool whisper_decode_internal(
ggml_backend_tensor_set(KQ_mask, wstate.inp_mask.data(), 0, ggml_nelements(KQ_mask)*sizeof(float));
}
-    logits = gf->nodes[gf->n_nodes - 1];
+    logits = ggml_graph_node(gf, -1);
if (!ggml_graph_compute_helper(sched, gf, n_threads)) {
return false;