mirror of
https://github.com/ggerganov/whisper.cpp.git
synced 2025-06-12 20:18:08 +00:00
talk, talk-llama : pass text_to_speak as a file (#1865)
* talk-llama: pass file instead of arg it is too hard to quote text in a portable way * talk-llama: pass heard_ok as a file * talk-llama: let eleven-labs.py accept options Options: -v voice, -s savefile, -p (--play) * talk-llama: check installed commands in "speak" Pass "-q" to eleven-labs.py to skip checking whether elevenlabs is installed * talk-llama: pass voice_id again in order to sync talk with talk-llama * talk: sync with talk-llama Passing text_to_speak as a file is safer and more portable cf. https://stackoverflow.com/a/59036879/45375 * talk and talk-llama: get all installed voices in speak.ps1 * talk and talk-llama: get voices from api * talk and talk-llama: add more options to eleven-labs.py and remove DEFAULT_VOICE because it is deprecated (https://www.reddit.com/r/ElevenLabs/comments/1830abt/what_happened_to_bella/) ``` usage: eleven-labs.py [-q] [-l] [-h] [-n NAME | -v NUMBER] [-f KEY=VAL] [-s FILE | -p] [TEXTFILE] options: -q, --quick skip checking the required library action: TEXTFILE read the text file (default: stdin) -l, --list show the list of voices and exit -h, --help show this help and exit voice selection: -n NAME, --name NAME get a voice object by name (default: Arnold) -v NUMBER, --voice NUMBER get a voice object by number (see --list) -f KEY=VAL, --filter KEY=VAL filter voices by labels (default: "use case=narration") this option can be used multiple times filtering will be disabled if the first -f has no "=" (e.g. -f "any") output: -s FILE, --save FILE save the TTS to a file (default: audio.mp3) -p, --play play the TTS with ffplay ``` * examples: add speak_with_file() as suggested in the review * talk and talk-llama: ignore to_speak.txt
This commit is contained in:
committed by
GitHub
parent
a0ddd8392c
commit
f18738f247
@ -75,6 +75,7 @@ struct whisper_params {
|
||||
std::string model_wsp = "models/ggml-base.en.bin";
|
||||
std::string model_llama = "models/ggml-llama-7B.bin";
|
||||
std::string speak = "./examples/talk-llama/speak";
|
||||
std::string speak_file = "./examples/talk-llama/to_speak.txt";
|
||||
std::string prompt = "";
|
||||
std::string fname_out;
|
||||
std::string path_session = ""; // path to file for saving/loading model eval state
|
||||
@ -113,6 +114,7 @@ bool whisper_params_parse(int argc, char ** argv, whisper_params & params) {
|
||||
else if (arg == "-mw" || arg == "--model-whisper") { params.model_wsp = argv[++i]; }
|
||||
else if (arg == "-ml" || arg == "--model-llama") { params.model_llama = argv[++i]; }
|
||||
else if (arg == "-s" || arg == "--speak") { params.speak = argv[++i]; }
|
||||
else if (arg == "-sf" || arg == "--speak-file") { params.speak_file = argv[++i]; }
|
||||
else if (arg == "--prompt-file") {
|
||||
std::ifstream file(argv[++i]);
|
||||
std::copy(std::istreambuf_iterator<char>(file), std::istreambuf_iterator<char>(), back_inserter(params.prompt));
|
||||
@ -160,6 +162,7 @@ void whisper_print_usage(int /*argc*/, char ** argv, const whisper_params & para
|
||||
fprintf(stderr, " -mw FILE, --model-whisper [%-7s] whisper model file\n", params.model_wsp.c_str());
|
||||
fprintf(stderr, " -ml FILE, --model-llama [%-7s] llama model file\n", params.model_llama.c_str());
|
||||
fprintf(stderr, " -s FILE, --speak TEXT [%-7s] command for TTS\n", params.speak.c_str());
|
||||
fprintf(stderr, " -sf FILE, --speak-file [%-7s] file to pass to TTS\n", params.speak_file.c_str());
|
||||
fprintf(stderr, " --prompt-file FNAME [%-7s] file with custom prompt to start dialog\n", "");
|
||||
fprintf(stderr, " --session FNAME file to cache model state in (may be large!) (default: none)\n");
|
||||
fprintf(stderr, " -f FNAME, --file FNAME [%-7s] text output file name\n", params.fname_out.c_str());
|
||||
@ -546,10 +549,7 @@ int main(int argc, char ** argv) {
|
||||
|
||||
// optionally give audio feedback that the current text is being processed
|
||||
if (!params.heard_ok.empty()) {
|
||||
int ret = system((params.speak + " " + std::to_string(voice_id) + " '" + params.heard_ok + "'").c_str());
|
||||
if (ret != 0) {
|
||||
fprintf(stderr, "%s: failed to speak\n", __func__);
|
||||
}
|
||||
speak_with_file(params.speak, params.heard_ok, params.speak_file, voice_id);
|
||||
}
|
||||
|
||||
// remove text between brackets using regex
|
||||
@ -748,11 +748,7 @@ int main(int argc, char ** argv) {
|
||||
}
|
||||
}
|
||||
|
||||
text_to_speak = ::replace(text_to_speak, "'", "'\"'\"'");
|
||||
int ret = system((params.speak + " " + std::to_string(voice_id) + " '" + text_to_speak + "'").c_str());
|
||||
if (ret != 0) {
|
||||
fprintf(stderr, "%s: failed to speak\n", __func__);
|
||||
}
|
||||
speak_with_file(params.speak, text_to_speak, params.speak_file, voice_id);
|
||||
|
||||
audio.clear();
|
||||
}
|
||||
|
Reference in New Issue
Block a user