mirror of
https://github.com/ggerganov/whisper.cpp.git
synced 2025-01-21 20:08:16 +00:00
363 lines
16 KiB
VimL
363 lines
16 KiB
VimL
|
" Resolve the whisper.cpp checkout directory. A g:whisper_dir set by the user
" wins; otherwise fall back to the $WHISPER_CPP_HOME environment variable.
if !exists("g:whisper_dir")
    let g:whisper_dir = expand($WHISPER_CPP_HOME)
    if g:whisper_dir == ""
        echoerr "Please provide a path to the whisper.cpp repo in either the $WHISPER_CPP_HOME environment variable, or g:whisper_dir"
    endif
endif
" The lsp and model paths are derived from g:whisper_dir by plain string
" concatenation, so make sure the directory ends with a path separator.
if g:whisper_dir != "" && g:whisper_dir !~# '[/\\]$'
    let g:whisper_dir = g:whisper_dir .. "/"
endif
|
||
|
" Default location of the lsp server binary: "lsp" appended to g:whisper_dir.
" A g:whisper_lsp_path provided by the user is taken as-is and not checked.
if !exists("g:whisper_lsp_path")
    let g:whisper_lsp_path = g:whisper_dir .. "lsp"
    if filereadable(g:whisper_lsp_path) == 0
        echoerr "Was not able to locate a lsp executable at: " .. g:whisper_lsp_path
        throw "Executable not found"
    endif
endif
|
||
|
" Default model: models/ggml-base.en.bin below g:whisper_dir.
" A g:whisper_model_path provided by the user is taken as-is and not checked.
if !exists("g:whisper_model_path")
    " TODO: allow custom paths relative to the repo dir
    let g:whisper_model_path = g:whisper_dir .. "models/ggml-base.en.bin"
    if filereadable(g:whisper_model_path) == 0
        echoerr "Could not find model at: " .. g:whisper_model_path
        throw "Model not found"
    endif
endif
|
||
|
" Scratch buffer ("whisper_log", created on demand) that receives the log
" lines written by this script.
let s:output_buffer = bufnr("whisper_log", v:true)
call setbufvar(s:output_buffer, "&buftype", "nofile")

" Command line used to start the lsp server with the configured model.
let s:lsp_command = [g:whisper_lsp_path, "-m", g:whisper_model_path]
" For faster execution. TODO: server load multiple models/run multiple servers?
" let s:lsp_command = [g:whisper_lsp_path, "-m", g:whisper_dir .. "models/ggml-tiny.en.bin", "-ac", "128"]
|
||
|
|
||
|
" requestCommands([params_dict])
|
||
|
" Send a "guided" (command recognition) request to the lsp job.
" Optional a:1 is a dict that is merged over the default parameters
" {"commandset_index": 0}; the reply is handled by s:commandCallback.
func whisper#requestCommands(...)
    let l:req = {"method": "guided", "params": {"commandset_index": 0}}
    if a:0 > 0
        call extend(l:req.params, a:1)
    endif
    " s:commandCallback looks the recognised command up in the command set
    " whose index it is given, so bind it to the index that is actually sent
    " (which a:1 may have overridden) rather than to a fixed 0.
    let l:set_index = l:req.params.commandset_index
    call ch_sendexpr(g:lsp_job, l:req, {"callback": function("s:commandCallback", [l:req.params, l:set_index])})
endfunction
|
||
|
|
||
|
" doTranscription([params_dict])
|
||
|
" Send an "unguided" (free transcription) request to the lsp job.
" Optional a:1 is a dict of request parameters. Partial results are handed to
" s:insertText and the end of the transcription to s:endTranscription.
func whisper#doTranscription(...)
    let l:params = {}
    if a:0 > 0
        call extend(l:params, a:1)
    endif
    let l:handlers = [function("s:insertText"), function("s:endTranscription")]
    let l:req = {"method": "unguided", "params": l:params}
    call ch_sendexpr(g:lsp_job, l:req, {"callback": function("s:transcriptionCallback", l:handlers)})
endfunction
|
||
|
|
||
|
" For testing
|
||
|
" Echo a:cha with every character of s:c_lowerkeys replaced by the character
" at the same position in s:c_upperkeys.
func whisper#uppertest(cha)
    let l:shifted = tr(a:cha, s:c_lowerkeys, s:c_upperkeys)
    echo l:shifted
endfunction
|
||
|
|
||
|
|
||
|
" (upper, exit, count, motion, command, insert/append, save run) "base"
|
||
|
" (upper, exit, count, motion, command, inside/around) "motion/visual"
|
||
|
" (upper, exit, count, motion, line, inside/around) "command already entered"
|
||
|
" (upper, exit, key, ) "from/till"
|
||
|
|
||
|
" upper and lower keys is used to translate between cases with tr
|
||
|
" Must be synchronized
|
||
|
" Index i of s:c_lowerkeys maps to index i of s:c_upperkeys when passed to tr().
let s:c_lowerkeys = "1234567890-=qwertyuiop[]\\asdfghjkl;'zxcvbnm,./\""
let s:c_upperkeys = "!@#$%^&*()_+QWERTYUIOP{}|ASDFGHJKL:\"ZXCVBNM<>?'"

" Key categories, split into one list item per key.
let s:c_count = split('1234567890"', '\zs')
let s:c_command = split('ryuogpdxcv.iam', '\zs')
let s:c_motion = split("wetf'hjklnb$^)", '\zs')
" object words: Word, Sentence, Paragraph, [, (, <, Tag, {. ", '
let s:c_area = split("wsp])>t}\"'", '\zs')

" Special commands.
let s:c_special_always = ["exit", "upper"]
let s:c_special_normal = ["save", "run", "space"]
|
||
|
|
||
|
" If not in dict, key is spoken word,
|
||
|
" If key resolves to string, value is used for normal/motion, but key for chars
|
||
|
" If key resolves to dict, {0: "normal",1: "motion",2:"single char",3: "area"}
|
||
|
" Missing entries fall back as follows {0: "required", 1: 0, 2: "key", 3: 0}
|
||
|
" Keys with a single spoken word.
let s:spoken_dict = {"w": "word", "e": "end", "r": "replace", "y": "yank", "u": "undo", "o": "open", "d": "delete", "f": "from", "g": "go"}
call extend(s:spoken_dict, {"h": "left", "j": "down", "k": "up", "l": "right", "c": "change", "v": "visual", "b": "back", "n": "next", "m": "mark"})
" Letters whose spoken word depends on the command set.
call extend(s:spoken_dict, {"t": {0: "till", 3: "tag"}, "i": {0: "insert", 1: "inside"}, "p": {0: "paste", 3: "paragraph"}, "a": {0: "append", 1: "around"}, "s": {0: "substitute", 3: "sentence"}})
" Punctuation.
call extend(s:spoken_dict, {".": {0: "repeat", 2: "period"}, "]": {0: "bracket", 2: "bracket"}, "'": {0: "jump", 2: "apostrophe", 3: "apostrophe"}, '"': {0: 'register', 2: "quotation", 3: "quotation"}})
call extend(s:spoken_dict, {"-": {0: "minus", 2: "minus"}, "$": {0: "dollar", 2: "dollar"}, "^": {0: "carrot", 2: "carrot"}})
call extend(s:spoken_dict, {")": {0: "sentence", 2: "parenthesis", 3: "parenthesis"}, "}": {0: "paragraph", 2: "brace", 3: "brace"}, ">": {0: "indent", 2: "angle", 3: "angle"}})
|
||
|
|
||
|
" Give this another pass. This seems overly hacky even if it's functional
|
||
|
" Keys typed so far for the transcription that follows an insert-style
" command; empty while no such transcription is in progress.
let s:sub_tran_msg = ""

" Progress callback for a transcription started from command mode.
" a:msg is the newly transcribed text. The first chunk is prefixed with the
" pending command keys and typed; every later chunk is appended, and the
" accumulated keys are typed again after an undo ("u").
" Outside of visual mode the keys are executed immediately; in visual mode
" they are only accumulated here.
func s:subTranProg(msg)
    if s:sub_tran_msg == ""
        if s:command_backlog == ""
            " this should not occur
            call s:logCallback(0, "Warning: Encountered sub transcription without prior command")
            let s:command_backlog = "a"
        endif
        " Drop a single leading space from the first chunk.
        let l:chunk = a:msg
        if l:chunk[0] == ' '
            let l:chunk = l:chunk[1:-1]
        endif
        let s:sub_tran_msg = s:command_backlog .. l:chunk
        let l:keys = s:sub_tran_msg
    else
        let s:sub_tran_msg = s:sub_tran_msg .. a:msg
        let l:keys = "u" .. s:sub_tran_msg
    endif
    if mode() !=? 'v'
        exe "normal" l:keys
    endif
    call appendbufline(s:output_buffer, "$", s:sub_tran_msg .. ":" .. string(a:msg ))
endfunction
|
||
|
|
||
|
" Finish callback for a transcription started from command mode.
" Stores the accumulated keys as the repeatable command, resets the
" transcription state and goes back to guided command recognition.
" a:params is the shared request parameter dict, a:timestamp the timestamp
" reported with the final transcription result.
func s:subTranFinish(params, timestamp)
    let l:typed = s:sub_tran_msg
    let s:repeat_command = l:typed
    " Visual selection is lost if used with streaming, so streaming of partial
    " transcriptions is disabled in visual mode
    if mode() ==? 'v'
        exe "normal" l:typed
    endif
    let s:sub_tran_msg = ""
    let s:command_backlog = ""
    exe "normal a\<C-G>u"
    " a:params is updated in place through a local alias.
    let l:shared = a:params
    let l:shared.timestamp = a:timestamp
    " Remove any stale index so whisper#requestCommands applies its default.
    if has_key(l:shared, "commandset_index")
        call remove(l:shared, "commandset_index")
    endif
    call whisper#requestCommands(a:params)
endfunction
|
||
|
|
||
|
" Append a:msg as a new last line of the log buffer. a:channel is unused; it
" is present so the function can be used directly as a channel callback.
func s:logCallback(channel, msg)
    call appendbufline(s:output_buffer, "$", a:msg)
endfunction
|
||
|
|
||
|
|
||
|
" Channel callback for an "unguided" request.
" a:progressCallback receives each piece of transcribed text and
" a:finishedCallback receives the timestamp once the text ends in "exit".
" Until then a follow-up request is sent and handled by this same function.
func s:transcriptionCallback(progressCallback, finishedCallback, channel, msg)
    let l:text = a:msg.result.transcription

    " Only the tail is searched for "exit".
    " The worst case I've observed so far is " Exit.", which is 6 characters
    let l:exit_at = match(tolower(l:text), "exit", len(l:text) - 6)
    if l:exit_at == -1
        " No exit word yet: pass the text on and keep listening.
        call a:progressCallback(l:text)
        let l:next = {"method": "unguided", "params": {"timestamp": a:msg.result.timestamp, "no_context": v:true}}
        call ch_sendexpr(g:lsp_job, l:next, {"callback": function("s:transcriptionCallback", [a:progressCallback, a:finishedCallback])})
        return
    endif
    " Pass on everything up to the character preceding "exit", then finish.
    call a:progressCallback(strpart(l:text, 0, l:exit_at - 1))
    call a:finishedCallback(a:msg.result.timestamp)
endfunc
|
||
|
" Type a:msg into the current buffer with the normal-mode "a" (append) command.
func s:insertText(msg)
    let l:keys = "normal a" .. a:msg
    execute l:keys
endfunction
|
||
|
" Note the end of an unguided transcription in the log buffer.
" a:timestamp is accepted for the finished-callback signature but not used.
func s:endTranscription(timestamp)
    let l:note = "Ending unguided transcription"
    call appendbufline(s:output_buffer, "$", l:note)
endfunction
|
||
|
|
||
|
|
||
|
|
||
|
" If a command does not include a whole actionable step, attempting to execute
|
||
|
" it discards the remainder of things. There is likely a simpler solution,
|
||
|
" but it can be made functional now by storing a backbuffer until actionable
|
||
|
" Keys recognised so far that do not yet form an executable command.
let s:command_backlog = ""
" Last executed command that is not a motion or undo; replayed by ".".
let s:repeat_command = ""
" Toggled by "upper"; when set, the next key is translated with s:c_upperkeys.
let s:preceeding_upper = v:false

" Channel callback for a "guided" request.
"   a:params           shared request parameter dict, updated in place
"   a:commandset_index index into s:commandset_list that was requested
"   a:msg              server reply; result.command_index and
"                      result.timestamp are read
" Appends the recognised key to s:command_backlog, executes the backlog once
" it forms a complete command, and sends the next request: guided with the
" follow-up command set, or unguided when text input is expected.
" String comparisons use the case-explicit operators so that the result does
" not depend on the user's 'ignorecase' setting.
func s:commandCallback(params, commandset_index, channel, msg)
    let l:command_index = a:msg.result.command_index
    let l:do_execute = v:false
    let l:next_mode = a:commandset_index
    let l:command = s:commandset_list[a:commandset_index][l:command_index]
    call s:logCallback(0, string(a:msg) .. " " .. a:commandset_index .. " " .. l:command)
    if l:command_index == 0
        " exit: stop listening, no follow-up request is sent.
        " Legacy behaviour was to only clear an existing backlog on exit.
        " Was found to be rarely desired and is better introduced as a
        " standalone command (clear?)
        call s:logCallback(0,"Stopping command mode")
        echo "No longer listening"
        let s:command_backlog = ""
        return
    elseif l:command_index == 1
        " upper
        let s:preceeding_upper = !s:preceeding_upper
    elseif l:command ==# "save"
        " save and run can only happen in commandset 0,
        exe "w"
    elseif l:command ==# "run"
        exe "make run"
    elseif l:command ==# "space"
        exe "normal i \<ESC>l"
    elseif has_key(s:c_user, l:command)
        " User command: the value is a Funcref or the name of a function.
        let Userfunc = s:c_user[l:command]
        if type(Userfunc) == v:t_string
            let Userfunc = function(Userfunc)
        endif
        call Userfunc()
    else
        if s:preceeding_upper
            " Upper should keep commandset
            let s:preceeding_upper = v:false
            let l:visual_command = tr(l:command, s:c_lowerkeys, s:c_upperkeys)
        else
            let l:visual_command = l:command
        endif
        echo s:command_backlog .. " - " .. l:visual_command
        let s:command_backlog = s:command_backlog .. l:visual_command
        if a:commandset_index == 2 || a:commandset_index == 3
            " single key, either completes motion, replace, or register
            " Should move to execute unless part of a register
            " Change will be caught at execute
            if s:command_backlog[-2:-2] !=# '"'
                call s:logCallback(0,"not register")
                let l:do_execute = v:true
            endif
            let l:next_mode = 0
        " commandset index only matters for a/i
        elseif (l:command ==# "a" || l:command ==# "i") && a:commandset_index == 1
            " inside/around. Is commandset 3
            let l:next_mode = 3
        elseif l:command ==# '"'
            let l:next_mode = 2
        elseif index(s:c_count, l:command) != -1
            let l:next_mode = a:commandset_index
        elseif index(s:c_motion, l:command) != -1
            if l:command ==# 't' || l:command ==# 'f' || l:command ==# "'"
                " prompt single key
                let l:next_mode = 2
            else
                let l:do_execute = v:true
                let l:next_mode = 0
            endif
        elseif index(s:c_command, l:command) != -1
            if index(["y","g","d","c"], s:command_backlog[-1:-1]) != -1 && s:command_backlog[-1:-1] !=# s:command_backlog[-2:-2] && mode() !=? 'v'
                " need motion or repeated command
                " Potential for bad state here if mismatched command keys are
                " entered (i.e. yd), but vim can handle checks for this at exe
                " And checking for cases like y123d would complicate things
                let l:next_mode = 1
            elseif index(["i","a","c", "o", "s"], l:command) != -1 || s:command_backlog[-1:-1] ==# 'R'
                "'Insert' mode, do general transcription
                let l:req = {"method": "unguided", "params": a:params}
                let l:req.params.timestamp = a:msg.result.timestamp
                let l:req.params.no_context = v:true
                call ch_sendexpr(g:lsp_job, l:req, {"callback": function("s:transcriptionCallback", [function("s:subTranProg"), function("s:subTranFinish", [a:params])])})
                return
            elseif l:command ==# 'r' || l:command ==# 'm'
                let l:next_mode = 2
            elseif l:command ==# '.'
                " Replace the "." just appended with the stored command.
                let l:next_mode = 0
                let l:do_execute = v:true
                let s:command_backlog = s:command_backlog[0:-2] .. s:repeat_command
            else
                if l:command ==? 'v'
                    let l:next_mode = 1
                else
                    let l:next_mode = 0
                endif
                let l:do_execute = v:true
            endif
        else
            throw "Invalid command state: " .. l:command .. " " .. a:commandset_index .. " " .. s:command_backlog
        endif
    endif
    if l:do_execute
        if mode() ==?'v' && l:next_mode == 0
            let l:next_mode = 1
        elseif s:command_backlog =~# 'c'
            " A backlog containing "c" is treated as a change command: the
            " replacement text is transcribed before anything is executed.
            " NOTE(review): this also matches a "c" used as an argument
            " (e.g. fc, rc, mc) - confirm whether that is intended.
            let l:req = {"method": "unguided", "params": a:params}
            let l:req.params.timestamp = a:msg.result.timestamp
            let l:req.params.no_context = v:true
            call ch_sendexpr(g:lsp_job, l:req, {"callback": function("s:transcriptionCallback", [function("s:subTranProg"), function("s:subTranFinish", [a:params])])})
            return
        endif
        exe "normal" s:command_backlog
        if index(s:c_motion + ["u"],l:command) == -1
            " Not a motion or undo: break the undo sequence and remember
            " the command for ".".
            exe "normal a\<C-G>u"
            let s:repeat_command = s:command_backlog
            call s:logCallback(0, s:command_backlog)
        endif
        let s:command_backlog = ""
    endif
    " Keep listening with the follow-up command set.
    let l:req = {"method": "guided", "params": a:params}
    let l:req.params.timestamp = a:msg.result.timestamp
    let l:req.params.commandset_index = l:next_mode
    call ch_sendexpr(g:lsp_job, l:req, {"callback": function("s:commandCallback",[a:params, l:next_mode])})
endfunction
|
||
|
|
||
|
" Channel callback for the last command set registration: announce that
" loading is complete and log the server reply.
func s:loadedCallback(channel, msg)
    echo "Loading complete"
    call appendbufline(s:output_buffer, "$", a:msg)
endfunction
|
||
|
|
||
|
" Register one command set with the lsp server.
"   a:commandlist  list of spoken words, in command-index order
"   a:is_final     true for the last set; its reply is handled by
"                  s:loadedCallback instead of s:logCallback
" The list is also appended to g:whisper_commandlist_spoken.
func s:registerCommandset(commandlist, is_final)
    let l:req = {"method": "registerCommandset", "params": a:commandlist}
    call s:logCallback(0, join(a:commandlist))
    call add(g:whisper_commandlist_spoken, a:commandlist)
    " Pass Funcrefs, as every other callback in this script does, so the
    " script-local name is resolved here rather than when the reply arrives.
    if a:is_final
        let l:Reply = function("s:loadedCallback")
    else
        let l:Reply = function("s:logCallback")
    endif
    call ch_sendexpr(g:lsp_job, l:req, {"callback": l:Reply})
endfunction
|
||
|
|
||
|
" Build the four command sets (0 normal, 1 visual, 2 single key, 3 area),
" store them in s:commandset_list and register their spoken forms with the
" server in that order. The last registration is flagged as final.
func s:registerAllCommands()
    let l:always = s:c_special_always
    let l:keys = s:c_count + s:c_command + s:c_motion

    let l:normal = l:always + s:c_special_normal + l:keys + keys(s:c_user)
    " The "command already entered" set is currently the same as visual.
    let l:visual = l:always + l:keys
    let l:single_key = l:always + split(s:c_lowerkeys, '\zs')
    let l:area = l:always + s:c_area

    " Used only for compatibility with the testing script
    let g:whisper_commandlist_spoken = []

    let s:commandset_list = [l:normal, l:visual, l:single_key, l:area]
    let l:last = len(s:commandset_list) - 1
    for l:set_index in range(len(s:commandset_list))
        let l:spoken = s:commandsetToSpoken(s:commandset_list[l:set_index], l:set_index)
        call s:registerCommandset(l:spoken, l:set_index == l:last)
    endfor
endfunction
|
||
|
|
||
|
" Translate a list of command keys into the words to be spoken for them.
"   a:commandset    list of keys / command names
"   a:spoken_index  command set index (0 normal, 1 motion, 2 single char,
"                   3 area) used to pick the word from s:spoken_dict
" Returns a new list with one word per entry of a:commandset. Keys without
" an entry in s:spoken_dict are spoken as themselves.
func s:commandsetToSpoken(commandset, spoken_index)
    let l:words = []
    for l:key in a:commandset
        " Unknown keys fall back to the key itself.
        let l:entry = get(s:spoken_dict, l:key, l:key)
        if type(l:entry) == v:t_dict
            if has_key(l:entry, a:spoken_index)
                let l:word = l:entry[a:spoken_index]
            elseif a:spoken_index == 2
                let l:word = l:key
            else
                " Index 0 is the required fallback entry.
                let l:word = l:entry[0]
            endif
        elseif a:spoken_index == 2
            " Single characters are spoken as the key, not as the command word.
            let l:word = l:key
        else
            let l:word = l:entry
        endif
        call add(l:words, l:word)
    endfor
    return l:words
endfunction
|
||
|
|
||
|
" TODO: Check lifetime. If the script is resourced, is the existing
|
||
|
" s:lsp_job dropped and therefore killed?
|
||
|
" This seems to not be the case and I've had to deal with zombie processes
|
||
|
" that survive exiting vim, even though said behavior conflicts with my
|
||
|
" understanding of the provided documentation
|
||
|
" Job options: lsp framing on stdin/stdout, stderr line by line into the
" log buffer.
let s:lsp_opts = {"in_mode": "lsp", "out_mode": "lsp", "err_mode": "nl", "err_io": "buffer", "err_buf": s:output_buffer}
" Start the server only once; g:lsp_job survives re-sourcing of this script.
if !exists("g:lsp_job")
    " User commands: dict mapping a spoken command to a Funcref or a
    " function name (see s:commandCallback).
    let s:c_user = get(g:, "whisper_user_commands", {})
    let g:lsp_job = job_start(s:lsp_command, s:lsp_opts)
    if job_status(g:lsp_job) ==# "fail"
        echoerr "Failed to start whisper job"
    else
        " Only talk to the server when it actually started; registering on a
        " failed job would just produce a series of channel errors.
        call s:registerAllCommands()
    endif
endif
|