mirror of
https://github.com/mudler/LocalAI.git
synced 2024-12-27 00:01:07 +00:00
wip
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
This commit is contained in:
parent
b1f57a90fe
commit
78d45e5f69
@ -478,6 +478,8 @@ func handleVAD(session *Session, conversation *Conversation, c *websocket.Conn,
|
|||||||
cancel()
|
cancel()
|
||||||
}()
|
}()
|
||||||
|
|
||||||
|
audioDetected := false
|
||||||
|
timeListening := time.Now()
|
||||||
// Implement VAD logic here
|
// Implement VAD logic here
|
||||||
// For brevity, this is a placeholder
|
// For brevity, this is a placeholder
|
||||||
// When VAD detects end of speech, generate a response
|
// When VAD detects end of speech, generate a response
|
||||||
@ -489,10 +491,14 @@ func handleVAD(session *Session, conversation *Conversation, c *websocket.Conn,
|
|||||||
default:
|
default:
|
||||||
// Check if there's audio data to process
|
// Check if there's audio data to process
|
||||||
session.AudioBufferLock.Lock()
|
session.AudioBufferLock.Lock()
|
||||||
|
|
||||||
if len(session.InputAudioBuffer) > 16000 {
|
if len(session.InputAudioBuffer) > 16000 {
|
||||||
|
|
||||||
adata := sound.BytesToInt16sLE(session.InputAudioBuffer)
|
adata := sound.BytesToInt16sLE(session.InputAudioBuffer)
|
||||||
|
|
||||||
|
// Resample from 24kHz to 16kHz
|
||||||
|
adata = sound.ResampleInt16(adata, 24000, 16000)
|
||||||
|
|
||||||
soundIntBuffer := &audio.IntBuffer{
|
soundIntBuffer := &audio.IntBuffer{
|
||||||
Format: &audio.Format{SampleRate: 16000, NumChannels: 1},
|
Format: &audio.Format{SampleRate: 16000, NumChannels: 1},
|
||||||
}
|
}
|
||||||
@ -538,23 +544,30 @@ func handleVAD(session *Session, conversation *Conversation, c *websocket.Conn,
|
|||||||
log.Debug().Msg("VAD detected no speech activity")
|
log.Debug().Msg("VAD detected no speech activity")
|
||||||
log.Debug().Msgf("audio length %d", len(session.InputAudioBuffer))
|
log.Debug().Msgf("audio length %d", len(session.InputAudioBuffer))
|
||||||
|
|
||||||
session.InputAudioBuffer = nil
|
if !audioDetected {
|
||||||
|
session.InputAudioBuffer = nil
|
||||||
|
}
|
||||||
log.Debug().Msgf("audio length(after) %d", len(session.InputAudioBuffer))
|
log.Debug().Msgf("audio length(after) %d", len(session.InputAudioBuffer))
|
||||||
|
|
||||||
session.AudioBufferLock.Unlock()
|
session.AudioBufferLock.Unlock()
|
||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
|
|
||||||
|
timeListening = time.Now()
|
||||||
|
|
||||||
log.Debug().Msgf("VAD detected %d segments", len(resp.Segments))
|
log.Debug().Msgf("VAD detected %d segments", len(resp.Segments))
|
||||||
log.Debug().Msgf("audio length %d", len(session.InputAudioBuffer))
|
log.Debug().Msgf("audio length %d", len(session.InputAudioBuffer))
|
||||||
|
|
||||||
speechStart = resp.Segments[0].Start
|
speechStart = resp.Segments[0].Start
|
||||||
log.Debug().Msgf("speech starts at %0.2fs", speechStart)
|
log.Debug().Msgf("speech starts at %0.2fs", speechStart)
|
||||||
|
|
||||||
|
audioDetected = true
|
||||||
|
|
||||||
for _, s := range resp.Segments {
|
for _, s := range resp.Segments {
|
||||||
if s.End > 0 {
|
if s.End > 0 {
|
||||||
log.Debug().Msgf("speech ends at %0.2fs", s.End)
|
log.Debug().Msgf("speech ends at %0.2fs", s.End)
|
||||||
speechEnd = s.End
|
speechEnd = s.End
|
||||||
|
audioDetected = false
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -599,6 +612,7 @@ func handleVAD(session *Session, conversation *Conversation, c *websocket.Conn,
|
|||||||
|
|
||||||
// Reset InputAudioBuffer
|
// Reset InputAudioBuffer
|
||||||
session.InputAudioBuffer = nil
|
session.InputAudioBuffer = nil
|
||||||
|
session.AudioBufferLock.Unlock()
|
||||||
|
|
||||||
// Send item.created event
|
// Send item.created event
|
||||||
sendEvent(c, OutgoingMessage{
|
sendEvent(c, OutgoingMessage{
|
||||||
@ -608,9 +622,10 @@ func handleVAD(session *Session, conversation *Conversation, c *websocket.Conn,
|
|||||||
|
|
||||||
// Generate a response
|
// Generate a response
|
||||||
generateResponse(session, conversation, ResponseCreate{}, c, websocket.TextMessage)
|
generateResponse(session, conversation, ResponseCreate{}, c, websocket.TextMessage)
|
||||||
|
} else {
|
||||||
|
session.AudioBufferLock.Unlock()
|
||||||
}
|
}
|
||||||
|
|
||||||
session.AudioBufferLock.Unlock()
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
Loading…
Reference in New Issue
Block a user