diff --git a/core/http/endpoints/openai/realtime.go b/core/http/endpoints/openai/realtime.go index 15c21c68..21b12f2b 100644 --- a/core/http/endpoints/openai/realtime.go +++ b/core/http/endpoints/openai/realtime.go @@ -493,12 +493,16 @@ func handleVAD(session *Session, conversation *Conversation, c *websocket.Conn, log.Debug().Msgf("speech ends at %0.2fs", s.End) speechEnd = s.End } else { - log.Printf("speech is ongoing") - session.AudioBufferLock.Unlock() continue } } + if speechEnd == 0 && speechStart != 0 { + session.AudioBufferLock.Unlock() + log.Debug().Msg("speech is ongoing") + continue + } + // Handle when input is too long without a voice activity (reset the buffer) if speechStart == 0 && speechEnd == 0 { log.Debug().Msg("VAD detected no speech activity") @@ -531,9 +535,7 @@ func handleVAD(session *Session, conversation *Conversation, c *websocket.Conn, conversation.Lock.Unlock() // Reset InputAudioBuffer - session.AudioBufferLock.Lock() session.InputAudioBuffer = nil - session.AudioBufferLock.Unlock() // Send item.created event sendEvent(c, OutgoingMessage{