Only select a llama.cpp CPU variant (AVX2 / AVX / fallback) when the
corresponding backend binary can be found with os.Stat under the asset
directory, both in selectGRPCProcess and in the GreedyLoader retry path.

Review note: in GreedyLoader the AVX branch must stat the AVX binary (the
variant it selects), not the AVX2 one; otherwise an AVX-only host picks the
AVX backend based on whether an unrelated file exists.

diff --git a/Dockerfile b/Dockerfile
index eb5c9b05..ac42db5d 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -282,6 +282,8 @@ COPY --from=grpc /opt/grpc /usr/local
 
 # Rebuild with defaults backends
 WORKDIR /build
+
+## Build the binary
 RUN make build
 
 RUN if [ ! -d "/build/sources/go-piper/piper-phonemize/pi/lib/" ]; then \
diff --git a/pkg/model/initializers.go b/pkg/model/initializers.go
index 92b3c0a0..901b4d99 100644
--- a/pkg/model/initializers.go
+++ b/pkg/model/initializers.go
@@ -247,14 +247,23 @@ func selectGRPCProcess(backend, assetDir string, f16 bool) string {
 	}
 
 	if xsysinfo.HasCPUCaps(cpuid.AVX2) {
-		log.Info().Msgf("[%s] attempting to load with AVX2 variant", backend)
-		grpcProcess = backendPath(assetDir, LLamaCPPAVX2)
+		p := backendPath(assetDir, LLamaCPPAVX2)
+		if _, err := os.Stat(p); err == nil {
+			log.Info().Msgf("[%s] attempting to load with AVX2 variant", backend)
+			grpcProcess = p
+		}
 	} else if xsysinfo.HasCPUCaps(cpuid.AVX) {
-		log.Info().Msgf("[%s] attempting to load with AVX variant", backend)
-		grpcProcess = backendPath(assetDir, LLamaCPPAVX)
+		p := backendPath(assetDir, LLamaCPPAVX)
+		if _, err := os.Stat(p); err == nil {
+			log.Info().Msgf("[%s] attempting to load with AVX variant", backend)
+			grpcProcess = p
+		}
 	} else {
-		log.Info().Msgf("[%s] attempting to load with fallback variant", backend)
-		grpcProcess = backendPath(assetDir, LLamaCPPFallback)
+		p := backendPath(assetDir, LLamaCPPFallback)
+		if _, err := os.Stat(p); err == nil {
+			log.Info().Msgf("[%s] attempting to load with fallback variant", backend)
+			grpcProcess = p
+		}
 	}
 
 	return grpcProcess
@@ -511,11 +520,23 @@ func (ml *ModelLoader) GreedyLoader(opts ...Option) (grpc.Backend, error) {
 		}
 
 		if autoDetect && key == LLamaCPP && err != nil {
-			backendToUse := LLamaCPPFallback
+			// try as hard as possible to run the llama.cpp variants
+			backendToUse := ""
 			if xsysinfo.HasCPUCaps(cpuid.AVX2) {
-				backendToUse = LLamaCPPAVX2
+				if _, err := os.Stat(backendPath(o.assetDir, LLamaCPPAVX2)); err == nil {
+					backendToUse = LLamaCPPAVX2
+				}
 			} else if xsysinfo.HasCPUCaps(cpuid.AVX) {
-				backendToUse = LLamaCPPAVX
+				if _, err := os.Stat(backendPath(o.assetDir, LLamaCPPAVX)); err == nil {
+					backendToUse = LLamaCPPAVX
+				}
+			} else {
+				if _, err := os.Stat(backendPath(o.assetDir, LLamaCPPFallback)); err == nil {
+					backendToUse = LLamaCPPFallback
+				} else {
+					// If we don't have a fallback, just skip fallback
+					continue
+				}
 			}
 
 			// Autodetection failed, try the fallback