feat(dowloader): resume partial downloads (#4537)

* feat(resume downloads): add basic tests

Signed-off-by: Saarthak Verma <saarthakverma739@gmail.com>

* test(resume downloads): implement file download tc

Signed-off-by: Saarthak Verma <saarthakverma739@gmail.com>

* test(resume downloads): add resume partial download test

Signed-off-by: Saarthak Verma <saarthakverma739@gmail.com>

* feat(resume downloads): implement resumable downloads for interrupted transfers

- Adds support for resuming partially downloaded files
- Uses HTTP Range header to continue from last byte position
- Maintains download progress across interruptions
- Preserves partial downloads with .partial extension
- Validates SHA256 checksum after completion

Signed-off-by: Saarthak Verma <saarthakverma739@gmail.com>

* fix(resume downloads): incorrect download percent on front end

Signed-off-by: Saarthak Verma <saarthakverma739@gmail.com>

* feat(resume download): add range header check tc

Signed-off-by: Saarthak Verma <saarthakverma739@gmail.com>

* feat(resume download): implement range header check

Signed-off-by: Saarthak Verma <saarthakverma739@gmail.com>

---------

Signed-off-by: Saarthak Verma <saarthakverma739@gmail.com>
This commit is contained in:
Saarthak Verma 2025-01-09 13:52:52 +05:30 committed by GitHub
parent ae1340d59b
commit 6765b17acd
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
2 changed files with 202 additions and 16 deletions

View File

@ -2,7 +2,9 @@ package downloader
import ( import (
"crypto/sha256" "crypto/sha256"
"errors"
"fmt" "fmt"
"hash"
"io" "io"
"net/http" "net/http"
"net/url" "net/url"
@ -204,6 +206,25 @@ func removePartialFile(tmpFilePath string) error {
return nil return nil
} }
func calculateHashForPartialFile(file *os.File) (hash.Hash, error) {
hash := sha256.New()
_, err := io.Copy(hash, file)
if err != nil {
return nil, err
}
return hash, nil
}
func (uri URI) checkSeverSupportsRangeHeader() (bool, error) {
url := uri.ResolveURL()
resp, err := http.Head(url)
if err != nil {
return false, err
}
defer resp.Body.Close()
return resp.Header.Get("Accept-Ranges") == "bytes", nil
}
func (uri URI) DownloadFile(filePath, sha string, fileN, total int, downloadStatus func(string, string, string, float64)) error { func (uri URI) DownloadFile(filePath, sha string, fileN, total int, downloadStatus func(string, string, string, float64)) error {
url := uri.ResolveURL() url := uri.ResolveURL()
if uri.LooksLikeOCI() { if uri.LooksLikeOCI() {
@ -266,8 +287,34 @@ func (uri URI) DownloadFile(filePath, sha string, fileN, total int, downloadStat
log.Info().Msgf("Downloading %q", url) log.Info().Msgf("Downloading %q", url)
// Download file req, err := http.NewRequest("GET", url, nil)
resp, err := http.Get(url) if err != nil {
return fmt.Errorf("failed to create request for %q: %v", filePath, err)
}
// save partial download to dedicated file
tmpFilePath := filePath + ".partial"
tmpFileInfo, err := os.Stat(tmpFilePath)
if err == nil {
support, err := uri.checkSeverSupportsRangeHeader()
if err != nil {
return fmt.Errorf("failed to check if uri server supports range header: %v", err)
}
if support {
startPos := tmpFileInfo.Size()
req.Header.Set("Range", fmt.Sprintf("bytes=%d-", startPos))
} else {
err := removePartialFile(tmpFilePath)
if err != nil {
return err
}
}
} else if !errors.Is(err, os.ErrNotExist) {
return fmt.Errorf("failed to check file %q existence: %v", filePath, err)
}
// Start the request
resp, err := http.DefaultClient.Do(req)
if err != nil { if err != nil {
return fmt.Errorf("failed to download file %q: %v", filePath, err) return fmt.Errorf("failed to download file %q: %v", filePath, err)
} }
@ -283,26 +330,20 @@ func (uri URI) DownloadFile(filePath, sha string, fileN, total int, downloadStat
return fmt.Errorf("failed to create parent directory for file %q: %v", filePath, err) return fmt.Errorf("failed to create parent directory for file %q: %v", filePath, err)
} }
// save partial download to dedicated file // Create and write file
tmpFilePath := filePath + ".partial" outFile, err := os.OpenFile(tmpFilePath, os.O_APPEND|os.O_RDWR|os.O_CREATE, 0644)
// remove tmp file
err = removePartialFile(tmpFilePath)
if err != nil { if err != nil {
return err return fmt.Errorf("failed to create / open file %q: %v", tmpFilePath, err)
}
// Create and write file content
outFile, err := os.Create(tmpFilePath)
if err != nil {
return fmt.Errorf("failed to create file %q: %v", tmpFilePath, err)
} }
defer outFile.Close() defer outFile.Close()
hash, err := calculateHashForPartialFile(outFile)
if err != nil {
return fmt.Errorf("failed to calculate hash for partial file")
}
progress := &progressWriter{ progress := &progressWriter{
fileName: tmpFilePath, fileName: tmpFilePath,
total: resp.ContentLength, total: resp.ContentLength,
hash: sha256.New(), hash: hash,
fileNo: fileN, fileNo: fileN,
totalFiles: total, totalFiles: total,
downloadStatus: downloadStatus, downloadStatus: downloadStatus,

View File

@ -1,6 +1,15 @@
package downloader_test package downloader_test
import ( import (
"crypto/rand"
"crypto/sha256"
"fmt"
"net/http"
"net/http/httptest"
"os"
"regexp"
"strconv"
. "github.com/mudler/LocalAI/pkg/downloader" . "github.com/mudler/LocalAI/pkg/downloader"
. "github.com/onsi/ginkgo/v2" . "github.com/onsi/ginkgo/v2"
. "github.com/onsi/gomega" . "github.com/onsi/gomega"
@ -38,3 +47,139 @@ var _ = Describe("Gallery API tests", func() {
}) })
}) })
}) })
type RangeHeaderError struct {
msg string
}
func (e *RangeHeaderError) Error() string { return e.msg }
var _ = Describe("Download Test", func() {
var mockData []byte
var mockDataSha string
var filePath string
extractRangeHeader := func(rangeString string) (int, int, error) {
regex := regexp.MustCompile(`^bytes=(\d+)-(\d+|)$`)
matches := regex.FindStringSubmatch(rangeString)
rangeErr := RangeHeaderError{msg: "invalid / ill-formatted range"}
if matches == nil {
return -1, -1, &rangeErr
}
startPos, err := strconv.Atoi(matches[1])
if err != nil {
return -1, -1, err
}
endPos := -1
if matches[2] != "" {
endPos, err = strconv.Atoi(matches[2])
if err != nil {
return -1, -1, err
}
endPos += 1 // because range is inclusive in rangeString
}
return startPos, endPos, nil
}
getMockServer := func(supportsRangeHeader bool) *httptest.Server {
mockServer := httptest.NewUnstartedServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
if r.Method != "HEAD" && r.Method != "GET" {
w.WriteHeader(http.StatusNotFound)
return
}
if r.Method == "HEAD" {
if supportsRangeHeader {
w.Header().Add("Accept-Ranges", "bytes")
}
w.WriteHeader(http.StatusOK)
return
}
// GET method
startPos := 0
endPos := len(mockData)
var err error
var respData []byte
rangeString := r.Header.Get("Range")
if rangeString != "" {
startPos, endPos, err = extractRangeHeader(rangeString)
if err != nil {
if _, ok := err.(*RangeHeaderError); ok {
w.WriteHeader(http.StatusBadRequest)
return
}
Expect(err).ToNot(HaveOccurred())
}
if endPos == -1 {
endPos = len(mockData)
}
if startPos < 0 || startPos >= len(mockData) || endPos < 0 || endPos > len(mockData) || startPos > endPos {
w.WriteHeader(http.StatusBadRequest)
return
}
}
respData = mockData[startPos:endPos]
w.WriteHeader(http.StatusOK)
w.Write(respData)
}))
mockServer.EnableHTTP2 = true
mockServer.Start()
return mockServer
}
BeforeEach(func() {
mockData = make([]byte, 20000)
_, err := rand.Read(mockData)
Expect(err).ToNot(HaveOccurred())
_mockDataSha := sha256.New()
_, err = _mockDataSha.Write(mockData)
Expect(err).ToNot(HaveOccurred())
mockDataSha = fmt.Sprintf("%x", _mockDataSha.Sum(nil))
dir, err := os.Getwd()
filePath = dir + "/my_supercool_model"
Expect(err).NotTo(HaveOccurred())
})
Context("URI DownloadFile", func() {
It("fetches files from mock server", func() {
mockServer := getMockServer(true)
defer mockServer.Close()
uri := URI(mockServer.URL)
err := uri.DownloadFile(filePath, mockDataSha, 1, 1, func(s1, s2, s3 string, f float64) {})
Expect(err).ToNot(HaveOccurred())
})
It("resumes partially downloaded files", func() {
mockServer := getMockServer(true)
defer mockServer.Close()
uri := URI(mockServer.URL)
// Create a partial file
tmpFilePath := filePath + ".partial"
file, err := os.OpenFile(tmpFilePath, os.O_WRONLY|os.O_CREATE|os.O_TRUNC, 0644)
Expect(err).ToNot(HaveOccurred())
_, err = file.Write(mockData[0:10000])
Expect(err).ToNot(HaveOccurred())
err = uri.DownloadFile(filePath, mockDataSha, 1, 1, func(s1, s2, s3 string, f float64) {})
Expect(err).ToNot(HaveOccurred())
})
It("restarts download from 0 if server doesn't support Range header", func() {
mockServer := getMockServer(false)
defer mockServer.Close()
uri := URI(mockServer.URL)
// Create a partial file
tmpFilePath := filePath + ".partial"
file, err := os.OpenFile(tmpFilePath, os.O_WRONLY|os.O_CREATE|os.O_TRUNC, 0644)
Expect(err).ToNot(HaveOccurred())
_, err = file.Write(mockData[0:10000])
Expect(err).ToNot(HaveOccurred())
err = uri.DownloadFile(filePath, mockDataSha, 1, 1, func(s1, s2, s3 string, f float64) {})
Expect(err).ToNot(HaveOccurred())
})
})
AfterEach(func() {
os.Remove(filePath) // cleanup, also checks existance of filePath`
os.Remove(filePath + ".partial")
})
})