mirror of
https://github.com/mudler/LocalAI.git
synced 2025-02-20 09:26:15 +00:00
feat(explorer): relax token deletion with error threshold (#3211)
feat(explorer): relax token deletion with error threashold Signed-off-by: Ettore Di Giacinto <mudler@users.noreply.github.com>
This commit is contained in:
parent
0c0bc18c94
commit
8627bc2dd4
@ -10,9 +10,10 @@ import (
|
||||
)
|
||||
|
||||
type ExplorerCMD struct {
|
||||
Address string `env:"LOCALAI_ADDRESS,ADDRESS" default:":8080" help:"Bind address for the API server" group:"api"`
|
||||
PoolDatabase string `env:"LOCALAI_POOL_DATABASE,POOL_DATABASE" default:"explorer.json" help:"Path to the pool database" group:"api"`
|
||||
ConnectionTimeout string `env:"LOCALAI_CONNECTION_TIMEOUT,CONNECTION_TIMEOUT" default:"2m" help:"Connection timeout for the explorer" group:"api"`
|
||||
Address string `env:"LOCALAI_ADDRESS,ADDRESS" default:":8080" help:"Bind address for the API server" group:"api"`
|
||||
PoolDatabase string `env:"LOCALAI_POOL_DATABASE,POOL_DATABASE" default:"explorer.json" help:"Path to the pool database" group:"api"`
|
||||
ConnectionTimeout string `env:"LOCALAI_CONNECTION_TIMEOUT,CONNECTION_TIMEOUT" default:"2m" help:"Connection timeout for the explorer" group:"api"`
|
||||
ConnectionErrorThreshold int `env:"LOCALAI_CONNECTION_ERROR_THRESHOLD,CONNECTION_ERROR_THRESHOLD" default:"3" help:"Connection failure threshold for the explorer" group:"api"`
|
||||
}
|
||||
|
||||
func (e *ExplorerCMD) Run(ctx *cliContext.Context) error {
|
||||
@ -26,7 +27,7 @@ func (e *ExplorerCMD) Run(ctx *cliContext.Context) error {
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
ds := explorer.NewDiscoveryServer(db, dur)
|
||||
ds := explorer.NewDiscoveryServer(db, dur, e.ConnectionErrorThreshold)
|
||||
|
||||
go ds.Start(context.Background())
|
||||
appHTTP := http.Explorer(db, ds)
|
||||
|
@ -15,9 +15,11 @@ import (
|
||||
|
||||
type DiscoveryServer struct {
|
||||
sync.Mutex
|
||||
database *Database
|
||||
networkState *NetworkState
|
||||
connectionTime time.Duration
|
||||
database *Database
|
||||
networkState *NetworkState
|
||||
connectionTime time.Duration
|
||||
failures map[string]int
|
||||
errorThreshold int
|
||||
}
|
||||
|
||||
type NetworkState struct {
|
||||
@ -32,16 +34,20 @@ func (s *DiscoveryServer) NetworkState() *NetworkState {
|
||||
|
||||
// NewDiscoveryServer creates a new DiscoveryServer with the given Database.
|
||||
// it keeps the db state in sync with the network state
|
||||
func NewDiscoveryServer(db *Database, dur time.Duration) *DiscoveryServer {
|
||||
func NewDiscoveryServer(db *Database, dur time.Duration, failureThreshold int) *DiscoveryServer {
|
||||
if dur == 0 {
|
||||
dur = 50 * time.Second
|
||||
}
|
||||
if failureThreshold == 0 {
|
||||
failureThreshold = 3
|
||||
}
|
||||
return &DiscoveryServer{
|
||||
database: db,
|
||||
connectionTime: dur,
|
||||
networkState: &NetworkState{
|
||||
Networks: map[string]Network{},
|
||||
},
|
||||
errorThreshold: failureThreshold,
|
||||
}
|
||||
}
|
||||
|
||||
@ -66,21 +72,21 @@ func (s *DiscoveryServer) runBackground() {
|
||||
n, err := p2p.NewNode(token)
|
||||
if err != nil {
|
||||
log.Err(err).Msg("Failed to create node")
|
||||
s.database.Delete(token)
|
||||
s.failedToken(token)
|
||||
continue
|
||||
}
|
||||
|
||||
err = n.Start(c)
|
||||
if err != nil {
|
||||
log.Err(err).Msg("Failed to start node")
|
||||
s.database.Delete(token)
|
||||
s.failedToken(token)
|
||||
continue
|
||||
}
|
||||
|
||||
ledger, err := n.Ledger()
|
||||
if err != nil {
|
||||
log.Err(err).Msg("Failed to start ledger")
|
||||
s.database.Delete(token)
|
||||
s.failedToken(token)
|
||||
continue
|
||||
}
|
||||
|
||||
@ -114,8 +120,27 @@ func (s *DiscoveryServer) runBackground() {
|
||||
}
|
||||
s.Unlock()
|
||||
} else {
|
||||
log.Info().Any("network", token).Msg("No workers found in the network. Removing it from the database")
|
||||
s.database.Delete(token)
|
||||
s.failedToken(token)
|
||||
}
|
||||
}
|
||||
|
||||
s.deleteFailedConnections()
|
||||
}
|
||||
|
||||
func (s *DiscoveryServer) failedToken(token string) {
|
||||
s.Lock()
|
||||
defer s.Unlock()
|
||||
s.failures[token]++
|
||||
}
|
||||
|
||||
func (s *DiscoveryServer) deleteFailedConnections() {
|
||||
s.Lock()
|
||||
defer s.Unlock()
|
||||
for k, v := range s.failures {
|
||||
if v > s.errorThreshold {
|
||||
log.Info().Any("network", k).Msg("Network has been removed from the database")
|
||||
s.database.Delete(k)
|
||||
delete(s.failures, k)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
Loading…
x
Reference in New Issue
Block a user