Skip to content

Commit

Permalink
interval job to cleanup old servers
Browse files Browse the repository at this point in the history
  • Loading branch information
maddalax committed Nov 29, 2024
1 parent fd2550b commit 036af51
Show file tree
Hide file tree
Showing 2 changed files with 99 additions and 0 deletions.
85 changes: 85 additions & 0 deletions app/resource_monitor.go
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,9 @@ package app

import (
"dockman/app/logger"
"github.com/maddalax/htmgo/framework/h"
"github.com/maddalax/htmgo/framework/service"
"slices"
"time"
)

Expand Down Expand Up @@ -52,6 +54,8 @@ func (monitor *ResourceMonitor) Start() {
runner.Add(source, "ResourceServerCleanup", "Detaches servers that no longer exist from resources", time.Minute, monitor.ResourceServerCleanup)
runner.Add(source, "ServerConnectionMonitor", "Monitors if connected servers are still connected by checking for a heartbeat", time.Second*5, monitor.ServerConnectionMonitor)
runner.Add(source, "ResourceCheckForNewCommits", "Checks if a resource has a new commit and starts a new deployment if enabled", time.Second*30, monitor.ResourceCheckForNewCommits)
runner.Add(source, "ServerDuplicateCleanup", "Checks if there are any servers with the same remote ip and dedupes them", time.Second*30, monitor.CleanupDuplicateServers)

}

// RunStatusMonitorJob Monitors the run status of resources and updates the status if necessary
Expand Down Expand Up @@ -154,3 +158,84 @@ func (monitor *ResourceMonitor) ResourceCheckForNewCommits() {
}
}
}

// CleanupDuplicateServers Detaches and deletes servers that have the same remote ip, keeping the newest one
// this can happen if a server os is reinstalled and has a new id
func (monitor *ResourceMonitor) CleanupDuplicateServers() {
registry := GetServiceRegistry(monitor.locator)
list, err := ServerList(monitor.locator)
if err != nil {
logger.Error("Error getting server list", err)
return
}
serverMap := make(map[string][]*Server)
for _, server := range list {
ip := server.IpAddress()
if serverMap[ip] == nil {
serverMap[ip] = []*Server{}
}
serverMap[ip] = append(serverMap[ip], server)
}

for _, servers := range serverMap {
if len(servers) < 2 {
continue
}

// sort by last seen, newest first
slices.SortFunc(servers, func(a, b *Server) int {
return b.LastSeen.Compare(a.LastSeen)
})

// check the rest of the servers, skip the first one because it's the newest
inaccessibleServers := h.Filter(servers[1:], func(s *Server) bool {
return !s.IsAccessible()
})

// all servers accessible, this really shouldn't happen... but safety first
if len(inaccessibleServers) == 0 {
logger.WarnWithFields("All servers are accessible, not detaching any", map[string]interface{}{
"server_ids": h.Map(servers, func(s *Server) string { return s.Id }),
})
continue
}

// all servers are inaccessible, don't detach any
if len(inaccessibleServers) == len(servers) {
logger.DebugWithFields("All servers are inaccessible, not detaching any", map[string]interface{}{
"server_ids": h.Map(servers, func(s *Server) string { return s.Id }),
})
continue
}

// detach from all resources
for _, server := range inaccessibleServers {
logger.InfoWithFields("Detaching duplicate server", map[string]interface{}{
"server_id": server.Id,
})
resources, err := GetResourcesForServer(monitor.locator, server.Id)
if err != nil {
continue
}
for _, resource := range resources {
err := DetachServerFromResource(monitor.locator, server.Id, resource.Id)
if err != nil {
logger.Error("Error detaching duplicate server", err)
} else {
registry.GetEventHandler().OnServerDetached(server.Id, resource)
}
}
}

for _, server := range inaccessibleServers {
logger.InfoWithFields("Deleting duplicate server", map[string]interface{}{
"server_id": server.Id,
})
err := ServerDelete(monitor.locator, server.Id)
if err != nil {
logger.Error("Error deleting duplicate server", err)
}
}
}

}
14 changes: 14 additions & 0 deletions app/server.go
Original file line number Diff line number Diff line change
Expand Up @@ -146,3 +146,17 @@ func ServerList(locator *service.Locator) ([]*Server, error) {

return servers, nil
}

func ServerDelete(locator *service.Locator, id string) error {
client := service.Get[KvClient](locator)

bucket, err := client.GetOrCreateBucket(&nats.KeyValueConfig{
Bucket: "servers",
})

if err != nil {
return err
}

return bucket.Delete(id)
}

0 comments on commit 036af51

Please sign in to comment.