elastic · ShourieG · Sep 21, 2024 · Sep 21, 2024 · Sep 24, 2024 · Sep 24, 2024
diff --git a/CHANGELOG-developer.next.asciidoc b/CHANGELOG-developer.next.asciidoc
@@ -205,6 +205,7 @@ The list below covers the major changes between 7.0.0-rc2 and main only.
 - Add a configuration option for TCP/UDP network type. {issue}40407[40407] {pull}40623[40623]
 - Added debug logging to parquet reader in x-pack/libbeat/reader. {pull}40651[40651]
 - Added filebeat debug histograms for s3 object size and events per processed s3 object. {pull}40775[40775]
+- Simplified GCS input state checkpoint calculation logic. {issue}40878[40878] {pull}40937[40937] 
 
 ==== Deprecated
 

@@ -7,6 +7,8 @@ package gcs
 import (
 	"context"
 	"fmt"
+	"slices"
+	"sort"
 	"strings"
 	"sync"
 
@@ -181,41 +183,18 @@ func (s *scheduler) fetchObjectPager(ctx context.Context, pageSize int) *iterato
 // moveToLastSeenJob, moves to the latest job position past the last seen job
 // Jobs are stored in lexicographical order always, hence the latest position can be found either on the basis of job name or timestamp
 func (s *scheduler) moveToLastSeenJob(jobs []*job) []*job {
-	var latestJobs []*job
-	jobsToReturn := make([]*job, 0)
-	counter := 0
-	flag := false
-	ignore := false
-
-	for _, job := range jobs {
-		switch {
-		case job.Timestamp().After(s.state.checkpoint().LatestEntryTime):
-			latestJobs = append(latestJobs, job)
-		case job.Name() == s.state.checkpoint().ObjectName:
-			flag = true
-		case job.Name() > s.state.checkpoint().ObjectName:
-			flag = true
-			counter--
-		case job.Name() <= s.state.checkpoint().ObjectName && (!ignore):
-			ignore = true
-		}
-		counter++
-	}
-
-	if flag && (counter < len(jobs)-1) {
-		jobsToReturn = jobs[counter+1:]
-	} else if !flag && !ignore {
-		jobsToReturn = jobs
-	}
+	cp := s.state.checkpoint()
+	jobs = slices.DeleteFunc(jobs, func(j *job) bool {
+		return !(j.Timestamp().After(cp.LatestEntryTime) || j.Name() > cp.ObjectName)
+	})
 
-	// in a senario where there are some jobs which have a later time stamp
+	// in a scenario where there are some jobs which have a greater timestamp
 	// but lesser lexicographic order and some jobs have greater lexicographic order
-	// than the current checkpoint object name, then we append the latest jobs
-	if len(jobsToReturn) != len(jobs) && len(latestJobs) > 0 {
-		jobsToReturn = append(latestJobs, jobsToReturn...)
-	}
-
-	return jobsToReturn
+	// than the current checkpoint blob name, we stably move the jobs newer than the checkpoint timestamp to the front
+	sort.SliceStable(jobs, func(i, j int) bool {
+		return jobs[i].Timestamp().After(cp.LatestEntryTime) && !jobs[j].Timestamp().After(cp.LatestEntryTime)
+	})
+	return jobs
 }
 
 func (s *scheduler) addFailedJobs(ctx context.Context, jobs []*job) []*job {