Commit 43dba4c

feat(nginx_log): enhance incremental indexing configuration and logic
1 parent ba5ea3d

File tree: 7 files changed, +199 −101 lines

app.example.ini
Lines changed: 12 additions & 3 deletions

@@ -63,8 +63,17 @@ ReloadCmd = nginx -s reload
 RestartCmd = start-stop-daemon --start --quiet --pidfile /var/run/nginx.pid --exec /usr/sbin/nginx

 [nginx_log]
-AdvancedIndexingEnabled = false
-IndexPath =
+; Enable or disable nginx access log indexing and analytics.
+; When disabled, the UI will still work but log search/analytics features are turned off
+; and CPU usage will be significantly lower.
+IndexingEnabled = false
+IndexPath =
+; Interval (in minutes) for incremental indexing job.
+; This controls how often nginx-ui scans access logs for new data and performs
+; incremental indexing. Lower values keep analytics closer to real-time but
+; increase background CPU usage. Higher values reduce CPU usage at the cost
+; of more stale analytics data.
+IncrementalIndexInterval = 15

 [node]
 Name = Local
@@ -77,7 +86,7 @@ BaseUrl =
 Token =
 Proxy =
 Model = gpt-4o
-APIType =
+APIType =
 EnableCodeCompletion = false
 CodeCompletionModel = gpt-4o-mini
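
For reference, a plausible shape for the Go settings struct backing these ini keys. The actual definition lives in the settings package, which is among the 7 changed files but is not shown on this page, so the field names and ini tags below are inferred from the keys above, not confirmed:

type NginxLog struct {
    IndexingEnabled bool   `ini:"IndexingEnabled"`
    IndexPath       string `ini:"IndexPath"`
    // Interval in minutes between incremental indexing runs.
    IncrementalIndexInterval int `ini:"IncrementalIndexInterval"`
}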

internal/cron/incremental_indexing.go
Lines changed: 95 additions & 82 deletions

@@ -8,6 +8,7 @@ import (
     "github.com/0xJacky/Nginx-UI/internal/nginx_log"
     "github.com/0xJacky/Nginx-UI/internal/nginx_log/indexer"
     "github.com/0xJacky/Nginx-UI/model"
+    "github.com/0xJacky/Nginx-UI/settings"
     "github.com/go-co-op/gocron/v2"
     "github.com/uozi-tech/cosy/logger"
 )
@@ -21,19 +22,23 @@ type logIndexProvider interface {
 func setupIncrementalIndexingJob(s gocron.Scheduler) (gocron.Job, error) {
     logger.Info("Setting up incremental log indexing job")

-    // Run every 5 minutes to check for log file changes
+    // Determine interval from settings, falling back to a conservative default
+    interval := settings.NginxLogSettings.GetIncrementalIndexInterval()
+
+    // Run periodically to check for log file changes using incremental indexing
     job, err := s.NewJob(
-        gocron.DurationJob(5*time.Minute),
+        gocron.DurationJob(interval),
         gocron.NewTask(performIncrementalIndexing),
         gocron.WithName("incremental_log_indexing"),
+        gocron.WithSingletonMode(gocron.LimitModeWait), // Prevent overlapping executions
         gocron.WithStartAt(gocron.WithStartImmediately()),
     )

     if err != nil {
         return nil, err
     }

-    logger.Info("Incremental log indexing job scheduled to run every 5 minutes")
+    logger.Infof("Incremental log indexing job scheduled to run every %s", interval)
     return job, nil
 }
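
gocron.DurationJob takes a time.Duration, so the accessor presumably converts the configured minutes and guards against non-positive values. A minimal sketch of what it might look like, assuming the NginxLog struct sketched earlier (the real implementation is in the settings package, not shown in this diff):

func (s *NginxLog) GetIncrementalIndexInterval() time.Duration {
    // Guard against zero/negative config; fall back to the documented default.
    if s.IncrementalIndexInterval <= 0 {
        return 15 * time.Minute
    }
    return time.Duration(s.IncrementalIndexInterval) * time.Minute
}

Note the pairing with WithSingletonMode(gocron.LimitModeWait): a long indexing pass now delays the next run instead of overlapping it, which matters since each pass indexes files synchronously (see the refactor below).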

@@ -73,20 +78,41 @@ func performIncrementalIndexing() {
         return log.Type == "access"
     })

+    // Process files sequentially to avoid overwhelming the system
+    // This is more conservative but prevents concurrent file indexing from consuming too much CPU
     changedCount := 0
     for _, log := range allLogs {
         // Check if file needs incremental indexing
         if needsIncrementalIndexing(log, persistence) {
-            if err := queueIncrementalIndexing(log.Path, modernIndexer, logFileManager); err != nil {
-                logger.Errorf("Failed to queue incremental indexing for %s: %v", log.Path, err)
+            logger.Infof("Starting incremental indexing for file: %s", log.Path)
+
+            // Set status to indexing
+            if err := setFileIndexStatus(log.Path, string(indexer.IndexStatusIndexing), logFileManager); err != nil {
+                logger.Errorf("Failed to set indexing status for %s: %v", log.Path, err)
+                continue
+            }
+
+            // Perform incremental indexing synchronously (one file at a time)
+            if err := performSingleFileIncrementalIndexing(log.Path, modernIndexer, logFileManager); err != nil {
+                logger.Errorf("Failed incremental indexing for %s: %v", log.Path, err)
+                // Set error status
+                if statusErr := setFileIndexStatus(log.Path, string(indexer.IndexStatusError), logFileManager); statusErr != nil {
+                    logger.Errorf("Failed to set error status for %s: %v", log.Path, statusErr)
+                }
             } else {
                 changedCount++
+                // Set status to indexed
+                if err := setFileIndexStatus(log.Path, string(indexer.IndexStatusIndexed), logFileManager); err != nil {
+                    logger.Errorf("Failed to set indexed status for %s: %v", log.Path, err)
+                }
             }
         }
     }

     if changedCount > 0 {
-        logger.Infof("Queued %d log files for incremental indexing", changedCount)
+        logger.Infof("Completed incremental indexing for %d log files", changedCount)
+        // Update searcher shards once after all files are processed
+        nginx_log.UpdateSearcherShards()
     } else {
         logger.Debug("No log files need incremental indexing")
     }
@@ -114,6 +140,23 @@ func needsIncrementalIndexing(log *nginx_log.NginxLogWithIndex, persistence logIndexProvider) bool {
     fileModTime := fileInfo.ModTime()
     fileSize := fileInfo.Size()

+    // CRITICAL FIX: For large files (>100MB), add additional check to prevent excessive re-indexing
+    // If the file was recently indexed (within last 30 minutes), skip it even if size increased slightly
+    // This prevents the "infinite indexing" issue reported in #1455
+    const largeFileThreshold = 100 * 1024 * 1024 // 100MB
+    const recentIndexThreshold = 30 * time.Minute
+
+    if fileSize > largeFileThreshold && log.LastIndexed > 0 {
+        lastIndexTime := time.Unix(log.LastIndexed, 0)
+        timeSinceLastIndex := time.Since(lastIndexTime)
+
+        if timeSinceLastIndex < recentIndexThreshold {
+            logger.Debugf("Skipping large file %s (%d bytes): recently indexed %v ago (threshold: %v)",
+                log.Path, fileSize, timeSinceLastIndex, recentIndexThreshold)
+            return false
+        }
+    }
+
     if persistence != nil {
         if logIndex, err := persistence.GetLogIndex(log.Path); err == nil {
             if logIndex.NeedsIndexing(fileModTime, fileSize) {
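
The guard above is a pure decision over the file size and the last-indexed timestamp, so it can be isolated for testing. An illustrative standalone version (hypothetical helper name; the commit keeps the logic inline in needsIncrementalIndexing):

func shouldSkipLargeFile(fileSize int64, lastIndexed int64, now time.Time) bool {
    const largeFileThreshold = 100 * 1024 * 1024 // 100MB
    const recentIndexThreshold = 30 * time.Minute
    // Small files, and files never indexed before, always go through
    // the normal change-detection path.
    if fileSize <= largeFileThreshold || lastIndexed <= 0 {
        return false
    }
    // Large file indexed recently: skip this pass.
    return now.Sub(time.Unix(lastIndexed, 0)) < recentIndexThreshold
}

For a 200 MB access log last indexed 10 minutes ago this returns true (skip); once 30 minutes have passed it returns false and normal size/mtime change detection resumes.
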
@@ -157,96 +200,66 @@ func needsIncrementalIndexing(log *nginx_log.NginxLogWithIndex, persistence logIndexProvider) bool {
     return false
 }

-// queueIncrementalIndexing queues a file for incremental indexing
-func queueIncrementalIndexing(logPath string, modernIndexer interface{}, logFileManager interface{}) error {
-    // Set the file status to queued
-    if err := setFileIndexStatus(logPath, string(indexer.IndexStatusQueued), logFileManager); err != nil {
-        return err
-    }
-
-    // Queue the indexing job asynchronously
-    go func() {
-        defer func() {
-            // Ensure status is always updated, even on panic
-            if r := recover(); r != nil {
-                logger.Errorf("Recovered from panic during incremental indexing for %s: %v", logPath, r)
-                _ = setFileIndexStatus(logPath, string(indexer.IndexStatusError), logFileManager)
-            }
-        }()
-
-        logger.Infof("Starting incremental indexing for file: %s", logPath)
-
-        // Set status to indexing
-        if err := setFileIndexStatus(logPath, string(indexer.IndexStatusIndexing), logFileManager); err != nil {
-            logger.Errorf("Failed to set indexing status for %s: %v", logPath, err)
-            return
-        }
-
-        // Perform incremental indexing
-        startTime := time.Now()
-        docsCountMap, minTime, maxTime, err := modernIndexer.(*indexer.ParallelIndexer).IndexSingleFileIncrementally(logPath, nil)
-
-        if err != nil {
-            logger.Errorf("Failed incremental indexing for %s: %v", logPath, err)
-            // Set error status
-            if statusErr := setFileIndexStatus(logPath, string(indexer.IndexStatusError), logFileManager); statusErr != nil {
-                logger.Errorf("Failed to set error status for %s: %v", logPath, statusErr)
-            }
-            return
-        }
-
-        // Calculate total documents indexed
-        var totalDocsIndexed uint64
-        for _, docCount := range docsCountMap {
-            totalDocsIndexed += docCount
+// performSingleFileIncrementalIndexing performs incremental indexing for a single file synchronously
+func performSingleFileIncrementalIndexing(logPath string, modernIndexer interface{}, logFileManager interface{}) error {
+    defer func() {
+        // Ensure status is always updated, even on panic
+        if r := recover(); r != nil {
+            logger.Errorf("Recovered from panic during incremental indexing for %s: %v", logPath, r)
+            _ = setFileIndexStatus(logPath, string(indexer.IndexStatusError), logFileManager)
         }
+    }()

-        // Save indexing metadata
-        duration := time.Since(startTime)
+    // Perform incremental indexing
+    startTime := time.Now()
+    docsCountMap, minTime, maxTime, err := modernIndexer.(*indexer.ParallelIndexer).IndexSingleFileIncrementally(logPath, nil)

-        if lfm, ok := logFileManager.(*indexer.LogFileManager); ok {
-            persistence := lfm.GetPersistence()
-            var existingDocCount uint64
+    if err != nil {
+        return fmt.Errorf("indexing failed: %w", err)
+    }

-            existingIndex, err := persistence.GetLogIndex(logPath)
-            if err != nil {
-                logger.Warnf("Could not get existing log index for %s: %v", logPath, err)
-            }
+    // Calculate total documents indexed
+    var totalDocsIndexed uint64
+    for _, docCount := range docsCountMap {
+        totalDocsIndexed += docCount
+    }

-            // Determine if the file was rotated by checking if the current size is smaller than the last recorded size.
-            // This is a strong indicator of log rotation.
-            fileInfo, statErr := os.Stat(logPath)
-            isRotated := false
-            if statErr == nil && existingIndex != nil && fileInfo.Size() < existingIndex.LastSize {
-                isRotated = true
-                logger.Infof("Log rotation detected for %s: new size %d is smaller than last size %d. Resetting document count.",
-                    logPath, fileInfo.Size(), existingIndex.LastSize)
-            }
+    // Save indexing metadata
+    duration := time.Since(startTime)

-            if existingIndex != nil && !isRotated {
-                // If it's a normal incremental update (not a rotation), we build upon the existing count.
-                existingDocCount = existingIndex.DocumentCount
-            }
-            // If the file was rotated, existingDocCount remains 0, effectively starting the count over for the new file.
+    if lfm, ok := logFileManager.(*indexer.LogFileManager); ok {
+        persistence := lfm.GetPersistence()
+        var existingDocCount uint64

-            finalDocCount := existingDocCount + totalDocsIndexed
+        existingIndex, err := persistence.GetLogIndex(logPath)
+        if err != nil {
+            logger.Warnf("Could not get existing log index for %s: %v", logPath, err)
+        }

-            if err := lfm.SaveIndexMetadata(logPath, finalDocCount, startTime, duration, minTime, maxTime); err != nil {
-                logger.Errorf("Failed to save incremental index metadata for %s: %v", logPath, err)
-            }
+        // Determine if the file was rotated by checking if the current size is smaller than the last recorded size.
+        // This is a strong indicator of log rotation.
+        fileInfo, statErr := os.Stat(logPath)
+        isRotated := false
+        if statErr == nil && existingIndex != nil && fileInfo.Size() < existingIndex.LastSize {
+            isRotated = true
+            logger.Infof("Log rotation detected for %s: new size %d is smaller than last size %d. Resetting document count.",
+                logPath, fileInfo.Size(), existingIndex.LastSize)
         }

-        // Set status to indexed
-        if err := setFileIndexStatus(logPath, string(indexer.IndexStatusIndexed), logFileManager); err != nil {
-            logger.Errorf("Failed to set indexed status for %s: %v", logPath, err)
+        if existingIndex != nil && !isRotated {
+            // If it's a normal incremental update (not a rotation), we build upon the existing count.
+            existingDocCount = existingIndex.DocumentCount
         }
+        // If the file was rotated, existingDocCount remains 0, effectively starting the count over for the new file.

-        // Update searcher shards
-        nginx_log.UpdateSearcherShards()
+        finalDocCount := existingDocCount + totalDocsIndexed

-        logger.Infof("Successfully completed incremental indexing for %s, Documents: %d", logPath, totalDocsIndexed)
-    }()
+        if err := lfm.SaveIndexMetadata(logPath, finalDocCount, startTime, duration, minTime, maxTime); err != nil {
+            return fmt.Errorf("failed to save metadata: %w", err)
+        }
+    }

+    logger.Infof("Successfully completed incremental indexing for %s, Documents: %d", logPath, totalDocsIndexed)
     return nil
 }
252265

internal/nginx_log/indexer/adaptive_optimization.go
Lines changed: 22 additions & 6 deletions

@@ -108,15 +108,31 @@ type BatchAdjustment struct {
 func NewAdaptiveOptimizer(config *Config) *AdaptiveOptimizer {
     ctx, cancel := context.WithCancel(context.Background())

+    // Derive worker range from the configured worker count. We deliberately
+    // treat the configured WorkerCount as the *maximum* concurrency the user
+    // (or defaults) allow, and let the optimizer scale down when CPU is
+    // saturated, then back up again, but never beyond this cap.
+    maxProcs := runtime.GOMAXPROCS(0)
+    initialWorkers := config.WorkerCount
+    if initialWorkers <= 0 {
+        if maxProcs > 0 {
+            initialWorkers = maxProcs
+        } else {
+            initialWorkers = 2
+        }
+    }
+    minWorkers := max(2, initialWorkers/4)
+
     ao := &AdaptiveOptimizer{
         config: config,
         cpuMonitor: &CPUMonitor{
-            targetUtilization:   0.75,                             // Target 75% CPU utilization (more conservative)
-            measurementInterval: 5 * time.Second,
-            adjustmentThreshold: 0.10,                             // Adjust if 10% deviation from target (more sensitive)
-            maxWorkers:          runtime.GOMAXPROCS(0) * 6,        // Allow scaling up to 6x CPU cores for I/O-bound workloads
-            minWorkers:          max(2, runtime.GOMAXPROCS(0)/4),  // Minimum 2 workers or 1/4 of cores for baseline performance
-            measurements:        make([]float64, 0, 12),           // 1 minute history at 5s intervals
+            // Keep target utilization, but relax thresholds to reduce oscillation.
+            targetUtilization:   0.75,                    // Target 75% CPU utilization
+            measurementInterval: 5 * time.Second,         // Sample every 5 seconds
+            adjustmentThreshold: 0.10,                    // Adjust if 10% deviation from target
+            maxWorkers:          initialWorkers,          // Never scale above configured WorkerCount
+            minWorkers:          minWorkers,              // Minimum 2 workers or 1/4 of configured workers
+            measurements:        make([]float64, 0, 12),  // 1 minute history at 5s intervals
             maxSamples:          12,
         },
         batchSizeController: &BatchSizeController{
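
To make the new bounds concrete, here is an illustrative standalone version of the derivation (hypothetical helper name; the commit computes this inline), with example inputs:

// Requires Go 1.21+ for the built-in max.
func deriveWorkerBounds(configuredWorkers, maxProcs int) (initial, minimum int) {
    initial = configuredWorkers
    if initial <= 0 {
        if maxProcs > 0 {
            initial = maxProcs
        } else {
            initial = 2
        }
    }
    return initial, max(2, initial/4)
}

// deriveWorkerBounds(16, 8) returns (16, 4): the optimizer scales between 4 and 16 workers.
// deriveWorkerBounds(0, 8) returns (8, 2): an unset config falls back to GOMAXPROCS.

The practical effect versus the old code: maxWorkers drops from 6x CPU cores to the configured cap, so the optimizer can only throttle down and recover, never balloon past what the user allowed.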

internal/nginx_log/indexer/adaptive_optimization_test.go
Lines changed: 23 additions & 6 deletions

@@ -83,7 +83,20 @@ func TestAdaptiveOptimizer_SetWorkerCountChangeCallback(t *testing.T) {
 }

 func TestAdaptiveOptimizer_suggestWorkerIncrease(t *testing.T) {
-    ao := createTestAdaptiveOptimizer(4)
+    // Use higher initial worker count so there's room to increase
+    // Since maxWorkers is now set to config.WorkerCount, we need to start with a config
+    // where WorkerCount is higher than the current workers to test the increase logic
+    config := &Config{
+        WorkerCount: 16, // Set max to 16 so we can increase from current
+        BatchSize:   1000,
+    }
+
+    ao := NewAdaptiveOptimizer(config)
+    ao.SetActivityPoller(mockActivityPoller{busy: true})
+
+    // Manually set current workers to a lower value to allow increase
+    atomic.StoreInt64(&ao.workerCount, 8)
+    ao.config.WorkerCount = 16 // Keep max at 16

     var actualOldCount, actualNewCount int
     var callbackCalled bool
@@ -104,13 +117,17 @@ func TestAdaptiveOptimizer_suggestWorkerIncrease(t *testing.T) {
         t.Error("Expected worker count change callback to be called")
     }

-    if actualOldCount != 4 {
-        t.Errorf("Expected old worker count 4, got %d", actualOldCount)
+    if actualOldCount != 8 {
+        t.Errorf("Expected old worker count 8, got %d", actualOldCount)
+    }
+
+    // Should increase workers, but not more than max allowed (16)
+    if actualNewCount <= 8 {
+        t.Errorf("Expected new worker count to be greater than 8, got %d", actualNewCount)
     }

-    // Should increase workers, but not more than max allowed
-    if actualNewCount <= 4 {
-        t.Errorf("Expected new worker count to be greater than 4, got %d", actualNewCount)
+    if actualNewCount > 16 {
+        t.Errorf("Expected new worker count to not exceed max 16, got %d", actualNewCount)
     }

     // Verify config was updated

internal/nginx_log/indexer/parser.go
Lines changed: 17 additions & 1 deletion

@@ -5,6 +5,7 @@ import (
     "compress/gzip"
     "context"
     "io"
+    "runtime"
     "strings"
     "sync"

@@ -27,7 +28,22 @@ func InitLogParser() {
     config := parser.DefaultParserConfig()
     config.MaxLineLength = 16 * 1024 // 16KB for large log lines
     config.BatchSize = 15000         // Maximum batch size for highest frontend throughput
-    config.WorkerCount = 24          // Match CPU core count for high-throughput
+
+    // Derive parser worker count from available CPUs, with sane limits so that
+    // small machines are not overwhelmed while larger hosts can still use
+    // parallel parsing effectively.
+    maxProcs := runtime.GOMAXPROCS(0)
+    if maxProcs <= 0 {
+        maxProcs = runtime.NumCPU()
+    }
+    workerCount := maxProcs
+    if workerCount < 4 {
+        workerCount = 4
+    }
+    if workerCount > 16 {
+        workerCount = 16
+    }
+    config.WorkerCount = workerCount
     // Note: Caching is handled by the CachedUserAgentParser

     // Initialize user agent parser with caching (10,000 cache size for production)
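
This replaces a hard-coded 24 workers (wrong for most machines) with a CPU-derived value clamped to the range [4, 16]. As a quick sanity check of the logic above (hypothetical helper name; the commit inlines the clamp):

func clampParserWorkers(procs int) int {
    const minWorkers, maxWorkers = 4, 16
    if procs < minWorkers {
        return minWorkers // small VPS still gets enough parallelism
    }
    if procs > maxWorkers {
        return maxWorkers // large host capped to avoid diminishing returns
    }
    return procs // mid-size host uses all cores
}

// clampParserWorkers(2) == 4, clampParserWorkers(8) == 8, clampParserWorkers(32) == 16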
