From cfba665cf9c703cd4e1a804fdf9d239a09ee4d35 Mon Sep 17 00:00:00 2001 From: Daniel Jaglowski Date: Fri, 8 Sep 2023 02:17:48 -0600 Subject: [PATCH 1/4] [chore][pkg/stanza] Reduce severity of log when no files found (#26526) Fixes #26525 --- pkg/stanza/fileconsumer/file.go | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pkg/stanza/fileconsumer/file.go b/pkg/stanza/fileconsumer/file.go index 8192962cface..d610184ca1b9 100644 --- a/pkg/stanza/fileconsumer/file.go +++ b/pkg/stanza/fileconsumer/file.go @@ -58,7 +58,7 @@ func (m *Manager) Start(persister operator.Persister) error { } if _, err := m.fileMatcher.MatchFiles(); err != nil { - m.Warnw("finding files", "error", err.Error()) + m.Warnf("finding files: %v", err) } // Start polling goroutine @@ -115,7 +115,7 @@ func (m *Manager) poll(ctx context.Context) { // Get the list of paths on disk matches, err := m.fileMatcher.MatchFiles() if err != nil { - m.Errorf("error finding files: %s", err) + m.Warnf("finding files: %v", err) } for len(matches) > m.maxBatchFiles { From 75e29cea585c4f1014c37ce5a08fa49cb26aecb9 Mon Sep 17 00:00:00 2001 From: Daniel Jaglowski Date: Fri, 8 Sep 2023 03:39:21 -0600 Subject: [PATCH 2/4] [pkg/stanza] Extract flush package from tokenize package (#26517) --- .chloggen/pkg-stanza-flush.yaml | 30 +++ pkg/stanza/fileconsumer/config.go | 6 +- pkg/stanza/fileconsumer/file_test.go | 2 +- .../fileconsumer/internal/splitter/custom.go | 15 +- .../internal/splitter/custom_test.go | 26 +-- .../internal/splitter/multiline.go | 11 +- .../internal/splitter/multiline_test.go | 13 +- pkg/stanza/fileconsumer/reader_test.go | 35 +--- .../{tokenize/flusher.go => flush/flush.go} | 24 +-- pkg/stanza/flush/flush_test.go | 102 ++++++++++ pkg/stanza/tokenize/multiline_test.go | 175 +----------------- pkg/stanza/tokenize/splitter.go | 8 +- 12 files changed, 191 insertions(+), 256 deletions(-) create mode 100755 .chloggen/pkg-stanza-flush.yaml rename pkg/stanza/{tokenize/flusher.go => flush/flush.go} (79%) create mode 100644 pkg/stanza/flush/flush_test.go diff --git a/.chloggen/pkg-stanza-flush.yaml b/.chloggen/pkg-stanza-flush.yaml new file mode 100755 index 000000000000..bc731e2f1c1a --- /dev/null +++ b/.chloggen/pkg-stanza-flush.yaml @@ -0,0 +1,30 @@ +# Use this changelog template to create an entry for release notes. + +# One of 'breaking', 'deprecation', 'new_component', 'enhancement', 'bug_fix' +change_type: breaking + +# The name of the component, or a single word describing the area of concern, (e.g. filelogreceiver) +component: pkg/stanza + +# A brief description of the change. Surround your text with quotes ("") if it needs to start with a backtick (`). +note: Remove Flusher from tokenize.SplitterConfig + +# Mandatory: One or more tracking issues related to the change. You can use the PR number here if no issue exists. +issues: [26517] + +# (Optional) One or more lines of additional information to render under the primary note. +# These lines will be padded with 2 spaces and then inserted directly into the document. +# Use pipe (|) for multiline entries. +subtext: Removes the following in favor of flush.WithPeriod + - tokenize.DefaultFlushPeriod + - tokenize.FlusherConfig + - tokenize.NewFlusherConfig + +# If your change doesn't affect end users or the exported elements of any package, +# you should instead start your pull request title with [chore] or use the "Skip Changelog" label. +# Optional: The change log or logs in which this entry should be included. +# e.g. 
'[user]' or '[user, api]' +# Include 'user' if the change is relevant to end users. +# Include 'api' if there is a change to a library API. +# Default: '[user]' +change_logs: [api] diff --git a/pkg/stanza/fileconsumer/config.go b/pkg/stanza/fileconsumer/config.go index f015d3d8a4fa..25b39075f030 100644 --- a/pkg/stanza/fileconsumer/config.go +++ b/pkg/stanza/fileconsumer/config.go @@ -28,6 +28,7 @@ const ( defaultMaxLogSize = 1024 * 1024 defaultMaxConcurrentFiles = 1024 defaultEncoding = "utf-8" + defaultFlushPeriod = 500 * time.Millisecond ) var allowFileDeletion = featuregate.GlobalRegistry().MustRegister( @@ -79,6 +80,7 @@ type Config struct { Splitter tokenize.SplitterConfig `mapstructure:",squash,omitempty"` TrimConfig trim.Config `mapstructure:",squash,omitempty"` Encoding string `mapstructure:"encoding,omitempty"` + FlushPeriod time.Duration `mapstructure:"force_flush_period,omitempty"` Header *HeaderConfig `mapstructure:"header,omitempty"` } @@ -99,7 +101,7 @@ func (c Config) Build(logger *zap.SugaredLogger, emit emit.Callback) (*Manager, } // Ensure that splitter is buildable - factory := splitter.NewMultilineFactory(c.Splitter, enc, int(c.MaxLogSize), c.TrimConfig.Func()) + factory := splitter.NewMultilineFactory(c.Splitter, enc, int(c.MaxLogSize), c.TrimConfig.Func(), c.FlushPeriod) if _, err := factory.Build(); err != nil { return nil, err } @@ -118,7 +120,7 @@ func (c Config) BuildWithSplitFunc(logger *zap.SugaredLogger, emit emit.Callback } // Ensure that splitter is buildable - factory := splitter.NewCustomFactory(c.Splitter.Flusher, splitFunc) + factory := splitter.NewCustomFactory(splitFunc, c.FlushPeriod) if _, err := factory.Build(); err != nil { return nil, err } diff --git a/pkg/stanza/fileconsumer/file_test.go b/pkg/stanza/fileconsumer/file_test.go index bcd91d530f6c..5d34cd04d799 100644 --- a/pkg/stanza/fileconsumer/file_test.go +++ b/pkg/stanza/fileconsumer/file_test.go @@ -548,7 +548,7 @@ func TestNoNewline(t *testing.T) { cfg := NewConfig().includeDir(tempDir) cfg.StartAt = "beginning" cfg.Splitter = tokenize.NewSplitterConfig() - cfg.Splitter.Flusher.Period = time.Nanosecond + cfg.FlushPeriod = time.Nanosecond operator, emitCalls := buildTestManager(t, cfg) temp := openTemp(t, tempDir) diff --git a/pkg/stanza/fileconsumer/internal/splitter/custom.go b/pkg/stanza/fileconsumer/internal/splitter/custom.go index 712c63eb1427..04bdf6cdc650 100644 --- a/pkg/stanza/fileconsumer/internal/splitter/custom.go +++ b/pkg/stanza/fileconsumer/internal/splitter/custom.go @@ -5,26 +5,27 @@ package splitter // import "github.com/open-telemetry/opentelemetry-collector-co import ( "bufio" + "time" - "github.com/open-telemetry/opentelemetry-collector-contrib/pkg/stanza/tokenize" + "github.com/open-telemetry/opentelemetry-collector-contrib/pkg/stanza/flush" "github.com/open-telemetry/opentelemetry-collector-contrib/pkg/stanza/trim" ) type customFactory struct { - flusherCfg tokenize.FlusherConfig - splitFunc bufio.SplitFunc + splitFunc bufio.SplitFunc + flushPeriod time.Duration } var _ Factory = (*customFactory)(nil) -func NewCustomFactory(flusherCfg tokenize.FlusherConfig, splitFunc bufio.SplitFunc) Factory { +func NewCustomFactory(splitFunc bufio.SplitFunc, flushPeriod time.Duration) Factory { return &customFactory{ - flusherCfg: flusherCfg, - splitFunc: splitFunc, + splitFunc: splitFunc, + flushPeriod: flushPeriod, } } // Build builds Multiline Splitter struct func (f *customFactory) Build() (bufio.SplitFunc, error) { - return f.flusherCfg.Wrap(f.splitFunc, trim.Nop), nil + return 
flush.WithPeriod(f.splitFunc, trim.Nop, f.flushPeriod), nil } diff --git a/pkg/stanza/fileconsumer/internal/splitter/custom_test.go b/pkg/stanza/fileconsumer/internal/splitter/custom_test.go index 6ccdeb22f220..54002d18fb0d 100644 --- a/pkg/stanza/fileconsumer/internal/splitter/custom_test.go +++ b/pkg/stanza/fileconsumer/internal/splitter/custom_test.go @@ -6,36 +6,30 @@ package splitter import ( "bufio" "testing" + "time" "github.com/stretchr/testify/assert" - - "github.com/open-telemetry/opentelemetry-collector-contrib/pkg/stanza/tokenize" ) func TestCustomFactory(t *testing.T) { - type fields struct { - Flusher tokenize.FlusherConfig - Splitter bufio.SplitFunc - } tests := []struct { - name string - fields fields - wantErr bool + name string + splitter bufio.SplitFunc + flushPeriod time.Duration + wantErr bool }{ { name: "default configuration", - fields: fields{ - Flusher: tokenize.NewFlusherConfig(), - Splitter: func(data []byte, atEOF bool) (advance int, token []byte, err error) { - return len(data), data, nil - }, + splitter: func(data []byte, atEOF bool) (advance int, token []byte, err error) { + return len(data), data, nil }, - wantErr: false, + flushPeriod: 100 * time.Millisecond, + wantErr: false, }, } for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { - factory := NewCustomFactory(tt.fields.Flusher, tt.fields.Splitter) + factory := NewCustomFactory(tt.splitter, tt.flushPeriod) got, err := factory.Build() if (err != nil) != tt.wantErr { t.Errorf("Build() error = %v, wantErr %v", err, tt.wantErr) diff --git a/pkg/stanza/fileconsumer/internal/splitter/multiline.go b/pkg/stanza/fileconsumer/internal/splitter/multiline.go index a8b882cd3c00..258883e5e6f1 100644 --- a/pkg/stanza/fileconsumer/internal/splitter/multiline.go +++ b/pkg/stanza/fileconsumer/internal/splitter/multiline.go @@ -5,9 +5,11 @@ package splitter // import "github.com/open-telemetry/opentelemetry-collector-co import ( "bufio" + "time" "golang.org/x/text/encoding" + "github.com/open-telemetry/opentelemetry-collector-contrib/pkg/stanza/flush" "github.com/open-telemetry/opentelemetry-collector-contrib/pkg/stanza/tokenize" "github.com/open-telemetry/opentelemetry-collector-contrib/pkg/stanza/trim" ) @@ -17,6 +19,7 @@ type multilineFactory struct { encoding encoding.Encoding maxLogSize int trimFunc trim.Func + flushPeriod time.Duration } var _ Factory = (*multilineFactory)(nil) @@ -26,16 +29,22 @@ func NewMultilineFactory( encoding encoding.Encoding, maxLogSize int, trimFunc trim.Func, + flushPeriod time.Duration, ) Factory { return &multilineFactory{ splitterCfg: splitterCfg, encoding: encoding, maxLogSize: maxLogSize, trimFunc: trimFunc, + flushPeriod: flushPeriod, } } // Build builds Multiline Splitter struct func (f *multilineFactory) Build() (bufio.SplitFunc, error) { - return f.splitterCfg.Build(f.encoding, false, f.maxLogSize, f.trimFunc) + splitFunc, err := f.splitterCfg.Build(f.encoding, false, f.maxLogSize, f.trimFunc) + if err != nil { + return nil, err + } + return flush.WithPeriod(splitFunc, f.trimFunc, f.flushPeriod), nil } diff --git a/pkg/stanza/fileconsumer/internal/splitter/multiline_test.go b/pkg/stanza/fileconsumer/internal/splitter/multiline_test.go index cd8559245ede..9fac28465e9b 100644 --- a/pkg/stanza/fileconsumer/internal/splitter/multiline_test.go +++ b/pkg/stanza/fileconsumer/internal/splitter/multiline_test.go @@ -5,6 +5,7 @@ package splitter import ( "testing" + "time" "github.com/stretchr/testify/assert" "golang.org/x/text/encoding" @@ -20,6 +21,7 @@ func TestMultilineBuild(t 
*testing.T) { splitterConfig tokenize.SplitterConfig encoding encoding.Encoding maxLogSize int + flushPeriod time.Duration wantErr bool }{ { @@ -27,25 +29,26 @@ func TestMultilineBuild(t *testing.T) { splitterConfig: tokenize.NewSplitterConfig(), encoding: unicode.UTF8, maxLogSize: 1024, + flushPeriod: 100 * time.Millisecond, wantErr: false, }, { name: "Multiline error", splitterConfig: tokenize.SplitterConfig{ - Flusher: tokenize.NewFlusherConfig(), Multiline: tokenize.MultilineConfig{ LineStartPattern: "START", LineEndPattern: "END", }, }, - encoding: unicode.UTF8, - maxLogSize: 1024, - wantErr: true, + flushPeriod: 100 * time.Millisecond, + encoding: unicode.UTF8, + maxLogSize: 1024, + wantErr: true, }, } for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { - factory := NewMultilineFactory(tt.splitterConfig, tt.encoding, tt.maxLogSize, trim.Nop) + factory := NewMultilineFactory(tt.splitterConfig, tt.encoding, tt.maxLogSize, trim.Nop, tt.flushPeriod) got, err := factory.Build() if (err != nil) != tt.wantErr { t.Errorf("Build() error = %v, wantErr %v", err, tt.wantErr) diff --git a/pkg/stanza/fileconsumer/reader_test.go b/pkg/stanza/fileconsumer/reader_test.go index 1d49c4075767..ec444834fc97 100644 --- a/pkg/stanza/fileconsumer/reader_test.go +++ b/pkg/stanza/fileconsumer/reader_test.go @@ -10,7 +10,6 @@ import ( "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" - "golang.org/x/text/encoding/unicode" "github.com/open-telemetry/opentelemetry-collector-contrib/pkg/stanza/decode" "github.com/open-telemetry/opentelemetry-collector-contrib/pkg/stanza/fileconsumer/internal/fingerprint" @@ -25,9 +24,7 @@ import ( func TestPersistFlusher(t *testing.T) { flushPeriod := 100 * time.Millisecond - sCfg := tokenize.NewSplitterConfig() - sCfg.Flusher.Period = flushPeriod - f, emitChan := testReaderFactoryWithSplitter(t, sCfg) + f, emitChan := testReaderFactory(t, tokenize.NewSplitterConfig(), defaultMaxLogSize, flushPeriod) temp := openTemp(t, t.TempDir()) fp, err := f.newFingerprint(temp) @@ -113,7 +110,7 @@ func TestTokenization(t *testing.T) { for _, tc := range testCases { t.Run(tc.testName, func(t *testing.T) { - f, emitChan := testReaderFactory(t) + f, emitChan := testReaderFactory(t, tokenize.NewSplitterConfig(), defaultMaxLogSize, defaultFlushPeriod) temp := openTemp(t, t.TempDir()) _, err := temp.Write(tc.fileContent) @@ -143,8 +140,7 @@ func TestTokenizationTooLong(t *testing.T) { []byte("aaa"), } - f, emitChan := testReaderFactory(t) - f.readerConfig.maxLogSize = 10 + f, emitChan := testReaderFactory(t, tokenize.NewSplitterConfig(), 10, defaultFlushPeriod) temp := openTemp(t, t.TempDir()) _, err := temp.Write(fileContent) @@ -174,15 +170,9 @@ func TestTokenizationTooLongWithLineStartPattern(t *testing.T) { []byte("2023-01-01 2"), } - f, emitChan := testReaderFactory(t) - - mlc := tokenize.NewMultilineConfig() - mlc.LineStartPattern = `\d+-\d+-\d+` - f.splitterFactory = splitter.NewMultilineFactory(tokenize.SplitterConfig{ - Flusher: tokenize.NewFlusherConfig(), - Multiline: mlc, - }, unicode.UTF8, 15, trim.Whitespace) - f.readerConfig.maxLogSize = 15 + sCfg := tokenize.NewSplitterConfig() + sCfg.Multiline.LineStartPattern = `\d+-\d+-\d+` + f, emitChan := testReaderFactory(t, sCfg, 15, defaultFlushPeriod) temp := openTemp(t, t.TempDir()) _, err := temp.Write(fileContent) @@ -205,8 +195,7 @@ func TestTokenizationTooLongWithLineStartPattern(t *testing.T) { func TestHeaderFingerprintIncluded(t *testing.T) { fileContent := []byte("#header-line\naaa\n") - f, _ := 
testReaderFactory(t) - f.readerConfig.maxLogSize = 10 + f, _ := testReaderFactory(t, tokenize.NewSplitterConfig(), 10, defaultFlushPeriod) regexConf := regex.NewConfig() regexConf.Regex = "^#(?P
.*)" @@ -234,11 +223,7 @@ func TestHeaderFingerprintIncluded(t *testing.T) { require.Equal(t, []byte("#header-line\naaa\n"), r.Fingerprint.FirstBytes) } -func testReaderFactory(t *testing.T) (*readerFactory, chan *emitParams) { - return testReaderFactoryWithSplitter(t, tokenize.NewSplitterConfig()) -} - -func testReaderFactoryWithSplitter(t *testing.T, splitterConfig tokenize.SplitterConfig) (*readerFactory, chan *emitParams) { +func testReaderFactory(t *testing.T, sCfg tokenize.SplitterConfig, maxLogSize int, flushPeriod time.Duration) (*readerFactory, chan *emitParams) { emitChan := make(chan *emitParams, 100) enc, err := decode.LookupEncoding(defaultEncoding) trimFunc := trim.Whitespace @@ -247,11 +232,11 @@ func testReaderFactoryWithSplitter(t *testing.T, splitterConfig tokenize.Splitte SugaredLogger: testutil.Logger(t), readerConfig: &readerConfig{ fingerprintSize: fingerprint.DefaultSize, - maxLogSize: defaultMaxLogSize, + maxLogSize: maxLogSize, emit: testEmitFunc(emitChan), }, fromBeginning: true, - splitterFactory: splitter.NewMultilineFactory(splitterConfig, enc, defaultMaxLogSize, trimFunc), + splitterFactory: splitter.NewMultilineFactory(sCfg, enc, maxLogSize, trimFunc, flushPeriod), encoding: enc, }, emitChan } diff --git a/pkg/stanza/tokenize/flusher.go b/pkg/stanza/flush/flush.go similarity index 79% rename from pkg/stanza/tokenize/flusher.go rename to pkg/stanza/flush/flush.go index 60a44d4f62a8..f42e18c82370 100644 --- a/pkg/stanza/tokenize/flusher.go +++ b/pkg/stanza/flush/flush.go @@ -1,7 +1,7 @@ // Copyright The OpenTelemetry Authors // SPDX-License-Identifier: Apache-2.0 -package tokenize // import "github.com/open-telemetry/opentelemetry-collector-contrib/pkg/stanza/tokenize" +package flush // import "github.com/open-telemetry/opentelemetry-collector-contrib/pkg/stanza/flush" import ( "bufio" @@ -10,26 +10,14 @@ import ( "github.com/open-telemetry/opentelemetry-collector-contrib/pkg/stanza/trim" ) -const DefaultFlushPeriod = 500 * time.Millisecond - -// FlusherConfig is a configuration of Flusher helper -type FlusherConfig struct { - Period time.Duration `mapstructure:"force_flush_period"` -} - -// NewFlusherConfig creates a default Flusher config -func NewFlusherConfig() FlusherConfig { - return FlusherConfig{ - // Empty or `0s` means that we will never force flush - Period: DefaultFlushPeriod, - } -} - // Wrap a bufio.SplitFunc with a flusher -func (c *FlusherConfig) Wrap(splitFunc bufio.SplitFunc, trimFunc trim.Func) bufio.SplitFunc { +func WithPeriod(splitFunc bufio.SplitFunc, trimFunc trim.Func, period time.Duration) bufio.SplitFunc { + if period <= 0 { + return splitFunc + } f := &flusher{ lastDataChange: time.Now(), - forcePeriod: c.Period, + forcePeriod: period, previousDataLength: 0, } return f.splitFunc(splitFunc, trimFunc) diff --git a/pkg/stanza/flush/flush_test.go b/pkg/stanza/flush/flush_test.go new file mode 100644 index 000000000000..25d3aec0212b --- /dev/null +++ b/pkg/stanza/flush/flush_test.go @@ -0,0 +1,102 @@ +// Copyright The OpenTelemetry Authors +// SPDX-License-Identifier: Apache-2.0 + +package flush + +import ( + "bufio" + "testing" + "time" + + "github.com/stretchr/testify/assert" + + "github.com/open-telemetry/opentelemetry-collector-contrib/pkg/stanza/trim" +) + +func TestFlusher(t *testing.T) { + + // bufio.ScanWords is a simple split function which with tokenize based on newlines. + // It will return a partial token if atEOF=true. In order to test the flusher, + // we don't want the split func to return partial tokens on its own. 
Instead, we only + // want the flusher to force the partial out based on its own behavior. Therefore, we + // always use atEOF=false. + + flushPeriod := 100 * time.Millisecond + f := WithPeriod(bufio.ScanWords, trim.Nop, flushPeriod) + + content := []byte("foo bar hellowo") + + // The first token is complete + advance, token, err := f(content, false) + assert.NoError(t, err) + assert.Equal(t, 4, advance) + assert.Equal(t, []byte("foo"), token) + + // The second token is also complete + advance, token, err = f(content[4:], false) + assert.NoError(t, err) + assert.Equal(t, 4, advance) + assert.Equal(t, []byte("bar"), token) + + // We find a partial token, but we just updated, so don't flush it yet + advance, token, err = f(content[8:], false) + assert.NoError(t, err) + assert.Equal(t, 0, advance) + assert.Equal(t, []byte(nil), token) + + // We find the same partial token, but we updated quite recently, so still don't flush it yet + advance, token, err = f(content[8:], false) + assert.NoError(t, err) + assert.Equal(t, 0, advance) + assert.Equal(t, []byte(nil), token) + + time.Sleep(2 * flushPeriod) + + // Now it's been a while, so we should just flush the partial token + advance, token, err = f(content[8:], false) + assert.NoError(t, err) + assert.Equal(t, 7, advance) + assert.Equal(t, []byte("hellowo"), token) +} + +func TestNoFlushPeriod(t *testing.T) { + // Same test as above, but with a flush period of 0 we should never force flush. + // In other words, we should expect exactly the behavior of bufio.ScanWords. + + flushPeriod := time.Duration(0) + f := WithPeriod(bufio.ScanWords, trim.Nop, flushPeriod) + + content := []byte("foo bar hellowo") + + // The first token is complete + advance, token, err := f(content, false) + assert.NoError(t, err) + assert.Equal(t, 4, advance) + assert.Equal(t, []byte("foo"), token) + + // The second token is also complete + advance, token, err = f(content[4:], false) + assert.NoError(t, err) + assert.Equal(t, 4, advance) + assert.Equal(t, []byte("bar"), token) + + // We find a partial token, but we're using flushPeriod = 0 so we should never flush + advance, token, err = f(content[8:], false) + assert.NoError(t, err) + assert.Equal(t, 0, advance) + assert.Equal(t, []byte(nil), token) + + // We find the same partial token, but we're using flushPeriod = 0 so we should never flush + advance, token, err = f(content[8:], false) + assert.NoError(t, err) + assert.Equal(t, 0, advance) + assert.Equal(t, []byte(nil), token) + + time.Sleep(2 * flushPeriod) + + // Now it's been a while, but we are using flushPeriod=0, so we should never not flush + advance, token, err = f(content[8:], false) + assert.NoError(t, err) + assert.Equal(t, 0, advance) + assert.Equal(t, []byte(nil), token) +} diff --git a/pkg/stanza/tokenize/multiline_test.go b/pkg/stanza/tokenize/multiline_test.go index a8c85db245e5..7c85799c54ef 100644 --- a/pkg/stanza/tokenize/multiline_test.go +++ b/pkg/stanza/tokenize/multiline_test.go @@ -10,7 +10,6 @@ import ( "fmt" "regexp" "testing" - "time" "github.com/stretchr/testify/require" "golang.org/x/text/encoding" @@ -20,15 +19,8 @@ import ( "github.com/open-telemetry/opentelemetry-collector-contrib/pkg/stanza/trim" ) -const ( - // Those values has been experimentally figured out for windows - sleepDuration time.Duration = time.Millisecond * 80 - forcePeriod time.Duration = time.Millisecond * 40 -) - type MultiLineTokenizerTestCase struct { tokenizetest.TestCase - Flusher *FlusherConfig } func TestLineStartSplitFunc(t *testing.T) { @@ -42,7 +34,6 @@ func 
TestLineStartSplitFunc(t *testing.T) { `LOGSTART 123 log1`, }, }, - nil, }, { tokenizetest.TestCase{ @@ -54,7 +45,6 @@ func TestLineStartSplitFunc(t *testing.T) { `LOGSTART 234 log2`, }, }, - nil, }, { tokenizetest.TestCase{ @@ -66,7 +56,6 @@ func TestLineStartSplitFunc(t *testing.T) { "LOGSTART 234 log2", }, }, - nil, }, { tokenizetest.TestCase{ @@ -74,7 +63,6 @@ func TestLineStartSplitFunc(t *testing.T) { Pattern: `LOGSTART \d+ `, Input: []byte(`file that has no matches in it`), }, - nil, }, { tokenizetest.TestCase{ @@ -86,7 +74,6 @@ func TestLineStartSplitFunc(t *testing.T) { `LOGSTART 123 part that matches`, }, }, - nil, }, { tokenizetest.TestCase{ @@ -102,7 +89,6 @@ func TestLineStartSplitFunc(t *testing.T) { `LOGSTART 123 ` + string(tokenizetest.GenerateBytes(100)), }, }, - nil, }, { tokenizetest.TestCase{ @@ -118,7 +104,6 @@ func TestLineStartSplitFunc(t *testing.T) { `LOGSTART 123 ` + string(tokenizetest.GenerateBytes(10000)), }, }, - nil, }, { tokenizetest.TestCase{ @@ -132,7 +117,6 @@ func TestLineStartSplitFunc(t *testing.T) { }(), ExpectedError: errors.New("bufio.Scanner: token too long"), }, - nil, }, { tokenizetest.TestCase{ @@ -144,7 +128,6 @@ func TestLineStartSplitFunc(t *testing.T) { "LOGSTART 17 log2\nLOGPART log2\nanother line", }, }, - nil, }, { tokenizetest.TestCase{ @@ -152,62 +135,6 @@ func TestLineStartSplitFunc(t *testing.T) { Pattern: `^LOGSTART \d+`, Input: []byte("LOGPART log1\nLOGPART log1\t \n"), }, - nil, - }, - { - tokenizetest.TestCase{ - Name: "LogsWithFlusher", - Pattern: `^LOGSTART \d+`, - Input: []byte("LOGPART log1\nLOGPART log1\t \n"), - ExpectedTokens: []string{ - "LOGPART log1\nLOGPART log1", - }, - - AdditionalIterations: 1, - Sleep: sleepDuration, - }, - &FlusherConfig{Period: forcePeriod}, - }, - { - tokenizetest.TestCase{ - Name: "LogsWithFlusherWithMultipleLogsInBuffer", - Pattern: `^LOGSTART \d+`, - Input: []byte("LOGPART log1\nLOGSTART 123\nLOGPART log1\t \n"), - ExpectedTokens: []string{ - "LOGPART log1", - "LOGSTART 123\nLOGPART log1", - }, - AdditionalIterations: 1, - Sleep: sleepDuration, - }, - &FlusherConfig{Period: forcePeriod}, - }, - { - tokenizetest.TestCase{ - Name: "LogsWithLongFlusherWithMultipleLogsInBuffer", - Pattern: `^LOGSTART \d+`, - Input: []byte("LOGPART log1\nLOGSTART 123\nLOGPART log1\t \n"), - ExpectedTokens: []string{ - "LOGPART log1", - }, - AdditionalIterations: 1, - Sleep: forcePeriod / 4, - }, - &FlusherConfig{Period: 16 * forcePeriod}, - }, - { - tokenizetest.TestCase{ - Name: "LogsWithFlusherWithLogStartingWithWhiteChars", - Pattern: `^LOGSTART \d+`, - Input: []byte("\nLOGSTART 333"), - ExpectedTokens: []string{ - "", - "LOGSTART 333", - }, - AdditionalIterations: 1, - Sleep: sleepDuration, - }, - &FlusherConfig{Period: forcePeriod}, }, } @@ -222,9 +149,6 @@ func TestLineStartSplitFunc(t *testing.T) { }.Func() splitFunc, err := cfg.getSplitFunc(unicode.UTF8, false, 0, trimFunc) require.NoError(t, err) - if tc.Flusher != nil { - splitFunc = tc.Flusher.Wrap(splitFunc, trimFunc) - } t.Run(tc.Name, tc.Run(splitFunc)) } @@ -259,7 +183,6 @@ func TestLineEndSplitFunc(t *testing.T) { `my log LOGEND 123`, }, }, - nil, }, { tokenizetest.TestCase{ @@ -271,7 +194,6 @@ func TestLineEndSplitFunc(t *testing.T) { `log2 LOGEND 234`, }, }, - nil, }, { tokenizetest.TestCase{ @@ -283,7 +205,6 @@ func TestLineEndSplitFunc(t *testing.T) { "log2 LOGEND", }, }, - nil, }, { tokenizetest.TestCase{ @@ -291,7 +212,6 @@ func TestLineEndSplitFunc(t *testing.T) { Pattern: `LOGEND \d+`, Input: []byte(`file that has no matches in it`), }, - 
nil, }, { tokenizetest.TestCase{ @@ -302,7 +222,6 @@ func TestLineEndSplitFunc(t *testing.T) { `part that matches LOGEND 123`, }, }, - nil, }, { tokenizetest.TestCase{ @@ -317,7 +236,6 @@ func TestLineEndSplitFunc(t *testing.T) { string(tokenizetest.GenerateBytes(100)) + `LOGEND 1`, }, }, - nil, }, { tokenizetest.TestCase{ @@ -332,7 +250,6 @@ func TestLineEndSplitFunc(t *testing.T) { string(tokenizetest.GenerateBytes(10000)) + `LOGEND 1`, }, }, - nil, }, { tokenizetest.TestCase{ @@ -345,7 +262,6 @@ func TestLineEndSplitFunc(t *testing.T) { }(), ExpectedError: errors.New("bufio.Scanner: token too long"), }, - nil, }, { tokenizetest.TestCase{ @@ -357,7 +273,6 @@ func TestLineEndSplitFunc(t *testing.T) { "LOGSTART 17 log2\nLOGPART log2\nLOGEND log2", }, }, - nil, }, { tokenizetest.TestCase{ @@ -365,64 +280,6 @@ func TestLineEndSplitFunc(t *testing.T) { Pattern: `^LOGEND.*$`, Input: []byte("LOGPART log1\nLOGPART log1\t \n"), }, - nil, - }, - { - tokenizetest.TestCase{ - Name: "LogsWithFlusher", - Pattern: `^LOGEND.*$`, - Input: []byte("LOGPART log1\nLOGPART log1\t \n"), - ExpectedTokens: []string{ - "LOGPART log1\nLOGPART log1", - }, - - AdditionalIterations: 1, - Sleep: sleepDuration, - }, - &FlusherConfig{Period: forcePeriod}, - }, - { - tokenizetest.TestCase{ - Name: "LogsWithFlusherWithMultipleLogsInBuffer", - Pattern: `^LOGEND.*$`, - Input: []byte("LOGPART log1\nLOGEND\nLOGPART log1\t \n"), - ExpectedTokens: []string{ - "LOGPART log1\nLOGEND", - "LOGPART log1", - }, - - AdditionalIterations: 1, - Sleep: sleepDuration, - }, - &FlusherConfig{Period: forcePeriod}, - }, - { - tokenizetest.TestCase{ - Name: "LogsWithLongFlusherWithMultipleLogsInBuffer", - Pattern: `^LOGEND.*$`, - Input: []byte("LOGPART log1\nLOGEND\nLOGPART log1\t \n"), - ExpectedTokens: []string{ - "LOGPART log1\nLOGEND", - }, - - AdditionalIterations: 1, - Sleep: forcePeriod / 4, - }, - &FlusherConfig{Period: 16 * forcePeriod}, - }, - { - tokenizetest.TestCase{ - Name: "LogsWithFlusherWithLogStartingWithWhiteChars", - Pattern: `LOGEND \d+$`, - Input: []byte("\nLOGEND 333"), - ExpectedTokens: []string{ - "LOGEND 333", - }, - - AdditionalIterations: 1, - Sleep: sleepDuration, - }, - &FlusherConfig{Period: forcePeriod}, }, } @@ -437,9 +294,6 @@ func TestLineEndSplitFunc(t *testing.T) { }.Func() splitFunc, err := cfg.getSplitFunc(unicode.UTF8, false, 0, trimFunc) require.NoError(t, err) - if tc.Flusher != nil { - splitFunc = tc.Flusher.Wrap(splitFunc, trimFunc) - } t.Run(tc.Name, tc.Run(splitFunc)) } } @@ -452,7 +306,7 @@ func TestNewlineSplitFunc(t *testing.T) { ExpectedTokens: []string{ `my log`, }, - }, nil, + }, }, { tokenizetest.TestCase{Name: "OneLogCarriageReturn", @@ -461,7 +315,6 @@ func TestNewlineSplitFunc(t *testing.T) { `my log`, }, }, - nil, }, { tokenizetest.TestCase{Name: "TwoLogsSimple", @@ -471,7 +324,6 @@ func TestNewlineSplitFunc(t *testing.T) { `log2`, }, }, - nil, }, { tokenizetest.TestCase{Name: "TwoLogsCarriageReturn", @@ -481,13 +333,11 @@ func TestNewlineSplitFunc(t *testing.T) { `log2`, }, }, - nil, }, { tokenizetest.TestCase{Name: "NoTailingNewline", Input: []byte(`foo`), }, - nil, }, { tokenizetest.TestCase{Name: "HugeLog100", @@ -500,7 +350,6 @@ func TestNewlineSplitFunc(t *testing.T) { string(tokenizetest.GenerateBytes(100)), }, }, - nil, }, { tokenizetest.TestCase{Name: "HugeLog10000", @@ -513,7 +362,6 @@ func TestNewlineSplitFunc(t *testing.T) { string(tokenizetest.GenerateBytes(10000)), }, }, - nil, }, { tokenizetest.TestCase{Name: "HugeLog1000000", @@ -524,24 +372,11 @@ func 
TestNewlineSplitFunc(t *testing.T) { }(), ExpectedError: errors.New("bufio.Scanner: token too long"), }, - nil, }, { tokenizetest.TestCase{Name: "LogsWithoutFlusher", Input: []byte("LOGPART log1"), }, - nil, - }, - { - tokenizetest.TestCase{Name: "LogsWithFlusher", - Input: []byte("LOGPART log1"), - ExpectedTokens: []string{ - "LOGPART log1", - }, - AdditionalIterations: 1, - Sleep: sleepDuration, - }, - &FlusherConfig{Period: forcePeriod}, }, { tokenizetest.TestCase{Name: "DefaultFlusherSplits", @@ -551,7 +386,6 @@ func TestNewlineSplitFunc(t *testing.T) { "log2", }, }, - nil, }, { tokenizetest.TestCase{Name: "LogsWithLogStartingWithWhiteChars", @@ -561,7 +395,6 @@ func TestNewlineSplitFunc(t *testing.T) { "LOGEND 333", }, }, - nil, }, { tokenizetest.TestCase{Name: "PreserveLeadingWhitespaces", @@ -572,7 +405,6 @@ func TestNewlineSplitFunc(t *testing.T) { }, PreserveLeadingWhitespaces: true, }, - nil, }, { tokenizetest.TestCase{Name: "PreserveTrailingWhitespaces", @@ -583,7 +415,6 @@ func TestNewlineSplitFunc(t *testing.T) { }, PreserveTrailingWhitespaces: true, }, - nil, }, { tokenizetest.TestCase{Name: "PreserveBothLeadingAndTrailingWhitespaces", @@ -595,7 +426,6 @@ func TestNewlineSplitFunc(t *testing.T) { PreserveLeadingWhitespaces: true, PreserveTrailingWhitespaces: true, }, - nil, }, } @@ -606,9 +436,6 @@ func TestNewlineSplitFunc(t *testing.T) { }.Func() splitFunc, err := NewlineSplitFunc(unicode.UTF8, false, trimFunc) require.NoError(t, err) - if tc.Flusher != nil { - splitFunc = tc.Flusher.Wrap(splitFunc, trimFunc) - } t.Run(tc.Name, tc.Run(splitFunc)) } } diff --git a/pkg/stanza/tokenize/splitter.go b/pkg/stanza/tokenize/splitter.go index 3cae0b137ded..c2d8a7444344 100644 --- a/pkg/stanza/tokenize/splitter.go +++ b/pkg/stanza/tokenize/splitter.go @@ -13,7 +13,6 @@ import ( // SplitterConfig consolidates MultilineConfig and FlusherConfig type SplitterConfig struct { - Flusher FlusherConfig `mapstructure:",squash,omitempty"` Multiline MultilineConfig `mapstructure:"multiline,omitempty"` } @@ -21,15 +20,10 @@ type SplitterConfig struct { func NewSplitterConfig() SplitterConfig { return SplitterConfig{ Multiline: NewMultilineConfig(), - Flusher: FlusherConfig{Period: DefaultFlushPeriod}, } } // Build builds bufio.SplitFunc based on the config func (c *SplitterConfig) Build(enc encoding.Encoding, flushAtEOF bool, maxLogSize int, trimFunc trim.Func) (bufio.SplitFunc, error) { - splitFunc, err := c.Multiline.Build(enc, flushAtEOF, maxLogSize, trimFunc) - if err != nil { - return nil, err - } - return c.Flusher.Wrap(splitFunc, trimFunc), nil + return c.Multiline.Build(enc, flushAtEOF, maxLogSize, trimFunc) } From df03442b6ecda48054b97caff87601a8ac4e0206 Mon Sep 17 00:00:00 2001 From: Ilias Katsakioris Date: Fri, 8 Sep 2023 15:22:37 +0300 Subject: [PATCH 3/4] [extension/oauth2clientauth] Enable dynamically reading ClientID and ClientSecret from files (#26310) **Description:** This PR implements the feature described in detail in the issue linked below. In a nutshell, it extends the `oauth2clientauth` extension to read ClientID and/or ClientSecret from files whenever a new token is needed for the OAuth flow. As a result, the extension can use updated credentials (when the old ones expire for example) without the need to restart the OTEL collector, as long as the file contents are in sync. **Link to tracking Issue:** #26117 **Testing:** Apart from the unit testing you can see in the PR, I've tested this feature in two real-life environments: 1. 
As a systemd service exporting `otlphttp` data 2. A Kubernetes microservice (deployed by an OpenTelemetryCollector CR) exporting `otlphttp` data In both cases, the collectors export the data to a service which sits behind an OIDC authentication proxy. Using the `oauth2clientauth` extension, the `otlphttp` exporter hits the authentication provider to issue tokens for the OIDC client and successfully authenticates to the service. In my cases, the ClientSecret gets rotated quite frequently and there is a stack making sure the ClientID and ClientSecret in the corresponding files are up-to-date. **Documentation:** I have extended the extension's README file. I'm open to more suggestions! cc @jpkrohling @pavankrish123 --- ...ature-oauth2clientauth-read-from-file.yaml | 29 +++++ extension/oauth2clientauthextension/README.md | 6 ++ .../clientcredentialsconfig.go | 102 ++++++++++++++++++ extension/oauth2clientauthextension/config.go | 10 +- .../oauth2clientauthextension/extension.go | 22 ++-- .../extension_test.go | 93 ++++++++++++++++ .../testdata/test-cred-empty.txt | 0 .../testdata/test-cred.txt | 1 + 8 files changed, 252 insertions(+), 11 deletions(-) create mode 100755 .chloggen/feature-oauth2clientauth-read-from-file.yaml create mode 100644 extension/oauth2clientauthextension/clientcredentialsconfig.go create mode 100644 extension/oauth2clientauthextension/testdata/test-cred-empty.txt create mode 100644 extension/oauth2clientauthextension/testdata/test-cred.txt diff --git a/.chloggen/feature-oauth2clientauth-read-from-file.yaml b/.chloggen/feature-oauth2clientauth-read-from-file.yaml new file mode 100755 index 000000000000..f944cb0d56f9 --- /dev/null +++ b/.chloggen/feature-oauth2clientauth-read-from-file.yaml @@ -0,0 +1,29 @@ +# Use this changelog template to create an entry for release notes. + +# One of 'breaking', 'deprecation', 'new_component', 'enhancement', 'bug_fix' +change_type: enhancement + +# The name of the component, or a single word describing the area of concern, (e.g. filelogreceiver) +component: oauth2clientauthextension + +# A brief description of the change. Surround your text with quotes ("") if it needs to start with a backtick (`). +note: Enable dynamically reading ClientID and ClientSecret from files + +# Mandatory: One or more tracking issues related to the change. You can use the PR number here if no issue exists. +issues: [26117] + +# (Optional) One or more lines of additional information to render under the primary note. +# These lines will be padded with 2 spaces and then inserted directly into the document. +# Use pipe (|) for multiline entries. +subtext: | + - Read the client ID and/or secret from a file by specifying the file path to the ClientIDFile (`client_id_file`) and ClientSecretFile (`client_secret_file`) fields respectively. + - The file is read every time the client issues a new token. This means that the corresponding value can change dynamically during the execution by modifying the file contents. + +# If your change doesn't affect end users or the exported elements of any package, +# you should instead start your pull request title with [chore] or use the "Skip Changelog" label. +# Optional: The change log or logs in which this entry should be included. +# e.g. '[user]' or '[user, api]' +# Include 'user' if the change is relevant to end users. +# Include 'api' if there is a change to a library API. 
+# Default: '[user]' +change_logs: [user, api] diff --git a/extension/oauth2clientauthextension/README.md b/extension/oauth2clientauthextension/README.md index 4dc9b894390b..f52040a97db4 100644 --- a/extension/oauth2clientauthextension/README.md +++ b/extension/oauth2clientauthextension/README.md @@ -74,7 +74,13 @@ Following are the configuration fields - [**token_url**](https://datatracker.ietf.org/doc/html/rfc6749#section-3.2) - The resource server's token endpoint URLs. - [**client_id**](https://datatracker.ietf.org/doc/html/rfc6749#section-2.2) - The client identifier issued to the client. +- **client_id_file** - The file path to retrieve the client identifier issued to the client. + The extension reads this file and updates the client ID used whenever it needs to issue a new token. This enables dynamically changing the client credentials by modifying the file contents when, for example, they need to rotate. + This setting takes precedence over `client_id`. - [**client_secret**](https://datatracker.ietf.org/doc/html/rfc6749#section-2.3.1) - The secret string associated with above identifier. +- **client_secret_file** - The file path to retrieve the secret string associated with above identifier. + The extension reads this file and updates the client secret used whenever it needs to issue a new token. This enables dynamically changing the client credentials by modifying the file contents when, for example, they need to rotate. + This setting takes precedence over `client_secret`. - [**endpoint_params**](https://github.com/golang/oauth2/blob/master/clientcredentials/clientcredentials.go#L44) - Additional parameters that are sent to the token endpoint. - [**scopes**](https://datatracker.ietf.org/doc/html/rfc6749#section-3.3) - **Optional** optional requested permissions associated for the client. - [**timeout**](https://golang.org/src/net/http/client.go#L90) - **Optional** specifies the timeout on the underlying client to authorization server for fetching the tokens (initial and while refreshing). diff --git a/extension/oauth2clientauthextension/clientcredentialsconfig.go b/extension/oauth2clientauthextension/clientcredentialsconfig.go new file mode 100644 index 000000000000..fd6e06f1738c --- /dev/null +++ b/extension/oauth2clientauthextension/clientcredentialsconfig.go @@ -0,0 +1,102 @@ +// Copyright The OpenTelemetry Authors +// SPDX-License-Identifier: Apache-2.0 + +package oauth2clientauthextension // import "github.com/open-telemetry/opentelemetry-collector-contrib/extension/oauth2clientauthextension" + +import ( + "context" + "fmt" + "os" + "strings" + + "go.uber.org/multierr" + "golang.org/x/oauth2" + "golang.org/x/oauth2/clientcredentials" +) + +// clientCredentialsConfig is a clientcredentials.Config wrapper to allow +// values read from files in the ClientID and ClientSecret fields. +// +// Values from files can be retrieved by populating the ClientIDFile or +// the ClientSecretFile fields with the path to the file. +// +// Priority: File > Raw value +// +// Example - Retrieve secret from file: +// +// cfg := clientCredentialsConfig{ +// Config: clientcredentials.Config{ +// ClientID: "clientId", +// ... 
+// },
+// ClientSecretFile: "/path/to/client/secret", +// } +type clientCredentialsConfig struct { + clientcredentials.Config + + ClientIDFile string + ClientSecretFile string +} + +type clientCredentialsTokenSource struct { + ctx context.Context + config *clientCredentialsConfig +} + +// clientCredentialsTokenSource implements TokenSource +var _ oauth2.TokenSource = (*clientCredentialsTokenSource)(nil) + +func readCredentialsFile(path string) (string, error) { + f, err := os.ReadFile(path) + if err != nil { + return "", fmt.Errorf("failed to read credentials file %q: %w", path, err) + } + + credential := strings.TrimSpace(string(f)) + if credential == "" { + return "", fmt.Errorf("empty credentials file %q", path) + } + return credential, nil +} + +func getActualValue(value, filepath string) (string, error) { + if len(filepath) > 0 { + return readCredentialsFile(filepath) + } + + return value, nil +} + +// createConfig creates a proper clientcredentials.Config with values retrieved +// from files, if the user has specified '*_file' values +func (c *clientCredentialsConfig) createConfig() (*clientcredentials.Config, error) { + clientID, err := getActualValue(c.ClientID, c.ClientIDFile) + if err != nil { + return nil, multierr.Combine(errNoClientIDProvided, err) + } + + clientSecret, err := getActualValue(c.ClientSecret, c.ClientSecretFile) + if err != nil { + return nil, multierr.Combine(errNoClientSecretProvided, err) + } + + return &clientcredentials.Config{ + ClientID: clientID, + ClientSecret: clientSecret, + TokenURL: c.TokenURL, + Scopes: c.Scopes, + EndpointParams: c.EndpointParams, + }, nil +} + +func (c *clientCredentialsConfig) TokenSource(ctx context.Context) oauth2.TokenSource { + return oauth2.ReuseTokenSource(nil, clientCredentialsTokenSource{ctx: ctx, config: c}) +} + +func (ts clientCredentialsTokenSource) Token() (*oauth2.Token, error) { + cfg, err := ts.config.createConfig() + if err != nil { + return nil, err + } + return cfg.TokenSource(ts.ctx).Token() +} diff --git a/extension/oauth2clientauthextension/config.go b/extension/oauth2clientauthextension/config.go index f8aace7f0c55..c5e31064070a 100644 --- a/extension/oauth2clientauthextension/config.go +++ b/extension/oauth2clientauthextension/config.go @@ -26,10 +26,16 @@ type Config struct { // See https://datatracker.ietf.org/doc/html/rfc6749#section-2.2 ClientID string `mapstructure:"client_id"` + // ClientIDFile is the file path to read the application's ID from. + ClientIDFile string `mapstructure:"client_id_file"` + // ClientSecret is the application's secret. // See https://datatracker.ietf.org/doc/html/rfc6749#section-2.3.1 ClientSecret configopaque.String `mapstructure:"client_secret"` + // ClientSecretFile is the file path to read the application's secret from. + ClientSecretFile string `mapstructure:"client_secret_file"` + + // EndpointParams specifies additional parameters for requests to the token endpoint. 
EndpointParams url.Values `mapstructure:"endpoint_params"` @@ -54,10 +60,10 @@ var _ component.Config = (*Config)(nil) // Validate checks if the extension configuration is valid func (cfg *Config) Validate() error { - if cfg.ClientID == "" { + if cfg.ClientID == "" && cfg.ClientIDFile == "" { return errNoClientIDProvided } - if cfg.ClientSecret == "" { + if cfg.ClientSecret == "" && cfg.ClientSecretFile == "" { return errNoClientSecretProvided } if cfg.TokenURL == "" { diff --git a/extension/oauth2clientauthextension/extension.go b/extension/oauth2clientauthextension/extension.go index 30260bc1f72d..7f263154440f 100644 --- a/extension/oauth2clientauthextension/extension.go +++ b/extension/oauth2clientauthextension/extension.go @@ -19,7 +19,7 @@ import ( // clientAuthenticator provides implementation for providing client authentication using OAuth2 client credentials // workflow for both gRPC and HTTP clients. type clientAuthenticator struct { - clientCredentials *clientcredentials.Config + clientCredentials *clientCredentialsConfig logger *zap.Logger client *http.Client } @@ -36,10 +36,10 @@ var _ oauth2.TokenSource = (*errorWrappingTokenSource)(nil) var errFailedToGetSecurityToken = fmt.Errorf("failed to get security token from token endpoint") func newClientAuthenticator(cfg *Config, logger *zap.Logger) (*clientAuthenticator, error) { - if cfg.ClientID == "" { + if cfg.ClientID == "" && cfg.ClientIDFile == "" { return nil, errNoClientIDProvided } - if cfg.ClientSecret == "" { + if cfg.ClientSecret == "" && cfg.ClientSecretFile == "" { return nil, errNoClientSecretProvided } if cfg.TokenURL == "" { @@ -55,12 +55,16 @@ func newClientAuthenticator(cfg *Config, logger *zap.Logger) (*clientAuthenticat transport.TLSClientConfig = tlsCfg return &clientAuthenticator{ - clientCredentials: &clientcredentials.Config{ - ClientID: cfg.ClientID, - ClientSecret: string(cfg.ClientSecret), - TokenURL: cfg.TokenURL, - Scopes: cfg.Scopes, - EndpointParams: cfg.EndpointParams, + clientCredentials: &clientCredentialsConfig{ + Config: clientcredentials.Config{ + ClientID: cfg.ClientID, + ClientSecret: string(cfg.ClientSecret), + TokenURL: cfg.TokenURL, + Scopes: cfg.Scopes, + EndpointParams: cfg.EndpointParams, + }, + ClientIDFile: cfg.ClientIDFile, + ClientSecretFile: cfg.ClientSecretFile, }, logger: logger, client: &http.Client{ diff --git a/extension/oauth2clientauthextension/extension_test.go b/extension/oauth2clientauthextension/extension_test.go index 1b8e70a72001..6362d88b82b6 100644 --- a/extension/oauth2clientauthextension/extension_test.go +++ b/extension/oauth2clientauthextension/extension_test.go @@ -16,6 +16,7 @@ import ( "go.opentelemetry.io/collector/config/configtls" "go.uber.org/zap" "golang.org/x/oauth2" + "golang.org/x/oauth2/clientcredentials" grpcOAuth "google.golang.org/grpc/credentials/oauth" ) @@ -134,6 +135,98 @@ func TestOAuthClientSettings(t *testing.T) { } } +func TestOAuthClientSettingsCredsConfig(t *testing.T) { + // test files for TLS testing + var ( + testCredsFile = "testdata/test-cred.txt" + testCredsEmptyFile = "testdata/test-cred-empty.txt" + testCredsMissingFile = "testdata/test-cred-missing.txt" + ) + + tests := []struct { + name string + settings *Config + expectedClientConfig *clientcredentials.Config + shouldError bool + expectedError error + }{ + { + name: "client_id_file", + settings: &Config{ + ClientIDFile: testCredsFile, + ClientSecret: "testsecret", + TokenURL: "https://example.com/v1/token", + Scopes: []string{"resource.read"}, + }, + expectedClientConfig: 
&clientcredentials.Config{ + ClientID: "testcreds", + ClientSecret: "testsecret", + }, + shouldError: false, + expectedError: nil, + }, + { + name: "client_secret_file", + settings: &Config{ + ClientID: "testclientid", + ClientSecretFile: testCredsFile, + TokenURL: "https://example.com/v1/token", + Scopes: []string{"resource.read"}, + }, + expectedClientConfig: &clientcredentials.Config{ + ClientID: "testclientid", + ClientSecret: "testcreds", + }, + shouldError: false, + expectedError: nil, + }, + { + name: "empty_client_creds_file", + settings: &Config{ + ClientIDFile: testCredsEmptyFile, + ClientSecret: "testsecret", + TokenURL: "https://example.com/v1/token", + Scopes: []string{"resource.read"}, + }, + shouldError: true, + expectedError: errNoClientIDProvided, + }, + { + name: "missing_client_creds_file", + settings: &Config{ + ClientID: "testclientid", + ClientSecretFile: testCredsMissingFile, + TokenURL: "https://example.com/v1/token", + Scopes: []string{"resource.read"}, + }, + shouldError: true, + expectedError: errNoClientSecretProvided, + }, + } + + for _, test := range tests { + t.Run(test.name, func(t *testing.T) { + rc, _ := newClientAuthenticator(test.settings, zap.NewNop()) + cfg, err := rc.clientCredentials.createConfig() + if test.shouldError { + assert.NotNil(t, err) + assert.ErrorAs(t, err, &test.expectedError) + return + } + assert.NoError(t, err) + assert.Equal(t, test.expectedClientConfig.ClientID, cfg.ClientID) + assert.Equal(t, test.expectedClientConfig.ClientSecret, cfg.ClientSecret) + + // test tls settings + transport := rc.client.Transport.(*http.Transport) + tlsClientConfig := transport.TLSClientConfig + tlsTestSettingConfig, err := test.settings.TLSSetting.LoadTLSConfig() + assert.Nil(t, err) + assert.Equal(t, tlsClientConfig.Certificates, tlsTestSettingConfig.Certificates) + }) + } +} + type testRoundTripper struct { testString string } diff --git a/extension/oauth2clientauthextension/testdata/test-cred-empty.txt b/extension/oauth2clientauthextension/testdata/test-cred-empty.txt new file mode 100644 index 000000000000..e69de29bb2d1 diff --git a/extension/oauth2clientauthextension/testdata/test-cred.txt b/extension/oauth2clientauthextension/testdata/test-cred.txt new file mode 100644 index 000000000000..9059fbb71bf5 --- /dev/null +++ b/extension/oauth2clientauthextension/testdata/test-cred.txt @@ -0,0 +1 @@ +testcreds \ No newline at end of file From 841e692a432833c8a408b23b3e4f5fa2a43bc86c Mon Sep 17 00:00:00 2001 From: Daniel Jaglowski Date: Fri, 8 Sep 2023 07:19:03 -0600 Subject: [PATCH 4/4] [chore][pkg/stanza] Remove unnecessary test struct (#26527) Followup to #26517 The `MultiLineTokenizerTestCase` became unnecessary with changes in the previous PR but the diff caused by removing it was quite large. This PR just removes the struct. 
--- pkg/stanza/tokenize/multiline_test.go | 464 +++++++++++--------------- 1 file changed, 203 insertions(+), 261 deletions(-) diff --git a/pkg/stanza/tokenize/multiline_test.go b/pkg/stanza/tokenize/multiline_test.go index 7c85799c54ef..d6a9eea6db75 100644 --- a/pkg/stanza/tokenize/multiline_test.go +++ b/pkg/stanza/tokenize/multiline_test.go @@ -19,122 +19,98 @@ import ( "github.com/open-telemetry/opentelemetry-collector-contrib/pkg/stanza/trim" ) -type MultiLineTokenizerTestCase struct { - tokenizetest.TestCase -} - func TestLineStartSplitFunc(t *testing.T) { - testCases := []MultiLineTokenizerTestCase{ - { - tokenizetest.TestCase{ - Name: "OneLogSimple", - Pattern: `LOGSTART \d+ `, - Input: []byte("LOGSTART 123 log1LOGSTART 123 a"), - ExpectedTokens: []string{ - `LOGSTART 123 log1`, - }, + testCases := []tokenizetest.TestCase{ + { + Name: "OneLogSimple", + Pattern: `LOGSTART \d+ `, + Input: []byte("LOGSTART 123 log1LOGSTART 123 a"), + ExpectedTokens: []string{ + `LOGSTART 123 log1`, }, }, { - tokenizetest.TestCase{ - Name: "TwoLogsSimple", - Pattern: `LOGSTART \d+ `, - Input: []byte(`LOGSTART 123 log1 LOGSTART 234 log2 LOGSTART 345 foo`), - ExpectedTokens: []string{ - `LOGSTART 123 log1`, - `LOGSTART 234 log2`, - }, + Name: "TwoLogsSimple", + Pattern: `LOGSTART \d+ `, + Input: []byte(`LOGSTART 123 log1 LOGSTART 234 log2 LOGSTART 345 foo`), + ExpectedTokens: []string{ + `LOGSTART 123 log1`, + `LOGSTART 234 log2`, }, }, { - tokenizetest.TestCase{ - Name: "TwoLogsLineStart", - Pattern: `^LOGSTART \d+ `, - Input: []byte("LOGSTART 123 LOGSTART 345 log1\nLOGSTART 234 log2\nLOGSTART 345 foo"), - ExpectedTokens: []string{ - "LOGSTART 123 LOGSTART 345 log1", - "LOGSTART 234 log2", - }, + Name: "TwoLogsLineStart", + Pattern: `^LOGSTART \d+ `, + Input: []byte("LOGSTART 123 LOGSTART 345 log1\nLOGSTART 234 log2\nLOGSTART 345 foo"), + ExpectedTokens: []string{ + "LOGSTART 123 LOGSTART 345 log1", + "LOGSTART 234 log2", }, }, { - tokenizetest.TestCase{ - Name: "NoMatches", - Pattern: `LOGSTART \d+ `, - Input: []byte(`file that has no matches in it`), - }, + Name: "NoMatches", + Pattern: `LOGSTART \d+ `, + Input: []byte(`file that has no matches in it`), }, { - tokenizetest.TestCase{ - Name: "PrecedingNonMatches", - Pattern: `LOGSTART \d+ `, - Input: []byte(`part that doesn't match LOGSTART 123 part that matchesLOGSTART 123 foo`), - ExpectedTokens: []string{ - `part that doesn't match`, - `LOGSTART 123 part that matches`, - }, + Name: "PrecedingNonMatches", + Pattern: `LOGSTART \d+ `, + Input: []byte(`part that doesn't match LOGSTART 123 part that matchesLOGSTART 123 foo`), + ExpectedTokens: []string{ + `part that doesn't match`, + `LOGSTART 123 part that matches`, }, }, { - tokenizetest.TestCase{ - Name: "HugeLog100", - Pattern: `LOGSTART \d+ `, - Input: func() []byte { - newInput := []byte(`LOGSTART 123 `) - newInput = append(newInput, tokenizetest.GenerateBytes(100)...) - newInput = append(newInput, []byte(`LOGSTART 234 endlog`)...) - return newInput - }(), - ExpectedTokens: []string{ - `LOGSTART 123 ` + string(tokenizetest.GenerateBytes(100)), - }, + Name: "HugeLog100", + Pattern: `LOGSTART \d+ `, + Input: func() []byte { + newInput := []byte(`LOGSTART 123 `) + newInput = append(newInput, tokenizetest.GenerateBytes(100)...) + newInput = append(newInput, []byte(`LOGSTART 234 endlog`)...) 
+ return newInput + }(), + ExpectedTokens: []string{ + `LOGSTART 123 ` + string(tokenizetest.GenerateBytes(100)), }, }, { - tokenizetest.TestCase{ - Name: "HugeLog10000", - Pattern: `LOGSTART \d+ `, - Input: func() []byte { - newInput := []byte(`LOGSTART 123 `) - newInput = append(newInput, tokenizetest.GenerateBytes(10000)...) - newInput = append(newInput, []byte(`LOGSTART 234 endlog`)...) - return newInput - }(), - ExpectedTokens: []string{ - `LOGSTART 123 ` + string(tokenizetest.GenerateBytes(10000)), - }, + Name: "HugeLog10000", + Pattern: `LOGSTART \d+ `, + Input: func() []byte { + newInput := []byte(`LOGSTART 123 `) + newInput = append(newInput, tokenizetest.GenerateBytes(10000)...) + newInput = append(newInput, []byte(`LOGSTART 234 endlog`)...) + return newInput + }(), + ExpectedTokens: []string{ + `LOGSTART 123 ` + string(tokenizetest.GenerateBytes(10000)), }, }, { - tokenizetest.TestCase{ - Name: "ErrTooLong", - Pattern: `LOGSTART \d+ `, - Input: func() []byte { - newInput := []byte(`LOGSTART 123 `) - newInput = append(newInput, tokenizetest.GenerateBytes(1000000)...) - newInput = append(newInput, []byte(`LOGSTART 234 endlog`)...) - return newInput - }(), - ExpectedError: errors.New("bufio.Scanner: token too long"), - }, + Name: "ErrTooLong", + Pattern: `LOGSTART \d+ `, + Input: func() []byte { + newInput := []byte(`LOGSTART 123 `) + newInput = append(newInput, tokenizetest.GenerateBytes(1000000)...) + newInput = append(newInput, []byte(`LOGSTART 234 endlog`)...) + return newInput + }(), + ExpectedError: errors.New("bufio.Scanner: token too long"), }, { - tokenizetest.TestCase{ - Name: "MultipleMultilineLogs", - Pattern: `^LOGSTART \d+`, - Input: []byte("LOGSTART 12 log1\t \nLOGPART log1\nLOGPART log1\t \nLOGSTART 17 log2\nLOGPART log2\nanother line\nLOGSTART 43 log5"), - ExpectedTokens: []string{ - "LOGSTART 12 log1\t \nLOGPART log1\nLOGPART log1", - "LOGSTART 17 log2\nLOGPART log2\nanother line", - }, + Name: "MultipleMultilineLogs", + Pattern: `^LOGSTART \d+`, + Input: []byte("LOGSTART 12 log1\t \nLOGPART log1\nLOGPART log1\t \nLOGSTART 17 log2\nLOGPART log2\nanother line\nLOGSTART 43 log5"), + ExpectedTokens: []string{ + "LOGSTART 12 log1\t \nLOGPART log1\nLOGPART log1", + "LOGSTART 17 log2\nLOGPART log2\nanother line", }, }, { - tokenizetest.TestCase{ - Name: "LogsWithoutFlusher", - Pattern: `^LOGSTART \d+`, - Input: []byte("LOGPART log1\nLOGPART log1\t \n"), - }, + Name: "LogsWithoutFlusher", + Pattern: `^LOGSTART \d+`, + Input: []byte("LOGPART log1\nLOGPART log1\t \n"), }, } @@ -173,113 +149,93 @@ func TestLineStartSplitFunc(t *testing.T) { } func TestLineEndSplitFunc(t *testing.T) { - testCases := []MultiLineTokenizerTestCase{ - { - tokenizetest.TestCase{ - Name: "OneLogSimple", - Pattern: `LOGEND \d+`, - Input: []byte(`my log LOGEND 123`), - ExpectedTokens: []string{ - `my log LOGEND 123`, - }, + testCases := []tokenizetest.TestCase{ + { + Name: "OneLogSimple", + Pattern: `LOGEND \d+`, + Input: []byte(`my log LOGEND 123`), + ExpectedTokens: []string{ + `my log LOGEND 123`, }, }, { - tokenizetest.TestCase{ - Name: "TwoLogsSimple", - Pattern: `LOGEND \d+`, - Input: []byte(`log1 LOGEND 123log2 LOGEND 234`), - ExpectedTokens: []string{ - `log1 LOGEND 123`, - `log2 LOGEND 234`, - }, + Name: "TwoLogsSimple", + Pattern: `LOGEND \d+`, + Input: []byte(`log1 LOGEND 123log2 LOGEND 234`), + ExpectedTokens: []string{ + `log1 LOGEND 123`, + `log2 LOGEND 234`, }, }, { - tokenizetest.TestCase{ - Name: "TwoLogsLineEndSimple", - Pattern: `LOGEND$`, - Input: []byte("log1 LOGEND LOGEND\nlog2 
LOGEND\n"), - ExpectedTokens: []string{ - "log1 LOGEND LOGEND", - "log2 LOGEND", - }, + Name: "TwoLogsLineEndSimple", + Pattern: `LOGEND$`, + Input: []byte("log1 LOGEND LOGEND\nlog2 LOGEND\n"), + ExpectedTokens: []string{ + "log1 LOGEND LOGEND", + "log2 LOGEND", }, }, { - tokenizetest.TestCase{ - Name: "NoMatches", - Pattern: `LOGEND \d+`, - Input: []byte(`file that has no matches in it`), - }, + Name: "NoMatches", + Pattern: `LOGEND \d+`, + Input: []byte(`file that has no matches in it`), }, { - tokenizetest.TestCase{ - Name: "NonMatchesAfter", - Pattern: `LOGEND \d+`, - Input: []byte(`part that matches LOGEND 123 part that doesn't match`), - ExpectedTokens: []string{ - `part that matches LOGEND 123`, - }, + Name: "NonMatchesAfter", + Pattern: `LOGEND \d+`, + Input: []byte(`part that matches LOGEND 123 part that doesn't match`), + ExpectedTokens: []string{ + `part that matches LOGEND 123`, }, }, { - tokenizetest.TestCase{ - Name: "HugeLog100", - Pattern: `LOGEND \d`, - Input: func() []byte { - newInput := tokenizetest.GenerateBytes(100) - newInput = append(newInput, []byte(`LOGEND 1 `)...) - return newInput - }(), - ExpectedTokens: []string{ - string(tokenizetest.GenerateBytes(100)) + `LOGEND 1`, - }, + Name: "HugeLog100", + Pattern: `LOGEND \d`, + Input: func() []byte { + newInput := tokenizetest.GenerateBytes(100) + newInput = append(newInput, []byte(`LOGEND 1 `)...) + return newInput + }(), + ExpectedTokens: []string{ + string(tokenizetest.GenerateBytes(100)) + `LOGEND 1`, }, }, { - tokenizetest.TestCase{ - Name: "HugeLog10000", - Pattern: `LOGEND \d`, - Input: func() []byte { - newInput := tokenizetest.GenerateBytes(10000) - newInput = append(newInput, []byte(`LOGEND 1 `)...) - return newInput - }(), - ExpectedTokens: []string{ - string(tokenizetest.GenerateBytes(10000)) + `LOGEND 1`, - }, + Name: "HugeLog10000", + Pattern: `LOGEND \d`, + Input: func() []byte { + newInput := tokenizetest.GenerateBytes(10000) + newInput = append(newInput, []byte(`LOGEND 1 `)...) + return newInput + }(), + ExpectedTokens: []string{ + string(tokenizetest.GenerateBytes(10000)) + `LOGEND 1`, }, }, { - tokenizetest.TestCase{ - Name: "HugeLog1000000", - Pattern: `LOGEND \d`, - Input: func() []byte { - newInput := tokenizetest.GenerateBytes(1000000) - newInput = append(newInput, []byte(`LOGEND 1 `)...) - return newInput - }(), - ExpectedError: errors.New("bufio.Scanner: token too long"), - }, + Name: "HugeLog1000000", + Pattern: `LOGEND \d`, + Input: func() []byte { + newInput := tokenizetest.GenerateBytes(1000000) + newInput = append(newInput, []byte(`LOGEND 1 `)...) 
+ return newInput + }(), + ExpectedError: errors.New("bufio.Scanner: token too long"), }, { - tokenizetest.TestCase{ - Name: "MultipleMultilineLogs", - Pattern: `^LOGEND.*$`, - Input: []byte("LOGSTART 12 log1\t \nLOGPART log1\nLOGEND log1\t \nLOGSTART 17 log2\nLOGPART log2\nLOGEND log2\nLOGSTART 43 log5"), - ExpectedTokens: []string{ - "LOGSTART 12 log1\t \nLOGPART log1\nLOGEND log1", - "LOGSTART 17 log2\nLOGPART log2\nLOGEND log2", - }, + Name: "MultipleMultilineLogs", + Pattern: `^LOGEND.*$`, + Input: []byte("LOGSTART 12 log1\t \nLOGPART log1\nLOGEND log1\t \nLOGSTART 17 log2\nLOGPART log2\nLOGEND log2\nLOGSTART 43 log5"), + ExpectedTokens: []string{ + "LOGSTART 12 log1\t \nLOGPART log1\nLOGEND log1", + "LOGSTART 17 log2\nLOGPART log2\nLOGEND log2", }, }, { - tokenizetest.TestCase{ - Name: "LogsWithoutFlusher", - Pattern: `^LOGEND.*$`, - Input: []byte("LOGPART log1\nLOGPART log1\t \n"), - }, + Name: "LogsWithoutFlusher", + Pattern: `^LOGEND.*$`, + Input: []byte("LOGPART log1\nLOGPART log1\t \n"), }, } @@ -299,133 +255,119 @@ func TestLineEndSplitFunc(t *testing.T) { } func TestNewlineSplitFunc(t *testing.T) { - testCases := []MultiLineTokenizerTestCase{ + testCases := []tokenizetest.TestCase{ { - tokenizetest.TestCase{Name: "OneLogSimple", - Input: []byte("my log\n"), - ExpectedTokens: []string{ - `my log`, - }, + Name: "OneLogSimple", + Input: []byte("my log\n"), + ExpectedTokens: []string{ + `my log`, }, }, { - tokenizetest.TestCase{Name: "OneLogCarriageReturn", - Input: []byte("my log\r\n"), - ExpectedTokens: []string{ - `my log`, - }, + Name: "OneLogCarriageReturn", + Input: []byte("my log\r\n"), + ExpectedTokens: []string{ + `my log`, }, }, { - tokenizetest.TestCase{Name: "TwoLogsSimple", - Input: []byte("log1\nlog2\n"), - ExpectedTokens: []string{ - `log1`, - `log2`, - }, + Name: "TwoLogsSimple", + Input: []byte("log1\nlog2\n"), + ExpectedTokens: []string{ + `log1`, + `log2`, }, }, { - tokenizetest.TestCase{Name: "TwoLogsCarriageReturn", - Input: []byte("log1\r\nlog2\r\n"), - ExpectedTokens: []string{ - `log1`, - `log2`, - }, + Name: "TwoLogsCarriageReturn", + Input: []byte("log1\r\nlog2\r\n"), + ExpectedTokens: []string{ + `log1`, + `log2`, }, }, { - tokenizetest.TestCase{Name: "NoTailingNewline", - Input: []byte(`foo`), - }, + Name: "NoTailingNewline", + Input: []byte(`foo`), }, { - tokenizetest.TestCase{Name: "HugeLog100", - Input: func() []byte { - newInput := tokenizetest.GenerateBytes(100) - newInput = append(newInput, '\n') - return newInput - }(), - ExpectedTokens: []string{ - string(tokenizetest.GenerateBytes(100)), - }, + Name: "HugeLog100", + Input: func() []byte { + newInput := tokenizetest.GenerateBytes(100) + newInput = append(newInput, '\n') + return newInput + }(), + ExpectedTokens: []string{ + string(tokenizetest.GenerateBytes(100)), }, }, { - tokenizetest.TestCase{Name: "HugeLog10000", - Input: func() []byte { - newInput := tokenizetest.GenerateBytes(10000) - newInput = append(newInput, '\n') - return newInput - }(), - ExpectedTokens: []string{ - string(tokenizetest.GenerateBytes(10000)), - }, + Name: "HugeLog10000", + Input: func() []byte { + newInput := tokenizetest.GenerateBytes(10000) + newInput = append(newInput, '\n') + return newInput + }(), + ExpectedTokens: []string{ + string(tokenizetest.GenerateBytes(10000)), }, }, { - tokenizetest.TestCase{Name: "HugeLog1000000", - Input: func() []byte { - newInput := tokenizetest.GenerateBytes(1000000) - newInput = append(newInput, '\n') - return newInput - }(), - ExpectedError: errors.New("bufio.Scanner: token too 
long"), - }, + Name: "HugeLog1000000", + Input: func() []byte { + newInput := tokenizetest.GenerateBytes(1000000) + newInput = append(newInput, '\n') + return newInput + }(), + ExpectedError: errors.New("bufio.Scanner: token too long"), }, { - tokenizetest.TestCase{Name: "LogsWithoutFlusher", - Input: []byte("LOGPART log1"), - }, + Name: "LogsWithoutFlusher", + Input: []byte("LOGPART log1"), }, { - tokenizetest.TestCase{Name: "DefaultFlusherSplits", - Input: []byte("log1\nlog2\n"), - ExpectedTokens: []string{ - "log1", - "log2", - }, + Name: "DefaultFlusherSplits", + Input: []byte("log1\nlog2\n"), + ExpectedTokens: []string{ + "log1", + "log2", }, }, { - tokenizetest.TestCase{Name: "LogsWithLogStartingWithWhiteChars", - Input: []byte("\nLOGEND 333\nAnother one"), - ExpectedTokens: []string{ - "", - "LOGEND 333", - }, + Name: "LogsWithLogStartingWithWhiteChars", + Input: []byte("\nLOGEND 333\nAnother one"), + ExpectedTokens: []string{ + "", + "LOGEND 333", }, }, { - tokenizetest.TestCase{Name: "PreserveLeadingWhitespaces", - Input: []byte("\n LOGEND 333 \nAnother one "), - ExpectedTokens: []string{ - "", - " LOGEND 333", - }, - PreserveLeadingWhitespaces: true, + Name: "PreserveLeadingWhitespaces", + Input: []byte("\n LOGEND 333 \nAnother one "), + ExpectedTokens: []string{ + "", + " LOGEND 333", }, + PreserveLeadingWhitespaces: true, }, { - tokenizetest.TestCase{Name: "PreserveTrailingWhitespaces", - Input: []byte("\n LOGEND 333 \nAnother one "), - ExpectedTokens: []string{ - "", - "LOGEND 333 ", - }, - PreserveTrailingWhitespaces: true, + Name: "PreserveTrailingWhitespaces", + Input: []byte("\n LOGEND 333 \nAnother one "), + ExpectedTokens: []string{ + "", + "LOGEND 333 ", }, + PreserveTrailingWhitespaces: true, }, { - tokenizetest.TestCase{Name: "PreserveBothLeadingAndTrailingWhitespaces", - Input: []byte("\n LOGEND 333 \nAnother one "), - ExpectedTokens: []string{ - "", - " LOGEND 333 ", - }, - PreserveLeadingWhitespaces: true, - PreserveTrailingWhitespaces: true, + Name: "PreserveBothLeadingAndTrailingWhitespaces", + Input: []byte("\n LOGEND 333 \nAnother one "), + ExpectedTokens: []string{ + "", + " LOGEND 333 ", }, + PreserveLeadingWhitespaces: true, + PreserveTrailingWhitespaces: true, }, }