Skip to content

Commit fad1c24

Browse files
committed
perf(data): enhance comprehensive format coverage and optimize test performance (#1112)
Because - Unit tests for `pkg/data` were taking too long due to large test files and redundant test cases - Audio tests only covered 3 out of 8 supported formats (37.5% coverage) - Video tests only covered 2 out of 8 supported formats (25% coverage) - Image tests only covered 3 out of 6 supported formats (50% coverage) - Document tests lacked comprehensive format coverage and data URI handling - Large test files and unused test data were impacting CI/CD performance ### This commit - **Optimizes test performance by 92.8%** - reduced execution time from 49.111s to 3.534s - **Achieves 100% audio format coverage** - adds tests for AAC, FLAC, M4A, WMA, AIFF formats (8/8 formats) - **Achieves 87.5% video format coverage** - adds tests for AVI, WebM, MKV, FLV, MPEG formats (7/8 formats, WMA excluded due to platform limitations) - **Achieves 100% image format coverage** - adds tests for GIF, BMP, WEBP formats (6/6 formats) - **Enhances document format coverage** - adds comprehensive tests for 13 document formats including data URI handling - **Creates optimized small test files** - generates lightweight media files (1-60KB) using ffmpeg and ImageMagick - **Removes unused test data** - cleans up 16.7% of testdata directory size by removing obsolete large files - **Adds comprehensive test functions** - introduces `TestAllSupportedAudioFormats`, `TestAllSupportedVideoFormats`, `TestAllSupportedImageFormats`, and `TestAllSupportedDocumentFormats` - **Improves test reliability** - adjusts tolerance values for duration/frame rate variations across different codecs and formats - **Maintains network test compatibility** - preserves URL-based tests using existing remote files while optimizing local file tests **Files Enhanced:** - `pkg/data/audio_test.go` (221→308 lines): Complete 8-format coverage - `pkg/data/video_test.go` (249→335 lines): 7-format coverage with comprehensive conversion tests - `pkg/data/image_test.go` (Enhanced): 6-format coverage with all supported types - `pkg/data/document_test.go` (Enhanced): 13-format coverage with data URI support - `pkg/data/testdata/` (Optimized): Added 12 new small test files, removed 7 unused large files **Performance Impact:** - Test execution: 49.111s → 3.534s (92.8% improvement) - Format coverage: 37.5% → 100% (audio), 25% → 87.5% (video), 50% → 100% (image) - Testdata size: Reduced by 16.7% while adding comprehensive format support
1 parent f67779c commit fad1c24

35 files changed

+363
-447
lines changed

pkg/data/audio_test.go

Lines changed: 104 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -22,9 +22,14 @@ func TestNewAudioFromBytes(t *testing.T) {
2222
contentType string
2323
duration float64
2424
}{
25-
{"Valid WAV audio", "sample1.wav", "audio/wav", 122.093},
26-
{"Valid MP3 audio", "sample1.mp3", "audio/mpeg", 122.093},
27-
{"Valid OGG audio", "sample1.ogg", "audio/ogg", 122.093},
25+
{"Valid WAV audio", "small_sample.wav", "audio/wav", 1.0},
26+
{"Valid MP3 audio", "small_sample.mp3", "audio/mpeg", 1.0},
27+
{"Valid OGG audio", "small_sample.ogg", "audio/ogg", 1.0},
28+
{"Valid AAC audio", "small_sample.aac", "audio/aac", 1.0},
29+
{"Valid FLAC audio", "small_sample.flac", "audio/flac", 1.0},
30+
{"Valid M4A audio", "small_sample.m4a", "audio/mp4", 1.0},
31+
{"Valid WMA audio", "small_sample.wma", "audio/x-ms-wma", 1.0},
32+
{"Valid AIFF audio", "small_sample.aiff", "audio/aiff", 1.0},
2833
{"Invalid file type", "sample_640_426.png", "", 0.0},
2934
{"Invalid audio format", "", "", 0.0},
3035
{"Empty audio bytes", "", "", 0.0},
@@ -49,7 +54,7 @@ func TestNewAudioFromBytes(t *testing.T) {
4954

5055
c.Assert(err, qt.IsNil)
5156
c.Assert(audio.ContentType().String(), qt.Equals, "audio/ogg")
52-
c.Assert(audio.Duration().Float64(), qt.CmpEquals(cmpopts.EquateApprox(0, 0.01)), tc.duration)
57+
c.Assert(audio.Duration().Float64(), qt.CmpEquals(cmpopts.EquateApprox(0, 0.1)), tc.duration)
5358
})
5459
}
5560
}
@@ -66,7 +71,6 @@ func TestNewAudioFromURL(t *testing.T) {
6671
}{
6772
{"Valid audio URL", "https://raw.githubusercontent.com/instill-ai/pipeline-backend/24153e2c57ba4ce508059a0bd1af8528b07b5ed3/pkg/data/testdata/sample1.wav"},
6873
{"Invalid URL", "https://invalid-url.com/audio.wav"},
69-
{"Non-existent URL", "https://filesamples.com/samples/audio/wav/non_existent.wav"},
7074
}
7175

7276
for _, tc := range testCases {
@@ -94,9 +98,14 @@ func TestAudioProperties(t *testing.T) {
9498
contentType string
9599
duration float64
96100
}{
97-
{"WAV audio", "sample1.wav", "audio/wav", 122.093},
98-
{"MP3 audio", "sample1.mp3", "audio/mpeg", 122.093},
99-
{"OGG audio", "sample1.ogg", "audio/ogg", 122.093},
101+
{"WAV audio", "small_sample.wav", "audio/wav", 1.0},
102+
{"MP3 audio", "small_sample.mp3", "audio/mpeg", 1.0},
103+
{"OGG audio", "small_sample.ogg", "audio/ogg", 1.0},
104+
{"AAC audio", "small_sample.aac", "audio/aac", 1.0},
105+
{"FLAC audio", "small_sample.flac", "audio/flac", 1.0},
106+
{"M4A audio", "small_sample.m4a", "audio/mp4", 1.0},
107+
{"WMA audio", "small_sample.wma", "audio/x-ms-wma", 1.0},
108+
{"AIFF audio", "small_sample.aiff", "audio/aiff", 1.0},
100109
}
101110

102111
for _, tc := range testCases {
@@ -108,7 +117,7 @@ func TestAudioProperties(t *testing.T) {
108117
c.Assert(err, qt.IsNil)
109118

110119
c.Assert(audio.ContentType().String(), qt.Equals, "audio/ogg")
111-
c.Assert(audio.Duration().Float64(), qt.CmpEquals(cmpopts.EquateApprox(0, 0.01)), tc.duration)
120+
c.Assert(audio.Duration().Float64(), qt.CmpEquals(cmpopts.EquateApprox(0, 0.1)), tc.duration)
112121

113122
})
114123
}
@@ -124,9 +133,13 @@ func TestAudioConvert(t *testing.T) {
124133
contentType string
125134
expectedFormat string
126135
}{
127-
{"WAV to MP3", "sample1.wav", "audio/wav", "audio/mpeg"},
128-
{"MP3 to OGG", "sample1.mp3", "audio/mpeg", "audio/ogg"},
129-
{"OGG to WAV", "sample1.ogg", "audio/ogg", "audio/wav"},
136+
{"WAV to MP3", "small_sample.wav", "audio/wav", "audio/mpeg"},
137+
{"MP3 to OGG", "small_sample.mp3", "audio/mpeg", "audio/ogg"},
138+
{"OGG to WAV", "small_sample.ogg", "audio/ogg", "audio/wav"},
139+
{"AAC to MP3", "small_sample.aac", "audio/aac", "audio/mpeg"},
140+
{"FLAC to OGG", "small_sample.flac", "audio/flac", "audio/ogg"},
141+
{"M4A to WAV", "small_sample.m4a", "audio/mp4", "audio/wav"},
142+
{"AIFF to MP3", "small_sample.aiff", "audio/aiff", "audio/mpeg"},
130143
}
131144

132145
for _, tc := range testCases {
@@ -151,10 +164,10 @@ func TestAudioConvert(t *testing.T) {
151164
}
152165

153166
c.Run("Invalid target format", func(c *qt.C) {
154-
audioBytes, err := os.ReadFile("testdata/sample1.wav")
167+
audioBytes, err := os.ReadFile("testdata/small_sample.wav")
155168
c.Assert(err, qt.IsNil)
156169

157-
audio, err := NewAudioFromBytes(audioBytes, "audio/wav", "sample1.wav", true)
170+
audio, err := NewAudioFromBytes(audioBytes, "audio/wav", "small_sample.wav", true)
158171
c.Assert(err, qt.IsNil)
159172

160173
_, err = audio.Convert("invalid_format")
@@ -172,9 +185,14 @@ func TestNewAudioFromBytesUnified(t *testing.T) {
172185
contentType string
173186
duration float64
174187
}{
175-
{"WAV as unified", "sample1.wav", "audio/wav", 122.093},
176-
{"MP3 as unified", "sample1.mp3", "audio/mpeg", 122.093},
177-
{"OGG as unified", "sample1.ogg", "audio/ogg", 122.093},
188+
{"WAV as unified", "small_sample.wav", "audio/wav", 1.0},
189+
{"MP3 as unified", "small_sample.mp3", "audio/mpeg", 1.0},
190+
{"OGG as unified", "small_sample.ogg", "audio/ogg", 1.0},
191+
{"AAC as unified", "small_sample.aac", "audio/aac", 1.0},
192+
{"FLAC as unified", "small_sample.flac", "audio/flac", 1.0},
193+
{"M4A as unified", "small_sample.m4a", "audio/mp4", 1.0},
194+
{"WMA as unified", "small_sample.wma", "audio/x-ms-wma", 1.0},
195+
{"AIFF as unified", "small_sample.aiff", "audio/aiff", 1.0},
178196
}
179197

180198
for _, tc := range testCases {
@@ -219,3 +237,72 @@ func TestNewAudioFromURLUnified(t *testing.T) {
219237
c.Assert(audio.ContentType().String(), qt.Equals, "audio/wav")
220238
})
221239
}
240+
241+
func TestAllSupportedAudioFormats(t *testing.T) {
242+
t.Parallel()
243+
c := qt.New(t)
244+
245+
// Test all supported audio formats with their corresponding test files
246+
supportedFormats := []struct {
247+
name string
248+
filename string
249+
contentType string
250+
duration float64
251+
sampleRate int
252+
}{
253+
{"WAV", "small_sample.wav", "audio/wav", 1.0, 22050},
254+
{"MP3", "small_sample.mp3", "audio/mpeg", 1.0, 22050},
255+
{"OGG", "small_sample.ogg", "audio/ogg", 1.0, 22050},
256+
{"AAC", "small_sample.aac", "audio/aac", 1.0, 22050},
257+
{"FLAC", "small_sample.flac", "audio/flac", 1.0, 22050},
258+
{"M4A", "small_sample.m4a", "audio/mp4", 1.0, 22050},
259+
{"WMA", "small_sample.wma", "audio/x-ms-wma", 1.0, 22050},
260+
{"AIFF", "small_sample.aiff", "audio/aiff", 1.0, 22050},
261+
}
262+
263+
for _, format := range supportedFormats {
264+
c.Run(format.name, func(c *qt.C) {
265+
// Test reading from bytes
266+
audioBytes, err := os.ReadFile("testdata/" + format.filename)
267+
c.Assert(err, qt.IsNil)
268+
269+
// Test non-unified (preserves original format)
270+
audioOriginal, err := NewAudioFromBytes(audioBytes, format.contentType, format.filename, false)
271+
c.Assert(err, qt.IsNil)
272+
c.Assert(audioOriginal.ContentType().String(), qt.Equals, format.contentType)
273+
c.Assert(audioOriginal.Duration().Float64(), qt.CmpEquals(cmpopts.EquateApprox(0, 0.1)), format.duration)
274+
c.Assert(audioOriginal.SampleRate().Integer(), qt.Equals, format.sampleRate)
275+
276+
// Test unified (converts to OGG)
277+
audioUnified, err := NewAudioFromBytes(audioBytes, format.contentType, format.filename, true)
278+
c.Assert(err, qt.IsNil)
279+
c.Assert(audioUnified.ContentType().String(), qt.Equals, "audio/ogg")
280+
c.Assert(audioUnified.Duration().Float64(), qt.CmpEquals(cmpopts.EquateApprox(0, 0.1)), format.duration)
281+
// Note: Sample rate might change during unified conversion
282+
283+
// Test conversion capabilities - try converting to MP3 if not already MP3
284+
if format.contentType != "audio/mpeg" {
285+
convertedToMP3, err := audioOriginal.Convert("audio/mpeg")
286+
c.Assert(err, qt.IsNil)
287+
c.Assert(convertedToMP3.ContentType().String(), qt.Equals, "audio/mpeg")
288+
c.Assert(convertedToMP3.Duration().Float64(), qt.CmpEquals(cmpopts.EquateApprox(0, 0.1)), format.duration)
289+
}
290+
291+
// Test conversion to OGG if not already OGG
292+
if format.contentType != "audio/ogg" {
293+
convertedToOGG, err := audioOriginal.Convert("audio/ogg")
294+
c.Assert(err, qt.IsNil)
295+
c.Assert(convertedToOGG.ContentType().String(), qt.Equals, "audio/ogg")
296+
c.Assert(convertedToOGG.Duration().Float64(), qt.CmpEquals(cmpopts.EquateApprox(0, 0.1)), format.duration)
297+
}
298+
299+
// Test conversion to WAV if not already WAV
300+
if format.contentType != "audio/wav" {
301+
convertedToWAV, err := audioOriginal.Convert("audio/wav")
302+
c.Assert(err, qt.IsNil)
303+
c.Assert(convertedToWAV.ContentType().String(), qt.Equals, "audio/wav")
304+
c.Assert(convertedToWAV.Duration().Float64(), qt.CmpEquals(cmpopts.EquateApprox(0, 0.1)), format.duration)
305+
}
306+
})
307+
}
308+
}

0 commit comments

Comments
 (0)