Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 4 additions & 4 deletions evaluate/evaluate.go
Original file line number Diff line number Diff line change
Expand Up @@ -106,14 +106,14 @@ func Evaluate(ctx *Context) (assessments report.AssessmentPerModelPerLanguagePer

withLoadedModel(ctx.Log, model, ctx.ProviderForModel[model], func() {
for rm := uint(0); rm < ctx.runsAtModelLevel(); rm++ {
if err := ResetTemporaryRepository(ctx.Log, temporaryRepositoryPath); err != nil {
ctx.Log.Panicf("ERROR: unable to reset temporary repository path: %s", err)
}

if ctx.Runs > 1 && ctx.RunsSequential {
ctx.Log.Printf("Run %d/%d for model %q", rm+1, ctx.Runs, modelID)
}

if err := ResetTemporaryRepository(ctx.Log, temporaryRepositoryPath); err != nil {
ctx.Log.Panicf("ERROR: unable to reset temporary repository path: %s", err)
}

assessment, ps, err := Repository(ctx.Log, ctx.ResultPath, model, language, temporaryRepositoryPath, repositoryPath)
assessments[model][language][repositoryPath].Add(assessment)
if err != nil {
Expand Down
48 changes: 26 additions & 22 deletions evaluate/evaluate_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -13,8 +13,10 @@ import (
"github.com/stretchr/testify/mock"
"github.com/stretchr/testify/require"
"github.com/zimmski/osutil"
"github.com/zimmski/osutil/bytesutil"

"github.com/symflower/eval-dev-quality/evaluate/metrics"
metricstesting "github.com/symflower/eval-dev-quality/evaluate/metrics/testing"
"github.com/symflower/eval-dev-quality/evaluate/report"
"github.com/symflower/eval-dev-quality/language"
"github.com/symflower/eval-dev-quality/language/golang"
Expand All @@ -32,6 +34,26 @@ var (
ErrEmptyResponseFromModel = errors.New("empty response from model")
)

// file represents a file with path and content.
// It is used as the value type of the "testFiles" fixture map.
type file struct {
// Path is the file's path; the visible fixture uses a bare file name.
Path string
// Content is the full textual content of the file.
Content string
}

// testFiles holds common test files, keyed by a short name that test cases use for lookup.
// The content literal is passed through "bytesutil.StringTrimIndentations", presumably so it can be indented alongside the surrounding code (TODO confirm against that helper).
var testFiles = map[string]file{
// A minimal Go test file for package "plain" that declares one empty test function.
"plain": file{
Path: "plain_test.go",
Content: bytesutil.StringTrimIndentations(`
package plain

import "testing"

func TestFunction(t *testing.T){}
`),
},
}

func TestEvaluate(t *testing.T) {
type testCase struct {
Name string
Expand Down Expand Up @@ -318,19 +340,13 @@ func TestEvaluate(t *testing.T) {
d = bytes.ReplaceAll(d, []byte("plain"), []byte("next"))
require.NoError(t, os.WriteFile(repositoryNextConfigPath, d, 0))

generateTestsForFilePlainSuccess := func(args mock.Arguments) {
require.NoError(t, os.WriteFile(filepath.Join(args.String(2), "plain_test.go"), []byte("package plain\nimport \"testing\"\nfunc TestFunction(t *testing.T){}"), 0600))
}
generateTestsForFilePlainSuccessMetrics := metrics.Assessments{
metrics.AssessmentKeyProcessingTime: 1,
}
generateTestsForFilePlainError := errors.New("generateTestsForFile error")

generateSuccess := func(mockedModel *modeltesting.MockModel) {
mockedModel.On("GenerateTestsForFile", mock.Anything, mock.Anything, mock.Anything, mock.Anything).Return(generateTestsForFilePlainSuccessMetrics, nil).Run(generateTestsForFilePlainSuccess).Once()
mockedModel.RegisterGenerateSuccess(t, testFiles["plain"].Path, testFiles["plain"].Content, metricstesting.AssessmentsWithProcessingTime).Once()
}
generateError := func(mockedModel *modeltesting.MockModel) {
mockedModel.On("GenerateTestsForFile", mock.Anything, mock.Anything, mock.Anything, mock.Anything).Return(nil, generateTestsForFilePlainError).Once()
mockedModel.RegisterGenerateError(generateTestsForFilePlainError).Once()
}

{
Expand Down Expand Up @@ -514,14 +530,8 @@ func TestEvaluate(t *testing.T) {
}
})
t.Run("Runs", func(t *testing.T) {
generateTestsForFilePlainSuccess := func(args mock.Arguments) {
require.NoError(t, os.WriteFile(filepath.Join(args.String(2), "plain_test.go"), []byte("package plain\nimport \"testing\"\nfunc TestFunction(t *testing.T){}"), 0600))
}
generateTestsForFilePlainSuccessMetrics := metrics.Assessments{
metrics.AssessmentKeyProcessingTime: 1,
}
generateSuccess := func(mockedModel *modeltesting.MockModel) {
mockedModel.On("GenerateTestsForFile", mock.Anything, mock.Anything, mock.Anything, mock.Anything).Return(generateTestsForFilePlainSuccessMetrics, nil).Run(generateTestsForFilePlainSuccess)
mockedModel.RegisterGenerateSuccess(t, testFiles["plain"].Path, testFiles["plain"].Content, metricstesting.AssessmentsWithProcessingTime)
}
{
languageGolang := &golang.Language{}
Expand Down Expand Up @@ -628,14 +638,8 @@ func TestEvaluate(t *testing.T) {
})

t.Run("Preloading", func(t *testing.T) {
generateTestsForFilePlainSuccess := func(args mock.Arguments) {
require.NoError(t, os.WriteFile(filepath.Join(args.String(2), "plain_test.go"), []byte("package plain\nimport \"testing\"\nfunc TestFunction(t *testing.T){}"), 0600))
}
generateTestsForFilePlainSuccessMetrics := metrics.Assessments{
metrics.AssessmentKeyProcessingTime: 1,
}
generateSuccess := func(mockedModel *modeltesting.MockModel) {
mockedModel.On("GenerateTestsForFile", mock.Anything, mock.Anything, mock.Anything, mock.Anything).Return(generateTestsForFilePlainSuccessMetrics, nil).Run(generateTestsForFilePlainSuccess)
mockedModel.RegisterGenerateSuccess(t, testFiles["plain"].Path, testFiles["plain"].Content, metricstesting.AssessmentsWithProcessingTime)
}

{
Expand Down
5 changes: 5 additions & 0 deletions evaluate/metrics/testing/assessments.go
Original file line number Diff line number Diff line change
Expand Up @@ -24,3 +24,8 @@ func AssertAssessmentsEqual(t *testing.T, expected metrics.Assessments, actual m

assert.Truef(t, expected.Equal(actual), "expected:%s\nactual:%s", expected, actual)
}

// AssessmentsWithProcessingTime is an assessment collection whose only entry is a processing time of 1.
// It serves as a ready-made result for tests that need some assessments but do not care about concrete metrics.
// NOTE(review): this is a shared package-level value; if "metrics.Assessments" is a reference type, callers must not mutate it (verify).
var AssessmentsWithProcessingTime = metrics.Assessments{
metrics.AssessmentKeyProcessingTime: 1,
}
4 changes: 4 additions & 0 deletions evaluate/repository.go
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,10 @@ func Repository(logger *log.Logger, resultPath string, model evalmodel.Model, la

repositoryAssessment = metrics.NewAssessments()
for _, filePath := range filePaths {
if err := ResetTemporaryRepository(logger, testDataPath); err != nil {
logger.Panicf("ERROR: unable to reset temporary repository path: %s", err)
}

assessments, err := model.GenerateTestsForFile(log, language, testDataPath, filePath)
if err != nil {
problems = append(problems, pkgerrors.WithMessage(err, filePath))
Expand Down
53 changes: 51 additions & 2 deletions evaluate/repository_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@ import (
"github.com/symflower/eval-dev-quality/log"
"github.com/symflower/eval-dev-quality/model"
"github.com/symflower/eval-dev-quality/model/symflower"
modeltesting "github.com/symflower/eval-dev-quality/model/testing"
"github.com/symflower/eval-dev-quality/util"
)

Expand All @@ -32,7 +33,7 @@ func TestRepository(t *testing.T) {

ExpectedRepositoryAssessment metrics.Assessments
ExpectedResultFiles map[string]func(t *testing.T, filePath string, data string)
ExpectedProblems []error
ExpectedProblemContains []string
ExpectedError error
}

Expand All @@ -48,7 +49,16 @@ func TestRepository(t *testing.T) {
actualRepositoryAssessment, actualProblems, actualErr := Repository(logger, temporaryPath, tc.Model, tc.Language, temporaryRepositoryPath, tc.RepositoryPath)

metricstesting.AssertAssessmentsEqual(t, tc.ExpectedRepositoryAssessment, actualRepositoryAssessment)
assert.Equal(t, tc.ExpectedProblems, actualProblems)
if assert.Equal(t, len(tc.ExpectedProblemContains), len(actualProblems), "problems count") {
for i, expectedProblem := range tc.ExpectedProblemContains {
actualProblem := actualProblems[i]
assert.Containsf(t, actualProblem.Error(), expectedProblem, "Problem %d", i)
}
} else {
for i, problem := range actualProblems {
t.Logf("Actual problem %d:\n%+v", i, problem)
}
}
assert.Equal(t, tc.ExpectedError, actualErr)

actualResultFiles, err := osutil.FilesRecursive(temporaryPath)
Expand Down Expand Up @@ -100,6 +110,45 @@ func TestRepository(t *testing.T) {
},
},
})
// Checks that a broken test file generated for one task does not affect the next task of the same repository.
// The mocked model writes a non-compiling test file for the first call and a valid one for the second call.
t.Run("Clear repository on each task file", func(t *testing.T) {
temporaryDirectoryPath := t.TempDir()

// Build a minimal Go repository with two task files inside the temporary test data directory.
repositoryPath := filepath.Join(temporaryDirectoryPath, "golang", "plain")
require.NoError(t, os.MkdirAll(repositoryPath, 0700))
require.NoError(t, os.WriteFile(filepath.Join(repositoryPath, "go.mod"), []byte("module plain\n\ngo 1.21.5"), 0600))
require.NoError(t, os.WriteFile(filepath.Join(repositoryPath, "taskA.go"), []byte("package plain\n\nfunc TaskA(){}"), 0600))
require.NoError(t, os.WriteFile(filepath.Join(repositoryPath, "taskB.go"), []byte("package plain\n\nfunc TaskB(){}"), 0600))

modelMock := modeltesting.NewMockModelNamed(t, "mocked-model")

// Each registration is limited to a single call, so the order of registration is the order in which they are consumed.
// Generate invalid code for the first task.
modelMock.RegisterGenerateSuccess(t, "taskA_test.go", "does not compile", metricstesting.AssessmentsWithProcessingTime).Once()
// Generate valid code for the second task.
modelMock.RegisterGenerateSuccess(t, "taskB_test.go", "package plain\n\nimport \"testing\"\n\nfunc TestTaskB(t *testing.T){}", metricstesting.AssessmentsWithProcessingTime).Once()

validate(t, &testCase{
Name: "Plain",

Model: modelMock,
Language: &golang.Language{},
TestDataPath: temporaryDirectoryPath,
RepositoryPath: filepath.Join("golang", "plain"),

// Presumably both model responses count as error-free while only the second task's tests could be executed (TODO confirm metric semantics).
ExpectedRepositoryAssessment: metrics.Assessments{
metrics.AssessmentKeyFilesExecuted: 1,
metrics.AssessmentKeyResponseNoError: 2,
},
// Exactly one problem is expected, and its message must contain this parser error for the invalid file.
ExpectedProblemContains: []string{
"expected 'package', found does",
},
ExpectedResultFiles: map[string]func(t *testing.T, filePath string, data string){
filepath.Join("mocked-model", "golang", "golang", "plain.log"): func(t *testing.T, filePath, data string) {
assert.Contains(t, data, "Evaluating model \"mocked-model\"")
// The second task's test must pass even though the first task's generated file did not compile.
assert.Contains(t, data, "PASS: TestTaskB")
},
},
})
})
}

func TestTemporaryRepository(t *testing.T) {
Expand Down
22 changes: 21 additions & 1 deletion model/testing/helper.go
Original file line number Diff line number Diff line change
@@ -1,6 +1,14 @@
package modeltesting

import "testing"
import (
"os"
"path/filepath"
"testing"

"github.com/stretchr/testify/mock"
"github.com/stretchr/testify/require"
"github.com/symflower/eval-dev-quality/evaluate/metrics"
)

// NewMockModelNamed returns a new named mocked model.
func NewMockModelNamed(t *testing.T, id string) *MockModel {
Expand All @@ -9,3 +17,15 @@ func NewMockModelNamed(t *testing.T, id string) *MockModel {

return m
}

// RegisterGenerateSuccess registers a mock call for successful generation.
// Whenever the registered "GenerateTestsForFile" call is triggered, "fileContent" is written to "filePath" relative to the call's third argument, and the call returns "assessment" with a nil error.
// The mock call is returned so that callers can constrain it further, e.g. with "Once".
func (m *MockModel) RegisterGenerateSuccess(t *testing.T, filePath string, fileContent string, assessment metrics.Assessments) *mock.Call {
	// Side effect that stands in for the model producing a test file.
	writeGeneratedFile := func(args mock.Arguments) {
		targetPath := filepath.Join(args.String(2), filePath)
		require.NoError(t, os.WriteFile(targetPath, []byte(fileContent), 0600))
	}

	call := m.On("GenerateTestsForFile", mock.Anything, mock.Anything, mock.Anything, mock.Anything)
	call = call.Return(assessment, nil)

	return call.Run(writeGeneratedFile)
}

// RegisterGenerateError registers a mock call that errors on generation.
// The registered "GenerateTestsForFile" call accepts any arguments and returns no assessments together with "err".
// The mock call is returned so that callers can constrain it further, e.g. with "Once".
func (m *MockModel) RegisterGenerateError(err error) *mock.Call {
	call := m.On("GenerateTestsForFile", mock.Anything, mock.Anything, mock.Anything, mock.Anything)

	return call.Return(nil, err)
}
2 changes: 1 addition & 1 deletion util/exec.go
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,7 @@ func CommandWithResult(ctx context.Context, logger *log.Logger, command *Command
c.WaitDelay = 3 * time.Second // Some binaries do not like to be killed, e.g. "ollama", so we kill them after some time automatically.

if err := c.Run(); err != nil {
return writer.String(), pkgerrors.WithStack(err)
return writer.String(), pkgerrors.WithStack(pkgerrors.WithMessage(err, writer.String()))
}

return writer.String(), nil
Expand Down