From 2e5ec9e8e00c6040467a750fbfb70d3bae4ccc70 Mon Sep 17 00:00:00 2001 From: alpharush <0xalpharush@protonmail.com> Date: Wed, 4 Sep 2024 14:03:10 -0500 Subject: [PATCH] add function metadata to LCOV --- compilation/types/ast.go | 109 ++++++++++++++++++++--- fuzzing/coverage/source_analysis.go | 132 ++++++++++++++++++++++------ 2 files changed, 203 insertions(+), 38 deletions(-) diff --git a/compilation/types/ast.go b/compilation/types/ast.go index f6b21612..a224cce5 100644 --- a/compilation/types/ast.go +++ b/compilation/types/ast.go @@ -23,10 +23,55 @@ type Node interface { GetNodeType() string } +// FunctionDefinition is the function definition node +type FunctionDefinition struct { + // NodeType represents the node type (currently we only evaluate source unit node types) + NodeType string `json:"nodeType"` + // Src is the source file for this AST + Src string `json:"src"` + Name string `json:"name,omitempty"` +} + +func (s FunctionDefinition) GetNodeType() string { + return s.NodeType +} + +func (s FunctionDefinition) GetStart() int { + // 95:42:0 returns 95 + re := regexp.MustCompile(`([0-9]*):[0-9]*:[0-9]*`) + startCandidates := re.FindStringSubmatch(s.Src) + + if len(startCandidates) == 2 { // FindStringSubmatch includes the whole match as the first element + start, err := strconv.Atoi(startCandidates[1]) + if err == nil { + return start + } + } + return -1 +} + +func (s FunctionDefinition) GetLength() int { + // 95:42:0 returns 42 + re := regexp.MustCompile(`[0-9]*:([0-9]*):[0-9]*`) + endCandidates := re.FindStringSubmatch(s.Src) + + if len(endCandidates) == 2 { // FindStringSubmatch includes the whole match as the first element + end, err := strconv.Atoi(endCandidates[1]) + if err == nil { + return end + } + } + return -1 +} + // ContractDefinition is the contract definition node type ContractDefinition struct { - // NodeType represents the AST node type (note that it will always be a contract definition) + // NodeType represents the node type (currently 
we only evaluate source unit node types) NodeType string `json:"nodeType"` + // Nodes is a list of Nodes within the AST + Nodes []Node `json:"nodes"` + // Src is the source file for this AST + Src string `json:"src"` // CanonicalName is the name of the contract definition CanonicalName string `json:"canonicalName,omitempty"` // Kind is a ContractKind that represents what type of contract definition this is (contract, interface, or library) @@ -38,6 +83,49 @@ func (s ContractDefinition) GetNodeType() string { return s.NodeType } +func (c *ContractDefinition) UnmarshalJSON(data []byte) error { + // Unmarshal the top-level AST into our own representation. Defer the unmarshaling of all the individual nodes until later + type Alias ContractDefinition + aux := &struct { + Nodes []json.RawMessage `json:"nodes"` + + *Alias + }{ + Alias: (*Alias)(c), + } + + if err := json.Unmarshal(data, &aux); err != nil { + return err + } + + // Iterate through all the nodes of the contract definition + for _, nodeData := range aux.Nodes { + // Unmarshal the node data to retrieve the node type + var nodeType struct { + NodeType string `json:"nodeType"` + } + if err := json.Unmarshal(nodeData, &nodeType); err != nil { + return err + } + + // Unmarshal the contents of the node based on the node type + switch nodeType.NodeType { + case "FunctionDefinition": + // If this is a function definition, unmarshal it + var functionDefinition FunctionDefinition + if err := json.Unmarshal(nodeData, &functionDefinition); err != nil { + return err + } + c.Nodes = append(c.Nodes, functionDefinition) + default: + continue + } + } + + return nil + +} + // AST is the abstract syntax tree type AST struct { // NodeType represents the node type (currently we only evaluate source unit node types) @@ -62,11 +150,6 @@ func (a *AST) UnmarshalJSON(data []byte) error { return err } - // Check if nodeType is "SourceUnit". 
Return early otherwise - if aux.NodeType != "SourceUnit" { - return nil - } - // Iterate through all the nodes of the source unit for _, nodeData := range aux.Nodes { // Unmarshal the node data to retrieve the node type @@ -78,7 +161,6 @@ func (a *AST) UnmarshalJSON(data []byte) error { } // Unmarshal the contents of the node based on the node type - var node Node switch nodeType.NodeType { case "ContractDefinition": // If this is a contract definition, unmarshal it @@ -86,14 +168,21 @@ func (a *AST) UnmarshalJSON(data []byte) error { if err := json.Unmarshal(nodeData, &contractDefinition); err != nil { return err } - node = contractDefinition + a.Nodes = append(a.Nodes, contractDefinition) + + case "FunctionDefinition": + // If this is a function definition, unmarshal it + var functionDefinition FunctionDefinition + if err := json.Unmarshal(nodeData, &functionDefinition); err != nil { + return err + } + a.Nodes = append(a.Nodes, functionDefinition) + // TODO: Add cases for other node types as needed default: continue } - // Append the node - a.Nodes = append(a.Nodes, node) } return nil diff --git a/fuzzing/coverage/source_analysis.go b/fuzzing/coverage/source_analysis.go index 490d5977..0402dee7 100644 --- a/fuzzing/coverage/source_analysis.go +++ b/fuzzing/coverage/source_analysis.go @@ -2,6 +2,7 @@ package coverage import ( "bytes" + "encoding/json" "fmt" "sort" @@ -68,7 +69,7 @@ func (s *SourceAnalysis) GenerateLCOVReport() string { if line.IsActive { // DA:, if line.IsCovered { - buffer.WriteString(fmt.Sprintf("DA:%d,%d\n", idx+1, 1)) + buffer.WriteString(fmt.Sprintf("DA:%d,%d\n", idx+1, line.SuccessHitCount)) linesHit++ } else { buffer.WriteString(fmt.Sprintf("DA:%d,%d\n", idx+1, 0)) @@ -76,11 +77,40 @@ func (s *SourceAnalysis) GenerateLCOVReport() string { linesInstrumented++ } } - // LH: - // buffer.WriteString(fmt.Sprintf("LH:%d", linesHit)) - // LF: - // buffer.WriteString(fmt.Sprintf("LF:%d", linesInstrumented)) + // FN:, + // FNDA:, + for _, fn := range 
file.Functions { + byteStart := fn.GetStart() + length := fn.GetLength() + + startLine := sort.Search(len(file.CumulativeOffsetByLine), func(i int) bool { + return file.CumulativeOffsetByLine[i] > byteStart + }) + endLine := sort.Search(len(file.CumulativeOffsetByLine), func(i int) bool { + return file.CumulativeOffsetByLine[i] > byteStart+length + }) + + instrumented := 0 + hit := 0 + count := 0 + for i := startLine; i < endLine; i++ { + // index is zero-based, line numbers are 1-based + if file.Lines[i-1].IsActive { + instrumented++ + if file.Lines[i-1].IsCovered { + hit++ + } + } + } + if hit == instrumented { + count = 1 + } + + buffer.WriteString(fmt.Sprintf("FN:%d,%s\n", startLine, fn.Name)) + buffer.WriteString(fmt.Sprintf("FNDA:%d,%s\n", count, fn.Name)) + } } + buffer.WriteString("end_of_record\n") return buffer.String() } @@ -90,8 +120,14 @@ type SourceFileAnalysis struct { // Path describes the file path of the source file. This is kept here for access during report generation. Path string + // CumulativeOffsetByLine describes the cumulative byte offset for each line in the source file. + // For example, for a file with 5 lines, your list might look like: [0, 45, 98, 132, 189], where each number is the cumulative byte offset at the beginning of each line. + CumulativeOffsetByLine []int + // Lines describes information about a given source line and its coverage. Lines []*SourceLineAnalysis + + Functions []*types.FunctionDefinition } // ActiveLineCount returns the count of lines that are marked executable/active within the source file. 
@@ -160,13 +196,50 @@ func AnalyzeSourceCoverage(compilations []types.Compilation, coverageMaps *Cover return nil, fmt.Errorf("could not perform source code analysis, code was not cached for '%v'", sourcePath) } + lines, cumulativeOffset := parseSourceLines(compilation.SourceCode[sourcePath]) + funcs := make([]*types.FunctionDefinition, 0) + + var ast types.AST + b, err := json.Marshal(compilation.SourcePathToArtifact[sourcePath].Ast) + if err != nil { + return nil, fmt.Errorf("could not encode AST from sources: %v", err) + } + err = json.Unmarshal(b, &ast) + if err != nil { + return nil, fmt.Errorf("could not parse AST from sources: %v", err) + } + + for _, node := range ast.Nodes { + + if node.GetNodeType() == "FunctionDefinition" { + fn := node.(types.FunctionDefinition) + funcs = append(funcs, &fn) + } + if node.GetNodeType() == "ContractDefinition" { + contract := node.(types.ContractDefinition) + if contract.Kind == types.ContractKindInterface { + continue + } + for _, subNode := range contract.Nodes { + if subNode.GetNodeType() == "FunctionDefinition" { + fn := subNode.(types.FunctionDefinition) + funcs = append(funcs, &fn) + } + } + } + + } + // Obtain the parsed source code lines for this source. if _, ok := sourceAnalysis.Files[sourcePath]; !ok { sourceAnalysis.Files[sourcePath] = &SourceFileAnalysis{ - Path: sourcePath, - Lines: parseSourceLines(compilation.SourceCode[sourcePath]), + Path: sourcePath, + CumulativeOffsetByLine: cumulativeOffset, + Lines: lines, + Functions: funcs, } } + } } @@ -261,25 +334,26 @@ func analyzeContractSourceCoverage(compilation types.Compilation, sourceAnalysis // Obtain the source file this element maps to. if sourceFile, ok := sourceAnalysis.Files[sourcePath]; ok { // Mark all lines which fall within this range. 
- matchedSourceLine := false - for _, sourceLine := range sourceFile.Lines { - // Check if the line is within range - if sourceMapElement.Offset >= sourceLine.Start && sourceMapElement.Offset < sourceLine.End { - // Mark the line active/executable. - sourceLine.IsActive = true - - // Set its coverage state and increment hit counts - sourceLine.SuccessHitCount += succHitCount - sourceLine.RevertHitCount += revertHitCount - sourceLine.IsCovered = sourceLine.IsCovered || sourceLine.SuccessHitCount > 0 - sourceLine.IsCoveredReverted = sourceLine.IsCoveredReverted || sourceLine.RevertHitCount > 0 - - // Indicate we matched a source line, so when we stop matching sequentially, we know we can exit - // early. - matchedSourceLine = true - } else if matchedSourceLine { - break - } + start := sourceMapElement.Offset + + startLine := sort.Search(len(sourceFile.CumulativeOffsetByLine), func(i int) bool { + return sourceFile.CumulativeOffsetByLine[i] > start + }) + + // index is zero-based, line numbers are 1-based + sourceLine := sourceFile.Lines[startLine-1] + + // Check if the line is within range + if sourceMapElement.Offset < sourceLine.End { + // Mark the line active/executable. + sourceLine.IsActive = true + + // Set its coverage state and increment hit counts + sourceLine.SuccessHitCount += succHitCount + sourceLine.RevertHitCount += revertHitCount + sourceLine.IsCovered = sourceLine.IsCovered || sourceLine.SuccessHitCount > 0 + sourceLine.IsCoveredReverted = sourceLine.IsCoveredReverted || sourceLine.RevertHitCount > 0 + } } else { return fmt.Errorf("could not perform source code analysis, missing source '%v'", sourcePath) } @@ -328,10 +402,11 @@ func filterSourceMaps(compilation types.Compilation, sourceMap types.SourceMap) // parseSourceLines splits the provided source code into SourceLineAnalysis objects. // Returns the SourceLineAnalysis objects. 
-func parseSourceLines(sourceCode []byte) []*SourceLineAnalysis { +func parseSourceLines(sourceCode []byte) ([]*SourceLineAnalysis, []int) { // Create our lines and a variable to track where our current line start offset is. var lines []*SourceLineAnalysis var lineStart int + var cumulativeOffset []int // Split the source code on new line characters sourceCodeLinesBytes := bytes.Split(sourceCode, []byte("\n")) @@ -347,9 +422,10 @@ func parseSourceLines(sourceCode []byte) []*SourceLineAnalysis { IsCovered: false, IsCoveredReverted: false, }) + cumulativeOffset = append(cumulativeOffset, int(lineStart)) lineStart = lineEnd } // Return the resulting lines - return lines + return lines, cumulativeOffset }