From 2e5ec9e8e00c6040467a750fbfb70d3bae4ccc70 Mon Sep 17 00:00:00 2001 From: alpharush <0xalpharush@protonmail.com> Date: Wed, 4 Sep 2024 14:03:10 -0500 Subject: [PATCH] add function metadata to LCOV --- compilation/types/ast.go | 109 ++++++++++++++++++++--- fuzzing/coverage/source_analysis.go | 132 ++++++++++++++++++++++------ 2 files changed, 203 insertions(+), 38 deletions(-) diff --git a/compilation/types/ast.go b/compilation/types/ast.go index f6b21612..a224cce5 100644 --- a/compilation/types/ast.go +++ b/compilation/types/ast.go @@ -23,10 +23,55 @@ type Node interface { GetNodeType() string } +// FunctionDefinition is the function definition node +type FunctionDefinition struct { + // NodeType represents the node type (currently we only evaluate source unit node types) + NodeType string `json:"nodeType"` + // Src is the source file for this AST + Src string `json:"src"` + Name string `json:"name,omitempty"` +} + +func (s FunctionDefinition) GetNodeType() string { + return s.NodeType +} + +func (s FunctionDefinition) GetStart() int { + // 95:42:0 returns 95 + re := regexp.MustCompile(`([0-9]*):[0-9]*:[0-9]*`) + startCandidates := re.FindStringSubmatch(s.Src) + + if len(startCandidates) == 2 { // FindStringSubmatch includes the whole match as the first element + start, err := strconv.Atoi(startCandidates[1]) + if err == nil { + return start + } + } + return -1 +} + +func (s FunctionDefinition) GetLength() int { + // 95:42:0 returns 42 + re := regexp.MustCompile(`[0-9]*:([0-9]*):[0-9]*`) + endCandidates := re.FindStringSubmatch(s.Src) + + if len(endCandidates) == 2 { // FindStringSubmatch includes the whole match as the first element + end, err := strconv.Atoi(endCandidates[1]) + if err == nil { + return end + } + } + return -1 +} + // ContractDefinition is the contract definition node type ContractDefinition struct { - // NodeType represents the AST node type (note that it will always be a contract definition) + // NodeType represents the node type (currently 
we only evaluate source unit node types) NodeType string `json:"nodeType"` + // Nodes is a list of Nodes within the AST + Nodes []Node `json:"nodes"` + // Src is the source file for this AST + Src string `json:"src"` // CanonicalName is the name of the contract definition CanonicalName string `json:"canonicalName,omitempty"` // Kind is a ContractKind that represents what type of contract definition this is (contract, interface, or library) @@ -38,6 +83,49 @@ func (s ContractDefinition) GetNodeType() string { return s.NodeType } +func (c *ContractDefinition) UnmarshalJSON(data []byte) error { + // Unmarshal the top-level AST into our own representation. Defer the unmarshaling of all the individual nodes until later + type Alias ContractDefinition + aux := &struct { + Nodes []json.RawMessage `json:"nodes"` + + *Alias + }{ + Alias: (*Alias)(c), + } + + if err := json.Unmarshal(data, &aux); err != nil { + return err + } + + // Iterate through all the nodes of the contract definition + for _, nodeData := range aux.Nodes { + // Unmarshal the node data to retrieve the node type + var nodeType struct { + NodeType string `json:"nodeType"` + } + if err := json.Unmarshal(nodeData, &nodeType); err != nil { + return err + } + + // Unmarshal the contents of the node based on the node type + switch nodeType.NodeType { + case "FunctionDefinition": + // If this is a function definition, unmarshal it + var functionDefinition FunctionDefinition + if err := json.Unmarshal(nodeData, &functionDefinition); err != nil { + return err + } + c.Nodes = append(c.Nodes, functionDefinition) + default: + continue + } + } + + return nil + +} + // AST is the abstract syntax tree type AST struct { // NodeType represents the node type (currently we only evaluate source unit node types) @@ -62,11 +150,6 @@ func (a *AST) UnmarshalJSON(data []byte) error { return err } - // Check if nodeType is "SourceUnit". 
Return early otherwise - if aux.NodeType != "SourceUnit" { - return nil - } - // Iterate through all the nodes of the source unit for _, nodeData := range aux.Nodes { // Unmarshal the node data to retrieve the node type @@ -78,7 +161,6 @@ func (a *AST) UnmarshalJSON(data []byte) error { } // Unmarshal the contents of the node based on the node type - var node Node switch nodeType.NodeType { case "ContractDefinition": // If this is a contract definition, unmarshal it @@ -86,14 +168,21 @@ func (a *AST) UnmarshalJSON(data []byte) error { if err := json.Unmarshal(nodeData, &contractDefinition); err != nil { return err } - node = contractDefinition + a.Nodes = append(a.Nodes, contractDefinition) + + case "FunctionDefinition": + // If this is a function definition, unmarshal it + var functionDefinition FunctionDefinition + if err := json.Unmarshal(nodeData, &functionDefinition); err != nil { + return err + } + a.Nodes = append(a.Nodes, functionDefinition) + // TODO: Add cases for other node types as needed default: continue } - // Append the node - a.Nodes = append(a.Nodes, node) } return nil diff --git a/fuzzing/coverage/source_analysis.go b/fuzzing/coverage/source_analysis.go index 490d5977..0402dee7 100644 --- a/fuzzing/coverage/source_analysis.go +++ b/fuzzing/coverage/source_analysis.go @@ -2,6 +2,7 @@ package coverage import ( "bytes" + "encoding/json" "fmt" "sort" @@ -68,7 +69,7 @@ func (s *SourceAnalysis) GenerateLCOVReport() string { if line.IsActive { // DA:, if line.IsCovered { - buffer.WriteString(fmt.Sprintf("DA:%d,%d\n", idx+1, 1)) + buffer.WriteString(fmt.Sprintf("DA:%d,%d\n", idx+1, line.SuccessHitCount)) linesHit++ } else { buffer.WriteString(fmt.Sprintf("DA:%d,%d\n", idx+1, 0)) @@ -76,11 +77,40 @@ func (s *SourceAnalysis) GenerateLCOVReport() string { linesInstrumented++ } } - // LH: - // buffer.WriteString(fmt.Sprintf("LH:%d", linesHit)) - // LF: - // buffer.WriteString(fmt.Sprintf("LF:%d", linesInstrumented)) + // FN:, + // FNDA:, + for _, fn := range 
file.Functions { + byteStart := fn.GetStart() + length := fn.GetLength() + + startLine := sort.Search(len(file.CumulativeOffsetByLine), func(i int) bool { + return file.CumulativeOffsetByLine[i] > byteStart + }) + endLine := sort.Search(len(file.CumulativeOffsetByLine), func(i int) bool { + return file.CumulativeOffsetByLine[i] > byteStart+length + }) + + instrumented := 0 + hit := 0 + count := 0 + for i := startLine; i < endLine; i++ { + // index is zero-based, line numbers are 1-based + if file.Lines[i-1].IsActive { + instrumented++ + if file.Lines[i-1].IsCovered { + hit++ + } + } + } + if hit == instrumented { + count = 1 + } + + buffer.WriteString(fmt.Sprintf("FN:%d,%s\n", startLine, fn.Name)) + buffer.WriteString(fmt.Sprintf("FNDA:%d,%s\n", count, fn.Name)) + } } + buffer.WriteString("end_of_record\n") return buffer.String() } @@ -90,8 +120,14 @@ type SourceFileAnalysis struct { // Path describes the file path of the source file. This is kept here for access during report generation. Path string + // CumulativeOffsetByLine describes the cumulative byte offset for each line in the source file. + // For example, for a file with 5 lines, your list might look like: [0, 45, 98, 132, 189], where each number is the cumulative byte offset at the beginning of each line. + CumulativeOffsetByLine []int + // Lines describes information about a given source line and its coverage. Lines []*SourceLineAnalysis + + Functions []*types.FunctionDefinition } // ActiveLineCount returns the count of lines that are marked executable/active within the source file. 
@@ -160,13 +196,50 @@ func AnalyzeSourceCoverage(compilations []types.Compilation, coverageMaps *Cover return nil, fmt.Errorf("could not perform source code analysis, code was not cached for '%v'", sourcePath) } + lines, cumulativeOffset := parseSourceLines(compilation.SourceCode[sourcePath]) + funcs := make([]*types.FunctionDefinition, 0) + + var ast types.AST + b, err := json.Marshal(compilation.SourcePathToArtifact[sourcePath].Ast) + if err != nil { + return nil, fmt.Errorf("could not encode AST from sources: %v", err) + } + err = json.Unmarshal(b, &ast) + if err != nil { + return nil, fmt.Errorf("could not parse AST from sources: %v", err) + } + + for _, node := range ast.Nodes { + + if node.GetNodeType() == "FunctionDefinition" { + fn := node.(types.FunctionDefinition) + funcs = append(funcs, &fn) + } + if node.GetNodeType() == "ContractDefinition" { + contract := node.(types.ContractDefinition) + if contract.Kind == types.ContractKindInterface { + continue + } + for _, subNode := range contract.Nodes { + if subNode.GetNodeType() == "FunctionDefinition" { + fn := subNode.(types.FunctionDefinition) + funcs = append(funcs, &fn) + } + } + } + + } + // Obtain the parsed source code lines for this source. if _, ok := sourceAnalysis.Files[sourcePath]; !ok { sourceAnalysis.Files[sourcePath] = &SourceFileAnalysis{ - Path: sourcePath, - Lines: parseSourceLines(compilation.SourceCode[sourcePath]), + Path: sourcePath, + CumulativeOffsetByLine: cumulativeOffset, + Lines: lines, + Functions: funcs, } } + } } @@ -261,25 +334,26 @@ func analyzeContractSourceCoverage(compilation types.Compilation, sourceAnalysis // Obtain the source file this element maps to. if sourceFile, ok := sourceAnalysis.Files[sourcePath]; ok { // Mark all lines which fall within this range. 
- matchedSourceLine := false - for _, sourceLine := range sourceFile.Lines { - // Check if the line is within range - if sourceMapElement.Offset >= sourceLine.Start && sourceMapElement.Offset < sourceLine.End { - // Mark the line active/executable. - sourceLine.IsActive = true - - // Set its coverage state and increment hit counts - sourceLine.SuccessHitCount += succHitCount - sourceLine.RevertHitCount += revertHitCount - sourceLine.IsCovered = sourceLine.IsCovered || sourceLine.SuccessHitCount > 0 - sourceLine.IsCoveredReverted = sourceLine.IsCoveredReverted || sourceLine.RevertHitCount > 0 - - // Indicate we matched a source line, so when we stop matching sequentially, we know we can exit - // early. - matchedSourceLine = true - } else if matchedSourceLine { - break - } + start := sourceMapElement.Offset + + startLine := sort.Search(len(sourceFile.CumulativeOffsetByLine), func(i int) bool { + return sourceFile.CumulativeOffsetByLine[i] > start + }) + + // index is zero-based, line numbers are 1-based + sourceLine := sourceFile.Lines[startLine-1] + + // Check if the line is within range + if sourceMapElement.Offset < sourceLine.End { + // Mark the line active/executable. + sourceLine.IsActive = true + + // Set its coverage state and increment hit counts + sourceLine.SuccessHitCount += succHitCount + sourceLine.RevertHitCount += revertHitCount + sourceLine.IsCovered = sourceLine.IsCovered || sourceLine.SuccessHitCount > 0 + sourceLine.IsCoveredReverted = sourceLine.IsCoveredReverted || sourceLine.RevertHitCount > 0 + } } else { return fmt.Errorf("could not perform source code analysis, missing source '%v'", sourcePath) } @@ -328,10 +402,11 @@ func filterSourceMaps(compilation types.Compilation, sourceMap types.SourceMap) // parseSourceLines splits the provided source code into SourceLineAnalysis objects. // Returns the SourceLineAnalysis objects. 
-func parseSourceLines(sourceCode []byte) []*SourceLineAnalysis { +func parseSourceLines(sourceCode []byte) ([]*SourceLineAnalysis, []int) { // Create our lines and a variable to track where our current line start offset is. var lines []*SourceLineAnalysis var lineStart int + var cumulativeOffset []int // Split the source code on new line characters sourceCodeLinesBytes := bytes.Split(sourceCode, []byte("\n")) @@ -347,9 +422,10 @@ func parseSourceLines(sourceCode []byte) []*SourceLineAnalysis { IsCovered: false, IsCoveredReverted: false, }) + cumulativeOffset = append(cumulativeOffset, int(lineStart)) lineStart = lineEnd } // Return the resulting lines - return lines + return lines, cumulativeOffset }