Skip to content

Commit 46bb81d

Browse files
authored
Merge pull request #10676 from ethereum/cli-bytecode-comparison
Bytecode comparison via CLI interface
2 parents b552566 + 53f9a11 commit 46bb81d

12 files changed

+529
-71
lines changed

.circleci/config.yml

Lines changed: 38 additions & 14 deletions
Original file line number | Diff line number | Diff line change
@@ -925,13 +925,19 @@ jobs:
925925
- checkout
926926
- attach_workspace:
927927
at: build
928-
- run: scripts/bytecodecompare/storebytecode.sh && cp -v report.txt bytecode-report-ubuntu.txt
928+
- run: mkdir test-cases/
929+
- run: cd test-cases && ../scripts/isolate_tests.py ../test/
930+
- run: cd test-cases && ../scripts/bytecodecompare/prepare_report.py ../build/solc/solc --interface standard-json && mv -v report.txt ../bytecode-report-ubuntu-json.txt
931+
- run: cd test-cases && ../scripts/bytecodecompare/prepare_report.py ../build/solc/solc --interface cli && mv -v report.txt ../bytecode-report-ubuntu-cli.txt
929932
- store_artifacts:
930-
path: report.txt
933+
path: bytecode-report-ubuntu-json.txt
934+
- store_artifacts:
935+
path: bytecode-report-ubuntu-cli.txt
931936
- persist_to_workspace:
932937
root: .
933938
paths:
934-
- bytecode-report-ubuntu.txt
939+
- bytecode-report-ubuntu-json.txt
940+
- bytecode-report-ubuntu-cli.txt
935941

936942
b_bytecode_osx:
937943
macos:
@@ -942,13 +948,19 @@ jobs:
942948
- checkout
943949
- attach_workspace:
944950
at: .
945-
- run: scripts/bytecodecompare/storebytecode.sh && cp -v report.txt bytecode-report-osx.txt
951+
- run: mkdir test-cases/
952+
- run: cd test-cases && ../scripts/isolate_tests.py ../test/
953+
- run: cd test-cases && ../scripts/bytecodecompare/prepare_report.py ../build/solc/solc --interface standard-json && mv -v report.txt ../bytecode-report-osx-json.txt
954+
- run: cd test-cases && ../scripts/bytecodecompare/prepare_report.py ../build/solc/solc --interface cli && mv -v report.txt ../bytecode-report-osx-cli.txt
955+
- store_artifacts:
956+
path: bytecode-report-osx-json.txt
946957
- store_artifacts:
947-
path: report.txt
958+
path: bytecode-report-osx-cli.txt
948959
- persist_to_workspace:
949960
root: .
950961
paths:
951-
- bytecode-report-osx.txt
962+
- bytecode-report-osx-json.txt
963+
- bytecode-report-osx-cli.txt
952964

953965
b_bytecode_win:
954966
executor:
@@ -961,15 +973,19 @@ jobs:
961973
- checkout
962974
- attach_workspace:
963975
at: build
964-
- run: python scripts\isolate_tests.py test\
965-
- run: python scripts\bytecodecompare\prepare_report.py build\solc\Release\solc.exe
966-
- run: cp report.txt bytecode-report-windows.txt
976+
- run: mkdir test-cases\
977+
- run: cd test-cases\ && python ..\scripts\isolate_tests.py ..\test\
978+
- run: cd test-cases\ && python ..\scripts\bytecodecompare\prepare_report.py ..\build\solc\Release\solc.exe --interface standard-json && move report.txt ..\bytecode-report-windows-json.txt
979+
- run: cd test-cases\ && python ..\scripts\bytecodecompare\prepare_report.py ..\build\solc\Release\solc.exe --interface cli && move report.txt ..\bytecode-report-windows-cli.txt
980+
- store_artifacts:
981+
path: bytecode-report-windows-json.txt
967982
- store_artifacts:
968-
path: report.txt
983+
path: bytecode-report-windows-cli.txt
969984
- persist_to_workspace:
970985
root: .
971986
paths:
972-
- bytecode-report-windows.txt
987+
- bytecode-report-windows-json.txt
988+
- bytecode-report-windows-cli.txt
973989

974990
b_bytecode_ems:
975991
docker:
@@ -980,9 +996,9 @@ jobs:
980996
- checkout
981997
- attach_workspace:
982998
at: emscripten_build/libsolc
983-
- run: scripts/bytecodecompare/storebytecode.sh && cp -v report.txt bytecode-report-emscripten.txt
999+
- run: scripts/bytecodecompare/storebytecode.sh && mv -v report.txt bytecode-report-emscripten.txt
9841000
- store_artifacts:
985-
path: report.txt
1001+
path: bytecode-report-emscripten.txt
9861002
- persist_to_workspace:
9871003
root: .
9881004
paths:
@@ -994,7 +1010,15 @@ jobs:
9941010
steps:
9951011
- attach_workspace:
9961012
at: .
997-
- run: diff --report-identical-files --from-file bytecode-report-emscripten.txt bytecode-report-ubuntu.txt bytecode-report-osx.txt bytecode-report-windows.txt
1013+
- run: |
1014+
diff --report-identical-files --from-file \
1015+
bytecode-report-emscripten.txt \
1016+
bytecode-report-ubuntu-json.txt \
1017+
bytecode-report-ubuntu-cli.txt \
1018+
bytecode-report-osx-json.txt \
1019+
bytecode-report-osx-cli.txt \
1020+
bytecode-report-windows-json.txt \
1021+
bytecode-report-windows-cli.txt
9981022
9991023
workflows:
10001024
version: 2

scripts/bytecodecompare/prepare_report.js

Lines changed: 16 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -24,10 +24,25 @@ for (const optimize of [false, true])
2424

2525
const result = JSON.parse(compiler.compile(JSON.stringify(input)))
2626

27+
let internalCompilerError = false
28+
if ('errors' in result)
29+
{
30+
for (const error of result['errors'])
31+
// JSON interface still returns contract metadata in case of an internal compiler error while
32+
// CLI interface does not. To make reports comparable we must force this case to be detected as
33+
// an error in both cases.
34+
if (['UnimplementedFeatureError', 'CompilerError', 'CodeGenerationError'].includes(error['type']))
35+
{
36+
internalCompilerError = true
37+
break
38+
}
39+
}
40+
2741
if (
2842
!('contracts' in result) ||
2943
Object.keys(result['contracts']).length === 0 ||
30-
Object.keys(result['contracts']).every(file => Object.keys(result['contracts'][file]).length === 0)
44+
Object.keys(result['contracts']).every(file => Object.keys(result['contracts'][file]).length === 0) ||
45+
internalCompilerError
3146
)
3247
// NOTE: do not exit here because this may be run on source which cannot be compiled
3348
console.log(filename + ': <ERROR>')

scripts/bytecodecompare/prepare_report.py

Lines changed: 160 additions & 47 deletions
Original file line number | Diff line number | Diff line change
@@ -3,13 +3,26 @@
33
import sys
44
import subprocess
55
import json
6+
import re
67
from argparse import ArgumentParser
78
from dataclasses import dataclass
9+
from enum import Enum
810
from glob import glob
911
from pathlib import Path
12+
from tempfile import TemporaryDirectory
1013
from typing import List, Optional, Tuple, Union
1114

1215

16+
# Matches the header line that introduces one contract's section in the CLI output, i.e. a whole
# line of the form `======= <file name>:<contract name> =======`. `file_name` is greedy and
# `contract_name` cannot contain a colon, so the two are separated at the last colon of the header.
CONTRACT_SEPARATOR_PATTERN = re.compile(r'^======= (?P<file_name>.+):(?P<contract_name>[^:]+) =======$', re.MULTILINE)
# Matches a `Binary:` line and captures the single line right after it (which may be empty) as `bytecode`.
BYTECODE_REGEX = re.compile(r'^Binary:\n(?P<bytecode>.*)$', re.MULTILINE)
# Matches a `Metadata:` line and captures the single line right after it as `metadata`;
# that line must start with `{` and end with `}`.
METADATA_REGEX = re.compile(r'^Metadata:\n(?P<metadata>\{.*\})$', re.MULTILINE)
19+
20+
21+
class CompilerInterface(Enum):
    """Ways of invoking the compiler that this script can use to produce a report.

    The member values are the strings offered as choices of the `--interface` command-line option.
    """
    # The source file name and output options are passed as command-line arguments.
    CLI = 'cli'
    # The compiler is started with `--standard-json` and receives a JSON request as its input.
    STANDARD_JSON = 'standard-json'
24+
25+
1326
@dataclass(frozen=True)
1427
class ContractReport:
1528
contract_name: str
@@ -53,10 +66,19 @@ def load_source(path: Union[Path, str]) -> str:
5366
def parse_standard_json_output(source_file_name: Path, standard_json_output: str) -> FileReport:
5467
decoded_json_output = json.loads(standard_json_output.strip())
5568

69+
# JSON interface still returns contract metadata in case of an internal compiler error while
70+
# CLI interface does not. To make reports comparable we must force this case to be detected as
71+
# an error in both cases.
72+
internal_compiler_error = any(
73+
error['type'] in ['UnimplementedFeatureError', 'CompilerError', 'CodeGenerationError']
74+
for error in decoded_json_output.get('errors', {})
75+
)
76+
5677
if (
5778
'contracts' not in decoded_json_output or
5879
len(decoded_json_output['contracts']) == 0 or
59-
all(len(file_results) == 0 for file_name, file_results in decoded_json_output['contracts'].items())
80+
all(len(file_results) == 0 for file_name, file_results in decoded_json_output['contracts'].items()) or
81+
internal_compiler_error
6082
):
6183
return FileReport(file_name=source_file_name, contract_reports=None)
6284

@@ -74,62 +96,145 @@ def parse_standard_json_output(source_file_name: Path, standard_json_output: str
7496
return file_report
7597

7698

77-
def prepare_compiler_input(compiler_path: Path, source_file_name: Path, optimize: bool) -> Tuple[List[str], str]:
78-
json_input: dict = {
79-
'language': 'Solidity',
80-
'sources': {
81-
str(source_file_name): {'content': load_source(source_file_name)}
82-
},
83-
'settings': {
84-
'optimizer': {'enabled': optimize},
85-
'outputSelection': {'*': {'*': ['evm.bytecode.object', 'metadata']}},
86-
'modelChecker': {'engine': 'none'},
87-
}
88-
}
99+
def parse_cli_output(source_file_name: Path, cli_output: str) -> FileReport:
    """Turn the text printed by the compiler's CLI interface into a FileReport.

    The output is cut at every contract header matched by CONTRACT_SEPARATOR_PATTERN.

    :param source_file_name: Stored as `file_name` of the returned FileReport.
    :param cli_output: Text to parse (the caller passes the compiler's standard output).
    :return: A FileReport with `contract_reports=None` when the output contains no contract header.
        Otherwise it holds one ContractReport per header, in output order. `bytecode` and `metadata`
        are None when BYTECODE_REGEX / METADATA_REGEX do not match in that contract's part of the output.
    """
    # With two capturing groups in the pattern, re.split() yields the text before the first header
    # and then, for each header, both captured groups followed by the text up to the next header.
    # Empty strings are never dropped, so the length is always 3 * (number of headers) + 1.
    pieces = re.split(CONTRACT_SEPARATOR_PATTERN, cli_output)
    assert len(pieces) % 3 == 1

    # Only the leading text is present: the compiler printed no contract at all.
    if len(pieces) == 1:
        return FileReport(file_name=source_file_name, contract_reports=None)

    file_report = FileReport(file_name=source_file_name, contract_reports=[])
    for first in range(1, len(pieces), 3):
        file_name, contract_name, contract_output = pieces[first:first + 3]

        bytecode_match = re.search(BYTECODE_REGEX, contract_output)
        metadata_match = re.search(METADATA_REGEX, contract_output)
        bytecode = None if bytecode_match is None else bytecode_match['bytecode']
        metadata = None if metadata_match is None else metadata_match['metadata']

        assert file_report.contract_reports is not None
        file_report.contract_reports.append(ContractReport(
            contract_name=contract_name,
            file_name=Path(file_name),
            bytecode=bytecode,
            metadata=metadata,
        ))

    return file_report
95123

96-
def run_compiler(compiler_path: Path, source_file_name: Path, optimize: bool) -> FileReport:
97-
(command_line, compiler_input) = prepare_compiler_input(compiler_path, Path(Path(source_file_name).name), optimize)
98124

99-
process = subprocess.run(
100-
command_line,
101-
input=compiler_input,
102-
encoding='utf8',
103-
capture_output=True,
104-
check=False,
105-
)
125+
def prepare_compiler_input(
    compiler_path: Path,
    source_file_name: Path,
    optimize: bool,
    interface: CompilerInterface
) -> Tuple[List[str], str]:
    """Build the command line and the input text for one compiler run.

    :param compiler_path: Compiler executable; becomes the first element of the command line.
    :param source_file_name: Source file to compile; its content is read with load_source().
    :param optimize: Whether to enable the optimizer.
    :param interface: Which compiler interface the command line is built for.
    :return: `(command_line, compiler_input)`. For STANDARD_JSON, `compiler_input` is the serialized
        JSON request. For CLI, it is the content of the source file as returned by load_source().
    """
    if interface == CompilerInterface.STANDARD_JSON:
        settings = {
            'optimizer': {'enabled': optimize},
            'outputSelection': {'*': {'*': ['evm.bytecode.object', 'metadata']}},
            'modelChecker': {'engine': 'none'},
        }
        request: dict = {
            'language': 'Solidity',
            'sources': {
                str(source_file_name): {'content': load_source(source_file_name)}
            },
            'settings': settings,
        }
        return ([str(compiler_path), '--standard-json'], json.dumps(request))

    assert interface == CompilerInterface.CLI

    command_line = [
        str(compiler_path),
        str(source_file_name),
        '--bin',
        '--metadata',
        '--model-checker-engine',
        'none',
    ]
    if optimize:
        command_line.append('--optimize')

    return (command_line, load_source(source_file_name))
158+
159+
160+
def run_compiler(
    compiler_path: Path,
    source_file_name: Path,
    optimize: bool,
    interface: CompilerInterface,
    tmp_dir: Path,
) -> FileReport:
    """Compile one source file through the selected interface and parse the compiler's output.

    The compiler is run with `check=False`, so a non-zero exit status does not raise. Only its
    standard output is parsed; standard error is captured but not used here.

    :param compiler_path: Compiler executable.
    :param source_file_name: Source file to compile. Only its final path component is handed to
        prepare_compiler_input().
    :param optimize: Whether to enable the optimizer.
    :param interface: Compiler interface to use.
    :param tmp_dir: Directory that receives a copy of the source and serves as the compiler's
        working directory. Only used by the CLI interface.
    :return: The FileReport parsed from the compiler's standard output.
    """
    bare_source_name = Path(source_file_name.name)

    if interface == CompilerInterface.STANDARD_JSON:
        (command_line, compiler_input) = prepare_compiler_input(
            compiler_path,
            bare_source_name,
            optimize,
            interface
        )
        # The JSON request is fed to the compiler as its input.
        completed = subprocess.run(
            command_line,
            input=compiler_input,
            encoding='utf8',
            capture_output=True,
            check=False,
        )
        return parse_standard_json_output(Path(source_file_name), completed.stdout)

    assert interface == CompilerInterface.CLI
    assert tmp_dir is not None

    # The compiler path is made absolute because the process below runs with cwd=tmp_dir.
    (command_line, compiler_input) = prepare_compiler_input(
        compiler_path.absolute(),
        bare_source_name,
        optimize,
        interface
    )

    # Create a copy that we can use directly with the CLI interface.
    # NOTE: newline='' disables newline conversion.
    # We want the file exactly as is because changing even a single byte in the source affects metadata.
    source_copy_path = tmp_dir / source_file_name.name
    with open(source_copy_path, 'w', encoding='utf8', newline='') as source_copy:
        source_copy.write(compiler_input)

    completed = subprocess.run(
        command_line,
        cwd=tmp_dir,
        encoding='utf8',
        capture_output=True,
        check=False,
    )
    return parse_cli_output(Path(source_file_name), completed.stdout)
212+
213+
214+
def generate_report(source_file_names: List[str], compiler_path: Path, interface: CompilerInterface):
    """Compile every source file without and with optimization and write the results to `report.txt`.

    The report is written to `report.txt` in the current working directory with `\\n` line endings.
    Files are processed in sorted order, first all of them with the optimizer disabled and then all
    of them with it enabled. Each of the two passes gets its own temporary directory, which is
    removed when the pass ends.

    :param source_file_names: Names of the source files to compile.
    :param compiler_path: Compiler executable.
    :param interface: Compiler interface to use for every compilation.
    :raises: Any exception raised while processing a file is re-raised after a message naming the
        file and the optimization setting has been printed to stderr.
    """
    with open('report.txt', mode='w', encoding='utf8', newline='\n') as report_file:
        for optimize in (False, True):
            with TemporaryDirectory(prefix='prepare_report-') as tmp_dir:
                scratch_dir = Path(tmp_dir)
                for source_file_name in sorted(source_file_names):
                    try:
                        file_report = run_compiler(
                            compiler_path,
                            Path(source_file_name),
                            optimize,
                            interface,
                            scratch_dir,
                        )
                        report_file.write(file_report.format_report())
                    except subprocess.CalledProcessError as error:
                        # This exception type carries the compiler's output, so include it.
                        print(
                            f"\n\nInterrupted by an exception while processing file "
                            f"'{source_file_name}' with optimize={optimize}\n\n"
                            f"COMPILER STDOUT:\n{error.stdout}\n"
                            f"COMPILER STDERR:\n{error.stderr}\n",
                            file=sys.stderr
                        )
                        raise
                    except:
                        # Deliberately catches everything: the exception is only annotated and re-raised.
                        print(
                            f"\n\nInterrupted by an exception while processing file "
                            f"'{source_file_name}' with optimize={optimize}\n",
                            file=sys.stderr
                        )
                        raise
133238

134239

135240
def commandline_parser() -> ArgumentParser:
@@ -140,6 +245,13 @@ def commandline_parser() -> ArgumentParser:
140245

141246
parser = ArgumentParser(description=script_description)
142247
parser.add_argument(dest='compiler_path', help="Solidity compiler executable")
248+
parser.add_argument(
249+
'--interface',
250+
dest='interface',
251+
default=CompilerInterface.STANDARD_JSON.value,
252+
choices=[c.value for c in CompilerInterface],
253+
help="Compiler interface to use."
254+
)
143255
return parser;
144256

145257

@@ -148,4 +260,5 @@ def commandline_parser() -> ArgumentParser:
148260
generate_report(
149261
glob("*.sol"),
150262
Path(options.compiler_path),
263+
CompilerInterface(options.interface),
151264
)
Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,8 @@
1+
Warning: SPDX license identifier not provided in source file. Before publishing, consider adding a comment containing "SPDX-License-Identifier: <SPDX-License>" to each source file. Use "SPDX-License-Identifier: UNLICENSED" for non-open-source code. Please see https://spdx.org for more information.
2+
--> test_1c3426238b8296745d8d8bd0ff995ab65a51992b568dc7c5ce73c3f59b107825_no_assignments_sol.sol
3+
4+
Warning: Source file does not specify required compiler version! Consider adding "pragma solidity ^0.8.0;"
5+
--> test_1c3426238b8296745d8d8bd0ff995ab65a51992b568dc7c5ce73c3f59b107825_no_assignments_sol.sol
6+
7+
Error: Some immutables were read from but never assigned, possibly because of optimization.
8+

0 commit comments

Comments
 (0)