Description
opened on Oct 15, 2019
While parsing DWARF in custom tooling, I've noticed that Rust generates 0-based columns, whereas other compilers, including LLVM-based ones (e.g. Clang and Swift), generate 1-based columns.
This creates issues for tools that want to extract source spans, generate error messages or otherwise link to the original source location.
Let's take an example for Clang:
extern void abort();
void assert_less(int num1, int num2) {
if (num1 >= num2) {
abort();
}
}
int main() {
assert_less(10, 20);
assert_less(30, 20);
return 0;
}
This generates the following debug info (Godbolt):
!0 = distinct !DICompileUnit(language: DW_LANG_C99, file: !1, producer: "clang version 9.0.0 (tags/RELEASE_900/final 372344)", isOptimized: false, runtimeVersion: 0, emissionKind: LineTablesOnly, enums: !2, nameTableKind: None)
!1 = !DIFile(filename: "example.c", directory: "/home/ubuntu")
...
!7 = distinct !DISubprogram(name: "assert_less", scope: !8, file: !8, line: 3, type: !9, scopeLine: 3, flags: DIFlagPrototyped, spFlags: DISPFlagDefinition, unit: !0, retainedNodes: !2)
!8 = !DIFile(filename: "./example.c", directory: "/home/ubuntu")
!9 = !DISubroutineType(types: !2)
!10 = !DILocation(line: 4, column: 9, scope: !7)
!11 = !DILocation(line: 4, column: 17, scope: !7)
!12 = !DILocation(line: 4, column: 14, scope: !7)
!13 = !DILocation(line: 5, column: 9, scope: !7)
!14 = !DILocation(line: 7, column: 1, scope: !7)
!15 = distinct !DISubprogram(name: "main", scope: !8, file: !8, line: 9, type: !9, scopeLine: 9, spFlags: DISPFlagDefinition, unit: !0, retainedNodes: !2)
!16 = !DILocation(line: 10, column: 5, scope: !15)
!17 = !DILocation(line: 11, column: 5, scope: !15)
!18 = !DILocation(line: 12, column: 5, scope: !15)
You can see how locations for expressions start at the first char, e.g. `5:9` for `abort(...)`, `10:5` for `assert_less(...)`, etc.
Now let's take a Rust example:
extern {
fn abort();
}
fn assert_less(num1: i32, num2: i32) {
if num1 >= num2 {
unsafe {
abort();
}
}
}
pub fn main() {
assert_less(10, 20);
assert_less(30, 20);
}
This generates debug info (Godbolt):
...
!2 = distinct !DICompileUnit(language: DW_LANG_Rust, file: !3, producer: "clang LLVM (rustc version 1.38.0 (625451e37 2019-09-23))", isOptimized: false, runtimeVersion: 0, emissionKind: FullDebug, enums: !4)
!3 = !DIFile(filename: "./example.rs", directory: "/home/ubuntu")
!4 = !{}
!5 = distinct !DISubprogram(name: "assert_less", linkageName: "_ZN7example11assert_less17h3f90e1d508137775E", scope: !6, file: !3, line: 5, type: !7, scopeLine: 5, flags: DIFlagPrototyped, spFlags: DISPFlagLocalToUnit | DISPFlagDefinition, unit: !2, templateParams: !4, retainedNodes: !4)
!6 = !DINamespace(name: "example", scope: null)
!7 = !DISubroutineType(types: !4)
!8 = !DILocation(line: 6, column: 7, scope: !5)
!9 = !DILocation(line: 6, column: 4, scope: !5)
!10 = !DILocation(line: 8, column: 12, scope: !11)
!11 = distinct !DILexicalBlock(scope: !5, file: !3, line: 7, column: 8)
!12 = !DILocation(line: 11, column: 1, scope: !5)
!13 = distinct !DISubprogram(name: "main", linkageName: "_ZN7example4main17h3efe614eed321469E", scope: !6, file: !3, line: 13, type: !7, scopeLine: 13, flags: DIFlagPrototyped, spFlags: DISPFlagDefinition, unit: !2, templateParams: !4, retainedNodes: !4)
!14 = !DILocation(line: 14, column: 4, scope: !13)
!15 = !DILocation(line: 15, column: 4, scope: !13)
!16 = !DILocation(line: 16, column: 1, scope: !13)
You can see how locations at the beginning of a line still start at column 1, like in Clang, but locations for various expressions are like `8:12` for `abort`, `14:4` for `assert_less`, etc. - that is, starting right before the expression rather than at its first character.
I wasn't sure which one is correct, so I also checked the Swift compiler (it does the same as Clang) and GCC (which, it turns out, does not implement column information yet).
I also looked at LLVM docs, and their examples for debug information also use 1-based columns, pointing at the first char of an expression: https://llvm.org/docs/SourceLevelDebugging.html
Finally, I checked the DWARF spec, and, while I couldn't find exactly how columns are supposed to be represented for expressions, I found this for declarations and expect it to be true / consistent for other items as well:
The value of the DW_AT_decl_line attribute represents the source line number at which the first character of the identifier of the declared object appears. The value 0 indicates that no source line has been specified.
The value of the DW_AT_decl_column attribute represents the source column number at which the first character of the identifier of the declared object appears. The value 0 indicates that no column has been specified.
All in all, it looks like Rust is the one generating columns that violate LLVM's and, consequently, DWARF's descriptions and expectations.