Skip to content
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
15 changes: 15 additions & 0 deletions rust/ql/lib/codeql/rust/elements/LiteralExprExt.qll
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
/** Provides sub classes of literal expressions. */
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

What does the Ext suffix in the filename indicate? I can't find any existing files with that suffix.

My first thought was it meant that these could be _ext_ended, but that's not the case.

Copy link
Contributor

@geoffw0 geoffw0 May 13, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think it just means the file is an extension to the functionality provided in LiteralExpr.qll.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Indeed.


private import internal.LiteralExprImpl

/** A character literal expression, for example `'x'`. */
final class CharLiteralExpr = Impl::CharLiteralExpr;

/** A string literal expression, for example `"Hello, world!"`. */
final class StringLiteralExpr = Impl::StringLiteralExpr;

/** A number literal expression. */
final class NumberLiteralExpr = Impl::NumberLiteralExpr;

/** An integer literal expression, for example `42`. */
final class IntegerLiteralExpr = Impl::IntegerLiteralExpr;

/** A floating-point literal expression, for example `42.0`. */
final class FloatLiteralExpr = Impl::FloatLiteralExpr;

/** A Boolean literal expression, either `true` or `false`. */
final class BooleanLiteralExpr = Impl::BooleanLiteralExpr;
187 changes: 187 additions & 0 deletions rust/ql/lib/codeql/rust/elements/internal/LiteralExprImpl.qll
Original file line number Diff line number Diff line change
Expand Up @@ -42,4 +42,191 @@ module Impl {
)
}
}

/**
* A [character literal][1]. For example:
*
* ```rust
* 'x';
* ```
*
* [1]: https://doc.rust-lang.org/reference/tokens.html#character-literals
*/
class CharLiteralExpr extends LiteralExpr {
CharLiteralExpr() {
// todo: proper implementation
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

What needs to be done to make this a "proper implementation"?

I was initially concerned about escaped quote characters, but I think with lazy matching they may just work. Testing will confirm.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The logic is a lot simpler than the official spec, which is why I added the comment.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I suggest we remove the comment, or rephrase it away from "todo", if there is nothing specific we want to do here.

this.getTextValue().regexpMatch("'.*'")
}

override string getAPrimaryQlClass() { result = "CharLiteralExpr" }
}

/**
* A [string literal][1]. For example:
*
* ```rust
* "Hello, world!";
* ```
*
* [1]: https://doc.rust-lang.org/reference/tokens.html#string-literals
*/
class StringLiteralExpr extends LiteralExpr {
StringLiteralExpr() {
// todo: proper implementation
this.getTextValue().regexpMatch("r?#*\".*\"#*")
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

What needs to be done to make this a "proper implementation"?

I think it's OK that this matches some invalid string literals (e.g. r##"foo"#) as long as it matches all valid ones and there's no overlap with matching for literals of other types.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Same as for char literals, the spec is more involved, but perhaps as you say the above is good enough.

}

override string getAPrimaryQlClass() { result = "StringLiteralExpr" }
}

/**
* A number literal: either an integer literal (`IntegerLiteralExpr`) or a
* floating-point literal (`FloatLiteralExpr`).
*/
abstract class NumberLiteralExpr extends LiteralExpr { }

/**
 * Regular-expression fragments for [integer literals][1].
 *
 * Each predicate returns regex source text as a string; the fragments are
 * concatenated into the pattern used with `regexpMatch`/`regexpCapture`.
 *
 * NOTE: the `getSuffix` predicates in this file locate the suffix group by
 * counting the `(` characters in the assembled pattern, so the grouping
 * produced here must not change.
 *
 * [1]: https://doc.rust-lang.org/reference/tokens.html#integer-literals
 */
private module IntegerLiteralRegexs {
  /** Gets `s` enclosed in parentheses, that is, as a regex group. */
  bindingset[s]
  string paren(string s) { result = "(" + s + ")" }

  /**
   * Gets a regex for a complete integer literal: a decimal, binary, octal or
   * hexadecimal literal, optionally followed by an integer type suffix.
   */
  string integerLiteral() {
    exists(string radixAlternatives |
      radixAlternatives =
        paren(decLiteral()) + "|" + paren(binLiteral()) + "|" + paren(octLiteral()) + "|" +
          paren(hexLiteral())
    |
      result = paren(radixAlternatives) + paren(suffix()) + "?"
    )
  }

  /** Gets the alternation of all integer type suffixes. */
  private string suffix() { result = "u8|i8|u16|i16|u32|i32|u64|i64|u128|i128|usize|isize" }

  /** Gets a regex for a decimal literal: a digit followed by digits or underscores. */
  string decLiteral() {
    exists(string digit | digit = decDigit() | result = digit + "(" + digit + "|_)*")
  }

  /** Gets a regex for a binary literal (`0b` prefix). */
  string binLiteral() { result = prefixedLiteral("0b", binDigit()) }

  /** Gets a regex for an octal literal (`0o` prefix). */
  string octLiteral() { result = prefixedLiteral("0o", octDigit()) }

  /** Gets a regex for a hexadecimal literal (`0x` prefix). */
  string hexLiteral() { result = prefixedLiteral("0x", hexDigit()) }

  /**
   * Gets a regex for `prefix` followed by digits (as described by the character
   * class `digit`) and underscores, containing at least one digit.
   */
  bindingset[prefix, digit]
  private string prefixedLiteral(string prefix, string digit) {
    result = prefix + "(" + digit + "|_)*" + digit + "(" + digit + "|_)*"
  }

  /** Gets the character class for a decimal digit. */
  string decDigit() { result = "[0-9]" }

  /** Gets the character class for a binary digit. */
  string binDigit() { result = "[01]" }

  /** Gets the character class for an octal digit. */
  string octDigit() { result = "[0-7]" }

  /** Gets the character class for a hexadecimal digit. */
  string hexDigit() { result = "[0-9a-fA-F]" }
}

/**
* An [integer literal][1]. For example:
*
* ```rust
* 42;
* ```
*
* [1]: https://doc.rust-lang.org/reference/tokens.html#integer-literals
*/
class IntegerLiteralExpr extends NumberLiteralExpr {
IntegerLiteralExpr() { this.getTextValue().regexpMatch(IntegerLiteralRegexs::integerLiteral()) }

/**
* Gets the suffix of this integer literal, if any.
*
* For example, `42u8` has the suffix `u8`.
*/
string getSuffix() {
exists(string s, string reg, int last |
s = this.getTextValue() and
reg = IntegerLiteralRegexs::integerLiteral() and
last = strictcount(reg.indexOf("(")) and
result = s.regexpCapture(reg, last)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is a bit of a scary approach, I'm not convinced it will be reliable.

Can't we just match against ".*(" + IntegerLiteralRegexs::suffix() + ")" here and grab the first group?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I would really like to be able to just reuse the existing regexes; one way this could be made better is if QL supported named groups.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

In general, the number of opening brackets isn't going to be equal to the number of capture groups; for example, the regex (?is)(\\() has three ( but only one group. You might be able to stick to simpler regexes where the relationship holds, but it feels a bit fragile to me.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Agreed. Would it be possible to write the regexes such that the thing we want is always in a fixed capture group? Then we could write something like s.regexpCapture(reg, 3) (or whatever the number would be).

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Sure, I can hard-code the numbers instead.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

What about a helper predicate for getting the last capture group? Something like:

bindingset[s, reg]
string regexpCaptureLast(string s, string reg) {
  exists(int i | result = s.regexpCapture(reg, i) and not exists(s.regexpCapture(reg, i + 1)))
}

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I don't think that will work; it will give you the last group that matches, but the suffix part is optional.

)
}

override string getAPrimaryQlClass() { result = "IntegerLiteralExpr" }
}

/**
 * Regular-expression fragments for [floating-point literals][1].
 *
 * Each predicate returns regex source text as a string. The decimal-literal
 * and digit fragments, and `paren`, come from `IntegerLiteralRegexs`.
 *
 * [1]: https://doc.rust-lang.org/reference/tokens.html#floating-point-literals
 */
private module FloatLiteralRegexs {
  private import IntegerLiteralRegexs

  /**
   * Gets a regex for a floating-point literal in one of three shapes: a
   * decimal literal followed by a bare `.`, the shape of
   * `floatLiteralSuffix1()`, or the shape of `floatLiteralSuffix2()`.
   */
  string floatLiteral() {
    exists(string trailingDot | trailingDot = decLiteral() + "\\." |
      result =
        paren(trailingDot) + "|" + paren(floatLiteralSuffix1()) + "|" +
          paren(floatLiteralSuffix2())
    )
  }

  /**
   * Gets a regex for a literal with an integral and a fractional part,
   * optionally followed by a float type suffix.
   */
  string floatLiteralSuffix1() {
    exists(string dec | dec = decLiteral() | result = dec + "\\." + dec + paren(suffix()) + "?")
  }

  /**
   * Gets a regex for a literal with an optional fractional part and a
   * mandatory exponent, optionally followed by a float type suffix.
   */
  string floatLiteralSuffix2() {
    exists(string optionalFraction | optionalFraction = paren("\\." + decLiteral()) + "?" |
      result = decLiteral() + optionalFraction + paren(exponent()) + paren(suffix()) + "?"
    )
  }

  /**
   * Gets a regex for a decimal, binary, octal or hexadecimal literal that is
   * followed by a mandatory float type suffix.
   */
  string integerSuffixLiteral() {
    exists(string radixAlternatives |
      radixAlternatives =
        paren(decLiteral()) + "|" + paren(binLiteral()) + "|" + paren(octLiteral()) + "|" +
          paren(hexLiteral())
    |
      result = paren(radixAlternatives) + paren(suffix())
    )
  }

  /** Gets the alternation of all float type suffixes. */
  private string suffix() { result = "f32|f64" }

  /**
   * Gets a regex for an exponent: `e` or `E`, an optional sign, then digits
   * and underscores containing at least one digit.
   */
  string exponent() {
    exists(string digit | digit = decDigit() |
      result = "(e|E)(\\+|-)?(" + digit + "|_)*" + digit + "(" + digit + "|_)*"
    )
  }
}

/**
* A [floating-point literal][1]. For example:
*
* ```rust
* 42.0;
* ```
*
* [1]: https://doc.rust-lang.org/reference/tokens.html#floating-point-literals
*/
class FloatLiteralExpr extends NumberLiteralExpr {
FloatLiteralExpr() {
this.getTextValue()
.regexpMatch([
FloatLiteralRegexs::floatLiteral(), FloatLiteralRegexs::integerSuffixLiteral()
]) and
// E.g. `0x01_f32` is an integer, not a float
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm confused by the cases 0x01_f32 and 0x01_e3. I understand they're supposed to be treated as integers, but I'm not sure why this is so (in the Rust language) or how it happens (in your QL).

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It is exactly the restriction on this line that makes it an integer only; otherwise it would be considered a float as well.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I don't see how IntegerLiteralExpr accepts 0x01_f32 when its suffix() function doesn't include f32.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

0x01_f32 is just a HEX number, underscores are allowed as separators.

not this instanceof IntegerLiteralExpr
}

/**
* Gets the suffix of this floating-point literal, if any.
*
* For example, `42.0f32` has the suffix `f32`.
*/
string getSuffix() {
exists(string s, string reg, int last |
s = this.getTextValue() and
reg =
[
FloatLiteralRegexs::floatLiteralSuffix1(), FloatLiteralRegexs::floatLiteralSuffix2(),
FloatLiteralRegexs::integerSuffixLiteral()
] and
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Using the helper predicate above we could instead do a disjunction here:

Suggested change
reg =
[
FloatLiteralRegexs::floatLiteralSuffix1(), FloatLiteralRegexs::floatLiteralSuffix2(),
FloatLiteralRegexs::integerSuffixLiteral()
] and
reg =
FloatLiteralRegexs::floatLiteralSuffix1() + "|" +
FloatLiteralRegexs::floatLiteralSuffix2() + "|" +
FloatLiteralRegexs::integerSuffixLiteral()
and

last = strictcount(reg.indexOf("(")) and
result = s.regexpCapture(reg, last)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Same thoughts as for IntegerLiteralExpr::getSuffix.

)
}

override string getAPrimaryQlClass() { result = "FloatLiteralExpr" }
}

/**
 * A Boolean literal, that is, a literal whose text is `true` or `false`.
 */
class BooleanLiteralExpr extends LiteralExpr {
  BooleanLiteralExpr() {
    exists(string text | text = this.getTextValue() | text = "false" or text = "true")
  }

  override string getAPrimaryQlClass() { result = "BooleanLiteralExpr" }
}
}
1 change: 1 addition & 0 deletions rust/ql/lib/rust.qll
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ import codeql.rust.elements
import codeql.Locations
import codeql.files.FileSystem
import codeql.rust.elements.AssignmentOperation
import codeql.rust.elements.LiteralExprExt
import codeql.rust.elements.LogicalOperation
import codeql.rust.elements.AsyncBlockExpr
import codeql.rust.elements.Variable
Expand Down
42 changes: 42 additions & 0 deletions rust/ql/test/extractor-tests/literal/literal.expected
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
charLiteral
| literal.rs:2:5:2:7 | 'a' |
| literal.rs:3:5:3:7 | 'b' |
stringLiteral
| literal.rs:8:5:8:9 | "foo" |
| literal.rs:9:5:9:10 | r"foo" |
| literal.rs:10:5:10:13 | "\\"foo\\"" |
| literal.rs:11:5:11:14 | r#""foo""# |
| literal.rs:13:5:13:18 | "foo #\\"# bar" |
| literal.rs:14:5:14:22 | r##"foo #"# bar"## |
| literal.rs:16:5:16:10 | "\\x52" |
| literal.rs:17:5:17:7 | "R" |
| literal.rs:18:5:18:8 | r"R" |
| literal.rs:19:5:19:11 | "\\\\x52" |
| literal.rs:20:5:20:11 | r"\\x52" |
integerLiteral
| literal.rs:25:5:25:7 | 123 | |
| literal.rs:26:5:26:10 | 123i32 | i32 |
| literal.rs:27:5:27:10 | 123u32 | u32 |
| literal.rs:28:5:28:11 | 123_u32 | u32 |
| literal.rs:30:5:30:8 | 0xff | |
| literal.rs:31:5:31:11 | 0xff_u8 | u8 |
| literal.rs:32:5:32:12 | 0x01_f32 | |
| literal.rs:33:5:33:11 | 0x01_e3 | |
| literal.rs:35:5:35:8 | 0o70 | |
| literal.rs:36:5:36:12 | 0o70_i16 | i16 |
| literal.rs:38:5:38:25 | 0b1111_1111_1001_0000 | |
| literal.rs:39:5:39:28 | 0b1111_1111_1001_0000i64 | i64 |
| literal.rs:40:5:40:15 | 0b________1 | |
| literal.rs:42:5:42:10 | 0usize | usize |
| literal.rs:45:5:46:10 | 128_i8 | i8 |
| literal.rs:47:5:48:10 | 256_u8 | u8 |
floatLiteral
| literal.rs:53:5:53:8 | 5f32 | f32 |
| literal.rs:55:5:55:12 | 123.0f64 | f64 |
| literal.rs:56:5:56:10 | 0.1f64 | f64 |
| literal.rs:57:5:57:10 | 0.1f32 | f32 |
| literal.rs:58:5:58:14 | 12E+99_f64 | f64 |
| literal.rs:59:18:59:19 | 2. | |
booleanLiteral
| literal.rs:63:5:63:8 | true |
| literal.rs:64:5:64:9 | false |
13 changes: 13 additions & 0 deletions rust/ql/test/extractor-tests/literal/literal.ql
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
import rust

/** Holds for every character literal expression `e`. */
query predicate charLiteral(CharLiteralExpr e) { exists(e) }

/** Holds for every string literal expression `e`. */
query predicate stringLiteral(StringLiteralExpr e) { exists(e) }

/**
 * Holds if `e` is an integer literal expression and `suffix` is the
 * concatenation of its suffixes (the empty string when it has none).
 */
query predicate integerLiteral(IntegerLiteralExpr e, string suffix) {
  suffix = concat(string s | s = e.getSuffix() | s)
}

/**
 * Holds if `e` is a floating-point literal expression and `suffix` is the
 * concatenation of its suffixes (the empty string when it has none).
 */
query predicate floatLiteral(FloatLiteralExpr e, string suffix) {
  suffix = concat(string s | s = e.getSuffix() | s)
}

/** Holds for every Boolean literal expression `e`. */
query predicate booleanLiteral(BooleanLiteralExpr e) { exists(e) }
65 changes: 65 additions & 0 deletions rust/ql/test/extractor-tests/literal/literal.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,65 @@
fn char_literals() { // line layout is fixed: literal.expected records line:column positions
    'a';
    'b';
}

fn string_literals() {
    // Examples taken from https://doc.rust-lang.org/reference/tokens.html#string-literals
    "foo";
    r"foo"; // same value as above: foo
    "\"foo\"";
    r#""foo""#; // same value as above: "foo"

    "foo #\"# bar";
    r##"foo #"# bar"##; // same value as above: foo #"# bar

    "\x52";
    "R";
    r"R"; // same value as the two above: R
    "\\x52";
    r"\x52"; // same value as above: \x52
}

fn integer_literals() {
    // Examples taken from https://doc.rust-lang.org/reference/tokens.html#integer-literals
    123;
    123i32;
    123u32;
    123_u32;

    0xff;
    0xff_u8;
    0x01_f32; // hexadecimal integer 7986 (`f32` here is hex digits), not the float 1.0
    0x01_e3; // hexadecimal integer 483 (`e3` here is hex digits), not the float 1000.0

    0o70;
    0o70_i16;

    0b1111_1111_1001_0000;
    0b1111_1111_1001_0000i64;
    0b________1;

    0usize;

    // Out of range for the suffix type, but still accepted as literal expressions.
    #[allow(overflowing_literals)]
    128_i8;
    #[allow(overflowing_literals)]
    256_u8;
}

fn float_literals() {
    // Lexically an integer literal, but accepted as a floating-point literal expression.
    5f32;

    123.0f64;
    0.1f64;
    0.1f32;
    12E+99_f64;
    let x: f64 = 2.;
}

fn boolean_literals() { // line layout is fixed: literal.expected records line:column positions
    true;
    false;
}