Skip to content

Commit

Permalink
[red-knot] Infer precise types for len() calls (#14599)
Browse files Browse the repository at this point in the history
## Summary

Resolves #14598.

## Test Plan

Markdown tests.

---------

Co-authored-by: Carl Meyer <carl@astral.sh>
  • Loading branch information
InSyncWithFoo and carljm authored Dec 4, 2024
1 parent 04c887c commit 155d34b
Show file tree
Hide file tree
Showing 4 changed files with 337 additions and 13 deletions.
219 changes: 219 additions & 0 deletions crates/red_knot_python_semantic/resources/mdtest/expression/len.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,219 @@
# Length (`len()`)

## Literal and constructed iterables

### Strings and bytes literals

```py
reveal_type(len("no\rmal")) # revealed: Literal[6]
reveal_type(len(r"aw stri\ng")) # revealed: Literal[10]
reveal_type(len(r"conca\t" "ena\tion")) # revealed: Literal[14]
reveal_type(len(b"ytes lite" rb"al")) # revealed: Literal[11]
reveal_type(len("𝒰𝕹🄸©🕲𝕕ℇ")) # revealed: Literal[7]

reveal_type( # revealed: Literal[7]
len(
"""foo
bar"""
)
)
reveal_type( # revealed: Literal[9]
len(
r"""foo\r
bar"""
)
)
reveal_type( # revealed: Literal[7]
len(
b"""foo
bar"""
)
)
reveal_type( # revealed: Literal[9]
len(
rb"""foo\r
bar"""
)
)
```

### Tuples

```py
reveal_type(len(())) # revealed: Literal[0]
reveal_type(len((1,))) # revealed: Literal[1]
reveal_type(len((1, 2))) # revealed: Literal[2]

# TODO: Handle constructor calls
reveal_type(len(tuple())) # revealed: int

# TODO: Handle star unpacks; Should be: Literal[0]
reveal_type(len((*[],))) # revealed: Literal[1]

# TODO: Handle star unpacks; Should be: Literal[1]
reveal_type( # revealed: Literal[2]
len(
(
*[],
1,
)
)
)

# TODO: Handle star unpacks; Should be: Literal[2]
reveal_type(len((*[], 1, 2))) # revealed: Literal[3]

# TODO: Handle star unpacks; Should be: Literal[0]
reveal_type(len((*[], *{}))) # revealed: Literal[2]
```

### Lists, sets and dictionaries

```py
reveal_type(len([])) # revealed: int
reveal_type(len([1])) # revealed: int
reveal_type(len([1, 2])) # revealed: int
reveal_type(len([*{}, *dict()])) # revealed: int

reveal_type(len({})) # revealed: int
reveal_type(len({**{}})) # revealed: int
reveal_type(len({**{}, **{}})) # revealed: int

reveal_type(len({1})) # revealed: int
reveal_type(len({1, 2})) # revealed: int
reveal_type(len({*[], 2})) # revealed: int

reveal_type(len(list())) # revealed: int
reveal_type(len(set())) # revealed: int
reveal_type(len(dict())) # revealed: int
reveal_type(len(frozenset())) # revealed: int
```

## `__len__`

The returned value of `__len__` is implicitly and recursively converted to `int`.

### Literal integers

```py
from typing import Literal

class Zero:
def __len__(self) -> Literal[0]: ...

class ZeroOrOne:
def __len__(self) -> Literal[0, 1]: ...

class ZeroOrTrue:
def __len__(self) -> Literal[0, True]: ...

class OneOrFalse:
def __len__(self) -> Literal[1] | Literal[False]: ...

class OneOrFoo:
def __len__(self) -> Literal[1, "foo"]: ...

class ZeroOrStr:
def __len__(self) -> Literal[0] | str: ...

reveal_type(len(Zero())) # revealed: Literal[0]
reveal_type(len(ZeroOrOne())) # revealed: Literal[0, 1]
reveal_type(len(ZeroOrTrue())) # revealed: Literal[0, 1]
reveal_type(len(OneOrFalse())) # revealed: Literal[0, 1]

# TODO: Emit a diagnostic
reveal_type(len(OneOrFoo())) # revealed: int

# TODO: Emit a diagnostic
reveal_type(len(ZeroOrStr())) # revealed: int
```

### Literal booleans

```py
from typing import Literal

class LiteralTrue:
def __len__(self) -> Literal[True]: ...

class LiteralFalse:
def __len__(self) -> Literal[False]: ...

reveal_type(len(LiteralTrue())) # revealed: Literal[1]
reveal_type(len(LiteralFalse())) # revealed: Literal[0]
```

### Enums

```py
from enum import Enum, auto
from typing import Literal

class SomeEnum(Enum):
AUTO = auto()
INT = 2
STR = "4"
TUPLE = (8, "16")
INT_2 = 3_2

class Auto:
def __len__(self) -> Literal[SomeEnum.AUTO]: ...

class Int:
def __len__(self) -> Literal[SomeEnum.INT]: ...

class Str:
def __len__(self) -> Literal[SomeEnum.STR]: ...

class Tuple:
def __len__(self) -> Literal[SomeEnum.TUPLE]: ...

class IntUnion:
def __len__(self) -> Literal[SomeEnum.INT, SomeEnum.INT_2]: ...

reveal_type(len(Auto())) # revealed: int
reveal_type(len(Int())) # revealed: Literal[2]
reveal_type(len(Str())) # revealed: int
reveal_type(len(Tuple())) # revealed: int
reveal_type(len(IntUnion())) # revealed: Literal[2, 32]
```

### Negative integers

```py
from typing import Literal

class Negative:
def __len__(self) -> Literal[-1]: ...

# TODO: Emit a diagnostic
reveal_type(len(Negative())) # revealed: int
```

### Wrong signature

```py
from typing import Literal

class SecondOptionalArgument:
def __len__(self, v: int = 0) -> Literal[0]: ...

class SecondRequiredArgument:
def __len__(self, v: int) -> Literal[1]: ...

# TODO: Emit a diagnostic
reveal_type(len(SecondOptionalArgument())) # revealed: Literal[0]

# TODO: Emit a diagnostic
reveal_type(len(SecondRequiredArgument())) # revealed: Literal[1]
```

### No `__len__`

```py
class NoDunderLen:
pass

# TODO: Emit a diagnostic
reveal_type(len(NoDunderLen())) # revealed: int
```
39 changes: 39 additions & 0 deletions crates/red_knot_python_semantic/resources/mdtest/unpacking.md
Original file line number Diff line number Diff line change
Expand Up @@ -267,3 +267,42 @@ reveal_type(b) # revealed: LiteralString
# TODO: Should be list[int] once support for assigning to starred expression is added
reveal_type(c) # revealed: @Todo(starred unpacking)
```

### Unicode

```py
# TODO: Add diagnostic (need more values to unpack)
(a, b) = "é"

reveal_type(a) # revealed: LiteralString
reveal_type(b) # revealed: Unknown
```

### Unicode escape (1)

```py
# TODO: Add diagnostic (need more values to unpack)
(a, b) = "\u9E6C"

reveal_type(a) # revealed: LiteralString
reveal_type(b) # revealed: Unknown
```

### Unicode escape (2)

```py
# TODO: Add diagnostic (need more values to unpack)
(a, b) = "\U0010FFFF"

reveal_type(a) # revealed: LiteralString
reveal_type(b) # revealed: Unknown
```

### Surrogates

```py
(a, b) = "\uD800\uDFFF"

reveal_type(a) # revealed: LiteralString
reveal_type(b) # revealed: LiteralString
```
89 changes: 77 additions & 12 deletions crates/red_knot_python_semantic/src/types.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1417,21 +1417,76 @@ impl<'db> Type<'db> {
}
}

/// Infer the result type of calling `len()` on a value of this type.
///
/// Yields `Some(ty)` only when the length is known more precisely than `int`.
/// Yields `None` otherwise, so that the return type declared for `len()` in
/// `typeshed` (`int`) can be used as a fallback.
fn len(&self, db: &'db dyn Db) -> Option<Type<'db>> {
    /// Map a `__len__` return type onto non-negative integer literal(s).
    ///
    /// Boolean literals are converted to the corresponding integer literal and
    /// unions are handled element by element; a single unsupported element
    /// makes the whole result `None`.
    fn non_negative_int_literal<'db>(db: &'db dyn Db, ty: Type<'db>) -> Option<Type<'db>> {
        match ty {
            // TODO: Emit diagnostic for non-integers and negative integers
            Type::IntLiteral(value) if value >= 0 => Some(ty),
            Type::BooleanLiteral(value) => Some(Type::IntLiteral(value.into())),
            Type::Union(union) => union
                .elements(db)
                .iter()
                .try_fold(UnionBuilder::new(db), |builder, element| {
                    non_negative_int_literal(db, *element).map(|literal| builder.add(literal))
                })
                .map(|builder| builder.build()),
            // Negative integer literals and every other type are not representable.
            _ => None,
        }
    }

    let literal_len = match self {
        // Literal bytes, strings and tuples carry their exact length.
        Type::BytesLiteral(bytes) => bytes.python_len(db),
        Type::StringLiteral(string) => string.python_len(db),
        Type::Tuple(tuple) => tuple.len(db),

        // Anything else: look at what `__len__` is declared to return.
        _ => {
            let outcome = match self.call_dunder(db, "__len__", &[*self]) {
                // TODO: emit a diagnostic
                CallDunderResult::MethodNotAvailable => return None,

                CallDunderResult::CallOutcome(outcome)
                | CallDunderResult::PossiblyUnbound(outcome) => outcome,
            };

            return non_negative_int_literal(db, outcome.return_ty(db)?);
        }
    };

    // A length that does not fit the integer-literal payload yields `None`.
    literal_len.try_into().ok().map(Type::IntLiteral)
}

/// Return the outcome of calling an object of this type.
#[must_use]
fn call(self, db: &'db dyn Db, arg_types: &[Type<'db>]) -> CallOutcome<'db> {
match self {
// TODO validate typed call arguments vs callable signature
Type::FunctionLiteral(function_type) => {
if function_type.is_known(db, KnownFunction::RevealType) {
CallOutcome::revealed(
function_type.signature(db).return_ty,
*arg_types.first().unwrap_or(&Type::Unknown),
)
} else {
CallOutcome::callable(function_type.signature(db).return_ty)
Type::FunctionLiteral(function_type) => match function_type.known(db) {
Some(KnownFunction::RevealType) => CallOutcome::revealed(
function_type.signature(db).return_ty,
*arg_types.first().unwrap_or(&Type::Unknown),
),

Some(KnownFunction::Len) => {
let normal_return_ty = function_type.signature(db).return_ty;

let [only_arg] = arg_types else {
// TODO: Emit a diagnostic
return CallOutcome::callable(normal_return_ty);
};
let len_ty = only_arg.len(db);

CallOutcome::callable(len_ty.unwrap_or(normal_return_ty))
}
}

_ => CallOutcome::callable(function_type.signature(db).return_ty),
},

// TODO annotated return type on `__new__` or metaclass `__call__`
Type::ClassLiteral(ClassLiteralType { class }) => {
Expand Down Expand Up @@ -2597,13 +2652,15 @@ pub enum KnownFunction {
ConstraintFunction(KnownConstraintFunction),
/// `builtins.reveal_type`, `typing.reveal_type` or `typing_extensions.reveal_type`
RevealType,
/// `builtins.len`
Len,
}

impl KnownFunction {
pub fn constraint_function(self) -> Option<KnownConstraintFunction> {
match self {
Self::ConstraintFunction(f) => Some(f),
Self::RevealType => None,
Self::RevealType | Self::Len => None,
}
}

Expand All @@ -2620,6 +2677,7 @@ impl KnownFunction {
"issubclass" if definition.is_builtin_definition(db) => Some(
KnownFunction::ConstraintFunction(KnownConstraintFunction::IsSubclass),
),
"len" if definition.is_builtin_definition(db) => Some(KnownFunction::Len),
_ => None,
}
}
Expand Down Expand Up @@ -3074,8 +3132,9 @@ pub struct StringLiteralType<'db> {
}

impl<'db> StringLiteralType<'db> {
pub fn len(&self, db: &'db dyn Db) -> usize {
self.value(db).len()
/// The length of the string, as would be returned by Python's `len()`.
pub fn python_len(&self, db: &'db dyn Db) -> usize {
self.value(db).chars().count()
}
}

Expand All @@ -3085,6 +3144,12 @@ pub struct BytesLiteralType<'db> {
value: Box<[u8]>,
}

impl<'db> BytesLiteralType<'db> {
/// The length of the bytes literal, as would be returned by Python's `len()`.
///
/// This is the number of bytes in the stored value.
pub fn python_len(&self, db: &'db dyn Db) -> usize {
self.value(db).len()
}
}

#[salsa::interned]
pub struct SliceLiteralType<'db> {
start: Option<i32>,
Expand Down
Loading

0 comments on commit 155d34b

Please sign in to comment.