Skip to content

Commit

Permalink
Work on rewriting 'Concepts' chapter.
Browse files Browse the repository at this point in the history
  • Loading branch information
rodrigogiraoserrao committed Oct 4, 2024
1 parent 69032c7 commit c3e7eca
Show file tree
Hide file tree
Showing 18 changed files with 726 additions and 389 deletions.
42 changes: 0 additions & 42 deletions docs/source/src/python/user-guide/concepts/data-structures.py

This file was deleted.

Original file line number Diff line number Diff line change
@@ -0,0 +1,56 @@
# Build a named Series from a list of Python ints and print it.
# --8<-- [start:series]
import polars as pl

s = pl.Series(name="ints", values=[1, 2, 3, 4, 5])
print(s)
# --8<-- [end:series]

# Same values twice: once with the inferred dtype, once with an explicit UInt64.
# --8<-- [start:series-dtype]
s1 = pl.Series(name="ints", values=[1, 2, 3, 4, 5])
s2 = pl.Series(name="uints", values=[1, 2, 3, 4, 5], dtype=pl.UInt64)
print(s1.dtype, s2.dtype)
# --8<-- [end:series-dtype]

# The DataFrame below is reused by every snippet that follows.
# --8<-- [start:df]
import datetime as dt

df = pl.DataFrame(
    {
        "name": ["Alice Archer", "Ben Brown", "Chloe Cooper", "Daniel Donovan"],
        "birthdate": [
            dt.date(1997, 1, 10),
            dt.date(1985, 2, 15),
            dt.date(1983, 3, 22),
            dt.date(1981, 4, 30),
        ],
        "weight": [57.9, 72.5, 53.6, 83.1],  # (kg)
        "height": [1.56, 1.77, 1.65, 1.75],  # (m)
    }
)

print(df)
# --8<-- [end:df]

# --8<-- [start:schema]
print(df.schema)
# --8<-- [end:schema]

# First three rows.
# --8<-- [start:head]
print(df.head(n=3))
# --8<-- [end:head]

# Last three rows.
# --8<-- [start:tail]
print(df.tail(n=3))
# --8<-- [end:tail]

# Two randomly chosen rows; the seed is fixed before sampling.
# --8<-- [start:sample]
import random

random.seed(42)  # For reproducibility.

print(df.sample(n=2))
# --8<-- [end:sample]

# --8<-- [start:describe]
print(df.describe())
# --8<-- [end:describe]
105 changes: 97 additions & 8 deletions docs/source/src/python/user-guide/concepts/expressions.py
Original file line number Diff line number Diff line change
@@ -1,16 +1,105 @@
# Body-mass index written as a polars expression: weight (kg) divided by
# height (m) squared. The bare expression below is built only to be displayed
# in the guide; its value is discarded.
# --8<-- [start:expression]
import polars as pl

pl.col("weight") / (pl.col("height") ** 2)
# --8<-- [end:expression]

# --8<-- [start:print-expr]
bmi_expr = pl.col("weight") / (pl.col("height") ** 2)
print(bmi_expr)
# --8<-- [end:print-expr]

# The DataFrame below is reused by every snippet that follows. All four
# columns have the same length (four rows).
# --8<-- [start:df]
import datetime as dt

df = pl.DataFrame(
    {
        "name": ["Alice Archer", "Ben Brown", "Chloe Cooper", "Daniel Donovan"],
        "birthdate": [
            dt.date(1997, 1, 10),
            dt.date(1985, 2, 15),
            dt.date(1983, 3, 22),
            dt.date(1981, 4, 30),
        ],
        "weight": [57.9, 72.5, 53.6, 83.1],  # (kg)
        "height": [1.56, 1.77, 1.65, 1.75],  # (m)
    }
)

print(df)
# --8<-- [end:df]

# `select` with a per-row expression, an aggregate of it, and a literal.
# --8<-- [start:select-1]
result = df.select(
    bmi=bmi_expr,
    avg_bmi=bmi_expr.mean(),
    ideal_max_bmi=25,
)
print(result)
# --8<-- [end:select-1]

# Standardised BMI: (value - mean) / standard deviation.
# --8<-- [start:select-2]
result = df.select(deviation=(bmi_expr - bmi_expr.mean()) / bmi_expr.std())
print(result)
# --8<-- [end:select-2]

# Same expressions as select-1, passed to `with_columns` instead of `select`.
# --8<-- [start:with_columns-1]
result = df.with_columns(
    bmi=bmi_expr,
    avg_bmi=bmi_expr.mean(),
    ideal_max_bmi=25,
)
print(result)
# --8<-- [end:with_columns-1]

# Two predicates passed to a single `filter` call.
# --8<-- [start:filter-1]
result = df.filter(
    pl.col("birthdate").is_between(dt.date(1982, 12, 31), dt.date(1996, 1, 1)),
    pl.col("height") > 1.7,
)
print(result)
# --8<-- [end:filter-1]

# The grouping key is the birth year rounded down to its decade
# (integer-divide by 10, then multiply by 10).
# --8<-- [start:group_by-1]
result = df.group_by((pl.col("birthdate").dt.year() // 10 * 10).alias("decade")).agg(
    pl.col("name")
)
print(result)
# --8<-- [end:group_by-1]

# Grouping by two computed keys at once.
# --8<-- [start:group_by-2]
result = df.group_by(
    (pl.col("birthdate").dt.year() // 10 * 10).alias("decade"),
    (pl.col("height") < 1.7).alias("short?"),
).agg(pl.col("name"))
print(result)
# --8<-- [end:group_by-2]

# Same grouping keys, with several aggregations per group.
# --8<-- [start:group_by-3]
result = df.group_by(
    (pl.col("birthdate").dt.year() // 10 * 10).alias("decade"),
    (pl.col("height") < 1.7).alias("short?"),
).agg(
    pl.len(),
    pl.col("height").max().alias("tallest"),
    pl.col("weight", "height").mean().name.prefix("avg_"),
)
print(result)
# --8<-- [end:group_by-3]

# `expr` targets every Float64 column and suffixes each output name.
# --8<-- [start:expression-expansion-1]
expr = (pl.col(pl.Float64) * 1.1).name.suffix("*1.1")
result = df.select(expr)
print(result)
# --8<-- [end:expression-expansion-1]

# NOTE(review): df2 holds only an integer and a string column, so `expr`
# presumably matches no columns here and the printed result is empty.
# --8<-- [start:expression-expansion-2]
df2 = pl.DataFrame(
    {
        "ints": [1, 2, 3, 4],
        "letters": ["A", "B", "C", "D"],
    }
)
result = df2.select(expr)
print(result)
# --8<-- [end:expression-expansion-2]
6 changes: 3 additions & 3 deletions docs/source/src/rust/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -31,8 +31,8 @@ path = "user-guide/getting-started.rs"
required-features = ["polars/lazy", "polars/temporal", "polars/round_series", "polars/strings"]

# Binary target for the renamed "data types and structures" concepts snippets.
[[bin]]
name = "user-guide-concepts-data-types-and-structures"
path = "user-guide/concepts/data-types-and-structures.rs"

[[bin]]
name = "user-guide-concepts-contexts"
Expand All @@ -41,7 +41,7 @@ required-features = ["polars/lazy"]
# The expressions snippets are built only when the lazy, temporal and
# is_between polars features are enabled.
[[bin]]
name = "user-guide-concepts-expressions"
path = "user-guide/concepts/expressions.rs"
required-features = ["polars/lazy", "polars/temporal", "polars/is_between"]
[[bin]]
name = "user-guide-concepts-lazy-vs-eager"
path = "user-guide/concepts/lazy-vs-eager.rs"
Expand Down
51 changes: 0 additions & 51 deletions docs/source/src/rust/user-guide/concepts/data-structures.rs

This file was deleted.

Original file line number Diff line number Diff line change
@@ -0,0 +1,62 @@
fn main() {
// --8<-- [start:series]
use polars::prelude::*;

let s = Series::new("ints".into(), &[1, 2, 3, 4, 5]);

println!("{}", s);
// --8<-- [end:series]

// --8<-- [start:series-dtype]
let s1 = Series::new("ints".into(), &[1, 2, 3, 4, 5]);
let s2 = Series::new("uints".into(), &[1, 2, 3, 4, 5])
.cast(&DataType::UInt64) // Here, we actually cast after inference.
.unwrap();
println!("{} {}", s1.dtype(), s2.dtype()); // i32 u64
// --8<-- [end:series-dtype]

// --8<-- [start:df]
use chrono::prelude::*;

let df: DataFrame = df!(
"name" => ["Alice Archer", "Ben Brown", "Chloe Cooper", "Daniel Donovan"],
"birthdate" => [
NaiveDate::from_ymd_opt(1997, 1, 10).unwrap(),
NaiveDate::from_ymd_opt(1985, 2, 15).unwrap(),
NaiveDate::from_ymd_opt(1983, 3, 22).unwrap(),
NaiveDate::from_ymd_opt(1981, 4, 30).unwrap(),
],
"weight" => [57.9, 72.5, 53.6, 83.1], // (kg)
"height" => [1.56, 1.77, 1.65, 1.75], // (m)
)
.unwrap();
println!("{}", df);
// --8<-- [end:df]

// --8<-- [start:schema]
println!("{:?}", df.schema());
// --8<-- [end:schema]

// --8<-- [start:head]
let df_head = df.head(Some(3));

println!("{}", df_head);
// --8<-- [end:head]

// --8<-- [start:tail]
let df_tail = df.tail(Some(3));

println!("{}", df_tail);
// --8<-- [end:tail]

// --8<-- [start:sample]
let n = Series::new("".into(), &[2]);
let sampled_df = df.sample_n(&n, false, false, None).unwrap();

println!("{}", sampled_df);
// --8<-- [end:sample]

// --8<-- [start:describe]
// Not available in Rust
// --8<-- [end:describe]
}
Loading

0 comments on commit c3e7eca

Please sign in to comment.