Open
Description
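Describe the bug
`sum(d1 * d2)` over two `Decimal128(38, 10)` columns returns a wrong, negative result in DataFusion, while the same computation with Python's `decimal` module prints 3318680488765741748.466457758.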
- Generate the test CSV with Python:
import csv
import random
import decimal
random.seed(42)
def make_big_random_decimal():
    """Return a random positive Decimal with up to 8 fractional digits.

    An integer is drawn uniformly from [1, 2**53] and divided by a random
    power of ten between 10**1 and 10**8, i.e. the decimal point is shifted
    left by 1 to 8 places.

    The two ``random.randint`` calls are made in this exact order so that a
    fixed ``random.seed`` always reproduces the same sequence of values.
    """
    n = random.randint(1, 1 << 53)
    p = pow(10, random.randint(1, 8))
    return decimal.Decimal(n) / decimal.Decimal(p)
def make_small_random_decimal():
    """Return a random Decimal strictly between 0 and 1.

    An integer ``n`` is drawn uniformly from [1, 100] and used as the digits
    after the decimal point, so the result is parsed from the text ``"0.<n>"``
    (for example ``n == 7`` gives ``Decimal("0.7")`` and ``n == 42`` gives
    ``Decimal("0.42")``).
    """
    n = random.randint(1, 100)
    return decimal.Decimal(f"0.{n}")
# Build 100,000 (d1, d2) pairs. All "big" values are drawn before any "small"
# value so the sequence produced by the seeded generator stays the same.
decimals_1 = [make_big_random_decimal() for _ in range(100000)]
decimals_2 = [make_small_random_decimal() for _ in range(100000)]

# Reference result: sum of d1 * d2 computed with Python's decimal arithmetic.
SUM = decimal.Decimal(0)

# newline="" is what the csv module requires for files handed to csv.writer;
# without it, platforms that translate "\n" would emit an extra "\r".
with open("/tmp/decimal.csv", "w", newline="") as f:
    writer = csv.writer(f)
    for d1, d2 in zip(decimals_1, decimals_2):
        # One headerless row per pair: d1,d2
        writer.writerow([d1, d2])
        SUM += d1 * d2

print(SUM)  # prints 3318680488765741748.466457758
- Calculate `sum(d1 * d2)` over the same CSV in DataFusion:
use arrow_schema::{DataType, Field, Schema, SchemaBuilder};
use datafusion::error::Result;
use datafusion::prelude::*;
/// Registers `/tmp/decimal.csv` as table `tb` with two non-nullable
/// `Decimal128(38, 10)` columns and prints the result of
/// `select sum(d1 * d2) from tb`.
#[tokio::main]
async fn main() -> Result<()> {
    // Both CSV columns share the same decimal type.
    let decimal_type = DataType::Decimal128(38, 10);
    let columns = vec![
        Field::new("d1", decimal_type.clone(), false),
        Field::new("d2", decimal_type, false),
    ];
    let table_schema = SchemaBuilder::from(Schema::new(columns).fields).finish();

    // The file has no header row, so column names come from the schema above.
    let read_options = CsvReadOptions::new()
        .schema(&table_schema)
        .has_header(false)
        .file_extension(".csv");

    let session = SessionContext::new();
    session
        .register_csv("tb", "/tmp/decimal.csv", read_options)
        .await?;

    let frame = session.sql("select sum(d1 * d2) from tb").await?;
    frame.show().await?;
    Ok(())
}
+-----------------------------------------+
| sum(tb.d1 * tb.d2) |
+-----------------------------------------+
| -84143180443642886.16728833341768211456 |
+-----------------------------------------+
To Reproduce
No response
Expected behavior
No response
Additional context
No response