Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
293 commits
Select commit Hold shift + click to select a range
340f92f
Remove unused import of LightweightMemoryTracker and initialize memor…
kosiew Jul 30, 2025
1204df1
Add memory profiling methods to SessionContext and enhance documentation
kosiew Jul 30, 2025
b482cc3
Fix memory profiling methods in MyUnionerContext to ensure proper ret…
kosiew Jul 30, 2025
b566cf9
Remove ExplainMemory features and related code from various modules
kosiew Jul 30, 2025
714ac4c
Merge branch 'main' into memory-16904a
kosiew Jul 31, 2025
05b5fc2
Refactor memory profiling test to compare enabled vs disabled states
kosiew Jul 31, 2025
2de502f
Fix typo in memory profiling test configuration method
kosiew Jul 31, 2025
1bd3c18
Fix memory profiling test context initialization for configuration
kosiew Jul 31, 2025
8cac4fb
Refactor memory profiling test to initialize context with configuration
kosiew Jul 31, 2025
8135ec8
Update memory profiling test to use 'on_demand' setting and adjust ov…
kosiew Jul 31, 2025
9537eb1
Fix memory profiling configuration path in test
kosiew Jul 31, 2025
aa3ce0b
Fix memory profiling configuration path and adjust overhead calculati…
kosiew Jul 31, 2025
ec26adf
fix: correct spelling of "Apache" in comments and remove unnecessary …
kosiew Jul 31, 2025
85481d4
feat: add memory profiling example with detailed usage and reporting
kosiew Jul 31, 2025
d0fb807
Revert "feat: add memory profiling example with detailed usage and re…
kosiew Jul 31, 2025
7a381d3
feat: add memory profiling example for DataFusion with comprehensive …
kosiew Jul 31, 2025
07edefb
Revert "feat: add memory profiling example for DataFusion with compre…
kosiew Jul 31, 2025
4fcc6a4
feat: add qwen comprehensive memory profiling example for DataFusion
kosiew Jul 31, 2025
3a3f5cc
Revert "feat: add qwen comprehensive memory profiling example for Dat…
kosiew Jul 31, 2025
6087e8f
feat: add memory profiling example for DataFusion (kimi)
kosiew Jul 31, 2025
0d63a88
feat: add memory profiling example for DataFusion (qwen)
kosiew Jul 31, 2025
62497b4
feat: add memory profiling example for DataFusion (codex)
kosiew Jul 31, 2025
2fd387a
feat: add memory profiling for DataFrame collect methods
kosiew Jul 31, 2025
ced0904
Revert "feat: add memory profiling for DataFrame collect methods"
kosiew Jul 31, 2025
52941fa
feat: add memory profiling to DataFrame collect methods
kosiew Aug 1, 2025
c3f5b74
feat: enhance memory profiling examples with multi-stage queries and …
kosiew Aug 1, 2025
fd9f047
feat: format memory usage output in MB for better readability
kosiew Aug 1, 2025
07312b7
feat: enhance memory profiling with detailed analysis and operator ca…
kosiew Aug 1, 2025
f1b1aa7
fix: correct formatting and improve readability in memory profiling e…
kosiew Aug 1, 2025
2e05a15
feat: implement enhanced memory profiling report with detailed analys…
kosiew Aug 1, 2025
0cd6d62
feat: implement enhanced memory profiling with detailed categorizatio…
kosiew Aug 1, 2025
a6acf76
fix: remove unused EnhancedMemoryReport import from prelude
kosiew Aug 1, 2025
4044951
feat: add global memory tracker for enhanced memory management
kosiew Aug 1, 2025
cc37d64
feat: add memory profiling example with detailed analysis and operato…
kosiew Aug 1, 2025
6e1063f
refactor: remove redundant memory profiling notes and summary from ex…
kosiew Aug 1, 2025
4ad99c7
feat: remove unused diagnostic configuration test
kosiew Aug 1, 2025
c9b04c9
fix: return None for empty memory report in CLI session context
kosiew Aug 1, 2025
ee23d74
fix: clarify memory command description in CLI
kosiew Aug 1, 2025
053745e
fix: update categorized_operators type to static str for enhanced mem…
kosiew Aug 1, 2025
885b2a1
feat: add memory tracking for incremental allocations in MemoryReserv…
kosiew Aug 1, 2025
4ce2330
fix: simplify mutex usage in memory tracker by removing redundant imp…
kosiew Aug 1, 2025
ce1954d
fix: remove unnecessary unwrap calls from mutex lock in LightweightMe…
kosiew Aug 1, 2025
78f7990
fix: add memory profiling configuration to execution settings
kosiew Aug 1, 2025
9fccada
fix: update memory profiling test to assert duration overhead within …
kosiew Aug 1, 2025
8e11a2f
fix: update memory profiling test to use a complex query for baseline…
kosiew Aug 1, 2025
24b5d3c
fix: add memory profiling integration tests to evaluate performance o…
kosiew Aug 1, 2025
7daa999
fix: remove memory profiling test for enabled vs disabled comparison
kosiew Aug 1, 2025
3342b17
fix: add memory profiling report content test to verify metrics capture
kosiew Aug 1, 2025
f40b74d
fix: update memory profiling report test to assert expected operator …
kosiew Aug 1, 2025
65329e6
fix: update memory profiling report test to include additional expect…
kosiew Aug 1, 2025
578ad5b
fix: update memory profiling report test to validate non-zero entries…
kosiew Aug 1, 2025
4ec463c
fix: improve comment clarity in memory profiling report content test
kosiew Aug 1, 2025
b23228b
test: add memory profiling report test for disabled profiling scenario
kosiew Aug 1, 2025
f760140
fix: enhance error message for non-zero memory entry assertion in pro…
kosiew Aug 1, 2025
265bb38
fix: update expected operator prefixes in memory profiling report test
kosiew Aug 1, 2025
d528496
fix: update comment to reflect accurate operator names in memory prof…
kosiew Aug 1, 2025
1b8fff0
fix: remove top consumer example as it is no longer needed
kosiew Aug 1, 2025
55e6738
fix: remove AutoSample variant from MemoryProfilingMode enum
kosiew Aug 1, 2025
1413f0d
fix: update memory report handling in documentation for clarity
kosiew Aug 1, 2025
568e19c
fix: conditionally enable Avro example in doctests based on feature flag
kosiew Aug 1, 2025
0831b3a
fix: remove outdated memory profiling status messages from EnhancedMe…
kosiew Aug 1, 2025
5456ef1
fix: add memory commands to DataFusion CLI usage documentation
kosiew Aug 1, 2025
122d5dd
fix: update memory commands in DataFusion CLI usage documentation to …
kosiew Aug 1, 2025
989a08f
fix: add memory profiling support to DataFusion CLI
kosiew Aug 1, 2025
22f0f95
fix: enable memory profiling in execute
kosiew Aug 1, 2025
d58d13f
Revert "fix: enable memory profiling in execute"
kosiew Aug 1, 2025
e3295e7
Revert "fix: add memory profiling support to DataFusion CLI"
kosiew Aug 1, 2025
b32cd2b
feat: add memory profiling support to DataFusion CLI
kosiew Aug 1, 2025
e7aacc9
Revert "feat: add memory profiling support to DataFusion CLI"
kosiew Aug 1, 2025
1655e99
feat: add memory profiling commands, tests and update documentation
kosiew Aug 1, 2025
405d323
fix(tests): update snapshot for CLI memory profiling output
kosiew Aug 1, 2025
c135750
Revert "fix(tests): update snapshot for CLI memory profiling output"
kosiew Aug 1, 2025
682aedf
```
kosiew Aug 1, 2025
9a4ff0e
fix(tests): restore snapshot for CLI memory profiling enable and show…
kosiew Aug 1, 2025
5e1cf5b
fix(tests): remove obsolete snapshot for CLI memory profiling enable …
kosiew Aug 2, 2025
9c1e3f1
fix(tests): add new snapshot for CLI memory profiling enable and show…
kosiew Aug 2, 2025
6ecbf65
fix(cli): rename memory command to memory profiling and update usage …
kosiew Aug 2, 2025
7df3a9c
fix(tests): update CLI memory command to memory profiling in integrat…
kosiew Aug 2, 2025
5cb4a5d
feat(cli): add enhanced memory report functionality and update comman…
kosiew Aug 2, 2025
dcf4f1a
feat(tests): update snapshot for memory profiling commands in CLI tests
kosiew Aug 2, 2025
e279780
Merge branch 'main' into memory-16904a
kosiew Aug 2, 2025
8e02d83
fix(docs): update memory profiling commands in README for consistency
kosiew Aug 2, 2025
9b071eb
refactor: delegate memory report logic to core SessionContext impleme…
kosiew Aug 2, 2025
5812981
refactor: remove once_cell dependency and replace with LazyLock in me…
kosiew Aug 2, 2025
fd96dc8
feat: add operator categorization and utility function for query plans
kosiew Aug 2, 2025
27f00dd
test: add unit tests for EnhancedMemoryReport categorization and memo…
kosiew Aug 2, 2025
e3efb72
refactor: reorganize EnhancedMemoryReport structure and improve opera…
kosiew Aug 2, 2025
5953b0d
fix: add missing import for Url in EnhancedMemoryReport
kosiew Aug 2, 2025
de14455
feat: add print_analysis method to EnhancedMemoryReport for CLI output
kosiew Aug 2, 2025
1965384
feat: add join operation categorization to EnhancedMemoryReport
kosiew Aug 2, 2025
a90fc3a
feat: implement MemoryUsage struct and MemoryExplain trait for memory…
kosiew Aug 2, 2025
c93b848
feat: add license headers and module documentation for memory profili…
kosiew Aug 3, 2025
79bf76e
fix fmt errors
kosiew Aug 3, 2025
fd5e5a8
refactor: remove obsolete CLI memory snapshot test
kosiew Aug 3, 2025
448f491
feat: remove blank line in Cargo.toml for cleaner formatting
kosiew Aug 3, 2025
b9cf128
refactor: replace LightweightMemoryTracker with MemoryTracker for imp…
kosiew Aug 3, 2025
5fbcea1
```
kosiew Aug 3, 2025
eb0791b
fix: remove unnecessary whitespace in row_hash.rs file
kosiew Aug 3, 2025
b1f5c69
Merge branch 'main' into memory-16904
kosiew Aug 3, 2025
4b163a2
fix clippy error
kosiew Aug 3, 2025
08eaa4a
refactor: reorganize imports in memory_profiling.rs for better readab…
kosiew Aug 3, 2025
d8f32f7
refactor: reorganize and group imports for improved clarity in sessio…
kosiew Aug 3, 2025
5d0e5f5
feat(docs): add memory profiling configuration option to user guide
kosiew Aug 3, 2025
018a593
fix: implement Default for MemoryTracker to satisfy clippy lint
kosiew Aug 3, 2025
7279659
fix: use Arc::clone for memory tracker in DataFrame and SessionContext
kosiew Aug 3, 2025
1c15b66
fix md errors
kosiew Aug 3, 2025
08e2d25
Update md docs
kosiew Aug 3, 2025
75d7d32
fix: use Arc::clone for memory tracker in DataFrame to improve memory…
kosiew Aug 3, 2025
de25176
fix: replace std::time::Instant with datafusion_common::instant::Inst…
kosiew Aug 3, 2025
6e8207d
fix: add datafusion-common dependency to Cargo.toml for example projects
kosiew Aug 3, 2025
12b9fa4
fix: add datafusion-common dependency and improve print formatting in…
kosiew Aug 3, 2025
d34e0d7
Merge branch 'main' into memory-16904
kosiew Aug 5, 2025
173486c
fix: Allow 'on' as an alias for 'enable' in MemoryProfiling command
kosiew Aug 6, 2025
30c22ae
refactor: Remove tests for large dataset creation and memory profilin…
kosiew Aug 6, 2025
bfcc17f
Remove comment of unused import of LightweightMemoryTracker
kosiew Aug 6, 2025
5aef326
refactor: Remove unnecessary blank line in tests module
kosiew Aug 6, 2025
61b827a
refactor: Remove OperatorCategory enum and categorize_operator function
kosiew Aug 6, 2025
0720d56
refactor: Remove lz4, zstd features
kosiew Aug 6, 2025
59b4b1f
Merge branch 'main' into memory-16904
kosiew Aug 6, 2025
e394307
fix: Use options_mut() to set memory profiling mode in SessionConfig
kosiew Aug 6, 2025
922cc45
refactor: Remove unused Operator import from execution context
kosiew Aug 6, 2025
3a8af55
fix: Correct case of memory profiling commands in CLI usage documenta…
kosiew Aug 6, 2025
9e06580
fix: Update memory profiling documentation and improve related code c…
kosiew Aug 6, 2025
f3aac60
feat: Enhance memory profiling with new MemoryReport struct and updat…
kosiew Aug 6, 2025
e3b731e
fix: Add missing import for clap::ValueEnum in command.rs
kosiew Aug 6, 2025
38b8061
refactor: Rename print method to print_analysis in EnhancedMemoryRepo…
kosiew Aug 6, 2025
deef59b
refactor: Replace StdMutex with parking_lot::Mutex for improved perfo…
kosiew Aug 6, 2025
5afd8f5
docs: Add documentation for print_analysis method in EnhancedMemoryRe…
kosiew Aug 6, 2025
1a112e3
fix prettier errors
kosiew Aug 6, 2025
d82ee4c
refactor: Update memory profiling mode description and fix formatting…
kosiew Aug 6, 2025
32749d6
Merge branch 'main' into memory-16904
kosiew Aug 6, 2025
9922603
feat: Implement IntoIterator for MemoryReport to enable iteration ove…
kosiew Aug 6, 2025
2909ed3
docs: Update comment for EnhancedMemoryReport to clarify its purpose
kosiew Aug 6, 2025
c58d5f6
docs: Fix formatting of memory profiling commands in README.md
kosiew Aug 6, 2025
c39916c
docs: Update memory profiling section in README.md with example and e…
kosiew Aug 6, 2025
aef5480
Merge branch 'main' into memory-16904
kosiew Aug 9, 2025
208a540
Refactor memory profiling functionality in DataFusion
kosiew Aug 9, 2025
d18838d
Add memory profiling example to demonstrate tracking and reporting me…
kosiew Aug 9, 2025
c2631df
test(cli): update memory_enable_show snapshot to reflect recorded mem…
kosiew Aug 9, 2025
a480b78
feat(memory): integrate tracked memory pool and enhance profiling met…
kosiew Aug 9, 2025
dc85ae1
fix(tests): update memory profiling snapshot to reflect accurate output
kosiew Aug 9, 2025
da2b2c0
fix(tests): format memory profiling snapshot for consistency
kosiew Aug 9, 2025
107a770
fix(metrics): standardize operator categorization to lowercase for co…
kosiew Aug 9, 2025
3e09c22
fix(docs): update memory profiling output in README for accuracy
kosiew Aug 9, 2025
dc38e85
refactor(command): reorganize imports for improved readability
kosiew Aug 9, 2025
c8ce5ed
fix(command): datafusion-cli don't store metrics in print_options
kosiew Aug 9, 2025
86db315
fix(exec): remove disable_tracking in exec_and_print
kosiew Aug 9, 2025
abeeff5
test(cli): add snapshot for memory profiling integration test
kosiew Aug 9, 2025
2519537
fix(memory): immutable print_options
kosiew Aug 9, 2025
6f6b111
fix(print_options): remove last_memory_metrics from PrintOptions
kosiew Aug 9, 2025
1d36617
fix(config): remove unnecessary blank lines in config.rs
kosiew Aug 9, 2025
f4013db
fix(dataframe): remove unnecessary blank line in cache method
kosiew Aug 9, 2025
d549fe4
fix(mod.rs): add missing newline before module declarations
kosiew Aug 9, 2025
fe101cf
fix(session_state): reorganize imports for better readability
kosiew Aug 9, 2025
2c9e4b9
fix(usage): update memory profiling output for clarity
kosiew Aug 9, 2025
368c0e5
fix(configs): format license comment for improved readability
kosiew Aug 9, 2025
7c591c3
fix fmt errors
kosiew Aug 9, 2025
66ca74d
fix(license): improve formatting of license comments for consistency
kosiew Aug 9, 2025
3a5694d
fix(command): update memory profiling command syntax for consistency
kosiew Aug 12, 2025
afa4018
fix(command): clarify memory profiling command description for better…
kosiew Aug 12, 2025
e3fb00c
fix(docs): amend README memory profiling command description for cons…
kosiew Aug 12, 2025
9cdef04
Merge branch 'main' into memory-16904a
kosiew Aug 12, 2025
0345c12
feat(memory): implement memory profiling support in CLI context
kosiew Aug 12, 2025
b08d4e2
Revert "feat(memory): implement memory profiling support in CLI context"
kosiew Aug 12, 2025
c6a7ba1
feat(memory): refactor memory profiling support in CLI context
kosiew Aug 12, 2025
dd48d0b
fix(reader): rename parameter for clarity in get_metadata function
kosiew Aug 12, 2025
40b7663
fix(metrics): update log message for clarity in print_metrics function
kosiew Aug 12, 2025
cccdd14
docs: add memory profiling top_memory_consumers tip to README and usa…
kosiew Aug 12, 2025
53f3a11
refactor(metrics): rename print_metrics to format_metrics and update …
kosiew Aug 12, 2025
5488251
test(datafusion-cli): update cli_memory_enable_show snapshot (add tra…
kosiew Aug 12, 2025
b7014f9
fix(docs): correct memory usage label from 'Other' to 'Repartition' i…
kosiew Aug 12, 2025
73b24a4
refactor(metrics): reorganize import statements for clarity
kosiew Aug 12, 2025
292bdbb
docs(cli): fix memory profiling tip formatting in CLI usage docs
kosiew Aug 12, 2025
ecd135f
fix(reader): rename parameter for clarity in get_metadata function
kosiew Aug 12, 2025
5d83cb8
fix(cli): update usage message for memory profiling command
kosiew Aug 13, 2025
16d5072
fix(cli): loosen memory profiling output by replacing dynamic values …
kosiew Aug 13, 2025
11d8b28
fix(cli): update memory profiling output to use placeholders for clarity
kosiew Aug 13, 2025
c78b834
fix(reader): rename parameter for clarity in get_metadata function
kosiew Aug 13, 2025
ac15b2a
feat(memory): add peak size method to MemoryReservation and update di…
kosiew Aug 13, 2025
e4e1cee
feat(cli): enhance memory pool management in ReplSessionContext
kosiew Aug 13, 2025
3e8feee
refactor: remove memory profiling documentation from SessionContext
kosiew Aug 13, 2025
4829d01
refactor: simplify Avro example documentation condition
kosiew Aug 13, 2025
51a8a76
refactor: simplify operator category matching using a lookup table
kosiew Aug 13, 2025
62edbdb
refactor: add additional operator categories for memory usage reporting
kosiew Aug 13, 2025
47e9de3
refactor: remove unnecessary pool tracking enabling in exec_and_print
kosiew Aug 13, 2025
6b5ffdb
refactor: change operator_category function visibility to public
kosiew Aug 13, 2025
cb3dbc2
refactor: update memory profiling test to include larger dataset and …
kosiew Aug 13, 2025
d860a5a
refactor: update memory profiling test to include additional query an…
kosiew Aug 13, 2025
7b7c984
refactor: update memory profiling command usage message to include al…
kosiew Aug 13, 2025
a55fdc3
refactor: increase default value for top memory consumers from 3 to 5
kosiew Aug 13, 2025
be5c1b4
refactor: add CLI tests for memory profiling show commands
kosiew Aug 13, 2025
5e9dde7
Merge branch 'main' into memory-16904
kosiew Aug 13, 2025
062a19c
refactor: clean up imports in main.rs by removing redundant entry
kosiew Aug 13, 2025
6d88978
fix: update context reference for registering `metadata_cache` UDTF i…
kosiew Aug 13, 2025
caf1863
fix: update parameter name for options in get_metadata method in Cach…
kosiew Aug 13, 2025
fba5cb1
remove \memory_profiling show
kosiew Aug 19, 2025
3cf46f1
Merge branch 'main' into memory-16904
kosiew Aug 19, 2025
b51998a
refactor: simplify memory profiling logic in ReplSessionContext
kosiew Aug 19, 2025
78efcf5
test: add CLI snapshot tests for various output formats and memory pr…
kosiew Aug 19, 2025
45bc14d
test: update CLI memory profiling test input for improved formatting
kosiew Aug 19, 2025
d44798b
style: reorder import statements for improved readability
kosiew Aug 19, 2025
d80f994
refactor: remove mut base_pool
kosiew Aug 19, 2025
5683217
test: enhance backtrace output verification to include planning error…
kosiew Aug 19, 2025
b6765ab
refactor: streamline memory pool implementation and enhance Arc<T> su…
kosiew Aug 19, 2025
907c6af
refactor: simplify memory profiling logic in ReplSessionContext
kosiew Aug 19, 2025
921be60
refactor: update memory profiling command to toggle state without arg…
kosiew Aug 19, 2025
8224541
Merge branch 'main' into memory-16904
kosiew Aug 19, 2025
19e2fb8
docs: clarify memory profiling command as a toggle in CLI usage
kosiew Aug 19, 2025
6db9339
docs: improve documentation for MemoryPool implementation with Arc<T>
kosiew Aug 19, 2025
be23d39
fix(tests): update AWS region auto resolution snapshots to include me…
kosiew Aug 19, 2025
28559b3
docs: add tip for memory profiling requirement in CLI usage documenta…
kosiew Aug 19, 2025
27f85d3
fix(tests): update snapshot paths and add AWS environment variables
kosiew Aug 19, 2025
660899b
docs: clarify memory profiling toggle note in CLI usage documentation
kosiew Aug 19, 2025
333e5da
prettier config docs
kosiew Aug 19, 2025
eb33d21
fix(memory): update MemoryPool implementation for Arc<dyn MemoryPool>
kosiew Aug 19, 2025
d847244
Merge branch 'main' into memory-16904
kosiew Aug 19, 2025
d47fcec
refactor(session_state): reorganize use statements for improved reada…
kosiew Aug 21, 2025
a492a67
refactor(dataframe): simplify collect method by removing unnecessary …
kosiew Aug 21, 2025
896d16d
refactor(command): remove FromStr implementation for MemoryProfilingC…
kosiew Aug 21, 2025
20eec41
refactor(docs): move memory profiling instructions from README to usage
kosiew Aug 21, 2025
fe1b656
refactor(command): update MemoryProfiling command to disallow arguments
kosiew Aug 21, 2025
ab1aaab
refactor(memory): disable memory profiling by default and update rela…
kosiew Aug 21, 2025
03ea519
refactor(memory): streamline memory pool initialization and remove un…
kosiew Aug 21, 2025
efabb40
refactor(cli): simplify memory profiling command syntax and update do…
kosiew Aug 21, 2025
88c6343
Added a new tracking_enabled method to the TrackedPool trait and impl…
kosiew Aug 21, 2025
b6d07fa
Merge branch 'main' into memory-16904
kosiew Aug 21, 2025
e67f1d9
Merge branch 'main' into memory-16904
kosiew Aug 21, 2025
16382fc
fix: update AWS endpoint in snapshot tests for consistency
kosiew Aug 21, 2025
07bd009
style: reorder and format use statements for improved readability
kosiew Aug 21, 2025
df45d18
style: reorder and format use statements for improved readability
kosiew Aug 21, 2025
e055441
refactor(cli): simplify memory profiling command syntax and update do…
kosiew Aug 21, 2025
b791042
Merge branch 'main' into memory-16904
kosiew Aug 21, 2025
c161355
Merge branch 'main' into memory-16904
kosiew Aug 21, 2025
38137b1
fix: update AWS endpoint in snapshot tests for consistency
kosiew Aug 21, 2025
84e3458
style: reorder and format use statements for improved readability
kosiew Aug 21, 2025
5c387e4
Merge branch 'memory-16904-toggle' into memory-16904
kosiew Aug 21, 2025
63de515
fix: simplify memory profiling commands in cli_memory_disable_stops_r…
kosiew Aug 21, 2025
c3d4382
Merge branch 'main' into memory-16904
kosiew Aug 25, 2025
5b08791
Remove unrelated changes and tidy-up
kosiew Aug 25, 2025
fe8f856
fix(docs): update default value for top memory consumers in CLI usage…
kosiew Aug 25, 2025
9dd2518
feat(tests): add memory profiling command to top memory consumers test
kosiew Aug 26, 2025
0a377c0
feat(examples): enhance memory profiling example with per-consumer tr…
kosiew Aug 26, 2025
3d00ee2
fix test_cli_top_memory_consumers
kosiew Aug 26, 2025
f1cf29e
amend filter for backtrace
kosiew Aug 26, 2025
e09db5f
Merge branch 'main' into memory-16904
kosiew Aug 26, 2025
a7b04cc
Merge branch 'main' into memory-16904
kosiew Aug 31, 2025
a064cf0
fix(tests): update memory consumer regex to match optional peak memor…
kosiew Sep 1, 2025
b4b852a
Merge branch 'main' into memory-16904
alamb Sep 5, 2025
e049ebc
Merge branch 'main' into memory-16904
kosiew Sep 6, 2025
ea0f65d
Merge branch 'memory-16904' of github.com:kosiew/datafusion into memo…
kosiew Sep 6, 2025
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions datafusion-cli/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,7 @@ datafusion = { workspace = true, features = [
"unicode_expressions",
"compression",
] }
datafusion-execution = { workspace = true }
dirs = "6.0.0"
env_logger = { workspace = true }
futures = { workspace = true }
Expand Down
123 changes: 108 additions & 15 deletions datafusion-cli/src/cli_context.rs
Original file line number Diff line number Diff line change
Expand Up @@ -20,14 +20,38 @@ use std::sync::Arc;
use datafusion::{
dataframe::DataFrame,
error::DataFusionError,
execution::{context::SessionState, TaskContext},
execution::{context::SessionState, memory_pool::TrackedPool, TaskContext},
logical_expr::LogicalPlan,
prelude::SessionContext,
};
use object_store::ObjectStore;

use crate::object_storage::{AwsOptions, GcpOptions};

/// Registers the table option extension matching a URL scheme on `ctx`.
///
/// Scheme handling:
/// * `s3`, `oss`, `cos` - a default [`AwsOptions`] is registered
/// * `gs`, `gcs` - a default [`GcpOptions`] is registered
///
/// Every other scheme leaves `ctx` untouched.
pub fn register_table_options_from_scheme(ctx: &SessionContext, scheme: &str) {
    if matches!(scheme, "s3" | "oss" | "cos") {
        // These schemes all use the AWS table options.
        ctx.register_table_options_extension(AwsOptions::default());
    } else if matches!(scheme, "gs" | "gcs") {
        // Google Cloud Storage schemes use the GCP table options.
        ctx.register_table_options_extension(GcpOptions::default());
    }
    // Any other scheme is unsupported here and deliberately ignored.
}

#[async_trait::async_trait]
/// The CLI session context trait provides a way to have a session context that can be used with datafusion's CLI code.
pub trait CliSessionContext {
Expand All @@ -52,6 +76,19 @@ pub trait CliSessionContext {
&self,
plan: LogicalPlan,
) -> Result<DataFrame, DataFusionError>;

/// Return true if memory profiling is enabled.
fn memory_profiling(&self) -> bool {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think the memory profiling flag would be best stored on PrintOptions, similar to the quiet mode flag (which suppresses execution time printing). Then you would not need to introduce so much new code and a new trait.

https://github.com/apache/datafusion/blob/df45d186d34f2ac131d64e4a068d9f39b35e99c7/datafusion-cli/src/print_options.rs#L73-L72

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

That would require print_options to be mut, because we can
toggle memory_profiling.
It was moved out of print_options after a review comment that print_options should not be mut for memory profiling.

false
}

/// Turns memory profiling on (`true`) or off (`false`).
///
/// This default implementation ignores the request and does nothing.
fn set_memory_profiling(&self, _enable: bool) {
    // Intentionally a no-op.
}

/// Returns the tracked memory pool used for profiling, when there is one.
///
/// This default implementation has no pool and always yields `None`.
fn tracked_memory_pool(&self) -> Option<Arc<dyn TrackedPool>> {
    Option::None
}
}

#[async_trait::async_trait]
Expand All @@ -73,26 +110,82 @@ impl CliSessionContext for SessionContext {
}

fn register_table_options_extension_from_scheme(&self, scheme: &str) {
match scheme {
// For Amazon S3 or Alibaba Cloud OSS
"s3" | "oss" | "cos" => {
// Register AWS specific table options in the session context:
self.register_table_options_extension(AwsOptions::default())
}
// For Google Cloud Storage
"gs" | "gcs" => {
// Register GCP specific table options in the session context:
self.register_table_options_extension(GcpOptions::default())
}
// For unsupported schemes, do nothing:
_ => {}
register_table_options_from_scheme(self, scheme);
}

/// Executes `plan` through `SessionContext::execute_logical_plan` and
/// returns the resulting [`DataFrame`], or the error it reports.
async fn execute_logical_plan(
    &self,
    plan: LogicalPlan,
) -> Result<DataFrame, DataFusionError> {
    // The path-qualified call states explicitly which function is invoked.
    let dataframe = SessionContext::execute_logical_plan(self, plan).await?;
    Ok(dataframe)
}
}

/// Session context used by the CLI with memory profiling support.
///
/// Pairs a [`SessionContext`] with an optional [`TrackedPool`] handle.
pub struct ReplSessionContext {
    /// The wrapped session context.
    ctx: SessionContext,
    /// Tracked memory pool handle, if one was supplied at construction.
    tracked_memory_pool: Option<Arc<dyn TrackedPool>>,
}

impl ReplSessionContext {
pub fn new(
ctx: SessionContext,
tracked_memory_pool: Option<Arc<dyn TrackedPool>>,
) -> Self {
Self {
ctx,
tracked_memory_pool,
}
}
}

#[async_trait::async_trait]
impl CliSessionContext for ReplSessionContext {
    /// Returns the task context of the wrapped [`SessionContext`].
    fn task_ctx(&self) -> Arc<TaskContext> {
        self.ctx.task_ctx()
    }

    /// Returns the session state of the wrapped [`SessionContext`].
    fn session_state(&self) -> SessionState {
        self.ctx.state()
    }

    /// Registers `object_store` for `url` on the wrapped context and forwards
    /// whatever that registration returns.
    fn register_object_store(
        &self,
        url: &url::Url,
        object_store: Arc<dyn ObjectStore>,
    ) -> Option<Arc<dyn ObjectStore + 'static>> {
        self.ctx.register_object_store(url, object_store)
    }

    /// Registers the table option extension for `scheme` on the wrapped
    /// context via [`register_table_options_from_scheme`].
    fn register_table_options_extension_from_scheme(&self, scheme: &str) {
        register_table_options_from_scheme(&self.ctx, scheme);
    }

    /// Executes `plan` on the wrapped [`SessionContext`].
    async fn execute_logical_plan(
        &self,
        plan: LogicalPlan,
    ) -> Result<DataFrame, DataFusionError> {
        // The receiver must be `self.ctx`: the stray
        // `self.execute_logical_plan(plan).await` line that preceded this one
        // was not valid syntax and would have called this method on itself.
        self.ctx.execute_logical_plan(plan).await
    }

    /// Returns `true` only when a tracked pool is present and reports that
    /// tracking is enabled; with no pool the answer is `false`.
    fn memory_profiling(&self) -> bool {
        self.tracked_memory_pool
            .as_ref()
            .is_some_and(|pool| pool.tracking_enabled())
    }

    /// Enables or disables tracking on the tracked pool.
    ///
    /// Does nothing when no tracked pool was supplied.
    fn set_memory_profiling(&self, enable: bool) {
        let Some(pool) = &self.tracked_memory_pool else {
            return;
        };
        if enable {
            pool.enable_tracking();
        } else {
            pool.disable_tracking();
        }
    }

    /// Returns a clone of the tracked pool handle, if any.
    fn tracked_memory_pool(&self) -> Option<Arc<dyn TrackedPool>> {
        self.tracked_memory_pool.clone()
    }
}
19 changes: 18 additions & 1 deletion datafusion-cli/src/command.rs
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,7 @@ pub enum Command {
SearchFunctions(String),
QuietMode(Option<bool>),
OutputFormat(Option<String>),
MemoryProfiling,
}

pub enum OutputFormat {
Expand Down Expand Up @@ -110,6 +111,15 @@ impl Command {
}
Ok(())
}
Self::MemoryProfiling => {
let enable = !ctx.memory_profiling();
ctx.set_memory_profiling(enable);
println!(
"Memory profiling {}",
if enable { "enabled" } else { "disabled" }
);
Ok(())
}
Self::Quit => exec_err!("Unexpected quit, this should be handled outside"),
Self::ListFunctions => display_all_functions(),
Self::SearchFunctions(function) => {
Expand Down Expand Up @@ -142,11 +152,15 @@ impl Command {
Self::OutputFormat(_) => {
("\\pset [NAME [VALUE]]", "set table output option\n(format)")
}
Self::MemoryProfiling => (
"\\memory_profiling",
"toggle memory profiling (requires --top-memory-consumers N at startup for metrics)",
),
}
}
}

const ALL_COMMANDS: [Command; 9] = [
const ALL_COMMANDS: [Command; 10] = [
Command::ListTables,
Command::DescribeTableStmt(String::new()),
Command::Quit,
Expand All @@ -156,6 +170,7 @@ const ALL_COMMANDS: [Command; 9] = [
Command::SearchFunctions(String::new()),
Command::QuietMode(None),
Command::OutputFormat(None),
Command::MemoryProfiling,
];

fn all_commands_info() -> RecordBatch {
Expand Down Expand Up @@ -206,6 +221,8 @@ impl FromStr for Command {
Self::OutputFormat(Some(subcommand.to_string()))
}
("pset", None) => Self::OutputFormat(None),
("memory_profiling", None) => Self::MemoryProfiling,
("memory_profiling", Some(_)) => return Err(()),
_ => return Err(()),
})
}
Expand Down
7 changes: 7 additions & 0 deletions datafusion-cli/src/exec.rs
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@

use crate::cli_context::CliSessionContext;
use crate::helper::split_from_semicolon;
use crate::memory_metrics::format_metrics;
use crate::print_format::PrintFormat;
use crate::{
command::{Command, OutputFormat},
Expand Down Expand Up @@ -313,6 +314,12 @@ impl StatementExecutor {
)?;
reservation.free();
}
if ctx.memory_profiling() {
if let Some(pool) = ctx.tracked_memory_pool() {
let metrics = pool.consumer_metrics();
println!("{}", format_metrics(&metrics));
}
}

Ok(())
}
Expand Down
1 change: 1 addition & 0 deletions datafusion-cli/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@ pub mod exec;
pub mod functions;
pub mod helper;
pub mod highlighter;
pub mod memory_metrics;
pub mod object_storage;
pub mod pool_type;
pub mod print_format;
Expand Down
66 changes: 36 additions & 30 deletions datafusion-cli/src/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,8 @@ use std::sync::{Arc, LazyLock};
use datafusion::error::{DataFusionError, Result};
use datafusion::execution::context::SessionConfig;
use datafusion::execution::memory_pool::{
FairSpillPool, GreedyMemoryPool, MemoryPool, TrackConsumersPool,
FairSpillPool, GreedyMemoryPool, MemoryPool, TrackConsumersPool, TrackedPool,
UnboundedMemoryPool,
};
use datafusion::execution::runtime_env::RuntimeEnvBuilder;
use datafusion::prelude::SessionContext;
Expand All @@ -39,6 +40,8 @@ use datafusion_cli::{
DATAFUSION_CLI_VERSION,
};

use datafusion_cli::cli_context::ReplSessionContext;

use clap::Parser;
use datafusion::common::config_err;
use datafusion::config::ConfigOptions;
Expand Down Expand Up @@ -123,7 +126,7 @@ struct Args {
#[clap(
long,
help = "The number of top memory consumers to display when query fails due to memory exhaustion. To disable memory consumer tracking, set this value to 0",
default_value = "3"
default_value = "5"
)]
top_memory_consumers: usize,

Expand Down Expand Up @@ -174,28 +177,30 @@ async fn main_inner() -> Result<()> {
let session_config = get_session_config(&args)?;

let mut rt_builder = RuntimeEnvBuilder::new();
// set memory pool size
if let Some(memory_limit) = args.memory_limit {
// set memory pool type
let pool: Arc<dyn MemoryPool> = match args.mem_pool_type {
PoolType::Fair if args.top_memory_consumers == 0 => {
Arc::new(FairSpillPool::new(memory_limit))
}
PoolType::Fair => Arc::new(TrackConsumersPool::new(
FairSpillPool::new(memory_limit),
NonZeroUsize::new(args.top_memory_consumers).unwrap(),
)),
PoolType::Greedy if args.top_memory_consumers == 0 => {
Arc::new(GreedyMemoryPool::new(memory_limit))

// set memory pool type
let base_memory_pool: Arc<dyn MemoryPool> =
if let Some(memory_limit) = args.memory_limit {
match args.mem_pool_type {
PoolType::Fair => Arc::new(FairSpillPool::new(memory_limit)),
PoolType::Greedy => Arc::new(GreedyMemoryPool::new(memory_limit)),
}
PoolType::Greedy => Arc::new(TrackConsumersPool::new(
GreedyMemoryPool::new(memory_limit),
NonZeroUsize::new(args.top_memory_consumers).unwrap(),
)),
} else {
Arc::new(UnboundedMemoryPool::default())
};

rt_builder = rt_builder.with_memory_pool(pool)
}
let tracked_pool: Option<Arc<dyn TrackedPool>> = if args.top_memory_consumers > 0 {
let tracked = Arc::new(TrackConsumersPool::new(
base_memory_pool.clone(),
NonZeroUsize::new(args.top_memory_consumers).unwrap(),
));
tracked.disable_tracking();
rt_builder = rt_builder.with_memory_pool(tracked.clone());
Some(tracked as Arc<dyn TrackedPool>)
} else {
rt_builder = rt_builder.with_memory_pool(base_memory_pool);
None
};

// set disk limit
if let Some(disk_limit) = args.disk_limit {
Expand All @@ -208,24 +213,25 @@ async fn main_inner() -> Result<()> {
let runtime_env = rt_builder.build_arc()?;

// enable dynamic file query
let ctx = SessionContext::new_with_config_rt(session_config, runtime_env)
let session_ctx = SessionContext::new_with_config_rt(session_config, runtime_env)
.enable_url_table();
ctx.refresh_catalogs().await?;
session_ctx.refresh_catalogs().await?;
// install dynamic catalog provider that can register required object stores
ctx.register_catalog_list(Arc::new(DynamicObjectStoreCatalog::new(
ctx.state().catalog_list().clone(),
ctx.state_weak_ref(),
session_ctx.register_catalog_list(Arc::new(DynamicObjectStoreCatalog::new(
session_ctx.state().catalog_list().clone(),
session_ctx.state_weak_ref(),
)));
// register `parquet_metadata` table function to get metadata from parquet files
ctx.register_udtf("parquet_metadata", Arc::new(ParquetMetadataFunc {}));

session_ctx.register_udtf("parquet_metadata", Arc::new(ParquetMetadataFunc {}));
// register `metadata_cache` table function to get the contents of the file metadata cache
ctx.register_udtf(
session_ctx.register_udtf(
"metadata_cache",
Arc::new(MetadataCacheFunc::new(
ctx.task_ctx().runtime_env().cache_manager.clone(),
session_ctx.task_ctx().runtime_env().cache_manager.clone(),
)),
);
// wrap the SessionContext in a REPL context (adds profiling, top consumers, etc.)
let ctx = ReplSessionContext::new(session_ctx, tracked_pool);

let mut print_options = PrintOptions {
format: args.format,
Expand Down
Loading