Skip to content

Commit

Permalink
commands will use NORMAL OOM check by default
Browse files (browse the repository at this point in the history)
instead of the CONSERVATIVE OOM check, as it was causing too many false positives
  • Loading branch information
jqnatividad committed Apr 22, 2023
1 parent 9e83cbb commit 7bab9f4
Show file tree
Hide file tree
Showing 9 changed files with 69 additions and 69 deletions.
6 changes: 3 additions & 3 deletions src/cmd/dedup.rs
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,7 @@ Common options:
-d, --delimiter <arg> The field delimiter for reading CSV data.
Must be a single character. (default: ,)
-Q, --quiet Do not print duplicate count to stderr.
--no-memcheck Do not check if there is enough memory to load the
--memcheck Check if there is enough memory to load the
entire CSV into memory.
"#;

Expand Down Expand Up @@ -76,7 +76,7 @@ struct Args {
flag_human_readable: bool,
flag_jobs: Option<usize>,
flag_quiet: bool,
flag_no_memcheck: bool,
flag_memcheck: bool,
}

pub fn run(argv: &[&str]) -> CliResult<()> {
Expand Down Expand Up @@ -140,7 +140,7 @@ pub fn run(argv: &[&str]) -> CliResult<()> {
} else {
// we're loading the entire file into memory, we need to check avail mem
if let Some(path) = rconfig.path.clone() {
util::mem_file_check(&path, false, args.flag_no_memcheck)?;
util::mem_file_check(&path, false, args.flag_memcheck)?;
}

// set RAYON_NUM_THREADS for parallel sort
Expand Down
24 changes: 12 additions & 12 deletions src/cmd/frequency.rs
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,7 @@ Common options:
names.
-d, --delimiter <arg> The field delimiter for reading CSV data.
Must be a single character. (default: ,)
--no-memcheck Do not check if there is enough memory to load the
--memcheck Check if there is enough memory to load the
entire CSV into memory.
"#;

Expand All @@ -65,16 +65,16 @@ use crate::{

#[derive(Clone, Deserialize)]
pub struct Args {
pub arg_input: Option<String>,
pub flag_select: SelectColumns,
pub flag_limit: usize,
pub flag_asc: bool,
pub flag_no_nulls: bool,
pub flag_jobs: Option<usize>,
pub flag_output: Option<String>,
pub flag_no_headers: bool,
pub flag_delimiter: Option<Delimiter>,
pub flag_no_memcheck: bool,
pub arg_input: Option<String>,
pub flag_select: SelectColumns,
pub flag_limit: usize,
pub flag_asc: bool,
pub flag_no_nulls: bool,
pub flag_jobs: Option<usize>,
pub flag_output: Option<String>,
pub flag_no_headers: bool,
pub flag_delimiter: Option<Delimiter>,
pub flag_memcheck: bool,
}

pub fn run(argv: &[&str]) -> CliResult<()> {
Expand All @@ -83,7 +83,7 @@ pub fn run(argv: &[&str]) -> CliResult<()> {

// we're loading the entire file into memory, we need to check avail mem
if let Some(path) = rconfig.path.clone() {
util::mem_file_check(&path, false, args.flag_no_memcheck)?;
util::mem_file_check(&path, false, args.flag_memcheck)?;
}

let mut wtr = Config::new(&args.flag_output).writer()?;
Expand Down
14 changes: 7 additions & 7 deletions src/cmd/reverse.rs
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ Common options:
appear as the header row in the output.
-d, --delimiter <arg> The field delimiter for reading CSV data.
Must be a single character. (default: ,)
--no-memcheck Do not check if there is enough memory to load the
--memcheck Check if there is enough memory to load the
entire CSV into memory.
"#;

Expand All @@ -32,11 +32,11 @@ use crate::{

#[derive(Deserialize)]
struct Args {
arg_input: Option<String>,
flag_output: Option<String>,
flag_no_headers: bool,
flag_delimiter: Option<Delimiter>,
flag_no_memcheck: bool,
arg_input: Option<String>,
flag_output: Option<String>,
flag_no_headers: bool,
flag_delimiter: Option<Delimiter>,
flag_memcheck: bool,
}

pub fn run(argv: &[&str]) -> CliResult<()> {
Expand All @@ -49,7 +49,7 @@ pub fn run(argv: &[&str]) -> CliResult<()> {

// we're loading the entire file into memory, we need to check avail mem
if let Some(path) = rconfig.path.clone() {
util::mem_file_check(&path, false, args.flag_no_memcheck)?;
util::mem_file_check(&path, false, args.flag_memcheck)?;
}

let mut all = rdr.byte_records().collect::<Result<Vec<_>, _>>()?;
Expand Down
32 changes: 16 additions & 16 deletions src/cmd/schema.rs
Original file line number Diff line number Diff line change
Expand Up @@ -62,7 +62,7 @@ Common options:
appear as the header row in the output.
-d, --delimiter <arg> The field delimiter for reading CSV data.
Must be a single character. [default: ,]
--no-memcheck Do not check if there is enough memory to load the
--memcheck Check if there is enough memory to load the
entire CSV into memory.
"#;

Expand Down Expand Up @@ -101,7 +101,7 @@ pub struct Args {
pub flag_no_headers: bool,
pub flag_delimiter: Option<Delimiter>,
pub arg_input: Option<String>,
pub flag_no_memcheck: bool,
pub flag_memcheck: bool,
}

const STDIN_CSV: &str = "stdin.csv";
Expand Down Expand Up @@ -135,7 +135,7 @@ pub fn run(argv: &[&str]) -> CliResult<()> {
util::mem_file_check(
&std::path::PathBuf::from(&input_path),
false,
args.flag_no_memcheck,
args.flag_memcheck,
)?;

// we can do this directly here, since args is mutable and
Expand Down Expand Up @@ -434,7 +434,7 @@ fn get_stats_records(args: &Args) -> CliResult<(ByteRecord, Vec<Stats>, AHashMap
flag_output: None,
flag_no_headers: args.flag_no_headers,
flag_delimiter: args.flag_delimiter,
flag_no_memcheck: args.flag_no_memcheck,
flag_memcheck: args.flag_memcheck,
flag_stats_binout: None,
};

Expand Down Expand Up @@ -518,8 +518,8 @@ fn get_stats_records(args: &Args) -> CliResult<(ByteRecord, Vec<Stats>, AHashMap
let delim = delimiter.as_byte() as char;
stats_args_str = format!("{stats_args_str} --delimiter {delim}");
}
if args.flag_no_memcheck {
stats_args_str = format!("{stats_args_str} --no-memcheck");
if args.flag_memcheck {
stats_args_str = format!("{stats_args_str} --memcheck");
}
if let Some(mut jobs) = stats_args.flag_jobs {
if jobs > 2 {
Expand Down Expand Up @@ -617,16 +617,16 @@ fn get_unique_values(
) -> CliResult<AHashMap<String, Vec<String>>> {
// prepare arg for invoking cmd::frequency
let freq_args = crate::cmd::frequency::Args {
arg_input: args.arg_input.clone(),
flag_select: crate::select::SelectColumns::parse(column_select_arg).unwrap(),
flag_limit: args.flag_enum_threshold,
flag_asc: false,
flag_no_nulls: true,
flag_jobs: Some(util::njobs(args.flag_jobs)),
flag_output: None,
flag_no_headers: args.flag_no_headers,
flag_delimiter: args.flag_delimiter,
flag_no_memcheck: args.flag_no_memcheck,
arg_input: args.arg_input.clone(),
flag_select: crate::select::SelectColumns::parse(column_select_arg).unwrap(),
flag_limit: args.flag_enum_threshold,
flag_asc: false,
flag_no_nulls: true,
flag_jobs: Some(util::njobs(args.flag_jobs)),
flag_output: None,
flag_no_headers: args.flag_no_headers,
flag_delimiter: args.flag_delimiter,
flag_memcheck: args.flag_memcheck,
};

let (headers, ftables) = match freq_args.rconfig().indexed()? {
Expand Down
6 changes: 3 additions & 3 deletions src/cmd/sort.rs
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,7 @@ Common options:
appear as the header row in the output.
-d, --delimiter <arg> The field delimiter for reading CSV data.
Must be a single character. (default: ,)
--no-memcheck Do not check if there is enough memory to load the
--memcheck Check if there is enough memory to load the
entire CSV into memory.
"#;

Expand Down Expand Up @@ -71,7 +71,7 @@ struct Args {
flag_no_headers: bool,
flag_delimiter: Option<Delimiter>,
flag_unique: bool,
flag_no_memcheck: bool,
flag_memcheck: bool,
}

pub fn run(argv: &[&str]) -> CliResult<()> {
Expand All @@ -86,7 +86,7 @@ pub fn run(argv: &[&str]) -> CliResult<()> {

// we're loading the entire file into memory, we need to check avail mem
if let Some(path) = rconfig.path.clone() {
util::mem_file_check(&path, false, args.flag_no_memcheck)?;
util::mem_file_check(&path, false, args.flag_memcheck)?;
}

let mut rdr = rconfig.reader()?;
Expand Down
6 changes: 3 additions & 3 deletions src/cmd/stats.rs
Original file line number Diff line number Diff line change
Expand Up @@ -130,7 +130,7 @@ Common options:
in statistics.
-d, --delimiter <arg> The field delimiter for reading CSV data.
Must be a single character. (default: ,)
--no-memcheck Do not check if there is enough memory to load the
--memcheck Check if there is enough memory to load the
entire CSV into memory.
"#;

Expand Down Expand Up @@ -201,7 +201,7 @@ pub struct Args {
pub flag_output: Option<String>,
pub flag_no_headers: bool,
pub flag_delimiter: Option<Delimiter>,
pub flag_no_memcheck: bool,
pub flag_memcheck: bool,
pub flag_stats_binout: Option<String>,
}

Expand Down Expand Up @@ -410,7 +410,7 @@ pub fn run(argv: &[&str]) -> CliResult<()> {
|| args.flag_quartiles
|| args.flag_mad
{
util::mem_file_check(&path, false, args.flag_no_memcheck)?;
util::mem_file_check(&path, false, args.flag_memcheck)?;
}

// we need to count the number of records in the file to calculate sparsity
Expand Down
20 changes: 10 additions & 10 deletions src/cmd/table.rs
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@ Common options:
-o, --output <file> Write output to <file> instead of stdout.
-d, --delimiter <arg> The field delimiter for reading CSV data.
Must be a single character. (default: ,)
--no-memcheck Do not check if there is enough memory to load the
--memcheck Check if there is enough memory to load the
entire CSV into memory.
"#;

Expand All @@ -45,14 +45,14 @@ use crate::{

#[derive(Deserialize)]
struct Args {
arg_input: Option<String>,
flag_width: usize,
flag_pad: usize,
flag_output: Option<String>,
flag_delimiter: Option<Delimiter>,
flag_align: Align,
flag_condense: Option<usize>,
flag_no_memcheck: bool,
arg_input: Option<String>,
flag_width: usize,
flag_pad: usize,
flag_output: Option<String>,
flag_delimiter: Option<Delimiter>,
flag_align: Align,
flag_condense: Option<usize>,
flag_memcheck: bool,
}

#[derive(Deserialize, Clone, Copy)]
Expand Down Expand Up @@ -81,7 +81,7 @@ pub fn run(argv: &[&str]) -> CliResult<()> {

// we're loading the entire file into memory, we need to check avail mem
if let Some(path) = rconfig.path.clone() {
util::mem_file_check(&path, false, args.flag_no_memcheck)?;
util::mem_file_check(&path, false, args.flag_memcheck)?;
}

let wconfig = Config::new(&args.flag_output).delimiter(Some(Delimiter(b'\t')));
Expand Down
16 changes: 8 additions & 8 deletions src/cmd/tojsonl.rs
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ Common options:
-d, --delimiter <arg> The field delimiter for reading CSV data.
Must be a single character. (default: ,)
-o, --output <file> Write output to <file> instead of stdout.
--no-memcheck Do not check if there is enough memory to load the
--memcheck Check if there is enough memory to load the
entire CSV into memory.
"#;

Expand All @@ -44,11 +44,11 @@ use crate::{

#[derive(Deserialize, Clone)]
struct Args {
arg_input: Option<String>,
flag_jobs: Option<usize>,
flag_delimiter: Option<Delimiter>,
flag_output: Option<String>,
flag_no_memcheck: bool,
arg_input: Option<String>,
flag_jobs: Option<usize>,
flag_delimiter: Option<Delimiter>,
flag_output: Option<String>,
flag_memcheck: bool,
}

impl From<std::fmt::Error> for CliError {
Expand Down Expand Up @@ -98,7 +98,7 @@ pub fn run(argv: &[&str]) -> CliResult<()> {

// we're loading the entire file into memory, we need to check avail mem
if let Some(path) = conf.path.clone() {
util::mem_file_check(&path, false, args.flag_no_memcheck)?;
util::mem_file_check(&path, false, args.flag_memcheck)?;
}

// we're calling the schema command to infer data types and enums
Expand All @@ -118,7 +118,7 @@ pub fn run(argv: &[&str]) -> CliResult<()> {
flag_no_headers: false,
flag_delimiter: args.flag_delimiter,
arg_input: args.arg_input.clone(),
flag_no_memcheck: args.flag_no_memcheck,
flag_memcheck: args.flag_memcheck,
};
// build schema for each field by their inferred type, min/max value/length, and unique values
let properties_map: Map<String, Value> =
Expand Down
14 changes: 7 additions & 7 deletions src/cmd/transpose.rs
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ Common options:
-o, --output <file> Write output to <file> instead of stdout.
-d, --delimiter <arg> The field delimiter for reading CSV data.
Must be a single character. (default: ,)
--no-memcheck Do not check if there is enough memory to load the
--memcheck Check if there is enough memory to load the
entire CSV into memory. Ignored with --multipass.
"#;

Expand All @@ -37,11 +37,11 @@ use crate::{

#[derive(Deserialize)]
struct Args {
arg_input: Option<String>,
flag_output: Option<String>,
flag_delimiter: Option<Delimiter>,
flag_multipass: bool,
flag_no_memcheck: bool,
arg_input: Option<String>,
flag_output: Option<String>,
flag_delimiter: Option<Delimiter>,
flag_multipass: bool,
flag_memcheck: bool,
}

pub fn run(argv: &[&str]) -> CliResult<()> {
Expand All @@ -64,7 +64,7 @@ impl Args {
fn in_memory_transpose(&self) -> CliResult<()> {
// we're loading the entire file into memory, we need to check avail mem
if let Some(path) = self.rconfig().path {
util::mem_file_check(&path, false, self.flag_no_memcheck)?;
util::mem_file_check(&path, false, self.flag_memcheck)?;
}

let mut rdr = self.rconfig().reader()?;
Expand Down

0 comments on commit 7bab9f4

Please sign in to comment.