Skip to content

Commit

Permalink
Update signal plot default window size
Browse files Browse the repository at this point in the history
  • Loading branch information
jonperdomo committed Sep 26, 2024
1 parent 26a4609 commit 13ceda6
Show file tree
Hide file tree
Showing 3 changed files with 28 additions and 28 deletions.
6 changes: 3 additions & 3 deletions src/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -365,7 +365,7 @@ def fast5_module(margs):
sys.exit(0)

else:
logging.info('Input file(s) are:\n%s', '\n'.join(param_dict["input_files"]))
# logging.info('Input file(s) are:\n%s', '\n'.join(param_dict["input_files"]))
param_dict["out_prefix"] += "FAST5"
input_para = lrst.Input_Para()
input_para.threads = param_dict["threads"]
Expand Down Expand Up @@ -400,7 +400,7 @@ def fast5_signal_module(margs):
sys.exit(0)

else:
logging.info('Input file(s) are:\n%s', '\n'.join(param_dict["input_files"]))
# logging.info('Input file(s) are:\n%s', '\n'.join(param_dict["input_files"]))
param_dict["out_prefix"] += "fast5_signal"
input_para = lrst.Input_Para()
input_para.threads = param_dict["threads"]
Expand Down Expand Up @@ -466,7 +466,7 @@ def pod5_module(margs):
sys.exit(0)

else:
logging.info('Input file(s) are:\n%s', '\n'.join(param_dict["input_files"]))
# logging.info('Input file(s) are:\n%s', '\n'.join(param_dict["input_files"]))
param_dict["out_prefix"] += "POD5"
input_para = {}
input_para['threads'] = param_dict["threads"]
Expand Down
40 changes: 19 additions & 21 deletions src/fast5_module.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -104,7 +104,7 @@ std::string getChromosome(H5::H5File f5, std::string read_name)
H5::Exception::dontPrint(); // Disable HDF5 error printing
try {
// Get the alignment start position
std::cout << "Opening mapping group..." << std::endl;
// std::cout << "Opening mapping group..." << std::endl;

// Get the alignment start position
H5::Group align_group_obj;
Expand All @@ -120,12 +120,12 @@ std::string getChromosome(H5::H5File f5, std::string read_name)
H5::DataType chrom_datatype= chrom_obj.getDataType();
std::string chrom_buffer;
chrom_obj.read(chrom_datatype, chrom_buffer);
std::cout << "Chromosome = " << chrom_buffer << std::endl;
// std::cout << "Chromosome = " << chrom_buffer << std::endl;
mapped_chrom = chrom_buffer;
} catch (FileIException &error) {
std::cout << error_msg << std::endl;
// std::cout << error_msg << std::endl;
} catch (AttributeIException &error) {
std::cout << error_msg << std::endl;
// std::cout << error_msg << std::endl;
}

return mapped_chrom;
Expand Down Expand Up @@ -160,7 +160,7 @@ Base_Signals getReadBaseSignalData(H5::H5File f5, std::string read_name, bool si
signal_dataset_str = signal_group_str + "/Signal";
}

std::cout << "Opening signal group " << signal_group_str << std::endl;
// std::cout << "Opening signal group " << signal_group_str << std::endl;
H5::Group signal_group = f5.openGroup(signal_group_str);

// Get the signal dataset
Expand All @@ -175,17 +175,17 @@ Base_Signals getReadBaseSignalData(H5::H5File f5, std::string read_name, bool si
//std::cout << "Getting data dimensions..." << std::endl;
dataspace.getSimpleExtentDims(dims, NULL); // rank = 1
int data_count = (int) dims[0];
std::cout << "Data count = " << data_count << std::endl;
// std::cout << "Data count = " << data_count << std::endl;

// Read the signals if available
if (data_count == 0) {
std::cout << "No signal data found." << std::endl;
// std::cout << "No signal data found." << std::endl;
Base_Signals basecall_obj(read_info, sequence_data_str, basecall_signals);
return basecall_obj;
}

// Store the signals in an array
std::cout << "Reading data..." << std::endl;
// std::cout << "Reading data..." << std::endl;
int16_t f5signals_16 [data_count];
signal_ds.read(f5signals_16, mdatatype);

Expand All @@ -194,7 +194,7 @@ Base_Signals getReadBaseSignalData(H5::H5File f5, std::string read_name, bool si
for (int i = 0; i < data_count; i++) { f5signals[i] = (int) f5signals_16[i]; };

// Get the sequence string if available
std::cout << "Getting FASTQ..." << std::endl;
// std::cout << "Getting FASTQ..." << std::endl;
std::string analysis_group;
std::string basecall_group;
if (single_read) {
Expand All @@ -217,7 +217,7 @@ Base_Signals getReadBaseSignalData(H5::H5File f5, std::string read_name, bool si
sequence_data_str = fq[1];

// Get the block stride (window length) attribute
std::cout << "Opening basecall group..." << std::endl;
// std::cout << "Opening basecall group..." << std::endl;
H5::Group group_obj;
H5::Attribute block_stride_obj;
if (single_read) {
Expand Down Expand Up @@ -263,11 +263,11 @@ Base_Signals getReadBaseSignalData(H5::H5File f5, std::string read_name, bool si
H5::DataSet move_dataset_obj;
if (single_read) {
std::string move_group = "/Analyses/Basecall_1D_000/BaseCalled_template/Move";
std::cout << "Opening move dataset: " << move_group << std::endl;
// std::cout << "Opening move dataset: " << move_group << std::endl;
move_dataset_obj = f5.openDataSet(move_group);
} else {
std::string move_group = "/" + read_name + "/Analyses/Basecall_1D_000/BaseCalled_template/Move";
std::cout << "Opening move dataset: " << move_group << std::endl;
// std::cout << "Opening move dataset: " << move_group << std::endl;
move_dataset_obj = f5.openDataSet(move_group);
}

Expand Down Expand Up @@ -377,7 +377,7 @@ static int writeBaseQCDetails(const char *input_file, Output_FAST5 &output_data,
std::string analysis_group = "/Analyses";
std::string basecall_group = analysis_group + "/Basecall_1D_000";
if (f5.nameExists("/Raw")) {
std::cout << "Single read FAST5" << std::endl;
// std::cout << "Single read FAST5" << std::endl;

// Check if the top-level analysis group exists
if (f5.nameExists(analysis_group)) {
Expand Down Expand Up @@ -405,18 +405,18 @@ static int writeBaseQCDetails(const char *input_file, Output_FAST5 &output_data,
}

} else {
std::cout << "Multi-read FAST5" << std::endl;
// std::cout << "Multi-read FAST5" << std::endl;

// Loop through each read
std::cout << "Reading all reads" << std::endl;
// std::cout << "Reading all reads" << std::endl;
H5::Group root_group = f5.openGroup("/");
size_t num_objs = root_group.getNumObjs();
for (size_t i=0; i < num_objs; i++) {
read_name = root_group.getObjnameByIdx(i);

// First remove the prefix
std::string read_id = read_name.substr(5);
// std::cout << "Processing read ID: " << read_id << std::endl;
std::cout << "Processing sequence for read ID: " << read_id << std::endl;
//std::cout << "Read: " << read_name << std::endl;

// Set up the analysis and basecall group
Expand Down Expand Up @@ -486,19 +486,17 @@ static int writeSignalQCDetails(const char *input_file, Output_FAST5 &output_dat
std::string signal_group_str;
std::string read_name;
if (f5.nameExists("/Raw")) {
std::cout << "Single read FAST5" << std::endl;

// Append the basecall signals to the output structure
signal_group_str = "/Raw/Reads";
read_name = getFileReadName(f5);
std::cout << read_name << std::endl;
std::cout << "Processing read ID: " << read_name << std::endl;
Base_Signals basecall_obj = getReadBaseSignalData(f5, read_name, true);
output_data.addReadBaseSignals(basecall_obj);
} else {
std::cout << "Multi-read FAST5" << std::endl;
std::cout << "Processing multi-read FAST5..." << std::endl;

// Loop through each read
std::cout << "Reading all reads" << std::endl;
H5::Group root_group = f5.openGroup("/");
size_t num_objs = root_group.getNumObjs();
for (size_t i=0; i < num_objs; i++) {
Expand All @@ -512,7 +510,7 @@ static int writeSignalQCDetails(const char *input_file, Output_FAST5 &output_dat
//std::cout << "Skipping read ID: " << read_id << std::endl;
continue;
} else {
// std::cout << "Processing read ID: " << read_id << std::endl;
std::cout << "Processing read ID: " << read_id << std::endl;
}
}
// std::cout << "Read: " << read_name << std::endl;
Expand Down
10 changes: 6 additions & 4 deletions src/plot_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -547,7 +547,7 @@ def plot_pod5(pod5_output, para_dict, bam_output=None):

# Set up the output CSV
csv_qc_filepath = os.path.join(out_path, nth_read_name + '_QC.csv')
qc_file = open(csv_qc_filepath, 'w')
qc_file = open(csv_qc_filepath, 'w', encoding='utf-8')
qc_writer = csv.writer(qc_file)
qc_writer.writerow(["Raw_Signal", "Length", "Mean", "Median", "StdDev", "PearsonSkewnessCoeff", "Kurtosis"])

Expand Down Expand Up @@ -613,7 +613,8 @@ def plot_pod5(pod5_output, para_dict, bam_output=None):
title=nth_read_name,
yaxis_title="Signal",
showlegend=False,
font=dict(size=PLOT_FONT_SIZE)
font=dict(size=PLOT_FONT_SIZE),
xaxis=dict(range=[0, 100])
)
fig.update_traces(marker={'size': marker_size})
# fig.update_xaxes(title="Index")
Expand Down Expand Up @@ -673,7 +674,7 @@ def plot_signal(output_data, para_dict):

# Set up the output CSVs
csv_qc_filepath = os.path.join(output_dir, nth_read_name + '_QC.csv')
qc_file = open(csv_qc_filepath, 'w')
qc_file = open(csv_qc_filepath, 'w', encoding='utf-8')
qc_writer = csv.writer(qc_file)
qc_writer.writerow(["Base", "Raw_Signal", "Length", "Mean", "Median", "StdDev", "PearsonSkewnessCoeff", "Kurtosis"])

Expand Down Expand Up @@ -724,7 +725,8 @@ def plot_signal(output_data, para_dict):
title=nth_read_name,
yaxis_title="Signal",
showlegend=False,
font=dict(size=PLOT_FONT_SIZE)
font=dict(size=PLOT_FONT_SIZE),
xaxis=dict(range=[0, 100])
)
fig.update_traces(marker={'size': marker_size})

Expand Down

0 comments on commit 13ceda6

Please sign in to comment.