Skip to content

Commit

Permalink
Merge pull request #448 from pachterlab/devel
Browse files Browse the repository at this point in the history
lr-kallisto
  • Loading branch information
Yenaled authored Jul 20, 2024
2 parents 010907c + 5ef335b commit fa01edd
Show file tree
Hide file tree
Showing 15 changed files with 1,234 additions and 227 deletions.
19 changes: 18 additions & 1 deletion CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,23 @@ project(kallisto)

include(GNUInstallDirs)

if(NOT MAX_KMER_SIZE)
set(MAX_KMER_SIZE "32")
endif()

set(DO_ENABLE_AVX2 "")
set(DO_ENABLE_COMPILATION_ARCH "")
if(ENABLE_AVX2 MATCHES "OFF")
add_compile_definitions("ENABLE_AVX2=OFF")
set(DO_ENABLE_AVX2 "-DENABLE_AVX2=OFF")
endif(ENABLE_AVX2 MATCHES "OFF")
if(COMPILATION_ARCH MATCHES "OFF")
add_compile_definitions("COMPILATION_ARCH=OFF")
set(DO_ENABLE_COMPILATION_ARCH "-DCOMPILATION_ARCH=OFF")
endif(COMPILATION_ARCH MATCHES "OFF")

add_compile_definitions("MAX_KMER_SIZE=${MAX_KMER_SIZE}")


option(USE_HDF5 "Compile with HDF5 support" OFF) #OFF by default
option(USE_BAM "Compile with HTSLIB support" OFF) # OFF by default
Expand Down Expand Up @@ -72,7 +89,7 @@ ExternalProject_Add(bifrost
PREFIX ${PROJECT_SOURCE_DIR}/ext/bifrost
SOURCE_DIR ${PROJECT_SOURCE_DIR}/ext/bifrost
BUILD_IN_SOURCE 1
CONFIGURE_COMMAND mkdir -p build && cd build && cmake .. -DCMAKE_INSTALL_PREFIX=${PREFIX} -DCMAKE_CXX_FLAGS=${PROJECT_BIFROST_CMAKE_CXX_FLAGS}
CONFIGURE_COMMAND mkdir -p build && cd build && cmake .. -DMAX_KMER_SIZE=${MAX_KMER_SIZE} -DCMAKE_INSTALL_PREFIX=${PREFIX} -DCMAKE_CXX_FLAGS=${PROJECT_BIFROST_CMAKE_CXX_FLAGS} ${DO_ENABLE_AVX2} ${DO_ENABLE_COMPILATION_ARCH}
BUILD_COMMAND cd build && make
INSTALL_COMMAND ""
)
Expand Down
1 change: 0 additions & 1 deletion ext/bifrost/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,6 @@ project(Bifrost)
find_package(Threads REQUIRED)

# To enable a larger default k-mer size, replace MAX_KMER_SIZE with a larger multiple of 32: actual maximum k-mer size will be MAX_KMER_SIZE-1.
SET(MAX_KMER_SIZE "32" CACHE STRING "MAX_KMER_SIZE")
SET(MAX_GMER_SIZE "${MAX_KMER_SIZE}" CACHE STRING "MAX_GMER_SIZE")
# Enable architecture optimizations
SET(COMPILATION_ARCH "native" CACHE STRING "COMPILATION_ARCH")
Expand Down
8 changes: 6 additions & 2 deletions src/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,10 @@ if (USE_BAM)
include_directories(../ext/htslib)
endif(USE_BAM)

if(NOT MAX_KMER_SIZE)
set(MAX_KMER_SIZE "32")
endif()

add_compile_options(-Wno-subobject-linkage) # Suppress bifrost warning

add_library(kallisto_core ${sources} ${headers})
Expand All @@ -17,9 +21,9 @@ add_executable(kallisto main.cpp)
find_package( Threads REQUIRED )
ExternalProject_Get_Property(bifrost install_dir)
if (USE_BAM)
target_link_libraries(kallisto kallisto_core pthread ${CMAKE_CURRENT_SOURCE_DIR}/../ext/htslib/libhts.a ${install_dir}/build/src/libbifrost.a)
target_link_libraries(kallisto kallisto_core pthread ${CMAKE_CURRENT_SOURCE_DIR}/../ext/htslib/libhts.a ${install_dir}/build/src/libbifrost.a -DMAX_KMER_SIZE=${MAX_KMER_SIZE})
else()
target_link_libraries(kallisto kallisto_core pthread ${install_dir}/build/src/libbifrost.a)
target_link_libraries(kallisto kallisto_core pthread ${install_dir}/build/src/libbifrost.a -DMAX_KMER_SIZE=${MAX_KMER_SIZE})
endif(USE_BAM)

if(LINK MATCHES static)
Expand Down
137 changes: 136 additions & 1 deletion src/EMAlgorithm.h
Original file line number Diff line number Diff line change
Expand Up @@ -85,7 +85,7 @@ struct EMAlgorithm {
if (alpha_.size() == priors.size()) {

alpha_.assign(priors.begin(), priors.end());
} else {
} else if (priors.size() != 0) {

std::cerr << "[ em] number of priors does not match number of transcripts." << std::endl;
std::cerr << " defaulting to uniform priors." << std::endl;
Expand All @@ -108,6 +108,7 @@ struct EMAlgorithm {
}

int i;
if (!opt.long_read || (opt.long_read && opt.platform == "ONT")){
for (i = 0; i < n_iter; ++i) {
if (recomputeEffLen && (i == min_rounds || i == min_rounds + 500)) {
eff_lens_ = update_eff_lens(all_fl_means, tc_, index_, alpha_, eff_lens_, post_bias_, opt);
Expand Down Expand Up @@ -220,6 +221,140 @@ struct EMAlgorithm {
}

}
} else {
for (i = 0; i < n_iter; ++i) {
if (recomputeEffLen && (i == min_rounds || i == min_rounds + 500) && !opt.long_read) {
eff_lens_ = update_eff_lens(all_fl_means, tc_, index_, alpha_, eff_lens_, post_bias_, opt);
weight_map_ = calc_weights (tc_.counts, index_.ecmapinv, eff_lens_);
}

if (recomputeEffLen && (i == min_rounds || i % min_rounds == 0) && opt.long_read) {
weight_map_ = calc_weights (tc_.counts, index_.ecmapinv, eff_lens_);
}


/*** should this go after main loop?
for (const auto& it : ecmapinv_) {
if (it.first.cardinality() == 1) {
next_alpha[it.first.maximum()] = counts_[it.second];
//std::cerr << "cardinality 1 : " << it.first.maximum() << " it.second: " << it.second << " counts_[it.second]: " << counts_[it.second] << std::endl; std::cerr.flush();
}
}
***/

for (const auto& it : ecmapinv_) {
if (it.first.cardinality() == 1) { // Individual transcript
//std::cerr << "Should always enter here with toy error free simulation" << std::endl; std::cerr.flush();
continue;
}

denom = 0.0;

if (counts_[it.second] == 0) {
continue;
}

// first, compute the denominator: a normalizer
// iterate over targets in EC map
auto& wv = weight_map_[it.second];

// everything in ecmap should be in weight_map
//assert( w_search != weight_map_.end() );
//assert( w_search->second.size() == ec_kv.second.size() );

// wv is weights vector
// trs is a vector of transcript ids

auto& r = it.first; //ecmap_.find(ec)->second;
auto numEC = r.cardinality();
uint32_t* trs = new uint32_t[numEC];
r.toUint32Array(trs);

for (auto t_it = 0; t_it < numEC; ++t_it) {
denom += alpha_[trs[t_it]] * wv[t_it];
}

if (denom < TOLERANCE) {
continue;
}

// compute the update step
auto countNorm = counts_[it.second] / denom;

//std::cerr << "countNorm: " << countNorm << std::endl; std::cerr.flush();
//std::cerr <<"numEC is " << numEC << std::endl; std::cerr.flush();
for (auto t_it = 0; t_it < numEC; ++t_it) {
//std::cerr <<"t_it is: " << t_it << ", alpha is: " << alpha_[trs[t_it]] << " wv[t_it] is: " << wv[t_it] << " trs[t_it] is:" << trs[t_it] << std::endl; std::cerr.flush();
next_alpha[trs[t_it]] += (wv[t_it] * alpha_[trs[t_it]]) * countNorm;
}

delete[] trs;
trs = nullptr;

}

// TODO: check for relative difference for convergence in EM

bool stopEM = false; //!finalRound && (i >= min_rounds); // false initially
//double maxChange = 0.0;
int chcount = 0;
for (int ec = 0; ec < num_trans_; ec++) {
if (next_alpha[ec] > alpha_change_limit && (std::fabs(next_alpha[ec] - alpha_[ec]) / next_alpha[ec]) > alpha_change) {
chcount++;
}

//if (stopEM && next_alpha[ec] >= alpha_limit) {

/* double reldiff = abs(next_alpha[ec]-alpha_[ec]) / next_alpha[ec];
if (reldiff >= alpha_change) {
stopEM = false;
}*/
//}

/*
if (next_alpha[ec] > alpha_limit) {
maxChange = std::max(maxChange,std::fabs(next_alpha[ec]-alpha_[ec]) / next_alpha[ec]);
}
*/
// reassign alpha_ to next_alpha
alpha_[ec] = next_alpha[ec];

// clear all next_alpha values 0 for next iteration
next_alpha[ec] = 0.0;
}

//std::cout << chcount << std::endl;
if (chcount == 0 && i > min_rounds) {

stopEM=true;
}

if (finalRound) {
break;
}

// std::cout << maxChange << std::endl;
if (stopEM) {
finalRound = true;
alpha_before_zeroes_.resize( alpha_.size() );
for (int ec = 0; ec < num_trans_; ec++) {
alpha_before_zeroes_[ec] = alpha_[ec];
if (alpha_[ec] < alpha_limit/10.0) {
alpha_[ec] = 0.0;
}
}
}

}

//TRYING PLACING HERE INSTEAD
for (const auto& it : ecmapinv_) {
if (it.first.cardinality() == 1) {
alpha_[it.first.maximum()] += counts_[it.second];
//std::cerr << "cardinality 1 : " << it.first.maximum() << " it.second: " << it.second << " counts_[it.second]: " << counts_[it.second] << std::endl; std::cerr.flush();
}
}
}

// ran for the maximum number of iterations
if (n_iter == i) {
Expand Down
1 change: 1 addition & 0 deletions src/H5Writer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -166,6 +166,7 @@ void H5Converter::write_aux() {
std::string(std::to_string(-1)),
kal_version_,
std::string(std::to_string(idx_version_)),
std::string("dummy k-mer length"),
start_time_,
call_
);
Expand Down
Loading

0 comments on commit fa01edd

Please sign in to comment.