Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
79 changes: 25 additions & 54 deletions src/Native/LdaNative/lda_engine.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -53,7 +53,6 @@ namespace lda {
mh_step_(mhstep),
alpha_sum_(alphaSum),
maxDocToken_(maxDocToken),
samplers_(nullptr),
document_buffer_(nullptr)
{
if (numThread > 0)
Expand All @@ -68,18 +67,16 @@ namespace lda {
printf("using %d thread(s) to do train/test\n", num_threads_);

bAlphaSumMultiplied = false;
atomic_stats_ = new LDAEngineAtomics();
model_block_ = new LDAModelBlock();
data_block_ = new LDADataBlock(num_threads_);
process_barrier_ = new SimpleBarrier(num_threads_);
samplerQueue_ = new CBlockedIntQueue();
atomic_stats_ = std::make_unique<LDAEngineAtomics>();
model_block_ = std::make_unique<LDAModelBlock>();
data_block_ = std::make_unique<LDADataBlock>(num_threads_);
process_barrier_ = std::make_unique<SimpleBarrier>(num_threads_);
samplerQueue_ = std::make_unique<CBlockedIntQueue>();

document_buffer_ = new int32_t*[num_threads_];
for (int i = 0; i < num_threads_; i++)
document_buffer_[i] = new int32_t[maxDocToken_ * 2 + 1];

likelihood_in_iter_ = nullptr;

beta_sum_ = beta_ * V_;
}

Expand All @@ -92,7 +89,6 @@ namespace lda {
mh_step_(mh_step),
alpha_sum_(alpha_sum),
maxDocToken_(maxDocToken),
samplers_(nullptr),
document_buffer_(nullptr)
{
if (num_threads > 0)
Expand All @@ -105,44 +101,30 @@ namespace lda {
num_threads_ = std::max(1, (int)(uNumCPU - 2));
}
bAlphaSumMultiplied = false;
process_barrier_ = new SimpleBarrier(num_threads_);
atomic_stats_ = new LDAEngineAtomics();
data_block_ = new LDADataBlock(num_threads_);
model_block_ = new LDAModelBlock();
samplerQueue_ = new CBlockedIntQueue();
process_barrier_ = std::make_unique<SimpleBarrier>(num_threads_);
atomic_stats_ = std::make_unique<LDAEngineAtomics>();
data_block_ = std::make_unique<LDADataBlock>(num_threads_);
model_block_ = std::make_unique<LDAModelBlock>();
samplerQueue_ = std::make_unique<CBlockedIntQueue>();

document_buffer_ = new int32_t*[num_threads_];
for (int i = 0; i < num_threads_; i++)
document_buffer_[i] = new int32_t[maxDocToken_ * 2 + 1];

likelihood_in_iter_ = nullptr;
beta_sum_ = beta_ * V_;
}


LdaEngine::~LdaEngine()
{
//delete memory space
delete process_barrier_;
process_barrier_ = nullptr;

delete data_block_;
data_block_ = nullptr;

delete atomic_stats_;
atomic_stats_ = nullptr;

delete model_block_;
model_block_ = nullptr;
process_barrier_.reset(nullptr);
data_block_.reset(nullptr);
atomic_stats_.reset(nullptr);
model_block_.reset(nullptr);
samplerQueue_.reset(nullptr);

delete samplerQueue_;
samplerQueue_ = nullptr;

for (int i = 0; i < num_threads_; ++i)
{
delete samplers_[i];
}
delete[] samplers_;
Comment on lines -141 to -145
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

📝 This block was responsible for some of the crashes observed in CI, and especially the crashes prior to 0cbfa54. The destructor is missing a null check for samplers_.

samplers_.reset(nullptr);

if (document_buffer_)
{
Expand All @@ -155,19 +137,15 @@ namespace lda {
document_buffer_ = nullptr;
}

if (likelihood_in_iter_)
{
delete[] likelihood_in_iter_;
likelihood_in_iter_ = nullptr;
}
likelihood_in_iter_.reset(nullptr);
}

bool LdaEngine::InitializeBeforeTrain()
{
CTimer tmDebug(true);
CheckFunction(0, tmDebug, "enter initializeBeforeTrain", false);
//allocate model memory from the data preloaded
AllocateModelMemory(data_block_);
AllocateModelMemory(*data_block_);
CheckFunction(0, tmDebug, "allocate model memory", false);

double alloc_start = lda::get_time();
Expand All @@ -193,12 +171,12 @@ namespace lda {
word_range_for_each_thread_[num_threads_] = V_;

//setup sampler
samplers_ = new LightDocSampler*[num_threads_];
samplers_.reset(new std::unique_ptr<LightDocSampler>[num_threads_]);
samplerQueue_->clear();

for (int i = 0; i < num_threads_; ++i)
{
samplers_[i] = new LightDocSampler(
samplers_[i] = std::make_unique<LightDocSampler>(
K_,
V_,
num_threads_,
Expand Down Expand Up @@ -250,14 +228,7 @@ namespace lda {
word_range_for_each_thread_[num_threads_] = V_;

//setup sampler
if (samplers_)
{
for (int i = 0; i < num_threads_; ++i)
{
delete samplers_[i];
}
delete[] samplers_;
}
samplers_.reset(nullptr);
if (document_buffer_)
{
for (int i = 0; i < num_threads_; ++i)
Expand All @@ -269,13 +240,13 @@ namespace lda {
document_buffer_ = nullptr;
}

samplers_ = new LightDocSampler*[num_threads_];
samplers_.reset(new std::unique_ptr<LightDocSampler>[num_threads_]);
document_buffer_ = new int32_t*[num_threads_];
samplerQueue_->clear();

for (int i = 0; i < num_threads_; ++i)
{
samplers_[i] = new LightDocSampler(
samplers_[i] = std::make_unique<LightDocSampler>(
K_,
V_,
num_threads_,
Expand Down Expand Up @@ -342,7 +313,7 @@ namespace lda {
atomic_stats_->thread_counter_ = 0;
burnin_iterations_ = burnin_iter;

likelihood_in_iter_ = new float[burnin_iterations_];
likelihood_in_iter_.reset(new float[burnin_iterations_]);
for (int i = 0; i < burnin_iterations_; i++)
{
likelihood_in_iter_[i] = 0.0;
Expand Down Expand Up @@ -676,7 +647,7 @@ namespace lda {
data_block_->Allocate(num_document, corpus_size);
}

void LdaEngine::AllocateModelMemory(const LDADataBlock* data_block)
void LdaEngine::AllocateModelMemory(const LDADataBlock& data_block)
{
model_block_->InitFromDataBlock(data_block, V_, K_);

Expand Down
16 changes: 8 additions & 8 deletions src/Native/LdaNative/lda_engine.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -52,7 +52,7 @@ namespace lda {
void InitializeBeforeTest();
bool InitializeBeforeTrain();
void AllocateDataMemory(int num_document, int64_t corpus_size);
void AllocateModelMemory(const LDADataBlock* data_block); //in this case, model memory is allocated according to the datablock;
void AllocateModelMemory(const LDADataBlock& data_block); //in this case, model memory is allocated according to the datablock;
void AllocateModelMemory(int num_vocabs, int num_topics, int64_t nonzero_num);
void AllocateModelMemory(int num_vocabs, int num_topics, int64_t mem_block_size, int64_t alias_mem_block_size);
void SetAlphaSum(float avgDocLength); //alphasum parameter is set by avgdoclength * alpha
Expand Down Expand Up @@ -110,11 +110,11 @@ namespace lda {
bool bAlphaSumMultiplied; //used to check whether alpha_sum_ is real alpha sum but not alpha
std::vector<int32_t> word_range_for_each_thread_;

LDAEngineAtomics* atomic_stats_;
SimpleBarrier* process_barrier_; // Local barrier across threads.
std::unique_ptr<LDAEngineAtomics> atomic_stats_;
std::unique_ptr<SimpleBarrier> process_barrier_; // Local barrier across threads.

LDADataBlock* data_block_;
LDAModelBlock* model_block_;
std::unique_ptr<LDADataBlock> data_block_;
std::unique_ptr<LDAModelBlock> model_block_;

std::vector<lda::hybrid_map> global_word_topic_table_;
std::vector<lda::hybrid_alias_map> global_alias_k_v_;
Expand All @@ -126,13 +126,13 @@ namespace lda {
float beta_mass_;
std::vector<wood::alias_k_v> beta_k_v_;

LightDocSampler **samplers_;
float* likelihood_in_iter_;
std::unique_ptr<std::unique_ptr<LightDocSampler>[]> samplers_;
std::unique_ptr<float[]> likelihood_in_iter_;

// For TestDocSafe purpose
int32_t **document_buffer_;

wood::xorshift_rng rng_;
CBlockedIntQueue *samplerQueue_;
std::unique_ptr<CBlockedIntQueue> samplerQueue_;
};
} // namespace lda
6 changes: 3 additions & 3 deletions src/Native/LdaNative/model_block.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -353,12 +353,12 @@ namespace lda
cout << "alias_mem_block_size = " << sizeof(alias_mem_block_size_) << endl;
}

void LDAModelBlock::InitFromDataBlock(const LDADataBlock *data_block, int32_t num_vocabs, int32_t num_topics)
void LDAModelBlock::InitFromDataBlock(const LDADataBlock& data_block, int32_t num_vocabs, int32_t num_topics)
{
num_vocabs_ = num_vocabs;
num_topics_ = num_topics;

int32_t doc_num = data_block->num_documents();
int32_t doc_num = data_block.num_documents();
dict_ = new WordEntry[num_vocabs_];
for (int i = 0; i < num_vocabs_; ++i)
{
Expand All @@ -367,7 +367,7 @@ namespace lda

for (int i = 0; i < doc_num; ++i)
{
shared_ptr<LDADocument> doc = data_block->GetOneDoc(i);
shared_ptr<LDADocument> doc = data_block.GetOneDoc(i);
int32_t doc_size = doc->size();
for (int j = 0; j < doc_size; ++j)
{
Expand Down
2 changes: 1 addition & 1 deletion src/Native/LdaNative/model_block.h
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,7 @@ namespace lda
void Init(int32_t num_vocabs, int32_t num_topics, int64_t nonzero_num);
void Init(int32_t num_vocabs, int32_t num_topics, int64_t mem_block_size, int64_t alias_mem_block_size);

void InitFromDataBlock(const LDADataBlock *data_block, int32_t num_vocabs, int32_t num_topics);
void InitFromDataBlock(const LDADataBlock &data_block, int32_t num_vocabs, int32_t num_topics);

void GetModelStat(int64_t &mem_block_size, int64_t &alias_mem_block_size);

Expand Down