Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -52,3 +52,8 @@ wheels/
*.exe
*.out
*.app

# Dev environment
.vscode
.vs
CMakeSettings.json
1 change: 1 addition & 0 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,7 @@ target_compile_definitions(
flashlight-text
PUBLIC
FL_TEXT_USE_KENLM=$<BOOL:${FL_TEXT_USE_KENLM}>
FL_TEXT_DLL
)


Expand Down
23 changes: 23 additions & 0 deletions flashlight/lib/text/Defines.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
/*
* Copyright (c) Facebook, Inc. and its affiliates.
*
* This source code is licensed under the MIT-style license found in the
* LICENSE file in the root directory of this source tree.
*/

#pragma once

// Symbol-visibility and deprecation macros for the flashlight-text library.
//
// FL_TEXT_API       marks a class or function as part of the library's
//                   exported interface.
// FL_DEPRECATED(m)  marks a declaration as deprecated with message `m`.
//
// Both macros are defined on every platform so that headers using them
// compile regardless of toolchain.
#if defined(_WIN32) || defined(_MSC_VER)

// Windows: symbols are exported when FL_TEXT_DLL is defined and imported
// otherwise.
// NOTE(review): the CMake target adds FL_TEXT_DLL as a PUBLIC compile
// definition, so dependents would also see dllexport rather than dllimport --
// confirm that this is intended.
#ifdef FL_TEXT_DLL
#define FL_TEXT_API __declspec(dllexport)
#else // FL_TEXT_DLL
#define FL_TEXT_API __declspec(dllimport)
#endif // FL_TEXT_DLL

// Previously FL_DEPRECATED was only defined on the non-Windows branch, which
// left it undefined here; provide the __declspec spelling of the same
// attribute.
#define FL_DEPRECATED(msg) __declspec(deprecated(msg))

#else // defined(_WIN32) || defined(_MSC_VER)

// Other toolchains: give exported symbols default visibility.
#define FL_TEXT_API __attribute__((visibility("default")))
#define FL_DEPRECATED(msg) __attribute__((deprecated(msg)))

#endif // defined(_WIN32) || defined(_MSC_VER)
29 changes: 17 additions & 12 deletions flashlight/lib/text/String.h
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,8 @@
#include <unordered_map>
#include <vector>

#include "flashlight/lib/text/Defines.h"

namespace fl {
namespace lib {

Expand All @@ -32,37 +34,39 @@ using EnableIfSame = typename std::enable_if<std::is_same<S, T>::value>::type;
// ================================== Functions
// ==================================

std::string trim(const std::string& str);
FL_TEXT_API std::string trim(const std::string& str);

void replaceAll(
std::string& str,
const std::string& from,
const std::string& repl);
FL_TEXT_API void
replaceAll(std::string& str, const std::string& from, const std::string& repl);

bool startsWith(const std::string& input, const std::string& pattern);
bool endsWith(const std::string& input, const std::string& pattern);
FL_TEXT_API bool startsWith(
const std::string& input,
const std::string& pattern);
FL_TEXT_API bool endsWith(const std::string& input, const std::string& pattern);

std::vector<std::string>
FL_TEXT_API std::vector<std::string>
split(char delim, const std::string& input, bool ignoreEmpty = false);

std::vector<std::string> split(
FL_TEXT_API std::vector<std::string> split(
const std::string& delim,
const std::string& input,
bool ignoreEmpty = false);

std::vector<std::string> splitOnAnyOf(
FL_TEXT_API std::vector<std::string> splitOnAnyOf(
const std::string& delim,
const std::string& input,
bool ignoreEmpty = false);

std::vector<std::string> splitOnWhitespace(
FL_TEXT_API std::vector<std::string> splitOnWhitespace(
const std::string& input,
bool ignoreEmpty = false);

/**
* Join a vector of `std::string` inserting `delim` in between.
*/
std::string join(const std::string& delim, const std::vector<std::string>& vec);
FL_TEXT_API std::string join(
const std::string& delim,
const std::vector<std::string>& vec);

/**
* Join a range of `std::string` specified by iterators.
Expand Down Expand Up @@ -126,5 +130,6 @@ void dedup(std::vector<T>& in) {
auto it = std::unique(in.begin(), in.end());
in.resize(std::distance(in.begin(), it));
}

} // namespace lib
} // namespace fl
1 change: 1 addition & 0 deletions flashlight/lib/text/decoder/Decoder.h
Original file line number Diff line number Diff line change
Expand Up @@ -72,6 +72,7 @@ class Decoder {
/* Get all the final hypotheses */
virtual std::vector<DecodeResult> getAllFinalHypothesis() const = 0;
};

} // namespace text
} // namespace lib
} // namespace fl
4 changes: 3 additions & 1 deletion flashlight/lib/text/decoder/LexiconDecoder.h
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@

#include <unordered_map>

#include "flashlight/lib/text/Defines.h"
#include "flashlight/lib/text/decoder/Decoder.h"
#include "flashlight/lib/text/decoder/Trie.h"
#include "flashlight/lib/text/decoder/lm/LM.h"
Expand Down Expand Up @@ -111,7 +112,7 @@ struct LexiconDecoderState {
* search space and all candidate words are generated from it if unkScore is
* -inf, otherwise <UNK> will be generated for OOVs.
*/
class LexiconDecoder : public Decoder {
class FL_TEXT_API LexiconDecoder : public Decoder {
public:
LexiconDecoder(
LexiconDecoderOptions opt,
Expand Down Expand Up @@ -182,6 +183,7 @@ class LexiconDecoder : public Decoder {
int nDecodedFrames_; // Total number of decoded frames.
int nPrunedFrames_; // Total number of pruned frames from hyp_.
};

} // namespace text
} // namespace lib
} // namespace fl
4 changes: 3 additions & 1 deletion flashlight/lib/text/decoder/LexiconFreeDecoder.h
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@

#include <unordered_map>

#include "flashlight/lib/text/Defines.h"
#include "flashlight/lib/text/decoder/Decoder.h"
#include "flashlight/lib/text/decoder/lm/LM.h"

Expand Down Expand Up @@ -96,7 +97,7 @@ struct LexiconFreeDecoderState {
* score of the transcription W. We are allowed to generate words from all the
* possible combination of tokens.
*/
class LexiconFreeDecoder : public Decoder {
class FL_TEXT_API LexiconFreeDecoder : public Decoder {
public:
LexiconFreeDecoder(
LexiconFreeDecoderOptions opt,
Expand Down Expand Up @@ -165,6 +166,7 @@ class LexiconFreeDecoder : public Decoder {
int nDecodedFrames_; // Total number of decoded frames.
int nPrunedFrames_; // Total number of pruned frames from hyp_.
};

} // namespace text
} // namespace lib
} // namespace fl
4 changes: 3 additions & 1 deletion flashlight/lib/text/decoder/LexiconFreeSeq2SeqDecoder.h
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
#include <optional>
#include <unordered_map>

#include "flashlight/lib/text/Defines.h"
#include "flashlight/lib/text/decoder/Decoder.h"
#include "flashlight/lib/text/decoder/Utils.h"
#include "flashlight/lib/text/decoder/lm/LM.h"
Expand Down Expand Up @@ -93,7 +94,7 @@ struct LexiconFreeSeq2SeqDecoderState {
* TODO: Doesn't support online decoding now.
*
*/
class LexiconFreeSeq2SeqDecoder : public Decoder {
class FL_TEXT_API LexiconFreeSeq2SeqDecoder : public Decoder {
public:
LexiconFreeSeq2SeqDecoder(
LexiconFreeSeq2SeqDecoderOptions opt,
Expand Down Expand Up @@ -136,6 +137,7 @@ class LexiconFreeSeq2SeqDecoder : public Decoder {

std::unordered_map<int, std::vector<LexiconFreeSeq2SeqDecoderState>> hyp_;
};

} // namespace text
} // namespace lib
} // namespace fl
4 changes: 3 additions & 1 deletion flashlight/lib/text/decoder/LexiconSeq2SeqDecoder.h
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
#include <optional>
#include <unordered_map>

#include "flashlight/lib/text/Defines.h"
#include "flashlight/lib/text/decoder/Decoder.h"
#include "flashlight/lib/text/decoder/Trie.h"
#include "flashlight/lib/text/decoder/Utils.h"
Expand Down Expand Up @@ -111,7 +112,7 @@ struct LexiconSeq2SeqDecoderState {
* TODO: Doesn't support online decoding now.
*
*/
class LexiconSeq2SeqDecoder : public Decoder {
class FL_TEXT_API LexiconSeq2SeqDecoder : public Decoder {
public:
LexiconSeq2SeqDecoder(
LexiconSeq2SeqDecoderOptions opt,
Expand Down Expand Up @@ -160,6 +161,7 @@ class LexiconSeq2SeqDecoder : public Decoder {

std::unordered_map<int, std::vector<LexiconSeq2SeqDecoderState>> hyp_;
};

} // namespace text
} // namespace lib
} // namespace fl
5 changes: 4 additions & 1 deletion flashlight/lib/text/decoder/Trie.h
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,8 @@
#include <unordered_map>
#include <vector>

#include "flashlight/lib/text/Defines.h"

namespace fl {
namespace lib {
namespace text {
Expand Down Expand Up @@ -59,7 +61,7 @@ using TrieNodePtr = std::shared_ptr<TrieNode>;
* the search space in decoder and quickly look up scores for a given token
* (completed word) or make predictions for incomplete ones based on smearing.
*/
class Trie {
class FL_TEXT_API Trie {
public:
Trie(int maxChildren, int rootIdx)
: root_(std::make_shared<TrieNode>(rootIdx)), maxChildren_(maxChildren) {}
Expand Down Expand Up @@ -90,6 +92,7 @@ class Trie {
};

using TriePtr = std::shared_ptr<Trie>;

} // namespace text
} // namespace lib
} // namespace fl
5 changes: 1 addition & 4 deletions flashlight/lib/text/decoder/Utils.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -7,9 +7,6 @@

namespace fl {
namespace lib {
namespace text {

// Place holder
} // namespace text
namespace text {} // namespace text
} // namespace lib
} // namespace fl
1 change: 1 addition & 0 deletions flashlight/lib/text/decoder/lm/ConvLM.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -234,6 +234,7 @@ void ConvLM::updateCache(std::vector<LMStatePtr> states) {
}
}
}

} // namespace text
} // namespace lib
} // namespace fl
4 changes: 3 additions & 1 deletion flashlight/lib/text/decoder/lm/ConvLM.h
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@

#include <functional>

#include "flashlight/lib/text/Defines.h"
#include "flashlight/lib/text/decoder/lm/LM.h"
#include "flashlight/lib/text/dictionary/Defines.h"
#include "flashlight/lib/text/dictionary/Dictionary.h"
Expand All @@ -29,7 +30,7 @@ struct ConvLMState : LMState {
: tokens(std::vector<int>(size)), length(size) {}
};

class ConvLM : public LM {
class FL_TEXT_API ConvLM : public LM {
public:
ConvLM(
const GetConvLmScoreFunc& getConvLmScoreFunc,
Expand Down Expand Up @@ -68,6 +69,7 @@ class ConvLM : public LM {
const LMStatePtr& state,
const int tokenIdx);
};

} // namespace text
} // namespace lib
} // namespace fl
7 changes: 5 additions & 2 deletions flashlight/lib/text/decoder/lm/KenLM.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -11,12 +11,12 @@

#ifdef USE_KENLM_FROM_LANGTECH
#include "language_technology/jedi/lm/model.hh"
#else
#else // USE_KENLM_FROM_LANGTECH
#if __has_include(<kenlm/lm/model.hh>)
#include <kenlm/lm/model.hh>
#elif __has_include(<lm/model.hh>)
#include <lm/model.hh>
#else
#else // __has_include
#error "KenLM header not found (kenlm/lm/model.hh)."
#endif // __has_include
#endif // USE_KENLM_FROM_LANGTECH
Expand All @@ -27,6 +27,8 @@ namespace text {

KenLMState::KenLMState() : ken_(std::make_unique<lm::ngram::State>()) {}

KenLMState::~KenLMState() {}

KenLM::KenLM(const std::string& path, const Dictionary& usrTknDict) {
// Load LM
model_.reset(lm::ngram::LoadVirtual(path.c_str()));
Expand Down Expand Up @@ -79,6 +81,7 @@ std::pair<LMStatePtr, float> KenLM::finish(const LMStatePtr& state) {
model_->BaseScore(inState->ken(), vocab_->EndSentence(), outState->ken());
return std::make_pair(std::move(outState), score);
}

} // namespace text
} // namespace lib
} // namespace fl
7 changes: 5 additions & 2 deletions flashlight/lib/text/decoder/lm/KenLM.h
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@

#include <memory>

#include "flashlight/lib/text/Defines.h"
#include "flashlight/lib/text/decoder/lm/LM.h"
#include "flashlight/lib/text/dictionary/Dictionary.h"

Expand Down Expand Up @@ -36,8 +37,9 @@ namespace text {
* indices and comparison functions
* https://github.com/kpu/kenlm/blob/master/lm/state.hh.
*/
struct KenLMState : LMState {
struct FL_TEXT_API KenLMState : LMState {
KenLMState();
~KenLMState();
std::unique_ptr<lm::ngram::State> ken_;
lm::ngram::State* ken() {
return ken_.get();
Expand All @@ -47,7 +49,7 @@ struct KenLMState : LMState {
/**
* KenLM extends LM by using the toolkit https://kheafield.com/code/kenlm/.
*/
class KenLM : public LM {
class FL_TEXT_API KenLM : public LM {
public:
KenLM(const std::string& path, const Dictionary& usrTknDict);

Expand All @@ -65,6 +67,7 @@ class KenLM : public LM {
};

using KenLMPtr = std::shared_ptr<KenLM>;

} // namespace text
} // namespace lib
} // namespace fl
3 changes: 2 additions & 1 deletion flashlight/lib/text/decoder/lm/ZeroLM.h
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@

#pragma once

#include "flashlight/lib/text/Defines.h"
#include "flashlight/lib/text/decoder/lm/LM.h"

namespace fl {
Expand All @@ -17,7 +18,7 @@ namespace text {
* ZeroLM is a dummy language model class, which mimics the behavior of a
* uni-gram language model but always returns 0 as score.
*/
class ZeroLM : public LM {
class FL_TEXT_API ZeroLM : public LM {
public:
LMStatePtr start(bool startWithNothing) override;

Expand Down
5 changes: 4 additions & 1 deletion flashlight/lib/text/dictionary/Dictionary.h
Original file line number Diff line number Diff line change
Expand Up @@ -12,12 +12,15 @@
#include <unordered_map>
#include <vector>

#include "flashlight/lib/text/Defines.h"

namespace fl {
namespace lib {
namespace text {

// A simple dictionary class which holds a bidirectional map
// entry (strings) <--> integer indices. Not thread-safe!
class Dictionary {
class FL_TEXT_API Dictionary {
public:
// Creates an empty dictionary
Dictionary() {}
Expand Down
Loading