Skip to content

Commit

Permalink
Merge pull request #27 from beclab/feat/rank-no-time-limit
Browse files Browse the repository at this point in the history
Always run ranking no matter whether extractor runs.
  • Loading branch information
haochengwang authored Aug 8, 2024
2 parents 9fe1fc6 + 707b435 commit 0cbea92
Show file tree
Hide file tree
Showing 8 changed files with 66 additions and 36 deletions.
1 change: 1 addition & 0 deletions Dockerfile.r4rank.amd64
Original file line number Diff line number Diff line change
Expand Up @@ -87,6 +87,7 @@ RUN cd /opt && \
# Copy the ranking sources, tests and CMake build scripts into the image.
# COPY is used for every entry: these are plain local paths, so ADD's implicit
# archive-extraction and URL handling is neither needed nor wanted.
COPY train-rank/src /opt/rss-terminus-v2-rank/src
COPY train-rank/test /opt/rss-terminus-v2-rank/test
COPY train-rank/CMakeLists.txt /opt/rss-terminus-v2-rank/CMakeLists.txt
COPY train-rank/*.cmake /opt/rss-terminus-v2-rank/
RUN cd /opt/rss-terminus-v2-rank && \
mkdir build && \
cd build && \
Expand Down
1 change: 1 addition & 0 deletions Dockerfile.r4rank.arm64
Original file line number Diff line number Diff line change
Expand Up @@ -87,6 +87,7 @@ RUN cd /opt && \
# Copy the ranking sources, tests and CMake build scripts into the image.
# COPY is used for every entry: these are plain local paths, so ADD's implicit
# archive-extraction and URL handling is neither needed nor wanted.
COPY train-rank/src /opt/rss-terminus-v2-rank/src
COPY train-rank/test /opt/rss-terminus-v2-rank/test
COPY train-rank/CMakeLists.txt /opt/rss-terminus-v2-rank/CMakeLists.txt
COPY train-rank/*.cmake /opt/rss-terminus-v2-rank/
RUN cd /opt/rss-terminus-v2-rank && \
mkdir build && \
cd build && \
Expand Down
1 change: 1 addition & 0 deletions Dockerfile.r4train.amd64
Original file line number Diff line number Diff line change
Expand Up @@ -88,6 +88,7 @@ RUN cd /opt && \
# Copy the ranking sources, tests and CMake build scripts into the image.
# COPY is used for every entry: these are plain local paths, so ADD's implicit
# archive-extraction and URL handling is neither needed nor wanted.
COPY train-rank/src /opt/rss-terminus-v2-rank/src
COPY train-rank/test /opt/rss-terminus-v2-rank/test
COPY train-rank/CMakeLists.txt /opt/rss-terminus-v2-rank/CMakeLists.txt
COPY train-rank/*.cmake /opt/rss-terminus-v2-rank/
RUN cd /opt/rss-terminus-v2-rank && \
mkdir build && \
cd build && \
Expand Down
1 change: 1 addition & 0 deletions Dockerfile.r4train.arm64
Original file line number Diff line number Diff line change
Expand Up @@ -88,6 +88,7 @@ RUN cd /opt && \
# Copy the ranking sources, tests and CMake build scripts into the image.
# COPY is used for every entry: these are plain local paths, so ADD's implicit
# archive-extraction and URL handling is neither needed nor wanted.
COPY train-rank/src /opt/rss-terminus-v2-rank/src
COPY train-rank/test /opt/rss-terminus-v2-rank/test
COPY train-rank/CMakeLists.txt /opt/rss-terminus-v2-rank/CMakeLists.txt
COPY train-rank/*.cmake /opt/rss-terminus-v2-rank/
RUN cd /opt/rss-terminus-v2-rank && \
mkdir build && \
cd build && \
Expand Down
14 changes: 13 additions & 1 deletion train-rank/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,18 @@ link_directories("/opt/homebrew/opt/openssl/lib/")
link_directories("/opt/homebrew/opt/boost/lib/")
link_directories("/opt/homebrew/opt/llvm/lib/c++/")

# Refresh src/gitinfo.h (Git revision, tag and branch) on every build.
#
# Git state can change without any tracked build input changing, so this is a
# custom target -- which is always considered out of date -- rather than a
# file-level rule with a declared OUTPUT. version.cmake only rewrites the
# header when its content actually differs, so running the script on every
# build does not by itself force recompilation.
#
# The script is run from the build directory (also the default for custom
# commands); it is stated explicitly because the script may resolve paths
# relative to the directory it is invoked from.
add_custom_target(
  gen_git_info
  COMMAND ${CMAKE_COMMAND} -P "${CMAKE_CURRENT_SOURCE_DIR}/version.cmake"
  WORKING_DIRECTORY "${CMAKE_CURRENT_BINARY_DIR}"
  COMMENT "Updating src/gitinfo.h with Git revision info"
  VERBATIM
)

# Default every target defined after this point to C++17, and make that a hard
# requirement instead of letting CMake fall back to an older standard.
set(CMAKE_CXX_STANDARD 17)
set(CMAKE_CXX_STANDARD_REQUIRED ON)
Expand All @@ -41,7 +53,7 @@ add_library(rankcommon STATIC src/data_process.cpp
src/lr/model_serializer.cpp
src/entity/reco_metadata.cpp)


# Build-order edge only (nothing is linked): the gen_git_info target, which is
# responsible for generating src/gitinfo.h, must run before rankcommon is built.
add_dependencies(rankcommon gen_git_info)


# xgboost is mandatory: configuration stops if its package cannot be found.
find_package(xgboost REQUIRED)
Expand Down
6 changes: 6 additions & 0 deletions train-rank/src/bert_v2_rank.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -4,11 +4,17 @@
#include <iostream>
#include <thread>

#include "gitinfo.h"
#include "rssrank.h"
#include "common_tool.h"

int main(int argc, char** argv) {
gflags::ParseCommandLineFlags(&argc, &argv, true);
init_log();
LOG(INFO) << "Git rev: " << GIT_REV;
LOG(INFO) << "Git tag: " << GIT_TAG;
LOG(INFO) << "Git branch: " << GIT_BRANCH;

const char *source_name = std::getenv(TERMINUS_RECOMMEND_SOURCE_NAME);
if (source_name == nullptr) {
LOG(ERROR) << TERMINUS_RECOMMEND_SOURCE_NAME << " NOT EXIST" << std::endl;
Expand Down
35 changes: 0 additions & 35 deletions train-rank/src/rssrank.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,6 @@ using lr::EmbeddingDistanceExtractor;

// Command-line flags for the ranking binary.
// The two string flags take their defaults from envOrBlank("<ENV NAME>");
// presumably that yields the environment variable's value, or an empty string
// when it is unset -- confirm against envOrBlank's definition.
DEFINE_string(model_path_root, envOrBlank("MODEL_PATH_ROOT"), "Model path root");
DEFINE_string(recommend_source_name, envOrBlank("TERMINUS_RECOMMEND_SOURCE_NAME"), "Terminus recommend source name");
// When set, needRerank() returns true without consulting any timestamps.
DEFINE_bool(forced, false, "Whether forced execution, ignoring last rank time");
DEFINE_bool(upload_score, true, "Whether upload score to knowledge");
DEFINE_bool(verbose, false, "Whether output all the details");

Expand Down Expand Up @@ -694,46 +693,12 @@ vector<FeatureExtractor*> initLRFeatureExtractors() {
return {new EmbeddingDistanceExtractor()};
}

// Decides whether ranking has to be executed again for the configured source.
//
// Returns true when --forced is set, or when the extractor has run and the
// last rank is not newer than the last extractor run. Returns false when the
// extractor has never run (timestamp -1) or when the last rank is already
// newer than the last extractor run.
bool needRerank() {
  // A forced run bypasses every timestamp check.
  if (FLAGS_forced) {
    LOG(INFO) << "Forced execution ignoring last rank and extractor time" << std::endl;
    return true;
  }

  const int64_t rank_ts =
      knowledgebase::getLastRankTime(FLAGS_recommend_source_name);
  LOG(DEBUG) << knowledgebase::LAST_RANK_TIME << rank_ts << std::endl;

  const int64_t extractor_ts =
      knowledgebase::getLastExtractorTime(FLAGS_recommend_source_name);
  LOG(DEBUG) << knowledgebase::LAST_EXTRACTOR_TIME << extractor_ts
             << std::endl;

  // -1 is the "extractor never executed" marker: there is nothing new to rank.
  if (extractor_ts == -1) {
    LOG(DEBUG) << "last_extractor_time is " << extractor_ts
               << " mean extractor not executed" << std::endl;
    return false;
  }

  // extractor_ts is known to be valid from here on, so only the rank
  // timestamp still needs the -1 check.
  const bool rank_is_newer = (rank_ts != -1) && (rank_ts > extractor_ts);
  if (rank_is_newer) {
    LOG(DEBUG) << knowledgebase::LAST_RANK_TIME << " bigger than"
               << knowledgebase::LAST_EXTRACTOR_TIME << " task top"
               << std::endl;
    return false;
  }

  return true;
}

bool rankLR() {
if (FLAGS_recommend_source_name.size() == 0) {
LOG(ERROR) << "recommend_source_name not provided." << std::endl;
return false;
}

if (!needRerank()) {
return false;
}

if (!doRank()) {
std::unordered_map<std::string, ScoreWithMetadata> entry_to_score_with_metadata =
rssrank::getAllEntryToPrerankSourceForCurrentSourceKnowledge();
Expand Down
43 changes: 43 additions & 0 deletions train-rank/version.cmake
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
# Generates src/gitinfo.h, a header that exposes the current Git revision, tag
# and branch as C strings (GIT_REV, GIT_TAG, GIT_BRANCH).
#
# Meant to be run in script mode: cmake -P version.cmake
#
# Every path is resolved against the directory that contains this script, so
# the result does not depend on the directory the script is invoked from.
set(gitinfo_source_dir "${CMAKE_CURRENT_LIST_DIR}")
set(gitinfo_header "${gitinfo_source_dir}/src/gitinfo.h")

# execute_process does not go through a shell, so the format string must not be
# wrapped in quotes (they would end up verbatim in the output).
execute_process(
  COMMAND git log --pretty=format:%h -n 1
  WORKING_DIRECTORY "${gitinfo_source_dir}"
  OUTPUT_VARIABLE GIT_REV
  ERROR_QUIET)

# Check whether we got any revision (which isn't
# always the case, e.g. when someone downloaded a zip
# file from Github instead of a checkout)
if("${GIT_REV}" STREQUAL "")
  set(GIT_REV "N/A")
  set(GIT_DIFF "")
  set(GIT_TAG "N/A")
  set(GIT_BRANCH "N/A")
else()
  # A non-zero exit status means the work tree differs from the index; this is
  # flagged by appending "+" to the revision. Using the exit status directly
  # avoids depending on bash being available.
  execute_process(
    COMMAND git diff --quiet --exit-code
    WORKING_DIRECTORY "${gitinfo_source_dir}"
    RESULT_VARIABLE git_diff_result
    OUTPUT_QUIET
    ERROR_QUIET)
  if(git_diff_result EQUAL 0)
    set(GIT_DIFF "")
  else()
    set(GIT_DIFF "+")
  endif()

  # Stays empty when HEAD is not exactly on a tag.
  execute_process(
    COMMAND git describe --exact-match --tags
    WORKING_DIRECTORY "${gitinfo_source_dir}"
    OUTPUT_VARIABLE GIT_TAG
    ERROR_QUIET)
  execute_process(
    COMMAND git rev-parse --abbrev-ref HEAD
    WORKING_DIRECTORY "${gitinfo_source_dir}"
    OUTPUT_VARIABLE GIT_BRANCH
    ERROR_QUIET)

  string(STRIP "${GIT_REV}" GIT_REV)
  string(STRIP "${GIT_TAG}" GIT_TAG)
  string(STRIP "${GIT_BRANCH}" GIT_BRANCH)
endif()

set(VERSION "const char* GIT_REV=\"${GIT_REV}${GIT_DIFF}\";
const char* GIT_TAG=\"${GIT_TAG}\";
const char* GIT_BRANCH=\"${GIT_BRANCH}\";")

# Only touch the header when its content changes, so that an unchanged Git
# state does not bump the file's timestamp and trigger recompilation.
if(EXISTS "${gitinfo_header}")
  file(READ "${gitinfo_header}" VERSION_)
else()
  set(VERSION_ "")
endif()

if(NOT "${VERSION}" STREQUAL "${VERSION_}")
  file(WRITE "${gitinfo_header}" "${VERSION}")
endif()

0 comments on commit 0cbea92

Please sign in to comment.