Skip to content

[Env] Add XFT_ENGINE env variable. #231

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 6 commits into from
Feb 21, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion src/comm_helper/comm_helper.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@ extern "C" int init(int *world_size, int *world_rank, int *world_color) {
MPI_Comm_rank(MPI_COMM_WORLD, world_rank);

// world_color = world_rank / tpSize = world_rank / (world_size / ppSize)
// like: world_color = 0~7 / (8 / 4), XFT_PIPELINE_STAGES = ppSize = 4; tpSize = 2
// like: world_color = 0~7 / (8 / 4), XFT_PIPELINE_STAGE = ppSize = 4; tpSize = 2
// world_rank = 0, 1, -> world_color = ppRank = 0, 0, -> tpRank = 0, 1;
//              2, 3,                            1, 1,             0, 1;
//              4, 5,                            2, 2,             0, 1;
Expand Down
8 changes: 6 additions & 2 deletions src/models/common_decoder.h
Original file line number Diff line number Diff line change
Expand Up @@ -67,7 +67,7 @@ struct MlpTypeExtractor<ChatGLM2MLP<WeiT, InT, ImT, OutT, NORM_CLS, true>> {
/*
Pipeline parallel and tensor parallel introduction:

1) MPI_Instances = 16,XFT_PIPELINE_STAGES = 4 => ctx->ppSize = 4, ctx->tpSize = 4
1) MPI_Instances = 16, XFT_PIPELINE_STAGE = 4 => ctx->ppSize = 4, ctx->tpSize = 4
2) TP sync by oneCCL(row_comm) or shared_memory
3) PP sync by MPI MPI_COMM_WORLD

Expand Down Expand Up @@ -614,7 +614,11 @@ class CommonDecoder : public AbstractDecoder {
this->context.reset(new DecoderContext(layers, hiddenSize, attHeadNum, kvHeadNum, imSize, act, epsilon,
vocabSize, embeddingSize, maxPositions, maxPosEmbed, maxSeqLength, tpRank, tpSize, ppSize, ppRank,
ropeParamsPtr));
this->context->mmHelper = new MMHelper(xft::DeviceKind::iCPU, 0);

if (Env::getEngineKind() == xft::DeviceKind::iGPU && Env::getEngineIndex() < 0) // Sequential assignment
this->context->mmHelper = new MMHelper(Env::getEngineKind(), ppRank * tpSize + tpRank);
else // assignment through the user
this->context->mmHelper = new MMHelper(Env::getEngineKind(), Env::getEngineIndex());
}

return this->context.get();
Expand Down
1 change: 1 addition & 0 deletions src/models/models.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,7 @@ GenerationMode getGenerationMode(SearcherConfig &config_) {
// Default constructor: starts with no decoder and no searcher attached and
// flags the next input as new, then runs the process-wide initializers below.
Model::Model() : decoder(nullptr), searcher(nullptr), isNewInput(true) {
// Env settings are read from environment variables (see class Env in
// utils/verbose.h): verbosity, pipeline-stage count, and engine kind/index.
Env::initVerbose();
Env::initPipelineStage();
// Parses XFT_ENGINE into the engine kind and device index.
Env::initEngineKindIndex();
// NOTE(review): TimeLine::init() is not visible here — presumably sets up
// timeline tracing; confirm against its definition.
TimeLine::init();
}

Expand Down
68 changes: 64 additions & 4 deletions src/utils/verbose.h
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,9 @@
#include <cinttypes>
#include <iostream>
#include <mutex>
#include <sstream>

#include "dtype.h"

class FunTimer {
private:
Expand Down Expand Up @@ -45,6 +48,7 @@ class Printer {
};

class Env {
// Verbose
private:
static int &verboseValue() {
static int value = 0;
Expand All @@ -56,7 +60,10 @@ class Env {
char *xft_verbose_value = getenv("XFT_VERBOSE");
if (xft_verbose_value != NULL) {
int value = atoi(xft_verbose_value);
verboseValue() = value;
if (value >= 0)
verboseValue() = value;
else
printf("[ERROR] XFT_VERBOSE value need to be greater than or equal to 0.\n");
} else {
verboseValue() = 0;
}
Expand All @@ -69,7 +76,7 @@ class Env {

// Current verbosity level as held by verboseValue().
static int getVerbose() {
    const int level = verboseValue();
    return level;
}

// Pipeline Parallel
// Pipeline Parallel
private:
static int &pipelineStageValue() {
static int value = 1;
Expand All @@ -78,21 +85,74 @@ class Env {

public:
// Loads the pipeline-parallel stage count from the XFT_PIPELINE_STAGE
// environment variable.
//
//   - variable unset                 -> stage count is reset to 1
//   - built with PIPELINE_PARALLEL   -> values >= 1 are stored; anything else
//                                       prints an [ERROR] and keeps the
//                                       previous value
//   - built without it               -> prints a [WARNING]; the stage count is
//                                       left untouched
//
// The variable is looked up exactly once and the build-time warning is printed
// exactly once; the earlier text carried a second, conflicting lookup and
// message for the old XFT_PIPELINE_STAGES spelling.
static void initPipelineStage() {
    const char *stageEnv = getenv("XFT_PIPELINE_STAGE");
    if (stageEnv == nullptr) {
        pipelineStageValue() = 1;
        return;
    }

#ifdef PIPELINE_PARALLEL
    const int stages = atoi(stageEnv);
    if (stages >= 1)
        pipelineStageValue() = stages;
    else
        printf("[ERROR] XFT_PIPELINE_STAGE value need to be greater than 0.\n");
#else
    printf("[WARNING] XFT_PIPELINE_STAGE need to build with WITH_PIPELINE_PARALLEL=ON.\n");
#endif
}

// Pipeline-parallel stage count as held by pipelineStageValue().
static int getPipelineStage() {
    const int stages = pipelineStageValue();
    return stages;
}

// Engine Kind and Index
private:
// Backing storage for the selected engine kind; starts out as iCPU.
// Handed out by reference so the initializer can overwrite it in place.
static xft::DeviceKind &engineKindValue() {
    static xft::DeviceKind kind = xft::DeviceKind::iCPU;
    return kind;
}

// Backing storage for the selected device index; starts out as 0.
// Handed out by reference so the initializer can overwrite it in place.
static int &engineIndexValue() {
    static int index = 0;
    return index;
}

public:
static void initEngineKindIndex() {
char *xft_engine_env = getenv("XFT_ENGINE");
if (xft_engine_env != NULL) {
std::string xft_engine_str(xft_engine_env);
std::stringstream ss(xft_engine_str);
std::string token;

if (std::getline(ss, token, ':')) {
if (token == "CPU") {
engineKindValue() = xft::DeviceKind::iCPU;
engineIndexValue() = 0;
return;
} else if (token == "GPU")
engineKindValue() = xft::DeviceKind::iGPU;
else
printf("[ERROR] Undefined device kind in XFT_ENGINE.\n");
} else {
printf("[ERROR] Wrong value: XFT_ENGINE.\n");
}

if (std::getline(ss, token, ':')) {
int value = std::stoi(token);
if (value >= 0)
engineIndexValue() = value;
else
printf("[ERROR] Undefined device index in XFT_ENGINE.\n");
} else {
engineIndexValue() = -1;
}
} else {
engineKindValue() = xft::DeviceKind::iCPU;
engineIndexValue() = 0;
}
}

// Engine kind currently held by engineKindValue().
static xft::DeviceKind getEngineKind() {
    const xft::DeviceKind kind = engineKindValue();
    return kind;
}
// Device index currently held by engineIndexValue(); -1 is what
// initEngineKindIndex() stores when XFT_ENGINE names a GPU without an index.
static int getEngineIndex() {
    const int index = engineIndexValue();
    return index;
}
};

#define GEMMVERBOSE(api_func, compute_func) \
Expand Down