Skip to content

Commit

Permalink
remove some commented lines:
Browse files Browse the repository at this point in the history
  • Loading branch information
huangzhengxiang committed Oct 31, 2024
1 parent f8d0ddc commit 69e1187
Show file tree
Hide file tree
Showing 8 changed files with 14 additions and 77 deletions.
59 changes: 0 additions & 59 deletions TODO.md

This file was deleted.

7 changes: 7 additions & 0 deletions docs/transformers/llm.md
Original file line number Diff line number Diff line change
Expand Up @@ -229,6 +229,7 @@ sh package_scripts/ios/buildiOS.sh "-DMNN_ARM82=true -DMNN_LOW_MEMORY=true -DMNN

#### 推理用法
`chat_demo`的用法如下:
pc端直接推理
```
# 使用config.json
## 交互式聊天
Expand All @@ -243,6 +244,12 @@ sh package_scripts/ios/buildiOS.sh "-DMNN_ARM82=true -DMNN_LOW_MEMORY=true -DMNN
./chat_demo model_dir/llm.mnn prompt.txt
```
手机端adb推理用法:
```bash
# 利用adb push将可执行文件chat_demo和所需的链接库push到手机上
adb push chat_demo libllm.so libMNN_CL.so libMNN_Express.so libMNN.so tools/cv/libMNNOpenCV.so /data/local/tmp/llm
```

#### GPTQ权重加载
- 使用脚本生成GPTQ模型权重,用法参考: [apply_gptq.py](../tools/script.html#apply-gptq-py)
- 创建`gptq.json`配置文件
Expand Down
3 changes: 0 additions & 3 deletions express/NeuralNetWorkOp.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -476,9 +476,6 @@ VARP _Softmax(VARP logits, int axis) {
softmax->main.AsAxis()->axis = axis;
return (Variable::create(Expr::create(softmax.get(), {logits})));
}
/*Computes softmax over logits that are first scaled by 1 / temperature.
Args:
logits: A variable holding the unscaled scores.
temperature: Divisor applied to the logits. It is not checked here, so a
             value of 0 makes the reciprocal below a float division by zero.
axis: The dimension softmax is performed on; forwarded to _Softmax unchanged.
Returns:
The result of _Softmax applied to the scaled logits.
*/
VARP _TempratureSoftmax(VARP logits, float temperature, int axis) {
    // Division by the temperature is expressed as a multiply by its reciprocal.
    const float inverseTemperature = 1.0f / temperature;
    return _Softmax(logits * _Scalar<float>(inverseTemperature), axis);
}
/*Computes softplus: log(exp(features) + 1).
Args:
features: A variable. Must be Halide_Type_Float.
Expand Down
1 change: 0 additions & 1 deletion include/MNN/expr/NeuralNetWorkOp.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -58,7 +58,6 @@ MNN_PUBLIC VARP _Relu(VARP x, float slope = 0.0f);
MNN_PUBLIC VARP _Relu6(VARP x, float minValue = 0.0f, float maxValue = 6.0f);
MNN_PUBLIC VARP _PRelu(VARP x, std::vector<float> &&slopes);
MNN_PUBLIC VARP _Softmax(VARP logits, int axis = -1);
MNN_PUBLIC VARP _TempratureSoftmax(VARP logits, float temperature, int axis = -1);
MNN_PUBLIC VARP _Softplus(VARP features);
MNN_PUBLIC VARP _Softsign(VARP features);
MNN_PUBLIC std::vector<VARP> _Split(VARP value, INTS size_splits, int axis = 0);
Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
distributionBase=GRADLE_USER_HOME
distributionPath=wrapper/dists
distributionUrl=https\://services.gradle.org/distributions/gradle-7.3.3-bin.zip
distributionUrl=https\://services.gradle.org/distributions/gradle-8.9-bin.zip
zipStoreBase=GRADLE_USER_HOME
zipStorePath=wrapper/dists
13 changes: 0 additions & 13 deletions transformers/llm/engine/include/llmconfig.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -322,22 +322,9 @@ class LlmConfig {
return llm_config_.value("attention_fused", true);
}

// std::string chat_template() const {
// return llm_config_.value("chat_template", "");
// }

// std::string prompt_template() const {
// return llm_config_.value("prompt_template", "");
// }

// std::string eos_token() const {
// return llm_config_.value("eos_token", "")
// }

// Reads the "system_prompt_template" entry from llm_config_.
// NOTE(review): the second argument to value() is presumably the fallback
// returned when the key is absent - confirm against the wrapper's contract.
std::string system_prompt_template() const {
    const char* defaultTemplate = "<|im_start|>system\n%s<|im_end|>\n";
    return llm_config_.value("system_prompt_template", defaultTemplate);
}

// Reads the "user_prompt_template" entry from llm_config_.
// NOTE(review): the second argument to value() is presumably the fallback
// returned when the key is absent - confirm against the wrapper's contract.
std::string user_prompt_template() const {
    const char* defaultTemplate = "<|im_start|>user\n%s<|im_end|>\n";
    return llm_config_.value("user_prompt_template", defaultTemplate);
}
Expand Down
2 changes: 2 additions & 0 deletions transformers/llm/engine/include/sampler/sampler.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,8 @@
namespace MNN {
namespace Transformer {

// Softmax over `axis` of `logits` scaled by 1 / temperature.
// `temperature` is not validated by the implementation; callers should pass a
// non-zero value. The spelling "Temprature" is part of the exported name.
MNN_PUBLIC VARP _TempratureSoftmax(VARP logits, float temperature, int axis = -1);

class Llm;

// a index and its corresponding score
Expand Down
4 changes: 4 additions & 0 deletions transformers/llm/engine/src/sampler.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,10 @@
namespace MNN{
namespace Transformer{

/*Computes softmax over logits that are first scaled by 1 / temperature.
Args:
logits: A variable holding the unscaled scores.
temperature: Divisor applied to the logits. It is not checked here, so a
             value of 0 makes the reciprocal below a float division by zero.
axis: The dimension softmax is performed on; forwarded to _Softmax unchanged.
Returns:
The result of _Softmax applied to the scaled logits.
*/
VARP _TempratureSoftmax(VARP logits, float temperature, int axis) {
    // Division by the temperature is expressed as a multiply by its reciprocal.
    const float inverseTemperature = 1.0f / temperature;
    return _Softmax(logits * _Scalar<float>(inverseTemperature), axis);
}

/* ----------Sampler's members---------- */
int Sampler::select(struct SubsetLogits& subset, int id) {
if (!(subset.is_subset)) return id;
Expand Down

0 comments on commit 69e1187

Please sign in to comment.