Commit 669bd66

[README] Update README.md. (#434)

1 parent 76ddad7

6 files changed: +16 −16 lines

README.md

Lines changed: 5 additions & 5 deletions
@@ -265,7 +265,7 @@ Use MPI to run in the multi-ranks mode, please install oneCCL firstly.
 - Here is a example on local.
 ```bash
 # or export LD_PRELOAD=libiomp5.so manually
-export $(python -c 'import xfastertransformer as xft; print(xft.get_env())')`
+export $(python -c 'import xfastertransformer as xft; print(xft.get_env())')
 OMP_NUM_THREADS=48 mpirun \
   -n 1 numactl -N 0 -m 0 ${RUN_WORKLOAD} : \
   -n 1 numactl -N 1 -m 1 ${RUN_WORKLOAD}
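
This hunk removes a stray trailing backtick which, in bash, opens an unterminated command substitution, so the export either fails with a syntax error or leaves the shell waiting for a closing backtick. A quick way to sanity-check the fixed line is to print the variables before exporting them; a minimal sketch, assuming xfastertransformer is installed (per the surrounding comments, the output should include an LD_PRELOAD entry for the bundled libiomp5.so):

```bash
# Inspect what will be exported before committing to it.
python -c 'import xfastertransformer as xft; print(xft.get_env())'

# Then apply it to the current shell.
export $(python -c 'import xfastertransformer as xft; print(xft.get_env())')
```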
@@ -313,7 +313,7 @@ A web demo based on [Gradio](https://www.gradio.app/) is provided in repo. Now s
 ```bash
 # Recommend preloading `libiomp5.so` to get a better performance.
 # or LD_PRELOAD=libiomp5.so manually, `libiomp5.so` file will be in `3rdparty/mkl/lib` directory after build xFasterTransformer.
-export $(python -c 'import xfastertransformer as xft; print(xft.get_env())')`
+export $(python -c 'import xfastertransformer as xft; print(xft.get_env())')
 python examples/web_demo/ChatGLM.py \
     --dtype=bf16 \
     --token_path=${TOKEN_PATH} \
@@ -334,12 +334,12 @@ pip install vllm-xft
 ***Notice: Preload libiomp5.so is required!***
 ```bash
 # Preload libiomp5.so by following cmd or LD_PRELOAD=libiomp5.so manually
-export $(python -c 'import xfastertransformer as xft; print(xft.get_env())')`
+export $(python -c 'import xfastertransformer as xft; print(xft.get_env())')
 
 python -m vllm.entrypoints.openai.api_server \
     --model ${XFT_MODEL} \
     --tokenizer ${TOKENIZER_DIR} \
-    --dtype fp16 \
+    --dtype bf16 \
     --kv-cache-dtype fp16 \
     --served-model-name xft \
     --port 8000 \
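
Besides the backtick fix, this hunk switches the serving dtype from fp16 to bf16, matching the bf16 used in the other examples. Once the server is up, any OpenAI-compatible client can talk to it; a minimal curl sketch, assuming the server runs locally with the served name `xft` and port 8000 shown above (the prompt text is illustrative):

```bash
# Query the OpenAI-compatible completions endpoint.
curl http://localhost:8000/v1/completions \
    -H "Content-Type: application/json" \
    -d '{
          "model": "xft",
          "prompt": "Hello, my name is",
          "max_tokens": 32
        }'
```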
@@ -348,7 +348,7 @@ python -m vllm.entrypoints.openai.api_server \
 For multi-rank mode, please use `python -m vllm.entrypoints.slave` as slave and keep params of slaves align with master.
 ```bash
 # Preload libiomp5.so by following cmd or LD_PRELOAD=libiomp5.so manually
-export $(python -c 'import xfastertransformer as xft; print(xft.get_env())')`
+export $(python -c 'import xfastertransformer as xft; print(xft.get_env())')
 
 OMP_NUM_THREADS=48 mpirun \
   -n 1 numactl --all -C 0-47 -m 0 \
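
The hunk cuts off after the first mpirun rank. For orientation only, a hypothetical continuation on a two-socket, 48-cores-per-socket machine might pin the master (the api_server) to socket 0 and a slave to socket 1; the slave flags below are assumptions extrapolated from the "keep params of slaves align with master" note, not taken from this diff:

```bash
# Rank 0: master (OpenAI-compatible server) on socket 0.
# Rank 1: slave on socket 1, mirroring the master's model params (assumed flags).
OMP_NUM_THREADS=48 mpirun \
    -n 1 numactl --all -C 0-47 -m 0 \
      python -m vllm.entrypoints.openai.api_server \
        --model ${XFT_MODEL} \
        --tokenizer ${TOKENIZER_DIR} \
        --dtype bf16 \
        --kv-cache-dtype fp16 \
        --served-model-name xft \
        --port 8000 \
    : -n 1 numactl --all -C 48-95 -m 1 \
      python -m vllm.entrypoints.slave \
        --dtype bf16 \
        --model ${XFT_MODEL} \
        --kv-cache-dtype fp16
```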

README_CN.md

Lines changed: 5 additions & 5 deletions
@@ -266,7 +266,7 @@ xFasterTransformer automatically checks the MPI environment, or use `SINGLE_INSTANCE=1`
 - Below is an example of running in a local environment.
 ```bash
 # or manually preload: export LD_PRELOAD=libiomp5.so
-export $(python -c 'import xfastertransformer as xft; print(xft.get_env())')`
+export $(python -c 'import xfastertransformer as xft; print(xft.get_env())')
 OMP_NUM_THREADS=48 mpirun \
   -n 1 numactl -N 0 -m 0 ${RUN_WORKLOAD} : \
   -n 1 numactl -N 1 -m 1 ${RUN_WORKLOAD}
@@ -315,7 +315,7 @@ while (1) {
 # Preloading `libiomp5.so` is recommended for better performance.
 # The `libiomp5.so` file will be in the `3rdparty/mklml/lib` directory after building.
 # Or manually preload LD_PRELOAD=libiomp5.so; the `libiomp5.so` file will be in the `3rdparty/mkl/lib` directory after building xFasterTransformer
-export $(python -c 'import xfastertransformer as xft; print(xft.get_env())')`
+export $(python -c 'import xfastertransformer as xft; print(xft.get_env())')
 python examples/web_demo/ChatGLM.py \
     --dtype=bf16 \
     --token_path=${TOKEN_PATH} \
@@ -337,12 +337,12 @@ pip install vllm-xft
 ***Note: `libiomp5` must be preloaded!***
 ```bash
 # Preload libiomp5.so with the following command, or set LD_PRELOAD=libiomp5.so manually
-export $(python -c 'import xfastertransformer as xft; print(xft.get_env())')`
+export $(python -c 'import xfastertransformer as xft; print(xft.get_env())')
 
 python -m vllm.entrypoints.openai.api_server \
     --model ${XFT_MODEL} \
     --tokenizer ${TOKENIZER_DIR} \
-    --dtype fp16 \
+    --dtype bf16 \
     --kv-cache-dtype fp16 \
     --served-model-name xft \
     --port 8000 \
@@ -351,7 +351,7 @@ python -m vllm.entrypoints.openai.api_server \
 For distributed mode, use `python -m vllm.entrypoints.slave` as the slave node and keep the slave's parameters consistent with the master's.
 ```bash
 # Preload libiomp5.so with the following command, or set LD_PRELOAD=libiomp5.so manually
-export $(python -c 'import xfastertransformer as xft; print(xft.get_env())')`
+export $(python -c 'import xfastertransformer as xft; print(xft.get_env())')
 
 OMP_NUM_THREADS=48 mpirun \
   -n 1 numactl --all -C 0-47 -m 0 \

examples/cpp/README.md

Lines changed: 1 addition & 1 deletion
@@ -11,7 +11,7 @@ Please refer to [Prepare model](../README.md#prepare-model)
 ```bash
 # Recommend preloading `libiomp5.so` to get a better performance.
 # or LD_PRELOAD=libiomp5.so manually, `libiomp5.so` file will be in `3rdparty/mkl/lib` directory after build xFasterTransformer.
-export $(python -c 'import xfastertransformer as xft; print(xft.get_env())')`
+export $(python -c 'import xfastertransformer as xft; print(xft.get_env())')
 
 # run single instance like
 ./example -m ${MODEL_PATH} -t ${TOKEN_PATH}
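
Only the single-instance invocation appears in this hunk. Applying the mpirun/numactl pattern from the main README hunk above, a hypothetical two-socket multi-rank run of the same binary (assuming each rank takes the same flags as the single-instance case) could look like:

```bash
export $(python -c 'import xfastertransformer as xft; print(xft.get_env())')
# One rank per NUMA node, each running the example binary.
OMP_NUM_THREADS=48 mpirun \
    -n 1 numactl -N 0 -m 0 ./example -m ${MODEL_PATH} -t ${TOKEN_PATH} : \
    -n 1 numactl -N 1 -m 1 ./example -m ${MODEL_PATH} -t ${TOKEN_PATH}
```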

examples/pytorch/README.md

Lines changed: 1 addition & 1 deletion
@@ -20,7 +20,7 @@ Please refer to [Prepare model](../README.md#prepare-model)
 ```bash
 # Recommend preloading `libiomp5.so` to get a better performance.
 # or LD_PRELOAD=libiomp5.so manually, `libiomp5.so` file will be in `3rdparty/mkl/lib` directory after build xFasterTransformer.
-export $(python -c 'import xfastertransformer as xft; print(xft.get_env())')`
+export $(python -c 'import xfastertransformer as xft; print(xft.get_env())')
 
 # run single instance like
 python demo.py --dtype=bf16 --token_path=${TOKEN_PATH} --model_path=${MODEL_PATH}

examples/web_demo/README.md

Lines changed: 1 addition & 1 deletion
@@ -29,7 +29,7 @@ After the web server started, open the output URL in the browser to use the demo
 ```bash
 # Recommend preloading `libiomp5.so` to get a better performance.
 # or LD_PRELOAD=libiomp5.so manually, `libiomp5.so` file will be in `3rdparty/mkl/lib` directory after build xFasterTransformer.
-export $(python -c 'import xfastertransformer as xft; print(xft.get_env())')`
+export $(python -c 'import xfastertransformer as xft; print(xft.get_env())')
 
 # run single instance like
 python examples/web_demo/ChatGLM.py \

serving/vllm-xft.md

Lines changed: 3 additions & 3 deletions
@@ -12,12 +12,12 @@ pip install vllm-xft
 ### Serving(OpenAI Compatible Server)
 ```shell
 # Preload libiomp5.so by following cmd or LD_PRELOAD=libiomp5.so manually
-export $(python -c 'import xfastertransformer as xft; print(xft.get_env())')`
+export $(python -c 'import xfastertransformer as xft; print(xft.get_env())')
 
 python -m vllm.entrypoints.openai.api_server \
     --model ${XFT_MODEL} \
     --tokenizer ${TOKENIZER_DIR} \
-    --dtype fp16 \
+    --dtype bf16 \
    --kv-cache-dtype fp16 \
     --served-model-name xft \
     --port 8000 \
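
The same fp16-to-bf16 change lands in the serving guide. Since the endpoint is OpenAI-compatible, the chat route should also be available; a minimal sketch, again assuming the served name `xft` and port 8000 from the command above:

```bash
# Query the OpenAI-compatible chat completions endpoint.
curl http://localhost:8000/v1/chat/completions \
    -H "Content-Type: application/json" \
    -d '{
          "model": "xft",
          "messages": [{"role": "user", "content": "Hello!"}],
          "max_tokens": 64
        }'
```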
@@ -51,7 +51,7 @@ Please keep params of slaves align with master.
 Here is a example on 2Socket platform, 48 cores pre socket.
 ```bash
 # Preload libiomp5.so by following cmd or LD_PRELOAD=libiomp5.so manually
-export $(python -c 'import xfastertransformer as xft; print(xft.get_env())')`
+export $(python -c 'import xfastertransformer as xft; print(xft.get_env())')
 
 OMP_NUM_THREADS=48 mpirun \
   -n 1 numactl --all -C 0-47 -m 0 \
