@@ -265,7 +265,7 @@ Use MPI to run in the multi-ranks mode, please install oneCCL firstly.
265265- Here is an example of running locally.
266266 ```bash
267267 # or export LD_PRELOAD=libiomp5.so manually
268- export $(python -c 'import xfastertransformer as xft; print(xft.get_env())')`
268+ export $(python -c 'import xfastertransformer as xft; print(xft.get_env())')
269269 OMP_NUM_THREADS=48 mpirun \
270270 -n 1 numactl -N 0 -m 0 ${RUN_WORKLOAD} : \
271271 -n 1 numactl -N 1 -m 1 ${RUN_WORKLOAD}
@@ -313,7 +313,7 @@ A web demo based on [Gradio](https://www.gradio.app/) is provided in repo. Now s
313313``` bash
314314# Preloading `libiomp5.so` is recommended for better performance.
315315# Alternatively, set LD_PRELOAD=libiomp5.so manually; the `libiomp5.so` file will be in the `3rdparty/mkl/lib` directory after building xFasterTransformer.
316- export $( python -c ' import xfastertransformer as xft; print(xft.get_env())' ) `
316+ export $( python -c ' import xfastertransformer as xft; print(xft.get_env())' )
317317python examples/web_demo/ChatGLM.py \
318318 --dtype=bf16 \
319319 --token_path=${TOKEN_PATH} \
@@ -334,12 +334,12 @@ pip install vllm-xft
334334***Notice: Preloading libiomp5.so is required!***
335335``` bash
336336# Preload libiomp5.so with the following command, or set LD_PRELOAD=libiomp5.so manually
337- export $(python -c ' import xfastertransformer as xft; print(xft.get_env ())' )`
337+ export $( python -c ' import xfastertransformer as xft; print(xft.get_env())' )
338338
339339python -m vllm.entrypoints.openai.api_server \
340340 --model ${XFT_MODEL} \
341341 --tokenizer ${TOKENIZER_DIR} \
342- --dtype fp16 \
342+ --dtype bf16 \
343343 --kv-cache-dtype fp16 \
344344 --served-model-name xft \
345345 --port 8000 \
@@ -348,7 +348,7 @@ python -m vllm.entrypoints.openai.api_server \
348348For multi-rank mode, please use `python -m vllm.entrypoints.slave` for the slave ranks and keep the slaves' parameters aligned with the master's.
349349``` bash
350350# Preload libiomp5.so with the following command, or set LD_PRELOAD=libiomp5.so manually
351- export $(python -c ' import xfastertransformer as xft; print(xft.get_env ())' )`
351+ export $( python -c ' import xfastertransformer as xft; print(xft.get_env())' )
352352
353353OMP_NUM_THREADS=48 mpirun \
354354 -n 1 numactl --all -C 0-47 -m 0 \
0 commit comments