Description
CUDA SETUP: Highest compute capability among GPUs detected: 8.0
CUDA SETUP: Detected CUDA version 111
CUDA SETUP: Loading binary /usr/local/lib/python3.8/dist-packages/bitsandbytes/libbitsandbytes_cuda111.so...
Initializing...
Loading model...
╭─────────────────────────────── Traceback (most recent call last) ────────────────────────────────╮
│ /home/jovyan/chenpeng6-01/ToolBench/toolbench/inference/toolbench_server.py:98 in │
│ │
│ 95 │ │ args = parser.parse_args() │
│ 96 │ │ return args │
│ 97 │
│ ❱ 98 model = Model() │
│ 99 │
│ 100 │
│ 101 @app.route('/stream', methods=['GET', 'POST']) │
│ │
│ /home/jovyan/chenpeng6-01/ToolBench/toolbench/inference/toolbench_server.py:26 in init │
│ │
│ 23 │ │ self.args = self.get_args() │
│ 24 │ │ self.pipeline = pipeline_runner(self.args, add_retrieval=False, server=True) │
│ 25 │ │ print("Loading model...") │
│ ❱ 26 │ │ self.llm = self.pipeline.get_backbone_model() │
│ 27 │ │ print("Model loaded in {} seconds".format(time.time() - starting_time)) │
│ 28 │ │ starting_time = time.time() │
│ 29 │ │ print("Loading retriever...") │
│ │
│ /home/jovyan/chenpeng6-01/ToolBench/toolbench/inference/Downstream_tasks/rapidapi.py:404 in │
│ get_backbone_model │
│ │
│ 401 │ │ args = self.args │
│ 402 │ │ if args.backbone_model == "toolllama": │
│ 403 │ │ │ # ratio = 4 means the sequence length is expanded by 4, remember to change t │
│ ❱ 404 │ │ │ ratio = int(args.max_sequence_length/args.max_source_sequence_length) │
│ 405 │ │ │ replace_llama_with_condense(ratio=ratio) │
│ 406 │ │ │ if args.lora: │
│ 407 │ │ │ │ backbone_model = ToolLLaMALoRA(base_name_or_path=args.model_path, model_ │
╰──────────────────────────────────────────────────────────────────────────────────────────────────╯
AttributeError: 'Namespace' object has no attribute 'max_sequence_length'
The command I ran:

python3 toolbench/inference/toolbench_server.py \
    --tool_root_dir data/toolenv/tools/ \
    --corpus_tsv_path data/retrieval/G1/corpus.tsv \
    --retrieved_api_nums 5 \
    --backbone_model toolllama \
    --model_path /home/jovyan/chenpeng6-01/ToolModel \
    --max_observation_length 1024 \
    --method DFS_woFilter_w2 \
    --input_query_file data/test_instruction/G1_instruction.json \
    --output_answer_file toolllama_lora_dfs_open_domain_result \
    --rapidapi_key xxxxxxxxxxx
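
For reference, a minimal sketch of what the failing code expects, assuming the fix is to register the two missing arguments in the server's argument parser. The argument names come from the rapidapi.py line shown in the traceback; the default values of 8192 and 2048 are purely illustrative, chosen to give the ratio of 4 mentioned in the code comment, and are not the repository's actual defaults.

```python
# Hypothetical sketch, not the actual ToolBench code: register the two arguments
# that rapidapi.py reads from args, so the Namespace no longer lacks them.
import argparse

parser = argparse.ArgumentParser()
# ... existing toolbench_server.py arguments would go here ...
parser.add_argument("--max_sequence_length", type=int, default=8192,
                    help="assumed default: total context length after condense expansion")
parser.add_argument("--max_source_sequence_length", type=int, default=2048,
                    help="assumed default: base model context length")
args = parser.parse_args([])  # parse defaults here just for illustration

# The line that currently raises AttributeError (rapidapi.py:404):
ratio = int(args.max_sequence_length / args.max_source_sequence_length)
print(ratio)  # 4, i.e. the sequence length is expanded by 4
```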