Commit
modification after local tests
Armel Zebaze authored and Armel Zebaze committed May 25, 2023
1 parent a26a608 commit 6ebb641
Showing 2 changed files with 48 additions and 50 deletions.
93 changes: 46 additions & 47 deletions instruction_io/local_api.py
@@ -66,52 +66,51 @@ def make_requests(
     else :
         # single prompt, i.e str
         prompts = [prompts]
-    state = accelerator.state
     tokenized_dataset = TokenizedDataset(tokenizer=tokenizer, dataset=prompts)
-    dataloader = DataLoader(tokenized_dataset, batch_size=accelerator.num_processes)
-    for step, batch_ in tqdm.tqdm(enumerate(dataloader)):
-        with state.split_between_processes(batch_) as batch:
-            with torch.no_grad():
-                input_ids = batch["input_ids"].to(state.device)
-                attention_mask = batch["attention_mask"]
-                index_prompt = batch["index_prompt"]
-                stopping_criteria = StoppingCriteriaList([EndOfFunctionCriteria(attention_mask.sum(), stop_words, tokenizer)])
-                try :
-                    response = accelerator.unwrap_model(model).generate(
-                        input_ids,
-                        max_length=max_length,
-                        temperature=temperature,
-                        num_return_sequences=num_return_sequences,
-                        top_p=top_p,
-                        num_beams=num_beams,
-                        repetition_penalty=repetition_penalty,
-                        eos_token_id=tokenizer.eos_token_id,
-                        pad_token_id=tokenizer.pad_token_id,
-                        stopping_criteria=stopping_criteria
-                    )
-                except RuntimeError :
-                    print("An error occurred, please check the size of input_ids compared to the value of max_length")
-                    continue
-                response = accelerator.pad_across_processes(
-                    response, dim=1, pad_index=tokenizer.pad_token_id
-                )
-                response = accelerator.gather(response)
-                outputs = []
-                for response_i in response :
-                    outputs.append(tokenizer.decode(response_i, skip_special_tokens=True))
-                data = {
-                    "prompt": prompts[index_prompt],
-                    "response":
-                    [
-                        {
-                            "text" : outputs[i],
-                            "index": i,
-                            "finish_reason" : "stop" if (len(response[i]) < max_length) else "length"
-                        }
-                        for i in range(len(outputs))
-                    ] if outputs else None,
-                    "created_at": str(datetime.now()),
-                }
-                results.append(data)
+    dataloader = DataLoader(tokenized_dataset, batch_size=1)
+    dataloader = accelerator.prepare(dataloader)
+    for step, batch in tqdm.tqdm(enumerate(dataloader)):
+        with torch.no_grad():
+            input_ids = batch["input_ids"]
+            attention_mask = batch["attention_mask"]
+            index_prompt = batch["index_prompt"]
+            stopping_criteria = StoppingCriteriaList([EndOfFunctionCriteria(attention_mask.sum(), stop_words, tokenizer)])
+            try :
+                response = accelerator.unwrap_model(model).generate(
+                    input_ids,
+                    max_length=max_length,
+                    temperature=temperature,
+                    num_return_sequences=num_return_sequences,
+                    top_p=top_p,
+                    num_beams=num_beams,
+                    repetition_penalty=repetition_penalty,
+                    eos_token_id=tokenizer.eos_token_id,
+                    pad_token_id=tokenizer.pad_token_id,
+                    stopping_criteria=stopping_criteria
+                )
+            except RuntimeError :
+                print("An error occurred, please check the size of input_ids compared to the value of max_length")
+                continue
+            response = accelerator.pad_across_processes(
+                response, dim=1, pad_index=tokenizer.pad_token_id
+            )
+            response = accelerator.gather(response)
+            outputs = []
+            for response_i in response :
+                outputs.append(tokenizer.decode(response_i, skip_special_tokens=True))
+            data = {
+                "prompt": prompts[index_prompt],
+                "response":
+                [
+                    {
+                        "text" : outputs[i],
+                        "index": i,
+                        "finish_reason" : "stop" if (len(response[i]) < max_length) else "length"
+                    }
+                    for i in range(len(outputs))
+                ] if outputs else None,
+                "created_at": str(datetime.now()),
+            }
+            results.append(data)
     return results
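
For readers less familiar with Hugging Face Accelerate: the change above replaces the manual state.split_between_processes sharding with accelerator.prepare(dataloader), which hands each process its own slice of batches and moves them to the right device. The snippet below is a minimal, self-contained sketch of that pattern, not the repository's code; the toy dataset and the fake "generation" step are assumptions used only to show how prepare, pad_across_processes and gather fit together.

import torch
from torch.utils.data import DataLoader, TensorDataset
from accelerate import Accelerator

accelerator = Accelerator()

# Eight one-token "prompts"; in the real code this is the TokenizedDataset of prompts.
dataset = TensorDataset(torch.arange(8).reshape(8, 1))
dataloader = DataLoader(dataset, batch_size=1)
dataloader = accelerator.prepare(dataloader)  # each process now iterates over its own shard

results = []
for (batch,) in dataloader:
    # Stand-in for model.generate(): produce a sequence whose length depends on the process.
    generation = batch.repeat(1, accelerator.process_index + 1)
    # Sequences must share a length before gathering; pad along the sequence dimension.
    generation = accelerator.pad_across_processes(generation, dim=1, pad_index=0)
    gathered = accelerator.gather(generation)  # concatenates the per-process outputs
    results.append(gathered)

if accelerator.is_main_process:
    print(torch.cat(results))

Launched with accelerate launch, each rank consumes a different shard and gather reassembles the padded outputs on every process; run as a plain Python script it degenerates to a single-process loop.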
5 changes: 2 additions & 3 deletions instruction_io/output_instruction.py
@@ -112,14 +112,14 @@ def parse_args():
         "--batch_dir",
         type=str,
         required=True,
-        default="data/starcoder_generations/",
+        default="data_io/starcoder_generations/",
         help="The directory where the batch is stored.",
     )
     parser.add_argument(
         "--seed_tasks_path",
         type=str,
         required=True,
-        default="data/seed_tasks.jsonl",
+        default="data_io/code_tasks.jsonl",
         help="The path to the human written data.",
     )
     parser.add_argument(
@@ -329,6 +329,5 @@ def parse_args():
                     output_dictionary
                 )+ "\n"
             )
-    print(output_dictionary)
     df.to_csv("statistics.csv", index=False)
     np.savetxt("forgotten.txt", np.array(L))
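
One note on the defaults changed above: both --batch_dir and --seed_tasks_path are declared with required=True, and argparse never consults default for a required argument, so the new paths are effectively documentation and must still be passed on the command line. A tiny standalone sketch (hypothetical, not part of the repository):

import argparse

parser = argparse.ArgumentParser()
parser.add_argument(
    "--batch_dir",
    type=str,
    required=True,
    default="data_io/starcoder_generations/",  # never used: a required argument must be supplied explicitly
)

# Omitting the flag exits with "the following arguments are required: --batch_dir".
args = parser.parse_args(["--batch_dir", "data_io/starcoder_generations/"])
print(args.batch_dir)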

0 comments on commit 6ebb641
