diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
index eae263e7..05243d5b 100644
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -1,7 +1,7 @@
 minimum_pre_commit_version: "2.9.0"
 repos:
   - repo: https://github.com/pre-commit/pre-commit-hooks
-    rev: v2.3.0
+    rev: v4.4.0
     hooks:
       - id: check-yaml
         args: [--allow-multiple-documents]
@@ -20,15 +20,15 @@ repos:
       - id: reorder-python-imports
         args: [--py39-plus]
   - repo: https://github.com/psf/black
-    rev: 22.3.0
+    rev: 23.1.0
     hooks:
       - id: black
   - repo: https://github.com/PyCQA/flake8
-    rev: 3.9.2
+    rev: 6.0.0
     hooks:
       - id: flake8
   - repo: https://github.com/pre-commit/mirrors-mypy
-    rev: v0.991
+    rev: v1.1.1
     hooks:
       - id: mypy
         additional_dependencies: [numpy, httpx, pytest, structlog, types-PyYAML]
diff --git a/ice/agents/approval.py b/ice/agents/approval.py
index 1c9504f4..42689032 100644
--- a/ice/agents/approval.py
+++ b/ice/agents/approval.py
@@ -73,7 +73,6 @@ async def relevance(self, *, question, context, verbose=False, default=None):
         return score
 
     async def _check(self, prompt: str, candidate: str):
-
         approval_prompt = f"""Evaluate whether the following output is correct.
 
 Input:
diff --git a/ice/agents/openai.py b/ice/agents/openai.py
index 31da9d18..cadb5423 100644
--- a/ice/agents/openai.py
+++ b/ice/agents/openai.py
@@ -116,7 +116,7 @@ def _compute_relative_probs(
 
     def lookup_prob(choice: str):
         scores = 0.0
-        for (token, prob) in prediction.items():
+        for token, prob in prediction.items():
             if choice[len(choice_prefix) :].startswith(token):
                 scores += prob
         return scores
diff --git a/ice/agents/openai_reasoning.py b/ice/agents/openai_reasoning.py
index 0a655502..0f852751 100644
--- a/ice/agents/openai_reasoning.py
+++ b/ice/agents/openai_reasoning.py
@@ -119,7 +119,7 @@ async def _parse_and_aggregate_responses(
     # Parse the responses and aggregate the answers and reasonings
     answers: Counter[str] = Counter()
     reasonings: list[str] = []
-    for (i, response_text) in enumerate(response_texts):
+    for i, response_text in enumerate(response_texts):
         # Check if the response contains the answer prefix
         if answer_prefix not in response_text:
             # If not, request an explicit answer from the API
@@ -200,7 +200,6 @@ def _parse_answer_and_reasoning(
     def _format_result(
         self, answers: Counter[str], reasonings: list[str]
     ) -> tuple[dict[str, float], str]:
-
         # Join the reasonings with counts
         joined_reasonings = self._join_texts_with_counts(reasonings)
 
diff --git a/ice/agents/squad.py b/ice/agents/squad.py
index d19fe5ce..de301ee9 100644
--- a/ice/agents/squad.py
+++ b/ice/agents/squad.py
@@ -6,7 +6,6 @@
 
 class SquadAgent(Agent):
     def __init__(self, model_name: str = "z-uo/roberta-qasper"):
-
         self.nlp = pipeline(
             "question-answering", model=model_name, tokenizer=model_name
         )
diff --git a/ice/metrics/gold_paragraphs.py b/ice/metrics/gold_paragraphs.py
index 300698e9..9e5d85e6 100644
--- a/ice/metrics/gold_paragraphs.py
+++ b/ice/metrics/gold_paragraphs.py
@@ -85,7 +85,6 @@ def get_containing_paragraph(
 
 
 def get_gold_paragraph_df(question_short_name: str):
-
     gold_standards = get_question_gold_standards(question_short_name)
 
     entries = []
diff --git a/ice/metrics/nubia.py b/ice/metrics/nubia.py
index 2d899e02..1d1222f1 100644
--- a/ice/metrics/nubia.py
+++ b/ice/metrics/nubia.py
@@ -29,7 +29,6 @@ class NubiaResponse(BaseModel):
 
 
 async def _single_nubia(sample: Sample) -> list[NubiaResponse]:
-
     samples = list(product(sample.left, sample.right))
 
     async with httpx.AsyncClient(
diff --git a/ice/metrics/rouge.py b/ice/metrics/rouge.py
index e5630c62..609999dd 100644
--- a/ice/metrics/rouge.py
+++ b/ice/metrics/rouge.py
@@ -57,7 +57,7 @@ async def _compute_single(sample: Sample) -> RougeResult:
         )
         return RougeResult.parse_obj(result_dict)
 
-    return [await (_compute_single(s)) for s in sample]
+    return [await _compute_single(s) for s in sample]
 
 
 @diskcache()
diff --git a/ice/nn/bert_t5_t0_ensemble.py b/ice/nn/bert_t5_t0_ensemble.py
index 318bd538..98a2580f 100644
--- a/ice/nn/bert_t5_t0_ensemble.py
+++ b/ice/nn/bert_t5_t0_ensemble.py
@@ -163,6 +163,7 @@ def T0_classify(
 
 
 # Credit: https://stackoverflow.com/questions/39936527/python-removing-references-from-a-scientific-paper
+# Remove citations
 def remove_citations(s: str) -> str:
     return re.sub(r"\s\([A-Z][a-z]+,\s[A-Z][a-z]?\.[^\)]*,\s\d{4}\)", "", s)
 
diff --git a/ice/nn/bert_t5_t0_example.py b/ice/nn/bert_t5_t0_example.py
index 7c5abdd4..f00c909f 100644
--- a/ice/nn/bert_t5_t0_example.py
+++ b/ice/nn/bert_t5_t0_example.py
@@ -65,7 +65,6 @@ def extract_numbers(text: str) -> list[str]:
 
 
 def classify_example():
-
     abstract = """In this study we will examine the impact of the use of ..."""
     paragraph = """[..] The adherence rate is 88.2%."""
     numbers = extract_numbers(paragraph)
diff --git a/ice/recipes/combine_abstract_answers.py b/ice/recipes/combine_abstract_answers.py
index 702b2e99..7a3dbb0e 100644
--- a/ice/recipes/combine_abstract_answers.py
+++ b/ice/recipes/combine_abstract_answers.py
@@ -4,7 +4,6 @@
 
 
 def make_prompt(question: str, abstracts: list[Abstract], answers: list[str]) -> str:
-
     abstract_answers_str = "\n\n".join(
         [
             f"Title B{i}: {abstract.title}\nAbstract B{i}: {abstract.text}\nAnswer B{i}: {answer}"
diff --git a/ice/recipes/comparisons_qa.py b/ice/recipes/comparisons_qa.py
index 764f82d1..346d5dc8 100644
--- a/ice/recipes/comparisons_qa.py
+++ b/ice/recipes/comparisons_qa.py
@@ -47,7 +47,6 @@ async def run(
         num_paragraphs: int = 3,
         answer_prefix: str = DEFAULT_ANSWER_PREFIX,
     ):
-
         rank_paragraphs = RankParagraphs(mode=self.mode)
 
         top_paragraphs = await rank_paragraphs.run(
diff --git a/ice/recipes/consort_flow/baseline_elicit_answer.py b/ice/recipes/consort_flow/baseline_elicit_answer.py
index f43d31a9..d46c10c3 100644
--- a/ice/recipes/consort_flow/baseline_elicit_answer.py
+++ b/ice/recipes/consort_flow/baseline_elicit_answer.py
@@ -24,7 +24,6 @@ async def answer_like_elicit_qa(
     question: str,
     passage: str,
 ) -> str:
-
     prompt = elicit_qa_prompt(
         qa_question=question,
         excerpt=passage,
@@ -68,7 +67,6 @@ def elicit_qa_prompt(
     qa_question: str,
     excerpt: str,
 ) -> str:
-
     full_answer_prefix = "Answer:"
 
     return f"""Answer the question "{qa_question}" based on the excerpt from a research paper. \
diff --git a/ice/recipes/evaluate_result.py b/ice/recipes/evaluate_result.py
index 65e4f7d9..24880647 100644
--- a/ice/recipes/evaluate_result.py
+++ b/ice/recipes/evaluate_result.py
@@ -150,7 +150,6 @@ async def run(
         gold_result: Optional[str] = None,
         question: Optional[str] = None,
     ) -> ResultComparison:
-
         if self.mode == "test":
             model_results, gold_results, question = self.test_data()
             model_result = model_results[0]
diff --git a/ice/recipes/placebo_description.py b/ice/recipes/placebo_description.py
index b2b88a0c..f8fdea94 100644
--- a/ice/recipes/placebo_description.py
+++ b/ice/recipes/placebo_description.py
@@ -60,7 +60,6 @@ async def get_gold_experiments(self, paper: Paper) -> list[str]:
     async def placebo_for_experiment(
         self, paper: Paper, experiment: str, record=recorder
     ) -> str:
-
         # Generate the QA prompt
         qa_prompt = self.make_prompt(paper, experiment)
 
diff --git a/ice/recipes/placebo_dialogs.py b/ice/recipes/placebo_dialogs.py
index 101ed4f2..ce562956 100644
--- a/ice/recipes/placebo_dialogs.py
+++ b/ice/recipes/placebo_dialogs.py
@@ -73,14 +73,13 @@ async def ask(self, question: str, multiline=True, answer_prefix=""):
     async def multiple_choice(
         self, question: str, answers: list[str]
     ) -> tuple[dict[str, float], "DialogState"]:
-
         answer_prefix = longest_common_prefix(answers).rstrip()
         new_context = f"{self.context}\n\nQ: {question}\n\nA: {answer_prefix}"
         prediction = await self.agent.predict(context=new_context, default=" ")
 
         def lookup_prob(answer: str):
             scores = 0.0
-            for (token, prob) in prediction.items():
+            for token, prob in prediction.items():
                 if answer[len(answer_prefix) :].startswith(token):
                     scores += prob
             return scores
@@ -129,7 +128,6 @@ def make_initial_paragraph_context(
 
 
 class PlaceboDialogs(Recipe):
-
     verbose = False
 
     msg = SimpleNamespace(
@@ -367,7 +365,6 @@ async def aggregate_placebo_kind(
         return {"answer": answer, "quotes": quotes, "component_answers": answers}
 
     async def analyze_experiment(self, paper: Paper, experiment: Experiment):
-
         paragraphs = [
             paragraph
             for paragraph in paper.paragraphs
diff --git a/ice/recipes/primer/sequential_action.py b/ice/recipes/primer/sequential_action.py
index 7f72cc7b..50b8a840 100644
--- a/ice/recipes/primer/sequential_action.py
+++ b/ice/recipes/primer/sequential_action.py
@@ -259,7 +259,6 @@ async def sequential_action(
     log: list[str] = []
 
     for actions_left in range(max_actions, 0, -1):
-
         sufficient_info = await is_info_sufficient(question, log)
         if sufficient_info:
             break
diff --git a/ice/recipes/single_prompt.py b/ice/recipes/single_prompt.py
index e6b845f4..54304389 100644
--- a/ice/recipes/single_prompt.py
+++ b/ice/recipes/single_prompt.py
@@ -41,7 +41,6 @@ class SinglePrompt(Recipe):
     default_answer_classification: Optional[str]
 
     async def run(self, paper: Paper):
-
         # Get the full paper text and truncate it
         full_paper_text = get_paper_text(paper)
         paper_text = truncate_by_tokens(full_paper_text, max_tokens=self.max_tokens)
diff --git a/ice/recipes/synthesize.py b/ice/recipes/synthesize.py
index 201e3136..a4600984 100644
--- a/ice/recipes/synthesize.py
+++ b/ice/recipes/synthesize.py
@@ -95,7 +95,6 @@ def _get_reference(authors: list[str], year: Optional[int]) -> str:
 
 
 async def synthesize(question: str, abstracts: list[Abstract]) -> str:
-
     papers_str = "\n\n".join(
         [
             PAPER_FORMAT.format(
diff --git a/main.py b/main.py
index 0c553e88..f1283fd9 100644
--- a/main.py
+++ b/main.py
@@ -193,8 +193,7 @@ async def print_results(
     """
 
     results_json: list[dict] = []
 
-    for (document_id, final_result) in results_by_doc.items():
-
+    for document_id, final_result in results_by_doc.items():
         if json_out is not None:
             results_json.extend(recipe.to_json(final_result))
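
Note (not part of the patch): apart from one added comment in ice/nn/bert_t5_t0_ensemble.py, the Python hunks above are formatting-only. A minimal before/after sketch of the rewrites that recur throughout, using hypothetical names (black 23.1.0's 2023 stable style removes blank lines at the start of a block; the diff also drops the redundant parentheses around for-loop tuple targets and awaited calls):

    # Before (accepted by black 22.3.0); note the blank line opening the
    # body and the parenthesized loop target
    def summarize(counts: dict[str, int]) -> str:

        lines = []
        for (name, count) in counts.items():
            lines.append(f"{name}: {count}")
        return "\n".join(lines)

    # After (black 23.1.0): blank line and parentheses removed
    def summarize(counts: dict[str, int]) -> str:
        lines = []
        for name, count in counts.items():
            lines.append(f"{name}: {count}")
        return "\n".join(lines)

The rev bumps in .pre-commit-config.yaml are the kind of change `pre-commit autoupdate` produces; rerunning `pre-commit run --all-files` with the upgraded hooks is presumably what generated the mechanical hunks above.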