typilus · Karim-53 · Jan 12, 2023 · Jan 12, 2023 · Jan 12, 2023 · Jan 12, 2023
diff --git a/Dockerfile b/Dockerfile
@@ -5,8 +5,9 @@ ENV PYTHONUNBUFFERED=1
 RUN apt update && apt -y upgrade
 RUN apt install -y python3-numpy python3-pip python3-requests
 RUN pip3 install torch==1.5.0+cpu torchvision==0.6.0+cpu -f https://download.pytorch.org/whl/torch_stable.html
-RUN pip3 install torch-scatter==2.0.4+cpu -f https://pytorch-geometric.com/whl/torch-1.5.0.html
-RUN pip3 install dpu-utils typed-ast ptgnn
+# Use pip's legacy resolver to work around a package-metadata error; see https://stackoverflow.com/questions/67074684/pip-has-problems-with-metadata
+RUN pip3 install --use-deprecated=legacy-resolver torch-scatter==2.0.4+cpu -f https://pytorch-geometric.com/whl/torch-1.5.0.html
+RUN pip3 install dpu-utils typed-ast ptgnn==0.8.5
 
 ENV PYTHONPATH=/usr/src/
 ADD https://github.com/typilus/typilus-action/releases/download/v0.1/typilus20200507.pkl.gz /usr/src/model.pkl.gz

diff --git a/README.md b/README.md
@@ -27,17 +27,19 @@ suggestions with only a partial context, at the cost of suggesting some false
 positives.
 
 
-### Install Action in your Repository
+### How to use the Action in your Repository
 
 To use the GitHub action, create a workflow file. For example,
 ```yaml
-name: Typilus Type Annotation Suggestions
+name: Annotation Suggestions
 
 # Controls when the action will run. Triggers the workflow on push or pull request
-# events but only for the master branch
+# events, but only for the master and main branches and only when .py files change
 on:
   pull_request:
-    branches: [ master ]
+    paths:
+      - '**.py'
+    branches: [master, main]
 
 jobs:
   suggest:
@@ -46,11 +48,10 @@ jobs:
 
     steps:
     # Checks-out your repository under $GITHUB_WORKSPACE, so that typilus can access it.
-    - uses: actions/checkout@v2
-    - uses: typilus/typilus-action@master
+    - uses: actions/checkout@v3
+    - uses: Karim-53/typilus-action@master
       env:
         GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
-        MODEL_PATH: path/to/model.pkl.gz   # Optional: provide the path of a custom model instead of the pre-trained model.
         SUGGESTION_CONFIDENCE_THRESHOLD: 0.8   # Configure this to limit the confidence of suggestions on un-annotated locations. A float in [0, 1]. Default 0.8
         DISAGREEMENT_CONFIDENCE_THRESHOLD: 0.95  # Configure this to limit the confidence of suggestions on annotated locations.  A float in [0, 1]. Default 0.95
 ```

diff --git a/action.yml b/action.yml
@@ -1,6 +1,6 @@
 # action.yml
-name: 'Typilus: Suggest Python Type Annotations'
-description: 'Suggest Likely Python Type Annotations'
+name: 'Typilus: Suggest Python Type Annotations with AI (fix)'
+description: 'Suggest Likely Python Type Annotations using AI'
 branding:
     icon: box
     color: gray-dark

diff --git a/entrypoint.py b/entrypoint.py
@@ -77,140 +77,146 @@ def __repr__(self) -> str:
 )
 print("Diff GET Status Code: ", diff_rq.status_code)
 
+try:
+    changed_files = get_changed_files(diff_rq.text)
+    if len(changed_files) == 0:
+        print("No relevant changes found.")
+        sys.exit(0)
 
-changed_files = get_changed_files(diff_rq.text)
-if len(changed_files) == 0:
-    print("No relevant changes found.")
-    sys.exit(0)
 
+    monitoring = Monitoring()
+    suggestion_confidence_threshold = float(os.getenv("SUGGESTION_CONFIDENCE_THRESHOLD", 0.5))
+    diagreement_confidence_threshold = float(os.getenv("DISAGREEMENT_CONFIDENCE_THRESHOLD", 0.95))
 
-monitoring = Monitoring()
-suggestion_confidence_threshold = float(os.getenv("SUGGESTION_CONFIDENCE_THRESHOLD", 0.5))
-diagreement_confidence_threshold = float(os.getenv("DISAGREEMENT_CONFIDENCE_THRESHOLD", 0.95))
-
-if debug:
-    print(
-        f"Confidence thresholds {suggestion_confidence_threshold:.2f} and {diagreement_confidence_threshold:.2f}."
-    )
-
-
-with TemporaryDirectory() as out_dir:
-    typing_rules_path = os.path.join(os.path.dirname(__file__), "metadata", "typingRules.json")
-    extract_graphs(
-        repo_path, typing_rules_path, files_to_extract=set(changed_files), target_folder=out_dir,
-    )
-
-    def data_iter():
-        for datafile_path in iglob(os.path.join(out_dir, "*.jsonl.gz")):
-            print(f"Looking into {datafile_path}...")
-            for graph in load_jsonl_gz(datafile_path):
-                yield graph
+    if debug:
+        print(
+            f"Confidence thresholds {suggestion_confidence_threshold:.2f} and {diagreement_confidence_threshold:.2f}."
+        )
 
-    model_path = os.getenv("MODEL_PATH", "/usr/src/model.pkl.gz")
-    model, nn = Graph2Class.restore_model(model_path, "cpu")
 
-    type_suggestions: List[TypeSuggestion] = []
-    for graph, predictions in model.predict(data_iter(), nn, "cpu"):
-        # predictions has the type: Dict[int, Tuple[str, float]]
-        filepath = graph["filename"]
+    with TemporaryDirectory() as out_dir:
+        typing_rules_path = os.path.join(os.path.dirname(__file__), "metadata", "typingRules.json")
+        extract_graphs(
+            repo_path, typing_rules_path, files_to_extract=set(changed_files), target_folder=out_dir,
+        )
 
+        def data_iter():
+            for datafile_path in iglob(os.path.join(out_dir, "*.jsonl.gz")):
+                print(f"Looking into {datafile_path}...")
+                yield from load_jsonl_gz(datafile_path)
+
+        model_path = os.getenv("MODEL_PATH", "/usr/src/model.pkl.gz")
+        model, nn = Graph2Class.restore_model(model_path, "cpu")
+
+        type_suggestions: List[TypeSuggestion] = []
+        for graph, predictions in model.predict(data_iter(), nn, "cpu"):
+            # predictions has the type: Dict[int, Tuple[str, float]]
+            filepath = graph["filename"]
+
+            if debug:
+                print("Predictions:", predictions)
+                print("SuperNodes:", graph["supernodes"])
+
+            for supernode_idx, (predicted_type, predicted_prob) in predictions.items():
+                supernode_data = graph["supernodes"][str(supernode_idx)]
+                if supernode_data["type"] == "variable":
+                    continue  # Do not suggest annotations on variables for now.
+                lineno, colno = supernode_data["location"]
+                suggestion = TypeSuggestion(
+                    filepath,
+                    supernode_data["name"],
+                    (lineno, colno),
+                    annotation_rewrite(predicted_type),
+                    supernode_data["type"],
+                    predicted_prob,
+                    is_disagreement=supernode_data["annotation"] != "??"
+                    and supernode_data["annotation"] != predicted_type,
+                )
+
+                print("Suggestion: ", suggestion)
+
+                if lineno not in changed_files[filepath]:
+                    continue
+                elif suggestion.name == "%UNK%":
+                    continue
+
+                if (
+                    supernode_data["annotation"] == "??"
+                    and suggestion.confidence > suggestion_confidence_threshold
+                ):
+                    type_suggestions.append(suggestion)
+                elif (
+                    suggestion.is_disagreement
+                    # and suggestion.confidence > diagreement_confidence_threshold
+                ):
+                    pass  # TODO: Disabled for now: type_suggestions.append(suggestion)
+
+        # Add PR comments
         if debug:
-            print("Predictions:", predictions)
-            print("SuperNodes:", graph["supernodes"])
-
-        for supernode_idx, (predicted_type, predicted_prob) in predictions.items():
-            supernode_data = graph["supernodes"][str(supernode_idx)]
-            if supernode_data["type"] == "variable":
-                continue  # Do not suggest annotations on variables for now.
-            lineno, colno = supernode_data["location"]
-            suggestion = TypeSuggestion(
-                filepath,
-                supernode_data["name"],
-                (lineno, colno),
-                annotation_rewrite(predicted_type),
-                supernode_data["type"],
-                predicted_prob,
-                is_disagreement=supernode_data["annotation"] != "??"
-                and supernode_data["annotation"] != predicted_type,
-            )
-
-            print("Suggestion: ", suggestion)
-
-            if lineno not in changed_files[filepath]:
-                continue
-            elif suggestion.name == "%UNK%":
-                continue
-
-            if (
-                supernode_data["annotation"] == "??"
-                and suggestion.confidence > suggestion_confidence_threshold
-            ):
-                type_suggestions.append(suggestion)
-            elif (
-                suggestion.is_disagreement
-                # and suggestion.confidence > diagreement_confidence_threshold
-            ):
-                pass  # TODO: Disabled for now: type_suggestions.append(suggestion)
-
-    # Add PR comments
-    if debug:
-        print("# Suggestions:", len(type_suggestions))
-        for suggestion in type_suggestions:
-            print(suggestion)
+            print("# Suggestions:", len(type_suggestions))
+            for suggestion in type_suggestions:
+                print(suggestion)
 
-    comment_url = event_data["pull_request"]["review_comments_url"]
-    commit_id = event_data["pull_request"]["head"]["sha"]
+        comment_url = event_data["pull_request"]["review_comments_url"]
+        commit_id = event_data["pull_request"]["head"]["sha"]
 
-    for suggestion in type_suggestions:
-        if suggestion.symbol_kind == "class-or-function":
-            suggestion.annotation_lineno = find_annotation_line(
-                suggestion.filepath[1:], suggestion.file_location, suggestion.name
+        for suggestion in type_suggestions:
+            if suggestion.symbol_kind == "class-or-function":
+                suggestion.annotation_lineno = find_annotation_line(
+                    suggestion.filepath[1:], suggestion.file_location, suggestion.name
+                )
+            else:  # when the underlying symbol is a parameter
+                suggestion.annotation_lineno = suggestion.file_location[0]
+
+        # Group type suggestions by (filepath + lineno)
+        grouped_suggestions = group_suggestions(type_suggestions)
+
+        def bucket_confidences(confidence: float) -> str:
+            if confidence >= 0.95:
+                return ":fire:"
+            if confidence >= 0.85:
+                return ":bell:"
+            return ":confused:" if confidence >= 0.7 else ":question:"
+
+        def report_confidence(suggestions):
+            suggestions = sorted(suggestions, key=lambda s: -s.confidence)
+            return "".join(
+                f"| `{s.name}` | `{s.suggestion}` | {s.confidence:.1%} {bucket_confidences(s.confidence)} | \n"
+                for s in suggestions
             )
-        else:  # when the underlying symbol is a parameter
-            suggestion.annotation_lineno = suggestion.file_location[0]
-
-    # Group type suggestions by (filepath + lineno)
-    grouped_suggestions = group_suggestions(type_suggestions)
-
-    def bucket_confidences(confidence: float) -> str:
-        if confidence >= 0.95:
-            return ":fire:"
-        if confidence >= 0.85:
-            return ":bell:"
-        if confidence >= 0.7:
-            return ":confused:"
-        return ":question:"
-
-    def report_confidence(suggestions):
-        suggestions = sorted(suggestions, key=lambda s: -s.confidence)
-        return "".join(
-            f"| `{s.name}` | `{s.suggestion}` | {s.confidence:.1%} {bucket_confidences(s.confidence)} | \n"
-            for s in suggestions
-        )
 
-    for same_line_suggestions in grouped_suggestions:
-        suggestion = same_line_suggestions[0]
-        path = suggestion.filepath[1:]  # No slash in the beginning
-        annotation_lineno = suggestion.annotation_lineno
-        with open(path) as file:
-            target_line = file.readlines()[annotation_lineno - 1]
-        data = {
-            "path": path,
-            "line": annotation_lineno,
-            "side": "RIGHT",
-            "commit_id": commit_id,
-            "body": "The following type annotation(s) might be useful:\n ```suggestion\n"
-            f"{annotate_line(target_line, same_line_suggestions)}```\n"
-            f"### :chart_with_upwards_trend: Prediction Stats\n"
-            f"| Symbol | Annotation | Confidence |\n"
-            f"| -- | -- | --: |\n"
-            f"{report_confidence(same_line_suggestions)}",
-        }
-        headers = {
-            "authorization": f"Bearer {github_token}",
-            "Accept": "application/vnd.github.v3.raw+json",
-        }
-        r = requests.post(comment_url, data=json.dumps(data), headers=headers)
-        if debug:
-            print("URL: ", comment_url)
-            print(f"Data: {data}. Status Code: {r.status_code}. Text: {r.text}")
+        for same_line_suggestions in grouped_suggestions:
+            suggestion = same_line_suggestions[0]
+            path = suggestion.filepath[1:]  # No slash in the beginning
+            annotation_lineno = suggestion.annotation_lineno
+            with open(path) as file:
+                target_line = file.readlines()[annotation_lineno - 1]
+            data = {
+                "path": path,
+                "line": annotation_lineno,
+                "side": "RIGHT",
+                "commit_id": commit_id,
+                "body": "The following type annotation(s) might be useful:\n ```suggestion\n"
+                f"{annotate_line(target_line, same_line_suggestions)}```\n"
+                f"### :chart_with_upwards_trend: Prediction Stats\n"
+                f"| Symbol | Annotation | Confidence |\n"
+                f"| -- | -- | --: |\n"
+                f"{report_confidence(same_line_suggestions)}",
+            }
+            headers = {
+                "authorization": f"Bearer {github_token}",
+                "Accept": "application/vnd.github.v3.raw+json",
+            }
+            r = requests.post(comment_url, data=json.dumps(data), headers=headers)
+            if debug:
+                print("URL: ", comment_url)
+                print(f"Data: {data}. Status Code: {r.status_code}. Text: {r.text}")
+except AssertionError:
+    import traceback
+    _, _, tb = sys.exc_info()
+    traceback.print_tb(tb) # Fixed format
+    tb_info = traceback.extract_tb(tb)
+    filename, line, func, text = tb_info[-1]
+
+    print('An error occurred on line {} in statement {}'.format(line, text))
+    exit()                
diff --git a/src/annotationutils.py b/src/annotationutils.py
@@ -5,11 +5,9 @@
 
 
 def find_suggestion_for_return(suggestions):
-    for s in suggestions:
-        if s.symbol_kind == "class-or-function":
-            return s
-    else:
-        return None
+    return next(
+        (s for s in suggestions if s.symbol_kind == "class-or-function"), None
+    )
 
 
 def annotate_line(line, suggestions):
@@ -33,7 +31,7 @@ def annotate_parameters(line, suggestions):
     """
     Annotate the parameters of a function on a particular line
     """
-    annotated_line = " " + line
+    annotated_line = f" {line}"
     length_increase = 0
     for s in suggestions:
         assert line[s.file_location[1] :].startswith(s.name)
@@ -48,7 +46,7 @@ def annotate_return(line, suggestion):
     Annotate the return of a function
     """
     assert line.rstrip().endswith(":")
-    return line.rstrip()[:-1] + f" -> {suggestion.suggestion}" + ":\n"
+    return f"{line.rstrip()[:-1]} -> {suggestion.suggestion}" + ":\n"
 
 
 def find_annotation_line(filepath, location, func_name):

diff --git a/src/changeutils.py b/src/changeutils.py
@@ -9,8 +9,7 @@ def get_line_ranges_of_interest(diff_lines: List[str]) -> Set[int]:
     lines_of_interest = set()
     current_line = 0
     for line in diff_lines:
-        hunk_start_match = HUNK_MATCH.match(line)
-        if hunk_start_match:
+        if hunk_start_match := HUNK_MATCH.match(line):
             current_line = int(hunk_start_match.group(1))
         elif line.startswith("+"):
             lines_of_interest.add(current_line)
@@ -47,6 +46,8 @@ def get_changed_files(diff: str, suffix=".py") -> Dict[str, Set[int]]:
         elif file_diff_lines[1].startswith("similarity"):
             assert file_diff_lines[2].startswith("rename")
             assert file_diff_lines[3].startswith("rename")
+            if len(file_diff_lines) == 4:
+                continue # skip file renames without any changes
             assert file_diff_lines[4].startswith("index")
             assert file_diff_lines[5].startswith("--- a/")
             assert file_diff_lines[6].startswith("+++ b/")