Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 3 additions & 2 deletions Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -5,8 +5,9 @@ ENV PYTHONUNBUFFERED=1
RUN apt update && apt -y upgrade
RUN apt install -y python3-numpy python3-pip python3-requests
RUN pip3 install torch==1.5.0+cpu torchvision==0.6.0+cpu -f https://download.pytorch.org/whl/torch_stable.html
RUN pip3 install torch-scatter==2.0.4+cpu -f https://pytorch-geometric.com/whl/torch-1.5.0.html
RUN pip3 install dpu-utils typed-ast ptgnn
# https://stackoverflow.com/questions/67074684/pip-has-problems-with-metadata
RUN pip3 install --use-deprecated=legacy-resolver torch-scatter==2.0.4+cpu -f https://pytorch-geometric.com/whl/torch-1.5.0.html
RUN pip3 install dpu-utils typed-ast ptgnn==0.8.5

ENV PYTHONPATH=/usr/src/
ADD https://github.com/typilus/typilus-action/releases/download/v0.1/typilus20200507.pkl.gz /usr/src/model.pkl.gz
Expand Down
15 changes: 8 additions & 7 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -27,17 +27,19 @@ suggestions with only a partial context, at the cost of suggesting some false
positives.


### Install Action in your Repository
### How to use the Action in your Repository

To use the GitHub action, create a workflow file. For example,
```yaml
name: Typilus Type Annotation Suggestions
name: Annotation Suggestions

# Controls when the action will run. Triggers the workflow on pull request
# events but only for the master branch
# events but only for the master and main branches
on:
pull_request:
branches: [ master ]
paths:
- '**.py'
branches: [master, main]

jobs:
suggest:
Expand All @@ -46,11 +48,10 @@ jobs:

steps:
# Checks-out your repository under $GITHUB_WORKSPACE, so that typilus can access it.
- uses: actions/checkout@v2
- uses: typilus/typilus-action@master
- uses: actions/checkout@v3
- uses: Karim-53/typilus-action@master
env:
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
MODEL_PATH: path/to/model.pkl.gz # Optional: provide the path of a custom model instead of the pre-trained model.
SUGGESTION_CONFIDENCE_THRESHOLD: 0.8 # Minimum confidence required to suggest an annotation on an un-annotated location. A float in [0, 1]. Default 0.5
DISAGREEMENT_CONFIDENCE_THRESHOLD: 0.95 # Configure this to limit the confidence of suggestions on annotated locations. A float in [0, 1]. Default 0.95
```
Expand Down
4 changes: 2 additions & 2 deletions action.yml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
# action.yml
name: 'Typilus: Suggest Python Type Annotations'
description: 'Suggest Likely Python Type Annotations'
name: 'Typilus: Suggest Python Type Annotations with AI (fix)'
description: 'Suggest Likely Python Type Annotations using AI'
branding:
icon: box
color: gray-dark
Expand Down
258 changes: 132 additions & 126 deletions entrypoint.py
Original file line number Diff line number Diff line change
Expand Up @@ -77,140 +77,146 @@ def __repr__(self) -> str:
)
print("Diff GET Status Code: ", diff_rq.status_code)

try:
changed_files = get_changed_files(diff_rq.text)
if len(changed_files) == 0:
print("No relevant changes found.")
sys.exit(0)

changed_files = get_changed_files(diff_rq.text)
if len(changed_files) == 0:
print("No relevant changes found.")
sys.exit(0)

monitoring = Monitoring()
suggestion_confidence_threshold = float(os.getenv("SUGGESTION_CONFIDENCE_THRESHOLD", 0.5))
diagreement_confidence_threshold = float(os.getenv("DISAGREEMENT_CONFIDENCE_THRESHOLD", 0.95))

monitoring = Monitoring()
suggestion_confidence_threshold = float(os.getenv("SUGGESTION_CONFIDENCE_THRESHOLD", 0.5))
diagreement_confidence_threshold = float(os.getenv("DISAGREEMENT_CONFIDENCE_THRESHOLD", 0.95))

if debug:
print(
f"Confidence thresholds {suggestion_confidence_threshold:.2f} and {diagreement_confidence_threshold:.2f}."
)


with TemporaryDirectory() as out_dir:
typing_rules_path = os.path.join(os.path.dirname(__file__), "metadata", "typingRules.json")
extract_graphs(
repo_path, typing_rules_path, files_to_extract=set(changed_files), target_folder=out_dir,
)

def data_iter():
for datafile_path in iglob(os.path.join(out_dir, "*.jsonl.gz")):
print(f"Looking into {datafile_path}...")
for graph in load_jsonl_gz(datafile_path):
yield graph
if debug:
print(
f"Confidence thresholds {suggestion_confidence_threshold:.2f} and {diagreement_confidence_threshold:.2f}."
)

model_path = os.getenv("MODEL_PATH", "/usr/src/model.pkl.gz")
model, nn = Graph2Class.restore_model(model_path, "cpu")

type_suggestions: List[TypeSuggestion] = []
for graph, predictions in model.predict(data_iter(), nn, "cpu"):
# predictions has the type: Dict[int, Tuple[str, float]]
filepath = graph["filename"]
with TemporaryDirectory() as out_dir:
typing_rules_path = os.path.join(os.path.dirname(__file__), "metadata", "typingRules.json")
extract_graphs(
repo_path, typing_rules_path, files_to_extract=set(changed_files), target_folder=out_dir,
)

def data_iter():
for datafile_path in iglob(os.path.join(out_dir, "*.jsonl.gz")):
print(f"Looking into {datafile_path}...")
yield from load_jsonl_gz(datafile_path)

model_path = os.getenv("MODEL_PATH", "/usr/src/model.pkl.gz")
model, nn = Graph2Class.restore_model(model_path, "cpu")

type_suggestions: List[TypeSuggestion] = []
for graph, predictions in model.predict(data_iter(), nn, "cpu"):
# predictions has the type: Dict[int, Tuple[str, float]]
filepath = graph["filename"]

if debug:
print("Predictions:", predictions)
print("SuperNodes:", graph["supernodes"])

for supernode_idx, (predicted_type, predicted_prob) in predictions.items():
supernode_data = graph["supernodes"][str(supernode_idx)]
if supernode_data["type"] == "variable":
continue # Do not suggest annotations on variables for now.
lineno, colno = supernode_data["location"]
suggestion = TypeSuggestion(
filepath,
supernode_data["name"],
(lineno, colno),
annotation_rewrite(predicted_type),
supernode_data["type"],
predicted_prob,
is_disagreement=supernode_data["annotation"] != "??"
and supernode_data["annotation"] != predicted_type,
)

print("Suggestion: ", suggestion)

if lineno not in changed_files[filepath]:
continue
elif suggestion.name == "%UNK%":
continue

if (
supernode_data["annotation"] == "??"
and suggestion.confidence > suggestion_confidence_threshold
):
type_suggestions.append(suggestion)
elif (
suggestion.is_disagreement
# and suggestion.confidence > diagreement_confidence_threshold
):
pass # TODO: Disabled for now: type_suggestions.append(suggestion)

# Add PR comments
if debug:
print("Predictions:", predictions)
print("SuperNodes:", graph["supernodes"])

for supernode_idx, (predicted_type, predicted_prob) in predictions.items():
supernode_data = graph["supernodes"][str(supernode_idx)]
if supernode_data["type"] == "variable":
continue # Do not suggest annotations on variables for now.
lineno, colno = supernode_data["location"]
suggestion = TypeSuggestion(
filepath,
supernode_data["name"],
(lineno, colno),
annotation_rewrite(predicted_type),
supernode_data["type"],
predicted_prob,
is_disagreement=supernode_data["annotation"] != "??"
and supernode_data["annotation"] != predicted_type,
)

print("Suggestion: ", suggestion)

if lineno not in changed_files[filepath]:
continue
elif suggestion.name == "%UNK%":
continue

if (
supernode_data["annotation"] == "??"
and suggestion.confidence > suggestion_confidence_threshold
):
type_suggestions.append(suggestion)
elif (
suggestion.is_disagreement
# and suggestion.confidence > diagreement_confidence_threshold
):
pass # TODO: Disabled for now: type_suggestions.append(suggestion)

# Add PR comments
if debug:
print("# Suggestions:", len(type_suggestions))
for suggestion in type_suggestions:
print(suggestion)
print("# Suggestions:", len(type_suggestions))
for suggestion in type_suggestions:
print(suggestion)

comment_url = event_data["pull_request"]["review_comments_url"]
commit_id = event_data["pull_request"]["head"]["sha"]
comment_url = event_data["pull_request"]["review_comments_url"]
commit_id = event_data["pull_request"]["head"]["sha"]

for suggestion in type_suggestions:
if suggestion.symbol_kind == "class-or-function":
suggestion.annotation_lineno = find_annotation_line(
suggestion.filepath[1:], suggestion.file_location, suggestion.name
for suggestion in type_suggestions:
if suggestion.symbol_kind == "class-or-function":
suggestion.annotation_lineno = find_annotation_line(
suggestion.filepath[1:], suggestion.file_location, suggestion.name
)
else: # when the underlying symbol is a parameter
suggestion.annotation_lineno = suggestion.file_location[0]

# Group type suggestions by (filepath + lineno)
grouped_suggestions = group_suggestions(type_suggestions)

def bucket_confidences(confidence: float) -> str:
if confidence >= 0.95:
return ":fire:"
if confidence >= 0.85:
return ":bell:"
return ":confused:" if confidence >= 0.7 else ":question:"

def report_confidence(suggestions):
suggestions = sorted(suggestions, key=lambda s: -s.confidence)
return "".join(
f"| `{s.name}` | `{s.suggestion}` | {s.confidence:.1%} {bucket_confidences(s.confidence)} | \n"
for s in suggestions
)
else: # when the underlying symbol is a parameter
suggestion.annotation_lineno = suggestion.file_location[0]

# Group type suggestions by (filepath + lineno)
grouped_suggestions = group_suggestions(type_suggestions)

def bucket_confidences(confidence: float) -> str:
if confidence >= 0.95:
return ":fire:"
if confidence >= 0.85:
return ":bell:"
if confidence >= 0.7:
return ":confused:"
return ":question:"

def report_confidence(suggestions):
suggestions = sorted(suggestions, key=lambda s: -s.confidence)
return "".join(
f"| `{s.name}` | `{s.suggestion}` | {s.confidence:.1%} {bucket_confidences(s.confidence)} | \n"
for s in suggestions
)

for same_line_suggestions in grouped_suggestions:
suggestion = same_line_suggestions[0]
path = suggestion.filepath[1:] # No slash in the beginning
annotation_lineno = suggestion.annotation_lineno
with open(path) as file:
target_line = file.readlines()[annotation_lineno - 1]
data = {
"path": path,
"line": annotation_lineno,
"side": "RIGHT",
"commit_id": commit_id,
"body": "The following type annotation(s) might be useful:\n ```suggestion\n"
f"{annotate_line(target_line, same_line_suggestions)}```\n"
f"### :chart_with_upwards_trend: Prediction Stats\n"
f"| Symbol | Annotation | Confidence |\n"
f"| -- | -- | --: |\n"
f"{report_confidence(same_line_suggestions)}",
}
headers = {
"authorization": f"Bearer {github_token}",
"Accept": "application/vnd.github.v3.raw+json",
}
r = requests.post(comment_url, data=json.dumps(data), headers=headers)
if debug:
print("URL: ", comment_url)
print(f"Data: {data}. Status Code: {r.status_code}. Text: {r.text}")
for same_line_suggestions in grouped_suggestions:
suggestion = same_line_suggestions[0]
path = suggestion.filepath[1:] # No slash in the beginning
annotation_lineno = suggestion.annotation_lineno
with open(path) as file:
target_line = file.readlines()[annotation_lineno - 1]
data = {
"path": path,
"line": annotation_lineno,
"side": "RIGHT",
"commit_id": commit_id,
"body": "The following type annotation(s) might be useful:\n ```suggestion\n"
f"{annotate_line(target_line, same_line_suggestions)}```\n"
f"### :chart_with_upwards_trend: Prediction Stats\n"
f"| Symbol | Annotation | Confidence |\n"
f"| -- | -- | --: |\n"
f"{report_confidence(same_line_suggestions)}",
}
headers = {
"authorization": f"Bearer {github_token}",
"Accept": "application/vnd.github.v3.raw+json",
}
r = requests.post(comment_url, data=json.dumps(data), headers=headers)
if debug:
print("URL: ", comment_url)
print(f"Data: {data}. Status Code: {r.status_code}. Text: {r.text}")
except AssertionError:
import traceback
_, _, tb = sys.exc_info()
traceback.print_tb(tb) # Fixed format
tb_info = traceback.extract_tb(tb)
filename, line, func, text = tb_info[-1]

print('An error occurred on line {} in statement {}'.format(line, text))
exit()
12 changes: 5 additions & 7 deletions src/annotationutils.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,11 +5,9 @@


def find_suggestion_for_return(suggestions):
for s in suggestions:
if s.symbol_kind == "class-or-function":
return s
else:
return None
return next(
(s for s in suggestions if s.symbol_kind == "class-or-function"), None
)


def annotate_line(line, suggestions):
Expand All @@ -33,7 +31,7 @@ def annotate_parameters(line, suggestions):
"""
Annotate the parameters of a function on a particular line
"""
annotated_line = " " + line
annotated_line = f" {line}"
length_increase = 0
for s in suggestions:
assert line[s.file_location[1] :].startswith(s.name)
Expand All @@ -48,7 +46,7 @@ def annotate_return(line, suggestion):
Annotate the return of a function
"""
assert line.rstrip().endswith(":")
return line.rstrip()[:-1] + f" -> {suggestion.suggestion}" + ":\n"
return f"{line.rstrip()[:-1]} -> {suggestion.suggestion}" + ":\n"


def find_annotation_line(filepath, location, func_name):
Expand Down
5 changes: 3 additions & 2 deletions src/changeutils.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,8 +9,7 @@ def get_line_ranges_of_interest(diff_lines: List[str]) -> Set[int]:
lines_of_interest = set()
current_line = 0
for line in diff_lines:
hunk_start_match = HUNK_MATCH.match(line)
if hunk_start_match:
if hunk_start_match := HUNK_MATCH.match(line):
current_line = int(hunk_start_match.group(1))
elif line.startswith("+"):
lines_of_interest.add(current_line)
Expand Down Expand Up @@ -47,6 +46,8 @@ def get_changed_files(diff: str, suffix=".py") -> Dict[str, Set[int]]:
elif file_diff_lines[1].startswith("similarity"):
assert file_diff_lines[2].startswith("rename")
assert file_diff_lines[3].startswith("rename")
if len(file_diff_lines) == 4:
continue # skip file renames \wo any changes
assert file_diff_lines[4].startswith("index")
assert file_diff_lines[5].startswith("--- a/")
assert file_diff_lines[6].startswith("+++ b/")
Expand Down
Loading