Skip to content

Commit 36daf8d

Browse files
authored
Merge pull request #115 from codellm-devkit/114-support-better-java-comment-parsers-report-declarations-and-parameter-location-parsing
Address Feature Request Issue 114: Update APIs to match codeanalyzer 2.3.0
2 parents ad536ac + 1af02b2 commit 36daf8d

File tree

13 files changed

+79304
-28105
lines changed

13 files changed

+79304
-28105
lines changed

cldk/analysis/java/codeanalyzer/bin/.gitignore

Lines changed: 0 additions & 1 deletion
This file was deleted.

cldk/analysis/java/codeanalyzer/bin/__init__.py

Lines changed: 0 additions & 19 deletions
This file was deleted.

cldk/analysis/java/codeanalyzer/codeanalyzer.py

Lines changed: 97 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -31,7 +31,7 @@
3131
from cldk.analysis.commons.treesitter import TreesitterJava
3232
from cldk.models.java import JGraphEdges
3333
from cldk.models.java.enums import CRUDOperationType
34-
from cldk.models.java.models import JApplication, JCRUDOperation, JCallable, JField, JMethodDetail, JType, JCompilationUnit, JGraphEdgesST
34+
from cldk.models.java.models import JApplication, JCRUDOperation, JCallable, JCallableParameter, JComment, JField, JMethodDetail, JType, JCompilationUnit, JGraphEdgesST
3535
from cldk.utils.exceptions.exceptions import CodeanalyzerExecutionException
3636

3737
logger = logging.getLogger(__name__)
@@ -47,7 +47,6 @@ class JCodeanalyzer:
4747
analysis_json_path (str or Path, optional): The path to save the intermediate code analysis outputs.
4848
If None, the analysis will be read from the pipe.
4949
analysis_level (str): The level of analysis ('symbol_table' or 'call_graph').
50-
use_graalvm_binary (bool): If True, the GraalVM binary will be used instead of the codeanalyzer jar.
5150
eager_analysis (bool): If True, the analysis will be performed every time the object is created.
5251
5352
Methods:
@@ -92,15 +91,13 @@ def __init__(
9291
analysis_backend_path: Union[str, Path, None],
9392
analysis_json_path: Union[str, Path, None],
9493
analysis_level: str,
95-
use_graalvm_binary: bool,
9694
eager_analysis: bool,
9795
target_files: List[str] | None,
9896
) -> None:
9997
self.project_dir = project_dir
10098
self.source_code = source_code
10199
self.analysis_backend_path = analysis_backend_path
102100
self.analysis_json_path = analysis_json_path
103-
self.use_graalvm_binary = use_graalvm_binary
104101
self.eager_analysis = eager_analysis
105102
self.analysis_level = analysis_level
106103
self.target_files = target_files
@@ -128,27 +125,22 @@ def _get_codeanalyzer_exec(self) -> List[str]:
128125
List[str]: The executable command for codeanalyzer.
129126
130127
Notes:
131-
- If the use_graalvm_binary flag is set, the codeanalyzer binary from GraalVM will be used.
132128
- If the analysis_backend_path is provided, the codeanalyzer jar from that path will be used.
133129
- If not provided, the latest codeanalyzer jar from GitHub will be downloaded.
134130
"""
135131

136-
if self.use_graalvm_binary:
137-
with resources.as_file(resources.files("cldk.analysis.java.codeanalyzer.bin") / "codeanalyzer") as codeanalyzer_bin_path:
138-
codeanalyzer_exec = shlex.split(codeanalyzer_bin_path.__str__())
132+
if self.analysis_backend_path:
133+
analysis_backend_path = Path(self.analysis_backend_path)
134+
logger.info(f"Using codeanalyzer jar from {analysis_backend_path}")
135+
codeanalyzer_jar_file = next(analysis_backend_path.rglob("codeanalyzer-*.jar"), None)
136+
if codeanalyzer_jar_file is None:
137+
raise CodeanalyzerExecutionException("Codeanalyzer jar not found in the provided path.")
138+
codeanalyzer_exec = shlex.split(f"java -jar {codeanalyzer_jar_file}")
139139
else:
140-
if self.analysis_backend_path:
141-
analysis_backend_path = Path(self.analysis_backend_path)
142-
logger.info(f"Using codeanalyzer jar from {analysis_backend_path}")
143-
codeanalyzer_jar_file = next(analysis_backend_path.rglob("codeanalyzer-*.jar"), None)
144-
if codeanalyzer_jar_file is None:
145-
raise CodeanalyzerExecutionException("Codeanalyzer jar not found in the provided path.")
140+
# Since the path to codeanalyzer.jar was not provided, we will use the default jar from the cldk/analysis/java/codeanalyzer/jar folder
141+
with resources.as_file(resources.files("cldk.analysis.java.codeanalyzer.jar")) as codeanalyzer_jar_path:
142+
codeanalyzer_jar_file = next(codeanalyzer_jar_path.rglob("codeanalyzer-*.jar"), None)
146143
codeanalyzer_exec = shlex.split(f"java -jar {codeanalyzer_jar_file}")
147-
else:
148-
# Since the path to codeanalyzer.jar we will use the default jar from the cldk/analysis/java/codeanalyzer/jar folder
149-
with resources.as_file(resources.files("cldk.analysis.java.codeanalyzer.jar")) as codeanalyzer_jar_path:
150-
codeanalyzer_jar_file = next(codeanalyzer_jar_path.rglob("codeanalyzer-*.jar"), None)
151-
codeanalyzer_exec = shlex.split(f"java -jar {codeanalyzer_jar_file}")
152144
return codeanalyzer_exec
153145

154146
@staticmethod
@@ -497,6 +489,29 @@ def get_method(self, qualified_class_name, method_signature) -> JCallable:
497489
if cd == method_signature:
498490
return ci.callable_declarations[cd]
499491

492+
def get_method_parameters(self, qualified_class_name, method_signature) -> List[JCallableParameter]:
493+
"""Should return a list of method parameters given the qualified class name and method signature.
494+
495+
Args:
496+
qualified_class_name (str): The qualified name of the class.
497+
method_signature (str): The signature of the method.
498+
499+
Returns:
500+
List[JCallableParameter]: A list of method parameters for the given qualified class name and method signature.
501+
"""
502+
return self.get_method(qualified_class_name, method_signature).parameters
503+
504+
def get_parameters_from_callable(self, callable: JCallable) -> List[JCallableParameter]:
505+
"""Should return a list of method parameters given the callable.
506+
507+
Args:
508+
callable (JCallable): The callable object.
509+
510+
Returns:
511+
List[JCallableParameter]: A list of method parameters for the given callable.
512+
"""
513+
return callable.parameters
514+
500515
def get_java_file(self, qualified_class_name) -> str:
501516
"""Should return java file name given the qualified class name.
502517
@@ -1006,3 +1021,66 @@ def get_all_delete_operations(self) -> List[Dict[str, Union[JType, JCallable, Li
10061021
}
10071022
)
10081023
return crud_delete_operations
1024+
1025+
# Some APIs to process comments
1026+
def get_comments_in_a_method(self, qualified_class_name: str, method_signature: str) -> List[JComment]:
1027+
"""Get all comments in a method.
1028+
1029+
Args:
1030+
qualified_class_name (str): Qualified name of the class.
1031+
method_signature (str): Signature of the method.
1032+
1033+
Returns:
1034+
List[JComment]: List of comments in the method.
1035+
"""
1036+
callable = self.get_method(qualified_class_name, method_signature)
1037+
return callable.comments
1038+
1039+
def get_comments_in_a_class(self, qualified_class_name: str) -> List[JComment]:
1040+
"""Get all comments in a class.
1041+
1042+
Args:
1043+
qualified_class_name (str): Qualified name of the class.
1044+
1045+
Returns:
1046+
List[JComment]: List of comments in the class.
1047+
"""
1048+
klass = self.get_class(qualified_class_name)
1049+
return klass.comments
1050+
1051+
def get_comment_in_file(self, file_path: str) -> List[JComment]:
1052+
"""Get all comments in a file.
1053+
1054+
Args:
1055+
file_path (str): Path to the file.
1056+
1057+
Returns:
1058+
List[JComment]: List of comments in the file.
1059+
"""
1060+
compilation_unit = self.get_symbol_table().get(file_path, None)
1061+
if compilation_unit is None:
1062+
raise CodeanalyzerExecutionException(f"File {file_path} not found in the symbol table.")
1063+
return compilation_unit.comments
1064+
1065+
def get_all_comments(self) -> Dict[str, List[JComment]]:
1066+
"""Get all comments in the Java application.
1067+
1068+
Returns:
1069+
Dict[str, List[JComment]]: Dictionary of file paths and their corresponding comments.
1070+
"""
1071+
comments = {}
1072+
for file_path, _ in self.get_symbol_table().items():
1073+
comments[file_path] = self.get_comment_in_file(file_path)
1074+
return comments
1075+
1076+
def get_all_docstrings(self) -> List[Tuple[str, JComment]]:
1077+
"""Get all docstrings in the Java application.
1078+
1079+
Returns:
1080+
List[Tuple[str, JComment]]: List of (file path, comment) pairs, one for each Javadoc comment in the application.
1081+
"""
1082+
docstrings = []
1083+
for file_path, list_of_comments in self.get_all_comments().items():
1084+
docstrings += [(file_path, docstring) for docstring in list_of_comments if docstring.is_javadoc]
1085+
1086+
return docstrings

cldk/analysis/java/java_analysis.py

Lines changed: 67 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -27,7 +27,7 @@
2727
from cldk.analysis.commons.treesitter import TreesitterJava
2828
from cldk.models.java import JCallable
2929
from cldk.models.java import JApplication
30-
from cldk.models.java.models import JCRUDOperation, JCompilationUnit, JMethodDetail, JType, JField
30+
from cldk.models.java.models import JCRUDOperation, JComment, JCompilationUnit, JMethodDetail, JType, JField
3131
from cldk.analysis.java.codeanalyzer import JCodeanalyzer
3232

3333

@@ -41,7 +41,6 @@ def __init__(
4141
analysis_json_path: str | Path | None,
4242
analysis_level: str,
4343
target_files: List[str] | None,
44-
use_graalvm_binary: bool,
4544
eager_analysis: bool,
4645
) -> None:
4746
"""Initialization method for Java Analysis backend.
@@ -52,7 +51,6 @@ def __init__(
5251
analysis_backend_path (str | None): The path to the analysis_backend, defaults to None and in the case of codeql, it is assumed that the cli is installed and available in the PATH. In the case of codeanalyzer the codeanalyzer.jar is downloaded from the latest release.
5352
analysis_json_path (str | Path | None): The path to save the analysis database (analysis.json) to, defaults to None. If None, the analysis database is not persisted.
5453
analysis_level (str): Analysis level (symbol-table, call-graph)
55-
use_graalvm_binary (bool): A flag indicating whether to use the GraalVM binary for SDG analysis, defaults to False. If False, the default Java binary is used and one needs to have Java 17 or higher installed.
5654
eager_analysis (bool): A flag indicating whether to perform eager analysis, defaults to False. If True, the analysis is performed eagerly. That is, the analysis.json file is created during analysis every time even if it already exists.
5755
5856
Raises:
@@ -69,7 +67,6 @@ def __init__(
6967
self.analysis_json_path = analysis_json_path
7068
self.analysis_backend_path = analysis_backend_path
7169
self.eager_analysis = eager_analysis
72-
self.use_graalvm_binary = use_graalvm_binary
7370
self.target_files = target_files
7471
self.treesitter_java: TreesitterJava = TreesitterJava()
7572
# Initialize the analysis analysis_backend
@@ -79,7 +76,6 @@ def __init__(
7976
eager_analysis=self.eager_analysis,
8077
analysis_level=self.analysis_level,
8178
analysis_json_path=self.analysis_json_path,
82-
use_graalvm_binary=self.use_graalvm_binary,
8379
analysis_backend_path=self.analysis_backend_path,
8480
target_files=self.target_files,
8581
)
@@ -327,6 +323,21 @@ def get_method(self, qualified_class_name: str, qualified_method_name: str) -> J
327323
"""
328324
return self.backend.get_method(qualified_class_name, qualified_method_name)
329325

326+
def get_method_parameters(self, qualified_class_name: str, qualified_method_name: str) -> List[str]:
327+
"""Should return a list of method parameters given qualified class and method names.
328+
329+
Args:
330+
qualified_class_name (str): The qualified name of the class.
331+
qualified_method_name (str): The qualified name of the method.
332+
333+
Raises:
334+
NotImplementedError: Raised when we do not support this function.
335+
336+
Returns:
337+
List[JCallableParameter]: The parameters of the method with the given qualified class and method names, as returned by the backend.
338+
"""
339+
return self.backend.get_method_parameters(qualified_class_name, qualified_method_name)
340+
330341
def get_java_file(self, qualified_class_name: str) -> str:
331342
"""Should return a class given qualified class name.
332343
@@ -606,3 +617,54 @@ def get_all_delete_operations(self) -> List[Dict[str, Union[JType, JCallable, Li
606617
List[Dict[str, Union[JType, JCallable, List[JCRUDOperation]]]]: A list of all delete operations in the source code.
607618
"""
608619
return self.backend.get_all_delete_operations()
620+
621+
# Some APIs to process comments
622+
def get_comments_in_a_method(self, qualified_class_name: str, method_signature: str) -> List[JComment]:
623+
"""Get all comments in a method.
624+
625+
Args:
626+
qualified_class_name (str): Qualified name of the class.
627+
method_signature (str): Signature of the method.
628+
629+
Returns:
630+
List[JComment]: List of comments in the method.
631+
"""
632+
return self.backend.get_comments_in_a_method(qualified_class_name, method_signature)
633+
634+
def get_comments_in_a_class(self, qualified_class_name: str) -> List[JComment]:
635+
"""Get all comments in a class.
636+
637+
Args:
638+
qualified_class_name (str): Qualified name of the class.
639+
640+
Returns:
641+
List[JComment]: List of comments in the class.
642+
"""
643+
return self.backend.get_comments_in_a_class(qualified_class_name)
644+
645+
def get_comment_in_file(self, file_path: str) -> List[JComment]:
646+
"""Get all comments in a file.
647+
648+
Args:
649+
file_path (str): Path to the file.
650+
651+
Returns:
652+
List[JComment]: List of comments in the file.
653+
"""
654+
return self.backend.get_comment_in_file(file_path)
655+
656+
def get_all_comments(self) -> Dict[str, List[JComment]]:
657+
"""Get all comments in the Java application.
658+
659+
Returns:
660+
Dict[str, List[JComment]]: Dictionary of file paths and their corresponding comments.
661+
"""
662+
return self.backend.get_all_comments()
663+
664+
def get_all_docstrings(self) -> Dict[str, List[JComment]]:
665+
"""Get all docstrings in the Java application.
666+
667+
Returns:
668+
List[Tuple[str, JComment]]: List of (file path, comment) pairs, one for each Javadoc comment in the application, as returned by the backend.
669+
"""
670+
return self.backend.get_all_docstrings()

cldk/analysis/python/python_analysis.py

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -35,14 +35,12 @@ def __init__(
3535
source_code: str | None,
3636
analysis_backend_path: str | None,
3737
analysis_json_path: str | Path | None,
38-
use_graalvm_binary: bool = None,
3938
) -> None:
4039
self.project_dir = project_dir
4140
self.source_code = source_code
4241
self.analysis_json_path = analysis_json_path
4342
self.analysis_backend_path = analysis_backend_path
4443
self.eager_analysis = eager_analysis
45-
self.use_graalvm_binary = use_graalvm_binary
4644
self.analysis_backend: TreesitterPython = TreesitterPython()
4745

4846
def get_methods(self) -> List[PyMethod]:

cldk/core.py

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -60,7 +60,6 @@ def analysis(
6060
target_files: List[str] | None = None,
6161
analysis_backend_path: str | None = None,
6262
analysis_json_path: str | Path = None,
63-
use_graalvm_binary: bool = False,
6463
) -> JavaAnalysis:
6564
"""
6665
Initialize the preprocessor based on the specified language.
@@ -116,7 +115,6 @@ def analysis(
116115
analysis_level=analysis_level,
117116
analysis_backend_path=analysis_backend_path,
118117
analysis_json_path=analysis_json_path,
119-
use_graalvm_binary=use_graalvm_binary,
120118
target_files=target_files,
121119
eager_analysis=eager,
122120
)

0 commit comments

Comments
 (0)