Skip to content

Commit 22a0e85

Browse files
committed
feat: Add COREF for SQL extractor source code
1 parent d3da21d commit 22a0e85

File tree

2,132 files changed

+376482
-0
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

2,132 files changed

+376482
-0
lines changed

language/sql/extractor/BUILD

Lines changed: 100 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,100 @@
1+
load("//:junit5.bzl", "java_junit5_test")
2+
3+
# Lombok annotation processor: import the raw jar so the java_plugin below can use it.
java_import(
    name = "lombok_jar",
    jars = [
        "@maven//:v1/http/mvn.dev.alipay.net/artifactory/content/groups/public/org/projectlombok/lombok/1.18.16/lombok-1.18.16.jar",
    ],
)
10+
11+
# Plugin wrapper that registers Lombok as an annotation processor.
java_plugin(
    name = "lombok_processor",
    # Lombok hides its processor behind this launcher class, so it has to be
    # spelled out explicitly here.
    processor_class = "lombok.launch.AnnotationProcessorHider$AnnotationProcessor",
    deps = [":lombok_jar"],
)
19+
20+
# picocli annotation processor: import the picocli runtime and codegen jars.
java_import(
    name = "picocli_jar",
    jars = [
        "@maven//:v1/http/mvn.dev.alipay.net/artifactory/content/groups/public/info/picocli/picocli/4.6.1/picocli-4.6.1.jar",
        "@maven//:v1/http/mvn.dev.alipay.net/artifactory/content/groups/public/info/picocli/picocli-codegen/4.6.1/picocli-codegen-4.6.1.jar",
    ],
)
28+
29+
# Plugin wrapper that registers the picocli codegen annotation processor
# (generates GraalVM native-image configuration at compile time).
java_plugin(
    name = "picocli_processor",
    processor_class = "picocli.codegen.aot.graalvm.processor.NativeImageConfigGeneratorProcessor",
    # Alternative considered previously:
    # processor_class = "picocli.codegen.aot.graalvm.processor.AbstractCompositeGeneratorProcessor",
    deps = [":picocli_jar"],
)
37+
38+
# Aggregate library that exposes every annotation processor in one dependency.
# Targets depending on this get both Lombok and picocli processing applied.
java_library(
    name = "all_processors",
    exported_plugins = [
        ":lombok_processor",
        ":picocli_processor",
    ],
    # compile-time only: must not leak into the runtime classpath of consumers
    tags = ["maven:compile_only"],
    exports = [
        "@maven//:info_picocli_picocli_codegen",
        "@maven//:org_projectlombok_lombok",
    ],
)
51+
52+
# Shared utility classes (com.alipay.codequery.util) used by the extractor binary.
java_library(
    name = "util",
    srcs = glob(["src/main/java/com/alipay/codequery/util/*.java"]),
    deps = [
        "@maven//:org_apache_commons_commons_lang3",
        "@maven//:org_apache_logging_log4j_log4j_api",
        "@maven//:org_apache_logging_log4j_log4j_core",
        "@maven//:org_apache_logging_log4j_log4j_slf4j_impl",
        "@maven//:org_jetbrains_annotations",
        "@maven//:org_slf4j_slf4j_api",
    ],
)
64+
65+
# Main extractor binary: parses SQL sources (via Druid) and writes coref-sql data.
# Entry point is com.alipay.codequery.coref.sql.Extractor.
java_binary(
    name = "coref-sql-src-extractor",
    srcs = glob(["src/main/java/**/*.java"]),
    main_class = "com.alipay.codequery.coref.sql.Extractor",
    resources = glob(["src/main/resources/**/*"]),
    visibility = ["//visibility:public"],
    deps = [
        # annotation processors (Lombok, picocli codegen) + shared utils
        ":all_processors",
        ":util",
        "@maven//:com_alibaba_druid",
        "@maven//:com_google_code_gson_gson",
        "@maven//:com_google_guava_guava",
        "@maven//:com_google_re2j_re2j",
        "@maven//:com_ibm_icu_icu4j",
        "@maven//:commons_codec_commons_codec",
        "@maven//:commons_io_commons_io",
        "@maven//:info_picocli_picocli",
        "@maven//:javax_annotation_javax_annotation_api",
        "@maven//:me_tongfei_progressbar",
        "@maven//:net_java_dev_jna_jna",
        "@maven//:org_apache_commons_commons_lang3",
        "@maven//:org_apache_logging_log4j_log4j_api",
        "@maven//:org_apache_logging_log4j_log4j_core",
        "@maven//:org_apache_logging_log4j_log4j_slf4j_impl",
        "@maven//:org_hamcrest_hamcrest_all",
        "@maven//:org_ini4j_ini4j",
        "@maven//:org_jetbrains_annotations",
        "@maven//:org_mybatis_dynamic_sql_mybatis_dynamic_sql",
        "@maven//:org_mybatis_mybatis",
        "@maven//:org_projectlombok_lombok",
        "@maven//:org_slf4j_slf4j_api",
        "@maven//:org_xerial_sqlite_jdbc",
        "@maven//:tk_mybatis_mapper",
        "@maven//:uk_com_robust_it_cloning",
    ],
)

language/sql/extractor/README.md

Lines changed: 48 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,48 @@
1+
# CodeFuse-Query SQL Extractor
2+
3+
The CodeFuse-Query SQL Extractor converts the source code of SQL files into standardized coref-sql data, which is then used for further analysis by codefuse-query.
4+
5+
## Quick Start
6+
7+
1. Set up JAVA_HOME. Run `echo $JAVA_HOME` to display the current setting. If it shows nothing, it means it hasn't been configured yet.
8+
2. Build. Run `mvn clean install`.
9+
3. Run. Execute the following command:
10+
```bash
11+
java -jar target/coref-sql-src-extractor-0.0.1-jar-with-dependencies.jar ${YOUR_REPO} ./db
12+
```
13+
After execution, a file named coref_sql_src.db will be generated in the ./db directory.
14+
15+
## How to Develop
16+
17+
`coref-sql-src-extractor` uses Sparrow to analyze Druid source code and extract SQL AST information. It then uses jinja2 template generation tools to produce the required code and configurations, including Java code, PlantUML diagrams, Mybatis XML configuration files, COREF table creation SQL statements, and the Godel library.
18+
19+
Therefore, the development process requires modifying the relevant jinja2 templates and then applying the corresponding changes as follows:
20+
21+
1. Extract Druid AST node information. (If there are no changes needed for the Druid AST node information, this step can be skipped). Use Sparrow to query all Druid SQL AST node information and their available methods for extraction (e.g., Getter, isXXX methods).
22+
```bash
23+
# Assuming Druid's COREF information has been extracted to <path-to-druid-repo>/db
24+
# The output path must be language/sql/extractor/utils/code_generator/FindDruidSqlAstAllPublicGetterMethods.json, as step 2 will use this location by default
25+
sparrow query run <path-to-sparrow-cli-repo>/language/sql/extractor/utils/code_generator/FindDruidSqlAstAllPublicGetterMethods.gdl -f json -o <path-to-sparrow-cli-repo>/language/sql/extractor/utils/code_generator -d <path-to-druid-repo>/db
26+
```
27+
28+
2. Generate the SQL Extractor's code based on the extracted Druid AST information.
29+
```bash
30+
# This will generate a series of files based on the json file `language/sql/extractor/utils/code_generator/FindDruidSqlAstAllPublicGetterMethods.json` from step 1.
31+
bash <path-to-sparrow-cli-repo>/language/sql/extractor/utils/generate_all_code.sh
32+
```
33+
34+
## How to Generate SQL AST Nodes Class Diagram
35+
36+
```bash
37+
# Clone the Druid repository and switch to the corresponding release version
38+
git clone https://github.com/alibaba/druid
39+
# <druid-release-version> refers to the version specified in pom.xml
40+
git checkout tags/<druid-release-version>
41+
cd druid
42+
# Extract Java source information from Druid
43+
sparrow database create -lang java -s . ./db
44+
# Output the Druid SQL AST node relationships
45+
sparrow query run <path-to-sparrow-cli-repo>/language/sql/extractor/utils/druid_sql_ast_nodes/FindDruidSqlAstNodes.gdl -f json -o <path-to-sparrow-cli-repo>/language/sql/extractor/utils/druid_sql_ast_nodes -d ./db
46+
# Analyze the Sparrow output data and generate a class diagram in Dot format
47+
python3 <path-to-sparrow-cli-repo>/language/sql/extractor/utils/druid_sql_ast_nodes/generate_dot_diagram.py
48+
```

language/sql/extractor/README_cn.md

Lines changed: 45 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,45 @@
1+
# CodeFuse-Query SQL 抽取器
2+
CodeFuse-Query SQL 提取器将 SQL 文件的源代码转换为标准化的 coref-sql 数据,这些数据用于 codefuse-query 进行进一步分析。
3+
4+
# 快速开始
5+
1. 设置 JAVA_HOME。执行 `echo $JAVA_HOME` 来显示当前的设置。如果显示为空,则表示尚未配置。
6+
2. 构建。执行 `mvn clean install`
7+
3. 运行。执行如下命令
8+
```bash
9+
java -jar target/coref-sql-src-extractor-0.0.1-jar-with-dependencies.jar ${YOUR_REPO} ./db
10+
```
11+
执行后,一个名为 coref_sql_src.db 的文件将生成在 ./db 目录下。
12+
13+
14+
# 如何开发
15+
`coref-sql-src-extractor` 采用Sparrow来分析Druid源码并分析SQL AST信息,再通过jinja2模版生成工具来生成所需代码和配置,
16+
包括Java代码、PlantUML图、Mybatis XML配置文件、COREF建表SQL语句、Godel库。
17+
18+
因此开发流程需要修改相应jinja2模版然后再通过以下方法进行对应的修改:
19+
1. 提取Druid AST节点信息。(如果Druid AST节点信息无需修改,此步骤可跳过)通过Sparrow查询出所有Druid SQL AST节点信息以及它们的所有可用于抽取的方法(例如Getter,isXXX等方法)
20+
```bash
21+
# 假设Druid的COREF信息已抽取到 <path-to-druid-repo>/db
22+
# 输出路径必须为 language/sql/extractor/utils/code_generator/FindDruidSqlAstAllPublicGetterMethods.json,步骤2会默认使用该位置
23+
sparrow query run <path-to-sparrow-cli-repo>/language/sql/extractor/utils/code_generator/FindDruidSqlAstAllPublicGetterMethods.gdl -f json -o <path-to-sparrow-cli-repo>/language/sql/extractor/utils/code_generator -d <path-to-druid-repo>/db
24+
```
25+
26+
2. 按提取的Druid AST信息生成SQL抽取器的代码。
27+
```bash
28+
# 会依据步骤1生成的json文件 `language/sql/extractor/utils/code_generator/FindDruidSqlAstAllPublicGetterMethods.json` 来生成一系列文件。
29+
bash <path-to-sparrow-cli-repo>/language/sql/extractor/utils/generate_all_code.sh
30+
```
31+
32+
# 如何生成SQL AST节点类图
33+
```bash
34+
# Clone Druid仓库并切换对应release版本
35+
git clone https://github.com/alibaba/druid
36+
# <druid-release-version> 参见 pom.xml 中指定的版本
37+
git checkout tags/<druid-release-version>
38+
cd druid
39+
# 抽取Druid的Java源码信息
40+
sparrow database create -lang java -s . ./db
41+
# 输出Druid SQL AST 节点关系
42+
sparrow query run <path-to-sparrow-cli-repo>/language/sql/extractor/utils/druid_sql_ast_nodes/FindDruidSqlAstNodes.gdl -f json -o <path-to-sparrow-cli-repo>/language/sql/extractor/utils/druid_sql_ast_nodes -d ./db
43+
# 分析Sparrow输出数据并生成Dot格式的类图
44+
python3 <path-to-sparrow-cli-repo>/language/sql/extractor/utils/druid_sql_ast_nodes/generate_dot_diagram.py
45+
```

0 commit comments

Comments
 (0)