Skip to content

Commit

Permalink
*: add org new countries/orgs pipelines and queries (pingcap#1661)
Browse files Browse the repository at this point in the history
* *: add org new countries/orgs pipelines and queries

* split commenter into pr/issue commenter

* fix cron
  • Loading branch information
Mini256 authored Oct 19, 2023
1 parent 7e44eb8 commit 41f43e1
Show file tree
Hide file tree
Showing 56 changed files with 662 additions and 38 deletions.
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
-- First-seen date per (repository, country) for the commit-author role.
-- country_code is textual: the sync pipeline copies github_users.country_code and
-- filters it against string placeholders, so an integer column cannot store it.
-- NOTE(review): width 3 covers the 3-character placeholders used in the pipeline
-- filter - confirm against the github_users column definition.
-- NOTE(review): IF NOT EXISTS leaves an already-created table untouched, so an
-- existing INT column needs a separate ALTER TABLE.
CREATE TABLE IF NOT EXISTS `mv_repo_countries_commit_author_role`
(
    `repo_id`       INT(11) NOT NULL,
    `country_code`  CHAR(3) NOT NULL,
    `first_seen_at` DATE    NOT NULL,
    PRIMARY KEY (`repo_id`, `country_code`),
    -- Supports lookups of a repository's countries ordered or filtered by date.
    KEY idx_mrc_car_on_repo_id_first_seen_at (`repo_id`, `first_seen_at`)
);
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
-- First-seen date per (repository, country) for the issue-commenter role.
-- country_code is textual: the sync pipeline copies github_users.country_code and
-- filters it against string placeholders, so an integer column cannot store it.
-- NOTE(review): width 3 covers the 3-character placeholders used in the pipeline
-- filter - confirm against the github_users column definition.
-- NOTE(review): IF NOT EXISTS leaves an already-created table untouched, so an
-- existing INT column needs a separate ALTER TABLE.
CREATE TABLE IF NOT EXISTS `mv_repo_countries_issue_commenter_role`
(
    `repo_id`       INT(11) NOT NULL,
    `country_code`  CHAR(3) NOT NULL,
    `first_seen_at` DATE    NOT NULL,
    PRIMARY KEY (`repo_id`, `country_code`),
    -- Supports lookups of a repository's countries ordered or filtered by date.
    KEY idx_mrc_icr_on_repo_id_first_seen_at (`repo_id`, `first_seen_at`)
);
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
-- First-seen date per (repository, country) for the issue-creator role.
-- country_code is textual: the sync pipeline copies github_users.country_code and
-- filters it against string placeholders, so an integer column cannot store it.
-- NOTE(review): width 3 covers the 3-character placeholders used in the pipeline
-- filter - confirm against the github_users column definition.
-- NOTE(review): IF NOT EXISTS leaves an already-created table untouched, so an
-- existing INT column needs a separate ALTER TABLE.
CREATE TABLE IF NOT EXISTS `mv_repo_countries_issue_creator_role`
(
    `repo_id`       INT(11) NOT NULL,
    `country_code`  CHAR(3) NOT NULL,
    `first_seen_at` DATE    NOT NULL,
    PRIMARY KEY (`repo_id`, `country_code`),
    -- Supports lookups of a repository's countries ordered or filtered by date.
    KEY idx_mrc_icr_on_repo_id_first_seen_at (`repo_id`, `first_seen_at`)
);
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
-- First-seen date per (repository, country) for the pull-request-commenter role.
-- country_code is textual: the country sync pipelines copy github_users.country_code
-- and filter it against string placeholders, so an integer column cannot store it.
-- NOTE(review): width 3 covers the 3-character placeholders used in the pipeline
-- filter - confirm against the github_users column definition.
-- NOTE(review): IF NOT EXISTS leaves an already-created table untouched, so an
-- existing INT column needs a separate ALTER TABLE.
CREATE TABLE IF NOT EXISTS `mv_repo_countries_pr_commenter_role`
(
    `repo_id`       INT(11) NOT NULL,
    `country_code`  CHAR(3) NOT NULL,
    `first_seen_at` DATE    NOT NULL,
    PRIMARY KEY (`repo_id`, `country_code`),
    -- Supports lookups of a repository's countries ordered or filtered by date.
    KEY idx_mrc_pcr_on_repo_id_first_seen_at (`repo_id`, `first_seen_at`)
);
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
-- First-seen date per (repository, country) for the pull-request-creator role.
-- country_code is textual: the sync pipeline copies github_users.country_code and
-- filters it against string placeholders, so an integer column cannot store it.
-- NOTE(review): width 3 covers the 3-character placeholders used in the pipeline
-- filter - confirm against the github_users column definition.
-- NOTE(review): IF NOT EXISTS leaves an already-created table untouched, so an
-- existing INT column needs a separate ALTER TABLE.
CREATE TABLE IF NOT EXISTS `mv_repo_countries_pr_creator_role`
(
    `repo_id`       INT(11) NOT NULL,
    `country_code`  CHAR(3) NOT NULL,
    `first_seen_at` DATE    NOT NULL,
    PRIMARY KEY (`repo_id`, `country_code`),
    -- Supports lookups of a repository's countries ordered or filtered by date.
    KEY idx_mrc_pcr_on_repo_id_first_seen_at (`repo_id`, `first_seen_at`)
);
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
-- First-seen date per (repository, country) for the pull-request-reviewer role.
-- country_code is textual: the sync pipeline copies github_users.country_code and
-- filters it against string placeholders, so an integer column cannot store it.
-- NOTE(review): width 3 covers the 3-character placeholders used in the pipeline
-- filter - confirm against the github_users column definition.
-- NOTE(review): IF NOT EXISTS leaves an already-created table untouched, so an
-- existing INT column needs a separate ALTER TABLE.
CREATE TABLE IF NOT EXISTS `mv_repo_countries_pr_reviewer_role`
(
    `repo_id`       INT(11) NOT NULL,
    `country_code`  CHAR(3) NOT NULL,
    `first_seen_at` DATE    NOT NULL,
    PRIMARY KEY (`repo_id`, `country_code`),
    -- Supports lookups of a repository's countries ordered or filtered by date.
    KEY idx_mrc_prr_on_repo_id_first_seen_at (`repo_id`, `first_seen_at`)
);
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
-- First-seen date per (repository, country) for the stargazer role.
-- country_code is textual: the country sync pipelines copy github_users.country_code
-- and filter it against string placeholders, so an integer column cannot store it.
-- NOTE(review): width 3 covers the 3-character placeholders used in the pipeline
-- filter - confirm against the github_users column definition.
-- NOTE(review): IF NOT EXISTS leaves an already-created table untouched, so an
-- existing INT column needs a separate ALTER TABLE.
CREATE TABLE IF NOT EXISTS `mv_repo_countries_stargazer_role`
(
    `repo_id`       INT(11) NOT NULL,
    `country_code`  CHAR(3) NOT NULL,
    `first_seen_at` DATE    NOT NULL,
    PRIMARY KEY (`repo_id`, `country_code`),
    -- Supports lookups of a repository's countries ordered or filtered by date.
    KEY idx_mrc_sr_on_repo_id_first_seen_at (`repo_id`, `first_seen_at`)
);
6 changes: 6 additions & 0 deletions configs/materialized_views/mv_repo_issues/ddl.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
-- Set of (repo_id, number) pairs identifying issues.
-- The issue-commenter pipelines join against it on both columns.
CREATE TABLE IF NOT EXISTS `mv_repo_issues` (
    `repo_id` INT(11) NOT NULL, -- NOT NULL is already implied by the primary key
    `number`  INT(11) NOT NULL, -- NOT NULL is already implied by the primary key
    PRIMARY KEY (`repo_id`, `number`)
);
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
-- First-seen date per (repository, organization name) for the commit-author role.
CREATE TABLE IF NOT EXISTS `mv_repo_organizations_commit_author_role` (
    `repo_id`       INT(11)      NOT NULL, -- NOT NULL is already implied by the primary key
    `org_name`      VARCHAR(255) NOT NULL,
    `first_seen_at` DATE         NOT NULL,
    PRIMARY KEY (`repo_id`, `org_name`),
    -- Secondary index on repository plus first-seen date.
    INDEX `idx_mro_car_on_repo_id_first_seen_at` (`repo_id`, `first_seen_at`)
);
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
-- First-seen date per (repository, organization name) for the issue-commenter role.
CREATE TABLE IF NOT EXISTS `mv_repo_organizations_issue_commenter_role` (
    `repo_id`       INT(11)      NOT NULL, -- NOT NULL is already implied by the primary key
    `org_name`      VARCHAR(255) NOT NULL,
    `first_seen_at` DATE         NOT NULL,
    PRIMARY KEY (`repo_id`, `org_name`),
    -- Secondary index on repository plus first-seen date.
    INDEX `idx_mro_icr_on_repo_id_first_seen_at` (`repo_id`, `first_seen_at`)
);
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
-- First-seen date per (repository, organization name) for the issue-creator role.
CREATE TABLE IF NOT EXISTS `mv_repo_organizations_issue_creator_role` (
    `repo_id`       INT(11)      NOT NULL, -- NOT NULL is already implied by the primary key
    `org_name`      VARCHAR(255) NOT NULL,
    `first_seen_at` DATE         NOT NULL,
    PRIMARY KEY (`repo_id`, `org_name`),
    -- Secondary index on repository plus first-seen date.
    INDEX `idx_mro_icr_on_repo_id_first_seen_at` (`repo_id`, `first_seen_at`)
);
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
-- First-seen date per (repository, organization name) for the pull-request-commenter role.
CREATE TABLE IF NOT EXISTS `mv_repo_organizations_pr_commenter_role` (
    `repo_id`       INT(11)      NOT NULL, -- NOT NULL is already implied by the primary key
    `org_name`      VARCHAR(255) NOT NULL,
    `first_seen_at` DATE         NOT NULL,
    PRIMARY KEY (`repo_id`, `org_name`),
    -- Secondary index on repository plus first-seen date.
    INDEX `idx_mro_pcr_on_repo_id_first_seen_at` (`repo_id`, `first_seen_at`)
);
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
-- First-seen date per (repository, organization name) for the pull-request-creator role.
CREATE TABLE IF NOT EXISTS `mv_repo_organizations_pr_creator_role` (
    `repo_id`       INT(11)      NOT NULL, -- NOT NULL is already implied by the primary key
    `org_name`      VARCHAR(255) NOT NULL,
    `first_seen_at` DATE         NOT NULL,
    PRIMARY KEY (`repo_id`, `org_name`),
    -- Secondary index on repository plus first-seen date.
    INDEX `idx_mro_pcr_on_repo_id_first_seen_at` (`repo_id`, `first_seen_at`)
);
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
-- First-seen date per (repository, organization name) for the pull-request-reviewer role.
CREATE TABLE IF NOT EXISTS `mv_repo_organizations_pr_reviewer_role` (
    `repo_id`       INT(11)      NOT NULL, -- NOT NULL is already implied by the primary key
    `org_name`      VARCHAR(255) NOT NULL,
    `first_seen_at` DATE         NOT NULL,
    PRIMARY KEY (`repo_id`, `org_name`),
    -- Secondary index on repository plus first-seen date.
    INDEX `idx_mro_prr_on_repo_id_first_seen_at` (`repo_id`, `first_seen_at`)
);
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
-- First-seen date per (repository, organization name) for the stargazer role.
CREATE TABLE IF NOT EXISTS `mv_repo_organizations_stargazer_role` (
    `repo_id`       INT(11)      NOT NULL, -- NOT NULL is already implied by the primary key
    `org_name`      VARCHAR(255) NOT NULL,
    `first_seen_at` DATE         NOT NULL,
    PRIMARY KEY (`repo_id`, `org_name`),
    -- Secondary index on repository plus first-seen date.
    INDEX `idx_mro_sr_on_repo_id_first_seen_at` (`repo_id`, `first_seen_at`)
);
6 changes: 6 additions & 0 deletions configs/materialized_views/mv_repo_pull_requests/ddl.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
-- Set of (repo_id, number) pairs identifying pull requests.
-- The pull-request-commenter pipeline joins against it on both columns.
CREATE TABLE IF NOT EXISTS `mv_repo_pull_requests` (
    `repo_id` INT(11) NOT NULL, -- NOT NULL is already implied by the primary key
    `number`  INT(11) NOT NULL, -- NOT NULL is already implied by the primary key
    PRIMARY KEY (`repo_id`, `number`)
);
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
{
"name": "sync_org_repo_countries_commit_author_role",
"description": "None",
"cron": "0 0 1 * * *",
"incremental": {
"timeRange": "last_day"
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
-- Upserts, per repository and country, the earliest PushEvent time found in the
-- requested time window. On a key collision the older of the stored and the
-- newly computed value is kept.
-- Only events with a non-zero org_id count, and users whose country code is a
-- placeholder (N/A, UND or empty) are skipped.
INSERT INTO mv_repo_countries_commit_author_role (repo_id, country_code, first_seen_at)
SELECT
    /*+ READ_FROM_STORAGE(TIFLASH[ge, gu]) */
    ge.repo_id,
    gu.country_code,
    MIN(ge.created_at) AS new_first_seen_at
FROM github_events AS ge
INNER JOIN github_users AS gu
    ON gu.login = ge.actor_login
WHERE ge.org_id <> 0
    AND ge.type = 'PushEvent'
    AND gu.country_code NOT IN ('', 'N/A', 'UND')
    AND ge.created_at BETWEEN :from AND :to
GROUP BY
    ge.repo_id,
    gu.country_code
ON DUPLICATE KEY UPDATE
    first_seen_at = LEAST(first_seen_at, new_first_seen_at);
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
{
"name": "sync_org_repo_countries_issue_commenter_role",
"description": "None",
"cron": "0 05 1 * * *",
"incremental": {
"timeRange": "last_day"
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
-- Upserts, per repository and country, the earliest IssueCommentEvent time found
-- in the requested time window. On a key collision the older of the stored and
-- the newly computed value is kept.
-- The join to mv_repo_issues keeps only comments whose (repo_id, number) is a
-- known issue. Events need a non-zero org_id, and users whose country code is a
-- placeholder (N/A, UND or empty) are skipped.
INSERT INTO mv_repo_countries_issue_commenter_role (repo_id, country_code, first_seen_at)
SELECT
    /*+ READ_FROM_STORAGE(TIFLASH[ge, gu]) */
    ge.repo_id,
    gu.country_code,
    MIN(ge.created_at) AS new_first_seen_at
FROM github_events AS ge
INNER JOIN github_users AS gu
    ON gu.login = ge.actor_login
INNER JOIN mv_repo_issues AS ri
    ON ri.repo_id = ge.repo_id
    AND ri.number = ge.number
WHERE ge.org_id <> 0
    AND ge.type = 'IssueCommentEvent'
    AND gu.country_code NOT IN ('', 'N/A', 'UND')
    AND ge.created_at BETWEEN :from AND :to
GROUP BY
    ge.repo_id,
    gu.country_code
ON DUPLICATE KEY UPDATE
    first_seen_at = LEAST(first_seen_at, new_first_seen_at);
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
{
"name": "sync_org_repo_countries_issue_creator_role",
"description": "None",
"cron": "0 10 1 * * *",
"incremental": {
"timeRange": "last_day"
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
-- Upserts, per repository and country, the earliest time an issue was opened
-- (IssuesEvent with action opened) within the requested time window. On a key
-- collision the older of the stored and the newly computed value is kept.
-- Only events with a non-zero org_id count, and users whose country code is a
-- placeholder (N/A, UND or empty) are skipped.
INSERT INTO mv_repo_countries_issue_creator_role (repo_id, country_code, first_seen_at)
SELECT
    /*+ READ_FROM_STORAGE(TIFLASH[ge, gu]) */
    ge.repo_id,
    gu.country_code,
    MIN(ge.created_at) AS new_first_seen_at
FROM github_events AS ge
INNER JOIN github_users AS gu
    ON gu.login = ge.actor_login
WHERE ge.org_id <> 0
    AND ge.type = 'IssuesEvent'
    AND ge.action = 'opened'
    AND gu.country_code NOT IN ('', 'N/A', 'UND')
    AND ge.created_at BETWEEN :from AND :to
GROUP BY
    ge.repo_id,
    gu.country_code
ON DUPLICATE KEY UPDATE
    first_seen_at = LEAST(first_seen_at, new_first_seen_at);
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
{
"name": "sync_org_repo_countries_pr_commenter_role",
"description": "None",
"cron": "0 15 1 * * *",
"incremental": {
"timeRange": "last_day"
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
-- Upserts, per repository and country, the earliest pull-request comment time
-- found in the requested time window. On a key collision the older of the stored
-- and the newly computed value is kept.
-- Comments arrive as IssueCommentEvent rows. The join to mv_repo_pull_requests
-- keeps only those whose (repo_id, number) is a known pull request.
-- The target is the pr_commenter table. Writing these rows to the
-- issue_commenter table would mix pull-request comments into issue statistics
-- and leave the pr_commenter table empty.
INSERT INTO mv_repo_countries_pr_commenter_role(repo_id, country_code, first_seen_at)
SELECT
    /*+ READ_FROM_STORAGE(TIFLASH[ge, gu]) */
    ge.repo_id,
    gu.country_code,
    MIN(ge.created_at) AS new_first_seen_at
FROM github_events ge
JOIN github_users gu ON ge.actor_login = gu.login
JOIN mv_repo_pull_requests rpr ON ge.repo_id = rpr.repo_id AND ge.number = rpr.number
WHERE
    ge.type = 'IssueCommentEvent'
    -- Only events with a non-zero org_id are considered.
    AND ge.org_id != 0
    AND ge.created_at BETWEEN :from AND :to
    -- Skip users whose country code is a placeholder value.
    AND gu.country_code NOT IN ('N/A', 'UND', '')
GROUP BY ge.repo_id, gu.country_code
ON DUPLICATE KEY UPDATE
    first_seen_at = LEAST(first_seen_at, new_first_seen_at)
;
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
{
"name": "sync_org_repo_countries_pr_creator_role",
"description": "None",
"cron": "0 20 1 * * *",
"incremental": {
"timeRange": "last_day"
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
-- Upserts, per repository and country, the earliest time a pull request was
-- opened (PullRequestEvent with action opened) within the requested time window.
-- On a key collision the older of the stored and the newly computed value is kept.
-- Only events with a non-zero org_id count, and users whose country code is a
-- placeholder (N/A, UND or empty) are skipped.
INSERT INTO mv_repo_countries_pr_creator_role (repo_id, country_code, first_seen_at)
SELECT
    /*+ READ_FROM_STORAGE(TIFLASH[ge, gu]) */
    ge.repo_id,
    gu.country_code,
    MIN(ge.created_at) AS new_first_seen_at
FROM github_events AS ge
INNER JOIN github_users AS gu
    ON gu.login = ge.actor_login
WHERE ge.org_id <> 0
    AND ge.type = 'PullRequestEvent'
    AND ge.action = 'opened'
    AND gu.country_code NOT IN ('', 'N/A', 'UND')
    AND ge.created_at BETWEEN :from AND :to
GROUP BY
    ge.repo_id,
    gu.country_code
ON DUPLICATE KEY UPDATE
    first_seen_at = LEAST(first_seen_at, new_first_seen_at);
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
{
"name": "sync_org_repo_countries_pr_reviewer_role",
"description": "None",
"cron": "0 25 1 * * *",
"incremental": {
"timeRange": "last_day"
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
-- Upserts, per repository and country, the earliest PullRequestReviewEvent time
-- found in the requested time window. On a key collision the older of the stored
-- and the newly computed value is kept.
-- Only events with a non-zero org_id count, and users whose country code is a
-- placeholder (N/A, UND or empty) are skipped.
INSERT INTO mv_repo_countries_pr_reviewer_role (repo_id, country_code, first_seen_at)
SELECT
    /*+ READ_FROM_STORAGE(TIFLASH[ge, gu]) */
    ge.repo_id,
    gu.country_code,
    MIN(ge.created_at) AS new_first_seen_at
FROM github_events AS ge
INNER JOIN github_users AS gu
    ON gu.login = ge.actor_login
WHERE ge.org_id <> 0
    AND ge.type = 'PullRequestReviewEvent'
    AND gu.country_code NOT IN ('', 'N/A', 'UND')
    AND ge.created_at BETWEEN :from AND :to
GROUP BY
    ge.repo_id,
    gu.country_code
ON DUPLICATE KEY UPDATE
    first_seen_at = LEAST(first_seen_at, new_first_seen_at);
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
{
"name": "sync_org_repo_countries_stargazer_role",
"description": "None",
"cron": "0 30 1 * * *",
"incremental": {
"timeRange": "last_day"
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
-- Upserts, per repository and country, the earliest WatchEvent (star) time found
-- in the requested time window. On a key collision the older of the stored and
-- the newly computed value is kept.
-- The target is the stargazer table. Writing these rows to the pr_reviewer table
-- would mix stargazers into reviewer statistics and leave the stargazer table
-- empty.
INSERT INTO mv_repo_countries_stargazer_role(repo_id, country_code, first_seen_at)
SELECT
    /*+ READ_FROM_STORAGE(TIFLASH[ge, gu]) */
    ge.repo_id,
    gu.country_code,
    MIN(ge.created_at) AS new_first_seen_at
FROM github_events ge
JOIN github_users gu ON ge.actor_login = gu.login
WHERE
    ge.type = 'WatchEvent'
    -- Only events with a non-zero org_id are considered.
    AND ge.org_id != 0
    AND ge.created_at BETWEEN :from AND :to
    -- Skip users whose country code is a placeholder value.
    AND gu.country_code NOT IN ('N/A', 'UND', '')
GROUP BY ge.repo_id, gu.country_code
ON DUPLICATE KEY UPDATE
    first_seen_at = LEAST(first_seen_at, new_first_seen_at)
;
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
{
"name": "sync_org_repo_organizations_commit_author_role",
"description": "None",
"cron": "0 35 1 * * *",
"incremental": {
"timeRange": "last_day"
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
-- Upserts, per repository and organization name, the earliest PushEvent time
-- found in the requested time window. On a key collision the older of the stored
-- and the newly computed value is kept.
-- The organization name is the first 40 characters of the user's formatted
-- organization. Users with an empty formatted organization are skipped, and
-- only events with a non-zero org_id count.
INSERT INTO mv_repo_organizations_commit_author_role (repo_id, org_name, first_seen_at)
SELECT
    /*+ READ_FROM_STORAGE(TIFLASH[ge, gu]) */
    ge.repo_id,
    LEFT(gu.organization_formatted, 40) AS org_name,
    MIN(ge.created_at) AS new_first_seen_at
FROM github_events AS ge
INNER JOIN github_users AS gu
    ON gu.login = ge.actor_login
WHERE ge.org_id <> 0
    AND ge.type = 'PushEvent'
    AND gu.organization_formatted <> ''
    AND ge.created_at BETWEEN :from AND :to
GROUP BY
    ge.repo_id,
    org_name
ON DUPLICATE KEY UPDATE
    first_seen_at = LEAST(first_seen_at, new_first_seen_at);
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
{
"name": "sync_org_repo_organizations_issue_commenter_role",
"description": "None",
"cron": "0 40 1 * * *",
"incremental": {
"timeRange": "last_day"
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
-- Upserts, per repository and organization name, the earliest IssueCommentEvent
-- time found in the requested time window. On a key collision the older of the
-- stored and the newly computed value is kept.
-- The join to mv_repo_issues keeps only comments whose (repo_id, number) is a
-- known issue. The organization name is the first 40 characters of the user's
-- formatted organization. Users with an empty formatted organization are
-- skipped, and only events with a non-zero org_id count.
INSERT INTO mv_repo_organizations_issue_commenter_role (repo_id, org_name, first_seen_at)
SELECT
    /*+ READ_FROM_STORAGE(TIFLASH[ge, gu]) */
    ge.repo_id,
    LEFT(gu.organization_formatted, 40) AS org_name,
    MIN(ge.created_at) AS new_first_seen_at
FROM github_events AS ge
INNER JOIN github_users AS gu
    ON gu.login = ge.actor_login
INNER JOIN mv_repo_issues AS ri
    ON ri.repo_id = ge.repo_id
    AND ri.number = ge.number
WHERE ge.org_id <> 0
    AND ge.type = 'IssueCommentEvent'
    AND gu.organization_formatted <> ''
    AND ge.created_at BETWEEN :from AND :to
GROUP BY
    ge.repo_id,
    org_name
ON DUPLICATE KEY UPDATE
    first_seen_at = LEAST(first_seen_at, new_first_seen_at);
Loading

0 comments on commit 41f43e1

Please sign in to comment.