diff --git a/configs/materialized_views/mv_repo_countries_commit_author_role/ddl.sql b/configs/materialized_views/mv_repo_countries_commit_author_role/ddl.sql new file mode 100644 index 00000000000..4184cc68fed --- /dev/null +++ b/configs/materialized_views/mv_repo_countries_commit_author_role/ddl.sql @@ -0,0 +1,8 @@ +CREATE TABLE IF NOT EXISTS `mv_repo_countries_commit_author_role` /* per repo and country: first date the country was seen in this role */ +( + `repo_id` INT(11), + `country_code` CHAR(3), /* string code taken from github_users.country_code, not a number */ + `first_seen_at` DATE NOT NULL, + PRIMARY KEY (`repo_id`, `country_code`), + KEY idx_mrc_car_on_repo_id_first_seen_at(`repo_id`, `first_seen_at`) +); diff --git a/configs/materialized_views/mv_repo_countries_issue_commenter_role/ddl.sql b/configs/materialized_views/mv_repo_countries_issue_commenter_role/ddl.sql new file mode 100644 index 00000000000..0f5fd0db09f --- /dev/null +++ b/configs/materialized_views/mv_repo_countries_issue_commenter_role/ddl.sql @@ -0,0 +1,8 @@ +CREATE TABLE IF NOT EXISTS `mv_repo_countries_issue_commenter_role` /* per repo and country: first date the country was seen in this role */ +( + `repo_id` INT(11), + `country_code` CHAR(3), /* string code taken from github_users.country_code, not a number */ + `first_seen_at` DATE NOT NULL, + PRIMARY KEY (`repo_id`, `country_code`), + KEY idx_mrc_icr_on_repo_id_first_seen_at(`repo_id`, `first_seen_at`) +); diff --git a/configs/materialized_views/mv_repo_countries_issue_creator_role/ddl.sql b/configs/materialized_views/mv_repo_countries_issue_creator_role/ddl.sql new file mode 100644 index 00000000000..7be6f22bccf --- /dev/null +++ b/configs/materialized_views/mv_repo_countries_issue_creator_role/ddl.sql @@ -0,0 +1,8 @@ +CREATE TABLE IF NOT EXISTS `mv_repo_countries_issue_creator_role` /* per repo and country: first date the country was seen in this role */ +( + `repo_id` INT(11), + `country_code` CHAR(3), /* string code taken from github_users.country_code, not a number */ + `first_seen_at` DATE NOT NULL, + PRIMARY KEY (`repo_id`, `country_code`), + KEY idx_mrc_icr_on_repo_id_first_seen_at(`repo_id`, `first_seen_at`) +); diff --git a/configs/materialized_views/mv_repo_countries_pr_commenter_role/ddl.sql b/configs/materialized_views/mv_repo_countries_pr_commenter_role/ddl.sql new file mode 100644 index 00000000000..ced78066fb0 --- /dev/null +++ 
b/configs/materialized_views/mv_repo_countries_pr_commenter_role/ddl.sql @@ -0,0 +1,8 @@ +CREATE TABLE IF NOT EXISTS `mv_repo_countries_pr_commenter_role` /* per repo and country: first date the country was seen in this role */ +( + `repo_id` INT(11), + `country_code` CHAR(3), /* string code taken from github_users.country_code, not a number */ + `first_seen_at` DATE NOT NULL, + PRIMARY KEY (`repo_id`, `country_code`), + KEY idx_mrc_pcr_on_repo_id_first_seen_at(`repo_id`, `first_seen_at`) +); diff --git a/configs/materialized_views/mv_repo_countries_pr_creator_role/ddl.sql b/configs/materialized_views/mv_repo_countries_pr_creator_role/ddl.sql new file mode 100644 index 00000000000..07040ffc5eb --- /dev/null +++ b/configs/materialized_views/mv_repo_countries_pr_creator_role/ddl.sql @@ -0,0 +1,8 @@ +CREATE TABLE IF NOT EXISTS `mv_repo_countries_pr_creator_role` /* per repo and country: first date the country was seen in this role */ +( + `repo_id` INT(11), + `country_code` CHAR(3), /* string code taken from github_users.country_code, not a number */ + `first_seen_at` DATE NOT NULL, + PRIMARY KEY (`repo_id`, `country_code`), + KEY idx_mrc_pcr_on_repo_id_first_seen_at(`repo_id`, `first_seen_at`) +); diff --git a/configs/materialized_views/mv_repo_countries_pr_reviewer_role/ddl.sql b/configs/materialized_views/mv_repo_countries_pr_reviewer_role/ddl.sql new file mode 100644 index 00000000000..ff98d868084 --- /dev/null +++ b/configs/materialized_views/mv_repo_countries_pr_reviewer_role/ddl.sql @@ -0,0 +1,8 @@ +CREATE TABLE IF NOT EXISTS `mv_repo_countries_pr_reviewer_role` /* per repo and country: first date the country was seen in this role */ +( + `repo_id` INT(11), + `country_code` CHAR(3), /* string code taken from github_users.country_code, not a number */ + `first_seen_at` DATE NOT NULL, + PRIMARY KEY (`repo_id`, `country_code`), + KEY idx_mrc_prr_on_repo_id_first_seen_at(`repo_id`, `first_seen_at`) +); diff --git a/configs/materialized_views/mv_repo_countries_stargazer_role/ddl.sql b/configs/materialized_views/mv_repo_countries_stargazer_role/ddl.sql new file mode 100644 index 00000000000..b718fdaf5fe --- /dev/null +++ b/configs/materialized_views/mv_repo_countries_stargazer_role/ddl.sql @@ -0,0 +1,8 @@ +CREATE TABLE IF NOT EXISTS `mv_repo_countries_stargazer_role` /* per repo and country: first date the country was seen in this role */ +( + `repo_id` INT(11), + `country_code` CHAR(3), /* string code taken from github_users.country_code, not a number */ + `first_seen_at` DATE NOT NULL, + PRIMARY KEY (`repo_id`, 
`country_code`), + KEY idx_mrc_sr_on_repo_id_first_seen_at(`repo_id`, `first_seen_at`) +); diff --git a/configs/materialized_views/mv_repo_issues/ddl.sql b/configs/materialized_views/mv_repo_issues/ddl.sql new file mode 100644 index 00000000000..e387b3c7163 --- /dev/null +++ b/configs/materialized_views/mv_repo_issues/ddl.sql @@ -0,0 +1,6 @@ +CREATE TABLE IF NOT EXISTS `mv_repo_issues` /* one row per (repo_id, number) pair recorded by the sync_repo_issues pipeline from IssuesEvent rows */ +( + `repo_id` INT(11), + `number` INT(11), + PRIMARY KEY (`repo_id`, `number`) +); diff --git a/configs/materialized_views/mv_repo_organizations_commit_author_role/ddl.sql b/configs/materialized_views/mv_repo_organizations_commit_author_role/ddl.sql new file mode 100644 index 00000000000..3e3770ffe66 --- /dev/null +++ b/configs/materialized_views/mv_repo_organizations_commit_author_role/ddl.sql @@ -0,0 +1,8 @@ +CREATE TABLE IF NOT EXISTS `mv_repo_organizations_commit_author_role` /* per repo and organization name: first date the organization was seen in this role */ +( + `repo_id` INT(11), + `org_name` VARCHAR(255) NOT NULL, /* the sync pipeline stores LEFT(github_users.organization_formatted, 40) here */ + `first_seen_at` DATE NOT NULL, + PRIMARY KEY (`repo_id`, `org_name`), + KEY idx_mro_car_on_repo_id_first_seen_at(`repo_id`, `first_seen_at`) +); diff --git a/configs/materialized_views/mv_repo_organizations_issue_commenter_role/ddl.sql b/configs/materialized_views/mv_repo_organizations_issue_commenter_role/ddl.sql new file mode 100644 index 00000000000..80458432351 --- /dev/null +++ b/configs/materialized_views/mv_repo_organizations_issue_commenter_role/ddl.sql @@ -0,0 +1,8 @@ +CREATE TABLE IF NOT EXISTS `mv_repo_organizations_issue_commenter_role` /* per repo and organization name: first date the organization was seen in this role */ +( + `repo_id` INT(11), + `org_name` VARCHAR(255) NOT NULL, /* the sync pipeline stores LEFT(github_users.organization_formatted, 40) here */ + `first_seen_at` DATE NOT NULL, + PRIMARY KEY (`repo_id`, `org_name`), + KEY idx_mro_icr_on_repo_id_first_seen_at(`repo_id`, `first_seen_at`) +); diff --git a/configs/materialized_views/mv_repo_organizations_issue_creator_role/ddl.sql b/configs/materialized_views/mv_repo_organizations_issue_creator_role/ddl.sql new file mode 100644 index 00000000000..ee8e91bc70b --- /dev/null +++ 
b/configs/materialized_views/mv_repo_organizations_issue_creator_role/ddl.sql @@ -0,0 +1,8 @@ +CREATE TABLE IF NOT EXISTS `mv_repo_organizations_issue_creator_role` /* per repo and organization name: first date the organization was seen in this role */ +( + `repo_id` INT(11), + `org_name` VARCHAR(255) NOT NULL, /* the sync pipeline stores LEFT(github_users.organization_formatted, 40) here */ + `first_seen_at` DATE NOT NULL, + PRIMARY KEY (`repo_id`, `org_name`), + KEY idx_mro_icr_on_repo_id_first_seen_at(`repo_id`, `first_seen_at`) +); diff --git a/configs/materialized_views/mv_repo_organizations_pr_commenter_role/ddl.sql b/configs/materialized_views/mv_repo_organizations_pr_commenter_role/ddl.sql new file mode 100644 index 00000000000..5e3cd0caa1a --- /dev/null +++ b/configs/materialized_views/mv_repo_organizations_pr_commenter_role/ddl.sql @@ -0,0 +1,8 @@ +CREATE TABLE IF NOT EXISTS `mv_repo_organizations_pr_commenter_role` /* per repo and organization name: first date the organization was seen in this role */ +( + `repo_id` INT(11), + `org_name` VARCHAR(255) NOT NULL, /* the sync pipeline stores LEFT(github_users.organization_formatted, 40) here */ + `first_seen_at` DATE NOT NULL, + PRIMARY KEY (`repo_id`, `org_name`), + KEY idx_mro_pcr_on_repo_id_first_seen_at(`repo_id`, `first_seen_at`) +); diff --git a/configs/materialized_views/mv_repo_organizations_pr_creator_role/ddl.sql b/configs/materialized_views/mv_repo_organizations_pr_creator_role/ddl.sql new file mode 100644 index 00000000000..727df2c119a --- /dev/null +++ b/configs/materialized_views/mv_repo_organizations_pr_creator_role/ddl.sql @@ -0,0 +1,8 @@ +CREATE TABLE IF NOT EXISTS `mv_repo_organizations_pr_creator_role` /* per repo and organization name: first date the organization was seen in this role */ +( + `repo_id` INT(11), + `org_name` VARCHAR(255) NOT NULL, /* the sync pipeline stores LEFT(github_users.organization_formatted, 40) here */ + `first_seen_at` DATE NOT NULL, + PRIMARY KEY (`repo_id`, `org_name`), + KEY idx_mro_pcr_on_repo_id_first_seen_at(`repo_id`, `first_seen_at`) +); diff --git a/configs/materialized_views/mv_repo_organizations_pr_reviewer_role/ddl.sql b/configs/materialized_views/mv_repo_organizations_pr_reviewer_role/ddl.sql new file mode 100644 index 00000000000..aab99426c34 --- /dev/null +++ b/configs/materialized_views/mv_repo_organizations_pr_reviewer_role/ddl.sql @@ -0,0 +1,8 @@ +CREATE TABLE IF NOT EXISTS `mv_repo_organizations_pr_reviewer_role` /* per repo and organization name: first date the organization was seen in this role */ +( + `repo_id` INT(11), + 
`org_name` VARCHAR(255) NOT NULL, + `first_seen_at` DATE NOT NULL, + PRIMARY KEY (`repo_id`, `org_name`), + KEY idx_mro_prr_on_repo_id_first_seen_at(`repo_id`, `first_seen_at`) +); diff --git a/configs/materialized_views/mv_repo_organizations_stargazer_role/ddl.sql b/configs/materialized_views/mv_repo_organizations_stargazer_role/ddl.sql new file mode 100644 index 00000000000..ab65bb3cb98 --- /dev/null +++ b/configs/materialized_views/mv_repo_organizations_stargazer_role/ddl.sql @@ -0,0 +1,8 @@ +CREATE TABLE IF NOT EXISTS `mv_repo_organizations_stargazer_role` /* per repo and organization name: first date the organization was seen in this role */ +( + `repo_id` INT(11), + `org_name` VARCHAR(255) NOT NULL, /* the sync pipeline stores LEFT(github_users.organization_formatted, 40) here */ + `first_seen_at` DATE NOT NULL, + PRIMARY KEY (`repo_id`, `org_name`), + KEY idx_mro_sr_on_repo_id_first_seen_at(`repo_id`, `first_seen_at`) +); diff --git a/configs/materialized_views/mv_repo_pull_requests/ddl.sql b/configs/materialized_views/mv_repo_pull_requests/ddl.sql new file mode 100644 index 00000000000..50d3eb25c30 --- /dev/null +++ b/configs/materialized_views/mv_repo_pull_requests/ddl.sql @@ -0,0 +1,6 @@ +CREATE TABLE IF NOT EXISTS `mv_repo_pull_requests` /* one row per (repo_id, number) pair recorded by the sync_repo_pull_requests pipeline from PullRequestEvent rows */ +( + `repo_id` INT(11), + `number` INT(11), + PRIMARY KEY (`repo_id`, `number`) +); diff --git a/configs/pipelines/sync_org_repo_countries_commit_author_role/config.json b/configs/pipelines/sync_org_repo_countries_commit_author_role/config.json new file mode 100644 index 00000000000..95033a0805f --- /dev/null +++ b/configs/pipelines/sync_org_repo_countries_commit_author_role/config.json @@ -0,0 +1,8 @@ +{ + "name": "sync_org_repo_countries_commit_author_role", + "description": "None", + "cron": "0 0 1 * * *", + "incremental": { + "timeRange": "last_day" + } +} \ No newline at end of file diff --git a/configs/pipelines/sync_org_repo_countries_commit_author_role/process.sql b/configs/pipelines/sync_org_repo_countries_commit_author_role/process.sql new file mode 100644 index 00000000000..5538c8277ca --- /dev/null +++ b/configs/pipelines/sync_org_repo_countries_commit_author_role/process.sql 
@@ -0,0 +1,17 @@ +INSERT INTO mv_repo_countries_commit_author_role(repo_id, country_code, first_seen_at) /* earliest PushEvent date per repo and country within the :from..:to window */ +SELECT + /*+ READ_FROM_STORAGE(TIFLASH[ge, gu]) */ + ge.repo_id, + gu.country_code, + MIN(ge.created_at) AS new_first_seen_at +FROM github_events ge +JOIN github_users gu ON ge.actor_login = gu.login +WHERE + ge.type = 'PushEvent' + AND ge.org_id != 0 + AND ge.created_at BETWEEN :from AND :to + AND gu.country_code NOT IN ('N/A', 'UND', '') +GROUP BY ge.repo_id, gu.country_code +ON DUPLICATE KEY UPDATE + first_seen_at = LEAST(first_seen_at, VALUES(first_seen_at)) /* keep the earliest date; VALUES() yields the incoming value, since a GROUP BY select alias cannot be referenced here */ +; diff --git a/configs/pipelines/sync_org_repo_countries_issue_commenter_role/config.json b/configs/pipelines/sync_org_repo_countries_issue_commenter_role/config.json new file mode 100644 index 00000000000..c2b2f3320be --- /dev/null +++ b/configs/pipelines/sync_org_repo_countries_issue_commenter_role/config.json @@ -0,0 +1,8 @@ +{ + "name": "sync_org_repo_countries_issue_commenter_role", + "description": "None", + "cron": "0 05 1 * * *", + "incremental": { + "timeRange": "last_day" + } +} \ No newline at end of file diff --git a/configs/pipelines/sync_org_repo_countries_issue_commenter_role/process.sql b/configs/pipelines/sync_org_repo_countries_issue_commenter_role/process.sql new file mode 100644 index 00000000000..8ccc2e9669c --- /dev/null +++ b/configs/pipelines/sync_org_repo_countries_issue_commenter_role/process.sql @@ -0,0 +1,18 @@ +INSERT INTO mv_repo_countries_issue_commenter_role(repo_id, country_code, first_seen_at) /* earliest IssueCommentEvent date per repo and country, limited to numbers present in mv_repo_issues */ +SELECT + /*+ READ_FROM_STORAGE(TIFLASH[ge, gu]) */ + ge.repo_id, + gu.country_code, + MIN(ge.created_at) AS new_first_seen_at +FROM github_events ge +JOIN github_users gu ON ge.actor_login = gu.login +JOIN mv_repo_issues ri ON ge.repo_id = ri.repo_id AND ge.number = ri.number +WHERE + ge.type = 'IssueCommentEvent' + AND ge.org_id != 0 + AND ge.created_at BETWEEN :from AND :to + AND gu.country_code NOT IN ('N/A', 'UND', '') +GROUP BY ge.repo_id, gu.country_code +ON DUPLICATE KEY 
UPDATE + first_seen_at = LEAST(first_seen_at, VALUES(first_seen_at)) /* keep the earliest date; VALUES() yields the incoming value, since a GROUP BY select alias cannot be referenced here */ +; diff --git a/configs/pipelines/sync_org_repo_countries_issue_creator_role/config.json b/configs/pipelines/sync_org_repo_countries_issue_creator_role/config.json new file mode 100644 index 00000000000..288c653905e --- /dev/null +++ b/configs/pipelines/sync_org_repo_countries_issue_creator_role/config.json @@ -0,0 +1,8 @@ +{ + "name": "sync_org_repo_countries_issue_creator_role", + "description": "None", + "cron": "0 10 1 * * *", + "incremental": { + "timeRange": "last_day" + } +} \ No newline at end of file diff --git a/configs/pipelines/sync_org_repo_countries_issue_creator_role/process.sql b/configs/pipelines/sync_org_repo_countries_issue_creator_role/process.sql new file mode 100644 index 00000000000..af2713c50d8 --- /dev/null +++ b/configs/pipelines/sync_org_repo_countries_issue_creator_role/process.sql @@ -0,0 +1,18 @@ +INSERT INTO mv_repo_countries_issue_creator_role(repo_id, country_code, first_seen_at) /* earliest 'opened' IssuesEvent date per repo and country within the :from..:to window */ +SELECT + /*+ READ_FROM_STORAGE(TIFLASH[ge, gu]) */ + ge.repo_id, + gu.country_code, + MIN(ge.created_at) AS new_first_seen_at +FROM github_events ge +JOIN github_users gu ON ge.actor_login = gu.login +WHERE + ge.type = 'IssuesEvent' + AND ge.action = 'opened' + AND ge.org_id != 0 + AND ge.created_at BETWEEN :from AND :to + AND gu.country_code NOT IN ('N/A', 'UND', '') +GROUP BY ge.repo_id, gu.country_code +ON DUPLICATE KEY UPDATE + first_seen_at = LEAST(first_seen_at, VALUES(first_seen_at)) /* keep the earliest date; VALUES() yields the incoming value */ +; diff --git a/configs/pipelines/sync_org_repo_countries_pr_commenter_role/config.json b/configs/pipelines/sync_org_repo_countries_pr_commenter_role/config.json new file mode 100644 index 00000000000..ada1c5173d0 --- /dev/null +++ b/configs/pipelines/sync_org_repo_countries_pr_commenter_role/config.json @@ -0,0 +1,8 @@ +{ + "name": "sync_org_repo_countries_pr_commenter_role", + "description": "None", + "cron": "0 15 1 * * *", + "incremental": { + "timeRange": "last_day" + } +} \ No newline at end of file diff 
--git a/configs/pipelines/sync_org_repo_countries_pr_commenter_role/process.sql b/configs/pipelines/sync_org_repo_countries_pr_commenter_role/process.sql new file mode 100644 index 00000000000..a6e15ac8f9c --- /dev/null +++ b/configs/pipelines/sync_org_repo_countries_pr_commenter_role/process.sql @@ -0,0 +1,18 @@ +INSERT INTO mv_repo_countries_pr_commenter_role(repo_id, country_code, first_seen_at) /* earliest IssueCommentEvent date per repo and country, limited to numbers present in mv_repo_pull_requests; writes the PR-commenter table */ +SELECT + /*+ READ_FROM_STORAGE(TIFLASH[ge, gu]) */ + ge.repo_id, + gu.country_code, + MIN(ge.created_at) AS new_first_seen_at +FROM github_events ge +JOIN github_users gu ON ge.actor_login = gu.login +JOIN mv_repo_pull_requests rpr ON ge.repo_id = rpr.repo_id AND ge.number = rpr.number +WHERE + ge.type = 'IssueCommentEvent' + AND ge.org_id != 0 + AND ge.created_at BETWEEN :from AND :to + AND gu.country_code NOT IN ('N/A', 'UND', '') +GROUP BY ge.repo_id, gu.country_code +ON DUPLICATE KEY UPDATE + first_seen_at = LEAST(first_seen_at, VALUES(first_seen_at)) /* keep the earliest date; VALUES() yields the incoming value, since a GROUP BY select alias cannot be referenced here */ +; diff --git a/configs/pipelines/sync_org_repo_countries_pr_creator_role/config.json b/configs/pipelines/sync_org_repo_countries_pr_creator_role/config.json new file mode 100644 index 00000000000..0fa6237747d --- /dev/null +++ b/configs/pipelines/sync_org_repo_countries_pr_creator_role/config.json @@ -0,0 +1,8 @@ +{ + "name": "sync_org_repo_countries_pr_creator_role", + "description": "None", + "cron": "0 20 1 * * *", + "incremental": { + "timeRange": "last_day" + } +} \ No newline at end of file diff --git a/configs/pipelines/sync_org_repo_countries_pr_creator_role/process.sql b/configs/pipelines/sync_org_repo_countries_pr_creator_role/process.sql new file mode 100644 index 00000000000..4e3d4f80390 --- /dev/null +++ b/configs/pipelines/sync_org_repo_countries_pr_creator_role/process.sql @@ -0,0 +1,18 @@ +INSERT INTO mv_repo_countries_pr_creator_role(repo_id, country_code, first_seen_at) /* earliest 'opened' PullRequestEvent date per repo and country within the :from..:to window */ +SELECT + /*+ READ_FROM_STORAGE(TIFLASH[ge, gu]) */ + ge.repo_id, + gu.country_code, + MIN(ge.created_at) AS new_first_seen_at +FROM 
github_events ge +JOIN github_users gu ON ge.actor_login = gu.login +WHERE + ge.type = 'PullRequestEvent' + AND ge.action = 'opened' + AND ge.org_id != 0 + AND ge.created_at BETWEEN :from AND :to + AND gu.country_code NOT IN ('N/A', 'UND', '') +GROUP BY ge.repo_id, gu.country_code +ON DUPLICATE KEY UPDATE + first_seen_at = LEAST(first_seen_at, VALUES(first_seen_at)) /* keep the earliest date; VALUES() yields the incoming value, since a GROUP BY select alias cannot be referenced here */ +; diff --git a/configs/pipelines/sync_org_repo_countries_pr_reviewer_role/config.json b/configs/pipelines/sync_org_repo_countries_pr_reviewer_role/config.json new file mode 100644 index 00000000000..bc602314ec6 --- /dev/null +++ b/configs/pipelines/sync_org_repo_countries_pr_reviewer_role/config.json @@ -0,0 +1,8 @@ +{ + "name": "sync_org_repo_countries_pr_reviewer_role", + "description": "None", + "cron": "0 25 1 * * *", + "incremental": { + "timeRange": "last_day" + } +} \ No newline at end of file diff --git a/configs/pipelines/sync_org_repo_countries_pr_reviewer_role/process.sql b/configs/pipelines/sync_org_repo_countries_pr_reviewer_role/process.sql new file mode 100644 index 00000000000..f1c7d43e04f --- /dev/null +++ b/configs/pipelines/sync_org_repo_countries_pr_reviewer_role/process.sql @@ -0,0 +1,17 @@ +INSERT INTO mv_repo_countries_pr_reviewer_role(repo_id, country_code, first_seen_at) /* earliest PullRequestReviewEvent date per repo and country within the :from..:to window */ +SELECT + /*+ READ_FROM_STORAGE(TIFLASH[ge, gu]) */ + ge.repo_id, + gu.country_code, + MIN(ge.created_at) AS new_first_seen_at +FROM github_events ge +JOIN github_users gu ON ge.actor_login = gu.login +WHERE + ge.type = 'PullRequestReviewEvent' + AND ge.org_id != 0 + AND ge.created_at BETWEEN :from AND :to + AND gu.country_code NOT IN ('N/A', 'UND', '') +GROUP BY ge.repo_id, gu.country_code +ON DUPLICATE KEY UPDATE + first_seen_at = LEAST(first_seen_at, VALUES(first_seen_at)) /* keep the earliest date; VALUES() yields the incoming value */ +; diff --git a/configs/pipelines/sync_org_repo_countries_stargazer_role/config.json b/configs/pipelines/sync_org_repo_countries_stargazer_role/config.json new file mode 100644 index 00000000000..33d8cf3dad5 --- /dev/null +++ 
b/configs/pipelines/sync_org_repo_countries_stargazer_role/config.json @@ -0,0 +1,8 @@ +{ + "name": "sync_org_repo_countries_stargazer_role", + "description": "None", + "cron": "0 30 1 * * *", + "incremental": { + "timeRange": "last_day" + } +} \ No newline at end of file diff --git a/configs/pipelines/sync_org_repo_countries_stargazer_role/process.sql b/configs/pipelines/sync_org_repo_countries_stargazer_role/process.sql new file mode 100644 index 00000000000..496f220ddad --- /dev/null +++ b/configs/pipelines/sync_org_repo_countries_stargazer_role/process.sql @@ -0,0 +1,17 @@ +INSERT INTO mv_repo_countries_stargazer_role(repo_id, country_code, first_seen_at) /* earliest WatchEvent date per repo and country within the :from..:to window; writes the stargazer table */ +SELECT + /*+ READ_FROM_STORAGE(TIFLASH[ge, gu]) */ + ge.repo_id, + gu.country_code, + MIN(ge.created_at) AS new_first_seen_at +FROM github_events ge +JOIN github_users gu ON ge.actor_login = gu.login +WHERE + ge.type = 'WatchEvent' + AND ge.org_id != 0 + AND ge.created_at BETWEEN :from AND :to + AND gu.country_code NOT IN ('N/A', 'UND', '') +GROUP BY ge.repo_id, gu.country_code +ON DUPLICATE KEY UPDATE + first_seen_at = LEAST(first_seen_at, VALUES(first_seen_at)) /* keep the earliest date; VALUES() yields the incoming value, since a GROUP BY select alias cannot be referenced here */ +; diff --git a/configs/pipelines/sync_org_repo_organizations_commit_author_role/config.json b/configs/pipelines/sync_org_repo_organizations_commit_author_role/config.json new file mode 100644 index 00000000000..14d7c02f3cc --- /dev/null +++ b/configs/pipelines/sync_org_repo_organizations_commit_author_role/config.json @@ -0,0 +1,8 @@ +{ + "name": "sync_org_repo_organizations_commit_author_role", + "description": "None", + "cron": "0 35 1 * * *", + "incremental": { + "timeRange": "last_day" + } +} \ No newline at end of file diff --git a/configs/pipelines/sync_org_repo_organizations_commit_author_role/process.sql b/configs/pipelines/sync_org_repo_organizations_commit_author_role/process.sql new file mode 100644 index 00000000000..e2d1a3d6945 --- /dev/null +++ b/configs/pipelines/sync_org_repo_organizations_commit_author_role/process.sql @@ -0,0 +1,17 
@@ +INSERT INTO mv_repo_organizations_commit_author_role(repo_id, org_name, first_seen_at) /* earliest PushEvent date per repo and organization name within the :from..:to window */ +SELECT + /*+ READ_FROM_STORAGE(TIFLASH[ge, gu]) */ + ge.repo_id, + LEFT(gu.organization_formatted, 40) AS org_name, + MIN(ge.created_at) AS new_first_seen_at +FROM github_events ge +JOIN github_users gu ON ge.actor_login = gu.login +WHERE + ge.type = 'PushEvent' + AND ge.org_id != 0 + AND ge.created_at BETWEEN :from AND :to + AND gu.organization_formatted != '' +GROUP BY ge.repo_id, org_name +ON DUPLICATE KEY UPDATE + first_seen_at = LEAST(first_seen_at, VALUES(first_seen_at)) /* keep the earliest date; VALUES() yields the incoming value, since a GROUP BY select alias cannot be referenced here */ +; diff --git a/configs/pipelines/sync_org_repo_organizations_issue_commenter_role/config.json b/configs/pipelines/sync_org_repo_organizations_issue_commenter_role/config.json new file mode 100644 index 00000000000..600cf92b7a4 --- /dev/null +++ b/configs/pipelines/sync_org_repo_organizations_issue_commenter_role/config.json @@ -0,0 +1,8 @@ +{ + "name": "sync_org_repo_organizations_issue_commenter_role", + "description": "None", + "cron": "0 40 1 * * *", + "incremental": { + "timeRange": "last_day" + } +} \ No newline at end of file diff --git a/configs/pipelines/sync_org_repo_organizations_issue_commenter_role/process.sql b/configs/pipelines/sync_org_repo_organizations_issue_commenter_role/process.sql new file mode 100644 index 00000000000..bf44822b354 --- /dev/null +++ b/configs/pipelines/sync_org_repo_organizations_issue_commenter_role/process.sql @@ -0,0 +1,18 @@ +INSERT INTO mv_repo_organizations_issue_commenter_role(repo_id, org_name, first_seen_at) /* earliest IssueCommentEvent date per repo and organization name, limited to numbers present in mv_repo_issues */ +SELECT + /*+ READ_FROM_STORAGE(TIFLASH[ge, gu]) */ + ge.repo_id, + LEFT(gu.organization_formatted, 40) AS org_name, + MIN(ge.created_at) AS new_first_seen_at +FROM github_events ge +JOIN github_users gu ON ge.actor_login = gu.login +JOIN mv_repo_issues ri ON ge.repo_id = ri.repo_id AND ge.number = ri.number +WHERE + ge.type = 'IssueCommentEvent' + AND ge.org_id != 0 + AND ge.created_at BETWEEN :from AND :to + AND gu.organization_formatted != '' +GROUP BY 
ge.repo_id, org_name +ON DUPLICATE KEY UPDATE + first_seen_at = LEAST(first_seen_at, VALUES(first_seen_at)) /* keep the earliest date; VALUES() yields the incoming value, since a GROUP BY select alias cannot be referenced here */ +; diff --git a/configs/pipelines/sync_org_repo_organizations_issue_creator_role/config.json b/configs/pipelines/sync_org_repo_organizations_issue_creator_role/config.json new file mode 100644 index 00000000000..51c8c8e77dc --- /dev/null +++ b/configs/pipelines/sync_org_repo_organizations_issue_creator_role/config.json @@ -0,0 +1,8 @@ +{ + "name": "sync_org_repo_organizations_issue_creator_role", + "description": "None", + "cron": "0 45 1 * * *", + "incremental": { + "timeRange": "last_day" + } +} \ No newline at end of file diff --git a/configs/pipelines/sync_org_repo_organizations_issue_creator_role/process.sql b/configs/pipelines/sync_org_repo_organizations_issue_creator_role/process.sql new file mode 100644 index 00000000000..01d317e0c47 --- /dev/null +++ b/configs/pipelines/sync_org_repo_organizations_issue_creator_role/process.sql @@ -0,0 +1,18 @@ +INSERT INTO mv_repo_organizations_issue_creator_role(repo_id, org_name, first_seen_at) /* earliest 'opened' IssuesEvent date per repo and organization name within the :from..:to window */ +SELECT + /*+ READ_FROM_STORAGE(TIFLASH[ge, gu]) */ + ge.repo_id, + LEFT(gu.organization_formatted, 40) AS org_name, + MIN(ge.created_at) AS new_first_seen_at +FROM github_events ge +JOIN github_users gu ON ge.actor_login = gu.login +WHERE + ge.type = 'IssuesEvent' + AND ge.action = 'opened' + AND ge.org_id != 0 + AND ge.created_at BETWEEN :from AND :to + AND gu.organization_formatted != '' +GROUP BY ge.repo_id, org_name +ON DUPLICATE KEY UPDATE + first_seen_at = LEAST(first_seen_at, VALUES(first_seen_at)) /* keep the earliest date; VALUES() yields the incoming value */ +; diff --git a/configs/pipelines/sync_org_repo_organizations_pr_commenter_role/config.json b/configs/pipelines/sync_org_repo_organizations_pr_commenter_role/config.json new file mode 100644 index 00000000000..e66d7bbdf92 --- /dev/null +++ b/configs/pipelines/sync_org_repo_organizations_pr_commenter_role/config.json @@ -0,0 +1,8 @@ +{ + "name": "sync_org_repo_organizations_pr_commenter_role", + "description": "None", + "cron": "0 
50 1 * * *", + "incremental": { + "timeRange": "last_day" + } +} \ No newline at end of file diff --git a/configs/pipelines/sync_org_repo_organizations_pr_commenter_role/process.sql b/configs/pipelines/sync_org_repo_organizations_pr_commenter_role/process.sql new file mode 100644 index 00000000000..aedb80b4b52 --- /dev/null +++ b/configs/pipelines/sync_org_repo_organizations_pr_commenter_role/process.sql @@ -0,0 +1,18 @@ +INSERT INTO mv_repo_organizations_pr_commenter_role(repo_id, org_name, first_seen_at) /* earliest IssueCommentEvent date per repo and organization name, limited to numbers present in mv_repo_pull_requests; writes the PR-commenter table */ +SELECT + /*+ READ_FROM_STORAGE(TIFLASH[ge, gu]) */ + ge.repo_id, + LEFT(gu.organization_formatted, 40) AS org_name, + MIN(ge.created_at) AS new_first_seen_at +FROM github_events ge +JOIN github_users gu ON ge.actor_login = gu.login +JOIN mv_repo_pull_requests rpr ON ge.repo_id = rpr.repo_id AND ge.number = rpr.number +WHERE + ge.type = 'IssueCommentEvent' + AND ge.org_id != 0 + AND ge.created_at BETWEEN :from AND :to + AND gu.organization_formatted != '' +GROUP BY ge.repo_id, org_name +ON DUPLICATE KEY UPDATE + first_seen_at = LEAST(first_seen_at, VALUES(first_seen_at)) /* keep the earliest date; VALUES() yields the incoming value, since a GROUP BY select alias cannot be referenced here */ +; diff --git a/configs/pipelines/sync_org_repo_organizations_pr_creator_role/config.json b/configs/pipelines/sync_org_repo_organizations_pr_creator_role/config.json new file mode 100644 index 00000000000..bba33e71c0f --- /dev/null +++ b/configs/pipelines/sync_org_repo_organizations_pr_creator_role/config.json @@ -0,0 +1,8 @@ +{ + "name": "sync_org_repo_organizations_pr_creator_role", + "description": "None", + "cron": "0 55 1 * * *", + "incremental": { + "timeRange": "last_day" + } +} \ No newline at end of file diff --git a/configs/pipelines/sync_org_repo_organizations_pr_creator_role/process.sql b/configs/pipelines/sync_org_repo_organizations_pr_creator_role/process.sql new file mode 100644 index 00000000000..474943746a0 --- /dev/null +++ b/configs/pipelines/sync_org_repo_organizations_pr_creator_role/process.sql @@ -0,0 +1,18 @@ +INSERT INTO mv_repo_organizations_pr_creator_role(repo_id, 
org_name, first_seen_at) +SELECT + /*+ READ_FROM_STORAGE(TIFLASH[ge, gu]) */ + ge.repo_id, + LEFT(gu.organization_formatted, 40) AS org_name, + MIN(ge.created_at) AS new_first_seen_at +FROM github_events ge +JOIN github_users gu ON ge.actor_login = gu.login +WHERE + ge.type = 'PullRequestEvent' + AND ge.action = 'opened' + AND ge.org_id != 0 + AND ge.created_at BETWEEN :from AND :to + AND gu.organization_formatted != '' +GROUP BY ge.repo_id, org_name +ON DUPLICATE KEY UPDATE + first_seen_at = LEAST(first_seen_at, VALUES(first_seen_at)) /* keep the earliest date; VALUES() yields the incoming value, since a GROUP BY select alias cannot be referenced here */ +; diff --git a/configs/pipelines/sync_org_repo_organizations_pr_reviewer_role/config.json b/configs/pipelines/sync_org_repo_organizations_pr_reviewer_role/config.json new file mode 100644 index 00000000000..b80267d04bf --- /dev/null +++ b/configs/pipelines/sync_org_repo_organizations_pr_reviewer_role/config.json @@ -0,0 +1,8 @@ +{ + "name": "sync_org_repo_organizations_pr_reviewer_role", + "description": "None", + "cron": "0 0 2 * * *", + "incremental": { + "timeRange": "last_day" + } +} \ No newline at end of file diff --git a/configs/pipelines/sync_org_repo_organizations_pr_reviewer_role/process.sql b/configs/pipelines/sync_org_repo_organizations_pr_reviewer_role/process.sql new file mode 100644 index 00000000000..87a0d6883c9 --- /dev/null +++ b/configs/pipelines/sync_org_repo_organizations_pr_reviewer_role/process.sql @@ -0,0 +1,17 @@ +INSERT INTO mv_repo_organizations_pr_reviewer_role(repo_id, org_name, first_seen_at) /* earliest PullRequestReviewEvent date per repo and organization name within the :from..:to window */ +SELECT + /*+ READ_FROM_STORAGE(TIFLASH[ge, gu]) */ + ge.repo_id, + LEFT(gu.organization_formatted, 40) AS org_name, + MIN(ge.created_at) AS new_first_seen_at +FROM github_events ge +JOIN github_users gu ON ge.actor_login = gu.login +WHERE + ge.type = 'PullRequestReviewEvent' + AND ge.org_id != 0 + AND ge.created_at BETWEEN :from AND :to + AND gu.organization_formatted != '' +GROUP BY ge.repo_id, org_name +ON DUPLICATE KEY UPDATE + first_seen_at = LEAST(first_seen_at, VALUES(first_seen_at)) /* keep the earliest date; VALUES() yields the incoming value */ +; diff --git 
a/configs/pipelines/sync_org_repo_organizations_stargazer_role/config.json b/configs/pipelines/sync_org_repo_organizations_stargazer_role/config.json new file mode 100644 index 00000000000..7c1bdc977c4 --- /dev/null +++ b/configs/pipelines/sync_org_repo_organizations_stargazer_role/config.json @@ -0,0 +1,8 @@ +{ + "name": "sync_org_repo_organizations_stargazer_role", + "description": "None", + "cron": "0 5 2 * * *", + "incremental": { + "timeRange": "last_day" + } +} \ No newline at end of file diff --git a/configs/pipelines/sync_org_repo_organizations_stargazer_role/process.sql b/configs/pipelines/sync_org_repo_organizations_stargazer_role/process.sql new file mode 100644 index 00000000000..6e5976aabd5 --- /dev/null +++ b/configs/pipelines/sync_org_repo_organizations_stargazer_role/process.sql @@ -0,0 +1,17 @@ +INSERT INTO mv_repo_organizations_stargazer_role(repo_id, org_name, first_seen_at) /* earliest WatchEvent date per repo and organization name within the :from..:to window */ +SELECT + /*+ READ_FROM_STORAGE(TIFLASH[ge, gu]) */ + ge.repo_id, + LEFT(gu.organization_formatted, 40) AS org_name, + MIN(ge.created_at) AS new_first_seen_at +FROM github_events ge +JOIN github_users gu ON ge.actor_login = gu.login +WHERE + ge.type = 'WatchEvent' + AND ge.org_id != 0 + AND ge.created_at BETWEEN :from AND :to + AND gu.organization_formatted != '' +GROUP BY ge.repo_id, org_name +ON DUPLICATE KEY UPDATE + first_seen_at = LEAST(first_seen_at, VALUES(first_seen_at)) /* keep the earliest date; VALUES() yields the incoming value, since a GROUP BY select alias cannot be referenced here */ +; diff --git a/configs/pipelines/sync_repo_issues/config.json b/configs/pipelines/sync_repo_issues/config.json new file mode 100644 index 00000000000..22721572506 --- /dev/null +++ b/configs/pipelines/sync_repo_issues/config.json @@ -0,0 +1,8 @@ +{ + "name": "sync_repo_issues", + "description": "None", + "cron": "0 50 0 * * *", + "incremental": { + "timeRange": "last_day" + } +} \ No newline at end of file diff --git a/configs/pipelines/sync_repo_issues/process.sql b/configs/pipelines/sync_repo_issues/process.sql new file mode 100644 index 00000000000..75e9d997250 --- /dev/null +++ 
b/configs/pipelines/sync_repo_issues/process.sql @@ -0,0 +1,10 @@ +INSERT INTO mv_repo_issues(repo_id, number) /* record every (repo_id, number) that has an IssuesEvent between :from and :to, both bounds inclusive */ +SELECT repo_id, number +FROM github_events ge +WHERE + type = 'IssuesEvent' + AND created_at >= :from + AND created_at <= :to +ON DUPLICATE KEY UPDATE /* an already-known pair is rewritten with the same values it already holds */ + mv_repo_issues.repo_id = VALUES(repo_id), + mv_repo_issues.number = VALUES(number); \ No newline at end of file diff --git a/configs/pipelines/sync_repo_pull_requests/config.json b/configs/pipelines/sync_repo_pull_requests/config.json new file mode 100644 index 00000000000..68cd464e12d --- /dev/null +++ b/configs/pipelines/sync_repo_pull_requests/config.json @@ -0,0 +1,8 @@ +{ + "name": "sync_repo_pull_requests", + "description": "None", + "cron": "0 40 0 * * *", + "incremental": { + "timeRange": "last_day" + } +} \ No newline at end of file diff --git a/configs/pipelines/sync_repo_pull_requests/process.sql b/configs/pipelines/sync_repo_pull_requests/process.sql new file mode 100644 index 00000000000..a441026f07f --- /dev/null +++ b/configs/pipelines/sync_repo_pull_requests/process.sql @@ -0,0 +1,10 @@ +INSERT INTO mv_repo_pull_requests(repo_id, number) /* record every (repo_id, number) that has a PullRequestEvent between :from and :to, both bounds inclusive */ +SELECT repo_id, number +FROM github_events ge +WHERE + type = 'PullRequestEvent' + AND created_at >= :from + AND created_at <= :to +ON DUPLICATE KEY UPDATE /* an already-known pair is rewritten with the same values it already holds */ + mv_repo_pull_requests.repo_id = VALUES(repo_id), + mv_repo_pull_requests.number = VALUES(number); \ No newline at end of file diff --git a/configs/queries/orgs/participants/locations/params.json b/configs/queries/orgs/participants/locations/params.json index 341b5c06820..ed5e29cf0c9 100644 --- a/configs/queries/orgs/participants/locations/params.json +++ b/configs/queries/orgs/participants/locations/params.json @@ -42,6 +42,11 @@ "name": "excludeUnknown", "type": "boolean", "default": true + }, + { + "name": "excludeSeenBefore", + "type": "boolean", + "default": false } ] } diff --git a/configs/queries/orgs/participants/locations/template.sql b/configs/queries/orgs/participants/locations/template.sql 
index 974193b7a7b..a9a3cbfced0 100644 --- a/configs/queries/orgs/participants/locations/template.sql +++ b/configs/queries/orgs/participants/locations/template.sql @@ -7,7 +7,33 @@ WITH repos AS ( {% if repoIds.size > 0 %} AND gr.repo_id IN ({{ repoIds | join: ',' }}) {% endif %} -), participants_per_country AS ( +), +{% if excludeSeenBefore %} +countries_seen_before AS ( /* countries whose first_seen_at in the selected repos, for the chosen role, is older than the selected period */ + SELECT + country_code + FROM + {% case role %} + {% when 'pr_creators' %} mv_repo_countries_pr_creator_role + {% when 'pr_reviewers' %} mv_repo_countries_pr_reviewer_role + {% when 'issue_creators' %} mv_repo_countries_issue_creator_role + {% when 'commit_authors' %} mv_repo_countries_commit_author_role + {% when 'pr_commenters' %} mv_repo_countries_pr_commenter_role + {% when 'issue_commenters' %} mv_repo_countries_issue_commenter_role + {% else %} mv_repo_countries_stargazer_role + {% endcase %} b + WHERE + b.repo_id IN (SELECT repo_id FROM repos) + {% case period %} + {% when 'past_7_days' %} AND b.first_seen_at < (NOW() - INTERVAL 7 DAY) + {% when 'past_28_days' %} AND b.first_seen_at < (NOW() - INTERVAL 28 DAY) + {% when 'past_90_days' %} AND b.first_seen_at < (NOW() - INTERVAL 90 DAY) + {% when 'past_12_months' %} AND b.first_seen_at < (NOW() - INTERVAL 12 MONTH) + {% endcase %} + GROUP BY country_code +), +{% endif %} +participants_per_country AS ( SELECT gu.country_code, COUNT(DISTINCT actor_login) AS participants @@ -28,26 +54,16 @@ WITH repos AS ( AND ge.type = 'IssueCommentEvent' AND ge.action = 'created' AND EXISTS ( SELECT 1 - FROM github_events ge2 - WHERE - ge2.type = 'PullRequestEvent' - AND ge2.action = 'opened' - AND ge2.created_at < ge.created_at - AND ge2.repo_id = ge.repo_id - AND ge2.number = ge.number - ) + FROM mv_repo_pull_requests mrpr /* PR numbers are looked up in the materialized table instead of re-scanning github_events */ + WHERE mrpr.repo_id = ge.repo_id AND mrpr.number = ge.number + ) {% when 'issue_commenters' %} AND ge.type = 'IssueCommentEvent' AND ge.action = 'created' AND EXISTS ( SELECT 1 - FROM github_events ge2 - WHERE - ge2.type = 'IssuesEvent' 
- AND ge2.action = 'opened' - AND ge2.created_at < ge.created_at - AND ge2.repo_id = ge.repo_id - AND ge2.number = ge.number - ) + FROM mv_repo_issues mri /* matches the comment to a mv_repo_issues row by (repo_id, number) only; the github_events lookup it replaces also required the open event to precede the comment */ + WHERE mri.repo_id = ge.repo_id AND mri.number = ge.number + ) {% else %} -- Events considered as participation (Exclude `WatchEvent`, which means star a repo). AND ge.type IN ('IssueCommentEvent', 'DeleteEvent', 'CommitCommentEvent', 'MemberEvent', 'PushEvent', 'PublicEvent', 'ForkEvent', 'ReleaseEvent', 'PullRequestReviewEvent', 'CreateEvent', 'GollumEvent', 'PullRequestEvent', 'IssuesEvent', 'PullRequestReviewCommentEvent') @@ -69,7 +85,8 @@ WITH repos AS ( {% when 'past_12_months' %} AND ge.created_at > (NOW() - INTERVAL 12 MONTH) {% endcase %} GROUP BY gu.country_code -), participants_total AS ( +), +participants_total AS ( SELECT SUM(participants) AS total FROM participants_per_country ) SELECT @@ -79,5 +96,9 @@ SELECT FROM participants_per_country ppc, participants_total pt +{% if excludeSeenBefore %} +-- Exclude countries already in countries_seen_before (first seen before the selected period started). 
+WHERE NOT EXISTS (SELECT 1 FROM countries_seen_before csb WHERE csb.country_code = ppc.country_code) /* NOT EXISTS anti-join: unlike NOT IN, a NULL in countries_seen_before cannot filter out every row */ +{% endif %} ORDER BY ppc.participants DESC LIMIT {{ n }} diff --git a/configs/queries/orgs/participants/organizations/params.json b/configs/queries/orgs/participants/organizations/params.json index 341b5c06820..ed5e29cf0c9 100644 --- a/configs/queries/orgs/participants/organizations/params.json +++ b/configs/queries/orgs/participants/organizations/params.json @@ -42,6 +42,11 @@ "name": "excludeUnknown", "type": "boolean", "default": true + }, + { + "name": "excludeSeenBefore", + "type": "boolean", + "default": false } ] } diff --git a/configs/queries/orgs/participants/organizations/template.sql b/configs/queries/orgs/participants/organizations/template.sql index 9e6e8409743..e03e3cc6c19 100644 --- a/configs/queries/orgs/participants/organizations/template.sql +++ b/configs/queries/orgs/participants/organizations/template.sql @@ -7,7 +7,33 @@ WITH repos AS ( {% if repoIds.size > 0 %} AND gr.repo_id IN ({{ repoIds | join: ',' }}) {% endif %} -), participants_per_org AS ( +), +{% if excludeSeenBefore %} +organizations_seen_before AS ( + SELECT + org_name + FROM + {% case role %} + {% when 'pr_creators' %} mv_repo_organizations_pr_creator_role + {% when 'pr_reviewers' %} mv_repo_organizations_pr_reviewer_role + {% when 'issue_creators' %} mv_repo_organizations_issue_creator_role + {% when 'commit_authors' %} mv_repo_organizations_commit_author_role + {% when 'pr_commenters' %} mv_repo_organizations_pr_commenter_role + {% when 'issue_commenters' %} mv_repo_organizations_issue_commenter_role + {% else %} mv_repo_organizations_stargazer_role + {% endcase %} b + WHERE + b.repo_id IN (SELECT repo_id FROM repos) + {% case period %} + {% when 'past_7_days' %} AND b.first_seen_at < (NOW() - INTERVAL 7 DAY) + {% when 'past_28_days' %} AND b.first_seen_at < (NOW() - INTERVAL 28 DAY) + {% when 'past_90_days' %} AND b.first_seen_at < (NOW() - INTERVAL 90 DAY) + {% when 'past_12_months' %} AND 
b.first_seen_at < (NOW() - INTERVAL 12 MONTH) + {% endcase %} + GROUP BY org_name /* collapses to one row per organization across the selected repos */ +), +{% endif %} +participants_per_org AS ( SELECT IF( gu.organization_formatted IS NOT NULL AND LENGTH(gu.organization_formatted) != 0, @@ -32,26 +58,16 @@ WITH repos AS ( AND ge.type = 'IssueCommentEvent' AND ge.action = 'created' AND EXISTS ( SELECT 1 - FROM github_events ge2 - WHERE - ge2.type = 'PullRequestEvent' - AND ge2.action = 'opened' - AND ge2.created_at < ge.created_at - AND ge2.repo_id = ge.repo_id - AND ge2.number = ge.number - ) + FROM mv_repo_pull_requests mrpr /* matches the comment to a mv_repo_pull_requests row by (repo_id, number) only; the github_events lookup it replaces also required the open event to precede the comment */ + WHERE mrpr.repo_id = ge.repo_id AND mrpr.number = ge.number + ) {% when 'issue_commenters' %} AND ge.type = 'IssueCommentEvent' AND ge.action = 'created' AND EXISTS ( SELECT 1 - FROM github_events ge2 - WHERE - ge2.type = 'IssuesEvent' - AND ge2.action = 'opened' - AND ge2.created_at < ge.created_at - AND ge2.repo_id = ge.repo_id - AND ge2.number = ge.number - ) + FROM mv_repo_issues mri /* same (repo_id, number) match, against mv_repo_issues */ + WHERE mri.repo_id = ge.repo_id AND mri.number = ge.number + ) {% else %} -- Events considered as participation (Exclude `WatchEvent`, which means star a repo). AND ge.type IN ('IssueCommentEvent', 'DeleteEvent', 'CommitCommentEvent', 'MemberEvent', 'PushEvent', 'PublicEvent', 'ForkEvent', 'ReleaseEvent', 'PullRequestReviewEvent', 'CreateEvent', 'GollumEvent', 'PullRequestEvent', 'IssuesEvent', 'PullRequestReviewCommentEvent') @@ -74,7 +90,8 @@ WITH repos AS ( {% when 'past_12_months' %} AND ge.created_at > (NOW() - INTERVAL 12 MONTH) {% endcase %} GROUP BY organization_name -), participants_total AS ( +), +participants_total AS ( SELECT SUM(participants) AS participants_total FROM participants_per_org ) SELECT @@ -84,5 +101,9 @@ SELECT FROM participants_per_org ppo, participants_total pt +{% if excludeSeenBefore %} +-- Exclude organizations already in organizations_seen_before (first seen before the selected period started). 
+WHERE NOT EXISTS (SELECT 1 FROM organizations_seen_before osb WHERE osb.org_name = ppo.organization_name) /* NOT EXISTS anti-join: unlike NOT IN, a NULL org_name in organizations_seen_before cannot filter out every row */ +{% endif %} ORDER BY ppo.participants DESC LIMIT {{ n }} diff --git a/configs/queries/orgs/stars/locations/params.json b/configs/queries/orgs/stars/locations/params.json index bc510c8865f..68e1c217718 100644 --- a/configs/queries/orgs/stars/locations/params.json +++ b/configs/queries/orgs/stars/locations/params.json @@ -36,6 +36,11 @@ "name": "excludeUnknown", "type": "boolean", "default": true + }, + { + "name": "excludeSeenBefore", + "type": "boolean", + "default": false } ] } diff --git a/configs/queries/orgs/stars/locations/template.sql b/configs/queries/orgs/stars/locations/template.sql index a2b58966f61..b6a6d527af0 100644 --- a/configs/queries/orgs/stars/locations/template.sql +++ b/configs/queries/orgs/stars/locations/template.sql @@ -7,7 +7,25 @@ WITH repos AS ( {% if repoIds.size > 0 %} AND gr.repo_id IN ({{ repoIds | join: ',' }}) {% endif %} -), stars_per_country AS ( +), +{% if excludeSeenBefore %} +countries_seen_before AS ( + SELECT + country_code + FROM + mv_repo_countries_stargazer_role b + WHERE + b.repo_id IN (SELECT repo_id FROM repos) + {% case period %} + {% when 'past_7_days' %} AND b.first_seen_at < (NOW() - INTERVAL 7 DAY) + {% when 'past_28_days' %} AND b.first_seen_at < (NOW() - INTERVAL 28 DAY) + {% when 'past_90_days' %} AND b.first_seen_at < (NOW() - INTERVAL 90 DAY) + {% when 'past_12_months' %} AND b.first_seen_at < (NOW() - INTERVAL 12 MONTH) + {% endcase %} + GROUP BY country_code +), +{% endif %} +stars_per_country AS ( SELECT IF(gu.country_code IN ('', 'N/A', 'UND'), 'UND', gu.country_code) AS country_code, COUNT(*) AS stars @@ -45,5 +63,9 @@ SELECT FROM stars_per_country spc, stars_total st +{% if excludeSeenBefore %} +-- Exclude countries that have been seen before. 
+WHERE NOT EXISTS (SELECT 1 FROM countries_seen_before WHERE country_code = spc.country_code) /* keeps a country only when countries_seen_before has no row with the same country_code */ +{% endif %} ORDER BY spc.stars DESC LIMIT {{ n }} diff --git a/configs/queries/orgs/stars/organizations/params.json b/configs/queries/orgs/stars/organizations/params.json index d10efc4943a..6d41059aacc 100644 --- a/configs/queries/orgs/stars/organizations/params.json +++ b/configs/queries/orgs/stars/organizations/params.json @@ -36,6 +36,11 @@ "name": "excludeUnknown", "type": "boolean", "default": true + }, + { + "name": "excludeSeenBefore", + "type": "boolean", + "default": false } ] } diff --git a/configs/queries/orgs/stars/organizations/template.sql b/configs/queries/orgs/stars/organizations/template.sql index 556ffb2faf0..b083e63e998 100644 --- a/configs/queries/orgs/stars/organizations/template.sql +++ b/configs/queries/orgs/stars/organizations/template.sql @@ -7,7 +7,25 @@ WITH repos AS ( {% if repoIds.size > 0 %} AND gr.repo_id IN ({{ repoIds | join: ',' }}) {% endif %} -), stars_per_org AS ( +), +{% if excludeSeenBefore %} +organizations_seen_before AS ( + SELECT + org_name + FROM + mv_repo_organizations_stargazer_role b + WHERE + b.repo_id IN (SELECT repo_id FROM repos) + {% case period %} + {% when 'past_7_days' %} AND b.first_seen_at < (NOW() - INTERVAL 7 DAY) + {% when 'past_28_days' %} AND b.first_seen_at < (NOW() - INTERVAL 28 DAY) + {% when 'past_90_days' %} AND b.first_seen_at < (NOW() - INTERVAL 90 DAY) + {% when 'past_12_months' %} AND b.first_seen_at < (NOW() - INTERVAL 12 MONTH) + {% endcase %} /* NOTE(review): a period outside the four listed values adds no date filter, so every organization recorded for these repos counts as seen before - confirm intended */ + GROUP BY org_name /* collapses to one row per organization across the selected repos */ +), +{% endif %} +stars_per_org AS ( SELECT IF( gu.organization_formatted IS NOT NULL AND LENGTH(gu.organization_formatted) != 0, @@ -47,5 +65,9 @@ SELECT FROM stars_per_org spo, stars_total st +{% if excludeSeenBefore %} +-- Exclude organizations already in organizations_seen_before (first seen before the selected period started). 
+WHERE NOT EXISTS (SELECT 1 FROM organizations_seen_before WHERE org_name = spo.organization_name) /* keeps an organization only when organizations_seen_before has no row with the same org_name */ +{% endif %} ORDER BY spo.stars DESC LIMIT {{ n }} \ No newline at end of file