feat(replays): initial replays clickhouse migration #2681

Merged: 7 commits, merged Jun 13, 2022
1 change: 1 addition & 0 deletions snuba/clusters/storage_sets.py
@@ -31,6 +31,7 @@ class StorageSetKey(Enum):
ERRORS_V2 = "errors_v2"
ERRORS_V2_RO = "errors_v2_ro"
PROFILES = "profiles"
REPLAYS = "replays"


# Storage sets enabled only when development features are enabled.
13 changes: 13 additions & 0 deletions snuba/migrations/groups.py
@@ -17,6 +17,7 @@ class MigrationGroup(Enum):
SESSIONS = "sessions"
QUERYLOG = "querylog"
PROFILES = "profiles"
REPLAYS = "replays"


# Migration groups are mandatory by default, unless they are on this list
@@ -25,6 +26,7 @@ class MigrationGroup(Enum):
MigrationGroup.SESSIONS,
MigrationGroup.QUERYLOG,
MigrationGroup.PROFILES,
MigrationGroup.REPLAYS,
}


@@ -156,6 +158,16 @@ def get_migrations(self) -> Sequence[str]:
]


class ReplaysLoader(DirectoryLoader):
def __init__(self) -> None:
super().__init__("snuba.migrations.snuba_migrations.replays")

def get_migrations(self) -> Sequence[str]:
return [
"0001_replays",
]


class MetricsLoader(DirectoryLoader):
def __init__(self) -> None:
super().__init__("snuba.migrations.snuba_migrations.metrics")
@@ -233,6 +245,7 @@ def get_migrations(self) -> Sequence[str]:
MigrationGroup.SESSIONS: SessionsLoader(),
MigrationGroup.QUERYLOG: QuerylogLoader(),
MigrationGroup.PROFILES: ProfilesLoader(),
MigrationGroup.REPLAYS: ReplaysLoader(),
}


96 changes: 96 additions & 0 deletions snuba/migrations/snuba_migrations/replays/0001_replays.py
@@ -0,0 +1,96 @@
from typing import Sequence

from snuba.clickhouse.columns import (
UUID,
Array,
Column,
DateTime,
IPv4,
IPv6,
Nested,
String,
UInt,
)
from snuba.clusters.storage_sets import StorageSetKey
from snuba.migrations import migration, operations, table_engines
from snuba.migrations.columns import MigrationModifiers as Modifiers

raw_columns: Sequence[Column[Modifiers]] = [
Column("replay_id", UUID()),
Column("sequence_id", UInt(16)),
Member: Who generates the sequence_id? On the SDK? Sentry? Snuba? What's the max number allowed here?

Member Author: The SDK will generate the sequence_id. The max will be somewhere between ~100 and ~1000 (we will be capping the maximum replay length time-wise and from there find a sane max sequence_id).

Member: Is there any sort of relation between the sequence_id and replay_id fields?

Member Author: sequence_id will be a monotonically increasing counter, so within each replay_id, sequence_id is unique.
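For context, a purely illustrative sketch of the read path this counter enables; the query below is not part of this PR and assumes direct SQL access to the local table:

# Illustrative only: fetch one replay's segments in playback order using the
# per-replay monotonically increasing sequence_id. Table and column names come
# from this migration; the query itself is hypothetical.
FETCH_REPLAY_SEGMENTS = """
SELECT replay_id, sequence_id, timestamp, trace_ids, title
FROM replays_local
WHERE project_id = %(project_id)s
  AND replay_id = %(replay_id)s
ORDER BY sequence_id ASC
"""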

Column("trace_ids", Array(UUID())),
Member: What are these trace_ids? Are they supposed to be a pointer to some other piece of data in one of our systems?

Member Author: See getsentry/sentry-replay#38 (comment). Replays can have N trace_ids, and each update may carry N of them; the trace_id will be the link between them to start (we won't be doing any joins on them).

Member: Did you ever get a sense of how many trace IDs could be in this field?

Member Author: On a per-row basis it likely won't be more than 10.
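As a rough illustration of the lookup pattern discussed further down in this review (finding replays for a given trace), a hypothetical query; without a data-skipping index this scans the whole column:

# Illustrative only: find replays that reference a given trace id. The has()
# lookup over Array(UUID) is a full scan unless a bloom filter index is added
# (see the index discussion later in this thread).
FIND_REPLAYS_FOR_TRACE = """
SELECT DISTINCT replay_id
FROM replays_local
WHERE project_id = %(project_id)s
  AND has(trace_ids, toUUID(%(trace_id)s))
"""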

Column("title", String()),
### columns used by other sentry events
Column("project_id", UInt(64)),
# time columns
Column("timestamp", DateTime()),
# release/environment info
Column("platform", String(Modifiers(low_cardinality=True))),
Column("environment", String(Modifiers(nullable=True, low_cardinality=True))),
Column("release", String(Modifiers(nullable=True))),
Column("dist", String(Modifiers(nullable=True))),
Column("ip_address_v4", IPv4(Modifiers(nullable=True))),
Column("ip_address_v6", IPv6(Modifiers(nullable=True))),
# user columns
Column("user", String()),
Column("user_hash", UInt(64)),
Column("user_id", String(Modifiers(nullable=True))),
Column("user_name", String(Modifiers(nullable=True))),
Column("user_email", String(Modifiers(nullable=True))),
# sdk info
Column("sdk_name", String()),
Column("sdk_version", String()),
Column("tags", Nested([("key", String()), ("value", String())])),
Member: For performance reasons, you might want to add a bloom filter index on tags, as we do on some of our other datasets.

Member Author: Good call 👍🏼 will look at adding those.

Contributor: Are you actually going to search for replays by tag key/value?

Member Author: Likely.
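For reference, the kind of tag search such an index would speed up; a hypothetical query, noting that ClickHouse flattens the Nested column into parallel tags.key / tags.value arrays:

# Illustrative only: filter replays by a tag key/value pair using the
# flattened Nested arrays. A bloom filter index over the tag keys (or a hashed
# key/value map) would let ClickHouse skip granules for this predicate.
FIND_REPLAYS_BY_TAG = """
SELECT replay_id
FROM replays_local
WHERE project_id = %(project_id)s
  AND tags.value[indexOf(tags.key, %(tag_key)s)] = %(tag_value)s
"""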

# internal data
Column("retention_days", UInt(16)),
Column("partition", UInt(16)),
Column("offset", UInt(64)),
]


class Migration(migration.ClickhouseNodeMigration):
blocking = False

def forwards_local(self) -> Sequence[operations.SqlOperation]:
return [
operations.CreateTable(
storage_set=StorageSetKey.REPLAYS,
table_name="replays_local",
columns=raw_columns,
engine=table_engines.ReplacingMergeTree(
Contributor: Are you going to use the replacing feature for anything (aside from removing duplicates, which is a good idea anyway)?

Member Author: No, just removing duplicates.

storage_set=StorageSetKey.REPLAYS,
order_by="(project_id, toStartOfDay(timestamp), cityHash64(replay_id), sequence_id)",
Member: Just to confirm, items with the same replay_id can still span multiple days, right?

Member Author: Yes, and I'm glad you brought this up. The intention is that replays which span multiple days will only show up when the initial event is within the queried time range.

Member: Probably an edge case, but if we do receive replays on different days with the same replay_id and sequence_id, they will not get merged together and we'd need a strategy to deduplicate them when querying.
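One possible query-time strategy for that edge case, sketched here only as an illustration (nothing in this migration prescribes it):

# Illustrative only: collapse duplicate (replay_id, sequence_id) rows at read
# time. ReplacingMergeTree only replaces rows that share the full ORDER BY key
# within the same partition, so duplicates arriving on different days survive.
DEDUPLICATED_SEGMENTS = """
SELECT replay_id, sequence_id, any(timestamp) AS timestamp
FROM replays_local
WHERE project_id = %(project_id)s
GROUP BY replay_id, sequence_id
"""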

Contributor: @JoshFerge Could you please give some insight into the most common query patterns you expect? What will you filter by most of the time? What will you aggregate, if anything? The ORDER BY key has to be defined based on the expected query pattern: you cannot change it later without rebuilding the table entirely, and getting it wrong will make your query performance miserable. The expected query pattern also determines which data-skipping indexes (if any) should be added; we cannot add indexes to every column, as the type of index depends on the query you want to make faster.

Member Author: See https://www.notion.so/sentry/Addendum-Replay-Queries-fcfd8e68679e443e87649014cf10ae62. We will be happy to rebuild the table entirely while we are testing over the next several months, so we view all data as temporary and will make that clear to any customers testing. We will also be building several use cases on top of this initial one that may require us to rebuild the tables in any case.

Contributor: Regarding the document you linked:

"As a replays user, I want to see all replays where an error occurred": the table designed here does not seem to have a reference to an issue or an error. Is that correct or a mistake?

"As a performance user, I want to see if this trace has a replay associated with it": if you want to search the replays table with WHERE has(trace_ids, 'asdasdasdasd'), please add a bloom filter index on that column (which may require creating a materialized version with hashes of that column); otherwise your search will be miserable. But it would be better to add a replay id to the transaction in some way so you do not have to scan the whole replays table to associate replays with traces.

Member Author: Not a mistake. For now we will do a very rudimentary query where we take the trace ids from a single page and look them up to determine whether there is an associated error, or search the errors table for the replays tag. The issue is that since errors can be sampled or dropped (and replays too, in the future), tagging each other's events with the ids is problematic because it's not guaranteed that the tagged id will exist. We'll likely need a separate table, generated during event post-processing, that can accurately associate ingested events with replays; that will come in a future iteration. As for adding a replay id on the transaction: we'll also be adding replay_id to other events, so for example this search can use transactions tagged with a replay id (there is still the sampling problem, but we're not going to worry about that in the first iteration).

Member Author: For now I will not add the bloom filter index; I'll add a TODO and follow up on it.

Contributor (@fpacifici, Jun 10, 2022): Please do not wait on this. Not doing it means a full table scan every time, and the effort to add the index is minimal. ClickHouse tables get large very quickly.

Member Author: Went ahead and added the index 👍🏼
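A sketch of what that follow-up could look like, assuming Snuba's operations.AddIndex has roughly this shape; the index name and expression here are hypothetical and not necessarily what the later commit added:

# Hypothetical operation, not the actual follow-up commit: a bloom filter
# data-skipping index over hashed trace ids so has(trace_ids, ...) lookups
# can skip granules instead of scanning the whole table.
operations.AddIndex(
    storage_set=StorageSetKey.REPLAYS,
    table_name="replays_local",
    index_name="bf_trace_ids_hashed",
    index_expression="arrayMap(t -> cityHash64(toString(t)), trace_ids)",
    index_type="bloom_filter()",
    granularity=1,
),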

partition_by="(retention_days, toMonday(timestamp))",
settings={"index_granularity": "8192"},
ttl="timestamp + toIntervalDay(retention_days)",
),
),
]

def backwards_local(self) -> Sequence[operations.SqlOperation]:
return [
operations.DropTable(
storage_set=StorageSetKey.REPLAYS,
table_name="replays_local",
),
]

def forwards_dist(self) -> Sequence[operations.SqlOperation]:
return [
operations.CreateTable(
storage_set=StorageSetKey.REPLAYS,
table_name="replays_dist",
columns=raw_columns,
engine=table_engines.Distributed(
local_table_name="replays_local",
sharding_key="project_id",
Member: What is the reason for sharding the data by project_id versus sharding randomly? One disadvantage I can see with sharding by project_id is that if there is a big project which uses replays a lot, the shards could become imbalanced.

Member Author: Good point, I think I just chose this arbitrarily. I'll shard by replay_id instead.
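A sketch of that revision, hedged: the hash expression below is an assumption rather than the exact follow-up change, and the Distributed engine's sharding key must be an integer expression, hence hashing the UUID:

# Hypothetical revision, not the actual follow-up commit: shard by a hash of
# replay_id so a single large project cannot concentrate all of its rows on
# one shard.
engine=table_engines.Distributed(
    local_table_name="replays_local",
    sharding_key="cityHash64(toString(replay_id))",
),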

),
),
]

def backwards_dist(self) -> Sequence[operations.SqlOperation]:
return [
operations.DropTable(
storage_set=StorageSetKey.REPLAYS, table_name="replays_dist"
),
]
Empty file.
3 changes: 2 additions & 1 deletion snuba/settings/__init__.py
@@ -51,6 +51,7 @@
"errors_v2",
"errors_v2_ro",
"profiles",
"replays",
},
"single_node": True,
},
@@ -167,7 +168,7 @@
COLUMN_SPLIT_MAX_RESULTS = 5000

# Migrations in skipped groups will not be run
SKIPPED_MIGRATION_GROUPS: Set[str] = {"querylog", "profiles"}
SKIPPED_MIGRATION_GROUPS: Set[str] = {"querylog", "profiles", "replays"}
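Because the group is in this skip list, a deployment that wants the replays table has to opt in by overriding the setting; a hypothetical settings override, not part of this PR:

# Hypothetical override in a deployment-specific settings module: run the
# replays migrations by leaving "replays" out of the skip list.
SKIPPED_MIGRATION_GROUPS: Set[str] = {"querylog", "profiles"}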

MAX_RESOLUTION_FOR_JITTER = 60

1 change: 1 addition & 0 deletions snuba/settings/settings_distributed.py
@@ -24,6 +24,7 @@
"errors_v2",
"errors_v2_ro",
"profiles",
"replays",
},
"single_node": False,
"cluster_name": "cluster_one_sh",