2 changes: 1 addition & 1 deletion pyproject.toml
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"

 [project]
 name = "lakebench"
-version = "0.9.0"
+version = "0.9.1"
 authors = [
   { name="Miles Cole" },
 ]
19 changes: 17 additions & 2 deletions src/lakebench/engines/fabric_spark.py
@@ -1,6 +1,8 @@
 from .spark import Spark
 from typing import Optional
 from decimal import Decimal
+import re
+from urllib.parse import urlparse, parse_qs
 
 try:
     from IPython.core.getipython import get_ipython
@@ -39,8 +41,19 @@ def __init__(
         self.cost_per_vcore_hour = cost_per_vcore_hour or getattr(self, '_FABRIC_USD_COST_PER_VCORE_HOUR', None)
         self.cost_per_hour = self.get_total_cores() * self.cost_per_vcore_hour
 
+        url = self.spark.sparkContext.uiWebUrl
+        # Parse the webUrl query string
+        parsed = urlparse(url)
+        query = parse_qs(parsed.query)
+        artifact_id = query.get("artifactId", [None])[0]
+        # Regex for GUIDs
+        guid_pattern = re.compile(r"[0-9a-fA-F-]{36}")
+        guids = guid_pattern.findall(url)
+        tenant_id = guids[0]  # after /sparkui/
+        activity_id = guids[2]  # after /activities/
+
         self.extended_engine_metadata.update({
-            'spark_history_url': f"https://{self.spark_configs['spark.trident.pbienv'].lower()}.powerbi.com/workloads/de-ds/sparkmonitor/{self.spark_configs['spark.hadoop.trident.artifact.id']}/{self.spark_configs['spark.hadoop.trident.activity.id']}?ctid={self.spark_configs['spark.hadoop.trident.tenant.id']}",
+            'spark_history_url': f"https://{self.spark_configs['spark.trident.pbienv'].lower()}.powerbi.com/workloads/de-ds/sparkmonitor/{artifact_id}/{activity_id}?ctid={tenant_id}",
             'cost_per_hour': Decimal(self.cost_per_hour).quantize(Decimal('0.0000')),
             'capacity_id': self.capacity_id
         })
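Note: a minimal standalone sketch of the new URL parsing, run against a hypothetical uiWebUrl. The sample URL shape and GUID positions below are assumptions that mirror the inline comments; the real value comes from spark.sparkContext.uiWebUrl.

    import re
    from urllib.parse import urlparse, parse_qs

    # Hypothetical Fabric Spark UI URL; real tenant/artifact/activity GUIDs differ.
    url = (
        "https://daily.powerbi.com/sparkui/00000000-0000-0000-0000-0000000000aa"
        "/artifacts/00000000-0000-0000-0000-0000000000bb"
        "/activities/00000000-0000-0000-0000-0000000000cc"
        "?artifactId=00000000-0000-0000-0000-0000000000bb"
    )

    # artifactId is taken from the query string.
    artifact_id = parse_qs(urlparse(url).query).get("artifactId", [None])[0]

    # GUIDs match in path order: tenant, artifact, activity, then the
    # query-string artifactId again; indices 0 and 2 pick tenant and activity.
    guids = re.findall(r"[0-9a-fA-F-]{36}", url)
    tenant_id, activity_id = guids[0], guids[2]

    assert artifact_id == guids[1]
    print(tenant_id, artifact_id, activity_id)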
@@ -60,6 +73,8 @@ def __init__(
             'spark.microsoft.delta.stats.injection.enabled',
             'spark.microsoft.delta.snapshot.driverMode.enabled',
             'spark.microsoft.delta.stats.collect.extended.property.setAtTableCreation',
+            'spark.microsoft.delta.targetFileSize.adaptive.enabled',
+            'spark.sql.parquet.compression.codec',
             'spark.app.id',
             'spark.cluster.name'
         ]}
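Note: the two newly tracked keys can be spot-checked at runtime; a sketch assuming an active SparkSession named spark (either key may be unset depending on the runtime, hence the defaults):

    print(spark.conf.get("spark.microsoft.delta.targetFileSize.adaptive.enabled", "unset"))
    print(spark.conf.get("spark.sql.parquet.compression.codec", "unset"))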
@@ -72,4 +87,4 @@ def __init__(
         # Enable auto stats collection
         self.spark.conf.set("spark.microsoft.delta.stats.collect.extended", "true")
         self.spark.conf.set("spark.microsoft.delta.stats.injection.enabled", "true")
-        self.spark.conf.set("spark.microsoft.delta.stats.collect.extended.property.setAtTableCreation", "true")
+        self.spark.conf.set("spark.microsoft.delta.stats.collect.extended.property.setAtTableCreation", "true")
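Note: together these three settings opt the session into extended Delta statistics collection. A sketch for verifying them after engine construction, assuming the engine object exposes its session as .spark (as the code above suggests):

    for key in (
        "spark.microsoft.delta.stats.collect.extended",
        "spark.microsoft.delta.stats.injection.enabled",
        "spark.microsoft.delta.stats.collect.extended.property.setAtTableCreation",
    ):
        print(key, engine.spark.conf.get(key))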