Skip to content
Open
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
186 changes: 186 additions & 0 deletions src/sempy_labs/report/_reportwrapper.py
Original file line number Diff line number Diff line change
Expand Up @@ -2269,6 +2269,192 @@ def migrate_report_level_measures(self, measures: Optional[str | List[str]] = No
f"{icons.green_dot} The report-level measures have been migrated to the '{dataset_name}' semantic model within the '{dataset_workspace_name}' workspace."
)

def remove_unused_report_level_measures(self) -> pd.DataFrame:
"""
Removes unused report-level measures from the report.

A measure is considered unused if:
1. It is not directly referenced in any visuals, filters, or pages
2. No other report-level measures depend on it

This method will recursively remove measures until no more unused measures are found.

Returns
-------
pandas.DataFrame
A pandas dataframe showing the measures that were removed.
"""
self._ensure_pbir()

# Get report-level measures
rlm = self.list_report_level_measures()
if rlm.empty:
if not self._readonly:
print(
f"{icons.info} The '{self._report_name}' report within the '{self._workspace_name}' workspace has no report-level measures."
)
return pd.DataFrame()

all_removed_measures = []
iteration = 0
max_iterations = 10 # Prevent infinite loops

while iteration < max_iterations:
iteration += 1

# Get current report-level measures
current_rlm = self.list_report_level_measures()
if current_rlm.empty:
break

# Build a map of all report-level measures with their entities
rlm_map = {}
for _, r in current_rlm.iterrows():
rlm_map[r["Measure Name"]] = r["Table Name"]

# Check which measures are used
used_measures = set()

# Helper function to recursively check if a measure is referenced in JSON data
def is_measure_referenced_in_json(json_data, measure_name: str, entity_name: str):
"""Recursively search JSON for measure references"""
if isinstance(json_data, dict):
# Check for direct Measure.Property reference with matching Entity
if "Measure" in json_data:
measure_obj = json_data["Measure"]
if isinstance(measure_obj, dict):
property_name = measure_obj.get("Property")
measure_entity = (
measure_obj.get("Expression", {})
.get("SourceRef", {})
.get("Entity")
)
if property_name == measure_name and measure_entity == entity_name:
return True

# Check for DAX expression containing the measure
if "Expression" in json_data and isinstance(json_data["Expression"], str):
expr = json_data["Expression"]
# Match [MeasureName] pattern
pattern = r"\[" + re.escape(measure_name) + r"\]"
if re.search(pattern, expr):
return True

# Recurse into dictionary values
for value in json_data.values():
if is_measure_referenced_in_json(value, measure_name, entity_name):
return True

elif isinstance(json_data, list):
# Recurse into list items
for item in json_data:
if is_measure_referenced_in_json(item, measure_name, entity_name):
return True

return False

# Check all report definition files for measure references (excluding reportExtensions.json)
for _, r in current_rlm.iterrows():
measure_name = r["Measure Name"]
entity_name = r["Table Name"]

# Check in all report files except reportExtensions.json
for part in self._report_definition.get("parts"):
path = part.get("path")
if path == self._report_extensions_path:
continue

payload = part.get("payload")
if is_measure_referenced_in_json(payload, measure_name, entity_name):
used_measures.add(measure_name)
break # No need to check other files for this measure

# Also check for measures referenced in DAX expressions within reportExtensions.json
# (measures that depend on other measures)
extensions_data = self.get(file_path=self._report_extensions_path)
for entity in extensions_data.get("entities", []):
for measure in entity.get("measures", []):
expr = measure.get("expression", "")
if isinstance(expr, str):
# Find [MeasureName] references in DAX
measure_refs = re.findall(r"\[([^\]]+)\]", expr)
for ref in measure_refs:
if ref in rlm_map:
used_measures.add(ref)

# Identify unused measures in this iteration
unused_in_iteration = []
for _, r in current_rlm.iterrows():
measure_name = r["Measure Name"]
if measure_name not in used_measures:
unused_in_iteration.append(measure_name)

# If no unused measures found, we're done
if not unused_in_iteration:
break

# Remove unused measures from reportExtensions.json
entities = self.get(
file_path=self._report_extensions_path, json_path="$.entities"
)

removed_in_iteration = []
for entity in entities:
table_name = entity.get("name")
original_measures = entity.get("measures", [])

# Track which measures we're removing from this entity
for m in original_measures:
if m.get("name") in unused_in_iteration:
removed_in_iteration.append(
{
"Table Name": table_name,
"Measure Name": m.get("name"),
"Expression": m.get("expression"),
"Data Type": m.get("dataType"),
"Format String": m.get("formatString"),
}
)

# Filter out unused measures
entity["measures"] = [
m
for m in original_measures
if m.get("name") not in unused_in_iteration
]

# Remove entities that now have no measures
entities = [e for e in entities if len(e.get("measures", [])) > 0]

# Update the report definition
if entities:
self.set_json(
file_path=self._report_extensions_path,
json_path="$.entities",
json_value=entities,
)
else:
# Remove reportExtensions.json if no measures remain
self.remove(file_path=self._report_extensions_path, verbose=False)

# Track all removed measures
all_removed_measures.extend(removed_in_iteration)

# Create result dataframe
if all_removed_measures:
df_removed = pd.DataFrame(all_removed_measures).drop_duplicates()
if not self._readonly:
print(
f"{icons.green_dot} Removed {len(df_removed)} unused report-level measure(s) from the '{self._report_name}' report."
)
return df_removed
else:
if not self._readonly:
print(
f"{icons.info} No unused report-level measures found in the '{self._report_name}' report within the '{self._workspace_name}' workspace."
)
return pd.DataFrame()

# In progress...
def _list_annotations(self) -> pd.DataFrame:
"""
Expand Down