Skip to content

Commit

Permalink
pageserver: use layer visibility when composing heatmap (neondatabase…
Browse files Browse the repository at this point in the history
…#8616)

## Problem

Sometimes, a layer is Covered by hasn't yet been evicted from local disk
(e.g. shortly after image layer generation). It is not good use of
resources to download these to a secondary location, as there's a good
chance they will never be read.

This follows the previous change that added layer visibility:
- neondatabase#8511 

Part of epic:
- neondatabase#8398

## Summary of changes

- When generating heatmaps, only include Visible layers
- Update test_secondary_downloads to filter to visible layers when
listing layers from an attached location
  • Loading branch information
jcsp authored Aug 6, 2024
1 parent 42229aa commit 3727c6f
Show file tree
Hide file tree
Showing 3 changed files with 60 additions and 20 deletions.
26 changes: 17 additions & 9 deletions pageserver/src/tenant/timeline.rs
Original file line number Diff line number Diff line change
Expand Up @@ -137,7 +137,7 @@ use self::layer_manager::LayerManager;
use self::logical_size::LogicalSize;
use self::walreceiver::{WalReceiver, WalReceiverConf};

use super::{config::TenantConf, upload_queue::NotInitialized};
use super::{config::TenantConf, storage_layer::LayerVisibilityHint, upload_queue::NotInitialized};
use super::{debug_assert_current_span_has_tenant_and_timeline_id, AttachedTenantConf};
use super::{remote_timeline_client::index::IndexPart, storage_layer::LayerFringe};
use super::{
Expand Down Expand Up @@ -2919,14 +2919,22 @@ impl Timeline {

let guard = self.layers.read().await;

let resident = guard.likely_resident_layers().map(|layer| {
let last_activity_ts = layer.latest_activity();

HeatMapLayer::new(
layer.layer_desc().layer_name(),
layer.metadata(),
last_activity_ts,
)
let resident = guard.likely_resident_layers().filter_map(|layer| {
match layer.visibility() {
LayerVisibilityHint::Visible => {
// Layer is visible to one or more read LSNs: elegible for inclusion in layer map
let last_activity_ts = layer.latest_activity();
Some(HeatMapLayer::new(
layer.layer_desc().layer_name(),
layer.metadata(),
last_activity_ts,
))
}
LayerVisibilityHint::Covered => {
// Layer is resident but unlikely to be read: not elegible for inclusion in heatmap.
None
}
}
});

let layers = resident.collect();
Expand Down
2 changes: 2 additions & 0 deletions test_runner/fixtures/pageserver/http.py
Original file line number Diff line number Diff line change
Expand Up @@ -61,6 +61,7 @@ class HistoricLayerInfo:
remote: bool
# None for image layers, true if pageserver thinks this is an L0 delta layer
l0: Optional[bool]
visible: bool

@classmethod
def from_json(cls, d: Dict[str, Any]) -> HistoricLayerInfo:
Expand All @@ -79,6 +80,7 @@ def from_json(cls, d: Dict[str, Any]) -> HistoricLayerInfo:
lsn_end=d.get("lsn_end"),
remote=d["remote"],
l0=l0_ness,
visible=d["access_stats"]["visible"],
)


Expand Down
52 changes: 41 additions & 11 deletions test_runner/regress/test_pageserver_secondary.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,10 +2,11 @@
import os
import random
import time
from typing import Any, Dict, Optional
from pathlib import Path
from typing import Any, Dict, Optional, Union

import pytest
from fixtures.common_types import TenantId, TimelineId
from fixtures.common_types import TenantId, TenantShardId, TimelineId
from fixtures.log_helper import log
from fixtures.neon_fixtures import NeonEnvBuilder, NeonPageserver
from fixtures.pageserver.common_types import parse_layer_file_name
Expand Down Expand Up @@ -437,6 +438,35 @@ def validate_heatmap(heatmap):
validate_heatmap(heatmap_second)


def list_elegible_layers(
pageserver, tenant_id: Union[TenantId, TenantShardId], timeline_id: TimelineId
) -> list[Path]:
"""
The subset of layer filenames that are elegible for secondary download: at time of writing this
is all resident layers which are also visible.
"""
candidates = pageserver.list_layers(tenant_id, timeline_id)

layer_map = pageserver.http_client().layer_map_info(tenant_id, timeline_id)

# Map of layer filenames to their visibility the "layer name" is not the same as the filename: add suffix to resolve one to the other
visible_map = dict(
(f"{layer.layer_file_name}-v1-00000001", layer.visible)
for layer in layer_map.historic_layers
)

def is_visible(layer_file_name):
try:
return visible_map[str(layer_file_name)]
except KeyError:
# Unexpected: tests should call this when pageservers are in a quiet state such that the layer map
# matches what's on disk.
log.warn(f"Lookup {layer_file_name} from {list(visible_map.keys())}")
raise

return list(c for c in candidates if is_visible(c))


def test_secondary_downloads(neon_env_builder: NeonEnvBuilder):
"""
Test the overall data flow in secondary mode:
Expand Down Expand Up @@ -491,7 +521,7 @@ def test_secondary_downloads(neon_env_builder: NeonEnvBuilder):

ps_secondary.http_client().tenant_secondary_download(tenant_id)

assert ps_attached.list_layers(tenant_id, timeline_id) == ps_secondary.list_layers(
assert list_elegible_layers(ps_attached, tenant_id, timeline_id) == ps_secondary.list_layers(
tenant_id, timeline_id
)

Expand All @@ -509,9 +539,9 @@ def test_secondary_downloads(neon_env_builder: NeonEnvBuilder):
ps_secondary.http_client().tenant_secondary_download(tenant_id)

try:
assert ps_attached.list_layers(tenant_id, timeline_id) == ps_secondary.list_layers(
tenant_id, timeline_id
)
assert list_elegible_layers(
ps_attached, tenant_id, timeline_id
) == ps_secondary.list_layers(tenant_id, timeline_id)
except:
# Do a full listing of the secondary location on errors, to help debug of
# https://github.com/neondatabase/neon/issues/6966
Expand All @@ -532,8 +562,8 @@ def test_secondary_downloads(neon_env_builder: NeonEnvBuilder):
# ==================================================================
try:
log.info("Evicting a layer...")
layer_to_evict = ps_attached.list_layers(tenant_id, timeline_id)[0]
some_other_layer = ps_attached.list_layers(tenant_id, timeline_id)[1]
layer_to_evict = list_elegible_layers(ps_attached, tenant_id, timeline_id)[0]
some_other_layer = list_elegible_layers(ps_attached, tenant_id, timeline_id)[1]
log.info(f"Victim layer: {layer_to_evict.name}")
ps_attached.http_client().evict_layer(
tenant_id, timeline_id, layer_name=layer_to_evict.name
Expand All @@ -551,9 +581,9 @@ def test_secondary_downloads(neon_env_builder: NeonEnvBuilder):
ps_secondary.http_client().tenant_secondary_download(tenant_id)

assert layer_to_evict not in ps_attached.list_layers(tenant_id, timeline_id)
assert ps_attached.list_layers(tenant_id, timeline_id) == ps_secondary.list_layers(
tenant_id, timeline_id
)
assert list_elegible_layers(
ps_attached, tenant_id, timeline_id
) == ps_secondary.list_layers(tenant_id, timeline_id)
except:
# On assertion failures, log some details to help with debugging
heatmap = env.pageserver_remote_storage.heatmap_content(tenant_id)
Expand Down

0 comments on commit 3727c6f

Please sign in to comment.