diff --git a/libs/pageserver_api/src/models.rs b/libs/pageserver_api/src/models.rs index d55c06b685f6..4cab56771b8e 100644 --- a/libs/pageserver_api/src/models.rs +++ b/libs/pageserver_api/src/models.rs @@ -718,6 +718,7 @@ pub struct TimelineInfo { pub pg_version: u32, pub state: TimelineState, + pub is_archived: bool, pub walreceiver_status: String, diff --git a/pageserver/src/http/routes.rs b/pageserver/src/http/routes.rs index 4635e76ea914..cbcc162b325f 100644 --- a/pageserver/src/http/routes.rs +++ b/pageserver/src/http/routes.rs @@ -318,6 +318,24 @@ impl From for ApiError { } } +impl From for ApiError { + fn from(value: crate::tenant::TimelineArchivalError) -> Self { + use crate::tenant::TimelineArchivalError::*; + match value { + NotFound => ApiError::NotFound(anyhow::anyhow!("timeline not found").into()), + Timeout => ApiError::Timeout("hit pageserver internal timeout".into()), + HasUnarchivedChildren(children) => ApiError::PreconditionFailed( + format!( + "Cannot archive timeline which has non-archived child timelines: {children:?}" + ) + .into_boxed_str(), + ), + a @ AlreadyInProgress => ApiError::Conflict(a.to_string()), + Other(e) => ApiError::InternalServerError(e), + } + } +} + impl From for ApiError { fn from(value: crate::tenant::mgr::DeleteTimelineError) -> Self { use crate::tenant::mgr::DeleteTimelineError::*; @@ -405,6 +423,8 @@ async fn build_timeline_info_common( let current_logical_size = timeline.get_current_logical_size(logical_size_task_priority, ctx); let current_physical_size = Some(timeline.layer_size_sum().await); let state = timeline.current_state(); + // Report is_archived = false if the timeline is still loading + let is_archived = timeline.is_archived().unwrap_or(false); let remote_consistent_lsn_projected = timeline .get_remote_consistent_lsn_projected() .unwrap_or(Lsn(0)); @@ -445,6 +465,7 @@ async fn build_timeline_info_common( pg_version: timeline.pg_version, state, + is_archived, walreceiver_status, @@ -686,9 +707,7 @@ async fn timeline_archival_config_handler( tenant .apply_timeline_archival_config(timeline_id, request_data.state) - .await - .context("applying archival config") - .map_err(ApiError::InternalServerError)?; + .await?; Ok::<_, ApiError>(()) } .instrument(info_span!("timeline_archival_config", diff --git a/pageserver/src/tenant.rs b/pageserver/src/tenant.rs index 3a7afff21144..d3589a12c8c6 100644 --- a/pageserver/src/tenant.rs +++ b/pageserver/src/tenant.rs @@ -501,6 +501,38 @@ impl Debug for DeleteTimelineError { } } +#[derive(thiserror::Error)] +pub enum TimelineArchivalError { + #[error("NotFound")] + NotFound, + + #[error("Timeout")] + Timeout, + + #[error("HasUnarchivedChildren")] + HasUnarchivedChildren(Vec), + + #[error("Timeline archival is already in progress")] + AlreadyInProgress, + + #[error(transparent)] + Other(#[from] anyhow::Error), +} + +impl Debug for TimelineArchivalError { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + match self { + Self::NotFound => write!(f, "NotFound"), + Self::Timeout => write!(f, "Timeout"), + Self::HasUnarchivedChildren(c) => { + f.debug_tuple("HasUnarchivedChildren").field(c).finish() + } + Self::AlreadyInProgress => f.debug_tuple("AlreadyInProgress").finish(), + Self::Other(e) => f.debug_tuple("Other").field(e).finish(), + } + } +} + pub enum SetStoppingError { AlreadyStopping(completion::Barrier), Broken, @@ -1326,24 +1358,50 @@ impl Tenant { &self, timeline_id: TimelineId, state: TimelineArchivalState, - ) -> anyhow::Result<()> { - let timeline = self - .get_timeline(timeline_id, false) - .context("Cannot apply timeline archival config to inexistent timeline")?; + ) -> Result<(), TimelineArchivalError> { + info!("setting timeline archival config"); + let timeline = { + let timelines = self.timelines.lock().unwrap(); + + let timeline = match timelines.get(&timeline_id) { + Some(t) => t, + None => return Err(TimelineArchivalError::NotFound), + }; + + // Ensure that there are no non-archived child timelines + let children: Vec = timelines + .iter() + .filter_map(|(id, entry)| { + if entry.get_ancestor_timeline_id() != Some(timeline_id) { + return None; + } + if entry.is_archived() == Some(true) { + return None; + } + Some(*id) + }) + .collect(); + + if !children.is_empty() && state == TimelineArchivalState::Archived { + return Err(TimelineArchivalError::HasUnarchivedChildren(children)); + } + Arc::clone(timeline) + }; let upload_needed = timeline .remote_client .schedule_index_upload_for_timeline_archival_state(state)?; if upload_needed { + info!("Uploading new state"); const MAX_WAIT: Duration = Duration::from_secs(10); let Ok(v) = tokio::time::timeout(MAX_WAIT, timeline.remote_client.wait_completion()).await else { tracing::warn!("reached timeout for waiting on upload queue"); - bail!("reached timeout for upload queue flush"); + return Err(TimelineArchivalError::Timeout); }; - v?; + v.map_err(|e| TimelineArchivalError::Other(anyhow::anyhow!(e)))?; } Ok(()) } diff --git a/test_runner/fixtures/common_types.py b/test_runner/fixtures/common_types.py index 7cadcbb4c256..8eda19d1e2f0 100644 --- a/test_runner/fixtures/common_types.py +++ b/test_runner/fixtures/common_types.py @@ -1,5 +1,6 @@ import random from dataclasses import dataclass +from enum import Enum from functools import total_ordering from typing import Any, Dict, Type, TypeVar, Union @@ -213,3 +214,9 @@ def __eq__(self, other) -> bool: def __hash__(self) -> int: return hash(self._tuple()) + + +# TODO: Replace with `StrEnum` when we upgrade to python 3.11 +class TimelineArchivalState(str, Enum): + ARCHIVED = "Archived" + UNARCHIVED = "Unarchived" diff --git a/test_runner/fixtures/pageserver/http.py b/test_runner/fixtures/pageserver/http.py index cd4261f1b84b..582f9c026439 100644 --- a/test_runner/fixtures/pageserver/http.py +++ b/test_runner/fixtures/pageserver/http.py @@ -10,7 +10,7 @@ from requests.adapters import HTTPAdapter from urllib3.util.retry import Retry -from fixtures.common_types import Lsn, TenantId, TenantShardId, TimelineId +from fixtures.common_types import Lsn, TenantId, TenantShardId, TimelineArchivalState, TimelineId from fixtures.log_helper import log from fixtures.metrics import Metrics, MetricsGetter, parse_metrics from fixtures.pg_version import PgVersion @@ -621,6 +621,22 @@ def timeline_preserve_initdb_archive( ) self.verbose_error(res) + def timeline_archival_config( + self, + tenant_id: Union[TenantId, TenantShardId], + timeline_id: TimelineId, + state: TimelineArchivalState, + ): + config = {"state": state.value} + log.info( + f"requesting timeline archival config {config} for tenant {tenant_id} and timeline {timeline_id}" + ) + res = self.post( + f"http://localhost:{self.port}/v1/tenant/{tenant_id}/timeline/{timeline_id}/archival_config", + json=config, + ) + self.verbose_error(res) + def timeline_get_lsn_by_timestamp( self, tenant_id: Union[TenantId, TenantShardId], diff --git a/test_runner/regress/test_timeline_archive.py b/test_runner/regress/test_timeline_archive.py new file mode 100644 index 000000000000..b774c7c9fe6a --- /dev/null +++ b/test_runner/regress/test_timeline_archive.py @@ -0,0 +1,96 @@ +import pytest +from fixtures.common_types import TenantId, TimelineArchivalState, TimelineId +from fixtures.neon_fixtures import ( + NeonEnv, +) +from fixtures.pageserver.http import PageserverApiException + + +def test_timeline_archive(neon_simple_env: NeonEnv): + env = neon_simple_env + + env.pageserver.allowed_errors.extend( + [ + ".*Timeline .* was not found.*", + ".*timeline not found.*", + ".*Cannot archive timeline which has unarchived child timelines.*", + ".*Precondition failed: Requested tenant is missing.*", + ] + ) + + ps_http = env.pageserver.http_client() + + # first try to archive non existing timeline + # for existing tenant: + invalid_timeline_id = TimelineId.generate() + with pytest.raises(PageserverApiException, match="timeline not found") as exc: + ps_http.timeline_archival_config( + tenant_id=env.initial_tenant, + timeline_id=invalid_timeline_id, + state=TimelineArchivalState.ARCHIVED, + ) + + assert exc.value.status_code == 404 + + # for non existing tenant: + invalid_tenant_id = TenantId.generate() + with pytest.raises( + PageserverApiException, + match=f"NotFound: tenant {invalid_tenant_id}", + ) as exc: + ps_http.timeline_archival_config( + tenant_id=invalid_tenant_id, + timeline_id=invalid_timeline_id, + state=TimelineArchivalState.ARCHIVED, + ) + + assert exc.value.status_code == 404 + + # construct pair of branches to validate that pageserver prohibits + # archival of ancestor timelines when they have non-archived child branches + parent_timeline_id = env.neon_cli.create_branch("test_ancestor_branch_archive_parent", "empty") + + leaf_timeline_id = env.neon_cli.create_branch( + "test_ancestor_branch_archive_branch1", "test_ancestor_branch_archive_parent" + ) + + timeline_path = env.pageserver.timeline_dir(env.initial_tenant, parent_timeline_id) + + with pytest.raises( + PageserverApiException, + match="Cannot archive timeline which has non-archived child timelines", + ) as exc: + assert timeline_path.exists() + + ps_http.timeline_archival_config( + tenant_id=env.initial_tenant, + timeline_id=parent_timeline_id, + state=TimelineArchivalState.ARCHIVED, + ) + + assert exc.value.status_code == 412 + + # Test timeline_detail + leaf_detail = ps_http.timeline_detail( + tenant_id=env.initial_tenant, + timeline_id=leaf_timeline_id, + ) + assert leaf_detail["is_archived"] is False + + # Test that archiving the leaf timeline and then the parent works + ps_http.timeline_archival_config( + tenant_id=env.initial_tenant, + timeline_id=leaf_timeline_id, + state=TimelineArchivalState.ARCHIVED, + ) + leaf_detail = ps_http.timeline_detail( + tenant_id=env.initial_tenant, + timeline_id=leaf_timeline_id, + ) + assert leaf_detail["is_archived"] is True + + ps_http.timeline_archival_config( + tenant_id=env.initial_tenant, + timeline_id=parent_timeline_id, + state=TimelineArchivalState.ARCHIVED, + )