Skip to content

Commit

Permalink
[forge] rust bindings for indexer/testnet deployer
Browse files Browse the repository at this point in the history
  • Loading branch information
rustielin committed Sep 11, 2024
1 parent 4085279 commit cd07380
Show file tree
Hide file tree
Showing 19 changed files with 743 additions and 305 deletions.
21 changes: 12 additions & 9 deletions .github/workflows/adhoc-forge.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -31,11 +31,6 @@ on:
required: false
type: string
description: The Forge k8s cluster to be used for test
FORGE_ENABLE_HAPROXY:
required: false
default: false
type: boolean
description: enable haproxy for the forge test
FORGE_NUM_VALIDATORS:
required: false
type: string
Expand All @@ -48,7 +43,12 @@ on:
required: false
type: boolean
description: Retain debug logs for all nodes

# NOTE: this is a special input that is used to pass extra environment variables to the forge test
# This is required since workflow_dispatch has a limit of 10 inputs. workflow_call has a higher limit so we can parse out the actual envs from this input
FORGE_EXTRA_ENVS:
required: false
type: string
description: Extra environment variables to pass to the forge test. Format is "key1=value1,key2=value2". Use this for FORGE_ENABLE_HAPROXY or FORGE_ENABLE_INDEXER for example
permissions:
contents: read
id-token: write #required for GCP Workload Identity federation which we use to login into Google Artifact Registry
Expand All @@ -60,25 +60,27 @@ jobs:
runs-on: ubuntu-latest
steps:
- name: collect metadata
shell: bash
run: |
echo "GIT_SHA: ${{ inputs.GIT_SHA }}"
echo "IMAGE_TAG: ${{ inputs.IMAGE_TAG }}"
echo "FORGE_IMAGE_TAG: ${{ inputs.FORGE_IMAGE_TAG }}"
echo "FORGE_RUNNER_DURATION_SECS: ${{ inputs.FORGE_RUNNER_DURATION_SECS }}"
echo "FORGE_TEST_SUITE: ${{ inputs.FORGE_TEST_SUITE }}"
echo "FORGE_CLUSTER_NAME: ${{ inputs.FORGE_CLUSTER_NAME }}"
echo "FORGE_ENABLE_HAPROXY: ${{ inputs.FORGE_ENABLE_HAPROXY }}"
echo "FORGE_EXTRA_ENVS: ${{ inputs.FORGE_EXTRA_ENVS }}"
echo "FORGE_NUM_VALIDATORS: ${{ inputs.FORGE_NUM_VALIDATORS }}"
echo "FORGE_NUM_VALIDATOR_FULLNODES: ${{ inputs.FORGE_NUM_VALIDATOR_FULLNODES }}"
echo "FORGE_RETAIN_DEBUG_LOGS: ${{ inputs.FORGE_RETAIN_DEBUG_LOGS }}"
outputs:
gitSha: ${{ inputs.GIT_SHA }}
imageTag: ${{ inputs.IMAGE_TAG }}
forgeImageTag: ${{ inputs.FORGE_IMAGE_TAG }}
forgeRunnerDurationSecs: ${{ inputs.FORGE_RUNNER_DURATION_SECS || 600 }}
forgeTestSuite: ${{ inputs.FORGE_TEST_SUITE }}
forgeClusterName: ${{ inputs.FORGE_CLUSTER_NAME }}
forgeEnableHaproxy: ${{ inputs.FORGE_ENABLE_HAPROXY }}
forgeExtraEnvs: ${{ inputs.FORGE_EXTRA_ENVS }}
forgeNumValidators: ${{ inputs.FORGE_NUM_VALIDATORS }}
forgeNumValidatorFullnodes: ${{ inputs.FORGE_NUM_VALIDATOR_FULLNODES }}

Expand All @@ -93,7 +95,8 @@ jobs:
FORGE_TEST_SUITE: ${{ needs.determine-forge-run-metadata.outputs.forgeTestSuite }}
FORGE_RUNNER_DURATION_SECS: ${{ fromJSON(needs.determine-forge-run-metadata.outputs.forgeRunnerDurationSecs) }} # fromJSON converts to integer
FORGE_CLUSTER_NAME: ${{ needs.determine-forge-run-metadata.outputs.forgeClusterName }}
FORGE_ENABLE_HAPROXY: ${{ needs.determine-forge-run-metadata.outputs.forgeEnableHaproxy }}
FORGE_NUM_VALIDATORS: ${{ needs.determine-forge-run-metadata.outputs.forgeNumValidators }}
FORGE_NUM_VALIDATOR_FULLNODES: ${{ needs.determine-forge-run-metadata.outputs.forgeNumValidatorFullnodes }}
FORGE_RETAIN_DEBUG_LOGS: ${{ inputs.FORGE_RETAIN_DEBUG_LOGS }}
# These inputs are dynamically
FORGE_EXTRA_ENVS: ${{ needs.determine-forge-run-metadata.outputs.forgeExtraEnvs }}
16 changes: 16 additions & 0 deletions .github/workflows/workflow-run-forge.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,10 @@ on:
required: false
type: string
description: Whether to use HAPRoxy
FORGE_ENABLE_INDEXER:
required: false
type: string
description: Whether to use indexer
FORGE_ENABLE_PERFORMANCE:
required: false
type: string
Expand Down Expand Up @@ -84,6 +88,10 @@ on:
required: false
type: string
description: Number of validator fullnodes to use for the forge test
FORGE_EXTRA_ENVS:
required: false
type: string
description: Extra environment variables to pass to the forge test. Format is "key1=value1,key2=value2". Use this for FORGE_ENABLE_HAPROXY or FORGE_ENABLE_INDEXER for example

env:
AWS_ACCOUNT_NUM: ${{ secrets.ENV_ECR_AWS_ACCOUNT_NUM }}
Expand All @@ -104,6 +112,7 @@ env:
FORGE_RUNNER_DURATION_SECS: ${{ inputs.FORGE_RUNNER_DURATION_SECS }}
FORGE_NAMESPACE: ${{ inputs.FORGE_NAMESPACE }}
FORGE_ENABLE_HAPROXY: ${{ inputs.FORGE_ENABLE_HAPROXY }}
FORGE_ENABLE_INDEXER: ${{ inputs.FORGE_ENABLE_INDEXER }}
FORGE_TEST_SUITE: ${{ inputs.FORGE_TEST_SUITE }}
POST_TO_SLACK: ${{ inputs.POST_TO_SLACK }}
FORGE_ENABLE_FAILPOINTS: ${{ inputs.FORGE_ENABLE_FAILPOINTS }}
Expand All @@ -122,6 +131,13 @@ jobs:
runs-on: ubuntu-latest
timeout-minutes: ${{ inputs.TIMEOUT_MINUTES }}
steps:
- name: Parse FORGE_EXTRA_ENVS into environment variables
if: ${{ !inputs.SKIP_JOB && inputs.FORGE_EXTRA_ENVS != null }}
run: |
IFS=',' read -ra FORGE_EXTRA_ENVS <<< "${{ inputs.FORGE_EXTRA_ENVS }}"
for i in "${FORGE_EXTRA_ENVS[@]}"; do
echo "export $i" >> $GITHUB_ENV
done
- uses: actions/checkout@v4
if: ${{ !inputs.SKIP_JOB }}
with:
Expand Down
4 changes: 3 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -80,7 +80,6 @@ target-out-docker
docker/compose/indexer-grpc/data-service-grpc-server.crt
docker/compose/indexer-grpc/data-service-grpc-server.key


# Doc generation output
*.md.old

Expand Down Expand Up @@ -135,3 +134,6 @@ test_indexer_grpc/*
*.dot
*.bytecode
!third_party/move/move-prover/tests/xsources/design/*.bytecode

# Allow forge envs
!forge.env
52 changes: 33 additions & 19 deletions testsuite/forge-cli/src/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -140,8 +140,8 @@ enum OperatorCommand {
SetNodeImageTag(SetNodeImageTag),
/// Clean up an existing cluster
CleanUp(CleanUp),
/// Resize an existing cluster
Resize(Resize),
/// Create a new cluster for testing purposes
Create(Create),
}

#[derive(Parser, Debug)]
Expand Down Expand Up @@ -193,6 +193,11 @@ struct K8sSwarm {
help = "Retain debug logs and above for all nodes instead of just the first 5 nodes"
)]
retain_debug_logs: bool,
#[clap(
long,
help = "If set, spins up an indexer stack alongside the testnet. Same as --enable-indexer"
)]
enable_indexer: bool,
}

#[derive(Parser, Debug)]
Expand All @@ -217,8 +222,8 @@ struct CleanUp {
}

#[derive(Parser, Debug)]
struct Resize {
#[clap(long, help = "The kubernetes namespace to resize")]
struct Create {
#[clap(long, help = "The kubernetes namespace to create in")]
namespace: String,
#[clap(long, default_value_t = 30)]
num_validators: usize,
Expand All @@ -227,13 +232,13 @@ struct Resize {
#[clap(
long,
help = "Override the image tag used for validators",
default_value = "devnet"
default_value = "main"
)]
validator_image_tag: String,
#[clap(
long,
help = "Override the image tag used for testnet-specific components",
default_value = "devnet"
default_value = "main"
)]
testnet_image_tag: String,
#[clap(
Expand All @@ -248,6 +253,8 @@ struct Resize {
connect_directly: bool,
#[clap(long, help = "If set, enables HAProxy for each of the validators")]
enable_haproxy: bool,
#[clap(long, help = "If set, spins up an indexer stack alongside the testnet")]
enable_indexer: bool,
}

// common metrics thresholds:
Expand Down Expand Up @@ -393,6 +400,7 @@ fn main() -> Result<()> {
k8s.reuse,
k8s.keep,
k8s.enable_haproxy,
k8s.enable_indexer,
)
.unwrap(),
&args.options,
Expand Down Expand Up @@ -421,19 +429,25 @@ fn main() -> Result<()> {
}
Ok(())
},
OperatorCommand::Resize(resize) => {
runtime.block_on(install_testnet_resources(
resize.namespace,
resize.num_validators,
resize.num_fullnodes,
resize.validator_image_tag,
resize.testnet_image_tag,
resize.move_modules_dir,
!resize.connect_directly,
resize.enable_haproxy,
None,
None,
))?;
OperatorCommand::Create(create) => {
let kube_client = runtime.block_on(create_k8s_client())?;
let era = generate_new_era();
let values = ForgeDeployerValues {
profile: DEFAULT_FORGE_DEPLOYER_PROFILE.to_string(),
era,
namespace: create.namespace,
indexer_grpc_values: None,
indexer_processor_values: None,
};
let forge_deployer_manager =
ForgeDeployerManager::from_k8s_client(kube_client, values);
runtime.block_on(forge_deployer_manager.ensure_namespace_prepared())?;
// NOTE: this is generally not going to run from within the cluster, do not perform any operations
// that might require internal DNS resolution to work, such as txn emission directly against the node service IPs.
runtime.block_on(forge_deployer_manager.start(ForgeDeployerType::Testnet))?;
if create.enable_indexer {
runtime.block_on(forge_deployer_manager.start(ForgeDeployerType::Indexer))?;
}
Ok(())
},
},
Expand Down
2 changes: 2 additions & 0 deletions testsuite/forge.env
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
# This file is source-d when running Forge
FORGE_ENABLE_INDEXER=true
6 changes: 6 additions & 0 deletions testsuite/forge.py
Original file line number Diff line number Diff line change
Expand Up @@ -1147,6 +1147,7 @@ def create_forge_command(
forge_namespace_reuse: Optional[str],
forge_namespace_keep: Optional[str],
forge_enable_haproxy: Optional[str],
forge_enable_indexer: Optional[str],
cargo_args: Optional[Sequence[str]],
forge_cli_args: Optional[Sequence[str]],
test_args: Optional[Sequence[str]],
Expand Down Expand Up @@ -1216,6 +1217,8 @@ def create_forge_command(
forge_args.append("--keep")
if forge_enable_haproxy == "true":
forge_args.append("--enable-haproxy")
if forge_enable_indexer == "true":
forge_args.append("--enable-indexer")

if test_args:
forge_args.extend(test_args)
Expand Down Expand Up @@ -1328,6 +1331,7 @@ def seeded_random_choice(namespace: str, cluster_names: Sequence[str]) -> str:
@envoption("FORGE_NAMESPACE_KEEP")
@envoption("FORGE_NAMESPACE_REUSE")
@envoption("FORGE_ENABLE_HAPROXY")
@envoption("FORGE_ENABLE_INDEXER")
@envoption("FORGE_ENABLE_FAILPOINTS")
@envoption("FORGE_ENABLE_PERFORMANCE")
@envoption("FORGE_TEST_SUITE")
Expand Down Expand Up @@ -1373,6 +1377,7 @@ def test(
forge_enable_failpoints: Optional[str],
forge_enable_performance: Optional[str],
forge_enable_haproxy: Optional[str],
forge_enable_indexer: Optional[str],
forge_test_suite: str,
forge_runner_duration_secs: str,
forge_image_tag: Optional[str],
Expand Down Expand Up @@ -1598,6 +1603,7 @@ def test(
forge_namespace_reuse=forge_namespace_reuse,
forge_namespace_keep=forge_namespace_keep,
forge_enable_haproxy=forge_enable_haproxy,
forge_enable_indexer=forge_enable_indexer,
cargo_args=cargo_args,
forge_cli_args=forge_cli_args,
test_args=test_args,
Expand Down
42 changes: 34 additions & 8 deletions testsuite/forge/src/backend/k8s/cluster_helper.rs
Original file line number Diff line number Diff line change
Expand Up @@ -405,7 +405,7 @@ pub async fn uninstall_testnet_resources(kube_namespace: String) -> Result<()> {
Ok(())
}

fn generate_new_era() -> String {
pub fn generate_new_era() -> String {
let mut rng = rand::thread_rng();
let r: u8 = rng.gen();
format!("forge{}", r)
Expand Down Expand Up @@ -826,15 +826,41 @@ fn dump_helm_values_to_file(helm_release_name: &str, tmp_dir: &TempDir) -> Resul

#[derive(Error, Debug)]
#[error("{0}")]
enum ApiError {
pub enum ApiError {
RetryableError(String),
FinalError(String),
}

async fn create_namespace(
/// Does the same as create_namespace and handling the 409, but for any k8s resource T
pub async fn maybe_create_k8s_resource<T>(
api: Arc<dyn ReadWrite<T>>,
resource: T,
) -> Result<T, ApiError>
where
T: kube::Resource + Clone + DeserializeOwned + Debug,
<T as kube::Resource>::DynamicType: Default,
{
if let Err(KubeError::Api(api_err)) = api.create(&PostParams::default(), &resource).await {
if api_err.code == 409 {
info!(
"Resource {} already exists, continuing with it",
resource.name()
);
} else {
return Err(ApiError::RetryableError(format!(
"Failed to use existing resource {}: {:?}",
resource.name(),
api_err
)));
}
}
Ok(resource)
}

pub async fn create_namespace(
namespace_api: Arc<dyn ReadWrite<Namespace>>,
kube_namespace: String,
) -> Result<(), ApiError> {
) -> Result<Namespace, ApiError> {
let kube_namespace_name = kube_namespace.clone();
let namespace = Namespace {
metadata: ObjectMeta {
Expand Down Expand Up @@ -866,7 +892,7 @@ async fn create_namespace(
)));
}
}
Ok(())
Ok(namespace)
}

pub async fn create_management_configmap(
Expand Down Expand Up @@ -1067,11 +1093,11 @@ pub fn make_k8s_label(value: String) -> String {
#[cfg(test)]
mod tests {
use super::*;
use crate::FailedNamespacesApi;
use crate::FailedK8sResourceApi;

#[tokio::test]
async fn test_create_namespace_final_error() {
let namespace_creator = Arc::new(FailedNamespacesApi::from_status_code(401));
let namespace_creator = Arc::new(FailedK8sResourceApi::from_status_code(401));
let result = create_namespace(namespace_creator, "banana".to_string()).await;
match result {
Err(ApiError::FinalError(_)) => {},
Expand Down Expand Up @@ -1148,7 +1174,7 @@ labels:

#[tokio::test]
async fn test_create_namespace_retryable_error() {
let namespace_creator = Arc::new(FailedNamespacesApi::from_status_code(403));
let namespace_creator = Arc::new(FailedK8sResourceApi::from_status_code(403));
let result = create_namespace(namespace_creator, "banana".to_string()).await;
match result {
Err(ApiError::RetryableError(_)) => {},
Expand Down
19 changes: 8 additions & 11 deletions testsuite/forge/src/backend/k8s/fullnode.rs
Original file line number Diff line number Diff line change
Expand Up @@ -513,9 +513,7 @@ pub async fn install_public_fullnode<'a>(
#[cfg(test)]
mod tests {
use super::*;
use crate::{
MockConfigMapApi, MockPersistentVolumeClaimApi, MockServiceApi, MockStatefulSetApi,
};
use crate::MockK8sResourceApi;
use aptos_config::config::Identity;
use aptos_sdk::crypto::{x25519::PrivateKey, Uniform};
use k8s_openapi::apimachinery::pkg::api::resource::Quantity;
Expand Down Expand Up @@ -728,15 +726,14 @@ mod tests {
let version = Version::new(0, "banana".to_string());

// create APIs
let stateful_set_api = Arc::new(MockStatefulSetApi::from_stateful_set(
get_dummy_validator_stateful_set(),
let stateful_set_api: Arc<MockK8sResourceApi<StatefulSet>> = Arc::new(
MockK8sResourceApi::from_resource(get_dummy_validator_stateful_set()),
);
let configmap_api = Arc::new(MockK8sResourceApi::new());
let persistent_volume_claim_api = Arc::new(MockK8sResourceApi::from_resource(
get_dummy_validator_persistent_volume_claim(),
));
let configmap_api = Arc::new(MockConfigMapApi::from_config_map(ConfigMap::default()));
let persistent_volume_claim_api =
Arc::new(MockPersistentVolumeClaimApi::from_persistent_volume_claim(
get_dummy_validator_persistent_volume_claim(),
));
let service_api = Arc::new(MockServiceApi::from_service(Service::default()));
let service_api = Arc::new(MockK8sResourceApi::new());

// get the base config and mutate it
let mut node_config = get_default_pfn_node_config();
Expand Down
Loading

0 comments on commit cd07380

Please sign in to comment.