-
Notifications
You must be signed in to change notification settings - Fork 89
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Node heartbeats #3709
Node heartbeats #3709
Changes from 21 commits
ebda95f
dfa33b1
ecb53bf
2191025
98235b0
9bfd366
066e2f2
ce7e83a
f09e590
ea11796
6365301
bda14a5
a75b740
4cd989e
d93c416
80a74b4
d4f254b
8408aa9
0cc06b6
e247467
63336e1
a96c1b0
0f7ef4d
c9f92a8
8487070
a50051b
e841e6e
8cc5fe4
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -203,6 +203,7 @@ multierror | |
multiformats | ||
Muxed | ||
mypy | ||
NATS | ||
nbconvert | ||
nemt | ||
nocheck | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -16,15 +16,17 @@ import ( | |
|
||
var defaultColumnGroups = []string{"labels", "capacity"} | ||
var orderByFields = []string{"id", "type", "available_cpu", "available_memory", "available_disk", "available_gpu", "status"} | ||
var filterStatusValues = []string{"approved", "pending", "rejected"} | ||
var filterApprovalValues = []string{"approved", "pending", "rejected"} | ||
var filterStatusValues = []string{"healthy", "unhealthy", "unknown"} | ||
|
||
// ListOptions is a struct to support node command | ||
type ListOptions struct { | ||
output.OutputOptions | ||
cliflags.ListOptions | ||
ColumnGroups []string | ||
Labels string | ||
FilterByStatus string | ||
ColumnGroups []string | ||
Labels string | ||
FilterByApproval string | ||
FilterByStatus string | ||
} | ||
|
||
// NewListOptions returns initialized Options | ||
|
@@ -42,22 +44,24 @@ func NewListCmd() *cobra.Command { | |
Use: "list", | ||
Short: "List info of network nodes. ", | ||
Args: cobra.NoArgs, | ||
Run: o.run, | ||
RunE: o.run, | ||
} | ||
nodeCmd.Flags().StringSliceVar(&o.ColumnGroups, "show", o.ColumnGroups, | ||
fmt.Sprintf("What column groups to show. Zero or more of: %q", maps.Keys(toggleColumns))) | ||
nodeCmd.Flags().StringVar(&o.Labels, "labels", o.Labels, | ||
"Filter nodes by labels. See https://kubernetes.io/docs/concepts/overview/working-with-objects/labels/ for more information.") | ||
nodeCmd.Flags().AddFlagSet(cliflags.ListFlags(&o.ListOptions)) | ||
nodeCmd.Flags().AddFlagSet(cliflags.OutputFormatFlags(&o.OutputOptions)) | ||
nodeCmd.Flags().StringVar(&o.FilterByApproval, "filter-approval", o.FilterByApproval, | ||
fmt.Sprintf("Filter nodes by approval. One of: %q", filterApprovalValues)) | ||
nodeCmd.Flags().StringVar(&o.FilterByStatus, "filter-status", o.FilterByStatus, | ||
fmt.Sprintf("Filter nodes by status. One of: %q", filterStatusValues)) | ||
|
||
return nodeCmd | ||
} | ||
|
||
// Run executes node command | ||
func (o *ListOptions) run(cmd *cobra.Command, _ []string) { | ||
func (o *ListOptions) run(cmd *cobra.Command, _ []string) error { | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Non-blocking: if we are going to return an error from this method can we replace all the There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Can definitely add a ticket to address this, yeah. There's a mix of approaches atm. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Added #3764 |
||
ctx := cmd.Context() | ||
|
||
var err error | ||
|
@@ -69,15 +73,22 @@ func (o *ListOptions) run(cmd *cobra.Command, _ []string) { | |
} | ||
} | ||
|
||
if o.FilterByApproval != "" { | ||
if !slices.Contains(filterApprovalValues, o.FilterByApproval) { | ||
return fmt.Errorf("cannot use '%s' as filter approval value, should be one of: %q", o.FilterByApproval, filterApprovalValues) | ||
} | ||
} | ||
|
||
if o.FilterByStatus != "" { | ||
if !slices.Contains(filterStatusValues, o.FilterByStatus) { | ||
util.Fatal(cmd, fmt.Errorf("cannot use '%s' as filter status value, should be one of: %q", o.FilterByStatus, filterStatusValues), 1) | ||
return fmt.Errorf("cannot use '%s' as filter status value, should be one of: %q", o.FilterByStatus, filterStatusValues) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. consider printing the specific flag name in the error message e.g. |
||
} | ||
} | ||
|
||
response, err := util.GetAPIClientV2(cmd).Nodes().List(ctx, &apimodels.ListNodesRequest{ | ||
Labels: labelRequirements, | ||
FilterByStatus: o.FilterByStatus, | ||
Labels: labelRequirements, | ||
FilterByApproval: o.FilterByApproval, | ||
FilterByStatus: o.FilterByStatus, | ||
BaseListRequest: apimodels.BaseListRequest{ | ||
Limit: o.Limit, | ||
NextToken: o.NextToken, | ||
|
@@ -97,4 +108,6 @@ func (o *ListOptions) run(cmd *cobra.Command, _ []string) { | |
if err = output.Output(cmd, columns, o.OutputOptions, response.Nodes); err != nil { | ||
util.Fatal(cmd, fmt.Errorf("failed to output: %w", err), 1) | ||
} | ||
|
||
return nil | ||
} |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,67 @@ | ||
--- | ||
sidebar_label: approve | ||
--- | ||
|
||
# Command: `node approve` | ||
|
||
The `bacalhau node approve` command offers administrators the ability to approve the cluster membership for a node using its unique identifier. | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. command offers |
||
|
||
## Description: | ||
|
||
Using the `approve` sub-command under the `bacalhau node` umbrella, users can allow a node in the pending state to join the cluster and receive work. This feature is crucial for system administrators to manage the cluster. | ||
|
||
## Usage: | ||
|
||
```bash | ||
bacalhau node approve [id] [flags] | ||
``` | ||
|
||
## Flags: | ||
|
||
- `[id]`: | ||
|
||
- The unique identifier of the node you wish to approve. | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
|
||
|
||
- `-h`, `--help`: | ||
|
||
- Displays the help documentation for the `approve` command. | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
|
||
|
||
- `-m message`: | ||
|
||
- A message to be attached to the approval action. | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. makes me wonder if we should also include the ClientID of the user issuing the approval, but that could always be included later. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Yes, I think at the moment something being audited just means it was logged somewhere, and so the client id should be in the request. |
||
|
||
## Global Flags: | ||
|
||
- `--api-host string`: | ||
|
||
- Specifies the host for client-server communication through REST. This flag is overridden if the `BACALHAU_API_HOST` environment variable is set. | ||
- Default: `"bootstrap.production.bacalhau.org"` | ||
|
||
- `--api-port int`: | ||
|
||
- Designates the port for REST-based communication between client and server. This flag is ignored if the `BACALHAU_API_PORT` environment variable is defined. | ||
- Default: `1234` | ||
|
||
- `--log-mode logging-mode`: | ||
|
||
- Determines the log format preference. | ||
- Options: `'default','station','json','combined','event'` | ||
- Default: `'default'` | ||
|
||
- `--repo string`: | ||
- Points to the bacalhau repository's path. | ||
- Default: `"$HOME/.bacalhau"` | ||
Comment on lines
+33
to
+53
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I rather not have these repeated here and just document them one, optionally providing a link, otherwise it draw attention away from what this page is documenting imo. |
||
|
||
## Examples: | ||
|
||
1. Approve a Node with ID `nodeID123`: | ||
|
||
```bash | ||
bacalhau node approve nodeID123 | ||
``` | ||
|
||
2. Approve a Node with an audit message: | ||
|
||
```bash | ||
bacalhau node approve nodeID123 -m "okay" | ||
``` |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,67 @@ | ||
--- | ||
sidebar_label: delete | ||
--- | ||
|
||
# Command: `node delete` | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. node delete |
||
|
||
The `bacalhau node delete` command offers administrators the ability to remove a node from the cluster using its unique identifier. | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. similar comment about administrations |
||
|
||
## Description: | ||
|
||
Using the `delete` sub-command, administrators can remove a node from the list of available compute nodes in the cluster. This feature is necessary for the management of the infrastructure. | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. for consistency with above: Using the |
||
|
||
## Usage: | ||
|
||
```bash | ||
bacalhau node delete [id] [flags] | ||
``` | ||
|
||
## Flags: | ||
|
||
- `[id]`: | ||
|
||
- The unique identifier of the node you wish to delete. | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
|
||
|
||
- `-h`, `--help`: | ||
|
||
- Displays the help documentation for the `delete` command. | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. replace describe |
||
|
||
- `-m message`: | ||
|
||
- A message to be attached to the deletion action. | ||
|
||
## Global Flags: | ||
|
||
- `--api-host string`: | ||
|
||
- Specifies the host for client-server communication through REST. This flag is overridden if the `BACALHAU_API_HOST` environment variable is set. | ||
- Default: `"bootstrap.production.bacalhau.org"` | ||
|
||
- `--api-port int`: | ||
|
||
- Designates the port for REST-based communication between client and server. This flag is ignored if the `BACALHAU_API_PORT` environment variable is defined. | ||
- Default: `1234` | ||
|
||
- `--log-mode logging-mode`: | ||
|
||
- Determines the log format preference. | ||
- Options: `'default','station','json','combined','event'` | ||
- Default: `'default'` | ||
|
||
- `--repo string`: | ||
- Points to the bacalhau repository's path. | ||
- Default: `"$HOME/.bacalhau"` | ||
Comment on lines
+33
to
+53
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I rather not have these repeated here and just document them one, optionally providing a link, otherwise it draw attention away from what this page is documenting imo. |
||
|
||
## Examples: | ||
|
||
1. Delete the Node with ID `nodeID123`: | ||
|
||
```bash | ||
bacalhau node delete nodeID123 | ||
``` | ||
|
||
2. Delete a Node with an audit message: | ||
|
||
```bash | ||
bacalhau node delete nodeID123 -m "bad actor" | ||
``` |
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -12,48 +12,75 @@ bacalhau node [command] | |
|
||
## Available Commands | ||
|
||
1. **[approve](./approve)**: | ||
|
||
- Description: Approves a single node to join the cluster. | ||
- Usage: | ||
|
||
```bash | ||
bacalhau node approve | ||
``` | ||
|
||
1. **[delete](./delete)**: | ||
|
||
- Description: Deletes a node from the cluster using its ID. | ||
- Usage: | ||
```bash | ||
bacalhau node delete | ||
``` | ||
|
||
1. **[describe](./describe)**: | ||
- Description: Retrieves detailed information of a node using its ID. | ||
- Usage: | ||
```bash | ||
bacalhau node describe | ||
``` | ||
|
||
2. **[list](./list)**: | ||
- Description: Lists the details of all nodes present in the network. | ||
- Usage: | ||
```bash | ||
bacalhau node list | ||
``` | ||
|
||
- Description: Retrieves detailed information of a node using its ID. | ||
- Usage: | ||
```bash | ||
bacalhau node describe | ||
``` | ||
|
||
1. **[list](./list)**: | ||
|
||
- Description: Lists the details of all nodes present in the network. | ||
- Usage: | ||
```bash | ||
bacalhau node list | ||
``` | ||
|
||
1. **[reject](./reject)**: | ||
|
||
- Description: Rejects a specific node's request to join the cluster. | ||
- Usage: | ||
```bash | ||
bacalhau node reject | ||
``` | ||
|
||
For comprehensive details on any of the sub-commands, run: | ||
|
||
```bash | ||
bacalhau node [command] --help | ||
``` | ||
|
||
## Flags | ||
|
||
- `-h`, `--help`: | ||
- Description: Shows the help information for the `node` command. | ||
- Description: Shows the help information for the `node` command. | ||
|
||
## Global Flags | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. similar sentiment around not repeating global flags |
||
|
||
- `--api-host string`: | ||
- Description: Specifies the host for RESTful communication between the client and server. The flag will be ignored if the `BACALHAU_API_HOST` environment variable is set. | ||
- Default: `bootstrap.production.bacalhau.org` | ||
|
||
- Description: Specifies the host for RESTful communication between the client and server. The flag will be ignored if the `BACALHAU_API_HOST` environment variable is set. | ||
- Default: `bootstrap.production.bacalhau.org` | ||
|
||
- `--api-port int`: | ||
- Description: Designates the port for RESTful communication. The flag will be bypassed if the `BACALHAU_API_PORT` environment variable is active. | ||
- Default: `1234` | ||
|
||
- `--log-mode logging-mode`: | ||
- Description: Chooses the preferred log format. Available choices are: `default`, `station`, `json`, `combined`, and `event`. | ||
- Default: `default` | ||
- Description: Designates the port for RESTful communication. The flag will be bypassed if the `BACALHAU_API_PORT` environment variable is active. | ||
- Default: `1234` | ||
|
||
- `--repo string`: | ||
- Description: Specifies the path to the bacalhau repository. | ||
- Default: `/Users/walid/.bacalhau` | ||
- `--log-mode logging-mode`: | ||
|
||
--- | ||
- Description: Chooses the preferred log format. Available choices are: `default`, `station`, `json`, `combined`, and `event`. | ||
- Default: `default` | ||
|
||
This should provide an organized and structured overview of the `node` command and its functionalities! | ||
- `--repo string`: | ||
- Description: Specifies the path to the bacalhau repository. | ||
- Default: `$HOME/.bacalhau` | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. who's walid? |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
i think it should be binary - "connected" or "unconnected" (we could add something more sophisticated like "unhealthy" when we have more health checks
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Just for the filters, or for the
node list
as well?There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
i think the node list as well - "healthy" and "unhealthy" imply something i don't think we know. We only know if it's connected or disconnected.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Went with Connected/Disconnected for now, discussed with Walid and will add another duration in future to mark the point beyond which we don't think the node is coming back.