Skip to content
This repository was archived by the owner on Jan 9, 2023. It is now read-only.

Commit 7306c38

Browse files
author
Luke Addison
committed
Recreate bastion when failed
1 parent 1363ec6 commit 7306c38

File tree

14 files changed

+137
-91
lines changed

14 files changed

+137
-91
lines changed

pkg/terraform/providers/tarmak/data_source_bastion_instance.go

Lines changed: 0 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -19,10 +19,6 @@ func dataSourceBastionInstance() *schema.Resource {
1919
Type: schema.TypeString,
2020
Required: true,
2121
},
22-
"instance_id": {
23-
Type: schema.TypeString,
24-
Optional: true,
25-
},
2622
"username": {
2723
Type: schema.TypeString,
2824
Required: true,

pkg/terraform/providers/tarmak/resource_vault_cluster.go

Lines changed: 7 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@ func resourceTarmakVaultCluster() *schema.Resource {
1818
Create: resourceTarmakVaultClusterCreate,
1919
Read: resourceTarmakVaultClusterRead,
2020
Delete: resourceTarmakVaultClusterDelete,
21+
Update: resourceTarmakVaultClusterCreate,
2122

2223
Schema: map[string]*schema.Schema{
2324
"internal_fqdns": {
@@ -43,6 +44,10 @@ func resourceTarmakVaultCluster() *schema.Resource {
4344
Required: true,
4445
ForceNew: true,
4546
},
47+
"bastion_status": {
48+
Type: schema.TypeString,
49+
Required: true,
50+
},
4651
"status": {
4752
Type: schema.TypeString,
4853
Computed: true,
@@ -52,12 +57,10 @@ func resourceTarmakVaultCluster() *schema.Resource {
5257
}
5358

5459
func resourceTarmakVaultClusterCreate(d *schema.ResourceData, meta interface{}) (err error) {
60+
5561
client := meta.(*rpc.Client)
5662

5763
vaultInternalFQDNs := []string{}
58-
59-
//return fmt.Errorf("DEBUG: %#v", d.Get("internal_fqdns").([]interface{})[0])
60-
6164
for _, internalFQDN := range d.Get("internal_fqdns").([]interface{}) {
6265
vaultInternalFQDNs = append(vaultInternalFQDNs, internalFQDN.(string))
6366
}
@@ -94,6 +97,7 @@ func resourceTarmakVaultClusterCreate(d *schema.ResourceData, meta interface{})
9497
}
9598

9699
func resourceTarmakVaultClusterRead(d *schema.ResourceData, meta interface{}) (err error) {
100+
97101
client := meta.(*rpc.Client)
98102

99103
vaultInternalFQDNs := []string{}

pkg/terraform/providers/tarmak/resource_vault_instance_role.go

Lines changed: 25 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,6 @@
22
package tarmak
33

44
import (
5-
"fmt"
65
"log"
76
"net/rpc"
87

@@ -16,6 +15,7 @@ func resourceTarmakVaultInstanceRole() *schema.Resource {
1615
Create: resourceTarmakVaultInstanceRoleCreate,
1716
Read: resourceTarmakVaultInstanceRoleRead,
1817
Delete: resourceTarmakVaultInstanceRoleDelete,
18+
Update: resourceTarmakVaultInstanceRoleCreate,
1919

2020
Schema: map[string]*schema.Schema{
2121
"role_name": {
@@ -41,6 +41,10 @@ func resourceTarmakVaultInstanceRole() *schema.Resource {
4141
Required: true,
4242
ForceNew: true,
4343
},
44+
"vault_status": {
45+
Type: schema.TypeString,
46+
Required: true,
47+
},
4448
"init_token": {
4549
Type: schema.TypeString,
4650
Computed: true,
@@ -52,6 +56,13 @@ func resourceTarmakVaultInstanceRole() *schema.Resource {
5256
func resourceTarmakVaultInstanceRoleCreate(d *schema.ResourceData, meta interface{}) (err error) {
5357
client := meta.(*rpc.Client)
5458

59+
vaultStatus := d.Get("vault_status").(string)
60+
if vaultStatus != tarmakRPC.VaultStatusReady {
61+
log.Print("vault is not ready")
62+
d.SetId("")
63+
return nil
64+
}
65+
5566
roleName := d.Get("role_name").(string)
5667
clusterName := d.Get("vault_cluster_name").(string)
5768
vaultInternalFQDNs := []string{}
@@ -72,12 +83,15 @@ func resourceTarmakVaultInstanceRoleCreate(d *schema.ResourceData, meta interfac
7283
var reply tarmakRPC.VaultInstanceRoleReply
7384
err = client.Call(tarmakRPC.VaultInstanceRole, args, &reply)
7485
if err != nil {
86+
log.Printf("call to %s failed: %s", tarmakRPC.VaultInstanceRole, err)
7587
d.SetId("")
76-
return fmt.Errorf("call to %s failed: %s", tarmakRPC.VaultInstanceRole, err)
88+
return nil
7789
}
7890

7991
if err = d.Set("init_token", reply.InitToken); err != nil {
80-
return fmt.Errorf("failed to set init token: %s", err)
92+
log.Printf("failed to set init token: %s", err)
93+
d.SetId("")
94+
return
8195
}
8296

8397
d.SetId(reply.InitToken)
@@ -88,6 +102,13 @@ func resourceTarmakVaultInstanceRoleCreate(d *schema.ResourceData, meta interfac
88102
func resourceTarmakVaultInstanceRoleRead(d *schema.ResourceData, meta interface{}) (err error) {
89103
client := meta.(*rpc.Client)
90104

105+
vaultStatus := d.Get("vault_status").(string)
106+
if vaultStatus != tarmakRPC.VaultStatusReady {
107+
log.Printf("vault is not ready")
108+
d.SetId("")
109+
return nil
110+
}
111+
91112
roleName := d.Get("role_name").(string)
92113
clusterName := d.Get("vault_cluster_name").(string)
93114
vaultInternalFQDNs := []string{}
@@ -108,6 +129,7 @@ func resourceTarmakVaultInstanceRoleRead(d *schema.ResourceData, meta interface{
108129
var reply tarmakRPC.VaultInstanceRoleReply
109130
err = client.Call(tarmakRPC.VaultInstanceRole, args, &reply)
110131
if err != nil {
132+
log.Printf("call to %s failed: %s", tarmakRPC.VaultInstanceRole, err)
111133
d.SetId("")
112134
return nil
113135
}

pkg/terraform/providers/tarmak/rpc/bastion_instance_status.go

Lines changed: 48 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,13 @@ import (
88
cluster "github.com/jetstack/tarmak/pkg/apis/cluster/v1alpha1"
99
)
1010

11+
const (
12+
bastionVerifyTimeoutSeconds = 180
13+
BastionStatusUnknown = "unknown"
14+
BastionStatusReady = "ready"
15+
BastionStatusDown = "down"
16+
)
17+
1118
var (
1219
BastionInstanceStatusCall = fmt.Sprintf("%s.BastionInstanceStatus", RPCName)
1320
)
@@ -25,23 +32,53 @@ func (r *tarmakRPC) BastionInstanceStatus(args *BastionInstanceStatusArgs, resul
2532
r.tarmak.Log().Debug("received rpc bastion status")
2633

2734
if r.cluster.GetState() == cluster.StateDestroy {
28-
result.Status = "unknown"
35+
result.Status = BastionStatusUnknown
2936
return nil
3037
}
3138

32-
var err error
33-
for i := 1; i <= Retries; i++ {
34-
if err = r.cluster.Environment().VerifyBastionAvailable(); err != nil {
35-
r.tarmak.Log().Error(err)
36-
time.Sleep(time.Second)
37-
} else {
38-
break
39+
// check if bastion instance exists
40+
instances, err := r.cluster.Environment().Provider().ListHosts(r.cluster.Environment().Hub())
41+
if err != nil {
42+
r.tarmak.Log().Debug("failed to list instances in hub: %s", err)
43+
result.Status = BastionStatusUnknown
44+
return nil
45+
}
46+
bastionExists := false
47+
for _, instance := range instances {
48+
for _, role := range instance.Roles() {
49+
if role == cluster.InstancePoolTypeBastion {
50+
bastionExists = true
51+
}
3952
}
4053
}
41-
if err != nil {
42-
return fmt.Errorf("bastion instance is not ready: %s", err)
54+
if !bastionExists {
55+
r.tarmak.Log().Debug("bastion instance does not exist")
56+
result.Status = BastionStatusDown
57+
return nil
58+
}
59+
60+
// verify bastion responsiveness
61+
verifyChannel := make(chan bool)
62+
go func() {
63+
for {
64+
if err := r.cluster.Environment().VerifyBastionAvailable(); err != nil {
65+
r.tarmak.Log().Error(err)
66+
time.Sleep(time.Second)
67+
continue
68+
}
69+
verifyChannel <- true
70+
return
71+
}
72+
}()
73+
74+
select {
75+
case <-verifyChannel:
76+
case <-time.After(bastionVerifyTimeoutSeconds * time.Second):
77+
r.tarmak.Log().Debug("failed to verify bastion instance")
78+
result.Status = BastionStatusDown
79+
return nil
4380
}
4481

45-
result.Status = "ready"
82+
result.Status = BastionStatusReady
4683
return nil
4784
}

pkg/terraform/providers/tarmak/rpc/vault_cluster_status.go

Lines changed: 25 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,11 @@ import (
1010
cluster "github.com/jetstack/tarmak/pkg/apis/cluster/v1alpha1"
1111
)
1212

13+
const (
14+
VaultStatusUnknown = "unknown"
15+
VaultStatusReady = "ready"
16+
)
17+
1318
var (
1419
VaultClusterStatusCall = fmt.Sprintf("%s.VaultClusterStatus", RPCName)
1520
VaultClusterInitStatusCall = fmt.Sprintf("%s.VaultClusterInitStatus", RPCName)
@@ -30,7 +35,7 @@ func (r *tarmakRPC) VaultClusterStatus(args *VaultClusterStatusArgs, result *Vau
3035
r.tarmak.Log().Debug("received rpc vault cluster status")
3136

3237
if r.tarmak.Cluster().GetState() == cluster.StateDestroy {
33-
result.Status = "unknown"
38+
result.Status = VaultStatusUnknown
3439
return nil
3540
}
3641

@@ -40,14 +45,16 @@ func (r *tarmakRPC) VaultClusterStatus(args *VaultClusterStatusArgs, result *Vau
4045
if err != nil {
4146
err = fmt.Errorf("failed to initialise vault cluster: %s", err)
4247
r.tarmak.Log().Error(err)
43-
return err
48+
result.Status = VaultStatusUnknown
49+
return nil
4450
}
4551

4652
vaultTunnel, err := vault.TunnelFromFQDNs(args.VaultInternalFQDNs, args.VaultCA)
4753
if err != nil {
4854
err = fmt.Errorf("failed to create vault tunnel: %s", err)
4955
r.tarmak.Log().Error(err)
50-
return err
56+
result.Status = VaultStatusUnknown
57+
return nil
5158
}
5259
defer vaultTunnel.Stop()
5360

@@ -57,7 +64,8 @@ func (r *tarmakRPC) VaultClusterStatus(args *VaultClusterStatusArgs, result *Vau
5764
if err != nil {
5865
err = fmt.Errorf("failed to retrieve vault root token: %s", err)
5966
r.tarmak.Log().Error(err)
60-
return err
67+
result.Status = VaultStatusUnknown
68+
return nil
6169
}
6270

6371
vaultClient.SetToken(vaultRootToken)
@@ -68,18 +76,19 @@ func (r *tarmakRPC) VaultClusterStatus(args *VaultClusterStatusArgs, result *Vau
6876
if err := k.Ensure(); err != nil {
6977
err = fmt.Errorf("vault cluster is not ready: %s", err)
7078
r.tarmak.Log().Error(err)
71-
return err
79+
result.Status = VaultStatusUnknown
80+
return nil
7281
}
7382

74-
result.Status = "ready"
83+
result.Status = VaultStatusReady
7584
return nil
7685
}
7786

7887
func (r *tarmakRPC) VaultClusterInitStatus(args *VaultClusterStatusArgs, result *VaultClusterStatusReply) error {
7988
r.tarmak.Log().Debug("received rpc vault cluster status")
8089

8190
if r.tarmak.Cluster().GetState() == cluster.StateDestroy {
82-
result.Status = "unknown"
91+
result.Status = VaultStatusUnknown
8392
return nil
8493
}
8594

@@ -89,7 +98,8 @@ func (r *tarmakRPC) VaultClusterInitStatus(args *VaultClusterStatusArgs, result
8998
if err != nil {
9099
err = fmt.Errorf("failed to create vault tunnel: %s", err)
91100
r.tarmak.Log().Error(err)
92-
return err
101+
result.Status = VaultStatusUnknown
102+
return nil
93103
}
94104
defer vaultTunnel.Stop()
95105

@@ -99,7 +109,8 @@ func (r *tarmakRPC) VaultClusterInitStatus(args *VaultClusterStatusArgs, result
99109
if err != nil {
100110
err = fmt.Errorf("failed to retrieve vault root token: %s", err)
101111
r.tarmak.Log().Error(err)
102-
return err
112+
result.Status = VaultStatusUnknown
113+
return nil
103114
}
104115

105116
vaultClient.SetToken(vaultRootToken)
@@ -117,14 +128,16 @@ func (r *tarmakRPC) VaultClusterInitStatus(args *VaultClusterStatusArgs, result
117128
if err != nil {
118129
err = fmt.Errorf("failed to retrieve init status: %s", err)
119130
r.tarmak.Log().Error(err)
120-
return err
131+
result.Status = VaultStatusUnknown
132+
return nil
121133
}
122134
if !up {
123135
err = fmt.Errorf("failed to initialised vault cluster")
124136
r.tarmak.Log().Error(err)
125-
return err
137+
result.Status = VaultStatusUnknown
138+
return nil
126139
}
127140

128-
result.Status = "ready"
141+
result.Status = VaultStatusReady
129142
return nil
130143
}

terraform/amazon/modules/bastion/bastion.tf

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -24,8 +24,14 @@ resource "aws_security_group" "bastion" {
2424
}
2525
}
2626

27+
data "tarmak_bastion_instance" "bastion" {
28+
hostname = "bastion"
29+
username = "centos"
30+
31+
depends_on = ["aws_instance.bastion"]
32+
}
33+
2734
resource "aws_instance" "bastion" {
28-
count = 1
2935
ami = "${var.bastion_ami}"
3036
instance_type = "${var.bastion_instance_type}"
3137
subnet_id = "${var.public_subnet_ids[0]}"
Lines changed: 4 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -1,24 +1,11 @@
1-
output "bastion_instance_id" {
2-
value = "${element(concat(aws_instance.bastion.*.id, list("")), 0)}"
3-
}
4-
5-
6-
output "bastion_fqdn" {
7-
value = "${aws_route53_record.bastion.fqdn}"
8-
}
9-
10-
output "bastion_private_ip" {
11-
value = "${aws_eip.bastion.public_ip}"
12-
}
13-
14-
output "bastion_ip" {
15-
value = "${aws_eip.bastion.public_ip}"
1+
output "bastion_status" {
2+
value = "${data.tarmak_bastion_instance.bastion.status}"
163
}
174

185
output "bastion_security_group_id" {
196
value = "${element(concat(aws_security_group.bastion.*.id, list("")), 0)}"
207
}
218

22-
output "remote_admin_security_group_id" {
23-
value = "${aws_security_group.remote_admin.id}"
9+
output "bastion_instance_id" {
10+
value = "${element(concat(aws_instance.bastion.*.id, list("")), 0)}"
2411
}

terraform/amazon/modules/kubernetes/inputs.tf

Lines changed: 3 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -47,10 +47,6 @@ variable "internal_fqdns" {
4747
type = "list"
4848
}
4949

50-
variable "vault_kms_key_id" {}
51-
52-
variable "vault_unseal_key_name" {}
53-
5450
# template variables
5551
variable "availability_zones" {
5652
type = "list"
@@ -76,4 +72,6 @@ variable "public_zone_id" {}
7672

7773
variable "vault_security_group_id" {}
7874

79-
variable "bastion_security_group_id" {}
75+
variable "bastion_security_group_id" {}
76+
77+
variable "vault_status" {}

0 commit comments

Comments
 (0)