From 42fcf9ebe0e8d68bb75134f4d2a6a5905e7d6c05 Mon Sep 17 00:00:00 2001 From: crstin Date: Mon, 17 Aug 2020 20:26:00 +0200 Subject: [PATCH] Initial commit --- .ansible-lint | 5 + .gitignore | 5 + .gitmodules | 3 + .tool-versions | 1 + README.md | 124 ++++++++ ansible.cfg | 4 + cluster.yml.example | 32 ++ hosts.example | 27 ++ inventory/group_vars/all/all.yml | 98 ++++++ inventory/group_vars/all/docker.yml | 56 ++++ inventory/group_vars/etcd.yml | 22 ++ inventory/group_vars/k8s-cluster/addons.yml | 116 ++++++++ .../group_vars/k8s-cluster/k8s-cluster.yml | 280 ++++++++++++++++++ .../group_vars/k8s-cluster/k8s-net-calico.yml | 67 +++++ .../k8s-cluster/k8s-net-macvlan.yml | 6 + kubespray | 1 + requirements.ansible.yml | 7 + requirements.python.txt | 2 + roles/access/tasks/main.yml | 1 + roles/access/tasks/ssh.yml | 6 + roles/load_balancing/tasks/main.yml | 1 + roles/load_balancing/tasks/metallb.yml | 13 + roles/packages/tasks/install.yml | 14 + roles/packages/tasks/main.yml | 3 + roles/packages/tasks/pin.yml | 7 + roles/packages/tasks/update.yml | 5 + roles/storage/tasks/ceph.yml | 87 ++++++ roles/storage/tasks/main.yml | 3 + roles/storage/tasks/namespace.yml | 7 + roles/storage/tasks/operator.yml | 7 + roles/thorchain/tasks/helm.yml | 5 + roles/thorchain/tasks/main.yml | 2 + roles/thorchain/tasks/repository.yml | 6 + 33 files changed, 1023 insertions(+) create mode 100644 .ansible-lint create mode 100644 .gitignore create mode 100644 .gitmodules create mode 100644 .tool-versions create mode 100644 README.md create mode 100644 ansible.cfg create mode 100644 cluster.yml.example create mode 100644 hosts.example create mode 100644 inventory/group_vars/all/all.yml create mode 100644 inventory/group_vars/all/docker.yml create mode 100644 inventory/group_vars/etcd.yml create mode 100644 inventory/group_vars/k8s-cluster/addons.yml create mode 100644 inventory/group_vars/k8s-cluster/k8s-cluster.yml create mode 100644 inventory/group_vars/k8s-cluster/k8s-net-calico.yml create mode 100644 inventory/group_vars/k8s-cluster/k8s-net-macvlan.yml create mode 160000 kubespray create mode 100644 requirements.ansible.yml create mode 100644 requirements.python.txt create mode 100644 roles/access/tasks/main.yml create mode 100644 roles/access/tasks/ssh.yml create mode 100644 roles/load_balancing/tasks/main.yml create mode 100644 roles/load_balancing/tasks/metallb.yml create mode 100644 roles/packages/tasks/install.yml create mode 100644 roles/packages/tasks/main.yml create mode 100644 roles/packages/tasks/pin.yml create mode 100644 roles/packages/tasks/update.yml create mode 100644 roles/storage/tasks/ceph.yml create mode 100644 roles/storage/tasks/main.yml create mode 100644 roles/storage/tasks/namespace.yml create mode 100644 roles/storage/tasks/operator.yml create mode 100644 roles/thorchain/tasks/helm.yml create mode 100644 roles/thorchain/tasks/main.yml create mode 100644 roles/thorchain/tasks/repository.yml diff --git a/.ansible-lint b/.ansible-lint new file mode 100644 index 0000000..fd2df7b --- /dev/null +++ b/.ansible-lint @@ -0,0 +1,5 @@ +skip_list: + - 403 # Package installs should not use latest +exclude_paths: + - ./kubespray/ + - ./roles/matthiaslohr.hvswitch_k8s/ diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..d7bb872 --- /dev/null +++ b/.gitignore @@ -0,0 +1,5 @@ +/inventory/credentials/ +/roles/matthiaslohr.hvswitch_k8s/ +/venv/ +/inventory/inventory.ini +/private-*.yml diff --git a/.gitmodules b/.gitmodules new file mode 100644 index 0000000..2519de7 --- /dev/null +++ 
b/.gitmodules @@ -0,0 +1,3 @@ +[submodule "kubespray"] + path = kubespray + url = https://github.com/kubernetes-sigs/kubespray.git diff --git a/.tool-versions b/.tool-versions new file mode 100644 index 0000000..a2e9f4b --- /dev/null +++ b/.tool-versions @@ -0,0 +1 @@ +python 3.8.5 diff --git a/README.md b/README.md new file mode 100644 index 0000000..ff0d518 --- /dev/null +++ b/README.md @@ -0,0 +1,124 @@ +# Hetzner Bare Metal k8s Cluster + +The scripts in this repository will set up and maintain one or more [kubernetes][k8s] clusters consisting of dedicated [Hetzner][hetzner] servers. Each cluster will also be provisioned to operate as a node in the [THORChain][tc] network. + +Executing the scripts in combination with some manual procedures will get you clusters with the following features on bare metal. + +* [Kubespray][kubespray] (based) +* Internal NVMe storage ([Ceph][ceph]/[Rook][rook]) +* Virtual LAN (also over multiple locations) ([Calico][calico]) +* Load Balancing ([MetalLB][metallb]) + +## Preparations + +### Servers + +Acquire a couple of [servers][buy] as the basis for a cluster (`AX41-NVME` models, for instance, work well). Visit the [admin panel][admin] and name the servers appropriately. + +```text +tc-k8s-master +tc-k8s-worker1 +tc-k8s-worker2 +... +``` + +Refer to the [reset procedure][reset] to properly initialize them. + +### vSwitch + +Create a [vSwitch][vswitch] and order an appropriate subnet (it may take a while to show up after the order). Give the vSwitch a name (e.g. `tc-k8s-net`) and assign this vSwitch to the servers. + +Check out the [docs][vswitch_docs] for help. + +## Usage + +Clone this repository, `cd` into it and download kubespray. + +```bash +git submodule init && git submodule update +``` + +Create a Python virtual environment or similar. + +```bash +# Optional +virtualenv -p python3 venv +``` + +Install the required Python packages and Ansible Galaxy dependencies. + +```bash +pip install -r requirements.python.txt +ansible-galaxy install -r requirements.ansible.yml +``` + +> Note: Mitogen does not work with Ansible collections and needs to be disabled. + +### Provisioning + +```bash +cp hosts.example inventory/inventory.ini +cp cluster.yml.example private-cluster.yml +``` + +Add your server IPs to `inventory.ini` and your network information to `private-cluster.yml`. + +If you want to manage multiple clusters, simply name the files according to the pattern below. + +```text +private-cluster-01.yml +private-cluster-02.yml +private-cluster-03.yml +... + +private-test.yml +... + +private-helsinki-01.yml +... + +private-whatever.yml +``` + +```bash +# Manage a cluster +ansible-playbook private-cluster.yml + +# If you want to run kubespray separately +ansible-playbook kubespray/cluster.yml +``` + +> Check [this][kubespray] out for more playbooks on cluster management. + +### THORChain + +In order for the cluster to operate as a node in the THORChain network, deploy as instructed [here][tc_deploying]. You can also refer to the [node-launcher repository][node-launcher], if necessary, or the THORChain [documentation][tc_docs] as a whole. + +## Resetting the bare metal servers + +Visit the [console][admin] and put each server of the cluster into rescue mode. Then execute the following command. + +```bash +installimage -a -r no -i images/Ubuntu-1804-bionic-64-minimal.tar.gz -p /:ext4:all -d nvme0n1 -f yes -t yes -n hostname +``` + +This will install and use Ubuntu on only one of the two internal NVMe drives. The unused drive will be used for persistent storage with Ceph/Rook. You can check the internal drive setup with `lsblk` and adjust the command shown above accordingly if necessary.
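+
+Depending on what was installed on the server before, the unused NVMe drive may still carry old partition, filesystem or software RAID signatures, which can prevent Rook/Ceph (`useAllDevices: true`) from claiming it. As a minimal clean-up sketch (the device name below is an assumption; verify it with `lsblk` first):
+
+```bash
+# Show the drive layout; the drive without partitions or mounts is the spare one.
+lsblk
+
+# Remove leftover filesystem/RAID signatures so Ceph can use the spare drive.
+wipefs --all /dev/nvme1n1
+```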
+ +> Ubuntu 18.04 is used because kubespray does not support 20.04 (yet) + +[reset]: #resetting-the-bare-metal-servers +[hetzner]: https://www.hetzner.com +[buy]: https://www.hetzner.com/dedicated-rootserver/matrix-ax +[admin]: https://robot.your-server.de/server +[vswitch]: https://robot.your-server.de/vswitch/index +[vswitch_docs]: https://docs.hetzner.com/robot/dedicated-server/network/vswitch +[k8s]: https://kubernetes.io +[kubespray]: https://kubespray.io/ +[metallb]: https://metallb.universe.tf +[calico]: https://www.projectcalico.org +[ceph]: https://ceph.io +[rook]: https://rook.io +[tc]: https://thorchain.org +[tc_docs]: https://docs.thorchain.org +[tc_deploying]: https://docs.thorchain.org/thornodes/kubernetes/deploying +[node-launcher]: https://gitlab.com/thorchain/devops/node-launcher diff --git a/ansible.cfg b/ansible.cfg new file mode 100644 index 0000000..b3de6e1 --- /dev/null +++ b/ansible.cfg @@ -0,0 +1,4 @@ +[defaults] +roles_path = ./roles +inventory = ./inventory/inventory.ini +nocows = 1 diff --git a/cluster.yml.example b/cluster.yml.example new file mode 100644 index 0000000..f8d40bb --- /dev/null +++ b/cluster.yml.example @@ -0,0 +1,32 @@ +- hosts: all + vars: + authorized_keys: + # - key1 + # - key2 + vswitches: + - name: tc-k8s-net # vSwitch name, used for naming the routing table. + routing_table: 1 # ID for the routing table. + vlan: 4000 # VLAN ID for the vSwitch. 4000-4091 supported by Hetzner. + gateway: 33.33.33.33 # If the vSwitch has a subnet, this variable should contain the subnet's gateway IP address + addresses: # IP addresses for the vSwitch network interface (per host) + - "{{ hostvars[inventory_hostname]['ip'] }}/24" + subnets: # Subnets available on the vSwitch (need to be registered with Hetzner robot) for non-private networks + - subnet: 33.33.33.32/29 + roles: + - access + - packages + - matthiaslohr.hvswitch_k8s + +- hosts: kube-master + roles: + - thorchain + +- import_playbook: kubespray/cluster.yml + +- hosts: kube-master[0] + vars: + address_range: + - 33.33.33.34-33.33.33.37 + roles: + - storage + - load_balancing diff --git a/hosts.example b/hosts.example new file mode 100644 index 0000000..84f4521 --- /dev/null +++ b/hosts.example @@ -0,0 +1,27 @@ +[all:vars] +ansible_user=root +ansible_ssh_user=root +ansible_python_interpreter=/usr/bin/python3 + +[all] +master ansible_host=11.22.33.44 ip=10.10.10.11 etcd_member_name=master +worker1 ansible_host=22.33.44.55 ip=10.10.10.12 +worker2 ansible_host=33.44.55.66 ip=10.10.10.13 + +[kube-master] +master + +[etcd] +master + +[kube-node] +master +worker1 +worker2 + +[calico-rr] + +[k8s-cluster:children] +kube-master +kube-node +calico-rr diff --git a/inventory/group_vars/all/all.yml b/inventory/group_vars/all/all.yml new file mode 100644 index 0000000..9e1ba0c --- /dev/null +++ b/inventory/group_vars/all/all.yml @@ -0,0 +1,98 @@ +--- +## Directory where etcd data is stored +etcd_data_dir: /var/lib/etcd + +## Experimental kubeadm etcd deployment mode. Available only for new deployment +etcd_kubeadm_enabled: false + +## Directory where the binaries will be installed +bin_dir: /usr/local/bin + +## The access_ip variable is used to define how other nodes should access +## the node. This is used in flannel to allow other flannel nodes to see +## this node for example.
The access_ip is really useful AWS and Google +## environments where the nodes are accessed remotely by the "public" ip, +## but don't know about that address themselves. +# access_ip: 1.1.1.1 + + +## External LB example config +## apiserver_loadbalancer_domain_name: "elb.some.domain" +# loadbalancer_apiserver: +# address: 1.2.3.4 +# port: 1234 + +## Internal loadbalancers for apiservers +# loadbalancer_apiserver_localhost: true +# valid options are "nginx" or "haproxy" +# loadbalancer_apiserver_type: nginx # valid values "nginx" or "haproxy" + +## Local loadbalancer should use this port +## And must be set port 6443 +loadbalancer_apiserver_port: 6443 + +## If loadbalancer_apiserver_healthcheck_port variable defined, enables proxy liveness check for nginx. +loadbalancer_apiserver_healthcheck_port: 8081 + +### OTHER OPTIONAL VARIABLES +## For some things, kubelet needs to load kernel modules. For example, dynamic kernel services are needed +## for mounting persistent volumes into containers. These may not be loaded by preinstall kubernetes +## processes. For example, ceph and rbd backed volumes. Set to true to allow kubelet to load kernel +## modules. +# kubelet_load_modules: false + +## Upstream dns servers +# upstream_dns_servers: +# - 8.8.8.8 +# - 8.8.4.4 + +## There are some changes specific to the cloud providers +## for instance we need to encapsulate packets with some network plugins +## If set the possible values are either 'gce', 'aws', 'azure', 'openstack', 'vsphere', 'oci', or 'external' +## When openstack is used make sure to source in the openstack credentials +## like you would do when using openstack-client before starting the playbook. +# cloud_provider: + +## When cloud_provider is set to 'external', you can set the cloud controller to deploy +## Supported cloud controllers are: 'openstack' and 'vsphere' +## When openstack or vsphere are used make sure to source in the required fields +# external_cloud_provider: + +## Set these proxy values in order to update package manager and docker daemon to use proxies +# http_proxy: "" +# https_proxy: "" + +## Refer to roles/kubespray-defaults/defaults/main.yml before modifying no_proxy +# no_proxy: "" + +## Some problems may occur when downloading files over https proxy due to ansible bug +## https://github.com/ansible/ansible/issues/32750. Set this variable to False to disable +## SSL validation of get_url module. Note that kubespray will still be performing checksum validation. +# download_validate_certs: False + +## If you need exclude all cluster nodes from proxy and other resources, add other resources here. +# additional_no_proxy: "" + +## Certificate Management +## This setting determines whether certs are generated via scripts. +## Chose 'none' if you provide your own certificates. +## Option is "script", "none" +## note: vault is removed +# cert_management: script + +## Set to true to allow pre-checks to fail and continue deployment +# ignore_assert_errors: false + +## The read-only port for the Kubelet to serve on with no authentication/authorization. Uncomment to enable. +# kube_read_only_port: 10255 + +## Set true to download and cache container +# download_container: true + +## Deploy container engine +# Set false if you want to deploy container engine manually. 
+# deploy_container_engine: true + +## Set Pypi repo and cert accordingly +# pyrepo_index: https://pypi.example.com/simple +# pyrepo_cert: /etc/ssl/certs/ca-certificates.crt diff --git a/inventory/group_vars/all/docker.yml b/inventory/group_vars/all/docker.yml new file mode 100644 index 0000000..ebf0bd3 --- /dev/null +++ b/inventory/group_vars/all/docker.yml @@ -0,0 +1,56 @@ +--- +## Uncomment this if you want to force overlay/overlay2 as docker storage driver +## Please note that overlay2 is only supported on newer kernels +# docker_storage_options: -s overlay2 + +## Enable docker_container_storage_setup, it will configure devicemapper driver on Centos7 or RedHat7. +docker_container_storage_setup: false + +## It must be define a disk path for docker_container_storage_setup_devs. +## Otherwise docker-storage-setup will be executed incorrectly. +# docker_container_storage_setup_devs: /dev/vdb + +## Uncomment this if you have more than 3 nameservers, then we'll only use the first 3. +docker_dns_servers_strict: false + +# Path used to store Docker data +docker_daemon_graph: "/var/lib/docker" + +## Used to set docker daemon iptables options to true +docker_iptables_enabled: "false" + +# Docker log options +# Rotate container stderr/stdout logs at 50m and keep last 5 +docker_log_opts: "--log-opt max-size=50m --log-opt max-file=5" + +# define docker bin_dir +docker_bin_dir: "/usr/bin" + +# keep docker packages after installation; speeds up repeated ansible provisioning runs when '1' +# kubespray deletes the docker package on each run, so caching the package makes sense +docker_rpm_keepcache: 0 + +## An obvious use case is allowing insecure-registry access to self hosted registries. +## Can be ipaddress and domain_name. +## example define 172.19.16.11 or mirror.registry.io +# docker_insecure_registries: +# - mirror.registry.io +# - 172.19.16.11 + +## Add other registry,example China registry mirror. +# docker_registry_mirrors: +# - https://registry.docker-cn.com +# - https://mirror.aliyuncs.com + +## If non-empty will override default system MountFlags value. +## This option takes a mount propagation flag: shared, slave +## or private, which control whether mounts in the file system +## namespace set up for docker will receive or propagate mounts +## and unmounts. Leave empty for system default +# docker_mount_flags: + +## A string of extra options to pass to the docker daemon. +## This string should be exactly as you wish it to appear. +# docker_options: "" + +docker_package_version: 18.09 diff --git a/inventory/group_vars/etcd.yml b/inventory/group_vars/etcd.yml new file mode 100644 index 0000000..cbc388e --- /dev/null +++ b/inventory/group_vars/etcd.yml @@ -0,0 +1,22 @@ +--- +## Etcd auto compaction retention for mvcc key value store in hour +# etcd_compaction_retention: 0 + +## Set level of detail for etcd exported metrics, specify 'extensive' to include histogram metrics. +# etcd_metrics: basic + +## Etcd is restricted by default to 512M on systems under 4GB RAM, 512MB is not enough for much more than testing. +## Set this if your etcd nodes have less than 4GB but you want more RAM for etcd. Set to 0 for unrestricted RAM. +# etcd_memory_limit: "512M" + +## Etcd has a default of 2G for its space quota. If you put a value in etcd_memory_limit which is less than +## etcd_quota_backend_bytes, you may encounter out of memory terminations of the etcd cluster. Please check +## etcd documentation for more information. 
+# etcd_quota_backend_bytes: "2G" + +### ETCD: disable peer client cert authentication. +# This affects ETCD_PEER_CLIENT_CERT_AUTH variable +# etcd_peer_client_auth: true + +## Settings for etcd deployment type +etcd_deployment_type: docker diff --git a/inventory/group_vars/k8s-cluster/addons.yml b/inventory/group_vars/k8s-cluster/addons.yml new file mode 100644 index 0000000..d261e81 --- /dev/null +++ b/inventory/group_vars/k8s-cluster/addons.yml @@ -0,0 +1,116 @@ +--- +# Kubernetes dashboard +# RBAC required. see docs/getting-started.md for access details. +dashboard_enabled: false + +# Helm deployment +helm_enabled: false + +# Registry deployment +registry_enabled: false +# registry_namespace: kube-system +# registry_storage_class: "" +# registry_disk_size: "10Gi" + +# Metrics Server deployment +metrics_server_enabled: false +# metrics_server_kubelet_insecure_tls: true +# metrics_server_metric_resolution: 60s +# metrics_server_kubelet_preferred_address_types: "InternalIP" + +# Rancher Local Path Provisioner +local_path_provisioner_enabled: false +# local_path_provisioner_namespace: "local-path-storage" +# local_path_provisioner_storage_class: "local-path" +# local_path_provisioner_reclaim_policy: Delete +# local_path_provisioner_claim_root: /opt/local-path-provisioner/ +# local_path_provisioner_debug: false +# local_path_provisioner_image_repo: "rancher/local-path-provisioner" +# local_path_provisioner_image_tag: "v0.0.2" +# local_path_provisioner_helper_image_repo: "busybox" +# local_path_provisioner_helper_image_tag: "latest" + +# Local volume provisioner deployment +local_volume_provisioner_enabled: false +# local_volume_provisioner_namespace: kube-system +# local_volume_provisioner_storage_classes: +# local-storage: +# host_dir: /mnt/disks +# mount_dir: /mnt/disks +# volume_mode: Filesystem +# fs_type: ext4 +# fast-disks: +# host_dir: /mnt/fast-disks +# mount_dir: /mnt/fast-disks +# block_cleaner_command: +# - "/scripts/shred.sh" +# - "2" +# volume_mode: Filesystem +# fs_type: ext4 + +# CephFS provisioner deployment +cephfs_provisioner_enabled: false +# cephfs_provisioner_namespace: "cephfs-provisioner" +# cephfs_provisioner_cluster: ceph +# cephfs_provisioner_monitors: "172.24.0.1:6789,172.24.0.2:6789,172.24.0.3:6789" +# cephfs_provisioner_admin_id: admin +# cephfs_provisioner_secret: secret +# cephfs_provisioner_storage_class: cephfs +# cephfs_provisioner_reclaim_policy: Delete +# cephfs_provisioner_claim_root: /volumes +# cephfs_provisioner_deterministic_names: true + +# RBD provisioner deployment +rbd_provisioner_enabled: false +# rbd_provisioner_namespace: rbd-provisioner +# rbd_provisioner_replicas: 2 +# rbd_provisioner_monitors: "172.24.0.1:6789,172.24.0.2:6789,172.24.0.3:6789" +# rbd_provisioner_pool: kube +# rbd_provisioner_admin_id: admin +# rbd_provisioner_secret_name: ceph-secret-admin +# rbd_provisioner_secret: ceph-key-admin +# rbd_provisioner_user_id: kube +# rbd_provisioner_user_secret_name: ceph-secret-user +# rbd_provisioner_user_secret: ceph-key-user +# rbd_provisioner_user_secret_namespace: rbd-provisioner +# rbd_provisioner_fs_type: ext4 +# rbd_provisioner_image_format: "2" +# rbd_provisioner_image_features: layering +# rbd_provisioner_storage_class: rbd +# rbd_provisioner_reclaim_policy: Delete + +# Nginx ingress controller deployment +ingress_nginx_enabled: false +# ingress_nginx_host_network: false +ingress_publish_status_address: "" +# ingress_nginx_nodeselector: +# kubernetes.io/os: "linux" +# ingress_nginx_tolerations: +# - key: 
"node-role.kubernetes.io/master" +# operator: "Equal" +# value: "" +# effect: "NoSchedule" +# ingress_nginx_namespace: "ingress-nginx" +# ingress_nginx_insecure_port: 80 +# ingress_nginx_secure_port: 443 +# ingress_nginx_configmap: +# map-hash-bucket-size: "128" +# ssl-protocols: "SSLv2" +# ingress_nginx_configmap_tcp_services: +# 9000: "default/example-go:8080" +# ingress_nginx_configmap_udp_services: +# 53: "kube-system/coredns:53" +# ingress_nginx_extra_args: +# - --default-ssl-certificate=default/foo-tls + +# ALB ingress controller deployment +ingress_alb_enabled: false +# alb_ingress_aws_region: "us-east-1" +# alb_ingress_restrict_scheme: "false" +# Enables logging on all outbound requests sent to the AWS API. +# If logging is desired, set to true. +# alb_ingress_aws_debug: "false" + +# Cert manager deployment +cert_manager_enabled: false +# cert_manager_namespace: "cert-manager" diff --git a/inventory/group_vars/k8s-cluster/k8s-cluster.yml b/inventory/group_vars/k8s-cluster/k8s-cluster.yml new file mode 100644 index 0000000..18fc829 --- /dev/null +++ b/inventory/group_vars/k8s-cluster/k8s-cluster.yml @@ -0,0 +1,280 @@ +--- +# Kubernetes configuration dirs and system namespace. +# Those are where all the additional config stuff goes +# the kubernetes normally puts in /srv/kubernetes. +# This puts them in a sane location and namespace. +# Editing those values will almost surely break something. +kube_config_dir: /etc/kubernetes +kube_script_dir: "{{ bin_dir }}/kubernetes-scripts" +kube_manifest_dir: "{{ kube_config_dir }}/manifests" + +# This is where all the cert scripts and certs will be located +kube_cert_dir: "{{ kube_config_dir }}/ssl" + +# This is where all of the bearer tokens will be stored +kube_token_dir: "{{ kube_config_dir }}/tokens" + +# This is where to save basic auth file +kube_users_dir: "{{ kube_config_dir }}/users" + +kube_api_anonymous_auth: true + +## Change this to use another Kubernetes version, e.g. a current beta release +kube_version: v1.17.9 + +# kubernetes image repo define +kube_image_repo: "k8s.gcr.io" + +# Where the binaries will be downloaded. +# Note: ensure that you've enough disk space (about 1G) +local_release_dir: "/tmp/releases" +# Random shifts for retrying failed ops like pushing/downloading +retry_stagger: 5 + +# This is the group that the cert creation scripts chgrp the +# cert files to. Not really changeable... +kube_cert_group: kube-cert + +# Cluster Loglevel configuration +kube_log_level: 2 + +# Directory where credentials will be stored +credentials_dir: "{{ inventory_dir }}/credentials" + +# Users to create for basic auth in Kubernetes API via HTTP +# Optionally add groups for user +kube_api_pwd: "{{ lookup('password', credentials_dir + '/kube_user.creds length=15 chars=ascii_letters,digits') }}" +kube_users: + kube: + pass: "{{kube_api_pwd}}" + role: admin + groups: + - system:masters + +## It is possible to activate / deactivate selected authentication methods (basic auth, static token auth) +# kube_oidc_auth: false +# kube_basic_auth: false +# kube_token_auth: false + + +## Variables for OpenID Connect Configuration https://kubernetes.io/docs/admin/authentication/ +## To use OpenID you have to deploy additional an OpenID Provider (e.g Dex, Keycloak, ...) + +# kube_oidc_url: https:// ... 
+# kube_oidc_client_id: kubernetes +## Optional settings for OIDC +# kube_oidc_ca_file: "{{ kube_cert_dir }}/ca.pem" +# kube_oidc_username_claim: sub +# kube_oidc_username_prefix: oidc: +# kube_oidc_groups_claim: groups +# kube_oidc_groups_prefix: oidc: + + +# Choose network plugin (cilium, calico, contiv, weave or flannel. Use cni for generic cni plugin) +# Can also be set to 'cloud', which lets the cloud provider setup appropriate routing +kube_network_plugin: calico + +# Setting multi_networking to true will install Multus: https://github.com/intel/multus-cni +kube_network_plugin_multus: false + +# Kubernetes internal network for services, unused block of space. +kube_service_addresses: 10.233.0.0/18 + +# internal network. When used, it will assign IP +# addresses from this range to individual pods. +# This network must be unused in your network infrastructure! +kube_pods_subnet: 10.233.64.0/18 + +# internal network node size allocation (optional). This is the size allocated +# to each node on your network. With these defaults you should have +# room for 4096 nodes with 254 pods per node. +kube_network_node_prefix: 24 + +# The port the API Server will be listening on. +kube_apiserver_ip: "{{ kube_service_addresses|ipaddr('net')|ipaddr(1)|ipaddr('address') }}" +kube_apiserver_port: 6443 # (https) +# kube_apiserver_insecure_port: 8080 # (http) +# Set to 0 to disable insecure port - Requires RBAC in authorization_modes and kube_api_anonymous_auth: true +kube_apiserver_insecure_port: 0 # (disabled) + +# Kube-proxy proxyMode configuration. +# Can be ipvs, iptables +kube_proxy_mode: ipvs + +# configure arp_ignore and arp_announce to avoid answering ARP queries from kube-ipvs0 interface +# must be set to true for MetalLB to work +kube_proxy_strict_arp: true + +# A string slice of values which specify the addresses to use for NodePorts. +# Values may be valid IP blocks (e.g. 1.2.3.0/24, 1.2.3.4/32). +# The default empty string slice ([]) means to use all local addresses. +# kube_proxy_nodeport_addresses_cidr is retained for legacy config +kube_proxy_nodeport_addresses: >- + {%- if kube_proxy_nodeport_addresses_cidr is defined -%} + [{{ kube_proxy_nodeport_addresses_cidr }}] + {%- else -%} + [] + {%- endif -%} + +# If non-empty, will use this string as identification instead of the actual hostname +# kube_override_hostname: >- +# {%- if cloud_provider is defined and cloud_provider in [ 'aws' ] -%} +# {%- else -%} +# {{ inventory_hostname }} +# {%- endif -%} + +## Encrypting Secret Data at Rest (experimental) +kube_encrypt_secret_data: false + +# DNS configuration. 
+# Kubernetes cluster name, also will be used as DNS domain +cluster_name: cluster.local +# Subdomains of DNS domain to be resolved via /etc/resolv.conf for hostnet pods +ndots: 2 +# Can be coredns, coredns_dual, manual or none +dns_mode: coredns +# Set manual server if using a custom cluster DNS server +# manual_dns_server: 10.x.x.x +# Enable nodelocal dns cache +enable_nodelocaldns: true +nodelocaldns_ip: 169.254.25.10 +nodelocaldns_health_port: 9254 +# nodelocaldns_external_zones: +# - zones: +# - example.com +# - example.io:1053 +# nameservers: +# - 1.1.1.1 +# - 2.2.2.2 +# cache: 5 +# - zones: +# - https://mycompany.local:4453 +# nameservers: +# - 192.168.0.53 +# cache: 0 +# Enable k8s_external plugin for CoreDNS +enable_coredns_k8s_external: false +coredns_k8s_external_zone: k8s_external.local +# Enable endpoint_pod_names option for kubernetes plugin +enable_coredns_k8s_endpoint_pod_names: false + +# Can be docker_dns, host_resolvconf or none +resolvconf_mode: docker_dns +# Deploy netchecker app to verify DNS resolve as an HTTP service +deploy_netchecker: false +# Ip address of the kubernetes skydns service +skydns_server: "{{ kube_service_addresses|ipaddr('net')|ipaddr(3)|ipaddr('address') }}" +skydns_server_secondary: "{{ kube_service_addresses|ipaddr('net')|ipaddr(4)|ipaddr('address') }}" +dns_domain: "{{ cluster_name }}" + +## Container runtime +## docker for docker, crio for cri-o and containerd for containerd. +container_manager: docker + +## Settings for containerd runtimes (only used when container_manager is set to containerd) +# +# Settings for default containerd runtime +# containerd_default_runtime: +# type: io.containerd.runtime.v1.linux +# engine: '' +# root: '' +# +# Settings for additional runtimes for containerd configuration +# containerd_runtimes: +# - name: "" +# type: "" +# engine: "" +# root: "" +# Example for Kata Containers as additional runtime: +# containerd_runtimes: +# - name: kata +# type: io.containerd.kata.v2 +# engine: "" +# root: "" +# +# Settings for untrusted containerd runtime +# containerd_untrusted_runtime_type: '' +# containerd_untrusted_runtime_engine: '' +# containerd_untrusted_runtime_root: '' + +## Settings for containerized control plane (kubelet/secrets) +kubelet_deployment_type: host +helm_deployment_type: host + +# Enable kubeadm experimental control plane +kubeadm_control_plane: false +kubeadm_certificate_key: "{{ lookup('password', credentials_dir + '/kubeadm_certificate_key.creds length=64 chars=hexdigits') | lower }}" + +# K8s image pull policy (imagePullPolicy) +k8s_image_pull_policy: IfNotPresent + +# audit log for kubernetes +kubernetes_audit: false + +# dynamic kubelet configuration +dynamic_kubelet_configuration: false + +# define kubelet config dir for dynamic kubelet +# kubelet_config_dir: +default_kubelet_config_dir: "{{ kube_config_dir }}/dynamic_kubelet_dir" +dynamic_kubelet_configuration_dir: "{{ kubelet_config_dir | default(default_kubelet_config_dir) }}" + +# pod security policy (RBAC must be enabled either by having 'RBAC' in authorization_modes or kubeadm enabled) +podsecuritypolicy_enabled: false + +# Custom PodSecurityPolicySpec for restricted policy +# podsecuritypolicy_restricted_spec: {} + +# Custom PodSecurityPolicySpec for privileged policy +# podsecuritypolicy_privileged_spec: {} + +# Make a copy of kubeconfig on the host that runs Ansible in {{ inventory_dir }}/artifacts +# kubeconfig_localhost: false +# Download kubectl onto the host that runs Ansible in {{ bin_dir }} +# kubectl_localhost: false + +# A comma 
separated list of levels of node allocatable enforcement to be enforced by kubelet. +# Acceptable options are 'pods', 'system-reserved', 'kube-reserved' and ''. Default is "". +# kubelet_enforce_node_allocatable: pods + +## Optionally reserve resources for OS system daemons. +# system_reserved: true +## Uncomment to override default values +# system_memory_reserved: 512M +# system_cpu_reserved: 500m +## Reservation for master hosts +# system_master_memory_reserved: 256M +# system_master_cpu_reserved: 250m + +# An alternative flexvolume plugin directory +# kubelet_flexvolumes_plugins_dir: /usr/libexec/kubernetes/kubelet-plugins/volume/exec + +## Supplementary addresses that can be added in kubernetes ssl keys. +## That can be useful for example to setup a keepalived virtual IP +# supplementary_addresses_in_ssl_keys: [10.0.0.1, 10.0.0.2, 10.0.0.3] + +## Running on top of openstack vms with cinder enabled may lead to unschedulable pods due to NoVolumeZoneConflict restriction in kube-scheduler. +## See https://github.com/kubernetes-sigs/kubespray/issues/2141 +## Set this variable to true to get rid of this issue +volume_cross_zone_attachment: false +## Add Persistent Volumes Storage Class for corresponding cloud provider (supported: in-tree OpenStack, Cinder CSI, +## AWS EBS CSI, Azure Disk CSI, GCP Persistent Disk CSI) +persistent_volumes_enabled: false + +## Container Engine Acceleration +## Enable container acceleration feature, for example use gpu acceleration in containers +# nvidia_accelerator_enabled: true +## Nvidia GPU driver install. Install will by done by a (init) pod running as a daemonset. +## Important: if you use Ubuntu then you should set in all.yml 'docker_storage_options: -s overlay2' +## Array with nvida_gpu_nodes, leave empty or comment if you don't want to install drivers. +## Labels and taints won't be set to nodes if they are not in the array. +# nvidia_gpu_nodes: +# - kube-gpu-001 +# nvidia_driver_version: "384.111" +## flavor can be tesla or gtx +# nvidia_gpu_flavor: gtx +## NVIDIA driver installer images. Change them if you have trouble accessing gcr.io. +# nvidia_driver_install_centos_container: atzedevries/nvidia-centos-driver-installer:2 +# nvidia_driver_install_ubuntu_container: gcr.io/google-containers/ubuntu-nvidia-driver-installer@sha256:7df76a0f0a17294e86f691c81de6bbb7c04a1b4b3d4ea4e7e2cccdc42e1f6d63 +## NVIDIA GPU device plugin image. +# nvidia_gpu_device_plugin_container: "k8s.gcr.io/nvidia-gpu-device-plugin@sha256:0842734032018be107fa2490c98156992911e3e1f2a21e059ff0105b07dd8e9e" diff --git a/inventory/group_vars/k8s-cluster/k8s-net-calico.yml b/inventory/group_vars/k8s-cluster/k8s-net-calico.yml new file mode 100644 index 0000000..3031fae --- /dev/null +++ b/inventory/group_vars/k8s-cluster/k8s-net-calico.yml @@ -0,0 +1,67 @@ +# see roles/network_plugin/calico/defaults/main.yml + +## With calico it is possible to distributed routes with border routers of the datacenter. +## Warning : enabling router peering will disable calico's default behavior ('node mesh'). 
+## The subnets of each nodes will be distributed by the datacenter router +# peer_with_router: false + +# Enables Internet connectivity from containers +# nat_outgoing: true + +# add default ippool name +# calico_pool_name: "default-pool" + +# add default ippool blockSize (defaults kube_network_node_prefix) +# calico_pool_blocksize: 24 + +# add default ippool CIDR (must be inside kube_pods_subnet, defaults to kube_pods_subnet otherwise) +# calico_pool_cidr: 1.2.3.4/5 + +# Global as_num (/calico/bgp/v1/global/as_num) +# global_as_num: "64512" + +# You can set MTU value here. If left undefined or empty, it will +# not be specified in calico CNI config, so Calico will use built-in +# defaults. The value should be a number, not a string. +calico_mtu: 1380 + +# Advertise Cluster IPs +# calico_advertise_cluster_ips: true + +# Choose data store type for calico: "etcd" or "kdd" (kubernetes datastore) +# calico_datastore: "etcd" + +# Choose Calico iptables backend: "Legacy", "Auto" or "NFT" +# calico_iptables_backend: "Legacy" + +# Use typha (only with kdd) +# typha_enabled: false + +# Generate TLS certs for secure typha<->calico-node communication +# typha_secure: false + +# Scaling typha: 1 replica per 100 nodes is adequate +# Number of typha replicas +# typha_replicas: 1 + +# Set max typha connections +# typha_max_connections_lower_limit: 300 + +# Set calico network backend: "bird", "vxlan" or "none" +# bird enable BGP routing, required for ipip mode. +# calico_network_backend: bird + +# IP in IP and VXLAN is mutualy exclusive modes. +# set IP in IP encapsulation mode: "Always", "CrossSubnet", "Never" +# calico_ipip_mode: 'Always' + +# set VXLAN encapsulation mode: "Always", "CrossSubnet", "Never" +# calico_vxlan_mode: 'Never' + +# If you want to use non default IP_AUTODETECTION_METHOD for calico node set this option to one of: +# * can-reach=DESTINATION +# * interface=INTERFACE-REGEX +# see https://docs.projectcalico.org/reference/node/configuration +# calico_ip_auto_method: "interface=eth.*" +# Choose the iptables insert mode for Calico: "Insert" or "Append". 
+# calico_felix_chaininsertmode: Insert diff --git a/inventory/group_vars/k8s-cluster/k8s-net-macvlan.yml b/inventory/group_vars/k8s-cluster/k8s-net-macvlan.yml new file mode 100644 index 0000000..d2534e7 --- /dev/null +++ b/inventory/group_vars/k8s-cluster/k8s-net-macvlan.yml @@ -0,0 +1,6 @@ +--- +# private interface, on a l2-network +macvlan_interface: "eth1" + +# Enable nat in default gateway network interface +enable_nat_default_gateway: true diff --git a/kubespray b/kubespray new file mode 160000 index 0000000..ef3e988 --- /dev/null +++ b/kubespray @@ -0,0 +1 @@ +Subproject commit ef3e98807ebd38471c91ff5734d4523230f81047 diff --git a/requirements.ansible.yml b/requirements.ansible.yml new file mode 100644 index 0000000..17a97d7 --- /dev/null +++ b/requirements.ansible.yml @@ -0,0 +1,7 @@ +roles: + - name: matthiaslohr.hvswitch_k8s + version: v1.0.0 + +collections: + - name: community.kubernetes + version: 0.11.0 diff --git a/requirements.python.txt b/requirements.python.txt new file mode 100644 index 0000000..08f0f87 --- /dev/null +++ b/requirements.python.txt @@ -0,0 +1,2 @@ +ansible==2.9.12 +netaddr==0.8.0 diff --git a/roles/access/tasks/main.yml b/roles/access/tasks/main.yml new file mode 100644 index 0000000..b2a9ac9 --- /dev/null +++ b/roles/access/tasks/main.yml @@ -0,0 +1 @@ +- include: ssh.yml diff --git a/roles/access/tasks/ssh.yml b/roles/access/tasks/ssh.yml new file mode 100644 index 0000000..c29c834 --- /dev/null +++ b/roles/access/tasks/ssh.yml @@ -0,0 +1,6 @@ +- name: Add authorized ssh-keys + authorized_key: + user: "{{ ansible_user }}" + state: present + key: "{{ item }}" + with_items: "{{authorized_keys}}" diff --git a/roles/load_balancing/tasks/main.yml b/roles/load_balancing/tasks/main.yml new file mode 100644 index 0000000..5a00f8a --- /dev/null +++ b/roles/load_balancing/tasks/main.yml @@ -0,0 +1 @@ +- include: metallb.yml diff --git a/roles/load_balancing/tasks/metallb.yml b/roles/load_balancing/tasks/metallb.yml new file mode 100644 index 0000000..53098f6 --- /dev/null +++ b/roles/load_balancing/tasks/metallb.yml @@ -0,0 +1,13 @@ +- name: Setup MetalLB + community.kubernetes.helm: + chart_repo_url: https://charts.bitnami.com/bitnami + chart_ref: metallb + chart_version: 0.1.21 + release_name: metallb + release_namespace: kube-system + release_values: + configInline: + address-pools: + - name: default + protocol: layer2 + addresses: "{{address_range}}" diff --git a/roles/packages/tasks/install.yml b/roles/packages/tasks/install.yml new file mode 100644 index 0000000..4fe87b8 --- /dev/null +++ b/roles/packages/tasks/install.yml @@ -0,0 +1,14 @@ +- name: Install packages + apt: + name: + - vim + - build-essential + - git + - jq + - python3-pip + state: present + +- name: Install Python dependencies + pip: + name: + - openshift diff --git a/roles/packages/tasks/main.yml b/roles/packages/tasks/main.yml new file mode 100644 index 0000000..53d8e91 --- /dev/null +++ b/roles/packages/tasks/main.yml @@ -0,0 +1,3 @@ +- include: pin.yml +- include: update.yml +- include: install.yml diff --git a/roles/packages/tasks/pin.yml b/roles/packages/tasks/pin.yml new file mode 100644 index 0000000..f6db701 --- /dev/null +++ b/roles/packages/tasks/pin.yml @@ -0,0 +1,7 @@ +- name: Pin Docker version + copy: + content: "Package: docker-ce\nPin: version {{docker_package_version}}.*\nPin-Priority: 1000\n" + dest: /etc/apt/preferences.d/docker-ce + owner: root + group: root + mode: 0644 diff --git a/roles/packages/tasks/update.yml b/roles/packages/tasks/update.yml new file mode 100644
index 0000000..d516c77 --- /dev/null +++ b/roles/packages/tasks/update.yml @@ -0,0 +1,5 @@ +- name: Upgrade all packages to the latest version + apt: + name: "*" + state: latest + update_cache: yes diff --git a/roles/storage/tasks/ceph.yml b/roles/storage/tasks/ceph.yml new file mode 100644 index 0000000..2d518ff --- /dev/null +++ b/roles/storage/tasks/ceph.yml @@ -0,0 +1,87 @@ +- name: deploy ceph cluster + community.kubernetes.k8s: + definition: + apiVersion: ceph.rook.io/v1 + kind: CephCluster + metadata: + name: rook-ceph + namespace: rook-ceph + spec: + cephVersion: + image: ceph/ceph:v15.2.4 + dataDirHostPath: /var/lib/rook + skipUpgradeChecks: false + continueUpgradeAfterChecksEvenIfNotHealthy: false + mon: + count: 3 + allowMultiplePerNode: false + mgr: + modules: + # Several modules should not need to be included in this list. The "dashboard" and "monitoring" modules + # are already enabled by other settings in the cluster CR and the "rook" module is always enabled. + - name: pg_autoscaler + enabled: true + # enable the ceph dashboard for viewing cluster status + dashboard: + enabled: true + ssl: true + # enable prometheus alerting for cluster + monitoring: + # requires Prometheus to be pre-installed + enabled: false + removeOSDsIfOutAndSafeToRemove: false + storage: + useAllNodes: true + useAllDevices: true + +- name: deploy ceph block pool + community.kubernetes.k8s: + definition: + apiVersion: ceph.rook.io/v1 + kind: CephBlockPool + metadata: + name: replicapool + namespace: rook-ceph + spec: + failureDomain: host + replicated: + size: 3 + +- name: deploy ceph storage class + community.kubernetes.k8s: + definition: + apiVersion: storage.k8s.io/v1 + kind: StorageClass + metadata: + name: rook-ceph-block + annotations: + storageclass.kubernetes.io/is-default-class: "true" + # Change "rook-ceph" provisioner prefix to match the operator namespace if needed + provisioner: rook-ceph.rbd.csi.ceph.com + parameters: + # clusterID is the namespace where the rook cluster is running + clusterID: rook-ceph + # Ceph pool into which the RBD image shall be created + pool: replicapool + + # RBD image format. Defaults to "2". + imageFormat: "2" + + # RBD image features. Available for imageFormat: "2". CSI RBD currently supports only `layering` feature. + imageFeatures: layering + + # The secrets contain Ceph admin credentials. + csi.storage.k8s.io/provisioner-secret-name: rook-csi-rbd-provisioner + csi.storage.k8s.io/provisioner-secret-namespace: rook-ceph + csi.storage.k8s.io/controller-expand-secret-name: rook-csi-rbd-provisioner + csi.storage.k8s.io/controller-expand-secret-namespace: rook-ceph + csi.storage.k8s.io/node-stage-secret-name: rook-csi-rbd-node + csi.storage.k8s.io/node-stage-secret-namespace: rook-ceph + + # Specify the filesystem type of the volume. If not specified, csi-provisioner + # will set default as `ext4`. Note that `xfs` is not recommended due to potential deadlock + # in hyperconverged settings where the volume is mounted on the same node as the osds. 
+ csi.storage.k8s.io/fstype: ext4 + + # Delete the rbd volume when a PVC is deleted + reclaimPolicy: Delete diff --git a/roles/storage/tasks/main.yml b/roles/storage/tasks/main.yml new file mode 100644 index 0000000..37da010 --- /dev/null +++ b/roles/storage/tasks/main.yml @@ -0,0 +1,3 @@ +- include: namespace.yml +- include: operator.yml +- include: ceph.yml diff --git a/roles/storage/tasks/namespace.yml b/roles/storage/tasks/namespace.yml new file mode 100644 index 0000000..6384ce1 --- /dev/null +++ b/roles/storage/tasks/namespace.yml @@ -0,0 +1,7 @@ +- name: create rook-ceph namespace + community.kubernetes.k8s: + definition: + apiVersion: v1 + kind: Namespace + metadata: + name: rook-ceph diff --git a/roles/storage/tasks/operator.yml b/roles/storage/tasks/operator.yml new file mode 100644 index 0000000..5f14802 --- /dev/null +++ b/roles/storage/tasks/operator.yml @@ -0,0 +1,7 @@ +- name: install rook-ceph operator + community.kubernetes.helm: + chart_repo_url: https://charts.rook.io/release + chart_ref: rook-ceph + chart_version: v1.4.0 + release_name: rook-ceph + release_namespace: rook-ceph diff --git a/roles/thorchain/tasks/helm.yml b/roles/thorchain/tasks/helm.yml new file mode 100644 index 0000000..9fce39f --- /dev/null +++ b/roles/thorchain/tasks/helm.yml @@ -0,0 +1,5 @@ +- name: Make helm 3 + command: make helm + args: + chdir: node-launcher/ + creates: /usr/local/bin/helm diff --git a/roles/thorchain/tasks/main.yml b/roles/thorchain/tasks/main.yml new file mode 100644 index 0000000..652d2f0 --- /dev/null +++ b/roles/thorchain/tasks/main.yml @@ -0,0 +1,2 @@ +- include: repository.yml +- include: helm.yml diff --git a/roles/thorchain/tasks/repository.yml b/roles/thorchain/tasks/repository.yml new file mode 100644 index 0000000..6eb20af --- /dev/null +++ b/roles/thorchain/tasks/repository.yml @@ -0,0 +1,6 @@ +- name: Add node-launcher repository + git: + repo: https://gitlab.com/thorchain/devops/node-launcher + dest: ~/node-launcher + version: master + update: yes
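Once a cluster is up, a quick way to confirm that the storage role worked is a throwaway PersistentVolumeClaim against the `rook-ceph-block` StorageClass it creates. This is only an illustrative smoke test and not part of the playbooks; the claim name is arbitrary and `kubectl` is assumed to be pointed at the new cluster (for example on the first master node).

```bash
# Request a small test volume from the default StorageClass installed by roles/storage.
cat <<'EOF' | kubectl apply -f -
apiVersion: v1
kind: PersistentVolumeClaim
metadata:
  name: ceph-smoke-test
spec:
  accessModes:
    - ReadWriteOnce
  resources:
    requests:
      storage: 1Gi
  storageClassName: rook-ceph-block
EOF

# The claim should become "Bound" once the Ceph cluster is healthy.
kubectl get pvc ceph-smoke-test

# Clean up the test claim afterwards.
kubectl delete pvc ceph-smoke-test
```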