#+TITLE: Scalable ConcourseCI with Nomad and Nix
#+DATE: <2021-02-14 Sun>

In this blog post, I will explain how you can deploy ConcourseCI on HashiCorp Nomad with fully automatic,
ops-free scaling. We will utilize three HashiCorp tools, namely Nomad, Vault, and Consul, plus PostgreSQL, Nix (not
necessary, can be replaced), and finally ConcourseCI itself.

* Requirements
+ a functional Nomad installation with Consul and Vault integration
+ a Nomad cluster with more than 1 node, to actually witness the scaling
+ under 10 GB of space, somewhere around 5 GB, but just to be safe have 10 GB

* Versions utilized
+ Consul - v1.9.3
+ Nomad - v1.0.3
+ Vault - v1.6.2
+ Linux - 5.11.0
+ Nix - 2.4pre20201205_a5d85d0
+ ConcourseCI - 7.0.0
+ PostgreSQL - 11.11

* Overview
Our goal is to be able to add a Nomad node to the cluster and have ConcourseCI automatically expand to that node (we
can restrict this later with constraints). For this purpose we'll use the ~system~ scheduler; quoting the Nomad docs:

#+BEGIN_QUOTE
The ~system~ scheduler is used to register jobs that should be run on all clients that meet the job's constraints. The
~system~ scheduler is also invoked when clients join the cluster or transition into the ready state. This means that
all registered system jobs will be re-evaluated and their tasks will be placed on the newly available nodes if the
constraints are met.
#+END_QUOTE

A ConcourseCI worker node needs its own key pair. In the best case, we would generate this key pair every time a
worker node is brought up and store it in Vault. Fortunately this is possible with a ~pre-start~ task and Consul
Template. \\

That's about it when it comes to the special and interesting bits of this post, so if you already know how to do this,
or you want to take a stab at solving it yourself, you can stop reading. For those that are still with me, please open
a terminal and follow along.

* Realization
** Vault setup
We'll only use the KV store, version 2, as it's the easiest to use and works fine for this use case. I've decided
to structure it like so, but you are free to change it around; the only thing you need to keep the same is to
have a directory with files representing the individual worker nodes, such as =concourse/workers/<worker-hostname>=.

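If you don't have a KV v2 secrets engine mounted at =kv/= yet, a minimal sketch of enabling one could look like this;
the mount name =kv= is an assumption taken from the policy paths used later in this post.

#+BEGIN_SRC shell-script
# Assumption: nothing is mounted at kv/ yet; the policies below reference
# kv/data/... and kv/metadata/..., which are KV v2 paths.
vault secrets enable -path=kv -version=2 kv
#+END_SRC
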
*** Structure
- [[*concourse][concourse]]
  - [[web][web]]
  - [[*db][db]]
  - [[workers][workers]]
    - [[*<worker-hostname>][<worker-hostname>]]

**** concourse
Nothing here, just a folder for the other secrets.

***** web
- tsa_host_key - key used for TSA (communication between a web and a worker node)
- tsa_host_key_pub
- session_signing_key
- local_user_name - username of the administrator user
- local_user_pass - password of the administrator user

***** db
- password
- user
- database
- root_user
- root_password

***** workers
Holds the dynamically generated secrets of all the worker nodes.

***** <worker-hostname>
- private_key - the worker's private key
- public_key - the worker's public key, used for authentication when connecting to a web node

*** Policies
We'll need 3 policies: =concourse-web-policy=, =concourse-worker-policy=, and =concourse-db-policy=.

#+NAME: concourse-db-policy.hcl
#+BEGIN_SRC hcl
path "kv/data/concourse/db" {
  capabilities = ["read"]
}
#+END_SRC

#+NAME: concourse-web-policy.hcl
#+BEGIN_SRC hcl
path "kv/data/concourse/workers/*" {
  capabilities = ["read"]
}

path "kv/metadata/concourse/workers" {
  capabilities = ["list"]
}

path "kv/data/concourse/web" {
  capabilities = ["read"]
}

path "kv/data/concourse/db" {
  capabilities = ["read"]
}
#+END_SRC

#+NAME: concourse-worker-policy.hcl
#+BEGIN_SRC hcl
path "kv/data/concourse/workers/*" {
  capabilities = ["read", "update", "delete"]
}

path "kv/data/concourse/web" {
  capabilities = ["read"]
}
#+END_SRC

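Registering the policies is a one-liner each; this assumes you saved the snippets above under the file names given in
their =#+NAME:= lines.

#+BEGIN_SRC shell-script
vault policy write concourse-db-policy concourse-db-policy.hcl
vault policy write concourse-web-policy concourse-web-policy.hcl
vault policy write concourse-worker-policy concourse-worker-policy.hcl
#+END_SRC
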
*** Application
Create the =web= secrets.

#+BEGIN_SRC shell-script
concourse generate-key -t rsa -f ./session_signing_key
_session_signing_key="$(cat session_signing_key)"
concourse generate-key -t ssh -f ./tsa_host_key
_tsa_host_key="$(cat tsa_host_key)"
_tsa_host_key_pub="$(cat tsa_host_key.pub)"
#+END_SRC

Upload them.

#+BEGIN_SRC shell-script
vault kv put kv/concourse/web \
     session_signing_key="$_session_signing_key" \
     tsa_host_key="$_tsa_host_key" \
     tsa_host_key_pub="$_tsa_host_key_pub" \
     local_user_pass="changeme" \
     local_user_name="changeme"
#+END_SRC

Manually specify and upload the secrets for PostgreSQL.

#+BEGIN_SRC shell-script
vault kv put kv/concourse/db \
    password="changeme" \
    user="changeme" \
    database="changeme" \
    root_user="changeme" \
    root_password="changeme"
#+END_SRC

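To double-check that everything landed where the policies expect it, you can read the secrets back; these are just the
standard KV read commands, nothing specific to this setup.

#+BEGIN_SRC shell-script
vault kv get kv/concourse/web
vault kv get kv/concourse/db
#+END_SRC
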
#+BEGIN_TINY
The policy file expects the path to be prefixed with ~kv/~ if you're using KV v1; with v2 it expects ~kv/data/~. That's
not the case for the ~vault kv~ subcommand, because it automatically prepends the correct prefix.
#+END_TINY

** Nomad Web Job
The basic idea of this job is that we start two tasks, one for PostgreSQL and the other for ConcourseCI itself. This
could be clustered, but that's out of the scope of this post.

#+BEGIN_WARNING
The paths in =read=-like Go template calls must be prefixed with =kv/data=, while the =list=-like calls must be
prefixed with =kv/metadata=, if you're using KV v2. I figured that out by inspecting the =vault kv= subcommand with
the =-output-curl-string= flag.
#+END_WARNING

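If you want to see those prefixes for yourself, the same trick works from your shell; the commands below only print
the HTTP request =vault= would make instead of executing it.

#+BEGIN_SRC shell-script
# A read on a KV v2 mount goes through .../v1/kv/data/...
vault kv get -output-curl-string kv/concourse/web
# A list goes through .../v1/kv/metadata/...
vault kv list -output-curl-string kv/concourse/workers
#+END_SRC
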
#+BEGIN_SRC hcl
job "concourse-ci-web" {
  datacenters = ["homelab-1"]
  type        = "service"

  group "svc" {
    count = 1

    network {
      mode = "bridge"

      port "db" {
        to = "5432"
      }
      port "http" {
        static = "8080"
        to     = "8080"
      }
      port "tsa" {
        static = "2222"
        to     = "2222"
      }
    }

    service {
      name = "concourse-web"
      port = "http"

      check {
        type     = "http"
        path     = "/"
        interval = "2s"
        timeout  = "2s"
      }
    }

    service {
      name = "concourse-tsa"
      port = "2222"
    }

    service {
      name = "concourse-db"
      port = "db"
    }

    task "db" {
      driver = "docker"

      config {
        image = "magicrb/postgresql@sha256:changeme"
        ports = ["db"]

        volumes = [
          "secrets/main.sh:/data/scripts/main.sh",
        ]
      }

      vault {
        policies = ["concourse-db-policy"]
      }

      template {
        data = <<EOF
{{ with secret "kv/data/concourse/db" }}
USER={{ .Data.data.root_user }}
PASSWORD={{ .Data.data.root_password }}
{{ end }}
EOF
        destination = "${NOMAD_SECRETS_DIR}/data.env"
        env         = true
      }

      template {
        data = <<EOF
#!/usr/bin/env bash

env

{{ with secret "kv/data/concourse/db" }}
if process_psql -tc "SELECT 1 FROM pg_database WHERE datname = '{{ .Data.data.database }}'" | grep -q 1
then
    process_psql -c "ALTER USER {{ .Data.data.user }} WITH PASSWORD '{{ .Data.data.password }}'";
else
    process_psql -c "CREATE DATABASE {{ .Data.data.database }}"
    process_psql -c "CREATE USER {{ .Data.data.user }} WITH ENCRYPTED PASSWORD '{{ .Data.data.password }}'"
    process_psql -c "GRANT ALL PRIVILEGES ON DATABASE {{ .Data.data.database }} TO {{ .Data.data.user }}"
{{ end }}

    echo "host all all all md5" >> /data/postgresql/pg_hba.conf
    cat << EOD >> /data/postgresql/postgresql.conf
listen_addresses = '0.0.0.0'
password_encryption = md5
EOD
fi
EOF
        destination = "${NOMAD_SECRETS_DIR}/main.sh"
      }
    }

    task "web" {
      driver = "docker"

      config {
        image   = "concourse/concourse@sha256:changeme"
        command = "web"
        ports   = ["http", "tsa"]
      }

      vault {
        policies = ["concourse-web-policy"]
      }

      restart {
        attempts = 5
        delay    = "15s"
      }

      template {
        data = <<EOF
{{ with secret "kv/data/concourse/web" }}
CONCOURSE_ADD_LOCAL_USER={{ .Data.data.local_user_name }}:{{ .Data.data.local_user_pass }}
CONCOURSE_MAIN_TEAM_LOCAL_USER={{ .Data.data.local_user_name }}
{{ end }}

CONCOURSE_SESSION_SIGNING_KEY={{ env "NOMAD_SECRETS_DIR" }}/session_signing_key
CONCOURSE_TSA_HOST_KEY={{ env "NOMAD_SECRETS_DIR" }}/tsa_host_key
CONCOURSE_TSA_AUTHORIZED_KEYS={{ env "NOMAD_SECRETS_DIR" }}/authorized_worker_keys

CONCOURSE_EXTERNAL_URL=http://blowhole.in.redalder.org:8019/

CONCOURSE_POSTGRES_HOST=127.0.0.1
CONCOURSE_POSTGRES_PORT=5432
{{ with secret "kv/data/concourse/db" }}
CONCOURSE_POSTGRES_DATABASE={{ .Data.data.database }}
CONCOURSE_POSTGRES_USER={{ .Data.data.user }}
CONCOURSE_POSTGRES_PASSWORD={{ .Data.data.password }}
{{ end }}
EOF
        destination = "${NOMAD_SECRETS_DIR}/data.env"
        env         = true
      }

      template {
        data = <<EOF
{{ with secret "kv/data/concourse/web" }}{{ .Data.data.session_signing_key }}{{ end }}
EOF
        destination = "${NOMAD_SECRETS_DIR}/session_signing_key"
      }

      template {
        data = <<EOF
{{ with secret "kv/data/concourse/web" }}{{ .Data.data.tsa_host_key }}{{ end }}
EOF
        destination = "${NOMAD_SECRETS_DIR}/tsa_host_key"
      }

      template {
        data = <<EOF
{{ range secrets "kv/metadata/concourse/workers/" }}
{{ with secret (printf "kv/data/concourse/workers/%s" .) }}
{{ .Data.data.public_key }}
{{ end }}
{{ end }}
EOF
        destination   = "${NOMAD_SECRETS_DIR}/authorized_worker_keys"
        change_mode   = "signal"
        change_signal = "SIGHUP"
      }
    }
  }
}
#+END_SRC

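With the secrets and policies in place you can submit the job; the file name below is just a placeholder, use whatever
you saved the job definition as.

#+BEGIN_SRC shell-script
# Hypothetical file name for the job definition above.
nomad job run concourse-ci-web.hcl
nomad job status concourse-ci-web
#+END_SRC
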
The interesting bits are the init script for the PostgreSQL container (it's Nix based and written by me) and the last
template stanza.

*** PostgreSQL
Whatever files you put into =/data/scripts= will be executed after the initial setup of the database but before the
real DB process starts. They are a really convenient and flexible way to set up databases. In the script you have
access to =bash=, =postgresql=, and =busybox=. You can find the image on [[https://hub.docker.com/r/magicrb/postgresql][Docker Hub.]] \\

You may be asking how we can get the script into =/data/scripts=, when the template system of Nomad only allows one
to output into =/alloc=, =/local=, or =/secrets=. Well, that's what the single volume is for: you can specify the
source and destination, and if the source is *relative*, not absolute, Docker will bind mount from a path in the
container to another path in the container.

#+BEGIN_WARNING
The source path must be *relative*; if it's absolute, Docker will treat it as a host volume!
#+END_WARNING

*** Template Stanza
The piece of Go template magic you can see here,

#+BEGIN_SRC fundamental
{{ range secrets "kv/metadata/concourse/workers/" }}
{{ with secret (printf "kv/data/concourse/workers/%s" .) }}
{{ .Data.data.public_key }}
{{ end }}
{{ end }}
#+END_SRC

will iterate through all the entries in =concourse/workers= and execute the inner templating for each; the inner
=with= then fetches each secret and extracts its =public_key=. This template will automatically re-execute every so
often, and by setting the =change_mode= to =signal= and the =change_signal= to =SIGHUP=, we tell Nomad to send a
=SIGHUP= to PID 1 in the container. ConcourseCI will reload its configuration upon receiving a =SIGHUP=, which
includes reloading the list of authorized keys, neat huh?

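If you don't want to wait for the template to re-render on its own, you can trigger the same reload by hand; the
allocation ID below is a made-up placeholder, look yours up with ~nomad job status concourse-ci-web~.

#+BEGIN_SRC shell-script
# "web" is the task name from the job above; the allocation ID is hypothetical.
nomad alloc signal -s SIGHUP c3ab1c2d web
#+END_SRC
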
** Nomad Worker Job
This job is weirder and more complex. We first use a custom built container, which generates the short-lived worker
keypair and saves it to Vault, at =concourse/workers/<host_hostname>=. You can actually get the host's hostname from
the =node.unique.name= environment variable, which is not available to the actual code running in the container, but
only in the =template= stanzas. We therefore save its content into a real environment variable. After that it's
quite simple. \\

I'll add the simplified script which generates and saves the secrets. The container must run as a
pre-start task, which is not a sidecar, so that it completes before the main task starts. Template evaluation happens
at runtime, so the secrets will be properly resolved.

#+BEGIN_SRC shell-script
concourse generate-key -t ssh -f /worker_key

_worker_key="$(cat /worker_key)"
_worker_key_pub="$(cat /worker_key.pub)"
echo -e "${_worker_key//$'\n'/\\\\n}" > /worker_key
echo -e "${_worker_key_pub//$'\n'/\\\\n}" > /worker_key.pub

JSON_FMT='{"public_key":"%s","private_key":"%s"}'
printf "$JSON_FMT" "$(< /worker_key.pub)" "$(< /worker_key)" > secret.json

vault kv put kv/concourse/workers/blowhole @secret.json
#+END_SRC

The Bash substitutions are there only to avoid depending on another program like =sed=, which could do it too, and it
would be more readable. I also opted for using a JSON file, because I was worried I might hit the maximum argument
length. \\

One thing to note is that I haven't yet figured out a way to dynamically get the address of one of the available
Vault instances, so for now it's okay to hardcode it in.

#+BEGIN_SRC hcl
job "concourse-ci-worker" {
  datacenters = ["homelab-1"]
  type        = "system"

  group "svc" {
    count = 1

    network {
      mode = "bridge"
    }

    task "create-secret" {
      driver = "docker"

      config {
        image = "useyourown"
      }

      vault {
        policies = ["concourse-worker-policy"]
      }

      lifecycle {
        sidecar = false
        hook    = "prestart"
      }

      template {
        data = <<EOF
HOST_HOSTNAME="{{ env "node.unique.name" }}"
VAULT_ADDR="https://example.com:8200/"
EOF
        env         = true
        destination = "${NOMAD_TASK_DIR}/data.env"
      }
    }

    task "worker" {
      driver = "docker"

      config {
        image      = "concourse/concourse@sha256:changeme"
        command    = "worker"
        privileged = true
      }

      vault {
        policies = ["concourse-worker-policy"]
      }

      template {
        data = <<EOF
CONCOURSE_WORK_DIR=/opt/concourse/worker
CONCOURSE_TSA_HOST=example.com:2222
CONCOURSE_TSA_PUBLIC_KEY={{ env "NOMAD_SECRETS_DIR" }}/tsa_host_key.pub
CONCOURSE_TSA_WORKER_PRIVATE_KEY={{ env "NOMAD_SECRETS_DIR" }}/worker.key
EOF
        env         = true
        destination = "${NOMAD_SECRETS_DIR}/data.env"
      }

      template {
        data = <<EOF
{{ with secret (printf "kv/data/concourse/workers/%s" (env "node.unique.name") ) }}
{{ .Data.data.private_key }}
{{ end }}
EOF
        destination = "${NOMAD_SECRETS_DIR}/worker.key"
      }

      template {
        data = <<EOF
{{ with secret "kv/data/concourse/web" }}{{ .Data.data.tsa_host_key_pub }}{{ end }}
EOF
        destination = "${NOMAD_SECRETS_DIR}/tsa_host_key.pub"
      }
    }
  }
}
#+END_SRC

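Finally, submit the worker job and check that the new workers register with the web node. The file name and the =fly=
target name are just placeholders; the URL is the =CONCOURSE_EXTERNAL_URL= configured earlier.

#+BEGIN_SRC shell-script
# Hypothetical file name and fly target.
nomad job run concourse-ci-worker.hcl
fly -t homelab login -c http://blowhole.in.redalder.org:8019/
fly -t homelab workers
#+END_SRC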