alpine-overlay/kubezero/zdt-base/common.sh

514 lines
14 KiB
Bash

# We built on top of tiny-cloud
. /lib/tiny-cloud/common
. /usr/lib/cloudbender/cloud/"$CLOUD".sh
# boolean flags
# Interprets $1 as a boolean, ignoring case:
#   1 | true                 -> returns 0 (enabled)
#   0 | false | none | empty -> returns 1 (disabled)
#   anything else            -> logs a warning and returns 1
is_enabled() {
  local flag
  flag=$(echo "$1" | tr '[:upper:]' '[:lower:]')

  case "$flag" in
    1 | true)
      return 0
      ;;
    0 | false | none | "")
      return 1
      ;;
  esac

  # neither a known "on" nor a known "off" value
  log -t user-data warn "Unknown value for boolean option: $flag - assuming False"
  return 1
}
# setup_instance, various OS tweaks impossible to do via AMI baking
# Globals read: IP_ADDRESS, _META_HOSTNAME, HOSTNAME, DOMAIN_NAME, CLOUD, REGION
setup_instance() {
  # create machine-id to emulate systemd
  if [ ! -f /etc/machine-id ]; then
    uuidgen > /etc/machine-id
  fi

  # add and mount bpf file system
  add_once /etc/fstab "bpffs /sys/fs/bpf bpf rw,nosuid,nodev,noexec,relatime,mode=700 0 0"
  mount -a

  # Ensure certain mounts are shared to run containers later, eg. cilium, falco
  mount --make-shared /sys/fs/cgroup
  mount --make-shared /sys/fs/bpf
  mount --make-shared /sys

  add_once /etc/hosts "${IP_ADDRESS} ${_META_HOSTNAME} ${HOSTNAME}"

  # workaround for dhcpcd / openresolv to omit search domain if equal to domain breaking DNS resolution of shortnames for eg. etcd and kube-apiserver
  add_once /etc/resolv.conf "search $DOMAIN_NAME"

  if [ "$CLOUD" = "aws" ]; then
    # Set system wide default region for boto3
    echo "export AWS_DEFAULT_REGION=$REGION" > /etc/profile.d/aws.sh
    setup_sns_alarms
  else
    # only a warning: the generic tweaks above have already been applied
    ewarn "Unsupported Cloud: $CLOUD"
    # return 1
  fi
}
################
# IAM SSH KEYS #
################
# Adjusts /etc/ssh/sshd_config in place.
# Globals read:
#   SSHPORT         listen port, defaults to 22
#   CLOUD           "aws" enables the IAM key lookup, "nocloud" is a no-op
#   SSHKEYIAMGROUP  IAM group passed to get_iam_sshkeys.py; empty skips the lookup setup
#   SSHKEYIAMROLE   IAM role passed along; the "Undefined" placeholder ARN counts as empty
configure_sshd() {
  local sshd_config=/etc/ssh/sshd_config

  # Change Listen port
  # NOTE: [[:space:]#] is used instead of [\s#]; inside a bracket expression
  # "\s" is just the two characters '\' and 's', so indented or commented-out
  # directives were not matched reliably.
  local port=${SSHPORT:-"22"}
  [ -w "$sshd_config" ] && sed -i -e 's/^[[:space:]#]*Port[[:space:]].*/Port '"$port"'/' "$sshd_config"

  case "$CLOUD" in
    aws)
      # on AWS call IAM for allowed groups and actual keys
      local group=${SSHKEYIAMGROUP:-""}
      local role=${SSHKEYIAMROLE:-"arn:aws:iam::000000000000:role/Undefined"}
      [ "$role" == "arn:aws:iam::000000000000:role/Undefined" ] && role=""

      if [ -n "$group" ]; then
        # Configure SSHD
        sed -i -e 's,^[[:space:]#]*AuthorizedKeysCommand[[:space:]].*,AuthorizedKeysCommand /usr/sbin/get_iam_sshkeys.py --user %u --group '"$group"' --iamRole "'"$role"'",' "$sshd_config"
        sed -i -e 's,^[[:space:]#]*AuthorizedKeysCommandUser[[:space:]].*,AuthorizedKeysCommandUser nobody,' "$sshd_config"
        einfo "added $group to SSH admin keys"
      fi
      ;;
    nocloud)
      return 0
      ;;
    *)
      ewarn "Unsupported Cloud: $CLOUD"
      # return 1
      ;;
  esac
}
# Persist host keys
# has to run before sshd starts up first time !
# $1: SSM path prefix below which one host_<type>.tgz parameter per key type is kept
# Sets the global RET to 1 when at least one key type could not be restored.
persistent_sshd_hostkeys() {
  # Top level is artifact to be able to limit the SSM IAM permissions
  local ssm_path=$1
  local key_types="ecdsa ed25519 rsa"
  local priv pub

  # try to get none existing host keys from SSM
  RET=0
  for key in $key_types; do
    priv=/etc/ssh/ssh_host_${key}_key
    pub=${priv}.pub

    # a restore is only attempted when both halves of the pair are missing
    if [ -f $pub ] || [ -f $priv ]; then
      continue
    fi

    if ! {
      (aws ssm get-parameters --names "${ssm_path}/host_${key}.tgz" --with-decryption --query 'Parameters[0].Value' | base64 -d | tar xzf - --directory=/ 1>/dev/null 2>&1) \
        && log -t user-data info "Restored ssh_host_${key}_key from SSM"
    }; then
      RET=1
    fi
  done

  # Update keys if any key couldn't be restored from SSM
  [ $RET -eq 1 ] || return 0

  # generate any missing keys
  ssh-keygen -A

  for key in $key_types; do
    priv=/etc/ssh/ssh_host_${key}_key
    pub=${priv}.pub

    if [ -r $priv -a -r $pub ]; then
      (aws ssm put-parameter --name "${ssm_path}/host_${key}.tgz" --type SecureString --value \
        "$(tar czf - $priv $pub | base64)" --overwrite) \
        && log -t user-data info "Uploaded ssh_host_${key}_key to SSM"
    fi
  done
}
# Sets the hostname from $1; only the part before the first '.' is used.
# Accepted forms:
# - plain custom hostname
# - `unique:<format_string>`, eg. `unique:kube-worker-{:02}`; the final name is
#   produced by uniq_hostname.py (eg. kube-worker-01)
# - `myownip:<prefix>`, eg. `myownip:nodegroup-` -> nodegroup-1-2-3-4
#   (dots of $IP_ADDRESS are replaced by dashes)
# Without a custom hostname only the EC2 Name tag is set.
set_hostname() {
  local custom_hostname=$(echo $1 | awk -F. '{ print $1 }')

  if [ -z "$custom_hostname" ]; then
    aws ec2 create-tags --resources $INSTANCE_ID --tags Key=Name,Value=${HOSTNAME}.${REGION}.compute.internal
    return
  fi

  case "$custom_hostname" in
    unique:*)
      new_hostname=$(uniq_hostname.py $AWS_EC2LAUNCHTEMPLATE_ID $INSTANCE_ID ${custom_hostname##unique:})
      ;;
    myownip:*)
      local _ip=${IP_ADDRESS//./-}
      new_hostname="${custom_hostname##myownip:}$_ip"
      ;;
    *)
      new_hostname=$custom_hostname
      ;;
  esac

  FQDN="${new_hostname}.${DOMAIN_NAME}"

  echo ${new_hostname} > /etc/hostname
  hostname $new_hostname
  export HOSTNAME=$new_hostname

  # add new hostname to hosts
  add_once /etc/hosts "${IP_ADDRESS} ${FQDN} ${new_hostname}"
  log -t user-data info "Hostname updated to ${new_hostname}."

  # hup syslog to update loghost macro
  /etc/init.d/syslog-ng reload

  # update Route53 entry for VPC internal FQDN
  route53.py --fqdn $FQDN --record $IP_ADDRESS

  # update our Name Tag to FQDN or PrivateDNSName to allow easy identification in the AWS UI
  aws ec2 create-tags --resources $INSTANCE_ID --tags Key=Name,Value=$FQDN
}
# Splits a colon separated volume spec into space separated fields on stdout.
_parse_volume() {
  # Todo: proper checks once all is yaml
  # For now every ':' simply becomes a blank
  echo $1 | tr ':' ' '
}
# mount optional remote volumes
# $1: whitespace separated list of volume specs, each split by _parse_volume
#     into <type> <id> <device> <path>; only type "ebs" is handled.
# Processing stops at the first unknown type or failed attach.
mount_volumes() {
  local volumes="$1"

  for vol in $volumes; do
    # Todo: check volume type and call matching func
    read volType volId volDevice volPath < <(_parse_volume $vol)

    if [ "$volType" != "ebs" ]; then
      echo "Unknown volume type $volType"
      break
    fi

    attach_ebs $volId $volDevice
    rc=$?
    if [ $rc -ne 0 ]; then
      ewarn "error trying to attach $volId"
      break
    fi

    # wait for the block device to become available, rescanning before each check
    mdev -s
    until test -b $volDevice; do
      sleep 1
      mdev -s
    done

    # check volume for existing filesystem: repair XFS, format anything else
    type=$(file -Lbs $volDevice)
    case "$type" in
      *"XFS filesystem"*)
        xfs_repair $volDevice >/dev/null 2>&1
        ;;
      *)
        mkfs.xfs -qf $volDevice >/dev/null
        ;;
    esac

    # mount
    mkdir -p $volPath
    mount -t xfs -o noatime $volDevice $volPath
    einfo "mounting $volDevice at $volPath"
  done
}
# Counterpart of mount_volumes.
# $1: whitespace separated list of volume specs (same format as for mount_volumes)
# Each path is unmounted; the volume is detached only if the unmount succeeded.
unmount_volumes() {
  local volumes="$1"

  for vol in $volumes; do
    read volType volId volDevice volPath < <(_parse_volume $vol)

    umount $volPath \
      && aws ec2 detach-volume \
        --volume-id "$volId" \
        --instance-id $INSTANCE_ID \
        --region $REGION > /dev/null
  done
}
# msg used for sns event, last one wins
# Stores all arguments in the global MSG and logs them at info level.
msg() {
  MSG="$@"
  log -t user-data info "$@"
}
# Generic retry command wrapper, incl. timeout of 30s
# $1 = number of tries; 0 = forever
# $2 = number of seconds to sleep between tries
# $@ actual command
# Returns 0 as soon as the command succeeds, 1 once all tries are used up.
retry() {
  local tries=$1
  local waitfor=$2
  shift 2

  while true; do
    # Only use timeout if $1 is an executable on disk; functions and builtins
    # without a binary cannot be run by timeout and are called directly.
    # Exactly one invocation per try: the former `A && {..} || {..}` chain ran
    # a failing executable a second time, and that time without any timeout.
    # NOTE: $@ is left unquoted on purpose so existing callers that pass the
    # whole command as a single string keep working.
    if type -P "$1" >/dev/null 2>&1; then
      timeout 30 $@ && return 0
    else
      $@ && return 0
    fi

    tries=$((tries - 1))
    [ "$tries" -eq 0 ] && return 1
    sleep "$waitfor"
  done
}
# Sets up /.swapfile as swap space.
# $1 = size of the swap file in MiB (dd writes $1 blocks of 1M)
add_swap() {
  # create, protect, format and activate the file unless it already exists
  if [ ! -f /.swapfile ]; then
    dd if=/dev/zero of=/.swapfile bs=1M count=$1 \
      && chmod 600 /.swapfile \
      && mkswap /.swapfile \
      && swapon /.swapfile
  fi

  # register the swap file in fstab only once
  if ! grep -q "/.swapfile" /etc/fstab; then
    echo "/.swapfile none swap sw 0 0" >> /etc/fstab
  fi

  sysctl -w vm.swappiness=10
}
# Get SSM secure string base64 decoded
# $1 SSM_PATH (lower-cased before the lookup), decoded value to stdout
# Returns non-zero if the parameter cannot be fetched or decoded.
get_secret() {
  local encoded

  # Fetch first and check the result: in a plain `aws ... | base64 -d` pipeline
  # a failed lookup is hidden, because decoding empty input succeeds.
  encoded=$(aws ssm get-parameter --name "${1,,}" --with-decryption --query 'Parameter.Value') || return

  printf '%s\n' "$encoded" | base64 -d
}
# Store values as base64 on SSM
# $1 SSM_PATH (lower-cased before use)
# $2 VALUE, stored base64 encoded as a SecureString, overwriting any existing one
put_secret() {
  local encoded=$(echo "$2" | base64 -w0)

  aws ssm put-parameter --name ${1,,} --type SecureString --value "$encoded" --overwrite
}
# Gets existing passphrase or creates new passphrase and stores it
# $1 = SSM path of the passphrase (handed to get_secret / put_secret)
# $2 = local file the passphrase is written to (mode 600)
init_passphrase() {
  local _URL=$1
  local _PPFILE=$2

  # If secret already exists noop
  [ -f "$_PPFILE" ] && return 0

  # Try the stored secret first. The subshell confines the tightened umask, so
  # the file never exists with wider permissions. An empty result counts as
  # "no secret stored" instead of being kept as an empty passphrase.
  if (umask 077 && get_secret "$_URL" > "$_PPFILE") && [ -s "$_PPFILE" ]; then
    chmod 600 "$_PPFILE"
    return
  fi

  # Nothing usable stored: generate 16 random bytes as hex and publish them
  (umask 077 && xxd -l16 -p /dev/random > "$_PPFILE")
  chmod 600 "$_PPFILE"
  put_secret "$_URL" "$(cat "$_PPFILE")"
}
# upload various useful logs to s3 if configured
# Globals read: ZDT_CLOUDBENDER_DEBUG_REMOTELOGS (S3 URL prefix; empty = disabled), INSTANCE_ID
# Best effort: always returns 0. The archive stays at /tmp/debuglogs.tgz,
# the staging directory is removed again.
upload_debug_logs(){
  [ -z "$ZDT_CLOUDBENDER_DEBUG_REMOTELOGS" ] && return 0

  local s3Url="$ZDT_CLOUDBENDER_DEBUG_REMOTELOGS/$INSTANCE_ID/$(date +'%Y%m%d-%H%M%Z')"

  # Without a staging directory cp would treat the last log file as its
  # destination and overwrite it, so give up early.
  local _tmp
  _tmp=$(mktemp -d) || return 0

  ps -ef > "${_tmp}/process.list"

  # missing files only produce cp errors, the rest is still collected
  cp /var/log/messages \
    /var/log/rc.log \
    /var/log/user-data.log \
    /etc/cloudbender.conf \
    /var/lib/cloud/meta-data \
    /var/log/kubelet/kubelet.log \
    /var/log/crio/crio.log \
    "$_tmp"

  tar cfz /tmp/debuglogs.tgz -C "$_tmp" .
  aws s3 cp /tmp/debuglogs.tgz "$s3Url/debuglogs.tgz"

  # do not leave copies of the logs behind
  rm -rf -- "$_tmp"

  return 0
}
# Final handler of a user-data run: reports the outcome and ends the script.
# $1 = exit code, $2 = line number, $3 = function name, $4 = failed command
# (presumably installed by the calling script via `trap ... ERR EXIT` - not visible here)
# Globals read: ALARMSNSARN, ZDT_CLOUDBENDER_DEBUG, DISABLECLOUDBENDERSNSSCALINGEVENTS,
#   LAUNCH_HOOK, AWS_AUTOSCALING_GROUPNAME, AWS_CLOUDFORMATION_LOGICAL_ID,
#   AWS_CLOUDFORMATION_STACK_NAME, INSTANCE_ID, start_uptime
# Globals written: CFN_STATUS, RESULT, LEVEL, SUBJECT, MSG, ATTACHMENT, end_uptime
# Always finishes with `exit 0`; after an error it either uploads debug logs
# (debug enabled) or powers the instance off.
exit_trap() {
  set +e
  trap - ERR EXIT
  local ERR_CODE=$1
  local ERR_LINE="$2"
  local ERR_FUNC="$3"
  local ERR_CMD="$4"

  if [ $ERR_CODE -ne 0 ]; then
    CFN_STATUS="FAILURE"
    RESULT="ABANDON"
  else
    CFN_STATUS="SUCCESS"
    RESULT="CONTINUE"
  fi

  # Decide once whether a failed instance is kept for debugging, so that the
  # notification text and the final action cannot disagree (a plain non-empty
  # check treated values like "false" as enabled).
  local keep_for_debug=""
  if [ $ERR_CODE -ne 0 ] && is_enabled "$ZDT_CLOUDBENDER_DEBUG"; then
    keep_for_debug=1
  fi

  # Add SNS events on demand
  if [ -n "$ALARMSNSARN" ]; then
    if [ $ERR_CODE -ne 0 ]; then
      LEVEL="Error"
      SUBJECT="Error during cloud-init."
      if [ $ERR_LINE -ne 1 ]; then
        MSG="$ERR_CMD failed in $ERR_FUNC at $ERR_LINE. Return: $ERR_CODE"
        # attach a few numbered source lines around the failing one;
        # tail needs a start line of at least 1
        local first_line=$((ERR_LINE - 3))
        [ $first_line -lt 1 ] && first_line=1
        ATTACHMENT="$(pr -tn "$0" | tail -n+$first_line | head -n7)"
      else
        MSG="$ERR_CMD"
      fi

      if [ -n "$keep_for_debug" ]; then
        SUBJECT="$SUBJECT Instance kept running for debug."
      else
        SUBJECT="$SUBJECT Instance terminated by ASG lifecycle hook."
      fi
    else
      LEVEL="Info"
      SUBJECT="ZDT Alpine Instance launched."
    fi

    # errors are always sent, informational events only unless disabled
    if [ -z "${DISABLECLOUDBENDERSNSSCALINGEVENTS}" ] || [ "$LEVEL" != "Info" ]; then
      /var/lib/cloud/sns_alarm.sh "$SUBJECT" "$MSG" "$LEVEL" "$ATTACHMENT"
    fi

    # Disable scaling events during shutdown
    [ -n "${DISABLECLOUDBENDERSNSSCALINGEVENTS}" ] && echo "DISABLE_SCALING_EVENTS=1" >> /etc/cloudbender.conf
  fi

  [ -n "$LAUNCH_HOOK" ] && aws autoscaling complete-lifecycle-action --lifecycle-action-result $RESULT --instance-id $INSTANCE_ID --lifecycle-hook-name $LAUNCH_HOOK --auto-scaling-group-name ${AWS_AUTOSCALING_GROUPNAME} || true

  if [ -n "${AWS_CLOUDFORMATION_LOGICAL_ID}" ]; then
    aws cloudformation signal-resource --stack-name ${AWS_CLOUDFORMATION_STACK_NAME} --logical-resource-id ${AWS_CLOUDFORMATION_LOGICAL_ID} --unique-id ${INSTANCE_ID} --status ${CFN_STATUS}
  fi

  # timestamp being done
  end_uptime=$(awk '{print $1}' < /proc/uptime)
  log -t user-data info "Exiting user-data. $end_uptime seconds after boot. Duration: $(echo "$end_uptime-$start_uptime" | bc)"

  # if we ran into error, either upload debug files or poweroff
  if [ $ERR_CODE -ne 0 ]; then
    if [ -n "$keep_for_debug" ]; then
      upload_debug_logs
    else
      poweroff
    fi
  fi

  exit 0
}
### S3 based locking
# Settings shared by release_lock, verify_lock, aquire_lock and check_lock.
# MUTEX: path of the local working copy of the lock file (relative to the current directory)
MUTEX=mutex
# MUTEX_OWNER: owner id written into the lock file and compared against it
MUTEX_OWNER=$HOSTNAME
# MUTEX_TIMEOUT: age in seconds after which a lock is considered timed out
MUTEX_TIMEOUT=600
# Gives up the lock: drops the local copy, then removes the lock object.
# $1 = S3 URL of the lock object
release_lock() {
  local lock_url=$1

  rm -f $MUTEX
  aws s3 rm $lock_url
}
# Downloads the lock object to $MUTEX and evaluates it.
# $1 = S3 URL of the lock object
# Lock not timed out and we own it: 0
# Lock not timed out and someone else owns it: 1
# Lock timed out: 2
# If the lock cannot be downloaded, ownership cannot be proven: 1
verify_lock() {
  local S3LOCK=$1
  local _host _time timepassed

  # A failed download must not fall back to a stale local copy (eg. the one
  # written by aquire_lock), which would always name us as the owner.
  aws s3 cp "$S3LOCK" "$MUTEX" || return 1

  _host=$(sed -n -e 's/MUTEX_OWNER=//p' "$MUTEX")
  _time=$(sed -n -e 's/MUTEX_TIME=//p' "$MUTEX")

  # Check for timestamp and timeout
  case "$_time" in
    '' | *[!0-9]*)
      # no usable timestamp: skip the timeout check, only the owner decides
      ;;
    *)
      timepassed=$(($(date -u +%s) - _time))
      [ "$timepassed" -gt "$MUTEX_TIMEOUT" ] && return 2
      ;;
  esac

  [ "$_host" == "$MUTEX_OWNER" ] && return 0
  return 1
}
# Tries to take the lock: writes owner and current UTC epoch seconds to $MUTEX,
# uploads it and re-reads it after a short pause.
# $1 = S3 URL of the lock object
# Returns whatever verify_lock returns for the re-read lock.
aquire_lock() {
  local S3LOCK=$1

  {
    echo "MUTEX_OWNER=${MUTEX_OWNER}"
    echo "MUTEX_TIME=$(date -u +%s)"
  } > $MUTEX

  aws s3 cp $MUTEX $S3LOCK

  # verify we actually got the lock
  sleep 2
  verify_lock $S3LOCK
}
# Makes sure we hold the lock, taking it over where possible.
# $1 = S3 URL of the lock object
# Returns 0 if we already own the lock, 1 if someone else holds a valid lock,
# otherwise the result of aquire_lock. The global rc holds the last probe status.
check_lock() {
  local S3LOCK=$1

  rc=0
  aws s3 ls $S3LOCK || rc=$?

  # No LOCK ? -> get it !
  if [ $rc -ne 0 ]; then
    aquire_lock $S3LOCK
    return
  fi

  rc=0
  verify_lock $S3LOCK || rc=$?

  case $rc in
    2)
      # Lock timeout -> we get it
      aquire_lock $S3LOCK
      ;;
    0)
      # we already own it
      return 0
      ;;
    *)
      # someone else has a valid lock
      return 1
      ;;
  esac
}
# All things networking
# Writes the forwarding related sysctl settings to
# /etc/sysctl.d/40-ip-forward.conf and applies them right away.
enable_ip_forwarding() {
  local conf=/etc/sysctl.d/40-ip-forward.conf

  # loaded first - presumably so the nf_conntrack_max key exists when applied
  modprobe nf_conntrack

  cat > $conf <<'EOF'
net.ipv4.ip_forward=1
net.ipv4.ip_local_port_range = 1024 65535
net.ipv4.conf.all.send_redirects=0
net.ipv4.conf.all.rp_filter = 1
net.ipv4.conf.all.accept_redirects = 0
net.ipv6.conf.default.forwarding = 1
net.ipv6.conf.all.forwarding = 1
net.netfilter.nf_conntrack_max = 524288
EOF

  sysctl -p $conf
}
# Registers the iptables service via rc-update and runs the `save` action of
# its init script.
enable_iptables() {
  local svc=iptables

  rc-update add $svc
  /etc/init.d/$svc save
}
# Registers ${SERVICENAME}.${DNSZONE} in Route53 and installs a shutdown hook
# that removes the record again. No-op unless both DNSZONE and SERVICENAME are set.
# Globals read:
#   DNSZONE, SERVICENAME
#   PUBLIC_IP_ADDRESS  registered if set, otherwise IP_ADDRESS is used
#   SERVICEPRIVATE     any non-empty value forces the private IP_ADDRESS
register_service_dns() {
  if [ -n "$DNSZONE" ] && [ -n "$SERVICENAME" ]; then
    local _IP=${PUBLIC_IP_ADDRESS:-$IP_ADDRESS}
    # the variable has to be expanded here; testing the literal string
    # "SERVICEPRIVATE" is always true and forced the private IP
    [ -n "$SERVICEPRIVATE" ] && _IP=$IP_ADDRESS

    route53.py --fqdn "${SERVICENAME}.${DNSZONE}" --record "$_IP"

    # Register shutdown hook to remove DNS entry on terminate
    # (deletes exactly the record value registered above)
    cat <<EOF >> /etc/local.d/route53.stop
echo "Deleting Route53 record for ${SERVICENAME}.${DNSZONE}" >> /tmp/shutdown.log
route53.py --delete --fqdn "${SERVICENAME}.${DNSZONE}" --record ${_IP}
EOF
    chmod +x /etc/local.d/route53.stop

    # Short cut our public IP to private one to allow talking to our own service name
    add_once /etc/hosts "${IP_ADDRESS} ${SERVICENAME}.${DNSZONE}"
    log -t user-data info "Registered $_IP with ${SERVICENAME}.$DNSZONE"
  fi
}
# Adds node-exporter to the default runlevel, starts it and logs the fact.
setup_prometheus() {
  local svc=node-exporter

  rc-update add $svc default
  rc-service $svc start

  log -t user-data info "Enabled and started Prometheus node-exporter"
}
# Configures and starts the fluent-bit agent.
# $1 = target as `<host>` or `<shared_key>@<host>`; defaults to host "fluentd"
#      with shared key "cloudbender"
# Writes /etc/fluent-bit/metadata.conf and /etc/fluent-bit/output.conf.
setup_fluentbit() {
  local key="cloudbender"
  local host="${1:-fluentd}"

  # split an optional "<key>@" prefix off the host
  case "$host" in
    *@*)
      key=${host%%@*}
      host=${host##*@}
      ;;
  esac

  # add some AWS metadata
  cat > /etc/fluent-bit/metadata.conf <<EOF
[FILTER]
Name record_modifier
Match *
Record source.ip $IP_ADDRESS
Record source.instance_id $INSTANCE_ID
Record source.region $REGION
Record source.account $AWS_ACCOUNT_ID
Record source.conglomerate $CONGLOMERATE
Record source.artifact $ARTIFACT
EOF

  # Configure output
  cat > /etc/fluent-bit/output.conf <<EOF
[OUTPUT]
Match *
Name forward
Host $host
Port 24224
Shared_Key $key
tls on
Send_options true
Require_ack_response true
EOF

  LOG_FILES=$LOGGING_FILES

  ## TODO:
  # Add parameter parsing for custom logfile tailing

  rc-update add fluent-bit default
  rc-service fluent-bit start

  log -t user-data info "Enabled and started fluent-bit logging agent sending logs to $host"
}