# alpine-overlay/kubezero/zdt-base/common.sh

# We build on top of tiny-cloud
. /lib/tiny-cloud/common
. /usr/lib/cloudbender/cloud/"$CLOUD".sh

# Boolean flags
# $1 = option value, compared case-insensitively
# Returns 0 for "1" / "true"
# Returns 1 for "0" / "false" / "none" / empty value
# Anything else is reported via `log` and treated as false (returns 1)
is_enabled() {
  local lowered=$(echo "$1" | tr '[:upper:]' '[:lower:]')

  case "$lowered" in
    1 | true)
      return 0
      ;;
    0 | false | none | "")
      return 1
      ;;
  esac

  log -t user-data warn "Unknown value for boolean option: $lowered - assuming False"
  return 1
}


# setup_instance, various OS tweaks impossible to do via AMI baking
# Globals read: IP_ADDRESS, _META_HOSTNAME, HOSTNAME, DOMAIN_NAME, CLOUD, REGION
# Touches /etc/machine-id, /etc/fstab, /etc/hosts, /etc/resolv.conf and,
# on AWS, /etc/profile.d/aws.sh
setup_instance() {
  # emulate systemd: make sure a machine-id exists
  if [ ! -f /etc/machine-id ]; then
    uuidgen > /etc/machine-id
  fi

  # register the bpf file system, then mount everything listed in fstab
  add_once /etc/fstab "bpffs       /sys/fs/bpf    bpf      rw,nosuid,nodev,noexec,relatime,mode=700 0 0"
  mount -a

  # Ensure certain mounts are shared to run containers later, eg. cilium, falco
  local shared_mount
  for shared_mount in /sys/fs/cgroup /sys/fs/bpf /sys; do
    mount --make-shared "$shared_mount"
  done

  add_once /etc/hosts "${IP_ADDRESS} ${_META_HOSTNAME} ${HOSTNAME}"

  # workaround for dhcpcd / openresolv to omit search domain if equal to domain
  # breaking DNS resolution of shortnames for eg. etcd and kube-apiserver
  add_once /etc/resolv.conf "search $DOMAIN_NAME"

  if [ "$CLOUD" = "aws" ]; then
    # Set system wide default region for boto3
    echo "export AWS_DEFAULT_REGION=$REGION" > /etc/profile.d/aws.sh

    setup_sns_alarms
  else
    # unknown clouds only get a warning, setup is not aborted
    ewarn "Unsupported Cloud: $CLOUD"
  fi
}

################
# IAM SSH KEYS #
################
# Configure sshd: listen port and, on AWS, IAM backed authorized keys lookup.
# Globals read: SSHPORT (default 22), CLOUD, SSHKEYIAMGROUP, SSHKEYIAMROLE
# Edits /etc/ssh/sshd_config in place.
# Returns 0 for cloud "nocloud"; unsupported clouds only trigger a warning.
configure_sshd() {
  local config=/etc/ssh/sshd_config

  # Change Listen port
  local port=${SSHPORT:-"22"}
  # POSIX character classes are used on purpose: inside a bracket expression
  # '\s' is a literal backslash plus 's', so '[\s#]' never matched whitespace.
  if [ -w "$config" ]; then
    sed -i -e "s/^[[:space:]#]*Port[[:space:]].*/Port ${port}/" "$config"
  fi

  case "$CLOUD" in
    aws)
      # on AWS call IAM for allowed groups and actual keys
      local group=${SSHKEYIAMGROUP:-""}
      local role=${SSHKEYIAMROLE:-"arn:aws:iam::000000000000:role/Undefined"}
      # the all-zero placeholder ARN means "no role configured"
      if [ "$role" == "arn:aws:iam::000000000000:role/Undefined" ]; then
        role=""
      fi

      if [ -n "$group" ]; then
        # Configure SSHD; ',' is the sed delimiter as the role ARN contains '/'
        sed -i -e "s,^[[:space:]#]*AuthorizedKeysCommand[[:space:]].*,AuthorizedKeysCommand /usr/sbin/get_iam_sshkeys.py --user %u --group ${group} --iamRole \"${role}\"," "$config"
        sed -i -e 's,^[[:space:]#]*AuthorizedKeysCommandUser[[:space:]].*,AuthorizedKeysCommandUser nobody,' "$config"

        einfo "added $group to SSH admin keys"
      fi
      ;;
    nocloud)
      return 0
      ;;
    *)
      ewarn "Unsupported Cloud: $CLOUD"
      # return 1
      ;;
  esac
}


# Persist host keys
# has to run before sshd starts up first time !
# $1 = SSM path prefix, keys are kept as <prefix>/host_<type>.tgz
# Sets the global RET: 1 if at least one key type could not be restored from SSM, else 0
persistent_sshd_hostkeys() {
  # Top level is artifact to be able to limit the SSM IAM permissions
  local ssm_path=$1
  local key_types="ecdsa ed25519 rsa"

  # try to get none existing host keys from SSM
  RET=0
  for key in $key_types; do
    # a key type is only fetched if neither its public nor its private file exists
    if [ -f /etc/ssh/ssh_host_${key}_key.pub ] || [ -f /etc/ssh/ssh_host_${key}_key ]; then
      continue
    fi

    if (aws ssm get-parameters --names "${ssm_path}/host_${key}.tgz" --with-decryption --query 'Parameters[0].Value' | base64 -d | tar xzf - --directory=/ 1>/dev/null 2>&1); then
      log -t user-data info "Restored ssh_host_${key}_key from SSM" || RET=1
    else
      RET=1
    fi
  done

  # everything restored or already present: nothing to generate or upload
  [ $RET -eq 1 ] || return 0

  # generate any missing keys
  ssh-keygen -A

  # Update keys on SSM as at least one of them couldn't be restored
  for key in $key_types; do
    if [ -r /etc/ssh/ssh_host_${key}_key ] && [ -r /etc/ssh/ssh_host_${key}_key.pub ]; then
      (aws ssm put-parameter --name "${ssm_path}/host_${key}.tgz" --type SecureString --value \
          "$(tar czf - /etc/ssh/ssh_host_${key}_key /etc/ssh/ssh_host_${key}_key.pub | base64)" --overwrite) \
        && log -t user-data info "Uploaded ssh_host_${key}_key to SSM"
    fi
  done
}

# Set the hostname and propagate it: /etc/hostname, /etc/hosts, syslog-ng,
# Route53 and the EC2 Name tag.
# $1 = requested hostname, only the part before the first '.' is used:
# - plain custom hostname
# - `unique:<format_string>` eg. `unique:kube-worker-{:02}` -> kube-worker-01
# - `myownip:<prefix>` eg. `myownip:nodegroup-` -> nodegroup-1-2-3-4
# - empty: hostname stays untouched, only the EC2 Name tag is set to
#   ${HOSTNAME}.${REGION}.compute.internal
# Globals read: AWS_EC2LAUNCHTEMPLATE_ID, INSTANCE_ID, IP_ADDRESS, DOMAIN_NAME, HOSTNAME, REGION
# Globals written: new_hostname, FQDN, HOSTNAME (exported)
set_hostname() {
  local custom_hostname
  custom_hostname=$(printf '%s\n' "$1" | awk -F. '{ print $1 }')

  if [ -n "$custom_hostname" ]; then
    if [[ "$custom_hostname" == unique:* ]]; then
      new_hostname=$(uniq_hostname.py "$AWS_EC2LAUNCHTEMPLATE_ID" "$INSTANCE_ID" "${custom_hostname##unique:}")

    elif [[ "$custom_hostname" == myownip:* ]]; then
      # dots of the IP become dashes so the result is a single DNS label
      new_hostname="${custom_hostname##myownip:}${IP_ADDRESS//./-}"

    else
      new_hostname=$custom_hostname
    fi

    FQDN="${new_hostname}.${DOMAIN_NAME}"
    printf '%s\n' "$new_hostname" > /etc/hostname

    hostname "$new_hostname"
    export HOSTNAME=$new_hostname

    # add new hostname to hosts
    add_once /etc/hosts "${IP_ADDRESS} ${FQDN} ${new_hostname}"

    log -t user-data info "Hostname updated to ${new_hostname}."

    # hup syslog to update loghost macro
    /etc/init.d/syslog-ng reload

    # update Route53 entry for VPC internal FQDN
    route53.py --fqdn "$FQDN" --record "$IP_ADDRESS"

    # update our Name Tag to FQDN or PrivateDNSName to allow easy identification in the AWS UI
    aws ec2 create-tags --resources "$INSTANCE_ID" --tags "Key=Name,Value=$FQDN"
  else
    aws ec2 create-tags --resources "$INSTANCE_ID" --tags "Key=Name,Value=${HOSTNAME}.${REGION}.compute.internal"
  fi
}


# Split a volume definition into its fields by turning every ':' into a space.
# $1 = volume definition, the fields are written to stdout
# Todo: proper checks once all is yaml
_parse_volume() {
  echo $1 | tr ':' ' '
}

# mount optional remote volumes
# $1 = whitespace separated list of volume definitions, each parsed by
#      _parse_volume into: type, volume id, block device, mount path
# Only type "ebs" is handled. Processing stops at the first unknown type or
# failed attach; remaining volumes are skipped.
# A device already carrying an XFS file system is repaired, any other device
# gets a NEW XFS file system (mkfs.xfs -f) before being mounted.
mount_volumes() {
  local volumes="$1"
  # keep all work variables out of the global scope of the sourcing script
  local vol volType volId volDevice volPath rc fs_type

  for vol in $volumes; do
    # Todo: check volume type and call matching func
    read -r volType volId volDevice volPath < <(_parse_volume "$vol")

    if [ "$volType" != "ebs" ]; then
      echo "Unknown volume type $volType"
      break
    fi

    attach_ebs "$volId" "$volDevice"
    rc=$?
    if [ "$rc" -ne 0 ]; then
      ewarn "error trying to attach $volId"
      break
    fi

    # wait for the block device to become available
    # NOTE(review): no upper bound, this blocks forever if the device never shows up
    while true; do
      mdev -s
      test -b "$volDevice" && break
      sleep 1
    done

    # check volume for existing filesystem
    fs_type=$(file -Lbs "$volDevice")
    if [[ "$fs_type" == *"XFS filesystem"* ]]; then
      xfs_repair "$volDevice" >/dev/null 2>&1
    else
      mkfs.xfs -qf "$volDevice" >/dev/null
    fi

    # mount
    mkdir -p "$volPath"
    mount -t xfs -o noatime "$volDevice" "$volPath"

    einfo "mounting $volDevice at $volPath"
  done
}

# Unmount remote volumes and detach them from this instance.
# $1 = whitespace separated list of volume definitions (same format as mount_volumes)
# A volume is only detached if its umount succeeded.
# Globals read: INSTANCE_ID, REGION
unmount_volumes() {
  local volumes="$1"
  local vol volType volId volDevice volPath

  for vol in $volumes; do
    read -r volType volId volDevice volPath < <(_parse_volume "$vol")

    umount "$volPath" \
      && aws ec2 detach-volume --volume-id "$volId" --instance-id "$INSTANCE_ID" --region "$REGION" > /dev/null
  done
}

# Log an info message and remember it in the global MSG,
# which is used for the sns event: last one wins
msg() {
  MSG="$@"
  log -t user-data info "$@"
}

# Generic retry command wrapper
# $1 = number of tries; 0 = forever
# $2 = number of seconds to sleep between tries
# $@ actual command incl. its arguments
# Executables are run under `timeout 30`; shell functions and builtins are
# called directly as `timeout` cannot run them.
# Returns 0 as soon as the command succeeds, 1 once all tries are used up.
retry() {
  local tries=$1
  local waitfor=$2
  shift 2

  while true; do
    # Explicit if/else: with `A && { timeout ..; } || { ..; }` a failed executable
    # was run a second time right away, and without the timeout.
    if [ "$(type -t "$1")" = "file" ]; then
      timeout 30 "$@" && return 0
    else
      "$@" && return 0
    fi

    # tries=0 goes negative here and never hits 0 again -> retry forever
    tries=$((tries - 1))
    [ "$tries" -eq 0 ] && return 1
    sleep "$waitfor"
  done
}

# Create and activate a swap file at /.swapfile unless it already exists,
# make sure it is listed in /etc/fstab and set vm.swappiness to 10.
# $1 = size of the swap file in MiB (dd block count with bs=1M)
add_swap() {
  if [ ! -f /.swapfile ]; then
    dd if=/dev/zero of=/.swapfile bs=1M count=$1 \
      && chmod 600 /.swapfile \
      && mkswap /.swapfile \
      && swapon /.swapfile
  fi

  if ! grep -q "/.swapfile" /etc/fstab; then
    echo "/.swapfile none swap sw 0 0" >> /etc/fstab
  fi

  sysctl -w vm.swappiness=10
}

# Get SSM secure string base64 decoded
# $1 = SSM_PATH (lower-cased before the lookup), value to stdout
# The exit status is the one of `base64 -d`, the last stage of the pipeline.
get_secret() {
  aws ssm get-parameter --name "${1,,}" --with-decryption --query 'Parameter.Value' | base64 -d
}

# Store values as base64 on SSM
# $1 = SSM_PATH (lower-cased), $2 = VALUE
# The value is encoded including a trailing newline and overwrites any existing parameter.
put_secret() {
  # printf instead of echo: echo would swallow values like "-n" or "-e"
  aws ssm put-parameter --name "${1,,}" --type SecureString --value "$(printf '%s\n' "$2" | base64 -w0)" --overwrite
}

# Gets existing passphrase or creates new passphrase and stores it
# $1 = SSM path of the passphrase, $2 = local file to keep the passphrase in
# Noop if the local file already exists.
init_passphrase() {
  local _URL=$1
  local _PPFILE=$2

  # If secret already exists noop
  [ -f "$_PPFILE" ] && return 0

  # create the file with tight permissions BEFORE any secret is written to it
  ( umask 077; : > "$_PPFILE" ) || return 1

  # get_secret ends in `| base64 -d`, so a failed lookup can still exit 0
  # with empty output: only accept a non-empty result as existing passphrase
  if get_secret "$_URL" > "$_PPFILE" && [ -s "$_PPFILE" ]; then
    chmod 600 "$_PPFILE"
    return
  fi

  # nothing stored yet: generate 16 random bytes as hex and publish them
  xxd -l16 -p /dev/random > "$_PPFILE"
  chmod 600 "$_PPFILE"
  put_secret "$_URL" "$(cat "$_PPFILE")"
}

# upload various useful logs to s3 if configured
# Globals read: ZDT_CLOUDBENDER_DEBUG_REMOTELOGS (S3 URL prefix, noop if empty), INSTANCE_ID
# The archive is built at /tmp/debuglogs.tgz and copied to
# <prefix>/<instance id>/<timestamp>/debuglogs.tgz
# Best effort: missing log files are skipped by cp, the function always returns 0.
upload_debug_logs(){
  [ -z "$ZDT_CLOUDBENDER_DEBUG_REMOTELOGS" ] && return 0

  local s3Url="$ZDT_CLOUDBENDER_DEBUG_REMOTELOGS/$INSTANCE_ID/$(date +'%Y%m%d-%H%M%Z')"
  local _tmp
  _tmp=$(mktemp -d) || return 0

  ps -ef > "${_tmp}/process.list"
  cp /var/log/messages \
     /var/log/rc.log \
     /var/log/user-data.log \
     /etc/cloudbender.conf \
     /var/lib/cloud/meta-data \
     /var/log/kubelet/kubelet.log \
     /var/log/crio/crio.log \
     "$_tmp"

  tar cfz /tmp/debuglogs.tgz -C "$_tmp" .
  aws s3 cp /tmp/debuglogs.tgz "$s3Url/debuglogs.tgz"

  # the staging directory is no longer needed once the archive is built
  [ -n "$_tmp" ] && rm -rf -- "$_tmp"

  return 0
}


# Final handler of the user-data run, reports the outcome and ends the script.
# $1 = exit code of the run, 0 means success
# $2 = line number the error occurred at
# $3 = name of the function the error occurred in
# $4 = failed command (or the plain message if $2 is 1)
# Depending on which globals are set it sends an SNS event (ALARMSNSARN),
# completes the ASG lifecycle hook (LAUNCH_HOOK) and signals CloudFormation
# (AWS_CLOUDFORMATION_LOGICAL_ID). After an error it either uploads debug logs
# or powers the instance off. Always ends the shell with `exit 0`.
# Globals written: CFN_STATUS, RESULT, LEVEL, SUBJECT, MSG, ATTACHMENT, end_uptime
exit_trap() {
  # we are the last handler: stop exiting on errors and avoid being triggered again
  set +e
  trap - ERR EXIT
  local ERR_CODE=$1
  local ERR_LINE="$2"
  local ERR_FUNC="$3"
  local ERR_CMD="$4"

  # outcome as reported to CloudFormation (CFN_STATUS) and the lifecycle hook (RESULT)
  if [ $ERR_CODE -ne 0 ]; then
    CFN_STATUS="FAILURE"
    RESULT="ABANDON"
  else
    CFN_STATUS="SUCCESS"
    RESULT="CONTINUE"
  fi

  # Add SNS events on demand
  if [ -n "$ALARMSNSARN" ]; then
    if [ $ERR_CODE -ne 0 ]; then
      LEVEL="Error"
      SUBJECT="Error during cloud-init."
      # presumably line 1 marks errors raised on purpose, with $4 carrying the
      # message instead of a command - TODO confirm against the callers
      if [ $ERR_LINE -ne 1 ]; then
        MSG="$ERR_CMD failed in $ERR_FUNC at $ERR_LINE. Return: $ERR_CODE"
        # 7 numbered lines of the running script, starting 3 lines before the error
        ATTACHMENT="$(pr -tn $0 | tail -n+$((ERR_LINE - 3)) | head -n7)"
      else
        MSG="$ERR_CMD"
      fi

      # NOTE(review): any non-empty value counts as debug here, while the
      # poweroff decision at the end uses is_enabled - the subject can
      # disagree with what actually happens, e.g. for "false"
      if [ -n "$ZDT_CLOUDBENDER_DEBUG" ]; then
        SUBJECT="$SUBJECT Instance kept running for debug."
      else
        SUBJECT="$SUBJECT Instance terminated by ASG lifecycle hook."
      fi
    else
      # MSG is not set in this branch, the value of the last msg() call is sent
      LEVEL="Info"
      SUBJECT="ZDT Alpine Instance launched."
    fi

    # errors are always sent, Info events only if scaling events are not disabled
    if [ -z "${DISABLECLOUDBENDERSNSSCALINGEVENTS}" ] || [ "$LEVEL" != "Info" ]; then
      /var/lib/cloud/sns_alarm.sh "$SUBJECT" "$MSG" "$LEVEL" "$ATTACHMENT"
    fi

    # Disable scaling events during shutdown
    [ -n "${DISABLECLOUDBENDERSNSSCALINGEVENTS}" ] && echo "DISABLE_SCALING_EVENTS=1" >> /etc/cloudbender.conf
  fi

  # tell the ASG whether to CONTINUE or ABANDON the launch, failures of the call are ignored
  [ -n "$LAUNCH_HOOK" ] && aws autoscaling complete-lifecycle-action --lifecycle-action-result $RESULT --instance-id $INSTANCE_ID --lifecycle-hook-name $LAUNCH_HOOK --auto-scaling-group-name ${AWS_AUTOSCALING_GROUPNAME} || true

  if [ -n "${AWS_CLOUDFORMATION_LOGICAL_ID}" ]; then
    aws cloudformation signal-resource --stack-name ${AWS_CLOUDFORMATION_STACK_NAME} --logical-resource-id ${AWS_CLOUDFORMATION_LOGICAL_ID} --unique-id ${INSTANCE_ID} --status ${CFN_STATUS}
  fi

  # timestamp being done
  # start_uptime is not set in this function, it has to be provided by the caller
  end_uptime=$(awk '{print $1}' < /proc/uptime)
  log -t user-data info "Exiting user-data. $end_uptime seconds after boot. Duration: $(echo "$end_uptime-$start_uptime" | bc)"

  # if we ran into error, either upload debug files or poweroff
  if [ $ERR_CODE -ne 0 ]; then
    is_enabled $ZDT_CLOUDBENDER_DEBUG && upload_debug_logs || poweroff
  fi

  exit 0
}

### S3 based locking
# MUTEX: local file the lock content is written to / downloaded into
#        (no directory given, so it lives in the current working directory)
# MUTEX_OWNER: identity stored in the lock and compared by verify_lock
# MUTEX_TIMEOUT: age in seconds after which verify_lock reports a lock as timed out
MUTEX=mutex
MUTEX_OWNER=$HOSTNAME
MUTEX_TIMEOUT=600

# Release the lock: remove the local mutex file and the lock object on S3.
# $1 = S3 URL of the lock object
# Returns the exit status of `aws s3 rm`.
release_lock() {
  local S3LOCK=$1

  rm -f -- "$MUTEX"
  aws s3 rm "$S3LOCK"
}

# Download the lock from S3 into $MUTEX and evaluate it.
# $1 = S3 URL of the lock object
# Lock not timed out and we own it: 0
# Lock not timed out and someone else owns it: 1
# Lock timed out: 2
# Lock could not be downloaded: 1, as ownership can not be proven
# Globals read: MUTEX, MUTEX_OWNER, MUTEX_TIMEOUT
verify_lock() {
  local S3LOCK=$1
  local _host _time timepassed

  # Never judge a stale local copy: right after aquire_lock wrote $MUTEX it
  # still names us as owner, no matter what is actually stored on S3.
  aws s3 cp "$S3LOCK" "$MUTEX" || return 1

  _host=$(grep "MUTEX_OWNER=" "$MUTEX" | sed -e 's/MUTEX_OWNER=//')
  _time=$(grep "MUTEX_TIME=" "$MUTEX" | sed -e 's/MUTEX_TIME=//')

  # Check for timestamp and timeout
  case "$_time" in
    '' | *[!0-9]*)
      # no usable timestamp: the timeout can not be evaluated, only the owner counts
      ;;
    *)
      # 10# forces base 10 even for a value with leading zeros
      timepassed=$(( $(date -u +%s) - 10#$_time ))
      [ "$timepassed" -gt "$MUTEX_TIMEOUT" ] && return 2
      ;;
  esac

  [ "$_host" == "$MUTEX_OWNER" ] && return 0
  return 1
}

# Try to take the lock: write owner and timestamp to $MUTEX, upload it to S3,
# then read it back to see who actually holds it.
# $1 = S3 URL of the lock object
# Returns 1 if the upload fails, otherwise the result of verify_lock.
# Globals read: MUTEX, MUTEX_OWNER
aquire_lock() {
  local S3LOCK=$1

  echo "MUTEX_OWNER=${MUTEX_OWNER}" > "$MUTEX"
  echo "MUTEX_TIME=$(date -u +%s)" >> "$MUTEX"
  # a lock that never reached S3 is not ours
  aws s3 cp "$MUTEX" "$S3LOCK" || return 1

  # verify we actually got the lock
  sleep 2
  verify_lock "$S3LOCK"
}

# Get hold of the S3 lock if possible.
# $1 = S3 URL of the lock object
# - no lock on S3 or lock timed out: try to take it, returns the result of aquire_lock
# - we already own it: returns 0
# - someone else has a valid lock: returns 1
check_lock() {
  local S3LOCK=$1
  local rc=0

  # No LOCK ? -> get it !
  if ! aws s3 ls "$S3LOCK"; then
    aquire_lock "$S3LOCK"
    return
  fi

  # `|| rc=$?` keeps a non-zero result from ending a caller running with set -e
  verify_lock "$S3LOCK" || rc=$?

  case "$rc" in
    2)
      # Lock timeout -> we get it
      aquire_lock "$S3LOCK"
      ;;
    0)
      # we already own it
      return 0
      ;;
    *)
      # someone else has a valid lock
      return 1
      ;;
  esac
}

# All things networking

# Load nf_conntrack, then write and apply /etc/sysctl.d/40-ip-forward.conf:
# IPv4 / IPv6 forwarding, local port range, redirect and rp_filter settings
# as well as the conntrack table size.
enable_ip_forwarding() {
  local sysctl_conf=/etc/sysctl.d/40-ip-forward.conf

  # the net.netfilter.* key below needs the module to be loaded
  modprobe nf_conntrack

  printf '%s\n' \
    "net.ipv4.ip_forward=1" \
    "net.ipv4.ip_local_port_range = 1024 65535" \
    "net.ipv4.conf.all.send_redirects=0" \
    "net.ipv4.conf.all.rp_filter = 1" \
    "net.ipv4.conf.all.accept_redirects = 0" \
    "net.ipv6.conf.default.forwarding = 1" \
    "net.ipv6.conf.all.forwarding = 1" \
    "net.netfilter.nf_conntrack_max = 524288" \
    > "$sysctl_conf"

  sysctl -p "$sysctl_conf"
}

# Register the iptables init script via rc-update and call its `save` action
# (presumably persisting the currently loaded rules - verify against the init script)
enable_iptables() {
  rc-update add iptables
  /etc/init.d/iptables save
}


# Register this instance in Route53 as ${SERVICENAME}.${DNSZONE}
# Noop unless both DNSZONE and SERVICENAME are set.
# The public IP is registered if there is one, unless SERVICEPRIVATE is set
# to any non-empty value, which forces the private IP.
# Also appends a shutdown hook to /etc/local.d/route53.stop removing the
# record again, and maps the service name to the private IP in /etc/hosts.
# Globals read: DNSZONE, SERVICENAME, SERVICEPRIVATE, PUBLIC_IP_ADDRESS, IP_ADDRESS
register_service_dns() {
  local _IP

  if [ -n "$DNSZONE" ] && [ -n "$SERVICENAME" ]; then
    _IP=${PUBLIC_IP_ADDRESS:-$IP_ADDRESS}
    # the variable has to be expanded: the bare word "SERVICEPRIVATE" is never empty
    [ -n "$SERVICEPRIVATE" ] && _IP=$IP_ADDRESS
    route53.py --fqdn "${SERVICENAME}.${DNSZONE}" --record "$_IP"

    # Register shutdown hook to remove DNS entry on terminate
    # the hook deletes exactly the record registered above
    cat <<EOF >> /etc/local.d/route53.stop
echo "Deleting Route53 record for ${SERVICENAME}.${DNSZONE}" >> /tmp/shutdown.log
route53.py --delete --fqdn "${SERVICENAME}.${DNSZONE}" --record ${_IP}
EOF
    chmod +x /etc/local.d/route53.stop

    # Short cut our public IP to private one to allow talking to our own service name
    add_once /etc/hosts "${IP_ADDRESS} ${SERVICENAME}.${DNSZONE}"

    log -t user-data info "Registered $_IP with ${SERVICENAME}.$DNSZONE"
  fi
}


# Add the node-exporter service to the "default" runlevel, start it right away
# and log that this happened.
setup_prometheus() {
  rc-update add node-exporter default
  rc-service node-exporter start

  log -t user-data info "Enabled and started Prometheus node-exporter"
}


# Configure, enable and start the fluent-bit logging agent.
# $1 = forward target, either "<host>" or "<shared_key>@<host>"
#      defaults: host "fluentd", shared key "cloudbender"
# Writes /etc/fluent-bit/metadata.conf and /etc/fluent-bit/output.conf
# Globals read: IP_ADDRESS, INSTANCE_ID, REGION, AWS_ACCOUNT_ID, CONGLOMERATE,
#               ARTIFACT, LOGGING_FILES
# Globals written: LOG_FILES
setup_fluentbit() {
  local shared_key="cloudbender"
  local fwd_host="${1:-fluentd}"

  # split an optional "<shared_key>@" prefix off the target
  case "$fwd_host" in
    *@*)
      shared_key=${fwd_host%%@*}
      fwd_host=${fwd_host##*@}
      ;;
  esac

  # add some AWS metadata
  cat <<EOF > /etc/fluent-bit/metadata.conf
[FILTER]
    Name record_modifier
    Match *
    Record source.ip $IP_ADDRESS
    Record source.instance_id $INSTANCE_ID
    Record source.region $REGION
    Record source.account $AWS_ACCOUNT_ID
    Record source.conglomerate $CONGLOMERATE
    Record source.artifact $ARTIFACT
EOF

  # Configure output
  cat <<EOF > /etc/fluent-bit/output.conf
[OUTPUT]
    Match *
    Name forward
    Host $fwd_host
    Port 24224
    Shared_Key $shared_key
    tls on
    Send_options true
    Require_ack_response true
EOF

  LOG_FILES=$LOGGING_FILES

  ## TODO:
  # Add parameter parsing for custom logfile tailing

  rc-update add fluent-bit default
  rc-service fluent-bit start

  log -t user-data info "Enabled and started fluent-bit logging agent sending logs to $fwd_host"
}