diff --git a/charts/kubezero-metrics/Chart.yaml b/charts/kubezero-metrics/Chart.yaml
index d34a0ae..d2c2a90 100644
--- a/charts/kubezero-metrics/Chart.yaml
+++ b/charts/kubezero-metrics/Chart.yaml
@@ -2,7 +2,7 @@ apiVersion: v2
 name: kubezero-metrics
 description: KubeZero Umbrella Chart for prometheus-operator
 type: application
-version: 0.1.3
+version: 0.1.4
 home: https://kubezero.com
 icon: https://cdn.zero-downtime.net/assets/kubezero/logo-small-64.png
 keywords:
@@ -16,7 +16,7 @@ dependencies:
     version: ">= 0.1.3"
     repository: https://zero-down-time.github.io/kubezero/
   - name: prometheus-operator
-    version: 9.3.0
+    version: 9.3.1
     repository: https://kubernetes-charts.storage.googleapis.com/
   - name: prometheus-adapter
     version: 2.5.0
diff --git a/charts/kubezero-metrics/values.yaml b/charts/kubezero-metrics/values.yaml
index 56b485b..9e5bb4b 100644
--- a/charts/kubezero-metrics/values.yaml
+++ b/charts/kubezero-metrics/values.yaml
@@ -27,9 +27,11 @@ prometheus-operator:
   kubeProxy:
     enabled: true
 
-  # Disabled until we figure out how to scrape etcd with ssl client certs
   kubeEtcd:
-    enabled: false
+    enabled: true
+    service:
+      port: 2381
+      targetPort: 2381
 
   kubeControllerManager:
     enabled: true
diff --git a/Quickstart.md b/docs/Quickstart.md
similarity index 100%
rename from Quickstart.md
rename to docs/Quickstart.md
diff --git a/docs/api-server.md b/docs/api-server.md
new file mode 100644
index 0000000..ca66fa6
--- /dev/null
+++ b/docs/api-server.md
@@ -0,0 +1,15 @@
+# api-server OAuth configuration
+
+## Update api-server config
+Add the following extraArgs to the ClusterConfiguration ConfigMap in the kube-system namespace:
+`kubectl edit -n kube-system cm kubeadm-config`
+
+```
+    oidc-issuer-url: "https://accounts.google.com"
+    oidc-client-id: ""
+    oidc-username-claim: "email"
+    oidc-groups-claim: "groups"
+```
+
+## Resources
+- https://kubernetes.io/docs/reference/access-authn-authz/authentication/
diff --git a/docs/cluster.md b/docs/cluster.md
new file mode 100644
index 0000000..0f97551
--- /dev/null
+++ b/docs/cluster.md
@@ -0,0 +1,9 @@
+# Cluster Operations
+
+## Clean up
+### Delete evicted pods across all namespaces
+
+`kubectl get pods --all-namespaces -o json | jq '.items[] | select(.status.reason!=null) | select(.status.reason | contains("Evicted")) | "kubectl delete pods \(.metadata.name) -n \(.metadata.namespace)"' | xargs -n 1 bash -c`
+
+### Clean up old ReplicaSets
+`kubectl get rs --all-namespaces | awk '{if ($3 == 0 && $4 == 0) system("kubectl delete rs "$2" --namespace="$1)}'`
diff --git a/docs/kubectl.md b/docs/kubectl.md
new file mode 100644
index 0000000..fa8283e
--- /dev/null
+++ b/docs/kubectl.md
@@ -0,0 +1,21 @@
+# kubectl
+kubectl is the basic command-line tool to interact with any Kubernetes cluster via the kube-apiserver.
+
+## Plugins
+As there are various very useful plugins for kubectl, the first thing to do should be to install *krew*, the plugin manager.
+See: https://github.com/kubernetes-sigs/krew for details
+
+List of awesome plugins: https://github.com/ishantanu/awesome-kubectl-plugins
+
+### kubelogin
+To log in / authenticate against an OpenID provider like Google, install the kubelogin plugin.
+See: https://github.com/int128/kubelogin
+
+Make sure to adjust your kubeconfig files accordingly!
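+
+Assuming kubelogin was installed via krew, a kubeconfig user entry for it might look roughly like the sketch below; the user name `oidc` as well as the client id and secret values are placeholders, and the issuer URL follows the api-server doc in this repo:
+
+```
+users:
+- name: oidc
+  user:
+    exec:
+      apiVersion: client.authentication.k8s.io/v1beta1
+      command: kubectl
+      args:
+      - oidc-login
+      - get-token
+      - --oidc-issuer-url=https://accounts.google.com
+      - --oidc-client-id=YOUR_CLIENT_ID
+      - --oidc-client-secret=YOUR_CLIENT_SECRET
+```
+
+On the first kubectl call kubelogin should open a browser for the provider login and cache the returned token locally.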
+
+### kauthproxy
+The easiest way to access the Kubernetes dashboard, if installed in the target cluster, is to use the kauthproxy plugin.
+See: https://github.com/int128/kauthproxy
+Once installed, simply execute:
+`kubectl auth-proxy -n kubernetes-dashboard https://kubernetes-dashboard.svc`
+and access the dashboard via the automatically opened browser window.
diff --git a/docs/misc.md b/docs/misc.md
new file mode 100644
index 0000000..8eecdf2
--- /dev/null
+++ b/docs/misc.md
@@ -0,0 +1,26 @@
+## Security - Todo
+- https://github.com/freach/kubernetes-security-best-practice
+- https://github.com/aquasecurity/kube-bench
+- https://kubernetes.io/docs/tasks/debug-application-cluster/audit/
+- https://kubernetes.io/docs/tasks/debug-application-cluster/falco/
+
+## Performance - Todo
+- https://kubernetes.io/docs/tasks/administer-cluster/limit-storage-consumption/
+
+- Set PriorityClasses and proper CPU/MEM limits for core pods like the api-server etc., as we host additional services on the master nodes which might affect these critical components;
+  see: https://kubernetes.io/docs/concepts/configuration/pod-priority-preemption/
+
+## Storage - Todo
+- Open-source S3 - https://min.io/
+- LINSTOR - DRBD for K8s - https://vitobotta.com/2020/01/04/linstor-storage-the-kubernetes-way/, https://github.com/kvaps/kube-linstor, https://github.com/piraeusdatastore/piraeus
+- ChubaoFS - CephFS competitor
+
+## Monitoring
+- https://github.com/cloudworkz/kube-eagle
+
+## Cleanup - Todo
+Something along the lines of https://github.com/onfido/k8s-cleanup, which doesn't work as is
+
+## Resources
+- https://docs.google.com/spreadsheets/d/1WPHt0gsb7adVzY3eviMK2W8LejV0I5m_Zpc8tMzl_2w/edit#gid=0
+- https://github.com/ishantanu/awesome-kubectl-plugins
diff --git a/docs/worker.md b/docs/worker.md
new file mode 100644
index 0000000..0c4a767
--- /dev/null
+++ b/docs/worker.md
@@ -0,0 +1,15 @@
+# Operational guide for worker nodes
+
+## Replace worker node
+In order to change the instance type or, in general, replace worker nodes, do the following:
+
+* (optional) Update the launch configuration of the worker group
+
+* Make sure there is enough capacity in the cluster to handle all pods being evicted from the node
+
+* `kubectl drain --ignore-daemonsets node_name`
+  will evict all pods except DaemonSets. In case there are pods with local storage, review each affected pod. Once you are sure no important data will be lost, add `--delete-local-data` to the command above and try again.
+
+* Terminate the instance matching *node_name*
+
+The new instance should take over the previous node_name, assuming only one node is being replaced at a time, and automatically join and replace the previous node.
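+
+For example, assuming the workers are AWS EC2 instances behind an auto-scaling group (the node name and instance id below are placeholders), the whole sequence could look roughly like this:
+
+```
+# Evict all pods except DaemonSets from the node
+kubectl drain --ignore-daemonsets ip-10-0-1-23.ec2.internal
+
+# Look up the EC2 instance id behind the node (providerID has the form aws:///<az>/<instance-id>)
+kubectl get node ip-10-0-1-23.ec2.internal -o jsonpath='{.spec.providerID}'
+
+# Terminate the instance; the auto-scaling group should bring up a replacement which joins the cluster
+aws ec2 terminate-instances --instance-ids i-0123456789abcdef0
+```
+
+Watch `kubectl get nodes` until the replacement node shows up as *Ready* before draining the next one.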