{
   "editable": false,
   "links": [
      {
         "asDropdown": true,
         "includeVars": true,
         "keepTime": true,
         "tags": [
            "kubernetes"
         ],
         "targetBlank": false,
         "title": "Kubernetes",
         "type": "dashboards"
      }
   ],
   "panels": [
      {
         "datasource": {
            "type": "datasource",
            "uid": "-- Mixed --"
         },
         "description": "The SLO (service level objective) and other metrics displayed on this dashboard are for informational purposes only.",
         "gridPos": {
            "h": 2,
            "w": 24,
            "x": 0,
            "y": 0
         },
         "id": 1,
         "options": {
            "content": "The SLO (service level objective) and other metrics displayed on this dashboard are for informational purposes only."
         },
         "pluginVersion": "v11.1.0",
         "title": "Notice",
         "type": "text"
      },
      {
         "datasource": {
            "type": "datasource",
            "uid": "-- Mixed --"
         },
         "description": "How many percent of requests (both read and write) in 30 days have been answered successfully and fast enough?",
         "fieldConfig": {
            "defaults": {
               "decimals": 3,
               "unit": "percentunit"
            }
         },
         "gridPos": {
            "h": 7,
            "w": 8,
            "x": 0,
            "y": 2
         },
         "id": 2,
         "interval": "1m",
         "pluginVersion": "v11.1.0",
         "targets": [
            {
               "datasource": {
                  "type": "prometheus",
                  "uid": "${datasource}"
               },
               "expr": "apiserver_request:availability30d{verb=\"all\", cluster=\"$cluster\"}"
            }
         ],
         "title": "Availability (30d) > 99.000%",
         "type": "stat"
      },
      {
         "datasource": {
            "type": "datasource",
            "uid": "-- Mixed --"
         },
         "description": "How much error budget is left looking at our 0.990% availability guarantees?",
         "fieldConfig": {
            "defaults": {
               "custom": {
                  "fillOpacity": 100
               },
               "decimals": 3,
               "unit": "percentunit"
            }
         },
         "gridPos": {
            "h": 7,
            "w": 16,
            "x": 8,
            "y": 2
         },
         "id": 3,
         "interval": "1m",
         "options": {
            "legend": {
               "asTable": true,
               "placement": "right",
               "showLegend": true
            },
            "tooltip": {
               "mode": "single"
            }
         },
         "pluginVersion": "v11.1.0",
         "targets": [
            {
               "datasource": {
                  "type": "prometheus",
                  "uid": "${datasource}"
               },
               "expr": "100 * (apiserver_request:availability30d{verb=\"all\", cluster=\"$cluster\"} - 0.990000)",
               "legendFormat": "errorbudget"
            }
         ],
         "title": "ErrorBudget (30d) > 99.000%",
         "type": "timeseries"
      },
      {
         "datasource": {
            "type": "datasource",
            "uid": "-- Mixed --"
         },
         "description": "How many percent of read requests (LIST,GET) in 30 days have been answered successfully and fast enough?",
         "fieldConfig": {
            "defaults": {
               "decimals": 3,
               "unit": "percentunit"
            }
         },
         "gridPos": {
            "h": 7,
            "w": 6,
            "x": 0,
            "y": 9
         },
         "id": 4,
         "interval": "1m",
         "pluginVersion": "v11.1.0",
         "targets": [
            {
               "datasource": {
                  "type": "prometheus",
                  "uid": "${datasource}"
               },
               "expr": "apiserver_request:availability30d{verb=\"read\", cluster=\"$cluster\"}"
            }
         ],
         "title": "Read Availability (30d)",
         "type": "stat"
      },
      {
         "datasource": {
            "type": "datasource",
            "uid": "-- Mixed --"
         },
         "description": "How many read requests (LIST,GET) per second do the apiservers get by code?",
         "fieldConfig": {
            "defaults": {
               "custom": {
                  "fillOpacity": 100,
                  "stacking": {
                     "mode": "normal"
                  }
               },
               "unit": "reqps"
            },
            "overrides": [
               {
                  "matcher": {
                     "id": "byRegexp",
                     "options": "/2../i"
                  },
                  "properties": [
                     {
                        "id": "color",
                        "value": "#56A64B"
                     }
                  ]
               },
               {
                  "matcher": {
                     "id": "byRegexp",
                     "options": "/3../i"
                  },
                  "properties": [
                     {
                        "id": "color",
                        "value": "#F2CC0C"
                     }
                  ]
               },
               {
                  "matcher": {
                     "id": "byRegexp",
                     "options": "/4../i"
                  },
                  "properties": [
                     {
                        "id": "color",
                        "value": "#3274D9"
                     }
                  ]
               },
               {
                  "matcher": {
                     "id": "byRegexp",
                     "options": "/5../i"
                  },
                  "properties": [
                     {
                        "id": "color",
                        "value": "#E02F44"
                     }
                  ]
               }
            ]
         },
         "gridPos": {
            "h": 7,
            "w": 6,
            "x": 6,
            "y": 9
         },
         "id": 5,
         "interval": "1m",
         "options": {
            "legend": {
               "asTable": true,
               "placement": "right",
               "showLegend": true
            },
            "tooltip": {
               "mode": "single"
            }
         },
         "pluginVersion": "v11.1.0",
         "targets": [
            {
               "datasource": {
                  "type": "prometheus",
                  "uid": "${datasource}"
               },
               "expr": "sum by (code) (code_resource:apiserver_request_total:rate5m{verb=\"read\", cluster=\"$cluster\"})",
               "legendFormat": "{{ code }}"
            }
         ],
         "title": "Read SLI - Requests",
         "type": "timeseries"
      },
      {
         "datasource": {
            "type": "datasource",
            "uid": "-- Mixed --"
         },
         "description": "How many percent of read requests (LIST,GET) per second are returned with errors (5xx)?",
         "fieldConfig": {
            "defaults": {
               "min": 0,
               "unit": "percentunit"
            }
         },
         "gridPos": {
            "h": 7,
            "w": 6,
            "x": 12,
            "y": 9
         },
         "id": 6,
         "interval": "1m",
         "options": {
            "legend": {
               "asTable": true,
               "placement": "right",
               "showLegend": true
            },
            "tooltip": {
               "mode": "single"
            }
         },
         "pluginVersion": "v11.1.0",
         "targets": [
            {
               "datasource": {
                  "type": "prometheus",
                  "uid": "${datasource}"
               },
               "expr": "sum by (resource) (code_resource:apiserver_request_total:rate5m{verb=\"read\",code=~\"5..\", cluster=\"$cluster\"}) / sum by (resource) (code_resource:apiserver_request_total:rate5m{verb=\"read\", cluster=\"$cluster\"})",
               "legendFormat": "{{ resource }}"
            }
         ],
         "title": "Read SLI - Errors",
         "type": "timeseries"
      },
      {
         "datasource": {
            "type": "datasource",
            "uid": "-- Mixed --"
         },
         "description": "How many seconds is the 99th percentile for reading (LIST|GET) a given resource?",
         "fieldConfig": {
            "defaults": {
               "unit": "s"
            }
         },
         "gridPos": {
            "h": 7,
            "w": 6,
            "x": 18,
            "y": 9
         },
         "id": 7,
         "interval": "1m",
         "options": {
            "legend": {
               "asTable": true,
               "placement": "right",
               "showLegend": true
            },
            "tooltip": {
               "mode": "single"
            }
         },
         "pluginVersion": "v11.1.0",
         "targets": [
            {
               "datasource": {
                  "type": "prometheus",
                  "uid": "${datasource}"
               },
               "expr": "cluster_quantile:apiserver_request_sli_duration_seconds:histogram_quantile{verb=\"read\", cluster=\"$cluster\"}",
               "legendFormat": "{{ resource }}"
            }
         ],
         "title": "Read SLI - Duration",
         "type": "timeseries"
      },
      {
         "datasource": {
            "type": "datasource",
            "uid": "-- Mixed --"
         },
         "description": "How many percent of write requests (POST|PUT|PATCH|DELETE) in 30 days have been answered successfully and fast enough?",
         "fieldConfig": {
            "defaults": {
               "decimals": 3,
               "unit": "percentunit"
            }
         },
         "gridPos": {
            "h": 7,
            "w": 6,
            "x": 0,
            "y": 16
         },
         "id": 8,
         "interval": "1m",
         "pluginVersion": "v11.1.0",
         "targets": [
            {
               "datasource": {
                  "type": "prometheus",
                  "uid": "${datasource}"
               },
               "expr": "apiserver_request:availability30d{verb=\"write\", cluster=\"$cluster\"}"
            }
         ],
         "title": "Write Availability (30d)",
         "type": "stat"
      },
      {
         "datasource": {
            "type": "datasource",
            "uid": "-- Mixed --"
         },
         "description": "How many write requests (POST|PUT|PATCH|DELETE) per second do the apiservers get by code?",
         "fieldConfig": {
            "defaults": {
               "custom": {
                  "fillOpacity": 100,
                  "stacking": {
                     "mode": "normal"
                  }
               },
               "unit": "reqps"
            },
            "overrides": [
               {
                  "matcher": {
                     "id": "byRegexp",
                     "options": "/2../i"
                  },
                  "properties": [
                     {
                        "id": "color",
                        "value": "#56A64B"
                     }
                  ]
               },
               {
                  "matcher": {
                     "id": "byRegexp",
                     "options": "/3../i"
                  },
                  "properties": [
                     {
                        "id": "color",
                        "value": "#F2CC0C"
                     }
                  ]
               },
               {
                  "matcher": {
                     "id": "byRegexp",
                     "options": "/4../i"
                  },
                  "properties": [
                     {
                        "id": "color",
                        "value": "#3274D9"
                     }
                  ]
               },
               {
                  "matcher": {
                     "id": "byRegexp",
                     "options": "/5../i"
                  },
                  "properties": [
                     {
                        "id": "color",
                        "value": "#E02F44"
                     }
                  ]
               }
            ]
         },
         "gridPos": {
            "h": 7,
            "w": 6,
            "x": 6,
            "y": 16
         },
         "id": 9,
         "interval": "1m",
         "options": {
            "legend": {
               "asTable": true,
               "placement": "right",
               "showLegend": true
            },
            "tooltip": {
               "mode": "single"
            }
         },
         "pluginVersion": "v11.1.0",
         "targets": [
            {
               "datasource": {
                  "type": "prometheus",
                  "uid": "${datasource}"
               },
               "expr": "sum by (code) (code_resource:apiserver_request_total:rate5m{verb=\"write\", cluster=\"$cluster\"})",
               "legendFormat": "{{ code }}"
            }
         ],
         "title": "Write SLI - Requests",
         "type": "timeseries"
      },
      {
         "datasource": {
            "type": "datasource",
            "uid": "-- Mixed --"
         },
         "description": "How many percent of write requests (POST|PUT|PATCH|DELETE) per second are returned with errors (5xx)?",
         "fieldConfig": {
            "defaults": {
               "min": 0,
               "unit": "percentunit"
            }
         },
         "gridPos": {
            "h": 7,
            "w": 6,
            "x": 12,
            "y": 16
         },
         "id": 10,
         "interval": "1m",
         "options": {
            "legend": {
               "asTable": true,
               "placement": "right",
               "showLegend": true
            },
            "tooltip": {
               "mode": "single"
            }
         },
         "pluginVersion": "v11.1.0",
         "targets": [
            {
               "datasource": {
                  "type": "prometheus",
                  "uid": "${datasource}"
               },
               "expr": "sum by (resource) (code_resource:apiserver_request_total:rate5m{verb=\"write\",code=~\"5..\", cluster=\"$cluster\"}) / sum by (resource) (code_resource:apiserver_request_total:rate5m{verb=\"write\", cluster=\"$cluster\"})",
               "legendFormat": "{{ resource }}"
            }
         ],
         "title": "Write SLI - Errors",
         "type": "timeseries"
      },
      {
         "datasource": {
            "type": "datasource",
            "uid": "-- Mixed --"
         },
         "description": "How many seconds is the 99th percentile for writing (POST|PUT|PATCH|DELETE) a given resource?",
         "fieldConfig": {
            "defaults": {
               "unit": "s"
            }
         },
         "gridPos": {
            "h": 7,
            "w": 6,
            "x": 18,
            "y": 16
         },
         "id": 11,
         "interval": "1m",
         "options": {
            "legend": {
               "asTable": true,
               "placement": "right",
               "showLegend": true
            },
            "tooltip": {
               "mode": "single"
            }
         },
         "pluginVersion": "v11.1.0",
         "targets": [
            {
               "datasource": {
                  "type": "prometheus",
                  "uid": "${datasource}"
               },
               "expr": "cluster_quantile:apiserver_request_sli_duration_seconds:histogram_quantile{verb=\"write\", cluster=\"$cluster\"}",
               "legendFormat": "{{ resource }}"
            }
         ],
         "title": "Write SLI - Duration",
         "type": "timeseries"
      },
      {
         "datasource": {
            "type": "datasource",
            "uid": "-- Mixed --"
         },
         "fieldConfig": {
            "defaults": {
               "min": 0,
               "unit": "ops"
            }
         },
         "gridPos": {
            "h": 7,
            "w": 12,
            "x": 0,
            "y": 23
         },
         "id": 12,
         "interval": "1m",
         "options": {
            "legend": {
               "asTable": true,
               "placement": "right",
               "showLegend": false
            },
            "tooltip": {
               "mode": "single"
            }
         },
         "pluginVersion": "v11.1.0",
         "targets": [
            {
               "datasource": {
                  "type": "prometheus",
                  "uid": "${datasource}"
               },
               "expr": "sum(rate(workqueue_adds_total{job=\"apiserver\", instance=~\"$instance\", cluster=\"$cluster\"}[$__rate_interval])) by (instance, name)",
               "legendFormat": "{{instance}} {{name}}"
            }
         ],
         "title": "Work Queue Add Rate",
         "type": "timeseries"
      },
      {
         "datasource": {
            "type": "datasource",
            "uid": "-- Mixed --"
         },
         "fieldConfig": {
            "defaults": {
               "min": 0,
               "unit": "short"
            }
         },
         "gridPos": {
            "h": 7,
            "w": 12,
            "x": 12,
            "y": 23
         },
         "id": 13,
         "interval": "1m",
         "options": {
            "legend": {
               "asTable": true,
               "placement": "right",
               "showLegend": false
            },
            "tooltip": {
               "mode": "single"
            }
         },
         "pluginVersion": "v11.1.0",
         "targets": [
            {
               "datasource": {
                  "type": "prometheus",
                  "uid": "${datasource}"
               },
               "expr": "sum(rate(workqueue_depth{job=\"apiserver\", instance=~\"$instance\", cluster=\"$cluster\"}[$__rate_interval])) by (instance, name)",
               "legendFormat": "{{instance}} {{name}}"
            }
         ],
         "title": "Work Queue Depth",
         "type": "timeseries"
      },
      {
         "datasource": {
            "type": "datasource",
            "uid": "-- Mixed --"
         },
         "fieldConfig": {
            "defaults": {
               "min": 0,
               "unit": "s"
            }
         },
         "gridPos": {
            "h": 7,
            "w": 24,
            "x": 0,
            "y": 30
         },
         "id": 14,
         "interval": "1m",
         "options": {
            "legend": {
               "asTable": true,
               "calcs": [
                  "lastNotNull"
               ],
               "placement": "right",
               "showLegend": true
            },
            "tooltip": {
               "mode": "single"
            }
         },
         "pluginVersion": "v11.1.0",
         "targets": [
            {
               "datasource": {
                  "type": "prometheus",
                  "uid": "${datasource}"
               },
               "expr": "histogram_quantile(0.99, sum(rate(workqueue_queue_duration_seconds_bucket{job=\"apiserver\", instance=~\"$instance\", cluster=\"$cluster\"}[$__rate_interval])) by (instance, name, le))",
               "legendFormat": "{{instance}} {{name}}"
            }
         ],
         "title": "Work Queue Latency",
         "type": "timeseries"
      },
      {
         "datasource": {
            "type": "datasource",
            "uid": "-- Mixed --"
         },
         "fieldConfig": {
            "defaults": {
               "unit": "bytes"
            }
         },
         "gridPos": {
            "h": 7,
            "w": 8,
            "x": 0,
            "y": 37
         },
         "id": 15,
         "interval": "1m",
         "options": {
            "legend": {
               "asTable": true,
               "placement": "right",
               "showLegend": true
            },
            "tooltip": {
               "mode": "single"
            }
         },
         "pluginVersion": "v11.1.0",
         "targets": [
            {
               "datasource": {
                  "type": "prometheus",
                  "uid": "${datasource}"
               },
               "expr": "process_resident_memory_bytes{job=\"apiserver\",instance=~\"$instance\", cluster=\"$cluster\"}",
               "legendFormat": "{{instance}}"
            }
         ],
         "title": "Memory",
         "type": "timeseries"
      },
      {
         "datasource": {
            "type": "datasource",
            "uid": "-- Mixed --"
         },
         "fieldConfig": {
            "defaults": {
               "min": 0,
               "unit": "short"
            }
         },
         "gridPos": {
            "h": 7,
            "w": 8,
            "x": 8,
            "y": 37
         },
         "id": 16,
         "interval": "1m",
         "options": {
            "legend": {
               "asTable": true,
               "placement": "right",
               "showLegend": true
            },
            "tooltip": {
               "mode": "single"
            }
         },
         "pluginVersion": "v11.1.0",
         "targets": [
            {
               "datasource": {
                  "type": "prometheus",
                  "uid": "${datasource}"
               },
               "expr": "rate(process_cpu_seconds_total{job=\"apiserver\",instance=~\"$instance\", cluster=\"$cluster\"}[$__rate_interval])",
               "legendFormat": "{{instance}}"
            }
         ],
         "title": "CPU usage",
         "type": "timeseries"
      },
      {
         "datasource": {
            "type": "datasource",
            "uid": "-- Mixed --"
         },
         "fieldConfig": {
            "defaults": {
               "unit": "short"
            }
         },
         "gridPos": {
            "h": 7,
            "w": 8,
            "x": 16,
            "y": 37
         },
         "id": 17,
         "interval": "1m",
         "options": {
            "legend": {
               "asTable": true,
               "placement": "right",
               "showLegend": true
            },
            "tooltip": {
               "mode": "single"
            }
         },
         "pluginVersion": "v11.1.0",
         "targets": [
            {
               "datasource": {
                  "type": "prometheus",
                  "uid": "${datasource}"
               },
               "expr": "go_goroutines{job=\"apiserver\",instance=~\"$instance\", cluster=\"$cluster\"}",
               "legendFormat": "{{instance}}"
            }
         ],
         "title": "Goroutines",
         "type": "timeseries"
      }
   ],
   "refresh": "10s",
   "schemaVersion": 39,
   "tags": [
      "kubernetes"
   ],
   "templating": {
      "list": [
         {
            "current": {
               "selected": true,
               "text": "default",
               "value": "default"
            },
            "hide": 0,
            "label": "Data source",
            "name": "datasource",
            "query": "prometheus",
            "regex": "",
            "type": "datasource"
         },
         {
            "datasource": {
               "type": "prometheus",
               "uid": "${datasource}"
            },
            "hide": 2,
            "label": "cluster",
            "name": "cluster",
            "query": "label_values(up{job=\"apiserver\"}, cluster)",
            "refresh": 2,
            "sort": 1,
            "type": "query"
         },
         {
            "datasource": {
               "type": "prometheus",
               "uid": "${datasource}"
            },
            "hide": 0,
            "includeAll": true,
            "name": "instance",
            "query": "label_values(up{job=\"apiserver\", cluster=\"$cluster\"}, instance)",
            "refresh": 2,
            "sort": 1,
            "type": "query"
         }
      ]
   },
   "time": {
      "from": "now-1h",
      "to": "now"
   },
   "timezone": "UTC",
   "title": "API server",
   "uid": "09ec8aa1e996d6ffcd6817bbaff4db1b"
}