最近更新时间:2024.02.28 20:04:51
首次发布时间:2023.11.16 18:54:08
容器服务控制台中预置了常见的集群基础资源监控大盘。本文为您介绍如何配置和查看集群基础资源的监控信息。
使用前,请确保容器服务集群已接入托管 Prometheus。详情请参见 接入托管 Prometheus。
当您将容器服务集群正确接入托管 Prometheus,并安装了对应的组件后,需要配置对应的采集规则,才能正确采集指标。
说明
您可以查看指定命名空间下无状态负载(Deployment)的监控信息,包括:实例信息(期望实例数、就绪实例数、就绪比例)、CPU 信息(CPU Request、CPU Limits)、内存信息(内存 Request、内存 Limits)、网络信息、磁盘信息等。支持设置查询的时间段,并指定刷新方式(手动刷新、自动刷新)。
该看板的指标清单如下表所示。
看板名称 | PromQL 语句 |
---|---|
Desired Replicas | max(kube_deployment_spec_replicas{deployment="$Deployment",namespace=~"$Namespace",cluster="$ClusterId"}) |
Ready Replicas | max(kube_deployment_status_replicas_ready{deployment="$Deployment",namespace=~"$Namespace",cluster="$ClusterId"}) |
Ready Percent | max(kube_deployment_status_replicas_ready{deployment="$Deployment",namespace=~"$Namespace",cluster="$ClusterId"}) / max(kube_deployment_spec_replicas{deployment="$Deployment",namespace=~"$Namespace",cluster="$ClusterId"}) |
Replicas | max(kube_deployment_status_replicas{deployment="$Deployment",namespace=~"$Namespace",cluster="$ClusterId"}) without (instance, pod) |
| | max(kube_deployment_spec_replicas{deployment="$Deployment",namespace=~"$Namespace",cluster="$ClusterId"}) without (instance, pod) |
| | min(kube_deployment_status_replicas_ready{deployment="$Deployment",namespace=~"$Namespace",cluster="$ClusterId"}) without (instance, pod) |
| | min(kube_deployment_status_replicas_available{deployment="$Deployment",namespace=~"$Namespace",cluster="$ClusterId"}) without (instance, pod) |
| | min(kube_deployment_status_replicas_updated{deployment="$Deployment",namespace=~"$Namespace",cluster="$ClusterId"}) without (instance, pod) |
| | min(kube_deployment_status_replicas_unavailable{deployment="$Deployment",namespace=~"$Namespace",cluster="$ClusterId"}) without (instance, pod) |
CPU requests (Total) | sum(kube_pod_container_resource_requests{namespace=~"$Namespace",cluster="$ClusterId",resource="cpu",container!=""} and ON(namespace, pod) label_join( kube_pod_owner{namespace=~"$Namespace",cluster="$ClusterId", owner_kind="ReplicaSet"}, "replicaset", "", "owner_name") * on(replicaset, namespace) group_left(owner_name) kube_replicaset_owner{namespace=~"$Namespace",cluster="$ClusterId",owner_name="$Deployment"}) |
CPU limits(Total) | sum(kube_pod_container_resource_limits{namespace=~"$Namespace",cluster="$ClusterId",resource="cpu",container!=""} and ON(namespace, pod) label_join( kube_pod_owner{namespace=~"$Namespace",cluster="$ClusterId", owner_kind="ReplicaSet"}, "replicaset", "", "owner_name") * on(replicaset, namespace) group_left(owner_name) kube_replicaset_owner{namespace=~"$Namespace",cluster="$ClusterId",owner_name="$Deployment"}) |
内存 requests(Total) | sum(kube_pod_container_resource_requests{namespace=~"$Namespace",cluster="$ClusterId",resource="memory",container!=""} and ON(namespace, pod) label_join(kube_pod_owner{namespace=~"$Namespace",cluster="$ClusterId", owner_kind="ReplicaSet"}, "replicaset", "", "owner_name") * on(replicaset, namespace) group_left(owner_name) kube_replicaset_owner{namespace=~"$Namespace",cluster="$ClusterId",owner_name="$Deployment"}) |
内存 limits(Total) | sum(kube_pod_container_resource_limits{namespace=~"$Namespace",cluster="$ClusterId",resource="memory",container!=""} and ON(namespace, pod) label_join(kube_pod_owner{namespace=~"$Namespace",cluster="$ClusterId", owner_kind="ReplicaSet"}, "replicaset", "", "owner_name") * on(replicaset, namespace) group_left(owner_name) kube_replicaset_owner{namespace=~"$Namespace",cluster="$ClusterId",owner_name="$Deployment"}) |
CPU requests (Single Pod) | avg(kube_pod_container_resource_requests{namespace=~"$Namespace",cluster="$ClusterId",resource="cpu",container!=""} * ON(pod, namespace) group_left avg(label_join(kube_pod_owner{namespace=~"$Namespace",cluster="$ClusterId", owner_kind="ReplicaSet"}, "replicaset", "", "owner_name") * on(replicaset, namespace) group_left(owner_name) kube_replicaset_owner{namespace=~"$Namespace",cluster="$ClusterId",owner_name="$Deployment"}) by (pod, namespace)) by (resource) |
CPU limits(Single Pod) | avg(kube_pod_container_resource_limits{namespace=~"$Namespace",cluster="$ClusterId",resource="cpu",container!=""} * ON(pod, namespace) group_left avg(label_join(kube_pod_owner{namespace=~"$Namespace",cluster="$ClusterId", owner_kind="ReplicaSet"}, "replicaset", "", "owner_name") * on(replicaset, namespace) group_left(owner_name) kube_replicaset_owner{namespace=~"$Namespace",cluster="$ClusterId",owner_name="$Deployment"}) by (pod, namespace)) by (resource) |
CPU 用量 | max(rate(container_cpu_usage_seconds_total{namespace=~"$Namespace",cluster="$ClusterId", container!="", image!="" }[5m])) by (pod, container, namespace) * ON(pod, namespace) group_left avg(label_join(kube_pod_owner{namespace=~"$Namespace",cluster="$ClusterId", owner_kind="ReplicaSet"}, "replicaset", "", "owner_name") * on(replicaset, namespace) group_left(owner_name) kube_replicaset_owner{namespace=~"$Namespace",cluster="$ClusterId",owner_name="$Deployment"}) by (pod, namespace) |
内存 requests(Single Pod) | avg(kube_pod_container_resource_requests{namespace=~"$Namespace",cluster="$ClusterId",resource="memory",container!=""} * ON(pod, namespace) group_left avg(label_join(kube_pod_owner{namespace=~"$Namespace",cluster="$ClusterId", owner_kind="ReplicaSet"}, "replicaset", "", "owner_name") * on(replicaset, namespace) group_left(owner_name) kube_replicaset_owner{namespace=~"$Namespace",cluster="$ClusterId",owner_name="$Deployment"}) by (pod, namespace)) by (resource) |
内存 limits(Single Pod) | avg(kube_pod_container_resource_limits{namespace=~"$Namespace",cluster="$ClusterId",resource="memory",container!=""} * ON(pod, namespace) group_left avg(label_join(kube_pod_owner{namespace=~"$Namespace",cluster="$ClusterId", owner_kind="ReplicaSet"}, "replicaset", "", "owner_name") * on(replicaset, namespace) group_left(owner_name) kube_replicaset_owner{namespace=~"$Namespace",cluster="$ClusterId",owner_name="$Deployment"}) by (pod, namespace)) by (resource) |
内存用量 | max by(container, pod, namespace) (container_memory_working_set_bytes{namespace=~"$Namespace",cluster="$ClusterId", container!="", image!="" }) * ON(pod, namespace) group_left avg(label_join(kube_pod_owner{namespace=~"$Namespace",cluster="$ClusterId", owner_kind="ReplicaSet"}, "replicaset", "", "owner_name") * on(replicaset, namespace) group_left(owner_name) kube_replicaset_owner{namespace=~"$Namespace",cluster="$ClusterId",owner_name="$Deployment"}) by (pod, namespace) |
CPU 使用百分比 | (max(rate(container_cpu_usage_seconds_total{namespace=~"$Namespace",cluster="$ClusterId", container!="", image!="" }[5m])) by(container, pod, namespace)/max by(container, pod, namespace) (kube_pod_container_resource_limits{resource="cpu", namespace=~"$Namespace",cluster="$ClusterId"})) * ON(pod, namespace) group_left avg(label_join(kube_pod_owner{namespace=~"$Namespace",cluster="$ClusterId", owner_kind="ReplicaSet"}, "replicaset", "", "owner_name") * on(replicaset, namespace) group_left(owner_name) kube_replicaset_owner{namespace=~"$Namespace",cluster="$ClusterId",owner_name="$Deployment"}) by (pod, namespace) |
| | (max(rate(container_cpu_usage_seconds_total{namespace=~"$Namespace",cluster="$ClusterId", container!="", image!="" }[5m])) by(container, pod, namespace)/max by(container, pod, namespace) (kube_pod_container_resource_requests{resource="cpu", namespace=~"$Namespace",cluster="$ClusterId"})) * ON(pod, namespace) group_left avg(label_join(kube_pod_owner{namespace=~"$Namespace",cluster="$ClusterId", owner_kind="ReplicaSet"}, "replicaset", "", "owner_name") * on(replicaset, namespace) group_left(owner_name) kube_replicaset_owner{namespace=~"$Namespace",cluster="$ClusterId",owner_name="$Deployment"}) by (pod, namespace) |
内存使用百分比 | (max by(container, pod, namespace) (container_memory_working_set_bytes{namespace=~"$Namespace",cluster="$ClusterId", container!="", image!="" })/max by(container, pod, namespace) (kube_pod_container_resource_limits{resource="memory", namespace=~"$Namespace",cluster="$ClusterId"})) * ON(pod, namespace) group_left avg(label_join(kube_pod_owner{namespace=~"$Namespace",cluster="$ClusterId", owner_kind="ReplicaSet"}, "replicaset", "", "owner_name") * on(replicaset, namespace) group_left(owner_name) kube_replicaset_owner{namespace=~"$Namespace",cluster="$ClusterId",owner_name="$Deployment"}) by (pod, namespace) |
| | (max by(container, pod, namespace) (container_memory_working_set_bytes{namespace=~"$Namespace",cluster="$ClusterId", container!="", image!="" })/max by(container, pod, namespace) (kube_pod_container_resource_requests{resource="memory", namespace=~"$Namespace",cluster="$ClusterId"})) * ON(pod, namespace) group_left avg(label_join(kube_pod_owner{namespace=~"$Namespace",cluster="$ClusterId", owner_kind="ReplicaSet"}, "replicaset", "", "owner_name") * on(replicaset, namespace) group_left(owner_name) kube_replicaset_owner{namespace=~"$Namespace",cluster="$ClusterId",owner_name="$Deployment"}) by (pod, namespace) |
网络流量 | sum(rate(container_network_receive_bytes_total{namespace=~"$Namespace",cluster="$ClusterId"}[5m])) by (pod, namespace) * ON(pod, namespace) group_left avg(label_join(kube_pod_owner{namespace=~"$Namespace",cluster="$ClusterId", owner_kind="ReplicaSet"}, "replicaset", "", "owner_name") * on(replicaset, namespace) group_left(owner_name) kube_replicaset_owner{namespace=~"$Namespace",cluster="$ClusterId",owner_name="$Deployment"}) by (pod, namespace) |
| | sum(rate(container_network_transmit_bytes_total{namespace=~"$Namespace",cluster="$ClusterId"}[5m])) by (pod, namespace) * ON(pod, namespace) group_left avg(label_join(kube_pod_owner{namespace=~"$Namespace",cluster="$ClusterId", owner_kind="ReplicaSet"}, "replicaset", "", "owner_name") * on(replicaset, namespace) group_left(owner_name) kube_replicaset_owner{namespace=~"$Namespace",cluster="$ClusterId",owner_name="$Deployment"}) by (pod, namespace) |
磁盘 IO | max(rate(container_fs_reads_bytes_total{namespace=~"$Namespace",cluster="$ClusterId", container!=""}[5m])) by (pod,container, namespace) * ON(pod, namespace) group_left avg(label_join(kube_pod_owner{namespace=~"$Namespace",cluster="$ClusterId", owner_kind="ReplicaSet"}, "replicaset", "", "owner_name") * on(replicaset, namespace) group_left(owner_name) kube_replicaset_owner{namespace=~"$Namespace",cluster="$ClusterId",owner_name="$Deployment"}) by (pod, namespace) |
| | max(rate(container_fs_writes_bytes_total{namespace=~"$Namespace",cluster="$ClusterId", container!=""}[5m])) by (pod,container, namespace) * ON(pod, namespace) group_left avg(label_join(kube_pod_owner{namespace=~"$Namespace",cluster="$ClusterId", owner_kind="ReplicaSet"}, "replicaset", "", "owner_name") * on(replicaset, namespace) group_left(owner_name) kube_replicaset_owner{namespace=~"$Namespace",cluster="$ClusterId",owner_name="$Deployment"}) by (pod, namespace) |
您可以查看指定命名空间下有状态负载(StatefulSet)的监控信息,包括:实例信息(期望实例数、就绪实例数、就绪比例)、CPU 信息(CPU Request、CPU Limits)、内存信息(内存 Request、内存 Limits)、网络信息、磁盘信息等。支持设置查询的时间段,并指定刷新方式(手动刷新、自动刷新)。
该看板的指标清单如下表所示。
看板名称 | PromQL 语句 |
---|---|
Desired Replicas | max(kube_statefulset_replicas{statefulset="$StatefulSet",namespace=~"$Namespace",cluster="$ClusterId"}) |
Ready Replicas | max(kube_statefulset_status_replicas_ready{statefulset="$StatefulSet",namespace=~"$Namespace",cluster="$ClusterId"}) |
Ready Percent | max(kube_statefulset_status_replicas_ready{statefulset="$StatefulSet",namespace=~"$Namespace",cluster="$ClusterId"}) / max(kube_statefulset_replicas{statefulset="$StatefulSet",namespace=~"$Namespace",cluster="$ClusterId"}) |
Replicas | max(kube_statefulset_replicas{statefulset="$StatefulSet",namespace=~"$Namespace",cluster="$ClusterId"}) without (instance, pod) |
| | max(kube_statefulset_status_replicas{statefulset="$StatefulSet",namespace=~"$Namespace",cluster="$ClusterId"}) without (instance, pod) |
| | min(kube_statefulset_status_replicas_ready{statefulset="$StatefulSet",namespace=~"$Namespace",cluster="$ClusterId"}) without (instance, pod) |
| | min(kube_statefulset_status_replicas_available{statefulset="$StatefulSet",namespace=~"$Namespace",cluster="$ClusterId"}) without (instance, pod) |
| | min(kube_statefulset_status_replicas_updated{statefulset="$StatefulSet",namespace=~"$Namespace",cluster="$ClusterId"}) without (instance, pod) |
CPU requests (Total) | sum(kube_pod_container_resource_requests{namespace=~"$Namespace",cluster="$ClusterId",resource="cpu",container!=""} and ON(namespace, pod) kube_pod_owner{namespace=~"$Namespace",cluster="$ClusterId", owner_kind="StatefulSet", owner_name="$StatefulSet"}) |
CPU limits(Total) | sum(kube_pod_container_resource_limits{namespace=~"$Namespace",cluster="$ClusterId",resource="cpu",container!=""} and ON(namespace, pod) kube_pod_owner{namespace=~"$Namespace",cluster="$ClusterId", owner_kind="StatefulSet", owner_name="$StatefulSet"}) |
内存 requests(Total) | sum(kube_pod_container_resource_requests{namespace=~"$Namespace",cluster="$ClusterId",resource="memory",container!=""} and ON(namespace, pod) kube_pod_owner{namespace=~"$Namespace",cluster="$ClusterId", owner_kind="StatefulSet", owner_name="$StatefulSet"}) |
内存 limits(Total) | sum(kube_pod_container_resource_limits{namespace=~"$Namespace",cluster="$ClusterId",resource="memory",container!=""} and ON(namespace, pod) kube_pod_owner{namespace=~"$Namespace",cluster="$ClusterId", owner_kind="StatefulSet", owner_name="$StatefulSet"}) |
CPU requests (Single Pod) | avg(kube_pod_container_resource_requests{namespace=~"$Namespace",cluster="$ClusterId",resource="cpu",container!=""} * ON(pod, namespace) group_left avg(kube_pod_owner{namespace=~"$Namespace",cluster="$ClusterId", owner_kind="StatefulSet", owner_name="$StatefulSet"}) by (pod, namespace)) by (resource) |
CPU limits(Single Pod) | avg(kube_pod_container_resource_limits{namespace=~"$Namespace",cluster="$ClusterId",resource="cpu",container!=""} * ON(pod, namespace) group_left avg(kube_pod_owner{namespace=~"$Namespace",cluster="$ClusterId", owner_kind="StatefulSet", owner_name="$StatefulSet"}) by (pod, namespace)) by (resource) |
CPU 用量 | max(rate(container_cpu_usage_seconds_total{namespace=~"$Namespace",cluster="$ClusterId", container!="", image!="" }[5m])) by (pod, container, namespace) * ON(pod, namespace) group_left avg(kube_pod_owner{namespace=~"$Namespace",cluster="$ClusterId", owner_kind="StatefulSet", owner_name="$StatefulSet"}) by (pod, namespace) |
内存 requests(Single Pod) | avg(kube_pod_container_resource_requests{namespace=~"$Namespace",cluster="$ClusterId",resource="memory",container!=""} * ON(pod, namespace) group_left avg(kube_pod_owner{namespace=~"$Namespace",cluster="$ClusterId", owner_kind="StatefulSet", owner_name="$StatefulSet"}) by (pod, namespace)) by (resource) |
内存 limits(Single Pod) | avg(kube_pod_container_resource_limits{namespace=~"$Namespace",cluster="$ClusterId",resource="memory",container!=""} * ON(pod, namespace) group_left avg(kube_pod_owner{namespace=~"$Namespace",cluster="$ClusterId", owner_kind="StatefulSet", owner_name="$StatefulSet"}) by (pod, namespace)) by (resource) |
内存用量 | max by(container, pod, namespace) (container_memory_working_set_bytes{namespace=~"$Namespace",cluster="$ClusterId", container!="", image!="" }) * ON(pod, namespace) group_left avg(kube_pod_owner{namespace=~"$Namespace",cluster="$ClusterId", owner_kind="StatefulSet", owner_name="$StatefulSet"}) by (pod, namespace) |
CPU 使用百分比 | (max(rate(container_cpu_usage_seconds_total{namespace=~"$Namespace",cluster="$ClusterId", container!="", image!="" }[5m])) by(container, pod, namespace)/max by(container, pod, namespace) (kube_pod_container_resource_limits{resource="cpu", namespace=~"$Namespace",cluster="$ClusterId"})) * ON(pod, namespace) group_left avg(kube_pod_owner{namespace=~"$Namespace",cluster="$ClusterId", owner_kind="StatefulSet", owner_name="$StatefulSet"}) by (pod, namespace) |
| | (max(rate(container_cpu_usage_seconds_total{namespace=~"$Namespace",cluster="$ClusterId", container!="", image!="" }[5m])) by(container, pod, namespace)/max by(container, pod, namespace) (kube_pod_container_resource_requests{resource="cpu", namespace=~"$Namespace",cluster="$ClusterId"})) * ON(pod, namespace) group_left avg(kube_pod_owner{namespace=~"$Namespace",cluster="$ClusterId", owner_kind="StatefulSet", owner_name="$StatefulSet"}) by (pod, namespace) |
内存使用百分比 | (max by(container, pod, namespace) (container_memory_working_set_bytes{namespace=~"$Namespace",cluster="$ClusterId", container!="", image!="" })/max by(container, pod, namespace) (kube_pod_container_resource_limits{resource="memory", namespace=~"$Namespace",cluster="$ClusterId"})) * ON(pod, namespace) group_left avg(kube_pod_owner{namespace=~"$Namespace",cluster="$ClusterId", owner_kind="StatefulSet", owner_name="$StatefulSet"}) by (pod, namespace) |
| | (max by(container, pod, namespace) (container_memory_working_set_bytes{namespace=~"$Namespace",cluster="$ClusterId", container!="", image!="" })/max by(container, pod, namespace) (kube_pod_container_resource_requests{resource="memory", namespace=~"$Namespace",cluster="$ClusterId"})) * ON(pod, namespace) group_left avg(kube_pod_owner{namespace=~"$Namespace",cluster="$ClusterId", owner_kind="StatefulSet", owner_name="$StatefulSet"}) by (pod, namespace) |
网络流量 | sum(rate(container_network_receive_bytes_total{namespace=~"$Namespace",cluster="$ClusterId"}[5m])) by (pod, namespace) * ON(pod, namespace) group_left avg(kube_pod_owner{namespace=~"$Namespace",cluster="$ClusterId", owner_kind="StatefulSet", owner_name="$StatefulSet"}) by (pod, namespace) |
| | sum(rate(container_network_transmit_bytes_total{namespace=~"$Namespace",cluster="$ClusterId"}[5m])) by (pod, namespace) * ON(pod, namespace) group_left avg(kube_pod_owner{namespace=~"$Namespace",cluster="$ClusterId", owner_kind="StatefulSet", owner_name="$StatefulSet"}) by (pod, namespace) |
磁盘 IO | max(rate(container_fs_reads_bytes_total{namespace=~"$Namespace",cluster="$ClusterId", container!=""}[5m])) by (pod,container, namespace) * ON(pod, namespace) group_left avg(kube_pod_owner{namespace=~"$Namespace",cluster="$ClusterId", owner_kind="StatefulSet", owner_name="$StatefulSet"}) by (pod, namespace) |
| | max(rate(container_fs_writes_bytes_total{namespace=~"$Namespace",cluster="$ClusterId", container!=""}[5m])) by (pod,container, namespace) * ON(pod, namespace) group_left avg(kube_pod_owner{namespace=~"$Namespace",cluster="$ClusterId", owner_kind="StatefulSet", owner_name="$StatefulSet"}) by (pod, namespace) |
您可以查看指定命名空间下守护进程集(DaemonSet)的监控信息,包括:CPU 信息(CPU Request、CPU Limits)、内存信息(内存 Request、内存 Limits)、网络信息、磁盘信息等。支持设置查询的时间段,并指定刷新方式(手动刷新、自动刷新)。
该看板的指标清单如下表所示。
看板名称 | PromQL 语句 |
---|---|
CPU requests (Total) | sum(kube_pod_container_resource_requests{namespace=~"$Namespace",cluster="$ClusterId",resource="cpu",container!=""} and ON(namespace, pod) kube_pod_owner{namespace=~"$Namespace",cluster="$ClusterId", owner_kind="DaemonSet", owner_name="$DaemonSet"}) |
CPU limits(Total) | sum(kube_pod_container_resource_limits{namespace=~"$Namespace",cluster="$ClusterId",resource="cpu",container!=""} and ON(namespace, pod) kube_pod_owner{namespace=~"$Namespace",cluster="$ClusterId", owner_kind="DaemonSet", owner_name="$DaemonSet"}) |
内存 requests(Total) | sum(kube_pod_container_resource_requests{namespace=~"$Namespace",cluster="$ClusterId",resource="memory",container!=""} and ON(namespace, pod) kube_pod_owner{namespace=~"$Namespace",cluster="$ClusterId", owner_kind="DaemonSet", owner_name="$DaemonSet"}) |
内存 limits(Total) | sum(kube_pod_container_resource_limits{namespace=~"$Namespace",cluster="$ClusterId",resource="memory",container!=""} and ON(namespace, pod) kube_pod_owner{namespace=~"$Namespace",cluster="$ClusterId", owner_kind="DaemonSet", owner_name="$DaemonSet"}) |
CPU requests (Single Pod) | avg(kube_pod_container_resource_requests{namespace=~"$Namespace",cluster="$ClusterId",resource="cpu",container!=""} * ON(pod, namespace) group_left avg(kube_pod_owner{namespace=~"$Namespace",cluster="$ClusterId", owner_kind="DaemonSet", owner_name="$DaemonSet"}) by (pod, namespace)) by (resource) |
CPU limits(Single Pod) | avg(kube_pod_container_resource_limits{namespace=~"$Namespace",cluster="$ClusterId",resource="cpu",container!=""} * ON(pod, namespace) group_left avg(kube_pod_owner{namespace=~"$Namespace",cluster="$ClusterId", owner_kind="DaemonSet", owner_name="$DaemonSet"}) by (pod, namespace)) by (resource) |
CPU 用量 | max(rate(container_cpu_usage_seconds_total{namespace=~"$Namespace",cluster="$ClusterId", container!="", image!="" }[5m])) by (pod, container, namespace) * ON(pod, namespace) group_left avg(kube_pod_owner{namespace=~"$Namespace",cluster="$ClusterId", owner_kind="DaemonSet", owner_name="$DaemonSet"}) by (pod, namespace) |
内存 requests(Single Pod) | avg(kube_pod_container_resource_requests{namespace=~"$Namespace",cluster="$ClusterId",resource="memory",container!=""} * ON(pod, namespace) group_left avg(kube_pod_owner{namespace=~"$Namespace",cluster="$ClusterId", owner_kind="DaemonSet", owner_name="$DaemonSet"}) by (pod, namespace)) by (resource) |
内存 limits(Single Pod) | avg(kube_pod_container_resource_limits{namespace=~"$Namespace",cluster="$ClusterId",resource="memory",container!=""} * ON(pod, namespace) group_left avg(kube_pod_owner{namespace=~"$Namespace",cluster="$ClusterId", owner_kind="DaemonSet", owner_name="$DaemonSet"}) by (pod, namespace)) by (resource) |
内存用量 | max by(container, pod, namespace) (container_memory_working_set_bytes{namespace=~"$Namespace",cluster="$ClusterId", container!="", image!="" }) * ON(pod, namespace) group_left avg(kube_pod_owner{namespace=~"$Namespace",cluster="$ClusterId", owner_kind="DaemonSet", owner_name="$DaemonSet"}) by (pod, namespace) |
CPU 使用百分比 | (max(rate(container_cpu_usage_seconds_total{namespace=~"$Namespace",cluster="$ClusterId", container!="", image!="" }[5m])) by(container, pod, namespace)/max by(container, pod, namespace) (kube_pod_container_resource_limits{resource="cpu", namespace=~"$Namespace",cluster="$ClusterId"})) * ON(pod, namespace) group_left avg(kube_pod_owner{namespace=~"$Namespace",cluster="$ClusterId", owner_kind="DaemonSet", owner_name="$DaemonSet"}) by (pod, namespace) |
| | (max(rate(container_cpu_usage_seconds_total{namespace=~"$Namespace",cluster="$ClusterId", container!="", image!="" }[5m])) by(container, pod, namespace)/max by(container, pod, namespace) (kube_pod_container_resource_requests{resource="cpu", namespace=~"$Namespace",cluster="$ClusterId"})) * ON(pod, namespace) group_left avg(kube_pod_owner{namespace=~"$Namespace",cluster="$ClusterId", owner_kind="DaemonSet", owner_name="$DaemonSet"}) by (pod, namespace) |
内存使用百分比 | (max by(container, pod, namespace) (container_memory_working_set_bytes{namespace=~"$Namespace",cluster="$ClusterId", container!="", image!="" })/max by(container, pod, namespace) (kube_pod_container_resource_limits{resource="memory", namespace=~"$Namespace",cluster="$ClusterId"})) * ON(pod, namespace) group_left avg(kube_pod_owner{namespace=~"$Namespace",cluster="$ClusterId", owner_kind="DaemonSet", owner_name="$DaemonSet"}) by (pod, namespace) |
| | (max by(container, pod, namespace) (container_memory_working_set_bytes{namespace=~"$Namespace",cluster="$ClusterId", container!="", image!="" })/max by(container, pod, namespace) (kube_pod_container_resource_requests{resource="memory", namespace=~"$Namespace",cluster="$ClusterId"})) * ON(pod, namespace) group_left avg(kube_pod_owner{namespace=~"$Namespace",cluster="$ClusterId", owner_kind="DaemonSet", owner_name="$DaemonSet"}) by (pod, namespace) |
网络流量 | sum(rate(container_network_receive_bytes_total{namespace=~"$Namespace",cluster="$ClusterId"}[5m])) by (pod, namespace) * ON(pod, namespace) group_left avg(kube_pod_owner{namespace=~"$Namespace",cluster="$ClusterId", owner_kind="DaemonSet", owner_name="$DaemonSet"}) by (pod, namespace) |
| | sum(rate(container_network_transmit_bytes_total{namespace=~"$Namespace",cluster="$ClusterId"}[5m])) by (pod, namespace) * ON(pod, namespace) group_left avg(kube_pod_owner{namespace=~"$Namespace",cluster="$ClusterId", owner_kind="DaemonSet", owner_name="$DaemonSet"}) by (pod, namespace) |
磁盘 IO | max(rate(container_fs_reads_bytes_total{namespace=~"$Namespace",cluster="$ClusterId" , container!=""}[5m])) by (pod,container, namespace) * ON(pod, namespace) group_left avg(kube_pod_owner{namespace=~"$Namespace",cluster="$ClusterId", owner_kind="DaemonSet", owner_name="$DaemonSet"}) by (pod, namespace) |
| | max(rate(container_fs_writes_bytes_total{namespace=~"$Namespace",cluster="$ClusterId" , container!=""}[5m])) by (pod,container, namespace) * ON(pod, namespace) group_left avg(kube_pod_owner{namespace=~"$Namespace",cluster="$ClusterId", owner_kind="DaemonSet", owner_name="$DaemonSet"}) by (pod, namespace) |
您可以查看指定命名空间下容器组(Pod)的监控信息,包括:Pod 内存信息(内存使用量、内存使用率)、Pod CPU 信息(CPU 使用量、CPU 使用率)、Pod 网络信息(流入速率、流出速率)等。支持设置查询的时间段,并指定刷新方式(手动刷新、自动刷新)。
说明
您可以查看单个 Pod 的监控信息,也可以选择 All,查看所有 Pod 的监控信息。
该看板的指标清单如下表所示。
看板名称 | PromQL 语句 |
---|---|
CPU requests | sum(kube_pod_container_resource_requests{cluster="$ClusterId",namespace=~"$Namespace",pod=~"$Pod",resource="cpu"}) |
CPU limits | sum(kube_pod_container_resource_limits{cluster="$ClusterId",namespace=~"$Namespace",pod=~"$Pod",resource="cpu"}) |
内存 requests | sum(kube_pod_container_resource_requests{cluster="$ClusterId",namespace=~"$Namespace",pod=~"$Pod",resource="memory"}) |
内存使用 Top 10 | topk(10,sum(container_memory_working_set_bytes{cluster="$ClusterId",namespace=~"$Namespace",pod=~"$Pod",container!=""})by (pod)) |
内存使用率 Top 10(占 limits) | topk(10,sum(container_memory_working_set_bytes{cluster="$ClusterId",namespace=~"$Namespace",pod=~"$Pod",container!=""})by(pod)*100/sum(kube_pod_container_resource_limits{cluster="$ClusterId",namespace=~"$Namespace",pod=~"$Pod",container!="",resource="memory",unit="byte"})by(pod)) |
内存 limits | sum(kube_pod_container_resource_limits{cluster="$ClusterId",namespace=~"$Namespace",pod=~"$Pod",resource="memory"}) |
CPU 使用 Top 10 | topk(10,sum(rate(container_cpu_usage_seconds_total{cluster="$ClusterId",namespace=~"$Namespace",pod=~"$Pod",container!=""}[5m]))by(pod)) |
CPU 使用率 Top 10(占 limits) | topk(10,(sum(rate(container_cpu_usage_seconds_total{cluster="$ClusterId",namespace=~"$Namespace",pod=~"$Pod",container!=""}[5m]))by(pod)/sum(kube_pod_container_resource_limits{cluster="$ClusterId",namespace=~"$Namespace",pod=~"$Pod",resource="cpu",container!=""})by(pod))*100) |
内存错误数 | sum(container_memory_failcnt{cluster="$ClusterId",namespace=~"$Namespace",pod=~"$Pod"})by(pod) |
网络 IO Top 10 | topk(10,sum(rate(container_network_receive_bytes_total{cluster=~"$ClusterId",pod=~"$Pod",namespace=~"$Namespace"}[5m]))by (pod)) |
| | topk(10,sum(rate(container_network_transmit_bytes_total{cluster=~"$ClusterId",pod=~"$Pod",namespace=~"$Namespace"}[5m]))by (pod)) |
CPU 负载 Top 10(10s) | topk(10,sum(container_cpu_load_average_10s{cluster="$ClusterId",namespace=~"$Namespace",pod=~"$Pod"})by(pod)) |
网络错误数 Top 10 | topk(10,sum(rate(container_network_receive_errors_total{cluster="$ClusterId",namespace=~"$Namespace",pod=~"$Pod"}[5m]))by(pod)) |
| | topk(10,sum(rate(container_network_transmit_errors_total{cluster="$ClusterId",namespace=~"$Namespace",pod=~"$Pod"}[5m]))by(pod)) |
Socket 连接数 Top 10 | topk(10,sum(container_sockets{cluster=~"$ClusterId",pod=~"$Pod",namespace=~"$Namespace"})by(pod)) |
网络丢包 Top 10 | topk(10,sum(rate(container_network_receive_packets_dropped_total{cluster="$ClusterId",namespace=~"$Namespace",pod=~"$Pod"}[5m]))by(pod)) |
| | topk(10,sum(rate(container_network_transmit_packets_dropped_total{cluster="$ClusterId",namespace=~"$Namespace",pod=~"$Pod"}[5m]))by(pod)) |
磁盘读写 IO Top 10 | topk(10,sum(rate(container_fs_reads_bytes_total{cluster="$ClusterId",namespace=~"$Namespace",pod=~"$Pod"}[5m]))by(pod)) |
| | topk(10,sum(rate(container_fs_writes_bytes_total{cluster="$ClusterId",namespace=~"$Namespace",pod=~"$Pod"}[5m]))by(pod)) |
进程数 Top 10 | topk(10,sum(container_processes{cluster="$ClusterId",namespace=~"$Namespace",pod=~"$Pod"}) by(pod)) |
您可以查看指定命名空间下容器(Container)的监控信息,包括:Container 内存信息(内存使用量、内存使用率)、Container CPU 信息(CPU 使用量、CPU 使用率)等。支持设置查询的时间段,并指定刷新方式(手动刷新、自动刷新)。
说明
您可以查看单个 Container 的监控信息,也可以选择 All,查看所有 Container 的监控信息。
该看板的指标清单如下表所示。
看板名称 | PromQL 语句 |
---|---|
容器内存使用 | kube_pod_container_resource_requests{resource="memory",cluster="$ClusterId",namespace=~"$Namespace",pod=~"$Pod",container=~"$Container",container!=""} |
| | kube_pod_container_resource_limits{resource="memory",cluster="$ClusterId",namespace=~"$Namespace",pod=~"$Pod",container=~"$Container",container!=""} |
| | sum(container_memory_working_set_bytes{cluster="$ClusterId",namespace=~"$Namespace",pod=~"$Pod",container=~"$Container",container!=""})by(container) |
容器 CPU 使用 | kube_pod_container_resource_requests{resource="cpu",cluster="$ClusterId",namespace=~"$Namespace",pod=~"$Pod",container=~"$Container",container!=""} |
| | kube_pod_container_resource_limits{resource="cpu",cluster="$ClusterId",namespace=~"$Namespace",pod=~"$Pod",container=~"$Container",container!=""} |
| | irate(container_cpu_usage_seconds_total{cluster="$ClusterId",namespace=~"$Namespace",pod=~"$Pod",container=~"$Container",container!=""}[5m]) |
容器进程 | container_processes{cluster="$ClusterId",namespace=~"$Namespace",pod=~"$Pod",container=~"$Container",container!=""} |
打开文件数 | container_file_descriptors{cluster="$ClusterId",namespace=~"$Namespace",pod=~"$Pod",container=~"$Container",container!=""} |
重启次数 | kube_pod_container_status_restarts_total{cluster="$ClusterId",namespace=~"$Namespace",pod=~"$Pod",container=~"$Container",container!=""} |
您可以使用托管 Prometheus 的 Explore 功能来快速查询和展示指标数据。详情请参见 指标查询。
您可以在托管 Prometheus 的告警中心配置集群相关告警,包括: