Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Include initContainers when calculating pod overhead #3572

Merged
merged 2 commits into from
Jan 5, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -64,23 +64,50 @@ def get_daemon_sets_requests():
info = []
for ds in daemon_sets:
name = ds["metadata"]["name"]
req_mem = req_cpu = lim_mem = lim_cpu = 0
# From https://kubernetes.io/docs/concepts/workloads/pods/init-containers/#resource-sharing-within-containers
# > - The highest of any particular resource request or limit defined on
# > all init containers is the effective init request/limit. If any
# > resource has no resource limit specified this is considered as the
# > highest limit.
# > - The Pod's effective request/limit for a resource is the higher of:
# > - the sum of all app containers request/limit for a resource
# > - the effective init request/limit for a resource
#
# So we have to calculate the requests of the init containers and containers separately,
# and take the max as the effective request / limit

container_req_mem = (
container_req_cpu
) = container_lim_mem = container_lim_cpu = 0
init_container_req_mem = (
init_container_req_cpu
) = init_container_lim_mem = init_container_lim_cpu = 0

for c in ds["spec"]["template"]["spec"]["containers"]:
resources = c.get("resources", {})
requests = resources.get("requests", {})
limits = resources.get("limits", {})
req_mem += parse_quantity(requests.get("memory", 0))
lim_mem += parse_quantity(limits.get("memory", 0))
req_cpu += parse_quantity(requests.get("cpu", 0))
lim_cpu += parse_quantity(limits.get("cpu", 0))
container_req_mem += parse_quantity(requests.get("memory", 0))
container_lim_mem += parse_quantity(limits.get("memory", 0))
container_req_cpu += parse_quantity(requests.get("cpu", 0))
container_lim_cpu += parse_quantity(limits.get("cpu", 0))

for c in ds["spec"]["template"]["spec"].get("initContainers", []):
resources = c.get("resources", {})
requests = resources.get("requests", {})
limits = resources.get("limits", {})
init_container_req_mem += parse_quantity(requests.get("memory", 0))
init_container_lim_mem += parse_quantity(limits.get("memory", 0))
init_container_req_cpu += parse_quantity(requests.get("cpu", 0))
init_container_lim_cpu += parse_quantity(limits.get("cpu", 0))

info.append(
{
"name": name,
"cpu_request": float(req_cpu),
"cpu_limit": float(lim_cpu),
"memory_request": int(req_mem),
"memory_limit": int(lim_mem),
"cpu_request": float(max(container_req_cpu, init_container_req_cpu)),
"cpu_limit": float(max(container_lim_cpu, init_container_lim_cpu)),
"memory_request": int(max(container_req_mem, init_container_req_mem)),
"memory_limit": int(max(container_lim_mem, init_container_lim_mem)),
}
)

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@
gke:
2i2c:
requesting_daemon_sets: calico-node,fluentbit-gke,gke-metadata-server,gke-metrics-agent,ip-masq-agent,netd,pdcsi-node,support-cryptnono,support-prometheus-node-exporter
other_daemon_sets: ""
other_daemon_sets: binder-staging-dind,binder-staging-image-cleaner,imagebuilding-demo-binderhub-service-docker-api
cpu_requests: 344m
memory_requests: 596Mi
k8s_version: v1.27.4-gke.900
Expand All @@ -31,7 +31,7 @@ gke:
other_daemon_sets: ""
cpu_requests: 344m
memory_requests: 596Mi
k8s_version: v1.27.4-gke.900
k8s_version: v1.27.7-gke.1056000
awi-ciroh:
requesting_daemon_sets: calico-node,fluentbit-gke,gke-metadata-server,gke-metrics-agent,ip-masq-agent,netd,pdcsi-node,support-cryptnono,support-prometheus-node-exporter
other_daemon_sets: ""
Expand All @@ -43,25 +43,25 @@ gke:
other_daemon_sets: ""
cpu_requests: 344m
memory_requests: 596Mi
k8s_version: v1.27.4-gke.900
k8s_version: v1.27.7-gke.1056000
catalystproject-latam:
requesting_daemon_sets: calico-node,fluentbit-gke,gke-metadata-server,ip-masq-agent,netd,pdcsi-node,support-cryptnono,support-prometheus-node-exporter
other_daemon_sets: ""
cpu_requests: 338m
memory_requests: 496Mi
k8s_version: v1.27.3-gke.100
k8s_version: v1.27.7-gke.1056000
cloudbank:
requesting_daemon_sets: calico-node,fluentbit-gke,gke-metadata-server,gke-metrics-agent,ip-masq-agent,pdcsi-node,support-cryptnono,support-prometheus-node-exporter
other_daemon_sets: continuous-image-puller,continuous-image-puller,continuous-image-puller,netd
cpu_requests: 342m
memory_requests: 566Mi
k8s_version: v1.26.5-gke.2100
requesting_daemon_sets: calico-node,fluentbit-gke,gke-metadata-server,gke-metrics-agent,ip-masq-agent,netd,pdcsi-node,support-cryptnono,support-prometheus-node-exporter
other_daemon_sets: ""
cpu_requests: 344m
memory_requests: 596Mi
k8s_version: v1.27.5-gke.200
hhmi:
requesting_daemon_sets: fluentbit-gke,gke-metadata-server,netd,pdcsi-node,support-cryptnono,support-prometheus-node-exporter
other_daemon_sets: ""
cpu_requests: 228m
memory_requests: 480Mi
k8s_version: v1.27.3-gke.100
k8s_version: v1.27.7-gke.1056000
leap:
requesting_daemon_sets: calico-node,fluentbit-gke,gke-metadata-server,gke-metrics-agent,ip-masq-agent,netd,pdcsi-node,support-cryptnono,support-prometheus-node-exporter
other_daemon_sets: ""
Expand All @@ -81,88 +81,94 @@ gke:
memory_requests: 580Mi
k8s_version: v1.27.4-gke.900
pangeo-hubs:
requesting_daemon_sets: calico-node,fluentbit-gke,gke-metadata-server,gke-metrics-agent,ip-masq-agent,pdcsi-node,support-cryptnono,support-prometheus-node-exporter
other_daemon_sets: netd
cpu_requests: 342m
memory_requests: 566Mi
k8s_version: v1.26.5-gke.2100
requesting_daemon_sets: calico-node,fluentbit-gke,gke-metadata-server,gke-metrics-agent,ip-masq-agent,netd,pdcsi-node,support-cryptnono,support-prometheus-node-exporter
other_daemon_sets: ""
cpu_requests: 344m
memory_requests: 596Mi
k8s_version: v1.27.5-gke.200
qcl:
requesting_daemon_sets: calico-node,fluentbit-gke,gke-metadata-server,ip-masq-agent,netd,pdcsi-node,support-cryptnono,support-prometheus-node-exporter
other_daemon_sets: ""
cpu_requests: 338m
memory_requests: 496Mi
k8s_version: v1.27.4-gke.900
k8s_version: v1.27.7-gke.1056000
eks:
2i2c-aws-us:
requesting_daemon_sets: aws-node,ebs-csi-node,kube-proxy,support-cryptnono,support-prometheus-node-exporter
other_daemon_sets: ""
cpu_requests: 170m
memory_requests: 250Mi
k8s_version: v1.25.12-eks-2d98532
k8s_version: v1.27.8-eks-8cb36c9
catalystproject-africa:
requesting_daemon_sets: aws-node,ebs-csi-node,kube-proxy,support-cryptnono,support-prometheus-node-exporter
other_daemon_sets: ""
cpu_requests: 170m
memory_requests: 250Mi
k8s_version: v1.27.4-eks-2d98532
k8s_version: v1.27.8-eks-8cb36c9
gridsst:
requesting_daemon_sets: aws-node,ebs-csi-node,kube-proxy,support-cryptnono,support-prometheus-node-exporter
other_daemon_sets: ""
cpu_requests: 170m
memory_requests: 250Mi
k8s_version: v1.25.12-eks-2d98532
k8s_version: v1.27.8-eks-8cb36c9
jupyter-meets-the-earth:
requesting_daemon_sets: aws-node,ebs-csi-node,kube-proxy,support-cryptnono,support-prometheus-node-exporter
other_daemon_sets: ""
cpu_requests: 170m
memory_requests: 250Mi
k8s_version: v1.25.12-eks-2d98532
k8s_version: v1.27.8-eks-8cb36c9
nasa-cryo:
requesting_daemon_sets: aws-node,ebs-csi-node,kube-proxy,support-cryptnono,support-prometheus-node-exporter
other_daemon_sets: ""
cpu_requests: 170m
memory_requests: 250Mi
k8s_version: v1.25.12-eks-2d98532
k8s_version: v1.27.8-eks-8cb36c9
nasa-esdis:
requesting_daemon_sets: aws-node,ebs-csi-node,kube-proxy,support-cryptnono,support-prometheus-node-exporter
other_daemon_sets: ""
cpu_requests: 170m
memory_requests: 250Mi
k8s_version: v1.27.8-eks-8cb36c9
nasa-ghg:
requesting_daemon_sets: aws-node,ebs-csi-node,kube-proxy,support-cryptnono,support-prometheus-node-exporter
other_daemon_sets: ""
cpu_requests: 170m
memory_requests: 250Mi
k8s_version: v1.27.4-eks-2d98532
k8s_version: v1.27.8-eks-8cb36c9
nasa-veda:
requesting_daemon_sets: aws-node,ebs-csi-node,kube-proxy,support-cryptnono,support-prometheus-node-exporter
other_daemon_sets: ""
cpu_requests: 170m
memory_requests: 250Mi
k8s_version: v1.25.12-eks-2d98532
k8s_version: v1.27.8-eks-8cb36c9
openscapes:
requesting_daemon_sets: aws-node,ebs-csi-node,kube-proxy,support-cryptnono,support-prometheus-node-exporter
other_daemon_sets: ""
cpu_requests: 170m
memory_requests: 250Mi
k8s_version: v1.24.16-eks-2d98532
k8s_version: v1.27.8-eks-8cb36c9
smithsonian:
requesting_daemon_sets: aws-node,ebs-csi-node,kube-proxy,support-cryptnono,support-prometheus-node-exporter
other_daemon_sets: ""
cpu_requests: 170m
memory_requests: 250Mi
k8s_version: v1.25.12-eks-2d98532
k8s_version: v1.27.8-eks-8cb36c9
ubc-eoas:
requesting_daemon_sets: aws-node,ebs-csi-node,kube-proxy,support-cryptnono,support-prometheus-node-exporter
other_daemon_sets: ""
cpu_requests: 170m
memory_requests: 250Mi
k8s_version: v1.24.17-eks-f8587cb
k8s_version: v1.27.8-eks-8cb36c9
victor:
requesting_daemon_sets: aws-node,ebs-csi-node,kube-proxy,support-cryptnono,support-prometheus-node-exporter
other_daemon_sets: ""
cpu_requests: 170m
memory_requests: 250Mi
k8s_version: v1.25.12-eks-2d98532
k8s_version: v1.27.8-eks-8cb36c9
aks:
utoronto:
requesting_daemon_sets: cloud-node-manager,csi-azuredisk-node,csi-azurefile-node,kube-proxy,support-cryptnono,support-prometheus-node-exporter
other_daemon_sets: calico-node,continuous-image-puller,continuous-image-puller,continuous-image-puller,continuous-image-puller
other_daemon_sets: calico-node
cpu_requests: 226m
memory_requests: 300Mi
k8s_version: v1.26.3
Original file line number Diff line number Diff line change
Expand Up @@ -55,12 +55,12 @@
"memory": 130451771392
},
"measured_overhead": {
"cpu": 0.165,
"memory": 157286400
"cpu": 0.17,
"memory": 262144000
},
"available": {
"cpu": 15.725,
"memory": 130294484992
"cpu": 15.72,
"memory": 130189627392
}
},
"n2-highmem-32": {
Expand Down
40 changes: 31 additions & 9 deletions deployer/commands/generate/resource_allocation/update_nodeinfo.py
Original file line number Diff line number Diff line change
Expand Up @@ -106,20 +106,42 @@ def get_node_capacity_info(instance_type: str):
mem_available = mem_allocatable

for p in pods:
mem_request = 0
cpu_request = 0
            # Iterate through all the containers *and* initContainers in the pod, and
            # total the memory & cpu requests each group makes. Init containers run to
            # completion before app containers start, so their requests don't stack with
            # the app containers' — the pod's effective request is the max of the two.
# From https://kubernetes.io/docs/concepts/workloads/pods/init-containers/#resource-sharing-within-containers
# > - The highest of any particular resource request or limit defined on
# > all init containers is the effective init request/limit. If any
# > resource has no resource limit specified this is considered as the
# > highest limit.
# > - The Pod's effective request/limit for a resource is the higher of:
# > - the sum of all app containers request/limit for a resource
# > - the effective init request/limit for a resource
#
# So we have to calculate the requests of the init containers and containers separately,
# and take the max as the effective request / limit
container_cpu_request = container_mem_request = 0
init_container_cpu_request = init_container_mem_request = 0

for c in p["spec"]["containers"]:
mem_request += parse_quantity(
container_mem_request += parse_quantity(
c.get("resources", {}).get("requests", {}).get("memory", "0")
)
container_cpu_request += parse_quantity(
c.get("resources", {}).get("requests", {}).get("cpu", "0")
)

for c in p["spec"].get("initContainers", []):
init_container_mem_request += parse_quantity(
c.get("resources", {}).get("requests", {}).get("memory", "0")
)
cpu_request += parse_quantity(
init_container_cpu_request += parse_quantity(
c.get("resources", {}).get("requests", {}).get("cpu", "0")
)
cpu_available -= cpu_request
mem_available -= mem_request

print(
p["metadata"]["name"],
max(init_container_mem_request, container_mem_request),
)
cpu_available -= max(container_cpu_request, init_container_cpu_request)
mem_available -= max(container_mem_request, init_container_mem_request)

return {
# CPU units are in fractions, while memory units are bytes
Expand Down