diff --git a/must-gather/README.md b/must-gather/README.md index c71486114..fbfe2dba2 100644 --- a/must-gather/README.md +++ b/must-gather/README.md @@ -2,8 +2,13 @@ You can use the `oc adm must-gather` command to collect information about your cluster. -With the node-maintenance must-gather image you can collect manifests and logs related to node maintenance, -which includes the node objects, and logs and manifests related to the node-maintenance-operator. +With the node-maintenance-must-gather image you can collect manifests and logs related to node maintenance: +- Node objects +- Custom Resource Definition +- Node Maintenance Operator pod logs (running pods only; drained pods are skipped) +- Custom Resources +- Cluster resource usage (CPU and memory) + To collect this data, you must specify the extra image using the `--image` option. Example: diff --git a/must-gather/collection-scripts/gather b/must-gather/collection-scripts/gather index dbb269bce..504b457da 100755 --- a/must-gather/collection-scripts/gather +++ b/must-gather/collection-scripts/gather @@ -1,6 +1,7 @@ #!/bin/bash -mkdir -p /must-gather/ +# Create a directory for must-gather +mkdir -p must-gather/operator-pod-logs/ # Generate /must-gather/version file DIR_NAME=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd ) @@ -8,6 +9,8 @@ DIR_NAME=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd ) echo "node-maintenance-operator/must-gather" > /must-gather/version version >> /must-gather/version +OPERATOR_NAME="node-maintenance" + # Init named resource list, eg. ns/openshift-config named_resources=() @@ -17,16 +20,22 @@ group_resources=() # Get namespace of node-maintenance-operator - where it is installed NMO_NAMESPACE=$(oc get subs -A --field-selector=metadata.name=node-maintenance-operator -o jsonpath='{.items[*].metadata.namespace}') -# Get nmo logs - Nodes, CRD, and CRs +# Get NMO information: node objects, CRD, pod logs, CRs, and cluster resource usage. 
+ +# Get Nodes' names +# NODES_NAMES=($(oc get nodes -o jsonpath='{.items[*].metadata.name}{"\n"}')) -# Nodes +# Nodes' objects group_resources+=(nodes) -# NMO CRD +# NMO's CRD NMO_CRD=$(oc get crds -o jsonpath='{range .items[*]}{"crd/"}{.metadata.name}{"\n"}{end}' | grep 'nodemaintenance.medik8s' | sed -z 's/\n/ /g') named_resources+=(${NMO_CRD}) -# node maintenance CRs +# Names of the NMO pods (filtered by phase when their logs are collected) +NMO_PODS=($(oc get pods -o jsonpath='{range .items[*]}{.metadata.name}{"\n"}{end}' -n "${NMO_NAMESPACE}" | grep 'node-maintenance')) + +# Node Maintenance CRs group_resources+=(nm) # Run the Collection of Resources using inspect @@ -34,4 +43,15 @@ oc adm inspect --dest-dir must-gather --all-namespaces "${named_resources[@]}" group_resources_text=$(IFS=, ; echo "${group_resources[*]}") oc adm inspect --dest-dir must-gather --all-namespaces "${group_resources_text}" +# Collect logs only from NMO pods whose phase is Running (e.g. drained pods are skipped); the phase lookup is quoted so an empty result cannot break the test +for NMO_POD_NAME in "${NMO_PODS[@]}"; +do if [[ "$(oc get pod "${NMO_POD_NAME}" -n "${NMO_NAMESPACE}" -o jsonpath='{.status.phase}')" == "Running" ]]; +then oc logs "${NMO_POD_NAME}" -n "${NMO_NAMESPACE}" > "must-gather/operator-pod-logs/${NMO_POD_NAME}"; fi; done + +# Get cluster's resource (CPU and Memory) usage +oc adm top node --use-protocol-buffers > must-gather/cluster_resource_statistics + +# Get all journal logs from each Node (TODO: select particular journal logs, e.g., crio) +# for NODE in ${NODES_NAMES[@]}; do oc adm node-logs ${NODE} > must-gather/nodes-logs/${NODE}; done + exit 0