Skip to content

Commit

Permalink
Improve crd-extractor script
Browse files Browse the repository at this point in the history
* Fetch CRDs in parallel
* Avoid use of associative array
* Handle space in file path
  • Loading branch information
kishorviswanathan committed Dec 18, 2024
1 parent 028642f commit d12a4c4
Showing 1 changed file with 62 additions and 53 deletions.
115 changes: 62 additions & 53 deletions Utilities/crd-extractor.sh
Original file line number Diff line number Diff line change
@@ -1,108 +1,117 @@
#!/usr/bin/env bash

#######################################
# Fetch a single CRD from the cluster and save it as YAML.
# Globals:   TMP_CRD_DIR (read) - directory the <name>.yaml file is written to
# Arguments: $1 - a line of `kubectl get crds` output; the first
#                 whitespace-separated field is the CRD name
# Outputs:   writes "$TMP_CRD_DIR/<name>.yaml"
#######################################
fetch_crd() {
  # Strip everything from the first space onward to isolate the CRD name.
  local filename=${1%% *}
  # Capture only stdout in the file. The previous `2>&1` merged kubectl's
  # error text into the YAML, corrupting the input later handed to
  # openapi2jsonschema; errors now go to the terminal instead.
  kubectl get crds "$filename" -o yaml >"$TMP_CRD_DIR/$filename.yaml"
}

# Check if python3 is installed
if ! command -v python3 &> /dev/null; then
if ! command -v python3 &>/dev/null; then
printf "python3 is required for this utility, and is not installed on your machine"
printf "please visit https://www.python.org/downloads/ to install it"
exit 1
fi
# Check if kubectl is installed
if ! command -v kubectl &> /dev/null; then
if ! command -v kubectl &>/dev/null; then
printf "kubectl is required for this utility, and is not installed on your machine"
printf "please visit https://kubernetes.io/docs/tasks/tools/#kubectl to install it"
exit 1
fi

# Check if the pyyaml module is installed
if ! echo 'import yaml' | python3 &> /dev/null; then
if ! echo 'import yaml' | python3 &>/dev/null; then
printf "the python3 module 'yaml' is required, and is not installed on your machine.\n"

while true; do
read -p "Do you wish to install this program? (y/n) " yn
read -r -p "Do you wish to install this program? (y/n) " yn
case $yn in
[Yy] ) pip3 install pyyaml; break;;
"" ) pip3 install pyyaml; break;;
[Nn] ) echo "Exiting..."; exit;;
* ) echo "Please answer 'y' (yes) or 'n' (no).";;
[Yy])
pip3 install pyyaml
break
;;
"")
pip3 install pyyaml
break
;;
[Nn])
echo "Exiting..."
exit
;;
*) echo "Please answer 'y' (yes) or 'n' (no)." ;;
esac
done
fi

# Create temp folder for CRDs
TMP_CRD_DIR=$HOME/.datree/crds
mkdir -p $TMP_CRD_DIR
mkdir -p "$TMP_CRD_DIR"

# Create final schemas directory
SCHEMAS_DIR=$HOME/.datree/crdSchemas
mkdir -p $SCHEMAS_DIR
cd $SCHEMAS_DIR

# Create array to store CRD kinds and groups
ORGANIZE_BY_GROUP=true
declare -A CRD_GROUPS 2>/dev/null
if [ $? -ne 0 ]; then
# Array creation failed, signal to skip organization by group
ORGANIZE_BY_GROUP=false
fi

# Extract CRDs from cluster
NUM_OF_CRDS=0
while read -r crd
do
filename=${crd%% *}
kubectl get crds "$filename" -o yaml > "$TMP_CRD_DIR/$filename.yaml" 2>&1

resourceKind=$(grep "kind:" "$TMP_CRD_DIR/$filename.yaml" | awk 'NR==2{print $2}' | tr '[:upper:]' '[:lower:]')
resourceGroup=$(grep "group:" "$TMP_CRD_DIR/$filename.yaml" | awk 'NR==1{print $2}')
mkdir -p "$SCHEMAS_DIR"
cd "$SCHEMAS_DIR" || exit 1

# Save name and group for later directory organization
CRD_GROUPS["$resourceKind"]="$resourceGroup"

let ++NUM_OF_CRDS
done < <(kubectl get crds 2>&1 | sed -n '/NAME/,$p' | tail -n +2)
# Get a list of all CRDs
# Read the listing into CRD_LIST one element per line: IFS=$'\n' makes
# read -a split on newlines, and -d '' reads until the NUL appended by
# the trailing `printf '\0'`. sed/tail drop everything up to and
# including the NAME header row.
# NOTE(review): `2>&1` folds kubectl errors into the listing — if
# kubectl fails, CRD_LIST may contain error text instead of CRD names;
# confirm this is intentional.
printf "Fetching list of CRDs...\n"
IFS=$'\n' read -r -d '' -a CRD_LIST < <(kubectl get crds 2>&1 | sed -n '/NAME/,$p' | tail -n +2 && printf '\0')

# If no CRDs exist in the cluster, exit
if [ $NUM_OF_CRDS == 0 ]; then
if [ ${#CRD_LIST[@]} == 0 ]; then
printf "No CRDs found in the cluster, exiting...\n"
exit 0
fi

# Extract CRDs from cluster, running up to $PARALLELISM fetches concurrently.
FETCHED_CRDS=0
PARALLELISM=10
for crd in "${CRD_LIST[@]}"; do
    printf "Fetching CRD %s/%s...\n" "$((FETCHED_CRDS + 1))" "${#CRD_LIST[@]}"

    # Fetch CRD in the background
    fetch_crd "$crd" &

    # allow to execute up to $PARALLELISM jobs in parallel
    if [[ $(jobs -r -p | wc -l) -ge $PARALLELISM ]]; then
        # now there are $PARALLELISM jobs already running, so wait here for any job
        # to be finished so there is a place to start next one.
        wait -n
    fi
    ((++FETCHED_CRDS))
done

# Barrier: wait for the remaining background fetches. Without this, up to
# PARALLELISM-1 jobs can still be writing YAML files when the script moves
# on to convert them, producing truncated/empty schemas.
wait

# Download converter script
curl https://raw.githubusercontent.com/yannh/kubeconform/master/scripts/openapi2jsonschema.py --output $TMP_CRD_DIR/openapi2jsonschema.py 2>/dev/null
curl https://raw.githubusercontent.com/yannh/kubeconform/master/scripts/openapi2jsonschema.py --output "$TMP_CRD_DIR/openapi2jsonschema.py" 2>/dev/null

# Convert crds to jsonSchema
python3 $TMP_CRD_DIR/openapi2jsonschema.py $TMP_CRD_DIR/*.yaml
FILENAME_FORMAT="{fullgroup}_{kind}_{version}" python3 "$TMP_CRD_DIR/openapi2jsonschema.py" "$TMP_CRD_DIR"/*.yaml
conversionResult=$?

# Copy and rename files to support kubeval
rm -rf $SCHEMAS_DIR/master-standalone
mkdir -p $SCHEMAS_DIR/master-standalone
cp $SCHEMAS_DIR/*.json $SCHEMAS_DIR/master-standalone
find $SCHEMAS_DIR/master-standalone -name '*json' -exec bash -c ' mv -f $0 ${0/\_/-stable-}' {} \;
rm -rf "$SCHEMAS_DIR/master-standalone"
mkdir -p "$SCHEMAS_DIR/master-standalone"
cp "$SCHEMAS_DIR"/*.json "$SCHEMAS_DIR/master-standalone"
find "$SCHEMAS_DIR/master-standalone" -name '*json' -exec bash -c 'mv -f "$0" "${0/_/-stable-}"' {} \;

# Organize schemas by group
if [ $ORGANIZE_BY_GROUP == true ]; then
for schema in $SCHEMAS_DIR/*.json
do
crdFileName=$(basename $schema .json)
crdKind=${crdFileName%%_*}
crdGroup=${CRD_GROUPS[$crdKind]}
mkdir -p $crdGroup
mv $schema ./$crdGroup
done
fi
for schema in "$SCHEMAS_DIR"/*.json; do
crdFileName=$(basename "$schema")
crdGroup=$(echo "$crdFileName" | cut -d"_" -f1)
outName=$(echo "$crdFileName" | cut -d"_" -f2-)
mkdir -p "$crdGroup"
mv "$schema" "./$crdGroup/$outName"
done

CYAN='\033[0;36m'
GREEN='\033[0;32m'
NC='\033[0m' # No Color

if [ $conversionResult == 0 ]; then
printf "${GREEN}Successfully converted $NUM_OF_CRDS CRDs to JSON schema${NC}\n"
printf "${GREEN}Successfully converted $FETCHED_CRDS CRDs to JSON schema${NC}\n"

printf "\nTo validate a CR using various tools, run the relevant command:\n"
printf "\n- ${CYAN}datree:${NC}\n\$ datree test /path/to/file\n"
printf "\n- ${CYAN}kubeconform:${NC}\n\$ kubeconform -summary -output json -schema-location default -schema-location '$HOME/.datree/crdSchemas/{{ .ResourceKind }}_{{ .ResourceAPIVersion }}.json' /path/to/file\n"
printf "\n- ${CYAN}kubeval:${NC}\n\$ kubeval --additional-schema-locations file:\"$HOME/.datree/crdSchemas\" /path/to/file\n\n"
fi

rm -rf $TMP_CRD_DIR
rm -rf "$TMP_CRD_DIR"

0 comments on commit d12a4c4

Please sign in to comment.