Commit 5034db6

Add doc for script
1 parent 7f88c2d commit 5034db6

File tree

4 files changed: +33 -246 lines changed


.ci/scripts/validate.sh

+24-12
@@ -25,7 +25,7 @@ function generate_compiled_model_output() {
     local MODEL_DIR="${CHECKPOINT_PATH%/*}"
     local MODEL_NAME=$(basename "$CHECKPOINT_PATH" | sed 's/\.[^.]*$//')
 
-    for DTYPE in float32 bfloat16; do
+    for DTYPE in float32 bfloat16 float16; do
         echo ""############### Run inference with torch.compile for dtype $DTYPE "###############"
         echo ""
         echo "******************************************"
@@ -85,10 +85,14 @@ function generate_compiled_model_output() {
         echo "******************************************"
         echo "******** INT4 group-wise quantized *******"
         echo "******************************************"
-        python -W ignore generate.py --dtype ${DTYPE} --quant '{"linear:int4" : {"groupsize": 32}}' --checkpoint-path "$CHECKPOINT_PATH" --temperature 0 --device "$TARGET_DEVICE" > "$MODEL_DIR/output_eager" || exit 1
-        cat "$MODEL_DIR/output_eager"
-        python -W ignore generate.py --dtype ${DTYPE} --compile --quant '{"linear:int4" : {"groupsize": 32}}' --checkpoint-path "$CHECKPOINT_PATH" --temperature 0 --device "$TARGET_DEVICE" > "$MODEL_DIR/output_compiled" || exit 1
-        cat "$MODEL_DIR/output_compiled"
+        if [ "$DTYPE" = float16 ]; then
+            echo "Skipping INT4 groupwise quantization for float16 because compile fails"
+        else
+            python -W ignore generate.py --dtype ${DTYPE} --quant '{"linear:int4" : {"groupsize": 32}}' --checkpoint-path "$CHECKPOINT_PATH" --temperature 0 --device "$TARGET_DEVICE" > "$MODEL_DIR/output_eager" || exit 1
+            cat "$MODEL_DIR/output_eager"
+            python -W ignore generate.py --dtype ${DTYPE} --compile --quant '{"linear:int4" : {"groupsize": 32}}' --checkpoint-path "$CHECKPOINT_PATH" --temperature 0 --device "$TARGET_DEVICE" > "$MODEL_DIR/output_compiled" || exit 1
+            cat "$MODEL_DIR/output_compiled"
+        fi
     done
 }
 
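With --temperature 0 both runs decode greedily, so the eager and compiled INT4 transcripts should match token for token, which is presumably why the script captures them in output_eager and output_compiled. A minimal manual reproduction for a single dtype; the checkpoint path is hypothetical, and generate.py is assumed to sit at the repository root:

    # Hypothetical checkpoint location; substitute your own download.
    CHECKPOINT_PATH=checkpoints/stories15M/stories15M.pt
    MODEL_DIR="${CHECKPOINT_PATH%/*}"

    # Eager INT4 group-wise (groupsize 32) run, greedy decoding.
    python -W ignore generate.py --dtype bfloat16 --quant '{"linear:int4" : {"groupsize": 32}}' \
        --checkpoint-path "$CHECKPOINT_PATH" --temperature 0 --device cpu > "$MODEL_DIR/output_eager"

    # Same run under torch.compile.
    python -W ignore generate.py --dtype bfloat16 --compile --quant '{"linear:int4" : {"groupsize": 32}}' \
        --checkpoint-path "$CHECKPOINT_PATH" --temperature 0 --device cpu > "$MODEL_DIR/output_compiled"

    # Under greedy decoding the two transcripts should be identical.
    diff "$MODEL_DIR/output_eager" "$MODEL_DIR/output_compiled"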
@@ -98,7 +102,7 @@ function generate_aoti_model_output() {
     local MODEL_DIR="${CHECKPOINT_PATH%/*}"
     local MODEL_NAME=$(basename "$CHECKPOINT_PATH" | sed 's/\.[^.]*$//')
 
-    for DTYPE in float32 bfloat16; do
+    for DTYPE in float32 bfloat16 float16; do
         echo ""############### Run inference with AOT Inductor for dtype $DTYPE "###############"
         echo ""
         echo "******************************************"
@@ -150,12 +154,20 @@ function generate_aoti_model_output() {
         python -W ignore generate.py --dtype ${DTYPE} --checkpoint-path "$CHECKPOINT_PATH" --temperature 0 --dso-path ${MODEL_DIR}/${MODEL_NAME}.so --device "$TARGET_DEVICE" > "$MODEL_DIR/output_aoti" || exit 1
         cat "$MODEL_DIR/output_aoti"
 
-        # echo "******************************************"
-        # echo "******** INT4 group-wise quantized *******"
-        # echo "******************************************"
-        # python -W ignore export.py --dtype ${DTYPE} --quant '{"linear:int4" : {"groupsize": 32}}' --checkpoint-path "$CHECKPOINT_PATH" --output-dso-path ${MODEL_DIR}/${MODEL_NAME}.so --device "$TARGET_DEVICE" || exit 1
-        # python -W ignore generate.py --dtype ${DTYPE} --checkpoint-path "$CHECKPOINT_PATH" --temperature 0 --dso-path ${MODEL_DIR}/${MODEL_NAME}.so --device "$TARGET_DEVICE" > "$MODEL_DIR/output_aoti" || exit 1
-        # cat "$MODEL_DIR/output_aoti"
+        echo "******************************************"
+        echo "******** INT4 group-wise quantized *******"
+        echo "******************************************"
+        if [ "$DTYPE" = float16 ]; then
+            echo "Skipping INT4 groupwise quantization for float16 because AOTI fails"
+        else
+            if [ $(uname -s) == "Linux" ]; then
+                echo "Skipping INT4 groupwise quantization because AOTI fails"
+            else
+                python -W ignore export.py --dtype ${DTYPE} --quant '{"linear:int4" : {"groupsize": 32}}' --checkpoint-path "$CHECKPOINT_PATH" --output-dso-path ${MODEL_DIR}/${MODEL_NAME}.so --device "$TARGET_DEVICE" || exit 1
+                python -W ignore generate.py --dtype ${DTYPE} --checkpoint-path "$CHECKPOINT_PATH" --temperature 0 --dso-path ${MODEL_DIR}/${MODEL_NAME}.so --device "$TARGET_DEVICE" > "$MODEL_DIR/output_aoti" || exit 1
+                cat "$MODEL_DIR/output_aoti"
+            fi
+        fi
     done
 }
 
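The nested guards above amount to a single dtype/platform gate on the AOTI INT4 path. An equivalent flattened sketch (a behavior-preserving rewrite, not the committed code), which also swaps the bash-only == inside [ ] for the portable = and quotes the uname output:

    # Skip AOTI INT4 group-wise quantization for float16, and on Linux generally.
    if [ "$DTYPE" = float16 ]; then
        echo "Skipping INT4 groupwise quantization for float16 because AOTI fails"
    elif [ "$(uname -s)" = "Linux" ]; then
        echo "Skipping INT4 groupwise quantization because AOTI fails"
    else
        :  # export.py + generate.py + cat, as in the hunk above
    fi

Quoting "$(uname -s)" keeps the test well-formed even if the command substitution ever produced an empty string.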
.github/workflows/compile-dtype.yml

-118
This file was deleted.

.github/workflows/compile_t4-dtype.yml

-115
This file was deleted.

scripts/workflow.sh

+9-1
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,14 @@
 # This source code is licensed under the BSD-style license found in the
 # LICENSE file in the root directory of this source tree.
 
+################################################################################
+# Usage:
+# bash script.sh [cpu|cuda] [model_repo] [optional_command]
+# Arguments:
+#   cpu|cuda: Specify the device to run validation on (cpu or cuda).
+#   model_repo: Model repository name to validate (e.g., tinyllamas/stories15M).
+#   optional_command: (optional) Specify additional command "compile", "aoti" or "executorch" to run the selected validation.
+################################################################################
 
 set -eu
 
@@ -75,7 +83,7 @@ MODEL_REPOS=(
   "mistralai/Mistral-7B-v0.1"
   "mistralai/Mistral-7B-Instruct-v0.1"
   "mistralai/Mistral-7B-Instruct-v0.2"
-  # "openlm-research/open_llama_7b"
+  "openlm-research/open_llama_7b"
   "codellama/CodeLlama-7b-Python-hf"
   "codellama/CodeLlama-34b-Python-hf"
   # "meta-llama/Llama-2-7b-chat-hf"
