diff --git a/awq/models/auto.py b/awq/models/auto.py
index 02051d96..e21cb954 100644
--- a/awq/models/auto.py
+++ b/awq/models/auto.py
@@ -43,7 +43,7 @@ def from_pretrained(self, model_path, trust_remote_code=True, safetensors=False,
     @classmethod
     def from_quantized(self, quant_path, quant_filename='', max_new_tokens=None,
                        trust_remote_code=True, fuse_layers=True,
-                       batch_size=1, safetensors=False,
+                       batch_size=1, safetensors=True,
                        max_memory=None, offload_folder=None) -> BaseAWQForCausalLM:
         os.environ["AWQ_BATCH_SIZE"] = str(batch_size)
         model_type = check_and_get_model_type(quant_path, trust_remote_code)
diff --git a/awq/models/base.py b/awq/models/base.py
index 619ca17a..0d9e0bd5 100644
--- a/awq/models/base.py
+++ b/awq/models/base.py
@@ -53,7 +53,7 @@ def quantize(self, tokenizer=None, quant_config={},
     def fuse_layers(model):
         pass
 
-    def save_quantized(self, save_dir, safetensors=False, shard_size="10GB"):
+    def save_quantized(self, save_dir, safetensors=True, shard_size="10GB"):
         save_dir = save_dir[:-1] if save_dir[-1] == '/' else save_dir
 
         # Save model
@@ -67,7 +67,9 @@ def forward(self, x): return x
         self.quant_config.save_pretrained(save_dir)
 
         # Remove empty state dict
-        os.remove(f'{save_dir}/pytorch_model.bin')
+        default_path = f'{save_dir}/model.safetensors'
+        if os.path.exists(default_path):
+            os.remove(default_path)
 
         # model_name has no extension, add it when saving state_dict
         model_name = 'model.safetensors' if safetensors else 'pytorch_model.bin'
@@ -130,7 +132,7 @@ def from_pretrained(self, model_path, model_type, torch_dtype: torch.dtype = tor
     @classmethod
     def from_quantized(self, model_path, model_type, model_filename='',
                        max_new_tokens=None, torch_dtype=torch.float16,
-                       trust_remote_code=True, safetensors=False, is_quantized=True,
+                       trust_remote_code=True, safetensors=True, is_quantized=True,
                        fuse_layers=False, version='GEMM',
                        max_memory=None, offload_folder=None):
         # [STEP 1-2] Load weights path and configs
@@ -180,11 +182,11 @@
         return self(model, model_type, is_quantized=is_quantized, quant_config=quant_config)
 
-    def _load_config(self, model_path, model_filename, safetensors=False,
+    def _load_config(self, model_path, model_filename, safetensors=True,
                      version="GEMM", trust_remote_code=True, max_new_tokens=4096):
         # [STEP 1] Download model if path is not a directory
         if not os.path.isdir(model_path):
-            ignore_patterns = ["*msgpack*", "*h5*"]
+            ignore_patterns = ["*msgpack*", "*h5*", "optimizer.pt"]
             if safetensors:
                 ignore_patterns.extend(["*.pt*", "*.bin*"])
             else:
diff --git a/examples/basic_generate.py b/examples/basic_generate.py
index 89342524..e9d9cf4f 100644
--- a/examples/basic_generate.py
+++ b/examples/basic_generate.py
@@ -4,7 +4,7 @@
 quant_path = "TheBloke/Mistral-7B-OpenOrca-AWQ"
 
 # Load model
-model = AutoAWQForCausalLM.from_quantized(quant_path, fuse_layers=True, safetensors=True)
+model = AutoAWQForCausalLM.from_quantized(quant_path, fuse_layers=True)
 tokenizer = AutoTokenizer.from_pretrained(quant_path, trust_remote_code=True)
 streamer = TextStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
diff --git a/examples/basic_quant.py b/examples/basic_quant.py
index 0a83c98c..a5ee859f 100644
--- a/examples/basic_quant.py
+++ b/examples/basic_quant.py
@@ -14,7 +14,6 @@
 model.quantize(tokenizer, quant_config=quant_config)
 
 # Save quantized model
-# NOTE: pass safetensors=True to save quantized model weights as safetensors
 model.save_quantized(quant_path)
 tokenizer.save_pretrained(quant_path)
diff --git a/examples/basic_safetensors_generate.py b/examples/basic_safetensors_generate.py
deleted file mode 100644
index 93fcc096..00000000
--- a/examples/basic_safetensors_generate.py
+++ /dev/null
@@ -1,28 +0,0 @@
-from awq import AutoAWQForCausalLM
-from transformers import AutoTokenizer, TextStreamer
-
-quant_path = "casperhansen/opt-125m-awq"
-
-# Load model
-model = AutoAWQForCausalLM.from_quantized(quant_path, fuse_layers=True, safetensors=True)
-tokenizer = AutoTokenizer.from_pretrained(quant_path, trust_remote_code=True)
-streamer = TextStreamer(tokenizer, skip_special_tokens=True)
-
-# Convert prompt to tokens
-prompt_template = """\
-A chat between a curious user and an artificial intelligence assistant. The assistant gives helpful, detailed, and polite answers to the user's questions.
-
-USER: {prompt}
-ASSISTANT:"""
-
-tokens = tokenizer(
-    prompt_template.format(prompt="How are you today?"),
-    return_tensors='pt'
-).input_ids.cuda()
-
-# Generate output
-generation_output = model.generate(
-    tokens,
-    streamer=streamer,
-    max_new_tokens=512
-)
diff --git a/examples/benchmark.py b/examples/benchmark.py
index 1b5bcabf..7a4f3bdb 100644
--- a/examples/benchmark.py
+++ b/examples/benchmark.py
@@ -129,7 +129,7 @@ def main(args):
     parser.add_argument("--model_path", type=str, default="casperhansen/mistral-7b-instruct-v0.1-awq", help="path to the model")
     parser.add_argument("--quant_file", type=str, default="", help="weights filename")
     parser.add_argument("--batch_size", type=int, default=1, help="Batch size for cache and generation")
-    parser.add_argument("--safetensors", default=False, action="store_true", help="Use for enabling safetensors")
+    parser.add_argument("--safetensors", default=True, action="store_false", help="Use for disabling safetensors")
     args = parser.parse_args()
 
     main(args)
diff --git a/setup.py b/setup.py
index bcebec31..6de5f5b9 100644
--- a/setup.py
+++ b/setup.py
@@ -44,7 +44,7 @@ requirements = [
     "torch>=2.1.0",
-    "transformers>=4.34.0",
+    "transformers>=4.35.0",
     "tokenizers>=0.12.1",
     "accelerate",
     "sentencepiece",
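
Taken together, these changes make safetensors the default for loading, saving, and the example scripts. The snippet below is a minimal sketch (not part of the diff) of the resulting behaviour, reusing the model path from examples/basic_generate.py; with the new defaults, no explicit safetensors argument is needed.

# Minimal sketch, assuming the new safetensors=True defaults introduced above.
from awq import AutoAWQForCausalLM
from transformers import AutoTokenizer

quant_path = "TheBloke/Mistral-7B-OpenOrca-AWQ"  # model path taken from examples/basic_generate.py

# Loads model.safetensors by default; pass safetensors=False to fall back to pytorch_model.bin.
model = AutoAWQForCausalLM.from_quantized(quant_path, fuse_layers=True)
tokenizer = AutoTokenizer.from_pretrained(quant_path, trust_remote_code=True)

Saving follows the same convention: after model.quantize(...), model.save_quantized(save_dir) now writes model.safetensors unless safetensors=False is passed explicitly, which is why the NOTE comment in examples/basic_quant.py is removed.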