Description
I keep getting errors like the following during both inference and fine-tuning.
Inference:
File ~/miniconda3/envs/flux/lib/python3.10/site-packages/torch/utils/_contextlib.py:116, in context_decorator.<locals>.decorate_context(*args, **kwargs)
113 @functools.wraps(func)
114 def decorate_context(*args, **kwargs):
115 with ctx_factory():
--> 116 return func(*args, **kwargs)
File ~/SD/OmniGen/OmniGen/pipeline.py:286, in OmniGenPipeline.__call__(self, prompt, input_images, height, width, num_inference_steps, guidance_scale, use_img_guidance, img_guidance_scale, max_input_image_size, separate_cfg_infer, offload_model, use_kv_cache, offload_kv_cache, use_input_image_size_as_output, dtype, seed, output_type)
282 # else:
283 # self.model.to(self.device)
285 scheduler = OmniGenScheduler(num_steps=num_inference_steps)
--> 286 samples = scheduler(latents, func, model_kwargs, use_kv_cache=use_kv_cache, offload_kv_cache=offload_kv_cache)
287 samples = samples.chunk((1+num_cfg), dim=0)[0]
289 if self.model_cpu_offload:
File ~/SD/OmniGen/OmniGen/scheduler.py:164, in OmniGenScheduler.__call__(self, z, func, model_kwargs, use_kv_cache, offload_kv_cache)
162 for i in tqdm(range(self.num_steps)):
163 timesteps = torch.zeros(size=(len(z), )).to(z.device) + self.sigma[i]
--> 164 pred, cache = func(z, timesteps, past_key_values=cache, **model_kwargs)
165 sigma_next = self.sigma[i+1]
166 sigma = self.sigma[i]
File ~/miniconda3/envs/flux/lib/python3.10/site-packages/torch/utils/_contextlib.py:116, in context_decorator.<locals>.decorate_context(*args, **kwargs)
113 @functools.wraps(func)
114 def decorate_context(*args, **kwargs):
115 with ctx_factory():
--> 116 return func(*args, **kwargs)
File ~/SD/OmniGen/OmniGen/model.py:388, in OmniGen.forward_with_separate_cfg(self, x, timestep, input_ids, input_img_latents, input_image_sizes, attention_mask, position_ids, cfg_scale, use_img_cfg, img_cfg_scale, past_key_values, use_kv_cache, offload_model)
386 model_out, pask_key_values = [], []
387 for i in range(len(input_ids)):
--> 388 temp_out, temp_pask_key_values = self.forward(x[i], timestep[i], input_ids[i], input_img_latents[i], input_image_sizes[i], attention_mask[i], position_ids[i], past_key_values=past_key_values[i], return_past_key_values=True, offload_model=offload_model)
389 model_out.append(temp_out)
390 pask_key_values.append(temp_pask_key_values)
File ~/SD/OmniGen/OmniGen/model.py:338, in OmniGen.forward(self, x, timestep, input_ids, input_img_latents, input_image_sizes, attention_mask, position_ids, padding_latent, past_key_values, return_past_key_values, offload_model)
335 else:
336 input_emb = torch.cat([time_token, x], dim=1)
--> 338 output = self.llm(inputs_embeds=input_emb, attention_mask=attention_mask, position_ids=position_ids, past_key_values=past_key_values, offload_model=offload_model)
339 output, past_key_values = output.last_hidden_state, output.past_key_values
340 if input_is_list:
File ~/miniconda3/envs/flux/lib/python3.10/site-packages/torch/nn/modules/module.py:1553, in Module._wrapped_call_impl(self, *args, **kwargs)
1551 return self._compiled_call_impl(*args, **kwargs) # type: ignore[misc]
1552 else:
-> 1553 return self._call_impl(*args, **kwargs)
File ~/miniconda3/envs/flux/lib/python3.10/site-packages/torch/nn/modules/module.py:1562, in Module._call_impl(self, *args, **kwargs)
1557 # If we don't have any hooks, we want to skip the rest of the logic in
1558 # this function, and just call forward.
1559 if not (self._backward_hooks or self._backward_pre_hooks or self._forward_hooks or self._forward_pre_hooks
1560 or _global_backward_pre_hooks or _global_backward_hooks
1561 or _global_forward_hooks or _global_forward_pre_hooks):
-> 1562 return forward_call(*args, **kwargs)
1564 try:
1565 result = None
File ~/SD/OmniGen/OmniGen/transformer.py:157, in Phi3Transformer.forward(self, input_ids, attention_mask, position_ids, past_key_values, inputs_embeds, use_cache, output_attentions, output_hidden_states, return_dict, cache_position, offload_model)
155 if offload_model and not self.training:
156 self.get_offlaod_layer(layer_idx, device=inputs_embeds.device)
--> 157 layer_outputs = decoder_layer(
158 hidden_states,
159 attention_mask=attention_mask,
160 position_ids=position_ids,
161 past_key_value=past_key_values,
162 output_attentions=output_attentions,
163 use_cache=use_cache,
164 cache_position=cache_position,
165 )
167 hidden_states = layer_outputs[0]
169 if use_cache:
File ~/miniconda3/envs/flux/lib/python3.10/site-packages/torch/nn/modules/module.py:1553, in Module._wrapped_call_impl(self, *args, **kwargs)
1551 return self._compiled_call_impl(*args, **kwargs) # type: ignore[misc]
1552 else:
-> 1553 return self._call_impl(*args, **kwargs)
File ~/miniconda3/envs/flux/lib/python3.10/site-packages/torch/nn/modules/module.py:1562, in Module._call_impl(self, *args, **kwargs)
1557 # If we don't have any hooks, we want to skip the rest of the logic in
1558 # this function, and just call forward.
1559 if not (self._backward_hooks or self._backward_pre_hooks or self._forward_hooks or self._forward_pre_hooks
1560 or _global_backward_pre_hooks or _global_backward_hooks
1561 or _global_forward_hooks or _global_forward_pre_hooks):
-> 1562 return forward_call(*args, **kwargs)
1564 try:
1565 result = None
File ~/miniconda3/envs/flux/lib/python3.10/site-packages/transformers/models/phi3/modeling_phi3.py:303, in Phi3DecoderLayer.forward(self, hidden_states, attention_mask, position_ids, past_key_value, output_attentions, use_cache, cache_position, position_embeddings, **kwargs)
300 hidden_states = self.input_layernorm(hidden_states)
302 # Self Attention
--> 303 hidden_states, self_attn_weights = self.self_attn(
304 hidden_states=hidden_states,
305 attention_mask=attention_mask,
306 position_ids=position_ids,
307 past_key_value=past_key_value,
308 output_attentions=output_attentions,
309 use_cache=use_cache,
310 cache_position=cache_position,
311 position_embeddings=position_embeddings,
312 **kwargs,
313 )
314 hidden_states = residual + self.resid_attn_dropout(hidden_states) # main diff with Llama
316 residual = hidden_states
File ~/miniconda3/envs/flux/lib/python3.10/site-packages/torch/nn/modules/module.py:1553, in Module._wrapped_call_impl(self, *args, **kwargs)
1551 return self._compiled_call_impl(*args, **kwargs) # type: ignore[misc]
1552 else:
-> 1553 return self._call_impl(*args, **kwargs)
File ~/miniconda3/envs/flux/lib/python3.10/site-packages/torch/nn/modules/module.py:1562, in Module._call_impl(self, *args, **kwargs)
1557 # If we don't have any hooks, we want to skip the rest of the logic in
1558 # this function, and just call forward.
1559 if not (self._backward_hooks or self._backward_pre_hooks or self._forward_hooks or self._forward_pre_hooks
1560 or _global_backward_pre_hooks or _global_backward_hooks
1561 or _global_forward_hooks or _global_forward_pre_hooks):
-> 1562 return forward_call(*args, **kwargs)
1564 try:
1565 result = None
File ~/miniconda3/envs/flux/lib/python3.10/site-packages/transformers/models/phi3/modeling_phi3.py:197, in Phi3Attention.forward(self, hidden_states, position_embeddings, attention_mask, past_key_value, cache_position, **kwargs)
194 key_states = key_states.view(hidden_shape).transpose(1, 2)
195 value_states = value_states.view(hidden_shape).transpose(1, 2)
--> 197 cos, sin = position_embeddings
198 query_states, key_states = apply_rotary_pos_emb(query_states, key_states, cos, sin)
200 if past_key_value is not None:
201 # sin and cos are specific to RoPE models; cache_position needed for the static cache
TypeError: cannot unpack non-iterable NoneType object
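
For context, the failing line is `cos, sin = position_embeddings` inside `Phi3Attention.forward`, which suggests the decoder layers are being called without the precomputed rotary embeddings that recent transformers releases expect `Phi3Model.forward` to supply. The sketch below is only a guess at a local workaround, not an official fix: it assumes the installed transformers version exposes `self.rotary_emb(hidden_states, position_ids)` returning a `(cos, sin)` tuple, and that OmniGen's custom `Phi3Transformer.forward` can simply pass that tuple through to each layer.

```python
# Hedged workaround sketch (not the maintainers' fix) for the layer loop inside
# OmniGen/OmniGen/transformer.py, Phi3Transformer.forward. Assumption: the
# installed transformers release precomputes rotary embeddings once per forward
# pass via self.rotary_emb(hidden_states, position_ids) and expects the
# (cos, sin) tuple to be handed to every decoder layer.

hidden_states = inputs_embeds

# Assumption: self.rotary_emb(...) returns (cos, sin), as in newer
# transformers Phi3Model.forward implementations.
position_embeddings = self.rotary_emb(hidden_states, position_ids)

for layer_idx, decoder_layer in enumerate(self.layers):
    if offload_model and not self.training:
        self.get_offlaod_layer(layer_idx, device=inputs_embeds.device)
    layer_outputs = decoder_layer(
        hidden_states,
        attention_mask=attention_mask,
        position_ids=position_ids,
        past_key_value=past_key_values,
        output_attentions=output_attentions,
        use_cache=use_cache,
        cache_position=cache_position,
        position_embeddings=position_embeddings,  # avoids unpacking None in Phi3Attention
    )
    hidden_states = layer_outputs[0]
```

Alternatively, reinstalling the transformers version pinned in OmniGen's requirements.txt (rather than a newer release) may sidestep the API change entirely.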