Skip to content

Commit

Permalink
fix
Browse files (browse the repository at this point in the history)
  • Loading branch information
epwalsh committed Apr 4, 2024
1 parent 077e274 commit ad550cd
Showing 1 changed file with 6 additions and 1 deletion.
7 changes: 6 additions & 1 deletion src/olmo_core/distributed/fsdp/flat_param_handle.py
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,8 @@ class FlatParamHandle:

process_group: Optional[dist.ProcessGroup] = None

+ device: Optional[torch.device] = None
+
@classmethod
def collate_flat_params(
cls,
Expand Down Expand Up @@ -120,6 +122,7 @@ def collate_flat_params(
params_data=params_data,
params_offsets_per_rank=params_offsets_per_rank,
process_group=process_group,
+ device=device,
)

def unshard_(self, dtype: Optional[torch.dtype] = None, rank0_only: bool = False, cache_grads: bool = False):
Expand All @@ -138,7 +141,9 @@ def unshard_(self, dtype: Optional[torch.dtype] = None, rank0_only: bool = False
)
else:
unsharded_data = torch.empty(
- param.sharding_spec.unsharded_flattened_shape, dtype=all_params_unsharded_data.dtype
+ param.sharding_spec.unsharded_flattened_shape,
+ dtype=all_params_unsharded_data.dtype,
+ device=self.device,
)
for rank in range(world_size):
rank_local_data = all_params_unsharded_data[rank][
Expand Down

0 comments on commit ad550cd

Please sign in to comment.