Ensure that inputs and targets are contiguous (#569)

* call reshape instead of view in case the tensors are not contiguous

* fix directly in the data loader instead, by calling .contiguous() on the inputs and targets
This commit is contained in:
Sofie Van Landeghem
2026-03-04 22:58:27 +01:00
committed by GitHub
parent 4b4077425b
commit 752abc836e
+2 -2
View File
@@ -282,8 +282,8 @@ def sft_data_generator_bos_bestfit(split, buffer_size=100):
# Build tensors # Build tensors
use_cuda = device_type == "cuda" use_cuda = device_type == "cuda"
batch_tensor = torch.tensor(rows, dtype=torch.long, pin_memory=use_cuda) batch_tensor = torch.tensor(rows, dtype=torch.long, pin_memory=use_cuda)
inputs = batch_tensor[:, :-1].to(device=device, dtype=torch.int32, non_blocking=use_cuda) inputs = batch_tensor[:, :-1].to(device=device, dtype=torch.int32, non_blocking=use_cuda).contiguous()
targets = batch_tensor[:, 1:].to(device=device, dtype=torch.int64, non_blocking=use_cuda) targets = batch_tensor[:, 1:].to(device=device, dtype=torch.int64, non_blocking=use_cuda).contiguous()
# Apply the loss mask from render_conversation (mask=1 for assistant completions, # Apply the loss mask from render_conversation (mask=1 for assistant completions,
# mask=0 for user prompts, BOS, special tokens, tool outputs). mask[1:] aligns # mask=0 for user prompts, BOS, special tokens, tool outputs). mask[1:] aligns