From 752abc836e7075d3a799e2af8f82bdb2456c60cc Mon Sep 17 00:00:00 2001
From: Sofie Van Landeghem
Date: Wed, 4 Mar 2026 22:58:27 +0100
Subject: [PATCH] Ensure that inputs and targets are contiguous (#569)

* call reshape instead of view in case the tensors are not contiguous

* fix directly in data loader instead
---
 scripts/chat_sft.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/scripts/chat_sft.py b/scripts/chat_sft.py
index f31a2d3..cb9e078 100644
--- a/scripts/chat_sft.py
+++ b/scripts/chat_sft.py
@@ -282,8 +282,8 @@ def sft_data_generator_bos_bestfit(split, buffer_size=100):
         # Build tensors
         use_cuda = device_type == "cuda"
         batch_tensor = torch.tensor(rows, dtype=torch.long, pin_memory=use_cuda)
-        inputs = batch_tensor[:, :-1].to(device=device, dtype=torch.int32, non_blocking=use_cuda)
-        targets = batch_tensor[:, 1:].to(device=device, dtype=torch.int64, non_blocking=use_cuda)
+        inputs = batch_tensor[:, :-1].to(device=device, dtype=torch.int32, non_blocking=use_cuda).contiguous()
+        targets = batch_tensor[:, 1:].to(device=device, dtype=torch.int64, non_blocking=use_cuda).contiguous()
 
         # Apply the loss mask from render_conversation (mask=1 for assistant completions,
         # mask=0 for user prompts, BOS, special tokens, tool outputs). mask[1:] aligns