diff --git a/nanochat/checkpoint_manager.py b/nanochat/checkpoint_manager.py index c008ec2..d1e0a07 100644 --- a/nanochat/checkpoint_manager.py +++ b/nanochat/checkpoint_manager.py @@ -111,7 +111,7 @@ def build_model(checkpoint_dir, step, device, phase): # Load the Tokenizer tokenizer = get_tokenizer() # Sanity check: compatibility between model and tokenizer - assert tokenizer.get_vocab_size() == model_config_kwargs["vocab_size"] + assert tokenizer.get_vocab_size() == model_config_kwargs["vocab_size"], f"Tokenizer vocab size {tokenizer.get_vocab_size()} does not match model config vocab size {model_config_kwargs['vocab_size']}" return model, tokenizer, meta_data