bugfix

2025-12-26 17:41:57 +08:00
parent 7840049189
commit ea4229851b
2 changed files with 8 additions and 6 deletions
@@ -31,6 +31,8 @@ from tasks.gsm8k import GSM8K
 # RL hyperparameters
 run = "dummy" # wandb run name
 source = "sft" # mid|sft
+model_tag = None # model tag to load the model from (base model or midtrained model)
+step = None # step to load the model from (base model or midtrained model)
 dtype = "bfloat16"
 device_batch_size = 8 # no forward pass will go above this to not OOM
 examples_per_step = 16 # in total and across all ranks (note: examples, not samples/completions!)
@@ -64,7 +66,7 @@ use_dummy_wandb = run == "dummy" or not master_process
 wandb_run = DummyWandb() if use_dummy_wandb else wandb.init(project="nanochat-rl", name=run, config=user_config)

 # Init model and tokenizer
-model, tokenizer, meta = load_model(source, device, phase="eval")
+model, tokenizer, meta = load_model(source, device, phase="eval", model_tag=model_tag, step=step)
 engine = Engine(model, tokenizer) # for sampling rollouts

 # -----------------------------------------------------------------------------
@@ -307,8 +309,8 @@ for step in range(num_steps):
    if master_process and ((step > 0 and step % save_every == 0) or step == num_steps - 1):
        base_dir = get_base_dir()
        depth = model.config.n_layer
-        model_tag = f"d{depth}" # base the model tag on the depth of the base model
-        checkpoint_dir = os.path.join(base_dir, "chatrl_checkpoints", model_tag)
+        output_dirname = model_tag if model_tag else f"d{depth}" # base the model tag on the depth of the base model
+        checkpoint_dir = os.path.join(base_dir, "chatrl_checkpoints", output_dirname)
        model_config_kwargs = model.config.__dict__ # slightly naughty, abusing the simplicity of GPTConfig, TODO nicer
        save_checkpoint(
            checkpoint_dir,