patch: CN mirrors for pytorch-wheels and HF datasets
- pyproject.toml + uv.lock: pytorch-cu128/cpu indexes → mirror.sjtu.edu.cn (aliyun lacks 2.9.1, sjtu has it)
- nanochat/dataset.py: climbmix BASE_URL → hf-mirror.com

Intended for ailab (CN, RTX 5090), where pytorch.org and huggingface.co are not directly reachable. Override at uv-sync time with the UV_DEFAULT_INDEX environment variable.
This commit is contained in:
+1
-1
@@ -20,7 +20,7 @@ from nanochat.common import get_base_dir
|
||||
# The specifics of the current pretraining dataset
|
||||
|
||||
# The URL on the internet where the data is hosted and downloaded from on demand
|
||||
-BASE_URL = "https://huggingface.co/datasets/karpathy/climbmix-400b-shuffle/resolve/main"
|
||||
+BASE_URL = "https://hf-mirror.com/datasets/karpathy/climbmix-400b-shuffle/resolve/main"
|
||||
MAX_SHARD = 6542 # the last datashard is shard_06542.parquet
|
||||
index_to_filename = lambda index: f"shard_{index:05d}.parquet" # format of the filenames
|
||||
base_dir = get_base_dir()
|
||||
|
||||
Reference in New Issue
Block a user