Add explicit UTF-8 encoding to text-mode open() calls

This commit is contained in:
svlandeg
2025-11-03 21:27:12 +01:00
parent a83646e098
commit c72b8b2309
8 changed files with 17 additions and 17 deletions
+2 -2
View File
@@ -34,7 +34,7 @@ def save_checkpoint(checkpoint_dir, step, model_data, optimizer_data, meta_data)
log0(f"Saved optimizer file to: {optimizer_path}")
# Save the metadata dict as json
meta_path = os.path.join(checkpoint_dir, f"meta_{step:06d}.json")
with open(meta_path, "w") as f:
with open(meta_path, "w", encoding='utf-8') as f:
json.dump(meta_data, f, indent=2)
log0(f"Saved metadata file to: {meta_path}")
@@ -50,7 +50,7 @@ def load_checkpoint(checkpoint_dir, step, device, load_optimizer=False):
optimizer_data = torch.load(optimizer_path, map_location=device)
# Load the metadata
meta_path = os.path.join(checkpoint_dir, f"meta_{step:06d}.json")
with open(meta_path, "r") as f:
with open(meta_path, "r", encoding='utf-8') as f:
meta_data = json.load(f)
return model_data, optimizer_data, meta_data
+1 -1
View File
@@ -70,7 +70,7 @@ def download_file_with_lock(url, filename, postprocess_fn=None):
if os.path.exists(file_path):
return file_path
with open(lock_path, 'w') as lock_file:
with open(lock_path, 'w', encoding='utf-8') as lock_file:
# Only a single rank can acquire this lock
# All other ranks block until it is released
+6 -6
View File
@@ -170,7 +170,7 @@ Generated: {timestamp}
# count dependencies via uv.lock
uv_lock_lines = 0
if os.path.exists('uv.lock'):
with open('uv.lock', 'r') as f:
with open('uv.lock', 'r', encoding='utf-8') as f:
uv_lock_lines = len(f.readlines())
header += f"""
@@ -241,7 +241,7 @@ class Report:
slug = slugify(section)
file_name = f"{slug}.md"
file_path = os.path.join(self.report_dir, file_name)
with open(file_path, "w") as f:
with open(file_path, "w", encoding='utf-8') as f:
f.write(f"## {section}\n")
f.write(f"timestamp: {datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')}\n\n")
for item in data:
@@ -272,11 +272,11 @@ class Report:
final_metrics = {} # the most important final metrics, which we'll add as a table at the end
start_time = None
end_time = None
with open(report_file, "w") as out_file:
with open(report_file, "w", encoding='utf-8') as out_file:
# write the header first
header_file = os.path.join(report_dir, "header.md")
if os.path.exists(header_file):
with open(header_file, "r") as f:
with open(header_file, "r", encoding='utf-8') as f:
header_content = f.read()
out_file.write(header_content)
start_time = extract_timestamp(header_content, "Run started:")
@@ -293,7 +293,7 @@ class Report:
if not os.path.exists(section_file):
print(f"Warning: {section_file} does not exist, skipping")
continue
with open(section_file, "r") as in_file:
with open(section_file, "r", encoding='utf-8') as in_file:
section = in_file.read()
# Extract timestamp from this section (the last section's timestamp will "stick" as end_time)
if "rl" not in file_name:
@@ -373,7 +373,7 @@ class Report:
header_file = os.path.join(self.report_dir, "header.md")
header = generate_header()
start_time = datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S")
with open(header_file, "w") as f:
with open(header_file, "w", encoding='utf-8') as f:
f.write(header)
f.write(f"Run started: {start_time}\n\n---\n\n")
print(f"Reset report and wrote header to {header_file}")