diff --git a/research/roi-train.py b/research/roi-train.py index bb277ab..c232e40 100755 --- a/research/roi-train.py +++ b/research/roi-train.py @@ -9,28 +9,23 @@ import os from datetime import datetime from collections import defaultdict import random +import argparse -class CustomDataset(Dataset): - def __init__(self, img_dir, transform=None, limit=500): +class SideBySideDataset(Dataset): + def __init__(self, img_dir, labels_file, transform=None): self.img_dir = img_dir self.transform = transform - self.img_labels = self._load_labels(limit) + self.img_labels = self._load_labels(labels_file) - def _load_labels(self, limit): - cats = defaultdict(list) - with open(os.path.join(self.img_dir, 'labels.txt'), 'r') as f: + def _load_labels(self, labels_file): + ret = [] + with open(labels_file, 'r') as f: lines = f.readlines() for line in lines: if not line.strip(): continue img_name, label = line.strip().split() - cats[label].append([img_name, label]) - min_samples = min(len(v) for v in cats.values()) - min_samples = min(limit, min_samples) - ret = [] - for k, v in cats.items(): - ret.extend(random.sample(v, min_samples)) - #ret.extend(v) + ret.append([img_name, label]) return ret def __len__(self): @@ -45,98 +40,112 @@ class CustomDataset(Dataset): label = int(label) return image, label -# 数据预处理 -transform_train = transforms.Compose([ - #transforms.RandomResizedCrop((128, 64)), # 随机裁剪 - #transforms.RandomHorizontalFlip(), # 随机水平翻转 - #transforms.Resize((256, 128)), # 调整大小 - transforms.ToTensor(), # 转换为Tensor - transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]) # 归一化 -]) +def parse_args(): + parser = argparse.ArgumentParser(description='Train a model') + parser.add_argument('--labels-file', required=True, type=str, help='Path to the labels file') + return parser.parse_args() -transform_val = transforms.Compose([ - #transforms.Resize((256, 128)), # 调整大小 - transforms.ToTensor(), # 转换为Tensor - transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]) # 归一化 -]) +def main(): + args = parse_args() + # 数据预处理 + transform_train = transforms.Compose([ + #transforms.RandomResizedCrop((128, 64)), # 随机裁剪 + #transforms.RandomHorizontalFlip(), # 随机水平翻转 + #transforms.Resize((256, 128)), # 调整大小 + transforms.ToTensor(), # 转换为Tensor + transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]) # 归一化 + ]) -# 加载数据集 -img_dir = os.path.abspath("data/roi/train") + transform_val = transforms.Compose([ + #transforms.Resize((256, 128)), # 调整大小 + transforms.ToTensor(), # 转换为Tensor + transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]) # 归一化 + ]) -full_dataset = CustomDataset(img_dir=img_dir, transform=transform_train, limit=5000) -train_ratio = 0.5 -val_ratio = 0.5 + # 加载数据集 + img_dir = os.path.abspath("data/roi/train") -train_size = int(train_ratio * len(full_dataset)) -val_size = len(full_dataset) - train_size + full_dataset = SideBySideDataset( + img_dir=img_dir, + labels_file=args.labels_file, + transform=transform_train, + ) + train_ratio = 0.5 + val_ratio = 0.5 -train_dataset, val_dataset = random_split(full_dataset, [train_size, val_size]) + train_size = int(train_ratio * len(full_dataset)) + val_size = len(full_dataset) - train_size -train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True) -val_loader = DataLoader(val_dataset, batch_size=32, shuffle=True) + train_dataset, val_dataset = random_split(full_dataset, [train_size, val_size]) -# 加载预训练的ResNet18模型 -model = models.resnet18(pretrained=True) + train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True) + val_loader = DataLoader(val_dataset, batch_size=32, shuffle=True) -# 修改最后一层全连接层,使其输出为2(二分类) -num_ftrs = model.fc.in_features -model.fc = nn.Linear(num_ftrs, 2) + # 加载预训练的ResNet18模型 + model = models.resnet18(pretrained=True) -# 将模型移动到GPU(如果可用) -device = torch.device('cuda' if torch.cuda.is_available() else 'cpu') -model = model.to(device) + # 修改最后一层全连接层,使其输出为2(二分类) + num_ftrs = model.fc.in_features + model.fc = nn.Linear(num_ftrs, 2) -criterion = nn.CrossEntropyLoss() -optimizer = optim.Adam(model.parameters(), lr=0.0001) -#optimizer = torch.optim.SGD( model.parameters(), lr=0.0001, momentum=0.09, weight_decay=1e-4) + # 将模型移动到GPU(如果可用) + device = torch.device('cuda' if torch.cuda.is_available() else 'cpu') + model = model.to(device) -scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=5, gamma=0.1) + criterion = nn.CrossEntropyLoss() + optimizer = optim.Adam(model.parameters(), lr=0.0001) + #optimizer = torch.optim.SGD( model.parameters(), lr=0.0001, momentum=0.09, weight_decay=1e-4) -num_epochs = 15 + scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=5, gamma=0.1) -last_accu = 0 -prev_accu = [] + num_epochs = 15 -for epoch in range(num_epochs): - # 训练阶段 - print(f"Start training epoch {epoch+1}/{num_epochs}") - model.train() - running_loss = 0.0 - for images, labels in train_loader: - images, labels = images.to(device), labels.to(device) - optimizer.zero_grad() - outputs = model(images) - loss = criterion(outputs, labels) - loss.backward() - optimizer.step() - running_loss += loss.item() + last_accu = 0 + prev_accu = [] - print(f'Epoch [{epoch+1}/{num_epochs}], Loss: {running_loss/len(train_loader):.4f}') - - scheduler.step() - - # 验证阶段 - model.eval() - correct = 0 - total = 0 - with torch.no_grad(): - for images, labels in val_loader: + for epoch in range(num_epochs): + # 训练阶段 + print(f"Start training epoch {epoch+1}/{num_epochs}") + model.train() + running_loss = 0.0 + for images, labels in train_loader: images, labels = images.to(device), labels.to(device) + optimizer.zero_grad() outputs = model(images) - _, predicted = torch.max(outputs.data, 1) - total += labels.size(0) - correct += (predicted == labels).sum().item() + loss = criterion(outputs, labels) + loss.backward() + optimizer.step() + running_loss += loss.item() - last_accu = 100 * correct / total - prev_accu.append(last_accu) - if epoch > 5: - avg = sum(prev_accu[-5:]) / 5 - variance = sum((x - avg) ** 2 for x in prev_accu[-5:]) / 5 - print(f"variance={variance:.4f}") - if variance < 1 and not (prev_accu[-1] > prev_accu[-2] and prev_accu[-2] > prev_accu[-3] and prev_accu[-3] > prev_accu[-4]): - print(f"Early stopping condition met: {variance:.4f}") - break - print(f'Validation Accuracy: {last_accu:.2f}%') + print(f'Epoch [{epoch+1}/{num_epochs}], Loss: {running_loss/len(train_loader):.4f}') -dt = datetime.now().strftime("%Y%m%d_%H%M%S") -torch.save(model.state_dict(), f'data/roi/models/resnet18_{dt}_{last_accu:.2f}.pth') + scheduler.step() + + # 验证阶段 + model.eval() + correct = 0 + total = 0 + with torch.no_grad(): + for images, labels in val_loader: + images, labels = images.to(device), labels.to(device) + outputs = model(images) + _, predicted = torch.max(outputs.data, 1) + total += labels.size(0) + correct += (predicted == labels).sum().item() + + last_accu = 100 * correct / total + prev_accu.append(last_accu) + if epoch > 5: + avg = sum(prev_accu[-5:]) / 5 + variance = sum((x - avg) ** 2 for x in prev_accu[-5:]) / 5 + print(f"variance={variance:.4f}") + if variance < 1 and not (prev_accu[-1] > prev_accu[-2] and prev_accu[-2] > prev_accu[-3] and prev_accu[-3] > prev_accu[-4]): + print(f"Early stopping condition met: {variance:.4f}") + break + print(f'Validation Accuracy: {last_accu:.2f}%') + + dt = datetime.now().strftime("%Y%m%d_%H%M%S") + torch.save(model.state_dict(), f'data/roi/models/resnet18_{dt}_{last_accu:.2f}.pth') + +if __name__ == "__main__": + main() \ No newline at end of file diff --git a/research/roi-train.sh b/research/roi-train.sh new file mode 100755 index 0000000..73dd373 --- /dev/null +++ b/research/roi-train.sh @@ -0,0 +1,24 @@ +#!/bin/bash + +all_label_file="data/roi/train/labels.txt" +npos=$(grep '1$' $all_label_file | wc -l) +nneg=$(grep '0$' $all_label_file | wc -l) + +echo "npos: $npos" +echo "nneg: $nneg" + +min=$npos +if [ $nneg -lt $min ]; then + min=$nneg +fi + +echo "min: $min" +cat $all_label_file | grep '1$' | shuf | head -n $min > data/roi/train/pos.txt +cat $all_label_file | grep '0$' | shuf | head -n $min > data/roi/train/neg.txt + +cat data/roi/train/pos.txt data/roi/train/neg.txt > data/roi/train/mixed.txt + +# these are the 'original' prints with different bg pattern offset +grep -e "7792[1-7].jpg" $all_label_file >> data/roi/train/mixed.txt + +./roi-train.py --labels-file data/roi/train/mixed.txt diff --git a/research/roi-verify.py b/research/roi-verify.py index 19384f6..59e7ac3 100755 --- a/research/roi-verify.py +++ b/research/roi-verify.py @@ -8,23 +8,24 @@ from roi_lib import * def parse_args(): parser = argparse.ArgumentParser(description='ROI prediction') parser.add_argument('--model', type=str, required=True, help='model path') - parser.add_argument('--image', type=str, required=True, help='image file') + parser.add_argument('image', nargs='+', type=str, help='image file') return parser.parse_args() # 主函数 def main(): args = parse_args() model = load_model(args.model) - image_path = args.image - image_tensor = preprocess_image(image_path) - predicted_class, probabilities = predict(model, image_tensor) - print(f'{image_path} predicted={predicted_class} prob={probabilities}') - if predicted_class == 1: - print("verify ok") - return 0 - else: - print("verify ng") - return 1 + ret = 0 + for image_path in args.image: + image_tensor = preprocess_image(image_path) + predicted_class, probabilities = predict(model, image_tensor) + print(f'{image_path} predicted={predicted_class} prob={probabilities}') + if predicted_class == 1: + print("verify ok") + else: + print("verify ng") + ret = 1 + return ret if __name__ == '__main__': sys.exit(main()) \ No newline at end of file