import argparse import os import time import logging import torch import torch.nn as nn import torch.nn.parallel import torch.backends.cudnn as cudnn import torch.optim import torch.utils.data import models from torch.autograd import Variable from data import get_dataset from preprocess import get_transform from utils import * from datetime import datetime from ast import literal_eval from torchvision.utils import save_image from models.binarized_modules import HingeLoss model_names = sorted(name for name in models.__dict__ if name.islower() and not name.startswith("__") and callable(models.__dict__[name])) parser = argparse.ArgumentParser(description='PyTorch ConvNet Training') parser.add_argument('--results_dir', metavar='RESULTS_DIR', default='/media/hdd/ihubara/BinaryNet.pytorch/results', help='results dir') parser.add_argument('--save', metavar='SAVE', default='', help='saved folder') parser.add_argument('--dataset', metavar='DATASET', default='imagenet', help='dataset name or folder') parser.add_argument('--model', '-a', metavar='MODEL', default='alexnet', choices=model_names, help='model architecture: ' + ' | '.join(model_names) + ' (default: alexnet)') parser.add_argument('--input_size', type=int, default=None, help='image input size') parser.add_argument('--model_config', default='', help='additional architecture configuration') parser.add_argument('--type', default='torch.cuda.FloatTensor', help='type of tensor - e.g torch.cuda.HalfTensor') parser.add_argument('--gpus', default='0', help='gpus used for training - e.g 0,1,3') parser.add_argument('-j', '--workers', default=8, type=int, metavar='N', help='number of data loading workers (default: 8)') parser.add_argument('--epochs', default=900, type=int, metavar='N', help='number of total epochs to run') parser.add_argument('--start-epoch', default=0, type=int, metavar='N', help='manual epoch number (useful on restarts)') parser.add_argument('-b', '--batch-size', default=256, type=int, metavar='N', help='mini-batch size (default: 256)') parser.add_argument('--optimizer', default='SGD', type=str, metavar='OPT', help='optimizer function used') parser.add_argument('--lr', '--learning_rate', default=0.1, type=float, metavar='LR', help='initial learning rate') parser.add_argument('--momentum', default=0.9, type=float, metavar='M', help='momentum') parser.add_argument('--weight-decay', '--wd', default=1e-4, type=float, metavar='W', help='weight decay (default: 1e-4)') parser.add_argument('--print-freq', '-p', default=10, type=int, metavar='N', help='print frequency (default: 10)') parser.add_argument('--resume', default='', type=str, metavar='PATH', help='path to latest checkpoint (default: none)') parser.add_argument('-e', '--evaluate', type=str, metavar='FILE', help='evaluate model FILE on validation set') torch.cuda.random.manual_seed_all(10) output_dim = 0 def main(): global args, best_prec1, output_dim best_prec1 = 0 args = parser.parse_args() output_dim = {'cifar10': 10, 'cifar100':100, 'imagenet': 1000}[args.dataset] #import pdb; pdb.set_trace() #torch.save(args.batch_size/(len(args.gpus)/2+1),'multi_gpu_batch_size') if args.evaluate: args.results_dir = '/tmp' if args.save is '': args.save = datetime.now().strftime('%Y-%m-%d_%H-%M-%S') save_path = os.path.join(args.results_dir, args.save) if not os.path.exists(save_path): os.makedirs(save_path) setup_logging(os.path.join(save_path, 'log.txt')) results_file = os.path.join(save_path, 'results.%s') results = ResultsLog(results_file % 'csv', results_file % 'html') logging.info("saving to %s", save_path) logging.debug("run arguments: %s", args) if 'cuda' in args.type: args.gpus = [int(i) for i in args.gpus.split(',')] torch.cuda.set_device(args.gpus[0]) cudnn.benchmark = True else: args.gpus = None # create model logging.info("creating model %s", args.model) model = models.__dict__[args.model] model_config = {'input_size': args.input_size, 'dataset': args.dataset, 'num_classes': output_dim} if args.model_config is not '': model_config = dict(model_config, **literal_eval(args.model_config)) model = model(**model_config) logging.info("created model with configuration: %s", model_config) # optionally resume from a checkpoint if args.evaluate: if not os.path.isfile(args.evaluate): parser.error('invalid checkpoint: {}'.format(args.evaluate)) checkpoint = torch.load(args.evaluate) model.load_state_dict(checkpoint['state_dict']) logging.info("loaded checkpoint '%s' (epoch %s)", args.evaluate, checkpoint['epoch']) elif args.resume: checkpoint_file = args.resume if os.path.isdir(checkpoint_file): results.load(os.path.join(checkpoint_file, 'results.csv')) checkpoint_file = os.path.join( checkpoint_file, 'model_best.pth.tar') if os.path.isfile(checkpoint_file): logging.info("loading checkpoint '%s'", args.resume) checkpoint = torch.load(checkpoint_file) args.start_epoch = checkpoint['epoch'] - 1 best_prec1 = checkpoint['best_prec1'] model.load_state_dict(checkpoint['state_dict']) logging.info("loaded checkpoint '%s' (epoch %s)", checkpoint_file, checkpoint['epoch']) else: logging.error("no checkpoint found at '%s'", args.resume) num_parameters = sum([l.nelement() for l in model.parameters()]) logging.info("number of parameters: %d", num_parameters) # Data loading code default_transform = { 'train': get_transform(args.dataset, input_size=args.input_size, augment=True), 'eval': get_transform(args.dataset, input_size=args.input_size, augment=False) } transform = getattr(model, 'input_transform', default_transform) regime = getattr(model, 'regime', {0: {'optimizer': args.optimizer, 'lr': args.lr, 'momentum': args.momentum, 'weight_decay': args.weight_decay}}) # define loss function (criterion) and optimizer #criterion = getattr(model, 'criterion', nn.NLLLoss)() criterion = getattr(model, 'criterion', HingeLoss)() #criterion.type(args.type) model.type(args.type) val_data = get_dataset(args.dataset, 'val', transform['eval']) val_loader = torch.utils.data.DataLoader( val_data, batch_size=args.batch_size, shuffle=False, num_workers=args.workers, pin_memory=True) if args.evaluate: validate(val_loader, model, criterion, 0) return train_data = get_dataset(args.dataset, 'train', transform['train']) train_loader = torch.utils.data.DataLoader( train_data, batch_size=args.batch_size, shuffle=True, num_workers=args.workers, pin_memory=True) optimizer = torch.optim.SGD(model.parameters(), lr=args.lr) logging.info('training regime: %s', regime) #import pdb; pdb.set_trace() #search_binarized_modules(model) for epoch in range(args.start_epoch, args.epochs): optimizer = adjust_optimizer(optimizer, epoch, regime) # train for one epoch train_loss, train_prec1, train_prec5 = train( train_loader, model, criterion, epoch, optimizer) # evaluate on validation set val_loss, val_prec1, val_prec5 = validate( val_loader, model, criterion, epoch) # remember best prec@1 and save checkpoint is_best = val_prec1 > best_prec1 best_prec1 = max(val_prec1, best_prec1) save_checkpoint({ 'epoch': epoch + 1, 'model': args.model, 'config': args.model_config, 'state_dict': model.state_dict(), 'best_prec1': best_prec1, 'regime': regime }, is_best, path=save_path) logging.info('\n Epoch: {0}\t' 'Training Loss {train_loss:.4f} \t' 'Training Prec@1 {train_prec1:.3f} \t' 'Training Prec@5 {train_prec5:.3f} \t' 'Validation Loss {val_loss:.4f} \t' 'Validation Prec@1 {val_prec1:.3f} \t' 'Validation Prec@5 {val_prec5:.3f} \n' .format(epoch + 1, train_loss=train_loss, val_loss=val_loss, train_prec1=train_prec1, val_prec1=val_prec1, train_prec5=train_prec5, val_prec5=val_prec5)) results.add(epoch=epoch + 1, train_loss=train_loss, val_loss=val_loss, train_error1=100 - train_prec1, val_error1=100 - val_prec1, train_error5=100 - train_prec5, val_error5=100 - val_prec5) results.plot(x='epoch', y=['train_loss', 'val_loss'], title='Loss', ylabel='loss') results.plot(x='epoch', y=['train_error1', 'val_error1'], title='Error@1', ylabel='error %') results.plot(x='epoch', y=['train_error5', 'val_error5'], title='Error@5', ylabel='error %') results.save() def forward(data_loader, model, criterion, epoch=0, training=True, optimizer=None): if args.gpus and len(args.gpus) > 1: model = torch.nn.DataParallel(model, args.gpus) batch_time = AverageMeter() data_time = AverageMeter() losses = AverageMeter() top1 = AverageMeter() top5 = AverageMeter() end = time.time() for i, (inputs, target) in enumerate(data_loader): # measure data loading time data_time.update(time.time() - end) if args.gpus is not None: target = target.cuda() #import pdb; pdb.set_trace() if criterion.__class__.__name__=='HingeLoss': target=target.unsqueeze(1) target_onehot = torch.cuda.FloatTensor(target.size(0), output_dim) target_onehot.fill_(-1) target_onehot.scatter_(1, target, 1) target=target.squeeze() if not training: with torch.no_grad(): input_var = Variable(inputs.type(args.type)) target_var = Variable(target_onehot) # compute output output = model(input_var) else: input_var = Variable(inputs.type(args.type)) target_var = Variable(target_onehot) # compute output output = model(input_var) #import pdb; pdb.set_trace() loss = criterion(output, target_onehot) #import pdb; pdb.set_trace() if type(output) is list: output = output[0] # measure accuracy and record loss prec1, prec5 = accuracy(output.data, target, topk=(1, 5)) losses.update(loss.item(), inputs.size(0)) top1.update(prec1.item(), inputs.size(0)) top5.update(prec5.item(), inputs.size(0)) #import pdb; pdb.set_trace() #if not training and top1.avg<15: # import pdb; pdb.set_trace() if training: # compute gradient and do SGD step optimizer.zero_grad() #add backwoed hook loss.backward() for p in list(model.parameters()): #import pdb; pdb.set_trace() if hasattr(p,'org'): #print('before:', p[0][0]) #gm=max(p.grad.data.max(),-p.grad.data.min()) #p.grad=p.grad.div(gm+1) p.data.copy_(p.org) #print('after:', p[0][0]) optimizer.step() for p in list(model.parameters()): #import pdb; pdb.set_trace() if hasattr(p,'org'): #print('before:', p[0][0]) p.org.copy_(p.data.clamp_(-1,1)) #if epoch>30: # import pdb; pdb.set_trace() # measure elapsed time batch_time.update(time.time() - end) end = time.time() if i % args.print_freq == 0: logging.info('{phase} - Epoch: [{0}][{1}/{2}]\t' 'Time {batch_time.val:.3f} ({batch_time.avg:.3f})\t' 'Data {data_time.val:.3f} ({data_time.avg:.3f})\t' 'Loss {loss.val:.4f} ({loss.avg:.4f})\t' 'Prec@1 {top1.val:.3f} ({top1.avg:.3f})\t' 'Prec@5 {top5.val:.3f} ({top5.avg:.3f})'.format( epoch, i, len(data_loader), phase='TRAINING' if training else 'EVALUATING', batch_time=batch_time, data_time=data_time, loss=losses, top1=top1, top5=top5)) return losses.avg, top1.avg, top5.avg def train(data_loader, model, criterion, epoch, optimizer): # switch to train mode model.train() return forward(data_loader, model, criterion, epoch, training=True, optimizer=optimizer) def validate(data_loader, model, criterion, epoch): # switch to evaluate mode model.eval() return forward(data_loader, model, criterion, epoch, training=False, optimizer=None) if __name__ == '__main__': main()