| @ -0,0 +1,2 @@ | |||
| models.ckpt | |||
| training_state.bin | |||
| @ -0,0 +1,8 @@ | |||
| # BNN.pytorch | |||
| Binarized Neural Network (BNN) for pytorch | |||
| This is the pytorch version for the BNN code, for VGG and resnet models | |||
| Link to the paper: https://papers.nips.cc/paper/6573-binarized-neural-networks | |||
| The code is based on https://github.com/eladhoffer/convNet.pytorch | |||
| Please install torch and torchvision by following the instructions at: http://pytorch.org/ | |||
| To run resnet18 for cifar10 dataset use: python main_binary.py --model resnet_binary --save resnet18_binary --dataset cifar10 | |||
| @ -0,0 +1,37 @@ | |||
| import os | |||
| import torchvision.datasets as datasets | |||
| import torchvision.transforms as transforms | |||
# Root directory under which all datasets are looked up.
_DATASETS_MAIN_PATH = '/home/Datasets'

# Dataset key -> location on disk. Single-folder datasets map to one path;
# ImageNet (added below) maps to a dict with one path per split.
_dataset_path = {
    key: os.path.join(_DATASETS_MAIN_PATH, folder)
    for key, folder in (
        ('cifar10', 'CIFAR10'),
        ('cifar100', 'CIFAR100'),
        ('stl10', 'STL10'),
        ('mnist', 'MNIST'),
    )
}
_dataset_path['imagenet'] = {
    split: os.path.join(_DATASETS_MAIN_PATH, 'ImageNet/' + split)
    for split in ('train', 'val')
}
def get_dataset(name, split='train', transform=None,
                target_transform=None, download=True):
    """Build the torchvision dataset registered under ``name``.

    Args:
        name: dataset key; 'cifar10', 'cifar100' and 'imagenet' are handled.
        split: 'train' selects the training set. For the CIFAR datasets any
            other value selects the non-training set (``train=False``); for
            ImageNet it is used as a key into the path table, so it must be
            'train' or 'val'.
        transform: forwarded unchanged to the dataset constructor.
        target_transform: forwarded unchanged to the dataset constructor.
        download: forwarded to the CIFAR constructors; not used for ImageNet.

    Returns:
        The constructed dataset object.

    Raises:
        ValueError: if ``name`` is not one of the handled dataset keys.
        KeyError: for ImageNet, if ``split`` has no entry in the path table.
    """
    train = (split == 'train')
    if name == 'cifar10':
        return datasets.CIFAR10(root=_dataset_path['cifar10'],
                                train=train,
                                transform=transform,
                                target_transform=target_transform,
                                download=download)
    if name == 'cifar100':
        return datasets.CIFAR100(root=_dataset_path['cifar100'],
                                 train=train,
                                 transform=transform,
                                 target_transform=target_transform,
                                 download=download)
    if name == 'imagenet':
        path = _dataset_path[name][split]
        return datasets.ImageFolder(root=path,
                                    transform=transform,
                                    target_transform=target_transform)
    # Previously an unknown name fell through and returned None, which only
    # failed later inside the DataLoader; fail here with a clear message.
    raise ValueError(
        "unsupported dataset '{}'; expected one of: "
        "cifar10, cifar100, imagenet".format(name))
| @ -0,0 +1,309 @@ | |||
| import argparse | |||
| import os | |||
| import time | |||
| import logging | |||
| import torch | |||
| import torch.nn as nn | |||
| import torch.nn.parallel | |||
| import torch.backends.cudnn as cudnn | |||
| import torch.optim | |||
| import torch.utils.data | |||
| import models | |||
| from torch.autograd import Variable | |||
| from data import get_dataset | |||
| from preprocess import get_transform | |||
| from utils import * | |||
| from datetime import datetime | |||
| from ast import literal_eval | |||
| from torchvision.utils import save_image | |||
# Names of all lowercase, non-dunder callables exported by the ``models``
# package; used as the allowed values of --model.
model_names = sorted(name for name in models.__dict__
                     if name.islower() and not name.startswith("__")
                     and callable(models.__dict__[name]))

# Command-line interface of the training script.
parser = argparse.ArgumentParser(description='PyTorch ConvNet Training')
parser.add_argument('--results_dir', metavar='RESULTS_DIR', default='./results',
                    help='results dir')
parser.add_argument('--save', metavar='SAVE', default='',
                    help='saved folder')
parser.add_argument('--dataset', metavar='DATASET', default='imagenet',
                    help='dataset name or folder')
parser.add_argument('--model', '-a', metavar='MODEL', default='alexnet',
                    choices=model_names,
                    help='model architecture: ' +
                    ' | '.join(model_names) +
                    ' (default: alexnet)')
parser.add_argument('--input_size', type=int, default=None,
                    help='image input size')
parser.add_argument('--model_config', default='',
                    help='additional architecture configuration')
parser.add_argument('--type', default='torch.cuda.FloatTensor',
                    help='type of tensor - e.g torch.cuda.HalfTensor')
parser.add_argument('--gpus', default='0',
                    help='gpus used for training - e.g 0,1,3')
parser.add_argument('-j', '--workers', default=8, type=int, metavar='N',
                    help='number of data loading workers (default: 8)')
parser.add_argument('--epochs', default=2500, type=int, metavar='N',
                    help='number of total epochs to run')
parser.add_argument('--start-epoch', default=0, type=int, metavar='N',
                    help='manual epoch number (useful on restarts)')
parser.add_argument('-b', '--batch-size', default=256, type=int,
                    metavar='N', help='mini-batch size (default: 256)')
parser.add_argument('--optimizer', default='SGD', type=str, metavar='OPT',
                    help='optimizer function used')
parser.add_argument('--lr', '--learning_rate', default=0.1, type=float,
                    metavar='LR', help='initial learning rate')
parser.add_argument('--momentum', default=0.9, type=float, metavar='M',
                    help='momentum')
parser.add_argument('--weight-decay', '--wd', default=1e-4, type=float,
                    metavar='W', help='weight decay (default: 1e-4)')
parser.add_argument('--print-freq', '-p', default=10, type=int,
                    metavar='N', help='print frequency (default: 10)')
parser.add_argument('--resume', default='', type=str, metavar='PATH',
                    help='path to latest checkpoint (default: none)')
parser.add_argument('-e', '--evaluate', type=str, metavar='FILE',
                    help='evaluate model FILE on validation set')
def main():
    """Parse the command line, build the model, then train or evaluate it.

    Sets the module globals ``args`` and ``best_prec1``. Creates the results
    directory, configures logging, optionally restores a checkpoint
    (``--evaluate`` or ``--resume``), and either runs a single validation
    pass or the full train/validate loop, saving a checkpoint and updating
    the results log after every epoch.

    Helpers such as ``setup_logging``, ``ResultsLog``, ``adjust_optimizer``
    and ``save_checkpoint`` are not defined in this script; presumably they
    come from ``from utils import *`` -- confirm against that module.
    """
    global args, best_prec1
    best_prec1 = 0
    args = parser.parse_args()

    if args.evaluate:
        args.results_dir = '/tmp'
    # Compare by value: ``is ''`` tests object identity, which depends on
    # interpreter string interning and triggers a SyntaxWarning on newer
    # Python versions.
    if args.save == '':
        args.save = datetime.now().strftime('%Y-%m-%d_%H-%M-%S')
    save_path = os.path.join(args.results_dir, args.save)
    if not os.path.exists(save_path):
        os.makedirs(save_path)

    setup_logging(os.path.join(save_path, 'log.txt'))
    results_file = os.path.join(save_path, 'results.%s')
    results = ResultsLog(results_file % 'csv', results_file % 'html')

    logging.info("saving to %s", save_path)
    logging.debug("run arguments: %s", args)

    if 'cuda' in args.type:
        # --gpus arrives as a comma-separated string; turn it into a list of
        # device ids and make the first one the current device.
        args.gpus = [int(i) for i in args.gpus.split(',')]
        torch.cuda.set_device(args.gpus[0])
        cudnn.benchmark = True
    else:
        args.gpus = None

    # create model
    logging.info("creating model %s", args.model)
    model = models.__dict__[args.model]
    model_config = {'input_size': args.input_size, 'dataset': args.dataset}

    if args.model_config != '':
        # --model_config is a Python dict literal that overrides/extends the
        # default configuration.
        model_config = dict(model_config, **literal_eval(args.model_config))

    model = model(**model_config)
    logging.info("created model with configuration: %s", model_config)

    # optionally resume from a checkpoint
    if args.evaluate:
        if not os.path.isfile(args.evaluate):
            parser.error('invalid checkpoint: {}'.format(args.evaluate))
        checkpoint = torch.load(args.evaluate)
        model.load_state_dict(checkpoint['state_dict'])
        logging.info("loaded checkpoint '%s' (epoch %s)",
                     args.evaluate, checkpoint['epoch'])
    elif args.resume:
        checkpoint_file = args.resume
        if os.path.isdir(checkpoint_file):
            # A directory means "resume this run": reload its results log
            # and pick up the best checkpoint stored inside it.
            results.load(os.path.join(checkpoint_file, 'results.csv'))
            checkpoint_file = os.path.join(
                checkpoint_file, 'model_best.pth.tar')
        if os.path.isfile(checkpoint_file):
            logging.info("loading checkpoint '%s'", args.resume)
            checkpoint = torch.load(checkpoint_file)
            # Checkpoints store ``epoch + 1`` (see save_checkpoint call
            # below), so this restarts at the epoch that was saved.
            args.start_epoch = checkpoint['epoch'] - 1
            best_prec1 = checkpoint['best_prec1']
            model.load_state_dict(checkpoint['state_dict'])
            logging.info("loaded checkpoint '%s' (epoch %s)",
                         checkpoint_file, checkpoint['epoch'])
        else:
            logging.error("no checkpoint found at '%s'", args.resume)

    num_parameters = sum(p.nelement() for p in model.parameters())
    logging.info("number of parameters: %d", num_parameters)

    # Data loading code
    default_transform = {
        'train': get_transform(args.dataset,
                               input_size=args.input_size, augment=True),
        'eval': get_transform(args.dataset,
                              input_size=args.input_size, augment=False)
    }
    # A model may supply its own transforms / training regime as attributes;
    # otherwise fall back to the defaults built from the command line.
    transform = getattr(model, 'input_transform', default_transform)
    regime = getattr(model, 'regime', {0: {'optimizer': args.optimizer,
                                           'lr': args.lr,
                                           'momentum': args.momentum,
                                           'weight_decay': args.weight_decay}})
    # define loss function (criterion) and optimizer
    criterion = getattr(model, 'criterion', nn.CrossEntropyLoss)()
    criterion.type(args.type)
    model.type(args.type)

    val_data = get_dataset(args.dataset, 'val', transform['eval'])
    val_loader = torch.utils.data.DataLoader(
        val_data,
        batch_size=args.batch_size, shuffle=False,
        num_workers=args.workers, pin_memory=True)

    if args.evaluate:
        validate(val_loader, model, criterion, 0)
        return

    train_data = get_dataset(args.dataset, 'train', transform['train'])
    train_loader = torch.utils.data.DataLoader(
        train_data,
        batch_size=args.batch_size, shuffle=True,
        num_workers=args.workers, pin_memory=True)

    # Initial optimizer; adjust_optimizer() is given the regime every epoch
    # and returns the optimizer to use from then on.
    optimizer = torch.optim.SGD(model.parameters(), lr=args.lr)
    logging.info('training regime: %s', regime)

    for epoch in range(args.start_epoch, args.epochs):
        optimizer = adjust_optimizer(optimizer, epoch, regime)

        # train for one epoch
        train_loss, train_prec1, train_prec5 = train(
            train_loader, model, criterion, epoch, optimizer)

        # evaluate on validation set
        val_loss, val_prec1, val_prec5 = validate(
            val_loader, model, criterion, epoch)

        # remember best prec@1 and save checkpoint
        is_best = val_prec1 > best_prec1
        best_prec1 = max(val_prec1, best_prec1)

        save_checkpoint({
            'epoch': epoch + 1,
            'model': args.model,
            'config': args.model_config,
            'state_dict': model.state_dict(),
            'best_prec1': best_prec1,
            'regime': regime
        }, is_best, path=save_path)
        logging.info('\n Epoch: {0}\t'
                     'Training Loss {train_loss:.4f} \t'
                     'Training Prec@1 {train_prec1:.3f} \t'
                     'Training Prec@5 {train_prec5:.3f} \t'
                     'Validation Loss {val_loss:.4f} \t'
                     'Validation Prec@1 {val_prec1:.3f} \t'
                     'Validation Prec@5 {val_prec5:.3f} \n'
                     .format(epoch + 1, train_loss=train_loss, val_loss=val_loss,
                             train_prec1=train_prec1, val_prec1=val_prec1,
                             train_prec5=train_prec5, val_prec5=val_prec5))

        results.add(epoch=epoch + 1, train_loss=train_loss, val_loss=val_loss,
                    train_error1=100 - train_prec1, val_error1=100 - val_prec1,
                    train_error5=100 - train_prec5, val_error5=100 - val_prec5)
        # Plotting of the loss/error curves is disabled in this script; only
        # the tabular results are written.
        results.save()
def forward(data_loader, model, criterion, epoch=0, training=True, optimizer=None):
    """Run one pass over ``data_loader``, updating the model when training.

    Args:
        data_loader: iterable yielding ``(inputs, target)`` batches.
        model: network to run; wrapped in ``DataParallel`` when more than one
            GPU id is listed in the global ``args.gpus``.
        criterion: loss callable invoked as ``criterion(output, target)``.
        epoch: epoch number, used only in the log line.
        training: when True, back-propagate and step ``optimizer``; when
            False, run the model under ``torch.no_grad()``.
        optimizer: optimizer to step; required when ``training`` is True.

    Returns:
        Tuple ``(average loss, average prec@1, average prec@5)`` over the pass.
    """
    if args.gpus and len(args.gpus) > 1:
        model = torch.nn.DataParallel(model, args.gpus)

    batch_time = AverageMeter()
    data_time = AverageMeter()
    losses = AverageMeter()
    top1 = AverageMeter()
    top5 = AverageMeter()

    end = time.time()
    for i, (inputs, target) in enumerate(data_loader):
        # measure data loading time
        data_time.update(time.time() - end)
        if args.gpus is not None:
            target = target.cuda()

        # Cast the batch to the configured tensor type. The deprecated
        # ``volatile`` flag is gone: gradient tracking during evaluation is
        # disabled by torch.no_grad() instead.
        batch = inputs.type(args.type)
        if training:
            output = model(batch)
        else:
            with torch.no_grad():
                output = model(batch)

        loss = criterion(output, target)
        # Some models return a list of outputs; the first one is scored.
        if isinstance(output, list):
            output = output[0]

        # measure accuracy and record loss
        prec1, prec5 = accuracy(output.data, target, topk=(1, 5))
        losses.update(loss.item(), inputs.size(0))
        top1.update(prec1.item(), inputs.size(0))
        top5.update(prec5.item(), inputs.size(0))

        if training:
            # compute gradient and do SGD step
            optimizer.zero_grad()
            loss.backward()
            # Parameters carrying an ``org`` attribute get their data
            # replaced by ``org`` before the step, and ``org`` is refreshed
            # with the stepped value clamped to [-1, 1] afterwards.
            # NOTE(review): ``org`` is presumably the full-precision copy
            # kept by the binarized layers -- confirm in binarized_modules.
            for p in model.parameters():
                if hasattr(p, 'org'):
                    p.data.copy_(p.org)
            optimizer.step()
            for p in model.parameters():
                if hasattr(p, 'org'):
                    p.org.copy_(p.data.clamp_(-1, 1))

        # measure elapsed time
        batch_time.update(time.time() - end)
        end = time.time()

        if i % args.print_freq == 0:
            logging.info('{phase} - Epoch: [{0}][{1}/{2}]\t'
                         'Time {batch_time.val:.3f} ({batch_time.avg:.3f})\t'
                         'Data {data_time.val:.3f} ({data_time.avg:.3f})\t'
                         'Loss {loss.val:.4f} ({loss.avg:.4f})\t'
                         'Prec@1 {top1.val:.3f} ({top1.avg:.3f})\t'
                         'Prec@5 {top5.val:.3f} ({top5.avg:.3f})'.format(
                             epoch, i, len(data_loader),
                             phase='TRAINING' if training else 'EVALUATING',
                             batch_time=batch_time,
                             data_time=data_time, loss=losses, top1=top1, top5=top5))

    return losses.avg, top1.avg, top5.avg
def train(data_loader, model, criterion, epoch, optimizer):
    """Put ``model`` in training mode and run one optimizing pass.

    Returns whatever ``forward`` returns for the pass.
    """
    model.train()  # switch to train mode
    pass_stats = forward(data_loader, model, criterion, epoch,
                         optimizer=optimizer, training=True)
    return pass_stats
def validate(data_loader, model, criterion, epoch):
    """Put ``model`` in evaluation mode and run one pass without updates.

    Returns whatever ``forward`` returns for the pass.
    """
    model.eval()  # switch to evaluate mode
    pass_stats = forward(data_loader, model, criterion, epoch,
                         optimizer=None, training=False)
    return pass_stats
# Run the training script only when this file is executed directly.
if __name__ == "__main__":
    main()
| @ -0,0 +1,332 @@ | |||
| import argparse | |||
| import os | |||
| import time | |||
| import logging | |||
| import torch | |||
| import torch.nn as nn | |||
| import torch.nn.parallel | |||
| import torch.backends.cudnn as cudnn | |||
| import torch.optim | |||
| import torch.utils.data | |||
| import models | |||
| from torch.autograd import Variable | |||
| from data import get_dataset | |||
| from preprocess import get_transform | |||
| from utils import * | |||
| from datetime import datetime | |||
| from ast import literal_eval | |||
| from torchvision.utils import save_image | |||
| from models.binarized_modules import HingeLoss | |||
# Names of all lowercase, non-dunder callables exported by the ``models``
# package; used as the allowed values of --model.
model_names = sorted(name for name in models.__dict__
                     if name.islower() and not name.startswith("__")
                     and callable(models.__dict__[name]))

# Command-line interface of the binarized training script.
parser = argparse.ArgumentParser(description='PyTorch ConvNet Training')
# Default is a path relative to the working directory (same as the
# non-binary script) rather than a machine-specific absolute path.
parser.add_argument('--results_dir', metavar='RESULTS_DIR', default='./results',
                    help='results dir')
parser.add_argument('--save', metavar='SAVE', default='',
                    help='saved folder')
parser.add_argument('--dataset', metavar='DATASET', default='imagenet',
                    help='dataset name or folder')
parser.add_argument('--model', '-a', metavar='MODEL', default='alexnet',
                    choices=model_names,
                    help='model architecture: ' +
                    ' | '.join(model_names) +
                    ' (default: alexnet)')
parser.add_argument('--input_size', type=int, default=None,
                    help='image input size')
parser.add_argument('--model_config', default='',
                    help='additional architecture configuration')
parser.add_argument('--type', default='torch.cuda.FloatTensor',
                    help='type of tensor - e.g torch.cuda.HalfTensor')
parser.add_argument('--gpus', default='0',
                    help='gpus used for training - e.g 0,1,3')
parser.add_argument('-j', '--workers', default=8, type=int, metavar='N',
                    help='number of data loading workers (default: 8)')
parser.add_argument('--epochs', default=900, type=int, metavar='N',
                    help='number of total epochs to run')
parser.add_argument('--start-epoch', default=0, type=int, metavar='N',
                    help='manual epoch number (useful on restarts)')
parser.add_argument('-b', '--batch-size', default=256, type=int,
                    metavar='N', help='mini-batch size (default: 256)')
parser.add_argument('--optimizer', default='SGD', type=str, metavar='OPT',
                    help='optimizer function used')
parser.add_argument('--lr', '--learning_rate', default=0.1, type=float,
                    metavar='LR', help='initial learning rate')
parser.add_argument('--momentum', default=0.9, type=float, metavar='M',
                    help='momentum')
parser.add_argument('--weight-decay', '--wd', default=1e-4, type=float,
                    metavar='W', help='weight decay (default: 1e-4)')
parser.add_argument('--print-freq', '-p', default=10, type=int,
                    metavar='N', help='print frequency (default: 10)')
parser.add_argument('--resume', default='', type=str, metavar='PATH',
                    help='path to latest checkpoint (default: none)')
parser.add_argument('-e', '--evaluate', type=str, metavar='FILE',
                    help='evaluate model FILE on validation set')

# Fixed seed for the CUDA random number generators.
torch.cuda.random.manual_seed_all(10)

# Number of classes of the selected dataset; filled in by main().
output_dim = 0
def main():
    """Parse the command line, build the binarized model, train or evaluate.

    Sets the module globals ``args``, ``best_prec1`` and ``output_dim``.
    Creates the results directory, configures logging, optionally restores a
    checkpoint (``--evaluate`` or ``--resume``), and either runs a single
    validation pass or the full train/validate loop, saving a checkpoint,
    plotting and writing the results log after every epoch.

    Helpers such as ``setup_logging``, ``ResultsLog``, ``adjust_optimizer``
    and ``save_checkpoint`` are not defined in this script; presumably they
    come from ``from utils import *`` -- confirm against that module.
    """
    global args, best_prec1, output_dim
    best_prec1 = 0
    args = parser.parse_args()

    # Number of classes per supported dataset; report an unsupported name as
    # a usage error instead of an unexplained KeyError.
    num_classes_by_dataset = {'cifar10': 10, 'cifar100': 100, 'imagenet': 1000}
    if args.dataset not in num_classes_by_dataset:
        parser.error('unsupported dataset: {} (expected one of: {})'.format(
            args.dataset, ', '.join(sorted(num_classes_by_dataset))))
    output_dim = num_classes_by_dataset[args.dataset]

    if args.evaluate:
        args.results_dir = '/tmp'
    # Compare by value: ``is ''`` tests object identity, which depends on
    # interpreter string interning and triggers a SyntaxWarning on newer
    # Python versions.
    if args.save == '':
        args.save = datetime.now().strftime('%Y-%m-%d_%H-%M-%S')
    save_path = os.path.join(args.results_dir, args.save)
    if not os.path.exists(save_path):
        os.makedirs(save_path)

    setup_logging(os.path.join(save_path, 'log.txt'))
    results_file = os.path.join(save_path, 'results.%s')
    results = ResultsLog(results_file % 'csv', results_file % 'html')

    logging.info("saving to %s", save_path)
    logging.debug("run arguments: %s", args)

    if 'cuda' in args.type:
        # --gpus arrives as a comma-separated string; turn it into a list of
        # device ids and make the first one the current device.
        args.gpus = [int(i) for i in args.gpus.split(',')]
        torch.cuda.set_device(args.gpus[0])
        cudnn.benchmark = True
    else:
        args.gpus = None

    # create model
    logging.info("creating model %s", args.model)
    model = models.__dict__[args.model]
    model_config = {'input_size': args.input_size, 'dataset': args.dataset,
                    'num_classes': output_dim}

    if args.model_config != '':
        # --model_config is a Python dict literal that overrides/extends the
        # default configuration.
        model_config = dict(model_config, **literal_eval(args.model_config))

    model = model(**model_config)
    logging.info("created model with configuration: %s", model_config)

    # optionally resume from a checkpoint
    if args.evaluate:
        if not os.path.isfile(args.evaluate):
            parser.error('invalid checkpoint: {}'.format(args.evaluate))
        checkpoint = torch.load(args.evaluate)
        model.load_state_dict(checkpoint['state_dict'])
        logging.info("loaded checkpoint '%s' (epoch %s)",
                     args.evaluate, checkpoint['epoch'])
    elif args.resume:
        checkpoint_file = args.resume
        if os.path.isdir(checkpoint_file):
            # A directory means "resume this run": reload its results log
            # and pick up the best checkpoint stored inside it.
            results.load(os.path.join(checkpoint_file, 'results.csv'))
            checkpoint_file = os.path.join(
                checkpoint_file, 'model_best.pth.tar')
        if os.path.isfile(checkpoint_file):
            logging.info("loading checkpoint '%s'", args.resume)
            checkpoint = torch.load(checkpoint_file)
            # Checkpoints store ``epoch + 1`` (see save_checkpoint call
            # below), so this restarts at the epoch that was saved.
            args.start_epoch = checkpoint['epoch'] - 1
            best_prec1 = checkpoint['best_prec1']
            model.load_state_dict(checkpoint['state_dict'])
            logging.info("loaded checkpoint '%s' (epoch %s)",
                         checkpoint_file, checkpoint['epoch'])
        else:
            logging.error("no checkpoint found at '%s'", args.resume)

    num_parameters = sum(p.nelement() for p in model.parameters())
    logging.info("number of parameters: %d", num_parameters)

    # Data loading code
    default_transform = {
        'train': get_transform(args.dataset,
                               input_size=args.input_size, augment=True),
        'eval': get_transform(args.dataset,
                              input_size=args.input_size, augment=False)
    }
    # A model may supply its own transforms / training regime as attributes;
    # otherwise fall back to the defaults built from the command line.
    transform = getattr(model, 'input_transform', default_transform)
    regime = getattr(model, 'regime', {0: {'optimizer': args.optimizer,
                                           'lr': args.lr,
                                           'momentum': args.momentum,
                                           'weight_decay': args.weight_decay}})
    # define loss function (criterion) and optimizer; the hinge loss is the
    # default unless the model provides its own ``criterion``.
    criterion = getattr(model, 'criterion', HingeLoss)()
    model.type(args.type)

    val_data = get_dataset(args.dataset, 'val', transform['eval'])
    val_loader = torch.utils.data.DataLoader(
        val_data,
        batch_size=args.batch_size, shuffle=False,
        num_workers=args.workers, pin_memory=True)

    if args.evaluate:
        validate(val_loader, model, criterion, 0)
        return

    train_data = get_dataset(args.dataset, 'train', transform['train'])
    train_loader = torch.utils.data.DataLoader(
        train_data,
        batch_size=args.batch_size, shuffle=True,
        num_workers=args.workers, pin_memory=True)

    # Initial optimizer; adjust_optimizer() is given the regime every epoch
    # and returns the optimizer to use from then on.
    optimizer = torch.optim.SGD(model.parameters(), lr=args.lr)
    logging.info('training regime: %s', regime)

    for epoch in range(args.start_epoch, args.epochs):
        optimizer = adjust_optimizer(optimizer, epoch, regime)

        # train for one epoch
        train_loss, train_prec1, train_prec5 = train(
            train_loader, model, criterion, epoch, optimizer)

        # evaluate on validation set
        val_loss, val_prec1, val_prec5 = validate(
            val_loader, model, criterion, epoch)

        # remember best prec@1 and save checkpoint
        is_best = val_prec1 > best_prec1
        best_prec1 = max(val_prec1, best_prec1)

        save_checkpoint({
            'epoch': epoch + 1,
            'model': args.model,
            'config': args.model_config,
            'state_dict': model.state_dict(),
            'best_prec1': best_prec1,
            'regime': regime
        }, is_best, path=save_path)
        logging.info('\n Epoch: {0}\t'
                     'Training Loss {train_loss:.4f} \t'
                     'Training Prec@1 {train_prec1:.3f} \t'
                     'Training Prec@5 {train_prec5:.3f} \t'
                     'Validation Loss {val_loss:.4f} \t'
                     'Validation Prec@1 {val_prec1:.3f} \t'
                     'Validation Prec@5 {val_prec5:.3f} \n'
                     .format(epoch + 1, train_loss=train_loss, val_loss=val_loss,
                             train_prec1=train_prec1, val_prec1=val_prec1,
                             train_prec5=train_prec5, val_prec5=val_prec5))

        results.add(epoch=epoch + 1, train_loss=train_loss, val_loss=val_loss,
                    train_error1=100 - train_prec1, val_error1=100 - val_prec1,
                    train_error5=100 - train_prec5, val_error5=100 - val_prec5)
        results.plot(x='epoch', y=['train_loss', 'val_loss'],
                     title='Loss', ylabel='loss')
        results.plot(x='epoch', y=['train_error1', 'val_error1'],
                     title='Error@1', ylabel='error %')
        results.plot(x='epoch', y=['train_error5', 'val_error5'],
                     title='Error@5', ylabel='error %')
        results.save()
def forward(data_loader, model, criterion, epoch=0, training=True, optimizer=None):
    """Run one pass over ``data_loader``, updating the model when training.

    Args:
        data_loader: iterable yielding ``(inputs, target)`` batches, where
            ``target`` holds integer class indices.
        model: network to run; wrapped in ``DataParallel`` when more than one
            GPU id is listed in the global ``args.gpus``.
        criterion: loss callable. When its class is named ``HingeLoss`` it is
            given a +1/-1 encoded target of width ``output_dim``; any other
            criterion receives the class-index target unchanged.
        epoch: epoch number, used only in the log line.
        training: when True, back-propagate and step ``optimizer``; when
            False, run the model under ``torch.no_grad()``.
        optimizer: optimizer to step; required when ``training`` is True.

    Returns:
        Tuple ``(average loss, average prec@1, average prec@5)`` over the pass.
    """
    if args.gpus and len(args.gpus) > 1:
        model = torch.nn.DataParallel(model, args.gpus)

    batch_time = AverageMeter()
    data_time = AverageMeter()
    losses = AverageMeter()
    top1 = AverageMeter()
    top5 = AverageMeter()

    end = time.time()
    for i, (inputs, target) in enumerate(data_loader):
        # measure data loading time
        data_time.update(time.time() - end)
        if args.gpus is not None:
            target = target.cuda()

        if criterion.__class__.__name__ == 'HingeLoss':
            # Build the hinge target: +1 at the labelled class, -1 elsewhere.
            # Allocated on the target's device so CPU runs work as well, and
            # ``target`` itself is left untouched (no unsqueeze/squeeze round
            # trip, which collapsed a batch of one to a 0-dim tensor).
            loss_target = torch.full((target.size(0), output_dim), -1.0,
                                     device=target.device)
            loss_target.scatter_(1, target.unsqueeze(1), 1)
        else:
            # Other criteria are given the class indices directly; previously
            # this path hit an undefined ``target_onehot``.
            loss_target = target

        batch = inputs.type(args.type)
        if training:
            output = model(batch)
        else:
            with torch.no_grad():
                output = model(batch)

        loss = criterion(output, loss_target)
        # Some models return a list of outputs; the first one is scored.
        if isinstance(output, list):
            output = output[0]

        # measure accuracy and record loss
        prec1, prec5 = accuracy(output.data, target, topk=(1, 5))
        losses.update(loss.item(), inputs.size(0))
        top1.update(prec1.item(), inputs.size(0))
        top5.update(prec5.item(), inputs.size(0))

        if training:
            # compute gradient and do SGD step
            optimizer.zero_grad()
            loss.backward()
            # Parameters carrying an ``org`` attribute get their data
            # replaced by ``org`` before the step, and ``org`` is refreshed
            # with the stepped value clamped to [-1, 1] afterwards.
            # NOTE(review): ``org`` is presumably the full-precision copy
            # kept by the binarized layers -- confirm in binarized_modules.
            for p in model.parameters():
                if hasattr(p, 'org'):
                    p.data.copy_(p.org)
            optimizer.step()
            for p in model.parameters():
                if hasattr(p, 'org'):
                    p.org.copy_(p.data.clamp_(-1, 1))

        # measure elapsed time
        batch_time.update(time.time() - end)
        end = time.time()

        if i % args.print_freq == 0:
            logging.info('{phase} - Epoch: [{0}][{1}/{2}]\t'
                         'Time {batch_time.val:.3f} ({batch_time.avg:.3f})\t'
                         'Data {data_time.val:.3f} ({data_time.avg:.3f})\t'
                         'Loss {loss.val:.4f} ({loss.avg:.4f})\t'
                         'Prec@1 {top1.val:.3f} ({top1.avg:.3f})\t'
                         'Prec@5 {top5.val:.3f} ({top5.avg:.3f})'.format(
                             epoch, i, len(data_loader),
                             phase='TRAINING' if training else 'EVALUATING',
                             batch_time=batch_time,
                             data_time=data_time, loss=losses, top1=top1, top5=top5))

    return losses.avg, top1.avg, top5.avg
def train(data_loader, model, criterion, epoch, optimizer):
    """Put ``model`` in training mode and run one optimizing pass.

    Returns whatever ``forward`` returns for the pass.
    """
    model.train()  # switch to train mode
    pass_stats = forward(data_loader, model, criterion, epoch,
                         optimizer=optimizer, training=True)
    return pass_stats
def validate(data_loader, model, criterion, epoch):
    """Put ``model`` in evaluation mode and run one pass without updates.

    Returns whatever ``forward`` returns for the pass.
    """
    model.eval()  # switch to evaluate mode
    pass_stats = forward(data_loader, model, criterion, epoch,
                         optimizer=None, training=False)
    return pass_stats
# Run the training script only when this file is executed directly.
if __name__ == "__main__":
    main()
| @ -0,0 +1,150 @@ | |||
| from __future__ import print_function | |||
| import argparse | |||
| import torch | |||
| import torch.nn as nn | |||
| import torch.nn.functional as F | |||
| import torch.optim as optim | |||
| from torchvision import datasets, transforms | |||
| from torch.autograd import Variable | |||
| from models.binarized_modules import BinarizeLinear,BinarizeConv2d | |||
| from models.binarized_modules import Binarize,HingeLoss | |||
| # Training settings | |||
# Command-line options; the help texts state the defaults actually in effect.
parser = argparse.ArgumentParser(description='PyTorch MNIST Example')
parser.add_argument('--batch-size', type=int, default=64, metavar='N',
                    help='input batch size for training (default: 64)')
parser.add_argument('--test-batch-size', type=int, default=1000, metavar='N',
                    help='input batch size for testing (default: 1000)')
parser.add_argument('--epochs', type=int, default=100, metavar='N',
                    help='number of epochs to train (default: 100)')
parser.add_argument('--lr', type=float, default=0.01, metavar='LR',
                    help='learning rate (default: 0.01)')
parser.add_argument('--momentum', type=float, default=0.5, metavar='M',
                    help='SGD momentum (default: 0.5)')
parser.add_argument('--no-cuda', action='store_true', default=False,
                    help='disables CUDA training')
parser.add_argument('--seed', type=int, default=1, metavar='S',
                    help='random seed (default: 1)')
parser.add_argument('--gpus', default=3,
                    help='gpus used for training - e.g 0,1,3')
parser.add_argument('--log-interval', type=int, default=10, metavar='N',
                    help='how many batches to wait before logging training status')
args = parser.parse_args()
# CUDA is used only when it is available and not explicitly disabled.
args.cuda = not args.no_cuda and torch.cuda.is_available()

# Seed the CPU (and, if used, the CUDA) random number generators.
torch.manual_seed(args.seed)
if args.cuda:
    torch.cuda.manual_seed(args.seed)

# Extra DataLoader options are only passed when running on CUDA.
kwargs = {'num_workers': 1, 'pin_memory': True} if args.cuda else {}
train_loader = torch.utils.data.DataLoader(
    datasets.MNIST('../data', train=True, download=True,
                   transform=transforms.Compose([
                       transforms.ToTensor(),
                       transforms.Normalize((0.1307,), (0.3081,))
                   ])),
    batch_size=args.batch_size, shuffle=True, **kwargs)
test_loader = torch.utils.data.DataLoader(
    datasets.MNIST('../data', train=False, transform=transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize((0.1307,), (0.3081,))
    ])),
    batch_size=args.test_batch_size, shuffle=True, **kwargs)
class Net(nn.Module):
    """Fully connected MNIST classifier built from binarized linear layers.

    Three ``BinarizeLinear`` layers of width ``2048 * infl_ratio``, each
    followed by batch normalization and ``Hardtanh``, then a regular linear
    layer producing 10 outputs that are passed through log-softmax.
    """

    def __init__(self):
        super(Net, self).__init__()
        # Multiplier applied to the base hidden width of 2048.
        self.infl_ratio = 3
        hidden = 2048 * self.infl_ratio
        self.fc1 = BinarizeLinear(784, hidden)
        self.htanh1 = nn.Hardtanh()
        self.bn1 = nn.BatchNorm1d(hidden)
        self.fc2 = BinarizeLinear(hidden, hidden)
        self.htanh2 = nn.Hardtanh()
        self.bn2 = nn.BatchNorm1d(hidden)
        self.fc3 = BinarizeLinear(hidden, hidden)
        self.htanh3 = nn.Hardtanh()
        self.bn3 = nn.BatchNorm1d(hidden)
        self.fc4 = nn.Linear(hidden, 10)
        # Normalize over the class dimension explicitly; omitting ``dim``
        # relies on a deprecated implicit choice (which is also 1 for the
        # 2-D activations produced here).
        self.logsoftmax = nn.LogSoftmax(dim=1)
        self.drop = nn.Dropout(0.5)

    def forward(self, x):
        """Return per-class log-probabilities for a batch of 28x28 inputs."""
        # Flatten every sample to a 784-element vector.
        x = x.view(-1, 28*28)
        x = self.fc1(x)
        x = self.bn1(x)
        x = self.htanh1(x)
        x = self.fc2(x)
        x = self.bn2(x)
        x = self.htanh2(x)
        x = self.fc3(x)
        # Dropout is applied only here, before the last batch norm.
        x = self.drop(x)
        x = self.bn3(x)
        x = self.htanh3(x)
        x = self.fc4(x)
        return self.logsoftmax(x)
# Build the network, move it to the selected GPU when CUDA is in use, and
# create the loss and optimizer used by train() and test().
model = Net()
if args.cuda:
    # Use the device given by --gpus (first id of a comma-separated list)
    # instead of a hard-coded device; the option's default is still 3.
    torch.cuda.set_device(int(str(args.gpus).split(',')[0]))
    model.cuda()

criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=args.lr)
def train(epoch):
    """Train the global ``model`` for one pass over ``train_loader``.

    Uses the module-level ``model``, ``optimizer``, ``criterion``,
    ``train_loader`` and ``args``. Progress is printed every
    ``args.log_interval`` batches.

    Args:
        epoch: 1-based epoch number, used only in the progress output.
    """
    model.train()
    for batch_idx, (data, target) in enumerate(train_loader):
        if args.cuda:
            data, target = data.cuda(), target.cuda()
        optimizer.zero_grad()
        output = model(data)
        loss = criterion(output, target)

        # The learning rate is NOT touched here: decaying it inside the
        # batch loop multiplied it by 0.1 on every batch of every 40th
        # epoch. The once-per-epoch decay lives in the top-level epoch loop.
        loss.backward()
        # Parameters carrying an ``org`` attribute get their data replaced
        # by ``org`` before the step, and ``org`` is refreshed with the
        # stepped value clamped to [-1, 1] afterwards.
        # NOTE(review): ``org`` is presumably the full-precision copy kept
        # by the binarized layers -- confirm in binarized_modules.
        for p in model.parameters():
            if hasattr(p, 'org'):
                p.data.copy_(p.org)
        optimizer.step()
        for p in model.parameters():
            if hasattr(p, 'org'):
                p.org.copy_(p.data.clamp_(-1, 1))

        if batch_idx % args.log_interval == 0:
            print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format(
                epoch, batch_idx * len(data), len(train_loader.dataset),
                100. * batch_idx / len(train_loader), loss.item()))
def test():
    """Evaluate the model on ``test_loader`` and print average loss and accuracy.

    ``criterion`` returns the mean loss of a batch, so each batch loss is
    weighted by its size before dividing by the dataset size; this yields the
    true per-sample average even when the last batch is smaller.
    """
    model.eval()
    test_loss = 0.0
    correct = 0
    num_samples = len(test_loader.dataset)
    with torch.no_grad():
        for data, target in test_loader:
            if args.cuda:
                data, target = data.cuda(), target.cuda()
            output = model(data)
            # Undo the per-batch mean so the final division is per sample.
            test_loss += criterion(output, target).item() * target.size(0)
            # Index of the largest log-probability is the predicted class.
            pred = output.max(1, keepdim=True)[1]
            # .item() keeps `correct` a plain int so it formats as a number.
            correct += pred.eq(target.view_as(pred)).sum().item()

    test_loss /= num_samples
    print('\nTest set: Average loss: {:.4f}, Accuracy: {}/{} ({:.0f}%)\n'.format(
        test_loss, correct, num_samples,
        100. * correct / num_samples))
# Main loop: train then evaluate once per epoch; after every 40th epoch the
# learning rate of the first parameter group is reduced tenfold.
for epoch in range(1, args.epochs + 1):
    train(epoch)
    test()
    if not epoch % 40:
        first_group = optimizer.param_groups[0]
        first_group['lr'] = first_group['lr'] * 0.1
| @ -0,0 +1,6 @@ | |||
| from .alexnet import * | |||
| from .alexnet_binary import * | |||
| from .resnet import * | |||
| from .resnet_binary import * | |||
| from .vgg_cifar10_binary import * | |||
| @ -0,0 +1,78 @@ | |||
| import torch.nn as nn | |||
| import torchvision.transforms as transforms | |||
| __all__ = ['alexnet'] | |||
class AlexNetOWT_BN(nn.Module):
    """AlexNet variant with batch normalisation for 224x224 RGB inputs.

    Besides the network itself, the instance exposes two attributes:

    * ``regime`` -- mapping from epoch to optimiser settings.
    * ``input_transform`` -- ``'train'`` / ``'eval'`` torchvision pipelines.

    Args:
        num_classes: size of the final linear layer.
    """

    def __init__(self, num_classes=1000):
        super(AlexNetOWT_BN, self).__init__()
        self.features = nn.Sequential(
            nn.Conv2d(3, 64, kernel_size=11, stride=4, padding=2,
                      bias=False),
            nn.MaxPool2d(kernel_size=3, stride=2),
            nn.BatchNorm2d(64),
            nn.ReLU(inplace=True),
            nn.Conv2d(64, 192, kernel_size=5, padding=2, bias=False),
            nn.MaxPool2d(kernel_size=3, stride=2),
            nn.ReLU(inplace=True),
            nn.BatchNorm2d(192),
            nn.Conv2d(192, 384, kernel_size=3, padding=1, bias=False),
            nn.ReLU(inplace=True),
            nn.BatchNorm2d(384),
            nn.Conv2d(384, 256, kernel_size=3, padding=1, bias=False),
            nn.ReLU(inplace=True),
            nn.BatchNorm2d(256),
            nn.Conv2d(256, 256, kernel_size=3, padding=1, bias=False),
            nn.MaxPool2d(kernel_size=3, stride=2),
            nn.ReLU(inplace=True),
            nn.BatchNorm2d(256)
        )
        self.classifier = nn.Sequential(
            nn.Linear(256 * 6 * 6, 4096, bias=False),
            nn.BatchNorm1d(4096),
            nn.ReLU(inplace=True),
            nn.Dropout(0.5),
            nn.Linear(4096, 4096, bias=False),
            nn.BatchNorm1d(4096),
            nn.ReLU(inplace=True),
            nn.Dropout(0.5),
            nn.Linear(4096, num_classes)
        )
        # Epoch -> optimiser settings.
        self.regime = {
            0: {'optimizer': 'SGD', 'lr': 1e-2,
                'weight_decay': 5e-4, 'momentum': 0.9},
            10: {'lr': 5e-3},
            15: {'lr': 1e-3, 'weight_decay': 0},
            20: {'lr': 5e-4},
            25: {'lr': 1e-4}
        }
        normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                         std=[0.229, 0.224, 0.225])
        # transforms.Resize replaces transforms.Scale, which was deprecated
        # and later removed from torchvision.
        self.input_transform = {
            'train': transforms.Compose([
                transforms.Resize(256),
                transforms.RandomCrop(224),
                transforms.RandomHorizontalFlip(),
                transforms.ToTensor(),
                normalize
            ]),
            'eval': transforms.Compose([
                transforms.Resize(256),
                transforms.CenterCrop(224),
                transforms.ToTensor(),
                normalize
            ])
        }

    def forward(self, x):
        """Return class scores of shape ``(batch, num_classes)``."""
        x = self.features(x)
        # Flatten the 256 x 6 x 6 feature map for the classifier.
        x = x.view(-1, 256 * 6 * 6)
        x = self.classifier(x)
        return x
def alexnet(**kwargs):
    """Build an ``AlexNetOWT_BN``.

    Only the ``num_classes`` keyword is read (default 1000); any other
    keyword is ignored.
    """
    return AlexNetOWT_BN(kwargs.get('num_classes', 1000))
| @ -0,0 +1,92 @@ | |||
| import torch.nn as nn | |||
| import torchvision.transforms as transforms | |||
| from .binarized_modules import BinarizeLinear,BinarizeConv2d | |||
| __all__ = ['alexnet_binary'] | |||
class AlexNetOWT_BN(nn.Module):
    """Binarized AlexNet variant for 224x224 RGB inputs.

    Convolutions and linear layers are ``BinarizeConv2d`` / ``BinarizeLinear``
    with ``Hardtanh`` activations; the channel counts of the first four
    convolutions are inflated by ``ratioInfl``.  The output is
    log-probabilities over ``num_classes``.

    Attributes set on the instance:

    * ``regime`` -- mapping from epoch to optimiser settings.
    * ``input_transform`` -- ``'train'`` / ``'eval'`` torchvision pipelines.

    Args:
        num_classes: number of output classes.
    """

    def __init__(self, num_classes=1000):
        super(AlexNetOWT_BN, self).__init__()
        self.ratioInfl = 3
        self.features = nn.Sequential(
            BinarizeConv2d(3, int(64*self.ratioInfl), kernel_size=11, stride=4, padding=2),
            nn.MaxPool2d(kernel_size=3, stride=2),
            nn.BatchNorm2d(int(64*self.ratioInfl)),
            nn.Hardtanh(inplace=True),
            BinarizeConv2d(int(64*self.ratioInfl), int(192*self.ratioInfl), kernel_size=5, padding=2),
            nn.MaxPool2d(kernel_size=3, stride=2),
            nn.BatchNorm2d(int(192*self.ratioInfl)),
            nn.Hardtanh(inplace=True),
            BinarizeConv2d(int(192*self.ratioInfl), int(384*self.ratioInfl), kernel_size=3, padding=1),
            nn.BatchNorm2d(int(384*self.ratioInfl)),
            nn.Hardtanh(inplace=True),
            BinarizeConv2d(int(384*self.ratioInfl), int(256*self.ratioInfl), kernel_size=3, padding=1),
            nn.BatchNorm2d(int(256*self.ratioInfl)),
            nn.Hardtanh(inplace=True),
            BinarizeConv2d(int(256*self.ratioInfl), 256, kernel_size=3, padding=1),
            nn.MaxPool2d(kernel_size=3, stride=2),
            nn.BatchNorm2d(256),
            nn.Hardtanh(inplace=True)
        )
        self.classifier = nn.Sequential(
            BinarizeLinear(256 * 6 * 6, 4096),
            nn.BatchNorm1d(4096),
            nn.Hardtanh(inplace=True),
            BinarizeLinear(4096, 4096),
            nn.BatchNorm1d(4096),
            nn.Hardtanh(inplace=True),
            BinarizeLinear(4096, num_classes),
            # Must follow num_classes; a fixed 1000 breaks any other class count.
            nn.BatchNorm1d(num_classes),
            # Explicit class dimension (the implicit form is deprecated).
            nn.LogSoftmax(dim=1)
        )
        # Epoch -> optimiser settings.
        self.regime = {
            0: {'optimizer': 'Adam', 'lr': 5e-3},
            20: {'lr': 1e-3},
            30: {'lr': 5e-4},
            35: {'lr': 1e-4},
            40: {'lr': 1e-5}
        }
        normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                         std=[0.229, 0.224, 0.225])
        # transforms.Resize replaces transforms.Scale, which was deprecated
        # and later removed from torchvision.
        self.input_transform = {
            'train': transforms.Compose([
                transforms.Resize(256),
                transforms.RandomCrop(224),
                transforms.RandomHorizontalFlip(),
                transforms.ToTensor(),
                normalize
            ]),
            'eval': transforms.Compose([
                transforms.Resize(256),
                transforms.CenterCrop(224),
                transforms.ToTensor(),
                normalize
            ])
        }

    def forward(self, x):
        """Return log-probabilities of shape ``(batch, num_classes)``."""
        x = self.features(x)
        # Flatten the 256 x 6 x 6 feature map for the classifier.
        x = x.view(-1, 256 * 6 * 6)
        x = self.classifier(x)
        return x
def alexnet_binary(**kwargs):
    """Build the binarized ``AlexNetOWT_BN``.

    Only the ``num_classes`` keyword is read (default 1000); any other
    keyword is ignored.
    """
    return AlexNetOWT_BN(kwargs.get('num_classes', 1000))
| @ -0,0 +1,109 @@ | |||
| import torch | |||
| import pdb | |||
| import torch.nn as nn | |||
| import math | |||
| from torch.autograd import Variable | |||
| from torch.autograd import Function | |||
| import numpy as np | |||
def Binarize(tensor, quant_mode='det'):
    """Return a binarized copy of ``tensor``; the input is never modified.

    Args:
        tensor: floating-point tensor to binarize.
        quant_mode: ``'det'`` returns ``tensor.sign()`` (zeros stay zero).
            Any other value selects stochastic binarization: uniform noise in
            ``[-0.5, 0.5)`` is added to ``(tensor + 1) / 2``, the result is
            clamped to ``[0, 1]``, rounded and mapped to ``{-1, +1}``.

    Returns:
        A new tensor with the same shape, dtype and device as ``tensor``.
    """
    if quant_mode == 'det':
        return tensor.sign()
    # tensor.add(1) allocates the working copy, so the in-place operations
    # below never touch the caller's tensor (the layers pass `weight.org`,
    # which must keep its real-valued content).
    shifted = tensor.add(1).div_(2)
    # rand_like draws the noise on the same device and dtype as the input.
    noise = torch.rand_like(tensor).sub_(0.5)
    return shifted.add_(noise).clamp_(0, 1).round_().mul_(2).sub_(1)
class HingeLoss(nn.Module):
    """Mean hinge loss ``mean(max(0, margin - input * target))`` with margin 1."""

    def __init__(self):
        super(HingeLoss, self).__init__()
        self.margin = 1.0

    def hinge_loss(self, input, target):
        """Return the mean hinge loss of ``input`` against ``target``."""
        slack = self.margin - input.mul(target)
        # Entries that already satisfy the margin contribute nothing.
        slack = slack.masked_fill(slack.le(0), 0)
        return slack.mean()

    def forward(self, input, target):
        """Module entry point; delegates to :meth:`hinge_loss`."""
        return self.hinge_loss(input, target)
class SqrtHingeLossFunction(Function):
    """Squared hinge loss ``sum(max(0, margin - input * target) ** 2) / N``.

    Implemented as a static-method autograd function; call it as
    ``SqrtHingeLossFunction.apply(input, target)``.  The previous
    instance-method form is rejected by current PyTorch, dropped into a
    ``pdb`` breakpoint on every backward pass, overwrote the incoming
    gradient and returned a gradient for ``target``.

    The loss is returned as a scalar tensor.  No gradient is produced for
    ``target``.
    """

    margin = 1.0

    @staticmethod
    def forward(ctx, input, target):
        """Return the scalar squared hinge loss of ``input`` against ``target``."""
        ctx.save_for_backward(input, target)
        slack = (SqrtHingeLossFunction.margin - input.mul(target)).clamp_(min=0)
        return slack.pow(2).sum().div(target.numel())

    @staticmethod
    def backward(ctx, grad_output):
        """Return ``(d loss / d input, None)`` scaled by ``grad_output``."""
        input, target = ctx.saved_tensors
        slack = (SqrtHingeLossFunction.margin - input.mul(target)).clamp_(min=0)
        # d/dx max(0, m - x*t)^2 = -2 * t * max(0, m - x*t); entries that
        # satisfy the margin have zero slack and therefore zero gradient.
        grad_input = target.mul(slack).mul_(-2.0 / input.numel())
        return grad_input.mul_(grad_output), None

    def __call__(self, input, target):
        # Compatibility shim for callers that still instantiate the function
        # and call the instance, as the legacy API required.
        return type(self).apply(input, target)
def Quantize(tensor, quant_mode='det', params=None, numBits=8):
    """Return ``tensor`` quantized to a fixed-point grid of step ``2**-(numBits-1)``.

    Values are first clamped to ``[-2**(numBits-1), 2**(numBits-1)]``.  The
    input tensor is left unmodified.

    Args:
        tensor: floating-point tensor to quantize.
        quant_mode: ``'det'`` rounds to the nearest grid point.  Any other
            value uses stochastic rounding: uniform noise in ``[-0.5, 0.5)``
            (in grid units) is added *before* rounding, so the result still
            lies on the grid.
        params: unused; kept for call compatibility.
        numBits: bit width that determines the grid step and clamp range.

    Returns:
        A new tensor with the same shape, dtype and device as ``tensor``.
    """
    # The former trailing call to `quant_fixed` referenced a name that is not
    # defined in this module and raised NameError; it has been removed.
    scale = 2 ** (numBits - 1)
    # clamp() (not clamp_) allocates the working copy, so the caller's tensor
    # is preserved; the in-place operations below act on that copy only.
    scaled = tensor.clamp(-scale, scale).mul_(scale)
    if quant_mode != 'det':
        # rand_like keeps the noise on the input's device and dtype.
        scaled.add_(torch.rand_like(scaled).sub_(0.5))
    return scaled.round_().div_(scale)
| #import torch.nn._functions as tnnf | |||
class BinarizeLinear(nn.Linear):
    """``nn.Linear`` whose weights are binarized on every forward pass.

    On first use the real-valued weights are copied to ``weight.org``; each
    forward pass then overwrites ``weight.data`` with ``Binarize(weight.org)``.
    When a bias is present its current data is copied to ``bias.org`` and the
    bias is added to the output unbinarized.  Inputs are used as given
    (input binarization is disabled in this version).
    """

    def __init__(self, *kargs, **kwargs):
        super(BinarizeLinear, self).__init__(*kargs, **kwargs)

    def forward(self, input):
        weight = self.weight
        if not hasattr(weight, 'org'):
            # Keep the real-valued weights; the training loop updates these.
            weight.org = weight.data.clone()
        weight.data = Binarize(weight.org)

        out = nn.functional.linear(input, weight)

        bias = self.bias
        if bias is not None:
            bias.org = bias.data.clone()
            out += bias.view(1, -1).expand_as(out)
        return out
class BinarizeConv2d(nn.Conv2d):
    """``nn.Conv2d`` whose weights are binarized on every forward pass.

    On first use the real-valued weights are copied to ``weight.org``; each
    forward pass then overwrites ``weight.data`` with ``Binarize(weight.org)``.
    The convolution itself runs without bias; when a bias is present its
    current data is copied to ``bias.org`` and it is added per output channel
    afterwards.  Inputs are used as given (input binarization is disabled in
    this version).
    """

    def __init__(self, *kargs, **kwargs):
        super(BinarizeConv2d, self).__init__(*kargs, **kwargs)

    def forward(self, input):
        weight = self.weight
        if not hasattr(weight, 'org'):
            # Keep the real-valued weights; the training loop updates these.
            weight.org = weight.data.clone()
        weight.data = Binarize(weight.org)

        out = nn.functional.conv2d(input, weight, None, self.stride,
                                   self.padding, self.dilation, self.groups)

        bias = self.bias
        if bias is not None:
            bias.org = bias.data.clone()
            # Reshape to (1, C, 1, 1) so the bias is added per output channel.
            out += bias.view(1, -1, 1, 1).expand_as(out)
        return out
| # x = torch.tensor([[255.0, 200.0, 201.0], [210.0, 222.0, 223.0]]) | |||
| # print(Quantize(x,quant_mode='det', params=None, numBits=8)) | |||
| @ -0,0 +1,217 @@ | |||
| import torch.nn as nn | |||
| import torchvision.transforms as transforms | |||
| import math | |||
| __all__ = ['resnet'] | |||
def conv3x3(in_planes, out_planes, stride=1):
    """Return a bias-free 3x3 ``nn.Conv2d`` with padding 1 and the given stride."""
    conv_options = dict(kernel_size=3, stride=stride, padding=1, bias=False)
    return nn.Conv2d(in_planes, out_planes, **conv_options)
def init_model(model):
    """Initialise the convolution and 2-D batch-norm layers of ``model`` in place.

    ``nn.Conv2d`` weights are drawn from ``N(0, sqrt(2 / n))`` with
    ``n = kernel_h * kernel_w * out_channels``; ``nn.BatchNorm2d`` weights are
    set to 1 and biases to 0.  Other modules are left untouched.
    """
    for module in model.modules():
        if isinstance(module, nn.Conv2d):
            kernel_h, kernel_w = module.kernel_size[0], module.kernel_size[1]
            fan = kernel_h * kernel_w * module.out_channels
            module.weight.data.normal_(0, math.sqrt(2. / fan))
            continue
        if isinstance(module, nn.BatchNorm2d):
            module.weight.data.fill_(1)
            module.bias.data.zero_()
class BasicBlock(nn.Module):
    """Residual block of two 3x3 convolutions, each followed by batch norm.

    Args:
        inplanes: input channel count.
        planes: output channel count of both convolutions.
        stride: stride of the first convolution.
        downsample: optional module applied to the input to produce the
            shortcut; when ``None`` the input itself is the shortcut.
    """

    expansion = 1

    def __init__(self, inplanes, planes, stride=1, downsample=None):
        super(BasicBlock, self).__init__()
        self.conv1 = conv3x3(inplanes, planes, stride)
        self.bn1 = nn.BatchNorm2d(planes)
        self.relu = nn.ReLU(inplace=True)
        self.conv2 = conv3x3(planes, planes)
        self.bn2 = nn.BatchNorm2d(planes)
        self.downsample = downsample
        self.stride = stride

    def forward(self, x):
        out = self.relu(self.bn1(self.conv1(x)))
        out = self.bn2(self.conv2(out))

        shortcut = x if self.downsample is None else self.downsample(x)

        out += shortcut
        return self.relu(out)
class Bottleneck(nn.Module):
    """Residual bottleneck block: 1x1 -> 3x3 -> 1x1 convolutions with batch norm.

    The last convolution produces ``planes * 4`` channels.

    Args:
        inplanes: input channel count.
        planes: channel count of the first two convolutions.
        stride: stride of the 3x3 convolution.
        downsample: optional module applied to the input to produce the
            shortcut; when ``None`` the input itself is the shortcut.
    """

    expansion = 4

    def __init__(self, inplanes, planes, stride=1, downsample=None):
        super(Bottleneck, self).__init__()
        widened = planes * 4
        self.conv1 = nn.Conv2d(inplanes, planes, kernel_size=1, bias=False)
        self.bn1 = nn.BatchNorm2d(planes)
        self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=stride,
                               padding=1, bias=False)
        self.bn2 = nn.BatchNorm2d(planes)
        self.conv3 = nn.Conv2d(planes, widened, kernel_size=1, bias=False)
        self.bn3 = nn.BatchNorm2d(widened)
        self.relu = nn.ReLU(inplace=True)
        self.downsample = downsample
        self.stride = stride

    def forward(self, x):
        out = self.relu(self.bn1(self.conv1(x)))
        out = self.relu(self.bn2(self.conv2(out)))
        out = self.bn3(self.conv3(out))

        shortcut = x if self.downsample is None else self.downsample(x)

        out += shortcut
        return self.relu(out)
class ResNet(nn.Module):
    """Shared ResNet skeleton.

    Subclasses are expected to set ``inplanes`` and the stages used by
    :meth:`forward` (``conv1``, ``bn1``, ``relu``, ``maxpool``,
    ``layer1``..``layer4``, ``avgpool``, ``fc``).
    """

    def __init__(self):
        super(ResNet, self).__init__()

    def _make_layer(self, block, planes, blocks, stride=1):
        """Build a stage of ``blocks`` residual blocks and advance ``self.inplanes``.

        The first block receives ``stride`` and, when the stride is not 1 or
        the channel count changes, a 1x1-convolution + batch-norm shortcut.
        """
        out_planes = planes * block.expansion
        downsample = None
        if not (stride == 1 and self.inplanes == out_planes):
            downsample = nn.Sequential(
                nn.Conv2d(self.inplanes, out_planes,
                          kernel_size=1, stride=stride, bias=False),
                nn.BatchNorm2d(out_planes),
            )

        stage = [block(self.inplanes, planes, stride, downsample)]
        self.inplanes = out_planes
        for _ in range(1, blocks):
            stage.append(block(self.inplanes, planes))
        return nn.Sequential(*stage)

    def forward(self, x):
        x = self.relu(self.bn1(self.conv1(x)))
        x = self.maxpool(x)

        x = self.layer1(x)
        x = self.layer2(x)
        x = self.layer3(x)
        x = self.layer4(x)

        x = self.avgpool(x)
        # Flatten everything but the batch dimension for the classifier.
        flat = x.view(x.size(0), -1)
        return self.fc(flat)
class ResNet_imagenet(ResNet):
    """ResNet with a 7x7 stem, four stages and a 7x7 average pool.

    Args:
        num_classes: size of the final linear layer.
        block: residual block class (must expose ``expansion``).
        layers: number of blocks in each of the four stages.
    """

    def __init__(self, num_classes=1000,
                 block=Bottleneck, layers=[3, 4, 23, 3]):
        super(ResNet_imagenet, self).__init__()
        self.inplanes = 64
        self.conv1 = nn.Conv2d(3, 64, kernel_size=7, stride=2, padding=3,
                               bias=False)
        self.bn1 = nn.BatchNorm2d(64)
        self.relu = nn.ReLU(inplace=True)
        self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)

        # (planes, stride) of layer1..layer4, registered in that order.
        stage_specs = ((64, 1), (128, 2), (256, 2), (512, 2))
        for index, (planes, stride) in enumerate(stage_specs):
            stage = self._make_layer(block, planes, layers[index], stride=stride)
            setattr(self, 'layer%d' % (index + 1), stage)

        self.avgpool = nn.AvgPool2d(7)
        self.fc = nn.Linear(512 * block.expansion, num_classes)

        init_model(self)
        # Epoch -> optimiser settings.
        self.regime = {
            0: {'optimizer': 'SGD', 'lr': 1e-1,
                'weight_decay': 1e-4, 'momentum': 0.9},
            30: {'lr': 1e-2},
            60: {'lr': 1e-3, 'weight_decay': 0},
            90: {'lr': 1e-4}
        }
class ResNet_cifar10(ResNet):
    """Three-stage ResNet with a 3x3 stem and an 8x8 average pool.

    ``maxpool`` and ``layer4`` are identity functions so that the shared
    ``ResNet.forward`` can be reused unchanged.

    Args:
        num_classes: size of the final linear layer.
        block: residual block class.
        depth: network depth; each stage has ``int((depth - 2) / 6)`` blocks.
    """

    def __init__(self, num_classes=10,
                 block=BasicBlock, depth=18):
        super(ResNet_cifar10, self).__init__()
        self.inplanes = 16
        blocks_per_stage = int((depth - 2) / 6)
        self.conv1 = nn.Conv2d(3, 16, kernel_size=3, stride=1, padding=1,
                               bias=False)
        self.bn1 = nn.BatchNorm2d(16)
        self.relu = nn.ReLU(inplace=True)
        self.maxpool = lambda x: x

        # (planes, stride) of layer1..layer3, registered in that order.
        stage_specs = ((16, 1), (32, 2), (64, 2))
        for index, (planes, stride) in enumerate(stage_specs):
            stage = self._make_layer(block, planes, blocks_per_stage, stride=stride)
            setattr(self, 'layer%d' % (index + 1), stage)

        self.layer4 = lambda x: x
        self.avgpool = nn.AvgPool2d(8)
        self.fc = nn.Linear(64, num_classes)

        init_model(self)
        # Epoch -> optimiser settings.
        self.regime = {
            0: {'optimizer': 'SGD', 'lr': 1e-1,
                'weight_decay': 1e-4, 'momentum': 0.9},
            81: {'lr': 1e-2},
            122: {'lr': 1e-3, 'weight_decay': 0},
            164: {'lr': 1e-4}
        }
def resnet(**kwargs):
    """Build a ResNet for the requested dataset.

    Keyword Args:
        dataset: ``'imagenet'`` or ``'cifar10'``.
        num_classes: number of classes (default 1000 for ImageNet, 10 for
            CIFAR-10).
        depth: network depth.  ImageNet supports 18, 34, 50 (default), 101
            and 152; CIFAR-10 defaults to 18.

    Returns:
        A ``ResNet_imagenet`` or ``ResNet_cifar10`` instance.

    Raises:
        ValueError: for an unknown dataset or an unsupported ImageNet depth.
            These cases used to return ``None`` silently, which only failed
            later when the caller tried to use the model.
    """
    num_classes = kwargs.get('num_classes')
    depth = kwargs.get('depth')
    dataset = kwargs.get('dataset')

    if dataset == 'imagenet':
        # Blocks per stage for each supported depth.
        stage_layers = {
            18: [2, 2, 2, 2],
            34: [3, 4, 6, 3],
            50: [3, 4, 6, 3],
            101: [3, 4, 23, 3],
            152: [3, 8, 36, 3],
        }
        depth = depth or 50
        if depth not in stage_layers:
            raise ValueError(
                'unsupported ImageNet ResNet depth: {!r} (expected one of {})'.format(
                    depth, sorted(stage_layers)))
        # Depths below 50 use the two-convolution block, deeper ones the bottleneck.
        block = BasicBlock if depth < 50 else Bottleneck
        return ResNet_imagenet(num_classes=num_classes or 1000,
                               block=block, layers=stage_layers[depth])

    if dataset == 'cifar10':
        return ResNet_cifar10(num_classes=num_classes or 10,
                              block=BasicBlock, depth=depth or 18)

    raise ValueError(
        "unsupported dataset: {!r} (expected 'imagenet' or 'cifar10')".format(dataset))
| @ -0,0 +1,248 @@ | |||
| import torch.nn as nn | |||
| import torchvision.transforms as transforms | |||
| import math | |||
| from .binarized_modules import BinarizeLinear,BinarizeConv2d | |||
| __all__ = ['resnet_binary'] | |||
def Binaryconv3x3(in_planes, out_planes, stride=1):
    """Return a bias-free 3x3 ``BinarizeConv2d`` with padding 1 and the given stride."""
    conv_options = dict(kernel_size=3, stride=stride, padding=1, bias=False)
    return BinarizeConv2d(in_planes, out_planes, **conv_options)
def conv3x3(in_planes, out_planes, stride=1):
    """Return a bias-free full-precision 3x3 ``nn.Conv2d`` with padding 1."""
    conv_options = dict(kernel_size=3, stride=stride, padding=1, bias=False)
    return nn.Conv2d(in_planes, out_planes, **conv_options)
def init_model(model):
    """Initialise binarized convolutions and 2-D batch-norm layers in place.

    ``BinarizeConv2d`` weights are drawn from ``N(0, sqrt(2 / n))`` with
    ``n = kernel_h * kernel_w * out_channels``; ``nn.BatchNorm2d`` weights are
    set to 1 and biases to 0.  Other modules are left untouched.
    """
    for module in model.modules():
        if isinstance(module, BinarizeConv2d):
            kernel_h, kernel_w = module.kernel_size[0], module.kernel_size[1]
            fan = kernel_h * kernel_w * module.out_channels
            module.weight.data.normal_(0, math.sqrt(2. / fan))
            continue
        if isinstance(module, nn.BatchNorm2d):
            module.weight.data.fill_(1)
            module.bias.data.zero_()
class BasicBlock(nn.Module):
    """Binarized residual block of two 3x3 ``BinarizeConv2d`` layers.

    The shortcut is added directly to the output of the second convolution;
    batch norm and ``Hardtanh`` follow the addition and can be switched off
    with ``do_bntan`` (used for the last block of the network).

    Args:
        inplanes: input channel count.
        planes: output channel count of both convolutions.
        stride: stride of the first convolution.
        downsample: optional module applied to the input to produce the
            shortcut; when ``None`` the input itself is the shortcut.
        do_bntan: apply the final batch norm + ``Hardtanh`` when true.
    """

    expansion = 1

    def __init__(self, inplanes, planes, stride=1, downsample=None, do_bntan=True):
        super(BasicBlock, self).__init__()

        self.conv1 = Binaryconv3x3(inplanes, planes, stride)
        self.bn1 = nn.BatchNorm2d(planes)
        self.tanh1 = nn.Hardtanh(inplace=True)
        self.conv2 = Binaryconv3x3(planes, planes)
        self.tanh2 = nn.Hardtanh(inplace=True)
        self.bn2 = nn.BatchNorm2d(planes)

        self.downsample = downsample
        self.do_bntan = do_bntan
        self.stride = stride

    def forward(self, x):
        residual = x.clone()

        out = self.conv1(x)
        out = self.bn1(out)
        out = self.tanh1(out)

        out = self.conv2(out)

        if self.downsample is not None:
            # A leftover debugging hook used to stop in pdb here whenever the
            # shortcut input exceeded 1; it halted training and forced a
            # device synchronisation, so it has been removed.
            residual = self.downsample(residual)

        out += residual
        if self.do_bntan:
            out = self.bn2(out)
            out = self.tanh2(out)

        return out
class Bottleneck(nn.Module):
    """Binarized bottleneck block: 1x1 -> 3x3 -> 1x1 ``BinarizeConv2d`` layers.

    Each convolution is followed by batch norm; the shortcut is added after
    the third batch norm and a ``Hardtanh`` closes the block unless
    ``do_bntan`` is false.

    Args:
        inplanes: input channel count.
        planes: channel count of the first two convolutions; the block
            outputs ``planes * 4`` channels.
        stride: stride of the 3x3 convolution.
        downsample: optional module applied to the input to produce the
            shortcut; when ``None`` the input itself is the shortcut.
        do_bntan: apply the closing ``Hardtanh`` when true.  Accepted so that
            ``ResNet._make_layer`` can pass it to either block type.
    """

    expansion = 4

    def __init__(self, inplanes, planes, stride=1, downsample=None, do_bntan=True):
        super(Bottleneck, self).__init__()
        self.conv1 = BinarizeConv2d(inplanes, planes, kernel_size=1, bias=False)
        self.bn1 = nn.BatchNorm2d(planes)
        self.conv2 = BinarizeConv2d(planes, planes, kernel_size=3, stride=stride,
                                    padding=1, bias=False)
        self.bn2 = nn.BatchNorm2d(planes)
        self.conv3 = BinarizeConv2d(planes, planes * 4, kernel_size=1, bias=False)
        self.bn3 = nn.BatchNorm2d(planes * 4)
        self.tanh = nn.Hardtanh(inplace=True)
        self.downsample = downsample
        self.do_bntan = do_bntan
        self.stride = stride

    def forward(self, x):
        residual = x

        out = self.conv1(x)
        out = self.bn1(out)
        out = self.tanh(out)

        out = self.conv2(out)
        out = self.bn2(out)
        out = self.tanh(out)

        out = self.conv3(out)
        out = self.bn3(out)

        if self.downsample is not None:
            residual = self.downsample(x)

        out += residual
        if self.do_bntan:
            # The output has planes * 4 channels and was already normalised
            # by bn3, so only the activation is applied here.  The previous
            # tail called bn2 (sized for `planes` channels) and an undefined
            # `tanh2`, and could never run.
            out = self.tanh(out)

        return out
class ResNet(nn.Module):
    """Shared skeleton of the binarized ResNets.

    Subclasses are expected to set ``inplanes`` and every stage used by
    :meth:`forward` (``conv1``, ``maxpool``, ``bn1``, ``tanh1``,
    ``layer1``..``layer4``, ``avgpool``, ``bn2``, ``tanh2``, ``fc``, ``bn3``,
    ``logsoftmax``).
    """

    def __init__(self):
        super(ResNet, self).__init__()

    def _make_layer(self, block, planes, blocks, stride=1, do_bntan=True):
        """Build a stage of ``blocks`` residual blocks and advance ``self.inplanes``.

        The first block receives ``stride`` and, when the stride is not 1 or
        the channel count changes, a binarized 1x1-convolution + batch-norm
        shortcut.  ``do_bntan`` is forwarded to the last block of the stage
        only.

        A stage requested with ``blocks == 1`` now contains exactly one block
        (which also receives ``do_bntan``); it used to contain two.
        """
        out_planes = planes * block.expansion
        downsample = None
        if stride != 1 or self.inplanes != out_planes:
            downsample = nn.Sequential(
                BinarizeConv2d(self.inplanes, out_planes,
                               kernel_size=1, stride=stride, bias=False),
                nn.BatchNorm2d(out_planes),
            )

        single_block = blocks <= 1
        # When the stage has one block, that block is also the last one.
        first_kwargs = {'do_bntan': do_bntan} if single_block else {}
        layers = [block(self.inplanes, planes, stride, downsample, **first_kwargs)]
        self.inplanes = out_planes
        if not single_block:
            for _ in range(1, blocks - 1):
                layers.append(block(self.inplanes, planes))
            layers.append(block(self.inplanes, planes, do_bntan=do_bntan))

        return nn.Sequential(*layers)

    def forward(self, x):
        """Return log-probabilities for a batch of images."""
        x = self.conv1(x)
        x = self.maxpool(x)
        x = self.bn1(x)
        x = self.tanh1(x)
        x = self.layer1(x)
        x = self.layer2(x)
        x = self.layer3(x)
        x = self.layer4(x)

        x = self.avgpool(x)
        # Flatten everything but the batch dimension for the classifier head.
        x = x.view(x.size(0), -1)
        x = self.bn2(x)
        x = self.tanh2(x)
        x = self.fc(x)
        x = self.bn3(x)
        x = self.logsoftmax(x)

        return x
class ResNet_imagenet(ResNet):
    """Binarized ResNet with a 7x7 stem, four stages and a 7x7 average pool.

    Args:
        num_classes: number of output classes.
        block: residual block class (must expose ``expansion``).
        layers: number of blocks in each of the four stages.
    """

    def __init__(self, num_classes=1000,
                 block=Bottleneck, layers=[3, 4, 23, 3]):
        super(ResNet_imagenet, self).__init__()
        self.inplanes = 64
        self.conv1 = BinarizeConv2d(3, 64, kernel_size=7, stride=2, padding=3,
                                    bias=False)
        self.bn1 = nn.BatchNorm2d(64)
        self.tanh = nn.Hardtanh(inplace=True)
        # ResNet.forward reads tanh1, tanh2, bn2, bn3 and logsoftmax; none of
        # them were defined here, so the forward pass raised AttributeError.
        self.tanh1 = nn.Hardtanh(inplace=True)
        self.tanh2 = nn.Hardtanh(inplace=True)
        self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)
        self.layer1 = self._make_layer(block, 64, layers[0])
        self.layer2 = self._make_layer(block, 128, layers[1], stride=2)
        self.layer3 = self._make_layer(block, 256, layers[2], stride=2)
        self.layer4 = self._make_layer(block, 512, layers[3], stride=2)
        self.avgpool = nn.AvgPool2d(7)
        # Head: batch norm over the flattened pooled features, then over the
        # class scores, followed by log-softmax over the class dimension.
        self.bn2 = nn.BatchNorm1d(512 * block.expansion)
        self.bn3 = nn.BatchNorm1d(num_classes)
        self.logsoftmax = nn.LogSoftmax(dim=1)
        self.fc = BinarizeLinear(512 * block.expansion, num_classes)

        init_model(self)
        # Epoch -> optimiser settings.
        self.regime = {
            0: {'optimizer': 'SGD', 'lr': 1e-1,
                'weight_decay': 1e-4, 'momentum': 0.9},
            30: {'lr': 1e-2},
            60: {'lr': 1e-3, 'weight_decay': 0},
            90: {'lr': 1e-4}
        }
class ResNet_cifar10(ResNet):
    """Binarized three-stage ResNet with channel counts inflated by ``inflate``.

    ``maxpool`` and ``layer4`` are identity functions so that the shared
    ``ResNet.forward`` can be reused unchanged.  The last block of the third
    stage skips its own batch norm + ``Hardtanh``.

    Args:
        num_classes: number of output classes.
        block: residual block class.
        depth: network depth; each stage has ``int((depth - 2) / 6)`` blocks.
    """

    def __init__(self, num_classes=10,
                 block=BasicBlock, depth=18):
        super(ResNet_cifar10, self).__init__()
        self.inflate = 5
        self.inplanes = 16*self.inflate
        n = int((depth - 2) / 6)
        self.conv1 = BinarizeConv2d(3, 16*self.inflate, kernel_size=3, stride=1, padding=1,
                                    bias=False)
        self.maxpool = lambda x: x
        self.bn1 = nn.BatchNorm2d(16*self.inflate)
        self.tanh1 = nn.Hardtanh(inplace=True)
        self.tanh2 = nn.Hardtanh(inplace=True)
        self.layer1 = self._make_layer(block, 16*self.inflate, n)
        self.layer2 = self._make_layer(block, 32*self.inflate, n, stride=2)
        self.layer3 = self._make_layer(block, 64*self.inflate, n, stride=2, do_bntan=False)
        self.layer4 = lambda x: x
        self.avgpool = nn.AvgPool2d(8)
        self.bn2 = nn.BatchNorm1d(64*self.inflate)
        # Sized by num_classes; a fixed 10 breaks any other class count.
        self.bn3 = nn.BatchNorm1d(num_classes)
        # Explicit class dimension (the implicit form is deprecated).
        self.logsoftmax = nn.LogSoftmax(dim=1)
        self.fc = BinarizeLinear(64*self.inflate, num_classes)

        init_model(self)
        # Epoch -> optimiser settings.
        self.regime = {
            0: {'optimizer': 'Adam', 'lr': 5e-3},
            101: {'lr': 1e-3},
            142: {'lr': 5e-4},
            184: {'lr': 1e-4},
            220: {'lr': 1e-5}
        }
def resnet_binary(**kwargs):
    """Build a binarized ResNet for the requested dataset.

    Keyword Args:
        dataset: ``'imagenet'`` or ``'cifar10'``.
        num_classes: number of classes (default 1000 for ImageNet, 10 for
            CIFAR-10).
        depth: network depth.  ImageNet supports 18, 34, 50 (default), 101
            and 152; CIFAR-10 defaults to 18.

    Returns:
        A ``ResNet_imagenet`` or ``ResNet_cifar10`` instance.

    Raises:
        ValueError: for an unknown dataset or an unsupported ImageNet depth.
            These cases used to return ``None`` silently, which only failed
            later when the caller tried to use the model.
    """
    num_classes = kwargs.get('num_classes')
    depth = kwargs.get('depth')
    dataset = kwargs.get('dataset')

    if dataset == 'imagenet':
        # Blocks per stage for each supported depth.
        stage_layers = {
            18: [2, 2, 2, 2],
            34: [3, 4, 6, 3],
            50: [3, 4, 6, 3],
            101: [3, 4, 23, 3],
            152: [3, 8, 36, 3],
        }
        depth = depth or 50
        if depth not in stage_layers:
            raise ValueError(
                'unsupported ImageNet ResNet depth: {!r} (expected one of {})'.format(
                    depth, sorted(stage_layers)))
        # Depths below 50 use the two-convolution block, deeper ones the bottleneck.
        block = BasicBlock if depth < 50 else Bottleneck
        return ResNet_imagenet(num_classes=num_classes or 1000,
                               block=block, layers=stage_layers[depth])

    if dataset == 'cifar10':
        return ResNet_cifar10(num_classes=num_classes or 10,
                              block=BasicBlock, depth=depth or 18)

    raise ValueError(
        "unsupported dataset: {!r} (expected 'imagenet' or 'cifar10')".format(dataset))
| @ -0,0 +1,69 @@ | |||
| import torch.nn as nn | |||
| import torchvision.transforms as transforms | |||
class AlexNetOWT_BN(nn.Module):
    """Full-precision VGG-style network for 32x32 RGB inputs.

    Six 3x3 convolutions with three 2x2 max-pools reduce the input to a
    512 x 4 x 4 feature map, which a three-layer classifier turns into
    log-probabilities over ``num_classes``.

    Attributes set on the instance:

    * ``regime`` -- mapping from epoch to optimiser settings.

    Args:
        num_classes: number of output classes.
    """

    def __init__(self, num_classes=1000):
        super(AlexNetOWT_BN, self).__init__()
        self.features = nn.Sequential(
            nn.Conv2d(3, 128, kernel_size=3, stride=1, padding=1,
                      bias=False),
            nn.BatchNorm2d(128),
            nn.ReLU(inplace=True),

            nn.Conv2d(128, 128, kernel_size=3, padding=1, bias=False),
            nn.MaxPool2d(kernel_size=2, stride=2),
            nn.ReLU(inplace=True),
            nn.BatchNorm2d(128),

            nn.Conv2d(128, 256, kernel_size=3, padding=1, bias=False),
            nn.ReLU(inplace=True),
            nn.BatchNorm2d(256),

            nn.Conv2d(256, 256, kernel_size=3, padding=1, bias=False),
            nn.MaxPool2d(kernel_size=2, stride=2),
            nn.ReLU(inplace=True),
            nn.BatchNorm2d(256),

            nn.Conv2d(256, 512, kernel_size=3, padding=1, bias=False),
            nn.ReLU(inplace=True),
            nn.BatchNorm2d(512),

            nn.Conv2d(512, 512, kernel_size=3, padding=1, bias=False),
            nn.MaxPool2d(kernel_size=2, stride=2),
            nn.ReLU(inplace=True),
            nn.BatchNorm2d(512),
        )
        self.classifier = nn.Sequential(
            nn.Linear(512 * 4 * 4, 1024, bias=False),
            nn.BatchNorm1d(1024),
            nn.ReLU(inplace=True),
            nn.Dropout(0.5),
            nn.Linear(1024, 1024, bias=False),
            nn.BatchNorm1d(1024),
            nn.ReLU(inplace=True),
            nn.Dropout(0.5),
            # The comma after this layer was missing (a syntax error) and the
            # next layer was spelled `LogSoftMax`, which torch.nn lacks.
            nn.Linear(1024, num_classes),
            nn.LogSoftmax(dim=1)
        )
        # Epoch -> optimiser settings.
        self.regime = {
            0: {'optimizer': 'SGD', 'lr': 1e-2,
                'weight_decay': 5e-4, 'momentum': 0.9},
            10: {'lr': 5e-3},
            15: {'lr': 1e-3, 'weight_decay': 0},
            20: {'lr': 5e-4},
            25: {'lr': 1e-4}
        }

    def forward(self, x):
        """Return log-probabilities of shape ``(batch, num_classes)``."""
        x = self.features(x)
        # Flatten the 512 x 4 x 4 feature map for the classifier.
        x = x.view(-1, 512 * 4 * 4)
        x = self.classifier(x)
        return x
def model(**kwargs):
    """Build an ``AlexNetOWT_BN``.

    Only the ``num_classes`` keyword is read (default 1000); any other
    keyword is ignored.
    """
    return AlexNetOWT_BN(kwargs.get('num_classes', 1000))
| @ -0,0 +1,80 @@ | |||
| import torch | |||
| import torch.nn as nn | |||
| import torchvision.transforms as transforms | |||
| from torch.autograd import Function | |||
| from .binarized_modules import BinarizeLinear,BinarizeConv2d | |||
class VGG_Cifar10(nn.Module):
    """Binarized VGG-style network for 32x32 RGB inputs.

    Six ``BinarizeConv2d`` layers (channel counts inflated by ``infl_ratio``
    except for the last one) with three 2x2 max-pools produce a 512 x 4 x 4
    feature map; three ``BinarizeLinear`` layers turn it into
    log-probabilities over ``num_classes``.

    Attributes set on the instance:

    * ``regime`` -- mapping from epoch to optimiser settings.

    Args:
        num_classes: number of output classes.
    """

    def __init__(self, num_classes=1000):
        super(VGG_Cifar10, self).__init__()
        self.infl_ratio = 3
        self.features = nn.Sequential(
            BinarizeConv2d(3, 128*self.infl_ratio, kernel_size=3, stride=1, padding=1,
                           bias=True),
            nn.BatchNorm2d(128*self.infl_ratio),
            nn.Hardtanh(inplace=True),

            BinarizeConv2d(128*self.infl_ratio, 128*self.infl_ratio, kernel_size=3, padding=1, bias=True),
            nn.MaxPool2d(kernel_size=2, stride=2),
            nn.BatchNorm2d(128*self.infl_ratio),
            nn.Hardtanh(inplace=True),

            BinarizeConv2d(128*self.infl_ratio, 256*self.infl_ratio, kernel_size=3, padding=1, bias=True),
            nn.BatchNorm2d(256*self.infl_ratio),
            nn.Hardtanh(inplace=True),

            BinarizeConv2d(256*self.infl_ratio, 256*self.infl_ratio, kernel_size=3, padding=1, bias=True),
            nn.MaxPool2d(kernel_size=2, stride=2),
            nn.BatchNorm2d(256*self.infl_ratio),
            nn.Hardtanh(inplace=True),

            BinarizeConv2d(256*self.infl_ratio, 512*self.infl_ratio, kernel_size=3, padding=1, bias=True),
            nn.BatchNorm2d(512*self.infl_ratio),
            nn.Hardtanh(inplace=True),

            BinarizeConv2d(512*self.infl_ratio, 512, kernel_size=3, padding=1, bias=True),
            nn.MaxPool2d(kernel_size=2, stride=2),
            nn.BatchNorm2d(512),
            nn.Hardtanh(inplace=True)
        )
        self.classifier = nn.Sequential(
            BinarizeLinear(512 * 4 * 4, 1024, bias=True),
            nn.BatchNorm1d(1024),
            nn.Hardtanh(inplace=True),
            BinarizeLinear(1024, 1024, bias=True),
            nn.BatchNorm1d(1024),
            nn.Hardtanh(inplace=True),
            BinarizeLinear(1024, num_classes, bias=True),
            nn.BatchNorm1d(num_classes, affine=False),
            # Explicit class dimension (the implicit form is deprecated).
            nn.LogSoftmax(dim=1)
        )

        # Epoch -> optimiser settings.
        self.regime = {
            0: {'optimizer': 'Adam', 'betas': (0.9, 0.999), 'lr': 5e-3},
            40: {'lr': 1e-3},
            80: {'lr': 5e-4},
            100: {'lr': 1e-4},
            120: {'lr': 5e-5},
            140: {'lr': 1e-5}
        }

    def forward(self, x):
        """Return log-probabilities of shape ``(batch, num_classes)``."""
        x = self.features(x)
        # Flatten the 512 x 4 x 4 feature map for the classifier.
        x = x.view(-1, 512 * 4 * 4)
        x = self.classifier(x)
        return x
def vgg_cifar10_binary(**kwargs):
    """Build a ``VGG_Cifar10``.

    Only the ``num_classes`` keyword is read (default 10); any other keyword
    is ignored.
    """
    return VGG_Cifar10(kwargs.get('num_classes', 10))
| @ -0,0 +1,198 @@ | |||
| import torch | |||
| import torchvision.transforms as transforms | |||
| import random | |||
# Per-channel mean / std used as the default `transforms.Normalize` arguments
# by the pipeline builders below.
__imagenet_stats = {'mean': [0.485, 0.456, 0.406],
                    'std': [0.229, 0.224, 0.225]}

# Eigenvalues and eigenvectors handed to the `Lighting` transform in
# `inception_color_preproccess`.
__imagenet_pca = {
    'eigval': torch.Tensor([0.2175, 0.0188, 0.0045]),
    'eigvec': torch.Tensor([
        [-0.5675, 0.7192, 0.4009],
        [-0.5808, -0.0045, -0.8140],
        [-0.5836, -0.6948, 0.4203],
    ])
}
def scale_crop(input_size, scale_size=None, normalize=__imagenet_stats):
    """Build a deterministic pipeline: optional resize, centre crop, tensor, normalise.

    Args:
        input_size: size of the centre crop.
        scale_size: size passed to ``transforms.Resize`` before cropping.
            The resize step is skipped when this is ``None`` or equal to
            ``input_size``.
        normalize: keyword arguments for ``transforms.Normalize``.

    Returns:
        A ``transforms.Compose`` instance.
    """
    t_list = [
        transforms.CenterCrop(input_size),
        transforms.ToTensor(),
        transforms.Normalize(**normalize),
    ]
    # The default scale_size=None used to be forwarded to the resize
    # transform.  transforms.Resize replaces the removed transforms.Scale.
    if scale_size is not None and scale_size != input_size:
        t_list.insert(0, transforms.Resize(scale_size))

    return transforms.Compose(t_list)
def scale_random_crop(input_size, scale_size=None, normalize=__imagenet_stats):
    """Build a pipeline: optional resize, random crop, tensor, normalise.

    Args:
        input_size: size of the random crop.
        scale_size: size passed to ``transforms.Resize`` before cropping.
            The resize step is skipped when this is ``None`` or equal to
            ``input_size``.
        normalize: keyword arguments for ``transforms.Normalize``.

    Returns:
        A ``transforms.Compose`` instance.  (The composed pipeline used to be
        built and then discarded, so the function returned ``None``.)
    """
    t_list = [
        transforms.RandomCrop(input_size),
        transforms.ToTensor(),
        transforms.Normalize(**normalize),
    ]
    # transforms.Resize replaces the removed transforms.Scale.
    if scale_size is not None and scale_size != input_size:
        t_list.insert(0, transforms.Resize(scale_size))

    return transforms.Compose(t_list)
def pad_random_crop(input_size, scale_size=None, normalize=__imagenet_stats):
    """Build a training pipeline: padded random crop, horizontal flip, tensor, normalise.

    Args:
        input_size: size of the random crop.
        scale_size: virtual size of the padded image; the padding on each
            side is ``int((scale_size - input_size) / 2)``.  ``None`` means
            no padding (the default used to raise ``TypeError``).
        normalize: keyword arguments for ``transforms.Normalize``.

    Returns:
        A ``transforms.Compose`` instance.
    """
    if scale_size is None:
        scale_size = input_size
    padding = int((scale_size - input_size) / 2)
    return transforms.Compose([
        transforms.RandomCrop(input_size, padding=padding),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        transforms.Normalize(**normalize),
    ])
def inception_preproccess(input_size, normalize=__imagenet_stats):
    """Build a random-resized-crop + horizontal-flip training pipeline.

    Args:
        input_size: output size of the random resized crop.
        normalize: keyword arguments for ``transforms.Normalize``.

    Returns:
        A ``transforms.Compose`` instance.
    """
    return transforms.Compose([
        # ``RandomResizedCrop`` is the current name of the deprecated
        # ``RandomSizedCrop`` alias.
        transforms.RandomResizedCrop(input_size),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        transforms.Normalize(**normalize)
    ])
def inception_color_preproccess(input_size, normalize=__imagenet_stats):
    """Build the random-crop pipeline with colour jitter and lighting noise.

    ``ColorJitter`` and ``Lighting`` are applied after ``ToTensor`` and
    before ``Normalize``.

    Args:
        input_size: output size of the random resized crop.
        normalize: keyword arguments for ``transforms.Normalize``.

    Returns:
        A ``transforms.Compose`` instance.
    """
    return transforms.Compose([
        # ``RandomResizedCrop`` is the current name of the deprecated
        # ``RandomSizedCrop`` alias.
        transforms.RandomResizedCrop(input_size),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        ColorJitter(
            brightness=0.4,
            contrast=0.4,
            saturation=0.4,
        ),
        Lighting(0.1, __imagenet_pca['eigval'], __imagenet_pca['eigvec']),
        transforms.Normalize(**normalize)
    ])
def get_transform(name='imagenet', input_size=None,
                  scale_size=None, normalize=None, augment=True):
    """Return the preprocessing pipeline for a dataset name.

    Defaults applied when ``input_size`` / ``scale_size`` are falsy:

    * ``'imagenet'``: input 224, scale 256.
    * any name containing ``'cifar'``: input 32, scale 40 when augmenting
      and 32 otherwise.
    * ``'mnist'``: input 28, scale 32; ``normalize`` is always replaced
      by a single-channel mean/std of 0.5.

    With ``augment`` set, ImageNet uses ``inception_preproccess`` and the
    other datasets use ``pad_random_crop``; without it every dataset uses
    ``scale_crop``. Any other name yields ``None``.
    """
    normalize = normalize or __imagenet_stats

    if name == 'imagenet':
        crop = input_size or 224
        if augment:
            return inception_preproccess(crop, normalize=normalize)
        return scale_crop(input_size=crop,
                          scale_size=scale_size or 256,
                          normalize=normalize)

    if 'cifar' in name:
        crop = input_size or 32
        default_scale = 40 if augment else 32
    elif name == 'mnist':
        normalize = {'mean': [0.5], 'std': [0.5]}
        crop = input_size or 28
        default_scale = 32
    else:
        # Unknown dataset names produce no transform.
        return None

    build = pad_random_crop if augment else scale_crop
    return build(crop, scale_size=scale_size or default_scale,
                 normalize=normalize)
class Lighting(object):
    """Add one random per-channel offset built from ``eigvec``/``eigval``.

    Three coefficients are drawn from a normal distribution with standard
    deviation ``alphastd``; the offset is the row-wise sum of
    ``eigvec * coefficients * eigval``.
    """

    def __init__(self, alphastd, eigval, eigvec):
        self.alphastd = alphastd
        self.eigval = eigval
        self.eigvec = eigvec

    def __call__(self, img):
        if self.alphastd != 0:
            coeffs = img.new().resize_(3).normal_(0, self.alphastd)
            basis = self.eigvec.type_as(img).clone()
            weighted = basis.mul(coeffs.view(1, 3).expand(3, 3))
            scaled = weighted.mul(self.eigval.view(1, 3).expand(3, 3))
            offset = scaled.sum(1).squeeze()
            img = img.add(offset.view(3, 1, 1).expand_as(img))
        # With alphastd == 0 the input object is returned untouched.
        return img
class Grayscale(object):
    """Replace the first three slices of an image tensor with their weighted sum.

    The result is a clone of the input in which ``[0]``, ``[1]`` and
    ``[2]`` all hold ``0.299 * img[0] + 0.587 * img[1] + 0.114 * img[2]``.
    The input tensor itself is not modified.
    """

    def __call__(self, img):
        gs = img.clone()
        # Tensor-first ``add_(other, alpha=...)``: the legacy
        # ``add_(scalar, tensor)`` overload is deprecated in PyTorch.
        gs[0].mul_(0.299).add_(gs[1], alpha=0.587).add_(gs[2], alpha=0.114)
        gs[1].copy_(gs[0])
        gs[2].copy_(gs[0])
        return gs
class Saturation(object):
    """Blend an image toward its ``Grayscale`` version by a random amount.

    The blend weight is drawn uniformly from ``[0, var]`` on every call.
    """

    def __init__(self, var):
        self.var = var

    def __call__(self, img):
        gray = Grayscale()(img)
        weight = random.uniform(0, self.var)
        return img.lerp(gray, weight)
class Brightness(object):
    """Blend an image toward an all-zero tensor by a random amount.

    The blend weight is drawn uniformly from ``[0, var]`` on every call.
    """

    def __init__(self, var):
        self.var = var

    def __call__(self, img):
        black = img.new().resize_as_(img).zero_()
        weight = random.uniform(0, self.var)
        return img.lerp(black, weight)
class Contrast(object):
    """Blend an image toward a constant tensor by a random amount.

    The constant is the mean of the ``Grayscale`` version of the image.
    The blend weight is drawn uniformly from ``[0, var]`` on every call.
    """

    def __init__(self, var):
        self.var = var

    def __call__(self, img):
        flat_gray = Grayscale()(img)
        flat_gray.fill_(flat_gray.mean())
        weight = random.uniform(0, self.var)
        return img.lerp(flat_gray, weight)
class RandomOrder(object):
    """Apply a list of transforms, each exactly once, in a shuffled order.

    The order is drawn from ``torch.randperm`` on every call. When the
    transform list is ``None`` the input is returned unchanged.
    """

    def __init__(self, transforms):
        self.transforms = transforms

    def __call__(self, img):
        steps = self.transforms
        if steps is not None:
            for position in torch.randperm(len(steps)):
                apply_step = steps[position]
                img = apply_step(img)
        return img
class ColorJitter(RandomOrder):
    """Randomly ordered brightness / contrast / saturation jitter.

    A component is only included when its strength is non-zero.
    """

    def __init__(self, brightness=0.4, contrast=0.4, saturation=0.4):
        candidates = (
            (brightness, Brightness),
            (contrast, Contrast),
            (saturation, Saturation),
        )
        self.transforms = [factory(strength)
                           for strength, factory in candidates
                           if strength != 0]
| @ -0,0 +1,5 @@ | |||
| 2021-04-15 15:36:47 - INFO - saving to ./results/2021-04-15_15-36-47 | |||
| 2021-04-15 15:36:47 - DEBUG - run arguments: Namespace(batch_size=256, dataset='imagenet', epochs=2500, evaluate=None, gpus='0', input_size=None, lr=0.1, model='alexnet', model_config='', momentum=0.9, optimizer='SGD', print_freq=10, results_dir='./results', resume='', save='2021-04-15_15-36-47', start_epoch=0, type='torch.cuda.FloatTensor', weight_decay=0.0001, workers=8) | |||
| 2021-04-15 15:36:47 - INFO - creating model alexnet | |||
| 2021-04-15 15:36:48 - INFO - created model with configuration: {'input_size': None, 'dataset': 'imagenet'} | |||
| 2021-04-15 15:36:48 - INFO - number of parameters: 61110184 | |||
| @ -0,0 +1,5 @@ | |||
| 2021-04-15 15:37:52 - INFO - saving to ./results/2021-04-15_15-37-52 | |||
| 2021-04-15 15:37:52 - DEBUG - run arguments: Namespace(batch_size=256, dataset='imagenet', epochs=2500, evaluate=None, gpus='0', input_size=None, lr=0.1, model='resnet', model_config='', momentum=0.9, optimizer='SGD', print_freq=10, results_dir='./results', resume='', save='2021-04-15_15-37-52', start_epoch=0, type='torch.cuda.FloatTensor', weight_decay=0.0001, workers=8) | |||
| 2021-04-15 15:37:52 - INFO - creating model resnet | |||
| 2021-04-15 15:37:52 - INFO - created model with configuration: {'input_size': None, 'dataset': 'imagenet'} | |||
| 2021-04-15 15:37:52 - INFO - number of parameters: 25557032 | |||
| @ -0,0 +1,5 @@ | |||
| 2021-04-15 15:38:16 - INFO - saving to ./results/2021-04-15_15-38-16 | |||
| 2021-04-15 15:38:16 - DEBUG - run arguments: Namespace(batch_size=256, dataset='imagenet', epochs=2500, evaluate=None, gpus='0', input_size=None, lr=0.1, model='alexnet', model_config='', momentum=0.9, optimizer='SGD', print_freq=10, results_dir='./results', resume='', save='2021-04-15_15-38-16', start_epoch=0, type='torch.cuda.FloatTensor', weight_decay=0.0001, workers=8) | |||
| 2021-04-15 15:38:16 - INFO - creating model alexnet | |||
| 2021-04-15 15:38:17 - INFO - created model with configuration: {'input_size': None, 'dataset': 'imagenet'} | |||
| 2021-04-15 15:38:17 - INFO - number of parameters: 61110184 | |||
| @ -0,0 +1,160 @@ | |||
| import os | |||
| import torch | |||
| import logging.config | |||
| import shutil | |||
| import pandas as pd | |||
| from bokeh.io import output_file, save, show | |||
| from bokeh.plotting import figure | |||
| from bokeh.layouts import column | |||
| #from bokeh.charts import Line, defaults | |||
| # | |||
| #defaults.width = 800 | |||
| #defaults.height = 400 | |||
| #defaults.tools = 'pan,box_zoom,wheel_zoom,box_select,hover,resize,reset,save' | |||
def setup_logging(log_file='log.txt'):
    """Configure root logging to a file and to the console.

    ``log_file`` is opened in write mode at DEBUG level with timestamped
    records. A stream handler at INFO level that prints only the message
    text is also attached to the root logger.
    """
    logging.basicConfig(filename=log_file,
                        filemode='w',
                        level=logging.DEBUG,
                        format="%(asctime)s - %(levelname)s - %(message)s",
                        datefmt="%Y-%m-%d %H:%M:%S")

    console = logging.StreamHandler()
    console.setLevel(logging.INFO)
    console.setFormatter(logging.Formatter('%(message)s'))
    logging.getLogger('').addHandler(console)
class ResultsLog(object):
    """Accumulate per-epoch results in a DataFrame and persist them.

    Rows are added with ``add`` and written to ``path`` as CSV by
    ``save``. Figures queued with ``image`` are written to ``plot_path``
    as a bokeh HTML document on the next ``save``.
    """

    def __init__(self, path='results.csv', plot_path=None):
        self.path = path
        self.plot_path = plot_path or (self.path + '.html')
        self.figures = []
        self.results = None

    def add(self, **kwargs):
        """Append one row whose columns are the given keyword names."""
        # Materialise the dict views so pandas receives plain lists.
        row = pd.DataFrame([list(kwargs.values())],
                           columns=list(kwargs.keys()))
        if self.results is None:
            self.results = row
        else:
            # ``DataFrame.append`` no longer exists in pandas 2;
            # ``pd.concat`` is the supported way to add rows.
            self.results = pd.concat([self.results, row],
                                     ignore_index=True)

    def save(self, title='Training Results'):
        """Write queued figures (if any) and the results table to disk."""
        if len(self.figures) > 0:
            if os.path.isfile(self.plot_path):
                os.remove(self.plot_path)
            output_file(self.plot_path, title=title)
            plot = column(*self.figures)
            save(plot)
            self.figures = []
        self.results.to_csv(self.path, index=False, index_label=False)

    def load(self, path=None):
        """Replace the results table with the CSV at ``path``, if it exists.

        ``path`` defaults to ``self.path``. A missing file leaves the
        current table untouched.
        """
        path = path or self.path
        if os.path.isfile(path):
            # ``read_csv`` is a module-level pandas function, not a
            # DataFrame method; its result must be stored.
            self.results = pd.read_csv(path)

    def show(self):
        """Display the queued figures, if any."""
        if len(self.figures) > 0:
            plot = column(*self.figures)
            show(plot)

    #def plot(self, *kargs, **kwargs):
    #    line = Line(data=self.results, *kargs, **kwargs)
    #    self.figures.append(line)

    def image(self, *kargs, **kwargs):
        """Queue a new bokeh figure rendering an image with the given args."""
        fig = figure()
        fig.image(*kargs, **kwargs)
        self.figures.append(fig)
def save_checkpoint(state, is_best, path='.', filename='checkpoint.pth.tar', save_all=False):
    """Serialize ``state`` with ``torch.save`` and optionally duplicate it.

    The checkpoint is written to ``path/filename``. When ``is_best`` is
    true it is copied to ``model_best.pth.tar``; when ``save_all`` is
    true it is also copied to a file named after ``state['epoch']``.
    """
    target = os.path.join(path, filename)
    torch.save(state, target)

    def _duplicate(copy_name):
        shutil.copyfile(target, os.path.join(path, copy_name))

    if is_best:
        _duplicate('model_best.pth.tar')
    if save_all:
        _duplicate('checkpoint_epoch_%s.pth.tar' % state['epoch'])
class AverageMeter(object):
    """Track the latest value and the weighted running mean of a series."""

    def __init__(self):
        self.reset()

    def reset(self):
        """Zero the last value, the mean, the weighted sum and the count."""
        self.val = self.avg = self.sum = self.count = 0

    def update(self, val, n=1):
        """Record ``val`` as observed ``n`` times and refresh the mean."""
        self.val = val
        self.sum += val * n
        self.count += n
        self.avg = self.sum / self.count
# Optimizer classes selectable by name through a regime's 'optimizer' key.
__optimizers = {
    'SGD': torch.optim.SGD,
    'ASGD': torch.optim.ASGD,
    'Adam': torch.optim.Adam,
    'Adamax': torch.optim.Adamax,
    'Adagrad': torch.optim.Adagrad,
    'Adadelta': torch.optim.Adadelta,
    'Rprop': torch.optim.Rprop,
    'RMSprop': torch.optim.RMSprop
}


def adjust_optimizer(optimizer, epoch, config):
    """Return the optimizer reconfigured for ``epoch``.

    ``config`` is either a callable mapping an epoch to a settings dict,
    or a dict keyed by epoch. For a dict, the settings of every epoch
    from 0 up to and including ``epoch`` are applied in order, so earlier
    entries stay in effect until a later one overrides them.

    A settings dict may contain ``'optimizer'`` (a name from the table
    above; a new optimizer is built from the current parameter groups)
    and any key already present in a parameter group, which is then
    overwritten.
    """

    def _apply(current, setting):
        if 'optimizer' in setting:
            optimizer_cls = __optimizers[setting['optimizer']]
            current = optimizer_cls(current.param_groups)
            logging.debug('OPTIMIZER - setting method = %s' %
                          setting['optimizer'])
        for group in current.param_groups:
            for key in group.keys():
                if key not in setting:
                    continue
                logging.debug('OPTIMIZER - setting %s = %s' %
                              (key, setting[key]))
                group[key] = setting[key]
        return current

    if callable(config):
        return _apply(optimizer, config(epoch))

    # Replay every scheduled epoch up to the current one (sticky settings).
    for past_epoch in range(epoch + 1):
        if past_epoch in config:
            optimizer = _apply(optimizer, config[past_epoch])
    return optimizer
def accuracy(output, target, topk=(1,)):
    """Compute the precision@k, in percent, for each k in ``topk``.

    Args:
        output: score tensor; the top-k is taken along dimension 1.
        target: tensor of class indices, one per row of ``output``.
        topk: iterable of k values to evaluate.

    Returns:
        A list with one tensor per entry of ``topk``, each holding the
        percentage of samples whose target is among the k highest scores.
    """
    maxk = max(topk)
    batch_size = target.size(0)

    _, pred = output.float().topk(maxk, 1, True, True)
    pred = pred.t()
    correct = pred.eq(target.view(1, -1).expand_as(pred))

    res = []
    for k in topk:
        # ``correct`` derives from a transposed tensor and may not be
        # contiguous; ``view(-1)`` raises on such a slice when k > 1,
        # whereas ``reshape`` copies only when it has to.
        correct_k = correct[:k].reshape(-1).float().sum(0)
        res.append(correct_k.mul_(100.0 / batch_size))
    return res
| # kernel_img = model.features[0][0].kernel.data.clone() | |||
| # kernel_img.add_(-kernel_img.min()) | |||
| # kernel_img.mul_(255 / kernel_img.max()) | |||
| # save_image(kernel_img, 'kernel%s.jpg' % epoch) | |||
| @ -0,0 +1,8 @@ | |||
| # BNN.pytorch | |||
| Binarized Neural Network (BNN) for pytorch | |||
| This is the pytorch version for the BNN code, for VGG and resnet models | |||
| Link to the paper: https://papers.nips.cc/paper/6573-binarized-neural-networks | |||
| The code is based on https://github.com/eladhoffer/convNet.pytorch | |||
| Please install torch and torchvision by following the instructions at: http://pytorch.org/ | |||
| To run resnet18 for cifar10 dataset use: python main_binary.py --model resnet_binary --save resnet18_binary --dataset cifar10 | |||
| @ -0,0 +1,37 @@ | |||
| import os | |||
| import torchvision.datasets as datasets | |||
| import torchvision.transforms as transforms | |||
# Root directory under which every dataset folder is looked up.
_DATASETS_MAIN_PATH = '/home/Datasets'

# Dataset name -> directory (ImageNet has one directory per split).
_dataset_path = {
    'cifar10': os.path.join(_DATASETS_MAIN_PATH, 'CIFAR10'),
    'cifar100': os.path.join(_DATASETS_MAIN_PATH, 'CIFAR100'),
    'stl10': os.path.join(_DATASETS_MAIN_PATH, 'STL10'),
    'mnist': os.path.join(_DATASETS_MAIN_PATH, 'MNIST'),
    'imagenet': {
        'train': os.path.join(_DATASETS_MAIN_PATH, 'ImageNet/train'),
        'val': os.path.join(_DATASETS_MAIN_PATH, 'ImageNet/val')
    }
}


def get_dataset(name, split='train', transform=None,
                target_transform=None, download=True):
    """Return the torchvision dataset for ``name`` and ``split``.

    ``'cifar10'`` and ``'cifar100'`` use the training set when ``split``
    is ``'train'`` and the test set otherwise. ``'imagenet'`` is read
    with ``ImageFolder`` from the directory registered for ``split``.
    Any other name returns ``None``.
    """
    use_train_set = (split == 'train')

    if name in ('cifar10', 'cifar100'):
        if name == 'cifar10':
            dataset_cls = datasets.CIFAR10
        else:
            dataset_cls = datasets.CIFAR100
        return dataset_cls(root=_dataset_path[name],
                           train=use_train_set,
                           transform=transform,
                           target_transform=target_transform,
                           download=download)

    if name == 'imagenet':
        split_dir = _dataset_path[name][split]
        return datasets.ImageFolder(root=split_dir,
                                    transform=transform,
                                    target_transform=target_transform)
    # Other dataset names are not handled here; the result is None.
| @ -0,0 +1,309 @@ | |||
| import argparse | |||
| import os | |||
| import time | |||
| import logging | |||
| import torch | |||
| import torch.nn as nn | |||
| import torch.nn.parallel | |||
| import torch.backends.cudnn as cudnn | |||
| import torch.optim | |||
| import torch.utils.data | |||
| import models | |||
| from torch.autograd import Variable | |||
| from data import get_dataset | |||
| from preprocess import get_transform | |||
| from utils import * | |||
| from datetime import datetime | |||
| from ast import literal_eval | |||
| from torchvision.utils import save_image | |||
# Lowercase, non-dunder, callable attributes of the ``models`` package;
# these are the accepted values of ``--model``.
# (Merge conflict resolved in favour of the incoming side, which drops the
# debug ``print(model_names)``.)
model_names = sorted(name for name in models.__dict__
                     if name.islower() and not name.startswith("__")
                     and callable(models.__dict__[name]))
def _build_parser():
    """Create the command-line parser for the training script."""
    cli = argparse.ArgumentParser(description='PyTorch ConvNet Training')
    add = cli.add_argument

    # Output locations and dataset selection.
    add('--results_dir', metavar='RESULTS_DIR', default='./results',
        help='results dir')
    add('--save', metavar='SAVE', default='',
        help='saved folder')
    add('--dataset', metavar='DATASET', default='imagenet',
        help='dataset name or folder')

    # Model selection and configuration.
    add('--model', '-a', metavar='MODEL', default='alexnet',
        choices=model_names,
        help='model architecture: ' +
        ' | '.join(model_names) +
        ' (default: alexnet)')
    add('--input_size', type=int, default=None,
        help='image input size')
    add('--model_config', default='',
        help='additional architecture configuration')

    # Device and data-loading options.
    add('--type', default='torch.cuda.FloatTensor',
        help='type of tensor - e.g torch.cuda.HalfTensor')
    add('--gpus', default='0',
        help='gpus used for training - e.g 0,1,3')
    add('-j', '--workers', default=8, type=int, metavar='N',
        help='number of data loading workers (default: 8)')

    # Training schedule and optimizer hyper-parameters.
    add('--epochs', default=2500, type=int, metavar='N',
        help='number of total epochs to run')
    add('--start-epoch', default=0, type=int, metavar='N',
        help='manual epoch number (useful on restarts)')
    add('-b', '--batch-size', default=256, type=int,
        metavar='N', help='mini-batch size (default: 256)')
    add('--optimizer', default='SGD', type=str, metavar='OPT',
        help='optimizer function used')
    add('--lr', '--learning_rate', default=0.1, type=float,
        metavar='LR', help='initial learning rate')
    add('--momentum', default=0.9, type=float, metavar='M',
        help='momentum')
    add('--weight-decay', '--wd', default=1e-4, type=float,
        metavar='W', help='weight decay (default: 1e-4)')

    # Reporting, resuming and evaluation.
    add('--print-freq', '-p', default=10, type=int,
        metavar='N', help='print frequency (default: 10)')
    add('--resume', default='', type=str, metavar='PATH',
        help='path to latest checkpoint (default: none)')
    add('-e', '--evaluate', type=str, metavar='FILE',
        help='evaluate model FILE on validation set')
    return cli


parser = _build_parser()
def main():
    """Parse the command line, build the model, then evaluate or train it.

    Steps, in order: create the results directory and logging, select the
    GPU(s), instantiate the model, optionally load a checkpoint
    (``--evaluate`` or ``--resume``), build the data loaders, and either
    run a single validation pass (``--evaluate``) or loop over epochs
    training, validating, checkpointing and logging results.

    Sets the module-level globals ``args`` and ``best_prec1``.
    """
    global args, best_prec1
    best_prec1 = 0
    args = parser.parse_args()

    if args.evaluate:
        args.results_dir = '/tmp'
    # Compare by value: ``is ''`` tests object identity, which is not
    # guaranteed for equal strings and triggers a SyntaxWarning.
    if args.save == '':
        args.save = datetime.now().strftime('%Y-%m-%d_%H-%M-%S')
    save_path = os.path.join(args.results_dir, args.save)
    if not os.path.exists(save_path):
        os.makedirs(save_path)

    setup_logging(os.path.join(save_path, 'log.txt'))
    results_file = os.path.join(save_path, 'results.%s')
    results = ResultsLog(results_file % 'csv', results_file % 'html')

    logging.info("saving to %s", save_path)
    logging.debug("run arguments: %s", args)

    if 'cuda' in args.type:
        args.gpus = [int(i) for i in args.gpus.split(',')]
        torch.cuda.set_device(args.gpus[0])
        cudnn.benchmark = True
    else:
        args.gpus = None

    # create model
    logging.info("creating model %s", args.model)
    model = models.__dict__[args.model]
    model_config = {'input_size': args.input_size, 'dataset': args.dataset}

    if args.model_config != '':
        model_config = dict(model_config, **literal_eval(args.model_config))

    model = model(**model_config)
    logging.info("created model with configuration: %s", model_config)

    # optionally resume from a checkpoint
    # NOTE: torch.load unpickles the file; only load trusted checkpoints.
    if args.evaluate:
        if not os.path.isfile(args.evaluate):
            parser.error('invalid checkpoint: {}'.format(args.evaluate))
        checkpoint = torch.load(args.evaluate)
        model.load_state_dict(checkpoint['state_dict'])
        logging.info("loaded checkpoint '%s' (epoch %s)",
                     args.evaluate, checkpoint['epoch'])
    elif args.resume:
        checkpoint_file = args.resume
        if os.path.isdir(checkpoint_file):
            results.load(os.path.join(checkpoint_file, 'results.csv'))
            checkpoint_file = os.path.join(
                checkpoint_file, 'model_best.pth.tar')
        if os.path.isfile(checkpoint_file):
            logging.info("loading checkpoint '%s'", args.resume)
            checkpoint = torch.load(checkpoint_file)
            args.start_epoch = checkpoint['epoch'] - 1
            best_prec1 = checkpoint['best_prec1']
            model.load_state_dict(checkpoint['state_dict'])
            logging.info("loaded checkpoint '%s' (epoch %s)",
                         checkpoint_file, checkpoint['epoch'])
        else:
            logging.error("no checkpoint found at '%s'", args.resume)

    num_parameters = sum([l.nelement() for l in model.parameters()])
    logging.info("number of parameters: %d", num_parameters)

    # Data loading code
    default_transform = {
        'train': get_transform(args.dataset,
                               input_size=args.input_size, augment=True),
        'eval': get_transform(args.dataset,
                              input_size=args.input_size, augment=False)
    }
    # A model may override the transforms and the training regime by
    # exposing ``input_transform`` / ``regime`` attributes.
    transform = getattr(model, 'input_transform', default_transform)
    regime = getattr(model, 'regime', {0: {'optimizer': args.optimizer,
                                           'lr': args.lr,
                                           'momentum': args.momentum,
                                           'weight_decay': args.weight_decay}})
    # define loss function (criterion) and optimizer
    criterion = getattr(model, 'criterion', nn.CrossEntropyLoss)()
    criterion.type(args.type)
    model.type(args.type)

    val_data = get_dataset(args.dataset, 'val', transform['eval'])
    val_loader = torch.utils.data.DataLoader(
        val_data,
        batch_size=args.batch_size, shuffle=False,
        num_workers=args.workers, pin_memory=True)

    if args.evaluate:
        validate(val_loader, model, criterion, 0)
        return

    train_data = get_dataset(args.dataset, 'train', transform['train'])
    train_loader = torch.utils.data.DataLoader(
        train_data,
        batch_size=args.batch_size, shuffle=True,
        num_workers=args.workers, pin_memory=True)

    # Placeholder optimizer; ``adjust_optimizer`` reconfigures it from the
    # regime at the start of every epoch.
    optimizer = torch.optim.SGD(model.parameters(), lr=args.lr)
    logging.info('training regime: %s', regime)

    for epoch in range(args.start_epoch, args.epochs):
        optimizer = adjust_optimizer(optimizer, epoch, regime)

        # train for one epoch
        train_loss, train_prec1, train_prec5 = train(
            train_loader, model, criterion, epoch, optimizer)

        # evaluate on validation set
        val_loss, val_prec1, val_prec5 = validate(
            val_loader, model, criterion, epoch)

        # remember best prec@1 and save checkpoint
        is_best = val_prec1 > best_prec1
        best_prec1 = max(val_prec1, best_prec1)

        save_checkpoint({
            'epoch': epoch + 1,
            'model': args.model,
            'config': args.model_config,
            'state_dict': model.state_dict(),
            'best_prec1': best_prec1,
            'regime': regime
        }, is_best, path=save_path)
        logging.info('\n Epoch: {0}\t'
                     'Training Loss {train_loss:.4f} \t'
                     'Training Prec@1 {train_prec1:.3f} \t'
                     'Training Prec@5 {train_prec5:.3f} \t'
                     'Validation Loss {val_loss:.4f} \t'
                     'Validation Prec@1 {val_prec1:.3f} \t'
                     'Validation Prec@5 {val_prec5:.3f} \n'
                     .format(epoch + 1, train_loss=train_loss, val_loss=val_loss,
                             train_prec1=train_prec1, val_prec1=val_prec1,
                             train_prec5=train_prec5, val_prec5=val_prec5))

        results.add(epoch=epoch + 1, train_loss=train_loss, val_loss=val_loss,
                    train_error1=100 - train_prec1, val_error1=100 - val_prec1,
                    train_error5=100 - train_prec5, val_error5=100 - val_prec5)
        #results.plot(x='epoch', y=['train_loss', 'val_loss'],
        #             title='Loss', ylabel='loss')
        #results.plot(x='epoch', y=['train_error1', 'val_error1'],
        #             title='Error@1', ylabel='error %')
        #results.plot(x='epoch', y=['train_error5', 'val_error5'],
        #             title='Error@5', ylabel='error %')
        results.save()
def forward(data_loader, model, criterion, epoch=0, training=True, optimizer=None):
    """Run one pass over ``data_loader`` and return average metrics.

    Args:
        data_loader: iterable yielding ``(inputs, target)`` batches.
        model: network to run; wrapped in ``DataParallel`` when more than
            one GPU is configured in ``args.gpus``.
        criterion: loss callable taking ``(output, target)``.
        epoch: epoch index, used only in log messages.
        training: when true, back-propagate and step ``optimizer``;
            otherwise the model is run under ``torch.no_grad()``.
        optimizer: optimizer to step; required when ``training`` is true.

    Returns:
        Tuple ``(average loss, average prec@1, average prec@5)``.
    """
    if args.gpus and len(args.gpus) > 1:
        model = torch.nn.DataParallel(model, args.gpus)
    batch_time = AverageMeter()
    data_time = AverageMeter()
    losses = AverageMeter()
    top1 = AverageMeter()
    top5 = AverageMeter()

    end = time.time()
    for i, (inputs, target) in enumerate(data_loader):
        # measure data loading time
        data_time.update(time.time() - end)
        if args.gpus is not None:
            target = target.cuda()

        # Tensors are passed directly: the ``Variable`` wrapper and its
        # removed ``volatile`` flag are no longer needed, and gradient
        # tracking during evaluation is disabled by ``torch.no_grad()``.
        typed_inputs = inputs.type(args.type)
        if training:
            output = model(typed_inputs)
        else:
            with torch.no_grad():
                output = model(typed_inputs)

        loss = criterion(output, target)
        # Metrics use only the first element when the model returns a list.
        if isinstance(output, list):
            output = output[0]

        # measure accuracy and record loss
        prec1, prec5 = accuracy(output.data, target, topk=(1, 5))
        losses.update(loss.item(), inputs.size(0))
        top1.update(prec1.item(), inputs.size(0))
        top5.update(prec5.item(), inputs.size(0))

        if training:
            # compute gradient and do SGD step
            optimizer.zero_grad()
            loss.backward()
            # Parameters carrying an ``org`` attribute are restored from
            # it before the step, and the stepped value, clamped to
            # [-1, 1], is written back afterwards.
            # NOTE(review): presumably ``org`` holds the real-valued copy
            # of binarized weights - confirm against the binarized modules.
            for p in list(model.parameters()):
                if hasattr(p, 'org'):
                    p.data.copy_(p.org)
            optimizer.step()
            for p in list(model.parameters()):
                if hasattr(p, 'org'):
                    p.org.copy_(p.data.clamp_(-1, 1))

        # measure elapsed time
        batch_time.update(time.time() - end)
        end = time.time()

        if i % args.print_freq == 0:
            logging.info('{phase} - Epoch: [{0}][{1}/{2}]\t'
                         'Time {batch_time.val:.3f} ({batch_time.avg:.3f})\t'
                         'Data {data_time.val:.3f} ({data_time.avg:.3f})\t'
                         'Loss {loss.val:.4f} ({loss.avg:.4f})\t'
                         'Prec@1 {top1.val:.3f} ({top1.avg:.3f})\t'
                         'Prec@5 {top5.val:.3f} ({top5.avg:.3f})'.format(
                             epoch, i, len(data_loader),
                             phase='TRAINING' if training else 'EVALUATING',
                             batch_time=batch_time,
                             data_time=data_time, loss=losses, top1=top1, top5=top5))

    return losses.avg, top1.avg, top5.avg
def train(data_loader, model, criterion, epoch, optimizer):
    """Put the model in train mode and run one optimizing pass.

    Returns whatever ``forward`` returns for the pass.
    """
    model.train()
    return forward(data_loader, model, criterion, epoch, True, optimizer)
def validate(data_loader, model, criterion, epoch):
    """Put the model in eval mode and run one pass without an optimizer.

    Returns whatever ``forward`` returns for the pass.
    """
    model.eval()
    return forward(data_loader, model, criterion, epoch, False, None)
| if __name__ == '__main__': | |||
| main() | |||
| @ -0,0 +1,332 @@ | |||
| import argparse | |||
| import os | |||
| import time | |||
| import logging | |||
| import torch | |||
| import torch.nn as nn | |||
| import torch.nn.parallel | |||
| import torch.backends.cudnn as cudnn | |||
| import torch.optim | |||
| import torch.utils.data | |||
| import models | |||
| from torch.autograd import Variable | |||
| from data import get_dataset | |||
| from preprocess import get_transform | |||
| from utils import * | |||
| from datetime import datetime | |||
| from ast import literal_eval | |||
| from torchvision.utils import save_image | |||
| from models.binarized_modules import HingeLoss | |||
# Lowercase, non-dunder, callable attributes of the ``models`` package;
# these are the accepted values of ``--model``.
model_names = sorted(
    candidate for candidate in models.__dict__
    if candidate.islower()
    and not candidate.startswith("__")
    and callable(models.__dict__[candidate])
)


def _build_parser():
    """Create the command-line parser for the binarized training script."""
    cli = argparse.ArgumentParser(description='PyTorch ConvNet Training')
    add = cli.add_argument

    # Output locations and dataset selection.
    add('--results_dir', metavar='RESULTS_DIR',
        default='/media/hdd/ihubara/BinaryNet.pytorch/results',
        help='results dir')
    add('--save', metavar='SAVE', default='',
        help='saved folder')
    add('--dataset', metavar='DATASET', default='imagenet',
        help='dataset name or folder')

    # Model selection and configuration.
    add('--model', '-a', metavar='MODEL', default='alexnet',
        choices=model_names,
        help='model architecture: ' +
        ' | '.join(model_names) +
        ' (default: alexnet)')
    add('--input_size', type=int, default=None,
        help='image input size')
    add('--model_config', default='',
        help='additional architecture configuration')

    # Device and data-loading options.
    add('--type', default='torch.cuda.FloatTensor',
        help='type of tensor - e.g torch.cuda.HalfTensor')
    add('--gpus', default='0',
        help='gpus used for training - e.g 0,1,3')
    add('-j', '--workers', default=8, type=int, metavar='N',
        help='number of data loading workers (default: 8)')

    # Training schedule and optimizer hyper-parameters.
    add('--epochs', default=900, type=int, metavar='N',
        help='number of total epochs to run')
    add('--start-epoch', default=0, type=int, metavar='N',
        help='manual epoch number (useful on restarts)')
    add('-b', '--batch-size', default=256, type=int,
        metavar='N', help='mini-batch size (default: 256)')
    add('--optimizer', default='SGD', type=str, metavar='OPT',
        help='optimizer function used')
    add('--lr', '--learning_rate', default=0.1, type=float,
        metavar='LR', help='initial learning rate')
    add('--momentum', default=0.9, type=float, metavar='M',
        help='momentum')
    add('--weight-decay', '--wd', default=1e-4, type=float,
        metavar='W', help='weight decay (default: 1e-4)')

    # Reporting, resuming and evaluation.
    add('--print-freq', '-p', default=10, type=int,
        metavar='N', help='print frequency (default: 10)')
    add('--resume', default='', type=str, metavar='PATH',
        help='path to latest checkpoint (default: none)')
    add('-e', '--evaluate', type=str, metavar='FILE',
        help='evaluate model FILE on validation set')
    return cli


parser = _build_parser()

# Seed the CUDA random number generators on all devices.
torch.cuda.random.manual_seed_all(10)

# Number of output classes; reassigned in main() from the chosen dataset.
output_dim = 0
| def main(): | |||
| global args, best_prec1, output_dim | |||
| best_prec1 = 0 | |||
| args = parser.parse_args() | |||
| output_dim = {'cifar10': 10, 'cifar100':100, 'imagenet': 1000}[args.dataset] | |||
| #import pdb; pdb.set_trace() | |||
| #torch.save(args.batch_size/(len(args.gpus)/2+1),'multi_gpu_batch_size') | |||
| if args.evaluate: | |||
| args.results_dir = '/tmp' | |||
| if args.save is '': | |||
| args.save = datetime.now().strftime('%Y-%m-%d_%H-%M-%S') | |||
| save_path = os.path.join(args.results_dir, args.save) | |||
| if not os.path.exists(save_path): | |||
| os.makedirs(save_path) | |||
| setup_logging(os.path.join(save_path, 'log.txt')) | |||
| results_file = os.path.join(save_path, 'results.%s') | |||
| results = ResultsLog(results_file % 'csv', results_file % 'html') | |||
| logging.info("saving to %s", save_path) | |||
| logging.debug("run arguments: %s", args) | |||
| if 'cuda' in args.type: | |||
| args.gpus = [int(i) for i in args.gpus.split(',')] | |||
| torch.cuda.set_device(args.gpus[0]) | |||
| cudnn.benchmark = True | |||
| else: | |||
| args.gpus = None | |||
| # create model | |||
| logging.info("creating model %s", args.model) | |||
| model = models.__dict__[args.model] | |||
| model_config = {'input_size': args.input_size, 'dataset': args.dataset, 'num_classes': output_dim} | |||
| if args.model_config is not '': | |||
| model_config = dict(model_config, **literal_eval(args.model_config)) | |||
| model = model(**model_config) | |||
| logging.info("created model with configuration: %s", model_config) | |||
| # optionally resume from a checkpoint | |||
| if args.evaluate: | |||
| if not os.path.isfile(args.evaluate): | |||
| parser.error('invalid checkpoint: {}'.format(args.evaluate)) | |||
| checkpoint = torch.load(args.evaluate) | |||
| model.load_state_dict(checkpoint['state_dict']) | |||
| logging.info("loaded checkpoint '%s' (epoch %s)", | |||
| args.evaluate, checkpoint['epoch']) | |||
| elif args.resume: | |||
| checkpoint_file = args.resume | |||
| if os.path.isdir(checkpoint_file): | |||
| results.load(os.path.join(checkpoint_file, 'results.csv')) | |||
| checkpoint_file = os.path.join( | |||
| checkpoint_file, 'model_best.pth.tar') | |||
| if os.path.isfile(checkpoint_file): | |||
| logging.info("loading checkpoint '%s'", args.resume) | |||
| checkpoint = torch.load(checkpoint_file) | |||
| args.start_epoch = checkpoint['epoch'] - 1 | |||
| best_prec1 = checkpoint['best_prec1'] | |||
| model.load_state_dict(checkpoint['state_dict']) | |||
| logging.info("loaded checkpoint '%s' (epoch %s)", | |||
| checkpoint_file, checkpoint['epoch']) | |||
| else: | |||
| logging.error("no checkpoint found at '%s'", args.resume) | |||
| num_parameters = sum([l.nelement() for l in model.parameters()]) | |||
| logging.info("number of parameters: %d", num_parameters) | |||
| # Data loading code | |||
| default_transform = { | |||
| 'train': get_transform(args.dataset, | |||
| input_size=args.input_size, augment=True), | |||
| 'eval': get_transform(args.dataset, | |||
| input_size=args.input_size, augment=False) | |||
| } | |||
| transform = getattr(model, 'input_transform', default_transform) | |||
| regime = getattr(model, 'regime', {0: {'optimizer': args.optimizer, | |||
| 'lr': args.lr, | |||
| 'momentum': args.momentum, | |||
| 'weight_decay': args.weight_decay}}) | |||
| # define loss function (criterion) and optimizer | |||
| #criterion = getattr(model, 'criterion', nn.NLLLoss)() | |||
| criterion = getattr(model, 'criterion', HingeLoss)() | |||
| #criterion.type(args.type) | |||
| model.type(args.type) | |||
| val_data = get_dataset(args.dataset, 'val', transform['eval']) | |||
| val_loader = torch.utils.data.DataLoader( | |||
| val_data, | |||
| batch_size=args.batch_size, shuffle=False, | |||
| num_workers=args.workers, pin_memory=True) | |||
| if args.evaluate: | |||
| validate(val_loader, model, criterion, 0) | |||
| return | |||
| train_data = get_dataset(args.dataset, 'train', transform['train']) | |||
| train_loader = torch.utils.data.DataLoader( | |||
| train_data, | |||
| batch_size=args.batch_size, shuffle=True, | |||
| num_workers=args.workers, pin_memory=True) | |||
| optimizer = torch.optim.SGD(model.parameters(), lr=args.lr) | |||
| logging.info('training regime: %s', regime) | |||
| #import pdb; pdb.set_trace() | |||
| #search_binarized_modules(model) | |||
| for epoch in range(args.start_epoch, args.epochs): | |||
| optimizer = adjust_optimizer(optimizer, epoch, regime) | |||
| # train for one epoch | |||
| train_loss, train_prec1, train_prec5 = train( | |||
| train_loader, model, criterion, epoch, optimizer) | |||
| # evaluate on validation set | |||
| val_loss, val_prec1, val_prec5 = validate( | |||
| val_loader, model, criterion, epoch) | |||
| # remember best prec@1 and save checkpoint | |||
| is_best = val_prec1 > best_prec1 | |||
| best_prec1 = max(val_prec1, best_prec1) | |||
| save_checkpoint({ | |||
| 'epoch': epoch + 1, | |||
| 'model': args.model, | |||
| 'config': args.model_config, | |||
| 'state_dict': model.state_dict(), | |||
| 'best_prec1': best_prec1, | |||
| 'regime': regime | |||
| }, is_best, path=save_path) | |||
| logging.info('\n Epoch: {0}\t' | |||
| 'Training Loss {train_loss:.4f} \t' | |||
| 'Training Prec@1 {train_prec1:.3f} \t' | |||
| 'Training Prec@5 {train_prec5:.3f} \t' | |||
| 'Validation Loss {val_loss:.4f} \t' | |||
| 'Validation Prec@1 {val_prec1:.3f} \t' | |||
| 'Validation Prec@5 {val_prec5:.3f} \n' | |||
| .format(epoch + 1, train_loss=train_loss, val_loss=val_loss, | |||
| train_prec1=train_prec1, val_prec1=val_prec1, | |||
| train_prec5=train_prec5, val_prec5=val_prec5)) | |||
| results.add(epoch=epoch + 1, train_loss=train_loss, val_loss=val_loss, | |||
| train_error1=100 - train_prec1, val_error1=100 - val_prec1, | |||
| train_error5=100 - train_prec5, val_error5=100 - val_prec5) | |||
| results.plot(x='epoch', y=['train_loss', 'val_loss'], | |||
| title='Loss', ylabel='loss') | |||
| results.plot(x='epoch', y=['train_error1', 'val_error1'], | |||
| title='Error@1', ylabel='error %') | |||
| results.plot(x='epoch', y=['train_error5', 'val_error5'], | |||
| title='Error@5', ylabel='error %') | |||
| results.save() | |||
def forward(data_loader, model, criterion, epoch=0, training=True, optimizer=None):
    """Run one pass over ``data_loader`` and return the averaged metrics.

    Args:
        data_loader: iterable yielding ``(inputs, target)`` batches.
        model: network to run; wrapped in ``DataParallel`` when more than
            one GPU is listed in ``args.gpus``.
        criterion: loss module. When its class is named ``HingeLoss`` the
            class-index targets are expanded to a +1/-1 matrix; any other
            criterion receives the class indices unchanged.
        epoch: epoch number, used only in the log line.
        training: when True, back-propagate and step ``optimizer``;
            otherwise the model is run under ``torch.no_grad()``.
        optimizer: optimizer to step; required when ``training`` is True.

    Returns:
        Tuple ``(losses.avg, top1.avg, top5.avg)`` accumulated over the pass.
    """
    if args.gpus and len(args.gpus) > 1:
        model = torch.nn.DataParallel(model, args.gpus)

    batch_time = AverageMeter()
    data_time = AverageMeter()
    losses = AverageMeter()
    top1 = AverageMeter()
    top5 = AverageMeter()
    uses_hinge = criterion.__class__.__name__ == 'HingeLoss'

    end = time.time()
    for i, (inputs, target) in enumerate(data_loader):
        # measure data loading time
        data_time.update(time.time() - end)
        if args.gpus is not None:
            target = target.cuda()

        if uses_hinge:
            # Hinge loss expects +1 for the true class and -1 everywhere else.
            # Build the matrix on the target's own device so CPU runs work too.
            loss_target = torch.full((target.size(0), output_dim), -1.0,
                                     dtype=torch.float32, device=target.device)
            loss_target.scatter_(1, target.unsqueeze(1), 1.0)
        else:
            loss_target = target

        inputs_typed = inputs.type(args.type)
        if training:
            output = model(inputs_typed)
        else:
            with torch.no_grad():
                output = model(inputs_typed)

        loss = criterion(output, loss_target)
        if isinstance(output, list):
            output = output[0]

        # measure accuracy and record loss
        prec1, prec5 = accuracy(output.data, target, topk=(1, 5))
        losses.update(loss.item(), inputs.size(0))
        top1.update(prec1.item(), inputs.size(0))
        top5.update(prec5.item(), inputs.size(0))

        if training:
            # compute gradient and do SGD step
            optimizer.zero_grad()
            loss.backward()
            # Parameters carrying an `org` attribute hold binarized values in
            # `.data`; restore the full-precision copy so the optimizer
            # updates the real-valued weights.
            for p in model.parameters():
                if hasattr(p, 'org'):
                    p.data.copy_(p.org)
            optimizer.step()
            # Keep the real-valued weights inside [-1, 1] and store them back
            # as the new full-precision copy.
            for p in model.parameters():
                if hasattr(p, 'org'):
                    p.org.copy_(p.data.clamp_(-1, 1))

        # measure elapsed time
        batch_time.update(time.time() - end)
        end = time.time()

        if i % args.print_freq == 0:
            logging.info('{phase} - Epoch: [{0}][{1}/{2}]\t'
                         'Time {batch_time.val:.3f} ({batch_time.avg:.3f})\t'
                         'Data {data_time.val:.3f} ({data_time.avg:.3f})\t'
                         'Loss {loss.val:.4f} ({loss.avg:.4f})\t'
                         'Prec@1 {top1.val:.3f} ({top1.avg:.3f})\t'
                         'Prec@5 {top5.val:.3f} ({top5.avg:.3f})'.format(
                             epoch, i, len(data_loader),
                             phase='TRAINING' if training else 'EVALUATING',
                             batch_time=batch_time,
                             data_time=data_time, loss=losses, top1=top1, top5=top5))

    return losses.avg, top1.avg, top5.avg
def train(data_loader, model, criterion, epoch, optimizer):
    """Put ``model`` in training mode and run one optimizing pass.

    Returns whatever ``forward`` returns for the pass.
    """
    model.train()  # switch to train mode
    epoch_metrics = forward(data_loader, model, criterion, epoch=epoch,
                            training=True, optimizer=optimizer)
    return epoch_metrics
def validate(data_loader, model, criterion, epoch):
    """Put ``model`` in evaluation mode and run one pass without an optimizer.

    Returns whatever ``forward`` returns for the pass.
    """
    model.eval()  # switch to evaluate mode
    epoch_metrics = forward(data_loader, model, criterion, epoch=epoch,
                            training=False, optimizer=None)
    return epoch_metrics
| # Script entry point: call main() only when this file is executed directly. | |||
| if __name__ == '__main__': | |||
| main() | |||
| @ -0,0 +1,150 @@ | |||
| from __future__ import print_function | |||
| import argparse | |||
| import torch | |||
| import torch.nn as nn | |||
| import torch.nn.functional as F | |||
| import torch.optim as optim | |||
| from torchvision import datasets, transforms | |||
| from torch.autograd import Variable | |||
| from models.binarized_modules import BinarizeLinear,BinarizeConv2d | |||
| from models.binarized_modules import Binarize,HingeLoss | |||
# Training settings.
# Help strings use argparse's %(default)s so the documented default can never
# drift away from the actual one.
parser = argparse.ArgumentParser(description='PyTorch MNIST Example')
parser.add_argument('--batch-size', type=int, default=64, metavar='N',
                    help='input batch size for training (default: %(default)s)')
parser.add_argument('--test-batch-size', type=int, default=1000, metavar='N',
                    help='input batch size for testing (default: %(default)s)')
parser.add_argument('--epochs', type=int, default=100, metavar='N',
                    help='number of epochs to train (default: %(default)s)')
parser.add_argument('--lr', type=float, default=0.01, metavar='LR',
                    help='learning rate (default: %(default)s)')
parser.add_argument('--momentum', type=float, default=0.5, metavar='M',
                    help='SGD momentum (default: %(default)s)')
parser.add_argument('--no-cuda', action='store_true', default=False,
                    help='disables CUDA training')
parser.add_argument('--seed', type=int, default=1, metavar='S',
                    help='random seed (default: %(default)s)')
parser.add_argument('--gpus', default=3,
                    help='gpus used for training - e.g 0,1,3')
parser.add_argument('--log-interval', type=int, default=10, metavar='N',
                    help='how many batches to wait before logging training status')
args = parser.parse_args()
# CUDA is used only when it was not disabled on the command line and is available.
args.cuda = not args.no_cuda and torch.cuda.is_available()

torch.manual_seed(args.seed)
if args.cuda:
    torch.cuda.manual_seed(args.seed)

# Extra DataLoader options are only passed when running on CUDA.
kwargs = {'num_workers': 1, 'pin_memory': True} if args.cuda else {}
train_loader = torch.utils.data.DataLoader(
    datasets.MNIST('../data', train=True, download=True,
                   transform=transforms.Compose([
                       transforms.ToTensor(),
                       transforms.Normalize((0.1307,), (0.3081,))
                   ])),
    batch_size=args.batch_size, shuffle=True, **kwargs)
test_loader = torch.utils.data.DataLoader(
    datasets.MNIST('../data', train=False, transform=transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize((0.1307,), (0.3081,))
    ])),
    batch_size=args.test_batch_size, shuffle=True, **kwargs)
class Net(nn.Module):
    """Fully connected MNIST classifier built from binarized linear layers.

    Three ``BinarizeLinear`` layers of width ``2048 * infl_ratio``, each
    followed by batch norm and hard-tanh, feed a full-precision linear layer
    with 10 outputs and a log-softmax.
    """

    def __init__(self):
        super(Net, self).__init__()
        self.infl_ratio=3
        hidden = 2048*self.infl_ratio
        self.fc1 = BinarizeLinear(784, hidden)
        self.htanh1 = nn.Hardtanh()
        self.bn1 = nn.BatchNorm1d(hidden)
        self.fc2 = BinarizeLinear(hidden, hidden)
        self.htanh2 = nn.Hardtanh()
        self.bn2 = nn.BatchNorm1d(hidden)
        self.fc3 = BinarizeLinear(hidden, hidden)
        self.htanh3 = nn.Hardtanh()
        self.bn3 = nn.BatchNorm1d(hidden)
        self.fc4 = nn.Linear(hidden, 10)
        # forward() always flattens to (N, 784), so the class axis is dim 1;
        # naming it explicitly avoids the deprecated implicit-dim behavior.
        self.logsoftmax=nn.LogSoftmax(dim=1)
        self.drop=nn.Dropout(0.5)

    def forward(self, x):
        """Return per-class log-probabilities for a batch of 28x28 inputs."""
        x = x.view(-1, 28*28)
        x = self.fc1(x)
        x = self.bn1(x)
        x = self.htanh1(x)
        x = self.fc2(x)
        x = self.bn2(x)
        x = self.htanh2(x)
        x = self.fc3(x)
        # dropout is applied before the third batch norm, as in the original
        x = self.drop(x)
        x = self.bn3(x)
        x = self.htanh3(x)
        x = self.fc4(x)
        return self.logsoftmax(x)
model = Net()
if args.cuda:
    # Honor --gpus instead of a hard-coded device index. The option may be an
    # int (the default, 3) or a comma-separated string such as "0,1,3"; the
    # first listed GPU becomes the active device.
    primary_gpu = int(str(args.gpus).split(',')[0])
    torch.cuda.set_device(primary_gpu)
    model.cuda()

criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=args.lr)
def train(epoch):
    """Train the module-level ``model`` for one epoch over ``train_loader``.

    The learning-rate schedule is NOT touched here: the epoch loop at the
    bottom of the script already divides the rate by 10 every 40 epochs.
    Decaying inside the batch loop as well multiplied the rate by 0.1 once
    per batch during those epochs, collapsing it to zero.

    Args:
        epoch: 1-based epoch number, used only for the progress message.
    """
    model.train()
    for batch_idx, (data, target) in enumerate(train_loader):
        if args.cuda:
            data, target = data.cuda(), target.cuda()
        output = model(data)
        loss = criterion(output, target)

        optimizer.zero_grad()
        loss.backward()
        # Binarized layers keep their full-precision weights in `p.org`;
        # restore them so the optimizer updates the real-valued copy.
        for p in model.parameters():
            if hasattr(p,'org'):
                p.data.copy_(p.org)
        optimizer.step()
        # Clamp the updated real-valued weights to [-1, 1] and save them back.
        for p in model.parameters():
            if hasattr(p,'org'):
                p.org.copy_(p.data.clamp_(-1,1))

        if batch_idx % args.log_interval == 0:
            print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format(
                epoch, batch_idx * len(data), len(train_loader.dataset),
                100. * batch_idx / len(train_loader), loss.item()))
def test():
    """Evaluate the module-level ``model`` on ``test_loader`` and print a summary.

    Prints the per-sample average loss and the number and percentage of
    correct predictions.
    """
    model.eval()
    test_loss = 0.0
    correct = 0
    with torch.no_grad():
        for data, target in test_loader:
            if args.cuda:
                data, target = data.cuda(), target.cuda()
            output = model(data)
            # The criterion is created with its default mean reduction, so
            # weight each batch by its size before dividing by the dataset
            # size below.
            test_loss += criterion(output, target).item() * data.size(0)
            pred = output.max(1, keepdim=True)[1]  # index of the max log-probability
            # .item() keeps `correct` a plain int so it prints as a number
            correct += pred.eq(target.view_as(pred)).sum().item()

    test_loss /= len(test_loader.dataset)
    print('\nTest set: Average loss: {:.4f}, Accuracy: {}/{} ({:.0f}%)\n'.format(
        test_loss, correct, len(test_loader.dataset),
        100. * correct / len(test_loader.dataset)))
# Main schedule: train and evaluate every epoch, dividing the learning rate of
# the first parameter group by ten after every 40th epoch.
for epoch in range(1, args.epochs + 1):
    train(epoch)
    test()
    if epoch % 40 != 0:
        continue
    first_group = optimizer.param_groups[0]
    first_group['lr'] = first_group['lr'] * 0.1
| @ -0,0 +1,6 @@ | |||
| from .alexnet import * | |||
| from .alexnet_binary import * | |||
| from .resnet import * | |||
| from .resnet_binary import * | |||
| from .vgg_cifar10_binary import * | |||
| @ -0,0 +1,78 @@ | |||
| import torch.nn as nn | |||
| import torchvision.transforms as transforms | |||
| __all__ = ['alexnet'] | |||
class AlexNetOWT_BN(nn.Module):
    """Full-precision AlexNet variant with batch normalization.

    Besides the layers, the instance exposes ``regime`` (an epoch-indexed
    optimizer schedule) and ``input_transform`` (``'train'`` and ``'eval'``
    preprocessing pipelines).

    Args:
        num_classes: size of the final linear layer.
    """

    def __init__(self, num_classes=1000):
        super(AlexNetOWT_BN, self).__init__()
        self.features = nn.Sequential(
            nn.Conv2d(3, 64, kernel_size=11, stride=4, padding=2,
                      bias=False),
            nn.MaxPool2d(kernel_size=3, stride=2),
            nn.BatchNorm2d(64),
            nn.ReLU(inplace=True),
            nn.Conv2d(64, 192, kernel_size=5, padding=2, bias=False),
            nn.MaxPool2d(kernel_size=3, stride=2),
            nn.ReLU(inplace=True),
            nn.BatchNorm2d(192),
            nn.Conv2d(192, 384, kernel_size=3, padding=1, bias=False),
            nn.ReLU(inplace=True),
            nn.BatchNorm2d(384),
            nn.Conv2d(384, 256, kernel_size=3, padding=1, bias=False),
            nn.ReLU(inplace=True),
            nn.BatchNorm2d(256),
            nn.Conv2d(256, 256, kernel_size=3, padding=1, bias=False),
            nn.MaxPool2d(kernel_size=3, stride=2),
            nn.ReLU(inplace=True),
            nn.BatchNorm2d(256)
        )
        self.classifier = nn.Sequential(
            nn.Linear(256 * 6 * 6, 4096, bias=False),
            nn.BatchNorm1d(4096),
            nn.ReLU(inplace=True),
            nn.Dropout(0.5),
            nn.Linear(4096, 4096, bias=False),
            nn.BatchNorm1d(4096),
            nn.ReLU(inplace=True),
            nn.Dropout(0.5),
            nn.Linear(4096, num_classes)
        )
        # Epoch-indexed optimizer settings; each entry overrides the keys it lists.
        self.regime = {
            0: {'optimizer': 'SGD', 'lr': 1e-2,
                'weight_decay': 5e-4, 'momentum': 0.9},
            10: {'lr': 5e-3},
            15: {'lr': 1e-3, 'weight_decay': 0},
            20: {'lr': 5e-4},
            25: {'lr': 1e-4}
        }
        normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                         std=[0.229, 0.224, 0.225])
        # torchvision renamed Scale to Resize and later removed Scale; prefer
        # Resize and fall back to Scale only on releases that predate it.
        resize = getattr(transforms, 'Resize', None) or getattr(transforms, 'Scale')
        self.input_transform = {
            'train': transforms.Compose([
                resize(256),
                transforms.RandomCrop(224),
                transforms.RandomHorizontalFlip(),
                transforms.ToTensor(),
                normalize
            ]),
            'eval': transforms.Compose([
                resize(256),
                transforms.CenterCrop(224),
                transforms.ToTensor(),
                normalize
            ])
        }

    def forward(self, x):
        """Apply the feature extractor, flatten to 256*6*6, then classify."""
        x = self.features(x)
        x = x.view(-1, 256 * 6 * 6)
        x = self.classifier(x)
        return x
def alexnet(**kwargs):
    """Build an ``AlexNetOWT_BN``; only ``num_classes`` (default 1000) is read."""
    return AlexNetOWT_BN(kwargs.get('num_classes', 1000))
| @ -0,0 +1,92 @@ | |||
| import torch.nn as nn | |||
| import torchvision.transforms as transforms | |||
| from .binarized_modules import BinarizeLinear,BinarizeConv2d | |||
| __all__ = ['alexnet_binary'] | |||
class AlexNetOWT_BN(nn.Module):
    """AlexNet variant built from binarized convolution and linear layers.

    Channel counts of the first four convolutions are multiplied by
    ``ratioInfl``. The instance also exposes ``regime`` (an epoch-indexed
    optimizer schedule) and ``input_transform`` (``'train'`` and ``'eval'``
    preprocessing pipelines).

    Args:
        num_classes: number of outputs of the final binarized linear layer.
    """

    def __init__(self, num_classes=1000):
        super(AlexNetOWT_BN, self).__init__()
        self.ratioInfl=3
        self.features = nn.Sequential(
            BinarizeConv2d(3, int(64*self.ratioInfl), kernel_size=11, stride=4, padding=2),
            nn.MaxPool2d(kernel_size=3, stride=2),
            nn.BatchNorm2d(int(64*self.ratioInfl)),
            nn.Hardtanh(inplace=True),
            BinarizeConv2d(int(64*self.ratioInfl), int(192*self.ratioInfl), kernel_size=5, padding=2),
            nn.MaxPool2d(kernel_size=3, stride=2),
            nn.BatchNorm2d(int(192*self.ratioInfl)),
            nn.Hardtanh(inplace=True),
            BinarizeConv2d(int(192*self.ratioInfl), int(384*self.ratioInfl), kernel_size=3, padding=1),
            nn.BatchNorm2d(int(384*self.ratioInfl)),
            nn.Hardtanh(inplace=True),
            BinarizeConv2d(int(384*self.ratioInfl), int(256*self.ratioInfl), kernel_size=3, padding=1),
            nn.BatchNorm2d(int(256*self.ratioInfl)),
            nn.Hardtanh(inplace=True),
            BinarizeConv2d(int(256*self.ratioInfl), 256, kernel_size=3, padding=1),
            nn.MaxPool2d(kernel_size=3, stride=2),
            nn.BatchNorm2d(256),
            nn.Hardtanh(inplace=True)
        )
        self.classifier = nn.Sequential(
            BinarizeLinear(256 * 6 * 6, 4096),
            nn.BatchNorm1d(4096),
            nn.Hardtanh(inplace=True),
            BinarizeLinear(4096, 4096),
            nn.BatchNorm1d(4096),
            nn.Hardtanh(inplace=True),
            BinarizeLinear(4096, num_classes),
            # Must match the preceding layer's width; a literal 1000 broke
            # every dataset whose class count is not 1000.
            nn.BatchNorm1d(num_classes),
            # The classifier input is always 2-D (see forward), so the class
            # axis is dim 1; stating it avoids the deprecated implicit dim.
            nn.LogSoftmax(dim=1)
        )
        # Epoch-indexed optimizer settings; each entry overrides the keys it lists.
        self.regime = {
            0: {'optimizer': 'Adam', 'lr': 5e-3},
            20: {'lr': 1e-3},
            30: {'lr': 5e-4},
            35: {'lr': 1e-4},
            40: {'lr': 1e-5}
        }
        normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                         std=[0.229, 0.224, 0.225])
        # torchvision renamed Scale to Resize and later removed Scale; prefer
        # Resize and fall back to Scale only on releases that predate it.
        resize = getattr(transforms, 'Resize', None) or getattr(transforms, 'Scale')
        self.input_transform = {
            'train': transforms.Compose([
                resize(256),
                transforms.RandomCrop(224),
                transforms.RandomHorizontalFlip(),
                transforms.ToTensor(),
                normalize
            ]),
            'eval': transforms.Compose([
                resize(256),
                transforms.CenterCrop(224),
                transforms.ToTensor(),
                normalize
            ])
        }

    def forward(self, x):
        """Apply the feature extractor, flatten to 256*6*6, then classify."""
        x = self.features(x)
        x = x.view(-1, 256 * 6 * 6)
        x = self.classifier(x)
        return x
def alexnet_binary(**kwargs):
    """Build the binarized ``AlexNetOWT_BN``; only ``num_classes`` (default 1000) is read."""
    return AlexNetOWT_BN(kwargs.get('num_classes', 1000))
| @ -0,0 +1,423 @@ | |||
| import torch | |||
| import pdb | |||
| import torch.nn as nn | |||
| import math | |||
| from torch.autograd import Variable | |||
| from torch.autograd import Function | |||
| from decimal import Decimal, ROUND_HALF_UP | |||
| import numpy as np | |||
def Binarize(tensor,quant_mode='det'):
    """Return a binarized copy of ``tensor``.

    Args:
        tensor: values to binarize; it is never modified.
        quant_mode: ``'det'`` returns ``tensor.sign()`` (so exact zeros stay
            zero). Any other value selects stochastic binarization: each
            element becomes +1 or -1 after ``(x + 1) / 2`` is perturbed by
            uniform noise from [-0.5, 0.5), clipped to [0, 1] and rounded.

    Returns:
        A new tensor with the same shape as ``tensor``.
    """
    if quant_mode=='det':
        return tensor.sign()
    # Work out of place: callers pass their stored full-precision weights,
    # which the former in-place chain overwrote with noisy values. rand_like
    # also draws the noise on the tensor's own device.
    noise = torch.rand_like(tensor).add(-0.5)
    keep_prob = tensor.add(1).div(2).add(noise).clamp(0, 1)
    return keep_prob.round().mul(2).add(-1)
class HingeLoss(nn.Module):
    """Mean hinge loss with a fixed margin of 1.0.

    Each element contributes ``margin - input * target`` when that value is
    positive and zero otherwise; the result is the mean over all elements.
    """

    def __init__(self):
        super(HingeLoss, self).__init__()
        self.margin = 1.0

    def hinge_loss(self, input, target):
        """Return the mean element-wise hinge loss of ``input`` against ``target``."""
        slack = self.margin - input * target
        # zero out every element that already satisfies the margin
        return slack.masked_fill(slack <= 0, 0).mean()

    def forward(self, input, target):
        return self.hinge_loss(input, target)
class SqrtHingeLossFunction(Function):
    """Squared hinge loss as a custom autograd function.

    Use ``SqrtHingeLossFunction.apply(input, target)``. The loss is the sum of
    ``max(0, margin - input * target) ** 2`` divided by the number of target
    elements.

    Changes from the earlier implementation: forward and backward are static
    methods (the form current PyTorch requires), the debugger breakpoint that
    halted every backward pass is gone, the reduction no longer relies on
    ``sum(dim)`` keeping the reduced dimension, and the incoming gradient is
    respected instead of being overwritten.
    """

    # Hinge margin; a class attribute so the static methods can read it.
    margin = 1.0

    @staticmethod
    def forward(ctx, input, target):
        """Return the scalar squared-hinge loss."""
        hinge = (SqrtHingeLossFunction.margin - input.mul(target)).clamp(min=0)
        ctx.save_for_backward(input, target)
        return hinge.mul(hinge).sum().div(target.numel())

    @staticmethod
    def backward(ctx, grad_output):
        """Return the gradient w.r.t. ``input``; ``target`` receives none."""
        input, target = ctx.saved_tensors
        hinge = (SqrtHingeLossFunction.margin - input.mul(target)).clamp(min=0)
        # d/d(input) of hinge**2 / N is -2 * target * hinge / N; hinge is
        # already zero wherever the margin is satisfied.
        grad_input = target.mul(hinge).mul(-2.0 / input.numel())
        return grad_input * grad_output, None
def Quantize(tensor,quant_mode='det', params=None, numBits=8):
    """Quantize ``tensor`` to a grid with step ``2 ** -(numBits - 1)``.

    Note that ``tensor`` is first clamped IN PLACE to
    ``[-2 ** (numBits - 1), 2 ** (numBits - 1)]``; the quantized values are
    returned in a new tensor.

    Args:
        tensor: values to quantize (clamped in place as described above).
        quant_mode: ``'det'`` rounds to the nearest grid point; any other
            value adds uniform noise from [-0.5, 0.5) after rounding.
        params: only forwarded to ``quant_fixed`` in the non-deterministic mode.
        numBits: bit width that determines the clamp range and the step.

    Returns:
        The quantized tensor.
    """
    scale = 2**(numBits-1)
    tensor.clamp_(-scale, scale)
    if quant_mode=='det':
        tensor=tensor.mul(scale).round().div(scale)
    else:
        # rand_like draws the noise on the tensor's own device; the former
        # torch.rand(size) always produced a CPU tensor.
        noise = torch.rand_like(tensor).add(-0.5)
        tensor=tensor.mul(scale).round().add(noise).div(scale)
        # NOTE(review): quant_fixed is not defined in the visible part of this
        # module - confirm it exists before using the non-deterministic mode.
        quant_fixed(tensor, params)
    return tensor
| #import torch.nn._functions as tnnf | |||
class BinarizeLinear(nn.Linear):
    """Linear layer whose weights are binarized on every forward pass.

    The full-precision weights are kept in ``self.weight.org``; the binarized
    values produced by ``Binarize`` are what ``self.weight.data`` holds while
    the layer computes its output.
    """

    def __init__(self, *kargs, **kwargs):
        super(BinarizeLinear, self).__init__(*kargs, **kwargs)

    def forward(self, input):
        weight = self.weight
        # first call: remember the real-valued weights before overwriting them
        if not hasattr(weight, 'org'):
            weight.org = weight.data.clone()
        weight.data = Binarize(weight.org)

        out = nn.functional.linear(input, weight)
        if self.bias is None:
            return out

        # the bias is not binarized; its current values are snapshotted in .org
        self.bias.org = self.bias.data.clone()
        out += self.bias.view(1, -1).expand_as(out)
        return out
| # 2-D convolution that binarizes its weights with Binarize() and rounds both | |||
| # its input and its rescaled output to integers. | |||
| class BinarizeConv2d(nn.Conv2d): | |||
| def __init__(self, *kargs, **kwargs): | |||
| super(BinarizeConv2d, self).__init__(*kargs, **kwargs) | |||
| # Returns the rounded, rescaled convolution of the rounded input with the | |||
| # binarized weights. The layer's bias is never applied (conv2d gets None). | |||
| def forward(self, input): | |||
| # if input.size(1) != 3: | |||
| # input.data = Binarize(input.data) | |||
| # On the first call keep a copy of the full-precision weights in weight.org; | |||
| # weight.data is overwritten with the binarized values on every call. | |||
| if not hasattr(self.weight,'org'): | |||
| self.weight.org=self.weight.data.clone() | |||
| self.weight.data=Binarize(self.weight.org) | |||
| #input = torch.round(input) | |||
| #input = input*2-1 | |||
| #scale = max(torch.max(input), -torch.min(input)) / 63 | |||
| #input = torch.round(input*2 / scale) - 63 | |||
| #if scale != 0: | |||
| # input = torch.round(input / scale) | |||
| #print (torch.max(input)) | |||
| #print(input) | |||
| input = torch.round(input) | |||
| #print(input) | |||
| #print (torch.max(input)) | |||
| out = nn.functional.conv2d(input, self.weight, None, self.stride, | |||
| self.padding, self.dilation, self.groups) | |||
| #print (torch.min(out), torch.max(out)) | |||
| #out = torch.round(out) | |||
| #print (torch.min(out), torch.max(out)) | |||
| #print (torch.min(input), torch.max(input)) | |||
| #out = torch.round(out / 64 * 36 / 64) | |||
| #print (self.weight.size()[1]) | |||
| #if self.weight.size()[1] >= 16 and self.weight.size()[1] <= 24: | |||
| # The output scale depends on the weight shape: dim 1 >= 4 with a 9-element | |||
| # kernel, dim 1 == 1, or anything else. | |||
| if self.weight.size()[1] >= 4 and self.weight.size()[2] * self.weight.size()[3] == 9: | |||
| out = torch.round(out / 64 * 36 / 64) | |||
| elif self.weight.size()[1] == 1: | |||
| out = torch.round(out * 7 / 64) | |||
| else: | |||
| out = torch.round(out / 64) | |||
| # NOTE(review): confirm whether the x4 scaling and the +/-63 saturation below | |||
| # are meant to run for every branch above or only for the final else. | |||
| out = out * 4 | |||
| out[out > 63] = 63 | |||
| out[out < -63] = -63 | |||
| #out = out - torch.round(torch.mean(out)) | |||
| # out = out*4 | |||
| #out[out > 63] = 63 | |||
| #out[out < -63] = -63 | |||
| #else: | |||
| # out = torch.round(out * 10 / 64) | |||
| #print (torch.min(out), torch.max(out)) | |||
| # if not self.bias is None: | |||
| # self.bias.org=self.bias.data.clone() | |||
| # out += self.bias.view(1, -1, 1, 1).expand_as(out) | |||
| return out | |||
class IdealCimConv2d(nn.Conv2d):
    """Convolution with binarized weights, rounded input and saturated output.

    The raw convolution result is divided by 64, multiplied by 4 and limited
    to the range [-63, 63]. The layer's bias is never applied.
    """

    def __init__(self, *kargs, **kwargs):
        super(IdealCimConv2d, self).__init__(*kargs, **kwargs)

    def forward(self, input):
        weight = self.weight
        # first call: remember the real-valued weights before overwriting them
        if not hasattr(weight, 'org'):
            weight.org = weight.data.clone()
        weight.data = Binarize(weight.org)

        rounded_input = torch.round(input)
        raw = nn.functional.conv2d(rounded_input, weight, None, self.stride,
                                   self.padding, self.dilation, self.groups)
        scaled = (raw / 64) * 4
        # saturate at +/-63
        return torch.clamp(scaled, -63, 63)
# Device used by the CIM simulation code below. Falls back to the CPU so the
# module stays usable on hosts without CUDA.
device = 'cuda:0' if torch.cuda.is_available() else 'cpu'

# The learned error model is disabled: it lives inside the string literal
# below, so `sim_model` is NOT defined when this module is imported.
'''
H = [1024, 512]
sim_model = torch.nn.Sequential(
torch.nn.Linear(36, H[0]),
torch.nn.Dropout(p=0.5),
torch.nn.ReLU(),
torch.nn.Linear(H[0], H[1]),
torch.nn.Dropout(p=0.5),
torch.nn.ReLU(),
torch.nn.Linear(H[-1], 1),
)
sim_model.load_state_dict(torch.load('model_error.ckpt', map_location=torch.device('cuda:0')))
sim_model = sim_model.to(device)
sim_model.eval()
'''
class CimSimConv2d(nn.Conv2d):
    """Convolution that simulates a compute-in-memory array.

    Weights are binarized with ``Binarize``, inputs are rounded, and the
    convolution is evaluated in channel groups whose partial sums are rounded
    and passed through the ``Mapping`` lookup before being accumulated. The
    layer's bias is never applied.
    """

    def __init__(self, *kargs, **kwargs):
        super(CimSimConv2d, self).__init__(*kargs, **kwargs)
        self.device = device

    def forward(self, input):
        """Return the simulated output, multiplied by 4 and saturated at +/-63."""
        # first call: remember the real-valued weights before overwriting them
        if not hasattr(self.weight,'org'):
            self.weight.org=self.weight.data.clone()
        self.weight.data=Binarize(self.weight.org)

        input = torch.round(input)
        out2 = self.simconv(input, self.weight)
        out2 = out2 * 4
        out2[out2 > 63] = 63
        out2[out2 < -63] = -63
        return out2

    def simconv(self, input_a, weight):
        """Accumulate mapped partial convolutions over input-channel groups.

        Channels are processed one at a time for 7-wide kernels and four at a
        time otherwise. Each group's convolution is divided by 64, rounded,
        translated by ``Mapping`` and offset by +2 before being summed.

        The accumulator takes its shape, dtype and device from the first
        partial result, so any stride/padding and any input device work. The
        previous pre-allocated buffer assumed stride 1 with no padding and
        the module-level device.
        """
        kernel_group = 1 if weight.size()[2] == 7 else 4
        input_groups = torch.split(input_a, kernel_group, dim=1)
        weight_groups = torch.split(weight, kernel_group, dim=1)

        simout = None
        for i, input_group in enumerate(input_groups):
            partial = nn.functional.conv2d(input_group, weight_groups[i], None,
                                           self.stride, self.padding,
                                           self.dilation, self.groups)
            partial = torch.round(partial / 64)
            partial = Mapping.apply(partial) + 2
            simout = partial if simout is None else simout + partial
        return simout

    def cim_conv_tmp(self, input, weight):
        """Sum error-adjusted row results for one flattened window.

        ``input`` and ``weight`` are equal-length 1-D tensors. They are
        multiplied element-wise in fixed-size chunks; each chunk contributes
        its sum divided by 64 (rounded half up), plus the ``sim_model``
        prediction limited to [-10, 10], plus 2.

        NOTE(review): relies on a module-level ``sim_model`` that is currently
        disabled, and nothing in this class calls this method.
        """
        assert len(input) == len(weight)
        # NOTE(review): the 36-wide chunking is selected only when the window
        # has exactly 3 elements - confirm this condition is intended.
        chunk = 36 if len(weight) == 3 else 49
        raw_sum = 0
        for i in range((len(input)-1) // chunk + 1):
            data_x = input[i*chunk:i*chunk+chunk] * weight[i*chunk:i*chunk+chunk]
            row = int(Decimal(float(sum(data_x)/64.0)).quantize(0, ROUND_HALF_UP))
            if len(data_x) < chunk:
                # pad on the same device as the data so torch.cat cannot fail
                padding = torch.zeros(chunk - len(data_x), dtype=data_x.dtype,
                                      device=data_x.device)
                data_x = torch.cat((data_x, padding))
            y_pred = sim_model(data_x.to(device))
            adjust = max(-10, min(10, int(y_pred[0])))
            raw_sum += (row + adjust + 2)
        return raw_sum
class Mapping(torch.autograd.Function):
    """Element-wise lookup that translates integer levels to mapped values.

    Forward replaces every element equal to a key of ``_LEVELS`` with the
    associated value. Elements that match no key (for example 4, 6, -35,
    out-of-range or fractional values) pass through unchanged. Backward is a
    straight-through estimator: the gradient is returned as is.
    """

    # Source level -> mapped level. Levels that are absent map to themselves.
    _LEVELS = {
        -1: -4, -2: -5, -3: -6, -4: -7, -5: -9, -6: -9, -7: -11, -8: -11,
        -9: -13, -10: -13, -11: -17, -12: -17, -13: -17, -14: -19, -15: -19,
        -16: -21, -17: -21, -18: -23, -19: -25, -20: -25, -21: -25, -22: -25,
        -23: -27, -24: -27, -25: -29, -26: -29, -27: -29, -28: -31, -29: -31,
        -30: -33, -31: -33, -32: -35, -33: -35, -34: -35,
        0: -2, 1: -1, 2: 1, 3: 2, 5: 4, 7: 8, 9: 10, 11: 12,
        13: 16, 14: 16, 15: 16, 17: 18, 18: 20, 19: 20,
        20: 24, 21: 24, 22: 24, 23: 26, 24: 26, 25: 28, 26: 28, 27: 28,
        28: 30, 29: 30, 30: 32, 31: 32, 32: 34, 33: 34, 34: 34, 35: 34,
    }

    @staticmethod
    def forward(ctx, input):
        output = input.clone()
        # Masks are computed on the untouched input, so an already mapped
        # value is never mapped a second time.
        for level, mapped in Mapping._LEVELS.items():
            output[input == level] = mapped
        return output

    @staticmethod
    def backward(ctx, grad_output):
        return grad_output
| @ -0,0 +1,217 @@ | |||
| import torch.nn as nn | |||
| import torchvision.transforms as transforms | |||
| import math | |||
| __all__ = ['resnet'] | |||
def conv3x3(in_planes, out_planes, stride=1):
    """Return a bias-free 3x3 ``nn.Conv2d`` with padding 1 and the given stride."""
    conv_options = dict(kernel_size=3, stride=stride, padding=1, bias=False)
    return nn.Conv2d(in_planes, out_planes, **conv_options)
def init_model(model):
    """Re-initialise the conv and batch-norm parameters of ``model`` in place.

    * ``nn.Conv2d`` weights are drawn from a zero-mean normal distribution
      with std ``sqrt(2 / (kh * kw * out_channels))``.
    * ``nn.BatchNorm2d`` weights are set to 1 and biases to 0.

    Other module types are left untouched.
    """
    for module in model.modules():
        if isinstance(module, nn.BatchNorm2d):
            module.weight.data.fill_(1)
            module.bias.data.zero_()
        elif isinstance(module, nn.Conv2d):
            kernel_h, kernel_w = module.kernel_size[0], module.kernel_size[1]
            fan = kernel_h * kernel_w * module.out_channels
            module.weight.data.normal_(0, math.sqrt(2. / fan))
class BasicBlock(nn.Module):
    """Residual block made of two 3x3 convolutions, each followed by batch norm.

    The block output is ``relu(bn2(conv2(relu(bn1(conv1(x))))) + shortcut)``
    where ``shortcut`` is ``x`` itself, or ``downsample(x)`` when a
    ``downsample`` module is supplied.
    """

    expansion = 1

    def __init__(self, inplanes, planes, stride=1, downsample=None):
        super(BasicBlock, self).__init__()
        # Only the first convolution carries the stride.
        self.conv1 = nn.Conv2d(inplanes, planes, kernel_size=3, stride=stride,
                               padding=1, bias=False)
        self.bn1 = nn.BatchNorm2d(planes)
        self.relu = nn.ReLU(inplace=True)
        self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=1,
                               padding=1, bias=False)
        self.bn2 = nn.BatchNorm2d(planes)
        self.downsample = downsample
        self.stride = stride

    def forward(self, x):
        shortcut = x if self.downsample is None else self.downsample(x)

        out = self.relu(self.bn1(self.conv1(x)))
        out = self.bn2(self.conv2(out))

        out += shortcut
        return self.relu(out)
class Bottleneck(nn.Module):
    """Residual block with a 1x1 -> 3x3 -> 1x1 convolution stack.

    The last 1x1 convolution widens the output to ``planes * 4`` channels.
    The shortcut is ``x`` itself, or ``downsample(x)`` when a ``downsample``
    module is supplied.
    """

    expansion = 4

    def __init__(self, inplanes, planes, stride=1, downsample=None):
        super(Bottleneck, self).__init__()
        wide = planes * 4
        self.conv1 = nn.Conv2d(inplanes, planes, kernel_size=1, bias=False)
        self.bn1 = nn.BatchNorm2d(planes)
        # The stride is applied by the 3x3 convolution in the middle.
        self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=stride,
                               padding=1, bias=False)
        self.bn2 = nn.BatchNorm2d(planes)
        self.conv3 = nn.Conv2d(planes, wide, kernel_size=1, bias=False)
        self.bn3 = nn.BatchNorm2d(wide)
        self.relu = nn.ReLU(inplace=True)
        self.downsample = downsample
        self.stride = stride

    def forward(self, x):
        shortcut = x if self.downsample is None else self.downsample(x)

        out = self.relu(self.bn1(self.conv1(x)))
        out = self.relu(self.bn2(self.conv2(out)))
        out = self.bn3(self.conv3(out))

        out += shortcut
        return self.relu(out)
class ResNet(nn.Module):
    """Shared ResNet skeleton.

    Subclasses are expected to set ``inplanes`` and to create ``conv1``,
    ``bn1``, ``relu``, ``maxpool``, ``layer1`` .. ``layer4``, ``avgpool`` and
    ``fc``; this class only provides the stage builder and the forward pass.
    """

    def __init__(self):
        super(ResNet, self).__init__()

    def _make_layer(self, block, planes, blocks, stride=1):
        """Build one stage of ``blocks`` residual blocks and update ``self.inplanes``.

        Only the first block receives ``stride``; it also receives a 1x1
        conv + batch-norm projection for the shortcut whenever the stride is
        not 1 or the channel count changes.
        """
        out_planes = planes * block.expansion
        projection = None
        if stride != 1 or self.inplanes != out_planes:
            projection = nn.Sequential(
                nn.Conv2d(self.inplanes, out_planes,
                          kernel_size=1, stride=stride, bias=False),
                nn.BatchNorm2d(out_planes),
            )

        stage = [block(self.inplanes, planes, stride, projection)]
        self.inplanes = planes * block.expansion
        for _ in range(1, blocks):
            stage.append(block(self.inplanes, planes))

        return nn.Sequential(*stage)

    def forward(self, x):
        pipeline = ('conv1', 'bn1', 'relu', 'maxpool',
                    'layer1', 'layer2', 'layer3', 'layer4', 'avgpool')
        for attr in pipeline:
            x = getattr(self, attr)(x)

        # Flatten everything but the batch dimension before the classifier.
        flat = x.view(x.size(0), -1)
        return self.fc(flat)
class ResNet_imagenet(ResNet):
    """ResNet with a 7x7 stride-2 stem, max-pooling and four residual stages.

    ``layers`` gives the number of blocks in each of the four stages, and
    ``regime`` holds the per-epoch optimizer settings attached to the model.
    """

    def __init__(self, num_classes=1000,
                 block=Bottleneck, layers=[3, 4, 23, 3]):
        super(ResNet_imagenet, self).__init__()
        self.inplanes = 64
        self.conv1 = nn.Conv2d(3, 64, kernel_size=7, stride=2, padding=3,
                               bias=False)
        self.bn1 = nn.BatchNorm2d(64)
        self.relu = nn.ReLU(inplace=True)
        self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)

        # (planes, stride) of layer1 .. layer4
        stage_plan = ((64, 1), (128, 2), (256, 2), (512, 2))
        for index, (planes, stride) in enumerate(stage_plan):
            stage = self._make_layer(block, planes, layers[index], stride=stride)
            setattr(self, 'layer%d' % (index + 1), stage)

        self.avgpool = nn.AvgPool2d(7)
        self.fc = nn.Linear(512 * block.expansion, num_classes)

        init_model(self)

        sgd_start = {'optimizer': 'SGD', 'lr': 1e-1,
                     'weight_decay': 1e-4, 'momentum': 0.9}
        self.regime = {
            0: sgd_start,
            30: {'lr': 1e-2},
            60: {'lr': 1e-3, 'weight_decay': 0},
            90: {'lr': 1e-4},
        }
class ResNet_cifar10(ResNet):
    """Three-stage ResNet with a 3x3 stride-1 stem and no max-pooling.

    Args:
        num_classes: size of the final linear layer's output.
        block: residual block class; must expose an ``expansion`` attribute.
        depth: each of the three stages gets ``int((depth - 2) / 6)`` blocks.

    ``regime`` holds the per-epoch optimizer settings attached to the model.
    """

    def __init__(self, num_classes=10,
                 block=BasicBlock, depth=18):
        super(ResNet_cifar10, self).__init__()
        self.inplanes = 16
        n = int((depth - 2) / 6)
        self.conv1 = nn.Conv2d(3, 16, kernel_size=3, stride=1, padding=1,
                               bias=False)
        self.bn1 = nn.BatchNorm2d(16)
        self.relu = nn.ReLU(inplace=True)
        # The shared forward() always calls maxpool and layer4; both are
        # identity here.
        self.maxpool = lambda x: x
        self.layer1 = self._make_layer(block, 16, n)
        self.layer2 = self._make_layer(block, 32, n, stride=2)
        self.layer3 = self._make_layer(block, 64, n, stride=2)
        self.layer4 = lambda x: x
        self.avgpool = nn.AvgPool2d(8)
        # layer3 emits 64 * block.expansion channels, so the classifier input
        # must scale with the block type (it is 64 for BasicBlock).
        self.fc = nn.Linear(64 * block.expansion, num_classes)

        init_model(self)
        self.regime = {
            0: {'optimizer': 'SGD', 'lr': 1e-1,
                'weight_decay': 1e-4, 'momentum': 0.9},
            81: {'lr': 1e-2},
            122: {'lr': 1e-3, 'weight_decay': 0},
            164: {'lr': 1e-4}
        }
def resnet(**kwargs):
    """Build a ResNet from the ``num_classes``, ``depth`` and ``dataset`` keyword arguments.

    * ``dataset='imagenet'``: ``num_classes`` defaults to 1000 and ``depth``
      to 50; supported depths are 18, 34, 50, 101 and 152.
    * ``dataset='cifar10'``: ``num_classes`` defaults to 10 and ``depth`` to 18.

    Other keyword arguments are ignored.

    Raises:
        ValueError: if the dataset, or the ImageNet depth, is not supported
            (previously ``None`` was returned silently in these cases).
    """
    num_classes, depth, dataset = map(
        kwargs.get, ['num_classes', 'depth', 'dataset'])

    if dataset == 'imagenet':
        num_classes = num_classes or 1000
        depth = depth or 50
        # depth -> (block type, number of blocks per stage)
        variants = {
            18: (BasicBlock, [2, 2, 2, 2]),
            34: (BasicBlock, [3, 4, 6, 3]),
            50: (Bottleneck, [3, 4, 6, 3]),
            101: (Bottleneck, [3, 4, 23, 3]),
            152: (Bottleneck, [3, 8, 36, 3]),
        }
        if depth not in variants:
            raise ValueError(
                'unsupported imagenet resnet depth %r (expected one of %s)'
                % (depth, sorted(variants)))
        block, layers = variants[depth]
        return ResNet_imagenet(num_classes=num_classes,
                               block=block, layers=layers)

    if dataset == 'cifar10':
        num_classes = num_classes or 10
        depth = depth or 18
        return ResNet_cifar10(num_classes=num_classes,
                              block=BasicBlock, depth=depth)

    raise ValueError('unsupported dataset for resnet: %r' % (dataset,))
| @ -0,0 +1,248 @@ | |||
| import torch.nn as nn | |||
| import torchvision.transforms as transforms | |||
| import math | |||
| from .binarized_modules import BinarizeLinear,BinarizeConv2d | |||
| __all__ = ['resnet_binary'] | |||
def Binaryconv3x3(in_planes, out_planes, stride=1):
    """Return a bias-free 3x3 ``BinarizeConv2d`` with padding 1 and the given stride."""
    conv_options = dict(kernel_size=3, stride=stride, padding=1, bias=False)
    return BinarizeConv2d(in_planes, out_planes, **conv_options)
def conv3x3(in_planes, out_planes, stride=1):
    """Return a full-precision, bias-free 3x3 ``nn.Conv2d`` with padding 1."""
    layer = nn.Conv2d(
        in_planes,
        out_planes,
        kernel_size=3,
        stride=stride,
        padding=1,
        bias=False,
    )
    return layer
def init_model(model):
    """Re-initialise binarized conv and batch-norm parameters of ``model`` in place.

    * ``BinarizeConv2d`` weights are drawn from a zero-mean normal
      distribution with std ``sqrt(2 / (kh * kw * out_channels))``.
    * ``nn.BatchNorm2d`` weights are set to 1 and biases to 0.

    Modules matching neither check are left untouched.
    """
    for module in model.modules():
        if isinstance(module, BinarizeConv2d):
            kernel_h, kernel_w = module.kernel_size[0], module.kernel_size[1]
            fan = kernel_h * kernel_w * module.out_channels
            module.weight.data.normal_(0, math.sqrt(2. / fan))
            continue
        if isinstance(module, nn.BatchNorm2d):
            module.weight.data.fill_(1)
            module.bias.data.zero_()
class BasicBlock(nn.Module):
    """Binarized residual block: two binarized 3x3 convolutions plus a shortcut.

    Args:
        inplanes: number of input channels.
        planes: number of output channels.
        stride: stride of the first convolution.
        downsample: optional module applied to the shortcut branch.
        do_bntan: when true, the summed output goes through ``bn2`` and a
            Hardtanh; when false the raw sum is returned.
    """

    expansion = 1

    def __init__(self, inplanes, planes, stride=1, downsample=None, do_bntan=True):
        super(BasicBlock, self).__init__()

        self.conv1 = Binaryconv3x3(inplanes, planes, stride)
        self.bn1 = nn.BatchNorm2d(planes)
        self.tanh1 = nn.Hardtanh(inplace=True)
        self.conv2 = Binaryconv3x3(planes, planes)
        self.tanh2 = nn.Hardtanh(inplace=True)
        self.bn2 = nn.BatchNorm2d(planes)

        self.downsample = downsample
        self.do_bntan = do_bntan
        self.stride = stride

    def forward(self, x):
        # Keep an independent copy of the input for the shortcut branch.
        residual = x.clone()

        out = self.conv1(x)
        out = self.bn1(out)
        out = self.tanh1(out)

        out = self.conv2(out)

        # The interactive pdb breakpoint that used to guard this branch was
        # removed: it halted any run whose block input exceeded 1.
        if self.downsample is not None:
            residual = self.downsample(residual)

        out += residual
        if self.do_bntan:
            out = self.bn2(out)
            out = self.tanh2(out)

        return out
class Bottleneck(nn.Module):
    """Binarized residual block with a 1x1 -> 3x3 -> 1x1 convolution stack.

    Args:
        inplanes: number of input channels.
        planes: width of the two inner convolutions; the block outputs
            ``planes * 4`` channels.
        stride: stride of the 3x3 convolution.
        downsample: optional module applied to the shortcut branch.
        do_bntan: when true, a Hardtanh is applied to the summed output.
            ``ResNet._make_layer`` passes this keyword to the last block of
            a stage, so the constructor has to accept it.
    """

    expansion = 4

    def __init__(self, inplanes, planes, stride=1, downsample=None, do_bntan=True):
        super(Bottleneck, self).__init__()
        self.conv1 = BinarizeConv2d(inplanes, planes, kernel_size=1, bias=False)
        self.bn1 = nn.BatchNorm2d(planes)
        self.conv2 = BinarizeConv2d(planes, planes, kernel_size=3, stride=stride,
                                    padding=1, bias=False)
        self.bn2 = nn.BatchNorm2d(planes)
        self.conv3 = BinarizeConv2d(planes, planes * 4, kernel_size=1, bias=False)
        self.bn3 = nn.BatchNorm2d(planes * 4)
        self.tanh = nn.Hardtanh(inplace=True)
        self.downsample = downsample
        self.do_bntan = do_bntan
        self.stride = stride

    def forward(self, x):
        # Mirrors BasicBlock, which clones its input before the first conv.
        # NOTE(review): presumably needed because the binarized convolution
        # may alter its input in place -- confirm against BinarizeConv2d.
        residual = x.clone()

        out = self.conv1(x)
        out = self.bn1(out)
        out = self.tanh(out)

        out = self.conv2(out)
        out = self.bn2(out)
        out = self.tanh(out)

        out = self.conv3(out)
        out = self.bn3(out)

        if self.downsample is not None:
            residual = self.downsample(residual)

        out += residual
        if self.do_bntan:
            # bn2 normalises ``planes`` channels and cannot be applied to the
            # ``planes * 4``-channel sum (already normalised by bn3), so only
            # the Hardtanh is applied here.
            # NOTE(review): confirm this is the intended post-sum activation.
            out = self.tanh(out)

        return out
class ResNet(nn.Module):
    """Shared skeleton of the binarized ResNets.

    Subclasses are expected to set ``inplanes`` and to create ``conv1``,
    ``maxpool``, ``bn1``, ``tanh1``, ``layer1`` .. ``layer4``, ``avgpool``,
    ``bn2``, ``tanh2``, ``fc``, ``bn3`` and ``logsoftmax``; this class only
    provides the stage builder and the forward pass.
    """

    def __init__(self):
        super(ResNet, self).__init__()

    def _make_layer(self, block, planes, blocks, stride=1, do_bntan=True):
        """Build one stage of ``blocks`` residual blocks and update ``self.inplanes``.

        The first block receives ``stride`` and, when the stride is not 1 or
        the channel count changes, a binarized 1x1 conv + batch-norm shortcut
        projection.  ``do_bntan`` is forwarded to the last block of the stage
        only; every other block uses the block's own default.
        """
        downsample = None
        if stride != 1 or self.inplanes != planes * block.expansion:
            downsample = nn.Sequential(
                BinarizeConv2d(self.inplanes, planes * block.expansion,
                               kernel_size=1, stride=stride, bias=False),
                nn.BatchNorm2d(planes * block.expansion),
            )

        if blocks <= 1:
            # A single block is both the first and the last block of the
            # stage.  Without this case a one-block stage ended up with two
            # blocks (the first one plus the unconditional last one).
            only = block(self.inplanes, planes, stride, downsample,
                         do_bntan=do_bntan)
            self.inplanes = planes * block.expansion
            return nn.Sequential(only)

        layers = [block(self.inplanes, planes, stride, downsample)]
        self.inplanes = planes * block.expansion
        for _ in range(1, blocks - 1):
            layers.append(block(self.inplanes, planes))
        layers.append(block(self.inplanes, planes, do_bntan=do_bntan))

        return nn.Sequential(*layers)

    def forward(self, x):
        x = self.conv1(x)
        x = self.maxpool(x)
        x = self.bn1(x)
        x = self.tanh1(x)
        x = self.layer1(x)
        x = self.layer2(x)
        x = self.layer3(x)
        x = self.layer4(x)

        x = self.avgpool(x)
        # Flatten everything but the batch dimension before the 1-D batch norm.
        x = x.view(x.size(0), -1)
        x = self.bn2(x)
        x = self.tanh2(x)
        x = self.fc(x)
        x = self.bn3(x)
        x = self.logsoftmax(x)

        return x
class ResNet_imagenet(ResNet):
    """Binarized ResNet with a 7x7 stride-2 stem, max-pooling and four stages.

    ``layers`` gives the number of blocks in each of the four stages, and
    ``regime`` holds the per-epoch optimizer settings attached to the model.
    """

    def __init__(self, num_classes=1000,
                 block=Bottleneck, layers=[3, 4, 23, 3]):
        super(ResNet_imagenet, self).__init__()
        self.inplanes = 64
        self.conv1 = BinarizeConv2d(3, 64, kernel_size=7, stride=2, padding=3,
                                    bias=False)
        self.bn1 = nn.BatchNorm2d(64)
        # The shared forward() reads tanh1, bn2, tanh2, bn3 and logsoftmax;
        # without them the first forward pass fails with AttributeError.
        self.tanh1 = nn.Hardtanh(inplace=True)
        self.tanh = self.tanh1  # name this class used originally
        self.tanh2 = nn.Hardtanh(inplace=True)
        self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)
        self.layer1 = self._make_layer(block, 64, layers[0])
        self.layer2 = self._make_layer(block, 128, layers[1], stride=2)
        self.layer3 = self._make_layer(block, 256, layers[2], stride=2)
        self.layer4 = self._make_layer(block, 512, layers[3], stride=2)
        self.avgpool = nn.AvgPool2d(7)
        # Same head layout as the CIFAR-10 variant: 1-D batch norm on the
        # pooled features and on the class scores, then log-softmax.
        self.bn2 = nn.BatchNorm1d(512 * block.expansion)
        self.bn3 = nn.BatchNorm1d(num_classes)
        self.logsoftmax = nn.LogSoftmax(dim=1)
        self.fc = BinarizeLinear(512 * block.expansion, num_classes)

        init_model(self)
        self.regime = {
            0: {'optimizer': 'SGD', 'lr': 1e-1,
                'weight_decay': 1e-4, 'momentum': 0.9},
            30: {'lr': 1e-2},
            60: {'lr': 1e-3, 'weight_decay': 0},
            90: {'lr': 1e-4}
        }
class ResNet_cifar10(ResNet):
    """Binarized three-stage ResNet with channel widths inflated by ``self.inflate``.

    Args:
        num_classes: number of output classes.
        block: residual block class; must expose an ``expansion`` attribute.
        depth: each of the three stages gets ``int((depth - 2) / 6)`` blocks.

    ``regime`` holds the per-epoch optimizer settings attached to the model.
    """

    def __init__(self, num_classes=10,
                 block=BasicBlock, depth=18):
        super(ResNet_cifar10, self).__init__()
        self.inflate = 5
        self.inplanes = 16 * self.inflate
        n = int((depth - 2) / 6)
        self.conv1 = BinarizeConv2d(3, 16 * self.inflate, kernel_size=3,
                                    stride=1, padding=1, bias=False)
        # The shared forward() always calls maxpool and layer4; both are
        # identity here.
        self.maxpool = lambda x: x
        self.bn1 = nn.BatchNorm2d(16 * self.inflate)
        self.tanh1 = nn.Hardtanh(inplace=True)
        self.tanh2 = nn.Hardtanh(inplace=True)
        self.layer1 = self._make_layer(block, 16 * self.inflate, n)
        self.layer2 = self._make_layer(block, 32 * self.inflate, n, stride=2)
        # The last stage skips its final bn/tanh; forward() applies bn2 and
        # tanh2 after pooling instead.
        self.layer3 = self._make_layer(block, 64 * self.inflate, n, stride=2,
                                       do_bntan=False)
        self.layer4 = lambda x: x
        self.avgpool = nn.AvgPool2d(8)
        # layer3 emits 64 * inflate * block.expansion channels (expansion is
        # 1 for BasicBlock, so the default model is unchanged).
        features = 64 * self.inflate * block.expansion
        self.bn2 = nn.BatchNorm1d(features)
        # Must follow num_classes: a hard-coded 10 breaks any other class count.
        self.bn3 = nn.BatchNorm1d(num_classes)
        # dim=1 is what the implicit choice resolves to for 2-D input.
        self.logsoftmax = nn.LogSoftmax(dim=1)
        self.fc = BinarizeLinear(features, num_classes)

        init_model(self)
        self.regime = {
            0: {'optimizer': 'Adam', 'lr': 5e-3},
            101: {'lr': 1e-3},
            142: {'lr': 5e-4},
            184: {'lr': 1e-4},
            220: {'lr': 1e-5}
        }
def resnet_binary(**kwargs):
    """Build a binarized ResNet from ``num_classes``, ``depth`` and ``dataset``.

    * ``dataset='imagenet'``: ``num_classes`` defaults to 1000 and ``depth``
      to 50; supported depths are 18, 34, 50, 101 and 152.
    * ``dataset='cifar10'``: ``num_classes`` defaults to 10 and ``depth`` to 18.

    Other keyword arguments are ignored.

    Raises:
        ValueError: if the dataset, or the ImageNet depth, is not supported
            (previously ``None`` was returned silently in these cases).
    """
    num_classes, depth, dataset = map(
        kwargs.get, ['num_classes', 'depth', 'dataset'])

    if dataset == 'imagenet':
        num_classes = num_classes or 1000
        depth = depth or 50
        # depth -> (block type, number of blocks per stage)
        variants = {
            18: (BasicBlock, [2, 2, 2, 2]),
            34: (BasicBlock, [3, 4, 6, 3]),
            50: (Bottleneck, [3, 4, 6, 3]),
            101: (Bottleneck, [3, 4, 23, 3]),
            152: (Bottleneck, [3, 8, 36, 3]),
        }
        if depth not in variants:
            raise ValueError(
                'unsupported imagenet resnet_binary depth %r (expected one of %s)'
                % (depth, sorted(variants)))
        block, layers = variants[depth]
        return ResNet_imagenet(num_classes=num_classes,
                               block=block, layers=layers)

    if dataset == 'cifar10':
        num_classes = num_classes or 10
        depth = depth or 18
        return ResNet_cifar10(num_classes=num_classes,
                              block=BasicBlock, depth=depth)

    raise ValueError('unsupported dataset for resnet_binary: %r' % (dataset,))
| @ -0,0 +1,69 @@ | |||
| import torch.nn as nn | |||
| import torchvision.transforms as transforms | |||
class AlexNetOWT_BN(nn.Module):
    """Six-convolution network with batch norm and a three-layer classifier.

    The feature extractor halves the spatial size three times and the
    classifier expects ``512 * 4 * 4`` features, so inputs are 3-channel
    32x32 images.  The output is per-class log-probabilities.

    ``regime`` holds the per-epoch optimizer settings attached to the model.
    """

    def __init__(self, num_classes=1000):
        super(AlexNetOWT_BN, self).__init__()
        self.features = nn.Sequential(
            nn.Conv2d(3, 128, kernel_size=3, stride=1, padding=1,
                      bias=False),
            nn.BatchNorm2d(128),
            nn.ReLU(inplace=True),

            nn.Conv2d(128, 128, kernel_size=3, padding=1, bias=False),
            nn.MaxPool2d(kernel_size=2, stride=2),
            nn.ReLU(inplace=True),
            nn.BatchNorm2d(128),

            nn.Conv2d(128, 256, kernel_size=3, padding=1, bias=False),
            nn.ReLU(inplace=True),
            nn.BatchNorm2d(256),

            nn.Conv2d(256, 256, kernel_size=3, padding=1, bias=False),
            nn.MaxPool2d(kernel_size=2, stride=2),
            nn.ReLU(inplace=True),
            nn.BatchNorm2d(256),

            nn.Conv2d(256, 512, kernel_size=3, padding=1, bias=False),
            nn.ReLU(inplace=True),
            nn.BatchNorm2d(512),

            nn.Conv2d(512, 512, kernel_size=3, padding=1, bias=False),
            nn.MaxPool2d(kernel_size=2, stride=2),
            nn.ReLU(inplace=True),
            nn.BatchNorm2d(512),
        )
        self.classifier = nn.Sequential(
            nn.Linear(512 * 4 * 4, 1024, bias=False),
            nn.BatchNorm1d(1024),
            nn.ReLU(inplace=True),
            nn.Dropout(0.5),
            nn.Linear(1024, 1024, bias=False),
            nn.BatchNorm1d(1024),
            nn.ReLU(inplace=True),
            nn.Dropout(0.5),
            # The comma after this layer was missing, which made the whole
            # module a syntax error.
            nn.Linear(1024, num_classes),
            # The class is spelled ``LogSoftmax``; dim=1 is the class axis of
            # the (batch, classes) scores.
            nn.LogSoftmax(dim=1),
        )

        self.regime = {
            0: {'optimizer': 'SGD', 'lr': 1e-2,
                'weight_decay': 5e-4, 'momentum': 0.9},
            10: {'lr': 5e-3},
            15: {'lr': 1e-3, 'weight_decay': 0},
            20: {'lr': 5e-4},
            25: {'lr': 1e-4}
        }

    def forward(self, x):
        x = self.features(x)
        # Flatten to (batch, 512 * 4 * 4) for the linear classifier.
        x = x.view(-1, 512 * 4 * 4)
        x = self.classifier(x)
        return x
def model(**kwargs):
    """Build an ``AlexNetOWT_BN``; ``num_classes`` defaults to 1000, other kwargs are ignored."""
    class_count = kwargs['num_classes'] if 'num_classes' in kwargs else 1000
    return AlexNetOWT_BN(class_count)
| @ -0,0 +1,80 @@ | |||
| import torch | |||
| import torch.nn as nn | |||
| import torchvision.transforms as transforms | |||
| from torch.autograd import Function | |||
| from .binarized_modules import BinarizeLinear,BinarizeConv2d | |||
class VGG_Cifar10(nn.Module):
    """Binarized VGG-style network: six binarized convolutions and three binarized linear layers.

    Convolution widths are multiplied by ``self.infl_ratio`` except for the
    last convolution, which always emits 512 channels.  The classifier expects
    ``512 * 4 * 4`` features after three 2x2 max-poolings, and the network
    outputs per-class log-probabilities.

    ``regime`` holds the per-epoch optimizer settings attached to the model.
    """

    def __init__(self, num_classes=1000):
        super(VGG_Cifar10, self).__init__()
        self.infl_ratio = 3
        self.features = nn.Sequential(
            BinarizeConv2d(3, 128*self.infl_ratio, kernel_size=3, stride=1, padding=1,
                           bias=True),
            nn.BatchNorm2d(128*self.infl_ratio),
            nn.Hardtanh(inplace=True),

            BinarizeConv2d(128*self.infl_ratio, 128*self.infl_ratio, kernel_size=3, padding=1, bias=True),
            nn.MaxPool2d(kernel_size=2, stride=2),
            nn.BatchNorm2d(128*self.infl_ratio),
            nn.Hardtanh(inplace=True),

            BinarizeConv2d(128*self.infl_ratio, 256*self.infl_ratio, kernel_size=3, padding=1, bias=True),
            nn.BatchNorm2d(256*self.infl_ratio),
            nn.Hardtanh(inplace=True),

            BinarizeConv2d(256*self.infl_ratio, 256*self.infl_ratio, kernel_size=3, padding=1, bias=True),
            nn.MaxPool2d(kernel_size=2, stride=2),
            nn.BatchNorm2d(256*self.infl_ratio),
            nn.Hardtanh(inplace=True),

            BinarizeConv2d(256*self.infl_ratio, 512*self.infl_ratio, kernel_size=3, padding=1, bias=True),
            nn.BatchNorm2d(512*self.infl_ratio),
            nn.Hardtanh(inplace=True),

            # The last convolution is not inflated so that the classifier
            # input stays at 512 * 4 * 4.
            BinarizeConv2d(512*self.infl_ratio, 512, kernel_size=3, padding=1, bias=True),
            nn.MaxPool2d(kernel_size=2, stride=2),
            nn.BatchNorm2d(512),
            nn.Hardtanh(inplace=True)
        )
        self.classifier = nn.Sequential(
            BinarizeLinear(512 * 4 * 4, 1024, bias=True),
            nn.BatchNorm1d(1024),
            nn.Hardtanh(inplace=True),
            BinarizeLinear(1024, 1024, bias=True),
            nn.BatchNorm1d(1024),
            nn.Hardtanh(inplace=True),
            BinarizeLinear(1024, num_classes, bias=True),
            nn.BatchNorm1d(num_classes, affine=False),
            # Explicit class axis; the implicit-dim form is deprecated and
            # resolves to dim=1 for this 2-D input anyway.
            nn.LogSoftmax(dim=1)
        )

        self.regime = {
            0: {'optimizer': 'Adam', 'betas': (0.9, 0.999), 'lr': 5e-3},
            40: {'lr': 1e-3},
            80: {'lr': 5e-4},
            100: {'lr': 1e-4},
            120: {'lr': 5e-5},
            140: {'lr': 1e-5}
        }

    def forward(self, x):
        x = self.features(x)
        # Flatten to (batch, 512 * 4 * 4) for the linear classifier.
        x = x.view(-1, 512 * 4 * 4)
        x = self.classifier(x)
        return x
def vgg_cifar10_binary(**kwargs):
    """Build a ``VGG_Cifar10``; ``num_classes`` defaults to 10, other kwargs are ignored."""
    class_count = kwargs['num_classes'] if 'num_classes' in kwargs else 10
    return VGG_Cifar10(class_count)
| @ -0,0 +1,198 @@ | |||
| import torch | |||
| import torchvision.transforms as transforms | |||
| import random | |||
# Default ``normalize`` argument of the transform builders in this module;
# it is unpacked as keyword arguments into ``transforms.Normalize``.
# NOTE(review): presumably per-channel RGB statistics of ImageNet -- confirm.
__imagenet_stats = {'mean': [0.485, 0.456, 0.406],
                    'std': [0.229, 0.224, 0.225]}

# Eigenvalues and eigenvectors handed to ``Lighting`` by
# ``inception_color_preproccess``.
__imagenet_pca = {
    'eigval': torch.Tensor([0.2175, 0.0188, 0.0045]),
    'eigvec': torch.Tensor([
        [-0.5675, 0.7192, 0.4009],
        [-0.5808, -0.0045, -0.8140],
        [-0.5836, -0.6948, 0.4203],
    ])
}
def scale_crop(input_size, scale_size=None, normalize=__imagenet_stats):
    """Return a transform: optional resize, center crop, to-tensor, normalize.

    Args:
        input_size: size passed to ``CenterCrop``.
        scale_size: size to resize to before cropping.  The resize step is
            skipped when this is ``None`` or equal to ``input_size``.
        normalize: keyword arguments for ``transforms.Normalize``.
    """
    t_list = [
        transforms.CenterCrop(input_size),
        transforms.ToTensor(),
        transforms.Normalize(**normalize),
    ]
    # The default scale_size=None used to reach Scale(None) and fail.
    if scale_size is not None and scale_size != input_size:
        # ``Scale`` is the legacy torchvision name of ``Resize``; prefer the
        # current name and fall back for very old installations.
        resize = getattr(transforms, 'Resize', None) or transforms.Scale
        t_list = [resize(scale_size)] + t_list

    return transforms.Compose(t_list)
def scale_random_crop(input_size, scale_size=None, normalize=__imagenet_stats):
    """Return a transform: optional resize, random crop, to-tensor, normalize.

    Args:
        input_size: size passed to ``RandomCrop``.
        scale_size: size to resize to before cropping.  The resize step is
            skipped when this is ``None`` or equal to ``input_size``.
        normalize: keyword arguments for ``transforms.Normalize``.
    """
    t_list = [
        transforms.RandomCrop(input_size),
        transforms.ToTensor(),
        transforms.Normalize(**normalize),
    ]
    # The default scale_size=None used to reach Scale(None) and fail.
    if scale_size is not None and scale_size != input_size:
        # ``Scale`` is the legacy torchvision name of ``Resize``; prefer the
        # current name and fall back for very old installations.
        resize = getattr(transforms, 'Resize', None) or transforms.Scale
        t_list = [resize(scale_size)] + t_list

    # The composed transform was built but never returned, so callers
    # always received None.
    return transforms.Compose(t_list)
def pad_random_crop(input_size, scale_size=None, normalize=__imagenet_stats):
    """Return a transform: padded random crop, random horizontal flip, to-tensor, normalize.

    The crop padding is ``int((scale_size - input_size) / 2)``, so
    ``scale_size`` must be a number even though it defaults to ``None``.
    """
    border = int((scale_size - input_size) / 2)
    steps = [
        transforms.RandomCrop(input_size, padding=border),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        transforms.Normalize(**normalize),
    ]
    return transforms.Compose(steps)
def inception_preproccess(input_size, normalize=__imagenet_stats):
    """Return a transform: random resized crop, random horizontal flip, to-tensor, normalize.

    Args:
        input_size: output size of the random resized crop.
        normalize: keyword arguments for ``transforms.Normalize``.
    """
    # ``RandomSizedCrop`` is the legacy torchvision name of
    # ``RandomResizedCrop``; prefer the current name and fall back for very
    # old installations.
    random_resized_crop = (getattr(transforms, 'RandomResizedCrop', None)
                           or transforms.RandomSizedCrop)
    return transforms.Compose([
        random_resized_crop(input_size),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        transforms.Normalize(**normalize)
    ])
def inception_color_preproccess(input_size, normalize=__imagenet_stats):
    """Return the inception-style transform with colour augmentation.

    Steps, in order: random resized crop, random horizontal flip, to-tensor,
    ``ColorJitter`` (0.4 for brightness, contrast and saturation),
    ``Lighting`` with std 0.1, normalize.

    Args:
        input_size: output size of the random resized crop.
        normalize: keyword arguments for ``transforms.Normalize``.
    """
    # ``RandomSizedCrop`` is the legacy torchvision name of
    # ``RandomResizedCrop``; prefer the current name and fall back for very
    # old installations.
    random_resized_crop = (getattr(transforms, 'RandomResizedCrop', None)
                           or transforms.RandomSizedCrop)
    return transforms.Compose([
        random_resized_crop(input_size),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        # ColorJitter and Lighting operate on tensors, so they come after
        # ToTensor.
        ColorJitter(
            brightness=0.4,
            contrast=0.4,
            saturation=0.4,
        ),
        Lighting(0.1, __imagenet_pca['eigval'], __imagenet_pca['eigvec']),
        transforms.Normalize(**normalize)
    ])
def get_transform(name='imagenet', input_size=None,
                  scale_size=None, normalize=None, augment=True):
    """Pick the preprocessing transform for a dataset name.

    * ``'imagenet'``: ``inception_preproccess`` when augmenting, otherwise
      ``scale_crop`` (defaults: input 224, scale 256).
    * any name containing ``'cifar'``: ``pad_random_crop`` when augmenting
      (default scale 40), otherwise ``scale_crop`` (default scale 32);
      default input 32.
    * ``'mnist'``: like cifar with default input 28, scale 32 in both modes,
      and ``normalize`` replaced by mean 0.5 / std 0.5.

    Returns ``None`` for any other name.
    """
    normalize = normalize or __imagenet_stats

    if name == 'imagenet':
        scale_size = scale_size or 256
        input_size = input_size or 224
        if augment:
            return inception_preproccess(input_size, normalize=normalize)
        return scale_crop(input_size=input_size,
                          scale_size=scale_size, normalize=normalize)

    if 'cifar' in name:
        input_size = input_size or 32
        augmented_scale = 40
    elif name == 'mnist':
        normalize = {'mean': [0.5], 'std': [0.5]}
        input_size = input_size or 28
        augmented_scale = 32
    else:
        return None

    if augment:
        scale_size = scale_size or augmented_scale
        return pad_random_crop(input_size, scale_size=scale_size,
                               normalize=normalize)

    scale_size = scale_size or 32
    return scale_crop(input_size=input_size,
                      scale_size=scale_size, normalize=normalize)
class Lighting(object):
    """PCA-based lighting noise (AlexNet style).

    Adds one random offset per channel to a 3-channel image tensor.  The
    offsets are the rows of ``eigvec`` weighted by ``eigval`` and by three
    normal samples with std ``alphastd``.  With ``alphastd == 0`` the image
    is returned as is.
    """

    def __init__(self, alphastd, eigval, eigvec):
        self.alphastd = alphastd
        self.eigval = eigval
        self.eigvec = eigvec

    def __call__(self, img):
        if self.alphastd == 0:
            return img

        alpha = img.new().resize_(3).normal_(0, self.alphastd)
        alpha_grid = alpha.view(1, 3).expand(3, 3)
        eigval_grid = self.eigval.view(1, 3).expand(3, 3)

        weighted = self.eigvec.type_as(img).clone().mul(alpha_grid).mul(eigval_grid)
        rgb = weighted.sum(1).squeeze()

        # Broadcast the per-channel offset over the spatial dimensions.
        offset = rgb.view(3, 1, 1).expand_as(img)
        return img.add(offset)
class Grayscale(object):
    """Replace every channel of a 3-channel image tensor by its weighted sum.

    The result is a new tensor in which all three channels hold
    ``0.299 * c0 + 0.587 * c1 + 0.114 * c2``; the input is not modified.
    """

    def __call__(self, img):
        gs = img.clone()
        # Accumulate into channel 0.  Scaling with an explicit multiply
        # avoids the deprecated ``add_(scalar, tensor)`` overload.
        gs[0].mul_(0.299).add_(gs[1] * 0.587).add_(gs[2] * 0.114)
        gs[1].copy_(gs[0])
        gs[2].copy_(gs[0])
        return gs
class Saturation(object):
    """Blend an image towards its grayscale version by a random amount in ``[0, var]``."""

    def __init__(self, var):
        self.var = var

    def __call__(self, img):
        grey = Grayscale()(img)
        blend = random.uniform(0, self.var)
        # blend == 0 keeps the image, blend == 1 yields the grayscale image.
        return img.lerp(grey, blend)
class Brightness(object):
    """Blend an image towards an all-zero image by a random amount in ``[0, var]``."""

    def __init__(self, var):
        self.var = var

    def __call__(self, img):
        black = torch.zeros_like(img)
        blend = random.uniform(0, self.var)
        # blend == 0 keeps the image, blend == 1 yields the all-zero image.
        return img.lerp(black, blend)
class Contrast(object):
    """Blend an image towards its mean gray level by a random amount in ``[0, var]``."""

    def __init__(self, var):
        self.var = var

    def __call__(self, img):
        flat_grey = Grayscale()(img)
        # Collapse the grayscale image to a single constant: its mean.
        flat_grey.fill_(flat_grey.mean())
        blend = random.uniform(0, self.var)
        return img.lerp(flat_grey, blend)
class RandomOrder(object):
    """Apply a list of transforms to an image, each exactly once, in random order.

    When ``transforms`` is ``None`` the image is returned unchanged.
    """

    def __init__(self, transforms):
        self.transforms = transforms

    def __call__(self, img):
        if self.transforms is None:
            return img
        # A fresh permutation is drawn on every call.
        permutation = torch.randperm(len(self.transforms)).tolist()
        for position in permutation:
            img = self.transforms[position](img)
        return img
class ColorJitter(RandomOrder):
    """Random-order combination of ``Brightness``, ``Contrast`` and ``Saturation``.

    A component is left out when its strength is 0.
    """

    def __init__(self, brightness=0.4, contrast=0.4, saturation=0.4):
        components = (
            (brightness, Brightness),
            (contrast, Contrast),
            (saturation, Saturation),
        )
        self.transforms = [make(strength)
                           for strength, make in components
                           if strength != 0]
| @ -0,0 +1,5 @@ | |||
| 2021-04-15 15:36:47 - INFO - saving to ./results/2021-04-15_15-36-47 | |||
| 2021-04-15 15:36:47 - DEBUG - run arguments: Namespace(batch_size=256, dataset='imagenet', epochs=2500, evaluate=None, gpus='0', input_size=None, lr=0.1, model='alexnet', model_config='', momentum=0.9, optimizer='SGD', print_freq=10, results_dir='./results', resume='', save='2021-04-15_15-36-47', start_epoch=0, type='torch.cuda.FloatTensor', weight_decay=0.0001, workers=8) | |||
| 2021-04-15 15:36:47 - INFO - creating model alexnet | |||
| 2021-04-15 15:36:48 - INFO - created model with configuration: {'input_size': None, 'dataset': 'imagenet'} | |||
| 2021-04-15 15:36:48 - INFO - number of parameters: 61110184 | |||
| @ -0,0 +1,5 @@ | |||
| 2021-04-15 15:37:52 - INFO - saving to ./results/2021-04-15_15-37-52 | |||
| 2021-04-15 15:37:52 - DEBUG - run arguments: Namespace(batch_size=256, dataset='imagenet', epochs=2500, evaluate=None, gpus='0', input_size=None, lr=0.1, model='resnet', model_config='', momentum=0.9, optimizer='SGD', print_freq=10, results_dir='./results', resume='', save='2021-04-15_15-37-52', start_epoch=0, type='torch.cuda.FloatTensor', weight_decay=0.0001, workers=8) | |||
| 2021-04-15 15:37:52 - INFO - creating model resnet | |||
| 2021-04-15 15:37:52 - INFO - created model with configuration: {'input_size': None, 'dataset': 'imagenet'} | |||
| 2021-04-15 15:37:52 - INFO - number of parameters: 25557032 | |||
| @ -0,0 +1,5 @@ | |||
| 2021-04-15 15:38:16 - INFO - saving to ./results/2021-04-15_15-38-16 | |||
| 2021-04-15 15:38:16 - DEBUG - run arguments: Namespace(batch_size=256, dataset='imagenet', epochs=2500, evaluate=None, gpus='0', input_size=None, lr=0.1, model='alexnet', model_config='', momentum=0.9, optimizer='SGD', print_freq=10, results_dir='./results', resume='', save='2021-04-15_15-38-16', start_epoch=0, type='torch.cuda.FloatTensor', weight_decay=0.0001, workers=8) | |||
| 2021-04-15 15:38:16 - INFO - creating model alexnet | |||
| 2021-04-15 15:38:17 - INFO - created model with configuration: {'input_size': None, 'dataset': 'imagenet'} | |||
| 2021-04-15 15:38:17 - INFO - number of parameters: 61110184 | |||
| @ -0,0 +1,160 @@ | |||
| import os | |||
| import torch | |||
| import logging.config | |||
| import shutil | |||
| import pandas as pd | |||
| from bokeh.io import output_file, save, show | |||
| from bokeh.plotting import figure | |||
| from bokeh.layouts import column | |||
| #from bokeh.charts import Line, defaults | |||
| # | |||
| #defaults.width = 800 | |||
| #defaults.height = 400 | |||
| #defaults.tools = 'pan,box_zoom,wheel_zoom,box_select,hover,resize,reset,save' | |||
def setup_logging(log_file='log.txt'):
    """Configure logging to a file (DEBUG and up) and to the console (INFO and up).

    The file is opened in write mode and uses a timestamped format; the
    console handler prints the bare message only.
    """
    file_options = {
        'level': logging.DEBUG,
        'format': "%(asctime)s - %(levelname)s - %(message)s",
        'datefmt': "%Y-%m-%d %H:%M:%S",
        'filename': log_file,
        'filemode': 'w',
    }
    logging.basicConfig(**file_options)

    console = logging.StreamHandler()
    console.setLevel(logging.INFO)
    console.setFormatter(logging.Formatter('%(message)s'))

    root_logger = logging.getLogger('')
    root_logger.addHandler(console)
class ResultsLog(object):
    """Accumulate per-step results in a DataFrame and persist them as CSV.

    Bokeh figures queued with :meth:`image` are written to ``plot_path`` by
    :meth:`save` or displayed by :meth:`show`.
    """

    def __init__(self, path='results.csv', plot_path=None):
        self.path = path
        self.plot_path = plot_path or (self.path + '.html')
        self.figures = []
        self.results = None

    def add(self, **kwargs):
        """Append one row whose columns are the keyword names."""
        # Materialise the dict views: pandas expects list-likes here.
        row = pd.DataFrame([list(kwargs.values())], columns=list(kwargs.keys()))
        if self.results is None:
            self.results = row
        else:
            # DataFrame.append was removed in pandas 2.0; concat is the
            # equivalent and also works on older versions.
            self.results = pd.concat([self.results, row], ignore_index=True)

    def save(self, title='Training Results'):
        """Write queued figures to ``plot_path`` and the results to ``path``."""
        if len(self.figures) > 0:
            if os.path.isfile(self.plot_path):
                os.remove(self.plot_path)
            output_file(self.plot_path, title=title)
            plot = column(*self.figures)
            save(plot)
            self.figures = []
        # Nothing to write until add() has been called at least once.
        if self.results is not None:
            self.results.to_csv(self.path, index=False, index_label=False)

    def load(self, path=None):
        """Replace the results with the CSV at ``path`` (default ``self.path``) if it exists."""
        path = path or self.path
        if os.path.isfile(path):
            # read_csv is a pandas module-level function, and its result has
            # to be stored for the load to have any effect.
            self.results = pd.read_csv(path)

    def show(self):
        """Display the queued figures, if any."""
        if len(self.figures) > 0:
            plot = column(*self.figures)
            show(plot)

    def image(self, *kargs, **kwargs):
        """Queue a new figure containing an image built from the given arguments."""
        fig = figure()
        fig.image(*kargs, **kwargs)
        self.figures.append(fig)
def save_checkpoint(state, is_best, path='.', filename='checkpoint.pth.tar', save_all=False):
    """Serialise ``state`` with ``torch.save`` and optionally copy the file.

    Args:
        state: object to save; ``state['epoch']`` is read when ``save_all``.
        is_best: also copy the checkpoint to ``model_best.pth.tar``.
        path: directory receiving all files.
        filename: name of the main checkpoint file.
        save_all: also copy the checkpoint to ``checkpoint_epoch_<epoch>.pth.tar``.
    """
    checkpoint_file = os.path.join(path, filename)
    torch.save(state, checkpoint_file)

    extra_names = []
    if is_best:
        extra_names.append('model_best.pth.tar')
    if save_all:
        extra_names.append('checkpoint_epoch_%s.pth.tar' % state['epoch'])

    for extra in extra_names:
        shutil.copyfile(checkpoint_file, os.path.join(path, extra))
class AverageMeter(object):
    """Track the latest value, weighted sum, count and running average of a series."""

    def __init__(self):
        self.reset()

    def reset(self):
        """Set all statistics back to zero."""
        self.val = 0
        self.avg = 0
        self.sum = 0
        self.count = 0

    def update(self, val, n=1):
        """Record ``val`` as observed ``n`` times and refresh the average."""
        self.val = val
        self.count += n
        self.sum += val * n
        self.avg = self.sum / self.count
# Maps the optimizer names accepted in a config's 'optimizer' entry to the
# torch.optim class of the same name.
__optimizers = {
    name: getattr(torch.optim, name)
    for name in (
        'SGD',
        'ASGD',
        'Adam',
        'Adamax',
        'Adagrad',
        'Adadelta',
        'Rprop',
        'RMSprop',
    )
}
def adjust_optimizer(optimizer, epoch, config):
    """Reconfigure the optimizer for ``epoch`` according to ``config``.

    Args:
        optimizer: the optimizer currently in use.
        epoch: current epoch index.
        config: either a callable mapping an epoch to a setting dict, or a
            container of setting dicts indexed by epoch. In the second form
            every entry for epochs ``0..epoch`` is re-applied in order, so
            earlier settings stay in force until overridden.

    Returns:
        The optimizer to use from now on; a new object when a setting names
        an ``'optimizer'`` method, otherwise the one passed in.
    """

    def apply_setting(current, setting):
        """Apply one setting dict and return the resulting optimizer."""
        if 'optimizer' in setting:
            method = setting['optimizer']
            # The replacement is built from the existing parameter groups.
            current = __optimizers[method](current.param_groups)
            logging.debug('OPTIMIZER - setting method = %s' % method)
        for group in current.param_groups:
            for key in group.keys():
                if key not in setting:
                    continue
                logging.debug('OPTIMIZER - setting %s = %s' %
                              (key, setting[key]))
                group[key] = setting[key]
        return current

    if callable(config):
        return apply_setting(optimizer, config(epoch))
    for e in range(epoch + 1):  # replay every epoch so far - sticky settings
        if e in config:
            optimizer = apply_setting(optimizer, config[e])
    return optimizer
def accuracy(output, target, topk=(1,)):
    """Compute the top-k accuracy, in percent, for every k in ``topk``.

    Args:
        output: score tensor; the k best classes are taken along dim 1.
        target: tensor of true class indices; its first dimension is used
            as the batch size.
        topk: iterable of k values to evaluate.

    Returns:
        A list with one 0-dim float tensor per entry of ``topk``.
    """
    maxk = max(topk)
    batch_size = target.size(0)

    _, pred = output.float().topk(maxk, 1, True, True)
    pred = pred.t()
    correct = pred.eq(target.reshape(1, -1).expand_as(pred))

    res = []
    for k in topk:
        # ``correct`` inherits the transposed (non-contiguous) layout of
        # ``pred``, so ``view(-1)`` fails for k > 1; ``reshape`` copies
        # when it has to.
        correct_k = correct[:k].reshape(-1).float().sum(0)
        res.append(correct_k.mul_(100.0 / batch_size))
    return res
| # kernel_img = model.features[0][0].kernel.data.clone() | |||
| # kernel_img.add_(-kernel_img.min()) | |||
| # kernel_img.mul_(255 / kernel_img.max()) | |||
| # save_image(kernel_img, 'kernel%s.jpg' % epoch) | |||
| @ -0,0 +1,154 @@ | |||
| import torch | |||
| import numpy as np | |||
| import cv2, os, sys | |||
| import pandas as pd | |||
| from torch.utils.data import Dataset | |||
| from matplotlib import pyplot as plt | |||
| from torch.utils.data import ConcatDataset, DataLoader, Subset | |||
| import torch.nn as nn | |||
| import torchvision.transforms as transforms | |||
| from torchvision.datasets import DatasetFolder | |||
| from PIL import Image | |||
| import torchvision.models as models | |||
# Samples per mini-batch for the loaders built in main().
batch_size = 32
# Number of passes over the training set.
num_epoch = 10

# Training pipeline: grayscale, random resized crop to (40, 30), resize to
# (40, 30), tensor conversion. The Resize repeats the crop's output size, so
# it is presumably redundant - confirm before removing.
train_tfm = transforms.Compose(
    [
        transforms.Grayscale(),
        transforms.RandomResizedCrop((40, 30)),
        transforms.Resize((40, 30)),
        transforms.ToTensor(),
    ]
)

# Evaluation pipeline: the same steps without the random crop.
test_tfm = transforms.Compose(
    [
        transforms.Grayscale(),
        transforms.Resize((40, 30)),
        transforms.ToTensor(),
    ]
)
| ''' | |||
| class Classifier(nn.Module): | |||
| def __init__(self): | |||
| super(Classifier, self).__init__() | |||
| self.cnn_layers = nn.Sequential( | |||
| #input_size(1,30,40) | |||
| nn.Conv2d(1, 16, 3, 1), #output_size(16,28,38) | |||
| nn.BatchNorm2d(16), | |||
| nn.ReLU(), | |||
| nn.Dropout(0.2), | |||
| nn.MaxPool2d(kernel_size = 2), #output_size(16,14,19) | |||
| nn.Conv2d(16, 24, 3, 1), #output_size(24,12,17) | |||
| nn.BatchNorm2d(24), | |||
| nn.ReLU(), | |||
| nn.Dropout(0.2), | |||
| nn.MaxPool2d(kernel_size = 2), #output_size(24,6,8) | |||
| nn.Conv2d(24, 32, 3, 1), #output_size(32,4,6) | |||
| nn.BatchNorm2d(32), | |||
| nn.ReLU(), | |||
| nn.Dropout(0.2), | |||
| nn.MaxPool2d(kernel_size = 2) #ouput_size(32,2,3) | |||
| ) | |||
| self.fc_layers = nn.Sequential( | |||
| nn.Linear(32 * 2 * 3, 32), | |||
| nn.ReLU(), | |||
| nn.Dropout(0.2), | |||
| nn.Linear(32,8) | |||
| ) | |||
| def forward(self, x): | |||
| x = self.cnn_layers(x) | |||
| x = x.flatten(1) | |||
| x = self.fc_layers(x) | |||
| return x | |||
| ''' | |||
def main():
    """Train a single-channel ResNet-18 on the lepton images, then report test accuracy."""

    def open_image(path):
        return Image.open(path)

    def count_hits(logits, labels):
        """Number of samples whose highest-scoring class equals the label."""
        _, predicted = torch.max(logits.data, 1)
        return (predicted == labels).sum().item()

    train_set = DatasetFolder("./dataset/data_0705/lepton/train", loader=open_image,
                              extensions="bmp", transform=train_tfm)
    test_set = DatasetFolder("./dataset/data_0705/lepton/test", loader=open_image,
                             extensions="bmp", transform=test_tfm)
    train_loader = DataLoader(train_set, batch_size=batch_size, shuffle=True)
    test_loader = DataLoader(test_set, batch_size=batch_size, shuffle=True)

    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    # Stock ResNet-18 with the stem swapped for a 1-channel convolution and
    # the classifier head for 3 outputs.
    # NOTE(review): padding=3 with kernel_size=3 looks carried over from the
    # default 7x7 stem - confirm it is intended.
    model = models.resnet18()
    model.conv1 = nn.Conv2d(1, 64, kernel_size=3, stride=2, padding=3,
                            bias=False)
    model.fc = nn.Linear(512, 3)
    model = model.to(device)
    print(model)

    optimizer = torch.optim.Adam(model.parameters(), lr=0.001)
    criterion = nn.CrossEntropyLoss()

    # Training
    for epoch in range(num_epoch):
        running_loss = 0.0  # sum (not mean) of the batch losses of this epoch
        total = 0
        correct = 0
        for inputs, labels in train_loader:
            inputs = inputs.to(device)
            labels = labels.to(device)

            optimizer.zero_grad()
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()

            running_loss += loss.item()
            total += labels.size(0)
            correct += count_hits(outputs, labels)
        train_acc = correct / total
        print(f"[ Train | {epoch + 1:03d}/{num_epoch:03d} ] loss = {running_loss:.5f}, acc = {train_acc:.5f}")

    # Testing
    model.eval()
    with torch.no_grad():
        correct = 0
        total = 0
        for inputs, labels in test_loader:
            inputs = inputs.to(device)
            labels = labels.to(device)
            outputs = model(inputs)
            total += labels.size(0)
            correct += count_hits(outputs, labels)
    print('Test Accuracy:{} %'.format((correct / total) * 100))


if __name__ == '__main__':
    main()
| @ -0,0 +1,211 @@ | |||
| import torch | |||
| import numpy as np | |||
| import cv2, os, sys | |||
| import pandas as pd | |||
| from torch.utils.data import Dataset | |||
| from matplotlib import pyplot as plt | |||
| from torch.utils.data import ConcatDataset, DataLoader, Subset | |||
| import torch.nn as nn | |||
| import torchvision.transforms as transforms | |||
| from torchvision.datasets import DatasetFolder | |||
| from PIL import Image | |||
| import torchvision.models | |||
| import BinaryNetpytorch.models as models | |||
| from BinaryNetpytorch.models.binarized_modules import BinarizeLinear,BinarizeConv2d | |||
# Samples per mini-batch for the loaders built in main().
batch_size = 32
# Number of passes over the training set.
num_epoch = 10

# Training and evaluation use the same deterministic steps:
# grayscale, resize to (40, 30), tensor conversion.
train_tfm = transforms.Compose(
    [
        transforms.Grayscale(),
        transforms.Resize((40, 30)),
        transforms.ToTensor(),
    ]
)

test_tfm = transforms.Compose(
    [
        transforms.Grayscale(),
        transforms.Resize((40, 30)),
        transforms.ToTensor(),
    ]
)
def Binaryconv3x3(in_planes, out_planes, stride=1):
    """Build a bias-free 3x3 ``BinarizeConv2d`` with padding 1 and the given stride."""
    options = dict(kernel_size=3, stride=stride, padding=1, bias=False)
    return BinarizeConv2d(in_planes, out_planes, **options)
def conv3x3(in_planes, out_planes, stride=1):
    """Build a bias-free 3x3 ``nn.Conv2d`` with padding 1 and the given stride."""
    options = dict(kernel_size=3, stride=stride, padding=1, bias=False)
    return nn.Conv2d(in_planes, out_planes, **options)
class BasicBlock(nn.Module):
    """Residual block built from two binarized 3x3 convolutions.

    Args:
        inplanes: channels of the block input.
        planes: channels produced by both convolutions.
        stride: stride of the first convolution.
        downsample: optional module applied to the skip connection so it
            matches the main path; ``None`` adds the input as is.
        do_bntan: when False the final BatchNorm + Hardtanh are skipped and
            the raw sum is returned.
    """

    # Output channels are planes * expansion (read by ResNet._make_layer).
    expansion = 1

    def __init__(self, inplanes, planes, stride=1, downsample=None,do_bntan=True):
        super(BasicBlock, self).__init__()

        self.conv1 = Binaryconv3x3(inplanes, planes, stride)
        self.bn1 = nn.BatchNorm2d(planes)
        self.tanh1 = nn.Hardtanh(inplace=True)
        self.conv2 = Binaryconv3x3(planes, planes)
        self.tanh2 = nn.Hardtanh(inplace=True)
        self.bn2 = nn.BatchNorm2d(planes)

        self.downsample = downsample
        self.do_bntan=do_bntan
        self.stride = stride

    def forward(self, x):
        """Return ``conv2(tanh(bn(conv1(x)))) + skip``, then optionally bn2 + tanh2."""
        import warnings

        # Work on a copy for the skip path - presumably because the
        # binarized convolution may modify its input in place (TODO confirm
        # against BinarizeConv2d).
        residual = x.clone()

        out = self.conv1(x)
        out = self.bn1(out)
        out = self.tanh1(out)

        out = self.conv2(out)

        if self.downsample is not None:
            # This sanity check used to drop into pdb, which hangs any
            # unattended run; report the condition and carry on instead.
            if residual.data.max()>1:
                warnings.warn('BasicBlock: residual input exceeds 1 before downsample')
            residual = self.downsample(residual)

        out += residual
        if self.do_bntan:
            out = self.bn2(out)
            out = self.tanh2(out)

        return out
class ResNet(nn.Module):
    """Skeleton of the binarized ResNet.

    This base class creates no layers itself; ``forward`` and
    ``_make_layer`` rely on attributes (``inplanes``, ``conv1``, ``maxpool``,
    ``bn1``, ``tanh1``, ``layer1``..``layer4``, ``avgpool``, ``bn2``,
    ``tanh2``, ``fc``, ``bn3``, ``logsoftmax``) being set by a subclass.
    """

    def __init__(self):
        super(ResNet, self).__init__()

    def _make_layer(self, block, planes, blocks, stride=1,do_bntan=True):
        """Assemble one stage of residual blocks as an ``nn.Sequential``.

        The first block gets ``stride`` and, when the stride or the channel
        count changes, a 1x1 binarized conv + BatchNorm on its skip path.
        Only the last block receives ``do_bntan``. A first and a last block
        are always created, so the stage has at least two blocks.
        """
        out_planes = planes * block.expansion
        downsample = None
        if stride != 1 or self.inplanes != out_planes:
            downsample = nn.Sequential(
                BinarizeConv2d(self.inplanes, out_planes,
                               kernel_size=1, stride=stride, bias=False),
                nn.BatchNorm2d(out_planes),
            )

        first = block(self.inplanes, planes, stride, downsample)
        self.inplanes = out_planes
        middle = [block(self.inplanes, planes) for _ in range(1, blocks - 1)]
        last = block(self.inplanes, planes, do_bntan=do_bntan)
        return nn.Sequential(first, *middle, last)

    def forward(self, x):
        """Run stem, residual stages and pooling, then the flattened head."""
        feature_stages = (
            self.conv1, self.maxpool, self.bn1, self.tanh1,
            self.layer1, self.layer2, self.layer3, self.layer4,
            self.avgpool,
        )
        for stage in feature_stages:
            x = stage(x)

        # Collapse everything but the batch dimension for the 1-D head.
        x = x.view(x.size(0), -1)

        head_stages = (self.bn2, self.tanh2, self.fc, self.bn3, self.logsoftmax)
        for stage in head_stages:
            x = stage(x)
        return x
class ResNet_cifar10(ResNet):
    """Three-stage binarized ResNet for single-channel images.

    Args:
        num_classes: number of output classes (size of the final layer).
        block: residual block class handed to ``_make_layer``.
        depth: nominal depth; each stage is built with
            ``int((depth - 2) / 6)`` as its block count.
    """

    def __init__(self, num_classes=3,
                 block=BasicBlock, depth=18):
        super(ResNet_cifar10, self).__init__()
        self.inflate = 5  # width multiplier applied to 16/32/64 base channels
        self.inplanes = 16*self.inflate
        n = int((depth - 2) / 6)
        self.conv1 = BinarizeConv2d(1, 16*self.inflate, kernel_size=3, stride=1, padding=1,
                                    bias=False)
        self.maxpool = lambda x: x  # no pooling after the stem
        self.bn1 = nn.BatchNorm2d(16*self.inflate)
        self.tanh1 = nn.Hardtanh(inplace=True)
        self.tanh2 = nn.Hardtanh(inplace=True)
        self.layer1 = self._make_layer(block, 16*self.inflate, n)
        self.layer2 = self._make_layer(block, 32*self.inflate, n, stride=2)
        self.layer3 = self._make_layer(block, 64*self.inflate, n, stride=2,do_bntan=False)
        self.layer4 = lambda x: x  # this variant has no fourth stage
        # bn2 expects 64*inflate features, i.e. the pooled map must be 1x1;
        # that holds for the (40, 30) inputs this script resizes to
        # (assuming the two stride-2 stages give 10x8 - TODO confirm).
        self.avgpool = nn.AvgPool2d(8)
        self.bn2 = nn.BatchNorm1d(64*self.inflate)
        # The head size previously ignored num_classes and was fixed at 3.
        self.bn3 = nn.BatchNorm1d(num_classes)
        # The head sees (batch, classes); normalise over the class dimension
        # explicitly instead of relying on the deprecated implicit choice.
        self.logsoftmax = nn.LogSoftmax(dim=1)
        self.fc = BinarizeLinear(64*self.inflate, num_classes)
def main():
    """Train the binarized ResNet on the pose images, then report test accuracy."""

    def open_image(path):
        return Image.open(path)

    def count_hits(logits, labels):
        """Number of samples whose highest-scoring class equals the label."""
        _, predicted = torch.max(logits.data, 1)
        return (predicted == labels).sum().item()

    train_set = DatasetFolder("pose_data/training/labeled", loader=open_image,
                              extensions="bmp", transform=train_tfm)
    test_set = DatasetFolder("pose_data/testing", loader=open_image,
                             extensions="bmp", transform=test_tfm)
    train_loader = DataLoader(train_set, batch_size=batch_size, shuffle=True)
    test_loader = DataLoader(test_set, batch_size=batch_size, shuffle=True)

    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model = ResNet_cifar10(num_classes=3,block=BasicBlock,depth=18)
    model = model.to(device)
    print(model)

    optimizer = torch.optim.Adam(model.parameters(), lr=0.001)
    criterion = nn.CrossEntropyLoss()

    for epoch in range(num_epoch):
        running_loss = 0.0  # sum (not mean) of the batch losses of this epoch
        total = 0
        correct = 0
        for inputs, labels in train_loader:
            inputs = inputs.to(device)
            labels = labels.to(device)

            optimizer.zero_grad()
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            loss.backward()

            # Binarized layers presumably keep their real-valued weights in
            # ``p.org`` (same handling as the grideye training script -
            # confirm against BinarizeConv2d/BinarizeLinear). The step must
            # be applied to those latent weights, which are then clipped to
            # [-1, 1]. Parameters without ``org`` are left alone.
            for p in model.parameters():
                if hasattr(p, 'org'):
                    p.data.copy_(p.org)
            optimizer.step()
            for p in model.parameters():
                if hasattr(p, 'org'):
                    p.org.copy_(p.data.clamp_(-1, 1))

            running_loss += loss.item()
            total += labels.size(0)
            correct += count_hits(outputs, labels)
        train_acc = correct / total
        print(f"[ Train | {epoch + 1:03d}/{num_epoch:03d} ] loss = {running_loss:.5f}, acc = {train_acc:.5f}")

    model.eval()
    with torch.no_grad():
        correct = 0
        total = 0
        for inputs, labels in test_loader:
            inputs = inputs.to(device)
            labels = labels.to(device)
            outputs = model(inputs)
            total += labels.size(0)
            correct += count_hits(outputs, labels)
    print('Test Accuracy:{} %'.format((correct / total) * 100))


if __name__ == '__main__':
    main()
| @ -0,0 +1,193 @@ | |||
| import torch | |||
| import numpy as np | |||
| import cv2, os, sys | |||
| import pandas as pd | |||
| from torch.utils.data import Dataset | |||
| from matplotlib import pyplot as plt | |||
| from torch.utils.data import ConcatDataset, DataLoader, Subset | |||
| import torch.nn as nn | |||
| import torchvision.transforms as transforms | |||
| from torchvision.datasets import DatasetFolder | |||
| from PIL import Image | |||
| import torchvision.models as models | |||
# Samples per mini-batch for the loaders built in main().
batch_size = 32
# Number of passes over the training set.
num_epoch = 1

# Run on the second GPU when there is one. Calling set_device(1)
# unconditionally made the import fail on machines with fewer than two CUDA
# devices, even though main() falls back to the CPU.
if torch.cuda.is_available() and torch.cuda.device_count() > 1:
    torch.cuda.set_device(1)

# Training pipeline: grayscale, random horizontal flip, random resized crop
# to (68, 68), tensor conversion.
train_tfm = transforms.Compose([
    transforms.Grayscale(),
    transforms.RandomHorizontalFlip(),
    transforms.RandomResizedCrop((68,68)),
    transforms.ToTensor(),
])

# Evaluation pipeline: grayscale and tensor conversion only; images are not
# resized here.
test_tfm = transforms.Compose([
    transforms.Grayscale(),
    transforms.ToTensor()
])
| ''' | |||
| class Classifier(nn.Module): | |||
| def __init__(self): | |||
| super(Classifier, self).__init__() | |||
| self.cnn_layers = nn.Sequential( | |||
| #input_size(1,30,40) | |||
| nn.Conv2d(1, 16, 3, 1), #output_size(16,28,38) | |||
| nn.BatchNorm2d(16), | |||
| nn.ReLU(), | |||
| nn.Dropout(0.2), | |||
| nn.MaxPool2d(kernel_size = 2), #output_size(16,14,19) | |||
| nn.Conv2d(16, 24, 3, 1), #output_size(24,12,17) | |||
| nn.BatchNorm2d(24), | |||
| nn.ReLU(), | |||
| nn.Dropout(0.2), | |||
| nn.MaxPool2d(kernel_size = 2), #output_size(24,6,8) | |||
| nn.Conv2d(24, 32, 3, 1), #output_size(32,4,6) | |||
| nn.BatchNorm2d(32), | |||
| nn.ReLU(), | |||
| nn.Dropout(0.2), | |||
| nn.MaxPool2d(kernel_size = 2) #ouput_size(32,2,3) | |||
| ) | |||
| self.fc_layers = nn.Sequential( | |||
| nn.Linear(32 * 2 * 3, 32), | |||
| nn.ReLU(), | |||
| nn.Dropout(0.2), | |||
| nn.Linear(32,8) | |||
| ) | |||
| def forward(self, x): | |||
| x = self.cnn_layers(x) | |||
| x = x.flatten(1) | |||
| x = self.fc_layers(x) | |||
| return x | |||
| ''' | |||
def main():
    """Train ResNet-50 on the pose images, keep the best validation checkpoint and test it."""

    def open_image(path):
        return Image.open(path)

    def build_model():
        """ResNet-50 with a 1-channel stem and an 8-way classifier, on ``device``."""
        net = models.resnet50()
        net.conv1 = nn.Conv2d(1, 64, kernel_size=7, stride=2, padding=3,
                              bias=False)
        net.fc = nn.Linear(2048, 8)
        return net.to(device)

    def count_hits(logits, labels):
        """Number of samples whose highest-scoring class equals the label."""
        _, predicted = torch.max(logits.data, 1)
        return (predicted == labels).sum().item()

    train_set = DatasetFolder("pose_data2/train", loader=open_image, extensions="bmp", transform=train_tfm)
    test_set = DatasetFolder("pose_data2/test", loader=open_image, extensions="bmp", transform=test_tfm)
    valid_set = DatasetFolder("pose_data2/val", loader=open_image, extensions="bmp", transform=test_tfm)

    train_loader = DataLoader(train_set, batch_size=batch_size, shuffle=True)
    test_loader = DataLoader(test_set, batch_size=batch_size, shuffle=True)
    valid_loader = DataLoader(valid_set, batch_size=batch_size, shuffle=True)

    model_path = "model.ckpt"
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    model = build_model()
    print(model)

    optimizer = torch.optim.Adam(model.parameters(), lr=0.001)
    criterion = nn.CrossEntropyLoss()
    best_acc = -1  # below any real accuracy, so the first epoch always saves

    for epoch in range(num_epoch):
        # Training. Validation below switches the model to eval mode, so
        # training mode has to be restored at the start of every epoch.
        model.train()
        running_loss = 0.0  # sum (not mean) of the batch losses of this epoch
        total = 0
        correct = 0
        for inputs, labels in train_loader:
            inputs = inputs.to(device)
            labels = labels.to(device)

            optimizer.zero_grad()
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()

            running_loss += loss.item()
            total += labels.size(0)
            correct += count_hits(outputs, labels)
        train_acc = correct / total
        print(f"[ Train | {epoch + 1:03d}/{num_epoch:03d} ] loss = {running_loss:.5f}, acc = {train_acc:.5f}")

        # Validation
        model.eval()
        valid_loss = 0.0
        total = 0
        correct = 0
        with torch.no_grad():
            for inputs, labels in valid_loader:
                inputs = inputs.to(device)
                labels = labels.to(device)
                outputs = model(inputs)
                loss = criterion(outputs, labels)
                # Accumulate into valid_loss; it used to be added on top of
                # the training loss, inflating the reported value.
                valid_loss += loss.item()
                total += labels.size(0)
                correct += count_hits(outputs, labels)
        valid_acc = correct / total
        print(f"[ Valid | {epoch + 1:03d}/{num_epoch:03d} ] loss = {valid_loss:.5f}, acc = {valid_acc:.5f}")

        if valid_acc > best_acc:
            best_acc = valid_acc
            torch.save(model.state_dict(), model_path)
            print('saving model with acc {:.3f}'.format(valid_acc))

    # Testing: evaluate the best checkpoint, not the last epoch's weights.
    model = build_model()
    model.load_state_dict(torch.load(model_path, map_location=device))
    model.eval()
    with torch.no_grad():
        correct = 0
        total = 0
        for inputs, labels in test_loader:
            inputs = inputs.to(device)
            labels = labels.to(device)
            outputs = model(inputs)
            total += labels.size(0)
            correct += count_hits(outputs, labels)
    print('Test Accuracy:{} %'.format((correct / total) * 100))


if __name__ == '__main__':
    main()
| @ -0,0 +1,285 @@ | |||
| import torch | |||
| import numpy as np | |||
| import cv2, os, sys | |||
| import pandas as pd | |||
| from torch.utils.data import Dataset | |||
| from matplotlib import pyplot as plt | |||
| from torch.utils.data import ConcatDataset, DataLoader, Subset | |||
| import torch.nn as nn | |||
| import torchvision.transforms as transforms | |||
| from torchvision.datasets import DatasetFolder | |||
| from PIL import Image | |||
| import torchvision.models | |||
| import BinaryNetpytorch.models as models | |||
| from BinaryNetpytorch.models.binarized_modules import BinarizeLinear,BinarizeConv2d | |||
| import progressbar | |||
| import seaborn as sns | |||
# Samples per mini-batch for the loaders built in main().
batch_size = 32
# Number of passes over the training set.
num_epoch = 60

# Run on the second GPU when there is one. Calling set_device(1)
# unconditionally made the import fail on machines with fewer than two CUDA
# devices, even though main() falls back to the CPU.
if torch.cuda.is_available() and torch.cuda.device_count() > 1:
    torch.cuda.set_device(1)

# Training and evaluation use the same deterministic steps:
# grayscale, resize to (68, 68), tensor conversion.
train_tfm = transforms.Compose([
    transforms.Grayscale(),
    transforms.Resize((68, 68)),
    transforms.ToTensor(),
])

test_tfm = transforms.Compose([
    transforms.Grayscale(),
    transforms.Resize((68, 68)),
    transforms.ToTensor()
])
def Quantize(img, step=0.0078125):
    """Round every element of ``img`` to the nearest multiple of ``step``.

    Args:
        img: tensor to quantize; it is not modified.
        step: grid spacing. The default, 0.0078125, equals 2**-7 (1/128).

    Returns:
        A tensor shaped like ``img`` whose values lie on the grid. A value
        exactly halfway between two grid points goes to the upper one.

    NOTE(review): the result is built from a floor division, so autograd
    presumably propagates no gradient back to ``img`` - confirm whether a
    straight-through estimator is wanted where this is used in training.
    """
    scaler = torch.div(img, step, rounding_mode="floor")
    lower = scaler * step  # grid point at or below the value
    upper = (scaler + 1) * step  # next grid point above
    # Strict '<' sends exact ties to the upper grid point.
    return torch.where(torch.abs(img - lower) < torch.abs(img - upper), lower, upper)
| # bar = progressbar.ProgressBar(maxval=img.size(0)*img.size(2)*img.size(3), \ | |||
| # widgets=[progressbar.Bar('=', '[', ']'), ' ', progressbar.Percentage()]) | |||
| # bar.start() | |||
| # for p in range(img.size(0)): | |||
| # for i in range(img.size(2)): | |||
| # for j in range(img.size(3)): | |||
| # scaler = int(img[p][0][i][j] / 0.0078125) | |||
| # t1 = scaler * 0.0078125 | |||
| # t2 = (scaler + 1) * 0.0078125 | |||
| # if(abs(img[p][0][i][j] - t1) < abs(img[p][0][i][j] - t2)): | |||
| # img[p][0][i][j] = t1 | |||
| # else: | |||
| # img[p][0][i][j] = t2 | |||
| # bar.finish() | |||
| # return img | |||
def Binaryconv3x3(in_planes, out_planes, stride=1):
    """Build a bias-free 3x3 ``BinarizeConv2d`` with padding 1 and the given stride."""
    options = dict(kernel_size=3, stride=stride, padding=1, bias=False)
    return BinarizeConv2d(in_planes, out_planes, **options)
def conv3x3(in_planes, out_planes, stride=1):
    """Build a bias-free 3x3 ``nn.Conv2d`` with padding 1 and the given stride."""
    options = dict(kernel_size=3, stride=stride, padding=1, bias=False)
    return nn.Conv2d(in_planes, out_planes, **options)
class BasicBlock(nn.Module):
    """Residual block of two binarized 3x3 convolutions fed quantized activations.

    Args:
        inplanes: channels of the block input.
        planes: channels produced by both convolutions.
        stride: stride of the first convolution.
        downsample: optional module applied to the skip connection so it
            matches the main path; ``None`` adds the input as is.
        do_bntan: when False the final BatchNorm + Hardtanh are skipped and
            the raw sum is returned.
    """

    # Output channels are planes * expansion (read by ResNet._make_layer).
    expansion = 1

    def __init__(self, inplanes, planes, stride=1, downsample=None,do_bntan=True):
        super(BasicBlock, self).__init__()

        self.conv1 = Binaryconv3x3(inplanes, planes, stride)
        self.bn1 = nn.BatchNorm2d(planes)
        self.tanh1 = nn.Hardtanh(inplace=True)
        self.conv2 = Binaryconv3x3(planes, planes)
        self.tanh2 = nn.Hardtanh(inplace=True)
        self.bn2 = nn.BatchNorm2d(planes)

        self.downsample = downsample
        self.do_bntan=do_bntan
        self.stride = stride

    def forward(self, x):
        """Return the quantized two-conv path plus the skip, then optionally bn2 + tanh2."""
        import warnings

        # The skip path keeps the un-quantized input; only the main path
        # sees the quantized copy.
        residual = x.clone()
        x = Quantize(x)
        out = self.conv1(x)
        out = self.bn1(out)
        out = self.tanh1(out)
        out = Quantize(out)
        out = self.conv2(out)

        if self.downsample is not None:
            # This sanity check used to drop into pdb, which hangs any
            # unattended run; report the condition and carry on instead.
            if residual.data.max()>1:
                warnings.warn('BasicBlock: residual input exceeds 1 before downsample')
            residual = self.downsample(residual)

        out += residual
        if self.do_bntan:
            out = self.bn2(out)
            out = self.tanh2(out)

        return out
class ResNet(nn.Module):
    """Skeleton of the quantized, binarized ResNet.

    This base class creates no layers itself; ``forward`` and
    ``_make_layer`` rely on attributes (``inplanes``, ``conv1``, ``maxpool``,
    ``bn1``, ``tanh1``, ``layer1``..``layer4``, ``avgpool``, ``bn2``,
    ``tanh2``, ``fc``, ``bn3``, ``logsoftmax``) being set by a subclass.
    """

    def __init__(self):
        super(ResNet, self).__init__()

    def _make_layer(self, block, planes, blocks, stride=1,do_bntan=True):
        """Assemble one stage of residual blocks as an ``nn.Sequential``.

        The first block gets ``stride`` and, when the stride or the channel
        count changes, a 1x1 binarized conv + BatchNorm on its skip path.
        Only the last block receives ``do_bntan``. A first and a last block
        are always created, so the stage has at least two blocks.
        """
        downsample = None
        if stride != 1 or self.inplanes != planes * block.expansion:
            downsample = nn.Sequential(
                BinarizeConv2d(self.inplanes, planes * block.expansion,
                               kernel_size=1, stride=stride, bias=False),
                nn.BatchNorm2d(planes * block.expansion),
            )

        layers = []
        layers.append(block(self.inplanes, planes, stride, downsample))
        self.inplanes = planes * block.expansion
        for _ in range(1, blocks-1):
            layers.append(block(self.inplanes, planes))
        layers.append(block(self.inplanes, planes,do_bntan=do_bntan))

        return nn.Sequential(*layers)

    def forward(self, x):
        """Quantize the input, run stem, stages and pooling, then the 1x1-conv head."""
        x = Quantize(x)
        x = self.conv1(x)
        x = self.maxpool(x)
        x = self.bn1(x)
        x = self.tanh1(x)
        x = self.layer1(x)
        x = self.layer2(x)
        x = self.layer3(x)
        x = self.layer4(x)

        x = self.avgpool(x)
        # Flatten for the 1-D BatchNorm.
        x = x.view(x.size(0), -1)
        x = self.bn2(x)
        x = self.tanh2(x)
        # ``fc`` is a 1x1 convolution, so give the features back two unit
        # spatial dims. The shape is derived from the tensor itself: the
        # former hard-coded (32, 1280, 1, 1) failed for every batch that
        # did not hold exactly 32 samples (e.g. the last one of an epoch).
        x = x.view(x.size(0), -1, 1, 1)
        x = self.fc(x)
        x = x.view(x.size(0), -1)
        x = self.bn3(x)
        x = self.logsoftmax(x)

        return x
class ResNet_cifar10(ResNet):
    """Three-stage quantized, binarized ResNet for single-channel images.

    Args:
        num_classes: number of output classes (channels of the final 1x1
            convolution).
        block: residual block class handed to ``_make_layer``.
        depth: nominal depth; each stage is built with
            ``int((depth - 2) / 6)`` as its block count.
    """

    def __init__(self, num_classes=8,
                 block=BasicBlock, depth=18):
        super(ResNet_cifar10, self).__init__()
        self.inflate = 5  # width multiplier applied to 16/32/64 base channels
        self.inplanes = 16*self.inflate
        n = int((depth - 2) / 6)
        self.conv1 = BinarizeConv2d(1, 16*self.inflate, kernel_size=3, stride=1, padding=1,
                                    bias=False)
        self.maxpool = lambda x: x  # no pooling after the stem
        self.bn1 = nn.BatchNorm2d(16*self.inflate)
        self.tanh1 = nn.Hardtanh(inplace=True)
        self.tanh2 = nn.Hardtanh(inplace=True)
        self.layer1 = self._make_layer(block, 16*self.inflate, n)
        self.layer2 = self._make_layer(block, 32*self.inflate, n, stride=2)
        self.layer3 = self._make_layer(block, 64*self.inflate, n, stride=2,do_bntan=False)
        self.layer4 = lambda x: x  # this variant has no fourth stage
        self.avgpool = nn.AvgPool2d(8)
        # 256*inflate = 64*inflate channels x 4 positions: the head expects
        # a 2x2 pooled map, which matches the (68, 68) inputs this script
        # resizes to (assuming layer3 then yields 17x17 - TODO confirm).
        self.bn2 = nn.BatchNorm1d(256*self.inflate)
        # The head size previously ignored num_classes and was fixed at 8.
        self.bn3 = nn.BatchNorm1d(num_classes)
        # The head sees (batch, classes); normalise over the class dimension
        # explicitly instead of relying on the deprecated implicit choice.
        self.logsoftmax = nn.LogSoftmax(dim=1)
        # Binarized 1x1 convolution used as the classifier.
        self.fc = BinarizeConv2d(256*self.inflate, num_classes, kernel_size=1)
def main():
    """Train the quantized binarized ResNet, save it, reload it and analyse test errors.

    Besides the 8-class accuracy this reports a coarse accuracy over the two
    class groups {0..3} and {4..7}, writes misclassified test images to
    ``test_result/`` and saves a confusion heatmap of the errors.
    """

    def open_image(path):
        return Image.open(path)

    train_set = DatasetFolder("pose_data2/train", loader=open_image, extensions="bmp", transform=train_tfm)
    test_set = DatasetFolder("pose_data2/test", loader=open_image, extensions="bmp", transform=test_tfm)

    train_loader = DataLoader(train_set, batch_size=batch_size, shuffle=True)
    test_loader = DataLoader(test_set, batch_size=batch_size, shuffle=True)

    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model = ResNet_cifar10(num_classes=8,block=BasicBlock,depth=18)
    model = model.to(device)
    print(model)

    optimizer = torch.optim.Adam(model.parameters(), lr=0.001)
    criterion = nn.CrossEntropyLoss()
    model_path = "model.ckpt"

    for epoch in range(num_epoch):
        running_loss = 0.0  # sum (not mean) of the batch losses of this epoch
        total = 0
        correct = 0
        for inputs, labels in train_loader:
            inputs = inputs.to(device)
            labels = labels.to(device)

            optimizer.zero_grad()
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            loss.backward()

            # Binarized layers presumably keep their real-valued weights in
            # ``p.org`` (same handling as the grideye training script -
            # confirm against BinarizeConv2d). The step must be applied to
            # those latent weights, which are then clipped to [-1, 1].
            # Parameters without ``org`` are left alone.
            for p in model.parameters():
                if hasattr(p, 'org'):
                    p.data.copy_(p.org)
            optimizer.step()
            for p in model.parameters():
                if hasattr(p, 'org'):
                    p.org.copy_(p.data.clamp_(-1, 1))

            running_loss += loss.item()
            total += labels.size(0)
            _, predicted = torch.max(outputs.data, 1)
            correct += (predicted == labels).sum().item()
        train_acc = correct / total
        print(f"[ Train | {epoch + 1:03d}/{num_epoch:03d} ] loss = {running_loss:.5f}, acc = {train_acc:.5f}")

    # Round-trip through the checkpoint so the tested weights are exactly
    # what was written to disk.
    torch.save(model.state_dict(), model_path)
    model = ResNet_cifar10(num_classes=8,block=BasicBlock,depth=18)
    model = model.to(device)
    model.load_state_dict(torch.load(model_path))
    model.eval()

    # cv2.imwrite does not create directories and only returns False on
    # failure, so make sure the output folder exists.
    os.makedirs("test_result", exist_ok=True)

    with torch.no_grad():
        correct = 0
        total = 0
        correct_2 = 0
        # stat[label][prediction] counts the misclassified samples only.
        stat = np.zeros((8,8))
        for inputs, labels in test_loader:
            inputs = inputs.to(device)
            labels = labels.to(device)
            outputs = model(inputs)
            _, predicted = torch.max(outputs.data, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()

            # Coarse accuracy: a hit when prediction and label fall in the
            # same group, {0..3} or {4..7}.
            correct_2 += ((predicted < 4) == (labels < 4)).sum().item()

            # Iterate over the samples actually present: the last batch can
            # be shorter than batch_size, which used to raise IndexError.
            for k in range(labels.size(0)):
                if predicted[k] != labels[k]:
                    img = inputs[k].mul(255).byte()
                    img = img.cpu().numpy().squeeze(0)
                    # NOTE(review): after squeeze(0) the array is 2-D, so
                    # this moveaxis transposes the image - confirm intended.
                    img = np.moveaxis(img, 0, -1)
                    predict = predicted[k].cpu().numpy()
                    label = labels[k].cpu().numpy()
                    # One file per (prediction, label) pair; later samples
                    # of the same pair overwrite earlier ones.
                    path = "test_result/predict:"+str(predict)+"_labels:"+str(label)+".jpg"
                    stat[int(label)][int(predict)] += 1
                    cv2.imwrite(path,img)

    print(stat)
    sns.heatmap(stat, linewidth=0.5)
    plt.xlabel('Prediction')
    plt.ylabel('Label')
    plt.savefig('heatmap.jpg')

    print('Test_2clasee Accuracy:{} %'.format((correct_2 / total) * 100))
    print('Test Accuracy:{} %'.format((correct / total) * 100))


if __name__ == '__main__':
    main()
| @ -0,0 +1,207 @@ | |||
| import torch | |||
| import numpy as np | |||
| import cv2, os, sys | |||
| from torch.utils.data import Dataset | |||
| from matplotlib import pyplot as plt | |||
| from torch.utils.data import ConcatDataset, DataLoader, Subset | |||
| import torch.nn as nn | |||
| import torchvision.transforms as transforms | |||
| from torchvision.datasets import DatasetFolder | |||
| from PIL import Image | |||
| from BinaryNetpytorch.models.binarized_modules import BinarizeLinear,BinarizeConv2d | |||
| from BinaryNetpytorch.models.binarized_modules import Binarize,HingeLoss | |||
| import seaborn as sns | |||
| import random | |||
# Samples per mini-batch for the loaders built in main().
batch_size = 8
# Number of passes over the training set.
num_epoch = 10

# Fix every random number generator in use so runs are repeatable.
seed = 777
random.seed(seed)
np.random.seed(seed)
torch.manual_seed(seed)
torch.cuda.manual_seed(seed)
torch.cuda.manual_seed_all(seed)
# Ask cuDNN for deterministic behaviour and disable its auto-tuner.
torch.backends.cudnn.deterministic = True
torch.backends.cudnn.benchmark = False

# Images are only converted to tensors for both training and evaluation;
# no augmentation, grayscale conversion or resizing is applied.
train_tfm = transforms.Compose([transforms.ToTensor()])
test_tfm = transforms.Compose([transforms.ToTensor()])
class Classifier(nn.Module):
    """Small fully convolutional binarized classifier with 3 output channels.

    Three stages of ``BinarizeConv2d`` (3x3, stride 1) -> BatchNorm -> ReLU
    -> 2x2 max-pool with 1 -> 128 -> 64 -> 32 channels, followed by a final
    ``BinarizeConv2d`` with a (3, 2) kernel producing 3 channels. There is no
    linear layer: ``forward`` flattens whatever the last convolution emits.
    """

    def __init__(self):
        super(Classifier, self).__init__()
        stages = []
        for c_in, c_out in ((1, 128), (128, 64), (64, 32)):
            stages += [
                BinarizeConv2d(c_in, c_out, 3, 1),
                nn.BatchNorm2d(c_out),
                nn.ReLU(),
                nn.MaxPool2d(kernel_size = 2),
            ]
        # Assuming BinarizeConv2d follows nn.Conv2d size arithmetic, a
        # (1, 40, 30) input reaches this layer as 3x2 and leaves as 1x1,
        # i.e. one value per class - TODO confirm the expected input size.
        stages.append(BinarizeConv2d(32, 3, (3,2), 1))
        # Kept as one flat Sequential so module indices stay 0..12.
        self.cnn_layers = nn.Sequential(*stages)

    def forward(self, x):
        """Apply the convolutional stack and flatten all non-batch dimensions."""
        features = self.cnn_layers(x)
        return features.view(features.size(0), -1)
def main():
    """Train the classifier, keep the best checkpoint, then evaluate it.

    Steps:
      1. Build train / validation / test loaders from ``.bmp`` image folders.
      2. Train for ``num_epoch`` epochs with Adam and cross-entropy loss,
         validating after every epoch and saving the weights to
         ``models.ckpt`` whenever validation accuracy improves.
      3. Reload the saved weights into a fresh model, report test accuracy
         and write a heatmap of the misclassifications to ``heatmap.jpg``.

    Reads the module-level ``batch_size``, ``num_epoch``, ``train_tfm`` and
    ``test_tfm``. Returns nothing; results are printed and written to disk.
    """
    # NOTE(review): val_set reads the same directory as train_set (only the
    # transform differs), so "validation" accuracy is measured on training
    # images -- confirm this is intended.
    train_set = DatasetFolder("./dataset/data_0711/grideye/train", loader=Image.open, extensions="bmp", transform=train_tfm)
    test_set = DatasetFolder("./dataset/data_0711/grideye/test", loader=Image.open, extensions="bmp", transform=test_tfm)
    val_set = DatasetFolder("./dataset/data_0711/grideye/train", loader=Image.open, extensions="bmp", transform=test_tfm)

    train_loader = DataLoader(train_set, batch_size=batch_size, shuffle=True)
    test_loader = DataLoader(test_set, batch_size=batch_size, shuffle=True)
    val_loader = DataLoader(val_set, batch_size=batch_size, shuffle=True)

    save_path = 'models.ckpt'
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model = Classifier().to(device)
    optimizer = torch.optim.Adam(model.parameters(), lr=0.0001)
    criterion = nn.CrossEntropyLoss()

    # Start below any reachable accuracy so the first epoch always writes a
    # checkpoint; otherwise a run stuck at 0 % would later load a stale file
    # left over from a previous run (or fail because none exists).
    best_accuracy = float("-inf")

    for epoch in range(num_epoch):
        # The validation pass below puts the model in eval mode; switch back
        # so BatchNorm uses batch statistics and updates its running
        # estimates during training.
        model.train()

        running_loss = 0.0
        total = 0
        correct = 0
        for inputs, labels in train_loader:
            inputs = inputs.to(device)
            labels = labels.to(device)

            optimizer.zero_grad()
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            loss.backward()

            # Parameters that carry an ``org`` attribute are reset to that
            # stored copy before the optimizer step, so the update is applied
            # to ``org``'s values (presumably the full-precision weights kept
            # by the binarized layers -- confirm in BinarizeConv2d).
            for p in model.parameters():
                if hasattr(p, 'org'):
                    p.data.copy_(p.org)
            optimizer.step()
            # Clamp the updated values to [-1, 1] and store them back.
            for p in model.parameters():
                if hasattr(p, 'org'):
                    p.org.copy_(p.data.clamp_(-1, 1))

            running_loss += loss.item()
            total += labels.size(0)
            _, predicted = torch.max(outputs.data, 1)
            correct += (predicted == labels).sum().item()

        train_acc = correct / total
        print(f"[ Train | {epoch + 1:03d}/{num_epoch:03d} ] loss = {running_loss:.5f}, acc = {train_acc:.5f}")

        model.eval()
        with torch.no_grad():
            correct = 0
            total = 0
            for inputs, labels in val_loader:
                inputs = inputs.to(device)
                labels = labels.to(device)
                outputs = model(inputs)
                _, predicted = torch.max(outputs.data, 1)
                total += labels.size(0)
                correct += (predicted == labels).sum().item()
        val_acc = correct / total

        if val_acc > best_accuracy:
            best_accuracy = val_acc
            torch.save(model.state_dict(), save_path)
            print("Save Model")
        print(f"[ Val | {epoch + 1:03d}/{num_epoch:03d} ] acc = {val_acc:.5f}")

    # Evaluate the best checkpoint rather than the last-epoch weights.
    # map_location lets a checkpoint saved on GPU load on a CPU-only machine.
    model = Classifier().to(device)
    model.load_state_dict(torch.load(save_path, map_location=device))
    model.eval()

    # stat[label][prediction] counts test errors; 3x3 matches the 3 classes.
    stat = np.zeros((3, 3))
    with torch.no_grad():
        correct = 0
        total = 0
        print(model)
        for inputs, labels in test_loader:
            inputs = inputs.to(device)
            labels = labels.to(device)
            outputs = model(inputs)
            _, predicted = torch.max(outputs.data, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()

            # Only misclassified samples are tallied, so the diagonal of the
            # heatmap stays zero.
            # NOTE(review): confirm that a full confusion matrix (including
            # correct predictions) was not the intent.
            wrong = predicted != labels
            for label, predict in zip(labels[wrong].tolist(), predicted[wrong].tolist()):
                stat[label][predict] += 1

    sns.heatmap(stat, linewidth=0.5)
    plt.xlabel('Prediction')
    plt.ylabel('Label')
    plt.savefig('heatmap.jpg')

    print('Test Accuracy:{} %'.format((correct / total) * 100))
# Script entry point: run the full train / validate / test pipeline only when
# this file is executed directly, not when it is imported.
if '__main__' == __name__:
    main()