@ -0,0 +1,2 @@ | |||
models.ckpt | |||
training_state.bin |
@ -0,0 +1,8 @@ | |||
# BNN.pytorch | |||
Binarized Neural Network (BNN) for pytorch | |||
This is the PyTorch version of the BNN code, for VGG and ResNet models
Link to the paper: https://papers.nips.cc/paper/6573-binarized-neural-networks | |||
The code is based on https://github.com/eladhoffer/convNet.pytorch | |||
Please install torch and torchvision by following the instructions at: http://pytorch.org/ | |||
To run resnet18 for cifar10 dataset use: python main_binary.py --model resnet_binary --save resnet18_binary --dataset cifar10 |
@ -0,0 +1,37 @@ | |||
import os | |||
import torchvision.datasets as datasets | |||
import torchvision.transforms as transforms | |||
# Root directory that holds all datasets; adjust for the local machine.
_DATASETS_MAIN_PATH = '/home/Datasets'
# Storage location per dataset name; 'imagenet' keeps separate
# train/val folders because it is loaded through ImageFolder.
_dataset_path = {
    'cifar10': os.path.join(_DATASETS_MAIN_PATH, 'CIFAR10'),
    'cifar100': os.path.join(_DATASETS_MAIN_PATH, 'CIFAR100'),
    'stl10': os.path.join(_DATASETS_MAIN_PATH, 'STL10'),
    'mnist': os.path.join(_DATASETS_MAIN_PATH, 'MNIST'),
    'imagenet': {
        'train': os.path.join(_DATASETS_MAIN_PATH, 'ImageNet/train'),
        'val': os.path.join(_DATASETS_MAIN_PATH, 'ImageNet/val')
    }
}
def get_dataset(name, split='train', transform=None,
                target_transform=None, download=True):
    """Return a torchvision dataset selected by name.

    Args:
        name: 'cifar10', 'cifar100', 'mnist', 'stl10' or 'imagenet'.
        split: 'train' or 'val'; for datasets that only distinguish
            train/test, 'val' maps to the test split.
        transform, target_transform: optional torchvision transforms.
        download: fetch the data if missing (ignored for imagenet,
            which is read from a local ImageFolder tree).

    Returns:
        A dataset instance, or None for an unrecognized name (kept for
        backward compatibility with existing callers).
    """
    train = (split == 'train')
    if name == 'cifar10':
        return datasets.CIFAR10(root=_dataset_path['cifar10'],
                                train=train,
                                transform=transform,
                                target_transform=target_transform,
                                download=download)
    elif name == 'cifar100':
        return datasets.CIFAR100(root=_dataset_path['cifar100'],
                                 train=train,
                                 transform=transform,
                                 target_transform=target_transform,
                                 download=download)
    elif name == 'mnist':
        # Path existed in _dataset_path but the branch was missing.
        return datasets.MNIST(root=_dataset_path['mnist'],
                              train=train,
                              transform=transform,
                              target_transform=target_transform,
                              download=download)
    elif name == 'stl10':
        # STL10 uses a string split argument instead of a train flag.
        return datasets.STL10(root=_dataset_path['stl10'],
                              split='train' if train else 'test',
                              transform=transform,
                              target_transform=target_transform,
                              download=download)
    elif name == 'imagenet':
        path = _dataset_path[name][split]
        return datasets.ImageFolder(root=path,
                                    transform=transform,
                                    target_transform=target_transform)
    return None
@ -0,0 +1,309 @@ | |||
import argparse | |||
import os | |||
import time | |||
import logging | |||
import torch | |||
import torch.nn as nn | |||
import torch.nn.parallel | |||
import torch.backends.cudnn as cudnn | |||
import torch.optim | |||
import torch.utils.data | |||
import models | |||
from torch.autograd import Variable | |||
from data import get_dataset | |||
from preprocess import get_transform | |||
from utils import * | |||
from datetime import datetime | |||
from ast import literal_eval | |||
from torchvision.utils import save_image | |||
# Discover the model constructors exported by the models package:
# lower-case callables such as 'alexnet' or 'resnet_binary'.
# (Resolved an unresolved git merge conflict that was left in the file;
# the debug print from one side of the conflict is dropped.)
model_names = sorted(name for name in models.__dict__
                     if name.islower() and not name.startswith("__")
                     and callable(models.__dict__[name]))
# Command-line interface. Defaults target ImageNet/AlexNet; CIFAR runs
# override --dataset/--model (see README).
parser = argparse.ArgumentParser(description='PyTorch ConvNet Training')
# Output locations: results_dir/save holds logs, checkpoints and plots.
parser.add_argument('--results_dir', metavar='RESULTS_DIR', default='./results',
                    help='results dir')
parser.add_argument('--save', metavar='SAVE', default='',
                    help='saved folder')
parser.add_argument('--dataset', metavar='DATASET', default='imagenet',
                    help='dataset name or folder')
parser.add_argument('--model', '-a', metavar='MODEL', default='alexnet',
                    choices=model_names,
                    help='model architecture: ' +
                    ' | '.join(model_names) +
                    ' (default: alexnet)')
parser.add_argument('--input_size', type=int, default=None,
                    help='image input size')
# Extra keyword arguments for the model constructor, parsed with
# ast.literal_eval in main() (e.g. "{'depth': 18}").
parser.add_argument('--model_config', default='',
                    help='additional architecture configuration')
# Tensor type doubles as the device selector: 'cuda' in the string
# enables GPU setup in main().
parser.add_argument('--type', default='torch.cuda.FloatTensor',
                    help='type of tensor - e.g torch.cuda.HalfTensor')
parser.add_argument('--gpus', default='0',
                    help='gpus used for training - e.g 0,1,3')
parser.add_argument('-j', '--workers', default=8, type=int, metavar='N',
                    help='number of data loading workers (default: 8)')
parser.add_argument('--epochs', default=2500, type=int, metavar='N',
                    help='number of total epochs to run')
parser.add_argument('--start-epoch', default=0, type=int, metavar='N',
                    help='manual epoch number (useful on restarts)')
parser.add_argument('-b', '--batch-size', default=256, type=int,
                    metavar='N', help='mini-batch size (default: 256)')
# Optimization settings; per-epoch values may be overridden by the
# model's 'regime' attribute (see main()).
parser.add_argument('--optimizer', default='SGD', type=str, metavar='OPT',
                    help='optimizer function used')
parser.add_argument('--lr', '--learning_rate', default=0.1, type=float,
                    metavar='LR', help='initial learning rate')
parser.add_argument('--momentum', default=0.9, type=float, metavar='M',
                    help='momentum')
parser.add_argument('--weight-decay', '--wd', default=1e-4, type=float,
                    metavar='W', help='weight decay (default: 1e-4)')
parser.add_argument('--print-freq', '-p', default=10, type=int,
                    metavar='N', help='print frequency (default: 10)')
parser.add_argument('--resume', default='', type=str, metavar='PATH',
                    help='path to latest checkpoint (default: none)')
parser.add_argument('-e', '--evaluate', type=str, metavar='FILE',
                    help='evaluate model FILE on validation set')
def main():
    """Parse CLI args, build the model and data pipeline, then train or evaluate.

    Side effects: creates the results directory, configures logging, and
    writes checkpoints plus a CSV/HTML results log under results_dir/save.
    """
    global args, best_prec1
    best_prec1 = 0
    args = parser.parse_args()

    if args.evaluate:
        args.results_dir = '/tmp'
    if not args.save:
        # Was `args.save is ''`, which relies on string interning; use truthiness.
        args.save = datetime.now().strftime('%Y-%m-%d_%H-%M-%S')

    save_path = os.path.join(args.results_dir, args.save)
    os.makedirs(save_path, exist_ok=True)  # no exists()/makedirs() race

    setup_logging(os.path.join(save_path, 'log.txt'))
    results_file = os.path.join(save_path, 'results.%s')
    results = ResultsLog(results_file % 'csv', results_file % 'html')
    logging.info("saving to %s", save_path)
    logging.debug("run arguments: %s", args)

    if 'cuda' in args.type:
        args.gpus = [int(i) for i in args.gpus.split(',')]
        torch.cuda.set_device(args.gpus[0])
        cudnn.benchmark = True
    else:
        args.gpus = None

    # create model
    logging.info("creating model %s", args.model)
    model = models.__dict__[args.model]
    model_config = {'input_size': args.input_size, 'dataset': args.dataset}
    if args.model_config != '':
        # Was `is not ''` (identity, not equality); merge user-supplied kwargs.
        model_config = dict(model_config, **literal_eval(args.model_config))
    model = model(**model_config)
    logging.info("created model with configuration: %s", model_config)

    # optionally resume from a checkpoint
    if args.evaluate:
        if not os.path.isfile(args.evaluate):
            parser.error('invalid checkpoint: {}'.format(args.evaluate))
        checkpoint = torch.load(args.evaluate)
        model.load_state_dict(checkpoint['state_dict'])
        logging.info("loaded checkpoint '%s' (epoch %s)",
                     args.evaluate, checkpoint['epoch'])
    elif args.resume:
        checkpoint_file = args.resume
        if os.path.isdir(checkpoint_file):
            results.load(os.path.join(checkpoint_file, 'results.csv'))
            checkpoint_file = os.path.join(
                checkpoint_file, 'model_best.pth.tar')
        if os.path.isfile(checkpoint_file):
            logging.info("loading checkpoint '%s'", args.resume)
            checkpoint = torch.load(checkpoint_file)
            args.start_epoch = checkpoint['epoch'] - 1
            best_prec1 = checkpoint['best_prec1']
            model.load_state_dict(checkpoint['state_dict'])
            logging.info("loaded checkpoint '%s' (epoch %s)",
                         checkpoint_file, checkpoint['epoch'])
        else:
            logging.error("no checkpoint found at '%s'", args.resume)

    num_parameters = sum(l.nelement() for l in model.parameters())
    logging.info("number of parameters: %d", num_parameters)

    # Data loading code
    default_transform = {
        'train': get_transform(args.dataset,
                               input_size=args.input_size, augment=True),
        'eval': get_transform(args.dataset,
                              input_size=args.input_size, augment=False)
    }
    transform = getattr(model, 'input_transform', default_transform)
    # Per-epoch hyper-parameter schedule; models may ship their own 'regime'.
    regime = getattr(model, 'regime', {0: {'optimizer': args.optimizer,
                                           'lr': args.lr,
                                           'momentum': args.momentum,
                                           'weight_decay': args.weight_decay}})
    # define loss function (criterion) and optimizer
    criterion = getattr(model, 'criterion', nn.CrossEntropyLoss)()
    criterion.type(args.type)
    model.type(args.type)

    val_data = get_dataset(args.dataset, 'val', transform['eval'])
    val_loader = torch.utils.data.DataLoader(
        val_data,
        batch_size=args.batch_size, shuffle=False,
        num_workers=args.workers, pin_memory=True)

    if args.evaluate:
        validate(val_loader, model, criterion, 0)
        return

    train_data = get_dataset(args.dataset, 'train', transform['train'])
    train_loader = torch.utils.data.DataLoader(
        train_data,
        batch_size=args.batch_size, shuffle=True,
        num_workers=args.workers, pin_memory=True)

    # Only lr is set here; momentum/weight decay come from `regime`
    # through adjust_optimizer at the start of every epoch.
    optimizer = torch.optim.SGD(model.parameters(), lr=args.lr)
    logging.info('training regime: %s', regime)

    for epoch in range(args.start_epoch, args.epochs):
        optimizer = adjust_optimizer(optimizer, epoch, regime)

        # train for one epoch
        train_loss, train_prec1, train_prec5 = train(
            train_loader, model, criterion, epoch, optimizer)

        # evaluate on validation set
        val_loss, val_prec1, val_prec5 = validate(
            val_loader, model, criterion, epoch)

        # remember best prec@1 and save checkpoint
        is_best = val_prec1 > best_prec1
        best_prec1 = max(val_prec1, best_prec1)
        save_checkpoint({
            'epoch': epoch + 1,
            'model': args.model,
            'config': args.model_config,
            'state_dict': model.state_dict(),
            'best_prec1': best_prec1,
            'regime': regime
        }, is_best, path=save_path)
        logging.info('\n Epoch: {0}\t'
                     'Training Loss {train_loss:.4f} \t'
                     'Training Prec@1 {train_prec1:.3f} \t'
                     'Training Prec@5 {train_prec5:.3f} \t'
                     'Validation Loss {val_loss:.4f} \t'
                     'Validation Prec@1 {val_prec1:.3f} \t'
                     'Validation Prec@5 {val_prec5:.3f} \n'
                     .format(epoch + 1, train_loss=train_loss, val_loss=val_loss,
                             train_prec1=train_prec1, val_prec1=val_prec1,
                             train_prec5=train_prec5, val_prec5=val_prec5))

        results.add(epoch=epoch + 1, train_loss=train_loss, val_loss=val_loss,
                    train_error1=100 - train_prec1, val_error1=100 - val_prec1,
                    train_error5=100 - train_prec5, val_error5=100 - val_prec5)
        results.save()
def forward(data_loader, model, criterion, epoch=0, training=True, optimizer=None):
    """Run one pass over data_loader (train or eval).

    Returns (average loss, average prec@1, average prec@5). When training,
    applies the BNN straight-through update: gradients are computed on the
    binarized weights, the real-valued copies (p.org) are restored before
    optimizer.step(), and the updated weights are clamped back to [-1, 1].
    """
    if args.gpus and len(args.gpus) > 1:
        model = torch.nn.DataParallel(model, args.gpus)

    batch_time = AverageMeter()
    data_time = AverageMeter()
    losses = AverageMeter()
    top1 = AverageMeter()
    top5 = AverageMeter()

    end = time.time()
    for i, (inputs, target) in enumerate(data_loader):
        # measure data loading time
        data_time.update(time.time() - end)
        if args.gpus is not None:
            target = target.cuda()

        # `Variable(..., volatile=...)` was removed from torch.autograd;
        # grad mode is controlled with set_grad_enabled, which also
        # collapses the previously duplicated train/eval branches.
        with torch.set_grad_enabled(training):
            input_var = Variable(inputs.type(args.type))
            target_var = Variable(target)
            output = model(input_var)
            loss = criterion(output, target_var)
        if type(output) is list:
            output = output[0]

        # measure accuracy and record loss
        prec1, prec5 = accuracy(output.data, target, topk=(1, 5))
        losses.update(loss.item(), inputs.size(0))
        top1.update(prec1.item(), inputs.size(0))
        top5.update(prec5.item(), inputs.size(0))

        if training:
            # compute gradient and do SGD step
            optimizer.zero_grad()
            loss.backward()
            # Restore real-valued weights so the step updates them,
            # not the binarized copies used in the forward pass.
            for p in list(model.parameters()):
                if hasattr(p, 'org'):
                    p.data.copy_(p.org)
            optimizer.step()
            # Keep the latent real-valued weights inside [-1, 1].
            for p in list(model.parameters()):
                if hasattr(p, 'org'):
                    p.org.copy_(p.data.clamp_(-1, 1))

        # measure elapsed time
        batch_time.update(time.time() - end)
        end = time.time()

        if i % args.print_freq == 0:
            logging.info('{phase} - Epoch: [{0}][{1}/{2}]\t'
                         'Time {batch_time.val:.3f} ({batch_time.avg:.3f})\t'
                         'Data {data_time.val:.3f} ({data_time.avg:.3f})\t'
                         'Loss {loss.val:.4f} ({loss.avg:.4f})\t'
                         'Prec@1 {top1.val:.3f} ({top1.avg:.3f})\t'
                         'Prec@5 {top5.val:.3f} ({top5.avg:.3f})'.format(
                             epoch, i, len(data_loader),
                             phase='TRAINING' if training else 'EVALUATING',
                             batch_time=batch_time,
                             data_time=data_time, loss=losses, top1=top1, top5=top5))

    return losses.avg, top1.avg, top5.avg
def train(data_loader, model, criterion, epoch, optimizer):
    """One training epoch: enable train mode, then run the shared forward pass."""
    model.train()
    return forward(data_loader, model, criterion, epoch,
                   optimizer=optimizer, training=True)
def validate(data_loader, model, criterion, epoch):
    """One validation pass: enable eval mode, then run the shared forward pass."""
    model.eval()
    return forward(data_loader, model, criterion, epoch,
                   optimizer=None, training=False)
# Script entry point: run training/evaluation only when executed directly.
if __name__ == '__main__':
    main()
@ -0,0 +1,332 @@ | |||
import argparse | |||
import os | |||
import time | |||
import logging | |||
import torch | |||
import torch.nn as nn | |||
import torch.nn.parallel | |||
import torch.backends.cudnn as cudnn | |||
import torch.optim | |||
import torch.utils.data | |||
import models | |||
from torch.autograd import Variable | |||
from data import get_dataset | |||
from preprocess import get_transform | |||
from utils import * | |||
from datetime import datetime | |||
from ast import literal_eval | |||
from torchvision.utils import save_image | |||
from models.binarized_modules import HingeLoss | |||
# Discover the model constructors exported by the models package
# (lower-case callables, e.g. 'alexnet', 'resnet_binary').
model_names = sorted(name for name in models.__dict__
                     if name.islower() and not name.startswith("__")
                     and callable(models.__dict__[name]))

# Command-line interface; mirrors main_binary.py but defaults to the
# hinge-loss training setup.
parser = argparse.ArgumentParser(description='PyTorch ConvNet Training')
# NOTE(review): default points at a machine-specific path — presumably
# should be overridden (or changed to './results'); confirm before reuse.
parser.add_argument('--results_dir', metavar='RESULTS_DIR', default='/media/hdd/ihubara/BinaryNet.pytorch/results',
                    help='results dir')
parser.add_argument('--save', metavar='SAVE', default='',
                    help='saved folder')
parser.add_argument('--dataset', metavar='DATASET', default='imagenet',
                    help='dataset name or folder')
parser.add_argument('--model', '-a', metavar='MODEL', default='alexnet',
                    choices=model_names,
                    help='model architecture: ' +
                    ' | '.join(model_names) +
                    ' (default: alexnet)')
parser.add_argument('--input_size', type=int, default=None,
                    help='image input size')
# Extra constructor kwargs, parsed with ast.literal_eval in main().
parser.add_argument('--model_config', default='',
                    help='additional architecture configuration')
parser.add_argument('--type', default='torch.cuda.FloatTensor',
                    help='type of tensor - e.g torch.cuda.HalfTensor')
parser.add_argument('--gpus', default='0',
                    help='gpus used for training - e.g 0,1,3')
parser.add_argument('-j', '--workers', default=8, type=int, metavar='N',
                    help='number of data loading workers (default: 8)')
parser.add_argument('--epochs', default=900, type=int, metavar='N',
                    help='number of total epochs to run')
parser.add_argument('--start-epoch', default=0, type=int, metavar='N',
                    help='manual epoch number (useful on restarts)')
parser.add_argument('-b', '--batch-size', default=256, type=int,
                    metavar='N', help='mini-batch size (default: 256)')
parser.add_argument('--optimizer', default='SGD', type=str, metavar='OPT',
                    help='optimizer function used')
parser.add_argument('--lr', '--learning_rate', default=0.1, type=float,
                    metavar='LR', help='initial learning rate')
parser.add_argument('--momentum', default=0.9, type=float, metavar='M',
                    help='momentum')
parser.add_argument('--weight-decay', '--wd', default=1e-4, type=float,
                    metavar='W', help='weight decay (default: 1e-4)')
parser.add_argument('--print-freq', '-p', default=10, type=int,
                    metavar='N', help='print frequency (default: 10)')
parser.add_argument('--resume', default='', type=str, metavar='PATH',
                    help='path to latest checkpoint (default: none)')
parser.add_argument('-e', '--evaluate', type=str, metavar='FILE',
                    help='evaluate model FILE on validation set')

# Fixed seed for all CUDA devices, for reproducibility.
torch.cuda.random.manual_seed_all(10)
# Number of classes; overwritten per-dataset in main().
output_dim = 0
def main():
    """Hinge-loss variant of the training driver.

    Parses CLI args, builds model/data/criterion (HingeLoss by default),
    trains, checkpoints, and logs CSV/HTML results with loss/error plots.
    """
    global args, best_prec1, output_dim
    best_prec1 = 0
    args = parser.parse_args()
    output_dim = {'cifar10': 10, 'cifar100': 100, 'imagenet': 1000}[args.dataset]

    if args.evaluate:
        args.results_dir = '/tmp'
    if not args.save:
        # Was `args.save is ''` — identity comparison on a string.
        args.save = datetime.now().strftime('%Y-%m-%d_%H-%M-%S')

    save_path = os.path.join(args.results_dir, args.save)
    os.makedirs(save_path, exist_ok=True)  # no exists()/makedirs() race

    setup_logging(os.path.join(save_path, 'log.txt'))
    results_file = os.path.join(save_path, 'results.%s')
    results = ResultsLog(results_file % 'csv', results_file % 'html')
    logging.info("saving to %s", save_path)
    logging.debug("run arguments: %s", args)

    if 'cuda' in args.type:
        args.gpus = [int(i) for i in args.gpus.split(',')]
        torch.cuda.set_device(args.gpus[0])
        cudnn.benchmark = True
    else:
        args.gpus = None

    # create model
    logging.info("creating model %s", args.model)
    model = models.__dict__[args.model]
    model_config = {'input_size': args.input_size, 'dataset': args.dataset, 'num_classes': output_dim}
    if args.model_config != '':
        # Was `is not ''` (identity, not equality).
        model_config = dict(model_config, **literal_eval(args.model_config))
    model = model(**model_config)
    logging.info("created model with configuration: %s", model_config)

    # optionally resume from a checkpoint
    if args.evaluate:
        if not os.path.isfile(args.evaluate):
            parser.error('invalid checkpoint: {}'.format(args.evaluate))
        checkpoint = torch.load(args.evaluate)
        model.load_state_dict(checkpoint['state_dict'])
        logging.info("loaded checkpoint '%s' (epoch %s)",
                     args.evaluate, checkpoint['epoch'])
    elif args.resume:
        checkpoint_file = args.resume
        if os.path.isdir(checkpoint_file):
            results.load(os.path.join(checkpoint_file, 'results.csv'))
            checkpoint_file = os.path.join(
                checkpoint_file, 'model_best.pth.tar')
        if os.path.isfile(checkpoint_file):
            logging.info("loading checkpoint '%s'", args.resume)
            checkpoint = torch.load(checkpoint_file)
            args.start_epoch = checkpoint['epoch'] - 1
            best_prec1 = checkpoint['best_prec1']
            model.load_state_dict(checkpoint['state_dict'])
            logging.info("loaded checkpoint '%s' (epoch %s)",
                         checkpoint_file, checkpoint['epoch'])
        else:
            logging.error("no checkpoint found at '%s'", args.resume)

    num_parameters = sum(l.nelement() for l in model.parameters())
    logging.info("number of parameters: %d", num_parameters)

    # Data loading code
    default_transform = {
        'train': get_transform(args.dataset,
                               input_size=args.input_size, augment=True),
        'eval': get_transform(args.dataset,
                              input_size=args.input_size, augment=False)
    }
    transform = getattr(model, 'input_transform', default_transform)
    regime = getattr(model, 'regime', {0: {'optimizer': args.optimizer,
                                           'lr': args.lr,
                                           'momentum': args.momentum,
                                           'weight_decay': args.weight_decay}})
    # define loss function (criterion) and optimizer; HingeLoss expects
    # +/-1 one-hot targets, built in forward().
    criterion = getattr(model, 'criterion', HingeLoss)()
    model.type(args.type)

    val_data = get_dataset(args.dataset, 'val', transform['eval'])
    val_loader = torch.utils.data.DataLoader(
        val_data,
        batch_size=args.batch_size, shuffle=False,
        num_workers=args.workers, pin_memory=True)

    if args.evaluate:
        validate(val_loader, model, criterion, 0)
        return

    train_data = get_dataset(args.dataset, 'train', transform['train'])
    train_loader = torch.utils.data.DataLoader(
        train_data,
        batch_size=args.batch_size, shuffle=True,
        num_workers=args.workers, pin_memory=True)

    # Only lr is set here; the rest of the schedule comes from `regime`
    # through adjust_optimizer at the start of every epoch.
    optimizer = torch.optim.SGD(model.parameters(), lr=args.lr)
    logging.info('training regime: %s', regime)

    for epoch in range(args.start_epoch, args.epochs):
        optimizer = adjust_optimizer(optimizer, epoch, regime)

        # train for one epoch
        train_loss, train_prec1, train_prec5 = train(
            train_loader, model, criterion, epoch, optimizer)

        # evaluate on validation set
        val_loss, val_prec1, val_prec5 = validate(
            val_loader, model, criterion, epoch)

        # remember best prec@1 and save checkpoint
        is_best = val_prec1 > best_prec1
        best_prec1 = max(val_prec1, best_prec1)
        save_checkpoint({
            'epoch': epoch + 1,
            'model': args.model,
            'config': args.model_config,
            'state_dict': model.state_dict(),
            'best_prec1': best_prec1,
            'regime': regime
        }, is_best, path=save_path)
        logging.info('\n Epoch: {0}\t'
                     'Training Loss {train_loss:.4f} \t'
                     'Training Prec@1 {train_prec1:.3f} \t'
                     'Training Prec@5 {train_prec5:.3f} \t'
                     'Validation Loss {val_loss:.4f} \t'
                     'Validation Prec@1 {val_prec1:.3f} \t'
                     'Validation Prec@5 {val_prec5:.3f} \n'
                     .format(epoch + 1, train_loss=train_loss, val_loss=val_loss,
                             train_prec1=train_prec1, val_prec1=val_prec1,
                             train_prec5=train_prec5, val_prec5=val_prec5))

        results.add(epoch=epoch + 1, train_loss=train_loss, val_loss=val_loss,
                    train_error1=100 - train_prec1, val_error1=100 - val_prec1,
                    train_error5=100 - train_prec5, val_error5=100 - val_prec5)
        results.plot(x='epoch', y=['train_loss', 'val_loss'],
                     title='Loss', ylabel='loss')
        results.plot(x='epoch', y=['train_error1', 'val_error1'],
                     title='Error@1', ylabel='error %')
        results.plot(x='epoch', y=['train_error5', 'val_error5'],
                     title='Error@5', ylabel='error %')
        results.save()
def forward(data_loader, model, criterion, epoch=0, training=True, optimizer=None):
    """Run one pass over data_loader (train or eval) with hinge-loss targets.

    For HingeLoss the integer labels are expanded to a (batch, output_dim)
    tensor filled with -1 and a +1 at the true class. Returns
    (average loss, average prec@1, average prec@5).
    """
    if args.gpus and len(args.gpus) > 1:
        model = torch.nn.DataParallel(model, args.gpus)

    batch_time = AverageMeter()
    data_time = AverageMeter()
    losses = AverageMeter()
    top1 = AverageMeter()
    top5 = AverageMeter()

    end = time.time()
    for i, (inputs, target) in enumerate(data_loader):
        # measure data loading time
        data_time.update(time.time() - end)
        if args.gpus is not None:
            target = target.cuda()

        if criterion.__class__.__name__ == 'HingeLoss':
            # Build +/-1 one-hot targets of shape (batch, output_dim).
            target = target.unsqueeze(1)
            target_onehot = torch.cuda.FloatTensor(target.size(0), output_dim)
            target_onehot.fill_(-1)
            target_onehot.scatter_(1, target, 1)
            target = target.squeeze()
            loss_target = target_onehot
        else:
            # Fix: the original passed `target_onehot` unconditionally,
            # which is a NameError for any non-hinge criterion.
            loss_target = target

        # `Variable(..., volatile=...)` was removed from torch.autograd;
        # use set_grad_enabled and a single code path for train/eval.
        with torch.set_grad_enabled(training):
            input_var = Variable(inputs.type(args.type))
            output = model(input_var)
            loss = criterion(output, loss_target)
        if type(output) is list:
            output = output[0]

        # measure accuracy and record loss
        prec1, prec5 = accuracy(output.data, target, topk=(1, 5))
        losses.update(loss.item(), inputs.size(0))
        top1.update(prec1.item(), inputs.size(0))
        top5.update(prec5.item(), inputs.size(0))

        if training:
            # compute gradient and do SGD step
            optimizer.zero_grad()
            loss.backward()
            # Restore real-valued weights before stepping (BNN update)...
            for p in list(model.parameters()):
                if hasattr(p, 'org'):
                    p.data.copy_(p.org)
            optimizer.step()
            # ...then clamp the latent weights back into [-1, 1].
            for p in list(model.parameters()):
                if hasattr(p, 'org'):
                    p.org.copy_(p.data.clamp_(-1, 1))

        # measure elapsed time
        batch_time.update(time.time() - end)
        end = time.time()

        if i % args.print_freq == 0:
            logging.info('{phase} - Epoch: [{0}][{1}/{2}]\t'
                         'Time {batch_time.val:.3f} ({batch_time.avg:.3f})\t'
                         'Data {data_time.val:.3f} ({data_time.avg:.3f})\t'
                         'Loss {loss.val:.4f} ({loss.avg:.4f})\t'
                         'Prec@1 {top1.val:.3f} ({top1.avg:.3f})\t'
                         'Prec@5 {top5.val:.3f} ({top5.avg:.3f})'.format(
                             epoch, i, len(data_loader),
                             phase='TRAINING' if training else 'EVALUATING',
                             batch_time=batch_time,
                             data_time=data_time, loss=losses, top1=top1, top5=top5))

    return losses.avg, top1.avg, top5.avg
def train(data_loader, model, criterion, epoch, optimizer):
    """One training epoch: enable train mode, then run the shared forward pass."""
    model.train()
    return forward(data_loader, model, criterion, epoch,
                   optimizer=optimizer, training=True)
def validate(data_loader, model, criterion, epoch):
    """One validation pass: enable eval mode, then run the shared forward pass."""
    model.eval()
    return forward(data_loader, model, criterion, epoch,
                   optimizer=None, training=False)
# Script entry point: run training/evaluation only when executed directly.
if __name__ == '__main__':
    main()
@ -0,0 +1,150 @@ | |||
from __future__ import print_function | |||
import argparse | |||
import torch | |||
import torch.nn as nn | |||
import torch.nn.functional as F | |||
import torch.optim as optim | |||
from torchvision import datasets, transforms | |||
from torch.autograd import Variable | |||
from models.binarized_modules import BinarizeLinear,BinarizeConv2d | |||
from models.binarized_modules import Binarize,HingeLoss | |||
# Training settings. Help strings previously contradicted the actual
# defaults (64 vs "256", 100 vs "10", 0.01 vs "0.001"); now consistent.
parser = argparse.ArgumentParser(description='PyTorch MNIST Example')
parser.add_argument('--batch-size', type=int, default=64, metavar='N',
                    help='input batch size for training (default: 64)')
parser.add_argument('--test-batch-size', type=int, default=1000, metavar='N',
                    help='input batch size for testing (default: 1000)')
parser.add_argument('--epochs', type=int, default=100, metavar='N',
                    help='number of epochs to train (default: 100)')
parser.add_argument('--lr', type=float, default=0.01, metavar='LR',
                    help='learning rate (default: 0.01)')
parser.add_argument('--momentum', type=float, default=0.5, metavar='M',
                    help='SGD momentum (default: 0.5)')
parser.add_argument('--no-cuda', action='store_true', default=False,
                    help='disables CUDA training')
parser.add_argument('--seed', type=int, default=1, metavar='S',
                    help='random seed (default: 1)')
parser.add_argument('--gpus', default=3,
                    help='gpus used for training - e.g 0,1,3')
parser.add_argument('--log-interval', type=int, default=10, metavar='N',
                    help='how many batches to wait before logging training status')
args = parser.parse_args()
# Use CUDA only when available and not explicitly disabled.
args.cuda = not args.no_cuda and torch.cuda.is_available()

# Seed both CPU and GPU RNGs for reproducibility.
torch.manual_seed(args.seed)
if args.cuda:
    torch.cuda.manual_seed(args.seed)

kwargs = {'num_workers': 1, 'pin_memory': True} if args.cuda else {}
# MNIST loaders; normalization constants are the standard MNIST
# per-channel mean/std used by the official PyTorch example.
train_loader = torch.utils.data.DataLoader(
    datasets.MNIST('../data', train=True, download=True,
                   transform=transforms.Compose([
                       transforms.ToTensor(),
                       transforms.Normalize((0.1307,), (0.3081,))
                   ])),
    batch_size=args.batch_size, shuffle=True, **kwargs)
test_loader = torch.utils.data.DataLoader(
    datasets.MNIST('../data', train=False, transform=transforms.Compose([
                       transforms.ToTensor(),
                       transforms.Normalize((0.1307,), (0.3081,))
                   ])),
    batch_size=args.test_batch_size, shuffle=True, **kwargs)
class Net(nn.Module):
    """Binarized MLP for MNIST.

    Three BinarizeLinear blocks (784 -> 2048*infl_ratio), each followed by
    BatchNorm and Hardtanh, then a real-valued classifier layer and a
    LogSoftmax output (i.e. the forward pass returns log-probabilities).
    """
    def __init__(self):
        super(Net, self).__init__()
        # Width multiplier for the hidden layers ("inflation ratio").
        self.infl_ratio=3
        self.fc1 = BinarizeLinear(784, 2048*self.infl_ratio)
        self.htanh1 = nn.Hardtanh()
        self.bn1 = nn.BatchNorm1d(2048*self.infl_ratio)
        self.fc2 = BinarizeLinear(2048*self.infl_ratio, 2048*self.infl_ratio)
        self.htanh2 = nn.Hardtanh()
        self.bn2 = nn.BatchNorm1d(2048*self.infl_ratio)
        self.fc3 = BinarizeLinear(2048*self.infl_ratio, 2048*self.infl_ratio)
        self.htanh3 = nn.Hardtanh()
        self.bn3 = nn.BatchNorm1d(2048*self.infl_ratio)
        # Final classifier stays real-valued (plain nn.Linear).
        self.fc4 = nn.Linear(2048*self.infl_ratio, 10)
        self.logsoftmax=nn.LogSoftmax()
        self.drop=nn.Dropout(0.5)

    def forward(self, x):
        """Map a batch of images to per-class log-probabilities."""
        x = x.view(-1, 28*28)  # flatten (N, 1, 28, 28) -> (N, 784)
        x = self.fc1(x)
        x = self.bn1(x)
        x = self.htanh1(x)
        x = self.fc2(x)
        x = self.bn2(x)
        x = self.htanh2(x)
        x = self.fc3(x)
        # NOTE: only the third block uses dropout, applied before its
        # BatchNorm/Hardtanh (order kept as originally written).
        x = self.drop(x)
        x = self.bn3(x)
        x = self.htanh3(x)
        x = self.fc4(x)
        return self.logsoftmax(x)
model = Net()
if args.cuda:
    # NOTE(review): GPU id is hard-coded; presumably should honor --gpus.
    torch.cuda.set_device(3)
    model.cuda()

# Net.forward already ends with LogSoftmax, so the matching loss is
# NLLLoss. The previous CrossEntropyLoss applied log_softmax a second
# time, distorting loss values and gradients (argmax/accuracy were
# unaffected, which is why the bug was easy to miss).
criterion = nn.NLLLoss()
optimizer = optim.Adam(model.parameters(), lr=args.lr)
def train(epoch):
    """Run one training epoch with the straight-through BNN weight update.

    Fixes from the original: the learning rate was multiplied by 0.1 on
    *every batch* whenever epoch % 40 == 0, collapsing the lr within a
    single epoch; the per-epoch decay in the main loop already handles
    scheduling. A duplicated optimizer.zero_grad() is also removed.
    """
    model.train()
    for batch_idx, (data, target) in enumerate(train_loader):
        if args.cuda:
            data, target = data.cuda(), target.cuda()
        data, target = Variable(data), Variable(target)

        optimizer.zero_grad()
        output = model(data)
        loss = criterion(output, target)
        loss.backward()

        # Restore real-valued weights so the step updates them,
        # not the binarized copies used in the forward pass.
        for p in list(model.parameters()):
            if hasattr(p, 'org'):
                p.data.copy_(p.org)
        optimizer.step()
        # Keep the latent real-valued weights inside [-1, 1].
        for p in list(model.parameters()):
            if hasattr(p, 'org'):
                p.org.copy_(p.data.clamp_(-1, 1))

        if batch_idx % args.log_interval == 0:
            print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format(
                epoch, batch_idx * len(data), len(train_loader.dataset),
                100. * batch_idx / len(train_loader), loss.item()))
def test():
    """Evaluate on the test set; prints the average loss and accuracy.

    Fix: the criterion returns the *batch mean* by default, but the total
    was divided by the dataset size, understating the loss by roughly a
    factor of batch size. Scale each batch's loss by its batch size so the
    final division yields a true per-sample average.
    """
    model.eval()
    test_loss = 0
    correct = 0
    with torch.no_grad():
        for data, target in test_loader:
            if args.cuda:
                data, target = data.cuda(), target.cuda()
            data, target = Variable(data), Variable(target)
            output = model(data)
            # batch-mean loss * batch size = summed batch loss
            test_loss += criterion(output, target).item() * data.size(0)
            pred = output.data.max(1, keepdim=True)[1]  # index of the max log-probability
            correct += pred.eq(target.data.view_as(pred)).cpu().sum().item()

    test_loss /= len(test_loader.dataset)
    print('\nTest set: Average loss: {:.4f}, Accuracy: {}/{} ({:.0f}%)\n'.format(
        test_loss, correct, len(test_loader.dataset),
        100. * correct / len(test_loader.dataset)))
# Main loop: one train + test pass per epoch, decaying the learning
# rate by 10x every 40 epochs.
for epoch in range(1, args.epochs + 1):
    train(epoch)
    test()
    if epoch%40==0:
        optimizer.param_groups[0]['lr']=optimizer.param_groups[0]['lr']*0.1
@ -0,0 +1,6 @@ | |||
from .alexnet import * | |||
from .alexnet_binary import * | |||
from .resnet import * | |||
from .resnet_binary import * | |||
from .vgg_cifar10_binary import * |
@ -0,0 +1,78 @@ | |||
import torch.nn as nn | |||
import torchvision.transforms as transforms | |||
__all__ = ['alexnet'] | |||
class AlexNetOWT_BN(nn.Module):
    """Full-precision AlexNet ("one weird trick" variant) with batch norm.

    Conv/linear layers are bias-free because each is followed by a BatchNorm
    layer that supplies the affine shift. ``self.regime`` (per-epoch
    optimizer settings) and ``self.input_transform`` (train/eval data
    pipelines) are consumed by the training script, not by nn.Module itself.
    """

    def __init__(self, num_classes=1000):
        super(AlexNetOWT_BN, self).__init__()
        self.features = nn.Sequential(
            nn.Conv2d(3, 64, kernel_size=11, stride=4, padding=2,
                      bias=False),
            nn.MaxPool2d(kernel_size=3, stride=2),
            nn.BatchNorm2d(64),
            nn.ReLU(inplace=True),
            nn.Conv2d(64, 192, kernel_size=5, padding=2, bias=False),
            nn.MaxPool2d(kernel_size=3, stride=2),
            nn.ReLU(inplace=True),
            nn.BatchNorm2d(192),
            nn.Conv2d(192, 384, kernel_size=3, padding=1, bias=False),
            nn.ReLU(inplace=True),
            nn.BatchNorm2d(384),
            nn.Conv2d(384, 256, kernel_size=3, padding=1, bias=False),
            nn.ReLU(inplace=True),
            nn.BatchNorm2d(256),
            nn.Conv2d(256, 256, kernel_size=3, padding=1, bias=False),
            nn.MaxPool2d(kernel_size=3, stride=2),
            nn.ReLU(inplace=True),
            nn.BatchNorm2d(256)
        )
        self.classifier = nn.Sequential(
            nn.Linear(256 * 6 * 6, 4096, bias=False),
            nn.BatchNorm1d(4096),
            nn.ReLU(inplace=True),
            nn.Dropout(0.5),
            nn.Linear(4096, 4096, bias=False),
            nn.BatchNorm1d(4096),
            nn.ReLU(inplace=True),
            nn.Dropout(0.5),
            nn.Linear(4096, num_classes)
        )
        # Per-epoch optimizer settings read by the training loop.
        self.regime = {
            0: {'optimizer': 'SGD', 'lr': 1e-2,
                'weight_decay': 5e-4, 'momentum': 0.9},
            10: {'lr': 5e-3},
            15: {'lr': 1e-3, 'weight_decay': 0},
            20: {'lr': 5e-4},
            25: {'lr': 1e-4}
        }
        normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                         std=[0.229, 0.224, 0.225])
        # NOTE: transforms.Scale is the legacy name of transforms.Resize.
        self.input_transform = {
            'train': transforms.Compose([
                transforms.Scale(256),
                transforms.RandomCrop(224),
                transforms.RandomHorizontalFlip(),
                transforms.ToTensor(),
                normalize
            ]),
            'eval': transforms.Compose([
                transforms.Scale(256),
                transforms.CenterCrop(224),
                transforms.ToTensor(),
                normalize
            ])
        }

    def forward(self, x):
        x = self.features(x)
        # Flatten the 256x6x6 feature map for the fully-connected head.
        x = x.view(-1, 256 * 6 * 6)
        x = self.classifier(x)
        return x
def alexnet(**kwargs):
    """Build a full-precision AlexNet; accepts ``num_classes`` (default 1000)."""
    return AlexNetOWT_BN(kwargs.get('num_classes', 1000))
@ -0,0 +1,92 @@ | |||
import torch.nn as nn | |||
import torchvision.transforms as transforms | |||
from .binarized_modules import BinarizeLinear,BinarizeConv2d | |||
__all__ = ['alexnet_binary'] | |||
class AlexNetOWT_BN(nn.Module):
    """Binarized AlexNet (OWT variant) with BatchNorm and Hardtanh.

    Hidden layers are widened by ``ratioInfl`` to recover capacity lost to
    weight binarization. ``self.regime`` (per-epoch optimizer settings) and
    ``self.input_transform`` (train/eval pipelines) are read by the training
    script.

    Fixed: the final BatchNorm1d was hard-coded to 1000 features and broke
    for any other ``num_classes``; it now follows ``num_classes``.
    """

    def __init__(self, num_classes=1000):
        super(AlexNetOWT_BN, self).__init__()
        self.ratioInfl = 3  # channel-inflation factor for binarized layers
        self.features = nn.Sequential(
            BinarizeConv2d(3, int(64*self.ratioInfl), kernel_size=11, stride=4, padding=2),
            nn.MaxPool2d(kernel_size=3, stride=2),
            nn.BatchNorm2d(int(64*self.ratioInfl)),
            nn.Hardtanh(inplace=True),
            BinarizeConv2d(int(64*self.ratioInfl), int(192*self.ratioInfl), kernel_size=5, padding=2),
            nn.MaxPool2d(kernel_size=3, stride=2),
            nn.BatchNorm2d(int(192*self.ratioInfl)),
            nn.Hardtanh(inplace=True),
            BinarizeConv2d(int(192*self.ratioInfl), int(384*self.ratioInfl), kernel_size=3, padding=1),
            nn.BatchNorm2d(int(384*self.ratioInfl)),
            nn.Hardtanh(inplace=True),
            BinarizeConv2d(int(384*self.ratioInfl), int(256*self.ratioInfl), kernel_size=3, padding=1),
            nn.BatchNorm2d(int(256*self.ratioInfl)),
            nn.Hardtanh(inplace=True),
            BinarizeConv2d(int(256*self.ratioInfl), 256, kernel_size=3, padding=1),
            nn.MaxPool2d(kernel_size=3, stride=2),
            nn.BatchNorm2d(256),
            nn.Hardtanh(inplace=True)
        )
        self.classifier = nn.Sequential(
            BinarizeLinear(256 * 6 * 6, 4096),
            nn.BatchNorm1d(4096),
            nn.Hardtanh(inplace=True),
            BinarizeLinear(4096, 4096),
            nn.BatchNorm1d(4096),
            nn.Hardtanh(inplace=True),
            BinarizeLinear(4096, num_classes),
            # Was nn.BatchNorm1d(1000): crashed whenever num_classes != 1000.
            nn.BatchNorm1d(num_classes),
            nn.LogSoftmax()
        )
        # Per-epoch optimizer settings read by the training loop.
        self.regime = {
            0: {'optimizer': 'Adam', 'lr': 5e-3},
            20: {'lr': 1e-3},
            30: {'lr': 5e-4},
            35: {'lr': 1e-4},
            40: {'lr': 1e-5}
        }
        normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                         std=[0.229, 0.224, 0.225])
        self.input_transform = {
            'train': transforms.Compose([
                transforms.Scale(256),
                transforms.RandomCrop(224),
                transforms.RandomHorizontalFlip(),
                transforms.ToTensor(),
                normalize
            ]),
            'eval': transforms.Compose([
                transforms.Scale(256),
                transforms.CenterCrop(224),
                transforms.ToTensor(),
                normalize
            ])
        }

    def forward(self, x):
        x = self.features(x)
        # Flatten the 256x6x6 feature map for the fully-connected head.
        x = x.view(-1, 256 * 6 * 6)
        x = self.classifier(x)
        return x
def alexnet_binary(**kwargs):
    """Build a binarized AlexNet; accepts ``num_classes`` (default 1000)."""
    return AlexNetOWT_BN(kwargs.get('num_classes', 1000))
@ -0,0 +1,109 @@ | |||
import torch | |||
import pdb | |||
import torch.nn as nn | |||
import math | |||
from torch.autograd import Variable | |||
from torch.autograd import Function | |||
import numpy as np | |||
def Binarize(tensor, quant_mode='det'):
    """Binarize ``tensor`` to {-1, +1}.

    'det' uses the sign function (new tensor); any other mode binarizes
    stochastically with probability proportional to (tensor + 1) / 2,
    mutating ``tensor`` in place like the original.

    Fixed: the stochastic branch built its noise with ``torch.rand(size)``,
    which is always a CPU tensor and crashed for CUDA inputs; it now uses
    ``torch.rand_like`` so the noise lives on the input's device/dtype.
    """
    if quant_mode == 'det':
        return tensor.sign()
    else:
        noise = torch.rand_like(tensor).add_(-0.5)
        return tensor.add_(1).div_(2).add_(noise).clamp_(0, 1).round().mul_(2).add_(-1)
class HingeLoss(nn.Module):
    """Mean hinge loss: mean(max(0, margin - input * target)), margin = 1."""

    def __init__(self):
        super(HingeLoss, self).__init__()
        self.margin = 1.0

    def hinge_loss(self, input, target):
        # Slack below zero means the margin is satisfied; clamp it away.
        slack = self.margin - input.mul(target)
        return slack.clamp(min=0).mean()

    def forward(self, input, target):
        return self.hinge_loss(input, target)
class SqrtHingeLossFunction(Function):
    """Squared hinge loss as a legacy (instance-style) autograd Function.

    loss = sum over samples of max(0, margin - input*target)^2 / target.numel(),
    with margin = 1.

    Fixed: a leftover ``pdb.set_trace()`` in ``backward`` halted every
    backward pass; it has been removed.

    NOTE(review): this uses the deprecated non-static Function API (instance
    ``forward``/``backward``); current PyTorch requires staticmethods taking
    a ``ctx`` — confirm before using with a modern release.
    """

    def __init__(self):
        super(SqrtHingeLossFunction, self).__init__()
        self.margin = 1.0

    def forward(self, input, target):
        output = self.margin - input.mul(target)
        output[output.le(0)] = 0
        self.save_for_backward(input, target)
        loss = output.mul(output).sum(0).sum(1).div(target.numel())
        return loss

    def backward(self, grad_output):
        input, target = self.saved_tensors
        output = self.margin - input.mul(target)
        output[output.le(0)] = 0
        # d/d_input of output^2 is -2 * target * output where the hinge is
        # active; the ne(0) mask zeroes the satisfied-margin entries.
        grad_output.resize_as_(input).copy_(target).mul_(-2).mul_(output)
        grad_output.mul_(output.ne(0).float())
        grad_output.div_(input.numel())
        return grad_output, grad_output
def Quantize(tensor, quant_mode='det', params=None, numBits=8):
    """Quantize ``tensor`` to a fixed-point grid of 2**(numBits-1) steps per unit.

    The input is first clamped IN PLACE to [-2**(numBits-1), 2**(numBits-1)]
    (matching the original behavior). 'det' rounds to the nearest grid
    point; any other mode rounds stochastically by dithering with uniform
    noise in [-0.5, 0.5) before rounding. ``params`` is kept for interface
    compatibility and is unused.

    Fixed: the stochastic branch (a) rounded *before* adding the noise,
    which made the dither a no-op, and (b) called the undefined helper
    ``quant_fixed``, raising NameError whenever it ran.
    """
    scale = 2 ** (numBits - 1)
    tensor.clamp_(-scale, scale)
    if quant_mode == 'det':
        tensor = tensor.mul(scale).round().div(scale)
    else:
        noise = torch.rand_like(tensor).add_(-0.5)
        tensor = tensor.mul(scale).add(noise).round().div(scale)
    return tensor
#import torch.nn._functions as tnnf | |||
class BinarizeLinear(nn.Linear):
    """Linear layer whose weights are binarized to {-1, +1} on each forward.

    The full-precision weights are stashed in ``weight.org`` so the training
    loop can update them and re-binarize on the next forward.
    """

    def __init__(self, *kargs, **kwargs):
        super(BinarizeLinear, self).__init__(*kargs, **kwargs)

    def forward(self, input):
        if not hasattr(self.weight, 'org'):
            # First call: remember the real-valued weights.
            self.weight.org = self.weight.data.clone()
        self.weight.data = Binarize(self.weight.org)
        out = nn.functional.linear(input, self.weight)
        if self.bias is not None:
            self.bias.org = self.bias.data.clone()
            out += self.bias.view(1, -1).expand_as(out)
        return out
class BinarizeConv2d(nn.Conv2d):
    """2D convolution whose weights are binarized to {-1, +1} on each forward.

    The full-precision weights are stashed in ``weight.org`` so the training
    loop can update them and re-binarize on the next forward.
    """

    def __init__(self, *kargs, **kwargs):
        super(BinarizeConv2d, self).__init__(*kargs, **kwargs)

    def forward(self, input):
        if not hasattr(self.weight, 'org'):
            # First call: remember the real-valued weights.
            self.weight.org = self.weight.data.clone()
        self.weight.data = Binarize(self.weight.org)
        out = nn.functional.conv2d(input, self.weight, None, self.stride,
                                   self.padding, self.dilation, self.groups)
        if self.bias is not None:
            self.bias.org = self.bias.data.clone()
            out += self.bias.view(1, -1, 1, 1).expand_as(out)
        return out
# x = torch.tensor([[255.0, 200.0, 201.0], [210.0, 222.0, 223.0]]) | |||
# print(Quantize(x,quant_mode='det', params=None, numBits=8)) |
@ -0,0 +1,217 @@ | |||
import torch.nn as nn | |||
import torchvision.transforms as transforms | |||
import math | |||
__all__ = ['resnet'] | |||
def conv3x3(in_planes, out_planes, stride=1):
    """3x3 convolution with padding=1 (bias-free; BatchNorm supplies the shift)."""
    return nn.Conv2d(in_planes, out_planes,
                     kernel_size=3, stride=stride, padding=1, bias=False)
def init_model(model):
    """He-initialize conv weights; reset BatchNorm to weight=1, bias=0."""
    for module in model.modules():
        if isinstance(module, nn.Conv2d):
            fan_out = module.kernel_size[0] * module.kernel_size[1] * module.out_channels
            module.weight.data.normal_(0, math.sqrt(2. / fan_out))
        elif isinstance(module, nn.BatchNorm2d):
            module.weight.data.fill_(1)
            module.bias.data.zero_()
class BasicBlock(nn.Module):
    """Standard two-conv residual block with optional projected shortcut."""
    expansion = 1

    def __init__(self, inplanes, planes, stride=1, downsample=None):
        super(BasicBlock, self).__init__()
        self.conv1 = conv3x3(inplanes, planes, stride)
        self.bn1 = nn.BatchNorm2d(planes)
        self.relu = nn.ReLU(inplace=True)
        self.conv2 = conv3x3(planes, planes)
        self.bn2 = nn.BatchNorm2d(planes)
        self.downsample = downsample
        self.stride = stride

    def forward(self, x):
        shortcut = x if self.downsample is None else self.downsample(x)
        out = self.relu(self.bn1(self.conv1(x)))
        out = self.bn2(self.conv2(out))
        out += shortcut
        return self.relu(out)
class Bottleneck(nn.Module):
    """1x1 -> 3x3 -> 1x1 residual bottleneck; output channels = planes * 4."""
    expansion = 4

    def __init__(self, inplanes, planes, stride=1, downsample=None):
        super(Bottleneck, self).__init__()
        self.conv1 = nn.Conv2d(inplanes, planes, kernel_size=1, bias=False)
        self.bn1 = nn.BatchNorm2d(planes)
        self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=stride,
                               padding=1, bias=False)
        self.bn2 = nn.BatchNorm2d(planes)
        self.conv3 = nn.Conv2d(planes, planes * 4, kernel_size=1, bias=False)
        self.bn3 = nn.BatchNorm2d(planes * 4)
        self.relu = nn.ReLU(inplace=True)
        self.downsample = downsample
        self.stride = stride

    def forward(self, x):
        shortcut = x if self.downsample is None else self.downsample(x)
        out = self.relu(self.bn1(self.conv1(x)))
        out = self.relu(self.bn2(self.conv2(out)))
        out = self.bn3(self.conv3(out))
        out += shortcut
        return self.relu(out)
class ResNet(nn.Module):
    """Base ResNet: subclasses attach the stem/stages/head; this class
    provides stage construction and the shared forward pass."""

    def __init__(self):
        super(ResNet, self).__init__()

    def _make_layer(self, block, planes, blocks, stride=1):
        """Stack ``blocks`` instances of ``block``; project the shortcut with
        a 1x1 conv + BN when the spatial size or channel count changes."""
        downsample = None
        if stride != 1 or self.inplanes != planes * block.expansion:
            downsample = nn.Sequential(
                nn.Conv2d(self.inplanes, planes * block.expansion,
                          kernel_size=1, stride=stride, bias=False),
                nn.BatchNorm2d(planes * block.expansion),
            )
        layers = [block(self.inplanes, planes, stride, downsample)]
        self.inplanes = planes * block.expansion
        for _ in range(1, blocks):
            layers.append(block(self.inplanes, planes))
        return nn.Sequential(*layers)

    def forward(self, x):
        x = self.maxpool(self.relu(self.bn1(self.conv1(x))))
        for stage in (self.layer1, self.layer2, self.layer3, self.layer4):
            x = stage(x)
        x = self.avgpool(x)
        x = x.view(x.size(0), -1)
        return self.fc(x)
return x | |||
class ResNet_imagenet(ResNet):
    """ResNet for 224x224 ImageNet input.

    Defaults to the ResNet-101 layout (Bottleneck, [3, 4, 23, 3]); the
    ``resnet`` factory passes other block/layer combinations. ``self.regime``
    is the per-epoch optimizer schedule read by the training script.
    """
    def __init__(self, num_classes=1000,
                 block=Bottleneck, layers=[3, 4, 23, 3]):
        super(ResNet_imagenet, self).__init__()
        # Channel count entering layer1; mutated by _make_layer as stages grow.
        self.inplanes = 64
        self.conv1 = nn.Conv2d(3, 64, kernel_size=7, stride=2, padding=3,
                               bias=False)
        self.bn1 = nn.BatchNorm2d(64)
        self.relu = nn.ReLU(inplace=True)
        self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)
        self.layer1 = self._make_layer(block, 64, layers[0])
        self.layer2 = self._make_layer(block, 128, layers[1], stride=2)
        self.layer3 = self._make_layer(block, 256, layers[2], stride=2)
        self.layer4 = self._make_layer(block, 512, layers[3], stride=2)
        # 7x7 average pool assumes 224x224 input (224 / 32 = 7 feature map).
        self.avgpool = nn.AvgPool2d(7)
        self.fc = nn.Linear(512 * block.expansion, num_classes)
        init_model(self)
        # Per-epoch optimizer settings read by the training loop.
        self.regime = {
            0: {'optimizer': 'SGD', 'lr': 1e-1,
                'weight_decay': 1e-4, 'momentum': 0.9},
            30: {'lr': 1e-2},
            60: {'lr': 1e-3, 'weight_decay': 0},
            90: {'lr': 1e-4}
        }
class ResNet_cifar10(ResNet):
    """ResNet for 32x32 CIFAR-10 input (classic 6n+2 layout, three stages).

    ``maxpool`` and ``layer4`` are identity lambdas so the base-class forward
    (which expects four stages and a stem pool) can be reused unchanged.
    """
    def __init__(self, num_classes=10,
                 block=BasicBlock, depth=18):
        super(ResNet_cifar10, self).__init__()
        self.inplanes = 16
        # Blocks per stage for a 6n+2 network (e.g. depth 56 -> n = 9).
        n = int((depth - 2) / 6)
        self.conv1 = nn.Conv2d(3, 16, kernel_size=3, stride=1, padding=1,
                               bias=False)
        self.bn1 = nn.BatchNorm2d(16)
        self.relu = nn.ReLU(inplace=True)
        self.maxpool = lambda x: x  # identity: CIFAR stem has no pooling
        self.layer1 = self._make_layer(block, 16, n)
        self.layer2 = self._make_layer(block, 32, n, stride=2)
        self.layer3 = self._make_layer(block, 64, n, stride=2)
        self.layer4 = lambda x: x  # identity: only three stages on CIFAR
        self.avgpool = nn.AvgPool2d(8)
        self.fc = nn.Linear(64, num_classes)
        init_model(self)
        # Per-epoch optimizer settings read by the training loop.
        self.regime = {
            0: {'optimizer': 'SGD', 'lr': 1e-1,
                'weight_decay': 1e-4, 'momentum': 0.9},
            81: {'lr': 1e-2},
            122: {'lr': 1e-3, 'weight_decay': 0},
            164: {'lr': 1e-4}
        }
def resnet(**kwargs):
    """Build a full-precision ResNet from keyword configuration.

    Keywords: ``dataset`` ('imagenet' or 'cifar10'), ``depth`` (imagenet:
    18/34/50/101/152, default 50; cifar10: a 6n+2 value, default 18) and
    ``num_classes`` (defaults to 1000 / 10 per dataset).

    Raises:
        ValueError: for an unsupported dataset or imagenet depth (the
            original silently returned None, crashing later at the caller).
    """
    num_classes, depth, dataset = map(
        kwargs.get, ['num_classes', 'depth', 'dataset'])
    if dataset == 'imagenet':
        num_classes = num_classes or 1000
        depth = depth or 50
        configs = {
            18: (BasicBlock, [2, 2, 2, 2]),
            34: (BasicBlock, [3, 4, 6, 3]),
            50: (Bottleneck, [3, 4, 6, 3]),
            101: (Bottleneck, [3, 4, 23, 3]),
            152: (Bottleneck, [3, 8, 36, 3]),
        }
        if depth not in configs:
            raise ValueError('unsupported imagenet depth: {!r}'.format(depth))
        block, layers = configs[depth]
        return ResNet_imagenet(num_classes=num_classes,
                               block=block, layers=layers)
    elif dataset == 'cifar10':
        num_classes = num_classes or 10
        depth = depth or 18  # 56 is the classic choice
        return ResNet_cifar10(num_classes=num_classes,
                              block=BasicBlock, depth=depth)
    raise ValueError('unsupported dataset: {!r}'.format(dataset))
@ -0,0 +1,248 @@ | |||
import torch.nn as nn | |||
import torchvision.transforms as transforms | |||
import math | |||
from .binarized_modules import BinarizeLinear,BinarizeConv2d | |||
__all__ = ['resnet_binary'] | |||
def Binaryconv3x3(in_planes, out_planes, stride=1):
    """3x3 binarized convolution with padding=1 and no bias."""
    return BinarizeConv2d(in_planes, out_planes,
                          kernel_size=3, stride=stride, padding=1, bias=False)
def conv3x3(in_planes, out_planes, stride=1):
    """Full-precision 3x3 convolution with padding=1 and no bias."""
    return nn.Conv2d(in_planes, out_planes,
                     kernel_size=3, stride=stride, padding=1, bias=False)
def init_model(model):
    """He-initialize binarized-conv weights; reset BatchNorm to weight=1, bias=0."""
    for module in model.modules():
        if isinstance(module, BinarizeConv2d):
            fan_out = module.kernel_size[0] * module.kernel_size[1] * module.out_channels
            module.weight.data.normal_(0, math.sqrt(2. / fan_out))
        elif isinstance(module, nn.BatchNorm2d):
            module.weight.data.fill_(1)
            module.bias.data.zero_()
class BasicBlock(nn.Module):
    """Binarized residual block: (binary conv -> BN -> Hardtanh) x 2.

    When ``do_bntan`` is False the trailing BatchNorm/Hardtanh are skipped so
    the network head (applied after pooling) can take their place.

    Fixed: removed a leftover conditional ``pdb.set_trace()`` that dropped
    into the debugger whenever the residual exceeded 1.
    """
    expansion = 1

    def __init__(self, inplanes, planes, stride=1, downsample=None, do_bntan=True):
        super(BasicBlock, self).__init__()
        self.conv1 = Binaryconv3x3(inplanes, planes, stride)
        self.bn1 = nn.BatchNorm2d(planes)
        self.tanh1 = nn.Hardtanh(inplace=True)
        self.conv2 = Binaryconv3x3(planes, planes)
        self.tanh2 = nn.Hardtanh(inplace=True)
        self.bn2 = nn.BatchNorm2d(planes)
        self.downsample = downsample
        self.do_bntan = do_bntan
        self.stride = stride

    def forward(self, x):
        # Clone so the in-place Hardtanh on the main path cannot corrupt the
        # shortcut input.
        residual = x.clone()
        out = self.conv1(x)
        out = self.bn1(out)
        out = self.tanh1(out)
        out = self.conv2(out)
        if self.downsample is not None:
            residual = self.downsample(residual)
        out += residual
        if self.do_bntan:
            out = self.bn2(out)
            out = self.tanh2(out)
        return out
class Bottleneck(nn.Module):
    """Binarized bottleneck block (1x1 -> 3x3 -> 1x1, output = planes * 4).

    Fixed two defects that made the original unusable:
    * an unconditional ``pdb.set_trace()`` at the top of ``forward`` dropped
      into the debugger on every call;
    * ``forward`` read ``self.do_bntan`` and ``self.tanh2``, which
      ``__init__`` never defined (guaranteed AttributeError). Both are now
      initialized, with ``do_bntan`` exposed as a backward-compatible
      keyword mirroring BasicBlock.
    """
    expansion = 4

    def __init__(self, inplanes, planes, stride=1, downsample=None, do_bntan=True):
        super(Bottleneck, self).__init__()
        self.conv1 = BinarizeConv2d(inplanes, planes, kernel_size=1, bias=False)
        self.bn1 = nn.BatchNorm2d(planes)
        self.conv2 = BinarizeConv2d(planes, planes, kernel_size=3, stride=stride,
                                    padding=1, bias=False)
        self.bn2 = nn.BatchNorm2d(planes)
        self.conv3 = BinarizeConv2d(planes, planes * 4, kernel_size=1, bias=False)
        self.bn3 = nn.BatchNorm2d(planes * 4)
        self.tanh = nn.Hardtanh(inplace=True)
        self.tanh2 = nn.Hardtanh(inplace=True)
        self.do_bntan = do_bntan
        self.downsample = downsample
        self.stride = stride

    def forward(self, x):
        residual = x
        out = self.conv1(x)
        out = self.bn1(out)
        out = self.tanh(out)
        out = self.conv2(out)
        out = self.bn2(out)
        out = self.tanh(out)
        out = self.conv3(out)
        out = self.bn3(out)
        if self.downsample is not None:
            residual = self.downsample(x)
        out += residual
        if self.do_bntan:
            # NOTE(review): mirrors the original post-sum bn2 reuse; bn2 has
            # ``planes`` features while ``out`` has ``planes * 4`` — this path
            # looks broken upstream as well; confirm before enabling.
            out = self.bn2(out)
            out = self.tanh2(out)
        return out
class ResNet(nn.Module):
    """Base binarized ResNet; subclasses attach the stem, stages and head."""

    def __init__(self):
        super(ResNet, self).__init__()

    def _make_layer(self, block, planes, blocks, stride=1, do_bntan=True):
        """Stack ``blocks`` blocks; the final one receives ``do_bntan`` so a
        stage can defer its last BN/Hardtanh to the network head."""
        downsample = None
        if stride != 1 or self.inplanes != planes * block.expansion:
            downsample = nn.Sequential(
                BinarizeConv2d(self.inplanes, planes * block.expansion,
                               kernel_size=1, stride=stride, bias=False),
                nn.BatchNorm2d(planes * block.expansion),
            )
        layers = [block(self.inplanes, planes, stride, downsample)]
        self.inplanes = planes * block.expansion
        for _ in range(1, blocks - 1):
            layers.append(block(self.inplanes, planes))
        layers.append(block(self.inplanes, planes, do_bntan=do_bntan))
        return nn.Sequential(*layers)

    def forward(self, x):
        x = self.conv1(x)
        x = self.maxpool(x)
        x = self.tanh1(self.bn1(x))
        for stage in (self.layer1, self.layer2, self.layer3, self.layer4):
            x = stage(x)
        x = self.avgpool(x)
        x = x.view(x.size(0), -1)
        x = self.tanh2(self.bn2(x))
        x = self.fc(x)
        x = self.bn3(x)
        return self.logsoftmax(x)
class ResNet_imagenet(ResNet):
    """Binarized ResNet for 224x224 ImageNet input.

    NOTE(review): the base-class forward uses ``tanh1``, ``bn2``, ``tanh2``,
    ``bn3`` and ``logsoftmax``, none of which this subclass defines (only
    ``self.tanh`` is created) — the imagenet path looks broken as written;
    confirm before use.
    """
    def __init__(self, num_classes=1000,
                 block=Bottleneck, layers=[3, 4, 23, 3]):
        super(ResNet_imagenet, self).__init__()
        # Channel count entering layer1; mutated by _make_layer as stages grow.
        self.inplanes = 64
        self.conv1 = BinarizeConv2d(3, 64, kernel_size=7, stride=2, padding=3,
                                    bias=False)
        self.bn1 = nn.BatchNorm2d(64)
        self.tanh = nn.Hardtanh(inplace=True)
        self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)
        self.layer1 = self._make_layer(block, 64, layers[0])
        self.layer2 = self._make_layer(block, 128, layers[1], stride=2)
        self.layer3 = self._make_layer(block, 256, layers[2], stride=2)
        self.layer4 = self._make_layer(block, 512, layers[3], stride=2)
        self.avgpool = nn.AvgPool2d(7)
        self.fc = BinarizeLinear(512 * block.expansion, num_classes)
        init_model(self)
        # Per-epoch optimizer settings read by the training loop.
        self.regime = {
            0: {'optimizer': 'SGD', 'lr': 1e-1,
                'weight_decay': 1e-4, 'momentum': 0.9},
            30: {'lr': 1e-2},
            60: {'lr': 1e-3, 'weight_decay': 0},
            90: {'lr': 1e-4}
        }
class ResNet_cifar10(ResNet):
    """Binarized ResNet for 32x32 CIFAR-10 input (6n+2 layout, three stages,
    channels inflated 5x to compensate for binarization).

    ``maxpool`` and ``layer4`` are identity lambdas so the base-class
    forward can be reused unchanged; layer3 defers its final BN/Hardtanh
    (``do_bntan=False``) to the head's bn2/tanh2.

    Fixed: the final BatchNorm1d was hard-coded to 10 features and broke for
    any other ``num_classes``; it now follows ``num_classes``.
    """
    def __init__(self, num_classes=10,
                 block=BasicBlock, depth=18):
        super(ResNet_cifar10, self).__init__()
        self.inflate = 5
        self.inplanes = 16*self.inflate
        # Blocks per stage for a 6n+2 network.
        n = int((depth - 2) / 6)
        self.conv1 = BinarizeConv2d(3, 16*self.inflate, kernel_size=3, stride=1, padding=1,
                                    bias=False)
        self.maxpool = lambda x: x  # identity: no pooling in the CIFAR stem
        self.bn1 = nn.BatchNorm2d(16*self.inflate)
        self.tanh1 = nn.Hardtanh(inplace=True)
        self.tanh2 = nn.Hardtanh(inplace=True)
        self.layer1 = self._make_layer(block, 16*self.inflate, n)
        self.layer2 = self._make_layer(block, 32*self.inflate, n, stride=2)
        self.layer3 = self._make_layer(block, 64*self.inflate, n, stride=2, do_bntan=False)
        self.layer4 = lambda x: x  # identity: only three stages on CIFAR
        self.avgpool = nn.AvgPool2d(8)
        self.bn2 = nn.BatchNorm1d(64*self.inflate)
        self.bn3 = nn.BatchNorm1d(num_classes)  # was hard-coded BatchNorm1d(10)
        self.logsoftmax = nn.LogSoftmax()
        self.fc = BinarizeLinear(64*self.inflate, num_classes)
        init_model(self)
        # Per-epoch optimizer settings read by the training loop.
        self.regime = {
            0: {'optimizer': 'Adam', 'lr': 5e-3},
            101: {'lr': 1e-3},
            142: {'lr': 5e-4},
            184: {'lr': 1e-4},
            220: {'lr': 1e-5}
        }
def resnet_binary(**kwargs):
    """Build a binarized ResNet from keyword configuration.

    Keywords: ``dataset`` ('imagenet' or 'cifar10'), ``depth`` (imagenet:
    18/34/50/101/152, default 50; cifar10: a 6n+2 value, default 18) and
    ``num_classes`` (defaults to 1000 / 10 per dataset).

    Raises:
        ValueError: for an unsupported dataset or imagenet depth (the
            original silently returned None, crashing later at the caller).
    """
    num_classes, depth, dataset = map(
        kwargs.get, ['num_classes', 'depth', 'dataset'])
    if dataset == 'imagenet':
        num_classes = num_classes or 1000
        depth = depth or 50
        configs = {
            18: (BasicBlock, [2, 2, 2, 2]),
            34: (BasicBlock, [3, 4, 6, 3]),
            50: (Bottleneck, [3, 4, 6, 3]),
            101: (Bottleneck, [3, 4, 23, 3]),
            152: (Bottleneck, [3, 8, 36, 3]),
        }
        if depth not in configs:
            raise ValueError('unsupported imagenet depth: {!r}'.format(depth))
        block, layers = configs[depth]
        return ResNet_imagenet(num_classes=num_classes,
                               block=block, layers=layers)
    elif dataset == 'cifar10':
        num_classes = num_classes or 10
        depth = depth or 18
        return ResNet_cifar10(num_classes=num_classes,
                              block=BasicBlock, depth=depth)
    raise ValueError('unsupported dataset: {!r}'.format(dataset))
@ -0,0 +1,69 @@ | |||
import torch.nn as nn | |||
import torchvision.transforms as transforms | |||
class AlexNetOWT_BN(nn.Module):
    """Full-precision VGG-style CIFAR network (3x32x32 input, log-softmax out).

    Conv/linear layers are bias-free; the following BatchNorm supplies the
    affine shift. ``self.regime`` is the per-epoch optimizer schedule read
    by the training script.

    Fixed: the classifier was a syntax error — a missing comma after
    ``nn.Linear(1024, num_classes)`` and a misspelled ``nn.LogSoftMax``
    (the module is ``nn.LogSoftmax``); ``dim=1`` makes the class dimension
    explicit for the (batch, classes) output.
    """

    def __init__(self, num_classes=1000):
        super(AlexNetOWT_BN, self).__init__()
        self.features = nn.Sequential(
            nn.Conv2d(3, 128, kernel_size=3, stride=1, padding=1,
                      bias=False),
            nn.BatchNorm2d(128),
            nn.ReLU(inplace=True),
            nn.Conv2d(128, 128, kernel_size=3, padding=1, bias=False),
            nn.MaxPool2d(kernel_size=2, stride=2),
            nn.ReLU(inplace=True),
            nn.BatchNorm2d(128),
            nn.Conv2d(128, 256, kernel_size=3, padding=1, bias=False),
            nn.ReLU(inplace=True),
            nn.BatchNorm2d(256),
            nn.Conv2d(256, 256, kernel_size=3, padding=1, bias=False),
            nn.MaxPool2d(kernel_size=2, stride=2),
            nn.ReLU(inplace=True),
            nn.BatchNorm2d(256),
            nn.Conv2d(256, 512, kernel_size=3, padding=1, bias=False),
            nn.ReLU(inplace=True),
            nn.BatchNorm2d(512),
            nn.Conv2d(512, 512, kernel_size=3, padding=1, bias=False),
            nn.MaxPool2d(kernel_size=2, stride=2),
            nn.ReLU(inplace=True),
            nn.BatchNorm2d(512),
        )
        self.classifier = nn.Sequential(
            nn.Linear(512 * 4 * 4, 1024, bias=False),
            nn.BatchNorm1d(1024),
            nn.ReLU(inplace=True),
            nn.Dropout(0.5),
            nn.Linear(1024, 1024, bias=False),
            nn.BatchNorm1d(1024),
            nn.ReLU(inplace=True),
            nn.Dropout(0.5),
            nn.Linear(1024, num_classes),
            nn.LogSoftmax(dim=1)
        )
        # Per-epoch optimizer settings read by the training loop.
        self.regime = {
            0: {'optimizer': 'SGD', 'lr': 1e-2,
                'weight_decay': 5e-4, 'momentum': 0.9},
            10: {'lr': 5e-3},
            15: {'lr': 1e-3, 'weight_decay': 0},
            20: {'lr': 5e-4},
            25: {'lr': 1e-4}
        }

    def forward(self, x):
        x = self.features(x)
        # 32x32 input through three 2x2 pools -> 512x4x4 feature map.
        x = x.view(-1, 512 * 4 * 4)
        x = self.classifier(x)
        return x
def model(**kwargs):
    """Build the CIFAR VGG-style network; accepts ``num_classes`` (default 1000)."""
    return AlexNetOWT_BN(kwargs.get('num_classes', 1000))
@ -0,0 +1,80 @@ | |||
import torch | |||
import torch.nn as nn | |||
import torchvision.transforms as transforms | |||
from torch.autograd import Function | |||
from .binarized_modules import BinarizeLinear,BinarizeConv2d | |||
class VGG_Cifar10(nn.Module):
    """Binarized VGG-style network for CIFAR-10 (3x32x32 input).

    Hidden layers are widened by ``infl_ratio`` to recover capacity lost to
    binarization; Hardtanh keeps activations in [-1, 1] before the next
    binarized layer. ``self.regime`` is the per-epoch optimizer schedule
    read by the training script.

    NOTE(review): the default ``num_classes=1000`` looks odd for a CIFAR-10
    model; the ``vgg_cifar10_binary`` factory passes 10 — confirm intent.
    """
    def __init__(self, num_classes=1000):
        super(VGG_Cifar10, self).__init__()
        self.infl_ratio=3;
        self.features = nn.Sequential(
            BinarizeConv2d(3, 128*self.infl_ratio, kernel_size=3, stride=1, padding=1,
                           bias=True),
            nn.BatchNorm2d(128*self.infl_ratio),
            nn.Hardtanh(inplace=True),
            BinarizeConv2d(128*self.infl_ratio, 128*self.infl_ratio, kernel_size=3, padding=1, bias=True),
            nn.MaxPool2d(kernel_size=2, stride=2),
            nn.BatchNorm2d(128*self.infl_ratio),
            nn.Hardtanh(inplace=True),
            BinarizeConv2d(128*self.infl_ratio, 256*self.infl_ratio, kernel_size=3, padding=1, bias=True),
            nn.BatchNorm2d(256*self.infl_ratio),
            nn.Hardtanh(inplace=True),
            BinarizeConv2d(256*self.infl_ratio, 256*self.infl_ratio, kernel_size=3, padding=1, bias=True),
            nn.MaxPool2d(kernel_size=2, stride=2),
            nn.BatchNorm2d(256*self.infl_ratio),
            nn.Hardtanh(inplace=True),
            BinarizeConv2d(256*self.infl_ratio, 512*self.infl_ratio, kernel_size=3, padding=1, bias=True),
            nn.BatchNorm2d(512*self.infl_ratio),
            nn.Hardtanh(inplace=True),
            BinarizeConv2d(512*self.infl_ratio, 512, kernel_size=3, padding=1, bias=True),
            nn.MaxPool2d(kernel_size=2, stride=2),
            nn.BatchNorm2d(512),
            nn.Hardtanh(inplace=True)
        )
        self.classifier = nn.Sequential(
            BinarizeLinear(512 * 4 * 4, 1024, bias=True),
            nn.BatchNorm1d(1024),
            nn.Hardtanh(inplace=True),
            BinarizeLinear(1024, 1024, bias=True),
            nn.BatchNorm1d(1024),
            nn.Hardtanh(inplace=True),
            BinarizeLinear(1024, num_classes, bias=True),
            nn.BatchNorm1d(num_classes, affine=False),
            nn.LogSoftmax()
        )
        # Per-epoch optimizer settings read by the training loop.
        self.regime = {
            0: {'optimizer': 'Adam', 'betas': (0.9, 0.999),'lr': 5e-3},
            40: {'lr': 1e-3},
            80: {'lr': 5e-4},
            100: {'lr': 1e-4},
            120: {'lr': 5e-5},
            140: {'lr': 1e-5}
        }

    def forward(self, x):
        x = self.features(x)
        # 32x32 input through three 2x2 pools -> 512x4x4 feature map.
        x = x.view(-1, 512 * 4 * 4)
        x = self.classifier(x)
        return x
def vgg_cifar10_binary(**kwargs):
    """Build the binarized CIFAR-10 VGG; accepts ``num_classes`` (default 10)."""
    return VGG_Cifar10(kwargs.get('num_classes', 10))
@ -0,0 +1,198 @@ | |||
import torch | |||
import torchvision.transforms as transforms | |||
import random | |||
__imagenet_stats = {'mean': [0.485, 0.456, 0.406], | |||
'std': [0.229, 0.224, 0.225]} | |||
__imagenet_pca = { | |||
'eigval': torch.Tensor([0.2175, 0.0188, 0.0045]), | |||
'eigvec': torch.Tensor([ | |||
[-0.5675, 0.7192, 0.4009], | |||
[-0.5808, -0.0045, -0.8140], | |||
[-0.5836, -0.6948, 0.4203], | |||
]) | |||
} | |||
def scale_crop(input_size, scale_size=None, normalize=__imagenet_stats):
    """Eval pipeline: optional resize, center crop, to-tensor, normalize."""
    steps = [
        transforms.CenterCrop(input_size),
        transforms.ToTensor(),
        transforms.Normalize(**normalize),
    ]
    if scale_size != input_size:
        steps.insert(0, transforms.Scale(scale_size))
    return transforms.Compose(steps)
def scale_random_crop(input_size, scale_size=None, normalize=__imagenet_stats):
    """Train pipeline: optional resize, random crop, to-tensor, normalize.

    Fixed: the composed transform is now returned — the original built it
    with ``transforms.Compose(t_list)`` and discarded the result, so every
    caller silently received None.
    """
    t_list = [
        transforms.RandomCrop(input_size),
        transforms.ToTensor(),
        transforms.Normalize(**normalize),
    ]
    if scale_size != input_size:
        t_list = [transforms.Scale(scale_size)] + t_list
    return transforms.Compose(t_list)
def pad_random_crop(input_size, scale_size=None, normalize=__imagenet_stats):
    """Train pipeline: pad-then-random-crop, horizontal flip, normalize."""
    pad = int((scale_size - input_size) / 2)
    return transforms.Compose([
        transforms.RandomCrop(input_size, padding=pad),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        transforms.Normalize(**normalize),
    ])
def inception_preproccess(input_size, normalize=__imagenet_stats):
    """Inception-style train pipeline: random scaled crop + flip + normalize."""
    return transforms.Compose([
        transforms.RandomSizedCrop(input_size),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        transforms.Normalize(**normalize),
    ])
def inception_color_preproccess(input_size, normalize=__imagenet_stats):
    """Inception-style train pipeline with color jitter and PCA lighting noise."""
    return transforms.Compose([
        transforms.RandomSizedCrop(input_size),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        ColorJitter(
            brightness=0.4,
            contrast=0.4,
            saturation=0.4,
        ),
        Lighting(0.1, __imagenet_pca['eigval'], __imagenet_pca['eigvec']),
        transforms.Normalize(**normalize),
    ])
def get_transform(name='imagenet', input_size=None,
                  scale_size=None, normalize=None, augment=True):
    """Return the torchvision transform pipeline for a named dataset.

    Args:
        name: 'imagenet', anything containing 'cifar', or 'mnist'.
        input_size / scale_size: override the per-dataset defaults.
        normalize: mean/std dict; defaults to ImageNet statistics
            (MNIST forces its own single-channel stats).
        augment: True for the training pipeline, False for evaluation.

    Raises:
        ValueError: for an unrecognized dataset name (the original fell
            through and silently returned None).
    """
    normalize = normalize or __imagenet_stats
    if name == 'imagenet':
        scale_size = scale_size or 256
        input_size = input_size or 224
        if augment:
            return inception_preproccess(input_size, normalize=normalize)
        else:
            return scale_crop(input_size=input_size,
                              scale_size=scale_size, normalize=normalize)
    elif 'cifar' in name:
        input_size = input_size or 32
        if augment:
            scale_size = scale_size or 40
            return pad_random_crop(input_size, scale_size=scale_size,
                                   normalize=normalize)
        else:
            scale_size = scale_size or 32
            return scale_crop(input_size=input_size,
                              scale_size=scale_size, normalize=normalize)
    elif name == 'mnist':
        normalize = {'mean': [0.5], 'std': [0.5]}
        input_size = input_size or 28
        if augment:
            scale_size = scale_size or 32
            return pad_random_crop(input_size, scale_size=scale_size,
                                   normalize=normalize)
        else:
            scale_size = scale_size or 32
            return scale_crop(input_size=input_size,
                              scale_size=scale_size, normalize=normalize)
    raise ValueError('unknown dataset name: {!r}'.format(name))
class Lighting(object):
    """AlexNet-style PCA ("fancy PCA") lighting-noise augmentation."""

    def __init__(self, alphastd, eigval, eigvec):
        self.alphastd = alphastd
        self.eigval = eigval
        self.eigvec = eigvec

    def __call__(self, img):
        if self.alphastd == 0:
            return img
        # Per-call random weights on the three principal components.
        alpha = img.new().resize_(3).normal_(0, self.alphastd)
        scaled = self.eigvec.type_as(img).clone()
        scaled = scaled.mul(alpha.view(1, 3).expand(3, 3))
        scaled = scaled.mul(self.eigval.view(1, 3).expand(3, 3))
        rgb = scaled.sum(1).squeeze()
        # Broadcast the per-channel offset over the whole image.
        return img.add(rgb.view(3, 1, 1).expand_as(img))
class Grayscale(object):
    """Convert an RGB tensor to 3-channel grayscale (ITU-R 601 luma weights)."""

    def __call__(self, img):
        gs = img.clone()
        # Use the alpha keyword: the positional add_(scalar, tensor) overload
        # used originally is deprecated and removed in current PyTorch.
        gs[0].mul_(0.299).add_(gs[1], alpha=0.587).add_(gs[2], alpha=0.114)
        gs[1].copy_(gs[0])
        gs[2].copy_(gs[0])
        return gs
class Saturation(object):
    """Randomly desaturate by lerping toward the grayscale version."""

    def __init__(self, var):
        self.var = var

    def __call__(self, img):
        gray = Grayscale()(img)
        weight = random.uniform(0, self.var)
        return img.lerp(gray, weight)
class Brightness(object):
    """Randomly darken the image by lerping toward black."""

    def __init__(self, var):
        self.var = var

    def __call__(self, img):
        black = img.new().resize_as_(img).zero_()
        weight = random.uniform(0, self.var)
        return img.lerp(black, weight)
class Contrast(object):
    """Randomly reduce contrast by blending toward the mean gray level."""

    def __init__(self, var):
        self.var = var

    def __call__(self, img):
        flat_gray = Grayscale()(img)
        flat_gray.fill_(flat_gray.mean())
        weight = random.uniform(0, self.var)
        return img.lerp(flat_gray, weight)
class RandomOrder(object):
    """Apply a list of transforms, shuffling their order on every call.

    A ``None`` transform list makes this a no-op.
    """

    def __init__(self, transforms):
        self.transforms = transforms

    def __call__(self, img):
        if self.transforms is None:
            return img
        for idx in torch.randperm(len(self.transforms)):
            img = self.transforms[idx](img)
        return img
class ColorJitter(RandomOrder):
    """Random brightness/contrast/saturation jitter in random order.

    A component with strength 0 is omitted from the transform list.
    """

    def __init__(self, brightness=0.4, contrast=0.4, saturation=0.4):
        jitters = []
        if brightness != 0:
            jitters.append(Brightness(brightness))
        if contrast != 0:
            jitters.append(Contrast(contrast))
        if saturation != 0:
            jitters.append(Saturation(saturation))
        self.transforms = jitters
@ -0,0 +1,5 @@ | |||
2021-04-15 15:36:47 - INFO - saving to ./results/2021-04-15_15-36-47 | |||
2021-04-15 15:36:47 - DEBUG - run arguments: Namespace(batch_size=256, dataset='imagenet', epochs=2500, evaluate=None, gpus='0', input_size=None, lr=0.1, model='alexnet', model_config='', momentum=0.9, optimizer='SGD', print_freq=10, results_dir='./results', resume='', save='2021-04-15_15-36-47', start_epoch=0, type='torch.cuda.FloatTensor', weight_decay=0.0001, workers=8) | |||
2021-04-15 15:36:47 - INFO - creating model alexnet | |||
2021-04-15 15:36:48 - INFO - created model with configuration: {'input_size': None, 'dataset': 'imagenet'} | |||
2021-04-15 15:36:48 - INFO - number of parameters: 61110184 |
@ -0,0 +1,5 @@ | |||
2021-04-15 15:37:52 - INFO - saving to ./results/2021-04-15_15-37-52 | |||
2021-04-15 15:37:52 - DEBUG - run arguments: Namespace(batch_size=256, dataset='imagenet', epochs=2500, evaluate=None, gpus='0', input_size=None, lr=0.1, model='resnet', model_config='', momentum=0.9, optimizer='SGD', print_freq=10, results_dir='./results', resume='', save='2021-04-15_15-37-52', start_epoch=0, type='torch.cuda.FloatTensor', weight_decay=0.0001, workers=8) | |||
2021-04-15 15:37:52 - INFO - creating model resnet | |||
2021-04-15 15:37:52 - INFO - created model with configuration: {'input_size': None, 'dataset': 'imagenet'} | |||
2021-04-15 15:37:52 - INFO - number of parameters: 25557032 |
@ -0,0 +1,5 @@ | |||
2021-04-15 15:38:16 - INFO - saving to ./results/2021-04-15_15-38-16 | |||
2021-04-15 15:38:16 - DEBUG - run arguments: Namespace(batch_size=256, dataset='imagenet', epochs=2500, evaluate=None, gpus='0', input_size=None, lr=0.1, model='alexnet', model_config='', momentum=0.9, optimizer='SGD', print_freq=10, results_dir='./results', resume='', save='2021-04-15_15-38-16', start_epoch=0, type='torch.cuda.FloatTensor', weight_decay=0.0001, workers=8) | |||
2021-04-15 15:38:16 - INFO - creating model alexnet | |||
2021-04-15 15:38:17 - INFO - created model with configuration: {'input_size': None, 'dataset': 'imagenet'} | |||
2021-04-15 15:38:17 - INFO - number of parameters: 61110184 |
@ -0,0 +1,160 @@ | |||
import os | |||
import torch | |||
import logging.config | |||
import shutil | |||
import pandas as pd | |||
from bokeh.io import output_file, save, show | |||
from bokeh.plotting import figure | |||
from bokeh.layouts import column | |||
#from bokeh.charts import Line, defaults | |||
# | |||
#defaults.width = 800 | |||
#defaults.height = 400 | |||
#defaults.tools = 'pan,box_zoom,wheel_zoom,box_select,hover,resize,reset,save' | |||
def setup_logging(log_file='log.txt'):
    """Log DEBUG+ to *log_file* (truncated on start) and echo INFO+ to stderr."""
    logging.basicConfig(level=logging.DEBUG,
                        format="%(asctime)s - %(levelname)s - %(message)s",
                        datefmt="%Y-%m-%d %H:%M:%S",
                        filename=log_file,
                        filemode='w')
    # Mirror the important messages (message text only) to the console.
    console = logging.StreamHandler()
    console.setLevel(logging.INFO)
    console.setFormatter(logging.Formatter('%(message)s'))
    logging.getLogger('').addHandler(console)
class ResultsLog(object):
    """Accumulates per-epoch metrics in a DataFrame; saves CSV and HTML plots.

    Parameters
    ----------
    path : str
        CSV file the results table is written to.
    plot_path : str, optional
        HTML file for the bokeh figures; defaults to ``path + '.html'``.
    """

    def __init__(self, path='results.csv', plot_path=None):
        self.path = path
        self.plot_path = plot_path or (self.path + '.html')
        self.figures = []
        self.results = None

    def add(self, **kwargs):
        """Append one row of named values to the results table."""
        df = pd.DataFrame([kwargs.values()], columns=kwargs.keys())
        if self.results is None:
            self.results = df
        else:
            # DataFrame.append was removed in pandas 2.0; concat is equivalent.
            self.results = pd.concat([self.results, df], ignore_index=True)

    def save(self, title='Training Results'):
        """Write pending figures to the HTML file and the table to CSV."""
        if len(self.figures) > 0:
            if os.path.isfile(self.plot_path):
                os.remove(self.plot_path)
            output_file(self.plot_path, title=title)
            plot = column(*self.figures)
            save(plot)
            self.figures = []
        self.results.to_csv(self.path, index=False, index_label=False)

    def load(self, path=None):
        """Replace the in-memory table with the contents of a CSV file."""
        path = path or self.path
        if os.path.isfile(path):
            # Bug fix: the original called self.results.read_csv(path) —
            # not a DataFrame method — so nothing was ever loaded.
            self.results = pd.read_csv(path)

    def show(self):
        """Open the pending figures in a browser (bokeh show)."""
        if len(self.figures) > 0:
            plot = column(*self.figures)
            show(plot)

    #def plot(self, *kargs, **kwargs):
    #    line = Line(data=self.results, *kargs, **kwargs)
    #    self.figures.append(line)

    def image(self, *kargs, **kwargs):
        """Queue a bokeh image figure for the next save()/show()."""
        fig = figure()
        fig.image(*kargs, **kwargs)
        self.figures.append(fig)
def save_checkpoint(state, is_best, path='.', filename='checkpoint.pth.tar', save_all=False):
    """Serialize *state* to ``path/filename``.

    When *is_best*, also keep a ``model_best.pth.tar`` copy; when
    *save_all*, additionally keep a per-epoch copy (uses state['epoch']).
    """
    target = os.path.join(path, filename)
    torch.save(state, target)
    if is_best:
        shutil.copyfile(target, os.path.join(path, 'model_best.pth.tar'))
    if save_all:
        epoch_copy = os.path.join(
            path, 'checkpoint_epoch_%s.pth.tar' % state['epoch'])
        shutil.copyfile(target, epoch_copy)
class AverageMeter(object):
    """Tracks the most recent value and a running weighted average."""

    def __init__(self):
        self.reset()

    def reset(self):
        """Clear all accumulated statistics."""
        self.val = 0
        self.avg = 0
        self.sum = 0
        self.count = 0

    def update(self, val, n=1):
        """Record *val* observed *n* times and refresh the running average."""
        self.val = val
        self.count += n
        self.sum += val * n
        self.avg = self.sum / self.count
# Maps optimizer names (as given by --optimizer or a model's regime dict)
# to their torch.optim constructors; used by adjust_optimizer below.
__optimizers = {
    'SGD': torch.optim.SGD,
    'ASGD': torch.optim.ASGD,
    'Adam': torch.optim.Adam,
    'Adamax': torch.optim.Adamax,
    'Adagrad': torch.optim.Adagrad,
    'Adadelta': torch.optim.Adadelta,
    'Rprop': torch.optim.Rprop,
    'RMSprop': torch.optim.RMSprop
}
def adjust_optimizer(optimizer, epoch, config):
    """Reconfigure *optimizer* for *epoch* from a training regime.

    *config* is either a callable mapping epoch -> settings dict, or a dict
    keyed by epoch.  Dict settings are "sticky": every entry up to and
    including the current epoch is re-applied in order.
    """
    def apply_setting(opt, setting):
        # Optionally swap the optimizer class, keeping the param groups.
        if 'optimizer' in setting:
            opt = __optimizers[setting['optimizer']](opt.param_groups)
            logging.debug('OPTIMIZER - setting method = %s' %
                          setting['optimizer'])
        # Overwrite any hyper-parameter named in the setting (lr, momentum, ...).
        for group in opt.param_groups:
            for key in list(group.keys()):
                if key in setting:
                    logging.debug('OPTIMIZER - setting %s = %s' %
                                  (key, setting[key]))
                    group[key] = setting[key]
        return opt

    if callable(config):
        return apply_setting(optimizer, config(epoch))
    for e in range(epoch + 1):  # run over all epochs - sticky setting
        if e in config:
            optimizer = apply_setting(optimizer, config[e])
    return optimizer
def accuracy(output, target, topk=(1,)):
    """Computes the precision@k for the specified values of k.

    output: (batch, classes) scores; target: (batch,) class indices.
    Returns a list with one tensor per k: the percentage of samples whose
    true class appears among the top-k predictions.
    """
    maxk = max(topk)
    batch_size = target.size(0)

    _, pred = output.float().topk(maxk, 1, True, True)
    pred = pred.t()
    correct = pred.eq(target.view(1, -1).expand_as(pred))

    res = []
    for k in topk:
        # Bug fix: correct[:k] is non-contiguous after t(), so .view(-1)
        # raises in modern PyTorch; reshape handles both cases.
        correct_k = correct[:k].reshape(-1).float().sum(0)
        res.append(correct_k.mul_(100.0 / batch_size))
    return res
# kernel_img = model.features[0][0].kernel.data.clone() | |||
# kernel_img.add_(-kernel_img.min()) | |||
# kernel_img.mul_(255 / kernel_img.max()) | |||
# save_image(kernel_img, 'kernel%s.jpg' % epoch) |
@ -0,0 +1,8 @@ | |||
# BNN.pytorch | |||
Binarized Neural Network (BNN) for pytorch | |||
This is the pytorch version of the BNN code, for VGG and resnet models
Link to the paper: https://papers.nips.cc/paper/6573-binarized-neural-networks | |||
The code is based on https://github.com/eladhoffer/convNet.pytorch | |||
Please install torch and torchvision by following the instructions at: http://pytorch.org/ | |||
To run resnet18 for cifar10 dataset use: python main_binary.py --model resnet_binary --save resnet18_binary --dataset cifar10 |
@ -0,0 +1,37 @@ | |||
import os | |||
import torchvision.datasets as datasets | |||
import torchvision.transforms as transforms | |||
# Root directory under which every dataset folder is expected to live.
_DATASETS_MAIN_PATH = '/home/Datasets'
# Per-dataset storage locations; imagenet is pre-split into train/val folders,
# the others are downloaded/managed by torchvision in a single directory.
_dataset_path = {
    'cifar10': os.path.join(_DATASETS_MAIN_PATH, 'CIFAR10'),
    'cifar100': os.path.join(_DATASETS_MAIN_PATH, 'CIFAR100'),
    'stl10': os.path.join(_DATASETS_MAIN_PATH, 'STL10'),
    'mnist': os.path.join(_DATASETS_MAIN_PATH, 'MNIST'),
    'imagenet': {
        'train': os.path.join(_DATASETS_MAIN_PATH, 'ImageNet/train'),
        'val': os.path.join(_DATASETS_MAIN_PATH, 'ImageNet/val')
    }
}
def get_dataset(name, split='train', transform=None,
                target_transform=None, download=True):
    """Return a torchvision dataset by name.

    name: 'cifar10', 'cifar100', 'mnist', 'stl10' or 'imagenet'.
    split: 'train' or 'val'.
    Raises ValueError for an unknown name (the original silently returned
    None, which only surfaced later as an opaque DataLoader error).
    """
    train = (split == 'train')
    if name == 'cifar10':
        return datasets.CIFAR10(root=_dataset_path['cifar10'],
                                train=train,
                                transform=transform,
                                target_transform=target_transform,
                                download=download)
    elif name == 'cifar100':
        return datasets.CIFAR100(root=_dataset_path['cifar100'],
                                 train=train,
                                 transform=transform,
                                 target_transform=target_transform,
                                 download=download)
    elif name == 'mnist':
        # _dataset_path already lists MNIST; the original never dispatched it.
        return datasets.MNIST(root=_dataset_path['mnist'],
                              train=train,
                              transform=transform,
                              target_transform=target_transform,
                              download=download)
    elif name == 'stl10':
        # STL10 takes a split string ('train'/'test') instead of a train flag.
        return datasets.STL10(root=_dataset_path['stl10'],
                              split='train' if train else 'test',
                              transform=transform,
                              target_transform=target_transform,
                              download=download)
    elif name == 'imagenet':
        path = _dataset_path[name][split]
        return datasets.ImageFolder(root=path,
                                    transform=transform,
                                    target_transform=target_transform)
    raise ValueError('unknown dataset: %s' % name)
@ -0,0 +1,309 @@ | |||
import argparse | |||
import os | |||
import time | |||
import logging | |||
import torch | |||
import torch.nn as nn | |||
import torch.nn.parallel | |||
import torch.backends.cudnn as cudnn | |||
import torch.optim | |||
import torch.utils.data | |||
import models | |||
from torch.autograd import Variable | |||
from data import get_dataset | |||
from preprocess import get_transform | |||
from utils import * | |||
from datetime import datetime | |||
from ast import literal_eval | |||
from torchvision.utils import save_image | |||
# All lowercase callable symbols exported by the models package are
# selectable architectures for --model.
model_names = sorted(name for name in models.__dict__
                     if name.islower() and not name.startswith("__")
                     and callable(models.__dict__[name]))
# NOTE(review): an unresolved git merge conflict (<<<<<<< HEAD ... >>>>>>>)
# was left here and made the file a SyntaxError; resolved by dropping the
# HEAD-side debug print of model_names.
# Command-line interface: training hyper-parameters, model selection and
# checkpoint/evaluation options for main() below.
parser = argparse.ArgumentParser(description='PyTorch ConvNet Training')
parser.add_argument('--results_dir', metavar='RESULTS_DIR', default='./results',
                    help='results dir')
parser.add_argument('--save', metavar='SAVE', default='',
                    help='saved folder')
parser.add_argument('--dataset', metavar='DATASET', default='imagenet',
                    help='dataset name or folder')
parser.add_argument('--model', '-a', metavar='MODEL', default='alexnet',
                    choices=model_names,
                    help='model architecture: ' +
                    ' | '.join(model_names) +
                    ' (default: alexnet)')
parser.add_argument('--input_size', type=int, default=None,
                    help='image input size')
parser.add_argument('--model_config', default='',
                    help='additional architecture configuration')
parser.add_argument('--type', default='torch.cuda.FloatTensor',
                    help='type of tensor - e.g torch.cuda.HalfTensor')
parser.add_argument('--gpus', default='0',
                    help='gpus used for training - e.g 0,1,3')
parser.add_argument('-j', '--workers', default=8, type=int, metavar='N',
                    help='number of data loading workers (default: 8)')
parser.add_argument('--epochs', default=2500, type=int, metavar='N',
                    help='number of total epochs to run')
parser.add_argument('--start-epoch', default=0, type=int, metavar='N',
                    help='manual epoch number (useful on restarts)')
parser.add_argument('-b', '--batch-size', default=256, type=int,
                    metavar='N', help='mini-batch size (default: 256)')
parser.add_argument('--optimizer', default='SGD', type=str, metavar='OPT',
                    help='optimizer function used')
parser.add_argument('--lr', '--learning_rate', default=0.1, type=float,
                    metavar='LR', help='initial learning rate')
parser.add_argument('--momentum', default=0.9, type=float, metavar='M',
                    help='momentum')
parser.add_argument('--weight-decay', '--wd', default=1e-4, type=float,
                    metavar='W', help='weight decay (default: 1e-4)')
parser.add_argument('--print-freq', '-p', default=10, type=int,
                    metavar='N', help='print frequency (default: 10)')
parser.add_argument('--resume', default='', type=str, metavar='PATH',
                    help='path to latest checkpoint (default: none)')
parser.add_argument('-e', '--evaluate', type=str, metavar='FILE',
                    help='evaluate model FILE on validation set')
def main():
    """Entry point: parse args, build or restore the model, then train/evaluate.

    Results (log, CSV, checkpoints) are written under results_dir/save.
    """
    global args, best_prec1
    best_prec1 = 0
    args = parser.parse_args()

    if args.evaluate:
        args.results_dir = '/tmp'
    # Bug fix: the original used "args.save is ''" — identity comparison with
    # a string literal is fragile (and a SyntaxWarning on modern Python).
    if args.save == '':
        args.save = datetime.now().strftime('%Y-%m-%d_%H-%M-%S')
    save_path = os.path.join(args.results_dir, args.save)
    if not os.path.exists(save_path):
        os.makedirs(save_path)

    setup_logging(os.path.join(save_path, 'log.txt'))
    results_file = os.path.join(save_path, 'results.%s')
    results = ResultsLog(results_file % 'csv', results_file % 'html')

    logging.info("saving to %s", save_path)
    logging.debug("run arguments: %s", args)

    if 'cuda' in args.type:
        args.gpus = [int(i) for i in args.gpus.split(',')]
        torch.cuda.set_device(args.gpus[0])
        cudnn.benchmark = True
    else:
        args.gpus = None

    # create model
    logging.info("creating model %s", args.model)
    model = models.__dict__[args.model]
    model_config = {'input_size': args.input_size, 'dataset': args.dataset}

    # Bug fix: was "args.model_config is not ''" (identity, not equality).
    if args.model_config != '':
        model_config = dict(model_config, **literal_eval(args.model_config))

    model = model(**model_config)
    logging.info("created model with configuration: %s", model_config)

    # optionally resume from a checkpoint
    if args.evaluate:
        if not os.path.isfile(args.evaluate):
            parser.error('invalid checkpoint: {}'.format(args.evaluate))
        checkpoint = torch.load(args.evaluate)
        model.load_state_dict(checkpoint['state_dict'])
        logging.info("loaded checkpoint '%s' (epoch %s)",
                     args.evaluate, checkpoint['epoch'])
    elif args.resume:
        checkpoint_file = args.resume
        if os.path.isdir(checkpoint_file):
            results.load(os.path.join(checkpoint_file, 'results.csv'))
            checkpoint_file = os.path.join(
                checkpoint_file, 'model_best.pth.tar')
        if os.path.isfile(checkpoint_file):
            logging.info("loading checkpoint '%s'", args.resume)
            checkpoint = torch.load(checkpoint_file)
            args.start_epoch = checkpoint['epoch'] - 1
            best_prec1 = checkpoint['best_prec1']
            model.load_state_dict(checkpoint['state_dict'])
            logging.info("loaded checkpoint '%s' (epoch %s)",
                         checkpoint_file, checkpoint['epoch'])
        else:
            logging.error("no checkpoint found at '%s'", args.resume)

    num_parameters = sum([l.nelement() for l in model.parameters()])
    logging.info("number of parameters: %d", num_parameters)

    # Data loading code
    default_transform = {
        'train': get_transform(args.dataset,
                               input_size=args.input_size, augment=True),
        'eval': get_transform(args.dataset,
                              input_size=args.input_size, augment=False)
    }
    transform = getattr(model, 'input_transform', default_transform)
    regime = getattr(model, 'regime', {0: {'optimizer': args.optimizer,
                                           'lr': args.lr,
                                           'momentum': args.momentum,
                                           'weight_decay': args.weight_decay}})
    # define loss function (criterion) and optimizer
    criterion = getattr(model, 'criterion', nn.CrossEntropyLoss)()
    criterion.type(args.type)
    model.type(args.type)

    val_data = get_dataset(args.dataset, 'val', transform['eval'])
    val_loader = torch.utils.data.DataLoader(
        val_data,
        batch_size=args.batch_size, shuffle=False,
        num_workers=args.workers, pin_memory=True)

    if args.evaluate:
        validate(val_loader, model, criterion, 0)
        return

    train_data = get_dataset(args.dataset, 'train', transform['train'])
    train_loader = torch.utils.data.DataLoader(
        train_data,
        batch_size=args.batch_size, shuffle=True,
        num_workers=args.workers, pin_memory=True)

    # Initial settings are immediately overwritten per-epoch by the regime.
    optimizer = torch.optim.SGD(model.parameters(), lr=args.lr)
    logging.info('training regime: %s', regime)

    for epoch in range(args.start_epoch, args.epochs):
        optimizer = adjust_optimizer(optimizer, epoch, regime)

        # train for one epoch
        train_loss, train_prec1, train_prec5 = train(
            train_loader, model, criterion, epoch, optimizer)

        # evaluate on validation set
        val_loss, val_prec1, val_prec5 = validate(
            val_loader, model, criterion, epoch)

        # remember best prec@1 and save checkpoint
        is_best = val_prec1 > best_prec1
        best_prec1 = max(val_prec1, best_prec1)
        save_checkpoint({
            'epoch': epoch + 1,
            'model': args.model,
            'config': args.model_config,
            'state_dict': model.state_dict(),
            'best_prec1': best_prec1,
            'regime': regime
        }, is_best, path=save_path)
        logging.info('\n Epoch: {0}\t'
                     'Training Loss {train_loss:.4f} \t'
                     'Training Prec@1 {train_prec1:.3f} \t'
                     'Training Prec@5 {train_prec5:.3f} \t'
                     'Validation Loss {val_loss:.4f} \t'
                     'Validation Prec@1 {val_prec1:.3f} \t'
                     'Validation Prec@5 {val_prec5:.3f} \n'
                     .format(epoch + 1, train_loss=train_loss, val_loss=val_loss,
                             train_prec1=train_prec1, val_prec1=val_prec1,
                             train_prec5=train_prec5, val_prec5=val_prec5))

        results.add(epoch=epoch + 1, train_loss=train_loss, val_loss=val_loss,
                    train_error1=100 - train_prec1, val_error1=100 - val_prec1,
                    train_error5=100 - train_prec5, val_error5=100 - val_prec5)
        #results.plot(x='epoch', y=['train_loss', 'val_loss'],
        #             title='Loss', ylabel='loss')
        #results.plot(x='epoch', y=['train_error1', 'val_error1'],
        #             title='Error@1', ylabel='error %')
        #results.plot(x='epoch', y=['train_error5', 'val_error5'],
        #             title='Error@5', ylabel='error %')
        results.save()
def forward(data_loader, model, criterion, epoch=0, training=True, optimizer=None):
    """Run one epoch over *data_loader*; returns (avg loss, prec@1, prec@5).

    With training=True the optimizer is stepped per batch; for binarized
    layers the full-precision weights (stored on ``p.org``) are restored
    before the update and the updated weights clamped to [-1, 1] afterwards.
    """
    if args.gpus and len(args.gpus) > 1:
        model = torch.nn.DataParallel(model, args.gpus)

    batch_time = AverageMeter()
    data_time = AverageMeter()
    losses = AverageMeter()
    top1 = AverageMeter()
    top5 = AverageMeter()

    end = time.time()
    for i, (inputs, target) in enumerate(data_loader):
        # measure data loading time
        data_time.update(time.time() - end)
        if args.gpus is not None:
            target = target.cuda()

        # Bug fix: the 'volatile' Variable flag was removed from PyTorch;
        # torch.no_grad() already disables autograd for evaluation.
        if not training:
            with torch.no_grad():
                input_var = Variable(inputs.type(args.type))
                target_var = Variable(target)
                # compute output
                output = model(input_var)
        else:
            input_var = Variable(inputs.type(args.type))
            target_var = Variable(target)
            # compute output
            output = model(input_var)

        loss = criterion(output, target_var)
        if type(output) is list:
            output = output[0]

        # measure accuracy and record loss
        prec1, prec5 = accuracy(output.data, target, topk=(1, 5))
        losses.update(loss.item(), inputs.size(0))
        top1.update(prec1.item(), inputs.size(0))
        top5.update(prec5.item(), inputs.size(0))

        if training:
            # compute gradient and do SGD step
            optimizer.zero_grad()
            loss.backward()
            # Restore the full-precision weights before the update ...
            for p in list(model.parameters()):
                if hasattr(p, 'org'):
                    p.data.copy_(p.org)
            optimizer.step()
            # ... then keep the clamped copy for the next binarization pass.
            for p in list(model.parameters()):
                if hasattr(p, 'org'):
                    p.org.copy_(p.data.clamp_(-1, 1))

        # measure elapsed time
        batch_time.update(time.time() - end)
        end = time.time()

        if i % args.print_freq == 0:
            logging.info('{phase} - Epoch: [{0}][{1}/{2}]\t'
                         'Time {batch_time.val:.3f} ({batch_time.avg:.3f})\t'
                         'Data {data_time.val:.3f} ({data_time.avg:.3f})\t'
                         'Loss {loss.val:.4f} ({loss.avg:.4f})\t'
                         'Prec@1 {top1.val:.3f} ({top1.avg:.3f})\t'
                         'Prec@5 {top5.val:.3f} ({top5.avg:.3f})'.format(
                             epoch, i, len(data_loader),
                             phase='TRAINING' if training else 'EVALUATING',
                             batch_time=batch_time,
                             data_time=data_time, loss=losses, top1=top1, top5=top5))

    return losses.avg, top1.avg, top5.avg
def train(data_loader, model, criterion, epoch, optimizer):
    """One optimization pass over the training set."""
    model.train()  # enable dropout / batch-norm updates
    return forward(data_loader, model, criterion, epoch,
                   training=True, optimizer=optimizer)
def validate(data_loader, model, criterion, epoch):
    """One evaluation pass over the validation set (no weight updates)."""
    model.eval()  # freeze dropout / batch-norm statistics
    return forward(data_loader, model, criterion, epoch,
                   training=False, optimizer=None)
if __name__ == '__main__': | |||
main() |
@ -0,0 +1,332 @@ | |||
import argparse | |||
import os | |||
import time | |||
import logging | |||
import torch | |||
import torch.nn as nn | |||
import torch.nn.parallel | |||
import torch.backends.cudnn as cudnn | |||
import torch.optim | |||
import torch.utils.data | |||
import models | |||
from torch.autograd import Variable | |||
from data import get_dataset | |||
from preprocess import get_transform | |||
from utils import * | |||
from datetime import datetime | |||
from ast import literal_eval | |||
from torchvision.utils import save_image | |||
from models.binarized_modules import HingeLoss | |||
# All lowercase callable symbols exported by the models package are
# selectable architectures for --model.
model_names = sorted(name for name in models.__dict__
                     if name.islower() and not name.startswith("__")
                     and callable(models.__dict__[name]))
# Command-line interface for hinge-loss training: hyper-parameters, model
# selection and checkpoint/evaluation options for main() below.
parser = argparse.ArgumentParser(description='PyTorch ConvNet Training')
parser.add_argument('--results_dir', metavar='RESULTS_DIR', default='/media/hdd/ihubara/BinaryNet.pytorch/results',
                    help='results dir')
parser.add_argument('--save', metavar='SAVE', default='',
                    help='saved folder')
parser.add_argument('--dataset', metavar='DATASET', default='imagenet',
                    help='dataset name or folder')
parser.add_argument('--model', '-a', metavar='MODEL', default='alexnet',
                    choices=model_names,
                    help='model architecture: ' +
                    ' | '.join(model_names) +
                    ' (default: alexnet)')
parser.add_argument('--input_size', type=int, default=None,
                    help='image input size')
parser.add_argument('--model_config', default='',
                    help='additional architecture configuration')
parser.add_argument('--type', default='torch.cuda.FloatTensor',
                    help='type of tensor - e.g torch.cuda.HalfTensor')
parser.add_argument('--gpus', default='0',
                    help='gpus used for training - e.g 0,1,3')
parser.add_argument('-j', '--workers', default=8, type=int, metavar='N',
                    help='number of data loading workers (default: 8)')
parser.add_argument('--epochs', default=900, type=int, metavar='N',
                    help='number of total epochs to run')
parser.add_argument('--start-epoch', default=0, type=int, metavar='N',
                    help='manual epoch number (useful on restarts)')
parser.add_argument('-b', '--batch-size', default=256, type=int,
                    metavar='N', help='mini-batch size (default: 256)')
parser.add_argument('--optimizer', default='SGD', type=str, metavar='OPT',
                    help='optimizer function used')
parser.add_argument('--lr', '--learning_rate', default=0.1, type=float,
                    metavar='LR', help='initial learning rate')
parser.add_argument('--momentum', default=0.9, type=float, metavar='M',
                    help='momentum')
parser.add_argument('--weight-decay', '--wd', default=1e-4, type=float,
                    metavar='W', help='weight decay (default: 1e-4)')
parser.add_argument('--print-freq', '-p', default=10, type=int,
                    metavar='N', help='print frequency (default: 10)')
parser.add_argument('--resume', default='', type=str, metavar='PATH',
                    help='path to latest checkpoint (default: none)')
parser.add_argument('-e', '--evaluate', type=str, metavar='FILE',
                    help='evaluate model FILE on validation set')
# Seed all CUDA devices so repeated runs are comparable.
torch.cuda.random.manual_seed_all(10)
# Number of output classes; overwritten in main() based on --dataset.
output_dim = 0
def main():
    """Entry point for hinge-loss (binarized) training: parse args, build or
    restore the model, then train/evaluate.

    Results (log, CSV, checkpoints) are written under results_dir/save.
    """
    global args, best_prec1, output_dim
    best_prec1 = 0
    args = parser.parse_args()
    # Width of the one-hot hinge targets built in forward().
    output_dim = {'cifar10': 10, 'cifar100': 100, 'imagenet': 1000}[args.dataset]
    if args.evaluate:
        args.results_dir = '/tmp'
    # Bug fix: the original used "args.save is ''" — identity comparison with
    # a string literal is fragile (and a SyntaxWarning on modern Python).
    if args.save == '':
        args.save = datetime.now().strftime('%Y-%m-%d_%H-%M-%S')
    save_path = os.path.join(args.results_dir, args.save)
    if not os.path.exists(save_path):
        os.makedirs(save_path)

    setup_logging(os.path.join(save_path, 'log.txt'))
    results_file = os.path.join(save_path, 'results.%s')
    results = ResultsLog(results_file % 'csv', results_file % 'html')

    logging.info("saving to %s", save_path)
    logging.debug("run arguments: %s", args)

    if 'cuda' in args.type:
        args.gpus = [int(i) for i in args.gpus.split(',')]
        torch.cuda.set_device(args.gpus[0])
        cudnn.benchmark = True
    else:
        args.gpus = None

    # create model
    logging.info("creating model %s", args.model)
    model = models.__dict__[args.model]
    model_config = {'input_size': args.input_size, 'dataset': args.dataset, 'num_classes': output_dim}

    # Bug fix: was "args.model_config is not ''" (identity, not equality).
    if args.model_config != '':
        model_config = dict(model_config, **literal_eval(args.model_config))

    model = model(**model_config)
    logging.info("created model with configuration: %s", model_config)

    # optionally resume from a checkpoint
    if args.evaluate:
        if not os.path.isfile(args.evaluate):
            parser.error('invalid checkpoint: {}'.format(args.evaluate))
        checkpoint = torch.load(args.evaluate)
        model.load_state_dict(checkpoint['state_dict'])
        logging.info("loaded checkpoint '%s' (epoch %s)",
                     args.evaluate, checkpoint['epoch'])
    elif args.resume:
        checkpoint_file = args.resume
        if os.path.isdir(checkpoint_file):
            results.load(os.path.join(checkpoint_file, 'results.csv'))
            checkpoint_file = os.path.join(
                checkpoint_file, 'model_best.pth.tar')
        if os.path.isfile(checkpoint_file):
            logging.info("loading checkpoint '%s'", args.resume)
            checkpoint = torch.load(checkpoint_file)
            args.start_epoch = checkpoint['epoch'] - 1
            best_prec1 = checkpoint['best_prec1']
            model.load_state_dict(checkpoint['state_dict'])
            logging.info("loaded checkpoint '%s' (epoch %s)",
                         checkpoint_file, checkpoint['epoch'])
        else:
            logging.error("no checkpoint found at '%s'", args.resume)

    num_parameters = sum([l.nelement() for l in model.parameters()])
    logging.info("number of parameters: %d", num_parameters)

    # Data loading code
    default_transform = {
        'train': get_transform(args.dataset,
                               input_size=args.input_size, augment=True),
        'eval': get_transform(args.dataset,
                              input_size=args.input_size, augment=False)
    }
    transform = getattr(model, 'input_transform', default_transform)
    regime = getattr(model, 'regime', {0: {'optimizer': args.optimizer,
                                           'lr': args.lr,
                                           'momentum': args.momentum,
                                           'weight_decay': args.weight_decay}})
    # define loss function (criterion) and optimizer
    criterion = getattr(model, 'criterion', HingeLoss)()
    model.type(args.type)

    val_data = get_dataset(args.dataset, 'val', transform['eval'])
    val_loader = torch.utils.data.DataLoader(
        val_data,
        batch_size=args.batch_size, shuffle=False,
        num_workers=args.workers, pin_memory=True)

    if args.evaluate:
        validate(val_loader, model, criterion, 0)
        return

    train_data = get_dataset(args.dataset, 'train', transform['train'])
    train_loader = torch.utils.data.DataLoader(
        train_data,
        batch_size=args.batch_size, shuffle=True,
        num_workers=args.workers, pin_memory=True)

    # Initial settings are immediately overwritten per-epoch by the regime.
    optimizer = torch.optim.SGD(model.parameters(), lr=args.lr)
    logging.info('training regime: %s', regime)

    for epoch in range(args.start_epoch, args.epochs):
        optimizer = adjust_optimizer(optimizer, epoch, regime)

        # train for one epoch
        train_loss, train_prec1, train_prec5 = train(
            train_loader, model, criterion, epoch, optimizer)

        # evaluate on validation set
        val_loss, val_prec1, val_prec5 = validate(
            val_loader, model, criterion, epoch)

        # remember best prec@1 and save checkpoint
        is_best = val_prec1 > best_prec1
        best_prec1 = max(val_prec1, best_prec1)
        save_checkpoint({
            'epoch': epoch + 1,
            'model': args.model,
            'config': args.model_config,
            'state_dict': model.state_dict(),
            'best_prec1': best_prec1,
            'regime': regime
        }, is_best, path=save_path)
        logging.info('\n Epoch: {0}\t'
                     'Training Loss {train_loss:.4f} \t'
                     'Training Prec@1 {train_prec1:.3f} \t'
                     'Training Prec@5 {train_prec5:.3f} \t'
                     'Validation Loss {val_loss:.4f} \t'
                     'Validation Prec@1 {val_prec1:.3f} \t'
                     'Validation Prec@5 {val_prec5:.3f} \n'
                     .format(epoch + 1, train_loss=train_loss, val_loss=val_loss,
                             train_prec1=train_prec1, val_prec1=val_prec1,
                             train_prec5=train_prec5, val_prec5=val_prec5))

        results.add(epoch=epoch + 1, train_loss=train_loss, val_loss=val_loss,
                    train_error1=100 - train_prec1, val_error1=100 - val_prec1,
                    train_error5=100 - train_prec5, val_error5=100 - val_prec5)
        # Bug fix: ResultsLog.plot is commented out in utils.py, so these
        # calls raised AttributeError after every epoch; disabled here (the
        # sibling main_binary.py already keeps them commented out).
        #results.plot(x='epoch', y=['train_loss', 'val_loss'],
        #             title='Loss', ylabel='loss')
        #results.plot(x='epoch', y=['train_error1', 'val_error1'],
        #             title='Error@1', ylabel='error %')
        #results.plot(x='epoch', y=['train_error5', 'val_error5'],
        #             title='Error@5', ylabel='error %')
        results.save()
def forward(data_loader, model, criterion, epoch=0, training=True, optimizer=None):
    """Run one epoch over *data_loader*; returns (avg loss, prec@1, prec@5).

    For a HingeLoss criterion the integer class targets are expanded into a
    {-1, +1} one-hot matrix of width output_dim (built on the GPU — this
    path assumes CUDA); any other criterion receives the raw targets.
    Binarized layers restore p.org before the optimizer step and clamp the
    updated weights to [-1, 1] afterwards.
    """
    if args.gpus and len(args.gpus) > 1:
        model = torch.nn.DataParallel(model, args.gpus)
    batch_time = AverageMeter()
    data_time = AverageMeter()
    losses = AverageMeter()
    top1 = AverageMeter()
    top5 = AverageMeter()

    end = time.time()
    for i, (inputs, target) in enumerate(data_loader):
        # measure data loading time
        data_time.update(time.time() - end)
        if args.gpus is not None:
            target = target.cuda()

        # Bug fix: the original only defined target_onehot inside the
        # HingeLoss branch but used it unconditionally below, raising a
        # NameError for every other criterion.  Default to the raw targets.
        loss_target = target
        if criterion.__class__.__name__ == 'HingeLoss':
            target = target.unsqueeze(1)
            # {-1, +1} one-hot targets (requires CUDA, as in the original).
            target_onehot = torch.cuda.FloatTensor(target.size(0), output_dim)
            target_onehot.fill_(-1)
            target_onehot.scatter_(1, target, 1)
            target = target.squeeze()
            loss_target = target_onehot

        if not training:
            with torch.no_grad():
                input_var = Variable(inputs.type(args.type))
                target_var = Variable(loss_target)
                # compute output
                output = model(input_var)
        else:
            input_var = Variable(inputs.type(args.type))
            target_var = Variable(loss_target)
            # compute output
            output = model(input_var)

        loss = criterion(output, loss_target)
        if type(output) is list:
            output = output[0]

        # measure accuracy and record loss
        prec1, prec5 = accuracy(output.data, target, topk=(1, 5))
        losses.update(loss.item(), inputs.size(0))
        top1.update(prec1.item(), inputs.size(0))
        top5.update(prec5.item(), inputs.size(0))

        if training:
            # compute gradient and do SGD step
            optimizer.zero_grad()
            loss.backward()
            # Restore the full-precision weights before the update ...
            for p in list(model.parameters()):
                if hasattr(p, 'org'):
                    p.data.copy_(p.org)
            optimizer.step()
            # ... then keep the clamped copy for the next binarization pass.
            for p in list(model.parameters()):
                if hasattr(p, 'org'):
                    p.org.copy_(p.data.clamp_(-1, 1))

        # measure elapsed time
        batch_time.update(time.time() - end)
        end = time.time()

        if i % args.print_freq == 0:
            logging.info('{phase} - Epoch: [{0}][{1}/{2}]\t'
                         'Time {batch_time.val:.3f} ({batch_time.avg:.3f})\t'
                         'Data {data_time.val:.3f} ({data_time.avg:.3f})\t'
                         'Loss {loss.val:.4f} ({loss.avg:.4f})\t'
                         'Prec@1 {top1.val:.3f} ({top1.avg:.3f})\t'
                         'Prec@5 {top5.val:.3f} ({top5.avg:.3f})'.format(
                             epoch, i, len(data_loader),
                             phase='TRAINING' if training else 'EVALUATING',
                             batch_time=batch_time,
                             data_time=data_time, loss=losses, top1=top1, top5=top5))

    return losses.avg, top1.avg, top5.avg
def train(data_loader, model, criterion, epoch, optimizer):
    """Run one training epoch; returns (loss, prec@1, prec@5) averages."""
    model.train()  # enable dropout and batch-norm statistics updates
    return forward(data_loader, model, criterion, epoch, training=True,
                   optimizer=optimizer)
def validate(data_loader, model, criterion, epoch):
    """Run one evaluation epoch; returns (loss, prec@1, prec@5) averages."""
    model.eval()  # freeze dropout and batch-norm statistics
    return forward(data_loader, model, criterion, epoch, training=False,
                   optimizer=None)
# Script entry point: main() (defined earlier in this file) drives the
# full train/validate procedure.
if __name__ == '__main__':
    main()
@ -0,0 +1,150 @@ | |||
from __future__ import print_function | |||
import argparse | |||
import torch | |||
import torch.nn as nn | |||
import torch.nn.functional as F | |||
import torch.optim as optim | |||
from torchvision import datasets, transforms | |||
from torch.autograd import Variable | |||
from models.binarized_modules import BinarizeLinear,BinarizeConv2d | |||
from models.binarized_modules import Binarize,HingeLoss | |||
# Training settings.
# BUGFIX: several help strings contradicted the actual defaults
# (claimed 256 / 10 / 0.001 for batch size / epochs / lr); they now match.
parser = argparse.ArgumentParser(description='PyTorch MNIST Example')
parser.add_argument('--batch-size', type=int, default=64, metavar='N',
                    help='input batch size for training (default: 64)')
parser.add_argument('--test-batch-size', type=int, default=1000, metavar='N',
                    help='input batch size for testing (default: 1000)')
parser.add_argument('--epochs', type=int, default=100, metavar='N',
                    help='number of epochs to train (default: 100)')
parser.add_argument('--lr', type=float, default=0.01, metavar='LR',
                    help='learning rate (default: 0.01)')
parser.add_argument('--momentum', type=float, default=0.5, metavar='M',
                    help='SGD momentum (default: 0.5)')
parser.add_argument('--no-cuda', action='store_true', default=False,
                    help='disables CUDA training')
parser.add_argument('--seed', type=int, default=1, metavar='S',
                    help='random seed (default: 1)')
parser.add_argument('--gpus', default=3,
                    help='gpus used for training - e.g 0,1,3')
parser.add_argument('--log-interval', type=int, default=10, metavar='N',
                    help='how many batches to wait before logging training status')
args = parser.parse_args()
args.cuda = not args.no_cuda and torch.cuda.is_available()

# Seed both CPU and (when available) GPU RNGs for reproducibility.
torch.manual_seed(args.seed)
if args.cuda:
    torch.cuda.manual_seed(args.seed)
# Loader options: pinned memory and one worker process only when CUDA is used.
kwargs = {'num_workers': 1, 'pin_memory': True} if args.cuda else {}
# MNIST train/test loaders; (0.1307,) / (0.3081,) are the standard MNIST
# mean/std. The dataset is downloaded into ../data on first use.
train_loader = torch.utils.data.DataLoader(
    datasets.MNIST('../data', train=True, download=True,
                   transform=transforms.Compose([
                       transforms.ToTensor(),
                       transforms.Normalize((0.1307,), (0.3081,))
                   ])),
    batch_size=args.batch_size, shuffle=True, **kwargs)
test_loader = torch.utils.data.DataLoader(
    datasets.MNIST('../data', train=False, transform=transforms.Compose([
                       transforms.ToTensor(),
                       transforms.Normalize((0.1307,), (0.3081,))
                   ])),
    batch_size=args.test_batch_size, shuffle=True, **kwargs)
class Net(nn.Module):
    """Binarized MLP for MNIST.

    Three BinarizeLinear blocks (linear -> batch norm -> hardtanh) with
    hidden width 2048 * infl_ratio, followed by a real-valued classifier
    head. Dropout sits between fc3 and its batch norm (kept from the
    original layout).
    """

    def __init__(self):
        super(Net, self).__init__()
        self.infl_ratio = 3
        self.fc1 = BinarizeLinear(784, 2048 * self.infl_ratio)
        self.htanh1 = nn.Hardtanh()
        self.bn1 = nn.BatchNorm1d(2048 * self.infl_ratio)
        self.fc2 = BinarizeLinear(2048 * self.infl_ratio, 2048 * self.infl_ratio)
        self.htanh2 = nn.Hardtanh()
        self.bn2 = nn.BatchNorm1d(2048 * self.infl_ratio)
        self.fc3 = BinarizeLinear(2048 * self.infl_ratio, 2048 * self.infl_ratio)
        self.htanh3 = nn.Hardtanh()
        self.bn3 = nn.BatchNorm1d(2048 * self.infl_ratio)
        self.fc4 = nn.Linear(2048 * self.infl_ratio, 10)
        # BUGFIX: make the softmax dimension explicit; implicit-dim
        # LogSoftmax is deprecated. dim=1 matches the (N, 10) logits.
        self.logsoftmax = nn.LogSoftmax(dim=1)
        self.drop = nn.Dropout(0.5)

    def forward(self, x):
        x = x.view(-1, 28 * 28)  # flatten 28x28 images
        x = self.htanh1(self.bn1(self.fc1(x)))
        x = self.htanh2(self.bn2(self.fc2(x)))
        x = self.fc3(x)
        x = self.drop(x)
        x = self.bn3(x)
        x = self.htanh3(x)
        x = self.fc4(x)
        return self.logsoftmax(x)
# Model / loss / optimizer setup for the MNIST script.
model = Net()
if args.cuda:
    # NOTE(review): device index 3 is hard-coded and ignores --gpus —
    # confirm this matches the intended machine layout.
    torch.cuda.set_device(3)
    model.cuda()
# NOTE(review): Net.forward already ends in LogSoftmax, and
# CrossEntropyLoss applies log-softmax internally again — confirm intended.
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=args.lr)
def train(epoch):
    """Train for one epoch over train_loader.

    Binary-weight update scheme: gradients are computed on the binarized
    weights, applied to the stored real-valued copy (p.org), and the
    real-valued weights are clamped to [-1, 1] after the step.

    BUGFIX: the original multiplied the learning rate by 0.1 on *every
    batch* of epochs divisible by 40 — on top of the per-epoch decay in the
    main loop — collapsing the lr to ~0 within one epoch. It also called
    optimizer.zero_grad() twice per batch. The per-epoch schedule at the
    bottom of the script is kept as the single source of decay.
    """
    model.train()
    for batch_idx, (data, target) in enumerate(train_loader):
        if args.cuda:
            data, target = data.cuda(), target.cuda()
        data, target = Variable(data), Variable(target)
        optimizer.zero_grad()
        output = model(data)
        loss = criterion(output, target)
        loss.backward()
        # Restore the latent real-valued weights before the step ...
        for p in list(model.parameters()):
            if hasattr(p, 'org'):
                p.data.copy_(p.org)
        optimizer.step()
        # ... then clamp them so binarization stays meaningful.
        for p in list(model.parameters()):
            if hasattr(p, 'org'):
                p.org.copy_(p.data.clamp_(-1, 1))

        if batch_idx % args.log_interval == 0:
            print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format(
                epoch, batch_idx * len(data), len(train_loader.dataset),
                100. * batch_idx / len(train_loader), loss.item()))
def test():
    """Evaluate on test_loader; print per-sample average loss and accuracy."""
    model.eval()
    test_loss = 0
    correct = 0
    with torch.no_grad():
        for data, target in test_loader:
            if args.cuda:
                data, target = data.cuda(), target.cuda()
            data, target = Variable(data), Variable(target)
            output = model(data)
            # BUGFIX: CrossEntropyLoss returns the per-batch *mean*; the
            # original summed means and divided by the dataset size,
            # under-reporting the loss by roughly the batch size. Weight
            # each batch mean by its batch size so the final division
            # yields a true per-sample average.
            test_loss += criterion(output, target).item() * data.size(0)
            pred = output.data.max(1, keepdim=True)[1]  # index of the max log-probability
            correct += pred.eq(target.data.view_as(pred)).cpu().sum()

    test_loss /= len(test_loader.dataset)
    print('\nTest set: Average loss: {:.4f}, Accuracy: {}/{} ({:.0f}%)\n'.format(
        test_loss, correct, len(test_loader.dataset),
        100. * correct / len(test_loader.dataset)))
# Main loop: one train + test pass per epoch, with a 10x learning-rate
# decay every 40 epochs.
for epoch in range(1, args.epochs + 1):
    train(epoch)
    test()
    if epoch%40==0:
        optimizer.param_groups[0]['lr']=optimizer.param_groups[0]['lr']*0.1
@ -0,0 +1,6 @@ | |||
from .alexnet import * | |||
from .alexnet_binary import * | |||
from .resnet import * | |||
from .resnet_binary import * | |||
from .vgg_cifar10_binary import * |
@ -0,0 +1,78 @@ | |||
import torch.nn as nn | |||
import torchvision.transforms as transforms | |||
__all__ = ['alexnet'] | |||
class AlexNetOWT_BN(nn.Module):
    """AlexNet ("one weird trick" layout) with batch normalization.

    The feature extractor expects 224x224 RGB input and produces a 256x6x6
    map; the classifier maps it to num_classes logits. The instance also
    carries `regime` (an epoch-indexed hyper-parameter schedule) and
    `input_transform` (train/eval preprocessing pipelines) consumed by the
    training script.
    """
    def __init__(self, num_classes=1000):
        super(AlexNetOWT_BN, self).__init__()
        # Conv stack: conv (no bias) -> optional max pool -> BN -> ReLU.
        # NOTE: BN/ReLU order differs between the first two stages (kept
        # as written).
        self.features = nn.Sequential(
            nn.Conv2d(3, 64, kernel_size=11, stride=4, padding=2,
                      bias=False),
            nn.MaxPool2d(kernel_size=3, stride=2),
            nn.BatchNorm2d(64),
            nn.ReLU(inplace=True),
            nn.Conv2d(64, 192, kernel_size=5, padding=2, bias=False),
            nn.MaxPool2d(kernel_size=3, stride=2),
            nn.ReLU(inplace=True),
            nn.BatchNorm2d(192),
            nn.Conv2d(192, 384, kernel_size=3, padding=1, bias=False),
            nn.ReLU(inplace=True),
            nn.BatchNorm2d(384),
            nn.Conv2d(384, 256, kernel_size=3, padding=1, bias=False),
            nn.ReLU(inplace=True),
            nn.BatchNorm2d(256),
            nn.Conv2d(256, 256, kernel_size=3, padding=1, bias=False),
            nn.MaxPool2d(kernel_size=3, stride=2),
            nn.ReLU(inplace=True),
            nn.BatchNorm2d(256)
        )
        # Fully connected head with dropout regularization.
        self.classifier = nn.Sequential(
            nn.Linear(256 * 6 * 6, 4096, bias=False),
            nn.BatchNorm1d(4096),
            nn.ReLU(inplace=True),
            nn.Dropout(0.5),
            nn.Linear(4096, 4096, bias=False),
            nn.BatchNorm1d(4096),
            nn.ReLU(inplace=True),
            nn.Dropout(0.5),
            nn.Linear(4096, num_classes)
        )
        # Epoch-indexed training schedule read by the training script.
        self.regime = {
            0: {'optimizer': 'SGD', 'lr': 1e-2,
                'weight_decay': 5e-4, 'momentum': 0.9},
            10: {'lr': 5e-3},
            15: {'lr': 1e-3, 'weight_decay': 0},
            20: {'lr': 5e-4},
            25: {'lr': 1e-4}
        }
        # Standard ImageNet channel statistics.
        normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                         std=[0.229, 0.224, 0.225])
        # NOTE(review): transforms.Scale was renamed Resize in newer
        # torchvision releases — confirm the pinned torchvision version.
        self.input_transform = {
            'train': transforms.Compose([
                transforms.Scale(256),
                transforms.RandomCrop(224),
                transforms.RandomHorizontalFlip(),
                transforms.ToTensor(),
                normalize
            ]),
            'eval': transforms.Compose([
                transforms.Scale(256),
                transforms.CenterCrop(224),
                transforms.ToTensor(),
                normalize
            ])
        }

    def forward(self, x):
        x = self.features(x)
        # Flatten the 256x6x6 feature map for the linear head.
        x = x.view(-1, 256 * 6 * 6)
        x = self.classifier(x)
        return x
def alexnet(**kwargs):
    """Factory for the batch-norm AlexNet; honors a num_classes kwarg."""
    return AlexNetOWT_BN(kwargs.get('num_classes', 1000))
@ -0,0 +1,92 @@ | |||
import torch.nn as nn | |||
import torchvision.transforms as transforms | |||
from .binarized_modules import BinarizeLinear,BinarizeConv2d | |||
__all__ = ['alexnet_binary'] | |||
class AlexNetOWT_BN(nn.Module):
    """Binarized AlexNet ("one weird trick" layout) with batch norm.

    All convolutions and linear layers are binarized; internal widths are
    inflated by ratioInfl to recover accuracy. The instance also carries
    `regime` (epoch-indexed hyper-parameter schedule) and `input_transform`
    (train/eval preprocessing) consumed by the training script.
    """

    def __init__(self, num_classes=1000):
        super(AlexNetOWT_BN, self).__init__()
        self.ratioInfl = 3
        self.features = nn.Sequential(
            BinarizeConv2d(3, int(64*self.ratioInfl), kernel_size=11, stride=4, padding=2),
            nn.MaxPool2d(kernel_size=3, stride=2),
            nn.BatchNorm2d(int(64*self.ratioInfl)),
            nn.Hardtanh(inplace=True),
            BinarizeConv2d(int(64*self.ratioInfl), int(192*self.ratioInfl), kernel_size=5, padding=2),
            nn.MaxPool2d(kernel_size=3, stride=2),
            nn.BatchNorm2d(int(192*self.ratioInfl)),
            nn.Hardtanh(inplace=True),
            BinarizeConv2d(int(192*self.ratioInfl), int(384*self.ratioInfl), kernel_size=3, padding=1),
            nn.BatchNorm2d(int(384*self.ratioInfl)),
            nn.Hardtanh(inplace=True),
            BinarizeConv2d(int(384*self.ratioInfl), int(256*self.ratioInfl), kernel_size=3, padding=1),
            nn.BatchNorm2d(int(256*self.ratioInfl)),
            nn.Hardtanh(inplace=True),
            BinarizeConv2d(int(256*self.ratioInfl), 256, kernel_size=3, padding=1),
            nn.MaxPool2d(kernel_size=3, stride=2),
            nn.BatchNorm2d(256),
            nn.Hardtanh(inplace=True)
        )
        self.classifier = nn.Sequential(
            BinarizeLinear(256 * 6 * 6, 4096),
            nn.BatchNorm1d(4096),
            nn.Hardtanh(inplace=True),
            BinarizeLinear(4096, 4096),
            nn.BatchNorm1d(4096),
            nn.Hardtanh(inplace=True),
            BinarizeLinear(4096, num_classes),
            # BUGFIX: was nn.BatchNorm1d(1000), which crashes for any
            # num_classes != 1000.
            nn.BatchNorm1d(num_classes),
            # dim=1 matches the (N, num_classes) logits; implicit-dim
            # LogSoftmax is deprecated.
            nn.LogSoftmax(dim=1)
        )
        # Epoch-indexed training schedule read by the training script.
        self.regime = {
            0: {'optimizer': 'Adam', 'lr': 5e-3},
            20: {'lr': 1e-3},
            30: {'lr': 5e-4},
            35: {'lr': 1e-4},
            40: {'lr': 1e-5}
        }
        # Standard ImageNet channel statistics.
        normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                         std=[0.229, 0.224, 0.225])
        # NOTE(review): transforms.Scale was renamed Resize in newer
        # torchvision releases — confirm the pinned torchvision version.
        self.input_transform = {
            'train': transforms.Compose([
                transforms.Scale(256),
                transforms.RandomCrop(224),
                transforms.RandomHorizontalFlip(),
                transforms.ToTensor(),
                normalize
            ]),
            'eval': transforms.Compose([
                transforms.Scale(256),
                transforms.CenterCrop(224),
                transforms.ToTensor(),
                normalize
            ])
        }

    def forward(self, x):
        x = self.features(x)
        x = x.view(-1, 256 * 6 * 6)  # flatten the 256x6x6 feature map
        x = self.classifier(x)
        return x
def alexnet_binary(**kwargs):
    """Factory for the binarized AlexNet; honors a num_classes kwarg."""
    return AlexNetOWT_BN(kwargs.get('num_classes', 1000))
@ -0,0 +1,423 @@ | |||
import torch | |||
import pdb | |||
import torch.nn as nn | |||
import math | |||
from torch.autograd import Variable | |||
from torch.autograd import Function | |||
from decimal import Decimal, ROUND_HALF_UP | |||
import numpy as np | |||
def Binarize(tensor, quant_mode='det'):
    """Binarize a tensor to {-1, +1}.

    'det': deterministic sign() (note: exact zeros map to 0 — unchanged
    from the original behavior). Any other mode: stochastic rounding with
    P(+1) = clamp((x + 1) / 2, 0, 1).
    """
    if quant_mode == 'det':
        return tensor.sign()
    # BUGFIX: the original mutated `tensor` in place (only on this path,
    # inconsistent with 'det') and drew the random noise on the CPU, which
    # fails for CUDA tensors. Work out-of-place and on the input's device.
    noise = torch.rand(tensor.size(), device=tensor.device) - 0.5
    return tensor.add(1).div(2).add(noise).clamp(0, 1).round().mul(2).add(-1)
class HingeLoss(nn.Module):
    """Mean hinge loss: mean(max(0, margin - input * target))."""

    def __init__(self):
        super(HingeLoss, self).__init__()
        self.margin = 1.0

    def hinge_loss(self, input, target):
        # Per-element margin violation, floored at zero.
        violation = self.margin - input.mul(target)
        violation = violation.clamp(min=0)
        return violation.mean()

    def forward(self, input, target):
        return self.hinge_loss(input, target)
class SqrtHingeLossFunction(Function):
    """Squared hinge loss as a (legacy-style) autograd Function.

    NOTE(review): uses the deprecated instance-based Function API
    (__init__/forward/backward on self); modern torch requires the
    static-method API — confirm against the pinned torch version.
    """
    def __init__(self):
        super(SqrtHingeLossFunction,self).__init__()
        self.margin=1.0

    def forward(self, input, target):
        # Squared margin violations, averaged over all target elements.
        output=self.margin-input.mul(target)
        output[output.le(0)]=0
        self.save_for_backward(input, target)
        loss=output.mul(output).sum(0).sum(1).div(target.numel())
        return loss

    def backward(self,grad_output):
        input, target = self.saved_tensors
        output=self.margin-input.mul(target)
        output[output.le(0)]=0
        # BUGFIX: removed a leftover `import pdb; pdb.set_trace()` that
        # froze every backward pass waiting on an interactive debugger.
        # Gradient of the mean squared violation: -2 * target * violation / N,
        # masked to elements with a non-zero violation.
        grad_output.resize_as_(input).copy_(target).mul_(-2).mul_(output)
        grad_output.mul_(output.ne(0).float())
        grad_output.div_(input.numel())
        return grad_output,grad_output
def Quantize(tensor, quant_mode='det', params=None, numBits=8):
    """Quantize a tensor onto a fixed-point grid of step 2**-(numBits-1).

    NOTE(review): the clamp range [-2**(numBits-1), 2**(numBits-1)] and the
    scale 2**(numBits-1) look mutually inconsistent for a numBits-bit
    format — confirm the intended fixed-point layout.
    """
    # In place: mutates the caller's tensor before quantization.
    tensor.clamp_(-2**(numBits-1),2**(numBits-1))
    if quant_mode=='det':
        # Deterministic round-to-nearest on the scaled grid.
        tensor=tensor.mul(2**(numBits-1)).round().div(2**(numBits-1))
    else:
        # Stochastic rounding: add uniform noise in [-0.5, 0.5) after round.
        tensor=tensor.mul(2**(numBits-1)).round().add(torch.rand(tensor.size()).add(-0.5)).div(2**(numBits-1))
        # NOTE(review): quant_fixed is not defined anywhere in this file —
        # this branch raises NameError if ever taken.
        quant_fixed(tensor, params)
    return tensor
#import torch.nn._functions as tnnf | |||
class BinarizeLinear(nn.Linear):
    """Linear layer whose weights are binarized to {-1,+1} on the fly.

    The latent real-valued weights live in `weight.org`; `weight.data`
    holds the binarized copy used for the forward pass. The bias (if any)
    stays real-valued.
    """

    def __init__(self, *kargs, **kwargs):
        super(BinarizeLinear, self).__init__(*kargs, **kwargs)

    def forward(self, input):
        # Stash the latent weights once, then run on the binarized copy.
        if not hasattr(self.weight, 'org'):
            self.weight.org = self.weight.data.clone()
        self.weight.data = Binarize(self.weight.org)
        out = nn.functional.linear(input, self.weight)
        if self.bias is not None:
            # Keep a backup of the bias, mirroring the weight handling.
            self.bias.org = self.bias.data.clone()
            out = out + self.bias.view(1, -1).expand_as(out)
        return out
class BinarizeConv2d(nn.Conv2d):
    """Conv2d with sign-binarized weights and CIM-style output quantization.

    The forward pass rounds the input to integers, convolves with {-1,+1}
    weights, then applies a kernel-shape-dependent scale/round and clamps
    the result to [-63, 63].

    NOTE(review): the /64, *36/64 and *7/64 factors and the shape
    conditions below appear to be empirically tuned to a specific CIM
    macro — confirm against the hardware spec before changing them.
    NOTE: an existing bias parameter is ignored (the add is commented out).
    """
    def __init__(self, *kargs, **kwargs):
        super(BinarizeConv2d, self).__init__(*kargs, **kwargs)

    def forward(self, input):
        # if input.size(1) != 3:
        # input.data = Binarize(input.data)
        # Keep the latent real-valued weights; run on the binarized copy.
        if not hasattr(self.weight,'org'):
            self.weight.org=self.weight.data.clone()
        self.weight.data=Binarize(self.weight.org)
        #input = torch.round(input)
        #input = input*2-1
        #scale = max(torch.max(input), -torch.min(input)) / 63
        #input = torch.round(input*2 / scale) - 63
        #if scale != 0:
        # input = torch.round(input / scale)
        #print (torch.max(input))
        #print(input)
        # Quantize activations to integer levels.
        input = torch.round(input)
        #print(input)
        #print (torch.max(input))
        out = nn.functional.conv2d(input, self.weight, None, self.stride,
                                   self.padding, self.dilation, self.groups)
        #print (torch.min(out), torch.max(out))
        #out = torch.round(out)
        #print (torch.min(out), torch.max(out))
        #print (torch.min(input), torch.max(input))
        #out = torch.round(out / 64 * 36 / 64)
        #print (self.weight.size()[1])
        #if self.weight.size()[1] >= 16 and self.weight.size()[1] <= 24:
        # Shape-dependent quantization (presumably different macro
        # configurations — TODO confirm):
        #  - >=4 input channels with a 3x3 kernel: extra 36/64 factor;
        #  - single input channel: 7/64 factor;
        #  - everything else: plain /64 then a gain of 4 and saturation.
        if self.weight.size()[1] >= 4 and self.weight.size()[2] * self.weight.size()[3] == 9:
            out = torch.round(out / 64 * 36 / 64)
        elif self.weight.size()[1] == 1:
            out = torch.round(out * 7 / 64)
        else:
            out = torch.round(out / 64)
        out = out * 4
        out[out > 63] = 63
        out[out < -63] = -63
        #out = out - torch.round(torch.mean(out))
        # out = out*4
        #out[out > 63] = 63
        #out[out < -63] = -63
        #else:
        # out = torch.round(out * 10 / 64)
        #print (torch.min(out), torch.max(out))
        # if not self.bias is None:
        # self.bias.org=self.bias.data.clone()
        # out += self.bias.view(1, -1, 1, 1).expand_as(out)
        return out
class IdealCimConv2d(nn.Conv2d):
    """Binary-weight conv modeling an ideal (error-free) CIM macro.

    Activations are rounded to integers, convolved with sign() weights,
    scaled by 4/64 and saturated to the macro's [-63, 63] output range.
    """

    def __init__(self, *kargs, **kwargs):
        super(IdealCimConv2d, self).__init__(*kargs, **kwargs)

    def forward(self, input):
        # Keep the latent real-valued weights; run on the binarized copy.
        if not hasattr(self.weight, 'org'):
            self.weight.org = self.weight.data.clone()
        self.weight.data = Binarize(self.weight.org)

        quantized = torch.round(input)
        acc = nn.functional.conv2d(quantized, self.weight, None, self.stride,
                                   self.padding, self.dilation, self.groups)
        # ADC-style scaling: /64, gain of 4, clamp to +/-63.
        acc = acc / 64
        acc = acc * 4
        acc[acc > 63] = 63
        acc[acc < -63] = -63
        return acc
# Target device for the CIM simulation helpers below.
# NOTE(review): hard-coded CUDA device; CimSimConv2d fails on CPU-only
# machines — consider deriving the device from the input tensor instead.
device = 'cuda:0'
# The error-model network below is disabled; cim_conv_tmp still references
# `sim_model` and would raise NameError if its error-model path ran.
'''
H = [1024, 512]
sim_model = torch.nn.Sequential(
    torch.nn.Linear(36, H[0]),
    torch.nn.Dropout(p=0.5),
    torch.nn.ReLU(),
    torch.nn.Linear(H[0], H[1]),
    torch.nn.Dropout(p=0.5),
    torch.nn.ReLU(),
    torch.nn.Linear(H[-1], 1),
)
sim_model.load_state_dict(torch.load('model_error.ckpt', map_location=torch.device('cuda:0')))
sim_model = sim_model.to(device)
sim_model.eval()
'''
class CimSimConv2d(nn.Conv2d):
    """Conv2d simulating a compute-in-memory (CIM) macro.

    Weights are binarized to {-1,+1} and inputs rounded to integers; the
    convolution is evaluated in hardware-sized channel slices (simconv),
    each slice quantized (/64, round) and passed through the measured
    Mapping table before accumulation, then scaled by 4 and clamped to
    the macro's [-63, 63] output range.

    NOTE(review): the commented-out "Error model" paths reference
    `sim_model`, whose construction is commented out at module level —
    re-enabling them raises NameError as written.
    """
    def __init__(self, *kargs, **kwargs):
        super(CimSimConv2d, self).__init__(*kargs, **kwargs)
        self.device = device

    def forward(self, input):
        # Keep the latent real-valued weights; run on the binarized copy.
        if not hasattr(self.weight,'org'):
            self.weight.org=self.weight.data.clone()
        self.weight.data=Binarize(self.weight.org)
        #scale = max(torch.max(input), -torch.min(input)) / 63
        #if scale != 0:
        # input = torch.round(input / scale)
        #''' random error
        #out = nn.functional.conv2d(input, self.weight, None, self.stride,
        # self.padding, self.dilation, self.groups)
        #out = torch.round(out / 64 * 36 / 64)
        #randrange = (self.weight.size()[1] // 4)
        #for _ in range(randrange):
        # out += torch.randint(-1, 1, out.size(), device=device)
        #out[out>63] = 63
        #out[out<-63] -63
        #'''
        # Quantize activations to integer levels before simulation.
        input = torch.round(input)
        out2 = self.simconv(input, self.weight)
        '''
        if torch.max(out2) < 32:
            out2 = out2 * 2
        if torch.max(out2) < 32:
            out2 = out2 * 2
        if torch.max(out2) < 32:
            out2 = out2 * 2
        '''
        # Output gain and saturation to the macro's range.
        out2 = out2 * 4
        out2[out2 > 63] = 63
        out2[out2 < -63] = -63
        #print (self.weight.data.size())
        #print (torch.max(out2), torch.min(out2))
        #print (torch.max(out-out2), torch.min(out-out2))
        #out = nn.functional.conv2d(input, self.weight, None, self.stride,
        # self.padding, self.dilation, self.groups)
        #print(input.size(), self.weight.size(), out.size())
        #if not self.bias is None:
        # self.bias.org=self.bias.data.clone()
        # out += self.bias.view(1, -1, 1, 1).expand_as(out)
        return out2

    def simconv(self, input_a, weight):
        """Slice-wise convolution through the Mapping quantization table.

        NOTE(review): the output size formula assumes stride 1 and no
        padding — confirm, since self.stride/self.padding are forwarded
        to conv2d below.
        """
        #print(input_a.size(), weight.size())
        batch_size = input_a.size()[0]
        out_channel = weight.size()[0]
        out_width = input_a.size()[2] - 2 * (weight.size()[2] // 2)
        out_height = input_a.size()[3] - 2 * (weight.size()[3] // 2)
        simout = torch.zeros(batch_size, out_channel, out_width, out_height, dtype = input_a.dtype).to(device)
        first = True
        #''' Mapping Table
        # 7x7 kernels are processed one channel at a time; otherwise the
        # macro takes 4-channel slices.
        if weight.size()[2] == 7:
            kernel_group = 1
        else:
            kernel_group = 4
        Digital_input_split = torch.split(input_a, kernel_group, dim=1)
        binary_weight_split = torch.split(weight, kernel_group, dim=1)
        for i in range(len(Digital_input_split)):
            temp_output = nn.functional.conv2d(Digital_input_split[i], binary_weight_split[i], None, self.stride, self.padding, self.dilation, self.groups)
            #temp_output = torch.round(temp_output / 64 * 36 / 64)
            temp_output = torch.round(temp_output / 64)
            # Replace each quantized value with the measured macro level.
            temp_output = Mapping.apply(temp_output)
            simout += temp_output + 2
        #print (torch.max(simout), torch.min(simout))
        #'''
        ''' Error model
        for n in range(batch_size):
            for c in range(out_channel):
                w = torch.reshape(weight[c], (-1,)).to(device)
                inputs = []
                for i in range(out_width):
                    for j in range(out_height):
                        input = torch.reshape(input_a[n, :, i: i + weight.size()[2], j: j + weight.size()[3]], (-1,))
                        #print (w.size(), input.size())
                        # simout[n][c][i][j] = sum(w*input)
                        # TODO
                        simout[n][c][i][j] = self.cim_conv_tmp(input, w)
        #'''
        #print (len(input))
        #print (simout.size())
        # out = nn.functional.conv2d(input_a, weight)
        return simout

    def cim_conv_tmp(self, input, weight):
        """Per-window dot product split into 36- or 49-wide macro rows.

        NOTE(review): only reachable from the disabled "Error model" path;
        it depends on the commented-out module-level `sim_model` and would
        raise NameError as-is.
        """
        assert len(input) == len(weight)
        raw_sum = 0
        if len(weight) == 3:
            for i in range((len(input)-1) // 36 + 1):
                data_x = input[i*36:i*36+36] * weight[i*36:i*36+36]
                row = int(Decimal(float(sum(data_x)/64.0)).quantize(0, ROUND_HALF_UP))
                #''' Error model
                # Zero-pad the final partial row to the macro width.
                if len(data_x) < 36:
                    data_x = torch.cat((data_x, torch.zeros(36 - len(data_x), dtype=data_x.dtype)))
                try:
                    #ensor_x = torch.Tensor(data_x).to(self.device)
                    tensor_x = data_x.to(device)
                except:
                    # NOTE(review): len() is missing its argument — this
                    # handler itself raises TypeError if ever reached.
                    print (data_x, len())
                y_pred = sim_model(tensor_x)
                # Clamp the learned error adjustment to +/-10 levels.
                if int(y_pred[0]) > 10:
                    adjust = 10
                elif int(y_pred[0]) < -10:
                    adjust = -10
                else:
                    adjust = int(y_pred[0])
                #print (tensor_x, y_pred)
                raw_sum += (row + adjust + 2)
                #'''
                #if row in self.mappingTable:
                # row = self.mappingTable[row]
                #raw_sum += row
                #raw_sum += row
        else:
            for i in range((len(input)-1) // 49 + 1):
                data_x = input[i*49:i*49+49] * weight[i*49:i*49+49]
                row = int(Decimal(float(sum(data_x)/64.0)).quantize(0, ROUND_HALF_UP))
                #''' Error model
                if len(data_x) < 49:
                    data_x = torch.cat((data_x, torch.zeros(49 - len(data_x), dtype=data_x.dtype)))
                try:
                    #ensor_x = torch.Tensor(data_x).to(self.device)
                    tensor_x = data_x.to(device)
                except:
                    # NOTE(review): same missing len() argument as above.
                    print (data_x, len())
                y_pred = sim_model(tensor_x)
                if int(y_pred[0]) > 10:
                    adjust = 10
                elif int(y_pred[0]) < -10:
                    adjust = -10
                else:
                    adjust = int(y_pred[0])
                #print (tensor_x, y_pred)
                raw_sum += (row + adjust + 2)
        #print (raw_sum)
        return raw_sum
class Mapping(torch.autograd.Function):
    """Straight-through lookup mapping quantized conv sums onto the
    discrete output levels measured from the CIM macro.

    Values absent from the table pass through unchanged (this includes
    -35, 4, 6, 8, 10, 12 and 16, which were deliberately commented out in
    the original). The backward pass is the identity (straight-through
    estimator).
    """

    # Measured input -> output level table (equivalent to the original
    # per-value masked assignments; each mask was taken against the
    # unmodified input, so order never mattered).
    _TABLE = {
        -1: -4, -2: -5, -3: -6, -4: -7, -5: -9, -6: -9, -7: -11, -8: -11,
        -9: -13, -10: -13, -11: -17, -12: -17, -13: -17, -14: -19,
        -15: -19, -16: -21, -17: -21, -18: -23, -19: -25, -20: -25,
        -21: -25, -22: -25, -23: -27, -24: -27, -25: -29, -26: -29,
        -27: -29, -28: -31, -29: -31, -30: -33, -31: -33, -32: -35,
        -33: -35, -34: -35,
        0: -2, 1: -1, 2: 1, 3: 2, 5: 4, 7: 8, 9: 10, 11: 12,
        13: 16, 14: 16, 15: 16, 17: 18, 18: 20, 19: 20, 20: 24, 21: 24,
        22: 24, 23: 26, 24: 26, 25: 28, 26: 28, 27: 28, 28: 30, 29: 30,
        30: 32, 31: 32, 32: 34, 33: 34, 34: 34, 35: 34,
    }

    @staticmethod
    def forward(ctx, input):
        output = input.clone()
        # Each mask is computed on the untouched input, so assignments
        # cannot cascade.
        for src, dst in Mapping._TABLE.items():
            output[input == src] = dst
        return output

    @staticmethod
    def backward(ctx, grad_output):
        # BUGFIX: @staticmethod was missing here in the original,
        # inconsistent with forward and required by the autograd
        # Function API. Identity gradient (straight-through).
        return grad_output
@ -0,0 +1,217 @@ | |||
import torch.nn as nn | |||
import torchvision.transforms as transforms | |||
import math | |||
__all__ = ['resnet'] | |||
def conv3x3(in_planes, out_planes, stride=1):
    """3x3 convolution with 1-pixel padding and no bias."""
    return nn.Conv2d(in_planes, out_planes,
                     kernel_size=3, stride=stride, padding=1, bias=False)
def init_model(model):
    """He-style init for conv weights; identity transform for batch norm."""
    for module in model.modules():
        if isinstance(module, nn.Conv2d):
            fan_out = module.kernel_size[0] * module.kernel_size[1] * module.out_channels
            module.weight.data.normal_(0, math.sqrt(2. / fan_out))
        elif isinstance(module, nn.BatchNorm2d):
            module.weight.data.fill_(1)
            module.bias.data.zero_()
class BasicBlock(nn.Module):
    """Two-conv residual block; ReLU is applied after the shortcut add."""

    expansion = 1

    def __init__(self, inplanes, planes, stride=1, downsample=None):
        super(BasicBlock, self).__init__()
        self.conv1 = conv3x3(inplanes, planes, stride)
        self.bn1 = nn.BatchNorm2d(planes)
        self.relu = nn.ReLU(inplace=True)
        self.conv2 = conv3x3(planes, planes)
        self.bn2 = nn.BatchNorm2d(planes)
        self.downsample = downsample
        self.stride = stride

    def forward(self, x):
        # Shortcut is projected only when shape changes (downsample set).
        shortcut = x if self.downsample is None else self.downsample(x)
        out = self.relu(self.bn1(self.conv1(x)))
        out = self.bn2(self.conv2(out))
        out += shortcut
        return self.relu(out)
class Bottleneck(nn.Module):
    """1x1 -> 3x3 -> 1x1 residual bottleneck with 4x channel expansion."""

    expansion = 4

    def __init__(self, inplanes, planes, stride=1, downsample=None):
        super(Bottleneck, self).__init__()
        self.conv1 = nn.Conv2d(inplanes, planes, kernel_size=1, bias=False)
        self.bn1 = nn.BatchNorm2d(planes)
        self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=stride,
                               padding=1, bias=False)
        self.bn2 = nn.BatchNorm2d(planes)
        self.conv3 = nn.Conv2d(planes, planes * 4, kernel_size=1, bias=False)
        self.bn3 = nn.BatchNorm2d(planes * 4)
        self.relu = nn.ReLU(inplace=True)
        self.downsample = downsample
        self.stride = stride

    def forward(self, x):
        # Shortcut is projected only when shape changes (downsample set).
        shortcut = x if self.downsample is None else self.downsample(x)
        out = self.relu(self.bn1(self.conv1(x)))
        out = self.relu(self.bn2(self.conv2(out)))
        out = self.bn3(self.conv3(out))
        out += shortcut
        return self.relu(out)
class ResNet(nn.Module):
    """Shared ResNet skeleton.

    Subclasses must define the stem (conv1/bn1/relu/maxpool), the four
    stages (layer1..layer4), avgpool and fc, and set self.inplanes before
    calling _make_layer.
    """

    def __init__(self):
        super(ResNet, self).__init__()

    def _make_layer(self, block, planes, blocks, stride=1):
        # Project the shortcut when spatial size or channel count changes.
        downsample = None
        if stride != 1 or self.inplanes != planes * block.expansion:
            downsample = nn.Sequential(
                nn.Conv2d(self.inplanes, planes * block.expansion,
                          kernel_size=1, stride=stride, bias=False),
                nn.BatchNorm2d(planes * block.expansion),
            )
        stage = [block(self.inplanes, planes, stride, downsample)]
        self.inplanes = planes * block.expansion
        stage.extend(block(self.inplanes, planes) for _ in range(1, blocks))
        return nn.Sequential(*stage)

    def forward(self, x):
        x = self.maxpool(self.relu(self.bn1(self.conv1(x))))
        x = self.layer4(self.layer3(self.layer2(self.layer1(x))))
        x = self.avgpool(x)
        x = x.view(x.size(0), -1)  # flatten for the classifier head
        return self.fc(x)
class ResNet_imagenet(ResNet):
    """ImageNet ResNet: 7x7/2 stem, four residual stages, 7x7 avg pool.

    Defaults (Bottleneck, [3, 4, 23, 3]) correspond to ResNet-101; the
    resnet() factory below passes other configurations.
    """
    def __init__(self, num_classes=1000,
                 block=Bottleneck, layers=[3, 4, 23, 3]):
        # NOTE: mutable default `layers` is safe here — it is never mutated.
        super(ResNet_imagenet, self).__init__()
        self.inplanes = 64
        self.conv1 = nn.Conv2d(3, 64, kernel_size=7, stride=2, padding=3,
                               bias=False)
        self.bn1 = nn.BatchNorm2d(64)
        self.relu = nn.ReLU(inplace=True)
        self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)
        # Four residual stages; all but the first halve the resolution.
        self.layer1 = self._make_layer(block, 64, layers[0])
        self.layer2 = self._make_layer(block, 128, layers[1], stride=2)
        self.layer3 = self._make_layer(block, 256, layers[2], stride=2)
        self.layer4 = self._make_layer(block, 512, layers[3], stride=2)
        self.avgpool = nn.AvgPool2d(7)  # assumes 224x224 input -> 7x7 map
        self.fc = nn.Linear(512 * block.expansion, num_classes)
        init_model(self)
        # Epoch-indexed hyper-parameter schedule read by the training script.
        self.regime = {
            0: {'optimizer': 'SGD', 'lr': 1e-1,
                'weight_decay': 1e-4, 'momentum': 0.9},
            30: {'lr': 1e-2},
            60: {'lr': 1e-3, 'weight_decay': 0},
            90: {'lr': 1e-4}
        }
class ResNet_cifar10(ResNet):
    """CIFAR-10 ResNet: 3x3 stem, three stages of width 16/32/64.

    n = (depth - 2) // 6 blocks per stage; the default depth=18 gives n=2.
    """
    def __init__(self, num_classes=10,
                 block=BasicBlock, depth=18):
        super(ResNet_cifar10, self).__init__()
        self.inplanes = 16
        n = int((depth - 2) / 6)
        self.conv1 = nn.Conv2d(3, 16, kernel_size=3, stride=1, padding=1,
                               bias=False)
        self.bn1 = nn.BatchNorm2d(16)
        self.relu = nn.ReLU(inplace=True)
        # Identity stand-in so the shared ResNet.forward can be reused.
        # NOTE(review): plain lambdas are not Modules and make the model
        # unpicklable; they carry no parameters, so state_dict is unaffected.
        self.maxpool = lambda x: x
        self.layer1 = self._make_layer(block, 16, n)
        self.layer2 = self._make_layer(block, 32, n, stride=2)
        self.layer3 = self._make_layer(block, 64, n, stride=2)
        self.layer4 = lambda x: x
        self.avgpool = nn.AvgPool2d(8)  # 32x32 input -> 8x8 map after stage 3
        self.fc = nn.Linear(64, num_classes)
        init_model(self)
        # Epoch-indexed hyper-parameter schedule read by the training script.
        self.regime = {
            0: {'optimizer': 'SGD', 'lr': 1e-1,
                'weight_decay': 1e-4, 'momentum': 0.9},
            81: {'lr': 1e-2},
            122: {'lr': 1e-3, 'weight_decay': 0},
            164: {'lr': 1e-4}
        }
def resnet(**kwargs):
    """Build a full-precision ResNet selected by `dataset` and `depth` kwargs.

    Returns None for an unrecognized dataset or an unsupported ImageNet depth,
    matching the original fall-through behavior.
    """
    num_classes = kwargs.get('num_classes')
    depth = kwargs.get('depth')
    dataset = kwargs.get('dataset')
    if dataset == 'imagenet':
        # depth -> (block type, blocks per stage)
        imagenet_configs = {
            18: (BasicBlock, [2, 2, 2, 2]),
            34: (BasicBlock, [3, 4, 6, 3]),
            50: (Bottleneck, [3, 4, 6, 3]),
            101: (Bottleneck, [3, 4, 23, 3]),
            152: (Bottleneck, [3, 8, 36, 3]),
        }
        chosen = imagenet_configs.get(depth or 50)
        if chosen is not None:
            block_type, layer_counts = chosen
            return ResNet_imagenet(num_classes=num_classes or 1000,
                                   block=block_type, layers=layer_counts)
    elif dataset == 'cifar10':
        return ResNet_cifar10(num_classes=num_classes or 10,
                              block=BasicBlock, depth=depth or 18)
@ -0,0 +1,248 @@ | |||
import torch.nn as nn | |||
import torchvision.transforms as transforms | |||
import math | |||
from .binarized_modules import BinarizeLinear,BinarizeConv2d | |||
__all__ = ['resnet_binary'] | |||
def Binaryconv3x3(in_planes, out_planes, stride=1):
    """Binarized 3x3 convolution with unit padding and no bias."""
    conv_kwargs = dict(kernel_size=3, stride=stride, padding=1, bias=False)
    return BinarizeConv2d(in_planes, out_planes, **conv_kwargs)
def conv3x3(in_planes, out_planes, stride=1):
    """Full-precision 3x3 convolution with unit padding and no bias."""
    return nn.Conv2d(
        in_planes,
        out_planes,
        kernel_size=3,
        padding=1,
        stride=stride,
        bias=False,
    )
def init_model(model):
    """He-style init for binarized convs; identity init for batch-norm layers."""
    for module in model.modules():
        if isinstance(module, BinarizeConv2d):
            # Fan-out based normal init (He et al.).
            fan_out = module.kernel_size[0] * module.kernel_size[1] * module.out_channels
            module.weight.data.normal_(0, math.sqrt(2. / fan_out))
        elif isinstance(module, nn.BatchNorm2d):
            module.weight.data.fill_(1)
            module.bias.data.zero_()
class BasicBlock(nn.Module):
    """Binarized residual block: two 3x3 binary convs with BN + Hardtanh.

    When `do_bntan` is False the trailing BatchNorm+Hardtanh pair is skipped,
    letting the enclosing network apply its own head normalization.

    Fix: removed the leftover `import pdb; pdb.set_trace()` debug trap that
    halted execution whenever the residual exceeded 1 on a downsampled path.
    """
    expansion = 1

    def __init__(self, inplanes, planes, stride=1, downsample=None, do_bntan=True):
        super(BasicBlock, self).__init__()
        self.conv1 = Binaryconv3x3(inplanes, planes, stride)
        self.bn1 = nn.BatchNorm2d(planes)
        self.tanh1 = nn.Hardtanh(inplace=True)
        self.conv2 = Binaryconv3x3(planes, planes)
        self.tanh2 = nn.Hardtanh(inplace=True)
        self.bn2 = nn.BatchNorm2d(planes)
        self.downsample = downsample
        self.do_bntan = do_bntan
        self.stride = stride

    def forward(self, x):
        # Clone so the residual survives the in-place Hardtanh on the main path.
        residual = x.clone()

        out = self.conv1(x)
        out = self.bn1(out)
        out = self.tanh1(out)
        out = self.conv2(out)

        if self.downsample is not None:
            residual = self.downsample(residual)

        out += residual
        if self.do_bntan:
            out = self.bn2(out)
            out = self.tanh2(out)
        return out
class Bottleneck(nn.Module):
    """Binarized bottleneck residual block (1x1 -> 3x3 -> 1x1 binary convs).

    Fixes relative to the original:
      * removed an unconditional `import pdb; pdb.set_trace()` at the top of
        forward(), which made the block unusable;
      * `forward` read `self.do_bntan` and `self.tanh2`, neither of which was
        ever defined (AttributeError); `do_bntan` is now a constructor flag
        (default True, backward-compatible) and the shared Hardtanh is reused;
      * the old post-residual `self.bn2(out)` was a channel mismatch (bn2 has
        `planes` channels, `out` has `planes * 4`), so only the nonlinearity
        is applied after the residual addition.
    """
    expansion = 4

    def __init__(self, inplanes, planes, stride=1, downsample=None, do_bntan=True):
        super(Bottleneck, self).__init__()
        self.conv1 = BinarizeConv2d(inplanes, planes, kernel_size=1, bias=False)
        self.bn1 = nn.BatchNorm2d(planes)
        self.conv2 = BinarizeConv2d(planes, planes, kernel_size=3, stride=stride,
                                    padding=1, bias=False)
        self.bn2 = nn.BatchNorm2d(planes)
        self.conv3 = BinarizeConv2d(planes, planes * 4, kernel_size=1, bias=False)
        self.bn3 = nn.BatchNorm2d(planes * 4)
        self.tanh = nn.Hardtanh(inplace=True)
        self.downsample = downsample
        self.do_bntan = do_bntan
        self.stride = stride

    def forward(self, x):
        residual = x

        out = self.conv1(x)
        out = self.bn1(out)
        out = self.tanh(out)

        out = self.conv2(out)
        out = self.bn2(out)
        out = self.tanh(out)

        out = self.conv3(out)
        out = self.bn3(out)

        if self.downsample is not None:
            residual = self.downsample(x)

        out += residual
        if self.do_bntan:
            out = self.tanh(out)
        return out
class ResNet(nn.Module):
    """Shared skeleton for the binarized ResNet variants.

    Subclasses must attach the stem (`conv1`, `bn1`, `tanh1`, `maxpool`), the
    residual stages (`layer1`..`layer4`), and the head (`avgpool`, `bn2`,
    `tanh2`, `fc`, `bn3`, `logsoftmax`) plus an `inplanes` counter before
    `forward` is called.
    """

    def __init__(self):
        super(ResNet, self).__init__()

    def _make_layer(self, block, planes, blocks, stride=1,do_bntan=True):
        # Project the shortcut with a binarized 1x1 conv whenever the spatial
        # resolution or the channel count changes.
        downsample = None
        if stride != 1 or self.inplanes != planes * block.expansion:
            downsample = nn.Sequential(
                BinarizeConv2d(self.inplanes, planes * block.expansion,
                               kernel_size=1, stride=stride, bias=False),
                nn.BatchNorm2d(planes * block.expansion),
            )

        layers = []
        layers.append(block(self.inplanes, planes, stride, downsample))
        self.inplanes = planes * block.expansion
        # The last block is emitted separately so `do_bntan` can be disabled
        # on it. NOTE(review): for blocks == 1 this still emits 2 blocks —
        # verify callers never request a single-block stage.
        for i in range(1, blocks-1):
            layers.append(block(self.inplanes, planes))
        layers.append(block(self.inplanes, planes,do_bntan=do_bntan))
        return nn.Sequential(*layers)

    def forward(self, x):
        x = self.conv1(x)
        x = self.maxpool(x)
        # NOTE(review): bn1/tanh1 run after maxpool here, unlike torchvision's
        # conv -> bn -> act -> pool ordering — confirm this is intentional.
        x = self.bn1(x)
        x = self.tanh1(x)
        x = self.layer1(x)
        x = self.layer2(x)
        x = self.layer3(x)
        x = self.layer4(x)
        x = self.avgpool(x)
        x = x.view(x.size(0), -1)
        # Head: 1-D BN -> Hardtanh -> binarized fc -> BN -> log-softmax.
        x = self.bn2(x)
        x = self.tanh2(x)
        x = self.fc(x)
        x = self.bn3(x)
        x = self.logsoftmax(x)
        return x
class ResNet_imagenet(ResNet):
    """Binarized ResNet for ImageNet-scale inputs.

    Fix: the original defined only `self.tanh`, but the shared
    `ResNet.forward` reads `tanh1`, `tanh2`, `bn2`, `bn3` and `logsoftmax`,
    none of which were defined here — every forward pass raised
    AttributeError. The missing head modules are now created, mirroring the
    CIFAR-10 variant below.
    """

    def __init__(self, num_classes=1000,
                 block=Bottleneck, layers=[3, 4, 23, 3]):
        super(ResNet_imagenet, self).__init__()
        self.inplanes = 64
        self.conv1 = BinarizeConv2d(3, 64, kernel_size=7, stride=2, padding=3,
                                    bias=False)
        self.bn1 = nn.BatchNorm2d(64)
        # ResNet.forward applies tanh1 after the stem and tanh2 before fc.
        self.tanh1 = nn.Hardtanh(inplace=True)
        self.tanh2 = nn.Hardtanh(inplace=True)
        self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)
        self.layer1 = self._make_layer(block, 64, layers[0])
        self.layer2 = self._make_layer(block, 128, layers[1], stride=2)
        self.layer3 = self._make_layer(block, 256, layers[2], stride=2)
        self.layer4 = self._make_layer(block, 512, layers[3], stride=2)
        self.avgpool = nn.AvgPool2d(7)  # assumes 224x224 input -> 7x7 maps — TODO confirm
        # Head normalization used by the shared forward pass.
        self.bn2 = nn.BatchNorm1d(512 * block.expansion)
        self.bn3 = nn.BatchNorm1d(num_classes)
        self.logsoftmax = nn.LogSoftmax()
        self.fc = BinarizeLinear(512 * block.expansion, num_classes)
        init_model(self)
        # Epoch-indexed training schedule consumed by adjust_optimizer().
        self.regime = {
            0: {'optimizer': 'SGD', 'lr': 1e-1,
                'weight_decay': 1e-4, 'momentum': 0.9},
            30: {'lr': 1e-2},
            60: {'lr': 1e-3, 'weight_decay': 0},
            90: {'lr': 1e-4}
        }
class ResNet_cifar10(ResNet):
    """Binarized ResNet for 32x32 CIFAR-10 images.

    Channels are inflated 5x relative to the full-precision variant to
    compensate for the capacity lost to binarization. `maxpool` and `layer4`
    are identity lambdas so the shared `ResNet.forward` still applies.
    """

    def __init__(self, num_classes=10,
                 block=BasicBlock, depth=18):
        super(ResNet_cifar10, self).__init__()
        self.inflate = 5  # channel-inflation factor for binarized capacity
        self.inplanes = 16*self.inflate
        # Blocks per stage for the 6n+2 CIFAR recipe (depth=18 -> n=2).
        n = int((depth - 2) / 6)
        self.conv1 = BinarizeConv2d(3, 16*self.inflate, kernel_size=3, stride=1, padding=1,
                               bias=False)
        self.maxpool = lambda x: x  # identity: no pooling in the CIFAR stem
        self.bn1 = nn.BatchNorm2d(16*self.inflate)
        self.tanh1 = nn.Hardtanh(inplace=True)
        self.tanh2 = nn.Hardtanh(inplace=True)
        self.layer1 = self._make_layer(block, 16*self.inflate, n)
        self.layer2 = self._make_layer(block, 32*self.inflate, n, stride=2)
        # Last stage skips its final BN+Hardtanh; the flat head below applies
        # its own 1-D BatchNorm instead.
        self.layer3 = self._make_layer(block, 64*self.inflate, n, stride=2,do_bntan=False)
        self.layer4 = lambda x: x  # identity: CIFAR variant has 3 stages
        self.avgpool = nn.AvgPool2d(8)
        self.bn2 = nn.BatchNorm1d(64*self.inflate)
        # NOTE(review): hard-coded 10 assumes num_classes == 10 — bn3 should
        # arguably be nn.BatchNorm1d(num_classes); confirm before changing.
        self.bn3 = nn.BatchNorm1d(10)
        self.logsoftmax = nn.LogSoftmax()
        self.fc = BinarizeLinear(64*self.inflate, num_classes)
        init_model(self)
        #self.regime = {
        #    0: {'optimizer': 'SGD', 'lr': 1e-1,
        #        'weight_decay': 1e-4, 'momentum': 0.9},
        #    81: {'lr': 1e-4},
        #    122: {'lr': 1e-5, 'weight_decay': 0},
        #    164: {'lr': 1e-6}
        #}
        # Epoch-indexed Adam schedule consumed by adjust_optimizer().
        self.regime = {
            0: {'optimizer': 'Adam', 'lr': 5e-3},
            101: {'lr': 1e-3},
            142: {'lr': 5e-4},
            184: {'lr': 1e-4},
            220: {'lr': 1e-5}
        }
def resnet_binary(**kwargs):
    """Build a binarized ResNet selected by `dataset` and `depth` kwargs.

    Returns None for an unrecognized dataset or an unsupported ImageNet depth,
    matching the original fall-through behavior.
    """
    num_classes = kwargs.get('num_classes')
    depth = kwargs.get('depth')
    dataset = kwargs.get('dataset')
    if dataset == 'imagenet':
        # depth -> (block type, blocks per stage)
        imagenet_configs = {
            18: (BasicBlock, [2, 2, 2, 2]),
            34: (BasicBlock, [3, 4, 6, 3]),
            50: (Bottleneck, [3, 4, 6, 3]),
            101: (Bottleneck, [3, 4, 23, 3]),
            152: (Bottleneck, [3, 8, 36, 3]),
        }
        chosen = imagenet_configs.get(depth or 50)
        if chosen is not None:
            block_type, layer_counts = chosen
            return ResNet_imagenet(num_classes=num_classes or 1000,
                                   block=block_type, layers=layer_counts)
    elif dataset == 'cifar10':
        return ResNet_cifar10(num_classes=num_classes or 10,
                              block=BasicBlock, depth=depth or 18)
@ -0,0 +1,69 @@ | |||
import torch.nn as nn | |||
import torchvision.transforms as transforms | |||
class AlexNetOWT_BN(nn.Module):
    """Batch-normalized VGG-style conv net (named AlexNet-OWT in this repo).

    Features expect inputs whose spatial size reduces to 4x4 after three
    stride-2 max-pools (e.g. 32x32); the classifier emits log-probabilities.

    Fixes: the classifier list was missing a comma between the final Linear
    and the log-softmax layer (SyntaxError), and used the nonexistent
    `nn.LogSoftMax` spelling (correct class is `nn.LogSoftmax`).
    """

    def __init__(self, num_classes=1000):
        super(AlexNetOWT_BN, self).__init__()
        self.features = nn.Sequential(
            nn.Conv2d(3, 128, kernel_size=3, stride=1, padding=1,
                      bias=False),
            nn.BatchNorm2d(128),
            nn.ReLU(inplace=True),
            nn.Conv2d(128, 128, kernel_size=3, padding=1, bias=False),
            nn.MaxPool2d(kernel_size=2, stride=2),
            nn.ReLU(inplace=True),
            nn.BatchNorm2d(128),
            nn.Conv2d(128, 256, kernel_size=3, padding=1, bias=False),
            nn.ReLU(inplace=True),
            nn.BatchNorm2d(256),
            nn.Conv2d(256, 256, kernel_size=3, padding=1, bias=False),
            nn.MaxPool2d(kernel_size=2, stride=2),
            nn.ReLU(inplace=True),
            nn.BatchNorm2d(256),
            nn.Conv2d(256, 512, kernel_size=3, padding=1, bias=False),
            nn.ReLU(inplace=True),
            nn.BatchNorm2d(512),
            nn.Conv2d(512, 512, kernel_size=3, padding=1, bias=False),
            nn.MaxPool2d(kernel_size=2, stride=2),
            nn.ReLU(inplace=True),
            nn.BatchNorm2d(512),
        )
        self.classifier = nn.Sequential(
            nn.Linear(512 * 4 * 4, 1024, bias=False),
            nn.BatchNorm1d(1024),
            nn.ReLU(inplace=True),
            nn.Dropout(0.5),
            nn.Linear(1024, 1024, bias=False),
            nn.BatchNorm1d(1024),
            nn.ReLU(inplace=True),
            nn.Dropout(0.5),
            nn.Linear(1024, num_classes),
            nn.LogSoftmax(dim=1)
        )
        # Epoch-indexed training schedule consumed by adjust_optimizer().
        self.regime = {
            0: {'optimizer': 'SGD', 'lr': 1e-2,
                'weight_decay': 5e-4, 'momentum': 0.9},
            10: {'lr': 5e-3},
            15: {'lr': 1e-3, 'weight_decay': 0},
            20: {'lr': 5e-4},
            25: {'lr': 1e-4}
        }

    def forward(self, x):
        """Run the conv features, flatten to 512*4*4, then the classifier."""
        x = self.features(x)
        x = x.view(-1, 512 * 4 * 4)
        x = self.classifier(x)
        return x
def model(**kwargs):
    """Factory used by the training script; honours an optional num_classes."""
    return AlexNetOWT_BN(kwargs.get('num_classes', 1000))
@ -0,0 +1,80 @@ | |||
import torch | |||
import torch.nn as nn | |||
import torchvision.transforms as transforms | |||
from torch.autograd import Function | |||
from .binarized_modules import BinarizeLinear,BinarizeConv2d | |||
class VGG_Cifar10(nn.Module):
    """Binarized VGG-style network for CIFAR-10 (BinaryNet architecture).

    Convolutions and linears are binarized; each is followed by BatchNorm and
    a Hardtanh that clips activations to [-1, 1] before the next binarization.
    """

    def __init__(self, num_classes=1000):
        # NOTE(review): default num_classes=1000 looks copied from ImageNet
        # code; the vgg_cifar10_binary factory below passes 10.
        super(VGG_Cifar10, self).__init__()
        self.infl_ratio=3;  # channel-inflation factor compensating for binarization
        self.features = nn.Sequential(
            BinarizeConv2d(3, 128*self.infl_ratio, kernel_size=3, stride=1, padding=1,
                           bias=True),
            nn.BatchNorm2d(128*self.infl_ratio),
            nn.Hardtanh(inplace=True),

            BinarizeConv2d(128*self.infl_ratio, 128*self.infl_ratio, kernel_size=3, padding=1, bias=True),
            nn.MaxPool2d(kernel_size=2, stride=2),
            nn.BatchNorm2d(128*self.infl_ratio),
            nn.Hardtanh(inplace=True),

            BinarizeConv2d(128*self.infl_ratio, 256*self.infl_ratio, kernel_size=3, padding=1, bias=True),
            nn.BatchNorm2d(256*self.infl_ratio),
            nn.Hardtanh(inplace=True),

            BinarizeConv2d(256*self.infl_ratio, 256*self.infl_ratio, kernel_size=3, padding=1, bias=True),
            nn.MaxPool2d(kernel_size=2, stride=2),
            nn.BatchNorm2d(256*self.infl_ratio),
            nn.Hardtanh(inplace=True),

            BinarizeConv2d(256*self.infl_ratio, 512*self.infl_ratio, kernel_size=3, padding=1, bias=True),
            nn.BatchNorm2d(512*self.infl_ratio),
            nn.Hardtanh(inplace=True),

            # Final conv deflates back to 512 channels before the flat head.
            BinarizeConv2d(512*self.infl_ratio, 512, kernel_size=3, padding=1, bias=True),
            nn.MaxPool2d(kernel_size=2, stride=2),
            nn.BatchNorm2d(512),
            nn.Hardtanh(inplace=True)
        )
        self.classifier = nn.Sequential(
            BinarizeLinear(512 * 4 * 4, 1024, bias=True),
            nn.BatchNorm1d(1024),
            nn.Hardtanh(inplace=True),
            #nn.Dropout(0.5),
            BinarizeLinear(1024, 1024, bias=True),
            nn.BatchNorm1d(1024),
            nn.Hardtanh(inplace=True),
            #nn.Dropout(0.5),
            BinarizeLinear(1024, num_classes, bias=True),
            nn.BatchNorm1d(num_classes, affine=False),
            nn.LogSoftmax()
        )
        # Epoch-indexed Adam schedule consumed by adjust_optimizer().
        self.regime = {
            0: {'optimizer': 'Adam', 'betas': (0.9, 0.999),'lr': 5e-3},
            40: {'lr': 1e-3},
            80: {'lr': 5e-4},
            100: {'lr': 1e-4},
            120: {'lr': 5e-5},
            140: {'lr': 1e-5}
        }

    def forward(self, x):
        """Run the binarized features, flatten to 512*4*4, then the classifier."""
        x = self.features(x)
        x = x.view(-1, 512 * 4 * 4)
        x = self.classifier(x)
        return x
def vgg_cifar10_binary(**kwargs):
    """Factory for the binarized CIFAR-10 VGG; honours an optional num_classes."""
    return VGG_Cifar10(kwargs.get('num_classes', 10))
@ -0,0 +1,198 @@ | |||
import torch | |||
import torchvision.transforms as transforms | |||
import random | |||
# Per-channel statistics used to normalize ImageNet-style RGB inputs.
__imagenet_stats = {'mean': [0.485, 0.456, 0.406],
                    'std': [0.229, 0.224, 0.225]}

# PCA eigendecomposition of ImageNet RGB values, consumed by the Lighting
# transform below (AlexNet "fancy PCA" color augmentation).
__imagenet_pca = {
    'eigval': torch.Tensor([0.2175, 0.0188, 0.0045]),
    'eigvec': torch.Tensor([
        [-0.5675,  0.7192,  0.4009],
        [-0.5808, -0.0045, -0.8140],
        [-0.5836, -0.6948,  0.4203],
    ])
}
def scale_crop(input_size, scale_size=None, normalize=__imagenet_stats):
    """Deterministic eval transform: optional resize, then a center crop."""
    pipeline = [
        transforms.CenterCrop(input_size),
        transforms.ToTensor(),
        transforms.Normalize(**normalize),
    ]
    if scale_size != input_size:
        pipeline.insert(0, transforms.Scale(scale_size))
    return transforms.Compose(pipeline)
def scale_random_crop(input_size, scale_size=None, normalize=__imagenet_stats):
    """Training transform: optional resize, then a random crop.

    Fix: the composed transform was built but never returned — the function
    always returned None, crashing any caller that used its result.
    """
    t_list = [
        transforms.RandomCrop(input_size),
        transforms.ToTensor(),
        transforms.Normalize(**normalize),
    ]
    if scale_size != input_size:
        t_list = [transforms.Scale(scale_size)] + t_list

    return transforms.Compose(t_list)
def pad_random_crop(input_size, scale_size=None, normalize=__imagenet_stats):
    """Training transform: padded random crop + horizontal flip + normalize."""
    # Pad symmetrically so a random crop of input_size fits inside scale_size.
    padding = int((scale_size - input_size) / 2)
    steps = [
        transforms.RandomCrop(input_size, padding=padding),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        transforms.Normalize(**normalize),
    ]
    return transforms.Compose(steps)
def inception_preproccess(input_size, normalize=__imagenet_stats):
    """Inception-style training transform: random sized crop + flip + normalize."""
    steps = [
        transforms.RandomSizedCrop(input_size),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        transforms.Normalize(**normalize),
    ]
    return transforms.Compose(steps)
def inception_color_preproccess(input_size, normalize=__imagenet_stats):
    """Inception-style training transform with color jitter and PCA lighting noise."""
    steps = [
        transforms.RandomSizedCrop(input_size),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        # Tensor-space color augmentation (classes defined below in this file).
        ColorJitter(
            brightness=0.4,
            contrast=0.4,
            saturation=0.4,
        ),
        Lighting(0.1, __imagenet_pca['eigval'], __imagenet_pca['eigvec']),
        transforms.Normalize(**normalize),
    ]
    return transforms.Compose(steps)
def get_transform(name='imagenet', input_size=None,
                  scale_size=None, normalize=None, augment=True):
    """Return the preprocessing pipeline for the named dataset.

    `augment=True` yields the randomized training transform, otherwise the
    deterministic evaluation transform. MNIST always uses its own grayscale
    normalization, ignoring the `normalize` argument. Unknown names yield
    None, matching the original fall-through behavior.
    """
    normalize = normalize or __imagenet_stats
    if name == 'imagenet':
        input_size = input_size or 224
        if augment:
            return inception_preproccess(input_size, normalize=normalize)
        return scale_crop(input_size=input_size,
                          scale_size=scale_size or 256,
                          normalize=normalize)
    if 'cifar' in name:
        input_size = input_size or 32
        if augment:
            return pad_random_crop(input_size,
                                   scale_size=scale_size or 40,
                                   normalize=normalize)
        return scale_crop(input_size=input_size,
                          scale_size=scale_size or 32,
                          normalize=normalize)
    if name == 'mnist':
        normalize = {'mean': [0.5], 'std': [0.5]}
        input_size = input_size or 28
        if augment:
            return pad_random_crop(input_size,
                                   scale_size=scale_size or 32,
                                   normalize=normalize)
        return scale_crop(input_size=input_size,
                          scale_size=scale_size or 32,
                          normalize=normalize)
class Lighting(object):
    """AlexNet-style PCA ("fancy PCA") lighting-noise augmentation."""

    def __init__(self, alphastd, eigval, eigvec):
        self.alphastd = alphastd  # std-dev of the per-component noise
        self.eigval = eigval      # PCA eigenvalues of the RGB channels
        self.eigvec = eigvec      # PCA eigenvectors (3x3)

    def __call__(self, img):
        if self.alphastd == 0:
            # Augmentation disabled: pass the image through untouched.
            return img
        alpha = img.new().resize_(3).normal_(0, self.alphastd)
        scaled = self.eigvec.type_as(img).clone()
        scaled = scaled.mul(alpha.view(1, 3).expand(3, 3))
        scaled = scaled.mul(self.eigval.view(1, 3).expand(3, 3))
        rgb = scaled.sum(1).squeeze()
        return img.add(rgb.view(3, 1, 1).expand_as(img))
class Grayscale(object):
    """Convert an RGB tensor to 3-channel luma (ITU-R BT.601 weights).

    Fix: the original used the `Tensor.add_(alpha, tensor)` overload that was
    deprecated and later removed from PyTorch; the arithmetic is rewritten
    with supported out-of-place ops, producing the same values.
    """

    def __call__(self, img):
        gs = img.clone()
        # luma = 0.299 R + 0.587 G + 0.114 B, broadcast to every channel.
        luma = gs[0].mul(0.299) + gs[1].mul(0.587) + gs[2].mul(0.114)
        gs[0].copy_(luma)
        gs[1].copy_(luma)
        gs[2].copy_(luma)
        return gs
class Saturation(object):
    """Randomly desaturate by blending toward the grayscale image."""

    def __init__(self, var):
        self.var = var  # maximum blend factor

    def __call__(self, img):
        target = Grayscale()(img)
        blend = random.uniform(0, self.var)
        return img.lerp(target, blend)
class Brightness(object):
    """Randomly darken by blending toward an all-black image."""

    def __init__(self, var):
        self.var = var  # maximum blend factor

    def __call__(self, img):
        black = img.new().resize_as_(img).zero_()
        blend = random.uniform(0, self.var)
        return img.lerp(black, blend)
class Contrast(object):
    """Randomly reduce contrast by blending toward the mean-gray image."""

    def __init__(self, var):
        self.var = var  # maximum blend factor

    def __call__(self, img):
        target = Grayscale()(img)
        target.fill_(target.mean())
        blend = random.uniform(0, self.var)
        return img.lerp(target, blend)
class RandomOrder(object):
    """Apply a list of transforms in a freshly shuffled order on each call."""

    def __init__(self, transforms):
        self.transforms = transforms

    def __call__(self, img):
        if self.transforms is None:
            return img
        for idx in torch.randperm(len(self.transforms)):
            img = self.transforms[idx](img)
        return img
class ColorJitter(RandomOrder):
    """Randomly-ordered brightness/contrast/saturation jitter."""

    def __init__(self, brightness=0.4, contrast=0.4, saturation=0.4):
        jitters = []
        if brightness != 0:
            jitters.append(Brightness(brightness))
        if contrast != 0:
            jitters.append(Contrast(contrast))
        if saturation != 0:
            jitters.append(Saturation(saturation))
        self.transforms = jitters
@ -0,0 +1,5 @@ | |||
2021-04-15 15:36:47 - INFO - saving to ./results/2021-04-15_15-36-47 | |||
2021-04-15 15:36:47 - DEBUG - run arguments: Namespace(batch_size=256, dataset='imagenet', epochs=2500, evaluate=None, gpus='0', input_size=None, lr=0.1, model='alexnet', model_config='', momentum=0.9, optimizer='SGD', print_freq=10, results_dir='./results', resume='', save='2021-04-15_15-36-47', start_epoch=0, type='torch.cuda.FloatTensor', weight_decay=0.0001, workers=8) | |||
2021-04-15 15:36:47 - INFO - creating model alexnet | |||
2021-04-15 15:36:48 - INFO - created model with configuration: {'input_size': None, 'dataset': 'imagenet'} | |||
2021-04-15 15:36:48 - INFO - number of parameters: 61110184 |
@ -0,0 +1,5 @@ | |||
2021-04-15 15:37:52 - INFO - saving to ./results/2021-04-15_15-37-52 | |||
2021-04-15 15:37:52 - DEBUG - run arguments: Namespace(batch_size=256, dataset='imagenet', epochs=2500, evaluate=None, gpus='0', input_size=None, lr=0.1, model='resnet', model_config='', momentum=0.9, optimizer='SGD', print_freq=10, results_dir='./results', resume='', save='2021-04-15_15-37-52', start_epoch=0, type='torch.cuda.FloatTensor', weight_decay=0.0001, workers=8) | |||
2021-04-15 15:37:52 - INFO - creating model resnet | |||
2021-04-15 15:37:52 - INFO - created model with configuration: {'input_size': None, 'dataset': 'imagenet'} | |||
2021-04-15 15:37:52 - INFO - number of parameters: 25557032 |
@ -0,0 +1,5 @@ | |||
2021-04-15 15:38:16 - INFO - saving to ./results/2021-04-15_15-38-16 | |||
2021-04-15 15:38:16 - DEBUG - run arguments: Namespace(batch_size=256, dataset='imagenet', epochs=2500, evaluate=None, gpus='0', input_size=None, lr=0.1, model='alexnet', model_config='', momentum=0.9, optimizer='SGD', print_freq=10, results_dir='./results', resume='', save='2021-04-15_15-38-16', start_epoch=0, type='torch.cuda.FloatTensor', weight_decay=0.0001, workers=8) | |||
2021-04-15 15:38:16 - INFO - creating model alexnet | |||
2021-04-15 15:38:17 - INFO - created model with configuration: {'input_size': None, 'dataset': 'imagenet'} | |||
2021-04-15 15:38:17 - INFO - number of parameters: 61110184 |
@ -0,0 +1,160 @@ | |||
import os | |||
import torch | |||
import logging.config | |||
import shutil | |||
import pandas as pd | |||
from bokeh.io import output_file, save, show | |||
from bokeh.plotting import figure | |||
from bokeh.layouts import column | |||
#from bokeh.charts import Line, defaults | |||
# | |||
#defaults.width = 800 | |||
#defaults.height = 400 | |||
#defaults.tools = 'pan,box_zoom,wheel_zoom,box_select,hover,resize,reset,save' | |||
def setup_logging(log_file='log.txt'):
    """Send DEBUG+ records to `log_file` and mirror INFO+ to the console."""
    logging.basicConfig(level=logging.DEBUG,
                        format="%(asctime)s - %(levelname)s - %(message)s",
                        datefmt="%Y-%m-%d %H:%M:%S",
                        filename=log_file,
                        filemode='w')
    console_handler = logging.StreamHandler()
    console_handler.setLevel(logging.INFO)
    console_handler.setFormatter(logging.Formatter('%(message)s'))
    logging.getLogger('').addHandler(console_handler)
class ResultsLog(object):
    """Accumulates per-epoch scalar results and renders them to CSV/HTML.

    Fixes:
      * `add` used `DataFrame.append`, removed in pandas 2.0 — now pd.concat;
      * `load` called the nonexistent `self.results.read_csv(path)` and
        discarded the result; it now assigns the parsed frame back.
    """

    def __init__(self, path='results.csv', plot_path=None):
        self.path = path  # CSV destination
        self.plot_path = plot_path or (self.path + '.html')  # bokeh HTML output
        self.figures = []  # pending bokeh figures, flushed by save()/show()
        self.results = None  # lazily-created DataFrame of logged rows

    def add(self, **kwargs):
        """Append one row of named scalar results."""
        df = pd.DataFrame([list(kwargs.values())], columns=list(kwargs.keys()))
        if self.results is None:
            self.results = df
        else:
            # DataFrame.append was removed in pandas 2.0.
            self.results = pd.concat([self.results, df], ignore_index=True)

    def save(self, title='Training Results'):
        """Write pending figures to HTML and the results table to CSV."""
        if len(self.figures) > 0:
            if os.path.isfile(self.plot_path):
                os.remove(self.plot_path)
            output_file(self.plot_path, title=title)
            plot = column(*self.figures)
            save(plot)
            self.figures = []
        self.results.to_csv(self.path, index=False, index_label=False)

    def load(self, path=None):
        """Re-populate results from a previously saved CSV, if it exists."""
        path = path or self.path
        if os.path.isfile(path):
            self.results = pd.read_csv(path)

    def show(self):
        """Render pending figures in the browser without saving them."""
        if len(self.figures) > 0:
            plot = column(*self.figures)
            show(plot)

    def image(self, *kargs, **kwargs):
        """Queue an image figure for the next save()/show()."""
        fig = figure()
        fig.image(*kargs, **kwargs)
        self.figures.append(fig)
def save_checkpoint(state, is_best, path='.', filename='checkpoint.pth.tar', save_all=False):
    """Serialize `state`; optionally keep a best-model and a per-epoch copy."""
    target = os.path.join(path, filename)
    torch.save(state, target)
    if is_best:
        shutil.copyfile(target, os.path.join(path, 'model_best.pth.tar'))
    if save_all:
        epoch_name = 'checkpoint_epoch_%s.pth.tar' % state['epoch']
        shutil.copyfile(target, os.path.join(path, epoch_name))
class AverageMeter(object):
    """Tracks the most recent value plus a running sum/count average."""

    def __init__(self):
        self.reset()

    def reset(self):
        """Clear all accumulated statistics."""
        self.val = 0
        self.avg = 0
        self.sum = 0
        self.count = 0

    def update(self, val, n=1):
        """Record `val` observed `n` times and refresh the running average."""
        self.val = val
        self.count += n
        self.sum += val * n
        self.avg = self.sum / self.count
# Name -> constructor registry for optimizers selectable via a model's
# `regime` schedule; consumed by adjust_optimizer() below.
__optimizers = {
    'SGD': torch.optim.SGD,
    'ASGD': torch.optim.ASGD,
    'Adam': torch.optim.Adam,
    'Adamax': torch.optim.Adamax,
    'Adagrad': torch.optim.Adagrad,
    'Adadelta': torch.optim.Adadelta,
    'Rprop': torch.optim.Rprop,
    'RMSprop': torch.optim.RMSprop
}
def adjust_optimizer(optimizer, epoch, config):
    """Reconfigures the optimizer according to epoch and config dict.

    `config` either maps epoch numbers to setting dicts or is a callable of
    the epoch. Settings are "sticky": every entry up to and including the
    current epoch is replayed in order, so a later entry only needs to list
    the values it changes.
    """
    def modify_optimizer(optimizer, setting):
        # Switching methods rebuilds the optimizer around the existing
        # param_groups; any per-parameter state (e.g. momentum) is discarded.
        if 'optimizer' in setting:
            optimizer = __optimizers[setting['optimizer']](
                optimizer.param_groups)
            logging.debug('OPTIMIZER - setting method = %s' %
                          setting['optimizer'])
        # Only keys already present in a param_group are overridden.
        for param_group in optimizer.param_groups:
            for key in param_group.keys():
                if key in setting:
                    logging.debug('OPTIMIZER - setting %s = %s' %
                                  (key, setting[key]))
                    param_group[key] = setting[key]
        return optimizer

    if callable(config):
        optimizer = modify_optimizer(optimizer, config(epoch))
    else:
        for e in range(epoch + 1):  # run over all epochs - sticky setting
            if e in config:
                optimizer = modify_optimizer(optimizer, config[e])

    return optimizer
def accuracy(output, target, topk=(1,)):
    """Computes the precision@k (in percent) for the specified values of k.

    Fix: `correct[:k]` is non-contiguous for k > 1, so the old `.view(-1)`
    raised a RuntimeError on modern PyTorch; `.reshape(-1)` handles both the
    contiguous and non-contiguous cases.
    """
    maxk = max(topk)
    batch_size = target.size(0)

    # pred: (maxk, batch) ranking of class indices per sample.
    _, pred = output.float().topk(maxk, 1, True, True)
    pred = pred.t()
    correct = pred.eq(target.view(1, -1).expand_as(pred))

    res = []
    for k in topk:
        correct_k = correct[:k].reshape(-1).float().sum(0)
        res.append(correct_k.mul_(100.0 / batch_size))
    return res
# kernel_img = model.features[0][0].kernel.data.clone() | |||
# kernel_img.add_(-kernel_img.min()) | |||
# kernel_img.mul_(255 / kernel_img.max()) | |||
# save_image(kernel_img, 'kernel%s.jpg' % epoch) |
@ -0,0 +1,154 @@ | |||
import torch | |||
import numpy as np | |||
import cv2, os, sys | |||
import pandas as pd | |||
from torch.utils.data import Dataset | |||
from matplotlib import pyplot as plt | |||
from torch.utils.data import ConcatDataset, DataLoader, Subset | |||
import torch.nn as nn | |||
import torchvision.transforms as transforms | |||
from torchvision.datasets import DatasetFolder | |||
from PIL import Image | |||
import torchvision.models as models | |||
# Training hyper-parameters for the thermal-image classifier script.
batch_size = 32
num_epoch = 10

# Training-time preprocessing: grayscale 40x30 crops converted to tensors.
train_tfm = transforms.Compose([
    transforms.Grayscale(),
    transforms.RandomResizedCrop((40,30)),
    transforms.Resize((40, 30)),
    transforms.ToTensor(),
    #transforms.TenCrop((40,30)),
    #transforms.Normalize(0.5,0.5),
])

# Evaluation-time preprocessing: deterministic grayscale resize only.
test_tfm = transforms.Compose([
    transforms.Grayscale(),
    transforms.Resize((40, 30)),
    transforms.ToTensor()
])
''' | |||
class Classifier(nn.Module): | |||
def __init__(self): | |||
super(Classifier, self).__init__() | |||
self.cnn_layers = nn.Sequential( | |||
#input_size(1,30,40) | |||
nn.Conv2d(1, 16, 3, 1), #output_size(16,28,38) | |||
nn.BatchNorm2d(16), | |||
nn.ReLU(), | |||
nn.Dropout(0.2), | |||
nn.MaxPool2d(kernel_size = 2), #output_size(16,14,19) | |||
nn.Conv2d(16, 24, 3, 1), #output_size(24,12,17) | |||
nn.BatchNorm2d(24), | |||
nn.ReLU(), | |||
nn.Dropout(0.2), | |||
nn.MaxPool2d(kernel_size = 2), #output_size(24,6,8) | |||
nn.Conv2d(24, 32, 3, 1), #output_size(32,4,6) | |||
nn.BatchNorm2d(32), | |||
nn.ReLU(), | |||
nn.Dropout(0.2), | |||
nn.MaxPool2d(kernel_size = 2) #ouput_size(32,2,3) | |||
) | |||
self.fc_layers = nn.Sequential( | |||
nn.Linear(32 * 2 * 3, 32), | |||
nn.ReLU(), | |||
nn.Dropout(0.2), | |||
nn.Linear(32,8) | |||
) | |||
def forward(self, x): | |||
x = self.cnn_layers(x) | |||
x = x.flatten(1) | |||
x = self.fc_layers(x) | |||
return x | |||
''' | |||
def main():
    """Train a ResNet-18 variant on grayscale thermal images, then report test accuracy.

    Fix: the original switched to `model.eval()` for testing at the end of
    each epoch but never switched back, so every epoch after the first
    trained with frozen BatchNorm statistics and disabled dropout;
    `model.train()` now begins each training phase.
    """
    train_set = DatasetFolder("./dataset/data_0705/lepton/train", loader=lambda x: Image.open(x), extensions="bmp", transform=train_tfm)
    test_set = DatasetFolder("./dataset/data_0705/lepton/test", loader=lambda x: Image.open(x), extensions="bmp", transform=test_tfm)
    train_loader = DataLoader(train_set, batch_size=batch_size, shuffle=True)
    test_loader = DataLoader(test_set, batch_size=batch_size, shuffle=True)

    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    # Adapt torchvision's ResNet-18 to 1-channel input and 3 output classes.
    model = models.resnet18()
    model.conv1 = nn.Conv2d(1, 64, kernel_size=3, stride=2, padding=3,
                            bias=False)
    model.fc = nn.Linear(512, 3)
    model = model.to(device)
    print(model)
    optimizer = torch.optim.Adam(model.parameters(), lr=0.001)
    criterion = nn.CrossEntropyLoss()

    for epoch in range(num_epoch):
        ##Training
        model.train()  # re-enable BN/dropout updates after the previous eval()
        running_loss = 0.0
        total = 0
        correct = 0
        for i, data in enumerate(train_loader):
            inputs, labels = data
            inputs = inputs.to(device)
            labels = labels.to(device)

            optimizer.zero_grad()
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()

            running_loss += loss.item()
            total += labels.size(0)
            _,predicted = torch.max(outputs.data,1)
            correct += (predicted == labels).sum().item()
        train_acc = correct / total
        print(f"[ Train | {epoch + 1:03d}/{num_epoch:03d} ] loss = {running_loss:.5f}, acc = {train_acc:.5f}")

        ##Testing
        model.eval()
        with torch.no_grad():
            correct = 0
            total = 0
            for i, data in enumerate(test_loader):
                inputs, labels = data
                inputs = inputs.to(device)
                labels = labels.to(device)
                outputs = model(inputs)
                _,predicted = torch.max(outputs.data,1)
                total += labels.size(0)
                correct += (predicted == labels).sum().item()
        print('Test Accuracy:{} %'.format((correct / total) * 100))

if __name__ == '__main__':
    main()
@ -0,0 +1,211 @@ | |||
import torch | |||
import numpy as np | |||
import cv2, os, sys | |||
import pandas as pd | |||
from torch.utils.data import Dataset | |||
from matplotlib import pyplot as plt | |||
from torch.utils.data import ConcatDataset, DataLoader, Subset | |||
import torch.nn as nn | |||
import torchvision.transforms as transforms | |||
from torchvision.datasets import DatasetFolder | |||
from PIL import Image | |||
import torchvision.models | |||
import BinaryNetpytorch.models as models | |||
from BinaryNetpytorch.models.binarized_modules import BinarizeLinear,BinarizeConv2d | |||
# Training hyper-parameters for the binarized-classifier script.
batch_size = 32
num_epoch = 10

# Training-time preprocessing: grayscale, fixed 40x30 resize, tensor.
train_tfm = transforms.Compose([
    # transforms.RandomHorizontalFlip(),
    # transforms.RandomResizedCrop((40,30)),
    transforms.Grayscale(),
    transforms.Resize((40, 30)),
    transforms.ToTensor(),
    #transforms.RandomResizedCrop((40,30)),
    #transforms.TenCrop((40,30)),
    # transforms.Normalize(0.5,0.5),
])

# Evaluation-time preprocessing mirrors training (no augmentation in use).
test_tfm = transforms.Compose([
    transforms.Grayscale(),
    transforms.Resize((40, 30)),
    transforms.ToTensor()
])
def Binaryconv3x3(in_planes, out_planes, stride=1):
    """Binarized 3x3 convolution with unit padding and no bias."""
    return BinarizeConv2d(in_planes, out_planes,
                          kernel_size=3,
                          stride=stride,
                          padding=1,
                          bias=False)
def conv3x3(in_planes, out_planes, stride=1):
    """Plain (non-binarized) 3x3 convolution with unit padding and no bias."""
    cfg = {'kernel_size': 3, 'stride': stride, 'padding': 1, 'bias': False}
    return nn.Conv2d(in_planes, out_planes, **cfg)
class BasicBlock(nn.Module):
    """Binarized residual block: two 3x3 binary convs with BN + Hardtanh.

    When `do_bntan` is False the trailing BatchNorm+Hardtanh pair is skipped,
    letting the enclosing network apply its own head normalization.

    Fix: removed the leftover `import pdb; pdb.set_trace()` debug trap that
    halted execution whenever the residual exceeded 1 on a downsampled path.
    """
    expansion = 1

    def __init__(self, inplanes, planes, stride=1, downsample=None, do_bntan=True):
        super(BasicBlock, self).__init__()
        self.conv1 = Binaryconv3x3(inplanes, planes, stride)
        self.bn1 = nn.BatchNorm2d(planes)
        self.tanh1 = nn.Hardtanh(inplace=True)
        self.conv2 = Binaryconv3x3(planes, planes)
        self.tanh2 = nn.Hardtanh(inplace=True)
        self.bn2 = nn.BatchNorm2d(planes)
        self.downsample = downsample
        self.do_bntan = do_bntan
        self.stride = stride

    def forward(self, x):
        # Clone so the residual survives the in-place Hardtanh on the main path.
        residual = x.clone()

        out = self.conv1(x)
        out = self.bn1(out)
        out = self.tanh1(out)
        out = self.conv2(out)

        if self.downsample is not None:
            residual = self.downsample(residual)

        out += residual
        if self.do_bntan:
            out = self.bn2(out)
            out = self.tanh2(out)
        return out
class ResNet(nn.Module):
    """Shared skeleton for the binarized ResNet variants.

    Subclasses are expected to attach conv1/maxpool/bn1/tanh1, the layer
    stages, avgpool, bn2/tanh2, fc, bn3 and logsoftmax, plus an `inplanes`
    counter consumed by `_make_layer`.
    """

    def __init__(self):
        super(ResNet, self).__init__()

    def _make_layer(self, block, planes, blocks, stride=1, do_bntan=True):
        """Stack residual blocks; only the last block honours do_bntan."""
        # A 1x1 binarized projection is needed whenever the spatial or
        # channel dimensions change across the stage boundary.
        downsample = None
        if stride != 1 or self.inplanes != planes * block.expansion:
            downsample = nn.Sequential(
                BinarizeConv2d(self.inplanes, planes * block.expansion,
                               kernel_size=1, stride=stride, bias=False),
                nn.BatchNorm2d(planes * block.expansion),
            )

        stage = [block(self.inplanes, planes, stride, downsample)]
        self.inplanes = planes * block.expansion
        for _ in range(blocks - 2):
            stage.append(block(self.inplanes, planes))
        stage.append(block(self.inplanes, planes, do_bntan=do_bntan))
        return nn.Sequential(*stage)

    def forward(self, x):
        x = self.conv1(x)
        x = self.maxpool(x)
        x = self.bn1(x)
        x = self.tanh1(x)
        x = self.layer1(x)
        x = self.layer2(x)
        x = self.layer3(x)
        x = self.layer4(x)
        x = self.avgpool(x)
        x = x.view(x.size(0), -1)
        x = self.bn2(x)
        x = self.tanh2(x)
        x = self.fc(x)
        x = self.bn3(x)
        x = self.logsoftmax(x)
        return x
class ResNet_cifar10(ResNet):
    """Binarized ResNet for small grayscale images (CIFAR-style layout).

    Args:
        num_classes: number of output classes.  Fix: this argument was
            previously ignored — the head was hard-coded to 3 outputs; it
            now follows the argument, defaulting to the original 3.
        block: residual block class.
        depth: network depth; each of the 3 stages gets (depth - 2) // 6 blocks.
    """

    def __init__(self, num_classes=3,
                 block=BasicBlock, depth=18):
        super(ResNet_cifar10, self).__init__()
        self.inflate = 5  # channel-width multiplier
        self.inplanes = 16 * self.inflate
        n = int((depth - 2) / 6)
        self.conv1 = BinarizeConv2d(1, 16 * self.inflate, kernel_size=3,
                                    stride=1, padding=1, bias=False)
        self.maxpool = lambda x: x  # no-op: small inputs get no pooling here
        self.bn1 = nn.BatchNorm2d(16 * self.inflate)
        self.tanh1 = nn.Hardtanh(inplace=True)
        self.tanh2 = nn.Hardtanh(inplace=True)
        self.layer1 = self._make_layer(block, 16 * self.inflate, n)
        self.layer2 = self._make_layer(block, 32 * self.inflate, n, stride=2)
        self.layer3 = self._make_layer(block, 64 * self.inflate, n, stride=2,
                                       do_bntan=False)
        self.layer4 = lambda x: x  # no-op: only three stages are used
        self.avgpool = nn.AvgPool2d(8)
        self.bn2 = nn.BatchNorm1d(64 * self.inflate)
        # Classification head sized by num_classes instead of a hard-coded 3.
        self.bn3 = nn.BatchNorm1d(num_classes)
        # dim=1 matches the implicit choice for 2-D input and silences the
        # deprecation warning for dim-less LogSoftmax.
        self.logsoftmax = nn.LogSoftmax(dim=1)
        self.fc = BinarizeLinear(64 * self.inflate, num_classes)
def main():
    """Train the binarized ResNet on the pose dataset and report per-epoch
    test accuracy.

    Fix: the original called model.eval() inside the epoch loop and never
    switched back, so BatchNorm statistics were frozen from the second
    epoch onward; model.train() now re-enters training mode each epoch.
    """
    train_set = DatasetFolder("pose_data/training/labeled",
                              loader=lambda x: Image.open(x),
                              extensions="bmp", transform=train_tfm)
    test_set = DatasetFolder("pose_data/testing",
                             loader=lambda x: Image.open(x),
                             extensions="bmp", transform=test_tfm)
    train_loader = DataLoader(train_set, batch_size=batch_size, shuffle=True)
    test_loader = DataLoader(test_set, batch_size=batch_size, shuffle=True)

    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model = ResNet_cifar10(num_classes=3, block=BasicBlock, depth=18)
    model = model.to(device)
    print(model)

    optimizer = torch.optim.Adam(model.parameters(), lr=0.001)
    criterion = nn.CrossEntropyLoss()

    for epoch in range(num_epoch):
        model.train()  # undo the eval() from the previous epoch's evaluation
        running_loss = 0.0
        total = 0
        correct = 0
        for inputs, labels in train_loader:
            inputs = inputs.to(device)
            labels = labels.to(device)

            optimizer.zero_grad()
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()

            running_loss += loss.item()
            total += labels.size(0)
            _, predicted = torch.max(outputs.data, 1)
            correct += (predicted == labels).sum().item()
        train_acc = correct / total
        print(f"[ Train | {epoch + 1:03d}/{num_epoch:03d} ] loss = {running_loss:.5f}, acc = {train_acc:.5f}")

        # Per-epoch evaluation on the test split.
        model.eval()
        with torch.no_grad():
            correct = 0
            total = 0
            for inputs, labels in test_loader:
                inputs = inputs.to(device)
                labels = labels.to(device)
                outputs = model(inputs)
                _, predicted = torch.max(outputs.data, 1)
                total += labels.size(0)
                correct += (predicted == labels).sum().item()
            print('Test Accuracy:{} %'.format((correct / total) * 100))


if __name__ == '__main__':
    main()
@ -0,0 +1,193 @@ | |||
import torch | |||
import numpy as np | |||
import cv2, os, sys | |||
import pandas as pd | |||
from torch.utils.data import Dataset | |||
from matplotlib import pyplot as plt | |||
from torch.utils.data import ConcatDataset, DataLoader, Subset | |||
import torch.nn as nn | |||
import torchvision.transforms as transforms | |||
from torchvision.datasets import DatasetFolder | |||
from PIL import Image | |||
import torchvision.models as models | |||
# Hyper-parameters.
batch_size = 32
num_epoch = 1

# Pin all CUDA work to device 1.
torch.cuda.set_device(1)

# Training preprocessing: grayscale, flip/crop augmentation, 68x68 tensors.
train_tfm = transforms.Compose([
    transforms.Grayscale(),
    transforms.RandomHorizontalFlip(),
    transforms.RandomResizedCrop((68, 68)),
    transforms.ToTensor(),
])

# Test preprocessing: grayscale only, images kept at native size.
test_tfm = transforms.Compose([
    transforms.Grayscale(),
    transforms.ToTensor(),
])
# Removed: a commented-out (triple-quoted) hand-rolled CNN `Classifier`
# — three conv/BatchNorm/ReLU/Dropout/MaxPool stages plus two linear
# layers — superseded by the torchvision resnet50 configured in main().
def main():
    """Fine-tune a grayscale resnet50 on pose_data2, checkpoint the best
    model by validation accuracy, then report test accuracy.

    Fixes:
      * validation loss was accumulated into `running_loss` (which still
        held the full training-epoch loss), so the printed validation loss
        was wrong; it now uses the previously-unused `valid_loss`.
      * model.train() is restored each epoch — model.eval() used to leave
        BatchNorm/Dropout frozen after the first validation pass.
    """
    train_set = DatasetFolder("pose_data2/train", loader=lambda x: Image.open(x),
                              extensions="bmp", transform=train_tfm)
    test_set = DatasetFolder("pose_data2/test", loader=lambda x: Image.open(x),
                             extensions="bmp", transform=test_tfm)
    valid_set = DatasetFolder("pose_data2/val", loader=lambda x: Image.open(x),
                              extensions="bmp", transform=test_tfm)
    train_loader = DataLoader(train_set, batch_size=batch_size, shuffle=True)
    test_loader = DataLoader(test_set, batch_size=batch_size, shuffle=True)
    valid_loader = DataLoader(valid_set, batch_size=batch_size, shuffle=True)

    model_path = "model.ckpt"
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    # resnet50 adapted to 1-channel input and 8 output classes.
    model = models.resnet50()
    model.conv1 = nn.Conv2d(1, 64, kernel_size=7, stride=2, padding=3,
                            bias=False)
    model.fc = nn.Linear(2048, 8)
    model = model.to(device)
    print(model)

    optimizer = torch.optim.Adam(model.parameters(), lr=0.001)
    criterion = nn.CrossEntropyLoss()
    best_acc = -1

    for epoch in range(num_epoch):
        ## Training
        model.train()  # undo eval() set during the previous validation pass
        running_loss = 0.0
        total = 0
        correct = 0
        for inputs, labels in train_loader:
            inputs = inputs.to(device)
            labels = labels.to(device)
            optimizer.zero_grad()
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()
            running_loss += loss.item()
            total += labels.size(0)
            _, predicted = torch.max(outputs.data, 1)
            correct += (predicted == labels).sum().item()
        train_acc = correct / total
        print(f"[ Train | {epoch + 1:03d}/{num_epoch:03d} ] loss = {running_loss:.5f}, acc = {train_acc:.5f}")

        ## Validation
        model.eval()
        valid_loss = 0.0
        total = 0
        correct = 0
        for inputs, labels in valid_loader:
            inputs = inputs.to(device)
            labels = labels.to(device)
            with torch.no_grad():
                outputs = model(inputs)
                loss = criterion(outputs, labels)
            valid_loss += loss.item()  # fix: was added to running_loss
            total += labels.size(0)
            _, predicted = torch.max(outputs.data, 1)
            correct += (predicted == labels).sum().item()
        valid_acc = correct / total
        print(f"[ Valid | {epoch + 1:03d}/{num_epoch:03d} ] loss = {valid_loss:.5f}, acc = {valid_acc:.5f}")
        if valid_acc > best_acc:
            best_acc = valid_acc
            torch.save(model.state_dict(), model_path)
            print('saving model with acc {:.3f}'.format(valid_acc))

    ## Testing with the best checkpoint.
    model = models.resnet50()
    model.conv1 = nn.Conv2d(1, 64, kernel_size=7, stride=2, padding=3,
                            bias=False)
    model.fc = nn.Linear(2048, 8)
    model = model.to(device)
    model.load_state_dict(torch.load(model_path))
    model.eval()
    with torch.no_grad():
        correct = 0
        total = 0
        for inputs, labels in test_loader:
            inputs = inputs.to(device)
            labels = labels.to(device)
            outputs = model(inputs)
            _, predicted = torch.max(outputs.data, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()
        print('Test Accuracy:{} %'.format((correct / total) * 100))


if __name__ == '__main__':
    main()
@ -0,0 +1,285 @@ | |||
import torch | |||
import numpy as np | |||
import cv2, os, sys | |||
import pandas as pd | |||
from torch.utils.data import Dataset | |||
from matplotlib import pyplot as plt | |||
from torch.utils.data import ConcatDataset, DataLoader, Subset | |||
import torch.nn as nn | |||
import torchvision.transforms as transforms | |||
from torchvision.datasets import DatasetFolder | |||
from PIL import Image | |||
import torchvision.models | |||
import BinaryNetpytorch.models as models | |||
from BinaryNetpytorch.models.binarized_modules import BinarizeLinear,BinarizeConv2d | |||
import progressbar | |||
import seaborn as sns | |||
# Hyper-parameters.
batch_size = 32
num_epoch = 60

# Pin all CUDA work to device 1.
torch.cuda.set_device(1)

# Training preprocessing: single-channel, fixed 68x68 size, [0, 1] tensors.
# (Flip/crop/normalize augmentations were tried and left disabled.)
train_tfm = transforms.Compose([
    transforms.Grayscale(),
    transforms.Resize((68, 68)),
    transforms.ToTensor(),
])

# Test preprocessing mirrors training: no augmentation.
test_tfm = transforms.Compose([
    transforms.Grayscale(),
    transforms.Resize((68, 68)),
    transforms.ToTensor(),
])
def Quantize(img):
    """Snap every element of `img` to the nearest multiple of 1/128.

    For each value the two bracketing levels floor(v/step)*step and
    (floor(v/step)+1)*step are compared; the closer one is kept, with
    exact ties going to the upper level.  Runs as three vectorized ops
    (a superseded per-pixel loop was removed).
    """
    step = 0.0078125  # quantization step: 2 ** -7
    lower = torch.div(img, step, rounding_mode="floor") * step
    upper = lower + step
    return torch.where(abs(img - lower) < abs(img - upper), lower, upper)
def Binaryconv3x3(in_planes, out_planes, stride=1):
    """Build a binarized 3x3 convolution with padding 1 and no bias."""
    return BinarizeConv2d(
        in_planes,
        out_planes,
        kernel_size=3,
        stride=stride,
        padding=1,
        bias=False,
    )
def conv3x3(in_planes, out_planes, stride=1):
    """Build a full-precision 3x3 convolution with padding 1 and no bias."""
    return nn.Conv2d(
        in_planes,
        out_planes,
        kernel_size=3,
        stride=stride,
        padding=1,
        bias=False,
    )
class BasicBlock(nn.Module):
    """Quantized/binarized ResNet basic block with a residual add.

    Activations are passed through Quantize before each binarized conv;
    the final bn/tanh pair can be skipped (do_bntan=False) so a later
    stage can apply its own normalization.
    """

    expansion = 1

    def __init__(self, inplanes, planes, stride=1, downsample=None, do_bntan=True):
        super(BasicBlock, self).__init__()
        self.conv1 = Binaryconv3x3(inplanes, planes, stride)
        self.bn1 = nn.BatchNorm2d(planes)
        self.tanh1 = nn.Hardtanh(inplace=True)
        self.conv2 = Binaryconv3x3(planes, planes)
        self.tanh2 = nn.Hardtanh(inplace=True)
        self.bn2 = nn.BatchNorm2d(planes)
        self.downsample = downsample  # optional projection for the residual path
        self.do_bntan = do_bntan
        self.stride = stride

    def forward(self, x):
        residual = x.clone()
        x = Quantize(x)  # quantize activations before the binarized conv
        out = self.conv1(x)
        out = self.bn1(out)
        out = self.tanh1(out)
        out = Quantize(out)
        out = self.conv2(out)
        if self.downsample is not None:
            # Project the residual so channel/stride shapes match `out`.
            # Fix: removed a leftover `import pdb; pdb.set_trace()` debugger
            # breakpoint that halted training whenever residual.max() > 1.
            residual = self.downsample(residual)
        out += residual
        if self.do_bntan:
            out = self.bn2(out)
            out = self.tanh2(out)
        return out
class ResNet(nn.Module):
    """Skeleton for the quantized binarized ResNet; subclasses attach the
    conv/bn/stage/head modules and an `inplanes` counter for `_make_layer`."""

    def __init__(self):
        super(ResNet, self).__init__()

    def _make_layer(self, block, planes, blocks, stride=1, do_bntan=True):
        """Stack `blocks` residual blocks; only the last honours do_bntan."""
        # A 1x1 binarized projection is needed whenever the spatial or
        # channel dimensions change across the stage boundary.
        downsample = None
        if stride != 1 or self.inplanes != planes * block.expansion:
            downsample = nn.Sequential(
                BinarizeConv2d(self.inplanes, planes * block.expansion,
                               kernel_size=1, stride=stride, bias=False),
                nn.BatchNorm2d(planes * block.expansion),
            )
        layers = []
        layers.append(block(self.inplanes, planes, stride, downsample))
        self.inplanes = planes * block.expansion
        for i in range(1, blocks - 1):
            layers.append(block(self.inplanes, planes))
        layers.append(block(self.inplanes, planes, do_bntan=do_bntan))
        return nn.Sequential(*layers)

    def forward(self, x):
        x = Quantize(x)  # quantize the input before the first binarized conv
        x = self.conv1(x)
        x = self.maxpool(x)
        x = self.bn1(x)
        x = self.tanh1(x)
        x = self.layer1(x)
        x = self.layer2(x)
        x = self.layer3(x)
        x = self.layer4(x)
        x = self.avgpool(x)
        x = x.view(x.size(0), -1)
        x = self.bn2(x)
        x = self.tanh2(x)
        # Fix: derive the reshape from the actual tensor.  The original
        # hard-coded view(32, 1280, 1, 1), which crashed on any batch whose
        # size was not exactly 32 (e.g. the final partial batch).
        x = x.view(x.size(0), -1, 1, 1)
        x = self.fc(x)
        x = x.view(x.size(0), -1)
        x = self.bn3(x)
        x = self.logsoftmax(x)
        return x
class ResNet_cifar10(ResNet):
    """Quantized binarized ResNet for 68x68 grayscale inputs.

    Args:
        num_classes: number of output classes.  Fix: this argument was
            previously ignored — the head was hard-coded to 8 outputs; it
            now follows the argument, defaulting to the original 8.
        block: residual block class.
        depth: network depth; each of the 3 stages gets (depth - 2) // 6 blocks.
    """

    def __init__(self, num_classes=8,
                 block=BasicBlock, depth=18):
        super(ResNet_cifar10, self).__init__()
        self.inflate = 5  # channel-width multiplier
        self.inplanes = 16 * self.inflate
        n = int((depth - 2) / 6)
        self.conv1 = BinarizeConv2d(1, 16 * self.inflate, kernel_size=3,
                                    stride=1, padding=1, bias=False)
        self.maxpool = lambda x: x  # no-op: small inputs get no pooling here
        self.bn1 = nn.BatchNorm2d(16 * self.inflate)
        self.tanh1 = nn.Hardtanh(inplace=True)
        self.tanh2 = nn.Hardtanh(inplace=True)
        self.layer1 = self._make_layer(block, 16 * self.inflate, n)
        self.layer2 = self._make_layer(block, 32 * self.inflate, n, stride=2)
        self.layer3 = self._make_layer(block, 64 * self.inflate, n, stride=2,
                                       do_bntan=False)
        self.layer4 = lambda x: x  # no-op: only three stages are used
        self.avgpool = nn.AvgPool2d(8)
        # NOTE(review): 256*inflate features presumably come from the
        # 64*inflate channels over the residual 2x2 map after avgpool on
        # 68x68 inputs — confirm against the data pipeline.
        self.bn2 = nn.BatchNorm1d(256 * self.inflate)
        # Classification head sized by num_classes instead of a hard-coded 8.
        self.bn3 = nn.BatchNorm1d(num_classes)
        # dim=1 matches the implicit choice for 2-D input and silences the
        # deprecation warning for dim-less LogSoftmax.
        self.logsoftmax = nn.LogSoftmax(dim=1)
        # 1x1 binarized conv used as the fully-connected head.
        self.fc = BinarizeConv2d(256 * self.inflate, num_classes, kernel_size=1)
def main():
    """Train the quantized binarized ResNet on pose_data2, then evaluate it,
    dumping misclassified images and a confusion-matrix heatmap.

    Fix: the per-sample loops over the test batch used range(batch_size)
    and raised IndexError on the final partial batch; they now iterate
    over the actual batch length (and are merged into one loop).
    """
    train_set = DatasetFolder("pose_data2/train", loader=lambda x: Image.open(x),
                              extensions="bmp", transform=train_tfm)
    test_set = DatasetFolder("pose_data2/test", loader=lambda x: Image.open(x),
                             extensions="bmp", transform=test_tfm)
    train_loader = DataLoader(train_set, batch_size=batch_size, shuffle=True)
    test_loader = DataLoader(test_set, batch_size=batch_size, shuffle=True)

    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model = ResNet_cifar10(num_classes=8, block=BasicBlock, depth=18)
    model = model.to(device)
    print(model)
    optimizer = torch.optim.Adam(model.parameters(), lr=0.001)
    criterion = nn.CrossEntropyLoss()
    model_path = "model.ckpt"

    for epoch in range(num_epoch):
        running_loss = 0.0
        total = 0
        correct = 0
        for inputs, labels in train_loader:
            inputs = inputs.to(device)
            labels = labels.to(device)
            optimizer.zero_grad()
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()
            running_loss += loss.item()
            total += labels.size(0)
            _, predicted = torch.max(outputs.data, 1)
            correct += (predicted == labels).sum().item()
        train_acc = correct / total
        print(f"[ Train | {epoch + 1:03d}/{num_epoch:03d} ] loss = {running_loss:.5f}, acc = {train_acc:.5f}")
    torch.save(model.state_dict(), model_path)

    # Reload the saved weights and evaluate.
    model = ResNet_cifar10(num_classes=8, block=BasicBlock, depth=18)
    model = model.to(device)
    model.load_state_dict(torch.load(model_path))
    model.eval()
    with torch.no_grad():
        correct = 0
        total = 0
        correct_2 = 0
        stat = np.zeros((8, 8))  # confusion counts for misclassified samples
        for inputs, labels in test_loader:
            inputs = inputs.to(device)
            labels = labels.to(device)
            outputs = model(inputs)
            _, predicted = torch.max(outputs.data, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()
            # Fix: iterate the actual batch length, not range(batch_size).
            for k in range(labels.size(0)):
                # Coarse two-superclass accuracy: classes 0-3 vs classes 4-7.
                if predicted[k] < 4:
                    if labels[k] < 4:
                        correct_2 += 1
                elif labels[k] >= 4:
                    correct_2 += 1
                if predicted[k] != labels[k]:
                    # Save each misclassified frame and tally the confusion.
                    img = inputs[k].mul(255).byte().cpu().numpy().squeeze(0)
                    img = np.moveaxis(img, 0, -1)
                    predict = predicted[k].cpu().numpy()
                    label = labels[k].cpu().numpy()
                    path = "test_result/predict:" + str(predict) + "_labels:" + str(label) + ".jpg"
                    stat[int(label)][int(predict)] += 1
                    cv2.imwrite(path, img)
        print(stat)
        # Render the confusion matrix once, after the whole test pass.
        sns.heatmap(stat, linewidth=0.5)
        plt.xlabel('Prediction')
        plt.ylabel('Label')
        plt.savefig('heatmap.jpg')
        print('Test_2clasee Accuracy:{} %'.format((correct_2 / total) * 100))
        print('Test Accuracy:{} %'.format((correct / total) * 100))


if __name__ == '__main__':
    main()
@ -0,0 +1,207 @@ | |||
import torch | |||
import numpy as np | |||
import cv2, os, sys | |||
from torch.utils.data import Dataset | |||
from matplotlib import pyplot as plt | |||
from torch.utils.data import ConcatDataset, DataLoader, Subset | |||
import torch.nn as nn | |||
import torchvision.transforms as transforms | |||
from torchvision.datasets import DatasetFolder | |||
from PIL import Image | |||
from BinaryNetpytorch.models.binarized_modules import BinarizeLinear,BinarizeConv2d | |||
from BinaryNetpytorch.models.binarized_modules import Binarize,HingeLoss | |||
import seaborn as sns | |||
import random | |||
# Hyper-parameters.
batch_size = 8
num_epoch = 10

# Seed every RNG source and disable cuDNN autotuning for reproducible runs.
seed = 777
torch.manual_seed(seed)
torch.cuda.manual_seed(seed)
torch.cuda.manual_seed_all(seed)
np.random.seed(seed)
random.seed(seed)
torch.backends.cudnn.benchmark = False
torch.backends.cudnn.deterministic = True

# Grid-eye frames are used as-is: tensor conversion only, no augmentation.
train_tfm = transforms.Compose([
    transforms.ToTensor(),
])
test_tfm = transforms.Compose([
    transforms.ToTensor(),
])
class Classifier(nn.Module):
    """All-convolutional binarized classifier for grid-eye frames.

    Three binarized conv + BatchNorm + ReLU + MaxPool stages, then a final
    binarized conv acting as the classification head; forward() flattens
    its output to (batch, num_logits).
    """

    def __init__(self):
        super(Classifier, self).__init__()
        stages = [
            BinarizeConv2d(1, 128, 3, 1),
            nn.BatchNorm2d(128),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2),
            BinarizeConv2d(128, 64, 3, 1),
            nn.BatchNorm2d(64),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2),
            BinarizeConv2d(64, 32, 3, 1),
            nn.BatchNorm2d(32),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2),
            # Head: a (3, 2) kernel collapses the remaining spatial extent
            # into 3 output channels.  (Original comments assume roughly
            # 40x30 inputs — confirm against the data pipeline.)
            BinarizeConv2d(32, 3, (3, 2), 1),
        ]
        self.cnn_layers = nn.Sequential(*stages)

    def forward(self, x):
        features = self.cnn_layers(x)
        # Flatten (batch, C, H, W) to (batch, C*H*W) logits.
        return features.view(features.size(0), -1)
def main():
    """Train the binarized Classifier on grid-eye data with per-epoch
    validation checkpointing, then evaluate the best model on the test set.

    Fix: model.train() is restored at the start of each epoch — the
    original called model.eval() for validation and never switched back,
    freezing BatchNorm statistics from the second epoch onward.
    """
    train_set = DatasetFolder("./dataset/data_0711/grideye/train",
                              loader=lambda x: Image.open(x),
                              extensions="bmp", transform=train_tfm)
    test_set = DatasetFolder("./dataset/data_0711/grideye/test",
                             loader=lambda x: Image.open(x),
                             extensions="bmp", transform=test_tfm)
    # NOTE(review): validation reuses the *train* directory — confirm a
    # separate validation split was not intended.
    val_set = DatasetFolder("./dataset/data_0711/grideye/train",
                            loader=lambda x: Image.open(x),
                            extensions="bmp", transform=test_tfm)
    train_loader = DataLoader(train_set, batch_size=batch_size, shuffle=True)
    test_loader = DataLoader(test_set, batch_size=batch_size, shuffle=True)
    val_loader = DataLoader(val_set, batch_size=batch_size, shuffle=True)

    save_path = 'models.ckpt'
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model = Classifier().to(device)
    optimizer = torch.optim.Adam(model.parameters(), lr=0.0001)
    criterion = nn.CrossEntropyLoss()
    best_accuracy = 0.0

    for epoch in range(num_epoch):
        model.train()  # undo eval() set during the previous validation pass
        running_loss = 0.0
        total = 0
        correct = 0
        for inputs, labels in train_loader:
            inputs = inputs.to(device)
            labels = labels.to(device)
            optimizer.zero_grad()
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            loss.backward()
            # BNN update: restore the real-valued shadow weights before the
            # optimizer step, then clamp them back into [-1, 1].
            for p in model.parameters():
                if hasattr(p, 'org'):
                    p.data.copy_(p.org)
            optimizer.step()
            for p in model.parameters():
                if hasattr(p, 'org'):
                    p.org.copy_(p.data.clamp_(-1, 1))
            running_loss += loss.item()
            total += labels.size(0)
            _, predicted = torch.max(outputs.data, 1)
            correct += (predicted == labels).sum().item()
        train_acc = correct / total
        print(f"[ Train | {epoch + 1:03d}/{num_epoch:03d} ] loss = {running_loss:.5f}, acc = {train_acc:.5f}")

        # Validation; checkpoint on improvement.
        model.eval()
        with torch.no_grad():
            correct = 0
            total = 0
            for inputs, labels in val_loader:
                inputs = inputs.to(device)
                labels = labels.to(device)
                outputs = model(inputs)
                _, predicted = torch.max(outputs.data, 1)
                total += labels.size(0)
                correct += (predicted == labels).sum().item()
            val_acc = correct / total
            if val_acc > best_accuracy:
                best_accuracy = val_acc
                torch.save(model.state_dict(), save_path)
                print("Save Model")
            print(f"[ Val | {epoch + 1:03d}/{num_epoch:03d} ] acc = {val_acc:.5f}")

    # Test with the best checkpoint; tally a confusion matrix of errors.
    model = Classifier().to(device)
    model.load_state_dict(torch.load(save_path))
    model.eval()
    stat = np.zeros((3, 3))
    with torch.no_grad():
        correct = 0
        total = 0
        print(model)
        for inputs, labels in test_loader:
            inputs = inputs.to(device)
            labels = labels.to(device)
            outputs = model(inputs)
            _, predicted = torch.max(outputs.data, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()
            for k in range(len(predicted)):
                if predicted[k] != labels[k]:
                    # (Removed dead code that built an image array and file
                    # path here but never wrote the file.)
                    stat[int(labels[k])][int(predicted[k])] += 1
        # Render the confusion matrix once, after the whole test pass.
        sns.heatmap(stat, linewidth=0.5)
        plt.xlabel('Prediction')
        plt.ylabel('Label')
        plt.savefig('heatmap.jpg')
        print('Test Accuracy:{} %'.format((correct / total) * 100))


if __name__ == '__main__':
    main()