@ -0,0 +1,2 @@ | |||||
models.ckpt | |||||
training_state.bin |
@ -0,0 +1,8 @@ | |||||
# BNN.pytorch | |||||
Binarized Neural Network (BNN) for PyTorch.
This is the PyTorch version of the BNN code, covering the VGG and ResNet models.
Link to the paper: https://papers.nips.cc/paper/6573-binarized-neural-networks | |||||
The code is based on https://github.com/eladhoffer/convNet.pytorch | |||||
Please install torch and torchvision by following the instructions at: http://pytorch.org/ | |||||
To run ResNet-18 on the CIFAR-10 dataset use: python main_binary.py --model resnet_binary --save resnet18_binary --dataset cifar10
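A trained run can then be evaluated on the validation set; for example (assuming the default ./results directory and the model_best.pth.tar checkpoint name used by the resume logic): python main_binary.py --model resnet_binary --evaluate results/resnet18_binary/model_best.pth.tar --dataset cifar10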
@ -0,0 +1,37 @@ | |||||
import os | |||||
import torchvision.datasets as datasets | |||||
import torchvision.transforms as transforms | |||||
_DATASETS_MAIN_PATH = '/home/Datasets' | |||||
_dataset_path = { | |||||
'cifar10': os.path.join(_DATASETS_MAIN_PATH, 'CIFAR10'), | |||||
'cifar100': os.path.join(_DATASETS_MAIN_PATH, 'CIFAR100'), | |||||
'stl10': os.path.join(_DATASETS_MAIN_PATH, 'STL10'), | |||||
'mnist': os.path.join(_DATASETS_MAIN_PATH, 'MNIST'), | |||||
'imagenet': { | |||||
'train': os.path.join(_DATASETS_MAIN_PATH, 'ImageNet/train'), | |||||
'val': os.path.join(_DATASETS_MAIN_PATH, 'ImageNet/val') | |||||
} | |||||
} | |||||
def get_dataset(name, split='train', transform=None, | |||||
target_transform=None, download=True): | |||||
train = (split == 'train') | |||||
if name == 'cifar10': | |||||
return datasets.CIFAR10(root=_dataset_path['cifar10'], | |||||
train=train, | |||||
transform=transform, | |||||
target_transform=target_transform, | |||||
download=download) | |||||
elif name == 'cifar100': | |||||
return datasets.CIFAR100(root=_dataset_path['cifar100'], | |||||
train=train, | |||||
transform=transform, | |||||
target_transform=target_transform, | |||||
download=download) | |||||
elif name == 'imagenet': | |||||
path = _dataset_path[name][split] | |||||
return datasets.ImageFolder(root=path, | |||||
transform=transform, | |||||
target_transform=target_transform) |
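    # Assumed completion: the MNIST and STL-10 paths declared above are not
    # handled by the branches above, so the following sketches the handling.
    elif name == 'mnist':
        return datasets.MNIST(root=_dataset_path['mnist'],
                              train=train,
                              transform=transform,
                              target_transform=target_transform,
                              download=download)
    elif name == 'stl10':
        # torchvision's STL10 uses 'test' rather than 'val' for its held-out split
        return datasets.STL10(root=_dataset_path['stl10'],
                              split='train' if train else 'test',
                              transform=transform,
                              target_transform=target_transform,
                              download=download)
    else:
        raise ValueError('unknown dataset: {}'.format(name))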
@ -0,0 +1,309 @@ | |||||
import argparse | |||||
import os | |||||
import time | |||||
import logging | |||||
import torch | |||||
import torch.nn as nn | |||||
import torch.nn.parallel | |||||
import torch.backends.cudnn as cudnn | |||||
import torch.optim | |||||
import torch.utils.data | |||||
import models | |||||
from torch.autograd import Variable | |||||
from data import get_dataset | |||||
from preprocess import get_transform | |||||
from utils import * | |||||
from datetime import datetime | |||||
from ast import literal_eval | |||||
from torchvision.utils import save_image | |||||
model_names = sorted(name for name in models.__dict__ | |||||
if name.islower() and not name.startswith("__") | |||||
and callable(models.__dict__[name])) | |||||
parser = argparse.ArgumentParser(description='PyTorch ConvNet Training') | |||||
parser.add_argument('--results_dir', metavar='RESULTS_DIR', default='./results', | |||||
help='results dir') | |||||
parser.add_argument('--save', metavar='SAVE', default='', | |||||
help='saved folder') | |||||
parser.add_argument('--dataset', metavar='DATASET', default='imagenet', | |||||
help='dataset name or folder') | |||||
parser.add_argument('--model', '-a', metavar='MODEL', default='alexnet', | |||||
choices=model_names, | |||||
help='model architecture: ' + | |||||
' | '.join(model_names) + | |||||
' (default: alexnet)') | |||||
parser.add_argument('--input_size', type=int, default=None, | |||||
help='image input size') | |||||
parser.add_argument('--model_config', default='', | |||||
help='additional architecture configuration') | |||||
parser.add_argument('--type', default='torch.cuda.FloatTensor', | |||||
help='type of tensor - e.g torch.cuda.HalfTensor') | |||||
parser.add_argument('--gpus', default='0', | |||||
help='gpus used for training - e.g 0,1,3') | |||||
parser.add_argument('-j', '--workers', default=8, type=int, metavar='N', | |||||
help='number of data loading workers (default: 8)') | |||||
parser.add_argument('--epochs', default=2500, type=int, metavar='N', | |||||
help='number of total epochs to run') | |||||
parser.add_argument('--start-epoch', default=0, type=int, metavar='N', | |||||
help='manual epoch number (useful on restarts)') | |||||
parser.add_argument('-b', '--batch-size', default=256, type=int, | |||||
metavar='N', help='mini-batch size (default: 256)') | |||||
parser.add_argument('--optimizer', default='SGD', type=str, metavar='OPT', | |||||
help='optimizer function used') | |||||
parser.add_argument('--lr', '--learning_rate', default=0.1, type=float, | |||||
metavar='LR', help='initial learning rate') | |||||
parser.add_argument('--momentum', default=0.9, type=float, metavar='M', | |||||
help='momentum') | |||||
parser.add_argument('--weight-decay', '--wd', default=1e-4, type=float, | |||||
metavar='W', help='weight decay (default: 1e-4)') | |||||
parser.add_argument('--print-freq', '-p', default=10, type=int, | |||||
metavar='N', help='print frequency (default: 10)') | |||||
parser.add_argument('--resume', default='', type=str, metavar='PATH', | |||||
help='path to latest checkpoint (default: none)') | |||||
parser.add_argument('-e', '--evaluate', type=str, metavar='FILE', | |||||
help='evaluate model FILE on validation set') | |||||
def main(): | |||||
global args, best_prec1 | |||||
best_prec1 = 0 | |||||
args = parser.parse_args() | |||||
if args.evaluate: | |||||
args.results_dir = '/tmp' | |||||
    if args.save == '':
args.save = datetime.now().strftime('%Y-%m-%d_%H-%M-%S') | |||||
save_path = os.path.join(args.results_dir, args.save) | |||||
if not os.path.exists(save_path): | |||||
os.makedirs(save_path) | |||||
setup_logging(os.path.join(save_path, 'log.txt')) | |||||
results_file = os.path.join(save_path, 'results.%s') | |||||
results = ResultsLog(results_file % 'csv', results_file % 'html') | |||||
logging.info("saving to %s", save_path) | |||||
logging.debug("run arguments: %s", args) | |||||
if 'cuda' in args.type: | |||||
args.gpus = [int(i) for i in args.gpus.split(',')] | |||||
torch.cuda.set_device(args.gpus[0]) | |||||
cudnn.benchmark = True | |||||
else: | |||||
args.gpus = None | |||||
# create model | |||||
logging.info("creating model %s", args.model) | |||||
model = models.__dict__[args.model] | |||||
model_config = {'input_size': args.input_size, 'dataset': args.dataset} | |||||
    if args.model_config != '':
model_config = dict(model_config, **literal_eval(args.model_config)) | |||||
model = model(**model_config) | |||||
logging.info("created model with configuration: %s", model_config) | |||||
# optionally resume from a checkpoint | |||||
if args.evaluate: | |||||
if not os.path.isfile(args.evaluate): | |||||
parser.error('invalid checkpoint: {}'.format(args.evaluate)) | |||||
checkpoint = torch.load(args.evaluate) | |||||
model.load_state_dict(checkpoint['state_dict']) | |||||
logging.info("loaded checkpoint '%s' (epoch %s)", | |||||
args.evaluate, checkpoint['epoch']) | |||||
elif args.resume: | |||||
checkpoint_file = args.resume | |||||
if os.path.isdir(checkpoint_file): | |||||
results.load(os.path.join(checkpoint_file, 'results.csv')) | |||||
checkpoint_file = os.path.join( | |||||
checkpoint_file, 'model_best.pth.tar') | |||||
if os.path.isfile(checkpoint_file): | |||||
logging.info("loading checkpoint '%s'", args.resume) | |||||
checkpoint = torch.load(checkpoint_file) | |||||
args.start_epoch = checkpoint['epoch'] - 1 | |||||
best_prec1 = checkpoint['best_prec1'] | |||||
model.load_state_dict(checkpoint['state_dict']) | |||||
logging.info("loaded checkpoint '%s' (epoch %s)", | |||||
checkpoint_file, checkpoint['epoch']) | |||||
else: | |||||
logging.error("no checkpoint found at '%s'", args.resume) | |||||
num_parameters = sum([l.nelement() for l in model.parameters()]) | |||||
logging.info("number of parameters: %d", num_parameters) | |||||
# Data loading code | |||||
default_transform = { | |||||
'train': get_transform(args.dataset, | |||||
input_size=args.input_size, augment=True), | |||||
'eval': get_transform(args.dataset, | |||||
input_size=args.input_size, augment=False) | |||||
} | |||||
transform = getattr(model, 'input_transform', default_transform) | |||||
regime = getattr(model, 'regime', {0: {'optimizer': args.optimizer, | |||||
'lr': args.lr, | |||||
'momentum': args.momentum, | |||||
'weight_decay': args.weight_decay}}) | |||||
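    # `regime` maps a starting epoch to optimizer settings; adjust_optimizer (from
    # utils, assumed) applies each entry from that epoch onwards, so e.g.
    # {0: {'optimizer': 'SGD', 'lr': 1e-1}, 30: {'lr': 1e-2}} lowers the LR at epoch 30.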
# define loss function (criterion) and optimizer | |||||
criterion = getattr(model, 'criterion', nn.CrossEntropyLoss)() | |||||
criterion.type(args.type) | |||||
model.type(args.type) | |||||
val_data = get_dataset(args.dataset, 'val', transform['eval']) | |||||
val_loader = torch.utils.data.DataLoader( | |||||
val_data, | |||||
batch_size=args.batch_size, shuffle=False, | |||||
num_workers=args.workers, pin_memory=True) | |||||
if args.evaluate: | |||||
validate(val_loader, model, criterion, 0) | |||||
return | |||||
train_data = get_dataset(args.dataset, 'train', transform['train']) | |||||
train_loader = torch.utils.data.DataLoader( | |||||
train_data, | |||||
batch_size=args.batch_size, shuffle=True, | |||||
num_workers=args.workers, pin_memory=True) | |||||
optimizer = torch.optim.SGD(model.parameters(), lr=args.lr) | |||||
logging.info('training regime: %s', regime) | |||||
for epoch in range(args.start_epoch, args.epochs): | |||||
optimizer = adjust_optimizer(optimizer, epoch, regime) | |||||
# train for one epoch | |||||
train_loss, train_prec1, train_prec5 = train( | |||||
train_loader, model, criterion, epoch, optimizer) | |||||
# evaluate on validation set | |||||
val_loss, val_prec1, val_prec5 = validate( | |||||
val_loader, model, criterion, epoch) | |||||
# remember best prec@1 and save checkpoint | |||||
is_best = val_prec1 > best_prec1 | |||||
best_prec1 = max(val_prec1, best_prec1) | |||||
save_checkpoint({ | |||||
'epoch': epoch + 1, | |||||
'model': args.model, | |||||
'config': args.model_config, | |||||
'state_dict': model.state_dict(), | |||||
'best_prec1': best_prec1, | |||||
'regime': regime | |||||
}, is_best, path=save_path) | |||||
logging.info('\n Epoch: {0}\t' | |||||
'Training Loss {train_loss:.4f} \t' | |||||
'Training Prec@1 {train_prec1:.3f} \t' | |||||
'Training Prec@5 {train_prec5:.3f} \t' | |||||
'Validation Loss {val_loss:.4f} \t' | |||||
'Validation Prec@1 {val_prec1:.3f} \t' | |||||
'Validation Prec@5 {val_prec5:.3f} \n' | |||||
.format(epoch + 1, train_loss=train_loss, val_loss=val_loss, | |||||
train_prec1=train_prec1, val_prec1=val_prec1, | |||||
train_prec5=train_prec5, val_prec5=val_prec5)) | |||||
results.add(epoch=epoch + 1, train_loss=train_loss, val_loss=val_loss, | |||||
train_error1=100 - train_prec1, val_error1=100 - val_prec1, | |||||
train_error5=100 - train_prec5, val_error5=100 - val_prec5) | |||||
#results.plot(x='epoch', y=['train_loss', 'val_loss'], | |||||
# title='Loss', ylabel='loss') | |||||
#results.plot(x='epoch', y=['train_error1', 'val_error1'], | |||||
# title='Error@1', ylabel='error %') | |||||
#results.plot(x='epoch', y=['train_error5', 'val_error5'], | |||||
# title='Error@5', ylabel='error %') | |||||
results.save() | |||||
def forward(data_loader, model, criterion, epoch=0, training=True, optimizer=None): | |||||
if args.gpus and len(args.gpus) > 1: | |||||
model = torch.nn.DataParallel(model, args.gpus) | |||||
batch_time = AverageMeter() | |||||
data_time = AverageMeter() | |||||
losses = AverageMeter() | |||||
top1 = AverageMeter() | |||||
top5 = AverageMeter() | |||||
end = time.time() | |||||
for i, (inputs, target) in enumerate(data_loader): | |||||
# measure data loading time | |||||
data_time.update(time.time() - end) | |||||
if args.gpus is not None: | |||||
target = target.cuda() | |||||
        if not training:
            # evaluation: run under no_grad so no autograd state is kept
            with torch.no_grad():
                input_var = Variable(inputs.type(args.type))
                target_var = Variable(target)
                # compute output
                output = model(input_var)
        else:
            input_var = Variable(inputs.type(args.type))
            target_var = Variable(target)
            # compute output
            output = model(input_var)
loss = criterion(output, target_var) | |||||
if type(output) is list: | |||||
output = output[0] | |||||
# measure accuracy and record loss | |||||
prec1, prec5 = accuracy(output.data, target, topk=(1, 5)) | |||||
losses.update(loss.item(), inputs.size(0)) | |||||
top1.update(prec1.item(), inputs.size(0)) | |||||
top5.update(prec5.item(), inputs.size(0)) | |||||
        if training:
            # compute gradient and do SGD step
            optimizer.zero_grad()
            loss.backward()
            # restore the full-precision weights (kept in p.org by the binarized
            # layers) so the optimizer step is applied to the real-valued weights
            for p in list(model.parameters()):
                if hasattr(p, 'org'):
                    p.data.copy_(p.org)
            optimizer.step()
            # copy the updated weights back into p.org, clamped to [-1, 1]
            for p in list(model.parameters()):
                if hasattr(p, 'org'):
                    p.org.copy_(p.data.clamp_(-1, 1))
# measure elapsed time | |||||
batch_time.update(time.time() - end) | |||||
end = time.time() | |||||
if i % args.print_freq == 0: | |||||
logging.info('{phase} - Epoch: [{0}][{1}/{2}]\t' | |||||
'Time {batch_time.val:.3f} ({batch_time.avg:.3f})\t' | |||||
'Data {data_time.val:.3f} ({data_time.avg:.3f})\t' | |||||
'Loss {loss.val:.4f} ({loss.avg:.4f})\t' | |||||
'Prec@1 {top1.val:.3f} ({top1.avg:.3f})\t' | |||||
'Prec@5 {top5.val:.3f} ({top5.avg:.3f})'.format( | |||||
epoch, i, len(data_loader), | |||||
phase='TRAINING' if training else 'EVALUATING', | |||||
batch_time=batch_time, | |||||
data_time=data_time, loss=losses, top1=top1, top5=top5)) | |||||
return losses.avg, top1.avg, top5.avg | |||||
def train(data_loader, model, criterion, epoch, optimizer): | |||||
# switch to train mode | |||||
model.train() | |||||
return forward(data_loader, model, criterion, epoch, | |||||
training=True, optimizer=optimizer) | |||||
def validate(data_loader, model, criterion, epoch): | |||||
# switch to evaluate mode | |||||
model.eval() | |||||
return forward(data_loader, model, criterion, epoch, | |||||
training=False, optimizer=None) | |||||
if __name__ == '__main__': | |||||
main() |
@ -0,0 +1,332 @@ | |||||
import argparse | |||||
import os | |||||
import time | |||||
import logging | |||||
import torch | |||||
import torch.nn as nn | |||||
import torch.nn.parallel | |||||
import torch.backends.cudnn as cudnn | |||||
import torch.optim | |||||
import torch.utils.data | |||||
import models | |||||
from torch.autograd import Variable | |||||
from data import get_dataset | |||||
from preprocess import get_transform | |||||
from utils import * | |||||
from datetime import datetime | |||||
from ast import literal_eval | |||||
from torchvision.utils import save_image | |||||
from models.binarized_modules import HingeLoss | |||||
model_names = sorted(name for name in models.__dict__ | |||||
if name.islower() and not name.startswith("__") | |||||
and callable(models.__dict__[name])) | |||||
parser = argparse.ArgumentParser(description='PyTorch ConvNet Training') | |||||
parser.add_argument('--results_dir', metavar='RESULTS_DIR', default='/media/hdd/ihubara/BinaryNet.pytorch/results', | |||||
help='results dir') | |||||
parser.add_argument('--save', metavar='SAVE', default='', | |||||
help='saved folder') | |||||
parser.add_argument('--dataset', metavar='DATASET', default='imagenet', | |||||
help='dataset name or folder') | |||||
parser.add_argument('--model', '-a', metavar='MODEL', default='alexnet', | |||||
choices=model_names, | |||||
help='model architecture: ' + | |||||
' | '.join(model_names) + | |||||
' (default: alexnet)') | |||||
parser.add_argument('--input_size', type=int, default=None, | |||||
help='image input size') | |||||
parser.add_argument('--model_config', default='', | |||||
help='additional architecture configuration') | |||||
parser.add_argument('--type', default='torch.cuda.FloatTensor', | |||||
help='type of tensor - e.g torch.cuda.HalfTensor') | |||||
parser.add_argument('--gpus', default='0', | |||||
help='gpus used for training - e.g 0,1,3') | |||||
parser.add_argument('-j', '--workers', default=8, type=int, metavar='N', | |||||
help='number of data loading workers (default: 8)') | |||||
parser.add_argument('--epochs', default=900, type=int, metavar='N', | |||||
help='number of total epochs to run') | |||||
parser.add_argument('--start-epoch', default=0, type=int, metavar='N', | |||||
help='manual epoch number (useful on restarts)') | |||||
parser.add_argument('-b', '--batch-size', default=256, type=int, | |||||
metavar='N', help='mini-batch size (default: 256)') | |||||
parser.add_argument('--optimizer', default='SGD', type=str, metavar='OPT', | |||||
help='optimizer function used') | |||||
parser.add_argument('--lr', '--learning_rate', default=0.1, type=float, | |||||
metavar='LR', help='initial learning rate') | |||||
parser.add_argument('--momentum', default=0.9, type=float, metavar='M', | |||||
help='momentum') | |||||
parser.add_argument('--weight-decay', '--wd', default=1e-4, type=float, | |||||
metavar='W', help='weight decay (default: 1e-4)') | |||||
parser.add_argument('--print-freq', '-p', default=10, type=int, | |||||
metavar='N', help='print frequency (default: 10)') | |||||
parser.add_argument('--resume', default='', type=str, metavar='PATH', | |||||
help='path to latest checkpoint (default: none)') | |||||
parser.add_argument('-e', '--evaluate', type=str, metavar='FILE', | |||||
help='evaluate model FILE on validation set') | |||||
torch.cuda.random.manual_seed_all(10) | |||||
output_dim = 0 | |||||
def main(): | |||||
global args, best_prec1, output_dim | |||||
best_prec1 = 0 | |||||
args = parser.parse_args() | |||||
output_dim = {'cifar10': 10, 'cifar100':100, 'imagenet': 1000}[args.dataset] | |||||
if args.evaluate: | |||||
args.results_dir = '/tmp' | |||||
    if args.save == '':
args.save = datetime.now().strftime('%Y-%m-%d_%H-%M-%S') | |||||
save_path = os.path.join(args.results_dir, args.save) | |||||
if not os.path.exists(save_path): | |||||
os.makedirs(save_path) | |||||
setup_logging(os.path.join(save_path, 'log.txt')) | |||||
results_file = os.path.join(save_path, 'results.%s') | |||||
results = ResultsLog(results_file % 'csv', results_file % 'html') | |||||
logging.info("saving to %s", save_path) | |||||
logging.debug("run arguments: %s", args) | |||||
if 'cuda' in args.type: | |||||
args.gpus = [int(i) for i in args.gpus.split(',')] | |||||
torch.cuda.set_device(args.gpus[0]) | |||||
cudnn.benchmark = True | |||||
else: | |||||
args.gpus = None | |||||
# create model | |||||
logging.info("creating model %s", args.model) | |||||
model = models.__dict__[args.model] | |||||
model_config = {'input_size': args.input_size, 'dataset': args.dataset, 'num_classes': output_dim} | |||||
    if args.model_config != '':
model_config = dict(model_config, **literal_eval(args.model_config)) | |||||
model = model(**model_config) | |||||
logging.info("created model with configuration: %s", model_config) | |||||
# optionally resume from a checkpoint | |||||
if args.evaluate: | |||||
if not os.path.isfile(args.evaluate): | |||||
parser.error('invalid checkpoint: {}'.format(args.evaluate)) | |||||
checkpoint = torch.load(args.evaluate) | |||||
model.load_state_dict(checkpoint['state_dict']) | |||||
logging.info("loaded checkpoint '%s' (epoch %s)", | |||||
args.evaluate, checkpoint['epoch']) | |||||
elif args.resume: | |||||
checkpoint_file = args.resume | |||||
if os.path.isdir(checkpoint_file): | |||||
results.load(os.path.join(checkpoint_file, 'results.csv')) | |||||
checkpoint_file = os.path.join( | |||||
checkpoint_file, 'model_best.pth.tar') | |||||
if os.path.isfile(checkpoint_file): | |||||
logging.info("loading checkpoint '%s'", args.resume) | |||||
checkpoint = torch.load(checkpoint_file) | |||||
args.start_epoch = checkpoint['epoch'] - 1 | |||||
best_prec1 = checkpoint['best_prec1'] | |||||
model.load_state_dict(checkpoint['state_dict']) | |||||
logging.info("loaded checkpoint '%s' (epoch %s)", | |||||
checkpoint_file, checkpoint['epoch']) | |||||
else: | |||||
logging.error("no checkpoint found at '%s'", args.resume) | |||||
num_parameters = sum([l.nelement() for l in model.parameters()]) | |||||
logging.info("number of parameters: %d", num_parameters) | |||||
# Data loading code | |||||
default_transform = { | |||||
'train': get_transform(args.dataset, | |||||
input_size=args.input_size, augment=True), | |||||
'eval': get_transform(args.dataset, | |||||
input_size=args.input_size, augment=False) | |||||
} | |||||
transform = getattr(model, 'input_transform', default_transform) | |||||
regime = getattr(model, 'regime', {0: {'optimizer': args.optimizer, | |||||
'lr': args.lr, | |||||
'momentum': args.momentum, | |||||
'weight_decay': args.weight_decay}}) | |||||
# define loss function (criterion) and optimizer | |||||
#criterion = getattr(model, 'criterion', nn.NLLLoss)() | |||||
criterion = getattr(model, 'criterion', HingeLoss)() | |||||
#criterion.type(args.type) | |||||
model.type(args.type) | |||||
val_data = get_dataset(args.dataset, 'val', transform['eval']) | |||||
val_loader = torch.utils.data.DataLoader( | |||||
val_data, | |||||
batch_size=args.batch_size, shuffle=False, | |||||
num_workers=args.workers, pin_memory=True) | |||||
if args.evaluate: | |||||
validate(val_loader, model, criterion, 0) | |||||
return | |||||
train_data = get_dataset(args.dataset, 'train', transform['train']) | |||||
train_loader = torch.utils.data.DataLoader( | |||||
train_data, | |||||
batch_size=args.batch_size, shuffle=True, | |||||
num_workers=args.workers, pin_memory=True) | |||||
optimizer = torch.optim.SGD(model.parameters(), lr=args.lr) | |||||
logging.info('training regime: %s', regime) | |||||
for epoch in range(args.start_epoch, args.epochs): | |||||
optimizer = adjust_optimizer(optimizer, epoch, regime) | |||||
# train for one epoch | |||||
train_loss, train_prec1, train_prec5 = train( | |||||
train_loader, model, criterion, epoch, optimizer) | |||||
# evaluate on validation set | |||||
val_loss, val_prec1, val_prec5 = validate( | |||||
val_loader, model, criterion, epoch) | |||||
# remember best prec@1 and save checkpoint | |||||
is_best = val_prec1 > best_prec1 | |||||
best_prec1 = max(val_prec1, best_prec1) | |||||
save_checkpoint({ | |||||
'epoch': epoch + 1, | |||||
'model': args.model, | |||||
'config': args.model_config, | |||||
'state_dict': model.state_dict(), | |||||
'best_prec1': best_prec1, | |||||
'regime': regime | |||||
}, is_best, path=save_path) | |||||
logging.info('\n Epoch: {0}\t' | |||||
'Training Loss {train_loss:.4f} \t' | |||||
'Training Prec@1 {train_prec1:.3f} \t' | |||||
'Training Prec@5 {train_prec5:.3f} \t' | |||||
'Validation Loss {val_loss:.4f} \t' | |||||
'Validation Prec@1 {val_prec1:.3f} \t' | |||||
'Validation Prec@5 {val_prec5:.3f} \n' | |||||
.format(epoch + 1, train_loss=train_loss, val_loss=val_loss, | |||||
train_prec1=train_prec1, val_prec1=val_prec1, | |||||
train_prec5=train_prec5, val_prec5=val_prec5)) | |||||
results.add(epoch=epoch + 1, train_loss=train_loss, val_loss=val_loss, | |||||
train_error1=100 - train_prec1, val_error1=100 - val_prec1, | |||||
train_error5=100 - train_prec5, val_error5=100 - val_prec5) | |||||
results.plot(x='epoch', y=['train_loss', 'val_loss'], | |||||
title='Loss', ylabel='loss') | |||||
results.plot(x='epoch', y=['train_error1', 'val_error1'], | |||||
title='Error@1', ylabel='error %') | |||||
results.plot(x='epoch', y=['train_error5', 'val_error5'], | |||||
title='Error@5', ylabel='error %') | |||||
results.save() | |||||
def forward(data_loader, model, criterion, epoch=0, training=True, optimizer=None): | |||||
if args.gpus and len(args.gpus) > 1: | |||||
model = torch.nn.DataParallel(model, args.gpus) | |||||
batch_time = AverageMeter() | |||||
data_time = AverageMeter() | |||||
losses = AverageMeter() | |||||
top1 = AverageMeter() | |||||
top5 = AverageMeter() | |||||
end = time.time() | |||||
for i, (inputs, target) in enumerate(data_loader): | |||||
# measure data loading time | |||||
data_time.update(time.time() - end) | |||||
if args.gpus is not None: | |||||
target = target.cuda() | |||||
        if criterion.__class__.__name__ == 'HingeLoss':
            # the hinge loss expects targets in {-1, +1}: build a one-hot style
            # target filled with -1 and set the true class entry to +1
            target = target.unsqueeze(1)
            target_onehot = torch.cuda.FloatTensor(target.size(0), output_dim)
            target_onehot.fill_(-1)
            target_onehot.scatter_(1, target, 1)
            target = target.squeeze()
if not training: | |||||
with torch.no_grad(): | |||||
input_var = Variable(inputs.type(args.type)) | |||||
target_var = Variable(target_onehot) | |||||
# compute output | |||||
output = model(input_var) | |||||
else: | |||||
input_var = Variable(inputs.type(args.type)) | |||||
target_var = Variable(target_onehot) | |||||
# compute output | |||||
output = model(input_var) | |||||
        loss = criterion(output, target_onehot)
if type(output) is list: | |||||
output = output[0] | |||||
# measure accuracy and record loss | |||||
prec1, prec5 = accuracy(output.data, target, topk=(1, 5)) | |||||
losses.update(loss.item(), inputs.size(0)) | |||||
top1.update(prec1.item(), inputs.size(0)) | |||||
top5.update(prec5.item(), inputs.size(0)) | |||||
        if training:
            # compute gradient and do SGD step
            optimizer.zero_grad()
            loss.backward()
            # restore the full-precision weights before the optimizer step
            for p in list(model.parameters()):
                if hasattr(p, 'org'):
                    p.data.copy_(p.org)
            optimizer.step()
            # copy the updated weights back into p.org, clamped to [-1, 1]
            for p in list(model.parameters()):
                if hasattr(p, 'org'):
                    p.org.copy_(p.data.clamp_(-1, 1))
# measure elapsed time | |||||
batch_time.update(time.time() - end) | |||||
end = time.time() | |||||
if i % args.print_freq == 0: | |||||
logging.info('{phase} - Epoch: [{0}][{1}/{2}]\t' | |||||
'Time {batch_time.val:.3f} ({batch_time.avg:.3f})\t' | |||||
'Data {data_time.val:.3f} ({data_time.avg:.3f})\t' | |||||
'Loss {loss.val:.4f} ({loss.avg:.4f})\t' | |||||
'Prec@1 {top1.val:.3f} ({top1.avg:.3f})\t' | |||||
'Prec@5 {top5.val:.3f} ({top5.avg:.3f})'.format( | |||||
epoch, i, len(data_loader), | |||||
phase='TRAINING' if training else 'EVALUATING', | |||||
batch_time=batch_time, | |||||
data_time=data_time, loss=losses, top1=top1, top5=top5)) | |||||
return losses.avg, top1.avg, top5.avg | |||||
def train(data_loader, model, criterion, epoch, optimizer): | |||||
# switch to train mode | |||||
model.train() | |||||
return forward(data_loader, model, criterion, epoch, | |||||
training=True, optimizer=optimizer) | |||||
def validate(data_loader, model, criterion, epoch): | |||||
# switch to evaluate mode | |||||
model.eval() | |||||
return forward(data_loader, model, criterion, epoch, | |||||
training=False, optimizer=None) | |||||
if __name__ == '__main__': | |||||
main() |
@ -0,0 +1,150 @@ | |||||
from __future__ import print_function | |||||
import argparse | |||||
import torch | |||||
import torch.nn as nn | |||||
import torch.nn.functional as F | |||||
import torch.optim as optim | |||||
from torchvision import datasets, transforms | |||||
from torch.autograd import Variable | |||||
from models.binarized_modules import BinarizeLinear,BinarizeConv2d | |||||
from models.binarized_modules import Binarize,HingeLoss | |||||
# Training settings | |||||
parser = argparse.ArgumentParser(description='PyTorch MNIST Example') | |||||
parser.add_argument('--batch-size', type=int, default=64, metavar='N',
                    help='input batch size for training (default: 64)')
parser.add_argument('--test-batch-size', type=int, default=1000, metavar='N', | |||||
help='input batch size for testing (default: 1000)') | |||||
parser.add_argument('--epochs', type=int, default=100, metavar='N',
                    help='number of epochs to train (default: 100)')
parser.add_argument('--lr', type=float, default=0.01, metavar='LR',
                    help='learning rate (default: 0.01)')
parser.add_argument('--momentum', type=float, default=0.5, metavar='M', | |||||
help='SGD momentum (default: 0.5)') | |||||
parser.add_argument('--no-cuda', action='store_true', default=False, | |||||
help='disables CUDA training') | |||||
parser.add_argument('--seed', type=int, default=1, metavar='S', | |||||
help='random seed (default: 1)') | |||||
parser.add_argument('--gpus', default=3, | |||||
help='gpus used for training - e.g 0,1,3') | |||||
parser.add_argument('--log-interval', type=int, default=10, metavar='N', | |||||
help='how many batches to wait before logging training status') | |||||
args = parser.parse_args() | |||||
args.cuda = not args.no_cuda and torch.cuda.is_available() | |||||
torch.manual_seed(args.seed) | |||||
if args.cuda: | |||||
torch.cuda.manual_seed(args.seed) | |||||
kwargs = {'num_workers': 1, 'pin_memory': True} if args.cuda else {} | |||||
train_loader = torch.utils.data.DataLoader( | |||||
datasets.MNIST('../data', train=True, download=True, | |||||
transform=transforms.Compose([ | |||||
transforms.ToTensor(), | |||||
transforms.Normalize((0.1307,), (0.3081,)) | |||||
])), | |||||
batch_size=args.batch_size, shuffle=True, **kwargs) | |||||
test_loader = torch.utils.data.DataLoader( | |||||
datasets.MNIST('../data', train=False, transform=transforms.Compose([ | |||||
transforms.ToTensor(), | |||||
transforms.Normalize((0.1307,), (0.3081,)) | |||||
])), | |||||
batch_size=args.test_batch_size, shuffle=True, **kwargs) | |||||
class Net(nn.Module): | |||||
def __init__(self): | |||||
super(Net, self).__init__() | |||||
self.infl_ratio=3 | |||||
self.fc1 = BinarizeLinear(784, 2048*self.infl_ratio) | |||||
self.htanh1 = nn.Hardtanh() | |||||
self.bn1 = nn.BatchNorm1d(2048*self.infl_ratio) | |||||
self.fc2 = BinarizeLinear(2048*self.infl_ratio, 2048*self.infl_ratio) | |||||
self.htanh2 = nn.Hardtanh() | |||||
self.bn2 = nn.BatchNorm1d(2048*self.infl_ratio) | |||||
self.fc3 = BinarizeLinear(2048*self.infl_ratio, 2048*self.infl_ratio) | |||||
self.htanh3 = nn.Hardtanh() | |||||
self.bn3 = nn.BatchNorm1d(2048*self.infl_ratio) | |||||
self.fc4 = nn.Linear(2048*self.infl_ratio, 10) | |||||
self.logsoftmax=nn.LogSoftmax() | |||||
self.drop=nn.Dropout(0.5) | |||||
def forward(self, x): | |||||
x = x.view(-1, 28*28) | |||||
x = self.fc1(x) | |||||
x = self.bn1(x) | |||||
x = self.htanh1(x) | |||||
x = self.fc2(x) | |||||
x = self.bn2(x) | |||||
x = self.htanh2(x) | |||||
x = self.fc3(x) | |||||
x = self.drop(x) | |||||
x = self.bn3(x) | |||||
x = self.htanh3(x) | |||||
x = self.fc4(x) | |||||
return self.logsoftmax(x) | |||||
model = Net() | |||||
if args.cuda: | |||||
torch.cuda.set_device(3) | |||||
model.cuda() | |||||
criterion = nn.CrossEntropyLoss() | |||||
optimizer = optim.Adam(model.parameters(), lr=args.lr) | |||||
def train(epoch): | |||||
model.train() | |||||
for batch_idx, (data, target) in enumerate(train_loader): | |||||
if args.cuda: | |||||
data, target = data.cuda(), target.cuda() | |||||
data, target = Variable(data), Variable(target) | |||||
optimizer.zero_grad() | |||||
output = model(data) | |||||
loss = criterion(output, target) | |||||
        # learning-rate decay is handled once per epoch in the training loop at
        # the bottom of the file
loss.backward() | |||||
for p in list(model.parameters()): | |||||
if hasattr(p,'org'): | |||||
p.data.copy_(p.org) | |||||
optimizer.step() | |||||
for p in list(model.parameters()): | |||||
if hasattr(p,'org'): | |||||
p.org.copy_(p.data.clamp_(-1,1)) | |||||
if batch_idx % args.log_interval == 0: | |||||
print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format( | |||||
epoch, batch_idx * len(data), len(train_loader.dataset), | |||||
100. * batch_idx / len(train_loader), loss.item())) | |||||
def test(): | |||||
model.eval() | |||||
test_loss = 0 | |||||
correct = 0 | |||||
with torch.no_grad(): | |||||
for data, target in test_loader: | |||||
if args.cuda: | |||||
data, target = data.cuda(), target.cuda() | |||||
data, target = Variable(data), Variable(target) | |||||
output = model(data) | |||||
test_loss += criterion(output, target).item() # sum up batch loss | |||||
pred = output.data.max(1, keepdim=True)[1] # get the index of the max log-probability | |||||
correct += pred.eq(target.data.view_as(pred)).cpu().sum() | |||||
test_loss /= len(test_loader.dataset) | |||||
print('\nTest set: Average loss: {:.4f}, Accuracy: {}/{} ({:.0f}%)\n'.format( | |||||
test_loss, correct, len(test_loader.dataset), | |||||
100. * correct / len(test_loader.dataset))) | |||||
for epoch in range(1, args.epochs + 1): | |||||
train(epoch) | |||||
test() | |||||
if epoch%40==0: | |||||
optimizer.param_groups[0]['lr']=optimizer.param_groups[0]['lr']*0.1 |
@ -0,0 +1,6 @@ | |||||
from .alexnet import * | |||||
from .alexnet_binary import * | |||||
from .resnet import * | |||||
from .resnet_binary import * | |||||
from .vgg_cifar10_binary import * |
@ -0,0 +1,78 @@ | |||||
import torch.nn as nn | |||||
import torchvision.transforms as transforms | |||||
__all__ = ['alexnet'] | |||||
class AlexNetOWT_BN(nn.Module): | |||||
def __init__(self, num_classes=1000): | |||||
super(AlexNetOWT_BN, self).__init__() | |||||
self.features = nn.Sequential( | |||||
nn.Conv2d(3, 64, kernel_size=11, stride=4, padding=2, | |||||
bias=False), | |||||
nn.MaxPool2d(kernel_size=3, stride=2), | |||||
nn.BatchNorm2d(64), | |||||
nn.ReLU(inplace=True), | |||||
nn.Conv2d(64, 192, kernel_size=5, padding=2, bias=False), | |||||
nn.MaxPool2d(kernel_size=3, stride=2), | |||||
nn.ReLU(inplace=True), | |||||
nn.BatchNorm2d(192), | |||||
nn.Conv2d(192, 384, kernel_size=3, padding=1, bias=False), | |||||
nn.ReLU(inplace=True), | |||||
nn.BatchNorm2d(384), | |||||
nn.Conv2d(384, 256, kernel_size=3, padding=1, bias=False), | |||||
nn.ReLU(inplace=True), | |||||
nn.BatchNorm2d(256), | |||||
nn.Conv2d(256, 256, kernel_size=3, padding=1, bias=False), | |||||
nn.MaxPool2d(kernel_size=3, stride=2), | |||||
nn.ReLU(inplace=True), | |||||
nn.BatchNorm2d(256) | |||||
) | |||||
self.classifier = nn.Sequential( | |||||
nn.Linear(256 * 6 * 6, 4096, bias=False), | |||||
nn.BatchNorm1d(4096), | |||||
nn.ReLU(inplace=True), | |||||
nn.Dropout(0.5), | |||||
nn.Linear(4096, 4096, bias=False), | |||||
nn.BatchNorm1d(4096), | |||||
nn.ReLU(inplace=True), | |||||
nn.Dropout(0.5), | |||||
nn.Linear(4096, num_classes) | |||||
) | |||||
self.regime = { | |||||
0: {'optimizer': 'SGD', 'lr': 1e-2, | |||||
'weight_decay': 5e-4, 'momentum': 0.9}, | |||||
10: {'lr': 5e-3}, | |||||
15: {'lr': 1e-3, 'weight_decay': 0}, | |||||
20: {'lr': 5e-4}, | |||||
25: {'lr': 1e-4} | |||||
} | |||||
normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406], | |||||
std=[0.229, 0.224, 0.225]) | |||||
self.input_transform = { | |||||
'train': transforms.Compose([ | |||||
transforms.Scale(256), | |||||
transforms.RandomCrop(224), | |||||
transforms.RandomHorizontalFlip(), | |||||
transforms.ToTensor(), | |||||
normalize | |||||
]), | |||||
'eval': transforms.Compose([ | |||||
transforms.Scale(256), | |||||
transforms.CenterCrop(224), | |||||
transforms.ToTensor(), | |||||
normalize | |||||
]) | |||||
} | |||||
def forward(self, x): | |||||
x = self.features(x) | |||||
x = x.view(-1, 256 * 6 * 6) | |||||
x = self.classifier(x) | |||||
return x | |||||
def alexnet(**kwargs): | |||||
    num_classes = kwargs.get('num_classes', 1000)
return AlexNetOWT_BN(num_classes) |
@ -0,0 +1,92 @@ | |||||
import torch.nn as nn | |||||
import torchvision.transforms as transforms | |||||
from .binarized_modules import BinarizeLinear,BinarizeConv2d | |||||
__all__ = ['alexnet_binary'] | |||||
class AlexNetOWT_BN(nn.Module): | |||||
def __init__(self, num_classes=1000): | |||||
super(AlexNetOWT_BN, self).__init__() | |||||
self.ratioInfl=3 | |||||
self.features = nn.Sequential( | |||||
BinarizeConv2d(3, int(64*self.ratioInfl), kernel_size=11, stride=4, padding=2), | |||||
nn.MaxPool2d(kernel_size=3, stride=2), | |||||
nn.BatchNorm2d(int(64*self.ratioInfl)), | |||||
nn.Hardtanh(inplace=True), | |||||
BinarizeConv2d(int(64*self.ratioInfl), int(192*self.ratioInfl), kernel_size=5, padding=2), | |||||
nn.MaxPool2d(kernel_size=3, stride=2), | |||||
nn.BatchNorm2d(int(192*self.ratioInfl)), | |||||
nn.Hardtanh(inplace=True), | |||||
BinarizeConv2d(int(192*self.ratioInfl), int(384*self.ratioInfl), kernel_size=3, padding=1), | |||||
nn.BatchNorm2d(int(384*self.ratioInfl)), | |||||
nn.Hardtanh(inplace=True), | |||||
BinarizeConv2d(int(384*self.ratioInfl), int(256*self.ratioInfl), kernel_size=3, padding=1), | |||||
nn.BatchNorm2d(int(256*self.ratioInfl)), | |||||
nn.Hardtanh(inplace=True), | |||||
BinarizeConv2d(int(256*self.ratioInfl), 256, kernel_size=3, padding=1), | |||||
nn.MaxPool2d(kernel_size=3, stride=2), | |||||
nn.BatchNorm2d(256), | |||||
nn.Hardtanh(inplace=True) | |||||
) | |||||
self.classifier = nn.Sequential( | |||||
BinarizeLinear(256 * 6 * 6, 4096), | |||||
nn.BatchNorm1d(4096), | |||||
nn.Hardtanh(inplace=True), | |||||
#nn.Dropout(0.5), | |||||
BinarizeLinear(4096, 4096), | |||||
nn.BatchNorm1d(4096), | |||||
nn.Hardtanh(inplace=True), | |||||
#nn.Dropout(0.5), | |||||
BinarizeLinear(4096, num_classes), | |||||
            nn.BatchNorm1d(num_classes),
nn.LogSoftmax() | |||||
) | |||||
#self.regime = { | |||||
# 0: {'optimizer': 'SGD', 'lr': 1e-2, | |||||
# 'weight_decay': 5e-4, 'momentum': 0.9}, | |||||
# 10: {'lr': 5e-3}, | |||||
# 15: {'lr': 1e-3, 'weight_decay': 0}, | |||||
# 20: {'lr': 5e-4}, | |||||
# 25: {'lr': 1e-4} | |||||
#} | |||||
self.regime = { | |||||
0: {'optimizer': 'Adam', 'lr': 5e-3}, | |||||
20: {'lr': 1e-3}, | |||||
30: {'lr': 5e-4}, | |||||
35: {'lr': 1e-4}, | |||||
40: {'lr': 1e-5} | |||||
} | |||||
normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406], | |||||
std=[0.229, 0.224, 0.225]) | |||||
self.input_transform = { | |||||
'train': transforms.Compose([ | |||||
transforms.Scale(256), | |||||
transforms.RandomCrop(224), | |||||
transforms.RandomHorizontalFlip(), | |||||
transforms.ToTensor(), | |||||
normalize | |||||
]), | |||||
'eval': transforms.Compose([ | |||||
transforms.Scale(256), | |||||
transforms.CenterCrop(224), | |||||
transforms.ToTensor(), | |||||
normalize | |||||
]) | |||||
} | |||||
def forward(self, x): | |||||
x = self.features(x) | |||||
x = x.view(-1, 256 * 6 * 6) | |||||
x = self.classifier(x) | |||||
return x | |||||
def alexnet_binary(**kwargs): | |||||
    num_classes = kwargs.get('num_classes', 1000)
return AlexNetOWT_BN(num_classes) |
@ -0,0 +1,109 @@ | |||||
import torch | |||||
import pdb | |||||
import torch.nn as nn | |||||
import math | |||||
from torch.autograd import Variable | |||||
from torch.autograd import Function | |||||
import numpy as np | |||||
def Binarize(tensor,quant_mode='det'): | |||||
if quant_mode=='det': | |||||
return tensor.sign() | |||||
else: | |||||
return tensor.add_(1).div_(2).add_(torch.rand(tensor.size()).add(-0.5)).clamp_(0,1).round().mul_(2).add_(-1) | |||||
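# A minimal usage sketch (assumed, not part of the original code): Binarize maps a
# real-valued tensor to {-1, +1}. 'det' takes the sign; any other mode rounds
# stochastically with probability given by a hard sigmoid of the value (note that
# the stochastic branch modifies its input in place and draws CPU noise).
#   w_det = Binarize(torch.randn(3, 3))            # deterministic
#   w_sto = Binarize(torch.randn(3, 3), 'stoch')   # stochastic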
class HingeLoss(nn.Module): | |||||
def __init__(self): | |||||
super(HingeLoss,self).__init__() | |||||
self.margin=1.0 | |||||
def hinge_loss(self,input,target): | |||||
#import pdb; pdb.set_trace() | |||||
output=self.margin-input.mul(target) | |||||
output[output.le(0)]=0 | |||||
return output.mean() | |||||
def forward(self, input, target): | |||||
return self.hinge_loss(input,target) | |||||
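# HingeLoss computes mean(max(0, margin - y * y_hat)); targets are therefore
# expected to be encoded as +/-1 (the hinge training script builds them as a
# -1-filled one-hot tensor with +1 at the true class).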
class SqrtHingeLossFunction(Function): | |||||
def __init__(self): | |||||
super(SqrtHingeLossFunction,self).__init__() | |||||
self.margin=1.0 | |||||
def forward(self, input, target): | |||||
output=self.margin-input.mul(target) | |||||
output[output.le(0)]=0 | |||||
self.save_for_backward(input, target) | |||||
loss=output.mul(output).sum(0).sum(1).div(target.numel()) | |||||
return loss | |||||
def backward(self,grad_output): | |||||
input, target = self.saved_tensors | |||||
output=self.margin-input.mul(target) | |||||
output[output.le(0)]=0 | |||||
grad_output.resize_as_(input).copy_(target).mul_(-2).mul_(output) | |||||
grad_output.mul_(output.ne(0).float()) | |||||
grad_output.div_(input.numel()) | |||||
return grad_output,grad_output | |||||
def Quantize(tensor,quant_mode='det', params=None, numBits=8): | |||||
tensor.clamp_(-2**(numBits-1),2**(numBits-1)) | |||||
if quant_mode=='det': | |||||
tensor=tensor.mul(2**(numBits-1)).round().div(2**(numBits-1)) | |||||
else: | |||||
tensor=tensor.mul(2**(numBits-1)).round().add(torch.rand(tensor.size()).add(-0.5)).div(2**(numBits-1)) | |||||
quant_fixed(tensor, params) | |||||
return tensor | |||||
#import torch.nn._functions as tnnf | |||||
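# The binarized layers below keep a full-precision copy of the weights in
# `weight.org` and overwrite `weight.data` with its binarized version on every
# forward pass; the training scripts restore `p.org` into `p.data` before each
# optimizer step and clamp the updated values back into [-1, 1] afterwards.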
class BinarizeLinear(nn.Linear): | |||||
def __init__(self, *kargs, **kwargs): | |||||
super(BinarizeLinear, self).__init__(*kargs, **kwargs) | |||||
def forward(self, input): | |||||
# if input.size(1) != 784: | |||||
# input.data=Binarize(input.data) | |||||
if not hasattr(self.weight,'org'): | |||||
self.weight.org=self.weight.data.clone() | |||||
self.weight.data=Binarize(self.weight.org) | |||||
out = nn.functional.linear(input, self.weight) | |||||
if not self.bias is None: | |||||
self.bias.org=self.bias.data.clone() | |||||
out += self.bias.view(1, -1).expand_as(out) | |||||
return out | |||||
class BinarizeConv2d(nn.Conv2d): | |||||
def __init__(self, *kargs, **kwargs): | |||||
super(BinarizeConv2d, self).__init__(*kargs, **kwargs) | |||||
def forward(self, input): | |||||
# if input.size(1) != 3: | |||||
# input.data = Binarize(input.data) | |||||
if not hasattr(self.weight,'org'): | |||||
self.weight.org=self.weight.data.clone() | |||||
self.weight.data=Binarize(self.weight.org) | |||||
out = nn.functional.conv2d(input, self.weight, None, self.stride, | |||||
self.padding, self.dilation, self.groups) | |||||
if not self.bias is None: | |||||
self.bias.org=self.bias.data.clone() | |||||
out += self.bias.view(1, -1, 1, 1).expand_as(out) | |||||
return out | |||||
# x = torch.tensor([[255.0, 200.0, 201.0], [210.0, 222.0, 223.0]]) | |||||
# print(Quantize(x,quant_mode='det', params=None, numBits=8)) |
@ -0,0 +1,217 @@ | |||||
import torch.nn as nn | |||||
import torchvision.transforms as transforms | |||||
import math | |||||
__all__ = ['resnet'] | |||||
def conv3x3(in_planes, out_planes, stride=1): | |||||
"3x3 convolution with padding" | |||||
return nn.Conv2d(in_planes, out_planes, kernel_size=3, stride=stride, | |||||
padding=1, bias=False) | |||||
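# init_model applies He-style initialization: conv weights are drawn from
# N(0, sqrt(2 / n)) with n = k_h * k_w * out_channels, and BatchNorm layers are
# started as the identity map (weight 1, bias 0).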
def init_model(model): | |||||
for m in model.modules(): | |||||
if isinstance(m, nn.Conv2d): | |||||
n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels | |||||
m.weight.data.normal_(0, math.sqrt(2. / n)) | |||||
elif isinstance(m, nn.BatchNorm2d): | |||||
m.weight.data.fill_(1) | |||||
m.bias.data.zero_() | |||||
class BasicBlock(nn.Module): | |||||
expansion = 1 | |||||
def __init__(self, inplanes, planes, stride=1, downsample=None): | |||||
super(BasicBlock, self).__init__() | |||||
self.conv1 = conv3x3(inplanes, planes, stride) | |||||
self.bn1 = nn.BatchNorm2d(planes) | |||||
self.relu = nn.ReLU(inplace=True) | |||||
self.conv2 = conv3x3(planes, planes) | |||||
self.bn2 = nn.BatchNorm2d(planes) | |||||
self.downsample = downsample | |||||
self.stride = stride | |||||
def forward(self, x): | |||||
residual = x | |||||
out = self.conv1(x) | |||||
out = self.bn1(out) | |||||
out = self.relu(out) | |||||
out = self.conv2(out) | |||||
out = self.bn2(out) | |||||
if self.downsample is not None: | |||||
residual = self.downsample(x) | |||||
out += residual | |||||
out = self.relu(out) | |||||
return out | |||||
class Bottleneck(nn.Module): | |||||
expansion = 4 | |||||
def __init__(self, inplanes, planes, stride=1, downsample=None): | |||||
super(Bottleneck, self).__init__() | |||||
self.conv1 = nn.Conv2d(inplanes, planes, kernel_size=1, bias=False) | |||||
self.bn1 = nn.BatchNorm2d(planes) | |||||
self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=stride, | |||||
padding=1, bias=False) | |||||
self.bn2 = nn.BatchNorm2d(planes) | |||||
self.conv3 = nn.Conv2d(planes, planes * 4, kernel_size=1, bias=False) | |||||
self.bn3 = nn.BatchNorm2d(planes * 4) | |||||
self.relu = nn.ReLU(inplace=True) | |||||
self.downsample = downsample | |||||
self.stride = stride | |||||
def forward(self, x): | |||||
residual = x | |||||
out = self.conv1(x) | |||||
out = self.bn1(out) | |||||
out = self.relu(out) | |||||
out = self.conv2(out) | |||||
out = self.bn2(out) | |||||
out = self.relu(out) | |||||
out = self.conv3(out) | |||||
out = self.bn3(out) | |||||
if self.downsample is not None: | |||||
residual = self.downsample(x) | |||||
out += residual | |||||
out = self.relu(out) | |||||
return out | |||||
class ResNet(nn.Module): | |||||
def __init__(self): | |||||
super(ResNet, self).__init__() | |||||
def _make_layer(self, block, planes, blocks, stride=1): | |||||
downsample = None | |||||
if stride != 1 or self.inplanes != planes * block.expansion: | |||||
downsample = nn.Sequential( | |||||
nn.Conv2d(self.inplanes, planes * block.expansion, | |||||
kernel_size=1, stride=stride, bias=False), | |||||
nn.BatchNorm2d(planes * block.expansion), | |||||
) | |||||
layers = [] | |||||
layers.append(block(self.inplanes, planes, stride, downsample)) | |||||
self.inplanes = planes * block.expansion | |||||
for i in range(1, blocks): | |||||
layers.append(block(self.inplanes, planes)) | |||||
return nn.Sequential(*layers) | |||||
def forward(self, x): | |||||
x = self.conv1(x) | |||||
x = self.bn1(x) | |||||
x = self.relu(x) | |||||
x = self.maxpool(x) | |||||
x = self.layer1(x) | |||||
x = self.layer2(x) | |||||
x = self.layer3(x) | |||||
x = self.layer4(x) | |||||
x = self.avgpool(x) | |||||
x = x.view(x.size(0), -1) | |||||
x = self.fc(x) | |||||
return x | |||||
class ResNet_imagenet(ResNet): | |||||
def __init__(self, num_classes=1000, | |||||
block=Bottleneck, layers=[3, 4, 23, 3]): | |||||
super(ResNet_imagenet, self).__init__() | |||||
self.inplanes = 64 | |||||
self.conv1 = nn.Conv2d(3, 64, kernel_size=7, stride=2, padding=3, | |||||
bias=False) | |||||
self.bn1 = nn.BatchNorm2d(64) | |||||
self.relu = nn.ReLU(inplace=True) | |||||
self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1) | |||||
self.layer1 = self._make_layer(block, 64, layers[0]) | |||||
self.layer2 = self._make_layer(block, 128, layers[1], stride=2) | |||||
self.layer3 = self._make_layer(block, 256, layers[2], stride=2) | |||||
self.layer4 = self._make_layer(block, 512, layers[3], stride=2) | |||||
self.avgpool = nn.AvgPool2d(7) | |||||
self.fc = nn.Linear(512 * block.expansion, num_classes) | |||||
init_model(self) | |||||
self.regime = { | |||||
0: {'optimizer': 'SGD', 'lr': 1e-1, | |||||
'weight_decay': 1e-4, 'momentum': 0.9}, | |||||
30: {'lr': 1e-2}, | |||||
60: {'lr': 1e-3, 'weight_decay': 0}, | |||||
90: {'lr': 1e-4} | |||||
} | |||||
class ResNet_cifar10(ResNet): | |||||
def __init__(self, num_classes=10, | |||||
block=BasicBlock, depth=18): | |||||
super(ResNet_cifar10, self).__init__() | |||||
self.inplanes = 16 | |||||
n = int((depth - 2) / 6) | |||||
self.conv1 = nn.Conv2d(3, 16, kernel_size=3, stride=1, padding=1, | |||||
bias=False) | |||||
self.bn1 = nn.BatchNorm2d(16) | |||||
self.relu = nn.ReLU(inplace=True) | |||||
self.maxpool = lambda x: x | |||||
self.layer1 = self._make_layer(block, 16, n) | |||||
self.layer2 = self._make_layer(block, 32, n, stride=2) | |||||
self.layer3 = self._make_layer(block, 64, n, stride=2) | |||||
self.layer4 = lambda x: x | |||||
self.avgpool = nn.AvgPool2d(8) | |||||
self.fc = nn.Linear(64, num_classes) | |||||
init_model(self) | |||||
self.regime = { | |||||
0: {'optimizer': 'SGD', 'lr': 1e-1, | |||||
'weight_decay': 1e-4, 'momentum': 0.9}, | |||||
81: {'lr': 1e-2}, | |||||
122: {'lr': 1e-3, 'weight_decay': 0}, | |||||
164: {'lr': 1e-4} | |||||
} | |||||
def resnet(**kwargs): | |||||
num_classes, depth, dataset = map( | |||||
kwargs.get, ['num_classes', 'depth', 'dataset']) | |||||
if dataset == 'imagenet': | |||||
num_classes = num_classes or 1000 | |||||
depth = depth or 50 | |||||
if depth == 18: | |||||
return ResNet_imagenet(num_classes=num_classes, | |||||
block=BasicBlock, layers=[2, 2, 2, 2]) | |||||
if depth == 34: | |||||
return ResNet_imagenet(num_classes=num_classes, | |||||
block=BasicBlock, layers=[3, 4, 6, 3]) | |||||
if depth == 50: | |||||
return ResNet_imagenet(num_classes=num_classes, | |||||
block=Bottleneck, layers=[3, 4, 6, 3]) | |||||
if depth == 101: | |||||
return ResNet_imagenet(num_classes=num_classes, | |||||
block=Bottleneck, layers=[3, 4, 23, 3]) | |||||
if depth == 152: | |||||
return ResNet_imagenet(num_classes=num_classes, | |||||
block=Bottleneck, layers=[3, 8, 36, 3]) | |||||
elif dataset == 'cifar10': | |||||
num_classes = num_classes or 10 | |||||
depth = depth or 18 #56 | |||||
return ResNet_cifar10(num_classes=num_classes, | |||||
block=BasicBlock, depth=depth) |
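    else:
        # assumed fallback: fail loudly rather than silently returning None
        # for a dataset this factory does not cover
        raise ValueError('unsupported dataset for resnet: {}'.format(dataset))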
@ -0,0 +1,248 @@ | |||||
import torch.nn as nn | |||||
import torchvision.transforms as transforms | |||||
import math | |||||
from .binarized_modules import BinarizeLinear,BinarizeConv2d | |||||
__all__ = ['resnet_binary'] | |||||
def Binaryconv3x3(in_planes, out_planes, stride=1): | |||||
"3x3 convolution with padding" | |||||
return BinarizeConv2d(in_planes, out_planes, kernel_size=3, stride=stride, | |||||
padding=1, bias=False) | |||||
def conv3x3(in_planes, out_planes, stride=1): | |||||
"3x3 convolution with padding" | |||||
return nn.Conv2d(in_planes, out_planes, kernel_size=3, stride=stride, | |||||
padding=1, bias=False) | |||||
def init_model(model): | |||||
for m in model.modules(): | |||||
if isinstance(m, BinarizeConv2d): | |||||
n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels | |||||
m.weight.data.normal_(0, math.sqrt(2. / n)) | |||||
elif isinstance(m, nn.BatchNorm2d): | |||||
m.weight.data.fill_(1) | |||||
m.bias.data.zero_() | |||||
class BasicBlock(nn.Module): | |||||
expansion = 1 | |||||
def __init__(self, inplanes, planes, stride=1, downsample=None,do_bntan=True): | |||||
super(BasicBlock, self).__init__() | |||||
self.conv1 = Binaryconv3x3(inplanes, planes, stride) | |||||
self.bn1 = nn.BatchNorm2d(planes) | |||||
self.tanh1 = nn.Hardtanh(inplace=True) | |||||
self.conv2 = Binaryconv3x3(planes, planes) | |||||
self.tanh2 = nn.Hardtanh(inplace=True) | |||||
self.bn2 = nn.BatchNorm2d(planes) | |||||
self.downsample = downsample | |||||
self.do_bntan=do_bntan; | |||||
self.stride = stride | |||||
def forward(self, x): | |||||
residual = x.clone() | |||||
out = self.conv1(x) | |||||
out = self.bn1(out) | |||||
out = self.tanh1(out) | |||||
out = self.conv2(out) | |||||
if self.downsample is not None: | |||||
residual = self.downsample(residual) | |||||
out += residual | |||||
if self.do_bntan: | |||||
out = self.bn2(out) | |||||
out = self.tanh2(out) | |||||
return out | |||||
class Bottleneck(nn.Module): | |||||
expansion = 4 | |||||
    def __init__(self, inplanes, planes, stride=1, downsample=None, do_bntan=True):
super(Bottleneck, self).__init__() | |||||
self.conv1 = BinarizeConv2d(inplanes, planes, kernel_size=1, bias=False) | |||||
self.bn1 = nn.BatchNorm2d(planes) | |||||
self.conv2 = BinarizeConv2d(planes, planes, kernel_size=3, stride=stride, | |||||
padding=1, bias=False) | |||||
self.bn2 = nn.BatchNorm2d(planes) | |||||
self.conv3 = BinarizeConv2d(planes, planes * 4, kernel_size=1, bias=False) | |||||
self.bn3 = nn.BatchNorm2d(planes * 4) | |||||
self.tanh = nn.Hardtanh(inplace=True) | |||||
self.downsample = downsample | |||||
        self.do_bntan = do_bntan
        self.stride = stride
def forward(self, x): | |||||
residual = x | |||||
out = self.conv1(x) | |||||
out = self.bn1(out) | |||||
out = self.tanh(out) | |||||
out = self.conv2(out) | |||||
out = self.bn2(out) | |||||
out = self.tanh(out) | |||||
out = self.conv3(out) | |||||
out = self.bn3(out) | |||||
if self.downsample is not None: | |||||
residual = self.downsample(x) | |||||
out += residual | |||||
if self.do_bntan: | |||||
out = self.bn2(out) | |||||
            out = self.tanh(out)
return out | |||||
class ResNet(nn.Module): | |||||
def __init__(self): | |||||
super(ResNet, self).__init__() | |||||
def _make_layer(self, block, planes, blocks, stride=1,do_bntan=True): | |||||
downsample = None | |||||
if stride != 1 or self.inplanes != planes * block.expansion: | |||||
downsample = nn.Sequential( | |||||
BinarizeConv2d(self.inplanes, planes * block.expansion, | |||||
kernel_size=1, stride=stride, bias=False), | |||||
nn.BatchNorm2d(planes * block.expansion), | |||||
) | |||||
layers = [] | |||||
layers.append(block(self.inplanes, planes, stride, downsample)) | |||||
self.inplanes = planes * block.expansion | |||||
for i in range(1, blocks-1): | |||||
layers.append(block(self.inplanes, planes)) | |||||
layers.append(block(self.inplanes, planes,do_bntan=do_bntan)) | |||||
return nn.Sequential(*layers) | |||||
def forward(self, x): | |||||
x = self.conv1(x) | |||||
x = self.maxpool(x) | |||||
x = self.bn1(x) | |||||
x = self.tanh1(x) | |||||
x = self.layer1(x) | |||||
x = self.layer2(x) | |||||
x = self.layer3(x) | |||||
x = self.layer4(x) | |||||
x = self.avgpool(x) | |||||
x = x.view(x.size(0), -1) | |||||
x = self.bn2(x) | |||||
x = self.tanh2(x) | |||||
x = self.fc(x) | |||||
x = self.bn3(x) | |||||
x = self.logsoftmax(x) | |||||
return x | |||||
class ResNet_imagenet(ResNet): | |||||
def __init__(self, num_classes=1000, | |||||
block=Bottleneck, layers=[3, 4, 23, 3]): | |||||
super(ResNet_imagenet, self).__init__() | |||||
self.inplanes = 64 | |||||
self.conv1 = BinarizeConv2d(3, 64, kernel_size=7, stride=2, padding=3, | |||||
bias=False) | |||||
self.bn1 = nn.BatchNorm2d(64) | |||||
self.tanh = nn.Hardtanh(inplace=True) | |||||
self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1) | |||||
self.layer1 = self._make_layer(block, 64, layers[0]) | |||||
self.layer2 = self._make_layer(block, 128, layers[1], stride=2) | |||||
self.layer3 = self._make_layer(block, 256, layers[2], stride=2) | |||||
self.layer4 = self._make_layer(block, 512, layers[3], stride=2) | |||||
self.avgpool = nn.AvgPool2d(7) | |||||
self.fc = BinarizeLinear(512 * block.expansion, num_classes) | |||||
init_model(self) | |||||
self.regime = { | |||||
0: {'optimizer': 'SGD', 'lr': 1e-1, | |||||
'weight_decay': 1e-4, 'momentum': 0.9}, | |||||
30: {'lr': 1e-2}, | |||||
60: {'lr': 1e-3, 'weight_decay': 0}, | |||||
90: {'lr': 1e-4} | |||||
} | |||||
class ResNet_cifar10(ResNet): | |||||
def __init__(self, num_classes=10, | |||||
block=BasicBlock, depth=18): | |||||
super(ResNet_cifar10, self).__init__() | |||||
self.inflate = 5 | |||||
self.inplanes = 16*self.inflate | |||||
n = int((depth - 2) / 6) | |||||
self.conv1 = BinarizeConv2d(3, 16*self.inflate, kernel_size=3, stride=1, padding=1, | |||||
bias=False) | |||||
self.maxpool = lambda x: x | |||||
self.bn1 = nn.BatchNorm2d(16*self.inflate) | |||||
self.tanh1 = nn.Hardtanh(inplace=True) | |||||
self.tanh2 = nn.Hardtanh(inplace=True) | |||||
self.layer1 = self._make_layer(block, 16*self.inflate, n) | |||||
self.layer2 = self._make_layer(block, 32*self.inflate, n, stride=2) | |||||
self.layer3 = self._make_layer(block, 64*self.inflate, n, stride=2,do_bntan=False) | |||||
self.layer4 = lambda x: x | |||||
self.avgpool = nn.AvgPool2d(8) | |||||
self.bn2 = nn.BatchNorm1d(64*self.inflate) | |||||
self.bn3 = nn.BatchNorm1d(10) | |||||
self.logsoftmax = nn.LogSoftmax() | |||||
self.fc = BinarizeLinear(64*self.inflate, num_classes) | |||||
init_model(self) | |||||
#self.regime = { | |||||
# 0: {'optimizer': 'SGD', 'lr': 1e-1, | |||||
# 'weight_decay': 1e-4, 'momentum': 0.9}, | |||||
# 81: {'lr': 1e-4}, | |||||
# 122: {'lr': 1e-5, 'weight_decay': 0}, | |||||
# 164: {'lr': 1e-6} | |||||
#} | |||||
self.regime = { | |||||
0: {'optimizer': 'Adam', 'lr': 5e-3}, | |||||
101: {'lr': 1e-3}, | |||||
142: {'lr': 5e-4}, | |||||
184: {'lr': 1e-4}, | |||||
220: {'lr': 1e-5} | |||||
} | |||||
def resnet_binary(**kwargs): | |||||
num_classes, depth, dataset = map( | |||||
kwargs.get, ['num_classes', 'depth', 'dataset']) | |||||
if dataset == 'imagenet': | |||||
num_classes = num_classes or 1000 | |||||
depth = depth or 50 | |||||
if depth == 18: | |||||
return ResNet_imagenet(num_classes=num_classes, | |||||
block=BasicBlock, layers=[2, 2, 2, 2]) | |||||
if depth == 34: | |||||
return ResNet_imagenet(num_classes=num_classes, | |||||
block=BasicBlock, layers=[3, 4, 6, 3]) | |||||
if depth == 50: | |||||
return ResNet_imagenet(num_classes=num_classes, | |||||
block=Bottleneck, layers=[3, 4, 6, 3]) | |||||
if depth == 101: | |||||
return ResNet_imagenet(num_classes=num_classes, | |||||
block=Bottleneck, layers=[3, 4, 23, 3]) | |||||
if depth == 152: | |||||
return ResNet_imagenet(num_classes=num_classes, | |||||
block=Bottleneck, layers=[3, 8, 36, 3]) | |||||
elif dataset == 'cifar10': | |||||
num_classes = num_classes or 10 | |||||
depth = depth or 18 | |||||
return ResNet_cifar10(num_classes=num_classes, | |||||
block=BasicBlock, depth=depth) |
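# A minimal usage sketch (an illustrative assumption, not part of the original file); the
# factory above is re-exported by models/__init__.py and selected via --model resnet_binary:
#   import torch
#   model = resnet_binary(dataset='cifar10', depth=18, num_classes=10)
#   log_probs = model(torch.randn(2, 3, 32, 32))   # 32x32 CIFAR inputs -> (2, 10) log-probabilities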
@ -0,0 +1,69 @@ | |||||
import torch.nn as nn | |||||
import torchvision.transforms as transforms | |||||
class AlexNetOWT_BN(nn.Module): | |||||
def __init__(self, num_classes=1000): | |||||
super(AlexNetOWT_BN, self).__init__() | |||||
self.features = nn.Sequential( | |||||
nn.Conv2d(3, 128, kernel_size=3, stride=1, padding=1, | |||||
bias=False), | |||||
nn.BatchNorm2d(128), | |||||
nn.ReLU(inplace=True), | |||||
nn.Conv2d(128, 128, kernel_size=3, padding=1, bias=False), | |||||
nn.MaxPool2d(kernel_size=2, stride=2), | |||||
nn.ReLU(inplace=True), | |||||
nn.BatchNorm2d(128), | |||||
nn.Conv2d(128, 256, kernel_size=3, padding=1, bias=False), | |||||
nn.ReLU(inplace=True), | |||||
nn.BatchNorm2d(256), | |||||
nn.Conv2d(256, 256, kernel_size=3, padding=1, bias=False), | |||||
nn.MaxPool2d(kernel_size=2, stride=2), | |||||
nn.ReLU(inplace=True), | |||||
nn.BatchNorm2d(256), | |||||
nn.Conv2d(256, 512, kernel_size=3, padding=1, bias=False), | |||||
nn.ReLU(inplace=True), | |||||
nn.BatchNorm2d(512), | |||||
nn.Conv2d(512, 512, kernel_size=3, padding=1, bias=False), | |||||
nn.MaxPool2d(kernel_size=2, stride=2), | |||||
nn.ReLU(inplace=True), | |||||
nn.BatchNorm2d(512), | |||||
) | |||||
self.classifier = nn.Sequential( | |||||
nn.Linear(512 * 4 * 4, 1024, bias=False), | |||||
nn.BatchNorm1d(1024), | |||||
nn.ReLU(inplace=True), | |||||
nn.Dropout(0.5), | |||||
nn.Linear(1024, 1024, bias=False), | |||||
nn.BatchNorm1d(1024), | |||||
nn.ReLU(inplace=True), | |||||
nn.Dropout(0.5), | |||||
            nn.Linear(1024, num_classes),
            nn.LogSoftmax()
) | |||||
self.regime = { | |||||
0: {'optimizer': 'SGD', 'lr': 1e-2, | |||||
'weight_decay': 5e-4, 'momentum': 0.9}, | |||||
10: {'lr': 5e-3}, | |||||
15: {'lr': 1e-3, 'weight_decay': 0}, | |||||
20: {'lr': 5e-4}, | |||||
25: {'lr': 1e-4} | |||||
} | |||||
def forward(self, x): | |||||
x = self.features(x) | |||||
x = x.view(-1, 512 * 4 * 4) | |||||
x = self.classifier(x) | |||||
return x | |||||
def model(**kwargs): | |||||
num_classes = kwargs.get( 'num_classes', 1000) | |||||
return AlexNetOWT_BN(num_classes) |
@ -0,0 +1,80 @@ | |||||
import torch | |||||
import torch.nn as nn | |||||
import torchvision.transforms as transforms | |||||
from torch.autograd import Function | |||||
from .binarized_modules import BinarizeLinear,BinarizeConv2d | |||||
class VGG_Cifar10(nn.Module): | |||||
def __init__(self, num_classes=1000): | |||||
super(VGG_Cifar10, self).__init__() | |||||
        self.infl_ratio = 3
self.features = nn.Sequential( | |||||
BinarizeConv2d(3, 128*self.infl_ratio, kernel_size=3, stride=1, padding=1, | |||||
bias=True), | |||||
nn.BatchNorm2d(128*self.infl_ratio), | |||||
nn.Hardtanh(inplace=True), | |||||
BinarizeConv2d(128*self.infl_ratio, 128*self.infl_ratio, kernel_size=3, padding=1, bias=True), | |||||
nn.MaxPool2d(kernel_size=2, stride=2), | |||||
nn.BatchNorm2d(128*self.infl_ratio), | |||||
nn.Hardtanh(inplace=True), | |||||
BinarizeConv2d(128*self.infl_ratio, 256*self.infl_ratio, kernel_size=3, padding=1, bias=True), | |||||
nn.BatchNorm2d(256*self.infl_ratio), | |||||
nn.Hardtanh(inplace=True), | |||||
BinarizeConv2d(256*self.infl_ratio, 256*self.infl_ratio, kernel_size=3, padding=1, bias=True), | |||||
nn.MaxPool2d(kernel_size=2, stride=2), | |||||
nn.BatchNorm2d(256*self.infl_ratio), | |||||
nn.Hardtanh(inplace=True), | |||||
BinarizeConv2d(256*self.infl_ratio, 512*self.infl_ratio, kernel_size=3, padding=1, bias=True), | |||||
nn.BatchNorm2d(512*self.infl_ratio), | |||||
nn.Hardtanh(inplace=True), | |||||
BinarizeConv2d(512*self.infl_ratio, 512, kernel_size=3, padding=1, bias=True), | |||||
nn.MaxPool2d(kernel_size=2, stride=2), | |||||
nn.BatchNorm2d(512), | |||||
nn.Hardtanh(inplace=True) | |||||
) | |||||
self.classifier = nn.Sequential( | |||||
BinarizeLinear(512 * 4 * 4, 1024, bias=True), | |||||
nn.BatchNorm1d(1024), | |||||
nn.Hardtanh(inplace=True), | |||||
#nn.Dropout(0.5), | |||||
BinarizeLinear(1024, 1024, bias=True), | |||||
nn.BatchNorm1d(1024), | |||||
nn.Hardtanh(inplace=True), | |||||
#nn.Dropout(0.5), | |||||
BinarizeLinear(1024, num_classes, bias=True), | |||||
nn.BatchNorm1d(num_classes, affine=False), | |||||
nn.LogSoftmax() | |||||
) | |||||
self.regime = { | |||||
0: {'optimizer': 'Adam', 'betas': (0.9, 0.999),'lr': 5e-3}, | |||||
40: {'lr': 1e-3}, | |||||
80: {'lr': 5e-4}, | |||||
100: {'lr': 1e-4}, | |||||
120: {'lr': 5e-5}, | |||||
140: {'lr': 1e-5} | |||||
} | |||||
def forward(self, x): | |||||
x = self.features(x) | |||||
x = x.view(-1, 512 * 4 * 4) | |||||
x = self.classifier(x) | |||||
return x | |||||
def vgg_cifar10_binary(**kwargs): | |||||
num_classes = kwargs.get( 'num_classes', 10) | |||||
return VGG_Cifar10(num_classes) |
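# A minimal usage sketch (assumes 32x32 CIFAR-style inputs; the three MaxPool2d stages
# reduce them to the 4x4 maps flattened as 512 * 4 * 4 above):
#   import torch
#   model = vgg_cifar10_binary(num_classes=10)
#   log_probs = model(torch.randn(2, 3, 32, 32))   # shape (2, 10), LogSoftmax output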
@ -0,0 +1,198 @@ | |||||
import torch | |||||
import torchvision.transforms as transforms | |||||
import random | |||||
__imagenet_stats = {'mean': [0.485, 0.456, 0.406], | |||||
'std': [0.229, 0.224, 0.225]} | |||||
__imagenet_pca = { | |||||
'eigval': torch.Tensor([0.2175, 0.0188, 0.0045]), | |||||
'eigvec': torch.Tensor([ | |||||
[-0.5675, 0.7192, 0.4009], | |||||
[-0.5808, -0.0045, -0.8140], | |||||
[-0.5836, -0.6948, 0.4203], | |||||
]) | |||||
} | |||||
def scale_crop(input_size, scale_size=None, normalize=__imagenet_stats): | |||||
t_list = [ | |||||
transforms.CenterCrop(input_size), | |||||
transforms.ToTensor(), | |||||
transforms.Normalize(**normalize), | |||||
] | |||||
if scale_size != input_size: | |||||
t_list = [transforms.Scale(scale_size)] + t_list | |||||
return transforms.Compose(t_list) | |||||
def scale_random_crop(input_size, scale_size=None, normalize=__imagenet_stats): | |||||
t_list = [ | |||||
transforms.RandomCrop(input_size), | |||||
transforms.ToTensor(), | |||||
transforms.Normalize(**normalize), | |||||
] | |||||
if scale_size != input_size: | |||||
t_list = [transforms.Scale(scale_size)] + t_list | |||||
    return transforms.Compose(t_list)
def pad_random_crop(input_size, scale_size=None, normalize=__imagenet_stats): | |||||
padding = int((scale_size - input_size) / 2) | |||||
return transforms.Compose([ | |||||
transforms.RandomCrop(input_size, padding=padding), | |||||
transforms.RandomHorizontalFlip(), | |||||
transforms.ToTensor(), | |||||
transforms.Normalize(**normalize), | |||||
]) | |||||
def inception_preproccess(input_size, normalize=__imagenet_stats): | |||||
return transforms.Compose([ | |||||
transforms.RandomSizedCrop(input_size), | |||||
transforms.RandomHorizontalFlip(), | |||||
transforms.ToTensor(), | |||||
transforms.Normalize(**normalize) | |||||
]) | |||||
def inception_color_preproccess(input_size, normalize=__imagenet_stats): | |||||
return transforms.Compose([ | |||||
transforms.RandomSizedCrop(input_size), | |||||
transforms.RandomHorizontalFlip(), | |||||
transforms.ToTensor(), | |||||
ColorJitter( | |||||
brightness=0.4, | |||||
contrast=0.4, | |||||
saturation=0.4, | |||||
), | |||||
Lighting(0.1, __imagenet_pca['eigval'], __imagenet_pca['eigvec']), | |||||
transforms.Normalize(**normalize) | |||||
]) | |||||
def get_transform(name='imagenet', input_size=None, | |||||
scale_size=None, normalize=None, augment=True): | |||||
normalize = normalize or __imagenet_stats | |||||
if name == 'imagenet': | |||||
scale_size = scale_size or 256 | |||||
input_size = input_size or 224 | |||||
if augment: | |||||
return inception_preproccess(input_size, normalize=normalize) | |||||
else: | |||||
return scale_crop(input_size=input_size, | |||||
scale_size=scale_size, normalize=normalize) | |||||
elif 'cifar' in name: | |||||
input_size = input_size or 32 | |||||
if augment: | |||||
scale_size = scale_size or 40 | |||||
return pad_random_crop(input_size, scale_size=scale_size, | |||||
normalize=normalize) | |||||
else: | |||||
scale_size = scale_size or 32 | |||||
return scale_crop(input_size=input_size, | |||||
scale_size=scale_size, normalize=normalize) | |||||
elif name == 'mnist': | |||||
normalize = {'mean': [0.5], 'std': [0.5]} | |||||
input_size = input_size or 28 | |||||
if augment: | |||||
scale_size = scale_size or 32 | |||||
return pad_random_crop(input_size, scale_size=scale_size, | |||||
normalize=normalize) | |||||
else: | |||||
scale_size = scale_size or 32 | |||||
return scale_crop(input_size=input_size, | |||||
scale_size=scale_size, normalize=normalize) | |||||
class Lighting(object): | |||||
"""Lighting noise(AlexNet - style PCA - based noise)""" | |||||
def __init__(self, alphastd, eigval, eigvec): | |||||
self.alphastd = alphastd | |||||
self.eigval = eigval | |||||
self.eigvec = eigvec | |||||
def __call__(self, img): | |||||
if self.alphastd == 0: | |||||
return img | |||||
alpha = img.new().resize_(3).normal_(0, self.alphastd) | |||||
rgb = self.eigvec.type_as(img).clone()\ | |||||
.mul(alpha.view(1, 3).expand(3, 3))\ | |||||
.mul(self.eigval.view(1, 3).expand(3, 3))\ | |||||
.sum(1).squeeze() | |||||
return img.add(rgb.view(3, 1, 1).expand_as(img)) | |||||
class Grayscale(object): | |||||
def __call__(self, img): | |||||
gs = img.clone() | |||||
gs[0].mul_(0.299).add_(0.587, gs[1]).add_(0.114, gs[2]) | |||||
gs[1].copy_(gs[0]) | |||||
gs[2].copy_(gs[0]) | |||||
return gs | |||||
class Saturation(object): | |||||
def __init__(self, var): | |||||
self.var = var | |||||
def __call__(self, img): | |||||
gs = Grayscale()(img) | |||||
alpha = random.uniform(0, self.var) | |||||
return img.lerp(gs, alpha) | |||||
class Brightness(object): | |||||
def __init__(self, var): | |||||
self.var = var | |||||
def __call__(self, img): | |||||
gs = img.new().resize_as_(img).zero_() | |||||
alpha = random.uniform(0, self.var) | |||||
return img.lerp(gs, alpha) | |||||
class Contrast(object): | |||||
def __init__(self, var): | |||||
self.var = var | |||||
def __call__(self, img): | |||||
gs = Grayscale()(img) | |||||
gs.fill_(gs.mean()) | |||||
alpha = random.uniform(0, self.var) | |||||
return img.lerp(gs, alpha) | |||||
class RandomOrder(object): | |||||
""" Composes several transforms together in random order. | |||||
""" | |||||
def __init__(self, transforms): | |||||
self.transforms = transforms | |||||
def __call__(self, img): | |||||
if self.transforms is None: | |||||
return img | |||||
order = torch.randperm(len(self.transforms)) | |||||
for i in order: | |||||
img = self.transforms[i](img) | |||||
return img | |||||
class ColorJitter(RandomOrder): | |||||
def __init__(self, brightness=0.4, contrast=0.4, saturation=0.4): | |||||
self.transforms = [] | |||||
if brightness != 0: | |||||
self.transforms.append(Brightness(brightness)) | |||||
if contrast != 0: | |||||
self.transforms.append(Contrast(contrast)) | |||||
if saturation != 0: | |||||
self.transforms.append(Saturation(saturation)) |
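# A minimal usage sketch (mirrors how main_binary.py consumes this module):
#   train_tf = get_transform('cifar10', augment=True)    # pad_random_crop: pad, random crop, flip, normalize
#   eval_tf = get_transform('cifar10', augment=False)    # scale_crop: resize, center crop, normalize
#   # the resulting torchvision transforms are then handed to get_dataset() in data.py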
@ -0,0 +1,5 @@ | |||||
2021-04-15 15:36:47 - INFO - saving to ./results/2021-04-15_15-36-47 | |||||
2021-04-15 15:36:47 - DEBUG - run arguments: Namespace(batch_size=256, dataset='imagenet', epochs=2500, evaluate=None, gpus='0', input_size=None, lr=0.1, model='alexnet', model_config='', momentum=0.9, optimizer='SGD', print_freq=10, results_dir='./results', resume='', save='2021-04-15_15-36-47', start_epoch=0, type='torch.cuda.FloatTensor', weight_decay=0.0001, workers=8) | |||||
2021-04-15 15:36:47 - INFO - creating model alexnet | |||||
2021-04-15 15:36:48 - INFO - created model with configuration: {'input_size': None, 'dataset': 'imagenet'} | |||||
2021-04-15 15:36:48 - INFO - number of parameters: 61110184 |
@ -0,0 +1,5 @@ | |||||
2021-04-15 15:37:52 - INFO - saving to ./results/2021-04-15_15-37-52 | |||||
2021-04-15 15:37:52 - DEBUG - run arguments: Namespace(batch_size=256, dataset='imagenet', epochs=2500, evaluate=None, gpus='0', input_size=None, lr=0.1, model='resnet', model_config='', momentum=0.9, optimizer='SGD', print_freq=10, results_dir='./results', resume='', save='2021-04-15_15-37-52', start_epoch=0, type='torch.cuda.FloatTensor', weight_decay=0.0001, workers=8) | |||||
2021-04-15 15:37:52 - INFO - creating model resnet | |||||
2021-04-15 15:37:52 - INFO - created model with configuration: {'input_size': None, 'dataset': 'imagenet'} | |||||
2021-04-15 15:37:52 - INFO - number of parameters: 25557032 |
@ -0,0 +1,5 @@ | |||||
2021-04-15 15:38:16 - INFO - saving to ./results/2021-04-15_15-38-16 | |||||
2021-04-15 15:38:16 - DEBUG - run arguments: Namespace(batch_size=256, dataset='imagenet', epochs=2500, evaluate=None, gpus='0', input_size=None, lr=0.1, model='alexnet', model_config='', momentum=0.9, optimizer='SGD', print_freq=10, results_dir='./results', resume='', save='2021-04-15_15-38-16', start_epoch=0, type='torch.cuda.FloatTensor', weight_decay=0.0001, workers=8) | |||||
2021-04-15 15:38:16 - INFO - creating model alexnet | |||||
2021-04-15 15:38:17 - INFO - created model with configuration: {'input_size': None, 'dataset': 'imagenet'} | |||||
2021-04-15 15:38:17 - INFO - number of parameters: 61110184 |
@ -0,0 +1,160 @@ | |||||
import os | |||||
import torch | |||||
import logging.config | |||||
import shutil | |||||
import pandas as pd | |||||
from bokeh.io import output_file, save, show | |||||
from bokeh.plotting import figure | |||||
from bokeh.layouts import column | |||||
#from bokeh.charts import Line, defaults | |||||
# | |||||
#defaults.width = 800 | |||||
#defaults.height = 400 | |||||
#defaults.tools = 'pan,box_zoom,wheel_zoom,box_select,hover,resize,reset,save' | |||||
def setup_logging(log_file='log.txt'): | |||||
"""Setup logging configuration | |||||
""" | |||||
logging.basicConfig(level=logging.DEBUG, | |||||
format="%(asctime)s - %(levelname)s - %(message)s", | |||||
datefmt="%Y-%m-%d %H:%M:%S", | |||||
filename=log_file, | |||||
filemode='w') | |||||
console = logging.StreamHandler() | |||||
console.setLevel(logging.INFO) | |||||
formatter = logging.Formatter('%(message)s') | |||||
console.setFormatter(formatter) | |||||
logging.getLogger('').addHandler(console) | |||||
class ResultsLog(object): | |||||
def __init__(self, path='results.csv', plot_path=None): | |||||
self.path = path | |||||
self.plot_path = plot_path or (self.path + '.html') | |||||
self.figures = [] | |||||
self.results = None | |||||
def add(self, **kwargs): | |||||
df = pd.DataFrame([kwargs.values()], columns=kwargs.keys()) | |||||
if self.results is None: | |||||
self.results = df | |||||
else: | |||||
self.results = self.results.append(df, ignore_index=True) | |||||
def save(self, title='Training Results'): | |||||
if len(self.figures) > 0: | |||||
if os.path.isfile(self.plot_path): | |||||
os.remove(self.plot_path) | |||||
output_file(self.plot_path, title=title) | |||||
plot = column(*self.figures) | |||||
save(plot) | |||||
self.figures = [] | |||||
self.results.to_csv(self.path, index=False, index_label=False) | |||||
def load(self, path=None): | |||||
path = path or self.path | |||||
if os.path.isfile(path): | |||||
            self.results = pd.read_csv(path)
def show(self): | |||||
if len(self.figures) > 0: | |||||
plot = column(*self.figures) | |||||
show(plot) | |||||
#def plot(self, *kargs, **kwargs): | |||||
# line = Line(data=self.results, *kargs, **kwargs) | |||||
# self.figures.append(line) | |||||
def image(self, *kargs, **kwargs): | |||||
fig = figure() | |||||
fig.image(*kargs, **kwargs) | |||||
self.figures.append(fig) | |||||
def save_checkpoint(state, is_best, path='.', filename='checkpoint.pth.tar', save_all=False): | |||||
filename = os.path.join(path, filename) | |||||
torch.save(state, filename) | |||||
if is_best: | |||||
shutil.copyfile(filename, os.path.join(path, 'model_best.pth.tar')) | |||||
if save_all: | |||||
shutil.copyfile(filename, os.path.join( | |||||
path, 'checkpoint_epoch_%s.pth.tar' % state['epoch'])) | |||||
class AverageMeter(object): | |||||
"""Computes and stores the average and current value""" | |||||
def __init__(self): | |||||
self.reset() | |||||
def reset(self): | |||||
self.val = 0 | |||||
self.avg = 0 | |||||
self.sum = 0 | |||||
self.count = 0 | |||||
def update(self, val, n=1): | |||||
self.val = val | |||||
self.sum += val * n | |||||
self.count += n | |||||
self.avg = self.sum / self.count | |||||
__optimizers = { | |||||
'SGD': torch.optim.SGD, | |||||
'ASGD': torch.optim.ASGD, | |||||
'Adam': torch.optim.Adam, | |||||
'Adamax': torch.optim.Adamax, | |||||
'Adagrad': torch.optim.Adagrad, | |||||
'Adadelta': torch.optim.Adadelta, | |||||
'Rprop': torch.optim.Rprop, | |||||
'RMSprop': torch.optim.RMSprop | |||||
} | |||||
def adjust_optimizer(optimizer, epoch, config): | |||||
"""Reconfigures the optimizer according to epoch and config dict""" | |||||
def modify_optimizer(optimizer, setting): | |||||
if 'optimizer' in setting: | |||||
optimizer = __optimizers[setting['optimizer']]( | |||||
optimizer.param_groups) | |||||
logging.debug('OPTIMIZER - setting method = %s' % | |||||
setting['optimizer']) | |||||
for param_group in optimizer.param_groups: | |||||
for key in param_group.keys(): | |||||
if key in setting: | |||||
logging.debug('OPTIMIZER - setting %s = %s' % | |||||
(key, setting[key])) | |||||
param_group[key] = setting[key] | |||||
return optimizer | |||||
if callable(config): | |||||
optimizer = modify_optimizer(optimizer, config(epoch)) | |||||
else: | |||||
for e in range(epoch + 1): # run over all epochs - sticky setting | |||||
if e in config: | |||||
optimizer = modify_optimizer(optimizer, config[e]) | |||||
return optimizer | |||||
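# A minimal sketch of the 'regime' convention used by the model files' self.regime dicts
# (settings are sticky: every epoch key up to the current epoch is applied in order, so the
# last matching entry wins):
#   regime = {0: {'optimizer': 'Adam', 'lr': 5e-3}, 101: {'lr': 1e-3}, 142: {'lr': 5e-4}}
#   optimizer = torch.optim.SGD(model.parameters(), lr=0.1)
#   for epoch in range(200):
#       optimizer = adjust_optimizer(optimizer, epoch, regime)
#       # ... train one epoch ...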
def accuracy(output, target, topk=(1,)): | |||||
"""Computes the precision@k for the specified values of k""" | |||||
maxk = max(topk) | |||||
batch_size = target.size(0) | |||||
_, pred = output.float().topk(maxk, 1, True, True) | |||||
pred = pred.t() | |||||
correct = pred.eq(target.view(1, -1).expand_as(pred)) | |||||
res = [] | |||||
for k in topk: | |||||
correct_k = correct[:k].view(-1).float().sum(0) | |||||
res.append(correct_k.mul_(100.0 / batch_size)) | |||||
return res | |||||
# kernel_img = model.features[0][0].kernel.data.clone() | |||||
# kernel_img.add_(-kernel_img.min()) | |||||
# kernel_img.mul_(255 / kernel_img.max()) | |||||
# save_image(kernel_img, 'kernel%s.jpg' % epoch) |
@ -0,0 +1,8 @@ | |||||
# BNN.pytorch | |||||
Binarized Neural Network (BNN) for pytorch | |||||
This is the PyTorch version of the BNN code, for VGG and ResNet models
Link to the paper: https://papers.nips.cc/paper/6573-binarized-neural-networks | |||||
The code is based on https://github.com/eladhoffer/convNet.pytorch | |||||
Please install torch and torchvision by following the instructions at: http://pytorch.org/ | |||||
To train a binarized ResNet-18 on the CIFAR-10 dataset, run: python main_binary.py --model resnet_binary --save resnet18_binary --dataset cifar10
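The repository also includes a binarized MLP example for MNIST; assuming the script's default arguments, it can be run with: python main_mnist.py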
@ -0,0 +1,37 @@ | |||||
import os | |||||
import torchvision.datasets as datasets | |||||
import torchvision.transforms as transforms | |||||
_DATASETS_MAIN_PATH = '/home/Datasets' | |||||
_dataset_path = { | |||||
'cifar10': os.path.join(_DATASETS_MAIN_PATH, 'CIFAR10'), | |||||
'cifar100': os.path.join(_DATASETS_MAIN_PATH, 'CIFAR100'), | |||||
'stl10': os.path.join(_DATASETS_MAIN_PATH, 'STL10'), | |||||
'mnist': os.path.join(_DATASETS_MAIN_PATH, 'MNIST'), | |||||
'imagenet': { | |||||
'train': os.path.join(_DATASETS_MAIN_PATH, 'ImageNet/train'), | |||||
'val': os.path.join(_DATASETS_MAIN_PATH, 'ImageNet/val') | |||||
} | |||||
} | |||||
def get_dataset(name, split='train', transform=None, | |||||
target_transform=None, download=True): | |||||
train = (split == 'train') | |||||
if name == 'cifar10': | |||||
return datasets.CIFAR10(root=_dataset_path['cifar10'], | |||||
train=train, | |||||
transform=transform, | |||||
target_transform=target_transform, | |||||
download=download) | |||||
elif name == 'cifar100': | |||||
return datasets.CIFAR100(root=_dataset_path['cifar100'], | |||||
train=train, | |||||
transform=transform, | |||||
target_transform=target_transform, | |||||
download=download) | |||||
elif name == 'imagenet': | |||||
path = _dataset_path[name][split] | |||||
return datasets.ImageFolder(root=path, | |||||
transform=transform, | |||||
target_transform=target_transform) |
@ -0,0 +1,309 @@ | |||||
import argparse | |||||
import os | |||||
import time | |||||
import logging | |||||
import torch | |||||
import torch.nn as nn | |||||
import torch.nn.parallel | |||||
import torch.backends.cudnn as cudnn | |||||
import torch.optim | |||||
import torch.utils.data | |||||
import models | |||||
from torch.autograd import Variable | |||||
from data import get_dataset | |||||
from preprocess import get_transform | |||||
from utils import * | |||||
from datetime import datetime | |||||
from ast import literal_eval | |||||
from torchvision.utils import save_image | |||||
model_names = sorted(name for name in models.__dict__ | |||||
if name.islower() and not name.startswith("__") | |||||
and callable(models.__dict__[name])) | |||||
print(model_names)
parser = argparse.ArgumentParser(description='PyTorch ConvNet Training') | |||||
parser.add_argument('--results_dir', metavar='RESULTS_DIR', default='./results', | |||||
help='results dir') | |||||
parser.add_argument('--save', metavar='SAVE', default='', | |||||
help='saved folder') | |||||
parser.add_argument('--dataset', metavar='DATASET', default='imagenet', | |||||
help='dataset name or folder') | |||||
parser.add_argument('--model', '-a', metavar='MODEL', default='alexnet', | |||||
choices=model_names, | |||||
help='model architecture: ' + | |||||
' | '.join(model_names) + | |||||
' (default: alexnet)') | |||||
parser.add_argument('--input_size', type=int, default=None, | |||||
help='image input size') | |||||
parser.add_argument('--model_config', default='', | |||||
help='additional architecture configuration') | |||||
parser.add_argument('--type', default='torch.cuda.FloatTensor', | |||||
help='type of tensor - e.g torch.cuda.HalfTensor') | |||||
parser.add_argument('--gpus', default='0', | |||||
help='gpus used for training - e.g 0,1,3') | |||||
parser.add_argument('-j', '--workers', default=8, type=int, metavar='N', | |||||
help='number of data loading workers (default: 8)') | |||||
parser.add_argument('--epochs', default=2500, type=int, metavar='N', | |||||
help='number of total epochs to run') | |||||
parser.add_argument('--start-epoch', default=0, type=int, metavar='N', | |||||
help='manual epoch number (useful on restarts)') | |||||
parser.add_argument('-b', '--batch-size', default=256, type=int, | |||||
metavar='N', help='mini-batch size (default: 256)') | |||||
parser.add_argument('--optimizer', default='SGD', type=str, metavar='OPT', | |||||
help='optimizer function used') | |||||
parser.add_argument('--lr', '--learning_rate', default=0.1, type=float, | |||||
metavar='LR', help='initial learning rate') | |||||
parser.add_argument('--momentum', default=0.9, type=float, metavar='M', | |||||
help='momentum') | |||||
parser.add_argument('--weight-decay', '--wd', default=1e-4, type=float, | |||||
metavar='W', help='weight decay (default: 1e-4)') | |||||
parser.add_argument('--print-freq', '-p', default=10, type=int, | |||||
metavar='N', help='print frequency (default: 10)') | |||||
parser.add_argument('--resume', default='', type=str, metavar='PATH', | |||||
help='path to latest checkpoint (default: none)') | |||||
parser.add_argument('-e', '--evaluate', type=str, metavar='FILE', | |||||
help='evaluate model FILE on validation set') | |||||
def main(): | |||||
global args, best_prec1 | |||||
best_prec1 = 0 | |||||
args = parser.parse_args() | |||||
if args.evaluate: | |||||
args.results_dir = '/tmp' | |||||
    if args.save == '':
args.save = datetime.now().strftime('%Y-%m-%d_%H-%M-%S') | |||||
save_path = os.path.join(args.results_dir, args.save) | |||||
if not os.path.exists(save_path): | |||||
os.makedirs(save_path) | |||||
setup_logging(os.path.join(save_path, 'log.txt')) | |||||
results_file = os.path.join(save_path, 'results.%s') | |||||
results = ResultsLog(results_file % 'csv', results_file % 'html') | |||||
logging.info("saving to %s", save_path) | |||||
logging.debug("run arguments: %s", args) | |||||
if 'cuda' in args.type: | |||||
args.gpus = [int(i) for i in args.gpus.split(',')] | |||||
torch.cuda.set_device(args.gpus[0]) | |||||
cudnn.benchmark = True | |||||
else: | |||||
args.gpus = None | |||||
# create model | |||||
logging.info("creating model %s", args.model) | |||||
model = models.__dict__[args.model] | |||||
model_config = {'input_size': args.input_size, 'dataset': args.dataset} | |||||
    if args.model_config != '':
model_config = dict(model_config, **literal_eval(args.model_config)) | |||||
model = model(**model_config) | |||||
logging.info("created model with configuration: %s", model_config) | |||||
# optionally resume from a checkpoint | |||||
if args.evaluate: | |||||
if not os.path.isfile(args.evaluate): | |||||
parser.error('invalid checkpoint: {}'.format(args.evaluate)) | |||||
checkpoint = torch.load(args.evaluate) | |||||
model.load_state_dict(checkpoint['state_dict']) | |||||
logging.info("loaded checkpoint '%s' (epoch %s)", | |||||
args.evaluate, checkpoint['epoch']) | |||||
elif args.resume: | |||||
checkpoint_file = args.resume | |||||
if os.path.isdir(checkpoint_file): | |||||
results.load(os.path.join(checkpoint_file, 'results.csv')) | |||||
checkpoint_file = os.path.join( | |||||
checkpoint_file, 'model_best.pth.tar') | |||||
if os.path.isfile(checkpoint_file): | |||||
logging.info("loading checkpoint '%s'", args.resume) | |||||
checkpoint = torch.load(checkpoint_file) | |||||
args.start_epoch = checkpoint['epoch'] - 1 | |||||
best_prec1 = checkpoint['best_prec1'] | |||||
model.load_state_dict(checkpoint['state_dict']) | |||||
logging.info("loaded checkpoint '%s' (epoch %s)", | |||||
checkpoint_file, checkpoint['epoch']) | |||||
else: | |||||
logging.error("no checkpoint found at '%s'", args.resume) | |||||
num_parameters = sum([l.nelement() for l in model.parameters()]) | |||||
logging.info("number of parameters: %d", num_parameters) | |||||
# Data loading code | |||||
default_transform = { | |||||
'train': get_transform(args.dataset, | |||||
input_size=args.input_size, augment=True), | |||||
'eval': get_transform(args.dataset, | |||||
input_size=args.input_size, augment=False) | |||||
} | |||||
transform = getattr(model, 'input_transform', default_transform) | |||||
regime = getattr(model, 'regime', {0: {'optimizer': args.optimizer, | |||||
'lr': args.lr, | |||||
'momentum': args.momentum, | |||||
'weight_decay': args.weight_decay}}) | |||||
# define loss function (criterion) and optimizer | |||||
criterion = getattr(model, 'criterion', nn.CrossEntropyLoss)() | |||||
criterion.type(args.type) | |||||
model.type(args.type) | |||||
val_data = get_dataset(args.dataset, 'val', transform['eval']) | |||||
val_loader = torch.utils.data.DataLoader( | |||||
val_data, | |||||
batch_size=args.batch_size, shuffle=False, | |||||
num_workers=args.workers, pin_memory=True) | |||||
if args.evaluate: | |||||
validate(val_loader, model, criterion, 0) | |||||
return | |||||
train_data = get_dataset(args.dataset, 'train', transform['train']) | |||||
train_loader = torch.utils.data.DataLoader( | |||||
train_data, | |||||
batch_size=args.batch_size, shuffle=True, | |||||
num_workers=args.workers, pin_memory=True) | |||||
optimizer = torch.optim.SGD(model.parameters(), lr=args.lr) | |||||
logging.info('training regime: %s', regime) | |||||
for epoch in range(args.start_epoch, args.epochs): | |||||
optimizer = adjust_optimizer(optimizer, epoch, regime) | |||||
# train for one epoch | |||||
train_loss, train_prec1, train_prec5 = train( | |||||
train_loader, model, criterion, epoch, optimizer) | |||||
# evaluate on validation set | |||||
val_loss, val_prec1, val_prec5 = validate( | |||||
val_loader, model, criterion, epoch) | |||||
# remember best prec@1 and save checkpoint | |||||
is_best = val_prec1 > best_prec1 | |||||
best_prec1 = max(val_prec1, best_prec1) | |||||
save_checkpoint({ | |||||
'epoch': epoch + 1, | |||||
'model': args.model, | |||||
'config': args.model_config, | |||||
'state_dict': model.state_dict(), | |||||
'best_prec1': best_prec1, | |||||
'regime': regime | |||||
}, is_best, path=save_path) | |||||
logging.info('\n Epoch: {0}\t' | |||||
'Training Loss {train_loss:.4f} \t' | |||||
'Training Prec@1 {train_prec1:.3f} \t' | |||||
'Training Prec@5 {train_prec5:.3f} \t' | |||||
'Validation Loss {val_loss:.4f} \t' | |||||
'Validation Prec@1 {val_prec1:.3f} \t' | |||||
'Validation Prec@5 {val_prec5:.3f} \n' | |||||
.format(epoch + 1, train_loss=train_loss, val_loss=val_loss, | |||||
train_prec1=train_prec1, val_prec1=val_prec1, | |||||
train_prec5=train_prec5, val_prec5=val_prec5)) | |||||
results.add(epoch=epoch + 1, train_loss=train_loss, val_loss=val_loss, | |||||
train_error1=100 - train_prec1, val_error1=100 - val_prec1, | |||||
train_error5=100 - train_prec5, val_error5=100 - val_prec5) | |||||
#results.plot(x='epoch', y=['train_loss', 'val_loss'], | |||||
# title='Loss', ylabel='loss') | |||||
#results.plot(x='epoch', y=['train_error1', 'val_error1'], | |||||
# title='Error@1', ylabel='error %') | |||||
#results.plot(x='epoch', y=['train_error5', 'val_error5'], | |||||
# title='Error@5', ylabel='error %') | |||||
results.save() | |||||
def forward(data_loader, model, criterion, epoch=0, training=True, optimizer=None): | |||||
if args.gpus and len(args.gpus) > 1: | |||||
model = torch.nn.DataParallel(model, args.gpus) | |||||
batch_time = AverageMeter() | |||||
data_time = AverageMeter() | |||||
losses = AverageMeter() | |||||
top1 = AverageMeter() | |||||
top5 = AverageMeter() | |||||
end = time.time() | |||||
for i, (inputs, target) in enumerate(data_loader): | |||||
# measure data loading time | |||||
data_time.update(time.time() - end) | |||||
if args.gpus is not None: | |||||
target = target.cuda() | |||||
if not training: | |||||
with torch.no_grad(): | |||||
                input_var = Variable(inputs.type(args.type))
target_var = Variable(target) | |||||
# compute output | |||||
output = model(input_var) | |||||
else: | |||||
            input_var = Variable(inputs.type(args.type))
target_var = Variable(target) | |||||
# compute output | |||||
output = model(input_var) | |||||
loss = criterion(output, target_var) | |||||
if type(output) is list: | |||||
output = output[0] | |||||
# measure accuracy and record loss | |||||
prec1, prec5 = accuracy(output.data, target, topk=(1, 5)) | |||||
losses.update(loss.item(), inputs.size(0)) | |||||
top1.update(prec1.item(), inputs.size(0)) | |||||
top5.update(prec5.item(), inputs.size(0)) | |||||
if training: | |||||
# compute gradient and do SGD step | |||||
optimizer.zero_grad() | |||||
loss.backward() | |||||
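            # Binarized-network update trick (as in the BNN paper): gradients are computed
            # through the binarized weights, but the step below is applied to the
            # full-precision copies kept in p.org, which are then clamped to [-1, 1] so
            # they stay inside the binarization range for the next forward pass.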
for p in list(model.parameters()): | |||||
if hasattr(p,'org'): | |||||
p.data.copy_(p.org) | |||||
optimizer.step() | |||||
for p in list(model.parameters()): | |||||
if hasattr(p,'org'): | |||||
p.org.copy_(p.data.clamp_(-1,1)) | |||||
# measure elapsed time | |||||
batch_time.update(time.time() - end) | |||||
end = time.time() | |||||
if i % args.print_freq == 0: | |||||
logging.info('{phase} - Epoch: [{0}][{1}/{2}]\t' | |||||
'Time {batch_time.val:.3f} ({batch_time.avg:.3f})\t' | |||||
'Data {data_time.val:.3f} ({data_time.avg:.3f})\t' | |||||
'Loss {loss.val:.4f} ({loss.avg:.4f})\t' | |||||
'Prec@1 {top1.val:.3f} ({top1.avg:.3f})\t' | |||||
'Prec@5 {top5.val:.3f} ({top5.avg:.3f})'.format( | |||||
epoch, i, len(data_loader), | |||||
phase='TRAINING' if training else 'EVALUATING', | |||||
batch_time=batch_time, | |||||
data_time=data_time, loss=losses, top1=top1, top5=top5)) | |||||
return losses.avg, top1.avg, top5.avg | |||||
def train(data_loader, model, criterion, epoch, optimizer): | |||||
# switch to train mode | |||||
model.train() | |||||
return forward(data_loader, model, criterion, epoch, | |||||
training=True, optimizer=optimizer) | |||||
def validate(data_loader, model, criterion, epoch): | |||||
# switch to evaluate mode | |||||
model.eval() | |||||
return forward(data_loader, model, criterion, epoch, | |||||
training=False, optimizer=None) | |||||
if __name__ == '__main__': | |||||
main() |
@ -0,0 +1,332 @@ | |||||
import argparse | |||||
import os | |||||
import time | |||||
import logging | |||||
import torch | |||||
import torch.nn as nn | |||||
import torch.nn.parallel | |||||
import torch.backends.cudnn as cudnn | |||||
import torch.optim | |||||
import torch.utils.data | |||||
import models | |||||
from torch.autograd import Variable | |||||
from data import get_dataset | |||||
from preprocess import get_transform | |||||
from utils import * | |||||
from datetime import datetime | |||||
from ast import literal_eval | |||||
from torchvision.utils import save_image | |||||
from models.binarized_modules import HingeLoss | |||||
model_names = sorted(name for name in models.__dict__ | |||||
if name.islower() and not name.startswith("__") | |||||
and callable(models.__dict__[name])) | |||||
parser = argparse.ArgumentParser(description='PyTorch ConvNet Training') | |||||
parser.add_argument('--results_dir', metavar='RESULTS_DIR', default='/media/hdd/ihubara/BinaryNet.pytorch/results', | |||||
help='results dir') | |||||
parser.add_argument('--save', metavar='SAVE', default='', | |||||
help='saved folder') | |||||
parser.add_argument('--dataset', metavar='DATASET', default='imagenet', | |||||
help='dataset name or folder') | |||||
parser.add_argument('--model', '-a', metavar='MODEL', default='alexnet', | |||||
choices=model_names, | |||||
help='model architecture: ' + | |||||
' | '.join(model_names) + | |||||
' (default: alexnet)') | |||||
parser.add_argument('--input_size', type=int, default=None, | |||||
help='image input size') | |||||
parser.add_argument('--model_config', default='', | |||||
help='additional architecture configuration') | |||||
parser.add_argument('--type', default='torch.cuda.FloatTensor', | |||||
help='type of tensor - e.g torch.cuda.HalfTensor') | |||||
parser.add_argument('--gpus', default='0', | |||||
help='gpus used for training - e.g 0,1,3') | |||||
parser.add_argument('-j', '--workers', default=8, type=int, metavar='N', | |||||
help='number of data loading workers (default: 8)') | |||||
parser.add_argument('--epochs', default=900, type=int, metavar='N', | |||||
help='number of total epochs to run') | |||||
parser.add_argument('--start-epoch', default=0, type=int, metavar='N', | |||||
help='manual epoch number (useful on restarts)') | |||||
parser.add_argument('-b', '--batch-size', default=256, type=int, | |||||
metavar='N', help='mini-batch size (default: 256)') | |||||
parser.add_argument('--optimizer', default='SGD', type=str, metavar='OPT', | |||||
help='optimizer function used') | |||||
parser.add_argument('--lr', '--learning_rate', default=0.1, type=float, | |||||
metavar='LR', help='initial learning rate') | |||||
parser.add_argument('--momentum', default=0.9, type=float, metavar='M', | |||||
help='momentum') | |||||
parser.add_argument('--weight-decay', '--wd', default=1e-4, type=float, | |||||
metavar='W', help='weight decay (default: 1e-4)') | |||||
parser.add_argument('--print-freq', '-p', default=10, type=int, | |||||
metavar='N', help='print frequency (default: 10)') | |||||
parser.add_argument('--resume', default='', type=str, metavar='PATH', | |||||
help='path to latest checkpoint (default: none)') | |||||
parser.add_argument('-e', '--evaluate', type=str, metavar='FILE', | |||||
help='evaluate model FILE on validation set') | |||||
torch.cuda.random.manual_seed_all(10) | |||||
output_dim = 0 | |||||
def main(): | |||||
global args, best_prec1, output_dim | |||||
best_prec1 = 0 | |||||
args = parser.parse_args() | |||||
output_dim = {'cifar10': 10, 'cifar100':100, 'imagenet': 1000}[args.dataset] | |||||
#import pdb; pdb.set_trace() | |||||
#torch.save(args.batch_size/(len(args.gpus)/2+1),'multi_gpu_batch_size') | |||||
if args.evaluate: | |||||
args.results_dir = '/tmp' | |||||
    if args.save == '':
args.save = datetime.now().strftime('%Y-%m-%d_%H-%M-%S') | |||||
save_path = os.path.join(args.results_dir, args.save) | |||||
if not os.path.exists(save_path): | |||||
os.makedirs(save_path) | |||||
setup_logging(os.path.join(save_path, 'log.txt')) | |||||
results_file = os.path.join(save_path, 'results.%s') | |||||
results = ResultsLog(results_file % 'csv', results_file % 'html') | |||||
logging.info("saving to %s", save_path) | |||||
logging.debug("run arguments: %s", args) | |||||
if 'cuda' in args.type: | |||||
args.gpus = [int(i) for i in args.gpus.split(',')] | |||||
torch.cuda.set_device(args.gpus[0]) | |||||
cudnn.benchmark = True | |||||
else: | |||||
args.gpus = None | |||||
# create model | |||||
logging.info("creating model %s", args.model) | |||||
model = models.__dict__[args.model] | |||||
model_config = {'input_size': args.input_size, 'dataset': args.dataset, 'num_classes': output_dim} | |||||
    if args.model_config != '':
model_config = dict(model_config, **literal_eval(args.model_config)) | |||||
model = model(**model_config) | |||||
logging.info("created model with configuration: %s", model_config) | |||||
# optionally resume from a checkpoint | |||||
if args.evaluate: | |||||
if not os.path.isfile(args.evaluate): | |||||
parser.error('invalid checkpoint: {}'.format(args.evaluate)) | |||||
checkpoint = torch.load(args.evaluate) | |||||
model.load_state_dict(checkpoint['state_dict']) | |||||
logging.info("loaded checkpoint '%s' (epoch %s)", | |||||
args.evaluate, checkpoint['epoch']) | |||||
elif args.resume: | |||||
checkpoint_file = args.resume | |||||
if os.path.isdir(checkpoint_file): | |||||
results.load(os.path.join(checkpoint_file, 'results.csv')) | |||||
checkpoint_file = os.path.join( | |||||
checkpoint_file, 'model_best.pth.tar') | |||||
if os.path.isfile(checkpoint_file): | |||||
logging.info("loading checkpoint '%s'", args.resume) | |||||
checkpoint = torch.load(checkpoint_file) | |||||
args.start_epoch = checkpoint['epoch'] - 1 | |||||
best_prec1 = checkpoint['best_prec1'] | |||||
model.load_state_dict(checkpoint['state_dict']) | |||||
logging.info("loaded checkpoint '%s' (epoch %s)", | |||||
checkpoint_file, checkpoint['epoch']) | |||||
else: | |||||
logging.error("no checkpoint found at '%s'", args.resume) | |||||
num_parameters = sum([l.nelement() for l in model.parameters()]) | |||||
logging.info("number of parameters: %d", num_parameters) | |||||
# Data loading code | |||||
default_transform = { | |||||
'train': get_transform(args.dataset, | |||||
input_size=args.input_size, augment=True), | |||||
'eval': get_transform(args.dataset, | |||||
input_size=args.input_size, augment=False) | |||||
} | |||||
transform = getattr(model, 'input_transform', default_transform) | |||||
regime = getattr(model, 'regime', {0: {'optimizer': args.optimizer, | |||||
'lr': args.lr, | |||||
'momentum': args.momentum, | |||||
'weight_decay': args.weight_decay}}) | |||||
# define loss function (criterion) and optimizer | |||||
#criterion = getattr(model, 'criterion', nn.NLLLoss)() | |||||
criterion = getattr(model, 'criterion', HingeLoss)() | |||||
#criterion.type(args.type) | |||||
model.type(args.type) | |||||
val_data = get_dataset(args.dataset, 'val', transform['eval']) | |||||
val_loader = torch.utils.data.DataLoader( | |||||
val_data, | |||||
batch_size=args.batch_size, shuffle=False, | |||||
num_workers=args.workers, pin_memory=True) | |||||
if args.evaluate: | |||||
validate(val_loader, model, criterion, 0) | |||||
return | |||||
train_data = get_dataset(args.dataset, 'train', transform['train']) | |||||
train_loader = torch.utils.data.DataLoader( | |||||
train_data, | |||||
batch_size=args.batch_size, shuffle=True, | |||||
num_workers=args.workers, pin_memory=True) | |||||
optimizer = torch.optim.SGD(model.parameters(), lr=args.lr) | |||||
logging.info('training regime: %s', regime) | |||||
#import pdb; pdb.set_trace() | |||||
#search_binarized_modules(model) | |||||
for epoch in range(args.start_epoch, args.epochs): | |||||
optimizer = adjust_optimizer(optimizer, epoch, regime) | |||||
# train for one epoch | |||||
train_loss, train_prec1, train_prec5 = train( | |||||
train_loader, model, criterion, epoch, optimizer) | |||||
# evaluate on validation set | |||||
val_loss, val_prec1, val_prec5 = validate( | |||||
val_loader, model, criterion, epoch) | |||||
# remember best prec@1 and save checkpoint | |||||
is_best = val_prec1 > best_prec1 | |||||
best_prec1 = max(val_prec1, best_prec1) | |||||
save_checkpoint({ | |||||
'epoch': epoch + 1, | |||||
'model': args.model, | |||||
'config': args.model_config, | |||||
'state_dict': model.state_dict(), | |||||
'best_prec1': best_prec1, | |||||
'regime': regime | |||||
}, is_best, path=save_path) | |||||
logging.info('\n Epoch: {0}\t' | |||||
'Training Loss {train_loss:.4f} \t' | |||||
'Training Prec@1 {train_prec1:.3f} \t' | |||||
'Training Prec@5 {train_prec5:.3f} \t' | |||||
'Validation Loss {val_loss:.4f} \t' | |||||
'Validation Prec@1 {val_prec1:.3f} \t' | |||||
'Validation Prec@5 {val_prec5:.3f} \n' | |||||
.format(epoch + 1, train_loss=train_loss, val_loss=val_loss, | |||||
train_prec1=train_prec1, val_prec1=val_prec1, | |||||
train_prec5=train_prec5, val_prec5=val_prec5)) | |||||
results.add(epoch=epoch + 1, train_loss=train_loss, val_loss=val_loss, | |||||
train_error1=100 - train_prec1, val_error1=100 - val_prec1, | |||||
train_error5=100 - train_prec5, val_error5=100 - val_prec5) | |||||
        # ResultsLog.plot is commented out in utils.py (it relied on the old bokeh.charts
        # API), so these calls are disabled here as well to avoid an AttributeError.
        #results.plot(x='epoch', y=['train_loss', 'val_loss'],
        #             title='Loss', ylabel='loss')
        #results.plot(x='epoch', y=['train_error1', 'val_error1'],
        #             title='Error@1', ylabel='error %')
        #results.plot(x='epoch', y=['train_error5', 'val_error5'],
        #             title='Error@5', ylabel='error %')
results.save() | |||||
def forward(data_loader, model, criterion, epoch=0, training=True, optimizer=None): | |||||
if args.gpus and len(args.gpus) > 1: | |||||
model = torch.nn.DataParallel(model, args.gpus) | |||||
batch_time = AverageMeter() | |||||
data_time = AverageMeter() | |||||
losses = AverageMeter() | |||||
top1 = AverageMeter() | |||||
top5 = AverageMeter() | |||||
end = time.time() | |||||
for i, (inputs, target) in enumerate(data_loader): | |||||
# measure data loading time | |||||
data_time.update(time.time() - end) | |||||
if args.gpus is not None: | |||||
target = target.cuda() | |||||
#import pdb; pdb.set_trace() | |||||
if criterion.__class__.__name__=='HingeLoss': | |||||
target=target.unsqueeze(1) | |||||
target_onehot = torch.cuda.FloatTensor(target.size(0), output_dim) | |||||
target_onehot.fill_(-1) | |||||
target_onehot.scatter_(1, target, 1) | |||||
target=target.squeeze() | |||||
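            # target_onehot now holds +/-1 labels (-1 everywhere, +1 at the true class),
            # the encoding HingeLoss expects instead of integer class indices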
if not training: | |||||
with torch.no_grad(): | |||||
input_var = Variable(inputs.type(args.type)) | |||||
target_var = Variable(target_onehot) | |||||
# compute output | |||||
output = model(input_var) | |||||
else: | |||||
input_var = Variable(inputs.type(args.type)) | |||||
target_var = Variable(target_onehot) | |||||
# compute output | |||||
output = model(input_var) | |||||
#import pdb; pdb.set_trace() | |||||
loss = criterion(output, target_onehot) | |||||
#import pdb; pdb.set_trace() | |||||
if type(output) is list: | |||||
output = output[0] | |||||
# measure accuracy and record loss | |||||
prec1, prec5 = accuracy(output.data, target, topk=(1, 5)) | |||||
losses.update(loss.item(), inputs.size(0)) | |||||
top1.update(prec1.item(), inputs.size(0)) | |||||
top5.update(prec5.item(), inputs.size(0)) | |||||
#import pdb; pdb.set_trace() | |||||
#if not training and top1.avg<15: | |||||
# import pdb; pdb.set_trace() | |||||
if training: | |||||
# compute gradient and do SGD step | |||||
optimizer.zero_grad() | |||||
            #add backward hook
loss.backward() | |||||
for p in list(model.parameters()): | |||||
#import pdb; pdb.set_trace() | |||||
if hasattr(p,'org'): | |||||
#print('before:', p[0][0]) | |||||
#gm=max(p.grad.data.max(),-p.grad.data.min()) | |||||
#p.grad=p.grad.div(gm+1) | |||||
p.data.copy_(p.org) | |||||
#print('after:', p[0][0]) | |||||
optimizer.step() | |||||
for p in list(model.parameters()): | |||||
#import pdb; pdb.set_trace() | |||||
if hasattr(p,'org'): | |||||
#print('before:', p[0][0]) | |||||
p.org.copy_(p.data.clamp_(-1,1)) | |||||
#if epoch>30: | |||||
# import pdb; pdb.set_trace() | |||||
# measure elapsed time | |||||
batch_time.update(time.time() - end) | |||||
end = time.time() | |||||
if i % args.print_freq == 0: | |||||
logging.info('{phase} - Epoch: [{0}][{1}/{2}]\t' | |||||
'Time {batch_time.val:.3f} ({batch_time.avg:.3f})\t' | |||||
'Data {data_time.val:.3f} ({data_time.avg:.3f})\t' | |||||
'Loss {loss.val:.4f} ({loss.avg:.4f})\t' | |||||
'Prec@1 {top1.val:.3f} ({top1.avg:.3f})\t' | |||||
'Prec@5 {top5.val:.3f} ({top5.avg:.3f})'.format( | |||||
epoch, i, len(data_loader), | |||||
phase='TRAINING' if training else 'EVALUATING', | |||||
batch_time=batch_time, | |||||
data_time=data_time, loss=losses, top1=top1, top5=top5)) | |||||
return losses.avg, top1.avg, top5.avg | |||||
def train(data_loader, model, criterion, epoch, optimizer): | |||||
# switch to train mode | |||||
model.train() | |||||
return forward(data_loader, model, criterion, epoch, | |||||
training=True, optimizer=optimizer) | |||||
def validate(data_loader, model, criterion, epoch): | |||||
# switch to evaluate mode | |||||
model.eval() | |||||
return forward(data_loader, model, criterion, epoch, | |||||
training=False, optimizer=None) | |||||
if __name__ == '__main__': | |||||
main() |
@ -0,0 +1,150 @@ | |||||
from __future__ import print_function | |||||
import argparse | |||||
import torch | |||||
import torch.nn as nn | |||||
import torch.nn.functional as F | |||||
import torch.optim as optim | |||||
from torchvision import datasets, transforms | |||||
from torch.autograd import Variable | |||||
from models.binarized_modules import BinarizeLinear,BinarizeConv2d | |||||
from models.binarized_modules import Binarize,HingeLoss | |||||
# Training settings | |||||
parser = argparse.ArgumentParser(description='PyTorch MNIST Example') | |||||
parser.add_argument('--batch-size', type=int, default=64, metavar='N', | |||||
                    help='input batch size for training (default: 64)')
parser.add_argument('--test-batch-size', type=int, default=1000, metavar='N', | |||||
help='input batch size for testing (default: 1000)') | |||||
parser.add_argument('--epochs', type=int, default=100, metavar='N', | |||||
                    help='number of epochs to train (default: 100)')
parser.add_argument('--lr', type=float, default=0.01, metavar='LR', | |||||
                    help='learning rate (default: 0.01)')
parser.add_argument('--momentum', type=float, default=0.5, metavar='M', | |||||
help='SGD momentum (default: 0.5)') | |||||
parser.add_argument('--no-cuda', action='store_true', default=False, | |||||
help='disables CUDA training') | |||||
parser.add_argument('--seed', type=int, default=1, metavar='S', | |||||
help='random seed (default: 1)') | |||||
parser.add_argument('--gpus', default=3, | |||||
help='gpus used for training - e.g 0,1,3') | |||||
parser.add_argument('--log-interval', type=int, default=10, metavar='N', | |||||
help='how many batches to wait before logging training status') | |||||
args = parser.parse_args() | |||||
args.cuda = not args.no_cuda and torch.cuda.is_available() | |||||
torch.manual_seed(args.seed) | |||||
if args.cuda: | |||||
torch.cuda.manual_seed(args.seed) | |||||
kwargs = {'num_workers': 1, 'pin_memory': True} if args.cuda else {} | |||||
train_loader = torch.utils.data.DataLoader( | |||||
datasets.MNIST('../data', train=True, download=True, | |||||
transform=transforms.Compose([ | |||||
transforms.ToTensor(), | |||||
transforms.Normalize((0.1307,), (0.3081,)) | |||||
])), | |||||
batch_size=args.batch_size, shuffle=True, **kwargs) | |||||
test_loader = torch.utils.data.DataLoader( | |||||
datasets.MNIST('../data', train=False, transform=transforms.Compose([ | |||||
transforms.ToTensor(), | |||||
transforms.Normalize((0.1307,), (0.3081,)) | |||||
])), | |||||
batch_size=args.test_batch_size, shuffle=True, **kwargs) | |||||
class Net(nn.Module): | |||||
def __init__(self): | |||||
super(Net, self).__init__() | |||||
self.infl_ratio=3 | |||||
self.fc1 = BinarizeLinear(784, 2048*self.infl_ratio) | |||||
self.htanh1 = nn.Hardtanh() | |||||
self.bn1 = nn.BatchNorm1d(2048*self.infl_ratio) | |||||
self.fc2 = BinarizeLinear(2048*self.infl_ratio, 2048*self.infl_ratio) | |||||
self.htanh2 = nn.Hardtanh() | |||||
self.bn2 = nn.BatchNorm1d(2048*self.infl_ratio) | |||||
self.fc3 = BinarizeLinear(2048*self.infl_ratio, 2048*self.infl_ratio) | |||||
self.htanh3 = nn.Hardtanh() | |||||
self.bn3 = nn.BatchNorm1d(2048*self.infl_ratio) | |||||
self.fc4 = nn.Linear(2048*self.infl_ratio, 10) | |||||
self.logsoftmax=nn.LogSoftmax() | |||||
self.drop=nn.Dropout(0.5) | |||||
def forward(self, x): | |||||
x = x.view(-1, 28*28) | |||||
x = self.fc1(x) | |||||
x = self.bn1(x) | |||||
x = self.htanh1(x) | |||||
x = self.fc2(x) | |||||
x = self.bn2(x) | |||||
x = self.htanh2(x) | |||||
x = self.fc3(x) | |||||
x = self.drop(x) | |||||
x = self.bn3(x) | |||||
x = self.htanh3(x) | |||||
x = self.fc4(x) | |||||
return self.logsoftmax(x) | |||||
model = Net() | |||||
if args.cuda: | |||||
torch.cuda.set_device(3) | |||||
model.cuda() | |||||
criterion = nn.CrossEntropyLoss() | |||||
optimizer = optim.Adam(model.parameters(), lr=args.lr) | |||||
def train(epoch): | |||||
model.train() | |||||
for batch_idx, (data, target) in enumerate(train_loader): | |||||
if args.cuda: | |||||
data, target = data.cuda(), target.cuda() | |||||
data, target = Variable(data), Variable(target) | |||||
optimizer.zero_grad() | |||||
output = model(data) | |||||
loss = criterion(output, target) | |||||
        # learning-rate decay is applied once per epoch in the loop at the bottom of this
        # file; decaying here would shrink the LR on every batch of epochs 40, 80, ...,
        # so that duplicate step (and the redundant second zero_grad) has been removed
loss.backward() | |||||
for p in list(model.parameters()): | |||||
if hasattr(p,'org'): | |||||
p.data.copy_(p.org) | |||||
optimizer.step() | |||||
for p in list(model.parameters()): | |||||
if hasattr(p,'org'): | |||||
p.org.copy_(p.data.clamp_(-1,1)) | |||||
if batch_idx % args.log_interval == 0: | |||||
print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format( | |||||
epoch, batch_idx * len(data), len(train_loader.dataset), | |||||
100. * batch_idx / len(train_loader), loss.item())) | |||||
def test(): | |||||
model.eval() | |||||
test_loss = 0 | |||||
correct = 0 | |||||
with torch.no_grad(): | |||||
for data, target in test_loader: | |||||
if args.cuda: | |||||
data, target = data.cuda(), target.cuda() | |||||
data, target = Variable(data), Variable(target) | |||||
output = model(data) | |||||
test_loss += criterion(output, target).item() # sum up batch loss | |||||
pred = output.data.max(1, keepdim=True)[1] # get the index of the max log-probability | |||||
correct += pred.eq(target.data.view_as(pred)).cpu().sum() | |||||
test_loss /= len(test_loader.dataset) | |||||
print('\nTest set: Average loss: {:.4f}, Accuracy: {}/{} ({:.0f}%)\n'.format( | |||||
test_loss, correct, len(test_loader.dataset), | |||||
100. * correct / len(test_loader.dataset))) | |||||
for epoch in range(1, args.epochs + 1): | |||||
train(epoch) | |||||
test() | |||||
if epoch%40==0: | |||||
optimizer.param_groups[0]['lr']=optimizer.param_groups[0]['lr']*0.1 |
@ -0,0 +1,6 @@ | |||||
from .alexnet import * | |||||
from .alexnet_binary import * | |||||
from .resnet import * | |||||
from .resnet_binary import * | |||||
from .vgg_cifar10_binary import * |
@ -0,0 +1,78 @@ | |||||
import torch.nn as nn | |||||
import torchvision.transforms as transforms | |||||
__all__ = ['alexnet'] | |||||
class AlexNetOWT_BN(nn.Module): | |||||
def __init__(self, num_classes=1000): | |||||
super(AlexNetOWT_BN, self).__init__() | |||||
self.features = nn.Sequential( | |||||
nn.Conv2d(3, 64, kernel_size=11, stride=4, padding=2, | |||||
bias=False), | |||||
nn.MaxPool2d(kernel_size=3, stride=2), | |||||
nn.BatchNorm2d(64), | |||||
nn.ReLU(inplace=True), | |||||
nn.Conv2d(64, 192, kernel_size=5, padding=2, bias=False), | |||||
nn.MaxPool2d(kernel_size=3, stride=2), | |||||
nn.ReLU(inplace=True), | |||||
nn.BatchNorm2d(192), | |||||
nn.Conv2d(192, 384, kernel_size=3, padding=1, bias=False), | |||||
nn.ReLU(inplace=True), | |||||
nn.BatchNorm2d(384), | |||||
nn.Conv2d(384, 256, kernel_size=3, padding=1, bias=False), | |||||
nn.ReLU(inplace=True), | |||||
nn.BatchNorm2d(256), | |||||
nn.Conv2d(256, 256, kernel_size=3, padding=1, bias=False), | |||||
nn.MaxPool2d(kernel_size=3, stride=2), | |||||
nn.ReLU(inplace=True), | |||||
nn.BatchNorm2d(256) | |||||
) | |||||
self.classifier = nn.Sequential( | |||||
nn.Linear(256 * 6 * 6, 4096, bias=False), | |||||
nn.BatchNorm1d(4096), | |||||
nn.ReLU(inplace=True), | |||||
nn.Dropout(0.5), | |||||
nn.Linear(4096, 4096, bias=False), | |||||
nn.BatchNorm1d(4096), | |||||
nn.ReLU(inplace=True), | |||||
nn.Dropout(0.5), | |||||
nn.Linear(4096, num_classes) | |||||
) | |||||
self.regime = { | |||||
0: {'optimizer': 'SGD', 'lr': 1e-2, | |||||
'weight_decay': 5e-4, 'momentum': 0.9}, | |||||
10: {'lr': 5e-3}, | |||||
15: {'lr': 1e-3, 'weight_decay': 0}, | |||||
20: {'lr': 5e-4}, | |||||
25: {'lr': 1e-4} | |||||
} | |||||
normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406], | |||||
std=[0.229, 0.224, 0.225]) | |||||
self.input_transform = { | |||||
'train': transforms.Compose([ | |||||
transforms.Scale(256), | |||||
transforms.RandomCrop(224), | |||||
transforms.RandomHorizontalFlip(), | |||||
transforms.ToTensor(), | |||||
normalize | |||||
]), | |||||
'eval': transforms.Compose([ | |||||
transforms.Scale(256), | |||||
transforms.CenterCrop(224), | |||||
transforms.ToTensor(), | |||||
normalize | |||||
]) | |||||
} | |||||
def forward(self, x): | |||||
x = self.features(x) | |||||
x = x.view(-1, 256 * 6 * 6) | |||||
x = self.classifier(x) | |||||
return x | |||||
def alexnet(**kwargs): | |||||
num_classes = kwargs.get( 'num_classes', 1000) | |||||
return AlexNetOWT_BN(num_classes) |
@ -0,0 +1,92 @@ | |||||
import torch.nn as nn | |||||
import torchvision.transforms as transforms | |||||
from .binarized_modules import BinarizeLinear,BinarizeConv2d | |||||
__all__ = ['alexnet_binary'] | |||||
class AlexNetOWT_BN(nn.Module): | |||||
def __init__(self, num_classes=1000): | |||||
super(AlexNetOWT_BN, self).__init__() | |||||
self.ratioInfl=3 | |||||
self.features = nn.Sequential( | |||||
BinarizeConv2d(3, int(64*self.ratioInfl), kernel_size=11, stride=4, padding=2), | |||||
nn.MaxPool2d(kernel_size=3, stride=2), | |||||
nn.BatchNorm2d(int(64*self.ratioInfl)), | |||||
nn.Hardtanh(inplace=True), | |||||
BinarizeConv2d(int(64*self.ratioInfl), int(192*self.ratioInfl), kernel_size=5, padding=2), | |||||
nn.MaxPool2d(kernel_size=3, stride=2), | |||||
nn.BatchNorm2d(int(192*self.ratioInfl)), | |||||
nn.Hardtanh(inplace=True), | |||||
BinarizeConv2d(int(192*self.ratioInfl), int(384*self.ratioInfl), kernel_size=3, padding=1), | |||||
nn.BatchNorm2d(int(384*self.ratioInfl)), | |||||
nn.Hardtanh(inplace=True), | |||||
BinarizeConv2d(int(384*self.ratioInfl), int(256*self.ratioInfl), kernel_size=3, padding=1), | |||||
nn.BatchNorm2d(int(256*self.ratioInfl)), | |||||
nn.Hardtanh(inplace=True), | |||||
BinarizeConv2d(int(256*self.ratioInfl), 256, kernel_size=3, padding=1), | |||||
nn.MaxPool2d(kernel_size=3, stride=2), | |||||
nn.BatchNorm2d(256), | |||||
nn.Hardtanh(inplace=True) | |||||
) | |||||
self.classifier = nn.Sequential( | |||||
BinarizeLinear(256 * 6 * 6, 4096), | |||||
nn.BatchNorm1d(4096), | |||||
nn.Hardtanh(inplace=True), | |||||
#nn.Dropout(0.5), | |||||
BinarizeLinear(4096, 4096), | |||||
nn.BatchNorm1d(4096), | |||||
nn.Hardtanh(inplace=True), | |||||
#nn.Dropout(0.5), | |||||
BinarizeLinear(4096, num_classes), | |||||
            nn.BatchNorm1d(num_classes),
nn.LogSoftmax() | |||||
) | |||||
#self.regime = { | |||||
# 0: {'optimizer': 'SGD', 'lr': 1e-2, | |||||
# 'weight_decay': 5e-4, 'momentum': 0.9}, | |||||
# 10: {'lr': 5e-3}, | |||||
# 15: {'lr': 1e-3, 'weight_decay': 0}, | |||||
# 20: {'lr': 5e-4}, | |||||
# 25: {'lr': 1e-4} | |||||
#} | |||||
self.regime = { | |||||
0: {'optimizer': 'Adam', 'lr': 5e-3}, | |||||
20: {'lr': 1e-3}, | |||||
30: {'lr': 5e-4}, | |||||
35: {'lr': 1e-4}, | |||||
40: {'lr': 1e-5} | |||||
} | |||||
normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406], | |||||
std=[0.229, 0.224, 0.225]) | |||||
self.input_transform = { | |||||
'train': transforms.Compose([ | |||||
transforms.Scale(256), | |||||
transforms.RandomCrop(224), | |||||
transforms.RandomHorizontalFlip(), | |||||
transforms.ToTensor(), | |||||
normalize | |||||
]), | |||||
'eval': transforms.Compose([ | |||||
transforms.Scale(256), | |||||
transforms.CenterCrop(224), | |||||
transforms.ToTensor(), | |||||
normalize | |||||
]) | |||||
} | |||||
def forward(self, x): | |||||
x = self.features(x) | |||||
x = x.view(-1, 256 * 6 * 6) | |||||
x = self.classifier(x) | |||||
return x | |||||
def alexnet_binary(**kwargs): | |||||
num_classes = kwargs.get( 'num_classes', 1000) | |||||
return AlexNetOWT_BN(num_classes) |
@ -0,0 +1,423 @@ | |||||
import torch | |||||
import pdb | |||||
import torch.nn as nn | |||||
import math | |||||
from torch.autograd import Variable | |||||
from torch.autograd import Function | |||||
from decimal import Decimal, ROUND_HALF_UP | |||||
import numpy as np | |||||
def Binarize(tensor,quant_mode='det'): | |||||
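    # 'det': deterministic binarization to {-1, +1} via sign()
    # otherwise: stochastic binarization -- map to [0, 1], add uniform noise in
    # [-0.5, 0.5), clamp, round, and map back to {-1, +1}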
if quant_mode=='det': | |||||
return tensor.sign() | |||||
else: | |||||
return tensor.add_(1).div_(2).add_(torch.rand(tensor.size()).add(-0.5)).clamp_(0,1).round().mul_(2).add_(-1) | |||||
class HingeLoss(nn.Module): | |||||
def __init__(self): | |||||
super(HingeLoss,self).__init__() | |||||
self.margin=1.0 | |||||
def hinge_loss(self,input,target): | |||||
#import pdb; pdb.set_trace() | |||||
output=self.margin-input.mul(target) | |||||
output[output.le(0)]=0 | |||||
return output.mean() | |||||
def forward(self, input, target): | |||||
return self.hinge_loss(input,target) | |||||
class SqrtHingeLossFunction(Function): | |||||
def __init__(self): | |||||
super(SqrtHingeLossFunction,self).__init__() | |||||
self.margin=1.0 | |||||
def forward(self, input, target): | |||||
output=self.margin-input.mul(target) | |||||
output[output.le(0)]=0 | |||||
self.save_for_backward(input, target) | |||||
loss=output.mul(output).sum(0).sum(1).div(target.numel()) | |||||
return loss | |||||
def backward(self,grad_output): | |||||
input, target = self.saved_tensors | |||||
output=self.margin-input.mul(target) | |||||
output[output.le(0)]=0 | |||||
        # import pdb; pdb.set_trace()  # leftover breakpoint, disabled so backward() can run
grad_output.resize_as_(input).copy_(target).mul_(-2).mul_(output) | |||||
grad_output.mul_(output.ne(0).float()) | |||||
grad_output.div_(input.numel()) | |||||
return grad_output,grad_output | |||||
def Quantize(tensor,quant_mode='det', params=None, numBits=8): | |||||
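    # fixed-point quantization: clamp to +/- 2**(numBits-1), then round to a uniform
    # grid with step 2**-(numBits-1) ('det'), or round with added uniform noise otherwise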
tensor.clamp_(-2**(numBits-1),2**(numBits-1)) | |||||
if quant_mode=='det': | |||||
tensor=tensor.mul(2**(numBits-1)).round().div(2**(numBits-1)) | |||||
else: | |||||
tensor=tensor.mul(2**(numBits-1)).round().add(torch.rand(tensor.size()).add(-0.5)).div(2**(numBits-1)) | |||||
quant_fixed(tensor, params) | |||||
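        # NOTE: quant_fixed is not defined in this module; the stochastic branch
        # assumes it is supplied elsewhere and will raise NameError otherwise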
return tensor | |||||
#import torch.nn._functions as tnnf | |||||
class BinarizeLinear(nn.Linear): | |||||
def __init__(self, *kargs, **kwargs): | |||||
super(BinarizeLinear, self).__init__(*kargs, **kwargs) | |||||
def forward(self, input): | |||||
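        # keep a full-precision copy of the weights in weight.org and run the
        # forward pass with a binarized copy in weight.data, so the real-valued
        # weights can be updated by the training loop between steps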
# if input.size(1) != 784: | |||||
# input.data=Binarize(input.data) | |||||
if not hasattr(self.weight,'org'): | |||||
self.weight.org=self.weight.data.clone() | |||||
self.weight.data=Binarize(self.weight.org) | |||||
out = nn.functional.linear(input, self.weight) | |||||
        if self.bias is not None:
self.bias.org=self.bias.data.clone() | |||||
out += self.bias.view(1, -1).expand_as(out) | |||||
return out | |||||
class BinarizeConv2d(nn.Conv2d): | |||||
def __init__(self, *kargs, **kwargs): | |||||
super(BinarizeConv2d, self).__init__(*kargs, **kwargs) | |||||
def forward(self, input): | |||||
# if input.size(1) != 3: | |||||
# input.data = Binarize(input.data) | |||||
if not hasattr(self.weight,'org'): | |||||
self.weight.org=self.weight.data.clone() | |||||
self.weight.data=Binarize(self.weight.org) | |||||
#input = torch.round(input) | |||||
#input = input*2-1 | |||||
#scale = max(torch.max(input), -torch.min(input)) / 63 | |||||
#input = torch.round(input*2 / scale) - 63 | |||||
#if scale != 0: | |||||
# input = torch.round(input / scale) | |||||
#print (torch.max(input)) | |||||
#print(input) | |||||
input = torch.round(input) | |||||
#print(input) | |||||
#print (torch.max(input)) | |||||
out = nn.functional.conv2d(input, self.weight, None, self.stride, | |||||
self.padding, self.dilation, self.groups) | |||||
#print (torch.min(out), torch.max(out)) | |||||
#out = torch.round(out) | |||||
#print (torch.min(out), torch.max(out)) | |||||
#print (torch.min(input), torch.max(input)) | |||||
#out = torch.round(out / 64 * 36 / 64) | |||||
#print (self.weight.size()[1]) | |||||
#if self.weight.size()[1] >= 16 and self.weight.size()[1] <= 24: | |||||
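        # kernel-dependent output rescaling (assumed to emulate the quantized
        # partial-sum range of the CIM macro simulated by CimSimConv2d below):
        # scale down, multiply by 4, and clamp to the signed range [-63, 63]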
if self.weight.size()[1] >= 4 and self.weight.size()[2] * self.weight.size()[3] == 9: | |||||
out = torch.round(out / 64 * 36 / 64) | |||||
elif self.weight.size()[1] == 1: | |||||
out = torch.round(out * 7 / 64) | |||||
else: | |||||
out = torch.round(out / 64) | |||||
out = out * 4 | |||||
out[out > 63] = 63 | |||||
out[out < -63] = -63 | |||||
#out = out - torch.round(torch.mean(out)) | |||||
# out = out*4 | |||||
#out[out > 63] = 63 | |||||
#out[out < -63] = -63 | |||||
#else: | |||||
# out = torch.round(out * 10 / 64) | |||||
#print (torch.min(out), torch.max(out)) | |||||
# if not self.bias is None: | |||||
# self.bias.org=self.bias.data.clone() | |||||
# out += self.bias.view(1, -1, 1, 1).expand_as(out) | |||||
return out | |||||
class IdealCimConv2d(nn.Conv2d): | |||||
def __init__(self, *kargs, **kwargs): | |||||
super(IdealCimConv2d, self).__init__(*kargs, **kwargs) | |||||
def forward(self, input): | |||||
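        # "ideal" CIM convolution: same weight binarization and input rounding as
        # BinarizeConv2d, but the partial sum is simply divided by 64 (no row mapping,
        # no kernel-dependent rescaling, no error model) before the x4 scale and clamp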
# if input.size(1) != 3: | |||||
# input.data = Binarize(input.data) | |||||
if not hasattr(self.weight,'org'): | |||||
self.weight.org=self.weight.data.clone() | |||||
self.weight.data=Binarize(self.weight.org) | |||||
#input = torch.round(input) | |||||
#input = input*2-1 | |||||
#scale = max(torch.max(input), -torch.min(input)) / 63 | |||||
#input = torch.round(input*2 / scale) - 63 | |||||
#if scale != 0: | |||||
# input = torch.round(input / scale) | |||||
#print (torch.max(input)) | |||||
#print(input) | |||||
input = torch.round(input) | |||||
#print(input) | |||||
#print (torch.max(input)) | |||||
out = nn.functional.conv2d(input, self.weight, None, self.stride, | |||||
self.padding, self.dilation, self.groups) | |||||
out = out / 64 | |||||
out = out * 4 | |||||
out[out > 63] = 63 | |||||
out[out < -63] = -63 | |||||
return out | |||||
device = 'cuda:0' | |||||
''' | |||||
H = [1024, 512] | |||||
sim_model = torch.nn.Sequential( | |||||
torch.nn.Linear(36, H[0]), | |||||
torch.nn.Dropout(p=0.5), | |||||
torch.nn.ReLU(), | |||||
torch.nn.Linear(H[0], H[1]), | |||||
torch.nn.Dropout(p=0.5), | |||||
torch.nn.ReLU(), | |||||
torch.nn.Linear(H[-1], 1), | |||||
) | |||||
sim_model.load_state_dict(torch.load('model_error.ckpt', map_location=torch.device('cuda:0'))) | |||||
sim_model = sim_model.to(device) | |||||
sim_model.eval() | |||||
''' | |||||
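# NOTE: sim_model (the learned error model referenced by CimSimConv2d.cim_conv_tmp)
# is only defined inside the string literal above, so the error-model path is
# effectively disabled unless that block is uncommented and model_error.ckpt exists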
class CimSimConv2d(nn.Conv2d): | |||||
def __init__(self, *kargs, **kwargs): | |||||
super(CimSimConv2d, self).__init__(*kargs, **kwargs) | |||||
self.device = device | |||||
def forward(self, input): | |||||
if not hasattr(self.weight,'org'): | |||||
self.weight.org=self.weight.data.clone() | |||||
self.weight.data=Binarize(self.weight.org) | |||||
#scale = max(torch.max(input), -torch.min(input)) / 63 | |||||
#if scale != 0: | |||||
# input = torch.round(input / scale) | |||||
#''' random error | |||||
#out = nn.functional.conv2d(input, self.weight, None, self.stride, | |||||
# self.padding, self.dilation, self.groups) | |||||
#out = torch.round(out / 64 * 36 / 64) | |||||
#randrange = (self.weight.size()[1] // 4) | |||||
#for _ in range(randrange): | |||||
# out += torch.randint(-1, 1, out.size(), device=device) | |||||
#out[out>63] = 63 | |||||
#out[out<-63] -63 | |||||
#''' | |||||
input = torch.round(input) | |||||
out2 = self.simconv(input, self.weight) | |||||
''' | |||||
if torch.max(out2) < 32: | |||||
out2 = out2 * 2 | |||||
if torch.max(out2) < 32: | |||||
out2 = out2 * 2 | |||||
if torch.max(out2) < 32: | |||||
out2 = out2 * 2 | |||||
''' | |||||
out2 = out2 * 4 | |||||
out2[out2 > 63] = 63 | |||||
out2[out2 < -63] = -63 | |||||
#print (self.weight.data.size()) | |||||
#print (torch.max(out2), torch.min(out2)) | |||||
#print (torch.max(out-out2), torch.min(out-out2)) | |||||
#out = nn.functional.conv2d(input, self.weight, None, self.stride, | |||||
# self.padding, self.dilation, self.groups) | |||||
#print(input.size(), self.weight.size(), out.size()) | |||||
#if not self.bias is None: | |||||
# self.bias.org=self.bias.data.clone() | |||||
# out += self.bias.view(1, -1, 1, 1).expand_as(out) | |||||
return out2 | |||||
def simconv(self, input_a, weight): | |||||
#print(input_a.size(), weight.size()) | |||||
batch_size = input_a.size()[0] | |||||
out_channel = weight.size()[0] | |||||
out_width = input_a.size()[2] - 2 * (weight.size()[2] // 2) | |||||
out_height = input_a.size()[3] - 2 * (weight.size()[3] // 2) | |||||
simout = torch.zeros(batch_size, out_channel, out_width, out_height, dtype = input_a.dtype).to(device) | |||||
first = True | |||||
#''' Mapping Table | |||||
if weight.size()[2] == 7: | |||||
kernel_group = 1 | |||||
else: | |||||
kernel_group = 4 | |||||
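        # split the input channels into groups (4 channels per group, or 1 for 7x7
        # kernels) so each partial convolution fits one simulated CIM row; each
        # partial sum is divided by 64, passed through the Mapping table, offset
        # by +2 and accumulated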
Digital_input_split = torch.split(input_a, kernel_group, dim=1) | |||||
binary_weight_split = torch.split(weight, kernel_group, dim=1) | |||||
for i in range(len(Digital_input_split)): | |||||
temp_output = nn.functional.conv2d(Digital_input_split[i], binary_weight_split[i], None, self.stride, self.padding, self.dilation, self.groups) | |||||
#temp_output = torch.round(temp_output / 64 * 36 / 64) | |||||
temp_output = torch.round(temp_output / 64) | |||||
temp_output = Mapping.apply(temp_output) | |||||
simout += temp_output + 2 | |||||
#print (torch.max(simout), torch.min(simout)) | |||||
#''' | |||||
''' Error model | |||||
for n in range(batch_size): | |||||
for c in range(out_channel): | |||||
w = torch.reshape(weight[c], (-1,)).to(device) | |||||
inputs = [] | |||||
for i in range(out_width): | |||||
for j in range(out_height): | |||||
input = torch.reshape(input_a[n, :, i: i + weight.size()[2], j: j + weight.size()[3]], (-1,)) | |||||
#print (w.size(), input.size()) | |||||
# simout[n][c][i][j] = sum(w*input) | |||||
# TODO | |||||
simout[n][c][i][j] = self.cim_conv_tmp(input, w) | |||||
#''' | |||||
#print (len(input)) | |||||
#print (simout.size()) | |||||
# out = nn.functional.conv2d(input_a, weight) | |||||
return simout | |||||
def cim_conv_tmp(self, input, weight): | |||||
assert len(input) == len(weight) | |||||
raw_sum = 0 | |||||
if len(weight) == 3: | |||||
for i in range((len(input)-1) // 36 + 1): | |||||
data_x = input[i*36:i*36+36] * weight[i*36:i*36+36] | |||||
row = int(Decimal(float(sum(data_x)/64.0)).quantize(0, ROUND_HALF_UP)) | |||||
#''' Error model | |||||
if len(data_x) < 36: | |||||
data_x = torch.cat((data_x, torch.zeros(36 - len(data_x), dtype=data_x.dtype))) | |||||
try: | |||||
#ensor_x = torch.Tensor(data_x).to(self.device) | |||||
tensor_x = data_x.to(device) | |||||
except: | |||||
                    print(data_x, len(data_x))
y_pred = sim_model(tensor_x) | |||||
if int(y_pred[0]) > 10: | |||||
adjust = 10 | |||||
elif int(y_pred[0]) < -10: | |||||
adjust = -10 | |||||
else: | |||||
adjust = int(y_pred[0]) | |||||
#print (tensor_x, y_pred) | |||||
raw_sum += (row + adjust + 2) | |||||
#''' | |||||
#if row in self.mappingTable: | |||||
# row = self.mappingTable[row] | |||||
#raw_sum += row | |||||
#raw_sum += row | |||||
else: | |||||
for i in range((len(input)-1) // 49 + 1): | |||||
data_x = input[i*49:i*49+49] * weight[i*49:i*49+49] | |||||
row = int(Decimal(float(sum(data_x)/64.0)).quantize(0, ROUND_HALF_UP)) | |||||
#''' Error model | |||||
if len(data_x) < 49: | |||||
data_x = torch.cat((data_x, torch.zeros(49 - len(data_x), dtype=data_x.dtype))) | |||||
try: | |||||
#ensor_x = torch.Tensor(data_x).to(self.device) | |||||
tensor_x = data_x.to(device) | |||||
except: | |||||
                    print(data_x, len(data_x))
y_pred = sim_model(tensor_x) | |||||
if int(y_pred[0]) > 10: | |||||
adjust = 10 | |||||
elif int(y_pred[0]) < -10: | |||||
adjust = -10 | |||||
else: | |||||
adjust = int(y_pred[0]) | |||||
#print (tensor_x, y_pred) | |||||
raw_sum += (row + adjust + 2) | |||||
#print (raw_sum) | |||||
return raw_sum | |||||
class Mapping(torch.autograd.Function): | |||||
@staticmethod | |||||
def forward(ctx, input): | |||||
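        # fixed lookup table remapping each ideal partial-sum level to the level
        # assumed to be produced by the hardware macro; backward() passes the
        # gradient through unchanged (straight-through estimator)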
output = input.clone() | |||||
output[input==-1] = -4 | |||||
output[input==-2] = -5 | |||||
output[input==-3] = -6 | |||||
output[input==-4] = -7 | |||||
output[input==-5] = -9 | |||||
output[input==-6] = -9 | |||||
output[input==-7] = -11 | |||||
output[input==-8] = -11 | |||||
output[input==-9] = -13 | |||||
output[input==-10] = -13 | |||||
output[input==-11] = -17 | |||||
output[input==-12] = -17 | |||||
output[input==-13] = -17 | |||||
output[input==-14] = -19 | |||||
output[input==-15] = -19 | |||||
output[input==-16] = -21 | |||||
output[input==-17] = -21 | |||||
output[input==-18] = -23 | |||||
output[input==-19] = -25 | |||||
output[input==-20] = -25 | |||||
output[input==-21] = -25 | |||||
output[input==-22] = -25 | |||||
output[input==-23] = -27 | |||||
output[input==-24] = -27 | |||||
output[input==-25] = -29 | |||||
output[input==-26] = -29 | |||||
output[input==-27] = -29 | |||||
output[input==-28] = -31 | |||||
output[input==-29] = -31 | |||||
output[input==-30] = -33 | |||||
output[input==-31] = -33 | |||||
output[input==-32] = -35 | |||||
output[input==-33] = -35 | |||||
output[input==-34] = -35 | |||||
#output[input==-35] = -35 | |||||
output[input==0] = -2 | |||||
output[input==1] = -1 | |||||
output[input==2] = 1 | |||||
output[input==3] = 2 | |||||
#output[input==4] = 4 | |||||
output[input==5] = 4 | |||||
#output[input==6] = 6 | |||||
output[input==7] = 8 | |||||
#output[input==8] = 8 | |||||
output[input==9] = 10 | |||||
#output[input==10] = 10 | |||||
output[input==11] = 12 | |||||
#output[input==12] = 12 | |||||
output[input==13] = 16 | |||||
output[input==14] = 16 | |||||
output[input==15] = 16 | |||||
#output[input==16] = 16 | |||||
output[input==17] = 18 | |||||
output[input==18] = 20 | |||||
output[input==19] = 20 | |||||
output[input==20] = 24 | |||||
output[input==21] = 24 | |||||
output[input==22] = 24 | |||||
output[input==23] = 26 | |||||
output[input==24] = 26 | |||||
output[input==25] = 28 | |||||
output[input==26] = 28 | |||||
output[input==27] = 28 | |||||
output[input==28] = 30 | |||||
output[input==29] = 30 | |||||
output[input==30] = 32 | |||||
output[input==31] = 32 | |||||
output[input==32] = 34 | |||||
output[input==33] = 34 | |||||
output[input==34] = 34 | |||||
output[input==35] = 34 | |||||
return output | |||||
    @staticmethod
    def backward(ctx, grad_output):
return grad_output |
@ -0,0 +1,217 @@ | |||||
import torch.nn as nn | |||||
import torchvision.transforms as transforms | |||||
import math | |||||
__all__ = ['resnet'] | |||||
def conv3x3(in_planes, out_planes, stride=1): | |||||
"3x3 convolution with padding" | |||||
return nn.Conv2d(in_planes, out_planes, kernel_size=3, stride=stride, | |||||
padding=1, bias=False) | |||||
def init_model(model): | |||||
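    # He/Kaiming-style initialization: conv weights ~ N(0, sqrt(2/fan_out)); BatchNorm set to identity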
for m in model.modules(): | |||||
if isinstance(m, nn.Conv2d): | |||||
n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels | |||||
m.weight.data.normal_(0, math.sqrt(2. / n)) | |||||
elif isinstance(m, nn.BatchNorm2d): | |||||
m.weight.data.fill_(1) | |||||
m.bias.data.zero_() | |||||
class BasicBlock(nn.Module): | |||||
expansion = 1 | |||||
def __init__(self, inplanes, planes, stride=1, downsample=None): | |||||
super(BasicBlock, self).__init__() | |||||
self.conv1 = conv3x3(inplanes, planes, stride) | |||||
self.bn1 = nn.BatchNorm2d(planes) | |||||
self.relu = nn.ReLU(inplace=True) | |||||
self.conv2 = conv3x3(planes, planes) | |||||
self.bn2 = nn.BatchNorm2d(planes) | |||||
self.downsample = downsample | |||||
self.stride = stride | |||||
def forward(self, x): | |||||
residual = x | |||||
out = self.conv1(x) | |||||
out = self.bn1(out) | |||||
out = self.relu(out) | |||||
out = self.conv2(out) | |||||
out = self.bn2(out) | |||||
if self.downsample is not None: | |||||
residual = self.downsample(x) | |||||
out += residual | |||||
out = self.relu(out) | |||||
return out | |||||
class Bottleneck(nn.Module): | |||||
expansion = 4 | |||||
def __init__(self, inplanes, planes, stride=1, downsample=None): | |||||
super(Bottleneck, self).__init__() | |||||
self.conv1 = nn.Conv2d(inplanes, planes, kernel_size=1, bias=False) | |||||
self.bn1 = nn.BatchNorm2d(planes) | |||||
self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=stride, | |||||
padding=1, bias=False) | |||||
self.bn2 = nn.BatchNorm2d(planes) | |||||
self.conv3 = nn.Conv2d(planes, planes * 4, kernel_size=1, bias=False) | |||||
self.bn3 = nn.BatchNorm2d(planes * 4) | |||||
self.relu = nn.ReLU(inplace=True) | |||||
self.downsample = downsample | |||||
self.stride = stride | |||||
def forward(self, x): | |||||
residual = x | |||||
out = self.conv1(x) | |||||
out = self.bn1(out) | |||||
out = self.relu(out) | |||||
out = self.conv2(out) | |||||
out = self.bn2(out) | |||||
out = self.relu(out) | |||||
out = self.conv3(out) | |||||
out = self.bn3(out) | |||||
if self.downsample is not None: | |||||
residual = self.downsample(x) | |||||
out += residual | |||||
out = self.relu(out) | |||||
return out | |||||
class ResNet(nn.Module): | |||||
def __init__(self): | |||||
super(ResNet, self).__init__() | |||||
def _make_layer(self, block, planes, blocks, stride=1): | |||||
downsample = None | |||||
if stride != 1 or self.inplanes != planes * block.expansion: | |||||
downsample = nn.Sequential( | |||||
nn.Conv2d(self.inplanes, planes * block.expansion, | |||||
kernel_size=1, stride=stride, bias=False), | |||||
nn.BatchNorm2d(planes * block.expansion), | |||||
) | |||||
layers = [] | |||||
layers.append(block(self.inplanes, planes, stride, downsample)) | |||||
self.inplanes = planes * block.expansion | |||||
for i in range(1, blocks): | |||||
layers.append(block(self.inplanes, planes)) | |||||
return nn.Sequential(*layers) | |||||
def forward(self, x): | |||||
x = self.conv1(x) | |||||
x = self.bn1(x) | |||||
x = self.relu(x) | |||||
x = self.maxpool(x) | |||||
x = self.layer1(x) | |||||
x = self.layer2(x) | |||||
x = self.layer3(x) | |||||
x = self.layer4(x) | |||||
x = self.avgpool(x) | |||||
x = x.view(x.size(0), -1) | |||||
x = self.fc(x) | |||||
return x | |||||
class ResNet_imagenet(ResNet): | |||||
def __init__(self, num_classes=1000, | |||||
block=Bottleneck, layers=[3, 4, 23, 3]): | |||||
super(ResNet_imagenet, self).__init__() | |||||
self.inplanes = 64 | |||||
self.conv1 = nn.Conv2d(3, 64, kernel_size=7, stride=2, padding=3, | |||||
bias=False) | |||||
self.bn1 = nn.BatchNorm2d(64) | |||||
self.relu = nn.ReLU(inplace=True) | |||||
self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1) | |||||
self.layer1 = self._make_layer(block, 64, layers[0]) | |||||
self.layer2 = self._make_layer(block, 128, layers[1], stride=2) | |||||
self.layer3 = self._make_layer(block, 256, layers[2], stride=2) | |||||
self.layer4 = self._make_layer(block, 512, layers[3], stride=2) | |||||
self.avgpool = nn.AvgPool2d(7) | |||||
self.fc = nn.Linear(512 * block.expansion, num_classes) | |||||
init_model(self) | |||||
self.regime = { | |||||
0: {'optimizer': 'SGD', 'lr': 1e-1, | |||||
'weight_decay': 1e-4, 'momentum': 0.9}, | |||||
30: {'lr': 1e-2}, | |||||
60: {'lr': 1e-3, 'weight_decay': 0}, | |||||
90: {'lr': 1e-4} | |||||
} | |||||
class ResNet_cifar10(ResNet): | |||||
def __init__(self, num_classes=10, | |||||
block=BasicBlock, depth=18): | |||||
super(ResNet_cifar10, self).__init__() | |||||
self.inplanes = 16 | |||||
n = int((depth - 2) / 6) | |||||
self.conv1 = nn.Conv2d(3, 16, kernel_size=3, stride=1, padding=1, | |||||
bias=False) | |||||
self.bn1 = nn.BatchNorm2d(16) | |||||
self.relu = nn.ReLU(inplace=True) | |||||
self.maxpool = lambda x: x | |||||
self.layer1 = self._make_layer(block, 16, n) | |||||
self.layer2 = self._make_layer(block, 32, n, stride=2) | |||||
self.layer3 = self._make_layer(block, 64, n, stride=2) | |||||
self.layer4 = lambda x: x | |||||
self.avgpool = nn.AvgPool2d(8) | |||||
self.fc = nn.Linear(64, num_classes) | |||||
init_model(self) | |||||
self.regime = { | |||||
0: {'optimizer': 'SGD', 'lr': 1e-1, | |||||
'weight_decay': 1e-4, 'momentum': 0.9}, | |||||
81: {'lr': 1e-2}, | |||||
122: {'lr': 1e-3, 'weight_decay': 0}, | |||||
164: {'lr': 1e-4} | |||||
} | |||||
def resnet(**kwargs): | |||||
num_classes, depth, dataset = map( | |||||
kwargs.get, ['num_classes', 'depth', 'dataset']) | |||||
if dataset == 'imagenet': | |||||
num_classes = num_classes or 1000 | |||||
depth = depth or 50 | |||||
if depth == 18: | |||||
return ResNet_imagenet(num_classes=num_classes, | |||||
block=BasicBlock, layers=[2, 2, 2, 2]) | |||||
if depth == 34: | |||||
return ResNet_imagenet(num_classes=num_classes, | |||||
block=BasicBlock, layers=[3, 4, 6, 3]) | |||||
if depth == 50: | |||||
return ResNet_imagenet(num_classes=num_classes, | |||||
block=Bottleneck, layers=[3, 4, 6, 3]) | |||||
if depth == 101: | |||||
return ResNet_imagenet(num_classes=num_classes, | |||||
block=Bottleneck, layers=[3, 4, 23, 3]) | |||||
if depth == 152: | |||||
return ResNet_imagenet(num_classes=num_classes, | |||||
block=Bottleneck, layers=[3, 8, 36, 3]) | |||||
elif dataset == 'cifar10': | |||||
num_classes = num_classes or 10 | |||||
depth = depth or 18 #56 | |||||
return ResNet_cifar10(num_classes=num_classes, | |||||
block=BasicBlock, depth=depth) |
@ -0,0 +1,248 @@ | |||||
import torch.nn as nn | |||||
import torchvision.transforms as transforms | |||||
import math | |||||
from .binarized_modules import BinarizeLinear,BinarizeConv2d | |||||
__all__ = ['resnet_binary'] | |||||
def Binaryconv3x3(in_planes, out_planes, stride=1): | |||||
"3x3 convolution with padding" | |||||
return BinarizeConv2d(in_planes, out_planes, kernel_size=3, stride=stride, | |||||
padding=1, bias=False) | |||||
def conv3x3(in_planes, out_planes, stride=1): | |||||
"3x3 convolution with padding" | |||||
return nn.Conv2d(in_planes, out_planes, kernel_size=3, stride=stride, | |||||
padding=1, bias=False) | |||||
def init_model(model): | |||||
for m in model.modules(): | |||||
if isinstance(m, BinarizeConv2d): | |||||
n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels | |||||
m.weight.data.normal_(0, math.sqrt(2. / n)) | |||||
elif isinstance(m, nn.BatchNorm2d): | |||||
m.weight.data.fill_(1) | |||||
m.bias.data.zero_() | |||||
class BasicBlock(nn.Module): | |||||
expansion = 1 | |||||
def __init__(self, inplanes, planes, stride=1, downsample=None,do_bntan=True): | |||||
super(BasicBlock, self).__init__() | |||||
self.conv1 = Binaryconv3x3(inplanes, planes, stride) | |||||
self.bn1 = nn.BatchNorm2d(planes) | |||||
self.tanh1 = nn.Hardtanh(inplace=True) | |||||
self.conv2 = Binaryconv3x3(planes, planes) | |||||
self.tanh2 = nn.Hardtanh(inplace=True) | |||||
self.bn2 = nn.BatchNorm2d(planes) | |||||
self.downsample = downsample | |||||
        self.do_bntan = do_bntan
self.stride = stride | |||||
def forward(self, x): | |||||
residual = x.clone() | |||||
out = self.conv1(x) | |||||
out = self.bn1(out) | |||||
out = self.tanh1(out) | |||||
out = self.conv2(out) | |||||
if self.downsample is not None: | |||||
if residual.data.max()>1: | |||||
import pdb; pdb.set_trace() | |||||
residual = self.downsample(residual) | |||||
out += residual | |||||
if self.do_bntan: | |||||
out = self.bn2(out) | |||||
out = self.tanh2(out) | |||||
return out | |||||
class Bottleneck(nn.Module): | |||||
expansion = 4 | |||||
def __init__(self, inplanes, planes, stride=1, downsample=None): | |||||
super(Bottleneck, self).__init__() | |||||
self.conv1 = BinarizeConv2d(inplanes, planes, kernel_size=1, bias=False) | |||||
self.bn1 = nn.BatchNorm2d(planes) | |||||
self.conv2 = BinarizeConv2d(planes, planes, kernel_size=3, stride=stride, | |||||
padding=1, bias=False) | |||||
self.bn2 = nn.BatchNorm2d(planes) | |||||
self.conv3 = BinarizeConv2d(planes, planes * 4, kernel_size=1, bias=False) | |||||
self.bn3 = nn.BatchNorm2d(planes * 4) | |||||
self.tanh = nn.Hardtanh(inplace=True) | |||||
self.downsample = downsample | |||||
self.stride = stride | |||||
def forward(self, x): | |||||
residual = x | |||||
        # import pdb; pdb.set_trace()  # leftover breakpoint, disabled so forward() can run
out = self.conv1(x) | |||||
out = self.bn1(out) | |||||
out = self.tanh(out) | |||||
out = self.conv2(out) | |||||
out = self.bn2(out) | |||||
out = self.tanh(out) | |||||
out = self.conv3(out) | |||||
out = self.bn3(out) | |||||
if self.downsample is not None: | |||||
residual = self.downsample(x) | |||||
out += residual | |||||
if self.do_bntan: | |||||
out = self.bn2(out) | |||||
out = self.tanh2(out) | |||||
return out | |||||
class ResNet(nn.Module): | |||||
def __init__(self): | |||||
super(ResNet, self).__init__() | |||||
def _make_layer(self, block, planes, blocks, stride=1,do_bntan=True): | |||||
downsample = None | |||||
if stride != 1 or self.inplanes != planes * block.expansion: | |||||
downsample = nn.Sequential( | |||||
BinarizeConv2d(self.inplanes, planes * block.expansion, | |||||
kernel_size=1, stride=stride, bias=False), | |||||
nn.BatchNorm2d(planes * block.expansion), | |||||
) | |||||
layers = [] | |||||
layers.append(block(self.inplanes, planes, stride, downsample)) | |||||
self.inplanes = planes * block.expansion | |||||
for i in range(1, blocks-1): | |||||
layers.append(block(self.inplanes, planes)) | |||||
layers.append(block(self.inplanes, planes,do_bntan=do_bntan)) | |||||
return nn.Sequential(*layers) | |||||
def forward(self, x): | |||||
x = self.conv1(x) | |||||
x = self.maxpool(x) | |||||
x = self.bn1(x) | |||||
x = self.tanh1(x) | |||||
x = self.layer1(x) | |||||
x = self.layer2(x) | |||||
x = self.layer3(x) | |||||
x = self.layer4(x) | |||||
x = self.avgpool(x) | |||||
x = x.view(x.size(0), -1) | |||||
x = self.bn2(x) | |||||
x = self.tanh2(x) | |||||
x = self.fc(x) | |||||
x = self.bn3(x) | |||||
x = self.logsoftmax(x) | |||||
return x | |||||
class ResNet_imagenet(ResNet): | |||||
def __init__(self, num_classes=1000, | |||||
block=Bottleneck, layers=[3, 4, 23, 3]): | |||||
super(ResNet_imagenet, self).__init__() | |||||
self.inplanes = 64 | |||||
self.conv1 = BinarizeConv2d(3, 64, kernel_size=7, stride=2, padding=3, | |||||
bias=False) | |||||
self.bn1 = nn.BatchNorm2d(64) | |||||
self.tanh = nn.Hardtanh(inplace=True) | |||||
self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1) | |||||
self.layer1 = self._make_layer(block, 64, layers[0]) | |||||
self.layer2 = self._make_layer(block, 128, layers[1], stride=2) | |||||
self.layer3 = self._make_layer(block, 256, layers[2], stride=2) | |||||
self.layer4 = self._make_layer(block, 512, layers[3], stride=2) | |||||
self.avgpool = nn.AvgPool2d(7) | |||||
self.fc = BinarizeLinear(512 * block.expansion, num_classes) | |||||
init_model(self) | |||||
self.regime = { | |||||
0: {'optimizer': 'SGD', 'lr': 1e-1, | |||||
'weight_decay': 1e-4, 'momentum': 0.9}, | |||||
30: {'lr': 1e-2}, | |||||
60: {'lr': 1e-3, 'weight_decay': 0}, | |||||
90: {'lr': 1e-4} | |||||
} | |||||
class ResNet_cifar10(ResNet): | |||||
def __init__(self, num_classes=10, | |||||
block=BasicBlock, depth=18): | |||||
super(ResNet_cifar10, self).__init__() | |||||
self.inflate = 5 | |||||
self.inplanes = 16*self.inflate | |||||
n = int((depth - 2) / 6) | |||||
self.conv1 = BinarizeConv2d(3, 16*self.inflate, kernel_size=3, stride=1, padding=1, | |||||
bias=False) | |||||
self.maxpool = lambda x: x | |||||
self.bn1 = nn.BatchNorm2d(16*self.inflate) | |||||
self.tanh1 = nn.Hardtanh(inplace=True) | |||||
self.tanh2 = nn.Hardtanh(inplace=True) | |||||
self.layer1 = self._make_layer(block, 16*self.inflate, n) | |||||
self.layer2 = self._make_layer(block, 32*self.inflate, n, stride=2) | |||||
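        # the last block of the final stage skips its BatchNorm/Hardtanh
        # (do_bntan=False); bn2/tanh2 are applied after global pooling in forward()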
self.layer3 = self._make_layer(block, 64*self.inflate, n, stride=2,do_bntan=False) | |||||
self.layer4 = lambda x: x | |||||
self.avgpool = nn.AvgPool2d(8) | |||||
self.bn2 = nn.BatchNorm1d(64*self.inflate) | |||||
self.bn3 = nn.BatchNorm1d(10) | |||||
self.logsoftmax = nn.LogSoftmax() | |||||
self.fc = BinarizeLinear(64*self.inflate, num_classes) | |||||
init_model(self) | |||||
#self.regime = { | |||||
# 0: {'optimizer': 'SGD', 'lr': 1e-1, | |||||
# 'weight_decay': 1e-4, 'momentum': 0.9}, | |||||
# 81: {'lr': 1e-4}, | |||||
# 122: {'lr': 1e-5, 'weight_decay': 0}, | |||||
# 164: {'lr': 1e-6} | |||||
#} | |||||
self.regime = { | |||||
0: {'optimizer': 'Adam', 'lr': 5e-3}, | |||||
101: {'lr': 1e-3}, | |||||
142: {'lr': 5e-4}, | |||||
184: {'lr': 1e-4}, | |||||
220: {'lr': 1e-5} | |||||
} | |||||
def resnet_binary(**kwargs): | |||||
num_classes, depth, dataset = map( | |||||
kwargs.get, ['num_classes', 'depth', 'dataset']) | |||||
if dataset == 'imagenet': | |||||
num_classes = num_classes or 1000 | |||||
depth = depth or 50 | |||||
if depth == 18: | |||||
return ResNet_imagenet(num_classes=num_classes, | |||||
block=BasicBlock, layers=[2, 2, 2, 2]) | |||||
if depth == 34: | |||||
return ResNet_imagenet(num_classes=num_classes, | |||||
block=BasicBlock, layers=[3, 4, 6, 3]) | |||||
if depth == 50: | |||||
return ResNet_imagenet(num_classes=num_classes, | |||||
block=Bottleneck, layers=[3, 4, 6, 3]) | |||||
if depth == 101: | |||||
return ResNet_imagenet(num_classes=num_classes, | |||||
block=Bottleneck, layers=[3, 4, 23, 3]) | |||||
if depth == 152: | |||||
return ResNet_imagenet(num_classes=num_classes, | |||||
block=Bottleneck, layers=[3, 8, 36, 3]) | |||||
elif dataset == 'cifar10': | |||||
num_classes = num_classes or 10 | |||||
depth = depth or 18 | |||||
return ResNet_cifar10(num_classes=num_classes, | |||||
block=BasicBlock, depth=depth) |
@ -0,0 +1,69 @@ | |||||
import torch.nn as nn | |||||
import torchvision.transforms as transforms | |||||
class AlexNetOWT_BN(nn.Module): | |||||
def __init__(self, num_classes=1000): | |||||
super(AlexNetOWT_BN, self).__init__() | |||||
self.features = nn.Sequential( | |||||
nn.Conv2d(3, 128, kernel_size=3, stride=1, padding=1, | |||||
bias=False), | |||||
nn.BatchNorm2d(128), | |||||
nn.ReLU(inplace=True), | |||||
nn.Conv2d(128, 128, kernel_size=3, padding=1, bias=False), | |||||
nn.MaxPool2d(kernel_size=2, stride=2), | |||||
nn.ReLU(inplace=True), | |||||
nn.BatchNorm2d(128), | |||||
nn.Conv2d(128, 256, kernel_size=3, padding=1, bias=False), | |||||
nn.ReLU(inplace=True), | |||||
nn.BatchNorm2d(256), | |||||
nn.Conv2d(256, 256, kernel_size=3, padding=1, bias=False), | |||||
nn.MaxPool2d(kernel_size=2, stride=2), | |||||
nn.ReLU(inplace=True), | |||||
nn.BatchNorm2d(256), | |||||
nn.Conv2d(256, 512, kernel_size=3, padding=1, bias=False), | |||||
nn.ReLU(inplace=True), | |||||
nn.BatchNorm2d(512), | |||||
nn.Conv2d(512, 512, kernel_size=3, padding=1, bias=False), | |||||
nn.MaxPool2d(kernel_size=2, stride=2), | |||||
nn.ReLU(inplace=True), | |||||
nn.BatchNorm2d(512), | |||||
) | |||||
self.classifier = nn.Sequential( | |||||
nn.Linear(512 * 4 * 4, 1024, bias=False), | |||||
nn.BatchNorm1d(1024), | |||||
nn.ReLU(inplace=True), | |||||
nn.Dropout(0.5), | |||||
nn.Linear(1024, 1024, bias=False), | |||||
nn.BatchNorm1d(1024), | |||||
nn.ReLU(inplace=True), | |||||
nn.Dropout(0.5), | |||||
            nn.Linear(1024, num_classes),
            nn.LogSoftmax()
) | |||||
self.regime = { | |||||
0: {'optimizer': 'SGD', 'lr': 1e-2, | |||||
'weight_decay': 5e-4, 'momentum': 0.9}, | |||||
10: {'lr': 5e-3}, | |||||
15: {'lr': 1e-3, 'weight_decay': 0}, | |||||
20: {'lr': 5e-4}, | |||||
25: {'lr': 1e-4} | |||||
} | |||||
def forward(self, x): | |||||
x = self.features(x) | |||||
x = x.view(-1, 512 * 4 * 4) | |||||
x = self.classifier(x) | |||||
return x | |||||
def model(**kwargs): | |||||
num_classes = kwargs.get( 'num_classes', 1000) | |||||
return AlexNetOWT_BN(num_classes) |
@ -0,0 +1,80 @@ | |||||
import torch | |||||
import torch.nn as nn | |||||
import torchvision.transforms as transforms | |||||
from torch.autograd import Function | |||||
from .binarized_modules import BinarizeLinear,BinarizeConv2d | |||||
class VGG_Cifar10(nn.Module): | |||||
def __init__(self, num_classes=1000): | |||||
super(VGG_Cifar10, self).__init__() | |||||
self.infl_ratio=3; | |||||
self.features = nn.Sequential( | |||||
BinarizeConv2d(3, 128*self.infl_ratio, kernel_size=3, stride=1, padding=1, | |||||
bias=True), | |||||
nn.BatchNorm2d(128*self.infl_ratio), | |||||
nn.Hardtanh(inplace=True), | |||||
BinarizeConv2d(128*self.infl_ratio, 128*self.infl_ratio, kernel_size=3, padding=1, bias=True), | |||||
nn.MaxPool2d(kernel_size=2, stride=2), | |||||
nn.BatchNorm2d(128*self.infl_ratio), | |||||
nn.Hardtanh(inplace=True), | |||||
BinarizeConv2d(128*self.infl_ratio, 256*self.infl_ratio, kernel_size=3, padding=1, bias=True), | |||||
nn.BatchNorm2d(256*self.infl_ratio), | |||||
nn.Hardtanh(inplace=True), | |||||
BinarizeConv2d(256*self.infl_ratio, 256*self.infl_ratio, kernel_size=3, padding=1, bias=True), | |||||
nn.MaxPool2d(kernel_size=2, stride=2), | |||||
nn.BatchNorm2d(256*self.infl_ratio), | |||||
nn.Hardtanh(inplace=True), | |||||
BinarizeConv2d(256*self.infl_ratio, 512*self.infl_ratio, kernel_size=3, padding=1, bias=True), | |||||
nn.BatchNorm2d(512*self.infl_ratio), | |||||
nn.Hardtanh(inplace=True), | |||||
BinarizeConv2d(512*self.infl_ratio, 512, kernel_size=3, padding=1, bias=True), | |||||
nn.MaxPool2d(kernel_size=2, stride=2), | |||||
nn.BatchNorm2d(512), | |||||
nn.Hardtanh(inplace=True) | |||||
) | |||||
self.classifier = nn.Sequential( | |||||
BinarizeLinear(512 * 4 * 4, 1024, bias=True), | |||||
nn.BatchNorm1d(1024), | |||||
nn.Hardtanh(inplace=True), | |||||
#nn.Dropout(0.5), | |||||
BinarizeLinear(1024, 1024, bias=True), | |||||
nn.BatchNorm1d(1024), | |||||
nn.Hardtanh(inplace=True), | |||||
#nn.Dropout(0.5), | |||||
BinarizeLinear(1024, num_classes, bias=True), | |||||
nn.BatchNorm1d(num_classes, affine=False), | |||||
nn.LogSoftmax() | |||||
) | |||||
self.regime = { | |||||
0: {'optimizer': 'Adam', 'betas': (0.9, 0.999),'lr': 5e-3}, | |||||
40: {'lr': 1e-3}, | |||||
80: {'lr': 5e-4}, | |||||
100: {'lr': 1e-4}, | |||||
120: {'lr': 5e-5}, | |||||
140: {'lr': 1e-5} | |||||
} | |||||
def forward(self, x): | |||||
x = self.features(x) | |||||
x = x.view(-1, 512 * 4 * 4) | |||||
x = self.classifier(x) | |||||
return x | |||||
def vgg_cifar10_binary(**kwargs): | |||||
num_classes = kwargs.get( 'num_classes', 10) | |||||
return VGG_Cifar10(num_classes) |
@ -0,0 +1,198 @@ | |||||
import torch | |||||
import torchvision.transforms as transforms | |||||
import random | |||||
__imagenet_stats = {'mean': [0.485, 0.456, 0.406], | |||||
'std': [0.229, 0.224, 0.225]} | |||||
__imagenet_pca = { | |||||
'eigval': torch.Tensor([0.2175, 0.0188, 0.0045]), | |||||
'eigvec': torch.Tensor([ | |||||
[-0.5675, 0.7192, 0.4009], | |||||
[-0.5808, -0.0045, -0.8140], | |||||
[-0.5836, -0.6948, 0.4203], | |||||
]) | |||||
} | |||||
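# eigval/eigvec: principal components of ImageNet RGB pixel values, used by the
# Lighting transform below (AlexNet-style PCA color-noise augmentation)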
def scale_crop(input_size, scale_size=None, normalize=__imagenet_stats): | |||||
t_list = [ | |||||
transforms.CenterCrop(input_size), | |||||
transforms.ToTensor(), | |||||
transforms.Normalize(**normalize), | |||||
] | |||||
if scale_size != input_size: | |||||
t_list = [transforms.Scale(scale_size)] + t_list | |||||
return transforms.Compose(t_list) | |||||
def scale_random_crop(input_size, scale_size=None, normalize=__imagenet_stats): | |||||
t_list = [ | |||||
transforms.RandomCrop(input_size), | |||||
transforms.ToTensor(), | |||||
transforms.Normalize(**normalize), | |||||
] | |||||
if scale_size != input_size: | |||||
t_list = [transforms.Scale(scale_size)] + t_list | |||||
    return transforms.Compose(t_list)
def pad_random_crop(input_size, scale_size=None, normalize=__imagenet_stats): | |||||
padding = int((scale_size - input_size) / 2) | |||||
return transforms.Compose([ | |||||
transforms.RandomCrop(input_size, padding=padding), | |||||
transforms.RandomHorizontalFlip(), | |||||
transforms.ToTensor(), | |||||
transforms.Normalize(**normalize), | |||||
]) | |||||
def inception_preproccess(input_size, normalize=__imagenet_stats): | |||||
return transforms.Compose([ | |||||
transforms.RandomSizedCrop(input_size), | |||||
transforms.RandomHorizontalFlip(), | |||||
transforms.ToTensor(), | |||||
transforms.Normalize(**normalize) | |||||
]) | |||||
def inception_color_preproccess(input_size, normalize=__imagenet_stats): | |||||
return transforms.Compose([ | |||||
transforms.RandomSizedCrop(input_size), | |||||
transforms.RandomHorizontalFlip(), | |||||
transforms.ToTensor(), | |||||
ColorJitter( | |||||
brightness=0.4, | |||||
contrast=0.4, | |||||
saturation=0.4, | |||||
), | |||||
Lighting(0.1, __imagenet_pca['eigval'], __imagenet_pca['eigvec']), | |||||
transforms.Normalize(**normalize) | |||||
]) | |||||
def get_transform(name='imagenet', input_size=None, | |||||
scale_size=None, normalize=None, augment=True): | |||||
normalize = normalize or __imagenet_stats | |||||
if name == 'imagenet': | |||||
scale_size = scale_size or 256 | |||||
input_size = input_size or 224 | |||||
if augment: | |||||
return inception_preproccess(input_size, normalize=normalize) | |||||
else: | |||||
return scale_crop(input_size=input_size, | |||||
scale_size=scale_size, normalize=normalize) | |||||
elif 'cifar' in name: | |||||
input_size = input_size or 32 | |||||
if augment: | |||||
scale_size = scale_size or 40 | |||||
return pad_random_crop(input_size, scale_size=scale_size, | |||||
normalize=normalize) | |||||
else: | |||||
scale_size = scale_size or 32 | |||||
return scale_crop(input_size=input_size, | |||||
scale_size=scale_size, normalize=normalize) | |||||
elif name == 'mnist': | |||||
normalize = {'mean': [0.5], 'std': [0.5]} | |||||
input_size = input_size or 28 | |||||
if augment: | |||||
scale_size = scale_size or 32 | |||||
return pad_random_crop(input_size, scale_size=scale_size, | |||||
normalize=normalize) | |||||
else: | |||||
scale_size = scale_size or 32 | |||||
return scale_crop(input_size=input_size, | |||||
scale_size=scale_size, normalize=normalize) | |||||
class Lighting(object): | |||||
"""Lighting noise(AlexNet - style PCA - based noise)""" | |||||
def __init__(self, alphastd, eigval, eigvec): | |||||
self.alphastd = alphastd | |||||
self.eigval = eigval | |||||
self.eigvec = eigvec | |||||
def __call__(self, img): | |||||
if self.alphastd == 0: | |||||
return img | |||||
alpha = img.new().resize_(3).normal_(0, self.alphastd) | |||||
rgb = self.eigvec.type_as(img).clone()\ | |||||
.mul(alpha.view(1, 3).expand(3, 3))\ | |||||
.mul(self.eigval.view(1, 3).expand(3, 3))\ | |||||
.sum(1).squeeze() | |||||
return img.add(rgb.view(3, 1, 1).expand_as(img)) | |||||
class Grayscale(object): | |||||
def __call__(self, img): | |||||
gs = img.clone() | |||||
gs[0].mul_(0.299).add_(0.587, gs[1]).add_(0.114, gs[2]) | |||||
gs[1].copy_(gs[0]) | |||||
gs[2].copy_(gs[0]) | |||||
return gs | |||||
class Saturation(object): | |||||
def __init__(self, var): | |||||
self.var = var | |||||
def __call__(self, img): | |||||
gs = Grayscale()(img) | |||||
alpha = random.uniform(0, self.var) | |||||
return img.lerp(gs, alpha) | |||||
class Brightness(object): | |||||
def __init__(self, var): | |||||
self.var = var | |||||
def __call__(self, img): | |||||
gs = img.new().resize_as_(img).zero_() | |||||
alpha = random.uniform(0, self.var) | |||||
return img.lerp(gs, alpha) | |||||
class Contrast(object): | |||||
def __init__(self, var): | |||||
self.var = var | |||||
def __call__(self, img): | |||||
gs = Grayscale()(img) | |||||
gs.fill_(gs.mean()) | |||||
alpha = random.uniform(0, self.var) | |||||
return img.lerp(gs, alpha) | |||||
class RandomOrder(object): | |||||
""" Composes several transforms together in random order. | |||||
""" | |||||
def __init__(self, transforms): | |||||
self.transforms = transforms | |||||
def __call__(self, img): | |||||
if self.transforms is None: | |||||
return img | |||||
order = torch.randperm(len(self.transforms)) | |||||
for i in order: | |||||
img = self.transforms[i](img) | |||||
return img | |||||
class ColorJitter(RandomOrder): | |||||
def __init__(self, brightness=0.4, contrast=0.4, saturation=0.4): | |||||
self.transforms = [] | |||||
if brightness != 0: | |||||
self.transforms.append(Brightness(brightness)) | |||||
if contrast != 0: | |||||
self.transforms.append(Contrast(contrast)) | |||||
if saturation != 0: | |||||
self.transforms.append(Saturation(saturation)) |
@ -0,0 +1,5 @@ | |||||
2021-04-15 15:36:47 - INFO - saving to ./results/2021-04-15_15-36-47 | |||||
2021-04-15 15:36:47 - DEBUG - run arguments: Namespace(batch_size=256, dataset='imagenet', epochs=2500, evaluate=None, gpus='0', input_size=None, lr=0.1, model='alexnet', model_config='', momentum=0.9, optimizer='SGD', print_freq=10, results_dir='./results', resume='', save='2021-04-15_15-36-47', start_epoch=0, type='torch.cuda.FloatTensor', weight_decay=0.0001, workers=8) | |||||
2021-04-15 15:36:47 - INFO - creating model alexnet | |||||
2021-04-15 15:36:48 - INFO - created model with configuration: {'input_size': None, 'dataset': 'imagenet'} | |||||
2021-04-15 15:36:48 - INFO - number of parameters: 61110184 |
@ -0,0 +1,5 @@ | |||||
2021-04-15 15:37:52 - INFO - saving to ./results/2021-04-15_15-37-52 | |||||
2021-04-15 15:37:52 - DEBUG - run arguments: Namespace(batch_size=256, dataset='imagenet', epochs=2500, evaluate=None, gpus='0', input_size=None, lr=0.1, model='resnet', model_config='', momentum=0.9, optimizer='SGD', print_freq=10, results_dir='./results', resume='', save='2021-04-15_15-37-52', start_epoch=0, type='torch.cuda.FloatTensor', weight_decay=0.0001, workers=8) | |||||
2021-04-15 15:37:52 - INFO - creating model resnet | |||||
2021-04-15 15:37:52 - INFO - created model with configuration: {'input_size': None, 'dataset': 'imagenet'} | |||||
2021-04-15 15:37:52 - INFO - number of parameters: 25557032 |
@ -0,0 +1,5 @@ | |||||
2021-04-15 15:38:16 - INFO - saving to ./results/2021-04-15_15-38-16 | |||||
2021-04-15 15:38:16 - DEBUG - run arguments: Namespace(batch_size=256, dataset='imagenet', epochs=2500, evaluate=None, gpus='0', input_size=None, lr=0.1, model='alexnet', model_config='', momentum=0.9, optimizer='SGD', print_freq=10, results_dir='./results', resume='', save='2021-04-15_15-38-16', start_epoch=0, type='torch.cuda.FloatTensor', weight_decay=0.0001, workers=8) | |||||
2021-04-15 15:38:16 - INFO - creating model alexnet | |||||
2021-04-15 15:38:17 - INFO - created model with configuration: {'input_size': None, 'dataset': 'imagenet'} | |||||
2021-04-15 15:38:17 - INFO - number of parameters: 61110184 |
@ -0,0 +1,160 @@ | |||||
import os | |||||
import torch | |||||
import logging.config | |||||
import shutil | |||||
import pandas as pd | |||||
from bokeh.io import output_file, save, show | |||||
from bokeh.plotting import figure | |||||
from bokeh.layouts import column | |||||
#from bokeh.charts import Line, defaults | |||||
# | |||||
#defaults.width = 800 | |||||
#defaults.height = 400 | |||||
#defaults.tools = 'pan,box_zoom,wheel_zoom,box_select,hover,resize,reset,save' | |||||
def setup_logging(log_file='log.txt'): | |||||
"""Setup logging configuration | |||||
""" | |||||
logging.basicConfig(level=logging.DEBUG, | |||||
format="%(asctime)s - %(levelname)s - %(message)s", | |||||
datefmt="%Y-%m-%d %H:%M:%S", | |||||
filename=log_file, | |||||
filemode='w') | |||||
console = logging.StreamHandler() | |||||
console.setLevel(logging.INFO) | |||||
formatter = logging.Formatter('%(message)s') | |||||
console.setFormatter(formatter) | |||||
logging.getLogger('').addHandler(console) | |||||
class ResultsLog(object): | |||||
def __init__(self, path='results.csv', plot_path=None): | |||||
self.path = path | |||||
self.plot_path = plot_path or (self.path + '.html') | |||||
self.figures = [] | |||||
self.results = None | |||||
def add(self, **kwargs): | |||||
df = pd.DataFrame([kwargs.values()], columns=kwargs.keys()) | |||||
if self.results is None: | |||||
self.results = df | |||||
else: | |||||
self.results = self.results.append(df, ignore_index=True) | |||||
def save(self, title='Training Results'): | |||||
if len(self.figures) > 0: | |||||
if os.path.isfile(self.plot_path): | |||||
os.remove(self.plot_path) | |||||
output_file(self.plot_path, title=title) | |||||
plot = column(*self.figures) | |||||
save(plot) | |||||
self.figures = [] | |||||
self.results.to_csv(self.path, index=False, index_label=False) | |||||
def load(self, path=None): | |||||
path = path or self.path | |||||
if os.path.isfile(path): | |||||
            self.results = pd.read_csv(path)
def show(self): | |||||
if len(self.figures) > 0: | |||||
plot = column(*self.figures) | |||||
show(plot) | |||||
#def plot(self, *kargs, **kwargs): | |||||
# line = Line(data=self.results, *kargs, **kwargs) | |||||
# self.figures.append(line) | |||||
def image(self, *kargs, **kwargs): | |||||
fig = figure() | |||||
fig.image(*kargs, **kwargs) | |||||
self.figures.append(fig) | |||||
def save_checkpoint(state, is_best, path='.', filename='checkpoint.pth.tar', save_all=False): | |||||
filename = os.path.join(path, filename) | |||||
torch.save(state, filename) | |||||
if is_best: | |||||
shutil.copyfile(filename, os.path.join(path, 'model_best.pth.tar')) | |||||
if save_all: | |||||
shutil.copyfile(filename, os.path.join( | |||||
path, 'checkpoint_epoch_%s.pth.tar' % state['epoch'])) | |||||
class AverageMeter(object): | |||||
"""Computes and stores the average and current value""" | |||||
def __init__(self): | |||||
self.reset() | |||||
def reset(self): | |||||
self.val = 0 | |||||
self.avg = 0 | |||||
self.sum = 0 | |||||
self.count = 0 | |||||
def update(self, val, n=1): | |||||
self.val = val | |||||
self.sum += val * n | |||||
self.count += n | |||||
self.avg = self.sum / self.count | |||||
__optimizers = { | |||||
'SGD': torch.optim.SGD, | |||||
'ASGD': torch.optim.ASGD, | |||||
'Adam': torch.optim.Adam, | |||||
'Adamax': torch.optim.Adamax, | |||||
'Adagrad': torch.optim.Adagrad, | |||||
'Adadelta': torch.optim.Adadelta, | |||||
'Rprop': torch.optim.Rprop, | |||||
'RMSprop': torch.optim.RMSprop | |||||
} | |||||
def adjust_optimizer(optimizer, epoch, config): | |||||
"""Reconfigures the optimizer according to epoch and config dict""" | |||||
def modify_optimizer(optimizer, setting): | |||||
if 'optimizer' in setting: | |||||
optimizer = __optimizers[setting['optimizer']]( | |||||
optimizer.param_groups) | |||||
logging.debug('OPTIMIZER - setting method = %s' % | |||||
setting['optimizer']) | |||||
for param_group in optimizer.param_groups: | |||||
for key in param_group.keys(): | |||||
if key in setting: | |||||
logging.debug('OPTIMIZER - setting %s = %s' % | |||||
(key, setting[key])) | |||||
param_group[key] = setting[key] | |||||
return optimizer | |||||
if callable(config): | |||||
optimizer = modify_optimizer(optimizer, config(epoch)) | |||||
else: | |||||
for e in range(epoch + 1): # run over all epochs - sticky setting | |||||
if e in config: | |||||
optimizer = modify_optimizer(optimizer, config[e]) | |||||
return optimizer | |||||
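# example: with a model regime such as {0: {'optimizer': 'SGD', 'lr': 1e-1}, 30: {'lr': 1e-2}},
# calling adjust_optimizer(optimizer, epoch, regime) every epoch applies all settings whose
# starting epoch is <= epoch, so later entries override earlier ones ("sticky" settings)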
def accuracy(output, target, topk=(1,)): | |||||
"""Computes the precision@k for the specified values of k""" | |||||
maxk = max(topk) | |||||
batch_size = target.size(0) | |||||
_, pred = output.float().topk(maxk, 1, True, True) | |||||
pred = pred.t() | |||||
correct = pred.eq(target.view(1, -1).expand_as(pred)) | |||||
res = [] | |||||
for k in topk: | |||||
correct_k = correct[:k].view(-1).float().sum(0) | |||||
res.append(correct_k.mul_(100.0 / batch_size)) | |||||
return res | |||||
# kernel_img = model.features[0][0].kernel.data.clone() | |||||
# kernel_img.add_(-kernel_img.min()) | |||||
# kernel_img.mul_(255 / kernel_img.max()) | |||||
# save_image(kernel_img, 'kernel%s.jpg' % epoch) |
@ -0,0 +1,154 @@ | |||||
import torch | |||||
import numpy as np | |||||
import cv2, os, sys | |||||
import pandas as pd | |||||
from torch.utils.data import Dataset | |||||
from matplotlib import pyplot as plt | |||||
from torch.utils.data import ConcatDataset, DataLoader, Subset | |||||
import torch.nn as nn | |||||
import torchvision.transforms as transforms | |||||
from torchvision.datasets import DatasetFolder | |||||
from PIL import Image | |||||
import torchvision.models as models | |||||
batch_size = 32 | |||||
num_epoch = 10 | |||||
train_tfm = transforms.Compose([ | |||||
transforms.Grayscale(), | |||||
transforms.RandomResizedCrop((40,30)), | |||||
transforms.Resize((40, 30)), | |||||
transforms.ToTensor(), | |||||
#transforms.TenCrop((40,30)), | |||||
#transforms.Normalize(0.5,0.5), | |||||
]) | |||||
test_tfm = transforms.Compose([ | |||||
transforms.Grayscale(), | |||||
transforms.Resize((40, 30)), | |||||
transforms.ToTensor() | |||||
]) | |||||
''' | |||||
class Classifier(nn.Module): | |||||
def __init__(self): | |||||
super(Classifier, self).__init__() | |||||
self.cnn_layers = nn.Sequential( | |||||
#input_size(1,30,40) | |||||
nn.Conv2d(1, 16, 3, 1), #output_size(16,28,38) | |||||
nn.BatchNorm2d(16), | |||||
nn.ReLU(), | |||||
nn.Dropout(0.2), | |||||
nn.MaxPool2d(kernel_size = 2), #output_size(16,14,19) | |||||
nn.Conv2d(16, 24, 3, 1), #output_size(24,12,17) | |||||
nn.BatchNorm2d(24), | |||||
nn.ReLU(), | |||||
nn.Dropout(0.2), | |||||
nn.MaxPool2d(kernel_size = 2), #output_size(24,6,8) | |||||
nn.Conv2d(24, 32, 3, 1), #output_size(32,4,6) | |||||
nn.BatchNorm2d(32), | |||||
nn.ReLU(), | |||||
nn.Dropout(0.2), | |||||
nn.MaxPool2d(kernel_size = 2) #ouput_size(32,2,3) | |||||
) | |||||
self.fc_layers = nn.Sequential( | |||||
nn.Linear(32 * 2 * 3, 32), | |||||
nn.ReLU(), | |||||
nn.Dropout(0.2), | |||||
nn.Linear(32,8) | |||||
) | |||||
def forward(self, x): | |||||
x = self.cnn_layers(x) | |||||
x = x.flatten(1) | |||||
x = self.fc_layers(x) | |||||
return x | |||||
''' | |||||
def main(): | |||||
train_set = DatasetFolder("./dataset/data_0705/lepton/train", loader=lambda x: Image.open(x), extensions="bmp", transform=train_tfm) | |||||
test_set = DatasetFolder("./dataset/data_0705/lepton/test", loader=lambda x: Image.open(x), extensions="bmp", transform=test_tfm) | |||||
train_loader = DataLoader(train_set, batch_size=batch_size, shuffle=True) | |||||
test_loader = DataLoader(test_set, batch_size=batch_size, shuffle=True) | |||||
device = torch.device("cuda" if torch.cuda.is_available() else "cpu") | |||||
model = models.resnet18() | |||||
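    # adapt torchvision's ResNet-18 to single-channel (grayscale) inputs and a 3-class head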
model.conv1 = nn.Conv2d(1, 64, kernel_size=3, stride=2, padding=3, | |||||
bias=False) | |||||
model.fc = nn.Linear(512, 3) | |||||
model = model.to(device) | |||||
print(model) | |||||
optimizer = torch.optim.Adam(model.parameters(), lr=0.001) | |||||
criterion = nn.CrossEntropyLoss() | |||||
for epoch in range(num_epoch): | |||||
        ##Training
        model.train()  # switch back to training mode (model.eval() is set for testing below)
        running_loss = 0.0
total = 0 | |||||
correct = 0 | |||||
for i, data in enumerate(train_loader): | |||||
inputs, labels = data | |||||
inputs = inputs.to(device) | |||||
labels = labels.to(device) | |||||
optimizer.zero_grad() | |||||
outputs = model(inputs) | |||||
loss = criterion(outputs, labels) | |||||
loss.backward() | |||||
optimizer.step() | |||||
running_loss += loss.item() | |||||
total += labels.size(0) | |||||
_,predicted = torch.max(outputs.data,1) | |||||
#print(predicted) | |||||
#print("label",labels) | |||||
correct += (predicted == labels).sum().item() | |||||
train_acc = correct / total | |||||
print(f"[ Train | {epoch + 1:03d}/{num_epoch:03d} ] loss = {running_loss:.5f}, acc = {train_acc:.5f}") | |||||
##Testing | |||||
model.eval() | |||||
with torch.no_grad(): | |||||
correct = 0 | |||||
total = 0 | |||||
for i, data in enumerate(test_loader): | |||||
inputs, labels = data | |||||
inputs = inputs.to(device) | |||||
labels = labels.to(device) | |||||
outputs = model(inputs) | |||||
_,predicted = torch.max(outputs.data,1) | |||||
total += labels.size(0) | |||||
correct += (predicted == labels).sum().item() | |||||
#print(predicted) | |||||
#print("labels:",labels) | |||||
print('Test Accuracy:{} %'.format((correct / total) * 100)) | |||||
if __name__ == '__main__': | |||||
main() |
@ -0,0 +1,211 @@ | |||||
import torch | |||||
import numpy as np | |||||
import cv2, os, sys | |||||
import pandas as pd | |||||
from torch.utils.data import Dataset | |||||
from matplotlib import pyplot as plt | |||||
from torch.utils.data import ConcatDataset, DataLoader, Subset | |||||
import torch.nn as nn | |||||
import torchvision.transforms as transforms | |||||
from torchvision.datasets import DatasetFolder | |||||
from PIL import Image | |||||
import torchvision.models | |||||
import BinaryNetpytorch.models as models | |||||
from BinaryNetpytorch.models.binarized_modules import BinarizeLinear,BinarizeConv2d | |||||
batch_size = 32 | |||||
num_epoch = 10 | |||||
train_tfm = transforms.Compose([ | |||||
# transforms.RandomHorizontalFlip(), | |||||
# transforms.RandomResizedCrop((40,30)), | |||||
transforms.Grayscale(), | |||||
transforms.Resize((40, 30)), | |||||
transforms.ToTensor(), | |||||
#transforms.RandomResizedCrop((40,30)), | |||||
#transforms.TenCrop((40,30)), | |||||
# transforms.Normalize(0.5,0.5), | |||||
]) | |||||
test_tfm = transforms.Compose([ | |||||
transforms.Grayscale(), | |||||
transforms.Resize((40, 30)), | |||||
transforms.ToTensor() | |||||
]) | |||||
def Binaryconv3x3(in_planes, out_planes, stride=1): | |||||
"3x3 convolution with padding" | |||||
return BinarizeConv2d(in_planes, out_planes, kernel_size=3, stride=stride, | |||||
padding=1, bias=False) | |||||
def conv3x3(in_planes, out_planes, stride=1): | |||||
"3x3 convolution with padding" | |||||
return nn.Conv2d(in_planes, out_planes, kernel_size=3, stride=stride, | |||||
padding=1, bias=False) | |||||
class BasicBlock(nn.Module): | |||||
expansion = 1 | |||||
def __init__(self, inplanes, planes, stride=1, downsample=None,do_bntan=True): | |||||
super(BasicBlock, self).__init__() | |||||
self.conv1 = Binaryconv3x3(inplanes, planes, stride) | |||||
self.bn1 = nn.BatchNorm2d(planes) | |||||
self.tanh1 = nn.Hardtanh(inplace=True) | |||||
self.conv2 = Binaryconv3x3(planes, planes) | |||||
self.tanh2 = nn.Hardtanh(inplace=True) | |||||
self.bn2 = nn.BatchNorm2d(planes) | |||||
self.downsample = downsample | |||||
self.do_bntan=do_bntan | |||||
self.stride = stride | |||||
def forward(self, x): | |||||
residual = x.clone() | |||||
out = self.conv1(x) | |||||
out = self.bn1(out) | |||||
out = self.tanh1(out) | |||||
out = self.conv2(out) | |||||
if self.downsample is not None: | |||||
if residual.data.max()>1: | |||||
import pdb; pdb.set_trace() | |||||
residual = self.downsample(residual) | |||||
out += residual | |||||
if self.do_bntan: | |||||
out = self.bn2(out) | |||||
out = self.tanh2(out) | |||||
return out | |||||
class ResNet(nn.Module): | |||||
def __init__(self): | |||||
super(ResNet, self).__init__() | |||||
def _make_layer(self, block, planes, blocks, stride=1,do_bntan=True): | |||||
downsample = None | |||||
if stride != 1 or self.inplanes != planes * block.expansion: | |||||
downsample = nn.Sequential( | |||||
BinarizeConv2d(self.inplanes, planes * block.expansion, | |||||
kernel_size=1, stride=stride, bias=False), | |||||
nn.BatchNorm2d(planes * block.expansion), | |||||
) | |||||
layers = [] | |||||
layers.append(block(self.inplanes, planes, stride, downsample)) | |||||
self.inplanes = planes * block.expansion | |||||
for i in range(1, blocks-1): | |||||
layers.append(block(self.inplanes, planes)) | |||||
layers.append(block(self.inplanes, planes,do_bntan=do_bntan)) | |||||
return nn.Sequential(*layers) | |||||
def forward(self, x): | |||||
x = self.conv1(x) | |||||
x = self.maxpool(x) | |||||
x = self.bn1(x) | |||||
x = self.tanh1(x) | |||||
x = self.layer1(x) | |||||
x = self.layer2(x) | |||||
x = self.layer3(x) | |||||
x = self.layer4(x) | |||||
x = self.avgpool(x) | |||||
x = x.view(x.size(0), -1) | |||||
x = self.bn2(x) | |||||
x = self.tanh2(x) | |||||
x = self.fc(x) | |||||
x = self.bn3(x) | |||||
x = self.logsoftmax(x) | |||||
return x | |||||
class ResNet_cifar10(ResNet): | |||||
def __init__(self, num_classes=3, | |||||
block=BasicBlock, depth=18): | |||||
super(ResNet_cifar10, self).__init__() | |||||
self.inflate = 5 | |||||
self.inplanes = 16*self.inflate | |||||
n = int((depth - 2) / 6) | |||||
self.conv1 = BinarizeConv2d(1, 16*self.inflate, kernel_size=3, stride=1, padding=1, | |||||
bias=False) | |||||
self.maxpool = lambda x: x | |||||
self.bn1 = nn.BatchNorm2d(16*self.inflate) | |||||
self.tanh1 = nn.Hardtanh(inplace=True) | |||||
self.tanh2 = nn.Hardtanh(inplace=True) | |||||
self.layer1 = self._make_layer(block, 16*self.inflate, n) | |||||
self.layer2 = self._make_layer(block, 32*self.inflate, n, stride=2) | |||||
self.layer3 = self._make_layer(block, 64*self.inflate, n, stride=2,do_bntan=False) | |||||
self.layer4 = lambda x: x | |||||
self.avgpool = nn.AvgPool2d(8) | |||||
self.bn2 = nn.BatchNorm1d(64*self.inflate) | |||||
        self.bn3 = nn.BatchNorm1d(num_classes)
        self.logsoftmax = nn.LogSoftmax(dim=1)
        self.fc = BinarizeLinear(64*self.inflate, num_classes)
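# Training/evaluation entry point: 3-class pose classification on grayscale BMP crops
# loaded from pose_data/ with DatasetFolder.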
def main(): | |||||
train_set = DatasetFolder("pose_data/training/labeled", loader=lambda x: Image.open(x), extensions="bmp", transform=train_tfm) | |||||
test_set = DatasetFolder("pose_data/testing", loader=lambda x: Image.open(x), extensions="bmp", transform=test_tfm) | |||||
train_loader = DataLoader(train_set, batch_size=batch_size, shuffle=True) | |||||
test_loader = DataLoader(test_set, batch_size=batch_size, shuffle=True) | |||||
device = torch.device("cuda" if torch.cuda.is_available() else "cpu") | |||||
model = ResNet_cifar10(num_classes=3,block=BasicBlock,depth=18) | |||||
model = model.to(device) | |||||
print(model) | |||||
optimizer = torch.optim.Adam(model.parameters(), lr=0.001) | |||||
criterion = nn.CrossEntropyLoss() | |||||
for epoch in range(num_epoch): | |||||
running_loss = 0.0 | |||||
total = 0 | |||||
correct = 0 | |||||
for i, data in enumerate(train_loader): | |||||
inputs, labels = data | |||||
inputs = inputs.to(device) | |||||
labels = labels.to(device) | |||||
optimizer.zero_grad() | |||||
outputs = model(inputs) | |||||
loss = criterion(outputs, labels) | |||||
loss.backward() | |||||
optimizer.step() | |||||
running_loss += loss.item() | |||||
total += labels.size(0) | |||||
_,predicted = torch.max(outputs.data,1) | |||||
#print(predicted) | |||||
#print("label",labels) | |||||
correct += (predicted == labels).sum().item() | |||||
train_acc = correct / total | |||||
print(f"[ Train | {epoch + 1:03d}/{num_epoch:03d} ] loss = {running_loss:.5f}, acc = {train_acc:.5f}") | |||||
model.eval() | |||||
with torch.no_grad(): | |||||
correct = 0 | |||||
total = 0 | |||||
for i, data in enumerate(test_loader): | |||||
inputs, labels = data | |||||
inputs = inputs.to(device) | |||||
labels = labels.to(device) | |||||
outputs = model(inputs) | |||||
_,predicted = torch.max(outputs.data,1) | |||||
total += labels.size(0) | |||||
correct += (predicted == labels).sum().item() | |||||
#print(predicted) | |||||
#print("labels:",labels) | |||||
print('Test Accuracy:{} %'.format((correct / total) * 100)) | |||||
if __name__ == '__main__': | |||||
main() |
@ -0,0 +1,193 @@ | |||||
import torch | |||||
import numpy as np | |||||
import cv2, os, sys | |||||
import pandas as pd | |||||
from torch.utils.data import Dataset | |||||
from matplotlib import pyplot as plt | |||||
from torch.utils.data import ConcatDataset, DataLoader, Subset | |||||
import torch.nn as nn | |||||
import torchvision.transforms as transforms | |||||
from torchvision.datasets import DatasetFolder | |||||
from PIL import Image | |||||
import torchvision.models as models | |||||
batch_size = 32 | |||||
num_epoch = 1 | |||||
torch.cuda.set_device(1) | |||||
train_tfm = transforms.Compose([ | |||||
transforms.Grayscale(), | |||||
transforms.RandomHorizontalFlip(), | |||||
transforms.RandomResizedCrop((68,68)), | |||||
transforms.ToTensor(), | |||||
#transforms.RandomResizedCrop((40,30)), | |||||
#transforms.TenCrop((40,30)), | |||||
#transforms.Normalize(0.5,0.5), | |||||
]) | |||||
test_tfm = transforms.Compose([ | |||||
transforms.Grayscale(), | |||||
transforms.ToTensor() | |||||
]) | |||||
''' | |||||
class Classifier(nn.Module): | |||||
def __init__(self): | |||||
super(Classifier, self).__init__() | |||||
self.cnn_layers = nn.Sequential( | |||||
#input_size(1,30,40) | |||||
nn.Conv2d(1, 16, 3, 1), #output_size(16,28,38) | |||||
nn.BatchNorm2d(16), | |||||
nn.ReLU(), | |||||
nn.Dropout(0.2), | |||||
nn.MaxPool2d(kernel_size = 2), #output_size(16,14,19) | |||||
nn.Conv2d(16, 24, 3, 1), #output_size(24,12,17) | |||||
nn.BatchNorm2d(24), | |||||
nn.ReLU(), | |||||
nn.Dropout(0.2), | |||||
nn.MaxPool2d(kernel_size = 2), #output_size(24,6,8) | |||||
nn.Conv2d(24, 32, 3, 1), #output_size(32,4,6) | |||||
nn.BatchNorm2d(32), | |||||
nn.ReLU(), | |||||
nn.Dropout(0.2), | |||||
nn.MaxPool2d(kernel_size = 2) #ouput_size(32,2,3) | |||||
) | |||||
self.fc_layers = nn.Sequential( | |||||
nn.Linear(32 * 2 * 3, 32), | |||||
nn.ReLU(), | |||||
nn.Dropout(0.2), | |||||
nn.Linear(32,8) | |||||
) | |||||
def forward(self, x): | |||||
x = self.cnn_layers(x) | |||||
x = x.flatten(1) | |||||
x = self.fc_layers(x) | |||||
return x | |||||
''' | |||||
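# Baseline: full-precision torchvision ResNet-50 adapted to single-channel input (conv1
# replaced) and an 8-class head (fc replaced); the best model by validation accuracy is saved.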
def main(): | |||||
train_set = DatasetFolder("pose_data2/train", loader=lambda x: Image.open(x), extensions="bmp", transform=train_tfm) | |||||
test_set = DatasetFolder("pose_data2/test", loader=lambda x: Image.open(x), extensions="bmp", transform=test_tfm) | |||||
valid_set = DatasetFolder("pose_data2/val", loader=lambda x: Image.open(x), extensions="bmp", transform=test_tfm) | |||||
train_loader = DataLoader(train_set, batch_size=batch_size, shuffle=True) | |||||
test_loader = DataLoader(test_set, batch_size=batch_size, shuffle=True) | |||||
valid_loader = DataLoader(valid_set, batch_size=batch_size, shuffle=True) | |||||
model_path = "model.ckpt" | |||||
device = torch.device("cuda" if torch.cuda.is_available() else "cpu") | |||||
model = models.resnet50() | |||||
model.conv1 = nn.Conv2d(1, 64, kernel_size=7, stride=2, padding=3, | |||||
bias=False) | |||||
model.fc = nn.Linear(2048, 8) | |||||
model = model.to(device) | |||||
print(model) | |||||
optimizer = torch.optim.Adam(model.parameters(), lr=0.001) | |||||
criterion = nn.CrossEntropyLoss() | |||||
best_acc = -1 | |||||
for epoch in range(num_epoch): | |||||
##Training | |||||
running_loss = 0.0 | |||||
total = 0 | |||||
correct = 0 | |||||
for i, data in enumerate(train_loader): | |||||
inputs, labels = data | |||||
inputs = inputs.to(device) | |||||
labels = labels.to(device) | |||||
optimizer.zero_grad() | |||||
outputs = model(inputs) | |||||
loss = criterion(outputs, labels) | |||||
loss.backward() | |||||
optimizer.step() | |||||
running_loss += loss.item() | |||||
total += labels.size(0) | |||||
_,predicted = torch.max(outputs.data,1) | |||||
#print(predicted) | |||||
#print("label",labels) | |||||
correct += (predicted == labels).sum().item() | |||||
train_acc = correct / total | |||||
print(f"[ Train | {epoch + 1:03d}/{num_epoch:03d} ] loss = {running_loss:.5f}, acc = {train_acc:.5f}") | |||||
##Validation | |||||
model.eval() | |||||
valid_loss = 0.0 | |||||
total = 0 | |||||
correct = 0 | |||||
for i, data in enumerate(valid_loader): | |||||
inputs, labels = data | |||||
inputs = inputs.to(device) | |||||
labels = labels.to(device) | |||||
with torch.no_grad(): | |||||
outputs = model(inputs) | |||||
loss = criterion(outputs, labels) | |||||
                valid_loss += loss.item()
total += labels.size(0) | |||||
_,predicted = torch.max(outputs.data,1) | |||||
correct += (predicted == labels).sum().item() | |||||
valid_acc = correct / total | |||||
print(f"[ Valid | {epoch + 1:03d}/{num_epoch:03d} ] loss = {running_loss:.5f}, acc = {valid_acc:.5f}") | |||||
if valid_acc > best_acc: | |||||
best_acc = valid_acc | |||||
torch.save(model.state_dict(), model_path) | |||||
print('saving model with acc {:.3f}'.format(valid_acc)) | |||||
##Testing | |||||
model = models.resnet50() | |||||
model.conv1 = nn.Conv2d(1, 64, kernel_size=7, stride=2, padding=3, | |||||
bias=False) | |||||
model.fc = nn.Linear(2048, 8) | |||||
model = model.to(device) | |||||
model.load_state_dict(torch.load(model_path)) | |||||
model.eval() | |||||
with torch.no_grad(): | |||||
correct = 0 | |||||
total = 0 | |||||
for i, data in enumerate(test_loader): | |||||
inputs, labels = data | |||||
inputs = inputs.to(device) | |||||
labels = labels.to(device) | |||||
outputs = model(inputs) | |||||
_,predicted = torch.max(outputs.data,1) | |||||
total += labels.size(0) | |||||
correct += (predicted == labels).sum().item() | |||||
# for k in range(batch_size): | |||||
# if predicted[k] != labels[k]: | |||||
# print(inputs[k]) | |||||
#print(predicted) | |||||
#print("labels:",labels) | |||||
print('Test Accuracy:{} %'.format((correct / total) * 100)) | |||||
if __name__ == '__main__': | |||||
main() |
@ -0,0 +1,285 @@ | |||||
import torch | |||||
import numpy as np | |||||
import cv2, os, sys | |||||
import pandas as pd | |||||
from torch.utils.data import Dataset | |||||
from matplotlib import pyplot as plt | |||||
from torch.utils.data import ConcatDataset, DataLoader, Subset | |||||
import torch.nn as nn | |||||
import torchvision.transforms as transforms | |||||
from torchvision.datasets import DatasetFolder | |||||
from PIL import Image | |||||
import torchvision.models | |||||
import BinaryNetpytorch.models as models | |||||
from BinaryNetpytorch.models.binarized_modules import BinarizeLinear,BinarizeConv2d | |||||
import progressbar | |||||
import seaborn as sns | |||||
batch_size = 32 | |||||
num_epoch = 60 | |||||
torch.cuda.set_device(1) | |||||
train_tfm = transforms.Compose([ | |||||
# transforms.RandomHorizontalFlip(), | |||||
# transforms.RandomResizedCrop((40,30)), | |||||
transforms.Grayscale(), | |||||
transforms.Resize((68, 68)), | |||||
transforms.ToTensor(), | |||||
#transforms.RandomResizedCrop((40,30)), | |||||
#transforms.TenCrop((40,30)), | |||||
# transforms.Normalize(0.5,0.5), | |||||
]) | |||||
test_tfm = transforms.Compose([ | |||||
transforms.Grayscale(), | |||||
transforms.Resize((68, 68)), | |||||
transforms.ToTensor() | |||||
]) | |||||
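# Quantize snaps every pixel value to the nearest multiple of 1/128 (0.0078125),
# emulating a coarser fixed-point input representation ahead of the binarized layers.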
def Quantize(img): | |||||
scaler = torch.div(img, 0.0078125, rounding_mode="floor") | |||||
scaler_t1 = scaler * 0.0078125 | |||||
scaler_t2 = (scaler + 1) * 0.0078125 | |||||
img = torch.where(abs(img - scaler_t1) < abs(img -scaler_t2), scaler_t1 , scaler_t2) | |||||
return img | |||||
# bar = progressbar.ProgressBar(maxval=img.size(0)*img.size(2)*img.size(3), \ | |||||
# widgets=[progressbar.Bar('=', '[', ']'), ' ', progressbar.Percentage()]) | |||||
# bar.start() | |||||
# for p in range(img.size(0)): | |||||
# for i in range(img.size(2)): | |||||
# for j in range(img.size(3)): | |||||
# scaler = int(img[p][0][i][j] / 0.0078125) | |||||
# t1 = scaler * 0.0078125 | |||||
# t2 = (scaler + 1) * 0.0078125 | |||||
# if(abs(img[p][0][i][j] - t1) < abs(img[p][0][i][j] - t2)): | |||||
# img[p][0][i][j] = t1 | |||||
# else: | |||||
# img[p][0][i][j] = t2 | |||||
# bar.finish() | |||||
# return img | |||||
def Binaryconv3x3(in_planes, out_planes, stride=1): | |||||
"3x3 convolution with padding" | |||||
return BinarizeConv2d(in_planes, out_planes, kernel_size=3, stride=stride, | |||||
padding=1, bias=False) | |||||
def conv3x3(in_planes, out_planes, stride=1): | |||||
"3x3 convolution with padding" | |||||
return nn.Conv2d(in_planes, out_planes, kernel_size=3, stride=stride, | |||||
padding=1, bias=False) | |||||
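# Same binarized BasicBlock as before, except the input to each convolution is first passed
# through Quantize to emulate reduced-precision activations.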
class BasicBlock(nn.Module): | |||||
expansion = 1 | |||||
def __init__(self, inplanes, planes, stride=1, downsample=None,do_bntan=True): | |||||
super(BasicBlock, self).__init__() | |||||
self.conv1 = Binaryconv3x3(inplanes, planes, stride) | |||||
self.bn1 = nn.BatchNorm2d(planes) | |||||
self.tanh1 = nn.Hardtanh(inplace=True) | |||||
self.conv2 = Binaryconv3x3(planes, planes) | |||||
self.tanh2 = nn.Hardtanh(inplace=True) | |||||
self.bn2 = nn.BatchNorm2d(planes) | |||||
self.downsample = downsample | |||||
self.do_bntan=do_bntan | |||||
self.stride = stride | |||||
def forward(self, x): | |||||
residual = x.clone() | |||||
x = Quantize(x) | |||||
out = self.conv1(x) | |||||
out = self.bn1(out) | |||||
out = self.tanh1(out) | |||||
out = Quantize(out) | |||||
out = self.conv2(out) | |||||
if self.downsample is not None: | |||||
if residual.data.max()>1: | |||||
import pdb; pdb.set_trace() | |||||
residual = self.downsample(residual) | |||||
out += residual | |||||
if self.do_bntan: | |||||
out = self.bn2(out) | |||||
out = self.tanh2(out) | |||||
return out | |||||
class ResNet(nn.Module): | |||||
def __init__(self): | |||||
super(ResNet, self).__init__() | |||||
def _make_layer(self, block, planes, blocks, stride=1,do_bntan=True): | |||||
downsample = None | |||||
if stride != 1 or self.inplanes != planes * block.expansion: | |||||
downsample = nn.Sequential( | |||||
BinarizeConv2d(self.inplanes, planes * block.expansion, | |||||
kernel_size=1, stride=stride, bias=False), | |||||
nn.BatchNorm2d(planes * block.expansion), | |||||
) | |||||
layers = [] | |||||
layers.append(block(self.inplanes, planes, stride, downsample)) | |||||
self.inplanes = planes * block.expansion | |||||
for i in range(1, blocks-1): | |||||
layers.append(block(self.inplanes, planes)) | |||||
layers.append(block(self.inplanes, planes,do_bntan=do_bntan)) | |||||
return nn.Sequential(*layers) | |||||
def forward(self, x): | |||||
x = Quantize(x) | |||||
x = self.conv1(x) | |||||
x = self.maxpool(x) | |||||
x = self.bn1(x) | |||||
x = self.tanh1(x) | |||||
x = self.layer1(x) | |||||
x = self.layer2(x) | |||||
x = self.layer3(x) | |||||
x = self.layer4(x) | |||||
x = self.avgpool(x) | |||||
x = x.view(x.size(0), -1) | |||||
x = self.bn2(x) | |||||
x = self.tanh2(x) | |||||
#print(x.size()) | |||||
        x = x.view(x.size(0), -1, 1, 1)  # reshape to (N, C, 1, 1) for the 1x1 BinarizeConv2d head
x = self.fc(x) | |||||
x = x.view(x.size(0), -1) | |||||
x = self.bn3(x) | |||||
x = self.logsoftmax(x) | |||||
return x | |||||
class ResNet_cifar10(ResNet): | |||||
def __init__(self, num_classes=8, | |||||
block=BasicBlock, depth=18): | |||||
super(ResNet_cifar10, self).__init__() | |||||
self.inflate = 5 | |||||
self.inplanes = 16*self.inflate | |||||
n = int((depth - 2) / 6) | |||||
self.conv1 = BinarizeConv2d(1, 16*self.inflate, kernel_size=3, stride=1, padding=1, | |||||
bias=False) | |||||
self.maxpool = lambda x: x | |||||
self.bn1 = nn.BatchNorm2d(16*self.inflate) | |||||
self.tanh1 = nn.Hardtanh(inplace=True) | |||||
self.tanh2 = nn.Hardtanh(inplace=True) | |||||
self.layer1 = self._make_layer(block, 16*self.inflate, n) | |||||
self.layer2 = self._make_layer(block, 32*self.inflate, n, stride=2) | |||||
self.layer3 = self._make_layer(block, 64*self.inflate, n, stride=2,do_bntan=False) | |||||
self.layer4 = lambda x: x | |||||
self.avgpool = nn.AvgPool2d(8) | |||||
self.bn2 = nn.BatchNorm1d(256*self.inflate) | |||||
        self.bn3 = nn.BatchNorm1d(num_classes)
        self.logsoftmax = nn.LogSoftmax(dim=1)
        #self.fc = BinarizeLinear(256*self.inflate, num_classes)
        self.fc = BinarizeConv2d(256*self.inflate, num_classes, kernel_size=1)
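# Training entry point: 8-class pose classification with the binarized ResNet defined above;
# the trained model is saved to model.ckpt and then evaluated on the test split.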
def main(): | |||||
train_set = DatasetFolder("pose_data2/train", loader=lambda x: Image.open(x), extensions="bmp", transform=train_tfm) | |||||
test_set = DatasetFolder("pose_data2/test", loader=lambda x: Image.open(x), extensions="bmp", transform=test_tfm) | |||||
train_loader = DataLoader(train_set, batch_size=batch_size, shuffle=True) | |||||
test_loader = DataLoader(test_set, batch_size=batch_size, shuffle=True) | |||||
device = torch.device("cuda" if torch.cuda.is_available() else "cpu") | |||||
model = ResNet_cifar10(num_classes=8,block=BasicBlock,depth=18) | |||||
model = model.to(device) | |||||
print(model) | |||||
optimizer = torch.optim.Adam(model.parameters(), lr=0.001) | |||||
criterion = nn.CrossEntropyLoss() | |||||
model_path = "model.ckpt" | |||||
for epoch in range(num_epoch): | |||||
running_loss = 0.0 | |||||
total = 0 | |||||
correct = 0 | |||||
for i, data in enumerate(train_loader): | |||||
inputs, labels = data | |||||
inputs = inputs.to(device) | |||||
labels = labels.to(device) | |||||
optimizer.zero_grad() | |||||
outputs = model(inputs) | |||||
loss = criterion(outputs, labels) | |||||
loss.backward() | |||||
optimizer.step() | |||||
running_loss += loss.item() | |||||
total += labels.size(0) | |||||
_,predicted = torch.max(outputs.data,1) | |||||
#print(predicted) | |||||
#print("label",labels) | |||||
correct += (predicted == labels).sum().item() | |||||
train_acc = correct / total | |||||
print(f"[ Train | {epoch + 1:03d}/{num_epoch:03d} ] loss = {running_loss:.5f}, acc = {train_acc:.5f}") | |||||
torch.save(model.state_dict(), model_path) | |||||
model = ResNet_cifar10(num_classes=8,block=BasicBlock,depth=18) | |||||
model = model.to(device) | |||||
model.load_state_dict(torch.load(model_path)) | |||||
model.eval() | |||||
with torch.no_grad(): | |||||
correct = 0 | |||||
total = 0 | |||||
correct_2 = 0 | |||||
stat = np.zeros((8,8)) | |||||
for i, data in enumerate(test_loader): | |||||
inputs, labels = data | |||||
inputs = inputs.to(device) | |||||
labels = labels.to(device) | |||||
outputs = model(inputs) | |||||
_,predicted = torch.max(outputs.data,1) | |||||
total += labels.size(0) | |||||
correct += (predicted == labels).sum().item() | |||||
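            # coarse 2-class accuracy: group predictions and labels into classes 0-3 vs 4-7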
            for b in range(labels.size(0)):
if predicted[b] == 0 or predicted[b] == 1 or predicted[b] == 2 or predicted[b] == 3: | |||||
if labels[b] == 0 or labels[b] == 1 or labels[b] == 2 or labels[b] == 3: | |||||
correct_2 += 1 | |||||
else: | |||||
if labels[b] == 4 or labels[b] == 5 or labels[b] == 6 or labels[b] == 7: | |||||
correct_2 += 1 | |||||
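            # save each misclassified test image and accumulate an error confusion matrix
            # (rows = true label, columns = prediction)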
            for k in range(labels.size(0)):
if predicted[k] != labels[k]: | |||||
img = inputs[k].mul(255).byte() | |||||
                    img = img.cpu().numpy().squeeze(0)  # (H, W) grayscale array for cv2.imwrite
predict = predicted[k].cpu().numpy() | |||||
label = labels[k].cpu().numpy() | |||||
path = "test_result/predict:"+str(predict)+"_labels:"+str(label)+".jpg" | |||||
stat[int(label)][int(predict)] += 1 | |||||
cv2.imwrite(path,img) | |||||
print(stat) | |||||
ax = sns.heatmap(stat, linewidth=0.5) | |||||
plt.xlabel('Prediction') | |||||
plt.ylabel('Label') | |||||
plt.savefig('heatmap.jpg') | |||||
#print(predicted) | |||||
#print("labels:",labels) | |||||
        print('Test 2-class Accuracy:{} %'.format((correct_2 / total) * 100))
print('Test Accuracy:{} %'.format((correct / total) * 100)) | |||||
if __name__ == '__main__': | |||||
main() |
@ -0,0 +1,207 @@ | |||||
import torch | |||||
import numpy as np | |||||
import cv2, os, sys | |||||
from torch.utils.data import Dataset | |||||
from matplotlib import pyplot as plt | |||||
from torch.utils.data import ConcatDataset, DataLoader, Subset | |||||
import torch.nn as nn | |||||
import torchvision.transforms as transforms | |||||
from torchvision.datasets import DatasetFolder | |||||
from PIL import Image | |||||
from BinaryNetpytorch.models.binarized_modules import BinarizeLinear,BinarizeConv2d | |||||
from BinaryNetpytorch.models.binarized_modules import Binarize,HingeLoss | |||||
import seaborn as sns | |||||
import random | |||||
batch_size = 8 | |||||
num_epoch = 10 | |||||
seed = 777 | |||||
torch.manual_seed(seed) | |||||
torch.cuda.manual_seed(seed) | |||||
torch.cuda.manual_seed_all(seed) | |||||
np.random.seed(seed) | |||||
random.seed(seed) | |||||
torch.backends.cudnn.benchmark = False | |||||
torch.backends.cudnn.deterministic = True | |||||
train_tfm = transforms.Compose([ | |||||
#transforms.Grayscale(), | |||||
#transforms.RandomHorizontalFlip(), | |||||
#transforms.RandomResizedCrop((40,30)), | |||||
#transforms.RandomCrop((40,30)), | |||||
#transforms.RandomHorizontalFlip(), | |||||
transforms.ToTensor(), | |||||
#transforms.RandomResizedCrop((40,30)), | |||||
#transforms.TenCrop((40,30)), | |||||
#transforms.Normalize(0.5,0.5), | |||||
]) | |||||
test_tfm = transforms.Compose([ | |||||
#transforms.Grayscale(), | |||||
transforms.ToTensor() | |||||
]) | |||||
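# Classifier: a compact fully-binarized CNN (BinarizeConv2d + BatchNorm + ReLU + MaxPool
# blocks); the final BinarizeConv2d acts as the 3-class classification head.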
class Classifier(nn.Module): | |||||
def __init__(self): | |||||
super(Classifier, self).__init__() | |||||
self.cnn_layers = nn.Sequential( | |||||
# BinarizeConv2d(in_channels=1, out_channels=128, kernel_size=9, padding=9//2, bias=False), | |||||
# nn.BatchNorm2d(128), | |||||
# nn.ReLU(), | |||||
# BinarizeConv2d(in_channels=128, out_channels=64, kernel_size=1, padding=1//2, bias=False), | |||||
# nn.BatchNorm2d(64), | |||||
            #input_size (1,30,40)
            BinarizeConv2d(1, 128, 3, 1), #output_size (128,28,38)
            nn.BatchNorm2d(128),
            nn.ReLU(),
            #nn.Dropout(0.2),
            nn.MaxPool2d(kernel_size = 2), #output_size (128,14,19)
            BinarizeConv2d(128, 64, 3, 1), #output_size (64,12,17)
            nn.BatchNorm2d(64),
            nn.ReLU(),
            #nn.Dropout(0.2),
            nn.MaxPool2d(kernel_size = 2), #output_size (64,6,8)
            BinarizeConv2d(64, 32, 3, 1), #output_size (32,4,6)
            nn.BatchNorm2d(32),
            nn.ReLU(),
            #nn.Dropout(0.2),
            nn.MaxPool2d(kernel_size = 2), #output_size (32,2,3)
            #nn.LogSoftmax(),
            BinarizeConv2d(32, 3, (3,2), 1) #output_size (3,1,1): the (3,2) kernel collapses the remaining map, giving one logit per class
) | |||||
def forward(self, x): | |||||
x = self.cnn_layers(x) | |||||
#x = x.flatten(1) | |||||
#x = self.fc_layers(x) | |||||
#print(x.shape) | |||||
x = x.view(x.size(0), -1) | |||||
#print(x.shape) | |||||
#x = nn.LogSoftmax(x) | |||||
#print(x) | |||||
return x | |||||
def main(): | |||||
train_set = DatasetFolder("./dataset/data_0711/grideye/train", loader=lambda x: Image.open(x), extensions="bmp", transform=train_tfm) | |||||
test_set = DatasetFolder("./dataset/data_0711/grideye/test", loader=lambda x: Image.open(x), extensions="bmp", transform=test_tfm) | |||||
val_set = DatasetFolder("./dataset/data_0711/grideye/train", loader=lambda x: Image.open(x), extensions="bmp", transform=test_tfm) | |||||
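    # (the validation split above reuses the training directory)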
train_loader = DataLoader(train_set, batch_size=batch_size, shuffle=True) | |||||
test_loader = DataLoader(test_set, batch_size=batch_size, shuffle=True) | |||||
val_loader = DataLoader(val_set, batch_size=batch_size, shuffle=True) | |||||
save_path = 'models.ckpt' | |||||
device = torch.device("cuda" if torch.cuda.is_available() else "cpu") | |||||
model = Classifier().to(device) | |||||
optimizer = torch.optim.Adam(model.parameters(), lr=0.0001) | |||||
criterion = nn.CrossEntropyLoss() | |||||
best_accuracy = 0.0 | |||||
for epoch in range(num_epoch): | |||||
running_loss = 0.0 | |||||
total = 0 | |||||
correct = 0 | |||||
for i, data in enumerate(train_loader): | |||||
inputs, labels = data | |||||
inputs = inputs.to(device) | |||||
labels = labels.to(device) | |||||
#print(labels) | |||||
optimizer.zero_grad() | |||||
outputs = model(inputs) | |||||
#print(outputs.shape) | |||||
loss = criterion(outputs, labels) | |||||
loss.backward() | |||||
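            # BNN weight update: restore the real-valued weights kept in p.org before the
            # optimizer step, then clamp the updated copy back to [-1, 1] and store it in p.org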
for p in list(model.parameters()): | |||||
if hasattr(p,'org'): | |||||
p.data.copy_(p.org) | |||||
optimizer.step() | |||||
for p in list(model.parameters()): | |||||
if hasattr(p,'org'): | |||||
p.org.copy_(p.data.clamp_(-1,1)) | |||||
running_loss += loss.item() | |||||
total += labels.size(0) | |||||
_,predicted = torch.max(outputs.data,1) | |||||
#print(predicted) | |||||
#print("label",labels) | |||||
correct += (predicted == labels).sum().item() | |||||
train_acc = correct / total | |||||
print(f"[ Train | {epoch + 1:03d}/{num_epoch:03d} ] loss = {running_loss:.5f}, acc = {train_acc:.5f}") | |||||
model.eval() | |||||
with torch.no_grad(): | |||||
correct = 0 | |||||
total = 0 | |||||
for i, data in enumerate(val_loader): | |||||
inputs, labels = data | |||||
inputs = inputs.to(device) | |||||
labels = labels.to(device) | |||||
outputs = model(inputs) | |||||
_,predicted = torch.max(outputs.data,1) | |||||
total += labels.size(0) | |||||
correct += (predicted == labels).sum().item() | |||||
val_acc = correct / total | |||||
if val_acc > best_accuracy: | |||||
best_accuracy = val_acc | |||||
torch.save(model.state_dict(), save_path) | |||||
print("Save Model") | |||||
print(f"[ Val | {epoch + 1:03d}/{num_epoch:03d} ] acc = {val_acc:.5f}") | |||||
model = Classifier().to(device) | |||||
model.load_state_dict(torch.load(save_path)) | |||||
model.eval() | |||||
stat = np.zeros((3,3)) | |||||
with torch.no_grad(): | |||||
correct = 0 | |||||
total = 0 | |||||
print(model) | |||||
for i, data in enumerate(test_loader): | |||||
inputs, labels = data | |||||
inputs = inputs.to(device) | |||||
labels = labels.to(device) | |||||
outputs = model(inputs) | |||||
#print(outputs.data) | |||||
_,predicted = torch.max(outputs.data,1) | |||||
#print(predicted) | |||||
total += labels.size(0) | |||||
correct += (predicted == labels).sum().item() | |||||
for k in range(len(predicted)): | |||||
if predicted[k] != labels[k]: | |||||
img = inputs[k].mul(255).byte() | |||||
img = img.cpu().numpy().squeeze(0) | |||||
img = np.moveaxis(img, 0, -1) | |||||
predict = predicted[k].cpu().numpy() | |||||
label = labels[k].cpu().numpy() | |||||
path = "test_result/predict:"+str(predict)+"_labels:"+str(label)+".jpg" | |||||
stat[int(label)][int(predict)] += 1 | |||||
ax = sns.heatmap(stat, linewidth=0.5) | |||||
plt.xlabel('Prediction') | |||||
plt.ylabel('Label') | |||||
plt.savefig('heatmap.jpg') | |||||
#print(predicted) | |||||
#print("labels:",labels) | |||||
print('Test Accuracy:{} %'.format((correct / total) * 100)) | |||||
if __name__ == '__main__': | |||||
main() |