Introduction
According to "A Neural Algorithm of Artistic Style", a painting can be separated into content and style. The program we are going to write takes two images, a style image and a content image: it learns the style from the style image, combines it with the content of the content image, and produces a new image that has the style of the former and the content of the latter.
Principle
Since we are training with a neural network, we need to define a loss. Before doing that, it helps to look at how an image is reconstructed from the responses of different CNN layers (see the reconstruction figure in the paper).
Network
The paper uses the VGG19 architecture; here we load it with pretrained=True.
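To see the layer ordering that the rest of the code relies on, you can simply print the convolutional part of the model. A minimal inspection sketch (not part of the transfer itself):

from torchvision import models

vgg = models.vgg19(pretrained=True).features   # only the convolutional part, no classifier
print(vgg)                                     # a Sequential of Conv2d, ReLU and MaxPool2d layers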
Loss
Before computing the loss we need to prepare three images: the content image, the style image, and a target image (this is the output of the program; it can start as a blank image or simply as a copy of the content image).

Content loss: feed the content image and the target image through the network and take the MSE between their feature maps at the conv4 layer.

Style loss: feed the style image and the target image through the network, turn the feature maps at the chosen style layers into Gram matrices, and take the MSE between the two Gram matrices.
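The content-loss idea in isolation, with random tensors standing in for the conv4 feature maps (the shapes here are only illustrative):

import torch
import torch.nn.functional as F

content_feat = torch.rand(1, 256, 32, 32)   # pretend: conv4 features of the content image
target_feat = torch.rand(1, 256, 32, 32)    # pretend: conv4 features of the target image

content_loss = F.mse_loss(target_feat, content_feat)
print(content_loss.item())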

Code
(The code is essentially taken from the PyTorch tutorial below, with comments added.)
https://pytorch.org/tutorials/advanced/neural_style_tutorial.html
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import copy
import matplotlib.pyplot as plt
from torchvision import models
from torchvision import transforms
from PIL import Image
Image preprocessing

imgsize = 128
loader = transforms.Compose([
    transforms.Resize(imgsize),
    transforms.ToTensor(),
])

def image_loader(image_name):
    image = Image.open(image_name)
    image = loader(image)                  # resize the image and convert it to a tensor
    image = torch.unsqueeze(image, dim=0)  # add a batch dimension to match Conv2d's input format
    image = image.float()
    return image

style_img = image_loader("./image2/picasso.jpg")    # style image
content_img = image_loader("./image2/dancing.jpg")  # content image
assert style_img.size() == content_img.size()       # the style and content images must have the same size
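matplotlib is imported above but not used yet; a small display helper makes it easy to look at the tensors while experimenting. This is only a convenience sketch loosely following the tutorial, and the names unloader and imshow are my own here:

unloader = transforms.ToPILImage()   # converts a tensor back into a PIL image

def imshow(tensor, title=None):
    image = tensor.cpu().clone().detach()  # work on a copy so the original tensor is untouched
    image = image.squeeze(0)               # drop the batch dimension added in image_loader
    plt.imshow(unloader(image))
    if title is not None:
        plt.title(title)
    plt.show()

imshow(style_img, title='Style Image')
imshow(content_img, title='Content Image')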
Content loss

This class is not an actual PyTorch loss function; it is a layer that inherits from nn.Module and is used to compute content_loss. It does not modify the input that passes through it.
class Contentloss(nn.Module):
    def __init__(self, target):
        super().__init__()
        self.target = target.detach()   # the content features are a fixed target, not part of the graph

    def forward(self, input):
        self.loss = F.mse_loss(input, self.target)
        return input                    # pass the input through unchanged
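A quick sanity check with random tensors (purely illustrative shapes), showing that the layer passes its input through untouched and only records the loss:

t = torch.rand(1, 64, 16, 16)
layer = Contentloss(t)
x = torch.rand(1, 64, 16, 16)
out = layer(x)
print(torch.equal(out, x))   # True: the input is returned unchanged
print(layer.loss.item())     # the recorded content loss for this input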
Style loss

As described above, we first flatten each set of feature maps into a matrix and then compute the Gram matrix, G = matrix * transpose(matrix).
def gram_matrix(input):
    a, b, c, d = input.shape              # a = batch size, b = number of feature maps, c, d = height and width
    features = input.view(a * b, c * d)   # flatten each feature map into a row vector
    G = torch.mm(features, features.t())  # compute the Gram matrix
    return G.div(a * b * c * d)           # normalize by the number of elements

class Styleloss(nn.Module):
    def __init__(self, target_feature):
        super().__init__()
        self.target = gram_matrix(target_feature).detach()

    def forward(self, input):
        G = gram_matrix(input)
        self.loss = F.mse_loss(G, self.target)
        return input
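A shape check with a random feature map (illustrative sizes): for an input of shape (1, b, c, d), gram_matrix returns a (b, b) matrix, one correlation value for every pair of feature maps:

feat = torch.rand(1, 64, 32, 32)
G = gram_matrix(feat)
print(G.shape)   # torch.Size([64, 64])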
Loading the model

The VGG network was trained with inputs normalized using mean=[0.485, 0.456, 0.406] and std=[0.229, 0.224, 0.225], so every image fed into the network must first be normalized with the same values.
cnn = models.vgg19(pretrained=True).features.eval()   # pretrained=True loads the ImageNet-trained weights
cnn_normalization_mean = torch.tensor([0.485, 0.456, 0.406])
cnn_normalization_std = torch.tensor([0.229, 0.224, 0.225])

class Normalization(nn.Module):
    def __init__(self, mean, std):
        super().__init__()
        self.mean = torch.tensor(mean).view(-1, 1, 1)   # reshape to (C, 1, 1) so it broadcasts over the image
        self.std = torch.tensor(std).view(-1, 1, 1)

    def forward(self, img):
        return (img - self.mean) / self.std
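A quick check, applying the layer to the content image prepared earlier; the shape is unchanged, only the per-channel scale and offset differ:

normalization = Normalization(cnn_normalization_mean, cnn_normalization_std)
print(content_img.shape, normalization(content_img).shape)   # identical shapes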
Building the network

Using nn.Sequential we add the layers one by one. VGG19's feature extractor follows the pattern conv2d -> ReLU -> conv2d -> ReLU -> MaxPool -> conv2d -> ..., and from this ordering we decide where to insert the content loss and style loss layers. The default layer choices below, conv_4 for content and conv_1 through conv_5 for style, follow the linked tutorial.
content_layers_default = ['conv_4']                                        # where the content loss is attached
style_layers_default = ['conv_1', 'conv_2', 'conv_3', 'conv_4', 'conv_5']  # where the style losses are attached

def get_style_model_and_losses(cnn, normalization_mean, normalization_std,
                               style_img, content_img,
                               content_layers=content_layers_default,
                               style_layers=style_layers_default):
    cnn = copy.deepcopy(cnn)
    normalization = Normalization(normalization_mean, normalization_std)
    content_losses = []   # list that will hold the content loss layers
    style_losses = []     # list that will hold the style loss layers
    model = nn.Sequential(normalization)   # the normalization layer goes first
    i = 0
    for layer in cnn.children():
        if isinstance(layer, nn.Conv2d):
            i = i + 1
            name = 'conv_{}'.format(i)
        elif isinstance(layer, nn.ReLU):
            name = 'relu_{}'.format(i)
            layer = nn.ReLU(inplace=False)
        elif isinstance(layer, nn.MaxPool2d):
            name = 'pool_{}'.format(i)
        elif isinstance(layer, nn.BatchNorm2d):
            name = 'bn_{}'.format(i)
        else:
            raise RuntimeError('Unrecognized layer: {}'.format(layer.__class__.__name__))
        model.add_module(name, layer)
        if name in content_layers:
            target = model(content_img).detach()
            content_loss = Contentloss(target)   # create a content loss layer, passing the target into __init__
            model.add_module("content_loss_{}".format(i), content_loss)
            content_losses.append(content_loss)  # what we store is the layer itself
        if name in style_layers:
            target_feature = model(style_img).detach()
            style_loss = Styleloss(target_feature)   # create a style loss layer, passing the target into __init__
            model.add_module("style_loss_{}".format(i), style_loss)
            style_losses.append(style_loss)          # what we store is the layer itself
    # trim off everything after the last content/style loss layer
    for i in range(len(model) - 1, -1, -1):
        if isinstance(model[i], Contentloss) or isinstance(model[i], Styleloss):
            break
    model = model[:i + 1]
    return model, style_losses, content_losses
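Building the trimmed model once on its own is a handy check that the loss layers ended up where expected; with the defaults above there should be five style layers and one content layer:

model, style_losses, content_losses = get_style_model_and_losses(
    cnn, cnn_normalization_mean, cnn_normalization_std, style_img, content_img)
print(model)                                   # normalization, conv/relu/pool layers, and the inserted loss layers
print(len(style_losses), len(content_losses))  # 5 1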
Initializing the output image

input_img = content_img.clone()   # can be the content image or white noise
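If you want to start from white noise instead, as the comment above mentions, one option is the line below (left commented out so the content-image start stays in effect; a noise start typically needs more iterations):

# input_img = torch.randn(content_img.size())   # white-noise starting point with the same shape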
Optimizer

Note that the thing being optimized is the image itself, not the network weights. Also, optim.LBFGS re-evaluates the objective several times per step, so optimizer.step() has to be given a closure that recomputes the loss; that is why the training loop below wraps the forward and backward pass in a closure() function.

def get_input_optimizer(input_img):
    # use the L-BFGS optimizer, as suggested in the paper
    optimizer = optim.LBFGS([input_img.requires_grad_()])
    return optimizer
Running the program

def run_style_transfer(cnn, normalization_mean, normalization_std,
                       content_img, style_img, input_img, num_steps=300,
                       style_weight=1000000, content_weight=1):
    print('Building the style transfer model...')
    model, style_losses, content_losses = get_style_model_and_losses(
        cnn, normalization_mean, normalization_std, style_img, content_img)
    print(model)
    optimizer = get_input_optimizer(input_img)
    print('Optimizing...')
    run = [0]
    while run[0] <= num_steps:

        def closure():
            input_img.data.clamp_(0, 1)   # keep the image's pixel values inside [0, 1]
            optimizer.zero_grad()
            model(input_img)              # a forward pass fills in every loss layer's .loss
            style_score = 0
            content_score = 0
            for sl in style_losses:
                style_score = style_score + sl.loss
            for cl in content_losses:
                content_score = content_score + cl.loss
            style_score = style_weight * style_score
            content_score = content_weight * content_score
            loss = style_score + content_score
            loss.backward()
            run[0] = run[0] + 1
            if run[0] % 50 == 0:
                print("run {}:".format(run))
                print('style loss: {:4f}  content loss: {:4f}'.format(
                    style_score.item(), content_score.item()))
                print()
            return style_score + content_score

        optimizer.step(closure)

    input_img.data.clamp_(0, 1)   # final clamp so the result is a valid image
    return input_img
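Putting it together: run the transfer on the images prepared above and look at, or save, the result. This uses the imshow helper sketched earlier, and the output filename is just a placeholder:

output = run_style_transfer(cnn, cnn_normalization_mean, cnn_normalization_std,
                            content_img, style_img, input_img)

imshow(output, title='Output Image')                                     # display the result
transforms.ToPILImage()(output.detach().squeeze(0)).save("output.jpg")   # or write it to disk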