Commit 56355a4e authored by Jan Kieseler

flow approach

parent 5e4bb512
#!/usr/bin/env python
# coding=utf-8
#########################################################################
# This program is free software: you can redistribute it and/or modify #
# it under the terms of the version 3 of the GNU General Public License #
# as published by the Free Software Foundation. #
# #
# This program is distributed in the hope that it will be useful, but #
# WITHOUT ANY WARRANTY; without even the implied warranty of #
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU #
# General Public License for more details. #
# #
# You should have received a copy of the GNU General Public License #
# along with this program. If not, see <http://www.gnu.org/licenses/>. #
# #
# Written by and Copyright (C) Francois Fleuret #
# Contact <francois.fleuret@idiap.ch> for comments & bug reports #
#########################################################################
import math
from math import pi
import random
import numpy as np
import torch
import torchvision
from torch import nn, autograd
from torch.nn import functional as F
from torch.utils.data import DataLoader
import matplotlib.pyplot as plt
import matplotlib.collections as mc
def set_seed(seed):
    if seed >= 0:
        random.seed(seed)
        np.random.seed(seed)
        torch.manual_seed(seed)
        torch.backends.cudnn.deterministic = True
        torch.backends.cudnn.benchmark = False

#set_seed(0)
######################################################################
def phi(x):
    p, std = 0.3, 0.2
    mu = (1 - p) * torch.exp(LogProba((x - 0.5) / std, math.log(1 / std))) + \
         p * torch.exp(LogProba((x + 0.5) / std, math.log(1 / std)))
    return mu

def sample_phi(nb):
    p, std = 0.3, 0.2
    result = torch.empty(nb).normal_(0, std)
    result = result + torch.sign(torch.rand(result.size()) - p) / 2
    return result
######################################################################
# START_LOG_PROBA
def LogProba(x, ldj):
    log_p = ldj - 0.5 * (x**2 + math.log(2*pi))
    return log_p
# END_LOG_PROBA
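# Illustrative sketch (not part of the original script): LogProba(x, ldj) is the
# standard-normal log-density of x plus a log-|Jacobian| term ldj, i.e. the
# change-of-variables log-likelihood log p(x) = log N(f(x); 0, 1) + log|f'(x)|
# whose negative mean is minimised in the training loops below. The helper name
# _demo_log_proba and its toy values are assumptions for illustration only.
def _demo_log_proba():
    x = torch.zeros(3)
    # with ldj = 0 this is just the log of the standard normal density at 0
    expected = torch.full_like(x, -0.5 * math.log(2 * pi))
    assert torch.allclose(LogProba(x, 0.), expected)
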
######################################################################
# START_MODEL
class PiecewiseLinear(nn.Module):
    def __init__(self, n_conditions, xmin=0., xmax=20, nb=1000):
        super().__init__()
        self.xmin = xmin
        self.xmax = xmax
        self.nb = nb
        self.alpha = nn.Parameter(torch.tensor([xmin], dtype=torch.float))
        #mu = math.log((xmax - xmin) / nb)
        #self.xi = nn.Parameter(torch.empty(nb + 1).normal_(mu, 1e-4))
        self.condition_net = torch.nn.Sequential(
            torch.nn.Linear(n_conditions, 64),
            torch.nn.ReLU(),
            torch.nn.Linear(64, 64),
            torch.nn.ReLU(),
            torch.nn.Linear(64, nb + 1)
        )

    def forward(self, x, conditions):
        '''
        original (unconditional) implementation used 1D tensors:
        x torch.Size([100]) : B
        y torch.Size([1002]) : nb+1
        u torch.Size([100]) : B
        n torch.Size([100]) : B
        a torch.Size([100]) : B
        out torch.Size([100]): B

        here x is expected as B x 1 and conditions as B x n_conditions.
        '''
        # since the conditions change per event, the knot table now differs for
        # each batch element, so every tensor carries a leading batch dimension
        xi = self.condition_net(conditions)                        # B x nb+1
        y = self.alpha + xi.exp().cumsum(1)                        # monotone knot values, B x nb+1
        u = self.nb * (x - self.xmin) / (self.xmax - self.xmin)    # B x 1
        n = u.long().clamp(0, self.nb - 1)                         # bin index, B x 1
        a = (u - n).clamp(0, 1)                                    # position within the bin, B x 1
        y0 = y.gather(1, n)      # knot value at index n for each batch element
        y1 = y.gather(1, n + 1)  # knot value at index n + 1 for each batch element
        out = (1 - a) * y0 + a * y1
        return out
# END_MODEL
    def invert(self, y, conditions):
        # Generate xi from the condition input
        xi = self.condition_net(conditions)
        # Reconstruct the same monotone knot table as in forward()
        # (using self.alpha, the learnable offset, to match the forward pass)
        ys = self.alpha + xi.exp().cumsum(dim=1)
        yy = y.view(-1, 1)
        k = torch.arange(self.nb, device=y.device).view(1, -1)
        # Ensure the latent values are within the range covered by the knots
        assert (yy >= ys[:, :1]).all() and (yy <= ys[:, -1:]).all()
        yk = ys[:, :-1]
        ykp1 = ys[:, 1:]
        # Create masks that identify the interval containing each value
        masks = (yy >= yk) & (yy < ykp1)
        # Invert the piecewise-linear interpolation within the identified interval
        x = self.xmin + (self.xmax - self.xmin) / self.nb * ((masks.float() * (k + (yy - yk) / (ykp1 - yk))).sum(dim=1, keepdim=True))
        return x
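
# Illustrative round-trip sketch (added for clarity, not part of the original code):
# the flow maps a reco value x of shape (B, 1) to a latent value of the same shape,
# conditioned on a (B, n_conditions) tensor; invert() maps the latent value back and
# should recover x up to floating-point error as long as x lies inside [xmin, xmax].
# The function name and the toy sizes below are assumptions for illustration only.
def _demo_piecewise_linear(batch_size=4, n_conditions=3):
    model = PiecewiseLinear(n_conditions=n_conditions, xmin=-10., xmax=10.)
    x = torch.rand(batch_size, 1) * 2. - 1.       # toy reco values in (-1, 1)
    conditions = torch.randn(batch_size, n_conditions)
    z = model(x, conditions)                      # forward pass, shape (B, 1)
    x_back = model.invert(z, conditions)          # inverse pass, shape (B, 1)
    print('max round-trip error:', (x - x_back).abs().max().item())
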
#nb_samples = 25000
#nb_epochs = 250
#batch_size = 100
#
#model = PiecewiseLinear(nb = 1001, xmin = -4, xmax = 4)
## model = SumOfSigmoids(nb = 51, xmin = -4, xmax = 4)
#
## print(model(torch.linspace(-10, 10, 25)))
#
## exit(0)
#
## print('** TESTING WITH POSITIVE POLYNOMIAL!!!!')
## model = PositivePolynomial(degree = 16)
#
#train_input = sample_phi(nb_samples)
#
#optimizer = torch.optim.Adam(model.parameters(), lr = 1e-4)
#
#for k in range(nb_epochs):
# acc_loss = 0
#
## START_OPTIMIZATION
# for input in train_input.split(batch_size):
# input.requires_grad_()
# output = model(input)
#
# derivatives, = autograd.grad(
# output.sum(), input,
# retain_graph = True, create_graph = True
# )
#
# loss = ( 0.5 * (output**2 + math.log(2*pi)) - derivatives.log() ).mean()
#
# optimizer.zero_grad()
# loss.backward()
# optimizer.step()
## END_OPTIMIZATION
#
# acc_loss += loss.item()
# if k%10 == 0: print(k, loss.item())
#
######################################################################
#
#input = torch.linspace(-3, 3, 175)
#
#mu = phi(input)
#mu_N = torch.exp(LogProba(input, 0))
#
#input.requires_grad_()
#output = model(input)
#
#grad = autograd.grad(output.sum(), input)[0]
#mu_hat = LogProba(output, grad.log()).detach().exp()
#
######################################################################
# FIGURES
#
#input = input.detach().numpy()
#output = output.detach().numpy()
#mu = mu.numpy()
#mu_hat = mu_hat.numpy()
#
######################################################################
#
#fig = plt.figure()
#ax = fig.add_subplot(1, 1, 1)
## ax.set_xlim(-5, 5)
## ax.set_ylim(-5, 5)
## ax.set_aspect('equal')
## ax.axis('off')
#
#ax.plot(input, output, '-', color = 'tab:red')
#
#filename = 'miniflow_mapping.pdf'
#print(f'Saving {filename}')
#fig.savefig(filename, bbox_inches = 'tight')
#
# plt.show()
######################################################################
#
#green_dist = '#bfdfbf'
#
#fig = plt.figure()
#ax = fig.add_subplot(1, 1, 1)
## ax.set_xlim(-4.5, 4.5)
## ax.set_ylim(-0.1, 1.1)
#lines = list(([(x_in.item(), 0), (x_out.item(), 0.5)]) for (x_in, x_out) in zip(input, output))
#lc = mc.LineCollection(lines, color = 'tab:red', linewidth = 0.1)
#ax.add_collection(lc)
#ax.axis('off')
#
#ax.fill_between(input, 0.52, mu_N * 0.2 + 0.52, color = green_dist)
#ax.fill_between(input, -0.30, mu * 0.2 - 0.30, color = green_dist)
#
#filename = 'miniflow_flow.pdf'
#print(f'Saving {filename}')
#fig.savefig(filename, bbox_inches = 'tight')
#
# plt.show()
######################################################################
#
#fig = plt.figure()
#ax = fig.add_subplot(1, 1, 1)
#ax.axis('off')
#
#ax.fill_between(input, 0, mu, color = green_dist)
## ax.plot(input, mu, '-', color = 'tab:blue')
## ax.step(input, mu_hat, '-', where = 'mid', color = 'tab:red')
#ax.plot(input, mu_hat, '-', color = 'tab:red')
#
#filename = 'miniflow_dist.pdf'
#print(f'Saving {filename}')
#fig.savefig(filename, bbox_inches = 'tight')
#
# plt.show()
######################################################################
#
#fig = plt.figure()
#ax = fig.add_subplot(1, 1, 1)
#ax.axis('off')
#
## ax.plot(input, mu, '-', color = 'tab:blue')
#ax.fill_between(input, 0, mu, color = green_dist)
## ax.step(input, mu_hat, '-', where = 'mid', color = 'tab:red')
#
#filename = 'miniflow_target_dist.pdf'
#print(f'Saving {filename}')
#fig.savefig(filename, bbox_inches = 'tight')
#
# plt.show()
######################################################################
#
#if hasattr(model, 'invert'):
# z = torch.randn(200)
# z = z[(z > -3) * (z < 3)]
#
# x = model.invert(z)
#
# fig = plt.figure()
# ax = fig.add_subplot(1, 1, 1)
# ax.set_xlim(-4.5, 4.5)
# ax.set_ylim(-0.1, 1.1)
# lines = list(([(x_in.item(), 0), (x_out.item(), 0.5)]) for (x_in, x_out) in zip(x, z))
# lc = mc.LineCollection(lines, color = 'tab:red', linewidth = 0.1)
# ax.add_collection(lc)
# # ax.axis('off')
#
# # ax.fill_between(input, 0.52, mu_N * 0.2 + 0.52, color = green_dist)
# # ax.fill_between(input, -0.30, mu * 0.2 - 0.30, color = green_dist)
#
# filename = 'miniflow_synth.pdf'
# print(f'Saving {filename}')
# fig.savefig(filename, bbox_inches = 'tight')
#
# # plt.show()
######################################################################
# interface code to make it a drop-in replacement for the original code
class Surrogate(torch.nn.Module):
    def __init__(self, n_detector_parameters, n_true_plus_context_inputs, n_out_parameters, n_time_steps, betas=(1e-4, 0.02)):
        super(Surrogate, self).__init__()
        assert n_out_parameters == 1  # the 1D flow only supports a single reconstructed quantity
        self.n_detector_parameters = n_detector_parameters
        self.model = PiecewiseLinear(n_conditions=n_detector_parameters + n_true_plus_context_inputs,  # true context is energy etc.
                                     xmin=-10., xmax=10.,  # the reco quantity is normalised, so this range should (TM) work
                                     )
        self.optimizer = torch.optim.Adam(self.parameters(), lr=0.0001)
        self.n_reco_parameters = n_out_parameters  # called n_reco_parameters for consistency with the original code
        self.device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
        self.n_time_steps = None  # kept for interface compatibility, unused by the flow

    def forward(self, noise, detector_parameters, true_inputs, true_context, reco_step_inputs=None, time_step=None):
        assert time_step is None  # make sure it's not used wrongly
        assert reco_step_inputs is None  # make sure it's not used wrongly
        all_cond = torch.cat([detector_parameters, true_inputs, true_context], dim=1)
        return self.model(noise, all_cond)

    def to(self, device=None):
        if device is None:
            device = self.device
        super(Surrogate, self).to(device)
        self.model.to(device)
        return self

    def create_noisy_input(self, nb):
        return torch.randn(nb, 1).to(self.device)

    def sample(self, detector_parameters, true_inputs, true_context):
        noise = self.create_noisy_input(true_inputs.shape[0]).to(self.device)
        conditions = torch.cat([detector_parameters, true_inputs, true_context], dim=1)
        return self.model.invert(noise, conditions)

    def train_model(self, surrogate_dataset, batch_size, n_epochs, lr):
        # train the surrogate model
        train_loader = DataLoader(surrogate_dataset, batch_size=batch_size, shuffle=True)
        # update the learning rate of the existing optimizer
        for param_group in self.optimizer.param_groups:
            param_group['lr'] = lr
        self.to(self.device)
        self.train()

        for epoch in range(n_epochs):
            for batch_idx, (detector_parameters, true_inputs, true_context, reco_result) in enumerate(train_loader):
                # unlike the diffusion surrogate, no time-step / noise loop is needed here:
                # the flow is trained directly with the change-of-variables negative log-likelihood
                detector_parameters = detector_parameters.to(self.device)
                true_inputs = true_inputs.to(self.device)
                reco_step_inputs = reco_result.to(self.device)
                true_context = true_context.to(self.device)

                reco_step_inputs.requires_grad_()
                model_out = self(reco_step_inputs, detector_parameters, true_inputs, true_context)

                # d(model_out)/d(reco) is the Jacobian of the flow, needed for the log-likelihood
                derivatives, = autograd.grad(
                    model_out.sum(), reco_step_inputs,
                    retain_graph=True, create_graph=True
                )

                loss = (0.5 * (model_out**2 + math.log(2*pi)) - derivatives.log()).mean()

                self.optimizer.zero_grad()
                loss.backward()
                self.optimizer.step()

            print('Surrogate Epoch: {} \tLoss: {:.8f}'.format(epoch, loss.item()))

        self.eval()
        return loss.item()
    def apply_model_in_batches(self, dataset, batch_size, oversample=1):
        '''
        one-to-one copy of the original function, no changes needed
        '''
        self.to()
        self.eval()
        # create a dataloader for the dataset
        data_loader = DataLoader(dataset, batch_size=batch_size, shuffle=False)
        # create tensors to store the results
        results = torch.zeros(oversample * len(dataset), self.n_reco_parameters).to('cpu')
        reco = torch.zeros(oversample * len(dataset), self.n_reco_parameters).to('cpu')
        true = torch.zeros(oversample * len(dataset), self.n_reco_parameters).to('cpu')

        for i_o in range(oversample):
            # loop over the batches
            for batch_idx, (detector_parameters, true_inputs, true_context, reco_inputs) in enumerate(data_loader):  # the reco is not needed as it is generated here
                print(f'batch {batch_idx} of {len(data_loader)}', end='\r')
                detector_parameters = detector_parameters.to(self.device)
                true_inputs = true_inputs.to(self.device)
                true_context = true_context.to(self.device)
                reco_inputs = reco_inputs.to(self.device)
                # apply the model
                reco_surrogate = self.sample(detector_parameters, true_inputs, true_context)
                # un-normalise everything to physical values
                reco_surrogate = dataset.unnormalise_target(reco_surrogate)
                reco_inputs = dataset.unnormalise_target(reco_inputs)
                true_inputs = dataset.unnormalise_target(true_inputs)
                # store the results
                start_inject_index = i_o * len(dataset) + batch_idx * batch_size
                end_inject_index = i_o * len(dataset) + (batch_idx + 1) * batch_size
                results[start_inject_index:end_inject_index] = reco_surrogate.detach().to('cpu')
                reco[start_inject_index:end_inject_index] = reco_inputs.detach().to('cpu')
                true[start_inject_index:end_inject_index] = true_inputs.detach().to('cpu')

        return results, reco, true
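
# Illustrative usage sketch (not part of the original code): it only shows the
# interface the drop-in replacement exposes. forward() maps a reco value to its
# latent value given the conditions, and sample() draws a reco value from Gaussian
# noise via the inverse flow. The function name, feature counts and batch size are
# assumptions; the real dataset wiring lives in the calling optimisation code.
def _demo_surrogate(batch_size=8, n_detector=2, n_true=2, n_context=1):
    surrogate = Surrogate(n_detector_parameters=n_detector,
                          n_true_plus_context_inputs=n_true + n_context,
                          n_out_parameters=1, n_time_steps=None).to()
    detector_parameters = torch.randn(batch_size, n_detector, device=surrogate.device)
    true_inputs = torch.randn(batch_size, n_true, device=surrogate.device)
    true_context = torch.randn(batch_size, n_context, device=surrogate.device)
    reco_sample = surrogate.sample(detector_parameters, true_inputs, true_context)
    print('sampled reco shape:', reco_sample.shape)  # expected: (batch_size, 1)
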
......
@@ -22,7 +22,7 @@ class Optimizer(object):
        self.reconstruction_model = reconstruction_model
        self.n_time_steps = surrogate_model.n_time_steps
        #self.n_time_steps = surrogate_model.n_time_steps not used anyway
        self.lr = lr
        self.batch_size = batch_size
        self.constraints = constraints
......