Commit 56355a4e authored by Jan Kieseler

flow approach

parent 5e4bb512
#!/usr/bin/env python
# coding=utf-8
#########################################################################
# This program is free software: you can redistribute it and/or modify #
# it under the terms of the version 3 of the GNU General Public License #
# as published by the Free Software Foundation. #
# #
# This program is distributed in the hope that it will be useful, but #
# WITHOUT ANY WARRANTY; without even the implied warranty of #
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU #
# General Public License for more details. #
# #
# You should have received a copy of the GNU General Public License #
# along with this program. If not, see <http://www.gnu.org/licenses/>. #
# #
# Written by and Copyright (C) Francois Fleuret #
# Contact <francois.fleuret@idiap.ch> for comments & bug reports #
#########################################################################
import math
from math import pi
import random
import numpy as np
import torch
import torchvision
from torch import nn, autograd
from torch.nn import functional as F
from torch.utils.data import DataLoader
import matplotlib.pyplot as plt
import matplotlib.collections as mc
def set_seed(seed):
    if seed >= 0:
        random.seed(seed)
        np.random.seed(seed)
        torch.manual_seed(seed)
        torch.backends.cudnn.deterministic = True
        torch.backends.cudnn.benchmark = False

#set_seed(0)
######################################################################
def phi(x):
    p, std = 0.3, 0.2
    mu = (1 - p) * torch.exp(LogProba((x - 0.5) / std, math.log(1 / std))) + \
         p * torch.exp(LogProba((x + 0.5) / std, math.log(1 / std)))
    return mu

def sample_phi(nb):
    p, std = 0.3, 0.2
    result = torch.empty(nb).normal_(0, std)
    result = result + torch.sign(torch.rand(result.size()) - p) / 2
    return result
######################################################################
# START_LOG_PROBA
def LogProba(x, ldj):
    log_p = ldj - 0.5 * (x**2 + math.log(2*pi))
    return log_p
# END_LOG_PROBA
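# Illustrative sketch (not part of the original script): LogProba(x, ldj) is the
# standard-normal log-density of x plus a log-|Jacobian| term ldj, i.e. the
# change-of-variables log-likelihood log p(x) = log N(f(x); 0, 1) + log|f'(x)|
# whose negative mean is minimised in the training loops below. The helper name
# _demo_log_proba and its toy values are assumptions for illustration only.
def _demo_log_proba():
    x = torch.zeros(3)
    # with ldj = 0 this is just the log of the standard normal density at 0
    expected = torch.full_like(x, -0.5 * math.log(2 * pi))
    assert torch.allclose(LogProba(x, 0.), expected)
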
######################################################################
# START_MODEL
class PiecewiseLinear(nn.Module):
    def __init__(self, n_conditions, xmin=0., xmax=20, nb=1000):
        super().__init__()
        self.xmin = xmin
        self.xmax = xmax
        self.nb = nb
        self.alpha = nn.Parameter(torch.tensor([xmin], dtype=torch.float))
        #mu = math.log((xmax - xmin) / nb)
        #self.xi = nn.Parameter(torch.empty(nb + 1).normal_(mu, 1e-4))
        self.condition_net = torch.nn.Sequential(
            torch.nn.Linear(n_conditions, 64),
            torch.nn.ReLU(),
            torch.nn.Linear(64, 64),
            torch.nn.ReLU(),
            torch.nn.Linear(64, nb + 1)
        )

    def forward(self, x, conditions):
        '''
        original (unconditional) implementation used 1D tensors:
        x torch.Size([100]) : B
        y torch.Size([1002]) : nb+1
        u torch.Size([100]) : B
        n torch.Size([100]) : B
        a torch.Size([100]) : B
        out torch.Size([100]): B

        here x is expected as B x 1 and conditions as B x n_conditions.
        '''
        # since the conditions change per event, the knot table now differs for
        # each batch element, so every tensor carries a leading batch dimension
        xi = self.condition_net(conditions)                        # B x nb+1
        y = self.alpha + xi.exp().cumsum(1)                        # monotone knot values, B x nb+1
        u = self.nb * (x - self.xmin) / (self.xmax - self.xmin)    # B x 1
        n = u.long().clamp(0, self.nb - 1)                         # bin index, B x 1
        a = (u - n).clamp(0, 1)                                    # position within the bin, B x 1
        y0 = y.gather(1, n)      # knot value at index n for each batch element
        y1 = y.gather(1, n + 1)  # knot value at index n + 1 for each batch element
        out = (1 - a) * y0 + a * y1
        return out
# END_MODEL
    def invert(self, y, conditions):
        # Generate xi from the condition input
        xi = self.condition_net(conditions)
        # Reconstruct the same monotone knot table as in forward()
        # (using self.alpha, the learnable offset, to match the forward pass)
        ys = self.alpha + xi.exp().cumsum(dim=1)
        yy = y.view(-1, 1)
        k = torch.arange(self.nb, device=y.device).view(1, -1)
        # Ensure the latent values are within the range covered by the knots
        assert (yy >= ys[:, :1]).all() and (yy <= ys[:, -1:]).all()
        yk = ys[:, :-1]
        ykp1 = ys[:, 1:]
        # Create masks that identify the interval containing each value
        masks = (yy >= yk) & (yy < ykp1)
        # Invert the piecewise-linear interpolation within the identified interval
        x = self.xmin + (self.xmax - self.xmin) / self.nb * ((masks.float() * (k + (yy - yk) / (ykp1 - yk))).sum(dim=1, keepdim=True))
        return x
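
# Illustrative round-trip sketch (added for clarity, not part of the original code):
# the flow maps a reco value x of shape (B, 1) to a latent value of the same shape,
# conditioned on a (B, n_conditions) tensor; invert() maps the latent value back and
# should recover x up to floating-point error as long as x lies inside [xmin, xmax].
# The function name and the toy sizes below are assumptions for illustration only.
def _demo_piecewise_linear(batch_size=4, n_conditions=3):
    model = PiecewiseLinear(n_conditions=n_conditions, xmin=-10., xmax=10.)
    x = torch.rand(batch_size, 1) * 2. - 1.       # toy reco values in (-1, 1)
    conditions = torch.randn(batch_size, n_conditions)
    z = model(x, conditions)                      # forward pass, shape (B, 1)
    x_back = model.invert(z, conditions)          # inverse pass, shape (B, 1)
    print('max round-trip error:', (x - x_back).abs().max().item())
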
#nb_samples = 25000
#nb_epochs = 250
#batch_size = 100
#
#model = PiecewiseLinear(nb = 1001, xmin = -4, xmax = 4)
## model = SumOfSigmoids(nb = 51, xmin = -4, xmax = 4)
#
## print(model(torch.linspace(-10, 10, 25)))
#
## exit(0)
#
## print('** TESTING WITH POSITIVE POLYNOMIAL!!!!')
## model = PositivePolynomial(degree = 16)
#
#train_input = sample_phi(nb_samples)
#
#optimizer = torch.optim.Adam(model.parameters(), lr = 1e-4)
#
#for k in range(nb_epochs):
# acc_loss = 0
#
## START_OPTIMIZATION
# for input in train_input.split(batch_size):
# input.requires_grad_()
# output = model(input)
#
# derivatives, = autograd.grad(
# output.sum(), input,
# retain_graph = True, create_graph = True
# )
#
# loss = ( 0.5 * (output**2 + math.log(2*pi)) - derivatives.log() ).mean()
#
# optimizer.zero_grad()
# loss.backward()
# optimizer.step()
## END_OPTIMIZATION
#
# acc_loss += loss.item()
# if k%10 == 0: print(k, loss.item())
#
######################################################################
#
#input = torch.linspace(-3, 3, 175)
#
#mu = phi(input)
#mu_N = torch.exp(LogProba(input, 0))
#
#input.requires_grad_()
#output = model(input)
#
#grad = autograd.grad(output.sum(), input)[0]
#mu_hat = LogProba(output, grad.log()).detach().exp()
#
######################################################################
# FIGURES
#
#input = input.detach().numpy()
#output = output.detach().numpy()
#mu = mu.numpy()
#mu_hat = mu_hat.numpy()
#
######################################################################
#
#fig = plt.figure()
#ax = fig.add_subplot(1, 1, 1)
## ax.set_xlim(-5, 5)
## ax.set_ylim(-5, 5)
## ax.set_aspect('equal')
## ax.axis('off')
#
#ax.plot(input, output, '-', color = 'tab:red')
#
#filename = 'miniflow_mapping.pdf'
#print(f'Saving {filename}')
#fig.savefig(filename, bbox_inches = 'tight')
#
# plt.show()
######################################################################
#
#green_dist = '#bfdfbf'
#
#fig = plt.figure()
#ax = fig.add_subplot(1, 1, 1)
## ax.set_xlim(-4.5, 4.5)
## ax.set_ylim(-0.1, 1.1)
#lines = list(([(x_in.item(), 0), (x_out.item(), 0.5)]) for (x_in, x_out) in zip(input, output))
#lc = mc.LineCollection(lines, color = 'tab:red', linewidth = 0.1)
#ax.add_collection(lc)
#ax.axis('off')
#
#ax.fill_between(input, 0.52, mu_N * 0.2 + 0.52, color = green_dist)
#ax.fill_between(input, -0.30, mu * 0.2 - 0.30, color = green_dist)
#
#filename = 'miniflow_flow.pdf'
#print(f'Saving {filename}')
#fig.savefig(filename, bbox_inches = 'tight')
#
# plt.show()
######################################################################
#
#fig = plt.figure()
#ax = fig.add_subplot(1, 1, 1)
#ax.axis('off')
#
#ax.fill_between(input, 0, mu, color = green_dist)
## ax.plot(input, mu, '-', color = 'tab:blue')
## ax.step(input, mu_hat, '-', where = 'mid', color = 'tab:red')
#ax.plot(input, mu_hat, '-', color = 'tab:red')
#
#filename = 'miniflow_dist.pdf'
#print(f'Saving {filename}')
#fig.savefig(filename, bbox_inches = 'tight')
#
# plt.show()
######################################################################
#
#fig = plt.figure()
#ax = fig.add_subplot(1, 1, 1)
#ax.axis('off')
#
## ax.plot(input, mu, '-', color = 'tab:blue')
#ax.fill_between(input, 0, mu, color = green_dist)
## ax.step(input, mu_hat, '-', where = 'mid', color = 'tab:red')
#
#filename = 'miniflow_target_dist.pdf'
#print(f'Saving {filename}')
#fig.savefig(filename, bbox_inches = 'tight')
#
# plt.show()
######################################################################
#
#if hasattr(model, 'invert'):
# z = torch.randn(200)
# z = z[(z > -3) * (z < 3)]
#
# x = model.invert(z)
#
# fig = plt.figure()
# ax = fig.add_subplot(1, 1, 1)
# ax.set_xlim(-4.5, 4.5)
# ax.set_ylim(-0.1, 1.1)
# lines = list(([(x_in.item(), 0), (x_out.item(), 0.5)]) for (x_in, x_out) in zip(x, z))
# lc = mc.LineCollection(lines, color = 'tab:red', linewidth = 0.1)
# ax.add_collection(lc)
# # ax.axis('off')
#
# # ax.fill_between(input, 0.52, mu_N * 0.2 + 0.52, color = green_dist)
# # ax.fill_between(input, -0.30, mu * 0.2 - 0.30, color = green_dist)
#
# filename = 'miniflow_synth.pdf'
# print(f'Saving {filename}')
# fig.savefig(filename, bbox_inches = 'tight')
#
# # plt.show()
######################################################################
# interface code to make it a drop-in replacement for the original code
class Surrogate(torch.nn.Module):
    def __init__(self, n_detector_parameters, n_true_plus_context_inputs, n_out_parameters, n_time_steps, betas=(1e-4, 0.02)):
        super(Surrogate, self).__init__()
        assert n_out_parameters == 1  # the 1D flow only supports a single reconstructed quantity
        self.n_detector_parameters = n_detector_parameters
        self.model = PiecewiseLinear(n_conditions=n_detector_parameters + n_true_plus_context_inputs,  # true context is energy etc.
                                     xmin=-10., xmax=10.,  # the reco quantity is normalised, so this range should (TM) work
                                     )
        self.optimizer = torch.optim.Adam(self.parameters(), lr=0.0001)
        self.n_reco_parameters = n_out_parameters  # called n_reco_parameters for consistency with the original code
        self.device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
        self.n_time_steps = None  # kept for interface compatibility, unused by the flow

    def forward(self, noise, detector_parameters, true_inputs, true_context, reco_step_inputs=None, time_step=None):
        assert time_step is None  # make sure it's not used wrongly
        assert reco_step_inputs is None  # make sure it's not used wrongly
        all_cond = torch.cat([detector_parameters, true_inputs, true_context], dim=1)
        return self.model(noise, all_cond)

    def to(self, device=None):
        if device is None:
            device = self.device
        super(Surrogate, self).to(device)
        self.model.to(device)
        return self

    def create_noisy_input(self, nb):
        return torch.randn(nb, 1).to(self.device)

    def sample(self, detector_parameters, true_inputs, true_context):
        noise = self.create_noisy_input(true_inputs.shape[0]).to(self.device)
        conditions = torch.cat([detector_parameters, true_inputs, true_context], dim=1)
        return self.model.invert(noise, conditions)

    def train_model(self, surrogate_dataset, batch_size, n_epochs, lr):
        # train the surrogate model
        train_loader = DataLoader(surrogate_dataset, batch_size=batch_size, shuffle=True)
        # update the learning rate of the existing optimizer
        for param_group in self.optimizer.param_groups:
            param_group['lr'] = lr
        self.to(self.device)
        self.train()

        for epoch in range(n_epochs):
            for batch_idx, (detector_parameters, true_inputs, true_context, reco_result) in enumerate(train_loader):
                # unlike the diffusion surrogate, no time-step / noise loop is needed here:
                # the flow is trained directly with the change-of-variables negative log-likelihood
                detector_parameters = detector_parameters.to(self.device)
                true_inputs = true_inputs.to(self.device)
                reco_step_inputs = reco_result.to(self.device)
                true_context = true_context.to(self.device)

                reco_step_inputs.requires_grad_()
                model_out = self(reco_step_inputs, detector_parameters, true_inputs, true_context)

                # d(model_out)/d(reco) is the Jacobian of the flow, needed for the log-likelihood
                derivatives, = autograd.grad(
                    model_out.sum(), reco_step_inputs,
                    retain_graph=True, create_graph=True
                )

                loss = (0.5 * (model_out**2 + math.log(2*pi)) - derivatives.log()).mean()

                self.optimizer.zero_grad()
                loss.backward()
                self.optimizer.step()

            print('Surrogate Epoch: {} \tLoss: {:.8f}'.format(epoch, loss.item()))

        self.eval()
        return loss.item()
    def apply_model_in_batches(self, dataset, batch_size, oversample=1):
        '''
        one-to-one copy of the original function, no changes needed
        '''
        self.to()
        self.eval()
        # create a dataloader for the dataset
        data_loader = DataLoader(dataset, batch_size=batch_size, shuffle=False)
        # create tensors to store the results
        results = torch.zeros(oversample * len(dataset), self.n_reco_parameters).to('cpu')
        reco = torch.zeros(oversample * len(dataset), self.n_reco_parameters).to('cpu')
        true = torch.zeros(oversample * len(dataset), self.n_reco_parameters).to('cpu')

        for i_o in range(oversample):
            # loop over the batches
            for batch_idx, (detector_parameters, true_inputs, true_context, reco_inputs) in enumerate(data_loader):  # the reco is not needed as it is generated here
                print(f'batch {batch_idx} of {len(data_loader)}', end='\r')
                detector_parameters = detector_parameters.to(self.device)
                true_inputs = true_inputs.to(self.device)
                true_context = true_context.to(self.device)
                reco_inputs = reco_inputs.to(self.device)
                # apply the model
                reco_surrogate = self.sample(detector_parameters, true_inputs, true_context)
                # un-normalise everything to physical values
                reco_surrogate = dataset.unnormalise_target(reco_surrogate)
                reco_inputs = dataset.unnormalise_target(reco_inputs)
                true_inputs = dataset.unnormalise_target(true_inputs)
                # store the results
                start_inject_index = i_o * len(dataset) + batch_idx * batch_size
                end_inject_index = i_o * len(dataset) + (batch_idx + 1) * batch_size
                results[start_inject_index:end_inject_index] = reco_surrogate.detach().to('cpu')
                reco[start_inject_index:end_inject_index] = reco_inputs.detach().to('cpu')
                true[start_inject_index:end_inject_index] = true_inputs.detach().to('cpu')

        return results, reco, true
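
# Illustrative usage sketch (not part of the original code): it only shows the
# interface the drop-in replacement exposes. forward() maps a reco value to its
# latent value given the conditions, and sample() draws a reco value from Gaussian
# noise via the inverse flow. The function name, feature counts and batch size are
# assumptions; the real dataset wiring lives in the calling optimisation code.
def _demo_surrogate(batch_size=8, n_detector=2, n_true=2, n_context=1):
    surrogate = Surrogate(n_detector_parameters=n_detector,
                          n_true_plus_context_inputs=n_true + n_context,
                          n_out_parameters=1, n_time_steps=None).to()
    detector_parameters = torch.randn(batch_size, n_detector, device=surrogate.device)
    true_inputs = torch.randn(batch_size, n_true, device=surrogate.device)
    true_context = torch.randn(batch_size, n_context, device=surrogate.device)
    reco_sample = surrogate.sample(detector_parameters, true_inputs, true_context)
    print('sampled reco shape:', reco_sample.shape)  # expected: (batch_size, 1)
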
......
@@ -22,7 +22,7 @@ class Optimizer(object):
        self.reconstruction_model = reconstruction_model
        self.n_time_steps = surrogate_model.n_time_steps
        #self.n_time_steps = surrogate_model.n_time_steps not used anyway
        self.lr = lr
        self.batch_size = batch_size
        self.constraints = constraints
......