Skip to content
Snippets Groups Projects
Commit 7ca2971a authored by Jan Kieseler's avatar Jan Kieseler
Browse files

now working smoothly

parent 35749a48
Branches
No related tags found
No related merge requests found
......@@ -5,4 +5,8 @@ import numpy as np
if os.path.isfile('evolution.npy'):
evolution = np.load('evolution.npy').tolist()
print('EVOLUTION:',evolution)
\ No newline at end of file
for e in evolution:
for i in e:
#print without line break and round to two decimal places
print("{:.2f}".format(i), end=' ')
print()
\ No newline at end of file
import multiprocessing
from generator import Generator, Parameters, SurrogateDataset
from reconstruction import Reconstruction
from surrogate import Surrogate
......@@ -7,19 +11,26 @@ from matplotlib import pyplot as plt
import torch
import numpy as np
if __name__ == "__main__":
multiprocessing.set_start_method('spawn')
#so far these are only layer thicknesses
box = np.array([0.2, 0.2,
0.2, 0.2,
box = np.array([0.1, 0.2,
0.1, 0.2,
0.1, 0.2,
0.1, 0.2,
0.5, 1.])
# optimisation loop
parameters = np.array([0.5, 2.5,
0.5, 4.,
parameters = np.array([0.5, 3.,
3.5, 4.,
1.5, 3.,
3.5, 5.,
5.5, 10.])
gen = Generator(box,
Parameters(len(box)),
n_vars = 30,
n_events_per_var = 1000)
n_events_per_var = 200)#0)
print("Number of detector parameters: ", gen.n_parameters)
print("Number of parameters per sensor: ", gen.n_parameters_per_sensor)
......@@ -34,24 +45,24 @@ reco_model = Reconstruction(gen.n_parameters,
surrogate_model = Surrogate(gen.n_parameters,
gen.n_target_parameters + gen.n_context_parameters, #context
gen.n_target_parameters, #reco
n_time_steps=100)
n_time_steps=400)
optimizer = Optimizer(gen, surrogate_model, reco_model, parameters)
n_epochs_pre = 100
n_epochs_main = 100
n_epochs_pre = 40
n_epochs_main = 60
evolution = [parameters]
evolution = [[parameters,0.]]
#if saved evolution exists, load it
import os.path
if os.path.isfile('evolution.npy'):
if False and os.path.isfile('evolution.npy'):
evolution = np.load('evolution.npy').tolist()
print('EVOLUTION:',evolution)
parameters = evolution[-1]
def save_evolution(evolution):
np.save('evolution.npy', np.array(evolution))
np.save('evolution.npy', np.array([p[0] for p in evolution])) #only save parameters
save_evolution(evolution)
#exit()
......@@ -59,49 +70,57 @@ def make_check_plot(surr_out, reco_out, true_in, evolution ):
'''
make response plots for all three and save them, use evolution size as step index for output file name
'''
reco_resp = reco_out.detach().cpu().numpy() - true_in.detach().cpu().numpy() - 1.
surr_resp = surr_out.detach().cpu().numpy() - true_in.detach().cpu().numpy() - 1.
reco_resp = (reco_out.detach().cpu().numpy() - true_in.detach().cpu().numpy()) / true_in.detach().cpu().numpy()
surr_resp = (surr_out.detach().cpu().numpy() - true_in.detach().cpu().numpy()) / true_in.detach().cpu().numpy()
range = np.min([np.min(reco_resp), np.min(surr_resp)]), np.max([np.max(reco_resp), np.max(surr_resp)])
#range = np.min([np.min(reco_resp), np.min(surr_resp)]), np.max([np.max(reco_resp), np.max(surr_resp)])
plt.figure(figsize=(10,10))
plt.hist(reco_resp.flatten(), bins=21, range=range, alpha = 0.5, label='reco')
plt.hist(surr_resp.flatten(), bins=21, range=range, alpha = 0.5, label='surr')
plt.yscale('log')
plt.hist(reco_resp.flatten(), bins=21, range=(-2,2), alpha = 0.5, label='reco')
plt.hist(surr_resp.flatten(), bins=21, range=(-2,2), alpha = 0.5, label='surr')
plt.text(0.15, 0.8, 'step '+str(len(evolution)), transform=plt.gca().transAxes)
plt.text(0.15, 0.7, 'o_loss '+str((evolution[-1][1])), transform=plt.gca().transAxes)
plt.legend()
plt.savefig('response_'+str(len(evolution))+'.png')
plt.savefig('response_'+str(len(evolution))+'_lin.png')
plt.yscale('log')
plt.savefig('response_'+str(len(evolution))+'_log.png')
plt.close()
for i in range(100): #just an example
ds = gen.create_torch_dataset(parameters)
print("Reconstruction training")
if not i: #train a bit more the first time
reco_model.train_model(ds, batch_size=32, n_epochs= n_epochs_pre//2, lr=0.01)
reco_model.train_model(ds, batch_size=128, n_epochs= n_epochs_pre//2, lr=0.01)
reco_model.train_model(ds, batch_size=128, n_epochs= n_epochs_pre//2, lr=0.001)
reco_model.to('cuda')
reco_model.train_model(ds, batch_size=256, n_epochs= n_epochs_main, lr=0.001)
reco_result = reco_model.apply_model_in_batches(ds, batch_size=128)
reco_model.to('cpu') #free up gpu memory
#create the surrogate dataset
surrogate_dataset = SurrogateDataset(ds, reco_result.detach().cpu().numpy())#important to detach here
if not i: #train a bit more the first time
surrogate_model.train_model(surrogate_dataset, batch_size=128, n_epochs= n_epochs_pre//2, lr=0.01)
surrogate_model.train_model(surrogate_dataset, batch_size=128, n_epochs= n_epochs_pre//2, lr=0.001)
surrogate_model.train_model(surrogate_dataset, batch_size=512, n_epochs= n_epochs_pre//2, lr=0.001)
print("Surrogate model")
surrogate_model.train_model(surrogate_dataset, batch_size=512, n_epochs= n_epochs_main, lr=0.001)
print("Surrogate training")
surrogate_model.train_model(surrogate_dataset, batch_size=2048, n_epochs= n_epochs_main, lr=0.001)
## check if things worked
surr_out, reco_out, true_in = surrogate_model.apply_model_in_batches(surrogate_dataset, batch_size=1024)
make_check_plot(surr_out, reco_out, true_in, evolution)
print("Optimization")
#apply the surrogate model
updated_detector_parameters, optimal = optimizer.optimize(surrogate_dataset, batch_size=128, n_epochs=100, lr=0.01)
updated_detector_parameters, optimal, o_loss = optimizer.optimize(surrogate_dataset, batch_size=1024, n_epochs=100, lr=0.001)
print("Updated detector parameters: ", updated_detector_parameters, "; not interrupted", optimal, "; change", updated_detector_parameters - parameters)
parameters = np.abs(updated_detector_parameters)+1e-3
evolution.append(parameters)
evolution.append([parameters, o_loss])
print('EVOLUTION:',evolution)
save_evolution(evolution)
......
from G4Calo import GeometryDescriptor, G4System
import random
import sys
import numpy as np
import pandas as pd
from torch.utils.data import Dataset, DataLoader
import torch
import time
from multiprocessing import Pool
import multiprocessing
import os
# make geant quiet
......@@ -79,7 +79,7 @@ class Parameters(object):
assert len(parameters) == self.N, "Wrong number of parameters!"
self.parameters = parameters
def get_descriptor(self, parameters : np.ndarray = None):
def get_descriptor(self, parameters : np.ndarray = None, desc_class = None):
'''
Returns a GeometryDescriptor from the given parameters.
Strictly takes a dict as input to ensure that the parameter names are consistent.
......@@ -89,7 +89,7 @@ class Parameters(object):
# get dict representation
parameters = self.mapper.map_to_dict(parameters) #takes a few ms, but worth it for clarity
cw = GeometryDescriptor()
cw = desc_class()
for i in range(self.N//2):
cw.addLayer(parameters['thickness_absorber_'+str(i)],"G4_Pb", False)
cw.addLayer(parameters['thickness_scintillator_'+str(i)],"G4_POLYSTYRENE",True,1)
......@@ -241,23 +241,73 @@ class Generator(object):
#create para list
para_list = [(self.random_parameters(parameters), i) for i in range(self.n_vars)]
#run self.generate_single in parallel
with Pool() as p:
dfs = p.map(self.generate_single, para_list)
#custom made process handling because of Geant4 hang-ups
processes = []
queues = []
results = []
for i in range(self.n_vars):
queue = multiprocessing.Queue()
p = multiprocessing.Process(target=self.generate_single, args=(self.random_parameters(parameters), i, queue))
time.sleep(.1)
p.start()
processes.append(p)
queues.append(queue)
done = [False for i in range(self.n_vars)]
done_after = [-1. for _ in range(self.n_vars)]
start_time = time.time()
avg_time_per_job = 1e6
while not all(done):
sth_happened = False
for i, process in enumerate(processes):
queue = queues[i]
try:
# Wait for the process to put data in the queue
result = queue.get(timeout=.1)
results.append(result)
time.sleep(0.1)
process.terminate() #just kill it because of geant being weird
done[i] = True
done_after[i] = time.time() - start_time
sth_happened = True
except multiprocessing.queues.Empty:
pass
if any(done):
avg_time_per_job = np.mean(np.array(done_after)[np.array(done_after) > 0.])
#if one job takes longer than 10 times the average, kill it
if not done[i] and time.time() - start_time > 10. * avg_time_per_job:
print('killing job', i)
process.terminate()
done[i] = True
sth_happened = True
if sth_happened:
print('done', np.sum(np.array(done)), done, done_after)
time.sleep(.1)
#dangerous but keep it for now
os.system("rm -f *_t0.pkl")#no problem with race here are not needed
os.system("rm -f *_t0.root")#no problem with race here are not needed
#concatenate the dataframes
return pd.concat(dfs)
#only select results with valid done_after time
results = [results[i] for i in range(len(results)) if done_after[i] > 0.]
return pd.concat(results)
def generate_single(self, i_in):
def generate_single(self, parameters, index, output_queue):
'''
generates a dataframe with one set of parameters.
this should probably be moved to the Parameters class, given that there are explicit mapping dependencies
'''
# sleep index ms
parameters, index = i_in
time.sleep(index/1000)
cw = self.par_desc.get_descriptor(parameters)
# import only in fork
time.sleep(index/10) #this is because the random seed in geant is based on the time in ms
from G4Calo import G4System, GeometryDescriptor
cw = self.par_desc.get_descriptor(parameters, GeometryDescriptor)
G4System.init(cw)
#make it more quiet; doesn't seem to work...
......@@ -274,15 +324,17 @@ class Generator(object):
df2 = G4System.run_batch(self.n_events_per_var, 'gamma', .1, 10) #DEBUG SET TO GAMMA FOR TESTING
df2 = df2.assign(true_pid=np.array( len(df2) * [2.11], dtype='float32')) #pid is scaled by 100
# just a dangerous workaround
del cw
import os
os.system("rm *_t0.pkl")#no race problem here; these files are not needed
# concatenate the data frames
df = pd.concat([df, df2])
tiled_pars = len(df)*[np.array(parameters) ] # np.tile(pars[np.newaxis, ...], [len(df), 1])
# add the array as columns to the dataframe
df = df.assign(detector_parameters=tiled_pars)
return df
output_queue.put(df)
print('done with generating #', index)
del G4System # this is important to force a reload and make it strictly local
return True
......@@ -304,6 +356,7 @@ class Generator(object):
# infer the lengths
df = self.generate(*args, **kwargs)
print('done with generating')
# create simple array data out of dataframe (will sit in memory, can be improved at some point)
# this is the input to the data loader
......
......@@ -34,7 +34,6 @@ class Optimizer(object):
def to(self, device):
self.device = device
self.surrogate_model.to(device)
# self.reconstruction_model.to(device) #not needed
def other_constraints(self, dataset):
# keep the size of the detector within 3m
......@@ -63,7 +62,9 @@ class Optimizer(object):
# create a dataloader for the dataset, this is the surrogate dataloader
data_loader = DataLoader(dataset, batch_size=batch_size, shuffle=True)
# loop over the batches
mean_loss = 0
for epoch in range(n_epochs):
mean_loss = 0 #only use last epoch
for batch_idx, (_, true_inputs, true_context, reco_result) in enumerate(data_loader):
true_inputs = true_inputs.to(self.device)
true_context = true_context.to(self.device)
......@@ -79,20 +80,24 @@ class Optimizer(object):
loss.backward()
#print('gradient',self.detector_parameters.grad, 'should have', self.detector_parameters.requires_grad)
self.optimizer.step()
mean_loss += loss.item()
#check if the parameters are still local otherwise stop
if not self.generator.is_local(self.detector_parameters.detach().cpu().numpy()):
print("Parameters not local anymore. Stopping.")
self.clamp_parameters()
return self.detector_parameters.detach().cpu().numpy(), False
return self.detector_parameters.detach().cpu().numpy(), False, mean_loss / (batch_idx+1)
if batch_idx % 20 == 0:
print('current parameters: ', self.detector_parameters.cpu().numpy())
print('current parameters: ', self.detector_parameters.detach().cpu().numpy())
print('Optimizer Epoch: {} \tLoss: {:.8f}'.format(
epoch, loss.item()))
self.clamp_parameters()
return self.detector_parameters.detach().cpu().numpy(), True
mean_loss /= len(data_loader)
return self.detector_parameters.detach().cpu().numpy(), True, mean_loss
def get_optimum(self):
return self.detector_parameters.detach().cpu().numpy()
......
......@@ -181,16 +181,17 @@ class Surrogate(torch.nn.Module):
epoch, loss.item()))
self.eval()
def apply_model_in_batches(self, dataset, batch_size):
def apply_model_in_batches(self, dataset, batch_size, oversample=1):
self.to()
self.eval()
# create a dataloader for the dataset
data_loader = DataLoader(dataset, batch_size=batch_size, shuffle=False)
# create a tensor to store the results
results = torch.zeros(len(dataset), self.n_reco_parameters)
reco = torch.zeros(len(dataset), self.n_reco_parameters)
true = torch.zeros(len(dataset), self.n_reco_parameters)
results = torch.zeros(oversample * len(dataset), self.n_reco_parameters)
reco = torch.zeros(oversample * len(dataset), self.n_reco_parameters)
true = torch.zeros(oversample * len(dataset), self.n_reco_parameters)
for i_o in range(oversample):
# loop over the batches
for batch_idx, (detector_parameters, true_inputs, true_context, reco_inputs) in enumerate(data_loader): # the reco is not needed as it is generated here
detector_parameters = detector_parameters.to(self.device)
......@@ -200,8 +201,10 @@ class Surrogate(torch.nn.Module):
# apply the model
reco_surrogate = self.sample_forward(detector_parameters, true_inputs, true_context)
# store the results
results[batch_idx * batch_size : (batch_idx + 1) * batch_size] = reco_surrogate
reco[batch_idx * batch_size : (batch_idx + 1) * batch_size] = reco_surrogate
true[batch_idx * batch_size : (batch_idx + 1) * batch_size] = reco_inputs
start_inject_index = i_o * len(dataset) + batch_idx * batch_size
end_inject_index = i_o * len(dataset) + (batch_idx + 1) * batch_size
results[start_inject_index : end_inject_index] = reco_surrogate
reco [start_inject_index : end_inject_index] = reco_inputs
true [start_inject_index : end_inject_index] = true_inputs
return results, reco, true
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment