Skip to content
Snippets Groups Projects
Commit 26b8b7f5 authored by Lars Sowa's avatar Lars Sowa
Browse files

naming & misc

parent c433528e
Branches
No related tags found
1 merge request!9Flex tensor dev
Pipeline #3501 failed
Source diff could not be displayed: it is too large. Options to address this: view the blob.
%% Cell type:markdown id: tags:
# Testing the TaylorAnalysis Framework
In this notebook we will demonstrate the correctness of the taylorcoefficients computed by the TaylorAnalysis based on a simple example.
%% Cell type:code id: tags:
``` python
import tayloranalysis as ta
import torch
```
%% Cell type:markdown id: tags:
First setup a simple polynomial function
$$f(x,y) = xy+xy^2+y^3$$
Since the TaylorAnalysis uses `torch`, we'll define the polynomial within a `torch.nn.Module`.
%% Cell type:code id: tags:
``` python
class Polynom(torch.nn.Module):
def __init__(self):
torch.nn.Module.__init__(self)
def forward(self, point):
y = point[:,1]
x = point[:,0]
return x*y + x * y**2 + y**3
```
%% Cell type:markdown id: tags:
Initialize model and extend it with the TaylorAnalysis module.
%% Cell type:code id: tags:
``` python
model = Polynom()
model = ta.extend_model(model)
```
%% Cell type:markdown id: tags:
Remember the taylor series for a function $f(x, y)$ at point $a$:
$$f(x, y) = \sum_{m=0}^{\infty} \sum_{n=0}^{\infty} \frac{1}{m!n!} \left[\frac{\partial^{m+n}f}{\partial x^m \partial y^n}(x, y)(x-a)^m(y-b)^n\right]$$
with the derivative orders $m$, $n$ w.r.t. the input variables $x$ and $y$.
The corresponding taylor coefficients are
$$\mathrm{TC}_\mathrm{m,n}(x,y) = \frac{1}{m!n!} \frac{\partial^{m+n}f}{\partial x^m \partial y^n}(x, y)$$
%% Cell type:markdown id: tags:
Now choose some taylorcoefficients we want to look at and define a point to expand around:
%% Cell type:code id: tags:
``` python
combinations = [(0,), (0,0), (0,1,1), (1,), (0,1), (0,0,1), (1,1,1)]
combinations = [
(0,),
(1,),
(0, 0),
(0, 1),
(1, 0),
(1, 1),
(0, 0, 0),
(0, 0, 1),
(0, 1, 0),
(0, 1, 1),
(1, 0, 0),
(1, 0, 1),
(1, 1, 0),
(1, 1, 1),
]
point = torch.tensor([[3,2]]).float()
```
%% Cell type:markdown id: tags:
Loop over combinations and determine the corresponding taylorcoefficients.
As reduction method we will use the default identity function.
%% Cell type:code id: tags:
``` python
for combi in combinations:
# compute taylorcoefficient based on point
tc = model.get_tc(
"point",
forward_kwargs={"point": point},
index_list=[combi],
tc_idx_list=[combi],
)
tc = list(tc.values())[0]
# determine derivation orders
xorder = combi.count(0)
yorder = combi.count(1)
# print result
print(f"TC{chr(8320 + xorder)}{chr(8320 + yorder)}({int(point[0][0])},{int(point[0][1])}) = {float(tc)}")
```
%% Output
TC₁₀(3,2) = 6.0
TC₀₁(3,2) = 27.0
TC₂₀(3,2) = 0.0
TC₁₁(3,2) = 5.0
TC₁₁(3,2) = 5.0
TC₀₂(3,2) = 9.0
TC₃₀(3,2) = 0.0
TC₂₁(3,2) = 0.0
TC₂₁(3,2) = 0.0
TC₁₂(3,2) = 1.0
TC₂₁(3,2) = 0.0
TC₁₂(3,2) = 1.0
TC₁₂(3,2) = 1.0
TC₀₃(3,2) = 1.0
%% Cell type:markdown id: tags:
This can be manually compared with the corresponding derivations of
$$f(x,y) = xy+xy^2+y^3$$
Therefore holds for TC₁₀(3,2):
$$
\begin{align}
\mathrm{TC}_\mathrm{1,0}(3,2) &= \frac{1}{1!0!} \frac{\partial^{1+0}f}{\partial x^1 \partial y^0}(x, y)|_{x=3, y=2} \\
&= \frac{\partial}{\partial x}f(x, y) |_{x=3, y=2} \\
&= \frac{\partial}{\partial x} \left(xy+xy^2+y^3\right)|_{x=3, y=2} \\
&= y+y^2|_{x=3, y=2} \\
&= 6
\end{align}
$$
... and same for remaining.
......
......@@ -10,7 +10,7 @@ from collections.abc import Sequence
# Helpers
def get_factorial_factors(*indices: int) -> float:
def _get_factorial_factors(*indices: int) -> float:
"""Function to compute the factorial factors for the taylorcoefficients: Prod_n^len(indices) 1/n!
Returns:
......@@ -23,7 +23,7 @@ def get_factorial_factors(*indices: int) -> float:
return 1.0 / factor
def get_summation_indices(shape: torch.Tensor.shape, drop_axis) -> Tuple[int, ...]:
def _get_summation_indices(shape: torch.Tensor.shape, drop_axis) -> Tuple[int, ...]:
"""Function to get the summation indices for the gradient.
Args:
......@@ -39,7 +39,7 @@ def get_summation_indices(shape: torch.Tensor.shape, drop_axis) -> Tuple[int, ..
return tuple(idx)
def get_slice(shape: torch.Tensor.shape, index: int, axis: int) -> Tuple[slice, ...]:
def _get_slice(shape: torch.Tensor.shape, index: int, axis: int) -> Tuple[slice, ...]:
"""Function to get the slice for a tensor.
Args:
......@@ -67,42 +67,9 @@ def identity(x: torch.Tensor) -> torch.Tensor:
return x
class CustomForwardDict(dict):
# custom dict wrapper to dynamically access the derivation target
def __init__(self, key_to_tctensor: str, target_idx: int, *args, **kwargs):
super().__init__(*args, **kwargs)
self._key = key_to_tctensor
self._idx = target_idx
@property
def tctensor(self):
"""Property to easily access the derivation target.
Raises:
ValueError: If _idx is not an int
NotImplementedError: Only list and tensor are supported as derivation targets.
Returns:
torch.Tensor: Derivation target.
"""
# if the key is a list/tuple, return the element (tensor) at the given index
if isinstance(self.__getitem__(self._key), Sequence):
if not isinstance(self._idx, int):
raise ValueError("Target index must be an integer!")
return self.__getitem__(self._key)[self._idx]
# if the key is a tensor, return the tensor
elif isinstance(self.__getitem__(self._key), torch.Tensor):
return self.__getitem__(self._key)
else:
raise NotImplementedError(
"Only list and tensor are supported as derivation targets."
)
def _node_selection(
pred: torch.Tensor,
output_node: int,
selected_output_node: int,
eval_max_output_node_only: bool,
) -> torch.Tensor:
"""Method to select the nodes for which the taylorcoefficients should be computed.
......@@ -110,7 +77,7 @@ def _node_selection(
Args:
pred (torch.Tensor): X data of shape (batch, features).
output_node (Optional[Int]): Node selection for evaluation. If None, all nodes are selected. If int, only the selected node is selected. If tuple, only the selected nodes are selected.
eval_max_output_node_only (Bool): If True, only the node with the highest value is selected.
eval_max_selected_output_node_only (Bool): If True, only the node with the highest value is selected.
Returns:
torch.Tensor: Selected taylorcoefficients (batch, features)
......@@ -132,15 +99,48 @@ def _node_selection(
pred = pred * pred_cat
# second step: class selection
# no selection is performed when output_node == "all"
if isinstance(output_node, (int, tuple)): # i.e. 0, (0, 1)
pred = pred[:, output_node]
# no selection is performed when selected_output_node == "all"
if isinstance(selected_output_node, (int, tuple)): # i.e. 0, (0, 1)
pred = pred[:, selected_output_node]
# sum up everything
pred = pred.sum()
return pred
class CustomForwardDict(dict):
# custom dict wrapper to dynamically access the tctensor
def __init__(
self, forward_kwargs_tctensor_key: str, idx_to_tctensor: int, *args, **kwargs
):
super().__init__(*args, **kwargs)
self._key = forward_kwargs_tctensor_key
self._idx = idx_to_tctensor
@property
def tctensor(self):
"""Property to easily access the tctensor.
Raises:
ValueError: If _idx is not an int
NotImplementedError: Only list and tensor are supported as tctensor.
Returns:
torch.Tensor: tctensor.
"""
# if the key is a list/tuple, return the element (tensor) at the given index
if isinstance(self[self._key], Sequence):
if not isinstance(self._idx, int):
raise ValueError("Target index must be an integer!")
return self[self._key][self._idx]
# if the key is a tensor, return the tensor
elif isinstance(self[self._key], torch.Tensor):
return self[self._key]
else:
raise NotImplementedError("Only list and tensor are supported as tctensor.")
##############################################
# main class
......@@ -154,7 +154,7 @@ class BaseTaylorAnalysis(object):
def _first_order(
self,
forward_kwargs: CustomForwardDict,
features_axis: int,
tctensor_features_axis: int,
pred: torch.Tensor,
ind_i: int,
) -> torch.Tensor:
......@@ -171,13 +171,13 @@ class BaseTaylorAnalysis(object):
# first order grads
gradients = grad(pred, forward_kwargs.tctensor)[0]
return gradients[get_slice(gradients.shape, ind_i, features_axis)]
return gradients[_get_slice(gradients.shape, ind_i, tctensor_features_axis)]
@torch.enable_grad()
def _second_order(
self,
forward_kwargs: CustomForwardDict,
features_axis: int,
tctensor_features_axis: int,
pred: torch.Tensor,
ind_i: int,
ind_j: int,
......@@ -196,19 +196,19 @@ class BaseTaylorAnalysis(object):
# first order gradients
gradients = grad(pred, forward_kwargs.tctensor, create_graph=True)[0]
gradients = gradients.sum(
axis=get_summation_indices(gradients.shape, features_axis)
axis=_get_summation_indices(gradients.shape, tctensor_features_axis)
)
# second order gradients
gradients = grad(gradients[ind_i], forward_kwargs.tctensor)[0]
# factor for second order taylor terms
gradients *= get_factorial_factors(ind_i, ind_j)
return gradients[get_slice(gradients.shape, ind_j, features_axis)]
gradients *= _get_factorial_factors(ind_i, ind_j)
return gradients[_get_slice(gradients.shape, ind_j, tctensor_features_axis)]
@torch.enable_grad()
def _third_order(
self,
forward_kwargs: CustomForwardDict,
features_axis: int,
tctensor_features_axis: int,
pred: torch.Tensor,
ind_i: int,
ind_j: int,
......@@ -229,37 +229,37 @@ class BaseTaylorAnalysis(object):
# first order gradients
gradients = grad(pred, forward_kwargs.tctensor, create_graph=True)[0]
gradients = gradients.sum(
axis=get_summation_indices(gradients.shape, features_axis)
axis=_get_summation_indices(gradients.shape, tctensor_features_axis)
)
# second order gradients
gradients = grad(gradients[ind_i], forward_kwargs.tctensor, create_graph=True)[
0
]
gradients = gradients.sum(
axis=get_summation_indices(gradients.shape, features_axis)
axis=_get_summation_indices(gradients.shape, tctensor_features_axis)
)
# third order gradients
gradients = grad(gradients[ind_j], forward_kwargs.tctensor)[0]
# factor for all third order taylor terms
gradients *= get_factorial_factors(ind_i, ind_j, ind_k)
return gradients[get_slice(gradients.shape, ind_k, features_axis)]
gradients *= _get_factorial_factors(ind_i, ind_j, ind_k)
return gradients[_get_slice(gradients.shape, ind_k, tctensor_features_axis)]
def _calculate_tc(
self,
forward_kwargs: CustomForwardDict,
output_node: int,
selected_output_node: int,
eval_max_output_node_only: bool,
features_axis: int,
keep_model_output_idx: int,
tctensor_features_axis: int,
selected_model_output_idx: int,
*indices,
) -> torch.Tensor:
"""Method to calculate the taylorcoefficients based on the indices.
Args:
forward_kwargs (CustomForwardDict): (Custom) Dictionary with additional forward arguments
output_node (Int): Node selection for evaluation.
selected_output_node (Int): Node selection for evaluation.
eval_max_output_node_only (Bool): If True, only the node with the highest value is selected.
features_axis (int, optional): Dimension containing features in tensor forward_kwargs.tctensor. Defaults to -1.
tctensor_features_axis (int, optional): Dimension containing features in tctensor given in forward_kwargs. Defaults to -1.
Raises:
NotImplementedError: Only first, second and third order taylorcoefficients are supported.
......@@ -276,19 +276,19 @@ class BaseTaylorAnalysis(object):
# select relevant predictions
if isinstance(pred, Sequence):
if keep_model_output_idx is None:
if selected_model_output_idx is None:
raise ValueError(
"keep_model_output_idx must be set since model output is a sequence!"
"selected_model_output_idx must be set since model output is a sequence!"
)
pred = pred[keep_model_output_idx]
pred = _node_selection(pred, output_node, eval_max_output_node_only)
pred = pred[selected_model_output_idx]
pred = _node_selection(pred, selected_output_node, eval_max_output_node_only)
# compute TCs
functions = [self._first_order, self._second_order, self._third_order]
try:
return functions[len(indices) - 1](
forward_kwargs,
features_axis,
tctensor_features_axis,
pred,
*indices,
)
......@@ -299,57 +299,57 @@ class BaseTaylorAnalysis(object):
def get_tc(
self,
key_to_tctensor: str,
forward_kwargs_tctensor_key: str,
forward_kwargs: Dict[str, Any],
idx_list: List[Tuple[int, ...]],
tc_idx_list: List[Tuple[int, ...]],
*,
output_node: Optional[Union[int, Tuple[int], None]] = None,
selected_output_node: Optional[Union[int, Tuple[int]]] = None,
eval_max_output_node_only: Optional[bool] = True,
reduce_func: Optional[Callable] = identity,
features_axis: int = -1,
idx_to_tctensor: Union[int, None] = None,
keep_model_output_idx: Union[int, None] = None,
tctensor_features_axis: int = -1,
additional_idx_to_tctensor: Optional[int] = None,
selected_model_output_idx: Optional[int] = None,
) -> Dict[Tuple[int, ...], Any]:
"""Function to handle multiple indices and return the taylorcoefficients as a dictionary: to be used by the user.
Args:
key_to_tctensor (str): Key to input tensor in forward_kwargs. Based on this tensor the taylorcoefficients are computed.
forward_kwargs_tctensor_key (str): Key to input tensor in forward_kwargs. Based on this tensor the taylorcoefficients are computed.
forward_kwargs (Union[None, Dict[str, Any]]): Dictionary with forward arguments
idx_list (List[Tuple[int, ...]]): List of indices for which the taylorcoefficients should be computed.
output_node (Int, optional): Node selection for evaluation. Defaults to None.
tc_idx_list (List[Tuple[int, ...]]): List of indices for which the taylorcoefficients should be computed based on the tensor selected by forward_kwargs_tctensor_key (and additional_idx_to_tctensor).
selected_output_node (Int, optional): Node selection for evaluation. Defaults to None.
eval_max_output_node_only (Bool, optional): If True, only the node with the highest value is selected. Defaults to True.
reduce_func (Callable, optional): Function to reduce the taylorcoefficients. Defaults to identity.
features_axis (int, optional): Dimension containing features in tensor forward_kwargs.tctensor. Defaults to -1.
idx_to_tctensor (Union[int, None], optional): Index of the target tensor if forward_kwargs[key_to_tctensor] is a list. Defaults to None.
keep_model_output_idx (int, optional): Index of the model output if its output is a sequence. Defaults to 0.
tctensor_features_axis (int, optional): Dimension containing features in tctensor given in forward_kwargs. Defaults to -1.
additional_idx_to_tctensor (int, optional): Index of the tctensor if forward_kwargs[forward_kwargs_tctensor_key] is a list. Defaults to None.
selected_model_output_idx (int, optional): Index of the model output if its output is a sequence. Defaults to 0.
Raises:
ValueError: idx_list must be a List of tuples!
ValueError: tc_idx_list must be a List of tuples!
Returns:
Dict: Dictionary with taylorcoefficients. Values are set by the user within the reduce function. Keys are the indices (tuple).
"""
forward_kwargs = CustomForwardDict(
key_to_tctensor, idx_to_tctensor, forward_kwargs
forward_kwargs_tctensor_key, additional_idx_to_tctensor, forward_kwargs
)
assert isinstance(reduce_func, Callable), "Reduce function must be callable!"
assert isinstance(
output_node, (int, tuple, type(None))
selected_output_node, (int, tuple, type(None))
), "Node must be int, tuple or None!"
# loop over all tc to compute
output = {}
for ind in idx_list:
for ind in tc_idx_list:
if not isinstance(ind, tuple):
raise ValueError("idx_list must be a list of tuples!")
raise ValueError("tc_idx_list must be a list of tuples!")
# get TCs
out = self._calculate_tc(
forward_kwargs,
output_node,
selected_output_node,
eval_max_output_node_only,
features_axis,
keep_model_output_idx,
tctensor_features_axis,
selected_model_output_idx,
*ind,
)
# apply reduce function
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment