From 222f3ec8f1f6a81c385ec43c2aa01744f810e99a Mon Sep 17 00:00:00 2001 From: Onno Kampman Date: Fri, 15 Mar 2024 15:49:37 +0800 Subject: [PATCH 01/11] ignore specific notebooks --- .gitignore | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/.gitignore b/.gitignore index 5985c7d..08ff2ae 100644 --- a/.gitignore +++ b/.gitignore @@ -1,6 +1,7 @@ .DS_Store -notebooks/ +notebooks/computational_cost/ +notebooks/studies/ # Byte-compiled / optimized / DLL files __pycache__/ From dcd6f83fa505f889114d32c32a3d9a9b579a0396 Mon Sep 17 00:00:00 2001 From: Onno Kampman Date: Fri, 15 Mar 2024 16:57:23 +0800 Subject: [PATCH 02/11] prepare for factored WP implementation --- fcest/models/likelihoods.py | 114 +++++++++++++++++----- fcest/models/wishart_process.py | 166 +++++++++++++++++++------------- 2 files changed, 185 insertions(+), 95 deletions(-) diff --git a/fcest/models/likelihoods.py b/fcest/models/likelihoods.py index af70cf8..e641999 100644 --- a/fcest/models/likelihoods.py +++ b/fcest/models/likelihoods.py @@ -26,30 +26,69 @@ ) -class WishartProcessLikelihood(MonteCarloLikelihood): +class WishartProcessLikelihoodBase(MonteCarloLikelihood): + """ + Class for Wishart process likelihoods. + """ + + def __init__( + self, + D: int, + nu: int = None, + num_mc_samples: int = 2, + num_factors: int = None, + ): + """ + Initialize the base Wishart process likelihood. + + Parameters + ---------- + :param D: + The number of time series. + :param nu: + Degrees of freedom. + :param num_mc_samples: + Number of Monte Carlo samples used to approximate gradients (S). + """ + if num_factors is not None: + latent_dim = num_factors * nu + else: + latent_dim = D * nu + super().__init__( + input_dim=1, + latent_dim=latent_dim, + observation_dim=D, + ) + self.D = D + self.nu = nu + self.num_mc_samples = num_mc_samples + self.num_factors = num_factors + + +class WishartProcessLikelihood(WishartProcessLikelihoodBase): """ Class for Wishart process likelihoods. It specifies an observation model connecting the latent functions ('F') to the data ('Y'). """ def __init__( - self, - D: int, - nu: int = None, - num_mc_samples: int = 2, - A_scale_matrix_option: str = 'train_full_matrix', - train_additive_noise: bool = False, - additive_noise_matrix_init: float = 0.01, - verbose: bool = True, + self, + D: int, + nu: int = None, + num_mc_samples: int = 2, + A_scale_matrix_option: str = 'train_full_matrix', + train_additive_noise: bool = False, + additive_noise_matrix_init: float = 0.01, + verbose: bool = True, ) -> None: """ Initialize the Wishart process likelihood. Parameters ---------- - :param D: + :param D: The number of time series. - :param nu: + :param nu: Degrees of freedom. :param num_mc_samples: Number of Monte Carlo samples used to approximate gradients (S). @@ -66,13 +105,10 @@ def __init__( if nu < D: raise Exception("Wishart Degrees of Freedom must be >= D.") super().__init__( - input_dim=1, - latent_dim=D * nu, - observation_dim=D, + D=D, + nu=nu, + num_mc_samples=num_mc_samples, ) - self.D = D - self.nu = nu - self.num_mc_samples = num_mc_samples self.A_scale_matrix = self._set_A_scale_matrix(option=A_scale_matrix_option) # (D, D) # The additive noise matrix must have positive diagonal values, which this softplus construction guarantees. 
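A minimal sketch of the softplus construction referenced in the comment above, assuming the same convention as `additive_part`; the variable names below are illustrative, not FCEst code:

```python
import numpy as np
import tensorflow as tf

D = 2
additive_noise_matrix_init = 0.01

# Initialize in unconstrained space so that softplus(raw) == 0.01 exactly:
# softplus(x) = log(1 + exp(x)), whose inverse is log(exp(y) - 1).
raw_init = np.log(np.exp(additive_noise_matrix_init) - 1.0)
raw = tf.Variable(raw_init * np.ones(D), dtype=tf.float64)  # trainable, any real value allowed

noise_diag = tf.nn.softplus(raw)  # strictly positive for every real-valued `raw`
print(noise_diag.numpy())  # approx. [0.01, 0.01]
```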
@@ -91,11 +127,11 @@ def __init__( print('initial additive part: ', self.additive_part) def variational_expectations( - self, - x_data: np.array, - f_mean: tf.Tensor, - f_variance: tf.Tensor, - y_data: np.array, + self, + x_data: np.array, + f_mean: tf.Tensor, + f_variance: tf.Tensor, + y_data: np.array, ) -> tf.Tensor: """ This returns the expectation of log likelihood part of the ELBO. @@ -205,7 +241,10 @@ def _log_prob( log_likel_p = tf.math.reduce_mean(log_likel_p, axis=0) # mean over Monte Carlo samples, (N, ) return log_likel_p - def _set_A_scale_matrix(self, option: str = 'identity') -> tf.Tensor: + def _set_A_scale_matrix( + self, + option: str = 'identity', + ) -> tf.Tensor: """ A (the Cholesky factor of scale matrix V) represents the mean of estimates. @@ -250,7 +289,10 @@ def _set_A_scale_matrix(self, option: str = 'identity') -> tf.Tensor: raise NotImplementedError(f"Option '{option:s}' for 'A_scale_matrix' not recognized.") return A_scale_matrix - def _add_diagonal_additive_noise(self, cov_matrix): + def _add_diagonal_additive_noise( + self, + cov_matrix, + ): """ Add some noise, either fixed or trained. @@ -264,7 +306,27 @@ def _add_diagonal_additive_noise(self, cov_matrix): ) -class FactoredWishartProcessLikelihood(MonteCarloLikelihood): +class FactoredWishartProcessLikelihood(WishartProcessLikelihoodBase): + """ + Class for Factored Wishart process likelihoods. + """ + + def __init__( + self, + D: int, + nu: int = None, + num_mc_samples: int = 2, + num_factors: int = None, + A_scale_matrix_option: str = 'train_full_matrix', + train_additive_noise: bool = False, + additive_noise_matrix_init: float = 0.01, + verbose: bool = True, + ): + nu = num_factors if nu is None else nu + super().__init__( + D=D, + nu=nu, + num_mc_samples=num_mc_samples, + ) - def __init__(self): raise NotImplementedError("Factorized Wishart process not implemented yet.") diff --git a/fcest/models/wishart_process.py b/fcest/models/wishart_process.py index cfc773a..633acb8 100644 --- a/fcest/models/wishart_process.py +++ b/fcest/models/wishart_process.py @@ -37,7 +37,6 @@ class VariationalWishartProcess(models.vgp.VGP): Base class of the variational Wishart process (VWP) model. Most of the work will be done by `gpflow.models.vgp.VGP`. - TODO: add option for minibatch training TODO: shall we convert all to float32 instead of float64 to speed up computation? from gpflow.config import default_float from gpflow.utilities import to_default_float @@ -48,17 +47,18 @@ class VariationalWishartProcess(models.vgp.VGP): """ def __init__( - self, - x_observed: np.array, - y_observed: np.array, - nu: int = None, - kernel: Kernel = None, - num_mc_samples: int = 5, - A_scale_matrix_option: str = 'train_full_matrix', - train_additive_noise: bool = True, - kernel_lengthscale_init: float = 0.3, - q_sqrt_init: float = 0.001, - num_factors: int = None, + self, + x_observed: np.array, + y_observed: np.array, + nu: int = None, + kernel: Kernel = None, + num_mc_samples: int = 5, + A_scale_matrix_option: str = 'train_full_matrix', + train_additive_noise: bool = True, + kernel_lengthscale_init: float = 0.3, + q_sqrt_init: float = 0.001, + num_factors: int = None, + minibatch_size: int = None, ) -> None: """ Initialize Variational Wishart Process (VWP) model. @@ -85,13 +85,14 @@ def __init__( :param q_sqrt_init: Empirical results suggest a value of 0.001 is slightly better than 0.01. :param num_factors: + Number of factors to use in the factored model. + If None, the non-factored model will be instantiated. 
+ :param minibatch_size: + TODO: add option for minibatch training """ self.D = y_observed.shape[1] logging.info(f"Found {self.D:d} time series (D = {self.D:d}).") - if num_factors is not None: - raise NotImplementedError("Factorized Wishart process not implemented yet.") - if nu is None: nu = self.D self.nu = nu @@ -99,13 +100,23 @@ def __init__( if kernel is None: kernel = gpflow.kernels.Matern52() - likel = WishartProcessLikelihood( - D=self.D, - nu=nu, - num_mc_samples=num_mc_samples, - A_scale_matrix_option=A_scale_matrix_option, - train_additive_noise=train_additive_noise, - ) + if num_factors is not None: + likel = FactoredWishartProcessLikelihood( + D=self.D, + nu=nu, + num_mc_samples=num_mc_samples, + A_scale_matrix_option=A_scale_matrix_option, + train_additive_noise=train_additive_noise, + num_factors=num_factors, + ) + else: + likel = WishartProcessLikelihood( + D=self.D, + nu=nu, + num_mc_samples=num_mc_samples, + A_scale_matrix_option=A_scale_matrix_option, + train_additive_noise=train_additive_noise, + ) super().__init__( data=(x_observed, y_observed), kernel=kernel, @@ -118,10 +129,10 @@ def __init__( ) def predict_cov( - self, - x_new: np.array, - num_mc_samples: int = 300, - ) -> (tf.Tensor, tf.Tensor): + self, + x_new: np.array, + num_mc_samples: int = 300, + ) -> tuple[tf.Tensor, tf.Tensor]: """ The main attribute to predict covariance matrices at any point in time. @@ -141,10 +152,10 @@ def predict_cov( return cov_mean, cov_stddev def predict_corr( - self, - x_new: np.array, - num_mc_samples: int = 300, - ) -> (tf.Tensor, tf.Tensor): + self, + x_new: np.array, + num_mc_samples: int = 300, + ) -> tuple[tf.Tensor, tf.Tensor]: """ The main attribute to predict correlation matrices at any point in time. @@ -312,6 +323,8 @@ class SparseVariationalWishartProcess(models.svgp.SVGP): """ Base class of the sparse variational Wishart process (SVWP) model. Most of the work will be done by `gpflow.models.svgp.SVGP`. + This is essentially a wrapper around the SVGP model class in GPflow. + This sparse implementation reduces computational cost if we have large N, but is not likely to improve performance. However, the location of the inducing points Z may be interesting by themselves. @@ -319,18 +332,19 @@ class SparseVariationalWishartProcess(models.svgp.SVGP): """ def __init__( - self, - D: int, - Z: np.array, - nu: int = None, - kernel: Kernel = gpflow.kernels.Matern52(), - num_mc_samples: int = 5, - A_scale_matrix_option: str = 'train_full_matrix', - train_additive_noise: bool = True, - kernel_lengthscale_init: float = 0.3, - q_sqrt_init: float = 0.001, - num_factors: int = None, - verbose: bool = True, + self, + D: int, + Z: np.array, + nu: int = None, + kernel: Kernel = gpflow.kernels.Matern52(), + num_mc_samples: int = 5, + A_scale_matrix_option: str = 'train_full_matrix', + train_additive_noise: bool = True, + kernel_lengthscale_init: float = 0.3, + q_sqrt_init: float = 0.001, + num_factors: int = None, + minibatch_size: int = None, + verbose: bool = True, ) -> None: """ Initialize Sparse Variational Wishart Process (SVWP) model. @@ -355,6 +369,8 @@ def __init__( :param num_factors: Number of factors to use in the factored model. If None, the non-factored model will be instantiated. 
+ :param minibatch_size: + TODO: add option for minibatch training :param verbose: """ self.D = D @@ -367,7 +383,15 @@ def __init__( self.nu = nu if num_factors is not None: - likel = FactoredWishartProcessLikelihood() + likel = FactoredWishartProcessLikelihood( + D=self.D, + nu=nu, + num_mc_samples=num_mc_samples, + A_scale_matrix_option=A_scale_matrix_option, + train_additive_noise=train_additive_noise, + num_factors=num_factors, + verbose=verbose, + ) else: likel = WishartProcessLikelihood( D=self.D, @@ -390,10 +414,10 @@ def __init__( ) def predict_cov( - self, - x_new: np.array, - num_mc_samples: int = 300, - ) -> (tf.Tensor, tf.Tensor): + self, + x_new: np.array, + num_mc_samples: int = 300, + ) -> tuple[tf.Tensor, tf.Tensor]: """ The main attribute to predict covariance matrices at any point in time. @@ -415,29 +439,29 @@ def predict_cov( return cov_mean, cov_stddev def predict_cov_samples( - self, - x_new: np.array, - num_mc_samples: int = 300, + self, + x_new: np.array, + num_mc_samples: int = 300, ) -> tf.Tensor: """ - TODO: we don't use this - The main attribute to predict covariance matrices at any point in time. Parameters ---------- :param x_new: + The locations at which to predict. :param num_mc_samples: + Number of Monte Carlo samples. :return: """ cov_samples = self._get_cov_samples(x_new, num_mc_samples) # (S_new, N_new, D, D) return cov_samples def predict_corr( - self, - x_new: np.array, - num_mc_samples: int = 300, - ) -> (tf.Tensor, tf.Tensor): + self, + x_new: np.array, + num_mc_samples: int = 300, + ) -> tuple[tf.Tensor, tf.Tensor]: """ The main attribute to predict correlation matrices at any point in time. @@ -446,7 +470,9 @@ def predict_corr( Parameters ---------- :param x_new: + Locations at which to predict. :param num_mc_samples: + Number of Monte Carlo samples. :return: Tuple of (mean, stddev) of correlation matrices. """ @@ -460,9 +486,9 @@ def predict_corr( return corr_mean, corr_stddev def _get_cov_samples( - self, - x_new: np.array, - num_mc_samples: int = 300, + self, + x_new: np.array, + num_mc_samples: int = 300, ) -> tf.Tensor: """ Prediction routine for covariance matrices. @@ -494,7 +520,9 @@ def _get_cov_samples( dtype=tf.dtypes.float64 ) * f_stddev_new + f_mean_new # (S_new, N_new, D, nu) - # TODO: does this still work if nu != D? + if self.nu != self.D: + # TODO: does this still work if nu != D? + raise NotImplementedError("This implementation only works for nu = D.") # print(self.likelihood.A_scale_matrix) # af = tf.matmul(self.likelihood.A_scale_matrix, f_sample) # (S_new, N_new, D, nu) af = tf.multiply(self.likelihood.A_scale_matrix, f_sample) # (S_new, N_new, D, nu) @@ -507,9 +535,9 @@ def scale_matrix(self): return self.likelihood.A_scale_matrix * self.likelihood.A_scale_matrix.T def _initialize_parameters( - self, - kernel_lengthscale_init: float, - q_sqrt_init: float, + self, + kernel_lengthscale_init: float, + q_sqrt_init: float, ) -> None: """ Set initial values of trainable parameters. @@ -524,9 +552,9 @@ def _initialize_parameters( self.q_sqrt.assign(self.q_sqrt * q_sqrt_init) def save_model_params_dict( - self, - savedir: str, - model_name: str, + self, + savedir: str, + model_name: str, ) -> None: """ We only save the trained model parameters. @@ -558,9 +586,9 @@ def save_model_params_dict( logging.info(f"Model '{model_name:s}' saved in '{savedir:s}'.") def load_from_params_dict( - self, - savedir: str, - model_name: str, + self, + savedir: str, + model_name: str, ) -> None: """ This assumes you have created a new model. 
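On the `minibatch_size` TODO above: a sketch of one way it could be realized for the SVGP-based model, using GPflow's external-data training pattern. This assumes `model` is a `SparseVariationalWishartProcess` (an SVGP subclass); `make_minibatch_training_loss` is a hypothetical helper, not an existing FCEst function:

```python
import numpy as np
import tensorflow as tf

def make_minibatch_training_loss(model, x_observed: np.ndarray, y_observed: np.ndarray,
                                 minibatch_size: int = 16):
    # Build an infinitely repeating, shuffled minibatch stream over (x, y).
    dataset = (
        tf.data.Dataset.from_tensor_slices((x_observed, y_observed))
        .repeat()
        .shuffle(buffer_size=x_observed.shape[0])
        .batch(minibatch_size)
    )
    # SVGP-style GPflow models accept a data iterator, so each evaluation of
    # the returned closure estimates the ELBO on a fresh minibatch.
    return model.training_loss_closure(iter(dataset))
```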
From a3fe789d69610737cd6dd499a545d50f96c0b358 Mon Sep 17 00:00:00 2001 From: Onno Kampman Date: Fri, 15 Mar 2024 18:17:40 +0800 Subject: [PATCH 03/11] minor edits --- fcest/models/likelihoods.py | 54 +++++++++++++++++++++++++++------ fcest/models/wishart_process.py | 16 +++++----- tests_requirements.txt | 1 + 3 files changed, 53 insertions(+), 18 deletions(-) diff --git a/fcest/models/likelihoods.py b/fcest/models/likelihoods.py index e641999..3b7e8d0 100644 --- a/fcest/models/likelihoods.py +++ b/fcest/models/likelihoods.py @@ -28,7 +28,7 @@ class WishartProcessLikelihoodBase(MonteCarloLikelihood): """ - Class for Wishart process likelihoods. + Abstract class for all Wishart process likelihoods. """ def __init__( @@ -40,6 +40,7 @@ def __init__( ): """ Initialize the base Wishart process likelihood. + This implementation assumes the input is uni-dimensional. Parameters ---------- @@ -49,6 +50,7 @@ def __init__( Degrees of freedom. :param num_mc_samples: Number of Monte Carlo samples used to approximate gradients (S). + Sometimes also denoted as R. """ if num_factors is not None: latent_dim = num_factors * nu @@ -76,7 +78,7 @@ def __init__( D: int, nu: int = None, num_mc_samples: int = 2, - A_scale_matrix_option: str = 'train_full_matrix', + scale_matrix_cholesky_option: str = 'train_full_matrix', train_additive_noise: bool = False, additive_noise_matrix_init: float = 0.01, verbose: bool = True, @@ -92,7 +94,7 @@ def __init__( Degrees of freedom. :param num_mc_samples: Number of Monte Carlo samples used to approximate gradients (S). - :param A_scale_matrix_option: + :param scale_matrix_cholesky_option: :param train_additive_noise: Whether to train the additive noise matrix (Lambda). :param additive_noise_matrix_init: @@ -109,7 +111,9 @@ def __init__( nu=nu, num_mc_samples=num_mc_samples, ) - self.A_scale_matrix = self._set_A_scale_matrix(option=A_scale_matrix_option) # (D, D) + self.A_scale_matrix = self._set_A_scale_matrix( + option=scale_matrix_cholesky_option + ) # (D, D) # The additive noise matrix must have positive diagonal values, which this softplus construction guarantees. additive_noise_matrix_init = np.log( @@ -122,7 +126,7 @@ def __init__( ) # (D, ) if verbose: - logging.info(f"A scale matrix option is '{A_scale_matrix_option:s}'.") + logging.info(f"Scale matrix Cholesky (matrix A) option is '{scale_matrix_cholesky_option:s}'.") print('A_scale_matrix: ', self.A_scale_matrix) print('initial additive part: ', self.additive_part) @@ -207,13 +211,14 @@ def _log_prob( # compute the constant term of the log likelihood constant_term = - self.D / 2 * tf.math.log(2 * tf.constant(np.pi, dtype=tf.float64)) - # compute the `log(det(AFFA))` component of the log likelihood + # compute the AFFA component of the log likelihood - our construction of \Sigma # TODO: this does not work for nu != D # af = tf.matmul(self.A_scale_matrix, f_sample) # (S, N, D, nu) af = tf.multiply(self.A_scale_matrix, f_sample) - - affa = tf.matmul(af, af, transpose_b=True) # (S, N, D, D) - our construction of \Sigma + affa = tf.matmul(af, af, transpose_b=True) # (S, N, D, D) affa = self._add_diagonal_additive_noise(affa) # (S, N, D, D) + + # compute the `log(det(AFFA))` component of the log likelihood # Before, the trainable additive noise sometimes broke the Cholesky decomposition. # This did not happen again after forcing it to be positive. # TODO: Can adding positive values to the diagonal ever make a PSD matrix become non-PSD? 
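On the TODO just above: no. If S is positive semi-definite and Lambda is diagonal with strictly positive entries, then v^T (S + Lambda) v = v^T S v + sum_i lambda_i * v_i^2 > 0 for every v != 0, so S + Lambda is positive definite and its Cholesky factorization exists. Failures can only be numerical (round-off), not mathematical. An illustrative check:

```python
import numpy as np

rng = np.random.default_rng(42)
F = rng.normal(size=(4, 4))
S = F @ F.T                                    # PSD by construction
Lam = np.diag(rng.uniform(0.01, 0.1, size=4))  # positive diagonal "additive noise"

L = np.linalg.cholesky(S + Lam)                # succeeds: S + Lam is positive definite
assert np.allclose(L @ L.T, S + Lam)
```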
@@ -224,7 +229,9 @@ def _log_prob( print(self.additive_part) print(e) log_det_affa = 2 * tf.math.reduce_sum( - tf.math.log(tf.linalg.diag_part(L)), + tf.math.log( + tf.linalg.diag_part(L) + ), axis=2 ) # (S, N) @@ -317,7 +324,7 @@ def __init__( nu: int = None, num_mc_samples: int = 2, num_factors: int = None, - A_scale_matrix_option: str = 'train_full_matrix', + scale_matrix_cholesky_option: str = 'train_full_matrix', train_additive_noise: bool = False, additive_noise_matrix_init: float = 0.01, verbose: bool = True, @@ -330,3 +337,30 @@ def __init__( ) raise NotImplementedError("Factorized Wishart process not implemented yet.") + + def _log_prob( + self, + x_data: np.array, + f_sample: tf.Tensor, + y_data: np.array, + ) -> tf.Tensor: + """ + Compute the (Monte Carlo estimate of) the log likelihood given samples of the GPs. + + This overrides the method in MonteCarloLikelihood. + + Parameters + ---------- + :param x_data: + Input tensor. + NumPy array of shape (num_time_steps, 1) or (N, 1). + :param f_sample: + Function evaluation tensor. + (num_mc_samples, num_time_steps, num_factors, degrees_of_freedom) or (S, N, K, nu) - + :param y_data: + Observation tensor. + (num_time_steps, num_time_series) or (N, D) - + :return: + (num_time_steps, ) or (N, ) + """ + assert isinstance(f_sample, tf.Tensor) diff --git a/fcest/models/wishart_process.py b/fcest/models/wishart_process.py index 633acb8..ac83620 100644 --- a/fcest/models/wishart_process.py +++ b/fcest/models/wishart_process.py @@ -53,7 +53,7 @@ def __init__( nu: int = None, kernel: Kernel = None, num_mc_samples: int = 5, - A_scale_matrix_option: str = 'train_full_matrix', + scale_matrix_cholesky_option: str = 'train_full_matrix', train_additive_noise: bool = True, kernel_lengthscale_init: float = 0.3, q_sqrt_init: float = 0.001, @@ -78,7 +78,7 @@ def __init__( :param num_mc_samples: The number of Monte Carlo samples used to approximate the ELBO. In the paper this is R, in the code sometimes S. - :param A_scale_matrix_option: + :param scale_matrix_cholesky_option: We found that training the full matrix yields the best results. :param train_additive_noise: :param kernel_lengthscale_init: @@ -105,7 +105,7 @@ def __init__( D=self.D, nu=nu, num_mc_samples=num_mc_samples, - A_scale_matrix_option=A_scale_matrix_option, + scale_matrix_cholesky_option=scale_matrix_cholesky_option, train_additive_noise=train_additive_noise, num_factors=num_factors, ) @@ -114,7 +114,7 @@ def __init__( D=self.D, nu=nu, num_mc_samples=num_mc_samples, - A_scale_matrix_option=A_scale_matrix_option, + scale_matrix_cholesky_option=scale_matrix_cholesky_option, train_additive_noise=train_additive_noise, ) super().__init__( @@ -338,7 +338,7 @@ def __init__( nu: int = None, kernel: Kernel = gpflow.kernels.Matern52(), num_mc_samples: int = 5, - A_scale_matrix_option: str = 'train_full_matrix', + scale_matrix_cholesky_option: str = 'train_full_matrix', train_additive_noise: bool = True, kernel_lengthscale_init: float = 0.3, q_sqrt_init: float = 0.001, @@ -362,7 +362,7 @@ def __init__( :param kernel: :param num_mc_samples: Number of Monte Carlo samples taken to approximate the ELBO. 
- :param A_scale_matrix_option: + :param scale_matrix_cholesky_option: :param train_additive_noise: :param kernel_lengthscale_init: :param q_sqrt_init: @@ -387,7 +387,7 @@ def __init__( D=self.D, nu=nu, num_mc_samples=num_mc_samples, - A_scale_matrix_option=A_scale_matrix_option, + scale_matrix_cholesky_option=scale_matrix_cholesky_option, train_additive_noise=train_additive_noise, num_factors=num_factors, verbose=verbose, @@ -397,7 +397,7 @@ def __init__( D=self.D, nu=nu, num_mc_samples=num_mc_samples, - A_scale_matrix_option=A_scale_matrix_option, + scale_matrix_cholesky_option=scale_matrix_cholesky_option, train_additive_noise=train_additive_noise, verbose=verbose, ) diff --git a/tests_requirements.txt b/tests_requirements.txt index 994ccdf..a0bad48 100644 --- a/tests_requirements.txt +++ b/tests_requirements.txt @@ -12,3 +12,4 @@ scikit-learn scipy statsmodels tensorflow>=2.10 +tf-keras From 3e7ccabdb8925ccc1d9c2ebae74bdf01624ae983 Mon Sep 17 00:00:00 2001 From: Onno Kampman Date: Fri, 15 Mar 2024 20:43:21 +0800 Subject: [PATCH 04/11] minor fix --- tests/fcest/models/test_likelihoods.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/tests/fcest/models/test_likelihoods.py b/tests/fcest/models/test_likelihoods.py index 3265c2c..7fe6684 100644 --- a/tests/fcest/models/test_likelihoods.py +++ b/tests/fcest/models/test_likelihoods.py @@ -20,11 +20,11 @@ class TestLikelihoods(unittest.TestCase): """ def test_tensor_computations( - self, - num_mc_samples: int = 2, - num_time_steps: int = 6, - num_time_series: int = 2, - nu: int = 3, + self, + num_mc_samples: int = 2, + num_time_steps: int = 6, + num_time_series: int = 2, + nu: int = 3, ) -> None: """ Test tensor computations. @@ -83,7 +83,7 @@ def test_wishart_process_likelihood(self): D=2, nu=2, num_mc_samples=7, - A_scale_matrix_option='train_full_matrix', + scale_matrix_cholesky_option='train_full_matrix', train_additive_noise=True ) From f867944f29109d1f6c7da5a24296649bb8202151 Mon Sep 17 00:00:00 2001 From: Onno Kampman Date: Fri, 15 Mar 2024 22:39:36 +0800 Subject: [PATCH 05/11] minor fix --- tests_requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests_requirements.txt b/tests_requirements.txt index a0bad48..8431053 100644 --- a/tests_requirements.txt +++ b/tests_requirements.txt @@ -11,5 +11,5 @@ rpy2==3.4.5 scikit-learn scipy statsmodels -tensorflow>=2.10 +tensorflow>=2.10,<=2.15 tf-keras From 5c722b2bbbf50450b73f5b99eecb767357dbf8be Mon Sep 17 00:00:00 2001 From: Onno Kampman Date: Sun, 7 Apr 2024 18:28:54 +0800 Subject: [PATCH 06/11] fix test environment --- setup.py | 4 +++- tests_requirements.txt | 1 + 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/setup.py b/setup.py index cd4225a..b427a8b 100644 --- a/setup.py +++ b/setup.py @@ -30,7 +30,9 @@ def read_file(filename: str) -> str: 'rpy2==3.4.5', 'scipy', 'statsmodels', - 'tensorflow>=2.10', + 'tensorflow>=2.10,<=2.15', + 'tensorflow-probability<=0.23', + 'tf-keras', ], python_requires='>=3.10', zip_safe=False diff --git a/tests_requirements.txt b/tests_requirements.txt index 8431053..da1df79 100644 --- a/tests_requirements.txt +++ b/tests_requirements.txt @@ -12,4 +12,5 @@ scikit-learn scipy statsmodels tensorflow>=2.10,<=2.15 +tensorflow-probability<=0.23 tf-keras From 2a1255a904b3a60534f671437ad2094ec8ff7881 Mon Sep 17 00:00:00 2001 From: Onno Kampman Date: Sat, 13 Apr 2024 21:43:03 +0800 Subject: [PATCH 07/11] add start of GPR model demo notebook --- notebooks/Model demos/GPR.ipynb | 146 
++++++++++++++++++++++++++++++++ 1 file changed, 146 insertions(+) create mode 100644 notebooks/Model demos/GPR.ipynb diff --git a/notebooks/Model demos/GPR.ipynb b/notebooks/Model demos/GPR.ipynb new file mode 100644 index 0000000..d697c94 --- /dev/null +++ b/notebooks/Model demos/GPR.ipynb @@ -0,0 +1,146 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# GPR model demo" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import gpflow\n", + "from gpflow.utilities import print_summary\n", + "# import matplotlib.pyplot as plt\n", + "import numpy as np\n", + "import pandas as pd\n", + "import tensorflow as tf\n", + "\n", + "# The lines below are specific to the notebook format\n", + "# %matplotlib inline\n", + "# plt.rcParams[\"figure.figsize\"] = (12, 6)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "N = 100 # number of time steps (scanning volumes)\n", + "D = 3 # number of time series (ROIs)\n", + "x = np.linspace(0, 1, N).reshape(-1, 1)\n", + "y = np.random.random(size=(N, D))\n", + "df = pd.DataFrame(y)\n", + "\n", + "X = df.iloc[:, 0].values.reshape(-1, 1)\n", + "Y = df.iloc[:, 1].values.reshape(-1, 1)\n", + "\n", + "# _ = plt.plot(X, Y, \"kx\", mew=2)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "k = gpflow.kernels.Matern52()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "print_summary(k)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "m = gpflow.models.GPR(\n", + " data=(X, Y),\n", + " kernel=k,\n", + " mean_function=None,\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "print_summary(m)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "m.likelihood.variance.assign(0.01)\n", + "m.kernel.lengthscales.assign(0.3)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "opt = gpflow.optimizers.Scipy()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "opt_logs = opt.minimize(\n", + " m.training_loss, m.trainable_variables, options=dict(maxiter=100)\n", + ")\n", + "print_summary(m)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "hello", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.0" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} From b45afea7544af455a5e12ffd58dce05063dc983f Mon Sep 17 00:00:00 2001 From: Onno Kampman Date: Sun, 5 May 2024 21:24:56 +0800 Subject: [PATCH 08/11] add skeleton for pypi publish --- .github/workflows/python-publish.yml | 39 ++++++++++++++++++++++++++++ 1 file changed, 39 insertions(+) create mode 100644 .github/workflows/python-publish.yml diff --git a/.github/workflows/python-publish.yml b/.github/workflows/python-publish.yml new file mode 100644 index 
0000000..bdaab28 --- /dev/null +++ b/.github/workflows/python-publish.yml @@ -0,0 +1,39 @@ +# This workflow will upload a Python Package using Twine when a release is created +# For more information see: https://docs.github.com/en/actions/automating-builds-and-tests/building-and-testing-python#publishing-to-package-registries + +# This workflow uses actions that are not certified by GitHub. +# They are provided by a third-party and are governed by +# separate terms of service, privacy policy, and support +# documentation. + +name: Upload Python Package + +on: + release: + types: [published] + +permissions: + contents: read + +jobs: + deploy: + + runs-on: ubuntu-latest + + steps: + - uses: actions/checkout@v3 + - name: Set up Python + uses: actions/setup-python@v3 + with: + python-version: '3.x' + - name: Install dependencies + run: | + python -m pip install --upgrade pip + pip install build + - name: Build package + run: python -m build + - name: Publish package + uses: pypa/gh-action-pypi-publish@27b31702a0e7fc50959f5ad993c78deac1bdfc29 + with: + user: __token__ + password: ${{ secrets.PYPI_API_TOKEN }} From 90f42c9833a5af0ba8e6312095430232afb576b1 Mon Sep 17 00:00:00 2001 From: Onno Kampman Date: Sun, 5 May 2024 21:27:50 +0800 Subject: [PATCH 09/11] add test for lower triangular indices tuples --- tests/fcest/helpers/test_array_operations.py | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/tests/fcest/helpers/test_array_operations.py b/tests/fcest/helpers/test_array_operations.py index 5478a75..02790bb 100644 --- a/tests/fcest/helpers/test_array_operations.py +++ b/tests/fcest/helpers/test_array_operations.py @@ -5,6 +5,7 @@ import tensorflow as tf from fcest.helpers.array_operations import are_all_positive_definite +from fcest.helpers.array_operations import get_all_lower_triangular_indices_tuples from fcest.helpers.array_operations import to_correlation_structure @@ -64,6 +65,14 @@ def test_assert_positive_definite_symmetric_positive_definite(self): matrices = tf.constant(matrices, dtype=tf.dtypes.float64) self.assertTrue(are_all_positive_definite(matrices)) + def test_get_all_lower_triangular_indices_tuples(self): + lower_triangular_indices = get_all_lower_triangular_indices_tuples( + num_time_series=3 + ) + true_tuples_list = [(1, 0), (2, 0), (2, 1)] + self.assertEqual(type(lower_triangular_indices), list) + self.assertEqual(lower_triangular_indices, true_tuples_list) + def test_to_correlation_structure(self): """ Test identical covariance and correlation structures. From de1859a9c86bfb5a8955fe328dc71897286878cb Mon Sep 17 00:00:00 2001 From: Onno Kampman Date: Sun, 5 May 2024 21:28:07 +0800 Subject: [PATCH 10/11] add start of inference test --- tests/fcest/helpers/test_inference.py | 42 +++++++++++++++++++++++++++ 1 file changed, 42 insertions(+) create mode 100644 tests/fcest/helpers/test_inference.py diff --git a/tests/fcest/helpers/test_inference.py b/tests/fcest/helpers/test_inference.py new file mode 100644 index 0000000..9901c3d --- /dev/null +++ b/tests/fcest/helpers/test_inference.py @@ -0,0 +1,42 @@ +import unittest + +import numpy as np + +from fcest.helpers.inference import run_adam + + +class TestInference(unittest.TestCase): + """ + Test functions in inference.py. + """ + + def test_run_adam(self): + """ + Test that the function returns a list. + """ + # logf = run_adam( + # "VWP", + # m, + # iterations=100, + # ) + logf = [] + self.assertEqual(type(logf), list) + + @staticmethod + def _simulate_d2_time_series() -> np.array: + """ + Get dummy time series. 
+ + :return: + Array of shape (N, D) + """ + N = 200 + D = 2 + x = np.linspace(0, 1, N).reshape(-1, 1) + y = np.random.random(size=(N, D)) + + return x, y + + +if __name__ == '__main__': + unittest.main() From cd4118af74e6a82a30a19ebee2a4d071d91ea1bc Mon Sep 17 00:00:00 2001 From: Onno Kampman Date: Sun, 5 May 2024 21:37:41 +0800 Subject: [PATCH 11/11] add pip install link --- README.md | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/README.md b/README.md index 1bebeb9..3117934 100644 --- a/README.md +++ b/README.md @@ -18,6 +18,10 @@ $ cd FCEst $ pip install -e . ``` +```zsh +$ pip install git+https://github.com/OnnoKampman/FCEst.git@v0.1.0 +``` + Make sure you have R installed and that `R_HOME` is set, for example by running `brew install r` on MacOS. At some point this package will be made directly available from PyPi.
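For reference, a minimal end-to-end sketch based only on signatures that appear in the patches above (the `VariationalWishartProcess` constructor and `predict_corr`); treat it as illustrative rather than canonical usage:

```python
import numpy as np
from fcest.models.wishart_process import VariationalWishartProcess

N, D = 200, 2
x_observed = np.linspace(0, 1, N).reshape(-1, 1)
y_observed = np.random.random(size=(N, D))  # dummy time series, as in the tests

m = VariationalWishartProcess(
    x_observed=x_observed,
    y_observed=y_observed,
    nu=D,  # defaults to D when omitted
)
# Training would go here, e.g. via fcest.helpers.inference.run_adam (see test_inference.py above).

corr_mean, corr_stddev = m.predict_corr(x_new=x_observed, num_mc_samples=300)
print(corr_mean.shape)  # expected: (N, D, D)
```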