diff --git a/CHANGELOG.md b/CHANGELOG.md index 6c19dd6d..53b1bd35 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -2,6 +2,10 @@ This file records changes to the codebase grouped by version release. Unreleased changes are generally only present during development (relevant parts of the changelog can be written and saved in that section before a version number has been assigned) +## [1.28.5] - 2023-06-28 + +- Enabled automated calibration of transfers and updated the documentation to cover this feature + ## [1.28.4] - 2023-05-27 - Added an option to save initial compartment sizes inside a `ParameterSet`. Importantly, this saved representation allows setting the initial _subcompartment_ sizes for a `TimedCompartment`. It therefore offers the possibility of initializing the model in a steady state computed from a previous simulation run, that would not be possible to initialize conventionally because standard initialization uniformly distributes people into the subcompartments of a timed compartment. diff --git a/atomica/calibration.py b/atomica/calibration.py index ed827fd6..16046035 100644 --- a/atomica/calibration.py +++ b/atomica/calibration.py @@ -43,7 +43,6 @@ def _update_parset(parset, y_factors, pars_to_adjust): tokens = par_name.split("_from_") par = parset.transfers[tokens[0]][tokens[1]] par.y_factor[pop_name] = y_factors[i] - raise NotImplementedError def _calculate_objective(y_factors, pars_to_adjust, output_quantities, parset, project): @@ -123,7 +122,10 @@ def calibrate(project, parset: ParameterSet, pars_to_adjust, output_quantities, :param pars_to_adjust: list of tuples, (par_name,pop_name,lower_limit,upper_limit) the pop name can be None, which will be expanded to all populations relevant to the parameter independently, or 'all' which will instead operate - on the meta y factor. + on the meta y factor. To calibrate a transfer, the parameter name should be set to + ``'<transfer_name>_from_<source_population>'`` and then the destination population can be specified + as the ``pop_name``. For example, to automatically calibrate an aging transfer 'age' from 5-14 + to 15-64, the tuple would contain ``pars_to_adjust=[('age_from_5-14','15-64',...)]`` :param output_quantities: list of tuples, (var_label,pop_name,weight,metric), for use in the objective function.
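For illustration, a hedged sketch of how the transfer calibration described above might be invoked. The framework and databook files, the parameter and output names (`foi`, `prev`), the populations (`5-14`, `15-64`), the `age` transfer, and the `"fractional"` metric are all placeholder names for this example rather than values defined in this changeset, and it assumes `calibrate` returns the adjusted `ParameterSet`:

```python
import atomica as at
from atomica.calibration import calibrate

# Hypothetical project whose databook defines an aging transfer 'age' from '5-14' to '15-64'
P = at.Project(framework="my_framework.xlsx", databook="my_databook.xlsx")
parset = P.parsets[0]

pars_to_adjust = [
    ("foi", None, 0.1, 10.0),               # ordinary parameter, expanded to all relevant populations
    ("age_from_5-14", "15-64", 0.1, 10.0),  # transfer: '<transfer_name>_from_<source>', destination given as pop_name
]

output_quantities = [
    ("prev", "15-64", 1.0, "fractional"),   # (var_label, pop_name, weight, metric); metric name assumed
]

calibrated_parset = calibrate(P, parset, pars_to_adjust, output_quantities, max_time=60)
result = P.run_sim(parset=calibrated_parset, result_name="calibrated")
```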
pop_name='all' is not supported :param max_time: If using ASD, the maximum run time @@ -243,7 +245,6 @@ def calibrate(project, parset: ParameterSet, pars_to_adjust, output_quantities, tokens = par_name.split("_from_") par = args["parset"].transfers[tokens[0]][tokens[1]] logger.debug("parset.transfers['{}']['{}'].y_factor['{}']={:.2f}".format(tokens[0], tokens[1], pop_name, par.y_factor[pop_name])) - raise NotImplementedError # Transfers might be handled differently in Atomica args["parset"].name = "calibrated_" + args["parset"].name diff --git a/atomica/demos.py b/atomica/demos.py index b56ed912..c9e30de0 100644 --- a/atomica/demos.py +++ b/atomica/demos.py @@ -11,10 +11,30 @@ from .scenarios import BudgetScenario from .utils import TimeSeries -__all__ = ["demo", "make_demo_scenarios"] - - -def demo(which: str = None, do_run: bool = True, addprogs: bool = True) -> Project: +__all__ = ["demo", "demo_projects", "make_demo_scenarios"] + + +demo_projects = [ "udt", "udt_dyn", "usdt", "cervicalcancer", "sir", "diabetes", "combined", "service", "hypertension", "hypertension_dyn", "hiv", "hiv_dyn", "tb_simple", "tb_simple_dyn", "tb", "dt", ] + + +def demo(which: str = "sir", do_run: bool = True, addprogs: bool = True) -> Project: """ Return a demo project @@ -25,30 +45,11 @@ def demo(which: str = None, do_run: bool = True, addprogs: bool = True) -> Proje """ - options = [ - "udt", - "udt_dyn", - "usdt", - "cervicalcancer", - "sir", - "diabetes", - "combined", - # 'service', - "hypertension", - "hypertension_dyn", - "hiv", - "hiv_dyn", - "tb_simple", - "tb_simple_dyn", - "environment", - "tb", - ] - - dtdict = sc.odict.fromkeys(options, 1.0) + dtdict = sc.odict.fromkeys(demo_projects, 1.0) dtdict["tb"] = 0.5 - if which is None or which not in options: - raise Exception("Supported project types are:\n%s" % ("\n".join(options))) + if which not in demo_projects: + raise Exception("Supported demonstration projects are:\n%s" % ("\n".join(demo_projects))) framework = LIBRARY_PATH / f"{which}_framework.xlsx" databook = LIBRARY_PATH / f"{which}_databook.xlsx" @@ -61,7 +62,7 @@ def demo(which: str = None, do_run: bool = True, addprogs: bool = True) -> Proje if do_run: P.run_sim(store_results=True) - if addprogs: + if addprogs and progbook.exists(): logger.debug("Loading progbook") P.load_progbook(progbook) diff --git a/atomica/excel.py b/atomica/excel.py index 2a0119ce..fd2a43ae 100644 --- a/atomica/excel.py +++ b/atomica/excel.py @@ -424,7 +424,8 @@ def __init__(self, code_name: str, full_name: str, tvec: np.array, from_pops: li raise Exception('Unknown TimeDependentConnections type - must be "transfer" or "interaction"') def __repr__(self): - return '' % (self.type.title(), self.code_name) + # return '' % (self.type.title(), self.code_name) + return sc.prepr(self) @classmethod def from_tables(cls, tables: list, interaction_type): @@ -834,7 +835,7 @@ def _write_pop_matrix(self, worksheet, start_row, formats, references: dict = No from_idx = self.from_pops.index(from_pop) to_idx = self.to_pops.index(to_pop) if boolean_choice: - value = "Y" if value else "N" + value = "Y" if (value.vals or value.assumption) else "N" content[from_idx, to_idx] = value # Write the content diff --git a/atomica/migration.py b/atomica/migration.py index 7c893a48..ea536eff 100644 --- a/atomica/migration.py +++ b/atomica/migration.py @@ -807,9 +807,8 @@ def _convert_framework_columns(framework): framework._validate() return framework + @migration("ParameterSet", "1.28.3", "1.28.4", "Add initialization
atttribute") def _parset_add_initialization(parset): parset.initialization = None return parset - - diff --git a/atomica/model.py b/atomica/model.py index 173de76d..432bb7a3 100644 --- a/atomica/model.py +++ b/atomica/model.py @@ -22,6 +22,8 @@ from .parameters import Parameter as ParsetParameter from .parameters import ParameterSet as ParameterSet import math + +from .utils import stochastic_rounding import pandas as pd model_settings = dict() @@ -228,11 +230,15 @@ def __setitem__(self, key, value) -> None: class Compartment(Variable): """A class to wrap up data for one compartment within a cascade network.""" - def __init__(self, pop, name): + def __init__(self, pop, name, multinomial: bool = False, rng_sampler=None): Variable.__init__(self, pop=pop, id=(pop.name, name)) self.units = "Number of people" self.outlinks = [] self.inlinks = [] + self.multinomial = multinomial + self.rng_sampler = rng_sampler + if self.multinomial and rng_sampler is None: + self.rng_sampler = np.random.default_rng() self._cached_outflow = None def unlink(self): @@ -352,11 +358,21 @@ def resolve_outflows(self, ti: int) -> None: else: rescale = 1 - n = rescale * self.vals[ti] self._cached_outflow = 0 - for link in self.outlinks: - link.vals[ti] = link._cache * n - self._cached_outflow += link.vals[ti] + + if self.multinomial: + # for every person in the compartment, we now have probabilities for each possible outflow or staying as the remainder + stays = max(0.0, 1.0 - outflow) + rescaled_frac_flows = [rescale * link._cache for link in self.outlinks] + [stays] + number_outflows = self.rng_sampler.multinomial(self[ti], rescaled_frac_flows, size=1)[0] + for ln, link in enumerate(self.outlinks): + link.vals[ti] = number_outflows[ln] + self._cached_outflow += link.vals[ti] + else: + n = rescale * self.vals[ti] + for link in self.outlinks: + link.vals[ti] = link._cache * n + self._cached_outflow += link.vals[ti] def update(self, ti: int) -> None: """ @@ -398,7 +414,7 @@ def connect(self, dest, par) -> None: class JunctionCompartment(Compartment): - def __init__(self, pop, name: str, duration_group: str = None): + def __init__(self, pop, name: str, duration_group: str = None, multinomial: bool = False, rng_sampler=None): """ A TimedCompartment has a duration group by virtue of having a `.parameter` attribute and a flush link. 
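A minimal standalone sketch (illustrative numbers only, not taken from this changeset) of the sampling step that `Compartment.resolve_outflows` performs in multinomial mode: each outflow link gets a probability, the remainder is the probability of staying, and integer counts are drawn with NumPy's multinomial sampler so that people are conserved:

```python
import numpy as np

rng = np.random.default_rng(0)

n_people = 100               # integer number of people in the compartment at this timestep
link_fracs = [0.10, 0.25]    # per-link outflow probabilities after any rescaling

stays = max(0.0, 1.0 - sum(link_fracs))            # probability of remaining in the compartment
counts = rng.multinomial(n_people, link_fracs + [stays])

link_outflows, n_staying = counts[:-1], counts[-1]
assert link_outflows.sum() + n_staying == n_people  # counts are integers and conserve people
```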
@@ -417,7 +433,7 @@ def __init__(self, pop, name: str, duration_group: str = None): :param duration_group: Optionally specify a duration group """ - super().__init__(pop, name) + super().__init__(pop, name, multinomial=multinomial, rng_sampler=rng_sampler) self.duration_group = duration_group #: Store the name of the duration group, if the junction belongs to one def connect(self, dest, par) -> None: @@ -516,12 +532,21 @@ def balance(self, ti: int) -> None: outflow_fractions = [link.parameter.vals[ti] for link in self.outlinks] total_outflow = sum(outflow_fractions) - # Finally, assign the inflow to the outflow proportionately accounting for the total outflow downscaling + outflow_fractions /= total_outflow # proportionately accounting for the total outflow downscaling + + # assign outflow stochastically to compartments based on the probabilities + if self.multinomial and net_inflow > 0.0: + if self.duration_group: + raise Exception("Need to implement code here to make sure outflows from EACH part of the duration group are integers") + else: + outflow_fractions = self.rng_sampler.multinomial(net_inflow, outflow_fractions, size=1)[0] / net_inflow + + # Finally, assign the inflow to the outflow proportionately for frac, link in zip(outflow_fractions, self.outlinks): if self.duration_group: - link._vals[:, ti] = net_inflow * frac / total_outflow + link._vals[:, ti] = net_inflow * frac else: - link.vals[ti] = net_inflow * frac / total_outflow + link.vals[ti] = net_inflow * frac def initial_flush(self) -> None: """ @@ -542,6 +567,10 @@ def initial_flush(self) -> None: outflow_fractions = np.array([link.parameter.vals[0] for link in self.outlinks]) outflow_fractions /= np.sum(outflow_fractions) + # assign outflow stochastically to compartments based on the probabilities + if self.multinomial: + outflow_fractions = self.rng_sampler.multinomial(self.vals[0], outflow_fractions, size=1)[0] / self.vals[0] + # Assign the inflow directly to the outflow compartments # This is done using the [] indexing on the downstream compartment so it is agnostic # to the downstream compartment type @@ -579,22 +608,37 @@ def balance(self, ti: int) -> None: net_inflow += link.vals[ti] # If not part of a duration group, get scalar flow from Link.vals outflow_fractions = np.zeros(len(self.outlinks)) + residual_link_index = None for i, link in enumerate(self.outlinks): if link.parameter is not None: outflow_fractions[i] = link.parameter.vals[ti] else: + residual_link_index = i outflow_fractions[i] = 0 total_outflow = outflow_fractions.sum() if total_outflow > 1: outflow_fractions /= total_outflow + # assign outflow stochastically to compartments based on the probabilities + if self.multinomial and net_inflow > 0.0: + if np.abs(np.round(net_inflow) - net_inflow).sum() > 1e-3: + raise Exception(f"Net inflow should be an integer for a stochastic run, was: {net_inflow}") + + # Give residual probability to the residual link if there is one, otherwise normalize probabilities + if residual_link_index is not None: + outflow_fractions[residual_link_index] = max(0, 1 - outflow_fractions.sum()) + else: # Probabilities have to sum to one as we have to flush the junction, and multinomials probabilities should sum to 1 + outflow_fractions = outflow_fractions / outflow_fractions.sum() + + outflow_fractions = self.rng_sampler.multinomial(np.round(net_inflow).astype(int), outflow_fractions, size=1)[0] / (net_inflow) + # Calculate outflows outflow = net_inflow.reshape(-1, 1) * outflow_fractions.reshape(1, -1) # Finally, assign the inflow 
to the outflow proportionately accounting for the total outflow downscaling for frac, link in zip(outflow_fractions, self.outlinks): - if link.parameter is None and total_outflow < 1: + if link.parameter is None and total_outflow < 1 and not self.multinomial: # If we are stochastic then we have already taken the residual link into account flow = net_inflow - np.sum(outflow, axis=1) # Sum after multiplying by outflow fractions to reduce numerical precision errors and enforce conserved quantities more accurately else: flow = net_inflow * frac @@ -627,6 +671,10 @@ def initial_flush(self) -> None: outflow_fractions /= total_outflow has_residual = False + # assign outflow stochastically to compartments based on the probabilities + if self.multinomial and total_outflow > 0.0: + outflow_fractions = self.rng_sampler.multinomial(self.vals[0], outflow_fractions, size=1)[0] / (self.vals[0] * sum(outflow_fractions)) + # Assign the inflow directly to the outflow compartments # This is done using the [] indexing on the downstream compartment so it is agnostic # to the downstream compartment type @@ -657,24 +705,28 @@ class SourceCompartment(Compartment): """ - def __init__(self, pop, name): - super().__init__(pop, name) + def __init__(self, pop, name, multinomial: bool = False, rng_sampler=None): + super().__init__(pop, name, multinomial=multinomial, rng_sampler=rng_sampler) def preallocate(self, tvec: np.array, dt: float) -> None: super().preallocate(tvec, dt) - self.vals.fill(0.0) #: Source compartments have an unlimited number of people in them. TODO: If this complicates validation, set to 0.0 + self.vals.fill(0.0) #: Source compartments have an unlimited number of people in them. Set to 0.0 to simplify validation. def resolve_outflows(self, ti: int) -> None: + # Unlike other Compartments, link._cache will still be in Number form and not converted to a proportion for link in self.outlinks: - link.vals[ti] = link._cache + if self.multinomial: + link.vals[ti] = stochastic_rounding(link._cache, rng_sampler=self.rng_sampler) + else: + link.vals[ti] = link._cache def update(self, ti: int) -> None: pass class SinkCompartment(Compartment): - def __init__(self, pop, name): - super().__init__(pop, name) + def __init__(self, pop, name, multinomial: bool = False, rng_sampler=None): + super().__init__(pop, name, multinomial=multinomial, rng_sampler=rng_sampler) def preallocate(self, tvec: np.array, dt: float) -> None: """ @@ -730,7 +782,7 @@ def update(self, ti: int) -> None: class TimedCompartment(Compartment): - def __init__(self, pop, name: str, parameter: ParsetParameter): + def __init__(self, pop, name: str, parameter: ParsetParameter, multinomial: bool = False, rng_sampler=None): """ Instantiate the TimedCompartment @@ -743,7 +795,7 @@ def __init__(self, pop, name: str, parameter: ParsetParameter): """ - Compartment.__init__(self, pop=pop, name=name) + Compartment.__init__(self, pop=pop, name=name, multinomial=multinomial, rng_sampler=rng_sampler) self._vals = None #: Primary storage, a matrix of size (duration x timesteps). The first row is the one that people leave from, the last row is the one that people arrive in self.parameter = parameter #: The parameter to read the duration from - this needs to be done after parameters are precomputed though, in case the duration is coming from a (constant) function self.flush_link = None #: Reference to the timed outflow link that flushes the compartment. 
Note that this needs to be set externally because Links are instantiated after Compartments @@ -847,6 +899,9 @@ def resolve_outflows(self, ti: int) -> None: """ + # TODO adjust for stochastic variables + assert self.multinomial == False, "resolve_outflows needs to be updated to work with stochastic transitions" + # First, work out the scale factors as usual self.flush_link._cache = 0.0 # At this stage, no outflow goes via the flush link @@ -1532,7 +1587,7 @@ class Population: Each model population must contain a set of compartments with equivalent names. """ - def __init__(self, framework, name: str, label: str, progset: ProgramSet, pop_type: str): + def __init__(self, framework, name: str, label: str, progset: ProgramSet, pop_type: str, multinomial: bool = False, rng_sampler=None): """ Construct a Population @@ -1546,11 +1601,16 @@ def __init__(self, framework, name: str, label: str, progset: ProgramSet, pop_ty :param label: The full name of the population :param progset: A ProgramSet instance + :param multinomial: whether to use stochasticity in determining population movement + :param rng_sampler: optional Generator for random numbers to give consistent results between model runs + """ self.name = name #: The code name of the population self.label = label #: The full name/label of the population self.type = pop_type #: The population's type + self.multinomial = multinomial #: Whether the population should be handled stochastically as discrete integer people instead of fractions + self.rng_sampler = rng_sampler self.comps = list() #: List of Compartment objects self.characs = list() #: List of Characteristic objects @@ -1761,17 +1821,17 @@ def build(self, framework, progset): for comp_name in list(comps.index): if comps.at[comp_name, "population type"] == self.type: if comp_name in residual_junctions: - self.comps.append(ResidualJunctionCompartment(pop=self, name=comp_name, duration_group=comps.at[comp_name, "duration group"])) + self.comps.append(ResidualJunctionCompartment(pop=self, name=comp_name, multinomial=self.multinomial, rng_sampler=self.rng_sampler, duration_group=comps.at[comp_name, "duration group"])) elif comps.at[comp_name, "is junction"] == "y": - self.comps.append(JunctionCompartment(pop=self, name=comp_name, duration_group=comps.at[comp_name, "duration group"])) + self.comps.append(JunctionCompartment(pop=self, name=comp_name, multinomial=self.multinomial, rng_sampler=self.rng_sampler, duration_group=comps.at[comp_name, "duration group"])) elif comps.at[comp_name, "duration group"]: - self.comps.append(TimedCompartment(pop=self, name=comp_name, parameter=self.par_lookup[comps.at[comp_name, "duration group"]])) + self.comps.append(TimedCompartment(pop=self, name=comp_name, multinomial=self.multinomial, rng_sampler=self.rng_sampler, parameter=self.par_lookup[comps.at[comp_name, "duration group"]])) elif comps.at[comp_name, "is source"] == "y": - self.comps.append(SourceCompartment(pop=self, name=comp_name)) + self.comps.append(SourceCompartment(pop=self, name=comp_name, multinomial=self.multinomial, rng_sampler=self.rng_sampler)) elif comps.at[comp_name, "is sink"] == "y": - self.comps.append(SinkCompartment(pop=self, name=comp_name)) + self.comps.append(SinkCompartment(pop=self, name=comp_name, multinomial=self.multinomial, rng_sampler=self.rng_sampler)) else: - self.comps.append(Compartment(pop=self, name=comp_name)) + self.comps.append(Compartment(pop=self, name=comp_name, multinomial=self.multinomial, rng_sampler=self.rng_sampler)) self.comp_lookup = {comp.name: comp 
for comp in self.comps} @@ -1848,7 +1908,6 @@ def initialize_compartments(self, parset: ParameterSet, framework, t_init: float """ - if not self.comps: # If this population has no compartments, then nothing needs to be done return @@ -1964,13 +2023,16 @@ def report_characteristic(charac, n_indent=0): # Otherwise, insert the values for i, c in enumerate(comps): - c[0] = max(0.0, x[i]) + if self.multinomial: + c[0] = stochastic_rounding(max(0.0, x[i]), rng_sampler=self.rng_sampler) # integer values (still represented as floats) + else: + c[0] = max(0.0, x[i]) class Model: """A class to wrap up multiple populations within model and handle cross-population transitions.""" - def __init__(self, settings, framework, parset, progset=None, program_instructions=None): + def __init__(self, settings, framework, parset, progset=None, program_instructions=None, rng_sampler=None, acceptance_criteria=[]): # Note that if a progset is provided and program instructions are not, then programs will not be # turned on. However, the progset is still available so that the coverage can still be probed @@ -1990,6 +2052,13 @@ def __init__(self, settings, framework, parset, progset=None, program_instructio self.program_instructions = sc.dcp(program_instructions) # program instructions self.t = settings.tvec #: Simulation time vector (this is a brand new instance from the `settings.tvec` property method) self.dt = settings.sim_dt #: Simulation time step + self.multinomial = settings.multinomial #: Run with discrete numbers of people per compartment instead of fractions. + + self.acceptance_criteria = sc.dcp(acceptance_criteria) #: If the model fails to adhere to these criteria, raise a BadInitialization exception + + if rng_sampler is None: + rng_sampler = np.random.default_rng() + self.rng_sampler = rng_sampler self._t_index = 0 # Keeps track of array index for current timepoint data within all compartments. 
self._vars_by_pop = None # Cache to look up lists of variables by name across populations @@ -2134,7 +2203,7 @@ def build(self, parset): # First construct populations for k, (pop_name, pop_label, pop_type) in enumerate(zip(parset.pop_names, parset.pop_labels, parset.pop_types)): - self.pops.append(Population(framework=self.framework, name=pop_name, label=pop_label, progset=self.progset, pop_type=pop_type)) + self.pops.append(Population(framework=self.framework, name=pop_name, label=pop_label, progset=self.progset, pop_type=pop_type, multinomial=self.multinomial, rng_sampler=self.rng_sampler)) self._pop_ids[pop_name] = k # Expand interactions into matrix form @@ -2258,6 +2327,21 @@ def build(self, parset): obj.preallocate(self.t, self.dt) pop.initialize_compartments(parset, self.framework, self.t[0]) + # More finally, set up acceptance criteria for those which should be checked at runtime (variables that have ._is_dynamic = True), and those which can only be checked after conclusion + if self.acceptance_criteria: + for criteria in self.acceptance_criteria: + pars = self._vars_by_pop[criteria["parameter"]] + for par in pars: + if par.pop.name == criteria["population"]: + if par._is_dynamic: + criteria["evaluate_at"] = self.t[np.where(self.t > criteria["t_range"][1])][0] + else: + criteria["evaluate_at"] = np.inf # can only evaluate postcompute + + # sort by evaluation time - now we only have to look at the first element every timestep + self.acceptance_criteria = sorted(self.acceptance_criteria, key=lambda item: item["evaluate_at"]) + self.acceptance_index = 0 # which index to evaluate + def _set_exec_order(self) -> None: """ Get the execution order @@ -2381,6 +2465,8 @@ def process(self) -> None: self.update_comps() self.update_pars() self.update_links() + if self.acceptance_criteria: + self.update_acceptance() # Update postcompute parameters - note that it needs to be done in execution order for par_name in self._exec_order["all_pars"]: @@ -2389,6 +2475,9 @@ def process(self) -> None: par.update() par.constrain() + if self.acceptance_criteria: # call update_acceptance once more for any variables that could only be assessed postcompute + self.update_acceptance(evaluate_all=True) + # Clear characteristic internal storage and switch to dynamic computation to save space for pop in self.pops: for charac in pop.characs: @@ -2516,6 +2605,39 @@ def update_comps(self) -> None: for comp in pop.comps: comp.update(ti) + def update_acceptance(self, evaluate_all=False) -> None: + """ + Update any acceptance criteria for the model run, raise BadInitialization error if the model run is failing + + :param evaluate_all: Final evaluation of all acceptance criteria. 
+ + """ + + time = self.t[self._t_index] # actual time + + # We know that these are sorted by evaluation time so we only need to look at the first index + while (self.acceptance_index < len(self.acceptance_criteria)) and (evaluate_all or time >= self.acceptance_criteria[self.acceptance_index]["evaluate_at"]): + # time ranges for assessment are in order, so only need to check the first one + assessable = self.acceptance_criteria[self.acceptance_index] + # we have passed the time window, assess if the criteria was met - either return a fail condition or remove this acceptance criteria + a_par = assessable["parameter"] + a_pop = assessable["population"] + a_times = assessable["t_range"] + accept_vals = assessable["value"] + a_t_inds = np.where(np.logical_and(self.t >= a_times[0], self.t <= a_times[1])) # self.t is the full tvec for the model + + pars = self._vars_by_pop[a_par] + for par in pars: # TODO should be more efficient way to directly access the par values for the pop rather than looping + if par.pop.name == a_pop: + a_t_val = np.mean(par[a_t_inds]) # mean across the time range + if a_t_val <= accept_vals[0] or a_t_val >= accept_vals[1]: + # print(f'{time} Rejecting run as sampled sim value {a_t_val} outside of acceptable bound {accept_vals} for parameter {a_par}, population {a_pop} at time {a_times}') + raise BadInitialization(f"Rejecting run as sampled sim value {a_t_val} outside of acceptable bound {accept_vals} for parameter {a_par}, population {a_pop} at time {a_times}") + # else: + # print(f'{time} ACCEPTING run as sampled sim value {a_t_val} inside of acceptable bound {accept_vals} for parameter {a_par}, population {a_pop} at time {a_times}') + assessable["assessed_value"] = a_t_val + self.acceptance_index += 1 + def flush_junctions(self) -> None: """ Flush initialization values from junctions @@ -2652,7 +2774,7 @@ def update_pars(self) -> None: par.constrain(ti) -def run_model(settings, framework, parset: ParameterSet, progset: ProgramSet = None, program_instructions: ProgramInstructions = None, name: str = None): +def run_model(settings, framework, parset: ParameterSet, progset: ProgramSet = None, program_instructions: ProgramInstructions = None, name: str = None, rng_sampler=None, acceptance_criteria=[]): """ Build and process model @@ -2676,10 +2798,12 @@ def run_model(settings, framework, parset: ParameterSet, progset: ProgramSet = N :param progset: Optionally provide a :class:`ProgramSet` instance to use programs :param program_instructions: Optional :class:`ProgramInstructions` instance. 
If ``progset`` is specified, then instructions must be provided :param name: Optionally specify the name to assign to the output result + :param rng_sampler: Optionally specify a random number generator that may have been seeded to generate consistent results + :return: A :class:`Result` object containing the processed model """ - m = Model(settings, framework, parset, progset, program_instructions) + m = Model(settings, framework, parset, progset, program_instructions, rng_sampler=rng_sampler, acceptance_criteria=acceptance_criteria) m.process() return Result(model=m, parset=parset, name=name) diff --git a/atomica/parameters.py b/atomica/parameters.py index 5f668300..d3221497 100644 --- a/atomica/parameters.py +++ b/atomica/parameters.py @@ -96,7 +96,7 @@ def interpolate(self, tvec, pop_name: str) -> np.array: return self.ts[pop_name].interpolate(tvec, method=self._interpolation_method) - def sample(self, constant: bool) -> None: + def sample(self, constant: bool, rng_sampler=None) -> None: """ Perturb parameter based on uncertainties @@ -105,11 +105,11 @@ def sample(self, constant: bool) -> None: :param constant: If True, time series will be perturbed by a single constant offset. If False, a different perturbation will be applied to each time specific value independently. - + :param rng_sampler: Optional random number generator that may have been seeded to generate consistent results """ for k, ts in self.ts.items(): - self.ts[k] = ts.sample(constant) + self.ts[k] = ts.sample(constant, rng_sampler) def smooth(self, tvec, method="smoothinterp", pop_names=None, **kwargs): """ @@ -341,16 +341,16 @@ def from_excel(cls, excelfile: pd.ExcelFile): """ # excelfile = spreadsheet.pandas() - metadata, value_df = atomica.excel.read_dataframes(excelfile.book['Initialization']) + metadata, value_df = atomica.excel.read_dataframes(excelfile.book["Initialization"]) values = {} - for k,s in value_df.T.reset_index().T.set_index([0,1]).iterrows(): + for k, s in value_df.T.reset_index().T.set_index([0, 1]).iterrows(): v = s.dropna().values values[k] = v[0] if len(v) == 1 else v self = cls(values) - for k,v in metadata.T.reset_index().T.set_index(0)[1].to_dict().items(): + for k, v in metadata.T.reset_index().T.set_index(0)[1].to_dict().items(): setattr(self, k, v) return self @@ -509,19 +509,20 @@ def get_par(self, name: str, pop: str = None) -> Parameter: else: raise KeyError(f'Parameter "{name}" not found') - def sample(self, constant=True): + def sample(self, constant: bool = True, rng_sampler=None): """ Return a sampled copy of the ParameterSet :param constant: If True, time series will be perturbed by a single constant offset. If False, a different perturbation will be applied to each time specific value independently. 
+ :param rng_sampler: Optional random number generator that may have been seeded to generate consistent results :return: A new :class:`ParameterSet` with perturbed values """ - new = sc.dcp(self) - for par in new.all_pars(): - par.sample(constant) + + for i, par in enumerate(new.all_pars()): + par.sample(constant=constant, rng_sampler=rng_sampler) return new def make_constant(self, year: float): @@ -534,7 +535,7 @@ def make_constant(self, year: float): :param year: Year to use for interpolation :return: A copy of the ParameterSet with constant parameters """ - ps = self.copy(f'{self.name} (constant)') + ps = self.copy(f"{self.name} (constant)") for par in ps.all_pars(): for ts in par.ts.values(): ts.insert(None, ts.interpolate(year)) diff --git a/atomica/plotting.py b/atomica/plotting.py index 96362ca5..b77b9b6d 100644 --- a/atomica/plotting.py +++ b/atomica/plotting.py @@ -392,7 +392,7 @@ def placeholder_pop(): elif pop_aggregation == "weighted": numerator = sum(aggregated_outputs[x][output_name] * popsize[x] for x in pop_labels) # Add together all the outputs denominator = sum([popsize[x] for x in pop_labels]) - vals = np.divide(numerator, denominator, out=np.full(numerator.shape, np.nan, dtype=float), where=numerator != 0) + vals = np.divide(numerator, denominator, out=np.full(numerator.shape, np.nan, dtype=float), where=denominator != 0) else: raise Exception("Unknown pop aggregation method") self.series.append(Series(tvecs[result_label], vals, result_label, pop_name, output_name, data_label[output_name], units=aggregated_units[output_name], timescale=aggregated_timescales[output_name], data_pop=pop_name)) @@ -447,9 +447,9 @@ def accumulate(self, accumulation_method) -> None: s.vals = np.cumsum(s.vals) elif accumulation_method == "integrate": if s.timescale: - s.vals = scipy.integrate.cumtrapz(s.vals, s.tvec / s.timescale) + s.vals = scipy.integrate.cumulative_trapezoid(s.vals, s.tvec / s.timescale) else: - s.vals = scipy.integrate.cumtrapz(s.vals, s.tvec) + s.vals = scipy.integrate.cumulative_trapezoid(s.vals, s.tvec) s.vals = np.insert(s.vals, 0, 0.0) # If integrating a quantity with a timescale, then lose the timescale factor @@ -977,7 +977,9 @@ def set_colors(self, colors=None, results="all", pops="all", outputs="all", over elif isinstance(colors, list): assert len(colors) == len(targets), "Number of colors must either be a string, or a list with as many elements as colors to set" colors = colors - elif colors.startswith("#") or colors not in [m for m in plt.cm.datad if not m.endswith("_r")]: + elif isinstance(colors, tuple) and colors[0].startswith("#"): # tuple with (color, opacity) + colors = [colors for _ in range(len(targets))] # Apply color to all requested outputs + elif isinstance(colors, str) and (colors.startswith("#") or colors not in [m for m in plt.cm.datad if not m.endswith("_r")]): colors = [colors for _ in range(len(targets))] # Apply color to all requested outputs else: color_norm = matplotlib_colors.Normalize(vmin=-1, vmax=len(targets)) @@ -1439,7 +1441,8 @@ def process_input_stacks(input_stacks, available_items): return figs -def plot_series(plotdata, plot_type="line", axis=None, data=None, legend_mode=None, lw=None, n_cols: int = None) -> list: +def plot_series(plotdata, plot_type="line", axis=None, data=None, legend_mode=None, lw=None, n_cols: int = None, + colors=None) -> list: """ Produce a time series plot @@ -1452,6 +1455,7 @@ def plot_series(plotdata, plot_type="line", axis=None, data=None, legend_mode=No :param lw: override the default line width :param 
n_cols: If None (default), separate figures will be created for each axis. If provided, axes will be tiled as subplots in a single figure window with the requested number of columns + :param colors: Colors to be passed to plotdata.set_colors :return: A list of newly created Figures """ @@ -1505,7 +1509,7 @@ def _prepare_figures(dim1, dim2, n_cols): logger.warning("At least one Series has only one timepoint. Series must have at least 2 time points to be rendered as a line - `plot_bars` may be more suitable for such data") if axis == "results": - plotdata.set_colors(results=plotdata.results.keys()) + plotdata.set_colors(colors=colors, results=plotdata.results.keys()) figs, axes = _prepare_figures(plotdata.pops, plotdata.outputs, n_cols) @@ -1540,7 +1544,7 @@ def _prepare_figures(dim1, dim2, n_cols): _render_legend(ax, plot_type) elif axis == "pops": - plotdata.set_colors(pops=plotdata.pops.keys()) + plotdata.set_colors(colors=colors, pops=plotdata.pops.keys()) figs, axes = _prepare_figures(plotdata.results, plotdata.outputs, n_cols) @@ -1573,7 +1577,7 @@ def _prepare_figures(dim1, dim2, n_cols): _render_legend(ax, plot_type) elif axis == "outputs": - plotdata.set_colors(outputs=plotdata.outputs.keys()) + plotdata.set_colors(colors=colors, outputs=plotdata.outputs.keys()) figs, axes = _prepare_figures(plotdata.results, plotdata.pops, n_cols) diff --git a/atomica/programs.py b/atomica/programs.py index f3f60f1f..277eb3f5 100644 --- a/atomica/programs.py +++ b/atomica/programs.py @@ -1152,7 +1152,7 @@ def get_outcomes(self, prop_coverage: dict) -> dict: return {(covout.par, covout.pop): covout.get_outcome(prop_coverage) for covout in self.covouts.values()} - def sample(self, constant: bool = True): + def sample(self, constant: bool = True, rng_sampler=None): """ Perturb programs based on uncertainties @@ -1169,9 +1169,9 @@ def sample(self, constant: bool = True): new = sc.dcp(self) for prog in new.programs.values(): - prog.sample(constant) + prog.sample(constant, rng_sampler=rng_sampler) for covout in new.covouts.values(): - covout.sample() + covout.sample(rng_sampler=rng_sampler) return new @@ -1232,7 +1232,7 @@ def is_one_off(self) -> bool: """ return "/year" not in self.unit_cost.units - def sample(self, constant: bool) -> None: + def sample(self, constant: bool, rng_sampler=None) -> None: """ Perturb program values based on uncertainties @@ -1242,11 +1242,12 @@ def sample(self, constant: bool) -> None: :param constant: If True, time series will be perturbed by a single constant offset. If False, an different perturbation will be applied to each time specific value independently. + :param rng_sampler: Optional random number generator that may have been seeded to generate consistent results """ self.spend_data = self.spend_data.sample(constant) self.unit_cost = self.unit_cost.sample(constant) - self.capacity_constraint = self.capacity_constraint.sample(constant) + self.capacity_constraint = self.capacity_constraint.sample(constant, rng_sampler) self.saturation = self.saturation.sample(constant) self.coverage = self.coverage.sample(constant) @@ -1408,25 +1409,28 @@ def n_progs(self) -> int: return len(self.progs) - def sample(self) -> None: + def sample(self, rng_sampler=None) -> None: """ Perturb the values entered in the databook The :class:`Covout` instance is modified in-place. Note that the program outcomes are scalars that do not vary over time - therefore, :meth:`Covout.sample()` does not have a ``constant`` argument. 
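A small sketch of how the `rng_sampler` arguments threaded through the sampling methods in this changeset can be used for reproducible perturbations. It assumes an existing project `P` with at least one parameter set, and relies only on the `sample(constant, rng_sampler)` signature introduced above:

```python
import numpy as np

parset = P.parsets[0]  # assumes a project P is already loaded

seed = 12345
sample_a = parset.sample(constant=True, rng_sampler=np.random.default_rng(seed))
sample_b = parset.sample(constant=True, rng_sampler=np.random.default_rng(seed))
# Generators created from the same seed draw the same perturbations, so the two
# sampled ParameterSets should contain identical values
```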
- + :param rng_sampler: Optional random number generator that may have been seeded to generate consistent results """ if self.sigma is None: return + if rng_sampler is None: + rng_sampler = np.random + for k, v in self.progs.items(): - self.progs[k] = v + self.sigma * np.random.randn(1)[0] + self.progs[k] = v + self.sigma * rng_sampler.standard_normal(1)[0] # Perturb the interactions if self._interactions: for k, v in self.interactions.items(): - self.interactions[k] = v + self.sigma * np.random.randn(1)[0] + self.interactions[k] = v + self.sigma * rng_sampler.standard_normal(1)[0] tokens = ["%s=%.4f" % ("+".join(k), v) for k, v in self.interactions.items()] self.imp_interaction = ",".join(tokens) diff --git a/atomica/project.py b/atomica/project.py index e6881957..8b800381 100644 --- a/atomica/project.py +++ b/atomica/project.py @@ -44,10 +44,24 @@ class ProjectSettings: - def __init__(self, sim_start=2000, sim_end=2035, sim_dt=0.25): + def __init__(self, sim_start:float=2000, sim_end:float=2035, sim_dt:float=0.25, multinomial:bool=False): + """ + Project settings object + + This contains various settings that are used by the Project when running simulations. + Most importantly, it contains the simulation start year, end year, and timestep, with + validation performed when setting these values. + + :param sim_start: Simulation start year + :param sim_end: Simulation end year + :param sim_dt: Simulation time step + :param multinomial: Use multinomial mode with stochastic transitions + """ + self._sim_start = sim_start self._sim_dt = sim_dt self._sim_end = 0.0 + self.multinomial = multinomial self.update_time_vector(end=sim_end) def __repr__(self): @@ -94,8 +108,8 @@ def tvec(self) -> np.ndarray: :return: Array of simulation times """ - - return np.linspace(self.sim_start, self.sim_end, int((self.sim_end - self.sim_start) / self.sim_dt) + 1) + return np.arange(round((self.sim_end - self.sim_start) / self.sim_dt) + 1) * self.sim_dt + self.sim_start + # return np.linspace(self.sim_start, self.sim_end, int((self.sim_end - self.sim_start) / self.sim_dt) + 1) def update_time_vector(self, start: float = None, end: float = None, dt: float = None) -> None: """ @@ -475,7 +489,7 @@ def update_settings(self, sim_start=None, sim_end=None, sim_dt=None): """Modify the project settings, e.g. the simulation time vector.""" self.settings.update_time_vector(start=sim_start, end=sim_end, dt=sim_dt) - def run_sim(self, parset=None, progset=None, progset_instructions=None, store_results=False, result_name: str = None): + def run_sim(self, parset=None, progset=None, progset_instructions=None, store_results=False, result_name: str = None, rng=None, acceptance_criteria=[]): """ Run a single simulation @@ -488,9 +502,14 @@ def run_sim(self, parset=None, progset=None, progset_instructions=None, store_re :param progset_instructions: A :class:`ProgramInstructions` instance. Programs will only be used if a instructions are provided :param store_results: If True, then the result will automatically be stored in ``self.results`` :param result_name: Optionally assign a specific name to the result (otherwise, a unique default name will automatically be selected) + :param rng: Optionally a random number generator that may have been seeded to generate consistent results, or a random_seed used to generate a Generator + :param acceptance_criteria: Optional criteria to assert that outputs are within a given tolerance of a given value for given parameters over a given period of time + tolerance e.g. 
= [{'parameter': 'incidence', 'population': 'adults', 'value': (0.02, 0.025), 't_range': (2018, 2018.99)}] + :return: A :class:`Result` instance """ + rng_sampler = rng if isinstance(rng, np.random._generator.Generator) else np.random.default_rng(rng) parset = self.parset(parset) if progset is not None: @@ -509,14 +528,14 @@ def run_sim(self, parset=None, progset=None, progset_instructions=None, store_re k += 1 tm = sc.tic() - result = run_model(settings=self.settings, framework=self.framework, parset=parset, progset=progset, program_instructions=progset_instructions, name=result_name) + result = run_model(settings=self.settings, framework=self.framework, parset=parset, progset=progset, program_instructions=progset_instructions, name=result_name, rng_sampler=rng_sampler, acceptance_criteria=acceptance_criteria) logger.info('Elapsed time for running "%s": %ss', self.name, sc.sigfig(sc.toc(tm, output=True), 3)) if store_results: self.results.append(result) return result - def run_sampled_sims(self, parset, progset=None, progset_instructions=None, result_names=None, n_samples: int = 1, parallel=False, max_attempts=None, num_workers=None) -> list: + def run_sampled_sims(self, parset, progset=None, progset_instructions=None, result_names=None, n_samples: int = 1, rand_seed: int = None, parallel=False, max_attempts=None, num_workers=None, acceptance_criteria=[]) -> list: """ Run sampled simulations @@ -540,6 +559,11 @@ def run_sampled_sims(self, parset, progset=None, progset_instructions=None, resu :param parallel: If True, run simulations in parallel (on Windows, must have ``if __name__ == '__main__'`` gating the calling code) :param max_attempts: Number of retry attempts for bad initializations :param num_workers: If ``parallel`` is True, this determines the number of parallel workers to use (default is usually number of CPUs) + :param rng_sampler: Optional random number generator that may have been seeded to generate consistent results + :param acceptance_criteria: Optional criteria to assert that outputs are within a given tolerance of a given value for given parameters in given years + tolerance e.g. = [{'parameter': 'incidence', 'population': 'adults', 'value': (0.02, 0.025), 't': 2018}] + + :return: A list of Results that can be passed to `Ensemble.update()`. If multiple instructions are provided, the return value of this function will be a list of lists, where the inner list iterates over different instructions for the same parset/progset samples. 
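The seeding and acceptance-criteria options added to `run_sim` and `run_sampled_sims` can be combined along these lines; `'incidence'` and `'adults'` are illustrative names, and the criteria format follows the docstring example above:

```python
criteria = [
    {"parameter": "incidence", "population": "adults", "value": (0.02, 0.025), "t_range": (2018, 2018.99)},
]

# Single reproducible run: `rng` may be a numpy Generator or a plain integer seed
res = P.run_sim(parset=P.parsets[0], rng=1, acceptance_criteria=criteria)

# Sampled runs: rand_seed seeds a parent generator that spawns one generator per sample.
# Samples whose mean 'incidence' over the t_range falls outside (0.02, 0.025) raise
# BadInitialization internally and are retried, up to max_attempts times (50 by default)
results = P.run_sampled_sims(parset=P.parsets[0], n_samples=20, rand_seed=1, acceptance_criteria=criteria)
```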
It is expected in that case that the Ensemble's mapping function would take in a list of results @@ -563,17 +587,27 @@ def run_sampled_sims(self, parset, progset=None, progset_instructions=None, resu show_progress = n_samples > 1 and logger.getEffectiveLevel() <= logging.INFO + if rand_seed is not None: + rng = np.random.default_rng(seed=rand_seed) + seed_samples = rng.integers(1e15, size=n_samples) + else: + seed_samples = [None] * n_samples + + model_rngs = [np.random.default_rng(seed=seed) for seed in seed_samples] # generate a RNG for each model + if parallel: - fcn = functools.partial(_run_sampled_sim, proj=self, parset=parset, progset=progset, progset_instructions=progset_instructions, result_names=result_names, max_attempts=max_attempts) - results = parallel_progress(fcn, n_samples, show_progress=show_progress, num_workers=num_workers) + fcn = functools.partial(_run_sampled_sim, proj=self, parset=parset, progset=progset, progset_instructions=progset_instructions, result_names=result_names, max_attempts=max_attempts, acceptance_criteria=acceptance_criteria) + # as multiprocessing does not handle partial functions as compiled functions, need to send the rngs as kwargs in a dictionary, not as args to the partial function + model_rng_kwargs = [{"rng_sampler": rng} for rng in model_rngs] + results = parallel_progress(fcn, model_rng_kwargs, show_progress=show_progress, num_workers=num_workers) elif show_progress: # Print the progress bar if the logging level was INFO or lower # This means that the user can still set the logging level higher e.g. WARNING to suppress output from Atomica in general # (including any progress bars) with Quiet(): - results = [_run_sampled_sim(self, parset, progset, progset_instructions, result_names, max_attempts=max_attempts) for _ in tqdm.trange(n_samples)] + results = [_run_sampled_sim(self, parset, progset, progset_instructions, result_names, max_attempts=max_attempts, rng_sampler=rng, acceptance_criteria=acceptance_criteria) for rng in tqdm.tqdm(model_rngs)] else: - results = [_run_sampled_sim(self, parset, progset, progset_instructions, result_names, max_attempts=max_attempts) for _ in range(n_samples)] + results = [_run_sampled_sim(self, parset, progset, progset_instructions, result_names, max_attempts=max_attempts, rng_sampler=rng, acceptance_criteria=acceptance_criteria) for rng in model_rngs] return results @@ -715,7 +749,7 @@ def __setstate__(self, d): self.__dict__ = P.__dict__ -def _run_sampled_sim(proj, parset, progset, progset_instructions: list, result_names: list, max_attempts: int = None): +def _run_sampled_sim(proj, parset, progset, progset_instructions: list, result_names: list, max_attempts: int = None, rng_sampler=None, acceptance_criteria=[]): """ Internal function to run simulation with sampling @@ -736,6 +770,9 @@ def _run_sampled_sim(proj, parset, progset, progset_instructions: list, result_n :param progset_instructions: A list of instructions to run against a single sample :param result_names: A list of result names (strings) :param max_attempts: Maximum number of sampling attempts before raising an error + :param rng_sampler: Optional random number generator that may have been seeded to generate consistent results + :param acceptance_criteria: Optional criteria to assert that outputs are within a given tolerance of a given value for given parameters over a given period of time + tolerance e.g. 
= [{'parameter': 'incidence', 'population': 'adults', 'value': (0.02, 0.025), 't_range': (2018, 2018.99)}] :return: A list of results that either contains 1 result, or the same number of results as instructions """ @@ -745,16 +782,23 @@ def _run_sampled_sim(proj, parset, progset, progset_instructions: list, result_n if max_attempts is None: max_attempts = 50 + if rng_sampler is None: + rng_sampler = np.random.default_rng() + + # Set up separate seeds for each attempt and seed a rng for each one (in case scenarios diverge in later timepoints, it's important to START from the same seeds) + rand_seeds = rng_sampler.integers(1e15, size=max_attempts) + attempts = 0 while attempts < max_attempts: try: + rng_attempt = np.random.default_rng(seed=rand_seeds[attempts]) + sampled_parset = parset.sample(rng_sampler=rng_attempt) if progset: - sampled_parset = parset.sample() - sampled_progset = progset.sample() - results = [proj.run_sim(parset=sampled_parset, progset=sampled_progset, progset_instructions=x, result_name=y) for x, y in zip(progset_instructions, result_names)] + sampled_progset = progset.sample(rng_sampler=rng_attempt) + results = [proj.run_sim(parset=sampled_parset, progset=sampled_progset, progset_instructions=x, result_name=y, rng=rng_attempt, acceptance_criteria=acceptance_criteria) for x, y in zip(progset_instructions, result_names)] else: - sampled_parset = parset.sample() - results = [proj.run_sim(parset=sampled_parset, result_name=y) for y in result_names] + results = [proj.run_sim(parset=sampled_parset, result_name=y, rng=rng_attempt, acceptance_criteria=acceptance_criteria) for y in result_names] + return results except BadInitialization: attempts += 1 diff --git a/atomica/results.py b/atomica/results.py index 11aa0775..2034e0b8 100644 --- a/atomica/results.py +++ b/atomica/results.py @@ -838,30 +838,21 @@ def _write_df(writer, formats, sheet_name, df, level_ordering): for level in level_ordering: order[level] = list(dict.fromkeys(df.index.get_level_values(level))) - required_width = [0] * (len(level_ordering) - 1) - - row = 0 + df = df.reorder_levels(level_ordering) + for i in range(len(level_ordering)): + df = df.reindex(order[level_ordering[i]], level=i) worksheet = writer.book.add_worksheet(sheet_name) writer.sheets[sheet_name] = worksheet # Need to add it to the ExcelWriter for it to behave properly - for title, table in df.groupby(level=level_ordering[0], sort=False): - worksheet.write_string(row, 0, title, formats["center_bold"]) - row += 1 - - table.reset_index(level=level_ordering[0], drop=True, inplace=True) # Drop the title column - table = table.reorder_levels(level_ordering[1:]) - for i in range(1, len(level_ordering)): - table = table.reindex(order[level_ordering[i]], level=i - 1) - table.index = table.index.set_names([level_substitutions[x] if x in level_substitutions else x.title() for x in table.index.names]) - table.to_excel(writer, sheet_name=sheet_name, startcol=0, startrow=row, merge_cells=False) - row += table.shape[0] + 2 + df.index = df.index.set_names([level_substitutions[x] if x in level_substitutions else x.title() for x in df.index.names]) + df.to_excel(writer, sheet_name, startcol=0, startrow=0, merge_cells=False) - required_width[0] = max(required_width[0], len(title)) - for i in range(0, len(required_width)): - required_width[i] = max(required_width[i], max(table.index.get_level_values(i).str.len())) + required_width = [len(name) for name in df.index.names] + for i in range(len(required_width)): + required_width[i] = max(len(val) for val in 
order[level_ordering[i]]) - for i in range(0, len(required_width)): + for i in range(len(required_width)): if required_width[i] > 0: worksheet.set_column(i, i, required_width[i] * 1.1 + 1) @@ -1229,7 +1220,7 @@ def plot_series(self, fig=None, style="quartile", results=None, outputs=None, po if fig is None: fig = plt.figure() - ax = plt.gca() + ax = fig.gca() series_lookup = self._get_series() @@ -1264,7 +1255,7 @@ def plot_series(self, fig=None, style="quartile", results=None, outputs=None, po proposed_label = "%s (%s)" % (output, these_series[0].unit_string) if ax.yaxis.get_label().get_text(): - assert proposed_label == ax.yaxis.get_label().get_text(), "The outputs being superimposed have different units" + assert proposed_label == ax.yaxis.get_label().get_text(), f"The outputs being superimposed have different units: {proposed_label} vs {ax.yaxis.get_label().get_text()}" else: ax.set_ylabel(proposed_label) diff --git a/atomica/system.py b/atomica/system.py index c72ab22a..46587e6a 100644 --- a/atomica/system.py +++ b/atomica/system.py @@ -85,7 +85,7 @@ class FrameworkSettings: DEFAULT_SYMBOL_INAPPLICABLE = "N.A." DEFAULT_POP_TYPE = "default" - RESERVED_KEYWORDS = ["t", "flow", "all", "dt", "total"] # A code_name in the framework cannot be equal to one of these values + RESERVED_KEYWORDS = ["t", "flow", "all", "dt", "total", "constant"] # A code_name in the framework cannot be equal to one of these values RESERVED_KEYWORDS += supported_functions.keys() RESERVED_SYMBOLS = set(":,;/+-*'\" @") # A code_name in the framework (for characs, comps, pars) cannot contain any of these characters diff --git a/atomica/utils.py b/atomica/utils.py index ee342e54..5b532388 100644 --- a/atomica/utils.py +++ b/atomica/utils.py @@ -587,7 +587,7 @@ def interpolate(self, t2: np.array, method="linear", **kwargs) -> np.array: interpolator = method(t1, v1, **kwargs) return interpolator(t2) - def sample(self, constant=True): + def sample(self, constant: bool = True, rng_sampler=None): """ Return a sampled copy of the TimeSeries @@ -596,6 +596,8 @@ def sample(self, constant=True): :param constant: If True, time series will be perturbed by a single constant offset. If False, an different perturbation will be applied to each time specific value independently. 
+ :param rng_sampler: Optional random number generator that may have been seeded to generate consistent results + :return: A copied ``TimeSeries`` with perturbed values """ @@ -603,9 +605,12 @@ def sample(self, constant=True): if self._sampled: raise Exception("Sampling has already been performed - can only sample once") + if rng_sampler is None: + rng_sampler = np.random # + new = self.copy() if self.sigma is not None: - delta = self.sigma * np.random.randn(1)[0] + delta = self.sigma * rng_sampler.standard_normal(1)[0] if self.assumption is not None: new.assumption += delta @@ -614,7 +619,7 @@ def sample(self, constant=True): new.vals = [v + delta for v in new.vals] else: # Sample again for each data point - for i, (v, delta) in enumerate(zip(new.vals, self.sigma * np.random.randn(len(new.vals)))): + for i, (v, delta) in enumerate(zip(new.vals, self.sigma * rng_sampler.standard_normal(len(new.vals)))): new.vals[i] = v + delta # Sampling flag only needs to be set if the TimeSeries had data to change @@ -841,7 +846,10 @@ def callback(result, idx): jobs.append(pool.apply_async(fcn, callback=partial(callback, idx=i))) else: for i, x in enumerate(inputs): - jobs.append(pool.apply_async(fcn, args=(x,), callback=partial(callback, idx=i))) + if isinstance(x, dict): + pool.apply_async(fcn, args=(), kwds=x, callback=partial(callback, idx=i)) # if the list of inputs is given as a dictionary then pass as kwargs (kwds) instead + else: + pool.apply_async(fcn, args=(x,), callback=partial(callback, idx=i)) pool.close() pool.join() @@ -982,3 +990,17 @@ def stop_logging() -> None: logger.removeHandler(handler) # Don't terminate the loop, if by some change there is more than one handler # (not supposed to happen though) then we would want to close them all + + +def stochastic_rounding(x, rng_sampler=None): + """ + Stochastically round a float up or down to an integer-equivalent value (note: returns still as a float) + :param x: value to be rounded + """ + sample = np.random.random() if rng_sampler is None else rng_sampler.random() + + floor = np.floor(x) + remainder = x - floor + if remainder: + x = floor + int(sample < remainder) + return x diff --git a/atomica/version.py b/atomica/version.py index 3cc178d9..235b678a 100644 --- a/atomica/version.py +++ b/atomica/version.py @@ -6,6 +6,6 @@ import sciris as sc -version = "1.28.4" -versiondate = "2024-05-27" +version = "1.28.5" +versiondate = "2024-06-28" gitinfo = sc.gitinfo(__file__) diff --git a/azure-pipelines.yml b/azure-pipelines.yml index 4f7ed50a..50fc1d9c 100644 --- a/azure-pipelines.yml +++ b/azure-pipelines.yml @@ -9,8 +9,8 @@ schedules: jobs: - job: 'tox' variables: - python.version: '3.11' - TOXENV: 'py311' + python.version: '3.12' + TOXENV: 'py312' strategy: matrix: linux: @@ -52,7 +52,7 @@ jobs: - task: UsePythonVersion@0 displayName: 'Select python version' inputs: - versionSpec: '3.11' + versionSpec: '3.12' - script: python -m pip install flake8 flake8-junit-report displayName: 'Install flake8' - script: flake8 atomica tests --exit-zero --output-file flake8.txt @@ -75,7 +75,7 @@ jobs: - task: UsePythonVersion@0 displayName: 'Select python version' inputs: - versionSpec: '3.11' + versionSpec: '3.12' - script: | sudo apt install pandoc -y displayName: 'Install pandoc' @@ -113,7 +113,7 @@ jobs: - task: UsePythonVersion@0 displayName: 'Select python version' inputs: - versionSpec: '3.11' + versionSpec: '3.12' - script: | python -m pip install wheel python -m pip install twine diff --git a/docs/examples/Uncertainty.ipynb 
b/docs/examples/Uncertainty.ipynb index daef6d9d..fa652784 100644 --- a/docs/examples/Uncertainty.ipynb +++ b/docs/examples/Uncertainty.ipynb @@ -95,7 +95,7 @@ "import matplotlib.pyplot as plt\n", "from scipy import stats\n", "import sciris as sc\n", - "np.random.seed(3)\n", + "np.random.seed(5)\n", "testdir = '../../tests/'\n", "\n", "P = at.Project(framework=testdir + 'test_uncertainty_framework.xlsx', databook=testdir + 'test_uncertainty_databook.xlsx')\n", @@ -146,7 +146,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "If there is uncertainty, we can obtain a sampled epidemiological prediction by sampling from all of the quantities containing uncertinty. For example, replacing the value of `214` above with a sample from the distribution $\\ \\mathcal{N}(214,20)$ - along with all of the other quantities. The input to the model is effectively a parameter set derived from the original set of parameters, but with values replaced by sampled values where appropriate. You can obtain a sampled parameter set simply by calling the `ParameterSet.sample()` method:" + "If there is uncertainty, we can obtain a sampled epidemiological prediction by sampling from all of the quantities containing uncertainty. For example, replacing the value of `214` above with a sample from the distribution $\\ \\mathcal{N}(214,20)$ - along with all of the other quantities. The input to the model is effectively a parameter set derived from the original set of parameters, but with values replaced by sampled values where appropriate. You can obtain a sampled parameter set simply by calling the `ParameterSet.sample()` method:" ] }, { @@ -1035,9 +1035,9 @@ ], "metadata": { "kernelspec": { - "display_name": "Python [conda env:atomica37]", + "display_name": "Python [conda env:atomica312]", "language": "python", - "name": "conda-env-atomica37-py" + "name": "conda-env-atomica312-py" }, "language_info": { "codemirror_mode": { @@ -1049,7 +1049,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.7.3" + "version": "3.12.2" }, "toc": { "base_numbering": 1, diff --git a/docs/tutorial/T2-Calibration.ipynb b/docs/tutorial/T2-Calibration.ipynb index df6bdbd5..9c0dbab2 100644 --- a/docs/tutorial/T2-Calibration.ipynb +++ b/docs/tutorial/T2-Calibration.ipynb @@ -83,7 +83,7 @@ "\n", "\n", " \n", - "An example of a suitable parameter for databook calibration is `relative seasonality of mosquito population size` - no hard data can exist, but it might be possible to calculate proxy values from other data such as rainfall patterns in different years, adjust annual values manually to match historical epidemic patterns, and then assume a median value for future years. Having this assumption in the databook allows for those calculations to be used as a starting point in the databook before manually editing, and allows for comparability with other projects that use the same parameter. \n", + "An example of a suitable parameter for databook calibration is `relative seasonality of mosquito population size` - it is unlikely that any existing data would be able to directly provide a value for this parameter, but it might be possible to calculate proxy values from other data such as rainfall patterns in different years, adjust annual values manually to match historical epidemic patterns, and then assume a median value for future years. 
Having this assumption in the databook allows for those calculations to be used as a starting point in the databook before manually editing, and allows for comparability with other projects that use the same parameter. \n", "\n", "
\n", "Suggestion: When designing a databook, it is recommended that all parameters intended for explicit databook calibration are placed on a single 'Calibration' sheet to provide clarity about what is data and what are calibrated assumptions.\n", @@ -91,14 +91,22 @@ "\n", "An example of a suitable parameter for scale factor calibration is `treatment initiation` used to determine model transitions from diagnosed to treated - a country has reported data for the number of people initiating treatment, and it is important to accurately maintain the official numbers in the databook. Nevertheless, there may be systematic under-reporting by an unknown degree and we want to account for those additional treatments in the model to ensure outcomes are representative, so it is appropriate to adjust the scale factor.\n", "\n", - "An example of a parameter that could be adjusted in either way depending on the circumstances or just personal preference would be `force of infection` - this is a clear calibration parameter that is not based on data, and could be adjusted in a databook if it's necessary to reflect changing circumstances external to the model over time, calibrated automatically with a scale factor via the `calibrate` function below in order to achieve the best fit to data, or even a mixture of the two methods.\n", + "An example of a parameter that could be adjusted in either way depending on the circumstances or just personal preference would be `force of infection` - this is a clear calibration parameter that is not based on data, and could be adjusted in a databook if it's necessary to reflect changing circumstances external to the model over time, calibrated automatically with a scale factor via the `calibrate` function below in order to achieve the best fit to data, or even a mixture of the two methods." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Calibration scale factors\n", "\n", + "The values in the databook are intended to serve as a record of the original input data used to inform the model. Therefore, if a parameter needs to be scaled up or down to calibrate the model, it is typically preferable to do this by setting a 'scale factor' (also referred to as a 'y-factor') in the `ParameterSet`, rather than changing the data. \n", "\n", "
\n", "The web interfaces (such as the Cascade Analysis Tool) perform calibration using scale factors. The scale factors shown on the website correspond to the values being set here.\n", "
\n", "\n", - "To set a scale factor, create a `ParameterSet` either by copying an existing one, or creating a new one. Then, access the `pars` attribute to look up the parameter you wish to change, and set the `y_factor` for the population you want to change:" + "To set a scale factor, create a `ParameterSet` either by copying an existing one, or by creating a new one. Then, access the `pars` attribute to look up the parameter you wish to change, and set the `y_factor` for the population you want to change:" ] }, { @@ -107,7 +115,7 @@ "metadata": {}, "outputs": [], "source": [ - "p2 = P.parsets[0].copy()\n", + "p2 = P.parsets[0].copy() # Copy an existing ParameterSet\n", "p2.pars['b_rate'].y_factor['adults'] = 2" ] }, @@ -133,7 +141,206 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "We can see that we have considerably overshot the data, indicating that doubling the birth rate was much too big a change. This would typically be the first step in an iterative process, where you adjust the scale factor, inspect the data, and then make further adjustments. \n", + "We can see that we have considerably overshot the data, indicating that doubling the birth rate was much too big a change. This would typically be the first step in an iterative process, where you adjust the scale factor, inspect the data, and then make further adjustments. " + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Calibrating compartment sizes\n", + "\n", + "The `ParameterSet` contains parameters for quantities entered on the 'Compartments' and 'Characteristics' sheet of the databook. These are used to calculate the initial compartment sizes used at the start of the simulation. Y-factors can be set for these quantities, the same as for any other parameter. This will not change the data points - just the calculated initial compartment size. For example, we can initialize the model with half as many susceptible people by setting a y-factor of 0.5 on the `sus` compartment size:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "p2 = at.ParameterSet(P.framework, P.data) # Make a new ParameterSet\n", + "p2.pars['sus'].y_factor['adults'] = 0.5 \n", + "r2 = P.run_sim(parset=p2,result_name = 'Decreased initial sus')\n", + "d = at.PlotData([result,r2], outputs='sus',project=P)\n", + "at.plot_series(d,axis='results',data=P.data);" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Unlike setting y-factors for parameters, y-factors for compartment sizes may result in invalid initializations. This can occur if the scaled compartment sizes or characteristics are inconsistent with each other. 
For example, if the calibrated number of initial susceptible people results in needing more people than the total population size, an error will occur:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "p2 = at.ParameterSet(P.framework, P.data) # Make a new ParameterSet\n", + "p2.pars['sus'].y_factor['adults'] = 1.1\n", + "try:\n", + " r2 = P.run_sim(parset=p2,result_name = 'Increased initial sus')\n", + "except Exception as e:\n", + " print(e)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "In this case, such an error could be resolved by also increasing the y-factor for the 'alive' characteristic, thereby increasing the total population size:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "p2 = at.ParameterSet(P.framework, P.data) # Make a new ParameterSet\n", + "p2.pars['sus'].y_factor['adults'] = 1.1\n", + "p2.pars['alive'].y_factor['adults'] = 1.2\n", + "r2 = P.run_sim(parset=p2,result_name = 'Increased initial sus')\n", + "d = at.PlotData([result,r2], outputs=['sus','alive'],project=P)\n", + "at.plot_series(d,axis='results',data=P.data);" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "See [https://atomica.tools/docs/master/general/Compartment-Initialization.html](https://atomica.tools/docs/master/general/Compartment-Initialization.html) for more information on how the initialization of compartment sizes is carried out by Atomica. " + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Calibrating transfers\n", + "\n", + "It is also possible to set y-factors to calibrate transfers, which move people from one population to another, such as aging from 5-14 to 15-64. A y-factor for a transfer must therefore be identified by two populations rather than one. Further, transfer parameters do not appear in the framework, because they are population-specific. Instead, a transfer is defined in the databook, and a corresponding parameter is then automatically created in the `ParameterSet`. For example, we can load the TB demo in Atomica and inspect the transfers:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "P2 = at.demo('tb')" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "In the databook, there are two transfers, relating to aging and incarceration:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "P2.data.transfers" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "These both appear in the `ParameterSet` under the 'transfers' attribute:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "P2.parsets[0].transfers.keys()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "To set a scale factor, it is necessary to define both the source and destination population. The transfer is indexed first by the source population, and then by the destination population. 
The top level key in the transfer corresponds to the source population:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "P2.parsets[0].transfers['age'].keys() # Source populations" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "If we index the transfer using one of these source population code names, we can access a `Parameter` that contains `y_factor` entries for all of the corresponding destination populations." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "P2.parsets[0].transfers['age']['5-14']" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Notice how this parameter only contains y-factors for the destination populations associated with the 'age' transfer out of the '5-14' population:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "P2.parsets[0].transfers['age']['5-14'].y_factor" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "This corresponds to the fact that the aging transfer can only move people from the 5-14 population to the 15-64 population. To set a y-factor for this transfer, assign a value to this entry in the `y_factor` attribute:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "P2.parsets[0].transfers['age']['5-14'].y_factor['15-64'] = 1.5" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "The example above will increase the transfer rate for aging from 5-14 to 15-64. " + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Automatic calibration\n", "\n", "Automated calibration is also available via the project's `calibrate` method. This will automatically adjust parameter values to match the data. To use this function, you need to specify which parameters to set scale factors for, and which variables in the databook to compute calibration quality from. The framework can provide defaults for which parameters to automatically calibrate, or you can pass a list of those parameters in directly. In this example, we will pass in `b_rate` because we want to adjust the birth rate, and we will use `sus` as a measurable since we want to match the number of susceptible people. The configuration therefore corresponds to the example shown above. " ] }, @@ -182,6 +389,15 @@ "at.plot_series(d,axis='results',data=P.data);" ] }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Automated calibration of transfers\n", + "\n", + "As described above, transfers are identified by two populations rather than one. The 'adjustable' for a transfer parameter is therefore identified by concatenating the name of the transfer and the name of the source population, separated by `'_from_'`. For example, we could specify an adjustable of `'age_from_5-14'` to calibrate the aging transfer out of the 5-14 population. 
" + ] + }, { "cell_type": "markdown", "metadata": {}, @@ -284,9 +500,9 @@ ], "metadata": { "kernelspec": { - "display_name": "Python [conda env:atomica37]", + "display_name": "Python [conda env:atomica312]", "language": "python", - "name": "conda-env-atomica37-py" + "name": "conda-env-atomica312-py" }, "language_info": { "codemirror_mode": { @@ -298,7 +514,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.7.3" + "version": "3.12.2" }, "toc": { "base_numbering": 1, diff --git a/setup.py b/setup.py index 675bf903..eb71024a 100644 --- a/setup.py +++ b/setup.py @@ -21,8 +21,13 @@ "Operating System :: OS Independent", "Programming Language :: Python", "Topic :: Software Development :: Libraries :: Python Modules", - "Development Status :: 3 - Alpha", + "Development Status :: 4 - Beta", "Programming Language :: Python :: 3.7", + "Programming Language :: Python :: 3.8", + "Programming Language :: Python :: 3.9", + "Programming Language :: Python :: 3.10", + "Programming Language :: Python :: 3.11", + "Programming Language :: Python :: 3.12", ] setup( @@ -41,8 +46,8 @@ include_package_data=True, install_requires=[ "matplotlib", - "numpy>=1.10.1", - "scipy>=1.2.1", + "numpy>=1.10.1,<2", + "scipy>=1.6", "pandas", "xlsxwriter", "openpyxl", diff --git a/tests/conftest.py b/tests/conftest.py index 9934aed3..3439bfe3 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -6,3 +6,12 @@ def close_figures(): yield plt.close("all") + + +# conftest.py +def pytest_collection_modifyitems(items): + for i, item in enumerate(items): + if item.name == "test_model[tb]": + items.pop(i) + items.insert(0, item) + break diff --git a/tests/test_tox_library.py b/tests/test_tox_library.py index 704742dc..16e21d58 100644 --- a/tests/test_tox_library.py +++ b/tests/test_tox_library.py @@ -173,7 +173,7 @@ def run_regenerated_framework(proj): # each function call as a separate test, which can be run in parallel # simply by adding `pytest -n 4` for instance. 
Or via tox # `tox -- -o -n 4` (because the default config is to use n=2 for TravisCI) -@pytest.mark.parametrize("model", models) +@pytest.mark.parametrize("model", at.demo_projects) def test_model(model): framework_file = at.LIBRARY_PATH / f"{model}_framework.xlsx" diff --git a/tests/test_tox_markovchain.py b/tests/test_tox_markovchain.py new file mode 100644 index 00000000..c526cea2 --- /dev/null +++ b/tests/test_tox_markovchain.py @@ -0,0 +1,121 @@ +import atomica as at +import numpy as np +import os +import matplotlib.pyplot as plt +import sciris as sc +import pytest + + +dirname = at.parent_dir() +# np.seterr(all='raise') + + +@pytest.mark.parametrize("demo", at.demo_projects) +def test_functional_all_framework_databook(demo): + # Test that all demo frameworks can be run with stochastic mode turned on + P = at.demo(demo, do_run=False) + P.settings.multinomial = True + P.run_sim(result_name="DTMC") + + +@pytest.mark.parametrize("demo", ["sir", "udt", "tb_simple"]) # This test is expensive so only run it for a few lightweight models +def test_compare_deterministic_markovchain(demo): + + seed = 0 + N = 1000 + + y_cutoff = 10 # (deterministic) Compartment values below this will not be considered for the comparisons + index_cutoff = 1500 + y_diff_cutoff = 0 + + ratio_cutoff = 0.05 + + def filter_points(t, det, mc, mc_mean): + # t_fil, det_fil, mc_fil = sc.dcp(t), sc.dcp(det), sc.dcp(mc) + + indices = np.ones(t.shape) + indices = np.logical_and(indices, det >= y_cutoff) + indices = np.logical_and(indices, np.arange(len(indices)) <= index_cutoff) + indices = np.logical_and(indices, np.abs(det - mc_mean) > y_diff_cutoff) + + return t[indices], det[indices], mc[:, indices], mc_mean[indices] + + def get_array_outputs(results, output, pop): + extracted = [None] * len(results) + for i, res in enumerate(results): + extracted[i] = res.model.get_pop(pop).get_variable(output)[0].vals + + array = np.stack(extracted, axis=0) + t = results[0].model.get_pop(pop).get_variable(output)[0].t + return t, array + + def get_average_output(results, output, pop): + t, array = get_array_outputs(results, output, pop) + average = np.mean(array, axis=0) + + return t, average, array + + def do_hypothesis_test(det_array, mc_results): + import scipy + + res = scipy.stats.ttest_1samp(mc_results, det_array, keepdims=True) + return res + + def run_simple_percent_test(P, baseline_results, mc_results): + baseline_results = sc.promotetolist(baseline_results) + mc_results = sc.promotetolist(mc_results) + + compartment_names = [name for name in baseline_results[0].model.framework.comps.index.values] + + max_max_diff = 0 + + for compartment_name in compartment_names: + for pop in baseline_results[0].pop_names: + try: + baseline_results[0].model.get_pop(pop).get_variable(compartment_name)[0].vals + except: + continue # This population doesn't have this compartment + + t, mc_mean, mc_array = get_average_output(mc_results, compartment_name, pop) + _, det_mean, _ = get_average_output(baseline_results, compartment_name, pop) + + t, det_mean, mc_array, mc_mean = filter_points(t, det_mean, mc_array, mc_mean) + + if len(t) == 0: # No filtered outputs so skip this output, pop combo + continue + + indices = det_mean != 0 + ratio = np.ones(mc_mean.shape) + ratio[indices] = mc_mean[indices] / det_mean[indices] + if min(ratio) < 1 - ratio_cutoff or max(ratio) > 1 + ratio_cutoff: + print("RATIO", compartment_name, pop, min(ratio), max(ratio), list(zip(ratio, det_mean, mc_mean))) + indices = np.where(np.logical_or(ratio < 1 - ratio_cutoff, ratio > 1 + 
ratio_cutoff))[0] + + raise Exception(f"The Markov Chain runs (N={len(mc_results)}) differed too much from the baseline runs (N={len(baseline_results)}), starting at timestep={indices[0]}, t={t[indices[0]]}, with compartment={compartment_name}, population={pop}. Baseline={det_mean[indices[0]]}, MC={mc_mean[indices[0]]}, difference={100*(mc_mean[indices[0]]/det_mean[indices[0]] - 1):.2f}%") + + max_diff = np.max(np.abs(ratio - 1)) + max_max_diff = max(max_diff, max_max_diff) + + print(f"Success: max difference {max_max_diff*100:.2f}%") + + seed = np.random.default_rng(seed).integers(2**32 - 1) + + P = at.demo(demo, do_run=False) + res = P.run_sim(result_name="Original") + + baseline_results = [res] + + P.settings.multinomial = True + mc_results = [None] * N + with at.Quiet(): # Using at.Quiet should automatically reset the logging level if an exception occurs + for i in range(N): + this_seed = (i + seed + hash(demo)) % (2**32) + mc_results[i] = P.run_sim(result_name=f"DTMC {i}", rng=this_seed) + + run_simple_percent_test(P, baseline_results=baseline_results, mc_results=mc_results) + + +if __name__ == "__main__": + for demo in at.demo_projects: + test_functional_all_framework_databook(demo) + test_compare_deterministic_markovchain("sir") diff --git a/tests/test_uncertainty_framework.xlsx b/tests/test_uncertainty_framework.xlsx index 120b1735..37073ea5 100644 Binary files a/tests/test_uncertainty_framework.xlsx and b/tests/test_uncertainty_framework.xlsx differ diff --git a/tests/text_tox_timed_initialization.py b/tests/text_tox_timed_initialization.py index 66782372..80e0ff48 100644 --- a/tests/text_tox_timed_initialization.py +++ b/tests/text_tox_timed_initialization.py @@ -18,6 +18,7 @@ testdir = at.parent_dir() tmpdir = testdir / "temp" + def test_timed_initialization(): F = at.ProjectFramework(testdir / "timed_initialization_test_framework.xlsx") @@ -32,17 +33,16 @@ def set_initialization_basic(F, D, year, y_factor=True): ps2 = at.ParameterSet(F, D, "basic") # Set initial compartment sizes for initialization quantities - for qty in itertools.chain(F.characs.index[F.characs['setup weight'] > 0], F.comps.index[F.comps['setup weight'] > 0]): + for qty in itertools.chain(F.characs.index[F.characs["setup weight"] > 0], F.comps.index[F.comps["setup weight"] > 0]): for pop in D.pops: if y_factor: ps2.pars[qty].meta_y_factor = 1 - ps2.pars[qty].y_factor[pop] = res.get_variable(qty, pop)[0].vals[-1]/ps2.pars[qty].ts[pop].interpolate(year) + ps2.pars[qty].y_factor[pop] = res.get_variable(qty, pop)[0].vals[-1] / ps2.pars[qty].ts[pop].interpolate(year) else: ps2.pars[qty].ts[pop].insert(year, res.get_variable(qty, pop)[0].vals[-1]) return ps2 - P = at.Project(framework=F, databook=D, do_run=False) P.settings.sim_dt = 1 / 12 P.settings.sim_start = 2018 @@ -52,24 +52,25 @@ def set_initialization_basic(F, D, year, y_factor=True): # Basic steady state initialization ps = set_initialization_basic(F, D, 2018) - res2 = P.run_sim(ps, result_name='basic') + res2 = P.run_sim(ps, result_name="basic") # Advanced steady state initialization - ps2 = at.ParameterSet(F,D) + ps2 = at.ParameterSet(F, D) ps2.set_initialization(res1, 2021) - res3 = P.run_sim(ps2, result_name='advanced') + res3 = P.run_sim(ps2, result_name="advanced") - ps2.pars['alive'].y_factor[0] = 1.1 + ps2.pars["alive"].y_factor[0] = 1.1 - ps2.save_calibration(tmpdir/'test_cal.xlsx') - res3 = P.run_sim(ps2, result_name='advanced') + ps2.save_calibration(tmpdir / "test_cal.xlsx") + res3 = P.run_sim(ps2, result_name="advanced") - ps4 = P.make_parset('saved')
- ps4.load_calibration(tmpdir/'test_cal.xlsx') - res4 = P.run_sim(ps4, result_name='loaded') + ps4 = P.make_parset("saved") + ps4.load_calibration(tmpdir / "test_cal.xlsx") + res4 = P.run_sim(ps4, result_name="loaded") d = at.PlotData([res1, res2, res3, res4], ["sus", "inf", "rec"]) at.plot_series(d) -if __name__ == '__main__': - test_timed_initialization() \ No newline at end of file + +if __name__ == "__main__": + test_timed_initialization()
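For reference, a minimal sketch of how the transfer calibration described in the T2-Calibration.ipynb changes above might be driven end to end, combining the manual y-factor and the `'_from_'` adjustable naming. The (0.1, 10.0) bounds, the weight of 1.0, the 'alive' measurable and the 'fractional' metric below are illustrative assumptions rather than values taken from this changeset:

    import atomica as at
    from atomica.calibration import calibrate

    P = at.demo("tb", do_run=False)  # the TB demo contains the 'age' transfer used in the tutorial
    parset = P.parsets[0].copy()

    # Manual scale factor for the aging transfer out of the 5-14 population into 15-64
    parset.transfers["age"]["5-14"].y_factor["15-64"] = 1.5

    # Automated calibration of the same transfer: the adjustable name is '<transfer>_from_<source pop>'
    # and the destination population is supplied as the pop_name entry of the tuple.
    # The bounds and the ('alive', '15-64', 1.0, 'fractional') measurable are assumptions for illustration.
    calibrated = calibrate(
        P,
        parset,
        pars_to_adjust=[("age_from_5-14", "15-64", 0.1, 10.0)],
        output_quantities=[("alive", "15-64", 1.0, "fractional")],
    )
    res = P.run_sim(parset=calibrated, result_name="Calibrated aging transfer")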