From 02c9b17c22604f9b19d731151f6bb17c642d8672 Mon Sep 17 00:00:00 2001
From: Dan Hatton <daniel.hatton@diamond.ac.uk>
Date: Tue, 15 Oct 2024 09:42:07 +0100
Subject: [PATCH 01/91] The machine config for the instrument is already
 accessed via key

---
 src/murfey/server/api/__init__.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/murfey/server/api/__init__.py b/src/murfey/server/api/__init__.py
index 89a1b5cb..c6e24c9c 100644
--- a/src/murfey/server/api/__init__.py
+++ b/src/murfey/server/api/__init__.py
@@ -167,8 +167,8 @@ def get_instrument_display_name(instrument_name: str) -> str:
     machine_config = get_machine_config(instrument_name=instrument_name)[
         instrument_name
     ]
-    if machine_config.get(instrument_name):
-        return machine_config[instrument_name].display_name
+    if machine_config:
+        return machine_config.display_name
     return ""
 
 

From 3d747ad5269c75a17661aa813fab5c6aaf274584 Mon Sep 17 00:00:00 2001
From: Dan Hatton <daniel.hatton@diamond.ac.uk>
Date: Tue, 15 Oct 2024 09:45:41 +0100
Subject: [PATCH 02/91] No longer strictly an int

---
 src/murfey/server/websocket.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/murfey/server/websocket.py b/src/murfey/server/websocket.py
index 0680c778..f6f5302a 100644
--- a/src/murfey/server/websocket.py
+++ b/src/murfey/server/websocket.py
@@ -137,7 +137,7 @@ async def websocket_connection_endpoint(
             except Exception:
                 await manager.broadcast(f"Client #{client_id} sent message {data}")
     except WebSocketDisconnect:
-        log.info(f"Disconnecting Client {int(sanitise(str(client_id)))}")
+        log.info(f"Disconnecting Client {sanitise(str(client_id))}")
         manager.disconnect(websocket, client_id, unregister_client=False)
         await manager.broadcast(f"Client #{client_id} disconnected")
         await manager.delete_state(f"Client {client_id}")

From 66a424ddbfd6a705f19224a8f30a4f5bb0421b02 Mon Sep 17 00:00:00 2001
From: Daniel Hatton <daniel.hatton@diamond.ac.uk>
Date: Thu, 17 Oct 2024 09:21:19 +0100
Subject: [PATCH 03/91] Allow spaces in directory names when looking for gain
 references

---
 src/murfey/instrument_server/api.py | 6 +++---
 src/murfey/util/__init__.py         | 7 +++++--
 2 files changed, 8 insertions(+), 5 deletions(-)

diff --git a/src/murfey/instrument_server/api.py b/src/murfey/instrument_server/api.py
index cae6c3d3..a1e7b74a 100644
--- a/src/murfey/instrument_server/api.py
+++ b/src/murfey/instrument_server/api.py
@@ -237,9 +237,9 @@ def get_possible_gain_references(
         headers={"Authorization": f"Bearer {tokens[session_id]}"},
     ).json()
     candidates = []
-    for gf in secure_path(Path(machine_config["gain_reference_directory"])).glob(
-        "**/*"
-    ):
+    for gf in secure_path(
+        Path(machine_config["gain_reference_directory"]), keep_spaces=True
+    ).glob("**/*"):
         if gf.is_file():
             candidates.append(
                 File(
diff --git a/src/murfey/util/__init__.py b/src/murfey/util/__init__.py
index 382f0226..c5222bd1 100644
--- a/src/murfey/util/__init__.py
+++ b/src/murfey/util/__init__.py
@@ -62,8 +62,11 @@ def sanitise_nonpath(in_string: str) -> str:
     return in_string
 
 
-def secure_path(in_path: Path) -> Path:
-    secured_parts = [secure_filename(p) for p in in_path.parts]
+def secure_path(in_path: Path, keep_spaces: bool = False) -> Path:
+    if keep_spaces:
+        secured_parts = [secure_filename(p) for p in in_path.parts if " " not in p]
+    else:
+        secured_parts = [secure_filename(p) for p in in_path.parts]
     return Path("/".join(secured_parts))
 
 

From c18c7820135455126905e55d8b19d7e7a6beb5d5 Mon Sep 17 00:00:00 2001
From: Daniel Hatton <daniel.hatton@diamond.ac.uk>
Date: Thu, 17 Oct 2024 09:38:46 +0100
Subject: [PATCH 04/91] Fix for instrument card display now that the whole
 machine configuration is pulled by the server

---
 src/murfey/server/api/display.py | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/src/murfey/server/api/display.py b/src/murfey/server/api/display.py
index 6c5c82d2..8f51548a 100644
--- a/src/murfey/server/api/display.py
+++ b/src/murfey/server/api/display.py
@@ -13,10 +13,10 @@
 machine_config = get_machine_config()
 
 
-@router.get("/microscope_image/")
-def get_mic_image():
-    if machine_config.image_path:
-        return FileResponse(machine_config.image_path)
+@router.get("/instruments/{instrument_name}/image/")
+def get_mic_image(instrument_name: str):
+    if machine_config[instrument_name].image_path:
+        return FileResponse(machine_config[instrument_name].image_path)
     return None
 
 

From 29f50ac143f8b67157debc1952c8030cc43444e5 Mon Sep 17 00:00:00 2001
From: Daniel Hatton <daniel.hatton@diamond.ac.uk>
Date: Thu, 17 Oct 2024 16:52:09 +0100
Subject: [PATCH 05/91] Ignore parts with spaces, don't remove them

---
 src/murfey/util/__init__.py | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/src/murfey/util/__init__.py b/src/murfey/util/__init__.py
index c5222bd1..2d8e0aac 100644
--- a/src/murfey/util/__init__.py
+++ b/src/murfey/util/__init__.py
@@ -64,7 +64,9 @@ def sanitise_nonpath(in_string: str) -> str:
 
 def secure_path(in_path: Path, keep_spaces: bool = False) -> Path:
     if keep_spaces:
-        secured_parts = [secure_filename(p) for p in in_path.parts if " " not in p]
+        secured_parts = [
+            secure_filename(p) if " " not in p else p for p in in_path.parts
+        ]
     else:
         secured_parts = [secure_filename(p) for p in in_path.parts]
     return Path("/".join(secured_parts))

From 8e4b0e24222ce64f7a2f21f766c5d12c52b19637 Mon Sep 17 00:00:00 2001
From: Daniel Hatton <daniel.hatton@diamond.ac.uk>
Date: Thu, 17 Oct 2024 16:55:37 +0100
Subject: [PATCH 06/91] Remove unused function

---
 src/murfey/util/__init__.py | 10 +---------
 1 file changed, 1 insertion(+), 9 deletions(-)

diff --git a/src/murfey/util/__init__.py b/src/murfey/util/__init__.py
index 2d8e0aac..78ccc3a5 100644
--- a/src/murfey/util/__init__.py
+++ b/src/murfey/util/__init__.py
@@ -8,7 +8,7 @@
 import logging
 import os
 import shutil
-from functools import lru_cache, partial
+from functools import partial
 from pathlib import Path
 from queue import Queue
 from threading import Thread
@@ -72,14 +72,6 @@ def secure_path(in_path: Path, keep_spaces: bool = False) -> Path:
     return Path("/".join(secured_parts))
 
 
-@lru_cache(maxsize=1)
-def get_machine_config(url: str, instrument_name: str = "", demo: bool = False) -> dict:
-    _instrument_name: str | None = instrument_name or os.getenv("BEAMLINE")
-    if not _instrument_name:
-        return {}
-    return requests.get(f"{url}/instruments/{_instrument_name}/machine").json()
-
-
 def _get_visit_list(api_base: ParseResult, instrument_name: str):
     get_visits_url = api_base._replace(
         path=f"/instruments/{instrument_name}/visits_raw"

From 60ec9a1437a4ded993b22e23250cd75497a667c4 Mon Sep 17 00:00:00 2001
From: Daniel Hatton <daniel.hatton@diamond.ac.uk>
Date: Thu, 17 Oct 2024 17:11:03 +0100
Subject: [PATCH 07/91] The function wasn't unused but should be renamed

---
 src/murfey/client/analyser.py              |  8 ++++----
 src/murfey/client/contexts/clem.py         |  4 ++--
 src/murfey/client/contexts/spa.py          |  8 ++++----
 src/murfey/client/contexts/spa_metadata.py |  4 ++--
 src/murfey/client/contexts/tomo.py         |  6 +++---
 src/murfey/client/tui/app.py               | 10 +++++-----
 src/murfey/client/tui/screens.py           | 14 +++++++-------
 src/murfey/util/__init__.py                | 12 +++++++++++-
 8 files changed, 38 insertions(+), 28 deletions(-)

diff --git a/src/murfey/client/analyser.py b/src/murfey/client/analyser.py
index 1be2c21f..5ae587a0 100644
--- a/src/murfey/client/analyser.py
+++ b/src/murfey/client/analyser.py
@@ -22,7 +22,7 @@
 from murfey.client.instance_environment import MurfeyInstanceEnvironment
 from murfey.client.rsync import RSyncerUpdate, TransferResult
 from murfey.client.tui.forms import FormDependency
-from murfey.util import Observer, get_machine_config
+from murfey.util import Observer, get_machine_config_client
 from murfey.util.models import PreprocessingParametersTomo, ProcessingParametersSPA
 
 logger = logging.getLogger("murfey.client.analyser")
@@ -73,7 +73,7 @@ def __init__(
         self._stopping = False
         self._halt_thread = False
         self._murfey_config = (
-            get_machine_config(
+            get_machine_config_client(
                 str(environment.url.geturl()),
                 instrument_name=environment.instrument_name,
                 demo=environment.demo,
@@ -145,7 +145,7 @@ def _find_context(self, file_path: Path) -> bool:
             and self._environment
         ):
             created_directories = set(
-                get_machine_config(
+                get_machine_config_client(
                     str(self._environment.url.geturl()),
                     instrument_name=self._environment.instrument_name,
                     demo=self._environment.demo,
@@ -165,7 +165,7 @@ def _find_context(self, file_path: Path) -> bool:
                     logger.info("Acquisition software: EPU")
                     if self._environment:
                         try:
-                            cfg = get_machine_config(
+                            cfg = get_machine_config_client(
                                 str(self._environment.url.geturl()),
                                 instrument_name=self._environment.instrument_name,
                                 demo=self._environment.demo,
diff --git a/src/murfey/client/contexts/clem.py b/src/murfey/client/contexts/clem.py
index d0ef8b64..2ce45ad2 100644
--- a/src/murfey/client/contexts/clem.py
+++ b/src/murfey/client/contexts/clem.py
@@ -14,7 +14,7 @@
 
 from murfey.client.context import Context
 from murfey.client.instance_environment import MurfeyInstanceEnvironment
-from murfey.util import capture_post, get_machine_config
+from murfey.util import capture_post, get_machine_config_client
 
 # Create logger object
 logger = logging.getLogger("murfey.client.contexts.clem")
@@ -26,7 +26,7 @@ def _file_transferred_to(
     """
     Returns the Path of the transferred file on the DLS file system.
     """
-    machine_config = get_machine_config(
+    machine_config = get_machine_config_client(
         str(environment.url.geturl()),
         instrument_name=environment.instrument_name,
         demo=environment.demo,
diff --git a/src/murfey/client/contexts/spa.py b/src/murfey/client/contexts/spa.py
index 2077f751..29e13949 100644
--- a/src/murfey/client/contexts/spa.py
+++ b/src/murfey/client/contexts/spa.py
@@ -19,7 +19,7 @@
     authorised_requests,
     capture_get,
     capture_post,
-    get_machine_config,
+    get_machine_config_client,
 )
 
 logger = logging.getLogger("murfey.client.contexts.spa")
@@ -105,7 +105,7 @@ def _get_grid_square_atlas_positions(
 def _file_transferred_to(
     environment: MurfeyInstanceEnvironment, source: Path, file_path: Path
 ):
-    machine_config = get_machine_config(
+    machine_config = get_machine_config_client(
         str(environment.url.geturl()),
         instrument_name=environment.instrument_name,
         demo=environment.demo,
@@ -684,7 +684,7 @@ def post_transfer(
             if transferred_file.suffix in data_suffixes:
                 if self._acquisition_software == "epu":
                     if environment:
-                        machine_config = get_machine_config(
+                        machine_config = get_machine_config_client(
                             str(environment.url.geturl()),
                             instrument_name=environment.instrument_name,
                             demo=environment.demo,
@@ -872,7 +872,7 @@ def _register_processing_job(
         parameters = parameters or {}
         environment.id_tag_registry["processing_job"].append(tag)
         proc_url = f"{str(environment.url.geturl())}/visits/{environment.visit}/{environment.murfey_session}/register_processing_job"
-        machine_config = get_machine_config(
+        machine_config = get_machine_config_client(
             str(environment.url.geturl()),
             instrument_name=environment.instrument_name,
             demo=environment.demo,
diff --git a/src/murfey/client/contexts/spa_metadata.py b/src/murfey/client/contexts/spa_metadata.py
index f3ca9ee2..4588df86 100644
--- a/src/murfey/client/contexts/spa_metadata.py
+++ b/src/murfey/client/contexts/spa_metadata.py
@@ -8,7 +8,7 @@
 from murfey.client.context import Context
 from murfey.client.contexts.spa import _get_grid_square_atlas_positions, _get_source
 from murfey.client.instance_environment import MurfeyInstanceEnvironment, SampleInfo
-from murfey.util import authorised_requests, capture_post, get_machine_config
+from murfey.util import authorised_requests, capture_post, get_machine_config_client
 
 logger = logging.getLogger("murfey.client.contexts.spa_metadata")
 
@@ -18,7 +18,7 @@
 def _atlas_destination(
     environment: MurfeyInstanceEnvironment, source: Path, file_path: Path
 ) -> Path:
-    machine_config = get_machine_config(
+    machine_config = get_machine_config_client(
         str(environment.url.geturl()),
         instrument_name=environment.instrument_name,
         demo=environment.demo,
diff --git a/src/murfey/client/contexts/tomo.py b/src/murfey/client/contexts/tomo.py
index 35224355..4f4e9dad 100644
--- a/src/murfey/client/contexts/tomo.py
+++ b/src/murfey/client/contexts/tomo.py
@@ -19,7 +19,7 @@
     MurfeyInstanceEnvironment,
     global_env_lock,
 )
-from murfey.util import authorised_requests, capture_post, get_machine_config
+from murfey.util import authorised_requests, capture_post, get_machine_config_client
 from murfey.util.mdoc import get_block, get_global_data, get_num_blocks
 
 logger = logging.getLogger("murfey.client.contexts.tomo")
@@ -341,7 +341,7 @@ def _complete_process_file(
     def _file_transferred_to(
         self, environment: MurfeyInstanceEnvironment, source: Path, file_path: Path
     ):
-        machine_config = get_machine_config(
+        machine_config = get_machine_config_client(
             str(environment.url.geturl()),
             instrument_name=environment.instrument_name,
             demo=environment.demo,
@@ -794,7 +794,7 @@ def post_transfer(
             if transferred_file.suffix in data_suffixes:
                 if self._acquisition_software == "tomo":
                     if environment:
-                        machine_config = get_machine_config(
+                        machine_config = get_machine_config_client(
                             str(environment.url.geturl()),
                             instrument_name=environment.instrument_name,
                             demo=environment.demo,
diff --git a/src/murfey/client/tui/app.py b/src/murfey/client/tui/app.py
index 9d313660..1af32cb9 100644
--- a/src/murfey/client/tui/app.py
+++ b/src/murfey/client/tui/app.py
@@ -34,7 +34,7 @@
 from murfey.client.watchdir_multigrid import MultigridDirWatcher
 from murfey.util import (
     capture_post,
-    get_machine_config,
+    get_machine_config_client,
     read_config,
     set_default_acquisition_output,
 )
@@ -104,7 +104,7 @@ def __init__(
         self._force_mdoc_metadata = force_mdoc_metadata
         self._strict = strict
         self._skip_existing_processing = skip_existing_processing
-        self._machine_config = get_machine_config(
+        self._machine_config = get_machine_config_client(
             str(self._environment.url.geturl()),
             instrument_name=self._environment.instrument_name,
             demo=self._environment.demo,
@@ -128,7 +128,7 @@ def _launch_multigrid_watcher(
         self, source: Path, destination_overrides: Dict[Path, str] | None = None
     ):
         log.info(f"Launching multigrid watcher for source {source}")
-        machine_config = get_machine_config(
+        machine_config = get_machine_config_client(
             str(self._environment.url.geturl()),
             instrument_name=self._environment.instrument_name,
             demo=self._environment.demo,
@@ -688,7 +688,7 @@ def on_log_book_log(self, message):
         self.log_book.write(message.renderable)
 
     async def reset(self):
-        machine_config = get_machine_config(
+        machine_config = get_machine_config_client(
             str(self._environment.url.geturl()),
             instrument_name=self._environment.instrument_name,
             demo=self._environment.demo,
@@ -743,7 +743,7 @@ def clean_up_quit(self) -> None:
         exit()
 
     async def action_clear(self) -> None:
-        machine_config = get_machine_config(
+        machine_config = get_machine_config_client(
             str(self._environment.url.geturl()),
             instrument_name=self._environment.instrument_name,
             demo=self._environment.demo,
diff --git a/src/murfey/client/tui/screens.py b/src/murfey/client/tui/screens.py
index 3f879138..9fa3f44e 100644
--- a/src/murfey/client/tui/screens.py
+++ b/src/murfey/client/tui/screens.py
@@ -56,7 +56,7 @@
 )
 from murfey.client.rsync import RSyncer
 from murfey.client.tui.forms import FormDependency
-from murfey.util import capture_post, get_machine_config, read_config
+from murfey.util import capture_post, get_machine_config_client, read_config
 from murfey.util.models import PreprocessingParametersTomo, ProcessingParametersSPA
 
 log = logging.getLogger("murfey.tui.screens")
@@ -262,7 +262,7 @@ def __init__(
         super().__init__(*args, **kwargs)
         self._selected_dir = basepath
         self._add_basepath = add_basepath
-        cfg = get_machine_config(
+        cfg = get_machine_config_client(
             str(self.app._environment.url.geturl()),
             instrument_name=self.app._environment.instrument_name,
             demo=self.app._environment.demo,
@@ -898,7 +898,7 @@ def on_button_pressed(self, event: Button.Pressed):
         visit_dir = Path(str(event.button.label)) / self.app._visit
         visit_dir.mkdir(exist_ok=True)
         self.app._set_default_acquisition_directories(visit_dir)
-        machine_config = get_machine_config(
+        machine_config = get_machine_config_client(
             str(self.app._environment.url.geturl()),
             instrument_name=self.app._environment.instrument_name,
             demo=self.app._environment.demo,
@@ -940,7 +940,7 @@ def compose(self):
             )
             yield RadioButton("Tomography", value=self._context is TomographyContext)
         if self.app._multigrid:
-            machine_config = get_machine_config(
+            machine_config = get_machine_config_client(
                 str(self.app._environment.url.geturl()),
                 instrument_name=self.app._environment.instrument_name,
             )
@@ -1000,7 +1000,7 @@ def compose(self):
                                 )
                             )
         else:
-            machine_config = get_machine_config(
+            machine_config = get_machine_config_client(
                 str(self.app._environment.url.geturl()),
                 instrument_name=self.app._environment.instrument_name,
             )
@@ -1031,7 +1031,7 @@ def compose(self):
                     i = Input(value=val, id=k.name, classes="input-destination")
                 params_bulk.append(i)
                 self._inputs[i] = k.name
-            machine_config = get_machine_config(
+            machine_config = get_machine_config_client(
                 str(self.app._environment.url.geturl()),
                 instrument_name=self.app._environment.instrument_name,
                 demo=self.app._environment.demo,
@@ -1083,7 +1083,7 @@ def on_switch_changed(self, event):
 
     def on_radio_set_changed(self, event: RadioSet.Changed) -> None:
         if event.index == 0:
-            cfg = get_machine_config(
+            cfg = get_machine_config_client(
                 str(self.app._environment.url.geturl()),
                 instrument_name=self.app._environment.instrument_name,
                 demo=self.app._environment.demo,
diff --git a/src/murfey/util/__init__.py b/src/murfey/util/__init__.py
index 78ccc3a5..da68c844 100644
--- a/src/murfey/util/__init__.py
+++ b/src/murfey/util/__init__.py
@@ -8,7 +8,7 @@
 import logging
 import os
 import shutil
-from functools import partial
+from functools import lru_cache, partial
 from pathlib import Path
 from queue import Queue
 from threading import Thread
@@ -40,6 +40,16 @@ def read_config() -> configparser.ConfigParser:
     return config
 
 
+@lru_cache(maxsize=1)
+def get_machine_config_client(
+    url: str, instrument_name: str = "", demo: bool = False
+) -> dict:
+    _instrument_name: str | None = instrument_name or os.getenv("BEAMLINE")
+    if not _instrument_name:
+        return {}
+    return requests.get(f"{url}/instruments/{_instrument_name}/machine").json()
+
+
 def authorised_requests() -> Tuple[Callable, Callable, Callable, Callable]:
     token = read_config()["Murfey"].get("token", "")
     _get = partial(requests.get, headers={"Authorization": f"Bearer {token}"})

From 7c6548f381467074d74e20f0f17faecc318bbc0b Mon Sep 17 00:00:00 2001
From: Daniel Hatton <daniel.hatton@diamond.ac.uk>
Date: Thu, 17 Oct 2024 16:35:19 +0100
Subject: [PATCH 08/91] If there is no ISPyB connection then autogenerate data
 collection and processing job IDs in the Murfey database

---
 src/murfey/server/__init__.py | 193 +++++++++++++++++-----------------
 src/murfey/server/demo_api.py |  43 ++++----
 src/murfey/server/ispyb.py    |  20 +++-
 3 files changed, 134 insertions(+), 122 deletions(-)

diff --git a/src/murfey/server/__init__.py b/src/murfey/server/__init__.py
index 6448a137..7ec89d89 100644
--- a/src/murfey/server/__init__.py
+++ b/src/murfey/server/__init__.py
@@ -2572,17 +2572,23 @@ def feedback_callback(header: dict, message: dict) -> None:
             ).all():
                 dcgid = dcg_murfey[0].id
             else:
-                record = DataCollectionGroup(
-                    sessionId=ispyb_session_id,
-                    experimentType=message["experiment_type"],
-                    experimentTypeId=message["experiment_type_id"],
-                )
-                dcgid = _register(record, header)
-                murfey_dcg = db.DataCollectionGroup(
-                    id=dcgid,
-                    session_id=message["session_id"],
-                    tag=message.get("tag"),
-                )
+                if ispyb_session_id is None:
+                    murfey_dcg = db.DataCollectionGroup(
+                        session_id=message["session_id"],
+                        tag=message.get("tag"),
+                    )
+                else:
+                    record = DataCollectionGroup(
+                        sessionId=ispyb_session_id,
+                        experimentType=message["experiment_type"],
+                        experimentTypeId=message["experiment_type_id"],
+                    )
+                    dcgid = _register(record, header)
+                    murfey_dcg = db.DataCollectionGroup(
+                        id=dcgid,
+                        session_id=message["session_id"],
+                        tag=message.get("tag"),
+                    )
                 murfey_db.add(murfey_dcg)
                 murfey_db.commit()
                 murfey_db.close()
@@ -2635,60 +2641,66 @@ def feedback_callback(header: dict, message: dict) -> None:
             ).all():
                 dcid = dc_murfey[0].id
             else:
-                record = DataCollection(
-                    SESSIONID=ispyb_session_id,
-                    experimenttype=message["experiment_type"],
-                    imageDirectory=message["image_directory"],
-                    imageSuffix=message["image_suffix"],
-                    voltage=message["voltage"],
-                    dataCollectionGroupId=dcgid,
-                    pixelSizeOnImage=message["pixel_size"],
-                    imageSizeX=message["image_size_x"],
-                    imageSizeY=message["image_size_y"],
-                    slitGapHorizontal=message.get("slit_width"),
-                    magnification=message.get("magnification"),
-                    exposureTime=message.get("exposure_time"),
-                    totalExposedDose=message.get("total_exposed_dose"),
-                    c2aperture=message.get("c2aperture"),
-                    phasePlate=int(message.get("phase_plate", 0)),
-                )
-                dcid = _register(
-                    record,
-                    header,
-                    tag=(
-                        message.get("tag")
-                        if message["experiment_type"] == "tomography"
-                        else ""
-                    ),
-                )
-                murfey_dc = db.DataCollection(
-                    id=dcid,
-                    tag=message.get("tag"),
-                    dcg_id=dcgid,
-                )
+                if ispyb_session_id is None:
+                    murfey_dc = db.DataCollection(
+                        tag=message.get("tag"),
+                        dcg_id=dcgid,
+                    )
+                else:
+                    record = DataCollection(
+                        SESSIONID=ispyb_session_id,
+                        experimenttype=message["experiment_type"],
+                        imageDirectory=message["image_directory"],
+                        imageSuffix=message["image_suffix"],
+                        voltage=message["voltage"],
+                        dataCollectionGroupId=dcgid,
+                        pixelSizeOnImage=message["pixel_size"],
+                        imageSizeX=message["image_size_x"],
+                        imageSizeY=message["image_size_y"],
+                        slitGapHorizontal=message.get("slit_width"),
+                        magnification=message.get("magnification"),
+                        exposureTime=message.get("exposure_time"),
+                        totalExposedDose=message.get("total_exposed_dose"),
+                        c2aperture=message.get("c2aperture"),
+                        phasePlate=int(message.get("phase_plate", 0)),
+                    )
+                    dcid = _register(
+                        record,
+                        header,
+                        tag=(
+                            message.get("tag")
+                            if message["experiment_type"] == "tomography"
+                            else ""
+                        ),
+                    )
+                    murfey_dc = db.DataCollection(
+                        id=dcid,
+                        tag=message.get("tag"),
+                        dcg_id=dcgid,
+                    )
                 murfey_db.add(murfey_dc)
                 murfey_db.commit()
+                dcid = murfey_dc.id
                 murfey_db.close()
             if dcid is None and _transport_object:
                 _transport_object.transport.nack(header, requeue=True)
                 return None
-            if global_state.get("data_collection_ids") and isinstance(
-                global_state["data_collection_ids"], dict
-            ):
-                global_state["data_collection_ids"] = {
-                    **global_state["data_collection_ids"],
-                    message.get("tag"): dcid,
-                }
-            else:
-                global_state["data_collection_ids"] = {message.get("tag"): dcid}
             if _transport_object:
                 _transport_object.transport.ack(header)
             return None
         elif message["register"] == "processing_job":
             logger.info("registering processing job")
             assert isinstance(global_state["data_collection_ids"], dict)
-            _dcid = global_state["data_collection_ids"].get(message["tag"])
-            if _dcid is None:
+            dc = murfey_db.exec(
+                select(db.DataCollection, db.DataCollectionGroup)
+                .where(db.DataCollection.dcg_id == db.DataCollectionGroup.id)
+                .where(db.DataCollectionGroup.session_id == murfey_session_id)
+                .where(db.DataCollectionGroup.tag == message["source"])
+                .where(db.DataCollection.tag == message["tag"])
+            ).all()
+            if dc:
+                _dcid = dc[0][0].id
+            else:
                 logger.warning(f"No data collection ID found for {message['tag']}")
                 if _transport_object:
                     _transport_object.transport.nack(header, requeue=True)
@@ -2700,38 +2712,33 @@ def feedback_callback(header: dict, message: dict) -> None:
             ).all():
                 pid = pj_murfey[0].id
             else:
-                record = ProcessingJob(dataCollectionId=_dcid, recipe=message["recipe"])
-                run_parameters = message.get("parameters", {})
-                assert isinstance(run_parameters, dict)
-                if message.get("job_parameters"):
-                    job_parameters = [
-                        ProcessingJobParameter(parameterKey=k, parameterValue=v)
-                        for k, v in message["job_parameters"].items()
-                    ]
-                    pid = _register(ExtendedRecord(record, job_parameters), header)
+                if murfey.server.ispyb.Session() is None:
+                    murfey_pj = db.ProcessingJob(recipe=message["recipe"], dc_id=_dcid)
                 else:
-                    pid = _register(record, header)
-                murfey_pj = db.ProcessingJob(
-                    id=pid, recipe=message["recipe"], dc_id=_dcid
-                )
+                    record = ProcessingJob(
+                        dataCollectionId=_dcid, recipe=message["recipe"]
+                    )
+                    run_parameters = message.get("parameters", {})
+                    assert isinstance(run_parameters, dict)
+                    if message.get("job_parameters"):
+                        job_parameters = [
+                            ProcessingJobParameter(parameterKey=k, parameterValue=v)
+                            for k, v in message["job_parameters"].items()
+                        ]
+                        pid = _register(ExtendedRecord(record, job_parameters), header)
+                    else:
+                        pid = _register(record, header)
+                    murfey_pj = db.ProcessingJob(
+                        id=pid, recipe=message["recipe"], dc_id=_dcid
+                    )
                 murfey_db.add(murfey_pj)
                 murfey_db.commit()
+                pid = murfey_pj.id
                 murfey_db.close()
             if pid is None and _transport_object:
                 _transport_object.transport.nack(header, requeue=True)
                 return None
             prom.preprocessed_movies.labels(processing_job=pid)
-            if global_state.get("processing_job_ids"):
-                global_state["processing_job_ids"] = {
-                    **global_state["processing_job_ids"],  # type: ignore
-                    message.get("tag"): {
-                        **global_state["processing_job_ids"].get(message.get("tag"), {}),  # type: ignore
-                        message["recipe"]: pid,
-                    },
-                }
-            else:
-                prids = {message["tag"]: {message["recipe"]: pid}}
-                global_state["processing_job_ids"] = prids
             if message.get("job_parameters"):
                 if _transport_object:
                     _transport_object.transport.ack(header)
@@ -2741,30 +2748,20 @@ def feedback_callback(header: dict, message: dict) -> None:
             ).all():
                 appid = app_murfey[0].id
             else:
-                record = AutoProcProgram(
-                    processingJobId=pid, processingStartTime=datetime.now()
-                )
-                appid = _register(record, header)
-                if appid is None and _transport_object:
-                    _transport_object.transport.nack(header, requeue=True)
-                    return None
-                murfey_app = db.AutoProcProgram(id=appid, pj_id=pid)
+                if murfey.server.ispyb.Session() is None:
+                    murfey_app = db.AutoProcProgram(pj_id=pid)
+                else:
+                    record = AutoProcProgram(
+                        processingJobId=pid, processingStartTime=datetime.now()
+                    )
+                    appid = _register(record, header)
+                    if appid is None and _transport_object:
+                        _transport_object.transport.nack(header, requeue=True)
+                        return None
+                    murfey_app = db.AutoProcProgram(id=appid, pj_id=pid)
                 murfey_db.add(murfey_app)
                 murfey_db.commit()
                 murfey_db.close()
-            if global_state.get("autoproc_program_ids"):
-                assert isinstance(global_state["autoproc_program_ids"], dict)
-                global_state["autoproc_program_ids"] = {
-                    **global_state["autoproc_program_ids"],
-                    message.get("tag"): {
-                        **global_state["autoproc_program_ids"].get(message.get("tag"), {}),  # type: ignore
-                        message["recipe"]: appid,
-                    },
-                }
-            else:
-                global_state["autoproc_program_ids"] = {
-                    message["tag"]: {message["recipe"]: appid}
-                }
             if _transport_object:
                 _transport_object.transport.ack(header)
             return None
diff --git a/src/murfey/server/demo_api.py b/src/murfey/server/demo_api.py
index 7f209d36..eab41858 100644
--- a/src/murfey/server/demo_api.py
+++ b/src/murfey/server/demo_api.py
@@ -843,26 +843,31 @@ def register_tilt(visit_name: str, client_id: int, tilt_info: TiltInfo, db=murfe
     db.commit()
 
 
+# @router.get("/instruments/{instrument_name}/visits_raw", response_model=List[Visit])
+# def get_current_visits(instrument_name: str):
+#     return [
+#         Visit(
+#             start=datetime.datetime.now(),
+#             end=datetime.datetime.now() + datetime.timedelta(days=1),
+#             session_id=1,
+#             name="cm31111-2",
+#             beamline="m12",
+#             proposal_title="Nothing of importance",
+#         ),
+#         Visit(
+#             start=datetime.datetime.now(),
+#             end=datetime.datetime.now() + datetime.timedelta(days=1),
+#             session_id=1,
+#             name="cm31111-3",
+#             beamline="m12",
+#             proposal_title="Nothing of importance",
+#         ),
+#     ]
+
+
 @router.get("/instruments/{instrument_name}/visits_raw", response_model=List[Visit])
-def get_current_visits(instrument_name: str):
-    return [
-        Visit(
-            start=datetime.datetime.now(),
-            end=datetime.datetime.now() + datetime.timedelta(days=1),
-            session_id=1,
-            name="cm31111-2",
-            beamline="m12",
-            proposal_title="Nothing of importance",
-        ),
-        Visit(
-            start=datetime.datetime.now(),
-            end=datetime.datetime.now() + datetime.timedelta(days=1),
-            session_id=1,
-            name="cm31111-3",
-            beamline="m12",
-            proposal_title="Nothing of importance",
-        ),
-    ]
+def get_current_visits(instrument_name: str, db=murfey.server.ispyb.DB):
+    return murfey.server.ispyb.get_all_ongoing_visits(instrument_name, db)
 
 
 @router.get("/visits/{visit_name}")
diff --git a/src/murfey/server/ispyb.py b/src/murfey/server/ispyb.py
index bf5284e3..3e31342d 100644
--- a/src/murfey/server/ispyb.py
+++ b/src/murfey/server/ispyb.py
@@ -2,6 +2,7 @@
 
 import datetime
 import logging
+import os
 from typing import Callable, List, Optional
 
 import ispyb
@@ -36,7 +37,7 @@
         bind=sqlalchemy.create_engine(url(), connect_args={"use_pure": True})
     )
 except AttributeError:
-    Session = None
+    Session = lambda: None
 
 
 def _send_using_new_connection(transport_type: str, queue: str, message: dict) -> None:
@@ -56,7 +57,7 @@ def __init__(self, transport_type):
         self.transport = workflows.transport.lookup(transport_type)()
         self.transport.connect()
         self.feedback_queue = ""
-        self.ispyb = ispyb.open()
+        self.ispyb = ispyb.open() if os.getenv("ISPYB_CREDENTIALS") else None
         self._connection_callback: Callable | None = None
 
     def reconnect(self):
@@ -273,6 +274,9 @@ def do_buffer_lookup(self, app_id: int, uuid: int) -> Optional[int]:
 
 def _get_session() -> sqlalchemy.orm.Session:
     db = Session()
+    if db is None:
+        yield None
+        return
     try:
         yield db
     finally:
@@ -288,8 +292,10 @@ def get_session_id(
     proposal_code: str,
     proposal_number: str,
     visit_number: str,
-    db: sqlalchemy.orm.Session,
-) -> int:
+    db: sqlalchemy.orm.Session | None,
+) -> int | None:
+    if db is None:
+        return None
     query = (
         db.query(BLSession)
         .join(Proposal)
@@ -347,7 +353,11 @@ def get_sub_samples_from_visit(visit: str, db: sqlalchemy.orm.Session) -> List[S
     return res
 
 
-def get_all_ongoing_visits(microscope: str, db: sqlalchemy.orm.Session) -> list[Visit]:
+def get_all_ongoing_visits(
+    microscope: str, db: sqlalchemy.orm.Session | None
+) -> list[Visit]:
+    if db is None:
+        return []
     query = (
         db.query(BLSession)
         .join(Proposal)

From a1996a93bb21fe98e53acf70771480fb74885979 Mon Sep 17 00:00:00 2001
From: Daniel Hatton <daniel.hatton@diamond.ac.uk>
Date: Thu, 17 Oct 2024 17:26:20 +0100
Subject: [PATCH 09/91] Set up multigrid controller to be able to skip the
 rsync step

---
 src/murfey/client/multigrid_control.py | 111 +++++++++++++++----------
 1 file changed, 67 insertions(+), 44 deletions(-)

diff --git a/src/murfey/client/multigrid_control.py b/src/murfey/client/multigrid_control.py
index 6c28b205..4174a6dd 100644
--- a/src/murfey/client/multigrid_control.py
+++ b/src/murfey/client/multigrid_control.py
@@ -173,6 +173,7 @@ def _start_rsyncer(
         remove_files: bool = False,
         tag: str = "",
         limited: bool = False,
+        transfer: bool = True,
     ):
         log.info(f"starting rsyncer: {source}")
         if self._environment:
@@ -189,47 +190,48 @@ def _start_rsyncer(
                     log.warning(
                         f"Gain reference file {self._environment.gain_ref} was not successfully transferred to {visit_path}/processing"
                     )
-        self.rsync_processes[source] = RSyncer(
-            source,
-            basepath_remote=Path(destination),
-            server_url=self._environment.url,
-            stop_callback=self._rsyncer_stopped,
-            do_transfer=self.do_transfer,
-            remove_files=remove_files,
-        )
+        if transfer:
+            self.rsync_processes[source] = RSyncer(
+                source,
+                basepath_remote=Path(destination),
+                server_url=self._environment.url,
+                stop_callback=self._rsyncer_stopped,
+                do_transfer=self.do_transfer,
+                remove_files=remove_files,
+            )
 
-        def rsync_result(update: RSyncerUpdate):
-            if not update.base_path:
-                raise ValueError("No base path from rsyncer update")
-            if not self.rsync_processes.get(update.base_path):
-                raise ValueError("TUI rsync process does not exist")
-            if update.outcome is TransferResult.SUCCESS:
-                # log.info(
-                #     f"File {str(update.file_path)!r} successfully transferred ({update.file_size} bytes)"
-                # )
-                pass
-            else:
-                log.warning(f"Failed to transfer file {str(update.file_path)!r}")
-                self.rsync_processes[update.base_path].enqueue(update.file_path)
+            def rsync_result(update: RSyncerUpdate):
+                if not update.base_path:
+                    raise ValueError("No base path from rsyncer update")
+                if not self.rsync_processes.get(update.base_path):
+                    raise ValueError("TUI rsync process does not exist")
+                if update.outcome is TransferResult.SUCCESS:
+                    # log.info(
+                    #     f"File {str(update.file_path)!r} successfully transferred ({update.file_size} bytes)"
+                    # )
+                    pass
+                else:
+                    log.warning(f"Failed to transfer file {str(update.file_path)!r}")
+                    self.rsync_processes[update.base_path].enqueue(update.file_path)
 
-        self.rsync_processes[source].subscribe(rsync_result)
-        self.rsync_processes[source].subscribe(
-            partial(
-                self._increment_transferred_files,
-                destination=destination,
-                source=str(source),
-            ),
-            secondary=True,
-        )
-        url = f"{str(self._environment.url.geturl())}/sessions/{str(self._environment.murfey_session)}/rsyncer"
-        rsyncer_data = {
-            "source": str(source),
-            "destination": destination,
-            "session_id": self.session_id,
-            "transferring": self.do_transfer or self._environment.demo,
-            "tag": tag,
-        }
-        requests.post(url, json=rsyncer_data)
+            self.rsync_processes[source].subscribe(rsync_result)
+            self.rsync_processes[source].subscribe(
+                partial(
+                    self._increment_transferred_files,
+                    destination=destination,
+                    source=str(source),
+                ),
+                secondary=True,
+            )
+            url = f"{str(self._environment.url.geturl())}/sessions/{str(self._environment.murfey_session)}/rsyncer"
+            rsyncer_data = {
+                "source": str(source),
+                "destination": destination,
+                "session_id": self.session_id,
+                "transferring": self.do_transfer or self._environment.demo,
+                "tag": tag,
+            }
+            requests.post(url, json=rsyncer_data)
         self._environment.watchers[source] = DirWatcher(source, settling_time=30)
 
         if not self.analysers.get(source) and analyse:
@@ -254,15 +256,36 @@ def rsync_result(update: RSyncerUpdate):
             else:
                 self.analysers[source].subscribe(self._data_collection_form)
             self.analysers[source].start()
-            self.rsync_processes[source].subscribe(self.analysers[source].enqueue)
+            if transfer:
+                self.rsync_processes[source].subscribe(self.analysers[source].enqueue)
 
-        self.rsync_processes[source].start()
+        if transfer:
+            self.rsync_processes[source].start()
 
         if self._environment:
             if self._environment.watchers.get(source):
-                self._environment.watchers[source].subscribe(
-                    self.rsync_processes[source].enqueue
-                )
+                if transfer:
+                    self._environment.watchers[source].subscribe(
+                        self.rsync_processes[source].enqueue
+                    )
+                else:
+                    # the watcher and rsyncer don't notify with the same object, so a conversion is required here
+                    def _rsync_update_converter(p: Path) -> None:
+                        self.analysers[source].enqueue(
+                            RSyncerUpdate(
+                                file_path=p,
+                                file_size=0,
+                                outcome=TransferResult.SUCCESS,
+                                transfer_total=0,
+                                queue_size=0,
+                                base_path=source,
+                            )
+                        )
+                        return None
+
+                    self._environment.watchers[source].subscribe(
+                        _rsync_update_converter
+                    )
                 self._environment.watchers[source].subscribe(
                     partial(
                         self._increment_file_count,

From df0719c83a8741ec8166c67f8c8ca73e14fbc2f5 Mon Sep 17 00:00:00 2001
From: Daniel Hatton <daniel.hatton@diamond.ac.uk>
Date: Fri, 18 Oct 2024 09:13:42 +0100
Subject: [PATCH 10/91] Make data transfer option configurable

---
 src/murfey/client/multigrid_control.py | 1 +
 src/murfey/util/config.py              | 1 +
 2 files changed, 2 insertions(+)

diff --git a/src/murfey/client/multigrid_control.py b/src/murfey/client/multigrid_control.py
index 4174a6dd..22176c9a 100644
--- a/src/murfey/client/multigrid_control.py
+++ b/src/murfey/client/multigrid_control.py
@@ -133,6 +133,7 @@ def _start_rsyncer_multigrid(
             remove_files=remove_files,
             tag=tag,
             limited=limited,
+            transfer=machine_data.get("data_transfer_enabled", True),
         )
         self.ws.send(json.dumps({"message": "refresh"}))
 
diff --git a/src/murfey/util/config.py b/src/murfey/util/config.py
index 2481bfbc..d73b31d7 100644
--- a/src/murfey/util/config.py
+++ b/src/murfey/util/config.py
@@ -35,6 +35,7 @@ class MachineConfig(BaseModel):
     data_required_substrings: Dict[str, Dict[str, List[str]]] = {}
     allow_removal: bool = False
     modular_spa: bool = False
+    data_transfer_enabled: bool = True
     processing_enabled: bool = True
     machine_override: str = ""
     processed_extra_directory: str = ""

From 42984f17c66490d6cb373cb74a6b2a04d9e38673 Mon Sep 17 00:00:00 2001
From: Daniel Hatton <daniel.hatton@diamond.ac.uk>
Date: Fri, 18 Oct 2024 13:34:11 +0100
Subject: [PATCH 11/91] Extend data transfer option configuration to TUI

---
 src/murfey/client/tui/app.py | 127 +++++++++++++++++++++--------------
 1 file changed, 76 insertions(+), 51 deletions(-)

diff --git a/src/murfey/client/tui/app.py b/src/murfey/client/tui/app.py
index 1af32cb9..12605168 100644
--- a/src/murfey/client/tui/app.py
+++ b/src/murfey/client/tui/app.py
@@ -192,6 +192,7 @@ def _start_rsyncer_multigrid(
             analyse=analyse,
             remove_files=remove_files,
             limited=limited,
+            transfer=machine_data.get("data_transfer_enabled", True),
         )
 
     def _start_rsyncer(
@@ -203,6 +204,7 @@ def _start_rsyncer(
         analyse: bool = True,
         remove_files: bool = False,
         limited: bool = False,
+        transfer: bool = True,
     ):
         log.info(f"starting rsyncer: {source}")
         if self._environment:
@@ -219,55 +221,57 @@ def _start_rsyncer(
                     log.warning(
                         f"Gain reference file {self._environment.gain_ref} was not successfully transferred to {visit_path}/processing"
                     )
-        self.rsync_processes[source] = RSyncer(
-            source,
-            basepath_remote=Path(destination),
-            server_url=self._url,
-            # local=self._environment.demo,
-            status_bar=self._statusbar,
-            do_transfer=self._do_transfer,
-            required_substrings_for_removal=self._data_substrings,
-            remove_files=remove_files,
-        )
+        if transfer:
+            self.rsync_processes[source] = RSyncer(
+                source,
+                basepath_remote=Path(destination),
+                server_url=self._url,
+                # local=self._environment.demo,
+                status_bar=self._statusbar,
+                do_transfer=self._do_transfer,
+                required_substrings_for_removal=self._data_substrings,
+                remove_files=remove_files,
+            )
 
-        def rsync_result(update: RSyncerUpdate):
-            if not update.base_path:
-                raise ValueError("No base path from rsyncer update")
-            if not self.rsync_processes.get(update.base_path):
-                raise ValueError("TUI rsync process does not exist")
-            if update.outcome is TransferResult.SUCCESS:
-                log.debug(
-                    f"Succesfully transferred file {str(update.file_path)!r} ({update.file_size} bytes)"
+            def rsync_result(update: RSyncerUpdate):
+                if not update.base_path:
+                    raise ValueError("No base path from rsyncer update")
+                if not self.rsync_processes.get(update.base_path):
+                    raise ValueError("TUI rsync process does not exist")
+                if update.outcome is TransferResult.SUCCESS:
+                    log.debug(
+                        f"Succesfully transferred file {str(update.file_path)!r} ({update.file_size} bytes)"
+                    )
+                    # pass
+                else:
+                    log.warning(f"Failed to transfer file {str(update.file_path)!r}")
+                    self.rsync_processes[update.base_path].enqueue(update.file_path)
+
+            self.rsync_processes[source].subscribe(rsync_result)
+            self.rsync_processes[source].subscribe(
+                partial(
+                    self._increment_transferred_files_prometheus,
+                    destination=destination,
+                    source=str(source),
                 )
-                # pass
-            else:
-                log.warning(f"Failed to transfer file {str(update.file_path)!r}")
-                self.rsync_processes[update.base_path].enqueue(update.file_path)
-
-        self.rsync_processes[source].subscribe(rsync_result)
-        self.rsync_processes[source].subscribe(
-            partial(
-                self._increment_transferred_files_prometheus,
-                destination=destination,
-                source=str(source),
             )
-        )
-        self.rsync_processes[source].subscribe(
-            partial(
-                self._increment_transferred_files,
-                destination=destination,
-                source=str(source),
-            ),
-            secondary=True,
-        )
-        url = f"{str(self._url.geturl())}/sessions/{str(self._environment.murfey_session)}/rsyncer"
-        rsyncer_data = {
-            "source": str(source),
-            "destination": destination,
-            "session_id": self._environment.murfey_session,
-            "transferring": self._do_transfer,
-        }
-        requests.post(url, json=rsyncer_data)
+            self.rsync_processes[source].subscribe(
+                partial(
+                    self._increment_transferred_files,
+                    destination=destination,
+                    source=str(source),
+                ),
+                secondary=True,
+            )
+            url = f"{str(self._url.geturl())}/sessions/{str(self._environment.murfey_session)}/rsyncer"
+            rsyncer_data = {
+                "source": str(source),
+                "destination": destination,
+                "session_id": self._environment.murfey_session,
+                "transferring": self._do_transfer,
+            }
+            requests.post(url, json=rsyncer_data)
+
         self._environment.watchers[source] = DirWatcher(source, settling_time=30)
 
         if not self.analysers.get(source) and analyse:
@@ -295,15 +299,36 @@ def rsync_result(update: RSyncerUpdate):
             else:
                 self.analysers[source].subscribe(self._data_collection_form)
             self.analysers[source].start()
-            self.rsync_processes[source].subscribe(self.analysers[source].enqueue)
+            if transfer:
+                self.rsync_processes[source].subscribe(self.analysers[source].enqueue)
 
-        self.rsync_processes[source].start()
+        if transfer:
+            self.rsync_processes[source].start()
 
         if self._environment:
             if self._environment.watchers.get(source):
-                self._environment.watchers[source].subscribe(
-                    self.rsync_processes[source].enqueue
-                )
+                if transfer:
+                    self._environment.watchers[source].subscribe(
+                        self.rsync_processes[source].enqueue
+                    )
+                else:
+
+                    def _rsync_update_converter(p: Path) -> None:
+                        self.analysers[source].enqueue(
+                            RSyncerUpdate(
+                                file_path=p,
+                                file_size=0,
+                                outcome=TransferResult.SUCCESS,
+                                transfer_total=0,
+                                queue_size=0,
+                                base_path=source,
+                            )
+                        )
+                        return None
+
+                    self._environment.watchers[source].subscribe(
+                        _rsync_update_converter
+                    )
                 self._environment.watchers[source].subscribe(
                     partial(
                         self._increment_file_count,

From 600d71f247367d7c328a0ed01ad4563b4d8419ac Mon Sep 17 00:00:00 2001
From: Daniel Hatton <daniel.hatton@diamond.ac.uk>
Date: Mon, 21 Oct 2024 12:36:20 +0100
Subject: [PATCH 12/91] Add visit creation option in TUI

---
 src/murfey/client/tui/app.py         |  9 +++-
 src/murfey/client/tui/controller.css | 32 ++++++++++++++
 src/murfey/client/tui/screens.py     | 64 ++++++++++++++++++++++++++++
 3 files changed, 103 insertions(+), 2 deletions(-)

diff --git a/src/murfey/client/tui/app.py b/src/murfey/client/tui/app.py
index 12605168..f0f40543 100644
--- a/src/murfey/client/tui/app.py
+++ b/src/murfey/client/tui/app.py
@@ -25,6 +25,7 @@
     MainScreen,
     ProcessingForm,
     SessionSelection,
+    VisitCreation,
     VisitSelection,
     WaitingScreen,
     determine_default_destination,
@@ -682,8 +683,12 @@ async def on_mount(self) -> None:
         exisiting_sessions = requests.get(
             f"{self._environment.url.geturl()}/sessions"
         ).json()
-        self.install_screen(VisitSelection(self.visits), "visit-select-screen")
-        self.push_screen("visit-select-screen")
+        if self.visits:
+            self.install_screen(VisitSelection(self.visits), "visit-select-screen")
+            self.push_screen("visit-select-screen")
+        else:
+            self.install_screen(VisitCreation(), "visit-creation-screen")
+            self.push_screen("visit-creation-screen")
         if exisiting_sessions:
             self.install_screen(
                 SessionSelection(
diff --git a/src/murfey/client/tui/controller.css b/src/murfey/client/tui/controller.css
index 01451b4b..d88bad4f 100644
--- a/src/murfey/client/tui/controller.css
+++ b/src/murfey/client/tui/controller.css
@@ -46,6 +46,12 @@ SessionSelection {
   border: hidden;
 }
 
+VisitCreation {
+  layout: grid;
+  grid-size: 2;
+  border: hidden;
+}
+
 VisitSelection {
   layout: grid;
   grid-size: 4;
@@ -235,6 +241,22 @@ RadioSet {
   background: darkslateblue;
 }
 
+.btn-visit-create {
+  width: 100%;
+  height: 90fr;
+  column-span: 2;
+  background: teal;
+  border: solid black;
+}
+
+.btn-visit-create:hover {
+  background: purple;
+}
+
+.btn-visit-create:focus {
+  background: darkslateblue;
+}
+
 .btn-session {
   width: 100%;
   height: 20%;
@@ -468,6 +490,16 @@ RadioSet {
   background: black;
 }
 
+.input-visit-name {
+  width: 100%;
+  height: 100%;
+  column-span: 2;
+  row-span: 1;
+  content-align: left middle;
+  text-style: bold;
+  background: blueviolet;
+}
+
 #log_book {
   width: 100%;
   height: 100%;
diff --git a/src/murfey/client/tui/screens.py b/src/murfey/client/tui/screens.py
index 9fa3f44e..2b80197e 100644
--- a/src/murfey/client/tui/screens.py
+++ b/src/murfey/client/tui/screens.py
@@ -760,6 +760,70 @@ def on_button_pressed(self, event: Button.Pressed):
             self.app.push_screen("upstream-downloads")
 
 
+class VisitCreation(Screen):
+    visit_name: reactive[str] = reactive("")
+
+    def __init__(self, *args, **kwargs):
+        super().__init__(*args, **kwargs)
+
+    def compose(self):
+        yield Input(placeholder="Visit name", classes="input-visit-name")
+        yield Button("Create visit", classes="btn-visit-create")
+
+    def on_input_changed(self, event):
+        self.visit_name = event.value
+
+    def on_button_pressed(self, event: Button.Pressed):
+        text = str(self.visit_name)
+        self.app._visit = text
+        self.app._environment.visit = text
+        response = requests.post(
+            f"{self.app._environment.url.geturl()}/visits/{text}",
+            json={"id": self.app._environment.client_id},
+        )
+        log.info(f"Posted visit registration: {response.status_code}")
+        machine_data = requests.get(
+            f"{self.app._environment.url.geturl()}/machine"
+        ).json()
+
+        self.app.install_screen(
+            DirectorySelection(
+                [
+                    p[0]
+                    for p in machine_data.get("data_directories", {}).items()
+                    if p[1] == "detector" and Path(p[0]).exists()
+                ]
+            ),
+            "directory-select",
+        )
+        self.app.pop_screen()
+
+        if machine_data.get("gain_reference_directory"):
+            self.app.install_screen(
+                GainReference(
+                    determine_gain_ref(Path(machine_data["gain_reference_directory"])),
+                    True,
+                ),
+                "gain-ref-select",
+            )
+            self.app.push_screen("gain-ref-select")
+        else:
+            if self._switch_status:
+                self.app.push_screen("directory-select")
+            else:
+                self.app.install_screen(LaunchScreen(basepath=Path("./")), "launcher")
+                self.app.push_screen("launcher")
+
+        if machine_data.get("upstream_data_directories"):
+            upstream_downloads = requests.get(
+                f"{self.app._environment.url.geturl()}/sessions/{self.app._environment.murfey_session}/upstream_visits"
+            ).json()
+            self.app.install_screen(
+                UpstreamDownloads(upstream_downloads), "upstream-downloads"
+            )
+            self.app.push_screen("upstream-downloads")
+
+
 class UpstreamDownloads(Screen):
     def __init__(self, connected_visits: Dict[str, Path], *args, **kwargs):
         super().__init__(*args, **kwargs)

From d171395f41d7e9eff937f2c2056e0cf239645add Mon Sep 17 00:00:00 2001
From: Daniel Hatton <daniel.hatton@diamond.ac.uk>
Date: Thu, 24 Oct 2024 11:43:01 +0100
Subject: [PATCH 13/91] Explanatory comment

---
 src/murfey/client/tui/screens.py | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/src/murfey/client/tui/screens.py b/src/murfey/client/tui/screens.py
index 2b80197e..a0469ecf 100644
--- a/src/murfey/client/tui/screens.py
+++ b/src/murfey/client/tui/screens.py
@@ -761,6 +761,8 @@ def on_button_pressed(self, event: Button.Pressed):
 
 
 class VisitCreation(Screen):
+    # This allows for the manual creation of a visit name when there is no LIMS system to provide it
+    # Shares a lot of code with VisitSelection, should be neatened up at some point
     visit_name: reactive[str] = reactive("")
 
     def __init__(self, *args, **kwargs):

From aed6c9840e1f3ec0ec10af5443457023376cdaa3 Mon Sep 17 00:00:00 2001
From: Daniel Hatton <daniel.hatton@diamond.ac.uk>
Date: Thu, 24 Oct 2024 14:19:51 +0100
Subject: [PATCH 14/91] Remove some path construction that is specific to DLS
 directory structure

---
 src/murfey/server/__init__.py     |  2 +-
 src/murfey/server/api/__init__.py | 26 +++++++++++++++++---------
 src/murfey/util/config.py         |  1 +
 3 files changed, 19 insertions(+), 10 deletions(-)

diff --git a/src/murfey/server/__init__.py b/src/murfey/server/__init__.py
index 7ec89d89..2b47e02a 100644
--- a/src/murfey/server/__init__.py
+++ b/src/murfey/server/__init__.py
@@ -2813,7 +2813,7 @@ def feedback_callback(header: dict, message: dict) -> None:
                     dose_per_frame=message["dose_per_frame"],
                     gain_ref=(
                         str(machine_config.rsync_basepath / message["gain_ref"])
-                        if message["gain_ref"]
+                        if message["gain_ref"] and machine_config.data_transfer_enabled
                         else message["gain_ref"]
                     ),
                     voltage=message["voltage"],
diff --git a/src/murfey/server/api/__init__.py b/src/murfey/server/api/__init__.py
index c6e24c9c..0eebe86b 100644
--- a/src/murfey/server/api/__init__.py
+++ b/src/murfey/server/api/__init__.py
@@ -1232,7 +1232,7 @@ async def request_tomography_preprocessing(
                 "fm_dose": proc_file.dose_per_frame,
                 "gain_ref": (
                     str(machine_config.rsync_basepath / proc_file.gain_ref)
-                    if proc_file.gain_ref
+                    if proc_file.gain_ref and machine_config.data_transfer_enabled
                     else proc_file.gain_ref
                 ),
                 "fm_int_file": proc_file.eer_fractionation_file,
@@ -1509,14 +1509,22 @@ async def write_eer_fractionation_file(
     machine_config = get_machine_config(instrument_name=instrument_name)[
         instrument_name
     ]
-    file_path = (
-        Path(machine_config.rsync_basepath)
-        / (machine_config.rsync_module or "data")
-        / str(datetime.datetime.now().year)
-        / secure_filename(visit_name)
-        / "processing"
-        / secure_filename(fractionation_params.fractionation_file_name)
-    )
+    if machine_config.eer_fractionation_file_template:
+        file_path = Path(
+            machine_config.eer_fractionation_file_template.format(
+                visit=secure_filename(visit_name),
+                year=str(datetime.datetime.now().year),
+            )
+        ) / secure_filename(fractionation_params.fractionation_file_name)
+    else:
+        file_path = (
+            Path(machine_config.rsync_basepath)
+            / (machine_config.rsync_module or "data")
+            / str(datetime.datetime.now().year)
+            / secure_filename(visit_name)
+            / "processing"
+            / secure_filename(fractionation_params.fractionation_file_name)
+        )
     if file_path.is_file():
         return {"eer_fractionation_file": str(file_path)}
 
diff --git a/src/murfey/util/config.py b/src/murfey/util/config.py
index d73b31d7..847b4edb 100644
--- a/src/murfey/util/config.py
+++ b/src/murfey/util/config.py
@@ -27,6 +27,7 @@ class MachineConfig(BaseModel):
     create_directories: Dict[str, str] = {"atlas": "atlas"}
     analyse_created_directories: List[str] = []
     gain_reference_directory: Optional[Path] = None
+    eer_fractionation_file_template: str = ""
     processed_directory_name: str = "processed"
     gain_directory_name: str = "processing"
     node_creator_queue: str = "node_creator"

From 74871e149fc026092cb5738e53fbe1ce859163d9 Mon Sep 17 00:00:00 2001
From: Daniel Hatton <daniel.hatton@diamond.ac.uk>
Date: Thu, 24 Oct 2024 13:30:38 +0100
Subject: [PATCH 15/91] Starting to add some description to configuration

---
 src/murfey/cli/generate_config.py |  0
 src/murfey/util/config.py         | 39 ++++++++++++++++++++++++-------
 2 files changed, 31 insertions(+), 8 deletions(-)
 create mode 100644 src/murfey/cli/generate_config.py

diff --git a/src/murfey/cli/generate_config.py b/src/murfey/cli/generate_config.py
new file mode 100644
index 00000000..e69de29b
diff --git a/src/murfey/util/config.py b/src/murfey/util/config.py
index 847b4edb..bf92ed35 100644
--- a/src/murfey/util/config.py
+++ b/src/murfey/util/config.py
@@ -7,7 +7,7 @@
 from typing import Dict, List, Literal, Optional, Union
 
 import yaml
-from pydantic import BaseModel, BaseSettings
+from pydantic import BaseModel, BaseSettings, Field
 
 
 class MachineConfig(BaseModel):
@@ -16,25 +16,48 @@ class MachineConfig(BaseModel):
     data_directories: Dict[Path, str]
     rsync_basepath: Path
     default_model: Path
-    display_name: str = ""
-    instrument_name: str = ""
-    image_path: Optional[Path] = None
+    display_name: str = Field(
+        default="",
+        description="Name of instrument used for display purposes, i.e. Krios I",
+    )
+    instrument_name: str = Field(
+        default="",
+        description="Computer friendly instrument reference name, i.e. m02. Must not contain special characters or whitespace",
+    )
+    image_path: Optional[Path] = Field(
+        default=None,
+        description="Path to an image of the instrument for display purposes",
+    )
     software_versions: Dict[str, str] = {}
     external_executables: Dict[str, str] = {}
     external_executables_eer: Dict[str, str] = {}
     external_environment: Dict[str, str] = {}
     rsync_module: str = ""
-    create_directories: Dict[str, str] = {"atlas": "atlas"}
+    create_directories: Dict[str, str] = Field(
+        default={"atlas": "atlas"},
+        description="Directories to be created within each visit",
+    )
     analyse_created_directories: List[str] = []
     gain_reference_directory: Optional[Path] = None
     eer_fractionation_file_template: str = ""
+    gain_reference_directory: Optional[Path] = Field(
+        default=None,
+        description="Directory in which the gain reference is written from the detector",
+    )
     processed_directory_name: str = "processed"
     gain_directory_name: str = "processing"
     node_creator_queue: str = "node_creator"
-    superres: bool = False
-    camera: str = "FALCON"
+    superres: bool = Field(
+        default=False, description="Is the detector on this instrument a GATAN K3?"
+    )
+    camera: Literal["FALCON", "K3", ""] = Field(
+        default="FALCON",
+        description="What camera is the instrument equipped with? Only relevant for TEM instruments. Use the option closest to your case. This is used to determine whether the gain reference needs to be binned down from superres",
+    )
     data_required_substrings: Dict[str, Dict[str, List[str]]] = {}
-    allow_removal: bool = False
+    allow_removal: bool = Field(
+        default=False, description="Allow original files to be removed after rsync"
+    )
     modular_spa: bool = False
     data_transfer_enabled: bool = True
     processing_enabled: bool = True

From 7fc5e28833c46af39e4634705d4c2fe8aa9fde48 Mon Sep 17 00:00:00 2001
From: Daniel Hatton <daniel.hatton@diamond.ac.uk>
Date: Thu, 24 Oct 2024 16:00:06 +0100
Subject: [PATCH 16/91] Further improvements to machine configuration including
 some validation depending on other configuration operations

---
 src/murfey/util/config.py | 41 +++++++++++++++++++++++++++++++--------
 1 file changed, 33 insertions(+), 8 deletions(-)

diff --git a/src/murfey/util/config.py b/src/murfey/util/config.py
index bf92ed35..e8ea0cca 100644
--- a/src/murfey/util/config.py
+++ b/src/murfey/util/config.py
@@ -4,18 +4,24 @@
 import socket
 from functools import lru_cache
 from pathlib import Path
-from typing import Dict, List, Literal, Optional, Union
+from typing import Any, Dict, List, Literal, Mapping, Optional, Union
 
 import yaml
-from pydantic import BaseModel, BaseSettings, Field
+from pydantic import BaseModel, BaseSettings, Field, validator
+from pydantic.errors import NoneIsNotAllowedError
 
 
 class MachineConfig(BaseModel):
-    acquisition_software: List[str]
+    acquisition_software: List[Literal["epu", "tomo", "serialem", "autotem"]]
     calibrations: Dict[str, Dict[str, Union[dict, float]]]
     data_directories: Dict[Path, str]
-    rsync_basepath: Path
-    default_model: Path
+    rsync_basepath: Optional[Path] = Field(
+        default=None,
+        description="Path set for the rsync daemon which will need to be prepended to file paths. Required if data_transfer_enabled is True",
+    )
+    default_model: Optional[Path] = Field(
+        default=None, description="Path to the default model used for particle picking"
+    )
     display_name: str = Field(
         default="",
         description="Name of instrument used for display purposes, i.e. Krios I",
@@ -38,7 +44,6 @@ class MachineConfig(BaseModel):
         description="Directories to be created within each visit",
     )
     analyse_created_directories: List[str] = []
-    gain_reference_directory: Optional[Path] = None
     eer_fractionation_file_template: str = ""
     gain_reference_directory: Optional[Path] = Field(
         default=None,
@@ -58,7 +63,6 @@ class MachineConfig(BaseModel):
     allow_removal: bool = Field(
         default=False, description="Allow original files to be removed after rsync"
     )
-    modular_spa: bool = False
     data_transfer_enabled: bool = True
     processing_enabled: bool = True
     machine_override: str = ""
@@ -84,7 +88,10 @@ class MachineConfig(BaseModel):
     model_search_directory: str = "processing"
     initial_model_search_directory: str = "processing/initial_model"
 
-    failure_queue: str = ""
+    failure_queue: str = Field(
+        default="",
+        description="Name of RabbitMQ queue where failed API calls will be recorded",
+    )
     instrument_server_url: str = "http://localhost:8001"
     frontend_url: str = "http://localhost:3000"
     murfey_url: str = "http://localhost:8000"
@@ -92,6 +99,24 @@ class MachineConfig(BaseModel):
     security_configuration_path: Optional[Path] = None
     auth_url: str = ""
 
+    @validator("rsync_basepath", always=True)
+    def __validate_rsync_basepath_if_transfer_enabled__(
+        cls, v: Optional[str], values: Mapping[str, Any]
+    ) -> Any:
+        if values["data_transfer_enabled"]:
+            if v is None:
+                raise NoneIsNotAllowedError
+        return v
+
+    @validator("default_model", always=True)
+    def __validate_default_model_if_processing_enabled_and_spa_possible__(
+        cls, v: Optional[str], values: Mapping[str, Any]
+    ) -> Any:
+        if values["processing_enabled"] and "epu" in values["acquisition_software"]:
+            if v is None:
+                raise NoneIsNotAllowedError
+        return v
+
 
 def from_file(config_file_path: Path, instrument: str = "") -> Dict[str, MachineConfig]:
     with open(config_file_path, "r") as config_stream:

From b6b6b50eb3abcf117158cfddfe1614c1dcf156f5 Mon Sep 17 00:00:00 2001
From: Daniel Hatton <daniel.hatton@diamond.ac.uk>
Date: Thu, 24 Oct 2024 17:44:43 +0100
Subject: [PATCH 17/91] Early development of command line tool for generating a
 configuration file

---
 pyproject.toml                    |  1 +
 src/murfey/cli/generate_config.py | 27 +++++++++++++++++++++++++++
 src/murfey/util/config.py         |  6 ++++--
 3 files changed, 32 insertions(+), 2 deletions(-)

diff --git a/pyproject.toml b/pyproject.toml
index 0bda2582..2a22c2aa 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -84,6 +84,7 @@ murfey = "murfey.client:run"
 "murfey.db_sql" = "murfey.cli.murfey_db_sql:run"
 "murfey.decrypt_password" = "murfey.cli.decrypt_db_password:run"
 "murfey.dlq_murfey" = "murfey.cli.dlq_resubmit:run"
+"murfey.generate_config" = "murfey.cli.generate_config:run"
 "murfey.generate_key" = "murfey.cli.generate_crypto_key:run"
 "murfey.generate_password" = "murfey.cli.generate_db_password:run"
 "murfey.instrument_server" = "murfey.instrument_server:run"
diff --git a/src/murfey/cli/generate_config.py b/src/murfey/cli/generate_config.py
index e69de29b..e7c42916 100644
--- a/src/murfey/cli/generate_config.py
+++ b/src/murfey/cli/generate_config.py
@@ -0,0 +1,27 @@
+from pydantic import ValidationError
+from pydantic.fields import UndefinedType
+from rich.pretty import pprint
+from rich.prompt import Prompt
+
+from murfey.util.config import MachineConfig
+
+
+def run():
+    new_config = {}
+    for k, field in MachineConfig.__fields__.items():
+        pprint(field.name)
+        pprint(field.field_info.description)
+        if isinstance(field.field_info.default, UndefinedType):
+            value = Prompt.ask("Please provide a value")
+        else:
+            value = Prompt.ask(
+                "Please provide a value", default=field.field_info.default
+            )
+        new_config[k] = value
+
+        try:
+            MachineConfig.validate(new_config)
+        except ValidationError as exc:
+            for ve in exc.errors():
+                if ve["type"] != "value_error.missing":
+                    print("Validation failed")
diff --git a/src/murfey/util/config.py b/src/murfey/util/config.py
index e8ea0cca..ca4773f2 100644
--- a/src/murfey/util/config.py
+++ b/src/murfey/util/config.py
@@ -103,7 +103,7 @@ class MachineConfig(BaseModel):
     def __validate_rsync_basepath_if_transfer_enabled__(
         cls, v: Optional[str], values: Mapping[str, Any]
     ) -> Any:
-        if values["data_transfer_enabled"]:
+        if values.get("data_transfer_enabled"):
             if v is None:
                 raise NoneIsNotAllowedError
         return v
@@ -112,7 +112,9 @@ def __validate_rsync_basepath_if_transfer_enabled__(
     def __validate_default_model_if_processing_enabled_and_spa_possible__(
         cls, v: Optional[str], values: Mapping[str, Any]
     ) -> Any:
-        if values["processing_enabled"] and "epu" in values["acquisition_software"]:
+        if values.get("processing_enabled") and "epu" in values.get(
+            "acquisition_software", []
+        ):
             if v is None:
                 raise NoneIsNotAllowedError
         return v

From db60fa43e3ced7800dd5a8074ebff74b59f55fcb Mon Sep 17 00:00:00 2001
From: Eu Pin Tien <eu-pin.tien@diamond.ac.uk>
Date: Tue, 12 Nov 2024 18:39:37 +0000
Subject: [PATCH 18/91] Rearranged MachineConfig keys by purpose and added
 descriptions

---
 src/murfey/util/config.py | 459 ++++++++++++++++++++++++++++++++------
 1 file changed, 387 insertions(+), 72 deletions(-)

diff --git a/src/murfey/util/config.py b/src/murfey/util/config.py
index 57dfb925..6d403be1 100644
--- a/src/murfey/util/config.py
+++ b/src/murfey/util/config.py
@@ -8,98 +8,403 @@
 
 import yaml
 from backports.entry_points_selectable import entry_points
-from pydantic import BaseModel, BaseSettings, Extra, Field, validator
+from pydantic import BaseModel, BaseSettings, Extra, Field, root_validator, validator
 from pydantic.errors import NoneIsNotAllowedError
 
 
 class MachineConfig(BaseModel, extra=Extra.allow):  # type: ignore
-    acquisition_software: List[Literal["epu", "tomo", "serialem", "autotem"]]
-    calibrations: Dict[str, Dict[str, Union[dict, float]]]
-    data_directories: Dict[Path, str]
-    rsync_basepath: Optional[Path] = Field(
-        default=None,
-        description="Path set for the rsync daemon which will need to be prepended to file paths. Required if data_transfer_enabled is True",
-    )
-    default_model: Optional[Path] = Field(
-        default=None, description="Path to the default model used for particle picking"
-    )
+
+    # General info about the instrument
     display_name: str = Field(
         default="",
-        description="Name of instrument used for display purposes, i.e. Krios I",
+        description="Name of instrument used for display purposes, i.e. Krios I.",
     )
     instrument_name: str = Field(
         default="",
-        description="Computer friendly instrument reference name, i.e. m02. Must not contain special characters or whitespace",
+        description=(
+            "Computer-friendly instrument reference name, i.e. m02. "
+            "The name must not contain special characters or whitespace."
+        ),
     )
     image_path: Optional[Path] = Field(
         default=None,
-        description="Path to an image of the instrument for display purposes",
-    )
-    software_versions: Dict[str, str] = {}
-    external_executables: Dict[str, str] = {}
-    external_executables_eer: Dict[str, str] = {}
-    external_environment: Dict[str, str] = {}
-    rsync_module: str = ""
-    create_directories: Dict[str, str] = Field(
+        description="Path to an image of the instrument for display purposes.",
+    )
+    machine_override: str = Field(
+        default="",
+        description=(
+            "Override the instrument name as defined in the environment variable or "
+            "the configuration with this one. This is used if, for example, many "
+            "machines are sharing a server, and need to be named differently."
+        ),
+    )
+
+    """"""
+    # Instrument hardware/software-related information
+    camera: str = Field(
+        default="FALCON",
+        description=(
+            "Name of the camera used by the TEM. This is only relevant for TEMs to "
+            "determine how the gain reference needs to be processed, e.g., if it has "
+            "to be binned down from superres or flipped along the x-/y-axis."
+        ),
+        # NOTE:
+        #   Supported options: Falcon 4, Falcon 4I, K2, K3 (superres)
+        #   _FLIPX/_FLIPY is to tell it what to do with the gain reference
+        #   Will need to create a new key to record whether the gain reference image
+        #   needs to be flipped, e.g. flip_gain: X, Y, None
+    )
+    superres: bool = Field(
+        default=False,
+        description=(
+            "Check if the superres feature present on this microscope? "
+            "For a Gatan K3, this will be set to True."
+        ),
+    )
+    flip_gain: Literal["x", "y", ""] = Field(
+        default="",
+        description=(
+            "State if the gain reference needs to be flipped along a specific axis."
+        ),
+        # NOTE: This is a placeholder for a key that will be implemented in the future
+    )
+    calibrations: dict[str, dict[str, Union[dict, float]]] = Field(
+        default={},
+        description=(
+            "Nested dictionary containing the calibrations for this microscope. "
+            "E.g., 'magnification' would be a valid dictionary, in which the "
+            "field-of-view at each magnfication level is provided as a key-value "
+            "pair."
+        ),
+    )
+
+    # NOTE:
+    #   acquisition_software, software_versions, and software_settings_output_directories
+    #   can all potentially be combined into one nested dictionary
+    acquisition_software: list[
+        Literal["epu", "tomo", "serialem", "autotem", "leica"]
+    ] = Field(
+        default=[],
+        description=("List of all the acquisition software present on this machine."),
+    )
+    software_versions: dict[str, str] = Field(
+        default={},
+        description=(
+            "Dictionary containing the version number of the acquisition software as "
+            "key-value pairs."
+        ),
+    )
+    software_settings_output_directories: dict[str, list[Path]] = Field(
+        default={},
+        description=(
+            "A dictionary in which the keys are the full file paths to the settings "
+            "for the acquisition software packages, and the values are lists of keys "
+            "through the layered structure of the XML settings files to where the save "
+            "directory can be overwritten."
+        ),
+    )
+
+    """"""
+    # Instrument-side file paths
+    data_directories: dict[Path, str] = Field(
+        default={},
+        description=(
+            "Dictionary of key-value pairs, where the keys are full paths to where "
+            "data is stored on the client machine, and the value denotes the type "
+            "of data stored at that path."
+        ),
+    )
+    data_required_substrings: dict[str, dict[str, list[str]]] = Field(
+        default={},
+        description=(
+            "Nested dictionary stating the file suffixes to look for as part of the "
+            "processing workflow for a given software package, and subsequently the "
+            "key phrases to search for within the file name for it to be selected for "
+            "processing."
+        ),
+    )
+    create_directories: dict[str, str] = Field(
         default={"atlas": "atlas"},
-        description="Directories to be created within each visit",
+        description=(
+            "Dictionary describing the directories to create within each visit on the "
+            "instrument machine. The key will be what Murfey calls the folder internaly, "
+            "while the value is what the folder is actually called on the file system."
+        ),
+        # NOTE: This key should eventually be changed into a list of strings
+    )
+    analyse_created_directories: list[str] = Field(
+        default=[],
+        description=(
+            "List of folders to be considered for analysis by Murfey. This will "
+            "generally be a subset of the list of folders specified earlier when "
+            "creating the directories for each visit."
+        ),
     )
-    analyse_created_directories: List[str] = []
-    eer_fractionation_file_template: str = ""
     gain_reference_directory: Optional[Path] = Field(
         default=None,
-        description="Directory in which the gain reference is written from the detector",
+        description=(
+            "Full path to where the gain reference from the detector is saved."
+        ),
     )
-    processed_directory_name: str = "processed"
-    gain_directory_name: str = "processing"
-    node_creator_queue: str = "node_creator"
-    superres: bool = Field(
-        default=False, description="Is the detector on this instrument a GATAN K3?"
-    )
-    camera: Literal["FALCON", "K3", ""] = Field(
-        default="FALCON",
-        description="What camera is the instrument equipped with? Only relevant for TEM instruments. Use the option closest to your case. This is used to determine whether the gain reference needs to be binned down from superres",
+    eer_fractionation_file_template: str = Field(
+        default="",
+        description=(
+            "File path template that can be provided if the EER fractionation files "
+            "are saved in a location separate from the rest of the data. This will "
+            "be a string, with '{visit}' and '{year}' being optional arguments that "
+            "can be embedded in the string. E.g.: /home/user/data/{year}/{visit}"
+        ),
+    )  # Only if Falcon is used
+    # Lets others avoid having to follow the {year}/{visit} format that we use
+
+    """"""
+    # rsync-related settings (only if rsync is used)
+    data_transfer_enabled: bool = Field(
+        default=True,
+        description=("Toggle whether to enable data transfer via rsync."),
+        # NOTE: Only request input for this code block if data transfer is enabled
     )
-    data_required_substrings: Dict[str, Dict[str, List[str]]] = {}
     allow_removal: bool = Field(
-        default=False, description="Allow original files to be removed after rsync"
-    )
-    data_transfer_enabled: bool = True
-    modular_spa: bool = False  # Is this key needed?
-    processing_enabled: bool = True
-    machine_override: str = ""
-    processed_extra_directory: str = ""
-    plugin_packages: Dict[str, Path] = {}
-    software_settings_output_directories: Dict[str, List[str]] = {}
-    process_by_default: bool = True
-    recipes: Dict[str, str] = {
-        "em-spa-bfactor": "em-spa-bfactor",
-        "em-spa-class2d": "em-spa-class2d",
-        "em-spa-class3d": "em-spa-class3d",
-        "em-spa-preprocess": "em-spa-preprocess",
-        "em-spa-refine": "em-spa-refine",
-        "em-tomo-preprocess": "em-tomo-preprocess",
-        "em-tomo-align": "em-tomo-align",
-    }
+        default=False, description="Allow original files to be removed after rsync."
+    )
+    rsync_basepath: Optional[Path] = Field(
+        default=None,
+        description=(
+            "Full path on the storage server that the rsync daemon will append the "
+            "relative paths of the transferred files to."
+        ),
+    )
+    rsync_module: Optional[str] = Field(
+        default=None,
+        description=(
+            "Name of the rsync module the files are being transferred with. The module "
+            "will be appended to the rsync base path, and the relative paths will be "
+            "appended to the module. This is particularly useful when many instrument "
+            "machines are transferring to the same storage server, as you can specify "
+            "different sub-folders to save the data to."
+        ),
+    )
 
-    # Find and download upstream directories
-    upstream_data_directories: List[Path] = []  # Previous sessions
-    upstream_data_download_directory: Optional[Path] = None  # Set by microscope config
-    upstream_data_tiff_locations: List[str] = ["processed"]  # Location of CLEM TIFFs
+    # Related visits and data
+    upstream_data_directories: list[Path] = Field(
+        default=[],
+        description=(
+            "List of file paths on other instruments for Murfey to look for the current "
+            "visit under. This is primarily used for multi-instrument workflows that "
+            "use processed data from other instruments as input."
+        ),
+    )
+    upstream_data_download_directory: Optional[Path] = Field(
+        default=None,
+        description=(
+            "Name of the folder on the instrument machine to transfer files of the same "
+            "visit from other instruments to."
+        ),
+    )
+    upstream_data_tiff_locations: list[str] = Field(
+        default=["processed"],
+        description=(
+            "Name of the sub-folder within the visit folder from which to transfer the "
+            "results. This would typically be the 'processed' folder."
+        ),
+        # NOTE: This should eventually be converted into a dictionary, which looks for
+        # files in different locations according to the workflows they correspond to
+    )
 
-    model_search_directory: str = "processing"
-    initial_model_search_directory: str = "processing/initial_model"
+    """"""
+    # Processing-related keys
+    processing_enabled: bool = Field(
+        default=True,
+        description="Toggle whether to enable data processing.",
+        # NOTE: Only request input for this code block if processing is enabled
+    )
+    process_by_default: bool = Field(
+        default=True,
+        description=(
+            "Toggle whether processing should be enabled by default. If False, Murfey "
+            "will ask the user whether they want to process the data in their current "
+            "session."
+        ),
+    )
 
+    # Server-side file paths
+    gain_directory_name: str = Field(
+        default="processing",
+        description=(
+            "Name of the folder to save the files used to facilitate data processing to. "
+            "This folder will be located under the current visit."
+        ),
+    )
+    processed_directory_name: str = Field(
+        default="processed",
+        description=(
+            "Name of the folder to save the output of the data processing workflow to. "
+            "This folder will be located under the current visit."
+        ),
+    )
+    processed_extra_directory: str = Field(
+        default="",
+        description=(
+            "Name of the sub-folder in the processed directory to save the output of "
+            "additional processing workflows to. E.g., if you are using Relion for "
+            "processing, its output files could be stored in a 'relion' sub-folder."
+        ),
+        # NOTE: This should eventually be a list of strings, if we want to allow
+        # users to add more processing options to their workflow
+    )
+
+    # Extra plugins for data acquisition(?)
+    external_executables: dict[str, Path] = Field(
+        default={},
+        description=(
+            "Dictionary containing additional software packages to be used as part of "
+            "the processing workflow. The keys are the names of the packages and the "
+            "values are the full paths to where the executables are located."
+        ),
+    )
+    external_executables_eer: dict[str, Path] = Field(
+        default={},
+        description=(
+            "A similar dictionary, but for the executables associated with processing "
+            "EER files."
+        ),
+        # NOTE: Both external_executables variables should be combined into one. The
+        # EER ones could be their own key, where different software packages are
+        # provided for different file types in different workflows.
+    )
+    external_environment: dict[str, str] = Field(
+        default={},
+        description=(
+            "Dictionary containing full paths to supporting files and executables that "
+            "are needed to run the executables to be used. These paths will be added "
+            "to the $PATH environment variable."
+        ),
+    )
+    plugin_packages: dict[str, Path] = Field(
+        default={},
+        description=(
+            "Dictionary containing full paths to additional plugins for Murfey that "
+            "help support the workflow."
+        ),
+    )
+
+    # Phase-contrast related processing workflows
+    recipes: dict[str, str] = Field(
+        default={
+            "em-spa-bfactor": "em-spa-bfactor",
+            "em-spa-class2d": "em-spa-class2d",
+            "em-spa-class3d": "em-spa-class3d",
+            "em-spa-preprocess": "em-spa-preprocess",
+            "em-spa-refine": "em-spa-refine",
+            "em-tomo-preprocess": "em-tomo-preprocess",
+            "em-tomo-align": "em-tomo-align",
+        },
+        description=(
+            "A dictionary of recipes for Murfey to run to facilitate data processing. "
+            "The key represents the name of the recipe used by Murfey, while its value "
+            "is the name of the recipe in the repository it's in."
+        ),
+        # NOTE: Currently, this recipe-searching structure is tied to the GitLab repo;
+        # need to provide an option to map it file paths instead, or even a folder.
+        # A parameter like recipe_folder might work?
+    )
+    modular_spa: bool = Field(
+        default=True,
+        description=(
+            "Deprecated key to toggle SPA processing; will be phased out eventually."
+        ),
+    )
+
+    # Particle picking settings
+    # machine_learning_model
+    default_model: Optional[Path] = Field(
+        default=None,
+        description=(
+            "Path to the default machine learning model used for particle picking."
+        ),
+    )
+    model_search_directory: str = Field(
+        default="processing",
+        description=(
+            "Relative path to where user-uploaded machine learning models are stored. "
+            "Murfey will look for the folders under the current visit."
+        ),
+    )  # User-uploaded models
+
+    initial_model_search_directory: str = Field(
+        default="processing/initial_model",  # User-uploaded electron density models
+        description=(
+            "Relative path to where user-uploaded electron density models are stored. "
+            "Murfey will look for the folders under the current visit."
+        ),
+    )
+
+    """"""
+    # Network connections
+    frontend_url: str = Field(
+        default="http://localhost:3000",
+        description="URL to the Murfey frontend.",
+    )
+    murfey_url: str = Field(
+        default="http://localhost:8000",
+        description="URL to the Murfey API.",
+    )
+    instrument_server_url: str = Field(
+        default="http://localhost:8001",
+        description="URL to the instrument server.",
+    )
+
+    # Security-related keys
+    security_configuration_path: Optional[Path] = Field(
+        default=None,
+        description=(
+            "Full file path to the YAML file containing the configurations for the "
+            "Murfey server."
+        ),
+    )
+    auth_url: str = Field(
+        default="",
+        description="URL to where users can authenticate their Murfey sessions.",
+    )
+
+    # RabbitMQ-specific keys
     failure_queue: str = Field(
         default="",
-        description="Name of RabbitMQ queue where failed API calls will be recorded",
+        description="Name of RabbitMQ queue where failed API calls will be recorded.",
+    )
+    node_creator_queue: str = Field(
+        default="node_creator",
+        description=(
+            "Name of the RabbitMQ queue where requests for creating job nodes are sent."
+        ),
     )
-    instrument_server_url: str = "http://localhost:8001"
-    frontend_url: str = "http://localhost:3000"
-    murfey_url: str = "http://localhost:8000"
 
-    security_configuration_path: Optional[Path] = None
-    auth_url: str = ""
+    @validator("camera", always=True, pre=True)
+    def _validate_camera_model(cls, value: str):
+        # Let non-strings fail validation naturally
+        if not isinstance(value, str):
+            return value
+        # Handle empty string
+        if len(value) == 0:
+            return value
+        # Match string to known camera models
+        supported_camera_models = ("FALCON", "K3")
+        if value.upper().startswith(
+            supported_camera_models
+        ):  # Case-insensitive matching
+            return value.upper()
+        else:
+            raise ValueError(
+                f"unexpected value; permitted: {supported_camera_models!r} "
+                f"(type=value_error.const; given={value!r}; "
+                f"permitted={supported_camera_models!r})"
+            )
+
+    @root_validator(pre=False)
+    def _validate_superres(cls, model: dict):
+        camera: str = model.get("camera", "")
+        model["superres"] = True if camera.startswith("K3") else False
+        return model
 
     @validator("rsync_basepath", always=True)
     def __validate_rsync_basepath_if_transfer_enabled__(
@@ -133,17 +438,25 @@ def from_file(config_file_path: Path, instrument: str = "") -> Dict[str, Machine
 
 
 class Security(BaseModel):
+    # Database connection settings
     murfey_db_credentials: str
+    sqlalchemy_pooling: bool = True
     crypto_key: str
+
+    # RabbitMQ settings
+    feedback_queue: str = "murfey_feedback"
+
+    # Server authentication settings
+    auth_type: Literal["password", "cookie"] = "password"
     auth_key: str = ""
     auth_algorithm: str = ""
-    sqlalchemy_pooling: bool = True
-    allow_origins: List[str] = ["*"]
-    session_validation: str = ""
-    session_token_timeout: Optional[int] = None
-    auth_type: Literal["password", "cookie"] = "password"
     cookie_key: str = ""
-    feedback_queue: str = "murfey_feedback"
+
+    session_validation: str = ""
+    session_token_timeout: Optional[int] = (
+        None  # seconds; typically the length of a microscope session plus a bit
+    )
+    allow_origins: List[str] = ["*"]  # Restrict to only certain hostnames
 
 
 def security_from_file(config_file_path: Path) -> Security:
@@ -165,6 +478,8 @@ def get_hostname():
     return socket.gethostname()
 
 
+# How does microscope_name differ from instrument_name?
+# Should we stick to one?
 def get_microscope(machine_config: MachineConfig | None = None) -> str:
     if machine_config:
         microscope_name = machine_config.machine_override or os.getenv("BEAMLINE", "")

From 816738a67d82cb28327d4d009f13c1720e21405f Mon Sep 17 00:00:00 2001
From: Eu Pin Tien <eu-pin.tien@diamond.ac.uk>
Date: Wed, 13 Nov 2024 08:59:15 +0000
Subject: [PATCH 19/91] Renamed Security as ServerConfig, given the change in
 its contents

---
 src/murfey/server/murfey_db.py |  6 +++---
 src/murfey/util/config.py      | 10 +++++-----
 2 files changed, 8 insertions(+), 8 deletions(-)

diff --git a/src/murfey/server/murfey_db.py b/src/murfey/server/murfey_db.py
index 2d0d52cf..5a0690e5 100644
--- a/src/murfey/server/murfey_db.py
+++ b/src/murfey/server/murfey_db.py
@@ -8,10 +8,10 @@
 from sqlalchemy.pool import NullPool
 from sqlmodel import Session, create_engine
 
-from murfey.util.config import Security, get_security_config
+from murfey.util.config import ServerConfig, get_security_config
 
 
-def url(security_config: Security | None = None) -> str:
+def url(security_config: ServerConfig | None = None) -> str:
     security_config = security_config or get_security_config()
     with open(security_config.murfey_db_credentials, "r") as stream:
         creds = yaml.safe_load(stream)
@@ -21,7 +21,7 @@ def url(security_config: Security | None = None) -> str:
 
 
 def get_murfey_db_session(
-    security_config: Security | None = None,
+    security_config: ServerConfig | None = None,
 ) -> Session:  # type: ignore
     _url = url(security_config)
     if security_config and not security_config.sqlalchemy_pooling:
diff --git a/src/murfey/util/config.py b/src/murfey/util/config.py
index 6d403be1..782092b7 100644
--- a/src/murfey/util/config.py
+++ b/src/murfey/util/config.py
@@ -437,7 +437,7 @@ def from_file(config_file_path: Path, instrument: str = "") -> Dict[str, Machine
     }
 
 
-class Security(BaseModel):
+class ServerConfig(BaseModel):
     # Database connection settings
     murfey_db_credentials: str
     sqlalchemy_pooling: bool = True
@@ -459,10 +459,10 @@ class Security(BaseModel):
     allow_origins: List[str] = ["*"]  # Restrict to only certain hostnames
 
 
-def security_from_file(config_file_path: Path) -> Security:
+def security_from_file(config_file_path: Path) -> ServerConfig:
     with open(config_file_path, "r") as config_stream:
         config = yaml.safe_load(config_stream)
-    return Security(**config)
+    return ServerConfig(**config)
 
 
 class Settings(BaseSettings):
@@ -489,7 +489,7 @@ def get_microscope(machine_config: MachineConfig | None = None) -> str:
 
 
 @lru_cache(maxsize=1)
-def get_security_config() -> Security:
+def get_security_config() -> ServerConfig:
     if settings.murfey_security_configuration:
         return security_from_file(Path(settings.murfey_security_configuration))
     if settings.murfey_machine_configuration and os.getenv("BEAMLINE"):
@@ -498,7 +498,7 @@ def get_security_config() -> Security:
         ]
         if machine_config.security_configuration_path:
             return security_from_file(machine_config.security_configuration_path)
-    return Security(
+    return ServerConfig(
         session_validation="",
         murfey_db_credentials="",
         crypto_key="",

From 70e1c68c9115d0c9b8d8b21e41086585c38d1489 Mon Sep 17 00:00:00 2001
From: Eu Pin Tien <eu-pin.tien@diamond.ac.uk>
Date: Wed, 13 Nov 2024 09:02:28 +0000
Subject: [PATCH 20/91] Fixed '_resize_initial_model' since 'executables' dict
 stores Path objects now.

---
 src/murfey/server/__init__.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/murfey/server/__init__.py b/src/murfey/server/__init__.py
index a8222c87..dfebed4b 100644
--- a/src/murfey/server/__init__.py
+++ b/src/murfey/server/__init__.py
@@ -1667,13 +1667,13 @@ def _resize_intial_model(
     downscaled_pixel_size: float,
     input_path: Path,
     output_path: Path,
-    executables: Dict[str, str],
+    executables: Dict[str, Path],
     env: Dict[str, str],
 ) -> None:
     if executables.get("relion_image_handler"):
         comp_proc = subprocess.run(
             [
-                f"{executables['relion_image_handler']}",
+                f"{str(executables['relion_image_handler'])}",
                 "--i",
                 str(input_path),
                 "--new_box",

From c53b099e2845e5faded01253b8ebf127b54819ab Mon Sep 17 00:00:00 2001
From: Eu Pin Tien <eu-pin.tien@diamond.ac.uk>
Date: Wed, 13 Nov 2024 10:48:12 +0000
Subject: [PATCH 21/91] Replaced 'from_file' with 'machine_config_from_file'
 for greater clarity

---
 src/murfey/util/config.py | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/src/murfey/util/config.py b/src/murfey/util/config.py
index 782092b7..8fe6689e 100644
--- a/src/murfey/util/config.py
+++ b/src/murfey/util/config.py
@@ -427,7 +427,9 @@ def __validate_default_model_if_processing_enabled_and_spa_possible__(
         return v
 
 
-def from_file(config_file_path: Path, instrument: str = "") -> Dict[str, MachineConfig]:
+def machine_config_from_file(
+    config_file_path: Path, instrument: str = ""
+) -> Dict[str, MachineConfig]:
     with open(config_file_path, "r") as config_stream:
         config = yaml.safe_load(config_stream)
     return {
@@ -522,7 +524,7 @@ def get_machine_config(instrument_name: str = "") -> Dict[str, MachineConfig]:
     }
     if settings.murfey_machine_configuration:
         microscope = instrument_name
-        machine_config = from_file(
+        machine_config = machine_config_from_file(
             Path(settings.murfey_machine_configuration), microscope
         )
     return machine_config

From 3e1387955e3c07af404356649fcc84018783a248 Mon Sep 17 00:00:00 2001
From: Eu Pin Tien <eu-pin.tien@diamond.ac.uk>
Date: Wed, 13 Nov 2024 10:48:22 +0000
Subject: [PATCH 22/91] Updated code to reflect type changes of parameters from
 str to Path and keys being made Optional in MachineConfig

---
 src/murfey/server/api/__init__.py | 40 +++++++++++++++++++++----------
 src/murfey/server/api/spa.py      | 13 +++++++++-
 src/murfey/server/demo_api.py     | 18 +++++++-------
 src/murfey/server/gain.py         | 12 +++++-----
 4 files changed, 56 insertions(+), 27 deletions(-)

diff --git a/src/murfey/server/api/__init__.py b/src/murfey/server/api/__init__.py
index 86e14099..ef22cacb 100644
--- a/src/murfey/server/api/__init__.py
+++ b/src/murfey/server/api/__init__.py
@@ -51,7 +51,7 @@
 from murfey.server.gain import Camera, prepare_eer_gain, prepare_gain
 from murfey.server.murfey_db import murfey_db
 from murfey.util import secure_path
-from murfey.util.config import MachineConfig, from_file, settings
+from murfey.util.config import MachineConfig, machine_config_from_file, settings
 from murfey.util.db import (
     AutoProcProgram,
     ClientEnvironment,
@@ -146,9 +146,9 @@ def connections_check():
 def machine_info() -> Optional[MachineConfig]:
     instrument_name = os.getenv("BEAMLINE")
     if settings.murfey_machine_configuration and instrument_name:
-        return from_file(Path(settings.murfey_machine_configuration), instrument_name)[
-            instrument_name
-        ]
+        return machine_config_from_file(
+            Path(settings.murfey_machine_configuration), instrument_name
+        )[instrument_name]
     return None
 
 
@@ -156,9 +156,9 @@ def machine_info() -> Optional[MachineConfig]:
 @router.get("/instruments/{instrument_name}/machine")
 def machine_info_by_name(instrument_name: str) -> Optional[MachineConfig]:
     if settings.murfey_machine_configuration:
-        return from_file(Path(settings.murfey_machine_configuration), instrument_name)[
-            instrument_name
-        ]
+        return machine_config_from_file(
+            Path(settings.murfey_machine_configuration), instrument_name
+        )[instrument_name]
     return None
 
 
@@ -1218,6 +1218,10 @@ async def request_tomography_preprocessing(
         murfey_ids = _murfey_id(appid, db, number=1, close=False)
         if not mrc_out.parent.exists():
             mrc_out.parent.mkdir(parents=True, exist_ok=True)
+        # Handle case when gain reference file is None
+        if not proc_file.gain_ref:
+            log.error("No gain reference file was provided in the ProcessFile object")
+            return proc_file
         zocalo_message: dict = {
             "recipes": ["em-tomo-preprocess"],
             "parameters": {
@@ -1236,7 +1240,9 @@ async def request_tomography_preprocessing(
                 "fm_dose": proc_file.dose_per_frame,
                 "gain_ref": (
                     str(machine_config.rsync_basepath / proc_file.gain_ref)
-                    if proc_file.gain_ref and machine_config.data_transfer_enabled
+                    if proc_file.gain_ref
+                    and machine_config.data_transfer_enabled
+                    and machine_config.rsync_basepath
                     else proc_file.gain_ref
                 ),
                 "fm_int_file": proc_file.eer_fractionation_file,
@@ -1249,7 +1255,7 @@ async def request_tomography_preprocessing(
             _transport_object.send("processing_recipe", zocalo_message)
         else:
             log.error(
-                f"Pe-processing was requested for {sanitise(ppath.name)} but no Zocalo transport object was found"
+                f"Preprocessing was requested for {sanitise(ppath.name)} but no Zocalo transport object was found"
             )
             return proc_file
     else:
@@ -1285,6 +1291,8 @@ def suggest_path(
         raise ValueError(
             "No machine configuration set when suggesting destination path"
         )
+    if not machine_config.rsync_basepath:
+        raise ValueError("No rsync basepath set")
     check_path = machine_config.rsync_basepath / base_path
     check_path_name = check_path.name
     while check_path.exists():
@@ -1366,6 +1374,8 @@ def start_dc(
     machine_config = get_machine_config(instrument_name=instrument_name)[
         instrument_name
     ]
+    if not machine_config.rsync_basepath:
+        raise ValueError("No rsync basepath set")
     log.info(
         f"Starting data collection on microscope {get_microscope(machine_config=machine_config)} "
         f"with basepath {sanitise(str(machine_config.rsync_basepath))} and directory {sanitise(dc_params.image_directory)}"
@@ -1453,8 +1463,10 @@ async def process_gain(
         executables = machine_config.external_executables
     env = machine_config.external_environment
     safe_path_name = secure_filename(gain_reference_params.gain_ref.name)
+    if not machine_config.rsync_basepath:
+        raise ValueError("No rsync basepath set")
     filepath = (
-        Path(machine_config.rsync_basepath)
+        machine_config.rsync_basepath
         / (machine_config.rsync_module or "data")
         / str(datetime.datetime.now().year)
         / secure_filename(visit_name)
@@ -1521,8 +1533,10 @@ async def write_eer_fractionation_file(
             )
         ) / secure_filename(fractionation_params.fractionation_file_name)
     else:
+        if not machine_config.rsync_basepath:
+            raise ValueError("rsync basepath not set")
         file_path = (
-            Path(machine_config.rsync_basepath)
+            machine_config.rsync_basepath
             / (machine_config.rsync_module or "data")
             / str(datetime.datetime.now().year)
             / secure_filename(visit_name)
@@ -1566,8 +1580,10 @@ async def make_gif(
     machine_config = get_machine_config(instrument_name=instrument_name)[
         instrument_name
     ]
+    if not machine_config.rsync_basepath:
+        raise ValueError("rsync basepath not set")
     output_dir = (
-        Path(machine_config.rsync_basepath)
+        machine_config.rsync_basepath
         / (machine_config.rsync_module or "data")
         / secure_filename(year)
         / secure_filename(visit_name)
diff --git a/src/murfey/server/api/spa.py b/src/murfey/server/api/spa.py
index bc759425..9166b911 100644
--- a/src/murfey/server/api/spa.py
+++ b/src/murfey/server/api/spa.py
@@ -20,6 +20,13 @@ def _cryolo_model_path(visit: str, instrument_name: str) -> Path:
     machine_config = get_machine_config(instrument_name=instrument_name)[
         instrument_name
     ]
+    # Raise error if relevant keys weren't set in MachineConfig
+    if not machine_config.rsync_basepath:
+        raise ValueError("Unable to find crYOLO model; rsync_basepath was not set")
+    if not machine_config.default_model:
+        raise ValueError("No default crYOLO model was set")
+
+    # Find user-provided crYOLO model
     if machine_config.model_search_directory:
         visit_directory = (
             machine_config.rsync_basepath
@@ -32,10 +39,14 @@ def _cryolo_model_path(visit: str, instrument_name: str) -> Path:
         )
         if possible_models:
             return sorted(possible_models, key=lambda x: x.stat().st_ctime)[-1]
+
+    # Return default crYOLO model otherwise
     return machine_config.default_model
 
 
 @router.get("/sessions/{session_id}/cryolo_model")
 def get_cryolo_model_path(session_id: int, db=murfey_db):
     session = db.exec(select(MurfeySession).where(MurfeySession.id == session_id)).one()
-    return {"model_path": _cryolo_model_path(session.visit, session.instrment_name)}
+    return {
+        "model_path": str(_cryolo_model_path(session.visit, session.instrment_name))
+    }
diff --git a/src/murfey/server/demo_api.py b/src/murfey/server/demo_api.py
index eab41858..60583776 100644
--- a/src/murfey/server/demo_api.py
+++ b/src/murfey/server/demo_api.py
@@ -40,7 +40,7 @@
 from murfey.server.api import MurfeySessionID
 from murfey.server.api.auth import validate_token
 from murfey.server.murfey_db import murfey_db
-from murfey.util.config import MachineConfig, from_file
+from murfey.util.config import MachineConfig, machine_config_from_file
 from murfey.util.db import (
     AutoProcProgram,
     ClientEnvironment,
@@ -113,7 +113,9 @@ class Settings(BaseSettings):
 machine_config: dict = {}
 if settings.murfey_machine_configuration:
     microscope = get_microscope()
-    machine_config = from_file(Path(settings.murfey_machine_configuration), microscope)
+    machine_config = machine_config_from_file(
+        Path(settings.murfey_machine_configuration), microscope
+    )
 
 
 # This will be the homepage for a given microscope.
@@ -134,9 +136,9 @@ async def root(request: Request):
 def machine_info() -> Optional[MachineConfig]:
     instrument_name = os.getenv("BEAMLINE")
     if settings.murfey_machine_configuration and instrument_name:
-        return from_file(Path(settings.murfey_machine_configuration), instrument_name)[
-            instrument_name
-        ]
+        return machine_config_from_file(
+            Path(settings.murfey_machine_configuration), instrument_name
+        )[instrument_name]
     return None
 
 
@@ -144,9 +146,9 @@ def machine_info() -> Optional[MachineConfig]:
 @router.get("/instruments/{instrument_name}/machine")
 def machine_info_by_name(instrument_name: str) -> Optional[MachineConfig]:
     if settings.murfey_machine_configuration:
-        return from_file(Path(settings.murfey_machine_configuration), instrument_name)[
-            instrument_name
-        ]
+        return machine_config_from_file(
+            Path(settings.murfey_machine_configuration), instrument_name
+        )[instrument_name]
     return None
 
 
diff --git a/src/murfey/server/gain.py b/src/murfey/server/gain.py
index f026be9a..2109c094 100644
--- a/src/murfey/server/gain.py
+++ b/src/murfey/server/gain.py
@@ -24,7 +24,7 @@ def _sanitise(gain_path: Path) -> Path:
 async def prepare_gain(
     camera: int,
     gain_path: Path,
-    executables: Dict[str, str],
+    executables: Dict[str, Path],
     env: Dict[str, str],
     rescale: bool = True,
     tag: str = "",
@@ -57,7 +57,7 @@ async def prepare_gain(
         gain_path_mrc = gain_path.with_suffix(".mrc")
         gain_path_superres = gain_path.parent / (gain_path.name + "_superres.mrc")
         dm4_proc = await asyncio.create_subprocess_shell(
-            f"{executables['dm2mrc']} {gain_path} {gain_path_mrc}",
+            f"{str(executables['dm2mrc'])} {gain_path} {gain_path_mrc}",
             stdout=asyncio.subprocess.PIPE,
             stderr=asyncio.subprocess.PIPE,
         )
@@ -65,7 +65,7 @@ async def prepare_gain(
         if dm4_proc.returncode:
             return None, None
         clip_proc = await asyncio.create_subprocess_shell(
-            f"{executables['clip']} {flip} {secure_path(gain_path_mrc)} {secure_path(gain_path_superres) if rescale else secure_path(gain_out)}",
+            f"{str(executables['clip'])} {flip} {secure_path(gain_path_mrc)} {secure_path(gain_path_superres) if rescale else secure_path(gain_out)}",
             stdout=asyncio.subprocess.PIPE,
             stderr=asyncio.subprocess.PIPE,
         )
@@ -74,7 +74,7 @@ async def prepare_gain(
             return None, None
         if rescale:
             newstack_proc = await asyncio.create_subprocess_shell(
-                f"{executables['newstack']} -bin 2 {secure_path(gain_path_superres)} {secure_path(gain_out)}",
+                f"{str(executables['newstack'])} -bin 2 {secure_path(gain_path_superres)} {secure_path(gain_out)}",
                 stdout=asyncio.subprocess.PIPE,
                 stderr=asyncio.subprocess.PIPE,
             )
@@ -88,7 +88,7 @@ async def prepare_gain(
 
 
 async def prepare_eer_gain(
-    gain_path: Path, executables: Dict[str, str], env: Dict[str, str], tag: str = ""
+    gain_path: Path, executables: Dict[str, Path], env: Dict[str, str], tag: str = ""
 ) -> Tuple[Path | None, Path | None]:
     if not executables.get("tif2mrc"):
         return None, None
@@ -98,7 +98,7 @@ async def prepare_eer_gain(
     for k, v in env.items():
         os.environ[k] = v
     mrc_convert = await asyncio.create_subprocess_shell(
-        f"{executables['tif2mrc']} {secure_path(gain_path)} {secure_path(gain_out)}"
+        f"{str(executables['tif2mrc'])} {secure_path(gain_path)} {secure_path(gain_out)}"
     )
     await mrc_convert.communicate()
     if mrc_convert.returncode:

From 203eaacf377fd5213f61e64aca9f2c452c3704a0 Mon Sep 17 00:00:00 2001
From: Eu Pin Tien <eu-pin.tien@diamond.ac.uk>
Date: Wed, 13 Nov 2024 10:53:04 +0000
Subject: [PATCH 23/91] Renamed 'security_from_file' to
 'server_config_from_file' for greater clarity

---
 src/murfey/util/config.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/src/murfey/util/config.py b/src/murfey/util/config.py
index 8fe6689e..34401268 100644
--- a/src/murfey/util/config.py
+++ b/src/murfey/util/config.py
@@ -461,7 +461,7 @@ class ServerConfig(BaseModel):
     allow_origins: List[str] = ["*"]  # Restrict to only certain hostnames
 
 
-def security_from_file(config_file_path: Path) -> ServerConfig:
+def server_config_from_file(config_file_path: Path) -> ServerConfig:
     with open(config_file_path, "r") as config_stream:
         config = yaml.safe_load(config_stream)
     return ServerConfig(**config)
@@ -493,13 +493,13 @@ def get_microscope(machine_config: MachineConfig | None = None) -> str:
 @lru_cache(maxsize=1)
 def get_security_config() -> ServerConfig:
     if settings.murfey_security_configuration:
-        return security_from_file(Path(settings.murfey_security_configuration))
+        return server_config_from_file(Path(settings.murfey_security_configuration))
     if settings.murfey_machine_configuration and os.getenv("BEAMLINE"):
         machine_config = get_machine_config(instrument_name=os.getenv("BEAMLINE"))[
             os.getenv("BEAMLINE", "")
         ]
         if machine_config.security_configuration_path:
-            return security_from_file(machine_config.security_configuration_path)
+            return server_config_from_file(machine_config.security_configuration_path)
     return ServerConfig(
         session_validation="",
         murfey_db_credentials="",

From 28885240195d19ac17e624c8ee1d5486a667ff3c Mon Sep 17 00:00:00 2001
From: Eu Pin Tien <eu-pin.tien@diamond.ac.uk>
Date: Wed, 13 Nov 2024 11:52:13 +0000
Subject: [PATCH 24/91] Renamed 'ServerConfig' to 'GlobalConfig' and
 'get_security_config' to 'get_global_config'

---
 src/murfey/cli/add_user.py              |  4 +-
 src/murfey/cli/decrypt_db_password.py   |  6 +--
 src/murfey/cli/generate_db_password.py  |  6 +--
 src/murfey/cli/inject_spa_processing.py |  6 +--
 src/murfey/cli/spa_ispyb_messages.py    |  6 +--
 src/murfey/server/__init__.py           |  8 ++--
 src/murfey/server/api/auth.py           | 28 +++++++-------
 src/murfey/server/api/clem.py           |  2 +
 src/murfey/server/main.py               |  6 +--
 src/murfey/server/murfey_db.py          | 18 ++++-----
 src/murfey/util/config.py               | 49 ++++++++++++++-----------
 11 files changed, 74 insertions(+), 65 deletions(-)

diff --git a/src/murfey/cli/add_user.py b/src/murfey/cli/add_user.py
index 3a7696d0..55011b86 100644
--- a/src/murfey/cli/add_user.py
+++ b/src/murfey/cli/add_user.py
@@ -4,7 +4,7 @@
 
 from murfey.server.api.auth import hash_password
 from murfey.server.murfey_db import url
-from murfey.util.config import get_security_config
+from murfey.util.config import get_global_config
 from murfey.util.db import MurfeyUser as User
 
 
@@ -21,7 +21,7 @@ def run():
     new_user = User(
         username=args.username, hashed_password=hash_password(args.password)
     )
-    _url = url(get_security_config())
+    _url = url(get_global_config())
     engine = create_engine(_url)
     with Session(engine) as murfey_db:
         murfey_db.add(new_user)
diff --git a/src/murfey/cli/decrypt_db_password.py b/src/murfey/cli/decrypt_db_password.py
index 0e019a1d..ff628173 100644
--- a/src/murfey/cli/decrypt_db_password.py
+++ b/src/murfey/cli/decrypt_db_password.py
@@ -2,7 +2,7 @@
 
 from cryptography.fernet import Fernet
 
-from murfey.util.config import get_security_config
+from murfey.util.config import get_global_config
 
 
 def run():
@@ -12,6 +12,6 @@ def run():
 
     args = parser.parse_args()
 
-    security_config = get_security_config()
-    f = Fernet(security_config.crypto_key.encode("ascii"))
+    global_config = get_global_config()
+    f = Fernet(global_config.crypto_key.encode("ascii"))
     print(f.decrypt(args.password.encode("ascii")).decode())
diff --git a/src/murfey/cli/generate_db_password.py b/src/murfey/cli/generate_db_password.py
index 431ede7e..ba9f07d4 100644
--- a/src/murfey/cli/generate_db_password.py
+++ b/src/murfey/cli/generate_db_password.py
@@ -3,12 +3,12 @@
 
 from cryptography.fernet import Fernet
 
-from murfey.util.config import get_security_config
+from murfey.util.config import get_global_config
 
 
 def run():
-    security_config = get_security_config()
-    f = Fernet(security_config.crypto_key.encode("ascii"))
+    global_config = get_global_config()
+    f = Fernet(global_config.crypto_key.encode("ascii"))
     alphabet = string.ascii_letters + string.digits
     password = "".join(secrets.choice(alphabet) for i in range(32))
     print(f.encrypt(password.encode("ascii")).decode())
diff --git a/src/murfey/cli/inject_spa_processing.py b/src/murfey/cli/inject_spa_processing.py
index 2b14df9d..47ab25f2 100644
--- a/src/murfey/cli/inject_spa_processing.py
+++ b/src/murfey/cli/inject_spa_processing.py
@@ -10,7 +10,7 @@
 
 from murfey.server.ispyb import TransportManager
 from murfey.server.murfey_db import url
-from murfey.util.config import get_machine_config, get_microscope, get_security_config
+from murfey.util.config import get_global_config, get_machine_config, get_microscope
 from murfey.util.db import (
     AutoProcProgram,
     ClientEnvironment,
@@ -97,13 +97,13 @@ def run():
         os.environ["BEAMLINE"] = args.microscope
 
     machine_config = get_machine_config()
-    security_config = get_security_config()
+    global_config = get_global_config()
     _url = url(machine_config)
     engine = create_engine(_url)
     murfey_db = Session(engine)
 
     _transport_object = TransportManager(args.transport)
-    _transport_object.feedback_queue = security_config.feedback_queue
+    _transport_object.feedback_queue = global_config.feedback_queue
 
     query = (
         select(Movie)
diff --git a/src/murfey/cli/spa_ispyb_messages.py b/src/murfey/cli/spa_ispyb_messages.py
index 6c54d5e2..5f8484a6 100644
--- a/src/murfey/cli/spa_ispyb_messages.py
+++ b/src/murfey/cli/spa_ispyb_messages.py
@@ -22,7 +22,7 @@
 from murfey.server.ispyb import Session, TransportManager, get_session_id
 from murfey.server.murfey_db import url
 from murfey.util import db
-from murfey.util.config import get_machine_config, get_microscope, get_security_config
+from murfey.util.config import get_global_config, get_machine_config, get_microscope
 
 
 def run():
@@ -341,7 +341,7 @@ def run():
         .where(db.ProcessingJob.recipe == "em-spa-preprocess")
     ).one()
     machine_config = get_machine_config()
-    security_config = get_security_config()
+    global_config = get_global_config()
     params = db.SPARelionParameters(
         pj_id=collected_ids[2].id,
         angpix=float(metadata["pixel_size_on_image"]) * 1e10,
@@ -378,7 +378,7 @@ def run():
 
     if args.flush_preprocess:
         _transport_object = TransportManager(args.transport)
-        _transport_object.feedback_queue = security_config.feedback_queue
+        _transport_object.feedback_queue = global_config.feedback_queue
         stashed_files = murfey_db.exec(
             select(db.PreprocessStash)
             .where(db.PreprocessStash.session_id == args.session_id)
diff --git a/src/murfey/server/__init__.py b/src/murfey/server/__init__.py
index dfebed4b..3f953638 100644
--- a/src/murfey/server/__init__.py
+++ b/src/murfey/server/__init__.py
@@ -51,10 +51,10 @@
 from murfey.util import LogFilter
 from murfey.util.config import (
     MachineConfig,
+    get_global_config,
     get_hostname,
     get_machine_config,
     get_microscope,
-    get_security_config,
 )
 from murfey.util.spa_params import default_spa_parameters
 from murfey.util.state import global_state
@@ -74,7 +74,7 @@
 _transport_object: TransportManager | None = None
 
 try:
-    _url = url(get_security_config())
+    _url = url(get_global_config())
     engine = create_engine(_url)
     murfey_db = Session(engine, expire_on_commit=False)
 except Exception:
@@ -296,9 +296,9 @@ def run():
     # Set up logging now that the desired verbosity is known
     _set_up_logging(quiet=args.quiet, verbosity=args.verbose)
 
-    security_config = get_security_config()
+    global_config = get_global_config()
     if not args.temporary and _transport_object:
-        _transport_object.feedback_queue = security_config.feedback_queue
+        _transport_object.feedback_queue = global_config.feedback_queue
     rabbit_thread = Thread(
         target=feedback_listen,
         daemon=True,
diff --git a/src/murfey/server/api/auth.py b/src/murfey/server/api/auth.py
index c962fa65..66c1dc10 100644
--- a/src/murfey/server/api/auth.py
+++ b/src/murfey/server/api/auth.py
@@ -19,7 +19,7 @@
 
 from murfey.server import sanitise
 from murfey.server.murfey_db import murfey_db, url
-from murfey.util.config import get_machine_config, get_security_config
+from murfey.util.config import get_global_config, get_machine_config
 from murfey.util.db import MurfeyUser as User
 from murfey.util.db import Session as MurfeySession
 
@@ -63,19 +63,19 @@ async def __call__(self, request: Request):
 
 
 # Set up variables used for authentication
-security_config = get_security_config()
+global_config = get_global_config()
 machine_config = get_machine_config()
 auth_url = (
     machine_config[os.getenv("BEAMLINE", "")].auth_url
     if machine_config.get(os.getenv("BEAMLINE", ""))
     else ""
 )
-ALGORITHM = security_config.auth_algorithm or "HS256"
-SECRET_KEY = security_config.auth_key or secrets.token_hex(32)
-if security_config.auth_type == "password":
+ALGORITHM = global_config.auth_algorithm or "HS256"
+SECRET_KEY = global_config.auth_key or secrets.token_hex(32)
+if global_config.auth_type == "password":
     oauth2_scheme = OAuth2PasswordBearer(tokenUrl="token")
 else:
-    oauth2_scheme = CookieScheme(cookie_key=security_config.cookie_key)
+    oauth2_scheme = CookieScheme(cookie_key=global_config.cookie_key)
 pwd_context = CryptContext(schemes=["bcrypt"], deprecated="auto")
 
 instrument_server_tokens: Dict[float, dict] = {}
@@ -96,7 +96,7 @@ def hash_password(password: str) -> str:
 
 # Set up database engine
 try:
-    _url = url(security_config)
+    _url = url(global_config)
     engine = create_engine(_url)
 except Exception:
     engine = None
@@ -114,7 +114,7 @@ def validate_user(username: str, password: str) -> bool:
 def validate_visit(visit_name: str, token: str) -> bool:
     if validators := entry_points().select(
         group="murfey.auth.session_validation",
-        name=security_config.auth_type,
+        name=global_config.auth_type,
     ):
         return validators[0].load()(visit_name, token)
     return True
@@ -166,12 +166,12 @@ async def validate_token(token: Annotated[str, Depends(oauth2_scheme)]):
         if auth_url:
             headers = (
                 {}
-                if security_config.auth_type == "cookie"
+                if global_config.auth_type == "cookie"
                 else {"Authorization": f"Bearer {token}"}
             )
             cookies = (
-                {security_config.cookie_key: token}
-                if security_config.auth_type == "cookie"
+                {global_config.cookie_key: token}
+                if global_config.auth_type == "cookie"
                 else {}
             )
             async with aiohttp.ClientSession(cookies=cookies) as session:
@@ -186,7 +186,7 @@ async def validate_token(token: Annotated[str, Depends(oauth2_scheme)]):
         else:
             if validators := entry_points().select(
                 group="murfey.auth.token_validation",
-                name=security_config.auth_type,
+                name=global_config.auth_type,
             ):
                 validators[0].load()(token)
             else:
@@ -290,8 +290,8 @@ async def mint_session_token(session_id: MurfeySessionID, db=murfey_db):
         db.exec(select(MurfeySession).where(MurfeySession.id == session_id)).one().visit
     )
     expiry_time = None
-    if security_config.session_token_timeout:
-        expiry_time = time.time() + security_config.session_token_timeout
+    if global_config.session_token_timeout:
+        expiry_time = time.time() + global_config.session_token_timeout
     token = create_access_token(
         {
             "session": session_id,
diff --git a/src/murfey/server/api/clem.py b/src/murfey/server/api/clem.py
index 0f948c36..0bbfed9c 100644
--- a/src/murfey/server/api/clem.py
+++ b/src/murfey/server/api/clem.py
@@ -76,6 +76,8 @@ def validate_and_sanitise(
     machine_config = get_machine_config(instrument_name=instrument_name)[
         instrument_name
     ]
+    if not machine_config.rsync_basepath:
+        raise ValueError("rsync basepath not set")
     base_path = machine_config.rsync_basepath.as_posix()
 
     # Check that full file path doesn't contain unallowed characters
diff --git a/src/murfey/server/main.py b/src/murfey/server/main.py
index 61ab0820..618f03f5 100644
--- a/src/murfey/server/main.py
+++ b/src/murfey/server/main.py
@@ -21,7 +21,7 @@
 import murfey.server.websocket
 import murfey.util.models
 from murfey.server import template_files
-from murfey.util.config import get_security_config
+from murfey.util.config import get_global_config
 
 # Import Murfey server or demo server based on settings
 if os.getenv("MURFEY_DEMO"):
@@ -39,7 +39,7 @@ class Settings(BaseSettings):
     murfey_machine_configuration: str = ""
 
 
-security_config = get_security_config()
+global_config = get_global_config()
 
 settings = Settings()
 
@@ -50,7 +50,7 @@ class Settings(BaseSettings):
 
 app.add_middleware(
     CORSMiddleware,
-    allow_origins=security_config.allow_origins,
+    allow_origins=global_config.allow_origins,
     allow_credentials=True,
     allow_methods=["*"],
     allow_headers=["*"],
diff --git a/src/murfey/server/murfey_db.py b/src/murfey/server/murfey_db.py
index 5a0690e5..75d2efae 100644
--- a/src/murfey/server/murfey_db.py
+++ b/src/murfey/server/murfey_db.py
@@ -8,23 +8,23 @@
 from sqlalchemy.pool import NullPool
 from sqlmodel import Session, create_engine
 
-from murfey.util.config import ServerConfig, get_security_config
+from murfey.util.config import GlobalConfig, get_global_config
 
 
-def url(security_config: ServerConfig | None = None) -> str:
-    security_config = security_config or get_security_config()
-    with open(security_config.murfey_db_credentials, "r") as stream:
+def url(global_config: GlobalConfig | None = None) -> str:
+    global_config = global_config or get_global_config()
+    with open(global_config.murfey_db_credentials, "r") as stream:
         creds = yaml.safe_load(stream)
-    f = Fernet(security_config.crypto_key.encode("ascii"))
+    f = Fernet(global_config.crypto_key.encode("ascii"))
     p = f.decrypt(creds["password"].encode("ascii"))
     return f"postgresql+psycopg2://{creds['username']}:{p.decode()}@{creds['host']}:{creds['port']}/{creds['database']}"
 
 
 def get_murfey_db_session(
-    security_config: ServerConfig | None = None,
+    global_config: GlobalConfig | None = None,
 ) -> Session:  # type: ignore
-    _url = url(security_config)
-    if security_config and not security_config.sqlalchemy_pooling:
+    _url = url(global_config)
+    if global_config and not global_config.sqlalchemy_pooling:
         engine = create_engine(_url, poolclass=NullPool)
     else:
         engine = create_engine(_url)
@@ -37,7 +37,7 @@ def get_murfey_db_session(
 
 murfey_db_session = partial(
     get_murfey_db_session,
-    get_security_config(),
+    get_global_config(),
 )
 
 murfey_db: Session = Depends(murfey_db_session)
diff --git a/src/murfey/util/config.py b/src/murfey/util/config.py
index 34401268..8676dfa0 100644
--- a/src/murfey/util/config.py
+++ b/src/murfey/util/config.py
@@ -13,8 +13,10 @@
 
 
 class MachineConfig(BaseModel, extra=Extra.allow):  # type: ignore
+    """
+    General information about the instrument being supported
+    """
 
-    # General info about the instrument
     display_name: str = Field(
         default="",
         description="Name of instrument used for display purposes, i.e. Krios I.",
@@ -39,8 +41,9 @@ class MachineConfig(BaseModel, extra=Extra.allow):  # type: ignore
         ),
     )
 
-    """"""
-    # Instrument hardware/software-related information
+    """
+    Information about the hardware and software on the instrument machine
+    """
     camera: str = Field(
         default="FALCON",
         description=(
@@ -104,7 +107,6 @@ class MachineConfig(BaseModel, extra=Extra.allow):  # type: ignore
         ),
     )
 
-    """"""
     # Instrument-side file paths
     data_directories: dict[Path, str] = Field(
         default={},
@@ -157,7 +159,9 @@ class MachineConfig(BaseModel, extra=Extra.allow):  # type: ignore
     )  # Only if Falcon is used
     # To avoid others having to follow the {year}/{visit} format we are doing
 
-    """"""
+    """
+    Data transfer-related settings
+    """
     # rsync-related settings (only if rsync is used)
     data_transfer_enabled: bool = Field(
         default=True,
@@ -211,7 +215,9 @@ class MachineConfig(BaseModel, extra=Extra.allow):  # type: ignore
         # files in different locations according to the workflows they correspond to
     )
 
-    """"""
+    """
+    Data processing-related settings
+    """
     # Processing-related keys
     processing_enabled: bool = Field(
         default=True,
@@ -288,7 +294,7 @@ class MachineConfig(BaseModel, extra=Extra.allow):  # type: ignore
         ),
     )
 
-    # Phase-contrast related processing workflows
+    # TEM-related processing workflows
     recipes: dict[str, str] = Field(
         default={
             "em-spa-bfactor": "em-spa-bfactor",
@@ -316,7 +322,6 @@ class MachineConfig(BaseModel, extra=Extra.allow):  # type: ignore
     )
 
     # Particle picking settings
-    # machine_learning_model
     default_model: Optional[Path] = Field(
         default=None,
         description=(
@@ -329,7 +334,7 @@ class MachineConfig(BaseModel, extra=Extra.allow):  # type: ignore
             "Relative path to where user-uploaded machine learning models are stored. "
             "Murfey will look for the folders under the current visit."
         ),
-    )  # User-uploaded models
+    )
 
     initial_model_search_directory: str = Field(
         default="processing/initial_model",  # User-uploaded electron density models
@@ -339,7 +344,9 @@ class MachineConfig(BaseModel, extra=Extra.allow):  # type: ignore
         ),
     )
 
-    """"""
+    """
+    Server and network-related configurations
+    """
     # Network connections
     frontend_url: str = Field(
         default="http://localhost:3000",
@@ -355,7 +362,7 @@ class MachineConfig(BaseModel, extra=Extra.allow):  # type: ignore
     )
 
     # Security-related keys
-    security_configuration_path: Optional[Path] = Field(
+    global_configuration_path: Optional[Path] = Field(
         default=None,
         description=(
             "Full file path to the YAML file containing the configurations for the "
@@ -439,7 +446,7 @@ def machine_config_from_file(
     }
 
 
-class ServerConfig(BaseModel):
+class GlobalConfig(BaseModel):
     # Database connection settings
     murfey_db_credentials: str
     sqlalchemy_pooling: bool = True
@@ -461,15 +468,15 @@ class ServerConfig(BaseModel):
     allow_origins: List[str] = ["*"]  # Restrict to only certain hostnames
 
 
-def server_config_from_file(config_file_path: Path) -> ServerConfig:
+def global_config_from_file(config_file_path: Path) -> GlobalConfig:
     with open(config_file_path, "r") as config_stream:
         config = yaml.safe_load(config_stream)
-    return ServerConfig(**config)
+    return GlobalConfig(**config)
 
 
 class Settings(BaseSettings):
     murfey_machine_configuration: str = ""
-    murfey_security_configuration: str = ""
+    murfey_global_configuration: str = ""
 
 
 settings = Settings()
@@ -491,16 +498,16 @@ def get_microscope(machine_config: MachineConfig | None = None) -> str:
 
 
 @lru_cache(maxsize=1)
-def get_security_config() -> ServerConfig:
-    if settings.murfey_security_configuration:
-        return server_config_from_file(Path(settings.murfey_security_configuration))
+def get_global_config() -> GlobalConfig:
+    if settings.murfey_global_configuration:
+        return global_config_from_file(Path(settings.murfey_global_configuration))
     if settings.murfey_machine_configuration and os.getenv("BEAMLINE"):
         machine_config = get_machine_config(instrument_name=os.getenv("BEAMLINE"))[
             os.getenv("BEAMLINE", "")
         ]
-        if machine_config.security_configuration_path:
-            return server_config_from_file(machine_config.security_configuration_path)
-    return ServerConfig(
+        if machine_config.global_configuration_path:
+            return global_config_from_file(machine_config.global_configuration_path)
+    return GlobalConfig(
         session_validation="",
         murfey_db_credentials="",
         crypto_key="",

From bde9c953cdf70f2076a44ba62e8133ec031388bb Mon Sep 17 00:00:00 2001
From: Eu Pin Tien <eu-pin.tien@diamond.ac.uk>
Date: Wed, 13 Nov 2024 12:03:24 +0000
Subject: [PATCH 25/91] Missed renaming 'get_security_config' in 'tests' folder

---
 tests/cli/test_decrypt_password.py  | 10 +++++-----
 tests/cli/test_generate_password.py | 10 +++++-----
 2 files changed, 10 insertions(+), 10 deletions(-)

diff --git a/tests/cli/test_decrypt_password.py b/tests/cli/test_decrypt_password.py
index 65952e5e..343c1208 100644
--- a/tests/cli/test_decrypt_password.py
+++ b/tests/cli/test_decrypt_password.py
@@ -5,16 +5,16 @@
 from cryptography.fernet import Fernet
 
 from murfey.cli.decrypt_db_password import run
-from murfey.util.config import get_security_config
+from murfey.util.config import get_global_config
 
 
 def test_decrypt_password(capsys, tmp_path):
-    security_config = get_security_config()
+    global_config = get_global_config()
     crypto_key = Fernet.generate_key()
-    security_config.crypto_key = crypto_key.decode("ascii")
+    global_config.crypto_key = crypto_key.decode("ascii")
     with open(tmp_path / "config.yaml", "w") as cfg:
-        yaml.dump(security_config.dict(), cfg)
-    os.environ["MURFEY_SECURITY_CONFIGURATION"] = str(tmp_path / "config.yaml")
+        yaml.dump(global_config.dict(), cfg)
+    os.environ["MURFEY_GLOBAL_CONFIGURATION"] = str(tmp_path / "config.yaml")
     password = "abcd"
     f = Fernet(crypto_key)
     encrypted_password = f.encrypt(password.encode("ascii")).decode()
diff --git a/tests/cli/test_generate_password.py b/tests/cli/test_generate_password.py
index fa48e9cf..6d41740d 100644
--- a/tests/cli/test_generate_password.py
+++ b/tests/cli/test_generate_password.py
@@ -4,16 +4,16 @@
 from cryptography.fernet import Fernet
 
 from murfey.cli.generate_db_password import run
-from murfey.util.config import get_security_config
+from murfey.util.config import get_global_config
 
 
 def test_generate_password(capsys, tmp_path):
-    security_config = get_security_config()
+    global_config = get_global_config()
     crypto_key = Fernet.generate_key()
-    security_config.crypto_key = crypto_key.decode("ascii")
+    global_config.crypto_key = crypto_key.decode("ascii")
     with open(tmp_path / "config.yaml", "w") as cfg:
-        yaml.dump(security_config.dict(), cfg)
-    os.environ["MURFEY_SECURITY_CONFIGURATION"] = str(tmp_path / "config.yaml")
+        yaml.dump(global_config.dict(), cfg)
+    os.environ["MURFEY_GLOBAL_CONFIGURATION"] = str(tmp_path / "config.yaml")
     run()
     captured = capsys.readouterr()
     f = Fernet(crypto_key)

From edfb058c24972c7dec6b82077bb47eb99f48f9af Mon Sep 17 00:00:00 2001
From: Eu Pin Tien <eu-pin.tien@diamond.ac.uk>
Date: Wed, 13 Nov 2024 13:57:40 +0000
Subject: [PATCH 26/91] Added default options to the description of 'flip_gain'

---
 src/murfey/util/config.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/src/murfey/util/config.py b/src/murfey/util/config.py
index 8676dfa0..fabc7954 100644
--- a/src/murfey/util/config.py
+++ b/src/murfey/util/config.py
@@ -67,7 +67,8 @@ class MachineConfig(BaseModel, extra=Extra.allow):  # type: ignore
     flip_gain: Literal["x", "y", ""] = Field(
         default="",
         description=(
-            "State if the gain reference needs to be flipped along a specific axis."
+            "State if the gain reference needs to be flipped along a specific axis. "
+            "Options are 'x', 'y', or ''."
         ),
         # NOTE: This is a placeholder for a key that will be implemented in the future
     )

From c1a7ccf5f7df3910773ba9af4f6922bd490b8f8e Mon Sep 17 00:00:00 2001
From: Eu Pin Tien <eu-pin.tien@diamond.ac.uk>
Date: Thu, 14 Nov 2024 14:56:11 +0000
Subject: [PATCH 27/91] Rearranged keys and updated descriptions

---
 src/murfey/util/config.py | 100 +++++++++++++++++++-------------------
 1 file changed, 51 insertions(+), 49 deletions(-)

diff --git a/src/murfey/util/config.py b/src/murfey/util/config.py
index fabc7954..1805d324 100644
--- a/src/murfey/util/config.py
+++ b/src/murfey/util/config.py
@@ -49,7 +49,8 @@ class MachineConfig(BaseModel, extra=Extra.allow):  # type: ignore
         description=(
             "Name of the camera used by the TEM. This is only relevant for TEMs to "
             "determine how the gain reference needs to be processed, e.g., if it has "
-            "to be binned down from superres or flipped along the x-/y-axis."
+            "to be binned down from superres or flipped along the x- or y-axis. "
+            "Options: 'FALCON', 'K3_FLIPX', 'K3_FLIPY'"
         ),
         # NOTE:
         #   Supported options: Falcon 4, Falcon 4I, K2, K3 (superres)
@@ -68,7 +69,7 @@ class MachineConfig(BaseModel, extra=Extra.allow):  # type: ignore
         default="",
         description=(
             "State if the gain reference needs to be flipped along a specific axis. "
-            "Options are 'x', 'y', or ''."
+            "Options: 'x', 'y', or ''."
         ),
         # NOTE: This is a placeholder for a key that will be implemented in the future
     )
@@ -78,7 +79,7 @@ class MachineConfig(BaseModel, extra=Extra.allow):  # type: ignore
             "Nested dictionary containing the calibrations for this microscope. "
             "E.g., 'magnification' would be a valid dictionary, in which the "
             "field-of-view at each magnfication level is provided as a key-value "
-            "pair."
+            "pair. Options: 'magnification'"
         ),
     )
 
@@ -98,7 +99,7 @@ class MachineConfig(BaseModel, extra=Extra.allow):  # type: ignore
             "key-value pairs."
         ),
     )
-    software_settings_output_directories: dict[str, list[Path]] = Field(
+    software_settings_output_directories: dict[str, list[str]] = Field(
         default={},
         description=(
             "A dictionary in which the keys are the full file paths to the settings "
@@ -109,14 +110,6 @@ class MachineConfig(BaseModel, extra=Extra.allow):  # type: ignore
     )
 
     # Instrument-side file paths
-    data_directories: dict[Path, str] = Field(
-        default={},
-        description=(
-            "Dictionary of key-value pairs, where the keys are full paths to where "
-            "data is stored on the client machine, and the value denotes the type "
-            "of data stored at that path."
-        ),
-    )
     data_required_substrings: dict[str, dict[str, list[str]]] = Field(
         default={},
         description=(
@@ -126,6 +119,14 @@ class MachineConfig(BaseModel, extra=Extra.allow):  # type: ignore
             "processing."
         ),
     )
+    data_directories: dict[str, str] = Field(
+        default={},
+        description=(
+            "Dictionary of key-value pairs, where the keys are full paths to where "
+            "data is stored on the client machine, and the value denotes the type "
+            "of data stored at that path."
+        ),
+    )
     create_directories: dict[str, str] = Field(
         default={"atlas": "atlas"},
         description=(
@@ -157,8 +158,9 @@ class MachineConfig(BaseModel, extra=Extra.allow):  # type: ignore
             "be a string, with '{visit}' and '{year}' being optional arguments that "
             "can be embedded in the string. E.g.: /home/user/data/{year}/{visit}"
         ),
-    )  # Only if Falcon is used
-    # To avoid others having to follow the {year}/{visit} format we are doing
+        # Only if Falcon is used
+        # To avoid others having to follow the {year}/{visit} format we are doing
+    )
 
     """
     Data transfer-related settings
@@ -260,41 +262,6 @@ class MachineConfig(BaseModel, extra=Extra.allow):  # type: ignore
         # users to add more processing options to their workflow
     )
 
-    # Extra plugins for data acquisition(?)
-    external_executables: dict[str, Path] = Field(
-        default={},
-        description=(
-            "Dictionary containing additional software packages to be used as part of "
-            "the processing workflow. The keys are the names of the packages and the "
-            "values are the full paths to where the executables are located."
-        ),
-    )
-    external_executables_eer: dict[str, Path] = Field(
-        default={},
-        description=(
-            "A similar dictionary, but for the executables associated with processing "
-            "EER files."
-        ),
-        # NOTE: Both external_executables variables should be combined into one. The
-        # EER ones could be their own key, where different software packages are
-        # provided for different file types in different workflows.
-    )
-    external_environment: dict[str, str] = Field(
-        default={},
-        description=(
-            "Dictionary containing full paths to supporting files and executables that "
-            "are needed to run the executables to be used. These paths will be added "
-            "to the $PATH environment variable."
-        ),
-    )
-    plugin_packages: dict[str, Path] = Field(
-        default={},
-        description=(
-            "Dictionary containing full paths to additional plugins for Murfey that "
-            "help support the workflow."
-        ),
-    )
-
     # TEM-related processing workflows
     recipes: dict[str, str] = Field(
         default={
@@ -345,6 +312,41 @@ class MachineConfig(BaseModel, extra=Extra.allow):  # type: ignore
         ),
     )
 
+    # Extra plugins for data acquisition(?)
+    external_executables: dict[str, Path] = Field(
+        default={},
+        description=(
+            "Dictionary containing additional software packages to be used as part of "
+            "the processing workflow. The keys are the names of the packages and the "
+            "values are the full paths to where the executables are located."
+        ),
+    )
+    external_executables_eer: dict[str, Path] = Field(
+        default={},
+        description=(
+            "A similar dictionary, but for the executables associated with processing "
+            "EER files."
+        ),
+        # NOTE: Both external_executables variables should be combined into one. The
+        # EER ones could be their own key, where different software packages are
+        # provided for different file types in different workflows.
+    )
+    external_environment: dict[str, str] = Field(
+        default={},
+        description=(
+            "Dictionary containing full paths to supporting files and executables that "
+            "are needed to run the executables to be used. These paths will be added "
+            "to the $PATH environment variable."
+        ),
+    )
+    plugin_packages: dict[str, Path] = Field(
+        default={},
+        description=(
+            "Dictionary containing full paths to additional plugins for Murfey that "
+            "help support the data collection and processing workflow."
+        ),
+    )
+
     """
     Server and network-related configurations
     """

From e6178db1cae0b017fb7c3bd4512447b1ab4a20ea Mon Sep 17 00:00:00 2001
From: Eu Pin Tien <eu-pin.tien@diamond.ac.uk>
Date: Thu, 14 Nov 2024 19:04:05 +0000
Subject: [PATCH 28/91] Updated 'generate_config' CLI (WIP)

---
 src/murfey/cli/generate_config.py | 571 +++++++++++++++++++++++++++++-
 1 file changed, 553 insertions(+), 18 deletions(-)

diff --git a/src/murfey/cli/generate_config.py b/src/murfey/cli/generate_config.py
index e7c42916..445eca89 100644
--- a/src/murfey/cli/generate_config.py
+++ b/src/murfey/cli/generate_config.py
@@ -1,27 +1,562 @@
+import json
+import re
+from pathlib import Path
+from typing import Optional
+
+import yaml
 from pydantic import ValidationError
-from pydantic.fields import UndefinedType
-from rich.pretty import pprint
-from rich.prompt import Prompt
+from pydantic.fields import ModelField, UndefinedType
+from rich.console import Console
 
 from murfey.util.config import MachineConfig
 
+# Create a console object for pretty printing
+console = Console()
 
-def run():
-    new_config = {}
-    for k, field in MachineConfig.__fields__.items():
-        pprint(field.name)
-        pprint(field.field_info.description)
-        if isinstance(field.field_info.default, UndefinedType):
-            value = Prompt.ask("Please provide a value")
+
+def prompt(message: str, style: str = "") -> str:
+    """
+    Helper function to pretty print the prompt message and add the actual prompt on a
+    newline.
+    """
+    console.print(message, style=style)
+    return input("> ")
+
+
+def print_field_info(field: ModelField):
+    console.print(field.name, style="bold cyan")
+    console.print(field.field_info.description, style="cyan")
+    if not isinstance(field.field_info.default, UndefinedType):
+        console.print(f"Default: {field.field_info.default!r}", style="cyan")
+
+
+def ask_for_input(category: str, again: bool = False):
+    """
+    Ask (y/n) whether to add a, or another, value of the given category, looping
+    until a valid answer is given. Returns True for 'y'/'yes', False for 'n'/'no'.
+    """
+    message = (
+        "Would you like to add " + ("another" if again else "a") + f" {category}? (y/n)"
+    )
+    while True:
+        answer = (
+            prompt(
+                message,
+            )
+            .lower()
+            .strip()
+        )
+        if answer in ("y", "yes"):
+            return True
+        elif answer in ("n", "no"):
+            return False
         else:
-            value = Prompt.ask(
-                "Please provide a value", default=field.field_info.default
+            console.print("Invalid input. Please try again.", style="red")
+
+
+def confirm_overwrite(key: str):
+    """
+    Check whether a key should be overwritten if a duplicate is detected
+    """
+    message = f"{key!r} already exists; do you wish to overwrite it? (y/n)"
+    while True:
+        answer = (
+            prompt(
+                message,
             )
-        new_config[k] = value
+            .lower()
+            .strip()
+        )
+        if answer in ("y", "yes"):
+            return True
+        elif answer in ("n", "no"):
+            return False
+        else:
+            console.print("Invalid input. Please try again.", style="red")
+
+
+def populate_field(key: str, field: ModelField):
+    """
+    General function for inputting and validating the value of a single field against
+    its Pydantic model.
+    """
+
+    # Display information on the field to be filled
+    print_field_info(field)
+    message = "Please provide a value (press Enter to leave it blank as '')."
+    while True:
+        # Validate fields as you key them in
+        value, error = field.validate(
+            prompt(message),
+            {},
+            loc=key,
+        )
+        if not error:
+            console.print(
+                f"{key!r} validated as {type(value)}: {value!r}", style="green"
+            )
+            return value
+        else:
+            console.print("Invalid input. Please try again.", style="red")
+
+
+def add_calibrations(key: str, field: ModelField) -> dict:
+    """
+    Interactively build the 'calibrations' dict from JSON files and validate it.
+    """
 
+    def get_calibration():
+        # Ask for a JSON file to read the calibration values from
+        calibration_file = Path(
+            prompt(
+                "What is the full file path to the calibration file? This should be a "
+                "JSON file.",
+            )
+        )
         try:
-            MachineConfig.validate(new_config)
-        except ValidationError as exc:
-            for ve in exc.errors():
-                if ve["type"] != "value_error.missing":
-                    print("Validation failed")
+            with open(calibration_file, "r") as file:
+                calibration_values: dict = json.load(file)
+                return calibration_values
+        except Exception as e:
+            console.print(
+                f"Error opening the provided file: {e}",
+                style="red",
+            )
+            if ask_for_input("calibration file", True) is True:
+                return get_calibration()
+            else:
+                return {}
+
+    # Calibration types that are currently recognised
+    known_calibrations = ("magnification",)
+
+    # Start of add_calibrations
+    print_field_info(field)
+    category = "calibration setting"
+    calibrations: dict = {}
+    add_calibration = ask_for_input(category, False)
+    while add_calibration is True:
+        calibration_type = prompt(
+            "What type of calibration settings are you providing?",
+        ).lower().strip()
+        # Check if it's a known type of calibration
+        if calibration_type not in known_calibrations:
+            console.print(
+                f"{calibration_type} is not a known type of calibration",
+                style="red",
+            )
+            add_calibration = ask_for_input(category, True)
+            continue
+        # Handle duplicate keys
+        if calibration_type in calibrations:
+            if confirm_overwrite(calibration_type) is False:
+                add_calibration = ask_for_input(category, True)
+                continue
+        # Skip failed inputs (an empty dict means the file could not be read)
+        calibration_values = get_calibration()
+        if not calibration_values:
+            add_calibration = ask_for_input(category, True)
+            continue
+
+        # Add calibration to master dict
+        calibrations[calibration_type] = calibration_values
+        console.print(
+            f"Added {calibration_type} to the calibrations field: {calibration_values}",
+            style="green",
+        )
+
+        # Check if any more calibrations need to be added
+        add_calibration = ask_for_input(category, True)
+
+    # Validate the nested dictionary structure; 'loc' is the field's key name
+    validated_calibrations, error = field.validate(calibrations, {}, loc=key)
+    if not error:
+        console.print(
+            f"{key!r} validated as {type(validated_calibrations)}: {validated_calibrations!r}",
+            style="green",
+        )
+        return validated_calibrations
+    else:
+        console.print(
+            f"Failed to validate the provided calibrations: {error}", style="red"
+        )
+        console.print("Returning an empty dictionary")
+        return {}
+
+
+def add_software_packages(config: dict):
+    def ask_about_xml_path() -> bool:
+        message = (
+            "Does this software package have a settings file that needs modification? "
+            "(y/n)"
+        )
+        answer = prompt(message).lower().strip()
+
+        # Validate
+        if answer in ("y", "yes"):
+            return True
+        if answer in ("n", "no"):
+            return False
+        console.print("Invalid input.", style="red")
+        return ask_about_xml_path()
+
+    def get_software_name() -> str:
+        name = (
+            prompt(
+                "What is the name of the software package? Supported options: 'autotem', "
+                "'epu', 'leica', 'serialem', 'tomo'",
+            )
+            .lower()
+            .strip()
+        )
+        # Validate name against "acquisition_software" field
+        field = MachineConfig.__fields__["acquisition_software"]
+        validated_name, error = field.validate([name], {}, loc="acquisition_software")
+        if not error:
+            return validated_name[0]
+        console.print(
+            "Invalid software name.",
+            style="red",
+        )
+        if ask_for_input("software package", True) is True:
+            return get_software_name()
+        return ""
+
+    def get_xml_file() -> Optional[Path]:
+        xml_file = Path(
+            prompt(
+                "What is the full file path of the settings file? This should be an "
+                "XML file.",
+            )
+        )
+        # Validate
+        if xml_file.suffix:
+            return xml_file
+        console.print(
+            "The path entered does not point to a file.",
+            style="red",
+        )
+        if ask_for_input("settings file", True) is True:
+            return get_xml_file()
+        return None
+
+    def get_xml_tree_path() -> str:
+        xml_tree_path = prompt(
+            "What is the path through the XML file to the node to overwrite?",
+        )
+        # Possibly some validation checks later
+        return xml_tree_path
+
+    def get_extensions_and_substrings() -> dict[str, list[str]]:
+        def get_file_extension() -> str:
+            extension = prompt(
+                "Please enter the extension of a file produced by this package "
+                "that is to be analysed (e.g., '.tiff', '.eer', etc.).",
+            ).strip()
+            # Validate
+            if not (extension.startswith(".") and extension.replace(".", "").isalnum()):
+                console.print(
+                    "This is an invalid file extension. Please try again. ",
+                    style="red",
+                )
+                return get_file_extension()
+            if extension in unsorted_dict.keys():
+                console.print("This extension has already been provided")
+                return ""
+            return extension
+
+        def get_file_substring() -> str:
+            substring = prompt(
+                "Please enter a keyword that will be present in files with this "
+                "extension. This field is case-sensitive.",
+            ).strip()
+            # Validate
+            if bool(re.fullmatch(r"[\w\s\-]*", substring)) is False:
+                console.print(
+                    "Invalid characters are present in this substring. Please "
+                    "try again. ",
+                    style="red",
+                )
+                return get_file_substring()
+            if substring in substrings:
+                console.print("This substring has already been provided.")
+                return ""
+            return substring
+
+        # Start of get_extensions_and_substrings
+        unsorted_dict: dict = {}
+        add_extension = ask_for_input("file extension", False)
+        while add_extension is True:
+            extension = get_file_extension()
+            if not extension:
+                add_extension = ask_for_input("file extension", True)
+                continue
+            substrings: list[str] = []
+            add_substring = ask_for_input("file substring", False)
+            while add_substring is True:
+                substring = get_file_substring()
+                if not substring:
+                    add_substring = ask_for_input("file substring", True)
+                    continue
+                substrings.append(substring)
+                add_substring = ask_for_input("file substring", True)
+            unsorted_dict[extension] = sorted(substrings)
+            add_extension = ask_for_input("file extension", True)
+
+        sorted_dict: dict = {}
+        for key in sorted(unsorted_dict.keys()):
+            sorted_dict[key] = unsorted_dict[key]
+        return sorted_dict
+
+    # Start of add_software_packages
+    category = "software package"
+    add_input = ask_for_input(category, again=False)
+    package_info: dict = {}
+    while add_input:
+        # Collect inputs
+        console.print("acquisition_software", style="bold cyan")
+        console.print(
+            "This is where aquisition software packages present on the instrument "
+            "machine can be set.",
+            style="cyan",
+        )
+        console.print(
+            "Options: 'epu', 'tomo', 'serialem', 'autotem', 'leica'",
+            style="cyan",
+        )
+        name = get_software_name()
+        if name in package_info.keys():
+            if confirm_overwrite(name) is False:
+                add_input = ask_for_input(category, True)
+                continue
+
+        version = prompt(
+            "What is the version number of this software package? Press Enter to leave "
+            "it blank if you're unsure.",
+        )
+
+        console.print("software_settings_output_directories", style="bold cyan")
+        console.print(
+            "Some software packages will have settings files that require modification "
+            "in order to ensure files are saved to the desired folders.",
+            style="cyan",
+        )
+        if ask_about_xml_path() is True:
+            xml_file = get_xml_file()
+            xml_tree_path = get_xml_tree_path()
+        else:
+            xml_file = None
+            xml_tree_path = ""
+
+        console.print("data_required_substrings", style="bold cyan")
+        console.print(
+            "Different software packages will generate different output files. Only "
+            "files with certain extensions and keywords in their filenames are needed "
+            "for data processing. They are listed out here.",
+            style="cyan",
+        )
+        file_ext_ss = get_extensions_and_substrings()
+
+        # Compile keys for this package as a dict
+        package_info[name] = {
+            "version": version,
+            "xml_file": xml_file,
+            "xml_tree_path": xml_tree_path,
+            "extensions_and_substrings": file_ext_ss,
+        }
+        add_input = ask_for_input(category, again=True)
+
+    # Re-pack keys and values according to the current config field structures
+    console.print("Compiling and validating inputs...")
+    acquisition_software: list = []
+    software_versions: dict = {}
+    software_settings_output_directories: dict = {}
+    data_required_substrings: dict = {}
+
+    # Add keys after sorting
+    for key in sorted(package_info.keys()):
+        acquisition_software.append(key)
+        if package_info[key]["version"]:
+            software_versions[key] = package_info[key]["version"]
+        if package_info[key]["xml_file"]:
+            software_settings_output_directories[str(package_info[key]["xml_file"])] = (
+                package_info[key]["xml_tree_path"]
+            )
+        if package_info[key]["extensions_and_substrings"]:
+            data_required_substrings[key] = package_info[key][
+                "extensions_and_substrings"
+            ]
+
+    # Validate against their respective fields
+    to_validate = (
+        ("acquisition_software", acquisition_software),
+        ("software_versions", software_versions),
+        ("software_settings_output_directories", software_settings_output_directories),
+        ("data_required_substrings", data_required_substrings),
+    )
+    for field_name, value in to_validate:
+        field = MachineConfig.__fields__[field_name]
+        validated_value, error = field.validate(value, {}, loc=field_name)
+        if not error:
+            config[field_name] = validated_value
+            console.print(
+                f"{field_name!r} validated as {type(validated_value)}: {validated_value!r}",
+                style="green",
+            )
+        else:
+            console.print(
+                f"Validation failed due to the following error: {error}",
+                style="red",
+            )
+            console.print("Please try again.", style="red")
+            return add_software_packages(config)
+
+    # Return updated dictionary
+    return config
+
+
+def set_up_data_transfer(config: dict) -> dict:
+    return config
+
+
+def set_up_data_processing(config: dict) -> dict:
+    return config
+
+
+def set_up_external_executables(config: dict) -> dict:
+    return config
+
+
+def run():
+    new_config = {}
+    for key, field in MachineConfig.__fields__.items():
+        """
+        Logic for complicated or related fields
+        """
+        if key == "superres":
+            camera: str = new_config["camera"]
+            new_config[key] = True if camera.lower().startswith("gatan") else False
+            continue
+        if key == "calibrations":
+            new_config[key] = add_calibrations(key, field)
+            continue
+
+        # Acquisition software block
+        if key == "acquisition_software":
+            new_config = add_software_packages(new_config)
+            continue
+        if key in (
+            "software_versions",
+            "software_settings_output_directories",
+            "data_required_substrings",
+        ):
+            continue
+        # End of software block
+
+        if key == "data_directories":
+            # TODO
+            continue
+        if key == "create_directories":
+            # TODO
+            continue
+        if key == "analyse_created_directories":
+            # TODO
+            continue
+
+        # Data transfer block
+        if key == "data_transfer_enabled":
+            # TODO: Set up data transfer settings in a separate function
+            new_config = set_up_data_transfer(new_config)
+            continue
+        if key in (
+            "allow_removal",
+            "rsync_basepath",
+            "rsync_module",
+            "upstream_data_directories",
+            "upstream_data_download_directory",
+            "upstream_data_tiff_locations",
+        ):
+            continue
+        # End of data transfer block
+
+        # Data processing block
+        if key == "processing_enabled":
+            new_config = set_up_data_processing(new_config)
+            continue
+        if key in (
+            "process_by_default",
+            "gain_directory_name",
+            "processed_directory_name",
+            "processed_extra_directory",
+            "recipes",
+            "modular_spa",
+            "default_model",
+            "model_search_directory",
+            "initial_model_search_directory",
+        ):
+            continue
+        # End of data processing block
+
+        # External plugins and executables block
+        if key == "external_executables":
+            # TODO: Set up external plugins and exectuables
+            new_config = set_up_external_executables(new_config)
+            continue
+        if key in ("external_executables_eer", "external_environment"):
+            continue
+        # End of external executables block
+
+        if key == "plugin_packages":
+            # TODO
+            continue
+
+        """
+        Standard method of inputting values
+        """
+
+        new_config[key] = populate_field(key, field)
+
+    # Validate the entire config one last time
+    try:
+        MachineConfig.validate(new_config)
+    except ValidationError as exc:
+        for ve in exc.errors():
+            if ve["type"] != "value_error.missing":
+                print("Validation failed")
+        exit()
+
+    # Save the config
+    config_name = prompt(
+        "Machine config successfully validated. What would you like to name the file? "
+        "(E.g. 'my_machine_config')"
+    )
+    config_path = Path(prompt("Where would you like to save this config?"))
+    config_file = config_path / f"{config_name}.yaml"
+    # Create save directory
+    config_path.mkdir(parents=True, exist_ok=True)
+
+    # Check if config file already exists at the location
+    if config_file.exists():
+        # Check if the settings at this machine already exist
+        with open(config_file) as existing_file:
+            try:
+                old_config: dict[str, dict] = yaml.safe_load(existing_file)
+            except yaml.YAMLError as error:
+                console.print(error, style="red")
+                exit()
+        for key in new_config.keys():
+            if key in old_config.keys():
+                if confirm_overwrite() is False:
+                    old_config[key].update(new_config[key])
+                else:
+                    old_config[key] = new_config[key]
+            else:
+                old_config[key] = new_config[key]
+        # Overwrite
+        new_config = old_config
+    with open(config_file, "w") as save_file:
+        yaml.dump(new_config, save_file, default_flow_style=False)
+    console.print(
+        f"Machine config file successfully created at {str(config_path)}", styel="green"
+    )
+    console.print("Machine config setup complete", style="green")

From 4e7737173f1b52c746a80061b9902a19e2c6eb8b Mon Sep 17 00:00:00 2001
From: Eu Pin Tien <eu-pin.tien@diamond.ac.uk>
Date: Fri, 15 Nov 2024 09:13:52 +0000
Subject: [PATCH 29/91] Fixed errors in code logic

---
 src/murfey/cli/generate_config.py | 34 ++++++++++++++++++++++---------
 1 file changed, 24 insertions(+), 10 deletions(-)

diff --git a/src/murfey/cli/generate_config.py b/src/murfey/cli/generate_config.py
index 445eca89..336df99a 100644
--- a/src/murfey/cli/generate_config.py
+++ b/src/murfey/cli/generate_config.py
@@ -327,7 +327,7 @@ def get_file_substring() -> str:
         name = get_software_name()
         if name in package_info.keys():
             if confirm_overwrite(name) is False:
-                add_input(category, False)
+                add_input = ask_for_input(category, False)
                 continue
 
         version = prompt(
@@ -525,38 +525,52 @@ def run():
                 print("Validation failed")
         exit()
 
-    # Save the config
+    # Save config under its instrument name
+    master_config: dict[str, dict] = {new_config["instrument_name"]: new_config}
+
+    # Create save path for config
     config_name = prompt(
         "Machine config successfully validated. What would you like to name the file? "
         "(E.g. 'my_machine_config')"
     )
     config_path = Path(prompt("Where would you like to save this config?"))
     config_file = config_path / f"{config_name}.yaml"
-    # Create save directory
     config_path.mkdir(parents=True, exist_ok=True)
 
     # Check if config file already exists at the location
     if config_file.exists():
-        # Check if the settings at this machine already exist
         with open(config_file) as existing_file:
             try:
                 old_config: dict[str, dict] = yaml.safe_load(existing_file)
             except yaml.YAMLError as error:
                 console.print(error, style="red")
+                # Provide option to quit or try again
+                if ask_for_input("machine configuration", True) is True:
+                    return run()
+                console.print("Exiting machine configuration setup guide")
                 exit()
-        for key in new_config.keys():
+        # Check if settings already exist for this machine
+        for key in master_config.keys():
             if key in old_config.keys():
-                if confirm_overwrite() is False:
+                if confirm_overwrite(key) is False:
                     old_config[key].update(new_config[key])
                 else:
                     old_config[key] = new_config[key]
             else:
                 old_config[key] = new_config[key]
         # Overwrite
-        new_config = old_config
+        master_config = old_config
     with open(config_file, "w") as save_file:
-        yaml.dump(new_config, save_file, default_flow_style=False)
+        yaml.dump(master_config, save_file, default_flow_style=False)
     console.print(
-        f"Machine config file successfully created at {str(config_path)}", styel="green"
+        f"Machine configuration for {new_config['instrument_name']!r} "
+        f"successfully saved as {str(config_file)!r}",
+        style="green",
     )
-    console.print("Machine config setup complete", style="green")
+    console.print("Machine configuration complete", style="green")
+
+    # Provide option to set up another machine configuration
+    if ask_for_input("machine configuration", True) is True:
+        return run()
+    console.print("Exiting machine configuration setup guide")
+    exit()

From 3ac7467263f19d1ccb111332052200dbd0fdcbd1 Mon Sep 17 00:00:00 2001
From: Eu Pin Tien <eu-pin.tien@diamond.ac.uk>
Date: Fri, 15 Nov 2024 12:34:42 +0000
Subject: [PATCH 30/91] Set data transfer and data processing to False by
 default; set rsync basepath to / by default; added Config class to
 MachineConfig model

---
 src/murfey/util/config.py | 46 +++++++++++++++++++++++++++++----------
 1 file changed, 35 insertions(+), 11 deletions(-)

diff --git a/src/murfey/util/config.py b/src/murfey/util/config.py
index 1805d324..b5109cb9 100644
--- a/src/murfey/util/config.py
+++ b/src/murfey/util/config.py
@@ -8,11 +8,19 @@
 
 import yaml
 from backports.entry_points_selectable import entry_points
-from pydantic import BaseModel, BaseSettings, Extra, Field, root_validator, validator
+from pydantic import (
+    BaseConfig,
+    BaseModel,
+    BaseSettings,
+    Extra,
+    Field,
+    root_validator,
+    validator,
+)
 from pydantic.errors import NoneIsNotAllowedError
 
 
-class MachineConfig(BaseModel, extra=Extra.allow):  # type: ignore
+class MachineConfig(BaseModel):
     """
     General information about the instrument being supported
     """
@@ -167,19 +175,18 @@ class MachineConfig(BaseModel, extra=Extra.allow):  # type: ignore
     """
     # rsync-related settings (only if rsync is used)
     data_transfer_enabled: bool = Field(
-        default=True,
+        default=False,
         description=("Toggle whether to enable data transfer via rsync."),
         # NOTE: Only request input for this code block if data transfer is enabled
     )
-    allow_removal: bool = Field(
-        default=False, description="Allow original files to be removed after rsync."
-    )
-    rsync_basepath: Optional[Path] = Field(
-        default=None,
+    rsync_basepath: Path = Field(
+        default=Path("/"),
         description=(
             "Full path on the storage server that the rsync daemon will append the "
             "relative paths of the transferred files to."
         ),
+        # If rsync is disabled, rsync_basepath works out to be "/".
+        # Must always be set.
     )
     rsync_module: Optional[str] = Field(
         default=None,
@@ -191,6 +198,9 @@ class MachineConfig(BaseModel, extra=Extra.allow):  # type: ignore
             "different sub-folders to save the data to."
         ),
     )
+    allow_removal: bool = Field(
+        default=False, description="Allow original files to be removed after rsync."
+    )
 
     # Related visits and data
     upstream_data_directories: list[Path] = Field(
@@ -223,7 +233,7 @@ class MachineConfig(BaseModel, extra=Extra.allow):  # type: ignore
     """
     # Processing-related keys
     processing_enabled: bool = Field(
-        default=True,
+        default=False,
         description="Toggle whether to enable data processing.",
         # NOTE: Only request input for this code block if processing is enabled
     )
@@ -389,8 +399,16 @@ class MachineConfig(BaseModel, extra=Extra.allow):  # type: ignore
         ),
     )
 
+    class Config(BaseConfig):
+        """
+        Additional settings for how this Pydantic model behaves
+        """
+
+        extra = Extra.allow
+        json_encoders = {Path: str}
+
     @validator("camera", always=True, pre=True)
-    def _validate_camera_model(cls, value: str):
+    def __validate_camera_model__(cls, value: str):
         # Let non-strings fail validation naturally
         if not isinstance(value, str):
             return value
@@ -411,7 +429,7 @@ def _validate_camera_model(cls, value: str):
             )
 
     @root_validator(pre=False)
-    def _validate_superres(cls, model: dict):
+    def __validate_superres__(cls, model: dict):
         camera: str = model.get("camera", "")
         model["superres"] = True if camera.startswith("K3") else False
         return model
@@ -420,6 +438,9 @@ def _validate_superres(cls, model: dict):
     def __validate_rsync_basepath_if_transfer_enabled__(
         cls, v: Optional[str], values: Mapping[str, Any]
     ) -> Any:
+        """
+        If data transfer is enabled, an rsync basepath must be provided.
+        """
         if values.get("data_transfer_enabled"):
             if v is None:
                 raise NoneIsNotAllowedError
@@ -429,6 +450,9 @@ def __validate_rsync_basepath_if_transfer_enabled__(
     def __validate_default_model_if_processing_enabled_and_spa_possible__(
         cls, v: Optional[str], values: Mapping[str, Any]
     ) -> Any:
+        """
+        If data processing is enabled, a machine learning model must be provided.
+        """
         if values.get("processing_enabled") and "epu" in values.get(
             "acquisition_software", []
         ):

From b7ac67f1d8083ffb9ad54e0db2f3dcc09e018b8a Mon Sep 17 00:00:00 2001
From: Eu Pin Tien <eu-pin.tien@diamond.ac.uk>
Date: Fri, 15 Nov 2024 12:35:47 +0000
Subject: [PATCH 31/91] Fixed logic in setup to save config as a YAML file with
 complex objects properly serialised

---
 src/murfey/cli/generate_config.py | 51 ++++++++++++++++++-------------
 1 file changed, 29 insertions(+), 22 deletions(-)

diff --git a/src/murfey/cli/generate_config.py b/src/murfey/cli/generate_config.py
index 336df99a..5c09d200 100644
--- a/src/murfey/cli/generate_config.py
+++ b/src/murfey/cli/generate_config.py
@@ -309,21 +309,22 @@ def get_file_substring() -> str:
         return sorted_dict
 
     # Start of add_software_packages
+    console.print("acquisition_software", style="bold cyan")
+    console.print(
+        "This is where aquisition software packages present on the instrument "
+        "machine can be set.",
+        style="cyan",
+    )
+    console.print(
+        "Options: 'epu', 'tomo', 'serialem', 'autotem', 'leica'",
+        style="cyan",
+    )
+    package_info: dict = {}
     category = "software package"
     add_input = ask_for_input(category, again=False)
-    package_info: dict = {}
     while add_input:
         # Collect inputs
         console.print("acquisition_software", style="bold cyan")
-        console.print(
-            "This is where aquisition software packages present on the instrument "
-            "machine can be set.",
-            style="cyan",
-        )
-        console.print(
-            "Options: 'epu', 'tomo', 'serialem', 'autotem', 'leica'",
-            style="cyan",
-        )
         name = get_software_name()
         if name in package_info.keys():
             if confirm_overwrite(name) is False:
@@ -516,17 +517,24 @@ def run():
 
         new_config[key] = populate_field(key, field)
 
-    # Validate the entire config one last time
+    # Validate the entire config again and convert into JSON/YAML-safe dict
     try:
-        MachineConfig.validate(new_config)
-    except ValidationError as exc:
-        for ve in exc.errors():
-            if ve["type"] != "value_error.missing":
-                print("Validation failed")
+        new_config_json = MachineConfig(**new_config).json()
+        new_config_safe = json.loads(new_config_json)
+    except ValidationError as exception:
+        # Print out validation errors found
+        console.print("Validation failed", style="red")
+        for error in exception.errors():
+            console.print(f"{error}", style="red")
+        # Offer to redo the setup, otherwise quit setup
+        if ask_for_input("machine configuration", True) is True:
+            return run()
         exit()
 
     # Save config under its instrument name
-    master_config: dict[str, dict] = {new_config["instrument_name"]: new_config}
+    master_config: dict[str, dict] = {
+        new_config_safe["instrument_name"]: new_config_safe
+    }
 
     # Create save path for config
     config_name = prompt(
@@ -551,11 +559,10 @@ def run():
                 exit()
         # Check if settings already exist for this machine
         for key in master_config.keys():
-            if key in old_config.keys():
-                if confirm_overwrite(key) is False:
-                    old_config[key].update(new_config[key])
-                else:
-                    old_config[key] = new_config[key]
+            # Check if overwriting of existing config is needed
+            if key in old_config.keys() and confirm_overwrite(key) is False:
+                old_config[key].update(new_config[key])
+            # Add new machine config
             else:
                 old_config[key] = new_config[key]
         # Overwrite

From e7b37bfa440913aabdef6e8b1c35412919de2df4 Mon Sep 17 00:00:00 2001
From: Eu Pin Tien <eu-pin.tien@diamond.ac.uk>
Date: Mon, 18 Nov 2024 11:23:10 +0000
Subject: [PATCH 32/91] Adjusted type hint for 'rsync_module'

---
 src/murfey/util/config.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/murfey/util/config.py b/src/murfey/util/config.py
index b5109cb9..5cfa236f 100644
--- a/src/murfey/util/config.py
+++ b/src/murfey/util/config.py
@@ -188,8 +188,8 @@ class MachineConfig(BaseModel):
         # If rsync is disabled, rsync_basepath works out to be "/".
         # Must always be set.
     )
-    rsync_module: Optional[str] = Field(
-        default=None,
+    rsync_module: str = Field(
+        default="",
         description=(
             "Name of the rsync module the files are being transferred with. The module "
             "will be appended to the rsync base path, and the relative paths will be "

From 617d84bba0bad49d9f573f29196c60509f85e1b7 Mon Sep 17 00:00:00 2001
From: Eu Pin Tien <eu-pin.tien@diamond.ac.uk>
Date: Mon, 18 Nov 2024 11:23:58 +0000
Subject: [PATCH 33/91] Fixed logic when saving config; adjusted colour of
 messages

---
 src/murfey/cli/generate_config.py | 104 +++++++++++++++---------------
 1 file changed, 51 insertions(+), 53 deletions(-)

diff --git a/src/murfey/cli/generate_config.py b/src/murfey/cli/generate_config.py
index 5c09d200..606aa886 100644
--- a/src/murfey/cli/generate_config.py
+++ b/src/murfey/cli/generate_config.py
@@ -1,7 +1,7 @@
 import json
 import re
 from pathlib import Path
-from typing import Optional
+from typing import Optional, get_type_hints
 
 import yaml
 from pydantic import ValidationError
@@ -12,6 +12,7 @@
 
 # Create a console object for pretty printing
 console = Console()
+machine_config_types: dict = get_type_hints(MachineConfig)
 
 
 def prompt(message: str, style: str = "") -> str:
@@ -24,55 +25,48 @@ def prompt(message: str, style: str = "") -> str:
 
 
 def print_field_info(field: ModelField):
-    console.print(field.name, style="bold cyan")
-    console.print(field.field_info.description, style="cyan")
+    """
+    Helper function to print out the name of the key being set up, along with a short
+    description of what purpose the key serves.
+    """
+    console.print(
+        f"{field.name.replace('_', ' ').title()} ({field.name})",
+        style="bold bright_cyan",
+    )
+    console.print(field.field_info.description, style="italic bright_cyan")
     if not isinstance(field.field_info.default, UndefinedType):
-        console.print(f"Default: {field.field_info.default!r}", style="cyan")
+        console.print(f"Default: {field.field_info.default!r}", style="bright_cyan")
 
 
 def ask_for_input(category: str, again: bool = False):
     """
-    Short while loop when to facilitate adding more than one value to a field in the
-    config.
+    Perform a Boolean check to see if another value is to be appended to the current
+    parameter being set up.
     """
     message = (
         "Would you like to add " + ("another" if again else "a") + f" {category}? (y/n)"
     )
     while True:
-        answer = (
-            prompt(
-                message,
-            )
-            .lower()
-            .strip()
-        )
+        answer = prompt(message, style="yellow").lower().strip()
         if answer in ("y", "yes"):
             return True
-        elif answer in ("n", "no"):
+        if answer in ("n", "no"):
             return False
-        else:
-            console.print("Invalid input. Please try again.", style="red")
+        console.print("Invalid input. Please try again.", style="red")
 
 
 def confirm_overwrite(key: str):
     """
-    Check whether a key should be overwritten if a duplicate is detected
+    Check whether a key should be overwritten if a duplicate is detected.
     """
     message = f"{key!r} already exists; do you wish to overwrite it? (y/n)"
     while True:
-        answer = (
-            prompt(
-                message,
-            )
-            .lower()
-            .strip()
-        )
+        answer = prompt(message, style="yellow").lower().strip()
         if answer in ("y", "yes"):
             return True
-        elif answer in ("n", "no"):
+        if answer in ("n", "no"):
             return False
-        else:
-            console.print("Invalid input. Please try again.", style="red")
+        console.print("Invalid input. Please try again.", style="red")
 
 
 def populate_field(key: str, field: ModelField):
@@ -85,17 +79,22 @@ def populate_field(key: str, field: ModelField):
     print_field_info(field)
     message = "Please provide a value (press Enter to leave it blank as '')."
     while True:
-        # Validate fields as you key them in
-        value, error = field.validate(
-            prompt(message),
-            {},
-            loc=key,
+        # Get value
+        answer = prompt(message, style="yellow")
+        # Translate empty string into None for fields that take Path values
+        value = (
+            None
+            if (not answer and machine_config_types.get(key) in (Path, Optional[Path]))
+            else answer
         )
+
+        validated_value, error = field.validate(value, {}, loc=key)
         if not error:
             console.print(
-                f"{key!r} validated as {type(value)}: {value!r}", style="green"
+                f"{key!r} validated as {type(validated_value)}: {validated_value!r}",
+                style="bright_green",
             )
-            return value
+            return validated_value
         else:
             console.print("Invalid input. Please try again.", style="red")
 
@@ -162,7 +161,7 @@ def get_calibration():
         calibrations[calibration_type] = calibration_values
         console.print(
             f"Added {calibration_type} to the calibrations field: {calibration_values}",
-            style="green",
+            style="bright_green",
         )
 
         # Check if any more calibrations need to be added
@@ -173,7 +172,7 @@ def get_calibration():
     if not error:
         console.print(
             f"{key!r} validated as {type(validated_calibrations)}: {validated_calibrations!r}",
-            style="green",
+            style="bright_green",
         )
         return validated_calibrations
     else:
@@ -309,22 +308,22 @@ def get_file_substring() -> str:
         return sorted_dict
 
     # Start of add_software_packages
-    console.print("acquisition_software", style="bold cyan")
+    console.print("acquisition_software", style="bold bright_cyan")
     console.print(
         "This is where aquisition software packages present on the instrument "
         "machine can be set.",
-        style="cyan",
+        style="bright_cyan",
     )
     console.print(
         "Options: 'epu', 'tomo', 'serialem', 'autotem', 'leica'",
-        style="cyan",
+        style="bright_cyan",
     )
     package_info: dict = {}
     category = "software package"
     add_input = ask_for_input(category, again=False)
     while add_input:
         # Collect inputs
-        console.print("acquisition_software", style="bold cyan")
+        console.print("acquisition_software", style="bold bright_cyan")
         name = get_software_name()
         if name in package_info.keys():
             if confirm_overwrite(name) is False:
@@ -336,11 +335,11 @@ def get_file_substring() -> str:
             "it blank if you're unsure.",
         )
 
-        console.print("software_settings_output_directories", style="bold cyan")
+        console.print("software_settings_output_directories", style="bold bright_cyan")
         console.print(
             "Some software packages will have settings files that require modification "
             "in order to ensure files are saved to the desired folders.",
-            style="cyan",
+            style="bright_cyan",
         )
         if ask_about_xml_path() is True:
             xml_file = get_xml_file()
@@ -349,12 +348,12 @@ def get_file_substring() -> str:
             xml_file = None
             xml_tree_path = ""
 
-        console.print("data_required_substrings", style="bold cyan")
+        console.print("data_required_substrings", style="bold bright_cyan")
         console.print(
             "Different software packages will generate different output files. Only "
             "files with certain extensions and keywords in their filenames are needed "
             "for data processing. They are listed out here.",
-            style="cyan",
+            style="bright_cyan",
         )
         file_ext_ss = get_extensions_and_substrings()
 
@@ -402,7 +401,7 @@ def get_file_substring() -> str:
             config[field_name] = validated_value
             console.print(
                 f"{field_name!r} validated as {type(validated_value)}: {validated_value!r}",
-                style="green",
+                style="bright_green",
             )
         else:
             console.print(
@@ -519,8 +518,7 @@ def run():
 
     # Validate the entire config again and convert into JSON/YAML-safe dict
     try:
-        new_config_json = MachineConfig(**new_config).json()
-        new_config_safe = json.loads(new_config_json)
+        new_config_safe: dict = json.loads(MachineConfig(**new_config).json())
     except ValidationError as exception:
         # Print out validation errors found
         console.print("Validation failed", style="red")
@@ -561,23 +559,23 @@ def run():
         for key in master_config.keys():
             # Check if overwriting of existing config is needed
             if key in old_config.keys() and confirm_overwrite(key) is False:
-                old_config[key].update(new_config[key])
+                old_config[key].update(master_config[key])
             # Add new machine config
             else:
-                old_config[key] = new_config[key]
+                old_config[key] = master_config[key]
         # Overwrite
         master_config = old_config
     with open(config_file, "w") as save_file:
         yaml.dump(master_config, save_file, default_flow_style=False)
     console.print(
-        f"Machine configuration for {new_config['instrument_name']!r} "
+        f"Machine configuration for {new_config_safe['instrument_name']!r} "
         f"successfully saved as {str(config_file)!r}",
-        style="green",
+        style="bright_green",
     )
-    console.print("Machine configuration complete", style="green")
+    console.print("Machine configuration complete", style="bright_green")
 
     # Provide option to set up another machine configuration
     if ask_for_input("machine configuration", True) is True:
         return run()
-    console.print("Exiting machine configuration setup guide")
+    console.print("Exiting machine configuration setup guide", style="bright_green")
     exit()

From 92a57df4d72b0e8e26ef7098133eaf01f2c44f61 Mon Sep 17 00:00:00 2001
From: Eu Pin Tien <eu-pin.tien@diamond.ac.uk>
Date: Mon, 18 Nov 2024 11:52:55 +0000
Subject: [PATCH 34/91] Added debug flag for machine config setup; further
 updated colours of messages

---
 src/murfey/cli/generate_config.py | 101 +++++++++++++++++++++---------
 1 file changed, 71 insertions(+), 30 deletions(-)

diff --git a/src/murfey/cli/generate_config.py b/src/murfey/cli/generate_config.py
index 606aa886..90d514d0 100644
--- a/src/murfey/cli/generate_config.py
+++ b/src/murfey/cli/generate_config.py
@@ -1,3 +1,4 @@
+import argparse
 import json
 import re
 from pathlib import Path
@@ -12,6 +13,8 @@
 
 # Create a console object for pretty printing
 console = Console()
+
+# Compile types for each key present in MachineConfig
 machine_config_types: dict = get_type_hints(MachineConfig)
 
 
@@ -69,7 +72,7 @@ def confirm_overwrite(key: str):
         console.print("Invalid input. Please try again.", style="red")
 
 
-def populate_field(key: str, field: ModelField):
+def populate_field(key: str, field: ModelField, debug: bool = False):
     """
     General function for inputting and validating the value of a single field against
     its Pydantic model.
@@ -90,16 +93,18 @@ def populate_field(key: str, field: ModelField):
 
         validated_value, error = field.validate(value, {}, loc=key)
         if not error:
-            console.print(
-                f"{key!r} validated as {type(validated_value)}: {validated_value!r}",
-                style="bright_green",
-            )
+            console.print(f"{key!r} successfully validated", style="bright_green")
+            if debug:
+                console.print(
+                    f"{type(validated_value)}\n{validated_value!r}",
+                    style="bright_green",
+                )
             return validated_value
         else:
             console.print("Invalid input. Please try again.", style="red")
 
 
-def add_calibrations(key: str, field: ModelField) -> dict:
+def add_calibrations(key: str, field: ModelField, debug: bool = False) -> dict:
     """
     Populate the 'calibrations' field with dictionaries.
     """
@@ -110,6 +115,7 @@ def get_calibration():
             prompt(
                 "What is the full file path to the calibration file? This should be a "
                 "JSON file.",
+                style="yellow",
             )
         )
         try:
@@ -137,6 +143,7 @@ def get_calibration():
     while add_calibration is True:
         calibration_type = prompt(
             "What type of calibration settings are you providing?",
+            style="yellow",
         ).lower()
         # Check if it's a known type of calibration
         if calibration_type not in known_calibraions:
@@ -160,9 +167,11 @@ def get_calibration():
         # Add calibration to master dict
         calibrations[calibration_type] = calibration_values
         console.print(
-            f"Added {calibration_type} to the calibrations field: {calibration_values}",
+            f"Added {calibration_type} to the calibrations field",
             style="bright_green",
         )
+        if debug:
+            console.print(f"{calibration_values}", style="bright_green")
 
         # Check if any more calibrations need to be added
         add_calibration = ask_for_input(category="calibration setting", again=True)
@@ -170,26 +179,28 @@ def get_calibration():
     # Validate the nested dictionary structure
     validated_calibrations, error = field.validate(calibrations, {}, loc=field)
     if not error:
-        console.print(
-            f"{key!r} validated as {type(validated_calibrations)}: {validated_calibrations!r}",
-            style="bright_green",
-        )
+        console.print(f"{key!r} validated successfully", style="bright_green")
+        if debug:
+            console.print(
+                f"{type(validated_calibrations)}\n{validated_calibrations!r}",
+                style="bright_green",
+            )
         return validated_calibrations
     else:
         console.print(
             f"Failed to validate the provided calibrations: {error}", style="red"
         )
-        console.print("Returning an empty dictionary")
+        console.print("Returning an empty dictionary", style="red")
         return {}
 
 
-def add_software_packages(config: dict):
+def add_software_packages(config: dict, debug: bool = False):
     def ask_about_xml_path() -> bool:
         message = (
             "Does this software package have a settings file that needs modification? "
             "(y/n)"
         )
-        answer = prompt(message).lower().strip()
+        answer = prompt(message, style="yellow").lower().strip()
 
         # Validate
         if answer in ("y", "yes"):
@@ -226,6 +237,7 @@ def get_xml_file() -> Optional[Path]:
             prompt(
                 "What is the full file path of the settings file? This should be an "
                 "XML file.",
+                style="yellow",
             )
         )
         # Validate
@@ -242,6 +254,7 @@ def get_xml_file() -> Optional[Path]:
     def get_xml_tree_path() -> str:
         xml_tree_path = prompt(
             "What is the path through the XML file to the node to overwrite?",
+            style="yellow",
         )
         # Possibly some validation checks later
         return xml_tree_path
@@ -251,6 +264,7 @@ def get_file_extension() -> str:
             extension = prompt(
                 "Please enter the extension of a file produced by this package "
                 "that is to be analysed (e.g., '.tiff', '.eer', etc.).",
+                style="yellow",
             ).strip()
             # Validate
             if not (extension.startswith(".") and extension.replace(".", "").isalnum()):
@@ -268,6 +282,7 @@ def get_file_substring() -> str:
             substring = prompt(
                 "Please enter a keyword that will be present in files with this "
                 "extension. This field is case-sensitive.",
+                style="yellow",
             ).strip()
             # Validate
             if bool(re.fullmatch(r"[\w\s\-]*", substring)) is False:
@@ -333,6 +348,7 @@ def get_file_substring() -> str:
         version = prompt(
             "What is the version number of this software package? Press Enter to leave "
             "it blank if you're unsure.",
+            style="yellow",
         )
 
         console.print("software_settings_output_directories", style="bold bright_cyan")
@@ -400,9 +416,13 @@ def get_file_substring() -> str:
         if not error:
             config[field_name] = validated_value
             console.print(
-                f"{field_name!r} validated as {type(validated_value)}: {validated_value!r}",
-                style="bright_green",
+                f"{field_name!r} validated successfully", style="bright_green"
             )
+            if debug:
+                console.print(
+                    f"{type(validated_value)}\n{validated_value!r}",
+                    style="bright_green",
+                )
         else:
             console.print(
                 f"Validation failed due to the following error: {error}",
@@ -415,20 +435,23 @@ def get_file_substring() -> str:
     return config
 
 
-def set_up_data_transfer(config: dict) -> dict:
+def set_up_data_transfer(config: dict, debug: bool = False) -> dict:
     return config
 
 
-def set_up_data_processing(config: dict) -> dict:
+def set_up_data_processing(config: dict, debug: bool = False) -> dict:
     return config
 
 
-def set_up_external_executables(config: dict) -> dict:
+def set_up_external_executables(config: dict, debug: bool = False) -> dict:
     return config
 
 
-def run():
-    new_config = {}
+def set_up_machine_config(debug: bool = False):
+    """
+    Main function which runs through the setup process.
+    """
+    new_config: dict = {}
     for key, field in MachineConfig.__fields__.items():
         """
         Logic for complicated or related fields
@@ -438,12 +461,12 @@ def run():
             new_config[key] = True if camera.lower().startswith("gatan") else False
             continue
         if key == "calibrations":
-            new_config[key] = add_calibrations(key, field)
+            new_config[key] = add_calibrations(key, field, debug)
             continue
 
         # Acquisition software block
         if key == "acquisition_software":
-            new_config = add_software_packages(new_config)
+            new_config = add_software_packages(new_config, debug)
             continue
         if key in (
             "software_versions",
@@ -466,7 +489,7 @@ def run():
         # Data transfer block
         if key == "data_transfer_enabled":
             # TODO: Set up data transfer settings in a separate function
-            new_config = set_up_data_transfer(new_config)
+            new_config = set_up_data_transfer(new_config, debug)
             continue
         if key in (
             "allow_removal",
@@ -481,7 +504,7 @@ def run():
 
         # Data processing block
         if key == "processing_enabled":
-            new_config = set_up_data_processing(new_config)
+            new_config = set_up_data_processing(new_config, debug)
             continue
         if key in (
             "process_by_default",
@@ -500,7 +523,7 @@ def run():
         # External plugins and executables block
         if key == "external_executables":
             # TODO: Set up external plugins and exectuables
-            new_config = set_up_external_executables(new_config)
+            new_config = set_up_external_executables(new_config, debug)
             continue
         if key in ("external_executables_eer", "external_environment"):
             continue
@@ -514,7 +537,7 @@ def run():
         Standard method of inputting values
         """
 
-        new_config[key] = populate_field(key, field)
+        new_config[key] = populate_field(key, field, debug)
 
     # Validate the entire config again and convert into JSON/YAML-safe dict
     try:
@@ -535,11 +558,14 @@ def run():
     }
 
     # Create save path for config
+    console.print("Machine config successfully validated.", style="green")
     config_name = prompt(
-        "Machine config successfully validated. What would you like to name the file? "
-        "(E.g. 'my_machine_config')"
+        "What would you like to name the file? (E.g. 'my_machine_config')",
+        style="yellow",
+    )
+    config_path = Path(
+        prompt("Where would you like to save this config?", style="yellow")
     )
-    config_path = Path(prompt("Where would you like to save this config?"))
     config_file = config_path / f"{config_name}.yaml"
     config_path.mkdir(parents=True, exist_ok=True)
 
@@ -579,3 +605,18 @@ def run():
         return run()
     console.print("Exiting machine configuration setup guide", style="bright_green")
     exit()
+
+
+def run():
+    # Set up arg parser
+    parser = argparse.ArgumentParser()
+    parser.add_argument(
+        "--debug",
+        action="store_true",
+        help="Prints additional messages to show setup progress.",
+    )
+    args = parser.parse_args()
+
+    set_up_machine_config(args.debug)
+
+    pass

From bf2ce951f82e2db18f57a58d14b5cea0bd576b6d Mon Sep 17 00:00:00 2001
From: Eu Pin Tien <eu-pin.tien@diamond.ac.uk>
Date: Mon, 18 Nov 2024 12:01:48 +0000
Subject: [PATCH 35/91] Replaced 'exit()' in 'set_up_machine_config' with
 'return'.

---
 src/murfey/cli/generate_config.py | 12 +++++-------
 1 file changed, 5 insertions(+), 7 deletions(-)

diff --git a/src/murfey/cli/generate_config.py b/src/murfey/cli/generate_config.py
index 90d514d0..4c309f42 100644
--- a/src/murfey/cli/generate_config.py
+++ b/src/murfey/cli/generate_config.py
@@ -549,8 +549,8 @@ def set_up_machine_config(debug: bool = False):
             console.print(f"{error}", style="red")
         # Offer to redo the setup, otherwise quit setup
         if ask_for_input("machine configuration", True) is True:
-            return run()
-        exit()
+            return set_up_machine_config(debug)
+        return False
 
     # Save config under its instrument name
     master_config: dict[str, dict] = {
@@ -578,7 +578,7 @@ def set_up_machine_config(debug: bool = False):
                 console.print(error, style="red")
                 # Provide option to quit or try again
                 if ask_for_input("machine configuration", True) is True:
-                    return run()
+                    return set_up_machine_config(debug)
                 console.print("Exiting machine configuration setup guide")
                 exit()
         # Check if settings already exist for this machine
@@ -602,9 +602,9 @@ def set_up_machine_config(debug: bool = False):
 
     # Provide option to set up another machine configuration
     if ask_for_input("machine configuration", True) is True:
-        return run()
+        return set_up_machine_config(debug)
     console.print("Exiting machine configuration setup guide", style="bright_green")
-    exit()
+    return True
 
 
 def run():
@@ -618,5 +618,3 @@ def run():
     args = parser.parse_args()
 
     set_up_machine_config(args.debug)
-
-    pass

From 4cf7e2a566872985955124ada00d798f0370f455 Mon Sep 17 00:00:00 2001
From: Eu Pin Tien <eu-pin.tien@diamond.ac.uk>
Date: Mon, 18 Nov 2024 15:42:56 +0000
Subject: [PATCH 36/91] Added function to set up data directories

---
 src/murfey/cli/generate_config.py | 132 ++++++++++++++++++++++++++----
 1 file changed, 115 insertions(+), 17 deletions(-)

diff --git a/src/murfey/cli/generate_config.py b/src/murfey/cli/generate_config.py
index 4c309f42..c9aec103 100644
--- a/src/murfey/cli/generate_config.py
+++ b/src/murfey/cli/generate_config.py
@@ -2,7 +2,7 @@
 import json
 import re
 from pathlib import Path
-from typing import Optional, get_type_hints
+from typing import Any, Optional, get_type_hints
 
 import yaml
 from pydantic import ValidationError
@@ -104,7 +104,9 @@ def populate_field(key: str, field: ModelField, debug: bool = False):
             console.print("Invalid input. Please try again.", style="red")
 
 
-def add_calibrations(key: str, field: ModelField, debug: bool = False) -> dict:
+def add_calibrations(
+    key: str, field: ModelField, debug: bool = False
+) -> dict[str, dict]:
     """
     Populate the 'calibrations' field with dictionaries.
     """
@@ -194,7 +196,7 @@ def get_calibration():
         return {}
 
 
-def add_software_packages(config: dict, debug: bool = False):
+def add_software_packages(config: dict, debug: bool = False) -> dict[str, Any]:
     def ask_about_xml_path() -> bool:
         message = (
             "Does this software package have a settings file that needs modification? "
@@ -215,6 +217,7 @@ def get_software_name() -> str:
             prompt(
                 "What is the name of the software package? Supported options: 'autotem', "
                 "'epu', 'leica', 'serialem', 'tomo'",
+                style="yellow",
             )
             .lower()
             .strip()
@@ -297,7 +300,9 @@ def get_file_substring() -> str:
                 return ""
             return substring
 
-        # Start of get_extensions_and_substrings
+        """
+        Start of get_extensions_and_substrings
+        """
         unsorted_dict: dict = {}
         add_extension = ask_for_input("file extension", False)
         while add_extension is True:
@@ -322,40 +327,62 @@ def get_file_substring() -> str:
             sorted_dict[key] = unsorted_dict[key]
         return sorted_dict
 
-    # Start of add_software_packages
-    console.print("acquisition_software", style="bold bright_cyan")
+    """
+    Start of add_software_packages
+    """
     console.print(
-        "This is where aquisition software packages present on the instrument "
-        "machine can be set.",
-        style="bright_cyan",
+        "Acquisition Software (acquisition_software)",
+        style="bold bright_cyan",
     )
     console.print(
-        "Options: 'epu', 'tomo', 'serialem', 'autotem', 'leica'",
-        style="bright_cyan",
+        "This is where aquisition software packages present on the instrument machine "
+        "can be specified, along with the output file names and extensions that are of "
+        "interest.",
+        style="italic bright_cyan",
     )
     package_info: dict = {}
     category = "software package"
     add_input = ask_for_input(category, again=False)
     while add_input:
         # Collect inputs
-        console.print("acquisition_software", style="bold bright_cyan")
+        console.print(
+            "Acquisition Software (acquisition_software)",
+            style="bold bright_cyan",
+        )
+        console.print(
+            "Name of the acquisition software installed on this instrument.",
+            style="italic bright_cyan",
+        )
+        console.print(
+            "Options: 'autotem', 'epu', 'leica', 'serialem', 'tomo'",
+            style="bright_cyan",
+        )
         name = get_software_name()
         if name in package_info.keys():
             if confirm_overwrite(name) is False:
                 add_input = ask_for_input(category, False)
                 continue
 
+        console.print(
+            "Software Versions (software_versions)",
+            style="bold bright_cyan",
+        )
         version = prompt(
             "What is the version number of this software package? Press Enter to leave "
             "it blank if you're unsure.",
             style="yellow",
         )
 
-        console.print("software_settings_output_directories", style="bold bright_cyan")
+        console.print(
+            "Software Settings Output Directories (software_settings_output_directories)",
+            style="bold bright_cyan",
+        )
         console.print(
             "Some software packages will have settings files that require modification "
-            "in order to ensure files are saved to the desired folders.",
-            style="bright_cyan",
+            "in order to ensure files are saved to the desired folders. The paths to "
+            "the files and the path to the nodes in the settings files both need to be "
+            "provided.",
+            style="italic bright_cyan",
         )
         if ask_about_xml_path() is True:
             xml_file = get_xml_file()
@@ -364,12 +391,15 @@ def get_file_substring() -> str:
             xml_file = None
             xml_tree_path = ""
 
-        console.print("data_required_substrings", style="bold bright_cyan")
+        console.print(
+            "Data Required Substrings (data_required_substrings)",
+            style="bold bright_cyan",
+        )
         console.print(
             "Different software packages will generate different output files. Only "
             "files with certain extensions and keywords in their filenames are needed "
             "for data processing. They are listed out here.",
-            style="bright_cyan",
+            style="italic bright_cyan",
         )
         file_ext_ss = get_extensions_and_substrings()
 
@@ -435,6 +465,73 @@ def get_file_substring() -> str:
     return config
 
 
+def add_data_directories(
+    key: str, field: ModelField, debug: bool = False
+) -> dict[str, str]:
+    def get_directory() -> Optional[Path]:
+        answer = prompt(
+            "What is the full file path to the data directory you wish to add?",
+            style="yellow",
+        )
+        # Convert "" into None
+        if not answer:
+            return None
+        return Path(answer)
+
+    def get_directory_type():
+        answer = prompt(
+            "What type of data is stored in this directory? Options: 'microscope', "
+            "'detector'",
+            style="yellow",
+        ).lower()
+        if answer not in ("microscope", "detector"):
+            console.print("Invalid directory type.", style="red")
+            if ask_for_input("directory type", True) is True:
+                return get_directory_type()
+            return ""
+        return answer
+
+    """
+    Start of add_data_directories
+    """
+    print_field_info(field)
+    data_directories: dict[str, str] = {}
+    category = "data directory"
+    add_directory = ask_for_input(category, False)
+    while add_directory is True:
+        directory = get_directory()
+        # Move on to next loop or exit if no directory provided
+        if not directory:
+            console.print("No directory added", style="red")
+            add_directory = ask_for_input(category, True)
+            continue
+
+        # Get the directory type
+        directory_type = get_directory_type()
+        if not directory_type:
+            console.print("No directory type provided", style="red")
+
+        # Add to dictionary
+        data_directories[str(directory)] = directory_type
+
+        # Check if more need to be added
+        add_directory = ask_for_input(category, True)
+        continue
+
+    # Validate and return
+    validated_data_directories, error = field.validate(data_directories, {}, loc=key)
+    if not error:
+        console.print(f"Validated {key!r} successfully", style="bright_green")
+        if debug:
+            console.print(f"{type(validated_data_directories)}")
+            console.print(f"{validated_data_directories}")
+        return data_directories
+    console.print(f"Failed to validate {key!r}", style="red")
+    if ask_for_input(category, True) is True:
+        return add_data_directories(key, field, debug)
+    return {}
+
+
 def set_up_data_transfer(config: dict, debug: bool = False) -> dict:
     return config
 
@@ -478,6 +575,7 @@ def set_up_machine_config(debug: bool = False):
 
         if key == "data_directories":
             # TODO
+            new_config[key] = add_data_directories(key, field, debug)
             continue
         if key == "create_directories":
             # TODO

From a7a6b05945fe6c1483475d5a64b51a788562074e Mon Sep 17 00:00:00 2001
From: Eu Pin Tien <eu-pin.tien@diamond.ac.uk>
Date: Mon, 18 Nov 2024 18:16:54 +0000
Subject: [PATCH 37/91] Updated notes and type hints for 'camera'

---
 src/murfey/util/config.py | 11 ++++++-----
 1 file changed, 6 insertions(+), 5 deletions(-)

diff --git a/src/murfey/util/config.py b/src/murfey/util/config.py
index 5cfa236f..a91df6b5 100644
--- a/src/murfey/util/config.py
+++ b/src/murfey/util/config.py
@@ -52,7 +52,7 @@ class MachineConfig(BaseModel):
     """
     Information about the hardware and software on the instrument machine
     """
-    camera: str = Field(
+    camera: Literal["FALCON", "K3_FLIPX", "K3_FLIPY"] = Field(
         default="FALCON",
         description=(
             "Name of the camera used by the TEM. This is only relevant for TEMs to "
@@ -61,10 +61,11 @@ class MachineConfig(BaseModel):
             "Options: 'FALCON', 'K3_FLIPX', 'K3_FLIPY'"
         ),
         # NOTE:
-        #   Supported options: Falcon 4, Falcon 4I, K2, K3 (superres)
-        #   _FLIPX/_FLIPY is to tell it what to do with the gain reference
-        #   Will need to create a new key to record whether the gain reference image
-        #   needs to be flippedflip_gain: X, Y, None
+        #   Eventually need to support Falcon 4, Falcon 4I, K2, K3 (superres)
+        #   _FLIPX/_FLIPY is to tell it what to do with the gain reference.
+        #   -   These will eventually be removed, leaving only the camera name
+        #   -   Will need to create a new key to record whether the gain reference
+        #       image needs to be flippedflip_gain: X, Y, None
     )
     superres: bool = Field(
         default=False,

From 71de649004f6012923f0ddae6f59b70848b01dd4 Mon Sep 17 00:00:00 2001
From: Eu Pin Tien <eu-pin.tien@diamond.ac.uk>
Date: Mon, 18 Nov 2024 18:17:56 +0000
Subject: [PATCH 38/91] Added function to set up directories for Murfey to
 create

---
 src/murfey/cli/generate_config.py | 79 ++++++++++++++++++++++++++++---
 1 file changed, 72 insertions(+), 7 deletions(-)

diff --git a/src/murfey/cli/generate_config.py b/src/murfey/cli/generate_config.py
index c9aec103..e3f93339 100644
--- a/src/murfey/cli/generate_config.py
+++ b/src/murfey/cli/generate_config.py
@@ -469,21 +469,22 @@ def add_data_directories(
     key: str, field: ModelField, debug: bool = False
 ) -> dict[str, str]:
     def get_directory() -> Optional[Path]:
+        message = "What is the full file path to the data directory you wish to add?"
         answer = prompt(
-            "What is the full file path to the data directory you wish to add?",
+            message,
             style="yellow",
-        )
+        ).strip()
         # Convert "" into None
         if not answer:
             return None
         return Path(answer)
 
     def get_directory_type():
-        answer = prompt(
+        message = (
             "What type of data is stored in this directory? Options: 'microscope', "
-            "'detector'",
-            style="yellow",
-        ).lower()
+            "'detector'"
+        )
+        answer = prompt(message, style="yellow").lower().strip()
         if answer not in ("microscope", "detector"):
             console.print("Invalid directory type.", style="red")
             if ask_for_input("directory type", True) is True:
@@ -532,6 +533,70 @@ def get_directory_type():
     return {}
 
 
+def add_create_directories(
+    key: str, field: ModelField, debug: bool = False
+) -> dict[str, str]:
+    def get_folder() -> str:
+        message = (
+            "Please input the name of the folder for Murfey to create. Press Enter "
+            "to skip this."
+        )
+        answer = prompt(message, style="yellow").lower().strip()
+        if bool(re.fullmatch(r"[\w\s\-]*", answer)) is False:
+            console.print(
+                "There are unsafe characters present in this folder name. Please "
+                "use a different one.",
+                style="red",
+            )
+            if ask_for_input("folder name", True) is True:
+                return get_folder()
+            return ""
+        return answer
+
+    def get_folder_alias() -> str:
+        message = "Please enter the name you want to map this folder to within Murfey."
+        answer = prompt(message, style="yellow").lower().strip()
+        if bool(re.fullmatch(r"[\w\s\-]*", answer)) is False:
+            console.print(
+                "There are unsafe characters present in this folder name. Please "
+                "use a different one.",
+                style="red",
+            )
+            if ask_for_input("folder alias", True) is True:
+                return get_folder_alias()
+            return ""
+        return answer
+
+    """
+    Start of add_create_directories
+    """
+    print_field_info(field)
+    directories_to_create: dict[str, str] = {}
+    category = "directory for Murfey to create"
+    add_directory: bool = ask_for_input(category, False)
+    while add_directory is True:
+        folder_name = get_folder()
+        if not folder_name:
+            console.print(
+                "No folder name provided",
+                style="red",
+            )
+            add_directory = ask_for_input(category, True)
+            continue
+        folder_alias = get_folder_alias()
+        if not folder_alias:
+            console.print(
+                "No folder alias provided",
+                style="red",
+            )
+            add_directory = ask_for_input(category, True)
+            continue
+        directories_to_create[folder_alias] = folder_name
+        add_directory = ask_for_input(category, True)
+        continue
+    return directories_to_create
+
+
 def set_up_data_transfer(config: dict, debug: bool = False) -> dict:
     return config
 
@@ -574,10 +639,10 @@ def set_up_machine_config(debug: bool = False):
         # End of software block
 
         if key == "data_directories":
-            # TODO
             new_config[key] = add_data_directories(key, field, debug)
             continue
         if key == "create_directories":
+            new_config[key] = add_create_directories(key, field, debug)
             # TODO
             continue
         if key == "analyse_created_directories":

From 422d1f5ddcb6f9da3a2415393b14bcb56c9db559 Mon Sep 17 00:00:00 2001
From: Eu Pin Tien <eu-pin.tien@diamond.ac.uk>
Date: Mon, 18 Nov 2024 19:03:23 +0000
Subject: [PATCH 39/91] Added function to set up folders to analyse; added
 validation to setup functions

---
 src/murfey/cli/generate_config.py | 108 +++++++++++++++++++++++-------
 1 file changed, 82 insertions(+), 26 deletions(-)

diff --git a/src/murfey/cli/generate_config.py b/src/murfey/cli/generate_config.py
index e3f93339..19f5446a 100644
--- a/src/murfey/cli/generate_config.py
+++ b/src/murfey/cli/generate_config.py
@@ -197,21 +197,6 @@ def get_calibration():
 
 
 def add_software_packages(config: dict, debug: bool = False) -> dict[str, Any]:
-    def ask_about_xml_path() -> bool:
-        message = (
-            "Does this software package have a settings file that needs modification? "
-            "(y/n)"
-        )
-        answer = prompt(message, style="yellow").lower().strip()
-
-        # Validate
-        if answer in ("y", "yes"):
-            return True
-        if answer in ("n", "no"):
-            return False
-        console.print("Invalid input.", style="red")
-        return ask_about_xml_path()
-
     def get_software_name() -> str:
         name = (
             prompt(
@@ -235,6 +220,21 @@ def get_software_name() -> str:
             return get_software_name()
         return ""
 
+    def ask_about_xml_path() -> bool:
+        message = (
+            "Does this software package have a settings file that needs modification? "
+            "(y/n)"
+        )
+        answer = prompt(message, style="yellow").lower().strip()
+
+        # Validate
+        if answer in ("y", "yes"):
+            return True
+        if answer in ("n", "no"):
+            return False
+        console.print("Invalid input.", style="red")
+        return ask_about_xml_path()
+
     def get_xml_file() -> Optional[Path]:
         xml_file = Path(
             prompt(
@@ -525,7 +525,7 @@ def get_directory_type():
         console.print(f"Validated {key!r} successfully", style="bright_green")
         if debug:
             console.print(f"{type(validated_data_directories)}")
-            console.print(f"{validated_data_directories}")
+            console.print(f"{validated_data_directories!r}")
         return data_directories
     console.print(f"Failed to validate {key!r}", style="red")
     if ask_for_input(category, True) is True:
@@ -537,10 +537,7 @@ def add_create_directories(
     key: str, field: ModelField, debug: bool = False
 ) -> dict[str, str]:
     def get_folder() -> str:
-        message = (
-            "Please input the name of the folder for Murfey to create. Press Enter "
-            "to skip this."
-        )
+        message = "Please enter the name of the folder for Murfey to create."
         answer = prompt(message, style="yellow").lower().strip()
         if bool(re.fullmatch(r"[\w\s\-]*", answer)) is False:
             console.print(
@@ -554,7 +551,7 @@ def get_folder() -> str:
         return answer
 
     def get_folder_alias() -> str:
-        message = "Please enter the name you want to map this folder to within Murfey."
+        message = "Please enter the name Murfey should map this folder to."
         answer = prompt(message, style="yellow").lower().strip()
         if bool(re.fullmatch(r"[\w\s\-]*", answer)) is False:
             console.print(
@@ -571,8 +568,8 @@ def get_folder_alias() -> str:
     Start of add_create_directories
     """
     print_field_info(field)
-    directories_to_create: dict[str, str] = {}
-    category = "directory for Murfey to create"
+    folders_to_create: dict[str, str] = {}
+    category = "folder for Murfey to create"
     add_directory: bool = ask_for_input(category, False)
     while add_directory is True:
         folder_name = get_folder()
@@ -591,10 +588,69 @@ def get_folder_alias() -> str:
             )
             add_directory = ask_for_input(category, True)
             continue
-        directories_to_create[folder_alias] = folder_name
+        folders_to_create[folder_alias] = folder_name
         add_directory = ask_for_input(category, True)
         continue
-    return directories_to_create
+
+    # Validate and return
+    validated_folders, errors = field.validate(folders_to_create, {}, loc=key)
+    if not errors:
+        console.print(f"{key!r} validated successfully", style="bright_green")
+        if debug:
+            console.print(f"{type(validated_folders)}", style="bright_green")
+            console.print(f"{validated_folders!r}", style="bright_green")
+        return folders_to_create
+    console.print(f"Failed to validate {key!r}")
+    if ask_for_input(category, True) is True:
+        return add_create_directories(key, field, debug)
+    return {}
+
+
+def add_analyse_created_directories(
+    key: str, field: ModelField, debug: bool = False
+) -> list[str]:
+    def get_folder() -> str:
+        message = "Please enter the name of the folder that Murfey is to analyse."
+        answer = prompt(message, style="yellow").lower().strip()
+        if bool(re.fullmatch(r"[\w\s\-]*", answer)) is False:
+            console.print(
+                "There are unsafe characters present in the folder name. Please "
+                "use a different folder.",
+                style="red",
+            )
+            if ask_for_input("folder name", True) is True:
+                return get_folder()
+            return ""
+        return answer
+
+    """
+    Start of add_analyse_created_directories
+    """
+    folders_to_create: list[str] = []
+    category = "folder for Murfey to analyse"
+    add_folder = ask_for_input(category, False)
+    while add_folder is True:
+        folder_name = get_folder()
+        if not folder_name:
+            console.print("No folder name provided", style="red")
+            add_folder = ask_for_input(category, True)
+            continue
+        folders_to_create.append(folder_name)
+        add_folder = ask_for_input(category, True)
+        continue
+
+    # Validate and return
+    validated_folders, errors = field.validate(folders_to_create, {}, loc=key)
+    if not errors:
+        console.print(f"{key!r} validated successfully", style="bright_green")
+        if debug:
+            console.print(f"{type(validated_folders)}", style="bright_green")
+            console.print(f"{validated_folders!r}", style="bright_green")
+        return sorted(validated_folders)
+    console.print(f"Failed to validate {key!r}", style="red")
+    if ask_for_input(category, True) is True:
+        return add_analyse_created_directories(key, field, debug)
+    return []
 
 
 def set_up_data_transfer(config: dict, debug: bool = False) -> dict:
@@ -643,9 +699,9 @@ def set_up_machine_config(debug: bool = False):
             continue
         if key == "create_directories":
             new_config[key] = add_create_directories(key, field, debug)
-            # TODO
             continue
         if key == "analyse_created_directories":
+            new_config[key] = add_analyse_created_directories(key, field, debug)
             # TODO
             continue
 

From 0cf5b15f206255de0eda308e733def187eebe64d Mon Sep 17 00:00:00 2001
From: Eu Pin Tien <eu-pin.tien@diamond.ac.uk>
Date: Tue, 19 Nov 2024 14:27:50 +0000
Subject: [PATCH 40/91] Added function to standardise field validation.

---
 src/murfey/cli/generate_config.py | 219 +++++++++++++-----------------
 1 file changed, 96 insertions(+), 123 deletions(-)

diff --git a/src/murfey/cli/generate_config.py b/src/murfey/cli/generate_config.py
index 19f5446a..24abe0f2 100644
--- a/src/murfey/cli/generate_config.py
+++ b/src/murfey/cli/generate_config.py
@@ -72,7 +72,21 @@ def confirm_overwrite(key: str):
         console.print("Invalid input. Please try again.", style="red")
 
 
-def populate_field(key: str, field: ModelField, debug: bool = False):
+def validate_value(value: Any, key: str, field: ModelField, debug: bool = False) -> Any:
+    """
+    Helper function to validate the value of the desired field for a Pydantic model.
+    """
+    validated_value, errors = field.validate(value, {}, loc=key)
+    if errors:
+        raise ValidationError(errors, MachineConfig)
+    console.print(f"{key!r} validated successfully.", style="bright_green")
+    if debug:
+        console.print(f"Type: {type(validated_value)}", style="bright_green")
+        console.print(f"{validated_value!r}", style="bright_green")
+    return validated_value
+
+
+def populate_field(key: str, field: ModelField, debug: bool = False) -> Any:
     """
     General function for inputting and validating the value of a single field against
     its Pydantic model.
@@ -91,17 +105,14 @@ def populate_field(key: str, field: ModelField, debug: bool = False):
             else answer
         )
 
-        validated_value, error = field.validate(value, {}, loc=key)
-        if not error:
-            console.print(f"{key!r} successfully validated", style="bright_green")
+        # Validate and return
+        try:
+            return validate_value(value, key, field, debug)
+        except ValidationError as error:
             if debug:
-                console.print(
-                    f"{type(validated_value)}\n{validated_value!r}",
-                    style="bright_green",
-                )
-            return validated_value
-        else:
-            console.print("Invalid input. Please try again.", style="red")
+                console.print(error, style="red")
+            console.print(f"Invalid input for {key!r}. Please try again")
+            continue
 
 
 def add_calibrations(
@@ -179,70 +190,53 @@ def get_calibration():
         add_calibration = ask_for_input(category="calibration setting", again=True)
 
     # Validate the nested dictionary structure
-    validated_calibrations, error = field.validate(calibrations, {}, loc=field)
-    if not error:
-        console.print(f"{key!r} validated successfully", style="bright_green")
+    try:
+        return validate_value(calibrations, key, field, debug)
+    except ValidationError as error:
         if debug:
-            console.print(
-                f"{type(validated_calibrations)}\n{validated_calibrations!r}",
-                style="bright_green",
-            )
-        return validated_calibrations
-    else:
-        console.print(
-            f"Failed to validate the provided calibrations: {error}", style="red"
-        )
+            console.print(error, style="red")
+        console.print(f"Failed to validate {key!r}", style="red")
         console.print("Returning an empty dictionary", style="red")
         return {}
 
 
 def add_software_packages(config: dict, debug: bool = False) -> dict[str, Any]:
     def get_software_name() -> str:
-        name = (
-            prompt(
-                "What is the name of the software package? Supported options: 'autotem', "
-                "'epu', 'leica', 'serialem', 'tomo'",
-                style="yellow",
-            )
-            .lower()
-            .strip()
+        message = (
+            "What is the name of the software package? Supported options: 'autotem', "
+            "'epu', 'leica', 'serialem', 'tomo'"
         )
+        name = prompt(message, style="yellow").lower().strip()
         # Validate name against "acquisition_software" field
-        field = MachineConfig.__fields__["acquisition_software"]
-        validated_name, error = field.validate([name], {}, loc="acquisition_software")
-        if not error:
-            return validated_name[0]
-        console.print(
-            "Invalid software name.",
-            style="red",
-        )
-        if ask_for_input("software package", True) is True:
-            return get_software_name()
-        return ""
+        try:
+            field = MachineConfig.__fields__["acquisition_software"]
+            return validate_value([name], "acquisition_software", field, False)[0]
+        except ValidationError:
+            console.print("Invalid software name.", style="red")
+            if ask_for_input("software package", True) is True:
+                return get_software_name()
+            return ""
 
     def ask_about_xml_path() -> bool:
         message = (
             "Does this software package have a settings file that needs modification? "
             "(y/n)"
         )
-        answer = prompt(message, style="yellow").lower().strip()
-
-        # Validate
-        if answer in ("y", "yes"):
-            return True
-        if answer in ("n", "no"):
-            return False
-        console.print("Invalid input.", style="red")
-        return ask_about_xml_path()
+        while True:
+            answer = prompt(message, style="yellow").lower().strip()
+            # Validate
+            if answer in ("y", "yes"):
+                return True
+            if answer in ("n", "no"):
+                return False
+            console.print("Invalid input.", style="red")
 
     def get_xml_file() -> Optional[Path]:
-        xml_file = Path(
-            prompt(
-                "What is the full file path of the settings file? This should be an "
-                "XML file.",
-                style="yellow",
-            )
+        message = (
+            "What is the full file path of the settings file? This should be an "
+            "XML file."
         )
+        xml_file = Path(prompt(message, style="yellow").strip())
         # Validate
         if xml_file.suffix:
             return xml_file
@@ -255,20 +249,18 @@ def get_xml_file() -> Optional[Path]:
         return None
 
     def get_xml_tree_path() -> str:
-        xml_tree_path = prompt(
-            "What is the path through the XML file to the node to overwrite?",
-            style="yellow",
-        )
-        # Possibly some validation checks later
+        message = "What is the path through the XML file to the node to overwrite?"
+        xml_tree_path = prompt(message, style="yellow").strip()
+        # TODO: Currently no test cases for this method
         return xml_tree_path
 
     def get_extensions_and_substrings() -> dict[str, list[str]]:
         def get_file_extension() -> str:
-            extension = prompt(
+            message = (
                 "Please enter the extension of a file produced by this package "
-                "that is to be analysed (e.g., '.tiff', '.eer', etc.).",
-                style="yellow",
-            ).strip()
+                "that is to be analysed (e.g., '.tiff', '.eer', etc.)."
+            )
+            extension = prompt(message, style="yellow").strip().lower()
             # Validate
             if not (extension.startswith(".") and extension.replace(".", "").isalnum()):
                 console.print(
@@ -282,15 +274,15 @@ def get_file_extension() -> str:
             return extension
 
         def get_file_substring() -> str:
-            substring = prompt(
+            message = (
                 "Please enter a keyword that will be present in files with this "
-                "extension. This field is case-sensitive.",
-                style="yellow",
-            ).strip()
+                "extension. This field is case-sensitive."
+            )
+            substring = prompt(message, style="yellow").strip()
             # Validate
             if bool(re.fullmatch(r"[\w\s\-]*", substring)) is False:
                 console.print(
-                    "Invalid characters are present in this substring. Please "
+                    "Unsafe characters are present in this substring. Please "
                     "try again. ",
                     style="red",
                 )
@@ -441,23 +433,13 @@ def get_file_substring() -> str:
         ("data_required_substrings", data_required_substrings),
     )
     for field_name, value in to_validate:
-        field = MachineConfig.__fields__[field_name]
-        validated_value, error = field.validate(value, {}, loc=field_name)
-        if not error:
-            config[field_name] = validated_value
-            console.print(
-                f"{field_name!r} validated successfully", style="bright_green"
-            )
+        try:
+            field = MachineConfig.__fields__[field_name]
+            config[field_name] = validate_value(value, field_name, field, debug)
+        except ValidationError as error:
             if debug:
-                console.print(
-                    f"{type(validated_value)}\n{validated_value!r}",
-                    style="bright_green",
-                )
-        else:
-            console.print(
-                f"Validation failed due to the following error: {error}",
-                style="red",
-            )
+                console.print(error, style="red")
+            console.print(f"Failed to validate {field_name!r}", style="red")
             console.print("Please try again.", style="red")
             return add_software_packages(config)
 
@@ -470,10 +452,7 @@ def add_data_directories(
 ) -> dict[str, str]:
     def get_directory() -> Optional[Path]:
         message = "What is the full file path to the data directory you wish to add?"
-        answer = prompt(
-            message,
-            style="yellow",
-        ).strip()
+        answer = prompt(message, style="yellow").strip()
         # Convert "" into None
         if not answer:
             return None
@@ -520,17 +499,15 @@ def get_directory_type():
         continue
 
     # Validate and return
-    validated_data_directories, error = field.validate(data_directories, {}, loc=key)
-    if not error:
-        console.print(f"Validated {key!r} successfully", style="bright_green")
+    try:
+        return validate_value(data_directories, key, field, debug)
+    except ValidationError as error:
         if debug:
-            console.print(f"{type(validated_data_directories)}")
-            console.print(f"{validated_data_directories!r}")
-        return data_directories
-    console.print(f"Failed to validate {key!r}", style="red")
-    if ask_for_input(category, True) is True:
-        return add_data_directories(key, field, debug)
-    return {}
+            console.print(error, style="red")
+        console.print(f"Failed to validate {key!r}", style="red")
+        if ask_for_input(category, True) is True:
+            return add_data_directories(key, field, debug)
+        return {}
 
 
 def add_create_directories(
@@ -593,17 +570,15 @@ def get_folder_alias() -> str:
         continue
 
     # Validate and return
-    validated_folders, errors = field.validate(folders_to_create, {}, loc=key)
-    if not errors:
-        console.print(f"{key!r} validated successfully", style="bright_green")
+    try:
+        return validate_value(folders_to_create, key, field, debug)
+    except ValidationError as error:
         if debug:
-            console.print(f"{type(validated_folders)}", style="bright_green")
-            console.print(f"{validated_folders!r}", style="bright_green")
-        return folders_to_create
-    console.print(f"Failed to validate {key!r}")
-    if ask_for_input(category, True) is True:
-        return add_create_directories(key, field, debug)
-    return {}
+            console.print(error, style="red")
+        console.print(f"Failed to validate {key!r}", style="red")
+        if ask_for_input(category, True) is True:
+            return add_create_directories(key, field, debug)
+        return {}
 
 
 def add_analyse_created_directories(
@@ -626,7 +601,7 @@ def get_folder() -> str:
     """
     Start of add_analyse_created_directories
     """
-    folders_to_create: list[str] = []
+    folders_to_analyse: list[str] = []
     category = "folder for Murfey to analyse"
     add_folder = ask_for_input(category, False)
     while add_folder is True:
@@ -635,22 +610,20 @@ def get_folder() -> str:
             console.print("No folder name provided", style="red")
             add_folder = ask_for_input(category, True)
             continue
-        folders_to_create.append(folder_name)
+        folders_to_analyse.append(folder_name)
         add_folder = ask_for_input(category, True)
         continue
 
     # Validate and return
-    validated_folders, errors = field.validate(folders_to_create, {}, loc=key)
-    if not errors:
-        console.print(f"{key!r} validated successfully", style="bright_green")
+    try:
+        return sorted(validate_value(folders_to_analyse, key, field, debug))
+    except ValidationError as error:
         if debug:
-            console.print(f"{type(validated_folders)}", style="bright_green")
-            console.print(f"{validated_folders!r}", style="bright_green")
-        return sorted(validated_folders)
-    console.print(f"Failed to validate {key!r}", style="red")
-    if ask_for_input(category, True) is True:
-        return add_analyse_created_directories(key, field, debug)
-    return []
+            console.print(error, style="red")
+        console.print(f"Failed to validate {key!r}", style="red")
+        if ask_for_input(category, True) is True:
+            return add_analyse_created_directories(key, field, debug)
+        return []
 
 
 def set_up_data_transfer(config: dict, debug: bool = False) -> dict:

From 97e7117bf0b93f840f02bc65f04c743661b808e4 Mon Sep 17 00:00:00 2001
From: Eu Pin Tien <eu-pin.tien@diamond.ac.uk>
Date: Tue, 19 Nov 2024 16:46:38 +0000
Subject: [PATCH 41/91] Updated field description

---
 src/murfey/util/config.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/murfey/util/config.py b/src/murfey/util/config.py
index a91df6b5..1d81d559 100644
--- a/src/murfey/util/config.py
+++ b/src/murfey/util/config.py
@@ -87,8 +87,8 @@ class MachineConfig(BaseModel):
         description=(
             "Nested dictionary containing the calibrations for this microscope. "
             "E.g., 'magnification' would be a valid dictionary, in which the "
-            "field-of-view at each magnfication level is provided as a key-value "
-            "pair. Options: 'magnification'"
+            "pixel size (in angstroms) at each magnfication level is provided as a "
+            "key-value pair. Options: 'magnification'"
         ),
     )
 

From 03d60f5c780a2b11d3cccfde2e7b1cb1b1e8cece Mon Sep 17 00:00:00 2001
From: Eu Pin Tien <eu-pin.tien@diamond.ac.uk>
Date: Tue, 19 Nov 2024 18:26:29 +0000
Subject: [PATCH 42/91] Added more helper functions to construct generic lists
 and dicts, and to check for value duplicates; updated grammar for some of the
 auto-generated prompts

---
 src/murfey/cli/generate_config.py | 158 ++++++++++++++++++++++++++----
 1 file changed, 139 insertions(+), 19 deletions(-)

diff --git a/src/murfey/cli/generate_config.py b/src/murfey/cli/generate_config.py
index 24abe0f2..3b49e078 100644
--- a/src/murfey/cli/generate_config.py
+++ b/src/murfey/cli/generate_config.py
@@ -1,6 +1,7 @@
 import argparse
 import json
 import re
+from ast import literal_eval
 from pathlib import Path
 from typing import Any, Optional, get_type_hints
 
@@ -20,8 +21,8 @@
 
 def prompt(message: str, style: str = "") -> str:
     """
-    Helper function to pretty print the prompt message and add the actual prompt on a
-    newline.
+    Helper function to pretty print a message and have the user input their response
+    on a new line.
     """
     console.print(message, style=style)
     return input("> ")
@@ -41,14 +42,10 @@ def print_field_info(field: ModelField):
         console.print(f"Default: {field.field_info.default!r}", style="bright_cyan")
 
 
-def ask_for_input(category: str, again: bool = False):
+def ask_for_permission(message: str) -> bool:
     """
-    Perform a Boolean check to see if another value is to be appended to the current
-    parameter being set up.
+    Helper function to generate a Boolean based on user input
     """
-    message = (
-        "Would you like to add " + ("another" if again else "a") + f" {category}? (y/n)"
-    )
     while True:
         answer = prompt(message, style="yellow").lower().strip()
         if answer in ("y", "yes"):
@@ -56,20 +53,143 @@ def ask_for_input(category: str, again: bool = False):
         if answer in ("n", "no"):
             return False
         console.print("Invalid input. Please try again.", style="red")
+        continue
 
 
-def confirm_overwrite(key: str):
+def ask_for_input(parameter: str, again: bool = False):
     """
-    Check whether a key should be overwritten if a duplicate is detected.
+    Asks the user if another value should be entered into the current data structure.
     """
-    message = f"{key!r} already exists; do you wish to overwrite it? (y/n)"
-    while True:
-        answer = prompt(message, style="yellow").lower().strip()
-        if answer in ("y", "yes"):
-            return True
-        if answer in ("n", "no"):
-            return False
-        console.print("Invalid input. Please try again.", style="red")
+    message = (
+        "Would you like to add "
+        + (
+            "another"
+            if again
+            else (
+                "an" if parameter.lower().startswith(("a", "e", "i", "o", "u")) else "a"
+            )
+        )
+        + f" {parameter}? (y/n)"
+    )
+    return ask_for_permission(message)
+
+
+def confirm_overwrite(value: str):
+    """
+    Asks the user if a value that already exists should be overwritten.
+    """
+    message = f"{value!r} already exists; do you wish to overwrite it? (y/n)"
+    return ask_for_permission(message)
+
+
+def confirm_duplicate(value: str):
+    """
+    Asks the user if a duplicate value should be allowed.
+    """
+    message = f"{value!r} already exists; do you want to add a duplicate? (y/n)"
+    return ask_for_permission(message)
+
+
+def construct_list(
+    list_name: str,
+    prompt_message: str,
+    allow_empty: bool = False,
+    allow_eval: bool = True,
+    many_types: bool = True,
+    debug: bool = False,
+) -> list[Any]:
+    """
+    Helper function to facilitate interactive construction of a list to be stored
+    under the current parameter.
+    """
+    lst: list = []
+    add_entry = ask_for_input(list_name, False)
+    message = prompt_message
+    while add_entry is True:
+        value = prompt(message, style="yellow").strip()
+        # Reject empty inputs if set
+        if not value and not allow_empty:
+            console.print("No value provided.", style="red")
+            add_entry = ask_for_input(list_name, True)
+            continue
+        # Convert numericals if set
+        try:
+            eval_value = (
+                literal_eval(value)
+                if allow_eval and isinstance(literal_eval(value), (int, float, complex))
+                else value
+            )
+        except Exception:
+            eval_value = value
+        # Confirm if duplicate entry should be added
+        if eval_value in lst and confirm_duplicate(str(eval_value)) is False:
+            add_entry = ask_for_input(list_name, True)
+            continue
+        lst.append(eval_value)
+        # Reject list with multiple types if set
+        if not many_types and len({type(item) for item in lst}) > 1:
+            console.print(
+                "The provided value is of a different type to the other members. \n"
+                "It won't be added to the list.",
+                style="red",
+            )
+            lst = lst[:-1]
+        add_entry = ask_for_input(list_name, True)
+        continue
+    return lst
+
+
+def construct_dict(
+    dict_name: str,
+    key_name: str,
+    value_name: str,
+    allow_empty_key: bool = True,
+    allow_empty_value: bool = True,
+    allow_eval: bool = True,
+    sort_keys: bool = True,
+    debug: bool = False,
+) -> dict[str, Any]:
+    """
+    Helper function to facilitate interative construction of a dictionary.
+    """
+    dct: dict = {}
+    add_entry = ask_for_input(dict_name, False)
+    key_message = f"Please enter a {key_name}"
+    value_message = f"Please enter a {value_name}"
+    while add_entry is True:
+        key = prompt(key_message, style="yellow").strip().lower()
+        # Reject empty keys if set
+        if not allow_empty_key and not key:
+            console.print(f"No {key_name} provided.")
+            add_entry = ask_for_input(dict_name, True)
+            continue
+        # Confirm overwrite key on duplicate
+        if key in dct.keys():
+            if confirm_overwrite(key) is False:
+                add_entry = ask_for_input(dict_name, True)
+                continue
+        value = prompt(value_message, style="yellow").strip()
+        # Reject empty values if set
+        if not allow_empty_value and not value:
+            console.print("No value provided", style="red")
+            add_entry = ask_for_input(dict_name, True)
+            continue
+        # Convert values to numericals if set
+        try:
+            eval_value = (
+                literal_eval(value)
+                if allow_eval and isinstance(literal_eval(value), (int, float, complex))
+                else value
+            )
+        except Exception:
+            eval_value = value
+        dct[key] = eval_value
+        add_entry = ask_for_input(dict_name, True)
+        continue
+
+    # Sort keys if set
+    dct = {key: dct[key] for key in sorted(dct.keys())} if sort_keys else dct
+    return dct
 
 
 def validate_value(value: Any, key: str, field: ModelField, debug: bool = False) -> Any:
@@ -187,7 +307,7 @@ def get_calibration():
             console.print(f"{calibration_values}", style="bright_green")
 
         # Check if any more calibrations need to be added
-        add_calibration = ask_for_input(category="calibration setting", again=True)
+        add_calibration = ask_for_input("calibration setting", again=True)
 
     # Validate the nested dictionary structure
     try:

From 9954b76db0fb07dca2676f92afa3cc404f107e10 Mon Sep 17 00:00:00 2001
From: Eu Pin Tien <eu-pin.tien@diamond.ac.uk>
Date: Wed, 20 Nov 2024 09:45:26 +0000
Subject: [PATCH 43/91] Replaced the request for calibration JSON files with manual
 input (not everyone will have calibrations in readable formats)

---
 src/murfey/cli/generate_config.py | 46 +++++++++++--------------------
 1 file changed, 16 insertions(+), 30 deletions(-)

diff --git a/src/murfey/cli/generate_config.py b/src/murfey/cli/generate_config.py
index 3b49e078..a7290712 100644
--- a/src/murfey/cli/generate_config.py
+++ b/src/murfey/cli/generate_config.py
@@ -154,8 +154,8 @@ def construct_dict(
     """
     dct: dict = {}
     add_entry = ask_for_input(dict_name, False)
-    key_message = f"Please enter a {key_name}"
-    value_message = f"Please enter a {value_name}"
+    key_message = f"Please enter the {key_name}"
+    value_message = f"Please enter the {value_name}"
     while add_entry is True:
         key = prompt(key_message, style="yellow").strip().lower()
         # Reject empty keys if set
@@ -241,32 +241,10 @@ def add_calibrations(
     """
     Populate the 'calibrations' field with dictionaries.
     """
-
-    def get_calibration():
-        # Request for a file to read settings from
-        calibration_file = Path(
-            prompt(
-                "What is the full file path to the calibration file? This should be a "
-                "JSON file.",
-                style="yellow",
-            )
-        )
-        try:
-            with open(calibration_file, "r") as file:
-                calibration_values: dict = json.load(file)
-                return calibration_values
-        except Exception as e:
-            console.print(
-                f"Error opening the provided file: {e}",
-                style="red",
-            )
-            if ask_for_input("calibration file", True) is True:
-                return get_calibration()
-            else:
-                return {}
-
-    # Settings
-    known_calibraions = ("magnification",)
+    # Known calibrations and what to call their keys and values
+    known_calibrations: dict[str, tuple[str, str]] = {
+        "magnification": ("magnification", "pixel size (in angstroms)")
+    }
 
     # Start of add_calibrations
     print_field_info(field)
@@ -279,7 +257,7 @@ def get_calibration():
             style="yellow",
         ).lower()
         # Check if it's a known type of calibration
-        if calibration_type not in known_calibraions:
+        if calibration_type not in known_calibrations.keys():
             console.print(
                 f"{calibration_type} is not a known type of calibration",
                 style="red",
@@ -292,7 +270,15 @@ def get_calibration():
                 add_calibration = ask_for_input(category, True)
                 continue
         # Skip failed inputs
-        calibration_values = get_calibration()
+        calibration_values = construct_dict(
+            f"{calibration_type} setting",
+            known_calibrations[calibration_type][0],
+            known_calibrations[calibration_type][1],
+            allow_empty_key=False,
+            allow_empty_value=False,
+            allow_eval=True,
+            sort_keys=True,
+        )
         if not calibration_values:
             add_calibration = ask_for_input(category, True)
             continue

From 8c5b3a85f9911e442a74d3841b40c1fab3854143 Mon Sep 17 00:00:00 2001
From: Eu Pin Tien <eu-pin.tien@diamond.ac.uk>
Date: Wed, 20 Nov 2024 09:56:25 +0000
Subject: [PATCH 44/91] Added logic to sort fully numeric dictionary keys
 numerically and separately from alphanumeric ones

---
 src/murfey/cli/generate_config.py | 13 ++++++++++++-
 1 file changed, 12 insertions(+), 1 deletion(-)

diff --git a/src/murfey/cli/generate_config.py b/src/murfey/cli/generate_config.py
index a7290712..77541e06 100644
--- a/src/murfey/cli/generate_config.py
+++ b/src/murfey/cli/generate_config.py
@@ -188,7 +188,18 @@ def construct_dict(
         continue
 
     # Sort keys if set
-    dct = {key: dct[key] for key in sorted(dct.keys())} if sort_keys else dct
+    dct = (
+        {
+            key: dct[key]
+            for key in sorted(
+                dct.keys(),
+                # Sort numeric keys as numerals and alphanumeric keys alphabetically
+                key=(lambda k: (0, float(k) if str(k).isdigit() else (1, str(k)))),
+            )
+        }
+        if sort_keys
+        else dct
+    )
     return dct
 
 

From 431e477d48d3415a63839619a71f6051024693de Mon Sep 17 00:00:00 2001
From: Eu Pin Tien <eu-pin.tien@diamond.ac.uk>
Date: Wed, 20 Nov 2024 11:24:32 +0000
Subject: [PATCH 45/91] Updated dictionary and list sorting conditions

---
 src/murfey/cli/generate_config.py | 89 +++++++++++++++++++++++--------
 1 file changed, 68 insertions(+), 21 deletions(-)

diff --git a/src/murfey/cli/generate_config.py b/src/murfey/cli/generate_config.py
index 77541e06..d2030844 100644
--- a/src/murfey/cli/generate_config.py
+++ b/src/murfey/cli/generate_config.py
@@ -1,9 +1,11 @@
+from __future__ import annotations
+
 import argparse
 import json
 import re
 from ast import literal_eval
 from pathlib import Path
-from typing import Any, Optional, get_type_hints
+from typing import Any, Optional, Type, get_type_hints
 
 import yaml
 from pydantic import ValidationError
@@ -91,11 +93,13 @@ def confirm_duplicate(value: str):
 
 
 def construct_list(
-    list_name: str,
+    value_name: str,
     prompt_message: str,
     allow_empty: bool = False,
     allow_eval: bool = True,
     many_types: bool = True,
+    restrict_to_types: Optional[Type[Any] | tuple[Type[Any]]] = None,
+    sort_values: bool = True,
     debug: bool = False,
 ) -> list[Any]:
     """
@@ -103,38 +107,62 @@ def construct_list(
     under the current parameter.
     """
     lst: list = []
-    add_entry = ask_for_input(list_name, False)
+    add_entry = ask_for_input(value_name, False)
     message = prompt_message
     while add_entry is True:
         value = prompt(message, style="yellow").strip()
         # Reject empty inputs if set
         if not value and not allow_empty:
             console.print("No value provided.", style="red")
-            add_entry = ask_for_input(list_name, True)
+            add_entry = ask_for_input(value_name, True)
             continue
-        # Convert numericals if set
+        # Convert values if set
         try:
-            eval_value = (
-                literal_eval(value)
-                if allow_eval and isinstance(literal_eval(value), (int, float, complex))
-                else value
-            )
+            eval_value = literal_eval(value)
         except Exception:
             eval_value = value
+        # Check if it's a permitted type (continue to allow None as value)
+        if restrict_to_types is not None:
+            allowed_types = (
+                (restrict_to_types,)
+                if not isinstance(restrict_to_types, (list, tuple))
+                else restrict_to_types
+            )
+            if not isinstance(eval_value, allowed_types):
+                console.print(
+                    f"The provided value ({type(eval_value)}) is not an allowed type.",
+                    style="red",
+                )
+                add_entry = ask_for_input(value_name, True)
+                continue
         # Confirm if duplicate entry should be added
         if eval_value in lst and confirm_duplicate(str(eval_value)) is False:
-            add_entry = ask_for_input(list_name, True)
+            add_entry = ask_for_input(value_name, True)
             continue
         lst.append(eval_value)
         # Reject list with multiple types if set
         if not many_types and len({type(item) for item in lst}) > 1:
             console.print(
-                "The provided value is of a different type to the other members. \n"
-                "It won't be added to the list.",
+                "The provided value is of a different type to the other members. It "
+                "won't be added to the list.",
                 style="red",
             )
             lst = lst[:-1]
-        add_entry = ask_for_input(list_name, True)
+        # Sort values if set
+        # Sort numeric values differently from alphanumeric ones
+        lst = (
+            sorted(
+                lst,
+                key=lambda v: (
+                    (0, float(v))
+                    if isinstance(v, (int, float))
+                    else (1, abs(v), v.real) if isinstance(v, complex) else (2, str(v))
+                ),
+            )
+            if sort_values
+            else lst
+        )
+        add_entry = ask_for_input(value_name, True)
         continue
     return lst
 
@@ -152,6 +180,21 @@ def construct_dict(
     """
     Helper function to facilitate interative construction of a dictionary.
     """
+
+    def is_type(value: str, instance: Type[Any] | tuple[Type[Any], ...]) -> bool:
+        """
+        Checks if the string provided evaluates to one of the desired types
+        """
+        instance = (instance,) if not isinstance(instance, (list, tuple)) else instance
+        try:
+            eval_value = literal_eval(value)
+        except Exception:
+            eval_value = value
+        return isinstance(eval_value, instance)
+
+    """
+    Start of construct_dict
+    """
     dct: dict = {}
     add_entry = ask_for_input(dict_name, False)
     key_message = f"Please enter the {key_name}"
@@ -176,11 +219,7 @@ def construct_dict(
             continue
         # Convert values to numericals if set
         try:
-            eval_value = (
-                literal_eval(value)
-                if allow_eval and isinstance(literal_eval(value), (int, float, complex))
-                else value
-            )
+            eval_value = literal_eval(value)
         except Exception:
             eval_value = value
         dct[key] = eval_value
@@ -188,13 +227,21 @@ def construct_dict(
         continue
 
     # Sort keys if set
+    # Sort numeric keys separately from alphanumeric ones
     dct = (
         {
             key: dct[key]
             for key in sorted(
                 dct.keys(),
-                # Sort numeric keys as numerals and alphanumeric keys alphabetically
-                key=(lambda k: (0, float(k) if str(k).isdigit() else (1, str(k)))),
+                key=lambda k: (
+                    (0, float(k))
+                    if is_type(k, (int, float))
+                    else (
+                        (1, abs(complex(k)), complex(k).real)
+                        if is_type(k, complex)
+                        else (2, str(k))
+                    )
+                ),
             )
         }
         if sort_keys

From b551dfd65359fe6e92c162c67d6deb78a259e024 Mon Sep 17 00:00:00 2001
From: Eu Pin Tien <eu-pin.tien@diamond.ac.uk>
Date: Wed, 20 Nov 2024 11:26:32 +0000
Subject: [PATCH 46/91] Updated dictionary and list sorting conditions

---
 src/murfey/cli/generate_config.py | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/src/murfey/cli/generate_config.py b/src/murfey/cli/generate_config.py
index d2030844..78888006 100644
--- a/src/murfey/cli/generate_config.py
+++ b/src/murfey/cli/generate_config.py
@@ -118,7 +118,7 @@ def construct_list(
             continue
         # Convert values if set
         try:
-            eval_value = literal_eval(value)
+            eval_value = literal_eval(value) if allow_eval else value
         except Exception:
             eval_value = value
         # Check if it's a permitted type (continue to allow None as value)
@@ -175,6 +175,7 @@ def construct_dict(
     allow_empty_value: bool = True,
     allow_eval: bool = True,
     sort_keys: bool = True,
+    restrict_values_to_types: Optional[Type[Any] | tuple[Type[Any], ...]] = None,
     debug: bool = False,
 ) -> dict[str, Any]:
     """
@@ -219,7 +220,7 @@ def is_type(value: str, instance: Type[Any] | tuple[Type[Any], ...]) -> bool:
             continue
         # Convert values to numericals if set
         try:
-            eval_value = literal_eval(value)
+            eval_value = literal_eval(value) if allow_eval else value
         except Exception:
             eval_value = value
         dct[key] = eval_value

From e21928a7ae098ab9ac256e66d34e7a48c7d1d6b5 Mon Sep 17 00:00:00 2001
From: Eu Pin Tien <eu-pin.tien@diamond.ac.uk>
Date: Wed, 20 Nov 2024 11:53:53 +0000
Subject: [PATCH 47/91] Fixed sorting logic for lists and dicts; added option
 to restrict allowed type inputs

---
 src/murfey/cli/generate_config.py | 16 ++++++++++++++--
 1 file changed, 14 insertions(+), 2 deletions(-)

diff --git a/src/murfey/cli/generate_config.py b/src/murfey/cli/generate_config.py
index 78888006..d3e71b7a 100644
--- a/src/murfey/cli/generate_config.py
+++ b/src/murfey/cli/generate_config.py
@@ -175,7 +175,7 @@ def construct_dict(
     allow_empty_value: bool = True,
     allow_eval: bool = True,
     sort_keys: bool = True,
-    restrict_values_to_types: Optional[Type[Any] | tuple[Type[Any], ...]] = None,
+    restrict_to_types: Optional[Type[Any] | tuple[Type[Any], ...]] = None,
     debug: bool = False,
 ) -> dict[str, Any]:
     """
@@ -218,11 +218,23 @@ def is_type(value: str, instance: Type[Any] | tuple[Type[Any], ...]) -> bool:
             console.print("No value provided", style="red")
             add_entry = ask_for_input(dict_name, True)
             continue
-        # Convert values to numericals if set
+        # Convert values if set
         try:
             eval_value = literal_eval(value) if allow_eval else value
         except Exception:
             eval_value = value
+        # Reject incorrect value types if set
+        if restrict_to_types is not None:
+            allowed_types = (
+                (restrict_to_types,)
+                if not isinstance(restrict_to_types, (tuple, list))
+                else restrict_to_types
+            )
+            if not isinstance(eval_value, allowed_types):
+                console.print("The value is not of an allowed type.", style="red")
+                add_entry = ask_for_input(dict_name, True)
+                continue
+        # Assign value to key
         dct[key] = eval_value
         add_entry = ask_for_input(dict_name, True)
         continue

From e64eadd820a24848b34f15d7f29cf3e32f1a40f6 Mon Sep 17 00:00:00 2001
From: Eu Pin Tien <eu-pin.tien@diamond.ac.uk>
Date: Wed, 20 Nov 2024 13:44:31 +0000
Subject: [PATCH 48/91] Replaced more parts of the config setup with the
 generic dict and list construction functions

---
 src/murfey/cli/generate_config.py | 123 ++++++++++--------------------
 1 file changed, 40 insertions(+), 83 deletions(-)

diff --git a/src/murfey/cli/generate_config.py b/src/murfey/cli/generate_config.py
index d3e71b7a..52a846ca 100644
--- a/src/murfey/cli/generate_config.py
+++ b/src/murfey/cli/generate_config.py
@@ -9,6 +9,7 @@
 
 import yaml
 from pydantic import ValidationError
+from pydantic.error_wrappers import ErrorWrapper
 from pydantic.fields import ModelField, UndefinedType
 from rich.console import Console
 
@@ -103,8 +104,7 @@ def construct_list(
     debug: bool = False,
 ) -> list[Any]:
     """
-    Helper function to facilitate interactive construction of a list to be stored
-    under the current parameter.
+    Helper function to facilitate interactive construction of a list.
     """
     lst: list = []
     add_entry = ask_for_input(value_name, False)
@@ -269,7 +269,9 @@ def validate_value(value: Any, key: str, field: ModelField, debug: bool = False)
     """
     validated_value, errors = field.validate(value, {}, loc=key)
     if errors:
-        raise ValidationError(errors, MachineConfig)
+        raise ValidationError(
+            ([errors] if isinstance(errors, ErrorWrapper) else errors), MachineConfig
+        )
     console.print(f"{key!r} validated successfully.", style="bright_green")
     if debug:
         console.print(f"Type: {type(validated_value)}", style="bright_green")
@@ -314,6 +316,7 @@ def add_calibrations(
     """
     # Known calibrations and what to call their keys and values
     known_calibrations: dict[str, tuple[str, str]] = {
+        # Calibration type | Key name | Value name
         "magnification": ("magnification", "pixel size (in angstroms)")
     }
 
@@ -342,7 +345,7 @@ def add_calibrations(
                 continue
         # Skip failed inputs
         calibration_values = construct_dict(
-            f"{calibration_type} setting",
+            f"{calibration_type} calibration",
             known_calibrations[calibration_type][0],
             known_calibrations[calibration_type][1],
             allow_empty_key=False,
@@ -445,56 +448,38 @@ def get_file_extension() -> str:
                     style="red",
                 )
                 return get_file_extension()
-            if extension in unsorted_dict.keys():
+            if extension in extension_dict.keys():
                 console.print("This extension has already been provided")
                 return ""
             return extension
 
-        def get_file_substring() -> str:
-            message = (
-                "Please enter a keyword that will be present in files with this "
-                "extension. This field is case-sensitive."
-            )
-            substring = prompt(message, style="yellow").strip()
-            # Validate
-            if bool(re.fullmatch(r"[\w\s\-]*", substring)) is False:
-                console.print(
-                    "Unsafe characters are present in this substring. Please "
-                    "try again. ",
-                    style="red",
-                )
-                return get_file_substring()
-            if substring in substrings:
-                console.print("This substring has already been provided.")
-                return ""
-            return substring
-
         """
         Start of get_extensions_and_substrings
         """
-        unsorted_dict: dict = {}
+        extension_dict: dict = {}
         add_extension = ask_for_input("file extension", False)
         while add_extension is True:
             extension = get_file_extension()
             if not extension:
                 add_extension = ask_for_input("file extension", True)
                 continue
-            substrings: list[str] = []
-            add_substring = ask_for_input("file substring", False)
-            while add_substring is True:
-                substring = get_file_substring()
-                if not substring:
-                    add_substring = ask_for_input("file substring", True)
-                    continue
-                substrings.append(substring)
-                add_substring = ask_for_input("file substring", True)
-            unsorted_dict[extension] = sorted(substrings)
+            substrings: list[str] = construct_list(
+                "file substring",
+                "Please enter a file substring associated with this extension",
+                allow_empty=False,
+                allow_eval=False,
+                many_types=False,
+                restrict_to_types=str,
+                sort_values=True,
+            )
+            extension_dict[extension] = substrings
             add_extension = ask_for_input("file extension", True)
+            continue
 
-        sorted_dict: dict = {}
-        for key in sorted(unsorted_dict.keys()):
-            sorted_dict[key] = unsorted_dict[key]
-        return sorted_dict
+        extension_dict = {
+            key: extension_dict[key] for key in sorted(extension_dict.keys())
+        }
+        return extension_dict
 
     """
     Start of add_software_packages
@@ -513,7 +498,7 @@ def get_file_substring() -> str:
     category = "software package"
     add_input = ask_for_input(category, again=False)
     while add_input:
-        # Collect inputs
+        # Collect software name
         console.print(
             "Acquisition Software (acquisition_software)",
             style="bold bright_cyan",
@@ -532,6 +517,7 @@ def get_file_substring() -> str:
                 add_input = ask_for_input(category, False)
                 continue
 
+        # Collect version info
         console.print(
             "Software Versions (software_versions)",
             style="bold bright_cyan",
@@ -542,6 +528,7 @@ def get_file_substring() -> str:
             style="yellow",
         )
 
+        # Collect settings files and modifications
         console.print(
             "Software Settings Output Directories (software_settings_output_directories)",
             style="bold bright_cyan",
@@ -560,6 +547,7 @@ def get_file_substring() -> str:
             xml_file = None
             xml_tree_path = ""
 
+        # Collect extensions and filename substrings
         console.print(
             "Data Required Substrings (data_required_substrings)",
             style="bold bright_cyan",
@@ -580,6 +568,7 @@ def get_file_substring() -> str:
             "extensions_and_substrings": file_ext_ss,
         }
         add_input = ask_for_input(category, again=True)
+        continue
 
     # Re-pack keys and values according to the current config field structures
     console.print("Compiling and validating inputs...")
@@ -627,53 +616,21 @@ def get_file_substring() -> str:
 def add_data_directories(
     key: str, field: ModelField, debug: bool = False
 ) -> dict[str, str]:
-    def get_directory() -> Optional[Path]:
-        message = "What is the full file path to the data directory you wish to add?"
-        answer = prompt(message, style="yellow").strip()
-        # Convert "" into None
-        if not answer:
-            return None
-        return Path(answer)
-
-    def get_directory_type():
-        message = (
-            "What type of data is stored in this directory? Options: 'microscope', "
-            "'detector'"
-        )
-        answer = prompt(message, style="yellow").lower().strip()
-        if answer not in ("microscope", "detector"):
-            console.print("Invalid directory type.", style="red")
-            if ask_for_input("directory type", True) is True:
-                return get_directory_type()
-            return ""
-        return answer
-
     """
-    Start of add_data_directories
+    Function to facilitate populating the data_directories field.
     """
     print_field_info(field)
-    data_directories: dict[str, str] = {}
     category = "data directory"
-    add_directory = ask_for_input(category, False)
-    while add_directory is True:
-        directory = get_directory()
-        # Move on to next loop or exit if no directory provided
-        if not directory:
-            console.print("No directory added", style="red")
-            add_directory = ask_for_input(category, True)
-            continue
-
-        # Get the directory type
-        directory_type = get_directory_type()
-        if not directory_type:
-            console.print("No directory type provided", style="red")
-
-        # Add to dictionary
-        data_directories[str(directory)] = directory_type
-
-        # Check if more need to be added
-        add_directory = ask_for_input(category, True)
-        continue
+    data_directories: dict[str, str] = construct_dict(
+        category,
+        "full file path to the data directory",
+        "data type",
+        allow_empty_key=False,
+        allow_empty_value=False,
+        allow_eval=False,
+        sort_keys=True,
+        restrict_to_types=str,
+    )
 
     # Validate and return
     try:

From 24f3a49a3983d5ef40efd21b0f69c58d81740e31 Mon Sep 17 00:00:00 2001
From: Eu Pin Tien <eu-pin.tien@diamond.ac.uk>
Date: Wed, 20 Nov 2024 15:10:35 +0000
Subject: [PATCH 49/91] Added option to pass functions as inputs to the list
 and dict constructors, allowing for nested data structures

---
 src/murfey/cli/generate_config.py | 38 ++++++++++++++++++++++++-------
 1 file changed, 30 insertions(+), 8 deletions(-)

diff --git a/src/murfey/cli/generate_config.py b/src/murfey/cli/generate_config.py
index 52a846ca..24b91579 100644
--- a/src/murfey/cli/generate_config.py
+++ b/src/murfey/cli/generate_config.py
@@ -5,7 +5,7 @@
 import re
 from ast import literal_eval
 from pathlib import Path
-from typing import Any, Optional, Type, get_type_hints
+from typing import Any, Callable, Optional, Type, get_type_hints
 
 import yaml
 from pydantic import ValidationError
@@ -95,7 +95,8 @@ def confirm_duplicate(value: str):
 
 def construct_list(
     value_name: str,
-    prompt_message: str,
+    value_method: Optional[Callable] = None,
+    value_method_args: dict = {},
     allow_empty: bool = False,
     allow_eval: bool = True,
     many_types: bool = True,
@@ -108,9 +109,17 @@ def construct_list(
     """
     lst: list = []
     add_entry = ask_for_input(value_name, False)
-    message = prompt_message
     while add_entry is True:
-        value = prompt(message, style="yellow").strip()
+        value = (
+            prompt(
+                "Please enter "
+                + ("an" if value_name.startswith(("a", "e", "i", "o", "u")) else "a")
+                + f" {value_name}",
+                style="yellow",
+            )
+            if value_method is None
+            else value_method(**value_method_args)
+        )
         # Reject empty inputs if set
         if not value and not allow_empty:
             console.print("No value provided.", style="red")
@@ -172,6 +181,10 @@ def construct_dict(
     key_name: str,
     value_name: str,
     allow_empty_key: bool = True,
+    key_method: Optional[Callable] = None,
+    key_method_args: dict = {},
+    value_method: Optional[Callable] = None,
+    value_method_args: dict = {},
     allow_empty_value: bool = True,
     allow_eval: bool = True,
     sort_keys: bool = True,
@@ -201,10 +214,15 @@ def is_type(value: str, instance: Type[Any] | tuple[Type[Any], ...]) -> bool:
     key_message = f"Please enter the {key_name}"
     value_message = f"Please enter the {value_name}"
     while add_entry is True:
-        key = prompt(key_message, style="yellow").strip().lower()
+        # Add key
+        key = (
+            prompt(key_message, style="yellow").strip()
+            if key_method is None
+            else key_method(**key_method_args)
+        )
         # Reject empty keys if set
         if not allow_empty_key and not key:
-            console.print(f"No {key_name} provided.")
+            console.print(f"No {key_name} provided.", style="red")
             add_entry = ask_for_input(dict_name, True)
             continue
         # Confirm overwrite key on duplicate
@@ -212,7 +230,12 @@ def is_type(value: str, instance: Type[Any] | tuple[Type[Any], ...]) -> bool:
             if confirm_overwrite(key) is False:
                 add_entry = ask_for_input(dict_name, True)
                 continue
-        value = prompt(value_message, style="yellow").strip()
+        # Add value
+        value = (
+            prompt(value_message, style="yellow").strip()
+            if value_method is None
+            else value_method(**value_method_args)
+        )
         # Reject empty values if set
         if not allow_empty_value and not value:
             console.print("No value provided", style="red")
@@ -465,7 +488,6 @@ def get_file_extension() -> str:
                 continue
             substrings: list[str] = construct_list(
                 "file substring",
-                "Please enter a file substring associated with this extension",
                 allow_empty=False,
                 allow_eval=False,
                 many_types=False,

From a8e66ebe158dae845b71f5fe1cc5b5244a7345d1 Mon Sep 17 00:00:00 2001
From: Eu Pin Tien <eu-pin.tien@diamond.ac.uk>
Date: Wed, 20 Nov 2024 15:23:10 +0000
Subject: [PATCH 50/91] Replaced parts of 'add_software_packages' with the
 generic list and dict constructors

---
 src/murfey/cli/generate_config.py | 68 +++++++++----------------------
 1 file changed, 20 insertions(+), 48 deletions(-)

diff --git a/src/murfey/cli/generate_config.py b/src/murfey/cli/generate_config.py
index 24b91579..c48bc7ee 100644
--- a/src/murfey/cli/generate_config.py
+++ b/src/murfey/cli/generate_config.py
@@ -457,52 +457,6 @@ def get_xml_tree_path() -> str:
         # TODO: Currently no test cases for this method
         return xml_tree_path
 
-    def get_extensions_and_substrings() -> dict[str, list[str]]:
-        def get_file_extension() -> str:
-            message = (
-                "Please enter the extension of a file produced by this package "
-                "that is to be analysed (e.g., '.tiff', '.eer', etc.)."
-            )
-            extension = prompt(message, style="yellow").strip().lower()
-            # Validate
-            if not (extension.startswith(".") and extension.replace(".", "").isalnum()):
-                console.print(
-                    "This is an invalid file extension. Please try again. ",
-                    style="red",
-                )
-                return get_file_extension()
-            if extension in extension_dict.keys():
-                console.print("This extension has already been provided")
-                return ""
-            return extension
-
-        """
-        Start of get_extensions_and_substrings
-        """
-        extension_dict: dict = {}
-        add_extension = ask_for_input("file extension", False)
-        while add_extension is True:
-            extension = get_file_extension()
-            if not extension:
-                add_extension = ask_for_input("file extension", True)
-                continue
-            substrings: list[str] = construct_list(
-                "file substring",
-                allow_empty=False,
-                allow_eval=False,
-                many_types=False,
-                restrict_to_types=str,
-                sort_values=True,
-            )
-            extension_dict[extension] = substrings
-            add_extension = ask_for_input("file extension", True)
-            continue
-
-        extension_dict = {
-            key: extension_dict[key] for key in sorted(extension_dict.keys())
-        }
-        return extension_dict
-
     """
     Start of add_software_packages
     """
@@ -580,14 +534,32 @@ def get_file_extension() -> str:
             "for data processing. They are listed out here.",
             style="italic bright_cyan",
         )
-        file_ext_ss = get_extensions_and_substrings()
+        extensions_and_substrings = construct_dict(
+            dict_name="file extension configuration",
+            key_name="file extension",
+            value_name="file substrings",
+            value_method=construct_list,
+            value_method_args={
+                "value_name": "file substring",
+                "allow_empty": False,
+                "allow_eval": False,
+                "many_types": False,
+                "restrict_to_types": str,
+                "sort_values": True,
+            },
+            allow_empty_key=False,
+            allow_empty_value=False,
+            allow_eval=False,
+            sort_keys=True,
+            restrict_to_types=list,
+        )
 
         # Compile keys for this package as a dict
         package_info[name] = {
             "version": version,
             "xml_file": xml_file,
             "xml_tree_path": xml_tree_path,
-            "extensions_and_substrings": file_ext_ss,
+            "extensions_and_substrings": extensions_and_substrings,
         }
         add_input = ask_for_input(category, again=True)
         continue

From e78408572dec4429f5e376a6a1a7ab78a03ba84d Mon Sep 17 00:00:00 2001
From: Eu Pin Tien <eu-pin.tien@diamond.ac.uk>
Date: Wed, 20 Nov 2024 15:39:12 +0000
Subject: [PATCH 51/91] Replaced parts of 'add_create_directories' with the
 generic dict constructor

---
 src/murfey/cli/generate_config.py | 45 ++++++++++---------------------
 1 file changed, 14 insertions(+), 31 deletions(-)

diff --git a/src/murfey/cli/generate_config.py b/src/murfey/cli/generate_config.py
index c48bc7ee..980ed923 100644
--- a/src/murfey/cli/generate_config.py
+++ b/src/murfey/cli/generate_config.py
@@ -180,11 +180,11 @@ def construct_dict(
     dict_name: str,
     key_name: str,
     value_name: str,
-    allow_empty_key: bool = True,
     key_method: Optional[Callable] = None,
     key_method_args: dict = {},
     value_method: Optional[Callable] = None,
     value_method_args: dict = {},
+    allow_empty_key: bool = True,
     allow_empty_value: bool = True,
     allow_eval: bool = True,
     sort_keys: bool = True,
@@ -425,14 +425,7 @@ def ask_about_xml_path() -> bool:
             "Does this software package have a settings file that needs modification? "
             "(y/n)"
         )
-        while True:
-            answer = prompt(message, style="yellow").lower().strip()
-            # Validate
-            if answer in ("y", "yes"):
-                return True
-            if answer in ("n", "no"):
-                return False
-            console.print("Invalid input.", style="red")
+        return ask_for_permission(message)
 
     def get_xml_file() -> Optional[Path]:
         message = (
@@ -673,29 +666,19 @@ def get_folder_alias() -> str:
     Start of add_create_directories
     """
     print_field_info(field)
-    folders_to_create: dict[str, str] = {}
     category = "folder for Murfey to create"
-    add_directory: bool = ask_for_input(category, False)
-    while add_directory is True:
-        folder_name = get_folder()
-        if not folder_name:
-            console.print(
-                "No folder name provided",
-                style="red",
-            )
-            add_directory = ask_for_input(category, True)
-            continue
-        folder_alias = get_folder_alias()
-        if not folder_alias:
-            console.print(
-                "No folder alias provided",
-                style="red",
-            )
-            add_directory = ask_for_input(category, True)
-            continue
-        folders_to_create[folder_alias] = folder_name
-        add_directory = ask_for_input(category, True)
-        continue
+    folders_to_create: dict[str, str] = construct_dict(
+        dict_name=category,
+        key_name="folder alias",
+        value_name="folder name",
+        key_method=get_folder_alias,
+        value_method=get_folder,
+        allow_empty_key=False,
+        allow_empty_value=False,
+        allow_eval=False,
+        sort_keys=True,
+        restrict_to_types=str,
+    )
 
     # Validate and return
     try:

From 9b7f6de94376615c5c45aacb5ceaf95e799796d0 Mon Sep 17 00:00:00 2001
From: Eu Pin Tien <eu-pin.tien@diamond.ac.uk>
Date: Wed, 20 Nov 2024 16:51:44 +0000
Subject: [PATCH 52/91] Added generic constructors for folder names, folder
 paths, and file paths

---
 src/murfey/cli/generate_config.py | 70 +++++++++++++++++++++++++++++++
 1 file changed, 70 insertions(+)

diff --git a/src/murfey/cli/generate_config.py b/src/murfey/cli/generate_config.py
index 980ed923..06d36b31 100644
--- a/src/murfey/cli/generate_config.py
+++ b/src/murfey/cli/generate_config.py
@@ -93,6 +93,72 @@ def confirm_duplicate(value: str):
     return ask_for_permission(message)
 
 
+def get_folder_name(message: Optional[str] = None) -> str:
+    """
+    Helper function to interactively generate, validate, and return a folder name.
+    """
+    while True:
+        message = "Please enter the folder name." if message is None else message
+        value = prompt(message, style="yellow").strip()
+        if bool(re.fullmatch(r"[\w\s\-]*", value)) is True:
+            return value
+        console.print(
+            "There are unsafe characters present in this folder name. Please "
+            "use a different one.",
+            style="red",
+        )
+        if ask_for_input("folder name", True) is False:
+            return ""
+        continue
+
+
+def get_folder_path(
+    message: Optional[str] = None, as_path: bool = True
+) -> str | Path | None:
+    """
+    Helper function to interactively generate, validate, and return the full path
+    to a folder.
+    """
+    while True:
+        message = (
+            "Please enter the full path to the folder." if message is None else message
+        )
+        value = prompt(message, style="yellow").strip()
+        if not value:
+            return None
+        try:
+            path = Path(value).resolve()
+            return path if as_path is True else path.as_posix()
+        except Exception:
+            console.print("Unable to resolve provided file path", style="red")
+            if ask_for_input("file path", True) is False:
+                return None
+            continue
+
+
+def get_file_path(
+    message: Optional[str] = None, as_path: bool = True
+) -> str | Path | None:
+    """
+    Helper function to interactively generate, validate, and return the full path
+    to a file.
+    """
+    while True:
+        message = (
+            "Please enter the full path to the file." if message is None else message
+        )
+        value = prompt(message, style="yellow").strip()
+        if not value:
+            return None
+        file = Path(value).resolve()
+        if file.suffix:
+            return file if as_path is True else file.as_posix()
+        console.print(f"{str(file)!r} doesn't appear to be a file", style="red")
+        if ask_for_input("file", True) is False:
+            return None
+        continue
+
+
 def construct_list(
     value_name: str,
     value_method: Optional[Callable] = None,
@@ -405,6 +471,10 @@ def add_calibrations(
 
 def add_software_packages(config: dict, debug: bool = False) -> dict[str, Any]:
     def get_software_name() -> str:
+        """
+        Function to interactively generate, validate, and return the name of a
+        supported software package.
+        """
         message = (
             "What is the name of the software package? Supported options: 'autotem', "
             "'epu', 'leica', 'serialem', 'tomo'"

From 9d6679a510893db21c4cfe613802f60e24102854 Mon Sep 17 00:00:00 2001
From: Eu Pin Tien <eu-pin.tien@diamond.ac.uk>
Date: Wed, 20 Nov 2024 17:59:57 +0000
Subject: [PATCH 53/91] Replaced directory creation functions with the generic
 ones

---
 src/murfey/cli/generate_config.py | 150 ++++++++++--------------------
 1 file changed, 48 insertions(+), 102 deletions(-)

diff --git a/src/murfey/cli/generate_config.py b/src/murfey/cli/generate_config.py
index 06d36b31..af21b64d 100644
--- a/src/murfey/cli/generate_config.py
+++ b/src/murfey/cli/generate_config.py
@@ -112,9 +112,7 @@ def get_folder_name(message: Optional[str] = None) -> str:
         continue
 
 
-def get_folder_path(
-    message: Optional[str] = None, as_path: bool = True
-) -> str | Path | None:
+def get_folder_path(message: Optional[str] = None) -> Path | None:
     """
     Helper function to interactively generate, validate, and return the full path
     to a folder.
@@ -128,7 +126,7 @@ def get_folder_path(
             return None
         try:
             path = Path(value).resolve()
-            return path if as_path is True else path.as_posix()
+            return path
         except Exception:
             console.print("Unable to resolve provided file path", style="red")
             if ask_for_input("file path", True) is False:
@@ -136,9 +134,7 @@ def get_folder_path(
             continue
 
 
-def get_file_path(
-    message: Optional[str] = None, as_path: bool = True
-) -> str | Path | None:
+def get_file_path(message: Optional[str] = None) -> Path | None:
     """
     Helper function to interactively generate, validate, and return the full path
     to a file.
@@ -152,7 +148,7 @@ def get_file_path(
             return None
         file = Path(value).resolve()
         if file.suffix:
-            return file if as_path is True else file.as_posix()
+            return file
         console.print(f"{str(file)!r} doesn't appear to be a file", style="red")
         if ask_for_input("file", True) is False:
             return None
@@ -258,7 +254,7 @@ def construct_dict(
     debug: bool = False,
 ) -> dict[str, Any]:
     """
-    Helper function to facilitate interative construction of a dictionary.
+    Helper function to facilitate the interative construction of a dictionary.
     """
 
     def is_type(value: str, instance: Type[Any] | tuple[Type[Any], ...]) -> bool:
@@ -272,9 +268,6 @@ def is_type(value: str, instance: Type[Any] | tuple[Type[Any], ...]) -> bool:
             eval_value = value
         return isinstance(eval_value, instance)
 
-    """
-    Start of construct_dict
-    """
     dct: dict = {}
     add_entry = ask_for_input(dict_name, False)
     key_message = f"Please enter the {key_name}"
@@ -354,7 +347,7 @@ def is_type(value: str, instance: Type[Any] | tuple[Type[Any], ...]) -> bool:
 
 def validate_value(value: Any, key: str, field: ModelField, debug: bool = False) -> Any:
     """
-    Helper function to validate the value of the desired field for a Pydantic model.
+    Helper function to validate the value of a field in the Pydantic model.
     """
     validated_value, errors = field.validate(value, {}, loc=key)
     if errors:
@@ -490,31 +483,14 @@ def get_software_name() -> str:
                 return get_software_name()
             return ""
 
-    def ask_about_xml_path() -> bool:
+    def ask_about_settings_file() -> bool:
         message = (
             "Does this software package have a settings file that needs modification? "
             "(y/n)"
         )
         return ask_for_permission(message)
 
-    def get_xml_file() -> Optional[Path]:
-        message = (
-            "What is the full file path of the settings file? This should be an "
-            "XML file."
-        )
-        xml_file = Path(prompt(message, style="yellow").strip())
-        # Validate
-        if xml_file.suffix:
-            return xml_file
-        console.print(
-            "The path entered does not point to a file.",
-            style="red",
-        )
-        if ask_for_input("settings file", True) is True:
-            return get_xml_file()
-        return None
-
-    def get_xml_tree_path() -> str:
+    def get_settings_tree_path() -> str:
         message = "What is the path through the XML file to the node to overwrite?"
         xml_tree_path = prompt(message, style="yellow").strip()
         # TODO: Currently no test cases for this method
@@ -579,12 +555,16 @@ def get_xml_tree_path() -> str:
             "provided.",
             style="italic bright_cyan",
         )
-        if ask_about_xml_path() is True:
-            xml_file = get_xml_file()
-            xml_tree_path = get_xml_tree_path()
-        else:
-            xml_file = None
-            xml_tree_path = ""
+        settings_file: Optional[Path] = (
+            get_file_path(
+                "What is the full path to the settings file? This is usually an XML file."
+            )
+            if ask_about_settings_file() is True
+            else None
+        )
+        settings_tree_path = (
+            get_settings_tree_path().split("/") if settings_file else []
+        )
 
         # Collect extensions and filename substrings
         console.print(
@@ -597,7 +577,7 @@ def get_xml_tree_path() -> str:
             "for data processing. They are listed out here.",
             style="italic bright_cyan",
         )
-        extensions_and_substrings = construct_dict(
+        extensions_and_substrings: dict[str, list[str]] = construct_dict(
             dict_name="file extension configuration",
             key_name="file extension",
             value_name="file substrings",
@@ -620,8 +600,8 @@ def get_xml_tree_path() -> str:
         # Compile keys for this package as a dict
         package_info[name] = {
             "version": version,
-            "xml_file": xml_file,
-            "xml_tree_path": xml_tree_path,
+            "settings_file": settings_file,
+            "settings_tree_path": settings_tree_path,
             "extensions_and_substrings": extensions_and_substrings,
         }
         add_input = ask_for_input(category, again=True)
@@ -639,10 +619,10 @@ def get_xml_tree_path() -> str:
         acquisition_software.append(key)
         if package_info[key]["version"]:
             software_versions[key] = package_info[key]["version"]
-        if package_info[key]["xml_file"]:
-            software_settings_output_directories[str(package_info[key]["xml_file"])] = (
-                package_info[key]["xml_tree_path"]
-            )
+        if package_info[key]["settings_file"]:
+            software_settings_output_directories[
+                str(package_info[key]["settings_file"])
+            ] = package_info[key]["settings_tree_path"]
         if package_info[key]["extensions_and_substrings"]:
             data_required_substrings[key] = package_info[key][
                 "extensions_and_substrings"
@@ -704,36 +684,8 @@ def add_data_directories(
 def add_create_directories(
     key: str, field: ModelField, debug: bool = False
 ) -> dict[str, str]:
-    def get_folder() -> str:
-        message = "Please enter the name of the folder for Murfey to create."
-        answer = prompt(message, style="yellow").lower().strip()
-        if bool(re.fullmatch(r"[\w\s\-]*", answer)) is False:
-            console.print(
-                "There are unsafe characters present in this folder name. Please "
-                "use a different one.",
-                style="red",
-            )
-            if ask_for_input("folder name", True) is True:
-                return get_folder()
-            return ""
-        return answer
-
-    def get_folder_alias() -> str:
-        message = "Please enter the name Murfey should map this folder to."
-        answer = prompt(message, style="yellow").lower().strip()
-        if bool(re.fullmatch(r"[\w\s\-]*", answer)) is False:
-            console.print(
-                "There are unsafe characters present in this folder name. Please "
-                "use a different one.",
-                style="red",
-            )
-            if ask_for_input("folder alias", True) is True:
-                return get_folder_alias()
-            return ""
-        return answer
-
     """
-    Start of add_create_directories
+    Function to populate the create_directories field.
     """
     print_field_info(field)
     category = "folder for Murfey to create"
@@ -741,8 +693,14 @@ def get_folder_alias() -> str:
         dict_name=category,
         key_name="folder alias",
         value_name="folder name",
-        key_method=get_folder_alias,
-        value_method=get_folder,
+        key_method=get_folder_name,
+        key_method_args={
+            "message": "Please enter the name Murfey should map the folder to.",
+        },
+        value_method=get_folder_name,
+        value_method_args={
+            "message": "Please enter the name of the folder for Murfey to create.",
+        },
         allow_empty_key=False,
         allow_empty_value=False,
         allow_eval=False,
@@ -765,35 +723,23 @@ def get_folder_alias() -> str:
 def add_analyse_created_directories(
     key: str, field: ModelField, debug: bool = False
 ) -> list[str]:
-    def get_folder() -> str:
-        message = "Please enter the name of the folder that Murfey is to analyse."
-        answer = prompt(message, style="yellow").lower().strip()
-        if bool(re.fullmatch(r"[\w\s\-]*", answer)) is False:
-            console.print(
-                "There are unsafe characters present in the folder name. Please "
-                "use a different folder.",
-                style="red",
-            )
-            if ask_for_input("folder name", True) is True:
-                return get_folder()
-            return ""
-        return answer
-
     """
-    Start of add_analyse_created_directories
+    Function to populate the analyse_created_directories field
     """
-    folders_to_analyse: list[str] = []
+    print_field_info(field)
     category = "folder for Murfey to analyse"
-    add_folder = ask_for_input(category, False)
-    while add_folder is True:
-        folder_name = get_folder()
-        if not folder_name:
-            console.print("No folder name provided", style="red")
-            add_folder = ask_for_input(category, True)
-            continue
-        folders_to_analyse.append(folder_name)
-        add_folder = ask_for_input(category, True)
-        continue
+    folders_to_analyse: list[str] = construct_list(
+        value_name=category,
+        value_method=get_folder_name,
+        value_method_args={
+            "message": "Please enter the name of the folder that Murfey is to analyse."
+        },
+        allow_empty=False,
+        allow_eval=False,
+        many_types=False,
+        restrict_to_types=str,
+        sort_values=True,
+    )
 
     # Validate and return
     try:

From 423460d87a596920edc61782eed8ae7d4479ca29 Mon Sep 17 00:00:00 2001
From: Eu Pin Tien <eu-pin.tien@diamond.ac.uk>
Date: Thu, 21 Nov 2024 10:06:59 +0000
Subject: [PATCH 54/91] Updated description to make it more accurate

---
 src/murfey/util/config.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/murfey/util/config.py b/src/murfey/util/config.py
index 1d81d559..41ec1119 100644
--- a/src/murfey/util/config.py
+++ b/src/murfey/util/config.py
@@ -215,8 +215,8 @@ class MachineConfig(BaseModel):
     upstream_data_download_directory: Optional[Path] = Field(
         default=None,
         description=(
-            "Name of the folder on the instrument machine to transfer files of the same "
-            "visit from other instruments to."
+            "Path to the folder on this instrument machine to transfer files from other "
+            "machines to."
         ),
     )
     upstream_data_tiff_locations: list[str] = Field(

From d90b5855083f3a30ff11fbb0ee9e3a1557372498 Mon Sep 17 00:00:00 2001
From: Eu Pin Tien <eu-pin.tien@diamond.ac.uk>
Date: Thu, 21 Nov 2024 11:42:41 +0000
Subject: [PATCH 55/91] Updated description of 'upstream_data_directories'

---
 src/murfey/util/config.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/src/murfey/util/config.py b/src/murfey/util/config.py
index 41ec1119..712b4a98 100644
--- a/src/murfey/util/config.py
+++ b/src/murfey/util/config.py
@@ -207,9 +207,9 @@ class MachineConfig(BaseModel):
     upstream_data_directories: list[Path] = Field(
         default=[],
         description=(
-            "List of file paths on other instruments for Murfey to look for the current "
-            "visit under. This is primarily used for multi-instrument workflows that "
-            "use processed data from other instruments as input."
+            "List of full paths to folders on other machines for Murfey to look for the "
+            "current visit in. This is primarily used for multi-instrument workflows "
+            "that use processed data from other instruments as input."
         ),
     )
     upstream_data_download_directory: Optional[Path] = Field(

From 975f04a18b8899b53bbc69c6038d64c218c2754e Mon Sep 17 00:00:00 2001
From: Eu Pin Tien <eu-pin.tien@diamond.ac.uk>
Date: Thu, 21 Nov 2024 11:44:22 +0000
Subject: [PATCH 56/91] Added function to handle data transfer related fields
 in config

---
 src/murfey/cli/generate_config.py | 111 +++++++++++++++++++++++++++---
 1 file changed, 102 insertions(+), 9 deletions(-)

diff --git a/src/murfey/cli/generate_config.py b/src/murfey/cli/generate_config.py
index af21b64d..6c29f9c1 100644
--- a/src/murfey/cli/generate_config.py
+++ b/src/murfey/cli/generate_config.py
@@ -274,7 +274,7 @@ def is_type(value: str, instance: Type[Any] | tuple[Type[Any], ...]) -> bool:
     value_message = f"Please enter the {value_name}"
     while add_entry is True:
         # Add key
-        key = (
+        key = str(
             prompt(key_message, style="yellow").strip()
             if key_method is None
             else key_method(**key_method_args)
@@ -457,7 +457,7 @@ def add_calibrations(
     except ValidationError as error:
         if debug:
             console.print(error, style="red")
-        console.print(f"Failed to validate {key!r}", style="red")
+        console.print(f"Failed to validate {key!r}.", style="red")
         console.print("Returning an empty dictionary", style="red")
         return {}
 
@@ -675,7 +675,7 @@ def add_data_directories(
     except ValidationError as error:
         if debug:
             console.print(error, style="red")
-        console.print(f"Failed to validate {key!r}", style="red")
+        console.print(f"Failed to validate {key!r}.", style="red")
         if ask_for_input(category, True) is True:
             return add_data_directories(key, field, debug)
         return {}
@@ -695,7 +695,7 @@ def add_create_directories(
         value_name="folder name",
         key_method=get_folder_name,
         key_method_args={
-            "message": "Please enter the name Murfey should map the folder to.",
+            "message": "Please enter the name Murfey should remember the folder as.",
         },
         value_method=get_folder_name,
         value_method_args={
@@ -714,7 +714,7 @@ def add_create_directories(
     except ValidationError as error:
         if debug:
             console.print(error, style="red")
-        console.print(f"Failed to validate {key!r}", style="red")
+        console.print(f"Failed to validate {key!r}.", style="red")
         if ask_for_input(category, True) is True:
             return add_create_directories(key, field, debug)
         return {}
@@ -728,6 +728,7 @@ def add_analyse_created_directories(
     """
     print_field_info(field)
     category = "folder for Murfey to analyse"
+
     folders_to_analyse: list[str] = construct_list(
         value_name=category,
         value_method=get_folder_name,
@@ -747,13 +748,108 @@ def add_analyse_created_directories(
     except ValidationError as error:
         if debug:
             console.print(error, style="red")
-        console.print(f"Failed to validate {key!r}", style="red")
+        console.print(f"Failed to validate {key!r}.", style="red")
         if ask_for_input(category, True) is True:
             return add_analyse_created_directories(key, field, debug)
         return []
 
 
 def set_up_data_transfer(config: dict, debug: bool = False) -> dict:
+    """
+    Helper function to set up the data transfer fields in the configuration
+    """
+
+    def get_upstream_data_directories(
+        key: str, field: ModelField, debug: bool = False
+    ) -> list[Path]:
+        print_field_info(field)
+        category = "upstream data directory"
+        upstream_data_directories = construct_list(
+            category,
+            value_method=get_folder_path,
+            value_method_args={
+                "message": (
+                    "Please enter the full path to the data directory "
+                    "you wish to search for files in."
+                ),
+            },
+            allow_empty=False,
+            allow_eval=False,
+            many_types=False,
+            restrict_to_types=Path,
+            sort_values=True,
+        )
+        try:
+            return validate_value(upstream_data_directories, key, field, debug)
+        except ValidationError as error:
+            if debug:
+                console.print(error, style="red")
+            console.print(f"Failed to validate {key!r}.", style="red")
+            if ask_for_input(category, True) is True:
+                return get_upstream_data_directories(key, field, debug)
+            return []
+
+    def get_upstream_data_tiff_locations(
+        key: str, field: ModelField, debug: bool = False
+    ) -> list[str]:
+        print_field_info(field)
+        category = "remote folder containing TIFF files"
+        upstream_data_tiff_locations = construct_list(
+            category,
+            value_method=get_folder_name,
+            value_method_args={
+                "message": (
+                    "Please enter the name of the folder on the remote machines "
+                    "in which to search for TIFF files."
+                )
+            },
+            allow_empty=False,
+            allow_eval=False,
+            many_types=False,
+            restrict_to_types=str,
+            sort_values=True,
+        )
+        try:
+            return validate_value(upstream_data_tiff_locations, key, field, debug)
+        except ValidationError as error:
+            if debug:
+                console.print(error, style="red")
+            console.print(f"Failed to validate {key!r}.", style="red")
+            if ask_for_input(category, True) is True:
+                return get_upstream_data_tiff_locations(key, field, debug)
+            return []
+
+    """
+    Start of set_up_data_transfer
+    """
+    for key in (
+        "data_transfer_enabled",
+        "rsync_basepath",
+        "rsync_module",
+        "allow_removal",
+        "upstream_data_directories",
+        "upstream_data_download_directory",
+        "upstream_data_tiff_locations",
+    ):
+        field = MachineConfig.__fields__[key]
+        # Use populate field to process simpler keys
+        if key in (
+            "data_transfer_enabled",
+            "rsync_basepath",
+            "rsync_module",
+            "allow_removal",
+            "upstream_data_download_directory",
+        ):
+            validated_value = populate_field(key, field, debug)
+
+        # Construct more complicated data structures
+        if key == "upstream_data_directories":
+            validated_value = get_upstream_data_directories(key, field, debug)
+        if key == "upstream_data_tiff_locations":
+            validated_value = get_upstream_data_tiff_locations(key, field, debug)
+        # Add to config
+        config[key] = validated_value
+
     return config
 
 
@@ -802,12 +898,10 @@ def set_up_machine_config(debug: bool = False):
             continue
         if key == "analyse_created_directories":
             new_config[key] = add_analyse_created_directories(key, field, debug)
-            # TODO
             continue
 
         # Data transfer block
         if key == "data_transfer_enabled":
-            # TODO: Set up data transfer settings in a separate function
             new_config = set_up_data_transfer(new_config, debug)
             continue
         if key in (
@@ -855,7 +949,6 @@ def set_up_machine_config(debug: bool = False):
         """
         Standard method of inputting values
         """
-
         new_config[key] = populate_field(key, field, debug)
 
     # Validate the entire config again and convert into JSON/YAML-safe dict

From 2925f6640c9bac490d0d21ba917451f17d2cf513 Mon Sep 17 00:00:00 2001
From: Eu Pin Tien <eu-pin.tien@diamond.ac.uk>
Date: Thu, 21 Nov 2024 11:58:47 +0000
Subject: [PATCH 57/91] Added placeholders to lambda functions to make sure
 sorting criteria tuples are of same length

---
 src/murfey/cli/generate_config.py | 12 ++++++++----
 1 file changed, 8 insertions(+), 4 deletions(-)

diff --git a/src/murfey/cli/generate_config.py b/src/murfey/cli/generate_config.py
index 6c29f9c1..a75aa926 100644
--- a/src/murfey/cli/generate_config.py
+++ b/src/murfey/cli/generate_config.py
@@ -225,9 +225,13 @@ def construct_list(
             sorted(
                 lst,
                 key=lambda v: (
-                    (0, float(v))
+                    (0, float(v), 0)
                     if isinstance(v, (int, float))
-                    else (1, abs(v), v.real) if isinstance(v, complex) else (2, str(v))
+                    else (
+                        (1, abs(v), v.real)
+                        if isinstance(v, complex)
+                        else (2, str(v), "")
+                    )
                 ),
             )
             if sort_values
@@ -329,12 +333,12 @@ def is_type(value: str, instance: Type[Any] | tuple[Type[Any], ...]) -> bool:
             for key in sorted(
                 dct.keys(),
                 key=lambda k: (
-                    (0, float(k))
+                    (0, float(k), 0)
                     if is_type(k, (int, float))
                     else (
                         (1, abs(complex(k)), complex(k).real)
                         if is_type(k, complex)
-                        else (2, str(k))
+                        else (2, str(k), "")
                     )
                 ),
             )

From 0cffe368509edcd5c0edd6f1f6c2ca7d320dbf08 Mon Sep 17 00:00:00 2001
From: Eu Pin Tien <eu-pin.tien@diamond.ac.uk>
Date: Thu, 21 Nov 2024 12:36:09 +0000
Subject: [PATCH 58/91] Allowed more functions to offer to be run again if a
 field fails validation

---
 src/murfey/cli/generate_config.py | 31 +++++++++++++++++--------------
 1 file changed, 17 insertions(+), 14 deletions(-)

diff --git a/src/murfey/cli/generate_config.py b/src/murfey/cli/generate_config.py
index a75aa926..74daca28 100644
--- a/src/murfey/cli/generate_config.py
+++ b/src/murfey/cli/generate_config.py
@@ -462,6 +462,8 @@ def add_calibrations(
         if debug:
             console.print(error, style="red")
         console.print(f"Failed to validate {key!r}.", style="red")
+        if ask_for_input(category, True) is True:
+            return add_calibrations(key, field, debug)
         console.print("Returning an empty dictionary", style="red")
         return {}
 
@@ -485,6 +487,7 @@ def get_software_name() -> str:
             console.print("Invalid software name.", style="red")
             if ask_for_input("software package", True) is True:
                 return get_software_name()
+            console.print("Returning an empty string.", style="red")
             return ""
 
     def ask_about_settings_file() -> bool:
@@ -647,8 +650,9 @@ def get_settings_tree_path() -> str:
             if debug:
                 console.print(error, style="red")
             console.print(f"Failed to validate {field_name!r}", style="red")
-            console.print("Please try again.", style="red")
-            return add_software_packages(config)
+            if ask_for_input("software package configuration", True) is True:
+                return add_software_packages(config)
+            console.print(f"Skipped adding {field_name!r}.", style="red")
 
     # Return updated dictionary
     return config
@@ -682,6 +686,7 @@ def add_data_directories(
         console.print(f"Failed to validate {key!r}.", style="red")
         if ask_for_input(category, True) is True:
             return add_data_directories(key, field, debug)
+        console.print("Returning an empty dictionary.", style="red")
         return {}
 
 
@@ -721,6 +726,7 @@ def add_create_directories(
         console.print(f"Failed to validate {key!r}.", style="red")
         if ask_for_input(category, True) is True:
             return add_create_directories(key, field, debug)
+        console.print("Returning an empty dictionary.", style="red")
         return {}
 
 
@@ -755,6 +761,7 @@ def add_analyse_created_directories(
         console.print(f"Failed to validate {key!r}.", style="red")
         if ask_for_input(category, True) is True:
             return add_analyse_created_directories(key, field, debug)
+        console.print("Returning an empty list.", style="red")
         return []
 
 
@@ -791,6 +798,7 @@ def get_upstream_data_directories(
             console.print(f"Failed to validate {key!r}.", style="red")
             if ask_for_input(category, True) is True:
                 return get_upstream_data_directories(key, field, debug)
+            console.print("Returning an empty list.", style="red")
             return []
 
     def get_upstream_data_tiff_locations(
@@ -821,6 +829,7 @@ def get_upstream_data_tiff_locations(
             console.print(f"Failed to validate {key!r}.", style="red")
             if ask_for_input(category, True) is True:
                 return get_upstream_data_tiff_locations(key, field, debug)
+            console.print("Returning an empty list.", style="red")
             return []
 
     """
@@ -836,21 +845,15 @@ def get_upstream_data_tiff_locations(
         "upstream_data_tiff_locations",
     ):
         field = MachineConfig.__fields__[key]
-        # Use populate field to process simpler keys
-        if key in (
-            "data_transfer_enabled",
-            "rsync_basepath",
-            "rsync_module",
-            "allow_removal",
-            "upstream_data_download_directory",
-        ):
-            validated_value = populate_field(key, field, debug)
-
         # Construct more complicated data structures
         if key == "upstream_data_directories":
-            validated_value = get_upstream_data_directories(key, field, debug)
-        if key == "upstream_data_tiff_locations":
+            validated_value: Any = get_upstream_data_directories(key, field, debug)
+        elif key == "upstream_data_tiff_locations":
             validated_value = get_upstream_data_tiff_locations(key, field, debug)
+        # Use populate field to process simpler keys
+        else:
+            validated_value = populate_field(key, field, debug)
+
         # Add to config
         config[key] = validated_value
 

From c3be80ee71327edd0746234e9e52b10a8b34a07b Mon Sep 17 00:00:00 2001
From: Eu Pin Tien <eu-pin.tien@diamond.ac.uk>
Date: Thu, 21 Nov 2024 13:47:40 +0000
Subject: [PATCH 59/91] Added function to populate data processing fields in
 config

---
 src/murfey/cli/generate_config.py | 53 +++++++++++++++++++++++++++++++
 1 file changed, 53 insertions(+)

diff --git a/src/murfey/cli/generate_config.py b/src/murfey/cli/generate_config.py
index 74daca28..8cbd387b 100644
--- a/src/murfey/cli/generate_config.py
+++ b/src/murfey/cli/generate_config.py
@@ -861,6 +861,59 @@ def get_upstream_data_tiff_locations(
 
 
 def set_up_data_processing(config: dict, debug: bool = False) -> dict:
+    """
+    Helper function to set up the data processing fields in the config.
+    """
+
+    def add_recipes(key: str, field: ModelField, debug: bool = False) -> dict[str, str]:
+        print_field_info(field)
+        category = "processing recipe"
+        recipes = construct_dict(
+            category,
+            key_name="name of the recipe",
+            value_name="name of the recipe file",
+            allow_empty_key=False,
+            allow_empty_value=False,
+            allow_eval=False,
+            sort_keys=True,
+            restrict_to_types=str,
+        )
+        try:
+            return validate_value(recipes, key, field, debug)
+        except ValidationError as error:
+            if debug:
+                console.print(error, style="red")
+            console.print(f"Failed to validate {key!r}.", style="red")
+            if ask_for_input(category, True) is True:
+                return add_recipes(key, field, debug)
+            console.print("Returning an empty dictionary.", style="red")
+            return {}
+
+    """
+    Start of set_up_data_processing
+    """
+    # Process in order
+    for key in (
+        "processing_enabled",
+        "process_by_default",
+        "gain_directory_name",
+        "processed_directory_name",
+        "processed_extra_directory",
+        "recipes",
+        "modular_spa",
+        "default_model",
+        "model_search_directory",
+        "initial_model_search_directory",
+    ):
+        field = MachineConfig.__fields__[key]
+        # Handle complex keys
+        if key == "recipes":
+            validated_value: Any = add_recipes(key, field, debug)
+        # Populate fields of simple keys
+        else:
+            validated_value = populate_field(key, field, debug)
+        config[key] = validated_value
+
     return config
 
 

From ae8a9fe286e26b7287bb93e542313b6f2c5dbe72 Mon Sep 17 00:00:00 2001
From: Eu Pin Tien <eu-pin.tien@diamond.ac.uk>
Date: Thu, 21 Nov 2024 16:18:40 +0000
Subject: [PATCH 60/91] Adjusted order of some keys in MachineConfig

---
 src/murfey/util/config.py | 23 +++++++++++------------
 1 file changed, 11 insertions(+), 12 deletions(-)

diff --git a/src/murfey/util/config.py b/src/murfey/util/config.py
index 712b4a98..64b44cf4 100644
--- a/src/murfey/util/config.py
+++ b/src/murfey/util/config.py
@@ -180,6 +180,9 @@ class MachineConfig(BaseModel):
         description=("Toggle whether to enable data transfer via rsync."),
         # NOTE: Only request input for this code block if data transfer is enabled
     )
+    allow_removal: bool = Field(
+        default=False, description="Allow original files to be removed after rsync."
+    )
     rsync_basepath: Path = Field(
         default=Path("/"),
         description=(
@@ -199,9 +202,6 @@ class MachineConfig(BaseModel):
             "different sub-folders to save the data to."
         ),
     )
-    allow_removal: bool = Field(
-        default=False, description="Allow original files to be removed after rsync."
-    )
 
     # Related visits and data
     upstream_data_directories: list[Path] = Field(
@@ -361,6 +361,14 @@ class MachineConfig(BaseModel):
     """
     Server and network-related configurations
     """
+    # Security-related keys
+    global_configuration_path: Optional[Path] = Field(
+        default=None,
+        description=(
+            "Full file path to the YAML file containing the configurations for the "
+            "Murfey server."
+        ),
+    )
     # Network connections
     frontend_url: str = Field(
         default="http://localhost:3000",
@@ -374,15 +382,6 @@ class MachineConfig(BaseModel):
         default="http://localhost:8001",
         description="URL to the instrument server.",
     )
-
-    # Security-related keys
-    global_configuration_path: Optional[Path] = Field(
-        default=None,
-        description=(
-            "Full file path to the YAML file containing the configurations for the "
-            "Murfey server."
-        ),
-    )
     auth_url: str = Field(
         default="",
         description="URL to where users can authenticate their Murfey sessions.",

From 8878244ebc78b646a875cee645583a7eb6e49dd8 Mon Sep 17 00:00:00 2001
From: Eu Pin Tien <eu-pin.tien@diamond.ac.uk>
Date: Thu, 21 Nov 2024 16:44:17 +0000
Subject: [PATCH 61/91] Updated description of 'external_environment' field in
 MachineConfig

---
 src/murfey/util/config.py | 9 +++++----
 1 file changed, 5 insertions(+), 4 deletions(-)

diff --git a/src/murfey/util/config.py b/src/murfey/util/config.py
index 64b44cf4..920980c4 100644
--- a/src/murfey/util/config.py
+++ b/src/murfey/util/config.py
@@ -314,7 +314,6 @@ class MachineConfig(BaseModel):
             "Murfey will look for the folders under the current visit."
         ),
     )
-
     initial_model_search_directory: str = Field(
         default="processing/initial_model",  # User-uploaded electron density models
         description=(
@@ -345,9 +344,11 @@ class MachineConfig(BaseModel):
     external_environment: dict[str, str] = Field(
         default={},
         description=(
-            "Dictionary containing full paths to supporting files and executables that "
-            "are needed to run the executables to be used. These paths will be added "
-            "to the $PATH environment variable."
+            "Dictionary containing full paths to folders containing the supporting "
+            "software needed to run the executables to be used. These paths will be "
+            "appended to the $PATH environment variable, so if multiple paths are "
+            "associated with a single executable, they need to be provided as colon-"
+            "separated strings. E.g. /this/is/one/folder:/this/is/another/one"
         ),
     )
     plugin_packages: dict[str, Path] = Field(

From 1f6a774a7de82284ff99b02826836da68350e6f6 Mon Sep 17 00:00:00 2001
From: Eu Pin Tien <eu-pin.tien@diamond.ac.uk>
Date: Thu, 21 Nov 2024 17:08:19 +0000
Subject: [PATCH 62/91] Added functions to handle external executables and
 environments, as well as Murfey plugins

---
 src/murfey/cli/generate_config.py | 103 +++++++++++++++++++++++++++---
 1 file changed, 95 insertions(+), 8 deletions(-)

diff --git a/src/murfey/cli/generate_config.py b/src/murfey/cli/generate_config.py
index 8cbd387b..54d28486 100644
--- a/src/murfey/cli/generate_config.py
+++ b/src/murfey/cli/generate_config.py
@@ -917,8 +917,94 @@ def add_recipes(key: str, field: ModelField, debug: bool = False) -> dict[str, s
     return config
 
 
-def set_up_external_executables(config: dict, debug: bool = False) -> dict:
-    return config
+def add_external_executables(
+    key: str, field: ModelField, debug: bool = False
+) -> dict[str, Path]:
+    print_field_info(field)
+    category = "external executable"
+    external_executables = construct_dict(
+        dict_name=category,
+        key_name="name of the executable",
+        value_name="full file path to the executable",
+        value_method=get_folder_path,
+        value_method_args={
+            "message": ("Please enter the full file path to the executable"),
+        },
+        allow_empty_key=False,
+        allow_empty_value=False,
+        allow_eval=False,
+        sort_keys=True,
+        restrict_to_types=Path,
+    )
+    try:
+        return validate_value(external_executables, key, field, debug)
+    except ValidationError as error:
+        if debug:
+            console.print(error, style="red")
+        console.print(f"Failed to validate {key!r}.", style="red")
+        if ask_for_input(category, True) is True:
+            return add_external_executables(key, field, debug)
+        console.print("Returning an empty dictionary.", style="red")
+        return {}
+
+
+def add_external_environment(
+    key: str, field: ModelField, debug: bool = False
+) -> dict[str, str]:
+    print_field_info(field)
+    category = "external environment"
+    external_environment = construct_dict(
+        dict_name=category,
+        key_name="name of the environment",
+        value_name="full path to the folder",
+        allow_empty_key=False,
+        allow_empty_value=False,
+        allow_eval=False,
+        sort_keys=True,
+        restrict_to_types=str,
+    )
+    try:
+        return validate_value(external_environment, key, field, debug)
+    except ValidationError as error:
+        if debug:
+            console.print(error, style="red")
+        console.print(f"Failed to validate {key!r}.", style="red")
+        if ask_for_input(category, True) is True:
+            return add_external_environment(key, field, debug)
+        console.print("Returning an empty dictionary.", style="red")
+        return {}
+
+
+def add_murfey_plugins(key: str, field: ModelField, debug: bool = False) -> dict:
+    """
+    Helper function to set up the Murfey plugins field in the config.
+    """
+    print_field_info(field)
+    category = "Murfey plugin package"
+    plugins = construct_dict(
+        dict_name=category,
+        key_name="name of the plugin",
+        value_name="full file path to the plugin",
+        value_method=get_file_path,
+        value_method_args={
+            "message": "Please enter the full file path to the plugin.",
+        },
+        allow_empty_key=False,
+        allow_empty_value=False,
+        allow_eval=False,
+        sort_keys=True,
+        restrict_to_types=Path,
+    )
+    try:
+        return validate_value(plugins, key, field, debug)
+    except ValidationError as error:
+        if debug:
+            console.print(error, style="red")
+        console.print(f"Failed to validate {key!r}.", style="red")
+        if ask_for_input(category, True) is True:
+            return add_murfey_plugins(key, field, debug)
+        console.print("Returning an empty dictionary.", style="red")
+        return {}
 
 
 def set_up_machine_config(debug: bool = False):
@@ -994,16 +1080,17 @@ def set_up_machine_config(debug: bool = False):
         # End of data processing block
 
         # External plugins and executables block
-        if key == "external_executables":
-            # TODO: Set up external plugins and exectuables
-            new_config = set_up_external_executables(new_config, debug)
+        if key in ("external_executables", "external_executables_eer"):
+            new_config[key] = add_external_executables(key, field, debug)
             continue
-        if key in ("external_executables_eer", "external_environment"):
+        if key == "external_environment":
+            new_config[key] = add_external_environment(key, field, debug)
             continue
         # End of external executables block
 
         if key == "plugin_packages":
-            # TODO
+            # TODO:
+            new_config[key] = add_murfey_plugins(key, field, debug)
             continue
 
         """
@@ -1064,7 +1151,7 @@ def set_up_machine_config(debug: bool = False):
         # Overwrite
         master_config = old_config
     with open(config_file, "w") as save_file:
-        yaml.dump(master_config, save_file, default_flow_style=False)
+        yaml.dump(master_config, save_file, default_flow_style=False, sort_keys=False)
     console.print(
         f"Machine configuration for {new_config_safe['instrument_name']!r} "
         f"successfully saved as {str(config_file)!r}",

From 6ffd694eaab607267b0b1c3eda51545e950eb4f1 Mon Sep 17 00:00:00 2001
From: Eu Pin Tien <eu-pin.tien@diamond.ac.uk>
Date: Thu, 21 Nov 2024 17:09:38 +0000
Subject: [PATCH 63/91] Forgot to remove a #TODO comment

---
 src/murfey/cli/generate_config.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/src/murfey/cli/generate_config.py b/src/murfey/cli/generate_config.py
index 54d28486..72ef2d0e 100644
--- a/src/murfey/cli/generate_config.py
+++ b/src/murfey/cli/generate_config.py
@@ -1089,7 +1089,6 @@ def set_up_machine_config(debug: bool = False):
         # End of external executables block
 
         if key == "plugin_packages":
-            # TODO:
             new_config[key] = add_murfey_plugins(key, field, debug)
             continue
 

From 0dae68a0c4c36180a4b5bcbbd0adbf02a02fdbfd Mon Sep 17 00:00:00 2001
From: Eu Pin Tien <eu-pin.tien@diamond.ac.uk>
Date: Thu, 21 Nov 2024 17:14:54 +0000
Subject: [PATCH 64/91] Added logic to write machine configs to YAML file in
 alphabetical order of instrument name

---
 src/murfey/cli/generate_config.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/murfey/cli/generate_config.py b/src/murfey/cli/generate_config.py
index 72ef2d0e..b9765b03 100644
--- a/src/murfey/cli/generate_config.py
+++ b/src/murfey/cli/generate_config.py
@@ -1147,8 +1147,8 @@ def set_up_machine_config(debug: bool = False):
             # Add new machine config
             else:
                 old_config[key] = master_config[key]
-        # Overwrite
-        master_config = old_config
+        # Regenerate dictionary and store machine configs alphabetically
+        master_config = {key: old_config[key] for key in sorted(old_config.keys())}
     with open(config_file, "w") as save_file:
         yaml.dump(master_config, save_file, default_flow_style=False, sort_keys=False)
     console.print(

From d97766252fde7b9f053e4b68de8d12d80004a20d Mon Sep 17 00:00:00 2001
From: Eu Pin Tien <eu-pin.tien@diamond.ac.uk>
Date: Thu, 21 Nov 2024 18:09:52 +0000
Subject: [PATCH 65/91] Add alias to 'global_configuration_path' to facilitate
 shift away from 'security_configuration_path'

---
 src/murfey/util/config.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/src/murfey/util/config.py b/src/murfey/util/config.py
index 920980c4..3782c60e 100644
--- a/src/murfey/util/config.py
+++ b/src/murfey/util/config.py
@@ -369,6 +369,7 @@ class MachineConfig(BaseModel):
             "Full file path to the YAML file containing the configurations for the "
             "Murfey server."
         ),
+        alias="security_configuration_path",
     )
     # Network connections
     frontend_url: str = Field(

From c543f6a62ad948f24a516a2c0b03835b06e33345 Mon Sep 17 00:00:00 2001
From: Eu Pin Tien <eu-pin.tien@diamond.ac.uk>
Date: Thu, 21 Nov 2024 18:50:05 +0000
Subject: [PATCH 66/91] Added a welcome panel to the setup tool; adjusted font
 colours

---
 src/murfey/cli/generate_config.py | 163 ++++++++++++++++++------------
 src/murfey/util/config.py         |   2 +-
 2 files changed, 98 insertions(+), 67 deletions(-)

diff --git a/src/murfey/cli/generate_config.py b/src/murfey/cli/generate_config.py
index b9765b03..a2f2e113 100644
--- a/src/murfey/cli/generate_config.py
+++ b/src/murfey/cli/generate_config.py
@@ -12,6 +12,8 @@
 from pydantic.error_wrappers import ErrorWrapper
 from pydantic.fields import ModelField, UndefinedType
 from rich.console import Console
+from rich.panel import Panel
+from rich.text import Text
 
 from murfey.util.config import MachineConfig
 
@@ -31,18 +33,42 @@ def prompt(message: str, style: str = "") -> str:
     return input("> ")
 
 
+def print_welcome_message():
+    welcome_message = Text(
+        "Welcome to the Murfey configuration setup tool!", style="bold bright_magenta"
+    )
+    panel_content = Text()
+    panel_content.append(
+        "This tool will walk you through the process of setting up Murfey's "
+        "configuration file for your instrument, allowing you to supercharge "
+        "your data processing pipeline with Murfey's capacity for automated "
+        "data transfer and data processing coordination across devices.",
+        style="bright_white",
+    )
+    panel = Panel(
+        panel_content,
+        expand=True,
+    )
+    console.rule(welcome_message)
+    console.print(panel, justify="center")
+    console.rule()
+
+    prompt("Press any key to begin the setup")
+
+
 def print_field_info(field: ModelField):
     """
     Helper function to print out the name of the key being set up, along with a short
     description of what purpose the key serves.
     """
+    console.print()
     console.print(
         f"{field.name.replace('_', ' ').title()} ({field.name})",
         style="bold bright_cyan",
     )
-    console.print(field.field_info.description, style="italic bright_cyan")
+    console.print(field.field_info.description, style="bright_white")
     if not isinstance(field.field_info.default, UndefinedType):
-        console.print(f"Default: {field.field_info.default!r}", style="bright_cyan")
+        console.print(f"Default: {field.field_info.default!r}", style="bold white")
 
 
 def ask_for_permission(message: str) -> bool:
@@ -50,12 +76,12 @@ def ask_for_permission(message: str) -> bool:
     Helper function to generate a Boolean based on user input
     """
     while True:
-        answer = prompt(message, style="yellow").lower().strip()
+        answer = prompt(message, style="bright_yellow").lower().strip()
         if answer in ("y", "yes"):
             return True
         if answer in ("n", "no"):
             return False
-        console.print("Invalid input. Please try again.", style="red")
+        console.print("Invalid input. Please try again.", style="bright_red")
         continue
 
 
@@ -99,13 +125,13 @@ def get_folder_name(message: Optional[str] = None) -> str:
     """
     while True:
         message = "Please enter the folder name." if message is None else message
-        value = prompt(message, style="yellow").strip()
+        value = prompt(message, style="bright_yellow").strip()
         if bool(re.fullmatch(r"[\w\s\-]*", value)) is True:
             return value
         console.print(
             "There are unsafe characters present in this folder name. Please "
             "use a different one.",
-            style="red",
+            style="bright_red",
         )
         if ask_for_input("folder name", True) is False:
             return ""
@@ -121,14 +147,14 @@ def get_folder_path(message: Optional[str] = None) -> Path | None:
         message = (
             "Please enter the full path to the folder." if message is None else message
         )
-        value = prompt(message, style="yellow").strip()
+        value = prompt(message, style="bright_yellow").strip()
         if not value:
             return None
         try:
             path = Path(value).resolve()
             return path
         except Exception:
-            console.print("Unable to resolve provided file path", style="red")
+            console.print("Unable to resolve provided file path", style="bright_red")
             if ask_for_input("file path", True) is False:
                 return None
             continue
@@ -143,13 +169,13 @@ def get_file_path(message: Optional[str] = None) -> Path | None:
         message = (
             "Please enter the full path to the file." if message is None else message
         )
-        value = prompt(message, style="yellow").strip()
+        value = prompt(message, style="bright_yellow").strip()
         if not value:
             return None
         file = Path(value).resolve()
         if file.suffix:
             return file
-        console.print(f"{str(file)!r} doesn't appear to be a file", style="red")
+        console.print(f"{str(file)!r} doesn't appear to be a file", style="bright_red")
         if ask_for_input("file", True) is False:
             return None
         continue
@@ -177,14 +203,14 @@ def construct_list(
                 "Please enter "
                 + ("an" if value_name.startswith(("a", "e", "i", "o", "u")) else "a")
                 + f" {value_name}",
-                style="yellow",
+                style="bright_yellow",
             )
             if value_method is None
             else value_method(**value_method_args)
         )
         # Reject empty inputs if set
         if not value and not allow_empty:
-            console.print("No value provided.", style="red")
+            console.print("No value provided.", style="bright_red")
             add_entry = ask_for_input(value_name, True)
             continue
         # Convert values if set
@@ -202,7 +228,7 @@ def construct_list(
             if not isinstance(eval_value, allowed_types):
                 console.print(
                     f"The provided value ({type(eval_value)}) is not an allowed type.",
-                    style="red",
+                    style="bright_red",
                 )
                 add_entry = ask_for_input(value_name, True)
                 continue
@@ -216,7 +242,7 @@ def construct_list(
             console.print(
                 "The provided value is of a different type to the other members. It "
                 "won't be added to the list.",
-                style="red",
+                style="bright_red",
             )
             lst = lst[:-1]
         # Sort values if set
@@ -279,13 +305,13 @@ def is_type(value: str, instance: Type[Any] | tuple[Type[Any], ...]) -> bool:
     while add_entry is True:
         # Add key
         key = str(
-            prompt(key_message, style="yellow").strip()
+            prompt(key_message, style="bright_yellow").strip()
             if key_method is None
             else key_method(**key_method_args)
         )
         # Reject empty keys if set
         if not allow_empty_key and not key:
-            console.print(f"No {key_name} provided.", style="red")
+            console.print(f"No {key_name} provided.", style="bright_red")
             add_entry = ask_for_input(dict_name, True)
             continue
         # Confirm overwrite key on duplicate
@@ -295,13 +321,13 @@ def is_type(value: str, instance: Type[Any] | tuple[Type[Any], ...]) -> bool:
                 continue
         # Add value
         value = (
-            prompt(value_message, style="yellow").strip()
+            prompt(value_message, style="bright_yellow").strip()
             if value_method is None
             else value_method(**value_method_args)
         )
         # Reject empty values if set
         if not allow_empty_value and not value:
-            console.print("No value provided", style="red")
+            console.print("No value provided", style="bright_red")
             add_entry = ask_for_input(dict_name, True)
             continue
         # Convert values if set
@@ -317,7 +343,9 @@ def is_type(value: str, instance: Type[Any] | tuple[Type[Any], ...]) -> bool:
                 else restrict_to_types
             )
             if not isinstance(eval_value, allowed_types):
-                console.print("The value is not of an allowed type.", style="red")
+                console.print(
+                    "The value is not of an allowed type.", style="bright_red"
+                )
                 add_entry = ask_for_input(dict_name, True)
                 continue
         # Assign value to key
@@ -376,7 +404,7 @@ def populate_field(key: str, field: ModelField, debug: bool = False) -> Any:
     message = "Please provide a value (press Enter to leave it blank as '')."
     while True:
         # Get value
-        answer = prompt(message, style="yellow")
+        answer = prompt(message, style="bright_yellow")
         # Translate empty string into None for fields that take Path values
         value = (
             None
@@ -389,7 +417,7 @@ def populate_field(key: str, field: ModelField, debug: bool = False) -> Any:
             return validate_value(value, key, field, debug)
         except ValidationError as error:
             if debug:
-                console.print(error, style="red")
+                console.print(error, style="bright_red")
             console.print(f"Invalid input for {key!r}. Please try again")
             continue
 
@@ -414,13 +442,13 @@ def add_calibrations(
     while add_calibration is True:
         calibration_type = prompt(
             "What type of calibration settings are you providing?",
-            style="yellow",
+            style="bright_yellow",
         ).lower()
         # Check if it's a known type of calibration
         if calibration_type not in known_calibrations.keys():
             console.print(
                 f"{calibration_type} is not a known type of calibration",
-                style="red",
+                style="bright_red",
             )
             add_calibration = ask_for_input(category, True)
             continue
@@ -460,11 +488,11 @@ def add_calibrations(
         return validate_value(calibrations, key, field, debug)
     except ValidationError as error:
         if debug:
-            console.print(error, style="red")
-        console.print(f"Failed to validate {key!r}.", style="red")
+            console.print(error, style="bright_red")
+        console.print(f"Failed to validate {key!r}.", style="bright_red")
         if ask_for_input(category, True) is True:
             return add_calibrations(key, field, debug)
-        console.print("Returning an empty dictionary", style="red")
+        console.print("Returning an empty dictionary", style="bright_red")
         return {}
 
 
@@ -478,16 +506,16 @@ def get_software_name() -> str:
             "What is the name of the software package? Supported options: 'autotem', "
             "'epu', 'leica', 'serialem', 'tomo'"
         )
-        name = prompt(message, style="yellow").lower().strip()
+        name = prompt(message, style="bright_yellow").lower().strip()
         # Validate name against "acquisition_software" field
         try:
             field = MachineConfig.__fields__["acquisition_software"]
             return validate_value([name], "acquisition_software", field, False)[0]
         except ValidationError:
-            console.print("Invalid software name.", style="red")
+            console.print("Invalid software name.", style="bright_red")
             if ask_for_input("software package", True) is True:
                 return get_software_name()
-            console.print("Returning an empty string.", style="red")
+            console.print("Returning an empty string.", style="bright_red")
             return ""
 
     def ask_about_settings_file() -> bool:
@@ -499,7 +527,7 @@ def ask_about_settings_file() -> bool:
 
     def get_settings_tree_path() -> str:
         message = "What is the path through the XML file to the node to overwrite?"
-        xml_tree_path = prompt(message, style="yellow").strip()
+        xml_tree_path = prompt(message, style="bright_yellow").strip()
         # TODO: Currently no test cases for this method
         return xml_tree_path
 
@@ -547,7 +575,7 @@ def get_settings_tree_path() -> str:
         version = prompt(
             "What is the version number of this software package? Press Enter to leave "
             "it blank if you're unsure.",
-            style="yellow",
+            style="bright_yellow",
         )
 
         # Collect settings files and modifications
@@ -648,11 +676,11 @@ def get_settings_tree_path() -> str:
             config[field_name] = validate_value(value, field_name, field, debug)
         except ValidationError as error:
             if debug:
-                console.print(error, style="red")
-            console.print(f"Failed to validate {field_name!r}", style="red")
+                console.print(error, style="bright_red")
+            console.print(f"Failed to validate {field_name!r}", style="bright_red")
             if ask_for_input("software package configuration", True) is True:
                 return add_software_packages(config)
-            console.print(f"Skipped adding {field_name!r}.", style="red")
+            console.print(f"Skipped adding {field_name!r}.", style="bright_red")
 
     # Return updated dictionary
     return config
@@ -682,11 +710,11 @@ def add_data_directories(
         return validate_value(data_directories, key, field, debug)
     except ValidationError as error:
         if debug:
-            console.print(error, style="red")
-        console.print(f"Failed to validate {key!r}.", style="red")
+            console.print(error, style="bright_red")
+        console.print(f"Failed to validate {key!r}.", style="bright_red")
         if ask_for_input(category, True) is True:
             return add_data_directories(key, field, debug)
-        console.print("Returning an empty dictionary.", style="red")
+        console.print("Returning an empty dictionary.", style="bright_red")
         return {}
 
 
@@ -722,11 +750,11 @@ def add_create_directories(
         return validate_value(folders_to_create, key, field, debug)
     except ValidationError as error:
         if debug:
-            console.print(error, style="red")
-        console.print(f"Failed to validate {key!r}.", style="red")
+            console.print(error, style="bright_red")
+        console.print(f"Failed to validate {key!r}.", style="bright_red")
         if ask_for_input(category, True) is True:
             return add_create_directories(key, field, debug)
-        console.print("Returning an empty dictionary.", style="red")
+        console.print("Returning an empty dictionary.", style="bright_red")
         return {}
 
 
@@ -757,11 +785,11 @@ def add_analyse_created_directories(
         return sorted(validate_value(folders_to_analyse, key, field, debug))
     except ValidationError as error:
         if debug:
-            console.print(error, style="red")
-        console.print(f"Failed to validate {key!r}.", style="red")
+            console.print(error, style="bright_red")
+        console.print(f"Failed to validate {key!r}.", style="bright_red")
         if ask_for_input(category, True) is True:
             return add_analyse_created_directories(key, field, debug)
-        console.print("Returning an empty list.", style="red")
+        console.print("Returning an empty list.", style="bright_red")
         return []
 
 
@@ -794,11 +822,11 @@ def get_upstream_data_directories(
             return validate_value(upstream_data_directories, key, field, debug)
         except ValidationError as error:
             if debug:
-                console.print(error, style="red")
-            console.print(f"Failed to validate {key!r}.", style="red")
+                console.print(error, style="bright_red")
+            console.print(f"Failed to validate {key!r}.", style="bright_red")
             if ask_for_input(category, True) is True:
                 return get_upstream_data_directories(key, field, debug)
-            console.print("Returning an empty list.", style="red")
+            console.print("Returning an empty list.", style="bright_red")
             return []
 
     def get_upstream_data_tiff_locations(
@@ -825,11 +853,11 @@ def get_upstream_data_tiff_locations(
             return validate_value(upstream_data_tiff_locations, key, field, debug)
         except ValidationError as error:
             if debug:
-                console.print(error, style="red")
-            console.print(f"Failed to validate {key!r}.", style="red")
+                console.print(error, style="bright_red")
+            console.print(f"Failed to validate {key!r}.", style="bright_red")
             if ask_for_input(category, True) is True:
                 return get_upstream_data_tiff_locations(key, field, debug)
-            console.print("Returning an empty list.", style="red")
+            console.print("Returning an empty list.", style="bright_red")
             return []
 
     """
@@ -882,11 +910,11 @@ def add_recipes(key: str, field: ModelField, debug: bool = False) -> dict[str, s
             return validate_value(recipes, key, field, debug)
         except ValidationError as error:
             if debug:
-                console.print(error, style="red")
-            console.print(f"Failed to validate {key!r}.", style="red")
+                console.print(error, style="bright_red")
+            console.print(f"Failed to validate {key!r}.", style="bright_red")
             if ask_for_input(category, True) is True:
                 return add_recipes(key, field, debug)
-            console.print("Returning an empty dictionary.", style="red")
+            console.print("Returning an empty dictionary.", style="bright_red")
             return {}
 
     """
@@ -940,11 +968,11 @@ def add_external_executables(
         return validate_value(external_executables, key, field, debug)
     except ValidationError as error:
         if debug:
-            console.print(error, style="red")
-        console.print(f"Failed to validate {key!r}.", style="red")
+            console.print(error, style="bright_red")
+        console.print(f"Failed to validate {key!r}.", style="bright_red")
         if ask_for_input(category, True) is True:
             return add_external_executables(key, field, debug)
-        console.print("Returning an empty dictionary.", style="red")
+        console.print("Returning an empty dictionary.", style="bright_red")
         return {}
 
 
@@ -967,11 +995,11 @@ def add_external_environment(
         return validate_value(external_environment, key, field, debug)
     except ValidationError as error:
         if debug:
-            console.print(error, style="red")
-        console.print(f"Failed to validate {key!r}.", style="red")
+            console.print(error, style="bright_red")
+        console.print(f"Failed to validate {key!r}.", style="bright_red")
         if ask_for_input(category, True) is True:
             return add_external_environment(key, field, debug)
-        console.print("Returning an empty dictionary.", style="red")
+        console.print("Returning an empty dictionary.", style="bright_red")
         return {}
 
 
@@ -999,11 +1027,11 @@ def add_murfey_plugins(key: str, field: ModelField, debug: bool = False) -> dict
         return validate_value(plugins, key, field, debug)
     except ValidationError as error:
         if debug:
-            console.print(error, style="red")
-        console.print(f"Failed to validate {key!r}.", style="red")
+            console.print(error, style="bright_red")
+        console.print(f"Failed to validate {key!r}.", style="bright_red")
         if ask_for_input(category, True) is True:
             return add_murfey_plugins(key, field, debug)
-        console.print("Returning an empty dictionary.", style="red")
+        console.print("Returning an empty dictionary.", style="bright_red")
         return {}
 
 
@@ -1011,6 +1039,9 @@ def set_up_machine_config(debug: bool = False):
     """
     Main function which runs through the setup process.
     """
+
+    print_welcome_message()
+
     new_config: dict = {}
     for key, field in MachineConfig.__fields__.items():
         """
@@ -1102,9 +1133,9 @@ def set_up_machine_config(debug: bool = False):
         new_config_safe: dict = json.loads(MachineConfig(**new_config).json())
     except ValidationError as exception:
         # Print out validation errors found
-        console.print("Validation failed", style="red")
+        console.print("Validation failed", style="bright_red")
         for error in exception.errors():
-            console.print(f"{error}", style="red")
+            console.print(f"{error}", style="bright_red")
         # Offer to redo the setup, otherwise quit setup
         if ask_for_input("machine configuration", True) is True:
             return set_up_machine_config(debug)
@@ -1119,10 +1150,10 @@ def set_up_machine_config(debug: bool = False):
     console.print("Machine config successfully validated.", style="green")
     config_name = prompt(
         "What would you like to name the file? (E.g. 'my_machine_config')",
-        style="yellow",
+        style="bright_yellow",
     )
     config_path = Path(
-        prompt("Where would you like to save this config?", style="yellow")
+        prompt("Where would you like to save this config?", style="bright_yellow")
     )
     config_file = config_path / f"{config_name}.yaml"
     config_path.mkdir(parents=True, exist_ok=True)
@@ -1133,7 +1164,7 @@ def set_up_machine_config(debug: bool = False):
             try:
                 old_config: dict[str, dict] = yaml.safe_load(existing_file)
             except yaml.YAMLError as error:
-                console.print(error, style="red")
+                console.print(error, style="bright_red")
                 # Provide option to quit or try again
                 if ask_for_input("machine configuration", True) is True:
                     return set_up_machine_config(debug)
diff --git a/src/murfey/util/config.py b/src/murfey/util/config.py
index 3782c60e..7c3bdb62 100644
--- a/src/murfey/util/config.py
+++ b/src/murfey/util/config.py
@@ -348,7 +348,7 @@ class MachineConfig(BaseModel):
             "software needed to run the executables to be used. These paths will be "
             "appended to the $PATH environment variable, so if multiple paths are "
             "associated with a single executable, they need to be provided as colon-"
-            "separated strings. E.g. /this/is/one/folder:/this/is/another/one"
+            "separated strings. E.g. '/this/is/one/folder:/this/is/another/one'"
         ),
     )
     plugin_packages: dict[str, Path] = Field(

From 8db6865c6a3705266053cd41b72a0855362fb332 Mon Sep 17 00:00:00 2001
From: Eu Pin Tien <eu-pin.tien@diamond.ac.uk>
Date: Wed, 18 Dec 2024 17:54:33 +0000
Subject: [PATCH 67/91] Put y/n prompt in a different colour

---
 src/murfey/cli/generate_config.py | 8 ++++----
 src/murfey/util/config.py         | 2 +-
 2 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/src/murfey/cli/generate_config.py b/src/murfey/cli/generate_config.py
index a2f2e113..03aa4d5a 100644
--- a/src/murfey/cli/generate_config.py
+++ b/src/murfey/cli/generate_config.py
@@ -98,7 +98,7 @@ def ask_for_input(parameter: str, again: bool = False):
                 "an" if parameter.lower().startswith(("a", "e", "i", "o", "u")) else "a"
             )
         )
-        + f" {parameter}? (y/n)"
+        + f" {parameter}? [bold cyan](y/n)[/bold cyan]"
     )
     return ask_for_permission(message)
 
@@ -107,7 +107,7 @@ def confirm_overwrite(value: str):
     """
     Asks the user if a value that already exists should be overwritten.
     """
-    message = f"{value!r} already exists; do you wish to overwrite it? (y/n)"
+    message = f"{value!r} already exists; do you wish to overwrite it? [bold cyan](y/n)[/bold cyan]"
     return ask_for_permission(message)
 
 
@@ -115,7 +115,7 @@ def confirm_duplicate(value: str):
     """
     Asks the user if a duplicate value should be allowed.
     """
-    message = f"{value!r} already exists; do you want to add a duplicate? (y/n)"
+    message = f"{value!r} already exists; do you want to add a duplicate? [bold cyan](y/n)[/bold cyan]"
     return ask_for_permission(message)
 
 
@@ -521,7 +521,7 @@ def get_software_name() -> str:
     def ask_about_settings_file() -> bool:
         message = (
             "Does this software package have a settings file that needs modification? "
-            "(y/n)"
+            "[bold cyan](y/n)[/bold cyan]"
         )
         return ask_for_permission(message)
 
diff --git a/src/murfey/util/config.py b/src/murfey/util/config.py
index 1746ee3b..fb1a998a 100644
--- a/src/murfey/util/config.py
+++ b/src/murfey/util/config.py
@@ -165,7 +165,7 @@ class MachineConfig(BaseModel):
             "File path template that can be provided if the EER fractionation files "
             "are saved in a location separate from the rest of the data. This will "
             "be a string, with '{visit}' and '{year}' being optional arguments that "
-            "can be embedded in the string. E.g.: /home/user/data/{year}/{visit}"
+            "can be embedded in the string. E.g.: '/home/user/data/{year}/{visit}'"
         ),
         # Only if Falcon is used
         # To avoid others having to follow the {year}/{visit} format we are doing

From 094470eb3bfa066e0aa26b84228eb0939d8b9637 Mon Sep 17 00:00:00 2001
From: Eu Pin Tien <eu-pin.tien@diamond.ac.uk>
Date: Thu, 19 Dec 2024 12:09:16 +0000
Subject: [PATCH 68/91] Allow setup wizard to skip sections if they are
 disabled; updated font styles of config key descriptions

---
 src/murfey/cli/generate_config.py | 26 +++++++++++++++++---------
 1 file changed, 17 insertions(+), 9 deletions(-)

diff --git a/src/murfey/cli/generate_config.py b/src/murfey/cli/generate_config.py
index 03aa4d5a..76efa5a0 100644
--- a/src/murfey/cli/generate_config.py
+++ b/src/murfey/cli/generate_config.py
@@ -98,7 +98,7 @@ def ask_for_input(parameter: str, again: bool = False):
                 "an" if parameter.lower().startswith(("a", "e", "i", "o", "u")) else "a"
             )
         )
-        + f" {parameter}? [bold cyan](y/n)[/bold cyan]"
+        + f" {parameter}? [bold bright_magenta](y/n)[/bold bright_magenta]"
     )
     return ask_for_permission(message)
 
@@ -107,7 +107,7 @@ def confirm_overwrite(value: str):
     """
     Asks the user if a value that already exists should be overwritten.
     """
-    message = f"{value!r} already exists; do you wish to overwrite it? [bold cyan](y/n)[/bold cyan]"
+    message = f"{value!r} already exists; do you wish to overwrite it? [bold bright_magenta](y/n)[/bold bright_magenta]"
     return ask_for_permission(message)
 
 
@@ -115,7 +115,7 @@ def confirm_duplicate(value: str):
     """
     Asks the user if a duplicate value should be allowed.
     """
-    message = f"{value!r} already exists; do you want to add a duplicate? [bold cyan](y/n)[/bold cyan]"
+    message = f"{value!r} already exists; do you want to add a duplicate? [bold bright_magenta](y/n)[/bold bright_magenta]"
     return ask_for_permission(message)
 
 
@@ -418,7 +418,9 @@ def populate_field(key: str, field: ModelField, debug: bool = False) -> Any:
         except ValidationError as error:
             if debug:
                 console.print(error, style="bright_red")
-            console.print(f"Invalid input for {key!r}. Please try again")
+            console.print(
+                f"Invalid input for {key!r}. Please try again", style="bright_red"
+            )
             continue
 
 
@@ -521,7 +523,7 @@ def get_software_name() -> str:
     def ask_about_settings_file() -> bool:
         message = (
             "Does this software package have a settings file that needs modification? "
-            "[bold cyan](y/n)[/bold cyan]"
+            "[bold bright_magenta](y/n)[/bold bright_magenta]"
         )
         return ask_for_permission(message)
 
@@ -542,7 +544,7 @@ def get_settings_tree_path() -> str:
         "This is where aquisition software packages present on the instrument machine "
         "can be specified, along with the output file names and extensions that are of "
         "interest.",
-        style="italic bright_cyan",
+        style="bright_white",
     )
     package_info: dict = {}
     category = "software package"
@@ -555,7 +557,7 @@ def get_settings_tree_path() -> str:
         )
         console.print(
             "Name of the acquisition software installed on this instrument.",
-            style="italic bright_cyan",
+            style="bright_white",
         )
         console.print(
             "Options: 'autotem', 'epu', 'leica', 'serialem', 'tomo'",
@@ -588,7 +590,7 @@ def get_settings_tree_path() -> str:
             "in order to ensure files are saved to the desired folders. The paths to "
             "the files and the path to the nodes in the settings files both need to be "
             "provided.",
-            style="italic bright_cyan",
+            style="bright_white",
         )
         settings_file: Optional[Path] = (
             get_file_path(
@@ -610,7 +612,7 @@ def get_settings_tree_path() -> str:
             "Different software packages will generate different output files. Only "
             "files with certain extensions and keywords in their filenames are needed "
             "for data processing. They are listed out here.",
-            style="italic bright_cyan",
+            style="bright_white",
         )
         extensions_and_substrings: dict[str, list[str]] = construct_dict(
             dict_name="file extension configuration",
@@ -873,6 +875,9 @@ def get_upstream_data_tiff_locations(
         "upstream_data_tiff_locations",
     ):
         field = MachineConfig.__fields__[key]
+        # Skip everything in this section if data transfer is set to False
+        if config.get("data_transfer_enabled", None) is False:
+            continue
         # Construct more complicated data structures
         if key == "upstream_data_directories":
             validated_value: Any = get_upstream_data_directories(key, field, debug)
@@ -934,6 +939,9 @@ def add_recipes(key: str, field: ModelField, debug: bool = False) -> dict[str, s
         "initial_model_search_directory",
     ):
         field = MachineConfig.__fields__[key]
+        # Skip this section of processing is disabled
+        if config.get("processing_enabled", None) is False:
+            continue
         # Handle complex keys
         if key == "recipes":
             validated_value: Any = add_recipes(key, field, debug)

From 043046e506f89640751d9b773d0c23dfb96861d6 Mon Sep 17 00:00:00 2001
From: Eu Pin Tien <eu-pin.tien@diamond.ac.uk>
Date: Thu, 19 Dec 2024 15:20:12 +0000
Subject: [PATCH 69/91] Renamed 'generate_config' to 'create_config'

---
 pyproject.toml                                          | 2 +-
 src/murfey/cli/{generate_config.py => create_config.py} | 0
 2 files changed, 1 insertion(+), 1 deletion(-)
 rename src/murfey/cli/{generate_config.py => create_config.py} (100%)

diff --git a/pyproject.toml b/pyproject.toml
index 2e481e28..d0c56a6f 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -80,11 +80,11 @@ GitHub = "https://github.com/DiamondLightSource/python-murfey"
 [project.scripts]
 murfey = "murfey.client:run"
 "murfey.add_user" = "murfey.cli.add_user:run"
+"murfey.create_config" = "murfey.cli.create_config:run"
 "murfey.create_db" = "murfey.cli.create_db:run"
 "murfey.db_sql" = "murfey.cli.murfey_db_sql:run"
 "murfey.decrypt_password" = "murfey.cli.decrypt_db_password:run"
 "murfey.dlq_murfey" = "murfey.cli.dlq_resubmit:run"
-"murfey.generate_config" = "murfey.cli.generate_config:run"
 "murfey.generate_key" = "murfey.cli.generate_crypto_key:run"
 "murfey.generate_password" = "murfey.cli.generate_db_password:run"
 "murfey.instrument_server" = "murfey.instrument_server:run"
diff --git a/src/murfey/cli/generate_config.py b/src/murfey/cli/create_config.py
similarity index 100%
rename from src/murfey/cli/generate_config.py
rename to src/murfey/cli/create_config.py

From cfffcf2551a5bee57c5a8b617547980b12a623d1 Mon Sep 17 00:00:00 2001
From: Eu Pin Tien <eu-pin.tien@diamond.ac.uk>
Date: Thu, 19 Dec 2024 16:09:00 +0000
Subject: [PATCH 70/91] Empty console inputs for simple fields are now
 converted to the default field values

---
 src/murfey/cli/create_config.py | 17 +++++++++--------
 1 file changed, 9 insertions(+), 8 deletions(-)

diff --git a/src/murfey/cli/create_config.py b/src/murfey/cli/create_config.py
index 76efa5a0..739fba0f 100644
--- a/src/murfey/cli/create_config.py
+++ b/src/murfey/cli/create_config.py
@@ -401,16 +401,17 @@ def populate_field(key: str, field: ModelField, debug: bool = False) -> Any:
 
     # Display information on the field to be filled
     print_field_info(field)
-    message = "Please provide a value (press Enter to leave it blank as '')."
+
+    message = (
+        "Please provide a value (press Enter to use the default value of "
+        f"{field.field_info.default!r})."
+    )
     while True:
         # Get value
         answer = prompt(message, style="bright_yellow")
-        # Translate empty string into None for fields that take Path values
-        value = (
-            None
-            if (not answer and machine_config_types.get(key) in (Path, Optional[Path]))
-            else answer
-        )
+
+        # Convert empty console input into default values
+        value = field.field_info.default if not answer else answer
 
         # Validate and return
         try:
@@ -449,7 +450,7 @@ def add_calibrations(
         # Check if it's a known type of calibration
         if calibration_type not in known_calibrations.keys():
             console.print(
-                f"{calibration_type} is not a known type of calibration",
+                f"{calibration_type!r} is not a known type of calibration",
                 style="bright_red",
             )
             add_calibration = ask_for_input(category, True)

From aab451f5fa2e7b9ebf09da19230ba5a6e2f5a53f Mon Sep 17 00:00:00 2001
From: Eu Pin Tien <eu-pin.tien@diamond.ac.uk>
Date: Thu, 19 Dec 2024 17:15:59 +0000
Subject: [PATCH 71/91] Updated data directories type from dict[str, str] to
 list[Path]; changed default camera value to ''

---
 src/murfey/util/config.py | 14 ++++++--------
 1 file changed, 6 insertions(+), 8 deletions(-)

diff --git a/src/murfey/util/config.py b/src/murfey/util/config.py
index fb1a998a..ee2715a0 100644
--- a/src/murfey/util/config.py
+++ b/src/murfey/util/config.py
@@ -52,13 +52,13 @@ class MachineConfig(BaseModel):
     """
     Information about the hardware and software on the instrument machine
     """
-    camera: Literal["FALCON", "K3_FLIPX", "K3_FLIPY"] = Field(
-        default="FALCON",
+    camera: Literal["FALCON", "K3_FLIPX", "K3_FLIPY", ""] = Field(
+        default="",
         description=(
             "Name of the camera used by the TEM. This is only relevant for TEMs to "
             "determine how the gain reference needs to be processed, e.g., if it has "
             "to be binned down from superres or flipped along the x- or y-axis. "
-            "Options: 'FALCON', 'K3_FLIPX', 'K3_FLIPY'"
+            "Options: 'FALCON', 'K3_FLIPX', 'K3_FLIPY', ''"
         ),
         # NOTE:
         #   Eventually need to support Falcon 4, Falcon 4I, K2, K3 (superres)
@@ -128,12 +128,10 @@ class MachineConfig(BaseModel):
             "processing."
         ),
     )
-    data_directories: dict[str, str] = Field(
-        default={},
+    data_directories: list[Path] = Field(
+        default=[],
         description=(
-            "Dictionary of key-value pairs, where the keys are full paths to where "
-            "data is stored on the client machine, and the value denotes the type "
-            "of data stored at that path."
+            "List of full paths to where data is stored on the instrument machine."
         ),
     )
     create_directories: dict[str, str] = Field(

From 436fab74ca88a7372329d0e0d5d59e21b6d6504c Mon Sep 17 00:00:00 2001
From: Eu Pin Tien <eu-pin.tien@diamond.ac.uk>
Date: Thu, 19 Dec 2024 17:18:35 +0000
Subject: [PATCH 72/91] Added logic to use default values for fields if empty
 console input is received; updated 'data_directories' creation function to
 reflect change in MachineConfig

---
 src/murfey/cli/create_config.py | 123 +++++++++++++++++++++-----------
 1 file changed, 81 insertions(+), 42 deletions(-)

diff --git a/src/murfey/cli/create_config.py b/src/murfey/cli/create_config.py
index 739fba0f..1cb22ec6 100644
--- a/src/murfey/cli/create_config.py
+++ b/src/murfey/cli/create_config.py
@@ -103,6 +103,19 @@ def ask_for_input(parameter: str, again: bool = False):
     return ask_for_permission(message)
 
 
+def ask_to_use_default(field: ModelField):
+    """
+    Asks the user if they want to populate the current configuration key with the
+    default value.
+    """
+    message = (
+        "Would you like to use the default value for this field? \n"
+        f"Default: {field.field_info.default!r} \n"
+        "[bold bright_magenta](y/n)[/bold bright_magenta]"
+    )
+    return ask_for_permission(message)
+
+
 def confirm_overwrite(value: str):
     """
     Asks the user if a value that already exists should be overwritten.
@@ -410,8 +423,16 @@ def populate_field(key: str, field: ModelField, debug: bool = False) -> Any:
         # Get value
         answer = prompt(message, style="bright_yellow")
 
-        # Convert empty console input into default values
-        value = field.field_info.default if not answer else answer
+        # Convert empty console input into default values if they are None-like
+        if not field.field_info.default:
+            value = field.field_info.default if not answer else answer
+        # Convert inverted commas into empty strings if defaults are not None-like
+        else:
+            value = (
+                ""
+                if answer in ("''", '""') and isinstance(field.field_info.default, str)
+                else answer
+            )
 
         # Validate and return
         try:
@@ -691,21 +712,27 @@ def get_settings_tree_path() -> str:
 
 def add_data_directories(
     key: str, field: ModelField, debug: bool = False
-) -> dict[str, str]:
+) -> list[Path]:
     """
     Function to facilitate populating the data_directories field.
     """
     print_field_info(field)
-    category = "data directory"
-    data_directories: dict[str, str] = construct_dict(
-        category,
-        "full file path to the data directory",
-        "data type",
-        allow_empty_key=False,
-        allow_empty_value=False,
+    description = "data directory path"
+    data_directories: list[Path] = construct_list(
+        description,
+        value_method=get_folder_path,
+        value_method_args={
+            "message": (
+                "Please enter the full path to the data directory "
+                "where the files are stored."
+            ),
+        },
+        allow_empty=False,
         allow_eval=False,
-        sort_keys=True,
-        restrict_to_types=str,
+        many_types=False,
+        restrict_to_types=Path,
+        sort_values=True,
+        debug=debug,
     )
 
     # Validate and return
@@ -715,10 +742,10 @@ def add_data_directories(
         if debug:
             console.print(error, style="bright_red")
         console.print(f"Failed to validate {key!r}.", style="bright_red")
-        if ask_for_input(category, True) is True:
+        if ask_for_input(description, True) is True:
             return add_data_directories(key, field, debug)
         console.print("Returning an empty dictionary.", style="bright_red")
-        return {}
+        return []
 
 
 def add_create_directories(
@@ -728,24 +755,30 @@ def add_create_directories(
     Function to populate the create_directories field.
     """
     print_field_info(field)
-    category = "folder for Murfey to create"
-    folders_to_create: dict[str, str] = construct_dict(
-        dict_name=category,
-        key_name="folder alias",
-        value_name="folder name",
-        key_method=get_folder_name,
-        key_method_args={
-            "message": "Please enter the name Murfey should remember the folder as.",
-        },
-        value_method=get_folder_name,
-        value_method_args={
-            "message": "Please enter the name of the folder for Murfey to create.",
-        },
-        allow_empty_key=False,
-        allow_empty_value=False,
-        allow_eval=False,
-        sort_keys=True,
-        restrict_to_types=str,
+
+    # Manually enter fields if default value is not used
+    description = "folder for Murfey to create"
+    folders_to_create: dict[str, str] = (
+        field.field_info.default
+        if ask_to_use_default(field) is True
+        else construct_dict(
+            dict_name=description,
+            key_name="folder alias",
+            value_name="folder name",
+            key_method=get_folder_name,
+            key_method_args={
+                "message": "Please enter the name Murfey should remember the folder as.",
+            },
+            value_method=get_folder_name,
+            value_method_args={
+                "message": "Please enter the name of the folder for Murfey to create.",
+            },
+            allow_empty_key=False,
+            allow_empty_value=False,
+            allow_eval=False,
+            sort_keys=True,
+            restrict_to_types=str,
+        )
     )
 
     # Validate and return
@@ -755,7 +788,7 @@ def add_create_directories(
         if debug:
             console.print(error, style="bright_red")
         console.print(f"Failed to validate {key!r}.", style="bright_red")
-        if ask_for_input(category, True) is True:
+        if ask_for_input(description, True) is True:
             return add_create_directories(key, field, debug)
         console.print("Returning an empty dictionary.", style="bright_red")
         return {}
@@ -901,16 +934,22 @@ def set_up_data_processing(config: dict, debug: bool = False) -> dict:
 
     def add_recipes(key: str, field: ModelField, debug: bool = False) -> dict[str, str]:
         print_field_info(field)
+
+        # Manually construct the dictionary if the default value is not used
         category = "processing recipe"
-        recipes = construct_dict(
-            category,
-            key_name="name of the recipe",
-            value_name="name of the recipe file",
-            allow_empty_key=False,
-            allow_empty_value=False,
-            allow_eval=False,
-            sort_keys=True,
-            restrict_to_types=str,
+        recipes = (
+            field.field_info.default
+            if ask_to_use_default(field) is True
+            else construct_dict(
+                category,
+                key_name="name of the recipe",
+                value_name="name of the recipe file",
+                allow_empty_key=False,
+                allow_empty_value=False,
+                allow_eval=False,
+                sort_keys=True,
+                restrict_to_types=str,
+            )
         )
         try:
             return validate_value(recipes, key, field, debug)

From ae2d291af7d425263da240d9cd856ede3d4e71d7 Mon Sep 17 00:00:00 2001
From: Eu Pin Tien <eu-pin.tien@diamond.ac.uk>
Date: Thu, 19 Dec 2024 17:22:04 +0000
Subject: [PATCH 73/91] Recipe keys are now type hinted as string literals

---
 src/murfey/util/config.py | 13 ++++++++++++-
 1 file changed, 12 insertions(+), 1 deletion(-)

diff --git a/src/murfey/util/config.py b/src/murfey/util/config.py
index ee2715a0..5017ce99 100644
--- a/src/murfey/util/config.py
+++ b/src/murfey/util/config.py
@@ -272,7 +272,18 @@ class MachineConfig(BaseModel):
     )
 
     # TEM-related processing workflows
-    recipes: dict[str, str] = Field(
+    recipes: dict[
+        Literal[
+            "em-spa-bfactor",
+            "em-spa-class2d",
+            "em-spa-class3d",
+            "em-spa-preprocess",
+            "em-spa-refine",
+            "em-tomo-preprocess",
+            "em-tomo-align",
+        ],
+        str,
+    ] = Field(
         default={
             "em-spa-bfactor": "em-spa-bfactor",
             "em-spa-class2d": "em-spa-class2d",

From 53a30ad77d7f223b209f8334b1759fc7433f8f8f Mon Sep 17 00:00:00 2001
From: Eu Pin Tien <eu-pin.tien@diamond.ac.uk>
Date: Thu, 19 Dec 2024 17:35:28 +0000
Subject: [PATCH 74/91] Grouped 'rsync_url' together with other data
 transfer-related keys

---
 src/murfey/cli/create_config.py |  2 ++
 src/murfey/util/config.py       | 14 +++++++-------
 2 files changed, 9 insertions(+), 7 deletions(-)

diff --git a/src/murfey/cli/create_config.py b/src/murfey/cli/create_config.py
index 1cb22ec6..e7400e6e 100644
--- a/src/murfey/cli/create_config.py
+++ b/src/murfey/cli/create_config.py
@@ -903,6 +903,7 @@ def get_upstream_data_tiff_locations(
         "data_transfer_enabled",
         "rsync_basepath",
         "rsync_module",
+        "rsync_url",
         "allow_removal",
         "upstream_data_directories",
         "upstream_data_download_directory",
@@ -1133,6 +1134,7 @@ def set_up_machine_config(debug: bool = False):
             "allow_removal",
             "rsync_basepath",
             "rsync_module",
+            "rsync_url",
             "upstream_data_directories",
             "upstream_data_download_directory",
             "upstream_data_tiff_locations",
diff --git a/src/murfey/util/config.py b/src/murfey/util/config.py
index 5017ce99..523e1d5f 100644
--- a/src/murfey/util/config.py
+++ b/src/murfey/util/config.py
@@ -200,6 +200,13 @@ class MachineConfig(BaseModel):
             "different sub-folders to save the data to."
         ),
     )
+    rsync_url: str = Field(
+        default="",
+        description=(
+            "URL to a remote rsync daemon. By default, the rsync daemon will be "
+            "running on the client machine, and this defaults to an empty string."
+        ),
+    )
 
     # Related visits and data
     upstream_data_directories: list[Path] = Field(
@@ -397,13 +404,6 @@ class MachineConfig(BaseModel):
         default="",
         description="URL to where users can authenticate their Murfey sessions.",
     )
-    rsync_url: str = Field(
-        default="",
-        description=(
-            "URL to a remote rsync daemon. By default, the rsync daemon will be "
-            "running on the client machine, and this defaults to an empty string."
-        ),
-    )
 
     # RabbitMQ-specifc keys
     failure_queue: str = Field(

From 194341bd38da003710292207c08129bca6734409 Mon Sep 17 00:00:00 2001
From: Eu Pin Tien <eu-pin.tien@diamond.ac.uk>
Date: Thu, 19 Dec 2024 17:47:53 +0000
Subject: [PATCH 75/91] Updated printed messages

---
 src/murfey/cli/create_config.py | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/src/murfey/cli/create_config.py b/src/murfey/cli/create_config.py
index e7400e6e..222efeea 100644
--- a/src/murfey/cli/create_config.py
+++ b/src/murfey/cli/create_config.py
@@ -53,7 +53,7 @@ def print_welcome_message():
     console.print(panel, justify="center")
     console.rule()
 
-    prompt("Press any key to begin the setup")
+    input("Press 'Enter' to begin the setup")
 
 
 def print_field_info(field: ModelField):
@@ -109,9 +109,9 @@ def ask_to_use_default(field: ModelField):
     default value.
     """
     message = (
-        "Would you like to use the default value for this field? \n"
-        f"Default: {field.field_info.default!r} \n"
-        "[bold bright_magenta](y/n)[/bold bright_magenta]"
+        "Would you like to use the default value for this field? "
+        "[bold bright_magenta](y/n)[/bold bright_magenta] \n"
+        f"{field.field_info.default!r}"
     )
     return ask_for_permission(message)
 

From 2b25266e58b8b4299fd00517ab5091b8d7985387 Mon Sep 17 00:00:00 2001
From: Eu Pin Tien <eu-pin.tien@diamond.ac.uk>
Date: Thu, 19 Dec 2024 17:51:39 +0000
Subject: [PATCH 76/91] Removed unused variable

---
 src/murfey/cli/create_config.py | 5 +----
 1 file changed, 1 insertion(+), 4 deletions(-)

diff --git a/src/murfey/cli/create_config.py b/src/murfey/cli/create_config.py
index 222efeea..0ca36a1a 100644
--- a/src/murfey/cli/create_config.py
+++ b/src/murfey/cli/create_config.py
@@ -5,7 +5,7 @@
 import re
 from ast import literal_eval
 from pathlib import Path
-from typing import Any, Callable, Optional, Type, get_type_hints
+from typing import Any, Callable, Optional, Type
 
 import yaml
 from pydantic import ValidationError
@@ -20,9 +20,6 @@
 # Create a console object for pretty printing
 console = Console()
 
-# Compile types for each key present in MachineConfig
-machine_config_types: dict = get_type_hints(MachineConfig)
-
 
 def prompt(message: str, style: str = "") -> str:
     """

From 93ebb079ee16899a495463043814b9e113979d7e Mon Sep 17 00:00:00 2001
From: Eu Pin Tien <eu-pin.tien@diamond.ac.uk>
Date: Thu, 19 Dec 2024 18:04:23 +0000
Subject: [PATCH 77/91] Modernised type hints

---
 src/murfey/util/config.py | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/src/murfey/util/config.py b/src/murfey/util/config.py
index 523e1d5f..29c27f5e 100644
--- a/src/murfey/util/config.py
+++ b/src/murfey/util/config.py
@@ -4,7 +4,7 @@
 import socket
 from functools import lru_cache
 from pathlib import Path
-from typing import Any, Dict, List, Literal, Mapping, Optional, Union
+from typing import Any, Literal, Mapping, Optional
 
 import yaml
 from backports.entry_points_selectable import entry_points
@@ -82,7 +82,7 @@ class MachineConfig(BaseModel):
         ),
         # NOTE: This is a placeholder for a key that will be implemented in the future
     )
-    calibrations: dict[str, dict[str, Union[dict, float]]] = Field(
+    calibrations: dict[str, dict[str, dict | float]] = Field(
         default={},
         description=(
             "Nested dictionary containing the calibrations for this microscope. "
@@ -405,7 +405,7 @@ class MachineConfig(BaseModel):
         description="URL to where users can authenticate their Murfey sessions.",
     )
 
-    # RabbitMQ-specifc keys
+    # RabbitMQ-specific keys
     failure_queue: str = Field(
         default="",
         description="Name of RabbitMQ queue where failed API calls will be recorded.",
@@ -481,7 +481,7 @@ def __validate_default_model_if_processing_enabled_and_spa_possible__(
 
 def machine_config_from_file(
     config_file_path: Path, instrument: str = ""
-) -> Dict[str, MachineConfig]:
+) -> dict[str, MachineConfig]:
     with open(config_file_path, "r") as config_stream:
         config = yaml.safe_load(config_stream)
     return {
@@ -510,7 +510,7 @@ class GlobalConfig(BaseModel):
     session_token_timeout: Optional[int] = (
         None  # seconds; typically the length of a microscope session plus a bit
     )
-    allow_origins: List[str] = ["*"]  # Restrict to only certain hostnames
+    allow_origins: list[str] = ["*"]  # Restrict to only certain hostnames
 
 
 def global_config_from_file(config_file_path: Path) -> GlobalConfig:
@@ -563,7 +563,7 @@ def get_global_config() -> GlobalConfig:
 
 
 @lru_cache(maxsize=1)
-def get_machine_config(instrument_name: str = "") -> Dict[str, MachineConfig]:
+def get_machine_config(instrument_name: str = "") -> dict[str, MachineConfig]:
     machine_config = {
         "": MachineConfig(
             acquisition_software=[],

From d19683dabaac7cef17473c76a51be0547d5abcf4 Mon Sep 17 00:00:00 2001
From: Eu Pin Tien <eu-pin.tien@diamond.ac.uk>
Date: Thu, 19 Dec 2024 18:32:53 +0000
Subject: [PATCH 78/91] Fixed improperly cased 'MURFEY_GLOBAL_CONFIGURATION'

---
 tests/cli/test_decrypt_password.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/cli/test_decrypt_password.py b/tests/cli/test_decrypt_password.py
index 343c1208..703823d3 100644
--- a/tests/cli/test_decrypt_password.py
+++ b/tests/cli/test_decrypt_password.py
@@ -14,7 +14,7 @@ def test_decrypt_password(capsys, tmp_path):
     global_config.crypto_key = crypto_key.decode("ascii")
     with open(tmp_path / "config.yaml", "w") as cfg:
         yaml.dump(global_config.dict(), cfg)
-    os.environ["MURFEY_global_configURATION"] = str(tmp_path / "config.yaml")
+    os.environ["MURFEY_GLOBAL_CONFIGURATION"] = str(tmp_path / "config.yaml")
     password = "abcd"
     f = Fernet(crypto_key)
     encrypted_password = f.encrypt(password.encode("ascii")).decode()

From 05b10571c3f8b60b556d993e263958c8375fa55e Mon Sep 17 00:00:00 2001
From: Eu Pin Tien <eu-pin.tien@diamond.ac.uk>
Date: Thu, 19 Dec 2024 18:33:46 +0000
Subject: [PATCH 79/91] Union type hint still needed for Python 3.9 pytest
 tests

---
 src/murfey/util/config.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/murfey/util/config.py b/src/murfey/util/config.py
index 29c27f5e..369ee1b6 100644
--- a/src/murfey/util/config.py
+++ b/src/murfey/util/config.py
@@ -4,7 +4,7 @@
 import socket
 from functools import lru_cache
 from pathlib import Path
-from typing import Any, Literal, Mapping, Optional
+from typing import Any, Literal, Mapping, Optional, Union
 
 import yaml
 from backports.entry_points_selectable import entry_points
@@ -82,7 +82,7 @@ class MachineConfig(BaseModel):
         ),
         # NOTE: This is a placeholder for a key that will be implemented in the future
     )
-    calibrations: dict[str, dict[str, dict | float]] = Field(
+    calibrations: dict[str, dict[str, Union[dict, float]]] = Field(
         default={},
         description=(
             "Nested dictionary containing the calibrations for this microscope. "

From d43ab647cbca3f67bcc7ec63ccf7956cf743d7dd Mon Sep 17 00:00:00 2001
From: Eu Pin Tien <eu-pin.tien@diamond.ac.uk>
Date: Thu, 19 Dec 2024 18:44:51 +0000
Subject: [PATCH 80/91] Fixed improperly cased 'MURFEY_GLOBAL_CONFIGURATION'

---
 tests/cli/test_generate_password.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/cli/test_generate_password.py b/tests/cli/test_generate_password.py
index 6d41740d..43cfa00d 100644
--- a/tests/cli/test_generate_password.py
+++ b/tests/cli/test_generate_password.py
@@ -13,7 +13,7 @@ def test_generate_password(capsys, tmp_path):
     global_config.crypto_key = crypto_key.decode("ascii")
     with open(tmp_path / "config.yaml", "w") as cfg:
         yaml.dump(global_config.dict(), cfg)
-    os.environ["MURFEY_global_configURATION"] = str(tmp_path / "config.yaml")
+    os.environ["MURFEY_GLOBAL_CONFIGURATION"] = str(tmp_path / "config.yaml")
     run()
     captured = capsys.readouterr()
     f = Fernet(crypto_key)

From 85d905dae4e3b90ebc067ecf777b49e3e6fd972d Mon Sep 17 00:00:00 2001
From: Eu Pin Tien <eu-pin.tien@diamond.ac.uk>
Date: Fri, 20 Dec 2024 16:07:00 +0000
Subject: [PATCH 81/91] 'murfey_db_credentials' in GlobalConfig is now a Path

---
 src/murfey/server/murfey_db.py | 4 ++++
 src/murfey/util/config.py      | 8 +++++++-
 2 files changed, 11 insertions(+), 1 deletion(-)

diff --git a/src/murfey/server/murfey_db.py b/src/murfey/server/murfey_db.py
index 75d2efae..cc3085c8 100644
--- a/src/murfey/server/murfey_db.py
+++ b/src/murfey/server/murfey_db.py
@@ -13,6 +13,10 @@
 
 def url(global_config: GlobalConfig | None = None) -> str:
     global_config = global_config or get_global_config()
+    if global_config.murfey_db_credentials is None:
+        raise ValueError(
+            "No database credentials file was provided for this instance of Murfey"
+        )
     with open(global_config.murfey_db_credentials, "r") as stream:
         creds = yaml.safe_load(stream)
     f = Fernet(global_config.crypto_key.encode("ascii"))
diff --git a/src/murfey/util/config.py b/src/murfey/util/config.py
index 369ee1b6..9f26f3be 100644
--- a/src/murfey/util/config.py
+++ b/src/murfey/util/config.py
@@ -493,7 +493,13 @@ def machine_config_from_file(
 
 class GlobalConfig(BaseModel):
     # Database connection settings
-    murfey_db_credentials: str
+    murfey_db_credentials: Optional[Path] = Field(
+        default=None,
+        description=(
+            "Full file path to where Murfey's SQL database credentials are stored. "
+            "This is typically a YAML file."
+        ),
+    )
     sqlalchemy_pooling: bool = True
     crypto_key: str
 

From c8e6caa982826c07d7cfd5f8aaf4abeb9ca88387 Mon Sep 17 00:00:00 2001
From: Eu Pin Tien <eu-pin.tien@diamond.ac.uk>
Date: Fri, 20 Dec 2024 16:10:35 +0000
Subject: [PATCH 82/91] 'murfey_db_credentials' no longer part of MachineConfig

---
 src/murfey/util/config.py | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/src/murfey/util/config.py b/src/murfey/util/config.py
index 9f26f3be..d432cae4 100644
--- a/src/murfey/util/config.py
+++ b/src/murfey/util/config.py
@@ -560,7 +560,7 @@ def get_global_config() -> GlobalConfig:
             return global_config_from_file(machine_config.global_configuration_path)
     return GlobalConfig(
         session_validation="",
-        murfey_db_credentials="",
+        murfey_db_credentials=None,
         crypto_key="",
         auth_key="",
         auth_algorithm="",
@@ -576,7 +576,6 @@ def get_machine_config(instrument_name: str = "") -> dict[str, MachineConfig]:
             calibrations={},
             data_directories=[],
             rsync_basepath=Path("dls/tmp"),
-            murfey_db_credentials="",
             default_model="/tmp/weights.h5",
         )
     }

From 0b12cb7d77c788cae585969ef6287c768d3f7343 Mon Sep 17 00:00:00 2001
From: Eu Pin Tien <eu-pin.tien@diamond.ac.uk>
Date: Fri, 20 Dec 2024 16:15:03 +0000
Subject: [PATCH 83/91] Added description for 'sqlalchemy_pooling' field in
 GlobalConfig

---
 src/murfey/util/config.py | 9 ++++++++-
 1 file changed, 8 insertions(+), 1 deletion(-)

diff --git a/src/murfey/util/config.py b/src/murfey/util/config.py
index d432cae4..c40f56e7 100644
--- a/src/murfey/util/config.py
+++ b/src/murfey/util/config.py
@@ -500,7 +500,14 @@ class GlobalConfig(BaseModel):
             "This is typically a YAML file."
         ),
     )
-    sqlalchemy_pooling: bool = True
+    sqlalchemy_pooling: bool = Field(
+        default=True,
+        description=(
+            "Toggles connection pooling functionality in the SQL database. If 'True', "
+            "clients will connect to the database using an existing pool of connections "
+            "instead of creating a new one every time."
+        ),
+    )
     crypto_key: str
 
     # RabbitMQ settings

From 01f953215f484166a1ad7e605344a3bb59022bab Mon Sep 17 00:00:00 2001
From: Eu Pin Tien <eu-pin.tien@diamond.ac.uk>
Date: Fri, 20 Dec 2024 16:20:34 +0000
Subject: [PATCH 84/91] Added description for the 'crypto_key' field in the
 GlobalConfig

---
 src/murfey/util/config.py | 8 +++++++-
 1 file changed, 7 insertions(+), 1 deletion(-)

diff --git a/src/murfey/util/config.py b/src/murfey/util/config.py
index c40f56e7..fa54f250 100644
--- a/src/murfey/util/config.py
+++ b/src/murfey/util/config.py
@@ -508,7 +508,13 @@ class GlobalConfig(BaseModel):
             "instead of creating a new one every time."
         ),
     )
-    crypto_key: str
+    crypto_key: str = Field(
+        default="",
+        description=(
+            "The encryption key used for the SQL database. This can be generated by "
+            "Murfey using the 'murfey.generate_key' command."
+        ),
+    )
 
     # RabbitMQ settings
     feedback_queue: str = "murfey_feedback"

From 22b72f320bc101bec305cd7e13df5bd58d937ac8 Mon Sep 17 00:00:00 2001
From: Eu Pin Tien <eu-pin.tien@diamond.ac.uk>
Date: Fri, 20 Dec 2024 16:24:38 +0000
Subject: [PATCH 85/91] Added description for the 'feedback_queue' field in the
 GlobalConfig

---
 src/murfey/util/config.py | 10 +++++++++-
 1 file changed, 9 insertions(+), 1 deletion(-)

diff --git a/src/murfey/util/config.py b/src/murfey/util/config.py
index fa54f250..b16d94c6 100644
--- a/src/murfey/util/config.py
+++ b/src/murfey/util/config.py
@@ -517,7 +517,15 @@ class GlobalConfig(BaseModel):
     )
 
     # RabbitMQ settings
-    feedback_queue: str = "murfey_feedback"
+    feedback_queue: str = Field(
+        default="murfey_feedback",
+        description=(
+            "The name of the RabbitMQ queue that will receive instructions and "
+            "the results of processing jobs on behalf of Murfey. This queue can be "
+            "used by multiple server instances, which is why it's stored here "
+            "instead of in the machine configuration."
+        ),
+    )
 
     # Server authentication settings
     auth_type: Literal["password", "cookie"] = "password"

From 3742c7185c4e55765d788b02e76c2fd0c766b62a Mon Sep 17 00:00:00 2001
From: Eu Pin Tien <eu-pin.tien@diamond.ac.uk>
Date: Fri, 20 Dec 2024 16:27:40 +0000
Subject: [PATCH 86/91] Added description for the 'auth_type' field in the
 GlobalConfig

---
 src/murfey/util/config.py | 9 ++++++++-
 1 file changed, 8 insertions(+), 1 deletion(-)

diff --git a/src/murfey/util/config.py b/src/murfey/util/config.py
index b16d94c6..a66a5f29 100644
--- a/src/murfey/util/config.py
+++ b/src/murfey/util/config.py
@@ -528,7 +528,14 @@ class GlobalConfig(BaseModel):
     )
 
     # Server authentication settings
-    auth_type: Literal["password", "cookie"] = "password"
+    auth_type: Literal["password", "cookie"] = Field(
+        default="password",
+        description=(
+            "Choose how Murfey will authenticate new connections that it receives. "
+            "This can be done at present via password authentication or exchanging "
+            "cookies."
+        ),
+    )
     auth_key: str = ""
     auth_algorithm: str = ""
     cookie_key: str = ""

From aba0618580e78a75bf2e327b796a931f700e063a Mon Sep 17 00:00:00 2001
From: Eu Pin Tien <eu-pin.tien@diamond.ac.uk>
Date: Tue, 7 Jan 2025 17:43:26 +0000
Subject: [PATCH 87/91] 'MachineConfig.rsync_basepath' can no longer be None

---
 src/murfey/server/__init__.py     |  2 --
 src/murfey/server/api/__init__.py | 10 ----------
 src/murfey/server/api/clem.py     |  2 --
 src/murfey/server/api/spa.py      |  2 --
 4 files changed, 16 deletions(-)

diff --git a/src/murfey/server/__init__.py b/src/murfey/server/__init__.py
index 8befea16..001d563b 100644
--- a/src/murfey/server/__init__.py
+++ b/src/murfey/server/__init__.py
@@ -1622,8 +1622,6 @@ def _register_class_selection(message: dict, _db=murfey_db, demo: bool = False):
 
 def _find_initial_model(visit: str, machine_config: MachineConfig) -> Path | None:
     if machine_config.initial_model_search_directory:
-        if not machine_config.rsync_basepath:
-            return None
         visit_directory = (
             machine_config.rsync_basepath
             / (machine_config.rsync_module or "data")
diff --git a/src/murfey/server/api/__init__.py b/src/murfey/server/api/__init__.py
index b679a5ee..e947d9f7 100644
--- a/src/murfey/server/api/__init__.py
+++ b/src/murfey/server/api/__init__.py
@@ -1291,8 +1291,6 @@ def suggest_path(
         raise ValueError(
             "No machine configuration set when suggesting destination path"
         )
-    if not machine_config.rsync_basepath:
-        raise ValueError("No rsync basepath set")
 
     # Construct the full path to where the dataset is to be saved
     check_path = machine_config.rsync_basepath / base_path
@@ -1416,8 +1414,6 @@ def start_dc(
     machine_config = get_machine_config(instrument_name=instrument_name)[
         instrument_name
     ]
-    if not machine_config.rsync_basepath:
-        raise ValueError("No rsync basepath set")
     log.info(
         f"Starting data collection on microscope {get_microscope(machine_config=machine_config)} "
         f"with basepath {sanitise(str(machine_config.rsync_basepath))} and directory {sanitise(dc_params.image_directory)}"
@@ -1505,8 +1501,6 @@ async def process_gain(
         executables = machine_config.external_executables
     env = machine_config.external_environment
     safe_path_name = secure_filename(gain_reference_params.gain_ref.name)
-    if not machine_config.rsync_basepath:
-        raise ValueError("No rsync basepath set")
     filepath = (
         machine_config.rsync_basepath
         / (machine_config.rsync_module or "data")
@@ -1595,8 +1589,6 @@ async def write_eer_fractionation_file(
             )
         ) / secure_filename(fractionation_params.fractionation_file_name)
     else:
-        if not machine_config.rsync_basepath:
-            raise ValueError("rsync basepath not set")
         file_path = (
             machine_config.rsync_basepath
             / (machine_config.rsync_module or "data")
@@ -1642,8 +1634,6 @@ async def make_gif(
     machine_config = get_machine_config(instrument_name=instrument_name)[
         instrument_name
     ]
-    if not machine_config.rsync_basepath:
-        raise ValueError("rsync basepath not set")
     output_dir = (
         machine_config.rsync_basepath
         / (machine_config.rsync_module or "data")
diff --git a/src/murfey/server/api/clem.py b/src/murfey/server/api/clem.py
index 9f1a9125..606e1849 100644
--- a/src/murfey/server/api/clem.py
+++ b/src/murfey/server/api/clem.py
@@ -77,8 +77,6 @@ def validate_and_sanitise(
     machine_config = get_machine_config(instrument_name=instrument_name)[
         instrument_name
     ]
-    if not machine_config.rsync_basepath:
-        raise ValueError("rsync basepath not set")
     base_path = machine_config.rsync_basepath.as_posix()
 
     # Check that full file path doesn't contain unallowed characters
diff --git a/src/murfey/server/api/spa.py b/src/murfey/server/api/spa.py
index 9166b911..bfb889a6 100644
--- a/src/murfey/server/api/spa.py
+++ b/src/murfey/server/api/spa.py
@@ -21,8 +21,6 @@ def _cryolo_model_path(visit: str, instrument_name: str) -> Path:
         instrument_name
     ]
     # Raise error if relevant keys weren't set in MachineConfig
-    if not machine_config.rsync_basepath:
-        raise ValueError("Unable to find crYOLO model; rsync_basepath was not set")
     if not machine_config.default_model:
         raise ValueError("No default crYOLO model was set")
 

From af80bf363f9b64cb3ddbf5a5b17b6372ea7dd29d Mon Sep 17 00:00:00 2001
From: Eu Pin Tien <eu-pin.tien@diamond.ac.uk>
Date: Tue, 7 Jan 2025 17:54:09 +0000
Subject: [PATCH 88/91] Grouped all skippable keys under a single 'if' block

---
 src/murfey/cli/create_config.py | 61 +++++++++++++++------------------
 1 file changed, 28 insertions(+), 33 deletions(-)

diff --git a/src/murfey/cli/create_config.py b/src/murfey/cli/create_config.py
index 0ca36a1a..6165a135 100644
--- a/src/murfey/cli/create_config.py
+++ b/src/murfey/cli/create_config.py
@@ -1101,17 +1101,10 @@ def set_up_machine_config(debug: bool = False):
             new_config[key] = add_calibrations(key, field, debug)
             continue
 
-        # Acquisition software block
+        # Acquisition software section
         if key == "acquisition_software":
             new_config = add_software_packages(new_config, debug)
             continue
-        if key in (
-            "software_versions",
-            "software_settings_output_directories",
-            "data_required_substrings",
-        ):
-            continue
-        # End of software block
 
         if key == "data_directories":
             new_config[key] = add_data_directories(key, field, debug)
@@ -1123,11 +1116,35 @@ def set_up_machine_config(debug: bool = False):
             new_config[key] = add_analyse_created_directories(key, field, debug)
             continue
 
-        # Data transfer block
+        # Data transfer section
         if key == "data_transfer_enabled":
             new_config = set_up_data_transfer(new_config, debug)
             continue
+
+        # Data processing section
+        if key == "processing_enabled":
+            new_config = set_up_data_processing(new_config, debug)
+            continue
+
+        # External plugins and executables section
+        if key in ("external_executables", "external_executables_eer"):
+            new_config[key] = add_external_executables(key, field, debug)
+            continue
+        if key == "external_environment":
+            new_config[key] = add_external_environment(key, field, debug)
+            continue
+
+        if key == "plugin_packages":
+            new_config[key] = add_murfey_plugins(key, field, debug)
+            continue
+
+        # All the keys that can be skipped
         if key in (
+            # Acquisition software section
+            "software_versions",
+            "software_settings_output_directories",
+            "data_required_substrings",
+            # Data transfer section
             "allow_removal",
             "rsync_basepath",
             "rsync_module",
@@ -1135,15 +1152,7 @@ def set_up_machine_config(debug: bool = False):
             "upstream_data_directories",
             "upstream_data_download_directory",
             "upstream_data_tiff_locations",
-        ):
-            continue
-        # End of data transfer block
-
-        # Data processing block
-        if key == "processing_enabled":
-            new_config = set_up_data_processing(new_config, debug)
-            continue
-        if key in (
+            # Data processing section
             "process_by_default",
             "gain_directory_name",
             "processed_directory_name",
@@ -1155,27 +1164,13 @@ def set_up_machine_config(debug: bool = False):
             "initial_model_search_directory",
         ):
             continue
-        # End of data processing block
-
-        # External plugins and executables block
-        if key in ("external_executables", "external_executables_eer"):
-            new_config[key] = add_external_executables(key, field, debug)
-            continue
-        if key == "external_environment":
-            new_config[key] = add_external_environment(key, field, debug)
-            continue
-        # End of external executables block
-
-        if key == "plugin_packages":
-            new_config[key] = add_murfey_plugins(key, field, debug)
-            continue
 
         """
         Standard method of inputting values
         """
         new_config[key] = populate_field(key, field, debug)
 
-    # Validate the entire config again and convert into JSON/YAML-safe dict
+    # Validate the entire config and convert into JSON/YAML-safe dict
     try:
         new_config_safe: dict = json.loads(MachineConfig(**new_config).json())
     except ValidationError as exception:

From 91675121656b29b86c02395d61a44d65f805ec57 Mon Sep 17 00:00:00 2001
From: Eu Pin Tien <eu-pin.tien@diamond.ac.uk>
Date: Wed, 8 Jan 2025 11:54:12 +0000
Subject: [PATCH 89/91] Adjust logic for using default values when an empty
 console input is provided

---
 src/murfey/cli/create_config.py | 20 ++++++++++----------
 1 file changed, 10 insertions(+), 10 deletions(-)

diff --git a/src/murfey/cli/create_config.py b/src/murfey/cli/create_config.py
index 6165a135..1afea8a5 100644
--- a/src/murfey/cli/create_config.py
+++ b/src/murfey/cli/create_config.py
@@ -420,16 +420,16 @@ def populate_field(key: str, field: ModelField, debug: bool = False) -> Any:
         # Get value
         answer = prompt(message, style="bright_yellow")
 
-        # Convert empty console input into default values if they are None-like
-        if not field.field_info.default:
-            value = field.field_info.default if not answer else answer
-        # Convert inverted commas into empty strings if defaults are not None-like
-        else:
-            value = (
-                ""
-                if answer in ("''", '""') and isinstance(field.field_info.default, str)
-                else answer
-            )
+        # Parse field input if a default has been provided
+        if not isinstance(field.field_info.default, UndefinedType):
+            # Convert empty console inputs into default field values
+            if not answer:
+                value = field.field_info.default
+            # Convert inverted commas into empty strings
+            elif answer in ('""', "''") and isinstance(field.field_info.default, str):
+                value = ""
+            else:
+                value = answer
 
         # Validate and return
         try:

From 838720fd62992741da82d23aa6638ca8defa5b8a Mon Sep 17 00:00:00 2001
From: Eu Pin Tien <eu-pin.tien@diamond.ac.uk>
Date: Fri, 10 Jan 2025 11:08:45 +0000
Subject: [PATCH 90/91] 'murfey_db_credentials' and 'rabbitmq_credentials'
 should be mandatory fields, but have type Optional[Path]

---
 src/murfey/server/__init__.py |  2 ++
 src/murfey/util/config.py     | 14 +++++++-------
 2 files changed, 9 insertions(+), 7 deletions(-)

diff --git a/src/murfey/server/__init__.py b/src/murfey/server/__init__.py
index 2f8e0be8..a2675f5f 100644
--- a/src/murfey/server/__init__.py
+++ b/src/murfey/server/__init__.py
@@ -294,6 +294,8 @@ def run():
         # Run in demo mode with no connections set up
         os.environ["MURFEY_DEMO"] = "1"
     else:
+        if not global_config.rabbitmq_credentials:
+            raise FileNotFoundError("No RabbitMQ credentials file provided")
         # Load RabbitMQ configuration and set up the connection
         PikaTransport().load_configuration_file(global_config.rabbitmq_credentials)
         _set_up_transport("PikaTransport")
diff --git a/src/murfey/util/config.py b/src/murfey/util/config.py
index f51c1fc2..3e53870a 100644
--- a/src/murfey/util/config.py
+++ b/src/murfey/util/config.py
@@ -380,7 +380,6 @@ class MachineConfig(BaseModel):
     """
     # Security-related keys
     global_configuration_path: Optional[Path] = Field(
-        default=None,
         description=(
             "Full file path to the YAML file containing the configurations for the "
             "Murfey server."
@@ -493,9 +492,7 @@ def machine_config_from_file(
 
 class GlobalConfig(BaseModel):
     # Database connection settings
-    rabbitmq_credentials: str
     murfey_db_credentials: Optional[Path] = Field(
-        default=None,
         description=(
             "Full file path to where Murfey's SQL database credentials are stored. "
             "This is typically a YAML file."
@@ -518,6 +515,7 @@ class GlobalConfig(BaseModel):
     )
 
     # RabbitMQ settings
+    rabbitmq_credentials: Optional[Path]
     feedback_queue: str = Field(
         default="murfey_feedback",
         description=(
@@ -540,12 +538,13 @@ class GlobalConfig(BaseModel):
     auth_key: str = ""
     auth_algorithm: str = ""
     cookie_key: str = ""
-
     session_validation: str = ""
     session_token_timeout: Optional[int] = (
         None  # seconds; typically the length of a microscope session plus a bit
     )
     allow_origins: list[str] = ["*"]  # Restrict to only certain hostnames
+
+    # Graylog settings
     graylog_host: str = ""
     graylog_port: Optional[int] = None
 
@@ -595,10 +594,11 @@ def get_global_config() -> GlobalConfig:
         machine_config = get_machine_config(instrument_name=os.getenv("BEAMLINE"))[
             os.getenv("BEAMLINE", "")
         ]
-        if machine_config.global_configuration_path:
-            return global_config_from_file(machine_config.global_configuration_path)
+        if not machine_config.global_configuration_path:
+            raise FileNotFoundError("No global configuration file provided")
+        return global_config_from_file(machine_config.global_configuration_path)
     return GlobalConfig(
-        rabbitmq_credentials="",
+        rabbitmq_credentials=None,
         session_validation="",
         murfey_db_credentials=None,
         crypto_key="",

From a5c4029d51b57b4b2b4faf1d9bed18d29ec9597a Mon Sep 17 00:00:00 2001
From: Eu Pin Tien <eu-pin.tien@diamond.ac.uk>
Date: Fri, 10 Jan 2025 14:06:36 +0000
Subject: [PATCH 91/91] Fixed broken logic with interactive prompt when a field
 doesn't have a default value

---
 src/murfey/cli/create_config.py | 12 ++++++++----
 1 file changed, 8 insertions(+), 4 deletions(-)

diff --git a/src/murfey/cli/create_config.py b/src/murfey/cli/create_config.py
index 1afea8a5..ff77f621 100644
--- a/src/murfey/cli/create_config.py
+++ b/src/murfey/cli/create_config.py
@@ -412,10 +412,12 @@ def populate_field(key: str, field: ModelField, debug: bool = False) -> Any:
     # Display information on the field to be filled
     print_field_info(field)
 
-    message = (
-        "Please provide a value (press Enter to use the default value of "
-        f"{field.field_info.default!r})."
+    defaults_prompt = (
+        f"press Enter to use the default value of {field.field_info.default!r}"
+        if not isinstance(field.field_info.default, UndefinedType)
+        else "this field is mandatory"
     )
+    message = f"Please provide a value ({defaults_prompt})."
     while True:
         # Get value
         answer = prompt(message, style="bright_yellow")
@@ -430,6 +432,8 @@ def populate_field(key: str, field: ModelField, debug: bool = False) -> Any:
                 value = ""
             else:
                 value = answer
+        else:
+            value = answer
 
         # Validate and return
         try:
@@ -555,6 +559,7 @@ def get_settings_tree_path() -> str:
     """
     Start of add_software_packages
     """
+    console.print()
     console.print(
         "Acquisition Software (acquisition_software)",
         style="bold bright_cyan",
@@ -971,7 +976,6 @@ def add_recipes(key: str, field: ModelField, debug: bool = False) -> dict[str, s
         "processed_directory_name",
         "processed_extra_directory",
         "recipes",
-        "modular_spa",
         "default_model",
         "model_search_directory",
         "initial_model_search_directory",