devitocodes
diff --git a/‎.github/workflows/docker-bases.yml
+2 b/‎.github/workflows/docker-bases.yml
+2
diff --git a/‎devito/__init__.py
+3-2 b/‎devito/__init__.py
+3-2
diff --git a/‎devito/arch/archinfo.py
+64-1 b/‎devito/arch/archinfo.py
+64-1
diff --git a/‎devito/arch/compiler.py
+43-31 b/‎devito/arch/compiler.py
+43-31
diff --git a/‎devito/builtins/arithmetic.py
+2-2 b/‎devito/builtins/arithmetic.py
+2-2
diff --git a/‎devito/builtins/utils.py
+4-1 b/‎devito/builtins/utils.py
+4-1
@@ -255,4 +255,6 @@ jobs:
           file: './docker/Dockerfile.amd'
           push: true
           target: 'hip'
+          build-args: |
+            ROCM_VERSION=6.3.4
           tags: devitocodes/bases:amd-hip
@@ -56,7 +56,8 @@ def reinit_compiler(val):
     """
     Re-initialize the Compiler.
     """
-    configuration['compiler'].__init__(suffix=configuration['compiler'].suffix,
+    configuration['compiler'].__init__(name=configuration['compiler'].name,
+                                       suffix=configuration['compiler'].suffix,
                                        mpi=configuration['mpi'])
     return val
 
@@ -65,7 +66,7 @@ def reinit_compiler(val):
 configuration.add('platform', 'cpu64', list(platform_registry),
                   callback=lambda i: platform_registry[i]())
 configuration.add('compiler', 'custom', compiler_registry,
-                  callback=lambda i: compiler_registry[i]())
+                  callback=lambda i: compiler_registry[i](name=i))
 
 # Setup language for shared-memory parallelism
 preprocessor = lambda i: {0: 'C', 1: 'openmp'}.get(i, i)  # Handles DEVITO_OPENMP deprec
 
@@ -330,6 +330,69 @@ def cbk(deviceid=0):
     except OSError:
         pass
 
+    # *** Third try: `sycl-ls`, clearly only works with Intel cards
+    try:
+        gpu_infos = {}
+
+        # sycl-ls may list the same GPU under several platforms (e.g. both
+        # Level-Zero and OpenCL), so devices are read from one platform only
+        selected_platform = None
+        platform_block = ""
+
+        proc = Popen(["sycl-ls", "--verbose"], stdout=PIPE, stderr=DEVNULL, text=True)
+        sycl_output, _ = proc.communicate()
+
+        # Extract platform blocks
+        platforms = re.findall(r"Platform \[#(\d+)\]:([\s\S]*?)(?=Platform \[#\d+\]:|$)",
+                               sycl_output)
+
+        # Select Level-Zero if available, otherwise use OpenCL
+        for platform_id, platform_content in platforms:
+            if "Intel(R) Level-Zero" in platform_content:
+                selected_platform = platform_id
+                platform_block = platform_content
+                break
+            elif "Intel(R) OpenCL Graphics" in platform_content and \
+                    selected_platform is None:
+                selected_platform = platform_id
+                platform_block = platform_content
+
+        # Extract GPU devices from the selected platform
+        devices = re.findall(r"Device \[#(\d+)\]:([\s\S]*?)(?=Device \[#\d+\]:|$)",
+                             platform_block)
+
+        for device_id, device_block in devices:
+            if re.search(r"^\s*Type\s*:\s*gpu", device_block, re.MULTILINE):
+                name_match = re.search(r"^\s*Name\s*:\s*(.+)", device_block, re.MULTILINE)
+
+                if name_match:
+                    name = name_match.group(1).strip()
+
+                    # Store GPU info with correct physical ID
+                    gpu_infos[device_id] = {
+                        "physicalid": device_id,
+                        "product": name
+                    }
+
+        gpu_info = homogenise_gpus(list(gpu_infos.values()))
+
+        # Attach memory-info callbacks; for now they are stubs returning None,
+        # as `xpu-smi` (the intended source) throws errors in docker
+        for i in ['total', 'free', 'used']:
+            def make_cbk(i):
+                def cbk(deviceid=0):
+                    return None
+                return cbk
+
+            gpu_info['mem.%s' % i] = make_cbk(i)
+
+        # Tag the returned record; `gpu_infos` was already consumed above
+        gpu_info['architecture'] = 'Intel'
+        return gpu_info
+
+    except OSError:
+        pass
+
     # *** Second try: `lshw`
     try:
         info_cmd = ['lshw', '-C', 'video']
@@ -391,7 +454,7 @@ def parse_product_arch():
         gpu_infos = []
         for line in lines:
             # Graphics cards are listed as VGA or 3D controllers in lspci
-            if 'VGA' in line or '3D' in line:
+            if any(i in line for i in ('VGA', '3D', 'Display')):
                 gpu_info = {}
                 # Lines produced by lspci command are of the form:
                 #   xxxx:xx:xx.x Device Type: Name
 
@@ -180,12 +180,21 @@ def __init__(self):
     """
 
     fields = {'cc', 'ld'}
-    _cpp = False
+    _default_cpp = False
+    _cxxstd = 'c++14'
+    _cstd = 'c99'
 
     def __init__(self, **kwargs):
+        maybe_name = kwargs.pop('name', self.__class__.__name__)
+        if isinstance(maybe_name, Compiler):
+            self._name = maybe_name.name
+        else:
+            self._name = maybe_name
+
         super().__init__(**kwargs)
 
         self.__lookup_cmds__()
+        self._cpp = kwargs.get('cpp', self._default_cpp)
 
         self.suffix = kwargs.get('suffix')
         if not kwargs.get('mpi'):
@@ -195,7 +204,7 @@ def __init__(self, **kwargs):
             self.cc = self.MPICC if self._cpp is False else self.MPICXX
         self.ld = self.cc  # Wanted by the superclass
 
-        self.cflags = ['-O3', '-g', '-fPIC', '-Wall', '-std=c99']
+        self.cflags = ['-O3', '-g', '-fPIC', '-Wall', f'-std={self.std}']
         self.ldflags = ['-shared']
 
         self.include_dirs = []
@@ -225,13 +234,13 @@ def __new_with__(self, **kwargs):
         Create a new Compiler from an existing one, inheriting from it
         the flags that are not specified via ``kwargs``.
         """
-        return self.__class__(suffix=kwargs.pop('suffix', self.suffix),
+        return self.__class__(name=self.name, suffix=kwargs.pop('suffix', self.suffix),
                               mpi=kwargs.pop('mpi', configuration['mpi']),
                               **kwargs)
 
     @property
     def name(self):
+        """
+        The compiler name stored in `_name`.
+        """
-        return self.__class__.__name__
+        return self._name
 
     @property
     def version(self):
@@ -247,6 +256,10 @@ def version(self):
 
         return version
 
+    @property
+    def std(self):
+        """
+        The language standard used to build the `-std=` flag: `_cxxstd` when
+        `_cpp` is truthy, `_cstd` otherwise.
+        """
+        return self._cxxstd if self._cpp else self._cstd
+
     def get_version(self):
         result, stdout, stderr = call_capture_output((self.cc, "--version"))
         if result != 0:
@@ -482,15 +495,15 @@ def __init_finalize__(self, **kwargs):
         platform = kwargs.pop('platform', configuration['platform'])
 
         if isinstance(platform, NvidiaDevice):
-            self.cflags.remove('-std=c99')
+            self.cflags.remove(f'-std={self.std}')
             # Add flags for OpenMP offloading
             if language in ['C', 'openmp']:
                 cc = get_nvidia_cc()
                 if cc:
                     self.cflags += ['-Xopenmp-target', f'-march=sm_{cc}']
                 self.ldflags += ['-fopenmp', '-fopenmp-targets=nvptx64-nvidia-cuda']
         elif platform is AMDGPUX:
-            self.cflags.remove('-std=c99')
+            self.cflags.remove(f'-std={self.std}')
             # Add flags for OpenMP offloading
             if language in ['C', 'openmp']:
                 self.ldflags += ['-target', 'x86_64-pc-linux-gnu']
@@ -553,9 +566,9 @@ def __init_finalize__(self, **kwargs):
             self.ldflags += ['-fopenmp']
 
         if isinstance(platform, NvidiaDevice):
-            self.cflags.remove('-std=c99')
+            self.cflags.remove(f'-std={self.std}')
         elif platform is AMDGPUX:
-            self.cflags.remove('-std=c99')
+            self.cflags.remove(f'-std={self.std}')
             # Add flags for OpenMP offloading
             if language in ['C', 'openmp']:
                 self.ldflags += ['-target', 'x86_64-pc-linux-gnu']
@@ -590,16 +603,13 @@ def __lookup_cmds__(self):
 
 class PGICompiler(Compiler):
 
-    _cpp = True
+    _default_cpp = True
 
     def __init_finalize__(self, **kwargs):
 
-        self.cflags.remove('-std=c99')
         self.cflags.remove('-O3')
         self.cflags.remove('-Wall')
 
-        self.cflags.append('-std=c++11')
-
         language = kwargs.pop('language', configuration['language'])
         platform = kwargs.pop('platform', configuration['platform'])
 
@@ -643,14 +653,13 @@ def __lookup_cmds__(self):
 
 class CudaCompiler(Compiler):
 
-    _cpp = True
+    _default_cpp = True
 
     def __init_finalize__(self, **kwargs):
 
-        self.cflags.remove('-std=c99')
         self.cflags.remove('-Wall')
         self.cflags.remove('-fPIC')
-        self.cflags.extend(['-std=c++14', '-Xcompiler', '-fPIC'])
+        self.cflags.extend(['-Xcompiler', '-fPIC'])
 
         if configuration['mpi']:
             # We rather use `nvcc` to compile MPI, but for this we have to
@@ -717,15 +726,10 @@ def __lookup_cmds__(self):
 
 class HipCompiler(Compiler):
 
-    _cpp = True
+    _default_cpp = True
 
     def __init_finalize__(self, **kwargs):
 
-        self.cflags.remove('-std=c99')
-        self.cflags.remove('-Wall')
-        self.cflags.remove('-fPIC')
-        self.cflags.extend(['-std=c++14', '-fPIC'])
-
         if configuration['mpi']:
             # We rather use `hipcc` to compile MPI, but for this we have to
             # explicitly pass the flags that an `mpicc` would implicitly use
@@ -831,7 +835,7 @@ def __init_finalize__(self, **kwargs):
         language = kwargs.pop('language', configuration['language'])
 
         if language == 'sycl':
-            raise ValueError("Use SyclCompiler to jit-compile sycl")
+            warning(f"Use SyclCompiler (`sycl`) to jit-compile sycl, not {self.name}")
 
         elif language == 'openmp':
             # Earlier versions to OneAPI 2023.2.0 (clang17 underneath), have an
@@ -878,7 +882,7 @@ def __lookup_cmds__(self):
 
 class SyclCompiler(OneapiCompiler):
 
-    _cpp = True
+    _default_cpp = True
 
     def __init_finalize__(self, **kwargs):
         IntelCompiler.__init_finalize__(self, **kwargs)
@@ -887,9 +891,9 @@ def __init_finalize__(self, **kwargs):
         language = kwargs.pop('language', configuration['language'])
 
         if language != 'sycl':
-            raise ValueError("Expected language sycl with SyclCompiler")
+            warning(f"Expected language sycl with SyclCompiler, not {language}")
 
-        self.cflags.remove('-std=c99')
+        self.cflags.remove(f'-std={self.std}')
         self.cflags.append('-fsycl')
 
         self.cflags.remove('-g')  # -g disables some optimizations in IGC
@@ -903,7 +907,7 @@ def __init_finalize__(self, **kwargs):
         elif isinstance(platform, IntelDevice):
             self.cflags.append('-fsycl-targets=spir64')
         else:
-            raise NotImplementedError(f"Unsupported platform {platform}")
+            warning(f"Unsupported platform {platform}")
 
 
 class CustomCompiler(Compiler):
@@ -945,7 +949,6 @@ def __new__(cls, *args, **kwargs):
         obj = super().__new__(cls)
         # Keep base to initialize accordingly
         obj._base = kwargs.pop('base', _base)
-        obj._cpp = obj._base._cpp
 
         return obj
 
@@ -976,6 +979,10 @@ def __lookup_cmds__(self):
     def __new_with__(self, **kwargs):
+        """
+        As the parent's `__new_with__`, additionally forwarding `_base` as `base`.
+        """
         return super().__new_with__(base=self._base, **kwargs)
 
+    @property
+    def _default_cpp(self):
+        """
+        Mirror the `_default_cpp` of the base compiler stored in `_base`.
+        """
+        return self._base._default_cpp
+
 
 class CompilerRegistry(dict):
     """
@@ -984,15 +991,19 @@ class CompilerRegistry(dict):
     """
 
     def __getitem__(self, key):
+        """
+        Look up a compiler by name, or by Compiler instance via its `name`.
+        A key starting with 'gcc-' yields a `partial` of GNUCompiler with
+        `suffix` preset to the text between the first and second '-'.
+        """
+        if isinstance(key, Compiler):
+            key = key.name
+
         if key.startswith('gcc-'):
             i = key.split('-')[1]
             return partial(GNUCompiler, suffix=i)
+
         return super().__getitem__(key)
 
-    def __contains__(self, k):
-        if isinstance(k, Compiler):
-            k = k.name
-        return k in self.keys() or k.startswith('gcc-')
+    def __contains__(self, key):
+        """
+        True if `key` (a name, or a Compiler whose `name` is used) is a
+        registered key or starts with 'gcc-'.
+        """
+        if isinstance(key, Compiler):
+            key = key.name
+        return key in self.keys() or key.startswith('gcc-')
 
 
 _compiler_registry = {
@@ -1011,6 +1022,7 @@ def __contains__(self, k):
     'nvc++': NvidiaCompiler,
     'nvidia': NvidiaCompiler,
     'cuda': CudaCompiler,
+    'nvcc': CudaCompiler,
     'osx': ClangCompiler,
     'intel': OneapiCompiler,
     'icx': OneapiCompiler,
 
@@ -32,13 +32,13 @@ def norm(f, order=2):
     s = dv.types.Symbol(name='sum', dtype=n.dtype)
 
     op = dv.Operator([dv.Eq(s, 0.0)] + eqns +
-                     [dv.Inc(s, dv.Abs(Pow(p, order))), dv.Eq(n[0], s)],
+                     [dv.Inc(s, Pow(dv.Abs(p), order)), dv.Eq(n[0], s)],
                      name='norm%d' % order)
     op.apply(**kwargs)
 
     v = np.power(n.data[0], 1/order)
 
-    return f.dtype(v)
+    return np.real(f.dtype(v))
 
 
 @dv.switchconfig(log_level='ERROR')
 
@@ -23,7 +23,10 @@
     # NOTE: np.float128 isn't really a thing, see for example
     # https://github.com/numpy/numpy/issues/10288
     # https://docs.oracle.com/cd/E19957-01/806-3568/ncg_goldberg.html#1070
-    np.float64: np.float64
+    np.float64: np.float64,
+    # ComplexX accumulates on Complex2X
+    np.complex64: np.complex128,
+    np.complex128: np.complex128,
 }