diff --git a/README.rst b/README.rst
index ba1194a..58c9b6e 100644
--- a/README.rst
+++ b/README.rst
@@ -138,10 +138,19 @@ C++/CUDA Projects
 
     python -c "import cuvec; print(cuvec.include_path)"
 
+For reference, see ``cuvec.example_mod``'s source code: `example_mod.cu <https://github.com/AMYPAD/CuVec/blob/master/cuvec/src/example_mod/example_mod.cu>`_.
+
+SWIG Projects
+-------------
+
+Using the include path from above, simply ``%include "cuvec.i"`` in a SWIG interface file.
+
+For reference, see ``cuvec.example_swig``'s source code: `example_swig.i <https://github.com/AMYPAD/CuVec/blob/master/cuvec/src/example_swig/example_swig.i>`_ and `example_swig.cu <https://github.com/AMYPAD/CuVec/blob/master/cuvec/src/example_swig/example_swig.cu>`_.
+
 CMake Projects
 --------------
 
-This is likely unnecessary (see above).
+This is likely unnecessary (see above for simpler ``#include`` instructions).
 
 The raw C++/CUDA libraries may be included in external projects using ``cmake``.
 Simply build the project and use ``find_package(AMYPADcuvec)``.
diff --git a/cuvec/CMakeLists.txt b/cuvec/CMakeLists.txt
index 4dda903..d2ad78d 100644
--- a/cuvec/CMakeLists.txt
+++ b/cuvec/CMakeLists.txt
@@ -12,6 +12,13 @@ find_package(CUDAToolkit REQUIRED)
 if(SKBUILD)
   find_package(PythonExtensions REQUIRED)
   set(LIB_TYPE "MODULE")
+
+  find_package(SWIG 3.0 COMPONENTS python)
+  if(SWIG_FOUND)
+  include(${SWIG_USE_FILE})
+  set(${CMAKE_PROJECT_NAME}_SWIG_SRC "${CMAKE_CURRENT_LIST_DIR}/include/${CMAKE_PROJECT_NAME}.i")
+  set_source_files_properties("${${CMAKE_PROJECT_NAME}_SWIG_SRC}" PROPERTIES CPLUSPLUS ON)
+  endif()
 else()
   set(LIB_TYPE "SHARED")
 endif()
@@ -43,7 +50,7 @@ add_library(AMYPAD::${PROJECT_NAME} ALIAS ${PROJECT_NAME})
 target_include_directories(${PROJECT_NAME} PUBLIC
   "$<BUILD_INTERFACE:${${CMAKE_PROJECT_NAME}_INCLUDE_DIRS}>"
   "$<INSTALL_INTERFACE:${CMAKE_PROJECT_NAME}/include>")
-target_link_libraries(${PROJECT_NAME} ${Python3_LIBRARIES} ${CUDA_LIBRARIES})
+target_link_libraries(${PROJECT_NAME} ${Python3_LIBRARIES} CUDA::cudart_static)
 
 if(SKBUILD)
   python_extension_module(${PROJECT_NAME})
@@ -61,6 +68,7 @@ install(EXPORT ${PROJECT_NAME}Targets FILE AMYPAD${PROJECT_NAME}Targets.cmake
   NAMESPACE AMYPAD:: DESTINATION ${CMAKE_PROJECT_NAME}/cmake)
 
 add_subdirectory(src/example_mod)
+add_subdirectory(src/example_swig)
 
 # install project
 
diff --git a/cuvec/__init__.py b/cuvec/__init__.py
index 5fec603..998781f 100644
--- a/cuvec/__init__.py
+++ b/cuvec/__init__.py
@@ -35,8 +35,7 @@
     from warnings import warn
     warn(str(err), UserWarning)
 else:
-    from .helpers import CuVec, asarray, copy, zeros
-    from .pycuvec import cu_copy, cu_zeros, typecodes, vec_types
+    from .pycuvec import CuVec, asarray, copy, cu_copy, cu_zeros, typecodes, vec_types, zeros
 
 # for use in `cmake -DCMAKE_PREFIX_PATH=...`
 cmake_prefix = Path(resource_filename(__name__, "cmake")).resolve()
diff --git a/cuvec/helpers.py b/cuvec/helpers.py
deleted file mode 100644
index 7801f25..0000000
--- a/cuvec/helpers.py
+++ /dev/null
@@ -1,90 +0,0 @@
-"""Useful helper functions."""
-import logging
-from textwrap import dedent
-
-import numpy as np
-
-from .pycuvec import cu_copy, cu_zeros, vec_types
-
-log = logging.getLogger(__name__)
-_Vector_types = tuple(vec_types.values())
-_Vector_types_s = tuple(map(str, vec_types.values()))
-
-
-def is_raw_cuvec(cuvec):
-    """
-    Returns `True` when given the output of
-    CPython API functions returning `PyCuVec<T> *` PyObjects.
-
-    This is needed since conversely `isinstance(cuvec, CuVec)` may be `False`
-    due to external libraries
-    `#include "pycuvec.cuh"` making a distinct type object.
-    """
-    return isinstance(cuvec, _Vector_types) or str(type(cuvec)) in _Vector_types_s
-
-
-class CuVec(np.ndarray):
-    """
-    A `numpy.ndarray` compatible view with a `cuvec` member containing the
-    underlying `cuvec.Vector_*` object (for use in CPython API function calls).
-    """
-    def __new__(cls, arr):
-        """arr: `cuvec.CuVec`, raw `cuvec.Vector_*`, or `numpy.ndarray`"""
-        if is_raw_cuvec(arr):
-            log.debug("wrap raw %s", type(arr))
-            obj = np.asarray(arr).view(cls)
-            obj.cuvec = arr
-            return obj
-        if isinstance(arr, CuVec) and hasattr(arr, 'cuvec'):
-            log.debug("new view")
-            obj = np.asarray(arr).view(cls)
-            obj.cuvec = arr.cuvec
-            return obj
-        if isinstance(arr, np.ndarray):
-            log.debug("copy")
-            return copy(arr)
-        raise NotImplementedError(
-            dedent("""\
-            Not intended for explicit construction
-            (do not do `cuvec.CuVec((42, 1337))`;
-            instead use `cuvec.zeros((42, 137))`"""))
-
-    @property
-    def __cuda_array_interface__(self):
-        if not hasattr(self, 'cuvec'):
-            raise AttributeError(
-                dedent("""\
-                `numpy.ndarray` object has no attribute `cuvec`:
-                try using `cuvec.asarray()` first."""))
-        res = self.__array_interface__
-        return {
-            'shape': res['shape'], 'typestr': res['typestr'], 'data': res['data'], 'version': 3}
-
-
-def zeros(shape, dtype="float32"):
-    """
-    Returns a `cuvec.CuVec` view of a new `numpy.ndarray`
-    of the specified shape and data type (`cuvec` equivalent of `numpy.zeros`).
-    """
-    return CuVec(cu_zeros(shape, dtype))
-
-
-def copy(arr):
-    """
-    Returns a `cuvec.CuVec` view of a new `numpy.ndarray`
-    with data copied from the specified `arr`
-    (`cuvec` equivalent of `numpy.copy`).
-    """
-    return CuVec(cu_copy(arr))
-
-
-def asarray(arr, dtype=None, order=None):
-    """
-    Returns a `cuvec.CuVec` view of `arr`, avoiding memory copies if possible.
-    (`cuvec` equivalent of `numpy.asarray`).
-    """
-    if not isinstance(arr, np.ndarray) and is_raw_cuvec(arr):
-        res = CuVec(arr)
-        if dtype is None or res.dtype == np.dtype(dtype):
-            return CuVec(np.asanyarray(res, order=order))
-    return CuVec(np.asanyarray(arr, dtype=dtype, order=order))
diff --git a/cuvec/include/cuvec.cuh b/cuvec/include/cuvec.cuh
index e630e8a..b2e7f05 100644
--- a/cuvec/include/cuvec.cuh
+++ b/cuvec/include/cuvec.cuh
@@ -5,6 +5,7 @@
 #ifndef _CUVEC_H_
 #define _CUVEC_H_
 
+#include "cuda_runtime.h"
 #include <cstdio>  // fprintf
 #include <cstdlib> // std::size_t
 #include <limits>  // std::numeric_limits
@@ -12,7 +13,7 @@
 #include <vector>  // std::vector
 
 namespace cuvec {
-void HandleError(cudaError_t err, const char *file, int line) {
+static void HandleError(cudaError_t err, const char *file, int line) {
   if (err != cudaSuccess) {
     fprintf(stderr, "%s in %s at line %d\n", cudaGetErrorString(err), file, line);
     exit(EXIT_FAILURE);
diff --git a/cuvec/include/cuvec.i b/cuvec/include/cuvec.i
new file mode 100644
index 0000000..ab2fb79
--- /dev/null
+++ b/cuvec/include/cuvec.i
@@ -0,0 +1,40 @@
+/**
+ * SWIG template header wrapping `CuVec<T>`. Provides:
+ *     CuVec(T)
+ * for external use via `%include "cuvec.i"`.
+ * Note that `CuVec(T)` is `%define`d to be `CuVec<T>`, which in turn is
+ * defined in "cuvec.cuh"
+ */
+%include "std_vector.i"
+
+%{
+#include "cuvec.cuh"    // CuAlloc
+#include "cuda_fp16.h"  // __half
+
+template<class T> size_t data(CuVec<T> &vec) {
+  return (size_t) vec.data();
+};
+%}
+
+/// `%define X Y` rather than `using X = Y;`
+/// due to https://github.com/swig/swig/issues/1058
+%define CuVec(T) std::vector<T, CuAlloc<T>> %enddef
+
+template<class T> size_t data(CuVec(T) &vec);
+
+%define MKCUVEC(T, typechar)
+%template(CuVec_ ## typechar) CuVec(T);
+%template(CuVec_ ## typechar ## _data) data<T>;
+%enddef
+MKCUVEC(signed char, b)
+MKCUVEC(unsigned char, B)
+MKCUVEC(char, c)
+MKCUVEC(short, h)
+MKCUVEC(unsigned short, H)
+MKCUVEC(int, i)
+MKCUVEC(unsigned int, I)
+MKCUVEC(long long, q)
+MKCUVEC(unsigned long long, Q)
+MKCUVEC(__half, e)
+MKCUVEC(float, f)
+MKCUVEC(double, d)
diff --git a/cuvec/include/pycuvec.cuh b/cuvec/include/pycuvec.cuh
index 15f62f4..6b4e2de 100644
--- a/cuvec/include/pycuvec.cuh
+++ b/cuvec/include/pycuvec.cuh
@@ -21,47 +21,25 @@ namespace cuvec {
 template <typename T> struct PyType {
   static const char *format() { return typeid(T).name(); }
 };
-template <> struct PyType<char> {
-  static const char *format() { return "c"; }
-};
-template <> struct PyType<signed char> {
-  static const char *format() { return "b"; }
-};
-template <> struct PyType<unsigned char> {
-  static const char *format() { return "B"; }
-};
+#define _PYCVEC_TPCHR(T, typestr)                                                                 \
+  template <> struct PyType<T> {                                                                  \
+    static const char *format() { return typestr; }                                               \
+  }
+_PYCVEC_TPCHR(char, "c");
+_PYCVEC_TPCHR(signed char, "b");
+_PYCVEC_TPCHR(unsigned char, "B");
 #ifdef _Bool
-template <> struct PyType<_Bool> {
-  static const char *format() { return "?"; }
-};
+_PYCVEC_TPCHR(_Bool, "?");
 #endif
-template <> struct PyType<short> {
-  static const char *format() { return "h"; }
-};
-template <> struct PyType<unsigned short> {
-  static const char *format() { return "H"; }
-};
-template <> struct PyType<int> {
-  static const char *format() { return "i"; }
-};
-template <> struct PyType<unsigned int> {
-  static const char *format() { return "I"; }
-};
-template <> struct PyType<long long> {
-  static const char *format() { return "q"; }
-};
-template <> struct PyType<unsigned long long> {
-  static const char *format() { return "Q"; }
-};
-template <> struct PyType<__half> {
-  static const char *format() { return "e"; }
-};
-template <> struct PyType<float> {
-  static const char *format() { return "f"; }
-};
-template <> struct PyType<double> {
-  static const char *format() { return "d"; }
-};
+_PYCVEC_TPCHR(short, "h");
+_PYCVEC_TPCHR(unsigned short, "H");
+_PYCVEC_TPCHR(int, "i");
+_PYCVEC_TPCHR(unsigned int, "I");
+_PYCVEC_TPCHR(long long, "q");
+_PYCVEC_TPCHR(unsigned long long, "Q");
+_PYCVEC_TPCHR(__half, "e");
+_PYCVEC_TPCHR(float, "f");
+_PYCVEC_TPCHR(double, "d");
 } // namespace cuvec
 
 /** classes */
@@ -71,8 +49,9 @@ template <class T> struct PyCuVec {
   std::vector<Py_ssize_t> shape;
   std::vector<Py_ssize_t> strides;
 };
+namespace cuvec {
 /// __init__
-template <class T> static int PyCuVec_init(PyCuVec<T> *self, PyObject *args, PyObject *kwargs) {
+template <class T> int PyCuVec_init(PyCuVec<T> *self, PyObject *args, PyObject *kwargs) {
   PyObject *shape;
   static const char *kwds[] = {"shape", NULL};
   if (!PyArg_ParseTupleAndKeywords(args, kwargs, "O|i", (char **)kwds, &shape)) return -1;
@@ -97,7 +76,7 @@ template <class T> static int PyCuVec_init(PyCuVec<T> *self, PyObject *args, PyO
   return 0;
 }
 /// __del__
-template <class T> static void PyCuVec_dealloc(PyCuVec<T> *self) {
+template <class T> void PyCuVec_dealloc(PyCuVec<T> *self) {
   self->vec.clear();
   self->vec.shrink_to_fit();
   self->shape.clear();
@@ -107,13 +86,13 @@ template <class T> static void PyCuVec_dealloc(PyCuVec<T> *self) {
   Py_TYPE(self)->tp_free((PyObject *)self);
 }
 /// __name__
-template <class T> const std::string PyCuVec_t_str() {
-  std::stringstream s;
-  s << "Vector_" << cuvec::PyType<T>::format();
-  return s.str();
+template <class T> const char *PyCuVec_t_str() {
+  static char s[] = "PyCuVec_X";
+  s[8] = cuvec::PyType<T>::format()[0];
+  return s;
 }
 /// __str__
-template <class T> static PyObject *PyCuVec_str(PyCuVec<T> *self) {
+template <class T> PyObject *PyCuVec_str(PyCuVec<T> *self) {
   std::stringstream s;
   s << PyCuVec_t_str<T>() << "((";
   if (self->shape.size() > 0) s << self->shape[0];
@@ -124,7 +103,7 @@ template <class T> static PyObject *PyCuVec_str(PyCuVec<T> *self) {
   return ret;
 }
 /// buffer interface
-template <class T> static int PyCuVec_getbuffer(PyObject *obj, Py_buffer *view, int flags) {
+template <class T> int PyCuVec_getbuffer(PyObject *obj, Py_buffer *view, int flags) {
   if (view == NULL) {
     PyErr_SetString(PyExc_BufferError, "NULL view in getbuffer");
     view->obj = NULL;
@@ -147,59 +126,59 @@ template <class T> static int PyCuVec_getbuffer(PyObject *obj, Py_buffer *view,
   Py_INCREF(view->obj);
   return 0;
 }
-template <class T> static void PyCuVec_releasebuffer(PyObject *obj, Py_buffer *view) {
+template <class T> void PyCuVec_releasebuffer(PyObject *obj, Py_buffer *view) {
   if (view == NULL) {
     PyErr_SetString(PyExc_BufferError, "NULL view in release");
     return;
   }
   // Py_DECREF(obj) is automatic
 }
+} // namespace cuvec
 /// class
 template <class T> struct PyCuVec_tp {
-  const std::string name;
   PyBufferProcs as_buffer;
   PyTypeObject tp_obj;
   PyCuVec_tp()
-      : name(PyCuVec_t_str<T>()), as_buffer({
-                                      (getbufferproc)PyCuVec_getbuffer<T>,
-                                      (releasebufferproc)PyCuVec_releasebuffer<T>,
-                                  }),
+      : as_buffer({
+            (getbufferproc)cuvec::PyCuVec_getbuffer<T>,
+            (releasebufferproc)cuvec::PyCuVec_releasebuffer<T>,
+        }),
         tp_obj({
-            PyVarObject_HEAD_INIT(NULL, 0) name.c_str(), /* tp_name */
-            sizeof(PyCuVec<T>),                          /* tp_basicsize */
-            0,                                           /* tp_itemsize */
-            (destructor)PyCuVec_dealloc<T>,              /* tp_dealloc */
-            0,                                           /* tp_print */
-            0,                                           /* tp_getattr */
-            0,                                           /* tp_setattr */
-            0,                                           /* tp_reserved */
-            0,                                           /* tp_repr */
-            0,                                           /* tp_as_number */
-            0,                                           /* tp_as_sequence */
-            0,                                           /* tp_as_mapping */
-            0,                                           /* tp_hash  */
-            0,                                           /* tp_call */
-            (reprfunc)PyCuVec_str<T>,                    /* tp_str */
-            0,                                           /* tp_getattro */
-            0,                                           /* tp_setattro */
-            &as_buffer,                                  /* tp_as_buffer */
-            Py_TPFLAGS_DEFAULT,                          /* tp_flags */
-            "Arguments\n---------\nshape  : tuple",      /* tp_doc */
-            0,                                           /* tp_traverse */
-            0,                                           /* tp_clear */
-            0,                                           /* tp_richcompare */
-            0,                                           /* tp_weaklistoffset */
-            0,                                           /* tp_iter */
-            0,                                           /* tp_iternext */
-            0,                                           /* tp_methods */
-            0,                                           /* tp_members */
-            0,                                           /* tp_getset */
-            0,                                           /* tp_base */
-            0,                                           /* tp_dict */
-            0,                                           /* tp_descr_get */
-            0,                                           /* tp_descr_set */
-            0,                                           /* tp_dictoffset */
-            (initproc)PyCuVec_init<T>,                   /* tp_init */
+            PyVarObject_HEAD_INIT(NULL, 0) cuvec::PyCuVec_t_str<T>(), /* tp_name */
+            sizeof(PyCuVec<T>),                                       /* tp_basicsize */
+            0,                                                        /* tp_itemsize */
+            (destructor)cuvec::PyCuVec_dealloc<T>,                    /* tp_dealloc */
+            0,                                                        /* tp_print */
+            0,                                                        /* tp_getattr */
+            0,                                                        /* tp_setattr */
+            0,                                                        /* tp_reserved */
+            0,                                                        /* tp_repr */
+            0,                                                        /* tp_as_number */
+            0,                                                        /* tp_as_sequence */
+            0,                                                        /* tp_as_mapping */
+            0,                                                        /* tp_hash  */
+            0,                                                        /* tp_call */
+            (reprfunc)cuvec::PyCuVec_str<T>,                          /* tp_str */
+            0,                                                        /* tp_getattro */
+            0,                                                        /* tp_setattro */
+            &as_buffer,                                               /* tp_as_buffer */
+            Py_TPFLAGS_DEFAULT,                                       /* tp_flags */
+            "Arguments\n---------\nshape  : tuple",                   /* tp_doc */
+            0,                                                        /* tp_traverse */
+            0,                                                        /* tp_clear */
+            0,                                                        /* tp_richcompare */
+            0,                                                        /* tp_weaklistoffset */
+            0,                                                        /* tp_iter */
+            0,                                                        /* tp_iternext */
+            0,                                                        /* tp_methods */
+            0,                                                        /* tp_members */
+            0,                                                        /* tp_getset */
+            0,                                                        /* tp_base */
+            0,                                                        /* tp_dict */
+            0,                                                        /* tp_descr_get */
+            0,                                                        /* tp_descr_set */
+            0,                                                        /* tp_dictoffset */
+            (initproc)cuvec::PyCuVec_init<T>,                         /* tp_init */
         }) {
     tp_obj.tp_new = PyType_GenericNew;
     if (PyType_Ready(&tp_obj) < 0) fprintf(stderr, "error: count not finalise\n");
@@ -208,9 +187,9 @@ template <class T> struct PyCuVec_tp {
 
 /// Helper functions for creating `PyCuVec<T> *`s in C++ for casting to CPython API `PyObject *`s
 template <class T> PyCuVec<T> *PyCuVec_new() {
-  static PyCuVec_tp<T> Vector_T;
-  if (PyType_Ready(&Vector_T.tp_obj) < 0) return NULL;
-  return (PyCuVec<T> *)Vector_T.tp_obj.tp_alloc(&Vector_T.tp_obj, 1);
+  static PyCuVec_tp<T> PyCuVec_T;
+  if (PyType_Ready(&PyCuVec_T.tp_obj) < 0) return NULL;
+  return (PyCuVec<T> *)PyCuVec_T.tp_obj.tp_alloc(&PyCuVec_T.tp_obj, 1);
 }
 template <class T> PyCuVec<T> *PyCuVec_zeros(std::vector<Py_ssize_t> shape) {
   PyCuVec<T> *self = PyCuVec_new<T>();
diff --git a/cuvec/pycuvec.py b/cuvec/pycuvec.py
index fb92ae4..3d21efa 100644
--- a/cuvec/pycuvec.py
+++ b/cuvec/pycuvec.py
@@ -1,52 +1,138 @@
 """Thin wrappers around `cuvec` C++/CUDA module"""
 import array
+import logging
 from collections.abc import Sequence
+from textwrap import dedent
 
 import numpy as np
 
 from .cuvec import (
-    Vector_b,
-    Vector_B,
-    Vector_c,
-    Vector_d,
-    Vector_e,
-    Vector_f,
-    Vector_h,
-    Vector_H,
-    Vector_i,
-    Vector_I,
-    Vector_q,
-    Vector_Q,
+    PyCuVec_b,
+    PyCuVec_B,
+    PyCuVec_c,
+    PyCuVec_d,
+    PyCuVec_e,
+    PyCuVec_f,
+    PyCuVec_h,
+    PyCuVec_H,
+    PyCuVec_i,
+    PyCuVec_I,
+    PyCuVec_q,
+    PyCuVec_Q,
 )
 
+log = logging.getLogger(__name__)
 # u: non-standard np.dype('S2'); l/L: inconsistent between `array` and `numpy`
 typecodes = ''.join(i for i in array.typecodes if i not in "ulL") + "e"
 vec_types = {
-    np.dtype('int8'): Vector_b,
-    np.dtype('uint8'): Vector_B,
-    np.dtype('S1'): Vector_c,
-    np.dtype('int16'): Vector_h,
-    np.dtype('uint16'): Vector_H,
-    np.dtype('int32'): Vector_i,
-    np.dtype('uint32'): Vector_I,
-    np.dtype('int64'): Vector_q,
-    np.dtype('uint64'): Vector_Q,
-    np.dtype('float16'): Vector_e,
-    np.dtype('float32'): Vector_f,
-    np.dtype('float64'): Vector_d}
+    np.dtype('int8'): PyCuVec_b,
+    np.dtype('uint8'): PyCuVec_B,
+    np.dtype('S1'): PyCuVec_c,
+    np.dtype('int16'): PyCuVec_h,
+    np.dtype('uint16'): PyCuVec_H,
+    np.dtype('int32'): PyCuVec_i,
+    np.dtype('uint32'): PyCuVec_I,
+    np.dtype('int64'): PyCuVec_q,
+    np.dtype('uint64'): PyCuVec_Q,
+    np.dtype('float16'): PyCuVec_e,
+    np.dtype('float32'): PyCuVec_f,
+    np.dtype('float64'): PyCuVec_d}
 
 
 def cu_zeros(shape, dtype="float32"):
     """
-    Returns a new `<cuvec.Vector_*>` of the specified shape and data type.
+    Returns a new `<cuvec.PyCuVec_*>` of the specified shape and data type.
     """
     return vec_types[np.dtype(dtype)](shape if isinstance(shape, Sequence) else (shape,))
 
 
 def cu_copy(arr):
     """
-    Returns a new `<cuvec.Vector_*>` with data copied from the specified `arr`.
+    Returns a new `<cuvec.PyCuVec_*>` with data copied from the specified `arr`.
     """
     res = cu_zeros(arr.shape, arr.dtype)
-    np.asarray(res)[:] = arr[:]
+    np.asarray(res).flat = arr.flat
     return res
+
+
+_PyCuVec_types = tuple(vec_types.values())
+_PyCuVec_types_s = tuple(map(str, vec_types.values()))
+
+
+def is_raw_cuvec(cuvec):
+    """
+    Returns `True` when given the output of
+    CPython API functions returning `PyCuVec<T> *` PyObjects.
+
+    This is needed since conversely `isinstance(cuvec, CuVec)` may be `False`
+    due to external libraries
+    `#include "pycuvec.cuh"` making a distinct type object.
+    """
+    return isinstance(cuvec, _PyCuVec_types) or str(type(cuvec)) in _PyCuVec_types_s
+
+
+class CuVec(np.ndarray):
+    """
+    A `numpy.ndarray` compatible view with a `cuvec` member containing the
+    underlying `cuvec.PyCuVec_*` object (for use in CPython API function calls).
+    """
+    def __new__(cls, arr):
+        """arr: `cuvec.CuVec`, raw `cuvec.PyCuVec_*`, or `numpy.ndarray`"""
+        if is_raw_cuvec(arr):
+            log.debug("wrap raw %s", type(arr))
+            obj = np.asarray(arr).view(cls)
+            obj.cuvec = arr
+            return obj
+        if isinstance(arr, CuVec) and hasattr(arr, 'cuvec'):
+            log.debug("new view")
+            obj = np.asarray(arr).view(cls)
+            obj.cuvec = arr.cuvec
+            return obj
+        if isinstance(arr, np.ndarray):
+            log.debug("copy")
+            return copy(arr)
+        raise NotImplementedError(
+            dedent("""\
+            Not intended for explicit construction
+            (do not do `cuvec.CuVec((42, 1337))`;
+            instead use `cuvec.zeros((42, 137))`"""))
+
+    @property
+    def __cuda_array_interface__(self):
+        if not hasattr(self, 'cuvec'):
+            raise AttributeError(
+                dedent("""\
+                `numpy.ndarray` object has no attribute `cuvec`:
+                try using `cuvec.asarray()` first."""))
+        res = self.__array_interface__
+        return {
+            'shape': res['shape'], 'typestr': res['typestr'], 'data': res['data'], 'version': 3}
+
+
+def zeros(shape, dtype="float32"):
+    """
+    Returns a `cuvec.CuVec` view of a new `numpy.ndarray`
+    of the specified shape and data type (`cuvec` equivalent of `numpy.zeros`).
+    """
+    return CuVec(cu_zeros(shape, dtype))
+
+
+def copy(arr):
+    """
+    Returns a `cuvec.CuVec` view of a new `numpy.ndarray`
+    with data copied from the specified `arr`
+    (`cuvec` equivalent of `numpy.copy`).
+    """
+    return CuVec(cu_copy(arr))
+
+
+def asarray(arr, dtype=None, order=None):
+    """
+    Returns a `cuvec.CuVec` view of `arr`, avoiding memory copies if possible.
+    (`cuvec` equivalent of `numpy.asarray`).
+    """
+    if not isinstance(arr, np.ndarray) and is_raw_cuvec(arr):
+        res = CuVec(arr)
+        if dtype is None or res.dtype == np.dtype(dtype):
+            return CuVec(np.asanyarray(res, order=order))
+    return CuVec(np.asanyarray(arr, dtype=dtype, order=order))
diff --git a/cuvec/src/cuvec.cu b/cuvec/src/cuvec.cu
index 6f4ae48..7f2745e 100644
--- a/cuvec/src/cuvec.cu
+++ b/cuvec/src/cuvec.cu
@@ -20,22 +20,6 @@ static PyMethodDef cuvec_methods[] = {
     {NULL, NULL, 0, NULL} // Sentinel
 };
 
-/** classes */
-static PyCuVec_tp<char> Vector_c;
-static PyCuVec_tp<signed char> Vector_b;
-static PyCuVec_tp<unsigned char> Vector_B;
-// #ifdef _Bool
-// #endif
-static PyCuVec_tp<short> Vector_h;
-static PyCuVec_tp<unsigned short> Vector_H;
-static PyCuVec_tp<int> Vector_i;
-static PyCuVec_tp<unsigned int> Vector_I;
-static PyCuVec_tp<long long> Vector_q;          // _l
-static PyCuVec_tp<unsigned long long> Vector_Q; // _L
-static PyCuVec_tp<__half> Vector_e;
-static PyCuVec_tp<float> Vector_f;
-static PyCuVec_tp<double> Vector_d;
-
 /** module */
 static struct PyModuleDef cuvec_module = {
     PyModuleDef_HEAD_INIT,
@@ -50,60 +34,34 @@ PyMODINIT_FUNC PyInit_cuvec(void) {
   PyObject *m = PyModule_Create(&cuvec_module);
   if (m == NULL) return NULL;
 
-  if (PyType_Ready(&Vector_c.tp_obj) < 0) return NULL;
-  Py_INCREF(&Vector_c.tp_obj);
-  PyModule_AddObject(m, Vector_c.name.c_str(), (PyObject *)&Vector_c.tp_obj);
-
-  if (PyType_Ready(&Vector_b.tp_obj) < 0) return NULL;
-  Py_INCREF(&Vector_b.tp_obj);
-  PyModule_AddObject(m, Vector_b.name.c_str(), (PyObject *)&Vector_b.tp_obj);
-
-  if (PyType_Ready(&Vector_B.tp_obj) < 0) return NULL;
-  Py_INCREF(&Vector_B.tp_obj);
-  PyModule_AddObject(m, Vector_B.name.c_str(), (PyObject *)&Vector_B.tp_obj);
+#define _PYCUVEC_EXPOSE(T, typechar)                                                              \
+  static PyCuVec_tp<T> PyCuVec_##typechar;                                                        \
+  if (PyType_Ready(&PyCuVec_##typechar.tp_obj) < 0) return NULL;                                  \
+  Py_INCREF(&PyCuVec_##typechar.tp_obj);                                                          \
+  PyModule_AddObject(m, PyCuVec_##typechar.tp_obj.tp_name, (PyObject *)&PyCuVec_##typechar.tp_obj)
 
+  _PYCUVEC_EXPOSE(signed char, b);
+  _PYCUVEC_EXPOSE(unsigned char, B);
+  _PYCUVEC_EXPOSE(char, c);
   // #ifdef _Bool
   // #endif
-
-  if (PyType_Ready(&Vector_h.tp_obj) < 0) return NULL;
-  Py_INCREF(&Vector_h.tp_obj);
-  PyModule_AddObject(m, Vector_h.name.c_str(), (PyObject *)&Vector_h.tp_obj);
-
-  if (PyType_Ready(&Vector_H.tp_obj) < 0) return NULL;
-  Py_INCREF(&Vector_H.tp_obj);
-  PyModule_AddObject(m, Vector_H.name.c_str(), (PyObject *)&Vector_H.tp_obj);
-
-  if (PyType_Ready(&Vector_i.tp_obj) < 0) return NULL;
-  Py_INCREF(&Vector_i.tp_obj);
-  PyModule_AddObject(m, Vector_i.name.c_str(), (PyObject *)&Vector_i.tp_obj);
-
-  if (PyType_Ready(&Vector_I.tp_obj) < 0) return NULL;
-  Py_INCREF(&Vector_I.tp_obj);
-  PyModule_AddObject(m, Vector_I.name.c_str(), (PyObject *)&Vector_I.tp_obj);
-
-  if (PyType_Ready(&Vector_q.tp_obj) < 0) return NULL;
-  Py_INCREF(&Vector_q.tp_obj);
-  PyModule_AddObject(m, Vector_q.name.c_str(), (PyObject *)&Vector_q.tp_obj);
-  Py_INCREF(&Vector_q.tp_obj);
-  PyModule_AddObject(m, "Vector_l", (PyObject *)&Vector_q.tp_obj);
-
-  if (PyType_Ready(&Vector_Q.tp_obj) < 0) return NULL;
-  Py_INCREF(&Vector_Q.tp_obj);
-  PyModule_AddObject(m, Vector_Q.name.c_str(), (PyObject *)&Vector_Q.tp_obj);
-  Py_INCREF(&Vector_Q.tp_obj);
-  PyModule_AddObject(m, "Vector_L", (PyObject *)&Vector_Q.tp_obj);
-
-  if (PyType_Ready(&Vector_e.tp_obj) < 0) return NULL;
-  Py_INCREF(&Vector_e.tp_obj);
-  PyModule_AddObject(m, Vector_e.name.c_str(), (PyObject *)&Vector_e.tp_obj);
-
-  if (PyType_Ready(&Vector_f.tp_obj) < 0) return NULL;
-  Py_INCREF(&Vector_f.tp_obj);
-  PyModule_AddObject(m, Vector_f.name.c_str(), (PyObject *)&Vector_f.tp_obj);
-
-  if (PyType_Ready(&Vector_d.tp_obj) < 0) return NULL;
-  Py_INCREF(&Vector_d.tp_obj);
-  PyModule_AddObject(m, Vector_d.name.c_str(), (PyObject *)&Vector_d.tp_obj);
+  _PYCUVEC_EXPOSE(short, h);
+  _PYCUVEC_EXPOSE(unsigned short, H);
+  _PYCUVEC_EXPOSE(int, i);
+  _PYCUVEC_EXPOSE(unsigned int, I);
+  _PYCUVEC_EXPOSE(long long, q);
+  _PYCUVEC_EXPOSE(unsigned long long, Q);
+  _PYCUVEC_EXPOSE(__half, e);
+  _PYCUVEC_EXPOSE(float, f);
+  _PYCUVEC_EXPOSE(double, d);
+
+  /* aliases: inconsistent between `numpy.dtype` and `array.typecodes`
+  Py_INCREF(&PyCuVec_q.tp_obj);
+  PyModule_AddObject(m, "PyCuVec_l", (PyObject *)&PyCuVec_q.tp_obj);
+
+  Py_INCREF(&PyCuVec_Q.tp_obj);
+  PyModule_AddObject(m, "PyCuVec_L", (PyObject *)&PyCuVec_Q.tp_obj);
+  */
 
   PyObject *author = Py_BuildValue("s", "Casper da Costa-Luis (https://github.com/casperdcl)");
   if (author == NULL) return NULL;
@@ -113,7 +71,7 @@ PyMODINIT_FUNC PyInit_cuvec(void) {
   if (date == NULL) return NULL;
   PyModule_AddObject(m, "__date__", date);
 
-  PyObject *version = Py_BuildValue("s", "0.3.0");
+  PyObject *version = Py_BuildValue("s", "0.4.0");
   if (version == NULL) return NULL;
   PyModule_AddObject(m, "__version__", version);
 
diff --git a/cuvec/src/example_mod/CMakeLists.txt b/cuvec/src/example_mod/CMakeLists.txt
index 3322036..4670c32 100644
--- a/cuvec/src/example_mod/CMakeLists.txt
+++ b/cuvec/src/example_mod/CMakeLists.txt
@@ -8,7 +8,7 @@ add_library(${PROJECT_NAME} MODULE ${SRC})
 target_include_directories(${PROJECT_NAME} PUBLIC
   "$<BUILD_INTERFACE:${${CMAKE_PROJECT_NAME}_INCLUDE_DIRS}>"
   "$<INSTALL_INTERFACE:${CMAKE_PROJECT_NAME}/include>")
-target_link_libraries(${PROJECT_NAME} ${Python3_LIBRARIES} ${CUDA_LIBRARIES})
+target_link_libraries(${PROJECT_NAME} ${Python3_LIBRARIES} CUDA::cudart_static)
 
 if(SKBUILD)
   python_extension_module(${PROJECT_NAME})
diff --git a/cuvec/src/example_mod/example_mod.cu b/cuvec/src/example_mod/example_mod.cu
index 539836f..8d1cc3b 100644
--- a/cuvec/src/example_mod/example_mod.cu
+++ b/cuvec/src/example_mod/example_mod.cu
@@ -14,17 +14,34 @@ __global__ void _d_incr(float *dst, float *src, int X, int Y) {
   if (y >= Y) return;
   dst[y * X + x] = src[y * X + x] + 1;
 }
-static PyObject *increment_f(PyObject *self, PyObject *args) {
-  PyCuVec<float> *src;
-  if (!PyArg_ParseTuple(args, "O", (PyObject **)&src)) return NULL;
+static PyObject *increment2d_f(PyObject *self, PyObject *args, PyObject *kwargs) {
+  PyCuVec<float> *dst = NULL;
+  PyCuVec<float> *src = NULL;
+  static const char *kwds[] = {"src", "output", NULL};
+  if (!PyArg_ParseTupleAndKeywords(args, kwargs, "O|O", (char **)kwds, (PyObject **)&src,
+                                   (PyObject **)&dst))
+    return NULL;
+  if (!src) return NULL;
   std::vector<Py_ssize_t> &N = src->shape;
+  if (N.size() != 2) {
+    PyErr_SetString(PyExc_IndexError, "`src` must be 2D");
+    return NULL;
+  }
 
   cudaEvent_t eStart, eAlloc, eKern;
   cudaEventCreate(&eStart);
   cudaEventCreate(&eAlloc);
   cudaEventCreate(&eKern);
   cudaEventRecord(eStart);
-  PyCuVec<float> *dst = PyCuVec_zeros_like(src);
+  if (dst) {
+    if (N != dst->shape) {
+      PyErr_SetString(PyExc_IndexError, "`output` must be same shape as `src`");
+      return NULL;
+    }
+  } else {
+    dst = PyCuVec_zeros_like(src);
+    if (!dst) return NULL;
+  }
   cudaEventRecord(eAlloc);
   dim3 thrds((N[1] + 31) / 32, (N[0] + 31) / 32);
   dim3 blcks(32, 32);
@@ -39,7 +56,8 @@ static PyObject *increment_f(PyObject *self, PyObject *args) {
   return Py_BuildValue("ddO", double(alloc_ms), double(kernel_ms), (PyObject *)dst);
 }
 static PyMethodDef example_methods[] = {
-    {"increment_f", increment_f, METH_VARARGS, "Returns (alloc_ms, kernel_ms, input + 1)."},
+    {"increment2d_f", (PyCFunction)increment2d_f, METH_VARARGS | METH_KEYWORDS,
+     "Args: src, output (optional). Returns: alloc_ms, kernel_ms, src + 1."},
     {NULL, NULL, 0, NULL} // Sentinel
 };
 
diff --git a/cuvec/src/example_swig/CMakeLists.txt b/cuvec/src/example_swig/CMakeLists.txt
new file mode 100644
index 0000000..2944321
--- /dev/null
+++ b/cuvec/src/example_swig/CMakeLists.txt
@@ -0,0 +1,32 @@
+project(example_swig)
+file(GLOB SRC LIST_DIRECTORIES false "*.cu")
+file(GLOB ISRC LIST_DIRECTORIES false "*.i")
+
+#include_directories(${Python3_INCLUDE_DIRS})
+#include_directories(${Python3_NumPy_INCLUDE_DIRS})
+
+if(SWIG_FOUND)
+  include_directories(${CUDAToolkit_INCLUDE_DIRS})
+  set_source_files_properties(${PROJECT_NAME}.i PROPERTIES CPLUSPLUS ON)
+  set_source_files_properties(${PROJECT_NAME}.i PROPERTIES USE_TARGET_INCLUDE_DIRECTORIES ON)
+  swig_add_library(${PROJECT_NAME} LANGUAGE python SOURCES ${ISRC} ${SRC})
+  swig_link_libraries(${PROJECT_NAME} CUDA::cudart_static)
+else()
+  add_library(${PROJECT_NAME} MODULE ${SRC})
+  target_link_libraries(${PROJECT_NAME} CUDA::cudart_static)
+endif()
+target_include_directories(${PROJECT_NAME} PUBLIC
+  "$<BUILD_INTERFACE:${${CMAKE_PROJECT_NAME}_INCLUDE_DIRS}>"
+  "$<INSTALL_INTERFACE:${CMAKE_PROJECT_NAME}/include>")
+
+if(SKBUILD)
+  python_extension_module(${PROJECT_NAME})
+endif()
+set_target_properties(${PROJECT_NAME} PROPERTIES
+  CXX_STANDARD 11
+  VERSION ${CMAKE_PROJECT_VERSION} SOVERSION ${CMAKE_PROJECT_VERSION_MAJOR}
+  INTERFACE_${PROJECT_NAME}_MAJOR_VERSION ${CMAKE_PROJECT_VERSION_MAJOR})
+set_property(TARGET ${PROJECT_NAME} APPEND PROPERTY COMPATIBLE_INTERFACE_STRING ${PROJECT_NAME}_MAJOR_VERSION)
+install(TARGETS ${PROJECT_NAME}
+  INCLUDES DESTINATION ${CMAKE_PROJECT_NAME}/include
+  LIBRARY DESTINATION ${CMAKE_PROJECT_NAME})
diff --git a/cuvec/src/example_swig/example_swig.cu b/cuvec/src/example_swig/example_swig.cu
new file mode 100644
index 0000000..ebb37eb
--- /dev/null
+++ b/cuvec/src/example_swig/example_swig.cu
@@ -0,0 +1,44 @@
+/**
+ * Example external SWIG extension module using CuVec.
+ *
+ * Copyright (2021) Casper da Costa-Luis
+ */
+#include "cuvec.cuh" // CuVec
+#include <stdexcept> // std::length_error
+/** functions */
+/// dst = src + 1
+__global__ void _d_incr(float *dst, float *src, int N) {
+  int i = threadIdx.x + blockDim.x * blockIdx.x;
+  if (i >= N) return;
+  dst[i] = src[i] + 1;
+}
+CuVec<float> *increment_f(CuVec<float> &src, CuVec<float> *dst, bool timing) {
+  cudaEvent_t eStart, eAlloc, eKern;
+  cudaEventCreate(&eStart);
+  cudaEventCreate(&eAlloc);
+  cudaEventCreate(&eKern);
+  cudaEventRecord(eStart);
+  if (!dst) {
+    dst = new CuVec<float>;
+    dst->resize(src.size());
+  }
+  if (src.size() != dst->size()) throw std::length_error("`output` must be same shape as `src`");
+  cudaEventRecord(eAlloc);
+  dim3 thrds((src.size() + 1023) / 1024, 1, 1);
+  dim3 blcks(1024, 1, 1);
+  _d_incr<<<thrds, blcks>>>(dst->data(), src.data(), src.size());
+  cuvec::HandleError(cudaGetLastError(), __FILE__, __LINE__);
+  // cudaDeviceSynchronize();
+  cudaEventRecord(eKern);
+  cudaEventSynchronize(eKern);
+  float alloc_ms, kernel_ms;
+  cudaEventElapsedTime(&alloc_ms, eStart, eAlloc);
+  cudaEventElapsedTime(&kernel_ms, eAlloc, eKern);
+  // fprintf(stderr, "%.3f ms, %.3f ms\n", alloc_ms, kernel_ms);
+  if (timing) {
+    // hack: store times in last two elements of dst
+    (*dst)[src.size() - 2] = alloc_ms;
+    (*dst)[src.size() - 1] = kernel_ms;
+  }
+  return dst;
+}
diff --git a/cuvec/src/example_swig/example_swig.i b/cuvec/src/example_swig/example_swig.i
new file mode 100644
index 0000000..452fdd3
--- /dev/null
+++ b/cuvec/src/example_swig/example_swig.i
@@ -0,0 +1,18 @@
+%module example_swig
+
+%include "exception.i"
+%exception {
+  try {
+    $action
+  } catch (const std::exception &e) {
+    SWIG_exception(SWIG_RuntimeError, e.what());
+  }
+}
+
+%include "cuvec.i" // %{ CuVec<T> %}, CuVec(T)
+%{
+/// signatures from "example_swig.cu"
+CuVec<float> *increment_f(CuVec<float> &src, CuVec<float> *output = NULL, bool timing = false);
+%}
+/// expose definitions
+CuVec(float) *increment_f(CuVec(float) &src, CuVec(float) *output = NULL, bool timing = false);
diff --git a/cuvec/swigcuvec.py b/cuvec/swigcuvec.py
new file mode 100644
index 0000000..bf7ec95
--- /dev/null
+++ b/cuvec/swigcuvec.py
@@ -0,0 +1,180 @@
+"""
+Thin wrappers around `example_swig` C++/CUDA module
+
+A SWIG-driven equivalent of the CPython Extension API-driven `pycuvec.py`
+"""
+import array
+import logging
+import re
+from functools import partial
+from textwrap import dedent
+
+import numpy as np
+
+from . import example_swig as sw
+
+log = logging.getLogger(__name__)
+# u: non-standard np.dype('S2'); l/L: inconsistent between `array` and `numpy`
+typecodes = ''.join(i for i in array.typecodes if i not in "ulL") + "e"
+RE_SWIG_TYPE = ("<Swig Object of type"
+                r" 'std::vector<\s*(\w+)\s*,\s*CuAlloc<\s*\1\s*>\s*>\s*\*' at 0x\w+>")
+SWIG_TYPES = {
+    "signed char": 'b',
+    "unsigned char": 'B',
+    "char": 'c',
+    "short": 'h',
+    "unsigned short": 'H',
+    "int": 'i',
+    "unsigned int": 'I',
+    "long long": 'q',
+    "unsigned long long": 'Q',
+    "__half": 'e',
+    "float": 'f',
+    "double": 'd'} # yapf: disable
+
+
+class SWIGVector:
+    def __init__(self, typechar, size, cuvec=None):
+        """
+        Thin wrapper around `SwigPyObject<CuVec<Type>>`. Always takes ownership.
+        Args:
+          typechar(char)
+          size(tuple(int))
+          cuvec(SwigPyObject<CuVec<Type>>): if given,
+            `typechar` and `size` are ignored
+        """
+        if cuvec is not None:
+            assert is_raw_cuvec(cuvec)
+            self.typechar = SWIG_TYPES[re.match(RE_SWIG_TYPE, str(cuvec)).group(1)]
+            self.cuvec = cuvec
+            return
+        self.typechar = typechar
+        self.cuvec = getattr(sw, f'new_CuVec_{typechar}')(size)
+
+    def __del__(self):
+        getattr(sw, f'delete_CuVec_{self.typechar}')(self.cuvec)
+
+    def __len__(self):
+        return getattr(sw, f'CuVec_{self.typechar}___len__')(self.cuvec)
+
+    def data(self):
+        return getattr(sw, f'CuVec_{self.typechar}_data')(self.cuvec)
+
+    @property
+    def __array_interface__(self):
+        return {
+            'shape': (len(self),), 'typestr': np.dtype(self.typechar).str,
+            'data': (self.data(), False), 'version': 3}
+
+    @property
+    def __cuda_array_interface__(self):
+        return self.__array_interface__
+
+    def __repr__(self):
+        return f"{type(self).__name__}('{self.typechar}', {len(self)})"
+
+    def __str__(self):
+        return f"{np.dtype(self.typechar)}[{len(self)}] at 0x{self.data():x}"
+
+
+vec_types = {np.dtype(c): partial(SWIGVector, c) for c in typecodes}
+
+
+def cu_zeros(size, dtype="float32"):
+    """
+    Returns a new `SWIGVector` of the specified size and data type.
+    """
+    return vec_types[np.dtype(dtype)](size)
+
+
+def cu_copy(arr):
+    """
+    Returns a new `SWIGVector` with data copied from the specified `arr`.
+    """
+    res = cu_zeros(arr.size, arr.dtype)
+    np.asarray(res).flat = arr.flat
+    return res
+
+
+def is_raw_cuvec(arr):
+    return type(arr).__name__ == "SwigPyObject" and re.match(RE_SWIG_TYPE, str(arr))
+
+
+def is_raw_swvec(arr):
+    return isinstance(arr, SWIGVector) or type(arr).__name__ == "SWIGVector"
+
+
+class CuVec(np.ndarray):
+    """
+    A `numpy.ndarray` compatible view with a `cuvec` member containing the
+    underlying `SWIGVector` object (for use in CPython API function calls).
+    """
+    def __new__(cls, arr):
+        """arr: `cuvec.CuVec`, raw `SWIGVector`, or `numpy.ndarray`"""
+        if is_raw_swvec(arr):
+            log.debug("wrap swraw %s", type(arr))
+            obj = np.asarray(arr).view(cls)
+            obj.swvec = arr
+            obj.cuvec = arr.cuvec
+            return obj
+        if isinstance(arr, CuVec) and hasattr(arr, 'swvec'):
+            log.debug("new view")
+            obj = np.asarray(arr).view(cls)
+            obj.swvec = arr.swvec
+            obj.cuvec = arr.swvec.cuvec
+            return obj
+        if isinstance(arr, np.ndarray):
+            log.debug("copy")
+            return copy(arr)
+        raise NotImplementedError(
+            dedent("""\
+            Not intended for explicit construction
+            (do not do `cuvec.CuVec((42, 1337))`;
+            instead use `cuvec.zeros((42, 137))`"""))
+
+    @property
+    def __cuda_array_interface__(self):
+        if not hasattr(self, 'cuvec'):
+            raise AttributeError(
+                dedent("""\
+                `numpy.ndarray` object has no attribute `cuvec`:
+                try using `cuvec.asarray()` first."""))
+        res = self.__array_interface__
+        return {
+            'shape': res['shape'], 'typestr': res['typestr'], 'data': res['data'], 'version': 3}
+
+
+def zeros(shape, dtype="float32"):
+    """
+    Returns a `cuvec.CuVec` view of a new `numpy.ndarray`
+    of the specified shape and data type (`cuvec` equivalent of `numpy.zeros`).
+    """
+    res = CuVec(cu_zeros(int(np.prod(shape)), dtype))
+    res.resize(shape)
+    return res
+
+
+def copy(arr):
+    """
+    Returns a `cuvec.CuVec` view of a new `numpy.ndarray`
+    with data copied from the specified `arr`
+    (`cuvec` equivalent of `numpy.copy`).
+    """
+    res = CuVec(cu_copy(arr))
+    res.resize(arr.shape)
+    return res
+
+
+def asarray(arr, dtype=None, order=None):
+    """
+    Returns a `cuvec.CuVec` view of `arr`, avoiding memory copies if possible.
+    (`cuvec` equivalent of `numpy.asarray`).
+    """
+    if is_raw_cuvec(arr):
+        log.debug("taking ownership")
+        arr = SWIGVector(None, None, arr)
+    if not isinstance(arr, np.ndarray) and is_raw_swvec(arr):
+        res = CuVec(arr)
+        if dtype is None or res.dtype == np.dtype(dtype):
+            return CuVec(np.asanyarray(res, order=order))
+    return CuVec(np.asanyarray(arr, dtype=dtype, order=order))
diff --git a/tests/test_perf.py b/tests/test_perf.py
index 652ad20..4b95706 100644
--- a/tests/test_perf.py
+++ b/tests/test_perf.py
@@ -2,8 +2,21 @@
 from time import time
 
 import numpy as np
+from pytest import mark, skip
 
-import cuvec as cu
+from cuvec import pycuvec as cu
+
+# `example_mod` is defined in ../cuvec/src/example_mod/
+from cuvec.example_mod import increment2d_f as cuinc
+
+try:
+    # alternative to `cu`
+    from cuvec import swigcuvec as sw
+
+    # `example_swig` is defined in ../cuvec/src/example_swig/
+    from cuvec.example_swig import increment_f as swinc
+except ImportError:
+    sw, swinc = None, None
 
 
 def _time_overhead():
@@ -24,10 +37,28 @@ def inner(*args, **kwargs):
     return inner
 
 
-def test_perf(shape=(1337, 42), quiet=False):
-    # `example_mod` is defined in ../cuvec/src/example_mod/
-    from cuvec.example_mod import increment_f
+def retry_on_except(n=3):
+    """decroator for retrying `n` times before raising Exceptions"""
+    def wrapper(func):
+        @wraps(func)
+        def test_inner(*args, **kwargs):
+            for i in range(1, n + 1):
+                try:
+                    return func(*args, **kwargs)
+                except Exception:
+                    if i >= n:
+                        raise
+
+        return test_inner
+
+    return wrapper
+
 
+@mark.parametrize("shape,cu,increment", [((1337, 42), cu, cuinc), (1337 * 42, sw, swinc)])
+@retry_on_except()
+def test_perf(shape, cu, increment, quiet=False):
+    if cu is None:
+        skip("SWIG not available")
     overhead = np.mean([_time_overhead() for _ in range(100)])
     t = {}
     t['create src'], src = timer(cu.zeros)(shape, "float32")
@@ -38,13 +69,23 @@ def test_perf(shape=(1337, 42), quiet=False):
     t['assign'] = (time() - tic - overhead) * 1000
 
     if not quiet:
-        t['warmup'], (t['> create dst'], t['> kernel'], _) = timer(increment_f)(src.cuvec)
-    t['call ext'], (t['- create dst'], t['- kernel'], res) = timer(increment_f)(src.cuvec)
-    t['view'], dst = timer(cu.asarray)(res)
+        if cu is sw:
+            t['warmup'], res = timer(increment)(src.cuvec, None, True)
+            t['> create dst'], t['> kernel'] = cu.asarray(res)[-2:]
+        else:
+            t['warmup'], (t['> create dst'], t['> kernel'], _) = timer(increment)(src.cuvec)
+    if cu is sw:
+        t['call ext'], res = timer(increment)(src.cuvec, None, True)
+        t['- create dst'], t['- kernel'] = None, None
+        t['view'], dst = timer(cu.asarray)(res)
+        t['- create dst'], t['- kernel'] = dst[-2:]
+    else:
+        t['call ext'], (t['- create dst'], t['- kernel'], res) = timer(increment)(src.cuvec)
+        t['view'], dst = timer(cu.asarray)(res)
 
     if not quiet:
         print("\n".join(f"{k.ljust(14)} | {v:.3f}" for k, v in t.items()))
-    assert (src + 1 == dst).all()
+    assert (src + 1 == dst)[:-2 if cu is sw else None].all()
     # even a fast kernel takes longer than API overhead
     assert t['- kernel'] / (t['call ext'] - t['- create dst']) > 0.5
     # API call should be <0.1 ms... but set a higher threshold of 2 ms
@@ -59,14 +100,16 @@ def test_perf(shape=(1337, 42), quiet=False):
         trange = range
     nruns = 1000
 
-    print("# One run:")
-    test_perf((1000, 1000))
-
-    print("Repeating & averaging performance test metrics over {nruns} runs.")
-    runs = [test_perf((1000, 1000), True) for _ in trange(nruns)]
-    pretty = {
-        'create src': 'Create input', 'assign': 'Assign', 'call ext': 'Call extension',
-        '- create dst': '-- Create output', '- kernel': '-- Launch kernel', 'view': 'View'}
-    runs = {pretty[k]: [i[k] for i in runs] for k in runs[0]}
-    print("\n".join(f"{k.ljust(16)} | {np.mean(v):.3f} ± {np.std(v, ddof=1)/np.sqrt(len(v)):.3f}"
-                    for k, v in runs.items()))
+    for args in [((1000, 1000), cu, cuinc), (1000 * 1000, sw, swinc)]:
+        print(f"# One run ({args[1].__name__}):")
+        test_perf(*args)
+
+        print(f"# Average over {nruns} runs:")
+        runs = [test_perf(*args, quiet=True) for _ in trange(nruns)]
+        pretty = {
+            'create src': 'Create input', 'assign': 'Assign', 'call ext': 'Call extension',
+            '- create dst': '-- Create output', '- kernel': '-- Launch kernel', 'view': 'View'}
+        runs = {pretty[k]: [i[k] for i in runs] for k in runs[0]}
+        print("\n".join(
+            f"{k.ljust(16)} | {np.mean(v):.3f} ± {np.std(v, ddof=1)/np.sqrt(len(v)):.3f}"
+            for k, v in runs.items()))
diff --git a/tests/test_pycuvec.py b/tests/test_pycuvec.py
index 349dbf2..518e985 100644
--- a/tests/test_pycuvec.py
+++ b/tests/test_pycuvec.py
@@ -1,14 +1,17 @@
+import logging
+
 import numpy as np
-from pytest import mark
+from pytest import importorskip, mark, raises
 
 import cuvec as cu
 
+shape = 127, 344, 344
+
 
 @mark.parametrize("tp", list(cu.typecodes))
-def test_Vector_asarray(tp):
-    """tp(char): any of bBhHiIqQfd"""
-    v = getattr(cu.cuvec, f"Vector_{tp}")((1, 2, 3))
-    assert str(v) == f"Vector_{tp}((1, 2, 3))"
+def test_PyCuVec_asarray(tp):
+    v = getattr(cu.cuvec, f"PyCuVec_{tp}")((1, 2, 3))
+    assert str(v) == f"PyCuVec_{tp}((1, 2, 3))"
     a = np.asarray(v)
     assert not a.any()
     a[0, 0] = 42
@@ -19,9 +22,104 @@ def test_Vector_asarray(tp):
     del a, b, v
 
 
-def test_Vector_strides():
-    shape = 127, 344, 344
-    v = cu.cuvec.Vector_f(shape)
+def test_PyCuVec_strides():
+    v = cu.cuvec.PyCuVec_f(shape)
     a = np.asarray(v)
     assert a.shape == shape
     assert a.strides == (473344, 1376, 4)
+
+
+@mark.parametrize("spec,result", [("i", np.int32), ("d", np.float64)])
+def test_zeros(spec, result):
+    a = np.asarray(cu.zeros(shape, spec))
+    assert a.dtype == result
+    assert a.shape == shape
+    assert not a.any()
+
+
+def test_copy():
+    a = np.random.random(shape)
+    b = np.asarray(cu.copy(a))
+    assert a.shape == b.shape
+    assert a.dtype == b.dtype
+    assert (a == b).all()
+
+
+def test_CuVec_creation(caplog):
+    with raises(TypeError):
+        cu.CuVec()
+
+    with raises(NotImplementedError):
+        cu.CuVec(shape)
+
+    caplog.set_level(logging.DEBUG)
+    caplog.clear()
+    v = cu.CuVec(np.ones(shape, dtype='h'))
+    assert [i[1:] for i in caplog.record_tuples] == [(10, 'copy'),
+                                                     (10, "wrap raw <class 'PyCuVec_h'>")]
+    assert v.shape == shape
+    assert v.dtype.char == 'h'
+    assert (v == 1).all()
+
+    caplog.clear()
+    v = cu.zeros(shape, 'd')
+    assert [i[1:] for i in caplog.record_tuples] == [(10, "wrap raw <class 'PyCuVec_d'>")]
+
+    caplog.clear()
+    v[0, 0, 0] = 1
+    assert not caplog.record_tuples
+    w = cu.CuVec(v)
+    assert [i[1:] for i in caplog.record_tuples] == [(10, "new view")]
+
+    caplog.clear()
+    assert w[0, 0, 0] == 1
+    v[0, 0, 0] = 9
+    assert w[0, 0, 0] == 9
+    assert v.cuvec is w.cuvec
+    assert v.data == w.data
+    assert not caplog.record_tuples
+
+
+def test_asarray():
+    v = cu.asarray(np.random.random(shape))
+    w = cu.CuVec(v)
+    assert w.cuvec == v.cuvec
+    assert (w == v).all()
+    assert np.asarray(w.cuvec).data == np.asarray(v.cuvec).data
+    x = cu.asarray(w.cuvec)
+    assert x.cuvec == v.cuvec
+    assert (x == v).all()
+    assert np.asarray(x.cuvec).data == np.asarray(v.cuvec).data
+    y = cu.asarray(x.tolist())
+    assert y.cuvec != v.cuvec
+    assert (y == v).all()
+    assert np.asarray(y.cuvec).data == np.asarray(v.cuvec).data
+    z = cu.asarray(v[:])
+    assert z.cuvec != v.cuvec
+    assert (z == v[:]).all()
+    assert np.asarray(z.cuvec).data == np.asarray(v.cuvec).data
+    s = cu.asarray(v[1:])
+    assert s.cuvec != v.cuvec
+    assert (s == v[1:]).all()
+    assert np.asarray(s.cuvec).data != np.asarray(v.cuvec).data
+
+
+def test_cuda_array_interface():
+    cupy = importorskip("cupy")
+    v = cu.asarray(np.random.random(shape))
+    assert hasattr(v, '__cuda_array_interface__')
+
+    c = cupy.asarray(v)
+    assert (c == v).all()
+    c[0, 0, 0] = 1
+    cu.dev_sync()
+    assert c[0, 0, 0] == v[0, 0, 0]
+    c[0, 0, 0] = 0
+    cu.dev_sync()
+    assert c[0, 0, 0] == v[0, 0, 0]
+
+    ndarr = v + 1
+    assert ndarr.shape == v.shape
+    assert ndarr.dtype == v.dtype
+    with raises(AttributeError):
+        ndarr.__cuda_array_interface__
diff --git a/tests/test_helpers.py b/tests/test_swigcuvec.py
similarity index 53%
rename from tests/test_helpers.py
rename to tests/test_swigcuvec.py
index 88f6422..69bd116 100644
--- a/tests/test_helpers.py
+++ b/tests/test_swigcuvec.py
@@ -3,11 +3,32 @@
 import numpy as np
 from pytest import importorskip, mark, raises
 
-import cuvec as cu
+from cuvec import dev_sync
 
+cu = importorskip("cuvec.swigcuvec")
 shape = 127, 344, 344
 
 
+@mark.parametrize("tp", list(cu.typecodes))
+def test_SWIGVector_asarray(tp):
+    v = cu.SWIGVector(tp, 1337)
+    assert repr(v) == f"SWIGVector('{tp}', 1337)"
+    a = np.asarray(v)
+    assert not a.any()
+    a[:7] = 42
+    b = np.asarray(v)
+    assert (b[:7] == 42).all()
+    assert not b[7:].any()
+    assert a.dtype == np.dtype(tp)
+    del a, b, v
+
+
+def test_strides():
+    a = cu.zeros(shape)
+    assert a.shape == shape
+    assert a.strides == (473344, 1376, 4)
+
+
 @mark.parametrize("spec,result", [("i", np.int32), ("d", np.float64)])
 def test_zeros(spec, result):
     a = np.asarray(cu.zeros(shape, spec))
@@ -34,15 +55,16 @@ def test_CuVec_creation(caplog):
     caplog.set_level(logging.DEBUG)
     caplog.clear()
     v = cu.CuVec(np.ones(shape, dtype='h'))
-    assert [i[1:] for i in caplog.record_tuples] == [(10, 'copy'),
-                                                     (10, "wrap raw <class 'Vector_h'>")]
+    assert [i[1:] for i in caplog.record_tuples] == [
+        (10, 'copy'), (10, "wrap swraw <class 'cuvec.swigcuvec.SWIGVector'>")]
     assert v.shape == shape
     assert v.dtype.char == 'h'
     assert (v == 1).all()
 
     caplog.clear()
     v = cu.zeros(shape, 'd')
-    assert [i[1:] for i in caplog.record_tuples] == [(10, "wrap raw <class 'Vector_d'>")]
+    assert [i[1:] for i in caplog.record_tuples] == [
+        (10, "wrap swraw <class 'cuvec.swigcuvec.SWIGVector'>")]
 
     caplog.clear()
     v[0, 0, 0] = 1
@@ -64,23 +86,29 @@ def test_asarray():
     w = cu.CuVec(v)
     assert w.cuvec == v.cuvec
     assert (w == v).all()
-    assert np.asarray(w.cuvec).data == np.asarray(v.cuvec).data
-    x = cu.asarray(w.cuvec)
+    assert str(w.swvec) == str(v.swvec)
+    assert np.asarray(w.swvec).data == np.asarray(v.swvec).data
+    x = cu.asarray(w.swvec)
+    x.resize(w.shape)
     assert x.cuvec == v.cuvec
     assert (x == v).all()
-    assert np.asarray(x.cuvec).data == np.asarray(v.cuvec).data
+    assert str(x.swvec) == str(v.swvec)
+    assert np.asarray(x.swvec).data == np.asarray(v.swvec).data
     y = cu.asarray(x.tolist())
     assert y.cuvec != v.cuvec
     assert (y == v).all()
-    assert np.asarray(y.cuvec).data == np.asarray(v.cuvec).data
+    assert str(y.swvec) != str(v.swvec)
+    assert np.asarray(y.swvec).data == np.asarray(v.swvec).data
     z = cu.asarray(v[:])
     assert z.cuvec != v.cuvec
     assert (z == v[:]).all()
-    assert np.asarray(z.cuvec).data == np.asarray(v.cuvec).data
+    assert str(z.swvec) != str(v.swvec)
+    assert np.asarray(z.swvec).data == np.asarray(v.swvec).data
     s = cu.asarray(v[1:])
     assert s.cuvec != v.cuvec
     assert (s == v[1:]).all()
-    assert np.asarray(s.cuvec).data != np.asarray(v.cuvec).data
+    assert str(s.swvec) != str(v.swvec)
+    assert np.asarray(s.swvec).data != np.asarray(v.swvec).data
 
 
 def test_cuda_array_interface():
@@ -91,12 +119,37 @@ def test_cuda_array_interface():
     c = cupy.asarray(v)
     assert (c == v).all()
     c[0, 0, 0] = 1
+    dev_sync()
     assert c[0, 0, 0] == v[0, 0, 0]
     c[0, 0, 0] = 0
+    dev_sync()
     assert c[0, 0, 0] == v[0, 0, 0]
 
+    d = cupy.asarray(v.swvec)
+    d[0] = 1
+    dev_sync()
+    assert d[0] == v[0, 0, 0]
+    d[0] = 0
+    dev_sync()
+    assert d[0] == v[0, 0, 0]
+
     ndarr = v + 1
     assert ndarr.shape == v.shape
     assert ndarr.dtype == v.dtype
     with raises(AttributeError):
         ndarr.__cuda_array_interface__
+
+
+def test_increment():
+    # `example_swig` is defined in ../cuvec/src/example_swig/
+    from cuvec.example_swig import increment_f
+    a = cu.zeros(shape, 'f')
+    assert (a == 0).all()
+    increment_f(a.cuvec, a.cuvec)
+    assert (a == 1).all()
+
+    a[:] = 0
+    assert (a == 0).all()
+
+    res = cu.asarray(increment_f(a.cuvec))
+    assert (res == 1).all()