diff --git a/.drone.yml b/.drone.yml
index 6ac7118..be0eae9 100644
--- a/.drone.yml
+++ b/.drone.yml
@@ -11,30 +11,30 @@ steps:
   image: julia:1.5
   commands:
   - "julia --project=. --check-bounds=yes --color=yes -e 'using InteractiveUtils; versioninfo(verbose=true); using Pkg; Pkg.build(); Pkg.test(coverage=true)'"
+
 ---
 kind: pipeline
-name: linux - arm64 - Julia 1.0
-
+name: linux - arm - Julia 1.6
 platform:
   os: linux
-  arch: arm64
+  arch: arm
 
 steps:
 - name: build
-  image: julia:1.0
+  image: julia:1.6
   commands:
   - "julia --project=. --check-bounds=yes --color=yes -e 'using InteractiveUtils; versioninfo(verbose=true); using Pkg; Pkg.build(); Pkg.test(coverage=true)'"
 
 ---
 kind: pipeline
-name: linux - arm - Julia 1.0
+name: linux - arm64 - Julia 1.6
 
 platform:
   os: linux
-  arch: arm
+  arch: arm64
 
 steps:
 - name: build
-  image: julia:1.0
+  image: julia:1.6
   commands:
   - "julia --project=. --check-bounds=yes --color=yes -e 'using InteractiveUtils; versioninfo(verbose=true); using Pkg; Pkg.build(); Pkg.test(coverage=true)'"
diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
new file mode 100644
index 0000000..65063c3
--- /dev/null
+++ b/.github/workflows/ci.yml
@@ -0,0 +1,42 @@
+name: CI
+on:
+  - push
+  - pull_request
+jobs:
+  test:
+    name: Julia ${{ matrix.version }} - ${{ matrix.os }} - ${{ matrix.arch }} - ${{ github.event_name }}
+    runs-on: ${{ matrix.os }}
+    strategy:
+      fail-fast: false
+      matrix:
+        version:
+          - '1.5'
+          - 'nightly'
+        os:
+          - ubuntu-latest
+          - macOS-latest
+          - windows-latest
+        arch:
+          - x64
+          - x86
+        exclude:
+          - os: macOS-latest
+            arch: x86
+    steps:
+      - uses: actions/checkout@v2
+      - uses: julia-actions/setup-julia@v1
+        with:
+          version: ${{ matrix.version }}
+          arch: ${{ matrix.arch }}
+      - uses: actions/cache@v1
+        env:
+          cache-name: cache-artifacts
+        with:
+          path: ~/.julia/artifacts
+          key: ${{ runner.os }}-test-${{ env.cache-name }}-${{ hashFiles('**/Project.toml') }}
+          restore-keys: |
+            ${{ runner.os }}-test-${{ env.cache-name }}-
+            ${{ runner.os }}-test-
+            ${{ runner.os }}-
+      - uses: julia-actions/julia-buildpkg@v1
+      - uses: julia-actions/julia-runtest@v1
diff --git a/.travis.yml b/.travis.yml
deleted file mode 100644
index 423d9fa..0000000
--- a/.travis.yml
+++ /dev/null
@@ -1,34 +0,0 @@
-## Documentation: http://docs.travis-ci.com/user/languages/julia/
-language: julia
-os:
-  - linux
-  - osx
-  - windows
-julia:
-  - 1.0
-  - 1
-  - nightly
-notifications:
-  email: false
-git:
-  depth: 99999999
-
-## uncomment the following lines to allow failures on nightly julia
-## (tests will run but not make your overall status red)
-matrix:
-  allow_failures:
-   - julia: nightly
-
-## uncomment and modify the following lines to manually install system packages
-#addons:
-#  apt: # apt-get for linux
-#    packages:
-#    - gfortran
-#before_script: # homebrew for mac
-#  - if [ $TRAVIS_OS_NAME = osx ]; then brew install gcc; fi
-
-## uncomment the following lines to override the default test script
-
-after_success:
-  # push coverage results to Codecov
-  - julia -e 'using Pkg; cd(Pkg.dir("StrBase")); Pkg.add("Coverage"); using Coverage; Codecov.submit(Codecov.process_folder())'
diff --git a/Project.toml b/Project.toml
index d17deff..b209a69 100644
--- a/Project.toml
+++ b/Project.toml
@@ -4,7 +4,7 @@ authors  = ["ScottPJones <scottjones@alum.mit.edu>"]
 keywords = ["Strings"]
 license  = "MIT"
 uuid     = "e79e7a6a-7bb1-5a4d-9d64-da657b06f53a"
-version = "1.0.4"
+version = "1.1.0"
 
 [deps]
 Unicode = "4ec0a83e-493e-50e2-b9ac-8f72acf5a8f5"
@@ -24,9 +24,9 @@ Random  = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c"
 test = ["Test", "Random"]
 
 [compat]
-julia = "^1.0.0"
-ModuleInterfaceTools = "^1.0.0"
+julia = "1"
+ModuleInterfaceTools = "1"
 MurmurHash3 = "^1.0.3"
-StrAPI = "^1.0.0"
+StrAPI = "1.1"
 ChrBase = "^1.0.1"
-CharSetEncodings = "^1.0.0"
+CharSetEncodings = "1"
diff --git a/src/StrBase.jl b/src/StrBase.jl
index 0ce1c45..84c3343 100644
--- a/src/StrBase.jl
+++ b/src/StrBase.jl
@@ -23,14 +23,15 @@ using ModuleInterfaceTools
 
 @api develop! check_string, unsafe_check_string, fast_check_string, skipascii, skipbmp,
               countmask, count_chars, _count_mask_al, _count_mask_ul, count_latin,
-              _copysub, _cvtsize, _repeat, empty_str, _data, _pntchunk, _str,
+              _copysub, _cvtsize, _repeat, empty_str, _data, _mask_bytes,
+              _pntchunk, _pntbigchunk, _str,
               ValidatedStyle, MutableStyle, EqualsStyle, CanContain
 
 @api develop LineCounts, CharTypes, CharStat, maxbit, calcstats, check_continuation,
              UTF_LONG, UTF_LATIN1, UTF_UNICODE2, UTF_UNICODE3, UTF_UNICODE4, UTF_SURROGATE,
-             UTF_INVALID, CHUNKSZ, CHUNKMSK,
+             UTF_INVALID, CHUNKSZ, CHUNKMSK, BIGCHUNKSZ, BIGCHUNKMSK,
              _memcmp, _memcpy, _memset, _fwd_memchr, _rev_memchr,
-             empty_string, _calcpnt, _mask_bytes, _allocate,
+             BigChunk, empty_string, _calcpnt, _allocate, SingleCU, MultiCU,
              MS_UTF8, MS_UTF16, MS_UTF32, MS_SubUTF32, MS_Latin, MS_ByteStr, MS_RawUTF8,
              _wrap_substr, _empty_sub,
              AccessType, UInt16_U, UInt32_U, UInt16_S, UInt32_S, UInt16_US, UInt32_US,
diff --git a/src/ascii.jl b/src/ascii.jl
index 20c7ef9..e8c9adc 100644
--- a/src/ascii.jl
+++ b/src/ascii.jl
@@ -1,32 +1,12 @@
 #=
 ASCIIStr type
 
-Copyright 2017-2018 Gandalf Software, Inc., Scott P. Jones,
+Copyright 2017-2020 Gandalf Software, Inc., Scott P. Jones,
 and other contributors to the Julia language
 Licensed under MIT License, see LICENSE.md
 Based in part on code for ASCIIString that used to be in Julia
 =#
 
-## overload methods for efficiency ##
-
-function _string(coll)
-    n = 0
-    for str in coll
-        n += ncodeunits(str)
-    end
-    buf, out = _allocate(UInt8, n)
-    for str in coll
-        @preserve str begin
-            len = ncodeunits(str)
-            unsafe_copyto!(out, pointer(str), len)
-            out += len
-        end
-    end
-    buf
-end
-
-string(c::MaybeSub{<:Str{ASCIICSE}}...) = length(c) == 1 ? c[1] : Str(ASCIICSE, _string(c))
-
 ## transcoding to ASCII ##
 
 function convert(::Type{<:Str{ASCIICSE}}, str::AbstractString)
diff --git a/src/compare.jl b/src/compare.jl
index 7b60f06..a7ab04a 100644
--- a/src/compare.jl
+++ b/src/compare.jl
@@ -54,7 +54,7 @@ end
     while pnt < fin
         str_done(b, pos) && return 1
         c1, pnt = _nextcp(C, pnt)
-        ch, pos = str_next(b, pos)
+        ch, pos = iterate(b, pos)
         c2 = ch%UInt32
         c1 == c2 || return ifelse(c1 < c2, -1, 1)
     end
@@ -93,7 +93,7 @@ function _cpeq(a::MaybeSub{T}, b) where {C<:CSE, T<:Str{C}}
     while pnt < fin
         str_done(b, pos) && return false
         c1, pnt = _nextcp(C, pnt)
-        ch, pos = str_next(b, pos)
+        ch, pos = iterate(b, pos)
         c1 == codepoint(ch) || return false
     end
     true
diff --git a/src/core.jl b/src/core.jl
index 8c53c91..49321a9 100644
--- a/src/core.jl
+++ b/src/core.jl
@@ -2,7 +2,7 @@
 Core functions
 
 
-Copyright 2017-2018 Gandalf Software, Inc., Scott P. Jones, and others (see Julia contributors)
+Copyright 2017-2020 Gandalf Software, Inc., Scott P. Jones, and others (see Julia contributors)
 Licensed under MIT License, see LICENSE.md
 
 Inspired by / derived from code in Julia
@@ -33,7 +33,7 @@ _nextcp(::Type{T}, pnt) where {T} = _nextcpfun(EncodingStyle(T), T, pnt)
 
 # Use more generic length check
 @inline _length_check(str::SubString{<:Str{C}}, cnt) where {C<:CSE} =
-    _length(MultiCU(), C, pointer(str), cnt)
+    @preserve str _length_ul(MultiCU(), C, pointer(str), cnt)
 
 # Go directly to aligned length check
 @inline _length_check(str::Str{C}, cnt) where {C<:CSE} =
@@ -42,7 +42,7 @@ _nextcp(::Type{T}, pnt) where {T} = _nextcpfun(EncodingStyle(T), T, pnt)
 @inline _length(::MultiCU, str::MaybeSub{T}) where {T<:Str} =
     (cnt = ncodeunits(str); cnt < 2 ? Int(cnt > 0) : @preserve str _length_check(str, cnt))
 
-@inline _length(::SingleCU, ::Type{<:CSE}, ::Ptr{<:CodeUnitTypes}, cnt::Int) = cnt
+@inline _length_ul(::SingleCU, ::Type{<:CSE}, ::Ptr{<:CodeUnitTypes}, cnt::Int) = cnt
 
 @inline _length(::MultiCU, str::Str{RawUTF8CSE}) = length(str.data)
 @inline _length(::MultiCU, str::Str{RawUTF8CSE}, i::Int, j::Int) = length(str.data, i, j)
@@ -55,7 +55,7 @@ _nextcp(::Type{T}, pnt) where {T} = _nextcpfun(EncodingStyle(T), T, pnt)
         0 <= j <  lim || boundserr(str, j)
     end
     (cnt = j - i + 1) <= 0 ? 0 :
-        @preserve str _length(cs, cse(str), bytoff(pointer(str), i - 1), cnt)
+        @preserve str _length_ul(cs, cse(str), bytoff(pointer(str), i - 1), cnt)
 end
 
 @inline _thisind(::SingleCU, str, len, pnt, pos) = Int(pos)
diff --git a/src/latin.jl b/src/latin.jl
index 2b4a512..9f63fcb 100644
--- a/src/latin.jl
+++ b/src/latin.jl
@@ -1,7 +1,8 @@
 #=
 LatinStr/_LatinStr type (ISO Latin1 8-bit subset of Unicode)
 
-Copyright 2017 Gandalf Software, Inc., Scott P. Jones, and other contributors to the Julia language
+Copyright 2017, 2020 Gandalf Software, Inc., Scott P. Jones,
+and other contributors to the Julia language
 Licensed under MIT License, see LICENSE.md
 Based in part on code for ASCIIString that used to be in Julia
 =#
@@ -13,23 +14,6 @@ is_latin(str::MaybeSub{<:Str{<:LatinCSE}}) = true
 is_bmp(str::MS_Latin) = true
 is_unicode(str::MS_Latin) = true
 
-const MS_ASCIILatin = MaybeSub{<:Str{<:Union{ASCIICSE, Latin_CSEs}}}
-
-function string(collection::MS_ASCIILatin...)
-    length(collection) == 1 && return collection[1]
-    len = 0
-    @inbounds for str in collection
-        len += ncodeunits(str)
-    end
-    buf, pnt = _allocate(len)
-    @inbounds for str in collection
-        len = ncodeunits(str)
-        _memcpy(pnt, pointer(str), len)
-        pnt += len
-    end
-    Str(LatinCSE, buf)
-end
-
 ## transcoding to Latin1 ##
 
 function convert(::Type{<:Str{C}}, str::AbstractString) where {C<:Latin_CSEs}
@@ -167,7 +151,7 @@ end
 
 function convert(::Type{<:Str{C}}, vec::Vector{CU}) where {C<:Latin_CSEs,CU<:CodeUnitTypes}
     # handle zero length string quickly
-    (len = length(vec)) == 0 && return _empty_str(C)
+    (len = length(vec)) == 0 && return empty_str(C)
     @preserve vec begin
         pnt = pointer(vec)
         # get number of bytes to allocate
diff --git a/src/search.jl b/src/search.jl
index a5ac2e8..16ca775 100644
--- a/src/search.jl
+++ b/src/search.jl
@@ -121,6 +121,10 @@ found(::Type{<:AbstractString}, v) = v != 0
 find_result(::Type{<:AbstractString}, v) = v
 
 nothing_sentinel(i) = first(i) == 0 ? nothing : i
+Base.findfirst(a::AbstractChar, b::Str)   = nothing_sentinel(find(First, a, b))
+Base.findlast(a::AbstractChar, b::Str)    = nothing_sentinel(find(Last, a, b))
+Base.findnext(a::AbstractChar, b::Str, i) = nothing_sentinel(find(Fwd, a, b, i))
+Base.findprev(a::AbstractChar, b::Str, i) = nothing_sentinel(find(Rev, a, b, i))
 Base.findfirst(a, b::Str)   = nothing_sentinel(find(First, a, b))
 Base.findlast(a, b::Str)    = nothing_sentinel(find(Last, a, b))
 Base.findnext(a, b::Str, i) = nothing_sentinel(find(Fwd, a, b, i))
@@ -189,7 +193,7 @@ function find(::Type{D}, needle::AbstractString, str::AbstractString,
     @inbounds is_valid(str, pos) || index_error(str, pos)
     (tlen = ncodeunits(needle)) == 0 && return pos:pos-1
     (cmp = CanContain(str, needle)) === NoCompare() && return _not_found
-    @inbounds ch, nxt = str_next(needle, 1)
+    @inbounds ch, nxt = iterate(needle, 1)
     is_valid(eltype(str), ch) || return _not_found
     # Check if single character
     if nxt > tlen
@@ -205,7 +209,7 @@ function find(::Type{T}, needle::AbstractString, str::AbstractString) where {T<:
     pos = T === First ? 1 : thisind(str, slen)
     (tlen = ncodeunits(needle)) == 0 && return pos:(pos-1)
     (cmp = CanContain(str, needle)) === NoCompare() && return _not_found
-    @inbounds ch, nxt = str_next(needle, 1)
+    @inbounds ch, nxt = iterate(needle, 1)
     is_valid(eltype(str), ch) || return _not_found
     # Check if single character
     if nxt > tlen
@@ -298,8 +302,8 @@ end
 """Compare two strings, starting at nxtstr and nxtsub"""
 @inline function _cmp_str(str, strpos, endpos, sub, subpos, endsub)
     while strpos <= endpos
-        c, strnxt = str_next(str, strpos)
-        d, subpos = str_next(sub, subpos)
+        c, strnxt = iterate(str, strpos)
+        d, subpos = iterate(sub, subpos)
         c == d || break
         subpos > endsub && return strpos
         strpos = strnxt
diff --git a/src/support.jl b/src/support.jl
index 73d4d1a..48c5ddd 100644
--- a/src/support.jl
+++ b/src/support.jl
@@ -264,7 +264,7 @@ function unsafe_check_string(str::T;
     totalchar = latin1byte = num2byte = num3byte = num4byte = invalids = 0
     pos = 1
     @inbounds while !str_done(str, pos)
-        chr, nxt = str_next(str, pos)
+        chr, nxt = iterate(str, pos)
         ch = chr%UInt32
         totalchar += 1
         if ch > 0x7f
@@ -288,7 +288,7 @@ function unsafe_check_string(str::T;
                     break
                 end
                 # next character *must* be a trailing surrogate character
-                chr, nxt = str_next(str, nxt)
+                chr, nxt = iterate(str, nxt)
                 if !is_surrogate_trail(chr)
                     accept_invalids || strerror(StrErrors.NOT_TRAIL, pos, chr)
                     invalids += 1
diff --git a/src/types.jl b/src/types.jl
index 3a36f99..2078f8b 100644
--- a/src/types.jl
+++ b/src/types.jl
@@ -30,18 +30,6 @@ _mskdn32(v, m, s) = _msk32(v, m) >>> s
 (::Type{Str})(::Type{C}, v::String) where {C<:CSE} = Str(C, v, nothing, nothing, nothing)
 (::Type{Str})(::Type{C}, v::Str) where {C<:CSE} = Str(C, v.data, nothing, nothing, nothing)
 
-# Handle change from endof -> lastindex
-@static if !isdefined(Base, :lastindex)
-    lastindex(str::AbstractString) = Base.endof(str)
-    lastindex(arr::AbstractArray) = Base.endof(arr)
-    Base.endof(str::Str) = lastindex(str)
-end
-@static if !isdefined(Base, :firstindex)
-    firstindex(str::AbstractString) = 1
-    # AbstractVector might be an OffsetArray
-    firstindex(str::Vector) = 1
-end
-
 # Definition of built-in Str types
 
 const empty_string = ""
@@ -131,8 +119,18 @@ pointer(s::Str{<:Quad_CSEs}) = reinterpret(Ptr{UInt32}, pointer(s.data))
 const CHUNKSZ = sizeof(UInt) # used for fast processing of strings
 const CHUNKMSK = (CHUNKSZ-1)%UInt
 
-_pntchunk(s::Union{String,Vector{UInt8}}) = reinterpret(Ptr{UInt}, pointer(s))
-_pntchunk(s::Str) = reinterpret(Ptr{UInt}, pointer(s.data))
+_pntchunk(p::Union{UInt,Ptr}) = reinterpret(Ptr{UInt}, p)
+_pntchunk(s::Union{String,Vector{UInt8}}) = _pntchunk(pointer(s))
+_pntchunk(s::Str) = _pntchunk(pointer(s.data))
+
+# Type and mask for even faster string handling
+const BigChunk = UInt === UInt32 ? UInt64 : UInt128
+const BIGCHUNKSZ = sizeof(BigChunk)
+const BIGCHUNKMSK = (BIGCHUNKSZ-1)%UInt
+
+_pntbigchunk(p::Union{UInt,Ptr}) = reinterpret(Ptr{BigChunk}, p)
+_pntbigchunk(s::Union{String,Vector{UInt8}}) = _pntbigchunk(pointer(s))
+_pntbigchunk(s::Str) = _pntbigchunk(pointer(s.data))
 
 """Length of string in codeunits"""
 ncodeunits(s::Str)              = sizeof(s)
@@ -144,6 +142,15 @@ ncodeunits(s::Str{<:Quad_CSEs}) = sizeof(s) >>> 2
 
 @inline _mask_bytes(n) = ((1%UInt) << ((n & CHUNKMSK) << 3)) - 0x1
 
+@inline _big_mask_bytes(n) = ((1%BigChunk) << ((n & BIGCHUNKMSK) << 3)) - 0x1
+
+@inline function _mask_bytes(v::T, cnt) where {T}
+    shft = ((cnt & (sizeof(T) - 1))%UInt) << 3
+    ifelse(shft == 0, v, v & ~(typemax(T) << shft))
+end
+
+@inline _widen_mask(msk::UInt) = ((msk%BigChunk) << (8*sizeof(UInt))) | msk
+
 # Support for SubString of Str
 
 Base.SubString(str::Str{C}) where {C<:SubSet_CSEs} =
diff --git a/src/utf16.jl b/src/utf16.jl
index bfc3b6d..cbd3d7b 100644
--- a/src/utf16.jl
+++ b/src/utf16.jl
@@ -10,34 +10,65 @@ Based in (small) part on code for UTF16String that used to be in Julia
 const _trail_mask = CHUNKSZ == 4 ? 0xdc00_dc00 : 0xdc00_dc00_dc00_dc00
 const _hi_bit_16  = CHUNKSZ == 4 ? 0x8000_8000 : 0x8000_8000_8000_8000
 
-@inline _mask_surr(v)  = xor((v | v<<1 | v<<2 | v<<3 | v<<4 | v<<5) & _hi_bit_16, _hi_bit_16)
-@inline _get_masked(v::UInt) = _mask_surr(xor(v, _trail_mask))
-@inline _get_masked(qpnt::Ptr{UInt}) = _get_masked(unsafe_load(qpnt))
-@inline _get_lead(qpnt) = xor(_get_masked(qpnt), _hi_bit_16)
+const _big_trail_mask = _widen_mask(_trail_mask)
+const _big_hi_bit_16  = _widen_mask(_hi_bit_16)
 
-@inline function _align_len_utf16(pnt, cnt, v)
-    len = 0
-    fin = pnt + cnt
-    while (pnt += CHUNKSZ) < fin
-        len += count_ones(v)
-        v = _get_lead(pnt)
+@inline _mask_surr(v, msk)  = xor((v | v<<1 | v<<2 | v<<3 | v<<4 | v<<5) & msk, msk)
+
+@inline _get_masked(v::UInt) = _mask_surr(xor(v, _trail_mask), _hi_bit_16)
+@inline _get_masked(v::BigChunk) = _mask_surr(xor(v, _big_trail_mask), _big_hi_bit_16)
+@inline _get_masked(qpnt::Ptr) = _get_masked(unsafe_load(qpnt))
+
+@inline _get_lead(qpnt::Ptr{UInt}) = xor(_get_masked(qpnt), _hi_bit_16)
+@inline _get_lead(qpnt::Ptr{BigChunk}) = xor(_get_masked(qpnt), _big_hi_bit_16)
+
+## overload methods for efficiency ##
+
+function _length_utf16_al(beg::Ptr{UInt16}, cnt::Int)
+    len = count_ones(_get_lead(_pntchunk(beg)))
+    cnt -= CHUNKSZ
+    pnt = _pntbigchunk(beg + CHUNKSZ)
+    v = _get_lead(pnt)
+    if cnt > BIGCHUNKSZ
+        fin = pnt + cnt
+        while (pnt += BIGCHUNKSZ) < fin
+            len += count_ones(v)
+            v = _get_lead(pnt)
+        end
     end
-    len + count_ones((cnt & CHUNKMSK) == 0 ? v : (v & _mask_bytes(cnt)))
+    len + count_ones(_mask_bytes(v, cnt))
 end
 
-_length_al(::MultiCU, ::Type{UTF16CSE}, beg::Ptr{UInt16}, cnt::Int) =
-    (pnt = reinterpret(Ptr{UInt}, beg); _align_len_utf16(pnt, cnt<<1, _get_lead(pnt)))
+function _length_al(::MultiCU, ::Type{UTF16CSE}, beg::Ptr{UInt16}, cnt::Int)
+    # First check very frequent cases of short strings
+    # (on 64-bit machines, 1-8 bytes, 9-16 bytes, and 17-24)
+    # taking advantage of the knowledge of how String types are stored in Julia,
+    # i.e. UInt length, immediately followed by the string data, aligned on sizeof(UInt)*2
+    cnt <<= 1  # code units -> bytes; everything below works in bytes
+    (cnt <= BIGCHUNKSZ
+     ? (cnt <= CHUNKSZ
+        ? count_ones(_mask_bytes(_get_lead(_pntchunk(beg)), cnt))
+        : count_ones(_mask_bytes(_get_lead(_pntbigchunk(beg)), cnt)))
+     : _length_utf16_al(beg, cnt))
+end
 
-function _length(::MultiCU, ::Type{UTF16CSE}, beg::Ptr{UInt16}, cnt::Int)
+function _length_ul(::MultiCU, ::Type{UTF16CSE}, beg::Ptr{UInt16}, cnt::Int)
     align = reinterpret(UInt, beg)
-    pnt = reinterpret(Ptr{UInt}, align & ~CHUNKMSK)
+    pnt = reinterpret(Ptr{BigChunk}, align & ~BIGCHUNKMSK)
+    cnt <<= 1
     v = _get_lead(pnt)
-    if (align &= CHUNKMSK) != 0
-        msk = _mask_bytes(align)
-        v = (v & ~msk) | (msk & _trail_mask)
-        cnt += (align>>>1)
+    if (align &= BIGCHUNKMSK) != 0
+        msk = _big_mask_bytes(align)
+        v = (v & ~msk) | (msk & _big_trail_mask)
+        cnt += align
     end
-    _align_len_utf16(pnt, cnt<<1, v)
+    len = 0
+    fin = pnt + cnt
+    while (pnt += BIGCHUNKSZ) < fin
+        len += count_ones(v)
+        v = _get_lead(pnt)
+    end
+    len + count_ones(_mask_bytes(v, cnt))
 end
 
 function _nextind(::MultiCU, str::MS_UTF16, pos::Int, nchar::Int)
@@ -79,48 +110,44 @@ function _prevind(::MultiCU, str::MS_UTF16, pos::Int, nchar::Int)
 end
 
 # Check for any surrogate characters
-function is_bmp(str::MS_UTF16)
-    (siz = sizeof(str)) == 0 && return true
-    # Todo: handle unaligned for ARM32
-    @preserve str begin
-        siz < CHUNKSZ && return (_get_masked(_pntchunk(str)) & _mask_bytes(siz)) == 0
-
-        pnt, fin = _calcpnt(str, siz)
-        while (pnt += CHUNKSZ) <= fin
-            _get_masked(pnt) == 0 || return false
-        end
-        pnt - CHUNKSZ == fin || (_get_masked(pnt) & _mask_bytes(siz)) == 0
-    end
-end
-
-@inline function _check_bmp_utf16_al(pnt, cnt, v)
+@inline function _check_bmp_utf16_al(beg, cnt)
+    cnt <= CHUNKSZ && return _mask_bytes(_get_masked(_pntchunk(beg)), cnt) == 0
+    cnt <= BIGCHUNKSZ && return _mask_bytes(_get_masked(_pntbigchunk(beg)), cnt) == 0
+    _get_masked(_pntchunk(beg)) == 0 || return false
+    cnt -= CHUNKSZ  # first chunk verified; the remaining bytes start at beg + CHUNKSZ
+    pnt = _pntbigchunk(beg + CHUNKSZ)
+    v = _get_masked(pnt)
+    cnt <= BIGCHUNKSZ && return _mask_bytes(v, cnt) == 0
     fin = pnt + cnt
-    v = _get_masked(v)
-    while (pnt += CHUNKSZ) < fin
+    while (pnt += BIGCHUNKSZ) < fin
         v == 0 || return false
         v = _get_masked(pnt)
     end
-    ((cnt & CHUNKMSK) == 0 ? v : (v & _mask_bytes(cnt))) == 0
+    _mask_bytes(v, cnt) == 0
 end
-@inline _check_bmp_utf16_al(pnt, cnt) = _check_bmp_utf16_al(pnt, cnt, unsafe_load(pnt))
 
 @inline function _check_bmp_utf16_ul(beg, cnt)
     align = reinterpret(UInt, beg)
-    pnt = reinterpret(Ptr{UInt}, align & ~CHUNKMSK)
+    pnt = reinterpret(Ptr{BigChunk}, align & ~BIGCHUNKMSK)
     v = unsafe_load(pnt)
-    if (align &= CHUNKMSK) != 0
-        v &= ~_mask_bytes(align)
+    if (align &= BIGCHUNKMSK) != 0
+        v &= ~_big_mask_bytes(align)
         cnt += align
     end
-    _check_bmp_utf16_al(pnt, cnt, v)
+    v = _get_masked(v)
+    fin = pnt + cnt
+    while (pnt += BIGCHUNKSZ) < fin
+        v == 0 || return false
+        v = _get_masked(pnt)
+    end
+    _mask_bytes(v, cnt) == 0
 end
 
 is_bmp(str::Str{UTF16CSE}) =
-    (cnt = sizeof(str)) == 0 ? true :
-    @preserve str _check_bmp_utf16_al(reinterpret(Ptr{UInt}, pointer(str)), cnt)
+    (cnt = sizeof(str)) == 0 || @preserve str _check_bmp_utf16_al(pointer(str), cnt)
 
 is_bmp(str::SubString{<:Str{UTF16CSE}}) =
-    (cnt = sizeof(str)) == 0 ? true : @preserve str _check_bmp_utf16_ul(pointer(str), cnt)
+    (cnt = sizeof(str)) == 0 || @preserve str _check_bmp_utf16_ul(pointer(str), cnt)
 
 is_bmp(str::MaybeSub{<:Str{<:UCS2_CSEs}}) = true
 
diff --git a/src/utf8.jl b/src/utf8.jl
index 8b453ee..4300d27 100644
--- a/src/utf8.jl
+++ b/src/utf8.jl
@@ -1,7 +1,7 @@
 #=
 UTF8Str type
 
-Copyright 2017-2018 Gandalf Software, Inc., Scott P. Jones,
+Copyright 2017-2020 Gandalf Software, Inc., Scott P. Jones,
 and other contributors to the Julia language
 
 Licensed under MIT License, see LICENSE.md
@@ -90,161 +90,237 @@ xor 80 then << 1 then |
 =#
 
 const hi_mask = CHUNKSZ == 4 ? 0x8080_8080 : 0x8080_8080_8080_8080
+const big_hi_mask = _widen_mask(hi_mask)
 
-@inline _count_cont(v) = (v = xor(v, hi_mask); count_ones(xor(((v << 1) | v), hi_mask) & hi_mask))
-@inline msk_lead(v) = (v = xor(v, hi_mask); xor(xor(((v << 1) | v), hi_mask) & hi_mask, hi_mask))
+@inline get_high_mask(::UInt) = hi_mask
+@inline get_high_mask(::BigChunk) = big_hi_mask
 
-@inline function _align_len_utf8(pnt, cnt, v)
-    len = 0
-    fin = pnt + cnt
-    v = msk_lead(v)
-    while (pnt += CHUNKSZ) < fin
+@inline msk_lead(v, msk) = (v = xor(v, msk); xor(xor(((v << 1) | v), msk) & msk, msk))
+
+@inline msk_lead(v) = msk_lead(v, get_high_mask(v))
+
+@inline get_lead(T, ptr) = msk_lead(unsafe_load(reinterpret(Ptr{T}, ptr)))
+
+@inline count_masked(v, cnt) = count_ones(_mask_bytes(v, cnt))
+
+function _length_al(::MultiCU, ::Type{UTF8CSE}, beg::Ptr{UInt8}, cnt::Int)
+    # First check very frequent cases of short strings
+    # (on 64-bit machines, 1-8 bytes, 9-16 bytes, and 17-24)
+    # taking advantage of the knowledge of how String types are stored in Julia,
+    # i.e. UInt length, immediately followed by the string data, aligned on sizeof(UInt)*2
+    if cnt <= BIGCHUNKSZ
+        return (cnt <= CHUNKSZ
+                ? count_masked(get_lead(UInt, beg), cnt)
+                : count_masked(get_lead(BigChunk, beg), cnt))
+    end
+    len = count_ones(get_lead(UInt, beg))  # first UInt-sized chunk is counted in full
+    cnt -= CHUNKSZ                          # bytes remaining after that first chunk
+    pnt = _pntbigchunk(beg + CHUNKSZ)
+    v = get_lead(BigChunk, pnt)
+    cnt <= BIGCHUNKSZ && return len + count_masked(v, cnt)
+    fin = _pntbigchunk(beg + CHUNKSZ + cnt)
+    while (pnt += BIGCHUNKSZ) < fin
         len += count_ones(v)
-        v = msk_lead(unsafe_load(pnt))
+        v = get_lead(BigChunk, pnt)
     end
-    len + count_ones(cnt & CHUNKMSK == 0 ? v : (v & _mask_bytes(cnt)))
+    len + count_masked(v, cnt)
 end
 
-_length_al(::MultiCU, ::Type{UTF8CSE}, beg::Ptr{UInt8}, cnt::Int) =
-    (pnt = reinterpret(Ptr{UInt}, beg); _align_len_utf8(pnt, cnt, unsafe_load(pnt)))
-
-function _length(::MultiCU, ::Type{UTF8CSE}, beg::Ptr{UInt8}, cnt::Int)
+function _length_ul(::MultiCU, ::Type{UTF8CSE}, beg::Ptr{UInt8}, cnt::Int)
     align = reinterpret(UInt, beg)
-    pnt = reinterpret(Ptr{UInt}, align & ~CHUNKMSK)
+    pnt = reinterpret(Ptr{BigChunk}, align & ~BIGCHUNKMSK)
     v = unsafe_load(pnt)
-    if (align &= CHUNKMSK) != 0
-        msk = _mask_bytes(align)
-        v = (v & ~msk) | (msk & hi_mask)
+    if (align &= BIGCHUNKMSK) != 0
+        msk = _big_mask_bytes(align)
+        v = (v & ~msk) | (msk & big_hi_mask)
         cnt += align
     end
-    _align_len_utf8(pnt, cnt, v)
-end
-
-@inline function _check_mask_al(pnt, cnt, msk, v)
+    len = 0
     fin = pnt + cnt
-    while (pnt += CHUNKSZ) < fin
-        (v & msk) == 0 || return false
+    v = msk_lead(v)
+    while (pnt += BIGCHUNKSZ) < fin
+        len += count_ones(v)
+        v = msk_lead(unsafe_load(pnt))
+    end
+    len + count_masked(v, cnt)
+end
+
+@inline get_chunk(ptr, msk::T, cnt) where {T} =
+    _mask_bytes(unsafe_load(reinterpret(Ptr{T}, ptr)) & msk, cnt)
+
+@inline function _check_mask_al(ptr, cnt, msk)
+    # First check very frequent cases of short strings
+    # (on 64-bit machines, 1-8 bytes, 9-16 bytes, and 17-24)
+    # taking advantage of the knowledge of how String types are stored in Julia,
+    # i.e. UInt length, immediately followed by the string data, aligned on sizeof(UInt)*2
+    cnt <= CHUNKSZ && return get_chunk(ptr, msk, cnt) == 0
+    bigmsk = _widen_mask(msk)
+    cnt <= BIGCHUNKSZ && return get_chunk(ptr, bigmsk, cnt) == 0
+    (unsafe_load(_pntchunk(ptr)) & msk) == 0 || return false
+    cnt -= CHUNKSZ  # first chunk verified; the remaining bytes start at ptr + CHUNKSZ
+    cnt <= BIGCHUNKSZ && return get_chunk(ptr + CHUNKSZ, bigmsk, cnt) == 0
+    pnt = _pntbigchunk(ptr + CHUNKSZ)
+    fin = _pntbigchunk(ptr + CHUNKSZ + cnt)
+    v = unsafe_load(pnt)
+    while (pnt += BIGCHUNKSZ) < fin
+        (v & bigmsk) == 0 || return false
         v = unsafe_load(pnt)
     end
-    (cnt & CHUNKMSK == 0 ? v : (v & _mask_bytes(cnt))) & msk == 0
+    _mask_bytes(v & bigmsk, cnt) == 0
 end
-@inline _check_mask_al(pnt, cnt, msk) = _check_mask_al(pnt, cnt, msk, unsafe_load(pnt))
 
 @inline function _check_mask_ul(beg, cnt, msk)
+    bigmsk = _widen_mask(msk)
     align = reinterpret(UInt, beg)
-    pnt = reinterpret(Ptr{UInt}, align & ~CHUNKMSK)
+    pnt = _pntbigchunk(align & ~BIGCHUNKMSK)
     v = unsafe_load(pnt)
-    if (align &= CHUNKMSK) != 0
-        v &= ~_mask_bytes(align)
+    if (align &= BIGCHUNKMSK) != 0
+        v &= ~_big_mask_bytes(align)
         cnt += align
     end
-    _check_mask_al(pnt, cnt, msk, v)
+    fin = pnt + cnt
+    while (pnt += BIGCHUNKSZ) < fin
+        (v & bigmsk) == 0 || return false
+        v = unsafe_load(pnt)
+    end
+    _mask_bytes(v & bigmsk, cnt) == 0
 end
 
 _ascii_mask(::Type{UInt8})  = hi_mask
-_ascii_mask(::Type{UInt16}) = 0xff80_ff80_ff80_ff80
-_ascii_mask(::Type{UInt32}) = 0xffffff80_ffffff80
+@static if CHUNKSZ == 4  # 32-bit build: each mask literal must be exactly one UInt wide
+    _ascii_mask(::Type{UInt16}) = 0xff80_ff80
+    _ascii_mask(::Type{UInt32}) = 0xffffff80
 
-_latin_mask(::Type{UInt16}) = 0xff00_ff00_ff00_ff00
-_latin_mask(::Type{UInt32}) = 0xffffff00_ffffff00
+    _latin_mask(::Type{UInt16}) = 0xff00_ff00
+    _latin_mask(::Type{UInt32}) = 0xffffff00
 
-const _bmp_mask_32   = 0xffff0000_ffff0000
+    const _bmp_mask_32   = 0xffff0000
+else
+    _ascii_mask(::Type{UInt16}) = 0xff80_ff80_ff80_ff80
+    _ascii_mask(::Type{UInt32}) = 0xffffff80_ffffff80
 
-is_ascii(str::SubString{<:Str{C}}) where {C<:Union{UTF8CSE,LatinCSE,Binary_CSEs,UTF16CSE,UCS2CSE,
-                                                   Text2CSE,Text4CSE,UTF32CSE}} =
-    (cnt = sizeof(str)) == 0 ? true :
-    @preserve str _check_mask_ul(pointer(str), cnt, _ascii_mask(codeunit(C)))
+    _latin_mask(::Type{UInt16}) = 0xff00_ff00_ff00_ff00
+    _latin_mask(::Type{UInt32}) = 0xffffff00_ffffff00
+
+    const _bmp_mask_32   = 0xffff0000_ffff0000
+end
+
+const ASCII_Union = Union{UTF8CSE,LatinCSE,Binary_CSEs,UTF16CSE,UCS2CSE,Text2CSE,Text4CSE,UTF32CSE}
+
+is_ascii(str::SubString{<:Str{C}}) where {C<:ASCII_Union} =
+    (cnt = sizeof(str)) == 0 ||
+        (@preserve str _check_mask_ul(pointer(str), cnt, _ascii_mask(codeunit(C))))
 
 is_ascii(vec::Vector{T}) where {T<:CodeUnitTypes} =
-    (cnt = sizeof(vec)) == 0 ? true :
-    @preserve vec _check_mask_ul(pointer(vec), cnt, _ascii_mask(T))
+    (cnt = sizeof(vec)) == 0 ||
+         (@preserve vec _check_mask_ul(pointer(vec), cnt, _ascii_mask(T)))
 
-is_ascii(str::Str{C}) where {C<:Union{UTF8_CSEs,LatinCSE,Binary_CSEs,UTF16CSE,UCS2CSE,
-                                      Text2CSE,Text4CSE,UTF32CSE}} =
-    (cnt = sizeof(str)) == 0 ? true :
-    @preserve str _check_mask_al(reinterpret(Ptr{UInt}, pointer(str)), cnt,
-                                 _ascii_mask(codeunit(C)))
+is_ascii(str::Str{C}) where {C<:ASCII_Union} =
+    (cnt = sizeof(str)) == 0 ||
+         (@preserve str _check_mask_al(pointer(str), cnt, _ascii_mask(codeunit(C))))
 
 # Todo! Here you need to see that 0b11yyyyxx at least 1 y must be set,
 # which indicates a non-Latin1 character
-_all_latin(val) = ((val & (val<<1) & (val<<2 | (val<<3) | (val<<4) | (val<<5))) & hi_mask) == 0
-
-@inline function _check_latin_utf8_al(pnt, cnt, v)
+_all_latin(val) =
+    ((val & (val<<1) & (val<<2 | (val<<3) | (val<<4) | (val<<5))) & get_high_mask(val)) == 0
+
+@inline function _check_latin_utf8_al(beg, cnt)
+    cnt <= CHUNKSZ && return _all_latin(_mask_bytes(unsafe_load(_pntchunk(beg)), cnt))
+    cnt <= BIGCHUNKSZ && return _all_latin(_mask_bytes(unsafe_load(_pntbigchunk(beg)), cnt))
+    _all_latin(unsafe_load(_pntchunk(beg))) || return false
+    cnt -= CHUNKSZ  # first chunk verified; the remaining bytes start at beg + CHUNKSZ
+    pnt = _pntbigchunk(beg + CHUNKSZ)
+    v = unsafe_load(pnt)
+    cnt <= BIGCHUNKSZ && return _all_latin(_mask_bytes(v, cnt))
     fin = pnt + cnt
-    while (pnt += CHUNKSZ) < fin
+    while (pnt += BIGCHUNKSZ) < fin
         _all_latin(v) || return false
         v = unsafe_load(pnt)
     end
-    _all_latin(cnt & CHUNKMSK == 0 ? v : (v & _mask_bytes(cnt)))
+    _all_latin(_mask_bytes(v, cnt))
 end
-@inline _check_latin_utf8_al(pnt, cnt) = _check_latin_utf8_al(pnt, cnt, unsafe_load(pnt))
 
 @inline function _check_latin_utf8_ul(beg, cnt)
     align = reinterpret(UInt, beg)
-    pnt = reinterpret(Ptr{UInt}, align & ~CHUNKMSK)
+    pnt = reinterpret(Ptr{BigChunk}, align & ~BIGCHUNKMSK)
     v = unsafe_load(pnt)
-    if (align &= CHUNKMSK) != 0
-        v &= ~_mask_bytes(align)
+    if (align &= BIGCHUNKMSK) != 0
+        v &= ~_big_mask_bytes(align)
         cnt += align
     end
-    _check_latin_utf8_al(pnt, cnt, v)
+    fin = pnt + cnt
+    while (pnt += BIGCHUNKSZ) < fin
+        _all_latin(v) || return false
+        v = unsafe_load(pnt)
+    end
+    _all_latin(_mask_bytes(v, cnt))
 end
 
 is_latin(str::Str{UTF8CSE}) =
-    (siz = sizeof(str)) == 0 ? true :
-    @preserve str _check_latin_utf8_al(reinterpret(Ptr{UInt}, pointer(str)), siz)
+    (siz = sizeof(str)) == 0 || @preserve str _check_latin_utf8_al(pointer(str), siz)
 
 is_latin(str::SubString{<:Str{UTF8CSE}}) =
-    (cnt = sizeof(str)) == 0 ? true : @preserve str _check_latin_utf8_ul(pointer(str), cnt)
+    (cnt = sizeof(str)) == 0 || @preserve str _check_latin_utf8_ul(pointer(str), cnt)
 
 is_latin(vec::Vector{T}) where {T<:Union{UInt16,UInt32}} =
-    (cnt = sizeof(vec)) == 0 ? true :
+    (cnt = sizeof(vec)) == 0 ||
     @preserve vec _check_mask_ul(pointer(vec), cnt, _latin_mask(T))
 
 is_latin(str::SubString{<:Str{C}}) where {C<:Union{Word_CSEs,Quad_CSEs}} =
-    (cnt = sizeof(str)) == 0 ? true :
+    (cnt = sizeof(str)) == 0 ||
     @preserve str _check_mask_ul(pointer(str), cnt, _latin_mask(codeunit(C)))
 
 is_latin(str::Str{C}) where {C<:Union{Word_CSEs,Quad_CSEs}} =
-    (cnt = sizeof(str)) == 0 ? true :
+    (cnt = sizeof(str)) == 0 ||
     @preserve str _check_mask_al(pointer(str), cnt, _latin_mask(codeunit(C)))
 
 # All 4 top bits must be 1 (i.e. 0xfx) for this to be non-BMP
-_all_bmp(val) = ((val | (val<<1) | (val<<2) | (val<<3)) & hi_mask) == 0
-
-@inline function _check_bmp_utf8_al(pnt, cnt, v)
-    fin = pnt + cnt
-    while (pnt += CHUNKSZ) < fin
+_all_bmp(val) = ((val & (val<<1) & (val<<2) & (val<<3)) & get_high_mask(val)) == 0
+
+@inline function _check_bmp_utf8_al(beg, cnt)
+    # Short strings need a single (wide) load, with the bytes past cnt masked off
+    cnt <= CHUNKSZ && return _all_bmp(_mask_bytes(unsafe_load(_pntchunk(beg)), cnt))
+    cnt <= BIGCHUNKSZ && return _all_bmp(_mask_bytes(unsafe_load(_pntbigchunk(beg)), cnt))
+    _all_bmp(unsafe_load(_pntchunk(beg))) || return false
+    cnt -= CHUNKSZ  # first chunk verified; the remaining bytes start at beg + CHUNKSZ
+    pnt = _pntbigchunk(beg + CHUNKSZ)
+    v = unsafe_load(pnt)
+    cnt <= BIGCHUNKSZ && return _all_bmp(_mask_bytes(v, cnt))
+    fin = pnt + cnt
+    while (pnt += BIGCHUNKSZ) < fin
         _all_bmp(v) || return false
         v = unsafe_load(pnt)
     end
-    _all_bmp(cnt & CHUNKMSK == 0 ? v : (v & _mask_bytes(cnt)))
+    _all_bmp(_mask_bytes(v, cnt))
 end
-@inline _check_bmp_utf8_al(pnt, cnt) = _check_bmp_utf8_al(pnt, cnt, unsafe_load(pnt))
 
 @inline function _check_bmp_utf8_ul(beg, cnt)
     align = reinterpret(UInt, beg)
-    pnt = reinterpret(Ptr{UInt}, align & ~CHUNKMSK)
+    pnt = reinterpret(Ptr{BigChunk}, align & ~BIGCHUNKMSK)
     v = unsafe_load(pnt)
-    if (align &= CHUNKMSK) != 0
-        v &= ~_mask_bytes(align)
+    if (align &= BIGCHUNKMSK) != 0
+        v &= ~_big_mask_bytes(align)
         cnt += align
     end
-    _check_bmp_utf8_al(pnt, cnt, v)
+    fin = pnt + cnt
+    while (pnt += BIGCHUNKSZ) < fin
+        _all_bmp(v) || return false
+        v = unsafe_load(pnt)
+    end
+    _all_bmp(_mask_bytes(v, cnt))
 end
 
 is_bmp(str::Str{UTF8CSE}) =
-    (cnt = sizeof(str)) == 0 ? true :
-    @preserve str _check_bmp_utf8_al(reinterpret(Ptr{UInt}, pointer(str)), cnt)
+    (cnt = sizeof(str)) == 0 || @preserve str _check_bmp_utf8_al(pointer(str), cnt)
 
 is_bmp(str::SubString{<:Str{UTF8CSE}}) =
-    (cnt = sizeof(str)) == 0 ? true : @preserve str _check_bmp_utf8_ul(pointer(str), cnt)
+    (cnt = sizeof(str)) == 0 || @preserve str _check_bmp_utf8_ul(pointer(str), cnt)
 
 is_bmp(str::SubString{<:Str{<:Union{Text4CSE,UTF32CSE}}}) =
-    (cnt = sizeof(str)) == 0 ? true : @preserve str _check_mask_ul(pointer(str), cnt, _bmp_mask_32)
+    (cnt = sizeof(str)) == 0 || @preserve str _check_mask_ul(pointer(str), cnt, _bmp_mask_32)
 
 is_bmp(str::Str{<:Union{Text4CSE,UTF32CSE}}) =
-    (cnt = sizeof(str)) == 0 ? true : @preserve str _check_mask_al(pointer(str), cnt, _bmp_mask_32)
+    (cnt = sizeof(str)) == 0 || @preserve str _check_mask_al(pointer(str), cnt, _bmp_mask_32)
 
 is_unicode(str::MS_UTF8) = true
 
@@ -387,9 +463,9 @@ _iterate(::MultiCU, ::Type{T}, str::SubString{<:Str{RawUTF8CSE}}, pos::Int) wher
 end
 
 _next(::MultiCU, ::Type{T}, str::Str{RawUTF8CSE}, pos::Int) where {T} =
-    str_next(str.data, pos)
+    iterate(str.data, pos)  # delegate to iterate on the wrapped data field
 _next(::MultiCU, ::Type{T}, str::SubString{<:Str{RawUTF8CSE}}, pos::Int) where {T} =
-    str_next(SubString(str.string.data, str.offset + pos, str.offset + ncodeunits(str)), 1)
+    iterate(SubString(str.string.data, str.offset + pos, str.offset + ncodeunits(str)), 1)  # iterates a temporary SubString of the parent data from its index 1
 
 ## overload methods for efficiency ##
 
@@ -524,15 +600,6 @@ _prevind(::MultiCU, str::Str{RawUTF8CSE}, pos::Int, nchar::Int) =
 _prevind(::MultiCU, str::Str{RawUTF8CSE}, pos::Int) =
     prevind(str.data, pos)
 
-#=
-const _ByteStr = Union{Str{ASCIICSE}, SubString{<:Str{ASCIICSE}},
-                       Str{UTF8CSE},  SubString{<:Str{UTF8CSE}}}
-
-string(s::_ByteStr) = s
-string(s::_ByteStr, c::_ByteStr...) = UTF8Str(_string(c))
-    # ^^ at least one must be UTF-8 or the ASCII-only method would get called
-=#
-
 function _reverse(::MultiCU, ::Type{UTF8CSE}, len, pnt::Ptr{T}) where {T<:CodeUnitTypes}
     buf, beg = _allocate(T, len)
     out = beg + len
diff --git a/src/util.jl b/src/util.jl
index 045b650..a4c39e4 100644
--- a/src/util.jl
+++ b/src/util.jl
@@ -7,6 +7,148 @@ Licensed under MIT License, see LICENSE.md
 Based initially on julia/test/strings/util.jl
 =#
 
+# Concatenate the code units of `a` and `b` into a buffer obtained from
+# `_allocate(T, la + lb)` and return that buffer.  Lengths are in code units, while
+# `Ptr` arithmetic is in bytes, so the second copy is offset by `la * sizeof(T)`.
+function _concat(T, a, b)
+    la = ncodeunits(a)
+    lb = ncodeunits(b)
+    buf, out = _allocate(T, la + lb)
+    @preserve a unsafe_copyto!(out, pointer(a), la)
+    @preserve b unsafe_copyto!(out + la * sizeof(T), pointer(b), lb)
+    buf
+end
+
+# Concatenate `a`, `b` and every element of `rest` into a single buffer obtained from
+# `_allocate(T, len)`, where `len` is the total number of code units; returns the buffer.
+function _string(T, a, b, rest)
+    la = ncodeunits(a)
+    lb = ncodeunits(b)
+    len = la + lb
+    @inbounds for str in rest
+        len += ncodeunits(str)
+    end
+    buf, out = _allocate(T, len)
+    @preserve a unsafe_copyto!(out, pointer(a), la)
+    out += la * sizeof(T)    # pointer arithmetic is in bytes, lengths are in code units
+    @preserve b unsafe_copyto!(out, pointer(b), lb)
+    out += lb * sizeof(T)
+    @inbounds for str in rest
+        len = ncodeunits(str)
+        @preserve str unsafe_copyto!(out, pointer(str), len)
+        out += len * sizeof(T)
+    end
+    buf
+end
+
+# Concatenate every element of `coll` into a single buffer obtained from `_allocate(T, len)`:
+# the first pass totals the code units, the second copies each element; returns the buffer.
+function _string(T, coll)
+    len = 0
+    @inbounds for str in coll
+        len += ncodeunits(str)
+    end
+    buf, out = _allocate(T, len)
+    @inbounds for str in coll
+        len = ncodeunits(str)
+        @preserve str unsafe_copyto!(out, pointer(str), len)
+        out += len * sizeof(T)    # advance by bytes, not code units
+    end
+    buf
+end
+
+# Handle concatenation where all strings share the same CSE and all characters the same character set
+#=
+"""
+WIP: this is rather tricky.
+It really should handle any type of Chr / Str / CSE, not just the ones defined
+in CharSetEncodings, ChrBase and StrBase.
+Ideally, it could also handle mixes with String and Char (or other AbstractString / AbstractChar
+types).
+It may need to do two or even three passes, one to determine the correct type to be output,
+another to determine the output length, and finally another to copy the strings / characters into
+the buffer.
+The result type should be based on promotion rules, i.e. outputting UCS2Str if only ASCII, Latin, UCS2 characters and strings are in the list.
+This is difficult to do in a way that will still be type stable.
+"""
+
+function _string_chr(a::Union{<:Chr{CS,T}, <:Str{C}, SubString{<:Str{C}}}...
+                     ) where {CS<:CharSet,T,C<:CSE{CS}}
+    len = 0
+    for v in a
+        if v isa Chr
+            len += 1
+        else
+            len += ncodeunits(v)
+        end
+    end
+    buf, out = _allocate(T, len)
+    for v in a
+        len = ncodeunits(str)
+        @preserve str unsafe_copyto!(out, pointer(str), len)
+        out += len
+    end
+    buf
+end
+=#
+
+# A single argument is returned unchanged; the vararg methods below copy all of their
+# arguments into one buffer via `_string` and wrap it in the result type each one names.
+string(c::MaybeSub{<:Str}) = c
+string(c::MaybeSub{<:Str{<:Union{ASCIICSE,Latin_CSEs}}}...) = Str(LatinCSE, _string(UInt8, c))
+string(c::MaybeSub{<:Str{<:Union{ASCIICSE,UTF8CSE}}}...) = Str(UTF8CSE, _string(UInt8, c))
+string(c::MaybeSub{<:Str{<:UCS2_CSEs}}...) = Str(UCS2CSE, _string(UInt16, c))
+string(c::MaybeSub{<:Str{<:Union{UCS2_CSEs,UTF16CSE}}}...) = Str(UTF16CSE, _string(UInt16, c))
+string(c::MaybeSub{<:Str{<:UTF32_CSEs}}...) = Str(UTF32CSE, _string(UInt32, c))
+
+#=
+const MS_Str{C} = MaybeSub{<:Str{C}}
+string(a::MS_Str{C}, b::MS_Str{C}) where {C<:CSE} = Str(C, _concat(codeunit(C), a, b))
+string(a::MS_Str{C}, b::MS_Str{C}, c::MS_Str{C}...) where {C<:CSE} =
+    Str(C, _string(codeunit(C), a, b, c))
+
+string(a::T, b::T) where {T<:MS_Str{ASCIICSE}} = string(ASCIICSE, _concat(UInt8, a, b))
+string(a::T, b::T) where {T<:MS_Str{ASCIICSE}} = string(ASCIICSE, _concat(UInt8, a, b))
+string(a::T, b::T) where {T<:MS_Str{ASCIICSE}} = string(ASCIICSE, _concat(UInt8, a, b))
+
+const MS_AL = MS_Str{<:Union{ASCIICSE,Latin_CSEs}}
+string(a::MS_AL, b::MS_AL) = Str(LatinCSE, _concat(UInt8, a, b))
+string(a::MS_AL, b::MS_AL, c::MS_AL...) = Str(LatinCSE, _string(UInt8, a, b, c))
+
+const MS_AU = MS_Str{<:Union{ASCIICSE,UTF8CSE}}
+string(a::MS_AU, b::MS_AU) = Str(UTF8CSE, _concat(UInt8, a, b))
+string(a::MS_AU, b::MS_AU, c::MS_AU...) = Str(UTF8CSE, _string(UInt8, a, b, c))
+
+const MS_U2 = MS_Str{<:UCS2_CSEs}
+string(a::MS_U2, b::MS_U2) = Str(UCS2CSE, _concat(UInt16, a, b))
+string(a::MS_U2, b::MS_U2, c::MS_U2...) = Str(UCS2CSE, _string(UInt16, a, b, c))
+
+const MS_UT = MS_Str{<:Union{UCS2_CSEs,UTF16CSE}}
+string(a::MS_UT, b::MS_UT) = Str(UTF16CSE, _concat(UInt16, a, b))
+string(a::MS_UT, b::MS_UT, c::MS_UT...) = Str(UTF16CSE, _string(UInt16, a, b, c))
+
+const MS_U4 = MS_Str{<:UTF32_CSEs}
+string(a::MS_U4, b::MS_U4) = Str(UTF32CSE, _concat(UInt32, a, b))
+string(a::MS_U4, b::MS_U4, c::MS_U4...) = Str(UTF32CSE, _string(UInt32, a, b, c))
+=#
+
+#=
+string(c::MaybeSub{<:Str{<:Union{ASCIICSE,Latin_CSEs}}}...) =
+    length(c) == 1 ? c[1] : Str(LatinCSE, _string(UInt8, c))
+
+string(c::MaybeSub{<:Str{<:Union{ASCIICSE,UTF8CSE}}}...) =
+    length(c) == 1 ? c[1] : Str(UTF8CSE, _string(UInt8, c))
+
+string(c::MaybeSub{<:Str{<:UCS2_CSEs}}...) =
+    length(c) == 1 ? c[1] : Str(UCS2CSE, _string(UInt16, c))
+
+string(c::MaybeSub{<:Str{<:Union{UCS2_CSEs,UTF16CSE}}}...) =
+    length(c) == 1 ? c[1] : Str(UTF16CSE, _string(UInt16, c))
+
+string(c::MaybeSub{<:Str{<:UTF32_CSEs}}...) =
+    length(c) == 1 ? c[1] : Str(UTF32CSE, _string(UInt32, c))
+=#
+
 # starts with and ends with predicates
 
 starts_with(a::MaybeSub{<:Str{C}}, b::MaybeSub{<:Str{C}}) where {C<:CSE} =
diff --git a/test/basic.jl b/test/basic.jl
index a925cde..ddfee95 100644
--- a/test/basic.jl
+++ b/test/basic.jl
@@ -305,8 +305,8 @@ let
 
     @test lastindex(srep) == 7
 
-    @test str_next(srep, 3) == ('β',5)
-    @test str_next(srep, 7) == ('β',9)
+    @test iterate(srep, 3) == ('β',5)
+    @test iterate(srep, 7) == ('β',9)
 
     @test srep[7] == 'β'
     @test_throws StringIndexError srep[8]
@@ -340,8 +340,8 @@ end
     @test_throws MethodError codeunit(tstr, true)
     @test_throws MethodError isvalid(tstr, 1)
     @test_throws MethodError isvalid(tstr, true)
-    @test_throws MethodError str_next(tstr, 1)
-    @test_throws MethodError str_next(tstr, true)
+    @test_throws MethodError iterate(tstr, 1)
+    @test_throws MethodError iterate(tstr, true)
     @test_throws MethodError lastindex(tstr)
 
     gstr = GenericString("12")
@@ -611,7 +611,7 @@ end
     for st in ("Hello", "Σ", "こんにちは", "😊😁")
         local s
         s = ST(st)
-        @test str_next(s, lastindex(s))[2] > sizeof(s)
+        @test iterate(s, lastindex(s))[2] > sizeof(s)
         @test nextind(s, lastindex(s)) > sizeof(s)
     end
 end
@@ -915,7 +915,7 @@ function testbin(::Type{ST}) where {ST}
                  b"\xf8\x9f\x98\x84", b"\xf8\x9f\x98\x84z")),
         s in lst
         st = ST(s)
-        @test str_next(st, 1)[2] == 2
+        @test iterate(st, 1)[2] == 2
         @test nextind(st, 1) == 2
     end
 
@@ -930,7 +930,7 @@ function testbin(::Type{ST}) where {ST}
         (s, r) in lst
         st = ST(s)
         (ST === BinaryStr || ST === Text1Str) && (r = 2)
-        @test str_next(st, 1)[2] == r
+        @test iterate(st, 1)[2] == r
         @test nextind(st, 1) == r
     end
 end
@@ -950,12 +950,7 @@ end
     @test String(sym) == string(Char(0xdcdb))
     @test Meta.lower(Main, sym) === sym
     res = string(Meta.parse(string(Char(0xdcdb)," = 1"),1,raise=false)[1])
-    @static if VERSION ≥ v"1.5.0-DEV.460"
-        @test res == "\$(Expr(:error, \"invalid UTF-8 sequence\"))"
-    else
-        @test startswith(res, "\$(Expr(:error, \"invalid character \\\"\\udcdb\\\"")
-        @test endswith(res,   "\"))")
-    end
+    @test res == "\$(Expr(:error, \"invalid UTF-8 sequence\"))"
 end
 
 @testset "invalid code point" begin
diff --git a/test/util.jl b/test/util.jl
index d85645b..3f77471 100644
--- a/test/util.jl
+++ b/test/util.jl
@@ -307,6 +307,31 @@
             #non-hex characters
             @test_throws ArgumentError hex2bytes(b"0123456789abcdefABCDEFGH")
         end
+
+        @testset "Concatenation" begin
+            asc = ASCIIStr("foo")  # one sample value per string type exercised below
+            lat = LatinStr("bar")
+            ucs = UCS2Str("baz")
+            u32 = UTF32Str("silly")
+            ut8 = UTF8Str("test")
+            ut16 = UTF16Str("ugly")  # NOTE(review): not used by any assertion in this testset
+            haslat = _LatinStr("você")  # contains a non-ASCII character
+            hasucs = _UCS2Str("†")  # U+2020
+            hasu32 = _UTF32Str("\U1f596")  # code point above U+FFFF
+            @test typeof(asc * asc) == ASCIIStr  # only the type of each result is checked, not its contents
+            @test typeof(asc * lat) == LatinStr
+            @test typeof(asc * ut8) == UTF8Str
+            @test typeof(asc * haslat) == LatinStr
+            @test typeof(lat * lat) == LatinStr
+            @test typeof(haslat * haslat) == _LatinStr
+            @test typeof(lat * haslat) == LatinStr
+            @test typeof(ucs * ucs) == UCS2Str
+            @test typeof(hasucs * hasucs) == _UCS2Str
+            @test typeof(ucs * hasucs) == UCS2Str
+            @test typeof(u32 * u32) == UTF32Str
+            @test typeof(hasu32 * hasu32) == _UTF32Str
+            @test typeof(u32 * hasu32) == UTF32Str
+        end
     end
 
     # b"" should be immutable