Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Optimize some string operations, use GitHub Actions, update version and ARM CI #11

Open
wants to merge 6 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
14 changes: 7 additions & 7 deletions .drone.yml
Original file line number Diff line number Diff line change
Expand Up @@ -11,30 +11,30 @@ steps:
image: julia:1.5
commands:
- "julia --project=. --check-bounds=yes --color=yes -e 'using InteractiveUtils; versioninfo(verbose=true); using Pkg; Pkg.build(); Pkg.test(coverage=true)'"

---
kind: pipeline
name: linux - arm64 - Julia 1.0

name: linux - arm - Julia 1.6
platform:
os: linux
arch: arm64
arch: arm

steps:
- name: build
image: julia:1.0
image: julia:1.6
commands:
- "julia --project=. --check-bounds=yes --color=yes -e 'using InteractiveUtils; versioninfo(verbose=true); using Pkg; Pkg.build(); Pkg.test(coverage=true)'"

---
kind: pipeline
name: linux - arm - Julia 1.0
name: linux - arm64 - Julia 1.6

platform:
os: linux
arch: arm
arch: arm64

steps:
- name: build
image: julia:1.0
image: julia:1.6
commands:
- "julia --project=. --check-bounds=yes --color=yes -e 'using InteractiveUtils; versioninfo(verbose=true); using Pkg; Pkg.build(); Pkg.test(coverage=true)'"
42 changes: 42 additions & 0 deletions .github/workflows/ci.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
# GitHub Actions workflow: build and test the package on every push and pull request.
name: CI
on:
  - push
  - pull_request
jobs:
  test:
    name: Julia ${{ matrix.version }} - ${{ matrix.os }} - ${{ matrix.arch }} - ${{ github.event_name }}
    runs-on: ${{ matrix.os }}
    strategy:
      # Let every matrix entry run to completion even if another entry fails
      fail-fast: false
      matrix:
        version:
          - '1.5'
          - 'nightly'
        os:
          - ubuntu-latest
          - macOS-latest
          - windows-latest
        arch:
          - x64
          - x86
        # Drop the macOS / x86 combination from the matrix
        exclude:
          - os: macOS-latest
            arch: x86
    steps:
      - uses: actions/checkout@v2
      - uses: julia-actions/setup-julia@v1
        with:
          version: ${{ matrix.version }}
          arch: ${{ matrix.arch }}
      # Cache downloaded artifacts, keyed on the hash of the Project.toml files
      - uses: actions/cache@v1
        env:
          cache-name: cache-artifacts
        with:
          path: ~/.julia/artifacts
          key: ${{ runner.os }}-test-${{ env.cache-name }}-${{ hashFiles('**/Project.toml') }}
          restore-keys: |
            ${{ runner.os }}-test-${{ env.cache-name }}-
            ${{ runner.os }}-test-
            ${{ runner.os }}-
      - uses: julia-actions/julia-buildpkg@v1
      - uses: julia-actions/julia-runtest@v1
34 changes: 0 additions & 34 deletions .travis.yml

This file was deleted.

10 changes: 5 additions & 5 deletions Project.toml
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ authors = ["ScottPJones <[email protected]>"]
keywords = ["Strings"]
license = "MIT"
uuid = "e79e7a6a-7bb1-5a4d-9d64-da657b06f53a"
version = "1.0.4"
version = "1.1.0"

[deps]
Unicode = "4ec0a83e-493e-50e2-b9ac-8f72acf5a8f5"
Expand All @@ -24,9 +24,9 @@ Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c"
test = ["Test", "Random"]

[compat]
julia = "^1.0.0"
ModuleInterfaceTools = "^1.0.0"
julia = "1"
ModuleInterfaceTools = "1"
MurmurHash3 = "^1.0.3"
StrAPI = "^1.0.0"
StrAPI = "1.1"
ChrBase = "^1.0.1"
CharSetEncodings = "^1.0.0"
CharSetEncodings = "1"
7 changes: 4 additions & 3 deletions src/StrBase.jl
Original file line number Diff line number Diff line change
Expand Up @@ -23,14 +23,15 @@ using ModuleInterfaceTools

@api develop! check_string, unsafe_check_string, fast_check_string, skipascii, skipbmp,
countmask, count_chars, _count_mask_al, _count_mask_ul, count_latin,
_copysub, _cvtsize, _repeat, empty_str, _data, _pntchunk, _str,
_copysub, _cvtsize, _repeat, empty_str, _data, _mask_bytes,
_pntchunk, _pntbigchunk, _str,
ValidatedStyle, MutableStyle, EqualsStyle, CanContain

@api develop LineCounts, CharTypes, CharStat, maxbit, calcstats, check_continuation,
UTF_LONG, UTF_LATIN1, UTF_UNICODE2, UTF_UNICODE3, UTF_UNICODE4, UTF_SURROGATE,
UTF_INVALID, CHUNKSZ, CHUNKMSK,
UTF_INVALID, CHUNKSZ, CHUNKMSK, BIGCHUNKSZ, BIGCHUNKMSK,
_memcmp, _memcpy, _memset, _fwd_memchr, _rev_memchr,
empty_string, _calcpnt, _mask_bytes, _allocate,
BigChunk, empty_string, _calcpnt, _allocate, SingleCU, MultiCU,
MS_UTF8, MS_UTF16, MS_UTF32, MS_SubUTF32, MS_Latin, MS_ByteStr, MS_RawUTF8,
_wrap_substr, _empty_sub,
AccessType, UInt16_U, UInt32_U, UInt16_S, UInt32_S, UInt16_US, UInt32_US,
Expand Down
22 changes: 1 addition & 21 deletions src/ascii.jl
Original file line number Diff line number Diff line change
@@ -1,32 +1,12 @@
#=
ASCIIStr type

Copyright 2017-2018 Gandalf Software, Inc., Scott P. Jones,
Copyright 2017-2020 Gandalf Software, Inc., Scott P. Jones,
and other contributors to the Julia language
Licensed under MIT License, see LICENSE.md
Based in part on code for ASCIIString that used to be in Julia
=#

## overload methods for efficiency ##

# Concatenate the code units of every string in `coll` into a single UInt8
# buffer obtained from `_allocate`, and return that buffer.
function _string(coll)
    # First pass: total number of code units across all pieces
    total = 0
    for piece in coll
        total += ncodeunits(piece)
    end
    buf, dst = _allocate(UInt8, total)
    # Second pass: copy each piece to the running write position
    for piece in coll
        @preserve piece begin
            nbytes = ncodeunits(piece)
            unsafe_copyto!(dst, pointer(piece), nbytes)
            dst += nbytes
        end
    end
    return buf
end

# Concatenate ASCII strings (or substrings of them). A single argument is
# returned as-is; otherwise the pieces are joined by `_string` and the
# resulting buffer is wrapped as an ASCIICSE `Str`.
function string(c::MaybeSub{<:Str{ASCIICSE}}...)
    length(c) == 1 && return c[1]
    return Str(ASCIICSE, _string(c))
end

## transcoding to ASCII ##

function convert(::Type{<:Str{ASCIICSE}}, str::AbstractString)
Expand Down
4 changes: 2 additions & 2 deletions src/compare.jl
Original file line number Diff line number Diff line change
Expand Up @@ -54,7 +54,7 @@ end
while pnt < fin
str_done(b, pos) && return 1
c1, pnt = _nextcp(C, pnt)
ch, pos = str_next(b, pos)
ch, pos = iterate(b, pos)
c2 = ch%UInt32
c1 == c2 || return ifelse(c1 < c2, -1, 1)
end
Expand Down Expand Up @@ -93,7 +93,7 @@ function _cpeq(a::MaybeSub{T}, b) where {C<:CSE, T<:Str{C}}
while pnt < fin
str_done(b, pos) && return false
c1, pnt = _nextcp(C, pnt)
ch, pos = str_next(b, pos)
ch, pos = iterate(b, pos)
c1 == codepoint(ch) || return false
end
true
Expand Down
8 changes: 4 additions & 4 deletions src/core.jl
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
Core functions


Copyright 2017-2018 Gandalf Software, Inc., Scott P. Jones, and others (see Julia contributors)
Copyright 2017-2020 Gandalf Software, Inc., Scott P. Jones, and others (see Julia contributors)
Licensed under MIT License, see LICENSE.md

Inspired by / derived from code in Julia
Expand Down Expand Up @@ -33,7 +33,7 @@ _nextcp(::Type{T}, pnt) where {T} = _nextcpfun(EncodingStyle(T), T, pnt)

# Use more generic length check
@inline _length_check(str::SubString{<:Str{C}}, cnt) where {C<:CSE} =
_length(MultiCU(), C, pointer(str), cnt)
@preserve str _length_ul(MultiCU(), C, pointer(str), cnt)

# Go directly to aligned length check
@inline _length_check(str::Str{C}, cnt) where {C<:CSE} =
Expand All @@ -42,7 +42,7 @@ _nextcp(::Type{T}, pnt) where {T} = _nextcpfun(EncodingStyle(T), T, pnt)
# Length for the MultiCU style: a string of 0 or 1 code units has length 0 or 1
# respectively; anything longer is handed to _length_check, with `str` kept
# alive by @preserve for the duration of that call.
@inline _length(::MultiCU, str::MaybeSub{T}) where {T<:Str} =
(cnt = ncodeunits(str); cnt < 2 ? Int(cnt > 0) : @preserve str _length_check(str, cnt))

@inline _length(::SingleCU, ::Type{<:CSE}, ::Ptr{<:CodeUnitTypes}, cnt::Int) = cnt
@inline _length_ul(::SingleCU, ::Type{<:CSE}, ::Ptr{<:CodeUnitTypes}, cnt::Int) = cnt

# RawUTF8CSE strings delegate to `length` on the wrapped `str.data`,
# both for the whole string and for the index range i..j.
@inline _length(::MultiCU, str::Str{RawUTF8CSE}) = length(str.data)
@inline _length(::MultiCU, str::Str{RawUTF8CSE}, i::Int, j::Int) = length(str.data, i, j)
Expand All @@ -55,7 +55,7 @@ _nextcp(::Type{T}, pnt) where {T} = _nextcpfun(EncodingStyle(T), T, pnt)
0 <= j < lim || boundserr(str, j)
end
(cnt = j - i + 1) <= 0 ? 0 :
@preserve str _length(cs, cse(str), bytoff(pointer(str), i - 1), cnt)
@preserve str _length_ul(cs, cse(str), bytoff(pointer(str), i - 1), cnt)
end

# SingleCU style: the position is returned unchanged (converted to Int);
# `str`, `len` and `pnt` are not consulted.
@inline _thisind(::SingleCU, str, len, pnt, pos) = Int(pos)
Expand Down
22 changes: 3 additions & 19 deletions src/latin.jl
Original file line number Diff line number Diff line change
@@ -1,7 +1,8 @@
#=
LatinStr/_LatinStr type (ISO Latin1 8-bit subset of Unicode)

Copyright 2017 Gandalf Software, Inc., Scott P. Jones, and other contributors to the Julia language
Copyright 2017, 2020 Gandalf Software, Inc., Scott P. Jones,
and other contributors to the Julia language
Licensed under MIT License, see LICENSE.md
Based in part on code for ASCIIString that used to be in Julia
=#
Expand All @@ -13,23 +14,6 @@ is_latin(str::MaybeSub{<:Str{<:LatinCSE}}) = true
# For MS_Latin arguments both predicates are answered with a constant `true`,
# without inspecting the string contents.
is_bmp(str::MS_Latin) = true
is_unicode(str::MS_Latin) = true

# Any ASCII or Latin string type, or a substring of one
const MS_ASCIILatin = MaybeSub{<:Str{<:Union{ASCIICSE, Latin_CSEs}}}

# Concatenate ASCII/Latin strings. A single argument is returned as-is;
# otherwise the code units of all arguments are copied, in order, into one
# buffer from `_allocate`, which is wrapped as a LatinCSE `Str`.
function string(collection::MS_ASCIILatin...)
    length(collection) == 1 && return collection[1]
    # First pass: total number of code units to allocate
    len = 0
    @inbounds for str in collection
        len += ncodeunits(str)
    end
    buf, pnt = _allocate(len)
    # Second pass: append each piece at the running write pointer
    @inbounds for str in collection
        len = ncodeunits(str)
        # Keep `str` rooted while its raw pointer is being read by _memcpy
        @preserve str _memcpy(pnt, pointer(str), len)
        pnt += len
    end
    Str(LatinCSE, buf)
end

## transcoding to Latin1 ##

function convert(::Type{<:Str{C}}, str::AbstractString) where {C<:Latin_CSEs}
Expand Down Expand Up @@ -167,7 +151,7 @@ end

function convert(::Type{<:Str{C}}, vec::Vector{CU}) where {C<:Latin_CSEs,CU<:CodeUnitTypes}
# handle zero length string quickly
(len = length(vec)) == 0 && return _empty_str(C)
(len = length(vec)) == 0 && return empty_str(C)
@preserve vec begin
pnt = pointer(vec)
# get number of bytes to allocate
Expand Down
12 changes: 8 additions & 4 deletions src/search.jl
Original file line number Diff line number Diff line change
Expand Up @@ -121,6 +121,10 @@ found(::Type{<:AbstractString}, v) = v != 0
# For AbstractString targets the value `v` is returned unchanged.
find_result(::Type{<:AbstractString}, v) = v

# Translate a result whose first element is 0 into `nothing`;
# any other result is passed through unchanged.
function nothing_sentinel(i)
    if first(i) == 0
        return nothing
    end
    return i
end
# Base search API for `Str` haystacks. Each method forwards to `find` with a
# direction tag (First, Last, Fwd, Rev) as its first argument and passes the
# result through `nothing_sentinel`, so a result whose first element is 0
# is returned as `nothing`.
#
# NOTE(review): the AbstractChar methods repeat the generic ones below with a
# narrower needle type, presumably to avoid dispatch ambiguity with Base's own
# AbstractChar/AbstractString methods; confirm.
Base.findfirst(a::AbstractChar, b::Str) = nothing_sentinel(find(First, a, b))
Base.findlast(a::AbstractChar, b::Str) = nothing_sentinel(find(Last, a, b))
Base.findnext(a::AbstractChar, b::Str, i) = nothing_sentinel(find(Fwd, a, b, i))
Base.findprev(a::AbstractChar, b::Str, i) = nothing_sentinel(find(Rev, a, b, i))
# Generic needle (any type) searched for in a `Str`
Base.findfirst(a, b::Str) = nothing_sentinel(find(First, a, b))
Base.findlast(a, b::Str) = nothing_sentinel(find(Last, a, b))
Base.findnext(a, b::Str, i) = nothing_sentinel(find(Fwd, a, b, i))
Expand Down Expand Up @@ -189,7 +193,7 @@ function find(::Type{D}, needle::AbstractString, str::AbstractString,
@inbounds is_valid(str, pos) || index_error(str, pos)
(tlen = ncodeunits(needle)) == 0 && return pos:pos-1
(cmp = CanContain(str, needle)) === NoCompare() && return _not_found
@inbounds ch, nxt = str_next(needle, 1)
@inbounds ch, nxt = iterate(needle, 1)
is_valid(eltype(str), ch) || return _not_found
# Check if single character
if nxt > tlen
Expand All @@ -205,7 +209,7 @@ function find(::Type{T}, needle::AbstractString, str::AbstractString) where {T<:
pos = T === First ? 1 : thisind(str, slen)
(tlen = ncodeunits(needle)) == 0 && return pos:(pos-1)
(cmp = CanContain(str, needle)) === NoCompare() && return _not_found
@inbounds ch, nxt = str_next(needle, 1)
@inbounds ch, nxt = iterate(needle, 1)
is_valid(eltype(str), ch) || return _not_found
# Check if single character
if nxt > tlen
Expand Down Expand Up @@ -298,8 +302,8 @@ end
"""Compare two strings, starting at nxtstr and nxtsub"""
@inline function _cmp_str(str, strpos, endpos, sub, subpos, endsub)
while strpos <= endpos
c, strnxt = str_next(str, strpos)
d, subpos = str_next(sub, subpos)
c, strnxt = iterate(str, strpos)
d, subpos = iterate(sub, subpos)
c == d || break
subpos > endsub && return strpos
strpos = strnxt
Expand Down
4 changes: 2 additions & 2 deletions src/support.jl
Original file line number Diff line number Diff line change
Expand Up @@ -264,7 +264,7 @@ function unsafe_check_string(str::T;
totalchar = latin1byte = num2byte = num3byte = num4byte = invalids = 0
pos = 1
@inbounds while !str_done(str, pos)
chr, nxt = str_next(str, pos)
chr, nxt = iterate(str, pos)
ch = chr%UInt32
totalchar += 1
if ch > 0x7f
Expand All @@ -288,7 +288,7 @@ function unsafe_check_string(str::T;
break
end
# next character *must* be a trailing surrogate character
chr, nxt = str_next(str, nxt)
chr, nxt = iterate(str, nxt)
if !is_surrogate_trail(chr)
accept_invalids || strerror(StrErrors.NOT_TRAIL, pos, chr)
invalids += 1
Expand Down
Loading