Skip to content

Commit

Permalink
update: add more tests
Browse files Browse the repository at this point in the history
Signed-off-by: Milos Gajdos <[email protected]>
  • Loading branch information
milosgajdos committed Mar 23, 2024
1 parent be14600 commit 7f743cd
Show file tree
Hide file tree
Showing 2 changed files with 19 additions and 5 deletions.
15 changes: 15 additions & 0 deletions document/text/character_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -60,13 +60,28 @@ func TestCharSplitter(t *testing.T) {
input: "foo.bar.baz.123",
exp: []string{"foo", ".bar", ".baz", ".123"},
},
{
size: 1,
overlap: 0,
keepSep: true,
sep: Sep{Value: `\.`, IsRegexp: true},
input: "foo.bar.baz.123",
exp: []string{"foo", ".bar", ".baz", ".123"},
},
{
size: 1,
overlap: 0,
sep: Sep{Value: ".", IsRegexp: false},
input: "foo.bar.baz.123",
exp: []string{"foo", "bar", "baz", "123"},
},
{
size: 1,
overlap: 0,
sep: Sep{Value: `\.`, IsRegexp: true},
input: "foo.bar.baz.123",
exp: []string{"foo", "bar", "baz", "123"},
},
}

for _, tc := range testCases {
Expand Down
9 changes: 4 additions & 5 deletions document/text/splitter.go
Original file line number Diff line number Diff line change
Expand Up @@ -157,11 +157,10 @@ func (s *Splitter) splitText(text string, sep Sep) []string {
if sep.Value != "" {
if s.keepSep {
sepVal := sep.Value
if !sep.IsRegexp {
// NOTE: we must do this to unescape
// the escaped separator
sepVal, _ = unquoteMeta(sep.Value)
}
// NOTE: we must do this to unescape
// the escaped separator so we keep the raw separator.
sepVal, _ = unquoteMeta(sep.Value)

var results []string
splits := regexp.MustCompile("("+sep.Value+")").Split(text, -1)
// NOTE: we start iterating from 1, not 0!
Expand Down

0 comments on commit 7f743cd

Please sign in to comment.