Skip to content

Commit 71d3813

Browse files
feat: SSPAPI-332 Optimized uslug
1 parent 4e2cdb1 commit 71d3813

13 files changed

+1823
-18
lines changed

.idea/codeStyles/Project.xml

-3
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

.idea/jsLinters/eslint.xml

+6
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

README.md

+37-5
Original file line numberDiff line numberDiff line change
@@ -1,21 +1,53 @@
1-
# xxxxxx [![CircleCI](https://circleci.com/gh/shelfio/xxxxxx/tree/master.svg?style=svg)](https://circleci.com/gh/shelfio/xxxxxx/tree/master)![](https://img.shields.io/badge/code_style-prettier-ff69b4.svg)
1+
# fast-uslug [![CircleCI](https://circleci.com/gh/shelfio/fast-uslug/tree/master.svg?style=svg)](https://circleci.com/gh/shelfio/fast-uslug/tree/master)![](https://img.shields.io/badge/code_style-prettier-ff69b4.svg)
22

3-
> xxxxxx description
3+
> It's a fork of [uslug](https://github.com/jeremys/uslug). **6x** faster than uslug.
4+
5+
Permissive slug generator that works with unicode.
6+
We keep only characters from the categories Letter, Number and Separator (see [Unicode Categories](http://www.unicode.org/versions/Unicode6.0.0/ch04.pdf))
7+
and the common [CJK Unified Ideographs](http://www.unicode.org/versions/Unicode6.0.0/ch12.pdf) as defined in the version 6.0.0 of the Unicode specification.
8+
9+
Inspired by [unicode-slugify](https://github.com/mozilla/unicode-slugify).
10+
Note that this slug generator is different from [node-slug](https://github.com/dodo/node-slug), which focuses on translating Unicode characters to their English or Latin equivalents.
11+
12+
## Quick Examples
13+
14+
```js
15+
fastUslug('Ґатунок Їхніх обценьок неперевершений!'); // 'ґатунок-їхніх-обценьок-неперевершений'
16+
fastUslug('汉语/漢語'); // '汉语漢語'
17+
18+
fastUslug('Y U NO', {lower: false}); // 'Y-U-NO'
19+
fastUslug('Y U NO', {spaces: true}); // 'y u no'
20+
fastUslug('Y-U|NO', {allowedChars: new Set(['|'])}); // 'yu|no'
21+
```
422

523
## Install
624

725
```
8-
$ yarn add @shelf/xxxxxx
26+
$ yarn add @shelf/fast-uslug
927
```
1028

1129
## Usage
1230

1331
```js
14-
const {getFoo} = require('@shelf/xxxxxx');
32+
import {fastUslug} from '@shelf/fast-uslug';
1533

16-
getFoo();
34+
fastUslug('some string'); // some-string
1735
```
1836

37+
## Options
38+
39+
### fastUslug(string, options)
40+
41+
Generate a slug for the string passed.
42+
43+
**Arguments**
44+
45+
- string - The string you want to slugify.
46+
- options - An optional object that can contain:
47+
- allowedChars: a `Set` of characters that are kept in the slug unchanged. Default: `new Set(['-', '_', '~'])`.
48+
- lower: a Boolean to force to lower case the slug. Default: true.
49+
- spaces: a Boolean to allow spaces. Default: false.
50+
1951
## Publish
2052

2153
```sh

package.json

+6-4
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
{
2-
"name": "@shelf/xxxxxx",
3-
"version": "0.0.0",
4-
"description": "xxxxxx",
2+
"name": "@shelf/fast-uslug",
3+
"version": "0.0.1",
4+
"description": "fast-uslug",
55
"license": "MIT",
66
"author": {
77
"name": "Vlad Holubiev",
@@ -47,12 +47,14 @@
4747
"@shelf/tsconfig": "0.0.6",
4848
"@types/jest": "27.4.0",
4949
"@types/node": "14",
50+
"@types/uslug": "1.0.1",
5051
"eslint": "8.6.0",
5152
"husky": "7.0.4",
5253
"jest": "27.4.7",
5354
"lint-staged": "12.1.7",
5455
"prettier": "2.5.1",
55-
"typescript": "4.5.4"
56+
"typescript": "4.5.4",
57+
"uslug": "1.0.4"
5658
},
5759
"engines": {
5860
"node": ">=14"

src/codes/LMN.ts

+1,526
Large diffs are not rendered by default.

src/codes/Z.ts

+14
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,14 @@
1+
// Unicode code points whose general category is a separator:
//   - Zs = Separator, space
//   - Zl = Separator, line
//   - Zp = Separator, paragraph
//
// The list was generated from http://unicode.org/Public/UNIDATA/UnicodeData.txt with:
// curl -s http://unicode.org/Public/UNIDATA/UnicodeData.txt | grep -E ';Zs;|;Zl;|;Zp;' | cut -d \; -f 1 | xargs -I{} printf '%d, ' 0x{}
//
const separatorCodePoints: readonly number[] = [
  32, 160, 5760, 8192, 8193, 8194, 8195, 8196, 8197, 8198, 8199, 8200, 8201, 8202, 8232, 8233, 8239,
  8287, 12288,
];

export const Z = new Set<number>(separatorCodePoints);

src/codes/index.ts

+2
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
export * from './LMN';
2+
export * from './Z';

src/consts.ts

+24
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,24 @@
1+
/** Unicode normalization form passed to `String.prototype.normalize` before slugifying. */
export const normalizeForm = 'NFKC';

// Markers returned by the category lookup: one for the LMN set, one for the Z set.
export const lmnSymbol = 1;
export const zSymbol = 2;

// String building blocks used while assembling the slug.
export const spaceSymbol = ' ';
export const emptyString = '';
export const hyphenSymbol = '-';

/** Matches a run of one or more whitespace characters. */
export const severalSpacesRegex = /\s+/g;
/** Matches a run of one or more whitespace characters and/or hyphens. */
export const severalSpacesOrHyphensRegex = /[\s-]+/g;

/** Characters that are kept in the slug when the caller does not pass `allowedChars`. */
export const defaultAllowedChars = new Set(['-', '_', '~']);

// Inclusive bounds of the range checked by `allowChinese` (common CJK Unified Ideographs).
export const chineseL = 0x4e00;
export const chineseR = 0x9fff;

// Inclusive bounds of the range checked by `allowKorean` (Hangul).
export const koreanL = 0xac00;
export const koreanR = 0xd7a3;

// Inclusive bounds of the two ranges checked by `allowJapanese`
// (ideographic punctuation): japaneseLL..japaneseLR and japaneseRL..japaneseRR.
export const japaneseLL = 0x3000;
export const japaneseLR = 0x3002;
export const japaneseRL = 0xff01;
export const japaneseRR = 0xff02;

src/helpers.ts

+73
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,73 @@
1+
import {LMN, Z} from './codes';
2+
import {
3+
chineseL,
4+
chineseR,
5+
emptyString,
6+
japaneseLL,
7+
japaneseLR,
8+
japaneseRL,
9+
japaneseRR,
10+
koreanL,
11+
koreanR,
12+
lmnSymbol,
13+
spaceSymbol,
14+
zSymbol,
15+
} from './consts';
16+
17+
/**
 * Filters `chars` down to the characters that may appear in a slug.
 *
 * Every character goes through these rules in order:
 * 1. CJK Unified Ideographs and Hangul are kept as-is.
 * 2. Japanese ideographic punctuation emits a space, then the character
 *    continues through the remaining rules.
 * 3. Characters contained in `allowedChars` are kept as-is.
 * 4. Characters whose code point is in `LMN` are kept as-is, characters whose
 *    code point is in `Z` become a space, and everything else is dropped.
 *
 * @param chars - Text to filter.
 * @param allowedChars - Characters that are copied to the output unchanged.
 * @returns The filtered text with surrounding whitespace trimmed. Runs of
 *   spaces are NOT collapsed here.
 */
export const getRawSlug = (chars: string, allowedChars: Set<string>): string => {
  const rawSlug: string[] = [];

  for (const char of chars) {
    // `for...of` yields whole code points, so the lookup key has to be the whole
    // code point as well. `charCodeAt(0)` would return only the high surrogate
    // for supplementary-plane characters, which never matches a category set.
    const code = char.codePointAt(0);

    if (code === undefined) {
      continue;
    }

    if (allowChinese(code) || allowKorean(code)) {
      rawSlug.push(char);
      continue;
    }

    // No `continue` here: the character is still checked against
    // `allowedChars` and the category sets below.
    if (allowJapanese(code)) {
      rawSlug.push(spaceSymbol);
    }

    if (allowedChars.has(char)) {
      rawSlug.push(char);
      continue;
    }

    const value = unicodeCategory(code);

    if (!value) {
      continue;
    }

    rawSlug.push(value === lmnSymbol ? char : spaceSymbol);
  }

  return rawSlug.join(emptyString).trim();
};
48+
49+
// Common CJK Unified Ideographs are allowed.
// See: http://www.unicode.org/versions/Unicode6.0.0/ch12.pdf - Table 12-2
// Returns true when `code` lies in the inclusive range chineseL..chineseR.
function allowChinese(code: number): boolean {
  if (code < chineseL) {
    return false;
  }

  return code <= chineseR;
}
54+
55+
// Hangul is allowed.
// Returns true when `code` lies in the inclusive range koreanL..koreanR.
function allowKorean(code: number): boolean {
  if (code < koreanL) {
    return false;
  }

  return code <= koreanR;
}
59+
60+
// Japanese ideographic punctuation.
// Returns true when `code` lies in either inclusive range:
// japaneseLL..japaneseLR or japaneseRL..japaneseRR.
function allowJapanese(code: number): boolean {
  const inFirstRange = code >= japaneseLL && code <= japaneseLR;
  const inSecondRange = code >= japaneseRL && code <= japaneseRR;

  return inFirstRange || inSecondRange;
}
64+
65+
// Classifies a code: `lmnSymbol` when it is in the LMN set, `zSymbol` when it
// is in the Z set, and `undefined` when it is in neither.
function unicodeCategory(code: number): undefined | number {
  if (LMN.has(code)) {
    return lmnSymbol;
  }

  return Z.has(code) ? zSymbol : undefined;
}

src/index.benchmark.test.ts

+45
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,45 @@
1+
import uslug from 'uslug';
2+
import mocks from './mocks';
3+
import {fastUslug} from './index';
4+
5+
// Number of benchmark rounds whose speed-up ratios are averaged.
const runTimes = 10;
// Approximate number of slugify calls per round; the mocks are repeated to reach it.
const desirableRunTimes = 1000;
const testsIncreaseTimes = Math.floor(desirableRunTimes / mocks.length);
const desirableTests = Array(testsIncreaseTimes).fill(mocks).flat();

// Milliseconds elapsed since `start` (a `process.hrtime()` reading).
// `process.hrtime` is used instead of `Date` because a round can finish in
// well under a millisecond, which would make the ratio below Infinity or NaN.
const elapsedMs = (start: [number, number]): number => {
  const [seconds, nanoseconds] = process.hrtime(start);

  return seconds * 1e3 + nanoseconds / 1e6;
};

// Benchmark comparing uslug with fast-uslug. Skipped by default; change
// `describe.skip` to `describe` to run it locally.
describe.skip('old-new-uslug-benchmark', () => {
  let sumFaster = 0;

  afterAll(() => {
    console.log(`In average fast-uslug ${(sumFaster / runTimes).toFixed(2)}x faster than uslug`);
  });

  for (let i = 0; i < runTimes; i++) {
    describe('old-new-uslug', () => {
      let spentTimeOld: number;
      let spentTimeNew: number;

      afterAll(() => {
        sumFaster += spentTimeOld / spentTimeNew;
      });

      // Plain `it` rather than `it.concurrent`: both bodies are synchronous, so
      // concurrency adds nothing and the two timings should not be interleaved.
      it('should old slugify', () => {
        const startOld = process.hrtime();
        for (const test of desirableTests) {
          expect(uslug(test[0])).toEqual(test[1]);
        }
        spentTimeOld = elapsedMs(startOld);
      });

      it('should new slugify', () => {
        const startNew = process.hrtime();
        for (const test of desirableTests) {
          expect(fastUslug(test[0])).toEqual(test[1]);
        }
        spentTimeNew = elapsedMs(startNew);
      });
    });
  }
});

src/index.test.ts

+24-3
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,26 @@
1-
import {getFoo} from './index';
1+
import mocks from './mocks';
2+
import {fastUslug} from './index';
23

3-
it('should return bar', () => {
4-
expect(getFoo()).toEqual('bar');
4+
// Every mock is an [input, expected slug] pair.
it('should return slugified strings', () => {
  for (const [input, expected] of mocks) {
    expect(fastUslug(input)).toEqual(expected);
  }
});

// '-', '_' and '~' are kept when no options are passed.
it('should return default allowed chars', () => {
  const slug = fastUslug('~-_');

  expect(slug).toEqual('~-_');
});

// Characters passed through `allowedChars` are kept in the slug.
it('should leave passed allowed chars', () => {
  const allowedChars = new Set(['|', '@']);
  const slug = fastUslug('qwerty |@ qwerty', {allowedChars});

  expect(slug).toEqual('qwerty-|@-qwerty');
});

// `lower: false` keeps the original casing.
it('should not be lower cased', () => {
  const slug = fastUslug('QWERTY', {lower: false});

  expect(slug).toEqual('QWERTY');
});

// `spaces: true` keeps spaces between words instead of hyphens.
it('should leave one space between words', () => {
  const slug = fastUslug(' qwerty qwerty ', {spaces: true});

  expect(slug).toEqual('qwerty qwerty');
});

src/index.ts

+32-3
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,32 @@
1-
export function getFoo(): string {
2-
return 'bar';
3-
}
1+
import {
2+
defaultAllowedChars,
3+
emptyString,
4+
hyphenSymbol,
5+
normalizeForm,
6+
severalSpacesOrHyphensRegex,
7+
severalSpacesRegex,
8+
spaceSymbol,
9+
} from './consts';
10+
import {getRawSlug} from './helpers';
11+
12+
/**
 * Options accepted by `fastUslug`.
 *
 * `allowedChars` may be a `Set` of characters or a plain string such as
 * `'-_~'`; a string is split into its individual characters.
 */
type OptionParams = {
  lower?: boolean;
  spaces?: boolean;
  allowedChars?: Set<string> | string;
};

/**
 * Generates a slug for `str`.
 *
 * The input is normalized with `normalizeForm`, filtered by `getRawSlug`, and
 * runs of whitespace are collapsed to a single space.
 *
 * @param str - The string to slugify. Defaults to the empty string.
 * @param options - Optional settings:
 *   - `lower` (default `true`): lower-case the slug.
 *   - `spaces` (default `false`): when `false`, runs of whitespace and/or
 *     hyphens are replaced with a single hyphen; when `true`, spaces are kept.
 *   - `allowedChars` (default `defaultAllowedChars`): characters kept unchanged.
 * @returns The generated slug.
 */
export const fastUslug = (
  str: string = emptyString,
  {lower = true, spaces = false, allowedChars = defaultAllowedChars}: OptionParams = {}
): string => {
  // Accept the string form as well as a Set. Iterating a string yields whole
  // characters, which matches how `getRawSlug` iterates its input.
  const allowedCharSet = typeof allowedChars === 'string' ? new Set(allowedChars) : allowedChars;

  const chars = str.normalize(normalizeForm);
  const rawSlug = getRawSlug(chars, allowedCharSet);

  let slug = rawSlug.replace(severalSpacesRegex, spaceSymbol);

  if (!spaces) {
    slug = slug.replace(severalSpacesOrHyphensRegex, hyphenSymbol);
  }

  if (lower) {
    slug = slug.toLowerCase();
  }

  return slug;
};

src/mocks.ts

+34
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,34 @@
1+
export default [
2+
[
3+
'The \u212B symbol invented by A. J. \u00C5ngstr\u00F6m (1814, L\u00F6gd\u00F6, \u2013 1874) denotes the length 10\u207B\u00B9\u2070 m.',
4+
'the-å-symbol-invented-by-a-j-ångström-1814-lögdö-1874-denotes-the-length-1010-m',
5+
],
6+
['Ґатунок Їхніх обценьок неперевершений!', 'ґатунок-їхніх-обценьок-неперевершений'],
7+
['xx x - "#$@ x', 'xx-x-x'],
8+
['Bän...g (bang)', 'bäng-bang'],
9+
[' a ', 'a'],
10+
['tags/', 'tags'],
11+
['y_u_no', 'y_u_no'],
12+
['el-ni\xf1o', 'el-ni\xf1o'],
13+
['¿x', 'x'],
14+
['فار,سي', 'فارسي'],
15+
['เแโ|ใไ', 'เแโใไ'],
16+
['संसद में काम नहीं तो वेतन क्यों?', 'संसद-में-काम-नहीं-तो-वेतन-क्यों'],
17+
[
18+
"เร่งรัด 'ปรับเงินเดือนท้องถิ่น 1 ขั้น' ตามมติ ครม.",
19+
'เร่งรัด-ปรับเงินเดือนท้องถิ่น-1-ขั้น-ตามมติ-ครม',
20+
],
21+
['Ελληνικά - Ελληνικά', 'ελληνικά-ελληνικά'],
22+
['汉语/漢語', '汉语漢語'],
23+
['ϧ΃蒬蓣', '\u03e7蒬蓣'],
24+
['一二三四五六七八九十!。。。', '一二三四五六七八九十'],
25+
['日本語ドキュメンテ(ーション)', '日本語ドキュメンテーション'],
26+
[
27+
'オバマ大統領が病院爆撃の調査へ同意するように、協力してください!',
28+
'オバマ大統領が病院爆撃の調査へ同意するように-協力してください',
29+
],
30+
[
31+
'일본정부 법무대신(法務大臣): 우리는 일본 입관법의 재검토를 요구한다!',
32+
'일본정부-법무대신法務大臣-우리는-일본-입관법의-재검토를-요구한다',
33+
],
34+
];

0 commit comments

Comments
 (0)