feat(rule): RegExp-like Stringをサポート

textlint-ja · May 23, 2018 · 2e35940 · 2e35940
1 parent 754497f
commit 2e35940
Show file tree

Hide file tree

Showing 5 changed files with 2,716 additions and 34 deletions.
diff --git a/README.md b/README.md
@@ -4,6 +4,8 @@
 
 IMEの入力ミスによるtypoを見つけるルールです。
 
+`{日本語}{アルファベット}{日本語}`のように、不自然な形でアルファベットが登場していないかをチェックしています。
+
 ## Example
 
 **OK**:
@@ -51,7 +53,7 @@ textlint --rule ja-unnatural-alphabet README.md
     - 無視するアルファベットや単語の配列
     - デフォルト: `["a", "i", "u", "e", "o", "n", 典型例 ]`
     - デフォルトでは母音とnを除外している
-    - `"/正規表現/"` のような文字列もサポート
+    - `"/正規表現/"` のような[RegExp-like String](https://github.com/textlint/regexp-string-matcher#regexp-like-string)もサポートしています
 
 ```json5
 {
@@ -67,6 +69,36 @@ textlint --rule ja-unnatural-alphabet README.md
 }
 ```
 
+### `allow`: `string[]`
+
+`allow`オプションには、エラーとしたくない文字列または[RegExp-like String](https://github.com/textlint/regexp-string-matcher#regexp-like-string)を指定できます。
+[RegExp-like String](https://github.com/textlint/regexp-string-matcher#regexp-like-string)についての詳細は次を参照してください。
+
+- [textlint/regexp-string-matcher: Regexp-like string matcher.](https://github.com/textlint/regexp-string-matcher#regexp-like-string)
+
+たとえば、`アンドロイドNを購入する`という文章は`{日本語}{アルファベット}{日本語}`のルールに該当するためエラーとなりますが、`allow`オプションに指定することで、問題ないものとして無視できます。
+
+```json5
+{
+    // 無視する設定を追加
+    "allow": [
+        "アンドロイドN"
+    ]
+}
+```
+
+同様に[RegExp-like String](https://github.com/textlint/regexp-string-matcher#regexp-like-string)を使うことで、`allow`オプションに正規表現での指定が可能です。
+次の設定では、`アンドロイド{アルファベット}`を問題ないものとして扱い、エラーにしません。
+
+```json5
+{
+    "allow": [
+        // RegExp-like String は `/` と `/` で囲む
+        "/アンドロイド[a-zA-Z]/"
+    ]
+}
+```
+
 ## 参考文献
 
 > (3) 不自然なアルファベット

diff --git a/package.json b/package.json
@@ -34,7 +34,7 @@
     "textlint-scripts": "^1.2.2"
   },
   "dependencies": {
-    "escape-string-regexp": "^1.0.5",
+    "@textlint/regexp-string-matcher": "^1.0.2",
     "match-index": "^1.0.1",
     "regx": "^1.0.4"
   }

diff --git a/src/textlint-rule-ja-unnatural-alphabet.js b/src/textlint-rule-ja-unnatural-alphabet.js
@@ -1,6 +1,6 @@
 // MIT © 2017 azu
 "use strict";
-const escapeStringRegexp = require('escape-string-regexp');
+const { matchPatterns } = require("@textlint/regexp-string-matcher");
 const matchCaptureGroupAll = require("match-index").matchCaptureGroupAll;
 const regx = require("regx").default;
 // IME的に入力されそうな文字列
@@ -23,40 +23,16 @@ const matchUnnaturalAlphabet = (text) => {
 };
 
 /**
- * if actual is in the `exceptGroups`, return true
- * @param {MatchCaptureGroup[]} exceptGroups
+ * if actual is in the `matchPatternResults`, return true
+ * @param {matchPatternResult[]} matchPatternResults
  * @param {MatchCaptureGroup} actual
  * @returns {boolean}
  */
-const isIgnoredRange = (exceptGroups, actual) => {
-    return exceptGroups.some(({ text, index }) => {
-        const endIndex = index + text.length;
-        return index <= actual.index && actual.index <= endIndex;
+const isIgnoredRange = (matchPatternResults, actual) => {
+    return matchPatternResults.some(result => {
+        return result.startIndex <= actual.index && actual.index <= result.endIndex;
     });
 };
-/***
- *
- * @param {string} input
- * @param {string[]} allowAlphabets
- * @returns {MatchCaptureGroup[]}
- */
-const createIgnoreRanges = (input, allowAlphabets) => {
-    // str -> RegExp
-    const patterns = allowAlphabets.map(allowWord => {
-        if (!allowWord) {
-            return /^$/;
-        }
-        if (allowWord[0] === "/" && allowWord[allowWord.length - 1] === "/") {
-            const regExpString = allowWord.slice(1, allowWord.length - 1);
-            return new RegExp(`(${regExpString})`, "g");
-        }
-        const escapeString = escapeStringRegexp(allowWord);
-        return new RegExp(`(${escapeString})`, "g");
-    });
-    return patterns.reduce((total, pattern) => {
-        return total.concat(matchCaptureGroupAll(input, pattern));
-    }, []);
-};
 
 /**
  * ビルトインの無視するリスト
@@ -89,7 +65,7 @@ const report = (context, options = {}) => {
     return {
         [Syntax.Str](node){
             const text = getSource(node);
-            const ignoreMatch = createIgnoreRanges(text, allow);
+            const ignoreMatch = matchPatterns(text, allow);
             matchUnnaturalAlphabet(text).forEach((actual) => {
                 const { text, index } = actual;
                 // 無視する単語を含んでいるなら無視

diff --git a/test/textlint-rule-ja-unnatural-alphabet-test.js b/test/textlint-rule-ja-unnatural-alphabet-test.js
@@ -20,6 +20,16 @@ tester.run("textlint-rule-ja-unnatural-alphabet", rule, {
             options: {
                 allow: ["N"]
             }
+        }, {
+            text: "アンドロイドNは良し",
+            options: {
+                allow: ["アンドロイドN"]
+            }
+        }, {
+            text: "アンドロイドNは良し",
+            options: {
+                allow: ["/アンドロイド[a-zA-Z]/"]
+            }
         }
     ],
     invalid: [
@@ -48,4 +58,4 @@ tester.run("textlint-rule-ja-unnatural-alphabet", rule, {
             ]
         }
     ]
-});
+});