Skip to content

Commit

Permalink
Porting quoted text recognizer into project - v0 (#2528)
Browse files Browse the repository at this point in the history
* Add QuotedText recognizer skeleton

* Add structure to support multi-language

* Add English and Chinese quoted recognizer

* Add initial test spec cases

* Refinements according to code review

* Add NotSupportedByDesign annotations to test specs

* Update text model and fix code style issues

* Add multi-language quoted text recognizers

* Update QuotedText recognizer to expose models

* Filter French frequent false positive in Currency

* Fix build break

* Add NotSupportedByDesign attributes in specs for all non-dotnet platforms


Co-authored-by: Tom Laird-McConnell <[email protected]>
  • Loading branch information
mzyhappy authored May 6, 2021
1 parent 9003008 commit f820d76
Show file tree
Hide file tree
Showing 91 changed files with 1,836 additions and 14 deletions.
26 changes: 26 additions & 0 deletions .NET/Microsoft.Recognizers.Definitions.Common/BaseQuotedText.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
//------------------------------------------------------------------------------
// <auto-generated>
// This code was generated by a tool.
// Changes to this file may cause incorrect behavior and will be lost if
// the code is regenerated.
//
// Generation parameters:
// - DataFilename: Patterns\Base-QuotedText.yaml
// - Language: NULL
// - ClassName: BaseQuotedText
// </auto-generated>
//
// Copyright (c) Microsoft Corporation. All rights reserved.
// Licensed under the MIT License.
// ------------------------------------------------------------------------------

namespace Microsoft.Recognizers.Definitions
{
using System;
using System.Collections.Generic;

/// <summary>
/// Quoted-text pattern constants generated from Patterns\Base-QuotedText.yaml
/// (the generation header lists no language for this class).
/// </summary>
public static class BaseQuotedText
{
/// <summary>
/// Matches an apostrophe-delimited run of one or more ASCII letters, digits or underscores.
/// Group 1 captures the text including both apostrophes; group 2 captures only the inner text.
/// </summary>
public const string QuotedTextRegex = @"('([a-zA-Z0-9_]+)')";
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
<#@ template debug="true" hostspecific="true" language="C#" #>
<#
// Generation parameters for the BaseQuotedText definitions class;
// presumably read by the CommonDefinitions.ttinclude included below (not visible here).
this.DataFilename = @"Patterns\Base-QuotedText.yaml";
// No language is set for the base definitions.
this.Language = null;
this.ClassName = "BaseQuotedText";
#>
<#@ include file=".\CommonDefinitions.ttinclude"#>
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
//------------------------------------------------------------------------------
// <auto-generated>
// This code was generated by a tool.
// Changes to this file may cause incorrect behavior and will be lost if
// the code is regenerated.
//
// Generation parameters:
// - DataFilename: Patterns\Chinese\Chinese-QuotedText.yaml
// - Language: Chinese
// - ClassName: QuotedTextDefinitions
// </auto-generated>
//
// Copyright (c) Microsoft Corporation. All rights reserved.
// Licensed under the MIT License.
// ------------------------------------------------------------------------------

namespace Microsoft.Recognizers.Definitions.Chinese
{
using System;
using System.Collections.Generic;

/// <summary>
/// Quoted-text regex pattern strings for Chinese. Each pattern has two capture groups:
/// group 1 is the quotation including its delimiters, group 2 is the non-empty text between them.
/// </summary>
public static class QuotedTextDefinitions
{
// Short language tag; how it is consumed is not visible in this file.
public const string LangMarker = @"Chs";
// Curly double quotes: “…” (inner text may contain neither “ nor ”).
public const string QuotedTextRegex1 = @"(“([^“”]+)”)";
// Curly single quotes: ‘…’ (inner text may contain neither ‘ nor ’).
public const string QuotedTextRegex2 = @"(‘([^‘’]+)’)";
// White corner brackets: 『…』
public const string QuotedTextRegex3 = @"(『([^『』]+)』)";
// Corner brackets: 「…」
public const string QuotedTextRegex4 = @"(「([^「」]+)」)";
// Vertical-form white corner brackets: ﹃…﹄
public const string QuotedTextRegex5 = @"(﹃([^﹃﹄]+)﹄)";
// Vertical-form corner brackets: ﹁…﹂
public const string QuotedTextRegex6 = @"(﹁([^﹁﹂]+)﹂)";
// Straight double quotes; "" is the verbatim-string escape for a single " character.
public const string QuotedTextRegex7 = @"(""([^""]+)"")";
// Backslash-apostrophe delimiters. In a verbatim string, \\' is the regex for a literal
// backslash followed by an apostrophe, so plain '…' without backslashes is not matched here.
// NOTE(review): confirm the required backslash is intended and not an escaping artifact.
public const string QuotedTextRegex8 = @"(\\'([^\']+)\\')";
// Backticks: `…`
public const string QuotedTextRegex9 = @"(`([^`]+)`)";
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
<#@ template debug="true" hostspecific="true" language="C#" #>
<#
// Generation parameters for the Chinese QuotedTextDefinitions class;
// presumably read by the CommonDefinitions.ttinclude included below (not visible here).
this.DataFilename = @"Patterns\Chinese\Chinese-QuotedText.yaml";
this.Language = "Chinese";
this.ClassName = "QuotedTextDefinitions";
#>
<#@ include file="..\CommonDefinitions.ttinclude"#>
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
//------------------------------------------------------------------------------
// <auto-generated>
// This code was generated by a tool.
// Changes to this file may cause incorrect behavior and will be lost if
// the code is regenerated.
//
// Generation parameters:
// - DataFilename: Patterns\Dutch\Dutch-QuotedText.yaml
// - Language: Dutch
// - ClassName: QuotedTextDefinitions
// </auto-generated>
//
// Copyright (c) Microsoft Corporation. All rights reserved.
// Licensed under the MIT License.
// ------------------------------------------------------------------------------

namespace Microsoft.Recognizers.Definitions.Dutch
{
using System;
using System.Collections.Generic;

/// <summary>
/// Quoted-text regex pattern strings for Dutch. In patterns 1-7, group 1 is the quotation
/// including its delimiters and group 2 is the non-empty text between them.
/// </summary>
public static class QuotedTextDefinitions
{
// Short language tag; how it is consumed is not visible in this file.
public const string LangMarker = @"Dut";
// Curly double quotes: “…”
public const string QuotedTextRegex1 = @"(“([^“”]+)”)";
// Curly single quotes: ‘…’
public const string QuotedTextRegex2 = @"(‘([^‘’]+)’)";
// Low opening double quote with curly closing quote: „…”
public const string QuotedTextRegex3 = @"(„([^„”]+)”)";
// Opening delimiter followed by text and a closing ’ or '.
// NOTE(review): the opening character appears to be an ASCII comma rather than U+201A (‚).
// If so, ordinary text such as ", it's" would match — verify against the YAML source.
public const string QuotedTextRegex4 = @"(,([^,’']+)[’'])";
// Straight double quotes; "" is the verbatim-string escape for a single " character.
public const string QuotedTextRegex5 = @"(""([^""]+)"")";
// Backslash-apostrophe delimiters. In a verbatim string, \\' is the regex for a literal
// backslash followed by an apostrophe, so plain '…' without backslashes is not matched here.
// NOTE(review): confirm the required backslash is intended and not an escaping artifact.
public const string QuotedTextRegex6 = @"(\\'([^\']+)\\')";
// Backticks: `…`
public const string QuotedTextRegex7 = @"(`([^`]+)`)";
// Filler patterns: each matches only the literal digits 123456 and has no inner capture group.
// NOTE(review): presumably pads the set to nine patterns; input containing 123456 would
// still match — confirm this is intended.
public const string QuotedTextRegex8 = @"(123456)";
public const string QuotedTextRegex9 = @"(123456)";
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
<#@ template debug="true" hostspecific="true" language="C#" #>
<#
// Generation parameters for the Dutch QuotedTextDefinitions class;
// presumably read by the CommonDefinitions.ttinclude included below (not visible here).
this.DataFilename = @"Patterns\Dutch\Dutch-QuotedText.yaml";
this.Language = "Dutch";
this.ClassName = "QuotedTextDefinitions";
#>
<#@ include file="..\CommonDefinitions.ttinclude"#>
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
//------------------------------------------------------------------------------
// <auto-generated>
// This code was generated by a tool.
// Changes to this file may cause incorrect behavior and will be lost if
// the code is regenerated.
//
// Generation parameters:
// - DataFilename: Patterns\English\English-QuotedText.yaml
// - Language: English
// - ClassName: QuotedTextDefinitions
// </auto-generated>
//
// Copyright (c) Microsoft Corporation. All rights reserved.
// Licensed under the MIT License.
// ------------------------------------------------------------------------------

namespace Microsoft.Recognizers.Definitions.English
{
using System;
using System.Collections.Generic;

/// <summary>
/// Quoted-text regex pattern strings for English. In patterns 1-5, group 1 is the quotation
/// including its delimiters and group 2 is the non-empty text between them.
/// </summary>
public static class QuotedTextDefinitions
{
// Short language tag; how it is consumed is not visible in this file.
public const string LangMarker = @"Eng";
// Curly double quotes: “…”
public const string QuotedTextRegex1 = @"(“([^“”]+)”)";
// Curly single quotes: ‘…’
public const string QuotedTextRegex2 = @"(‘([^‘’]+)’)";
// Straight double quotes; "" is the verbatim-string escape for a single " character.
public const string QuotedTextRegex3 = @"(""([^""]+)"")";
// Backslash-apostrophe delimiters. In a verbatim string, \\' is the regex for a literal
// backslash followed by an apostrophe, so plain '…' without backslashes is not matched here.
// NOTE(review): confirm the required backslash is intended and not an escaping artifact.
public const string QuotedTextRegex4 = @"(\\'([^\']+)\\')";
// Backticks: `…`
public const string QuotedTextRegex5 = @"(`([^`]+)`)";
// Filler patterns: each matches only the literal digits 123456 and has no inner capture group.
// NOTE(review): presumably pads the set to nine patterns; input containing 123456 would
// still match — confirm this is intended.
public const string QuotedTextRegex6 = @"(123456)";
public const string QuotedTextRegex7 = @"(123456)";
public const string QuotedTextRegex8 = @"(123456)";
public const string QuotedTextRegex9 = @"(123456)";
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
<#@ template debug="true" hostspecific="true" language="C#" #>
<#
// Generation parameters for the English QuotedTextDefinitions class;
// presumably read by the CommonDefinitions.ttinclude included below (not visible here).
this.DataFilename = @"Patterns\English\English-QuotedText.yaml";
this.Language = "English";
this.ClassName = "QuotedTextDefinitions";
#>
<#@ include file="..\CommonDefinitions.ttinclude"#>
Original file line number Diff line number Diff line change
Expand Up @@ -604,7 +604,8 @@ public static class NumbersWithUnitDefinitions
@"sos",
@"std",
@"try",
@"yer"
@"yer",
@"livre"
};
public static readonly Dictionary<string, string> InformationSuffixList = new Dictionary<string, string>
{
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
//------------------------------------------------------------------------------
// <auto-generated>
// This code was generated by a tool.
// Changes to this file may cause incorrect behavior and will be lost if
// the code is regenerated.
//
// Generation parameters:
// - DataFilename: Patterns\French\French-QuotedText.yaml
// - Language: French
// - ClassName: QuotedTextDefinitions
// </auto-generated>
//
// Copyright (c) Microsoft Corporation. All rights reserved.
// Licensed under the MIT License.
// ------------------------------------------------------------------------------

namespace Microsoft.Recognizers.Definitions.French
{
using System;
using System.Collections.Generic;

/// <summary>
/// Quoted-text regex pattern strings for French. In patterns 1-7, group 1 is the quotation
/// including its delimiters and group 2 is the non-empty text between them.
/// </summary>
public static class QuotedTextDefinitions
{
// Short language tag; how it is consumed is not visible in this file.
public const string LangMarker = @"Fr";
// Curly double quotes: “…”
public const string QuotedTextRegex1 = @"(“([^“”]+)”)";
// Curly single quotes: ‘…’
public const string QuotedTextRegex2 = @"(‘([^‘’]+)’)";
// Straight double quotes; "" is the verbatim-string escape for a single " character.
public const string QuotedTextRegex3 = @"(""([^""]+)"")";
// Backslash-apostrophe delimiters. In a verbatim string, \\' is the regex for a literal
// backslash followed by an apostrophe, so plain '…' without backslashes is not matched here.
// NOTE(review): confirm the required backslash is intended and not an escaping artifact.
public const string QuotedTextRegex4 = @"(\\'([^\']+)\\')";
// Backticks: `…`
public const string QuotedTextRegex5 = @"(`([^`]+)`)";
// Double guillemets: «…»
public const string QuotedTextRegex6 = @"(«([^«»]+)»)";
// Single guillemets: ‹…›
public const string QuotedTextRegex7 = @"(‹([^‹›]+)›)";
// Filler pattern: matches only the literal digits 123456 and has no inner capture group.
// NOTE(review): presumably pads the set to nine patterns; input containing 123456 would
// still match — confirm this is intended.
public const string QuotedTextRegex8 = @"(123456)";
// Matches the literal text 123456--- (only the digits are inside the capture group).
// NOTE(review): differs from the (123456) filler used for QuotedTextRegex8 above;
// confirm the trailing --- is intentional.
public const string QuotedTextRegex9 = @"(123456)---";
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
<#@ template debug="true" hostspecific="true" language="C#" #>
<#
// Generation parameters for the French QuotedTextDefinitions class;
// presumably read by the CommonDefinitions.ttinclude included below (not visible here).
this.DataFilename = @"Patterns\French\French-QuotedText.yaml";
this.Language = "French";
this.ClassName = "QuotedTextDefinitions";
#>
<#@ include file="..\CommonDefinitions.ttinclude"#>
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
//------------------------------------------------------------------------------
// <auto-generated>
// This code was generated by a tool.
// Changes to this file may cause incorrect behavior and will be lost if
// the code is regenerated.
//
// Generation parameters:
// - DataFilename: Patterns\German\German-QuotedText.yaml
// - Language: German
// - ClassName: QuotedTextDefinitions
// </auto-generated>
//
// Copyright (c) Microsoft Corporation. All rights reserved.
// Licensed under the MIT License.
// ------------------------------------------------------------------------------

namespace Microsoft.Recognizers.Definitions.German
{
using System;
using System.Collections.Generic;

/// <summary>
/// Quoted-text regex pattern strings for German. Each pattern has two capture groups:
/// group 1 is the quotation including its delimiters, group 2 is the non-empty text between them.
/// </summary>
public static class QuotedTextDefinitions
{
// Short language tag; how it is consumed is not visible in this file.
public const string LangMarker = @"Ger";
// Low opening double quote with “ as the closing quote: „…“
public const string QuotedTextRegex1 = @"(„([^„“]+)“)";
// Low opening single quote with ‘ as the closing quote: ‚…‘
public const string QuotedTextRegex2 = @"(‚([^‚‘]+)‘)";
// Straight double quotes; "" is the verbatim-string escape for a single " character.
public const string QuotedTextRegex3 = @"(""([^""]+)"")";
// Backslash-apostrophe delimiters. In a verbatim string, \\' is the regex for a literal
// backslash followed by an apostrophe, so plain '…' without backslashes is not matched here.
// NOTE(review): confirm the required backslash is intended and not an escaping artifact.
public const string QuotedTextRegex4 = @"(\\'([^\']+)\\')";
// Backticks: `…`
public const string QuotedTextRegex5 = @"(`([^`]+)`)";
// Double guillemets pointing outward: «…»
public const string QuotedTextRegex6 = @"(«([^«»]+)»)";
// Single guillemets pointing outward: ‹…›
public const string QuotedTextRegex7 = @"(‹([^‹›]+)›)";
// Double guillemets pointing inward: »…«
public const string QuotedTextRegex8 = @"(»([^»«]+)«)";
// Single guillemets pointing inward: ›…‹
public const string QuotedTextRegex9 = @"(›([^›‹]+)‹)";
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
<#@ template debug="true" hostspecific="true" language="C#" #>
<#
// Generation parameters for the German QuotedTextDefinitions class;
// presumably read by the CommonDefinitions.ttinclude included below (not visible here).
this.DataFilename = @"Patterns\German\German-QuotedText.yaml";
this.Language = "German";
this.ClassName = "QuotedTextDefinitions";
#>
<#@ include file="..\CommonDefinitions.ttinclude"#>
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
//------------------------------------------------------------------------------
// <auto-generated>
// This code was generated by a tool.
// Changes to this file may cause incorrect behavior and will be lost if
// the code is regenerated.
//
// Generation parameters:
// - DataFilename: Patterns\Hindi\Hindi-QuotedText.yaml
// - Language: Hindi
// - ClassName: QuotedTextDefinitions
// </auto-generated>
//
// Copyright (c) Microsoft Corporation. All rights reserved.
// Licensed under the MIT License.
// ------------------------------------------------------------------------------

namespace Microsoft.Recognizers.Definitions.Hindi
{
using System;
using System.Collections.Generic;

/// <summary>
/// Quoted-text regex pattern strings for Hindi. In patterns 1-5, group 1 is the quotation
/// including its delimiters and group 2 is the non-empty text between them.
/// </summary>
public static class QuotedTextDefinitions
{
// Short language tag; how it is consumed is not visible in this file.
public const string LangMarker = @"Hin";
// Curly double quotes: “…”
public const string QuotedTextRegex1 = @"(“([^“”]+)”)";
// Curly single quotes: ‘…’
public const string QuotedTextRegex2 = @"(‘([^‘’]+)’)";
// Straight double quotes; "" is the verbatim-string escape for a single " character.
public const string QuotedTextRegex3 = @"(""([^""]+)"")";
// Backslash-apostrophe delimiters. In a verbatim string, \\' is the regex for a literal
// backslash followed by an apostrophe, so plain '…' without backslashes is not matched here.
// NOTE(review): confirm the required backslash is intended and not an escaping artifact.
public const string QuotedTextRegex4 = @"(\\'([^\']+)\\')";
// Backticks: `…`
public const string QuotedTextRegex5 = @"(`([^`]+)`)";
// Filler patterns: each matches only the literal digits 123456 and has no inner capture group.
// NOTE(review): presumably pads the set to nine patterns; input containing 123456 would
// still match — confirm this is intended.
public const string QuotedTextRegex6 = @"(123456)";
public const string QuotedTextRegex7 = @"(123456)";
public const string QuotedTextRegex8 = @"(123456)";
public const string QuotedTextRegex9 = @"(123456)";
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
<#@ template debug="true" hostspecific="true" language="C#" #>
<#
// Generation parameters for the Hindi QuotedTextDefinitions class;
// presumably read by the CommonDefinitions.ttinclude included below (not visible here).
this.DataFilename = @"Patterns\Hindi\Hindi-QuotedText.yaml";
this.Language = "Hindi";
this.ClassName = "QuotedTextDefinitions";
#>
<#@ include file="..\CommonDefinitions.ttinclude"#>
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
//------------------------------------------------------------------------------
// <auto-generated>
// This code was generated by a tool.
// Changes to this file may cause incorrect behavior and will be lost if
// the code is regenerated.
//
// Generation parameters:
// - DataFilename: Patterns\Italian\Italian-QuotedText.yaml
// - Language: Italian
// - ClassName: QuotedTextDefinitions
// </auto-generated>
//
// Copyright (c) Microsoft Corporation. All rights reserved.
// Licensed under the MIT License.
// ------------------------------------------------------------------------------

namespace Microsoft.Recognizers.Definitions.Italian
{
using System;
using System.Collections.Generic;

/// <summary>
/// Quoted-text regex pattern strings for Italian. In patterns 1-7, group 1 is the quotation
/// including its delimiters and group 2 is the non-empty text between them.
/// </summary>
public static class QuotedTextDefinitions
{
// Short language tag; how it is consumed is not visible in this file.
public const string LangMarker = @"Ita";
// Curly double quotes: “…”
public const string QuotedTextRegex1 = @"(“([^“”]+)”)";
// Curly single quotes: ‘…’
public const string QuotedTextRegex2 = @"(‘([^‘’]+)’)";
// Straight double quotes; "" is the verbatim-string escape for a single " character.
public const string QuotedTextRegex3 = @"(""([^""]+)"")";
// Backslash-apostrophe delimiters. In a verbatim string, \\' is the regex for a literal
// backslash followed by an apostrophe, so plain '…' without backslashes is not matched here.
// NOTE(review): confirm the required backslash is intended and not an escaping artifact.
public const string QuotedTextRegex4 = @"(\\'([^\']+)\\')";
// Backticks: `…`
public const string QuotedTextRegex5 = @"(`([^`]+)`)";
// Double guillemets: «…»
public const string QuotedTextRegex6 = @"(«([^«»]+)»)";
// Single guillemets: ‹…›
public const string QuotedTextRegex7 = @"(‹([^‹›]+)›)";
// Filler patterns: each matches only the literal digits 123456 and has no inner capture group.
// NOTE(review): presumably pads the set to nine patterns; input containing 123456 would
// still match — confirm this is intended.
public const string QuotedTextRegex8 = @"(123456)";
public const string QuotedTextRegex9 = @"(123456)";
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
<#@ template debug="true" hostspecific="true" language="C#" #>
<#
// Generation parameters for the Italian QuotedTextDefinitions class;
// presumably read by the CommonDefinitions.ttinclude included below (not visible here).
this.DataFilename = @"Patterns\Italian\Italian-QuotedText.yaml";
this.Language = "Italian";
this.ClassName = "QuotedTextDefinitions";
#>
<#@ include file="..\CommonDefinitions.ttinclude"#>
Loading

0 comments on commit f820d76

Please sign in to comment.