Skip to content

Commit b0438a8

Browse files
committed
Apply grammar rules to token replacement values
Added support for the “grammaticalization” system in osrm-text-instructions. The Russian grammar file is now converted into a plist and used to inflect road names according to the cases specified in the instructions.
1 parent 08de875 commit b0438a8

File tree

4 files changed

+146
-13
lines changed

4 files changed

+146
-13
lines changed

OSRMTextInstructions/OSRMTextInstructions.swift

+120-9
Original file line numberDiff line numberDiff line change
@@ -3,22 +3,55 @@ import MapboxDirections
33

44
// Will automatically read localized Instructions.plist
55
let OSRMTextInstructionsStrings = NSDictionary(contentsOfFile: Bundle(for: OSRMInstructionFormatter.self).path(forResource: "Instructions", ofType: "plist")!)!
6+
/// Contents of the localized Grammar.plist in the framework bundle, or `nil` when the
/// bundle has no such resource or the file cannot be read as a dictionary.
let OSRMTextInstructionsGrammar: NSDictionary? = {
    let frameworkBundle = Bundle(for: OSRMInstructionFormatter.self)
    let grammarPath = frameworkBundle.path(forResource: "Grammar", ofType: "plist")
    return grammarPath.flatMap { NSDictionary(contentsOfFile: $0) }
}()
13+
14+
extension NSRegularExpression.Options {
    /**
     Creates regular expression options from a string of JavaScript-style regular expression flags.

     `i`, `m`, and `u` are translated to `caseInsensitive`, `anchorsMatchLines`, and `useUnicodeWordBoundaries`, respectively. Every other character, including `g` and `y`, contributes nothing.

     - parameter javaScriptFlags: The flag characters, for example `"gi"`.
     */
    init(javaScriptFlags: String) {
        // Character classes are always Unicode-aware in ICU regular expressions, so `u`
        // only needs to switch on Unicode word boundaries.
        let equivalents: [Character: NSRegularExpression.Options] = [
            "i": .caseInsensitive,
            "m": .anchorsMatchLines,
            "u": .useUnicodeWordBoundaries,
        ]

        var translated: NSRegularExpression.Options = []
        for flag in javaScriptFlags.characters {
            // Flags without an entry (`g`, `y`, anything unrecognized) are ignored.
            if let option = equivalents[flag] {
                translated.insert(option)
            }
        }
        self.init(rawValue: translated.rawValue)
    }
}
637

738
protocol Tokenized {
839
associatedtype T
940

1041
/**
1142
Replaces `{tokens}` in the receiver using the given closure.
1243
*/
13-
func replacingTokens(using interpolator: ((TokenType) -> T)) -> T
44+
func replacingTokens(using interpolator: ((TokenType, String?) -> T)) -> T
45+
46+
func inflected(into variant: String, version: String) -> T
1447
}
1548

1649
extension String: Tokenized {
1750
public var sentenceCased: String {
1851
return String(characters.prefix(1)).uppercased() + String(characters.dropFirst())
1952
}
2053

21-
public func replacingTokens(using interpolator: ((TokenType) -> String)) -> String {
54+
public func replacingTokens(using interpolator: ((TokenType, String?) -> String)) -> String {
2255
let scanner = Scanner(string: self)
2356
scanner.charactersToBeSkipped = nil
2457
var result = ""
@@ -38,9 +71,17 @@ extension String: Tokenized {
3871
continue
3972
}
4073

74+
var variant: NSString?
75+
if scanner.scanString(":", into: nil) {
76+
guard scanner.scanUpTo("}", into: &variant) else {
77+
result += ":"
78+
continue
79+
}
80+
}
81+
4182
if scanner.scanString("}", into: nil) {
4283
if let tokenType = TokenType(description: token! as String) {
43-
result += interpolator(tokenType)
84+
result += interpolator(tokenType, variant as String?)
4485
} else {
4586
result += "{\(token!)}"
4687
}
@@ -59,10 +100,34 @@ extension String: Tokenized {
59100
}
60101
return result
61102
}
103+
104+
/**
 Returns the receiver rewritten by the grammar rules for the given variant.

 The rules are read from `OSRMTextInstructionsGrammar[version][variant]` as an array of `[pattern, template]` pairs and are applied in order as regular expression substitutions. The regular expression options come from the `regExpFlags` entry of the grammar's `meta` dictionary, if present.

 - parameter variant: The key of the rule set inside the versioned grammar dictionary.
 - parameter version: The top-level key of the grammar dictionary to look under.
 - returns: The receiver itself if no grammar or no rule set is found; otherwise the rewritten string with leading and trailing whitespace removed.
 */
func inflected(into variant: String, version: String) -> String {
    guard let grammar = OSRMTextInstructionsGrammar?[version] as? [String: Any],
        let rules = grammar[variant] as? [[String]] else {
        return self
    }

    var regularExpressionOptions: NSRegularExpression.Options = []
    if let meta = OSRMTextInstructionsGrammar?["meta"] as? [String: String],
        let flags = meta["regExpFlags"] {
        regularExpressionOptions = NSRegularExpression.Options(javaScriptFlags: flags)
    }

    // The text is padded with a space on each side before the rules run and trimmed
    // afterwards — presumably so that rules can match on surrounding spaces.
    var grammaticalReplacement = " \(self) "

    for rule in rules {
        // A rule must be a [pattern, template] pair with a valid pattern. A malformed
        // rule is flagged in debug builds and skipped in release builds instead of
        // crashing on an out-of-bounds index or a failed `try!`.
        guard rule.count >= 2,
            let regularExpression = try? NSRegularExpression(pattern: rule[0], options: regularExpressionOptions) else {
            assertionFailure("Malformed grammar rule: \(rule)")
            continue
        }

        // NSRange is measured in UTF-16 code units, not in Characters; using the
        // character count would cut the range short for text containing characters
        // that occupy more than one UTF-16 code unit.
        let entireRange = NSRange(location: 0, length: grammaticalReplacement.utf16.count)
        grammaticalReplacement = regularExpression.stringByReplacingMatches(in: grammaticalReplacement, options: [], range: entireRange, withTemplate: rule[1])
    }

    return grammaticalReplacement.trimmingCharacters(in: .whitespaces)
}
62127
}
63128

64129
extension NSAttributedString: Tokenized {
65-
public func replacingTokens(using interpolator: ((TokenType) -> NSAttributedString)) -> NSAttributedString {
130+
public func replacingTokens(using interpolator: ((TokenType, String?) -> NSAttributedString)) -> NSAttributedString {
66131
let scanner = Scanner(string: string)
67132
scanner.charactersToBeSkipped = nil
68133
let result = NSMutableAttributedString()
@@ -78,12 +143,21 @@ extension NSAttributedString: Tokenized {
78143

79144
var token: NSString?
80145
guard scanner.scanUpTo("}", into: &token) else {
146+
result.append(NSAttributedString(string: "}"))
81147
continue
82148
}
83149

150+
var variant: NSString?
151+
if scanner.scanString(":", into: nil) {
152+
guard scanner.scanUpTo("}", into: &variant) else {
153+
result.append(NSAttributedString(string: "}"))
154+
continue
155+
}
156+
}
157+
84158
if scanner.scanString("}", into: nil) {
85159
if let tokenType = TokenType(description: token! as String) {
86-
result.append(interpolator(tokenType))
160+
result.append(interpolator(tokenType, variant as String?))
87161
}
88162
} else {
89163
result.append(NSAttributedString(string: token! as String))
@@ -101,6 +175,34 @@ extension NSAttributedString: Tokenized {
101175
}
102176
return result as NSAttributedString
103177
}
178+
179+
/**
 Returns the receiver rewritten by the grammar rules for the given variant.

 The rules are read from `OSRMTextInstructionsGrammar[version][variant]` as an array of `[pattern, template]` pairs and are applied in order as regular expression substitutions on the backing string of a mutable copy of the receiver. The regular expression options come from the `regExpFlags` entry of the grammar's `meta` dictionary, if present.

 - parameter variant: The key of the rule set inside the versioned grammar dictionary.
 - parameter version: The top-level key of the grammar dictionary to look under.
 - returns: The receiver itself if no grammar or no rule set is found; otherwise the rewritten attributed string with leading and trailing spaces removed.
 */
@nonobjc func inflected(into variant: String, version: String) -> NSAttributedString {
    guard let grammar = OSRMTextInstructionsGrammar?[version] as? [String: Any],
        let rules = grammar[variant] as? [[String]] else {
        return self
    }

    // The text is padded with a space on each side before the rules run and stripped
    // afterwards — presumably so that rules can match on surrounding spaces.
    let grammaticalReplacement = NSMutableAttributedString(string: " ")
    grammaticalReplacement.append(self)
    grammaticalReplacement.append(NSAttributedString(string: " "))

    var regularExpressionOptions: NSRegularExpression.Options = []
    if let meta = OSRMTextInstructionsGrammar?["meta"] as? [String: String],
        let flags = meta["regExpFlags"] {
        regularExpressionOptions = NSRegularExpression.Options(javaScriptFlags: flags)
    }

    for rule in rules {
        // A rule must be a [pattern, template] pair with a valid pattern. A malformed
        // rule is flagged in debug builds and skipped in release builds instead of
        // crashing on an out-of-bounds index or a failed `try!`.
        guard rule.count >= 2,
            let regularExpression = try? NSRegularExpression(pattern: rule[0], options: regularExpressionOptions) else {
            assertionFailure("Malformed grammar rule: \(rule)")
            continue
        }

        // The length is re-read for every rule because earlier replacements may have
        // changed it.
        let entireRange = NSRange(location: 0, length: grammaticalReplacement.mutableString.length)
        regularExpression.replaceMatches(in: grammaticalReplacement.mutableString, options: [], range: entireRange, withTemplate: rule[1])
    }

    // Strip the padding spaces added above.
    grammaticalReplacement.mutableString.replaceOccurrences(of: "^ +| +$", with: "", options: .regularExpression, range: NSRange(location: 0, length: grammaticalReplacement.mutableString.length))
    return grammaticalReplacement
}
104206
}
105207

106208
public class OSRMInstructionFormatter: Formatter {
@@ -323,15 +425,21 @@ public class OSRMInstructionFormatter: Formatter {
323425
let attributedName = NSAttributedString(string: name, attributes: attrs)
324426
let attributedRef = NSAttributedString(string: ref, attributes: attrs)
325427
let phrase = NSAttributedString(string: self.phrase(named: .nameWithCode), attributes: attrs)
326-
wayName = phrase.replacingTokens(using: { (tokenType) -> NSAttributedString in
428+
wayName = phrase.replacingTokens(using: { (tokenType, variant) -> NSAttributedString in
429+
var replacement: NSAttributedString
327430
switch tokenType {
328431
case .wayName:
329-
return modifyValueByKey?(.wayName, attributedName) ?? attributedName
432+
replacement = attributedName
330433
case .code:
331-
return modifyValueByKey?(.code, attributedRef) ?? attributedRef
434+
replacement = attributedRef
332435
default:
333436
fatalError("Unexpected token type \(tokenType) in name-and-ref phrase")
334437
}
438+
439+
if let variant = variant {
440+
replacement = replacement.inflected(into: variant, version: version)
441+
}
442+
return modifyValueByKey?(tokenType, replacement) ?? replacement
335443
})
336444
} else if let ref = ref, isMotorway, let decimalRange = ref.rangeOfCharacter(from: .decimalDigits), !decimalRange.isEmpty {
337445
let attributedRef = NSAttributedString(string: ref, attributes: attrs)
@@ -411,7 +519,7 @@ public class OSRMInstructionFormatter: Formatter {
411519
if step.finalHeading != nil { bearing = Int(step.finalHeading! as Double) }
412520

413521
// Replace tokens
414-
let result = NSAttributedString(string: instruction, attributes: attrs).replacingTokens { (tokenType) -> NSAttributedString in
522+
let result = NSAttributedString(string: instruction, attributes: attrs).replacingTokens { (tokenType, variant) -> NSAttributedString in
415523
var replacement: String
416524
switch tokenType {
417525
case .code: replacement = step.codes?.first ?? ""
@@ -430,6 +538,9 @@ public class OSRMInstructionFormatter: Formatter {
430538
if tokenType == .wayName {
431539
return wayName // already modified above
432540
} else {
541+
if let variant = variant {
542+
replacement = replacement.inflected(into: variant, version: version)
543+
}
433544
let attributedReplacement = NSAttributedString(string: replacement, attributes: attrs)
434545
return modifyValueByKey?(tokenType, attributedReplacement) ?? attributedReplacement
435546
}

OSRMTextInstructionsTests/OSRMTextInstructionsTests.swift

+1-1
Original file line numberDiff line numberDiff line change
@@ -48,7 +48,7 @@ class OSRMTextInstructionsTests: XCTestCase {
4848
let fixtureOptions = json["options"] as! [String: String]
4949

5050
let expectedValue = (json["phrases"] as! [String: String])["en"]
51-
let actualValue = phrase?.replacingTokens(using: { (tokenType) -> String in
51+
let actualValue = phrase?.replacingTokens(using: { (tokenType, variant) -> String in
5252
var replacement: String?
5353
switch tokenType {
5454
case .firstInstruction:

OSRMTextInstructionsTests/TokenTests.swift

+10-2
Original file line numberDiff line numberDiff line change
@@ -1,12 +1,12 @@
11
import XCTest
2-
import OSRMTextInstructions
2+
@testable import OSRMTextInstructions
33

44
class TokenTests: XCTestCase {
55
func testReplacingTokens() {
66
XCTAssertEqual("Dead Beef", "Dead Beef".replacingTokens { _ in "" })
77
XCTAssertEqual("Food", "F{ref}{ref}d".replacingTokens { _ in "o" })
88

9-
XCTAssertEqual("Take the left stairs to the 20th floor", "Take the {modifier} stairs to the {nth} floor".replacingTokens { (tokenType) -> String in
9+
XCTAssertEqual("Take the left stairs to the 20th floor", "Take the {modifier} stairs to the {nth} floor".replacingTokens { (tokenType, variant) -> String in
1010
switch tokenType {
1111
case .modifier:
1212
return "left"
@@ -19,8 +19,16 @@ class TokenTests: XCTestCase {
1919
})
2020

2121
XCTAssertEqual("{👿}", "{👿}".replacingTokens { _ in "👼" })
22+
XCTAssertEqual("{👿:}", "{👿:}".replacingTokens { _ in "👼" })
23+
XCTAssertEqual("{👿:💣}", "{👿:💣}".replacingTokens { _ in "👼" })
2224
XCTAssertEqual("{", "{".replacingTokens { _ in "🕳" })
2325
XCTAssertEqual("{💣", "{💣".replacingTokens { _ in "🕳" })
2426
XCTAssertEqual("}", "}".replacingTokens { _ in "🕳" })
2527
}
28+
29+
/// Checks that a Russian street name is inflected into the accusative case.
/// The assertion only runs when the framework bundle's preferred localizations
/// include one starting with "ru"; otherwise the test does nothing.
func testInflectingStrings() {
    let preferredLocalizations = Bundle(for: OSRMInstructionFormatter.self).preferredLocalizations
    guard preferredLocalizations.contains(where: { $0.starts(with: "ru") }) else {
        return
    }

    let inflectedName = "Бармалеева улица".inflected(into: "accusative", version: "v5")
    XCTAssertEqual("Бармалееву улицу", inflectedName)
}
2634
}

json2plist.sh

+15-1
Original file line numberDiff line numberDiff line change
@@ -3,8 +3,8 @@
33
# Transform select osrm-text-instructions language files from json to plist
44
git submodule init
55
git submodule update
6-
cd "./osrm-text-instructions/languages/translations/" || exit 1
76

7+
cd "./osrm-text-instructions/languages/translations/" || exit 1
88
for file in ./*; do
99
if [ "$file" = "./en.json" ]; then
1010
LANGUAGE="Base"
@@ -18,4 +18,18 @@ for file in ./*; do
1818
plutil -convert xml1 "./${file}" -o "${LANGUAGE_DIR}/Instructions.plist"
1919
done
2020

21+
# Transform the osrm-text-instructions grammar files from json to plist.
#
# The grammar directory is a sibling of the translations directory we are already in.
# Iterate over it in place instead of changing into it: a second `cd` would make the
# closing `cd -` return to the translations directory rather than to the directory
# the script started in. The relative default for LANGUAGE_DIR resolves identically
# from either sibling directory.
[ -d "../grammar/" ] || exit 1
for file in ../grammar/*; do
    # An unmatched glob is left as a literal pattern; skip it.
    [ -e "$file" ] || continue

    # Quote the path so that file names containing whitespace survive word splitting.
    LANGUAGE=$(basename "$file")
    LANGUAGE=${LANGUAGE%.json}
    if [ "$LANGUAGE" = "en" ]; then
        LANGUAGE="Base"
    fi

    LANGUAGE_DIR="${BUILT_PRODUCTS_DIR:-../../../OSRMTextInstructions/}/${UNLOCALIZED_RESOURCES_FOLDER_PATH:-}/${LANGUAGE}.lproj"
    mkdir -p "${LANGUAGE_DIR}"
    plutil -convert xml1 "${file}" -o "${LANGUAGE_DIR}/Grammar.plist"
done
34+
2135
cd - || exit 1

0 commit comments

Comments
 (0)