Skip to content

Commit 5123d1b

Browse files
author
Arjan Scherpenisse
committed
170 tests pass and counting
1 parent e164c5f commit 5123d1b

File tree

5 files changed

+231
-15
lines changed

5 files changed

+231
-15
lines changed

lib/speech_markdown/grammar.ex

+14-7
Original file line numberDiff line numberDiff line change
@@ -62,7 +62,8 @@ defmodule SpeechMarkdown.Grammar do
6262

6363
identifier =
6464
reduce(
65-
repeat(ascii_char('_abcdefghijklmnopqrstuvwxyz1234567890')),
65+
ascii_char('_abcdefghijklmnopqrstuvwxyz1234567890')
66+
|> concat(repeat(ascii_char('_abcdefghijklmnopqrstuvwxyz1234567890'))),
6667
:to_string
6768
)
6869

@@ -101,12 +102,16 @@ defmodule SpeechMarkdown.Grammar do
101102
|> ignore(string("]"))
102103
|> unwrap_and_tag(:audio)
103104

105+
defp empty_block(_x) do
106+
:empty_block
107+
end
108+
104109
parenthesized =
105110
ignore(string("("))
106-
# |> reduce(repeat(utf8_char([{:not, ?)}])), :to_string)
107-
|> concat(parsec(:document))
111+
|> reduce(repeat(utf8_char([{:not, ?)}])), :to_string)
112+
# |> concat(parsec(:document))
108113
|> ignore(string(")"))
109-
|> parsec(:block)
114+
|> choice([parsec(:block), string("[]") |> reduce(:empty_block)])
110115
|> reduce(:nested_block)
111116

112117
ipa =
@@ -134,7 +139,7 @@ defmodule SpeechMarkdown.Grammar do
134139
)
135140

136141
def nested_block([a, b]) do
137-
{:nested_block, a, b}
142+
{:nested_block, [{:text, a}], b}
138143
end
139144

140145
def kv_block(x) do
@@ -154,8 +159,9 @@ defmodule SpeechMarkdown.Grammar do
154159
non_ctrl_instr =
155160
utf8_char((@ws ++ [?), ?], ?[, ?(, 35, ?!]) |> Enum.map(&{:not, &1}))
156161

162+
# [{:not, ?[}, {:not, ?)}])
157163
plaintext =
158-
utf8_char([{:not, ?[}, {:not, ?)}])
164+
utf8_char([])
159165
|> reduce(:to_string)
160166
|> unwrap_and_tag(:text)
161167

@@ -168,6 +174,7 @@ defmodule SpeechMarkdown.Grammar do
168174
|> repeat(utf8_char([{:not, char}]))
169175
|> reduce(:to_string)
170176
|> ignore(string(abbrev))
177+
|> lookahead(choice([ws, eos()]))
171178
|> reduce({:short_emphasis, [emphasis]})
172179
end
173180

@@ -185,9 +192,9 @@ defmodule SpeechMarkdown.Grammar do
185192
defparsec(
186193
:document,
187194
choice([
195+
parenthesized,
188196
section,
189197
audio,
190-
parenthesized,
191198
parsec(:block),
192199
parsec(:any_emphasis),
193200
plaintext

lib/speech_markdown/transpiler.ex

+135-5
Original file line numberDiff line numberDiff line change
@@ -35,6 +35,11 @@ defmodule SpeechMarkdown.Transpiler do
3535
|> opt_strip_declaration(xml_declaration)}
3636
end
3737

38+
### EMPTY BLOCK
39+
defp convert({:nested_block, nodes, :empty_block}, variant) do
40+
Enum.map(nodes, &convert(&1, variant))
41+
end
42+
3843
### BREAK
3944

4045
defp convert({:kv_block, [{"break", break}]}, _variant) do
@@ -60,7 +65,18 @@ defmodule SpeechMarkdown.Transpiler do
6065

6166
### SECTIONS
6267

63-
defp convert({:section, block, nodes}, :alexa) do
68+
defp convert({:section, [{_, _} | _] = attrs, nodes}, :alexa) do
69+
{alexa_attrs, attrs} =
70+
Enum.split_with(attrs, &(elem(&1, 0) in ~w(excited disappointed)))
71+
72+
nodes
73+
|> Enum.map(&convert(&1, :alexa))
74+
|> wrap_with_voice_and_or_lang(attrs)
75+
|> Alexa.emotion(alexa_attrs)
76+
|> unwrap_single_node()
77+
end
78+
79+
defp convert({:section, block, nodes}, :alexa) when is_binary(block) do
6480
nodes
6581
|> Enum.map(&convert(&1, :alexa))
6682
|> Alexa.emotion(block)
@@ -102,6 +118,74 @@ defmodule SpeechMarkdown.Transpiler do
102118
Enum.map(nodes, &convert(&1, variant))
103119
end
104120

121+
### WHISPER
122+
defp convert({:nested_block, nodes, {:block, "whisper"}}, :alexa) do
123+
nodes = Enum.map(nodes, &convert(&1, :alexa))
124+
{:"amazon:effect", [name: 'whispered'], nodes}
125+
end
126+
127+
defp convert({:nested_block, nodes, {:block, "whisper"}}, variant) do
128+
nodes = Enum.map(nodes, &convert(&1, variant))
129+
{:prosody, [volume: 'x-soft', rate: 'slow'], nodes}
130+
end
131+
132+
### PROSODY
133+
134+
defp convert(
135+
{:nested_block, nodes, {:kv_block, [{"volume", volume}]}},
136+
variant
137+
) do
138+
{:prosody, [volume: volume], Enum.map(nodes, &convert(&1, variant))}
139+
end
140+
141+
defp convert(
142+
{:nested_block, nodes, {:kv_block, [{"pitch", pitch}]}},
143+
variant
144+
) do
145+
{:prosody, [pitch: pitch], Enum.map(nodes, &convert(&1, variant))}
146+
end
147+
148+
defp convert(
149+
{:nested_block, nodes, {:kv_block, [{"rate", rate}]}},
150+
variant
151+
) do
152+
{:prosody, [rate: rate], Enum.map(nodes, &convert(&1, variant))}
153+
end
154+
155+
### LANG
156+
157+
defp convert({:nested_block, nodes, {:kv_block, [{"lang", _}]}}, :google) do
158+
Enum.map(nodes, &convert(&1, :google))
159+
end
160+
161+
defp convert({:nested_block, nodes, {:kv_block, [{"lang", lang}]}}, variant) do
162+
{:lang, ["xml:lang": ch(lang)], Enum.map(nodes, &convert(&1, variant))}
163+
end
164+
165+
### SUB
166+
167+
defp convert({:nested_block, nodes, {:kv_block, [{"sub", sub}]}}, variant) do
168+
{:sub, [alias: ch(sub)], Enum.map(nodes, &convert(&1, variant))}
169+
end
170+
171+
### VOICE
172+
173+
defp convert({:nested_block, nodes, {:kv_block, [{"voice", voice}]}}, :alexa) do
174+
nodes = Enum.map(nodes, &convert(&1, :alexa))
175+
176+
case Validator.alexa_voice(voice) do
177+
nil ->
178+
nodes
179+
180+
voice ->
181+
{:voice, [name: ch(voice)], nodes}
182+
end
183+
end
184+
185+
defp convert({:nested_block, nodes, {:kv_block, [{"voice", _}]}}, variant) do
186+
Enum.map(nodes, &convert(&1, variant))
187+
end
188+
105189
### SAY-AS
106190
defp convert(
107191
{:nested_block, nodes, {:kv_block, [{"emphasis", level}]}},
@@ -111,23 +195,33 @@ defmodule SpeechMarkdown.Transpiler do
111195
end
112196

113197
defp convert(
114-
{:nested_block, nodes, {:kv_block, [{"date", format}]}},
198+
{:nested_block, nodes, {:kv_block, [{dt, format}]}},
115199
variant
116-
) do
117-
{:"say-as", ["interpret-as": 'date', format: ch(format)],
200+
)
201+
when dt in ~w(date time) do
202+
{:"say-as", ["interpret-as": ch(dt), format: ch(format)],
118203
Enum.map(nodes, &convert(&1, variant))}
119204
end
120205

121-
@interpret_as ~w(characters number address chars)
206+
defp convert({:nested_block, nodes, {:block, say}}, :google)
207+
when say in ~w(interjection) do
208+
Enum.map(nodes, &convert(&1, :google))
209+
end
210+
211+
@interpret_as ~w(characters number address chars expletive fraction interjection ordinal unit)
122212
defp convert({:nested_block, nodes, {:block, say}}, variant)
123213
when say in @interpret_as do
124214
{:"say-as", ["interpret-as": say], Enum.map(nodes, &convert(&1, variant))}
125215
end
126216

217+
### AUDIO
218+
127219
defp convert({:audio, src}, _variant) do
128220
{:audio, [src: ch(src)], []}
129221
end
130222

223+
### TEXT
224+
131225
defp convert({:text, text}, _variant) do
132226
ch(text)
133227
end
@@ -157,4 +251,40 @@ defmodule SpeechMarkdown.Transpiler do
157251
end
158252

159253
defdelegate ch(s), to: String, as: :to_charlist
254+
255+
defp wrap_with_voice_and_or_lang(nodes, attrs) do
256+
~w(lang voice)
257+
|> Enum.reduce(nodes, fn
258+
"lang", children ->
259+
case kw(attrs, "lang") do
260+
nil ->
261+
children
262+
263+
lang ->
264+
[{:lang, ["xml:lang": ch(lang)], children}]
265+
end
266+
267+
"voice", children ->
268+
case kw(attrs, "voice") do
269+
nil ->
270+
children
271+
272+
"device" ->
273+
children
274+
275+
voice ->
276+
[{:voice, [name: ch(voice)], children}]
277+
end
278+
end)
279+
end
280+
281+
defp unwrap_single_node([{_, _, _} = n]), do: n
282+
defp unwrap_single_node(n), do: n
283+
284+
defp kw(list, prop) do
285+
case :proplists.get_value(prop, list) do
286+
:undefined -> nil
287+
value -> value
288+
end
289+
end
160290
end

lib/speech_markdown/transpiler/alexa.ex

+4
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,10 @@ defmodule SpeechMarkdown.Transpiler.Alexa do
1010
{:"amazon:domain", [name: 'music'], inner}
1111
end
1212

13+
def emotion(inner, "newscaster") do
14+
{:"amazon:domain", [name: 'news'], inner}
15+
end
16+
1317
def emotion(inner, block) do
1418
case section_to_xml_attrs(block) do
1519
{:ok, attrs} ->

lib/speech_markdown/validator.ex

+62-3
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
defmodule SpeechMarkdown.Validator do
2-
@blocks ~w(address cardinal number characters expletive bleep fraction interjection ordinal phone telephone unit whisper emphasis excited disappointed)
3-
@attributes ~w(break date emphasis lang voice pitch ipa sub disappointed excited)
2+
@blocks ~w(address cardinal number characters expletive fraction interjection ordinal phone telephone unit whisper emphasis excited disappointed)
3+
@attributes ~w(break date time emphasis lang voice pitch ipa sub disappointed excited)
44

55
@enum_attrs [
66
rate: ~w(x-slow slow medium fast x-fast),
@@ -118,6 +118,10 @@ defmodule SpeechMarkdown.Validator do
118118
end
119119
end
120120

121+
defp validate_kvs([{"vol", value} | rest], acc) do
122+
validate_kvs([{"volume", value} | rest], acc)
123+
end
124+
121125
defp validate_kvs([{"break", break} | rest], acc) do
122126
with :ok <- valid_delay(break) do
123127
validate_kvs(rest, [{"break", break} | acc])
@@ -143,11 +147,27 @@ defmodule SpeechMarkdown.Validator do
143147
end
144148
end
145149

150+
defp convert_nested({:block, "emphasis"}) do
151+
{:ok, {:kv_block, [{"emphasis", "moderate"}]}}
152+
end
153+
154+
defp convert_nested({:block, "pitch"}) do
155+
{:ok, {:kv_block, [{"pitch", "medium"}]}}
156+
end
157+
158+
defp convert_nested({:block, "rate"}) do
159+
{:ok, {:kv_block, [{"rate", "medium"}]}}
160+
end
161+
162+
defp convert_nested({:block, v}) when v in ~w(vol volume) do
163+
{:ok, {:kv_block, [{"volume", "medium"}]}}
164+
end
165+
146166
defp convert_nested({:block, block}) when block in @blocks do
147167
{:ok, {:block, block}}
148168
end
149169

150-
@translate_blocks %{"chars" => "characters"}
170+
@translate_blocks %{"chars" => "characters", "bleep" => "expletive"}
151171
@translate_block_header Map.keys(@translate_blocks)
152172

153173
defp convert_nested({:block, block}) when block in @translate_block_header do
@@ -166,6 +186,10 @@ defmodule SpeechMarkdown.Validator do
166186
{:ok, {:kv_block, [{"sub", sub}]}}
167187
end
168188

189+
defp convert_nested(:empty_block) do
190+
{:ok, :empty_block}
191+
end
192+
169193
defp convert_nested({:kv_block, kvs}) do
170194
with {:ok, kvs1} <- validate_kvs(kvs) do
171195
{:ok, {:kv_block, kvs1}}
@@ -174,4 +198,39 @@ defmodule SpeechMarkdown.Validator do
174198

175199
def break_attr(type) when type in @delay_enum, do: :strength
176200
def break_attr(_type), do: :time
201+
202+
@alexa_voices %{
203+
"Ivy" => "en-US",
204+
"Joanna" => "en-US",
205+
"Joey" => "en-US",
206+
"Justin" => "en-US",
207+
"Kendra" => "en-US",
208+
"Kimberly" => "en-US",
209+
"Matthew" => "en-US",
210+
"Salli" => "en-US",
211+
"Nicole" => "en-AU",
212+
"Russell" => "en-AU",
213+
"Amy" => "en-GB",
214+
"Brian" => "en-GB",
215+
"Emma" => "en-GB",
216+
"Aditi" => "en-IN",
217+
"Raveena" => "en-IN",
218+
"Hans" => "de-DE",
219+
"Marlene" => "de-DE",
220+
"Vicki" => "de-DE",
221+
"Conchita" => "es-ES",
222+
"Enrique" => "es-ES",
223+
"Carla" => "it-IT",
224+
"Giorgio" => "it-IT",
225+
"Mizuki" => "ja-JP",
226+
"Takumi" => "ja-JP",
227+
"Celine" => "fr-FR",
228+
"Lea" => "fr-FR",
229+
"Mathieu" => "fr-FR"
230+
}
231+
|> Map.keys()
232+
233+
def alexa_voice(voice) do
234+
Enum.find(@alexa_voices, &(String.downcase(&1) == String.downcase(voice)))
235+
end
177236
end

test/speech_markdown/grammar_test.exs

+16
Original file line numberDiff line numberDiff line change
@@ -45,6 +45,8 @@ defmodule SpeechMarkdown.Grammar.Test do
4545
assert parse(
4646
"hello [bla] there [x:\"bar\"] and (foo [300ms] (d)[x] apentuin)[foo:\"bar\";lang:\"nl\"] that is it\n\n#[foo]\nxxx"
4747
)
48+
49+
# |> IO.inspect(label: "x")
4850
end
4951

5052
test "emphasis" do
@@ -73,4 +75,18 @@ defmodule SpeechMarkdown.Grammar.Test do
7375
{:kv_block, [{"emphasis", "reduced"}]}}
7476
]} = parse("++strong++ +med+ ~moderate~ -reduced-")
7577
end
78+
79+
test "special chars" do
80+
text = """
81+
This is text with (parens) but this and other special characters: []()*~@#\\_!+- are ignored
82+
"""
83+
84+
assert [text: _] = parse!(text)
85+
86+
text = """
87+
This is text with ~parens! but this and other special characters: *~@#\\_!+- are ignored
88+
"""
89+
90+
assert [text: _] = parse!(text)
91+
end
7692
end

0 commit comments

Comments
 (0)