Parses ABNF with a parser written with nimble_parsec
, emits parser consists of nimble_parsec
combinators.
- Brevity - flattens unnecessary nesting in parsed ABNF
- Easy to config and customize
- Full test coverage
The package is available on Hex
and can be installed by adding abnf_parsec
to your list of dependencies in mix.exs
:
def deps do
[
{:abnf_parsec, "~> 1.0", runtime: false}
]
end
Documentation can be generated with ExDoc and published on HexDocs. Once published, the docs can be found at https://hexdocs.pm/abnf_parsec.
In some RFCs, literals in ABNF could be used to describe the byte representation instead of the text codepoints. There is no clear distinction between them for us to detect automatically. Hence a text / byte mode is added for the user to set.
defmodule TextParser do
use AbnfParsec,
mode: :text, # the default, can be omitted
abnf: """
ucschar = %xA0-D7FF / %xF900-FDCF / %xFDF0-FFEF
/ %x10000-1FFFD / %x20000-2FFFD / %x30000-3FFFD
/ %x40000-4FFFD / %x50000-5FFFD / %x60000-6FFFD
/ %x70000-7FFFD / %x80000-8FFFD / %x90000-9FFFD
/ %xA0000-AFFFD / %xB0000-BFFFD / %xC0000-CFFFD
/ %xD0000-DFFFD / %xE1000-EFFFD
"""
end
defmodule ByteParser do
use AbnfParsec,
mode: :byte,
abnf: """
; from RFC 5322 + UTF8-non-ascii
atext = ALPHA / DIGIT / "!" / "#" / "$" / "%" / "&" / "'" / "*" / "+" /
"-" / "/" / "=" / "?" / "^" / "_" / "`" / "{" / "|" / "}" / "~" / UTF8-non-ascii
; from RFC 3629
UTF8-non-ascii = UTF8-2 / UTF8-3 / UTF8-4
UTF8-2 = %xC2-DF UTF8-tail
UTF8-3 = %xE0 %xA0-BF UTF8-tail / %xE1-EC 2( UTF8-tail ) /
%xED %x80-9F UTF8-tail / %xEE-EF 2( UTF8-tail )
UTF8-4 = %xF0 %x90-BF 2( UTF8-tail ) / %xF1-F3 3( UTF8-tail ) /
%xF4 %x80-8F 2( UTF8-tail )
UTF8-tail = %x80-BF
"""
end
defmodule IPv4Parser do
use AbnfParsec,
abnf: """
ip = first dot second dot third dot fourth
dot = "."
dec-octet =
"25" %x30-35 / ; 250-255
"2" %x30-34 DIGIT / ; 200-249
"1" 2DIGIT / ; 100-199
%x31-39 DIGIT / ; 10-99
DIGIT ; 0-9
first = dec-octet
second = dec-octet
third = dec-octet
fourth = dec-octet
""",
unbox: ["dec-octet"],
ignore: ["dot"],
parse: :ip
end
# IPv4Parser.ip("192.168.0.1")
# IPv4Parser.parse("127.0.0.1")
# IPv4Parser.parse!("10.0.0.1")
iex> IPv4Parser.parse! "10.0.0.1"
[ip: [first: '10', second: '0', third: '0', fourth: '1']]
defmodule JsonParser do
use AbnfParsec,
abnf_file: "test/fixture/json.abnf",
parse: :json_text,
transform: %{
"string" => {:reduce, {List, :to_string, []}},
"int" => [{:reduce, {List, :to_string, []}}, {:map, {String, :to_integer, []}}],
"frac" => {:reduce, {List, :to_string, []}},
"null" => {:replace, nil},
"true" => {:replace, true},
"false" => {:replace, false}
},
untag: ["member"],
unwrap: ["int", "frac"],
unbox: [
"JSON-text",
"null",
"true",
"false",
"digit1-9",
"decimal-point",
"escape",
"unescaped",
"char"
],
ignore: [
"name-separator",
"value-separator",
"quotation-mark",
"begin-object",
"end-object",
"begin-array",
"end-array"
]
end
json = """
{"a": {"b": 1, "c": [true]}, "d": null, "e": "e\\te"}
"""
# JsonParser.json_text(json)
# JsonParser.parse(json)
# JsonParser.parse!(json)
iex> JsonParser.parse! """
...> {"a": {"b": 1, "c": [true]}, "d": null, "e": "e\\te"}
...> """
[
object: [
[
string: ["a"],
value: [
object: [
[string: ["b"], value: [number: [int: 1]]],
[string: ["c"], value: [array: [value: [true]]]]
]
]
],
[string: ["d"], value: [nil]],
[string: ["e"], value: [string: ["e\\te"]]]
]
]
For more details of options for customization, see abnf_parsec.ex
For example, ABNF of ABNF 😉 is parsed
rulelist = 1*( rule / (*c-wsp c-nl) )
rule = rulename defined-as elements c-nl
; continues if next line starts
; with white space
rulename = ALPHA *(ALPHA / DIGIT / "-")
defined-as = *c-wsp ("=" / "=/") *c-wsp
; basic rules definition and
; incremental alternatives
elements = alternation *c-wsp
c-wsp = WSP / (c-nl WSP)
c-nl = comment / CRLF
; comment or newline
comment = ";" *(WSP / VCHAR) CRLF
alternation = concatenation
*(*c-wsp "/" *c-wsp concatenation)
concatenation = repetition *(1*c-wsp repetition)
repetition = [repeat] element
repeat = 1*DIGIT / (*DIGIT "*" *DIGIT)
element = rulename / group / option /
char-val / num-val / prose-val
group = "(" *c-wsp alternation *c-wsp ")"
option = "[" *c-wsp alternation *c-wsp "]"
char-val = DQUOTE *(%x20-21 / %x23-7E) DQUOTE
; quoted string of SP and VCHAR
; without DQUOTE
num-val = "%" (bin-val / dec-val / hex-val)
bin-val = "b" 1*BIT
[ 1*("." 1*BIT) / ("-" 1*BIT) ]
; series of concatenated bit values
; or single ONEOF range
dec-val = "d" 1*DIGIT
[ 1*("." 1*DIGIT) / ("-" 1*DIGIT) ]
hex-val = "x" 1*HEXDIG
[ 1*("." 1*HEXDIG) / ("-" 1*HEXDIG) ]
prose-val = "<" *(%x20-3D / %x3F-7E) ">"
; bracketed string of SP and VCHAR
; without angles
; prose description, to be used as
; last resort
into
[
rule: [
rulename: "rulelist",
repetition: [
repeat: [min: 1],
alternation: [
rulename: "rule",
concatenation: [
repetition: [repeat: [], rulename: "c-wsp"],
rulename: "c-nl"
]
]
]
],
rule: [
rulename: "rule",
concatenation: [
rulename: "rulename",
rulename: "defined-as",
rulename: "elements",
rulename: "c-nl"
],
comment: "continues if next line starts",
comment: "with white space"
],
rule: [
rulename: "rulename",
concatenation: [
rulename: "ALPHA",
repetition: [
repeat: [],
alternation: [{:rulename, "ALPHA"}, {:rulename, "DIGIT"}, "-"]
]
]
],
rule: [
rulename: "defined-as",
concatenation: [
repetition: [repeat: [], rulename: "c-wsp"],
alternation: ["=", "=/"],
repetition: [repeat: [], rulename: "c-wsp"]
],
comment: "basic rules definition and",
comment: "incremental alternatives"
],
rule: [
rulename: "elements",
concatenation: [
rulename: "alternation",
repetition: [repeat: [], rulename: "c-wsp"]
]
],
rule: [
rulename: "c-wsp",
alternation: [
rulename: "WSP",
concatenation: [rulename: "c-nl", rulename: "WSP"]
]
],
rule: [
rulename: "c-nl",
alternation: [rulename: "comment", rulename: "CRLF"],
comment: "comment or newline"
],
rule: [
rulename: "comment",
concatenation: [
";",
{:repetition,
[repeat: [], alternation: [rulename: "WSP", rulename: "VCHAR"]]},
{:rulename, "CRLF"}
]
],
rule: [
rulename: "alternation",
concatenation: [
rulename: "concatenation",
repetition: [
repeat: [],
concatenation: [
{:repetition, [repeat: [], rulename: "c-wsp"]},
"/",
{:repetition, [repeat: [], rulename: "c-wsp"]},
{:rulename, "concatenation"}
]
]
]
],
rule: [
rulename: "concatenation",
concatenation: [
rulename: "repetition",
repetition: [
repeat: [],
concatenation: [
repetition: [repeat: [min: 1], rulename: "c-wsp"],
rulename: "repetition"
]
]
]
],
rule: [
rulename: "repetition",
concatenation: [option: [rulename: "repeat"], rulename: "element"]
],
rule: [
rulename: "repeat",
alternation: [
repetition: [repeat: [min: 1], rulename: "DIGIT"],
concatenation: [
{:repetition, [repeat: [], rulename: "DIGIT"]},
"*",
{:repetition, [repeat: [], rulename: "DIGIT"]}
]
]
],
rule: [
rulename: "element",
alternation: [
rulename: "rulename",
rulename: "group",
rulename: "option",
rulename: "char-val",
rulename: "num-val",
rulename: "prose-val"
]
],
rule: [
rulename: "group",
concatenation: [
"(",
{:repetition, [repeat: [], rulename: "c-wsp"]},
{:rulename, "alternation"},
{:repetition, [repeat: [], rulename: "c-wsp"]},
")"
]
],
rule: [
rulename: "option",
concatenation: [
"[",
{:repetition, [repeat: [], rulename: "c-wsp"]},
{:rulename, "alternation"},
{:repetition, [repeat: [], rulename: "c-wsp"]},
"]"
]
],
rule: [
rulename: "char-val",
concatenation: [
rulename: "DQUOTE",
repetition: [
repeat: [],
alternation: [
num_range: [{:base, "x"}, "20", "21"],
num_range: [{:base, "x"}, "23", "7E"]
]
],
rulename: "DQUOTE"
],
comment: "quoted string of SP and VCHAR",
comment: "without DQUOTE"
],
rule: [
rulename: "num-val",
concatenation: [
"%",
{:alternation,
[rulename: "bin-val", rulename: "dec-val", rulename: "hex-val"]}
]
],
rule: [
rulename: "bin-val",
concatenation: [
"b",
{:repetition, [repeat: [min: 1], rulename: "BIT"]},
{:option,
[
alternation: [
repetition: [
repeat: [min: 1],
concatenation: [
".",
{:repetition, [repeat: [min: 1], rulename: "BIT"]}
]
],
concatenation: [
"-",
{:repetition, [repeat: [min: 1], rulename: "BIT"]}
]
]
]}
],
comment: "series of concatenated bit values",
comment: "or single ONEOF range"
],
rule: [
rulename: "dec-val",
concatenation: [
"d",
{:repetition, [repeat: [min: 1], rulename: "DIGIT"]},
{:option,
[
alternation: [
repetition: [
repeat: [min: 1],
concatenation: [
".",
{:repetition, [repeat: [min: 1], rulename: "DIGIT"]}
]
],
concatenation: [
"-",
{:repetition, [repeat: [min: 1], rulename: "DIGIT"]}
]
]
]}
]
],
rule: [
rulename: "hex-val",
concatenation: [
"x",
{:repetition, [repeat: [min: 1], rulename: "HEXDIG"]},
{:option,
[
alternation: [
repetition: [
repeat: [min: 1],
concatenation: [
".",
{:repetition, [repeat: [min: 1], rulename: "HEXDIG"]}
]
],
concatenation: [
"-",
{:repetition, [repeat: [min: 1], rulename: "HEXDIG"]}
]
]
]}
]
],
rule: [
rulename: "prose-val",
concatenation: [
"<",
{:repetition,
[
repeat: [],
alternation: [
num_range: [{:base, "x"}, "20", "3D"],
num_range: [{:base, "x"}, "3F", "7E"]
]
]},
">"
],
comment: "bracketed string of SP and VCHAR",
comment: "without angles",
comment: "prose description, to be used as",
comment: "last resort"
]
]
And generated parser looks like:
[
defparsec(
:rulelist,
tag(
times(choice([parsec(:rule), repeat(parsec(:c_wsp)) |> parsec(:c_nl)]), min: 1),
:rulelist
)
),
defparsec(
:rule,
tag(parsec(:rulename) |> parsec(:defined_as) |> parsec(:elements) |> parsec(:c_nl), :rule)
),
defparsec(
:rulename,
tag(
parsec(:core_alpha)
|> repeat(choice([parsec(:core_alpha), parsec(:core_digit), string("-")])),
:rulename
)
),
defparsec(
:defined_as,
tag(
repeat(parsec(:c_wsp)) |> choice([string("="), string("=/")]) |> repeat(parsec(:c_wsp)),
:defined_as
)
),
defparsec(:elements, tag(parsec(:alternation) |> repeat(parsec(:c_wsp)), :elements)),
defparsec(:c_wsp, tag(choice([parsec(:core_wsp), parsec(:c_nl) |> parsec(:core_wsp)]), :c_wsp)),
defparsec(:c_nl, tag(choice([parsec(:comment), parsec(:core_crlf)]), :c_nl)),
defparsec(
:comment,
tag(
string(";")
|> repeat(choice([parsec(:core_wsp), parsec(:core_vchar)]))
|> parsec(:core_crlf),
:comment
)
),
defparsec(
:alternation,
tag(
parsec(:concatenation)
|> repeat(
repeat(parsec(:c_wsp))
|> string("/")
|> repeat(parsec(:c_wsp))
|> parsec(:concatenation)
),
:alternation
)
),
defparsec(
:concatenation,
tag(
parsec(:repetition) |> repeat(times(parsec(:c_wsp), min: 1) |> parsec(:repetition)),
:concatenation
)
),
defparsec(:repetition, tag(optional(parsec(:repeat)) |> parsec(:element), :repetition)),
defparsec(
:repeat,
tag(
choice([
times(parsec(:core_digit), min: 1),
repeat(parsec(:core_digit)) |> string("*") |> repeat(parsec(:core_digit))
]),
:repeat
)
),
defparsec(
:element,
tag(
choice([
parsec(:rulename),
parsec(:group),
parsec(:option),
parsec(:char_val),
parsec(:num_val),
parsec(:prose_val)
]),
:element
)
),
defparsec(
:group,
tag(
string("(")
|> repeat(parsec(:c_wsp))
|> parsec(:alternation)
|> repeat(parsec(:c_wsp))
|> string(")"),
:group
)
),
defparsec(
:option,
tag(
string("[")
|> repeat(parsec(:c_wsp))
|> parsec(:alternation)
|> repeat(parsec(:c_wsp))
|> string("]"),
:option
)
),
defparsec(
:char_val,
tag(
parsec(:core_dquote)
|> repeat(choice([ascii_char([32..33]), ascii_char([35..126])]))
|> parsec(:core_dquote),
:char_val
)
),
defparsec(
:num_val,
tag(string("%") |> choice([parsec(:bin_val), parsec(:dec_val), parsec(:hex_val)]), :num_val)
),
defparsec(
:bin_val,
tag(
string("b")
|> times(parsec(:core_bit), min: 1)
|> optional(
choice([
times(string(".") |> times(parsec(:core_bit), min: 1), min: 1),
string("-") |> times(parsec(:core_bit), min: 1)
])
),
:bin_val
)
),
defparsec(
:dec_val,
tag(
string("d")
|> times(parsec(:core_digit), min: 1)
|> optional(
choice([
times(string(".") |> times(parsec(:core_digit), min: 1), min: 1),
string("-") |> times(parsec(:core_digit), min: 1)
])
),
:dec_val
)
),
defparsec(
:hex_val,
tag(
string("x")
|> times(parsec(:core_hexdig), min: 1)
|> optional(
choice([
times(string(".") |> times(parsec(:core_hexdig), min: 1), min: 1),
string("-") |> times(parsec(:core_hexdig), min: 1)
])
),
:hex_val
)
),
defparsec(
:prose_val,
tag(
string("<") |> repeat(choice([ascii_char([32..61]), ascii_char([63..126])])) |> string(">"),
:prose_val
)
)
]