Skip to content

Commit

Permalink
Add bit string literals for ease of encoding BIT STRINGs
Browse files Browse the repository at this point in the history
BIT STRING is distinct from OCTET STRING in that it has a leading byte
specifying the number of "unimportant" bits in the last octet. This adds
syntax (b`...`) similar to hex literals, which generates this prefix in
a convenient fashion. See language.txt for details.

Includes changes to ascii2der, as well as changing the existing
der2ascii heuristic for well-formed BIT STRINGs to use the new syntax
instead.
  • Loading branch information
mcy committed Jul 1, 2021
1 parent c0da311 commit 07da533
Show file tree
Hide file tree
Showing 7 changed files with 299 additions and 13 deletions.
1 change: 1 addition & 0 deletions CONTRIBUTORS
Original file line number Diff line number Diff line change
Expand Up @@ -12,4 +12,5 @@
David Benjamin <[email protected]>
Carl Mehner <[email protected]>
Eric Roman <[email protected]>
Miguel Young de la Sota <[email protected]>
Victor Vasiliev <[email protected]>
47 changes: 47 additions & 0 deletions cmd/ascii2der/scanner.go
Original file line number Diff line number Diff line change
Expand Up @@ -284,6 +284,53 @@ again:
if s.pos.Offset+1 < len(s.text) && s.text[s.pos.Offset+1] == '"' {
return s.parseUTF32String()
}
case 'b':
if s.pos.Offset+1 < len(s.text) && s.text[s.pos.Offset+1] == '`' {
s.advance() // Skip the b.
s.advance() // Skip the `.
bitStr, ok := s.consumeUpTo('`')
if !ok {
return token{}, &parseError{s.pos, errors.New("unmatched `")}
}

// The leading byte is the number of "extra" bits at the end.
var bitCount int
var sawPipe bool
value := []byte{0}
for i, r := range bitStr {
switch r {
case '0', '1':
if bitCount%8 == 0 {
value = append(value, 0)
}
if r == '1' {
value[bitCount/8+1] |= 1 << uint(7-bitCount%8)
}
bitCount++
case '|':
if sawPipe {
return token{}, &parseError{s.pos, errors.New("duplicate |")}
}

// bitsRemaining is the number of bits remaining in the output that haven't
// been used yet. There cannot be more than that many bits past the |.
bitsRemaining := (len(value)-1)*8 - bitCount
inputRemaining := len(bitStr) - i - 1
if inputRemaining > bitsRemaining {
return token{}, &parseError{s.pos, fmt.Errorf("expected at most %v explicit padding bits; found %v", bitsRemaining, inputRemaining)}
}

sawPipe = true
value[0] = byte(bitsRemaining)
default:
return token{}, &parseError{s.pos, fmt.Errorf("unexpected rune %q", r)}
}
}
if !sawPipe {
value[0] = byte((len(value)-1)*8 - bitCount)
}
return token{Kind: tokenBytes, Value: value, Pos: s.pos}, nil
}
case '`':
s.advance()
hexStr, ok := s.consumeUpTo('`')
Expand Down
107 changes: 107 additions & 0 deletions cmd/ascii2der/scanner_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -118,6 +118,8 @@ indefinite long-form:2`,
{"[long-form:2 SEQUENCE]", []token{{Kind: tokenBytes, Value: []byte{0x3f, 0x80, 0x10}}, {Kind: tokenEOF}}, true},
// Bad hex bytes.
{"`hi there!`", nil, false},
// Bad bit characters.
{"b`hi there!`", nil, false},
// UTF-16 literals are parsed correctly.
{
`u""`,
Expand Down Expand Up @@ -224,6 +226,110 @@ indefinite long-form:2`,
},
true,
},
// BIT STRING literals are parsed correctly.
{
"b``",
[]token{
{Kind: tokenBytes, Value: []byte{0x00}},
{Kind: tokenEOF},
},
true,
},
{
"b`1`",
[]token{
{Kind: tokenBytes, Value: []byte{0x07, 0x100 - (1 << 7)}},
{Kind: tokenEOF},
},
true,
},
{
"b`11`",
[]token{
{Kind: tokenBytes, Value: []byte{0x06, 0x100 - (1 << 6)}},
{Kind: tokenEOF},
},
true,
},
{
"b`111`",
[]token{
{Kind: tokenBytes, Value: []byte{0x05, 0x100 - (1 << 5)}},
{Kind: tokenEOF},
},
true,
},
{
"b`1111`",
[]token{
{Kind: tokenBytes, Value: []byte{0x04, 0x100 - (1 << 4)}},
{Kind: tokenEOF},
},
true,
},
{
"b`11111`",
[]token{
{Kind: tokenBytes, Value: []byte{0x03, 0x100 - (1 << 3)}},
{Kind: tokenEOF},
},
true,
},
{
"b`111111`",
[]token{
{Kind: tokenBytes, Value: []byte{0x02, 0x100 - (1 << 2)}},
{Kind: tokenEOF},
},
true,
},
{
"b`1111111`",
[]token{
{Kind: tokenBytes, Value: []byte{0x01, 0x100 - (1 << 1)}},
{Kind: tokenEOF},
},
true,
},
{
"b`1010101001010101`",
[]token{
{Kind: tokenBytes, Value: []byte{0x00, 0xaa, 0x55}},
{Kind: tokenEOF},
},
true,
},
{
"b`101010100101`",
[]token{
{Kind: tokenBytes, Value: []byte{0x04, 0xaa, 0x50}},
{Kind: tokenEOF},
},
true,
},
// We can stick a | in the middle of a BIT STRING to add "explicit" padding.
{
"b`101010100|1010101`",
[]token{
{Kind: tokenBytes, Value: []byte{0x07, 0xaa, 0x55}},
{Kind: tokenEOF},
},
true,
},
// If explicit padding does not end at a byte boundary, the remaining padding
// bits are zero.
{
"b`101010100101|010`",
[]token{
{Kind: tokenBytes, Value: []byte{0x04, 0xaa, 0x54}},
{Kind: tokenEOF},
},
true,
},
// Padding that passes a byte boundary is an error.
{"b`0000000|01`", nil, false},
// Extra |s are an error.
{"b`0|0|0`", nil, false},
// Bad or truncated escape sequences.
{`"\`, nil, false},
{`"\x`, nil, false},
Expand Down Expand Up @@ -267,6 +373,7 @@ indefinite long-form:2`,
{`"hello`, nil, false},
{`u"hello`, nil, false},
{`U"hello`, nil, false},
{"b`0101", nil, false},
// Long-form with invalid number.
{"long-form:", nil, false},
{"long-form:garbage", nil, false},
Expand Down
2 changes: 1 addition & 1 deletion cmd/der2ascii/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -15,8 +15,8 @@
package main

import (
"encoding/pem"
"encoding/hex"
"encoding/pem"
"flag"
"fmt"
"io/ioutil"
Expand Down
31 changes: 31 additions & 0 deletions cmd/der2ascii/writer.go
Original file line number Diff line number Diff line change
Expand Up @@ -355,6 +355,37 @@ func derToASCIIImpl(out *bytes.Buffer, in []byte, indent int, stopAtEOC bool) []
// Emit the remaining as a DER element.
derToASCIIImpl(out, elem.body[1:], indent+1, false) // Adds a trailing newline.
addLine(out, indent, "}")
} else if len(elem.body) == 1 && elem.body[0] == 0 {
addLine(out, indent, fmt.Sprintf("%s b`` }", header))
} else if len(elem.body) > 1 && len(elem.body) <= 5 && elem.body[0] < 8 {
// Convert to a b`` literal when the leading byte is valid and the
// number of data octets is at most 4; we limit the length for
// readability.
bits := new(strings.Builder)

// The first octet is the number of unused bits.
significant := 8 - elem.body[0]
for i, octet := range elem.body[1:] {
// Last octet gets some special handling.
isLast := i == len(elem.body)-2
for j := 0; j < 8; j++ {
if isLast && int(significant) == j {
if octet == 0 {
break
}
bits.WriteRune('|')
}

if octet&0x80 == 0 {
bits.WriteRune('0')
} else {
bits.WriteRune('1')
}
octet <<= 1
}
}

addLine(out, indent, fmt.Sprintf("%s b`%s` }", header, bits))
} else if len(elem.body) > 1 && elem.body[0] < 8 {
// The first byte is the number of unused bits.
addLine(out, indent, fmt.Sprintf("%s %s %s }", header, bytesToString(elem.body[:1]), bytesToString(elem.body[1:])))
Expand Down
75 changes: 68 additions & 7 deletions cmd/der2ascii/writer_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -128,25 +128,86 @@ var derToASCIITests = []convertFuncTest{
[]byte{0x03, 0x03, 0x00, 0x30, 0x00},
"BIT_STRING {\n `00`\n SEQUENCE {}\n}\n",
},
// BIT STRINGs are encoded normally if the contents are not an element.
// BIT STRINGs are encoded as bit string literals if the contents are not an
// element.
{
[]byte{0x03, 0x03, 0x00, 0x00, 0x00},
"BIT_STRING { `00` `0000` }\n",
"BIT_STRING { b`0000000000000000` }\n",
},
// BIT STRINGs are encoded normally if the leading byte is non-zero.
{
[]byte{0x03, 0x01, 0x00},
"BIT_STRING { b`` }\n",
},
{
[]byte{0x03, 0x02, 0x07, 0x100 - (1 << 7)},
"BIT_STRING { b`1` }\n",
},
{
[]byte{0x03, 0x02, 0x06, 0x100 - (1 << 6)},
"BIT_STRING { b`11` }\n",
},
{
[]byte{0x03, 0x02, 0x05, 0x100 - (1 << 5)},
"BIT_STRING { b`111` }\n",
},
{
[]byte{0x03, 0x02, 0x04, 0x100 - (1 << 4)},
"BIT_STRING { b`1111` }\n",
},
{
[]byte{0x03, 0x02, 0x03, 0x100 - (1 << 3)},
"BIT_STRING { b`11111` }\n",
},
{
[]byte{0x03, 0x02, 0x02, 0x100 - (1 << 2)},
"BIT_STRING { b`111111` }\n",
},
{
[]byte{0x03, 0x02, 0x01, 0x100 - (1 << 1)},
"BIT_STRING { b`1111111` }\n",
},
{
[]byte{0x03, 0x02, 0x00, 0xff},
"BIT_STRING { b`11111111` }\n",
},
{
[]byte{0x03, 0x03, 0x07, 0xff, 0x100 - (1 << 7)},
"BIT_STRING { b`111111111` }\n",
},
// The above, but with padding.
{
[]byte{0x03, 0x02, 0x07, 0xc0},
"BIT_STRING { b`1|1000000` }\n",
},
// BIT STRINGs are encoded as bit string literals if they are at most 32
// bits.
{
[]byte{0x03, 0x05, 0x01, 0x30, 0x80, 0x00, 0x00},
"BIT_STRING { `01` `30800000` }\n",
"BIT_STRING { b`0011000010000000000000000000000` }\n",
},
// The above, but with non-trivial padding.
{
[]byte{0x03, 0x05, 0x01, 0x30, 0x80, 0x00, 0xff},
"BIT_STRING { b`0011000010000000000000001111111|1` }\n",
},
// BIT STRINGs with more than four components are hex-encoded instead.
{
[]byte{0x03, 0x06, 0x01, 0x30, 0x80, 0xaa, 0x55, 0xaa},
"BIT_STRING { `01` `3080aa55aa` }\n",
},
// BIT STRINGs do not attempt to separate the leading byte if invalid.
{
[]byte{0x03, 0x05, 0xff, 0x30, 0x80, 0x00, 0x00},
"BIT_STRING { `ff30800000` }\n",
},
// Empty BIT STRINGs do not emit extra whitspace.
// Empty BIT STRINGs with non-zero leading byte are always invalid.
{
[]byte{0x03, 0x01, 0x00},
"BIT_STRING { `00` }\n",
[]byte{0x03, 0x01, 0x07},
"BIT_STRING { `07` }\n",
},
{
[]byte{0x03, 0x01, 0x08},
"BIT_STRING { `08` }\n",
},
// OBJECT IDENTIFIERs are pretty-printed if possible.
{
Expand Down
49 changes: 44 additions & 5 deletions language.txt
Original file line number Diff line number Diff line change
Expand Up @@ -76,6 +76,32 @@ Unicode code point."
`AbCdEf`


# Bit string literals.

# A backtick string beginning with 'b' is a bit string literal. 0 or 1
# characters denote bits in a bit string. | characters are interpreted as below.
# No other characters may appear. They emit the contents of the bit string's DER
# encoding as a BIT STRING (big-endian bit order, prefixed with the number of
# trailing padding bits).

# This encodes as `00aa`.
b`10101010`

# This encodes as `04a0`.
b`1010`

# A single | may appear, which marks the beginning of explicit padding bits. BER
# permits the padding bits to take any value. However, it is an error for
# padding to cross the byte boundary.

# This encodes as `04aa`.
b`1010|1010`

# This is an error, since only four padding bits are available for the user to
# specify.
# b`1010|10101`


# Integers.

# Tokens which match /-?[0-9]+/ are integer tokens. They emit the contents of
Expand Down Expand Up @@ -288,11 +314,24 @@ SEQUENCE `aabbcc`
# c. If the tag is BOOLEAN and the body is valid, encode as TRUE or FALSE.
# Otherwise encode as a hex literal.
#
# d. If the tag is BIT STRING and the body is non-empty, encode the first
# byte as a separate hex literal. If this value is non-zero, encode the
# remainder as a raw byte string. Otherwise, apply step g to encode the
# body. This is to account for X.509 incorrectly using BIT STRING
# instead of OCTET STRING for SubjectPublicKeyInfo and signatures.
# d. If the tag is a BIT STRING:
#
# i. If the body is a valid bit string, contains a whole number of
# bytes, and may be parsed as a series of BER elements with no
# trailing data, encode as `00` followed by recursing into the body
# as in step g. This accounts for X.509 incorrectly using BIT STRING
# instead of OCTET STRING for SubjectPublicKeyInfo and signatures.
#
# ii. If the body is a valid bit string with at most 32 bits, encode as a
# bit string literal. If any padding bits are non-zero, they are
# encoded explicitly.
#
# iii. If the body is a valid bit string with more than 32 bits, encode as
# a pair of hex literals, containing the initial byte and the data
# bytes.
#
# iv. Otherwise, the body is not a valid bit string. Encode as a single
# hex literal.
#
# e. If the tag is BMPString, decode the body as UTF-16 and encode as a
# UTF-16 literal. Unpaired surrogates and unprintable code points are
Expand Down

0 comments on commit 07da533

Please sign in to comment.