Skip to content

Commit

Permalink
Refactor unidata package a bit, add -json flag
Browse files Browse the repository at this point in the history
  • Loading branch information
arp242 committed Jan 15, 2021
1 parent 30c53bc commit 28477db
Show file tree
Hide file tree
Showing 14 changed files with 35,842 additions and 37,150 deletions.
3 changes: 2 additions & 1 deletion .ignore
Original file line number Diff line number Diff line change
@@ -1 +1,2 @@
/unidata
/unidata/codepoints.go
/unidata/emojis.go
17 changes: 17 additions & 0 deletions README.markdown
Original file line number Diff line number Diff line change
Expand Up @@ -274,6 +274,23 @@ See `uni help` for more details on the `-format` flag.
ChangeLog
---------

### v2.1.0 (unreleased)

- Refactor the arp242.net/uni/unidata package to be more useful for other use
cases. This isn't really relevant for `uni` users as such, but if you want to
get information about codepoints or emojis then this package is a nice
addition to the standard library's `unicode` package.

See godoc for some more details: https://pkg.go.dev/arp242.net/uni/unidata

- Can now output as JSON with `-j` or `-json`.

- `-format all` is a special value to include all columns uni knows about. This
is especially useful in combination with `-json`.

- Add `%(block)`, `%(plane)`, and `%(width)` to `-f`.


### v2.0.0 (2021-01-03)

This changes some flags, semantics, and defaults in **incompatible** ways, hence
Expand Down
114 changes: 46 additions & 68 deletions format.go
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
package main

import (
"encoding/json"
"fmt"
"io"
"math"
Expand All @@ -9,8 +10,6 @@ import (
"sort"
"strconv"
"strings"
"unicode"
"unicode/utf8"

"arp242.net/uni/unidata"
"zgo.at/zli"
Expand Down Expand Up @@ -51,14 +50,15 @@ type Format struct {
lines [][]string // Processed lines, to be printed.
autoalign []int // Max line lengths for autoalign.
ntrim int // Number of columns with "trim"
json bool // Print as JSON.

printHeader bool
}

func NewFormat(format string, printHeader bool, knownCols ...string) (*Format, error) {
func NewFormat(format string, asJSON, printHeader bool, knownCols ...string) (*Format, error) {
var (
reFindCols = regexp.MustCompile(`%\((.*?)(?: .+?)?\)`)
f = Format{format: format, printHeader: printHeader}
f = Format{format: format, printHeader: printHeader, json: asJSON}
)
for _, m := range reFindCols.FindAllString(format, -1) {
err := f.processColumn(m)
Expand All @@ -84,7 +84,7 @@ func NewFormat(format string, printHeader bool, knownCols ...string) (*Format, e
h["tab"] = tabOrSpace()
h["wide_padding"] = " "

if printHeader {
if printHeader && !asJSON {
f.Line(h)
}

Expand Down Expand Up @@ -188,7 +188,32 @@ func (f *Format) SortNum(col string) {
})
}

// printJSON writes every processed line to out as a JSON array of objects.
//
// Each line becomes one tab-indented object that maps a column name to that
// line's value for the column. The "wide_padding" and "tab" columns are left
// out of the objects (presumably because they only exist for text layout;
// confirm against NewFormat). Objects are separated by ", " and the array is
// terminated with "]" followed by a newline.
//
// Errors from marshalling and from writing to out are not reported.
func (f *Format) printJSON(out io.Writer) {
	last := len(f.lines) - 1

	out.Write([]byte("["))
	for n, line := range f.lines {
		obj := make(map[string]string, len(f.cols))
		for idx, col := range f.cols {
			switch col.name {
			case "wide_padding", "tab":
				// Not included in the JSON output.
			default:
				obj[col.name] = line[idx]
			}
		}

		enc, _ := json.MarshalIndent(obj, "", "\t")
		out.Write(enc)
		if n < last {
			out.Write([]byte(", "))
		}
	}
	out.Write([]byte("]\n"))
}

func (f *Format) Print(out io.Writer) {
if f.json {
f.printJSON(out)
return
}

for lineno, l := range f.lines {
line := f.format

Expand Down Expand Up @@ -278,23 +303,27 @@ func (f *Format) String() string {
return b.String()
}

var knownColumns = []string{"char", "wide_padding", "cpoint", "dec", "utf8", "html",
"xml", "keysym", "digraph", "name", "cat"}
var knownColumns = []string{"char", "wide_padding", "cpoint", "dec",
"hex", "utf8", "html", "xml", "keysym", "digraph", "name", "cat", "block",
"plane", "width"}

func toLine(info unidata.Codepoint, raw bool) map[string]string {
c := rune(info.Codepoint)
return map[string]string{
"char": fmtChar(c, raw),
"char": info.Repr(raw),
"wide_padding": widePadding(info),
"cpoint": fmt.Sprintf("U+%04X", info.Codepoint),
"dec": strconv.FormatUint(uint64(info.Codepoint), 10),
"utf8": utf8Bytes(c),
"html": htmlEntity(c, info.Codepoint),
"xml": fmt.Sprintf("#x%x", info.Codepoint),
"keysym": keysym(c),
"digraph": unidata.Digraphs[c],
"cpoint": info.FormatCodepoint(),
"dec": info.Format(10),
"hex": info.Format(16),
"utf8": info.UTF8(),
"html": info.HTMLEntity(),
"xml": info.XMLEntity(),
"keysym": info.KeySym,
"digraph": info.Digraph,
"name": info.Name,
"cat": unidata.Catnames[info.Cat],
"cat": info.Category(),
"block": info.Block(),
"plane": info.Plane(),
"width": info.WidthName(),
}
}

Expand Down Expand Up @@ -322,54 +351,3 @@ func widePadding(info unidata.Codepoint) string {
}
return ""
}

// keysym returns the entries listed for c in unidata.KeySyms joined with a
// single space, or the literal "(none)" when c has no entry in that table.
func keysym(c rune) string {
	if names, found := unidata.KeySyms[c]; found {
		return strings.Join(names, " ")
	}
	return "(none)"
}

// htmlEntity returns an HTML entity reference for a character, wrapped in
// "&" and ";".
//
// If unidata.Entities has a non-empty entry for c, that entry is used as the
// entity name; otherwise a hexadecimal numeric reference ("#x" + lower-case
// hex) is built from cp.
func htmlEntity(c rune, cp uint32) string {
	name := unidata.Entities[c]
	if name != "" {
		return "&" + name + ";"
	}
	return "&" + fmt.Sprintf("#x%x", cp) + ";"
}

// utf8Bytes returns the UTF-8 encoding of c as lower-case hexadecimal bytes
// separated by single spaces, for example "e2 82 ac" for U+20AC.
func utf8Bytes(c rune) string {
	var scratch [utf8.UTFMax]byte
	size := utf8.EncodeRune(scratch[:], c)
	encoded := scratch[:size]
	return fmt.Sprintf("% x", encoded)
}

// fmtChar returns a printable representation of c.
//
// With raw set, c is returned unchanged as a string. Otherwise:
//
//   - combining marks (categories Mn, Mc, Me) are prefixed with U+25CC ◌;
//   - control characters below 0x20 are shown using the "Control Pictures"
//     block (c + 0x2400), DEL (0x7f) as U+2421, and any other control
//     character as U+2423 "open box";
//   - other non-printable characters become U+FFFD, except the soft hyphen
//     (U+00AD) and space separators (category Zs), which are kept as-is;
//   - everything else is returned unchanged.
func fmtChar(c rune, raw bool) string {
	if raw {
		return string(c)
	}

	// Display combining characters with ◌.
	if unicode.In(c, unicode.Mn, unicode.Mc, unicode.Me) {
		return "\u25cc" + string(c)
	}

	if unicode.IsControl(c) {
		if c < 0x20 { // C0; use "Control Pictures" block
			return string(c + 0x2400)
		}
		if c == 0x7f { // DEL
			return "\u2421"
		}
		// No control pictures for C1 or anything else, use "open box".
		return "\u2423"
	}

	// Printable characters, the soft hyphen, and spaces are shown verbatim.
	if unicode.IsPrint(c) || c == 0x00ad || unicode.In(c, unicode.Zs) {
		return string(c)
	}

	// Remaining non-printable characters (e.g. "Other, Format").
	return "\ufffd"
}
2 changes: 1 addition & 1 deletion format_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ import (
)

func BenchmarkFormat(b *testing.B) {
f, err := NewFormat("%(a) %(b l:auto) %(c)", false)
f, err := NewFormat("%(a) %(b l:auto) %(c)", false, false)
if err != nil {
b.Fatal(err)
}
Expand Down
Loading

0 comments on commit 28477db

Please sign in to comment.