Skip to content

Commit

Permalink
strings: use BTF's string APIs for strings management
Browse files Browse the repository at this point in the history
Switch the strings container to use struct btf and its
btf__add_str()/btf__find_str() APIs, which do equivalent internal string
deduplication. This turns out to be significantly faster than using the
tsearch functions. To satisfy the CTF encoding use case, a hacky approach to
fetching the string section size is used, as libbpf doesn't provide a direct
API to get the total string section size or to copy out just the string data
section.

BEFORE:
         22,624.28 msec task-clock                #    1.000 CPUs utilized
                85      context-switches          #    0.004 K/sec
                 3      cpu-migrations            #    0.000 K/sec
           622,545      page-faults               #    0.028 M/sec
    68,177,206,387      cycles                    #    3.013 GHz                      (24.99%)
   114,370,031,619      instructions              #    1.68  insn per cycle           (25.01%)
    26,125,001,179      branches                  # 1154.733 M/sec                    (25.01%)
       458,861,243      branch-misses             #    1.76% of all branches          (25.00%)
    24,533,455,967      L1-dcache-loads           # 1084.386 M/sec                    (25.02%)
       973,500,214      L1-dcache-load-misses     #    3.97% of all L1-dcache hits    (25.05%)
       338,773,561      LLC-loads                 #   14.974 M/sec                    (25.02%)
        12,651,196      LLC-load-misses           #    3.73% of all LL-cache hits     (25.00%)

      22.628910615 seconds time elapsed

      21.341063000 seconds user
       1.283763000 seconds sys

AFTER:
         18,362.97 msec task-clock                #    1.000 CPUs utilized
                37      context-switches          #    0.002 K/sec
                 0      cpu-migrations            #    0.000 K/sec
           626,281      page-faults               #    0.034 M/sec
    52,480,619,000      cycles                    #    2.858 GHz                      (25.00%)
   104,736,434,384      instructions              #    2.00  insn per cycle           (25.01%)
    23,878,428,465      branches                  # 1300.358 M/sec                    (25.01%)
       252,669,685      branch-misses             #    1.06% of all branches          (25.03%)
    21,829,390,952      L1-dcache-loads           # 1188.772 M/sec                    (25.04%)
       638,086,339      L1-dcache-load-misses     #    2.92% of all L1-dcache hits    (25.02%)
       212,327,435      LLC-loads                 #   11.563 M/sec                    (25.00%)
        14,578,117      LLC-load-misses           #    6.87% of all LL-cache hits     (25.00%)

      18.364427347 seconds time elapsed

      16.985494000 seconds user
       1.377959000 seconds sys

Committer testing:

Before:

  $ perf stat -r5 pahole -J vmlinux

   Performance counter stats for 'pahole -J vmlinux' (5 runs):

            8,735.92 msec task-clock:u              #    0.998 CPUs utilized            ( +-  0.34% )
                   0      context-switches:u        #    0.000 K/sec
                   0      cpu-migrations:u          #    0.000 K/sec
             353,978      page-faults:u             #    0.041 M/sec                    ( +-  0.00% )
      34,722,167,335      cycles:u                  #    3.975 GHz                      ( +-  0.12% )  (83.33%)
         555,981,118      stalled-cycles-frontend:u #    1.60% frontend cycles idle     ( +-  1.53% )  (83.33%)
       5,215,370,531      stalled-cycles-backend:u  #   15.02% backend cycles idle      ( +-  1.31% )  (83.33%)
      72,615,773,119      instructions:u            #    2.09  insn per cycle
                                                    #    0.07  stalled cycles per insn  ( +-  0.02% )  (83.34%)
      16,624,959,121      branches:u                # 1903.057 M/sec                    ( +-  0.01% )  (83.33%)
         229,962,327      branch-misses:u           #    1.38% of all branches          ( +-  0.07% )  (83.33%)

              8.7503 +- 0.0301 seconds time elapsed  ( +-  0.34% )

  $

After:

  $ perf stat -r5 pahole -J vmlinux

   Performance counter stats for 'pahole -J vmlinux' (5 runs):

            7,302.31 msec task-clock:u              #    0.998 CPUs utilized            ( +-  1.16% )
                   0      context-switches:u        #    0.000 K/sec
                   0      cpu-migrations:u          #    0.000 K/sec
             355,884      page-faults:u             #    0.049 M/sec                    ( +-  0.00% )
      29,150,861,078      cycles:u                  #    3.992 GHz                      ( +-  0.35% )  (83.33%)
         478,705,326      stalled-cycles-frontend:u #    1.64% frontend cycles idle     ( +-  2.70% )  (83.33%)
       5,351,001,796      stalled-cycles-backend:u  #   18.36% backend cycles idle      ( +-  1.20% )  (83.33%)
      65,835,888,022      instructions:u            #    2.26  insn per cycle
                                                    #    0.08  stalled cycles per insn  ( +-  0.03% )  (83.33%)
      15,025,195,460      branches:u                # 2057.594 M/sec                    ( +-  0.05% )  (83.34%)
         141,209,214      branch-misses:u           #    0.94% of all branches          ( +-  0.15% )  (83.33%)

              7.3140 +- 0.0851 seconds time elapsed  ( +-  1.16% )

  $

16.04% fewer cycles, keep the patches coming! :-)

I had to add this patch, though:

  +++ b/dwarf_loader.c
  @@ -2159,7 +2159,7 @@ static unsigned long long dwarf_tag__orig_id(const struct tag *tag,
   static const char *dwarf__strings_ptr(const struct cu *cu __unused,
   				      strings_t s)
   {
  -	return strings__ptr(strings, s);
  +	return s ? strings__ptr(strings, s) : NULL;
   }

This keeps the preexisting behaviour of returning NULL for a zero string
offset, and matches what the BTF-specific strings_ptr method does, which
returns NULL for empty strings:

  static const char *btf_elf__strings_ptr(const struct cu *cu, strings_t s)
  {
          return btf_elf__string(cu->priv, s);
  }

  const char *btf_elf__string(struct btf_elf *btfe, uint32_t ref)
  {
          const char *s = btf__str_by_offset(btfe->btf, ref);

          return s && s[0] == '\0' ? NULL : s;
  }

With these adjustments, btfdiff on a vmlinux with BTF and DWARF is again
clean, i.e. pretty printing from BTF matches what we get when using
DWARF.

Signed-off-by: Andrii Nakryiko <[email protected]>
Tested-by: Arnaldo Carvalho de Melo <[email protected]>
Cc: Alexei Starovoitov <[email protected]>
Cc: Andrii Nakryiko <[email protected]>
Cc: [email protected]
Cc: [email protected]
Cc: [email protected]
Signed-off-by: Arnaldo Carvalho de Melo <[email protected]>
  • Loading branch information
anakryiko authored and acmel committed Oct 20, 2020
1 parent 75f3520 commit 29fce8d
Show file tree
Hide file tree
Showing 6 changed files with 50 additions and 95 deletions.
2 changes: 1 addition & 1 deletion ctf_encoder.c
Original file line number Diff line number Diff line change
Expand Up @@ -248,7 +248,7 @@ int cu__encode_ctf(struct cu *cu, int verbose)
if (cu__cache_symtab(cu) < 0)
goto out_delete;

ctf__set_strings(ctf, &strings->gb);
ctf__set_strings(ctf, strings);

uint32_t id;
struct tag *pos;
Expand Down
2 changes: 1 addition & 1 deletion dwarf_loader.c
Original file line number Diff line number Diff line change
Expand Up @@ -2159,7 +2159,7 @@ static unsigned long long dwarf_tag__orig_id(const struct tag *tag,
static const char *dwarf__strings_ptr(const struct cu *cu __unused,
strings_t s)
{
return strings__ptr(strings, s);
return s ? strings__ptr(strings, s) : NULL;
}

struct debug_fmt_ops dwarf__ops;
Expand Down
14 changes: 7 additions & 7 deletions libctf.c
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@
#include "ctf.h"
#include "dutil.h"
#include "gobuffer.h"
#include "pahole_strings.h"

bool ctf__ignore_symtab_function(const GElf_Sym *sym, const char *sym_name)
{
Expand Down Expand Up @@ -287,7 +288,7 @@ int ctf__load_symtab(struct ctf *ctf)
return ctf->symtab == NULL ? -1 : 0;
}

void ctf__set_strings(struct ctf *ctf, struct gobuffer *strings)
void ctf__set_strings(struct ctf *ctf, struct strings *strings)
{
ctf->strings = strings;
}
Expand Down Expand Up @@ -570,7 +571,7 @@ int ctf__encode(struct ctf *ctf, uint8_t flags)
size = (gobuffer__size(&ctf->types) +
gobuffer__size(&ctf->objects) +
gobuffer__size(&ctf->funcs) +
gobuffer__size(ctf->strings));
strings__size(ctf->strings));

ctf->size = sizeof(*hdr) + size;
ctf->buf = malloc(ctf->size);
Expand All @@ -594,13 +595,13 @@ int ctf__encode(struct ctf *ctf, uint8_t flags)
hdr->ctf_type_off = offset;
offset += gobuffer__size(&ctf->types);
hdr->ctf_str_off = offset;
hdr->ctf_str_len = gobuffer__size(ctf->strings);
hdr->ctf_str_len = strings__size(ctf->strings);

void *payload = ctf->buf + sizeof(*hdr);
gobuffer__copy(&ctf->objects, payload + hdr->ctf_object_off);
gobuffer__copy(&ctf->funcs, payload + hdr->ctf_func_off);
gobuffer__copy(&ctf->types, payload + hdr->ctf_type_off);
gobuffer__copy(ctf->strings, payload + hdr->ctf_str_off);
strings__copy(ctf->strings, payload + hdr->ctf_str_off);

*(char *)(ctf->buf + sizeof(*hdr) + hdr->ctf_str_off) = '\0';
if (flags & CTF_FLAGS_COMPR) {
Expand All @@ -623,11 +624,10 @@ int ctf__encode(struct ctf *ctf, uint8_t flags)
}
#if 0
printf("\n\ntypes:\n entries: %d\n size: %u"
"\nstrings:\n entries: %u\n size: %u\ncompressed size: %d\n",
"\nstrings:\n size: %u\ncompressed size: %d\n",
ctf->type_index,
gobuffer__size(&ctf->types),
gobuffer__nr_entries(ctf->strings),
gobuffer__size(ctf->strings), size);
strings__size(ctf->strings), size);
#endif
int fd = open(ctf->filename, O_RDWR);
if (fd < 0) {
Expand Down
4 changes: 2 additions & 2 deletions libctf.h
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ struct ctf {
struct gobuffer objects; /* data/variables */
struct gobuffer types;
struct gobuffer funcs;
struct gobuffer *strings;
struct strings *strings;
char *filename;
size_t size;
int swapped;
Expand Down Expand Up @@ -76,7 +76,7 @@ int ctf__add_function(struct ctf *ctf, uint16_t type, uint16_t nr_parms,

int ctf__add_object(struct ctf *ctf, uint16_t type);

void ctf__set_strings(struct ctf *ctf, struct gobuffer *strings);
void ctf__set_strings(struct ctf *ctf, struct strings *strings);
int ctf__encode(struct ctf *ctf, uint8_t flags);

char *ctf__string(struct ctf *ctf, uint32_t ref);
Expand Down
32 changes: 5 additions & 27 deletions pahole_strings.h
Original file line number Diff line number Diff line change
Expand Up @@ -6,13 +6,12 @@
Copyright (C) 2008 Arnaldo Carvalho de Melo <[email protected]>
*/

#include "gobuffer.h"
#include "lib/bpf/src/btf.h"

typedef unsigned int strings_t;

struct strings {
void *tree;
struct gobuffer gb;
struct btf *btf;
};

struct strings *strings__new(void);
Expand All @@ -21,33 +20,12 @@ void strings__delete(struct strings *strings);

strings_t strings__add(struct strings *strings, const char *str);
strings_t strings__find(struct strings *strings, const char *str);

int strings__cmp(const struct strings *strings, strings_t a, strings_t b);
strings_t strings__size(const struct strings *strings);
int strings__copy(const struct strings *strings, void *dst);

static inline const char *strings__ptr(const struct strings *strings, strings_t s)
{
return gobuffer__ptr(&strings->gb, s);
}

static inline const char *strings__entries(const struct strings *strings)
{
return gobuffer__entries(&strings->gb);
}

static inline unsigned int strings__nr_entries(const struct strings *strings)
{
return gobuffer__nr_entries(&strings->gb);
}

static inline strings_t strings__size(const struct strings *strings)
{
return gobuffer__size(&strings->gb);
}

static inline const char *strings__compress(struct strings *strings,
unsigned int *size)
{
return gobuffer__compress(&strings->gb, size);
return btf__str_by_offset(strings->btf, s);
}

#endif /* _STRINGS_H_ */
91 changes: 34 additions & 57 deletions strings.c
Original file line number Diff line number Diff line change
Expand Up @@ -15,97 +15,74 @@
#include <zlib.h>

#include "dutil.h"
#include "lib/bpf/src/libbpf.h"

struct strings *strings__new(void)
{
struct strings *strs = malloc(sizeof(*strs));

if (strs != NULL) {
strs->tree = NULL;
gobuffer__init(&strs->gb);
if (!strs)
return NULL;

strs->btf = btf__new_empty();
if (libbpf_get_error(strs->btf)) {
free(strs);
return NULL;
}

return strs;

}

static void do_nothing(void *ptr __unused)
{
}

void strings__delete(struct strings *strs)
{
if (strs == NULL)
return;
tdestroy(strs->tree, do_nothing);
__gobuffer__delete(&strs->gb);
btf__free(strs->btf);
free(strs);
}

static strings_t strings__insert(struct strings *strs, const char *s)
{
return gobuffer__add(&strs->gb, s, strlen(s) + 1);
}

struct search_key {
struct strings *strs;
const char *str;
};

static int strings__compare(const void *a, const void *b)
{
const struct search_key *key = a;

return strcmp(key->str, key->strs->gb.entries + (unsigned long)b);
}

strings_t strings__add(struct strings *strs, const char *str)
{
unsigned long *s;
strings_t index;
struct search_key key = {
.strs = strs,
.str = str,
};

if (str == NULL)
return 0;

s = tsearch(&key, &strs->tree, strings__compare);
if (s != NULL) {
if (*(struct search_key **)s == (void *)&key) { /* Not found, replace with the right key */
index = strings__insert(strs, str);
if (index != 0)
*s = (unsigned long)index;
else {
tdelete(&key, &strs->tree, strings__compare);
return 0;
}
} else /* Found! */
index = *s;
} else
index = btf__add_str(strs->btf, str);
if (index < 0)
return 0;

return index;
}

strings_t strings__find(struct strings *strs, const char *str)
{
strings_t *s;
struct search_key key = {
.strs = strs,
.str = str,
};
return btf__find_str(strs->btf, str);
}

if (str == NULL)
return 0;
/* a horrible and inefficient hack to get string section size out of BTF */
strings_t strings__size(const struct strings *strs)
{
const struct btf_header *p;
uint32_t sz;

p = btf__get_raw_data(strs->btf, &sz);
if (!p)
return -1;

s = tfind(&key, &strs->tree, strings__compare);
return s ? *s : 0;
return p->str_len;
}

int strings__cmp(const struct strings *strs, strings_t a, strings_t b)
/* similarly horrible hack to copy out string section out of BTF */
int strings__copy(const struct strings *strs, void *dst)
{
return a == b ? 0 : strcmp(strings__ptr(strs, a),
strings__ptr(strs, b));
const struct btf_header *p;
uint32_t sz;

p = btf__get_raw_data(strs->btf, &sz);
if (!p)
return -1;

memcpy(dst, (void *)p + p->str_off, p->str_len);
return 0;
}

0 comments on commit 29fce8d

Please sign in to comment.