Skip to content

Commit

Permalink
traced_probes, traced_perf: allow kernel symbolisation on release bui…
Browse files Browse the repository at this point in the history
…lds with init's assistance

traced_probes and traced_perf now inherit an fd to /proc/kallsyms from
init, which retains full address visibility, even on release builds of
Android.

The main complication with this shared fd is that concurrent reads would
cause quadratic slowdown because the underlying seq_file is stateful.
Hence we use an advisory flock(/proc/kallsyms) to mutually exclude the
parsing of kallsyms between our daemons.

See go/perfetto-kallsyms-user for more details.

Bug: 383513654
Change-Id: I39c6ac4e6d7a07fad1fad757cecb3ef5014989d2
  • Loading branch information
rsavitski committed Dec 19, 2024
1 parent f5be557 commit 061d1aa
Show file tree
Hide file tree
Showing 11 changed files with 188 additions and 104 deletions.
1 change: 1 addition & 0 deletions perfetto.rc
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@ service traced_probes /system/bin/traced_probes
onrestart exec_background - nobody shell -- /system/bin/traced_probes --cleanup-after-crash
file /dev/kmsg w
capabilities DAC_READ_SEARCH
shared_kallsyms

on property:persist.device_config.global_settings.sys_traced=1
setprop persist.traced.enable 1
Expand Down
34 changes: 19 additions & 15 deletions src/kallsyms/kernel_symbol_map.cc
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@
#include "perfetto/protozero/proto_utils.h"

#include <stdio.h>
#include <unistd.h>

#include <algorithm>
#include <cinttypes>
Expand All @@ -51,17 +52,11 @@ using TokenId = KernelSymbolMap::TokenTable::TokenId;
constexpr size_t kSymNameMaxLen = 128;
constexpr size_t kSymMaxSizeBytes = 1024 * 1024;

// Reads a kallsyms file in blocks of 4 pages each and decode its lines using
// a simple FSM. Calls the passed lambda for each valid symbol.
// It skips undefined symbols and other useless stuff.
// Reads a kallsyms file and decodes its lines using a simple FSM. Calls the
// passed lambda for each valid symbol. It skips undefined symbols and other
// useless stuff.
template <typename Lambda /* void(uint64_t, char, base::StringView) */>
void ForEachSym(const std::string& kallsyms_path, Lambda fn) {
base::ScopedFile fd = base::OpenFile(kallsyms_path.c_str(), O_RDONLY);
if (!fd) {
PERFETTO_PLOG("Cannot open %s", kallsyms_path.c_str());
return;
}

void ForEachSym(int fd, Lambda fn) {
// /proc/kallsyms looks as follows:
// 0000000000026a80 A bpf_trace_sds
//
Expand All @@ -75,15 +70,20 @@ void ForEachSym(const std::string& kallsyms_path, Lambda fn) {
static constexpr size_t kBufSize = 16 * 1024;
base::PagedMemory buffer = base::PagedMemory::Allocate(kBufSize);
enum { kSymAddr, kSymType, kSymName, kEatRestOfLine } state = kSymAddr;
off_t rd_offset = 0;
uint64_t sym_addr = 0;
char sym_type = '\0';
char sym_name[kSymNameMaxLen + 1];
size_t sym_name_len = 0;
for (;;) {
char* buf = static_cast<char*>(buffer.Get());
auto rsize = base::Read(*fd, buf, kBufSize);
// Use pread because on android we might be sharing an open file across
// processes. Even if they should be mutually excluded, not relying on a
// seek position is simpler to reason about.
ssize_t rsize = PERFETTO_EINTR(pread(fd, buf, kBufSize, rd_offset));
rd_offset += rsize;
if (rsize < 0) {
PERFETTO_PLOG("read(%s) failed", kallsyms_path.c_str());
PERFETTO_PLOG("pread(kallsyms) failed");
return;
}
if (rsize == 0)
Expand Down Expand Up @@ -234,7 +234,7 @@ base::StringView KernelSymbolMap::TokenTable::Lookup(TokenId id) {
return base::StringView();
}

size_t KernelSymbolMap::Parse(const std::string& kallsyms_path) {
size_t KernelSymbolMap::Parse(int fd) {
PERFETTO_METATRACE_SCOPED(TAG_PRODUCER, KALLSYMS_PARSE);
using SymAddr = uint64_t;

Expand Down Expand Up @@ -263,8 +263,12 @@ size_t KernelSymbolMap::Parse(const std::string& kallsyms_path) {
// Based on `cat /proc/kallsyms | egrep "\b[tT]\b" | wc -l`.
symbol_tokens.reserve(128 * 1024);

ForEachSym(kallsyms_path, [&](SymAddr addr, char type,
base::StringView name) {
if (fd < 0) {
PERFETTO_ELOG("Invalid kallsyms fd");
return 0;
}

ForEachSym(fd, [&](SymAddr addr, char type, base::StringView name) {
// Special cases:
//
// Skip arm mapping symbols such as $x, $x.123, $d, $d.123. They exist to
Expand Down
5 changes: 4 additions & 1 deletion src/kallsyms/kernel_symbol_map.h
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,8 @@
#include <string>
#include <vector>

#include "perfetto/ext/base/scoped_file.h"

namespace perfetto {

namespace base {
Expand Down Expand Up @@ -123,7 +125,8 @@ class KernelSymbolMap {
static size_t kTokenIndexSampling;

// Parses a kallsyms file. Returns the number of valid symbols decoded.
size_t Parse(const std::string& kallsyms_path);
// Does not take ownership of the fd.
size_t Parse(int fd);

// Looks up the closest symbol (i.e. the one with the highest address <=
// |addr|) from its absolute 64-bit address.
Expand Down
7 changes: 5 additions & 2 deletions src/kallsyms/kernel_symbol_map_benchmark.cc
Original file line number Diff line number Diff line change
Expand Up @@ -101,7 +101,9 @@ static void BM_KallSymsFind(benchmark::State& state) {
// which slows down significantly the CI.
const bool skip = IsBenchmarkFunctionalOnly();
if (!skip) {
kallsyms.Parse(perfetto::base::GetTestDataPath("test/data/kallsyms.txt"));
auto fd = perfetto::base::OpenFile(
perfetto::base::GetTestDataPath("test/data/kallsyms.txt"), O_RDONLY);
kallsyms.Parse(*fd);
}

for (auto _ : state) {
Expand Down Expand Up @@ -137,7 +139,8 @@ static void BM_KallSymsLoad(benchmark::State& state) {
for (auto _ : state) {
perfetto::KernelSymbolMap kallsyms;
if (!skip) {
kallsyms.Parse(kallsyms_path);
auto fd = perfetto::base::OpenFile(kallsyms_path, O_RDONLY);
kallsyms.Parse(*fd);
}
}
}
Expand Down
4 changes: 2 additions & 2 deletions src/kallsyms/kernel_symbol_map_unittest.cc
Original file line number Diff line number Diff line change
Expand Up @@ -100,7 +100,7 @@ ffffff8f73e2faa0 t tls_get.cfi_jt
KernelSymbolMap kallsyms;
EXPECT_EQ(kallsyms.Lookup(0x42), "");

kallsyms.Parse(tmp.path().c_str());
kallsyms.Parse(*base::OpenFile(tmp.path().c_str(), O_RDONLY));
EXPECT_EQ(kallsyms.num_syms(), 10u);

// Test first exact lookups.
Expand Down Expand Up @@ -157,7 +157,7 @@ TEST(KernelSymbolMapTest, GoldenTest) {
base::FlushFile(tmp.fd());

KernelSymbolMap kallsyms;
kallsyms.Parse(tmp.path().c_str());
kallsyms.Parse(*base::OpenFile(tmp.path().c_str(), O_RDONLY));
ASSERT_EQ(kallsyms.num_syms(), symbols.size());
for (const auto& kv : symbols) {
ASSERT_EQ(kallsyms.Lookup(kv.first), kv.second);
Expand Down
148 changes: 74 additions & 74 deletions src/kallsyms/lazy_kernel_symbolizer.cc
Original file line number Diff line number Diff line change
Expand Up @@ -18,49 +18,78 @@

#include <string>

#include <errno.h>
#include <sys/file.h>
#include <unistd.h>

#include "perfetto/base/build_config.h"
#include "perfetto/base/compiler.h"
#include "perfetto/ext/base/file_utils.h"
#include "perfetto/ext/base/scoped_file.h"
#include "perfetto/ext/base/string_utils.h"
#include "perfetto/ext/base/utils.h"
#include "src/kallsyms/kernel_symbol_map.h"

#if PERFETTO_BUILDFLAG(PERFETTO_OS_ANDROID)
#include <sys/system_properties.h>
#endif

namespace perfetto {

namespace {

const char kKallsymsPath[] = "/proc/kallsyms";
const char kPtrRestrictPath[] = "/proc/sys/kernel/kptr_restrict";
const char kLowerPtrRestrictAndroidProp[] = "security.lower_kptr_restrict";

// This class takes care of temporarily lowering kptr_restrict and putting it
// back to the original value if necessary. It solves the following problem:
// When reading /proc/kallsyms on Linux/Android, the symbol addresses can be
// masked out (i.e. they are all 00000000) through the kptr_restrict file.
// On Android kptr_restrict defaults to 2. On Linux, it depends on the
// distribution. On Android we cannot simply write() kptr_restrict ourselves.
// Doing so requires the union of:
// - filesystem ACLs: kptr_restrict is rw-r--r--// and owned by root.
// - Selinux rules: kptr_restrict is labelled as proc_security and restricted.
// - CAP_SYS_ADMIN: when writing to kptr_restrict, the kernel enforces that the
// caller has the SYS_ADMIN capability at write() time.
// The latter would be problematic, we don't want traced_probes to have that,
// CAP_SYS_ADMIN is too broad.
// Instead, we opt for the following model: traced_probes sets an Android
// property introduced in S (security.lower_kptr_restrict); init (which
// satisfies all the requirements above) in turn sets kptr_restrict.
// On Linux and standalone builds, instead, we don't have many options. Either:
// - The system administrator takes care of lowering kptr_restrict before
// tracing.
// - The system administrator runs traced_probes as root / CAP_SYS_ADMIN and we
// temporarily lower and restore kptr_restrict ourselves.
// This class deals with all these cases.
const char kEnvName[] = "ANDROID_FILE__proc_kallsyms";

// Parses kallsyms through the file descriptor inherited from init, whose
// number is read from the |kEnvName| environment variable.
//
// Returns the value of KernelSymbolMap::Parse() for the inherited fd, or 0 if:
// - the environment variable is unset or is not a valid integer;
// - /proc/kallsyms cannot be re-opened for locking;
// - the exclusive lock cannot be acquired.
size_t ParseInheritedAndroidKallsyms(KernelSymbolMap* symbol_map) {
  const char* fd_str = getenv(kEnvName);
  auto inherited_fd = base::CStringToInt32(fd_str ? fd_str : "");
  // Note: this is also the early exit for non-platform builds.
  if (!inherited_fd.has_value()) {
    PERFETTO_DLOG("Failed to parse %s (%s)", kEnvName, fd_str ? fd_str : "N/A");
    return 0;
  }

  // We've inherited a special fd for kallsyms from init, but we might be
  // sharing the underlying open file description with a concurrent process.
  // Even if we use pread() for reading at absolute offsets, the underlying
  // kernel seqfile is stateful and remembers where the last read stopped. In
  // the worst case, two concurrent readers will cause a quadratic slowdown
  // since the kernel reconstructs the seqfile from the beginning whenever two
  // reads are not consecutive.
  // The chosen approach is to use advisory file locks to coordinate access.
  // However we cannot use the special fd for locking, since the locks are based
  // on the underlying open file description (in other words, both sharers will
  // think they own the same lock). Therefore we open /proc/kallsyms again
  // purely for locking purposes.
  base::ScopedFile fd_for_lock = base::OpenFile(kKallsymsPath, O_RDONLY);
  if (!fd_for_lock) {
    PERFETTO_PLOG("Failed to open kallsyms for locking.");
    return 0;
  }

  // Blocking lock since the only possible contention is
  // traced_probes<->traced_perf, which will both lock only for the duration of
  // the parse. Worst case, the task watchdog will restart the process.
  //
  // A blocking flock() can be interrupted by a signal handler and fail with
  // EINTR. That is not a real failure, so retry instead of giving up on the
  // inherited fd.
  //
  // Lock goes away when |fd_for_lock| gets closed at end of scope.
  int lock_res;
  do {
    lock_res = flock(*fd_for_lock, LOCK_EX);
  } while (lock_res != 0 && errno == EINTR);
  if (lock_res != 0) {
    PERFETTO_PLOG("Unexpected error in flock(kallsyms).");
    return 0;
  }

  return symbol_map->Parse(*inherited_fd);
}

// This class takes care of temporarily lowering the kptr_restrict sysctl.
// Otherwise the symbol addresses in /proc/kallsyms will be zeroed out on most
// Linux configurations.
//
// On Android platform builds, this is solved by inheriting a kallsyms fd from
// init, with symbols being visible as that is evaluated at the time of the
// initial open().
//
// On Linux and standalone builds, we rely on this class in combination with
// either:
// - the sysctls (kptr_restrict, perf_event_paranoid) or this process'
// capabilities to be sufficient for addresses to be visible.
// - this process to be running as root / CAP_SYS_ADMIN, in which case this
// class will attempt to temporarily override kptr_restrict itself.
class ScopedKptrUnrestrict {
public:
ScopedKptrUnrestrict(); // Lowers kptr_restrict if necessary.
Expand All @@ -69,46 +98,15 @@ class ScopedKptrUnrestrict {
private:
static void WriteKptrRestrict(const std::string&);

static const bool kUseAndroidProperty;
std::string initial_value_;
bool restore_on_dtor_ = true;
};

#if PERFETTO_BUILDFLAG(PERFETTO_ANDROID_BUILD)
// This is true only on Android in-tree builds (not on standalone).
const bool ScopedKptrUnrestrict::kUseAndroidProperty = true;
#else
const bool ScopedKptrUnrestrict::kUseAndroidProperty = false;
#endif

ScopedKptrUnrestrict::ScopedKptrUnrestrict() {
if (LazyKernelSymbolizer::CanReadKernelSymbolAddresses()) {
// Everything seems to work (e.g., we are running as root and kptr_restrict
// is < 2). Don't touching anything.
restore_on_dtor_ = false;
// Symbols already visible, don't touch anything.
return;
}

if (kUseAndroidProperty) {
#if PERFETTO_BUILDFLAG(PERFETTO_OS_ANDROID)
__system_property_set(kLowerPtrRestrictAndroidProp, "1");
#endif
// Init takes some time to react to the property change.
// Unfortunately, we cannot read kptr_restrict because of SELinux. Instead,
// we detect this by reading the initial lines of kallsyms and checking
// that they are non-zero. This loop waits for at most 250ms (50 * 5ms).
for (int attempt = 1; attempt <= 50; ++attempt) {
usleep(5000);
if (LazyKernelSymbolizer::CanReadKernelSymbolAddresses())
return;
}
PERFETTO_ELOG("kallsyms addresses are still masked after setting %s",
kLowerPtrRestrictAndroidProp);
return;
} // if (kUseAndroidProperty)

// On Linux and Android standalone, read the kptr_restrict value and lower it
// if needed.
bool read_res = base::ReadFile(kPtrRestrictPath, &initial_value_);
if (!read_res) {
PERFETTO_PLOG("Failed to read %s", kPtrRestrictPath);
Expand All @@ -124,24 +122,19 @@ ScopedKptrUnrestrict::ScopedKptrUnrestrict() {
}

ScopedKptrUnrestrict::~ScopedKptrUnrestrict() {
if (!restore_on_dtor_)
if (initial_value_.empty())
return;
if (kUseAndroidProperty) {
#if PERFETTO_BUILDFLAG(PERFETTO_OS_ANDROID)
__system_property_set(kLowerPtrRestrictAndroidProp, "0");
#endif
} else if (!initial_value_.empty()) {
WriteKptrRestrict(initial_value_);
}
WriteKptrRestrict(initial_value_);
}

void ScopedKptrUnrestrict::WriteKptrRestrict(const std::string& value) {
// Note: kptr_restrict requires O_WRONLY. O_RDWR won't work.
PERFETTO_DCHECK(!value.empty());
base::ScopedFile fd = base::OpenFile(kPtrRestrictPath, O_WRONLY);
auto wsize = write(*fd, value.c_str(), value.size());
if (wsize <= 0)
if (wsize <= 0) {
PERFETTO_PLOG("Failed to set %s to %s", kPtrRestrictPath, value.c_str());
}
}

} // namespace
Expand All @@ -154,12 +147,19 @@ KernelSymbolMap* LazyKernelSymbolizer::GetOrCreateKernelSymbolMap() {
if (symbol_map_)
return symbol_map_.get();

symbol_map_.reset(new KernelSymbolMap());
symbol_map_ = std::make_unique<KernelSymbolMap>();

// Android platform builds: we have an fd from init.
size_t num_syms = ParseInheritedAndroidKallsyms(symbol_map_.get());
if (num_syms) {
return symbol_map_.get();
}

// If kptr_restrict is set, try temporarily lifting it (it works only if
// traced_probes is run as a privileged user).
// Otherwise, try reading the file directly, temporarily lowering
// kptr_restrict if we're running with sufficient privileges.
ScopedKptrUnrestrict kptr_unrestrict;
symbol_map_->Parse(kKallsymsPath);
auto fd = base::OpenFile(kKallsymsPath, O_RDONLY);
symbol_map_->Parse(*fd);
return symbol_map_.get();
}

Expand Down
10 changes: 8 additions & 2 deletions src/traced/probes/ftrace/ftrace_controller.cc
Original file line number Diff line number Diff line change
Expand Up @@ -547,7 +547,13 @@ void FtraceController::StopIfNeeded(FtraceInstanceState* instance) {
if (!data_sources_.empty())
return;

if (!retain_ksyms_on_stop_) {
// The kernel symbol table is discarded by default to save memory as we run as
// a long-lived daemon. Check if the config asked to retain the symbols (e.g.
// lab tests). And in either case, reset a set-but-empty table to allow trying
// again next time a config requests symbols.
if (!retain_ksyms_on_stop_ ||
(symbolizer_.is_valid() &&
symbolizer_.GetOrCreateKernelSymbolMap()->num_syms() == 0)) {
symbolizer_.Destroy();
}
retain_ksyms_on_stop_ = false;
Expand Down Expand Up @@ -608,7 +614,7 @@ bool FtraceController::StartDataSource(FtraceDataSource* data_source) {
// buffers while doing the symbol parsing.
if (data_source->config().symbolize_ksyms()) {
symbolizer_.GetOrCreateKernelSymbolMap();
// If at least one config sets the KSYMS_RETAIN flag, keep the ksysm map
// If at least one config sets the KSYMS_RETAIN flag, keep the ksyms map
// around in StopIfNeeded().
const auto KRET = FtraceConfig::KSYMS_RETAIN;
retain_ksyms_on_stop_ |= data_source->config().ksyms_mem_policy() == KRET;
Expand Down
1 change: 1 addition & 0 deletions test/cts/BUILD.gn
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@ static_library("perfetto_cts_deps") {
"../../protos/perfetto/config/process_stats:cpp",
"../../protos/perfetto/config/profiling:cpp",
"../../protos/perfetto/trace:cpp",
"../../protos/perfetto/trace/interned_data:cpp",
"../../protos/perfetto/trace/profiling:cpp",
"../../src/base:test_support",
"../../src/protozero/filtering:bytecode_generator",
Expand Down
Loading

0 comments on commit 061d1aa

Please sign in to comment.