From f636da49cb2186966b65e30ca70666b363d40db1 Mon Sep 17 00:00:00 2001 From: Dax Pryce Date: Mon, 17 Jul 2023 13:35:03 -0700 Subject: [PATCH] Bypass errors in logging for non-msft-prod environments (#392) * Removed the logger and verified that the logging capability is the root cause of our consistent segfault errors in python. Perhaps it also will fix any issues in our label test too? I'd like to push it to GH and see. * Formatting fixes * Revert "Formatting fixes" This reverts commit 9042595614c0f3b5e72f61090538abdb6510af14. * Revert "Removed the logger and verified that the logging capability is the root cause of our consistent segfault errors in python. Perhaps it also will fix any issues in our label test too? I'd like to push it to GH and see." This reverts commit 7561009932ff109ed386c4f5d50983859e49b9e7. * The custom logging implementation is causing segfaults in python. We're not sure exactly where, but this is the easiest and quickest way to getting a working python release. * All the integration tests are failing, and there's a chance the virtual dtor on AbstractDataStore might be the culprit, though I am not sure why. I'm hoping it is so it won't fall on the logging changes. * Formatting. Again. --- include/abstract_data_store.h | 2 ++ include/logger.h | 5 +++++ include/logger_impl.h | 38 ++++++++++++++++------------------- src/logger.cpp | 15 ++++---------- 4 files changed, 28 insertions(+), 32 deletions(-) diff --git a/include/abstract_data_store.h b/include/abstract_data_store.h index 2e0266814..976174378 100644 --- a/include/abstract_data_store.h +++ b/include/abstract_data_store.h @@ -18,6 +18,8 @@ template class AbstractDataStore public: AbstractDataStore(const location_t capacity, const size_t dim); + // virtual ~AbstractDataStore() = default; + // Return number of points returned virtual location_t load(const std::string &filename) = 0; diff --git a/include/logger.h b/include/logger.h index 1a1b79e71..0b17807db 100644 --- a/include/logger.h +++ b/include/logger.h @@ -14,8 +14,13 @@ namespace diskann { +#ifdef ENABLE_CUSTOM_LOGGER DISKANN_DLLEXPORT extern std::basic_ostream cout; DISKANN_DLLEXPORT extern std::basic_ostream cerr; +#else +using std::cerr; +using std::cout; +#endif enum class DISKANN_DLLEXPORT LogLevel { diff --git a/include/logger_impl.h b/include/logger_impl.h index 510c5aa08..03c65e0ce 100644 --- a/include/logger_impl.h +++ b/include/logger_impl.h @@ -11,6 +11,7 @@ namespace diskann { +#ifdef ENABLE_CUSTOM_LOGGER class ANNStreamBuf : public std::basic_streambuf { public: @@ -36,30 +37,25 @@ class ANNStreamBuf : public std::basic_streambuf int flush(); void logImpl(char *str, int numchars); -// Why the two buffer-sizes? If we are running normally, we are basically -// interacting with a character output system, so we short-circuit the -// output process by keeping an empty buffer and writing each character -// to stdout/stderr. But if we are running in OLS, we have to take all -// the text that is written to diskann::cout/diskann:cerr, consolidate it -// and push it out in one-shot, because the OLS infra does not give us -// character based output. Therefore, we use a larger buffer that is large -// enough to store the longest message, and continuously add characters -// to it. When the calling code outputs a std::endl or std::flush, sync() -// will be called and will output a log level, component name, and the text -// that has been collected. (sync() is also called if the buffer is full, so -// overflows/missing text are not a concern). -// This implies calling code _must_ either print std::endl or std::flush -// to ensure that the message is written immediately. -#ifdef ENABLE_CUSTOM_LOGGER + // Why the two buffer-sizes? If we are running normally, we are basically + // interacting with a character output system, so we short-circuit the + // output process by keeping an empty buffer and writing each character + // to stdout/stderr. But if we are running in OLS, we have to take all + // the text that is written to diskann::cout/diskann:cerr, consolidate it + // and push it out in one-shot, because the OLS infra does not give us + // character based output. Therefore, we use a larger buffer that is large + // enough to store the longest message, and continuously add characters + // to it. When the calling code outputs a std::endl or std::flush, sync() + // will be called and will output a log level, component name, and the text + // that has been collected. (sync() is also called if the buffer is full, so + // overflows/missing text are not a concern). + // This implies calling code _must_ either print std::endl or std::flush + // to ensure that the message is written immediately. + static const int BUFFER_SIZE = 1024; -#else - // Allocating an arbitrarily small buffer here because the overflow() and - // other function implementations push the BUFFER_SIZE chars into the - // buffer before flushing to fwrite. - static const int BUFFER_SIZE = 4; -#endif ANNStreamBuf(const ANNStreamBuf &); ANNStreamBuf &operator=(const ANNStreamBuf &); }; +#endif } // namespace diskann diff --git a/src/logger.cpp b/src/logger.cpp index dc27f718d..052f54877 100644 --- a/src/logger.cpp +++ b/src/logger.cpp @@ -10,13 +10,12 @@ namespace diskann { +#ifdef ENABLE_CUSTOM_LOGGER DISKANN_DLLEXPORT ANNStreamBuf coutBuff(stdout); DISKANN_DLLEXPORT ANNStreamBuf cerrBuff(stderr); DISKANN_DLLEXPORT std::basic_ostream cout(&coutBuff); DISKANN_DLLEXPORT std::basic_ostream cerr(&cerrBuff); - -#ifdef ENABLE_CUSTOM_LOGGER std::function g_logger; void SetCustomLogger(std::function logger) @@ -24,7 +23,6 @@ void SetCustomLogger(std::function logger) g_logger = logger; diskann::cout << "Set Custom Logger" << std::endl; } -#endif ANNStreamBuf::ANNStreamBuf(FILE *fp) { @@ -38,11 +36,7 @@ ANNStreamBuf::ANNStreamBuf(FILE *fp) } _fp = fp; _logLevel = (_fp == stdout) ? LogLevel::LL_Info : LogLevel::LL_Error; -#ifdef ENABLE_CUSTOM_LOGGER _buf = new char[BUFFER_SIZE + 1]; // See comment in the header -#else - _buf = new char[BUFFER_SIZE]; // See comment in the header -#endif std::memset(_buf, 0, (BUFFER_SIZE) * sizeof(char)); setp(_buf, _buf + BUFFER_SIZE - 1); @@ -88,17 +82,16 @@ int ANNStreamBuf::flush() } void ANNStreamBuf::logImpl(char *str, int num) { -#ifdef ENABLE_CUSTOM_LOGGER str[num] = '\0'; // Safe. See the c'tor. // Invoke the OLS custom logging function. if (g_logger) { g_logger(_logLevel, str); } +} #else - fwrite(str, sizeof(char), num, _fp); - fflush(_fp); +using std::cerr; +using std::cout; #endif -} } // namespace diskann