Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Optimize wildcard matching by converting SafeFNMatch_ to an iterative approach #1151

Open
wants to merge 1 commit into
base: master
Choose a base branch
from
Open
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Update vlog_is_on.cc
RadoslavPetkow authored Feb 3, 2025
commit c868344f6e9b77c0539e7bc0cab89d38ab273505
158 changes: 65 additions & 93 deletions src/vlog_is_on.cc
Original file line number Diff line number Diff line change
@@ -36,6 +36,7 @@
#include <cstdio>
#include <cstdlib>
#include <cstring>
#include <limits>
#include <mutex>
#include <string>

@@ -50,121 +51,116 @@ namespace google {

inline namespace glog_internal_namespace_ {

// Implementation of fnmatch that does not need 0-termination of its
// arguments and does not allocate any memory.
// Only the "*" (any run of characters, including none) and "?" (exactly one
// character) wildcards are supported, not the "[...]" patterns.
// The match is computed iteratively: on a mismatch we backtrack to the most
// recent "*" and let it absorb one more character of str, so no recursion
// is needed.
// It's not a static function for the unittest.
//
// pattern/patt_len: the wildcard pattern and its length in bytes.
// str/str_len:      the subject text and its length in bytes.
// Returns true iff the whole of str matches the whole of pattern.
GLOG_NO_EXPORT bool SafeFNMatch_(const char* pattern, size_t patt_len,
                                 const char* str, size_t str_len) {
  size_t p = 0;  // Current index into pattern.
  size_t s = 0;  // Current index into str.

  // Backtracking state: position of the last '*' seen in pattern and the
  // index in str where that '*' currently stops matching.
  bool seen_star = false;
  size_t star_idx = 0;
  size_t match_idx = 0;

  while (s < str_len) {
    if (p < patt_len && pattern[p] == '*') {
      // Test for the wildcard before the literal comparison: otherwise a '*'
      // in the pattern would be consumed as a plain character whenever str
      // happens to contain '*' at this position (e.g. "*" vs. "*abc").
      seen_star = true;
      star_idx = p;
      match_idx = s;
      ++p;
    } else if (p < patt_len && (pattern[p] == '?' || pattern[p] == str[s])) {
      // '?' or an identical character: consume one character on both sides.
      ++p;
      ++s;
    } else if (seen_star) {
      // Mismatch after a '*': let that '*' swallow one more character of str
      // and retry the part of the pattern that follows it.
      p = star_idx + 1;
      s = ++match_idx;
    } else {
      // Mismatch and no '*' to fall back on.
      return false;
    }
  }

  // str is exhausted; whatever remains of the pattern must be all '*'.
  while (p < patt_len && pattern[p] == '*') {
    ++p;
  }
  return p == patt_len;
}

} // namespace glog_internal_namespace_

using glog_internal_namespace_::SafeFNMatch_;

// List of per-module log levels from FLAGS_vmodule.
// Once created each element is never deleted/modified
// except for the vlog_level: other threads will read VModuleInfo blobs
// w/o locks and we'll store pointers to vlog_level at VLOG locations
// that will never go away.
// We can't use an STL struct here as we wouldn't know
// when it's safe to delete/update it: other threads need to use it w/o locks.
struct VModuleInfo {
  // Wildcard pattern ("*" and "?") matched against a source file's base name.
  string module_pattern;
  // Verbosity level for modules matching module_pattern. Declared mutable so
  // it can be updated through the const pointers used to walk the list.
  mutable int32 vlog_level;  // Conceptually this is an AtomicWord, but it's
                             // too much work to use AtomicWord type here
                             // w/o much actual benefit.
  // Next element of the singly linked list, or nullptr at the end.
  const VModuleInfo* next;
};

// Global state controlling per-module logging levels.
// vmodule_mutex protects the global variables declared below it.
static std::mutex vmodule_mutex;
// Pointer to head of the VModuleInfo list.
// It's a map from module pattern to logging level for those module(s).
static VModuleInfo* vmodule_list = nullptr;
// Head of the list of cached SiteFlags. SetVLOGLevel walks this list and
// redirects every site whose base name matches the new module pattern.
static SiteFlag* cached_site_list = nullptr;

// Boolean initialization flag: VLOG2Initializer clears it on entry and sets
// it again once FLAGS_vmodule has been parsed.
static bool inited_vmodule = false;

// L >= vmodule_mutex.
// NOTE(review): the lock annotation above presumably means the caller must
// hold vmodule_mutex; InitVLOG3__ acquires it before calling -- confirm.
//
// Parses FLAGS_vmodule, a comma-separated list of "<pattern>=<level>"
// entries, and initializes the mapping of module-specific logging levels.
// One VModuleInfo is created per entry whose level parses as an integer;
// entries with an unparsable level are skipped, and parsing stops at the
// first entry that has no '='. The new elements are put at the head of
// vmodule_list in the order in which they appear in the flag.
static void VLOG2Initializer() {
  inited_vmodule = false;
  const char* vmodule = FLAGS_vmodule.c_str();
  VModuleInfo* head = nullptr;
  VModuleInfo* tail = nullptr;
  while (*vmodule != '\0') {
    const char* sep = strchr(vmodule, '=');
    if (sep == nullptr) break;
    string pattern(vmodule, static_cast<size_t>(sep - vmodule));
    int module_level;
    if (sscanf(sep, "=%d", &module_level) == 1) {
      // Deliberately never freed: VLOG sites keep pointers to vlog_level.
      auto* info = new VModuleInfo;
      info->module_pattern = pattern;
      info->vlog_level = module_level;
      info->next = nullptr;
      if (head) {
        tail->next = info;
      } else {
        head = info;
      }
      tail = info;
    }
    // Skip past this entry (find the next comma).
    vmodule = strchr(sep, ',');
    if (vmodule == nullptr) break;
    ++vmodule;  // Skip past ","
  }
  if (head) {  // Put them into the list at the head:
    tail->next = vmodule_list;
    vmodule_list = head;
  }
  inited_vmodule = true;
}

// This can be called very early, so we use SpinLock and RAW_VLOG here.
// Sets the VLOG level for a given module pattern.
int SetVLOGLevel(const char* module_pattern, int log_level) {
int result = FLAGS_v;
size_t const pattern_len = strlen(module_pattern);
const size_t pattern_len = strlen(module_pattern);
bool found = false;
{
std::lock_guard<std::mutex> l(
vmodule_mutex); // protect whole read-modify-write
for (const VModuleInfo* info = vmodule_list; info != nullptr;
info = info->next) {
std::lock_guard<std::mutex> l(vmodule_mutex);
for (const VModuleInfo* info = vmodule_list; info != nullptr; info = info->next) {
if (info->module_pattern == module_pattern) {
if (!found) {
result = info->vlog_level;
found = true;
}
info->vlog_level = log_level;
} else if (!found && SafeFNMatch_(info->module_pattern.c_str(),
info->module_pattern.size(),
module_pattern, pattern_len)) {
} else if (!found &&
SafeFNMatch_(info->module_pattern.c_str(),
info->module_pattern.size(),
module_pattern, pattern_len)) {
result = info->vlog_level;
found = true;
}
@@ -176,15 +172,12 @@ int SetVLOGLevel(const char* module_pattern, int log_level) {
info->next = vmodule_list;
vmodule_list = info;

// Update any cached site flags that match this module.
SiteFlag** item_ptr = &cached_site_list;
SiteFlag* item = cached_site_list;

// We traverse the list fully because the pattern can match several items
// from the list.
while (item) {
if (SafeFNMatch_(module_pattern, pattern_len, item->base_name,
item->base_len)) {
// Redirect the cached value to its module override.
item->level = &info->vlog_level;
*item_ptr = item->next; // Remove the item from the list.
} else {
@@ -198,8 +191,7 @@ int SetVLOGLevel(const char* module_pattern, int log_level) {
return result;
}

// NOTE: Individual VLOG statements cache the integer log level pointers.
// NOTE: This function must not allocate memory or require any locks.
// Initializes the VLOG site flag and returns whether logging should occur.
bool InitVLOG3__(SiteFlag* site_flag, int32* level_default, const char* fname,
int32 verbose_level) {
std::lock_guard<std::mutex> l(vmodule_mutex);
@@ -208,59 +200,40 @@ bool InitVLOG3__(SiteFlag* site_flag, int32* level_default, const char* fname,
VLOG2Initializer();
}

// protect the errno global in case someone writes:
// VLOG(..) << "The last error was " << strerror(errno)
// Save errno in case any recoverable error occurs.
int old_errno = errno;

// site_default normally points to FLAGS_v
int32* site_flag_value = level_default;

// Get basename for file
// Get the base file name (strip directory path).
const char* base = strrchr(fname, '/');

#ifdef _WIN32
if (!base) {
base = strrchr(fname, '\\');
}
#endif

base = base ? (base + 1) : fname;
const char* base_end = strchr(base, '.');
size_t base_length =
base_end ? static_cast<size_t>(base_end - base) : strlen(base);
size_t base_length = base_end ? static_cast<size_t>(base_end - base) : strlen(base);

// Trim out trailing "-inl" if any
if (base_length >= 4 && (memcmp(base + base_length - 4, "-inl", 4) == 0)) {
// Trim any trailing "-inl" if present.
if (base_length >= 4 && memcmp(base + base_length - 4, "-inl", 4) == 0) {
base_length -= 4;
}

// TODO: Trim out _unittest suffix? Perhaps it is better to have
// the extra control and just leave it there.

// find target in vector of modules, replace site_flag_value with
// a module-specific verbose level, if any.
for (const VModuleInfo* info = vmodule_list; info != nullptr;
info = info->next) {
// Search for a matching module override.
for (const VModuleInfo* info = vmodule_list; info != nullptr; info = info->next) {
if (SafeFNMatch_(info->module_pattern.c_str(), info->module_pattern.size(),
base, base_length)) {
site_flag_value = &info->vlog_level;
// value at info->vlog_level is now what controls
// the VLOG at the caller site forever
break;
}
}

// Cache the vlog value pointer if --vmodule flag has been parsed.
// Cache the level pointer in the site flag.
ANNOTATE_BENIGN_RACE(site_flag,
"*site_flag may be written by several threads,"
" but the value will be the same");
"*site_flag may be written by several threads, but the value will be the same");
if (read_vmodule_flag) {
site_flag->level = site_flag_value;
// If VLOG flag has been cached to the default site pointer,
// we want to add to the cached list in order to invalidate in case
// SetVModule is called afterwards with new modules.
// The performance penalty here is negligible, because InitVLOG3__ is called
// once per site.
if (site_flag_value == level_default && !site_flag->base_name) {
site_flag->base_name = base;
site_flag->base_len = base_length;
@@ -269,8 +242,7 @@ bool InitVLOG3__(SiteFlag* site_flag, int32* level_default, const char* fname,
}
}

// restore the errno in case something recoverable went wrong during
// the initialization of the VLOG mechanism (see above note "protect the..")
// Restore errno and return whether logging should proceed.
errno = old_errno;
return *site_flag_value >= verbose_level;
}