diff options
author | Mike Frysinger <vapier@gentoo.org> | 2021-10-23 04:45:55 -0400 |
---|---|---|
committer | Mike Frysinger <vapier@gentoo.org> | 2021-10-23 18:18:03 -0400 |
commit | c681b2cadafca7fbc0b5059a7dcb1bd4f997ebca (patch) | |
tree | 65bc5a4ac3740815120e9df4df7f46a185c0e9e2 /scripts | |
parent | libsandbox: add 64-bit time_t wrappers (diff) | |
download | sandbox-c681b2cadafca7fbc0b5059a7dcb1bd4f997ebca.tar.gz sandbox-c681b2cadafca7fbc0b5059a7dcb1bd4f997ebca.tar.bz2 sandbox-c681b2cadafca7fbc0b5059a7dcb1bd4f997ebca.zip |
scripts: rewrite main processing loops for significant speedup
The awk scripts iterate over all the possible symbols for each line of
readelf output. As we add more symbols, and as the C library grows,
the number of iterations explodes.
Until now, each symbol was turned into its own regex and matched
against every line of readelf output. Instead, build one large
alternation regex at the start of the script that matches all possible
symbols, and run that single regex against each readelf line. This
avoids the nested loop logic, and speeds up the scripts significantly:
from ~1.5sec to ~0.05sec.
Signed-off-by: Mike Frysinger <vapier@gentoo.org>
Diffstat (limited to 'scripts')
-rw-r--r-- | scripts/gen_symbol_header.awk | 128 | ||||
-rw-r--r-- | scripts/gen_symbol_version_map.awk | 132 |
2 files changed, 136 insertions, 124 deletions
diff --git a/scripts/gen_symbol_header.awk b/scripts/gen_symbol_header.awk index 3f23134..2d26c5a 100644 --- a/scripts/gen_symbol_header.awk +++ b/scripts/gen_symbol_header.awk @@ -1,5 +1,15 @@ BEGIN { COUNT = split(" " SYMBOLS_LIST, SYMBOLS); + + sym_regex = ""; + for (x in SYMBOLS) { + symbol = SYMBOLS[x]; + if (sym_regex) + sym_regex = sym_regex "|"; + sym_regex = sym_regex symbol; + } + SYMBOL_REGEX = "^(" sym_regex ")(@|$)"; + WEAK_SYMBOL_REGEX = "^__(" sym_regx ")(@@|$)"; } /^ OS\/ABI:/ { @@ -12,73 +22,69 @@ BEGIN { if ($0 ~ "^Symbol (.*)table '.symtab'") nextfile; - for (x in SYMBOLS) { - sym_regex = "^" SYMBOLS[x] "(@|$)"; - # On x86, x86_64 and others, $8 is the symbol name, but on - # alpha, its $10, so rather use $NF, as it should be the - # last field - if ($NF ~ sym_regex) { - split($NF, symbol_array, /@|@@/); - - # Don't add local symbols of versioned libc's - if (VERSIONED_LIBC && !symbol_array[2]) - continue; - - # We have a versioned libc - if (symbol_array[2] && !VERSIONED_LIBC) - VERSIONED_LIBC = 1; - - ADD = 1; - # Check that we do not add duplicates - for (y in PROCESSED_SYMBOLS) { - if (y == $NF) { - ADD = 0; - break; - } + # On x86, x86_64 and others, $8 is the symbol name, but on + # alpha, its $10, so rather use $NF, as it should be the + # last field + if ($NF ~ SYMBOL_REGEX) { + split($NF, symbol_array, /@|@@/); + + # Don't add local symbols of versioned libc's + if (VERSIONED_LIBC && !symbol_array[2]) + next; + + # We have a versioned libc + if (symbol_array[2] && !VERSIONED_LIBC) + VERSIONED_LIBC = 1; + + ADD = 1; + # Check that we do not add duplicates + for (y in PROCESSED_SYMBOLS) { + if (y == $NF) { + ADD = 0; + break; } + } - if (ADD) { - SYMBOL_LIST[symbol_array[1]] = SYMBOL_LIST[symbol_array[1]] " " $NF; - PROCESSED_SYMBOLS[$NF] = $NF; - } + if (ADD) { + SYMBOL_LIST[symbol_array[1]] = SYMBOL_LIST[symbol_array[1]] " " $NF; + PROCESSED_SYMBOLS[$NF] = $NF; } + } - # No apparent need to handle weak __XXX symbols ... 
so disable - # until we have documentation on why ... - # If we do re-add this, need to update the `readelf` call in - # libsandbox/ to include the -h flag again. - continue; - - sym_regex = "^__" SYMBOLS[x] "(@@|$)"; - if (($5 == "WEAK") && ($NF ~ sym_regex)) { - split($NF, symbol_array, /@@/); - - # Don't add local symbols of versioned libc's - if (VERSIONED_LIBC && !symbol_array[2]) - continue; - - # Blacklist __getcwd on FreeBSD - # Unleashed - May 2006 - if ((symbol_array[1] == "__getcwd") && (ABI == "FreeBSD")) - continue; - - # We have a versioned libc - if (symbol_array[2] && !VERSIONED_LIBC) - VERSIONED_LIBC = 1; - - ADD = 1; - # Check that we do not add duplicates - for (y in PROCESSED_SYMBOLS) { - if (y == $NF) { - ADD = 0; - break; - } + # No apparent need to handle weak __XXX symbols ... so disable + # until we have documentation on why ... + # If we do re-add this, need to update the `readelf` call in + # libsandbox/ to include the -h flag again. + next; + + if (($5 == "WEAK") && ($NF ~ WEAK_SYMBOL_REGEX)) { + split($NF, symbol_array, /@@/); + + # Don't add local symbols of versioned libc's + if (VERSIONED_LIBC && !symbol_array[2]) + next; + + # Blacklist __getcwd on FreeBSD + # Unleashed - May 2006 + if ((symbol_array[1] == "__getcwd") && (ABI == "FreeBSD")) + next; + + # We have a versioned libc + if (symbol_array[2] && !VERSIONED_LIBC) + VERSIONED_LIBC = 1; + + ADD = 1; + # Check that we do not add duplicates + for (y in PROCESSED_SYMBOLS) { + if (y == $NF) { + ADD = 0; + break; } + } - if (ADD) { - WEAK_SYMBOLS[SYMBOLS[x]] = WEAK_SYMBOLS[SYMBOLS[x]] " " $NF; - PROCESSED_SYMBOLS[$NF] = $NF; - } + if (ADD) { + WEAK_SYMBOLS[SYMBOLS[x]] = WEAK_SYMBOLS[SYMBOLS[x]] " " $NF; + PROCESSED_SYMBOLS[$NF] = $NF; } } } diff --git a/scripts/gen_symbol_version_map.awk b/scripts/gen_symbol_version_map.awk index a0a43c0..c92e2f9 100644 --- a/scripts/gen_symbol_version_map.awk +++ b/scripts/gen_symbol_version_map.awk @@ -1,5 +1,15 @@ BEGIN { split(" " SYMBOLS_LIST, 
SYMBOLS); + + sym_regex = ""; + for (x in SYMBOLS) { + symbol = SYMBOLS[x]; + if (sym_regex) + sym_regex = sym_regex "|"; + sym_regex = sym_regex symbol; + } + SYMBOL_REGEX = "^(" sym_regex ")(@|$)"; + WEAK_SYMBOL_REGEX = "^__(" sym_regx ")(@@|$)"; } /^ OS\/ABI:/ { @@ -17,81 +27,77 @@ BEGIN { if ($4 != "FUNC" || $5 == "LOCAL" || $6 != "DEFAULT") next; - for (x in SYMBOLS) { - sym_regex = "^" SYMBOLS[x] "(@|$)"; - # On x86, x86_64 and others, $8 is the symbol name, but on - # alpha, its $10, so rather use $NF, as it should be the - # last field - if ($NF ~ sym_regex) { - split($NF, symbol_array, /@|@@/); + # On x86, x86_64 and others, $8 is the symbol name, but on + # alpha, its $10, so rather use $NF, as it should be the + # last field + if ($NF ~ SYMBOL_REGEX) { + split($NF, symbol_array, /@|@@/); - # Don't add local symbols of versioned libc's - if (VERSIONED_LIBC && !symbol_array[2]) - continue; + # Don't add local symbols of versioned libc's + if (VERSIONED_LIBC && !symbol_array[2]) + next; - # Handle non-versioned libc's like uClibc ... - if (!symbol_array[2]) - symbol_array[2] = ""; - - # We have a versioned libc - if (symbol_array[2] && !VERSIONED_LIBC) - VERSIONED_LIBC = 1; - - ADD = 1; - # Check that we do not add duplicates - for (y in PROCESSED_SYMBOLS) { - if (y == $NF) { - ADD = 0; - break; - } + # Handle non-versioned libc's like uClibc ... + if (!symbol_array[2]) + symbol_array[2] = ""; + + # We have a versioned libc + if (symbol_array[2] && !VERSIONED_LIBC) + VERSIONED_LIBC = 1; + + ADD = 1; + # Check that we do not add duplicates + for (y in PROCESSED_SYMBOLS) { + if (y == $NF) { + ADD = 0; + break; } + } - if (ADD) { - SYMBOL_LIST[symbol_array[2]] = SYMBOL_LIST[symbol_array[2]] " " symbol_array[1]; - PROCESSED_SYMBOLS[$NF] = $NF; - } + if (ADD) { + SYMBOL_LIST[symbol_array[2]] = SYMBOL_LIST[symbol_array[2]] " " symbol_array[1]; + PROCESSED_SYMBOLS[$NF] = $NF; } + } - # No apparent need to handle weak __XXX symbols ... 
so disable - # until we have documentation on why ... - # If we do re-add this, need to update the `readelf` call in - # libsandbox/ to include the -h flag again. - continue; + # No apparent need to handle weak __XXX symbols ... so disable + # until we have documentation on why ... + # If we do re-add this, need to update the `readelf` call in + # libsandbox/ to include the -h flag again. + next; - sym_regex = "^__" SYMBOLS[x] "(@@|$)"; - if (($5 == "WEAK") && ($NF ~ sym_regex)) { - split($NF, symbol_array, /@@/); + if (($5 == "WEAK") && ($NF ~ WEAK_SYMBOL_REGEX)) { + split($NF, symbol_array, /@@/); - # Don't add local symbols of versioned libc's - if (VERSIONED_LIBC && !symbol_array[2]) - continue; + # Don't add local symbols of versioned libc's + if (VERSIONED_LIBC && !symbol_array[2]) + next; - # Blacklist __getcwd on FreeBSD - # Unleashed - May 2006 - if ((symbol_array[1] == "__getcwd") && (ABI == "FreeBSD")) - continue; + # Blacklist __getcwd on FreeBSD + # Unleashed - May 2006 + if ((symbol_array[1] == "__getcwd") && (ABI == "FreeBSD")) + next; - # Handle non-versioned libc's like uClibc ... - if (!symbol_array[2]) - symbol_array[2] = ""; - - # We have a versioned libc - if (symbol_array[2] && !VERSIONED_LIBC) - VERSIONED_LIBC = 1; - - ADD = 1; - # Check that we do not add duplicates - for (y in PROCESSED_SYMBOLS) { - if (y == $NF) { - ADD = 0; - break; - } + # Handle non-versioned libc's like uClibc ... + if (!symbol_array[2]) + symbol_array[2] = ""; + + # We have a versioned libc + if (symbol_array[2] && !VERSIONED_LIBC) + VERSIONED_LIBC = 1; + + ADD = 1; + # Check that we do not add duplicates + for (y in PROCESSED_SYMBOLS) { + if (y == $NF) { + ADD = 0; + break; } + } - if (ADD) { - SYMBOL_LIST[symbol_array[2]] = SYMBOL_LIST[symbol_array[2]] " " symbol_array[1]; - PROCESSED_SYMBOLS[$NF] = $NF; - } + if (ADD) { + SYMBOL_LIST[symbol_array[2]] = SYMBOL_LIST[symbol_array[2]] " " symbol_array[1]; + PROCESSED_SYMBOLS[$NF] = $NF; } } } |