#!/usr/bin/env python
import argparse
import sys
import re
import addr2line
from collections import defaultdict
from itertools import chain, dropwhile
from typing import Iterator, Self


def get_command_line_parser():
    parser = argparse.ArgumentParser(formatter_class=argparse.RawDescriptionHelpFormatter,
                                     description='A reactor stall backtrace graph analyser.',
                                     epilog="""
stall-analyser helps analyze a series of reactor-stall backtraces using a graph.
Each node in the graph includes:
  `addr` - a program address
Each link in the graph includes:
  `total` - the total sum of stalls, in milliseconds,
            of all reactor stalls that pass via this caller/callee link.
  `count` - number of backtraces going through the link.
When printed, the graph is traversed in descending `total` order
to emphasize stall backtraces that are frequent and long.
Each node in the printed output is preceded with [level#index pct%],
where `level` is the level of that node in the graph (0 are root nodes),
`index` is the index in the parent node's list of callers/callees, and
`pct` is the percentage of this link's `total` time relative to
its siblings.
When given an executable, addresses are decoded using `addr2line`.
""")
    parser.add_argument('--address-threshold', default='0x100000000',
                        help='Skip common backtrace prefix terminated by one or more addresses greater than or equal to the threshold (0=disabled)')
    parser.add_argument('-e', '--executable',
                        help='Decode addresses to lines using given executable')
    parser.add_argument('-f', '--full-function-names', action='store_const', const=True, default=False,
                        help="When demangling C++ function names, display all information, including the type of the function's parameters. Otherwise, they are omitted (see `c++filt(1) -p`).")
    parser.add_argument('-w', '--width', type=int, default=0,
                        help='Smart trim of long lines to width characters (0=disabled)')
    parser.add_argument('-d', '--direction', choices=['bottom-up', 'top-down'], default='bottom-up',
                        help='Print graph bottom-up (default, callees first) or top-down (callers first)')
    parser.add_argument('-m', '--minimum', type=int, dest='tmin', default=0,
                        help='Process only stalls lasting the given time, in milliseconds, or longer')
    parser.add_argument('-b', '--branch-threshold', type=float, default=0.03,
                        help='Drop branches responsible for less than this threshold relative to the previous level, not global. (default 3%%)')
    parser.add_argument('--format', choices=['graph', 'trace'], default='graph',
                        help='The output format, default is %(default)s. `trace` is suitable as input for flamegraph.pl')
    parser.add_argument('file', nargs='?',
                        type=argparse.FileType('r'),
                        default=sys.stdin,
                        help='File containing reactor stall backtraces. Read from stdin if missing.')
    return parser
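
# Illustrative invocation (log file name is hypothetical, not part of this script):
#   ./stall-analyser.py -e scylla reactor-stalls.log
# Input may also be piped via stdin when no file argument is given.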


class Node:
    def __init__(self, addr: str):
        self.addr = addr
        self.callers = {}
        self.callees = {}
        self.printed = False

    def __repr__(self):
        return f"Node({self.addr})"

    class Link:
        def __init__(self, node: "Node", t: int) -> None:
            self.node = node
            self.total = t
            self.count = 1

        def __eq__(self, other: Self) -> bool:
            return self.total == other.total and self.count == other.count

        def __ne__(self, other: Self):
            return not (self == other)

        def __lt__(self, other: Self) -> bool:
            return self.total < other.total or self.total == other.total and self.count < other.count

        def add(self, t: int) -> None:
            self.total += t
            self.count += 1

    def link_caller(self, t: int, n: Self) -> Self:
        if n.addr in self.callers:
            link = self.callers[n.addr]
            link.add(t)
            n.callees[self.addr].add(t)
        else:
            self.callers[n.addr] = self.Link(n, t)
            n.callees[self.addr] = self.Link(self, t)
        return n

    def unlink_caller(self, addr: str) -> None:
        link = self.callers.pop(addr)
        link.node.callees.pop(self.addr)

    def link_callee(self, t: int, n) -> Self:
        if n.addr in self.callees:
            link = self.callees[n.addr]
            link.add(t)
            n.callers[self.addr].add(t)
        else:
            self.callees[n.addr] = self.Link(n, t)
            n.callers[self.addr] = self.Link(self, t)
        return n

    def unlink_callee(self, addr: str) -> None:
        link = self.callees.pop(addr)
        link.node.callers.pop(self.addr)

    def sorted_links(self, links: list, descending=True) -> list:
        return sorted([l for l in links if l.node.addr], reverse=descending)

    def sorted_callers(self, descending=True) -> list:
        return self.sorted_links(self.callers.values(), descending)

    def sorted_callees(self, descending=True) -> list:
        return self.sorted_links(self.callees.values(), descending)
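
# A minimal sketch (for illustration only, not used by the tool) of how the
# bidirectional caller/callee links above stay consistent:
#   a, b = Node('0x1'), Node('0x2')
#   a.link_caller(10, b)   # a was called by b during a 10 ms stall
#   assert a.callers['0x2'].total == 10
#   assert b.callees['0x1'].count == 1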


class Graph:
    def __init__(self, resolver: addr2line.BacktraceResolver):
        self.resolver = resolver
        # Each node in the tree contains:
        self.count = 0
        self.total = 0
        self.nodes = dict[str, Node]()
        self.tail = Node('')
        self.head = Node('')

    def empty(self):
        return not self.nodes

    def __bool__(self):
        return not self.empty()

    def process_trace(self, trace: list[str], t: int) -> None:
        # process each backtrace and insert it into the tree
        #
        # The backtraces are assumed to be in bottom-up order, i.e.
        # the first address is in the innermost frame and the last
        # address is in the outermost, in calling order.
        #
        # This helps identify closely related reactor stalls
        # where a code path that stalls may be called from multiple
        # call sites.
        node = None
        for addr in trace:
            node = self.add(node, t, addr)
        self.add_head(t, node)
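
    # Illustration (hypothetical addresses): for a 20 ms stall whose backtrace is
    # ['0xa', '0xb', '0xc'] (innermost first), process_trace() records 0xb as the
    # caller of 0xa and 0xc as the caller of 0xb, each link with total=20 and count=1;
    # 0xa is attached to the synthetic tail node and 0xc to the head node.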

    def add(self, prev: Node, t: int, addr: str):
        if addr in self.nodes:
            n = self.nodes[addr]
        else:
            n = Node(addr)
            self.nodes[addr] = n
        if prev:
            if prev.addr in self.head.callees:
                self.head.unlink_callee(prev.addr)
            prev.link_caller(t, n)
            if addr in self.tail.callers:
                self.tail.unlink_caller(addr)
        elif not n.callees or addr in self.tail.callers:
            self.tail.link_caller(t, n)
        return n

    def add_head(self, t: int, n: Node):
        self.head.link_callee(t, n)
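
    # Note on the synthetic head/tail nodes: `tail` collects innermost (stalling)
    # frames and `head` collects outermost frames, so printing can start from either
    # end depending on --direction; add() drops a node's head/tail link once it
    # gains a real caller or callee.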

    def smart_print(self, lines: str, width: int):
        def _print(l: str, width: int):
            if not width or len(l) <= width:
                print(l)
                return
            i = l.rfind(" at ")
            if i < 0:
                print(l[:width])
                return
            sfx = l[i:]
            w = width - len(sfx) - 3
            if w > 0:
                pfx = l[:w]
            else:
                pfx = ""
            print(f"{pfx}...{sfx}")

        for l in lines.splitlines():
            if l:
                _print(l, width)
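
    # Trimming example (illustrative): with --width=32 the resolved line
    #   "ns::very_long_function_name(args) at src/file.cc:123"
    # is printed as
    #   "ns::very_l... at src/file.cc:123"
    # i.e. the " at <file>:<line>" suffix is kept and the name prefix is cut to fit.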

    def print_graph(self, direction: str, width: int, branch_threshold: float):
        top_down = (direction == 'top-down')
        print(f"""
This graph is printed in {direction} order, where {'callers' if top_down else 'callees'} are printed first.
Use --direction={'bottom-up' if top_down else 'top-down'} to print {'callees' if top_down else 'callers'} first.
[level#index/out_of pct%] below denotes:
  level  - nesting level in the graph
  index  - index of the node among its siblings
  out_of - number of siblings
  pct    - percentage of total stall time of this call relative to its siblings
""")

        def _prefix(prefix_list: list):
            prefix = ''
            for p in prefix_list:
                prefix += p
            return prefix

        def _recursive_print_graph(n: Node, total: int = 0, count: int = 0, level: int = -1, idx: int = 0, out_of: int = 0, rel: float = 1.0, prefix_list: list[str] = [], skip_stats: bool = False):
            nonlocal top_down
            if level >= 0:
                avg = round(total / count) if count else 0
                prefix = _prefix(prefix_list)
                p = '+' if idx == 1 or idx == out_of else '|'
                p += '+'
                l = f"[{level}#{idx}/{out_of} {round(100*rel)}%]"
                cont_indent = len(l) + 1
                if skip_stats:
                    l = f"{' ' * (len(l)-2)} -"
                    stats = ''
                else:
                    stats = f" total={total} count={count} avg={avg}"
                l = f"{prefix}{p}{l} addr={n.addr}{stats}"
                p = "| "
                if self.resolver:
                    lines = self.resolver.resolve_address(n.addr).splitlines()
                    if len(lines) == 1:
                        li = lines[0]
                        if li.startswith("??"):
                            l += f": {lines[0]}"
                        else:
                            l += f":\n{prefix}{p}{' '*cont_indent}{li.strip()}"
                    else:
                        l += ":\n"
                        if top_down:
                            lines.reverse()
                        for li in lines:
                            l += f"{prefix}{p}{' '*cont_indent}{li.strip()}\n"
                self.smart_print(l, width)
                if n.printed:
                    print(f"{prefix}-> continued at addr={n.addr} above")
                    return
                n.printed = True
            next = n.sorted_callees() if top_down else n.sorted_callers()
            if not next:
                return
            link = next[0]
            if level >= 0 and len(next) == 1 and link.total == total and link.count == count:
                _recursive_print_graph(link.node, link.total, link.count, level, idx, out_of, rel, prefix_list, skip_stats=True)
            else:
                total = sum(link.total for link in next)
                next_prefix_list = prefix_list + ["| " if idx < out_of else "  "] if level >= 0 else []
                i = 1
                last_idx = len(next)
                omitted_idx = 0
                omitted_total = 0
                omitted_count = 0
                for link in next:
                    rel = link.total / total
                    if rel < branch_threshold:
                        if not omitted_idx:
                            omitted_idx = i
                        omitted_total += link.total
                        omitted_count += link.count
                    else:
                        _recursive_print_graph(link.node, link.total, link.count, level + 1, i, last_idx, rel, next_prefix_list)
                    i += 1
                if omitted_idx:
                    prefix = _prefix(next_prefix_list)
                    p = '++'
                    rel = omitted_total / total
                    avg = round(omitted_total / omitted_count) if omitted_count else 0
                    l = f"[{level+1}#{omitted_idx}/{last_idx} {round(100*rel)}%]"
                    print(f"{prefix}{p}{l} {last_idx - omitted_idx + 1} more branches total={omitted_total} count={omitted_count} avg={avg}")

        r = self.head if top_down else self.tail
        _recursive_print_graph(r)
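
# Example of one printed graph node (illustrative address and numbers):
#   |+[0#2/3 42%] addr=0x4f002d2 total=860 count=1 avg=860
# followed, when an executable is given, by the addr2line-resolved source line(s).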


class StackCollapse:
    # collapse stall backtraces into single lines
    def __init__(self, resolver: addr2line.BacktraceResolver) -> None:
        self.resolver = resolver
        # track every stack and its total sample count
        self.collapsed = defaultdict(int)
        # to match lines like
        # (inlined by) position_in_partition::tri_compare::operator() at ././position_in_partition.hh:485
        self.pattern = re.compile(r'''(.+)\s            # function signature
                                      at\s              #
                                      [^:]+:(?:\?|\d+)  # <source>:<line number>''', re.X)

    def process_trace(self, frames: list[str], count: int) -> None:
        # each stall report is mapped to a line of perf samples,
        # so the output looks like:
        #   row_cache::update;row_cache::do_update;row_cache::upgrade_entry 42
        # from the outermost caller to the innermost callee, and 42 is the time
        # in ms, but we use it as the count of samples.
        self.collapsed[';'.join(frames)] += count

    def _annotate_func(self, line: str) -> str:
        # sample input:
        #   (inlined by) position_in_partition::tri_compare::operator() at ././position_in_partition.hh:485
        # sample output:
        #   position_in_partition::tri_compare::operator()_[i]
        if line.startswith("??"):
            return ""
        inlined_prefix = ' (inlined by) '
        inlined = line.startswith(inlined_prefix)
        if inlined:
            line = line[len(inlined_prefix):]
        matched = self.pattern.match(line)
        assert matched, f"bad line: {line}"
        func = matched.groups()[0]
        # annotations
        if inlined:
            func += "_[i]"
        return func

    def _resolve(self, addr: str) -> Iterator[str]:
        lines = self.resolver.resolve_address(addr).splitlines()
        return (self._annotate_func(line) for line in lines)

    def print_graph(self, *_) -> None:
        for stack, count in self.collapsed.items():
            frames = filter(lambda frame: frame,
                            chain.from_iterable(self._resolve(addr) for addr in stack.split(';')))
            print(';'.join(reversed(list(frames))), count)
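
# With --format=trace the collapsed output above can be fed to flamegraph.pl
# (an external tool, not shipped with this script), e.g. (hypothetical file names):
#   ./stall-analyser.py --format trace -e scylla reactor-stalls.log | flamegraph.pl > stalls.svg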


def print_stats(tally: dict, tmin: int) -> None:
    data = []
    total_time = 0
    total_count = 0
    processed_count = 0
    min_time = 1000000
    max_time = 0
    median = None
    p95 = None
    p99 = None
    p999 = None
    for t in sorted(tally.keys()):
        count = tally[t]
        data.append((t, count))
        total_time += t * count
        if t < min_time:
            min_time = t
        if t > max_time:
            max_time = t
        total_count += count
        if t >= tmin:
            processed_count += count
    running_count = 0
    for (t, count) in data:
        running_count += count
        if median is None and running_count >= total_count / 2:
            median = t
        elif p95 is None and running_count >= (total_count * 95) / 100:
            p95 = t
        elif p99 is None and running_count >= (total_count * 99) / 100:
            p99 = t
        elif p999 is None and running_count >= (total_count * 999) / 1000:
            p999 = t
    print(f"Processed {total_count} stalls lasting a total of {total_time} milliseconds.")
    if tmin:
        print(f"Of which, {processed_count} lasted {tmin} milliseconds or longer.")
    avg_time = total_time / total_count if total_count else 0
    print(f"min={min_time} avg={avg_time:.1f} median={median} p95={p95} p99={p99} p999={p999} max={max_time}")
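
# Example stats output (illustrative numbers):
#   Processed 12 stalls lasting a total of 1530 milliseconds.
#   min=11 avg=127.5 median=48 p95=520 p99=860 p999=860 max=860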


def print_command_line_options(args):
    varargs = vars(args)
    clopts = ""
    for k in varargs.keys():
        val = varargs[k]
        opt = re.sub('_', '-', k)
        if val is None:
            continue
        elif not isinstance(val, bool):
            clopts += f" --{opt}={val}"
        elif val:
            clopts += f" --{opt}"
    print(f"Command line options:{clopts}\n")


def main():
    args = get_command_line_parser().parse_args()
    comment = re.compile(r'^\s*#')
    pattern = re.compile(r"Reactor stalled for (?P<stall>\d+) ms on shard (?P<shard>\d+).*Backtrace:")
    address_threshold = int(args.address_threshold, 0)
    # map from stall time in ms to the number of stalls of that duration
    tally = {}
    resolver = None
    if args.executable:
        resolver = addr2line.BacktraceResolver(executable=args.executable,
                                               concise=not args.full_function_names)
    if args.format == 'graph':
        render = Graph(resolver)
    else:
        render = StackCollapse(resolver)
    for s in args.file:
        if comment.search(s):
            continue
        # parse log lines like:
        # ... scylla[7795]: Reactor stalled for 860 ms on shard 4. Backtrace: 0x4f002d2 0x4efef30 0x4f001e0
        m = pattern.search(s)
        if not m:
            continue
        # extract the addresses after "Backtrace:"
        trace = s[m.span()[1]:].split()
        # and the stall time in ms
        t = int(m.group("stall"))
        tally[t] = tally.pop(t, 0) + 1
        # The address_threshold typically indicates a library call,
        # and the backtrace frames up to and including it are usually of
        # no interest as they all belong to the stall backtrace generation code, e.g.:
        #   seastar::internal::cpu_stall_detector::generate_trace
        #   void seastar::backtrace<seastar::backtrace_buffer::append_backtrace_oneline()::{lambda(seastar::frame)#1}>(seastar::backt>
        #   (inlined by) seastar::backtrace_buffer::append_backtrace_oneline() at ./build/release/seastar/./seastar/src/core/reactor.cc:771
        #   (inlined by) seastar::print_with_backtrace(seastar::backtrace_buffer&, bool) at ./build/release/seastar/./seastar/src/core/reactor.cc>
        #   seastar::internal::cpu_stall_detector::generate_trace() at ./build/release/seastar/./seastar/src/core/reactor.cc:1257
        #   seastar::internal::cpu_stall_detector::maybe_report() at ./build/release/seastar/./seastar/src/core/reactor.cc:1103
        #   (inlined by) seastar::internal::cpu_stall_detector::on_signal() at ./build/release/seastar/./seastar/src/core/reactor.cc:1117
        #   (inlined by) seastar::reactor::block_notifier(int) at ./build/release/seastar/./seastar/src/core/reactor.cc:1240
        #   ?? ??:0
        if address_threshold:
            trace = list(dropwhile(lambda addr: int(addr, 0) >= address_threshold, trace))
        if t >= args.tmin:
            render.process_trace(trace, t)
    try:
        if not render:
            print("No input data found. Please run `stall-analyser.py --help` for usage instructions.")
            sys.exit()
        if args.format == 'graph':
            print_command_line_options(args)
            print_stats(tally, args.tmin)
        render.print_graph(args.direction, args.width, args.branch_threshold)
    except BrokenPipeError:
        pass


if __name__ == '__main__':
    main()