From e318da540a86b8d959dfd4f75b989743d418c5c7 Mon Sep 17 00:00:00 2001 From: Ben Frederickson Date: Wed, 3 Aug 2022 21:39:51 -0700 Subject: [PATCH 1/3] Add option to exclude traces spent in GC --- generate_bindings.py | 8 ++++++-- src/config.rs | 10 +++++++++- src/main.rs | 18 ++++++++++++++++++ src/python_bindings/mod.rs | 14 ++++++++++++++ src/python_spy.rs | 14 +++++++++++++- src/sampler.rs | 20 ++++++++++++++++---- tests/integration_test.py | 25 +++++++++++++++++++------ tests/scripts/gc.py | 21 +++++++++++++++++++++ 8 files changed, 116 insertions(+), 14 deletions(-) create mode 100644 tests/scripts/gc.py diff --git a/generate_bindings.py b/generate_bindings.py index 58cb84b1..68cbe374 100644 --- a/generate_bindings.py +++ b/generate_bindings.py @@ -52,13 +52,17 @@ def calculate_pyruntime_offsets(cpython_path, version, configure=False): #define Py_BUILD_CORE 1 #include "Include/Python.h" #include "Include/internal/pystate.h" + #include "Include/internal/pycore_interp.h" int main(int argc, const char * argv[]) { size_t interp_head = offsetof(_PyRuntimeState, interpreters.head); - printf("pub static INTERP_HEAD_OFFSET: usize = %i;\n", interp_head); + printf("pub static INTERP_HEAD_OFFSET: usize = %zu;\n", interp_head); size_t tstate_current = offsetof(_PyRuntimeState, gilstate.tstate_current); - printf("pub static TSTATE_CURRENT: usize = %i;\n", tstate_current); + printf("pub static TSTATE_CURRENT: usize = %zu;\n", tstate_current); + + size_t gc_collecting = offsetof(PyInterpreterState, gc.collecting); + printf("pub static gc_collecting: usize = %zu;\n", gc_collecting); } """ diff --git a/src/config.rs b/src/config.rs index 729860df..c53270a0 100644 --- a/src/config.rs +++ b/src/config.rs @@ -41,6 +41,8 @@ pub struct Config { #[doc(hidden)] pub gil_only: bool, #[doc(hidden)] + pub no_gc: bool, + #[doc(hidden)] pub hide_progress: bool, #[doc(hidden)] pub capture_output: bool, @@ -114,7 +116,7 @@ impl Default for Config { command: String::from("top"), blocking: LockingStrategy::Lock, show_line_numbers: false, sampling_rate: 100, duration: RecordDuration::Unlimited, native: false, - gil_only: false, include_idle: false, include_thread_ids: false, + gil_only: false, no_gc: false, include_idle: false, include_thread_ids: false, hide_progress: false, capture_output: true, dump_json: false, dump_locals: 0, subprocesses: false, full_filenames: false, lineno: LineNo::LastInstruction } } @@ -179,6 +181,10 @@ impl Config { .long("gil") .help("Only include traces that are holding on to the GIL"); + let nogc = Arg::new("no-gc") + .long("no-gc") + .help("Exclude traces that are running garbage collection"); + let record = Command::new("record") .about("Records stack trace information to a flamegraph, speedscope or raw file") .arg(program.clone()) @@ -221,6 +227,7 @@ impl Config { .long("threads") .help("Show thread ids in the output")) .arg(gil.clone()) + .arg(nogc.clone()) .arg(idle.clone()) .arg(Arg::new("capture") .long("capture") @@ -307,6 +314,7 @@ impl Config { config.format = Some(matches.value_of_t("format")?); config.filename = matches.value_of("output").map(|f| f.to_owned()); config.show_line_numbers = matches.occurrences_of("nolineno") == 0; + config.no_gc = matches.occurrences_of("no-gc") > 0; config.lineno = if matches.occurrences_of("nolineno") > 0 { LineNo::NoLine } else if matches.occurrences_of("function") > 0 { LineNo::FirstLineNo } else { LineNo::LastInstruction }; config.include_thread_ids = matches.occurrences_of("threads") > 0; if matches.occurrences_of("nolineno") > 0 && matches.occurrences_of("function") > 0 { diff --git a/src/main.rs b/src/main.rs index ca3a10a0..19d82f36 100644 --- a/src/main.rs +++ b/src/main.rs @@ -185,6 +185,7 @@ fn record_samples(pid: remoteprocess::Pid, config: &Config) -> Result<(), Error> let mut errors = 0; let mut intervals = 0; + let mut gc_intervals = 0; let mut samples = 0; println!(); @@ -229,6 +230,16 @@ fn record_samples(pid: remoteprocess::Pid, config: &Config) -> Result<(), Error> } } + + if let Some(collecting) = sample.gc_collecting { + if collecting != 0 { + gc_intervals += 1; + if config.no_gc { + continue; + } + } + } + for trace in sample.traces.iter_mut() { if !(config.include_idle || trace.active) { continue; @@ -273,6 +284,7 @@ fn record_samples(pid: remoteprocess::Pid, config: &Config) -> Result<(), Error> } else { format!("Collected {} samples", samples) }; + progress.set_message(msg); } progress.inc(1); @@ -288,6 +300,12 @@ fn record_samples(pid: remoteprocess::Pid, config: &Config) -> Result<(), Error> output.write(&mut out_file)?; } + if gc_intervals > 0 { + println!("{}{:.3}% of samples were spent in garbage collection{}", lede, + (100.0 * gc_intervals as f32) / intervals as f32, + if config.no_gc { ", and were excluded by the --no-gc flag" } else {" "}); + } + match config.format.as_ref().unwrap() { FileFormat::flamegraph => { println!("{}Wrote flamegraph data to '{}'. Samples: {} Errors: {}", lede, filename, samples, errors); diff --git a/src/python_bindings/mod.rs b/src/python_bindings/mod.rs index 5742731c..29c10549 100644 --- a/src/python_bindings/mod.rs +++ b/src/python_bindings/mod.rs @@ -177,4 +177,18 @@ pub mod pyruntime { _ => None } } + + #[cfg(all(target_os="linux", target_arch="x86_64"))] + pub fn get_gc_collecting_offset(version: &Version) -> Option { + match version { + Version{major: 3, minor: 9..=10, ..} => Some(816), + Version{major: 3, minor: 11, ..} => Some(848), + _ => None + } + } + + #[cfg(not(all(target_os="linux", target_arch="x86_64")))] + pub fn get_gc_collecting_offset(version: &Version) -> Option { + None + } } diff --git a/src/python_spy.rs b/src/python_spy.rs index 40c2e625..f3ea5fa1 100644 --- a/src/python_spy.rs +++ b/src/python_spy.rs @@ -34,6 +34,7 @@ pub struct PythonSpy { pub version: Version, pub interpreter_address: usize, pub threadstate_address: usize, + pub gc_collecting_address: usize, pub python_filename: std::path::PathBuf, pub version_string: String, pub config: Config, @@ -120,6 +121,15 @@ impl PythonSpy { } }; + // Get the address for the PyInterpreterState.gc.collecting value + // (note that we can't get this directly from the PyInterpreterState bindings because + // of the mutex member that occurs before it - which has a different size/definition + // on each OS =( + let gc_collecting_address = match pyruntime::get_gc_collecting_offset(&version) { + Some(offset) => offset + interpreter_address, + None => 0 + }; + let version_string = format!("python{}.{}", version.major, version.minor); #[cfg(unwind)] @@ -129,7 +139,9 @@ impl PythonSpy { None }; - Ok(PythonSpy{pid, process, version, interpreter_address, threadstate_address, + Ok(PythonSpy{pid, process, version, interpreter_address, + threadstate_address, + gc_collecting_address, python_filename: python_info.python_filename, version_string, #[cfg(unwind)] diff --git a/src/sampler.rs b/src/sampler.rs index 2f60f511..42e7f026 100644 --- a/src/sampler.rs +++ b/src/sampler.rs @@ -6,7 +6,7 @@ use std::thread; use anyhow::Error; -use remoteprocess::Pid; +use remoteprocess::{ProcessMemory, Pid}; use crate::timer::Timer; use crate::python_spy::PythonSpy; @@ -23,7 +23,8 @@ pub struct Sampler { pub struct Sample { pub traces: Vec, pub sampling_errors: Option>, - pub late: Option + pub late: Option, + pub gc_collecting: Option, } impl Sampler { @@ -71,7 +72,15 @@ impl Sampler { }; let late = sleep.err(); - if tx.send(Sample{traces: traces, sampling_errors, late}).is_err() { + + // TODO: Should we do this inside the process lock? + let gc_collecting = if spy.gc_collecting_address != 0 { + spy.process.copy_struct::(spy.gc_collecting_address).ok() + } else { + None + }; + + if tx.send(Sample{traces: traces, sampling_errors, late, gc_collecting}).is_err() { break; } } @@ -187,9 +196,12 @@ impl Sampler { trace.process_info = process.clone(); } + // TODO: get gc stats for subprocesses as well + // (move to get_stack_traces, change to not return a vector etc) + // Send the collected info back let late = sleep.err(); - if tx.send(Sample{traces, sampling_errors, late}).is_err() { + if tx.send(Sample{traces, sampling_errors, late, gc_collecting: None}).is_err() { break; } diff --git a/tests/integration_test.py b/tests/integration_test.py index e7c987a0..520674ba 100644 --- a/tests/integration_test.py +++ b/tests/integration_test.py @@ -42,7 +42,7 @@ def _sample_process(self, script_name, options=None, include_profile_name=False) ] cmdline.extend(options or []) cmdline.extend(["--", sys.executable, script_name]) - subprocess.check_output(cmdline) + output = subprocess.check_output(cmdline) with open(profile_file.name) as f: profiles = json.load(f) @@ -58,15 +58,15 @@ def _sample_process(self, script_name, options=None, include_profile_name=False) ] += 1 else: samples[tuple(Frame(**frames[frame]) for frame in sample)] += 1 - return samples + return samples, output def test_longsleep(self): # running with the gil flag should have ~ no samples returned - profile = self._sample_process(_get_script("longsleep.py"), GIL) + profile, _ = self._sample_process(_get_script("longsleep.py"), GIL) assert sum(profile.values()) <= 5 # running with the idle flag should have > 95% of samples in the sleep call - profile = self._sample_process(_get_script("longsleep.py"), ["--idle"]) + profile, _ = self._sample_process(_get_script("longsleep.py"), ["--idle"]) sample, count = _most_frequent_sample(profile) assert count >= 95 assert len(sample) == 2 @@ -77,7 +77,7 @@ def test_longsleep(self): def test_busyloop(self): # can't be sure what line we're on, but we should have ~ all samples holding the gil - profile = self._sample_process(_get_script("busyloop.py"), GIL) + profile, _ = self._sample_process(_get_script("busyloop.py"), GIL) assert sum(profile.values()) >= 95 def test_thread_names(self): @@ -87,7 +87,7 @@ def test_thread_names(self): return for _ in range(3): - profile = self._sample_process( + profile, _ = self._sample_process( _get_script("thread_names.py"), ["--threads", "--idle"], include_profile_name=True, @@ -104,6 +104,19 @@ def test_thread_names(self): assert expected_thread_names == actual_thread_names + def test_gc_collecting(self): + # we don't support getting GC stats on python < 3.9 + v = sys.version_info + if v.major < 3 or v.minor < 9: + return + + profiles, output = self._sample_process(_get_script("gc.py")) + + # TODO: while this will verify that at least one sample was spent in GC, + # its still not a very compelling test + assert("samples were spent in garbage collection" in output) + + def _get_script(name): base_dir = os.path.dirname(__file__) return os.path.join(base_dir, "scripts", name) diff --git a/tests/scripts/gc.py b/tests/scripts/gc.py new file mode 100644 index 00000000..b480fc66 --- /dev/null +++ b/tests/scripts/gc.py @@ -0,0 +1,21 @@ +import gc + + +_GRAPH = None + +def fill_global_graph(): + # create a moderately large self-referential struct + global _GRAPH + _GRAPH = {} + for i in range(100000): + _GRAPH[i] = _GRAPH + _GRAPH = None + gc.collect() + + +def busy_loop(): + while True: + fill_global_graph() + +if __name__ == "__main__": + busy_loop() From 4ab7f425c03cdf2fe709d27c33e7da781561518e Mon Sep 17 00:00:00 2001 From: Ben Frederickson Date: Wed, 3 Aug 2022 22:00:42 -0700 Subject: [PATCH 2/3] unittest fix --- tests/integration_test.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/integration_test.py b/tests/integration_test.py index 520674ba..5e31a6ea 100644 --- a/tests/integration_test.py +++ b/tests/integration_test.py @@ -114,7 +114,7 @@ def test_gc_collecting(self): # TODO: while this will verify that at least one sample was spent in GC, # its still not a very compelling test - assert("samples were spent in garbage collection" in output) + assert(b"samples were spent in garbage collection" in output) def _get_script(name): From b2dc023c3fbfec288794b89922daa95ef3b11d8e Mon Sep 17 00:00:00 2001 From: Ben Frederickson Date: Wed, 3 Aug 2022 22:34:20 -0700 Subject: [PATCH 3/3] Get GC time for osx Seems to have the same offsets as linux, which is sorta suspicious (maybe we can avoid the OS specific code here and just add to the InterpreterState trait?) --- src/python_bindings/mod.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/python_bindings/mod.rs b/src/python_bindings/mod.rs index 29c10549..c0e197cb 100644 --- a/src/python_bindings/mod.rs +++ b/src/python_bindings/mod.rs @@ -178,7 +178,7 @@ pub mod pyruntime { } } - #[cfg(all(target_os="linux", target_arch="x86_64"))] + #[cfg(all(any(target_os="linux", target_os="macos"), target_arch="x86_64"))] pub fn get_gc_collecting_offset(version: &Version) -> Option { match version { Version{major: 3, minor: 9..=10, ..} => Some(816), @@ -187,7 +187,7 @@ pub mod pyruntime { } } - #[cfg(not(all(target_os="linux", target_arch="x86_64")))] + #[cfg(not(all(any(target_os="linux", target_os="macos"), target_arch="x86_64")))] pub fn get_gc_collecting_offset(version: &Version) -> Option { None }