Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add option to exclude traces spent in GC #515

Draft
wants to merge 4 commits into
base: master
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 6 additions & 2 deletions generate_bindings.py
Original file line number Diff line number Diff line change
Expand Up @@ -52,13 +52,17 @@ def calculate_pyruntime_offsets(cpython_path, version, configure=False):
#define Py_BUILD_CORE 1
#include "Include/Python.h"
#include "Include/internal/pystate.h"
#include "Include/internal/pycore_interp.h"

int main(int argc, const char * argv[]) {
size_t interp_head = offsetof(_PyRuntimeState, interpreters.head);
printf("pub static INTERP_HEAD_OFFSET: usize = %i;\n", interp_head);
printf("pub static INTERP_HEAD_OFFSET: usize = %zu;\n", interp_head);

size_t tstate_current = offsetof(_PyRuntimeState, gilstate.tstate_current);
printf("pub static TSTATE_CURRENT: usize = %i;\n", tstate_current);
printf("pub static TSTATE_CURRENT: usize = %zu;\n", tstate_current);

size_t gc_collecting = offsetof(PyInterpreterState, gc.collecting);
printf("pub static gc_collecting: usize = %zu;\n", gc_collecting);
}
"""

Expand Down
10 changes: 9 additions & 1 deletion src/config.rs
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,8 @@ pub struct Config {
#[doc(hidden)]
pub gil_only: bool,
#[doc(hidden)]
pub no_gc: bool,
#[doc(hidden)]
pub hide_progress: bool,
#[doc(hidden)]
pub capture_output: bool,
Expand Down Expand Up @@ -114,7 +116,7 @@ impl Default for Config {
command: String::from("top"),
blocking: LockingStrategy::Lock, show_line_numbers: false, sampling_rate: 100,
duration: RecordDuration::Unlimited, native: false,
gil_only: false, include_idle: false, include_thread_ids: false,
gil_only: false, no_gc: false, include_idle: false, include_thread_ids: false,
hide_progress: false, capture_output: true, dump_json: false, dump_locals: 0, subprocesses: false,
full_filenames: false, lineno: LineNo::LastInstruction }
}
Expand Down Expand Up @@ -179,6 +181,10 @@ impl Config {
.long("gil")
.help("Only include traces that are holding on to the GIL");

let nogc = Arg::new("no-gc")
.long("no-gc")
.help("Exclude traces that are running garbage collection");

let record = Command::new("record")
.about("Records stack trace information to a flamegraph, speedscope or raw file")
.arg(program.clone())
Expand Down Expand Up @@ -221,6 +227,7 @@ impl Config {
.long("threads")
.help("Show thread ids in the output"))
.arg(gil.clone())
.arg(nogc.clone())
.arg(idle.clone())
.arg(Arg::new("capture")
.long("capture")
Expand Down Expand Up @@ -307,6 +314,7 @@ impl Config {
config.format = Some(matches.value_of_t("format")?);
config.filename = matches.value_of("output").map(|f| f.to_owned());
config.show_line_numbers = matches.occurrences_of("nolineno") == 0;
config.no_gc = matches.occurrences_of("no-gc") > 0;
config.lineno = if matches.occurrences_of("nolineno") > 0 { LineNo::NoLine } else if matches.occurrences_of("function") > 0 { LineNo::FirstLineNo } else { LineNo::LastInstruction };
config.include_thread_ids = matches.occurrences_of("threads") > 0;
if matches.occurrences_of("nolineno") > 0 && matches.occurrences_of("function") > 0 {
Expand Down
18 changes: 18 additions & 0 deletions src/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -185,6 +185,7 @@ fn record_samples(pid: remoteprocess::Pid, config: &Config) -> Result<(), Error>

let mut errors = 0;
let mut intervals = 0;
let mut gc_intervals = 0;
let mut samples = 0;
println!();

Expand Down Expand Up @@ -229,6 +230,16 @@ fn record_samples(pid: remoteprocess::Pid, config: &Config) -> Result<(), Error>
}
}


if let Some(collecting) = sample.gc_collecting {
if collecting != 0 {
gc_intervals += 1;
if config.no_gc {
continue;
}
}
}

for trace in sample.traces.iter_mut() {
if !(config.include_idle || trace.active) {
continue;
Expand Down Expand Up @@ -273,6 +284,7 @@ fn record_samples(pid: remoteprocess::Pid, config: &Config) -> Result<(), Error>
} else {
format!("Collected {} samples", samples)
};

progress.set_message(msg);
}
progress.inc(1);
Expand All @@ -288,6 +300,12 @@ fn record_samples(pid: remoteprocess::Pid, config: &Config) -> Result<(), Error>
output.write(&mut out_file)?;
}

if gc_intervals > 0 {
println!("{}{:.3}% of samples were spent in garbage collection{}", lede,
(100.0 * gc_intervals as f32) / intervals as f32,
if config.no_gc { ", and were excluded by the --no-gc flag" } else {" "});
}

match config.format.as_ref().unwrap() {
FileFormat::flamegraph => {
println!("{}Wrote flamegraph data to '{}'. Samples: {} Errors: {}", lede, filename, samples, errors);
Expand Down
14 changes: 14 additions & 0 deletions src/python_bindings/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -177,4 +177,18 @@ pub mod pyruntime {
_ => None
}
}

#[cfg(all(any(target_os="linux", target_os="macos"), target_arch="x86_64"))]
pub fn get_gc_collecting_offset(version: &Version) -> Option<usize> {
match version {
Version{major: 3, minor: 9..=10, ..} => Some(816),
Version{major: 3, minor: 11, ..} => Some(848),
_ => None
}
}

#[cfg(not(all(any(target_os="linux", target_os="macos"), target_arch="x86_64")))]
pub fn get_gc_collecting_offset(version: &Version) -> Option<usize> {
None
}
}
14 changes: 13 additions & 1 deletion src/python_spy.rs
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@ pub struct PythonSpy {
pub version: Version,
pub interpreter_address: usize,
pub threadstate_address: usize,
pub gc_collecting_address: usize,
pub python_filename: std::path::PathBuf,
pub version_string: String,
pub config: Config,
Expand Down Expand Up @@ -120,6 +121,15 @@ impl PythonSpy {
}
};

// Get the address for the PyInterpreterState.gc.collecting value
// (note that we can't get this directly from the PyInterpreterState bindings because
// of the mutex member that occurs before it - which has a different size/definition
// on each OS =(
let gc_collecting_address = match pyruntime::get_gc_collecting_offset(&version) {
Some(offset) => offset + interpreter_address,
None => 0
};

let version_string = format!("python{}.{}", version.major, version.minor);

#[cfg(unwind)]
Expand All @@ -129,7 +139,9 @@ impl PythonSpy {
None
};

Ok(PythonSpy{pid, process, version, interpreter_address, threadstate_address,
Ok(PythonSpy{pid, process, version, interpreter_address,
threadstate_address,
gc_collecting_address,
python_filename: python_info.python_filename,
version_string,
#[cfg(unwind)]
Expand Down
20 changes: 16 additions & 4 deletions src/sampler.rs
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ use std::thread;

use anyhow::Error;

use remoteprocess::Pid;
use remoteprocess::{ProcessMemory, Pid};

use crate::timer::Timer;
use crate::python_spy::PythonSpy;
Expand All @@ -23,7 +23,8 @@ pub struct Sampler {
pub struct Sample {
pub traces: Vec<StackTrace>,
pub sampling_errors: Option<Vec<(Pid, Error)>>,
pub late: Option<Duration>
pub late: Option<Duration>,
pub gc_collecting: Option<i32>,
}

impl Sampler {
Expand Down Expand Up @@ -71,7 +72,15 @@ impl Sampler {
};

let late = sleep.err();
if tx.send(Sample{traces: traces, sampling_errors, late}).is_err() {

// TODO: Should we do this inside the process lock?
let gc_collecting = if spy.gc_collecting_address != 0 {
spy.process.copy_struct::<i32>(spy.gc_collecting_address).ok()
} else {
None
};

if tx.send(Sample{traces: traces, sampling_errors, late, gc_collecting}).is_err() {
break;
}
}
Expand Down Expand Up @@ -187,9 +196,12 @@ impl Sampler {
trace.process_info = process.clone();
}

// TODO: get gc stats for subprocesses as well
// (move to get_stack_traces, change to not return a vector etc)

// Send the collected info back
let late = sleep.err();
if tx.send(Sample{traces, sampling_errors, late}).is_err() {
if tx.send(Sample{traces, sampling_errors, late, gc_collecting: None}).is_err() {
break;
}

Expand Down
25 changes: 19 additions & 6 deletions tests/integration_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,7 @@ def _sample_process(self, script_name, options=None, include_profile_name=False)
]
cmdline.extend(options or [])
cmdline.extend(["--", sys.executable, script_name])
subprocess.check_output(cmdline)
output = subprocess.check_output(cmdline)
with open(profile_file.name) as f:
profiles = json.load(f)

Expand All @@ -58,15 +58,15 @@ def _sample_process(self, script_name, options=None, include_profile_name=False)
] += 1
else:
samples[tuple(Frame(**frames[frame]) for frame in sample)] += 1
return samples
return samples, output

def test_longsleep(self):
# running with the gil flag should have ~ no samples returned
profile = self._sample_process(_get_script("longsleep.py"), GIL)
profile, _ = self._sample_process(_get_script("longsleep.py"), GIL)
assert sum(profile.values()) <= 5

# running with the idle flag should have > 95% of samples in the sleep call
profile = self._sample_process(_get_script("longsleep.py"), ["--idle"])
profile, _ = self._sample_process(_get_script("longsleep.py"), ["--idle"])
sample, count = _most_frequent_sample(profile)
assert count >= 95
assert len(sample) == 2
Expand All @@ -77,7 +77,7 @@ def test_longsleep(self):

def test_busyloop(self):
# can't be sure what line we're on, but we should have ~ all samples holding the gil
profile = self._sample_process(_get_script("busyloop.py"), GIL)
profile, _ = self._sample_process(_get_script("busyloop.py"), GIL)
assert sum(profile.values()) >= 95

def test_thread_names(self):
Expand All @@ -87,7 +87,7 @@ def test_thread_names(self):
return

for _ in range(3):
profile = self._sample_process(
profile, _ = self._sample_process(
_get_script("thread_names.py"),
["--threads", "--idle"],
include_profile_name=True,
Expand All @@ -104,6 +104,19 @@ def test_thread_names(self):
assert expected_thread_names == actual_thread_names


def test_gc_collecting(self):
# we don't support getting GC stats on python < 3.9
v = sys.version_info
if v.major < 3 or v.minor < 9:
return

profiles, output = self._sample_process(_get_script("gc.py"))

# TODO: while this will verify that at least one sample was spent in GC,
# its still not a very compelling test
assert(b"samples were spent in garbage collection" in output)


def _get_script(name):
base_dir = os.path.dirname(__file__)
return os.path.join(base_dir, "scripts", name)
Expand Down
21 changes: 21 additions & 0 deletions tests/scripts/gc.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
import gc


_GRAPH = None

def fill_global_graph():
# create a moderately large self-referential struct
global _GRAPH
_GRAPH = {}
for i in range(100000):
_GRAPH[i] = _GRAPH
_GRAPH = None
gc.collect()


def busy_loop():
while True:
fill_global_graph()

if __name__ == "__main__":
busy_loop()