forked from open-telemetry/opentelemetry-ebpf-profiler
-
Notifications
You must be signed in to change notification settings - Fork 0
/
dotnet_tracer.ebpf.c
291 lines (255 loc) · 11.4 KB
/
dotnet_tracer.ebpf.c
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
// This file contains the code and map definitions for the Dotnet tracer
//
// Core unwinding of frames is simple, as all the generated code uses frame pointers,
// and all the interesting data is directly accessible via FP.
//
// See the host agent interpreter/dotnet/ for more references.
#include "bpfdefs.h"
#include "tracemgmt.h"
#include "types.h"
// The maximum number of dotnet frames unwound by one invocation of the unwind_dotnet
// eBPF program (it is the trip count of that program's unrolled loop).
#define DOTNET_FRAMES_PER_PROGRAM 5
// The maximum dotnet frame length in bytes, used as a heuristic to validate FP:
// a frame is only unwound if FP lies in [SP, SP + DOTNET_MAX_FRAME_LENGTH).
#define DOTNET_MAX_FRAME_LENGTH 8192
// Code type values. Keep in sync with dotnet interpreter code.
// DOTNET_CODE_JIT is the type pushed for frames whose code start was resolved
// through the nibble map.
#define DOTNET_CODE_JIT 0x1f
// DOTNET_CODE_FLAG_LEAF is a flag bit in type values below 0x100. It selects the
// leaf unwind (return address read from the stack at SP) and is masked off
// (type & 0x7f) before the frame is pushed.
#define DOTNET_CODE_FLAG_LEAF 0x80
// Map from dotnet process IDs to a structure containing addresses of variables
// we require in order to build the stack trace.
//
// unwind_dotnet looks up the PID of the trace being built in this map and stops
// with ERR_DOTNET_NO_PROC_INFO when there is no entry.
// NOTE(review): presumably populated from user space by the host agent - confirm.
bpf_map_def SEC("maps") dotnet_procs = {
.type = BPF_MAP_TYPE_HASH,
// Key: process ID.
.key_size = sizeof(pid_t),
// Value: per-process dotnet introspection data.
.value_size = sizeof(DotnetProcInfo),
// Upper bound on the number of processes that can have an entry at the same time.
.max_entries = 1024,
};
// Nibble map tunables
// https://github.com/dotnet/runtime/blob/v7.0.15/src/coreclr/inc/nibblemapmacros.h
//
// DOTNET_CODE_ALIGN is the granularity in bytes of a method start offset inside
// one nibble's bucket: a non-zero nibble n decodes to offset (n - 1) * DOTNET_CODE_ALIGN.
// Lookup addresses are rounded down to this alignment.
#define DOTNET_CODE_ALIGN 4
#define DOTNET_CODE_NIBBLES_PER_ENTRY 8 // 8 nibbles * 4 bits/nibble = one 32-bit map word
#define DOTNET_CODE_BYTES_PER_NIBBLE 32 // one nibble maps to 32 bytes of code
// Bytes of code covered by one 32-bit map word (32 * 8 = 256).
#define DOTNET_CODE_BYTES_PER_ENTRY (DOTNET_CODE_BYTES_PER_NIBBLE*DOTNET_CODE_NIBBLES_PER_ENTRY)
// Find the start of the JIT generated method containing 'pc' using a dotnet coreclr "NibbleMap".
//
// The nibble map is an array of 32-bit words in the target process. Each word covers
// DOTNET_CODE_BYTES_PER_ENTRY bytes of the JIT area as DOTNET_CODE_NIBBLES_PER_ENTRY nibbles,
// with the most significant nibble describing the lowest addresses. A non-zero nibble n marks
// a method start at offset (n - 1) * DOTNET_CODE_ALIGN inside that nibble's
// DOTNET_CODE_BYTES_PER_NIBBLE byte bucket. The lookup walks backwards from the nibble that
// covers 'pc' until it finds a non-zero nibble.
//
// Currently this technically could require an unbounded for loop to scan through the nibble map.
// To make things work in eBPF, the number of elements we parse is limited by the scratch
// buffer size. This needs to be in eBPF for the methods which may be Garbage Collected (typically
// short runtime generated IL code). If we start seeing "code too large" errors, we can also do
// this same lookup from the Host Agent because most generated code (especially large pieces) is
// currently not Garbage Collected by the runtime. Though, we have also submitted an enhancement
// request to fix the nibble map format to something sane, and this might get implemented.
// see: https://github.com/dotnet/runtime/issues/93550
//
// Parameters:
//   record     - per-CPU record. record->state.text_section_bias is used as the base address of
//                the JIT area, record->state.text_section_id as the user space address of the
//                nibble map, and record->dotnetUnwindScratch as the read buffer.
//   vi         - dotnet process info; not used by this function.
//   pc         - the address inside JIT generated code to resolve.
//   code_start - output; written only when ERR_OK is returned.
//
// Returns:
//   ERR_OK                    - *code_start holds the method start address.
//   ERR_DOTNET_CODE_TOO_LARGE - every scanned map word was zero (method start is further back
//                               than the scratch buffer allows us to look).
//   ERR_DOTNET_CODE_HEADER    - the nibble map could not be read, or the search would have to
//                               continue below the start of the map.
// The metric matching the returned error is incremented before returning.
static inline __attribute__((__always_inline__))
ErrorCode dotnet_find_code_start(PerCPURecord *record, DotnetProcInfo *vi, u64 pc, u64 *code_start) {
  // This is an ebpf optimized implementation of EEJitManager::FindMethodCode()
  // https://github.com/dotnet/runtime/blob/v7.0.15/src/coreclr/vm/codeman.cpp#L4115
  // The support code sets up the page mapping so that:
  //   text_section_bias = pHp->mapBase (base address of the JIT area)
  //   text_section_id   = pHp->pHdrMap (pointer to the nibble map)
  const UnwindState *state = &record->state;
  DotnetUnwindScratchSpace *scratch = &record->dotnetUnwindScratch;
  // Only half of scratch->map is parsed. A fixed size read of map_elements words is done to a
  // dynamic offset in [0, map_elements), so the other half is needed as room for that read.
  const int map_elements = sizeof(scratch->map)/sizeof(scratch->map[0])/2;
  u64 pc_base = state->text_section_bias;
  u64 pc_delta = pc - pc_base;
  u64 map_start = state->text_section_id;

  DEBUG_PRINT("dotnet: --> find code start for %lx: pc_base %lx, map_start %lx",
    (unsigned long) pc_delta, (unsigned long) pc_base, (unsigned long) map_start);

  pc_delta &= ~(DOTNET_CODE_ALIGN-1);

  // Read the nibble map data so that the map word covering pc_delta always ends up in
  // scratch->map[map_elements - 1].
  long offs = (long)map_elements - pc_delta/DOTNET_CODE_BYTES_PER_ENTRY - 1;
  if (offs < 0) {
    // The word covering pc_delta has at least map_elements - 1 words before it. Read to the
    // beginning of the scratch buffer and advance map_start so that the last of the
    // map_elements words read (sizeof(scratch->map)/2 bytes) is the one covering pc_delta.
    map_start += pc_delta/DOTNET_CODE_BYTES_PER_ENTRY*sizeof(u32) - sizeof(scratch->map)/2 + sizeof(u32);
    offs = 0;
  }
  // Otherwise the map is read from its beginning and placed at 'offs'. The words below 'offs'
  // are then stale; the "went below pc_base" checks further down keep them from being used.

  // offs is already within [0, map_elements); the modulo spells the bound out for the
  // array index below.
  offs %= map_elements;
  if (bpf_probe_read_user(&scratch->map[offs], sizeof(scratch->map)/2, (void*) map_start)) {
    goto bad_code_header;
  }

  // Determine if the first map entry contains the start region
  int pos = map_elements;
  u32 val = scratch->map[--pos];
  DEBUG_PRINT("dotnet: --> find code start for %lx: first entry %x",
    (unsigned long) pc_delta, val);
  // Drop the nibbles describing addresses above pc_delta, so that the lowest nibble of 'val'
  // is the one covering pc_delta.
  val >>= 28 - ((pc_delta / DOTNET_CODE_BYTES_PER_NIBBLE) % DOTNET_CODE_NIBBLES_PER_ENTRY) * 4;
  if (val != 0) {
    // Adjust pc_delta to beginning of the positioned nibble of 'val'
    pc_delta &= ~(DOTNET_CODE_BYTES_PER_NIBBLE - 1);
  } else {
    if (pc_delta < DOTNET_CODE_BYTES_PER_ENTRY) {
      // pc_delta is covered by the very first map word, so there is no previous entry to
      // continue from. Bail out here: the subtraction below would wrap pc_delta around.
      DEBUG_PRINT("dotnet: nibble map search went below pc_base");
      goto bad_code_header;
    }

    // Adjust delta to end of previous map entry
    pc_delta &= ~(DOTNET_CODE_BYTES_PER_ENTRY - 1);
    pc_delta -= DOTNET_CODE_BYTES_PER_NIBBLE;
    val = scratch->map[--pos];
    DEBUG_PRINT("dotnet: --> find code start for %lx: second entry %x",
      (unsigned long) pc_delta, val);

    // Find backwards the first non-zero entry as it marks function start
    // This is unrolled several times, so it needs to be minimal in size.
    // And currently this is the major limit for DOTNET_FRAMES_PER_PROGRAM.
    int orig_pos = pos;
#pragma unroll 256
    for (int i = 0; i < map_elements - 2; i++) {
      if (val != 0) {
        break;
      }
      val = scratch->map[--pos];
    }

    // Adjust pc_delta based on how many iterations were done
    u64 pc_skipped = DOTNET_CODE_BYTES_PER_ENTRY * (orig_pos - pos);
    if (pc_delta < pc_skipped) {
      DEBUG_PRINT("dotnet: nibble map search went below pc_base");
      goto bad_code_header;
    }
    pc_delta -= pc_skipped;
    DEBUG_PRINT("dotnet: --> find code start for %lx: skipped %d, entry %x",
      (unsigned long) pc_delta, orig_pos - pos, val);
    if (val == 0) {
      increment_metric(metricID_UnwindDotnetErrCodeTooLarge);
      return ERR_DOTNET_CODE_TOO_LARGE;
    }
  }

  // Decode the code start info from the entry. 'val' is non-zero here, and pc_delta is the
  // start of the bucket described by the lowest nibble of 'val'. Step backwards one nibble
  // (one bucket) at a time until the non-zero nibble is found.
#pragma unroll
  for (int i = 0; i < DOTNET_CODE_NIBBLES_PER_ENTRY; i++) {
    u8 nybble = val & 0xf;
    if (nybble != 0) {
      *code_start = pc_base + pc_delta + (nybble - 1) * DOTNET_CODE_ALIGN;
      DEBUG_PRINT("dotnet: --> pc_delta = %lx, val=%x, ret=%lx",
        (unsigned long) pc_delta, nybble, (unsigned long) *code_start);
      return ERR_OK;
    }
    val >>= 4;
    pc_delta -= DOTNET_CODE_BYTES_PER_NIBBLE;
  }

bad_code_header:
  DEBUG_PRINT("dotnet: not found");
  increment_metric(metricID_UnwindDotnetErrCodeHeader);
  return ERR_DOTNET_CODE_HEADER;
}
// Append one dotnet frame to 'trace'.
//
// Forwards code_header_ptr, pc_offset and return_address unchanged to
// _push_with_return_address() together with the FRAME_MARKER_DOTNET frame type, and
// hands back whatever that call returns.
static inline __attribute__((__always_inline__))
ErrorCode push_dotnet(Trace *trace, u64 code_header_ptr, u64 pc_offset, bool return_address) {
  const ErrorCode push_result = _push_with_return_address(
    trace, code_header_ptr, pc_offset, FRAME_MARKER_DOTNET, return_address);
  return push_result;
}
// Unwind a single dotnet frame: advance record->state to the caller and push the frame
// that was just left to record->trace.
//
// record->state.text_section_id selects how the frame is handled:
//   - below 0x100 with DOTNET_CODE_FLAG_LEAF set: leaf stub, the return address is read from
//     the stack at SP and SP is advanced by 8.
//   - below 0x100 otherwise: frame pointer unwind, pushed with type (value & 0x7f).
//   - anything else: frame pointer unwind of JIT generated code; the method start is located
//     with dotnet_find_code_start() and the pointer stored right before it is pushed with
//     type DOTNET_CODE_JIT.
//
// Parameters:
//   record - per-CPU record holding the unwind state and the trace.
//   vi     - dotnet process info, passed on to dotnet_find_code_start().
//   top    - not used by this function.
//
// Returns ERR_OK when a frame was pushed, the result of pushing an error frame when the JIT
// method was too large to resolve, or the error of the failing step otherwise.
static inline __attribute__((__always_inline__))
ErrorCode unwind_one_dotnet_frame(PerCPURecord *record, DotnetProcInfo *vi, bool top) {
  UnwindState *state = &record->state;
  Trace *trace = &record->trace;

  // Snapshot of the frame being unwound; 'state' is overwritten with the caller's values below.
  const u64 pc = state->pc;
  const u64 sp = state->sp;
  const u64 fp = state->fp;
  const bool return_address = state->return_address;
  // Two words read at FP: [0] is stored as the new FP, [1] as the new PC.
  u64 frame_words[2];
  ErrorCode error;

  DEBUG_PRINT("dotnet: pc: %lx, sp: %lx, fp: %lx",
    (unsigned long) pc, (unsigned long) sp, (unsigned long) fp);

  // All dotnet frames have frame pointer. Only accept an FP inside
  // [SP, SP + DOTNET_MAX_FRAME_LENGTH).
  if (!(fp >= sp && fp < sp + DOTNET_MAX_FRAME_LENGTH)) {
    DEBUG_PRINT("dotnet: frame pointer too far off %lx / %lx",
      (unsigned long) fp, (unsigned long) sp);
    increment_metric(metricID_UnwindDotnetErrBadFP);
    return ERR_DOTNET_BAD_FP;
  }

  // Defaults for R2R/stub code; the JIT path below replaces code_start and code_header_ptr.
  u64 type = state->text_section_id;
  u64 code_start = state->text_section_bias;
  u64 code_header_ptr = pc;
  const bool small_type = type < 0x100;

  // Whatever PC is stored into 'state' from here on is flagged as a return address.
  state->return_address = true;

  if (small_type && (type & DOTNET_CODE_FLAG_LEAF)) {
    // Stub frame that does not do calls: only the return address is popped.
    // NOTE(review): the return address is read from the stack at SP here regardless of
    // architecture - confirm this is what is wanted where the return address lives in LR.
    if (bpf_probe_read_user(&state->pc, sizeof(state->pc), (void*)state->sp)) {
      DEBUG_PRINT("dotnet: --> bad stack pointer");
      increment_metric(metricID_UnwindDotnetErrBadFP);
      return ERR_DOTNET_BAD_FP;
    }
    state->sp += 8;
    type &= 0x7f;
  } else {
    // Unwind with frame pointer. On Linux the frame pointers are always on.
    // https://github.com/dotnet/runtime/blob/main/docs/design/coreclr/botr/clr-abi.md#system-v-x86_64-support
    // FIXME: Early prologue and epilogue may skip a frame. Seems prologue is fixed, consider
    // using heuristic to handle prologue when the new frame is not yet pushed to stack.
    if (bpf_probe_read_user(frame_words, sizeof(frame_words), (void*)fp)) {
      DEBUG_PRINT("dotnet: --> bad frame pointer");
      increment_metric(metricID_UnwindDotnetErrBadFP);
      return ERR_DOTNET_BAD_FP;
    }
    state->sp = fp + sizeof(frame_words);
    state->fp = frame_words[0];
    state->pc = frame_words[1];
    DEBUG_PRINT("dotnet: pc: %lx, sp: %lx, fp: %lx",
      (unsigned long) state->pc, (unsigned long) state->sp,
      (unsigned long) state->fp);

    if (small_type) {
      // Not a JIT frame. A R2R frame at this point.
      type &= 0x7f;
    } else {
      // JIT generated code, locate code start
      error = dotnet_find_code_start(record, vi, pc, &code_start);
      if (error != ERR_OK) {
        DEBUG_PRINT("dotnet: --> code_start failed with %d", error);
        // dotnet_find_code_start incremented the metric already
        if (error == ERR_DOTNET_CODE_TOO_LARGE) {
          // Record an error frame in place of the frame that could not be resolved.
          return _push(trace, 0, ERR_DOTNET_CODE_TOO_LARGE, FRAME_MARKER_DOTNET|FRAME_MARKER_ERROR_BIT);
        }
        return error;
      }

      // code_start points to beginning of the JIT generated code. This is preceded by a
      // CodeHeader structure. The platforms we care define USE_INDIRECT_CODEHEADER, so the
      // data is defined at:
      // https://github.com/dotnet/runtime/blob/v7.0.15/src/coreclr/vm/codeman.h#L246-L248
      // This just reads the single pointer to the RealCodeHeader.
      if (bpf_probe_read_user(&code_header_ptr, sizeof(code_header_ptr),
                              (void*)(code_start - sizeof(u64)))) {
        DEBUG_PRINT("dotnet: --> bad code header");
        increment_metric(metricID_UnwindDotnetErrCodeHeader);
        return ERR_DOTNET_CODE_HEADER;
      }
      type = DOTNET_CODE_JIT;
    }
  }

  DEBUG_PRINT("dotnet: --> code_start = %lx, code_header = %lx, pc_offset = %lx",
    (unsigned long) code_start, (unsigned long) code_header_ptr, (unsigned long)(pc - code_start));

  // The pushed value combines the header pointer (shifted left by 5 bits) with the type.
  error = push_dotnet(trace, (code_header_ptr << 5) + type, pc - code_start, return_address);
  if (error) {
    return error;
  }

  increment_metric(metricID_UnwindDotnetFrames);
  return ERR_OK;
}
// unwind_dotnet is the entry point for tracing when invoked from the native tracer
// or interpreter dispatcher. It does not reset the trace object and will append the
// dotnet stack frames to the trace object for the current CPU.
//
// At most DOTNET_FRAMES_PER_PROGRAM frames are unwound per invocation. The program then
// stores the last error in the unwind state and tail calls the selected next unwinder;
// that is PROG_UNWIND_STOP whenever an error occurred. -1 is returned only when there is
// no per-CPU record or when the tail call itself did not happen.
SEC("perf_event/unwind_dotnet")
int unwind_dotnet(struct pt_regs *ctx) {
  PerCPURecord *record = get_per_cpu_record();
  if (!record) {
    return -1;
  }

  Trace *trace = &record->trace;
  u32 pid = trace->pid;
  DEBUG_PRINT("==== unwind_dotnet %d ====", trace->stack_len);

  int next_prog = PROG_UNWIND_STOP;
  ErrorCode error = ERR_OK;

  DotnetProcInfo *proc_info = bpf_map_lookup_elem(&dotnet_procs, &pid);
  if (!proc_info) {
    DEBUG_PRINT("dotnet: no DotnetProcInfo for this pid");
    error = ERR_DOTNET_NO_PROC_INFO;
    increment_metric(metricID_UnwindDotnetErrNoProcInfo);
  } else {
    record->ratelimitAction = RATELIMIT_ACTION_FAST;
    increment_metric(metricID_UnwindDotnetAttempts);

#pragma unroll
    for (int frame = 0; frame < DOTNET_FRAMES_PER_PROGRAM; frame++) {
      // Stop unwinding unless a next unwinder gets selected below.
      next_prog = PROG_UNWIND_STOP;

      error = unwind_one_dotnet_frame(record, proc_info, frame == 0);
      if (error) {
        break;
      }

      // Keep looping only while the next frame is again handled by this unwinder.
      error = get_next_unwinder_after_native_frame(record, &next_prog);
      if (error || next_prog != PROG_UNWIND_DOTNET) {
        break;
      }
    }
  }

  record->state.unwind_error = error;
  tail_call(ctx, next_prog);
  // Only reached when the tail call did not transfer control.
  DEBUG_PRINT("dotnet: tail call for next frame unwinder (%d) failed", next_prog);
  return -1;
}