diff --git a/aya/src/programs/perf_event.rs b/aya/src/programs/perf_event.rs
index b7b233cf4..4dd22d32c 100644
--- a/aya/src/programs/perf_event.rs
+++ b/aya/src/programs/perf_event.rs
@@ -1,6 +1,6 @@
 //! Perf event programs.
 
-use std::os::fd::AsFd as _;
+use std::os::fd::{AsFd as _, OwnedFd};
 
 pub use crate::generated::{
     perf_hw_cache_id, perf_hw_cache_op_id, perf_hw_cache_op_result_id, perf_hw_id, perf_sw_ids,
@@ -20,7 +20,9 @@ use crate::{
         perf_attach::{PerfLinkIdInner, PerfLinkInner},
         FdLink, LinkError, ProgramData, ProgramError,
     },
-    sys::{bpf_link_get_info_by_fd, perf_event_open, SyscallError},
+    sys::{
+        self, bpf_link_get_info_by_fd, SyscallError,
+    },
 };
 
 /// The type of perf event
@@ -50,6 +52,20 @@ pub enum SamplePolicy {
     Frequency(u64),
 }
 
+/// Fields included in the event samples (value of `perf_event_attr.sample_type`).
+#[derive(Debug, Clone)]
+pub struct SampleType(pub u64);
+
+/// "Wake up" overflow notification policy.
+/// Overflows are generated only by sampling events.
+#[derive(Debug, Clone)]
+pub enum WakeUpPolicy {
+    /// Wake up after n events
+    WakeupEvents(u32),
+    /// Wake up after n bytes
+    WakeupWatermark(u32),
+}
+
 /// The scope of a PerfEvent
 #[derive(Debug, Clone)]
 #[allow(clippy::enum_variant_names)]
@@ -147,33 +163,14 @@ impl PerfEvent {
     ) -> Result<PerfEventLinkId, ProgramError> {
         let prog_fd = self.fd()?;
         let prog_fd = prog_fd.as_fd();
-        let (sample_period, sample_frequency) = match sample_policy {
-            SamplePolicy::Period(period) => (period, None),
-            SamplePolicy::Frequency(frequency) => (0, Some(frequency)),
-        };
-        let (pid, cpu) = match scope {
-            PerfEventScope::CallingProcessAnyCpu => (0, -1),
-            PerfEventScope::CallingProcessOneCpu { cpu } => (0, cpu as i32),
-            PerfEventScope::OneProcessAnyCpu { pid } => (pid as i32, -1),
-            PerfEventScope::OneProcessOneCpu { cpu, pid } => (pid as i32, cpu as i32),
-            PerfEventScope::AllProcessesOneCpu { cpu } => (-1, cpu as i32),
-        };
-        let fd = perf_event_open(
-            perf_type as u32,
-            config,
-            pid,
-            cpu,
-            sample_period,
-            sample_frequency,
-            false,
-            0,
-        )
-        .map_err(|(_code, io_error)| SyscallError {
-            call: "perf_event_open",
-            io_error,
-        })?;
-
-        let link = perf_attach(prog_fd, fd)?;
+
+        // Same sample type as the one that the previous implementation of `attach` requested.
+        let sample_type =
+            SampleType(crate::generated::perf_event_sample_format::PERF_SAMPLE_RAW as u64);
+        let sampling = Some((sample_policy, sample_type));
+        let event_fd = perf_event_open(perf_type as u32, config, scope, sampling, None, 0)?;
+
+        let link = perf_attach(prog_fd, event_fd)?;
         self.data.links.insert(PerfEventLink::new(link))
     }
 
@@ -225,3 +222,67 @@ define_link_wrapper!(
     PerfLinkInner,
     PerfLinkIdInner
 );
+
+/// Performs a call to `perf_event_open` and returns the event's file descriptor.
+///
+/// # Arguments
+///
+/// * `perf_type` - the type of event, see [`crate::generated::perf_type_id`] for a list of types. Note that this list is non-exhaustive, because PMUs (Performance Monitoring Units) can be added to the system. Their ids can be read from the sysfs (see the kernel documentation on perf_event_open).
+/// * `config` - the event that we want to open
+/// * `scope` - which process and cpu to monitor (logical cpu, not physical socket)
+/// * `sampling` - if not None, enables the sampling mode with the given parameters
+/// * `wakeup` - if not None, sets up the wake-up for the overflow notifications
+/// * `flags` - various flags combined with a binary OR (for ex. `FLAG_A | FLAG_B`), zero means no flag
+///
+/// # Errors
+///
+/// Returns a [`SyscallError`] that wraps the I/O error when the `perf_event_open` syscall fails.
+pub fn perf_event_open(
+    perf_type: u32,
+    config: u64,
+    scope: PerfEventScope,
+    sampling: Option<(SamplePolicy, SampleType)>,
+    wakeup: Option<WakeUpPolicy>,
+    flags: u32,
+) -> Result<OwnedFd, SyscallError> {
+    let mut attr = sys::init_perf_event_attr();
+
+    // Fill in the attributes
+    attr.type_ = perf_type;
+    attr.config = config;
+    match sampling {
+        Some((SamplePolicy::Frequency(f), SampleType(t))) => {
+            attr.set_freq(1);
+            attr.__bindgen_anon_1.sample_freq = f;
+            attr.sample_type = t;
+        }
+        Some((SamplePolicy::Period(p), SampleType(t))) => {
+            attr.__bindgen_anon_1.sample_period = p;
+            attr.sample_type = t;
+        }
+        None => (),
+    };
+    match wakeup {
+        Some(WakeUpPolicy::WakeupEvents(n)) => {
+            attr.__bindgen_anon_2.wakeup_events = n;
+        }
+        Some(WakeUpPolicy::WakeupWatermark(n)) => {
+            attr.set_watermark(1);
+            attr.__bindgen_anon_2.wakeup_watermark = n;
+        }
+        None => (),
+    };
+
+    let (pid, cpu) = match scope {
+        PerfEventScope::CallingProcessAnyCpu => (0, -1),
+        PerfEventScope::CallingProcessOneCpu { cpu } => (0, cpu as i32),
+        PerfEventScope::OneProcessAnyCpu { pid } => (pid as i32, -1),
+        PerfEventScope::OneProcessOneCpu { cpu, pid } => (pid as i32, cpu as i32),
+        PerfEventScope::AllProcessesOneCpu { cpu } => (-1, cpu as i32),
+    };
+
+    sys::perf_event_sys(attr, pid, cpu, flags).map_err(|(_, io_error)| SyscallError {
+        call: "perf_event_open",
+        io_error,
+    })
+}
diff --git a/aya/src/sys/perf_event.rs b/aya/src/sys/perf_event.rs
index b06f4fba4..effc59173 100644
--- a/aya/src/sys/perf_event.rs
+++ b/aya/src/sys/perf_event.rs
@@ -15,8 +15,16 @@ use crate::generated::{
     PERF_FLAG_FD_CLOEXEC,
 };
 
+/// Returns a zeroed `perf_event_attr` in which only the `size` field has been set.
+pub(crate) fn init_perf_event_attr() -> perf_event_attr {
+    // SAFETY: `perf_event_attr` is a plain C struct, for which all-zeroes is a valid value.
+    let mut attr = unsafe { mem::zeroed::<perf_event_attr>() };
+    attr.size = mem::size_of::<perf_event_attr>() as u32;
+    attr
+}
+
 #[allow(clippy::too_many_arguments)]
-pub(crate) fn perf_event_open(
+pub(crate) fn perf_event_open_sampled(
     perf_type: u32,
     config: u64,
     pid: pid_t,
@@ -26,10 +34,8 @@ pub(crate) fn perf_event_open(
     wakeup: bool,
     flags: u32,
 ) -> SysResult<OwnedFd> {
-    let mut attr = unsafe { mem::zeroed::<perf_event_attr>() };
-
+    let mut attr = init_perf_event_attr();
     attr.config = config;
-    attr.size = mem::size_of::<perf_event_attr>() as u32;
     attr.type_ = perf_type;
     attr.sample_type = PERF_SAMPLE_RAW as u64;
     // attr.inherits = if pid > 0 { 1 } else { 0 };
@@ -46,7 +52,7 @@ pub(crate) fn perf_event_open(
 }
 
 pub(crate) fn perf_event_open_bpf(cpu: c_int) -> SysResult<OwnedFd> {
-    perf_event_open(
+    perf_event_open_sampled(
         PERF_TYPE_SOFTWARE as u32,
         PERF_COUNT_SW_BPF_OUTPUT as u64,
         -1,
@@ -67,7 +73,7 @@ pub(crate) fn perf_event_open_probe(
 ) -> SysResult<OwnedFd> {
     use std::os::unix::ffi::OsStrExt as _;
 
-    let mut attr = unsafe { mem::zeroed::<perf_event_attr>() };
+    let mut attr = init_perf_event_attr();
 
     if let Some(ret_bit) = ret_bit {
         attr.config = 1 << ret_bit;
@@ -75,7 +81,6 @@ pub(crate) fn perf_event_open_probe(
 
     let c_name = CString::new(name.as_bytes()).unwrap();
 
-    attr.size = mem::size_of::<perf_event_attr>() as u32;
     attr.type_ = ty;
     attr.__bindgen_anon_3.config1 = c_name.as_ptr() as u64;
     attr.__bindgen_anon_4.config2 = offset;
@@ -87,9 +92,7 @@ pub(crate) fn perf_event_open_probe(
 }
 
 pub(crate) fn perf_event_open_trace_point(id: u32, pid: Option<pid_t>) -> SysResult<OwnedFd> {
-    let mut attr = unsafe { mem::zeroed::<perf_event_attr>() };
-
-    attr.size = mem::size_of::<perf_event_attr>() as u32;
+    let mut attr = init_perf_event_attr();
     attr.type_ = PERF_TYPE_TRACEPOINT as u32;
     attr.config = id as u64;
 
@@ -112,7 +115,12 @@ pub(crate) fn perf_event_ioctl(
     return crate::sys::TEST_SYSCALL.with(|test_impl| unsafe { test_impl.borrow()(call) });
 }
 
-fn perf_event_sys(attr: perf_event_attr, pid: pid_t, cpu: i32, flags: u32) -> SysResult<OwnedFd> {
+pub(crate) fn perf_event_sys(
+    attr: perf_event_attr,
+    pid: pid_t,
+    cpu: i32,
+    flags: u32,
+) -> SysResult<OwnedFd> {
     let fd = syscall(Syscall::PerfEventOpen {
         attr,
         pid,
diff --git a/bpf/aya-bpf/src/maps/perf/perf_event_array.rs b/bpf/aya-bpf/src/maps/perf/perf_event_array.rs
index 234943793..315333cd7 100644
--- a/bpf/aya-bpf/src/maps/perf/perf_event_array.rs
+++ b/bpf/aya-bpf/src/maps/perf/perf_event_array.rs
@@ -18,7 +18,7 @@ use crate::{
 /// # Minimum kernel version
 ///
 /// The minimum kernel version required to read perf_event values using [PerfEventArray] is 4.15.
-/// This concerns the functions [`read_current_cpu()`], [`read_at_index()`] and [`read()`].
+/// This concerns the functions [`PerfEventArray::read_current_cpu()`] and [`PerfEventArray::read_at_index()`].
 ///
 #[repr(transparent)]
 pub struct PerfEventArray<T> {
@@ -68,7 +68,7 @@ impl<T> PerfEventArray<T> {
     }
 
     pub fn output_at_index<C: BpfContext>(&self, ctx: &C, data: &T, index: u32) -> Result<(), i64> {
-        self.output(ctx, data, (index as u64) & BPF_F_INDEX_MASK)
+        self.output(ctx, data, u64::from(index) & BPF_F_INDEX_MASK)
     }
 
     fn output<C: BpfContext>(&self, ctx: &C, data: &T, flags: u64) -> Result<(), i64> {
diff --git a/test/integration-ebpf/Cargo.toml b/test/integration-ebpf/Cargo.toml
index d471acf31..9faf21dab 100644
--- a/test/integration-ebpf/Cargo.toml
+++ b/test/integration-ebpf/Cargo.toml
@@ -55,3 +55,7 @@ path = "src/xdp_sec.rs"
 [[bin]]
 name = "ring_buf"
 path = "src/ring_buf.rs"
+
+[[bin]]
+name = "perf_events"
+path = "src/perf_events.rs"
diff --git a/test/integration-ebpf/src/perf_events.rs b/test/integration-ebpf/src/perf_events.rs
new file mode 100644
index 000000000..07264c9e6
--- /dev/null
+++ b/test/integration-ebpf/src/perf_events.rs
@@ -0,0 +1,59 @@
+#![no_std]
+#![no_main]
+
+use aya_bpf::{
+    bindings::bpf_perf_event_value,
+    helpers::bpf_get_smp_processor_id,
+    macros::{map, perf_event},
+    maps::PerfEventArray,
+    programs::PerfEventContext,
+};
+
+#[repr(C)]
+struct EventData {
+    value: u64,
+    cpu_id: u32,
+    tag: u8,
+}
+
+/// Input map: file descriptors of the perf events, obtained by calling
+/// `perf_event_open` in user space.
+// NOTE(review): the element type of this map is assumed to be `i32` - confirm.
+#[map]
+static mut DESCRIPTORS: PerfEventArray<i32> = PerfEventArray::with_max_entries(1, 0);
+
+/// Output map: the `EventData` written by `write_output`.
+#[map]
+static mut OUTPUT: PerfEventArray<EventData> = PerfEventArray::with_max_entries(1, 0);
+
+#[perf_event]
+pub fn on_perf_event(ctx: PerfEventContext) -> i64 {
+    // `and_then` (not `map`), so that an error returned by `write_output` is reported too.
+    match read_event().and_then(|res| write_output(&ctx, res)) {
+        Ok(()) => 0,
+        Err(e) => e,
+    }
+}
+
+fn read_event() -> Result<EventData, i64> {
+    // read the event value using the file descriptor in the DESCRIPTORS array
+    let event: bpf_perf_event_value = unsafe { DESCRIPTORS.read_current_cpu() }?;
+
+    let cpu_id = unsafe { bpf_get_smp_processor_id() };
+    let res = EventData {
+        value: event.counter,
+        cpu_id,
+        tag: 0xAB,
+    };
+    Ok(res)
+}
+
+fn write_output(ctx: &PerfEventContext, output: EventData) -> Result<(), i64> {
+    unsafe { OUTPUT.output_current_cpu(ctx, &output) }
+}
+
+#[cfg(not(test))]
+#[panic_handler]
+fn panic(_info: &core::panic::PanicInfo) -> ! {
+    loop {}
+}
diff --git a/test/integration-test/src/lib.rs b/test/integration-test/src/lib.rs
index d47080336..c01b3ab99 100644
--- a/test/integration-test/src/lib.rs
+++ b/test/integration-test/src/lib.rs
@@ -22,6 +22,7 @@ pub const BPF_PROBE_READ: &[u8] =
 pub const REDIRECT: &[u8] = include_bytes_aligned!(concat!(env!("OUT_DIR"), "/redirect"));
 pub const XDP_SEC: &[u8] = include_bytes_aligned!(concat!(env!("OUT_DIR"), "/xdp_sec"));
 pub const RING_BUF: &[u8] = include_bytes_aligned!(concat!(env!("OUT_DIR"), "/ring_buf"));
+pub const PERF_EVENTS: &[u8] = include_bytes_aligned!(concat!(env!("OUT_DIR"), "/perf_events"));
 
 #[cfg(test)]
 mod tests;
diff --git a/test/integration-test/src/tests.rs b/test/integration-test/src/tests.rs
index f37d54bbe..e25125c4c 100644
--- a/test/integration-test/src/tests.rs
+++ b/test/integration-test/src/tests.rs
@@ -3,6 +3,7 @@ mod btf_relocations;
 mod elf;
 mod load;
 mod log;
+mod perf_events;
 mod rbpf;
 mod relocations;
 mod ring_buf;
diff --git a/test/integration-test/src/tests/perf_events.rs b/test/integration-test/src/tests/perf_events.rs
new file mode 100644
index 000000000..440c29aa2
--- /dev/null
+++ b/test/integration-test/src/tests/perf_events.rs
@@ -0,0 +1,101 @@
+use std::{mem, os::fd::OwnedFd, time::Duration};
+
+use aya::{
+    maps::PerfEventArray,
+    programs::{
+        perf_event::{perf_event_open, PerfEventScope},
+        PerfEvent, PerfTypeId, SamplePolicy,
+    },
+    Bpf,
+};
+use aya_obj::generated::{perf_hw_id, perf_sw_ids, perf_type_id};
+use bytes::BytesMut;
+use test_log::test;
+
+/// Data written by the bpf program.
+/// Its definition must be the same as the `EventData` struct of the bpf program.
+#[derive(Debug)]
+#[repr(C)]
+struct EventData {
+    value: u64,
+    cpu_id: u32,
+    tag: u8,
+}
+
+#[test]
+fn perf_event_read_from_kernel() {
+    const CPU_ID: u32 = 0;
+
+    // load bpf program
+    let mut bpf = Bpf::load(crate::PERF_EVENTS).unwrap();
+    // Take ownership of the maps: borrowing both of them with `map_mut` would borrow `bpf`
+    // mutably more than once, and would conflict with `program_mut` below.
+    let mut descriptors = PerfEventArray::try_from(bpf.take_map("DESCRIPTORS").unwrap()).unwrap();
+    let mut bpf_output = PerfEventArray::try_from(bpf.take_map("OUTPUT").unwrap()).unwrap();
+
+    // open a perf_event
+    // Beware: this returns an `OwnedFd`, which means that the file descriptor is closed at the
+    // end of the scope.
+    // The counter is opened on CPU_ID for all processes, like the sampling event below: the bpf
+    // program runs on CPU_ID while this thread sleeps, so a counter bound to the calling
+    // process could presumably not be read from there.
+    let event_fd: OwnedFd = perf_event_open(
+        perf_type_id::PERF_TYPE_HARDWARE as u32,
+        perf_hw_id::PERF_COUNT_HW_INSTRUCTIONS as u64,
+        PerfEventScope::AllProcessesOneCpu { cpu: CPU_ID },
+        None,
+        None,
+        0,
+    )
+    .unwrap();
+
+    // pass the file descriptor to the bpf array
+    descriptors.set(0, event_fd.into());
+
+    // load program
+    let program: &mut PerfEvent = bpf.program_mut("on_perf_event").unwrap().try_into().unwrap();
+    program.load().expect("failed to load the bpf program");
+
+    // get buffer to poll the events
+    const BUF_PAGE_COUNT: usize = 1;
+    let mut buf = bpf_output
+        .open(CPU_ID, Some(BUF_PAGE_COUNT))
+        .expect("failed to open output buffer to poll events");
+
+    // attach program
+    program
+        .attach(
+            PerfTypeId::Software,
+            perf_sw_ids::PERF_COUNT_SW_CPU_CLOCK as u64,
+            PerfEventScope::AllProcessesOneCpu { cpu: CPU_ID },
+            SamplePolicy::Frequency(1),
+        )
+        .expect("failed to attach the bpf program");
+
+    // sleep a little bit, then poll the values from the buffer
+    std::thread::sleep(Duration::from_secs(2));
+    assert!(
+        buf.readable(),
+        "the buffer should have been filled by the bpf program"
+    );
+
+    // read the events and check that the returned data is correct
+    let mut events_data: [BytesMut; BUF_PAGE_COUNT] = std::array::from_fn(|_| BytesMut::new());
+    let event_stats = buf
+        .read_events(&mut events_data)
+        .expect("failed to poll events");
+
+    for data_buf in events_data.iter().take(event_stats.read) {
+        assert!(
+            data_buf.len() >= mem::size_of::<EventData>(),
+            "the event is too small to contain an EventData"
+        );
+        // SAFETY: the buffer is large enough (checked above) and `read_unaligned` has no
+        // alignment requirement; all the fields of `EventData` accept any bit pattern.
+        let ptr = data_buf.as_ptr() as *const EventData;
+        let data: EventData = unsafe { ptr.read_unaligned() };
+
+        assert_eq!(data.cpu_id, CPU_ID, "unexpected data: {:?}", data);
+        assert_eq!(data.tag, 0xAB, "unexpected data: {:?}", data);
+    }
+}