Skip to content

Commit ff203f7

Browse files
authored
Merge pull request #1 from pythonspeed/raw-events
Raw events
2 parents d5f802e + f2431bb commit ff203f7

File tree

6 files changed

+55
-2
lines changed

6 files changed

+55
-2
lines changed

Cargo.lock

Lines changed: 1 addition & 1 deletion
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

Cargo.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
[package]
22
name = "py-perf-event"
3-
version = "0.1.0"
3+
version = "0.2.0"
44
edition = "2021"
55

66
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html

README.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -25,4 +25,5 @@ Until more docs are available, you can use the underlying Rust libraries docs to
2525

2626
## Changelog
2727

28+
* **0.2:** Exposed `Raw` events.
2829
* **0.1:** Initial, very minimal release.

requirements-dev.txt

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1 +1,3 @@
11
pytest
2+
numba
3+
numpy

src/lib.rs

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -68,6 +68,19 @@ pub struct Hardware(events::Hardware);
6868

6969
expose_consts!(Hardware, CPU_CYCLES, INSTRUCTIONS, CACHE_REFERENCES, CACHE_MISSES, BRANCH_INSTRUCTIONS, BRANCH_MISSES, BUS_CYCLES, STALLED_CYCLES_FRONTEND, STALLED_CYCLES_BACKEND, REF_CPU_CYCLES);
7070

71+
/// A raw, model-specific CPU counter.
72+
#[derive(Clone, Copy)]
73+
#[pyclass]
74+
pub struct Raw(events::Raw);
75+
76+
#[pymethods]
77+
impl Raw {
78+
#[new]
79+
fn new(config: u64) -> Self {
80+
Raw(events::Raw::new(config))
81+
}
82+
}
83+
7184
/// Start gathering counter information, given a list of Hardware, Raw or Cache
7285
/// instances.
7386
#[pyclass]
@@ -87,6 +100,10 @@ impl Measure {
87100
counters.push(group.add(&perf_event::Builder::new(hw.0))?);
88101
continue;
89102
}
103+
if let Ok(raw) = event.extract::<Raw>() {
104+
counters.push(group.add(&perf_event::Builder::new(raw.0))?);
105+
continue;
106+
}
90107
let cache: Cache = event.extract()?;
91108
let cache: events::Cache = cache.into();
92109
counters.push(group.add(&perf_event::Builder::new(cache))?);
@@ -136,6 +153,7 @@ fn py_perf_event(_py: Python, m: &PyModule) -> PyResult<()> {
136153
m.add_class::<CacheResult>()?;
137154
m.add_class::<Cache>()?;
138155
m.add_class::<Hardware>()?;
156+
m.add_class::<Raw>()?;
139157
m.add_class::<Measure>()?;
140158
m.add_function(wrap_pyfunction!(measure, m)?)?;
141159
Ok(())

test_perf_event.py

Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,13 @@
1+
import numpy as np
2+
from numba import njit
13
from py_perf_event import (
24
Measure,
35
Hardware,
46
Cache,
57
CacheId,
68
CacheOp,
79
CacheResult,
10+
Raw,
811
measure,
912
)
1013

@@ -57,3 +60,32 @@ def traverse(l):
5760
assert small_misses <= small_reads
5861
assert large_reads > 1000 * small_reads
5962
assert (large_misses / large_reads) > 0.2
63+
64+
65+
def test_raw():
66+
"""
67+
``Raw()`` events get measured.
68+
69+
TODO: This test is model-specific; it has only been tested on an i7-12700K.
70+
"""
71+
# SIMD on float64:
72+
simd_f64 = [Raw(0x4c7), Raw(0x10c7)]
73+
74+
f64_data = np.ones((1_000_000,), dtype=np.float64)
75+
f32_data = np.ones((1_000_000,), dtype=np.float32)
76+
77+
@njit
78+
def double(arr):
79+
result = np.empty(arr.shape, dtype=arr.dtype)
80+
# Should auto-vectorize to SIMD.
81+
for i in range(len(arr)):
82+
result[i] = 2 * arr[i]
83+
return result
84+
85+
double(f64_data)
86+
double(f32_data)
87+
88+
with_f64 = sum(measure(simd_f64, double, f64_data))
89+
assert with_f64 > (1_000_000 / 8) * 0.5
90+
with_f32 = sum(measure(simd_f64, double, f32_data))
91+
assert with_f32 < 100

0 commit comments

Comments
 (0)