KallistiOS git master
Independent SDK for the Sega Dreamcast
Loading...
Searching...
No Matches
perfctr.h
Go to the documentation of this file.
1/* KallistiOS ##version##
2
3 arch/dreamcast/include/dc/perfctr.h
4 Copyright (C) 2023 Andy Barajas
5 Copyright (C) 2023 Falco Girgis
6
7*/
8
9/** \file dc/perfctr.h
10 \brief Low-level performance counter API
11 \ingroup perf_counters
12
13 This file contains the low-level driver for interacting with and
14 utilizing the SH4's two Performance Counters, which are primarily
15 used for profiling and performance tuning.
16
17 \author MoopTheHedgehog
18 \author Andy Barajas
19 \author Falco Girgis
20*/
21
22#ifndef __DC_PERFCTR_H
23#define __DC_PERFCTR_H
24
25#include <stdint.h>
26#include <stdbool.h>
27
28#include <sys/cdefs.h>
29__BEGIN_DECLS
30
31/** \defgroup perf_counters Performance Counters
32 \brief SH4 CPU Performance Counter Driver
33 \ingroup debugging
34
35 The performance counter API exposes the SH4's hardware profiling registers,
36 which consist of two different sets of independently operable 48-bit
37 counters.
38
39 @{
40*/
41
42/** \brief Identifiers for the two SH4 performance counters */
43typedef enum perf_cntr {
44 /** \brief SH4 Performance Counter 0
45
46 The first performance counter ID.
47
48 This counter is used by KOS by default to implement the \ref
49 perf_counters_timer API. Reference it for details on how to
50 reconfigure it if necessary.
51 */
53
54 /** \brief SH4 Performance Counter 1
55
56 The second performance counter ID.
57
58 This counter is not used anywhere internally by KOS.
59 */
60 PRFC1
62
63/** \brief Count clock types for the SH4 performance counters */
64typedef enum perf_cntr_clock {
65 /** \brief CPU Cycles
66
67 Count CPU cycles. At 5 ns increments (for 200MHz CPU clock), a 48-bit
68 cycle counter can run continuously for 16.33 days.
69 */
71
72 /** \brief Ratio Cycles
73
74 Count CPU/bus ratio mode cycles (where `T = C x B / 24` and `T` is
75 time, `C` is count, and `B` is time of one bus cycle).
76
77 `B` has been found to be approximately `1/99753008`, but actual time
78 varies slightly. The target frequency is probably 99.75MHz.
79 */
82
83
84/** \brief Performance Counter Event Modes
85
86 This is the list of modes that are allowed to be passed into the perf_cntr_start()
87 function, representing different events you want to count.
88*/
89/* MODE DEFINITION VALUE MEASUREMENT TYPE & NOTES */
90typedef enum perf_cntr_event {
91 PMCR_INIT_NO_MODE = 0x00, /**< \brief None; Just here to be complete */
92 PMCR_OPERAND_READ_ACCESS_MODE = 0x01, /**< \brief Quantity; With cache */
93 PMCR_OPERAND_WRITE_ACCESS_MODE = 0x02, /**< \brief Quantity; With cache */
94 PMCR_UTLB_MISS_MODE = 0x03, /**< \brief Quantity */
95 PMCR_OPERAND_CACHE_READ_MISS_MODE = 0x04, /**< \brief Quantity */
96 PMCR_OPERAND_CACHE_WRITE_MISS_MODE = 0x05, /**< \brief Quantity */
97 PMCR_INSTRUCTION_FETCH_MODE = 0x06, /**< \brief Quantity; With cache */
98 PMCR_INSTRUCTION_TLB_MISS_MODE = 0x07, /**< \brief Quantity */
99 PMCR_INSTRUCTION_CACHE_MISS_MODE = 0x08, /**< \brief Quantity */
100 PMCR_ALL_OPERAND_ACCESS_MODE = 0x09, /**< \brief Quantity */
101 PMCR_ALL_INSTRUCTION_FETCH_MODE = 0x0a, /**< \brief Quantity */
102 PMCR_ON_CHIP_RAM_OPERAND_ACCESS_MODE = 0x0b, /**< \brief Quantity */
103 /* No 0x0c */
104 PMCR_ON_CHIP_IO_ACCESS_MODE = 0x0d, /**< \brief Quantity */
105 PMCR_OPERAND_ACCESS_MODE = 0x0e, /**< \brief Quantity; With cache, counts both reads and writes */
106 PMCR_OPERAND_CACHE_MISS_MODE = 0x0f, /**< \brief Quantity */
107 PMCR_BRANCH_ISSUED_MODE = 0x10, /**< \brief Quantity; Not the same as branch taken! */
108 PMCR_BRANCH_TAKEN_MODE = 0x11, /**< \brief Quantity */
109 PMCR_SUBROUTINE_ISSUED_MODE = 0x12, /**< \brief Quantity; Issued a BSR, BSRF, JSR, JSR/N */
110 PMCR_INSTRUCTION_ISSUED_MODE = 0x13, /**< \brief Quantity */
111 PMCR_PARALLEL_INSTRUCTION_ISSUED_MODE = 0x14, /**< \brief Quantity */
112 PMCR_FPU_INSTRUCTION_ISSUED_MODE = 0x15, /**< \brief Quantity */
113 PMCR_INTERRUPT_COUNTER_MODE = 0x16, /**< \brief Quantity */
114 PMCR_NMI_COUNTER_MODE = 0x17, /**< \brief Quantity */
115 PMCR_TRAPA_INSTRUCTION_COUNTER_MODE = 0x18, /**< \brief Quantity */
116 PMCR_UBC_A_MATCH_MODE = 0x19, /**< \brief Quantity */
117 PMCR_UBC_B_MATCH_MODE = 0x1a, /**< \brief Quantity */
118 /* No 0x1b-0x20 */
119 PMCR_INSTRUCTION_CACHE_FILL_MODE = 0x21, /**< \brief Cycles */
120 PMCR_OPERAND_CACHE_FILL_MODE = 0x22, /**< \brief Cycles */
121 /** \brief Cycles
122 For 200MHz CPU: 5ns per count in 1 cycle = 1 count mode.
123 Around 417.715ps per count (increments by 12) in CPU/bus ratio mode
124 */
126 PMCR_PIPELINE_FREEZE_BY_ICACHE_MISS_MODE = 0x24, /**< \brief Cycles */
127 PMCR_PIPELINE_FREEZE_BY_DCACHE_MISS_MODE = 0x25, /**< \brief Cycles */
128 /* No 0x26 */
129 PMCR_PIPELINE_FREEZE_BY_BRANCH_MODE = 0x27, /**< \brief Cycles */
130 PMCR_PIPELINE_FREEZE_BY_CPU_REGISTER_MODE = 0x28, /**< \brief Cycles */
131 PMCR_PIPELINE_FREEZE_BY_FPU_MODE = 0x29 /**< \brief Cycles */
133
134/** \brief Get a performance counter's settings.
135
136 This function returns a performance counter's settings.
137
138 \param counter The performance counter (i.e., \ref PRFC0 or PRFC1).
139 \param event_mode Pointer to fill in with the counter's event mode
140 \param clock_type Pointer to fill in with the counter's clock type
141
142 \retval true The performance counter is running
143 \retval false The performance counter is stopped
144*/
146 perf_cntr_clock_t *clock_type);
147
148/** \brief Start a performance counter.
149
150 This function starts a performance counter.
151
152 \param counter The counter to start (i.e., \ref PRFC0 or PRFC1).
153 \param event_mode Use one of the 33 event modes (perf_cntr_event_t).
154 \param clock_type PMCR_COUNT_CPU_CYCLES or PMCR_COUNT_RATIO_CYCLES.
155
156 \sa perf_cntr_stop(), perf_cntr_resume()
157*/
159 perf_cntr_clock_t clock_type);
160
161/** \brief Stop a performance counter.
162
163 This function stops a performance counter that was started with perf_cntr_start().
164 Stopping a counter retains its count. To clear the count use perf_cntr_clear().
165
166 \param counter The counter to stop (i.e., \ref PRFC0 or PRFC1).
167
168 \sa perf_cntr_clear()
169*/
171
172/** \brief Resume a performance counter.
173
174 This function resumes a stopped performance counter.
175
176 \param counter The counter to resume (i.e., \ref PRFC0 or PRFC1).
177
178 \sa perf_cntr_stop()
179*/
181
182/** \brief Clear a performance counter.
183
184 This function clears a performance counter. It resets its count to zero.
185 This function stops the counter before clearing it because you can't clear
186 a running counter.
187
188 \param counter The counter to clear (i.e., \ref PRFC0 or PRFC1).
189*/
191
192/** \brief Obtain the count of a performance counter.
193
194 This function simply returns the count of the counter.
195
196 \param counter The counter to read (i.e., \ref PRFC0 or PRFC1).
197
198 \return The counter's count.
199*/
200uint64_t perf_cntr_count(perf_cntr_t counter);
201
202/** \defgroup perf_counters_timer Timer
203 \brief High-resolution performance counter-based timer API
204
205 This API allows for using the performance counters as a high-resolution
206 general-purpose interval timer with 5ns ticks. It does this by configuring
207 \ref PRFC0 in \ref PMCR_ELAPSED_TIME_MODE.
208
209 \note
210 This is enabled by default. To use \ref PRFC0 for something else, either
211 use perf_cntr_timer_disable() or perf_cntr_start() to reconfigure it for
212 something else. When disabled, the timer will simply fall through to use
213 timer_ns_gettime64() from the timer driver, decreasing the resolution of
214 each tick to 80ns.
215
216 \warning
217 The performance counter timer is only counting \a active CPU cycles. This
218 means that when KOS's thread scheduler uses the "sleep" instruction,
219 putting the CPU to sleep, these counters cease to record elapsed time.
220 Because of this, when you want real wall time rather than active CPU
221 time, they should only be used to measure small deltas that do not span
222 frames.
223
224 \sa timers
225
226 @{
227*/
228
229/** \brief Enable the nanosecond timer.
230
231 This function enables the performance counter used for the perf_cntr_timer_ns()
232 function.
233
234 \note
235 This is on by default. The function uses \ref PRFC0 to do the work.
236
237 \warning
238 The performance counters are only counting \a active CPU cycles while in
239 this mode. This is analogous to providing you with the CPU time of your
240 application, not the actual wall-time or monotonic clock, as it ceases
241 to count when the kernel puts the CPU to sleep.
242*/
244
245/** \brief Disable the nanosecond timer.
246
247 This function disables the performance counter used for the
248 perf_cntr_timer_ns() function.
249
250 \note
251 Generally, you will not want to do this, unless you have some need to use
252 the counter \ref PRFC0 for something else.
253*/
255
256/** \brief Check whether the nanosecond timer is enabled.
257
258 Queries the configuration of \ref PRFC0 to check whether it's
259 currently acting as the nanosecond timer.
260
261 \note
262 Even when it's not, perf_cntr_timer_ns() will still gracefully fall-through
263 to using the timer_ns_gettime64(), which decreases the resolution of each
264 tick to 80ns.
265
266 \retval true The nanosecond timer is configured and running
267 \retval false The nanosecond timer is not configured and/or isn't
268 running
269*/
271
272/** \brief Gets elapsed CPU time (in nanoseconds)
273
274 This function retrieves the total amount of \a active CPU time since
275 perf_cntr_timer_enable() was called.
276
277 \note
278 It's called by default when KOS initializes, so unless you reinitialize it
279 later on, this should be the total CPU time since KOS booted up.
280
281 \return The number of nanoseconds of active CPU time since
282 the timer was enabled.
283*/
284uint64_t perf_cntr_timer_ns(void);
285
286/** @} */
287
288/** @} */
289
290__END_DECLS
291
292#endif /* __DC_PERFCTR_H */
uint64_t perf_cntr_timer_ns(void)
Gets elapsed CPU time (in nanoseconds)
void perf_cntr_timer_enable(void)
Enable the nanosecond timer.
void perf_cntr_timer_disable(void)
Disable the nanosecond timer.
bool perf_cntr_timer_enabled(void)
Check whether the nanosecond timer is enabled.
void perf_cntr_stop(perf_cntr_t counter)
Stop a performance counter.
void perf_cntr_start(perf_cntr_t counter, perf_cntr_event_t event_mode, perf_cntr_clock_t clock_type)
Start a performance counter.
bool perf_cntr_config(perf_cntr_t counter, perf_cntr_event_t *event_mode, perf_cntr_clock_t *clock_type)
Get a performance counter's settings.
perf_cntr_event_t
Performance Counter Event Modes.
Definition perfctr.h:90
perf_cntr_clock_t
Count clock types for the SH4 performance counters.
Definition perfctr.h:64
uint64_t perf_cntr_count(perf_cntr_t counter)
Obtain the count of a performance counter.
void perf_cntr_clear(perf_cntr_t counter)
Clear a performance counter.
perf_cntr_t
Identifiers for the two SH4 performance counters.
Definition perfctr.h:43
void perf_cntr_resume(perf_cntr_t counter)
Resume a performance counter.
@ PMCR_OPERAND_READ_ACCESS_MODE
Quantity; With cache.
Definition perfctr.h:92
@ PMCR_OPERAND_CACHE_MISS_MODE
Quantity.
Definition perfctr.h:106
@ PMCR_INSTRUCTION_ISSUED_MODE
Quantity.
Definition perfctr.h:110
@ PMCR_ALL_OPERAND_ACCESS_MODE
Quantity.
Definition perfctr.h:100
@ PMCR_INTERRUPT_COUNTER_MODE
Quantity.
Definition perfctr.h:113
@ PMCR_ELAPSED_TIME_MODE
Cycles For 200MHz CPU: 5ns per count in 1 cycle = 1 count mode.
Definition perfctr.h:125
@ PMCR_BRANCH_ISSUED_MODE
Quantity; Not the same as branch taken!
Definition perfctr.h:107
@ PMCR_INSTRUCTION_FETCH_MODE
Quantity; With cache.
Definition perfctr.h:97
@ PMCR_PIPELINE_FREEZE_BY_FPU_MODE
Cycles.
Definition perfctr.h:131
@ PMCR_PIPELINE_FREEZE_BY_ICACHE_MISS_MODE
Cycles.
Definition perfctr.h:126
@ PMCR_OPERAND_WRITE_ACCESS_MODE
Quantity; With cache.
Definition perfctr.h:93
@ PMCR_UBC_A_MATCH_MODE
Quantity.
Definition perfctr.h:116
@ PMCR_PIPELINE_FREEZE_BY_CPU_REGISTER_MODE
Cycles.
Definition perfctr.h:130
@ PMCR_OPERAND_ACCESS_MODE
Quantity; With cache, counts both reads and writes.
Definition perfctr.h:105
@ PMCR_FPU_INSTRUCTION_ISSUED_MODE
Quantity.
Definition perfctr.h:112
@ PMCR_INSTRUCTION_CACHE_MISS_MODE
Quantity.
Definition perfctr.h:99
@ PMCR_NMI_COUNTER_MODE
Quantity.
Definition perfctr.h:114
@ PMCR_ALL_INSTRUCTION_FETCH_MODE
Quantity.
Definition perfctr.h:101
@ PMCR_UTLB_MISS_MODE
Quantity.
Definition perfctr.h:94
@ PMCR_SUBROUTINE_ISSUED_MODE
Quantity; Issued a BSR, BSRF, JSR, JSR/N.
Definition perfctr.h:109
@ PMCR_ON_CHIP_RAM_OPERAND_ACCESS_MODE
Quantity.
Definition perfctr.h:102
@ PMCR_OPERAND_CACHE_READ_MISS_MODE
Quantity.
Definition perfctr.h:95
@ PMCR_INIT_NO_MODE
None; Just here to be complete.
Definition perfctr.h:91
@ PMCR_UBC_B_MATCH_MODE
Quantity.
Definition perfctr.h:117
@ PMCR_OPERAND_CACHE_WRITE_MISS_MODE
Quantity.
Definition perfctr.h:96
@ PMCR_TRAPA_INSTRUCTION_COUNTER_MODE
Quantity.
Definition perfctr.h:115
@ PMCR_PIPELINE_FREEZE_BY_BRANCH_MODE
Cycles.
Definition perfctr.h:129
@ PMCR_INSTRUCTION_TLB_MISS_MODE
Quantity.
Definition perfctr.h:98
@ PMCR_ON_CHIP_IO_ACCESS_MODE
Quantity.
Definition perfctr.h:104
@ PMCR_PIPELINE_FREEZE_BY_DCACHE_MISS_MODE
Cycles.
Definition perfctr.h:127
@ PMCR_OPERAND_CACHE_FILL_MODE
Cycles.
Definition perfctr.h:120
@ PMCR_PARALLEL_INSTRUCTION_ISSUED_MODE
Quantity.
Definition perfctr.h:111
@ PMCR_INSTRUCTION_CACHE_FILL_MODE
Cycles.
Definition perfctr.h:119
@ PMCR_BRANCH_TAKEN_MODE
Quantity.
Definition perfctr.h:108
@ PMCR_COUNT_RATIO_CYCLES
Ratio Cycles.
Definition perfctr.h:80
@ PMCR_COUNT_CPU_CYCLES
CPU Cycles.
Definition perfctr.h:70
@ PRFC0
SH4 Performance Counter 0.
Definition perfctr.h:52
@ PRFC1
SH4 Performance Counter 1.
Definition perfctr.h:60