I am trying to count CPU clock cycles for my function on an ARM Cortex-A53, using the following function:
#include <sys/time.h>
/*
 * Sample the PMU cycle counter together with the wall-clock time.
 *
 * result:  caller-provided array of at least three elements. On return
 *            result[0] = low 32 bits of PMCCNTR_EL0,
 *            result[1] = microseconds part of gettimeofday(),
 *            result[2] = seconds part of gettimeofday() (truncated to
 *                        unsigned int).
 * enabled: pass 0 to program and enable the PMU before sampling; pass a
 *          non-zero value when an earlier call has already done so.
 *
 * Returns 0 on success, -1 if gettimeofday() failed (the two time fields
 * are then set to 0; the cycle count is still valid).
 *
 * AArch64 only. NOTE(review): user-space access to these PMU registers
 * presumably has to be granted by the kernel beforehand - confirm on the
 * target system.
 */
int readticks(unsigned int *result, int enabled)
{
    struct timeval t;
    unsigned long long cc; /* system registers are 64 bits wide on AArch64 */
    int rc = 0;

    if (!enabled) {
        /*
         * Program the performance-monitor control register.
         * 17 = bit 0 (E, enable counters) | bit 4 (X, export events),
         * per the Arm architecture manual's PMCR_EL0 layout.
         * 64-bit operands are used so the whole X register is defined.
         */
        asm volatile("msr pmcr_el0, %0" : : "r" (17ULL));
        /* Enable the cycle counter (bit 31) and event counters 0-3. */
        asm volatile("msr PMCNTENSET_EL0, %0" : : "r" (0x8000000fULL));
        /* Clear any pending overflow flags for the same counters. */
        asm volatile("msr PMOVSCLR_EL0, %0" : : "r" (0x8000000fULL));
        /* Make the register writes take effect before the counter is read. */
        asm volatile("isb" : : : "memory");
    }

    /*
     * Read the counter value. The ISB keeps the read from being executed
     * out of order with respect to the surrounding (measured) code, which
     * would otherwise add noise to the result.
     */
    asm volatile("isb" : : : "memory");
    asm volatile("mrs %0, PMCCNTR_EL0" : "=r" (cc));

    if (gettimeofday(&t, (struct timezone *) 0) != 0) {
        t.tv_sec = 0;
        t.tv_usec = 0;
        rc = -1;
    }

    result[0] = (unsigned int) cc;
    result[1] = (unsigned int) t.tv_usec;
    result[2] = (unsigned int) t.tv_sec;
    return rc;
}
And here is my user-space application:
#include <inttypes.h>
#include <stdio.h>
#include <time.h>
/*
 * Measure how many PMU cycles foo() takes.
 *
 * First the cost of two back-to-back readticks() calls is measured and
 * stored as the overhead; then foo() is bracketed by the same two calls
 * and the overhead is subtracted from the measured difference.
 */
int main(void)
{
    unsigned int init[3] = {0};
    unsigned int start[3] = {0};
    unsigned int end[3] = {0};
    unsigned int overhead;
    unsigned int elapsed;
    unsigned int cycles;

    /* Calibration run: nothing between the two samples. */
    readticks(init, 0);
    readticks(start, 1);
    readticks(end, 1);
    overhead = end[0] - start[0];

    /* Measured run. */
    readticks(init, 0);
    readticks(start, 1);
    foo(); //This is my function
    readticks(end, 1);

    /*
     * Unsigned subtraction copes with a single 32-bit counter wrap.
     * Clamp at zero so a calibration overhead larger than the measurement
     * does not wrap around to a huge bogus value.
     */
    elapsed = end[0] - start[0];
    cycles = (elapsed > overhead) ? (elapsed - overhead) : 0U;

    /* %u, not %d: the value is an unsigned int. */
    printf("clock cycles= %u\n", cycles);
    return 0;
}
When I run my code several times, I get different clock-cycle counts with relatively high variation (almost 5000). My code should take around 4000 clock cycles, but I get between 4500 and 9500 clock cycles. Is there any way to get a more accurate clock-cycle count?
Use the following macro:
/*
 * mfcp(rn) - read a system/coprocessor register and yield its value.
 *
 * AArch64: rn is the bare system-register name (e.g. CNTPCT_EL0). It is
 *          stringified into an "mrs" instruction and the full 64-bit
 *          value is returned.
 * Otherwise: rn must be a string literal holding the complete "mrc"
 *          operand list (including the %0 placeholder); a 32-bit value
 *          is returned. u32 is expected to be declared elsewhere.
 *
 * Uses a GCC/Clang statement expression.
 */
#if defined(__aarch64__)
#define mfcp(rn) ({unsigned long long rval = 0U; \
__asm__ __volatile__(\
"mrs %0, " #rn "\n"\
: "=r" (rval)\
);\
rval;\
})
#else
#define mfcp(rn) ({u32 rval = 0U; \
__asm__ __volatile__(\
"mrc " rn "\n"\
: "=r" (rval)\
);\
rval;\
})
#endif
Call mfcp with the counter register:
// Counter samples taken before (t1) and after (t2) the code being measured.
uint64_t t1,t2;
// Sample the counter immediately before the measured code.
// NOTE(review): CNTPCT_EL0 names the generic-timer physical count register,
// which presumably ticks at the system-counter frequency rather than once per
// CPU cycle, and may not be readable from user space - confirm on the target.
t1 = mfcp(CNTPCT_EL0);
// your code
// Sample again afterwards; t2 - t1 is the number of counter ticks elapsed.
t2 = mfcp(CNTPCT_EL0);
User contributions licensed under CC BY-SA 3.0