I was using following method to read clock in cortex-a15:
static void readticks(unsigned int *result)
{
struct timeval t;
unsigned int cc;
if (!enabled) {
// program the performance-counter control-register:
asm volatile("mcr p15, 0, %0, c9, c12, 0" :: "r"(17));
//enable all counters
asm volatile("mcr p15, 0, %0, c9, c12, 1" :: "r"(0x8000000f));
//Clear overflow.
asm volatile("mcr p15, 0, %0, c9, c12, 3" :: "r"(0x8000000f));
enabled = 1;
}
asm volatile("mrc p15, 0, %0, c9, c13, 0" : "=r"(cc));
gettimeofday(&t,(struct timezone *) 0);
result[0] = cc;
result[1] = t.tv_usec;
result[2] = t.tv_sec;
}
And final performance profilinglooks like:
before = readticks();
foo();
after = readticks();
clock_cycles = after - before.
I want to use same logic in cortex-A53, ARM64 (not aarch32).
I have tried this after following online portals:
/* All counters, including PMCCNTR_EL0, are disabled/enabled */
#define QUADD_ARMV8_PMCR_E (1 << 0)
/* Reset all event counters, not including PMCCNTR_EL0, to 0
*/
#define QUADD_ARMV8_PMCR_P (1 << 1)
/* Reset PMCCNTR_EL0 to 0 */
#define QUADD_ARMV8_PMCR_C (1 << 2)
/* Clock divider: PMCCNTR_EL0 counts every clock cycle/every 64 clock cycles */
#define QUADD_ARMV8_PMCR_D (1 << 3)
/* Export of events is disabled/enabled */
#define QUADD_ARMV8_PMCR_X (1 << 4)
/* Disable cycle counter, PMCCNTR_EL0 when event counting is prohibited */
#define QUADD_ARMV8_PMCR_DP (1 << 5)
/* Long cycle count enable */
#define QUADD_ARMV8_PMCR_LC (1 << 6)
static inline unsigned int armv8_pmu_pmcr_read(void)
{
unsigned int val;
/* Read Performance Monitors Control Register */
asm volatile("mrs %0, pmcr_el0" : "=r" (val));
return val;
}
static inline void armv8_pmu_pmcr_write(unsigned int val)
{
asm volatile("msr pmcr_el0, %0" : :"r" (val & QUADD_ARMV8_PMCR_WR_MASK));
}
static void enable_all_counters(void)
{
unsigned int val;
/* Enable all counters */
val = armv8_pmu_pmcr_read();
val |= QUADD_ARMV8_PMCR_E | QUADD_ARMV8_PMCR_X;
armv8_pmu_pmcr_write(val);
}
static void reset_all_counters(void)
{
unsigned int val;
val = armv8_pmu_pmcr_read();
val |= QUADD_ARMV8_PMCR_P | QUADD_ARMV8_PMCR_C;
armv8_pmu_pmcr_write(val);
}
static void readticks(unsigned int *result)
{
struct timeval t;
unsigned int cc;
unsigned int val;
if (!enabled) {
reset_all_counters();
enable_all_counters();
enabled = 1;
}
cc = armv8_pmu_pmcr_read();
gettimeofday(&t,(struct timezone *) 0);
result[0] = cc;
result[1] = t.tv_usec;
result[2] = t.tv_sec;
}
But it gives "Illegal instruction" as error while I am trying profiling. Can anyone help me to change the above code for cortex-a53?
You need to enable the PMU for user mode. Here is the kernel module I wrote for it(For ARM V7 in Raspberry Pi 2):
/* Module source file 'module.c'. */
#include <linux/module.h>
#include <linux/init.h>
#include <linux/kernel.h>
arm_write(unsigned long val)
{
//Enabling both read and write - note difference between mcr and mrc
asm volatile("mrc p15, 0, %0, c9, c14, 0" :: "r"(1));
asm volatile("mcr p15, 0, %0, c9, c14, 0" :: "r"(1));
}
static int enabler(void)
{
unsigned long value = 1;
printk(KERN_INFO "Enabling PMU usermode.\n");
arm_write(value);
return 0;
}
static void end(void)
{
printk(KERN_INFO "module unloaded.\n");
}
module_init(enabler);
module_exit(end);
MODULE_AUTHOR("Sama");
MODULE_LICENSE("GPL");
MODULE_DESCRIPTION("Blahblah");
This will enable user mode access to the PMU. once you compiled it , you need to enable the PMU counters as follow:
int main(int argc, char **argv){
int enable_divider =1;
int do_reset=1;
int value = 1;
// peform reset:
if (do_reset) {
value |= 2; // reset all counters to zero.
value |= 4; // reset cycle counter to zero.
}
if (enable_divider)
value |= 8; // enable "by 64" divider for CCNT. You really do not want to get all cycle count. This will increment the counter by 1 for every 64 cpu cycle.
value |= 16;
// program the performance-counter control-register with mask constructed above
asm volatile ("MCR p15, 0, %0, c9, c12, 0\t\n" :: "r"(value));
// enable all counters:
asm volatile ("MCR p15, 0, %0, c9, c12, 1\t\n" :: "r"(0x8000000f));
// clear overflows:
asm volatile ("MCR p15, 0, %0, c9, c12, 3\t\n" :: "r"(0x80000001));
// Select individual counter (0)
asm volatile ("MCR p15, 0, %0, c9 , c12 , 5\t\n":: "r"(0x00));
// Write event (0x11 = Cycle count)
asm volatile ("MCR p15, 0, %0, c9 , c13 , 1\t\n":: "r"(0xD));
printf("Hi");
unsigned int output;
// Read current event counter
asm volatile ("MRC p15, 0, %0, c9 , c13 , 2\t\n": "=r"(output));
printf("Event count 0: %ul\n", output);
printf("Normal Execution, No Buffer Overflow Occurred.\n");
return 0;
}
However unfortunately what you get is not only your program cpu cycle, but entire system cpu cycle!. So what I recommend is to use perf.
Write your asm code in an inline assembly code in C and then put it like this:
int dummya(int z, int b){
//This is my function you need to change it for yourself
struct perf_event_attr pe;
long long count;
int fd;
memset(&pe, 0, sizeof(struct perf_event_attr));
pe.type = PERF_TYPE_HARDWARE;
pe.size = sizeof(struct perf_event_attr);
pe.config = PERF_COUNT_HW_CPU_CYCLES;
pe.disabled = 1;
pe.exclude_kernel = 1;
pe.exclude_hv = 1;
fd = perf_event_open(&pe, 0, -1, -1, 0);
if (fd == -1) {
fprintf(stderr, "Error opening leader %llx\n", pe.config);
exit(EXIT_FAILURE);
}
ioctl(fd, PERF_EVENT_IOC_RESET, 0);
ioctl(fd, PERF_EVENT_IOC_ENABLE, 0);
//From here the counter starts.
asm("Your ASM Codes");
asm("Your ASM Codes");
asm("Your ASM Codes");
asm("Your ASM Codes");
asm("Your ASM Codes");
asm("Your ASM Codes");
asm("Your ASM Codes");
asm("Your ASM Codes");
//Disabling Counter
ioctl(fd, PERF_EVENT_IOC_DISABLE, 0);
read(fd, &count, sizeof(long long));
printf("%lld\n", count);
close(fd);
return 5;
}
And be advised you need a new kernels to access the Perf driver.
User contributions licensed under CC BY-SA 3.0