#ifndef LOREN_BENCH_H
#define LOREN_BENCH_H

#define __STDC_FORMAT_MACROS
#include <stdio.h>
#include <inttypes.h>

/*
 * Read the x86 time-stamp counter.
 *
 * Issues LFENCE followed by RDTSC and returns the 64-bit value built from
 * the EDX (upper 32 bits) and EAX (lower 32 bits) outputs.
 * NOTE(review): the LFENCE is presumably there to keep the counter read from
 * being executed ahead of earlier instructions - confirm against the CPU
 * manuals for the targets you care about.
 */
static inline uint64_t read_time(void)
{
    uint32_t lo, hi;

    __asm__ volatile("lfence\n rdtsc" : "=a"(lo), "=d"(hi));

    /* The shifted high half has no low bits set, so OR-ing merges the halves. */
    return ((uint64_t)hi << 32) | lo;
}
 
// Timer overhead in cycles; STOP_TIMER subtracts it (scaled by 10) from every average it prints.
#define NOP_CYCLES 28 // time measured by an empty START_TIMER/STOP_TIMER pair on Sandy Bridge
 
/*
 * START_TIMER - open a timed region.
 *
 * Expands to declarations (not a statement): it introduces `tend`, left
 * unassigned here and written by STOP_TIMER, and `tstart`, initialised with
 * the current read_time() value. Both live in the enclosing scope, so a
 * scope can contain at most one START_TIMER.
 */
#define START_TIMER \
    uint64_t tend, tstart = read_time();

/*
 * STOP_TIMER(id) - close the region opened by START_TIMER and update the
 * running statistics kept for this call site.
 *
 * Requires `tstart` and `tend` (as declared by START_TIMER) to be in scope.
 * `id` must be a C string; it labels the printed report.
 *
 * Every expansion owns its own plain static accumulators (no locking is done
 * in this macro):
 *   - the first two samples are discarded and counted as skips;
 *   - after that, a sample is accepted while fewer than two have been
 *     accepted, or when it is below 8x the running average plus 500 cycles;
 *     any other sample is treated as an outlier and counted as a skip;
 *   - whenever runs + skips is a power of two, the average is printed in
 *     tenths of a cycle ("decicycles") with NOP_CYCLES of overhead removed.
 *
 * The expansion is a bare brace block rather than do { } while (0), so
 * existing call sites that omit the trailing semicolon keep compiling.
 */
#define STOP_TIMER(id) {\
    tend = read_time();\
    {\
        static uint64_t tsum = 0;\
        static int tcount = 0;\
        static int tskip_count = 0;\
        if (tskip_count < 2) {\
            tskip_count++;\
        } else {\
            const uint64_t telapsed = tend - tstart;\
            /* tcount < 2 short-circuits, so the division never sees tcount == 0 */\
            if (tcount < 2 || telapsed < 8*tsum/tcount + 500) {\
                tsum += telapsed;\
                tcount++;\
            } else {\
                tskip_count++;\
            }\
            if (((tcount + tskip_count) & (tcount + tskip_count - 1)) == 0) {\
                /* Signed on purpose: the average can be smaller than the overhead. */\
                /* Spaces around PRId64 keep the literal valid when compiled as C++11. */\
                printf("%" PRId64 " decicycles in %s, %d runs, %d skips\n",\
                       (int64_t)(tsum*10/tcount) - NOP_CYCLES*10,\
                       (id), tcount, tskip_count);\
            }\
        }\
    }\
}

#endif // LOREN_BENCH_H
