#!/usr/bin/perl

# Ordered table of [pattern, bucket] rules used to fold individual symbol
# names into aggregate buckets.
#
# The main loop tries the rules top to bottom and stops at the first pattern
# that matches, replacing the symbol name with the bucket label (which then
# becomes the aggregation key). Order therefore matters: a more specific
# pattern must come before any more general pattern that would also match
# the same name (e.g. 'idct' before 'dct', 'deblock_edge' before the generic
# 'deblock' rule).
#
# Patterns are precompiled with qr// so the loop does not have to recompile
# a regex every time it moves on to the next rule for every input line.
# @replace is intentionally a package global: the main loop reads it.
@replace = (
    [qr/x264_pixel_sad_/ => 'pixel_sad_*_mmx'],
    [qr/^sad_w\d+_align\d+_/ => 'pixel_sad_*_mmx'],
    [qr/x264_pixel_ssd_/ => 'pixel_ssd_*_mmx'],
    [qr/intra_satd_x3|hadamard_load/ => 'intra_satd_*_mmx'],
    [qr/_satd_/ => 'pixel_satd_*_mmx'],
    [qr/_sa8d_/ => 'pixel_sa8d_*_mmx'],
    [qr/^load_hadamard/ => 'pixel_satd_*_mmx'],
    [qr/hadamard_ac_/ => 'hadamard_ac_*_mmx'],
    [qr/x264_pixel_avg_/ => 'pixel_avg_*_mmx'],
    [qr/x264_pixel_avg2_/ => 'pixel_avg2_*_mmx'],
    [qr/x264_pixel_var_/ => 'pixel_var_*_mmx'],
    [qr/x264_pixel_var2_/ => 'pixel_var2_*_mmx'],
    [qr/x264_pixel_ads/ => 'pixel_ads_*_mmx'],
    [qr/x264_mc_copy_/ => 'mc_copy_*_mmx'],
    [qr/x264_mc_weight_/ => 'mc_weight_*_mmx'],
    [qr/_trellis/ => 'quant_trellis_cabac'],
    [qr/_quant/ => 'quant_*_mmx'],
    [qr/_dequant/ => 'dequant_*_mmx'],
    # 'idct' must be tested before 'dct', which it contains.
    [qr/idct/ => 'idct_*_mmx'],
    [qr/dct/ => 'dct_*_mmx'],
    [qr/deblock_edge/ => 'frame_deblock_row'],
    # Names already containing 'frame_deblock' are left alone by this rule.
    [qr/(?<!frame_)deblock/ => 'deblock_*_mmx'],
    [qr/hpel_filter/ => 'hpel_filter_mmx'],
    [qr/prefetch/ => 'prefetch_mmx'],
    [qr/get_ref/ => 'get_ref'],
    [qr/mc_luma/ => 'mc_luma'],
    [qr/mc_chroma/ => 'mc_chroma_mmx'],
    [qr/predict_\d+x\d+/ => 'predict_intra_*_mmx'],
    [qr/decimate_score\d/ => 'decimate_score*_mmx'],
    [qr/coeff_last\d/ => 'coeff_last*_mmx'],
    [qr/coeff_level_run\d/ => 'coeff_level_run*_mmx'],
    [qr/x264_mb_predict_mv.*/ => 'mb_predict_mv_*'],
    [qr/x264_mb_mc/ => 'mb_mc_*'],
    [qr/zigzag_scan_/ => 'zigzag_scan_*'],
    [qr/x264_mb_analyse_inter/ => 'mb_analyse_inter_*'],
    [qr/x264_mb_analyse_intra/ => 'mb_analyse_intra_*'],
    [qr/x264_mb_analyse_.*rd$/ => 'mb_analyse_*_rd'],
    [qr/x264_intra_rd$/ => 'mb_analyse_*_rd'],
    [qr/x264_mb_encode_i\d/ => 'mb_encode_i*'],
    [qr/x264_mb_cache_mv/ => 'mb_cache_mv_*'],
    [qr/cabac_size_decision/ => 'cabac_size_decision*'],
    [qr/bs_write_te/ => 'bs_write_se'],
    [qr/mempcpy/ => 'memcpy'],
    [qr/block_residual_write_cabac/ => 'block_residual_write_cabac'],
    [qr/copy_column8/ => 'macroblock_cache_load'],
    [qr/cabac_mb8x8_mvd/ => 'cabac_mb_mvd'],
    # Everything under cabac_mb_ except the mvd functions.
    [qr/cabac_mb_(?!mvd)/ => 'cabac_mb_*'],
    [qr/refine_subpel|refine_qpel(\b|_refdup)/ => 'me_refine_subpel'],
    [qr/refine_bidir/ => 'me_refine_bidir'],
    [qr/macroblock_cache_(mv|mvd|ref)_\d_\d/ => 'cache_rect'],
);

# Read report lines from the files named on the command line (or STDIN) and
# accumulate their numeric columns per bucket into the package global %dat.
#
# An accepted line has either 3 whitespace-separated fields
#     count  percentage  name
# or 5 fields
#     count  percentage  count  percentage  name
# Every other line (headers, separators, indented lines, ...) is skipped.
while(<>) {
    my @F = split /\s+/, $_;

    # split /\s+/ yields an empty first field for an indented line; such a
    # field must not be mistaken for a count, so $F[0] has to be a non-empty
    # run of digits. $F[1] may also contain '.', 'e' and '-' (floating point).
    next if @F!=3 && @F!=5 or $F[0] !~ /\A[0-9]+\z/ or $F[1] =~ /[^0-9e.-]/;

    # Number of leading numeric columns. Four columns are only possible on a
    # 5-field line; without that guard a 3-field line whose name happens to
    # be all digits would be read as four columns (undef $F[3] passes the
    # character-class test) and the name taken from the nonexistent $F[4].
    # $ni stays a package global because the report code after this loop
    # reads the value left by the last accepted line.
    $ni = (@F == 5 and $F[2] !~ /[^0-9]/ and $F[3] !~ /[^0-9e.-]/) ? 4 : 2;
    my $n = $F[$ni];

    # Fold the symbol into its bucket: the first matching rule wins.
    for my $rule (@replace) {
        my ($pattern, $bucket) = @$rule;
        if($n =~ $pattern) {
            $n = $bucket;
            last;
        }
    }

    # Names that matched no rule: merge the mmxext/sse2/sse3/ssse3/sse4
    # variants of a function into its mmx entry and drop the x264_ prefix.
    $n =~ s/mmxext|s?sse[234]/mmx/;
    $n =~ s/^x264_//;

    # Sum every numeric column of this line into the bucket's totals.
    $dat{$n}[$_] += $F[$_] for 0..$ni-1;
}

# Print one line per bucket, most expensive first.
#
# In four-column mode the script assumes the first counter is L1 cycles and
# the second is L2 misses; each unit of the second counter is weighted as 250
# units of the first when ranking. In two-column mode only the first counter
# is used. Buckets whose percentages are all below 0.01 are not printed.
my $wide = ($ni == 4);

my %cost;
for my $name (keys %dat) {
    my $row = $dat{$name};
    $cost{$name} = $row->[0] + $wide * 250 * $row->[2];
}

for my $name (sort { $cost{$b} <=> $cost{$a} } keys %dat) {
    my ($count1, $pct1, $count2, $pct2) = @{ $dat{$name} };
    next unless $pct1 >= .01 || $pct2 >= .01;

    printf "%-8d %7.4f  ", $count1, $pct1;
    printf "%-8d %7.4f  ", $count2, $pct2 if $wide;
    print "$name\n";
}
