Changeset 3501


Ignore:
Timestamp:
Sep 15, 2013, 3:47:11 PM (5 years ago)
Author:
cameron
Message:

Update main

Location:
docs/Working/re
Files:
2 edited

Legend:

Unmodified
Added
Removed
  • docs/Working/re/ppopp-re.tex

    r3493 r3501  
    574574
    575575
    576 
    577 \section{Commodity SIMD Implementation and Experimental Evaluation}\label{sec:SSE2}
    578 
    579 
    580 \subsection{Implementation Notes}
    581 \subsection{Evaluation Methodology}
    582 \subsection{Comparison}
    583 \begin{figure}
    584 \begin{center}
    585 \begin{tikzpicture}
    586 \begin{axis}[
    587 xtick=data,
    588 ylabel=Cycles per Byte,
    589 xticklabels={@,Date,Email,URIorEmail,xquote},
    590 tick label style={font=\tiny},
    591 enlargelimits=0.15,
    592 legend style={at={(0.5,-0.15)},
    593 anchor=north,legend columns=-1},
    594 ymax=8,
    595 ybar,
    596 bar width=7pt,
    597 ]
    598 \addplot
    599 file {data/cycles1.dat};
    600 \addplot
    601 file {data/cycles2.dat};
    602 \addplot
    603 file {data/cycles3.dat};
    604  
    605 \legend{Bitstreams,NRGrep,Grep,Annot}
    606 \end{axis}
    607 \end{tikzpicture}
    608 \end{center}
    609 \caption{Cycles per Byte}
    610 \end{figure}
    611  
    612 \begin{figure}
    613 \begin{center}
    614 \begin{tikzpicture}
    615 \begin{axis}[
    616 xtick=data,
    617 ylabel=Instructions per Byte,
    618 xticklabels={@,Date,Email,URIorEmail,xquote},
    619 tick label style={font=\tiny},
    620 enlargelimits=0.15,
    621 legend style={at={(0.5,-0.15)},
    622 anchor=north,legend columns=-1},
    623 ymax=16,
    624 ybar,
    625 bar width=7pt,
    626 ]
    627 \addplot
    628 file {data/instructions1.dat};
    629 \addplot
    630 file {data/instructions2.dat};
    631 \addplot
    632 file {data/instructions3.dat};
    633  
    634 \legend{Bitstreams,NRGrep,Grep,Annot}
    635 \end{axis}
    636 \end{tikzpicture}
    637 \end{center}
    638 \caption{Instructions per Byte}
    639 \end{figure}
    640 
    641 \begin{figure}
    642 \begin{center}
    643 \begin{tikzpicture}
    644 \begin{axis}[
    645 xtick=data,
    646 ylabel=Instructions per Cycle,
    647 xticklabels={@,Date,Email,URIorEmail,xquote},
    648 tick label style={font=\tiny},
    649 enlargelimits=0.15,
    650 legend style={at={(0.5,-0.15)},
    651 anchor=north,legend columns=-1},
    652 ybar,
    653 bar width=7pt,
    654 ]
    655 \addplot
    656 file {data/ipc1.dat};
    657 \addplot
    658 file {data/ipc2.dat};
    659 \addplot
    660 file {data/ipc3.dat};
    661 
    662 \legend{Bitstreams,NRGrep,Grep,Annot}
    663 \end{axis}
    664 \end{tikzpicture}
    665 \end{center}
    666 \caption{Instructions per Cycle}
    667 \end{figure}
    668 
    669 \begin{figure}
    670 \begin{center}
    671 \begin{tikzpicture}
    672 \begin{axis}[
    673 xtick=data,
    674 ylabel=Branch Misses per Byte,
    675 xticklabels={@,Date,Email,URIorEmail,xquote},
    676 tick label style={font=\tiny},
    677 enlargelimits=0.15,
    678 legend style={at={(0.5,-0.15)},
    679 anchor=north,legend columns=-1},
    680 ymax=0.03,
    681 ybar,
    682 bar width=7pt,
    683 ]
    684 \addplot
    685 file {data/branch-misses1.dat};
    686 \addplot
    687 file {data/branch-misses2.dat};
    688 \addplot
    689 file {data/branch-misses3.dat};
    690 
    691 \legend{Bitstreams,NRGrep,Grep,Annot}
    692 \end{axis}
    693 \end{tikzpicture}
    694 \end{center}
    695 \caption{Branch Misses per Byte}
    696 \end{figure}
    697 
    698 
    699 
    700 \section{SIMD Scalability}\label{sec:AVX2}
    701 
    702 
    703 
    704 
    705 \begin{figure}
    706 \begin{center}
    707 \begin{tikzpicture}
    708 \begin{axis}[
    709 xtick=data,
    710 ylabel=Cycles per Byte,
    711 xticklabels={@,Date,Email,URIorEmail,xquote},
    712 tick label style={font=\tiny},
    713 enlargelimits=0.15,
    714 legend style={at={(0.5,-0.15)},
    715 anchor=north,legend columns=-1},
    716 ybar,
    717 bar width=7pt,
    718 ]
    719 \addplot
    720 file {data/ssecycles.dat};
    721 \addplot
    722 file {data/avxcycles.dat};
    723 
    724 \legend{SSE2,AVX2,Annot}
    725 \end{axis}
    726 \end{tikzpicture}
    727 \end{center}
    728 \caption{Cycles per Byte}
    729 \end{figure}
    730 
    731 \begin{figure}
    732 \begin{center}
    733 \begin{tikzpicture}
    734 \begin{axis}[
    735 xtick=data,
    736 ylabel=Instructions per Byte,
    737 xticklabels={@,Date,Email,URIorEmail,xquote},
    738 tick label style={font=\tiny},
    739 enlargelimits=0.15,
    740 legend style={at={(0.5,-0.15)},
    741 anchor=north,legend columns=-1},
    742 ybar,
    743 bar width=7pt,
    744 ]
    745 \addplot
    746 file {data/sseinstructions.dat};
    747 \addplot
    748 file {data/avxinstructions.dat};
    749 
    750 \legend{SSE2,AVX2,Annot}
    751 \end{axis}
    752 \end{tikzpicture}
    753 \end{center}
    754 \caption{Instructions per Byte}
    755 \end{figure}
    756 
    757 \begin{figure}
    758 \begin{center}
    759 \begin{tikzpicture}
    760 \begin{axis}[
    761 xtick=data,
    762 ylabel=Instructions per Cycle,
    763 xticklabels={@,Date,Email,URIorEmail,xquote},
    764 tick label style={font=\tiny},
    765 enlargelimits=0.15,
    766 legend style={at={(0.5,-0.15)},
    767 anchor=north,legend columns=-1},
    768 ybar,
    769 bar width=7pt,
    770 ]
    771 \addplot
    772 file {data/sseipc.dat};
    773 \addplot
    774 file {data/avxipc.dat};
    775 
    776 
    777 \legend{SSE2,AVX2,Annot}
    778 \end{axis}
    779 \end{tikzpicture}
    780 \end{center}
    781 \caption{Instructions per Cycle}
    782 \end{figure}
    783 
    784 \begin{figure}
    785 \begin{center}
    786 \begin{tikzpicture}
    787 \begin{axis}[
    788 xtick=data,
    789 ylabel=Branch Misses per Byte,
    790 xticklabels={@,Date,Email,URIorEmail,xquote},
    791 tick label style={font=\tiny},
    792 enlargelimits=0.15,
    793 legend style={at={(0.5,-0.15)},
    794 anchor=north,legend columns=-1},
    795 ybar,
    796 bar width=7pt,
    797 ]
    798 \addplot
    799 file {data/ssebranch-misses.dat};
    800 \addplot
    801 file {data/avxbranch-misses.dat};
    802 
    803 \legend{SSE2,AVX2,Annot}
    804 \end{axis}
    805 \end{tikzpicture}
    806 \end{center}
    807 \caption{Branch Misses per Byte}
    808 \end{figure}
    809 
    810 
    811 
    812 
    813 \subsection{AVX Stream Addition}
    814  \begin{figure*}[tbh]
    815 \begin{center}
    816 \begin{verbatim}
    817 void add_ci_co(bitblock_t x, bitblock_t y, carry_t carry_in, carry_t & carry_out, bitblock_t & sum) {
    818   bitblock_t all_ones = simd256<1>::constant<1>();
    819   bitblock_t gen = simd_and(x, y);
    820   bitblock_t prop = simd_xor(x, y);
    821   bitblock_t partial_sum = simd256<64>::add(x, y);
    822   bitblock_t carry = simd_or(gen, simd_andc(prop, partial_sum));
    823   bitblock_t bubble = simd256<64>::eq(partial_sum, all_ones);
    824   uint64_t carry_mask = hsimd256<64>::signmask(carry) * 2 + convert(carry_in);
    825   uint64_t bubble_mask = hsimd256<64>::signmask(bubble);
    826   uint64_t carry_scan_thru_bubbles = (carry_mask + bubble_mask) &~ bubble_mask;
    827   uint64_t increments = carry_scan_thru_bubbles | (carry_scan_thru_bubbles - carry_mask);
    828   carry_out = convert(increments >> 4);
    829   uint64_t spread = 0x0000200040008001 * increments & 0x0001000100010001;
    830   sum = simd256<64>::add(partial_sum, _mm256_cvtepu16_epi64(avx_select_lo128(convert(spread))));
    831 }
    832 
    833 \end{verbatim}
    834 
    835 \end{center}
    836 \caption{AVX2 256-bit Addition}
    837 \label{fig:AVX2add}
    838 \end{figure*}
     576\input{sse2}
     577
     578\input{avx2}
     579
    839580
    840581
     
    843 584 To further assess the scalability of our regular expression matching
    844 585 using bit-parallel data streams, we implemented a GPGPU version
    845 in OpenCL.   We arranged for 64 work groups each having 64
     586 in OpenCL.
     587 We arranged for 64 work groups each having 64
    846 588 threads.  Input files are divided in data-parallel fashion among
    847 589 the 64 work groups.  Each work group carries out the regular
     
    853 595 style process.
    854596
     597 Our GPU test machine was an AMD A10-5800K APU with Radeon(tm) HD Graphics,
     598 with a processor speed of 4.30 GHz and 32.0 GB of memory.
    855599
    856600
Note: See TracChangeset for help on using the changeset viewer.