Changeset 4082


Ignore:
Timestamp:
Aug 24, 2014, 10:07:29 PM (5 years ago)
Author:
cameron
Message:

Updates

File:
1 edited

Legend:

Unmodified
Added
Removed
  • docs/Working/re/Slides/pactslides.tex

    r4081 r4082  
    3636\frametitle{Acceleration of Regular Expression Matching}
    3737
    38 \begin{itemize}%[<+->]
     38\begin{itemize}[<+->]
    3939\item Example: quickly find instances of \verb:(^|[ ])\p{Lu}\p{Ll}+[.!?]($|[ ]): in text.
    4040\item Sequential algorithms use finite automata or backtracking.
     
    5454
    5555\begin{itemize}[<+->]
    56 \item A new algorithm family for regular expression matching based
    57 on bitwise data parallelism.
    58 
    5956\item A fundamentally parallel approach in contrast to approaches
    6057that parallelize existing DFA or NFA algorithms.
    6158
     59\item A new algorithm {\em family} for regular expression matching based
     60on bitwise data parallelism.
     61
    6262\item Builds on the Parabix methods that have been used for XML
    6363parsing and Unicode transcoding.
    6464
    65 \item Uses bitstream addition for simultaneous nondeterministic matching
    66 of character class repetitions (MatchStar primitive).
     65\item Introduces the MatchStar primitive
     66for simultaneous nondeterministic matching
     67of character class repetitions using bitstream addition.
    6768
    6869\item Compilation technologies for regular expressions (new), character classes (existing),
     
    7778\frametitle{Bitwise Data Parallelism}
    7879
    79 \begin{itemize}%[<+->]
     80\begin{itemize}[<+->]
    8081\item Parabix methods use a transform representation of text.
    8182\item Bitstreams are formed using one bit per input byte.
     
    8687\item Process 256 bytes at a time with AVX2.
    8788\end{itemize}
    88 \item Transposition supported efficiently with SIMD pack operations.
     89\item Parabix transform is fast using SIMD pack operations.
     90\item {\em But}: programmer view is of unbounded, arbitrary length bitstreams.
    8991\end{itemize}
    9092
     
    9496\frametitle{Impressive Results in Full Unicode Matching}
    9597
    96 \begin{itemize}%[<+->]
     98\begin{itemize}[<+->]
     99\item \verb:(^|[ ])\p{Lu}\p{Ll}+[.!?]($|[ ]):
     100\begin{itemize}
     101\item Match against 110 MB Arabic wikimedia document.
    97102\item Find capitalized words at ends of sentences.
    98103\item Use Unicode upper/lower case categories.
    99 \item Match \verb:(^|[ ])\p{Lu}\p{Ll}+[.!?]($|[ ]): against 110 MB Arabic file.
     104\end{itemize}
     105\item \verb:egrep: 45,951,194,784 CPU cycles.
    100106\item \verb:pcregrep: 14,772,797,548 CPU cycles.
    101 \item \verb:egrep: 45,951,194,784 CPU cycles.
    102107\item \verb:icgrep: (Parabix) 653,530,064 CPU cycles.
    103108\item 20X acceleration over \verb:pcgregrep:, 70X over GNU \verb:egrep:.
     
    310315\end{frame}
    311316
     317\section{Block-by-Block Processing}
     318
     319\begin{frame}[fragile]
     320\frametitle{Pablo Compiler Compiles to Block-by-Block Form}
     321\begin{block}<+->{Pablo Input}
     322\begin{semiverbatim}
     323m0 = ~0
     324m1 = pablo.Advance(m0 & cc\_a)
     325m2 = pablo.MatchStar(m1, cc\_0\_9)
     326m3 = pablo.Advance(m2, cc\_z9)
     327\end{semiverbatim}
     328
     329\end{block}
     330\begin{block}<+->{Pablo Output Adds Carry Processing}
     331\begin{semiverbatim}
     332m0 = simd_not(simd<1>::constant<0>());
     333cryQ.set(0, blkShl(blkAnd(m0, c\c_a), cryQ.get(0), m1);
     334cryQ.set(1, blkMatchStar(m1, cc\_0\_9, cryQ.get(1), m2);
     335cryQ.set(2, blkShl(blkAnd(m2, cc\_z9), cryQ.get(2), m3);
     336cryQ.AdjustCarries(3);
     337\end{semiverbatim}
     338\end{block}
     339\end{frame}
     340
     341
     342\begin{frame}[fragile]
     343\frametitle{Long-Stream Addition}
     344Compute the long-stream sum {\tt Z} = {\tt X} + {\tt Y}.
     345\begin{tabular}{c||r|r|r|r|r|r|r|r||}\cline{2-9}
     346{\tt X} & {\tt 19} & {\tt 31} & {\tt BA} & {\tt 4C} & {\tt 3D} & {\tt 45} & {\tt 21} & {\tt F1} \\ \cline{2-9}
     347{\tt Y} & {\tt 22} & {\tt 12} & {\tt 45} & {\tt B3} & {\tt E2} & {\tt 16} & {\tt 17} & {\tt 36} \\ \cline{2-9}
     348\onslide<2->{\tt R} (add digits)& {\tt 3B} & {\tt 43} & {\tt FF} & {\tt FF} & {\tt 1F} & {\tt 5B} & {\tt 38} & {\tt 27} \\ \cline{2-9}
     349\onslide<2->{\tt x} = high\_bit({\tt X})& {\tt 0} & {\tt 0} & {\tt 1} & {\tt 0} & {\tt 0} & {\tt 0} & {\tt 0} & {\tt 1} \\ \cline{2-9}
     350\onslide<2->{\tt y} = high\_bit({\tt Y}) & {\tt 0} & {\tt 0} & {\tt 0} & {\tt 1} & {\tt 1} & {\tt 0} & {\tt 0} & {\tt 0} \\ \cline{2-9}
     351\onslide<2->{\tt r} = high\_bit({\tt Z}) & {\tt 0} & {\tt 0} & {\tt 1} & {\tt 1} & {\tt 0} & {\tt 0} & {\tt 0} & {\tt 0} \\ \cline{2-9}
     352\onslide<3->{\tt c} (carries)& {\tt 0} & {\tt 0} & {\tt 0} & {\tt 0} & {\tt 1} & {\tt 0} & {\tt 0} & {\tt 1} \\ \cline{2-9}
     353\onslide<4->{\tt c*2} (shift left 1)& {\tt 0} & {\tt 0} & {\tt 0} & {\tt 1} & {\tt 0} & {\tt 0} & {\tt 1} & {\tt 0} \\ \cline{2-9}
     354\onslide<5->{\tt b} (bubble ({\tt Z} = {\tt FF}) & {\tt 0} & {\tt 0} & {\tt 1} & {\tt 1} & {\tt 0} & {\tt 0} & {\tt 0} & {\tt 0} \\ \cline{2-9}
     355\onslide<6->{\tt i} = Matchstar({\tt b},{\tt c*2})& {\tt 0} & {\tt 1} & {\tt 1} & {\tt 1} & {\tt 0} & {\tt 0} & {\tt 1} & {\tt 0} \\ \cline{2-9}
     356\onslide<7->{\tt Z} = {\tt R} + {\tt i} & {\tt 3B} & {\tt 44} & {\tt 0} & {\tt 0} & {\tt 1F} & {\tt 5B} & {\tt 39} & {\tt 27} \\ \cline{2-9}
     357\end{tabular}
     358\end{frame}
    312359
    313360
     
    317364\begin{frame}[fragile]
    318365\frametitle{Test Expressions}
     366
     367
    319368{\tiny
    320369\begin{tabular}{|l|l|}
     
    428477\begin{tabular}{|c|c|c|c|} \hline
    429478\multirow{2}{*}{Expression} & \multicolumn{3}{c|}{Bitstream/AVX2 grep Speedup} \\ \cline{2-4}
    430 & vs. nrgrep & vs. gre2p & vs. GNU grep -e\\ \hline \hline
     479& vs. nrgrep & vs. gre2p & vs. GNU egrep \\ \hline \hline
    431480At & 3.5X & 34X & 1.6X\\ \hline
    432481Date & 0.76X & 13X & 48X\\ \hline
Note: See TracChangeset for help on using the changeset viewer.