Changeset 1402 for docs/EuroPar2011/Pres


Ignore:
Timestamp:
Aug 31, 2011, 8:59:07 AM (8 years ago)
Author:
cameron
Message:

Tighten slide timing.

Location:
docs/EuroPar2011/Pres
Files:
2 edited

Legend:

Unmodified
Added
Removed
  • docs/EuroPar2011/Pres/europar-slides.tex

    r1401 r1402  
    5454\end{block}
    5555
    56 \begin{block}<+->{Finite State Transitions at 500 GHz?}
     56\begin{block}<+->{Finite State Transitions at 200 GHz?}
    5757\begin{itemize}
    5858\item Use addition for parallel scanning.
    5959\item Carry propagation between bit positions = FSM transition.
    60 \item Commodity processors @ 3 GHz, 3 additions/cycle.
     60\item Commodity processors @ 3.1 GHz.
    6161\item 64 carry propagations per addition.
    62 \item Effective speed for FSM transitions: $3 \times 3 \times 64 > 500$ GHz.
     62\item Effective speed for FSM transitions: $3.1 \times 64 > 200$ GHz.
    6363\end{itemize}
    6464\end{block}
     
    7070\frametitle{Parallel Bit Streams: A Transform Representation of Text}
    7171
    72 \begin{itemize}[<+->]
     72\begin{itemize}
    7373\item Given a byte-oriented character stream $T$, e.g., ``{\tt Ab17;}''.
    7474\item Transpose to 8 parallel bit streams $b_0$, $b_1$, ..., $b_7$.
    7575\item Each stream $b_k$ comprises bit $k$ of each byte of $T$.
    7676\end{itemize}
    77 \onslide<4->{
    7877\begin{tabular}{|c|c|c|c|c|c|} \hline
    79 $T$ & A & b & 1 & 7 & ; \pause \\ \hline
    80 ASCII & \alert<6>{0}\alert<7>{1}\alert<8>{0}00001 & \alert<6>{0}\alert<7>{1}\alert<8>{1}00010 & \alert<6>{0}\alert<7>{0}\alert<8>{1}10001 & \alert<6>{0}\alert<7>{0}\alert<8>{1}10111 & \alert<6>{0}\alert<7>{0}\alert<8>{1}11011 \pause \\ \hline
    81 $b_0$ & \alert<6>{0} & \alert<6>{0} & \alert<6>{0} & \alert<6>{0} & \alert<6>{0} \pause \\ \hline
    82 $b_1$ & \alert<7>{1} & \alert<7>{1} & \alert<7>{0} & \alert<7>{0} & \alert<7>{0} \pause \\ \hline
    83 $b_2$ & \alert<8>{0} & \alert<8>{1} & \alert<8>{1} & \alert<8>{1} & \alert<8>{1}  \\ \hline
     78$T$ & A & b & 1 & 7 & ; \\ \hline
     79ASCII & \alert<2>{0}\alert<3>{1}\alert<4>{0}00001 & \alert<2>{0}\alert<3>{1}\alert<4>{1}00010 & \alert<2>{0}\alert<3>{0}\alert<4>{1}10001 & \alert<2>{0}\alert<3>{0}\alert<4>{1}10111 & \alert<2>{0}\alert<3>{0}\alert<4>{1}11011 \pause \\ \hline
     80$b_0$ & \alert<2>{0} & \alert<2>{0} & \alert<2>{0} & \alert<2>{0} & \alert<2>{0} \pause \\ \hline
     81$b_1$ & \alert<3>{1} & \alert<3>{1} & \alert<3>{0} & \alert<3>{0} & \alert<3>{0} \pause \\ \hline
     82$b_2$ & \alert<4>{0} & \alert<4>{1} & \alert<4>{1} & \alert<4>{1} & \alert<4>{1}  \\ \hline
    8483$b_3$ & 0 & 0 & 1 & 1 & 1 \\ \hline
    8584$b_4$ & 0 & 0 & 0 & 0 & 1 \\ \hline
     
    8887$b_7$ & 1 & 0 & 1 & 1 & 1 \\ \hline
    8988\end{tabular}
    90 }
    9189\end{frame}
    9290
     
    9896Why form parallel bit streams?
    9997
    100 \begin{itemize}[<+->]
     98\begin{itemize}
    10199\item Byte-at-a-time text processing is too slow.
    102 \begin{itemize}[<+->]
     100\begin{itemize}
    103101\item Example: XML scan for ``{\tt <}''.
    104102\item Byte-at-time loop computes only 1 info bit per iteration!
    105103\end{itemize}
    106104\item So let's compute those bits in parallel!
    107 \begin{itemize}[<+->]
     105\begin{itemize}
    108106\item Bitwise logic on basis streams $b_i \rightarrow${\tt [<]} stream.
    109107\item Process 128 positions at a time using SSE registers.
    110108\end{itemize}
    111109\item Find next ``{\tt <}'' with {\em bit scan} instruction (e.g., Intel bsf).
    112 \begin{itemize}[<+->]
     110\begin{itemize}
    113111\item Advance up to 63 positions at once.
    114112\end{itemize}
     
    121119\begin{frame}[fragile]
    122120\frametitle{Character Class Formation}
    123 \begin{itemize}[<+->]
     121\begin{itemize}
    124122\item Combining 8 bits of a code unit gives a character class stream.
    125123\item compile({\tt [CharDef(LAngle, "<")]})
     
    141139\begin{frame}[fragile]
    142140\frametitle{Character Class Common Subexpressions}
    143 \begin{itemize}[<+->]
     141\begin{itemize}
    144142\item Multiple definitions have common subexpressions.
    145 \item compile({\tt [CharDef(LAngle, "<"), \\ CharDef(RAngle, "<")]})
     143\item compile({\tt [CharDef(LAngle, "<"), \\ \alert{CharDef(RAngle, "<")}]})
    146144\item
    147145\begin{semiverbatim}
     
    153151temp6 = simd_andc(temp4, temp5);
    154152LAngle = simd_and(temp3, temp6);
    155 \onslide<4->temp7 = simd_andc(bit[6], bit[7]);
    156 \onslide<4->temp8 = simd_and(temp4, temp7);
    157 \onslide<4->RAngle = simd_and(temp3, temp8);
     153\alert{temp7 = simd_andc(bit[6], bit[7]);
     154temp8 = simd_and(temp4, temp7);
     155RAngle = simd_and(temp3, temp8);}
    158156\end{semiverbatim}
    159157\end{itemize}
     
    162160\begin{frame}[fragile]
    163161\frametitle{Character Class Ranges}
    164 \begin{itemize}[<+->]
     162\begin{itemize}
    165163\item Ranges of characters are often very simple to compute.
    166164\item compile({\tt [CharSet('Control', ['$\backslash$x00-$\backslash$x1F']),
    167            \onslide<4->{CharSet('Digit', ['0-9']})]]})
     165           {CharSet('Digit', ['0-9']})]]})
    168166\item
    169167\begin{semiverbatim}
     
    171169temp2 = simd_or(temp1, bit[2]);
    172170Control = simd_andc(simd_const_1(1), temp2)
    173 \onslide<5->temp3 = simd_and(bit[2], bit[3]);
    174 \onslide<5->temp4 = simd_andc(temp3, temp1);
    175 \onslide<5->temp5 = simd_or(bit[5], bit[6]);
    176 \onslide<5->temp6 = simd_and(bit[4], temp5);
    177 \onslide<5->Digit = simd_andc(temp4, temp6);
     171temp3 = simd_and(bit[2], bit[3]);
     172temp4 = simd_andc(temp3, temp1);
     173temp5 = simd_or(bit[5], bit[6]);
     174temp6 = simd_and(bit[4], temp5);
     175Digit = simd_andc(temp4, temp6);
    178176\end{semiverbatim}
    179177\end{itemize}
     
    186184\begin{frame}[fragile]
    187185\frametitle{From Sequential to Parallel Scanning}
    188 \begin{itemize}[<+->]
    189 \item Parabix 1: use bit scan instructions throughout.
     186\begin{block}<+->{Parabix 1: Use Bit Scan Instructions}
    190187\begin{itemize}
    191188\item For example, to find markup, bit scan to next 1 in [{\tt <}].
    192189\item Accelerates scanning, but still sequential.
    193190\end{itemize}
    194 
    195 \item Parabix 2: use new parallel scanning primitive.
     191\end{block}
     192\begin{block}<+->{Parabix 2 : Parallel Scanning Primitive}
    196193\begin{itemize}
    197194\item $s(M, C) = (M + C) \wedge \neg C$
     
    201198\item Relies on little-endian representation of streams.
    202199\end{itemize}
    203 \end{itemize}
     200\end{block}
    204201\end{frame}
    205202
     
    217214\onslide<2->$T$     &  {\tt --935---7---29456---7--23--6--} \\
    218215\onslide<3->$M_0$   &  {\tt ....1...........1.......1...1.} \\
    219 \onslide<3->$D$     &  {\tt ..111...1...11111...1..11..1..} \\
     216\onslide<4->$D$     &  {\tt ..111...1...11111...1..11..1..} \\
    220217\onslide<5->$M_0 +D$ & {\tt .1......1..1........1.1....11.} \\
    221218\onslide<6->$M_1 = (M_0 + D)
Note: See TracChangeset for help on using the changeset viewer.