Changeset 4875 for docs


Ignore:
Timestamp:
Nov 17, 2015, 9:16:13 PM (3 years ago)
Author:
cameron
Message:

Final version of presentation at ICA3PP

File:
1 edited

Legend:

Unmodified
Added
Removed
  • docs/Working/icGrep/pres.tex

    r4874 r4875  
    7777\item Gigabyte to Terabyte scale.
    7878\item Regular expressions are often used in these applications.
    79 \item If performance were better, regular expressions could be applied
    80   more widely.
    8179\end{itemize}
    8280\end{block}
     
    9189\item
    9290Polig {\it et al.}
    93 Giving text analytics a boost. {\em IEEE Micro}, {\bf 34}(4):6--14,
     91Giving text analytics a boost. {\em IEEE Micro}, {\bf 34},
    94922014.
    95 \end{itemize}
    96 \end{block}
    97 \end{frame}
    98 \begin{frame}[fragile]
    99 \frametitle{Performance Matters - Catastrophic Backtracking}
    100 \begin{block}<+->{Nondeterministic Regular Expression Features}
    101 \begin{itemize}
    102 \item Ambiguity: regexes may match strings in multiple ways.
    103 \item Ex: match(\verb:^(\p{L})+t(\p{L})+$:, ``\verb:statistics:'')
    104 \item (\verb:s:, \verb:atistics:),  (\verb:sta:, \verb:istics:), or
    105   (\verb:statis:, \verb:ics:)
    106 \item Backtracking engines: potential runaway behaviour.
    107 \item Procedural workarounds:  \verb:^(\p{L})++t(\p{L})+$:  or \verb:^(?>(\p{L})+?)t(\p{L})+$:
    108 \item Declarative nature of regexps diminished.
    109 \end{itemize}
    110 \end{block}
    111 
    112 \begin{block}<+->{Web Site Security}
    113 \begin{itemize}
    114 \item Regular expressions may be used in web services.
    115 \item Malicious agents may inject ill-formed regular expressions.
    116 \item Web sites may be tied up (denial-of-service) or compromised.
    11793\end{itemize}
    11894\end{block}
     
    163139\begin{frame}[fragile]
    164140\frametitle{Beyond Byte-At-A-Time}
    165 \begin{itemize}[<+->]
     141\begin{itemize}%[<+->]
    166142\item Traditional regular expression technology processes one code
    167143  unit at a time using DFA, NFA or backtracking implementations.
     
    218194\begin{tabular}{cl}\\
    219195input data  & \verb`a45a...`\\
    220 \onslide$cc\_a$ & \verb`1001...`\\
     196$cc\_a$ & \verb`1001...`\\
    221197\end{tabular}
    222198
     
    277253\begin{frame}[fragile]
    278254\frametitle{Marker Stream Example}
    279 \begin{itemize}[<+->]
     255\begin{itemize}%[<+->]
    280256\item Consider matching regular expression \verb`a[0-9]*[z9]` against the input text below.
    281 \item<2->$M_1$ marks positions after occurrences of \verb:a:.
    282 \item<3->$M_2$ marks positions after occurrences of \verb:a[0-9]*:.
    283 \item<4->$M_3$ marks positions after occurrences of \verb:a[0-9]*[z9]:.
     257\item $M_1$ marks positions after occurrences of \verb:a:.
     258\item $M_2$ marks positions after occurrences of \verb:a[0-9]*:.
     259\item $M_3$ marks positions after occurrences of \verb:a[0-9]*[z9]:.
    284260\end{itemize}
    285261
    286262\begin{tabular}{cr}\\
    287263input data  & \verb`a453z--b3z--az--a12949z--ca22z7--`\\
    288 \onslide<2->$M_1$ & \verb`.1...........1...1.........1.....`\\
    289 \onslide<3->$M_2$ & \verb`.1111........1...111111....111...`\\
    290 \onslide<4->$M_3$ & \verb`.....1........1.....1.11......1..`
     264$M_1$ & \verb`.1...........1...1.........1.....`\\
     265$M_2$ & \verb`.1111........1...111111....111...`\\
     266$M_3$ & \verb`.....1........1.....1.11......1..`
    291267\end{tabular}
    292268\end{frame}
     
    296272\begin{frame}[fragile]
    297273\frametitle{Matching Character Class Repetitions with MatchStar}
    298 \begin{itemize}[<+->]
     274\begin{itemize}%[<+->]
    299275\item<2->$\text{MatchStar}(M, C) = (((M \wedge C) + C)  \oplus C) \vee M$
    300276\item<3->Consider $M_2 = \text{MatchStar}(M_1, C)$
     
    330306\end{eqnarray*}
    331307The recursive equation is implemented with a while loop.
     308\end{frame}
     309
     310
     311\section{Unicode Regular Expression Matching with Parabix}
     312\begin{frame}
     313\frametitle{Unicode}
     314\begin{center}
     315\Huge UTF-8 Regular Expression Matching
     316\end{center}
     317\end{frame}
     318
     319\begin{frame}[fragile]
     320\frametitle{UTF-8 Byte Classification and Validation}
     321
     322\begin{eqnarray*}
     323\mbox{ASCII} & = & \mbox{CharClass}(\verb:[\x00-\x7F]:) \\
     324\mbox{Prefix} & = & \mbox{CharClass}(\verb:[\xC2-\F4]:) \\
     325\mbox{Pfx3or4} & = & \mbox{CharClass}(\verb:[\xE0-\xF4]:) \\
     326\mbox{Prefix4} & = & \mbox{CharClass}(\verb:[\xF0-\xF4]:) \\
     327\mbox{Suffix} & = & \mbox{CharClass}(\verb:[\x80-\xBF]:) \\
     328\mbox{Scope} & = & \mbox{Advance}(\mbox{Prefix}) \vee \mbox{Advance}(\mbox{Pfx3or4},2) \vee \mbox{Advance}(\mbox{Prefix4}, 3) \\
     329\mbox{Mismatch} & = & \mbox{Scope} \oplus \mbox{Suffix} \\
     330\mbox{Initial} & = & \mbox{ASCII} \vee \mbox{Prefix} \\
     331\mbox{NonFinal} & = & \mbox{Prefix} \vee \mbox{Advance}(\mbox{Pfx3or4}) \vee \mbox{Advance}(\mbox{Prefix4}, 2)
     332\end{eqnarray*}
     333\end{frame}
     334
     335
     336\begin{frame}[fragile]
     337\frametitle{UTF-8 Matching Operations}
     338
     339Advance and Matchstar can both be adapted for UTF-8 matching.
     340Let U8Class($U$) be the stream marking the {\em final} UTF-8 byte position of
     341occurrences of members of the class.
     342
     343\begin{eqnarray*}
     344\mbox{Match}(m, U) & = & \mbox{Advance}(\mbox{ScanThru}(m, \mbox{NonFinal}) \wedge \mbox{U8Class}(U)) \\
     345\mbox{Match}(m, U*) & = & \mbox{MatchStar}(m, \mbox{U8Class}(U) \vee \mbox{NonFinal}) \wedge \mbox{Initial}\\
     346\mbox{ScanThru}(m, C) & = & (m + C) \wedge \neg C\\
     347\end{eqnarray*}
    332348\end{frame}
    333349
Note: See TracChangeset for help on using the changeset viewer.