source: docs/IUC40/iuc40pres.tex @ 5789

Last change on this file since 5789 was 5201, checked in by cameron, 3 years ago

IUC 40 presentation

File size: 26.2 KB
Line 
1\documentclass{beamer}
2\usepackage{comment}
3\usepackage{color}
4\usefonttheme{professionalfonts} % Tell beamer to not override my font selection.
5\usepackage{mathtools}
6\usepackage{subfigure}
7%\usepackage[default]{ucs}
8%\usepackage[utf8x]{inputenc}
9\usepackage{fontspec}
10\usepackage{verbatim}
11\usepackage{colortbl}
12\usepackage{tikz}
13\usetikzlibrary{shapes,positioning,arrows,calc,fadings}
14\usepackage{pgfplots}
15\usepackage{pgfplotstable}
16%\pgfplotsset{compat=1.13}
17%\setmainfont{Unifont}
18\usetheme{Madrid}
19\AtBeginSection[]{
20        \begin{frame}<beamer>
21        \frametitle{Outline}
22        \tableofcontents[currentsection]
23        \end{frame}
24}
25
26\title[International Unicode Conference 40]{Further Advances in Parabix Technology for High-Performance Unicode}
27\author[Rob Cameron]{Robert D. Cameron}
28\institute[SFU]{School of Computing Science\\
29Simon Fraser University}
30\date{November 3, 2016}
31
32\begin{document}
33
34\begin{frame}
35\titlepage
36\end{frame}
37
38\begin{frame}
39\frametitle{Outline}
40\tableofcontents%[pausesections]
41\end{frame}
42
43\section{Introduction}
44\begin{frame}[fragile]
45\frametitle{Parabix Technology}
46\begin{block}<+->{Parabix Concept}
47
48\begin{itemize}
49\item Parabix employs {\em bitwise data parallelism} to achieve high-performance text processing.
50\item XML parsing [HPCA 2012].
51\item Regular expression matching [PACT 2014].
52\item Process 128 bytes at a time using 128-bit SSE2 registers on all Intel/AMD 64-bit processors.
53\item Process 256 bytes at a time using 256-bit AVX2 technology.
54\end{itemize}
55\end{block}
56
57\begin{block}<+->{Parabix Regular Expression Software}
58\begin{itemize}
59\item \verb:icgrep 1.0: employs Parabix methods in a full Unicode Level 1 ``grep'' search tool [IUC39, ICAPP2015].
60\item Gigabyte/sec regular expression search.
61\end{itemize}
62\end{block}
63\end{frame}
64
65\begin{frame}[fragile]
66\frametitle{Recent Advances}
67\begin{block}<+->{Parabix Toolchain}
68
69\begin{itemize}
70\item 100\% dynamic compilation to LLVM IR.
71\item Dynamic processor detection for AVX2.
72\item Can target NVPTX back end (Nvidia GPUs).
73\item Application construction using stream-processing kernels.
74\item Multicore processing using segmented pipeline parallelism.
75\end{itemize}
76\end{block}
77
78\begin{block}<+->{New Regular Expression Features}
79\begin{itemize}
80\item Unicode Name Reflection
81\item Property Boundary Experessions
82\end{itemize}
83\end{block}
84\end{frame}
85
86\begin{frame}[fragile]
87\frametitle{Technology Roadmap: Parabix OS}
88\begin{block}<+->{Parabix Shell plus Core Utilities}
89
90\begin{itemize}
91\item Parabix versions of grep, sed, awk, cut, wc, head, tail, join, ...
92\item Parabix shell: dynamic pipelining using pipeline parallelism.
93\item Goal: high performance OS for big data applications.
94\item Compression, transcoding, etc., built-in.
95\item Design for use with Linux or Darwin kernel.
96\end{itemize}
97\end{block}
98
99\begin{block}<+->{Parabix Components for Unicode}
100\begin{itemize}
101\item Integrate high-level Unicode awareness into all core utilities.
102\item Unicode regular expression support throughout.
103\item What else?  (A call for advice/collaboration!)
104\end{itemize}
105\end{block}
106\end{frame}
107
108\section{Parabix Regular Expression Matching}
109
110\begin{frame}[fragile]
111\frametitle{Bitwise Data Parallelism: Character Classes}
112Consider matching regexp \verb`A[a-z]*e;` against the input text below.
113
114First we make some character class bit streams.
115\begin{itemize}[<+->]
116\item<2->\verb:[A]:
117\item<3->\verb:[a-z]:
118\item<4->\verb:[e]:
119\item<5->\verb:[;]:
120\end{itemize}
121
122\begin{tabular}{cl}\\
123input data  &              \verb`Axe;  Apples; A badApple;  Accede; Ate!`\\
124\onslide<2->\verb:[A]: &   \verb`1.....1.......1....1.......1.......1...`\\
125\onslide<3->\verb:[a-z]: & \verb`.11....11111....111.1111....11111...11.`\\
126\onslide<4->\verb:[e]: &   \verb`..1.......1............1......1.1....1.`\\
127\onslide<5->\verb:[;]: &   \verb`...1........1...........1........1.....`\\
128\end{tabular}
129\end{frame}
130
131\begin{frame}[fragile]
132\frametitle{Regular Expression Matching Equations}
133Matching equations for \verb`A[a-z]*e,` produce {\em marker bit streams} 
134indicating positions after a match.
135\begin{itemize}[<+->]
136\item<2->$M_1 = \mbox{Advance}(\mbox{\tt [A]})$ --- {\em positions after \verb:[A]:}
137\item<3->$M_2 = \mbox{MatchStar}(M_1, \mbox{\tt [a-z]})$ --- {\em all positions matching \verb:A[a-z]*:}
138\item<4->$M_3 = \mbox{Advance}(M_2 \wedge \mbox{\tt [e]})$ --- {\em all positions matching  \verb:A[a-z]*e:}
139\item<5->$M_4 = \mbox{Advance}(M_3 \wedge \mbox{\tt [;]})$ --- {\em all positions matching  \verb:A[a-z]*e;:}
140\end{itemize}
141
142\begin{tabular}{cl}\\
143input data  &       \verb`Axe;  Apples; A badApple;  Accede; Ate!`\\
144\onslide<2->$M_1$ & \verb`.1.....1.......1....1.......1.......1...`\\
145\onslide<3->$M_2$ & \verb`.111...111111..1....11111...111111..111.`\\
146\onslide<4->$M_3$ & \verb`...1.......1............1......1.1....1.`\\
147\onslide<5->$M_4$ & \verb`....1....................1........1.....`
148\end{tabular}
149\end{frame}
150
151\begin{frame}[fragile]
152\frametitle{MatchStar Definition}
153\begin{itemize}[<+->]
154\item<1->$\text{MatchStar}(M, C) = (((M \wedge C) + C)  \oplus C) \vee M$
155\item<2->Consider $M_2 = \text{MatchStar}(M_1, \mbox{\tt [a-z]})$
156\item<3->Use addition to scan each marker through the class.
157\item<4->Bits that change represent matches.
158\item<5->We also have matches at start positions in $M_1$.
159\end{itemize}
160
161\begin{tabular}{cl}\\
162\onslide<2->input data  &                        \verb`Axe;  Apples; A badApple;  Accede; Ate!`\\
163\onslide<2->$M_1$ &                  \verb`.1.....1.......1....1.......1.......1...`\\
164\onslide<2->$C = \text{\tt [a-z]}$ & \verb`.11....11111....111.1111....11111...11..`\\
165\onslide<3->$T_0 = M_1 \wedge C$ &   \verb`.1.....1............1.......1.......1...`\\
166\onslide<3->$T_1 = T_0 + C$ &        \verb`...1........1...111.....1........1....1.`\\
167\onslide<4->$T_2 = T_1 \oplus C$ &   \verb`.111...111111.......11111...111111..111.`\\
168\onslide<5->$M_2 = T_2 \vee M_1$ &   \verb`.111...111111..1....11111...111111..111.`
169\end{tabular}
170\end{frame}
171
172
173\begin{frame}[fragile]
174\frametitle{Beyond Byte-At-A-Time}
175\begin{itemize}
176\item Byte-oriented data is first transformed to 8 parallel bit
177  streams (Parabix transform).
178\begin{itemize}
179\item Bit stream $j$ consists of bit $j$ of each byte.
180\item Uses SIMD pack operations on SSE2, AVX2, etc.
181\item About 0.5 CPU cycles/byte amortized cost (AVX2).
182\end{itemize}
183\item Compute character class bit streams with bitwise logic.
184\begin{itemize}
185\item \verb:[A], [a-z], [e], [;]: about 0.2 CPU cycles/byte (AVX2).
186\end{itemize}
187\item Implement matching using bitwise logic, shifting and addition.
188\begin{itemize}
189\item \verb:A[a-z]*e;: about 0.3 CPU cycles/byte (AVX2).
190\end{itemize}
191\item Performance beyond the best achievable by byte-at-a-time methods.
192\end{itemize}
193\end{frame}
194
195
196\begin{frame}[fragile]
197\frametitle{Unbounded Stream Abstraction}
198\begin{itemize}
199\item Program operations as if {\em all positions in the file are to
200    be processed simultaneously}.
201\item Unbounded bitwise parallelism.
202\item Pablo compiler technology maps to block-by-block processing.
203\item Information flows between blocks using carry bits.
204\item LLVM compiler infrastructure for Just-In-Time compilation.
205\item Custom LLVM improvements further accelerate processing.
206\end{itemize}
207\end{frame}
208
209
210\begin{frame}[fragile]
211\frametitle{icgrep Features}
212\begin{block}<+->{Unicode Level 1 (UTS \#18)}
213\begin{itemize}
214\item General category (e.g., \verb:\p{Lu}:), script (e.g. \verb:\p{Greek}:), and other core Unicode properties.
215\item Simple Unicode line breaks, word boundaries.
216\item Set operations, e.g., \verb:[\p{Greek}&&\p{Lu}]:
217\item Simple Unicode case-insensitive matching.
218\item Complete implementation of UTS \#18 level 1 requirements.
219\end{itemize}
220\end{block}
221\begin{block}<+->{Unicode Level 2}
222\begin{itemize}
223\item Grapheme clusters and grapheme cluster mode.
224\item Name property with regexp values \verb:\N{SMIL(E|ING)}: (emoji search!)
225\item Broad set of Unicode general, numeric,  identifier, case, normalization,
226  shaping, bidirectional, CJK and  other properties.
227\end{itemize}
228\end{block}
229\end{frame}
230
231\begin{frame}[fragile]
232\frametitle{Performance Study}
233\begin{itemize}
234\item Search for lines containing characters in computed sets.
235\item Ex: \verb'[\p{Greek}&&\p{upper}]'
236\item Set expressions combine general category and script properties.
237\item Set intersection, union, difference, negation.
238\item Cover all general categories and scripts.
239\item 246 test expressions in all.
240\item Evaluate icgrep 1.0 vs. ugrep541 and pcre2grep.
241\item Measure performance against a large collection of Wikimedia
242  documents.
243\end{itemize}
244\end{frame}
245
246\begin{frame}
247\frametitle{Search Time Comparison}
248\begin{center}
249\pgfplotstableread[col sep = comma]{data/icgrep-scatter.csv}\icgrep
250\pgfplotstableread[col sep = comma]{data/ugrep541-scatter.csv}\ugrep
251\pgfplotstableread[col sep = comma]{data/pcre2grep-scatter.csv}\pcre
252
253\begin{tikzpicture}
254\begin{axis}[
255grid=both,
256x tick label style={/pgf/number format/1000 sep=},
257ylabel={CPU Cycles Per Byte},
258xlabel={Percentage of Matching Lines},
259minor y tick num={1},
260xmax=100,
261height=0.5\textwidth,
262legend style={at={(1.05,0.5)},
263anchor=west,legend columns=1,
264align=left,draw=none,column sep=2ex}
265]
266\addplot+[no markers,line width=2pt,color=blue!60,solid] table {\icgrep};
267\addplot+[no markers,line width=2pt,color=red!60,solid] table {\ugrep};
268\addplot+[no markers,line width=2pt,color=brown!60,solid] table {\pcre};
269\legend{icgrep1.0,ugrep541,pcre2grep}
270\end{axis}
271
272
273\end{tikzpicture}
274\end{center}
275\end{frame}
276
277\section{Parabix Platform Development}
278\begin{frame}[fragile]
279\frametitle{Parabix As a Platform}
280\begin{block}<+->{The Questions}
281\begin{itemize}
282\item Is icgrep an interesting one-off application or just one example of a general approach?
283\item How can Parabix techniques be assembled into a programming framework
284for high-performance applications?
285\item How do we produce high-quality high-peformance software without tedious, error-prone and non-portable use of SIMD intrinsics?
286
287\item Can the legacy conventions of Posix systems be systematically updated to create
288an OS with first-class Unicode support?
289\end{itemize}
290\end{block}
291\begin{block}<+->{Human Resource Issues}
292\begin{itemize}
293\item Where can we find software engineers capable of Parabix development?
294\item Unfortunately, open source is not the answer.
295\item Graduate students can only provide part of the solution.
296\end{itemize}
297\end{block}
298
299
300\end{frame}
301
302\begin{frame}[fragile]
303\frametitle{Lessons Learned}
304\begin{block}<+->{Avoid Sequential APIs} 
305\begin{itemize}
306\item Traditional regexp engines provide programmability through APIs.
307\item But APIs are sequential, fundamentally limiting performance.
308\item Do not repeat the mistake of icXML.
309\begin{itemize}
310\item Heroic SIMD/Parabix acceleration of Xerces C++ XML parser.
311\item Fighting Amdahl's Law all the way.
312\item 2X speedup achieved.
313\item Fundamentally limited by Xerces C++ APIs.
314\end{itemize}
315\end{itemize}
316\end{block}
317
318\begin{block}<+->{Core Utilities} 
319\begin{itemize}
320\item Consider core utilities, composed with Unix pipes.
321\item Dynamic compilation allows efficiencies in pipe stage composition.
322\item Fundamentally parallel core utilities avoid sequential bottlenecks.
323\end{itemize}
324\end{block}
325\end{frame}
326
327
328\begin{frame}[fragile]
329\frametitle{Parabix Languages and Compilers}
330\begin{block}<+->{Languages}
331\begin{itemize}
332\item Grammars: regexps, character classes, Unicode properties.
333\item Pablo stream language: operations on arbitrary-length streams.
334\item LLVM IR: high-level assembly language for stream processing {\em kernels}.
335\end{itemize}
336\end{block}
337
338\begin{block}<+->{Compilers} 
339\begin{itemize}
340\item Character class compiler: produce Pablo code for any explicit character class.
341\item Unicode property compiler: Pablo code for any Unicode property.
342\item Regexp compiler: produce Pablo code for any regular expression.
343\item Pablo compiler: produce Pablo Kernels in LLVM IR.
344\item Kernel Pipeline Compilers: produce IR from a chain of kernels.
345\item LLVM JIT code generators translate IR to executable code.
346\end{itemize}
347\end{block}
348\end{frame}
349
350
351\begin{frame}[fragile]
352\frametitle{Kernels}
353\begin{block}<+->{Kernel Structure}
354\begin{itemize}
355\item Kernels are computational abstractions for text stream processing.
356\item Kernels process input stream sets, producing output stream sets.
357\end{itemize}
358\end{block}
359
360\begin{block}<+->{Transposition Kernel} 
361\begin{itemize}
362\item Input: $1 \times \mbox{\tt i8}$: a single stream of 8-bit code units (e.g., UTF-8).
363\item Output: $8 \times \mbox{\tt i1}$: a set 8 of parallel bit streams (basis bit streams).
364\end{itemize}
365\end{block}
366
367\begin{block}<+->{Transposition Subkernels} 
368\begin{itemize}
369\item Transposition can actually be divided into 3 stages.
370\item Stage 1: $1 \times \mbox{\tt i8}$: to $2 \times \mbox{\tt i4}$ (2 streams of nybbles).
371\item Stage 2: $2 \times \mbox{\tt i4}$: to $4 \times \mbox{\tt i2}$ (4 streams of bit-pairs).
372\item Stage 3: $4 \times \mbox{\tt i2}$: to $8 \times \mbox{\tt i1}$ (basis bit streams).
373\end{itemize}
374\end{block}
375\end{frame}
376
377\begin{frame}[fragile]
378\frametitle{Regular Expression Kernels}
379\begin{block}<+->{Character Class Kernels}
380\begin{itemize}
381\item Kernel for the character classes of a regexp: e.g., \verb:a[0-9]*[z9]:
382\item Input: $8 \times \mbox{\tt i1}$: the 8 basis bit streams.
383\item Output: $3 \times \mbox{\tt i1}$: 3 bit streams for \verb:[a]:, \verb:[0-9]:, \verb:[z9]:%, \verb:[\n]:
384\item Dynamically generated by the Parabix Character Class compiler.
385\end{itemize}
386\end{block}
387
388\begin{block}<+->{Matching Logic Kernels}
389\begin{itemize}
390\item Kernel for the matching logic: e.g., \verb:a[0-9]*[z9]:
391\item Input: $3 \times \mbox{\tt i1}$: character class streams
392\item Output: $1 \times \mbox{\tt i1}$: a bit stream of matches found.
393\item Dynamically generated by the Parabix Regular Expression compiler.
394\end{itemize}
395\end{block}
396\end{frame}
397
398\begin{frame}[fragile]
399\frametitle{The icgrep Kernels}
400\begin{block}<+->{Line Break Kernel}
401\begin{itemize}
402\item Kernel for Unicode line breaks
403\item Input: $8 \times \mbox{\tt i1}$: the 8 basis bit streams.
404\item Output: $1 \times \mbox{\tt i1}$: line breaks for any of LF, CR, CRLF, LS, PS, $\cdots$
405\end{itemize}
406\end{block}
407
408\begin{block}<+->{Match Scanning Kernel}
409\begin{itemize}
410\item Kernel to generate matched lines.
411\item Three inputs:
412\begin{itemize}
413\item $1 \times \mbox{\tt i8}$: source byte stream
414\item $1 \times \mbox{\tt i1}$: matches bit stream
415\item $1 \times \mbox{\tt i1}$: line break bit stream
416\end{itemize}
417\item Output: $1 \times \mbox{\tt i8}$ matched line output stream.
418\end{itemize}
419\end{block}
420\end{frame}
421
422\begin{frame}[fragile]
423\frametitle{Parallel Deletion Kernels}
424\begin{block}<+->{Bit Stream Compression Kernel}
425\begin{itemize}
426\item Two inputs:
427\begin{itemize}
428\item {\tt Nxi1}: bit streams to compress
429\item $1 \times \mbox{\tt i1}$: deletion mask stream
430\end{itemize}
431\item Output: {\tt Nxi1}: compressed output streams
432\item Example:
433\begin{tabular}{cl}\\
434input[1]  &              \verb`10101000101010101011` \\
435input[2]  &              \verb`11100111100000110001` \\
436deletion mask &               \verb`00111000001101000110`\\
437output[1]  &              \verb`100001011011` \\
438output[2]  &              \verb`111111001101` \\
439\end{tabular}
440
441\item Provides a general approach to stream filtering.
442\end{itemize}
443\end{block}
444\end{frame}
445
446
447\begin{frame}[fragile]
448\frametitle{The icgrep pipeline}
449\begin{block}<+->{A 5-Stage Pipeline}
450\begin{itemize}
451\item BasisBits = Transpose(ByteData)
452\item LineBreaks = UnicodeLineBreaks(BasiBits)
453\item CharacterClasses = CC\_compiler<regexp>(BasisBits)
454\item MatchPositions = RE\_compiler<regexp>(CharacterClasses)
455\item FinalOutput = MatchScanner(ByteData, LineBreaks, MatchPositions)
456\end{itemize}
457\end{block}
458
459\begin{block}<+->{Pipeline Compilation}
460\begin{itemize}
461\item The Parabix pipeline compiler builds the complete icgrep runtime.
462\item Buffers are allocated for all streams.
463\item Internal states allocated for all kernels.
464\item Kernels are compiled to process data in defined buffers.
465\item icgrep, word count, u8u16 now compile with this structure.
466\end{itemize}
467\end{block}
468\end{frame}
469
470\begin{frame}[fragile]
471\frametitle{Experimental Pipeline Compilers}
472\begin{block}<+->{Pipeline Parallel Compiler}
473\begin{itemize}
474\item Each kernel is compiled to a separate thread function.
475\item Kernels are synchronized through producer/consumer positions.
476\item Lock-free synchronization through monotonic positions.
477\item Functional, but balance between pipeline stages is problematic.
478\end{itemize}
479\end{block}
480
481\begin{block}<+->{NVPTX Pipeline Compiler}
482\begin{itemize}
483\item Kernels compiled to PTX code to run on NVidia GPUs.
484\item Can now compile first 4 icgrep stages to GPU.
485\item Currently only a single workgroup of 64 threads: 4096 position SIMT.
486\item MatchedLineScanner compiles to CPU.
487\end{itemize}
488\end{block}
489\end{frame}
490
491
492
493\begin{frame}[fragile]
494\frametitle{Segmented Pipeline Parallelism}
495\begin{block}<+->{Combined Data and Pipeline Parallelism}
496\begin{itemize}
497\item Input divided into logical segments.
498\item Allocate segments to $P$ cores in round robin fashion.
499\item Core $i$ responsible for all segments $n$ such that $n \mod P = i$.
500\item Each core executes a full pipeline for its segment.
501\item For any pipeline stage $s$ and segment $i+1$, core $(i+1) \mod P$ can proceed as
502soon as core $i \mod P$ completes stage $i$.
503\item Workload balanced between cores as long as no stage requires more than $1/P$ of the total time to process a segment.
504\item In progress $\cdots$
505\end{itemize}
506\end{block}
507\end{frame}
508
509
510\section{New Unicode Regular Expression Features}
511\begin{frame}[fragile]
512\frametitle{Proposal 1: Unicode Name Reflection}
513\begin{block}<+->{Loose Matches: Case Insensitivity}
514
515\begin{itemize}
516
517\item Simple caseless matching: \verb'(?i:*Viagara*)'
518
519\item Matches
520\verb:*viagara*:, \verb:*Viagara*:, \verb:*ViAgArA*:, $\cdots$
521\end{itemize}
522\end{block}
523
524\begin{block}<+->{Loose Matches: Name Reflection}
525
526\begin{itemize}
527\item Use \verb:N: mode flag: \verb'(?N:*Viagara*)'
528\item Each literal regexp character in \verb:N: mode is replaced by its name pattern expression within
529word boundaries.
530\item \verb;(?N:V); = \verb:\p{name=/\bLATIN CAPITAL LETTER V\b/}:, matching:
531\begin{itemize}
532        \item \verb`CIRCLED LATIN CAPITAL LETTER V`: {\fontspec{Arial Unicode MS} â“‹}
533        \item \verb`LATIN CAPITAL LETTER V WITH HOOK`: {\fontspec{Arial Unicode MS} Æ²}
534        \item \verb`LATIN CAPITAL LETTER V WITH TILDE`: {\fontspec{Arial Unicode MS} á¹Œ}
535        \item \verb`NEGATIVE SQUARED LATIN CAPITAL LETTER V`: {\fontspec{Apple Symbols} ðŸ†…}
536    \item as well as V and 7 other codepoints.
537\end{itemize}
538\end{itemize}
539\end{block}
540\end{frame}
541
542\begin{frame}[fragile]
543\frametitle{Name Reflection}
544\begin{block}<+->{Caseless Name Reflection}
545
546\begin{itemize}
547
548\item Combine \verb:(?i): and \verb:(?N): modes, e.g., \verb'(?iN:*Viagara*)'
549
550\item Matches *viAgara*, {\fontspec{Arial Unicode MS} âœºá¹Œâ’€â“}{\fontspec{Geneva} êž¡}{\fontspec{Arial Unicode MS} áº­}{\fontspec{Hiragino Kaku Gothic ProN W3}🅁}a{\fontspec{Arial Unicode MS} âœ²}, $\cdots$
551
552\end{itemize}
553\end{block}
554
555\begin{block}<+->{Level 2 Name Reflection}
556
557\begin{itemize}
558
559\item Combined with grapheme cluster mode, \verb'(?gN)' matches to include named character sequences from {\tt NamedSequences.txt}.
560
561\item \verb;(?N:R); can also match the sequence: LATIN CAPITAL LETTER R WITH TILDE;0052 0303
562
563\item Also consider \verb'(?K)' compatibility mode flag.
564\end{itemize}
565\end{block}
566\end{frame}
567
568
569\begin{frame}[fragile]
570\frametitle{Future Extensions}
571
572\begin{block}<+->{Named Substring or Edited Name Reflection}
573
574\begin{itemize}
575
576\item Potential use case: extended loose name reflection ignoring the word \verb:LATIN:.
577
578\item Use \verb:///: match/replace syntax, possibly.
579
580\item For example \verb'(?N/LATIN//:a)' matches additional codepoints.
581\begin{itemize}
582
583\item 0430      CYRILLIC SMALL LETTER A
584\item 04D1      CYRILLIC SMALL LETTER A WITH BREVE
585\item 04D3      CYRILLIC SMALL LETTER A WITH DIAERESIS
586\item 2090      LATIN SUBSCRIPT SMALL LETTER A
587\item AB70      CHEROKEE SMALL LETTER A
588\item 104D8     OSAGE SMALL LETTER A
589\item 10CC0     OLD HUNGARIAN SMALL LETTER A
590\item 118C1     WARANG CITI SMALL LETTER A
591\end{itemize}
592
593\end{itemize}
594\end{block}
595\end{frame}
596
597
598
599\begin{frame}[fragile]
600\frametitle{Proposal 2: Property Boundary Expressions}
601
602\begin{block}<+->{Simple Boundary Expressions}
603
604\begin{itemize}
605
606\item For any binary property \verb:X:, \verb'\b{X}' is equivalent to:\\ \hspace{1cm} \verb'(?<\p{X})(?=\P{X})|(?<\P{X})(?=\p{X})'.
607
608\item Similarly, for any property-value combination \verb:X=a:, \verb'\b{X=a}' $\equiv$ \verb'(?<\p{X=a})(?=\P{X=a})|(?<\P{X=a})(?=\p{X=a})'.
609
610\item As usual, \verb:sc=: may be omitted for any script value, e.g. \verb'\b{Common}'.
611
612\item Also, \verb:gc=: may be omitted, except for \verb:\b{gc=l}: and  \verb:\b{gc=s}: (conflict with UTS \#18 line and sentence boundaries).
613\end{itemize}
614\end{block}
615
616
617\begin{block}<+->{Character Class Boundary Expressions}
618
619\begin{itemize}
620
621\item The concept extends to character classes, e.g.,
622\verb'\b{[a-f]}' $\equiv$ \verb'(?<[a-f])(?=[^a-f])|(?<[^a-f])(?=[a-f])'.
623
624\item The character class expression may involve properties, intersection, difference $\ldots$
625
626\end{itemize}
627\end{block}
628
629\end{frame}
630
631
632\begin{frame}[fragile]
633\frametitle{Property Boundary Expressions}
634
635\begin{block}<+->{Level 2: Generalized Boundary Expressions}
636\begin{itemize}
637
638\item Now consider the generalization for any property, for example 
639script boundaries \verb'\b{Script}'.
640
641\item Simple concept: the boundary assertion is
642satisfied if the property values of the characters on either side of the boundary are
643{\it different}.
644
645\item Equivalent regexp: a union of property boundary assertions for {\em all} values of the
646property.
647
648\item Use case: search for spoofing: \verb'\p{alpha}\b{Script}\p{alpha}'
649
650\item Parabix implementation straightforward:
651\begin{itemize}
652 \item Encode $N$ property values with $log_2 N$ bitstreams $v_i$.
653 \item Compute \[\bigvee_{i = 0}^{\lceil \log_2 N \rceil} v_i \oplus \textrm{Advance}(v_i) \]
654\end{itemize}
655\end{itemize}
656
657\end{block}
658\end{frame}
659
660\section{Parabix Components for Unicode}
661
662
663
664\begin{frame}[fragile]
665\frametitle{Unicode Property Support}
666\begin{block}<+->{Binary Property Kernels}
667\begin{itemize}
668\item Produced by Unicode property compiler.
669\item Input: $8 \times \mbox{\tt i1}$: basis bit streams (UTF-8) or {\tt 16 x i1} (UTF-16) for input text.
670\item Output: $1 \times \mbox{\tt i1}$: text positions (final code unit) for codepoints having the property.
671\end{itemize}
672\end{block}
673\begin{block}<+->{Enumerated Property Kernels}
674\begin{itemize}
675\item Produced by Unicode property compiler.
676\item For property=value syntax, similar to binary property: $1 \times \mbox{\tt i1}$ output.
677\item Property=/regexp/ syntax,  $1 \times \mbox{\tt i1}$ output.
678\item Full property information for enumeration of $N$ possible values.
679\begin{itemize}
680\item Can produce $N \times \mbox{\tt i1}$ separate streams.
681\item Multiplexed representation: $\lceil \log_2 N \rceil \times \mbox{\tt i1}$ streams.
682\end{itemize}
683\end{itemize}
684\end{block}
685\end{frame}
686
687\begin{frame}[fragile]
688\frametitle{Unicode Property Support}
689\begin{block}<+->{Codepoint Property Kernels}
690\begin{itemize}
691\item Codepoint properties: the value of a given property for an input codepoint $c$ is some other codepoint $c^{\prime}=P(c)$.
692\item Output: up to $21 \times \mbox{\tt i1}$: streams.
693\item Each stream $P_i$ indicates whether bit $i$ changes between input and output codepoint.
694\item Often fewer than 21 streams required.
695\item Can be used for parallel substitution: $y_i = x_i \oplus P_i$.
696\item Efficient simple-case folding.
697\end{itemize}
698\end{block}
699\end{frame}
700
701
702
703
704
705\begin{frame}[fragile]
706\frametitle{Transcoding Kernels}
707\begin{block}<+->{UTF-8 to UTF-16 Logic Kernel}
708\begin{itemize}
709\item Input: $8 \times \mbox{\tt i1}$: the 8 basis bit streams.
710\item Three outputs:
711\begin{itemize}
712\item $16 \times \mbox{\tt i1}$: UTF-16 parallel bit streams
713\item $1 \times \mbox{\tt i1}$: deletion mask stream
714\item $1 \times \mbox{\tt i1}$: UTF-8 error stream
715\end{itemize}
716\item Only one logical output code unit position for 2 or 3 byte UTF-8 sequence, 2 positions for 4-byte sequences.
717\item Deletion mask marks positions to be removed from output stream.
718\end{itemize}
719\end{block}
720\end{frame}
721
722
723
724\begin{frame}[fragile]
725\frametitle{Transformation Kernels (Future)}
726\begin{block}<+->{Case Conversion Notes}
727\begin{itemize}
728\item Apply simple case conversion using parallel logic.
729\item Compute bit stream marking positions for multicharacter conversions.
730\item Bit stream to position index conversion using bit scan instructions.
731\item Sequential application of multicharacter conversions only at required positions.
732\end{itemize}
733\end{block}
734\begin{block}<+->{Normalization Notes}
735\begin{itemize}
736\item Parallel calculation of quick-check properties.
737\item Skip further processing when all positions within a block yield Yes.
738\item Compute compressed ccc property value streams (57 possible values: 6 streams).
739\item Sequential application of normalization where required.
740\end{itemize}
741\end{block}
742\end{frame}
743
744\section{Conclusion}
745
746\begin{frame}[fragile]
747\frametitle{Conclusion}
748\begin{block}<+->{Performance}
749\begin{itemize}
750\item Parabix methods deliver excellent performance for Unicode regular expression matching.
751\item Performance improves with SIMD instruction set advances: AVX2, AVX-512.
752\item Multicore acceleration can use segmented pipeline parallelism.
753\end{itemize}
754\end{block}
755\begin{block}<+->{Programming Framework}
756\begin{itemize}
757\item Programming interface based on kernels, pipelines and compilers.
758\item Kernels are function on arbitrary-length stream sets.
759\item Kernels are composed to into pipelines to yield applications.
760\item Pipeline compilers provide for buffer management and synchronization.
761\item Compilers put it all together.
762\end{itemize}
763\end{block}
764\end{frame}
765
766\begin{frame}[fragile]
767\frametitle{Ongoing/Future Work}
768\begin{block}<+->{Towards Parabix OS}
769\begin{itemize}
770\item Use Unix pipes and files model, with Parabix under the hood.
771\item Parabix shell can dynamically compile pipelines.
772\begin{itemize}
773        \item Avoid costs of inverse transposition followed by transposition.
774        \item Only compute needed bit streams, e.g., decompression followed by grep.
775\item Unicode support can be built-in to all new tools.
776\end{itemize}
777\end{itemize}
778\end{block}
779\begin{block}<+->{Collaboration Opportunities}
780\begin{itemize}
781\item Parabix Technology is open source: \verb`parabix.costar.sfu.ca`.
782\item cameron@sfu.ca.
783\item 2017 Sabbatical: seeking invitations.
784\end{itemize}
785\end{block}
786\end{frame}
787
788
789\end{document}
Note: See TracBrowser for help on using the repository browser.