Changeset 3051


Ignore:
Timestamp:
Apr 19, 2013, 3:49:32 PM (6 years ago)
Author:
cameron
Message:

More tables

Location:
docs/Balisage13
Files:
3 edited

Legend:

Unmodified
Added
Removed
  • docs/Balisage13/Bal2013came0601/Bal2013came0601.html

    r3050 r3051  
     1<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd">
    12<html lang="en">
    23<head>
    34<meta http-equiv="Content-Type" content="text/html; charset=UTF-8">
    45<title></title>
    5 <link rel="stylesheet" href="balisage-plain.css" type="text/css">
     6<link rel="stylesheet" href="balisage-proceedings.css" type="text/css">
    67<meta name="keywords" content="">
    78</head>
     
    1011<i>Balisage:</i> <small>The Markup Conference</small>
    1112</h1></div>
    12 <div lang="en" class="article">
    13 <div class="titlepage">
    14 <h2 class="article-title" id="idp30464"></h2>
     13<html lang="en">
     14<head>
     15<title></title>
     16<link rel="stylesheet" href="balisage-proceedings.css" type="text/css">
     17<meta name="generator" content="Balisage Conference Proceedings XSLT (v1.2)">
     18<script type="text/javascript">
     19    function toggle(folderID) {
     20      folder = document.getElementById("folder-"+folderID);
     21      icon = document.getElementById("icon-"+folderID)
     22      // need to:
     23      //   switch folder.style.display between 'none' and 'block'
     24      //   switch between collapse and expand icons
     25      if (folder.style.display != "block") {
     26        folder.style.display = "block";
     27        icon.src = "minus.png" ;
     28        icon.alt = "collapse" ;
     29      }
     30      else {
     31        folder.style.display = "none";
     32        icon.src = "plus.png" ;
     33        icon.alt = "expand" ;
     34      };
     35      return;
     36    }
     37
     38   function hidecite(citeID) {
     39     cite = document.getElementById(citeID);
     40     cite.style.display = "none";
     41     return;
     42   }
     43   
     44   function showcite(citeID,anchorID) {
     45     cite = document.getElementById(citeID);
     46
     47     citeLeft = cite.style.left;
     48     citeTop = cite.style.top;
     49     
     50     if (citeLeft != (getLeft(anchorID)+"px") ||
     51         citeTop != (getTop(anchorID)+"px")) {
     52       cite.style.display = "none";
     53     }
     54     
     55     if (cite.style.display != "table-cell") {
     56        movebox(citeID, anchorID);
     57        cite.style.display = "table-cell";
     58     }
     59     else {
     60       cite.style.display = "none";
     61     };
     62     return;
     63   }
     64
     65   function movebox(citeID, anchorID) {
     66
     67     cite = document.getElementById(citeID);
     68     
     69     // alert(cite.offsetWidth + " by " + cite.offsetHeight)
     70     
     71     horizontalOffset = getLeft(anchorID);
     72     // horizontalOffset = (inMain(anchorID)) ?
     73     // (horizontalOffset - 260) : (horizontalOffset + 20)
     74     // (horizontalOffset - (20 + cite.offsetWidth)) : (horizontalOffset + 20)
     75
     76     verticalOffset = getTop(anchorID);
     77     // verticalOffset = (inMain(anchorID)) ?
     78     // (verticalOffset - 20) : (verticalOffset + 20)
     79     // (verticalOffset - (20 + cite.offsetHeight)) : (verticalOffset + 20)
     80
     81     /*
     82     horizontalOffset = getAbsoluteLeft(anchorID) - getScrollLeft(anchorID) + 20;
     83     if (inMain(anchorID)) {
     84       horizontalOffset = horizontalOffset - 300;
     85     }
     86     verticalOffset = getAbsoluteTop(anchorID) - getScrollTop(anchorID) - 40;
     87     if (inMain(anchorID)) {
     88       verticalOffset = verticalOffset - 300;
     89     }
     90     */
     91     
     92     cite.style.left = horizontalOffset + "px";
     93     cite.style.top = verticalOffset + "px";
     94   }
     95   
     96   function getLeft(objectID) {
     97     var left = getAbsoluteLeft(objectID) - getScrollLeft(objectID);
     98     left = (inMain(objectID)) ? (left - 260) : (left + 20)   
     99     return left;
     100   }
     101   
     102   function getTop(objectID) {
     103     var top = getAbsoluteTop(objectID) - getScrollTop(objectID);
     104     top = (inMain(objectID)) ? (top - 50) : (top + 20)
     105     return top;     
     106   }
     107   
     108   function getAbsoluteLeft(objectId) {
     109   // Get an object left position from the upper left viewport corner
     110     o = document.getElementById(objectId)
     111     oLeft = o.offsetLeft            // Get left position from the parent object
     112     while(o.offsetParent!=null) {   // Parse the parent hierarchy up to the document element
     113       oParent = o.offsetParent    // Get parent object reference
     114       oLeft += oParent.offsetLeft // Add parent left position
     115       o = oParent
     116      }
     117    return oLeft
     118    }
     119
     120    function getAbsoluteTop(objectId) {
     121    // Get an object top position from the upper left viewport corner
     122      o = document.getElementById(objectId)
     123      oTop = o.offsetTop            // Get top position from the parent object
     124      while(o.offsetParent!=null) { // Parse the parent hierarchy up to the document element
     125        oParent = o.offsetParent  // Get parent object reference
     126        oTop += oParent.offsetTop // Add parent top position
     127        o = oParent
     128      }
     129    return oTop
     130    }
     131
     132   function getScrollLeft(objectId) {
     133     // Get a left scroll position
     134     o = document.getElementById(objectId)
     135     oLeft = o.scrollLeft            // Get left position from the parent object
     136     while(o.offsetParent!=null) {   // Parse the parent hierarchy up to the document element
     137       oParent = o.offsetParent    // Get parent object reference
     138       oLeft += oParent.scrollLeft // Add parent left position
     139       o = oParent
     140      }
     141    return oLeft
     142    }
     143
     144    function getScrollTop(objectId) {
     145    // Get a top scroll position
     146      o = document.getElementById(objectId)
     147      oTop = o.scrollTop            // Get top position from the parent object
     148      while(o.offsetParent!=null) { // Parse the parent hierarchy up to the document element
     149        oParent = o.offsetParent  // Get parent object reference
     150        oTop += oParent.scrollTop // Add parent top position
     151        o = oParent
     152      }
     153    return oTop
     154    }
     155
     156    function inMain(objectId) {
     157    // returns true if in div#main
     158      o = document.getElementById(objectId)
     159      while(o.parentNode != null) { // Parse the parent hierarchy up to div#main
     160        oParent = o.parentNode
     161        if (o.id == "main") { return true; }
     162        o = oParent;
     163      }
     164    return false;
     165    }
     166
     167
     168   /*
     169   function showcite(citeID) {
     170      cite = document.getElementById(citeID);
     171      if (cite.style.display != "table-cell") {
     172        cite.style.display = "table-cell";
     173      }
     174      else {
     175        cite.style.display = "none";
     176      };
     177      return;
     178    }
     179    */
     180
     181      </script>
     182</head>
     183<body>
     184<div class="inline-citation" id="cite-XMLChip09" style="display:none;width: 240px">
     185<a class="quiet" href="javascript:hidecite('cite-XMLChip09')" style="font-size:90%"><img src="eks.png" alt="[x]" style="float:right;clear:both;margin:1px"></a><p style="margin:0ex">Leventhal, Michael and
     186         Eric Lemoine 2009. The XML chip at 6 years. Proceedings of International Symposium on
     187         Processing XML Efficiently 2009, Montréal.</p>
     188</div>
     189<div class="inline-citation" id="cite-Datapower09" style="display:none;width: 240px">
     190<a class="quiet" href="javascript:hidecite('cite-Datapower09')" style="font-size:90%"><img src="eks.png" alt="[x]" style="float:right;clear:both;margin:1px"></a><p style="margin:0ex">Salz, Richard,
     191         Heather Achilles, and David Maze. 2009. Hardware and software trade-offs in the IBM
     192         DataPower XML XG4 processor card. Proceedings of International Symposium on Processing XML
     193         Efficiently 2009, Montréal.</p>
     194</div>
     195<div class="inline-citation" id="cite-PPoPP08" style="display:none;width: 240px">
     196<a class="quiet" href="javascript:hidecite('cite-PPoPP08')" style="font-size:90%"><img src="eks.png" alt="[x]" style="float:right;clear:both;margin:1px"></a><p style="margin:0ex">Cameron, Robert D. 2007. A Case Study
     197         in SIMD Text Processing with Parallel Bit Streams UTF-8 to UTF-16 Transcoding. Proceedings
     198         of 13th ACM SIGPLAN Symposium on Principles and Practice of Parallel Programming 2008, Salt
     199         Lake City, Utah. On the Web at <a href="http://research.ihost.com/ppopp08/" class="link" target="_new">http://research.ihost.com/ppopp08/</a>.</p>
     200</div>
     201<div class="inline-citation" id="cite-CASCON08" style="display:none;width: 240px">
     202<a class="quiet" href="javascript:hidecite('cite-CASCON08')" style="font-size:90%"><img src="eks.png" alt="[x]" style="float:right;clear:both;margin:1px"></a><p style="margin:0ex">Cameron, Robert D.,
     203         Kenneth S Herdy, and Dan Lin. 2008. High Performance XML Parsing Using Parallel Bit Stream
     204         Technology. Proceedings of CASCON 2008. 13th ACM SIGPLAN Symposium on Principles and
     205         Practice of Parallel Programming 2008, Toronto.</p>
     206</div>
     207<div class="inline-citation" id="cite-SVGOpen08" style="display:none;width: 240px">
     208<a class="quiet" href="javascript:hidecite('cite-SVGOpen08')" style="font-size:90%"><img src="eks.png" alt="[x]" style="float:right;clear:both;margin:1px"></a><p style="margin:0ex">Herdy, Kenneth
     209         S., Robert D. Cameron and David S. Burggraf. 2008. High Performance GML to SVG
     210         Transformation for the Visual Presentation of Geographic Data in Web-Based Mapping Systems.
     211         Proceedings of SVG Open 6th International Conference on Scalable Vector Graphics,
     212         Nuremberg. On the Web at
     213            <a href="http://www.svgopen.org/2008/papers/74-HighPerformance_GML_to_SVG_Transformation_for_the_Visual_Presentation_of_Geographic_Data_in_WebBased_Mapping_Systems/" class="link" target="_new">http://www.svgopen.org/2008/papers/74-HighPerformance_GML_to_SVG_Transformation_for_the_Visual_Presentation_of_Geographic_Data_in_WebBased_Mapping_Systems/</a>.</p>
     214</div>
     215<div class="inline-citation" id="cite-Ross06" style="display:none;width: 240px">
     216<a class="quiet" href="javascript:hidecite('cite-Ross06')" style="font-size:90%"><img src="eks.png" alt="[x]" style="float:right;clear:both;margin:1px"></a><p style="margin:0ex">Ross, Kenneth A. 2006. Efficient hash
     217         probes on modern processors. Proceedings of ICDE, 2006. ICDE 2006, Atlanta. On the Web at
     218            <a href="http://www.cs.columbia.edu/~kar/pubsk/icde2007.pdf" class="link" target="_new">www.cs.columbia.edu/~kar/pubsk/icde2007.pdf</a>.</p>
     219</div>
     220<div class="inline-citation" id="cite-ASPLOS09" style="display:none;width: 240px">
     221<a class="quiet" href="javascript:hidecite('cite-ASPLOS09')" style="font-size:90%"><img src="eks.png" alt="[x]" style="float:right;clear:both;margin:1px"></a><p style="margin:0ex">Cameron, Robert D. and Dan
     222         Lin. 2009. Architectural Support for SWAR Text Processing with Parallel Bit Streams: The
     223         Inductive Doubling Principle. Proceedings of ASPLOS 2009, Washington, DC.</p>
     224</div>
     225<div class="inline-citation" id="cite-Wu08" style="display:none;width: 240px">
     226<a class="quiet" href="javascript:hidecite('cite-Wu08')" style="font-size:90%"><img src="eks.png" alt="[x]" style="float:right;clear:both;margin:1px"></a><p style="margin:0ex">Wu, Yu, Qi Zhang, Zhiqiang Yu and
     227         Jianhui Li. 2008. A Hybrid Parallel Processing for XML Parsing and Schema Validation.
     228         Proceedings of Balisage 2008, Montréal. On the Web at
     229            <a href="http://www.balisage.net/Proceedings/vol1/html/Wu01/BalisageVol1-Wu01.html" class="link" target="_new">http://www.balisage.net/Proceedings/vol1/html/Wu01/BalisageVol1-Wu01.html</a>.</p>
     230</div>
     231<div class="inline-citation" id="cite-u8u16" style="display:none;width: 240px">
     232<a class="quiet" href="javascript:hidecite('cite-u8u16')" style="font-size:90%"><img src="eks.png" alt="[x]" style="float:right;clear:both;margin:1px"></a><p style="margin:0ex">u8u16 - A High-Speed UTF-8 to UTF-16
     233         Transcoder Using Parallel Bit Streams Technical Report 2007-18. 2007. School of Computing
     234         Science Simon Fraser University, June 21 2007.</p>
     235</div>
     236<div class="inline-citation" id="cite-XML10" style="display:none;width: 240px">
     237<a class="quiet" href="javascript:hidecite('cite-XML10')" style="font-size:90%"><img src="eks.png" alt="[x]" style="float:right;clear:both;margin:1px"></a><p style="margin:0ex">Extensible Markup Language (XML) 1.0 (Fifth
     238         Edition) W3C Recommendation 26 November 2008. On the Web at
     239            <a href="http://www.w3.org/TR/REC-xml/" class="link" target="_new">http://www.w3.org/TR/REC-xml/</a>.</p>
     240</div>
     241<div class="inline-citation" id="cite-Unicode" style="display:none;width: 240px">
     242<a class="quiet" href="javascript:hidecite('cite-Unicode')" style="font-size:90%"><img src="eks.png" alt="[x]" style="float:right;clear:both;margin:1px"></a><p style="margin:0ex">The Unicode Consortium. 2009. On the Web at
     243            <a href="http://unicode.org/" class="link" target="_new">http://unicode.org/</a>.</p>
     244</div>
     245<div class="inline-citation" id="cite-Pex06" style="display:none;width: 240px">
     246<a class="quiet" href="javascript:hidecite('cite-Pex06')" style="font-size:90%"><img src="eks.png" alt="[x]" style="float:right;clear:both;margin:1px"></a><p style="margin:0ex"> Hilewitz, Y. and Ruby B. Lee.
     247         2006. Fast Bit Compression and Expansion with Parallel Extract and Parallel Deposit
     248         Instructions. Proceedings of the IEEE 17th International Conference on Application-Specific
     249         Systems, Architectures and Processors (ASAP), pp. 65-72, September 11-13, 2006.</p>
     250</div>
     251<div class="inline-citation" id="cite-InfoSet" style="display:none;width: 240px">
     252<a class="quiet" href="javascript:hidecite('cite-InfoSet')" style="font-size:90%"><img src="eks.png" alt="[x]" style="float:right;clear:both;margin:1px"></a><p style="margin:0ex">XML Information Set (Second Edition) W3C
     253         Recommendation 4 February 2004. On the Web at
     254         <a href="http://www.w3.org/TR/xml-infoset/" class="link" target="_new">http://www.w3.org/TR/xml-infoset/</a>.</p>
     255</div>
     256<div class="inline-citation" id="cite-Saxon" style="display:none;width: 240px">
     257<a class="quiet" href="javascript:hidecite('cite-Saxon')" style="font-size:90%"><img src="eks.png" alt="[x]" style="float:right;clear:both;margin:1px"></a><p style="margin:0ex">SAXON The XSLT and XQuery Processor. On the Web
     258         at <a href="http://saxon.sourceforge.net/" class="link" target="_new">http://saxon.sourceforge.net/</a>.</p>
     259</div>
     260<div class="inline-citation" id="cite-Kay08" style="display:none;width: 240px">
     261<a class="quiet" href="javascript:hidecite('cite-Kay08')" style="font-size:90%"><img src="eks.png" alt="[x]" style="float:right;clear:both;margin:1px"></a><p style="margin:0ex"> Kay, Michael Y. 2008. Ten Reasons Why Saxon
     262         XQuery is Fast, IEEE Data Engineering Bulletin, December 2008.</p>
     263</div>
     264<div class="inline-citation" id="cite-AElfred" style="display:none;width: 240px">
     265<a class="quiet" href="javascript:hidecite('cite-AElfred')" style="font-size:90%"><img src="eks.png" alt="[x]" style="float:right;clear:both;margin:1px"></a><p style="margin:0ex"> The Ælfred XML Parser. On the Web at
     266            <a href="http://saxon.sourceforge.net/aelfred.html" class="link" target="_new">http://saxon.sourceforge.net/aelfred.html</a>.</p>
     267</div>
     268<div class="inline-citation" id="cite-JNI" style="display:none;width: 240px">
     269<a class="quiet" href="javascript:hidecite('cite-JNI')" style="font-size:90%"><img src="eks.png" alt="[x]" style="float:right;clear:both;margin:1px"></a><p style="margin:0ex">Hitchens, Ron. Java NIO. O'Reilly, 2002.</p>
     270</div>
     271<div class="inline-citation" id="cite-Expat" style="display:none;width: 240px">
     272<a class="quiet" href="javascript:hidecite('cite-Expat')" style="font-size:90%"><img src="eks.png" alt="[x]" style="float:right;clear:both;margin:1px"></a><p style="margin:0ex">The Expat XML Parser.
     273            <a href="http://expat.sourceforge.net/" class="link" target="_new">http://expat.sourceforge.net/</a>.</p>
     274</div>
     275<div id="mast"><div class="content">
     276<h2 class="article-title" id="idp74624"></h2>
    15277<div class="author">
    16278<h3 class="author">Nigel Medforth</h3>
     
    61323<h5 class="author-email"><code class="email">&lt;<a class="email" href="mailto:"></a>&gt;</code></h5>
    62324</div>
    63 <div class="abstract">
    64 <p class="title"><b>Abstract</b></p>
    65 <p id="idp31888">Prior research on the acceleration of XML processing using SIMD and multi-core
     325<div class="mast-box">
     326<p class="title"><a href="javascript:toggle('idp75744')" class="quiet"><img class="toc-icon" src="plus.png" alt="expand" id="icon-idp75744"></a> <span onclick="javascript:toggle('idp75744');return true">Abstract</span></p>
     327<div class="folder" id="folder-idp75744" style="display:none"><p id="idp76048">Prior research on the acceleration of XML processing using SIMD and multi-core
    66328            parallelism has led to a number of interesting research prototypes. This work
    67329            investigates the extent to which the techniques underlying these prototypes could result
     
    72334            an increase in parsing speed of at least 50% was observed in a range of applications.
    73335            When coupled with pipeline parallelism on dual core processors, improvements of 2x and
    74             beyond were realized. </p>
    75 </div>
    76 <hr>
     336            beyond were realized. </p></div>
    77337</div>
    78338<div class="toc">
    79339<p><b>Table of Contents</b></p>
    80340<dl>
    81 <dt><span class="section"><a href="#idp240288" class="toc">Introduction</a></span></dt>
    82 <dt><span class="section"><a href="#idp242080" class="toc">Background</a></span></dt>
     341<dt><span class="section"><a href="#idp284352" class="toc">Introduction</a></span></dt>
     342<dt><span class="section"><a href="#idp286144" class="toc">Background</a></span></dt>
    83343<dd><dl>
    84 <dt><span class="section"><a href="#idp242720" class="toc">Xerces C++ Structure</a></span></dt>
    85 <dt><span class="section"><a href="#idp286496" class="toc">The Parabix Framework</a></span></dt>
    86 <dt><span class="section"><a href="#idm5136" class="toc">Sequential vs. Parallel Paradigm</a></span></dt>
     344<dt><span class="section"><a href="#idp286784" class="toc">Xerces C++ Structure</a></span></dt>
     345<dt><span class="section"><a href="#idp330512" class="toc">The Parabix Framework</a></span></dt>
     346<dt><span class="section"><a href="#idp404544" class="toc">Sequential vs. Parallel Paradigm</a></span></dt>
    87347</dl></dd>
    88 <dt><span class="section"><a href="#idm1376" class="toc">Architecture</a></span></dt>
     348<dt><span class="section"><a href="#idp408928" class="toc">Architecture</a></span></dt>
    89349<dd><dl>
    90 <dt><span class="section"><a href="#idp322064" class="toc">Overview</a></span></dt>
    91 <dt><span class="section"><a href="#idp349632" class="toc">Character Set Adapters</a></span></dt>
    92 <dt><span class="section"><a href="#idp358272" class="toc">Combined Parallel Filtering</a></span></dt>
    93 <dt><span class="section"><a href="#idp376928" class="toc">Content Stream</a></span></dt>
    94 <dt><span class="section"><a href="#idp387584" class="toc">Namespace Handling</a></span></dt>
    95 <dt><span class="section"><a href="#idp406880" class="toc">Error Handling</a></span></dt>
     350<dt><span class="section"><a href="#idp409568" class="toc">Overview</a></span></dt>
     351<dt><span class="section"><a href="#idp437296" class="toc">Character Set Adapters</a></span></dt>
     352<dt><span class="section"><a href="#idp446192" class="toc">Combined Parallel Filtering</a></span></dt>
     353<dt><span class="section"><a href="#idp474608" class="toc">Content Stream</a></span></dt>
     354<dt><span class="section"><a href="#idp485264" class="toc">Namespace Handling</a></span></dt>
     355<dt><span class="section"><a href="#idp528864" class="toc">Error Handling</a></span></dt>
    96356</dl></dd>
    97 <dt><span class="section"><a href="#idp417920" class="toc">Multithreading with Pipeline Parallelism</a></span></dt>
    98 <dt><span class="section"><a href="#idp440784" class="toc">Performance</a></span></dt>
     357<dt><span class="section"><a href="#idp539088" class="toc">Multithreading with Pipeline Parallelism</a></span></dt>
     358<dt><span class="section"><a href="#idp562720" class="toc">Performance</a></span></dt>
    99359<dd><dl>
    100 <dt><span class="section"><a href="#idp443504" class="toc">Xerces C++ SAXCount</a></span></dt>
    101 <dt><span class="section"><a href="#idp467808" class="toc">GML2SVG</a></span></dt>
     360<dt><span class="section"><a href="#idp565440" class="toc">Xerces C++ SAXCount</a></span></dt>
     361<dt><span class="section"><a href="#idp590288" class="toc">GML2SVG</a></span></dt>
    102362</dl></dd>
    103 <dt><span class="section"><a href="#idp486112" class="toc">Conclusion and Future Work</a></span></dt>
     363<dt><span class="section"><a href="#idp607904" class="toc">Conclusion and Future Work</a></span></dt>
    104364</dl>
    105365</div>
    106 <div class="section" id="idp240288">
     366<div class="mast-box">
     367<p class="title"><a href="javascript:toggle('idp77472')" class="linkbox"><img class="toc-icon" src="plus.png" alt="expand" id="icon-idp77472"></a> <span onclick="javascript:toggle('idp77472');return true">Nigel Medforth</span></p>
     368<div class="folder" id="folder-idp77472" style="display:none">
     369<h5 class="author-email"><code class="email">&lt;<a class="email" href="mailto:nmedfort@sfu.ca">nmedfort@sfu.ca</a>&gt;</code></h5>
     370<div class="affiliation">
     371<p class="jobtitle">Developer</p>
     372<p class="orgname">International Characters Inc.</p>
     373</div>
     374<div class="affiliation">
     375<p class="jobtitle">Graduate Student, School of Computing Science</p>
     376<p class="orgname">Simon Fraser University </p>
     377</div>
     378<div class="personblurb">
     379<p id="idp59168">Nigel Medforth is an M.Sc. student at Simon Fraser University and the lead
     380               developer of icXML. He earned a Bachelor of Technology in Information Technology at
     381               Kwantlen Polytechnic University in 2009 and was awarded the Dean’s Medal for
     382               Outstanding Achievement.</p>
     383<p id="idp60176">Nigel is currently researching ways to leverage both the Parabix framework and
     384               stream-processing models to further accelerate XML parsing within icXML.</p>
     385</div>
     386</div>
     387</div>
     388<div class="mast-box">
     389<p class="title"><a href="javascript:toggle('idp63840')" class="linkbox"><img class="toc-icon" src="plus.png" alt="expand" id="icon-idp63840"></a> <span onclick="javascript:toggle('idp63840');return true">Dan Lin</span></p>
     390<div class="folder" id="folder-idp63840" style="display:none">
     391<h5 class="author-email"><code class="email">&lt;<a class="email" href="mailto:lindanl@sfu.ca">lindanl@sfu.ca</a>&gt;</code></h5>
     392<div class="affiliation">
     393<p class="jobtitle">Graduate Student, School of Computing Science</p>
     394<p class="orgname">Simon Fraser University </p>
     395</div>
     396<div class="personblurb"><p id="idp65552">Dan Lin is a Ph.D student at Simon Fraser University. She earned a Master of Science
     397             in Computing Science at Simon Fraser University in 2010. Her research focuses on high
     398             performance algorithms that exploit parallelization strategies on various multicore platforms.
     399           </p></div>
     400</div>
     401</div>
     402<div class="mast-box">
     403<p class="title"><a href="javascript:toggle('idp68112')" class="linkbox"><img class="toc-icon" src="plus.png" alt="expand" id="icon-idp68112"></a> <span onclick="javascript:toggle('idp68112');return true">Kenneth Herdy</span></p>
     404<div class="folder" id="folder-idp68112" style="display:none">
     405<h5 class="author-email"><code class="email">&lt;<a class="email" href="mailto:ksherdy@sfu.ca">ksherdy@sfu.ca</a>&gt;</code></h5>
     406<div class="affiliation">
     407<p class="jobtitle">Graduate Student, School of Computing Science</p>
     408<p class="orgname">Simon Fraser University </p>
     409</div>
     410<div class="personblurb">
     411<p id="idp270560"> Ken Herdy completed an Advanced Diploma of Technology in Geographical Information
     412               Systems at the British Columbia Institute of Technology in 2003 and earned a Bachelor
     413               of Science in Computing Science with a Certificate in Spatial Information Systems at
     414               Simon Fraser University in 2005. </p>
     415<p id="idp271296"> Ken is currently pursuing PhD studies in Computing Science at Simon Fraser
     416               University with industrial scholarship support from the Natural Sciences and
     417               Engineering Research Council of Canada, the Mathematics of Information Technology and
     418               Complex Systems NCE, and the BC Innovation Council. His research focus is an analysis
     419               of the principal techniques that may be used to improve XML processing performance in
     420               the context of the Geography Markup Language (GML). </p>
     421</div>
     422</div>
     423</div>
     424<div class="mast-box">
     425<p class="title"><a href="javascript:toggle('idp274032')" class="linkbox"><img class="toc-icon" src="plus.png" alt="expand" id="icon-idp274032"></a> <span onclick="javascript:toggle('idp274032');return true">Rob Cameron</span></p>
     426<div class="folder" id="folder-idp274032" style="display:none">
     427<h5 class="author-email"><code class="email">&lt;<a class="email" href="mailto:cameron@cs.sfu.ca">cameron@cs.sfu.ca</a>&gt;</code></h5>
     428<div class="affiliation">
     429<p class="jobtitle">Professor of Computing Science</p>
     430<p class="orgname">Simon Fraser University</p>
     431</div>
     432<div class="affiliation">
     433<p class="jobtitle">Chief Technology Officer</p>
     434<p class="orgname">International Characters, Inc.</p>
     435</div>
     436<div class="personblurb"><p id="idp275696">Dr. Rob Cameron is Professor of Computing Science and Associate Dean of Applied
     437               Sciences at Simon Fraser University. His research interests include programming
     438               language and software system technology, with a specific focus on high performance
     439               text processing using SIMD and multicore parallelism. He is the developer of the REX
     440               XML shallow parser as well as the parallel bit stream (Parabix) framework for SIMD
     441               text processing. </p></div>
     442</div>
     443</div>
     444</div></div>
     445<div id="navbar"></div>
     446<div id="balisage-header" style="background-color: #6699CC">
     447<a class="quiet" href="http://www.balisage.net"><img style="float:right;border:none" alt="Balisage logo" height="130" src="http://balisage.net/Logo/BalisageSeries-logo.png"></a><h2 class="page-header">Balisage: The Markup Conference</h2>
     448<h1 class="page-header">Proceedings preview</h1>
     449</div>
     450<div id="main">
     451<div class="article">
     452<h2 class="article-title" id="idp74624"></h2>
     453<div class="section" id="idp284352">
    107454<h2 class="title" style="clear: both">Introduction</h2>
    108 <p id="idp240928"></p>
    109 <p id="idp241184"></p>
    110 <p id="idp241440"></p>
    111 <p id="idp241696"></p>
    112 </div>
    113 <div class="section" id="idp242080">
     455<p id="idp284992"></p>
     456<p id="idp285248"></p>
     457<p id="idp285504"></p>
     458<p id="idp285760"></p>
     459</div>
     460<div class="section" id="idp286144">
    114461<h2 class="title" style="clear: both">Background</h2>
    115 <div class="section" id="idp242720">
     462<div class="section" id="idp286784">
    116463<h3 class="title" style="clear: both">Xerces C++ Structure</h3>
    117 <p id="idp243360"> The Xerces C++ parser
     464<p id="idp287424"> The Xerces C++ parser
    118465           
    119466           
     
    126473            parsing using either pull parsing or SAX/SAX2 push-style parsing as well as a DOM
    127474            tree-based parsing interface. </p>
    128 <p id="idp245488">
     475<p id="idp289584">
    129476           
    130477           
     
    135482            state. This introduces implicit dependencies between the various tasks within the
    136483            application that make it difficult to optimize for performance. As a complex software
    137             system, no one feature dominates the overall parsing performance. Figure
    138             \ref{fig:xerces-profile} shows the execution time profile of the top ten functions in a
     484            system, no one feature dominates the overall parsing performance. Table I
     485            shows the execution time profile of the top ten functions in a
    139486            typical run. Even if it were possible, Amdahl's Law dictates that tackling any one of
    140487            these functions for parallelization in isolation would only produce a minute improvement
     
    145492            expected that a comprehensive restructuring is required, involving all aspects of the
    146493            parser. </p>
    147 <div class="table-wrapper" id="idp248416">
     494<div class="table-wrapper" id="idp292544">
    148495<p class="title">Table I</p>
    149 <div class="caption"><p id="idm37568">Execution Time of Top 10 Xerces Functions</p></div>
     496<div class="caption"><p id="idm847680">Execution Time of Top 10 Xerces Functions</p></div>
    150497<table class="table">
    151498<colgroup span="1">
     
    202549</div>
    203550</div>
    204 <div class="section" id="idp286496">
     551<div class="section" id="idp330512">
    205552<h3 class="title" style="clear: both">The Parabix Framework</h3>
    206 <p id="idp287168"> The Parabix (parallel bit stream) framework is a transformative approach to XML
     553<p id="idp331184"> The Parabix (parallel bit stream) framework is a transformative approach to XML
    207554            parsing (and other forms of text processing.) The key idea is to exploit the
    208555            availability of wide SIMD registers (e.g., 128-bit) in commodity processors to represent
     
    229576             multiple
    230577            classes can share the classification cost. </p>
    231 <p id="idp298736">
    232            
    233          </p>
    234 <p id="idp302672"> Consider, for example, the XML source data stream shown in the first line of
    235             . The remaining lines of this figure show
     578<div class="table-wrapper" id="idp342752">
     579<p class="title">Table II</p>
     580<div class="caption"><p id="idp343264">XML Source Data</p></div>
     581<table class="table">
     582<colgroup span="1">
     583<col align="right" valign="top" span="1">
     584<col align="center" valign="top" span="1">
     585<col align="center" valign="top" span="1">
     586<col align="center" valign="top" span="1">
     587<col align="center" valign="top" span="1">
     588</colgroup>
     589<tbody>
     590<tr>
     591<td>String </td>
     592<td> <code class="code">b</code> </td>
     593<td> <code class="code">7</code> </td>
     594<td> <code class="code">&lt;</code> </td>
     595<td> <code class="code">A</code> </td>
     596</tr>
     597<tr>
     598<td>ASCII </td>
     599<td> <code class="code">0110001<span class="bold">0</span></code> </td>
     600<td> <code class="code">0011011<span class="bold">1</span></code> </td>
     601<td> <code class="code">0011110<span class="bold">0</span></code> </td>
     602<td> <code class="code">0100000<span class="bold">1</span></code> </td>
     603</tr>
     604</tbody>
     605</table>
     606</div>
     607<div class="table-wrapper" id="idp358656">
     608<p class="title">Table III</p>
     609<div class="caption"><p id="idp359168">8-bit ASCII Basis Bit Streams</p></div>
     610<table class="table">
     611<colgroup span="1">
     612<col align="center" valign="top" span="1">
     613<col align="center" valign="top" span="1">
     614<col align="center" valign="top" span="1">
     615<col align="center" valign="top" span="1">
     616<col align="center" valign="top" span="1">
     617<col align="center" valign="top" span="1">
     618<col align="center" valign="top" span="1">
     619<col align="center" valign="top" span="1">
     620</colgroup>
     621<tbody>
     622<tr>
     623<td> b<sub>0</sub> </td>
     624<td> b<sub>1</sub> </td>
     625<td> b<sub>2</sub> </td>
     626<td> b<sub>3</sub>
     627</td>
     628<td> b<sub>4</sub> </td>
     629<td> b<sub>5</sub> </td>
     630<td> b<sub>6</sub> </td>
     631<td> b<sub>7</sub> </td>
     632</tr>
     633<tr>
     634<td> <code class="code">0</code> </td>
     635<td> <code class="code">1</code> </td>
     636<td> <code class="code">1</code> </td>
     637<td> <code class="code">0</code> </td>
     638<td> <code class="code">0</code> </td>
     639<td> <code class="code">0</code> </td>
     640<td> <code class="code">1</code> </td>
     641<td> <span class="bold"><code class="code">0</code></span> </td>
     642</tr>
     643<tr>
     644<td> <code class="code">0</code> </td>
     645<td> <code class="code">0</code> </td>
     646<td> <code class="code">1</code> </td>
     647<td> <code class="code">1</code> </td>
     648<td> <code class="code">0</code> </td>
     649<td> <code class="code">1</code> </td>
     650<td> <code class="code">1</code> </td>
     651<td> <span class="bold"><code class="code">1</code></span> </td>
     652</tr>
     653<tr>
     654<td> <code class="code">0</code> </td>
     655<td> <code class="code">0</code> </td>
     656<td> <code class="code">1</code> </td>
     657<td> <code class="code">1</code> </td>
     658<td> <code class="code">1</code> </td>
     659<td> <code class="code">1</code> </td>
     660<td> <code class="code">0</code> </td>
     661<td> <span class="bold"><code class="code">0</code></span> </td>
     662</tr>
     663<tr>
     664<td> <code class="code">0</code> </td>
     665<td> <code class="code">1</code> </td>
     666<td> <code class="code">0</code> </td>
     667<td> <code class="code">0</code> </td>
     668<td> <code class="code">0</code> </td>
     669<td> <code class="code">0</code> </td>
     670<td> <code class="code">0</code> </td>
     671<td> <span class="bold"><code class="code">1</code></span> </td>
     672</tr>
     673</tbody>
     674</table>
     675</div>
     676<p id="idp399328"> Consider, for example, the XML source data stream shown in the first line of Table IV.
     677The remaining lines of this table show
    236678            several parallel bit streams that are computed in Parabix-style parsing, with each bit
    237679            of each stream in one-to-one correspondence to the source character code units of the
     
    244686            (using the technique of bitstream addition \cite{cameron-EuroPar2011}), namely streams
    245687            marking the element names, attribute names and attribute values of tags. </p>
    246 <p id="idm8496"> Two intuitions may help explain how the Parabix approach can lead to improved XML
     688<p id="idp401184"> Two intuitions may help explain how the Parabix approach can lead to improved XML
    247689            parsing performance. The first is that the use of the full register width offers a
    248690            considerable information advantage over sequential byte-at-a-time parsing. That is,
     
    253695            individual decision-bits, an approach that computes many of them in parallel (e.g., 128
    254696            bytes at a time using 128-bit registers) should provide substantial benefit. </p>
    255 <p id="idm7248"> Previous studies have shown that the Parabix approach improves many aspects of XML
     697<p id="idp402432"> Previous studies have shown that the Parabix approach improves many aspects of XML
    256698            processing, including transcoding \cite{Cameron2008}, character classification and
    257699            validation, tag parsing and well-formedness checking. The first Parabix parser used
     
    262704            \cite{HPCA2012}. Although these research prototypes handled the full syntax of
    263705            schema-less XML documents, they lacked the functionality required by full XML parsers. </p>
    264 <p id="idm5984"> Commercial XML processors support transcoding of multiple character sets and can
     706<p id="idp403696"> Commercial XML processors support transcoding of multiple character sets and can
    265707            parse and validate against multiple document vocabularies. Additionally, they provide
    266708            API facilities beyond those found in research prototypes, including the widely used SAX,
    267709            SAX2 and DOM interfaces. </p>
    268710</div>
    269 <div class="section" id="idm5136">
     711<div class="section" id="idp404544">
    270712<h3 class="title" style="clear: both">Sequential vs. Parallel Paradigm</h3>
    271 <p id="idm4496"> Xerces—like all traditional XML parsers—processes XML documents
     713<p id="idp405184"> Xerces—like all traditional XML parsers—processes XML documents
    272714            sequentially. Each character is examined to distinguish between the XML-specific markup,
    273715            such as a left angle bracket <code class="code">&lt;</code>, and the content held within the
    274716            document. As the parser progresses through the document, it alternates between markup
    275717            scanning, validation and content processing modes. </p>
    276 <p id="idm2928"> In other words, Xerces belongs to an equivalent class applications termed FSM
     718<p id="idp406720"> In other words, Xerces belongs to an equivalence class of applications termed FSM
    277719            applications\footnote{ Herein FSM applications are considered software systems whose
    278720            behaviour is defined by the inputs, current state and the events associated with
     
    280722            subsequent characters. Unfortunately, textual data tends to be unpredictable and any
    281723            character could induce a state transition. </p>
    282 <p id="idm2016"> Parabix-style XML parsers utilize a concept of layered processing. A block of source
     724<p id="idp407632"> Parabix-style XML parsers utilize a concept of layered processing. A block of source
    283725            text is transformed into a set of lexical bitstreams, which undergo a series of
    284726            operations that can be grouped into logical layers, e.g., transposition, character
     
    289731</div>
    290732</div>
    291 <div class="section" id="idm1376">
     733<div class="section" id="idp408928">
    292734<h2 class="title" style="clear: both">Architecture</h2>
    293 <div class="section" id="idp322064">
     735<div class="section" id="idp409568">
    294736<h3 class="title" style="clear: both">Overview</h3>
    295 <p id="idp322960"> icXML is more than an optimized version of Xerces. Many components were grouped,
     737<p id="idp410464"> icXML is more than an optimized version of Xerces. Many components were grouped,
    296738            restructured and rearchitected with pipeline parallelism in mind. In this section, we
    297739            highlight the core differences between the two systems. As shown in Figure
     
    319761<p class="title">Figure 1: Xerces Architecture</p>
    320762<div class="figure-contents">
    321 <div class="mediaobject" id="idp329696"><img alt="png image (xerces.png)" src="xerces.png" width="150cm"></div>
     763<div class="mediaobject" id="idp417104"><img alt="png image (xerces.png)" src="xerces.png" width="150cm"></div>
    322764<div class="caption"></div>
    323765</div>
    324766</div>
    325 <p id="idp331952"> In icXML functions are grouped into logical components. As shown in Figure
     767<p id="idp419360"> In icXML functions are grouped into logical components. As shown in Figure
    326768            2, two major categories exist: (1) the Parabix Subsystem and (2) the
    327769            Markup Processor. All tasks in (1) use the Parabix Framework \cite{HPCA2012}, which
     
    342784            described in Section \ref{section:arch:errorhandling}. From here, two data-independent
    343785            branches exist: the Symbol Resolver and Content Preparation Unit. </p>
    344 <p id="idp335968"> A typical XML file contains few unique element and attribute names—but
     786<p id="idp423520"> A typical XML file contains few unique element and attribute names—but
    345787            each of them will occur frequently. icXML stores these as distinct data structures,
    346788            called symbols, each with their own global identifier (GID). Using the symbol marker
     
    348790               Resolver</span> scans through the raw data to produce a sequence of GIDs, called
    349791            the <span class="ital">symbol stream</span>. </p>
    350 <p id="idp338448"> The final components of the Parabix Subsystem are the <span class="ital">Content
     792<p id="idp426112"> The final components of the Parabix Subsystem are the <span class="ital">Content
    351793               Preparation Unit</span> and <span class="ital">Content Stream
    352794            Generator</span>. The former takes the (transposed) basis bitstreams and selectively
    353795            filters them, according to the information provided by the Parallel Markup Parser, and
    354796            the latter transforms the filtered streams into the tagged UTF-16 <span class="ital">content stream</span>, discussed in Section \ref{section:arch:contentstream}. </p>
    355 <p id="idp341360"> Combined, the symbol and content stream form icXML's compressed IR of the XML
     797<p id="idp429024"> Combined, the symbol and content stream form icXML's compressed IR of the XML
    356798            document. The <span class="ital">Markup Processor</span> parses the IR to
    357799            validate and produce the sequential output for the end user. The <span class="ital">Final WF checker</span> performs inter-element well-formedness validation that
     
    365807<p class="title">Figure 2: icXML Architecture</p>
    366808<div class="figure-contents">
    367 <div class="mediaobject" id="idp347184"><img alt="png image (icxml.png)" src="icxml.png" width="500cm"></div>
     809<div class="mediaobject" id="idp434848"><img alt="png image (icxml.png)" src="icxml.png" width="500cm"></div>
    368810<div class="caption"></div>
    369811</div>
    370812</div>
    371813</div>
    372 <div class="section" id="idp349632">
     814<div class="section" id="idp437296">
    373815<h3 class="title" style="clear: both">Character Set Adapters</h3>
    374 <p id="idp350304"> In Xerces, all input is transcoded into UTF-16 to simplify the parsing costs of
     816<p id="idp437968"> In Xerces, all input is transcoded into UTF-16 to simplify the parsing costs of
    375817            Xerces itself and provide the end-consumer with a single encoding format. In the
    376818            important case of UTF-8 to UTF-16 transcoding, the transcoding costs can be significant,
     
    379821            other cases, transcoding may involve table look-up operations for each byte of input. In
    380822            any case, transcoding imposes at least a cost of buffer copying. </p>
    381 <p id="idp352016"> In icXML, however, the concept of Character Set Adapters (CSAs) is used to minimize
     823<p id="idp439024"> In icXML, however, the concept of Character Set Adapters (CSAs) is used to minimize
    382824            transcoding costs. Given a specified input encoding, a CSA is responsible for checking
    383825            that input code units represent valid characters, mapping the characters of the encoding
     
    385827            item streams), as well as supporting ultimate transcoding requirements. All of this work
    386828            is performed using the parallel bitstream representation of the source input. </p>
    387 <p id="idp352992"> An important observation is that many character sets are an extension to the legacy
     829<p id="idp440000"> An important observation is that many character sets are an extension to the legacy
    388830            7-bit ASCII character set. This includes the various ISO Latin character sets, UTF-8,
    389831            UTF-16 and many others. Furthermore, all significant characters for parsing XML are
    390832            confined to the ASCII repertoire. Thus, a single common set of lexical item calculations
    391833            serves to compute lexical item streams for all such ASCII-based character sets. </p>
    392 <p id="idp353872"> A second observation is that—regardless of which character set is
     834<p id="idp440880"> A second observation is that—regardless of which character set is
    393835            used—quite often all of the characters in a particular block of input will be
    394836            within the ASCII range. This is a very simple test to perform using the bitstream
     
    397839            be skipped. Transcoding to UTF-16 becomes trivial as the high eight bitstreams of the
    398840            UTF-16 form are each set to zero in this case. </p>
    399 <p id="idp355792"> A third observation is that repeated transcoding of the names of XML elements,
     841<p id="idp442800"> A third observation is that repeated transcoding of the names of XML elements,
    400842            attributes and so on can be avoided by using a look-up mechanism. That is, the first
    401843            occurrence of each symbol is stored in a look-up table mapping the input encoding to a
     
    404846            symbol look up is required to apply various XML validation rules, this achieves the
    405847            effect of transcoding each occurrence without additional cost. </p>
    406 <p id="idp356848"> The cost of individual character transcoding is avoided whenever a block of input is
     848<p id="idp443856"> The cost of individual character transcoding is avoided whenever a block of input is
    407849            confined to the ASCII subset and for all but the first occurrence of any XML element or
    408850            attribute name. Furthermore, when transcoding is required, the parallel bitstream
     
    415857            using bit scan operations. </p>
    416858</div>
    417 <div class="section" id="idp358272">
     859<div class="section" id="idp446192">
    418860<h3 class="title" style="clear: both">Combined Parallel Filtering</h3>
    419 <p id="idp358960"> As just mentioned, UTF-8 to UTF-16 transcoding involves marking all but the last
     861<p id="idp446880"> As just mentioned, UTF-8 to UTF-16 transcoding involves marking all but the last
    420862            bytes of multi-byte UTF-8 sequences as positions for deletion. For example, the two
    421863            Chinese characters <code class="code">你好</code> are represented as two
     
    431873            may then be completed by applying parallel deletion and inverse transposition of the
    432874            UTF-16 bitstreams\cite{Cameron2008}. </p>
    433 <p id="idp363120">
    434            
    435            
    436            
    437            
    438            
    439            
    440            
    441            
    442            
    443          </p>
    444 <p id="idp367056"> Rather than immediately paying the costs of deletion and transposition just for
     875<div class="table-wrapper" id="idp451040">
     876<p class="title">Table IV</p>
     877<div class="caption"><p id="idp451552">XML Source Data and Derived Parallel Bit Streams</p></div>
     878<table class="table">
     879<colgroup span="1">
     880<col align="center" valign="top" span="1">
     881<col align="left" valign="top" span="1">
     882</colgroup>
     883<tbody>
     884<tr>
     885<td> Source Data </td>
     886<td> <code class="code"> &lt;document&gt;fee&lt;element a1='fie' a2 = 'foe'&gt;&lt;/element&gt;fum&lt;/document&gt; </code>
     887</td>
     888</tr>
     889<tr>
     890<td> Tag Openers </td>
     891<td> <code class="code">1____________1____________________________1____________1__________</code>
     892</td>
     893</tr>
     894<tr>
     895<td> Start Tag Marks </td>
     896<td> <code class="code">_1____________1___________________________________________________</code>
     897</td>
     898</tr>
     899<tr>
     900<td> End Tag Marks </td>
     901<td> <code class="code">___________________________________________1____________1_________</code>
     902</td>
     903</tr>
     904<tr>
     905<td> Empty Tag Marks </td>
     906<td> <code class="code">__________________________________________________________________</code>
     907</td>
     908</tr>
     909<tr>
     910<td> Element Names </td>
     911<td> <code class="code">_11111111_____1111111_____________________________________________</code>
     912</td>
     913</tr>
     914<tr>
     915<td> Attribute Names </td>
     916<td> <code class="code">______________________11_______11_________________________________</code>
     917</td>
     918</tr>
     919<tr>
     920<td> Attribute Values </td>
     921<td> <code class="code">__________________________111________111__________________________</code>
     922</td>
     923</tr>
     924</tbody>
     925</table>
     926</div>
     927<p id="idp464640"> Rather than immediately paying the costs of deletion and transposition just for
    445928            transcoding, however, icXML defers these steps so that the deletion masks for several
    446929            stages of processing may be combined. In particular, this includes core XML requirements
     
    455938           
    456939         </p>
    457 <p id="idp369696"> In essence, the deletion masks for transcoding and for line break normalization each
     940<p id="idp467152"> In essence, the deletion masks for transcoding and for line break normalization each
    458941            represent a bitwise filter; these filters can be combined using bitwise-or so that the
    459942            parallel deletion algorithm need only be applied once. </p>
    460 <p id="idp370816"> A further application of combined filtering is the processing of XML character and
     943<p id="idp468464"> A further application of combined filtering is the processing of XML character and
    461944            entity references. Consider, for example, the references <code class="code">&amp;</code> or
    462945               <code class="code">&lt;</code>, which must be replaced in XML processing with the single
     
    471954            UTF-16 code unit. In the case that this is not true, it is addressed in
    472955            post-processing. </p>
    473 <p id="idp375600"> The final step of combined filtering occurs during the process of reducing markup
     956<p id="idp473280"> The final step of combined filtering occurs during the process of reducing markup
    474957            data to tag bytes preceding each significant XML transition as described in
    475958            section~\ref{section:arch:contentstream}. Overall, icXML avoids separate buffer copying
     
    481964            Haswell architecture. </p>
    482965</div>
    483 <div class="section" id="idp376928">
     966<div class="section" id="idp474608">
    484967<h3 class="title" style="clear: both">Content Stream</h3>
    485 <p id="idp377600"> A relatively-unique concept for icXML is the use of a filtered content stream.
     968<p id="idp475280"> A relatively-unique concept for icXML is the use of a filtered content stream.
    486969            Rather than parsing an XML document in its original format, the input is transformed
    487970            into one that is easier for the parser to iterate through and produce the sequential
     
    491974           
    492975            through the parallel filtering algorithm, described in section \ref{sec:parfilter}. </p>
    493 <p id="idp380000"> Combined with the symbol stream, the parser traverses the content stream to
     976<p id="idp477680"> Combined with the symbol stream, the parser traverses the content stream to
    494977            effectively reconstruct the input document in its output form. The initial <span class="ital">0</span> indicates an empty content string. The following
    495978               <code class="code">&gt;</code> indicates that a start tag without any attributes is the first
     
    503986            null character in the content stream in parallel, which in turn means the parser can
    504987            directly jump to the end of every string without scanning for it. </p>
    505 <p id="idp383392"> Following <code class="code">'fee'</code> is a <code class="code">=</code>, which marks the
     988<p id="idp481120"> Following <code class="code">'fee'</code> is a <code class="code">=</code>, which marks the
    506989            existence of an attribute. Because all of the intra-element validation was performed in the Parabix
    507990            Subsystem, this must be a legal attribute. Since attributes can only occur within start
     
    5171000            that the appropriate scope-nesting rules have been applied. </p>
    5181001</div>
    519 <div class="section" id="idp387584">
     1002<div class="section" id="idp485264">
    5201003<h3 class="title" style="clear: both">Namespace Handling</h3>
    521 <p id="idp388672"> In XML, namespaces prevents naming conflicts when multiple vocabularies are used
     1004<p id="idp486352"> In XML, namespaces prevent naming conflicts when multiple vocabularies are used
    5221005            together. It is especially important when a vocabulary has application-dependent meaning,
    5231006            such as when XML or SVG documents are embedded within XHTML files. Namespaces are bound
    5241007            to uniform resource identifiers (URIs), which are strings used to identify specific
    525             names or resources. On line 1 of Figure \ref{fig:namespace1}, the <code class="code">xmlns</code>
     1008            names or resources. On line 1 in the Table below, the <code class="code">xmlns</code>
    5261009            attribute instructs the XML processor to bind the prefix <code class="code">p</code> to the URI
    5271010               '<code class="code">pub.net</code>' and the default (empty) prefix to
     
    5361019            uniquely-named items because the current vocabulary is determined by the namespace(s)
    5371020            that are in-scope. </p>
    538 <p id="idp395792">
    539            
    540          </p>
    541 <p id="idp396336"> In both Xerces and icXML, every URI has a one-to-one mapping to a URI ID. These
     1021<div class="table-wrapper" id="idp493520">
     1022<p class="title">Table V</p>
     1023<div class="caption"><p id="idp494032">XML Namespace Example</p></div>
     1024<table class="table">
     1025<colgroup span="1">
     1026<col align="center" valign="top" span="1">
     1027<col align="left" valign="top" span="1">
     1028</colgroup>
     1029<tbody>
     1030<tr>
     1031<td>1. </td>
     1032<td>&lt;book xmlns:p="pub.net" xmlns="book.org"&gt; </td>
     1033</tr>
     1034<tr>
     1035<td>2. </td>
     1036<td>  &lt;title&gt;BOOK NAME&lt;/title&gt; </td>
     1037</tr>
     1038<tr>
     1039<td>3. </td>
     1040<td>  &lt;p:name&gt;PUBLISHER NAME&lt;/p:name&gt; </td>
     1041</tr>
     1042<tr>
     1043<td>4. </td>
     1044<td>  &lt;price&gt;X&lt;/price&gt; </td>
     1045</tr>
     1046<tr>
     1047<td>5. </td>
     1048<td>  &lt;price xmlns="publisher.net"&gt;Y&lt;/price&gt; </td>
     1049</tr>
     1050<tr>
     1051<td>6. </td>
     1052<td>&lt;/book&gt; </td>
     1053</tr>
     1054</tbody>
     1055</table>
     1056</div>
     1057<p id="idp503056"> In both Xerces and icXML, every URI has a one-to-one mapping to a URI ID. These
    5421058            persist for the lifetime of the application through the use of a global URI pool. Xerces
    5431059            maintains a stack of namespace scopes that is pushed (popped) every time a start tag
     
    5471063            those that declare a set of namespaces upfront and never change them, and (2) those that
    5481064            repeatedly modify the namespaces in predictable patterns. </p>
    549 <p id="idp398048"> For that reason, icXML contains an independent namespace stack and utilizes bit
     1065<p id="idp504192"> For that reason, icXML contains an independent namespace stack and utilizes bit
    5501066            vectors to cheaply perform
    5511067             When a prefix is
     
    5611077            found using a bit-scan intrinsic. A namespace binding table, similar to Table
    5621078            VI, provides the actual URI ID. </p>
    563 <p id="idp402432">
    564            
    565          </p>
    566 <p id="idp402976">
     1079<div class="table-wrapper" id="idp508608">
     1080<p class="title">Table VI</p>
     1081<div class="caption"><p id="idp509120">Namespace Binding Table Example</p></div>
     1082<table class="table">
     1083<colgroup span="1">
     1084<col align="center" valign="top" span="1">
     1085<col align="center" valign="top" span="1">
     1086<col align="center" valign="top" span="1">
     1087<col align="center" valign="top" span="1">
     1088<col align="center" valign="top" span="1">
     1089</colgroup>
     1090<thead><tr>
     1091<th>NSID </th>
     1092<th> Prefix </th>
     1093<th> URI </th>
     1094<th> Prefix ID </th>
     1095<th> URI ID </th>
     1096</tr></thead>
     1097<tbody>
     1098<tr>
     1099<td>0 </td>
     1100<td> <code class="code"> p</code> </td>
     1101<td> <code class="code"> pub.net</code> </td>
     1102<td> 0 </td>
     1103<td> 0 </td>
     1104</tr>
     1105<tr>
     1106<td>1 </td>
     1107<td> <code class="code"> xmlns</code> </td>
     1108<td> <code class="code"> books.org</code> </td>
     1109<td> 1 </td>
     1110<td> 1 </td>
     1111</tr>
     1112<tr>
     1113<td>2 </td>
     1114<td> <code class="code"> xmlns</code> </td>
     1115<td> <code class="code"> pub.net</code> </td>
     1116<td> 1 </td>
     1117<td> 0 </td>
     1118</tr>
     1119</tbody>
     1120</table>
     1121</div>
     1122<p id="idp525504">
    5671123           
    5681124           
     
    5701126           
    5711127         </p>
    572 <p id="idp405424"> To ensure that scoping rules are adhered to, whenever a start tag is encountered,
     1128<p id="idp527408"> To ensure that scoping rules are adhered to, whenever a start tag is encountered,
    5731129            any modification to the currently visible namespaces is calculated and stored within a
    5741130            stack of bit vectors denoting the locally modified namespace bindings. When an end tag
     
    5791135         </p>
    5801136</div>
    581 <div class="section" id="idp406880">
     1137<div class="section" id="idp528864">
    5821138<h3 class="title" style="clear: both">Error Handling</h3>
    583 <p id="idp407552">
     1139<p id="idp529536">
    5841140           
    5851141            Xerces outputs error messages in two ways: through the programmer API and as thrown
     
    5901146            \ref{fig:icxml-arch}, icXML is divided into two sections: the Parabix Subsystem and
    5911147            Markup Processor, each with its own system for detecting and producing error messages. </p>
    592 <p id="idp409184"> Within the Parabix Subsystem, all computations are performed in parallel, a block at
     1148<p id="idp531168"> Within the Parabix Subsystem, all computations are performed in parallel, a block at
    5931149            a time. Errors are derived as artifacts of bitstream calculations, with a 1-bit marking
    5941150            the byte-position of an error within a block, and the type of error is determined by the
     
    6231179            detected, the sum of those skipped positions is subtracted from the distance to
    6241180            determine the actual column number. </p>
    625 <p id="idp414672"> The Markup Processor is a state-driven machine. As such, error detection within it
     1181<p id="idp536656"> The Markup Processor is a state-driven machine. As such, error detection within it
    6261182            is very similar to Xerces. However, reporting the correct line/column is a much more
    6271183            difficult problem. The Markup Processor parses the content stream, which is a series of
     
    6371193</div>
    6381194</div>
    639 <div class="section" id="idp417920">
     1195<div class="section" id="idp539088">
    6401196<h2 class="title" style="clear: both">Multithreading with Pipeline Parallelism</h2>
    641 <p id="idp418560"> As discussed in section \ref{background:xerces}, Xerces can be considered a FSM
     1197<p id="idp539792"> As discussed in section \ref{background:xerces}, Xerces can be considered an FSM
    6421198         application. These are "embarrassingly
    6431199         sequential."\cite{Asanovic:EECS-2006-183} and notoriously difficult to
     
    6471203         well into the general model of pipeline parallelism, in which each thread is in charge of a
    6481204         single module or group of modules. </p>
    649 <p id="idp420416"> The most straightforward division of work in icXML is to separate the Parabix Subsystem
     1205<p id="idp541648"> The most straightforward division of work in icXML is to separate the Parabix Subsystem
    6501206         and the Markup Processor, as distinct logical layers, into two separate stages. The
    6511207         resultant application, <span class="ital">icXML-p</span>, is a coarse-grained
     
    6681224            <code class="code">T<sub>2</sub></code> to finish reading the shared data before it can
    6691225         reuse the memory space. </p>
    670 <p id="idp429488">
     1226<p id="idp550720">
    6711227        <div class="figure" id="threads_timeline1">
    6721228<p class="title">Figure 3: Thread Balance in Two-Stage Pipelines</p>
    6731229<div class="figure-contents">
    674 <div class="mediaobject" id="idp430832"><img alt="png image (threads_timeline1.png)" src="threads_timeline1.png" width="500cm"></div>
     1230<div class="mediaobject" id="idp552112"><img alt="png image (threads_timeline1.png)" src="threads_timeline1.png" width="500cm"></div>
    6751231<div class="caption"></div>
    6761232</div>
     
    6791235<p class="title">Figure 4: Thread Balance in Two-Stage Pipelines</p>
    6801236<div class="figure-contents">
    681 <div class="mediaobject" id="idp434208"><img alt="png image (threads_timeline2.png)" src="threads_timeline2.png" width="500cm"></div>
     1237<div class="mediaobject" id="idp555488"><img alt="png image (threads_timeline2.png)" src="threads_timeline2.png" width="500cm"></div>
    6821238<div class="caption"></div>
    6831239</div>
    6841240</div>
    6851241      </p>
    686 <p id="idp436624"> Overall, our design is intended to benefit a range of applications. Conceptually, we
     1242<p id="idp557904"> Overall, our design is intended to benefit a range of applications. Conceptually, we
    6871243         consider two design points. The first, the parsing performed by the Parabix Subsystem
    6881244         dominates at 67% of the overall cost, with the cost of application processing (including
     
    6901246         scenario, the cost of application processing dominates at 60%, while the cost of XML
    6911247         parsing represents an overhead of 40%. </p>
    692 <p id="idp437536"> Our design is predicated on a goal of using the Parabix framework to achieve a 50% to
     1248<p id="idp558816"> Our design is predicated on a goal of using the Parabix framework to achieve a 50% to
    6931249         100% improvement in the parsing engine itself. In a best case scenario, a 100% improvement
    6941250         of the Parabix Subsystem for the design point in which XML parsing dominates at 67% of the
     
    6981254         about 33% of the original work. In this case, Amdahl's law predicts that we could expect up
    6991255         to a 3x speedup at best. </p>
    700 <p id="idp438656"> At the other extreme of our design range, we consider an application in which core
     1256<p id="idp559936"> At the other extreme of our design range, we consider an application in which core
    7011257         parsing cost is 40%. Assuming the 2x speedup of the Parabix Subsystem over the
    7021258         corresponding Xerces core, single-threaded icXML delivers a 25% speedup. However, the most
     
    7041260         the entire latency of parsing within the serial time required by the application. In this
    7051261         case, we achieve an overall speedup in processing time by 1.67x. </p>
    706 <p id="idp439600"> Although the structure of the Parabix Subsystem allows division of the work into
     1262<p id="idp560880"> Although the structure of the Parabix Subsystem allows division of the work into
    7071263         several pipeline stages and has been demonstrated to be effective for four pipeline stages
    7081264         in a research prototype \cite{HPCA2012}, our analysis here suggests that the further
     
    7121268         the cost of application logic that could match reductions in core parsing cost. </p>
    7131269</div>
    714 <div class="section" id="idp440784">
     1270<div class="section" id="idp562720">
    7151271<h2 class="title" style="clear: both">Performance</h2>
    716 <p id="idp441456"> We evaluate Xerces-C++ 3.1.1, icXML, icXML-p against two benchmarking applications: the
     1272<p id="idp563392"> We evaluate Xerces-C++ 3.1.1, icXML, icXML-p against two benchmarking applications: the
    7171273         Xerces C++ SAXCount sample application, and a real world GML to SVG transformation
    7181274         application. We investigated XML parser performance using an Intel Core i7 quad-core (Sandy
     
    7201276         L1 cache, 256 kB (per core) L2 cache, 8 MB L3 cache) running the 64-bit version of Ubuntu
    7211277         12.04 (Linux). </p>
    722 <p id="idp442368"> We analyzed the execution profiles of each XML parser using the performance counters
     1278<p id="idp564304"> We analyzed the execution profiles of each XML parser using the performance counters
    7231279         found in the processor. We chose several key hardware events that provide insight into the
    7241280         profile of each application and indicate if the processor is doing useful work. The set of
     
    7281284         collection of hardware performance monitoring statistics. In addition, we used the Linux
    7291285         perf \cite{perf} utility to collect per core hardware events. </p>
    730 <div class="section" id="idp443504">
     1286<div class="section" id="idp565440">
    7311287<h3 class="title" style="clear: both">Xerces C++ SAXCount</h3>
    732 <p id="idp444176"> Xerces comes with sample applications that demonstrate salient features of the
     1288<p id="idp566112"> Xerces comes with sample applications that demonstrate salient features of the
    7331289            parser. SAXCount is the simplest such application: it counts the elements, attributes
    7341290            and characters of a given XML file using the (event based) SAX API and prints out the
    7351291            totals. </p>
    736 <p id="idp444880"> Table \ref{XMLDocChars} shows the document characteristics of the XML input files
     1292<p id="idp566864"> Table \ref{XMLDocChars} shows the document characteristics of the XML input files
    7371293            selected for the Xerces C++ SAXCount benchmark. The jaw.xml represents document-oriented
    7381294            XML inputs and contains the three-byte and four-byte UTF-8 sequence required for the
    7391295            UTF-8 encoding of Japanese characters. The remaining data files are data-oriented XML
    7401296            documents and consist entirely of single byte encoded ASCII characters.
    741   <div class="table-wrapper" id="idp445616">
    742 <p class="title">Table II</p>
    743 <div class="caption"><p id="idp446128">XML Document Characteristics</p></div>
     1297  <div class="table-wrapper" id="idp567600">
     1298<p class="title">Table VII</p>
     1299<div class="caption"><p id="idp568112">XML Document Characteristics</p></div>
    7441300<table class="table">
    7451301<colgroup span="1">
    7461302<col align="left" valign="top" span="1">
    747 <col align="left" valign="top" span="1">
    748 <col align="left" valign="top" span="1">
    749 <col align="left" valign="top" span="1">
    750 <col align="left" valign="top" span="1">
     1303<col align="centre" valign="top" span="1">
     1304<col align="centre" valign="top" span="1">
     1305<col align="centre" valign="top" span="1">
     1306<col align="centre" valign="top" span="1">
    7511307</colgroup>
    7521308<tbody>
     
    7901346</div>           
    7911347</p>
    792 <p id="idp461808"> A key predictor of the overall parsing performance of an XML file is markup
     1348<p id="idp583696"> A key predictor of the overall parsing performance of an XML file is markup
    7931349            density\footnote{ Markup Density: the ratio of markup bytes used to define the structure
    7941350            of the document vs. its file size.}. This metric has substantial influence on the
     
    7971353            of document-oriented and data-oriented XML files to analyze performance over a spectrum
    7981354            of markup densities. </p>
    799 <p id="idp462816"> Figure \ref{perf_SAX} compares the performance of Xerces, icXML and pipelined icXML
     1355<p id="idp585312"> Figure \ref{perf_SAX} compares the performance of Xerces, icXML and pipelined icXML
    8001356            in terms of CPU cycles per byte for the SAXCount application. The speedup for icXML over
    8011357            Xerces is 1.3x to 1.8x. With two threads on the multicore machine, icXML-p can achieve
     
    8041360            icXML-p performs better as markup-density increases because the work performed by each
    8051361            stage is well balanced in this application. </p>
    806 <p id="idp463856">
     1362<p id="idp586352">
    8071363        <div class="figure" id="perf_SAX">
    8081364<p class="title">Figure 5: SAXCount Performance Comparison</p>
    8091365<div class="figure-contents">
    810 <div class="mediaobject" id="idp465264"><img alt="png image (perf_SAX.png)" src="perf_SAX.png" width="500cm"></div>
     1366<div class="mediaobject" id="idp587744"><img alt="png image (perf_SAX.png)" src="perf_SAX.png" width="500cm"></div>
    8111367<div class="caption"></div>
    8121368</div>
     
    8141370         </p>
    8151371</div>
    816 <div class="section" id="idp467808">
     1372<div class="section" id="idp590288">
    8171373<h3 class="title" style="clear: both">GML2SVG</h3>
    818 <p id="idp468480">       As a more substantial application of XML processing, the GML-to-SVG (GML2SVG) application
     1374<p id="idp590960">       As a more substantial application of XML processing, the GML-to-SVG (GML2SVG) application
    8191375was chosen.   This application transforms geospatially encoded data represented using
    8201376an XML representation in the form of Geography Markup Language (GML) \cite{lake2004geography}
     
    8281384a known XML format for the purpose of analysis and restructuring to meet
    8291385the requirements of an alternative format.</p>
    830 <p id="idp470720">Our GML to SVG data translations are executed on GML source data
     1386<p id="idp592288">Our GML to SVG data translations are executed on GML source data
    8311387modelling the city of Vancouver, British Columbia, Canada.
    8321388The GML source document set
     
    8381394<p class="title">Figure 6: Performance Comparison for GML2SVG</p>
    8391395<div class="figure-contents">
    840 <div class="mediaobject" id="idp472704"><img alt="png image (Throughput.png)" src="Throughput.png" width="500cm"></div>
     1396<div class="mediaobject" id="idp594320"><img alt="png image (Throughput.png)" src="Throughput.png" width="500cm"></div>
    8411397<div class="caption"></div>
    8421398</div>
    8431399</div>
    844 <p id="idp474992">Figure \ref{perf_GML2SVG} compares the performance of the GML2SVG application linked against
     1400<p id="idp596608">Figure \ref{perf_GML2SVG} compares the performance of the GML2SVG application linked against
    8451401the Xerces, \icXML{} and \icXMLp{}.   
    8461402On the GML workload with this application, single-thread \icXML{}
     
    8491405Using \icXMLp{}, a further throughput increase to 111 MB/sec was recorded,
    8501406approximately a 2X speedup.</p>
    851 <p id="idp475824">An important aspect of \icXML{} is the replacement of much branch-laden
     1407<p id="idp597440">An important aspect of \icXML{} is the replacement of much branch-laden
    8521408sequential code inside Xerces with straight-line SIMD code using far
    8531409fewer branches.  Figure \ref{branchmiss_GML2SVG} shows the corresponding
     
    8601416<p class="title">Figure 7: Comparative Branch Misprediction Rate</p>
    8611417<div class="figure-contents">
    862 <div class="mediaobject" id="idp477936"><img alt="png image (BM.png)" src="BM.png" width="500cm"></div>
     1418<div class="mediaobject" id="idp600144"><img alt="png image (BM.png)" src="BM.png" width="500cm"></div>
    8631419<div class="caption"></div>
    8641420</div>
    8651421</div>
    866 <p id="idp480224">The behaviour of the three versions with respect to L1 cache misses per kB is shown
     1422<p id="idp602432">The behaviour of the three versions with respect to L1 cache misses per kB is shown
    8671423in Figure \ref{cachemiss_GML2SVG}.   Improvements are shown in both instruction-
    8681424and data-cache performance with the improvements in instruction-cache
     
    8761432<p class="title">Figure 8: Comparative Cache Miss Rate</p>
    8771433<div class="figure-contents">
    878 <div class="mediaobject" id="idp482336"><img alt="png image (CM.png)" src="CM.png" width="500cm"></div>
     1434<div class="mediaobject" id="idp604544"><img alt="png image (CM.png)" src="CM.png" width="500cm"></div>
    8791435<div class="caption"></div>
    8801436</div>
    8811437</div>
    882 <p id="idp484624">One caveat with this study is that the GML2SVG application did not exhibit
     1438<p id="idp606832">One caveat with this study is that the GML2SVG application did not exhibit
    8831439a relative balance of processing between application code and Xerces library
    8841440code reaching the 33\% figure.  This suggests that for this application and
     
    8881444</div>
    8891445</div>
    890 <div class="section" id="idp486112">
     1446<div class="section" id="idp607904">
    8911447<h2 class="title" style="clear: both">Conclusion and Future Work</h2>
    892 <p id="idp486800"> This paper is the first case study documenting the significant performance benefits
     1448<p id="idp608592"> This paper is the first case study documenting the significant performance benefits
    8931449         that may be realized through the integration of parallel bitstream technology into existing
    8941450         widely-used software libraries. In the case of the Xerces-C++ XML parser, the combined
     
    9001456         technologies, this is an important case study demonstrating the general feasibility of
    9011457         these techniques. </p>
    902 <p id="idp488080"> The further development of icXML to move beyond 2-stage pipeline parallelism is
     1458<p id="idp609872"> The further development of icXML to move beyond 2-stage pipeline parallelism is
    9031459         ongoing, with realistic prospects for four reasonably balanced stages within the library.
    9041460         For applications such as GML2SVG which are dominated by time spent on XML parsing, such a
    9051461         multistage pipelined parsing library should offer substantial benefits. </p>
    906 <p id="idp488848"> The example of XML parsing may be considered prototypical of finite-state machines
     1462<p id="idp610640"> The example of XML parsing may be considered prototypical of finite-state machines
    9071463         applications which have sometimes been considered "embarrassingly
    9081464         sequential" and so difficult to parallelize that "nothing
     
    9101466         point in making the case that parallelization can indeed be helpful across a broad array of
    9111467         application types. </p>
    912 <p id="idp490224"> To overcome the software engineering challenges in applying parallel bitstream
     1468<p id="idp612016"> To overcome the software engineering challenges in applying parallel bitstream
    9131469         technology to existing software systems, it is clear that better library and tool support
    9141470         is needed. The techniques used in the implementation of icXML and documented in this paper
     
    9171473      </p>
    9181474</div>
    919 <div class="bibliography" id="idp491712">
     1475<div class="bibliography" id="idp613504">
    9201476<h2 class="title" style="clear:both">Bibliography</h2>
    9211477<p class="bibliomixed" id="XMLChip09">[Leventhal and Lemoine 2009] Leventhal, Michael and
     
    9761532</div>
    9771533</div>
     1534<div id="balisage-footer"><h3 style="font-family: serif; margin:0.25em; font-style: italic">Balisage Series on Markup Technologies</h3></div>
     1535</div>
     1536</body>
     1537</html>
    9781538<div id="balisage-footer"><h3 style="font-family: serif; margin:0.25em">
    9791539<i>Balisage:</i> <small>The Markup Conference</small>
  • docs/Balisage13/Bal2013came0601/Bal2013came0601.xml

    r3050 r3051  
    169169            state. This introduces implicit dependencies between the various tasks within the
    170170            application that make it difficult to optimize for performance. As a complex software
    171             system, no one feature dominates the overall parsing performance. Figure
    172             \ref{fig:xerces-profile} shows the execution time profile of the top ten functions in a
     171            system, no one feature dominates the overall parsing performance. Table I
     172            shows the execution time profile of the top ten functions in a
    173173            typical run. Even if it were possible, Amdahl's Law dictates that tackling any one of
    174174            these functions for parallelization in isolation would only produce a minute improvement
     
    231231            <!-- the classification cost could be amortized over many character classes.--> multiple
    232232            classes can share the classification cost. </para>
    233          <para>
    234             <!-- FIGURE
    235 \begin{figure}[h]
    236 \begin{center}
    237 \begin{tabular}{r c c c c }
    238 String & \ttfamily{b} & \ttfamily{7} & \ttfamily{\verb`<`} & \ttfamily{A} \\
    239 ASCII & \ttfamily{\footnotesize 0110001{\bfseries 0}} & \ttfamily{\footnotesize 0011011{\bfseries 1}} & \ttfamily{\footnotesize 0011110{\bfseries 0}} & \ttfamily{\footnotesize 0100000{\bfseries 1}} \\
    240 \hline
    241 \end{tabular}
    242 \end{center}
    243 \begin{center}
    244 \begin{tabular}{r |c |c |c |c |c |c |c |c |}
    245  & $\mbox{\fontsize{11}{11}\selectfont $\tt b<subscript>{0}</subscript>$}$ & $\mbox{\fontsize{11}{11}\selectfont $\tt b<subscript>{1}</subscript>$}$ & $\mbox{\fontsize{11}{11}\selectfont $\tt b<subscript>{2}</subscript>$}$ & $\mbox{\fontsize{11}{11}\selectfont $\tt b<subscript>{3}$}$ & $\mbox{\fontsize{11}{11}\selectfont $\tt b<subscript>{4}</subscript>$}$ & $\mbox{\fontsize{11}{11}\selectfont $\tt b<subscript>{5}</subscript>$}$ & $\mbox{\fontsize{11}{11}\selectfont $\tt b<subscript>{6}</subscript>$}$ & $\mbox{\fontsize{11}{11}\selectfont $\tt b<subscript>{7}</subscript>$}$ \\
    246  & \ttfamily{0} & \ttfamily{1} & \ttfamily{1} & \ttfamily{0} & \ttfamily{0} & \ttfamily{0} & \ttfamily{1} & \bfseries\ttfamily{0} \\
    247  & \ttfamily{0} & \ttfamily{0} & \ttfamily{1} & \ttfamily{1} & \ttfamily{0} & \ttfamily{1} & \ttfamily{1} & \bfseries\ttfamily{1} \\
    248  & \ttfamily{0} & \ttfamily{0} & \ttfamily{1} & \ttfamily{1} & \ttfamily{1} & \ttfamily{1} & \ttfamily{0} & \bfseries\ttfamily{0} \\
    249  & \ttfamily{0} & \ttfamily{1} & \ttfamily{0} & \ttfamily{0} & \ttfamily{0} & \ttfamily{0} & \ttfamily{0} & \bfseries\ttfamily{1} \\
    250 \end{tabular}
    251 \end{center}
    252 \caption{8-bit ASCII Basis Bit Streams}
    253 \label{fig:BitStreamsExample}
    254 \end{figure}
    255 -->
    256          </para>
     233         <table>
     234                  <caption>
     235                     <para>XML Source Data</para>
     236                  </caption>
     237                  <colgroup>
     238                     <col align="right" valign="top"/>
     239                     <col align="centre" valign="top"/>
     240                     <col align="centre" valign="top"/>
     241                     <col align="centre" valign="top"/>
     242                     <col align="centre" valign="top"/>
     243                  </colgroup>
     244                  <tbody>
     245  <tr><td>String </td><td> <code>b</code> </td><td> <code>7</code> </td><td> <code>&lt;</code> </td><td> <code>A</code> </td></tr>
     246  <tr><td>ASCII </td><td> <code>0110001<emphasis role="bold">0</emphasis></code> </td><td> <code>0011011<emphasis role="bold">1</emphasis></code> </td><td> <code>0011110<emphasis role="bold">0</emphasis></code> </td><td> <code>0100000<emphasis role="bold">1</emphasis></code> </td></tr>
     247  </tbody>
     248 
     249 
     250</table>         
     251         <table>
     252                  <caption>
     253                     <para>8-bit ASCII Basis Bit Streams</para>
     254                  </caption>
     255                  <colgroup>
     256                     <col align="centre" valign="top"/>
     257                     <col align="centre" valign="top"/>
     258                     <col align="centre" valign="top"/>
     259                     <col align="centre" valign="top"/>
     260                     <col align="centre" valign="top"/>
     261                     <col align="centre" valign="top"/>
     262                     <col align="centre" valign="top"/>
     263                     <col align="centre" valign="top"/>
     264                  </colgroup>
     265                  <tbody>
     266<tr><td> b<subscript>0</subscript> </td><td> b<subscript>1</subscript> </td><td> b<subscript>2</subscript> </td><td> b<subscript>3</subscript></td><td> b<subscript>4</subscript> </td><td> b<subscript>5</subscript> </td><td> b<subscript>6</subscript> </td><td> b<subscript>7</subscript> </td></tr>
     267 <tr><td> <code>0</code> </td><td> <code>1</code> </td><td> <code>1</code> </td><td> <code>0</code> </td><td> <code>0</code> </td><td> <code>0</code> </td><td> <code>1</code> </td><td> <emphasis role="bold"><code>0</code></emphasis> </td></tr>
     268 <tr><td> <code>0</code> </td><td> <code>0</code> </td><td> <code>1</code> </td><td> <code>1</code> </td><td> <code>0</code> </td><td> <code>1</code> </td><td> <code>1</code> </td><td> <emphasis role="bold"><code>1</code></emphasis> </td></tr>
     269 <tr><td> <code>0</code> </td><td> <code>0</code> </td><td> <code>1</code> </td><td> <code>1</code> </td><td> <code>1</code> </td><td> <code>1</code> </td><td> <code>0</code> </td><td> <emphasis role="bold"><code>0</code></emphasis> </td></tr>
     270 <tr><td> <code>0</code> </td><td> <code>1</code> </td><td> <code>0</code> </td><td> <code>0</code> </td><td> <code>0</code> </td><td> <code>0</code> </td><td> <code>0</code> </td><td> <emphasis role="bold"><code>1</code></emphasis> </td></tr>
     271  </tbody>
     272 
     273 
     274</table>         
     275
    257276         <!-- Using a mixture of boolean-logic and arithmetic operations, character-class -->
    258277         <!-- bit streams can be transformed into lexical bit streams, where the presence of -->
     
    260279         <!-- process, intra-element well-formedness validation is performed on each block -->
    261280         <!-- of text. -->
    262          <para> Consider, for example, the XML source data stream shown in the first line of
    263             <!-- FIGURE REF Figure \ref{fig:parabix1} -->. The remaining lines of this figure show
     281         <para> Consider, for example, the XML source data stream shown in the first line of Table II.
     282The remaining lines of this figure show
    264283            several parallel bit streams that are computed in Parabix-style parsing, with each bit
    265284            of each stream in one-to-one correspondence to the source character code units of the
     
    472491            may then be completed by applying parallel deletion and inverse transposition of the
    473492            UTF-16 bitstreams\cite{Cameron2008}. </para>
    474          <para>
    475             <!-- FIGURE
    476 \begin{figure*}[tbh]
    477 \begin{center}
    478 \begin{tabular}{rr}\\
    479 Source Data & \verb`<document>fee<element a1='fie' a2 = 'foe'></element>fum</document>`\\
    480 -->
    481             <!-- Tag Openers & \verb`1____________1____________________________1____________1__________`\\-->
    482             <!-- Start Tag Marks & \verb`_1____________1___________________________________________________`\\-->
    483             <!-- End Tag Marks & \verb`___________________________________________1____________1_________`\\-->
    484             <!-- Empty Tag Marks & \verb`__________________________________________________________________`\\-->
    485             <!-- Element Names & \verb`_11111111_____1111111_____________________________________________`\\-->
    486             <!-- Attribute Names & \verb`______________________11_______11_________________________________`\\-->
    487             <!-- Attribute Values & \verb`__________________________111________111__________________________`\\-->
    488             <!-- FIGURE
    489 String Ends & \verb`1____________1_______________1__________1_1____________1__________`\\
    490 Markup Identifiers & \verb`_________1______________1_________1______1_1____________1_________`\\
    491 Deletion Mask & \verb`_11111111_____1111111111_1____1111_11_______11111111_____111111111`\\
    492 Undeleted Data & \verb``<emphasis role="ital">0</emphasis>\verb`________>fee`<emphasis role="ital">0</emphasis>\verb`__________=_fie`<emphasis role="ital">0</emphasis>\verb`____=__foe`{\tt<emphasis role="ital">0</emphasis>\verb`>`<emphasis role="ital">0</emphasis>\verb`/________fum`<emphasis role="ital">0</emphasis>\verb`/_________`
    493 \end{tabular}
    494 \end{center}
    495 \caption{XML Source Data and Derived Parallel Bit Streams}
    496 \label{fig:parabix2}
    497 \end{figure*}
    498 -->
    499          </para>
     493<table>
     494                  <caption>
     495                     <para>XML Source Data and Derived Parallel Bit Streams</para>
     496                  </caption>
     497                  <colgroup>
     498                     <col align="centre" valign="top"/>
     499                     <col align="left" valign="top"/>
     500                  </colgroup>
     501                  <tbody>
     502          <tr><td> Source Data </td><td> <code> <![CDATA[<document>fee<element a1='fie' a2 = 'foe'></element>fum</document>]]> </code></td></tr>
     503          <tr><td> Tag Openers </td><td> <code>1____________1____________________________1____________1__________</code></td></tr>
     504           <tr><td> Start Tag Marks </td><td> <code>_1____________1___________________________________________________</code></td></tr>
     505           <tr><td> End Tag Marks </td><td> <code>___________________________________________1____________1_________</code></td></tr>
     506           <tr><td> Empty Tag Marks </td><td> <code>__________________________________________________________________</code></td></tr>
     507           <tr><td> Element Names </td><td> <code>_11111111_____1111111_____________________________________________</code></td></tr>
     508           <tr><td> Attribute Names </td><td> <code>______________________11_______11_________________________________</code></td></tr>
     509           <tr><td> Attribute Values </td><td> <code>__________________________111________111__________________________</code></td></tr>
     510                  </tbody>
     511               </table>         
    500512         <para> Rather than immediately paying the costs of deletion and transposition just for
    501513            transcoding, however, icXML defers these steps so that the deletion masks for several
     
    598610            such as when XML or SVG documents are embedded within XHTML files. Namespaces are bound
    599611            to uniform resource identifiers (URIs), which are strings used to identify specific
    600             names or resources. On line 1 of Figure \ref{fig:namespace1}, the <code>xmlns</code>
     612            names or resources. On line 1 in the Table below, the <code>xmlns</code>
    601613            attribute instructs the XML processor to bind the prefix <code>p</code> to the URI
    602614               &apos;<code>pub.net</code>&apos; and the default (empty) prefix to
     
    611623            uniquely-named items because the current vocabulary is determined by the namespace(s)
    612624            that are in-scope. </para>
    613          <para>
    614             <!-- FIGURE
    615 \begin{figure}[h]
    616 \begin{tabular}{l|l}
    617 1. & \verb|<book xmlns:p="pub.net" xmlns="book.org">| \\
    618 2. & \verb|  <title>BOOK NAME</title>| \\
    619 3. & \verb|  <p:name>PUBLISHER NAME</p:name>| \\
    620 4. & \verb|  <price>X</price>| \\
    621 5. & \verb|  <price xmlns="publisher.net">Y</price>| \\
    622 6. & \verb|</book>| \\
    623 \end{tabular}
    624 \caption{XML Namespace Example}
    625 \label {fig:namespace1}
    626 \end{figure}
    627 -->
    628          </para>
     625<table>
     626                  <caption>
     627                     <para>XML Namespace Example</para>
     628                  </caption>
     629                  <colgroup>
     630                     <col align="centre" valign="top"/>
     631                     <col align="left" valign="top"/>
     632                  </colgroup>
     633                  <tbody>
     634 <tr><td>1. </td><td><![CDATA[<book xmlns:p="pub.net" xmlns="book.org">]]> </td></tr>
     635 <tr><td>2. </td><td><![CDATA[  <title>BOOK NAME</title>]]> </td></tr>
     636 <tr><td>3. </td><td><![CDATA[  <p:name>PUBLISHER NAME</p:name>]]> </td></tr>
     637 <tr><td>4. </td><td><![CDATA[  <price>X</price>]]> </td></tr>
     638 <tr><td>5. </td><td><![CDATA[  <price xmlns="publisher.net">Y</price>]]> </td></tr>
     639 <tr><td>6. </td><td><![CDATA[</book>]]> </td></tr>
     640                  </tbody>
     641               </table>         
     642
    629643         <para> In both Xerces and icXML, every URI has a one-to-one mapping to a URI ID. These
    630644            persist for the lifetime of the application through the use of a global URI pool. Xerces
     
    649663            found using a bit-scan intrinsic. A namespace binding table, similar to Table
    650664            \ref{tbl:namespace1}, provides the actual URI ID. </para>
    651          <para>
    652             <!-- FIGURE
    653 \begin{table}[h]
    654 \begin{center}
    655 \begin{tabular}{|c||c|c|c|c|}\hline
    656 NSID & Prefix & URI & Prefix ID & URI ID \\ \hline\hline
    657 0 & {\tt p} & {\tt pub.net} & 0 & 0 \\ \hline
    658 1 & {\tt xmlns} & {\tt books.org} & 1 & 1 \\ \hline
    659 2 & {\tt xmlns} & {\tt pub.net} & 1 & 0 \\ \hline
    660 \end{tabular}
    661 \caption{Namespace Binding Table Example}
    662 \end{center}
    663 \label{tbl:namespace1}
    664 \end{table}
    665 -->
    666          </para>
     665<table>
     666                  <caption>
     667                     <para>Namespace Binding Table Example</para>
     668                  </caption>
     669                  <colgroup>
     670                     <col align="centre" valign="top"/>
     671                     <col align="centre" valign="top"/>
     672                     <col align="centre" valign="top"/>
     673                     <col align="centre" valign="top"/>
     674                     <col align="centre" valign="top"/>
     675                   </colgroup>
     676                   <thead>
     677                     <tr><th>NSID </th><th> Prefix </th><th> URI </th><th> Prefix ID </th><th> URI ID </th>
     678                     </tr>
     679                   </thead>
     680                  <tbody>
     681<tr><td>0 </td><td> <code> p</code> </td><td> <code> pub.net</code> </td><td> 0 </td><td> 0 </td></tr>
     682 <tr><td>1 </td><td> <code> xmlns</code> </td><td> <code> books.org</code> </td><td> 1 </td><td> 1 </td></tr>
     683 <tr><td>2 </td><td> <code> xmlns</code> </td><td> <code> pub.net</code> </td><td> 1 </td><td> 0 </td></tr>
     684                  </tbody>
     685               </table>         
    667686         <para>
    668687            <!-- PrefixBindings = PrefixBindingTable[prefixID]; -->
     
    856875                  <colgroup>
    857876                     <col align="left" valign="top"/>
    858                      <col align="left" valign="top"/>
    859                      <col align="left" valign="top"/>
    860                      <col align="left" valign="top"/>
    861                      <col align="left" valign="top"/>
     877                     <col align="centre" valign="top"/>
     878                     <col align="centre" valign="top"/>
     879                     <col align="centre" valign="top"/>
     880                     <col align="centre" valign="top"/>
    862881                  </colgroup>
    863882                  <tbody>
  • docs/Balisage13/DO_XSLT

    r3037 r3051  
    1 xsltproc -o Bal2013came0601/Bal2013came0601.html balisage-1-3-xsl/balisage-html.xsl Bal2013came0601/Bal2013came0601.xml
     1xsltproc -o Bal2013came0601/Bal2013came0601.html balisage-1-3-xsl/balisage-proceedings-html.xsl Bal2013came0601/Bal2013came0601.xml
    22
Note: See TracChangeset for help on using the changeset viewer.