Changeset 3151 for icXML


Ignore:
Timestamp:
May 17, 2013, 5:57:31 AM (6 years ago)
Author:
cameron
Message:

Updates for icxmlc files.

Location:
icXML/icXML-devel/src/icxmlc
Files:
20 edited

Legend:

Unmodified
Added
Removed
  • icXML/icXML-devel/src/icxmlc/XMLConfig.hpp

    r3103 r3151  
    77/*
    88 * @author Nigel Medforth, nigelm -at- international-characters.com
    9  * @version $Id: XMLConfig.hpp 300 2013-04-29 00:52:03Z nigelm $
     9 * @version $Id: XMLConfig.hpp 302 2013-05-04 03:48:19Z nigelm $
    1010 *
    1111 */
     
    2424
    2525#ifndef PRINT_DEBUG_IGNORE_TRANSITION_STREAM_MESSAGES
    26 #define PRINT_DEBUG_IGNORE_TRANSITION_STREAM_MESSAGES
     26//#define PRINT_DEBUG_IGNORE_TRANSITION_STREAM_MESSAGES
    2727#endif
    2828
     
    3636
    3737#ifndef PRINT_DEBUG_IGNORE_SYMBOL_STREAM_MESSAGES
    38 #define PRINT_DEBUG_IGNORE_SYMBOL_STREAM_MESSAGES
     38//#define PRINT_DEBUG_IGNORE_SYMBOL_STREAM_MESSAGES
    3939#endif
    4040
  • icXML/icXML-devel/src/icxmlc/XMLNameChars.hpp

    r3103 r3151  
    111111            return (0x100002600 >> codepoint) & 1;
    112112        }
    113         else if (unlikely(!isXMLV1_0))
     113        else if (!isXMLV1_0)
    114114        {
    115115            return codepoint == 0x85 || codepoint == 0x2028;
     
    127127                // XML 1.1: all but 0x0 is a legal control character
    128128
    129                 const XMLUInt32 legalControlChars[2] = {~0x1, 0x2600};
     129                const XMLUInt32 legalControlChars[2] = {0xFFFFFFFE, 0x2600};
    130130                return (legalControlChars[isXMLV1_0] >> codepoint) & 1;
    131131            }
     
    140140                    return (0x2600 >> codepoint) & 1;
    141141                case 3: // codepoint in [ 0x60, 0x7f ]
    142                     return codepoint < 0x7f;
     142                    return isXMLV1_0 | (codepoint < 0x7f);
    143143                case 4: // codepoint in [ 0x80, 0x9f ]
    144                     return codepoint == 0x85;
     144                    return isXMLV1_0 | (codepoint == 0x85);
    145145            }
    146146        }
  • icXML/icXML-devel/src/icxmlc/XMLParser.hpp

    r3103 r3151  
    77/*
    88 * @author Nigel Medforth, nigelm -at- international-characters.com
    9  * @version $Id: XMLParser.hpp 295 2013-04-24 02:36:00Z nigelm $
     9 * @version $Id: XMLParser.hpp 307 2013-05-08 23:02:44Z nigelm $
    1010 *
    1111 */
     
    6262    template<class ScannerType> friend class XMLGrammarValidator;
    6363    friend class XMLDocumentDisseminator;
    64 
     64    friend class XMLReader;
    6565
    6666    public:
     
    7070        typedef DynamicArray<XMLSize_t, INITIAL_MAX_SCOPE> ElementChildrenArray;
    7171        typedef DynamicArray<XMLByte, INITIAL_MAX_SCOPE> ContentFlagArray;
    72 
    7372        typedef StringPool<XMLCh, 1024, true> ContentOutputType;
     73
     74        typedef XMLReader::XMLVersion XMLVersion;
     75
     76        typedef DynamicArray<const XMLCh*, 8>   ShortStringPtrArray;
     77
    7478
    7579        enum DocumentStateType
     
    8791        };
    8892
     93    public:
     94
    8995        void getCurrentLineCol
    9096        (
     
    9298            , XMLFileLoc               &   col
    9399        ) const;
    94 
    95         typedef XMLReader::XMLVersion XMLVersion;
    96 
    97         typedef DynamicArray<const XMLCh*, 8>   ShortStringPtrArray;
    98100
    99101    protected:
     
    141143        SymbolArray                                                     fSymbolStream;       
    142144        SymbolUriArray                                          fUriStream;
    143         SymbolUriArray                                          fContextStream;
    144         RefVectorOf<XMLAttr>                fAttrList;
     145        SymbolUriArray                                          fContextStream;       
    145146        ReferenceArray                      fReferenceStream;
    146147
     
    203204    , fDocumentContextStream()
    204205    , fDocumentObjectStream()
    205     , fAttrList(16, true, manager)
    206206{
    207207    fContentIdx = 0;
  • icXML/icXML-devel/src/icxmlc/XMLParserImpl.c

    r3103 r3151  
    77/*
    88 * @author Nigel Medforth, nigelm -at- international-characters.com
    9  * @version $Id: XMLParserImpl.c 298 2013-04-27 20:13:29Z nigelm $
     9 * @version $Id: XMLParserImpl.c 313 2013-05-11 04:03:43Z nigelm $
    1010 *
    1111 */
     
    1818#include <icxmlc/parsers/XMLWellFormednessParser.hpp>
    1919#include <icxmlc/parsers/XMLGrammarValidator.hpp>
    20 #include <icxmlc/parsers/XMLDocumentDisseminator.hpp>
    2120
    2221XERCES_CPP_NAMESPACE_BEGIN
     
    6867    size_t stringEndCount;
    6968    size_t referenceCount;
    70 
    71     DEBUG_MESSAGE(" -- fContentIdx=" << fContentIdx << " of " << fContentStream.capacity() )
    72     DEBUG_MESSAGE(" -- fSymbolCount=" << fSymbolCount << " of " << fSymbolStream.capacity() )
    73     DEBUG_MESSAGE(" -- fReferenceCount=" << fReferenceCount << " of " << fReferenceStream.capacity() )
    74     DEBUG_MESSAGE(" -- fStringCount=" << fStringCount << " of " << fStringEndStream.capacity() )
    7569
    7670        fCursorEndPtr =
     
    9791    fStringCount += stringEndCount;
    9892    fUriIdx = 0;
    99     fContextIdx = 0;
    100     fContentIdx = 0;
    10193    fStringEndPtr = &fStringEndStream[0];
    10294    fDocumentContextIdx = 0;
    10395    fDocumentObjectIdx = 0;
     96    fContextIdx = 0;
     97    fContentIdx = 0;
    10498
    10599    if (unlikely(referenceCount > 0 && !fEntityContentBuffer))
     
    117111        // point must be copied back and will not be modified or inspected when
    118112                // parsing the next document page.
    119 
    120         DEBUG_MESSAGE("preScanDocumentPage::bytesEaten=" << bytesEaten)
    121113        reader->refreshRawBuffer(bytesEaten, 0);
    122114        }
     
    151143    for (;;)
    152144    {
    153         size_t avail = length;
     145        size_t avail = length - offset;
    154146        fNoMore = 1;
    155147        if (unlikely(avail > SEGMENT_SIZE))
     
    182174            );
    183175
     176        DEBUG_MESSAGE("bytesEaten=" << bytesEaten);
     177
    184178        fSymbolCount += symbolCount;
    185179        fReferenceCount += referenceCount;
     
    189183        if (fNoMore) break;
    190184
    191         DEBUG_REFERENCE_MESSAGE(" **** NEXT INTERNAL PAGE! ****")
     185        offset += bytesEaten;
     186
     187        DEBUG_REFERENCE_MESSAGE(" **** NEXT INTERNAL PAGE @ " << offset << "! ****")
    192188
    193189        enum { INPUT_SIZE = (BUFFER_BLOCKS * BLOCK_SIZE) / sizeof(XMLCh) };
     
    229225        {
    230226            fReferenceStream.resizeToFit(fReferenceCount, fReferenceStream.capacity() * 2);
    231         }
    232 
    233         offset += SEGMENT_SIZE;
     227        }     
    234228    }
    235229
     
    237231
    238232    fContentIdx = 0;
    239     fStringEndPtr = &fStringEndStream[0];
     233    fStringEndPtr = &fStringEndStream[0];   
    240234
    241235    XMLWellFormednessParser<XMLScannerType> wfScanner(*this, adapter->getSymbolTable(), adapter->getReferenceTable(), fScanner);
     
    264258scanPrologPage()
    265259{
    266     DEBUG_TRANSITION_MESSAGE(" ---------------------------- SCANNING PROLOG -------------------------------");
     260    DEBUG_MESSAGE(" ---------------------------- SCANNING PROLOG -------------------------------");
    267261
    268262    checkWellformedness<XMLParser::Prolog>();
     
    276270void
    277271XMLParserImpl<XMLScannerType>::
    278 preScanElementPage()
     272buildElementPage()
    279273{
    280274    checkWellformedness<XMLParser::Element>();
     
    282276    resolveDocumentPageNamespaces();
    283277
    284     validateGrammar();
     278    validateGrammar();   
     279
     280    if (unlikely(fScanner.getDocHandler() == 0))
     281    {
     282        fDocumentObjectCount = 0;
     283    }
     284    else
     285    {
     286        fDocumentDesseminator.resizeAttributeArray(fMaxAttributeCount, fScanner.getMemoryManager());
     287    }
    285288}
    286289
     
    292295scanElementPage()
    293296{
    294     preScanElementPage();
    295 
    296     XMLDocumentDisseminator disseminator(*this, fScanner.getDocHandler(), fScanner.getMemoryManager());
    297 
    298     while (disseminator.next());
    299 
     297    while (scanNext());
     298
     299    return inElement();
     300}
     301
     302// -------------------------------------------------------------------------------------------
     303
     304/** This function automatically sends the next piece of content / markup to the
     305    appropriate scanner event handler. If it cannot complete a particular piece
     306    of content or markup, it returns false.
     307 **/
     308
     309template<class XMLScannerType>
     310bool XMLParserImpl<XMLScannerType>::scanNext()
     311{
     312    return fDocumentDesseminator.next();
     313}
     314
     315template<class XMLScannerType>
     316bool XMLParserImpl<XMLScannerType>::inElement()
     317{
    300318    return fInElement;
    301319}
     
    304322
    305323template<class XMLScannerType>
    306 void XMLParserImpl<XMLScannerType>::resolveDocumentPageNamespaces()
    307 {
    308     if (!fScanner.getDoNamespaces() || fMarkupCount == 0)
    309     {
    310         return;
    311     }
    312 
    313     XMLSchemaLoader<XMLScannerType> schemaLoader(NULL); // fMemoryManager
    314     XMLNamespaceParser<XMLScannerType> parser(*this, fNamespaceResolver, fSymbolTable, fReferenceTable, schemaLoader, fScanner);
    315 
    316     const bool isRoot = (fNamespaceResolver.getScope() == 0);
    317 
    318     parser.resolveNamespaces();
    319 
    320     if (fScanner.getDoSchema())
    321     {
    322         if (unlikely(schemaLoader.hasSchemas()))
    323         {
    324             schemaLoader.loadAllSchemas(fScanner);
    325         }
    326         if (unlikely(isRoot))
    327         {
    328             fScanner.loadExternalGrammars();
    329         }
     324bool XMLParserImpl<XMLScannerType>::scanMiscellaneousPage()
     325{
     326    checkWellformedness<XMLParser::Miscellaneous>();
     327
     328    while (scanNext());
     329
     330    return !fNoMore;
     331}
     332
     333// ---------------------------------------------------------------------------------------------------------
     334
     335template<class XMLScannerType>
     336void XMLParserImpl<XMLScannerType>::verifyProlog()
     337{
     338    // verify that the first content "string" in the element is empty and discard it
     339    ContentPtrType contentPtr = &fContentStream[fContentIdx];
     340    if (likely(contentPtr == *fStringEndPtr))
     341    {
     342        contentPtr = *fStringEndPtr++ + 1;
     343        fContentIdx++;
     344        fInMarkup = true;
     345    }
     346    else
     347    {
     348        fScanner.emitError(XMLErrs::ExpectedCommentOrPI);
     349    }
     350
     351    // make sure we start the Element with a legal tag.
     352    switch (*contentPtr & MarkupMask)
     353    {
     354        case StartTagWithAttributes:
     355        case StartTagWithoutAttributes:
     356        case ProcessingInstruction:
     357        case Comment:
     358            break;
     359        /// ------------------------------------------------------------------------ ///
     360        case EndTag:
     361            fScanner.emitError(XMLErrs::MoreEndThanStartTags);
     362            break;
     363        /// ------------------------------------------------------------------------ ///
     364        case CDATA:
     365            fScanner.emitError(XMLErrs::CDATAOutsideOfContent);
    330366    }
    331367}
     
    339375    if (DocStateType == XMLParser::Element)
    340376    {
    341         // verify that the first content "string" in the element is empty and discard it
    342377        if (unlikely(fScope == 0 && !fInMarkup))
    343378        {
    344             const ContentPtrType contentPtr = &fContentStream[fContentIdx];
    345             const ContentPtrType endOfContentPtr = *fStringEndPtr++;
    346             if (likely(XMLStringU::isWhitespace(contentPtr, endOfContentPtr - contentPtr)))
    347             {
    348                 fContentIdx = (endOfContentPtr - contentPtr) + 1;
    349                 fInMarkup = true;
    350             }
    351             else
    352             {               
    353                 fScanner.emitError(XMLErrs::ExpectedCommentOrPI);
    354             }
     379            verifyProlog();
    355380        }
    356381
     
    371396        );
    372397
    373 
    374398        // based on the WF check, also pre-expand any streams as needed.
    375399        const size_t elementCount = (fElementIndex + wfScanner.fElementCount);
     
    407431
    408432        wfScanner.checkWellformedness<DocStateType>(&fDocumentAccumulator);
     433
     434        if (unlikely(fScanner.getDocHandler() == 0))
     435        {
     436            fDocumentObjectCount = 0;
     437        }
     438    }
     439}
     440
     441// -------------------------------------------------------------------------------------------
     442
     443template<class XMLScannerType>
     444void XMLParserImpl<XMLScannerType>::resolveDocumentPageNamespaces()
     445{
     446    if (!fScanner.getDoNamespaces() || fMarkupCount == 0)
     447    {
     448        return;
     449    }
     450
     451    XMLSchemaLoader<XMLScannerType> schemaLoader(NULL); // fMemoryManager
     452    XMLNamespaceParser<XMLScannerType> parser(*this, fNamespaceResolver, fSymbolTable, fReferenceTable, schemaLoader, fScanner);
     453
     454    const bool isRoot = (fNamespaceResolver.getScope() == 0);
     455
     456    parser.resolveNamespaces();
     457
     458    if (fScanner.getDoSchema())
     459    {
     460        if (unlikely(schemaLoader.hasSchemas()))
     461        {
     462            schemaLoader.loadAllSchemas(fScanner);
     463        }
     464        if (unlikely(isRoot))
     465        {
     466            fScanner.loadExternalGrammars();
     467        }
    409468    }
    410469}
     
    426485    fStringEndPtr = gv.fStringEndPtr;
    427486    fMarkupCount = gv.fMarkupCount;
    428 
    429 }
    430 
    431 // -------------------------------------------------------------------------------------------
    432 
    433 /** This function automatically sends the next piece of content / markup to the
    434         appropriate scanner event handler. If it cannot complete a particular piece
    435         of content or markup, it returns false.
    436  **/
    437 
    438 template<class XMLScannerType>
    439 XMLParser::XMLScanState XMLParserImpl<XMLScannerType>::scanNext()
    440 {
    441 
    442     return XMLParser::EndOfDocumentSection; // fInMiscellaneous;
    443 }
    444 
    445 // -------------------------------------------------------------------------------------------
    446 
    447 template<class XMLScannerType>
    448 bool XMLParserImpl<XMLScannerType>::scanMiscellaneousPage()
    449 {
    450     checkWellformedness<XMLParser::Miscellaneous>();
    451 
    452     XMLDocumentDisseminator disseminator(*this, fScanner.getDocHandler(), fScanner.getMemoryManager());
    453 
    454     while (disseminator.next());
    455 
    456     return !fNoMore;
    457 }
    458 
    459 // ---------------------------------------------------------------------------------------------------------
    460 
    461 // could we have a stage prior to grammar validation that obtains the proper ElemDecls and AttDefs for every symbol?
    462 // could it prove out whether all ElemDecls and AttDefs are stored in the symbols and therefore no grammar lookup could be needed?
    463 // would that even simplier and/or faster?
    464 
    465 template<class XMLScannerType>
    466 void XMLParserImpl<XMLScannerType>::getNext()
    467 {
    468 
    469487}
    470488
  • icXML/icXML-devel/src/icxmlc/XMLParserImpl.hpp

    r3103 r3151  
    77/*
    88 * @author Nigel Medforth, nigelm -at- international-characters.com
    9  * @version $Id: XMLParserImpl.hpp 281 2013-04-04 00:14:43Z nigelm $
     9 * @version $Id: XMLParserImpl.hpp 307 2013-05-08 23:02:44Z nigelm $
    1010 *
    1111 */
     
    1616#include <icxmlc/XMLParser.hpp>
    1717#include <icxmlc/parsers/XMLDocumentAccumulator.hpp>
     18#include <icxmlc/parsers/XMLDocumentDisseminator.hpp>
    1819
    1920XERCES_CPP_NAMESPACE_BEGIN
     
    8889        bool scanPrologPage();
    8990
     91        IDISA_ALWAYS_INLINE
    9092        bool scanElementPage();
     93
     94        IDISA_ALWAYS_INLINE
     95        bool scanNext();
     96
     97        inline bool inElement();
     98
     99                void prepareForNextDocumentPage();
     100
     101        void buildElementPage();
     102
     103        bool scanMiscellaneousPage();
     104
     105        void verifyProlog();
     106
     107    private:
    91108
    92109        template <XMLParser::DocumentStateType DocStateType>
     
    100117        void validateGrammar();
    101118
    102                 void prepareForNextDocumentPage();
    103 
    104         IDISA_ALWAYS_INLINE
    105         void preScanElementPage();
    106 
    107         XMLParser::XMLScanState scanNext();
    108 
    109         bool scanMiscellaneousPage();
    110 
    111         private:
    112 
    113         IDISA_ALWAYS_INLINE
    114         void getNext();
    115 
    116119        private:
    117120
     
    119122        XMLSymbolTable                                          fSymbolTable;
    120123        XMLReferenceTable                   fReferenceTable;
    121         XMLDocumentAccumulator              fDocumentAccumulator;
    122124        XMLScannerType &                    fScanner;
     125        XMLDocumentAccumulator              fDocumentAccumulator;               
     126        XMLDocumentDisseminator             fDocumentDesseminator;
    123127};
    124128
     
    133137, fSymbolTable(scanner, fNamespaceResolver, fReferenceTable, manager)
    134138, fReferenceTable(scanner, fSymbolTable, manager)
     139, fScanner(*scanner)
    135140, fDocumentAccumulator(*this)
    136 , fScanner(*scanner)
     141, fDocumentDesseminator(*this, scanner->getDocHandler(), manager)
    137142{
    138143    fScanner.setUriResolver(&fNamespaceResolver);
  • icXML/icXML-devel/src/icxmlc/XMLReferenceTable.cpp

    r3107 r3151  
    4848
    4949    const size_t transcodedLength = entity.getLen() - inAttVal;
    50     XMLCh * referenceName = XMLString::replicate(entity.getRawBuffer(), transcodedLength, fMemoryManager);
     50
     51    Janitor<XMLCh> janReferenceName(XMLString::replicate(entity.getRawBuffer(), transcodedLength, fMemoryManager));
     52    XMLCh * referenceName = janReferenceName.get();
    5153    referenceName[transcodedLength] = 0;
    5254    #ifdef PRINT_DEBUG_MESSAGE
     
    5658
    5759    bool expanded = 0;
    58     XMLReader::XMLVersion version = fScanner.getXMLVersion();
     60    bool isExternal = 0;
    5961    // need to set these to what the CSA says they are.
    6062    XMLFileLoc line = 0;
     
    6466    if (*referenceName == chPound)
    6567    {
    66         const bool isXMLV1_0 = (version == XMLReader::XMLV1_0);
    67 
    6868//      3.3.3 Attribute-Value Normalization
    6969//      For [each] character reference, append the referenced character to the normalized value.
    7070
    71         expanded = expandCharacterReference(referenceName + 1, transcodedLength - 1, isXMLV1_0, entity, replacementText.fType);
     71        expanded = expandCharacterReference(referenceName + 1, transcodedLength - 1, entity, replacementText.fType);
    7272    }
    7373    else // it must be a predefined or general entity
     
    7979        };
    8080
    81         bool isPredefined = 0;
    82         expanded = fScanner.expandEntityReference(referenceName, length, inAttVal, entity, isPredefined, version, line, column);
    83         replacementText.fType = replacementTextType[isPredefined];
     81        if (likely(XMLNameChar::test(referenceName, transcodedLength)))
     82        {
     83            bool isPredefined = 0;
     84            expanded = fScanner.expandEntityReference(referenceName, length, inAttVal, entity, isPredefined, isExternal, line, column);
     85            replacementText.fType = replacementTextType[isPredefined];
     86        }
     87        else
     88        {
     89            fScanner.emitError(XMLErrs::ExpectedEntityRefName);
     90        }
    8491    }
    8592
     
    8794    if (likely(expanded))
    8895    {
    89         // if it's a character reference or predefined entity, we could search to see if the replacement text object already
    90         // exists for the opposite context.
    91 
    92         const bool isXMLV1_0 = (version == XMLReader::XMLV1_0);
    9396        // expansion succeeded; now we have to construct a content stream (and potentially a symbol gid stream)
    94         expanded = parseReference(entity.getRawBuffer(), entity.getLen(), inAttVal, isXMLV1_0, replacementText);
     97        expanded = parseReference(entity.getRawBuffer(), entity.getLen(), inAttVal, replacementText);
    9598    }   
    9699
     
    109112    }
    110113
    111     XMLString::release(&referenceName, fMemoryManager);
    112114    fReplacementTextList[reference.fId] = replacementText;
     115
     116    if (unlikely(isExternal))
     117    {
     118        // if we were dealing with an external entity, pop the reader that was parsing it
     119        fScanner.popReader();
     120    }
    113121
    114122    return reference.fId;
     
    122130    , size_t                length
    123131    , const bool            inAttVal
    124     , const bool            isXMLV1_0
    125132    , XMLReplacementText &      toFill
    126133)
    127134{
     135    DEBUG_REFERENCE_MESSAGE("parseReference(" << replacementText << ',' << length << ',' << "inAttVal=" << inAttVal << ',' << "type=" << toFill.fType << ')' )
     136
    128137    bool parsed = 1;
    129 
    130138    if (likely(toFill.fType < XMLReplacementText::GeneralEntity))
    131139    {
     
    139147    {
    140148        const bool hasOpenAngleBracket = XMLStringU::indexOf<chOpenAngle>(replacementText, length) != -1;
     149
     150        DEBUG_REFERENCE_MESSAGE(" -- hasOpenAngleBracket=" << hasOpenAngleBracket)
     151
    141152        if (inAttVal | !hasOpenAngleBracket)
    142153        {
     
    148159            else
    149160            {
    150                 parsed = parseReferenceWithoutMarkup(replacementText, length, inAttVal, isXMLV1_0, toFill);
     161                parsed = parseReferenceWithoutMarkup(replacementText, length, inAttVal, toFill);
    151162            }
    152163        }
    153164        else // at least one piece of markup must exist within the replacement text
    154165        {
    155             parsed = parseReferenceWithMarkup(replacementText, length, isXMLV1_0, toFill);
     166            parsed = parseReferenceWithMarkup(replacementText, length, toFill);
    156167        }
    157168    }
     
    167178    , size_t                        length
    168179    , const bool                    inAttVal
    169     , const bool                    isXMLV1_0
    170180    , XMLReplacementText &          toFill
    171181)
     
    177187    bool parsed = 0;
    178188
    179     XMLWhitespaceNormalizer::normalize(replacementText, length, inAttVal, isXMLV1_0);
    180 
    181     if (!isXMLV1_0)
     189    if (inAttVal)
     190    {
     191        // do any remaining whitespace normalization on content-normalized attribute values
     192        XMLChIterator2<chLF, chHTab> wsItr(replacementText, length);
     193        while (wsItr.next())
     194        {
     195            replacementText[wsItr.pos()] = chSpace;
     196        }
     197    }
     198
     199    if (fScanner.getXMLVersion() == XMLReader::XMLV1_1)
    182200    {
    183201        // scan to see if any restricted characters are present
     
    214232    }
    215233
     234    DEBUG_REFERENCE_MESSAGE(" -- replacementText'=" << replacementText)
     235
    216236    if (likely(length != 0))
    217237    {
     
    293313            {
    294314                // unterminated entity! any entity must be closed with a ';' and cannot have another '&' it
    295                 fScanner.emitError(XMLErrs::UnterminatedEntityRef);
     315                reportUnterminatedEntity(&replacementText[startPos], expandedText);
    296316                return 0;
    297317            }
     
    360380    const XMLCh *                   characterReference
    361381    , size_t                        length
    362     , const bool                    isXMLV1_0
    363382    , XMLBuffer &                   entity
    364383    , ReplacementType &             type
     
    437456
    438457    bool valid = true;
     458
    439459    // Return the char (or chars) and check if the character expanded is valid or not
    440     if (likely(characterValue <= 0xFFFF))
     460    if (likely(characterValue <= 0xFFFF && XMLNameChar::isXMLChar(characterValue, fScanner.getXMLVersion() == XMLReader::XMLV1_0, true)))
    441461    {
    442462        XMLCh character = XMLCh(characterValue);
    443463        entity.set(&character, 1);
    444 
    445         valid = XMLNameChar::isXMLChar(characterValue, isXMLV1_0, true);
    446 
    447464        type = XMLReplacementText::CharacterReference;
    448465    }
     466    else if (characterValue >= 0x10000 && characterValue <= 0x10FFFF)
     467    {
     468        XMLCh character[2];
     469        characterValue -= static_cast<XMLUInt64>(0x10000);
     470        character[0] = XMLCh((characterValue >> 10) | static_cast<XMLUInt64>(0xD800));
     471        character[1] = XMLCh((characterValue & 0x3FF) | static_cast<XMLUInt64>(0xDC00));
     472        entity.set(character, 2);
     473        type = XMLReplacementText::SurrogateCharacterReference;
     474        fHasSurrogateCharacterReferences = true;
     475    }
    449476    else
    450     {
    451         if (likely(characterValue >= 0x10000 && characterValue <= 0x10FFFF))
    452         {
    453             XMLCh character[2];
    454             characterValue -= static_cast<XMLUInt64>(0x10000);
    455             character[0] = XMLCh((characterValue >> 10) | static_cast<XMLUInt64>(0xD800));
    456             character[1] = XMLCh((characterValue & 0x3FF) | static_cast<XMLUInt64>(0xDC00));
    457 
    458             entity.set(character, 2);
    459 
    460             type = XMLReplacementText::SurrogateCharacterReference;
    461             fHasSurrogateCharacterReferences = true;
    462         }
    463         else
    464         {
    465             valid = false;
    466         }
    467     }
    468 
    469     if (unlikely(!valid))
    470477    {
    471478        // Character reference was not in the valid range
    472479        fScanner.emitError(XMLErrs::InvalidCharacterRef);
     480        valid = false;
    473481    }
    474482
     
    482490    XMLCh *                 replacementText
    483491    , size_t                length
    484     , const bool            isXMLV1_0
    485492    , XMLReplacementText &  toFill
    486493)
     
    509516        // used for the parsing work. That only exists to properly template the scanner.
    510517        XMLParserImpl<XMLScanner> refParser(&fScanner, fMemoryManager);
    511         const XMLReader::XMLVersion VERSION[2] = { XMLReader::XMLV1_1, XMLReader::XMLV1_0 };
    512         refParser.init(&refAdapter, transcoder, 0, 0, VERSION[isXMLV1_0]);
     518        refParser.init(&refAdapter, transcoder, 0, 0, fScanner.getXMLVersion());
    513519        refAdapter.init(&fScanner, fSymbolTable, *this, 0, 0);
    514520        // parse the entity's replacement text
    515521        refParser.scanInternalDocumentPage(replacementText, length, toFill);
     522
     523        DEBUG_MESSAGE(" *** scanned internal document page")
    516524
    517525        // restore the reference and symbol table's original parameters
     
    550558            fSomeGeneralEntityContainsReferences = 1;
    551559        }
    552     }
    553     while (0);
     560
     561        DEBUG_MESSAGE(" *** parsed reference with markup 1")
     562    }
    554563
    555564    // note: the XMLParser automatically sets the UriResolver in the scanner to 0 upon deletion; this restores it.
    556565    fScanner.setUriResolver(resolver);
    557566
     567    DEBUG_MESSAGE(" *** parsed reference with markup 2")
     568
    558569    return 1;
    559570}
     
    561572/// -------------------------------------------------------------------------------------------------------------------
    562573
    563 void XMLReferenceTable::normalizeDefaultAttributeValue
    564 (
    565     XMLElementDefaultAttribute &                 defaultAttribute
    566     , const bool                                 isXMLV1_0
    567 )
    568 {
    569     XMLReplacementText temp;
    570 
    571     parseReferenceWithoutMarkup(const_cast<WritableContentPtrType>(defaultAttribute.getValue()), defaultAttribute.getValueLen(), true, isXMLV1_0, temp);
    572 
    573     defaultAttribute.fValue = temp.fContentStream;
    574     defaultAttribute.fValueLen = temp.fContentLength;
    575 }
    576 
    577 /// -------------------------------------------------------------------------------------------------------------------
    578 
    579 void XMLReferenceTable::reportEntity
    580 (
    581     const gid_t                 entityId
    582     , const XMLErrs::Codes      errorCode
    583 ) const
     574void XMLReferenceTable::reportEntity(const gid_t entityId, const XMLErrs::Codes errorCode) const
    584575{
    585576
     
    587578    const XMLReference & ref = fReferenceTable[entityId];
    588579    fTranscoder->transcodeFrom(ref.getKey(), ref.getLength(), entityName);
    589     fScanner.emitError(errorCode, entityName.getRawBuffer());
     580    XMLCh * transcodedEntityName = entityName.getRawBuffer();
     581    const XMLSize_t len = entityName.getLen() - 1;
     582    if (transcodedEntityName[len] == chSemiColon)
     583    {
     584        transcodedEntityName[len] = chNull;
     585    }
     586    fScanner.emitError(errorCode, transcodedEntityName);
     587}
     588
     589void XMLReferenceTable::reportUnterminatedEntity(const XMLCh * reference, XMLBuffer & toFill) const
     590{
     591    XMLSize_t len = 0;
     592    if (XMLNameChar::testNameStart(reference, len))
     593    {
     594        while (XMLNameChar::testNameChar(reference, len));
     595    }
     596
     597    if (len == 0)
     598    {
     599        fScanner.emitError(XMLErrs::ExpectedEntityRefName);
     600    }
     601    else
     602    {
     603        toFill.set(reference, len);
     604        fScanner.emitError(XMLErrs::UnterminatedEntityRef, toFill.getRawBuffer());
     605    }
    590606}
    591607
  • icXML/icXML-devel/src/icxmlc/XMLReferenceTable.hpp

    r3103 r3151  
    77/*
    88 * @author Nigel Medforth, nigelm -at- international-characters.com
    9  * @version $Id: XMLReferenceTable.hpp 296 2013-04-25 23:53:07Z nigelm $
     9 * @version $Id: XMLReferenceTable.hpp 304 2013-05-06 22:38:14Z nigelm $
    1010 *
    1111 */
     
    159159                );
    160160
    161         void normalizeDefaultAttributeValue
    162         (
    163             XMLElementDefaultAttribute &                 defaultAttribute
    164             , const bool                                 isXML1_0
    165         );
    166 
    167161        IDISA_ALWAYS_INLINE
    168162        void setTranscoder(XMLTranscoder * transcoder);
     
    199193            , size_t                        length
    200194            , const bool                    inAttVal
    201             , const bool                    isXMLV1_0
    202195            , XMLReplacementText &          toFill
    203196                );
     
    207200            XMLCh *                         replacementText
    208201            , size_t                        length
    209             , const bool                    isXMLV1_0
    210202            , XMLReplacementText &          toFill
    211203        );
     
    216208            , size_t                        length
    217209            , const bool                    inAttVal
    218             , const bool                    isXMLV1_0
    219210            , XMLReplacementText &          toFill
    220211        );
     
    236227            const XMLCh *                   charRef
    237228            , size_t                        length
    238             , const bool                    isXMLV1_0
    239229            , XMLBuffer &                   character
    240230            , ReplacementType &             type
     
    249239        static bool invert(BitBlock * stream, const ssize_t pos);
    250240
    251         void reportEntity
    252         (
    253             const gid_t                 entityId
    254             , const XMLErrs::Codes      errorCode
    255         ) const;
     241        void reportEntity(const gid_t entityId, const XMLErrs::Codes errorCode) const;
     242
     243        void reportUnterminatedEntity(const XMLCh * reference, XMLBuffer & toFill) const;
    256244
    257245        private:
  • icXML/icXML-devel/src/icxmlc/XMLSymbol.c

    r3104 r3151  
    77/*
    88 * @author Nigel Medforth, nigelm -at- interational-characters.com
    9  * @version $Id: XMLSymbol.c 282 2013-04-04 21:06:57Z nigelm $
     9 * @version $Id: XMLSymbol.c 312 2013-05-10 20:53:22Z nigelm $
    1010 *
    1111 */
     
    136136
    137137
    138 XMLSymbol::XMLSymbol() :
    139           fRawLength(0)
    140         , fRawSymbol(0)
    141         , fAttr(NULL)
    142         , fQName(NULL)
    143     , fPrefixId(XMLNamespaceResolver::fUnknownUriId)
    144     , fLocalPartId(XMLNamespaceResolver::fUnknownUriId)
    145     , fDefaultAttributeList(0)
     138XMLSymbol::XMLSymbol(MemoryManager* const manager)
     139: fRawLength(0)
     140, fRawSymbol(0)
     141, fPrefixId(XMLNamespaceResolver::fUnknownUriId)
     142, fLocalPartId(XMLNamespaceResolver::fUnknownUriId)
     143, fQName(manager)
     144, fAttr(NULL)
     145, fDefaultAttributeList(0)
    146146{
    147147
  • icXML/icXML-devel/src/icxmlc/XMLSymbol.hpp

    r3103 r3151  
    77/*
    88 * @author Nigel Medforth, nigelm -at- interational-characters.com
    9  * @version $Id: XMLSymbol.hpp 292 2013-04-13 02:02:18Z nigelm $
     9 * @version $Id: XMLSymbol.hpp 316 2013-05-13 20:03:50Z nigelm $
    1010 *
    1111 */
     
    127127    typedef DynamicArray<ElementDeclRef, 1> ElementDeclList;
    128128
    129     inline XMLSymbol();
     129    inline XMLSymbol(MemoryManager* const manager = XMLPlatformUtils::fgMemoryManager);
    130130
    131131        inline ~XMLSymbol();
     
    133133        XMLSymbol & operator=(const XMLSymbol & symbol)
    134134        {
     135        DEBUG_MESSAGE("XMLSymbol=(symbol)")
    135136                fRawLength = symbol.fRawLength;
    136137                fRawSymbol = symbol.fRawSymbol;
    137138                fAttr = symbol.fAttr;
    138                 fQName = symbol.fQName;
     139
    139140        fPrefixId = symbol.fPrefixId;
    140141        fLocalPartId = symbol.fLocalPartId;
    141142        fDefaultAttributeList = symbol.fDefaultAttributeList;
    142143        fElemDeclList = symbol.fElemDeclList;
     144
     145        fQName = symbol.fQName;
     146
    143147                return *this;
    144148        }
     
    151155
    152156        IDISA_ALWAYS_INLINE
    153     const XMLCh * const getName() const { return fQName->getRawName(); }
    154 
    155         IDISA_ALWAYS_INLINE
    156     const XMLSize_t getNameLength() const { return fQName->getRawLength(); }
    157 
    158         IDISA_ALWAYS_INLINE
    159     QName * getQName() { return fQName; }
    160 
    161         IDISA_ALWAYS_INLINE
    162     const QName * const getQName() const { return fQName; }
     157    const XMLCh * const getName() const { return fQName.getRawName(); }
     158
     159        IDISA_ALWAYS_INLINE
     160    const XMLSize_t getNameLength() const { return fQName.getRawLength(); }
     161
     162        IDISA_ALWAYS_INLINE
     163    QName * getQName() { return &fQName; }
     164
     165        IDISA_ALWAYS_INLINE
     166    const QName * const getQName() const { return &fQName; }
    163167
    164168        IDISA_ALWAYS_INLINE
     
    204208        }
    205209
    206     QName                               *       fQName;
    207210    unsigned int                                        fRawLength;
    208211    const XMLByte                               *       fRawSymbol;
    209212    gid_t                                   fPrefixId;
    210213    gid_t                                   fLocalPartId;   
     214    QName                                       fQName;
    211215    XMLAttr                             *       fAttr;
    212216    XMLElementDefaultAttribute          *   fDefaultAttributeList;
  • icXML/icXML-devel/src/icxmlc/XMLSymbolTable.cpp

    r3104 r3151  
    77/*
    88 * @author Nigel Medforth, nigelm -at- interational-characters.com
    9  * @version $Id: XMLSymbolTable.cpp 295 2013-04-24 02:36:00Z nigelm $
     9 * @version $Id: XMLSymbolTable.cpp 316 2013-05-13 20:03:50Z nigelm $
    1010 *
    1111 */
     
    3030)
    3131{
    32     XMLSymbol entry;
     32    XMLSymbol entry(fMemoryManager);
    3333    entry.fRawLength = length;
    3434    entry.fRawSymbol = fSymbolPool.insert(key, length);
     
    3636    XMLBuffer symbol(length, fMemoryManager);
    3737    fTranscoder->transcodeFrom(key, length, symbol);
    38     const XMLCh * symbolName = symbol.getRawBuffer();
    3938    const XMLSize_t symbolLength = symbol.getLen();
     39    Janitor<XMLCh> janSymbolName(XMLString::replicate(symbol.getRawBuffer(), symbolLength, fMemoryManager));
     40    XMLCh * symbolName = janSymbolName.get();
    4041
    4142    /// VERIFY THAT THE CHARACTERS IN THE SYMBOL ARE LEGAL ACCORDING TO XML SPECIFICATIONS
    42     if (unlikely(!XMLNameChar::test(symbolName, symbol.getLen())))
    43     {
     43    if (unlikely(!XMLNameChar::test(symbolName, symbolLength)))
     44    {       
    4445        DEBUG_SYMBOL_MESSAGE("ERROR! failed XMLNameChar validation! " << symbolName)
    4546        fScanner.emitError(XMLErrs::ExpectedElementName);
    46     }
    47 
    48     entry.fQName = new QName(symbolName, XMLNamespaceResolver::fEmptyUriId, fMemoryManager);
    49 
    50     doNamespaceResolution(symbolName,  symbolLength, entry);
     47        return -1;
     48    }
     49
     50    int colon = -1;
     51
     52    doNamespaceResolution(symbolName, symbolLength, colon, entry);
     53
     54    initQName(symbolName, length, colon, entry, entry.fQName);
    5155
    5256    checkForDefaultAttributes(entry);
    5357
    54     DEBUG_SYMBOL_MESSAGE(" -- add(" << &entry << ')');
     58    janSymbolName.release();
    5559
    5660    return fSymbolTable.add(entry);
     
    6670)
    6771{
    68     XMLSymbol entry;
     72    XMLSymbol entry(fMemoryManager);
    6973    entry.fRawLength = unencodedLength;
    7074    entry.fRawSymbol = fSymbolPool.insert(unencodedKey, unencodedLength);
    7175
    72     XMLCh * symbolName = XMLString::replicate(key, length, fMemoryManager);
     76    Janitor<XMLCh> janSymbolName(XMLString::replicate(key, length, fMemoryManager));
     77    XMLCh * symbolName = janSymbolName.get();
    7378    symbolName[length] = 0;
     79
    7480
    7581    /// VERIFY THAT THE CHARACTERS IN THE SYMBOL ARE LEGAL ACCORDING TO XML SPECIFICATIONS
     
    7884        DEBUG_SYMBOL_MESSAGE("ERROR! failed XMLNameChar validation! " << symbolName)
    7985        fScanner.emitError(XMLErrs::ExpectedElementName);
    80     }
    81 
    82     entry.fQName = new QName(symbolName, XMLNamespaceResolver::fEmptyUriId, fMemoryManager);
    83 
    84     doNamespaceResolution(symbolName, length, entry);
     86        return -1;
     87    }
     88
     89    int colon = -1;
     90
     91    doNamespaceResolution(symbolName, length, colon, entry);
     92
     93    initQName(symbolName, length, colon, entry, entry.fQName);
    8594
    8695    checkForDefaultAttributes(entry);
    8796
    88     XMLString::release(&symbolName, fMemoryManager);
     97    janSymbolName.release();
    8998
    9099    return fSymbolTable.add(entry);
    91100}
    92101
     102void XMLSymbolTable::initQName(XMLCh * name, const XMLSize_t length, const int colon, const XMLSymbol & entry, QName & qName) const
     103{
     104    qName.fURIId = XMLNamespaceResolver::fEmptyUriId;
     105    if (likely(colon != -1))
     106    {
     107        qName.fPrefix = const_cast<XMLCh*>(fNamespaceResolver.getPrefixForId(entry.fPrefixId));
     108        qName.fPrefixLen = colon - 1;
     109        qName.fLocalPart = const_cast<XMLCh*>(fNCNameTable[entry.fLocalPartId]);
     110        qName.fLocalPartLen = length - colon;
     111    }
     112    else
     113    {
     114        qName.fPrefix = const_cast<XMLCh*>(XMLUni::fgEmptyString);
     115        qName.fPrefixLen = 0;
     116        qName.fLocalPart = name;
     117        qName.fLocalPartLen = length;
     118    }
     119    qName.fRawName = name;
     120    qName.fNameBuf = name;
     121}
     122
     123
    93124#if __GNUC__
    94 #warning "What if the symbol is an element but contains default attributes with the same name? revise slightly if the DTD specification allows that"
     125#warning "What if the symbol is an element but contains default attributes with the same name? revise!"
    95126
    96127#warning "TODO: the error message for multiple colons in a QName cannot distinguish between element and attribute QNames. Either we need to supply it with the appropriate context information or modify the error code/description table."
     
    98129
    99130IDISA_ALWAYS_INLINE
    100 void XMLSymbolTable::doNamespaceResolution(const XMLCh * name, const XMLSize_t length, XMLSymbol & entry)
    101 {
    102     int colon = -1;
     131void XMLSymbolTable::doNamespaceResolution(const XMLCh * name, const XMLSize_t length, int & colon, XMLSymbol & entry)
     132{
    103133    if (fScanner.getDoNamespaces())
    104134    {       
     
    134164                    }
    135165                    return;
    136             }
     166            }           
    137167        }
    138168
  • icXML/icXML-devel/src/icxmlc/XMLSymbolTable.hpp

    r3103 r3151  
    77/*
    88 * @author Nigel Medforth, nigelm -at- interational-characters.com
    9  * @version $Id: XMLSymbolTable.hpp 295 2013-04-24 02:36:00Z nigelm $
     9 * @version $Id: XMLSymbolTable.hpp 312 2013-05-10 20:53:22Z nigelm $
    1010 *
    1111 */
     
    179179
    180180    IDISA_ALWAYS_INLINE
    181     void doNamespaceResolution(const XMLCh * name, const XMLSize_t length, XMLSymbol & entry);
     181    void doNamespaceResolution(const XMLCh * name, const XMLSize_t length, int & colon, XMLSymbol & entry);
    182182
    183183    IDISA_ALWAYS_INLINE
    184184    void checkForDefaultAttributes(XMLSymbol & entry);
     185
     186    IDISA_ALWAYS_INLINE
     187    void initQName(XMLCh * name, const XMLSize_t length, const int colon, const XMLSymbol & entry, QName & qName) const;
    185188
    186189private:
  • icXML/icXML-devel/src/icxmlc/XMLUTF16CharacterSetAdapter.cpp

    r3103 r3151  
    77/*
    88 * @author Nigel Medforth, nigelm -at- interational-characters.com
    9  * @version $Id: XMLUTF16CharacterSetAdapter.cpp 297 2013-04-26 02:30:33Z nigelm $
     9 * @version $Id: XMLUTF16CharacterSetAdapter.cpp 315 2013-05-12 22:34:37Z nigelm $
    1010 *
    1111 */
     
    110110    gid_t * generalEntityReferencePtr = referenceStream;
    111111    const XMLCh ** stringEndPtr = stringEndStream;
    112     const XMLCh * const contentStream0 = contentStream;
    113112
    114113    if (unlikely(!bitblock::all(*deletionMaskStream)))
    115114    {
    116         const BitBlock delmask = bitblock::load_aligned(deletionMaskStream);
    117         BitBlock shift1, shift2, shift4;
    118         del_info_8(delmask, shift1, shift2, shift4);
    119         ubitblock del;
    120         del._128 = del_count(delmask);
    121 
    122         const BitBlock zero = simd<1>::constant<0>();
    123 
    124         for (size_t i = 0; i < 8; i++)
    125         {
    126             bitblock::store_unaligned(zero, reinterpret_cast<BytePack *>(contentStream));
    127             contentStream += del._8[i << 1];
    128             bitblock::store_unaligned(zero, reinterpret_cast<BytePack *>(contentStream));
    129             contentStream += del._8[(i << 1) | 1];
    130         }
     115        // const size_t leadingNullChars = BLOCK_SIZE - bitblock::popcount(bitblock::load_aligned(deletionMaskStream));
     116        contentStream++;
    131117    }
    132118
     
    379365        PopCounter<3> markupCounter;
    380366
     367    /// ----------------------------------------------------------------------------------------------------
     368    /// INITIALIZE THE PARSING PARAMETERS
     369    /// ----------------------------------------------------------------------------------------------------
     370
    381371    if (unlikely(fScanner->getXMLVersion() == XMLReader::XMLV1_1))
    382372    {
     
    387377        parameters.XML_11 = simd<1>::constant<0>();
    388378    }
     379    memset(&errors, 0, sizeof(Errors));
    389380
    390381    /// ----------------------------------------------------------------------------------------------------
     
    431422  init_Parameters.do_block(parameters, callouts);
    432423  classify_bytes.do_block(parameters, u16hi, u16lo, lex, callouts, errors);
     424  normalize_LF.do_block(parameters, lex, u16hi, u16lo, callouts, marker);
    433425  parse_CtCDPI.do_block(lex, marker, callouts, errors);
    434   parse_tags.do_block(lex, marker, callouts, errors);
     426  parse_tags.do_block(lex, marker, u16hi, u16lo, callouts, errors);
    435427  parse_refs.do_block(lex, marker, callouts, errors);
    436   normalize_WS.do_block(parameters, lex, u16hi, u16lo, callouts, marker);
    437428  prepare_content_buffer.do_block(u16hi, u16lo, lex, callouts, marker);
    438429
     
    457448        markupCounter.tally(callouts.MarkupDelimiters);
    458449        // scan for errors
    459         checkErrors(errors, data, fInternalLineColTracker);
     450        checkErrors(data, errors);
    460451        // advance to the next block
    461452        fInternalLineColTracker.advance();
     
    509500  init_Parameters.do_final_block(parameters, callouts, EOF_mask);
    510501  classify_bytes.do_final_block(parameters, u16hi, u16lo, lex, callouts, errors, EOF_mask);
     502  normalize_LF.do_final_block(parameters, lex, u16hi, u16lo, callouts, marker, EOF_mask);
    511503  parse_CtCDPI.do_final_block(lex, marker, callouts, errors, EOF_mask);
    512   parse_tags.do_final_block(lex, marker, callouts, errors, EOF_mask);
     504  parse_tags.do_final_block(lex, marker, u16hi, u16lo, callouts, errors, EOF_mask);
    513505  parse_refs.do_final_block(lex, marker, callouts, errors, EOF_mask);
    514   normalize_WS.do_final_block(parameters, lex, u16hi, u16lo, callouts, marker, EOF_mask);
    515506  prepare_content_buffer.do_final_block(u16hi, u16lo, lex, callouts, marker, EOF_mask);
    516507
     
    534525        markupCounter.tally(callouts.MarkupDelimiters);
    535526        // scan for errors
    536         checkErrors(errors, data, fInternalLineColTracker);
     527        checkErrors(data, errors);
    537528        // advance to the next block
    538529        fInternalLineColTracker.advance();
     
    647638        PopCounter<3> markupCounter;
    648639
     640    /// ----------------------------------------------------------------------------------------------------
     641    /// INITIALIZE THE PARSING PARAMETERS
     642    /// ----------------------------------------------------------------------------------------------------
     643
    649644    if (unlikely(fScanner->getXMLVersion() == XMLReader::XMLV1_1))
    650645    {
     
    656651    }
    657652
     653    memset(&errors, 0, sizeof(Errors));
     654
    658655    /// ----------------------------------------------------------------------------------------------------
    659656    /// PARALLEL MARKUP PARSER
     
    666663    const XMLByte * data = reinterpret_cast<const XMLByte*>(&input[fInternalOffset]);
    667664
    668     #if defined(PRINT_DEBUG_MESSAGE) && !defined(PRINT_DEBUG_IGNORE_TRANSITION_STREAM_MESSAGES)
    669     XMLByte DATA_BLOCK[66] = {'|', 0};
    670     #endif
    671 
    672665    for (; count--; index++)
    673666    {
    674         #if defined(PRINT_DEBUG_MESSAGE) && !defined(PRINT_DEBUG_IGNORE_TRANSITION_STREAM_MESSAGES)
    675         for (unsigned int scanOffset = 0; scanOffset < (BLOCK_SIZE * 2); scanOffset += 64)
    676         {
    677             const size_t tempOffset = (index << LOG_2_U16_BLOCK_SIZE) | scanOffset;
    678             const XMLByte * byteStream = &data[scanOffset];
    679 
    680             for (unsigned int i = 0; i < 64; i++)
    681             {
    682                 XMLByte c = byteStream[i];
    683                 if (c < 32) c = ' ';
    684                 DATA_BLOCK[i + 1] = c;
    685             }
    686 
    687             DEBUG_SYMBOL_MESSAGE("----------------------------------------------------------------------");
    688             DEBUG_SYMBOL_MESSAGE(
    689                  XERCES_STD_QUALIFIER setw(5) <<
    690                  (tempOffset) <<
    691                  XERCES_STD_QUALIFIER setw(0) <<
    692                  DATA_BLOCK );
    693             DEBUG_SYMBOL_MESSAGE("----------------------------------------------------------------------");
    694         }
    695         #endif
    696 
    697667                // transpose the byte data
    698668        transpose(data, u16hi, u16lo);
     
    701671  init_Parameters.do_block(parameters, callouts);
    702672  classify_bytes.do_block(parameters, u16hi, u16lo, lex, callouts, errors);
     673  normalize_LF.do_block(parameters, lex, u16hi, u16lo, callouts, marker);
    703674  parse_CtCDPI.do_block(lex, marker, callouts, errors);
    704   parse_tags.do_block(lex, marker, callouts, errors);
     675  parse_tags.do_block(lex, marker, u16hi, u16lo, callouts, errors);
    705676  parse_refs.do_block(lex, marker, callouts, errors);
    706   normalize_WS.do_block(parameters, lex, u16hi, u16lo, callouts, marker);
    707677  prepare_content_buffer.do_block(u16hi, u16lo, lex, callouts, marker);
    708678
     
    725695        markupCounter.tally(callouts.MarkupDelimiters);
    726696        // scan for errors
    727         checkErrors(errors, data, fInternalLineColTracker);
     697        checkErrors(data, errors);
    728698        // advance to the next block
    729699        fInternalLineColTracker.advance();
     
    737707        const size_t advance = (avail - fInternalOffset) & (BLOCK_SIZE  - 1);
    738708
    739         #if defined(PRINT_DEBUG_MESSAGE) && !defined(PRINT_DEBUG_IGNORE_TRANSITION_STREAM_MESSAGES)
    740         size_t remaining = advance;
    741         for (unsigned int scanOffset = 0; scanOffset < (BLOCK_SIZE * 2); scanOffset += 64)
    742         {
    743             const size_t tempOffset = (index << LOG_2_U16_BLOCK_SIZE) | scanOffset;
    744             const XMLByte * byteStream = &data[scanOffset];
    745 
    746             for (unsigned int i = 0; i < 64; i++)
    747             {
    748                 XMLByte c = ' ';
    749                 if (remaining)
    750                 {
    751                     c = byteStream[i];
    752                     if (c < 32) c = ' ';
    753                     remaining--;
    754                 }
    755                 DATA_BLOCK[i + 1] = c;
    756             }
    757 
    758             DEBUG_SYMBOL_MESSAGE("----------------------------------------------------------------------");
    759             DEBUG_SYMBOL_MESSAGE(
    760                  XERCES_STD_QUALIFIER setw(5) <<
    761                  (tempOffset) <<
    762                  XERCES_STD_QUALIFIER setw(0) <<
    763                  DATA_BLOCK );
    764             DEBUG_SYMBOL_MESSAGE("----------------------------------------------------------------------");
    765         }
    766         #endif
    767 
    768709        // determine the eof mask
    769710        const BitBlock EOF_mask = maskre(advance);
     711
    770712        // transpose the byte data and mask off any characters that are beyond the EOF mask
    771713        transpose(data, u16hi, u16lo, EOF_mask);
     
    774716  init_Parameters.do_final_block(parameters, callouts, EOF_mask);
    775717  classify_bytes.do_final_block(parameters, u16hi, u16lo, lex, callouts, errors, EOF_mask);
     718  normalize_LF.do_final_block(parameters, lex, u16hi, u16lo, callouts, marker, EOF_mask);
    776719  parse_CtCDPI.do_final_block(lex, marker, callouts, errors, EOF_mask);
    777   parse_tags.do_final_block(lex, marker, callouts, errors, EOF_mask);
     720  parse_tags.do_final_block(lex, marker, u16hi, u16lo, callouts, errors, EOF_mask);
    778721  parse_refs.do_final_block(lex, marker, callouts, errors, EOF_mask);
    779   normalize_WS.do_final_block(parameters, lex, u16hi, u16lo, callouts, marker, EOF_mask);
    780722  prepare_content_buffer.do_final_block(u16hi, u16lo, lex, callouts, marker, EOF_mask);
    781723
     
    797739        markupCounter.tally(callouts.MarkupDelimiters);
    798740        // scan for errors
    799         checkErrors(errors, data, fInternalLineColTracker);
     741        checkErrors(data, errors);
    800742        // advance to the next block
    801743        fInternalLineColTracker.advance();
     
    870812
    871813IDISA_ALWAYS_INLINE
    872 void XMLUTF16CharacterSetAdapter::checkErrors(const XMLUTF16CharacterSetAdapter::Errors & errors, const XMLByte * source, XMLLineColTracker & lineCol)
     814void XMLUTF16CharacterSetAdapter::checkErrors(const XMLByte * source, XMLUTF16CharacterSetAdapter::Errors & errors)
    873815{
    874816    BitBlock temp0 = simd_or(errors.Unicode, errors.Lexical);
     
    891833
    892834    temp0 = simd_or(temp0, temp4);
     835    temp8 = simd_or(temp8, errors.BracketInAttrValue);
    893836
    894837    const BitBlock fatalErrors = simd_or(temp0, temp8);
     
    897840        if (unlikely(bitblock::any(fatalErrors)))
    898841        {
    899                 reportError(fatalErrors, errors, source, lineCol);
     842        reportError(fatalErrors, source, errors);
    900843        }
    901844}
    902845
    903 void XMLUTF16CharacterSetAdapter::reportError(const BitBlock fatalErrors, const XMLUTF16CharacterSetAdapter::Errors & errors, const XMLByte * source, XMLLineColTracker & lineCol)
     846void XMLUTF16CharacterSetAdapter::reportError(const BitBlock fatalErrors, const XMLByte * source, XMLUTF16CharacterSetAdapter::Errors & errors)
    904847{
    905848    XMLStreamIterator errorIterator(fatalErrors);
    906     size_t errorPosition;
    907849
    908850    // NOTE: when redoing this, I need to make sure I can get the file name for the error handling
     
    911853    while (errorIterator.next())
    912854    {
    913         errorPosition = errorIterator.pos();
    914 
    915         const BitBlock fatalErrorMask = mask_forward_zeroes(errorPosition);
    916 
    917         DEBUG_MESSAGE(" ==================== FATAL ERROR ========================")
     855        const BitBlock fatalErrorMask = maskri(errorIterator.pos()); // mask_reverse_zeroes(BLOCK_SIZE - errorIterator.pos() - 1);
     856
     857        DEBUG_MESSAGE(" ==================== FATAL ERROR @ " << errorIterator.pos() << " ========================")
    918858
    919859        DEBUG_MESSAGE("errors.Unicode=" << errors.Unicode)
     
    934874        DEBUG_MESSAGE("errors.UnterminatedEntityRef=" << errors.UnterminatedEntityRef)
    935875        DEBUG_MESSAGE("errors.ExpectedEntityRefName=" << errors.ExpectedEntityRefName)
     876        DEBUG_MESSAGE("errors.BracketInAttrValue=" << errors.BracketInAttrValue)
     877        DEBUG_MESSAGE(" **** fatalErrorMask=" << fatalErrorMask);
    936878
    937879        if (bitblock::any(simd_and(errors.Lexical, fatalErrorMask)))
     
    949891            // calculate the line/col of this error
    950892            XMLFileLoc line, col;
    951             lineCol.get(errorPosition, line, col);
     893            fInternalLineColTracker.get(errorIterator.pos(), line, col);
    952894
    953895            if (bitblock::any(simd_and(errors.Unicode, fatalErrorMask)))
     
    956898                // ILLEGAL UTF16 SURROGATE PAIR
    957899                // ------------------------------------------------------------
    958                 const XMLCh nextCh = reinterpret_cast<const XMLCh*>(source)[errorPosition];
     900                const XMLCh nextCh = reinterpret_cast<const XMLCh*>(source)[errorIterator.pos()];
    959901                if ((nextCh >= 0xD800) && (nextCh <= 0xDBFF))
    960902                {
     
    1032974                errCode = XMLErrs::BadSequenceInCharData;
    1033975            }
    1034 
     976            else if (bitblock::any(simd_and(errors.BracketInAttrValue, fatalErrorMask)))
     977            {
     978                errCode = XMLErrs::BracketInAttrValue;
     979            }
    1035980            fScanner->emitError(errCode, line, col);
    1036981        }
    1037982    }
     983    memset(&errors, 0, sizeof(Errors));
    1038984}
    1039985
  • icXML/icXML-devel/src/icxmlc/XMLUTF16CharacterSetAdapter.hpp

    r3103 r3151  
    77/*
    88 * @author Nigel Medforth, nigelm -at- interational-characters.com
    9  * @version $Id: XMLUTF16CharacterSetAdapter.hpp 301 2013-04-30 21:31:06Z nigelm $
     9 * @version $Id: XMLUTF16CharacterSetAdapter.hpp 316 2013-05-13 20:03:50Z nigelm $
    1010 *
    1111 */
     
    7676                return 2;
    7777        }
     78
     79    IDISA_ALWAYS_INLINE
     80    MemoryManager * getMemoryManager()
     81    {
     82        return fTranscoder.getMemoryManager();
     83    }
    7884
    7985        virtual XMLCh * parse
     
    120126        XMLUTF16CharacterSetAdapter& operator=(const XMLUTF16CharacterSetAdapter&);
    121127
     128protected:
     129
    122130#define error_tracker_NoteError(x, y)
    123131#define NoteXercesXMLErr(x, y)
     132
    124133          struct Parameters {
    125134  BitBlock XML_11;
     
    233242  BitBlock ExpectedEntityRefName;
    234243  BitBlock UnterminatedEntityRef;
     244  BitBlock BracketInAttrValue;
    235245  BitBlock BadSequenceInCharData;
    236246};
     
    239249  Parse_tags() {
    240250 }
    241   IDISA_ALWAYS_INLINE void do_block(Lex & lex, Marker & marker, Callouts & callouts, Errors & errors) {
    242                 BitBlock DQuoteDelim, SQuoteDelim, AttListDelim, ElemName_starts;
    243                 BitBlock elem_name_follows, NoElemNameErr, AttListStart, AfterWS, AttListEnd;
    244                 BitBlock AttNameStart, AttOpenQuotes, AttCloseQuotes, NoAttNameError;
    245                 BitBlock AttNameFollow, EqExpected, EqError, AttValPos, DQuoteAttVal;
    246                 BitBlock SQuoteAttVal, AnyQuote, NoAttValErr, DQuoteAttEnd, SQuoteAttEnd;
    247                 BitBlock AttValEnd, AttValErr, AttValFollow, AttListEndErr, ParseError;
    248                 BitBlock EndTag_Name_starts, EndTagNameMissing, EndTag_Name_follows;
    249                 BitBlock EndTagCloseError;
    250 
    251 
    252 
    253 
    254         DQuoteDelim = simd_or(lex.DQuote, lex.LAngle);
    255         SQuoteDelim = simd_or(lex.SQuote, lex.LAngle);
    256         AttListDelim = simd_or(lex.Slash, lex.RAngle);
     251  IDISA_ALWAYS_INLINE void do_block(Lex & lex, Marker & marker, U16hi & u16hi, U16lo & u16lo, Callouts & callouts, Errors & errors) {
     252                BitBlock ElemName_starts, elem_name_follows, AttListStart, AttListDelim;
     253                BitBlock AfterWS, AttListEnd, AttNameStart, AttOpenQuotes, AttCloseQuotes;
     254                BitBlock NoAttNameError, AttNameFollow, EqExpected, EqError, AttValPos;
     255                BitBlock DQuoteAttVal, SQuoteAttVal, NoAttValErr, DQuoteAttEnd, SQuoteAttEnd;
     256                BitBlock AttValEnd, AttValErr, AttValFollow, AttListEndErr, WS_in_AttVal;
     257                BitBlock ParseError, EndTag_Name_starts, EndTagNameMissing;
     258                BitBlock EndTag_Name_follows;
     259
     260
     261
     262
    257263        ElemName_starts = simd_andc(marker.Tag_opener, lex.Slash);
    258264        callouts.Symbol_starts = simd_or(callouts.Symbol_starts, ElemName_starts);
     
    260266        carryQ.cq[0] = bitblock::srli<127>(pablo_blk_ScanThru(ElemName_starts, lex.NameScan, carryQ.get_carry_in(0), elem_name_follows));
    261267        callouts.Symbol_ends = simd_or(callouts.Symbol_ends, elem_name_follows);
    262         NoElemNameErr = simd_and(ElemName_starts, elem_name_follows);
    263         errors.ExpectedElementName = NoElemNameErr;
     268        errors.ExpectedElementName = simd_and(ElemName_starts, elem_name_follows);
    264269        marker.AttEq_marks = simd<1>::constant<0>();
    265         errors.ExpectedAttrName = simd<1>::constant<0>();
    266         errors.ExpectedEqSign = simd<1>::constant<0>();
    267         errors.ExpectedAttrValue = simd<1>::constant<0>();
    268         errors.UnterminatedStartTag = simd<1>::constant<0>();
    269         errors.ExpectedWhitespace = simd<1>::constant<0>();
    270270        AttListStart = simd_and(elem_name_follows, lex.WS);
     271        AttListDelim = simd_or(lex.Slash, lex.RAngle);
    271272        if ((bitblock::any(AttListStart) || carryQ.CarryTest(1, 9))) {
    272273          carryQ.cq[1] = bitblock::srli<127>(pablo_blk_ScanThru(elem_name_follows, lex.WS, carryQ.get_carry_in(1), AfterWS));
     
    294295            DQuoteAttVal = simd_and(AttValPos, lex.DQuote);
    295296            SQuoteAttVal = simd_and(AttValPos, lex.SQuote);
    296             AnyQuote = simd_or(lex.DQuote, lex.SQuote);
    297             NoAttValErr = simd_andc(AttValPos, AnyQuote);
     297            NoAttValErr = simd_andc(AttValPos, simd_or(lex.DQuote, lex.SQuote));
    298298            errors.ExpectedAttrValue = simd_or(errors.ExpectedAttrValue, NoAttValErr);
    299299            AttOpenQuotes = simd_or(AttOpenQuotes, AttValPos);
    300             carryQ.cq[5] = bitblock::srli<127>(pablo_blk_AdvanceThenScanThru(DQuoteAttVal, simd_not(DQuoteDelim), carryQ.get_carry_in(5), DQuoteAttEnd));
    301             carryQ.cq[6] = bitblock::srli<127>(pablo_blk_AdvanceThenScanThru(SQuoteAttVal, simd_not(SQuoteDelim), carryQ.get_carry_in(6), SQuoteAttEnd));
     300            carryQ.cq[5] = bitblock::srli<127>(pablo_blk_AdvanceThenScanThru(DQuoteAttVal, simd_not(lex.DQuote), carryQ.get_carry_in(5), DQuoteAttEnd));
     301            carryQ.cq[6] = bitblock::srli<127>(pablo_blk_AdvanceThenScanThru(SQuoteAttVal, simd_not(lex.SQuote), carryQ.get_carry_in(6), SQuoteAttEnd));
    302302            AttValEnd = simd_or(DQuoteAttEnd, SQuoteAttEnd);
    303             AttValErr = simd_andc(AttValEnd, simd_or(lex.DQuote, lex.SQuote));
     303            AttValErr = simd_or(simd_andc(DQuoteAttEnd, lex.DQuote), simd_andc(SQuoteAttEnd, lex.SQuote));
     304            AttCloseQuotes = simd_or(AttCloseQuotes, AttValEnd);
    304305            errors.UnterminatedStartTag = simd_or(errors.UnterminatedStartTag, AttValErr);
    305             AttCloseQuotes = simd_or(AttCloseQuotes, AttValEnd);
    306306            carryQ.cq[7] = bitblock::srli<127>(pablo_blk_Advance(AttValEnd, carryQ.get_carry_in(7), AttValFollow));
    307307            AttListEndErr = AttValFollow;
     
    336336              DQuoteAttVal = simd_and(AttValPos, lex.DQuote);
    337337              SQuoteAttVal = simd_and(AttValPos, lex.SQuote);
    338               AnyQuote = simd_or(lex.DQuote, lex.SQuote);
    339               NoAttValErr = simd_andc(AttValPos, AnyQuote);
     338              NoAttValErr = simd_andc(AttValPos, simd_or(lex.DQuote, lex.SQuote));
    340339              errors.ExpectedAttrValue = simd_or(errors.ExpectedAttrValue, NoAttValErr);
    341340              AttOpenQuotes = simd_or(AttOpenQuotes, AttValPos);
    342               subcarryQ.cq[3] = bitblock::srli<127>(pablo_blk_AdvanceThenScanThru(DQuoteAttVal, simd_not(DQuoteDelim), simd<1>::constant<0>(), DQuoteAttEnd));
    343               subcarryQ.cq[4] = bitblock::srli<127>(pablo_blk_AdvanceThenScanThru(SQuoteAttVal, simd_not(SQuoteDelim), simd<1>::constant<0>(), SQuoteAttEnd));
     341              subcarryQ.cq[3] = bitblock::srli<127>(pablo_blk_AdvanceThenScanThru(DQuoteAttVal, simd_not(lex.DQuote), simd<1>::constant<0>(), DQuoteAttEnd));
     342              subcarryQ.cq[4] = bitblock::srli<127>(pablo_blk_AdvanceThenScanThru(SQuoteAttVal, simd_not(lex.SQuote), simd<1>::constant<0>(), SQuoteAttEnd));
    344343              AttValEnd = simd_or(DQuoteAttEnd, SQuoteAttEnd);
    345               AttValErr = simd_andc(AttValEnd, simd_or(lex.DQuote, lex.SQuote));
     344              AttValErr = simd_or(simd_andc(DQuoteAttEnd, lex.DQuote), simd_andc(SQuoteAttEnd, lex.SQuote));
     345              AttCloseQuotes = simd_or(AttCloseQuotes, AttValEnd);
    346346              errors.UnterminatedStartTag = simd_or(errors.UnterminatedStartTag, AttValErr);
    347               AttCloseQuotes = simd_or(AttCloseQuotes, AttValEnd);
    348347              subcarryQ.cq[5] = bitblock::srli<127>(pablo_blk_Advance(AttValEnd, simd<1>::constant<0>(), AttValFollow));
    349348              AttListEndErr = AttValFollow;
     
    365364          }
    366365          carryQ.cq[9] = bitblock::srli<127>(pablo_blk_ExclusiveSpan(AttOpenQuotes, AttCloseQuotes, carryQ.get_carry_in(9), callouts.AttValSpan));
     366          lex.LAngle = lex.LAngle;
     367          errors.BracketInAttrValue = simd_and(lex.LAngle, callouts.AttValSpan);
    367368          callouts.StringEnds = simd_or(callouts.StringEnds, AttCloseQuotes);
     369          WS_in_AttVal = simd_and(lex.WS, callouts.AttValSpan);
     370          if (bitblock::any(WS_in_AttVal)) {
     371            u16lo.bit_2 = simd_or(u16lo.bit_2, WS_in_AttVal);
     372            u16lo.bit_3 = simd_andc(u16lo.bit_3, WS_in_AttVal);
     373            u16lo.bit_4 = simd_andc(u16lo.bit_4, WS_in_AttVal);
     374            u16lo.bit_5 = simd_andc(u16lo.bit_5, WS_in_AttVal);
     375            u16lo.bit_6 = simd_andc(u16lo.bit_6, WS_in_AttVal);
     376            u16lo.bit_7 = simd_andc(u16lo.bit_7, WS_in_AttVal);
     377          }
    368378        }
    369379        else {
     
    371381          AttListEndErr = simd_andc(elem_name_follows, AttListDelim);
    372382          callouts.AttValSpan = simd<1>::constant<0>();
     383          errors.BracketInAttrValue = simd<1>::constant<0>();
    373384          errors.ExpectedWhitespace = simd_or(errors.ExpectedWhitespace, AttListEndErr);
    374385          carryQ.CarryDequeueEnqueue(1, 9);
     
    392403          carryQ.CarryDequeueEnqueue(13, 1);
    393404        }
    394         EndTagCloseError = simd_andc(marker.EndTag_closers, lex.RAngle);
    395         errors.UnterminatedEndTag = EndTagCloseError;
     405        errors.UnterminatedEndTag = simd_andc(marker.EndTag_closers, lex.RAngle);
    396406        marker.Tag_closers = simd_or(simd_or(marker.StartTag_closers, marker.EmptyTag_closers), marker.EndTag_closers);
    397407        carryQ.cq[14] = bitblock::srli<127>(pablo_blk_InclusiveSpan(marker.Tag_opener, marker.Tag_closers, carryQ.get_carry_in(14), marker.TagSpan));
     
    399409        carryQ.CarryQ_Adjust(15);
    400410  }
    401   IDISA_ALWAYS_INLINE void do_final_block(Lex & lex, Marker & marker, Callouts & callouts, Errors & errors, BitBlock EOF_mask) {
    402                 BitBlock DQuoteDelim, SQuoteDelim, AttListDelim, ElemName_starts;
    403                 BitBlock elem_name_follows, NoElemNameErr, AttListStart, AfterWS, AttListEnd;
    404                 BitBlock AttNameStart, AttOpenQuotes, AttCloseQuotes, NoAttNameError;
    405                 BitBlock AttNameFollow, EqExpected, EqError, AttValPos, DQuoteAttVal;
    406                 BitBlock SQuoteAttVal, AnyQuote, NoAttValErr, DQuoteAttEnd, SQuoteAttEnd;
    407                 BitBlock AttValEnd, AttValErr, AttValFollow, AttListEndErr, ParseError;
    408                 BitBlock EndTag_Name_starts, EndTagNameMissing, EndTag_Name_follows;
    409                 BitBlock EndTagCloseError;
    410 
    411 
    412 
    413 
    414         DQuoteDelim = simd_or(lex.DQuote, lex.LAngle);
    415         SQuoteDelim = simd_or(lex.SQuote, lex.LAngle);
    416         AttListDelim = simd_or(lex.Slash, lex.RAngle);
     411  IDISA_ALWAYS_INLINE void do_final_block(Lex & lex, Marker & marker, U16hi & u16hi, U16lo & u16lo, Callouts & callouts, Errors & errors, BitBlock EOF_mask) {
     412                BitBlock ElemName_starts, elem_name_follows, AttListStart, AttListDelim;
     413                BitBlock AfterWS, AttListEnd, AttNameStart, AttOpenQuotes, AttCloseQuotes;
     414                BitBlock NoAttNameError, AttNameFollow, EqExpected, EqError, AttValPos;
     415                BitBlock DQuoteAttVal, SQuoteAttVal, NoAttValErr, DQuoteAttEnd, SQuoteAttEnd;
     416                BitBlock AttValEnd, AttValErr, AttValFollow, AttListEndErr, WS_in_AttVal;
     417                BitBlock ParseError, EndTag_Name_starts, EndTagNameMissing;
     418                BitBlock EndTag_Name_follows;
     419
     420
     421
     422
    417423        ElemName_starts = simd_andc(marker.Tag_opener, lex.Slash);
    418424        callouts.Symbol_starts = simd_or(callouts.Symbol_starts, ElemName_starts);
     
    420426        carryQ.cq[0] = bitblock::srli<127>(pablo_blk_ScanThru(ElemName_starts, lex.NameScan, carryQ.get_carry_in(0), elem_name_follows));
    421427        callouts.Symbol_ends = simd_or(callouts.Symbol_ends, elem_name_follows);
    422         NoElemNameErr = simd_and(ElemName_starts, elem_name_follows);
    423         errors.ExpectedElementName = NoElemNameErr;
     428        errors.ExpectedElementName = simd_and(ElemName_starts, elem_name_follows);
    424429        marker.AttEq_marks = simd<1>::constant<0>();
    425         errors.ExpectedAttrName = simd<1>::constant<0>();
    426         errors.ExpectedEqSign = simd<1>::constant<0>();
    427         errors.ExpectedAttrValue = simd<1>::constant<0>();
    428         errors.UnterminatedStartTag = simd<1>::constant<0>();
    429         errors.ExpectedWhitespace = simd<1>::constant<0>();
    430430        AttListStart = simd_and(elem_name_follows, lex.WS);
     431        AttListDelim = simd_or(lex.Slash, lex.RAngle);
    431432        if ((bitblock::any(AttListStart) || carryQ.CarryTest(1, 9))) {
    432433          carryQ.cq[1] = bitblock::srli<127>(pablo_blk_ScanThru(elem_name_follows, lex.WS, carryQ.get_carry_in(1), AfterWS));
     
    454455            DQuoteAttVal = simd_and(AttValPos, lex.DQuote);
    455456            SQuoteAttVal = simd_and(AttValPos, lex.SQuote);
    456             AnyQuote = simd_or(lex.DQuote, lex.SQuote);
    457             NoAttValErr = simd_andc(AttValPos, AnyQuote);
     457            NoAttValErr = simd_andc(AttValPos, simd_or(lex.DQuote, lex.SQuote));
    458458            errors.ExpectedAttrValue = simd_or(errors.ExpectedAttrValue, NoAttValErr);
    459459            AttOpenQuotes = simd_or(AttOpenQuotes, AttValPos);
    460             carryQ.cq[5] = bitblock::srli<127>(pablo_blk_AdvanceThenScanThru(DQuoteAttVal, simd_andc(EOF_mask, DQuoteDelim), carryQ.get_carry_in(5), DQuoteAttEnd));
    461             carryQ.cq[6] = bitblock::srli<127>(pablo_blk_AdvanceThenScanThru(SQuoteAttVal, simd_andc(EOF_mask, SQuoteDelim), carryQ.get_carry_in(6), SQuoteAttEnd));
     460            carryQ.cq[5] = bitblock::srli<127>(pablo_blk_AdvanceThenScanThru(DQuoteAttVal, simd_andc(EOF_mask, lex.DQuote), carryQ.get_carry_in(5), DQuoteAttEnd));
     461            carryQ.cq[6] = bitblock::srli<127>(pablo_blk_AdvanceThenScanThru(SQuoteAttVal, simd_andc(EOF_mask, lex.SQuote), carryQ.get_carry_in(6), SQuoteAttEnd));
    462462            AttValEnd = simd_or(DQuoteAttEnd, SQuoteAttEnd);
    463             AttValErr = simd_andc(AttValEnd, simd_or(lex.DQuote, lex.SQuote));
     463            AttValErr = simd_or(simd_andc(DQuoteAttEnd, lex.DQuote), simd_andc(SQuoteAttEnd, lex.SQuote));
     464            AttCloseQuotes = simd_or(AttCloseQuotes, AttValEnd);
    464465            errors.UnterminatedStartTag = simd_or(errors.UnterminatedStartTag, AttValErr);
    465             AttCloseQuotes = simd_or(AttCloseQuotes, AttValEnd);
    466466            carryQ.cq[7] = bitblock::srli<127>(pablo_blk_Advance(AttValEnd, carryQ.get_carry_in(7), AttValFollow));
    467467            AttListEndErr = AttValFollow;
     
    496496              DQuoteAttVal = simd_and(AttValPos, lex.DQuote);
    497497              SQuoteAttVal = simd_and(AttValPos, lex.SQuote);
    498               AnyQuote = simd_or(lex.DQuote, lex.SQuote);
    499               NoAttValErr = simd_andc(AttValPos, AnyQuote);
     498              NoAttValErr = simd_andc(AttValPos, simd_or(lex.DQuote, lex.SQuote));
    500499              errors.ExpectedAttrValue = simd_or(errors.ExpectedAttrValue, NoAttValErr);
    501500              AttOpenQuotes = simd_or(AttOpenQuotes, AttValPos);
    502               subcarryQ.cq[3] = bitblock::srli<127>(pablo_blk_AdvanceThenScanThru(DQuoteAttVal, simd_andc(EOF_mask, DQuoteDelim), simd<1>::constant<0>(), DQuoteAttEnd));
    503               subcarryQ.cq[4] = bitblock::srli<127>(pablo_blk_AdvanceThenScanThru(SQuoteAttVal, simd_andc(EOF_mask, SQuoteDelim), simd<1>::constant<0>(), SQuoteAttEnd));
     501              subcarryQ.cq[3] = bitblock::srli<127>(pablo_blk_AdvanceThenScanThru(DQuoteAttVal, simd_andc(EOF_mask, lex.DQuote), simd<1>::constant<0>(), DQuoteAttEnd));
     502              subcarryQ.cq[4] = bitblock::srli<127>(pablo_blk_AdvanceThenScanThru(SQuoteAttVal, simd_andc(EOF_mask, lex.SQuote), simd<1>::constant<0>(), SQuoteAttEnd));
    504503              AttValEnd = simd_or(DQuoteAttEnd, SQuoteAttEnd);
    505               AttValErr = simd_andc(AttValEnd, simd_or(lex.DQuote, lex.SQuote));
     504              AttValErr = simd_or(simd_andc(DQuoteAttEnd, lex.DQuote), simd_andc(SQuoteAttEnd, lex.SQuote));
     505              AttCloseQuotes = simd_or(AttCloseQuotes, AttValEnd);
    506506              errors.UnterminatedStartTag = simd_or(errors.UnterminatedStartTag, AttValErr);
    507               AttCloseQuotes = simd_or(AttCloseQuotes, AttValEnd);
    508507              subcarryQ.cq[5] = bitblock::srli<127>(pablo_blk_Advance(AttValEnd, simd<1>::constant<0>(), AttValFollow));
    509508              AttListEndErr = AttValFollow;
     
    525524          }
    526525          carryQ.cq[9] = bitblock::srli<127>(pablo_blk_ExclusiveSpan(AttOpenQuotes, AttCloseQuotes, carryQ.get_carry_in(9), callouts.AttValSpan));
     526          lex.LAngle = lex.LAngle;
     527          errors.BracketInAttrValue = simd_and(lex.LAngle, callouts.AttValSpan);
    527528          callouts.StringEnds = simd_or(callouts.StringEnds, AttCloseQuotes);
     529          WS_in_AttVal = simd_and(lex.WS, callouts.AttValSpan);
     530          if (bitblock::any(WS_in_AttVal)) {
     531            u16lo.bit_2 = simd_or(u16lo.bit_2, WS_in_AttVal);
     532            u16lo.bit_3 = simd_andc(u16lo.bit_3, WS_in_AttVal);
     533            u16lo.bit_4 = simd_andc(u16lo.bit_4, WS_in_AttVal);
     534            u16lo.bit_5 = simd_andc(u16lo.bit_5, WS_in_AttVal);
     535            u16lo.bit_6 = simd_andc(u16lo.bit_6, WS_in_AttVal);
     536            u16lo.bit_7 = simd_andc(u16lo.bit_7, WS_in_AttVal);
     537          }
    528538        }
    529539        else {
     
    531541          AttListEndErr = simd_andc(elem_name_follows, AttListDelim);
    532542          callouts.AttValSpan = simd<1>::constant<0>();
     543          errors.BracketInAttrValue = simd<1>::constant<0>();
    533544          errors.ExpectedWhitespace = simd_or(errors.ExpectedWhitespace, AttListEndErr);
    534545          carryQ.CarryDequeueEnqueue(1, 9);
     
    552563          carryQ.CarryDequeueEnqueue(13, 1);
    553564        }
    554         EndTagCloseError = simd_andc(marker.EndTag_closers, lex.RAngle);
    555         errors.UnterminatedEndTag = EndTagCloseError;
     565        errors.UnterminatedEndTag = simd_andc(marker.EndTag_closers, lex.RAngle);
    556566        marker.Tag_closers = simd_or(simd_or(marker.StartTag_closers, marker.EmptyTag_closers), marker.EndTag_closers);
    557567        carryQ.cq[14] = bitblock::srli<127>(pablo_blk_InclusiveSpan(marker.Tag_opener, marker.Tag_closers, carryQ.get_carry_in(14), marker.TagSpan));
     
    602612 }
    603613  IDISA_ALWAYS_INLINE void do_block(Lex & lex, Marker & marker, Callouts & callouts, Errors & errors) {
    604                 BitBlock ref_error;
    605 
    606                 BitBlock tempvar0, tempvar1;
    607 
    608 
    609         errors.UnterminatedEntityRef = simd<1>::constant<0>();
    610         errors.ExpectedEntityRefName = simd<1>::constant<0>();
     614                BitBlock EntityRefName;
     615
     616                BitBlock tempvar0;
     617
     618
    611619        if ((bitblock::any(callouts.Ref_opener) || carryQ.CarryTest(0, 3))) {
    612           carryQ.cq[0] = bitblock::srli<127>(pablo_blk_Advance(callouts.Ref_opener, carryQ.get_carry_in(0), tempvar0));
    613           errors.ExpectedEntityRefName = simd_and(tempvar0, lex.Semicolon);
    614           carryQ.cq[1] = bitblock::srli<127>(pablo_blk_ScanThru(callouts.Ref_opener, simd_or(simd_or(lex.NameScan, callouts.Ref_opener), lex.Hash), carryQ.get_carry_in(1), callouts.Ref_closer));
    615           ref_error = simd_andc(callouts.Ref_closer, lex.Semicolon);
    616           errors.UnterminatedEntityRef = ref_error;
    617           carryQ.cq[2] = bitblock::srli<127>(pablo_blk_SpanUpTo(callouts.Ref_opener, callouts.Ref_closer, carryQ.get_carry_in(2), tempvar1));
    618           callouts.delmask = simd_or(callouts.delmask, tempvar1);
     620          carryQ.cq[0] = bitblock::srli<127>(pablo_blk_Advance(callouts.Ref_opener, carryQ.get_carry_in(0), EntityRefName));
     621          carryQ.cq[1] = bitblock::srli<127>(pablo_blk_ScanThru(EntityRefName, simd_or(lex.NameScan, lex.Hash), carryQ.get_carry_in(1), callouts.Ref_closer));
     622          errors.ExpectedEntityRefName = simd_and(EntityRefName, callouts.Ref_closer);
     623          errors.UnterminatedEntityRef = simd_andc(simd_andc(callouts.Ref_closer, lex.Semicolon), errors.ExpectedEntityRefName);
     624          carryQ.cq[2] = bitblock::srli<127>(pablo_blk_SpanUpTo(callouts.Ref_opener, callouts.Ref_closer, carryQ.get_carry_in(2), tempvar0));
     625          callouts.delmask = simd_or(callouts.delmask, tempvar0);
    619626        }
    620627        else {
     
    624631  }
    625632  IDISA_ALWAYS_INLINE void do_final_block(Lex & lex, Marker & marker, Callouts & callouts, Errors & errors, BitBlock EOF_mask) {
    626                 BitBlock ref_error;
    627 
    628                 BitBlock tempvar0, tempvar1;
    629 
    630 
    631         errors.UnterminatedEntityRef = simd<1>::constant<0>();
    632         errors.ExpectedEntityRefName = simd<1>::constant<0>();
     633                BitBlock EntityRefName;
     634
     635                BitBlock tempvar0;
     636
     637
    633638        if ((bitblock::any(callouts.Ref_opener) || carryQ.CarryTest(0, 3))) {
    634           carryQ.cq[0] = bitblock::srli<127>(pablo_blk_Advance(callouts.Ref_opener, carryQ.get_carry_in(0), tempvar0));
    635           errors.ExpectedEntityRefName = simd_and(tempvar0, lex.Semicolon);
    636           carryQ.cq[1] = bitblock::srli<127>(pablo_blk_ScanThru(callouts.Ref_opener, simd_or(simd_or(lex.NameScan, callouts.Ref_opener), lex.Hash), carryQ.get_carry_in(1), callouts.Ref_closer));
    637           ref_error = simd_andc(callouts.Ref_closer, lex.Semicolon);
    638           errors.UnterminatedEntityRef = ref_error;
    639           carryQ.cq[2] = bitblock::srli<127>(pablo_blk_SpanUpTo(callouts.Ref_opener, callouts.Ref_closer, carryQ.get_carry_in(2), tempvar1));
    640           callouts.delmask = simd_or(callouts.delmask, tempvar1);
     639          carryQ.cq[0] = bitblock::srli<127>(pablo_blk_Advance(callouts.Ref_opener, carryQ.get_carry_in(0), EntityRefName));
     640          carryQ.cq[1] = bitblock::srli<127>(pablo_blk_ScanThru(EntityRefName, simd_or(lex.NameScan, lex.Hash), carryQ.get_carry_in(1), callouts.Ref_closer));
     641          errors.ExpectedEntityRefName = simd_and(EntityRefName, callouts.Ref_closer);
     642          errors.UnterminatedEntityRef = simd_andc(simd_andc(callouts.Ref_closer, lex.Semicolon), errors.ExpectedEntityRefName);
     643          carryQ.cq[2] = bitblock::srli<127>(pablo_blk_SpanUpTo(callouts.Ref_opener, callouts.Ref_closer, carryQ.get_carry_in(2), tempvar0));
     644          callouts.delmask = simd_or(callouts.delmask, tempvar0);
    641645        }
    642646        else {
     
    658662                BitBlock CtCDPI_Closer;
    659663
    660                 BitBlock tempvar0, tempvar1, tempvar2, tempvar3, tempvar4, tempvar5;
    661 
    662 
    663         errors.ExpectedCommentOrCDATA = simd<1>::constant<0>();
    664         errors.PINameExpected = simd<1>::constant<0>();
    665         errors.IllegalSequenceInComment = simd<1>::constant<0>();
    666         errors.UnterminatedPI = simd<1>::constant<0>();
    667         errors.UnterminatedCDATASection = simd<1>::constant<0>();
    668         errors.UnterminatedComment = simd<1>::constant<0>();
     664                BitBlock tempvar0, tempvar1, tempvar2, tempvar3, tempvar4, tempvar5, tempvar6;
     665
     666
    669667        marker.PI_openers = simd<1>::constant<0>();
    670668        marker.CtCD_openers = simd<1>::constant<0>();
     
    732730            DoubleHyphen = simd_and(simd_and(v1, w1), lex.Hyphen);
    733731            carryQ.cq[14] = bitblock::srli<127>(pablo_blk_Advance(Ct_Cursor, carryQ.get_carry_in(14), tempvar4));
    734             carryQ.cq[15] = bitblock::srli<127>(pablo_blk_AdvanceThenScanThru(tempvar4, simd_not(DoubleHyphen), carryQ.get_carry_in(15), Ct_Cursor));
    735             carryQ.cq[16] = bitblock::srli<127>(pablo_blk_Advance(Ct_Cursor, carryQ.get_carry_in(16), Ct_Cursor));
     732            carryQ.cq[15] = bitblock::srli<127>(pablo_blk_AdvanceThenScanThru(tempvar4, simd_not(DoubleHyphen), carryQ.get_carry_in(15), tempvar5));
     733            carryQ.cq[16] = bitblock::srli<127>(pablo_blk_Advance(tempvar5, carryQ.get_carry_in(16), Ct_Cursor));
    736734            Ct_error = simd_andc(Ct_Cursor, lex.RAngle);
    737735            errors.IllegalSequenceInComment = simd_or(errors.IllegalSequenceInComment, Ct_error);
     
    742740          CtCDPI_Closer = simd_or(simd_or(PI_Cursor, CD_Cursor), Ct_Cursor);
    743741          marker.CtCDPI_closers = simd_or(marker.CtCDPI_closers, CtCDPI_Closer);
    744           carryQ.cq[17] = bitblock::srli<127>(pablo_blk_InclusiveSpan(CtCDPI_Cursor, CtCDPI_Closer, carryQ.get_carry_in(17), tempvar5));
    745           ctCDPI_mask = simd_or(ctCDPI_mask, tempvar5);
     742          carryQ.cq[17] = bitblock::srli<127>(pablo_blk_InclusiveSpan(CtCDPI_Cursor, CtCDPI_Closer, carryQ.get_carry_in(17), tempvar6));
     743          ctCDPI_mask = simd_or(ctCDPI_mask, tempvar6);
    746744          if (bitblock::any(simd<1>::constant<0>())) {
    747             if (bitblock::any(simd<1>::constant<0>())) {
    748               errors.UnterminatedPI = simd_or(errors.UnterminatedPI, PI_Cursor);
    749             }
    750             if (bitblock::any(simd<1>::constant<0>())) {
    751               errors.UnterminatedCDATASection = simd_or(errors.UnterminatedCDATASection, CD_Cursor);
    752             }
    753             if (bitblock::any(simd<1>::constant<0>())) {
    754               errors.UnterminatedComment = simd_or(errors.UnterminatedComment, Ct_Cursor);
    755             }
     745            errors.UnterminatedPI = simd_or(errors.UnterminatedPI, simd<1>::constant<0>());
     746            errors.UnterminatedCDATASection = simd_or(errors.UnterminatedCDATASection, simd<1>::constant<0>());
     747            Ct_Cursor = simd_or(Ct_Cursor, simd_andc(ctCDPI_mask, simd_or(PI_Cursor, CD_Cursor)));
     748            errors.UnterminatedComment = simd_or(errors.UnterminatedComment, simd<1>::constant<0>());
    756749          }
    757750          callouts.StringEnds = simd_or(callouts.StringEnds, CtCDPI_Closer);
     
    800793              DoubleHyphen = simd_and(simd_and(v1, w1), lex.Hyphen);
    801794              subcarryQ.cq[9] = bitblock::srli<127>(pablo_blk_Advance(Ct_Cursor, simd<1>::constant<0>(), tempvar4));
    802               subcarryQ.cq[10] = bitblock::srli<127>(pablo_blk_AdvanceThenScanThru(tempvar4, simd_not(DoubleHyphen), simd<1>::constant<0>(), Ct_Cursor));
    803               subcarryQ.cq[11] = bitblock::srli<127>(pablo_blk_Advance(Ct_Cursor, simd<1>::constant<0>(), Ct_Cursor));
     795              subcarryQ.cq[10] = bitblock::srli<127>(pablo_blk_AdvanceThenScanThru(tempvar4, simd_not(DoubleHyphen), simd<1>::constant<0>(), tempvar5));
     796              subcarryQ.cq[11] = bitblock::srli<127>(pablo_blk_Advance(tempvar5, simd<1>::constant<0>(), Ct_Cursor));
    804797              Ct_error = simd_andc(Ct_Cursor, lex.RAngle);
    805798              errors.IllegalSequenceInComment = simd_or(errors.IllegalSequenceInComment, Ct_error);
     
    810803            CtCDPI_Closer = simd_or(simd_or(PI_Cursor, CD_Cursor), Ct_Cursor);
    811804            marker.CtCDPI_closers = simd_or(marker.CtCDPI_closers, CtCDPI_Closer);
    812             subcarryQ.cq[12] = bitblock::srli<127>(pablo_blk_InclusiveSpan(CtCDPI_Cursor, CtCDPI_Closer, simd<1>::constant<0>(), tempvar5));
    813             ctCDPI_mask = simd_or(ctCDPI_mask, tempvar5);
     805            subcarryQ.cq[12] = bitblock::srli<127>(pablo_blk_InclusiveSpan(CtCDPI_Cursor, CtCDPI_Closer, simd<1>::constant<0>(), tempvar6));
     806            ctCDPI_mask = simd_or(ctCDPI_mask, tempvar6);
    814807            if (bitblock::any(simd<1>::constant<0>())) {
    815               if (bitblock::any(simd<1>::constant<0>())) {
    816                 errors.UnterminatedPI = simd_or(errors.UnterminatedPI, PI_Cursor);
    817               }
    818               if (bitblock::any(simd<1>::constant<0>())) {
    819                 errors.UnterminatedCDATASection = simd_or(errors.UnterminatedCDATASection, CD_Cursor);
    820               }
    821               if (bitblock::any(simd<1>::constant<0>())) {
    822                 errors.UnterminatedComment = simd_or(errors.UnterminatedComment, Ct_Cursor);
    823               }
     808              errors.UnterminatedPI = simd_or(errors.UnterminatedPI, simd<1>::constant<0>());
     809              errors.UnterminatedCDATASection = simd_or(errors.UnterminatedCDATASection, simd<1>::constant<0>());
     810              Ct_Cursor = simd_or(Ct_Cursor, simd_andc(ctCDPI_mask, simd_or(PI_Cursor, CD_Cursor)));
     811              errors.UnterminatedComment = simd_or(errors.UnterminatedComment, simd<1>::constant<0>());
    824812            }
    825813            callouts.StringEnds = simd_or(callouts.StringEnds, CtCDPI_Closer);
     
    845833                BitBlock CtCDPI_Closer;
    846834
    847                 BitBlock tempvar0, tempvar1, tempvar2, tempvar3, tempvar4, tempvar5;
    848 
    849 
    850         errors.ExpectedCommentOrCDATA = simd<1>::constant<0>();
    851         errors.PINameExpected = simd<1>::constant<0>();
    852         errors.IllegalSequenceInComment = simd<1>::constant<0>();
    853         errors.UnterminatedPI = simd<1>::constant<0>();
    854         errors.UnterminatedCDATASection = simd<1>::constant<0>();
    855         errors.UnterminatedComment = simd<1>::constant<0>();
     835                BitBlock tempvar0, tempvar1, tempvar2, tempvar3, tempvar4, tempvar5, tempvar6;
     836
     837
    856838        marker.PI_openers = simd<1>::constant<0>();
    857839        marker.CtCD_openers = simd<1>::constant<0>();
     
    919901            DoubleHyphen = simd_and(simd_and(v1, w1), lex.Hyphen);
    920902            carryQ.cq[14] = bitblock::srli<127>(pablo_blk_Advance(Ct_Cursor, carryQ.get_carry_in(14), tempvar4));
    921             carryQ.cq[15] = bitblock::srli<127>(pablo_blk_AdvanceThenScanThru(tempvar4, simd_andc(EOF_mask, DoubleHyphen), carryQ.get_carry_in(15), Ct_Cursor));
    922             carryQ.cq[16] = bitblock::srli<127>(pablo_blk_Advance(Ct_Cursor, carryQ.get_carry_in(16), Ct_Cursor));
     903            carryQ.cq[15] = bitblock::srli<127>(pablo_blk_AdvanceThenScanThru(tempvar4, simd_andc(EOF_mask, DoubleHyphen), carryQ.get_carry_in(15), tempvar5));
     904            carryQ.cq[16] = bitblock::srli<127>(pablo_blk_Advance(tempvar5, carryQ.get_carry_in(16), Ct_Cursor));
    923905            Ct_error = simd_andc(Ct_Cursor, lex.RAngle);
    924             errors.IllegalSequenceInComment = simd_or(errors.IllegalSequenceInComment, Ct_error);
     906            errors.IllegalSequenceInComment = simd_or(errors.IllegalSequenceInComment, simd_and(Ct_error, EOF_mask));
    925907          }
    926908          else {
     
    929911          CtCDPI_Closer = simd_or(simd_or(PI_Cursor, CD_Cursor), Ct_Cursor);
    930912          marker.CtCDPI_closers = simd_or(marker.CtCDPI_closers, CtCDPI_Closer);
    931           carryQ.cq[17] = bitblock::srli<127>(pablo_blk_InclusiveSpan(CtCDPI_Cursor, CtCDPI_Closer, carryQ.get_carry_in(17), tempvar5));
    932           ctCDPI_mask = simd_or(ctCDPI_mask, tempvar5);
    933           if (bitblock::any(simd_and(simd_andc(ctCDPI_mask, EOF_mask), bitblock::slli<1>(EOF_mask)))) {
    934             if (bitblock::any(simd_and(simd_andc(PI_Cursor, EOF_mask), bitblock::slli<1>(EOF_mask)))) {
    935               errors.UnterminatedPI = simd_or(errors.UnterminatedPI, PI_Cursor);
    936             }
    937             if (bitblock::any(simd_and(simd_andc(CD_Cursor, EOF_mask), bitblock::slli<1>(EOF_mask)))) {
    938               errors.UnterminatedCDATASection = simd_or(errors.UnterminatedCDATASection, CD_Cursor);
    939             }
    940             if (bitblock::any(simd_and(simd_andc(Ct_Cursor, EOF_mask), bitblock::slli<1>(EOF_mask)))) {
    941               errors.UnterminatedComment = simd_or(errors.UnterminatedComment, Ct_Cursor);
    942             }
     913          carryQ.cq[17] = bitblock::srli<127>(pablo_blk_InclusiveSpan(CtCDPI_Cursor, CtCDPI_Closer, carryQ.get_carry_in(17), tempvar6));
     914          ctCDPI_mask = simd_or(ctCDPI_mask, tempvar6);
     915          if (bitblock::any(simd_andc(simd_andc(ctCDPI_mask, EOF_mask), bitblock::slli<1>(simd_not(EOF_mask))))) {
     916            errors.UnterminatedPI = simd_or(errors.UnterminatedPI, simd_andc(simd_andc(PI_Cursor, EOF_mask), bitblock::slli<1>(simd_not(EOF_mask))));
     917            errors.UnterminatedCDATASection = simd_or(errors.UnterminatedCDATASection, simd_andc(simd_andc(CD_Cursor, EOF_mask), bitblock::slli<1>(simd_not(EOF_mask))));
     918            Ct_Cursor = simd_or(Ct_Cursor, simd_andc(ctCDPI_mask, simd_or(PI_Cursor, CD_Cursor)));
     919            errors.UnterminatedComment = simd_or(errors.UnterminatedComment, simd_andc(simd_andc(Ct_Cursor, EOF_mask), bitblock::slli<1>(simd_not(EOF_mask))));
    943920          }
    944921          callouts.StringEnds = simd_or(callouts.StringEnds, CtCDPI_Closer);
     
    987964              DoubleHyphen = simd_and(simd_and(v1, w1), lex.Hyphen);
    988965              subcarryQ.cq[9] = bitblock::srli<127>(pablo_blk_Advance(Ct_Cursor, simd<1>::constant<0>(), tempvar4));
    989               subcarryQ.cq[10] = bitblock::srli<127>(pablo_blk_AdvanceThenScanThru(tempvar4, simd_andc(EOF_mask, DoubleHyphen), simd<1>::constant<0>(), Ct_Cursor));
    990               subcarryQ.cq[11] = bitblock::srli<127>(pablo_blk_Advance(Ct_Cursor, simd<1>::constant<0>(), Ct_Cursor));
     966              subcarryQ.cq[10] = bitblock::srli<127>(pablo_blk_AdvanceThenScanThru(tempvar4, simd_andc(EOF_mask, DoubleHyphen), simd<1>::constant<0>(), tempvar5));
     967              subcarryQ.cq[11] = bitblock::srli<127>(pablo_blk_Advance(tempvar5, simd<1>::constant<0>(), Ct_Cursor));
    991968              Ct_error = simd_andc(Ct_Cursor, lex.RAngle);
    992               errors.IllegalSequenceInComment = simd_or(errors.IllegalSequenceInComment, Ct_error);
     969              errors.IllegalSequenceInComment = simd_or(errors.IllegalSequenceInComment, simd_and(Ct_error, EOF_mask));
    993970            }
    994971            else {
     
    997974            CtCDPI_Closer = simd_or(simd_or(PI_Cursor, CD_Cursor), Ct_Cursor);
    998975            marker.CtCDPI_closers = simd_or(marker.CtCDPI_closers, CtCDPI_Closer);
    999             subcarryQ.cq[12] = bitblock::srli<127>(pablo_blk_InclusiveSpan(CtCDPI_Cursor, CtCDPI_Closer, simd<1>::constant<0>(), tempvar5));
    1000             ctCDPI_mask = simd_or(ctCDPI_mask, tempvar5);
    1001             if (bitblock::any(simd_and(simd_andc(ctCDPI_mask, EOF_mask), bitblock::slli<1>(EOF_mask)))) {
    1002               if (bitblock::any(simd_and(simd_andc(PI_Cursor, EOF_mask), bitblock::slli<1>(EOF_mask)))) {
    1003                 errors.UnterminatedPI = simd_or(errors.UnterminatedPI, PI_Cursor);
    1004               }
    1005               if (bitblock::any(simd_and(simd_andc(CD_Cursor, EOF_mask), bitblock::slli<1>(EOF_mask)))) {
    1006                 errors.UnterminatedCDATASection = simd_or(errors.UnterminatedCDATASection, CD_Cursor);
    1007               }
    1008               if (bitblock::any(simd_and(simd_andc(Ct_Cursor, EOF_mask), bitblock::slli<1>(EOF_mask)))) {
    1009                 errors.UnterminatedComment = simd_or(errors.UnterminatedComment, Ct_Cursor);
    1010               }
     976            subcarryQ.cq[12] = bitblock::srli<127>(pablo_blk_InclusiveSpan(CtCDPI_Cursor, CtCDPI_Closer, simd<1>::constant<0>(), tempvar6));
     977            ctCDPI_mask = simd_or(ctCDPI_mask, tempvar6);
     978            if (bitblock::any(simd_andc(simd_andc(ctCDPI_mask, EOF_mask), bitblock::slli<1>(simd_not(EOF_mask))))) {
     979              errors.UnterminatedPI = simd_or(errors.UnterminatedPI, simd_andc(simd_andc(PI_Cursor, EOF_mask), bitblock::slli<1>(simd_not(EOF_mask))));
     980              errors.UnterminatedCDATASection = simd_or(errors.UnterminatedCDATASection, simd_andc(simd_andc(CD_Cursor, EOF_mask), bitblock::slli<1>(simd_not(EOF_mask))));
     981              Ct_Cursor = simd_or(Ct_Cursor, simd_andc(ctCDPI_mask, simd_or(PI_Cursor, CD_Cursor)));
     982              errors.UnterminatedComment = simd_or(errors.UnterminatedComment, simd_andc(simd_andc(Ct_Cursor, EOF_mask), bitblock::slli<1>(simd_not(EOF_mask))));
    1011983            }
    1012984            callouts.StringEnds = simd_or(callouts.StringEnds, CtCDPI_Closer);
     
    10691041        AllTagMarks = simd_or(simd_or(simd_or(marker.StartTag_closers, marker.EmptyTag_closers), marker.EndTag_marks), marker.AttEq_marks);
    10701042        TransitionMarks = simd_or(CtCDPI_openers, AllTagMarks);
    1071         callouts.StringEnds = simd_or(callouts.StringEnds, simd_and(simd_andc(simd_not(simd<1>::constant<0>()), EOF_mask), bitblock::slli<1>(EOF_mask)));
     1043        callouts.StringEnds = simd_or(callouts.StringEnds, simd_andc(simd_andc(simd_not(simd<1>::constant<0>()), EOF_mask), bitblock::slli<1>(simd_not(EOF_mask))));
    10721044        callouts.delmask = simd_or(callouts.delmask, simd_andc(simd_or(simd_not(simd_and(simd_not(simd<1>::constant<0>()), EOF_mask)), marker.TagSpan), simd_or(simd_or(callouts.AttValSpan, callouts.StringEnds), TransitionMarks)));
    10731045        zeromask = simd_or(simd_or(TransitionMarks, callouts.StringEnds), callouts.delmask);
     
    12691241          carryQ.CarryDequeueEnqueue(0, 1);
    12701242        }
    1271         lexError = simd_or(simd_andc(x00_x1F, lex.WS), FFFE_FFFF);
     1243        lexError = FFFE_FFFF;
     1244        lexError = simd_or(lexError, simd_andc(x00_x1F, lex.WS));
    12721245        if (bitblock::any(parameters.XML_11)) {
    1273           lex.WS = simd_or(simd_or(lex.WS, lex.LS), lex.NEL);
    1274           lex.NameScan = simd_andc(lex.NameScan, lex.WS);
    1275           lexError = simd_or(lexError, simd_andc(x80_x9F, lex.NEL));
    1276           lexError = simd_or(lexError, DEL);
     1246          lexError = simd_or(lexError, simd_or(simd_andc(x80_x9F, lex.NEL), DEL));
    12771247        }
    12781248        errors.Lexical = lexError;
     
    14461416          carryQ.CarryDequeueEnqueue(0, 1);
    14471417        }
    1448         lexError = simd_or(simd_andc(x00_x1F, lex.WS), FFFE_FFFF);
     1418        lexError = FFFE_FFFF;
     1419        lexError = simd_or(lexError, simd_andc(x00_x1F, lex.WS));
    14491420        if (bitblock::any(parameters.XML_11)) {
    1450           lex.WS = simd_or(simd_or(lex.WS, lex.LS), lex.NEL);
    1451           lex.NameScan = simd_andc(lex.NameScan, lex.WS);
    1452           lexError = simd_or(lexError, simd_andc(x80_x9F, lex.NEL));
    1453           lexError = simd_or(lexError, DEL);
     1421          lexError = simd_or(lexError, simd_or(simd_andc(x80_x9F, lex.NEL), DEL));
    14541422        }
    14551423        errors.Lexical = simd_and(lexError, EOF_mask);
     
    14581426  };
    14591427
    1460   struct Normalize_WS {
    1461   Normalize_WS() {
     1428  struct Normalize_LF {
     1429  Normalize_LF() {
    14621430 }
    14631431  IDISA_ALWAYS_INLINE void do_block(Parameters & parameters, Lex & lex, U16hi & u16hi, U16lo & u16lo, Callouts & callouts, Marker & marker) {
    1464                 BitBlock CRLF, CRNEL, WS_in_AttVal;
     1432                BitBlock CRLF, CRNEL;
    14651433
    14661434                BitBlock tempvar0, tempvar1;
     
    15091477            lex.LF = simd_or(lex.LF, lex.LS);
    15101478          }
     1479          lex.WS = simd_or(lex.WS, lex.LF);
     1480          lex.NameScan = simd_andc(lex.NameScan, lex.LF);
    15111481        }
    15121482        else {
    15131483          carryQ.CarryDequeueEnqueue(1, 1);
    1514         }
    1515         WS_in_AttVal = simd_and(lex.WS, callouts.AttValSpan);
    1516         if (bitblock::any(WS_in_AttVal)) {
    1517           u16lo.bit_2 = simd_or(u16lo.bit_2, WS_in_AttVal);
    1518           u16lo.bit_3 = simd_andc(u16lo.bit_3, WS_in_AttVal);
    1519           u16lo.bit_4 = simd_andc(u16lo.bit_4, WS_in_AttVal);
    1520           u16lo.bit_5 = simd_andc(u16lo.bit_5, WS_in_AttVal);
    1521           u16lo.bit_6 = simd_andc(u16lo.bit_6, WS_in_AttVal);
    1522           u16lo.bit_7 = simd_andc(u16lo.bit_7, WS_in_AttVal);
    15231484        }
    15241485        carryQ.CarryQ_Adjust(2);
    15251486  }
    15261487  IDISA_ALWAYS_INLINE void do_final_block(Parameters & parameters, Lex & lex, U16hi & u16hi, U16lo & u16lo, Callouts & callouts, Marker & marker, BitBlock EOF_mask) {
    1527                 BitBlock CRLF, CRNEL, WS_in_AttVal;
     1488                BitBlock CRLF, CRNEL;
    15281489
    15291490                BitBlock tempvar0, tempvar1;
     
    15721533            lex.LF = simd_or(lex.LF, lex.LS);
    15731534          }
     1535          lex.WS = simd_or(lex.WS, lex.LF);
     1536          lex.NameScan = simd_andc(lex.NameScan, lex.LF);
    15741537        }
    15751538        else {
    15761539          carryQ.CarryDequeueEnqueue(1, 1);
    1577         }
    1578         WS_in_AttVal = simd_and(lex.WS, callouts.AttValSpan);
    1579         if (bitblock::any(WS_in_AttVal)) {
    1580           u16lo.bit_2 = simd_or(u16lo.bit_2, WS_in_AttVal);
    1581           u16lo.bit_3 = simd_andc(u16lo.bit_3, WS_in_AttVal);
    1582           u16lo.bit_4 = simd_andc(u16lo.bit_4, WS_in_AttVal);
    1583           u16lo.bit_5 = simd_andc(u16lo.bit_5, WS_in_AttVal);
    1584           u16lo.bit_6 = simd_andc(u16lo.bit_6, WS_in_AttVal);
    1585           u16lo.bit_7 = simd_andc(u16lo.bit_7, WS_in_AttVal);
    15861540        }
    15871541  }
     
    15911545
    15921546          Classify_bytes classify_bytes;
    1593   Normalize_WS normalize_WS;
     1547  Normalize_LF normalize_LF;
    15941548
    15951549
     
    16161570        ,               size_t &                referenceCount
    16171571        );
    1618 
    16191572
    16201573        IDISA_ALWAYS_INLINE
     
    17041657    }
    17051658
    1706 
    1707         #ifdef CALCULATE_COPY_BACK_POSITION
    1708         static uint64_t calculateUnusedSymbols
    1709         (
    1710                 const BitBlock * const  symbolStream
    1711                 , const unsigned int    avail
    1712                 , const unsigned int    unused
    1713         );
    1714 
    1715         static uint64_t calculateUnusedContent
    1716         (
    1717                 const BitBlock * const  delMaskStream
    1718                 , const unsigned int    avail
    1719                 , const unsigned int    unused
    1720         );
    1721         #endif
    1722 
    1723         void checkErrors(const XMLUTF16CharacterSetAdapter::Errors & errors, const XMLByte * source, XMLLineColTracker & lineCol);
    1724 
    1725     void reportError(BitBlock fatalErrors, const XMLUTF16CharacterSetAdapter::Errors & errors, const XMLByte * source, XMLLineColTracker & lineCol);
    1726 
    1727         IDISA_ALWAYS_INLINE
    1728         MemoryManager * getMemoryManager()
    1729         {
    1730         return fTranscoder.getMemoryManager();
    1731         }
     1659    void checkErrors(const XMLByte * source, XMLUTF16CharacterSetAdapter::Errors & errors);
     1660
     1661    void reportError(const BitBlock fatalErrors, const XMLByte * source, XMLUTF16CharacterSetAdapter::Errors & errors);
    17321662
    17331663protected:
  • icXML/icXML-devel/src/icxmlc/XMLUTF8CharacterSetAdapter.cpp

    r3103 r3151  
    77/*
    88 * @author Nigel Medforth, nigelm -at- interational-characters.com
    9  * @version $Id: XMLUTF8CharacterSetAdapter.cpp 301 2013-04-30 21:31:06Z nigelm $
     9 * @version $Id: XMLUTF8CharacterSetAdapter.cpp 315 2013-05-12 22:34:37Z nigelm $
    1010 *
    1111 */
     
    110110    if (unlikely(!bitblock::all(*deletionMaskStream)))
    111111    {
    112         const BitBlock delmask = bitblock::load_aligned(deletionMaskStream);
    113         BitBlock shift1, shift2, shift4;
    114         del_info_8(delmask, shift1, shift2, shift4);
    115         ubitblock del;
    116         del._128 = del_count(delmask);
    117 
    118         const BitBlock zero = simd<1>::constant<0>();
    119 
    120         for (size_t i = 0; i < 8; i++)
    121         {
    122             bitblock::store_aligned(zero, reinterpret_cast<BytePack *>(contentStream));
    123             contentStream += del._8[i << 1];
    124             bitblock::store_aligned(zero, reinterpret_cast<BytePack *>(contentStream));
    125             contentStream += del._8[(i << 1) | 1];
    126         }
     112        // const size_t leadingNullChars = BLOCK_SIZE - bitblock::popcount(bitblock::load_aligned(deletionMaskStream));
     113        contentStream++;
    127114    }
    128115
     
    352339                *stringEndPtr++ = &contentStreamOrdinal[pos];
    353340            }
    354 
    355             DEBUG_MESSAGE(" -- stringEndCount_" << index << "=" << (stringEndPtr - stringEndStream));
    356341
    357342            // - would it be better to first iterate through the stream and store the values in a fixed ubitblock array
     
    431416        PopCounter<3> markupCounter;
    432417
     418    /// ----------------------------------------------------------------------------------------------------
     419    /// INITIALIZE THE PARSING PARAMETERS
     420    /// ----------------------------------------------------------------------------------------------------
     421
    433422    if (unlikely(fScanner->getXMLVersion() == XMLReader::XMLV1_1))
    434423    {
     
    439428        parameters.XML_11 = simd<1>::constant<0>();
    440429    }
     430    memset(&errors, 0, sizeof(Errors));
    441431
    442432    /// ----------------------------------------------------------------------------------------------------
     
    486476  init_Parameters.do_block(parameters, callouts);
    487477  classify_bytes.do_block(parameters, basis_bits, lex, u8, errors);
     478  transcode_Utf8_To_utf16.do_block(basis_bits, u8, u16hi, u16lo, callouts, errors);
     479  normalize_LF.do_block(parameters, lex, u16hi, u16lo, callouts, marker);
    488480  parse_CtCDPI.do_block(lex, marker, callouts, errors);
    489   parse_tags.do_block(lex, marker, callouts, errors);
     481  parse_tags.do_block(lex, marker, u16hi, u16lo, callouts, errors);
    490482  parse_refs.do_block(lex, marker, callouts, errors);
    491   utf8_to_utf16.do_block(basis_bits, u8, u16hi, u16lo, callouts, errors);
    492   normalize_WS.do_block(parameters, lex, u16hi, u16lo, callouts, marker);
    493483  prepare_content_buffer.do_block(u16hi, u16lo, lex, callouts, marker);
    494484
     
    513503        markupCounter.tally(callouts.MarkupDelimiters);
    514504                // scan for errors
    515         checkErrors(errors, u8, data, fInternalLineColTracker);
     505        checkErrors(u8, data, errors);
    516506        // advance to the next block
    517507        fInternalLineColTracker.advance();
     
    557547        // determine the eof mask
    558548        const BitBlock EOF_mask = maskre(advance);
     549
    559550        // transpose the byte data and mask off any characters that are beyond the EOF mask
    560551        transpose(data, basis_bits, EOF_mask);
     
    563554  init_Parameters.do_final_block(parameters, callouts, EOF_mask);
    564555  classify_bytes.do_final_block(parameters, basis_bits, lex, u8, errors, EOF_mask);
     556  transcode_Utf8_To_utf16.do_final_block(basis_bits, u8, u16hi, u16lo, callouts, errors, EOF_mask);
     557  normalize_LF.do_final_block(parameters, lex, u16hi, u16lo, callouts, marker, EOF_mask);
    565558  parse_CtCDPI.do_final_block(lex, marker, callouts, errors, EOF_mask);
    566   parse_tags.do_final_block(lex, marker, callouts, errors, EOF_mask);
     559  parse_tags.do_final_block(lex, marker, u16hi, u16lo, callouts, errors, EOF_mask);
    567560  parse_refs.do_final_block(lex, marker, callouts, errors, EOF_mask);
    568   utf8_to_utf16.do_final_block(basis_bits, u8, u16hi, u16lo, callouts, errors, EOF_mask);
    569   normalize_WS.do_final_block(parameters, lex, u16hi, u16lo, callouts, marker, EOF_mask);
    570561  prepare_content_buffer.do_final_block(u16hi, u16lo, lex, callouts, marker, EOF_mask);
    571562
     
    588579        markupCounter.tally(callouts.MarkupDelimiters);
    589580        // scan for errors
    590         checkErrors(errors, u8, data, fInternalLineColTracker);
     581        checkErrors(u8, data, errors);
    591582        // advance to the next block
    592583        fInternalLineColTracker.advance();
     
    660651
    661652IDISA_ALWAYS_INLINE
    662 void XMLUTF8CharacterSetAdapter::checkErrors(const XMLUTF8CharacterSetAdapter::Errors & errors, const U8 & u8, const XMLByte * input, XMLLineColTracker & lineCol)
     653void XMLUTF8CharacterSetAdapter::checkErrors(const U8 & u8, const XMLByte * input, XMLUTF8CharacterSetAdapter::Errors & errors)
    663654{
    664655        BitBlock temp0 = simd_or(errors.Unicode, errors.Lexical);
     
    678669
    679670        temp0 = simd_or(temp0, temp2);
    680         temp4 = simd_or(temp4, temp6);
     671    temp4 = simd_or(temp4, temp6);
    681672
    682673        temp0 = simd_or(temp0, temp4);
     674    temp8 = simd_or(temp8, errors.BracketInAttrValue);
    683675
    684676    const BitBlock fatalErrors = simd_or(temp0, temp8);
     
    687679        if (unlikely(bitblock::any(fatalErrors)))
    688680        {
    689         reportError(fatalErrors, errors, u8, input, lineCol);
     681        reportError(fatalErrors, u8, input, errors);
    690682        }
    691683}
    692684
    693 void XMLUTF8CharacterSetAdapter::reportError(const BitBlock fatalErrors, const XMLUTF8CharacterSetAdapter::Errors & errors, const U8 & u8, const XMLByte * input, XMLLineColTracker & lineCol)
     685void XMLUTF8CharacterSetAdapter::reportError(const BitBlock fatalErrors, const U8 & u8, const XMLByte * input, XMLUTF8CharacterSetAdapter::Errors & errors)
    694686{
    695687    XMLStreamIterator errorIterator(fatalErrors);
    696     size_t errorPosition;
     688
     689    // NOTE: when redoing this, I need to make sure I can get the file name for the error handling
     690    // Tag errors ought to give the name of the start tag, end tag, attribute, entity, etc...
    697691
    698692    while (errorIterator.next())
    699693    {
    700         errorPosition = errorIterator.pos();
    701 
    702         const BitBlock fatalErrorMask = mask_forward_zeroes(errorPosition);
    703 
    704         DEBUG_MESSAGE(" ==================== FATAL ERROR ========================")
     694        const BitBlock fatalErrorMask = maskri(errorIterator.pos()); // mask_reverse_zeroes(BLOCK_SIZE - errorIterator.pos() - 1);
     695
     696        DEBUG_MESSAGE(" ==================== FATAL ERROR @ " << errorIterator.pos() << " ========================")
    705697
    706698        DEBUG_MESSAGE("errors.Unicode=" << errors.Unicode)
     
    721713        DEBUG_MESSAGE("errors.UnterminatedEntityRef=" << errors.UnterminatedEntityRef)
    722714        DEBUG_MESSAGE("errors.ExpectedEntityRefName=" << errors.ExpectedEntityRefName)
     715        DEBUG_MESSAGE("errors.BracketInAttrValue=" << errors.BracketInAttrValue)
     716        DEBUG_MESSAGE(" **** fatalErrorMask=" << fatalErrorMask);
    723717
    724718        if (bitblock::any(simd_and(errors.Unicode, fatalErrorMask)))
     
    751745            #undef IF_ERROR_IN
    752746
    753             const XMLByte * srcPtr = &input[errorPosition - offset];
     747            const XMLByte * srcPtr = &input[errorIterator.pos() - offset];
    754748
    755749            switch (trailingBytes)
     
    815809            // calculate the line/col of this error
    816810            XMLFileLoc line, col;
    817             lineCol.get(errorPosition, line, col);
     811            fInternalLineColTracker.get(errorIterator.pos(), line, col);
    818812
    819813
     
    882876                errCode = XMLErrs::BadSequenceInCharData;
    883877            }
    884 
     878            else if (bitblock::any(simd_and(errors.BracketInAttrValue, fatalErrorMask)))
     879            {
     880                errCode = XMLErrs::BracketInAttrValue;
     881            }
    885882            fScanner->emitError(errCode, line, col);
    886883        }
    887884    }
     885    memset(&errors, 0, sizeof(Errors));
    888886}
    889887
  • icXML/icXML-devel/src/icxmlc/XMLUTF8CharacterSetAdapter.hpp

    r3103 r3151  
    77/*
    88 * @author Nigel Medforth, nigelm -at- interational-characters.com
    9  * @version $Id: XMLUTF8CharacterSetAdapter.hpp 301 2013-04-30 21:31:06Z nigelm $
     9 * @version $Id: XMLUTF8CharacterSetAdapter.hpp 316 2013-05-13 20:03:50Z nigelm $
    1010 *
    1111 */
     
    201201  BitBlock ExpectedEntityRefName;
    202202  BitBlock UnterminatedEntityRef;
     203  BitBlock BracketInAttrValue;
    203204  BitBlock BadSequenceInCharData;
    204205};
     
    207208  Parse_tags() {
    208209 }
    209   IDISA_ALWAYS_INLINE void do_block(Lex & lex, Marker & marker, Callouts & callouts, Errors & errors) {
    210                 BitBlock DQuoteDelim, SQuoteDelim, AttListDelim, ElemName_starts;
    211                 BitBlock elem_name_follows, NoElemNameErr, AttListStart, AfterWS, AttListEnd;
    212                 BitBlock AttNameStart, AttOpenQuotes, AttCloseQuotes, NoAttNameError;
    213                 BitBlock AttNameFollow, EqExpected, EqError, AttValPos, DQuoteAttVal;
    214                 BitBlock SQuoteAttVal, AnyQuote, NoAttValErr, DQuoteAttEnd, SQuoteAttEnd;
    215                 BitBlock AttValEnd, AttValErr, AttValFollow, AttListEndErr, ParseError;
    216                 BitBlock EndTag_Name_starts, EndTagNameMissing, EndTag_Name_follows;
    217                 BitBlock EndTagCloseError;
    218 
    219 
    220 
    221 
    222         DQuoteDelim = simd_or(lex.DQuote, lex.LAngle);
    223         SQuoteDelim = simd_or(lex.SQuote, lex.LAngle);
    224         AttListDelim = simd_or(lex.Slash, lex.RAngle);
     210  IDISA_ALWAYS_INLINE void do_block(Lex & lex, Marker & marker, U16hi & u16hi, U16lo & u16lo, Callouts & callouts, Errors & errors) {
     211                BitBlock ElemName_starts, elem_name_follows, AttListStart, AttListDelim;
     212                BitBlock AfterWS, AttListEnd, AttNameStart, AttOpenQuotes, AttCloseQuotes;
     213                BitBlock NoAttNameError, AttNameFollow, EqExpected, EqError, AttValPos;
     214                BitBlock DQuoteAttVal, SQuoteAttVal, NoAttValErr, DQuoteAttEnd, SQuoteAttEnd;
     215                BitBlock AttValEnd, AttValErr, AttValFollow, AttListEndErr, WS_in_AttVal;
     216                BitBlock ParseError, EndTag_Name_starts, EndTagNameMissing;
     217                BitBlock EndTag_Name_follows;
     218
     219
     220
     221
    225222        ElemName_starts = simd_andc(marker.Tag_opener, lex.Slash);
    226223        callouts.Symbol_starts = simd_or(callouts.Symbol_starts, ElemName_starts);
     
    228225        carryQ.cq[0] = bitblock::srli<127>(pablo_blk_ScanThru(ElemName_starts, lex.NameScan, carryQ.get_carry_in(0), elem_name_follows));
    229226        callouts.Symbol_ends = simd_or(callouts.Symbol_ends, elem_name_follows);
    230         NoElemNameErr = simd_and(ElemName_starts, elem_name_follows);
    231         errors.ExpectedElementName = NoElemNameErr;
     227        errors.ExpectedElementName = simd_and(ElemName_starts, elem_name_follows);
    232228        marker.AttEq_marks = simd<1>::constant<0>();
    233         errors.ExpectedAttrName = simd<1>::constant<0>();
    234         errors.ExpectedEqSign = simd<1>::constant<0>();
    235         errors.ExpectedAttrValue = simd<1>::constant<0>();
    236         errors.UnterminatedStartTag = simd<1>::constant<0>();
    237         errors.ExpectedWhitespace = simd<1>::constant<0>();
    238229        AttListStart = simd_and(elem_name_follows, lex.WS);
     230        AttListDelim = simd_or(lex.Slash, lex.RAngle);
    239231        if ((bitblock::any(AttListStart) || carryQ.CarryTest(1, 9))) {
    240232          carryQ.cq[1] = bitblock::srli<127>(pablo_blk_ScanThru(elem_name_follows, lex.WS, carryQ.get_carry_in(1), AfterWS));
     
    262254            DQuoteAttVal = simd_and(AttValPos, lex.DQuote);
    263255            SQuoteAttVal = simd_and(AttValPos, lex.SQuote);
    264             AnyQuote = simd_or(lex.DQuote, lex.SQuote);
    265             NoAttValErr = simd_andc(AttValPos, AnyQuote);
     256            NoAttValErr = simd_andc(AttValPos, simd_or(lex.DQuote, lex.SQuote));
    266257            errors.ExpectedAttrValue = simd_or(errors.ExpectedAttrValue, NoAttValErr);
    267258            AttOpenQuotes = simd_or(AttOpenQuotes, AttValPos);
    268             carryQ.cq[5] = bitblock::srli<127>(pablo_blk_AdvanceThenScanThru(DQuoteAttVal, simd_not(DQuoteDelim), carryQ.get_carry_in(5), DQuoteAttEnd));
    269             carryQ.cq[6] = bitblock::srli<127>(pablo_blk_AdvanceThenScanThru(SQuoteAttVal, simd_not(SQuoteDelim), carryQ.get_carry_in(6), SQuoteAttEnd));
     259            carryQ.cq[5] = bitblock::srli<127>(pablo_blk_AdvanceThenScanThru(DQuoteAttVal, simd_not(lex.DQuote), carryQ.get_carry_in(5), DQuoteAttEnd));
     260            carryQ.cq[6] = bitblock::srli<127>(pablo_blk_AdvanceThenScanThru(SQuoteAttVal, simd_not(lex.SQuote), carryQ.get_carry_in(6), SQuoteAttEnd));
    270261            AttValEnd = simd_or(DQuoteAttEnd, SQuoteAttEnd);
    271             AttValErr = simd_andc(AttValEnd, simd_or(lex.DQuote, lex.SQuote));
     262            AttValErr = simd_or(simd_andc(DQuoteAttEnd, lex.DQuote), simd_andc(SQuoteAttEnd, lex.SQuote));
     263            AttCloseQuotes = simd_or(AttCloseQuotes, AttValEnd);
    272264            errors.UnterminatedStartTag = simd_or(errors.UnterminatedStartTag, AttValErr);
    273             AttCloseQuotes = simd_or(AttCloseQuotes, AttValEnd);
    274265            carryQ.cq[7] = bitblock::srli<127>(pablo_blk_Advance(AttValEnd, carryQ.get_carry_in(7), AttValFollow));
    275266            AttListEndErr = AttValFollow;
     
    304295              DQuoteAttVal = simd_and(AttValPos, lex.DQuote);
    305296              SQuoteAttVal = simd_and(AttValPos, lex.SQuote);
    306               AnyQuote = simd_or(lex.DQuote, lex.SQuote);
    307               NoAttValErr = simd_andc(AttValPos, AnyQuote);
     297              NoAttValErr = simd_andc(AttValPos, simd_or(lex.DQuote, lex.SQuote));
    308298              errors.ExpectedAttrValue = simd_or(errors.ExpectedAttrValue, NoAttValErr);
    309299              AttOpenQuotes = simd_or(AttOpenQuotes, AttValPos);
    310               subcarryQ.cq[3] = bitblock::srli<127>(pablo_blk_AdvanceThenScanThru(DQuoteAttVal, simd_not(DQuoteDelim), simd<1>::constant<0>(), DQuoteAttEnd));
    311               subcarryQ.cq[4] = bitblock::srli<127>(pablo_blk_AdvanceThenScanThru(SQuoteAttVal, simd_not(SQuoteDelim), simd<1>::constant<0>(), SQuoteAttEnd));
     300              subcarryQ.cq[3] = bitblock::srli<127>(pablo_blk_AdvanceThenScanThru(DQuoteAttVal, simd_not(lex.DQuote), simd<1>::constant<0>(), DQuoteAttEnd));
     301              subcarryQ.cq[4] = bitblock::srli<127>(pablo_blk_AdvanceThenScanThru(SQuoteAttVal, simd_not(lex.SQuote), simd<1>::constant<0>(), SQuoteAttEnd));
    312302              AttValEnd = simd_or(DQuoteAttEnd, SQuoteAttEnd);
    313               AttValErr = simd_andc(AttValEnd, simd_or(lex.DQuote, lex.SQuote));
     303              AttValErr = simd_or(simd_andc(DQuoteAttEnd, lex.DQuote), simd_andc(SQuoteAttEnd, lex.SQuote));
     304              AttCloseQuotes = simd_or(AttCloseQuotes, AttValEnd);
    314305              errors.UnterminatedStartTag = simd_or(errors.UnterminatedStartTag, AttValErr);
    315               AttCloseQuotes = simd_or(AttCloseQuotes, AttValEnd);
    316306              subcarryQ.cq[5] = bitblock::srli<127>(pablo_blk_Advance(AttValEnd, simd<1>::constant<0>(), AttValFollow));
    317307              AttListEndErr = AttValFollow;
     
    333323          }
    334324          carryQ.cq[9] = bitblock::srli<127>(pablo_blk_ExclusiveSpan(AttOpenQuotes, AttCloseQuotes, carryQ.get_carry_in(9), callouts.AttValSpan));
     325          lex.LAngle = lex.LAngle;
     326          errors.BracketInAttrValue = simd_and(lex.LAngle, callouts.AttValSpan);
    335327          callouts.StringEnds = simd_or(callouts.StringEnds, AttCloseQuotes);
     328          WS_in_AttVal = simd_and(lex.WS, callouts.AttValSpan);
     329          if (bitblock::any(WS_in_AttVal)) {
     330            u16lo.bit_2 = simd_or(u16lo.bit_2, WS_in_AttVal);
     331            u16lo.bit_3 = simd_andc(u16lo.bit_3, WS_in_AttVal);
     332            u16lo.bit_4 = simd_andc(u16lo.bit_4, WS_in_AttVal);
     333            u16lo.bit_5 = simd_andc(u16lo.bit_5, WS_in_AttVal);
     334            u16lo.bit_6 = simd_andc(u16lo.bit_6, WS_in_AttVal);
     335            u16lo.bit_7 = simd_andc(u16lo.bit_7, WS_in_AttVal);
     336          }
    336337        }
    337338        else {
     
    339340          AttListEndErr = simd_andc(elem_name_follows, AttListDelim);
    340341          callouts.AttValSpan = simd<1>::constant<0>();
     342          errors.BracketInAttrValue = simd<1>::constant<0>();
    341343          errors.ExpectedWhitespace = simd_or(errors.ExpectedWhitespace, AttListEndErr);
    342344          carryQ.CarryDequeueEnqueue(1, 9);
     
    360362          carryQ.CarryDequeueEnqueue(13, 1);
    361363        }
    362         EndTagCloseError = simd_andc(marker.EndTag_closers, lex.RAngle);
    363         errors.UnterminatedEndTag = EndTagCloseError;
     364        errors.UnterminatedEndTag = simd_andc(marker.EndTag_closers, lex.RAngle);
    364365        marker.Tag_closers = simd_or(simd_or(marker.StartTag_closers, marker.EmptyTag_closers), marker.EndTag_closers);
    365366        carryQ.cq[14] = bitblock::srli<127>(pablo_blk_InclusiveSpan(marker.Tag_opener, marker.Tag_closers, carryQ.get_carry_in(14), marker.TagSpan));
     
    367368        carryQ.CarryQ_Adjust(15);
    368369  }
    369   IDISA_ALWAYS_INLINE void do_final_block(Lex & lex, Marker & marker, Callouts & callouts, Errors & errors, BitBlock EOF_mask) {
    370                 BitBlock DQuoteDelim, SQuoteDelim, AttListDelim, ElemName_starts;
    371                 BitBlock elem_name_follows, NoElemNameErr, AttListStart, AfterWS, AttListEnd;
    372                 BitBlock AttNameStart, AttOpenQuotes, AttCloseQuotes, NoAttNameError;
    373                 BitBlock AttNameFollow, EqExpected, EqError, AttValPos, DQuoteAttVal;
    374                 BitBlock SQuoteAttVal, AnyQuote, NoAttValErr, DQuoteAttEnd, SQuoteAttEnd;
    375                 BitBlock AttValEnd, AttValErr, AttValFollow, AttListEndErr, ParseError;
    376                 BitBlock EndTag_Name_starts, EndTagNameMissing, EndTag_Name_follows;
    377                 BitBlock EndTagCloseError;
    378 
    379 
    380 
    381 
    382         DQuoteDelim = simd_or(lex.DQuote, lex.LAngle);
    383         SQuoteDelim = simd_or(lex.SQuote, lex.LAngle);
    384         AttListDelim = simd_or(lex.Slash, lex.RAngle);
     370  IDISA_ALWAYS_INLINE void do_final_block(Lex & lex, Marker & marker, U16hi & u16hi, U16lo & u16lo, Callouts & callouts, Errors & errors, BitBlock EOF_mask) {
     371                BitBlock ElemName_starts, elem_name_follows, AttListStart, AttListDelim;
     372                BitBlock AfterWS, AttListEnd, AttNameStart, AttOpenQuotes, AttCloseQuotes;
     373                BitBlock NoAttNameError, AttNameFollow, EqExpected, EqError, AttValPos;
     374                BitBlock DQuoteAttVal, SQuoteAttVal, NoAttValErr, DQuoteAttEnd, SQuoteAttEnd;
     375                BitBlock AttValEnd, AttValErr, AttValFollow, AttListEndErr, WS_in_AttVal;
     376                BitBlock ParseError, EndTag_Name_starts, EndTagNameMissing;
     377                BitBlock EndTag_Name_follows;
     378
     379
     380
     381
    385382        ElemName_starts = simd_andc(marker.Tag_opener, lex.Slash);
    386383        callouts.Symbol_starts = simd_or(callouts.Symbol_starts, ElemName_starts);
     
    388385        carryQ.cq[0] = bitblock::srli<127>(pablo_blk_ScanThru(ElemName_starts, lex.NameScan, carryQ.get_carry_in(0), elem_name_follows));
    389386        callouts.Symbol_ends = simd_or(callouts.Symbol_ends, elem_name_follows);
    390         NoElemNameErr = simd_and(ElemName_starts, elem_name_follows);
    391         errors.ExpectedElementName = NoElemNameErr;
     387        errors.ExpectedElementName = simd_and(ElemName_starts, elem_name_follows);
    392388        marker.AttEq_marks = simd<1>::constant<0>();
    393         errors.ExpectedAttrName = simd<1>::constant<0>();
    394         errors.ExpectedEqSign = simd<1>::constant<0>();
    395         errors.ExpectedAttrValue = simd<1>::constant<0>();
    396         errors.UnterminatedStartTag = simd<1>::constant<0>();
    397         errors.ExpectedWhitespace = simd<1>::constant<0>();
    398389        AttListStart = simd_and(elem_name_follows, lex.WS);
     390        AttListDelim = simd_or(lex.Slash, lex.RAngle);
    399391        if ((bitblock::any(AttListStart) || carryQ.CarryTest(1, 9))) {
    400392          carryQ.cq[1] = bitblock::srli<127>(pablo_blk_ScanThru(elem_name_follows, lex.WS, carryQ.get_carry_in(1), AfterWS));
     
    422414            DQuoteAttVal = simd_and(AttValPos, lex.DQuote);
    423415            SQuoteAttVal = simd_and(AttValPos, lex.SQuote);
    424             AnyQuote = simd_or(lex.DQuote, lex.SQuote);
    425             NoAttValErr = simd_andc(AttValPos, AnyQuote);
     416            NoAttValErr = simd_andc(AttValPos, simd_or(lex.DQuote, lex.SQuote));
    426417            errors.ExpectedAttrValue = simd_or(errors.ExpectedAttrValue, NoAttValErr);
    427418            AttOpenQuotes = simd_or(AttOpenQuotes, AttValPos);
    428             carryQ.cq[5] = bitblock::srli<127>(pablo_blk_AdvanceThenScanThru(DQuoteAttVal, simd_andc(EOF_mask, DQuoteDelim), carryQ.get_carry_in(5), DQuoteAttEnd));
    429             carryQ.cq[6] = bitblock::srli<127>(pablo_blk_AdvanceThenScanThru(SQuoteAttVal, simd_andc(EOF_mask, SQuoteDelim), carryQ.get_carry_in(6), SQuoteAttEnd));
     419            carryQ.cq[5] = bitblock::srli<127>(pablo_blk_AdvanceThenScanThru(DQuoteAttVal, simd_andc(EOF_mask, lex.DQuote), carryQ.get_carry_in(5), DQuoteAttEnd));
     420            carryQ.cq[6] = bitblock::srli<127>(pablo_blk_AdvanceThenScanThru(SQuoteAttVal, simd_andc(EOF_mask, lex.SQuote), carryQ.get_carry_in(6), SQuoteAttEnd));
    430421            AttValEnd = simd_or(DQuoteAttEnd, SQuoteAttEnd);
    431             AttValErr = simd_andc(AttValEnd, simd_or(lex.DQuote, lex.SQuote));
     422            AttValErr = simd_or(simd_andc(DQuoteAttEnd, lex.DQuote), simd_andc(SQuoteAttEnd, lex.SQuote));
     423            AttCloseQuotes = simd_or(AttCloseQuotes, AttValEnd);
    432424            errors.UnterminatedStartTag = simd_or(errors.UnterminatedStartTag, AttValErr);
    433             AttCloseQuotes = simd_or(AttCloseQuotes, AttValEnd);
    434425            carryQ.cq[7] = bitblock::srli<127>(pablo_blk_Advance(AttValEnd, carryQ.get_carry_in(7), AttValFollow));
    435426            AttListEndErr = AttValFollow;
     
    464455              DQuoteAttVal = simd_and(AttValPos, lex.DQuote);
    465456              SQuoteAttVal = simd_and(AttValPos, lex.SQuote);
    466               AnyQuote = simd_or(lex.DQuote, lex.SQuote);
    467               NoAttValErr = simd_andc(AttValPos, AnyQuote);
     457              NoAttValErr = simd_andc(AttValPos, simd_or(lex.DQuote, lex.SQuote));
    468458              errors.ExpectedAttrValue = simd_or(errors.ExpectedAttrValue, NoAttValErr);
    469459              AttOpenQuotes = simd_or(AttOpenQuotes, AttValPos);
    470               subcarryQ.cq[3] = bitblock::srli<127>(pablo_blk_AdvanceThenScanThru(DQuoteAttVal, simd_andc(EOF_mask, DQuoteDelim), simd<1>::constant<0>(), DQuoteAttEnd));
    471               subcarryQ.cq[4] = bitblock::srli<127>(pablo_blk_AdvanceThenScanThru(SQuoteAttVal, simd_andc(EOF_mask, SQuoteDelim), simd<1>::constant<0>(), SQuoteAttEnd));
     460              subcarryQ.cq[3] = bitblock::srli<127>(pablo_blk_AdvanceThenScanThru(DQuoteAttVal, simd_andc(EOF_mask, lex.DQuote), simd<1>::constant<0>(), DQuoteAttEnd));
     461              subcarryQ.cq[4] = bitblock::srli<127>(pablo_blk_AdvanceThenScanThru(SQuoteAttVal, simd_andc(EOF_mask, lex.SQuote), simd<1>::constant<0>(), SQuoteAttEnd));
    472462              AttValEnd = simd_or(DQuoteAttEnd, SQuoteAttEnd);
    473               AttValErr = simd_andc(AttValEnd, simd_or(lex.DQuote, lex.SQuote));
     463              AttValErr = simd_or(simd_andc(DQuoteAttEnd, lex.DQuote), simd_andc(SQuoteAttEnd, lex.SQuote));
     464              AttCloseQuotes = simd_or(AttCloseQuotes, AttValEnd);
    474465              errors.UnterminatedStartTag = simd_or(errors.UnterminatedStartTag, AttValErr);
    475               AttCloseQuotes = simd_or(AttCloseQuotes, AttValEnd);
    476466              subcarryQ.cq[5] = bitblock::srli<127>(pablo_blk_Advance(AttValEnd, simd<1>::constant<0>(), AttValFollow));
    477467              AttListEndErr = AttValFollow;
     
    493483          }
    494484          carryQ.cq[9] = bitblock::srli<127>(pablo_blk_ExclusiveSpan(AttOpenQuotes, AttCloseQuotes, carryQ.get_carry_in(9), callouts.AttValSpan));
     485          lex.LAngle = lex.LAngle;
     486          errors.BracketInAttrValue = simd_and(lex.LAngle, callouts.AttValSpan);
    495487          callouts.StringEnds = simd_or(callouts.StringEnds, AttCloseQuotes);
     488          WS_in_AttVal = simd_and(lex.WS, callouts.AttValSpan);
     489          if (bitblock::any(WS_in_AttVal)) {
     490            u16lo.bit_2 = simd_or(u16lo.bit_2, WS_in_AttVal);
     491            u16lo.bit_3 = simd_andc(u16lo.bit_3, WS_in_AttVal);
     492            u16lo.bit_4 = simd_andc(u16lo.bit_4, WS_in_AttVal);
     493            u16lo.bit_5 = simd_andc(u16lo.bit_5, WS_in_AttVal);
     494            u16lo.bit_6 = simd_andc(u16lo.bit_6, WS_in_AttVal);
     495            u16lo.bit_7 = simd_andc(u16lo.bit_7, WS_in_AttVal);
     496          }
    496497        }
    497498        else {
     
    499500          AttListEndErr = simd_andc(elem_name_follows, AttListDelim);
    500501          callouts.AttValSpan = simd<1>::constant<0>();
     502          errors.BracketInAttrValue = simd<1>::constant<0>();
    501503          errors.ExpectedWhitespace = simd_or(errors.ExpectedWhitespace, AttListEndErr);
    502504          carryQ.CarryDequeueEnqueue(1, 9);
     
    520522          carryQ.CarryDequeueEnqueue(13, 1);
    521523        }
    522         EndTagCloseError = simd_andc(marker.EndTag_closers, lex.RAngle);
    523         errors.UnterminatedEndTag = EndTagCloseError;
     524        errors.UnterminatedEndTag = simd_andc(marker.EndTag_closers, lex.RAngle);
    524525        marker.Tag_closers = simd_or(simd_or(marker.StartTag_closers, marker.EmptyTag_closers), marker.EndTag_closers);
    525526        carryQ.cq[14] = bitblock::srli<127>(pablo_blk_InclusiveSpan(marker.Tag_opener, marker.Tag_closers, carryQ.get_carry_in(14), marker.TagSpan));
     
    570571 }
    571572  IDISA_ALWAYS_INLINE void do_block(Lex & lex, Marker & marker, Callouts & callouts, Errors & errors) {
    572                 BitBlock ref_error;
    573 
    574                 BitBlock tempvar0, tempvar1;
    575 
    576 
    577         errors.UnterminatedEntityRef = simd<1>::constant<0>();
    578         errors.ExpectedEntityRefName = simd<1>::constant<0>();
     573                BitBlock EntityRefName;
     574
     575                BitBlock tempvar0;
     576
     577
    579578        if ((bitblock::any(callouts.Ref_opener) || carryQ.CarryTest(0, 3))) {
    580           carryQ.cq[0] = bitblock::srli<127>(pablo_blk_Advance(callouts.Ref_opener, carryQ.get_carry_in(0), tempvar0));
    581           errors.ExpectedEntityRefName = simd_and(tempvar0, lex.Semicolon);
    582           carryQ.cq[1] = bitblock::srli<127>(pablo_blk_ScanThru(callouts.Ref_opener, simd_or(simd_or(lex.NameScan, callouts.Ref_opener), lex.Hash), carryQ.get_carry_in(1), callouts.Ref_closer));
    583           ref_error = simd_andc(callouts.Ref_closer, lex.Semicolon);
    584           errors.UnterminatedEntityRef = ref_error;
    585           carryQ.cq[2] = bitblock::srli<127>(pablo_blk_SpanUpTo(callouts.Ref_opener, callouts.Ref_closer, carryQ.get_carry_in(2), tempvar1));
    586           callouts.delmask = simd_or(callouts.delmask, tempvar1);
     579          carryQ.cq[0] = bitblock::srli<127>(pablo_blk_Advance(callouts.Ref_opener, carryQ.get_carry_in(0), EntityRefName));
     580          carryQ.cq[1] = bitblock::srli<127>(pablo_blk_ScanThru(EntityRefName, simd_or(lex.NameScan, lex.Hash), carryQ.get_carry_in(1), callouts.Ref_closer));
     581          errors.ExpectedEntityRefName = simd_and(EntityRefName, callouts.Ref_closer);
     582          errors.UnterminatedEntityRef = simd_andc(simd_andc(callouts.Ref_closer, lex.Semicolon), errors.ExpectedEntityRefName);
     583          carryQ.cq[2] = bitblock::srli<127>(pablo_blk_SpanUpTo(callouts.Ref_opener, callouts.Ref_closer, carryQ.get_carry_in(2), tempvar0));
     584          callouts.delmask = simd_or(callouts.delmask, tempvar0);
    587585        }
    588586        else {
     
    592590  }
    593591  IDISA_ALWAYS_INLINE void do_final_block(Lex & lex, Marker & marker, Callouts & callouts, Errors & errors, BitBlock EOF_mask) {
    594                 BitBlock ref_error;
    595 
    596                 BitBlock tempvar0, tempvar1;
    597 
    598 
    599         errors.UnterminatedEntityRef = simd<1>::constant<0>();
    600         errors.ExpectedEntityRefName = simd<1>::constant<0>();
     592                BitBlock EntityRefName;
     593
     594                BitBlock tempvar0;
     595
     596
    601597        if ((bitblock::any(callouts.Ref_opener) || carryQ.CarryTest(0, 3))) {
    602           carryQ.cq[0] = bitblock::srli<127>(pablo_blk_Advance(callouts.Ref_opener, carryQ.get_carry_in(0), tempvar0));
    603           errors.ExpectedEntityRefName = simd_and(tempvar0, lex.Semicolon);
    604           carryQ.cq[1] = bitblock::srli<127>(pablo_blk_ScanThru(callouts.Ref_opener, simd_or(simd_or(lex.NameScan, callouts.Ref_opener), lex.Hash), carryQ.get_carry_in(1), callouts.Ref_closer));
    605           ref_error = simd_andc(callouts.Ref_closer, lex.Semicolon);
    606           errors.UnterminatedEntityRef = ref_error;
    607           carryQ.cq[2] = bitblock::srli<127>(pablo_blk_SpanUpTo(callouts.Ref_opener, callouts.Ref_closer, carryQ.get_carry_in(2), tempvar1));
    608           callouts.delmask = simd_or(callouts.delmask, tempvar1);
     598          carryQ.cq[0] = bitblock::srli<127>(pablo_blk_Advance(callouts.Ref_opener, carryQ.get_carry_in(0), EntityRefName));
     599          carryQ.cq[1] = bitblock::srli<127>(pablo_blk_ScanThru(EntityRefName, simd_or(lex.NameScan, lex.Hash), carryQ.get_carry_in(1), callouts.Ref_closer));
     600          errors.ExpectedEntityRefName = simd_and(EntityRefName, callouts.Ref_closer);
     601          errors.UnterminatedEntityRef = simd_andc(simd_andc(callouts.Ref_closer, lex.Semicolon), errors.ExpectedEntityRefName);
     602          carryQ.cq[2] = bitblock::srli<127>(pablo_blk_SpanUpTo(callouts.Ref_opener, callouts.Ref_closer, carryQ.get_carry_in(2), tempvar0));
     603          callouts.delmask = simd_or(callouts.delmask, tempvar0);
    609604        }
    610605        else {
     
    626621                BitBlock CtCDPI_Closer;
    627622
    628                 BitBlock tempvar0, tempvar1, tempvar2, tempvar3, tempvar4, tempvar5;
    629 
    630 
    631         errors.ExpectedCommentOrCDATA = simd<1>::constant<0>();
    632         errors.PINameExpected = simd<1>::constant<0>();
    633         errors.IllegalSequenceInComment = simd<1>::constant<0>();
    634         errors.UnterminatedPI = simd<1>::constant<0>();
    635         errors.UnterminatedCDATASection = simd<1>::constant<0>();
    636         errors.UnterminatedComment = simd<1>::constant<0>();
     623                BitBlock tempvar0, tempvar1, tempvar2, tempvar3, tempvar4, tempvar5, tempvar6;
     624
     625
    637626        marker.PI_openers = simd<1>::constant<0>();
    638627        marker.CtCD_openers = simd<1>::constant<0>();
     
    700689            DoubleHyphen = simd_and(simd_and(v1, w1), lex.Hyphen);
    701690            carryQ.cq[14] = bitblock::srli<127>(pablo_blk_Advance(Ct_Cursor, carryQ.get_carry_in(14), tempvar4));
    702             carryQ.cq[15] = bitblock::srli<127>(pablo_blk_AdvanceThenScanThru(tempvar4, simd_not(DoubleHyphen), carryQ.get_carry_in(15), Ct_Cursor));
    703             carryQ.cq[16] = bitblock::srli<127>(pablo_blk_Advance(Ct_Cursor, carryQ.get_carry_in(16), Ct_Cursor));
     691            carryQ.cq[15] = bitblock::srli<127>(pablo_blk_AdvanceThenScanThru(tempvar4, simd_not(DoubleHyphen), carryQ.get_carry_in(15), tempvar5));
     692            carryQ.cq[16] = bitblock::srli<127>(pablo_blk_Advance(tempvar5, carryQ.get_carry_in(16), Ct_Cursor));
    704693            Ct_error = simd_andc(Ct_Cursor, lex.RAngle);
    705694            errors.IllegalSequenceInComment = simd_or(errors.IllegalSequenceInComment, Ct_error);
     
    710699          CtCDPI_Closer = simd_or(simd_or(PI_Cursor, CD_Cursor), Ct_Cursor);
    711700          marker.CtCDPI_closers = simd_or(marker.CtCDPI_closers, CtCDPI_Closer);
    712           carryQ.cq[17] = bitblock::srli<127>(pablo_blk_InclusiveSpan(CtCDPI_Cursor, CtCDPI_Closer, carryQ.get_carry_in(17), tempvar5));
    713           ctCDPI_mask = simd_or(ctCDPI_mask, tempvar5);
     701          carryQ.cq[17] = bitblock::srli<127>(pablo_blk_InclusiveSpan(CtCDPI_Cursor, CtCDPI_Closer, carryQ.get_carry_in(17), tempvar6));
     702          ctCDPI_mask = simd_or(ctCDPI_mask, tempvar6);
    714703          if (bitblock::any(simd<1>::constant<0>())) {
    715             if (bitblock::any(simd<1>::constant<0>())) {
    716               errors.UnterminatedPI = simd_or(errors.UnterminatedPI, PI_Cursor);
    717             }
    718             if (bitblock::any(simd<1>::constant<0>())) {
    719               errors.UnterminatedCDATASection = simd_or(errors.UnterminatedCDATASection, CD_Cursor);
    720             }
    721             if (bitblock::any(simd<1>::constant<0>())) {
    722               errors.UnterminatedComment = simd_or(errors.UnterminatedComment, Ct_Cursor);
    723             }
     704            errors.UnterminatedPI = simd_or(errors.UnterminatedPI, simd<1>::constant<0>());
     705            errors.UnterminatedCDATASection = simd_or(errors.UnterminatedCDATASection, simd<1>::constant<0>());
     706            Ct_Cursor = simd_or(Ct_Cursor, simd_andc(ctCDPI_mask, simd_or(PI_Cursor, CD_Cursor)));
     707            errors.UnterminatedComment = simd_or(errors.UnterminatedComment, simd<1>::constant<0>());
    724708          }
    725709          callouts.StringEnds = simd_or(callouts.StringEnds, CtCDPI_Closer);
     
    768752              DoubleHyphen = simd_and(simd_and(v1, w1), lex.Hyphen);
    769753              subcarryQ.cq[9] = bitblock::srli<127>(pablo_blk_Advance(Ct_Cursor, simd<1>::constant<0>(), tempvar4));
    770               subcarryQ.cq[10] = bitblock::srli<127>(pablo_blk_AdvanceThenScanThru(tempvar4, simd_not(DoubleHyphen), simd<1>::constant<0>(), Ct_Cursor));
    771               subcarryQ.cq[11] = bitblock::srli<127>(pablo_blk_Advance(Ct_Cursor, simd<1>::constant<0>(), Ct_Cursor));
     754              subcarryQ.cq[10] = bitblock::srli<127>(pablo_blk_AdvanceThenScanThru(tempvar4, simd_not(DoubleHyphen), simd<1>::constant<0>(), tempvar5));
     755              subcarryQ.cq[11] = bitblock::srli<127>(pablo_blk_Advance(tempvar5, simd<1>::constant<0>(), Ct_Cursor));
    772756              Ct_error = simd_andc(Ct_Cursor, lex.RAngle);
    773757              errors.IllegalSequenceInComment = simd_or(errors.IllegalSequenceInComment, Ct_error);
     
    778762            CtCDPI_Closer = simd_or(simd_or(PI_Cursor, CD_Cursor), Ct_Cursor);
    779763            marker.CtCDPI_closers = simd_or(marker.CtCDPI_closers, CtCDPI_Closer);
    780             subcarryQ.cq[12] = bitblock::srli<127>(pablo_blk_InclusiveSpan(CtCDPI_Cursor, CtCDPI_Closer, simd<1>::constant<0>(), tempvar5));
    781             ctCDPI_mask = simd_or(ctCDPI_mask, tempvar5);
     764            subcarryQ.cq[12] = bitblock::srli<127>(pablo_blk_InclusiveSpan(CtCDPI_Cursor, CtCDPI_Closer, simd<1>::constant<0>(), tempvar6));
     765            ctCDPI_mask = simd_or(ctCDPI_mask, tempvar6);
    782766            if (bitblock::any(simd<1>::constant<0>())) {
    783               if (bitblock::any(simd<1>::constant<0>())) {
    784                 errors.UnterminatedPI = simd_or(errors.UnterminatedPI, PI_Cursor);
    785               }
    786               if (bitblock::any(simd<1>::constant<0>())) {
    787                 errors.UnterminatedCDATASection = simd_or(errors.UnterminatedCDATASection, CD_Cursor);
    788               }
    789               if (bitblock::any(simd<1>::constant<0>())) {
    790                 errors.UnterminatedComment = simd_or(errors.UnterminatedComment, Ct_Cursor);
    791               }
     767              errors.UnterminatedPI = simd_or(errors.UnterminatedPI, simd<1>::constant<0>());
     768              errors.UnterminatedCDATASection = simd_or(errors.UnterminatedCDATASection, simd<1>::constant<0>());
     769              Ct_Cursor = simd_or(Ct_Cursor, simd_andc(ctCDPI_mask, simd_or(PI_Cursor, CD_Cursor)));
     770              errors.UnterminatedComment = simd_or(errors.UnterminatedComment, simd<1>::constant<0>());
    792771            }
    793772            callouts.StringEnds = simd_or(callouts.StringEnds, CtCDPI_Closer);
     
    813792                BitBlock CtCDPI_Closer;
    814793
    815                 BitBlock tempvar0, tempvar1, tempvar2, tempvar3, tempvar4, tempvar5;
    816 
    817 
    818         errors.ExpectedCommentOrCDATA = simd<1>::constant<0>();
    819         errors.PINameExpected = simd<1>::constant<0>();
    820         errors.IllegalSequenceInComment = simd<1>::constant<0>();
    821         errors.UnterminatedPI = simd<1>::constant<0>();
    822         errors.UnterminatedCDATASection = simd<1>::constant<0>();
    823         errors.UnterminatedComment = simd<1>::constant<0>();
     794                BitBlock tempvar0, tempvar1, tempvar2, tempvar3, tempvar4, tempvar5, tempvar6;
     795
     796
    824797        marker.PI_openers = simd<1>::constant<0>();
    825798        marker.CtCD_openers = simd<1>::constant<0>();
     
    887860            DoubleHyphen = simd_and(simd_and(v1, w1), lex.Hyphen);
    888861            carryQ.cq[14] = bitblock::srli<127>(pablo_blk_Advance(Ct_Cursor, carryQ.get_carry_in(14), tempvar4));
    889             carryQ.cq[15] = bitblock::srli<127>(pablo_blk_AdvanceThenScanThru(tempvar4, simd_andc(EOF_mask, DoubleHyphen), carryQ.get_carry_in(15), Ct_Cursor));
    890             carryQ.cq[16] = bitblock::srli<127>(pablo_blk_Advance(Ct_Cursor, carryQ.get_carry_in(16), Ct_Cursor));
     862            carryQ.cq[15] = bitblock::srli<127>(pablo_blk_AdvanceThenScanThru(tempvar4, simd_andc(EOF_mask, DoubleHyphen), carryQ.get_carry_in(15), tempvar5));
     863            carryQ.cq[16] = bitblock::srli<127>(pablo_blk_Advance(tempvar5, carryQ.get_carry_in(16), Ct_Cursor));
    891864            Ct_error = simd_andc(Ct_Cursor, lex.RAngle);
    892             errors.IllegalSequenceInComment = simd_or(errors.IllegalSequenceInComment, Ct_error);
     865            errors.IllegalSequenceInComment = simd_or(errors.IllegalSequenceInComment, simd_and(Ct_error, EOF_mask));
    893866          }
    894867          else {
     
    897870          CtCDPI_Closer = simd_or(simd_or(PI_Cursor, CD_Cursor), Ct_Cursor);
    898871          marker.CtCDPI_closers = simd_or(marker.CtCDPI_closers, CtCDPI_Closer);
    899           carryQ.cq[17] = bitblock::srli<127>(pablo_blk_InclusiveSpan(CtCDPI_Cursor, CtCDPI_Closer, carryQ.get_carry_in(17), tempvar5));
    900           ctCDPI_mask = simd_or(ctCDPI_mask, tempvar5);
    901           if (bitblock::any(simd_and(simd_andc(ctCDPI_mask, EOF_mask), bitblock::slli<1>(EOF_mask)))) {
    902             if (bitblock::any(simd_and(simd_andc(PI_Cursor, EOF_mask), bitblock::slli<1>(EOF_mask)))) {
    903               errors.UnterminatedPI = simd_or(errors.UnterminatedPI, PI_Cursor);
    904             }
    905             if (bitblock::any(simd_and(simd_andc(CD_Cursor, EOF_mask), bitblock::slli<1>(EOF_mask)))) {
    906               errors.UnterminatedCDATASection = simd_or(errors.UnterminatedCDATASection, CD_Cursor);
    907             }
    908             if (bitblock::any(simd_and(simd_andc(Ct_Cursor, EOF_mask), bitblock::slli<1>(EOF_mask)))) {
    909               errors.UnterminatedComment = simd_or(errors.UnterminatedComment, Ct_Cursor);
    910             }
     872          carryQ.cq[17] = bitblock::srli<127>(pablo_blk_InclusiveSpan(CtCDPI_Cursor, CtCDPI_Closer, carryQ.get_carry_in(17), tempvar6));
     873          ctCDPI_mask = simd_or(ctCDPI_mask, tempvar6);
     874          if (bitblock::any(simd_andc(simd_andc(ctCDPI_mask, EOF_mask), bitblock::slli<1>(simd_not(EOF_mask))))) {
     875            errors.UnterminatedPI = simd_or(errors.UnterminatedPI, simd_andc(simd_andc(PI_Cursor, EOF_mask), bitblock::slli<1>(simd_not(EOF_mask))));
     876            errors.UnterminatedCDATASection = simd_or(errors.UnterminatedCDATASection, simd_andc(simd_andc(CD_Cursor, EOF_mask), bitblock::slli<1>(simd_not(EOF_mask))));
     877            Ct_Cursor = simd_or(Ct_Cursor, simd_andc(ctCDPI_mask, simd_or(PI_Cursor, CD_Cursor)));
     878            errors.UnterminatedComment = simd_or(errors.UnterminatedComment, simd_andc(simd_andc(Ct_Cursor, EOF_mask), bitblock::slli<1>(simd_not(EOF_mask))));
    911879          }
    912880          callouts.StringEnds = simd_or(callouts.StringEnds, CtCDPI_Closer);
     
    955923              DoubleHyphen = simd_and(simd_and(v1, w1), lex.Hyphen);
    956924              subcarryQ.cq[9] = bitblock::srli<127>(pablo_blk_Advance(Ct_Cursor, simd<1>::constant<0>(), tempvar4));
    957               subcarryQ.cq[10] = bitblock::srli<127>(pablo_blk_AdvanceThenScanThru(tempvar4, simd_andc(EOF_mask, DoubleHyphen), simd<1>::constant<0>(), Ct_Cursor));
    958               subcarryQ.cq[11] = bitblock::srli<127>(pablo_blk_Advance(Ct_Cursor, simd<1>::constant<0>(), Ct_Cursor));
     925              subcarryQ.cq[10] = bitblock::srli<127>(pablo_blk_AdvanceThenScanThru(tempvar4, simd_andc(EOF_mask, DoubleHyphen), simd<1>::constant<0>(), tempvar5));
     926              subcarryQ.cq[11] = bitblock::srli<127>(pablo_blk_Advance(tempvar5, simd<1>::constant<0>(), Ct_Cursor));
    959927              Ct_error = simd_andc(Ct_Cursor, lex.RAngle);
    960               errors.IllegalSequenceInComment = simd_or(errors.IllegalSequenceInComment, Ct_error);
     928              errors.IllegalSequenceInComment = simd_or(errors.IllegalSequenceInComment, simd_and(Ct_error, EOF_mask));
    961929            }
    962930            else {
     
    965933            CtCDPI_Closer = simd_or(simd_or(PI_Cursor, CD_Cursor), Ct_Cursor);
    966934            marker.CtCDPI_closers = simd_or(marker.CtCDPI_closers, CtCDPI_Closer);
    967             subcarryQ.cq[12] = bitblock::srli<127>(pablo_blk_InclusiveSpan(CtCDPI_Cursor, CtCDPI_Closer, simd<1>::constant<0>(), tempvar5));
    968             ctCDPI_mask = simd_or(ctCDPI_mask, tempvar5);
    969             if (bitblock::any(simd_and(simd_andc(ctCDPI_mask, EOF_mask), bitblock::slli<1>(EOF_mask)))) {
    970               if (bitblock::any(simd_and(simd_andc(PI_Cursor, EOF_mask), bitblock::slli<1>(EOF_mask)))) {
    971                 errors.UnterminatedPI = simd_or(errors.UnterminatedPI, PI_Cursor);
    972               }
    973               if (bitblock::any(simd_and(simd_andc(CD_Cursor, EOF_mask), bitblock::slli<1>(EOF_mask)))) {
    974                 errors.UnterminatedCDATASection = simd_or(errors.UnterminatedCDATASection, CD_Cursor);
    975               }
    976               if (bitblock::any(simd_and(simd_andc(Ct_Cursor, EOF_mask), bitblock::slli<1>(EOF_mask)))) {
    977                 errors.UnterminatedComment = simd_or(errors.UnterminatedComment, Ct_Cursor);
    978               }
     935            subcarryQ.cq[12] = bitblock::srli<127>(pablo_blk_InclusiveSpan(CtCDPI_Cursor, CtCDPI_Closer, simd<1>::constant<0>(), tempvar6));
     936            ctCDPI_mask = simd_or(ctCDPI_mask, tempvar6);
     937            if (bitblock::any(simd_andc(simd_andc(ctCDPI_mask, EOF_mask), bitblock::slli<1>(simd_not(EOF_mask))))) {
     938              errors.UnterminatedPI = simd_or(errors.UnterminatedPI, simd_andc(simd_andc(PI_Cursor, EOF_mask), bitblock::slli<1>(simd_not(EOF_mask))));
     939              errors.UnterminatedCDATASection = simd_or(errors.UnterminatedCDATASection, simd_andc(simd_andc(CD_Cursor, EOF_mask), bitblock::slli<1>(simd_not(EOF_mask))));
     940              Ct_Cursor = simd_or(Ct_Cursor, simd_andc(ctCDPI_mask, simd_or(PI_Cursor, CD_Cursor)));
     941              errors.UnterminatedComment = simd_or(errors.UnterminatedComment, simd_andc(simd_andc(Ct_Cursor, EOF_mask), bitblock::slli<1>(simd_not(EOF_mask))));
    979942            }
    980943            callouts.StringEnds = simd_or(callouts.StringEnds, CtCDPI_Closer);
     
    10371000        AllTagMarks = simd_or(simd_or(simd_or(marker.StartTag_closers, marker.EmptyTag_closers), marker.EndTag_marks), marker.AttEq_marks);
    10381001        TransitionMarks = simd_or(CtCDPI_openers, AllTagMarks);
    1039         callouts.StringEnds = simd_or(callouts.StringEnds, simd_and(simd_andc(simd_not(simd<1>::constant<0>()), EOF_mask), bitblock::slli<1>(EOF_mask)));
     1002        callouts.StringEnds = simd_or(callouts.StringEnds, simd_andc(simd_andc(simd_not(simd<1>::constant<0>()), EOF_mask), bitblock::slli<1>(simd_not(EOF_mask))));
    10401003        callouts.delmask = simd_or(callouts.delmask, simd_andc(simd_or(simd_not(simd_and(simd_not(simd<1>::constant<0>()), EOF_mask)), marker.TagSpan), simd_or(simd_or(callouts.AttValSpan, callouts.StringEnds), TransitionMarks)));
    10411004        zeromask = simd_or(simd_or(TransitionMarks, callouts.StringEnds), callouts.delmask);
     
    10821045  BitBlock scope44;
    10831046  BitBlock surrogate;
    1084   BitBlock delmask;
    10851047};
    10861048
     
    10971059                BitBlock temp50, temp51, temp52, temp53, temp54, temp55, xC2, temp56, x80_x9F;
    10981060                BitBlock temp57, temp58, x85, temp59, xE2, x80, temp60, temp61, xA8, xEF;
    1099                 BitBlock temp62, xBF, xBE, xEF_scope, EF_BF_pending, u8_FFFE_FFFF, xC2_scope1;
     1061                BitBlock temp62, xBF, xBE, xEF_scope, EF_BF_pending, FFFE_FFFF, xC2_scope1;
    11001062                BitBlock lexError;
    11011063
     
    11961158        carryQ.cq[0] = bitblock::srli<127>(pablo_blk_Advance(xEF, carryQ.get_carry_in(0), xEF_scope));
    11971159        carryQ.cq[1] = bitblock::srli<127>(pablo_blk_Advance(simd_and(xEF_scope, xBF), carryQ.get_carry_in(1), EF_BF_pending));
    1198         u8_FFFE_FFFF = simd_and(EF_BF_pending, simd_or(xBE, xBF));
     1160        FFFE_FFFF = simd_and(EF_BF_pending, simd_or(xBE, xBF));
    11991161        carryQ.cq[2] = bitblock::srli<127>(pablo_blk_Advance(xE2, carryQ.get_carry_in(2), tempvar0));
    12001162        carryQ.cq[3] = bitblock::srli<127>(pablo_blk_Advance(simd_and(tempvar0, x80), carryQ.get_carry_in(3), tempvar1));
     
    12021164        carryQ.cq[4] = bitblock::srli<127>(pablo_blk_Advance(xC2, carryQ.get_carry_in(4), xC2_scope1));
    12031165        lex.NEL = simd_and(xC2_scope1, x85);
    1204         lexError = simd_or(simd_andc(x00_x1F, lex.WS), u8_FFFE_FFFF);
     1166        lexError = FFFE_FFFF;
     1167        lexError = simd_or(lexError, simd_andc(x00_x1F, lex.WS));
    12051168        if (bitblock::any(parameters.XML_11)) {
    1206           lexError = simd_or(lexError, simd_andc(simd_and(xC2_scope1, x80_x9F), lex.NEL));
    1207           lexError = simd_or(lexError, lex.DEL);
    1208           lex.WS = simd_or(lex.WS, simd_or(lex.LS, lex.NEL));
     1169          lexError = simd_or(lexError, simd_or(simd_andc(simd_and(xC2_scope1, x80_x9F), lex.NEL), lex.DEL));
    12091170        }
    12101171        errors.Lexical = lexError;
     
    12201181                BitBlock temp50, temp51, temp52, temp53, temp54, temp55, xC2, temp56, x80_x9F;
    12211182                BitBlock temp57, temp58, x85, temp59, xE2, x80, temp60, temp61, xA8, xEF;
    1222                 BitBlock temp62, xBF, xBE, xEF_scope, EF_BF_pending, u8_FFFE_FFFF, xC2_scope1;
     1183                BitBlock temp62, xBF, xBE, xEF_scope, EF_BF_pending, FFFE_FFFF, xC2_scope1;
    12231184                BitBlock lexError;
    12241185
     
    13191280        carryQ.cq[0] = bitblock::srli<127>(pablo_blk_Advance(xEF, carryQ.get_carry_in(0), xEF_scope));
    13201281        carryQ.cq[1] = bitblock::srli<127>(pablo_blk_Advance(simd_and(xEF_scope, xBF), carryQ.get_carry_in(1), EF_BF_pending));
    1321         u8_FFFE_FFFF = simd_and(EF_BF_pending, simd_or(xBE, xBF));
     1282        FFFE_FFFF = simd_and(EF_BF_pending, simd_or(xBE, xBF));
    13221283        carryQ.cq[2] = bitblock::srli<127>(pablo_blk_Advance(xE2, carryQ.get_carry_in(2), tempvar0));
    13231284        carryQ.cq[3] = bitblock::srli<127>(pablo_blk_Advance(simd_and(tempvar0, x80), carryQ.get_carry_in(3), tempvar1));
     
    13251286        carryQ.cq[4] = bitblock::srli<127>(pablo_blk_Advance(xC2, carryQ.get_carry_in(4), xC2_scope1));
    13261287        lex.NEL = simd_and(xC2_scope1, x85);
    1327         lexError = simd_or(simd_andc(x00_x1F, lex.WS), u8_FFFE_FFFF);
     1288        lexError = FFFE_FFFF;
     1289        lexError = simd_or(lexError, simd_andc(x00_x1F, lex.WS));
    13281290        if (bitblock::any(parameters.XML_11)) {
    1329           lexError = simd_or(lexError, simd_andc(simd_and(xC2_scope1, x80_x9F), lex.NEL));
    1330           lexError = simd_or(lexError, lex.DEL);
    1331           lex.WS = simd_or(lex.WS, simd_or(lex.LS, lex.NEL));
     1291          lexError = simd_or(lexError, simd_or(simd_andc(simd_and(xC2_scope1, x80_x9F), lex.NEL), lex.DEL));
    13321292        }
    13331293        errors.Lexical = simd_and(lexError, EOF_mask);
     
    13361296  };
    13371297
    1338   struct Utf8_to_utf16 {
    1339   Utf8_to_utf16() {
     // Normalize_LF: block-at-a-time line-break normalization over the parallel
     // bit streams. At CR positions (always) and at NEL / LS positions (only when
     // parameters.XML_11 has any bit set) it rewrites the u16hi/u16lo bit planes and
     // updates lex.LF so that those positions are treated as line feeds. The second
     // unit of a CR LF pair, and of a CR NEL pair, is OR-ed into callouts.delmask and
     // callouts.skipmask and cleared from lex.LF.
     // NOTE(review): the bit arithmetic below yields 0x0A only if bit_0 is the most
     // significant bit of each byte plane and bit_7 the least -- confirm against the
     // transposition code.
     1298  struct Normalize_LF {
     1299  Normalize_LF() {
     1300 }
     // Processes one full block. `marker` is not referenced in this body.
     1301  IDISA_ALWAYS_INLINE void do_block(Parameters & parameters, Lex & lex, U16hi & u16hi, U16lo & u16lo, Callouts & callouts, Marker & marker) {
     1302                BitBlock CRLF, CR_scope1, CR_scope2, CRNEL;
     1303
     1304                BitBlock tempvar0;
     1305
     1306
                 // CR handling: runs when this block has a CR or carry slot 0 is pending.
     1307        if ((bitblock::any(lex.CR) || carryQ.CarryTest(0, 1))) {
                   // Flip low-byte bits 5..7 at CR positions (0x0D -> 0x0A under the
                   // MSB-first numbering assumed above).
     1308          u16lo.bit_5 = simd_xor(u16lo.bit_5, lex.CR);
     1309          u16lo.bit_6 = simd_xor(u16lo.bit_6, lex.CR);
     1310          u16lo.bit_7 = simd_xor(u16lo.bit_7, lex.CR);
                   // tempvar0 presumably marks the position right after each CR (one-step
                   // advance with carry slot 0); CRLF is then every LF directly after a CR.
     1311          carryQ.cq[0] = bitblock::srli<127>(pablo_blk_Advance(lex.CR, carryQ.get_carry_in(0), tempvar0));
     1312          CRLF = simd_and(tempvar0, lex.LF);
     1313          callouts.delmask = simd_or(callouts.delmask, CRLF);
                   // The CR itself now counts as an LF; the LF that followed it does not.
     1314          lex.LF = simd_or(lex.LF, lex.CR);
     1315          lex.LF = simd_xor(lex.LF, CRLF);
     1316          callouts.skipmask = simd_or(callouts.skipmask, CRLF);
     1317        }
     1318        else {
     1319          carryQ.CarryDequeueEnqueue(0, 1);
     1320        }
                 // XML 1.1-only handling of NEL and LS.
     1321        if ((bitblock::any(parameters.XML_11) || carryQ.CarryTest(1, 2))) {
     1322          if ((bitblock::any(lex.NEL) || carryQ.CarryTest(1, 2))) {
                     // Clear bits 0, 5, 7 (set at NEL positions if the low byte is 0x85) and
                     // set bits 4, 6: 0x85 -> 0x0A under the assumed numbering.
     1323            u16lo.bit_0 = simd_xor(u16lo.bit_0, lex.NEL);
     1324            u16lo.bit_4 = simd_or(u16lo.bit_4, lex.NEL);
     1325            u16lo.bit_5 = simd_xor(u16lo.bit_5, lex.NEL);
     1326            u16lo.bit_6 = simd_or(u16lo.bit_6, lex.NEL);
     1327            u16lo.bit_7 = simd_xor(u16lo.bit_7, lex.NEL);
     1328            lex.LF = simd_or(lex.LF, lex.NEL);
                     // NOTE(review): the CR -> NEL carries (slots 1-2) are only computed when
                     // this block already contains a NEL (or a carry is pending). A CR near the
                     // end of a block whose NEL falls in the next block may therefore go
                     // undetected -- depends on CarryDequeueEnqueue semantics; confirm.
     1329            if ((bitblock::any(lex.CR) || carryQ.CarryTest(1, 2))) {
                       // Two chained advances from CR: CRNEL is a NEL marker two positions
                       // after a CR (presumably because lex.NEL is marked on the second
                       // byte of its two-byte encoding -- confirm where lex.NEL is set).
     1330              carryQ.cq[1] = bitblock::srli<127>(pablo_blk_Advance(lex.CR, carryQ.get_carry_in(1), CR_scope1));
     1331              carryQ.cq[2] = bitblock::srli<127>(pablo_blk_Advance(CR_scope1, carryQ.get_carry_in(2), CR_scope2));
     1332              CRNEL = simd_and(CR_scope2, lex.NEL);
     1333              callouts.delmask = simd_or(callouts.delmask, CRNEL);
     1334              lex.LF = simd_xor(lex.LF, CRNEL);
     1335              callouts.skipmask = simd_or(callouts.skipmask, CRNEL);
     1336            }
     1337            else {
     1338              carryQ.CarryDequeueEnqueue(1, 2);
     1339            }
     1340          }
     1341          else {
     1342            carryQ.CarryDequeueEnqueue(1, 2);
     1343          }
                   // LS: flip high-byte bit 2 and low-byte bits 2 and 6 (0x2028 -> 0x000A
                   // under the assumed numbering).
     1344          if (bitblock::any(lex.LS)) {
     1345            u16hi.bit_2 = simd_xor(u16hi.bit_2, lex.LS);
     1346            u16lo.bit_2 = simd_xor(u16lo.bit_2, lex.LS);
     1347            u16lo.bit_6 = simd_xor(u16lo.bit_6, lex.LS);
     1348            lex.LF = simd_or(lex.LF, lex.LS);
     1349          }
                   // Every position now in lex.LF is added to lex.WS and removed from
                   // lex.NameScan. Note this happens only inside the XML 1.1 branch.
     1350          lex.WS = simd_or(lex.WS, lex.LF);
     1351          lex.NameScan = simd_andc(lex.NameScan, lex.LF);
     1352        }
     1353        else {
     1354          carryQ.CarryDequeueEnqueue(1, 2);
     1355        }
     1356        carryQ.CarryQ_Adjust(3);
     1357  }
     // Final-block variant: same statements as do_block, except that it does not call
     // carryQ.CarryQ_Adjust(3). Neither `marker` nor `EOF_mask` is referenced in this
     // body, so nothing here masks positions past end of input.
     1358  IDISA_ALWAYS_INLINE void do_final_block(Parameters & parameters, Lex & lex, U16hi & u16hi, U16lo & u16lo, Callouts & callouts, Marker & marker, BitBlock EOF_mask) {
     1359                BitBlock CRLF, CR_scope1, CR_scope2, CRNEL;
     1360
     1361                BitBlock tempvar0;
     1362
     1363
                 // CR handling (see do_block): rewrite CR as LF, mark the LF of a CR LF pair.
     1364        if ((bitblock::any(lex.CR) || carryQ.CarryTest(0, 1))) {
     1365          u16lo.bit_5 = simd_xor(u16lo.bit_5, lex.CR);
     1366          u16lo.bit_6 = simd_xor(u16lo.bit_6, lex.CR);
     1367          u16lo.bit_7 = simd_xor(u16lo.bit_7, lex.CR);
     1368          carryQ.cq[0] = bitblock::srli<127>(pablo_blk_Advance(lex.CR, carryQ.get_carry_in(0), tempvar0));
     1369          CRLF = simd_and(tempvar0, lex.LF);
     1370          callouts.delmask = simd_or(callouts.delmask, CRLF);
     1371          lex.LF = simd_or(lex.LF, lex.CR);
     1372          lex.LF = simd_xor(lex.LF, CRLF);
     1373          callouts.skipmask = simd_or(callouts.skipmask, CRLF);
     1374        }
     1375        else {
     1376          carryQ.CarryDequeueEnqueue(0, 1);
     1377        }
                 // XML 1.1-only handling of NEL and LS (see do_block).
     1378        if ((bitblock::any(parameters.XML_11) || carryQ.CarryTest(1, 2))) {
     1379          if ((bitblock::any(lex.NEL) || carryQ.CarryTest(1, 2))) {
     1380            u16lo.bit_0 = simd_xor(u16lo.bit_0, lex.NEL);
     1381            u16lo.bit_4 = simd_or(u16lo.bit_4, lex.NEL);
     1382            u16lo.bit_5 = simd_xor(u16lo.bit_5, lex.NEL);
     1383            u16lo.bit_6 = simd_or(u16lo.bit_6, lex.NEL);
     1384            u16lo.bit_7 = simd_xor(u16lo.bit_7, lex.NEL);
     1385            lex.LF = simd_or(lex.LF, lex.NEL);
     1386            if ((bitblock::any(lex.CR) || carryQ.CarryTest(1, 2))) {
     1387              carryQ.cq[1] = bitblock::srli<127>(pablo_blk_Advance(lex.CR, carryQ.get_carry_in(1), CR_scope1));
     1388              carryQ.cq[2] = bitblock::srli<127>(pablo_blk_Advance(CR_scope1, carryQ.get_carry_in(2), CR_scope2));
     1389              CRNEL = simd_and(CR_scope2, lex.NEL);
     1390              callouts.delmask = simd_or(callouts.delmask, CRNEL);
     1391              lex.LF = simd_xor(lex.LF, CRNEL);
     1392              callouts.skipmask = simd_or(callouts.skipmask, CRNEL);
     1393            }
     1394            else {
     1395              carryQ.CarryDequeueEnqueue(1, 2);
     1396            }
     1397          }
     1398          else {
     1399            carryQ.CarryDequeueEnqueue(1, 2);
     1400          }
     1401          if (bitblock::any(lex.LS)) {
     1402            u16hi.bit_2 = simd_xor(u16hi.bit_2, lex.LS);
     1403            u16lo.bit_2 = simd_xor(u16lo.bit_2, lex.LS);
     1404            u16lo.bit_6 = simd_xor(u16lo.bit_6, lex.LS);
     1405            lex.LF = simd_or(lex.LF, lex.LS);
     1406          }
     1407          lex.WS = simd_or(lex.WS, lex.LF);
     1408          lex.NameScan = simd_andc(lex.NameScan, lex.LF);
     1409        }
     1410        else {
     1411          carryQ.CarryDequeueEnqueue(1, 2);
     1412        }
     1413  }
     // Carry state kept between blocks: slot 0 is written by the CR advance in the
     // CR LF test, slots 1 and 2 by the two chained advances in the CR NEL test.
     1414  CarryArray<3, 0> carryQ;
     1415  };
     1416
     1417  struct Transcode_Utf8_To_utf16 {
     1418  Transcode_Utf8_To_utf16() {
    13401419 }
    13411420  IDISA_ALWAYS_INLINE void do_block(Basis_bits & basis_bits, U8 & u8, U16hi & u16hi, U16lo & u16lo, Callouts & callouts, Errors & errors) {
    13421421                BitBlock u8anyscope, temp1, temp2, temp3, temp4, temp5, temp6, badprefix2;
    13431422                BitBlock u8Error, adv_bit3, adv_bit4, adv_bit5, adv_bit6, adv_bit7;
    1344                 BitBlock u8lastscope, u8lastbyte, temp7, temp8, xE0, temp9, temp10, temp11;
    1345                 BitBlock xED, xA0_xBF, x80_x9F, adv_bit2, advadv_bit4, advadv_bit5;
     1423                BitBlock u8lastscope, u8lastbyte, u8delmask, temp7, temp8, xE0, temp9, temp10;
     1424                BitBlock temp11, xED, xA0_xBF, x80_x9F, adv_bit2, advadv_bit4, advadv_bit5;
    13461425                BitBlock advadv_bit6, advadv_bit7, temp12, temp13, badprefix4, xF0, temp14;
    13471426                BitBlock temp15, xF4, x90_xBF, x80_x8F, s43lo1, s43lo0, s43borrow1, s43hi7;
     
    13521431
    13531432        u8.unibyte = simd<1>::constant<0>();
     1433        u8.scope33 = simd<1>::constant<0>();
    13541434        u8.surrogate = simd<1>::constant<0>();
    1355         u8.scope33 = simd<1>::constant<0>();
    13561435        errors.Unicode = simd<1>::constant<0>();
    13571436        if ((bitblock::any(basis_bits.bit_0) || carryQ.CarryTest(0, 20))) {
     
    13651444          u8.prefix4 = simd_and(u8.prefix, temp2);
    13661445          u8.suffix = simd_andc(basis_bits.bit_0, basis_bits.bit_1);
    1367           u8.delmask = simd<1>::constant<0>();
    13681446          temp3 = simd_or(basis_bits.bit_2, basis_bits.bit_3);
    13691447          temp4 = simd_andc(u8.prefix, temp3);
     
    13811459          u8anyscope = u8.scope22;
    13821460          u8lastbyte = simd_or(u8.unibyte, u8lastscope);
    1383           u8.delmask = u8.prefix;
     1461          u8delmask = u8.prefix;
    13841462          if ((bitblock::any(simd_or(u8.prefix3, u8.prefix4)) || carryQ.CarryTest(6, 14))) {
    13851463            temp7 = simd_or(basis_bits.bit_6, basis_bits.bit_7);
     
    14061484            u8anyscope = simd_or(u8lastscope, u8.scope32);
    14071485            u8lastbyte = simd_or(u8.unibyte, u8lastscope);
    1408             u8.delmask = simd_or(u8.delmask, u8.scope32);
     1486            u8delmask = simd_or(u8delmask, u8.scope32);
    14091487            if ((bitblock::any(u8.prefix4) || carryQ.CarryTest(15, 5))) {
    14101488              temp12 = simd_and(basis_bits.bit_5, temp7);
     
    14291507              u8anyscope = simd_or(simd_or(simd_or(u8lastscope, u8.scope32), u8.scope42), u8.scope43);
    14301508              u8lastbyte = simd_or(u8.unibyte, u8lastscope);
    1431               u8.delmask = simd_or(u8.delmask, u8.scope42);
     1509              u8delmask = simd_or(u8delmask, u8.scope42);
     1510              u16lo.bit_0 = simd_and(u8lastscope, adv_bit6);
     1511              u16lo.bit_1 = simd_or(simd_and(u8.unibyte, basis_bits.bit_1), simd_and(u8lastscope, adv_bit7));
    14321512              s43lo1 = simd_not(adv_bit3);
    14331513              u16lo.bit_1 = simd_or(u16lo.bit_1, simd_and(u8.scope43, s43lo1));
     
    14461526              u16hi.bit_6 = simd_or(simd_and(u8lastscope, adv_bit4), simd_and(u8.scope43, s43hi6));
    14471527              u16hi.bit_7 = simd_or(simd_and(u8lastscope, adv_bit5), simd_and(u8.scope43, s43hi7));
    1448               u16lo.bit_0 = simd_and(u8lastscope, adv_bit6);
    1449               u16lo.bit_1 = simd_or(simd_and(u8.unibyte, basis_bits.bit_1), simd_and(u8lastscope, adv_bit7));
    14501528              u16lo.bit_2 = simd_or(simd_and(u8lastbyte, basis_bits.bit_2), simd_and(u8.scope43, adv_bit4));
    14511529              u16lo.bit_3 = simd_or(simd_and(u8lastbyte, basis_bits.bit_3), simd_and(u8.scope43, adv_bit5));
     
    14951573          }
    14961574          u8Error = simd_or(u8Error, simd_xor(u8anyscope, u8.suffix));
    1497           callouts.delmask = simd_or(callouts.delmask, u8.delmask);
    1498           callouts.skipmask = u8.delmask;
     1575          callouts.delmask = u8delmask;
     1576          callouts.skipmask = u8delmask;
    14991577          errors.Unicode = u8Error;
    15001578        }
     
    15161594          u16hi.bit_6 = simd<1>::constant<0>();
    15171595          u16hi.bit_7 = simd<1>::constant<0>();
     1596          callouts.delmask = simd<1>::constant<0>();
     1597          callouts.skipmask = simd<1>::constant<0>();
    15181598          carryQ.CarryDequeueEnqueue(0, 20);
    15191599        }
     
    15231603                BitBlock u8anyscope, temp1, temp2, temp3, temp4, temp5, temp6, badprefix2;
    15241604                BitBlock u8Error, adv_bit3, adv_bit4, adv_bit5, adv_bit6, adv_bit7;
    1525                 BitBlock u8lastscope, u8lastbyte, temp7, temp8, xE0, temp9, temp10, temp11;
    1526                 BitBlock xED, xA0_xBF, x80_x9F, adv_bit2, advadv_bit4, advadv_bit5;
     1605                BitBlock u8lastscope, u8lastbyte, u8delmask, temp7, temp8, xE0, temp9, temp10;
     1606                BitBlock temp11, xED, xA0_xBF, x80_x9F, adv_bit2, advadv_bit4, advadv_bit5;
    15271607                BitBlock advadv_bit6, advadv_bit7, temp12, temp13, badprefix4, xF0, temp14;
    15281608                BitBlock temp15, xF4, x90_xBF, x80_x8F, s43lo1, s43lo0, s43borrow1, s43hi7;
     
    15331613
    15341614        u8.unibyte = simd<1>::constant<0>();
     1615        u8.scope33 = simd<1>::constant<0>();
    15351616        u8.surrogate = simd<1>::constant<0>();
    1536         u8.scope33 = simd<1>::constant<0>();
    15371617        errors.Unicode = simd<1>::constant<0>();
    15381618        if ((bitblock::any(basis_bits.bit_0) || carryQ.CarryTest(0, 20))) {
     
    15461626          u8.prefix4 = simd_and(u8.prefix, temp2);
    15471627          u8.suffix = simd_andc(basis_bits.bit_0, basis_bits.bit_1);
    1548           u8.delmask = simd<1>::constant<0>();
    15491628          temp3 = simd_or(basis_bits.bit_2, basis_bits.bit_3);
    15501629          temp4 = simd_andc(u8.prefix, temp3);
     
    15621641          u8anyscope = u8.scope22;
    15631642          u8lastbyte = simd_or(u8.unibyte, u8lastscope);
    1564           u8.delmask = u8.prefix;
     1643          u8delmask = u8.prefix;
    15651644          if ((bitblock::any(simd_or(u8.prefix3, u8.prefix4)) || carryQ.CarryTest(6, 14))) {
    15661645            temp7 = simd_or(basis_bits.bit_6, basis_bits.bit_7);
     
    15871666            u8anyscope = simd_or(u8lastscope, u8.scope32);
    15881667            u8lastbyte = simd_or(u8.unibyte, u8lastscope);
    1589             u8.delmask = simd_or(u8.delmask, u8.scope32);
     1668            u8delmask = simd_or(u8delmask, u8.scope32);
    15901669            if ((bitblock::any(u8.prefix4) || carryQ.CarryTest(15, 5))) {
    15911670              temp12 = simd_and(basis_bits.bit_5, temp7);
     
    16101689              u8anyscope = simd_or(simd_or(simd_or(u8lastscope, u8.scope32), u8.scope42), u8.scope43);
    16111690              u8lastbyte = simd_or(u8.unibyte, u8lastscope);
    1612               u8.delmask = simd_or(u8.delmask, u8.scope42);
     1691              u8delmask = simd_or(u8delmask, u8.scope42);
     1692              u16lo.bit_0 = simd_and(u8lastscope, adv_bit6);
     1693              u16lo.bit_1 = simd_or(simd_and(u8.unibyte, basis_bits.bit_1), simd_and(u8lastscope, adv_bit7));
    16131694              s43lo1 = simd_not(adv_bit3);
    16141695              u16lo.bit_1 = simd_or(u16lo.bit_1, simd_and(u8.scope43, s43lo1));
     
    16271708              u16hi.bit_6 = simd_or(simd_and(u8lastscope, adv_bit4), simd_and(u8.scope43, s43hi6));
    16281709              u16hi.bit_7 = simd_or(simd_and(u8lastscope, adv_bit5), simd_and(u8.scope43, s43hi7));
    1629               u16lo.bit_0 = simd_and(u8lastscope, adv_bit6);
    1630               u16lo.bit_1 = simd_or(simd_and(u8.unibyte, basis_bits.bit_1), simd_and(u8lastscope, adv_bit7));
    16311710              u16lo.bit_2 = simd_or(simd_and(u8lastbyte, basis_bits.bit_2), simd_and(u8.scope43, adv_bit4));
    16321711              u16lo.bit_3 = simd_or(simd_and(u8lastbyte, basis_bits.bit_3), simd_and(u8.scope43, adv_bit5));
     
    16761755          }
    16771756          u8Error = simd_or(u8Error, simd_xor(u8anyscope, u8.suffix));
    1678           callouts.delmask = simd_or(callouts.delmask, u8.delmask);
    1679           callouts.skipmask = u8.delmask;
     1757          callouts.delmask = u8delmask;
     1758          callouts.skipmask = u8delmask;
    16801759          errors.Unicode = simd_and(u8Error, EOF_mask);
    16811760        }
     
    16971776          u16hi.bit_6 = simd<1>::constant<0>();
    16981777          u16hi.bit_7 = simd<1>::constant<0>();
     1778          callouts.delmask = simd<1>::constant<0>();
     1779          callouts.skipmask = simd<1>::constant<0>();
    16991780          carryQ.CarryDequeueEnqueue(0, 20);
    17001781        }
     
    17031784  };
    17041785
    1705   struct Normalize_WS {
    1706   Normalize_WS() {
    1707  }
    1708   IDISA_ALWAYS_INLINE void do_block(Parameters & parameters, Lex & lex, U16hi & u16hi, U16lo & u16lo, Callouts & callouts, Marker & marker) {
    1709                 BitBlock CRLF, CR_scope1, CR_scope2, CRNEL, WS_in_AttVal;
    1710 
    1711                 BitBlock tempvar0;
    1712 
    1713 
    1714         if ((bitblock::any(lex.CR) || carryQ.CarryTest(0, 1))) {
    1715           u16lo.bit_5 = simd_xor(u16lo.bit_5, lex.CR);
    1716           u16lo.bit_6 = simd_xor(u16lo.bit_6, lex.CR);
    1717           u16lo.bit_7 = simd_xor(u16lo.bit_7, lex.CR);
    1718           carryQ.cq[0] = bitblock::srli<127>(pablo_blk_Advance(lex.CR, carryQ.get_carry_in(0), tempvar0));
    1719           CRLF = simd_and(tempvar0, lex.LF);
    1720           callouts.delmask = simd_or(callouts.delmask, CRLF);
    1721           lex.LF = simd_or(lex.LF, lex.CR);
    1722           lex.LF = simd_xor(lex.LF, CRLF);
    1723           callouts.skipmask = simd_or(callouts.skipmask, CRLF);
    1724         }
    1725         else {
    1726           carryQ.CarryDequeueEnqueue(0, 1);
    1727         }
    1728         if ((bitblock::any(parameters.XML_11) || carryQ.CarryTest(1, 2))) {
    1729           if ((bitblock::any(lex.NEL) || carryQ.CarryTest(1, 2))) {
    1730             u16lo.bit_0 = simd_xor(u16lo.bit_0, lex.NEL);
    1731             u16lo.bit_4 = simd_xor(u16lo.bit_4, lex.NEL);
    1732             u16lo.bit_5 = simd_xor(u16lo.bit_5, lex.NEL);
    1733             u16lo.bit_6 = simd_xor(u16lo.bit_6, lex.NEL);
    1734             u16lo.bit_7 = simd_xor(u16lo.bit_7, lex.NEL);
    1735             lex.LF = simd_or(lex.LF, lex.NEL);
    1736             if ((bitblock::any(lex.CR) || carryQ.CarryTest(1, 2))) {
    1737               carryQ.cq[1] = bitblock::srli<127>(pablo_blk_Advance(lex.CR, carryQ.get_carry_in(1), CR_scope1));
    1738               carryQ.cq[2] = bitblock::srli<127>(pablo_blk_Advance(CR_scope1, carryQ.get_carry_in(2), CR_scope2));
    1739               CRNEL = simd_and(CR_scope2, lex.NEL);
    1740               callouts.delmask = simd_or(callouts.delmask, CRNEL);
    1741               lex.LF = simd_xor(lex.LF, CRNEL);
    1742               callouts.skipmask = simd_or(callouts.skipmask, CRNEL);
    1743             }
    1744             else {
    1745               carryQ.CarryDequeueEnqueue(1, 2);
    1746             }
    1747           }
    1748           else {
    1749             carryQ.CarryDequeueEnqueue(1, 2);
    1750           }
    1751           if (bitblock::any(lex.LS)) {
    1752             u16hi.bit_2 = simd_xor(u16hi.bit_2, lex.LS);
    1753             u16lo.bit_2 = simd_xor(u16lo.bit_2, lex.LS);
    1754             u16lo.bit_6 = simd_xor(u16lo.bit_6, lex.LS);
    1755             lex.LF = simd_or(lex.LF, lex.LS);
    1756           }
    1757         }
    1758         else {
    1759           carryQ.CarryDequeueEnqueue(1, 2);
    1760         }
    1761         WS_in_AttVal = simd_and(lex.WS, callouts.AttValSpan);
    1762         if (bitblock::any(WS_in_AttVal)) {
    1763           u16lo.bit_2 = simd_or(u16lo.bit_2, WS_in_AttVal);
    1764           u16lo.bit_3 = simd_andc(u16lo.bit_3, WS_in_AttVal);
    1765           u16lo.bit_4 = simd_andc(u16lo.bit_4, WS_in_AttVal);
    1766           u16lo.bit_5 = simd_andc(u16lo.bit_5, WS_in_AttVal);
    1767           u16lo.bit_6 = simd_andc(u16lo.bit_6, WS_in_AttVal);
    1768           u16lo.bit_7 = simd_andc(u16lo.bit_7, WS_in_AttVal);
    1769         }
    1770         carryQ.CarryQ_Adjust(3);
    1771   }
    1772   IDISA_ALWAYS_INLINE void do_final_block(Parameters & parameters, Lex & lex, U16hi & u16hi, U16lo & u16lo, Callouts & callouts, Marker & marker, BitBlock EOF_mask) {
    1773                 BitBlock CRLF, CR_scope1, CR_scope2, CRNEL, WS_in_AttVal;
    1774 
    1775                 BitBlock tempvar0;
    1776 
    1777 
    1778         if ((bitblock::any(lex.CR) || carryQ.CarryTest(0, 1))) {
    1779           u16lo.bit_5 = simd_xor(u16lo.bit_5, lex.CR);
    1780           u16lo.bit_6 = simd_xor(u16lo.bit_6, lex.CR);
    1781           u16lo.bit_7 = simd_xor(u16lo.bit_7, lex.CR);
    1782           carryQ.cq[0] = bitblock::srli<127>(pablo_blk_Advance(lex.CR, carryQ.get_carry_in(0), tempvar0));
    1783           CRLF = simd_and(tempvar0, lex.LF);
    1784           callouts.delmask = simd_or(callouts.delmask, CRLF);
    1785           lex.LF = simd_or(lex.LF, lex.CR);
    1786           lex.LF = simd_xor(lex.LF, CRLF);
    1787           callouts.skipmask = simd_or(callouts.skipmask, CRLF);
    1788         }
    1789         else {
    1790           carryQ.CarryDequeueEnqueue(0, 1);
    1791         }
    1792         if ((bitblock::any(parameters.XML_11) || carryQ.CarryTest(1, 2))) {
    1793           if ((bitblock::any(lex.NEL) || carryQ.CarryTest(1, 2))) {
    1794             u16lo.bit_0 = simd_xor(u16lo.bit_0, lex.NEL);
    1795             u16lo.bit_4 = simd_xor(u16lo.bit_4, lex.NEL);
    1796             u16lo.bit_5 = simd_xor(u16lo.bit_5, lex.NEL);
    1797             u16lo.bit_6 = simd_xor(u16lo.bit_6, lex.NEL);
    1798             u16lo.bit_7 = simd_xor(u16lo.bit_7, lex.NEL);
    1799             lex.LF = simd_or(lex.LF, lex.NEL);
    1800             if ((bitblock::any(lex.CR) || carryQ.CarryTest(1, 2))) {
    1801               carryQ.cq[1] = bitblock::srli<127>(pablo_blk_Advance(lex.CR, carryQ.get_carry_in(1), CR_scope1));
    1802               carryQ.cq[2] = bitblock::srli<127>(pablo_blk_Advance(CR_scope1, carryQ.get_carry_in(2), CR_scope2));
    1803               CRNEL = simd_and(CR_scope2, lex.NEL);
    1804               callouts.delmask = simd_or(callouts.delmask, CRNEL);
    1805               lex.LF = simd_xor(lex.LF, CRNEL);
    1806               callouts.skipmask = simd_or(callouts.skipmask, CRNEL);
    1807             }
    1808             else {
    1809               carryQ.CarryDequeueEnqueue(1, 2);
    1810             }
    1811           }
    1812           else {
    1813             carryQ.CarryDequeueEnqueue(1, 2);
    1814           }
    1815           if (bitblock::any(lex.LS)) {
    1816             u16hi.bit_2 = simd_xor(u16hi.bit_2, lex.LS);
    1817             u16lo.bit_2 = simd_xor(u16lo.bit_2, lex.LS);
    1818             u16lo.bit_6 = simd_xor(u16lo.bit_6, lex.LS);
    1819             lex.LF = simd_or(lex.LF, lex.LS);
    1820           }
    1821         }
    1822         else {
    1823           carryQ.CarryDequeueEnqueue(1, 2);
    1824         }
    1825         WS_in_AttVal = simd_and(lex.WS, callouts.AttValSpan);
    1826         if (bitblock::any(WS_in_AttVal)) {
    1827           u16lo.bit_2 = simd_or(u16lo.bit_2, WS_in_AttVal);
    1828           u16lo.bit_3 = simd_andc(u16lo.bit_3, WS_in_AttVal);
    1829           u16lo.bit_4 = simd_andc(u16lo.bit_4, WS_in_AttVal);
    1830           u16lo.bit_5 = simd_andc(u16lo.bit_5, WS_in_AttVal);
    1831           u16lo.bit_6 = simd_andc(u16lo.bit_6, WS_in_AttVal);
    1832           u16lo.bit_7 = simd_andc(u16lo.bit_7, WS_in_AttVal);
    1833         }
    1834   }
    1835   CarryArray<3, 0> carryQ;
    1836   };
    1837 
    18381786
    18391787          Classify_bytes classify_bytes;
    1840   Utf8_to_utf16 utf8_to_utf16;
    1841   Normalize_WS normalize_WS;
     1788  Normalize_LF normalize_LF;
     1789  Transcode_Utf8_To_utf16 transcode_Utf8_To_utf16;
    18421790
    18431791
     
    19521900        #endif
    19531901
    1954         void checkErrors(const XMLUTF8CharacterSetAdapter::Errors & errors, const U8 & u8, const XMLByte * source, XMLLineColTracker & lineCol);
    1955 
    1956     void reportError(BitBlock fatalErrors, const XMLUTF8CharacterSetAdapter::Errors & errors, const U8 & u8, const XMLByte * source, XMLLineColTracker & lineCol);
     1902    void checkErrors(const U8 & u8, const XMLByte * source, XMLUTF8CharacterSetAdapter::Errors & errors);
     1903
     1904    void reportError(const BitBlock fatalErrors, const U8 & u8, const XMLByte * source, XMLUTF8CharacterSetAdapter::Errors & errors);
    19571905
    19581906        IDISA_ALWAYS_INLINE
  • icXML/icXML-devel/src/icxmlc/parsers/XMLDocumentAccumulator.hpp

    r3105 r3151  
    177177
    178178void XMLDocumentAccumulator::markStartTag()
    179 {   
     179{
    180180    fElementContextIdx = fDocumentContextIdx;
    181181    // advance the idx by 2 to make room for the type and potential attribute count. if there are no attributes,
     
    198198    fDocumentContextStream[fDocumentContextIdx++] = uriId;
    199199    fDocumentContextStream[fDocumentContextIdx++] = length;
    200 
    201 
    202200    fDocumentObjectStream[fDocumentObjectIdx++].name = name;
    203201    if (normalized)
    204202    {
    205         value = fContentAugmentStream.insert(value, length);       
     203        value = fContentAugmentStream.insert(value, length);
    206204    }
    207205    fDocumentObjectStream[fDocumentObjectIdx++].string = value;
  • icXML/icXML-devel/src/icxmlc/parsers/XMLDocumentDisseminator.hpp

    r3105 r3151  
    2323public:
    2424
    25     XMLDocumentDisseminator(XMLParser & parser, XMLDocumentHandler * documentHandler, MemoryManager * manager)
    26     : fDocumentContextStream(parser.fDocumentContextStream)
    27     , fDocumentObjectStream(parser.fDocumentObjectStream)
    28     , fDocumentContextIdx(parser.fDocumentContextIdx)
     25    XMLDocumentDisseminator(XMLParser & parser, XMLDocumentHandler * docHandler, MemoryManager * manager)
     26    : fDocumentContextIdx(parser.fDocumentContextIdx)
    2927    , fDocumentObjectIdx(parser.fDocumentObjectIdx)
    3028    , fDocumentObjectCount(parser.fDocumentObjectCount)
    31     , fContextStreamPtr(&parser.fContextStream[parser.fContextIdx])
    32     , fDocumentHandler(documentHandler)
     29    , fContextIdx(parser.fContextIdx)
     30    , fDocumentContextStream(parser.fDocumentContextStream)
     31    , fDocumentObjectStream(parser.fDocumentObjectStream)
     32    , fContextStream(parser.fContextStream)
     33    , fDocumentHandler(docHandler)
    3334    , fContextId(0)
    34     , fAttrList(parser.fAttrList)
     35    , fAttrList(16, true, manager)
    3536    {
    36         // resize the attribute list up front
    37         if (unlikely(parser.fMaxAttributeCount > fAttrList.size()))
    38         {
    39             size_t toAdd = parser.fMaxAttributeCount - fAttrList.size();
    40             do
    41             {
    42                 fAttrList.addElement(new (manager) XMLAttr(manager));
    43             }
    44             while (--toAdd);
    45         }
     37
    4638    }
    4739
     
    5042    IDISA_ALWAYS_INLINE
    5143    bool next();
     44
     45    IDISA_ALWAYS_INLINE
     46    void resizeAttributeArray(const XMLSize_t maxAttributeCount, MemoryManager * manager);
    5247
    5348private:
     
    6055    size_t &                            fDocumentObjectIdx;
    6156    size_t &                            fDocumentObjectCount;
    62     UriPtrType                          fContextStreamPtr;
    63     XMLDocumentHandler *                fDocumentHandler;
     57    size_t &                            fContextIdx;
     58    XMLDocumentHandler * const          fDocumentHandler;
    6459    gid_t                               fContextId;
    65 
    6660
    6761    DocumentContextStream &             fDocumentContextStream;
    6862    DocumentObjectStream &              fDocumentObjectStream;
    69     RefVectorOf<XMLAttr> &              fAttrList;
     63    SymbolUriArray &                                    fContextStream;
     64
     65    RefVectorOf<XMLAttr>                fAttrList;
    7066};
    7167
     
    7773    }
    7874    fDocumentObjectCount--;
     75
     76    assert (fDocumentHandler != 0);
    7977
    8078    MarkupType type = static_cast<MarkupType>(fDocumentContextStream[fDocumentContextIdx++]);
     
    113111                attr.set(name, value, length, attType);
    114112                attr.setSpecified(specified);
    115 
    116113            }
    117114            // falls through intentionally to build the start tag
     
    164161            }
    165162            break;
    166          default: UNREACHABLE;
     163         default:
     164            UNREACHABLE;
    167165    }
    168166
     
    170168}
    171169
    172 
    173170void XMLDocumentDisseminator::readContextId()
    174171{
    175172    // the namespace context id allows the scanner to map prefix to uris and determine what
    176173    // prefixes and uris are in scope.
    177     fDocumentHandler->fNamespaceContextId = *fContextStreamPtr++;
     174    fDocumentHandler->fNamespaceContextId = fContextStream[fContextIdx++];
    178175}
    179176
     177void XMLDocumentDisseminator::resizeAttributeArray(const XMLSize_t maxAttributeCount, MemoryManager * manager)
     178{
     179    // resize the attribute list up front
     180    if (unlikely(maxAttributeCount > fAttrList.size()))
     181    {
     182        size_t toAdd = maxAttributeCount - fAttrList.size();
     183        do
     184        {
     185            fAttrList.addElement(new (manager) XMLAttr(manager));
     186        }
     187        while (--toAdd);
     188    }
     189}
    180190
    181191XERCES_CPP_NAMESPACE_END
  • icXML/icXML-devel/src/icxmlc/parsers/XMLGrammarValidator.hpp

    r3105 r3151  
    5353    , fEndsWithPartialContent(parser.fEndsWithPartialContent)
    5454    , fInMarkup(parser.fInMarkup)
    55     , fEndOfElementScope(parser.fNoMore)
    5655    , fDoPSVI(scanner.getPSVIHandler())
    5756    , fCheckIdentityConstraint(scanner.toCheckIdentityConstraint())
     
    108107    , fEndsWithPartialContent(false)
    109108    , fInMarkup(false)
    110     , fEndOfElementScope(false)
    111109    , fDoPSVI(parser.fDoPSVI)
    112110    , fCheckIdentityConstraint(parser.fCheckIdentityConstraint)
     
    144142
    145143    IDISA_ALWAYS_INLINE
    146     bool validateMarkup();
     144    bool validateMarkup(bool & endOfElementScope);
    147145
    148146    bool validateEntityGrammar();
     
    202200    const bool                                  fEndsWithPartialContent;
    203201    bool                                        fInMarkup;
    204     bool                                        fEndOfElementScope;
    205202    const bool                                  fDoPSVI;
    206203    const bool                                  fCheckIdentityConstraint;
     
    232229    DEBUG_GRAMMAR_MESSAGE("######################################################################");
    233230
     231    bool endOfElementScope = false;
    234232    switch (fInMarkup)
    235233    {
     
    248246                        }                           
    249247                        fMarkupCount--;
    250                         if (validateMarkup()) break;                       
     248                        if (validateMarkup(endOfElementScope))
     249                        {
     250                            if (unlikely(endOfElementScope)) //  || fNoMore ??
     251                            {
     252                                fInMarkup = 0;
     253                                return 0;
     254                            }
     255                            break;
     256                        }
    251257                    }
    252258                    break;
    253259        default:    UNREACHABLE
    254     }
    255     fInMarkup &= !fEndOfElementScope;
    256     return !fEndOfElementScope;
     260    }   
     261    return 1;
    257262}
    258263
     
    300305
    301306template<class XMLScannerType>
    302 bool XMLGrammarValidator<XMLScannerType>::validateMarkup()
     307bool XMLGrammarValidator<XMLScannerType>::validateMarkup(bool & endOfElementScope)
    303308{
    304309    DEBUG_GRAMMAR_MESSAGE("----------------------------------------------------------------------");
     
    394399                fContentFlag[fScope] = 0;
    395400                ++fCursorPtr;
    396 
    397                 fEndOfElementScope = (isRoot & isEmpty);               
    398             }
    399             return fEndOfElementScope;
     401                endOfElementScope = (isRoot && isEmpty);
     402            }                   
     403            return endOfElementScope;
    400404        /// ------------------------------------------------------------------------ ///
    401405        case EndTag:
     
    404408                const XMLSymbol & element = getSymbolByGid(elementGid);
    405409                const gid_t uriId = *fUriPtr++;
     410                const bool isRoot = (fScope == 1);
    406411
    407412                DEBUG_GRAMMAR_MESSAGE
     
    414419
    415420                fElementIndex -= fChildren[fScope];
    416 
    417                 const bool isRoot = (fScope == 1);
    418421
    419422                fScanner.validateEndTag
     
    433436                fContextId = *fContextPtr++;
    434437                fScanner.setContextId(fContextId);
    435                 fLastContextId = fContextId;               
    436                 fEndOfElementScope = isRoot;
     438                fLastContextId = fContextId;                               
    437439                fScope--;
    438 
    439             }
    440             return fEndOfElementScope;
     440                endOfElementScope = isRoot;
     441            }           
     442            return endOfElementScope;
    441443        /// ------------------------------------------------------------------------ ///
    442444        case ProcessingInstruction:
     
    806808    validateEntityContent();
    807809
     810    bool endOfElementScope = false;
     811
    808812    for (;;)
    809813    {
    810         validateMarkup();
     814        validateMarkup(endOfElementScope);
    811815        if (--fMarkupCount == 0 || validateContent()) break;
    812816    }
  • icXML/icXML-devel/src/icxmlc/parsers/XMLNamespaceParser.hpp

    r3105 r3151  
    110110    }
    111111
    112     inline ~XMLNamespaceParser() {}
     112    inline ~XMLNamespaceParser()
     113    {
     114        DEBUG_NAMESPACE_MESSAGE("######################################################################");
     115        DEBUG_NAMESPACE_MESSAGE("END NAMESPACE PROCESSING: fContextIdCount=" << (fContextPtr - &fContextStream[0]));
     116        DEBUG_NAMESPACE_MESSAGE("######################################################################");
     117    }
    113118
    114119
     
    439444        if (unlikely(bindingError != XMLErrs::NoError))
    440445        {
    441             fScanner.emitError(bindingError, attribute.fQName->getRawName());
     446            fScanner.emitError(bindingError, attribute.getQName()->getRawName());
    442447        }
    443448
  • icXML/icXML-devel/src/icxmlc/parsers/XMLWellFormednessParser.hpp

    r3105 r3151  
    6363        fEndsWithPartialContent = false;
    6464        fUriCount = parser.fUriIdx + parser.fSymbolCount;
    65 
    66         DEBUG_MESSAGE("XMLWellFormednessParser::fStringCount=" << fStringCount << ", fSymbolIdx=" << fSymbolIdx << ", fSymbolCount=" << fSymbolCount)
    6765    }
    6866
    6967    inline ~XMLWellFormednessParser()
    7068    {
    71         DEBUG_MESSAGE("XMLWellFormednessParser::fStringCount'=" << fStringCount << ", fSymbolCount'=" << fSymbolCount)
     69
    7270    }
    7371
     
    8078private:
    8179
    82     template<XMLParser::DocumentStateType DocStateType, bool HasDTD, bool isEntity>
     80    template<XMLParser::DocumentStateType DocStateType, bool HasDTD, size_t CheckingEntity>
    8381    IDISA_ALWAYS_INLINE
    8482    void checkWellformedness(XMLDocumentAccumulator * const accumulator);
     
    228226
    229227template<class XMLScannerType>
    230 template<XMLParser::DocumentStateType DocStateType, bool HasDTD, bool isEntity>
     228template<XMLParser::DocumentStateType DocStateType, bool HasDTD, size_t CheckingEntity>
    231229void XMLWellFormednessParser<XMLScannerType>::checkWellformedness(XMLDocumentAccumulator * const accumulator)
    232230{
     
    265263                // wellformedness constraint: content can only exist between start and end tag pairs.
    266264                const XMLCh * const endOfContent = *fStringEndPtr++;
    267 
    268                 // *fStringEndPtr will be 0 if and only if this is a complete string
    269265                const size_t length = endOfContent - fCursorPtr;
    270                 if (unlikely(!XMLStringU::isWhitespace(fCursorPtr, length)))
     266                if (likely(*endOfContent == chNull && XMLStringU::isWhitespace(fCursorPtr, length)))
    271267                {
    272                     fScanner.emitError(XMLErrs::ExpectedCommentOrPI);
     268                    accumulator->writeIgnorableWhitespace(fCursorPtr, length);
    273269                }
    274270                else
    275271                {
    276                     accumulator->writeIgnorableWhitespace(fCursorPtr, length);
     272                    fScanner.emitError(XMLErrs::ExpectedCommentOrPI);
    277273                }
    278274                fCursorPtr = endOfContent;
     
    420416                            fGidStack[fScope++] = elementGid;
    421417                        }
    422                         else if (!isEntity && unlikely(fScope == 0)) // root element
     418                        else if (unlikely(fScope == 0)) // root element
    423419                        {
    424420                            fMarkupCount--;
     
    491487                            fGidStack[fScope++] = elementGid;
    492488                        }
    493                         else if (!isEntity && unlikely(fScope == 0)) // root element
     489                        else if (unlikely(fScope == 0)) // root element
    494490                        {
    495491                            fMarkupCount--;
     
    523519
    524520                        // check to be sure that we can "pop" the element off the stack
    525                         if (unlikely(fScope == 0))
    526                         {
    527                             fScanner.emitError(XMLErrs::MoreEndThanStartTags);
     521                        if (unlikely(fScope == CheckingEntity))
     522                        {
     523                            fScanner.emitError(CheckingEntity ? XMLErrs::PartialTagMarkupError : XMLErrs::MoreEndThanStartTags);
    528524                        }
    529525                        else
     
    535531                                reportMismatchedEndTag(fScope);
    536532                            }
    537                             else if (!isEntity && unlikely(fScope == 0)) // root element
     533                            else if (unlikely(fScope == 0)) // root element
    538534                            {
    539535                                fMarkupCount--;
     
    568564                        else if (unlikely(XMLStringU::isXML(target)))
    569565                        {
    570                             fScanner.emitError(XMLErrs::NoPIStartsWithXML);
     566                            fScanner.emitError((CheckingEntity && fScope == CheckingEntity) ? XMLErrs::TextDeclNotLegalHere : XMLErrs::NoPIStartsWithXML);
    571567                        }
    572568
     
    712708scanContentEntities()
    713709{
    714     DEBUG_WELL_FORMEDNESS_MESSAGE("fCursorPtr=" << *fCursorPtr)
    715710    while (unlikely(*fCursorPtr == Entity))
    716711    {
     
    728723        // skip to the next delimiter
    729724        fCursorPtr = *fStringEndPtr++;
    730         DEBUG_WELL_FORMEDNESS_MESSAGE("fCursorPtr=" << *fCursorPtr)
    731725    }
    732726}
     
    929923
    930924    if (DocStateType == XMLParser::Element)
    931     {     
    932         if (likely(!fScanner.getHasInternalOrExternalDTD()))
     925    {             
     926        if (unlikely(fScanner.getHasInternalOrExternalDTD()))
    933927        {
    934             checkWellformedness<XMLParser::Element, false, false>(accumulator);
     928            checkWellformedness<XMLParser::Element, true, 0>(accumulator);
    935929        }
    936930        else
    937931        {
    938             checkWellformedness<XMLParser::Element, true, false>(accumulator);
     932            checkWellformedness<XMLParser::Element, false, 0>(accumulator);
    939933        }
    940934
     
    947941    else
    948942    {
    949         checkWellformedness<DocStateType, false, false>(accumulator);
     943        checkWellformedness<DocStateType, false, 0>(accumulator);
    950944    }
    951945}
     
    956950void XMLWellFormednessParser<XMLScannerType>::checkEntityWellformedness()
    957951{
    958     checkWellformedness<XMLParser::Element, true, true>(NULL);
    959 
    960     // test to be sure that we aren't missing end tags
    961     if (unlikely(fNoMore && fScope != 0))
    962     {
    963         reportMismatchedEndTag(fScope - 1);
    964     }
    965 
    966     if (unlikely(fMarkupCount != 0))
     952    if (unlikely(fScope == 0 && !fInMarkup))
     953    {
     954        // if we're checking entity well-formedness, assume that we start within a scope in the source document.
     955        // note: if for any reason the initial scope has to be changed, make sure that the "CheckingEntity" parameter
     956        // is set to the initial scope.
     957        fScope = 1;
     958    }
     959
     960    checkWellformedness<XMLParser::Element, true, 1>(NULL);
     961
     962    // test to be sure that we have exactly as many start tags as we do end tags
     963    // and that the entity has been parsed successfully.
     964    if (unlikely(fMarkupCount != 0 || (fNoMore && fScope != 1)))
    967965    {
    968966        fScanner.emitError(XMLErrs::PartialTagMarkupError);
    969967    }
    970 
    971968}
    972969
     
    10571054#endif
    10581055
    1059 #ifdef USE_BIT_VECTOR_FOR_DUPLICATE_ATTRIBUTE_TEST
    1060 #undef USE_BIT_VECTOR_FOR_DUPLICATE_ATTRIBUTE_TEST
    1061 #endif
    10621056
    10631057XERCES_CPP_NAMESPACE_END
Note: See TracChangeset for help on using the changeset viewer.