Changeset 3227 for proto/xmlschema


Ignore:
Timestamp:
May 29, 2013, 12:52:42 PM (6 years ago)
Author:
shiyangy
Message:

project updated

File:
1 edited

Legend:

Unmodified
Added
Removed
  • proto/xmlschema/pablo_template.cpp

    r2219 r3227  
     1// Stream struct and function headers and definitions
     2
     3#include <simd-lib/bitblock.hpp>
     4#include <simd-lib/carryQ.hpp>
     5#include <simd-lib/pabloSupport.hpp>
     6
     7#define LocalCarryDeclare(name, count)\
     8CarryArray<count, 0> name;\
     9
     10#define assert_0_error(errkind, errstrm)
     11
     12BitBlock EOF_mask = simd<1>::constant<1>();
     13
     14// XMLWF application headers and definitions
    115#include <stdio.h>
    216#include <stdlib.h>
     
    418#include <sys/types.h>
    519#include <sys/stat.h>
    6 #include "../lib/lib_simd.h"
    7 
    8 #define BLOCK_SIZE (sizeof(SIMD_type) * 8)
    9 #define SEGMENT_BLOCKS 12
    10 #define BUFFER_SIZE (BLOCK_SIZE * SEGMENT_BLOCKS)
    11 #define OVERLAP_BUFSIZE (sizeof(SIMD_type))
    12 
    13 typedef long ScanBlock;
    14 typedef SIMD_type BytePack;
    15 typedef SIMD_type BitBlock;
    16 
    17 #include "../lib/carryQ.h"
    18 #include "xmldecl.h"
    19 #include "xml_error.c"
    20 #include "xmldecl.c"
    21 #include "namechars.h"
    22 
    23 #include "../lib/perflib/perfsec.h"
    24 #include "../lib/s2p.h"
    25 
    26 #include "TagMatcher.h"
    27 #include "LineColTracker.h"
    28 
     20
     21#include <simd-lib/s2p.hpp>
     22#include <simd-lib/buffer.hpp>
     23#include <simd-lib/bitblock_iterator.hpp>
     24#include <simd-lib/perflib/perfsec.h>
     25
     26#include <xmldecl.h>
     27#include <namechars.h>
     28#include <LineColTracker.hpp>
     29#include <XMLTestSuiteError.h>
     30
     31// Define the mappings for pablo.assert_0(strm, errkind) statements which
     32// compile to the form assert_0_error(errkind, strm)
     33#include <ErrorTracker.h>
     34
     35ErrorTracker error_tracker;
     36#define assert_0_error(errkind, errstrm) error_tracker.NoteError(errkind, errstrm);
     37
     38//#define STL_ALIGNED_VECTOR // experimental STL Aligned Vector
     39#ifndef STL_ALIGNED_VECTOR
     40#include <TagMatcher.hpp>
     41#endif
     42
     43#ifdef STL_ALIGNED_VECTOR // experimental, comment out TagMatcher code to use STL aligned memory vector
     44#warning "Compiling with experimental STL_ALIGNED_VECTOR."
     45#include <vector>
     46#include <simd-lib/stl_aligned_allocator.hpp>
     47#endif
    2948
    3049#ifdef BUFFER_PROFILING
    31         BOM_Table * parser_timer;
    32 
    33 #elif CODE_CLOCKER
    34         #define NUM_EVENTS 1
    35         int Events[NUM_EVENTS] = {PAPI_TOT_CYC};
    36         //int Events[NUM_EVENTS] = {PAPI_L2_DCM};
    37         //int Events[NUM_EVENTS] = {PAPI_TOT_CYC, PAPI_BR_MSP};
    38         int cal_size = 20;
    39         CC * parser_timer = new CC(Events,NUM_EVENTS,cal_size);
     50    BOM_Table * parser_timer;
     51#elif PAPI
     52                #define PAPI_EVENTS_COUNT 2
     53                int PAPI_EVENTS[PAPI_EVENTS_COUNT] = {PAPI_TOT_CYC, PAPI_BR_MSP};       
     54    CC * parser_timer;
    4055#else
    41         void * parser_timer;
    42 #endif
    43 
    44 int block_base=0;
    45 int buffer_base=0;
    46 int buffer_last;
    47 char * source;
    48 
    49 
    50 LineColTracker tracker;
    51 
    52 static inline void ReportError(const char * error_msg, int error_pos_in_block) {
    53   int error_line, error_column;
    54   tracker.get_Line_and_Column(error_pos_in_block, error_line, error_column);
    55   fprintf(stderr, "%s at line %i, column %i\n", error_msg, error_line, error_column);
    56 }
    57 
    58 class ErrorTracker {
    59 public:
    60         ErrorTracker() { noted_pos_in_block = -1;}
    61 
    62         inline void NoteError(const char * error_msg, BitBlock err_strm) {
    63           int pos_in_block = count_forward_zeroes(err_strm);
    64           if ((noted_pos_in_block == -1) || (noted_pos_in_block > pos_in_block)) {
    65             noted_pos_in_block = pos_in_block;
    66             noted_error = error_msg;
    67           }
    68         }
    69 
    70         inline void If_Error_Report_First() {
    71           if (noted_pos_in_block > -1) {
    72                   int error_line, error_column;
    73                   ReportError(noted_error, noted_pos_in_block);
    74                   exit(-1);
    75           }
    76         }
    77  
    78 private:
    79   const char * noted_error;
    80   int noted_pos_in_block;       
    81 };
    82 
    83 
    84 TagMatcher matcher;
    85 BitBlock EOF_mask = simd_const_1(1);
    86 
    87 ErrorTracker error_tracker;
    88 
    89 static inline int StreamScan(ScanBlock * stream, int blk_count, int ProcessPos(int)) {
    90         int blk;
    91         int block_pos = 0;
    92 
    93         for (blk = 0; blk < blk_count; blk++) {
    94                 ScanBlock s = stream[blk];
    95                 while(s) {
    96                         int code = (ProcessPos(cfzl(s) + block_pos));
    97                         if (code) return code;
    98                         s = s & (s-1);  // clear rightmost bit.
    99                 }
    100                 block_pos += 8 * sizeof(ScanBlock);
    101         }
    102         return 0;
    103 }
    104 
    105 
    106 
    107 static inline int NameStrt_check(int pos) {
    108         int block_pos = block_base + pos;
    109         if(XML_10_UTF8_NameStrt_bytes((unsigned char*)&source[block_pos]) == 0){
    110               ReportError("name start error", pos);
    111               exit(-1);
    112         }
    113         return 0;
    114 }
    115 
    116 static inline int Name_check(int pos) {
    117         int block_pos = block_base + pos;
    118         if(XML_10_UTF8_NameChar_bytes((unsigned char*)&source[block_pos]) == 0){
    119               ReportError("name error", pos);
    120               exit(-1);
    121         }
    122         return 0;
    123 }
    124 
    125 static inline int PIName_check(int pos) {
    126         int block_pos = block_base + pos;
    127         int file_pos = block_pos+buffer_base;
    128         if (at_XxMmLll<ASCII>((unsigned char*)&source[block_pos]) && (source[block_pos+3]=='?' || source[block_pos+3]<= ' ')) {
    129               // "<?xml" legal at start of file.
    130               if ((file_pos == 2) && at_XmlDecl_start<ASCII>((unsigned char*)&source[0])) return 0;
    131               ReportError("[Xx][Mm][Ll] illegal as PI name", pos);
    132               exit(-1);
    133         }
    134         return 0;
    135 }
    136 
    137 static inline int CD_check(int pos) {
    138         int block_pos = block_base + pos;
    139         if (!at_CDATA1<ASCII>((unsigned char*)&source[block_pos])){
    140               ReportError("CDATA error", pos);
    141               exit(-1);
    142         }
    143         return 0;
    144 }
    145 
    146 static inline int GenRef_check(int pos) {
    147         int block_pos = block_base + pos;
    148         unsigned char* s = (unsigned char*)&source[block_pos];
    149         if (!(at_Ref_gt<ASCII>(s)||at_Ref_lt<ASCII>(s)||at_Ref_amp<ASCII>(s)||at_Ref_quot<ASCII>(s)||at_Ref_apos<ASCII>(s))){
    150               ReportError("Undefined reference", pos);
    151               exit(-1);
    152         }
    153         return 0;
    154 }
    155 
    156 static inline int HexRef_check(int pos) {
    157         int block_pos = block_base + pos;
    158         unsigned char* s = (unsigned char*)&source[block_pos];
    159         int ch_val = 0;
    160         while(at_HexDigit<ASCII>(s)){
    161           ch_val = HexVal<ASCII>(s[0]) + (ch_val<<4);
    162           if (ch_val> 0x10FFFF ){
    163             ReportError("Illegal character reference", pos);
    164             exit(-1);
    165           }
    166           s++;
    167         }
    168         if ((ch_val == 0x0) || ((ch_val | 0x7FF) == 0xDFFF)|| ((ch_val | 0x1) == 0xFFFF)){
    169           ReportError("Illegal character reference", pos);
    170           exit(-1);
    171         }
    172         else if (((ch_val < 0x20) && (ch_val != 0x9) && (ch_val != 0xD) && (ch_val != 0xA))){
    173           ReportError("Illegal XML 1.0 character reference", pos);
    174           exit(-1);
    175         }
    176         return 0;
    177 }
    178 
    179 static inline int DecRef_check(int pos) {
    180         int block_pos = block_base + pos;
    181         unsigned char* s = (unsigned char*)&source[block_pos];
    182         int ch_val = 0;
    183         while(at_HexDigit<ASCII>(s)){
    184           ch_val = DigitVal<ASCII>(s[0]) + ch_val*10;
    185           if (ch_val> 0x10FFFF ){
    186             ReportError("Illegal character reference", pos);
    187             exit(-1);
    188           }
    189           s++;
    190         }
    191         if ((ch_val == 0x0) || ((ch_val | 0x7FF) == 0xDFFF)|| ((ch_val | 0x1) == 0xFFFF)){
    192           ReportError("Illegal character reference", pos);
    193           exit(-1);
    194         }
    195         else if (((ch_val < 0x20) && (ch_val != 0x9) && (ch_val != 0xD) && (ch_val != 0xA))){
    196           ReportError("Illegal XML 1.0 character reference", pos);
    197           exit(-1);
    198         }
    199         return 0;
    200 }
    201 
    202 static inline int AttRef_check(int pos) {
    203         int block_pos = block_base + pos;
    204         unsigned char* s = (unsigned char*)&source[block_pos];
    205         int ch_val = 0;
    206         if(s[0]=='#'){
    207           s++;
    208           if(s[0]=='x' || s[0]=='X'){
    209             s++;
    210             while(at_HexDigit<ASCII>(s)){
    211               ch_val = HexVal<ASCII>(s[0]) + (ch_val<<4);
    212               s++;
    213             }
    214           }
    215           else{
    216             while(at_HexDigit<ASCII>(s)){
    217               ch_val = DigitVal<ASCII>(s[0]) + ch_val*10;
    218               s++;
    219             }
    220           }
    221           if (ch_val==60){
    222             ReportError("Attribute values contain '<' characters after reference expansion", pos);
    223             exit(-1);
    224           }
    225         }
    226         else if(at_Ref_lt<ASCII>(s)){
    227           ReportError("Attribute values contain '<' characters after reference expansion", pos);
    228           exit(-1);
    229         }
    230         return 0;
    231 }
    232 
    233 
    234 
     56    void * parser_timer;
     57#endif
     58
     59//////////////////////////////////////////////////////////////////////////////////////////
     60// Buffer Management // WARNING: Do Not update #defines. Results in TagMatcher errors.
     61//////////////////////////////////////////////////////////////////////////////////////////
     62#define PADDING_BLOCKS 0
     63#define PADDING_SIZE (BLOCK_SIZE * PADDING_BLOCKS)
     64#define COPYBACK_BLOCKS 2
     65#define COPYBACK_SIZE (BLOCK_SIZE * COPYBACK_BLOCKS)
     66#define LOOKAHEAD_BLOCKS 1
     67#define LOOKAHEAD_SIZE (BLOCK_SIZE * LOOKAHEAD_BLOCKS)
     68#define SEGMENT_BLOCKS  12 // WARNING: TagMatcher.hpp causes xmlconf test suite failures for SEGMENT_BLOCKS < 3.
     69#define SEGMENT_SIZE (BLOCK_SIZE * SEGMENT_BLOCKS)
     70#define BUFFER_SIZE (COPYBACK_SIZE + SEGMENT_SIZE + LOOKAHEAD_SIZE + PADDING_SIZE)
     71
     72//////////////////////////////////////////////////////////////////////////////////////////
     73// @ global depends on 'error_tracker' and 'EOF_mask' definitions.
     74//////////////////////////////////////////////////////////////////////////////////////////
    23575@global
    23676
    237 static inline void s2p_do_block(BytePack U8[], Basis_bits & basis_bits) {
    238   s2p(U8[0], U8[1], U8[2], U8[3], U8[4], U8[5], U8[6], U8[7],
    239         basis_bits.bit_0, basis_bits.bit_1, basis_bits.bit_2, basis_bits.bit_3, basis_bits.bit_4, basis_bits.bit_5, basis_bits.bit_6, basis_bits.bit_7);
    240 }
    241 
    242 static inline void s2p_do_final_block(BytePack U8[], Basis_bits & basis_bits, BitBlock EOF_mask) {
    243   s2p_do_block(U8, basis_bits);
    244   basis_bits.bit_0 = simd_and(basis_bits.bit_0, EOF_mask);
    245   basis_bits.bit_1 = simd_and(basis_bits.bit_1, EOF_mask);
    246   basis_bits.bit_2 = simd_and(basis_bits.bit_2, EOF_mask);
    247   basis_bits.bit_3 = simd_and(basis_bits.bit_3, EOF_mask);
    248   basis_bits.bit_4 = simd_and(basis_bits.bit_4, EOF_mask);
    249   basis_bits.bit_5 = simd_and(basis_bits.bit_5, EOF_mask);
    250   basis_bits.bit_6 = simd_and(basis_bits.bit_6, EOF_mask);
    251   basis_bits.bit_7 = simd_and(basis_bits.bit_7, EOF_mask);
    252 }
    253 
    254 static inline void postprocess_do_block(Lex & lex, CtCDPI_Callouts & ctCDPI_Callouts, Ref_Callouts & ref_Callouts, Check_streams & check_streams, int chars_avail){
    255             tracker.StoreNewlines(lex.LF);
    256 
    257                 if (bitblock_has_bit(simd_or(check_streams.non_ascii_name_starts, check_streams.non_ascii_names))) {
    258                   StreamScan((ScanBlock *) &check_streams.non_ascii_name_starts, sizeof(BitBlock)/sizeof(ScanBlock), NameStrt_check);
    259                   StreamScan((ScanBlock *) &check_streams.non_ascii_names, sizeof(BitBlock)/sizeof(ScanBlock), Name_check);
    260                 }
    261 
    262                 if (bitblock_has_bit(ctCDPI_Callouts.PI_name_starts)){
    263                   StreamScan((ScanBlock *) &ctCDPI_Callouts.PI_name_starts, sizeof(BitBlock)/sizeof(ScanBlock), PIName_check);
    264                 }
    265 
    266                 if (bitblock_has_bit(ctCDPI_Callouts.CD_starts)){
    267                   StreamScan((ScanBlock *) &ctCDPI_Callouts.CD_starts, sizeof(BitBlock)/sizeof(ScanBlock), CD_check);
    268                 }
    269 
    270                 if (bitblock_has_bit(ref_Callouts.GenRef_starts)){
    271                   StreamScan((ScanBlock *) &ref_Callouts.GenRef_starts, sizeof(BitBlock)/sizeof(ScanBlock), GenRef_check);
    272                 }
    273 
    274                 if (bitblock_has_bit(ref_Callouts.DecRef_starts)){
    275                   StreamScan((ScanBlock *) &ref_Callouts.DecRef_starts, sizeof(BitBlock)/sizeof(ScanBlock), DecRef_check);
    276                 }
    277 
    278                 if (bitblock_has_bit(ref_Callouts.HexRef_starts)){
    279                   StreamScan((ScanBlock *) &ref_Callouts.HexRef_starts, sizeof(BitBlock)/sizeof(ScanBlock), HexRef_check);
    280                 }
    281 
    282                 if (bitblock_has_bit(check_streams.att_refs)){
    283                   StreamScan((ScanBlock *) &check_streams.att_refs, sizeof(BitBlock)/sizeof(ScanBlock), AttRef_check);
    284                 }
    285 
    286                 error_tracker.If_Error_Report_First();
    287 
    288                 matcher.store_streams(check_streams.tag_marks, check_streams.name_follows, check_streams.misc_mask, chars_avail);
    289                 tracker.AdvanceBlock();
    290 }
    291 
    292 void do_process(FILE *infile, FILE *outfile) {
    293 
    294 @decl
    295 
    296   int buf_pos = 0;
    297   int block_pos = 0;
    298   int errpos = 0;
    299   int chars_avail = 0;
    300   int check_pos = 0;
    301   int chars_read = 0;
    302   BytePack buf[(BUFFER_SIZE+BLOCK_SIZE+OVERLAP_BUFSIZE*2)/sizeof(SIMD_type)];
    303 
    304   char * srcbuf = ((char *) buf) + OVERLAP_BUFSIZE;
    305   buffer_base = buf_pos;
    306   source = srcbuf;
    307 
    308   chars_read = fread((void *)srcbuf, 1, BUFFER_SIZE + OVERLAP_BUFSIZE, infile);
    309   chars_avail = chars_read;
    310   if (chars_avail > BUFFER_SIZE) chars_avail = BUFFER_SIZE;
    311 
    312   matcher.setSrc(srcbuf);
    313 
    314   if(chars_read<4){
    315     fprintf(stderr,"File is too short. Not well formed.\n");
    316     exit(-1);
    317   }
    318 
    319   Entity_Info * e = new Entity_Info;
    320   e->AnalyzeSignature((unsigned char *)srcbuf);
    321 
    322   if (e->code_unit_base == ASCII) {
    323 
    324     XML_Decl_Parser<ASCII> decl_parser((unsigned char *)srcbuf);
    325 
    326     decl_parser.ReadXMLInfo(*e);
    327 
    328     if (e->code_unit_size != SingleByte || (e->has_encoding_decl && (!at_UTF_8(e->encoding)))){
    329         fprintf(stderr,"Sorry, this xmlwf demo only works for UTF-8.\n");
    330         exit(-1);
    331     }
    332   }
    333   else {
    334     fprintf(stderr,"Sorry, this xmlwf demo does not process EBCDIC.\n");
    335         exit(-1);
    336   }
    337 
    338   if (e->content_start != 0) {
    339         memmove(&srcbuf[0], &srcbuf[e->content_start], chars_read - e->content_start);
    340         buf_pos = e->content_start;
    341         if (chars_avail == BUFFER_SIZE) {
    342                 chars_read = chars_read - e->content_start +
    343                              fread(&srcbuf[chars_read-e->content_start], 1, e->content_start, infile);
    344                 chars_avail = chars_read;
    345                 if (chars_avail > BUFFER_SIZE) chars_avail = BUFFER_SIZE;
    346         }
    347         else {
    348           chars_read -=e->content_start;
    349           chars_avail -=e->content_start;
    350         }
    351   }
    352 
    353 @stream_stmts
    354 
    355 /* Full Buffers */
    356 
    357     while (chars_avail == BUFFER_SIZE) {
    358       PERF_SEC_START(parser_timer);
    359       for (int blk = 0; blk < SEGMENT_BLOCKS; blk++) {
    360           block_base = blk*BLOCK_SIZE;
    361           s2p_do_block((BytePack *) &srcbuf[block_base], basis_bits);
    362           @block_stmts
    363           postprocess_do_block(lex, ctCDPI_Callouts, ref_Callouts, check_streams, chars_avail);
    364       }
    365       matcher.StreamScan(chars_avail);
    366       matcher.Advance_buffer();
    367       tracker.Advance_buffer();
    368       PERF_SEC_END(parser_timer, chars_avail);
    369            
    370       int bytes_left = chars_read - chars_avail;
    371       memmove(srcbuf, &srcbuf[BUFFER_SIZE], bytes_left);
    372       chars_read = fread(&srcbuf[bytes_left],1, BUFFER_SIZE + OVERLAP_BUFSIZE - bytes_left, infile) + bytes_left;
    373       chars_avail = chars_read;
    374       if (chars_avail > BUFFER_SIZE) chars_avail = BUFFER_SIZE;
    375       buf_pos += chars_avail;
    376       buffer_base = buf_pos;
    377     }
    378 /* Final Partial Buffer */
    379     PERF_SEC_START(parser_timer);
    380 
    381     block_pos = 0;
    382     int remaining = chars_avail;
    383 /* Full Blocks */
    384     while (remaining >= BLOCK_SIZE) {
    385           block_base = block_pos;
    386           s2p_do_block((BytePack *) &srcbuf[block_pos], basis_bits);
    387           @block_stmts
    388           postprocess_do_block(lex, ctCDPI_Callouts, ref_Callouts, check_streams, chars_avail);
    389           block_pos += BLOCK_SIZE;
    390           remaining -= BLOCK_SIZE;
    391     }
    392     block_base = block_pos;
    393     if (remaining > 0 || @any_carry) {
    394           EOF_mask = sisd_srl(simd_const_1(1),sisd_from_int(BLOCK_SIZE-remaining));
    395           s2p_do_final_block((BytePack *) &srcbuf[block_pos], basis_bits, EOF_mask);
    396           @final_block_stmts
    397           postprocess_do_block(lex, ctCDPI_Callouts, ref_Callouts, check_streams, chars_avail);
    398     }
    399     buf_pos += chars_avail;
    400     buffer_base = buf_pos;
    401 
    402     matcher.StreamScan(chars_avail);
    403     matcher.Advance_buffer();
    404     tracker.Advance_buffer();
    405 
    406 
    407     PERF_SEC_END(parser_timer, chars_avail);
    408     if (matcher.depth != 0) {
    409       fprintf(stderr, "tag matching error (depth %i) at position %i\n", matcher.depth, buffer_base);
    410       exit(-1);
    411     }
    412 }
    413 
    414 
    415 
    416 int
    417 main(int argc, char * argv[]) {
     77//////////////////////////////////////////////////////////////////////////////////////////
     78// Headers that depend on @ global stream struct types.
     79//////////////////////////////////////////////////////////////////////////////////////////
     80#include <simd-lib/transpose.hpp>
     81#include <post_process.hpp>
     82
     83static void do_process(FILE *infile, FILE *outfile);
     84
     85int main(int argc, char * argv[]) {
     86
    41887        char * infilename, * outfilename;
    41988        FILE *infile, *outfile;
     
    443112        }
    444113
    445 //      PERF_SEC_BIND(1);
     114        PERF_SEC_BIND(1);
    446115
    447116        PERF_SEC_INIT(parser_timer);
     
    455124        fclose(infile);
    456125        fclose(outfile);
     126
    457127        return(0);
    458128}
     129
     130void do_process(FILE *infile, FILE *outfile) {
     131
     132    @decl
     133
     134    LineColTracker tracker;
     135                #ifndef STL_ALIGNED_VECTOR
     136                        TagMatcher<SEGMENT_SIZE,LOOKAHEAD_SIZE> matcher;
     137                #endif
     138
     139    int block_base  = 0;
     140                int block_pos   = 0;
     141    int buffer_base = 0;
     142    int buffer_pos  = 0;
     143    int chars_avail = 0;
     144                int chars_read  = 0;
     145
     146    //////////////////////////////////////////////////////////////////////////////////////////
     147    // Buffer Management
     148    //////////////////////////////////////////////////////////////////////////////////////////
     149                //      BitBlock buf[(BUFFER_SIZE)/sizeof(BitBlock)];
     150                //      uint8_t * src_buffer = (uint8_t *)buf + COPYBACK_SIZE;
     151
     152                #ifdef STL_ALIGNED_VECTOR
     153                        std::vector<uint8_t, AAllocator<uint8_t> > src_buffer;
     154                        src_buffer.reserve(BUFFER_SIZE);
     155                #else
     156                        uint8_t * COPYBACK;
     157                        uint8_t * src_buffer;
     158                        ALLOC_STATIC_ALIGNED_BYTE_BUFFER_WITH_COPYBACK(COPYBACK, src_buffer);
     159                #endif
     160       
     161    //////////////////////////////////////////////////////////////////////////////////////////
     162    // XML Validation / Content Model
     163    //////////////////////////////////////////////////////////////////////////////////////////
     164                chars_read = fread((void *)&src_buffer[0], 1, SEGMENT_SIZE, infile);
     165                chars_avail = chars_read;
     166                if (chars_avail >= SEGMENT_SIZE) chars_avail = SEGMENT_SIZE;
     167
     168    if(chars_read<4){
     169                fprintf(stderr,"File is too short. Not well formed.\n");
     170                exit(-1);
     171    }
     172
     173    Entity_Info * e = new Entity_Info;
     174                e->AnalyzeSignature((unsigned char *)&src_buffer[0]);
     175
     176    if (e->code_unit_base == ASCII) {
     177
     178                XML_Decl_Parser<ASCII> decl_parser((unsigned char *)&src_buffer[0]);
     179
     180                decl_parser.ReadXMLInfo(*e);
     181
     182                if (e->code_unit_size != SingleByte || (e->has_encoding_decl && (!at_UTF_8(e->encoding)))){
     183                        fprintf(stderr,"Sorry, this xmlwf demo only works for UTF-8.\n");
     184                        exit(-1);
     185                }
     186    }
     187    else {
     188        fprintf(stderr,"Sorry, this xmlwf demo does not process EBCDIC.\n");
     189        exit(-1);
     190    }
     191
     192    if (e->content_start != 0) {
     193
     194                memmove(&src_buffer[0], &src_buffer[e->content_start], chars_avail - e->content_start);
     195                buffer_pos = e->content_start;
     196                if ((chars_avail-e->content_start) < SEGMENT_SIZE) {
     197                        chars_read = chars_avail - e->content_start + fread(&src_buffer[chars_avail-e->content_start], 1, e->content_start, infile);
     198                        chars_avail = chars_read;
     199                }
     200                if (chars_avail >= SEGMENT_SIZE) chars_avail = SEGMENT_SIZE;
     201    }
     202
     203        //////////////////////////////////////////////////////////////////////////////////////////
     204        // Read OVERLAP bytes to support post processing validation lookahead.
     205        //////////////////////////////////////////////////////////////////////////////////////////
     206        chars_read = chars_avail + fread(&src_buffer[chars_avail], 1, LOOKAHEAD_SIZE, infile);
     207        chars_avail = chars_read;
     208        if (chars_avail >= SEGMENT_SIZE) chars_avail = SEGMENT_SIZE;
     209
     210    @stream_stmts
     211
     212    //////////////////////////////////////////////////////////////////////////////////////////
     213    // Full Segments
     214    //////////////////////////////////////////////////////////////////////////////////////////
     215        #ifndef STL_ALIGNED_VECTOR
     216        matcher.setSrc((char *)&src_buffer[0]);
     217        #endif
     218        while (chars_avail >= SEGMENT_SIZE) {
     219      PERF_SEC_START(parser_timer);
     220      for (int blk = 0; blk < SEGMENT_BLOCKS; blk++) {
     221          block_base = blk*BLOCK_SIZE;
     222                  s2p_do_block((BytePack *) &src_buffer[block_base], basis_bits);
     223
     224          @block_stmts
     225
     226          tracker.StoreNewlines(lex.LF);
     227                  postprocess_do_block(lex, ctCDPI_Callouts, ref_Callouts, check_streams, (char *)&src_buffer[0], buffer_base, block_base, chars_avail, tracker);
     228                                        #ifndef STL_ALIGNED_VECTOR
     229          matcher.store_streams(check_streams.tag_marks, check_streams.name_follows, check_streams.misc_mask, chars_avail);
     230                                        #endif
     231          tracker.AdvanceBlock();
     232      }
     233                        #ifndef STL_ALIGNED_VECTOR
     234      matcher.StreamScan(chars_avail);
     235      matcher.Advance_buffer();
     236                        #endif                 
     237      PERF_SEC_END(parser_timer, chars_avail);
     238
     239          memmove(&src_buffer[0], &src_buffer[SEGMENT_SIZE], LOOKAHEAD_SIZE); // CopyBack Trailing Padding
     240
     241          chars_read = fread(&src_buffer[LOOKAHEAD_SIZE], 1, SEGMENT_SIZE, infile) + LOOKAHEAD_SIZE;
     242          chars_avail = chars_read;
     243          if (chars_avail >= SEGMENT_SIZE) chars_avail = SEGMENT_SIZE;
     244          buffer_pos += chars_avail;
     245          buffer_base = buffer_pos;
     246    }
     247
     248    //////////////////////////////////////////////////////////////////////////////////////////
     249    // Final Partial Segment
     250    //////////////////////////////////////////////////////////////////////////////////////////
     251    PERF_SEC_START(parser_timer);
     252
     253    block_pos = 0;
     254    int remaining = chars_avail;
     255
     256    /* Full Blocks */
     257    while (remaining >= BLOCK_SIZE) {
     258          block_base = block_pos;
     259                                s2p_do_block((BytePack *) &src_buffer[block_pos], basis_bits);
     260          @block_stmts
     261          tracker.StoreNewlines(lex.LF);
     262                                postprocess_do_block(lex, ctCDPI_Callouts, ref_Callouts, check_streams, (char *)&src_buffer[0], buffer_base, block_base, chars_avail, tracker);
     263                                        #ifndef STL_ALIGNED_VECTOR
     264                                                matcher.store_streams(check_streams.tag_marks, check_streams.name_follows, check_streams.misc_mask, chars_avail);
     265                                        #endif
     266          tracker.AdvanceBlock();
     267          block_pos += BLOCK_SIZE;
     268          remaining -= BLOCK_SIZE;
     269    }
     270    block_base = block_pos;
     271
     272//    Partial Block or Any Carry
     273//
     274//    Eliminating the @anycarry test simplifies dependencies. - RDC Nov. 6, 2012
     275//
     276#ifdef USE_ANY_CARRY_TEST
     277   if (remaining > 0 || @any_carry) {
     278#endif
     279          EOF_mask = bitblock::srl(simd<1>::constant<1>(), convert(BLOCK_SIZE-remaining));
     280                                s2p_do_final_block((BytePack *) &src_buffer[block_pos], basis_bits, EOF_mask);
     281          @final_block_stmts
     282          tracker.StoreNewlines(lex.LF);
     283                                postprocess_do_block(lex, ctCDPI_Callouts, ref_Callouts, check_streams, (char *)&src_buffer[0], buffer_base, block_base, chars_avail, tracker);
     284                                        #ifndef STL_ALIGNED_VECTOR
     285          matcher.store_streams(check_streams.tag_marks, check_streams.name_follows, check_streams.misc_mask, chars_avail);
     286                                        #endif
     287          tracker.AdvanceBlock();
     288
     289#ifdef USE_ANY_CARRY_TEST
     290   }
     291#endif
     292
     293    buffer_pos += chars_avail;
     294    buffer_base = buffer_pos;
     295                #ifndef STL_ALIGNED_VECTOR
     296    matcher.StreamScan(chars_avail);
     297    matcher.Advance_buffer();
     298
     299    if (matcher.depth != 0) {
     300      fprintf(stderr, "tag matching error (depth %i) at position %i\n", matcher.depth, buffer_base);
     301      exit(-1);
     302    }
     303                #endif
     304                PERF_SEC_END(parser_timer, chars_avail);
     305}
     306
Note: See TracChangeset for help on using the changeset viewer.