source: proto/xmlschema/validation_template_onevec.cpp @ 3229

Last change on this file since 3229 was 3229, checked in by shiyangy, 6 years ago
File size: 38.7 KB
RevLine 
[2219]1
[3225]2#include <simd-lib/bitblock.hpp>
3#include <simd-lib/carryQ.hpp>
4#include <simd-lib/pabloSupport.hpp>
5
6#define LocalCarryDeclare(name, count)\
7CarryArray<count, 0> name;\
8
9#define assert_0_error(errkind, errstrm)
10
11BitBlock EOF_mask = simd<1>::constant<1>();
12
13//#define USE_FUNCTION_TEMPLATES
14//#define TEMPLATED_SIMD_LIB
15
[2219]16#define DEBUG 1
[3225]17//#define BLOCK_SIZE (sizeof(SIMD_type) * 8)
18//#define SEGMENT_BLOCKS 12
19//#define BUFFER_SIZE (BLOCK_SIZE * SEGMENT_BLOCKS)
20//#define OVERLAP_BUFSIZE 2*(sizeof(SIMD_type))
[2219]21
22#define VECTOR_WIDTH 8
23#define VECTOR_TYPE (char)
24
25#include <stdio.h>
26#include <stdlib.h>
27#include <errno.h>
28#include <sys/types.h>
29#include <sys/stat.h>
[3225]30#include <limits.h>
31
32#include <simd-lib/s2p.hpp>
33#include <simd-lib/buffer.hpp>
34#include <simd-lib/bitblock_iterator.hpp>
35#include <simd-lib/perflib/perfsec.h>
36
37//#include "../lib/lib_simd.h"
38#include "../symtab/pbgs_identity_symbol_table.h"
39
[2219]40#include <stack>
41#include <map>
[3225]42#include <vector>
[2219]43
44#include <queue>
45#include <string>
46
47typedef long ScanBlock;
[3225]48typedef BytePack BitBlock;
[2219]49
50using namespace std;
51
[3225]52//#include "../lib/carryQ.h"
53#include <xmldecl.h>
54//#include "xml_error.c"
55//#include "xmldecl.c"
56#include <namechars.h>
57#include <LineColTracker.hpp>
58#include <XMLTestSuiteError.h>
[2219]59
[3225]60// Define the mappings for pablo.assert_0(strm, errkind) statements which
61// compile to the the form assert_0_error(errkind, strm)
62#include <ErrorTracker.h>
[2219]63
[3225]64ErrorTracker error_tracker;
65#define assert_0_error(errkind, errstrm) error_tracker.NoteError(errkind, errstrm);
[2219]66
[3225]67#ifndef STL_ALIGNED_VECTOR
68#include <TagMatcher.hpp>
69#endif
70
71#ifdef STL_ALIGNED_VECTOR // experimental, comment out TagMatcher code to use STL aligned memory vector
72#warning "Compiling with experimental STL_ALIGNED_VECTOR."
73#include <vector>
74#include <simd-lib/stl_aligned_allocator.hpp>
75#endif
76
[2219]77#ifdef BUFFER_PROFILING
78        BOM_Table * parser_timer;
79
80#elif CODE_CLOCKER
81        #define NUM_EVENTS 1
82        int Events[NUM_EVENTS] = {PAPI_TOT_CYC};
83        //int Events[NUM_EVENTS] = {PAPI_L2_DCM};
84        //int Events[NUM_EVENTS] = {PAPI_TOT_CYC, PAPI_BR_MSP};
85        int cal_size = 20;
86        CC * parser_timer = new CC(Events,NUM_EVENTS,cal_size);
87#else
88        void * parser_timer;
89#endif
90
[3225]91//////////////////////////////////////////////////////////////////////////////////////////
92// Buffer Management // WARNING: Do Not update #defines. Results in TagMatcher errors.
93//////////////////////////////////////////////////////////////////////////////////////////
94#define PADDING_BLOCKS 0
95#define PADDING_SIZE (BLOCK_SIZE * PADDING_BLOCKS)
96#define COPYBACK_BLOCKS 2
97#define COPYBACK_SIZE (BLOCK_SIZE * COPYBACK_BLOCKS)
98#define LOOKAHEAD_BLOCKS 1
99#define LOOKAHEAD_SIZE (BLOCK_SIZE * LOOKAHEAD_BLOCKS)
100#define SEGMENT_BLOCKS  12 // WARNING: TagMatcher.hpp causes xmlconf test suite failures for SEGMENT_BLOCKS < 3.
101#define SEGMENT_SIZE (BLOCK_SIZE * SEGMENT_BLOCKS)
102#define BUFFER_SIZE (COPYBACK_SIZE + SEGMENT_SIZE + LOOKAHEAD_SIZE + PADDING_SIZE)
103
104
105
106//int block_base=0;
107//int buffer_base=0;
[2219]108int buffer_last;
[3225]109FILE *gid_writein;
110#define ELEMENT_BUFFER_SIZE 30
111char element_name_buffer[ELEMENT_BUFFER_SIZE];
112int cross_buffer_flag = 0;
113//char * source;
[2219]114LineColTracker tracker;
115
/*
 * Count forward zeroes of a long.
 *
 * Returns the index (0-based, counted from the least significant bit) of the
 * lowest set bit of x, looking at x as a plain bit pattern.  When no bit is
 * set the result is the width of long in bits (8*sizeof(long)).
 */
static inline int cfzl(long x){
        // Conversion to unsigned long is modular, so a negative x keeps the
        // same bit pattern and needs no special handling.
        unsigned long ux = (unsigned long) x;
        if (ux == 0)
                return (int)(8*sizeof(long));
        int n = 0;
        // ux is non-zero, so a set bit is always found before n reaches the
        // word width and the loop terminates.
        while ((ux & 1UL) == 0){
                ux >>= 1;
                n++;
        }
        return n;
}
137
138static inline void print_gid_vec();
139/*
140static inline int cfzl(long x){
141 if (x ==0)
142  return 32;
143 int n=0;
144 if ((x & 0xFFFF0000) == 0) { n += 16; x =x << 16;} //1111 1111 1111 1111 0000 0000 0000 0000 // 16 bits from left are zero! so we omit 16left bits
145 if ((x & 0xFF000000) == 0){ n = n +  8; x = x <<  8;} // 8 left bits are 0
146 if ((x & 0xF0000000) ==0){ n = n +  4; x = x <<  4;} // 4 left bits are 0
147 if ((x & 0xC0000000) == 0){ n =n +  2, x = x <<  2;}  // 110000....0 2 left bits are zero
148 if ((x & 0x80000000) == 0){n = n +  1, x = x <<  1;} // first left bit is zero
149 return n;
150
151}
152*/
[2219]153static inline void ReportError(const char * error_msg, int error_pos_in_block) {
154  int error_line, error_column;
155  tracker.get_Line_and_Column(error_pos_in_block, error_line, error_column);
156  fprintf(stderr, "%s at line %i, column %i\n", error_msg, error_line, error_column);
157}
[3225]158/*
[2219]159class ErrorTracker {
160public:
161    ErrorTracker() { noted_pos_in_block = -1;}
162
163    inline void NoteError(const char * error_msg, BitBlock err_strm) {
164      int pos_in_block = count_forward_zeroes(err_strm);
165      if ((noted_pos_in_block == -1) || (noted_pos_in_block > pos_in_block)) {
166        noted_pos_in_block = pos_in_block;
167        noted_error = error_msg;
168      }
169    }
170
171    inline void If_Error_Report_First() {
172      if (noted_pos_in_block > -1) {
173              int error_line, error_column;
174              ReportError(noted_error, noted_pos_in_block);
175              exit(-1);
176      }
177    }
178
179private:
180  const char * noted_error;
181  int noted_pos_in_block;
182};
[3225]183*/
[2219]184
185
[3225]186//BitBlock EOF_mask = simd_const_1(1);
[2219]187
[3225]188//ErrorTracker error_tracker;
[2219]189BitBlock elem_starts;
190int previous_block_last_elem_start;
191BytePack hashvalues[2];
192
193vector <int> gids;
194PBGSIdentitySymbolTable pbgs_symbol_table;
195
196vector <char> gid_vec;
197
198stack <vector<char>* > elem_vec_stack;
199map<char, vector<char>* > elem_vec_map;
200
201static inline int ScanBackwardPos(BitBlock * block, int pos)
202{
[3225]203        cout << "pos " << pos << endl;
[2219]204    BitBlock s = block[0];
[3225]205    if (bitblock::any(block[0])){
206        print_register("zero",block[0]);
207        print_register("t",simd_not(simd<128>::sll(simd<1>::constant<1>(), convert(pos))) );
208        }
209    BitBlock temp = simd_and( s, simd_not(simd<128>::sll(simd<1>::constant<1>(), convert(pos))) );
210//      print_register("zero",simd_not(simd<128>::sll(simd<1>::constant<1>());
211                print_register("temp",temp);
212    if (bitblock::any(temp))
[2219]213    {
214        // sizeof (BitBlock)*8 - cbzl( s & ~(~0 << pos)) - 1;
[3225]215//      cout << "block_size" << BLOCK_SIZE << endl;
216        return BLOCK_SIZE - count_reverse_zeroes(temp) - 1;
[2219]217    }
218    else
219    {
220        //handle boundary case
221        return previous_block_last_elem_start - 1;
222    }
223}
224
225static inline int StreamScan(ScanBlock * stream, int blk_count, int ProcessPos(int)) {
226        int blk;
227        int block_pos = 0;
228        for (blk = 0; blk < blk_count; blk++) {
229                ScanBlock s = stream[blk];
230                while(s) {
231                        int code = (ProcessPos(cfzl(s) + block_pos));
232                        if (code) return code;
233                        s = s & (s-1);  // clear rightmost bit.
234                }
235                block_pos += 8 * sizeof(ScanBlock);
236        }
237        return 0;
238}
239
/*
 * Hash the first lgth characters of str with the "hash * 33 + c" scheme,
 * seeded with 5381.
 *
 * The accumulation is done in unsigned arithmetic so that it wraps around
 * instead of overflowing a signed int; the wrapped value is returned as int.
 * An lgth of 0 (or less) yields the seed, 5381.
 */
static inline int compute_hash_value2(char *str,int lgth){
        unsigned int hash = 5381;
        for (int i = 0; i < lgth; i++){
                int c = str[i];
                hash = hash * 33u + (unsigned int) c; /* hash * 33 + c */
        }
        return (int) hash;
}
251
[2219]252static inline int compute_hash_value (int lgth, int start)
253{
254    unsigned int offset_bit = start + 128;
255    uint64_t stream = *((uint64_t*)(((uint32_t*)hashvalues)+(offset_bit>>5)));
256    return stream >> (offset_bit & 0x1F) & ~(~0 << lgth);
257}
258/*
259template <int L>
260static inline int ElemEnd_grouping(int pos) {
261    int end = block_base + pos;
262    int start = end - L;
263    int hashvalue = compute_hash_value(L, start - block_base);
264    int gid = pbgs_symbol_table.Lookup_or_Insert_Name<L>(source + start, hashvalue);
265    gids.push_back(gid);
266#if DEBUG
267    char* symbol = new char[L+1];
268    strncpy ( symbol, source + start, L );
269    symbol[L] ='\0';
270    printf ("%s | start: %i[%i] | end: %i[%i] | gid: %i | hashvalue: %i | symbol: %s\n", __FUNCTION__, start, start-buffer_base, end, end-buffer_base, gid, hashvalue, symbol );
271    delete symbol; symbol = 0;
272#endif
273    return gid;
274}
275
276template<>
277inline int ElemEnd_grouping<17>(int pos) {
278    int end = block_base + pos;
279    int start = ScanBackwardPos (&elem_starts, pos) + block_base;
280    int lgth = end - start;
281    int hashvalue = compute_hash_value(lgth, start - block_base);
282    int gid = 0;
283
284//    if (lgth < 32)
285//    {
286//      gid = pbgs_symbol_table.Lookup_or_Insert_Name_32(source + start, hashvalue, lgth);
287//    }
288//    else
289    {
290        gid = pbgs_symbol_table.Lookup_or_Insert_Name(source + start, hashvalue, lgth);
291    }
292    gids.push_back(gid);
293#if DEBUG
294    char* symbol = new char[lgth+1];
295    strncpy ( symbol, source + start, lgth );
296    symbol[lgth] ='\0';
297    printf ("%s | start: %i[%i] | end: %i[%i] | lgth: %i | hashvalue: %i | gid: %i | symbol: %s\n", __FUNCTION__, start, start - block_base, end, end - block_base, lgth, hashvalue, gid, symbol);
298#endif
299    return gid;
300}
301*/
302
[3225]303inline char ElemEnd_grouping(char *source,int block_base,int pos) {
[2219]304    int end = block_base + pos;
[3225]305    int start = ScanBackwardPos(&elem_starts, pos) + block_base;
306    ///////////////////////
307    //start could be negitive
308    //the element name falls in two buffers
309    ///////////////////////
310    char *name = source + start;
311    char name_buffer[50];
312   
313    if (start<0){
314        memmove (&name_buffer[0],&element_name_buffer[ELEMENT_BUFFER_SIZE+start],-start);       
315        memmove (&element_name_buffer[ELEMENT_BUFFER_SIZE+start],&source[0],50+start);
316        name = name_buffer;
317    }
318//    cout << end << "  " << start << endl;
[2219]319    int lgth = end - start;
[3229]320//    int hashvalue = compute_hash_value(lgth,start-block_base);
321        int hashvalue = compute_hash_value2(name,lgth);
[2219]322    char gid = 0;
323
[3225]324
[2219]325//    if (lgth < 32)
326//    {
327//      gid = pbgs_symbol_table.Lookup_or_Insert_Name_32(source + start, hashvalue, lgth);
328//    }
329//    else
330
331        switch (lgth)
332    {
333    case 1:
334        gid = pbgs_symbol_table.Lookup_or_Insert_Name<1>(source + start, hashvalue);
335        break;
336    case 2:
337        gid = pbgs_symbol_table.Lookup_or_Insert_Name<2>(source + start, hashvalue);
338        break;
339    case 3:
340        gid = pbgs_symbol_table.Lookup_or_Insert_Name<3>(source + start, hashvalue);
341        break;
342    case 4:
343        gid = pbgs_symbol_table.Lookup_or_Insert_Name<4>(source + start, hashvalue);
344        break;
345    case 5:
346        gid = pbgs_symbol_table.Lookup_or_Insert_Name<5>(source + start, hashvalue);
347        break;
348    case 6:
349        gid = pbgs_symbol_table.Lookup_or_Insert_Name<6>(source + start, hashvalue);
350        break;
351    case 7:
352        gid = pbgs_symbol_table.Lookup_or_Insert_Name<7>(source + start, hashvalue);
353        break;
354    case 8:
355        gid = pbgs_symbol_table.Lookup_or_Insert_Name<8>(source + start, hashvalue);
356        break;
357    case 9:
358        gid = pbgs_symbol_table.Lookup_or_Insert_Name<9>(source + start, hashvalue);
359        break;
360    case 10:
361        gid = pbgs_symbol_table.Lookup_or_Insert_Name<10>(source + start, hashvalue);
362        break;
363    case 11:
364        gid = pbgs_symbol_table.Lookup_or_Insert_Name<11>(source + start, hashvalue);
365        break;
366    case 12:
367        gid = pbgs_symbol_table.Lookup_or_Insert_Name<12>(source + start, hashvalue);
368        break;
369    case 13:
370        gid = pbgs_symbol_table.Lookup_or_Insert_Name<13>(source + start, hashvalue);
371        break;
372    case 14:
373        gid = pbgs_symbol_table.Lookup_or_Insert_Name<14>(source + start, hashvalue);
374        break;
375    case 15:
376        gid = pbgs_symbol_table.Lookup_or_Insert_Name<15>(source + start, hashvalue);
377        break;
378    case 16:
379        gid = pbgs_symbol_table.Lookup_or_Insert_Name<16>(source + start, hashvalue);
380        break;
381    default:
382        gid = pbgs_symbol_table.Lookup_or_Insert_Name(source + start, hashvalue, lgth);
383        break;
384    }
385/*
386    {
387        gid = (char)pbgs_symbol_table.Lookup_or_Insert_Name(source + start, hashvalue, lgth);
388    }
389*/
390    gids.push_back(gid);
391#if DEBUG
392    char* symbol = new char[lgth+1];
393    strncpy ( symbol, source + start, lgth );
394    symbol[lgth] ='\0';
395    printf ("%s | start: %i[%i] | end: %i[%i] | lgth: %i | hashvalue: %i | gid: %i | symbol: %s\n", __FUNCTION__, start, start - block_base, end, end - block_base, lgth, hashvalue, gid, symbol);
[3225]396    fprintf (gid_writein,"%s | start: %i[%i] | end: %i[%i] | lgth: %i | hashvalue: %i | gid: %i | symbol: %s\n", __FUNCTION__, start, start - block_base, end, end - block_base, lgth, hashvalue, gid, symbol);
[2219]397#endif
398    return gid;
399}
400
401//template <int L>
[3225]402static inline int StreamScanLengthGrouping(char *source,int block_base,ScanBlock * stream, ScanBlock * stream2, int blk_count) {
[2219]403    int blk;
404    int block_pos = 0;
[3225]405//    cout << "cfzl():" << cfzl() << endl;
406//      long test = 6;
407//      test = (test & (test - 1));
408//      cout << test << endl;
[2219]409    for (blk = 0; blk < blk_count; blk++) {
[3225]410        cout << "blk:" << blk << endl;
411   
[2219]412                ScanBlock s = stream[blk];
413                ScanBlock s2 = stream2[blk];
[3225]414//              cout << s << "  " << s2 << endl;
415//              print_register("s:",((BitBlock*)stream)[0]);
[2219]416                while(s | s2) {
[3225]417                        cout << "cfzl(s):" << cfzl(s)<<endl;
[2219]418                    int end_pos = cfzl(s) + block_pos;
[3225]419//                  cout << "s2 " << s2 << endl;
[2219]420                    int end_tag = cfzl(s2) + block_pos;
421//              cout << "cfzl(s):" << cfzl(s)<<endl;
422//              cout << "cfzl(s2):" << cfzl(s2)<<endl;
423//                      cout << (end_tag < end_pos && s2 != 0)<< "||" <<(s == 0 && s2 != 0)  << endl;
424                        if ((end_tag < end_pos && s2 != 0) | (s == 0 && s2 != 0)){
[3225]425                        cout << "here" << endl;
426                        cout << "closing tag" << endl;
[2219]427//                      cout << "endtag:" << end_tag << " " << end_pos<<endl;
428//                      cout << "1"<< endl;
429//                      cout << elem_vec_stack.size()<<endl;
430//                      cout << "2" << endl;
431//                      cout << elem_vec_stack.top()<<endl;
432                                if(!(gid_vec.empty())){
[3225]433//                                      if(gid_vec.back() != 0){
[2219]434                                                gid_vec.push_back(0);
[3225]435//                                      }
[2219]436                                }
[3225]437                                cout << "before" << s2 << endl;
438                                s2 = s2 & (s2 - 1);
439                                cout << "after" << s2 << endl;
440//                      cout << "s2 = " << s2 << endl;
[2219]441                }else{
[3225]442                        cout << " end_pos == " << end_pos<<endl;
443                        cout << " end_tag_pos == " << end_tag << endl;
444                        char gid = ElemEnd_grouping(source,block_base,end_pos);
445                        if (s!=0){
446                                s = s & (s-1);  // clear rightmost bit.
447                        }
448                        //else{
449                        //      s2 = s2 & (s2 - 1);
450                        //}
[2219]451                                if (gid != 0){
452                                        gid_vec.push_back(gid);                 
453                                }
[3225]454                                cout << "here1" << endl;
[2219]455                }
456                }
457                block_pos += 8 * sizeof(ScanBlock);
[3225]458                print_gid_vec();
[2219]459    }
460    return 0;
461}
462
463void print_gid_vec(){
464        for (int i = 0; i < gid_vec.size(); i++){
465                cout << (int)gid_vec[i] << " ";
466        }
467        cout << endl;
468/*     
469        int i = 1;
470        int lst[256] = {0};
471        cout << elem_vec_map.size() << " vectors" << endl;
472        for (int i = 0;i < gids.size(); i++){
473                if (lst[gids[i]] == 0){
474                        cout << "gid: " << gids[i] << endl;
475                        lst[gids[i]] = 1;
476                        vector<char> *tmp = elem_vec_map[gids[i]];
477                        cout << "vector size:"<< (*tmp).size()<<endl;
478                        if ((*tmp).size()!=0){
479                                cout << "elements: ";
480                                for (int j = 0; j < (*tmp).size();j++){
481                                        cout << (int)(*tmp)[j] << " ";
482                                }
483                                cout << endl;
484                        }
485                        cout << endl;
486                }
487        }
488*/
489}
[3225]490/*
[2219]491static inline int NameStrt_check(int pos) {
492        int block_pos = block_base + pos;
493        if(XML_10_UTF8_NameStrt_bytes((unsigned char*)&source[block_pos]) == 0){
494              ReportError("name start error", pos);
495              exit(-1);
496        }
497        return 0;
498}
499
500static inline int Name_check(int pos) {
501        int block_pos = block_base + pos;
502        if(XML_10_UTF8_NameChar_bytes((unsigned char*)&source[block_pos]) == 0){
503              ReportError("name error", pos);
504              exit(-1);
505        }
506        return 0;
507}
508
509static inline int PIName_check(int pos) {
510        int block_pos = block_base + pos;
511        int file_pos = block_pos+buffer_base;
512        printf ("%s:%i\n",__FUNCTION__,pos);
513        if (at_XxMmLll<ASCII>((unsigned char*)&source[block_pos]) && (source[block_pos+3]=='?' || source[block_pos+3]<= ' ')) {
514              // "<?xml" legal at start of file.
515              if ((file_pos == 2) && at_XmlDecl_start<ASCII>((unsigned char*)&source[0])) return 0;
516              ReportError("[Xx][Mm][Ll] illegal as PI name", pos);
517              exit(-1);
518        }
519        return 0;
520}
521
522static inline int CD_check(int pos) {
523        int block_pos = block_base + pos;
524        if (!at_CDATA1<ASCII>((unsigned char*)&source[block_pos])){
525              ReportError("CDATA error", pos);
526              exit(-1);
527        }
528        return 0;
529}
530
531static inline int GenRef_check(int pos) {
532        int block_pos = block_base + pos;
533        unsigned char* s = (unsigned char*)&source[block_pos];
534        if (!(at_Ref_gt<ASCII>(s)||at_Ref_lt<ASCII>(s)||at_Ref_amp<ASCII>(s)||at_Ref_quot<ASCII>(s)||at_Ref_apos<ASCII>(s))){
[3225]535                        cout << pos << endl;
[2219]536              ReportError("Undefined reference", pos);
537              exit(-1);
538        }
539        return 0;
540}
541
542static inline int HexRef_check(int pos) {
543        int block_pos = block_base + pos;
544        unsigned char* s = (unsigned char*)&source[block_pos];
545        int ch_val = 0;
546        while(at_HexDigit<ASCII>(s)){
547          ch_val = HexVal<ASCII>(s[0]) + (ch_val<<4);
548          if (ch_val> 0x10FFFF ){
549            ReportError("Illegal character reference", pos);
550            exit(-1);
551          }
552          s++;
553        }
554        if ((ch_val == 0x0) || ((ch_val | 0x7FF) == 0xDFFF)|| ((ch_val | 0x1) == 0xFFFF)){
555          ReportError("Illegal character reference", pos);
556          exit(-1);
557        }
558        else if (((ch_val < 0x20) && (ch_val != 0x9) && (ch_val != 0xD) && (ch_val != 0xA))){
559          ReportError("Illegal XML 1.0 character reference", pos);
560          exit(-1);
561        }
562        return 0;
563}
564
565static inline int DecRef_check(int pos) {
566        int block_pos = block_base + pos;
567        unsigned char* s = (unsigned char*)&source[block_pos];
568        int ch_val = 0;
569        while(at_HexDigit<ASCII>(s)){
570          ch_val = DigitVal<ASCII>(s[0]) + ch_val*10;
571          if (ch_val> 0x10FFFF ){
572            ReportError("Illegal character reference", pos);
573            exit(-1);
574          }
575          s++;
576        }
577        if ((ch_val == 0x0) || ((ch_val | 0x7FF) == 0xDFFF)|| ((ch_val | 0x1) == 0xFFFF)){
578          ReportError("Illegal character reference", pos);
579          exit(-1);
580        }
581        else if (((ch_val < 0x20) && (ch_val != 0x9) && (ch_val != 0xD) && (ch_val != 0xA))){
582          ReportError("Illegal XML 1.0 character reference", pos);
583          exit(-1);
584        }
585        return 0;
586}
587
588static inline int AttRef_check(int pos) {
589        int block_pos = block_base + pos;
590        unsigned char* s = (unsigned char*)&source[block_pos];
591        int ch_val = 0;
592        if(s[0]=='#'){
593          s++;
594          if(s[0]=='x' || s[0]=='X'){
595            s++;
596            while(at_HexDigit<ASCII>(s)){
597              ch_val = HexVal<ASCII>(s[0]) + (ch_val<<4);
598              s++;
599            }
600          }
601          else{
602            while(at_HexDigit<ASCII>(s)){
603              ch_val = DigitVal<ASCII>(s[0]) + ch_val*10;
604              s++;
605            }
606          }
607          if (ch_val==60){
608            ReportError("Attribute values contain '<' characters after reference expansion", pos);
609            exit(-1);
610          }
611        }
612        else if(at_Ref_lt<ASCII>(s)){
613          ReportError("Attribute values contain '<' characters after reference expansion", pos);
614          exit(-1);
615        }
616        return 0;
617}
[3225]618*/
[2219]619
620
621
622@global
623
[3225]624#include <simd-lib/transpose.hpp>
625#include <post_process.hpp>
626/*
[2219]627static inline void s2p_do_block(BytePack U8[], Basis_bits & basis_bits) {
628  s2p(U8[0], U8[1], U8[2], U8[3], U8[4], U8[5], U8[6], U8[7],
629        basis_bits.bit_0, basis_bits.bit_1, basis_bits.bit_2, basis_bits.bit_3, basis_bits.bit_4, basis_bits.bit_5, basis_bits.bit_6, basis_bits.bit_7);
630}
631
632static inline void s2p_do_final_block(BytePack U8[], Basis_bits & basis_bits, BitBlock EOF_mask) {
633  s2p_do_block(U8, basis_bits);
634  basis_bits.bit_0 = simd_and(basis_bits.bit_0, EOF_mask);
635  basis_bits.bit_1 = simd_and(basis_bits.bit_1, EOF_mask);
636  basis_bits.bit_2 = simd_and(basis_bits.bit_2, EOF_mask);
637  basis_bits.bit_3 = simd_and(basis_bits.bit_3, EOF_mask);
638  basis_bits.bit_4 = simd_and(basis_bits.bit_4, EOF_mask);
639  basis_bits.bit_5 = simd_and(basis_bits.bit_5, EOF_mask);
640  basis_bits.bit_6 = simd_and(basis_bits.bit_6, EOF_mask);
641  basis_bits.bit_7 = simd_and(basis_bits.bit_7, EOF_mask);
642}
[3225]643*/
[2219]644static inline void print_basis_bits(Basis_bits & basis_bits){
[3225]645        print_register("bit0:",basis_bits.bit_0);
646        print_register("bit1:",basis_bits.bit_1);
647        print_register("bit2:",basis_bits.bit_2);
648        print_register("bit3:",basis_bits.bit_3);
649        print_register("bit4:",basis_bits.bit_4);
650        print_register("bit5:",basis_bits.bit_5);
651        print_register("bit6:",basis_bits.bit_6);
652        print_register("bit7:",basis_bits.bit_7);                                       
[2219]653}
654
655static inline void print_elem_vec(vector<char>* vec, int size){
656        cout << "vector: ";
657        for (int i = 0; i<size ; i++){
658                cout << (int)(*vec)[i] <<" ";
659        }
660        cout << endl;
661}
662
663
664static inline void validate(){
665        struct Elem elem;
666        Validate_Elem_Vec validate_Elem_Vec;
667        cout << "vector size " << gid_vec.size() << endl;
668        int lgth = gid_vec.size();
669        int remaining = 0;
670        Basis_bits temp_basis_bits;
671        if (lgth != 0){
672                int i = 0;
673                int num_block = lgth /(BLOCK_SIZE *8 / VECTOR_WIDTH);
674                remaining = lgth %(BLOCK_SIZE *8 / VECTOR_WIDTH);
675                for (i = 0;i < num_block; i++){
676//                      s2p_do_block((BytePack*)(vector_11[i]),temp_basis_bits);
677                        s2p_do_block((BytePack*)(&(gid_vec[i])), temp_basis_bits);
678                        print_elem_vec(&(gid_vec),lgth);
679                        print_basis_bits(temp_basis_bits);
680//                      (BytePack*)(iter->second)
681                        validate_Elem_Vec.do_block(elem, temp_basis_bits);
682                        cout << i ;
683                }
684                cout << endl;
685                if (remaining !=0){
[3225]686                        BitBlock EOF_mask = bitblock::srl(simd<1>::constant<1>(),convert(BLOCK_SIZE-remaining));
[2219]687                        s2p_do_final_block((BytePack*)(&(gid_vec[i])), temp_basis_bits,EOF_mask);
688                        print_elem_vec(&(gid_vec),lgth);
689                        print_basis_bits(temp_basis_bits);
690                        validate_Elem_Vec.do_final_block(elem, temp_basis_bits, EOF_mask);
691                }
692        }
693/*
694        vector<int> test;
695        for(int i = 1;i <12; i++){
696                test.push_back(i);
697        }
698//      int test[] = {1,2,3,4,5,6,7,8,9,10,11};
699        Basis_bits temp_basis_bits;
700        BitBlock EOF_mask = sisd_srl(simd_const_1(1),sisd_from_int(BLOCK_SIZE-44));
701        s2p_do_final_block((BytePack*)(&(test[0])), temp_basis_bits,EOF_mask);
702        print_basis_bits(temp_basis_bits);
703*/
704/*
705        for (map<char, vector<char>* > ::const_iterator iter = elem_vec_map.begin(); iter!= elem_vec_map.end(); ++iter){
706                int i = 0;
707                int lgth = (*(iter->second)).size();
708                int remaining;
709                if (lgth != 0){
710                        cout <<"vector key "<< (int)iter->first << endl;
711//                      cout <<"vector ptr" << iter->second<<endl;
712               
713                        cout <<"vector size: " << lgth << endl;
714                        int num_block = lgth /(BLOCK_SIZE *8 / VECTOR_WIDTH);
715                        remaining = lgth %(BLOCK_SIZE *8 / VECTOR_WIDTH);
716                        for (i = 0;i < num_block; i++){
717                                Basis_bits temp_basis_bits;
718                                s2p_do_block(((BytePack*)(&(*(iter->second))[i])), temp_basis_bits);
719                                print_elem_vec(iter->second,lgth);
720                                print_basis_bits(temp_basis_bits);
721//                              (BytePack*)(iter->second)
722                                cout << i ;
723                        }
724                        cout << endl;
725                        if (remaining !=0){
726                                BitBlock EOF_mask = sisd_srl(simd_const_1(1),sisd_from_int(BLOCK_SIZE-remaining));
727                                Basis_bits temp_basis_bits;
728                                s2p_do_final_block(((BytePack*)(&(*(iter->second))[i])), temp_basis_bits,EOF_mask);
729                                print_elem_vec((iter->second)+i,lgth);
730                                print_basis_bits(temp_basis_bits);
731                        }
732                }                       
733        }
734*/
735}
736
[3225]737static inline void vectoring(Tag_Callouts & tag_Callouts, Hash_data & hash_data,char *source,int block_base){
738
739        elem_starts = tag_Callouts.ElemName_starts;
740    hashvalues[1] = hash_data.Hash_value;
741   
742        StreamScanLengthGrouping(source,block_base,(ScanBlock *) &tag_Callouts.ElemName_ends,(ScanBlock *) &tag_Callouts.Tag_closing, sizeof(BitBlock)/sizeof(ScanBlock));
743       
744        // Store the last starting position in case we hit boundary case
745    previous_block_last_elem_start = - count_reverse_zeroes(elem_starts);
746
747    //copy current hash value data as previous one.
748    memmove (&hashvalues[0], &hashvalues[1], 16);
749}
750/*
[2219]751static inline void postprocess_do_block(Lex & lex, CtCDPI_Callouts & ctCDPI_Callouts, Ref_Callouts & ref_Callouts, Check_streams & check_streams, Tag_Callouts & tag_Callouts, Hash_data & hash_data, int chars_avail){
752
753
754    //NOTE: We are NOT handling the case for an extremely long symbol name (length > 1541).
755    //      TagMatcher will crash if we feed a long symbol name.
756    //      Sample file: test/long_sym_name.xml
757
758    tracker.StoreNewlines(lex.LF);
759    elem_starts = tag_Callouts.ElemName_starts;
760    hashvalues[1] = hash_data.Hash_value;
761
762        StreamScanLengthGrouping((ScanBlock *) &tag_Callouts.ElemName_ends,(ScanBlock *) &tag_Callouts.Tag_closing, sizeof(BitBlock)/sizeof(ScanBlock));
763       
764/*
765    if ( bitblock_has_bit(tag_Callouts.ElemName_ends_1) )
766    {
767        StreamScanLengthGrouping<1>((ScanBlock *) &tag_Callouts.ElemName_ends_1,(ScanBlock *) &tag_Callouts.EndTag_marks, sizeof(BitBlock)/sizeof(ScanBlock));
768    }
769
770    if ( bitblock_has_bit(tag_Callouts.ElemName_ends_2) )
771    {
772        StreamScanLengthGrouping<2>((ScanBlock *) &tag_Callouts.ElemName_ends_2,(ScanBlock *) &tag_Callouts.EndTag_marks, sizeof(BitBlock)/sizeof(ScanBlock));
773    }
774
775    if ( bitblock_has_bit(tag_Callouts.ElemName_ends_3) )
776    {
777        StreamScanLengthGrouping<3>((ScanBlock *) &tag_Callouts.ElemName_ends_3,(ScanBlock *) &tag_Callouts.EndTag_marks, sizeof(BitBlock)/sizeof(ScanBlock));
778    }
779
780    if ( bitblock_has_bit(tag_Callouts.ElemName_ends_4) )
781    {
782        StreamScanLengthGrouping<4>((ScanBlock *) &tag_Callouts.ElemName_ends_4,(ScanBlock *) &tag_Callouts.EndTag_marks, sizeof(BitBlock)/sizeof(ScanBlock));
783    }
784
785    if ( bitblock_has_bit(tag_Callouts.ElemName_ends_5) )
786    {
787        StreamScanLengthGrouping<5>((ScanBlock *) &tag_Callouts.ElemName_ends_5,(ScanBlock *) &tag_Callouts.EndTag_marks, sizeof(BitBlock)/sizeof(ScanBlock));
788    }
789
790    if ( bitblock_has_bit(tag_Callouts.ElemName_ends_6) )
791    {
792        StreamScanLengthGrouping<6>((ScanBlock *) &tag_Callouts.ElemName_ends_6,(ScanBlock *) &tag_Callouts.EndTag_marks, sizeof(BitBlock)/sizeof(ScanBlock));
793    }
794
795    if ( bitblock_has_bit(tag_Callouts.ElemName_ends_7) )
796    {
797        StreamScanLengthGrouping<7>((ScanBlock *) &tag_Callouts.ElemName_ends_7,(ScanBlock *) &tag_Callouts.EndTag_marks, sizeof(BitBlock)/sizeof(ScanBlock));
798    }
799
800    if ( bitblock_has_bit(tag_Callouts.ElemName_ends_8) )
801    {
802        StreamScanLengthGrouping<8>((ScanBlock *) &tag_Callouts.ElemName_ends_8,(ScanBlock *) &tag_Callouts.EndTag_marks, sizeof(BitBlock)/sizeof(ScanBlock));
803    }
804
805    if ( bitblock_has_bit(tag_Callouts.ElemName_ends_9) )
806    {
807        StreamScanLengthGrouping<9>((ScanBlock *) &tag_Callouts.ElemName_ends_9,(ScanBlock *) &tag_Callouts.EndTag_marks, sizeof(BitBlock)/sizeof(ScanBlock));
808    }
809
810    if ( bitblock_has_bit(tag_Callouts.ElemName_ends_10) )
811    {
812        StreamScanLengthGrouping<10>((ScanBlock *) &tag_Callouts.ElemName_ends_10,(ScanBlock *) &tag_Callouts.EndTag_marks, sizeof(BitBlock)/sizeof(ScanBlock));
813    }
814
815    if ( bitblock_has_bit(tag_Callouts.ElemName_ends_11) )
816    {
817        StreamScanLengthGrouping<11>((ScanBlock *) &tag_Callouts.ElemName_ends_11,(ScanBlock *) &tag_Callouts.EndTag_marks, sizeof(BitBlock)/sizeof(ScanBlock));
818    }
819
820    if ( bitblock_has_bit(tag_Callouts.ElemName_ends_12) )
821    {
822        StreamScanLengthGrouping<12>((ScanBlock *) &tag_Callouts.ElemName_ends_12,(ScanBlock *) &tag_Callouts.EndTag_marks, sizeof(BitBlock)/sizeof(ScanBlock));
823    }
824
825    if ( bitblock_has_bit(tag_Callouts.ElemName_ends_13) )
826    {
827        StreamScanLengthGrouping<13>((ScanBlock *) &tag_Callouts.ElemName_ends_13,(ScanBlock *) &tag_Callouts.EndTag_marks, sizeof(BitBlock)/sizeof(ScanBlock));
828    }
829
830    if ( bitblock_has_bit(tag_Callouts.ElemName_ends_14) )
831    {
832        StreamScanLengthGrouping<14>((ScanBlock *) &tag_Callouts.ElemName_ends_14,(ScanBlock *) &tag_Callouts.EndTag_marks, sizeof(BitBlock)/sizeof(ScanBlock));
833    }
834
835    if ( bitblock_has_bit(tag_Callouts.ElemName_ends_15) )
836    {
837        StreamScanLengthGrouping<15>((ScanBlock *) &tag_Callouts.ElemName_ends_15,(ScanBlock *) &tag_Callouts.EndTag_marks, sizeof(BitBlock)/sizeof(ScanBlock));
838    }
839
840    if ( bitblock_has_bit(tag_Callouts.ElemName_ends_16) )
841    {
842        StreamScanLengthGrouping<16>((ScanBlock *) &tag_Callouts.ElemName_ends_16,(ScanBlock *) &tag_Callouts.EndTag_marks, sizeof(BitBlock)/sizeof(ScanBlock));
843    }
844
845    if ( bitblock_has_bit(tag_Callouts.ElemName_ends_17_and_longer) )
846    {
847        StreamScanLengthGrouping<17>((ScanBlock *) &tag_Callouts.ElemName_ends_17_and_longer,(ScanBlock *) &tag_Callouts.EndTag_marks, sizeof(BitBlock)/sizeof(ScanBlock));
848    }
849
850*/
851
852
[3225]853/*
[2219]854    // Store the last starting position in case we hit boundary case
[3225]855    previous_block_last_elem_start = - count_reverse_zeroes(elem_starts);
[2219]856
857    //copy current hash value data as previous one.
858    memmove (&hashvalues[0], &hashvalues[1], 16);
859
[3225]860    if (bitblock::any(simd_or(check_streams.non_ascii_name_starts, check_streams.non_ascii_names))) {
[2219]861      StreamScan((ScanBlock *) &check_streams.non_ascii_name_starts, sizeof(BitBlock)/sizeof(ScanBlock), NameStrt_check);
862      StreamScan((ScanBlock *) &check_streams.non_ascii_names, sizeof(BitBlock)/sizeof(ScanBlock), Name_check);
863    }
864
[3225]865    if (bitblock::any(ctCDPI_Callouts.PI_name_starts)){
[2219]866      StreamScan((ScanBlock *) &ctCDPI_Callouts.PI_name_starts, sizeof(BitBlock)/sizeof(ScanBlock), PIName_check);
867    }
868
[3225]869    if (bitblock::any(ctCDPI_Callouts.CD_starts)){
[2219]870      StreamScan((ScanBlock *) &ctCDPI_Callouts.CD_starts, sizeof(BitBlock)/sizeof(ScanBlock), CD_check);
871    }
872
[3225]873    if (bitblock::any(ref_Callouts.GenRef_starts)){
[2219]874      StreamScan((ScanBlock *) &ref_Callouts.GenRef_starts, sizeof(BitBlock)/sizeof(ScanBlock), GenRef_check);
875    }
876
[3225]877    if (bitblock::any(ref_Callouts.DecRef_starts)){
[2219]878      StreamScan((ScanBlock *) &ref_Callouts.DecRef_starts, sizeof(BitBlock)/sizeof(ScanBlock), DecRef_check);
879    }
880
[3225]881    if (bitblock::any(ref_Callouts.HexRef_starts)){
[2219]882      StreamScan((ScanBlock *) &ref_Callouts.HexRef_starts, sizeof(BitBlock)/sizeof(ScanBlock), HexRef_check);
883    }
884
[3225]885    if (bitblock::any(check_streams.att_refs)){
[2219]886      StreamScan((ScanBlock *) &check_streams.att_refs, sizeof(BitBlock)/sizeof(ScanBlock), AttRef_check);
887    }
888
[3225]889//    error_tracker.If_Error_Report_First();
890/*
[2219]891    matcher.store_streams(check_streams.tag_marks, check_streams.name_follows, check_streams.misc_mask, chars_avail);
892    tracker.AdvanceBlock();
[3225]893    */
894    /*
[2219]895}
[3225]896*/
[2219]897
898static inline void print_GIDS()
899{
900    int span_count = gids.size();
901    for(int i=0;i<span_count;i++) {
902             cout << gids[i] << " ";
903    }
904    cout << endl;
905}
906
/* Placeholder stub: ignores its argument and always yields 0. */
static inline int test(int)
{
    const int result = 0;
    return result;
}
911
912void do_process(FILE *infile, FILE *outfile) {
913
[3225]914        ///////////////
915        //gid file
916        ///////////////
917        gid_writein = fopen("gid.out","w");
[2219]918@decl
[3225]919        #ifndef STL_ALIGNED_VECTOR
920                TagMatcher<SEGMENT_SIZE,LOOKAHEAD_SIZE> matcher;
921        #endif
922        ///////////////////////////////////////////////////
923        //preload element_IDs into symbol table from schema
924        ///////////////////////////////////////////////////
925        FILE *element_ID_in = fopen("../test/element_ID","r");
[3229]926        char symbol[50];
[3225]927        int index;
928        if(element_ID_in == NULL){
929                printf("Error opening element_ID file.\n");
930                exit(0);
931        }
932        hashvalues[1] = hash_data.Hash_value;
[3229]933        while(fscanf(element_ID_in,"%s %d\n",&symbol[0],&index)==2){
934                int lgth = strlen(symbol);
935                int hashvalue = compute_hash_value2(symbol,lgth);
936                cout << symbol <<" "<< lgth<<" "<<hashvalue << endl;
[3225]937                int gid = 0;
938                switch (lgth)
939        {
940                        case 1:
[3229]941                        gid = pbgs_symbol_table.Lookup_or_Insert_Name<1>(symbol, hashvalue);
[3225]942                        break;
943                        case 2:
[3229]944                        gid = pbgs_symbol_table.Lookup_or_Insert_Name<2>(symbol, hashvalue);
[3225]945                        break;
946                        case 3:
[3229]947                        gid = pbgs_symbol_table.Lookup_or_Insert_Name<3>(symbol, hashvalue);
[3225]948                        break;
949                        case 4:
[3229]950                        gid = pbgs_symbol_table.Lookup_or_Insert_Name<4>(symbol, hashvalue);
[3225]951                        break;
952                        case 5:
[3229]953                        gid = pbgs_symbol_table.Lookup_or_Insert_Name<5>(symbol, hashvalue);
[3225]954                        break;
955                        case 6:
[3229]956                        gid = pbgs_symbol_table.Lookup_or_Insert_Name<6>(symbol, hashvalue);
[3225]957                        break;
958                        case 7:
[3229]959                        gid = pbgs_symbol_table.Lookup_or_Insert_Name<7>(symbol, hashvalue);
[3225]960                        break;
961                        case 8:
[3229]962                        gid = pbgs_symbol_table.Lookup_or_Insert_Name<8>(symbol, hashvalue);
[3225]963                        break;
964                        case 9:
[3229]965                        gid = pbgs_symbol_table.Lookup_or_Insert_Name<9>(symbol, hashvalue);
[3225]966                        break;
967                        case 10:
[3229]968                        gid = pbgs_symbol_table.Lookup_or_Insert_Name<10>(symbol, hashvalue);
[3225]969                        break;
970                        case 11:
[3229]971                        gid = pbgs_symbol_table.Lookup_or_Insert_Name<11>(symbol, hashvalue);
[3225]972                        break;
973                        case 12:
[3229]974                        gid = pbgs_symbol_table.Lookup_or_Insert_Name<12>(symbol, hashvalue);
[3225]975                        break;
976                        case 13:
[3229]977                        gid = pbgs_symbol_table.Lookup_or_Insert_Name<13>(symbol, hashvalue);
[3225]978                        break;
979                        case 14:
[3229]980                        gid = pbgs_symbol_table.Lookup_or_Insert_Name<14>(symbol, hashvalue);
[3225]981                        break;
982                        case 15:
[3229]983                        gid = pbgs_symbol_table.Lookup_or_Insert_Name<15>(symbol, hashvalue);
[3225]984                        break;
985                        case 16:
[3229]986                        gid = pbgs_symbol_table.Lookup_or_Insert_Name<16>(symbol, hashvalue);
[3225]987                        break;
988                        default:
[3229]989                        gid = pbgs_symbol_table.Lookup_or_Insert_Name(symbol, hashvalue, lgth);
[3225]990                        break;         
991                }
[3229]992                fprintf (gid_writein,"%s |  lgth: %i | hashvalue: %i | gid: %i | symbol: %s\n", __FUNCTION__,  lgth, hashvalue, gid, symbol);
[3225]993        }       
994        fclose(element_ID_in);
[3229]995//      return;
[2219]996
[3225]997        int buffer_base = 0;
998        int block_base = 0;
999        int buffer_pos = 0;
1000        int block_pos = 0;
1001        int errpos = 0;
1002        int chars_avail = 0;
1003        int check_pos = 0;
1004        int chars_read = 0;
1005 
1006        //////////////////////////////////////////////////////////////////////////////////////////
1007    // Buffer Management
1008    //////////////////////////////////////////////////////////////////////////////////////////
1009                //      BitBlock buf[(BUFFER_SIZE)/sizeof(BitBlock)];
1010                //      uint8_t * src_buffer = (uint8_t *)buf + COPYBACK_SIZE;
1011
1012                #ifdef STL_ALIGNED_VECTOR
1013                        std::vector<uint8_t, AAllocator<uint8_t> > src_buffer;
1014                        src_buffer.reserve(BUFFER_SIZE);
1015                #else
1016                        uint8_t * COPYBACK;
1017                        uint8_t * src_buffer;
1018                        ALLOC_STATIC_ALIGNED_BYTE_BUFFER_WITH_COPYBACK(COPYBACK, src_buffer);
1019                #endif
1020  /*
1021  BytePack buf[(BUFFER_SIZE+BLOCK_SIZE+LOOKAHEAD_SIZE*2)/sizeof(SIMD_type)];
1022
1023  char * srcbuf = ((char *) buf) + LOOKAHEAD_SIZE;
[2219]1024  buffer_base = buf_pos;
1025  source = srcbuf;
[3225]1026  */
1027  chars_read = fread((void *)&src_buffer[0], 1, SEGMENT_SIZE, infile);
[2219]1028  chars_avail = chars_read;
[3225]1029  if (chars_avail > BUFFER_SIZE) chars_avail = SEGMENT_SIZE;
[2219]1030
1031  if(chars_read<4){
1032    fprintf(stderr,"File is too short. Not well formed.\n");
1033    exit(-1);
1034  }
1035
1036  Entity_Info * e = new Entity_Info;
[3225]1037  e->AnalyzeSignature((unsigned char *)&src_buffer[0]);
[2219]1038
1039  if (e->code_unit_base == ASCII) {
1040
[3225]1041    XML_Decl_Parser<ASCII> decl_parser((unsigned char *)&src_buffer[0]);
[2219]1042
1043    decl_parser.ReadXMLInfo(*e);
1044
1045    if (e->code_unit_size != SingleByte || (e->has_encoding_decl && (!at_UTF_8(e->encoding)))){
1046        fprintf(stderr,"Sorry, this xmlwf demo only works for UTF-8.\n");
1047        exit(-1);
1048    }
1049  }
1050  else {
1051    fprintf(stderr,"Sorry, this xmlwf demo does not process EBCDIC.\n");
1052        exit(-1);
1053  }
1054
1055  if (e->content_start != 0) {
[3225]1056        memmove(&src_buffer[0], &src_buffer[e->content_start], chars_read - e->content_start);
1057        buffer_pos = e->content_start;
1058//      buffer_base = buffer_pos;
1059        if ((chars_avail-e->content_start) < SEGMENT_SIZE) {
1060                        chars_read = chars_avail - e->content_start + fread(&src_buffer[chars_avail-e->content_start], 1, e->content_start, infile);
1061                        chars_avail = chars_read;
1062                }
1063                if (chars_avail >= SEGMENT_SIZE) chars_avail = SEGMENT_SIZE;
[2219]1064  }
1065
[3225]1066        //////////////////////////////////////////////////////////////////////////////////////////
1067        // Read OVERLAP bytes to support post processing validation lookahead.
1068        //////////////////////////////////////////////////////////////////////////////////////////
1069        chars_read = chars_avail + fread(&src_buffer[chars_avail], 1, LOOKAHEAD_SIZE, infile);
1070        chars_avail = chars_read;
1071        if (chars_avail >= SEGMENT_SIZE) chars_avail = SEGMENT_SIZE;
1072
[2219]1073@stream_stmts
1074
[3225]1075        #ifndef STL_ALIGNED_VECTOR
1076        matcher.setSrc((char *)&src_buffer[0]);
1077        #endif
1078
[2219]1079/* Full Buffers */
1080    int block_segment_num = 0;
[3225]1081    while (chars_avail >= SEGMENT_SIZE) {
[2219]1082      PERF_SEC_START(parser_timer);
1083      for (int blk = 0; blk < SEGMENT_BLOCKS; blk++) {
[3225]1084        cout << "loops" << ends;
1085                block_base = blk*BLOCK_SIZE;
1086        s2p_do_block((BytePack *) &src_buffer[block_base], basis_bits);
1087     
1088                @block_stmts
1089         
1090                tracker.StoreNewlines(lex.LF);
1091                cout << "post start" << endl;
1092//              postprocess_do_block(lex, ctCDPI_Callouts, ref_Callouts, check_streams, tag_Callouts, hash_data, chars_avail);
1093                postprocess_do_block(lex, ctCDPI_Callouts, ref_Callouts, check_streams, (char *)&src_buffer[0], buffer_base, block_base, chars_avail, tracker);
1094                vectoring(tag_Callouts,hash_data,(char *)&src_buffer[0],block_base);
1095                cout << "post done" << endl;
1096                #ifndef STL_ALIGNED_VECTOR
1097                matcher.store_streams(check_streams.tag_marks, check_streams.name_follows, check_streams.misc_mask, chars_avail);
1098                #endif
1099                tracker.AdvanceBlock();
1100                cout << "loop" << endl;
[2219]1101      }
[3225]1102      #ifndef STL_ALIGNED_VECTOR
[2219]1103      matcher.StreamScan(chars_avail);
1104      matcher.Advance_buffer();
[3225]1105      #endif
1106      cout << "11111" << endl;
[2219]1107      PERF_SEC_END(parser_timer, chars_avail);
[3225]1108          cout << "here" << endl;
[2219]1109      int bytes_left = chars_read - chars_avail;
[3225]1110      //////////////////
1111      //element name buffer, for scanning element name backwards
1112      /////////////////
1113      memmove(&element_name_buffer[0],&src_buffer[SEGMENT_SIZE-ELEMENT_BUFFER_SIZE],ELEMENT_BUFFER_SIZE);
1114      cross_buffer_flag = 1;
1115     
1116      memmove(&src_buffer[0], &src_buffer[SEGMENT_SIZE], LOOKAHEAD_SIZE);
1117      chars_read = fread(&src_buffer[LOOKAHEAD_SIZE],1,SEGMENT_SIZE, infile) + LOOKAHEAD_SIZE;
[2219]1118      chars_avail = chars_read;
[3225]1119      cout << "here" << endl;
1120      if (chars_avail > SEGMENT_SIZE) chars_avail = SEGMENT_SIZE;
1121      buffer_pos += chars_avail;
1122      buffer_base = buffer_pos;
[2219]1123      block_segment_num++;
1124
1125  }
1126       
1127/* Final Partial Buffer */
1128    PERF_SEC_START(parser_timer);
1129
1130    block_pos = 0;
1131    int remaining = chars_avail;
1132
1133
1134
1135/* Full Blocks */
1136    while (remaining >= BLOCK_SIZE) {
1137          block_base = block_pos;
[3225]1138          s2p_do_block((BytePack *) &src_buffer[block_pos], basis_bits);
1139         
[2219]1140          @block_stmts
[3225]1141         
1142          tracker.StoreNewlines(lex.LF);
1143          cout << "post start1" << endl;
1144         
1145//        postprocess_do_block(lex, ctCDPI_Callouts, ref_Callouts, check_streams, tag_Callouts, hash_data, chars_avail);
1146          postprocess_do_block(lex, ctCDPI_Callouts, ref_Callouts, check_streams, (char *)&src_buffer[0], buffer_base, block_base, chars_avail, tracker);
1147          vectoring(tag_Callouts,hash_data,(char *)&src_buffer[0],block_base);
1148          cout << "post end2" << endl;
1149          #ifndef STL_ALIGNED_VECTOR
1150          matcher.store_streams(check_streams.tag_marks, check_streams.name_follows, check_streams.misc_mask, chars_avail);
1151          #endif
1152          tracker.AdvanceBlock();
[2219]1153          block_pos += BLOCK_SIZE;
1154          remaining -= BLOCK_SIZE;
1155    }
1156    block_base = block_pos;
[3225]1157        #ifdef USE_ANY_CARRY_TEST
[2219]1158    if (remaining > 0 || @any_carry) {
[3225]1159        #endif
1160          EOF_mask = bitblock::srl(simd<1>::constant<1>(),convert(BLOCK_SIZE-remaining));
1161          s2p_do_final_block((BytePack *) &src_buffer[block_pos], basis_bits, EOF_mask);
[2219]1162          @final_block_stmts
[3225]1163          cout << "post start3" << endl;
1164//        postprocess_do_block(lex, ctCDPI_Callouts, ref_Callouts, check_streams, tag_Callouts, hash_data, chars_avail);
1165          postprocess_do_block(lex, ctCDPI_Callouts, ref_Callouts, check_streams, (char *)&src_buffer[0], buffer_base, block_base, chars_avail, tracker);
1166          vectoring(tag_Callouts,hash_data,(char *)&src_buffer[0],block_base);
1167          cout << "post end3" << endl;
1168          #ifndef STL_ALIGNED_VECTOR
1169      matcher.store_streams(check_streams.tag_marks, check_streams.name_follows, check_streams.misc_mask, chars_avail);
1170          #endif
1171          tracker.AdvanceBlock();
1172        #ifdef USE_ANY_CARRY_TEST
[2219]1173    }
[3225]1174    #endif
1175    buffer_pos += chars_avail;
1176    buffer_base = buffer_pos;
1177        #ifndef STL_ALIGNED_VECTOR
[2219]1178    matcher.StreamScan(chars_avail);
1179    matcher.Advance_buffer();
[3225]1180//    tracker.Advance_buffer();
[2219]1181
[3225]1182   
[2219]1183    if (matcher.depth != 0) {
1184      fprintf(stderr, "tag matching error (depth %i) at position %i\n", matcher.depth, buffer_base);
1185      exit(-1);
[3225]1186     
1187     
[2219]1188    }
[3225]1189    #endif
1190    PERF_SEC_END(parser_timer, chars_avail);
[2219]1191//  print_GIDS();
1192//    pbgs_symbol_table.Print_Symbol_Table_Distribution();
1193}
1194
1195
1196
1197int
1198main(int argc, char * argv[]) {
1199        char * infilename, * outfilename;
1200        FILE *infile, *outfile;
1201        struct stat fileinfo;
1202
1203        if (argc < 2) {
1204                printf("Usage: %s <filename> [<outputfile>]\n", argv[0]);
1205                exit(-1);
1206        }
1207
1208        infilename = argv[1];
1209        stat(infilename, &fileinfo);
1210        infile = fopen(infilename, "rb");
1211        if (!infile) {
1212                fprintf(stderr, "Error: cannot open %s for input.\n", infilename);
1213                exit(-1);
1214        }
1215
1216        if (argc < 3) outfile = stdout;
1217        else {
1218                outfilename = argv[2];
1219                outfile = fopen(outfilename, "wb");
1220                if (!outfile) {
1221                        fprintf(stderr, "Error: cannot open %s for writing.\n", outfilename);
1222                        exit(-1);
1223                }
1224        }
1225
1226//      PERF_SEC_BIND(1);
1227
1228        PERF_SEC_INIT(parser_timer);
1229
1230        do_process(infile, outfile);
1231       
1232#if DEBUG
1233print_gid_vec();
1234#endif
[3225]1235        //cout << "validate start" << endl;
[2219]1236        validate();
1237
1238        PERF_SEC_DUMP(parser_timer);
1239
1240        PERF_SEC_DESTROY(parser_timer);
1241
1242        fclose(infile);
1243        fclose(outfile);
1244
1245        printf ("Done procressing\n");
1246        return(0);
1247}
Note: See TracBrowser for help on using the repository browser.