Changeset 72 for trunk/src/bitlex.h


Ignore:
Timestamp: Mar 27, 2008, 9:26:16 AM (11 years ago)
Author: cameron
Message: Refactored Lexer and friends

File: 1 edited

Legend: Unmodified, Added, Removed
  • trunk/src/bitlex.h

    r59 r72  
    1 /*  bitlex.h - parabix lexical analysis (bit streams)
     1/*  bitlex.h - Lexical Item Stream Module.
    22    Copyright (c) 2007, 2008, Robert D. Cameron.
    33    Licensed to the public under the Open Software License 3.0.
     
    99#define BITLEX_H
    1010
    11 #include "xmlparam.h"
    12 #include "xmlbuffer.h"
     11#include "xmlmodel.h"
     12#include "byteplex.h"
     13#include "bitplex.h"
     14#include "xmldecl.h"
    1315
    1416/* Lexical items are particular characters, character classes
     
    4244   items.
    4345
    44    A BitBlockBasis is a set of 8 parallel bit blocks for
    45    that represent a block of 8-bit code units in bit-parallel
    46    form. */
     46*/
    4747
    48 struct BitBlockBasis {
    49         BitBlock bit[8];
    50 };
    5148
    5249/* A BitStreamBuffer is a bit stream of BUFFER_BLOCKS consecutive
     
    6360class Lexer_Interface {
    6461public:
    65         Lexer_Interface(XML_Buffer_Interface *b, LexicalStreamSet *l);
     62        Lexer_Interface(Model_Info * m, LexicalStreamSet *l);
    6663        ~Lexer_Interface();
    67         void AdvanceBuffer(int& base_pos, int& rel_pos, int& limit_pos);
     64        void AnalyzeBuffer(BitBlockBasis * x8basis, int buffer_limit_pos);
    6865
    6966protected:
    70         XML_Buffer_Interface *xml_buf;
    71         void TransposeToBitStreams();
     67        Model_Info * model_info;
    7268        virtual void Do_XML_10_WS_Control() = 0;
    7369        virtual void Do_MarkupStreams() = 0;
     
    7672        int lexer_base_pos;
    7773        BitBlockBasis * x8basis;
     74        BitBlock * validation_stream;
    7875        LexicalStreamSet * parsing_engine_data;
    7976        int buffer_units;
     
    8481class Lexer : public Lexer_Interface {
    8582public:
    86         static Lexer_Interface * LexerFactory(XML_Buffer_Interface *b, LexicalStreamSet *l);
     83        static Lexer_Interface * LexerFactory(Model_Info * m,LexicalStreamSet *l);
    8784
    8885protected:
    89         Lexer(XML_Buffer_Interface *b, LexicalStreamSet *l);
     86        Lexer(Model_Info * m,LexicalStreamSet *l);
    9087        void Do_XML_10_WS_Control();
    9188        void Do_MarkupStreams();
     
    9693class UTF_8_Lexer : public Lexer<ASCII> {
    9794public:
    98         UTF_8_Lexer(XML_Buffer_Interface *b, LexicalStreamSet *l);
     95        UTF_8_Lexer(Model_Info * m,LexicalStreamSet *l);
    9996        void Do_XML_11_WS_Control();
    10097        void Do_CharsetValidation();
     
    103100class ASCII_7_Lexer : public Lexer<ASCII> {
    104101public:
    105         ASCII_7_Lexer(XML_Buffer_Interface *b, LexicalStreamSet *l);
     102        ASCII_7_Lexer(Model_Info * m,LexicalStreamSet *l);
    106103        void Do_XML_11_WS_Control();
    107104        void Do_CharsetValidation();
     
    110107class EASCII_8_Lexer : public Lexer<ASCII> {
    111108public:
    112         EASCII_8_Lexer(XML_Buffer_Interface *b, LexicalStreamSet *l);
     109        EASCII_8_Lexer(Model_Info * m,LexicalStreamSet *l);
    113110        void Do_XML_11_WS_Control();
    114111        void Do_CharsetValidation();
     
    121118class U16_Lexer : public Lexer<ASCII> {
    122119public:
    123         U16_Lexer(XML_Buffer_Interface *b, LexicalStreamSet *l);
     120        U16_Lexer(Model_Info * m,LexicalStreamSet *l);
    124121        void Do_XML_11_WS_Control();
    125122        virtual void Do_CharsetValidation() = 0;
     
    128125class UTF_16_Lexer : public U16_Lexer {
    129126public:
    130         UTF_16_Lexer(XML_Buffer_Interface *b, LexicalStreamSet *l);
     127        UTF_16_Lexer(Model_Info * m,LexicalStreamSet *l);
    131128        void Do_CharsetValidation();
    132129};
     
    134131class UCS_2_Lexer : public U16_Lexer {
    135132public:
    136         UCS_2_Lexer(XML_Buffer_Interface *b, LexicalStreamSet *l);
     133        UCS_2_Lexer(Model_Info * m,LexicalStreamSet *l);
    137134        void Do_CharsetValidation();
    138135};
     
    140137class UTF_32_Lexer : public Lexer<ASCII> {
    141138public:
    142         UTF_32_Lexer(XML_Buffer_Interface *b, LexicalStreamSet *l);
     139        UTF_32_Lexer(Model_Info * m,LexicalStreamSet *l);
    143140        void Do_XML_11_WS_Control();
    144141        void Do_CharsetValidation();
     
    147144class EBCDIC_Lexer: public Lexer<EBCDIC> {
    148145public:
    149         EBCDIC_Lexer(XML_Buffer_Interface *b, LexicalStreamSet *l);
     146        EBCDIC_Lexer(Model_Info * m,LexicalStreamSet *l);
    150147        void Do_XML_11_WS_Control();
    151148        void Do_CharsetValidation();
     
    153150
    154151
    155 #ifdef BUFFER_PROFILING
    156 #include "../Profiling/BOM_Profiler.c"
    157 BOM_Table * bitstream_timer;
    158 BOM_Table * lextranspose_timer;
    159 BOM_Table * scanner_timer;
    160152#endif
    161 
    162 
    163 #endif
Note: See TracChangeset for help on using the changeset viewer.