Changeset 38 for trunk


Ignore:
Timestamp:
Feb 10, 2008, 6:22:44 AM (12 years ago)
Author:
cameron
Message:

parabix-0.40 (prelim)

Location:
trunk
Files:
5 deleted
2 edited

Legend:

Unmodified
Added
Removed
  • trunk/Makefile

    r19 r38  
    22CFLAGS= -O3 -DBUFFER_PROFILING
    33OS = $(shell uname)
     4ICONV_FLAGS=
     5ifeq ($(OS),Darwin)
     6        ICONV_FLAGS=-liconv
     7endif
    48
    59markup_stats:   markup_stats.cxx src/xmlbuffer.h src/ilax.h src/multiliteral.h src/engine.h\
    610                src/transpose.h src/bitlex.h src/xmlbuffer.c src/bitlex.c src/engine.c\
    7                 src/charsets/charset_family.h src/charsets/ext_ascii_8.h src/charsets/ext_ascii_8.c\
    8                 src/charsets/ext_ascii_16.h src/charsets/ext_ascii_16.c lib/sse_simd.h\
     11                lib/sse_simd.h\
    912                src/bytelex.h src/charsets/ASCII_EBCDIC.h
    1013        $(CC) -o markup_stats markup_stats.cxx -march=pentium4
     
    1215markup_stats.s: markup_stats.cxx src/xmlbuffer.h src/ilax.h src/multiliteral.h src/engine.h\
    1316                src/transpose.h src/bitlex.h src/xmlbuffer.c src/bitlex.c src/engine.c\
    14                 src/charsets/charset_family.h src/charsets/ext_ascii_8.h src/charsets/ext_ascii_8.c\
    15                 src/charsets/ext_ascii_16.h src/charsets/ext_ascii_16.c lib/sse_simd.h\
     17                lib/sse_simd.h\
    1618                src/bytelex.h src/charsets/ASCII_EBCDIC.h
    1719        $(CC) -o markup_stats.s markup_stats.cxx -march=pentium4 -S -fverbose-asm
    1820
     21markup_stats_g4:         markup_stats.cxx src/xmlbuffer.h src/ilax.h src/multiliteral.h src/engine.h\
     22                src/transpose.h src/bitlex.h src/xmlbuffer.c src/bitlex.c src/engine.c\
     23                lib/altivec_simd.h\
     24                src/bytelex.h src/charsets/ASCII_EBCDIC.h
     25        $(CC) -o -markup_stats markup_stats.cxx -maltivec -faltivec
  • trunk/markup_stats.cxx

    r19 r38  
    1515#include <unistd.h>
    1616
     17#include "src/ilax.h"
     18#include "src/engine.h"
     19
     20
     21/* Internals */
    1722#include "src/xmlbuffer.h"
    1823#include "src/xmlbuffer.c"
    19 
    20 #include "src/charsets/charset_family.h"
    2124#include "src/bitlex.h"
    2225#include "src/bitlex.c"
    23 #include "src/charsets/ext_ascii_8.h"
    24 #include "src/charsets/ext_ascii_8.c"
    25 #include "src/charsets/ext_ascii_16.h"
    26 #include "src/charsets/ext_ascii_16.c"
    27 
    28 #include "src/ilax.h"
    29 #include "src/engine.h"
    3026#include "src/engine.c"
    3127
    3228/* Global declarations for statistics. */
    3329
    34   int comment_count = 0;
    35   int comment_length = 0;
    36   int CDATA_count = 0;
    37   int CDATA_length = 0;
    38   int PI_count = 0;
    39   int PI_length = 0;
    40   int empty_elem_count = 0;
    41   int empty_elem_length = 0;
    42   int start_tag_count = 0;
    43   int start_tag_length = 0;
    44   int attribute_count = 0;
    45   int end_tag_count = 0;
    46   int end_tag_length = 0;
    47   int reference_count = 0;
    48   int reference_length = 0;
    49   int text_item_count = 0;
    50   int text_item_length = 0;
    51   int error_item_count = 0;
    52   int error_item_length = 0;
    53   int nesting_depth = 0;
    54   int max_nesting_depth = 0;
    55   int total_attribute_count = 0;
    56   int total_att_name_length = 0;
    57   int total_att_value_length = 0;
    58   int namespace_count = 0;
    59   int total_namespace_name_length = 0;
    60   int total_namespace_URI_length = 0;
     30int comment_count = 0;
     31int comment_length = 0;
     32int CDATA_count = 0;
     33int CDATA_length = 0;
     34int PI_count = 0;
     35int PI_length = 0;
     36int empty_elem_count = 0;
     37int empty_elem_length = 0;
     38int start_tag_count = 0;
     39int start_tag_length = 0;
     40int attribute_count = 0;
     41int end_tag_count = 0;
     42int end_tag_length = 0;
     43int reference_count = 0;
     44int reference_length = 0;
     45int text_item_count = 0;
     46int text_item_length = 0;
     47int error_item_count = 0;
     48int error_item_length = 0;
     49int nesting_depth = 0;
     50int max_nesting_depth = 0;
     51int total_attribute_count = 0;
     52int total_att_name_length = 0;
     53int total_att_value_length = 0;
     54int namespace_count = 0;
     55int total_namespace_name_length = 0;
     56int total_namespace_URI_length = 0;
    6157
    6258
    6359/* Action routine for an XML comment in "<!--"  "-->" brackets. */
     60//template <CodeUnit_Base C>
    6461static inline void Comment_action(int start_pos, int end_pos) {
    65   comment_count +=1;
    66   comment_length += end_pos - start_pos;
     62        comment_count +=1;
     63        comment_length += end_pos - start_pos;
    6764}
    6865
    6966/* Action routine for a CDATA section enclosed in "<![CDATA[" and "]]>" brackets. */
     67//template <CodeUnit_Base C>
    7068static inline void CDATA_action(int start_pos, int end_pos) {
    71   CDATA_count +=1;
    72   CDATA_length += end_pos - start_pos;
     69        CDATA_count +=1;
     70        CDATA_length += end_pos - start_pos;
    7371}
    7472
    7573/* Action routine for an XML processing instruction enclosed in "<?" and "?>" brackets. */
     74//template <CodeUnit_Base C>
    7675static inline void PI_action(int start_pos, int end_pos) {
    77   PI_count +=1;
    78   PI_length += end_pos - start_pos;
     76        PI_count +=1;
     77        PI_length += end_pos - start_pos;
    7978}
    8079
    8180/* Action routine for an empty element enclosed in "<" and "/>" brackets. */
     81//template <CodeUnit_Base C>
    8282static inline void EmptyElement_action(int start_pos, int end_pos) {
    83   empty_elem_count +=1;
    84   empty_elem_length += end_pos - start_pos;
     83        empty_elem_count +=1;
     84        empty_elem_length += end_pos - start_pos;
    8585}
    8686
    8787/* Action routine for a start tag enclosed in "<" and ">" brackets. */
     88//template <CodeUnit_Base C>
    8889static inline void StartTag_action(int start_pos, int end_pos) {
    89   start_tag_count +=1;
    90   start_tag_length += end_pos - start_pos;
    91   nesting_depth += 1;
    92   if (nesting_depth > max_nesting_depth) max_nesting_depth = nesting_depth;
     90        start_tag_count +=1;
     91        start_tag_length += end_pos - start_pos;
     92        nesting_depth += 1;
     93        if (nesting_depth > max_nesting_depth) max_nesting_depth = nesting_depth;
    9394}
    9495
    9596/* Action routine for an end tag enclosed in "</" and ">" brackets. */
     97//template <CodeUnit_Base C>
    9698static inline void EndTag_action(int start_pos, int end_pos) {
    97   end_tag_count +=1;
    98   end_tag_length += end_pos - start_pos;
    99   nesting_depth -= 1;
     99        end_tag_count +=1;
     100        end_tag_length += end_pos - start_pos;
     101        nesting_depth -= 1;
    100102}
    101103
    102104/* Action routine for an error item */
     105//template <CodeUnit_Base C>
    103106static inline void Error_action(int start_pos, int end_pos) {
    104   error_item_count +=1;
    105   error_item_length += end_pos - start_pos;
    106   printf("Error: illegal markup at positions %i through %i.\n", start_pos, end_pos);
     107        error_item_count +=1;
     108        error_item_length += end_pos - start_pos;
     109        printf("Error: illegal markup at positions %i through %i.\n", start_pos, end_pos);
    107110}
    108111
    109112/* Action routine for a text item */
     113//template <CodeUnit_Base C>
    110114static inline void Text_action(int start_pos, int end_pos) {
    111   text_item_count +=1;
    112   text_item_length += end_pos - start_pos;
    113 }
    114 
     115        text_item_count +=1;
     116        text_item_length += end_pos - start_pos;
     117}
     118
     119//template <CodeUnit_Base C>
    115120static inline void Reference_action(int start_pos, int end_pos) {
    116   reference_count +=1;
    117   reference_length += end_pos - start_pos;
     121        reference_count +=1;
     122        reference_length += end_pos - start_pos;
    118123}
    119124
     
    141146/* Action routine for an element name occurring immediately after the
    142147   opening "<" of a start tag or empty element tag. */
     148//template <CodeUnit_Base C>
    143149static inline void ElementName_action(int start_pos, int end_pos) {
    144150}
     
    146152/* Action routine for a processing instruction target name occurring immediately
    147153   after the opening "<?" of a processing instruction. */
     154//template <CodeUnit_Base C>
    148155static inline void PI_Target_action(int start_pos, int end_pos) {
    149156}
     
    151158/* Action routine for an individual attribute/value pair occurring in
    152159   an element start tag or an empty element tag. */
     160//template <CodeUnit_Base C>
    153161static inline void AttributeValue_action(int name_start, int name_end,
    154162                                  int val_start, int val_end) {
    155   total_attribute_count+=1;
    156   total_att_name_length += name_end - name_start;
    157   total_att_value_length += val_end - val_start;
     163        total_attribute_count+=1;
     164        total_att_name_length += name_end - name_start;
     165        total_att_value_length += val_end - val_start;
    158166}
    159167
    160168/* Action routine for an individual namespace declaration (name/URI pair)
    161169   occurring in an element start tag or an empty element tag. */
     170//template <CodeUnit_Base C>
    162171static inline void Namespace_action(int name_start, int name_end,
    163172                             int URI_start, int URI_end) {
    164   namespace_count+=1;
    165   total_namespace_name_length += name_end - name_start;
    166   total_namespace_URI_length += URI_end - URI_start;
    167 }
    168 
    169 
     173        namespace_count+=1;
     174        total_namespace_name_length += name_end - name_start;
     175        total_namespace_URI_length += URI_end - URI_start;
     176}
     177
     178
     179//template <CodeUnit_Base C>
    170180static inline void FinalizeBuffer_action() {
    171181}
     
    173183
    174184#define print_stats(stat_string, count, total_lgth) \
    175   printf("%i %s", count, stat_string);\
    176   if (count == 0) printf("s.\n");\
    177   else if (count == 1) printf(" of length %i.\n", total_lgth);\
    178   else printf("s of avg. lgth %i.\n", total_lgth/count);
     185        printf("%i %s", count, stat_string);\
     186        if (count == 0) printf("s.\n");\
     187        else if (count == 1) printf(" of length %i.\n", total_lgth);\
     188        else printf("s of avg. lgth %i.\n", total_lgth/count);
    179189
    180190
    181191int
    182192main(int argc, char * argv[]) {
    183   if (argc != 2) {
    184     printf("Usage: %s <filename>\n", argv[0]);
    185           exit(-1);
    186   }
    187   char * filename = argv[1];
    188 
    189   Entity_Declaration_Info xml_info;
    190 
    191   ParsingEngine parser = ParsingEngine(filename);
    192   parser.InitLexer();
    193 
    194   parser.ReadXmlInfo(xml_info);
    195   printf("has_ByteOrderMark = %i\n", xml_info.has_ByteOrderMark);
    196   if (xml_info.has_version_decl) {
    197     printf("XML version 1.%i declared.\n", xml_info.version);
    198   }
    199   else printf ("XML version 1.0 implied by default.\n");
    200   if (xml_info.has_encoding_decl) {
    201     printf("XML encoding named at positions %i through %i\n",
    202            xml_info.encoding_start_pos, xml_info.encoding_end_pos);
    203   }
    204   if (xml_info.has_standalone_decl) {
    205     printf("XML standalone = %i declared.\n", xml_info.standalone);
    206   }
    207   else printf ("XML standalone = 0 by default.\n");
    208 
    209   parser.ParseContent();
    210 
    211   print_stats("comment", comment_count, comment_length);
    212   print_stats("CDATA section", CDATA_count, CDATA_length);
    213   print_stats("processing instruction", PI_count, PI_length);
    214   print_stats("empty element", empty_elem_count, empty_elem_length);
    215   print_stats("start tag", start_tag_count, start_tag_length);
    216   printf("%i total attributes\n", attribute_count);
    217   print_stats("attribute name", total_attribute_count, total_att_name_length);
    218   print_stats("attribute value", total_attribute_count, total_att_value_length);
    219   print_stats("namespace name", namespace_count, total_namespace_name_length);
    220   print_stats("namespace URI", namespace_count, total_namespace_URI_length);
    221   print_stats("end tag", end_tag_count, end_tag_length);
    222   print_stats("text item", text_item_count, text_item_length);
    223   print_stats("reference", reference_count, reference_length);
    224   print_stats("error item", error_item_count, error_item_length);
    225   printf("Maximum nesting depth = %i\n", max_nesting_depth);
    226 
    227   return(0);
    228 }
     193        if (argc != 2) {
     194        printf("Usage: %s <filename>\n", argv[0]);
     195                exit(-1);
     196        }
     197        char * filename = argv[1];
     198       
     199       
     200        Parser_Interface * parser = Parser_Interface::ParserFactory(filename);
     201       
     202       
     203        if (!parser->has_ByteOrderMark()) printf("No ");
     204        printf("Byte Order Mark found.\n");
     205        if (parser->get_version() == XML_1_0) printf("XML version 1.0 declared.\n");
     206        else if (parser->get_version() == XML_1_1) printf("XML version 1.1 declared.\n");
     207        else printf ("XML version 1.0 implied by default.\n");
     208        if (parser->has_EncodingDecl()) {
     209        printf("XML encoding named at positions %i of length %i\n",
     210                parser->get_Encoding_pos(), parser->get_Encoding_lgth());
     211        }
     212        if (parser->standalone_status() == Standalone_yes)
     213                printf("XML standalone = yes declared.\n");
     214        else if (parser->standalone_status() == Standalone_no)
     215                printf("XML standalone = no declared.\n");
     216        else printf ("XML standalone = no by default.\n");
     217       
     218        parser->ParseContent();
     219       
     220        print_stats("comment", comment_count, comment_length);
     221        print_stats("CDATA section", CDATA_count, CDATA_length);
     222        print_stats("processing instruction", PI_count, PI_length);
     223        print_stats("empty element", empty_elem_count, empty_elem_length);
     224        print_stats("start tag", start_tag_count, start_tag_length);
     225        printf("%i total attributes\n", attribute_count);
     226        print_stats("attribute name", total_attribute_count, total_att_name_length);
     227        print_stats("attribute value", total_attribute_count, total_att_value_length);
     228        print_stats("namespace name", namespace_count, total_namespace_name_length);
     229        print_stats("namespace URI", namespace_count, total_namespace_URI_length);
     230        print_stats("end tag", end_tag_count, end_tag_length);
     231        print_stats("text item", text_item_count, text_item_length);
     232        print_stats("reference", reference_count, reference_length);
     233        print_stats("error item", error_item_count, error_item_length);
     234        printf("Maximum nesting depth = %i\n", max_nesting_depth);
     235       
     236        return(0);
     237}
Note: See TracChangeset for help on using the changeset viewer.