Changeset 2663


Ignore:
Timestamp:
Nov 20, 2012, 5:29:13 PM (7 years ago)
Author:
linmengl
Message:

use store_unaligned for speed

Location:
proto/CSV/csv2xml
Files:
3 added
7 edited

Legend:

Unmodified
Added
Removed
  • proto/CSV/csv2xml/QA/test.sh

    r2611 r2663  
    44echo "Test script, compare current results with standard output"
    55
    6 for name in spanned_escape spanned_quote 2006scores salary_data eso_eagle_awards_FY09_11.1.09 sticker-price-list FN TLTD_holdings GUNR_holdings scaledwps
     6for name in right_close spanned_escape spanned_quote 2006scores salary_data eso_eagle_awards_FY09_11.1.09 sticker-price-list FN TLTD_holdings GUNR_holdings scaledwps
    77do
    88    ../src/csv $name.csv my.xml
  • proto/CSV/csv2xml/Report on Perf.txt

    r2611 r2663  
    2828   |        B2. XML logic and I/O time
    2929   -            (I/O unstable, logic simple)
     30
     31Nov. 6th
     32
     33When I turn off XML output and fwrite each buffer directly, it is much faster: every test case takes less than 20 cycles/byte. This implies that issuing many small fwrite calls is quite slow. Consider batching the small writes into one large fwrite.
     34
     35Nov. 8th
     36
     37I tried a static buffer of 16KB, 32KB, or larger; it doesn't speed things up significantly. fwrite already has a default buffer, whose size is 8KB. Maybe a single thread cannot make it any faster. Next I want to measure the output size divided by the input size.
     38
     39Nov. 13th
     40
     41I tried parsing segment by segment: parse a dozen blocks, write the result to a buffer, and flush that buffer to the file with fwrite's default buffering turned off. Surprisingly, it doesn't help much yet.
     42
     43I wrote a better class for segment iterators with Nigel's help. File-off performance improved, but file-on performance didn't change.
     44
     45Nov. 15th
     46At the beginning of today, the test results are:
     47
     48File         Size      File On  File Off  Mask Only Size(Output)/Size(Input)
     49
     502006scores   (1.8MB)   40       7           7       2.3
     51scaledwps    (2.0MB)   34       8           7       3.55
     52gen1000      (28MB)    80       12          3       6.46
     53gen10000     (280MB)   167      12          3       6.46
     54L2_2012-01   (103.2MB) 39       6           3       2.39
     55L2_2012-02   (130.5MB) 42       6           3       2.28
     56
     57Nov. 20th
     58After switching to the SIMD store (store_unaligned), I get these test results:
     59
     60File         Size      File On
     61
     622006scores   (1.8MB)   39
     63scaledwps    (2.0MB)   32
     64gen1000      (28MB)    84
     65gen10000     (280MB)   125
     66L2_2012-01   (103.2MB) 32
     67L2_2012-02   (130.5MB) 39
  • proto/CSV/csv2xml/pablo_template.cpp

    r2611 r2663  
    4545#define LOOKAHEAD_BLOCKS 1
    4646#define LOOKAHEAD_SIZE (BLOCK_SIZE * LOOKAHEAD_BLOCKS)
    47 #define SEGMENT_BLOCKS  12 // WARNING: TagMatcher.hpp causes xmlconf test suite failures for SEGMENT_BLOCKS < 3.
     47#define SEGMENT_BLOCKS 15 // WARNING: TagMatcher.hpp causes xmlconf test suite failures for SEGMENT_BLOCKS < 3.
    4848#define SEGMENT_SIZE (BLOCK_SIZE * SEGMENT_BLOCKS)
    4949#define BUFFER_SIZE (COPYBACK_SIZE + SEGMENT_SIZE + LOOKAHEAD_SIZE + PADDING_SIZE)
     
    5555BitBlock Simd_const_even = simd<4>::constant<5>();
    5656
    57 BitBlock parse_quote_mask(BitBlock quote)
     57inline BitBlock parse_quote_mask(BitBlock quote)
    5858{
    5959    BitBlock p2, p4, p8, p16, p32, p64;
     
    9292#include "../lib/transpose.hpp"
    9393#include "../util/csv2xmlwriter.hpp"
     94#include "../util/bitsegment_iterator.hpp"
    9495
    9596static void do_process(FILE *infile, FILE *outfile);
     
    131132    printf("Process %s as input and %s as output\n", infilename, outfilename);
    132133
    133     FILE *infile = fopen(infilename, "r");
    134     FILE *outfile = fopen(outfilename, "w");
     134    FILE *infile = fopen(infilename, "rb");
     135    FILE *outfile = fopen(outfilename, "wb");
     136    setbuf(outfile, NULL); //shutdown default buffer system
     137    // printf("setvbuf = %d\n", setvbuf(outfile, NULL, _IOFBF, SEGMENT_SIZE * 16));
    135138
    136139    PERF_SEC_BIND(1);
     
    150153}
    151154
     155class IteratorPackage
     156{
     157public:
     158    BitSegment<SEGMENT_BLOCKS> delimSeg, eolSeg, andSeg, hideSeg;
     159
     160    IteratorPackage()
     161    {
     162        init();
     163    }
     164
     165    void init()
     166    {
     167        delimSeg.clear();
     168        eolSeg.clear();
     169        andSeg.clear();
     170        hideSeg.clear();
     171    }
     172
     173    void append(Marker &marker, Lex &lex)
     174    {
     175        delimSeg.append(marker.delim);
     176        eolSeg.append(marker.eol);
     177        andSeg.append(lex.AndSymbol);
     178        hideSeg.append(marker.hide);
     179    }
     180};
     181
    152182class BufferToXMLParser
    153183{
    154     BitBlockForwardIterator delimIter, eolIter, andIter, hideIter;
    155     BitBlockForwardIterator end;
     184    BitSegmentForwardIterator delimIter, eolIter, andIter, hideIter;
    156185    uint8_t *src_buffer;
    157186    Csv2XmlWriter *writer;
     187    IteratorPackage *package;
    158188
    159189public:
    160190
    161     BufferToXMLParser(Marker &marker, Lex &lex, uint8_t *buffer, Csv2XmlWriter *_writer)
    162     {
    163         delimIter = BitBlockForwardIterator(&marker.delim);
    164         eolIter = BitBlockForwardIterator(&marker.eol);
    165         andIter = BitBlockForwardIterator(&lex.AndSymbol);
    166         hideIter = BitBlockForwardIterator(&marker.hide);
    167 
     191    BufferToXMLParser(uint8_t *buffer, Csv2XmlWriter *_writer, IteratorPackage *_package, int n)
     192        : delimIter(_package->delimSeg.address(), n), eolIter(_package->eolSeg.address(), n),
     193            andIter(_package->andSeg.address(), n), hideIter(_package->hideSeg.address(), n)
     194    {
    168195        src_buffer = buffer;
    169196        writer = _writer;
     197        package = _package;
    170198    }
    171199
     
    174202        int pos = 0;
    175203
    176         while (eolIter != end)
     204        while (!eolIter.is_end())
    177205        {
    178206            parseRowWithoutLastColumn(pos, *eolIter);
     
    186214            pos = *eolIter + 1;
    187215
    188             eolIter ++;
     216            ++ eolIter;
    189217            writer->nextRow();
    190218        }
     
    206234    void parseRowWithoutLastColumn(int &pos, int endofline)
    207235    {
    208         BitBlockForwardIterator end;
    209         while ((delimIter != end) && (*delimIter < endofline))
     236        while ((!delimIter.is_end()) && (*delimIter < endofline))
    210237        {
    211238            int length = *delimIter - pos;
     
    213240            writer->nextCol();
    214241            pos = (*delimIter) + 1;
    215             delimIter++;
     242            ++delimIter;
    216243        }
    217244    }
     
    221248        char text[10] = "&amp;";
    222249
    223         if (startPos + length >= BLOCK_SIZE + 1)
    224         {
    225             printf("bufPrint memory leak!\n");
    226             exit(-1);
    227         }
    228 
    229250        while (length > 0)
    230251        {
    231             while ((andIter != end) && *andIter < startPos) andIter++;
    232 
    233             if (andIter != end && *andIter < startPos + length)
     252            while ((!andIter.is_end()) && *andIter < startPos) ++andIter;
     253
     254            if (!andIter.is_end() && *andIter < startPos + length)
    234255            {
    235256                bufPrintWithHideSymbol(startPos, *andIter - startPos);
    236                 writer->writeColumn(text);
     257                writer->writeColumn(text, 5);
    237258
    238259                length -= *andIter - startPos + 1;
     
    251272        while (length > 0)
    252273        {
    253             while (hideIter != end && *hideIter < startPos) hideIter++;
    254 
    255             if (hideIter != end && *hideIter < startPos + length)
     274            while (!hideIter.is_end() && *hideIter < startPos) ++hideIter;
     275
     276            if (!hideIter.is_end() && *hideIter < startPos + length)
    256277            {
    257278                bufPrintSimple(startPos, *hideIter - startPos);
     
    270291    {
    271292        uint8_t *p = src_buffer + startPos;
    272         uint8_t temp = p[length];
    273         p[length] = 0;
    274         writer->writeColumn((char *)p);
    275         p[length] = temp;
     293        writer->writeColumn((char *)p, length);
    276294    }
    277295};
     
    279297void do_process(FILE *infile, FILE *outfile) {
    280298
    281     @decl
     299    @decl;
    282300
    283301    BitBlock buf[9];//watch out buffer size, may cause memory leak!! shit!!
     302    BitBlock segBuf[9 * SEGMENT_BLOCKS];
    284303
    285304    uint8_t * src_buffer = (uint8_t *) buf;
     305    uint8_t * seg_buffer = (uint8_t *) segBuf;
    286306    size_t count;
    287307
    288     @stream_stmts
     308    @stream_stmts;
    289309
    290310    Csv2XmlWriter writer(outfile);
    291311    FlipSignal = ZERO;
    292     //if quoted string spanned more than 1 block, next block's FlipSignal should be 1
    293 
    294     while ((count = fread(src_buffer, sizeof(uint8_t), BLOCK_SIZE, infile)) > 0)
    295     {
     312
     313    bool infile_end = false;
     314    int segment_count;
     315    int segment_blocks;
     316    unsigned long long infile_count = 0;
     317    IteratorPackage package;
     318
     319    while (!infile_end)
     320    {
     321        segment_count = 0;
     322        segment_blocks = 0;
     323        package.init();
    296324        PERF_SEC_START(parser_timer);
    297325
    298         if (count < BLOCK_SIZE)
    299         {
    300             EOF_mask = bitblock::srl(simd<1>::constant<1>(), convert(BLOCK_SIZE-count));
    301             s2p_do_final_block((BytePack *) src_buffer, basis_bits, EOF_mask);
    302             @final_block_stmts
    303         }
    304         else
    305         {
    306             s2p_do_block((BytePack *) src_buffer, basis_bits);
    307             @block_stmts
    308         }
    309 
    310         FlipSignal = ZERO;
    311         if (bitblock::any(simd_and(marker.quote_mask, HIGH_ONE)))
    312         {
    313             FlipSignal = ONE;
    314         }
    315 
    316         PERF_SEC_END(parser_timer, count);
    317 
    318         BufferToXMLParser bufParser(marker, lex, src_buffer, &writer);
    319         bufParser.parseEachRow(count);
    320     }
     326
     327        //if quoted string spanned more than 1 block, next block's FlipSignal should be 1
     328        while (segment_blocks < SEGMENT_BLOCKS && (count = fread(src_buffer, sizeof(uint8_t), BLOCK_SIZE, infile)) > 0)
     329        {
     330            memcpy(seg_buffer + segment_count, src_buffer, count);
     331            segment_blocks ++;
     332            segment_count += count;
     333            infile_count += count;
     334
     335            if (count < BLOCK_SIZE)
     336            {
     337                EOF_mask = bitblock::srl(simd<1>::constant<1>(), convert(BLOCK_SIZE-count));
     338                s2p_do_final_block((BytePack *) src_buffer, basis_bits, EOF_mask);
     339                @final_block_stmts
     340            }
     341            else
     342            {
     343                s2p_do_block((BytePack *) src_buffer, basis_bits);
     344                @block_stmts
     345                FlipSignal = ZERO;
     346                if (bitblock::any(simd_and(marker.quote_mask, HIGH_ONE)))
     347                {
     348                    FlipSignal = ONE;
     349                }
     350            }
     351
     352            package.append(marker, lex);
     353        }
     354
     355        if (segment_count)
     356        {
     357            BufferToXMLParser bufParser(seg_buffer, &writer, &package, segment_blocks);
     358            bufParser.parseEachRow(segment_count);
     359            writer.flush();
     360
     361            PERF_SEC_END(parser_timer, segment_count);
     362        }
     363
     364        if (segment_blocks < SEGMENT_BLOCKS)
     365        {
     366            infile_end = true;
     367        }
     368    }
     369
     370
     371    printf("input: %lld\t", infile_count);
     372    printf("output: %lld\t", writer._outCount);
     373    printf("rate: %lf\n", (double) writer._outCount / (double) infile_count);
    321374}
  • proto/CSV/csv2xml/src/Makefile

    r2611 r2663  
    2525perf:   $(SRCFILE)
    2626        $(CC) -o $(OUTFILE) $(SRCFILE) $(INCLUDES) $(AFLAGS) -DBUFFER_PROFILING
     27
     28debug:  $(SRCFILE)
     29        $(CC) -g -o $(OUTFILE) $(SRCFILE) $(INCLUDES) $(AFLAGS)
  • proto/CSV/csv2xml/src/csv.cpp

    r2611 r2663  
    4545#define LOOKAHEAD_BLOCKS 1
    4646#define LOOKAHEAD_SIZE (BLOCK_SIZE * LOOKAHEAD_BLOCKS)
    47 #define SEGMENT_BLOCKS  12 // WARNING: TagMatcher.hpp causes xmlconf test suite failures for SEGMENT_BLOCKS < 3.
     47#define SEGMENT_BLOCKS 15 // WARNING: TagMatcher.hpp causes xmlconf test suite failures for SEGMENT_BLOCKS < 3.
    4848#define SEGMENT_SIZE (BLOCK_SIZE * SEGMENT_BLOCKS)
    4949#define BUFFER_SIZE (COPYBACK_SIZE + SEGMENT_SIZE + LOOKAHEAD_SIZE + PADDING_SIZE)
     
    5555BitBlock Simd_const_even = simd<4>::constant<5>();
    5656
    57 BitBlock parse_quote_mask(BitBlock quote)
     57inline BitBlock parse_quote_mask(BitBlock quote)
    5858{
    5959    BitBlock p2, p4, p8, p16, p32, p64;
     
    123123                BitBlock temp11, temp12, temp13, temp14, temp15, temp16, temp17, temp18;
    124124                BitBlock temp19, temp20, temp21, temp22, temp23, temp24;
     125
    125126
    126127
     
    163164                BitBlock temp11, temp12, temp13, temp14, temp15, temp16, temp17, temp18;
    164165                BitBlock temp19, temp20, temp21, temp22, temp23, temp24;
     166
    165167
    166168
     
    213215                BitBlock odd, even, start, even_start, even_final, escape, odd_start;
    214216                BitBlock odd_final;
     217                BitBlock tempvar0, tempvar1;
    215218
    216219
    217220        odd = Simd_const_odd;
    218221        even = Simd_const_even;
    219         start = simd_andc(lex.BackSlash, carryQ.BitBlock_advance_ci_co(lex.BackSlash, carryQ.get_carry_in(0), 0));
     222        tempvar0 = carryQ.BitBlock_advance_ci_co(lex.BackSlash, carryQ.get_carry_in(0), 0);
     223        start = simd_andc(lex.BackSlash, tempvar0);
    220224        even_start = simd_and(start, even);
    221225        even_final = carryQ.BitBlock_scanthru_ci_co(even_start, lex.BackSlash, carryQ.get_carry_in(1), 1);
     
    236240        }
    237241        marker.eol = simd_andc(simd_or(lex.CR, lex.LF), marker.quote_mask);
    238         marker.hide = simd_or(marker.quote, simd_andc(simd_and(carryQ.BitBlock_advance_ci_co(lex.CR, carryQ.get_carry_in(3), 3), lex.LF), marker.quote_mask));
     242        tempvar1 = carryQ.BitBlock_advance_ci_co(lex.CR, carryQ.get_carry_in(3), 3);
     243        marker.hide = simd_or(marker.quote, simd_andc(simd_and(tempvar1, lex.LF), marker.quote_mask));
    239244        marker.eol = simd_andc(marker.eol, marker.hide);
    240245        carryQ.CarryQ_Adjust(4);
     
    243248                BitBlock odd, even, start, even_start, even_final, escape, odd_start;
    244249                BitBlock odd_final;
     250                BitBlock tempvar0, tempvar1;
    245251
    246252
    247253        odd = Simd_const_odd;
    248254        even = Simd_const_even;
    249         start = simd_andc(lex.BackSlash, carryQ.BitBlock_advance_ci_co(lex.BackSlash, carryQ.get_carry_in(0), 0));
     255        tempvar0 = carryQ.BitBlock_advance_ci_co(lex.BackSlash, carryQ.get_carry_in(0), 0);
     256        start = simd_andc(lex.BackSlash, tempvar0);
    250257        even_start = simd_and(start, even);
    251258        even_final = carryQ.BitBlock_scanthru_ci_co(even_start, lex.BackSlash, carryQ.get_carry_in(1), 1);
     
    266273        }
    267274        marker.eol = simd_andc(simd_or(lex.CR, lex.LF), marker.quote_mask);
    268         marker.hide = simd_or(marker.quote, simd_andc(simd_and(carryQ.BitBlock_advance_ci_co(lex.CR, carryQ.get_carry_in(3), 3), lex.LF), marker.quote_mask));
     275        tempvar1 = carryQ.BitBlock_advance_ci_co(lex.CR, carryQ.get_carry_in(3), 3);
     276        marker.hide = simd_or(marker.quote, simd_andc(simd_and(tempvar1, lex.LF), marker.quote_mask));
    269277        marker.eol = simd_andc(marker.eol, marker.hide);
    270278  }
     
    284292#include "../lib/transpose.hpp"
    285293#include "../util/csv2xmlwriter.hpp"
     294#include "../util/bitsegment_iterator.hpp"
    286295
    287296static void do_process(FILE *infile, FILE *outfile);
     
    323332    printf("Process %s as input and %s as output\n", infilename, outfilename);
    324333
    325     FILE *infile = fopen(infilename, "r");
    326     FILE *outfile = fopen(outfilename, "w");
     334    FILE *infile = fopen(infilename, "rb");
     335    FILE *outfile = fopen(outfilename, "wb");
     336    setbuf(outfile, NULL); //shutdown default buffer system
     337    // printf("setvbuf = %d\n", setvbuf(outfile, NULL, _IOFBF, SEGMENT_SIZE * 16));
    327338
    328339    PERF_SEC_BIND(1);
     
    342353}
    343354
     355class IteratorPackage
     356{
     357public:
     358    BitSegment<SEGMENT_BLOCKS> delimSeg, eolSeg, andSeg, hideSeg;
     359
     360    IteratorPackage()
     361    {
     362        init();
     363    }
     364
     365    void init()
     366    {
     367        delimSeg.clear();
     368        eolSeg.clear();
     369        andSeg.clear();
     370        hideSeg.clear();
     371    }
     372
     373    void append(Marker &marker, Lex &lex)
     374    {
     375        delimSeg.append(marker.delim);
     376        eolSeg.append(marker.eol);
     377        andSeg.append(lex.AndSymbol);
     378        hideSeg.append(marker.hide);
     379    }
     380};
     381
    344382class BufferToXMLParser
    345383{
    346     BitBlockForwardIterator delimIter, eolIter, andIter, hideIter;
    347     BitBlockForwardIterator end;
     384    BitSegmentForwardIterator delimIter, eolIter, andIter, hideIter;
    348385    uint8_t *src_buffer;
    349386    Csv2XmlWriter *writer;
     387    IteratorPackage *package;
    350388
    351389public:
    352390
    353     BufferToXMLParser(Marker &marker, Lex &lex, uint8_t *buffer, Csv2XmlWriter *_writer)
    354     {
    355         delimIter = BitBlockForwardIterator(&marker.delim);
    356         eolIter = BitBlockForwardIterator(&marker.eol);
    357         andIter = BitBlockForwardIterator(&lex.AndSymbol);
    358         hideIter = BitBlockForwardIterator(&marker.hide);
    359 
     391    BufferToXMLParser(uint8_t *buffer, Csv2XmlWriter *_writer, IteratorPackage *_package, int n)
     392        : delimIter(_package->delimSeg.address(), n), eolIter(_package->eolSeg.address(), n),
     393            andIter(_package->andSeg.address(), n), hideIter(_package->hideSeg.address(), n)
     394    {
    360395        src_buffer = buffer;
    361396        writer = _writer;
     397        package = _package;
    362398    }
    363399
     
    366402        int pos = 0;
    367403
    368         while (eolIter != end)
     404        while (!eolIter.is_end())
    369405        {
    370406            parseRowWithoutLastColumn(pos, *eolIter);
     
    378414            pos = *eolIter + 1;
    379415
    380             eolIter ++;
     416            ++ eolIter;
    381417            writer->nextRow();
    382418        }
     
    398434    void parseRowWithoutLastColumn(int &pos, int endofline)
    399435    {
    400         BitBlockForwardIterator end;
    401         while ((delimIter != end) && (*delimIter < endofline))
     436        while ((!delimIter.is_end()) && (*delimIter < endofline))
    402437        {
    403438            int length = *delimIter - pos;
     
    405440            writer->nextCol();
    406441            pos = (*delimIter) + 1;
    407             delimIter++;
     442            ++delimIter;
    408443        }
    409444    }
     
    413448        char text[10] = "&amp;";
    414449
    415         if (startPos + length >= BLOCK_SIZE + 1)
    416         {
    417             printf("bufPrint memory leak!\n");
    418             exit(-1);
    419         }
    420 
    421450        while (length > 0)
    422451        {
    423             while ((andIter != end) && *andIter < startPos) andIter++;
    424 
    425             if (andIter != end && *andIter < startPos + length)
     452            while ((!andIter.is_end()) && *andIter < startPos) ++andIter;
     453
     454            if (!andIter.is_end() && *andIter < startPos + length)
    426455            {
    427456                bufPrintWithHideSymbol(startPos, *andIter - startPos);
    428                 writer->writeColumn(text);
     457                writer->writeColumn(text, 5);
    429458
    430459                length -= *andIter - startPos + 1;
     
    443472        while (length > 0)
    444473        {
    445             while (hideIter != end && *hideIter < startPos) hideIter++;
    446 
    447             if (hideIter != end && *hideIter < startPos + length)
     474            while (!hideIter.is_end() && *hideIter < startPos) ++hideIter;
     475
     476            if (!hideIter.is_end() && *hideIter < startPos + length)
    448477            {
    449478                bufPrintSimple(startPos, *hideIter - startPos);
     
    462491    {
    463492        uint8_t *p = src_buffer + startPos;
    464         uint8_t temp = p[length];
    465         p[length] = 0;
    466         writer->writeColumn((char *)p);
    467         p[length] = temp;
     493        writer->writeColumn((char *)p, length);
    468494    }
    469495};
     
    477503  struct Marker marker;
    478504
    479 
     505;
    480506
    481507    BitBlock buf[9];//watch out buffer size, may cause memory leak!! shit!!
     508    BitBlock segBuf[9 * SEGMENT_BLOCKS];
    482509
    483510    uint8_t * src_buffer = (uint8_t *) buf;
     511    uint8_t * seg_buffer = (uint8_t *) segBuf;
    484512    size_t count;
    485513
    486514      Classify_bytes classify_bytes;
    487515  Parse_marker parse_marker;
    488 
     516;
    489517
    490518    Csv2XmlWriter writer(outfile);
    491519    FlipSignal = ZERO;
    492     //if quoted string spanned more than 1 block, next block's FlipSignal should be 1
    493 
    494     while ((count = fread(src_buffer, sizeof(uint8_t), BLOCK_SIZE, infile)) > 0)
    495     {
     520
     521    bool infile_end = false;
     522    int segment_count;
     523    int segment_blocks;
     524    unsigned long long infile_count = 0;
     525    IteratorPackage package;
     526
     527    while (!infile_end)
     528    {
     529        segment_count = 0;
     530        segment_blocks = 0;
     531        package.init();
    496532        PERF_SEC_START(parser_timer);
    497533
    498         if (count < BLOCK_SIZE)
    499         {
    500             EOF_mask = bitblock::srl(simd<1>::constant<1>(), convert(BLOCK_SIZE-count));
    501             s2p_do_final_block((BytePack *) src_buffer, basis_bits, EOF_mask);
    502            
     534
     535        //if quoted string spanned more than 1 block, next block's FlipSignal should be 1
     536        while (segment_blocks < SEGMENT_BLOCKS && (count = fread(src_buffer, sizeof(uint8_t), BLOCK_SIZE, infile)) > 0)
     537        {
     538            memcpy(seg_buffer + segment_count, src_buffer, count);
     539            segment_blocks ++;
     540            segment_count += count;
     541            infile_count += count;
     542
     543            if (count < BLOCK_SIZE)
     544            {
     545                EOF_mask = bitblock::srl(simd<1>::constant<1>(), convert(BLOCK_SIZE-count));
     546                s2p_do_final_block((BytePack *) src_buffer, basis_bits, EOF_mask);
     547               
    503548  classify_bytes.do_final_block(basis_bits, lex, EOF_mask);
    504549  parse_marker.do_final_block(lex, marker, EOF_mask);
    505         }
    506         else
    507         {
    508             s2p_do_block((BytePack *) src_buffer, basis_bits);
    509            
     550            }
     551            else
     552            {
     553                s2p_do_block((BytePack *) src_buffer, basis_bits);
     554               
    510555  classify_bytes.do_block(basis_bits, lex);
    511556  parse_marker.do_block(lex, marker);
    512         }
    513 
    514         FlipSignal = ZERO;
    515         if (bitblock::any(simd_and(marker.quote_mask, HIGH_ONE)))
    516         {
    517             FlipSignal = ONE;
    518         }
    519 
    520         PERF_SEC_END(parser_timer, count);
    521 
    522         BufferToXMLParser bufParser(marker, lex, src_buffer, &writer);
    523         bufParser.parseEachRow(count);
    524     }
     557                FlipSignal = ZERO;
     558                if (bitblock::any(simd_and(marker.quote_mask, HIGH_ONE)))
     559                {
     560                    FlipSignal = ONE;
     561                }
     562            }
     563
     564            package.append(marker, lex);
     565        }
     566
     567        if (segment_count)
     568        {
     569            BufferToXMLParser bufParser(seg_buffer, &writer, &package, segment_blocks);
     570            bufParser.parseEachRow(segment_count);
     571            writer.flush();
     572
     573            PERF_SEC_END(parser_timer, segment_count);
     574        }
     575
     576        if (segment_blocks < SEGMENT_BLOCKS)
     577        {
     578            infile_end = true;
     579        }
     580    }
     581
     582
     583    printf("input: %lld\t", infile_count);
     584    printf("output: %lld\t", writer._outCount);
     585    printf("rate: %lf\n", (double) writer._outCount / (double) infile_count);
    525586}
  • proto/CSV/csv2xml/src/sample.csv

    r2597 r2663  
    1 aaaaa,aaaaaa,aaaaaaaa,aaaaaaa,aaaaaaaa,aaaaaaa
    2 aaaaa,aaaaaa,aaaaaaaa,aaaaaaa,aaaaaaaa,aaaaaaa
    3 aaaaa,aaaaaa,aaaaaaaa,aaaaaaa,aaaaaaaa,aaaaaaa
    4 aaaaa,aaaaaa,aaaaaaaa,aaaaaaa,aaaaaaaa,aaaaaaa
    5 aaaaa,aaaaaa,aaaaaaaa,aaaaaaa,aaaaaaaa,aaaaaaa
    6 aaaaa,aaaaaa,aaaaaaaa,aaaaaaa,aaaaaaaa,aaaaaaa
    7 aaaaa,aaaaaa,aaaaaaaa,aaaaaaa,aaaaaaaa,aaaaaaa
    8 aaaaa,aaaaaa,aaaaaaaa,aaaaaaa,aaaaaaaa,aaaaaaa
    9 aaaaa,aaaaaa,aaaaaaaa,aaaaaaa,aaaaaaaa,aaaaaaa
    10 aaaaa,aaaaaa,aaaaaaaa,aaaaaaa,aaaaaaaa,aaaaaaa
    11 aaaaa,aaaaaa,aaaaaaaa,aaaaaaa,aaaaaaaa,aaaaaaa
    12 aaaaa,aaaaaa,aaaaaaaa,aaaaaaa,aaaaaaaa,aaaaaaa
    13 aaaaa,aaaaaa,aaaaaaaa,aaaaaaa,aaaaaaaa,aaaaaaa
    14 aaaaa,aaaaaa,aaaaaaaa,aaaaaaa,aaaaaaaa,aaaaaaa
    15 aaaaa,aaaaaa,aaaaaaaa,aaaaaaa,aaaaaaaa,aaaaaaa
    16 aaaaa,aaaaaa,aaaaaaaa,aaaaaaa,aaaaaaaa,aaaaaaa
    17 aaaaa,aaaaaa,aaaaaaaa,aaaaaaa,aaaaaaaa,aaaaaaa
    18 aaaaa,aaaaaa,aaaaaaaa,aaaaaaa,aaaaaaaa,aaaaaaa
    19 aaaaa,aaaaaa,aaaaaaaa,aaaaaaa,aaaaaaaa,aaaaaaa
    20 aaaaa,aaaaaa,aaaaaaaa,aaaaaaa,aaaaaaaa,aaaaaaa
    21 aaaaa,aaaaaa,aaaaaaaa,aaaaaaa,aaaaaaaa,aaaaaaa
    22 aaaaa,aaaaaa,aaaaaaaa,aaaaaaa,aaaaaaaa,aaaaaaa
    23 aaaaa,aaaaaa,aaaaaaaa,aaaaaaa,aaaaaaaa,aaaaaaa
    24 aaaaa,aaaaaa,aaaaaaaa,aaaaaaa,aaaaaaaa,aaaaaaa
    25 aaaaa,aaaaaa,aaaaaaaa,aaaaaaa,aaaaaaaa,aaaaaaa
    26 aaaaa,aaaaaa,aaaaaaaa,aaaaaaa,aaaaaaaa,aaaaaaa
    27 aaaaa,aaaaaa,aaaaaaaa,aaaaaaa,aaaaaaaa,aaaaaaa
    28 aaaaa,aaaaaa,aaaaaaaa,aaaaaaa,aaaaaaaa,aaaaaaa
    29 aaaaa,aaaaaa,aaaaaaaa,aaaaaaa,aaaaaaaa,aaaaaaa
    30 aaaaa,aaaaaa,aaaaaaaa,aaaaaaa,aaaaaaaa,aaaaaaa
    31 aaaaa,aaaaaa,aaaaaaaa,aaaaaaa,aaaaaaaa,aaaaaaa
    32 aaaaa,aaaaaa,aaaaaaaa,aaaaaaa,aaaaaaaa,aaaaaaa
    33 aaaaa,aaaaaa,aaaaaaaa,aaaaaaa,aaaaaaaa,aaaaaaa
    34 aaaaa,aaaaaa,aaaaaaaa,aaaaaaa,aaaaaaaa,aaaaaaa
    35 aaaaa,aaaaaa,aaaaaaaa,aaaaaaa,aaaaaaaa,aaaaaaa
    36 aaaaa,aaaaaa,aaaaaaaa,aaaaaaa,aaaaaaaa,aaaaaaa
    37 aaaaa,aaaaaa,aaaaaaaa,aaaaaaa,aaaaaaaa,aaaaaaa
    38 aaaaa,aaaaaa,aaaaaaaa,aaaaaaa,aaaaaaaa,aaaaaaa
    39 aaaaa,aaaaaa,aaaaaaaa,aaaaaaa,aaaaaaaa,aaaaaaa
    40 aaaaa,aaaaaa,aaaaaaaa,aaaaaaa,aaaaaaaa,aaaaaaa
    41 aaaaa,aaaaaa,aaaaaaaa,aaaaaaa,aaaaaaaa,aaaaaaa
    42 aaaaa,aaaaaa,aaaaaaaa,aaaaaaa,aaaaaaaa,aaaaaaa
    43 aaaaa,aaaaaa,aaaaaaaa,aaaaaaa,aaaaaaaa,aaaaaaa
    44 aaaaa,aaaaaa,aaaaaaaa,aaaaaaa,aaaaaaaa,aaaaaaa
    45 aaaaa,aaaaaa,aaaaaaaa,aaaaaaa,aaaaaaaa,aaaaaaa
    46 aaaaa,aaaaaa,aaaaaaaa,aaaaaaa,aaaaaaaa,aaaaaaa
    47 aaaaa,aaaaaa,aaaaaaaa,aaaaaaa,aaaaaaaa,aaaaaaa
    48 aaaaa,aaaaaa,aaaaaaaa,aaaaaaa,aaaaaaaa,aaaaaaa
    49 aaaaa,aaaaaa,aaaaaaaa,aaaaaaa,aaaaaaaa,aaaaaaa
    50 aaaaa,aaaaaa,aaaaaaaa,aaaaaaa,aaaaaaaa,aaaaaaa
    51 aaaaa,aaaaaa,aaaaaaaa,aaaaaaa,aaaaaaaa,aaaaaaa
    52 aaaaa,aaaaaa,aaaaaaaa,aaaaaaa,aaaaaaaa,aaaaaaa
    53 aaaaa,aaaaaa,aaaaaaaa,aaaaaaa,aaaaaaaa,aaaaaaa
    54 aaaaa,aaaaaa,aaaaaaaa,aaaaaaa,aaaaaaaa,aaaaaaa
    55 aaaaa,aaaaaa,aaaaaaaa,aaaaaaa,aaaaaaaa,aaaaaaa
    56 aaaaa,aaaaaa,aaaaaaaa,aaaaaaa,aaaaaaaa,aaaaaaa
    57 aaaaa,aaaaaa,aaaaaaaa,aaaaaaa,aaaaaaaa,aaaaaaa
    58 aaaaa,aaaaaa,aaaaaaaa,aaaaaaa,aaaaaaaa,aaaaaaa
    59 aaaaa,aaaaaa,aaaaaaaa,aaaaaaa,aaaaaaaa,aaaaaaa
    60 aaaaa,aaaaaa,aaaaaaaa,aaaaaaa,aaaaaaaa,aaaaaaa
    61 aaaaa,aaaaaa,aaaaaaaa,aaaaaaa,aaaaaaaa,aaaaaaa
    62 aaaaa,aaaaaa,aaaaaaaa,aaaaaaa,aaaaaaaa,aaaaaaa
    63 aaaaa,aaaaaa,aaaaaaaa,aaaaaaa,aaaaaaaa,aaaaaaa
    64 aaaaa,aaaaaa,aaaaaaaa,aaaaaaa,aaaaaaaa,aaaaaaa
    65 aaaaa,aaaaaa,aaaaaaaa,aaaaaaa,aaaaaaaa,aaaaaaa
    66 aaaaa,aaaaaa,aaaaaaaa,aaaaaaa,aaaaaaaa,aaaaaaa
    67 aaaaa,aaaaaa,aaaaaaaa,aaaaaaa,aaaaaaaa,aaaaaaa
    68 aaaaa,aaaaaa,aaaaaaaa,aaaaaaa,aaaaaaaa,aaaaaaa
    69 aaaaa,aaaaaa,aaaaaaaa,aaaaaaa,aaaaaaaa,aaaaaaa
    70 aaaaa,aaaaaa,aaaaaaaa,aaaaaaa,aaaaaaaa,aaaaaaa
    71 aaaaa,aaaaaa,aaaaaaaa,aaaaaaa,aaaaaaaa,aaaaaaa
    72 aaaaa,aaaaaa,aaaaaaaa,aaaaaaa,aaaaaaaa,aaaaaaa
    73 aaaaa,aaaaaa,aaaaaaaa,aaaaaaa,aaaaaaaa,aaaaaaa
    74 aaaaa,aaaaaa,aaaaaaaa,aaaaaaa,aaaaaaaa,aaaaaaa
    75 aaaaa,aaaaaa,aaaaaaaa,aaaaaaa,aaaaaaaa,aaaaaaa
    76 aaaaa,aaaaaa,aaaaaaaa,aaaaaaa,aaaaaaaa,aaaaaaa
    77 aaaaa,aaaaaa,aaaaaaaa,aaaaaaa,aaaaaaaa,aaaaaaa
    78 aaaaa,aaaaaa,aaaaaaaa,aaaaaaa,aaaaaaaa,aaaaaaa
    79 aaaaa,aaaaaa,aaaaaaaa,aaaaaaa,aaaaaaaa,aaaaaaa
    80 aaaaa,aaaaaa,aaaaaaaa,aaaaaaa,aaaaaaaa,aaaaaaa
    81 aaaaa,aaaaaa,aaaaaaaa,aaaaaaa,aaaaaaaa,aaaaaaa
    82 aaaaa,aaaaaa,aaaaaaaa,aaaaaaa,aaaaaaaa,aaaaaaa
     1a,b,c,d
     2a,b,c,d
  • proto/CSV/csv2xml/util/csv2xmlwriter.hpp

    r2611 r2663  
    55*/
    66#include <cstdio>
     7#include <cstdlib>
    78#include <cstring>
    8 #include <cstdlib>
     9#include "../lib/bitblock.hpp"
    910
    10 const int NO_FILE_OUTPUT = 1;
     11#define FILE_OUTPUT
     12
     13int8_t outBuf[SEGMENT_SIZE * 40];
    1114
    1215class Csv2XmlWriter
     
    1720    int _colStarted;
    1821    int _needClose;
    19     unsigned int _outCount;
     22    int _bufCount;
    2023
    21     void printString(const char *s)
     24public:
     25    inline void flush()
    2226    {
    23         if (NO_FILE_OUTPUT)
    24             _outCount += strlen(s);
    25         else
    26             fprintf(_fout, "%s", s);
     27        #ifdef FILE_OUTPUT
     28        fwrite((void *)outBuf, 1, _bufCount, _fout);
     29        #endif
     30        _bufCount = 0;
    2731    }
    2832
    29 public:
     33    unsigned long long _outCount;
     34
    3035    Csv2XmlWriter(FILE *fout)
    3136    {
     
    4550    {
    4651        writeRootEnd();
    47 
     52        flush();
    4853        if (_needClose)
    4954            fclose( _fout );
    5055    }
    5156
    52     void writeColumn(char* fullColumnContent)
     57    void writeColumn(char* fullColumnContent, int len)
    5358    {
    54         if (fullColumnContent[0] == 0)
     59        if (len == 0)
    5560            return;
    5661
     
    6065        }
    6166
    62         writeContent(fullColumnContent);
     67        writeContent(fullColumnContent, len);
    6368    }
    6469
     
    8085
    8186private:
    82     int _rowDepth;
    83     int _colDepth;
    84     void init()
     87    union StringBitBlock
    8588    {
    86         writeRootStart();
     89        char col_string[12];
     90        BitBlock col_block;
     91    };
     92
     93    StringBitBlock rol_start;
     94    StringBitBlock rol_end;
     95    StringBitBlock col_start;
     96    StringBitBlock col_end;
     97
     98    inline void init()
     99    {
    87100        _rowStarted = 0;
    88101        _colStarted = 0;
    89         _rowDepth = 0;
    90         _colDepth = 0;
    91102        _outCount = 0;
     103        _bufCount = 0;
     104        strcpy(rol_start.col_string, "   <row>\n");
     105        strcpy(rol_end.col_string, "   </row>\n");
     106        strcpy(col_start.col_string, "      <col>");
     107        strcpy(col_end.col_string, "</col>\n");
     108
     109        writeRootStart();
    92110    }
    93111
    94     inline void writeContent(char* fullColumnContent)
     112     //Profile result is opposite this function.
     113    inline void printString(const char *s, int len)
    95114    {
    96         printString(fullColumnContent);
     115        memcpy((char *)outBuf + _bufCount, s, len);
     116        _bufCount += len;
     117        _outCount += len;
     118    }
     119
     120    inline void printConst(StringBitBlock &out, int len)
     121    {
     122        bitblock::store_unaligned(out.col_block, (BitBlock *) ((char *)outBuf + _bufCount));
     123        _bufCount += len;
     124        _outCount += len;
     125    }
     126
     127    inline void writeContent(char* fullColumnContent, int len)
     128    {
     129        printString(fullColumnContent, len);
    97130    }
    98131
    99132    inline void writeRootStart()
    100133    {
    101         printString("<root>\n");
     134        printString("<root>\n", 7);
    102135    }
    103136
    104137    inline void writeRootEnd()
    105138    {
    106         printString("</root>\n");
     139        if (_colStarted)
     140            writeColumnEnd();
     141        if (_rowStarted)
     142            writeRowEnd();
     143
     144        printString("</root>\n", 8);
    107145    }
    108146
    109147    inline void writeRowStart()
    110148    {
    111         printString("   <row>\n");
    112         _rowDepth++;
     149        printConst(rol_start, 9);
    113150        _rowStarted = 1;
    114151    }
     
    116153    inline void writeRowEnd()
    117154    {
    118         if (_rowDepth)
    119         {
    120             printString("   </row>\n");
    121             _rowDepth--;
    122         }
     155        printConst(rol_end, 10);
    123156        _rowStarted = 0;
    124157    }
     
    130163            writeRowStart();
    131164        }
    132 
    133         printString("      <col>");
    134         _colDepth ++;
     165        printConst(col_start, 11);
    135166        _colStarted = 1;
    136167    }
     
    138169    inline void writeColumnEnd()
    139170    {
    140         if (_colDepth)
    141         {
    142             printString("</col>\n");
    143             _colDepth--;
    144         }
     171        printConst(col_end, 7);
    145172        _colStarted = 0;
    146173    }
Note: See TracChangeset for help on using the changeset viewer.