Ignore:
Timestamp:
Nov 20, 2012, 5:29:13 PM (6 years ago)
Author:
linmengl
Message:

use store_unaligned for speed

File:
1 edited

Legend:

Unmodified
Added
Removed
  • proto/CSV/csv2xml/src/csv.cpp

    r2611 r2663  
    4545#define LOOKAHEAD_BLOCKS 1
    4646#define LOOKAHEAD_SIZE (BLOCK_SIZE * LOOKAHEAD_BLOCKS)
    47 #define SEGMENT_BLOCKS  12 // WARNING: TagMatcher.hpp causes xmlconf test suite failures for SEGMENT_BLOCKS < 3.
     47#define SEGMENT_BLOCKS 15 // WARNING: TagMatcher.hpp causes xmlconf test suite failures for SEGMENT_BLOCKS < 3.
    4848#define SEGMENT_SIZE (BLOCK_SIZE * SEGMENT_BLOCKS)
    4949#define BUFFER_SIZE (COPYBACK_SIZE + SEGMENT_SIZE + LOOKAHEAD_SIZE + PADDING_SIZE)
     
    5555BitBlock Simd_const_even = simd<4>::constant<5>();
    5656
    57 BitBlock parse_quote_mask(BitBlock quote)
     57inline BitBlock parse_quote_mask(BitBlock quote)
    5858{
    5959    BitBlock p2, p4, p8, p16, p32, p64;
     
    123123                BitBlock temp11, temp12, temp13, temp14, temp15, temp16, temp17, temp18;
    124124                BitBlock temp19, temp20, temp21, temp22, temp23, temp24;
     125
    125126
    126127
     
    163164                BitBlock temp11, temp12, temp13, temp14, temp15, temp16, temp17, temp18;
    164165                BitBlock temp19, temp20, temp21, temp22, temp23, temp24;
     166
    165167
    166168
     
    213215                BitBlock odd, even, start, even_start, even_final, escape, odd_start;
    214216                BitBlock odd_final;
     217                BitBlock tempvar0, tempvar1;
    215218
    216219
    217220        odd = Simd_const_odd;
    218221        even = Simd_const_even;
    219         start = simd_andc(lex.BackSlash, carryQ.BitBlock_advance_ci_co(lex.BackSlash, carryQ.get_carry_in(0), 0));
     222        tempvar0 = carryQ.BitBlock_advance_ci_co(lex.BackSlash, carryQ.get_carry_in(0), 0);
     223        start = simd_andc(lex.BackSlash, tempvar0);
    220224        even_start = simd_and(start, even);
    221225        even_final = carryQ.BitBlock_scanthru_ci_co(even_start, lex.BackSlash, carryQ.get_carry_in(1), 1);
     
    236240        }
    237241        marker.eol = simd_andc(simd_or(lex.CR, lex.LF), marker.quote_mask);
    238         marker.hide = simd_or(marker.quote, simd_andc(simd_and(carryQ.BitBlock_advance_ci_co(lex.CR, carryQ.get_carry_in(3), 3), lex.LF), marker.quote_mask));
     242        tempvar1 = carryQ.BitBlock_advance_ci_co(lex.CR, carryQ.get_carry_in(3), 3);
     243        marker.hide = simd_or(marker.quote, simd_andc(simd_and(tempvar1, lex.LF), marker.quote_mask));
    239244        marker.eol = simd_andc(marker.eol, marker.hide);
    240245        carryQ.CarryQ_Adjust(4);
     
    243248                BitBlock odd, even, start, even_start, even_final, escape, odd_start;
    244249                BitBlock odd_final;
     250                BitBlock tempvar0, tempvar1;
    245251
    246252
    247253        odd = Simd_const_odd;
    248254        even = Simd_const_even;
    249         start = simd_andc(lex.BackSlash, carryQ.BitBlock_advance_ci_co(lex.BackSlash, carryQ.get_carry_in(0), 0));
     255        tempvar0 = carryQ.BitBlock_advance_ci_co(lex.BackSlash, carryQ.get_carry_in(0), 0);
     256        start = simd_andc(lex.BackSlash, tempvar0);
    250257        even_start = simd_and(start, even);
    251258        even_final = carryQ.BitBlock_scanthru_ci_co(even_start, lex.BackSlash, carryQ.get_carry_in(1), 1);
     
    266273        }
    267274        marker.eol = simd_andc(simd_or(lex.CR, lex.LF), marker.quote_mask);
    268         marker.hide = simd_or(marker.quote, simd_andc(simd_and(carryQ.BitBlock_advance_ci_co(lex.CR, carryQ.get_carry_in(3), 3), lex.LF), marker.quote_mask));
     275        tempvar1 = carryQ.BitBlock_advance_ci_co(lex.CR, carryQ.get_carry_in(3), 3);
     276        marker.hide = simd_or(marker.quote, simd_andc(simd_and(tempvar1, lex.LF), marker.quote_mask));
    269277        marker.eol = simd_andc(marker.eol, marker.hide);
    270278  }
     
    284292#include "../lib/transpose.hpp"
    285293#include "../util/csv2xmlwriter.hpp"
     294#include "../util/bitsegment_iterator.hpp"
    286295
    287296static void do_process(FILE *infile, FILE *outfile);
     
    323332    printf("Process %s as input and %s as output\n", infilename, outfilename);
    324333
    325     FILE *infile = fopen(infilename, "r");
    326     FILE *outfile = fopen(outfilename, "w");
     334    FILE *infile = fopen(infilename, "rb");
     335    FILE *outfile = fopen(outfilename, "wb");
     336    setbuf(outfile, NULL); //shutdown default buffer system
     337    // printf("setvbuf = %d\n", setvbuf(outfile, NULL, _IOFBF, SEGMENT_SIZE * 16));
    327338
    328339    PERF_SEC_BIND(1);
     
    342353}
    343354
     355class IteratorPackage
     356{
     357public:
     358    BitSegment<SEGMENT_BLOCKS> delimSeg, eolSeg, andSeg, hideSeg;
     359
     360    IteratorPackage()
     361    {
     362        init();
     363    }
     364
     365    void init()
     366    {
     367        delimSeg.clear();
     368        eolSeg.clear();
     369        andSeg.clear();
     370        hideSeg.clear();
     371    }
     372
     373    void append(Marker &marker, Lex &lex)
     374    {
     375        delimSeg.append(marker.delim);
     376        eolSeg.append(marker.eol);
     377        andSeg.append(lex.AndSymbol);
     378        hideSeg.append(marker.hide);
     379    }
     380};
     381
    344382class BufferToXMLParser
    345383{
    346     BitBlockForwardIterator delimIter, eolIter, andIter, hideIter;
    347     BitBlockForwardIterator end;
     384    BitSegmentForwardIterator delimIter, eolIter, andIter, hideIter;
    348385    uint8_t *src_buffer;
    349386    Csv2XmlWriter *writer;
     387    IteratorPackage *package;
    350388
    351389public:
    352390
    353     BufferToXMLParser(Marker &marker, Lex &lex, uint8_t *buffer, Csv2XmlWriter *_writer)
    354     {
    355         delimIter = BitBlockForwardIterator(&marker.delim);
    356         eolIter = BitBlockForwardIterator(&marker.eol);
    357         andIter = BitBlockForwardIterator(&lex.AndSymbol);
    358         hideIter = BitBlockForwardIterator(&marker.hide);
    359 
     391    BufferToXMLParser(uint8_t *buffer, Csv2XmlWriter *_writer, IteratorPackage *_package, int n)
     392        : delimIter(_package->delimSeg.address(), n), eolIter(_package->eolSeg.address(), n),
     393            andIter(_package->andSeg.address(), n), hideIter(_package->hideSeg.address(), n)
     394    {
    360395        src_buffer = buffer;
    361396        writer = _writer;
     397        package = _package;
    362398    }
    363399
     
    366402        int pos = 0;
    367403
    368         while (eolIter != end)
     404        while (!eolIter.is_end())
    369405        {
    370406            parseRowWithoutLastColumn(pos, *eolIter);
     
    378414            pos = *eolIter + 1;
    379415
    380             eolIter ++;
     416            ++ eolIter;
    381417            writer->nextRow();
    382418        }
     
    398434    void parseRowWithoutLastColumn(int &pos, int endofline)
    399435    {
    400         BitBlockForwardIterator end;
    401         while ((delimIter != end) && (*delimIter < endofline))
     436        while ((!delimIter.is_end()) && (*delimIter < endofline))
    402437        {
    403438            int length = *delimIter - pos;
     
    405440            writer->nextCol();
    406441            pos = (*delimIter) + 1;
    407             delimIter++;
     442            ++delimIter;
    408443        }
    409444    }
     
    413448        char text[10] = "&amp;";
    414449
    415         if (startPos + length >= BLOCK_SIZE + 1)
    416         {
    417             printf("bufPrint memory leak!\n");
    418             exit(-1);
    419         }
    420 
    421450        while (length > 0)
    422451        {
    423             while ((andIter != end) && *andIter < startPos) andIter++;
    424 
    425             if (andIter != end && *andIter < startPos + length)
     452            while ((!andIter.is_end()) && *andIter < startPos) ++andIter;
     453
     454            if (!andIter.is_end() && *andIter < startPos + length)
    426455            {
    427456                bufPrintWithHideSymbol(startPos, *andIter - startPos);
    428                 writer->writeColumn(text);
     457                writer->writeColumn(text, 5);
    429458
    430459                length -= *andIter - startPos + 1;
     
    443472        while (length > 0)
    444473        {
    445             while (hideIter != end && *hideIter < startPos) hideIter++;
    446 
    447             if (hideIter != end && *hideIter < startPos + length)
     474            while (!hideIter.is_end() && *hideIter < startPos) ++hideIter;
     475
     476            if (!hideIter.is_end() && *hideIter < startPos + length)
    448477            {
    449478                bufPrintSimple(startPos, *hideIter - startPos);
     
    462491    {
    463492        uint8_t *p = src_buffer + startPos;
    464         uint8_t temp = p[length];
    465         p[length] = 0;
    466         writer->writeColumn((char *)p);
    467         p[length] = temp;
     493        writer->writeColumn((char *)p, length);
    468494    }
    469495};
     
    477503  struct Marker marker;
    478504
    479 
     505;
    480506
    481507    BitBlock buf[9];//watch out buffer size, may cause memory leak!! shit!!
     508    BitBlock segBuf[9 * SEGMENT_BLOCKS];
    482509
    483510    uint8_t * src_buffer = (uint8_t *) buf;
     511    uint8_t * seg_buffer = (uint8_t *) segBuf;
    484512    size_t count;
    485513
    486514      Classify_bytes classify_bytes;
    487515  Parse_marker parse_marker;
    488 
     516;
    489517
    490518    Csv2XmlWriter writer(outfile);
    491519    FlipSignal = ZERO;
    492     //if quoted string spanned more than 1 block, next block's FlipSignal should be 1
    493 
    494     while ((count = fread(src_buffer, sizeof(uint8_t), BLOCK_SIZE, infile)) > 0)
    495     {
     520
     521    bool infile_end = false;
     522    int segment_count;
     523    int segment_blocks;
     524    unsigned long long infile_count = 0;
     525    IteratorPackage package;
     526
     527    while (!infile_end)
     528    {
     529        segment_count = 0;
     530        segment_blocks = 0;
     531        package.init();
    496532        PERF_SEC_START(parser_timer);
    497533
    498         if (count < BLOCK_SIZE)
    499         {
    500             EOF_mask = bitblock::srl(simd<1>::constant<1>(), convert(BLOCK_SIZE-count));
    501             s2p_do_final_block((BytePack *) src_buffer, basis_bits, EOF_mask);
    502            
     534
     535        //if quoted string spanned more than 1 block, next block's FlipSignal should be 1
     536        while (segment_blocks < SEGMENT_BLOCKS && (count = fread(src_buffer, sizeof(uint8_t), BLOCK_SIZE, infile)) > 0)
     537        {
     538            memcpy(seg_buffer + segment_count, src_buffer, count);
     539            segment_blocks ++;
     540            segment_count += count;
     541            infile_count += count;
     542
     543            if (count < BLOCK_SIZE)
     544            {
     545                EOF_mask = bitblock::srl(simd<1>::constant<1>(), convert(BLOCK_SIZE-count));
     546                s2p_do_final_block((BytePack *) src_buffer, basis_bits, EOF_mask);
     547               
    503548  classify_bytes.do_final_block(basis_bits, lex, EOF_mask);
    504549  parse_marker.do_final_block(lex, marker, EOF_mask);
    505         }
    506         else
    507         {
    508             s2p_do_block((BytePack *) src_buffer, basis_bits);
    509            
     550            }
     551            else
     552            {
     553                s2p_do_block((BytePack *) src_buffer, basis_bits);
     554               
    510555  classify_bytes.do_block(basis_bits, lex);
    511556  parse_marker.do_block(lex, marker);
    512         }
    513 
    514         FlipSignal = ZERO;
    515         if (bitblock::any(simd_and(marker.quote_mask, HIGH_ONE)))
    516         {
    517             FlipSignal = ONE;
    518         }
    519 
    520         PERF_SEC_END(parser_timer, count);
    521 
    522         BufferToXMLParser bufParser(marker, lex, src_buffer, &writer);
    523         bufParser.parseEachRow(count);
    524     }
     557                FlipSignal = ZERO;
     558                if (bitblock::any(simd_and(marker.quote_mask, HIGH_ONE)))
     559                {
     560                    FlipSignal = ONE;
     561                }
     562            }
     563
     564            package.append(marker, lex);
     565        }
     566
     567        if (segment_count)
     568        {
     569            BufferToXMLParser bufParser(seg_buffer, &writer, &package, segment_blocks);
     570            bufParser.parseEachRow(segment_count);
     571            writer.flush();
     572
     573            PERF_SEC_END(parser_timer, segment_count);
     574        }
     575
     576        if (segment_blocks < SEGMENT_BLOCKS)
     577        {
     578            infile_end = true;
     579        }
     580    }
     581
     582
     583    printf("input: %lld\t", infile_count);
     584    printf("output: %lld\t", writer._outCount);
     585    printf("rate: %lf\n", (double) writer._outCount / (double) infile_count);
    525586}
Note: See TracChangeset for help on using the changeset viewer.