Ignore:
Timestamp:
Nov 18, 2013, 6:21:18 AM (6 years ago)
Author:
cameron
Message:

simd-lib updates

File:
1 edited

Legend:

Unmodified
Added
Removed
  • icXML/icXML-devel/src/simd-lib/bitblock_iterator.hpp

    r2720 r3567  
    99  bitblock_iterator.hpp
    1010
    11   Created on: Sept 2011
    12   Author: Ken Herdy
     11  Created on: Sept 2011, revised May 2013
     12  Authors: Ken Herdy and Rob Cameron
    1313
    1414  Description:
     
    1717        the 'scanwords' of a 'bitblock'. Scanner classes are templated
    1818        on bitblock type and scanword type.
     19
     20*/
     21
     22
     23//
     24// The following implementation of BitBlockScanner is optimized
     25// to eliminate branch mispredictions during scanning.  Only the
     26// essential methods are included.   RDC May 2013.
     27//
     28// Usage:
     29//   (1) declare:  BitBlockScanner myscanner;
     30//   (2) initialize:  myscanner.init(&mybitblock);
     31//   (3) iterate:  while (myscanner.has_next()) { pos = myscanner.scan_to_next();  ...}
     32//
     33
     34//
     35// Could also use FW 32.
     36#define FW 8
     37#define _FW _8
     38template <class bitblock_t, class scanblock_t>
     39class BitBlockScanner {
     40public:
     41        BitBlockScanner() {}
     42
     43        IDISA_ALWAYS_INLINE void init(const BitBlock *s) {
     44                remaining._bitblock = *s;
     45                mask = hsimd<FW>::signmask(simd_not(simd<FW>::eq(simd<1>::constant<0>(), remaining._bitblock)));
     46        }
     47
     48        IDISA_ALWAYS_INLINE int has_next() {
     49               
     50                     return mask != 0;
     51        }
     52
     53        IDISA_ALWAYS_INLINE int scan_to_next() {
     54               
     55                     int item_pos = scan_forward_zeroes(mask);
     56                     uint32_t scan_item = remaining._FW[item_pos];
     57                     int bitpos = scan_forward_zeroes(scan_item);
     58                     scan_item = scan_item & (scan_item - 1);
     59                     remaining._FW[item_pos] = scan_item;
     60                     // We could recalculate the mask, but updating it is faster.
     61                     // Note that this update code compiles to a SETcc instruction.
     62                     mask = mask & (mask - ((scan_item == 0) ? 1 : 0));
     63                     int pos = item_pos * FW + bitpos;
     64                     return pos;
     65        }
     66        // It slows things down to store the position.
     67        //IDISA_ALWAYS_INLINE int32_t get_pos() { return pos;}
     68private:
     69        union {bitblock_t _bitblock;
     70               uint8_t _8[sizeof(bitblock_t)];
     71               uint32_t _32[sizeof(bitblock_t)/sizeof(uint32_t)];} remaining;
     72        scanblock_t mask;                       
     73};
     74#undef _FW
     75#undef FW
     76
     77
     78/*
     79    A scanner to successively generate the positions marked by 1 bits
     80    in a bit stream segment of bitblock_count bit blocks.
     81
     82    Usage:
     83
     84    (1) declaration, e.g.
     85        BitStreamScanner<BitBlock, uint32_t, u8int_t, 8> s;
     86        (There is considerable flexibility for using different
     87         scanblock sizes for the main mask, as well as the
     88         scan field sizes within each block.)
     89
     90    (2) initialization/reinitialization
     91        s.init();
     92
     93    (3) load the blocks;
     94        for (i = 0; i++; i < n) {mybitblock = ...; s.load_block(mybitblock, i);}
     95
     96    (4) Iterative scan loop (generates each position exactly once.)
     97        while (s.has_next()) { pos = s.scan_to_next(); ...   };
     98*/
     99
     100template <class bitblock_t, class scanblock_t, class scanfield_t, int bitblock_count>
     101class BitStreamScanner {
     102public:
     103        /* Make sure that the number of bits in the mask at least equals
     104           the number of scanblocks. */
     105        /* Requires flag -std=gnu++0x  */
     106        static_assert(sizeof(scanblock_t) * 8 >= bitblock_count * sizeof(bitblock_t)/sizeof(scanfield_t),
     107                      "Too many bitblocks for a single scanword mask");
     108       
     109
     110        BitStreamScanner() {}
     111
     112        IDISA_ALWAYS_INLINE void init() { mask = 0;}
     113
     114        IDISA_ALWAYS_INLINE void load_block(BitBlock b, int i) {
     115                remaining._bitblock[i] = b;
     116                BitBlock mask_i = simd_not(simd<sizeof(scanfield_t)*8>::eq(simd<1>::constant<0>(), b));
     117                mask += ((scanblock_t) hsimd<sizeof(scanfield_t)*8>::signmask(mask_i)) << ((scanblock_t) i * (sizeof(bitblock_t)/sizeof(scanfield_t)));
     118        }
     119
     120        IDISA_ALWAYS_INLINE bool has_next() {
     121                     return mask != 0;
     122        }
     123
     124        IDISA_ALWAYS_INLINE int scan_to_next() {
     125                     int item_pos = scan_forward_zeroes(mask);
     126                     scanfield_t scan_item = remaining._scanfield[item_pos];
     127                     int bitpos = scan_forward_zeroes(scan_item);
     128                     scan_item = scan_item & (scan_item - 1);
     129                     remaining._scanfield[item_pos] = scan_item;
     130                     mask = mask & (mask - ((scan_item == 0) ? 1 : 0));
     131                     int pos = item_pos * sizeof(scanfield_t) * 8 + bitpos;
     132                     return pos;
     133        }
     134
     135        IDISA_ALWAYS_INLINE int get_final_pos() {
     136                     int item_pos = sizeof(scanblock_t) * 8 - scan_backward_zeroes((scanblock_t) mask) - 1;
     137                     scanfield_t scan_item = remaining._scanfield[item_pos];
     138                     int bitpos = sizeof(scanblock_t)  * 8 - scan_backward_zeroes((scanblock_t) scan_item) - 1;
     139                     int pos = item_pos * sizeof(scanfield_t) * 8 + bitpos;
     140                     return pos;
     141        }
     142
     143        IDISA_ALWAYS_INLINE void clear_from(int pos) {
     144                int item_pos = pos / (sizeof(scanfield_t) * 8);
     145                int bitpos = pos % (sizeof(scanfield_t) * 8);
     146                remaining._scanfield[item_pos] &= ((((scanfield_t) 1) << bitpos) - 1);
     147                item_pos += remaining._scanfield[item_pos] == 0 ? 0 : 1;
     148                mask = mask & (((scanblock_t) 1) << item_pos) - 1;
     149                for (int i = item_pos; i < bitblock_count * 2; i++) remaining._scanfield[i] = 0;
     150        }
     151   
     152        IDISA_ALWAYS_INLINE int count() {
     153                if (mask == 0) return 0;
     154                int ct = 0;
     155#define PARALLEL_COUNT
     156#ifdef PARALLEL_COUNT
     157                BitBlock sum8 = simd<1>::constant<0>();
     158                for (int i = 0; i < bitblock_count/2; i++) {
     159                    BitBlock ct4 = simd<4>::add(simd<4>::popcount(remaining._bitblock[2*i]), simd<4>::popcount(remaining._bitblock[2*i+1]));
     160                    sum8 = simd<8>::add(sum8, simd<8>::add_hl(ct4));
     161                }
     162                if ((bitblock_count & 1) != 0) {  // Should be compiled out if bitblock_count is even.
     163                    sum8 = simd<8>::add(sum8, simd<8>::popcount(remaining._bitblock[bitblock_count]));
     164                }
     165                ct = mvmd<32>::extract<0>(simd<128>::add_hl(simd<64>::add_hl(simd<32>::add_hl(simd<16>::add_hl(sum8)))));
     166#endif
     167#ifndef PARALLEL_COUNT
     168                for (int i = 0; i < bitblock_count; i++) {
     169                    ct += bitblock::popcount(remaining._bitblock[i]);
     170                }
     171#endif
     172                return ct;
     173        }
     174   
     175   
     176private:
     177        union {bitblock_t _bitblock[bitblock_count];
     178               scanfield_t _scanfield[bitblock_count * sizeof(bitblock_t)/sizeof(scanfield_t)];} remaining;
     179        scanblock_t mask;
     180};
     181
     182
     183
     184/*=============================================================================
     185
     186   Deprecated:
    19187
    20188        BitBlock iterator classes provide Standard Template Library (STL)
     
    72240//
    73241#define has_bit(x) (x != 0)
     242#define EOS -1
    74243
    75244template <class bitblock_t, class scanblock_t>
     
    77246
    78247protected:
    79         Scanner(): strm(NULL), pos(-1), blk(-1), scan_blk(-1) {}
     248        Scanner(): strm(NULL), pos(EOS), blk(-1), scan_blk(-1) {}
    80249        Scanner(const bitblock_t * s, uint32_t start_pos, uint32_t start_blk, scanblock_t start_scan_blk): strm(s), pos(start_pos), blk(start_blk), scan_blk(start_scan_blk) {}
    81250
     
    116285                };
    117286
    118                 this->pos = -1;
     287                this->pos = EOS;
    119288                return (this->pos);
    120289        }
     
    131300        }
    132301
    133         IDISA_ALWAYS_INLINE bool is_done() {return (-1==this->pos);}
     302        IDISA_ALWAYS_INLINE bool is_done() const {return (EOS==this->pos);}
    134303        IDISA_ALWAYS_INLINE void set_strm(const bitblock_t * strm) {this->strm = strm;}
    135304        IDISA_ALWAYS_INLINE const bitblock_t * get_strm() const {return this->strm;}
    136305        IDISA_ALWAYS_INLINE int32_t get_pos() const {return this->pos;}
    137 
     306        IDISA_ALWAYS_INLINE void set_pos(int32_t pos) {(this->pos = pos);}
    138307        static const int32_t BLOCK_COUNT = sizeof(bitblock_t)/sizeof(scanblock_t);
    139308
    140309};
     310
     311class BitBlockForwardScanner: public ForwardScanner<BitBlock, ScanWord> {
     312public:
     313        BitBlockForwardScanner(){}
     314        BitBlockForwardScanner(BitBlock * s): ForwardScanner<BitBlock, ScanWord>(s){}
     315};
     316
    141317
    142318template <class bitblock_t, class scanblock_t>
     
    168344                };
    169345
    170                 this->pos = -1;
     346                this->pos = EOS;
    171347                return (this->pos);
    172348        }
     
    183359        }
    184360
    185         IDISA_ALWAYS_INLINE bool is_done() {return (-1==this->pos);}
     361        IDISA_ALWAYS_INLINE bool is_done() const {return (EOS==this->pos);}
    186362        IDISA_ALWAYS_INLINE void set_strm(const bitblock_t * strm) {this->strm = strm;}
    187363        IDISA_ALWAYS_INLINE const bitblock_t * get_strm() const {return this->strm;}
    188364        IDISA_ALWAYS_INLINE int32_t get_pos() const {return this->pos;}
    189 
     365        IDISA_ALWAYS_INLINE void set_pos(int32_t pos) {(this->pos = pos);}
    190366        static const uint32_t BLOCK_COUNT = sizeof(bitblock_t)/sizeof(scanblock_t);
    191367
    192368};
    193369
    194 #undef has_bit
    195370
    196371//
     
    201376{
    202377public:
    203         ForwardIterator() {}
    204378
    205379        ForwardIterator(bitblock_t * s): scanner(s)
     
    208382        }
    209383
     384        // set scanner to first pos of
    210385        void init(bitblock_t * s)
    211386        {
     
    217392        bool operator==(const ForwardIterator& iter)
    218393        {
    219                 return ((scanner.get_strm() = iter.scanner.get_strm()) && (scanner.get_pos() == iter.scanner.get_pos));
    220         }
    221 
    222         // not equal .get_pos()ition and stream
     394                return ((scanner.get_strm() == iter.scanner.get_strm()) &&
     395                        (scanner.get_pos() == iter.scanner.get_pos()));
     396        }
     397
     398        // not equal position and stream
    223399        bool operator!=(const ForwardIterator& iter)
    224400        {
    225                 return ( (scanner.get_strm() != iter.scanner.get_strm()) && (scanner.get_pos() != iter.scanner.get_pos()));
     401                return ((scanner.get_strm() != iter.scanner.get_strm()) &&
     402                        (scanner.get_pos() != iter.scanner.get_pos()));
    226403        }
    227404
     
    247424        }
    248425
     426        IDISA_ALWAYS_INLINE bool isDone() const
     427        {
     428                return scanner.is_done();
     429        }
     430
     431protected:
     432        ForwardIterator() {}
     433
    249434private:
    250435        ForwardScanner<bitblock_t, scanblock_t> scanner;
     
    256441        BitBlockForwardIterator(BitBlock * s): ForwardIterator<BitBlock, ScanWord>(s){}
    257442};
     443
    258444
    259445template<class bitblock_t, class scanblock_t>
     
    261447{
    262448public:
    263         ReverseIterator() {}
    264449        ReverseIterator(BitBlock * s): scanner(s)
    265450        {
     
    276461        bool operator==(const ReverseIterator& iter)
    277462        {
    278                 return ((scanner.get_strm() = iter.scanner.get_strm()) && (scanner.get_pos() == iter.scanner.get_pos));
    279         }
    280 
    281         // not equal .get_pos()ition and stream
     463                return ((scanner.get_strm() == iter.scanner.get_strm()) && (scanner.get_pos() == iter.scanner.get_pos));
     464        }
     465
     466        // not equal position and stream
    282467        bool operator!=(const ReverseIterator& iter)
    283468        {
     
    306491        }
    307492
     493        IDISA_ALWAYS_INLINE bool isDone() const
     494        {
     495                return scanner.is_done();
     496        }
     497
     498protected:
     499        ReverseIterator() {}
     500        ReverseScanner<bitblock_t, scanblock_t> scanner;
     501};
     502
     503class BitBlockReverseIterator: public ReverseIterator<BitBlock, ScanWord>
     504{
     505public:
     506        BitBlockReverseIterator(BitBlock * s): ReverseIterator<BitBlock, ScanWord>(s){}
    308507private:
    309         ReverseScanner<bitblock_t, scanblock_t> scanner;
    310 };
    311 
    312 class BitBlockReverseIterator: public ReverseIterator<BitBlock, ScanWord> {
    313 public:
    314508        BitBlockReverseIterator(){}
    315         BitBlockReverseIterator(BitBlock * s): ReverseIterator<BitBlock, ScanWord>(s){}
    316509};
    317510
     
    322515{
    323516public:
    324         BitStreamIterator():pos(-1), blk(-1), blk_pos(-1), strm(NULL), scan_blk(-1), scan_blk_cnt(0)
    325         {
    326                 // default constructor defines past-the-end of bit stream semantics, pos == -1
     517        BitStreamIterator():pos(EOS), blk(-1), blk_pos(-1), strm(NULL), scan_blk(-1), scan_blk_cnt(0)
     518        {
     519                // default constructor defines past-the-end of bit stream semantics, pos == EOS
    327520        }
    328521
     
    354547        bool operator==(const BitStreamIterator& iter)
    355548        {
    356                 return((strm = iter.strm) && (pos == iter.pos));
     549                return((strm == iter.strm) && (pos == iter.pos));
    357550        }
    358551
     
    390583        }
    391584        */
     585
     586        IDISA_ALWAYS_INLINE bool isDone() const
     587        {
     588                return (EOS == pos);
     589        }
    392590
    393591        void debug() {
     
    419617                };
    420618
    421                 pos = -1;
     619                pos = EOS;
    422620                return;
    423621        }
    424622};
    425623
     624
     625
     626#undef has_bit
     627
     628
    426629#endif // BITBLOCK_ITERATOR_H_
    427630
Note: See TracChangeset for help on using the changeset viewer.