Changeset 3567 for icXML/icXML-devel


Ignore:
Timestamp:
Nov 18, 2013, 6:21:18 AM (6 years ago)
Author:
cameron
Message:

simd-lib updates

Location:
icXML/icXML-devel/src/simd-lib
Files:
7 edited

Legend:

Unmodified
Added
Removed
  • icXML/icXML-devel/src/simd-lib/bitblock128.hpp

    r2720 r3567  
    3636static IDISA_ALWAYS_INLINE carry_t bitblock2carry(bitblock128_t carry);
    3737
     38static IDISA_ALWAYS_INLINE carry_t carryout2carry(bitblock128_t carryout);
     39
    3840static IDISA_ALWAYS_INLINE void adc(bitblock128_t x, bitblock128_t y, carry_t & carry, bitblock128_t & sum);
    3941static IDISA_ALWAYS_INLINE void sbb(bitblock128_t x, bitblock128_t y, carry_t & borrow, bitblock128_t & difference);
     
    5052static IDISA_ALWAYS_INLINE carry_t bitblock2carry(bitblock128_t carry) {  return carry;}
    5153
     54static IDISA_ALWAYS_INLINE carry_t carryout2carry(bitblock128_t carryout) {
     55 return bitblock::srli<BLOCK_SIZE-1>(carryout);
     56}
    5257
    5358
  • icXML/icXML-devel/src/simd-lib/bitblock_iterator.hpp

    r2720 r3567  
    99  bitblock_iterator.hpp
    1010
    11   Created on: Sept 2011
    12   Author: Ken Herdy
     11  Created on: Sept 2011, revised May 2013
     12  Authors: Ken Herdy and Rob Cameron
    1313
    1414  Description:
     
    1717        the 'scanwords' of a 'bitblock'. Scanner classes are templated
    1818        on bitblock type and scanword type.
     19
     20*/
     21
     22
     23//
     24// The following implementation of BitBlockScanner is optimized
     25// to eliminate branch mispredictions during scanning.  Only the
     26// essential methods are included.   RDC May 2013.
     27//
     28// Usage:
     29//   (1) declare:  BitBlockScanner myscanner;
     30//   (2) initialize:  myscanner.init(&mybitblock);
     31//   (3) iterate:  while (myscanner.has_next()) { pos = myscanner.scan_to_next();  ...}
     32//
     33
     34//
     35// Could also use FW 32.
     36#define FW 8
     37#define _FW _8
     38template <class bitblock_t, class scanblock_t>
     39class BitBlockScanner {
     40public:
     41        BitBlockScanner() {}
     42
     43        IDISA_ALWAYS_INLINE void init(const BitBlock *s) {
     44                remaining._bitblock = *s;
     45                mask = hsimd<FW>::signmask(simd_not(simd<FW>::eq(simd<1>::constant<0>(), remaining._bitblock)));
     46        }
     47
     48        IDISA_ALWAYS_INLINE int has_next() {
     49               
     50                     return mask != 0;
     51        }
     52
     53        IDISA_ALWAYS_INLINE int scan_to_next() {
     54               
     55                     int item_pos = scan_forward_zeroes(mask);
     56                     uint32_t scan_item = remaining._FW[item_pos];
     57                     int bitpos = scan_forward_zeroes(scan_item);
     58                     scan_item = scan_item & (scan_item - 1);
     59                     remaining._FW[item_pos] = scan_item;
     60                     // We could recalculate the mask, but updating it is faster.
     61                     // Note that this update code compiles to a SETcc instruction.
     62                     mask = mask & (mask - ((scan_item == 0) ? 1 : 0));
     63                     int pos = item_pos * FW + bitpos;
     64                     return pos;
     65        }
     66        // It slows things down to store the position.
     67        //IDISA_ALWAYS_INLINE int32_t get_pos() { return pos;}
     68private:
     69        union {bitblock_t _bitblock;
     70               uint8_t _8[sizeof(bitblock_t)];
     71               uint32_t _32[sizeof(bitblock_t)/sizeof(uint32_t)];} remaining;
     72        scanblock_t mask;                       
     73};
     74#undef _FW
     75#undef FW
     76
     77
     78/*
     79    A scanner to successively generate the positions marked by 1 bits
     80    in a bit stream segment of bitblock_count bit blocks.
     81
     82    Usage:
     83
     84    (1) declaration, e.g.
     85        BitStreamScanner<BitBlock, uint32_t, uint8_t, 8> s;
     86        (There is considerable flexibility for using different
     87         scanblock sizes for the main mask, as well as the
     88         scan field sizes within each block.)
     89
     90    (2) initialization/reinitialization
     91        s.init();
     92
     93    (3) load the blocks;
     94        for (i = 0; i < n; i++) {mybitblock = ...; s.load_block(mybitblock, i);}
     95
     96    (4) Iterative scan loop (generates each position exactly once.)
     97        while (s.has_next()) { pos = s.scan_to_next(); ...   };
     98*/
     99
     100template <class bitblock_t, class scanblock_t, class scanfield_t, int bitblock_count>
     101class BitStreamScanner {
     102public:
     103        /* Make sure that the number of bits in the mask at least equals
     104           the number of scanblocks. */
     105        /* Requires flag -std=gnu++0x  */
     106        static_assert(sizeof(scanblock_t) * 8 >= bitblock_count * sizeof(bitblock_t)/sizeof(scanfield_t),
     107                      "Too many bitblocks for a single scanword mask");
     108       
     109
     110        BitStreamScanner() {}
     111
     112        IDISA_ALWAYS_INLINE void init() { mask = 0;}
     113
     114        IDISA_ALWAYS_INLINE void load_block(BitBlock b, int i) {
     115                remaining._bitblock[i] = b;
     116                BitBlock mask_i = simd_not(simd<sizeof(scanfield_t)*8>::eq(simd<1>::constant<0>(), b));
     117                mask += ((scanblock_t) hsimd<sizeof(scanfield_t)*8>::signmask(mask_i)) << ((scanblock_t) i * (sizeof(bitblock_t)/sizeof(scanfield_t)));
     118        }
     119
     120        IDISA_ALWAYS_INLINE bool has_next() {
     121                     return mask != 0;
     122        }
     123
     124        IDISA_ALWAYS_INLINE int scan_to_next() {
     125                     int item_pos = scan_forward_zeroes(mask);
     126                     scanfield_t scan_item = remaining._scanfield[item_pos];
     127                     int bitpos = scan_forward_zeroes(scan_item);
     128                     scan_item = scan_item & (scan_item - 1);
     129                     remaining._scanfield[item_pos] = scan_item;
     130                     mask = mask & (mask - ((scan_item == 0) ? 1 : 0));
     131                     int pos = item_pos * sizeof(scanfield_t) * 8 + bitpos;
     132                     return pos;
     133        }
     134
     135        IDISA_ALWAYS_INLINE int get_final_pos() {
     136                     int item_pos = sizeof(scanblock_t) * 8 - scan_backward_zeroes((scanblock_t) mask) - 1;
     137                     scanfield_t scan_item = remaining._scanfield[item_pos];
     138                     int bitpos = sizeof(scanblock_t)  * 8 - scan_backward_zeroes((scanblock_t) scan_item) - 1;
     139                     int pos = item_pos * sizeof(scanfield_t) * 8 + bitpos;
     140                     return pos;
     141        }
     142
     143        IDISA_ALWAYS_INLINE void clear_from(int pos) {
     144                int item_pos = pos / (sizeof(scanfield_t) * 8);
     145                int bitpos = pos % (sizeof(scanfield_t) * 8);
     146                remaining._scanfield[item_pos] &= ((((scanfield_t) 1) << bitpos) - 1);
     147                item_pos += remaining._scanfield[item_pos] == 0 ? 0 : 1;
     148                mask = mask & (((scanblock_t) 1) << item_pos) - 1;
     149                for (int i = item_pos; i < bitblock_count * 2; i++) remaining._scanfield[i] = 0;
     150        }
     151   
     152        IDISA_ALWAYS_INLINE int count() {
     153                if (mask == 0) return 0;
     154                int ct = 0;
     155#define PARALLEL_COUNT
     156#ifdef PARALLEL_COUNT
     157                BitBlock sum8 = simd<1>::constant<0>();
     158                for (int i = 0; i < bitblock_count/2; i++) {
     159                    BitBlock ct4 = simd<4>::add(simd<4>::popcount(remaining._bitblock[2*i]), simd<4>::popcount(remaining._bitblock[2*i+1]));
     160                    sum8 = simd<8>::add(sum8, simd<8>::add_hl(ct4));
     161                }
     162                if ((bitblock_count & 1) != 0) {  // Should be compiled out if bitblock_count is even.
     163                    sum8 = simd<8>::add(sum8, simd<8>::popcount(remaining._bitblock[bitblock_count]));
     164                }
     165                ct = mvmd<32>::extract<0>(simd<128>::add_hl(simd<64>::add_hl(simd<32>::add_hl(simd<16>::add_hl(sum8)))));
     166#endif
     167#ifndef PARALLEL_COUNT
     168                for (int i = 0; i < bitblock_count; i++) {
     169                    ct += bitblock::popcount(remaining._bitblock[i]);
     170                }
     171#endif
     172                return ct;
     173        }
     174   
     175   
     176private:
     177        union {bitblock_t _bitblock[bitblock_count];
     178               scanfield_t _scanfield[bitblock_count * sizeof(bitblock_t)/sizeof(scanfield_t)];} remaining;
     179        scanblock_t mask;
     180};
     181
     182
     183
     184/*=============================================================================
     185
     186   Deprecated:
    19187
    20188        BitBlock iterator classes provide Standard Template Library (STL)
     
    72240//
    73241#define has_bit(x) (x != 0)
     242#define EOS -1
    74243
    75244template <class bitblock_t, class scanblock_t>
     
    77246
    78247protected:
    79         Scanner(): strm(NULL), pos(-1), blk(-1), scan_blk(-1) {}
     248        Scanner(): strm(NULL), pos(EOS), blk(-1), scan_blk(-1) {}
    80249        Scanner(const bitblock_t * s, uint32_t start_pos, uint32_t start_blk, scanblock_t start_scan_blk): strm(s), pos(start_pos), blk(start_blk), scan_blk(start_scan_blk) {}
    81250
     
    116285                };
    117286
    118                 this->pos = -1;
     287                this->pos = EOS;
    119288                return (this->pos);
    120289        }
     
    131300        }
    132301
    133         IDISA_ALWAYS_INLINE bool is_done() {return (-1==this->pos);}
     302        IDISA_ALWAYS_INLINE bool is_done() const {return (EOS==this->pos);}
    134303        IDISA_ALWAYS_INLINE void set_strm(const bitblock_t * strm) {this->strm = strm;}
    135304        IDISA_ALWAYS_INLINE const bitblock_t * get_strm() const {return this->strm;}
    136305        IDISA_ALWAYS_INLINE int32_t get_pos() const {return this->pos;}
    137 
     306        IDISA_ALWAYS_INLINE void set_pos(int32_t pos) {(this->pos = pos);}
    138307        static const int32_t BLOCK_COUNT = sizeof(bitblock_t)/sizeof(scanblock_t);
    139308
    140309};
     310
     311class BitBlockForwardScanner: public ForwardScanner<BitBlock, ScanWord> {
     312public:
     313        BitBlockForwardScanner(){}
     314        BitBlockForwardScanner(BitBlock * s): ForwardScanner<BitBlock, ScanWord>(s){}
     315};
     316
    141317
    142318template <class bitblock_t, class scanblock_t>
     
    168344                };
    169345
    170                 this->pos = -1;
     346                this->pos = EOS;
    171347                return (this->pos);
    172348        }
     
    183359        }
    184360
    185         IDISA_ALWAYS_INLINE bool is_done() {return (-1==this->pos);}
     361        IDISA_ALWAYS_INLINE bool is_done() const {return (EOS==this->pos);}
    186362        IDISA_ALWAYS_INLINE void set_strm(const bitblock_t * strm) {this->strm = strm;}
    187363        IDISA_ALWAYS_INLINE const bitblock_t * get_strm() const {return this->strm;}
    188364        IDISA_ALWAYS_INLINE int32_t get_pos() const {return this->pos;}
    189 
     365        IDISA_ALWAYS_INLINE void set_pos(int32_t pos) {(this->pos = pos);}
    190366        static const uint32_t BLOCK_COUNT = sizeof(bitblock_t)/sizeof(scanblock_t);
    191367
    192368};
    193369
    194 #undef has_bit
    195370
    196371//
     
    201376{
    202377public:
    203         ForwardIterator() {}
    204378
    205379        ForwardIterator(bitblock_t * s): scanner(s)
     
    208382        }
    209383
     384        // set scanner to first pos of
    210385        void init(bitblock_t * s)
    211386        {
     
    217392        bool operator==(const ForwardIterator& iter)
    218393        {
    219                 return ((scanner.get_strm() = iter.scanner.get_strm()) && (scanner.get_pos() == iter.scanner.get_pos));
    220         }
    221 
    222         // not equal .get_pos()ition and stream
     394                return ((scanner.get_strm() == iter.scanner.get_strm()) &&
     395                        (scanner.get_pos() == iter.scanner.get_pos()));
     396        }
     397
     398        // not equal position and stream
    223399        bool operator!=(const ForwardIterator& iter)
    224400        {
    225                 return ( (scanner.get_strm() != iter.scanner.get_strm()) && (scanner.get_pos() != iter.scanner.get_pos()));
     401                return ((scanner.get_strm() != iter.scanner.get_strm()) &&
     402                        (scanner.get_pos() != iter.scanner.get_pos()));
    226403        }
    227404
     
    247424        }
    248425
     426        IDISA_ALWAYS_INLINE bool isDone() const
     427        {
     428                return scanner.is_done();
     429        }
     430
     431protected:
     432        ForwardIterator() {}
     433
    249434private:
    250435        ForwardScanner<bitblock_t, scanblock_t> scanner;
     
    256441        BitBlockForwardIterator(BitBlock * s): ForwardIterator<BitBlock, ScanWord>(s){}
    257442};
     443
    258444
    259445template<class bitblock_t, class scanblock_t>
     
    261447{
    262448public:
    263         ReverseIterator() {}
    264449        ReverseIterator(BitBlock * s): scanner(s)
    265450        {
     
    276461        bool operator==(const ReverseIterator& iter)
    277462        {
    278                 return ((scanner.get_strm() = iter.scanner.get_strm()) && (scanner.get_pos() == iter.scanner.get_pos));
    279         }
    280 
    281         // not equal .get_pos()ition and stream
     463                return ((scanner.get_strm() == iter.scanner.get_strm()) && (scanner.get_pos() == iter.scanner.get_pos));
     464        }
     465
     466        // not equal position and stream
    282467        bool operator!=(const ReverseIterator& iter)
    283468        {
     
    306491        }
    307492
     493        IDISA_ALWAYS_INLINE bool isDone() const
     494        {
     495                return scanner.is_done();
     496        }
     497
     498protected:
     499        ReverseIterator() {}
     500        ReverseScanner<bitblock_t, scanblock_t> scanner;
     501};
     502
     503class BitBlockReverseIterator: public ReverseIterator<BitBlock, ScanWord>
     504{
     505public:
     506        BitBlockReverseIterator(BitBlock * s): ReverseIterator<BitBlock, ScanWord>(s){}
    308507private:
    309         ReverseScanner<bitblock_t, scanblock_t> scanner;
    310 };
    311 
    312 class BitBlockReverseIterator: public ReverseIterator<BitBlock, ScanWord> {
    313 public:
    314508        BitBlockReverseIterator(){}
    315         BitBlockReverseIterator(BitBlock * s): ReverseIterator<BitBlock, ScanWord>(s){}
    316509};
    317510
     
    322515{
    323516public:
    324         BitStreamIterator():pos(-1), blk(-1), blk_pos(-1), strm(NULL), scan_blk(-1), scan_blk_cnt(0)
    325         {
    326                 // default constructor defines past-the-end of bit stream semantics, pos == -1
     517        BitStreamIterator():pos(EOS), blk(-1), blk_pos(-1), strm(NULL), scan_blk(-1), scan_blk_cnt(0)
     518        {
     519                // default constructor defines past-the-end of bit stream semantics, pos == EOS
    327520        }
    328521
     
    354547        bool operator==(const BitStreamIterator& iter)
    355548        {
    356                 return((strm = iter.strm) && (pos == iter.pos));
     549                return((strm == iter.strm) && (pos == iter.pos));
    357550        }
    358551
     
    390583        }
    391584        */
     585
     586        IDISA_ALWAYS_INLINE bool isDone() const
     587        {
     588                return (EOS == pos);
     589        }
    392590
    393591        void debug() {
     
    419617                };
    420618
    421                 pos = -1;
     619                pos = EOS;
    422620                return;
    423621        }
    424622};
    425623
     624
     625
     626#undef has_bit
     627
     628
    426629#endif // BITBLOCK_ITERATOR_H_
    427630
  • icXML/icXML-devel/src/simd-lib/builtins.hpp

    r2720 r3567  
    8080        #endif
    8181
    82         IDISA_ALWAYS_INLINE uint32_t scan_forward_zeroes(ScanWord x) { // Precondition: x != 0
    83                 return (uint32_t) ScanForwardIntrinsic((ScanWord) x);
     82        IDISA_ALWAYS_INLINE int scan_forward_zeroes(ScanWord x) { // Precondition: x != 0
     83                return ScanForwardIntrinsic((ScanWord) x);
    8484        }
    8585
    86         IDISA_ALWAYS_INLINE uint32_t scan_backward_zeroes(ScanWord x) { // Precondition: x != 0
    87                 return (uint32_t) ScanReverseIntrinsic((ScanWord) x);
     86        IDISA_ALWAYS_INLINE int scan_backward_zeroes(ScanWord x) { // Precondition: x != 0
     87                return ScanReverseIntrinsic((ScanWord) x);
    8888        }
    8989
  • icXML/icXML-devel/src/simd-lib/carryQ.hpp

    r2720 r3567  
    1212=============================================================================*/
    1313
     14#include <string.h>
    1415#include <stdint.h>
    1516#include <iostream>
     
    4344}
    4445
    45 
    46 #include <string.h>
    47 
     46template <uint16_t CarryCount, uint16_t AdvanceNCount> class CarryArray;
     47
     48#define LocalCarryCombine(carrySet, localCarry, carryNo, carryCount)\
     49        carrySet.CarryCombine(localCarry.cq, carryNo, carryCount); 
     50
     51#define CarryDeclare(name, carry1_count, carryN_count)\
     52CarryArray<carry1_count, carryN_count> name;
    4853
    4954// Array of BitBlock implementation.
     
    5358public:
    5459
    55     #define Carry0 simd<BLOCK_SIZE>::constant<0>()
    56     #define Carry1 simd<BLOCK_SIZE>::constant<1>()
    57  
    58         BitBlock cq[CarryCount + AdvanceNCount];
     60#ifndef CARRY64
     61    #define carry_value_0 simd<BLOCK_SIZE>::constant<0>()
     62    #define carry_value_1 simd<BLOCK_SIZE>::constant<1>()
     63#endif
     64#ifdef CARRY64
     65    #define carry_value_0 0
     66    #define carry_value_1 1
     67#endif
     68
     69        carry_t cq[CarryCount + AdvanceNCount];
    5970        //BitBlock pending64[AdvanceNCount];
    6071        CarryArray()
    6172        {
    62             memset (cq, 0, sizeof(BitBlock) * (CarryCount + AdvanceNCount));
     73            memset (cq, 0, sizeof(carry_t) * (CarryCount + AdvanceNCount));
    6374            //memset(pending64, 0, sizeof(BitBlock) * AdvanceNCount);
    6475        }
    6576        ~CarryArray() {}
    6677
    67         IDISA_ALWAYS_INLINE BitBlock BitBlock_advance_ci_co(BitBlock strm, BitBlock carryin, uint16_t carryno)
     78        IDISA_ALWAYS_INLINE BitBlock BitBlock_advance_ci_co(BitBlock strm, carry_t carryin, uint16_t carryno)
    6879        {
    6980                BitBlock rslt;
     
    7283        }
    7384
    74         IDISA_ALWAYS_INLINE BitBlock BitBlock_add_ci_co(BitBlock strm1, BitBlock strm2, BitBlock carryin, const uint16_t carryno)
     85        IDISA_ALWAYS_INLINE BitBlock BitBlock_add_ci_co(BitBlock strm1, BitBlock strm2, carry_t carryin, const uint16_t carryno)
    7586        {
    7687                BitBlock sum;
     
    7990        }
    8091
    81         IDISA_ALWAYS_INLINE BitBlock BitBlock_sub_ci_co(BitBlock strm1, BitBlock strm2, BitBlock carryin, uint16_t carryno)
     92        IDISA_ALWAYS_INLINE BitBlock BitBlock_sub_ci_co(BitBlock strm1, BitBlock strm2, carry_t carryin, uint16_t carryno)
    8293        {
    8394                BitBlock diff;
     
    8697        }
    8798
    88         IDISA_ALWAYS_INLINE BitBlock BitBlock_scantofirst(BitBlock charclass, BitBlock carryin, uint16_t carryno)
     99        IDISA_ALWAYS_INLINE BitBlock BitBlock_scantofirst(BitBlock charclass, carry_t carryin, uint16_t carryno)
    89100        {
    90101                BitBlock marker;
     
    95106        }
    96107
    97         IDISA_ALWAYS_INLINE BitBlock BitBlock_scanthru_ci_co(BitBlock markers0, BitBlock charclass, BitBlock carryin, uint16_t carryno)
     108        IDISA_ALWAYS_INLINE BitBlock BitBlock_scanthru_ci_co(BitBlock markers0, BitBlock charclass, carry_t carryin, uint16_t carryno)
    98109        {
    99110                BitBlock markers1;
     
    102113        }
    103114
    104         IDISA_ALWAYS_INLINE BitBlock BitBlock_advance_then_scanthru(BitBlock markers0, BitBlock charclass, BitBlock carryin, uint16_t carryno)
     115        IDISA_ALWAYS_INLINE BitBlock BitBlock_advance_then_scanthru(BitBlock markers0, BitBlock charclass, carry_t carryin, uint16_t carryno)
    105116        {
    106117                BitBlock markers1;
     
    110121        }
    111122
    112         IDISA_ALWAYS_INLINE BitBlock BitBlock_span_upto(BitBlock starts, BitBlock follows, BitBlock carryin, uint16_t carryno)
     123        IDISA_ALWAYS_INLINE BitBlock BitBlock_span_upto(BitBlock starts, BitBlock follows, carry_t carryin, uint16_t carryno)
    113124        {
    114125                BitBlock span;
     
    117128        }
    118129
    119         IDISA_ALWAYS_INLINE BitBlock BitBlock_inclusive_span(BitBlock starts, BitBlock ends, BitBlock carryin, uint16_t carryno)
     130        IDISA_ALWAYS_INLINE BitBlock BitBlock_inclusive_span(BitBlock starts, BitBlock ends, carry_t carryin, uint16_t carryno)
    120131        {
    121132                BitBlock span;
     
    124135        }
    125136
    126         IDISA_ALWAYS_INLINE BitBlock BitBlock_exclusive_span(BitBlock starts, BitBlock ends, BitBlock carryin, uint16_t carryno)
     137        IDISA_ALWAYS_INLINE BitBlock BitBlock_exclusive_span(BitBlock starts, BitBlock ends, carry_t carryin, uint16_t carryno)
    127138        {
    128139                BitBlock span;
     
    131142        }
    132143
    133 
     144/*
    134145        IDISA_ALWAYS_INLINE BitBlock BitBlock_advance32_ci_co(BitBlock strm, uint32_t pending_in, uint32_t & pending_out)
    135146        {
     
    137148                return simd_or(simd<BLOCK_SIZE>::slli<32>(strm), mvmd<BLOCK_SIZE>::fill((uint64_t)pending_in));
    138149        }
    139 
    140         template <int n> IDISA_ALWAYS_INLINE BitBlock BitBlock_advance_n_(BitBlock strm, BitBlock pending_in, uint16_t pendingno)
     150*/
     151
     152/*
     153        template <int n> IDISA_ALWAYS_INLINE BitBlock BitBlock_advance_n_(BitBlock strm, carry_t pending_in, uint16_t pendingno)
    141154        {
    142155                BitBlock half_block_shifted = esimd<BLOCK_SIZE/2>::mergel(strm, pending_in);
     
    147160                return result;
    148161        }
     162*/
    149163
    150164        IDISA_ALWAYS_INLINE bool CarryTest(uint16_t carryno, uint16_t carry_count)
    151 
    152 
    153         {
    154                   BitBlock c1 = cq[carryno];
     165        {
     166                  carry_t c1 = cq[carryno];
    155167                  int ubound = carryno + carry_count;
    156168                  for (int i = carryno + 1; i < ubound ; i++) {
     
    160172        }
    161173
     174        IDISA_ALWAYS_INLINE carry_t CarryRange(uint16_t carryno, uint16_t carry_count)
     175        {
     176                  carry_t c1 = cq[carryno];
     177                  int ubound = carryno + carry_count;
     178                  for (int i = carryno + 1; i < ubound ; i++) {
     179                        c1 = carry_or(c1, cq[i]);
     180                  }
     181                  return c1;
     182        }
     183
    162184        IDISA_ALWAYS_INLINE void CarryDequeueEnqueue(uint16_t carryno, uint16_t carry_count)
    163185        {
     
    165187        }
    166188
     189        // Deprecated (renamed)
    167190        IDISA_ALWAYS_INLINE void CarryQ_Adjust(uint16_t carry_count)
    168191        {
     
    170193        }
    171194
    172         IDISA_ALWAYS_INLINE void CarryCombine(BitBlock local_cq[], uint16_t carryno, uint16_t carry_count)
     195        IDISA_ALWAYS_INLINE void CarryAdjust(uint16_t carry_count)
     196        {
     197                return;
     198        }
     199
     200        IDISA_ALWAYS_INLINE void CarryCombine(carry_t local_cq[], uint16_t carryno, uint16_t carry_count)
    173201        {
    174202                  for (int i = 0; i < carry_count; i++) {
     
    180208        {
    181209                  cq[carryno] = carry_or(cq[carryno], cq[carry2]);
    182                   cq[carry2] = Carry0;
    183         }
    184 
    185         IDISA_ALWAYS_INLINE BitBlock get_carry_in(uint16_t carryno) const
    186         {
    187                 return carry2bitblock(cq[carryno]);
    188         }
    189 
    190 //      IDISA_ALWAYS_INLINE BitBlock get_pending64(uint16_t advance_n_blkno) const
    191 //      {
    192 //              return pending64[advance_n_blkno];
    193 //      }
    194 
    195         IDISA_ALWAYS_INLINE BitBlock get_pending64(uint16_t advance_n_blkno) const
     210                  cq[carry2] = carry_value_0;
     211        }
     212
     213        IDISA_ALWAYS_INLINE carry_t get_carry_in(uint16_t carryno) const
     214        {
     215                return cq[carryno];
     216        }
     217
     218        // Deprecated (renamed)
     219        IDISA_ALWAYS_INLINE carry_t GetCarry(uint16_t carryno) const
     220        {
     221                return cq[carryno];
     222        }
     223
     224        IDISA_ALWAYS_INLINE void SetCarry(carry_t carryVal, uint16_t carryno)
     225        {
     226                cq[carryno] = carryVal;
     227        }
     228
     229
     230        // Deprecated in PabloJ, retained for legacy compiler.
     231        IDISA_ALWAYS_INLINE carry_t get_pending64(uint16_t advance_n_blkno) const
     232        {
     233                return cq[CarryCount + advance_n_blkno];
     234        }
     235
     236        IDISA_ALWAYS_INLINE carry_t Pending64(uint16_t advance_n_blkno) const
    196237        {
    197238                return cq[CarryCount + advance_n_blkno];
     
    200241//private:
    201242        // helpers
    202         IDISA_ALWAYS_INLINE BitBlock carry_flip(BitBlock carry) const
    203         {
    204                 return simd_xor(carry, Carry1);
    205         }
    206 
    207         IDISA_ALWAYS_INLINE bool test_carry(BitBlock carry) const
     243#ifndef  CARRY64
     244        // Deprecated (renamed)
     245        IDISA_ALWAYS_INLINE carry_t carry_flip(carry_t carry) const
     246        {
     247                return simd_xor(carry, carry_value_1);
     248        }
     249
     250        IDISA_ALWAYS_INLINE BitBlock CarryFlip(carry_t carry) const
     251        {
     252                return simd_xor(carry, carry_value_1);
     253        }
     254
     255        IDISA_ALWAYS_INLINE bool test_carry(carry_t carry) const
    208256        {
    209257                return bitblock::any(carry);
    210258        }
    211259
    212         IDISA_ALWAYS_INLINE BitBlock carry_or(BitBlock carry1, BitBlock carry2) const
     260        IDISA_ALWAYS_INLINE carry_t carry_or(carry_t carry1, carry_t carry2) const
    213261        {
    214262                return simd_or(carry1, carry2);
    215263        }
    216        
    217 #undef Carry0
    218 #undef Carry1
     264#endif
     265#ifdef  CARRY64
     266        // Deprecated (renamed)
     267        IDISA_ALWAYS_INLINE carry_t carry_flip(carry_t carry) const
     268        {
     269                return carry ^ 1;
     270        }
     271
     272        IDISA_ALWAYS_INLINE BitBlock CarryFlip(carry_t carry) const
     273        {
     274                return carry ^ 1;
     275        }
     276
     277        IDISA_ALWAYS_INLINE bool test_carry(carry_t carry) const
     278        {
     279                return carry != 0;
     280        }
     281
     282        IDISA_ALWAYS_INLINE carry_t carry_or(carry_t carry1, carry_t carry2) const
     283        {
     284                return carry1 | carry2;
     285        }
     286#endif
    219287
    220288};
  • icXML/icXML-devel/src/simd-lib/idisa128.hpp

    r2720 r3567  
    99#define IDISA128_HPP
    1010
     11#if defined USE_SSE3
     12#include "idisa_cpp/idisa_sse3.cpp"
     13#elif defined USE_SSSE3
     14#include "idisa_cpp/idisa_ssse3.cpp"
     15#elif defined USE_SSE4_1
     16#include "idisa_cpp/idisa_sse4_1.cpp"
     17#elif defined USE_SSE4_2
     18#include "idisa_cpp/idisa_sse4_2.cpp"
     19#elif defined USE_NEON
     20#include "idisa_cpp/idisa_neon.cpp"
     21#else
    1122#include "idisa_cpp/idisa_sse2.cpp"
    1223#endif
     24#endif
  • icXML/icXML-devel/src/simd-lib/pabloSupport.hpp

    r2720 r3567  
    2323     
    2424
    25 IDISA_ALWAYS_INLINE BitBlock pablo_blk_Advance(BitBlock strm, BitBlock carryin, BitBlock & rslt) {
    26         BitBlock carryout;
     25const BitBlock simd_const_1 = mvmd<BLOCK_SIZE/4>::fill4(0,0,0,1);
     26
     27const BitBlock simd_sign_bit = bitblock::slli<BLOCK_SIZE-1>(simd_const_1);
     28
     29IDISA_ALWAYS_INLINE BitBlock Dequeue_bit(BitBlock & q) {
     30        BitBlock bit = simd_and(q, simd_const_1);
     31        q = simd<64>::srli<1>(q);
     32        return bit;
     33}
     34
     35IDISA_ALWAYS_INLINE carry_t pablo_blk_Advance(BitBlock strm, carry_t carryin, BitBlock & rslt) {
     36        carry_t carryout;
    2737        adv_ci_co(strm, carryin, carryout, rslt);
    2838        return carryout;
    2939}
    3040
    31 template <int n> IDISA_ALWAYS_INLINE BitBlock pablo_blk_Advance_n_(BitBlock strm, BitBlock pending_in, BitBlock & rslt) {
     41template <int n> IDISA_ALWAYS_INLINE carry_t pablo_blk_Advance_n_(BitBlock strm, BitBlock pending_in, BitBlock & rslt) {
    3242        BitBlock half_block_shifted = esimd<BLOCK_SIZE/2>::mergel(strm, pending_in);
    3343        rslt = simd_or(simd<BLOCK_SIZE/2>::srli<(BLOCK_SIZE/2)-n>(half_block_shifted),
     
    3646}
    3747
    38 IDISA_ALWAYS_INLINE BitBlock pablo_blk_ScanThru(BitBlock marker, BitBlock charclass, BitBlock carryin, BitBlock & rslt) {
    39         BitBlock carryout, sum;
     48template <int fw, int n> IDISA_ALWAYS_INLINE carry_t pablo_blk_Advance_n_(BitBlock strm, BitBlock pending_in, BitBlock & rslt) {
     49        BitBlock shifted = mvmd<fw>::dslli<1>(strm, pending_in);
     50        rslt = simd_or(simd<BLOCK_SIZE/2>::srli<fw-n>(shifted),
     51                       simd<BLOCK_SIZE/2>::slli<n>(strm));
     52        return strm;
     53}
     54
     55/*  Support for pablo.Lookahead.  2 translation modes:
     56    (a) pablo.Lookahead(ss.strm, n) ==> pablo_blk_Lookahead_n_<n>(ss_curblock.strm, ss_nxtblock.strm);
     57    (b) pablo.Lookahead(ss.strm, n) ==> pablo_blk_Lookahead_n_<n>(ss[0].strm, ss[1].strm);
     58*/
     59template <int n> IDISA_ALWAYS_INLINE carry_t pablo_blk_Lookahead_n_(BitBlock strm, BitBlock lookahead) {
     60        BitBlock half_block_shifted = mvmd<BLOCK_SIZE/2>::dslli<1>(lookahead, strm);
     61        return simd_or(simd<BLOCK_SIZE/2>::slli<(BLOCK_SIZE/2)-n>(half_block_shifted),
     62                       simd<BLOCK_SIZE/2>::srli<n>(strm));
     63}
     64
     65IDISA_ALWAYS_INLINE carry_t pablo_blk_ScanThru(BitBlock marker, BitBlock charclass, carry_t carryin, BitBlock & rslt) {
     66        carry_t carryout;  BitBlock sum;
    4067        add_ci_co(marker, charclass, carryin, carryout, sum);
    4168        rslt = simd_andc(sum, charclass);
     
    4370}
    4471
    45 IDISA_ALWAYS_INLINE BitBlock pablo_blk_AdvanceThenScanThru(BitBlock marker, BitBlock charclass, BitBlock carryin, BitBlock & rslt) {
    46         BitBlock carryout, sum;
     72IDISA_ALWAYS_INLINE carry_t pablo_blk_MatchStar(BitBlock marker, BitBlock charclass, carry_t carryin, BitBlock & rslt) {
     73        carry_t carryout;  BitBlock sum;
     74        add_ci_co(simd_and(charclass, marker), charclass, carryin, carryout, sum);
     75        rslt = simd_or(simd_xor(sum, charclass), marker);
     76        return carryout;
     77}
     78
     79IDISA_ALWAYS_INLINE carry_t pablo_blk_AdvanceThenScanThru(BitBlock marker, BitBlock charclass, carry_t carryin, BitBlock & rslt) {
     80        carry_t carryout;  BitBlock sum;
    4781        add_ci_co(marker, simd_or(charclass, marker), carryin, carryout, sum);
    4882        rslt = simd_andc(sum, charclass);
     
    5084}
    5185
    52 IDISA_ALWAYS_INLINE BitBlock pablo_blk_ScanTo(BitBlock marker, BitBlock charclass, BitBlock carryin, BitBlock & rslt) {
    53         BitBlock carryout, sum;
     86IDISA_ALWAYS_INLINE carry_t pablo_blk_ScanTo(BitBlock marker, BitBlock charclass, carry_t carryin, BitBlock & rslt) {
     87        carry_t carryout;  BitBlock sum;
    5488        add_ci_co(marker, simd_not(charclass), carryin, carryout, sum);
    5589        rslt = simd_and(sum, charclass);
     
    5791}
    5892
    59 IDISA_ALWAYS_INLINE BitBlock pablo_blk_AdvanceThenScanTo(BitBlock marker, BitBlock charclass, BitBlock carryin, BitBlock & rslt) {
    60         BitBlock carryout, sum;
     93IDISA_ALWAYS_INLINE carry_t pablo_blk_AdvanceThenScanTo(BitBlock marker, BitBlock charclass, carry_t carryin, BitBlock & rslt) {
     94        carry_t carryout;  BitBlock sum;
    6195        add_ci_co(marker, simd_or(marker, simd_not(charclass)), carryin, carryout, sum);
    6296        rslt = simd_and(sum, charclass);
     
    6498}
    6599
    66 IDISA_ALWAYS_INLINE BitBlock pablo_blk_ScanToFirst(BitBlock charclass, BitBlock carryin, BitBlock & rslt) {
    67         BitBlock carryout, sum;
     100IDISA_ALWAYS_INLINE carry_t pablo_blk_ScanToFirst(BitBlock charclass, carry_t carryin, BitBlock & rslt) {
     101        carry_t carryout;  BitBlock sum;
    68102        add_ci_co(simd<BLOCK_SIZE>::constant<0>(), simd_not(charclass), carryin, carryout, sum);
    69103        rslt = simd_and(sum, charclass);
     
    71105}
    72106
    73 IDISA_ALWAYS_INLINE BitBlock pablo_blk_SpanUpTo(BitBlock starts, BitBlock follows, BitBlock carryin, BitBlock & rslt) {
    74         BitBlock carryout;
     107IDISA_ALWAYS_INLINE carry_t pablo_blk_SpanUpTo(BitBlock starts, BitBlock follows, carry_t carryin, BitBlock & rslt) {
     108        carry_t carryout;
    75109        sub_bi_bo(follows, starts, carryin, carryout, rslt);
    76110        return carryout;
    77111}
    78112
    79 IDISA_ALWAYS_INLINE BitBlock pablo_blk_InclusiveSpan(BitBlock starts, BitBlock follows, BitBlock carryin, BitBlock & rslt) {
    80         BitBlock carryout, span;
     113IDISA_ALWAYS_INLINE carry_t pablo_blk_InclusiveSpan(BitBlock starts, BitBlock follows, carry_t carryin, BitBlock & rslt) {
     114        carry_t carryout;  BitBlock span;
    81115        sub_bi_bo(follows, starts, carryin, carryout, span);
    82116        rslt = simd_or(span, follows);
     
    84118}
    85119
    86 IDISA_ALWAYS_INLINE BitBlock pablo_blk_ExclusiveSpan(BitBlock starts, BitBlock follows, BitBlock carryin, BitBlock & rslt) {
    87         BitBlock carryout, span;
     120IDISA_ALWAYS_INLINE carry_t pablo_blk_ExclusiveSpan(BitBlock starts, BitBlock follows, carry_t carryin, BitBlock & rslt) {
     121        carry_t carryout;  BitBlock span;
    88122        sub_bi_bo(follows, starts, carryin, carryout, span);
    89123        rslt = simd_andc(span, starts);
     
    92126
    93127
     128template <typename T> IDISA_ALWAYS_INLINE BitBlock pablo_blk_match(T bytedata[], const T match_str[], BitBlock v, int len) {
     129        union {BitBlock bitblock; ScanWord elems[sizeof(BitBlock)/sizeof(ScanWord)];} u;
     130        u.bitblock = v;
     131        int pos;
     132        ScanWord s, t, bit;
     133        for (unsigned int i = 0; i < sizeof(BitBlock)/sizeof(ScanWord); i++) {
     134                s = u.elems[i];
     135                while (s != 0) {
     136                        pos = scan_forward_zeroes(s);
     137                        t = s;
     138                        s = s & (s - 1); // clear rightmost bit
     139                        if (memcmp((void *) &bytedata[pos], (void *) match_str, len * sizeof(T))) {
     140                                // Strings do not match; filter the result.
     141                                bit = s ^ t;
     142                                u.elems[i] ^= bit;
     143                        }
     144                }
     145        }
     146        return u.bitblock;
     147}
    94148
    95149#endif // PABLOSUPPORT_HPP_
  • icXML/icXML-devel/src/simd-lib/s2p.hpp

    r2720 r3567  
    107107   immediately convert back. */
    108108#ifdef USE_S2P_AVX
    109 #include "idisa_cpp/idisa_sse2.cpp"
    110 #define avx_select_lo128(x) \
    111         ((__m128i) _mm256_castps256_ps128(x))
    112 
    113 #define avx_select_hi128(x) \
    114         ((__m128i)(_mm256_extractf128_ps(x, 1)))
    115 
    116 #define avx_general_combine256(x, y) \
    117    (_mm256_insertf128_ps(_mm256_castps128_ps256((__m128) y), (__m128) x, 1))
    118 
    119109#define s2p_step(s0, s1, hi_mask, shift, p0, p1)  \
    120110  do {\
     
    138128#endif
    139129
     130#ifndef USE_S2P_AVX2
    140131#define s2p_bytepack(s0, s1, s2, s3, s4, s5, s6, s7, p0, p1, p2, p3, p4, p5, p6, p7) \
    141132  do {\
     
    157148        s2p_step(bit33337777_0,bit33337777_1,simd<8>::himask(),4,p3,p7);\
    158149  } while(0)
     150#endif
     151
     152#ifdef USE_S2P_AVX2
     /* s2p_step_shuf(shuf, s0, s1, hi_mask, shift, p0, p1)   [AVX2 only]
      *
      * One step of the s2p transform, written directly with AVX2 intrinsics.
      *
      *   x0, x1: s0 and s1 after a per-128-bit-lane byte shuffle by 'shuf'
      *           (_mm256_shuffle_epi8) followed by a 64-bit element permute
      *           with control 0xD8, i.e. element order 0,2,1,3.  With the
      *           shuffle mask built in s2p_bytepack (even-indexed bytes to the
      *           low 8 bytes of each lane, odd-indexed bytes to the high 8),
      *           this leaves the even-indexed bytes of the register in its low
      *           128 bits and the odd-indexed bytes in its high 128 bits.
      *   t0:     _mm256_permute2x128_si256(x1, x0, 0x31): low half = high half
      *           of x1, high half = high half of x0.
      *   t1:     _mm256_permute2x128_si256(x1, x0, 0x20): low half = low half
      *           of x1, high half = low half of x0.
      *   p0, p1: merged from t0 and t1 with simd<1>::ifh under hi_mask, using
      *           t1 shifted right / t0 shifted left by 'shift' within 16-bit
      *           fields.  (Presumably ifh selects its second argument where
      *           the mask bit is 1 and its third elsewhere - confirm against
      *           the IDISA definition.)
      *
      * 'shift' must be a compile-time constant (it is a template argument).
      * The locals x0, x1, t0, t1 are declared inside the do/while block, so
      * arguments must not be expressions that use those names.
      */
     153#define s2p_step_shuf(shuf, s0, s1, hi_mask, shift, p0, p1)  \
     154  do {\
     155        BitBlock x0, x1, t0, t1;\
     156        x0 = _mm256_permute4x64_epi64(_mm256_shuffle_epi8(s0, shuf), 0xD8);\
     157        x1 = _mm256_permute4x64_epi64(_mm256_shuffle_epi8(s1, shuf), 0xD8);\
     158        t0 = _mm256_permute2x128_si256(x1, x0, 0x31);\
     159        t1 = _mm256_permute2x128_si256(x1, x0, 0x20);\
     160        p0 = simd<1>::ifh(hi_mask, t0, simd<16>::srli<shift>(t1));\
     161        p1 = simd<1>::ifh(hi_mask, simd<16>::slli<shift>(t0), t1);\
     162  } while(0)
     163
     /* s2p_bytepack(s0..s7, p0..p7)   [AVX2 variant, used when USE_S2P_AVX2 is defined]
      *
      * Produces the eight outputs p0..p7 from the eight inputs s0..s7 using
      * twelve s2p_step_shuf steps arranged in three rounds of four:
      *   round 1: simd<2>::himask(), shift 1  -> bit00224466_* / bit11335577_*
      *   round 2: simd<4>::himask(), shift 2  -> bit00004444_* / bit22226666_*
      *                                           bit11115555_* / bit33337777_*
      *   round 3: simd<8>::himask(), shift 4  -> p0..p7
      *
      * 'shuf' is the byte-shuffle control shared by every step: within each
      * 128-bit lane it moves the even-indexed bytes (0,2,...,14) to the low
      * 8 bytes and the odd-indexed bytes (1,3,...,15) to the high 8 bytes.
      *
      * All temporaries are local to the do/while block; arguments must not be
      * expressions that use the temporaries' names.
      */
     164#define s2p_bytepack(s0, s1, s2, s3, s4, s5, s6, s7, p0, p1, p2, p3, p4, p5, p6, p7) \
     165  do {\
     166        BitBlock shuf = _mm256_set_epi32(0x0F0D0B09, 0x07050301, 0x0E0C0A08, 0x06040200, 0x0F0D0B09, 0x07050301, 0x0E0C0A08, 0x06040200);\
     167        BitBlock bit00224466_0,bit00224466_1,bit00224466_2,bit00224466_3;\
     168        BitBlock bit11335577_0,bit11335577_1,bit11335577_2,bit11335577_3;\
     169        BitBlock bit00004444_0,bit22226666_0,bit00004444_1,bit22226666_1;\
     170        BitBlock bit11115555_0,bit33337777_0,bit11115555_1,bit33337777_1;\
     171        s2p_step_shuf(shuf, s0,s1,simd<2>::himask(),1,bit00224466_0,bit11335577_0);\
     172        s2p_step_shuf(shuf, s2,s3,simd<2>::himask(),1,bit00224466_1,bit11335577_1);\
     173        s2p_step_shuf(shuf, s4,s5,simd<2>::himask(),1,bit00224466_2,bit11335577_2);\
     174        s2p_step_shuf(shuf, s6,s7,simd<2>::himask(),1,bit00224466_3,bit11335577_3);\
     175        s2p_step_shuf(shuf, bit00224466_0,bit00224466_1,simd<4>::himask(),2,bit00004444_0,bit22226666_0);\
     176        s2p_step_shuf(shuf, bit00224466_2,bit00224466_3,simd<4>::himask(),2,bit00004444_1,bit22226666_1);\
     177        s2p_step_shuf(shuf, bit11335577_0,bit11335577_1,simd<4>::himask(),2,bit11115555_0,bit33337777_0);\
     178        s2p_step_shuf(shuf, bit11335577_2,bit11335577_3,simd<4>::himask(),2,bit11115555_1,bit33337777_1);\
     179        s2p_step_shuf(shuf, bit00004444_0,bit00004444_1,simd<8>::himask(),4,p0,p4);\
     180        s2p_step_shuf(shuf, bit11115555_0,bit11115555_1,simd<8>::himask(),4,p1,p5);\
     181        s2p_step_shuf(shuf, bit22226666_0,bit22226666_1,simd<8>::himask(),4,p2,p6);\
     182        s2p_step_shuf(shuf, bit33337777_0,bit33337777_1,simd<8>::himask(),4,p3,p7);\
     183  } while(0)
     184
     185#endif
     186
     187
     188
    159189
    160190/* BitPack is selected by block size: a 16-bit integer when BLOCK_SIZE == 128 (sizeof(BitBlock) = 16), a 32-bit integer when BLOCK_SIZE == 256.  No BitPack typedef is provided for any other BLOCK_SIZE. */
     191#if BLOCK_SIZE == 128
    161192typedef uint16_t BitPack;
     193#endif
     194#if BLOCK_SIZE == 256
     195typedef uint32_t BitPack;
     196#endif
    162197
    163198#define movemask_step(s7, s6, s5, s4, s3, s2, s1, s0, p) \
Note: See TracChangeset for help on using the changeset viewer.