Changeset 4151


Ignore:
Timestamp:
Sep 11, 2014, 4:02:00 PM (4 years ago)
Author:
linmengl
Message:

Experimental support for BLOCK_SIZE 256; ready to test.

Location:
icGREP/icgrep-devel
Files:
3 added
6 edited

Legend:

Unmodified
Added
Removed
  • icGREP/icgrep-devel/README-icgrep-0.9.txt

    r4039 r4151  
    11README-icgrep-0.9.txt
    22
    3 This is the open-source version of icgrep 0.9.   icgrep is a 
     3This is the open-source version of icgrep 0.9.   icgrep is a
    44very fast regular expression search program, particularly
    5 for complex regular expressions.   
     5for complex regular expressions.
    66
    77Normal usage to find lines in a file f matching a regexp r is:
     
    1616
    1717icgrep supports standard egrep syntax, except for Posix character
    18 classes.   icgrep supports searches using ASCII or UTF-8. 
     18classes.   icgrep supports searches using ASCII or UTF-8.
    1919Unicode two letter classes are supported, e.g. \p{Ll} matching
    2020lower case Unicode letters and \P{Lu} matching all characters
     
    2929code version of LLVM.
    3030
    31 To build LLVM, 
     31To build LLVM,
    3232(L1) open a terminal window and cd to the llvm-build directory
    3333(L2) enter the following command to build the makefiles
     
    3636
    3737Using the installed LLVM, building icgrep uses the CMake build
    38 system generator.   
     38system generator.
    3939(IC1)  open a terminal window and cd to the icgrep-build directory
    4040(IC2)  enter the following command to build the makefiles
    4141cmake -DCMAKE_BUILD_TYPE=Release -DCMAKE_CXX_COMPILER:FILEPATH=/usr/bin/clang++ -DCMAKE_C_COMPILER:FILEPATH=/usr/bin/clang ../icgrep
    42 (IC3) Enter the command "make" 
     42(IC3) Enter the command "make"
    4343
    4444LLVM files are governed by the LLVM Release License in LLVM-LICENSE.txt.
    4545icgrep is governed by Open Software License 3.0 in OSL-3.0.txt.
    4646
     47### Experimental 256-bit BLOCK_SIZE
     48(IC1)  open a terminal window and cd to the icgrep-build directory
     49(IC2)  enter the following command to build the makefiles
     50cmake -DBLOCK_SIZE_256=on -DCMAKE_BUILD_TYPE=Release -DCMAKE_CXX_COMPILER:FILEPATH=/usr/bin/g++ -DCMAKE_C_COMPILER:FILEPATH=/usr/bin/gcc ../icgrep
     51(IC3) Enter the command "make"
    4752
     53
  • icGREP/icgrep-devel/icgrep/CMakeLists.txt

    r4137 r4151  
    77set (icgrep_VERSION_MAJOR 0)
    88set (icgrep_VERSION_MINOR 8)
     9
     10option(BLOCK_SIZE_256 "Compiling with BLOCK_SIZE=256")
     11if (BLOCK_SIZE_256)
     12  message("-- Build with BLOCK_SIZE=256")
     13else ()
     14  message("-- Build with BLOCK_SIZE=128")
     15endif()
    916
    1017# configure a header file to pass some of the CMake settings
     
    5764target_link_libraries (icgrep PabloADT RegExpADT ${REQ_LLVM_LIBRARIES})
    5865
    59 set(CMAKE_CXX_FLAGS_RELEASE "-std=gnu++0x -O3 -msse2")
     66set(CMAKE_CXX_FLAGS_RELEASE "-std=c++11 -O3 -msse2")
    6067
    6168# detect LLVM version and add compiler flags.
     
    6572  set(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} -DUSE_LLVM_3_5")
    6673endif(${LLVM_PACKAGE_VERSION} MATCHES "3.4")
     74
     75if (BLOCK_SIZE_256)
     76  set(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} -DUSE_S2P_AVX2 -DBLOCK_SIZE=256 -march=core-avx2 -mavx2 -m64 -fabi-version=6")
     77endif()
    6778
    6879add_test(
  • icGREP/icgrep-devel/icgrep/icgrep.cpp

    r4149 r4151  
    3434#include <fcntl.h>
    3535
    36 
     36#if (BLOCK_SIZE == 128)
    3737#define SEGMENT_BLOCKS 7
     38#endif
     39
     40#if (BLOCK_SIZE == 256)
     41#define SEGMENT_BLOCKS 15
     42#endif
     43
    3844#define SEGMENT_SIZE (BLOCK_SIZE * SEGMENT_BLOCKS)
    3945
     
    270276}
    271277
    272 
     278#if (BLOCK_SIZE == 256)
     279typedef BitStreamScanner<BitBlock, uint64_t, uint64_t, SEGMENT_BLOCKS> ScannerT;
     280#endif
     281
     282#if (BLOCK_SIZE == 128)
    273283typedef BitStreamScanner<BitBlock, uint32_t, uint32_t, SEGMENT_BLOCKS> ScannerT;
     284#endif
    274285
    275286//
  • icGREP/icgrep-devel/icgrep/llvm_gen.cpp

    r4137 r4151  
    481481}
    482482
    483 #define bitBlockExprType m64x2Vect
     483#define bitBlockExprType mXi64Vect
    484484
    485485void LLVM_Generator::DefineTypes()
    486486{
    487487    //The BitBlock vector.
    488     m64x2Vect = VectorType::get(IntegerType::get(mMod->getContext(), 64), 2);
    489     m128x1Vect = VectorType::get(IntegerType::get(mMod->getContext(), 128), 1);
     488    mXi64Vect = VectorType::get(IntegerType::get(mMod->getContext(), 64), BLOCK_SIZE / 64);
     489    mXi128Vect = VectorType::get(IntegerType::get(mMod->getContext(), 128), BLOCK_SIZE / 128);
    490490    //A pointer to the BitBlock vector.
    491     m64x2Vect_Ptr1 = PointerType::get(m64x2Vect, 0);
     491    mXi64Vect_Ptr1 = PointerType::get(mXi64Vect, 0);
    492492
    493493    //Constant definitions.
    494494    mConst_int64_neg1 = ConstantInt::get(mMod->getContext(), APInt(64, StringRef("-1"), 10));
    495495
    496     mConst_Aggregate_64x2_0 = ConstantAggregateZero::get(m64x2Vect);
     496    mConst_Aggregate_Xi64_0 = ConstantAggregateZero::get(mXi64Vect);
    497497    std::vector<Constant*> const_packed_27_elems;
    498     const_packed_27_elems.push_back(mConst_int64_neg1);
    499     const_packed_27_elems.push_back(mConst_int64_neg1);
    500     mConst_Aggregate_64x2_neg1 = ConstantVector::get(const_packed_27_elems);
     498    for (int i = 0; i < BLOCK_SIZE / 64; ++i)
     499      const_packed_27_elems.push_back(mConst_int64_neg1);
     500    mConst_Aggregate_Xi64_neg1 = ConstantVector::get(const_packed_27_elems);
    501501
    502502
     
    508508    for (int i = 0; i < mBits; i++)
    509509    {
    510         StructTy_struct_Basis_bits_fields.push_back(m64x2Vect);
     510        StructTy_struct_Basis_bits_fields.push_back(mXi64Vect);
    511511    }
    512512    if (StructTy_struct_Basis_bits->isOpaque()) {
     
    520520
    521521    //The carry q array.
    522     FuncTy_0_args.push_back(m64x2Vect_Ptr1);
     522    FuncTy_0_args.push_back(mXi64Vect_Ptr1);
    523523
    524524    //The output structure.
     
    528528    }
    529529    std::vector<Type*>StructTy_struct_Output_fields;
    530     StructTy_struct_Output_fields.push_back(m64x2Vect);
    531     StructTy_struct_Output_fields.push_back(m64x2Vect);
     530    StructTy_struct_Output_fields.push_back(mXi64Vect);
     531    StructTy_struct_Output_fields.push_back(mXi64Vect);
    532532    if (StructTy_struct_Output->isOpaque()) {
    533533        StructTy_struct_Output->setBody(StructTy_struct_Output_fields, /*isPacked=*/false);
     
    547547{
    548548    //This function can be used for testing to print the contents of a register from JIT'd code to the terminal window.
    549     //mFunc_print_register = mMod->getOrInsertFunction("wrapped_print_register", Type::getVoidTy(getGlobalContext()), m64x2Vect, NULL);
     549    //mFunc_print_register = mMod->getOrInsertFunction("wrapped_print_register", Type::getVoidTy(getGlobalContext()), mXi64Vect, NULL);
    550550    //mExecutionEngine->addGlobalMapping(cast<GlobalValue>(mFunc_print_register), (void *)&wrapped_print_register);
    551551    // to call->  b.CreateCall(mFunc_print_register, unicode_category);
    552552
    553553#ifdef USE_UADD_OVERFLOW
    554     // Type Definitions for llvm.uadd.with.overflow.i128
     554    // Type Definitions for llvm.uadd.with.overflow.i128 or .i256
    555555    std::vector<Type*>StructTy_0_fields;
    556     StructTy_0_fields.push_back(IntegerType::get(mMod->getContext(), 128));
     556    StructTy_0_fields.push_back(IntegerType::get(mMod->getContext(), BLOCK_SIZE));
    557557    StructTy_0_fields.push_back(IntegerType::get(mMod->getContext(), 1));
    558558    StructType *StructTy_0 = StructType::get(mMod->getContext(), StructTy_0_fields, /*isPacked=*/false);
    559559
    560560    std::vector<Type*>FuncTy_1_args;
    561     FuncTy_1_args.push_back(IntegerType::get(mMod->getContext(), 128));
    562     FuncTy_1_args.push_back(IntegerType::get(mMod->getContext(), 128));
     561    FuncTy_1_args.push_back(IntegerType::get(mMod->getContext(), BLOCK_SIZE));
     562    FuncTy_1_args.push_back(IntegerType::get(mMod->getContext(), BLOCK_SIZE));
    563563    FunctionType* FuncTy_1 = FunctionType::get(
    564564                                              /*Result=*/StructTy_0,
     
    566566                                              /*isVarArg=*/false);
    567567
    568     mFunc_llvm_uadd_with_overflow_i128 = mMod->getFunction("llvm.uadd.with.overflow.i128");
    569     if (!mFunc_llvm_uadd_with_overflow_i128) {
    570         mFunc_llvm_uadd_with_overflow_i128 = Function::Create(
     568    mFunc_llvm_uadd_with_overflow = mMod->getFunction("llvm.uadd.with.overflow.i" + std::to_string(BLOCK_SIZE));
     569    if (!mFunc_llvm_uadd_with_overflow) {
     570        mFunc_llvm_uadd_with_overflow = Function::Create(
    571571          /*Type=*/FuncTy_1,
    572572          /*Linkage=*/GlobalValue::ExternalLinkage,
    573           /*Name=*/"llvm.uadd.with.overflow.i128", mMod); // (external, no body)
    574         mFunc_llvm_uadd_with_overflow_i128->setCallingConv(CallingConv::C);
    575     }
    576     AttributeSet mFunc_llvm_uadd_with_overflow_i128_PAL;
     573          /*Name=*/"llvm.uadd.with.overflow.i" + std::to_string(BLOCK_SIZE), mMod); // (external, no body)
     574        mFunc_llvm_uadd_with_overflow->setCallingConv(CallingConv::C);
     575    }
     576    AttributeSet mFunc_llvm_uadd_with_overflow_PAL;
    577577    {
    578578        SmallVector<AttributeSet, 4> Attrs;
     
    586586
    587587        Attrs.push_back(PAS);
    588         mFunc_llvm_uadd_with_overflow_i128_PAL = AttributeSet::get(mMod->getContext(), Attrs);
    589     }
    590     mFunc_llvm_uadd_with_overflow_i128->setAttributes(mFunc_llvm_uadd_with_overflow_i128_PAL);
     588        mFunc_llvm_uadd_with_overflow_PAL = AttributeSet::get(mMod->getContext(), Attrs);
     589    }
     590    mFunc_llvm_uadd_with_overflow->setAttributes(mFunc_llvm_uadd_with_overflow_PAL);
    591591#endif
    592592
     
    667667        if (mMarkerMap.find(callee) == mMarkerMap.end())
    668668        {
    669             Value* func_get_unicode_category = mMod->getOrInsertFunction(callee, m64x2Vect, mStruct_Basis_Bits_Ptr1, NULL);
     669            Value* func_get_unicode_category = mMod->getOrInsertFunction(callee, mXi64Vect, mStruct_Basis_Bits_Ptr1, NULL);
    670670            void* addr;
    671671            if (pablo_call->getCallee() == "Ps")
     
    852852    if (mMarkerMap.find(name) == mMarkerMap.end())
    853853    {
    854         Value* ptr = b.CreateAlloca(m64x2Vect);
    855         Value* void_1 = b.CreateStore(mConst_Aggregate_64x2_0, ptr);
     854        Value* ptr = b.CreateAlloca(mXi64Vect);
     855        Value* void_1 = b.CreateStore(mConst_Aggregate_Xi64_0, ptr);
    856856        mMarkerMap.insert(make_pair(name, ptr));
    857857    }
     
    963963
    964964        Value* expression_marker_value = Generate_PabloE(whl->getExpr());
    965         // Use an i128 compare for simplicity and speed.
    966         Value* cast_marker_value_1 = b_cond.CreateBitCast(expression_marker_value, IntegerType::get(mMod->getContext(), 128));
    967         Value* int_tobool1 = b_cond.CreateICmpEQ(cast_marker_value_1, ConstantInt::get(IntegerType::get(mMod->getContext(), 128), 0));
     965        Value* int_tobool1 = genBitBlockAny(expression_marker_value);
     966
    968967        b_cond.CreateCondBr(int_tobool1, whileEndBlock, whileBodyBlock);
    969968
     
    975974        IRBuilder<> b_wb1(mBasicBlock);
    976975        //Create and initialize a new carry queue.
    977         Value* ptr_while_carry_q = b_wb1.CreateAlloca(m64x2Vect, b_wb1.getInt64(mCarryQueueSize - idx));
     976        Value* ptr_while_carry_q = b_wb1.CreateAlloca(mXi64Vect, b_wb1.getInt64(mCarryQueueSize - idx));
    978977        for (int i=0; i<(mCarryQueueSize-idx); i++)
    979978        {
    980             Value* void_1 = genCarryOutStore(mConst_Aggregate_64x2_0, ptr_while_carry_q, i);
     979            Value* void_1 = genCarryOutStore(mConst_Aggregate_Xi64_0, ptr_while_carry_q, i);
    981980        }
    982981
     
    10161015        if ((all->getNum() != 0) && (all->getNum() != 1))
    10171016            std::cout << "\nErr: 'All' can only be set to 1 or 0.\n" << std::endl;
    1018         Value* ptr_all = b.CreateAlloca(m64x2Vect);
    1019         Value* void_1 = b.CreateStore((all->getNum() == 0 ? mConst_Aggregate_64x2_0 : mConst_Aggregate_64x2_neg1), ptr_all);
     1017        Value* ptr_all = b.CreateAlloca(mXi64Vect);
     1018        Value* void_1 = b.CreateStore((all->getNum() == 0 ? mConst_Aggregate_Xi64_0 : mConst_Aggregate_Xi64_neg1), ptr_all);
    10201019        Value* all_value = b.CreateLoad(ptr_all);
    10211020
     
    10321031            Value* basis_bits_struct = b.CreateLoad(mPtr_basis_bits_addr);
    10331032            Value* unicode_category = b.CreateCall(itFuncGet->second, basis_bits_struct);
    1034             Value* ptr = b.CreateAlloca(m64x2Vect);
     1033            Value* ptr = b.CreateAlloca(mXi64Vect);
    10351034            Value* void_1 = b.CreateStore(unicode_category, ptr);
    10361035
     
    10811080        IRBuilder<> b(mBasicBlock);
    10821081
    1083         Constant* const_packed_elems [] = {b.getInt64(-1), b.getInt64(-1)};
    1084         Constant* const_packed = ConstantVector::get(const_packed_elems);
    10851082        Value* expr_value = Generate_PabloE(pablo_not->getExpr());
    1086         Value* xor_rslt = b.CreateXor(expr_value, const_packed, "xor_inst");
     1083        Value* xor_rslt = b.CreateXor(expr_value, mConst_Aggregate_Xi64_neg1, "xor_inst");
    10871084
    10881085        retVal = xor_rslt;
     
    10991096    {
    11001097        IRBuilder<> b(mBasicBlock);
    1101 
    1102         int this_carry_idx = mCarryQueueIdx;
    1103         mCarryQueueIdx++;
    1104 
    1105         Value* carryq_value = genCarryInLoad(mptr_carry_q, this_carry_idx);
    1106 
    11071098        Value* strm_value = Generate_PabloE(adv->getExpr());
    1108         Value* srli_1_value = b.CreateLShr(strm_value, 63);
    1109 
    1110         Value* packed_shuffle;
    1111         Constant* const_packed_1_elems [] = {b.getInt32(0), b.getInt32(2)};
    1112         Constant* const_packed_1 = ConstantVector::get(const_packed_1_elems);
    1113         packed_shuffle = b.CreateShuffleVector(carryq_value, srli_1_value, const_packed_1, "packed_shuffle nw");
    1114 
    1115         Constant* const_packed_2_elems[] = {b.getInt64(1), b.getInt64(1)};
    1116         Constant* const_packed_2 = ConstantVector::get(const_packed_2_elems);
    1117 
    1118         Value* shl_value = b.CreateShl(strm_value, const_packed_2, "shl_value");
    1119         Value* result_value = b.CreateOr(shl_value, packed_shuffle, "or.result_value");
    1120 
    1121         Value* carry_out = genShiftRight127(strm_value, "carry_out");
    1122         //CarryQ - carry out:
    1123         Value* void_1 = genCarryOutStore(carry_out, mptr_carry_q, this_carry_idx);
    1124 
    1125         retVal = result_value;
     1099        retVal = genAdvanceWithCarry(strm_value);
    11261100    }
    11271101    else if (MatchStar* mstar = dynamic_cast<MatchStar*>(expr))
     
    11561130
    11571131#ifdef USE_UADD_OVERFLOW
    1158 SumWithOverflowPack LLVM_Generator::genUaddOverflow(Value* int128_e1, Value* int128_e2) {
     1132SumWithOverflowPack LLVM_Generator::callUaddOverflow(Value* int128_e1, Value* int128_e2) {
    11591133    std::vector<Value*> struct_res_params;
    11601134    struct_res_params.push_back(int128_e1);
    11611135    struct_res_params.push_back(int128_e2);
    1162     CallInst* struct_res = CallInst::Create(mFunc_llvm_uadd_with_overflow_i128, struct_res_params, "uadd_overflow_res", mBasicBlock);
     1136    CallInst* struct_res = CallInst::Create(mFunc_llvm_uadd_with_overflow, struct_res_params, "uadd_overflow_res", mBasicBlock);
    11631137    struct_res->setCallingConv(CallingConv::C);
    11641138    struct_res->setTailCall(false);
     
    11901164
    11911165#ifdef USE_UADD_OVERFLOW
    1192     //use llvm.uadd.with.overflow.i128
    1193 
    1194     CastInst* int128_e1 = new BitCastInst(e1, IntegerType::get(mMod->getContext(), 128), "e1_128", mBasicBlock);
    1195     CastInst* int128_e2 = new BitCastInst(e2, IntegerType::get(mMod->getContext(), 128), "e2_128", mBasicBlock);
    1196     CastInst* int128_carryq_value = new BitCastInst(carryq_value, IntegerType::get(mMod->getContext(), 128), "carryq_128", mBasicBlock);
     1166    //use llvm.uadd.with.overflow.i128 or i256
     1167
     1168    CastInst* int128_e1 = new BitCastInst(e1, IntegerType::get(mMod->getContext(), BLOCK_SIZE), "e1_128", mBasicBlock);
     1169    CastInst* int128_e2 = new BitCastInst(e2, IntegerType::get(mMod->getContext(), BLOCK_SIZE), "e2_128", mBasicBlock);
     1170    CastInst* int128_carryq_value = new BitCastInst(carryq_value, IntegerType::get(mMod->getContext(), BLOCK_SIZE), "carryq_128", mBasicBlock);
    11971171
    11981172    SumWithOverflowPack sumpack0, sumpack1;
    11991173
    1200     sumpack0 = genUaddOverflow(int128_e1, int128_e2);
    1201     sumpack1 = genUaddOverflow(sumpack0.sum, int128_carryq_value);
     1174    sumpack0 = callUaddOverflow(int128_e1, int128_e2);
     1175    sumpack1 = callUaddOverflow(sumpack0.sum, int128_carryq_value);
    12021176
    12031177    Value* obit = b.CreateOr(sumpack0.obit, sumpack1.obit, "carry_bit");
    1204     Value* ret_sum = b.CreateBitCast(sumpack1.sum, m64x2Vect, "ret_sum");
    1205 
    1206     /*obit is the i1 carryout, zero extend and insert it into a v2i64 vector.*/
    1207     ConstantAggregateZero* const_packed_5 = ConstantAggregateZero::get(m64x2Vect);
     1178    Value* ret_sum = b.CreateBitCast(sumpack1.sum, mXi64Vect, "ret_sum");
     1179
     1180    /*obit is the i1 carryout, zero extend and insert it into a v2i64 or v4i64 vector.*/
     1181    ConstantAggregateZero* const_packed_5 = ConstantAggregateZero::get(mXi64Vect);
    12081182    ConstantInt* const_int32_6 = ConstantInt::get(mMod->getContext(), APInt(32, StringRef("0"), 10));
    12091183    CastInst* int64_o0 = new ZExtInst(obit, IntegerType::get(mMod->getContext(), 64), "o0", mBasicBlock);
     
    12221196
    12231197    Value* sum = b.CreateAdd(partial, mid_carry_in, "sum");
    1224     Value* carry_out = genShiftRight127(b.CreateOr(carrygen, b.CreateAnd(carryprop, genNot(sum))), "carry_out");
     1198    Value* carry_out = genShiftHighbitToLow(b.CreateOr(carrygen, b.CreateAnd(carryprop, genNot(sum))), "carry_out");
    12251199    Value* void_1 = genCarryOutStore(carry_out, mptr_carry_q, this_carry_idx);
    12261200
     
    12451219Value* LLVM_Generator::genBitBlockAny(Value* e) {
    12461220    IRBuilder<> b(mBasicBlock);
    1247     Value* cast_marker_value_1 = b.CreateBitCast(e, IntegerType::get(mMod->getContext(), 128));
    1248     return b.CreateICmpEQ(cast_marker_value_1, ConstantInt::get(IntegerType::get(mMod->getContext(), 128), 0));
    1249 }
    1250 
    1251 Value* LLVM_Generator::genShiftRight127(Value* e, const Twine &namehint) {
     1221    Value* cast_marker_value_1 = b.CreateBitCast(e, IntegerType::get(mMod->getContext(), BLOCK_SIZE));
     1222    return b.CreateICmpEQ(cast_marker_value_1, ConstantInt::get(IntegerType::get(mMod->getContext(), BLOCK_SIZE), 0));
     1223}
     1224
     1225Value* LLVM_Generator::genShiftHighbitToLow(Value* e, const Twine &namehint) {
    12521226    IRBuilder<> b(mBasicBlock);
    1253     Value* i128_val = b.CreateBitCast(e, IntegerType::get(mMod->getContext(), 128));
    1254     return b.CreateBitCast(b.CreateLShr(i128_val, 127, namehint), bitBlockExprType);
     1227    Value* i128_val = b.CreateBitCast(e, IntegerType::get(mMod->getContext(), BLOCK_SIZE));
     1228    return b.CreateBitCast(b.CreateLShr(i128_val, BLOCK_SIZE - 1, namehint), bitBlockExprType);
    12551229}
    12561230
    12571231Value* LLVM_Generator::genShiftLeft64(Value* e, const Twine &namehint) {
    12581232    IRBuilder<> b(mBasicBlock);
    1259     Value* i128_val = b.CreateBitCast(e, IntegerType::get(mMod->getContext(), 128));
     1233    Value* i128_val = b.CreateBitCast(e, IntegerType::get(mMod->getContext(), BLOCK_SIZE));
    12601234    return b.CreateBitCast(b.CreateShl(i128_val, 64, namehint), bitBlockExprType);
    12611235}
     
    12631237Value* LLVM_Generator::genNot(Value* e, const Twine &namehint) {
    12641238    IRBuilder<> b(mBasicBlock);
    1265     return b.CreateXor(e, mConst_Aggregate_64x2_neg1, namehint);
    1266 }
    1267 
    1268 
    1269 
    1270 
     1239    return b.CreateXor(e, mConst_Aggregate_Xi64_neg1, namehint);
     1240}
     1241
     1242Value* LLVM_Generator::genAdvanceWithCarry(Value* strm_value) {
     1243    IRBuilder<> b(mBasicBlock);
     1244
     1245#if (BLOCK_SIZE == 128)
     1246    int this_carry_idx = mCarryQueueIdx;
     1247    mCarryQueueIdx++;
     1248
     1249    Value* carryq_value = genCarryInLoad(mptr_carry_q, this_carry_idx);
     1250
     1251    Value* srli_1_value = b.CreateLShr(strm_value, 63);
     1252
     1253    Value* packed_shuffle;
     1254    Constant* const_packed_1_elems [] = {b.getInt32(0), b.getInt32(2)};
     1255    Constant* const_packed_1 = ConstantVector::get(const_packed_1_elems);
     1256    packed_shuffle = b.CreateShuffleVector(carryq_value, srli_1_value, const_packed_1, "packed_shuffle nw");
     1257
     1258    Constant* const_packed_2_elems[] = {b.getInt64(1), b.getInt64(1)};
     1259    Constant* const_packed_2 = ConstantVector::get(const_packed_2_elems);
     1260
     1261    Value* shl_value = b.CreateShl(strm_value, const_packed_2, "shl_value");
     1262    Value* result_value = b.CreateOr(shl_value, packed_shuffle, "or.result_value");
     1263
     1264    Value* carry_out = genShiftHighbitToLow(strm_value, "carry_out");
     1265    //CarryQ - carry out:
     1266    Value* void_1 = genCarryOutStore(carry_out, mptr_carry_q, this_carry_idx);
     1267
     1268    return result_value;
     1269#endif
     1270
     1271#if (BLOCK_SIZE == 256)
     1272    return genAddWithCarry(strm_value, strm_value);
     1273#endif
     1274}
     1275
     1276
  • icGREP/icgrep-devel/icgrep/llvm_gen.h

    r4137 r4151  
    146146    Value* genCarryOutStore(Value* carryout, Value* ptr_carry_q, int carryq_idx);
    147147    Value* genAddWithCarry(Value* e1, Value* e2);
     148    Value* genAdvanceWithCarry(Value* e1);
    148149    Value* genBitBlockAny(Value* e);
    149     Value* genShiftRight127(Value* e, const Twine &namehint = "");
     150    Value* genShiftHighbitToLow(Value* e, const Twine &namehint = "");
    150151    Value* genShiftLeft64(Value* e, const Twine &namehint = "") ;
    151152    Value* genNot(Value* e, const Twine &namehint = "");
    152153
    153     SumWithOverflowPack genUaddOverflow(Value *e1, Value *e2);
     154    SumWithOverflowPack callUaddOverflow(Value *e1, Value *e2);
    154155
    155156    int         mBits;
     
    162163    ExecutionEngine* mExecutionEngine;
    163164
    164     VectorType*  m64x2Vect;
    165     PointerType* m64x2Vect_Ptr1;
     165    VectorType*  mXi64Vect;
     166    PointerType* mXi64Vect_Ptr1;
    166167
    167     VectorType* m128x1Vect;
     168    VectorType* mXi128Vect;
    168169
    169170    PointerType* mStruct_Basis_Bits_Ptr1;
     
    178179
    179180    ConstantInt*           mConst_int64_neg1;
    180     ConstantAggregateZero* mConst_Aggregate_64x2_0;
    181     Constant*              mConst_Aggregate_64x2_neg1;
     181    ConstantAggregateZero* mConst_Aggregate_Xi64_0;
     182    Constant*              mConst_Aggregate_Xi64_neg1;
    182183
    183184    FunctionType* mFuncTy_0;
    184185    Function*     mFunc_process_block;
    185     Function*     mFunc_llvm_uadd_with_overflow_i128;
     186    Function*     mFunc_llvm_uadd_with_overflow;
    186187
    187188    Constant*     mFunc_print_register;
  • icGREP/icgrep-devel/performance/viewer.py

    r4128 r4151  
    1212
    1313stat_set = set()
    14 def prepare_data(perf):
     14def prepare_data(perf1, perf2):
    1515    #key: 'cycles:u', 'instructions:u'
    1616    #value: perf data
    17     yvalues = {}
     17    y1 = {}
     18    y2 = {}
    1819
    1920    # program, file, expression, stat
    20     for p in perf:
    21         for f in perf[p]:
    22             for e in perf[p][f]:
    23                 for s in perf[p][f][e]:
    24                     if s not in yvalues:
    25                         yvalues[s] = []
    26                     stat_set.add(s)
    27                     yvalues[s].append(perf[p][f][e][s])
     21    p1 = perf1.keys()[0]
     22    p2 = perf2.keys()[0]
    2823
    29     return yvalues
     24    for f in perf1[p1]:
     25        for e in perf1[p1][f]:
     26            for s in perf1[p1][f][e]:
     27                if s not in y1:
     28                    y1[s] = []
     29                if s not in y2:
     30                    y2[s] = []
     31
     32                stat_set.add(s)
     33                y1[s].append(perf1[p1][f][e][s])
     34                y2[s].append(perf2[p2][f][e][s])
     35
     36    return y1, y2
    3037
    3138if __name__ == "__main__":
     39    #Parse args
     40    parser = argparse.ArgumentParser()
     41    parser.add_argument("-t", "--text", help="generate text-based view", action="store_true")
     42    parser.add_argument("-l", "--latex", help="generate latex table of the view", action="store_true")
     43    args = parser.parse_args()
     44
    3245    file_list = [f for f in os.listdir('.') if os.path.isfile(f) and "-perf.json" in f]
    3346    print "Found the following perf data:"
     
    4053    perf2 = json.load(fp = open(file_list[f2], 'r'))
    4154
    42     y1 = prepare_data(perf1)
    43     y2 = prepare_data(perf2)
     55    y1,y2 = prepare_data(perf1, perf2)
    4456
    4557    if len(y1) != len(y2):
     
    4860
    4961    stat_set = list(stat_set)
    50     print "Found the following stats availble:"
     62    print "Found the following stats available:"
    5163    for f in enumerate(stat_set):
    5264        print "{0} ... {1}".format(f[0], f[1])
     
    5870
    5971        stat = stat_set[s1]
    60         ind = np.arange(len(y1[stat]))
    6172
    62         width = (2 - 0.4 * 2) / 2.0
     73        if not args.text and not args.latex:
     74            ind = np.arange(len(y1[stat]))
    6375
    64         p1 = plt.bar(ind * 2, y1[stat], width=width, color='y')
    65         p2 = plt.bar(ind * 2 + width, y2[stat], width=width, color='r')
    66         plt.ylabel(stat)
    67         plt.legend((p1[0], p2[0]), (file_list[f1], file_list[f2]))
     76            width = (2 - 0.4 * 2) / 2.0
    6877
    69         plt.show()
     78            p1 = plt.bar(ind * 2, y1[stat], width=width, color='y')
     79            p2 = plt.bar(ind * 2 + width, y2[stat], width=width, color='r')
     80            plt.ylabel(stat)
     81            plt.legend((p1[0], p2[0]), (file_list[f1], file_list[f2]))
     82
     83            plt.show()
     84
     85        if args.text:
     86            row_format = "{:>20}" * 3
     87            print row_format.format("y1", "y2", "y2 : y1")
     88            print "-" * 20 * 3
     89            for i in xrange(0, len(y1[stat])):
     90                print row_format.format(y1[stat][i], y2[stat][i],
     91                                        y2[stat][i] * 1.0 / y1[stat][i])
     92            print "-" * 20 * 3
     93            print 'y1 = ' + file_list[f1]
     94            print 'y2 = ' + file_list[f2]
     95
Note: See TracChangeset for help on using the changeset viewer.