Changeset 5630
- Timestamp:
- Sep 7, 2017, 4:56:56 PM (18 months ago)
- Location:
- icGREP/icgrep-devel/icgrep
- Files:
-
- 3 added
- 29 edited
Legend:
- Unmodified
- Added
- Removed
-
icGREP/icgrep-devel/icgrep/CMakeLists.txt
r5599 r5630 87 87 add_library(PabloADT ${PABLO_SRC}) 88 88 add_library(RegExpADT re/re_re.cpp re/re_cc.cpp re/re_rep.cpp re/re_diff.cpp re/re_intersect.cpp re/printer_re.cpp) 89 add_library(RegExpCompiler re/re_parser.cpp re/re_ nullable.cpp re/re_simplifier.cpp re/re_star_normal.cpp re/re_local.cpp re/re_compiler.cpp re/re_analysis.cpp re/re_toolchain.cpp re/re_name_resolve.cpp re/re_name_gather.cpp re/re_collect_unicodesets.cpp re/re_multiplex.cpp re/re_parser_pcre.cpp re/re_parser_ere.cpp re/re_parser_bre.cpp re/re_parser_prosite.cpp re/re_utility.cpp)89 add_library(RegExpCompiler re/re_parser.cpp re/re_memoizer.cpp re/re_nullable.cpp re/re_simplifier.cpp re/re_star_normal.cpp re/re_minimizer.cpp re/re_local.cpp re/re_compiler.cpp re/re_analysis.cpp re/re_toolchain.cpp re/re_name_resolve.cpp re/re_name_gather.cpp re/re_collect_unicodesets.cpp re/re_multiplex.cpp re/re_parser_pcre.cpp re/re_parser_ere.cpp re/re_parser_bre.cpp re/re_parser_prosite.cpp re/re_utility.cpp) 90 90 add_library(CCADT cc/cc_compiler.cpp utf8_encoder.cpp utf16_encoder.cpp UCD/CaseFolding_txt.cpp cc/alphabet.cpp cc/multiplex_CCs.cpp) 91 91 add_library(UCDlib UCD/unicode_set.cpp UCD/ucd_compiler.cpp UCD/PropertyObjects.cpp UCD/resolve_properties.cpp UCD/UnicodeNameData.cpp) -
icGREP/icgrep-devel/icgrep/IR_Gen/CBuilder.cpp
r5624 r5630 201 201 if (LLVM_UNLIKELY(printRegister == nullptr)) { 202 202 FunctionType *FT = FunctionType::get(getVoidTy(), { getInt8PtrTy(), int64Ty }, false); 203 Function * function = Function::Create(FT, Function:: InternalLinkage, "PrintInt", m);203 Function * function = Function::Create(FT, Function::ExternalLinkage, "PrintInt", m); 204 204 auto arg = function->arg_begin(); 205 205 std::string out = "%-40s = %" PRIx64 "\n"; … … 561 561 562 562 PointerType * CBuilder::getVoidPtrTy() const { 563 return TypeBuilder<void *, true>::get(getContext());563 return TypeBuilder<void *, false>::get(getContext()); 564 564 } 565 565 … … 1050 1050 } 1051 1051 1052 #define CONCAT(a__, b__) a__##b__1053 #define STRINGIFY(a__) #a__1054 1055 1052 #ifdef HAS_ADDRESS_SANITIZER 1056 1053 #define CHECK_ADDRESS_SANITIZER(Ptr, Name) \ … … 1070 1067 Value * check = CreateCall(isPoisoned, { addr, size }); \ 1071 1068 check = CreateICmpEQ(check, ConstantPointerNull::get(cast<PointerType>(isPoisoned->getReturnType()))); \ 1072 CreateAssert(check, STRINGIFY(CONCAT(Name, ": invalid memory address"))); \1069 CreateAssert(check, Name ": invalid memory address"); \ 1073 1070 } 1074 1071 #else … … 1078 1075 #define CHECK_ADDRESS(Ptr, Name) \ 1079 1076 if (codegen::EnableAsserts) { \ 1080 CreateAssert(Ptr, STRINGIFY(CONCAT(Name, ": null pointer address"))); \1077 CreateAssert(Ptr, Name ": null pointer address"); \ 1081 1078 CHECK_ADDRESS_SANITIZER(Ptr, Name) \ 1082 1079 } … … 1161 1158 } 1162 1159 1163 CallInst * CBuilder::CreateMemMove(Value * Dst, Value * Src, Value *Size, unsigned Align, bool isVolatile, MDNode *TBAATag, MDNode *ScopeTag, MDNode *NoAliasTag) { 1160 CallInst * CBuilder::CreateMemMove(Value * Dst, Value * Src, Value *Size, unsigned Align, bool isVolatile, 1161 MDNode *TBAATag, MDNode *ScopeTag, MDNode *NoAliasTag) { 1164 1162 if (codegen::EnableAsserts) { 1165 1163 DataLayout DL(getModule()); … … 1167 1165 Value * intDst = CreatePtrToInt(Dst, intPtrTy); 1168 1166 Value * intSrc = CreatePtrToInt(Src, intPtrTy); 1167 // If the call to this intrinisic has an alignment value that is not 0 or 1, then the caller 1168 // guarantees that both the source and destination pointers are aligned to that boundary. 1169 1169 if (Align > 1) { 1170 1170 ConstantInt * align = ConstantInt::get(intPtrTy, Align); … … 1174 1174 } 1175 1175 return IRBuilder<>::CreateMemMove(Dst, Src, Size, Align, isVolatile, TBAATag, ScopeTag, NoAliasTag); 1176 } 1177 1178 llvm::CallInst * CBuilder::CreateMemCpy(llvm::Value *Dst, llvm::Value *Src, llvm::Value *Size, unsigned Align, bool isVolatile, 1179 llvm::MDNode *TBAATag, llvm::MDNode *TBAAStructTag, llvm::MDNode *ScopeTag, llvm::MDNode *NoAliasTag) { 1180 if (codegen::EnableAsserts) { 1181 DataLayout DL(getModule()); 1182 IntegerType * const intPtrTy = DL.getIntPtrType(getContext()); 1183 Value * intDst = CreatePtrToInt(Dst, intPtrTy); 1184 Value * intSrc = CreatePtrToInt(Src, intPtrTy); 1185 // If the call to this intrinisic has an alignment value that is not 0 or 1, then the caller 1186 // guarantees that both the source and destination pointers are aligned to that boundary. 1187 if (Align > 1) { 1188 ConstantInt * align = ConstantInt::get(intPtrTy, Align); 1189 CreateAssertZero(CreateURem(intDst, align), "CreateMemCpy: Dst pointer is misaligned"); 1190 CreateAssertZero(CreateURem(intSrc, align), "CreateMemCpy: Src pointer is misaligned"); 1191 } 1192 Value * intSize = CreateZExtOrTrunc(Size, intSrc->getType()); 1193 Value * nonOverlapping = CreateOr(CreateICmpULT(CreateAdd(intSrc, intSize), intDst), 1194 CreateICmpULT(CreateAdd(intDst, intSize), intSrc)); 1195 CreateAssert(nonOverlapping, "CreateMemCpy: overlapping ranges is undefined"); 1196 } 1197 return IRBuilder<>::CreateMemCpy(Dst, Src, Size, Align, isVolatile, TBAATag, TBAAStructTag, ScopeTag, NoAliasTag); 1176 1198 } 1177 1199 -
icGREP/icgrep-devel/icgrep/IR_Gen/CBuilder.h
r5624 r5630 258 258 llvm::StoreInst * CreateAlignedStore(llvm::Value * Val, llvm::Value * Ptr, unsigned Align, bool isVolatile = false); 259 259 260 llvm::CallInst * CreateMemMove(llvm::Value *Dst, llvm::Value *Src, uint64_t Size, unsigned Align, 261 bool isVolatile = false, llvm::MDNode *TBAATag = nullptr, 262 llvm::MDNode *ScopeTag = nullptr, 263 llvm::MDNode *NoAliasTag = nullptr) { 264 return CreateMemMove(Dst, Src, getInt64(Size), Align, isVolatile, TBAATag, ScopeTag, NoAliasTag); 265 } 266 260 267 llvm::CallInst * CreateMemMove(llvm::Value *Dst, llvm::Value *Src, llvm::Value *Size, unsigned Align, 261 268 bool isVolatile = false, llvm::MDNode *TBAATag = nullptr, 262 269 llvm::MDNode *ScopeTag = nullptr, 263 270 llvm::MDNode *NoAliasTag = nullptr); 271 272 llvm::CallInst * CreateMemCpy(llvm::Value *Dst, llvm::Value *Src, uint64_t Size, unsigned Align, 273 bool isVolatile = false, llvm::MDNode *TBAATag = nullptr, 274 llvm::MDNode *TBAAStructTag = nullptr, 275 llvm::MDNode *ScopeTag = nullptr, 276 llvm::MDNode *NoAliasTag = nullptr) { 277 return CreateMemCpy(Dst, Src, getInt64(Size), Align, isVolatile, TBAATag, TBAAStructTag, ScopeTag, NoAliasTag); 278 } 279 280 llvm::CallInst * CreateMemCpy(llvm::Value *Dst, llvm::Value *Src, llvm::Value *Size, unsigned Align, 281 bool isVolatile = false, llvm::MDNode *TBAATag = nullptr, 282 llvm::MDNode *TBAAStructTag = nullptr, 283 llvm::MDNode *ScopeTag = nullptr, 284 llvm::MDNode *NoAliasTag = nullptr); 285 264 286 265 287 void setDriver(Driver * const driver) { -
icGREP/icgrep-devel/icgrep/IR_Gen/idisa_nvptx_builder.cpp
r5486 r5630 210 210 CreateCall(barrierFunc); 211 211 212 Value * carryOffsetPtr = nullptr;213 212 Value * carryVal = carryInitVal; 214 Value * bubbleOffsetPtr = nullptr;215 213 Value * bubbleVal = bubbleInitVal; 216 214 217 215 for (unsigned offset = groupThreads/2; offset>0; offset=offset>>1){ 218 carryOffsetPtr = CreateGEP(carry, {getInt32(0), CreateXor(id, getInt32(offset))});216 Value * carryOffsetPtr = CreateGEP(carry, {getInt32(0), CreateXor(id, getInt32(offset))}); 219 217 carryVal = CreateOr(carryVal, CreateLoad(carryOffsetPtr)); 220 218 CreateStore(carryVal, carryPtr); 221 bubbleOffsetPtr = CreateGEP(bubble, {getInt32(0), CreateXor(id, getInt32(offset))});219 Value * bubbleOffsetPtr = CreateGEP(bubble, {getInt32(0), CreateXor(id, getInt32(offset))}); 222 220 bubbleVal = CreateOr(bubbleVal, CreateLoad(bubbleOffsetPtr)); 223 221 CreateStore(bubbleVal, bubblePtr); -
icGREP/icgrep-devel/icgrep/cc/multiplex_CCs.cpp
r5490 r5630 44 44 45 45 46 void doMultiplexCCs( std::vector<UCD::UnicodeSet>CCs,46 void doMultiplexCCs(const std::vector<UCD::UnicodeSet> & CCs, 47 47 std::vector<std::vector<unsigned>> & exclusiveSetIDs, 48 48 std::vector<UCD::UnicodeSet> & multiplexedCCs) { -
icGREP/icgrep-devel/icgrep/cc/multiplex_CCs.h
r5369 r5630 10 10 11 11 12 void doMultiplexCCs( std::vector<UCD::UnicodeSet>CCs,12 void doMultiplexCCs(const std::vector<UCD::UnicodeSet> & CCs, 13 13 std::vector<std::vector<unsigned>> & exclusiveSetIDs, 14 14 std::vector<UCD::UnicodeSet> & multiplexedCCs); -
icGREP/icgrep-devel/icgrep/editd/editd.cpp
r5603 r5630 685 685 auto editdScanK = pxDriver.addKernelInstance(make_unique<editdScanKernel>(iBuilder, editDistance)); 686 686 pxDriver.makeKernelCall(editdScanK, {MatchResults}, {}); 687 687 pxDriver.LinkFunction(*editdScanK, "wrapped_report_pos", &wrapped_report_pos); 688 688 pxDriver.generatePipelineIR(); 689 689 pxDriver.deallocateBuffers(); 690 690 iBuilder->CreateRetVoid(); 691 691 692 pxDriver.LinkFunction(*editdScanK, "wrapped_report_pos", &wrapped_report_pos);693 692 pxDriver.finalizeObject(); 694 693 -
icGREP/icgrep-devel/icgrep/icgrep-devel.files
r5620 r5630 291 291 wc.cpp 292 292 CMakeLists.txt 293 segtok.cpp 294 re/re_minimizer.h 295 re/re_minimizer.cpp 296 re/re_memoizer.cpp -
icGREP/icgrep-devel/icgrep/icgrep-devel.includes
r5620 r5630 2 2 ../boost/include/ 3 3 ../libllvm/include/ 4 re -
icGREP/icgrep-devel/icgrep/kernels/kernel.cpp
r5623 r5630 111 111 } 112 112 113 Module * Kernel::makeModule(const std::unique_ptr<KernelBuilder> & idb) { 114 assert (mModule == nullptr); 115 std::stringstream cacheName; 113 std::string Kernel::getCacheName(const std::unique_ptr<KernelBuilder> & idb) const { 114 std::stringstream cacheName; 116 115 cacheName << getName() << '_' << idb->getBuilderUniqueName(); 117 116 for (const StreamSetBuffer * b: mStreamSetInputBuffers) { … … 121 120 cacheName << ':' << b->getUniqueID(); 122 121 } 123 mModule = new Module(cacheName.str(), idb->getContext()); 124 prepareKernel(idb); 122 return cacheName.str(); 123 } 124 125 Module * Kernel::setModule(Module * const module) { 126 assert (mModule == nullptr || mModule == module); 127 assert (module != nullptr); 128 mModule = module; 125 129 return mModule; 126 130 } 127 131 128 Module * Kernel::setModule(const std::unique_ptr<KernelBuilder> & idb, llvm::Module * const module) { 129 assert (mModule == nullptr); 130 mModule = module; 131 prepareKernel(idb); 132 return mModule; 132 Module * Kernel::makeModule(const std::unique_ptr<kernel::KernelBuilder> & idb) { 133 return setModule(new Module(getCacheName(idb), idb->getContext())); 133 134 } 134 135 … … 145 146 const auto requiredBlocks = codegen::SegmentSize + ((blockSize + mLookAheadPositions - 1) / blockSize); 146 147 148 IntegerType * const sizeTy = idb->getSizeTy(); 149 147 150 for (unsigned i = 0; i < mStreamSetInputs.size(); i++) { 148 151 if ((mStreamSetInputBuffers[i]->getBufferBlocks() != 0) && (mStreamSetInputBuffers[i]->getBufferBlocks() < requiredBlocks)) { … … 151 154 mScalarInputs.emplace_back(mStreamSetInputBuffers[i]->getStreamSetHandle()->getType(), mStreamSetInputs[i].name + BUFFER_PTR_SUFFIX); 152 155 if ((i == 0) || !mStreamSetInputs[i].rate.isExact()) { 153 addScalar(idb->getSizeTy(), mStreamSetInputs[i].name + PROCESSED_ITEM_COUNT_SUFFIX); 154 } 155 } 156 157 IntegerType * const sizeTy = idb->getSizeTy(); 156 addScalar(sizeTy, mStreamSetInputs[i].name + PROCESSED_ITEM_COUNT_SUFFIX); 157 } 158 } 159 158 160 for (unsigned i = 0; i < mStreamSetOutputs.size(); i++) { 159 161 mScalarInputs.emplace_back(mStreamSetOutputBuffers[i]->getStreamSetHandle()->getType(), mStreamSetOutputs[i].name + BUFFER_PTR_SUFFIX); … … 191 193 // will be able to add instrumentation to cached modules without recompilation. 192 194 addScalar(idb->getInt64Ty(), CYCLECOUNT_SCALAR); 195 addInternalKernelProperties(idb); 193 196 // NOTE: StructType::create always creates a new type even if an identical one exists. 194 mKernelStateType = getModule()->getTypeByName(getName()); 197 if (LLVM_UNLIKELY(mModule == nullptr)) { 198 setModule(new Module(getCacheName(idb), idb->getContext())); 199 } 200 mKernelStateType = mModule->getTypeByName(getName()); 195 201 if (LLVM_LIKELY(mKernelStateType == nullptr)) { 196 202 mKernelStateType = StructType::create(idb->getContext(), mKernelFields, getName()); … … 198 204 processingRateAnalysis(); 199 205 } 200 206 207 void Kernel::prepareCachedKernel(const std::unique_ptr<KernelBuilder> & idb) { 208 209 assert ("KernelBuilder does not have a valid IDISA Builder" && idb); 210 if (LLVM_UNLIKELY(mKernelStateType != nullptr)) { 211 report_fatal_error("Cannot prepare kernel after kernel state finalized"); 212 } 213 assert (getModule()); 214 const auto blockSize = idb->getBitBlockWidth(); 215 if (mStride == 0) { 216 // Set the default kernel stride. 217 mStride = blockSize; 218 } 219 const auto requiredBlocks = codegen::SegmentSize + ((blockSize + mLookAheadPositions - 1) / blockSize); 220 221 IntegerType * const sizeTy = idb->getSizeTy(); 222 for (unsigned i = 0; i < mStreamSetInputs.size(); i++) { 223 if ((mStreamSetInputBuffers[i]->getBufferBlocks() != 0) && (mStreamSetInputBuffers[i]->getBufferBlocks() < requiredBlocks)) { 224 //report_fatal_error(getName() + ": " + mStreamSetInputs[i].name + " requires buffer size " + std::to_string(requiredBlocks)); 225 } 226 mScalarInputs.emplace_back(mStreamSetInputBuffers[i]->getStreamSetHandle()->getType(), mStreamSetInputs[i].name + BUFFER_PTR_SUFFIX); 227 if ((i == 0) || !mStreamSetInputs[i].rate.isExact()) { 228 addScalar(sizeTy, mStreamSetInputs[i].name + PROCESSED_ITEM_COUNT_SUFFIX); 229 } 230 } 231 232 for (unsigned i = 0; i < mStreamSetOutputs.size(); i++) { 233 mScalarInputs.emplace_back(mStreamSetOutputBuffers[i]->getStreamSetHandle()->getType(), mStreamSetOutputs[i].name + BUFFER_PTR_SUFFIX); 234 if ((mStreamSetInputs.empty() && (i == 0)) || !mStreamSetOutputs[i].rate.isExact()) { 235 addScalar(sizeTy, mStreamSetOutputs[i].name + PRODUCED_ITEM_COUNT_SUFFIX); 236 } 237 } 238 for (const auto & binding : mScalarInputs) { 239 addScalar(binding.type, binding.name); 240 } 241 for (const auto & binding : mScalarOutputs) { 242 addScalar(binding.type, binding.name); 243 } 244 if (mStreamMap.empty()) { 245 prepareStreamSetNameMap(); 246 } 247 for (const auto & binding : mInternalScalars) { 248 addScalar(binding.type, binding.name); 249 } 250 251 Type * const consumerSetTy = StructType::get(sizeTy, sizeTy->getPointerTo()->getPointerTo(), nullptr)->getPointerTo(); 252 for (unsigned i = 0; i < mStreamSetOutputs.size(); i++) { 253 addScalar(consumerSetTy, mStreamSetOutputs[i].name + CONSUMER_SUFFIX); 254 } 255 256 addScalar(sizeTy, LOGICAL_SEGMENT_NO_SCALAR); 257 addScalar(idb->getInt1Ty(), TERMINATION_SIGNAL); 258 259 for (unsigned i = 0; i < mStreamSetOutputs.size(); i++) { 260 addScalar(sizeTy, mStreamSetOutputs[i].name + CONSUMED_ITEM_COUNT_SUFFIX); 261 } 262 263 // We compile in a 64-bit CPU cycle counter into every kernel. It will remain unused 264 // in normal execution, but when codegen::EnableCycleCounter is specified, pipelines 265 // will be able to add instrumentation to cached modules without recompilation. 266 addScalar(idb->getInt64Ty(), CYCLECOUNT_SCALAR); 267 mKernelStateType = getModule()->getTypeByName(getName()); 268 assert (mKernelStateType); 269 processingRateAnalysis(); 270 } 201 271 202 272 void Kernel::processingRateAnalysis() { … … 290 360 const auto m = idb->getModule(); 291 361 const auto ip = idb->saveIP(); 292 const auto saveInstance = getInstance();362 // const auto saveInstance = getInstance(); 293 363 idb->setModule(mModule); 294 364 addKernelDeclarations(idb); … … 296 366 callGenerateDoSegmentMethod(idb); 297 367 callGenerateFinalizeMethod(idb); 298 setInstance(saveInstance);368 // setInstance(saveInstance); 299 369 idb->setModule(m); 300 370 idb->restoreIP(ip); … … 812 882 813 883 Value * blockBaseMask = kb->CreateNot(kb->getSize(kb->getBitBlockWidth() - 1)); 814 //815 // Define and allocate the temporary buffer area.816 //817 Type * tempBuffers[totalSetCount];818 for (unsigned i = 0; i < totalSetCount; i++) {819 unsigned blocks = maxBlocksToCopy[i];820 Type * bufType = i < inputSetCount ? mStreamSetInputBuffers[i]->getStreamSetBlockType() : mStreamSetOutputBuffers[i -inputSetCount]->getStreamSetBlockType();821 if (blocks > 1) {822 tempBuffers[i] = ArrayType::get(bufType, blocks);823 }824 else {825 tempBuffers[i] = bufType;826 }827 }828 Type * tempParameterStructType = StructType::create(kb->getContext(), ArrayRef<Type *>(tempBuffers, totalSetCount), "tempBuf");829 Value * tempParameterArea = kb->CreateCacheAlignedAlloca(tempParameterStructType);830 884 ConstantInt * blockSize = kb->getSize(kb->getBitBlockWidth()); 831 885 ConstantInt * strideSize = kb->getSize(mStride); … … 866 920 // buffer block containing the next item, and the number of linearly available items. 867 921 868 std::vector<Value *> processedItemCount;869 std::vector<Value *> inputBlockPtr;922 Value * processedItemCount[inputSetCount]; 923 Value * inputBlockPtr[inputSetCount]; 870 924 std::vector<Value *> producedItemCount; 871 925 std::vector<Value *> outputBlockPtr; … … 873 927 // Now determine the linearly available blocks, based on blocks remaining reduced 874 928 // by limitations of linearly available input buffer space. 875 876 929 Value * linearlyAvailStrides = stridesRemaining; 877 930 for (unsigned i = 0; i < inputSetCount; i++) { … … 879 932 Value * blkNo = kb->CreateUDiv(p, blockSize); 880 933 Value * b = kb->getInputStreamBlockPtr(mStreamSetInputs[i].name, kb->getInt32(0)); 881 processedItemCount.push_back(p); 882 inputBlockPtr.push_back(b); 934 // processedItemCount.push_back(p); 935 processedItemCount[i] = p; 936 // inputBlockPtr.push_back(b); 937 inputBlockPtr[i] = b; 883 938 auto & rate = mStreamSetInputs[i].rate; 884 939 if (rate.isUnknownRate()) continue; // No calculation possible for unknown rates. … … 894 949 linearlyAvailStrides = kb->CreateSelect(kb->CreateICmpULT(maxStrides, linearlyAvailStrides), maxStrides, linearlyAvailStrides); 895 950 } 951 896 952 // Now determine the linearly writeable blocks, based on available blocks reduced 897 953 // by limitations of output buffer space. … … 915 971 linearlyWritableStrides = kb->CreateSelect(kb->CreateICmpULT(maxStrides, linearlyWritableStrides), maxStrides, linearlyWritableStrides); 916 972 } 917 Value * haveStrides = kb->CreateICmpUGT(linearlyWritableStrides, kb->getSize(0));918 kb->CreateCondBr(have Strides, doMultiBlockCall, tempBlockCheck);973 Value * const haveFullStrides = kb->CreateICmpUGT(linearlyWritableStrides, kb->getSize(0)); 974 kb->CreateCondBr(haveFullStrides, doMultiBlockCall, tempBlockCheck); 919 975 920 976 // At this point we have verified the availability of one or more blocks of input data and output buffer space for all stream sets. … … 944 1000 945 1001 kb->CreateCall(multiBlockFunction, doMultiBlockArgs); 1002 946 1003 // Do copybacks if necessary. 947 1004 unsigned priorIdx = 0; 948 for (unsigned i = 0; i < mStreamSetOutputs.size(); i++) { 949 Value * log2BlockSize = kb->getSize(std::log2(kb->getBitBlockWidth())); 1005 for (unsigned i = 0; i < mStreamSetOutputs.size(); i++) { 950 1006 if (auto cb = dyn_cast<SwizzledCopybackBuffer>(mStreamSetOutputBuffers[i])) { 1007 Value * log2BlockSize = kb->getSize(std::log2(kb->getBitBlockWidth())); 951 1008 BasicBlock * copyBack = kb->CreateBasicBlock(mStreamSetOutputs[i].name + "_copyBack"); 952 1009 BasicBlock * done = kb->CreateBasicBlock(mStreamSetOutputs[i].name + "_copyBackDone"); … … 986 1043 kb->setProcessedItemCount(mStreamSetInputs[0].name, nowProcessed); 987 1044 Value * reducedStridesToDo = kb->CreateSub(stridesRemaining, linearlyWritableStrides); 988 BasicBlock * multiBlockFinal = kb->GetInsertBlock(); 989 stridesRemaining->addIncoming(reducedStridesToDo, multiBlockFinal); 1045 stridesRemaining->addIncoming(reducedStridesToDo, kb->GetInsertBlock()); 990 1046 kb->CreateBr(doSegmentOuterLoop); 1047 1048 991 1049 // 992 1050 // We use temporary buffers in 3 different cases that preclude full block processing. … … 999 1057 1000 1058 kb->SetInsertPoint(tempBlockCheck); 1001 haveStrides = kb->CreateICmpUGT(stridesRemaining, kb->getSize(0));1059 Value * const haveStrides = kb->CreateICmpUGT(stridesRemaining, kb->getSize(0)); 1002 1060 kb->CreateCondBr(kb->CreateOr(mIsFinal, haveStrides), doTempBufferBlock, segmentDone); 1003 1061 … … 1017 1075 } 1018 1076 } 1077 // 1078 // Define and allocate the temporary buffer area. 1079 // 1080 Type * tempBuffers[totalSetCount]; 1081 for (unsigned i = 0; i < inputSetCount; ++i) { 1082 Type * bufType = mStreamSetInputBuffers[i]->getStreamSetBlockType(); 1083 tempBuffers[i] = ArrayType::get(bufType, maxBlocksToCopy[i]); 1084 } 1085 for (unsigned i = 0; i < outputSetCount; i++) { 1086 Type * bufType = mStreamSetOutputBuffers[i]->getStreamSetBlockType(); 1087 tempBuffers[i + inputSetCount] = ArrayType::get(bufType, maxBlocksToCopy[i + inputSetCount]); 1088 } 1089 Type * tempParameterStructType = StructType::create(kb->getContext(), ArrayRef<Type *>(tempBuffers, totalSetCount), "tempBuf"); 1019 1090 // Prepare the temporary buffer area. 1020 // 1021 // First zero it out. 1022 Constant * const tempAreaSize = ConstantExpr::getIntegerCast(ConstantExpr::getSizeOf(tempParameterStructType), kb->getSizeTy(), false); 1023 kb->CreateMemZero(tempParameterArea, tempAreaSize); 1024 // For each input and output buffer, copy over necessary data starting from the last 1025 // block boundary. 1091 Value * tempParameterArea = kb->CreateCacheAlignedAlloca(tempParameterStructType); 1092 kb->CreateMemZero(tempParameterArea, ConstantExpr::getSizeOf(tempParameterStructType)); 1093 // For each input and output buffer, copy over necessary data starting from the last block boundary. 1026 1094 Value * itemCountNeeded[inputSetCount]; 1027 1095 itemCountNeeded[0] = tempBlockItems; 1028 1096 Value * finalItemCountNeeded[inputSetCount]; 1029 1097 1030 for (unsigned i = 0; i < mStreamSetInputBuffers.size(); i++) {1098 for (unsigned i = 0; i < inputSetCount; i++) { 1031 1099 Type * bufPtrType = mStreamSetInputBuffers[i]->getPointerType(); 1032 1100 if (mItemsPerStride[i] != 0) { … … 1035 1103 ConstantInt * strideItems = kb->getSize(mItemsPerStride[i]); 1036 1104 Value * strideBasePos = kb->CreateSub(processedItemCount[i], kb->CreateURem(processedItemCount[i], strideItems)); 1037 Value * blockBasePos = (mItemsPerStride[i] % bitBlockWidth == 0) ? strideBasePos : kb->CreateAnd(strideBasePos, blockBaseMask); 1105 Value * blockBasePos = strideBasePos; 1106 if (mItemsPerStride[i] & (bitBlockWidth - 1)) { 1107 blockBasePos = kb->CreateAnd(strideBasePos, blockBaseMask); 1108 } 1038 1109 1039 1110 // The number of items to copy is determined by the processing rate requirements. … … 1077 1148 } 1078 1149 tempArgs.push_back(tempBufPtr); 1079 } 1080 else { 1150 } else { 1081 1151 Value * bufPtr = kb->getInputStreamBlockPtr(mStreamSetInputs[i].name, kb->getInt32(0)); 1082 1152 bufPtr = kb->CreatePointerCast(bufPtr, mStreamSetInputBuffers[i]->getPointerType()); … … 1085 1155 } 1086 1156 Value * outputBasePos[outputSetCount]; 1087 for (unsigned i = 0; i < mStreamSetOutputBuffers.size(); i++) {1088 Value * tempBufPtr = kb->CreateGEP(tempParameterArea, {kb->getInt32(0), kb->getInt32( mStreamSetInputs.size()+ i)});1157 for (unsigned i = 0; i < outputSetCount; i++) { 1158 Value * tempBufPtr = kb->CreateGEP(tempParameterArea, {kb->getInt32(0), kb->getInt32(inputSetCount + i)}); 1089 1159 Type * bufPtrType = mStreamSetOutputBuffers[i]->getPointerType(); 1090 1160 tempBufPtr = kb->CreatePointerCast(tempBufPtr, bufPtrType); … … 1134 1204 } 1135 1205 1136 1137 1206 // We've dealt with the partial block processing and copied information back into the 1138 1207 // actual buffers. If this isn't the final block, loop back for more multiblock processing. -
icGREP/icgrep-devel/icgrep/kernels/kernel.h
r5615 r5630 88 88 89 89 StreamPort getStreamPort(const std::string & name) const; 90 91 llvm::Module * makeModule(const std::unique_ptr<KernelBuilder> & idb);92 93 llvm::Module * setModule(const std::unique_ptr<KernelBuilder> & idb, llvm::Module * const module);90 91 llvm::Module * setModule(llvm::Module * const module); 92 93 llvm::Module * makeModule(const std::unique_ptr<kernel::KernelBuilder> & idb); 94 94 95 95 llvm::Module * getModule() const { … … 137 137 virtual ~Kernel() = 0; 138 138 139 protected: 139 void prepareKernel(const std::unique_ptr<KernelBuilder> & idb); 140 141 void prepareCachedKernel(const std::unique_ptr<KernelBuilder> & idb); 142 143 std::string getCacheName(const std::unique_ptr<KernelBuilder> & idb) const; 144 145 protected: 146 147 virtual void addInternalKernelProperties(const std::unique_ptr<KernelBuilder> & idb) { } 140 148 141 149 // Constructor … … 168 176 169 177 void linkExternalMethods(const std::unique_ptr<kernel::KernelBuilder> &) override { } 170 171 virtual void prepareKernel(const std::unique_ptr<KernelBuilder> & idb);172 178 173 179 virtual void generateInitializeMethod(const std::unique_ptr<kernel::KernelBuilder> & iBuilder) { } -
icGREP/icgrep-devel/icgrep/kernels/lz4_bytestream_decoder.cpp
r5440 r5630 78 78 outputBufferBasePtr, 79 79 iBuilder->CreateGEP(inputBufferBasePtr, iBuilder->CreateAdd(literalStart, copyLength1)), 80 iBuilder->CreateSub(literalLength, copyLength1), 8); // Buffer start is aligned. 80 iBuilder->CreateSub(literalLength, copyLength1), 1); // Buffer start is aligned. 81 // NOTE: Test case reported non-8-byte alignment 81 82 outputItems = iBuilder->CreateAdd(outputItems, literalLength); 82 83 -
icGREP/icgrep-devel/icgrep/kernels/scanmatchgen.cpp
r5526 r5630 55 55 /* self = */ args++; 56 56 Value * itemsToDo = &*(args++); 57 Value * inputStreamAvail = &*(args++);57 /* inputStreamAvail = */ args++; 58 58 Value * match_result = &*(args++); 59 59 Value * line_break = &*(args++); 60 Value * input_stream = &*(args);60 /* input_stream = */ args++; 61 61 62 62 Value * blocksToDo = iBuilder->CreateUDiv(iBuilder->CreateAdd(itemsToDo, blockSizeLess1), blockSize); -
icGREP/icgrep-devel/icgrep/pablo/carry_data.h
r5366 r5630 11 11 12 12 class CarryData { 13 friend class CarryManager;14 13 public: 15 14 16 enum SummaryType : int { 17 NoSummary 18 , ImplicitSummary 19 , BorrowedSummary 20 , ExplicitSummary 15 enum SummaryType : unsigned { 16 NoSummary = 0 17 , ImplicitSummary = 1 18 , BorrowedSummary = 2 19 , ExplicitSummary = 3 20 , NonCarryCollapsingMode = 4 21 21 }; 22 22 23 23 CarryData() 24 : mSummaryType(NoSummary) 25 , mInNonCollapsingCarryMode(false) { 24 : mSummaryType(NoSummary) { 26 25 27 26 } 28 27 29 28 bool hasSummary() const { 30 return (mSummaryType != NoSummary);29 return (mSummaryType & (ImplicitSummary | BorrowedSummary | ExplicitSummary)) != NoSummary; 31 30 } 32 31 33 32 bool hasImplicitSummary() const { 34 return (mSummaryType == ImplicitSummary);33 return (mSummaryType & (ImplicitSummary | BorrowedSummary | ExplicitSummary)) == ImplicitSummary; 35 34 } 36 35 37 36 bool hasBorrowedSummary() const { 38 return (mSummaryType == BorrowedSummary);37 return (mSummaryType & (ImplicitSummary | BorrowedSummary | ExplicitSummary)) == BorrowedSummary; 39 38 } 40 39 41 40 bool hasExplicitSummary() const { 42 return (mSummaryType == ExplicitSummary);41 return (mSummaryType & (ImplicitSummary | BorrowedSummary | ExplicitSummary)) == ExplicitSummary; 43 42 } 44 43 45 44 bool nonCarryCollapsingMode() const { 46 return mInNonCollapsingCarryMode;45 return (mSummaryType & (NonCarryCollapsingMode)) != 0; 47 46 } 48 47 … … 52 51 53 52 void setNonCollapsingCarryMode(const bool value = true) { 54 mInNonCollapsingCarryMode = value; 53 if (value) { 54 mSummaryType = (SummaryType)(mSummaryType | NonCarryCollapsingMode); 55 } else { 56 mSummaryType = (SummaryType)(mSummaryType & ~NonCarryCollapsingMode); 57 } 55 58 } 56 59 … … 58 61 59 62 SummaryType mSummaryType; 60 bool mInNonCollapsingCarryMode;61 63 62 64 }; -
icGREP/icgrep-devel/icgrep/pablo/carrypack_manager.cpp
r5510 r5630 127 127 mCarryGroup.resize(assignDefaultCarryGroups(kernel->getEntryBlock())); 128 128 129 Type * const carryStateTy = analyse(iBuilder, kernel->getEntryBlock()); 130 131 kernel->addScalar(carryStateTy, "carries"); 132 133 // iBuilder->CallPrintInt("carry state size:", ConstantExpr::getSizeOf(carryStateTy)); 129 kernel->setCarryDataTy(analyse(iBuilder, mCurrentScope)); 130 131 kernel->addScalar(kernel->getCarryDataTy(), "carries"); 134 132 135 133 if (mHasLoop) { -
icGREP/icgrep-devel/icgrep/pablo/pablo_kernel.cpp
r5620 r5630 125 125 } 126 126 127 void PabloKernel:: prepareKernel(const std::unique_ptr<kernel::KernelBuilder> & iBuilder) {127 void PabloKernel::addInternalKernelProperties(const std::unique_ptr<kernel::KernelBuilder> & iBuilder) { 128 128 mSizeTy = iBuilder->getSizeTy(); 129 129 mStreamTy = iBuilder->getStreamTy(); … … 132 132 mPabloCompiler->initializeKernelData(iBuilder); 133 133 mSizeTy = nullptr; 134 mStreamTy = nullptr; 135 BlockOrientedKernel::prepareKernel(iBuilder); 134 mStreamTy = nullptr; 136 135 } 137 136 -
icGREP/icgrep-devel/icgrep/pablo/pablo_kernel.h
r5510 r5630 31 31 friend class CarryManager; 32 32 friend class CarryPackManager; 33 friend class ParabixObjectCache; 33 34 34 35 public: … … 124 125 Integer * getInteger(const int64_t value) const; 125 126 127 llvm::StructType * getCarryDataTy() const { 128 return mCarryDataTy; 129 } 130 126 131 protected: 127 132 … … 145 150 llvm::IntegerType * getInt1Ty() const; 146 151 147 private: 152 void setCarryDataTy(llvm::StructType * const carryDataTy) { 153 mCarryDataTy = carryDataTy; 154 } 148 155 149 156 // A custom method for preparing kernel declarations is needed, 150 157 // so that the carry data requirements may be accommodated before 151 158 // finalizing the KernelStateType. 152 void prepareKernel(const std::unique_ptr<kernel::KernelBuilder> & iBuilder) final; 159 void addInternalKernelProperties(const std::unique_ptr<kernel::KernelBuilder> & iBuilder) final; 160 161 private: 153 162 154 163 void generateDoBlockMethod(const std::unique_ptr<KernelBuilder> & iBuilder) final; … … 169 178 llvm::IntegerType * mSizeTy; 170 179 llvm::VectorType * mStreamTy; 180 llvm::StructType * mCarryDataTy; 171 181 std::vector<Var *> mInputs; 172 182 std::vector<Var *> mOutputs; -
icGREP/icgrep-devel/icgrep/re/re_memoizer.hpp
r5267 r5630 1 #ifndef RE_ NAMEDICTIONARY_H2 #define RE_ NAMEDICTIONARY_H1 #ifndef RE_MEMOIZER_H 2 #define RE_MEMOIZER_H 3 3 4 4 #include <re/re_name.h> … … 8 8 9 9 struct MemoizerComparator { 10 inline bool operator() (const RE * lh, const RE * rh) const{ 11 if (LLVM_LIKELY(llvm::isa<Name>(lh) && llvm::isa<Name>(rh))) { 12 return *llvm::cast<Name>(lh) < *llvm::cast<Name>(rh); 13 } else if (llvm::isa<Name>(lh)) { 14 return *llvm::cast<Name>(lh) < *llvm::cast<CC>(rh); 15 } 16 return *llvm::cast<Name>(rh) > *llvm::cast<CC>(lh); 17 } 10 bool operator() (const RE * lh, const RE * rh) const; 18 11 }; 19 12 20 13 struct Memoizer : public std::set<RE *, MemoizerComparator> { 21 14 22 inline Name * memoize(CC * cc) { 15 RE * memoize(RE * const re) { 16 return *(insert(re).first); 17 } 18 19 Name * memoize(Name * const name) { 20 return llvm::cast<Name>(memoize(llvm::cast<RE>(name))); 21 } 22 23 Name * memoize(CC * const cc) { 23 24 auto f = find(cc); 24 25 if (f != end()) { 25 26 return llvm::cast<Name>(*f); 26 27 } else { 27 Name * name = makeName(cc); 28 insert(name); 29 return name; 28 return memoize(makeName(cc)); 30 29 } 31 }32 33 inline Name * memoize(Name * name) {34 return llvm::cast<Name>(*insert(name).first);35 30 } 36 31 }; … … 38 33 } 39 34 40 #endif // RE_ NAMEDICTIONARY_H35 #endif // RE_MEMOIZER_H -
icGREP/icgrep-devel/icgrep/re/re_name.h
r5558 r5630 127 127 return *llvm::cast<CC>(mDefinition) < other; 128 128 } 129 return false;129 return RE::ClassTypeId::Name < RE::ClassTypeId::CC; 130 130 } 131 131 … … 134 134 return other < *llvm::cast<CC>(mDefinition); 135 135 } 136 return true;136 return RE::ClassTypeId::CC < RE::ClassTypeId::Name; 137 137 } 138 138 -
icGREP/icgrep-devel/icgrep/re/re_parser.cpp
r5620 r5630 149 149 RE * re = nullptr; 150 150 if (fModeFlagSet & IGNORE_SPACE_MODE_FLAG) { 151 while (*mCursor == ' ') mCursor++; 151 while (mCursor.more() && *mCursor == ' ') { 152 ++mCursor; 153 } 152 154 } 153 155 if (mCursor.more()) { … … 190 192 re = makeSeq({re, makeZeroWidth("GCB")}); 191 193 } 192 return re;194 break; 193 195 case '.': // the 'any' metacharacter 194 196 mCursor++; … … 202 204 fGraphemeBoundaryPending = true; 203 205 } 204 return re; 205 } 206 } 207 return nullptr; 206 } 207 } 208 return re; 208 209 } 209 210 -
icGREP/icgrep-devel/icgrep/re/re_re.h
r5267 r5630 26 26 , Intersect 27 27 , Name 28 , Permute28 // , Permute 29 29 , Rep 30 30 , Seq 31 31 , Start 32 , SymDiff33 , Union32 // , SymDiff 33 // , Union 34 34 }; 35 35 inline ClassTypeId getClassTypeId() const { … … 52 52 class Vector : public RE, public std::vector<RE*, RE::VectorAllocator> { 53 53 public: 54 static inline bool classof(const RE * re) { 55 const auto typeId = re->getClassTypeId(); 56 return typeId == ClassTypeId::Alt || typeId == ClassTypeId::Seq; 57 } 58 static inline bool classof(const void *) { 59 return false; 60 } 54 61 virtual ~Vector() {} 55 62 protected: -
icGREP/icgrep-devel/icgrep/re/re_reverse.cpp
r5493 r5630 56 56 } else if (Name * n = dyn_cast<Name>(re)) { 57 57 switch (n->getType()) { 58 case Name::Type::Byte: return makeName(cast<CC>(n->getDefinition())); 59 case Name::Type::Unicode: return makeName(cast<CC>(n->getDefinition())); 58 case Name::Type::Byte: 59 case Name::Type::Unicode: 60 return makeName(cast<CC>(n->getDefinition())); 60 61 case Name::Type::UnicodeProperty: 61 62 return makeName(n->getNamespace(), n->getName(), Name::Type::UnicodeProperty); -
icGREP/icgrep-devel/icgrep/re/re_simplifier.cpp
r5493 r5630 1 1 #include "re_simplifier.h" 2 2 #include <re/re_name.h> 3 #include <re/re_any.h>4 #include <re/re_start.h>5 #include <re/re_end.h>6 3 #include <re/re_alt.h> 7 #include <re/re_cc.h>8 4 #include <re/re_seq.h> 9 5 #include <re/re_rep.h> … … 11 7 #include <re/re_intersect.h> 12 8 #include <re/re_assertion.h> 13 #include <re/re_analysis.h> 14 #include <algorithm> 15 #include <memory> 16 #include <queue> 9 #include <re/re_memoizer.hpp> 10 #include <boost/container/flat_set.hpp> 17 11 18 12 using namespace llvm; … … 20 14 namespace re { 21 15 16 struct PassContainer { 17 RE * simplify(RE * re) { 18 if (Alt * alt = dyn_cast<Alt>(re)) { 19 boost::container::flat_set<RE *> list; 20 list.reserve(alt->size()); 21 for (RE * item : *alt) { 22 item = simplify(item); 23 if (LLVM_UNLIKELY(isa<Vector>(item) && cast<Vector>(item)->empty())) { 24 continue; 25 } 26 list.insert(item); 27 } 28 re = makeAlt(list.begin(), list.end()); 29 } else if (Seq * seq = dyn_cast<Seq>(re)) { 30 std::vector<RE *> list; 31 list.reserve(seq->size()); 32 for (RE * item : *seq) { 33 item = simplify(item); 34 if (LLVM_UNLIKELY(isa<Vector>(item) && cast<Vector>(item)->empty())) { 35 continue; 36 } 37 list.push_back(item); 38 } 39 re = makeSeq(list.begin(), list.end()); 40 } else if (Assertion * a = dyn_cast<Assertion>(re)) { 41 re = makeAssertion(simplify(a->getAsserted()), a->getKind(), a->getSense()); 42 } else if (Rep * rep = dyn_cast<Rep>(re)) { 43 RE * expr = simplify(rep->getRE()); 44 re = makeRep(expr, rep->getLB(), rep->getUB()); 45 } else if (Diff * diff = dyn_cast<Diff>(re)) { 46 re = makeDiff(simplify(diff->getLH()), simplify(diff->getRH())); 47 } else if (Intersect * e = dyn_cast<Intersect>(re)) { 48 re = makeIntersect(simplify(e->getLH()), simplify(e->getRH())); 49 } 50 return mMemoizer.memoize(re); 51 } 52 private: 53 Memoizer mMemoizer; 54 }; 55 22 56 RE * RE_Simplifier::simplify(RE * re) { 23 if (Alt * alt = dyn_cast<Alt>(re)) { 24 std::vector<RE *> list; 25 list.reserve(alt->size()); 26 for (RE * re : *alt) { 27 list.push_back(simplify(re)); 28 } 29 re = makeAlt(list.begin(), list.end()); 30 } else if (Seq * seq = dyn_cast<Seq>(re)) { 31 std::vector<RE *> list; 32 list.reserve(seq->size()); 33 for (RE * re : *seq) { 34 list.push_back(simplify(re)); 35 } 36 re = makeSeq(list.begin(), list.end()); 37 } else if (Assertion * a = dyn_cast<Assertion>(re)) { 38 re = makeAssertion(simplify(a->getAsserted()), a->getKind(), a->getSense()); 39 } else if (Rep * rep = dyn_cast<Rep>(re)) { 40 RE * expr = simplify(rep->getRE()); 41 re = makeRep(expr, rep->getLB(), rep->getUB()); 42 } else if (Diff * diff = dyn_cast<Diff>(re)) { 43 re = makeDiff(simplify(diff->getLH()), simplify(diff->getRH())); 44 } else if (Intersect * e = dyn_cast<Intersect>(re)) { 45 re = makeIntersect(simplify(e->getLH()), simplify(e->getRH())); 46 } 47 return re; 57 PassContainer pc; 58 return pc.simplify(re); 48 59 } 49 60 -
icGREP/icgrep-devel/icgrep/re/re_toolchain.cpp
r5620 r5630 12 12 #include <re/re_star_normal.h> // for RE_Star_Normal 13 13 #include <re/re_simplifier.h> // for RE_Simplifier 14 #include <re/re_minimizer.h> 14 15 #include <re/re_local.h> 15 16 #include <re/printer_re.h> … … 63 64 64 65 //Optimization passes to simplify the AST. 65 re_ast = re::RE_Nullable::removeNullablePrefix(re_ast);66 re_ast = RE_Nullable::removeNullablePrefix(re_ast); 66 67 if (PrintOptions.isSet(ShowAllREs) || PrintOptions.isSet(ShowStrippedREs)) { 67 68 std::cerr << "RemoveNullablePrefix:" << std::endl << Printer_RE::PrintRE(re_ast) << std::endl; 68 69 } 69 re_ast = re::RE_Nullable::removeNullableSuffix(re_ast);70 re_ast = RE_Nullable::removeNullableSuffix(re_ast); 70 71 if (PrintOptions.isSet(ShowAllREs) || PrintOptions.isSet(ShowStrippedREs)) { 71 72 std::cerr << "RemoveNullableSuffix:" << std::endl << Printer_RE::PrintRE(re_ast) << std::endl; 72 73 } 73 re_ast = re::RE_Nullable::removeNullableAssertion(re_ast);74 re_ast = RE_Nullable::removeNullableAssertion(re_ast); 74 75 if (PrintOptions.isSet(ShowAllREs) || PrintOptions.isSet(ShowStrippedREs)) { 75 76 std::cerr << "RemoveNullableAssertion:" << std::endl << Printer_RE::PrintRE(re_ast) << std::endl; 76 77 } 77 //re_ast = re::RE_Nullable::removeNullableAfterAssertion(re_ast);78 //re_ast = RE_Nullable::removeNullableAfterAssertion(re_ast); 78 79 //if (PrintOptions.isSet(ShowAllREs) || PrintOptions.isSet(ShowStrippedREs)) { 79 80 // std::cerr << "RemoveNullableAfterAssertion" << std::endl << Printer_RE::PrintRE(re_ast) << std::endl; 80 81 //} 81 82 82 re_ast = re::RE_Simplifier::simplify(re_ast); 83 // re_ast = RE_Minimizer::minimize(re_ast); 84 85 re_ast = RE_Simplifier::simplify(re_ast); 86 83 87 if (PrintOptions.isSet(ShowAllREs) || PrintOptions.isSet(ShowSimplifiedREs)) { 84 88 //Print to the terminal the AST that was generated by the simplifier. … … 86 90 } 87 91 88 re_ast = re::RE_Star_Normal::star_normal(re_ast); 92 re_ast = RE_Star_Normal::star_normal(re_ast); 93 89 94 if (PrintOptions.isSet(ShowAllREs) || PrintOptions.isSet(ShowSimplifiedREs)) { 90 95 //Print to the terminal the AST that was transformed to the star normal form. 91 96 std::cerr << "Star_Normal_Form:" << std::endl << Printer_RE::PrintRE(re_ast) << std::endl; 92 97 } 98 93 99 return re_ast; 94 100 } … … 98 104 bool local = RE_Local::isLocalLanguage(re_ast) && isTypeForLocal(re_ast); 99 105 cc::CC_Compiler cc_compiler(kernel, basis); 100 re::RE_Compiler re_compiler(kernel, cc_compiler, local);106 RE_Compiler re_compiler(kernel, cc_compiler, local); 101 107 re_compiler.compileUnicodeNames(re_ast); 102 108 re_compiler.compile(re_ast); -
icGREP/icgrep-devel/icgrep/toolchain/NVPTXDriver.cpp
r5474 r5630 54 54 mPipeline.emplace_back(kb); 55 55 kb->bindPorts(inputs, outputs); 56 kb->setModule( iBuilder,mMainModule);56 kb->setModule(mMainModule); 57 57 } 58 58 -
icGREP/icgrep-devel/icgrep/toolchain/cpudriver.cpp
r5616 r5630 70 70 throw std::runtime_error("Could not create ExecutionEngine: " + errMessage); 71 71 } 72 mTarget = builder.selectTarget(); 72 mTarget = builder.selectTarget(); 73 73 if (LLVM_LIKELY(codegen::EnableObjectCache)) { 74 74 if (codegen::ObjectCacheDir) { … … 80 80 } 81 81 mMainModule->setTargetTriple(mTarget->getTargetTriple().getTriple()); 82 83 82 iBuilder.reset(IDISA::GetIDISA_Builder(*mContext)); 84 83 iBuilder->setDriver(this); … … 86 85 } 87 86 88 void ParabixDriver::makeKernelCall(Kernel * kb, const std::vector<StreamSetBuffer *> & inputs, const std::vector<StreamSetBuffer *> & outputs) { 89 assert ("addKernelCall or makeKernelCall was already run on this kernel." && (kb->getModule() == nullptr)); 90 mPipeline.emplace_back(kb); 91 kb->bindPorts(inputs, outputs); 92 kb->makeModule(iBuilder); 87 void ParabixDriver::makeKernelCall(Kernel * kernel, const std::vector<StreamSetBuffer *> & inputs, const std::vector<StreamSetBuffer *> & outputs) { 88 assert ("addKernelCall or makeKernelCall was already run on this kernel." && (kernel->getModule() == nullptr)); 89 mPipeline.emplace_back(kernel); 90 kernel->bindPorts(inputs, outputs); 91 if (!mCache || !mCache->loadCachedObjectFile(iBuilder, kernel)) { 92 mUncachedKernel.push_back(kernel); 93 } 94 if (kernel->getModule() == nullptr) { 95 kernel->makeModule(iBuilder); 96 } 97 assert (kernel->getModule()); 93 98 } 94 99 95 100 void ParabixDriver::generatePipelineIR() { 96 #ifndef NDEBUG 101 97 102 if (LLVM_UNLIKELY(mPipeline.empty())) { 98 103 report_fatal_error("Pipeline cannot be empty"); … … 101 106 for (auto j = i; ++j != mPipeline.end(); ) { 102 107 if (LLVM_UNLIKELY(*i == *j)) { 103 report_fatal_error("Kernel instances cannot occurtwice in the pipeline");108 report_fatal_error("Kernel " + (*i)->getName() + " occurs twice in the pipeline"); 104 109 } 105 110 } 106 111 } 107 112 } 108 #endif 113 114 for (Kernel * const kernel : mUncachedKernel) { 115 kernel->prepareKernel(iBuilder); 116 } 117 109 118 // note: instantiation of all kernels must occur prior to initialization 110 119 for (const auto & k : mPipeline) { … … 130 139 131 140 Function * ParabixDriver::addLinkFunction(Module * mod, llvm::StringRef name, FunctionType * type, void * functionPtr) const { 132 assert ("addKernelCall or makeKernelCall must be called before LinkFunction" && (mod != nullptr)); 141 if (LLVM_UNLIKELY(mod == nullptr)) { 142 report_fatal_error("addLinkFunction(" + name + ") cannot be called until after addKernelCall or makeKernelCall"); 143 } 133 144 Function * f = mod->getFunction(name); 134 145 if (LLVM_UNLIKELY(f == nullptr)) { 135 146 f = Function::Create(type, Function::ExternalLinkage, name, mod); 136 mEngine-> addGlobalMapping(f, functionPtr);147 mEngine->updateGlobalMapping(f, functionPtr); 137 148 } else if (LLVM_UNLIKELY(f->getType() != type->getPointerTo())) { 138 149 report_fatal_error("Cannot link " + name + ": a function with a different signature already exists with that name in " + mod->getName()); … … 193 204 194 205 Module * module = nullptr; 195 196 206 try { 197 207 for (Kernel * const kernel : mUncachedKernel) { 208 iBuilder->setKernel(kernel); 209 kernel->generateKernel(iBuilder); 210 module = kernel->getModule(); assert (module); 211 module->setTargetTriple(mMainModule->getTargetTriple()); 212 PM.run(*module); 213 } 214 module = mMainModule; 215 iBuilder->setKernel(nullptr); 216 PM.run(*mMainModule); 198 217 for (Kernel * const kernel : mPipeline) { 199 iBuilder->setKernel(kernel); 200 module = kernel->getModule(); 201 assert (module != mMainModule); 202 bool uncachedObject = true; 203 if (mCache && mCache->loadCachedObjectFile(iBuilder, kernel)) { 204 uncachedObject = false; 205 } 206 if (uncachedObject) { 207 module->setTargetTriple(mMainModule->getTargetTriple()); 208 kernel->generateKernel(iBuilder); 209 PM.run(*module); 210 } 211 mEngine->addModule(std::unique_ptr<Module>(module)); 212 } 213 214 iBuilder->setKernel(nullptr); 215 module = mMainModule; 216 PM.run(*mMainModule); 218 if (LLVM_UNLIKELY(kernel->getModule() == nullptr)) { 219 report_fatal_error(kernel->getName() + " was neither loaded from cache nor generated prior to finalizeObject"); 220 } 221 mEngine->addModule(std::unique_ptr<Module>(kernel->getModule())); 222 } 217 223 mEngine->finalizeObject(); 218 219 224 if (mCache) mCache->cleanUpObjectCacheFiles(); 220 221 225 } catch (const std::exception & e) { 222 report_fatal_error( e.what());226 report_fatal_error(module->getName() + ": " + e.what()); 223 227 } 224 228 -
icGREP/icgrep-devel/icgrep/toolchain/cpudriver.h
r5616 r5630 34 34 llvm::ExecutionEngine * mEngine; 35 35 ParabixObjectCache * mCache; 36 std::vector<kernel::Kernel *> mUncachedKernel; 36 37 // NOTE: when printing the IR/ASM, we cannot assume they're completely finished after finalizeObject is executed. Instead we store a 37 38 // pointer and delete them once the driver (and any processing) is complete. This prevents us from reclaiming the memory early but -
icGREP/icgrep-devel/icgrep/toolchain/object_cache.cpp
r5493 r5630 13 13 #include <boost/filesystem.hpp> 14 14 #include <boost/range/iterator_range.hpp> 15 #include <boost/container/flat_set.hpp> 16 #include <llvm/Bitcode/ReaderWriter.h> 17 #include <llvm/IR/Verifier.h> 15 18 #include <ctime> 16 19 … … 59 62 const static auto SIGNATURE = "signature"; 60 63 61 const static boost::uintmax_t CACHE_SIZE_LIMIT = 5 * 1024 * 1024;64 const static boost::uintmax_t CACHE_SIZE_LIMIT = 50 * 1024 * 1024; 62 65 63 66 const MDString * getSignature(const llvm::Module * const M) { … … 73 76 bool ParabixObjectCache::loadCachedObjectFile(const std::unique_ptr<kernel::KernelBuilder> & idb, kernel::Kernel * const kernel) { 74 77 if (LLVM_LIKELY(kernel->isCachable())) { 75 Module * const module = kernel->getModule();76 assert ("kernel module cannot be null!" && module);77 const auto moduleId = module->getModuleIdentifier(); 78 assert (kernel->getModule() == nullptr); 79 const auto moduleId = kernel->getCacheName(idb); 80 78 81 // Have we already seen this module before? 79 if (LLVM_UNLIKELY(mCachedObject.count(moduleId) != 0)) { 82 const auto f = mCachedObject.find(moduleId); 83 if (LLVM_UNLIKELY(f != mCachedObject.end())) { 84 Module * const m = f->second.first; assert (m); 85 kernel->setModule(m); 80 86 return true; 81 87 } … … 94 100 if (signatureBuffer) { 95 101 const StringRef loadedSig = signatureBuffer.get()->getBuffer(); 96 if ( !loadedSig.equals(kernel->makeSignature(idb))) {97 return false;102 if (LLVM_UNLIKELY(!loadedSig.equals(kernel->makeSignature(idb)))) { 103 goto invalid; 98 104 } 99 105 } else { 100 106 report_fatal_error("signature file expected but not found: " + moduleId); 101 return false; 107 } 108 } 109 sys::path::replace_extension(objectName, ".kernel"); 110 auto kernelBuffer = MemoryBuffer::getFile(objectName.c_str(), -1, false); 111 if (*kernelBuffer) { 112 //MemoryBuffer * kb = kernelBuffer.get().release(); 113 //auto loadedFile = parseBitcodeFile(kb->getMemBufferRef(), mContext); 114 auto loadedFile = getLazyBitcodeModule(std::move(kernelBuffer.get()), idb->getContext()); 115 if (*loadedFile) { 116 Module * const m = loadedFile.get().release(); assert (m); 117 // defaults to <path>/<moduleId>.kernel 118 m->setModuleIdentifier(moduleId); 119 kernel->setModule(m); 120 kernel->prepareCachedKernel(idb); 121 mCachedObject.emplace(moduleId, std::make_pair(m, std::move(objectBuffer.get()))); 122 // update the modified time of the object file 123 sys::path::replace_extension(objectName, ".o"); 124 boost::filesystem::last_write_time(objectName.c_str(), time(0)); 125 return true; 102 126 } 103 127 } 104 // update the modified time of the file then add it to our cache 105 boost::filesystem::last_write_time(objectName.c_str(), time(0)); 106 mCachedObject.emplace(moduleId, std::move(objectBuffer.get())); 107 return true; 108 } else { 109 // mark this module as cachable 110 module->getOrInsertNamedMetadata(CACHEABLE); 111 // if this module has a signature, add it to the metadata 112 if (kernel->hasSignature()) { 113 NamedMDNode * const md = module->getOrInsertNamedMetadata(SIGNATURE); 114 assert (md->getNumOperands() == 0); 115 MDString * const sig = MDString::get(module->getContext(), kernel->makeSignature(idb)); 116 md->addOperand(MDNode::get(module->getContext(), {sig})); 117 } 128 } 129 130 invalid: 131 132 Module * const module = kernel->setModule(new Module(moduleId, idb->getContext())); 133 // mark this module as cachable 134 module->getOrInsertNamedMetadata(CACHEABLE); 135 // if this module has a signature, add it to the metadata 136 if (kernel->hasSignature()) { 137 NamedMDNode * const md = module->getOrInsertNamedMetadata(SIGNATURE); 138 assert (md->getNumOperands() == 0); 139 MDString * const sig = MDString::get(module->getContext(), kernel->makeSignature(idb)); 140 md->addOperand(MDNode::get(module->getContext(), {sig})); 118 141 } 119 142 } … … 124 147 // exists, write it out. 125 148 void ParabixObjectCache::notifyObjectCompiled(const Module * M, MemoryBufferRef Obj) { 126 if ( M->getNamedMetadata(CACHEABLE)) {149 if (LLVM_LIKELY(M->getNamedMetadata(CACHEABLE))) { 127 150 const auto moduleId = M->getModuleIdentifier(); 128 151 Path objectName(mCachePath); … … 135 158 } 136 159 160 // Write the object code 137 161 std::error_code EC; 138 raw_fd_ostream o utfile(objectName, EC, sys::fs::F_None);139 o utfile.write(Obj.getBufferStart(), Obj.getBufferSize());140 o utfile.close();141 142 // If this module has a signature, write it.162 raw_fd_ostream objFile(objectName, EC, sys::fs::F_None); 163 objFile.write(Obj.getBufferStart(), Obj.getBufferSize()); 164 objFile.close(); 165 166 // then the signature (if one exists) 143 167 const MDString * const sig = getSignature(M); 144 168 if (sig) { … … 148 172 sigfile.close(); 149 173 } 174 175 // and finally kernel prototype header. 176 std::unique_ptr<Module> header(new Module(M->getModuleIdentifier(), M->getContext())); 177 for (const Function & f : M->getFunctionList()) { 178 if (f.hasExternalLinkage() && !f.empty()) { 179 Function::Create(f.getFunctionType(), Function::ExternalLinkage, f.getName(), header.get()); 180 } 181 } 182 183 sys::path::replace_extension(objectName, ".kernel"); 184 raw_fd_ostream kernelFile(objectName.str(), EC, sys::fs::F_None); 185 WriteBitcodeToFile(header.get(), kernelFile, false, false); 186 kernelFile.close(); 150 187 } 151 188 } … … 178 215 objectPath.replace_extension("sig"); 179 216 remove(objectPath); 217 objectPath.replace_extension("kernel"); 218 remove(objectPath); 180 219 } 181 220 } … … 185 224 186 225 std::unique_ptr<MemoryBuffer> ParabixObjectCache::getObject(const Module * module) { 187 const auto moduleId = module->getModuleIdentifier(); 188 const auto f = mCachedObject.find(moduleId); 226 const auto f = mCachedObject.find(module->getModuleIdentifier()); 189 227 if (f == mCachedObject.end()) { 190 228 return nullptr; 191 229 } 192 230 // Return a copy of the buffer, for MCJIT to modify, if necessary. 193 return MemoryBuffer::getMemBufferCopy(f->second. get()->getBuffer());231 return MemoryBuffer::getMemBufferCopy(f->second.second.get()->getBuffer()); 194 232 } 195 233 … … 211 249 } 212 250 213 ParabixObjectCache::ParabixObjectCache(const std::string &dir)251 ParabixObjectCache::ParabixObjectCache(const std::string dir) 214 252 : mCachePath(dir) { 215 253 -
icGREP/icgrep-devel/icgrep/toolchain/object_cache.h
r5464 r5630 18 18 namespace llvm { class MemoryBuffer; } 19 19 namespace llvm { class MemoryBufferRef; } 20 namespace llvm { class LLVMContext; } 20 21 namespace kernel { class Kernel; } 21 22 namespace kernel { class KernelBuilder; } … … 37 38 template <typename K, typename V> 38 39 using Map = boost::container::flat_map<K, V>; 39 using ModuleCache = Map<std::string, std:: unique_ptr<llvm::MemoryBuffer>>;40 using ModuleCache = Map<std::string, std::pair<llvm::Module *, std::unique_ptr<llvm::MemoryBuffer>>>; 40 41 public: 41 42 ParabixObjectCache(); 42 ParabixObjectCache(const std::string &dir);43 ParabixObjectCache(const std::string dir); 43 44 bool loadCachedObjectFile(const std::unique_ptr<kernel::KernelBuilder> & idb, kernel::Kernel * const kernel); 44 void notifyObjectCompiled(const llvm::Module * M, llvm::MemoryBufferRef Obj) override;45 void notifyObjectCompiled(const llvm::Module * M, llvm::MemoryBufferRef Obj) override; 45 46 void cleanUpObjectCacheFiles(); 46 47 std::unique_ptr<llvm::MemoryBuffer> getObject(const llvm::Module * M) override; … … 48 49 static Path getDefaultPath(); 49 50 private: 50 ModuleCache mCachedObject;51 const Path mCachePath;51 ModuleCache mCachedObject; 52 const Path mCachePath; 52 53 }; 53 54
Note: See TracChangeset
for help on using the changeset viewer.