- Timestamp:
- Dec 6, 2018, 7:26:06 AM (2 months ago)
- Location:
- icGREP/icgrep-devel/icgrep
- Files:
-
- 3 edited
Legend:
- Unmodified
- Added
- Removed
-
icGREP/icgrep-devel/icgrep/grep/grep_engine.cpp
r6217 r6218 81 81 namespace grep { 82 82 83 using Alphabets = ICGrepKernel::Alphabets;84 85 83 void GrepCallBackObject::handle_signal(unsigned s) { 86 84 if (static_cast<GrepSignal>(s) == GrepSignal::BinaryFile) { … … 288 286 Component internalComponents = Component::NoComponents; 289 287 288 290 289 291 290 if (isSimple) { … … 295 294 setComponent(internalComponents, Component::MoveMatchesToEOL); 296 295 } 296 std::unique_ptr<GrepKernelOptions> options = make_unique<GrepKernelOptions>(); 297 297 const auto isWithinByteTestLimit = byteTestsWithinLimit(mREs[0], ByteCClimit); 298 298 const auto hasTriCC = hasTriCCwithinLimit(mREs[0], ByteCClimit, prefixRE, suffixRE); 299 ICGrepKernel::Externals externals;300 299 if (isWithinByteTestLimit || hasTriCC) { 301 300 if (MultithreadedSimpleRE && hasTriCC) { … … 307 306 StreamSet * const ccStream = P->CreateStreamSet(1, 1); 308 307 P->CreateKernelCall<DirectCharacterClassKernelBuilder>(ccNameStr, std::vector<re::CC *>{cc}, ByteStream, ccStream); 309 externals.emplace_back(ccNameStr, ccStream);308 options->addExternal(ccNameStr, ccStream); 310 309 } 311 310 } … … 313 312 MatchResultsBufs[0] = MatchResults; 314 313 if (isWithinByteTestLimit) { 315 P->CreateKernelCall<ICGrepKernel>(mREs[0], ByteStream, MatchResults, externals); 314 options->setRE(mREs[0]); 315 options->setSource(ByteStream); 316 options->setResults(MatchResults); 317 P->CreateKernelCall<ICGrepKernel>(std::move(options)); 316 318 } else { 317 P->CreateKernelCall<ByteBitGrepKernel>(prefixRE, suffixRE, ByteStream, MatchResults, externals); 319 //P->CreateKernelCall<ByteBitGrepKernel>(prefixRE, suffixRE, ByteStream, MatchResults, externals); 320 options->setPrefixRE(prefixRE); 321 options->setRE(suffixRE); 322 options->setSource(ByteStream); 323 options->setResults(MatchResults); 324 P->CreateKernelCall<ICGrepKernel>(std::move(options)); 318 325 } 319 326 Kernel * LB_nullK = P->CreateKernelCall<DirectCharacterClassKernelBuilder>( "breakCC", std::vector<re::CC *>{mBreakCC}, ByteStream, LineBreakStream, callbackObject); … … 329 336 P->CreateKernelCall<S2P_PabloKernel>(ByteStream, BasisBits); 330 337 } else { 338 //P->CreateKernelCall<S2PKernel>(ByteStream, BasisBits); 331 339 Kernel * s2pK = P->CreateKernelCall<S2PKernel>(ByteStream, BasisBits, cc::BitNumbering::LittleEndian, callbackObject); 332 340 mGrepDriver.LinkFunction(s2pK, "signal_dispatcher", kernel::signal_dispatcher); … … 366 374 367 375 for(unsigned i = 0; i < numOfREs; ++i) { 368 ICGrepKernel::Externals externals;376 std::unique_ptr<GrepKernelOptions> options = make_unique<GrepKernelOptions>(); 369 377 if (mGrepRecordBreak == GrepRecordBreakKind::Unicode) { 370 externals.emplace_back("UTF8_LB", LineBreakStream);371 externals.emplace_back("UTF8_nonfinal", RequiredStreams);378 options->addExternal("UTF8_LB", LineBreakStream); 379 options->addExternal("UTF8_nonfinal", RequiredStreams); 372 380 } 373 381 std::set<re::Name *> UnicodeProperties; … … 380 388 report_fatal_error(name + " not found"); 381 389 } 382 externals.emplace_back(name, f->second);390 options->addExternal(name, f->second); 383 391 } 384 392 } 385 393 if (hasGCB[i]) { assert (GCB_stream); 386 externals.emplace_back("\\b{g}", GCB_stream);394 options->addExternal("\\b{g}", GCB_stream); 387 395 } 388 396 … … 393 401 const auto UnicodeSets = re::collectCCs(mREs[i], cc::Unicode, std::set<re::Name *>{re::makeZeroWidth("\\b{g}")}); 394 402 if (UnicodeSets.size() <= 1) { 395 P->CreateKernelCall<ICGrepKernel>(mREs[i], BasisBits, MatchResults, externals); 403 options->setRE(mREs[i]); 404 options->setSource(BasisBits); 405 options->setResults(MatchResults); 396 406 } else { 397 407 auto mpx = std::make_shared<MultiplexedAlphabet>("mpx", UnicodeSets); … … 404 414 // Multiplexing Grep Kernel is not Cachable, since for now it use string representation of RE AST as cache key, 405 415 // whileit is possible that two multiplexed REs with the same name "mpx_1" have different alphabets 406 407 Alphabets alphabets; 408 alphabets.emplace_back(mpx, CharClasses); 409 P->CreateKernelCall<ICGrepKernel>(mREs[i], BasisBits, MatchResults, externals, alphabets, cc::BitNumbering::LittleEndian, false); 416 options->setRE(mREs[i]); 417 options->setSource(BasisBits); 418 options->setResults(MatchResults); 419 options->addAlphabet(mpx, CharClasses); 420 P->CreateKernelCall<ICGrepKernel>(std::move(options)); 410 421 } 411 422 } else { 412 P->CreateKernelCall<ICGrepKernel>(mREs[i], BasisBits, MatchResults, externals); 423 options->setRE(mREs[i]); 424 options->setSource(BasisBits); 425 options->setResults(MatchResults); 426 P->CreateKernelCall<ICGrepKernel>(std::move(options)); 413 427 } 414 428 } … … 568 582 close(fileDescriptor); 569 583 if (handler.binaryFileSignalled()) { 584 llvm::errs() << "Binary file " << fileName << "\n"; 570 585 return 0; 571 586 } … … 612 627 if (accum.binaryFileSignalled()) { 613 628 accum.mResultStr.clear(); 614 accum.mResultStr << "Binary file " << fileName << " skipped.\n"; 629 if (!mSuppressFileMessages) { 630 accum.mResultStr << "Binary file " << fileName << " skipped.\n"; 631 } 615 632 } 616 633 if (accum.mLineCount > 0) grepMatchFound = true; … … 793 810 } else { 794 811 StreamSet * MatchResults = E->CreateStreamSet(); 795 E->CreateKernelCall<ICGrepKernel>(matchingRE, BasisBits, MatchResults); 812 std::unique_ptr<GrepKernelOptions> options = make_unique<GrepKernelOptions>(); 813 options->setRE(matchingRE); 814 options->setSource(BasisBits); 815 options->setResults(MatchResults); 816 E->CreateKernelCall<ICGrepKernel>(std::move(options)); 796 817 MatchingRecords = E->CreateStreamSet(); 797 818 E->CreateKernelCall<MatchedLinesKernel>(MatchResults, RecordBreakStream, MatchingRecords); … … 799 820 if (!excludeNothing) { 800 821 StreamSet * ExcludedResults = E->CreateStreamSet(); 801 E->CreateKernelCall<ICGrepKernel>(excludedRE, BasisBits, ExcludedResults); 822 std::unique_ptr<GrepKernelOptions> options = make_unique<GrepKernelOptions>(); 823 options->setRE(excludedRE); 824 options->setSource(BasisBits); 825 options->setResults(ExcludedResults); 826 E->CreateKernelCall<ICGrepKernel>(std::move(options)); 802 827 StreamSet * ExcludedRecords = E->CreateStreamSet(); 803 828 E->CreateKernelCall<MatchedLinesKernel>(ExcludedResults, RecordBreakStream, ExcludedRecords); -
icGREP/icgrep-devel/icgrep/kernels/grep_kernel.cpp
r6205 r6218 5 5 6 6 #include "grep_kernel.h" 7 #include <cc/alphabet.h> 7 8 #include <re/printer_re.h> 8 9 #include <re/re_cc.h> … … 214 215 } 215 216 216 ICGrepSignature::ICGrepSignature(re::RE * const re_ast) 217 : mRE(re_ast) 218 , mSignature(Printer_RE::PrintRE(mRE)) { 219 220 } 221 // Helper to compute stream set inputs to pass into PabloKernel constructor. 222 Bindings ICGrepKernel::makeInputBindings(StreamSet * const basis, const Externals & externals, const Alphabets & alphabets) { 217 void GrepKernelOptions::setNumbering(cc::BitNumbering numbering) {mBasisSetNumbering = numbering;} 218 void GrepKernelOptions::setIndexingAlphabet(cc::Alphabet * a) {mIndexingAlphabet = a;} 219 void GrepKernelOptions::setRE(RE * e) {mRE = e;} 220 void GrepKernelOptions::setPrefixRE(RE * e) {mPrefixRE = e;} 221 void GrepKernelOptions::setSource(StreamSet * s) {mSource = s;} 222 void GrepKernelOptions::setResults(StreamSet * r) {mResults = r;} 223 void GrepKernelOptions::addExternal(std::string name, StreamSet * strm) { 224 mExternals.emplace_back(name, strm); 225 } 226 void GrepKernelOptions::addAlphabet(std::shared_ptr<cc::Alphabet> a, StreamSet * basis) { 227 mAlphabets.emplace_back(a, basis); 228 } 229 230 Bindings GrepKernelOptions::streamSetInputBindings() { 223 231 Bindings inputs; 224 inputs.emplace_back("basis", basis);225 for (const auto & e : externals) {232 inputs.emplace_back("basis", mSource); 233 for (const auto & e : mExternals) { 226 234 inputs.emplace_back(e.first, e.second); 227 235 } 228 for (const auto & a : alphabets) {236 for (const auto & a : mAlphabets) { 229 237 inputs.emplace_back(a.first->getName() + "_basis", a.second); 230 238 } … … 232 240 } 233 241 234 ICGrepKernel::ICGrepKernel(const std::unique_ptr<kernel::KernelBuilder> & b, 235 RE * const re, 236 StreamSet * const BasisBits, 237 StreamSet * const matches, 238 const Externals externals, 239 const Alphabets alphabets, 240 const cc::BitNumbering basisSetNumbering, 241 const bool cachable) 242 : ICGrepSignature(re) 243 , PabloKernel(b, "ic" + getStringHash(mSignature), 244 // inputs 245 makeInputBindings(BasisBits, externals, alphabets), 246 // output 247 {Binding{"matches", matches, FixedRate(), Add1()}}) 248 , mExternals(std::move(externals)) 249 , mAlphabets(std::move(alphabets)) 250 , mBasisSetNumbering(basisSetNumbering) 251 , mIsCachable(cachable) { 242 Bindings GrepKernelOptions::streamSetOutputBindings() { 243 return {Binding{"matches", mResults, FixedRate(), Add1()}}; 244 } 245 246 Bindings GrepKernelOptions::scalarInputBindings() { 247 return {}; 248 } 249 250 Bindings GrepKernelOptions::scalarOutputBindings() { 251 return {}; 252 } 253 254 std::string GrepKernelOptions::getSignature() { 255 if (mSignature == "") { 256 mSignature = std::to_string(mSource->getNumElements()) + "x" + std::to_string(mSource->getFieldWidth()); 257 mSignature += "/" + mIndexingAlphabet->getName(); 258 for (auto e: mExternals) { 259 mSignature += "_" + e.first; 260 } 261 for (auto a: mAlphabets) { 262 mSignature += "_" + a.first->getName(); 263 } 264 if (mPrefixRE) { 265 mSignature += ":" + Printer_RE::PrintRE(mPrefixRE); 266 } 267 mSignature += ":" + Printer_RE::PrintRE(mRE); 268 } 269 return mSignature; 270 } 271 272 ICGrepKernel::ICGrepKernel(const std::unique_ptr<kernel::KernelBuilder> & b, std::unique_ptr<GrepKernelOptions> options) 273 : PabloKernel(b, "ic" + getStringHash(options->getSignature()), 274 options->streamSetInputBindings(), 275 options->streamSetOutputBindings(), 276 options->scalarInputBindings(), 277 options->scalarOutputBindings()), mOptions(std::move(options)) { 252 278 } 253 279 254 280 std::string ICGrepKernel::makeSignature(const std::unique_ptr<kernel::KernelBuilder> &) { 255 return m Signature;281 return mOptions->getSignature(); 256 282 } 257 283 … … 263 289 ccc = make_unique<cc::Direct_CC_Compiler>(getEntryScope(), pb.createExtract(getInput(0), pb.getInteger(0))); 264 290 } else { 265 ccc = make_unique<cc::Parabix_CC_Compiler>(getEntryScope(), getInputStreamSet("basis"), m BasisSetNumbering);266 } 267 //cc::Parabix_CC_Compiler ccc(getEntryScope(), getInputStreamSet("basis"), m BasisSetNumbering);268 RE_Compiler re_compiler(getEntryScope(), *ccc.get(), m BasisSetNumbering);269 for (const auto & e : m Externals) {291 ccc = make_unique<cc::Parabix_CC_Compiler>(getEntryScope(), getInputStreamSet("basis"), mOptions->mBasisSetNumbering); 292 } 293 //cc::Parabix_CC_Compiler ccc(getEntryScope(), getInputStreamSet("basis"), mOptions->mBasisSetNumbering); 294 RE_Compiler re_compiler(getEntryScope(), *ccc.get(), mOptions->mBasisSetNumbering); 295 for (const auto & e : mOptions->mExternals) { 270 296 re_compiler.addPrecompiled(e.first, pb.createExtract(getInputStreamVar(e.first), pb.getInteger(0))); 271 297 } 272 for (const auto & a : m Alphabets) {298 for (const auto & a : mOptions->mAlphabets) { 273 299 auto & alpha = a.first; 274 300 auto mpx_basis = getInputStreamSet(alpha->getName() + "_basis"); 275 301 re_compiler.addAlphabet(alpha, mpx_basis); 276 302 } 277 PabloAST * const matches = re_compiler.compile(mRE); 278 Var * const output = getOutputStreamVar("matches"); 279 pb.createAssign(pb.createExtract(output, pb.getInteger(0)), matches); 303 if (mOptions->mPrefixRE) { 304 PabloAST * const prefixMatches = re_compiler.compile(mOptions->mPrefixRE); 305 Var * const final_matches = pb.createVar("final_matches", pb.createZeroes()); 306 PabloBlock * scope1 = getEntryScope()->createScope(); 307 pb.createIf(prefixMatches, scope1); 308 309 PabloAST * u8bytes = pb.createExtract(getInput(0), pb.getInteger(0)); 310 PabloAST * nybbles[2]; 311 nybbles[0] = scope1->createPackL(scope1->getInteger(8), u8bytes); 312 nybbles[1] = scope1->createPackH(scope1->getInteger(8), u8bytes); 313 314 PabloAST * bitpairs[4]; 315 for (unsigned i = 0; i < 2; i++) { 316 bitpairs[2*i] = scope1->createPackL(scope1->getInteger(4), nybbles[i]); 317 bitpairs[2*i + 1] = scope1->createPackH(scope1->getInteger(4), nybbles[i]); 318 } 319 320 std::vector<PabloAST *> basis(8); 321 for (unsigned i = 0; i < 4; i++) { 322 basis[2*i] = scope1->createPackL(scope1->getInteger(2), bitpairs[i]); 323 basis[2*i + 1] = scope1->createPackH(scope1->getInteger(2), bitpairs[i]); 324 } 325 326 cc::Parabix_CC_Compiler ccc(scope1, basis); 327 RE_Compiler re_compiler(scope1, ccc); 328 scope1->createAssign(final_matches, re_compiler.compile(mOptions->mRE, prefixMatches)); 329 Var * const output = getOutputStreamVar("matches"); 330 pb.createAssign(pb.createExtract(output, pb.getInteger(0)), final_matches); 331 } else { 332 PabloAST * const matches = re_compiler.compile(mOptions->mRE); 333 Var * const output = getOutputStreamVar("matches"); 334 pb.createAssign(pb.createExtract(output, pb.getInteger(0)), matches); 335 } 280 336 } 281 337 -
icGREP/icgrep-devel/icgrep/kernels/grep_kernel.h
r6203 r6218 52 52 53 53 54 struct ICGrepSignature { 55 ICGrepSignature(re::RE * re_ast); 54 class GrepKernelOptions { 55 friend class ICGrepKernel; 56 public: 57 using Externals = std::vector<std::pair<std::string, StreamSet *>>; 58 using Alphabets = std::vector<std::pair<std::shared_ptr<cc::Alphabet>, StreamSet *>>; 59 GrepKernelOptions() : 60 mBasisSetNumbering(cc::BitNumbering::LittleEndian), 61 mIndexingAlphabet(&cc::Byte), 62 mPrefixRE(nullptr) {} 63 void setNumbering(cc::BitNumbering numbering); 64 void setIndexingAlphabet(cc::Alphabet * a); 65 void setSource(StreamSet * s); 66 void setResults(StreamSet * r); 67 void addExternal(std::string name, StreamSet * strm); 68 void addAlphabet(std::shared_ptr<cc::Alphabet> a, StreamSet * basis); 69 void setRE(re::RE * re); 70 void setPrefixRE(re::RE * re); 71 56 72 protected: 57 re::RE * const mRE; 73 Bindings streamSetInputBindings(); 74 Bindings streamSetOutputBindings(); 75 Bindings scalarInputBindings(); 76 Bindings scalarOutputBindings(); 77 std::string getSignature(); 78 79 private: 80 cc::BitNumbering mBasisSetNumbering; 81 const cc::Alphabet * mIndexingAlphabet; 82 StreamSet * mSource; 83 StreamSet * mResults; 84 Externals mExternals; 85 Alphabets mAlphabets; 86 re::RE * mRE; 87 re::RE * mPrefixRE; 58 88 std::string mSignature; 59 89 }; 60 90 61 62 class ICGrepKernel : public ICGrepSignature, publicpablo::PabloKernel {91 92 class ICGrepKernel : public pablo::PabloKernel { 63 93 public: 64 65 using Externals = std::vector<std::pair<std::string, StreamSet *>>;66 using Alphabets = std::vector<std::pair<std::shared_ptr<cc::Alphabet>, StreamSet *>>;67 68 94 ICGrepKernel(const std::unique_ptr<kernel::KernelBuilder> & iBuilder, 69 re::RE * const re_ast, StreamSet * const BasisBits, StreamSet * const MatchResults, 70 const Externals externals = {}, const Alphabets alphabets = {}, 71 const cc::BitNumbering basisSetNumbering = cc::BitNumbering::LittleEndian, 72 const bool cachable = true); 95 std::unique_ptr<GrepKernelOptions> options); 73 96 std::string makeSignature(const std::unique_ptr<kernel::KernelBuilder> &) override; 74 bool isCachable() const override { return mIsCachable; }97 bool isCachable() const override { return true; } 75 98 bool hasFamilyName() const override { return true; } 76 99 protected: 77 100 void generatePabloMethod() override; 78 private: 79 static Bindings makeInputBindings(StreamSet * const basis, const Externals & externals, const Alphabets & alphabets); 80 protected: 81 const Externals mExternals; 82 const Alphabets mAlphabets; 83 const cc::BitNumbering mBasisSetNumbering; 84 const bool mIsCachable; 101 std::unique_ptr<GrepKernelOptions> mOptions; 85 102 }; 86 103
Note: See TracChangeset
for help on using the changeset viewer.