Ignore:
Timestamp:
Oct 9, 2017, 9:28:24 AM (19 months ago)
Author:
cameron
Message:

Refactoring progress: \N uses name property; delay resolution of recursive property expressions, property object regexp support

File:
1 edited

Legend:

Unmodified
Added
Removed
  • icGREP/icgrep-devel/icgrep/grep_engine.cpp

    r5678 r5679  
    99#include <llvm/IR/Module.h>
    1010#include <boost/filesystem.hpp>
    11 #include <UCD/UnicodeNameData.h>
    1211#include <UCD/resolve_properties.h>
    1312#include <kernels/charclasses.h>
     
    523522}
    524523
    525    
    526 void accumulate_match_wrapper(intptr_t accum_addr, const size_t lineNum, size_t line_start, size_t line_end) {
    527     reinterpret_cast<MatchAccumulator *>(accum_addr)->accumulate_match(lineNum, line_start, line_end);
    528 }
    529 
    530    
    531 
    532 
    533 void grepBuffer(re::RE * pattern, char * UnicodeDataBuffer, size_t bufferLength, MatchAccumulator * accum) {
    534     const unsigned segmentSize = 8;
    535 
    536     ParabixDriver pxDriver("codepointEngine");
    537     auto & idb = pxDriver.getBuilder();
    538     Module * M = idb->getModule();
    539    
    540     Function * mainFunc = cast<Function>(M->getOrInsertFunction("Main", idb->getVoidTy(), idb->getInt8PtrTy(), idb->getSizeTy(), nullptr));
    541     mainFunc->setCallingConv(CallingConv::C);
    542     auto args = mainFunc->arg_begin();
    543     Value * const buffer = &*(args++);
    544     buffer->setName("buffer");
    545     Value * length = &*(args++);
    546     length->setName("length");
    547    
    548     idb->SetInsertPoint(BasicBlock::Create(M->getContext(), "entry", mainFunc, 0));
    549    
    550     StreamSetBuffer * ByteStream = pxDriver.addBuffer(make_unique<SourceBuffer>(idb, idb->getStreamSetTy(1, 8)));
    551     kernel::Kernel * sourceK = pxDriver.addKernelInstance(make_unique<kernel::MemorySourceKernel>(idb, idb->getInt8PtrTy(), segmentSize));
    552     sourceK->setInitialArguments({buffer, length});
    553     pxDriver.makeKernelCall(sourceK, {}, {ByteStream});
    554    
    555     StreamSetBuffer * BasisBits = pxDriver.addBuffer(make_unique<CircularBuffer>(idb, idb->getStreamSetTy(8, 1), segmentSize));
    556    
    557     kernel::Kernel * s2pk = pxDriver.addKernelInstance(make_unique<kernel::S2PKernel>(idb));
    558     pxDriver.makeKernelCall(s2pk, {ByteStream}, {BasisBits});
    559    
    560     kernel::Kernel * linebreakK = pxDriver.addKernelInstance(make_unique<kernel::LineBreakKernelBuilder>(idb, 8));
    561     StreamSetBuffer * LineBreakStream = pxDriver.addBuffer(make_unique<CircularBuffer>(idb, idb->getStreamSetTy(1, 1), segmentSize));
    562     pxDriver.makeKernelCall(linebreakK, {BasisBits}, {LineBreakStream});
    563    
    564     kernel::Kernel * requiredStreamsK = pxDriver.addKernelInstance(make_unique<kernel::RequiredStreams_UTF8>(idb));
    565     StreamSetBuffer * RequiredStreams = pxDriver.addBuffer(make_unique<CircularBuffer>(idb, idb->getStreamSetTy(4, 1), segmentSize));
    566     pxDriver.makeKernelCall(requiredStreamsK, {BasisBits}, {RequiredStreams});
    567    
    568     StreamSetBuffer * MatchResults = pxDriver.addBuffer(make_unique<CircularBuffer>(idb, idb->getStreamSetTy(1, 1), segmentSize));
    569     kernel::Kernel * icgrepK = pxDriver.addKernelInstance(make_unique<kernel::ICGrepKernel>(idb, pattern));
    570     pxDriver.makeKernelCall(icgrepK, {BasisBits, LineBreakStream, RequiredStreams}, {MatchResults});
    571    
    572     StreamSetBuffer * MatchedLines = pxDriver.addBuffer(make_unique<CircularBuffer>(idb, idb->getStreamSetTy(1, 1), segmentSize));
    573     kernel::Kernel * matchedLinesK = pxDriver.addKernelInstance(make_unique<kernel::MatchedLinesKernel>(idb));
    574     pxDriver.makeKernelCall(matchedLinesK, {MatchResults, LineBreakStream}, {MatchedLines});
    575    
    576     kernel::Kernel * scanMatchK = pxDriver.addKernelInstance(make_unique<kernel::ScanMatchKernel>(idb, GrepType::CallBack, 8));
    577     scanMatchK->setInitialArguments({ConstantInt::get(idb->getIntAddrTy(), reinterpret_cast<intptr_t>(accum))});
    578     pxDriver.makeKernelCall(scanMatchK, {MatchedLines, LineBreakStream, ByteStream}, {});
    579     pxDriver.LinkFunction(*scanMatchK, "accumulate_match_wrapper", &accumulate_match_wrapper);
    580     pxDriver.generatePipelineIR();
    581     pxDriver.deallocateBuffers();
    582     idb->CreateRetVoid();
    583     pxDriver.finalizeObject();
    584    
    585     typedef void (*GrepFunctionType)(const char * buffer, const size_t length);
    586     auto f = reinterpret_cast<GrepFunctionType>(pxDriver.getMain());
    587     f(UnicodeDataBuffer, bufferLength);
    588 }
    589 
    590 class CodepointAccumulator : public MatchAccumulator {
    591 public:
    592    
    593     CodepointAccumulator(const char * searchBuffer) : mSearchBuffer(searchBuffer), mParsedCodePointSet(re::makeCC()) {}
    594    
    595     void accumulate_match(const size_t lineNum, size_t line_start, size_t line_end) override;
    596     re::CC * getCodePoints() {return mParsedCodePointSet;}
    597 private:
    598     const char * mSearchBuffer;
    599     re::CC * mParsedCodePointSet;
    600 };
    601 
    602 void CodepointAccumulator::accumulate_match(const size_t lineNum, size_t line_start, size_t line_end) {
    603     assert (line_start <= line_end);
    604     re::codepoint_t c = 0;
    605     size_t line_pos = line_start;
    606     while (isxdigit(mSearchBuffer[line_pos])) {
    607         assert (line_pos < line_end);
    608         if (isdigit(mSearchBuffer[line_pos])) {
    609             c = (c << 4) | (mSearchBuffer[line_pos] - '0');
    610         }
    611         else {
    612             c = (c << 4) | (tolower(mSearchBuffer[line_pos]) - 'a' + 10);
    613         }
    614         line_pos++;
    615     }
    616     assert(((line_pos - line_start) >= 4) && ((line_pos - line_start) <= 6)); // UCD format 4 to 6 hex digits.
    617     mParsedCodePointSet->insert(c);
    618 }
    619 re::CC * grepCodepoints(re::RE * pattern, char * UnicodeDataBuffer, size_t bufferLength) {
    620    
    621     CodepointAccumulator accum(UnicodeDataBuffer);
    622    
    623     grepBuffer(pattern, UnicodeDataBuffer, bufferLength, & accum);
    624     return accum.getCodePoints();
    625 }
    626 
    627 
    628 class PropertyValueAccumulator : public MatchAccumulator {
    629 public:
    630    
    631     PropertyValueAccumulator(const char * searchBuffer, std::vector<std::string> & accumulatedPropertyValues)
    632        : mSearchBuffer(searchBuffer), mParsedPropertyValueSet(accumulatedPropertyValues) {}
    633    
    634     void accumulate_match(const size_t lineNum, size_t line_start, size_t line_end) override;
    635 private:
    636     const char * mSearchBuffer;
    637     std::vector<std::string> & mParsedPropertyValueSet;
    638 };
    639 void PropertyValueAccumulator::accumulate_match(const size_t lineNum, size_t line_start, size_t line_end) {
    640     assert (line_start <= line_end);
    641     mParsedPropertyValueSet.emplace_back(mSearchBuffer + line_start, mSearchBuffer + line_end);
    642 }
    643 
    644 
    645 const std::vector<std::string> grepPropertyValues(const std::string& propertyName, re::RE * propertyValuePattern) {
    646     ParabixDriver pxDriver("propertyValueEngine");
    647     AlignedAllocator<char, 32> alloc;
    648     std::vector<std::string> accumulatedValues;
    649 
    650     const std::string & str = UCD::getPropertyValueGrepString(propertyName);
    651 
    652     auto & idb = pxDriver.getBuilder();
    653 
    654     const unsigned segmentSize = 8;
    655     const auto n = str.length();
    656     const auto w = idb->getBitBlockWidth() * segmentSize;
    657     const auto m = w - (n % w);
    658 
    659     char * aligned = alloc.allocate(n + m, 0);
    660     std::memcpy(aligned, str.data(), n);
    661     std::memset(aligned + n, 0, m);
    662 
    663     PropertyValueAccumulator accum(aligned, accumulatedValues);
    664     grepBuffer(propertyValuePattern, aligned, n, & accum);
    665     alloc.deallocate(aligned, 0);
    666     return accumulatedValues;
    667 }
    668 }
     524}
Note: See TracChangeset for help on using the changeset viewer.