Ignore:
Timestamp:
Aug 28, 2017, 4:00:17 PM (22 months ago)
Author:
nmedfort
Message:

Bug fixes for multigrep mode. Optional PabloKernel branch hit counter added. Minor optimizations.

File:
1 edited

Legend:

Unmodified
Added
Removed
  • icGREP/icgrep-devel/icgrep/kernels/charclasses.cpp

    r5564 r5620  
    1717#include <re/re_name.h>
    1818#include <llvm/Support/raw_ostream.h>
     19#include <boost/uuid/sha1.hpp>
    1920
    2021using NameMap = UCD::UCDCompiler::NameMap;
     
    2728using namespace UCD;
    2829
    29 CharClassesKernel::CharClassesKernel(const std::unique_ptr<kernel::KernelBuilder> & iBuilder, std::vector<UCD::UnicodeSet> multiplexedCCs)
    30 : PabloKernel(iBuilder,
    31               "cc",
     30inline static std::string sha1sum(const std::string & str) {
     31    char buffer[41];    // 40 hex-digits and the terminating null
     32    uint32_t digest[5]; // 160 bits in total
     33    boost::uuids::detail::sha1 sha1;
     34    sha1.process_bytes(str.c_str(), str.size());
     35    sha1.get_digest(digest);
     36    snprintf(buffer, sizeof(buffer), "%.8x%.8x%.8x%.8x%.8x",
     37             digest[0], digest[1], digest[2], digest[3], digest[4]);
     38    return std::string(buffer);
     39}
     40
     41inline std::string signature(const std::vector<UCD::UnicodeSet> & ccs) {
     42    if (LLVM_UNLIKELY(ccs.empty())) {
     43        return "[]";
     44    } else {
     45        std::string tmp;
     46        raw_string_ostream out(tmp);
     47        char joiner = '[';
     48        for (const auto & set : ccs) {
     49            out << joiner;
     50            set.print(out);
     51            joiner = ',';
     52        }
     53        out << ']';
     54        return out.str();
     55    }
     56}
     57
     58CharClassesSignature::CharClassesSignature(const std::vector<UCD::UnicodeSet> & ccs)
     59: mSignature(signature(ccs)) {
     60
     61}
     62
     63
     64CharClassesKernel::CharClassesKernel(const std::unique_ptr<kernel::KernelBuilder> & iBuilder, std::vector<UnicodeSet> && ccs)
     65: CharClassesSignature(ccs)
     66, PabloKernel(iBuilder,
     67              "cc" + sha1sum(mSignature),
    3268              {Binding{iBuilder->getStreamSetTy(8), "basis"}},
    33               {Binding{iBuilder->getStreamSetTy(multiplexedCCs.size(), 1), "charclasses"}})
    34 , mMultiplexedCCs(multiplexedCCs) {
     69              {Binding{iBuilder->getStreamSetTy(ccs.size(), 1), "charclasses"}})
     70, mCCs(std::move(ccs)) {
    3571
     72}
     73
     74std::string CharClassesKernel::makeSignature(const std::unique_ptr<kernel::KernelBuilder> &) {
     75    return mSignature;
    3676}
    3777
     
    3979    CC_Compiler ccc(this, getInput(0));
    4080    auto & pb = ccc.getBuilder();
    41     unsigned n = mMultiplexedCCs.size();
     81    unsigned n = mCCs.size();
    4282
    4383    NameMap nameMap;
    4484    std::vector<Name *> names;
    4585    for (unsigned i = 0; i < n; i++) {
    46         Name * name = re::makeName("cc" + std::to_string(i), makeCC(std::move(mMultiplexedCCs[i])));
     86        Name * name = re::makeName("cc" + std::to_string(i), makeCC(std::move(mCCs[i])));
    4787        nameMap.emplace(name, nullptr);
    4888        names.push_back(name);
     
    5696    }
    5797
    58     // The first UnicodeSet in the vector multiplexedCCs represents the last bit of the character class basis bit streams.
     98    // The first UnicodeSet in the vector ccs represents the last bit of the character class basis bit streams.
    5999    std::reverse(names.begin(), names.end());
    60100    for (unsigned i = 0; i < names.size(); i++) {
     
    68108            }
    69109        } else {
    70           throw std::runtime_error("Can't compile character classes.");
     110            throw std::runtime_error("Can't compile character classes.");
    71111        }
    72112    }
Note: See TracChangeset for help on using the changeset viewer.