Changeset 5173


Ignore:
Timestamp:
Sep 30, 2016, 3:13:03 PM (3 years ago)
Author:
lindanl
Message:

Edit Distance: second level filter.

Location:
icGREP/icgrep-devel/icgrep/editd
Files:
3 edited

Legend:

Unmodified
Added
Removed
  • icGREP/icgrep-devel/icgrep/editd/editd.cpp

    r5172 r5173  
    5858using namespace pablo;
    5959
     60struct matchPosition
     61{
     62    size_t pos;
     63    size_t dist;
     64};
     65
     66std::vector<struct matchPosition> matchList;
     67
     68void sort_match_list(){
     69   
     70}
     71
     72void run_second_filter(int total_len, int pattern_segs, float errRate){
     73   
     74    if(matchList.size() == 0) return;
     75
     76    //Sort match position
     77    bool exchanged = true;
     78    while(exchanged){
     79        exchanged = false;
     80        for (int i=0; i<matchList.size()-1; i++){
     81            if(matchList[i].pos > matchList[i+1].pos){
     82                size_t tmp_pos = matchList[i].pos;
     83                size_t tmp_dist = matchList[i].dist;
     84                matchList[i].pos = matchList[i+1].pos;
     85                matchList[i].dist = matchList[i+1].dist;
     86                matchList[i+1].pos = tmp_pos;
     87                matchList[i+1].dist = tmp_dist;
     88                exchanged = true;
     89            }
     90        }
     91    }
     92
     93    std::cerr << "pattern_segs = " << pattern_segs << ", total_len = " << total_len << std::endl;
     94
     95    int v = pattern_segs * (editDistance+1) - total_len * errRate;
     96
     97    int startPos = matchList[0].pos;
     98    int sum = matchList[0].dist;
     99    int curIdx = 0;
     100    int i = 0;
     101    int count = 0;
     102    while (i < matchList.size()){
     103        if(matchList[i].pos - startPos < total_len * (errRate+1)){
     104            sum += matchList[i].dist;
     105            i++;
     106        }
     107        else{
     108            if(sum > v) count++;
     109            sum -= matchList[curIdx].dist;
     110            curIdx++;
     111            startPos = matchList[curIdx].pos;
     112        }
     113    }
     114    std::cout << "matching value is " << v << std::endl;
     115    std::cout << "total candidate from the first filter is " << matchList.size() << std::endl;
     116    std::cout << "total candidate from the second filter is " << count << std::endl;
     117}
     118
    60119extern "C" {
    61120void wrapped_report_pos(size_t match_pos, int dist) {
     121        struct matchPosition curMatch;
     122        curMatch.pos = match_pos;
     123        curMatch.dist = dist;
     124        matchList.push_back(curMatch);
    62125        std::cout << "pos: " << match_pos << ", dist:" << dist << "\n";
    63126    }
     
    75138}
    76139
    77 void get_editd_pattern() {
     140void get_editd_pattern(int & pattern_segs, int & total_len) {
    78141 
    79142    if (PatternFilename != "") {
     
    83146            while (std::getline(pattFile, r)) {
    84147                pattVector.push_back(r);
     148                pattern_segs ++;
     149                total_len += r.size();
    85150            }
    86151            pattFile.close();
     
    105170    pablo_function_passes(function);
    106171    pablo::PabloKernel  editdk(iBuilder, "editd", function, {});
    107     kernel::editdScanKernel editdScanK(iBuilder);
     172    kernel::editdScanKernel editdScanK(iBuilder, editDistance);
    108173   
    109174    std::unique_ptr<Module> editdM = editdk.createKernelModule({&ChStream}, {&MatchResults});
     
    317382    cl::ParseCommandLineOptions(argc, argv);
    318383
    319     get_editd_pattern();
    320 
     384    int pattern_segs = 0;
     385    int total_len = 0;
     386
     387    get_editd_pattern(pattern_segs, total_len);
     388 
    321389    preprocessFunctionType preprocess_ptr = preprocessCodeGen();
    322390    int size = 0;
    323391    char * chStream = preprocess(preprocess_ptr, size);
    324        
     392   
    325393    editdFunctionType editd_ptr = editdCodeGen();
    326394    editd(editd_ptr, chStream, size);
    327395
     396    if(pattVector.size()>1)
     397        run_second_filter(pattern_segs, total_len, 0.15);
     398
    328399    delete editdEngine;
    329400    delete preprocessEngine;
  • icGREP/icgrep-devel/icgrep/editd/editdscan_kernel.cpp

    r5172 r5173  
    3838
    3939    std::vector<Value * > matchWordVectors;
    40     for(int d = 0; d <= 2; d++){
     40    for(int d = 0; d <= mEditDistance; d++){
    4141        Value * matches = iBuilder->CreateBlockAlignedLoad(matchResultsPtr, {iBuilder->getInt32(0), iBuilder->getInt32(d)});
    4242        matchWordVectors.push_back(iBuilder->CreateBitCast(matches, scanwordVectorType));
     
    4444   
    4545    for(unsigned i = 0; i < fieldCount; ++i){       
    46         for(int d = 0; d <= 2; d++){
     46        for(int d = 0; d <= mEditDistance; d++){
    4747            Value * matchWord = iBuilder->CreateExtractElement(matchWordVectors[d], ConstantInt::get(T, i));
    4848            iBuilder->CreateCall(scanWordFunction, {matchWord, iBuilder->getInt32(d), scanwordPos});
  • icGREP/icgrep-devel/icgrep/editd/editdscan_kernel.h

    r5172 r5173  
    1919class editdScanKernel : public KernelBuilder {
    2020public:
    21     editdScanKernel(IDISA::IDISA_Builder * iBuilder) :
     21    editdScanKernel(IDISA::IDISA_Builder * iBuilder, unsigned dist) :
    2222    KernelBuilder(iBuilder, "scanMatch",
    23                   {StreamSetBinding{parabix::StreamSetType(3, parabix::i1), "matchResults"}},
     23                  {StreamSetBinding{parabix::StreamSetType(dist+1, parabix::i1), "matchResults"}},
    2424                  {}, {}, {}, {}),
     25    mEditDistance(dist),
    2526    mScanwordBitWidth(Triple(llvm::sys::getProcessTriple()).isArch32Bit() ? 32 : 64) {}
    2627       
     
    3031       
    3132    unsigned mScanwordBitWidth;
     33    unsigned mEditDistance;
    3234};
    3335
Note: See TracChangeset for help on using the changeset viewer.