Ignore:
Timestamp:
Aug 4, 2014, 9:46:49 AM (5 years ago)
Author:
daled
Message:

icGREP now uses ScanThru for multibyte Unicode character classes.

File:
1 edited

Legend:

Unmodified
Added
Removed
  • icGREP/icgrep-devel/icgrep/re_compiler.cpp

    r3940 r3955  
    6969
    7070    //Build our list of predefined characters.
     71    std::string cc_name;
     72    std::map<std::string,std::string> name_map;
    7173    std::list<CC*> predefined_characters;
    72     CC* cc_lf = new CC('\n');
    73     std::string lf_ccname = cc_lf->getName();
    74     re_map.insert(make_pair(lf_ccname, cc_lf));
     74
     75    CC* cc_lf = new CC(0x0A);
     76    cc_name = cc_lf->getName();
     77    re_map.insert(make_pair(cc_name, cc_lf));
     78    name_map.insert(make_pair("LineFeed", cc_name));
     79
     80    CC* cc_utf8_single_byte = new CC(0x80, 0xBF);
     81    cc_name = cc_utf8_single_byte->getName();
     82    re_map.insert(make_pair(cc_name, cc_utf8_single_byte));
     83    name_map.insert(make_pair("UTF8-SingleByte", cc_name));
     84
     85    CC* cc_utf8_prefix2 = new CC(0xC2, 0xDF);
     86    cc_name = cc_utf8_prefix2->getName();
     87    re_map.insert(make_pair(cc_name, cc_utf8_prefix2));
     88    name_map.insert(make_pair("UTF8-Prefix2", cc_name));
     89
     90    CC* cc_utf8_prefix3 = new CC(0xE0, 0xEF);
     91    cc_name = cc_utf8_prefix3->getName();
     92    re_map.insert(make_pair(cc_name, cc_utf8_prefix3));
     93    name_map.insert(make_pair("UTF8-Prefix3", cc_name));
     94
     95    CC* cc_utf8_prefix4 = new CC(0xF0, 0xF4);
     96    cc_name = cc_utf8_prefix4->getName();
     97    re_map.insert(make_pair(cc_name, cc_utf8_prefix4));
     98    name_map.insert(make_pair("UTF8-Prefix4", cc_name));
    7599
    76100    CC_Compiler cc_compiler(encoding);
     
    80104    //std::cout << "\n" << "(" << StatementPrinter::Print_CC_PabloStmts(cc_stmtsl) << ")" << "\n" << std::endl;
    81105
    82     Pbix_Compiler pbix_compiler(lf_ccname);
    83     CodeGenState cg_state = pbix_compiler.compile(re_ast);
     106    Pbix_Compiler pbix_compiler(name_map);
     107    CodeGenState re_subexpression_cg_state = pbix_compiler.compile_subexpressions(re_map);
     108    CodeGenState re_cg_state = pbix_compiler.compile(re_ast);
    84109
    85110    //Print to the terminal the AST that was generated by the parallel bit-stream compiler.
    86     //std::cout << "\n" << "(" << StatementPrinter::PrintStmts(cg_state) << ")" << "\n" << std::endl;
     111    //std::cout << "\n" << "Subexpressions: (" << StatementPrinter::PrintStmts(re_subexpression_cg_state) << ")" << std::endl;
     112    //std::cout << "\n" << "(" << StatementPrinter::PrintStmts(re_cg_state) << ")" << "\n" << std::endl;
    87113
    88114    //Print a count of the Pablo statements and expressions that are contained in the AST from the pbix compiler.
    89     //std::cout << "\nPablo Statement Count: " << Pbix_Counter::Count_PabloStatements(cg_state.stmtsl) <<  "\n" << std::endl;
     115    //std::cout << "\nPablo Statement Count: " << Pbix_Counter::Count_PabloStatements(re_cg_state.stmtsl) <<  "\n" << std::endl;
    90116
    91     LLVM_Generator irgen(basis_pattern, lf_ccname, encoding.getBits());
     117    LLVM_Generator irgen(name_map, basis_pattern, encoding.getBits());
    92118
    93119    unsigned long long cycles = 0;
     
    99125    }
    100126
    101     LLVM_Gen_RetVal retVal = irgen.Generate_LLVMIR(cg_state, cc_stmtsl);
     127    LLVM_Gen_RetVal retVal = irgen.Generate_LLVMIR(re_cg_state, re_subexpression_cg_state, cc_stmtsl);
    102128    if (show_compile_time)
    103129    {
     
    108134    }
    109135
    110     return  retVal;  //irgen.Generate_LLVMIR(cg_state, cc_stmtsl);
     136    return  retVal;  //irgen.Generate_LLVMIR(re_cg_state, cc_stmtsl);
    111137}
    112138
Note: See TracChangeset for help on using the changeset viewer.