source: icGREP/icgrep-devel/icgrep/kernels/scanmatchgen.cpp @ 5206

Last change on this file since 5206 was 5206, checked in by xwa163, 3 years ago
  1. Extend Regex Syntax, include: (a) RL2.6 of UTS#18, support regex in property value. e.g. \p{script=/.*hir.*/} (b) Support syntax of property expression when parsing boundary. e.g. \b{greek} (c) Extend property expression in non capture group. e.g. (?\p{upper}:\p{greek}\p{script=hira})
  2. Add related test cases
File size: 13.7 KB
RevLine 
[4907]1/*
2 *  Copyright (c) 2015 International Characters.
3 *  This software is licensed to the public under the Open Software License 3.0.
4 */
5
[4939]6#include "kernel.h"
[4907]7#include "scanmatchgen.h"
8#include <llvm/IR/Intrinsics.h>
[4959]9#include <IDISA/idisa_builder.h>
10#include <llvm/Support/raw_os_ostream.h>
[4907]11
[4959]12using namespace llvm;
13
[4974]14namespace kernel {
15
[4907]16Value * generateForwardZeroesMask(IDISA::IDISA_Builder * iBuilder, Value * bits) {
17    Value * bits_minus1 = iBuilder->CreateSub(bits, ConstantInt::get(bits->getType(), 1));
18    return iBuilder->CreateAnd(bits_minus1, iBuilder->CreateNot(bits));
19}
20
21Value * generatePopcount(IDISA::IDISA_Builder * iBuilder, Value * bits) {
22    Value * ctpopFunc = Intrinsic::getDeclaration(iBuilder->getModule(), Intrinsic::ctpop, bits->getType());
23    return iBuilder->CreateCall(ctpopFunc, std::vector<Value *>({bits}));
24}
25
26Value * generateCountForwardZeroes(IDISA::IDISA_Builder * iBuilder, Value * bits) {
27    Value * cttzFunc = Intrinsic::getDeclaration(iBuilder->getModule(), Intrinsic::cttz, bits->getType());
28    return iBuilder->CreateCall(cttzFunc, std::vector<Value *>({bits, ConstantInt::get(iBuilder->getInt1Ty(), 0)}));
29}
30
31Value * generateCountReverseZeroes(IDISA::IDISA_Builder * iBuilder, Value * bits) {
32    Value * ctlzFunc = Intrinsic::getDeclaration(iBuilder->getModule(), Intrinsic::ctlz, bits->getType());
33    return iBuilder->CreateCall(ctlzFunc, std::vector<Value *>({bits, ConstantInt::get(iBuilder->getInt1Ty(), 0)}));
34}
35
36Value * generateResetLowestBit(IDISA::IDISA_Builder * iBuilder, Value * bits) {
37    Value * bits_minus1 = iBuilder->CreateSub(bits, ConstantInt::get(bits->getType(), 1));
38    return iBuilder->CreateAnd(bits_minus1, bits);
39}
[4959]40
[5055]41       
[5204]42void ScanMatchKernel::generateDoBlockMethod() {
[5202]43    auto savePoint = iBuilder->saveIP();
[5063]44    Module * m = iBuilder->getModule();
45    Function * scanWordFunction = generateScanWordRoutine(m);
[5204]46    IntegerType * T = iBuilder->getSizeTy();
47    const unsigned fieldCount = iBuilder->getBitBlockWidth() / T->getBitWidth();
48
[5055]49    Type * scanwordVectorType =  VectorType::get(T, fieldCount);
[4974]50
[5096]51    Function * doBlockFunction = m->getFunction(mKernelName + doBlock_suffix);
[5055]52
53    iBuilder->SetInsertPoint(BasicBlock::Create(iBuilder->getContext(), "entry", doBlockFunction, 0));
54    Value * kernelStuctParam = getParameter(doBlockFunction, "self");
[5096]55    Value * blockNo = getScalarField(kernelStuctParam, blockNoScalar);
56    Value * scanwordPos = iBuilder->CreateMul(blockNo, ConstantInt::get(blockNo->getType(), iBuilder->getBitBlockWidth()));
[5055]57   
58    Value * recordStart = getScalarField(kernelStuctParam, "LineStart");
59    Value * recordNum = getScalarField(kernelStuctParam, "LineNum");
[5104]60    Value * matchResultsPtr = getStreamSetBlockPtr(kernelStuctParam, "matchResults", blockNo);   
[5055]61    Value * matches = iBuilder->CreateBlockAlignedLoad(matchResultsPtr, {iBuilder->getInt32(0), iBuilder->getInt32(0)});
62    Value * linebreaks = iBuilder->CreateBlockAlignedLoad(matchResultsPtr, {iBuilder->getInt32(0), iBuilder->getInt32(1)});
63    Value * matchWordVector = iBuilder->CreateBitCast(matches, scanwordVectorType);
64    Value * breakWordVector = iBuilder->CreateBitCast(linebreaks, scanwordVectorType);
65    for(unsigned i = 0; i < fieldCount; ++i){
66        Value * matchWord = iBuilder->CreateExtractElement(matchWordVector, ConstantInt::get(T, i));
67        Value * recordBreaksWord = iBuilder->CreateExtractElement(breakWordVector, ConstantInt::get(T, i));
68        Value * wordResult = iBuilder->CreateCall(scanWordFunction, {kernelStuctParam, matchWord, recordBreaksWord, scanwordPos, recordStart, recordNum});
[5204]69        scanwordPos = iBuilder->CreateAdd(scanwordPos, ConstantInt::get(T, T->getBitWidth()));
[5055]70        recordStart = iBuilder->CreateExtractValue(wordResult, std::vector<unsigned>({0}));
71        recordNum = iBuilder->CreateExtractValue(wordResult, std::vector<unsigned>({1}));
72    }
73    setScalarField(kernelStuctParam, "LineStart", recordStart);
74    setScalarField(kernelStuctParam, "LineNum", recordNum);
75    iBuilder -> CreateRetVoid();
[5063]76    iBuilder->restoreIP(savePoint);
[4974]77}
[5055]78
79   
[5204]80Function * ScanMatchKernel::generateScanWordRoutine(Module * m) {
[5055]81    Function * function = m->getFunction("scan_matches_in_scanword");
82    if (LLVM_UNLIKELY(function != nullptr)) {
83        return function;
84    }
85   
86    LLVMContext & ctxt = m->getContext();
[5204]87
88    IntegerType * T = iBuilder->getSizeTy();
[5055]89    Type * S = PointerType::get(iBuilder->getIntNTy(8), 0);
90    Type * returnType = StructType::get(ctxt, std::vector<Type *>({T, T}));
91    FunctionType * functionType = FunctionType::get(returnType, std::vector<Type *>({PointerType::get(mKernelStateType, 0), T, T, T, T, T}), false);
92   
93    SmallVector<AttributeSet, 6> Attrs;
94    Attrs.push_back(AttributeSet::get(ctxt, ~0U, std::vector<Attribute::AttrKind>({ Attribute::NoUnwind, Attribute::UWTable })));
95    Attrs.push_back(AttributeSet::get(ctxt, 1, std::vector<Attribute::AttrKind>({})));
96    Attrs.push_back(AttributeSet::get(ctxt, 2, std::vector<Attribute::AttrKind>({})));
97    Attrs.push_back(AttributeSet::get(ctxt, 3, std::vector<Attribute::AttrKind>({})));
98    Attrs.push_back(AttributeSet::get(ctxt, 4, std::vector<Attribute::AttrKind>({})));
99    Attrs.push_back(AttributeSet::get(ctxt, 5, std::vector<Attribute::AttrKind>({})));
100    AttributeSet AttrSet = AttributeSet::get(ctxt, Attrs);
101   
102    function = Function::Create(functionType, GlobalValue::ExternalLinkage, "scan_matches_in_scanword", m);
103    function->setCallingConv(CallingConv::C);
104    function->setAttributes(AttrSet);
105    function->addFnAttr(llvm::Attribute::AlwaysInline);
106   
107    Function::arg_iterator args = function->arg_begin();
108    Value * instance = &*(args++);
109    instance->setName("this");
110    Value * matches_input_parm = &*(args++);
111    matches_input_parm->setName("matches");
112    Value * record_breaks_input_parm = &*(args++);
113    record_breaks_input_parm->setName("breaks");
114    Value * scanwordPos = &*(args++);
115    scanwordPos->setName("scanwordPos");
116    Value * recordStart_input_parm = &*(args++);
117    recordStart_input_parm->setName("pendingLineStart");
118    Value * recordNum_input_parm = &*(args++);
119    recordNum_input_parm->setName("lineNum");
120   
121    Constant * matchProcessor;
[5206]122    switch (mGrepType) {
123        case GrepType::Normal:
124            matchProcessor = m->getOrInsertFunction("wrapped_report_match", Type::getVoidTy(ctxt), T, T, T, S, T, T, nullptr);
125            break;
126        case GrepType::NameExpression:
127            matchProcessor = m->getOrInsertFunction("insert_codepoints", Type::getVoidTy(ctxt), T, T, T, S, nullptr);
128            break;
129        case GrepType::PropertyValue:
130            matchProcessor = m->getOrInsertFunction("insert_property_values", Type::getVoidTy(ctxt), T, T, T, S, nullptr);
131            break;
132
[5055]133    }
134    iBuilder->SetInsertPoint(BasicBlock::Create(ctxt, "entry", function,0));
135   
136    BasicBlock * entry_block = iBuilder->GetInsertBlock();
137    BasicBlock * matches_test_block = BasicBlock::Create(ctxt, "matches_test_block", function, 0);
138    BasicBlock * process_matches_loop_entry = BasicBlock::Create(ctxt, "process_matches_loop", function, 0);
139    BasicBlock * prior_breaks_block = BasicBlock::Create(ctxt, "prior_breaks_block", function, 0);
140    BasicBlock * loop_final_block = BasicBlock::Create(ctxt, "loop_final_block", function, 0);
141    BasicBlock * matches_done_block = BasicBlock::Create(ctxt, "matches_done_block", function, 0);
142    BasicBlock * remaining_breaks_block = BasicBlock::Create(ctxt, "remaining_breaks_block", function, 0);
143    BasicBlock * return_block = BasicBlock::Create(ctxt, "return_block", function, 0);
144   
145   
146    // The match scanner works with a loop involving four variables:
147    // (a) the bit stream scanword of matches marking the ends of selected records,
148    // (b) the bit stream scanword of record_breaks marking the ends of all records,
149    // (c) the integer lastRecordNum indicating the number of records processed so far,
150    // (d) the index lastRecordStart indicating the file position of the last record.
151    // We set up a loop structure, in which a set of 4 phi nodes initialize these
152    // variables from either the input to the scanner or the computed values within
153    // the loop body.
154   
155   
156    iBuilder->CreateBr(matches_test_block);
157   
158    // LOOP Test Block
159    iBuilder->SetInsertPoint(matches_test_block);
160    PHINode * matches_phi = iBuilder->CreatePHI(T, 2, "matches");
161    PHINode * record_breaks_phi = iBuilder->CreatePHI(T, 2, "record_breaks");
162    PHINode * recordNum_phi = iBuilder->CreatePHI(T, 2, "recordNum");
163    PHINode * recordStart_phi = iBuilder->CreatePHI(T, 2, "recordStart");
164    matches_phi->addIncoming(matches_input_parm, entry_block);
165    record_breaks_phi->addIncoming(record_breaks_input_parm, entry_block);
166    recordNum_phi->addIncoming(recordNum_input_parm, entry_block);
167    recordStart_phi->addIncoming(recordStart_input_parm, entry_block);
168    Value * have_matches_cond = iBuilder->CreateICmpNE(matches_phi, ConstantInt::get(T, 0));
169    iBuilder->CreateCondBr(have_matches_cond, process_matches_loop_entry, matches_done_block);
170   
171    // LOOP BODY
172    // The loop body is entered if we have more matches to process.
173    iBuilder->SetInsertPoint(process_matches_loop_entry);
174    Value * prior_breaks = iBuilder->CreateAnd(generateForwardZeroesMask(iBuilder, matches_phi), record_breaks_phi);
175    // Within the loop we have a conditional block that is executed if there are any prior
176    // record breaks.
177    Value * prior_breaks_cond = iBuilder->CreateICmpNE(prior_breaks, ConstantInt::get(T, 0));
178    iBuilder->CreateCondBr(prior_breaks_cond, prior_breaks_block, loop_final_block);
179   
180    // PRIOR_BREAKS_BLOCK
181    // If there are prior breaks, we count them and compute the record start position.
182    iBuilder->SetInsertPoint(prior_breaks_block);
183    Value * matchRecordNum = iBuilder->CreateAdd(generatePopcount(iBuilder, prior_breaks), recordNum_phi);
184    Value * reverseDistance = generateCountReverseZeroes(iBuilder, prior_breaks);
[5204]185    Value * width = ConstantInt::get(T, T->getBitWidth());
[5055]186    Value * matchRecordStart = iBuilder->CreateAdd(scanwordPos, iBuilder->CreateSub(width, reverseDistance));
187    iBuilder->CreateBr(loop_final_block);
188   
189    // LOOP FINAL BLOCK
190    // The prior breaks, if any have been counted.  Set up phi nodes for the recordNum
191    // and recortStart depending on whether the conditional execution of prior_breaks_block.
192    iBuilder->SetInsertPoint(loop_final_block);
193    PHINode * matchRecordNum_phi = iBuilder->CreatePHI(T, 2, "matchRecordNum");
194    PHINode * matchRecordStart_phi = iBuilder->CreatePHI(T, 2, "matchRecordStart");
195    matchRecordNum_phi->addIncoming(recordNum_phi, process_matches_loop_entry);
196    matchRecordNum_phi->addIncoming(matchRecordNum, prior_breaks_block);
197    matchRecordStart_phi->addIncoming(recordStart_phi, process_matches_loop_entry);
198    matchRecordStart_phi->addIncoming(matchRecordStart, prior_breaks_block);   
199    Value * matchRecordEnd = iBuilder->CreateAdd(scanwordPos, generateCountForwardZeroes(iBuilder, matches_phi));
200   
201
202    Value * fileBuf = getScalarField(instance, "FileBuf");
[5206]203    switch (mGrepType) {
204        case GrepType::Normal:
205        {
206            Value * fileSize = getScalarField(instance, "FileSize");
207            Value * fileIdx = getScalarField(instance, "FileIdx");
208            iBuilder->CreateCall(matchProcessor, std::vector<Value *>({matchRecordNum_phi, matchRecordStart_phi, matchRecordEnd, fileBuf, fileSize, fileIdx}));
209            break;
210        }
211        case GrepType::NameExpression:
212        case GrepType::PropertyValue:
213            iBuilder->CreateCall(matchProcessor, std::vector<Value *>({matchRecordNum_phi, matchRecordStart_phi, matchRecordEnd, fileBuf}));
214            break;
[5055]215    }
216   
217    Value * remaining_matches = generateResetLowestBit(iBuilder, matches_phi);
218    Value * remaining_breaks = iBuilder->CreateXor(record_breaks_phi, prior_breaks);
219    matches_phi->addIncoming(remaining_matches, loop_final_block);
220    record_breaks_phi->addIncoming(remaining_breaks, loop_final_block);
221    recordNum_phi->addIncoming(matchRecordNum_phi, loop_final_block);
222    recordStart_phi->addIncoming(matchRecordStart_phi, loop_final_block);
223    iBuilder->CreateBr(matches_test_block);
224   
225   
226    // LOOP EXIT/MATCHES_DONE
227    iBuilder->SetInsertPoint(matches_done_block);
228    // When the matches are done, there may be additional record breaks remaining
229    Value * more_breaks_cond = iBuilder->CreateICmpNE(record_breaks_phi, ConstantInt::get(T, 0));
230    iBuilder->CreateCondBr(more_breaks_cond, remaining_breaks_block, return_block);
231   
232    // REMAINING_BREAKS_BLOCK: process remaining record breaks after all matches are processed
233    iBuilder->SetInsertPoint(remaining_breaks_block);
234    Value * break_count = generatePopcount(iBuilder, record_breaks_phi);
235    Value * final_record_num = iBuilder->CreateAdd(recordNum_phi, break_count);
236    Value * reverseZeroes = generateCountReverseZeroes(iBuilder, record_breaks_phi);
237    Value * pendingLineStart = iBuilder->CreateAdd(scanwordPos, iBuilder->CreateSub(width, reverseZeroes));
238    iBuilder->CreateBr(return_block);
239   
240    // RETURN block
241    iBuilder->SetInsertPoint(return_block);
242    PHINode * finalRecordCount_phi = iBuilder->CreatePHI(T, 2, "finalRecordCount");
243    PHINode * finalRecordStart_phi = iBuilder->CreatePHI(T, 2, "finalRecordStart");
244    finalRecordCount_phi->addIncoming(recordNum_phi, matches_done_block);
245    finalRecordCount_phi->addIncoming(final_record_num, remaining_breaks_block);
246    finalRecordStart_phi->addIncoming(recordStart_phi, matches_done_block);
247    finalRecordStart_phi->addIncoming(pendingLineStart, remaining_breaks_block);
248    Value * retVal = UndefValue::get(returnType);
249    retVal = iBuilder->CreateInsertValue(retVal, finalRecordStart_phi, 0);
250    retVal = iBuilder->CreateInsertValue(retVal, finalRecordCount_phi, 1);
251    iBuilder->CreateRet(retVal);
252   
253    return function;
254}
255
256}
Note: See TracBrowser for help on using the repository browser.