source: icGREP/icgrep-devel/icgrep/kernels/directorysearch.cpp

Last change on this file was 6261, checked in by nmedfort, 6 months ago

Work on OptimizationBranch?; revisited pipeline termination

File size: 14.9 KB
Line 
1#include "directorysearch.h"
2
3#include <dirent.h>
4#include <stdio.h>
5#include <stdlib.h>
6#include <fcntl.h>
7#include <sys/syscall.h>
8#include <kernels/kernel_builder.h>
9#include <llvm/IR/Module.h>
10
11#define FIRST_NONSYSTEM_INODE 11
12
13using namespace llvm;
14
15namespace kernel {
16
17void DirectorySearch::linkExternalMethods(const std::unique_ptr<kernel::KernelBuilder> & b) {
18    fOpen = b->LinkFunction("open", open);
19    fSysCall = b->LinkFunction("syscall", syscall);
20}
21
22void DirectorySearch::generateInitializeMethod(const std::unique_ptr<kernel::KernelBuilder> & b) {
23
24    Value * const buffer = b->CreateCacheAlignedMalloc(b->getSize(PATH_MAX * 2));
25    b->setScalarField("buffer", buffer);
26    Value * const rootPath = b->getScalarField("rootPath");
27
28    std::vector<Value *> args(2);
29    args[0] = rootPath;
30    args[1] = b->getInt32(O_RDONLY | O_DIRECTORY);
31    Value * const fd = b->CreateCall(fOpen, args);
32    b->setScalarField("fileDescriptor", fd);
33    BasicBlock * const invalid = b->CreateBasicBlock("invalidDirectory");
34    BasicBlock * const exit = b->CreateBasicBlock("exit");
35
36    Value * const isInvalid = b->CreateICmpEQ(fd, ConstantInt::getAllOnesValue(fd->getType()));
37    b->CreateUnlikelyCondBr(isInvalid, invalid, exit);
38
39    b->setInsertionPoint(invalid);
40    b->setTerminationSignal();
41    b->CreateBr(exit);
42
43    b->setInsertionPoint(exit);
44    Value * const rootPathLength = b->CreateStrlenCall(rootPath);
45    b->setScalarField("currentPathLength", rootPathLength);
46    addToOutputStream(b, rootPath, rootPathLength, "directoryNames", b->getSize(0));
47}
48
49StructType * getDirEntTy(llvm::LLVMContext & c) {
50    std::vector<Type *> fields;
51    #ifdef __USE_LARGEFILE64
52    fields.push_back(TypeBuilder<ino64_t>::get(c)); /* 64-bit inode number */
53    #define d_ino 0
54    fields.push_back(TypeBuilder<off64_t>::get(c));  /* 64-bit offset to next structure */
55    #define d_off 1
56    fields.push_back(TypeBuilder<unsigned short>::get(c)); /* Size of this dirent */
57    #define d_reclen 2
58    fields.push_back(TypeBuilder<unsigned char>::get(c)); /* File type */
59    #define d_type 3
60    fields.push_back(TypeBuilder<char *>::get(c)); /* Filename (null-terminated). length is actually (d_reclen - 2 - offsetof(struct linux_dirent, d_name)) */
61    #define d_name 4
62    #else
63    fields.push_back(TypeBuilder<unsigned long>::get(c)); /* Inode number */
64    #define d_ino 0
65    fields.push_back(TypeBuilder<unsigned long>::get(c)); /* Offset to next linux_dirent */
66    #define d_off 1
67    fields.push_back(TypeBuilder<unsigned short>::get(c)); /* Length of this linux_dirent */
68    #define d_reclen 2
69    fields.push_back(TypeBuilder<char *>::get(c)); /* Filename (null-terminated) */
70    #define d_name 3
71    fields.push_back(TypeBuilder<char>::get(c)); // Zero padding byte
72    fields.push_back(TypeBuilder<char>::get(c)); // File type (only since Linux 2.6.4); offset is (d_reclen - 1)
73    #define d_type 5
74    #endif
75    return StructType::get(c, fields, true);
76}
77
78#ifdef __USE_LARGEFILE64
79#define GETDENTNO SYS_getdents64
80#define NON_NAME_PADDING_BYTES 0
81#else
82#define GETDENTNO SYS_getdents
83#define NON_NAME_PADDING_BYTES 2
84#endif
85
86void DirectorySearch::addToOutputStream(const std::unique_ptr<kernel::KernelBuilder> & b, Value * const name, Value * const nameLength, StringRef field, Value * const consumed) {
87    Value * const produced = b->getProducedItemCount(field);
88    StreamSetBuffer * buffer = getStreamSetBuffer(field);
89    Value * const writable = buffer->getLinearlyWritableItems(b, produced, consumed);
90    Value * const hasSpace = b->CreateICmpULT(nameLength, writable);
91    BasicBlock * expandBuffer = b->CreateBasicBlock(field + "Expand");
92    BasicBlock * writeName = b->CreateBasicBlock(field + "Write");
93    b->CreateLikelyCondBr(hasSpace, writeName, expandBuffer);
94    // expand the output stream buffer if we cannot fit the name into it.
95    b->setInsertPoint(expandBuffer);
96    Value * const unconsumed = b->CreateSub(produced, consumed);
97    Value * size = b->CreateZExtOrTrunc(nameLength, unconsumed->getType());
98    size = b->CreateMul(size, ConstantInt::get(size->getType(), 16));
99    size = b->CreateAdd(size, unconsumed);
100    buffer->setCapacity(b.get(), size);
101    b->CreateBr(writeName);
102    // write the name to the buffer
103    b->setInsertPoint(writeName);
104    Value * const ptr = b->getRawOutputPointer(field, produced);
105    b->CreateMemCpy(ptr, name, nameLength, 1);
106}
107
108
109void DirectorySearch::generateDoSegmentMethod(const std::unique_ptr<kernel::KernelBuilder> & b) {
110
111    BasicBlock * const readDirectory = b->CreateBasicBlock("readDirectory");
112    BasicBlock * const processDirEnts = b->CreateBasicBlock("processBuffer");
113    BasicBlock * const checkEntry = b->CreateBasicBlock("checkEntry");
114
115    BasicBlock * const addFile = b->CreateBasicBlock("addFile");
116    BasicBlock * const nonFile = b->CreateBasicBlock("addFile");
117
118    BasicBlock * const addDirectory = b->CreateBasicBlock("addDirectory");
119    BasicBlock * const nextEntry = b->CreateBasicBlock("nextEntry");
120    BasicBlock * const finishedDirectory = b->CreateBasicBlock("finishedDirectory");
121
122    BasicBlock * const filledSegment = b->CreateBasicBlock("filledSegment");
123
124
125
126    std::vector<Value *> args(4);
127    IntegerType * const sysNoTy = b->getIntNTy(sizeof(long int) * 8);
128    args[0] = ConstantInt::get(sysNoTy, GETDENTNO);
129    args[1] = b->getScalarField("fileDescriptor");
130    args[2] = b->getScalarField("buffer");
131    args[3] = b->getScalarField("size");
132
133    // check whether there is any pending data in the buffer
134    StructType * const dirEntryTy = getDirEntTy(b->getContext());
135    Type * const recordLengthTy = dirEntryTy->getStructElementType(d_reclen);
136    Value * const pendingOffset = b->getScalarField("pendingOffset");
137    Value * const pendingBytes = b->getScalarField("pendingBytes");
138
139    Value * initialDirectoryOffset = nullptr;
140    if (mRecursive) {
141        initialDirectoryOffset = b->getScalarField("currentDirectoryOffset");
142    } else {
143        initialDirectoryOffset = b->getSize(0);
144    }
145    Value * const initialDirectoryLength = b->getScalarField("currentPathLength");
146
147    BasicBlock * const entryBlock = b->getInsertBlock();
148    Value * const initiallyExhausted = b->CreateICmpEQ(pendingOffset, pendingBytes);
149    b->CreateCondBr(initiallyExhausted, readDirectory, processDirEnts);
150
151    // read the directory entries from the OS
152    b->setInsertPoint(readDirectory);
153    Value * baseDirectoryLength = initialDirectoryLength;
154    if (mRecursive) {
155        PHINode * const baseDirectoryLengthPhi = b->CreatePHI(b->getSizeTy(), 2);
156        baseDirectoryLength->addIncoming(initialDirectoryLength, entryBlock);
157        baseDirectoryLength = baseDirectoryLengthPhi;
158
159
160    }
161
162
163    Value * const bytesRead = b->CreateCall(fSysCall, args);
164    // TODO: if -1, resize?
165    Value * const hasData = b->CreateIsNotNull(bytesRead);
166    BasicBlock * const readDirectoryExit = b->getInsertBlock();
167    b->CreateLikelyCondBr(hasData, processDirEnts, finishedDirectory);
168
169    // begin processing the directory entries
170    b->setInsertPoint(processDirEnts);
171    PHINode * const offset = b->CreatePHI(recordLengthTy, 3);
172    offset->addIncoming(ConstantInt::get(recordLengthTy, 0), readDirectory);
173    offset->addIncoming(pendingOffset, entryBlock);
174
175    PHINode * const numBytes = b->CreatePHI(recordLengthTy, 3);
176    numBytes->addIncoming(bytesRead, readDirectory);
177    numBytes->addIncoming(pendingBytes, entryBlock);
178
179    PHINode * const currentDirectoryOffset = nullptr;
180    if (mRecursive) {
181        currentDirectoryOffset = b->CreatePHI(b->getSizeTy(), 3);
182        currentDirectoryOffset->addIncoming(initialDirectoryOffset, readDirectory);
183        currentDirectoryOffset->addIncoming(initialDirectoryOffset, entryBlock);
184    }
185
186    PHINode * const currentDirectoryLength = b->CreatePHI(b->getSizeTy(), 3);
187    currentDirectoryLength->addIncoming(baseDirectoryLength, readDirectory);
188    currentDirectoryLength->addIncoming(initialDirectoryLength, entryBlock);
189
190
191    // parse the entry
192    Value * const bufferOffset = b->CreateGEP(buffer, offset);
193    Value * const dirEntry = b->CreatePointerCast(bufferOffset, dirEntryTy);
194    Value * const iNodePtr = b->CreateGEP(dirEntry, {b->getInt32(0), b->getInt32(d_ino)});
195    Value * const iNode = b->CreateLoad(iNodePtr);
196    Constant * const firstNonSystemINode = ConstantInt::get(iNode->getType(), FIRST_NONSYSTEM_INODE);
197    Value * const nonSystemEntry = b->CreateICmpULT(iNode, firstNonSystemINode);
198    Value * const recLenPtr = b->CreateGEP(dirEntry, {b->getInt32(0), b->getInt32(d_reclen)});
199    Value * const recLen = b->CreateLoad(recLenPtr);
200    b->CreateCondBr(nonSystemEntry, checkEntry, nextEntry);
201
202    // not a system file; check the entry
203    b->setInsertPoint(checkEntry);
204    // does this entry refer to a file or directory?
205    Value * const typePtr = b->CreateGEP(dirEntry, {b->getInt32(0), b->getInt32(d_type)});
206    Value * const type = b->CreateLoad(typePtr);
207    Constant * const FILE_FLAG = ConstantInt::get(type->getType(), DT_REG);
208    Constant * const DIR_FLAG = ConstantInt::get(type->getType(), DT_DIR);
209    Constant * const FLAG_NOT_SET = ConstantInt::get(type->getType(), 0);
210
211    Value * const name = b->CreateGEP(dirEntry, {b->getInt32(0), b->getInt32(d_name)});
212    // unless we want to include all hidden files, check if this file is hidden
213    if (!mIncludeHidden) {
214        BasicBlock * const checkFile = b->CreateBasicBlock("notHidden");
215        Value * const notHidden = b->CreateICmpNE(b->CreateLoad(name), b->getInt8('.'));
216        b->CreateLikelyCondBr(notHidden, checkFile, nextEntry);
217
218        b->setInsertPoint(checkFile);
219    }
220
221    // compute the name length
222    DataLayout DL(b->getModule());
223    const StructLayout * const dirEntLayout = DL.getStructLayout(dirEntryTy);
224    const auto nameOffset = dirEntLayout->getElementOffset(d_name);
225    Constant * const nonNameBytes = ConstantInt::get(recLen->getType(), nameOffset + NON_NAME_PADDING_BYTES);
226    Value * const nameLength = b->CreateSub(recLen, nonNameBytes);
227
228    // is this entry a file?
229    Value * const isFile = b->CreateICmpNE(b->CreateICmpAnd(type, FILE_FLAG), FLAG_NOT_SET);
230    b->CreateLikelyCondBr(isFile, addFile, nonFile);
231
232    // add file to the file stream
233    b->setInsertPoint(addFile);
234    addToOutputStream(b, name, nameLength, "fileNames", b->getConsumedItemCount(field));
235    // TODO: add the directory index and test whether we can store any more this segment?
236    b->CreateBr(nextEntry);
237
238    // not a file; is it a directory?
239    b->setInsertPoint(nonFile);
240    Value * const isDirectory = b->CreateICmpNE(b->CreateICmpAnd(type, DIR_FLAG), FLAG_NOT_SET);
241    b->CreateLikelyCondBr(isDirectory, addDirectory, nextEntry);
242    // add directory name to the directory stream
243    b->setInsertPoint(isDirectory);
244    // need to add current path prefix
245    addToOutputStream(b, name, nameLength, "directoryNames", b->getSize(0));
246    b->CreateBr(nextEntry);
247
248    // iterate to the next file until we've exhausted the buffer
249    b->setInsertPoint(nextEntry);
250    Value * const nextOffset = b->CreateAdd(offset, recLen);
251    offset->addIncoming(nextOffset, nextEntry);
252    numBytes->addIncoming(numBytes, nextEntry);
253
254    Value * const exhaustedBuffer = b->CreateICmpULT(nextOffset, numBytes);
255    b->CreateUnlikelyCondBr(exhaustedBuffer, processDirEnts, readDirectory);
256
257    b->setInsertPoint(finishedDirectory);
258    // if this is recursive, check for a pending directory in the buffer
259    if (mRecursive) {
260        PHINode * const directoryOffset = b->CreatePHI(currentDirectoryOffset->getType(), 2);
261        directoryOffset->addIncoming(currentDirectoryOffset, readDirectoryExit);
262        PHINode * const pathLength = b->CreatePHI(currentDirectoryLength->getType(), 2);
263        pathLength->addIncoming(currentDirectoryLength, readDirectoryExit);
264        Value * const nextDirectoryOffset = b->CreateAdd(directoryOffset, pathLength);
265        Value * const directoryNameLength = b->getProducedItemCount("directoryNames");
266        Value * const hasPendingFolder = b->CreateICmpNE(nextDirectoryOffset, directoryNameLength);
267        BasicBlock * const nextDirectory = b->CreateBasicBlock("nextDirectory");
268        BasicBlock * const checkedAllDirectories = b->CreateBasicBlock("checkedAllDirectories");
269        b->CreateCondBr(hasPendingFolder, nextDirectory, checkedAllDirectories);
270
271        // open the next directory; if it's valid, resume parsing the directory entries; otherwise
272        // check the subsequent folder.
273        b->setInsertPoint(nextDirectory);
274        Value * const nextPath = b->getRawOutputPointer("directoryNames", nextDirectoryOffset);
275        Value * const nextPathLength = b->CreateStrlenCall(nextPath);
276        std::vector<Value *> args(2);
277        args[0] = nextPath;
278        args[1] = b->getInt32(O_RDONLY | O_DIRECTORY);
279        Value * const fd = b->CreateCall(fOpen, args);
280        b->setScalarField("fileDescriptor", fd);
281        Value * const valid = b->CreateICmpNE(fd, ConstantInt::getAllOnesValue(fd->getType()));
282        BasicBlock * const nextDirectoryExit = b->getInsertBlock();
283        directoryOffset->addIncoming(nextDirectoryOffset, nextDirectoryExit);
284        pathLength->addIncoming(nextPathLength, nextDirectoryExit);
285        cast<PHINode>(baseDirectoryLength)->addIncoming(nextPathLength, nextDirectoryExit);
286        b->CreateLikelyCondBr(valid, readDirectory, finishedDirectory);
287
288        b->setInsertPoint(checkedAllDirectories);
289    }
290    b->setTerminationSignal();
291    filledSegment->moveAfter(b->getInsertBlock());
292    b->CreateBr(filledSegment);
293
294    b->setInsertPoint(filledSegment);
295}
296
297void DirectorySearch::generateFinalizeMethod(const std::unique_ptr<kernel::KernelBuilder> & b) {
298
299
300}
301
302DirectorySearch::DirectorySearch(const std::unique_ptr<kernel::KernelBuilder> & b,
303                                 Scalar * const rootPath,
304                                 StreamSet * const directoryNameStream,
305                                 StreamSet * const fileDirectoryStream, // stores an offset number to the directoryNameStream
306                                 StreamSet * const fileNameStream,
307                                 const unsigned filesPerSegment, const bool recursive = true, const bool includeHidden = false)
308: SegmentOrientedKernel(b, "DirectorySearch" + (recursive ? "R" : "") + (includeHidden ? "H" :"")
309// input streams
310,{}
311// output stream
312,{Binding{"directoryNames", directoryNameStream, UnknownRate(), ManagedBuffer()}
313, Binding{"fileDirectoryOffset", fileDirectoryStream, FixedRate()}
314, Binding{"fileNames", fileNameStream, UnknownRate(), ManagedBuffer()}
315}
316// input scalar
317,{Binding{"rootPath", rootPath}}
318// output scalar
319,{}
320// internal scalars
321,{Binding{b->getInt32Ty(), "fileDescriptor"}
322, Binding{b->getIntNTy(sizeof(unsigned short) * 8), "pendingOffset"}
323, Binding{b->getIntNTy(sizeof(unsigned short) * 8), "pendingBytes"}
324, Binding{b->getInt8PtrTy(), "buffer"}
325, Binding{b->getSizeTy(), "size"}
326, Binding{b->getSizeTy(), "currentPathLength"}
327})
328, mRecursive(recursive)
329, mIncludeHidden(includeHidden) {
330    if (recursive) {
331        addInternalScalar(b->getSizeTy(), "currentDirectoryOffset");
332    }
333    setStride(filesPerSegment);
334}
335
336}
Note: See TracBrowser for help on using the repository browser.