source: icGREP/icgrep-devel/icgrep/lz4/grep/lz4_grep_base_generator.h @ 6144

Last change on this file since 6144 was 6144, checked in by xwa163, 6 weeks ago

lz4_grep: Init checkin for utf8 character class multiplexing

File size: 3.0 KB
Line 
1
2#ifndef ICGREP_LZ4GREPGENERATOR_H
3#define ICGREP_LZ4GREPGENERATOR_H
4
#include "lz4/lz4_base_generator.h"

#include <grep_interface.h>
#include <kernels/streamset.h>
#include <cc/multiplex_CCs.h>
#include <atomic>
#include <cstddef>
#include <cstdint>
#include <memory>
#include <set>
#include <sstream>
#include <string>
#include <utility>
#include <vector>
#include <grep/grep_engine.h>
15
/// Pointer to a scan-match grep main function.
/// Receives the byte buffer, the header size, the file size, whether blocks
/// carry a checksum, and an integer-encoded match accumulator handle.
using ScanMatchGrepMainFunctionType = void (*)(char * byte_data, size_t headerSize, size_t filesize, bool hasBlockChecksum, intptr_t match_accumulator);

/// Pointer to a count-only grep main function.
/// Takes the same buffer description as the scan-match variant (minus the
/// accumulator) and returns a 64-bit unsigned value (presumably the match
/// count -- confirm against the generated function).
using CountOnlyGrepMainFunctionType = uint64_t (*)(char * byte_data, size_t headerSize, size_t filesize, bool hasBlockChecksum);
18
19
20
21class LZ4GrepBaseGenerator : public LZ4BaseGenerator {
22public:
23
24    LZ4GrepBaseGenerator();
25
26    void generateScanMatchGrepPipeline(re::RE* regex);
27    void generateCountOnlyGrepPipeline(re::RE* regex, bool enableMultiplexing, bool utf8CC);
28
29
30    void invokeScanMatchGrep(char* fileBuffer, size_t blockStart, size_t blockEnd, bool hasBlockChecksum);
31
32    ScanMatchGrepMainFunctionType getScanMatchGrepMainFunction();
33    CountOnlyGrepMainFunctionType getCountOnlyGrepMainFunction();
34
35
36
37protected:
38    virtual parabix::StreamSetBuffer* generateUncompressedBitStreams() = 0;
39    virtual parabix::StreamSetBuffer* decompressBitStream(parabix::StreamSetBuffer* compressedByteStream, parabix::StreamSetBuffer* compressedBitStream) = 0;
40    virtual std::vector<parabix::StreamSetBuffer*> decompressBitStreams(parabix::StreamSetBuffer* compressedByteStream, std::vector<parabix::StreamSetBuffer*> compressedBitStreams);
41
42
43private:
44    grep::GrepRecordBreakKind mGrepRecordBreak;
45    void initREs(re::RE * REs);
46
47
48    re::CC * mBreakCC;
49    re:: RE * mRE;
50    std::set<re::Name *> mUnicodeProperties;
51    bool mMoveMatchesToEOL;
52    re::RE* u8NonFinalRe;
53
54
55    std::vector<std::ostringstream> mResultStrs;
56
57    void generateCountOnlyMainFunc(const std::unique_ptr<kernel::KernelBuilder> & iBuilder);
58    void generateScanMatchMainFunc(const std::unique_ptr<kernel::KernelBuilder> & iBuilder);
59
60
61    llvm::Value * match_accumulator;
62
63
64    parabix::StreamSetBuffer * linefeedStreamFromUncompressedBits(parabix::StreamSetBuffer *uncompressedBasisBits);
65
66
67    void generateFullyDecompressionCountOnlyGrepPipeline(re::RE *regex);
68    void generateMultiplexingCountOnlyGrepPipeline(re::RE *regex, bool utf8CC);
69
70
71    std::pair<parabix::StreamSetBuffer *, parabix::StreamSetBuffer *> grep(re::RE *RE,
72                                                                           parabix::StreamSetBuffer *uncompressedBasisBits, bool ccMultiplexing = false);
73    std::pair<parabix::StreamSetBuffer *, parabix::StreamSetBuffer *> multiplexingGrep(
74            re::RE *RE,
75            parabix::StreamSetBuffer *compressedByteStream,
76            parabix::StreamSetBuffer *compressedBitStream,
77            bool utf8CC
78    );
79    std::unique_ptr<cc::MultiplexedAlphabet> mpx;
80
81
82    std::vector<parabix::StreamSetBuffer*> generateFakeStreams(
83            const std::unique_ptr<kernel::KernelBuilder> & iBuilder,
84            parabix::StreamSetBuffer* refStream,
85            std::vector<unsigned> numOfStreams
86    );
87
88};
89
90
91#endif //ICGREP_LZ4GREPGENERATOR_H
Note: See TracBrowser for help on using the repository browser.