source: icGREP/icgrep-devel/icgrep/lz4/grep/lz4_grep_base_generator.h @ 6145

Last change on this file since 6145 was 6145, checked in by xwa163, 3 months ago
  1. LZ4 Grep: complete utf8 character classes for multiplexing pipeline
  2. Implement multiple streams version of S2P and P2S
File size: 3.0 KB
Line 
1
2#ifndef ICGREP_LZ4GREPGENERATOR_H
3#define ICGREP_LZ4GREPGENERATOR_H
4
5#include "lz4/lz4_base_generator.h"
6
7#include <grep_interface.h>
8#include <kernels/streamset.h>
9#include <cc/multiplex_CCs.h>
10#include <string>
11#include <vector>
12#include <sstream>
13#include <atomic>
14#include <grep/grep_engine.h>
15
// Function-pointer type for a scan-match grep entry point: takes the byte
// buffer, header size, file size, a block-checksum flag and a match
// accumulator passed as an integer-sized value; returns nothing.
using ScanMatchGrepMainFunctionType = void (*)(char * byte_data, size_t headerSize, size_t filesize, bool hasBlockChecksum, intptr_t match_accumulator);

// Function-pointer type for a count-only grep entry point: same leading
// arguments as above without the accumulator; returns a 64-bit unsigned value
// (presumably the match count -- confirm against the generated function).
using CountOnlyGrepMainFunctionType = uint64_t (*)(char * byte_data, size_t headerSize, size_t filesize, bool hasBlockChecksum);
18
19
20
21class LZ4GrepBaseGenerator : public LZ4BaseGenerator {
22public:
23
24    LZ4GrepBaseGenerator();
25
26    void generateScanMatchGrepPipeline(re::RE* regex);
27    void generateCountOnlyGrepPipeline(re::RE* regex, bool enableMultiplexing, bool utf8CC);
28
29
30    void invokeScanMatchGrep(char* fileBuffer, size_t blockStart, size_t blockEnd, bool hasBlockChecksum);
31
32    ScanMatchGrepMainFunctionType getScanMatchGrepMainFunction();
33    CountOnlyGrepMainFunctionType getCountOnlyGrepMainFunction();
34
35
36
37protected:
38    virtual parabix::StreamSetBuffer* generateUncompressedBitStreams() = 0;
39    virtual parabix::StreamSetBuffer* decompressBitStream(parabix::StreamSetBuffer* compressedByteStream, parabix::StreamSetBuffer* compressedBitStream) = 0;
40    virtual std::vector<parabix::StreamSetBuffer*> decompressBitStreams(parabix::StreamSetBuffer* compressedByteStream, std::vector<parabix::StreamSetBuffer*> compressedBitStreams);
41
42    std::vector<parabix::StreamSetBuffer*> generateFakeStreams(
43            const std::unique_ptr<kernel::KernelBuilder> & iBuilder,
44            parabix::StreamSetBuffer* refStream,
45            std::vector<unsigned> numOfStreams
46    );
47
48private:
49    grep::GrepRecordBreakKind mGrepRecordBreak;
50    void initREs(re::RE * REs);
51
52
53    re::CC * mBreakCC;
54    re:: RE * mRE;
55    std::set<re::Name *> mUnicodeProperties;
56    bool mMoveMatchesToEOL;
57    re::RE* u8NonFinalRe;
58
59
60    std::vector<std::ostringstream> mResultStrs;
61
62    void generateCountOnlyMainFunc(const std::unique_ptr<kernel::KernelBuilder> & iBuilder);
63    void generateScanMatchMainFunc(const std::unique_ptr<kernel::KernelBuilder> & iBuilder);
64
65
66    llvm::Value * match_accumulator;
67
68
69    parabix::StreamSetBuffer * linefeedStreamFromUncompressedBits(parabix::StreamSetBuffer *uncompressedBasisBits);
70
71
72    void generateFullyDecompressionCountOnlyGrepPipeline(re::RE *regex);
73    void generateMultiplexingCountOnlyGrepPipeline(re::RE *regex, bool utf8CC);
74
75
76    std::pair<parabix::StreamSetBuffer *, parabix::StreamSetBuffer *> grep(re::RE *RE,
77                                                                           parabix::StreamSetBuffer *uncompressedBasisBits, bool ccMultiplexing = false);
78    std::pair<parabix::StreamSetBuffer *, parabix::StreamSetBuffer *> multiplexingGrep(
79            re::RE *RE,
80            parabix::StreamSetBuffer *compressedByteStream,
81            parabix::StreamSetBuffer *compressedBitStream,
82            bool utf8CC
83    );
84    std::unique_ptr<cc::MultiplexedAlphabet> mpx;
85
86};
87
88
89#endif //ICGREP_LZ4GREPGENERATOR_H
Note: See TracBrowser for help on using the repository browser.