source: icXML/icXML-devel/src/icxercesc/util/regx/RegularExpression.hpp @ 2721

Last change on this file since 2721 was 2721, checked in by cameron, 6 years ago

Fix imports in icXML modified Xerces files

File size: 31.2 KB
Line 
1/*
2 * Licensed to the Apache Software Foundation (ASF) under one or more
3 * contributor license agreements.  See the NOTICE file distributed with
4 * this work for additional information regarding copyright ownership.
5 * The ASF licenses this file to You under the Apache License, Version 2.0
6 * (the "License"); you may not use this file except in compliance with
7 * the License.  You may obtain a copy of the License at
8 *
9 *      http://www.apache.org/licenses/LICENSE-2.0
10 *
11 * Unless required by applicable law or agreed to in writing, software
12 * distributed under the License is distributed on an "AS IS" BASIS,
13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 * See the License for the specific language governing permissions and
15 * limitations under the License.
16 */
17
18/*
19 * $Id: RegularExpression.hpp 822158 2009-10-06 07:52:59Z amassari $
20 */
21
22#if !defined(XERCESC_INCLUDE_GUARD_REGULAREXPRESSION_HPP)
23#define XERCESC_INCLUDE_GUARD_REGULAREXPRESSION_HPP
24
25// ---------------------------------------------------------------------------
26//  Includes
27// ---------------------------------------------------------------------------
28#include <xercesc/util/RefArrayVectorOf.hpp>
29#include <icxercesc/util/XMLString.hpp>
30#include <xercesc/util/Janitor.hpp>
31#include <xercesc/util/regx/Op.hpp>
32#include <xercesc/util/regx/TokenFactory.hpp>
33#include <xercesc/util/regx/BMPattern.hpp>
34#include <xercesc/util/regx/OpFactory.hpp>
35#include <xercesc/util/regx/RegxUtil.hpp>
36#include <icxercesc/framework/XMLBuffer.hpp>
37
38XERCES_CPP_NAMESPACE_BEGIN
39
40// ---------------------------------------------------------------------------
41//  Forward Declaration
42// ---------------------------------------------------------------------------
43class RangeToken;
44class Match;
45class RegxParser;
46
47/**
48 * The RegularExpression class represents a parsed executable regular expression.
49 * This class is thread safe. Two similar regular expression syntaxes are
50 * supported:
51 *
52 * <ol>
53 * <li><a href="http://www.w3.org/TR/xpath-functions/#regex-syntax">The XPath 2.0 / XQuery regular expression syntax.</a></li>
54 * <li><a href="http://www.w3.org/TR/xmlschema-2/#regexs">The XML Schema regular expression syntax.</a></li>
55 * </ol>
56 *
57 * XPath 2.0 regular expression syntax is used unless the "X" option is specified during construction.
58 *
59 * Options can be specified during construction to change the way that the regular expression is handled.
60 * Options are specified by a string consisting of any number of the following characters:
61 *
62 * <table border='1'>
63 * <tr>
64 * <th>Character</th>
65 * <th>Meaning</th>
66 * </tr>
67 * <tr>
68 * <td valign='top' rowspan='1' colspan='1'>i</td>
69 * <td valign='top' rowspan='1' colspan='1'><a href="http://www.w3.org/TR/xpath-functions/#flags">
70 * Ignore case</a> when matching the regular expression.</td>
71 * </tr>
72 * <tr>
73 * <td valign='top' rowspan='1' colspan='1'>m</td>
74 * <td valign='top' rowspan='1' colspan='1'><a href="http://www.w3.org/TR/xpath-functions/#flags">
75 * Multi-line mode</a>. The meta characters "^" and "$" will match the beginning and end of lines.</td>
76 * </tr>
77 * <tr>
78 * <td valign='top' rowspan='1' colspan='1'>s</td>
79 * <td valign='top' rowspan='1' colspan='1'><a href="http://www.w3.org/TR/xpath-functions/#flags">
80 * Single-line mode</a>. The meta character "." will match a newline character.</td>
81 * </tr>
82 * <tr>
83 * <td valign='top' rowspan='1' colspan='1'>x</td>
84 * <td valign='top' rowspan='1' colspan='1'>Allow extended comments.</td>
85 * </tr>
86 * <tr>
87 * <td valign='top' rowspan='1' colspan='1'>F</td>
88 * <td valign='top' rowspan='1' colspan='1'>Prohibit the fixed string optimization.</td>
89 * </tr>
90 * <tr>
91 * <td valign='top' rowspan='1' colspan='1'>H</td>
92 * <td valign='top' rowspan='1' colspan='1'>Prohibit the head character optimization.</td>
93 * </tr>
94 * <tr>
95 * <td valign='top' rowspan='1' colspan='1'>X</td>
96 * <td valign='top' rowspan='1' colspan='1'>Parse the regular expression according to the
97 * <a href="http://www.w3.org/TR/xmlschema-2/#regexs">XML Schema regular expression syntax</a>.</td>
98 * </tr>
99 * </table>
100 */
101class XMLUTIL_EXPORT RegularExpression : public XMemory
102{
103public:
104    // -----------------------------------------------------------------------
105    //  Public Constructors and Destructor
106    // -----------------------------------------------------------------------
107
108    /** @name Constructors and destructor */
109    //@{
110
111    /** Parses the given regular expression.
112      *
113      * @param pattern the regular expression in the local code page
114      * @param manager the memory manager to use
115      */
116    RegularExpression
117    (
118        const char* const pattern
119        , MemoryManager* const manager = XMLPlatformUtils::fgMemoryManager
120    );
121
122    /** Parses the given regular expression using the options specified.
123      *
124      * @param pattern the regular expression in the local code page
125      * @param options the options string in the local code page
126      * @param manager the memory manager to use
127      */
128    RegularExpression
129    (
130        const char* const pattern
131        , const char* const options
132        , MemoryManager* const manager = XMLPlatformUtils::fgMemoryManager
133    );
134
135    /** Parses the given regular expression.
136      *
137      * @param pattern the regular expression
138      * @param manager the memory manager to use
139      */
140    RegularExpression
141    (
142        const XMLCh* const pattern
143        , MemoryManager* const manager = XMLPlatformUtils::fgMemoryManager
144    );
145
146    /** Parses the given regular expression using the options specified.
147      *
148      * @param pattern the regular expression
149      * @param options the options string
150      * @param manager the memory manager to use
151      */
152    RegularExpression
153    (
154        const XMLCh* const pattern
155        , const XMLCh* const options
156        , MemoryManager* const manager = XMLPlatformUtils::fgMemoryManager
157    );
158
159    virtual ~RegularExpression();
160
161    //@}
162
163    // -----------------------------------------------------------------------
164    //  Public Constants
165    // -----------------------------------------------------------------------
    // Option flags. Their values are defined outside this header.
    // NOTE(review): presumably each one corresponds to an option character
    // listed in the class documentation - confirm against the implementation.
166    static const unsigned int   IGNORE_CASE;
167    static const unsigned int   SINGLE_LINE;
168    static const unsigned int   MULTIPLE_LINE;
169    static const unsigned int   EXTENDED_COMMENT;
170    static const unsigned int   PROHIBIT_HEAD_CHARACTER_OPTIMIZATION;
171    static const unsigned int   PROHIBIT_FIXED_STRING_OPTIMIZATION;
172    static const unsigned int   XMLSCHEMA_MODE;
    // Character classification values.
    // NOTE(review): the code consuming these is not visible in this header.
173    typedef enum
174    {
175        wordTypeIgnore = 0,
176        wordTypeLetter = 1,
177        wordTypeOther = 2
178    } wordType;
179
180    // -----------------------------------------------------------------------
181    //  Public Helper methods
182    // -----------------------------------------------------------------------
183
184    /** @name Public helper methods */
185    //@{
186
    // getOptionValue is defined outside this header.
    // NOTE(review): presumably maps an option character to its option flag.
187    static int getOptionValue(const XMLCh ch);
    // Returns true when every bit of 'flag' is also set in 'options' (inline
    // definition later in this header). Note that it takes int arguments
    // while the option constants above are declared unsigned int.
188    static bool isSet(const int options, const int flag);
189
190    //@}
191
192    // -----------------------------------------------------------------------
193    //  Matching methods
194    // -----------------------------------------------------------------------
195
196    /** @name Matching methods */
197    //@{
198
199    /** Tries to match the given null terminated string against the regular expression, returning
200      * true if successful.
201      *
202      * @param matchString the string to match in the local code page
203      * @param manager     the memory manager to use
204      *
205      * @return Whether the string matched the regular expression or not.
206      */
207    bool matches(const char* const matchString,
208                 MemoryManager* const manager = XMLPlatformUtils::fgMemoryManager) const;
209
210    /** Tries to match the given string between the specified start and end offsets
211      * against the regular expression, returning true if successful.
212      *
213      * @param matchString the string to match in the local code page
214      * @param start       the offset of the start of the string
215      * @param end         the offset of the end of the string
216      * @param manager     the memory manager to use
217      *
218      * @return Whether the string matched the regular expression or not.
219      */
220    bool matches(const char* const matchString, const XMLSize_t start, const XMLSize_t end,
221                 MemoryManager* const manager = XMLPlatformUtils::fgMemoryManager) const;
222
223    /** Tries to match the given null terminated string against the regular expression, returning
224      * true if successful.
225      *
226      * @param matchString the string to match in the local code page
227      * @param pMatch      a Match object, which will be populated with the offsets for the
228      * regular expression match and sub-matches.
229      * @param manager     the memory manager to use
230      *
231      * @return Whether the string matched the regular expression or not.
232      */
233    bool matches(const char* const matchString, Match* const pMatch,
234                 MemoryManager* const manager = XMLPlatformUtils::fgMemoryManager) const;
235
236    /** Tries to match the given string between the specified start and end offsets
237      * against the regular expression, returning true if successful.
238      *
239      * @param matchString the string to match in the local code page
240      * @param start       the offset of the start of the string
241      * @param end         the offset of the end of the string
242      * @param pMatch      a Match object, which will be populated with the offsets for the
243      * regular expression match and sub-matches.
244      * @param manager     the memory manager to use
245      *
246      * @return Whether the string matched the regular expression or not.
247      */
248    bool matches(const char* const matchString, const XMLSize_t start, const XMLSize_t end,
249                 Match* const pMatch, MemoryManager* const manager = XMLPlatformUtils::fgMemoryManager) const;
250
251    /** Tries to match the given null terminated string against the regular expression, returning
252      * true if successful.
253      *
254      * @param matchString the string to match
255      * @param manager     the memory manager to use
256      *
257      * @return Whether the string matched the regular expression or not.
258      */
259    bool matches(const XMLCh* const matchString,
260                 MemoryManager* const manager = XMLPlatformUtils::fgMemoryManager) const;
261
262    /** Tries to match the given string between the specified start and end offsets
263      * against the regular expression, returning true if successful.
264      *
265      * @param matchString the string to match
266      * @param start       the offset of the start of the string
267      * @param end         the offset of the end of the string
268      * @param manager     the memory manager to use
269      *
270      * @return Whether the string matched the regular expression or not.
271      */
272    bool matches(const XMLCh* const matchString, const XMLSize_t start, const XMLSize_t end,
273                 MemoryManager* const manager = XMLPlatformUtils::fgMemoryManager) const;
274
275    /** Tries to match the given null terminated string against the regular expression, returning
276      * true if successful.
277      *
278      * @param matchString the string to match
279      * @param pMatch      a Match object, which will be populated with the offsets for the
280      * regular expression match and sub-matches.
281      * @param manager     the memory manager to use
282      *
283      * @return Whether the string matched the regular expression or not.
284      */
285    bool matches(const XMLCh* const matchString, Match* const pMatch,
286                 MemoryManager* const manager = XMLPlatformUtils::fgMemoryManager) const;
287
288    /** Tries to match the given string between the specified start and end offsets
289      * against the regular expression, returning true if successful.
290      *
291      * @param matchString the string to match
292      * @param start       the offset of the start of the string
293      * @param end         the offset of the end of the string
294      * @param pMatch      a Match object, which will be populated with the offsets for the
295      * regular expression match and sub-matches.
296      * @param manager     the memory manager to use
297      *
298      * @return Whether the string matched the regular expression or not.
299      */
300    bool matches(const XMLCh* const matchString, const XMLSize_t start, const XMLSize_t end,
301                 Match* const pMatch, MemoryManager* const manager = XMLPlatformUtils::fgMemoryManager) const;
302
303    /** Tries to match the given string between the specified start and end offsets
304      * against the regular expression. The subEx vector is populated with the details
305      * for every non-overlapping occurrence of a match in the string.
306      *
307      * @param matchString the string to match
308      * @param start       the offset of the start of the string
309      * @param end         the offset of the end of the string
310      * @param subEx       a RefVectorOf Match objects, populated with the offsets for the
311      * regular expression match and sub-matches.
312      * @param manager     the memory manager to use
313      */
314    void allMatches(const XMLCh* const matchString, const XMLSize_t start, const XMLSize_t end,
315                    RefVectorOf<Match> *subEx, MemoryManager* const manager = XMLPlatformUtils::fgMemoryManager) const;
316
317    //@}
318
319    // -----------------------------------------------------------------------
320    //  Tokenize methods
321    // -----------------------------------------------------------------------
322    // Note: The caller owns the string vector that is returned, and is responsible
323    //       for deleting it.
324
325    /** @name Tokenize methods */
326    //@{
327
328    /** Tokenizes the null terminated string according to the regular expression, returning
329      * the parts of the string that do not match the regular expression.
330      *
331      * @param matchString the string to match in the local code page
332      * @param manager     the memory manager to use
333      *
334      * @return A RefArrayVectorOf sub-strings that do not match the regular expression allocated using the
335      * given MemoryManager. The caller owns the string vector that is returned, and is responsible for
336      * deleting it.
337      */
338    RefArrayVectorOf<XMLCh> *tokenize(const char* const matchString,
339                                      MemoryManager* const manager = XMLPlatformUtils::fgMemoryManager) const;
340
341    /** Tokenizes the string between the specified start and end offsets according to the regular
342      * expression, returning the parts of the string that do not match the regular expression.
343      *
344      * @param matchString the string to match in the local code page
345      * @param start       the offset of the start of the string
346      * @param end         the offset of the end of the string
347      * @param manager     the memory manager to use
348      *
349      * @return A RefArrayVectorOf sub-strings that do not match the regular expression allocated using the
350      * given MemoryManager. The caller owns the string vector that is returned, and is responsible for
351      * deleting it.
352      */
353    RefArrayVectorOf<XMLCh> *tokenize(const char* const matchString, const XMLSize_t start, const XMLSize_t end,
354                                      MemoryManager* const manager = XMLPlatformUtils::fgMemoryManager) const;
355
356    /** Tokenizes the null terminated string according to the regular expression, returning
357      * the parts of the string that do not match the regular expression.
358      *
359      * @param matchString the string to match
360      * @param manager     the memory manager to use
361      *
362      * @return A RefArrayVectorOf sub-strings that do not match the regular expression allocated using the
363      * given MemoryManager. The caller owns the string vector that is returned, and is responsible for
364      * deleting it.
365      */
366    RefArrayVectorOf<XMLCh> *tokenize(const XMLCh* const matchString,
367                                      MemoryManager* const manager = XMLPlatformUtils::fgMemoryManager) const;
368
369    /** Tokenizes the string between the specified start and end offsets according to the regular
370      * expression, returning the parts of the string that do not match the regular expression.
371      *
372      * @param matchString the string to match
373      * @param start       the offset of the start of the string
374      * @param end         the offset of the end of the string
375      * @param manager     the memory manager to use
376      *
377      * @return A RefArrayVectorOf sub-strings that do not match the regular expression allocated using the
378      * given MemoryManager. The caller owns the string vector that is returned, and is responsible for
379      * deleting it.
380      */
381    RefArrayVectorOf<XMLCh> *tokenize(const XMLCh* const matchString, const XMLSize_t start, const XMLSize_t end,
382                                      MemoryManager* const manager = XMLPlatformUtils::fgMemoryManager) const;
383
384    //@}
385
386    // -----------------------------------------------------------------------
387    //  Replace methods
388    // -----------------------------------------------------------------------
389    // Note: The caller owns the XMLCh* that is returned, and is responsible for
390    //       deleting it.
391
392    /** @name Replace methods */
393    //@{
394
395    /** Performs a search and replace on the given null terminated string, replacing
396      * any substring that matches the regular expression with a string derived from
397      * the <a href="http://www.w3.org/TR/xpath-functions/#func-replace">replacement string</a>.
398      *
399      * @param matchString   the string to match in the local code page
400      * @param replaceString the string to replace in the local code page
401      * @param manager       the memory manager to use
402      *
403      * @return The resulting string allocated using the given MemoryManager. The caller owns the string
404      * that is returned, and is responsible for deleting it.
405      */
406    XMLCh *replace(const char* const matchString, const char* const replaceString,
407                   MemoryManager* const manager = XMLPlatformUtils::fgMemoryManager) const;
408
409    /** Performs a search and replace on the given string between the specified start and end offsets, replacing
410      * any substring that matches the regular expression with a string derived from
411      * the <a href="http://www.w3.org/TR/xpath-functions/#func-replace">replacement string</a>.
412      *
413      * @param matchString   the string to match in the local code page
414      * @param replaceString the string to replace in the local code page
415      * @param start         the offset of the start of the string
416      * @param end           the offset of the end of the string
417      * @param manager       the memory manager to use
418      *
419      * @return The resulting string allocated using the given MemoryManager. The caller owns the string
420      * that is returned, and is responsible for deleting it.
421      */
422    XMLCh *replace(const char* const matchString, const char* const replaceString,
423                   const XMLSize_t start, const XMLSize_t end,
424                   MemoryManager* const manager = XMLPlatformUtils::fgMemoryManager) const;
425
426    /** Performs a search and replace on the given null terminated string, replacing
427      * any substring that matches the regular expression with a string derived from
428      * the <a href="http://www.w3.org/TR/xpath-functions/#func-replace">replacement string</a>.
429      *
430      * @param matchString   the string to match
431      * @param replaceString the string to replace
432      * @param manager       the memory manager to use
433      *
434      * @return The resulting string allocated using the given MemoryManager. The caller owns the string
435      * that is returned, and is responsible for deleting it.
436      */
437    XMLCh *replace(const XMLCh* const matchString, const XMLCh* const replaceString,
438                   MemoryManager* const manager = XMLPlatformUtils::fgMemoryManager) const;
439
440    /** Performs a search and replace on the given string between the specified start and end offsets, replacing
441      * any substring that matches the regular expression with a string derived from
442      * the <a href="http://www.w3.org/TR/xpath-functions/#func-replace">replacement string</a>.
443      *
444      * @param matchString   the string to match
445      * @param replaceString the string to replace
446      * @param start         the offset of the start of the string
447      * @param end           the offset of the end of the string
448      * @param manager       the memory manager to use
449      *
450      * @return The resulting string allocated using the given MemoryManager. The caller owns the string
451      * that is returned, and is responsible for deleting it.
452      */
453    XMLCh *replace(const XMLCh* const matchString, const XMLCh* const replaceString,
454                   const XMLSize_t start, const XMLSize_t end,
455                   MemoryManager* const manager = XMLPlatformUtils::fgMemoryManager) const;
456
457    //@}
458
459    // -----------------------------------------------------------------------
460    //  Static initialize and cleanup methods
461    // -----------------------------------------------------------------------
462
463    /** @name Static initialize and cleanup methods */
464    //@{
465
    // Defined outside this header.
    // NOTE(review): presumably prepares class-wide static state (such as
    // fWordRange) using the given memory manager - confirm in the .cpp.
466    static void
467    staticInitialize(MemoryManager*  memoryManager);
468
    // Resets the static fWordRange pointer to 0 (inline definition later in
    // this header).
469    static void
470    staticCleanup();
471
472    //@}
473
474protected:
    // Defined outside this header.
    // NOTE(review): being virtual, this looks like a hook that lets a subclass
    // supply its own RegxParser - confirm against the implementation.
475    virtual RegxParser* getRegexParser(const int options, MemoryManager* const manager = XMLPlatformUtils::fgMemoryManager);
476
477    // -----------------------------------------------------------------------
478    //  Cleanup methods
479    // -----------------------------------------------------------------------
    // Releases fPattern and fFixedString through fMemoryManager and deletes
    // fBMPattern and fTokenFactory (inline definition later in this header).
480    void cleanUp();
481
482    // -----------------------------------------------------------------------
483    //  Setter methods
484    // -----------------------------------------------------------------------
    // Defined outside this header; 'options' may be omitted (defaults to 0).
485    void setPattern(const XMLCh* const pattern, const XMLCh* const options=0);
486
487    // -----------------------------------------------------------------------
488    //  Protected data types
489    // -----------------------------------------------------------------------
    // State object handed to match() and the match* helpers declared below,
    // each of which takes a Context* as its first argument.
490    class XMLUTIL_EXPORT Context : public XMemory
491    {
492        public :
493            Context(MemoryManager* const manager = XMLPlatformUtils::fgMemoryManager);
494            Context(Context* src);
495            ~Context();
496
497            Context& operator= (const Context& other);
            // Accessor for the fString member.
498            inline const XMLCh* getString() const { return fString; }
499            void reset(const XMLCh* const string, const XMLSize_t stringLen,
500                       const XMLSize_t start, const XMLSize_t limit, const int noClosures,
501                       const unsigned int options);
502            bool nextCh(XMLInt32& ch, XMLSize_t& offset);
503
            // NOTE(review): the members below are public and defined without
            // per-field documentation; their exact roles (for example whether
            // fAdoptMatch implies ownership of fMatch) must be confirmed
            // against the implementation file.
504            bool           fAdoptMatch;
505            XMLSize_t      fStart;
506            XMLSize_t      fLimit;
507            XMLSize_t      fLength;    // fLimit - fStart
508            int            fSize;
509            XMLSize_t      fStringMaxLen;
510            int*           fOffsets;
511            Match*         fMatch;
512            const XMLCh*   fString;
513            unsigned int   fOptions;
514            MemoryManager* fMemoryManager;
515    };
516
517    // -----------------------------------------------------------------------
518    //  Unimplemented constructors and operators
519    // -----------------------------------------------------------------------
    // Declared only; per the heading above they are left without a definition.
520    RegularExpression(const RegularExpression&);
521    RegularExpression& operator=(const RegularExpression&);
522
523    // -----------------------------------------------------------------------
524    //  Protected Helper methods
525    // -----------------------------------------------------------------------
526    void prepare();
527    int parseOptions(const XMLCh* const options);
528
529    /**
530      *    Matching helpers
531      */
532    int match(Context* const context, const Op* const operations, XMLSize_t offset) const;
533    bool matchIgnoreCase(const XMLInt32 ch1, const XMLInt32 ch2) const;
534
535    /**
536      *    Helper methods used by match(Context* ...)
537      */
538    bool matchChar(Context* const context, const XMLInt32 ch, XMLSize_t& offset,
539                   const bool ignoreCase) const;
540    bool matchDot(Context* const context, XMLSize_t& offset) const;
541    bool matchRange(Context* const context, const Op* const op,
542                    XMLSize_t& offset, const bool ignoreCase) const;
543    bool matchAnchor(Context* const context, const XMLInt32 ch,
544                     const XMLSize_t offset) const;
545    bool matchBackReference(Context* const context, const XMLInt32 ch,
546                            XMLSize_t& offset, const bool ignoreCase) const;
547    bool matchString(Context* const context, const XMLCh* const literal,
548                     XMLSize_t& offset, const bool ignoreCase) const;
549    int  matchUnion(Context* const context, const Op* const op, XMLSize_t offset) const;
550    int matchCapture(Context* const context, const Op* const op, XMLSize_t offset) const;
551
552    /**
553     *    Replace helpers
554     */
555    void subInExp(const XMLCh* const repString,
556                  const XMLCh* const origString,
557                  const Match* subEx,
558                  XMLBuffer &result,
559                  MemoryManager* const manager) const;
560    /**
561     *    Converts a token tree into an operation tree
562     */
563    void compile(const Token* const token);
564    Op*  compile(const Token* const token, Op* const next,
565                 const bool reverse);
566    /**
567      *    Helper methods used by compile
568      */
    // All four compile* helpers below are defined inline later in this header.
569    Op* compileUnion(const Token* const token, Op* const next,
570                     const bool reverse);
571    Op* compileParenthesis(const Token* const token, Op* const next,
572                           const bool reverse);
573    Op* compileConcat(const Token* const token, Op* const next,
574                      const bool reverse);
575    Op* compileClosure(const Token* const token, Op* const next,
576                       const bool reverse, const Token::tokType tkType);
577
    // Used by compileClosure to decide how a closure's child is compiled;
    // the definition is not in this header.
578    bool doTokenOverlap(const Op* op, Token* token);
579
580    // -----------------------------------------------------------------------
581    //  Protected data members
582    // -----------------------------------------------------------------------
    // Of these, cleanUp() releases fPattern, fFixedString, fBMPattern and
    // fTokenFactory; fNoClosures is incremented by compileClosure; fWordRange
    // is static and reset by staticCleanup().
    // NOTE(review): ownership of the remaining pointers is not established by
    // this header - confirm against the implementation file.
583    bool               fHasBackReferences;
584    bool               fFixedStringOnly;
585    int                fNoGroups;
586    XMLSize_t          fMinLength;
587    unsigned int       fNoClosures;
588    unsigned int       fOptions;
589    const BMPattern*   fBMPattern;
590    XMLCh*             fPattern;
591    XMLCh*             fFixedString;
592    const Op*          fOperations;
593    Token*             fTokenTree;
594    RangeToken*        fFirstChar;
595    static RangeToken* fWordRange;
596    OpFactory          fOpFactory;
597    TokenFactory*      fTokenFactory;
598    MemoryManager*     fMemoryManager;
599};
600
601
602
603  // -----------------------------------------------------------------------
604  //  RegularExpression: Static initialize and cleanup methods
605  // -----------------------------------------------------------------------
606  inline void RegularExpression::staticCleanup()
607  {
608      fWordRange = 0;
609  }
610
611  // ---------------------------------------------------------------------------
612  //  RegularExpression: Cleanup methods
613  // ---------------------------------------------------------------------------
614  inline void RegularExpression::cleanUp() {
615
616      fMemoryManager->deallocate(fPattern);//delete [] fPattern;
617      fMemoryManager->deallocate(fFixedString);//delete [] fFixedString;
618      delete fBMPattern;
619      delete fTokenFactory;
620  }
621
622  // ---------------------------------------------------------------------------
623  //  RegularExpression: Helper methods
624  // ---------------------------------------------------------------------------
625  inline bool RegularExpression::isSet(const int options, const int flag) {
626
627      return (options & flag) == flag;
628  }
629
630
631  inline Op* RegularExpression::compileUnion(const Token* const token,
632                                             Op* const next,
633                                             const bool reverse) {
634
635      XMLSize_t tokSize = token->size();
636      UnionOp* uniOp = fOpFactory.createUnionOp(tokSize);
637
638      for (XMLSize_t i=0; i<tokSize; i++) {
639
640          uniOp->addElement(compile(token->getChild(i), next, reverse));
641      }
642
643      return uniOp;
644  }
645
646
647  inline Op* RegularExpression::compileParenthesis(const Token* const token,
648                                                   Op* const next,
649                                                   const bool reverse) {
650
651      if (token->getNoParen() == 0)
652          return compile(token->getChild(0), next, reverse);
653
654      Op* captureOp    = 0;
655
656      if (reverse) {
657
658          captureOp = fOpFactory.createCaptureOp(token->getNoParen(), next);
659          captureOp = compile(token->getChild(0), captureOp, reverse);
660
661          return fOpFactory.createCaptureOp(-token->getNoParen(), captureOp);
662      }
663
664      captureOp = fOpFactory.createCaptureOp(-token->getNoParen(), next);
665      captureOp = compile(token->getChild(0), captureOp, reverse);
666
667      return fOpFactory.createCaptureOp(token->getNoParen(), captureOp);
668  }
669
670  inline Op* RegularExpression::compileConcat(const Token* const token,
671                                              Op*  const next,
672                                              const bool reverse) {
673
674      Op* ret = next;
675      XMLSize_t tokSize = token->size();
676
677      if (!reverse) {
678
679          for (XMLSize_t i= tokSize; i>0; i--) {
680              ret = compile(token->getChild(i-1), ret, false);
681          }
682      }
683      else {
684
685          for (XMLSize_t i= 0; i< tokSize; i++) {
686              ret = compile(token->getChild(i), ret, true);
687          }
688      }
689
690      return ret;
691  }
692
693  inline Op* RegularExpression::compileClosure(const Token* const token,
694                                               Op* const next,
695                                               const bool reverse,
696                                               const Token::tokType tkType) {
697
698      Op*    ret      = 0;
699      Token* childTok = token->getChild(0);
700      int    min      = token->getMin();
701      int    max      = token->getMax();
702
703      if (min >= 0 && min == max) {
704
705          ret = next;
706          for (int i=0; i< min; i++) {
707              ret = compile(childTok, ret, reverse);
708          }
709
710          return ret;
711      }
712
713      if (min > 0 && max > 0)
714          max -= min;
715
716      if (max > 0) {
717
718          ret = next;
719          for (int i=0; i<max; i++) {
720
721              ChildOp* childOp = fOpFactory.createQuestionOp(
722                  tkType == Token::T_NONGREEDYCLOSURE);
723
724              childOp->setNextOp(next);
725              childOp->setChild(compile(childTok, ret, reverse));
726              ret = childOp;
727          }
728      }
729      else {
730
731          ChildOp* childOp = 0;
732
733          if (tkType == Token::T_NONGREEDYCLOSURE) {
734              childOp = fOpFactory.createNonGreedyClosureOp();
735          }
736          else {
737
738              if (childTok->getMinLength() == 0)
739                  childOp = fOpFactory.createClosureOp(fNoClosures++);
740              else
741                  childOp = fOpFactory.createClosureOp(-1);
742          }
743
744          childOp->setNextOp(next);
745          if(next==NULL || !doTokenOverlap(next, childTok))
746          {
747              childOp->setOpType(tkType == Token::T_NONGREEDYCLOSURE?Op::O_FINITE_NONGREEDYCLOSURE:Op::O_FINITE_CLOSURE);
748              childOp->setChild(compile(childTok, NULL, reverse));
749          }
750          else
751          {
752              childOp->setChild(compile(childTok, childOp, reverse));
753          }
754          ret = childOp;
755      }
756
757      if (min > 0) {
758
759          for (int i=0; i< min; i++) {
760              ret = compile(childTok, ret, reverse);
761          }
762      }
763
764      return ret;
765  }
766
767XERCES_CPP_NAMESPACE_END
768
769#endif
770/**
771  * End of file RegularExpression.hpp
772  */
773
Note: See TracBrowser for help on using the repository browser.