source: icXML/icXML-devel/src/xercesc/util/XMLStringTokenizer.hpp @ 2722

Last change on this file since 2722 was 2722, checked in by cameron, 6 years ago

Original Xerces files with import mods for icxercesc

File size: 6.9 KB
Line 
1/*
2 * Licensed to the Apache Software Foundation (ASF) under one or more
3 * contributor license agreements.  See the NOTICE file distributed with
4 * this work for additional information regarding copyright ownership.
5 * The ASF licenses this file to You under the Apache License, Version 2.0
6 * (the "License"); you may not use this file except in compliance with
7 * the License.  You may obtain a copy of the License at
8 *
9 *      http://www.apache.org/licenses/LICENSE-2.0
10 *
11 * Unless required by applicable law or agreed to in writing, software
12 * distributed under the License is distributed on an "AS IS" BASIS,
13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 * See the License for the specific language governing permissions and
15 * limitations under the License.
16 */
17
18/*
19 * $Id: XMLStringTokenizer.hpp 932887 2010-04-11 13:04:59Z borisk $
20 */
21
22#if !defined(XERCESC_INCLUDE_GUARD_XMLSTRINGTOKENIZER_HPP)
23#define XERCESC_INCLUDE_GUARD_XMLSTRINGTOKENIZER_HPP
24
25#include <xercesc/util/RefArrayVectorOf.hpp>
26#include <icxercesc/util/XMLString.hpp>
27
28XERCES_CPP_NAMESPACE_BEGIN
29
30/**
31  * The string tokenizer class breaks a string into tokens.
32  *
33  * The XMLStringTokenizer methods do not distinguish among identifiers,
34  * numbers, and quoted strings, nor do they recognize and skip comments
35  *
36  * A XMLStringTokenizer object internally maintains a current position within
37  * the string to be tokenized. Some operations advance this current position
38  * past the characters processed.
39  */
40
41
42  class XMLUTIL_EXPORT XMLStringTokenizer :public XMemory
43{
44public:
45    // -----------------------------------------------------------------------
46    //  Public Constructors
47    // -----------------------------------------------------------------------
48    /** @name Constructors */
49    //@{
50
51    /**
52      * Constructs a string tokenizer for the specified string. The tokenizer
53      * uses the default delimiter set, which is "\t\n\r\f": the space
54      * character, the tab character, the newline character, the
55      * carriage-return character, and the form-feed character. Delimiter
56      * characters themselves will not be treated as tokens.
57      *
58      * @param  srcStr  The string to be parsed.
59      * @param  manager Pointer to the memory manager to be used to
60      *                 allocate objects.
61      *
62      */
63        XMLStringTokenizer(const XMLCh* const srcStr,
64                       MemoryManager* const manager = XMLPlatformUtils::fgMemoryManager);
65
66    /**
67      * Constructs a string tokenizer for the specified string. The characters
68      * in the delim argument are the delimiters for separating tokens.
69      * Delimiter characters themselves will not be treated as tokens.
70      *
71      * @param  srcStr  The string to be parsed.
72      * @param  delim   The set of delimiters.
73      * @param  manager Pointer to the memory manager to be used to
74      *                 allocate objects.
75      */
76    XMLStringTokenizer(const XMLCh* const srcStr
77                       , const XMLCh* const delim
78                       , MemoryManager* const manager = XMLPlatformUtils::fgMemoryManager);
79
80    //@}
81
82        // -----------------------------------------------------------------------
83    //  Public Destructor
84    // -----------------------------------------------------------------------
85        /** @name Destructor. */
86    //@{
87
88    ~XMLStringTokenizer();
89
90    //@}
91
92    // -----------------------------------------------------------------------
93    // Management methods
94    // -----------------------------------------------------------------------
95    /** @name Management Function */
96    //@{
97
98     /**
99       * Tests if there are more tokens available from this tokenizer's string.
100       *
101       * Returns true if and only if there is at least one token in the string
102       * after the current position; false otherwise.
103       */
104        bool hasMoreTokens();
105
106    /**
107      * Calculates the number of times that this tokenizer's nextToken method
108      * can be called to return a valid token. The current position is not
109      * advanced.
110      *
111      * Returns the number of tokens remaining in the string using the current
112      * delimiter set.
113      */
114    unsigned int countTokens();
115
116    /**
117      * Returns the next token from this string tokenizer.
118      *
119      * Function allocated, function managed (fafm). The calling function
120      * does not need to worry about deleting the returned pointer.
121          */
122        XMLCh* nextToken();
123
124    //@}
125
126private:
127    // -----------------------------------------------------------------------
128    //  Unimplemented constructors and operators
129    // -----------------------------------------------------------------------
130    XMLStringTokenizer(const XMLStringTokenizer&);
131    XMLStringTokenizer& operator=(const XMLStringTokenizer&);
132
133    // -----------------------------------------------------------------------
134    //  CleanUp methods
135    // -----------------------------------------------------------------------
136        void cleanUp();
137
138    // -----------------------------------------------------------------------
139    //  Helper methods
140    // -----------------------------------------------------------------------
141    bool isDelimeter(const XMLCh ch);
142
143    // -----------------------------------------------------------------------
144    //  Private data members
145    //
146    //  fOffset
147    //      The current position in the parsed string.
148    //
149    //  fStringLen
150    //      The length of the string parsed (for convenience).
151    //
152    //  fString
153    //      The string to be parsed
154        //
155    //  fDelimeters
156    //      A set of delimiter characters
157    //
158    //  fTokens
159    //      A vector of the token strings
160    // -----------------------------------------------------------------------
161    XMLSize_t           fOffset;
162    XMLSize_t           fStringLen;
163        XMLCh*              fString;
164    const XMLCh*        fDelimeters;
165        RefArrayVectorOf<XMLCh>* fTokens;
166    MemoryManager*           fMemoryManager;
167};
168
169// ---------------------------------------------------------------------------
170//  XMLStringTokenizer: Helper methods
171// ---------------------------------------------------------------------------
172inline bool XMLStringTokenizer::isDelimeter(const XMLCh ch) {
173
174    return XMLString::indexOf(fDelimeters, ch) == -1 ? false : true;
175}
176
177
178// ---------------------------------------------------------------------------
179//  XMLStringTokenizer: Management methods
180// ---------------------------------------------------------------------------
181inline unsigned int XMLStringTokenizer::countTokens() {
182
183    if (fStringLen == 0)
184                return 0;
185
186    unsigned int tokCount = 0;
187    bool inToken = false;
188
189    for (XMLSize_t i= fOffset; i< fStringLen; i++) {
190
191        if (isDelimeter(fString[i])) {
192
193            if (inToken) {
194                inToken = false;
195            }
196
197            continue;
198        }
199
200                if (!inToken) {
201
202            tokCount++;
203            inToken = true;
204        }
205
206    } // end for
207
208    return tokCount;
209}
210
211XERCES_CPP_NAMESPACE_END
212
213#endif
214
215/**
216  * End of file XMLStringTokenizer.hpp
217  */
218
Note: See TracBrowser for help on using the repository browser.