source: icXML/icXML-devel/src/xercesc/util/XMLUri.hpp @ 2722

Last change on this file since 2722 was 2722, checked in by cameron, 6 years ago

Original Xerces files with import mods for icxercesc

File size: 21.3 KB
Line 
1/*
2 * Licensed to the Apache Software Foundation (ASF) under one or more
3 * contributor license agreements.  See the NOTICE file distributed with
4 * this work for additional information regarding copyright ownership.
5 * The ASF licenses this file to You under the Apache License, Version 2.0
6 * (the "License"); you may not use this file except in compliance with
7 * the License.  You may obtain a copy of the License at
8 *
9 *      http://www.apache.org/licenses/LICENSE-2.0
10 *
11 * Unless required by applicable law or agreed to in writing, software
12 * distributed under the License is distributed on an "AS IS" BASIS,
13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 * See the License for the specific language governing permissions and
15 * limitations under the License.
16 */
17
18/*
19 * $Id: XMLUri.hpp 557254 2007-07-18 13:28:54Z amassari $
20 */
21
22#if !defined(XERCESC_INCLUDE_GUARD_XMLURI_HPP)
23#define XERCESC_INCLUDE_GUARD_XMLURI_HPP
24
25#include <xercesc/util/XMemory.hpp>
26#include <icxercesc/util/XMLString.hpp>
27
28#include <xercesc/internal/XSerializable.hpp>
29#include <icxercesc/framework/XMLBuffer.hpp>
30
31XERCES_CPP_NAMESPACE_BEGIN
32
/*
 * This class is a direct port of Java's URI class. To distinguish
 * it from XMLURL, we use the name XMLUri instead of XMLURI.
 *
 * TODO: decide how XMLUri and XMLURL should relate, since a URL is a
 *       kind of URI.
 */
41
class XMLUTIL_EXPORT XMLUri : public XSerializable, public XMemory
{
public:

    // -----------------------------------------------------------------------
    //  Constructors and Destructor
    // -----------------------------------------------------------------------

    /**
     * Construct a new URI from a URI specification string.
     *
     * If the specification follows the "generic URI" syntax, (two slashes
     * following the first colon), the specification will be parsed
     * accordingly - setting the
     *                           scheme,
     *                           userinfo,
     *                           host,
     *                           port,
     *                           path,
     *                           querystring and
     *                           fragment
     * fields as necessary.
     *
     * If the specification does not follow the "generic URI" syntax,
     * the specification is parsed into a
     *                           scheme and
     *                           scheme-specific part (stored as the path) only.
     *
     * @param uriSpec the URI specification string (cannot be null or empty)
     *
     * @param manager Pointer to the memory manager to be used to
     *                allocate objects.
     *
     * ctor# 2
     *
     */
    XMLUri(const XMLCh* const    uriSpec,
           MemoryManager* const manager = XMLPlatformUtils::fgMemoryManager);

    /**
     * Construct a new URI from a base URI and a URI specification string.
     * The URI specification string may be a relative URI.
     *
     * @param baseURI the base URI (cannot be null if uriSpec is null or
     *                empty)
     *
     * @param uriSpec the URI specification string (cannot be null or
     *                empty if base is null)
     *
     * @param manager Pointer to the memory manager to be used to
     *                allocate objects.
     *
     * ctor# 7 relative ctor
     *
     */
    XMLUri(const XMLUri* const  baseURI
         , const XMLCh* const   uriSpec
         , MemoryManager* const manager = XMLPlatformUtils::fgMemoryManager);

    /**
     * Copy constructor
     *
     * @param toCopy the URI to copy from
     */
    XMLUri(const XMLUri& toCopy);

    /**
     * Copy assignment operator
     *
     * @param toAssign the URI to assign from
     *
     * @return a reference to this URI
     */
    XMLUri& operator=(const XMLUri& toAssign);

    /**
     * Destructor. Virtual, so that deleting through a base class pointer
     * reaches this class.
     */
    virtual ~XMLUri();

    // -----------------------------------------------------------------------
    //  Getter methods
    // -----------------------------------------------------------------------
    /**
     * Get the URI as a string specification. See RFC 2396 Section 5.2.
     *
     * The inline definition calls buildFullText() first whenever the
     * stored text pointer is still null.
     *
     * @return the URI string specification
     */
    const XMLCh* getUriText() const;

    /**
     * Get the scheme for this URI.
     *
     * @return the scheme for this URI
     */
     const XMLCh* getScheme() const;

    /**
     * Get the userinfo for this URI.
     *
     * @return the userinfo for this URI (null if not specified).
     */
     const XMLCh* getUserInfo() const;


    /**
     * Get the host for this URI.
     *
     * @return the host for this URI (null if not specified).
     */
     const XMLCh* getHost() const;

    /**
     * Get the port for this URI.
     *
     * @return the port for this URI (-1 if not specified).
     */
     int getPort() const;

    /**
     * Get the registry based authority for this URI.
     *
     * @return the registry based authority (null if not specified).
     */
     const XMLCh* getRegBasedAuthority() const;

    /**
     * Get the path for this URI. Note that the value returned is the path
     * only and does not include the query string or fragment.
     *
     * @return the path for this URI.
     */
     const XMLCh* getPath() const;

    /**
     * Get the query string for this URI.
     *
     * @return the query string for this URI. Null is returned if there
     *         was no "?" in the URI spec, empty string if there was a
     *         "?" but no query string following it.
     */
     const XMLCh* getQueryString() const;

    /**
     * Get the fragment for this URI.
     *
     * @return the fragment for this URI. Null is returned if there
     *         was no "#" in the URI spec, empty string if there was a
     *         "#" but no fragment following it.
     */
     const XMLCh* getFragment() const;

    // -----------------------------------------------------------------------
    //  Setter methods
    // -----------------------------------------------------------------------

    /**
     * Set the scheme for this URI. The scheme is converted to lowercase
     * before it is set.
     *
     * @param newScheme the scheme for this URI (cannot be null)
     *
     */
     void setScheme(const XMLCh* const newScheme);

    /**
     * Set the userinfo for this URI. If a non-null value is passed in and
     * the host value is null, then an exception is thrown.
     *
     * @param newUserInfo the userinfo for this URI
     *
     */
     void setUserInfo(const XMLCh* const newUserInfo);

    /**
     * Set the host for this URI. If null is passed in, the userinfo
     * field is also set to null and the port is set to -1.
     *
     * Note: This method overwrites registry based authority if it
     * previously existed in this URI.
     *
     * @param newHost the host for this URI
     *
     */
     void setHost(const XMLCh* const newHost);

    /**
     * Set the port for this URI. -1 is used to indicate that the port is
     * not specified, otherwise valid port numbers are between 0 and 65535.
     * If a valid port number is passed in and the host field is null,
     * an exception is thrown.
     *
     * @param newPort the port number for this URI
     *
     */
     void setPort(int newPort);

    /**
     * Sets the registry based authority for this URI.
     *
     * Note: This method overwrites server based authority
     * if it previously existed in this URI.
     *
     * @param newRegAuth the registry based authority for this URI
     */
     void setRegBasedAuthority(const XMLCh* const newRegAuth);

    /**
     * Set the path for this URI.
     *
     * If the supplied path is null, then the
     * query string and fragment are set to null as well.
     *
     * If the supplied path includes a query string and/or fragment,
     * these fields will be parsed and set as well.
     *
     * Note:
     *
     * For URIs following the "generic URI" syntax, the path
     * specified should start with a slash.
     *
     * For URIs that do not follow the generic URI syntax, this method
     * sets the scheme-specific part.
     *
     * @param newPath the path for this URI (may be null)
     *
     */
     void setPath(const XMLCh* const newPath);

    /**
     * Set the query string for this URI. A non-null value is valid only
     * if this is a URI conforming to the generic URI syntax and
     * the path value is not null.
     *
     * @param newQueryString the query string for this URI
     *
     */
     void setQueryString(const XMLCh* const newQueryString);

    /**
     * Set the fragment for this URI. A non-null value is valid only
     * if this is a URI conforming to the generic URI syntax and
     * the path value is not null.
     *
     * @param newFragment the fragment for this URI
     *
     */
     void setFragment(const XMLCh* const newFragment);

    // -----------------------------------------------------------------------
    //  Miscellaneous methods
    // -----------------------------------------------------------------------

    /**
     * Determine whether a given string contains only URI characters (also
     * called "uric" in RFC 2396). uric consist of all reserved
     * characters, unreserved characters and escaped characters.
     *
     * @param uric the string to examine
     *
     * @return true if the string is comprised of uric, false otherwise
     */
    static bool isURIString(const XMLCh* const uric);

    /**
     * Determine whether a given string is a valid URI
     *
     * @param baseURI      the base URI to validate against
     *                     (NOTE(review): presumably may be null when
     *                     uriStr is absolute - confirm in the
     *                     implementation)
     * @param uriStr       the string to validate
     * @param bAllowSpaces NOTE(review): presumably lets space characters
     *                     pass validation - confirm in the implementation
     *
     * @return true if the string is a valid URI, false otherwise
     */
    static bool isValidURI( const XMLUri* const baseURI
                          , const XMLCh* const uriStr
                          , bool bAllowSpaces=false);
    /**
     * Determine whether a given string is a valid URI
     *
     * This overload takes a flag instead of a base URI object.
     *
     * @param haveBaseURI  whether the caller has a base URI
     *                     (NOTE(review): presumably allows relative URIs
     *                     to validate - confirm in the implementation)
     * @param uriStr       the string to validate
     * @param bAllowSpaces NOTE(review): presumably lets space characters
     *                     pass validation - confirm in the implementation
     *
     * @return true if the string is a valid URI, false otherwise
     */
    static bool isValidURI( bool haveBaseURI
                          , const XMLCh* const uriStr
                          , bool bAllowSpaces=false);


    /**
     * Normalize a system URI into the supplied buffer.
     *
     * NOTE(review): the exact normalization rules are defined in the
     * implementation file and are not visible from this header.
     *
     * @param systemURI     the URI string to normalize
     * @param normalizedURI the buffer that receives the result
     */
    static void normalizeURI(const XMLCh*     const systemURI,
                                   XMLBuffer&       normalizedURI);

    /***
     * Support for Serialization/De-serialization
     ***/
    DECL_XSERIALIZABLE(XMLUri)

    /**
     * Construct a URI from a memory manager only, without a URI string.
     * NOTE(review): presumably exists for de-serialization - confirm.
     *
     * @param manager Pointer to the memory manager to be used to
     *                allocate objects.
     */
    XMLUri(MemoryManager* const manager = XMLPlatformUtils::fgMemoryManager);

private:

    // Character tables used for character classification. They are only
    // declared here; their contents are defined in the implementation file.
    static const XMLCh MARK_OR_RESERVED_CHARACTERS[];
    static const XMLCh RESERVED_CHARACTERS[];
    static const XMLCh MARK_CHARACTERS[];
    static const XMLCh SCHEME_CHARACTERS[];
    static const XMLCh USERINFO_CHARACTERS[];
    static const XMLCh REG_NAME_CHARACTERS[];
    static const XMLCh PATH_CHARACTERS[];

    //helper method for getUriText
    void buildFullText();

    // -----------------------------------------------------------------------
    //  Private helper methods
    // -----------------------------------------------------------------------

    /**
     * Determine whether a character is a reserved character:
     *
     * @param theChar the character to test
     *
     * @return true if the character is a reserved character
     */
    static bool isReservedCharacter(const XMLCh theChar);

    /**
     * Determine whether a character is a path character:
     *
     * @param theChar the character to test
     *
     * @return true if the character is path character
     */
    static bool isPathCharacter(const XMLCh theChar);

    /**
     * Determine whether a char is an unreserved character.
     *
     * @param theChar the character to test
     *
     * @return true if the char is unreserved, false otherwise
     */
    static bool isUnreservedCharacter(const XMLCh theChar);

    /**
     * Determine whether a char is a reserved or unreserved character.
     *
     * @param theChar the character to test
     *
     * @return true if the char is reserved or unreserved, false otherwise
     */
    static bool isReservedOrUnreservedCharacter(const XMLCh theChar);

    /**
     * Determine whether a scheme conforms to the rules for a scheme name.
     * A scheme is conformant if it starts with an alphanumeric, and
     * contains only alphanumerics, '+','-' and '.'.
     *
     * @param scheme the scheme name to check
     *
     * @return true if the scheme is conformant, false otherwise
     */
    static bool isConformantSchemeName(const XMLCh* const scheme);

    /**
     * Check whether a userInfo conforms to the rules for a userinfo.
     *
     * This function has no return value.
     * NOTE(review): a non-conformant userInfo is presumably reported by
     * throwing an exception - confirm in the implementation file.
     *
     * @param userInfo the userinfo string to check
     * @param manager  the memory manager supplied by the caller
     */
    static void isConformantUserInfo(const XMLCh* const userInfo
        , MemoryManager* const manager);

    /**
     * Determines whether the components host, port, and user info
     * are valid as a server authority.
     *
     * This overload takes explicit lengths for the host and userinfo
     * strings.
     *
     * @param host     the host component
     * @param hostLen  the length of host
     * @param port     the port component
     * @param userinfo the userinfo component
     * @param userLen  the length of userinfo
     *
     * @return true if the given host, port, and userinfo compose
     * a valid server authority
     */
    static bool isValidServerBasedAuthority(const XMLCh* const host
                                           , const XMLSize_t hostLen
                                           , const int port
                                           , const XMLCh* const userinfo
                                           , const XMLSize_t userLen);

    /**
     * Determines whether the components host, port, and user info
     * are valid as a server authority.
     *
     * This overload takes a memory manager instead of explicit lengths.
     *
     * @param host     the host component
     * @param port     the port component
     * @param userinfo the userinfo component
     * @param manager  the memory manager supplied by the caller
     *
     * @return true if the given host, port, and userinfo compose
     * a valid server authority
     */
    static bool isValidServerBasedAuthority(const XMLCh* const host
                                           , const int port
                                           , const XMLCh* const userinfo
                                           , MemoryManager* const manager);

   /**
    * Determines whether the given string is a registry based authority.
    *
    * @param authority the authority component of a URI
    * @param authLen the length of authority
    *
    * @return true if the given string is a registry based authority
    */
    static bool isValidRegistryBasedAuthority(const XMLCh* const authority
                                             , const XMLSize_t authLen);

   /**
    * Determines whether the given string is a registry based authority.
    *
    * @param authority the authority component of a URI
    *
    * @return true if the given string is a registry based authority
    */
    static bool isValidRegistryBasedAuthority(const XMLCh* const authority);

    /**
     * Determine whether a string is syntactically capable of representing
     * a valid IPv4 address, IPv6 reference or the domain name of a network host.
     *
     * A valid IPv4 address consists of four decimal digit groups
     * separated by a '.'.
     *
     * See RFC 2732 Section 3, and RFC 2373 Section 2.2, for the
     * definition of IPv6 references.
     *
     * A hostname consists of domain labels (each of which must begin and
     * end with an alphanumeric but may contain '-') separated by a '.'.
     * See RFC 2396 Section 3.2.2.
     *
     * @param addr    the address string to check
     * @param manager the memory manager supplied by the caller
     *
     * @return true if the string is a syntactically valid IPv4 address,
     *              IPv6 reference or hostname
     */
     static bool isWellFormedAddress(const XMLCh* const addr
         , MemoryManager* const manager);

    /**
     * Determines whether a string is an IPv4 address as defined by
     * RFC 2373, and under the further constraint that it must be a 32-bit
     * address. Though not expressed in the grammar, in order to satisfy
     * the 32-bit address constraint, each segment of the address cannot
     * be greater than 255 (8 bits of information).
     *
     * @param addr   the address string to check
     * @param length the length of addr
     *
     * @return true if the string is a syntactically valid IPv4 address
     */
     static bool isWellFormedIPv4Address(const XMLCh* const addr, const XMLSize_t length);

    /**
     * Determines whether a string is an IPv6 reference as defined
     * by RFC 2732, where IPv6address is defined in RFC 2373. The
     * IPv6 address is parsed according to Section 2.2 of RFC 2373,
     * with the additional constraint that the address be composed of
     * 128 bits of information.
     *
     * Note: The BNF expressed in RFC 2373 Appendix B does not
     * accurately describe section 2.2, and was in fact removed from
     * RFC 3513, the successor of RFC 2373.
     *
     * @param addr   the address string to check
     * @param length the length of addr
     *
     * @return true if the string is a syntactically valid IPv6 reference
     */
     static bool isWellFormedIPv6Reference(const XMLCh* const addr, const XMLSize_t length);

    /**
     * Helper function for isWellFormedIPv6Reference which scans the
     * hex sequences of an IPv6 address. It returns the index of the
     * next character to scan in the address, or -1 if the string
     * cannot match a valid IPv6 address.
     *
     * @param addr the string to be scanned
     * @param index the beginning index (inclusive)
     * @param end the ending index (exclusive)
     * @param counter a counter for the number of 16-bit sections read
     * in the address
     *
     * @return the index of the next character to scan, or -1 if the
     * string cannot match a valid IPv6 address
     */
     static int scanHexSequence (const XMLCh* const addr, XMLSize_t index, XMLSize_t end, int& counter);

    /**
     * Get the indicator as to whether this URI uses the "generic URI"
     * syntax.
     *
     * @return true if this URI uses the "generic URI" syntax, false
     *         otherwise
     */
     bool isGenericURI();

    // -----------------------------------------------------------------------
    //  Initialization and cleanup methods
    // -----------------------------------------------------------------------

    /**
     * Initialize all fields of this URI from another URI.
     *
     * @param toCopy the URI to copy (cannot be null)
     */
     void initialize(const XMLUri& toCopy);

    /**
     * Initializes this URI from a base URI and a URI specification string.
     * See RFC 2396 Section 4 and Appendix B for specifications on parsing
     * the URI and Section 5 for specifications on resolving relative URIs
     * and relative paths.
     *
     * @param baseURI the base URI (may be null if uriSpec is an absolute
     *               URI)
     *
     * @param uriSpec the URI spec string which may be an absolute or
     *                  relative URI (can only be null/empty if base
     *                  is not null)
     *
     */
     void initialize(const XMLUri* const baseURI
                   , const XMLCh*  const uriSpec);

    /**
     * Initialize the scheme for this URI from a URI string spec.
     *
     * @param uriSpec the URI specification (cannot be null)
     *
     */
     void initializeScheme(const XMLCh* const uriSpec);

    /**
     * Initialize the authority (userinfo, host and port) for this
     * URI from a URI string spec.
     *
     * @param uriSpec the URI specification (cannot be null)
     *
     */
     void initializeAuthority(const XMLCh* const uriSpec);

    /**
     * Initialize the path for this URI from a URI string spec.
     *
     * @param uriSpec the URI specification (cannot be null)
     *
     */
     void initializePath(const XMLCh* const uriSpec);

     /**
      * cleanup the data variables
      *
      */
     void cleanUp();

    // Static helpers that take a string together with an explicit length
    // (or a running index) rather than relying on a memory manager.
    // NOTE(review): presumably these form the validation-only path used
    // by isValidURI() - confirm in the implementation file.
    static bool isConformantSchemeName(const XMLCh* const scheme,
                                       const XMLSize_t schemeLen);
    static bool processScheme(const XMLCh* const uriStr, XMLSize_t& index);
    static bool processAuthority(const XMLCh* const uriStr, const XMLSize_t authLen);
    static bool isWellFormedAddress(const XMLCh* const addr, const XMLSize_t addrLen);
    static bool processPath(const XMLCh* const pathStr, const XMLSize_t pathStrLen,
                            const bool isSchemePresent, const bool bAllowSpaces=false);

    // -----------------------------------------------------------------------
    //  Data members
    //
    //  We own all of the data members below and are responsible for
    //  creating and/or deleting the memory allocated for them.
    //
    // -----------------------------------------------------------------------
    int             fPort;          // value returned by getPort()
    XMLCh*          fScheme;        // value returned by getScheme()
    XMLCh*          fUserInfo;      // value returned by getUserInfo()
    XMLCh*          fHost;          // value returned by getHost()
    XMLCh*          fRegAuth;       // value returned by getRegBasedAuthority()
    XMLCh*          fPath;          // value returned by getPath()
    XMLCh*          fQueryString;   // value returned by getQueryString()
    XMLCh*          fFragment;      // value returned by getFragment()
    XMLCh*          fURIText;       // full URI text; getUriText() builds it on demand while null
    MemoryManager*  fMemoryManager;
};
579
580// ---------------------------------------------------------------------------
581//  XMLUri: Getter methods
582// ---------------------------------------------------------------------------
583inline const XMLCh* XMLUri::getScheme() const
584{
585    return fScheme;
586}
587
588inline const XMLCh* XMLUri::getUserInfo() const
589{
590        return fUserInfo;
591}
592
593inline const XMLCh* XMLUri::getHost() const
594{
595        return fHost;
596}
597
598inline int XMLUri::getPort() const
599{
600        return fPort;
601}
602
603inline const XMLCh* XMLUri::getRegBasedAuthority() const
604{
605        return fRegAuth;
606}
607
608inline const XMLCh* XMLUri::getPath() const
609{
610        return fPath;
611}
612
613inline const XMLCh* XMLUri::getQueryString() const
614{
615        return fQueryString;
616}
617
618inline const XMLCh* XMLUri::getFragment() const
619{
620        return fFragment;
621}
622
623inline const XMLCh* XMLUri::getUriText() const
624{
625    //
626    //  Fault it in if not already. Since this is a const method and we
627    //  can't use mutable members due the compilers we have to support,
628    //  we have to cast off the constness.
629    //
630    if (!fURIText)
631        ((XMLUri*)this)->buildFullText();
632
633    return fURIText;
634}
635
636// ---------------------------------------------------------------------------
637//  XMLUri: Helper methods
638// ---------------------------------------------------------------------------
639inline bool XMLUri::isReservedOrUnreservedCharacter(const XMLCh theChar)
640{
641   return (XMLString::isAlphaNum(theChar) ||
642           XMLString::indexOf(MARK_OR_RESERVED_CHARACTERS, theChar) != -1);
643}
644
645inline bool XMLUri::isReservedCharacter(const XMLCh theChar)
646{
647    return (XMLString::indexOf(RESERVED_CHARACTERS, theChar) != -1);
648}
649
650inline bool XMLUri::isPathCharacter(const XMLCh theChar)
651{
652    return (XMLString::indexOf(PATH_CHARACTERS, theChar) != -1);
653}
654
655inline bool XMLUri::isUnreservedCharacter(const XMLCh theChar)
656{
657    return (XMLString::isAlphaNum(theChar) ||
658            XMLString::indexOf(MARK_CHARACTERS, theChar) != -1);
659}
660
661XERCES_CPP_NAMESPACE_END
662
663#endif
Note: See TracBrowser for help on using the repository browser.