source: icXML/icXML-devel/src/icxmlc/XMLNamespaceResolver.c @ 2774

Last change on this file since 2774 was 2774, checked in by cameron, 6 years ago

Various fixes

File size: 19.5 KB
Line 
1/*
2 *  Copyright © 2012 International Characters.
3 *  This software is licensed to the public under the Open Software License 3.0.
4 *  icXML is a trademark of International Characters.
5 */
6
7/*
8 * @author Nigel Medforth, nigelm -at- international-characters.com
9 * @version $Id: XMLNamespaceResolver.c 224 2012-12-12 03:31:56Z nigelm $
10 *
11 */
12
13#include <icxmlc/XMLNamespaceResolver.hpp>
14#include <icxmlc/XMLSymbol.hpp>
15#include <icxmlc/XMLStringU.hpp>
16
17XERCES_CPP_NAMESPACE_BEGIN
18
19/// -----------------------------------------------------------------------------------------------
20// TODO: should I store a localPartId for xmlns attributes? would the memory cost outweigh the performance gain?
21
22// TODO: if we bind a uri to a prefix and the binding is non-canonical but the canonical entry for the prefix is
23// empty, should we move the prefix into that entry and rewrite all of the symbols? it would require storing a
24// list of symbols per prefix or having a "moved" bitset to tell the resolver to update the symbol's binding.
25// would this be worth the effort?
26
27IDISA_ALWAYS_INLINE
28unsigned int
29XMLNamespaceResolver::bindNamespace(const XMLSymbol * symbol, const XMLCh * uri, const XMLSize_t length, bool & speculatedCorrectly, XMLErrs::Codes & bindingError)
30{
31        // Get the prefix id of the local part of this symbol name, e.g. the prefix id of "ogc" if the symbol is "xmlns:ogc"
32        // or fEmptyNamespaceId if the symbol is "xmlns"
33
34        DEBUG_NAMESPACE_MESSAGE
35        (
36                "XMLNamespaceResolver::bindNamespace("
37        << symbol
38                << ",\"" << uri
39                << "\"," << length
40                << ',' << speculatedCorrectly
41                << ')'
42        );
43
44    const static unsigned int XMLNS_URI_ID_TABLE[2] = { XMLNamespaceResolver::fEmptyUriId, XMLNamespaceResolver::fXMLNSUriId };
45
46    const static XMLErrs::Codes BINDING_ERRORS[3] = { XMLErrs::NoError, XMLErrs::XMLURINotMatchXMLPrefix, XMLErrs::NoUseOfxmlnsURI };
47
48    const unsigned int xmlnsUriId   = XMLNS_URI_ID_TABLE[symbol->isQualified()];
49    const unsigned int prefixId     = symbol->getPrefixId();
50
51    unsigned int localUriId, globalUriId;
52    resolveUri(uri, length, prefixId, localUriId, globalUriId);
53
54        int namespaceId = resolveNamespaceId(prefixId);
55
56    // is the XML URI being bound to some prefix other than "xml" or vice versa? OR was the XMLNS URI bound to some prefix?
57    bindingError = BINDING_ERRORS[((prefixId == fXMLUriId) ^ (globalUriId == fXMLUriId)) | ((globalUriId == fXMLNSUriId) << 1)];
58
59    if (unlikely(bindingError != XMLErrs::NoError))
60    {
61        return XMLNamespaceResolver::fUnknownUriId;
62    }
63    if (unlikely(namespaceId == -1))
64        {
65                // both the prefixId and the uriId exist, but they are not mapped to one another.
66        namespaceId = mapPrefixToUri(prefixId, localUriId, globalUriId);
67
68                // add the new namespace to the namespace 'stack'
69                fLocallyModifiedNamespaces[fCurrentScope] += namespaceId;
70
71                // and mark it as visible
72                fCurrentlyVisibleNamespaces += namespaceId;
73
74        // since we speculated that the prefixId == globalUriId, if we find that this is not
75                // true, then we have to re-resolve everything regarding this element.
76        speculatedCorrectly &= (prefixId == globalUriId);
77        }
78        else
79        {
80        if (unlikely(getNamespaceUriId(namespaceId) != globalUriId))
81                {
82            remapPrefixToUri(prefixId, localUriId, globalUriId);
83                        speculatedCorrectly = 0;
84                }
85        }
86
87    return xmlnsUriId;
88}
89
90/// -----------------------------------------------------------------------------------------------
91
92IDISA_ALWAYS_INLINE
93unsigned int
94XMLNamespaceResolver::resolveUriId(const XMLSymbol * symbol, const bool isAttribute) const
95{
96    bool unknown = 0;
97    return resolveUriId(symbol, isAttribute, unknown);
98}
99
100/// -----------------------------------------------------------------------------------------------
101
102IDISA_ALWAYS_INLINE
103unsigned int
104XMLNamespaceResolver::resolveUriId(const XMLSymbol * symbol, const bool isAttribute, bool & unknown) const
105{
106    const unsigned int prefixId = symbol->getPrefixId();
107
108    DEBUG_NAMESPACE_MESSAGE("XMLNamespaceResolver::resolveUriId(" << symbol << ':' << prefixId << ',' << isAttribute << ',' << unknown << ')')
109
110        if (isAttribute && (prefixId == fEmptyUriId))
111        {
112                return fEmptyUriId;
113        }
114        return resolveUriId(prefixId, prefixId, unknown);
115}
116
117/// -----------------------------------------------------------------------------------------------
118
119IDISA_ALWAYS_INLINE
120unsigned int
121XMLNamespaceResolver::resolveUriId(const unsigned int prefixId, const unsigned int defaultUriId, bool & unknown) const
122{
123        unsigned int uriId = defaultUriId;
124        // first check canonical set to see if this prefix id is also the uri id.
125        if (likely(fCanonicalBindingSet.mask_and_extract(fCurrentlyVisibleNamespaces, prefixId)))
126        {
127                // TODO: investigate the value of having a canonical set that also states localId == globalId?
128                uriId = fUriPool[prefixId]->getId();
129        }
130        else
131        {
132                // it's not canonical; resolve which of the possible uri ids this prefix id
133                // currently points to
134                const int namespaceId = resolveNamespaceId(prefixId);
135
136                // if this prefix has not been mapped to a namespace; assume that it will
137                // be mapped to the 'adjacent' uri.
138
139                if (likely(namespaceId != -1))
140                {
141                        uriId = fNamespaceToUriBindingTable[namespaceId];
142                }
143                else
144                {
145                        unknown = true;
146                }
147        }
148
149        return uriId;
150}
151
152/// -----------------------------------------------------------------------------------------------
153
154IDISA_ALWAYS_INLINE
155unsigned int
156XMLNamespaceResolver::resolveUriId(const XMLCh * uri) const
157{
158        if (unlikely(!uri || *uri == 0))
159        {
160                return fEmptyUriId;
161        }
162
163        XMLSize_t length = XMLStringU::stringLen(uri);
164
165        unsigned int localUriId = fUriPool.find(uri, length);
166
167        if (unlikely(localUriId == -1))
168        {
169                XMLNamespaceResolver * self = const_cast<XMLNamespaceResolver*>(this);
170
171                // no, it was already used; claim the next unused uri id.
172                self->fNextUnreservedUriId = max(fNextUnreservedUriId, fNextUnreservedPrefixId);
173                localUriId = self->fNextUnreservedUriId++;
174                self->fGlobalUriPool->addOrFind(localUriId, uri, length, self->fUriPool);
175        }
176
177        return fUriPool[localUriId]->getId();
178}
179
180
181/// -----------------------------------------------------------------------------------------------
182
183IDISA_ALWAYS_INLINE
184unsigned int
185XMLNamespaceResolver::resolveUriId(const QName * qName)
186{
187        const XMLCh * prefix = qName->getPrefix();
188        const int length = XMLStringU::stringLen(prefix);
189
190        if (unlikely(length <= 0))
191        {
192                return fEmptyUriId;
193        }
194
195        bool unknown;
196        return resolveUriId(addOrFindPrefix(prefix, length)->getId(), fUnknownUriId, unknown);
197}
198
199/// -----------------------------------------------------------------------------------------------
200
201IDISA_ALWAYS_INLINE
202unsigned int
203XMLNamespaceResolver::resolvePrefixId(const XMLCh * qName)
204{
205        int length = XMLStringU::stringLenOrIndexOf<chColon>(qName);
206
207        if (unlikely(length <= 0))
208        {
209                return fEmptyUriId;
210        }
211
212        //      if (XMLString::equals(attLocalName, XMLUni::fgXMLNSString))
213        //              emitError(XMLErrs::NoUseOfxmlnsAsPrefix);
214        //      else if (XMLString::equals(attLocalName, XMLUni::fgXMLString))
215        //      {
216        //              if (!XMLString::equals(namespaceURI, XMLUni::fgXMLURIName))
217        //                      emitError(XMLErrs::PrefixXMLNotMatchXMLURI);
218        //      }
219
220        //      if (!namespaceURI)
221        //              emitError(XMLErrs::NoEmptyStrNamespace, attName);
222        //      else if(!*namespaceURI && fXMLVersion == XMLReader::XMLV1_0)
223        //              emitError(XMLErrs::NoEmptyStrNamespace, attName);
224
225
226        // get the prefix id of the prefix portion of the qName, e.g., the prefix id of "xmlns" if given "xmlns:ogc"
227        // or the prefix of "" if given "xmlns".
228        return addOrFindPrefix(qName, length)->getId();
229}
230
231/// -----------------------------------------------------------------------------------------------
232
233IDISA_ALWAYS_INLINE
234unsigned int XMLNamespaceResolver::resolvePrefixId(const XMLCh * qName, const XMLSize_t length, int & colon)
235{
236    colon = XMLStringU::indexOf<chColon>(qName, length);
237
238    assert (colon == XMLString::indexOf(qName, chColon));
239
240    return (unlikely(colon == -1)) ? fEmptyUriId : resolvePrefixId(qName, colon);
241}
242
243/// -----------------------------------------------------------------------------------------------
244
245IDISA_ALWAYS_INLINE
246unsigned int XMLNamespaceResolver::resolvePrefixId(const XMLCh * qName, const XMLSize_t length)
247{
248    return addOrFindPrefix(qName, length)->getId();
249}
250
251/// -----------------------------------------------------------------------------------------------
252
253IDISA_ALWAYS_INLINE
254const XMLCh *
255XMLNamespaceResolver::addOrFindPrefix(const XMLCh * qName)
256{
257        int length = XMLStringU::stringLenOrIndexOf<chColon>(qName);
258        if (unlikely(length <= 0))
259        {
260                return XMLUni::fgZeroLenString;
261        }
262        // get the prefix id of the prefix portion of the qName, e.g., the prefix id of "xmlns" if given "xmlns:ogc"
263        // or the prefix of "" if given "xmlns".
264        return addOrFindPrefix(qName, length)->getKey();
265
266}
267
268/// -----------------------------------------------------------------------------------------------
269
270IDISA_ALWAYS_INLINE
271const XMLCh *
272XMLNamespaceResolver::addOrFindLocalPart(const XMLCh * qName)
273{
274        int length = XMLStringU::stringLenOrIndexOf<chColon>(qName);
275        if (qName[length] == chColon)
276        {
277                const XMLCh * localPart = &qName[length + 1];
278                length = XMLStringU::stringLen(localPart);
279                return addOrFindPrefix(localPart, length)->getKey();
280        }
281        else
282        {
283                return addOrFindPrefix(qName, length)->getKey();
284        }
285}
286
287/// -----------------------------------------------------------------------------------------------
288
289IDISA_ALWAYS_INLINE
290const XMLPrefixEntry *
291XMLNamespaceResolver::addOrFindPrefix(const XMLCh * prefix, const int length)
292{
293
294
295        // resolve the prefix id from the prefix; since this will not be required
296        // very often, just iterate through a list.
297        for (unsigned int index = 0; index < fPrefixCount; index++)
298        {
299                if (fPrefixList[index].equals(prefix, length))
300                {
301                        return &fPrefixList[index];
302                }
303        }
304
305        // if our prefix count equals our capacity, then we need to expand all of the related buffers
306        if (unlikely(fPrefixCount == fPrefixList.capacity()))
307        {
308                fPrefixList.expand(fPrefixList.capacity());
309        }
310
311        fNextUnreservedPrefixId = max(fNextUnreservedPrefixId, fNextUnreservedUriId);
312        const unsigned int prefixId = fNextUnreservedPrefixId++;
313
314        if (unlikely(fPrefixToNamespaceBindingTable.setCapacity() <= prefixId))
315        {
316                DEBUG_NAMESPACE_MESSAGE(" *** expanding prefix tables from " << fPrefixToNamespaceBindingTable.setCapacity() << " due to prefixId " << prefixId)
317                fCanonicalBindingSet.expand(prefixId);
318                fPrefixToNamespaceBindingTable.increaseSetCapacity(prefixId);
319                // fLocallyModifiedNamespaces.increaseBitCapacity(prefixId);
320        }
321
322        XMLPrefixEntry * entry = &fPrefixList[fPrefixCount++];
323        // QUESTION: does storing the prefixes in the string pool help or hinder? we can just point to
324        // the symbols and store only the prefix's length?
325        // NOTE: this may require modifying the TraverseSchema class to do this safely.
326    entry->key = fPrefixPool.insert(prefix, length);
327        entry->length = length;
328        entry->id = prefixId;
329
330        // add this entry to the canonical prefix set since we assume that the prefix and uri ids
331        // will match
332        fCanonicalBindingSet += prefixId;
333
334        return entry;
335}
336
337/// -----------------------------------------------------------------------------------------------
338
339IDISA_ALWAYS_INLINE
340void
341XMLNamespaceResolver::resolveUri(const XMLCh * uri, const XMLSize_t length, const unsigned int preferredUriId, unsigned int & localUriId, unsigned int & globalUriId)
342{
343    localUriId = fUriPool.find(uri, length);
344    if (unlikely(localUriId == -1))
345        {
346        localUriId = preferredUriId;
347                // is the reserved uri slot for this prefix still free?
348        if (unlikely(fUriPool[preferredUriId]->getKey() != NULL))
349                {
350                        // no, it was already used; claim the next unused uri id.
351                        fNextUnreservedUriId = max(fNextUnreservedUriId, fNextUnreservedPrefixId);
352                        localUriId = fNextUnreservedUriId++;
353                }
354
355        globalUriId = fGlobalUriPool->addOrFind(localUriId, uri, length, fUriPool);
356        }
357    else
358    {
359        globalUriId = fUriPool[localUriId]->getId();
360    }
361}
362
363/// -----------------------------------------------------------------------------------------------
364
365IDISA_ALWAYS_INLINE
366unsigned int
367XMLNamespaceResolver::mapPrefixToUri(const unsigned int prefixId, const unsigned int localUriId, const unsigned int globalUriId)
368{
369        unsigned int namespaceId;
370
371        // I want to reserve the LOCAL uriId slot for this prefix; not the GLOBAL uriId slot!
372
373        BindingSetIterator namespaceItr(fPrefixToNamespaceBindingTable.get(prefixId), fPrefixToNamespaceBindingTable.bitCapacity());
374
375        size_t count = 0;
376
377        while (namespaceItr.next())
378        {
379                count++;
380
381        if (getNamespaceUriId(namespaceItr.pos()) == globalUriId)
382                {
383                        namespaceId = namespaceItr.pos();
384
385                        goto FOUND_NAMESPACE_MAPPING;
386                }
387        }
388
389        namespaceId = fNextNamespaceId++;
390
391    if (unlikely(fCurrentlyVisibleNamespaces.capacity() <= namespaceId))
392        {
393                DEBUG_NAMESPACE_MESSAGE(" *** expanding namespace space due to namespaceId " << fNextNamespaceId);
394                fCurrentlyVisibleNamespaces.expand(namespaceId);
395                fCanonicalBindingSet.expand(namespaceId);               
396        fNamespaceToUriBindingTable.expand(namespaceId);
397        fNamespaceToPrefixBindingTable.expand(namespaceId);
398                fLocallyModifiedNamespaces.increaseBitCapacity(namespaceId);
399                fDistinctContextSet.increaseBitCapacity(namespaceId);
400        }
401
402FOUND_NAMESPACE_MAPPING:
403
404
405        // map this prefix to the namespace
406        fPrefixToNamespaceBindingTable[prefixId] += namespaceId;
407        fNamespaceToUriBindingTable[namespaceId] = globalUriId;
408    // and the URI back to the prefix
409    fNamespaceToPrefixBindingTable[namespaceId] = prefixId;
410
411        if (prefixId == localUriId)
412        {
413                // if prefix id == uri id == namespace id, then this entry is canonical (again?)
414                fCanonicalBindingSet += prefixId;
415        }
416        else
417        {
418                // else remove this prefix id from the canonical binding set (assuming it was ever in there)
419                fCanonicalBindingSet -= prefixId;
420        }
421
422        return namespaceId;
423}
424
425/// -----------------------------------------------------------------------------------------------
426
427IDISA_ALWAYS_INLINE
428unsigned int
429XMLNamespaceResolver::remapPrefixToUri(const unsigned int prefixId, const unsigned int localUriId, const unsigned int globalUriId)
430{
431    const int namespaceId = mapPrefixToUri(prefixId, localUriId, globalUriId);
432
433        const BindingSet modified =
434                (fCurrentlyVisibleNamespaces & fPrefixToNamespaceBindingTable.get(prefixId)) + namespaceId;
435
436        // mark any modified namespaces in the namespace 'stack'
437        fLocallyModifiedNamespaces[fCurrentScope] += modified;
438
439        // remove the old namespace/uri binding (if one exists) and add the new binding to the set of
440        // currently visible namespaces
441        fCurrentlyVisibleNamespaces ^= modified;
442
443        return namespaceId;
444}
445
446/// -----------------------------------------------------------------------------------------------
447
448IDISA_ALWAYS_INLINE
449void
450XMLNamespaceResolver::addPredefinedPrefix
451(
452        const unsigned int      id
453        , const XMLCh *         prefix
454        , const XMLSize_t       prefixLength
455        , const XMLCh *         uri
456        , const XMLSize_t       uriLength
457)
458{
459        DEBUG_MESSAGE("XMLNamespaceResolver::addPredefinedPrefix(" << id << ',' << prefix << ',' << uri << ')');
460
461        XMLPrefixEntry * entry = &fPrefixList[id];
462        // QUESTION: does storing the prefixes in the string pool help or hinder? we can just point to
463        // the symbols and store only the prefix's length?
464        // NOTE: this may require modifying the TraverseSchema class to do this safely.
465        entry->key = prefix; // _prefixAndUriStringPool.insert(prefix, length);
466        entry->length = prefixLength;
467        entry->id = id;
468
469        // create the namespace entry and add it
470        const unsigned int uriId = fGlobalUriPool->addOrFind(id, uri, uriLength, fUriPool);
471
472        assert (id == uriId);
473
474    mapPrefixToUri(id, id, id);
475}
476
477/// -----------------------------------------------------------------------------------------------
478/// NAMESPACE CONTEXT HANDLING
479/// -----------------------------------------------------------------------------------------------
480
481IDISA_ALWAYS_INLINE
482unsigned int
483XMLNamespaceResolver::getCurrentContextId()
484{
485        if (unlikely(!fLocallyModifiedNamespaces[fCurrentScope].isEmpty()))
486        {
487                for (unsigned int index = 0; index < fDistinctContextCount; index++)
488                {
489                        if (fDistinctContextSet[index] == fCurrentlyVisibleNamespaces)
490                        {
491                                fCurrentContextId = index;
492                                return index;
493                        }
494                }
495                if (unlikely(fDistinctContextCount == fDistinctContextSet.setCapacity()))
496                {
497                        fDistinctContextSet.increaseSetCapacity(fDistinctContextSet.setCapacity());
498                }
499
500                fCurrentContextId = fDistinctContextCount++;
501                fDistinctContextSet.set(fCurrentContextId, fCurrentlyVisibleNamespaces);
502        }
503        return fCurrentContextId;
504}
505
506/// -----------------------------------------------------------------------------------------------
507
508IDISA_ALWAYS_INLINE
509BindingSetIterator XMLNamespaceResolver::getNamespaceIterator(const unsigned int contextId) const
510{
511    return BindingSetIterator(fDistinctContextSet[contextId]);
512}
513
514/// -----------------------------------------------------------------------------------------------
515
516IDISA_ALWAYS_INLINE
517unsigned int
518XMLNamespaceResolver::getNamespaceUriId(const unsigned int namespaceId) const
519{
520    return fNamespaceToUriBindingTable[namespaceId];
521}
522
523/// -----------------------------------------------------------------------------------------------
524
525IDISA_ALWAYS_INLINE
526const XMLCh *
527XMLNamespaceResolver::getPrefixForNamespaceId(const unsigned int namespaceId) const
528{
529    return fPrefixList[fNamespaceToPrefixBindingTable[namespaceId]].getKey();
530}
531
532
533/// -----------------------------------------------------------------------------------------------
534
535IDISA_ALWAYS_INLINE
536bool
537XMLNamespaceResolver::isPrefixVisible(const XMLCh * prefix, const unsigned int contextId)
538{
539        const unsigned int prefixId = resolvePrefixId(prefix);
540        return isPrefixVisible(prefixId, contextId);
541}
542
543/// -----------------------------------------------------------------------------------------------
544
545IDISA_ALWAYS_INLINE
546bool
547XMLNamespaceResolver::isPrefixVisible(const unsigned int prefixId, const unsigned int contextId)
548{
549        return !(fDistinctContextSet[contextId] & fPrefixToNamespaceBindingTable.get(prefixId)).isEmpty();
550}
551
552/// -----------------------------------------------------------------------------------------------
553
554IDISA_ALWAYS_INLINE
555unsigned int
556XMLNamespaceResolver::getUriIdForPrefix(const XMLCh * prefix, const unsigned int contextId)
557{
558        return getUriIdForPrefix(resolvePrefixId(prefix), contextId);
559}
560
561/// -----------------------------------------------------------------------------------------------
562
563IDISA_ALWAYS_INLINE
564unsigned int
565XMLNamespaceResolver::getUriIdForPrefix(const unsigned int prefixId, const unsigned int contextId)
566{
567        FixedBindingSet bindings(fDistinctContextSet[contextId] & fPrefixToNamespaceBindingTable[prefixId]);
568        BindingSetIterator namespaceItr(bindings);
569        if (unlikely(!namespaceItr.first()))
570        {
571                return fUnknownUriId;
572        }
573        return fNamespaceToUriBindingTable[namespaceItr.pos()];
574}
575
576/// -----------------------------------------------------------------------------------------------
577
578IDISA_ALWAYS_INLINE
579const XMLCh *
580XMLNamespaceResolver::getUriForPrefix(const XMLCh * prefix, const unsigned int contextId)
581{
582        return getUriForPrefix(resolvePrefixId(prefix), contextId);
583}
584
585/// -----------------------------------------------------------------------------------------------
586
587IDISA_ALWAYS_INLINE
588const XMLCh *
589XMLNamespaceResolver::getUriForPrefix(const unsigned int prefixId, const unsigned int contextId)
590{
591        return getUriForId(getUriIdForPrefix(prefixId, contextId));
592}
593
594/// -----------------------------------------------------------------------------------------------
595
#ifdef PRINT_DEBUG_MESSAGE
/**
 * Debug helper: writes the uris known to a resolver as "{id:uri,id:uri,...}".
 * Nothing is written for a null resolver or when no uri id below fNextUnreservedUriId
 * has a uri.
 */
static std::ostream & operator << (std::ostream & out, const XMLNamespaceResolver * resolver)
{
    if (!resolver)
    {
        return out;
    }

    bool wroteAny = false;

    for (unsigned int uriId = 0; uriId < resolver->fNextUnreservedUriId; uriId++)
    {
        const XMLCh * const uri = (*resolver)[uriId];

        if (uri == 0)
        {
            continue;
        }

        // the first entry opens the list, every later one is preceded by a separator
        out << (wroteAny ? ',' : '{') << uriId << ':' << uri;
        wroteAny = true;
    }

    if (wroteAny)
    {
        out << '}';
    }

    return out;
}
#endif
620
621XERCES_CPP_NAMESPACE_END
Note: See TracBrowser for help on using the repository browser.