http://xml.apache.org/ http://www.apache.org/ http://www.w3.org/

Home

Readme
Release Info

Installation
Download
Build

FAQs
Samples
API Docs

DOM C++ Binding
Programming
Migration Guide

Feedback
Bug-Reporting
PDF Document

CVS Repository
Mail Archive

API Docs for SAX and DOM
 

Main Page   Class Hierarchy   Alphabetical List   Compound List   File List   Compound Members   File Members  

XMLChar.hpp

Go to the documentation of this file.
00001 /*
00002  * The Apache Software License, Version 1.1
00003  *
00004  * Copyright (c) 2002 The Apache Software Foundation.  All rights
00005  * reserved.
00006  *
00007  * Redistribution and use in source and binary forms, with or without
00008  * modification, are permitted provided that the following conditions
00009  * are met:
00010  *
00011  * 1. Redistributions of source code must retain the above copyright
00012  *    notice, this list of conditions and the following disclaimer.
00013  *
00014  * 2. Redistributions in binary form must reproduce the above copyright
00015  *    notice, this list of conditions and the following disclaimer in
00016  *    the documentation and/or other materials provided with the
00017  *    distribution.
00018  *
00019  * 3. The end-user documentation included with the redistribution,
00020  *    if any, must include the following acknowledgment:
00021  *       "This product includes software developed by the
00022  *        Apache Software Foundation (http://www.apache.org/)."
00023  *    Alternately, this acknowledgment may appear in the software itself,
00024  *    if and wherever such third-party acknowledgments normally appear.
00025  *
00026  * 4. The names "Xerces" and "Apache Software Foundation" must
00027  *    not be used to endorse or promote products derived from this
00028  *    software without prior written permission. For written
00029  *    permission, please contact apache\@apache.org.
00030  *
00031  * 5. Products derived from this software may not be called "Apache",
00032  *    nor may "Apache" appear in their name, without prior written
00033  *    permission of the Apache Software Foundation.
00034  *
00035  * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
00036  * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
00037  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
00038  * DISCLAIMED.  IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
00039  * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
00040  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
00041  * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
00042  * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
00043  * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
00044  * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
00045  * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
00046  * SUCH DAMAGE.
00047  * ====================================================================
00048  *
00049  * This software consists of voluntary contributions made by many
00050  * individuals on behalf of the Apache Software Foundation, and was
00051  * originally based on software copyright (c) 1999, International
00052  * Business Machines, Inc., http://www.ibm.com .  For more information
00053  * on the Apache Software Foundation, please see
00054  * <http://www.apache.org/>.
00055  */
00056 
00057 /*
00058  * $Log: XMLChar.hpp,v $
00059  * Revision 1.1  2002/12/20 22:10:21  tng
00060  * XML 1.1
00061  *
00062  */
00063 
00064 #if !defined(XMLCHAR_HPP)
00065 #define XMLCHAR_HPP
00066 
00067 #include <xercesc/util/XMLUniDefs.hpp>
00068 
00069 XERCES_CPP_NAMESPACE_BEGIN
00070 
00071 // ---------------------------------------------------------------------------
00072 //  This file defines character-classification utilities that conform to XML 1.0 and XML 1.1
00073 // ---------------------------------------------------------------------------
00074 // Bit masks for the per-character flag bytes in fgCharCharsTable1_0 and fgCharCharsTable1_1
00075 const XMLByte   gLetterCharMask             = 0x1;   // tested by isXMLLetter()
00076 const XMLByte   gFirstNameCharMask          = 0x2;   // tested by isFirstNameChar()
00077 const XMLByte   gNameCharMask               = 0x4;   // tested by isNameChar()
00078 const XMLByte   gPlainContentCharMask       = 0x8;   // tested by isPlainContentChar()
00079 const XMLByte   gSpecialStartTagCharMask    = 0x10;  // tested by isSpecialStartTagChar()
00080 const XMLByte   gControlCharMask            = 0x20;  // tested by isControlChar()
00081 const XMLByte   gXMLCharMask                = 0x40;  // tested by isXMLChar()
00082 const XMLByte   gWhitespaceCharMask         = 0x80;  // tested by isWhitespace()
00083 
00084 // ---------------------------------------------------------------------------
00085 //  This class is for XML 1.0
00086 // ---------------------------------------------------------------------------
00087 class  XMLChar1_0
00088 {
00089 public:
00090     // -----------------------------------------------------------------------
00091     //  Public, static methods that check a string (pointer plus count)
00092     // -----------------------------------------------------------------------
00093     static bool isAllSpaces
00094     (
00095         const   XMLCh* const    toCheck
00096         , const unsigned int    count
00097     );
00098 
00099     static bool containsWhiteSpace
00100     (
00101         const   XMLCh* const    toCheck
00102         , const unsigned int    count
00103     );
00104 
00105     static bool isValidName
00106     (
00107         const   XMLCh* const    toCheck
00108         , const unsigned int    count
00109     );
00110 
00111     static bool isValidNCName
00112     (
00113         const   XMLCh* const    toCheck
00114         , const unsigned int    count
00115     );
00116 
00117     static bool isValidQName
00118     (
00119         const   XMLCh* const    toCheck
00120         , const unsigned int    count
00121     );
00122 
00123     // -----------------------------------------------------------------------
00124     //  Public, static methods that check a single XMLCh.
00125     //  A surrogate pair (toCheck, toCheck2) is assumed if toCheck2 is non-zero.
00126     // -----------------------------------------------------------------------
00127     static bool isXMLLetter(const XMLCh toCheck, const XMLCh toCheck2 = 0);
00128     static bool isFirstNameChar(const XMLCh toCheck, const XMLCh toCheck2 = 0);
00129     static bool isNameChar(const XMLCh toCheck, const XMLCh toCheck2 = 0);
00130     static bool isPlainContentChar(const XMLCh toCheck, const XMLCh toCheck2 = 0);
00131     static bool isSpecialStartTagChar(const XMLCh toCheck, const XMLCh toCheck2 = 0);
00132     static bool isXMLChar(const XMLCh toCheck, const XMLCh toCheck2 = 0);
00133     static bool isWhitespace(const XMLCh toCheck, const XMLCh toCheck2 = 0);
00134     static bool isControlChar(const XMLCh toCheck, const XMLCh toCheck2 = 0);
00135 
00136     static bool isPublicIdChar(const XMLCh toCheck, const XMLCh toCheck2 = 0);  // not defined inline in this header
00137 
00138     // -----------------------------------------------------------------------
00139     //  Special non-conformant public, static methods
00140     // -----------------------------------------------------------------------
00144     static bool isNELRecognized();  // returns the current value of enableNEL
00145 
00149     static void enableNELWS();      // defined outside this header; presumably turns NEL recognition on - confirm
00150 
00151 private:
00152     // -----------------------------------------------------------------------
00153     //  Static data members
00154     //
00155     //  fgCharCharsTable1_0
00156     //      The character characteristics table. Bits in each byte represent
00157     //      the characteristics of each character. It is generated via some
00158     //      code and then hard coded into the cpp file for speed.
00159     //
00160     //  enableNEL
00161     //      Flag that represents whether NEL and LSEP newline recognition is
00162     //      enabled or disabled
00163     // -----------------------------------------------------------------------
00164     static XMLByte  fgCharCharsTable1_0[0x10000];  // 0x10000 entries, indexed directly by an XMLCh value
00165     static bool     enableNEL;
00166 
00167     friend class XMLReader;  // XMLReader may access the private statics above
00168 };
00169 
00170 
00171 // ---------------------------------------------------------------------------
00172 //  XMLChar1_0: Public, static methods
00173 // ---------------------------------------------------------------------------
00174 inline bool XMLChar1_0::isXMLLetter(const XMLCh toCheck, const XMLCh toCheck2)
00175 {   // True when toCheck has the letter bit set in the XML 1.0 table.
00176     if (!toCheck2)  // no second code unit supplied: classify toCheck alone
00177         return ((fgCharCharsTable1_0[toCheck] & gLetterCharMask) != 0);
00178     return false;   // a two-unit (surrogate pair) input is never reported as a letter
00179 }
00180 
00181 inline bool XMLChar1_0::isFirstNameChar(const XMLCh toCheck, const XMLCh toCheck2)
00182 {   // True when toCheck has the first-name-char bit set in the XML 1.0 table.
00183     if (!toCheck2)  // no second code unit supplied: classify toCheck alone
00184         return ((fgCharCharsTable1_0[toCheck] & gFirstNameCharMask) != 0);
00185     return false;   // a two-unit (surrogate pair) input is always rejected here
00186 }
00187 
00188 inline bool XMLChar1_0::isNameChar(const XMLCh toCheck, const XMLCh toCheck2)
00189 {   // True when toCheck has the name-char bit set in the XML 1.0 table.
00190     if (!toCheck2)  // no second code unit supplied: classify toCheck alone
00191         return ((fgCharCharsTable1_0[toCheck] & gNameCharMask) != 0);
00192     return false;   // a two-unit (surrogate pair) input is always rejected here
00193 }
00194 
00195 inline bool XMLChar1_0::isPlainContentChar(const XMLCh toCheck, const XMLCh toCheck2)
00196 {   // Plain-content test: table lookup for one unit, range check for a pair.
00197     if (!toCheck2)  // no second code unit supplied: classify toCheck alone
00198         return ((fgCharCharsTable1_0[toCheck] & gPlainContentCharMask) != 0);
00199     else {
00200         if ((toCheck >= 0xD800) && (toCheck <= 0xDBFF))       // toCheck is a high (leading) surrogate
00201            if ((toCheck2 >= 0xDC00) && (toCheck2 <= 0xDFFF))  // toCheck2 is a low (trailing) surrogate
00202                return true;                                   // any well-formed pair is accepted
00203     }
00204     return false;   // second unit given but the two do not form a surrogate pair
00205 }
00206 
00207 
00208 inline bool XMLChar1_0::isSpecialStartTagChar(const XMLCh toCheck, const XMLCh toCheck2)
00209 {   // True when toCheck has the special-start-tag bit set in the XML 1.0 table.
00210     if (!toCheck2)  // no second code unit supplied: classify toCheck alone
00211         return ((fgCharCharsTable1_0[toCheck] & gSpecialStartTagCharMask) != 0);
00212     return false;   // a two-unit (surrogate pair) input is always rejected here
00213 }
00214 
00215 inline bool XMLChar1_0::isXMLChar(const XMLCh toCheck, const XMLCh toCheck2)
00216 {   // XML-char test: table lookup for one unit, range check for a pair.
00217     if (!toCheck2)  // no second code unit supplied: classify toCheck alone
00218         return ((fgCharCharsTable1_0[toCheck] & gXMLCharMask) != 0);
00219     else {
00220         if ((toCheck >= 0xD800) && (toCheck <= 0xDBFF))       // toCheck is a high (leading) surrogate
00221            if ((toCheck2 >= 0xDC00) && (toCheck2 <= 0xDFFF))  // toCheck2 is a low (trailing) surrogate
00222                return true;                                   // any well-formed pair is accepted
00223     }
00224     return false;   // second unit given but the two do not form a surrogate pair
00225 }
00226 
00227 inline bool XMLChar1_0::isWhitespace(const XMLCh toCheck, const XMLCh toCheck2)
00228 {   // True when toCheck has the whitespace bit set in the XML 1.0 table.
00229     if (!toCheck2)  // no second code unit supplied: classify toCheck alone
00230         return ((fgCharCharsTable1_0[toCheck] & gWhitespaceCharMask) != 0);
00231     return false;   // a two-unit (surrogate pair) input is never whitespace
00232 }
00233 
00234 inline bool XMLChar1_0::isControlChar(const XMLCh toCheck, const XMLCh toCheck2)
00235 {   // True when toCheck has the control-char bit set in the XML 1.0 table.
00236     if (!toCheck2)  // no second code unit supplied: classify toCheck alone
00237         return ((fgCharCharsTable1_0[toCheck] & gControlCharMask) != 0);
00238     return false;   // a two-unit (surrogate pair) input is never a control char
00239 }
00240 
00241 inline bool XMLChar1_0::isNELRecognized() {  // reports the current value of the static enableNEL flag
00242 
00243     return enableNEL;
00244 }
00245 
00246 
00247 // ---------------------------------------------------------------------------
00248 //  This class is for XML 1.1
00249 // ---------------------------------------------------------------------------
00250 class  XMLChar1_1
00251 {
00252 public:
00253     // -----------------------------------------------------------------------
00254     //  Public, static methods that check a string (pointer plus count)
00255     // -----------------------------------------------------------------------
00256     static bool isAllSpaces
00257     (
00258         const   XMLCh* const    toCheck
00259         , const unsigned int    count
00260     );
00261 
00262     static bool containsWhiteSpace
00263     (
00264         const   XMLCh* const    toCheck
00265         , const unsigned int    count
00266     );
00267 
00268     static bool isValidName
00269     (
00270         const   XMLCh* const    toCheck
00271         , const unsigned int    count
00272     );
00273 
00274     static bool isValidNCName
00275     (
00276         const   XMLCh* const    toCheck
00277         , const unsigned int    count
00278     );
00279 
00280     static bool isValidQName
00281     (
00282         const   XMLCh* const    toCheck
00283         , const unsigned int    count
00284     );
00285 
00286     // -----------------------------------------------------------------------
00287     //  Public, static methods, check the XMLCh (a pair is assumed if toCheck2 != 0)
00288     // -----------------------------------------------------------------------
00289     static bool isXMLLetter(const XMLCh toCheck, const XMLCh toCheck2 = 0);
00290     static bool isFirstNameChar(const XMLCh toCheck, const XMLCh toCheck2 = 0);
00291     static bool isNameChar(const XMLCh toCheck, const XMLCh toCheck2 = 0);
00292     static bool isPlainContentChar(const XMLCh toCheck, const XMLCh toCheck2 = 0);
00293     static bool isSpecialStartTagChar(const XMLCh toCheck, const XMLCh toCheck2 = 0);
00294     static bool isXMLChar(const XMLCh toCheck, const XMLCh toCheck2 = 0);
00295     static bool isWhitespace(const XMLCh toCheck, const XMLCh toCheck2 = 0);
00296     static bool isControlChar(const XMLCh toCheck, const XMLCh toCheck2 = 0);
00297 
00298     static bool isPublicIdChar(const XMLCh toCheck, const XMLCh toCheck2 = 0);  // not defined inline in this header
00299 
00300 private:
00301     // -----------------------------------------------------------------------
00302     //  Static data members
00303     //
00304     //  fgCharCharsTable1_1
00305     //      The character characteristics table. Bits in each byte represent
00306     //      the characteristics of each character. It is generated via some
00307     //      code and then hard coded into the cpp file for speed.
00308     //
00309     // -----------------------------------------------------------------------
00310     static XMLByte  fgCharCharsTable1_1[0x10000];  // 0x10000 entries, indexed directly by an XMLCh value
00311 
00312     friend class XMLReader;  // XMLReader may access the private table above
00313 };
00314 
00315 
00316 // ---------------------------------------------------------------------------
00317 //  XMLChar1_1: Public, static methods
00318 // ---------------------------------------------------------------------------
00319 inline bool XMLChar1_1::isXMLLetter(const XMLCh toCheck, const XMLCh toCheck2)
00320 {   // True when toCheck has the letter bit set in the XML 1.1 table.
00321     if (!toCheck2)  // no second code unit supplied: classify toCheck alone
00322         return ((fgCharCharsTable1_1[toCheck] & gLetterCharMask) != 0);
00323     return false;   // a two-unit (surrogate pair) input is never reported as a letter
00324 }
00325 
00326 inline bool XMLChar1_1::isFirstNameChar(const XMLCh toCheck, const XMLCh toCheck2)
00327 {   // First-name-char test: table lookup for one unit, range check for a pair.
00328     if (!toCheck2)  // no second code unit supplied: classify toCheck alone
00329         return ((fgCharCharsTable1_1[toCheck] & gFirstNameCharMask) != 0);
00330     else {
00331         if ((toCheck >= 0xD800) && (toCheck <= 0xDB7F))       // leading unit capped at 0xDB7F (pairs up to U+EFFFF), not 0xDBFF
00332            if ((toCheck2 >= 0xDC00) && (toCheck2 <= 0xDFFF))  // toCheck2 is a low (trailing) surrogate
00333                return true;                                   // presumably mirrors XML 1.1 NameStartChar - confirm against spec
00334     }
00335     return false;   // second unit given but the pair is outside the accepted range
00336 }
00337 
00338 inline bool XMLChar1_1::isNameChar(const XMLCh toCheck, const XMLCh toCheck2)
00339 {   // Name-char test: table lookup for one unit, range check for a pair.
00340     if (!toCheck2)  // no second code unit supplied: classify toCheck alone
00341         return ((fgCharCharsTable1_1[toCheck] & gNameCharMask) != 0);
00342     else {
00343         if ((toCheck >= 0xD800) && (toCheck <= 0xDB7F))       // leading unit capped at 0xDB7F (pairs up to U+EFFFF), not 0xDBFF
00344            if ((toCheck2 >= 0xDC00) && (toCheck2 <= 0xDFFF))  // toCheck2 is a low (trailing) surrogate
00345                return true;                                   // presumably mirrors XML 1.1 NameChar - confirm against spec
00346     }
00347     return false;   // second unit given but the pair is outside the accepted range
00348 }
00349 
00350 inline bool XMLChar1_1::isPlainContentChar(const XMLCh toCheck, const XMLCh toCheck2)
00351 {   // Plain-content test: table lookup for one unit, range check for a pair.
00352     if (!toCheck2)  // no second code unit supplied: classify toCheck alone
00353         return ((fgCharCharsTable1_1[toCheck] & gPlainContentCharMask) != 0);
00354     else {
00355         if ((toCheck >= 0xD800) && (toCheck <= 0xDBFF))       // toCheck is a high (leading) surrogate
00356            if ((toCheck2 >= 0xDC00) && (toCheck2 <= 0xDFFF))  // toCheck2 is a low (trailing) surrogate
00357                return true;                                   // any well-formed pair is accepted
00358     }
00359     return false;   // second unit given but the two do not form a surrogate pair
00360 }
00361 
00362 
00363 inline bool XMLChar1_1::isSpecialStartTagChar(const XMLCh toCheck, const XMLCh toCheck2)
00364 {   // True when toCheck has the special-start-tag bit set in the XML 1.1 table.
00365     if (!toCheck2)  // no second code unit supplied: classify toCheck alone
00366         return ((fgCharCharsTable1_1[toCheck] & gSpecialStartTagCharMask) != 0);
00367     return false;   // a two-unit (surrogate pair) input is always rejected here
00368 }
00369 
00370 inline bool XMLChar1_1::isXMLChar(const XMLCh toCheck, const XMLCh toCheck2)
00371 {   // XML-char test: table lookup for one unit, range check for a pair.
00372     if (!toCheck2)  // no second code unit supplied: classify toCheck alone
00373         return ((fgCharCharsTable1_1[toCheck] & gXMLCharMask) != 0);
00374     else {
00375         if ((toCheck >= 0xD800) && (toCheck <= 0xDBFF))       // toCheck is a high (leading) surrogate
00376            if ((toCheck2 >= 0xDC00) && (toCheck2 <= 0xDFFF))  // toCheck2 is a low (trailing) surrogate
00377                return true;                                   // any well-formed pair is accepted
00378     }
00379     return false;   // second unit given but the two do not form a surrogate pair
00380 }
00381 
00382 inline bool XMLChar1_1::isWhitespace(const XMLCh toCheck, const XMLCh toCheck2)
00383 {   // True when toCheck has the whitespace bit set in the XML 1.1 table.
00384     if (!toCheck2)  // no second code unit supplied: classify toCheck alone
00385         return ((fgCharCharsTable1_1[toCheck] & gWhitespaceCharMask) != 0);
00386     return false;   // a two-unit (surrogate pair) input is never whitespace
00387 }
00388 
00389 inline bool XMLChar1_1::isControlChar(const XMLCh toCheck, const XMLCh toCheck2)
00390 {   // True when toCheck has the control-char bit set in the XML 1.1 table.
00391     if (!toCheck2)  // no second code unit supplied: classify toCheck alone
00392         return ((fgCharCharsTable1_1[toCheck] & gControlCharMask) != 0);
00393     return false;   // a two-unit (surrogate pair) input is never a control char
00394 }
00395 
00396 
00397 XERCES_CPP_NAMESPACE_END
00398 
00399 #endif


Copyright © 2000 The Apache Software Foundation. All Rights Reserved.