00001 /* This is JavaScriptCore's variant of the PCRE library. While this library 00002 started out as a copy of PCRE, many of the features of PCRE have been 00003 removed. This library now supports only the regular expression features 00004 required by the JavaScript language specification, and has only the functions 00005 needed by JavaScriptCore and the rest of WebKit. 00006 00007 Originally written by Philip Hazel 00008 Copyright (c) 1997-2006 University of Cambridge 00009 Copyright (C) 2002, 2004, 2006, 2007 Apple Inc. All rights reserved. 00010 00011 ----------------------------------------------------------------------------- 00012 Redistribution and use in source and binary forms, with or without 00013 modification, are permitted provided that the following conditions are met: 00014 00015 * Redistributions of source code must retain the above copyright notice, 00016 this list of conditions and the following disclaimer. 00017 00018 * Redistributions in binary form must reproduce the above copyright 00019 notice, this list of conditions and the following disclaimer in the 00020 documentation and/or other materials provided with the distribution. 00021 00022 * Neither the name of the University of Cambridge nor the names of its 00023 contributors may be used to endorse or promote products derived from 00024 this software without specific prior written permission. 00025 00026 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 00027 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 00028 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 00029 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE 00030 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 00031 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 00032 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 00033 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 00034 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 00035 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 00036 POSSIBILITY OF SUCH DAMAGE. 00037 ----------------------------------------------------------------------------- 00038 */ 00039 00040 /* This module contains an internal function that is used to match an extended 00041 class (one that contains characters whose values are > 255). */ 00042 00043 #include "pcre_internal.h" 00044 00045 /************************************************* 00046 * Match character against an XCLASS * 00047 *************************************************/ 00048 00049 /* This function is called to match a character against an extended class that 00050 might contain values > 255. 00051 00052 Arguments: 00053 c the character 00054 data points to the flag byte of the XCLASS data 00055 00056 Returns: true if character matches, else false 00057 */ 00058 00059 /* Get the next UTF-8 character, advancing the pointer. This is called when we 00060 know we are in UTF-8 mode. */ 00061 00062 static inline void getUTF8CharAndAdvancePointer(int& c, const unsigned char*& subjectPtr) 00063 { 00064 c = *subjectPtr++; 00065 if ((c & 0xc0) == 0xc0) { 00066 int gcaa = kjs_pcre_utf8_table4[c & 0x3f]; /* Number of additional bytes */ 00067 int gcss = 6 * gcaa; 00068 c = (c & kjs_pcre_utf8_table3[gcaa]) << gcss; 00069 while (gcaa-- > 0) { 00070 gcss -= 6; 00071 c |= (*subjectPtr++ & 0x3f) << gcss; 00072 } 00073 } 00074 } 00075 00076 bool kjs_pcre_xclass(int c, const unsigned char* data) 00077 { 00078 bool negated = (*data & XCL_NOT); 00079 00080 /* Character values < 256 are matched against a bitmap, if one is present. If 00081 not, we still carry on, because there may be ranges that start below 256 in the 00082 additional data. */ 00083 00084 if (c < 256) { 00085 if ((*data & XCL_MAP) != 0 && (data[1 + c/8] & (1 << (c&7))) != 0) 00086 return !negated; /* char found */ 00087 } 00088 00089 /* First skip the bit map if present. Then match against the list of Unicode 00090 properties or large chars or ranges that end with a large char. We won't ever 00091 encounter XCL_PROP or XCL_NOTPROP when UCP support is not compiled. */ 00092 00093 if ((*data++ & XCL_MAP) != 0) 00094 data += 32; 00095 00096 int t; 00097 while ((t = *data++) != XCL_END) { 00098 if (t == XCL_SINGLE) { 00099 int x; 00100 getUTF8CharAndAdvancePointer(x, data); 00101 if (c == x) 00102 return !negated; 00103 } 00104 else if (t == XCL_RANGE) { 00105 int x, y; 00106 getUTF8CharAndAdvancePointer(x, data); 00107 getUTF8CharAndAdvancePointer(y, data); 00108 if (c >= x && c <= y) 00109 return !negated; 00110 } 00111 } 00112 00113 return negated; /* char did not match */ 00114 }