説明を見る。00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024
00025
00026
00027
00028
00029
00030
00031
00032
00033
00034
00035
00036
00037
00038
00039
00040
00041
00042
00043
00044 #ifndef PCRE_INTERNAL_H
00045 #define PCRE_INTERNAL_H
00046
00047
00048 /* Bit masks for the per-character type byte returned by charTypeForChar(); isSpaceChar() tests ctype_space and isWordChar() tests ctype_word. */
00049 #define ctype_space 0x01
00050 #define ctype_xdigit 0x08
00051 #define ctype_word 0x10
00052
00053
00054
00055 /* Offsets within the class-bitmap (cbits) region of the tables; entries are 32 apart and cbit_length is the size of the whole region. NOTE(review): presumably each bitmap is 32 bytes (256 bits) - confirm. */
00056 #define cbit_space 0
00057 #define cbit_digit 32
00058 #define cbit_word 64
00059 #define cbit_length 96
00060
00061
00062
00063 /* Layout of kjs_pcre_default_tables: lcc region at 0, fcc region at 128, cbits region at 256 (cbit_length bytes), then 128 ctypes entries; tables_length is the total size. */
00064 #define lcc_offset 0
00065 #define fcc_offset 128
00066 #define cbits_offset 256
00067 #define ctypes_offset (cbits_offset + cbit_length)
00068 #define tables_length (ctypes_offset + 128)
00069
00070 #ifndef DFTABLES
00071
00072 /* Assertions are compiled out here: both macros expand to an empty statement and ASSERT never evaluates its argument. */
00073 #define ASSERT(x) do { } while(0)
00074 #define ASSERT_NOT_REACHED() do {} while(0)
00075 /* On WIN32 builds, disable compiler warnings 4232 and 4244. NOTE(review): C4244 is MSVC's possible-loss-of-data conversion warning, which the implicit int-to-byte stores in this header would trigger. */
00076 #ifdef WIN32
00077 #pragma warning(disable: 4232)
00078 #pragma warning(disable: 4244)
00079 #endif
00080
00081 #include "pcre.h"
00082
00083
00084
00085
00086 /* Number of bytes the putLinkValue*AndAdvance() helpers step over after writing a link value. */
00087 #define LINK_SIZE 2
00088
00089
00090 /* Change "#if 0" to "#if 1" to define DEBUG from this header. */
00091 #if 0
00092 #define DEBUG
00093 #endif
00094
00095
00096
00097
00098
00099 /* DPRINTF(p) pastes p directly after "printf", so p must be a parenthesized argument list, e.g. DPRINTF(("n=%d\n", n)); it expands to nothing unless DEBUG is defined. */
00100 #ifdef DEBUG
00101 #define DPRINTF(p) printf p
00102 #else
00103 #define DPRINTF(p)
00104 #endif
00105
00106
00107
00108
00109
00110
00111
00112
00113
00114
00115
00116
00117
00118
00119 static inline void put2ByteValue(unsigned char* opcodePtr, int value)
00120 {
00121 ASSERT(value >= 0 && value <= 0xFFFF);
00122 opcodePtr[0] = value >> 8;
00123 opcodePtr[1] = value;
00124 }
00125
/*
 * Reads the two bytes at opcodePtr as a big-endian unsigned 16-bit value
 * (first byte is the most significant) and returns it as an int.
 */
static inline int get2ByteValue(const unsigned char* opcodePtr)
{
    const int highByte = opcodePtr[0];
    const int lowByte = opcodePtr[1];
    return (highByte << 8) | lowByte;
}
00130
00131 static inline void put2ByteValueAndAdvance(unsigned char*& opcodePtr, int value)
00132 {
00133 put2ByteValue(opcodePtr, value);
00134 opcodePtr += 2;
00135 }
00136
00137 static inline void putLinkValueAllowZero(unsigned char* opcodePtr, int value)
00138 {
00139 put2ByteValue(opcodePtr, value);
00140 }
00141
00142 static inline int getLinkValueAllowZero(const unsigned char* opcodePtr)
00143 {
00144 return get2ByteValue(opcodePtr);
00145 }
00146 /* 65536, one more than the largest value put2ByteValue() accepts. NOTE(review): presumably the upper bound on compiled pattern size so every link fits in two bytes - confirm at the use site. */
00147 #define MAX_PATTERN_SIZE (1 << 16)
00148
00149 static inline void putLinkValue(unsigned char* opcodePtr, int value)
00150 {
00151 ASSERT(value);
00152 putLinkValueAllowZero(opcodePtr, value);
00153 }
00154
00155 static inline int getLinkValue(const unsigned char* opcodePtr)
00156 {
00157 int value = getLinkValueAllowZero(opcodePtr);
00158 ASSERT(value);
00159 return value;
00160 }
00161
00162 static inline void putLinkValueAndAdvance(unsigned char*& opcodePtr, int value)
00163 {
00164 putLinkValue(opcodePtr, value);
00165 opcodePtr += LINK_SIZE;
00166 }
00167
00168 static inline void putLinkValueAllowZeroAndAdvance(unsigned char*& opcodePtr, int value)
00169 {
00170 putLinkValueAllowZero(opcodePtr, value);
00171 opcodePtr += LINK_SIZE;
00172 }
00173
00174
/*
 * Option flags; every enumerator is a distinct single bit so they can be
 * OR'ed together. Listed in ascending bit order.
 * NOTE(review): presumably stored in JSRegExp::options - confirm.
 */
enum RegExpOptions {
    IgnoreCaseOption = 1 << 0,
    MatchAcrossMultipleLinesOption = 1 << 1,
    IsAnchoredOption = 1 << 25,
    UseMultiLineFirstByteOptimizationOption = 1 << 28,
    UseRequiredByteOptimizationOption = 1 << 29,
    UseFirstByteOptimizationOption = 1 << 30
};
00183
00184
00185
00186 /* Flag bits that sit above the low byte. NOTE(review): presumably OR'ed with a character value in the first_byte / req_byte fields of JSRegExp - confirm. */
00187 #define REQ_IGNORE_CASE 0x0100
00188 #define REQ_VARY 0x0200
00189
00190
00191
00192
00193
00194 /* XCL_NOT and XCL_MAP are bit flags; XCL_END, XCL_SINGLE and XCL_RANGE are small item codes. NOTE(review): presumably these describe the data that follows OP_XCLASS and is read by kjs_pcre_xclass() - confirm. */
00195 #define XCL_NOT 0x01
00196 #define XCL_MAP 0x02
00197
00198 #define XCL_END 0
00199 #define XCL_SINGLE 1
00200 #define XCL_RANGE 2
00201
00202
00203
00204
00205
00206
00207
00208
00209
00210
00211
/*
 * Escape codes, numbered consecutively from 1.
 * NOTE(review): presumably one per backslash escape (\B \b \D \d \S \s \W \w)
 * plus ESC_REF for back references - confirm.
 */
enum {
    ESC_B = 1,
    ESC_b = 2,
    ESC_D = 3,
    ESC_d = 4,
    ESC_S = 5,
    ESC_s = 6,
    ESC_W = 7,
    ESC_w = 8,
    ESC_REF = 9
};
00213
00214
00215
00216
00217 /* FOR_EACH_OPCODE applies `macro` to every opcode name, in order. */
00218 /* Expanding it with OPCODE_ENUM_VALUE produces the OP_* enum, so OP_END is 0 and OP_BRA is the last and largest enumerator. */
00219 /* Keep BRA last: isBracketStartOpcode() treats every value >= OP_BRA as a bracket start. */
00220 #define FOR_EACH_OPCODE(macro) \
00221 macro(END) \
00222 \
00223 macro(NOT_WORD_BOUNDARY) \
00224 macro(WORD_BOUNDARY) \
00225 macro(NOT_DIGIT) \
00226 macro(DIGIT) \
00227 macro(NOT_WHITESPACE) \
00228 macro(WHITESPACE) \
00229 macro(NOT_WORDCHAR) \
00230 macro(WORDCHAR) \
00231 \
00232 macro(NOT_NEWLINE) \
00233 \
00234 macro(CIRC) \
00235 macro(DOLL) \
00236 macro(BOL) \
00237 macro(EOL) \
00238 macro(CHAR) \
00239 macro(CHAR_IGNORING_CASE) \
00240 macro(ASCII_CHAR) \
00241 macro(ASCII_LETTER_IGNORING_CASE) \
00242 macro(NOT) \
00243 \
00244 macro(STAR) \
00245 macro(MINSTAR) \
00246 macro(PLUS) \
00247 macro(MINPLUS) \
00248 macro(QUERY) \
00249 macro(MINQUERY) \
00250 macro(UPTO) \
00251 macro(MINUPTO) \
00252 macro(EXACT) \
00253 \
00254 macro(NOTSTAR) \
00255 macro(NOTMINSTAR) \
00256 macro(NOTPLUS) \
00257 macro(NOTMINPLUS) \
00258 macro(NOTQUERY) \
00259 macro(NOTMINQUERY) \
00260 macro(NOTUPTO) \
00261 macro(NOTMINUPTO) \
00262 macro(NOTEXACT) \
00263 \
00264 macro(TYPESTAR) \
00265 macro(TYPEMINSTAR) \
00266 macro(TYPEPLUS) \
00267 macro(TYPEMINPLUS) \
00268 macro(TYPEQUERY) \
00269 macro(TYPEMINQUERY) \
00270 macro(TYPEUPTO) \
00271 macro(TYPEMINUPTO) \
00272 macro(TYPEEXACT) \
00273 \
00274 macro(CRSTAR) \
00275 macro(CRMINSTAR) \
00276 macro(CRPLUS) \
00277 macro(CRMINPLUS) \
00278 macro(CRQUERY) \
00279 macro(CRMINQUERY) \
00280 macro(CRRANGE) \
00281 macro(CRMINRANGE) \
00282 \
00283 macro(CLASS) \
00284 macro(NCLASS) \
00285 macro(XCLASS) \
00286 \
00287 macro(REF) \
00288 \
00289 macro(ALT) \
00290 macro(KET) \
00291 macro(KETRMAX) \
00292 macro(KETRMIN) \
00293 \
00294 macro(ASSERT) \
00295 macro(ASSERT_NOT) \
00296 \
00297 macro(BRAZERO) \
00298 macro(BRAMINZERO) \
00299 macro(BRANUMBER) \
00300 macro(BRA)
00301
00302 #define OPCODE_ENUM_VALUE(opcode) OP_##opcode,
00303 enum { FOR_EACH_OPCODE(OPCODE_ENUM_VALUE) };
00304
00305
00306
00307
00308
00309
00310
00311
00312
00313
00314
00315
00316
00317 /* Not referenced elsewhere in this header. NOTE(review): presumably the largest capture-bracket number that is encoded directly as OP_BRA + n - confirm at the use site. */
00318 #define EXTRACT_BASIC_MAX 100
00319
00320
00321
00322
00323
00324
00325
00326
00327 /* Fixed-size record describing a regular expression. NOTE(review): presumably the header of a compiled pattern, so field order and sizes should not be changed without checking the code that allocates it. */
00328 struct JSRegExp {
00329 unsigned options;
00330 /* NOTE(review): presumably the highest capture-bracket number and highest back-reference number - confirm. */
00331 unsigned short top_bracket;
00332 unsigned short top_backref;
00333 /* NOTE(review): presumably a character value, possibly combined with REQ_* flag bits - confirm. */
00334 unsigned short first_byte;
00335 unsigned short req_byte;
00336 };
00337
00338
00339
00340
00341
00342 /* Tables defined in another translation unit. kjs_pcre_utf8_table1_size matches the declared length (6) of kjs_pcre_utf8_table1. */
00343 #define kjs_pcre_utf8_table1_size 6
00344
00345 extern const int kjs_pcre_utf8_table1[6];
00346 extern const int kjs_pcre_utf8_table2[6];
00347 extern const int kjs_pcre_utf8_table3[6];
00348 extern const unsigned char kjs_pcre_utf8_table4[0x40];
00349 /* Combined character tables; the region layout is given by lcc_offset, fcc_offset, cbits_offset and ctypes_offset. */
00350 extern const unsigned char kjs_pcre_default_tables[tables_length];
00351
00352 static inline unsigned char toLowerCase(unsigned char c)
00353 {
00354 static const unsigned char* lowerCaseChars = kjs_pcre_default_tables + lcc_offset;
00355 return lowerCaseChars[c];
00356 }
00357
00358 static inline unsigned char flipCase(unsigned char c)
00359 {
00360 static const unsigned char* flippedCaseChars = kjs_pcre_default_tables + fcc_offset;
00361 return flippedCaseChars[c];
00362 }
00363
00364 static inline unsigned char classBitmapForChar(unsigned char c)
00365 {
00366 static const unsigned char* charClassBitmaps = kjs_pcre_default_tables + cbits_offset;
00367 return charClassBitmaps[c];
00368 }
00369
00370 static inline unsigned char charTypeForChar(unsigned char c)
00371 {
00372 const unsigned char* charTypeMap = kjs_pcre_default_tables + ctypes_offset;
00373 return charTypeMap[c];
00374 }
00375
00376 static inline bool isWordChar(UChar c)
00377 {
00378 return c < 128 && (charTypeForChar(c) & ctype_word);
00379 }
00380
00381 static inline bool isSpaceChar(UChar c)
00382 {
00383 return (c < 128 && (charTypeForChar(c) & ctype_space));
00384 }
00385
00386 static inline bool isNewline(UChar nl)
00387 {
00388 return (nl == 0xA || nl == 0xD || nl == 0x2028 || nl == 0x2029);
00389 }
00390
00391 static inline bool isBracketStartOpcode(unsigned char opcode)
00392 {
00393 if (opcode >= OP_BRA)
00394 return true;
00395 switch (opcode) {
00396 case OP_ASSERT:
00397 case OP_ASSERT_NOT:
00398 return true;
00399 default:
00400 return false;
00401 }
00402 }
00403
00404 static inline void advanceToEndOfBracket(const unsigned char*& opcodePtr)
00405 {
00406 ASSERT(isBracketStartOpcode(*opcodePtr) || *opcodePtr == OP_ALT);
00407 do
00408 opcodePtr += getLinkValue(opcodePtr + 1);
00409 while (*opcodePtr == OP_ALT);
00410 }
00411
00412
00413
00414
00415 /* Functions defined in other translation units. NOTE(review): presumably kjs_pcre_ucp_othercase returns the other-case code point of its argument and kjs_pcre_xclass tests a character against extended-class data - confirm against their definitions. */
00416 extern int kjs_pcre_ucp_othercase(unsigned);
00417 extern bool kjs_pcre_xclass(int, const unsigned char*);
00418
00419 #endif
00420
00421 #endif
00422
00423