00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024
00025
00026
00027
00028 #include "v8.h"
00029
00030 #include "execution.h"
00031 #include "factory.h"
00032 #include "jsregexp.h"
00033 #include "platform.h"
00034 #include "runtime.h"
00035 #include "top.h"
00036 #include "compilation-cache.h"
00037
00038
00039
00040
00041 #ifdef DEBUG
00042 #include "third_party/jscre/pcre.h"
00043 #define DEBUG
00044 #else
00045 #include "third_party/jscre/pcre.h"
00046 #endif
00047
00048 namespace v8 { namespace internal {
00049
00050
00051 #define CAPTURE_INDEX 0
00052 #define INTERNAL_INDEX 1
00053
00054 static Failure* malloc_failure;
00055
00056 static void* JSREMalloc(size_t size) {
00057 Object* obj = Heap::AllocateByteArray(size);
00058
00059
00060
00061
00062 if (obj->IsFailure()) {
00063 malloc_failure = Failure::cast(obj);
00064 return NULL;
00065 }
00066
00067
00068
00069 return reinterpret_cast<void*>(ByteArray::cast(obj)->GetDataStartAddress());
00070 }
00071
00072
00073 static void JSREFree(void* p) {
00074 USE(p);
00075 }
00076
00077
00078 String* RegExpImpl::last_ascii_string_ = NULL;
00079 String* RegExpImpl::two_byte_cached_string_ = NULL;
00080
00081
00082 void RegExpImpl::NewSpaceCollectionPrologue() {
00083
00084
00085 if (Heap::InNewSpace(last_ascii_string_)) {
00086
00087 last_ascii_string_ = NULL;
00088 two_byte_cached_string_ = NULL;
00089 }
00090 }
00091
00092
00093 void RegExpImpl::OldSpaceCollectionPrologue() {
00094 last_ascii_string_ = NULL;
00095 two_byte_cached_string_ = NULL;
00096 }
00097
00098
00099 Handle<Object> RegExpImpl::CreateRegExpLiteral(Handle<JSFunction> constructor,
00100 Handle<String> pattern,
00101 Handle<String> flags,
00102 bool* has_pending_exception) {
00103
00104 if (!constructor->IsLoaded()) {
00105 LoadLazy(constructor, has_pending_exception);
00106 if (*has_pending_exception) return Handle<Object>(Failure::Exception());
00107 }
00108
00109 Object** argv[2] = { Handle<Object>::cast(pattern).location(),
00110 Handle<Object>::cast(flags).location() };
00111 return Execution::New(constructor, 2, argv, has_pending_exception);
00112 }
00113
00114
00115
00116
00117 Handle<String> RegExpImpl::CachedStringToTwoByte(Handle<String> subject) {
00118 if (*subject == last_ascii_string_) {
00119 ASSERT(two_byte_cached_string_ != NULL);
00120 return Handle<String>(String::cast(two_byte_cached_string_));
00121 }
00122 Handle<String> two_byte_string = StringToTwoByte(subject);
00123 last_ascii_string_ = *subject;
00124 two_byte_cached_string_ = *two_byte_string;
00125 return two_byte_string;
00126 }
00127
00128
00129
00130
00131 Handle<String> RegExpImpl::StringToTwoByte(Handle<String> pattern) {
00132 if (!pattern->IsFlat()) {
00133 FlattenString(pattern);
00134 }
00135 Handle<String> flat_string(pattern->IsConsString() ?
00136 String::cast(ConsString::cast(*pattern)->first()) :
00137 *pattern);
00138 ASSERT(!flat_string->IsConsString());
00139 ASSERT(flat_string->IsSeqString() || flat_string->IsSlicedString() ||
00140 flat_string->IsExternalString());
00141 if (!flat_string->IsAsciiRepresentation()) {
00142 return flat_string;
00143 }
00144
00145 Handle<String> two_byte_string =
00146 Factory::NewRawTwoByteString(flat_string->length(), TENURED);
00147 static StringInputBuffer convert_to_two_byte_buffer;
00148 convert_to_two_byte_buffer.Reset(*flat_string);
00149 for (int i = 0; convert_to_two_byte_buffer.has_more(); i++) {
00150 two_byte_string->Set(i, convert_to_two_byte_buffer.GetNext());
00151 }
00152 return two_byte_string;
00153 }
00154
00155
00156 static JSRegExp::Flags RegExpFlagsFromString(Handle<String> str) {
00157 int flags = JSRegExp::NONE;
00158 for (int i = 0; i < str->length(); i++) {
00159 switch (str->Get(i)) {
00160 case 'i':
00161 flags |= JSRegExp::IGNORE_CASE;
00162 break;
00163 case 'g':
00164 flags |= JSRegExp::GLOBAL;
00165 break;
00166 case 'm':
00167 flags |= JSRegExp::MULTILINE;
00168 break;
00169 }
00170 }
00171 return JSRegExp::Flags(flags);
00172 }
00173
00174
00175 unibrow::Predicate<unibrow::RegExpSpecialChar, 128> is_reg_exp_special_char;
00176
00177
00178 Handle<Object> RegExpImpl::Compile(Handle<JSRegExp> re,
00179 Handle<String> pattern,
00180 Handle<String> flag_str) {
00181 JSRegExp::Flags flags = RegExpFlagsFromString(flag_str);
00182 Handle<FixedArray> cached = CompilationCache::LookupRegExp(pattern, flags);
00183 bool in_cache = !cached.is_null();
00184 Handle<Object> result;
00185 if (in_cache) {
00186 re->set_data(*cached);
00187 result = re;
00188 } else {
00189 bool is_atom = !flags.is_ignore_case();
00190 for (int i = 0; is_atom && i < pattern->length(); i++) {
00191 if (is_reg_exp_special_char.get(pattern->Get(i)))
00192 is_atom = false;
00193 }
00194 if (is_atom) {
00195 result = AtomCompile(re, pattern, flags);
00196 } else {
00197 result = JsreCompile(re, pattern, flags);
00198 }
00199 Object* data = re->data();
00200 if (data->IsFixedArray()) {
00201
00202
00203 Handle<FixedArray> data(FixedArray::cast(re->data()));
00204 CompilationCache::PutRegExp(pattern, flags, data);
00205 }
00206 }
00207
00208 LOG(RegExpCompileEvent(re, in_cache));
00209 return result;
00210 }
00211
00212
00213 Handle<Object> RegExpImpl::Exec(Handle<JSRegExp> regexp,
00214 Handle<String> subject,
00215 Handle<Object> index) {
00216 switch (regexp->TypeTag()) {
00217 case JSRegExp::JSCRE:
00218 return JsreExec(regexp, subject, index);
00219 case JSRegExp::ATOM:
00220 return AtomExec(regexp, subject, index);
00221 default:
00222 UNREACHABLE();
00223 return Handle<Object>();
00224 }
00225 }
00226
00227
00228 Handle<Object> RegExpImpl::ExecGlobal(Handle<JSRegExp> regexp,
00229 Handle<String> subject) {
00230 switch (regexp->TypeTag()) {
00231 case JSRegExp::JSCRE:
00232 return JsreExecGlobal(regexp, subject);
00233 case JSRegExp::ATOM:
00234 return AtomExecGlobal(regexp, subject);
00235 default:
00236 UNREACHABLE();
00237 return Handle<Object>();
00238 }
00239 }
00240
00241
00242 Handle<Object> RegExpImpl::AtomCompile(Handle<JSRegExp> re,
00243 Handle<String> pattern,
00244 JSRegExp::Flags flags) {
00245 Factory::SetRegExpData(re, JSRegExp::ATOM, pattern, flags, pattern);
00246 return re;
00247 }
00248
00249
00250 Handle<Object> RegExpImpl::AtomExec(Handle<JSRegExp> re,
00251 Handle<String> subject,
00252 Handle<Object> index) {
00253 Handle<String> needle(String::cast(re->DataAt(JSRegExp::kAtomPatternIndex)));
00254
00255 uint32_t start_index;
00256 if (!Array::IndexFromObject(*index, &start_index)) {
00257 return Handle<Smi>(Smi::FromInt(-1));
00258 }
00259
00260 LOG(RegExpExecEvent(re, start_index, subject));
00261 int value = Runtime::StringMatch(subject, needle, start_index);
00262 if (value == -1) return Factory::null_value();
00263
00264 Handle<FixedArray> array = Factory::NewFixedArray(2);
00265 array->set(0,
00266 Smi::FromInt(value),
00267 SKIP_WRITE_BARRIER);
00268 array->set(1,
00269 Smi::FromInt(value + needle->length()),
00270 SKIP_WRITE_BARRIER);
00271 return Factory::NewJSArrayWithElements(array);
00272 }
00273
00274
00275 Handle<Object> RegExpImpl::AtomExecGlobal(Handle<JSRegExp> re,
00276 Handle<String> subject) {
00277 Handle<String> needle(String::cast(re->DataAt(JSRegExp::kAtomPatternIndex)));
00278 Handle<JSArray> result = Factory::NewJSArray(1);
00279 int index = 0;
00280 int match_count = 0;
00281 int subject_length = subject->length();
00282 int needle_length = needle->length();
00283 while (true) {
00284 LOG(RegExpExecEvent(re, index, subject));
00285 int value = -1;
00286 if (index + needle_length <= subject_length) {
00287 value = Runtime::StringMatch(subject, needle, index);
00288 }
00289 if (value == -1) break;
00290 HandleScope scope;
00291 int end = value + needle_length;
00292
00293 Handle<FixedArray> array = Factory::NewFixedArray(2);
00294 array->set(0,
00295 Smi::FromInt(value),
00296 SKIP_WRITE_BARRIER);
00297 array->set(1,
00298 Smi::FromInt(end),
00299 SKIP_WRITE_BARRIER);
00300 Handle<JSArray> pair = Factory::NewJSArrayWithElements(array);
00301 SetElement(result, match_count, pair);
00302 match_count++;
00303 index = end;
00304 if (needle_length == 0) index++;
00305 }
00306 return result;
00307 }
00308
00309
00310 static inline Object* DoCompile(String* pattern,
00311 JSRegExp::Flags flags,
00312 unsigned* number_of_captures,
00313 const char** error_message,
00314 JscreRegExp** code) {
00315 JSRegExpIgnoreCaseOption case_option = flags.is_ignore_case()
00316 ? JSRegExpIgnoreCase
00317 : JSRegExpDoNotIgnoreCase;
00318 JSRegExpMultilineOption multiline_option = flags.is_multiline()
00319 ? JSRegExpMultiline
00320 : JSRegExpSingleLine;
00321 *error_message = NULL;
00322 malloc_failure = Failure::Exception();
00323 *code = jsRegExpCompile(pattern->GetTwoByteData(),
00324 pattern->length(),
00325 case_option,
00326 multiline_option,
00327 number_of_captures,
00328 error_message,
00329 &JSREMalloc,
00330 &JSREFree);
00331 if (*code == NULL && (malloc_failure->IsRetryAfterGC() ||
00332 malloc_failure->IsOutOfMemoryFailure())) {
00333 return malloc_failure;
00334 } else {
00335
00336
00337
00338 return pattern;
00339 }
00340 }
00341
00342
00343 void CompileWithRetryAfterGC(Handle<String> pattern,
00344 JSRegExp::Flags flags,
00345 unsigned* number_of_captures,
00346 const char** error_message,
00347 JscreRegExp** code) {
00348 CALL_HEAP_FUNCTION_VOID(DoCompile(*pattern,
00349 flags,
00350 number_of_captures,
00351 error_message,
00352 code));
00353 }
00354
00355
00356 Handle<Object> RegExpImpl::JsreCompile(Handle<JSRegExp> re,
00357 Handle<String> pattern,
00358 JSRegExp::Flags flags) {
00359 Handle<String> two_byte_pattern = StringToTwoByte(pattern);
00360
00361 unsigned number_of_captures;
00362 const char* error_message = NULL;
00363
00364 JscreRegExp* code = NULL;
00365 FlattenString(pattern);
00366
00367 CompileWithRetryAfterGC(two_byte_pattern,
00368 flags,
00369 &number_of_captures,
00370 &error_message,
00371 &code);
00372
00373 if (code == NULL) {
00374
00375 Handle<JSArray> array = Factory::NewJSArray(2);
00376 SetElement(array, 0, pattern);
00377 SetElement(array, 1, Factory::NewStringFromUtf8(CStrVector(
00378 (error_message == NULL) ? "Unknown regexp error" : error_message)));
00379 Handle<Object> regexp_err =
00380 Factory::NewSyntaxError("malformed_regexp", array);
00381 return Handle<Object>(Top::Throw(*regexp_err));
00382 }
00383
00384
00385 Handle<ByteArray> internal(
00386 ByteArray::FromDataStartAddress(reinterpret_cast<Address>(code)));
00387
00388 Handle<FixedArray> value = Factory::NewFixedArray(2);
00389 value->set(CAPTURE_INDEX, Smi::FromInt(number_of_captures));
00390 value->set(INTERNAL_INDEX, *internal);
00391 Factory::SetRegExpData(re, JSRegExp::JSCRE, pattern, flags, value);
00392
00393 return re;
00394 }
00395
00396
00397 Handle<Object> RegExpImpl::JsreExecOnce(Handle<JSRegExp> regexp,
00398 int num_captures,
00399 Handle<String> subject,
00400 int previous_index,
00401 const uc16* two_byte_subject,
00402 int* offsets_vector,
00403 int offsets_vector_length) {
00404 int rc;
00405 {
00406 AssertNoAllocation a;
00407 ByteArray* internal = JsreInternal(regexp);
00408 const JscreRegExp* js_regexp =
00409 reinterpret_cast<JscreRegExp*>(internal->GetDataStartAddress());
00410
00411 LOG(RegExpExecEvent(regexp, previous_index, subject));
00412
00413 rc = jsRegExpExecute(js_regexp,
00414 two_byte_subject,
00415 subject->length(),
00416 previous_index,
00417 offsets_vector,
00418 offsets_vector_length);
00419 }
00420
00421
00422
00423 if (rc == JSRegExpErrorNoMatch
00424 || rc == JSRegExpErrorHitLimit) {
00425 return Factory::null_value();
00426 }
00427
00428
00429 if (rc < 0) {
00430
00431 Handle<Object> code(Smi::FromInt(rc));
00432 Handle<Object> args[2] = { Factory::LookupAsciiSymbol("jsre_exec"), code };
00433 Handle<Object> regexp_err(
00434 Factory::NewTypeError("jsre_error", HandleVector(args, 2)));
00435 return Handle<Object>(Top::Throw(*regexp_err));
00436 }
00437
00438 Handle<FixedArray> array = Factory::NewFixedArray(2 * (num_captures+1));
00439
00440 for (int i = 0; i < 2 * (num_captures+1); i += 2) {
00441 array->set(i,
00442 Smi::FromInt(offsets_vector[i]),
00443 SKIP_WRITE_BARRIER);
00444 array->set(i+1,
00445 Smi::FromInt(offsets_vector[i+1]),
00446 SKIP_WRITE_BARRIER);
00447 }
00448 return Factory::NewJSArrayWithElements(array);
00449 }
00450
00451
00452 class OffsetsVector {
00453 public:
00454 inline OffsetsVector(int num_captures) {
00455 offsets_vector_length_ = (num_captures + 1) * 3;
00456 if (offsets_vector_length_ > kStaticOffsetsVectorSize) {
00457 vector_ = NewArray<int>(offsets_vector_length_);
00458 } else {
00459 vector_ = static_offsets_vector_;
00460 }
00461 }
00462
00463
00464 inline ~OffsetsVector() {
00465 if (offsets_vector_length_ > kStaticOffsetsVectorSize) {
00466 DeleteArray(vector_);
00467 vector_ = NULL;
00468 }
00469 }
00470
00471
00472 inline int* vector() {
00473 return vector_;
00474 }
00475
00476
00477 inline int length() {
00478 return offsets_vector_length_;
00479 }
00480
00481 private:
00482 int* vector_;
00483 int offsets_vector_length_;
00484 static const int kStaticOffsetsVectorSize = 30;
00485 static int static_offsets_vector_[kStaticOffsetsVectorSize];
00486 };
00487
00488
00489 int OffsetsVector::static_offsets_vector_[
00490 OffsetsVector::kStaticOffsetsVectorSize];
00491
00492
00493 Handle<Object> RegExpImpl::JsreExec(Handle<JSRegExp> regexp,
00494 Handle<String> subject,
00495 Handle<Object> index) {
00496
00497 int num_captures = JsreCapture(regexp);
00498
00499 OffsetsVector offsets(num_captures);
00500
00501 int previous_index = static_cast<int>(DoubleToInteger(index->Number()));
00502
00503 Handle<String> subject16 = CachedStringToTwoByte(subject);
00504
00505 Handle<Object> result(JsreExecOnce(regexp, num_captures, subject,
00506 previous_index,
00507 subject16->GetTwoByteData(),
00508 offsets.vector(), offsets.length()));
00509
00510 return result;
00511 }
00512
00513
00514 Handle<Object> RegExpImpl::JsreExecGlobal(Handle<JSRegExp> regexp,
00515 Handle<String> subject) {
00516
00517 int num_captures = JsreCapture(regexp);
00518
00519 OffsetsVector offsets(num_captures);
00520
00521 int previous_index = 0;
00522
00523 Handle<JSArray> result = Factory::NewJSArray(0);
00524 int i = 0;
00525 Handle<Object> matches;
00526
00527 Handle<String> subject16 = CachedStringToTwoByte(subject);
00528
00529 do {
00530 if (previous_index > subject->length() || previous_index < 0) {
00531
00532
00533 matches = Factory::null_value();
00534 } else {
00535 matches = JsreExecOnce(regexp, num_captures, subject, previous_index,
00536 subject16->GetTwoByteData(),
00537 offsets.vector(), offsets.length());
00538
00539 if (matches->IsJSArray()) {
00540 SetElement(result, i, matches);
00541 i++;
00542 previous_index = offsets.vector()[1];
00543 if (offsets.vector()[0] == offsets.vector()[1]) {
00544 previous_index++;
00545 }
00546 }
00547 }
00548 } while (matches->IsJSArray());
00549
00550
00551 if (matches->IsNull()) {
00552 return result;
00553 } else {
00554 return matches;
00555 }
00556 }
00557
00558
00559 int RegExpImpl::JsreCapture(Handle<JSRegExp> re) {
00560 FixedArray* value = FixedArray::cast(re->DataAt(JSRegExp::kJscreDataIndex));
00561 return Smi::cast(value->get(CAPTURE_INDEX))->value();
00562 }
00563
00564
00565 ByteArray* RegExpImpl::JsreInternal(Handle<JSRegExp> re) {
00566 FixedArray* value = FixedArray::cast(re->DataAt(JSRegExp::kJscreDataIndex));
00567 return ByteArray::cast(value->get(INTERNAL_INDEX));
00568 }
00569
00570 }}