説明を見る。00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024
00025
00026
00027
00028 #ifndef V8_SCANNER_H_
00029 #define V8_SCANNER_H_
00030
00031 #include "token.h"
00032 #include "char-predicates-inl.h"
00033
00034 namespace v8 { namespace internal {
00035
00036
00037 class UTF8Buffer {
00038 public:
00039 UTF8Buffer();
00040 ~UTF8Buffer();
00041
00042 void Initialize(char* src, int length);
00043 void AddChar(uc32 c);
00044 void Reset() { pos_ = 0; }
00045 int pos() const { return pos_; }
00046 char* data() const { return data_; }
00047
00048 private:
00049 char* data_;
00050 int size_;
00051 int pos_;
00052 };
00053
00054
00055 class UTF16Buffer {
00056 public:
00057 UTF16Buffer();
00058
00059 void Initialize(Handle<String> data, unibrow::CharacterStream* stream);
00060 void PushBack(uc32 ch);
00061 uc32 Advance();
00062 uint16_t CharAt(int index);
00063 int pos() const { return pos_; }
00064 int size() const { return size_; }
00065 Handle<String> SubString(int start, int end);
00066 List<uc32>* pushback_buffer() { return &pushback_buffer_; }
00067 void SeekForward(int pos);
00068
00069 private:
00070 Handle<String> data_;
00071 int pos_;
00072 int size_;
00073 List<uc32> pushback_buffer_;
00074 uc32 last_;
00075 unibrow::CharacterStream* stream_;
00076 };
00077
00078
00079 class Scanner {
00080 public:
00081
00082 typedef unibrow::Utf8InputBuffer<1024> Utf8Decoder;
00083
00084
00085 explicit Scanner(bool is_pre_parsing);
00086
00087
00088 void Init(Handle<String> source,
00089 unibrow::CharacterStream* stream,
00090 int position);
00091
00092
00093 Token::Value Next();
00094
00095
00096 Token::Value peek() const { return next_.token; }
00097
00098
00099 bool has_line_terminator_before_next() const {
00100 return has_line_terminator_before_next_;
00101 }
00102
00103 struct Location {
00104 Location(int b, int e) : beg_pos(b), end_pos(e) { }
00105 Location() : beg_pos(0), end_pos(0) { }
00106 int beg_pos;
00107 int end_pos;
00108 };
00109
00110
00111
00112 Location location() const { return current_.location; }
00113 Location peek_location() const { return next_.location; }
00114
00115
00116
00117
00118
00119 const char* literal_string() const {
00120 return &literals_.data()[current_.literal_pos];
00121 }
00122 int literal_length() const {
00123 return current_.literal_end - current_.literal_pos;
00124 }
00125
00126 Vector<const char> next_literal() const {
00127 return Vector<const char>(next_literal_string(), next_literal_length());
00128 }
00129
00130
00131
00132 const char* next_literal_string() const {
00133 return &literals_.data()[next_.literal_pos];
00134 }
00135
00136
00137 int next_literal_length() const {
00138 return next_.literal_end - next_.literal_pos;
00139 }
00140
00141
00142
00143 bool ScanRegExpPattern(bool seen_equal);
00144
00145
00146 bool ScanRegExpFlags();
00147
00148
00149
00150
00151
00152 void SeekForward(int pos);
00153
00154 Handle<String> SubString(int start_pos, int end_pos);
00155 bool stack_overflow() { return stack_overflow_; }
00156
00157 static StaticResource<Utf8Decoder>* utf8_decoder() { return &utf8_decoder_; }
00158
00159
00160
00161 static bool IsIdentifier(unibrow::CharacterStream* buffer);
00162
00163 static unibrow::Predicate<IdentifierStart, 128> kIsIdentifierStart;
00164 static unibrow::Predicate<IdentifierPart, 128> kIsIdentifierPart;
00165 static unibrow::Predicate<unibrow::LineTerminator, 128> kIsLineTerminator;
00166 static unibrow::Predicate<unibrow::WhiteSpace, 128> kIsWhiteSpace;
00167
00168 private:
00169
00170 UTF16Buffer source_;
00171 int position_;
00172
00173
00174
00175 UTF8Buffer literals_;
00176
00177 bool stack_overflow_;
00178 static StaticResource<Utf8Decoder> utf8_decoder_;
00179
00180
00181 uc32 c0_;
00182
00183
00184 struct TokenDesc {
00185 Token::Value token;
00186 Location location;
00187 int literal_pos, literal_end;
00188 };
00189
00190 TokenDesc current_;
00191 TokenDesc next_;
00192 bool has_line_terminator_before_next_;
00193 bool is_pre_parsing_;
00194
00195 static const int kCharacterLookaheadBufferSize = 1;
00196
00197
00198 void StartLiteral();
00199 void AddChar(uc32 ch);
00200 void AddCharAdvance();
00201 void TerminateLiteral();
00202
00203
00204 void Advance();
00205 void PushBack(uc32 ch);
00206
00207 void SkipWhiteSpace(bool initial);
00208 Token::Value SkipSingleLineComment();
00209 Token::Value SkipMultiLineComment();
00210
00211 inline Token::Value Select(Token::Value tok);
00212 inline Token::Value Select(uc32 next, Token::Value then, Token::Value else_);
00213
00214 void Scan();
00215 Token::Value ScanToken();
00216 void ScanDecimalDigits();
00217 Token::Value ScanNumber(bool seen_period);
00218 Token::Value ScanIdentifier();
00219 uc32 ScanHexEscape(uc32 c, int length);
00220 uc32 ScanOctalEscape(uc32 c, int length);
00221 void ScanEscape();
00222 Token::Value ScanString();
00223
00224
00225 Token::Value ScanHtmlComment();
00226
00227
00228 int source_pos() {
00229 return source_.pos() - kCharacterLookaheadBufferSize + position_;
00230 }
00231
00232
00233
00234 uc32 ScanIdentifierUnicodeEscape();
00235 };
00236
00237 } }
00238
00239 #endif // V8_SCANNER_H_