Brian Silverman | 70325d6 | 2015-09-20 17:00:43 -0400 | [diff] [blame^] | 1 | // Copyright (c) 2007, Google Inc. |
| 2 | // All rights reserved. |
| 3 | // |
| 4 | // Redistribution and use in source and binary forms, with or without |
| 5 | // modification, are permitted provided that the following conditions are |
| 6 | // met: |
| 7 | // |
| 8 | // * Redistributions of source code must retain the above copyright |
| 9 | // notice, this list of conditions and the following disclaimer. |
| 10 | // * Redistributions in binary form must reproduce the above |
| 11 | // copyright notice, this list of conditions and the following disclaimer |
| 12 | // in the documentation and/or other materials provided with the |
| 13 | // distribution. |
| 14 | // * Neither the name of Google Inc. nor the names of its |
| 15 | // contributors may be used to endorse or promote products derived from |
| 16 | // this software without specific prior written permission. |
| 17 | // |
| 18 | // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS |
| 19 | // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT |
| 20 | // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR |
| 21 | // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT |
| 22 | // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, |
| 23 | // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT |
| 24 | // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, |
| 25 | // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY |
| 26 | // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT |
| 27 | // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE |
| 28 | // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
| 29 | // --- |
| 30 | // Author: falmeida@google.com (Filipe Almeida) |
| 31 | // |
| 32 | // Verify at different points during HTML processing that the parser is in the |
| 33 | // correct state. |
| 34 | // |
| 35 | // The annotated file consists of regular html blocks and html processing |
| 36 | // instructions with a target name of "state" and a list of comma separated key |
| 37 | // value pairs describing the expected state or invoking a parser method. |
| 38 | // Example: |
| 39 | // |
| 40 | // <html><body><?state state=text, tag=body ?> |
| 41 | // |
| 42 | // For a more detailed explanation of the acceptable values please consult |
| 43 | // htmlparser_cpp.h. Following is a list of the possible keys: |
| 44 | // |
| 45 | // state: Current parser state as returned by HtmlParser::state(). |
| 46 | // Possible values: text, tag, attr, value, comment or error. |
| 47 | // tag: Current tag name as returned by HtmlParser::tag() |
| 48 | // attr: Current attribute name as returned by HtmlParser::attr() |
| 49 | // attr_type: Current attribute type as returned by HtmlParser::attr_type() |
| 50 | // Possible values: none, regular, uri, js or style. |
| 51 | // attr_quoted: True if the attribute is quoted, false if it's not. |
| 52 | // in_js: True if currently processing javascript (either an attribute value |
| 53 | // that expects javascript, a script block or the parser being in |
| 54 | // MODE_JS) |
| 55 | // js_quoted: True if inside a javascript string literal. |
| 56 | // js_state: Current javascript state as returned by |
| 57 | // HtmlParser::javascript_state(). |
| 58 | // Possible values: text, q, dq, regexp or comment. |
| 59 | // in_css: True if currently inside a CSS section or attribute. |
| 60 | // line_number: Integer value containing the current line count. |
| 61 | // column_number: Integer value containing the current column count. |
| 62 | // value_index: Integer value containing the current character index in the |
| 63 | // current value starting from 0. |
| 64 | // is_url_start: True if this is the first character of a url attribute. |
| 65 | // reset: If true, resets the parser state to its initial values. |
| 66 | // reset_mode: Similar to reset but receives an argument that changes the |
| 67 | // parser mode into either mode html or mode js. |
| 68 | // insert_text: Executes HtmlParser::InsertText() if the argument is true. |
| 69 | |
| 70 | #include "config_for_unittests.h" |
| 71 | #include <stdlib.h> |
| 72 | #include <stdio.h> |
| 73 | #include <string.h> |
| 74 | #include <string> |
| 75 | #include <utility> |
| 76 | #include <vector> |
| 77 | #include <map> |
| 78 | #include "htmlparser/htmlparser_cpp.h" |
| 79 | #include "ctemplate/template_pathops.h" |
| 80 | #include "base/util.h" |
| 81 | |
| 82 | #define FAIL() EXPECT_TRUE(false) |
| 83 | TEST_INIT // Among other things, defines RUN_ALL_TESTS |
| 84 | |
| 85 | using std::map; |
| 86 | using std::pair; |
| 87 | using std::string; |
| 88 | using std::vector; |
| 89 | using GOOGLE_NAMESPACE::PathJoin; |
| 90 | |
| 91 | namespace ctemplate_htmlparser { |
| 92 | |
| 93 | // Maximum file size limit. |
| 94 | static const int kMaxFileSize = 1000000; |
| 95 | |
| 96 | static void ReadToString(const char* filename, string* s) { |
| 97 | const int bufsize = 8092; |
| 98 | char buffer[bufsize]; |
| 99 | size_t n; |
| 100 | FILE* fp = fopen(filename, "rb"); |
| 101 | if (!fp) PFATAL(filename); |
| 102 | while ((n=fread(buffer, 1, bufsize, fp)) > 0) { |
| 103 | if (ferror(fp)) PFATAL(filename); |
| 104 | s->append(string(buffer, n)); |
| 105 | } |
| 106 | fclose(fp); |
| 107 | } |
| 108 | |
| 109 | class HtmlparserCppTest : public testing::Test { |
| 110 | protected: |
| 111 | |
| 112 | typedef map<string, HtmlParser *> ContextMap; |
| 113 | |
| 114 | // Structure that stores the mapping between an id and a name. |
| 115 | struct IdNameMap { |
| 116 | int id; |
| 117 | const char *name; |
| 118 | }; |
| 119 | |
| 120 | // Mapping between the enum and the string representation of the state. |
| 121 | static const struct IdNameMap kStateMap[]; |
| 122 | |
| 123 | // Mapping between the enum and the string representation of the javascript |
| 124 | // state. |
| 125 | static const struct IdNameMap kJavascriptStateMap[]; |
| 126 | |
| 127 | // Mapping between the enum and the string representation of the attribute |
| 128 | // type. |
| 129 | static const struct IdNameMap kAttributeTypeMap[]; |
| 130 | |
| 131 | // Mapping between the enum and the string representation of the reset mode. |
| 132 | static const struct IdNameMap kResetModeMap[]; |
| 133 | |
| 134 | // String that marks the start of an annotation. |
| 135 | static const char kDirectiveBegin[]; |
| 136 | |
| 137 | // String that marks the end of an annotation. |
| 138 | static const char kDirectiveEnd[]; |
| 139 | |
| 140 | // Count the number of lines in a string. |
| 141 | static int UpdateLines(const string &str, int line); |
| 142 | |
| 143 | // Count the number of columns in a string. |
| 144 | static int UpdateColumns(const string &str, int column); |
| 145 | |
| 146 | // Converts a string to a boolean. |
| 147 | static bool StringToBool(const string &value); |
| 148 | |
| 149 | // Returns the name of the corresponding enum_id by consulting an array of |
| 150 | // type IdNameMap. |
| 151 | const char *IdToName(const struct IdNameMap *list, int enum_id); |
| 152 | |
| 153 | // Returns the enum_id of the correspondent name by consulting an array of |
| 154 | // type IdNameMap. |
| 155 | int NameToId(const struct IdNameMap *list, const string &name); |
| 156 | |
| 157 | // Reads the filename of an annotated html file and validates the |
| 158 | // annotations against the html parser state. |
| 159 | void ValidateFile(string filename); |
| 160 | |
| 161 | // Validate an annotation string against the current parser state. |
| 162 | void ProcessAnnotation(const string &dir); |
| 163 | |
| 164 | // Validate the parser state against the provided state. |
| 165 | void ValidateState(const string &tag); |
| 166 | |
| 167 | // Validate the parser tag name against the provided tag name. |
| 168 | void ValidateTag(const string &tag); |
| 169 | |
| 170 | // Validate the parser attribute name against the provided attribute name. |
| 171 | void ValidateAttribute(const string &attr); |
| 172 | |
| 173 | // Validate the parser attribute value contents against the provided string. |
| 174 | void ValidateValue(const string &contents); |
| 175 | |
| 176 | // Validate the parser attribute type against the provided attribute type. |
| 177 | void ValidateAttributeType(const string &attr); |
| 178 | |
| 179 | // Validate the parser attribute quoted state against the provided |
| 180 | // boolean. |
| 181 | void ValidateAttributeQuoted(const string "ed); |
| 182 | |
| 183 | // Validates the parser in javascript state against the provided boolean. |
| 184 | void ValidateInJavascript(const string "ed); |
| 185 | |
| 186 | // Validate the current parser javascript quoted state against the provided |
| 187 | // boolean. |
| 188 | void ValidateJavascriptQuoted(const string "ed); |
| 189 | |
| 190 | // Validate the javascript parser state against the provided state. |
| 191 | void ValidateJavascriptState(const string &expected_state); |
| 192 | |
| 193 | // Validates the parser css state against the provided boolean. |
| 194 | void ValidateInCss(const string "ed); |
| 195 | |
| 196 | // Validate the line count against the expected count. |
| 197 | void ValidateLine(const string &expected_line); |
| 198 | |
| 199 | // Validate the line count against the expected count. |
| 200 | void ValidateColumn(const string &expected_column); |
| 201 | |
| 202 | // Validate the current parser value index against the provided index. |
| 203 | void ValidateValueIndex(const string &value_index); |
| 204 | |
| 205 | // Validate the parser is_url_start value against the provided one. |
| 206 | void ValidateIsUrlStart(const string &expected_is_url_start); |
| 207 | |
| 208 | void SetUp() { |
| 209 | parser_.Reset(); |
| 210 | } |
| 211 | |
| 212 | void TearDown() { |
| 213 | // Delete all parser instances from the context map |
| 214 | for (ContextMap::iterator iter = contextMap.begin(); |
| 215 | iter != contextMap.end(); ++iter) { |
| 216 | delete iter->second; |
| 217 | } |
| 218 | contextMap.clear(); |
| 219 | } |
| 220 | |
| 221 | // Map containing the registers where the parser context is saved. |
| 222 | ContextMap contextMap; |
| 223 | |
| 224 | // Parser instance |
| 225 | HtmlParser parser_; |
| 226 | |
| 227 | friend class Test_HtmlparserTest_TestFiles; |
| 228 | }; |
| 229 | |
| 230 | const char HtmlparserCppTest::kDirectiveBegin[] = "<?state"; |
| 231 | const char HtmlparserCppTest::kDirectiveEnd[] = "?>"; |
| 232 | |
| 233 | const struct HtmlparserCppTest::IdNameMap |
| 234 | HtmlparserCppTest::kStateMap[] = { |
| 235 | { HtmlParser::STATE_TEXT, "text" }, |
| 236 | { HtmlParser::STATE_TAG, "tag" }, |
| 237 | { HtmlParser::STATE_ATTR, "attr" }, |
| 238 | { HtmlParser::STATE_VALUE, "value" }, |
| 239 | { HtmlParser::STATE_COMMENT, "comment" }, |
| 240 | { HtmlParser::STATE_JS_FILE, "js_file" }, |
| 241 | { HtmlParser::STATE_CSS_FILE, "css_file" }, |
| 242 | { HtmlParser::STATE_ERROR, "error" }, |
| 243 | { 0, NULL } |
| 244 | }; |
| 245 | |
| 246 | const struct HtmlparserCppTest::IdNameMap |
| 247 | HtmlparserCppTest::kAttributeTypeMap[] = { |
| 248 | { HtmlParser::ATTR_NONE, "none" }, |
| 249 | { HtmlParser::ATTR_REGULAR, "regular" }, |
| 250 | { HtmlParser::ATTR_URI, "uri" }, |
| 251 | { HtmlParser::ATTR_JS, "js" }, |
| 252 | { HtmlParser::ATTR_STYLE, "style" }, |
| 253 | { 0, NULL } |
| 254 | }; |
| 255 | |
| 256 | const struct HtmlparserCppTest::IdNameMap |
| 257 | HtmlparserCppTest::kJavascriptStateMap[] = { |
| 258 | { JavascriptParser::STATE_TEXT, "text" }, |
| 259 | { JavascriptParser::STATE_Q, "q" }, |
| 260 | { JavascriptParser::STATE_DQ, "dq" }, |
| 261 | { JavascriptParser::STATE_REGEXP, "regexp" }, |
| 262 | { JavascriptParser::STATE_COMMENT, "comment" }, |
| 263 | { 0, NULL } |
| 264 | }; |
| 265 | |
| 266 | const struct HtmlparserCppTest::IdNameMap |
| 267 | HtmlparserCppTest::kResetModeMap[] = { |
| 268 | { HtmlParser::MODE_HTML, "html" }, |
| 269 | { HtmlParser::MODE_JS, "js" }, |
| 270 | { HtmlParser::MODE_CSS, "css" }, |
| 271 | { HtmlParser::MODE_HTML_IN_TAG, "html_in_tag" }, |
| 272 | { 0, NULL } |
| 273 | }; |
| 274 | |
| 275 | |
| 276 | // Count the number of lines in a string. |
| 277 | int HtmlparserCppTest::UpdateLines(const string &str, int line) { |
| 278 | int linecount = line; |
| 279 | for (string::size_type i = 0; i < str.length(); ++i) { |
| 280 | if (str[i] == '\n') |
| 281 | ++linecount; |
| 282 | } |
| 283 | return linecount; |
| 284 | } |
| 285 | |
| 286 | // Count the number of columns in a string. |
| 287 | int HtmlparserCppTest::UpdateColumns(const string &str, int column) { |
| 288 | // Number of bytes since the last newline. |
| 289 | size_t last_newline = str.rfind('\n'); |
| 290 | |
| 291 | // If no newline was found, we just sum up all the characters in the |
| 292 | // annotation. |
| 293 | if (last_newline == string::npos) { |
| 294 | return static_cast<int>(column + str.size() + |
| 295 | strlen(kDirectiveBegin) + strlen(kDirectiveEnd)); |
| 296 | // If a newline was found, the new column count becomes the number of |
| 297 | // characters after the last newline. |
| 298 | } else { |
| 299 | return static_cast<int>(str.size() + strlen(kDirectiveEnd) - last_newline); |
| 300 | } |
| 301 | } |
| 302 | |
| 303 | |
| 304 | // Converts a string to a boolean. |
| 305 | bool HtmlparserCppTest::StringToBool(const string &value) { |
| 306 | if (strcasecmp(value.c_str(), "true") == 0) { |
| 307 | return true; |
| 308 | } else if (strcasecmp(value.c_str(), "false") == 0) { |
| 309 | return false; |
| 310 | } else { |
| 311 | LOG(FATAL) << "Unknown boolean value"; |
| 312 | } |
| 313 | } |
| 314 | |
| 315 | // Returns the name of the corresponding enum_id by consulting an array of |
| 316 | // type IdNameMap. |
| 317 | const char *HtmlparserCppTest::IdToName(const struct IdNameMap *list, |
| 318 | int enum_id) { |
| 319 | CHECK(list != NULL); |
| 320 | while (list->name) { |
| 321 | if (enum_id == list->id) { |
| 322 | return list->name; |
| 323 | } |
| 324 | list++; |
| 325 | } |
| 326 | LOG(FATAL) << "Unknown id"; |
| 327 | } |
| 328 | |
| 329 | // Returns the enum_id of the correspondent name by consulting an array of |
| 330 | // type IdNameMap. |
| 331 | int HtmlparserCppTest::NameToId(const struct IdNameMap *list, |
| 332 | const string &name) { |
| 333 | CHECK(list != NULL); |
| 334 | while (list->name) { |
| 335 | if (name.compare(list->name) == 0) { |
| 336 | return list->id; |
| 337 | } |
| 338 | list++; |
| 339 | } |
| 340 | LOG(FATAL) << "Unknown name"; |
| 341 | } |
| 342 | |
| 343 | // Validate the parser state against the provided state. |
| 344 | void HtmlparserCppTest::ValidateState(const string &expected_state) { |
| 345 | const char* parsed_state = IdToName(kStateMap, parser_.state()); |
| 346 | EXPECT_TRUE(parsed_state != NULL); |
| 347 | EXPECT_TRUE(!expected_state.empty()); |
| 348 | EXPECT_EQ(expected_state, string(parsed_state)) |
| 349 | << "Unexpected state at line " << parser_.line_number(); |
| 350 | } |
| 351 | |
| 352 | // Validate the parser tag name against the provided tag name. |
| 353 | void HtmlparserCppTest::ValidateTag(const string &expected_tag) { |
| 354 | EXPECT_TRUE(parser_.tag() != NULL); |
| 355 | EXPECT_TRUE(expected_tag == parser_.tag()) |
| 356 | << "Unexpected attr tag name at line " << parser_.line_number(); |
| 357 | } |
| 358 | |
| 359 | // Validate the parser attribute name against the provided attribute name. |
| 360 | void HtmlparserCppTest::ValidateAttribute(const string &expected_attr) { |
| 361 | EXPECT_TRUE(parser_.attribute() != NULL); |
| 362 | EXPECT_EQ(expected_attr, parser_.attribute()) |
| 363 | << "Unexpected attr name value at line " << parser_.line_number(); |
| 364 | } |
| 365 | |
| 366 | // Validate the parser attribute value contents against the provided string. |
| 367 | void HtmlparserCppTest::ValidateValue(const string &expected_value) { |
| 368 | EXPECT_TRUE(parser_.value() != NULL); |
| 369 | const string parsed_state(parser_.value()); |
| 370 | EXPECT_EQ(expected_value, parsed_state) |
| 371 | << "Unexpected value at line " << parser_.line_number(); |
| 372 | } |
| 373 | |
| 374 | // Validate the parser attribute type against the provided attribute type. |
| 375 | void HtmlparserCppTest::ValidateAttributeType( |
| 376 | const string &expected_attr_type) { |
| 377 | const char *parsed_attr_type = IdToName(kAttributeTypeMap, |
| 378 | parser_.AttributeType()); |
| 379 | EXPECT_TRUE(parsed_attr_type != NULL); |
| 380 | EXPECT_TRUE(!expected_attr_type.empty()); |
| 381 | EXPECT_EQ(expected_attr_type, string(parsed_attr_type)) |
| 382 | << "Unexpected attr_type value at line " << parser_.line_number(); |
| 383 | } |
| 384 | |
| 385 | // Validate the parser attribute quoted state against the provided |
| 386 | // boolean. |
| 387 | void HtmlparserCppTest::ValidateAttributeQuoted( |
| 388 | const string &expected_attr_quoted) { |
| 389 | bool attr_quoted_bool = StringToBool(expected_attr_quoted); |
| 390 | EXPECT_EQ(attr_quoted_bool, parser_.IsAttributeQuoted()) |
| 391 | << "Unexpected attr_quoted value at line " << parser_.line_number(); |
| 392 | } |
| 393 | |
| 394 | // Validates the parser in javascript state against the provided boolean. |
| 395 | void HtmlparserCppTest::ValidateInJavascript(const string &expected_in_js) { |
| 396 | bool in_js_bool = StringToBool(expected_in_js); |
| 397 | EXPECT_EQ(in_js_bool, parser_.InJavascript()) |
| 398 | << "Unexpected in_js value at line " << parser_.line_number(); |
| 399 | } |
| 400 | |
| 401 | // Validate the current parser javascript quoted state against the provided |
| 402 | // boolean. |
| 403 | void HtmlparserCppTest::ValidateJavascriptQuoted( |
| 404 | const string &expected_js_quoted) { |
| 405 | bool js_quoted_bool = StringToBool(expected_js_quoted); |
| 406 | EXPECT_EQ(js_quoted_bool, parser_.IsJavascriptQuoted()) |
| 407 | << "Unexpected js_quoted value at line " << parser_.line_number(); |
| 408 | } |
| 409 | |
| 410 | // Validate the javascript parser state against the provided state. |
| 411 | void HtmlparserCppTest::ValidateJavascriptState(const string &expected_state) { |
| 412 | const char* parsed_state = IdToName(kJavascriptStateMap, |
| 413 | parser_.javascript_state()); |
| 414 | EXPECT_TRUE(parsed_state != NULL); |
| 415 | EXPECT_TRUE(!expected_state.empty()); |
| 416 | EXPECT_EQ(expected_state, string(parsed_state)) |
| 417 | << "Unexpected javascript state at line " << parser_.line_number(); |
| 418 | } |
| 419 | |
| 420 | // Validates the parser css state against the provided boolean. |
| 421 | void HtmlparserCppTest::ValidateInCss(const string &expected_in_css) { |
| 422 | bool in_css_bool = StringToBool(expected_in_css); |
| 423 | EXPECT_EQ(in_css_bool, parser_.InCss()) |
| 424 | << "Unexpected in_css value at line " << parser_.line_number(); |
| 425 | } |
| 426 | |
| 427 | // Validate the line count against the expected count. |
| 428 | void HtmlparserCppTest::ValidateLine(const string &expected_line) { |
| 429 | int line; |
| 430 | CHECK(safe_strto32(expected_line, &line)); |
| 431 | EXPECT_EQ(line, parser_.line_number()) |
| 432 | << "Unexpected line count at line " << parser_.line_number(); |
| 433 | } |
| 434 | |
| 435 | // Validate the line count against the expected count. |
| 436 | void HtmlparserCppTest::ValidateColumn(const string &expected_column) { |
| 437 | int column; |
| 438 | CHECK(safe_strto32(expected_column, &column)); |
| 439 | EXPECT_EQ(column, parser_.column_number()) |
| 440 | << "Unexpected column count at line " << parser_.line_number(); |
| 441 | } |
| 442 | |
| 443 | // Validate the current parser value index against the provided index. |
| 444 | void HtmlparserCppTest::ValidateValueIndex(const string &expected_value_index) { |
| 445 | int index; |
| 446 | CHECK(safe_strto32(expected_value_index, &index)); |
| 447 | EXPECT_EQ(index, parser_.ValueIndex()) |
| 448 | << "Unexpected value_index value at line " << parser_.line_number(); |
| 449 | } |
| 450 | |
| 451 | // Validate the parser is_url_start value against the provided one. |
| 452 | void HtmlparserCppTest::ValidateIsUrlStart( |
| 453 | const string &expected_is_url_start) { |
| 454 | bool is_url_start_bool = StringToBool(expected_is_url_start); |
| 455 | EXPECT_EQ(is_url_start_bool, parser_.IsUrlStart()) |
| 456 | << "Unexpected is_url_start value at line " << parser_.line_number(); |
| 457 | } |
| 458 | |
| 459 | // Validate an annotation string against the current parser state. |
| 460 | // |
| 461 | // Split the annotation into a list of key value pairs and call the appropriate |
| 462 | // handler for each pair. |
| 463 | void HtmlparserCppTest::ProcessAnnotation(const string &annotation) { |
| 464 | vector< pair< string, string > > pairs; |
| 465 | SplitStringIntoKeyValuePairs(annotation, "=", ",", &pairs); |
| 466 | |
| 467 | vector< pair< string, string > >::iterator iter; |
| 468 | |
| 469 | iter = pairs.begin(); |
| 470 | for (iter = pairs.begin(); iter != pairs.end(); ++iter) { |
| 471 | StripWhiteSpace(&iter->first); |
| 472 | StripWhiteSpace(&iter->second); |
| 473 | |
| 474 | if (iter->first.compare("state") == 0) { |
| 475 | ValidateState(iter->second); |
| 476 | } else if (iter->first.compare("tag") == 0) { |
| 477 | ValidateTag(iter->second); |
| 478 | } else if (iter->first.compare("attr") == 0) { |
| 479 | ValidateAttribute(iter->second); |
| 480 | } else if (iter->first.compare("value") == 0) { |
| 481 | ValidateValue(iter->second); |
| 482 | } else if (iter->first.compare("attr_type") == 0) { |
| 483 | ValidateAttributeType(iter->second); |
| 484 | } else if (iter->first.compare("attr_quoted") == 0) { |
| 485 | ValidateAttributeQuoted(iter->second); |
| 486 | } else if (iter->first.compare("in_js") == 0) { |
| 487 | ValidateInJavascript(iter->second); |
| 488 | } else if (iter->first.compare("js_quoted") == 0) { |
| 489 | ValidateJavascriptQuoted(iter->second); |
| 490 | } else if (iter->first.compare("js_state") == 0) { |
| 491 | ValidateJavascriptState(iter->second); |
| 492 | } else if (iter->first.compare("in_css") == 0) { |
| 493 | ValidateInCss(iter->second); |
| 494 | } else if (iter->first.compare("line_number") == 0) { |
| 495 | ValidateLine(iter->second); |
| 496 | } else if (iter->first.compare("column_number") == 0) { |
| 497 | ValidateColumn(iter->second); |
| 498 | } else if (iter->first.compare("value_index") == 0) { |
| 499 | ValidateValueIndex(iter->second); |
| 500 | } else if (iter->first.compare("is_url_start") == 0) { |
| 501 | ValidateIsUrlStart(iter->second); |
| 502 | } else if (iter->first.compare("save_context") == 0) { |
| 503 | if (!contextMap.count(iter->second)) { |
| 504 | contextMap[iter->second] = new HtmlParser(); |
| 505 | } |
| 506 | contextMap[iter->second]->CopyFrom(&parser_); |
| 507 | } else if (iter->first.compare("load_context") == 0) { |
| 508 | CHECK(contextMap.count(iter->second)); |
| 509 | parser_.CopyFrom(contextMap[iter->second]); |
| 510 | } else if (iter->first.compare("reset") == 0) { |
| 511 | if (StringToBool(iter->second)) { |
| 512 | parser_.Reset(); |
| 513 | } |
| 514 | } else if (iter->first.compare("reset_mode") == 0) { |
| 515 | HtmlParser::Mode mode = |
| 516 | static_cast<HtmlParser::Mode>(NameToId(kResetModeMap, iter->second)); |
| 517 | parser_.ResetMode(mode); |
| 518 | } else if (iter->first.compare("insert_text") == 0) { |
| 519 | if (StringToBool(iter->second)) { |
| 520 | parser_.InsertText(); |
| 521 | } |
| 522 | } else { |
| 523 | FAIL() << "Unknown test directive: " << iter->first; |
| 524 | } |
| 525 | } |
| 526 | } |
| 527 | |
| 528 | // Validates an html annotated file against the parser state. |
| 529 | // |
| 530 | // It iterates over the html file splitting it into html blocks and annotation |
| 531 | // blocks. It sends the html block to the parser and uses the annotation block |
| 532 | // to validate the parser state. |
| 533 | void HtmlparserCppTest::ValidateFile(string filename) { |
| 534 | // If TEMPLATE_ROOTDIR is set in the environment, it overrides the |
| 535 | // default of ".". We use an env-var rather than argv because |
| 536 | // that's what automake supports most easily. |
| 537 | const char* template_rootdir = getenv("TEMPLATE_ROOTDIR"); |
| 538 | if (template_rootdir == NULL) |
| 539 | template_rootdir = DEFAULT_TEMPLATE_ROOTDIR; // probably "." |
| 540 | string dir = PathJoin(template_rootdir, "src"); |
| 541 | dir = PathJoin(dir, "tests"); |
| 542 | dir = PathJoin(dir, "htmlparser_testdata"); |
| 543 | const string fullpath = PathJoin(dir, filename); |
| 544 | fprintf(stderr, "Validating %s", fullpath.c_str()); |
| 545 | string buffer; |
| 546 | ReadToString(fullpath.c_str(), &buffer); |
| 547 | |
| 548 | // Start of the current html block. |
| 549 | size_t start_html = 0; |
| 550 | |
| 551 | // Start of the next annotation. |
| 552 | size_t start_annotation = buffer.find(kDirectiveBegin, 0); |
| 553 | |
| 554 | // Ending of the current annotation. |
| 555 | size_t end_annotation = buffer.find(kDirectiveEnd, start_annotation); |
| 556 | |
| 557 | while (start_annotation != string::npos) { |
| 558 | string html_block(buffer, start_html, start_annotation - start_html); |
| 559 | parser_.Parse(html_block); |
| 560 | |
| 561 | start_annotation += strlen(kDirectiveBegin); |
| 562 | |
| 563 | string annotation_block(buffer, start_annotation, |
| 564 | end_annotation - start_annotation); |
| 565 | ProcessAnnotation(annotation_block); |
| 566 | |
| 567 | // Update line and column count. |
| 568 | parser_.set_line_number(UpdateLines(annotation_block, |
| 569 | parser_.line_number())); |
| 570 | parser_.set_column_number(UpdateColumns(annotation_block, |
| 571 | parser_.column_number())); |
| 572 | |
| 573 | start_html = end_annotation + strlen(kDirectiveEnd); |
| 574 | start_annotation = buffer.find(kDirectiveBegin, start_html); |
| 575 | end_annotation = buffer.find(kDirectiveEnd, start_annotation); |
| 576 | |
| 577 | // Check for unclosed annotation. |
| 578 | CHECK(!(start_annotation != string::npos && |
| 579 | end_annotation == string::npos)); |
| 580 | } |
| 581 | } |
| 582 | |
| 583 | static vector<string> g_filenames; |
| 584 | #define TEST_FILE(testname, filename) \ |
| 585 | struct Register_##testname { \ |
| 586 | Register_##testname() { g_filenames.push_back(filename); } \ |
| 587 | }; \ |
| 588 | static Register_##testname g_register_##testname |
| 589 | |
| 590 | TEST(HtmlparserTest, TestFiles) { |
| 591 | HtmlparserCppTest tester; |
| 592 | for (vector<string>::const_iterator it = g_filenames.begin(); |
| 593 | it != g_filenames.end(); ++it) { |
| 594 | tester.SetUp(); |
| 595 | tester.ValidateFile(*it); |
| 596 | tester.TearDown(); |
| 597 | } |
| 598 | } |
| 599 | |
| 600 | TEST_FILE(SimpleHtml, "simple.html"); |
| 601 | TEST_FILE(Comments, "comments.html"); |
| 602 | TEST_FILE(JavascriptBlock, "javascript_block.html"); |
| 603 | TEST_FILE(JavascriptAttribute, "javascript_attribute.html"); |
| 604 | TEST_FILE(JavascriptRegExp, "javascript_regexp.html"); |
| 605 | TEST_FILE(Tags, "tags.html"); |
| 606 | TEST_FILE(Context, "context.html"); |
| 607 | TEST_FILE(Reset, "reset.html"); |
| 608 | TEST_FILE(CData, "cdata.html"); |
| 609 | TEST_FILE(LineCount, "position.html"); |
| 610 | |
| 611 | TEST(Htmlparser, Error) { |
| 612 | HtmlParser html; |
| 613 | |
| 614 | EXPECT_EQ(html.GetErrorMessage(), (const char *)NULL); |
| 615 | EXPECT_EQ(html.Parse("<a href='http://www.google.com' ''>\n"), |
| 616 | HtmlParser::STATE_ERROR); |
| 617 | |
| 618 | EXPECT_STREQ(html.GetErrorMessage(), |
| 619 | "Unexpected character '\\'' in state 'tag_space'"); |
| 620 | html.Reset(); |
| 621 | EXPECT_EQ(html.GetErrorMessage(), (const char *)NULL); |
| 622 | } |
| 623 | |
| 624 | } // namespace ctemplate_htmlparser |
| 625 | |
| 626 | int main(int argc, char **argv) { |
| 627 | |
| 628 | return RUN_ALL_TESTS(); |
| 629 | } |