Squashed 'third_party/ctemplate/' content from commit 6742f62
Change-Id: I828e4e4c906f13ba19944d78a8a78652b62949af
git-subtree-dir: third_party/ctemplate
git-subtree-split: 6742f6233db12f545e90baa8f34f5c29c4eb396a
diff --git a/src/tests/htmlparser_testdata/cdata.html b/src/tests/htmlparser_testdata/cdata.html
new file mode 100644
index 0000000..817938b
--- /dev/null
+++ b/src/tests/htmlparser_testdata/cdata.html
@@ -0,0 +1,112 @@
+<html>
+<?state state=text, tag=html ?>
+
+ <head>
+ <?state state=text, tag=head ?>
+ <!-- Title element with markup -->
+ <title>
+ <?state state=text, tag=title ?>
+ <h1>
+ <?state state=text, tag=title ?>
+ </h1>
+ <!--
+ <?state state=text, tag=title ?>
+ </title>
+ <?state state=text, tag=title ?>
+ -->
+ <?state state=text, tag=title ?>
+ </title>
+ <?state state=text ?>
+
+ <!-- Style element with attributes -->
+ <style a=b>
+ <b><?state state=text, tag=style, in_js=false, in_css=true?></b>
+ </style>
+ <?state in_css=false?>
+ </head>
+<body>
+<?state state=text, in_js=false ?>
+ <!-- PCDATA nested block -->
+ <b>
+ <?state state=text, tag=b ?>
+ <i>
+ <?state state=text, tag=i ?>
+ </i>
+ <?state state=text ?>
+ </b>
+ <?state state=text ?>
+
+ <!-- Textarea element with space at the end of the closing tag -->
+ <textarea>
+ <?state state=text, tag=textarea ?>
+ <b>
+ <?state state=text, tag=textarea ?>
+ <i>
+ <?state state=text, tag=textarea, in_css=false ?>
+ <!--
+ <?state state=text, tag=textarea ?>
+ </textarea>
+ <?state state=text, tag=textarea ?>
+ -->
+ </i>
+ <?state state=text, tag=textarea ?>
+ </b>
+ <?state state=text, tag=textarea ?>
+ </textarea >
+
+<?state state=text ?>
+
+ <!-- script tag with other tags inside -->
+ <script>
+ document.write("
+ <?state in_js=true, js_quoted=true, tag=script ?>
+ <style>
+ .none { display:none }
+ </style>
+ <?state in_js=true, js_quoted=true ?>
+ ");
+ <?state in_js=true, js_quoted=false ?>
+ </script>
+
+ <?state in_js=false ?>
+
+ <!-- script tag with a backslash quoted script tag -->
+ <script>
+ <?state in_js=true, js_quoted=false ?>
+ document.body.innerHTML = '<script><\/script>'
+ <?state in_js=true, js_quoted=false ?>
+ </script>
+
+ <?state in_js=false ?>
+
+ <!-- </script> appearing between javascript comments -->
+ <script>
+ <!--
+ <?state in_js=true, js_quoted=false ?>
+ document.body.innerHTML = '<script></script>'
+ <?state in_js=true, js_quoted=false ?>
+ -->
+ </script>
+
+ <?state in_js=false ?>
+
+ <!-- Closing script with an extra space at the end of the tag. Some browsers
+ ignore this tag and some browsers honour it. We honour it. -->
+ <script>
+ <?state in_js=true, js_quoted=false ?>
+ document.body.innerHTML = '<script><\/script>'
+ <?state in_js=true, js_quoted=false ?>
+ </script >
+
+ <script>
+ <?state in_js=true, js_quoted=false ?>
+ </script%>
+ <?state in_js=true, js_quoted=false ?>
+ </script >
+
+ <?state in_js=false ?>
+
+</body>
+<?state in_js=false ?>
+</html>
+
diff --git a/src/tests/htmlparser_testdata/comments.html b/src/tests/htmlparser_testdata/comments.html
new file mode 100644
index 0000000..391f3f0
--- /dev/null
+++ b/src/tests/htmlparser_testdata/comments.html
@@ -0,0 +1,61 @@
+<!-- Tests for HTML comments and cdata escaping text spans. -->
+<html>
+
+<body>
+
+<?state state=text, tag=body ?>
+
+<!-- HTML doctype declaration -->
+<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN"
+<?state state=text, tag=body?>
+"http://www.w3.org/TR/html4/strict.dtd">
+<?state state=text, tag=body ?>
+
+<!-- Regular HTML comment -->
+<!-- <?state state=comment, tag=body ?> -->
+<?state state=text, tag=body ?>
+
+<!-- HTML comment with tags -->
+<!-- > -> </b> <a href="<?state state=comment, tag=body ?>"></a>-->
+<?state state=text, tag=body ?>
+
+<!-- Should not be interpreted as an SGML comment -->
+<?state state=text, tag=body ?>
+<!-- -- -->
+<?state state=text, tag=body ?>
+
+<!-- -- Sync back the SGML comment for editors who parse SGML comments
+(ie: vim) -->
+<?state state=text, tag=body ?>
+
+<!-- Multiple dashes at the end. -->
+<!----- <?state state=comment, tag=body ?> --><?state state=text, tag=body ?>
+<!----- <?state state=comment, tag=body ?> ---><?state state=text, tag=body ?>
+<!----- <?state state=comment, tag=body ?> ----><?state state=text, tag=body ?>
+<!----- <?state state=comment, tag=body ?> -----><?state state=text, tag=body ?>
+
+<!-- Some more misc tests -->
+<!-- test <?state state=comment?> --><?state state=text?> test test --><?state state=text?>
+<!-- test -> test test --><?state state=text?>
+<!-- test test='--><?state state=text?>'
+<!----><?state state=text?>
+<!-----><?state state=text?>
+
+<!-- Make sure the double dash sequence is not interpreted as an SGML comment
+by introducing a legit postfix decrement operator -->
+<?state state=text, in_js=false ?>
+<script>
+<!--
+<?state state=text, in_js=true ?>
+
+var x = 1;
+x--;
+<?state state=text, in_js=true ?>
+-->
+</script>
+<?state state=text, in_js=false ?>
+
+</body>
+
+</html>
+<?state state=text ?>
diff --git a/src/tests/htmlparser_testdata/context.html b/src/tests/htmlparser_testdata/context.html
new file mode 100644
index 0000000..aaaaa46
--- /dev/null
+++ b/src/tests/htmlparser_testdata/context.html
@@ -0,0 +1,79 @@
+<!-- Tests for CopyFrom() -->
+<html>
+ <body>
+ <?state save_context=body?>
+ <?state tag=body?>
+ <h1>
+ <?state save_context=h1?>
+ <?state tag=h1?>
+ <?state load_context=body?>
+ <?state tag=body?>
+
+ <a href="http://www.google.com<?state save_context=href?>"></a>
+
+ <script>
+ <?state save_context=js?>
+ var x ='<?state save_context=js_str_literal?>
+ <?state load_context=href?><?state state=value,
+ tag=a,
+ attr=href,
+ in_js=false,
+ value=http://www.google.com?>
+ <?state load_context=js_str_literal?>
+ <?state state=text,
+ tag=script,
+ in_js=true,
+ js_quoted=true?>';
+
+ // Regexp handling
+ var expression = 10 / <?state save_context=js_expression?> / <?state save_context=js_regexp?> /;
+
+ <?state load_context=js_expression?><?state js_state=text?>
+ <?state load_context=js_regexp?><?state js_state=regexp?> /;
+ <?state js_state=text?>
+
+ </script>
+ <?state in_js=false?>
+ <?state load_context=js?>
+ <?state tag=script, js_state=text, in_js=true?>
+ </script>
+
+ <!-- html encoded script attribute -->
+ <a onclick="alert('<?state save_context=onclick_str_literal?>'"></a>
+ <?state in_js=false?>
+ <?state load_context=onclick_str_literal?><?state state=value,
+ tag=a,
+ attr=onclick,
+ attr_type=js,
+ in_js=true,
+ js_quoted=true?>'">
+ <?state state=text, tag=a?>
+ </a>
+
+<!-- ResetMode() tests -->
+
+<?state reset_mode=css?>
+<?state in_css=true?>
+<?state state=css_file?>
+<?state save_context=mode_css?>
+
+<?state reset_mode=html?>
+<?state state=text?>
+<?state in_css=false?>
+<?state load_context=mode_css?>
+<?state in_css=true?>
+<?state state=css_file?>
+
+<?state reset_mode=html_in_tag?>blah=<?state save_context=in_tag?>
+<?state load_context=onclick_str_literal?><?state state=value,
+ tag=a,
+ attr=onclick,
+ attr_type=js,
+ in_js=true,
+ js_quoted=true?>'">
+<?state load_context=in_tag?>
+<?state attr=blah?>xpto<?state value=xpto?>
+
+
+ </body>
+</html>
diff --git a/src/tests/htmlparser_testdata/google.html b/src/tests/htmlparser_testdata/google.html
new file mode 100644
index 0000000..45dddd8
--- /dev/null
+++ b/src/tests/htmlparser_testdata/google.html
@@ -0,0 +1,3 @@
+<html><head><meta http-equiv="content-type" content="text/html; charset=ISO-8859-1"><title>Google</title><style>body,td,a,p,.h{font-family:arial,sans-serif}.h{font-size:20px}.h{color:#3366cc}.q{color:#00c}.ts td{padding:0}.ts{border-collapse:collapse}#gbar{height:22px;padding-left:2px}.gbh,.gbd{border-top:1px solid #c9d7f1;font-size:1px}.gbh{height:0;position:absolute;top:24px;width:100%}#gbi,#gbs{background:#fff;left:0;position:absolute;top:24px;visibility:hidden;z-index:1000}#gbi{border:1px solid;border-color:#c9d7f1 #36c #36c #a2bae7;z-index:1001}#guser{padding-bottom:7px !important}#gbar,#guser{font-size:13px;padding-top:1px !important}@media all{.gb1,.gb3{height:22px;margin-right:.73em;vertical-align:top}#gbar{float:left}}.gb2{display:block;padding:.2em .5em}a.gb1,a.gb2,a.gb3{color:#00c !important}.gb2,.gb3{text-decoration:none}a.gb2:hover{background:#36c;color:#fff !important}</style><script>window.google={kEI:"jigHScf6BKDwswP7-eSsAw",kEXPI:"17259,19016",kHL:"en"};
+function sf(){document.f.q.focus()}
+window.gbar={};(function(){var b=window.gbar,f,h;b.qs=function(a){var c=window.encodeURIComponent&&(document.forms[0].q||"").value;if(c)a.href=a.href.replace(/([?&])q=[^&]*|$/,function(i,g){return(g||"&")+"q="+encodeURIComponent(c)})};function j(a,c){a.visibility=h?"hidden":"visible";a.left=c+"px"}b.tg=function(a){a=a||window.event;var c=0,i,g=window.navExtra,d=document.getElementById("gbi"),e=a.target||a.srcElement;a.cancelBubble=true;if(!f){f=document.createElement(Array.every||window.createPopup?"iframe":"div");f.frameBorder="0";f.src="#";d.parentNode.appendChild(f).id="gbs";if(g)for(i in g)d.insertBefore(g[i],d.firstChild).className="gb2";document.onclick=b.close}if(e.className!="gb3")e=e.parentNode;do c+=e.offsetLeft;while(e=e.offsetParent);j(d.style,c);f.style.width=d.offsetWidth+"px";f.style.height=d.offsetHeight+"px";j(f.style,c);h=!h};b.close=function(a){h&&b.tg(a)}})();</script></head><body bgcolor=#ffffff text=#000000 link=#0000cc vlink=#551a8b alink=#ff0000 onload="sf();if(document.images){new Image().src='/images/nav_logo3.png'}" topmargin=3 marginheight=3><div id=gbar><nobr><b class=gb1>Web</b> <a href="http://images.google.com/imghp?hl=en&tab=wi" onclick=gbar.qs(this) class=gb1>Images</a> <a href="http://maps.google.com/maps?hl=en&tab=wl" onclick=gbar.qs(this) class=gb1>Maps</a> <a href="http://news.google.com/nwshp?hl=en&tab=wn" onclick=gbar.qs(this) class=gb1>News</a> <a href="http://www.google.com/prdhp?hl=en&tab=wf" onclick=gbar.qs(this) class=gb1>Shopping</a> <a href="http://mail.google.com/mail/?hl=en&tab=wm" class=gb1>Gmail</a> <a href="http://www.google.com/intl/en/options/" onclick="this.blur();gbar.tg(event);return !1" class=gb3><u>more</u> <small>▼</small></a><div id=gbi> <a href="http://video.google.com/?hl=en&tab=wv" onclick=gbar.qs(this) class=gb2>Video</a> <a href="http://groups.google.com/grphp?hl=en&tab=wg" onclick=gbar.qs(this) class=gb2>Groups</a> <a href="http://books.google.com/bkshp?hl=en&tab=wp" onclick=gbar.qs(this) 
class=gb2>Books</a> <a href="http://scholar.google.com/schhp?hl=en&tab=ws" onclick=gbar.qs(this) class=gb2>Scholar</a> <a href="http://finance.google.com/finance?hl=en&tab=we" onclick=gbar.qs(this) class=gb2>Finance</a> <a href="http://blogsearch.google.com/?hl=en&tab=wb" onclick=gbar.qs(this) class=gb2>Blogs</a> <div class=gb2><div class=gbd></div></div> <a href="http://www.youtube.com/?hl=en&tab=w1" onclick=gbar.qs(this) class=gb2>YouTube</a> <a href="http://www.google.com/calendar/render?hl=en&tab=wc" class=gb2>Calendar</a> <a href="http://picasaweb.google.com/home?hl=en&tab=wq" onclick=gbar.qs(this) class=gb2>Photos</a> <a href="http://docs.google.com/?hl=en&tab=wo" class=gb2>Documents</a> <a href="http://www.google.com/reader/view/?hl=en&tab=wy" class=gb2>Reader</a> <a href="http://sites.google.com/?hl=en&tab=w3" class=gb2>Sites</a> <div class=gb2><div class=gbd></div></div> <a href="http://www.google.com/intl/en/options/" class=gb2>even more »</a></div> </nobr></div><div class=gbh style=left:0></div><div class=gbh style=right:0></div><div align=right id=guser style="font-size:84%;padding:0 0 4px" width=100%><nobr><a href="/url?sa=p&pref=ig&pval=3&q=http://www.google.com/ig%3Fhl%3Den%26source%3Diglk&usg=AFQjCNFA18XPfgb7dKnXfKz7x7g1GDH1tg">iGoogle</a> | <a href="https://www.google.com/accounts/Login?continue=http://www.google.com/&hl=en">Sign in</a></nobr></div><center><br clear=all id=lgpd><img alt="Google" height=110 src="/intl/en_ALL/images/logo.gif" width=276><br><br><form action="/search" name=f><table cellpadding=0 cellspacing=0><tr valign=top><td width=25%> </td><td align=center nowrap><input name=hl type=hidden value=en><input type=hidden name=ie value="ISO-8859-1"><input autocomplete="off" maxlength=2048 name=q size=55 title="Google Search" value=""><br><input name=btnG type=submit value="Google Search"><input name=btnI type=submit value="I'm Feeling Lucky"></td><td nowrap width=25%><font size=-2> <a href=/advanced_search?hl=en>Advanced Search</a><br> 
<a href=/preferences?hl=en>Preferences</a><br> <a href=/language_tools?hl=en>Language Tools</a></font></td></tr></table></form><br><font size=-1><font color=red>New!</font> The G1 phone is on sale now. <a href="/aclk?sa=L&ai=BuJQcgigHSbvbCqDUsAPGm6X7DvPUz3en34zVCcHZnNkT0IYDEAEYASDBVDgAUJL0-Mb8_____wFgyQY&num=1&sig=AGiWqtxZNijZyCsNtIwkfSx_S1WSW0Uh8A&q=http://www.google.com/intl/en_us/mobile/android/hpp.html">Learn more</a>.</font><br><br><br><font size=-1><a href="/intl/en/ads/">Advertising Programs</a> - <a href="/services/">Business Solutions</a> - <a href="/intl/en/about.html">About Google</a></font><p><font size=-2>©2008 - <a href="/intl/en/privacy.html">Privacy</a></font></p></center></body><script>google.y={first:[]};window.setTimeout(function(){var xjs=document.createElement('script');xjs.src='/extern_js/f/CgJlbhICdXMgACswCjgILCswGDgDLA/Vh5nhw3Xn6A.js';document.getElementsByTagName('head')[0].appendChild(xjs)},0);google.y.first.push(function(){google.ac.i(document.f,document.f.q,'','')})</script></html>
\ No newline at end of file
diff --git a/src/tests/htmlparser_testdata/javascript_attribute.html b/src/tests/htmlparser_testdata/javascript_attribute.html
new file mode 100644
index 0000000..db096f0
--- /dev/null
+++ b/src/tests/htmlparser_testdata/javascript_attribute.html
@@ -0,0 +1,27 @@
+<html>
+<body>
+
+<a onclick="alert('<?state state=value, tag=a, attr=onclick, attr_type=js,
+in_js=true, js_quoted=true?> x') &; &a; &x;/*blah <?state state=value,
+tag=a, attr=onclick, attr_type=js, in_js=true ?> */ "></a>
+
+<?state state=text, in_js=false ?>
+
+<a onmouseover='alert(document.domain<?state state=value, tag=a,
+attr=onmouseover, attr_type=js, in_js=true ?>)'>test</a>
+
+<?state state=text, in_js=false ?>
+
+<a onmouseover="">test</a>
+
+<?state state=text, in_js=false ?>
+
+<a onclick="<?state in_js=true, js_quoted=false?>">test</a>
+<?state state=text, in_js=false ?>
+
+<a onclick="'<?state in_js=true, js_quoted=true?>">test</a>
+<?state state=text, in_js=false ?>
+
+</body>
+</html>
+<?state state=text ?><?state state=text ?>
diff --git a/src/tests/htmlparser_testdata/javascript_block.html b/src/tests/htmlparser_testdata/javascript_block.html
new file mode 100644
index 0000000..539c1a6
--- /dev/null
+++ b/src/tests/htmlparser_testdata/javascript_block.html
@@ -0,0 +1,50 @@
+<html>
+<body>
+
+<script>
+
+x < 1;
+
+<?state state=text, tag=script, in_js=true ?>
+
+</script>
+
+<?state state=text?>
+
+<script>
+//<!--
+var x = 1;
+<?state state=text, tag=script, in_js=true ?>
+//--> </script>
+
+<?state state=text?>
+
+<script> //<!--
+var x = 1;
+<?state state=text, tag=script, in_js=true ?>
+</script>
+<?state state=text, tag=script, in_js=true ?>
+//--> </script>
+
+<?state state=text?>
+
+<script>
+<!--
+var x = 1;
+<?state state=text, tag=script, in_js=true ?>
+</script>
+<?state state=text, tag=script, in_js=true ?>
+-->
+</script>
+
+<?state state=text?>
+
+<script><?state tag=script, in_js=true?> </script><?state in_js=false?>
+<script><?state tag=script, in_js=true, js_quoted=false?></script><?state in_js=false?>
+<script>'<?state tag=script, in_js=true, js_quoted=true?></script><?state in_js=false?>
+<script>"<?state tag=script, in_js=true, js_quoted=true?></script><?state in_js=false?>
+
+</body>
+</html>
+<?state state=text ?>
+<?state state=text ?>
diff --git a/src/tests/htmlparser_testdata/javascript_regexp.html b/src/tests/htmlparser_testdata/javascript_regexp.html
new file mode 100644
index 0000000..7c1f88d
--- /dev/null
+++ b/src/tests/htmlparser_testdata/javascript_regexp.html
@@ -0,0 +1,171 @@
+<html>
+<body>
+
+
+<script>
+
+// General regular expression literal synching tests.
+
+var regexp = /x'/;
+<?state state=text, in_js=true, js_quoted=false?>
+
+var string = '<?state state=text, in_js=true, js_quoted=true?>';
+<?state state=text, in_js=true, js_quoted=false?>
+
+var op = 1 / 2;
+var string2 = '<?state state=text, in_js=true, js_quoted=true?>';
+<?state state=text, in_js=true, js_quoted=false?>
+
+return /x'/;
+<?state state=text, in_js=true, js_quoted=false?>
+
+
+// General regular expression state tests
+
+var regexp = / <?state js_state=regexp?> /; <?state js_state=text?>
+
+var a = /"hello/.exec("<?state state=text, in_js=true, js_quoted=true ?>");
+var a = /"hello"/.exec("<?state state=text, in_js=true, js_quoted=true ?>");
+
+var expression = 10 / <?state js_state=text?> / <?state js_state=regexp?> /;
+
+<?state js_state=text?>
+
+var expression2 = / <?state js_state=regexp?> /;
+
+if (window.frames.length < /\d+<?state js_state=regexp?>/.exec(<?state js_state=text?>)[0]) {
+ alert(/ '" <?state js_state=regexp?>/.exec(<?state js_state=text?>)/);
+ var quoted_string = "<?state js_state=dq?>" <?state js_state=text?>;
+}
+
+switch(/ <?state js_state=regexp?> /) { <?state js_state=text?>
+ case / <?state js_state=regexp?> /: <?state js_state=text?>
+ break;
+ case / \/<?state js_state=regexp?> /: <?state js_state=text?>
+ break;
+}
+
+delete / <?state js_state=regexp?> x / <?state js_state=text?>;
+id / <?state js_state=text?> x / <?state js_state=text?>;
+
+function test(/ <?state js_state=regexp?> /) {
+ return / <?state js_state=regexp?> /.exec(<?state js_state=text?>);
+}
+
+function test2(/ <?state js_state=regexp?> /, <?state js_state=text?>) {
+ return / '"<?state js_state=regexp?> /.exec(<?state js_state=text?>);
+}
+
+var a = "/<?state js_state=dq?>"/<?state js_state=text?>;
+
+test in / <?state js_state=regexp?>/;
+min / <?state js_state=text?>;
+IN / <?state js_state=text?>;
+
+3.. /<?state js_state=text?>/;
+0x3./<?state js_state=text?>/;
+
+// Escaping in regular expressions
+
+var a = / blah\/<?state js_state=regexp?>/<?state js_state=text?>,
+/\//<?state js_state=text?>,
+/\/*/<?state js_state=text?> /**/ <?state js_state=text?>,
+
+// Bracket expressions
+var a = [/[/] <?state js_state=regexp?> / <?state js_state=text?>,
+var a = /[/\]/ <?state js_state=regexp?> ]/ <?state js_state=text?>,
+var a = /[/\\]/ <?state js_state=text?>];
+
+/* Unary incremented/decremented variable, followed by a division. */
+
+var w = w++ / 1 <?state js_state=text?>;
+var w = w-- / 1 <?state js_state=text?>;
+
+/* Division after array acessor. */
+var test = xpto[2] / <?state js_state=text?>;
+
+/* Division after parenthesis expression. */
+var test = (2 + 2) / <?state js_state=text?>;
+
+/* Division with comments before the the previous token. */
+var test = x/* test *// <?state js_state=text?>;
+var test = x /* test *// <?state js_state=text?>;
+var test = x/* test */ / <?state js_state=text?>;
+var test = x /* test */ / <?state js_state=text?>;
+var test = x /* test */
+/ <?state js_state=text?>;
+
+var test = x // test
+/ <?state js_state=text?>;
+
+var test = x // test
+ / <?state js_state=text?>;
+
+var test = x // test
+
+/ <?state js_state=text?>;
+
+/* Regexp with multi line comment before the the previous token. */
+var test =/* test *// <?state js_state=regexp?> /;
+var test = /* test *// <?state js_state=regexp?> /;
+var test = /* test *// <?state js_state=regexp?> /;
+var test = /* test */ / <?state js_state=regexp?> /;
+var test = /* test */
+/ <?state js_state=regexp?> /;
+
+var test = // test
+/ <?state js_state=regexp?> /;
+
+var test = // test
+ / <?state js_state=regexp?> /;
+
+var test = // test
+
+/ <?state js_state=regexp?> /;
+
+
+/* Semicolon insertion after a code block */
+function() {} / <?state js_state=regexp?>/
+
+/****************************************************************************
+ Tests that won't pass right now due to design or implementation choices.
+*/
+
+/* Division after a regular expression.
+
+var test = <?nopstate js_state=text?>
+/ <?nopstate js_state=regexp?>
+/ <?nopstate js_state=text?>
+/ <?nopstate js_state=text?>
+/ <?nopstate js_state=regexp?>
+/ <?nopstate js_state=text?>;
+
+*/
+
+/* Division of an object literal
+
+{
+ a: 1,
+ b : 2
+} / <?nopstate js_state=text?>/
+
+*/
+
+/* Unary increment and decrement of regular expressions.
+
+var w = ++/ <?nopstate js_state=regexp?>/i;
+var x = --/ <?nopstate js_state=regexp?>/i
+
+*/
+
+
+</script>
+
+<script>
+
+/ <?state js_state=regexp?> /;
+
+</script>
+
+</body>
+</html>
diff --git a/src/tests/htmlparser_testdata/position.html b/src/tests/htmlparser_testdata/position.html
new file mode 100644
index 0000000..120ca4e
--- /dev/null
+++ b/src/tests/htmlparser_testdata/position.html
@@ -0,0 +1,33 @@
+<?state line_number=1?>
+<?state line_number=2?>
+<html>
+<?state column_number=1?>
+ <body><?state column_number=9?>
+ <?state line_number=6?><?state column_number=28?>
+ <?state
+
+ line_number=7
+ ?><?state column_number=7?>
+ <?state line_number=11?><?state column_number=29?>
+ </body>
+ <?state line_number=13?>
+
+
+
+<?state column_number=1?>
+ <?state column_number=2?>
+ <?state column_number=3?>
+
+
+
+
+
+<a href="http://ww.google.com" onclick="var x=<?state column_number=47?>">
+</a>
+
+<img src="http://www.google.com" onerror="var w = &qu<?state column_number=54?>ot;test"">
+
+
+
+ <?state line_number=32?>
+</html>
diff --git a/src/tests/htmlparser_testdata/reset.html b/src/tests/htmlparser_testdata/reset.html
new file mode 100644
index 0000000..cd0d070
--- /dev/null
+++ b/src/tests/htmlparser_testdata/reset.html
@@ -0,0 +1,31 @@
+<html>
+ <body>
+ <?state state=text, attr_type=none?>
+ <b font="<?state state=value, tag=b, attr=font, attr_quoted=true,
+ in_js=false, attr_type=regular ?>
+<?state reset=true ?>
+<?state state=text, attr_type=none ?>
+<b <?state state=tag, tag=b ?>
+<?state reset_mode=js ?>
+<?state state=js_file?>
+var unquoted =<?state js_quoted=false, in_js=true ?>;
+var single_quoted ='<?state js_quoted=true, in_js=true ?>';
+var unquoted =<?state js_quoted=false, in_js=true ?>;
+<?state reset_mode=html_in_tag?>blah=<?state attr=blah?>xpto<?state value=xpto?>
+test<?state state=attr?>
+<?state reset_mode=html_in_tag?>
+test="test123<?state attr=test, value=test123?>">
+<?state state=text?>
+<?state reset_mode=css?>
+<?state in_css=true?>
+<?state state=css_file?>
+
+<a href="<?state in_css=true?>"></style><?state in_css=true?>
+
+<123 <script><?state in_css=true?>
+
+<?state reset_mode=html?>
+<?state in_css=false?>
+ <?state state=text, attr_type=none?>
+ <b font="<?state state=value, tag=b, attr=font, attr_quoted=true,
+ in_js=false, attr_type=regular ?>
diff --git a/src/tests/htmlparser_testdata/sample_fsm.c b/src/tests/htmlparser_testdata/sample_fsm.c
new file mode 100644
index 0000000..ed85c5f
--- /dev/null
+++ b/src/tests/htmlparser_testdata/sample_fsm.c
@@ -0,0 +1,802 @@
+/* Parses C style strings
+ * Auto generated by generate_fsm.py. Please do not edit.
+ */
+#define STRINGPARSER_NUM_STATES 4
+enum stringparser_state_internal_enum {
+ STRINGPARSER_STATE_INT_TEXT,
+ STRINGPARSER_STATE_INT_STRING,
+ STRINGPARSER_STATE_INT_STRING_ESCAPE
+};
+
+static const int stringparser_states_external[] = {
+ STRINGPARSER_STATE_TEXT,
+ STRINGPARSER_STATE_STRING,
+ STRINGPARSER_STATE_STRING
+};
+
+static const char * stringparser_states_internal_names[] = {
+ "text",
+ "string",
+ "string_escape"
+};
+
+static const int stringparser_transition_row_text[] = {
+ /* '\x00' */ STRINGPARSER_STATE_INT_TEXT,
+ /* '\x01' */ STRINGPARSER_STATE_INT_TEXT,
+ /* '\x02' */ STRINGPARSER_STATE_INT_TEXT,
+ /* '\x03' */ STRINGPARSER_STATE_INT_TEXT,
+ /* '\x04' */ STRINGPARSER_STATE_INT_TEXT,
+ /* '\x05' */ STRINGPARSER_STATE_INT_TEXT,
+ /* '\x06' */ STRINGPARSER_STATE_INT_TEXT,
+ /* '\x07' */ STRINGPARSER_STATE_INT_TEXT,
+ /* '\x08' */ STRINGPARSER_STATE_INT_TEXT,
+ /* '\t' */ STRINGPARSER_STATE_INT_TEXT,
+ /* '\n' */ STRINGPARSER_STATE_INT_TEXT,
+ /* '\x0b' */ STRINGPARSER_STATE_INT_TEXT,
+ /* '\x0c' */ STRINGPARSER_STATE_INT_TEXT,
+ /* '\r' */ STRINGPARSER_STATE_INT_TEXT,
+ /* '\x0e' */ STRINGPARSER_STATE_INT_TEXT,
+ /* '\x0f' */ STRINGPARSER_STATE_INT_TEXT,
+ /* '\x10' */ STRINGPARSER_STATE_INT_TEXT,
+ /* '\x11' */ STRINGPARSER_STATE_INT_TEXT,
+ /* '\x12' */ STRINGPARSER_STATE_INT_TEXT,
+ /* '\x13' */ STRINGPARSER_STATE_INT_TEXT,
+ /* '\x14' */ STRINGPARSER_STATE_INT_TEXT,
+ /* '\x15' */ STRINGPARSER_STATE_INT_TEXT,
+ /* '\x16' */ STRINGPARSER_STATE_INT_TEXT,
+ /* '\x17' */ STRINGPARSER_STATE_INT_TEXT,
+ /* '\x18' */ STRINGPARSER_STATE_INT_TEXT,
+ /* '\x19' */ STRINGPARSER_STATE_INT_TEXT,
+ /* '\x1a' */ STRINGPARSER_STATE_INT_TEXT,
+ /* '\x1b' */ STRINGPARSER_STATE_INT_TEXT,
+ /* '\x1c' */ STRINGPARSER_STATE_INT_TEXT,
+ /* '\x1d' */ STRINGPARSER_STATE_INT_TEXT,
+ /* '\x1e' */ STRINGPARSER_STATE_INT_TEXT,
+ /* '\x1f' */ STRINGPARSER_STATE_INT_TEXT,
+ /* ' ' */ STRINGPARSER_STATE_INT_TEXT,
+ /* '!' */ STRINGPARSER_STATE_INT_TEXT,
+ /* '"' */ STRINGPARSER_STATE_INT_STRING,
+ /* '#' */ STRINGPARSER_STATE_INT_TEXT,
+ /* '$' */ STRINGPARSER_STATE_INT_TEXT,
+ /* '%' */ STRINGPARSER_STATE_INT_TEXT,
+ /* '&' */ STRINGPARSER_STATE_INT_TEXT,
+ /* "'" */ STRINGPARSER_STATE_INT_TEXT,
+ /* '(' */ STRINGPARSER_STATE_INT_TEXT,
+ /* ')' */ STRINGPARSER_STATE_INT_TEXT,
+ /* '*' */ STRINGPARSER_STATE_INT_TEXT,
+ /* '+' */ STRINGPARSER_STATE_INT_TEXT,
+ /* ',' */ STRINGPARSER_STATE_INT_TEXT,
+ /* '-' */ STRINGPARSER_STATE_INT_TEXT,
+ /* '.' */ STRINGPARSER_STATE_INT_TEXT,
+ /* '/' */ STRINGPARSER_STATE_INT_TEXT,
+ /* '0' */ STRINGPARSER_STATE_INT_TEXT,
+ /* '1' */ STRINGPARSER_STATE_INT_TEXT,
+ /* '2' */ STRINGPARSER_STATE_INT_TEXT,
+ /* '3' */ STRINGPARSER_STATE_INT_TEXT,
+ /* '4' */ STRINGPARSER_STATE_INT_TEXT,
+ /* '5' */ STRINGPARSER_STATE_INT_TEXT,
+ /* '6' */ STRINGPARSER_STATE_INT_TEXT,
+ /* '7' */ STRINGPARSER_STATE_INT_TEXT,
+ /* '8' */ STRINGPARSER_STATE_INT_TEXT,
+ /* '9' */ STRINGPARSER_STATE_INT_TEXT,
+ /* ':' */ STRINGPARSER_STATE_INT_TEXT,
+ /* ';' */ STRINGPARSER_STATE_INT_TEXT,
+ /* '<' */ STRINGPARSER_STATE_INT_TEXT,
+ /* '=' */ STRINGPARSER_STATE_INT_TEXT,
+ /* '>' */ STRINGPARSER_STATE_INT_TEXT,
+ /* '?' */ STRINGPARSER_STATE_INT_TEXT,
+ /* '@' */ STRINGPARSER_STATE_INT_TEXT,
+ /* 'A' */ STRINGPARSER_STATE_INT_TEXT,
+ /* 'B' */ STRINGPARSER_STATE_INT_TEXT,
+ /* 'C' */ STRINGPARSER_STATE_INT_TEXT,
+ /* 'D' */ STRINGPARSER_STATE_INT_TEXT,
+ /* 'E' */ STRINGPARSER_STATE_INT_TEXT,
+ /* 'F' */ STRINGPARSER_STATE_INT_TEXT,
+ /* 'G' */ STRINGPARSER_STATE_INT_TEXT,
+ /* 'H' */ STRINGPARSER_STATE_INT_TEXT,
+ /* 'I' */ STRINGPARSER_STATE_INT_TEXT,
+ /* 'J' */ STRINGPARSER_STATE_INT_TEXT,
+ /* 'K' */ STRINGPARSER_STATE_INT_TEXT,
+ /* 'L' */ STRINGPARSER_STATE_INT_TEXT,
+ /* 'M' */ STRINGPARSER_STATE_INT_TEXT,
+ /* 'N' */ STRINGPARSER_STATE_INT_TEXT,
+ /* 'O' */ STRINGPARSER_STATE_INT_TEXT,
+ /* 'P' */ STRINGPARSER_STATE_INT_TEXT,
+ /* 'Q' */ STRINGPARSER_STATE_INT_TEXT,
+ /* 'R' */ STRINGPARSER_STATE_INT_TEXT,
+ /* 'S' */ STRINGPARSER_STATE_INT_TEXT,
+ /* 'T' */ STRINGPARSER_STATE_INT_TEXT,
+ /* 'U' */ STRINGPARSER_STATE_INT_TEXT,
+ /* 'V' */ STRINGPARSER_STATE_INT_TEXT,
+ /* 'W' */ STRINGPARSER_STATE_INT_TEXT,
+ /* 'X' */ STRINGPARSER_STATE_INT_TEXT,
+ /* 'Y' */ STRINGPARSER_STATE_INT_TEXT,
+ /* 'Z' */ STRINGPARSER_STATE_INT_TEXT,
+ /* '[' */ STRINGPARSER_STATE_INT_TEXT,
+ /* '\\' */ STRINGPARSER_STATE_INT_STRING,
+ /* ']' */ STRINGPARSER_STATE_INT_TEXT,
+ /* '^' */ STRINGPARSER_STATE_INT_TEXT,
+ /* '_' */ STRINGPARSER_STATE_INT_TEXT,
+ /* '`' */ STRINGPARSER_STATE_INT_TEXT,
+ /* 'a' */ STRINGPARSER_STATE_INT_TEXT,
+ /* 'b' */ STRINGPARSER_STATE_INT_TEXT,
+ /* 'c' */ STRINGPARSER_STATE_INT_TEXT,
+ /* 'd' */ STRINGPARSER_STATE_INT_TEXT,
+ /* 'e' */ STRINGPARSER_STATE_INT_TEXT,
+ /* 'f' */ STRINGPARSER_STATE_INT_TEXT,
+ /* 'g' */ STRINGPARSER_STATE_INT_TEXT,
+ /* 'h' */ STRINGPARSER_STATE_INT_TEXT,
+ /* 'i' */ STRINGPARSER_STATE_INT_TEXT,
+ /* 'j' */ STRINGPARSER_STATE_INT_TEXT,
+ /* 'k' */ STRINGPARSER_STATE_INT_TEXT,
+ /* 'l' */ STRINGPARSER_STATE_INT_TEXT,
+ /* 'm' */ STRINGPARSER_STATE_INT_TEXT,
+ /* 'n' */ STRINGPARSER_STATE_INT_TEXT,
+ /* 'o' */ STRINGPARSER_STATE_INT_TEXT,
+ /* 'p' */ STRINGPARSER_STATE_INT_TEXT,
+ /* 'q' */ STRINGPARSER_STATE_INT_TEXT,
+ /* 'r' */ STRINGPARSER_STATE_INT_TEXT,
+ /* 's' */ STRINGPARSER_STATE_INT_TEXT,
+ /* 't' */ STRINGPARSER_STATE_INT_TEXT,
+ /* 'u' */ STRINGPARSER_STATE_INT_TEXT,
+ /* 'v' */ STRINGPARSER_STATE_INT_TEXT,
+ /* 'w' */ STRINGPARSER_STATE_INT_TEXT,
+ /* 'x' */ STRINGPARSER_STATE_INT_TEXT,
+ /* 'y' */ STRINGPARSER_STATE_INT_TEXT,
+ /* 'z' */ STRINGPARSER_STATE_INT_TEXT,
+ /* '{' */ STRINGPARSER_STATE_INT_TEXT,
+ /* '|' */ STRINGPARSER_STATE_INT_TEXT,
+ /* '}' */ STRINGPARSER_STATE_INT_TEXT,
+ /* '~' */ STRINGPARSER_STATE_INT_TEXT,
+ /* '\x7f' */ STRINGPARSER_STATE_INT_TEXT,
+ /* '\x80' */ STRINGPARSER_STATE_INT_TEXT,
+ /* '\x81' */ STRINGPARSER_STATE_INT_TEXT,
+ /* '\x82' */ STRINGPARSER_STATE_INT_TEXT,
+ /* '\x83' */ STRINGPARSER_STATE_INT_TEXT,
+ /* '\x84' */ STRINGPARSER_STATE_INT_TEXT,
+ /* '\x85' */ STRINGPARSER_STATE_INT_TEXT,
+ /* '\x86' */ STRINGPARSER_STATE_INT_TEXT,
+ /* '\x87' */ STRINGPARSER_STATE_INT_TEXT,
+ /* '\x88' */ STRINGPARSER_STATE_INT_TEXT,
+ /* '\x89' */ STRINGPARSER_STATE_INT_TEXT,
+ /* '\x8a' */ STRINGPARSER_STATE_INT_TEXT,
+ /* '\x8b' */ STRINGPARSER_STATE_INT_TEXT,
+ /* '\x8c' */ STRINGPARSER_STATE_INT_TEXT,
+ /* '\x8d' */ STRINGPARSER_STATE_INT_TEXT,
+ /* '\x8e' */ STRINGPARSER_STATE_INT_TEXT,
+ /* '\x8f' */ STRINGPARSER_STATE_INT_TEXT,
+ /* '\x90' */ STRINGPARSER_STATE_INT_TEXT,
+ /* '\x91' */ STRINGPARSER_STATE_INT_TEXT,
+ /* '\x92' */ STRINGPARSER_STATE_INT_TEXT,
+ /* '\x93' */ STRINGPARSER_STATE_INT_TEXT,
+ /* '\x94' */ STRINGPARSER_STATE_INT_TEXT,
+ /* '\x95' */ STRINGPARSER_STATE_INT_TEXT,
+ /* '\x96' */ STRINGPARSER_STATE_INT_TEXT,
+ /* '\x97' */ STRINGPARSER_STATE_INT_TEXT,
+ /* '\x98' */ STRINGPARSER_STATE_INT_TEXT,
+ /* '\x99' */ STRINGPARSER_STATE_INT_TEXT,
+ /* '\x9a' */ STRINGPARSER_STATE_INT_TEXT,
+ /* '\x9b' */ STRINGPARSER_STATE_INT_TEXT,
+ /* '\x9c' */ STRINGPARSER_STATE_INT_TEXT,
+ /* '\x9d' */ STRINGPARSER_STATE_INT_TEXT,
+ /* '\x9e' */ STRINGPARSER_STATE_INT_TEXT,
+ /* '\x9f' */ STRINGPARSER_STATE_INT_TEXT,
+ /* '\xa0' */ STRINGPARSER_STATE_INT_TEXT,
+ /* '\xa1' */ STRINGPARSER_STATE_INT_TEXT,
+ /* '\xa2' */ STRINGPARSER_STATE_INT_TEXT,
+ /* '\xa3' */ STRINGPARSER_STATE_INT_TEXT,
+ /* '\xa4' */ STRINGPARSER_STATE_INT_TEXT,
+ /* '\xa5' */ STRINGPARSER_STATE_INT_TEXT,
+ /* '\xa6' */ STRINGPARSER_STATE_INT_TEXT,
+ /* '\xa7' */ STRINGPARSER_STATE_INT_TEXT,
+ /* '\xa8' */ STRINGPARSER_STATE_INT_TEXT,
+ /* '\xa9' */ STRINGPARSER_STATE_INT_TEXT,
+ /* '\xaa' */ STRINGPARSER_STATE_INT_TEXT,
+ /* '\xab' */ STRINGPARSER_STATE_INT_TEXT,
+ /* '\xac' */ STRINGPARSER_STATE_INT_TEXT,
+ /* '\xad' */ STRINGPARSER_STATE_INT_TEXT,
+ /* '\xae' */ STRINGPARSER_STATE_INT_TEXT,
+ /* '\xaf' */ STRINGPARSER_STATE_INT_TEXT,
+ /* '\xb0' */ STRINGPARSER_STATE_INT_TEXT,
+ /* '\xb1' */ STRINGPARSER_STATE_INT_TEXT,
+ /* '\xb2' */ STRINGPARSER_STATE_INT_TEXT,
+ /* '\xb3' */ STRINGPARSER_STATE_INT_TEXT,
+ /* '\xb4' */ STRINGPARSER_STATE_INT_TEXT,
+ /* '\xb5' */ STRINGPARSER_STATE_INT_TEXT,
+ /* '\xb6' */ STRINGPARSER_STATE_INT_TEXT,
+ /* '\xb7' */ STRINGPARSER_STATE_INT_TEXT,
+ /* '\xb8' */ STRINGPARSER_STATE_INT_TEXT,
+ /* '\xb9' */ STRINGPARSER_STATE_INT_TEXT,
+ /* '\xba' */ STRINGPARSER_STATE_INT_TEXT,
+ /* '\xbb' */ STRINGPARSER_STATE_INT_TEXT,
+ /* '\xbc' */ STRINGPARSER_STATE_INT_TEXT,
+ /* '\xbd' */ STRINGPARSER_STATE_INT_TEXT,
+ /* '\xbe' */ STRINGPARSER_STATE_INT_TEXT,
+ /* '\xbf' */ STRINGPARSER_STATE_INT_TEXT,
+ /* '\xc0' */ STRINGPARSER_STATE_INT_TEXT,
+ /* '\xc1' */ STRINGPARSER_STATE_INT_TEXT,
+ /* '\xc2' */ STRINGPARSER_STATE_INT_TEXT,
+ /* '\xc3' */ STRINGPARSER_STATE_INT_TEXT,
+ /* '\xc4' */ STRINGPARSER_STATE_INT_TEXT,
+ /* '\xc5' */ STRINGPARSER_STATE_INT_TEXT,
+ /* '\xc6' */ STRINGPARSER_STATE_INT_TEXT,
+ /* '\xc7' */ STRINGPARSER_STATE_INT_TEXT,
+ /* '\xc8' */ STRINGPARSER_STATE_INT_TEXT,
+ /* '\xc9' */ STRINGPARSER_STATE_INT_TEXT,
+ /* '\xca' */ STRINGPARSER_STATE_INT_TEXT,
+ /* '\xcb' */ STRINGPARSER_STATE_INT_TEXT,
+ /* '\xcc' */ STRINGPARSER_STATE_INT_TEXT,
+ /* '\xcd' */ STRINGPARSER_STATE_INT_TEXT,
+ /* '\xce' */ STRINGPARSER_STATE_INT_TEXT,
+ /* '\xcf' */ STRINGPARSER_STATE_INT_TEXT,
+ /* '\xd0' */ STRINGPARSER_STATE_INT_TEXT,
+ /* '\xd1' */ STRINGPARSER_STATE_INT_TEXT,
+ /* '\xd2' */ STRINGPARSER_STATE_INT_TEXT,
+ /* '\xd3' */ STRINGPARSER_STATE_INT_TEXT,
+ /* '\xd4' */ STRINGPARSER_STATE_INT_TEXT,
+ /* '\xd5' */ STRINGPARSER_STATE_INT_TEXT,
+ /* '\xd6' */ STRINGPARSER_STATE_INT_TEXT,
+ /* '\xd7' */ STRINGPARSER_STATE_INT_TEXT,
+ /* '\xd8' */ STRINGPARSER_STATE_INT_TEXT,
+ /* '\xd9' */ STRINGPARSER_STATE_INT_TEXT,
+ /* '\xda' */ STRINGPARSER_STATE_INT_TEXT,
+ /* '\xdb' */ STRINGPARSER_STATE_INT_TEXT,
+ /* '\xdc' */ STRINGPARSER_STATE_INT_TEXT,
+ /* '\xdd' */ STRINGPARSER_STATE_INT_TEXT,
+ /* '\xde' */ STRINGPARSER_STATE_INT_TEXT,
+ /* '\xdf' */ STRINGPARSER_STATE_INT_TEXT,
+ /* '\xe0' */ STRINGPARSER_STATE_INT_TEXT,
+ /* '\xe1' */ STRINGPARSER_STATE_INT_TEXT,
+ /* '\xe2' */ STRINGPARSER_STATE_INT_TEXT,
+ /* '\xe3' */ STRINGPARSER_STATE_INT_TEXT,
+ /* '\xe4' */ STRINGPARSER_STATE_INT_TEXT,
+ /* '\xe5' */ STRINGPARSER_STATE_INT_TEXT,
+ /* '\xe6' */ STRINGPARSER_STATE_INT_TEXT,
+ /* '\xe7' */ STRINGPARSER_STATE_INT_TEXT,
+ /* '\xe8' */ STRINGPARSER_STATE_INT_TEXT,
+ /* '\xe9' */ STRINGPARSER_STATE_INT_TEXT,
+ /* '\xea' */ STRINGPARSER_STATE_INT_TEXT,
+ /* '\xeb' */ STRINGPARSER_STATE_INT_TEXT,
+ /* '\xec' */ STRINGPARSER_STATE_INT_TEXT,
+ /* '\xed' */ STRINGPARSER_STATE_INT_TEXT,
+ /* '\xee' */ STRINGPARSER_STATE_INT_TEXT,
+ /* '\xef' */ STRINGPARSER_STATE_INT_TEXT,
+ /* '\xf0' */ STRINGPARSER_STATE_INT_TEXT,
+ /* '\xf1' */ STRINGPARSER_STATE_INT_TEXT,
+ /* '\xf2' */ STRINGPARSER_STATE_INT_TEXT,
+ /* '\xf3' */ STRINGPARSER_STATE_INT_TEXT,
+ /* '\xf4' */ STRINGPARSER_STATE_INT_TEXT,
+ /* '\xf5' */ STRINGPARSER_STATE_INT_TEXT,
+ /* '\xf6' */ STRINGPARSER_STATE_INT_TEXT,
+ /* '\xf7' */ STRINGPARSER_STATE_INT_TEXT,
+ /* '\xf8' */ STRINGPARSER_STATE_INT_TEXT,
+ /* '\xf9' */ STRINGPARSER_STATE_INT_TEXT,
+ /* '\xfa' */ STRINGPARSER_STATE_INT_TEXT,
+ /* '\xfb' */ STRINGPARSER_STATE_INT_TEXT,
+ /* '\xfc' */ STRINGPARSER_STATE_INT_TEXT,
+ /* '\xfd' */ STRINGPARSER_STATE_INT_TEXT,
+ /* '\xfe' */ STRINGPARSER_STATE_INT_TEXT
+};
+
+static const int stringparser_transition_row_string[] = {
+ /* '\x00' */ STRINGPARSER_STATE_INT_STRING,
+ /* '\x01' */ STRINGPARSER_STATE_INT_STRING,
+ /* '\x02' */ STRINGPARSER_STATE_INT_STRING,
+ /* '\x03' */ STRINGPARSER_STATE_INT_STRING,
+ /* '\x04' */ STRINGPARSER_STATE_INT_STRING,
+ /* '\x05' */ STRINGPARSER_STATE_INT_STRING,
+ /* '\x06' */ STRINGPARSER_STATE_INT_STRING,
+ /* '\x07' */ STRINGPARSER_STATE_INT_STRING,
+ /* '\x08' */ STRINGPARSER_STATE_INT_STRING,
+ /* '\t' */ STRINGPARSER_STATE_INT_STRING,
+ /* '\n' */ STRINGPARSER_STATE_INT_STRING,
+ /* '\x0b' */ STRINGPARSER_STATE_INT_STRING,
+ /* '\x0c' */ STRINGPARSER_STATE_INT_STRING,
+ /* '\r' */ STRINGPARSER_STATE_INT_STRING,
+ /* '\x0e' */ STRINGPARSER_STATE_INT_STRING,
+ /* '\x0f' */ STRINGPARSER_STATE_INT_STRING,
+ /* '\x10' */ STRINGPARSER_STATE_INT_STRING,
+ /* '\x11' */ STRINGPARSER_STATE_INT_STRING,
+ /* '\x12' */ STRINGPARSER_STATE_INT_STRING,
+ /* '\x13' */ STRINGPARSER_STATE_INT_STRING,
+ /* '\x14' */ STRINGPARSER_STATE_INT_STRING,
+ /* '\x15' */ STRINGPARSER_STATE_INT_STRING,
+ /* '\x16' */ STRINGPARSER_STATE_INT_STRING,
+ /* '\x17' */ STRINGPARSER_STATE_INT_STRING,
+ /* '\x18' */ STRINGPARSER_STATE_INT_STRING,
+ /* '\x19' */ STRINGPARSER_STATE_INT_STRING,
+ /* '\x1a' */ STRINGPARSER_STATE_INT_STRING,
+ /* '\x1b' */ STRINGPARSER_STATE_INT_STRING,
+ /* '\x1c' */ STRINGPARSER_STATE_INT_STRING,
+ /* '\x1d' */ STRINGPARSER_STATE_INT_STRING,
+ /* '\x1e' */ STRINGPARSER_STATE_INT_STRING,
+ /* '\x1f' */ STRINGPARSER_STATE_INT_STRING,
+ /* ' ' */ STRINGPARSER_STATE_INT_STRING,
+ /* '!' */ STRINGPARSER_STATE_INT_STRING,
+ /* '"' */ STRINGPARSER_STATE_INT_TEXT,
+ /* '#' */ STRINGPARSER_STATE_INT_STRING,
+ /* '$' */ STRINGPARSER_STATE_INT_STRING,
+ /* '%' */ STRINGPARSER_STATE_INT_STRING,
+ /* '&' */ STRINGPARSER_STATE_INT_STRING,
+ /* "'" */ STRINGPARSER_STATE_INT_STRING,
+ /* '(' */ STRINGPARSER_STATE_INT_STRING,
+ /* ')' */ STRINGPARSER_STATE_INT_STRING,
+ /* '*' */ STRINGPARSER_STATE_INT_STRING,
+ /* '+' */ STRINGPARSER_STATE_INT_STRING,
+ /* ',' */ STRINGPARSER_STATE_INT_STRING,
+ /* '-' */ STRINGPARSER_STATE_INT_STRING,
+ /* '.' */ STRINGPARSER_STATE_INT_STRING,
+ /* '/' */ STRINGPARSER_STATE_INT_STRING,
+ /* '0' */ STRINGPARSER_STATE_INT_STRING,
+ /* '1' */ STRINGPARSER_STATE_INT_STRING,
+ /* '2' */ STRINGPARSER_STATE_INT_STRING,
+ /* '3' */ STRINGPARSER_STATE_INT_STRING,
+ /* '4' */ STRINGPARSER_STATE_INT_STRING,
+ /* '5' */ STRINGPARSER_STATE_INT_STRING,
+ /* '6' */ STRINGPARSER_STATE_INT_STRING,
+ /* '7' */ STRINGPARSER_STATE_INT_STRING,
+ /* '8' */ STRINGPARSER_STATE_INT_STRING,
+ /* '9' */ STRINGPARSER_STATE_INT_STRING,
+ /* ':' */ STRINGPARSER_STATE_INT_STRING,
+ /* ';' */ STRINGPARSER_STATE_INT_STRING,
+ /* '<' */ STRINGPARSER_STATE_INT_STRING,
+ /* '=' */ STRINGPARSER_STATE_INT_STRING,
+ /* '>' */ STRINGPARSER_STATE_INT_STRING,
+ /* '?' */ STRINGPARSER_STATE_INT_STRING,
+ /* '@' */ STRINGPARSER_STATE_INT_STRING,
+ /* 'A' */ STRINGPARSER_STATE_INT_STRING,
+ /* 'B' */ STRINGPARSER_STATE_INT_STRING,
+ /* 'C' */ STRINGPARSER_STATE_INT_STRING,
+ /* 'D' */ STRINGPARSER_STATE_INT_STRING,
+ /* 'E' */ STRINGPARSER_STATE_INT_STRING,
+ /* 'F' */ STRINGPARSER_STATE_INT_STRING,
+ /* 'G' */ STRINGPARSER_STATE_INT_STRING,
+ /* 'H' */ STRINGPARSER_STATE_INT_STRING,
+ /* 'I' */ STRINGPARSER_STATE_INT_STRING,
+ /* 'J' */ STRINGPARSER_STATE_INT_STRING,
+ /* 'K' */ STRINGPARSER_STATE_INT_STRING,
+ /* 'L' */ STRINGPARSER_STATE_INT_STRING,
+ /* 'M' */ STRINGPARSER_STATE_INT_STRING,
+ /* 'N' */ STRINGPARSER_STATE_INT_STRING,
+ /* 'O' */ STRINGPARSER_STATE_INT_STRING,
+ /* 'P' */ STRINGPARSER_STATE_INT_STRING,
+ /* 'Q' */ STRINGPARSER_STATE_INT_STRING,
+ /* 'R' */ STRINGPARSER_STATE_INT_STRING,
+ /* 'S' */ STRINGPARSER_STATE_INT_STRING,
+ /* 'T' */ STRINGPARSER_STATE_INT_STRING,
+ /* 'U' */ STRINGPARSER_STATE_INT_STRING,
+ /* 'V' */ STRINGPARSER_STATE_INT_STRING,
+ /* 'W' */ STRINGPARSER_STATE_INT_STRING,
+ /* 'X' */ STRINGPARSER_STATE_INT_STRING,
+ /* 'Y' */ STRINGPARSER_STATE_INT_STRING,
+ /* 'Z' */ STRINGPARSER_STATE_INT_STRING,
+ /* '[' */ STRINGPARSER_STATE_INT_STRING,
+ /* '\\' */ STRINGPARSER_STATE_INT_STRING_ESCAPE,
+ /* ']' */ STRINGPARSER_STATE_INT_STRING,
+ /* '^' */ STRINGPARSER_STATE_INT_STRING,
+ /* '_' */ STRINGPARSER_STATE_INT_STRING,
+ /* '`' */ STRINGPARSER_STATE_INT_STRING,
+ /* 'a' */ STRINGPARSER_STATE_INT_STRING,
+ /* 'b' */ STRINGPARSER_STATE_INT_STRING,
+ /* 'c' */ STRINGPARSER_STATE_INT_STRING,
+ /* 'd' */ STRINGPARSER_STATE_INT_STRING,
+ /* 'e' */ STRINGPARSER_STATE_INT_STRING,
+ /* 'f' */ STRINGPARSER_STATE_INT_STRING,
+ /* 'g' */ STRINGPARSER_STATE_INT_STRING,
+ /* 'h' */ STRINGPARSER_STATE_INT_STRING,
+ /* 'i' */ STRINGPARSER_STATE_INT_STRING,
+ /* 'j' */ STRINGPARSER_STATE_INT_STRING,
+ /* 'k' */ STRINGPARSER_STATE_INT_STRING,
+ /* 'l' */ STRINGPARSER_STATE_INT_STRING,
+ /* 'm' */ STRINGPARSER_STATE_INT_STRING,
+ /* 'n' */ STRINGPARSER_STATE_INT_STRING,
+ /* 'o' */ STRINGPARSER_STATE_INT_STRING,
+ /* 'p' */ STRINGPARSER_STATE_INT_STRING,
+ /* 'q' */ STRINGPARSER_STATE_INT_STRING,
+ /* 'r' */ STRINGPARSER_STATE_INT_STRING,
+ /* 's' */ STRINGPARSER_STATE_INT_STRING,
+ /* 't' */ STRINGPARSER_STATE_INT_STRING,
+ /* 'u' */ STRINGPARSER_STATE_INT_STRING,
+ /* 'v' */ STRINGPARSER_STATE_INT_STRING,
+ /* 'w' */ STRINGPARSER_STATE_INT_STRING,
+ /* 'x' */ STRINGPARSER_STATE_INT_STRING,
+ /* 'y' */ STRINGPARSER_STATE_INT_STRING,
+ /* 'z' */ STRINGPARSER_STATE_INT_STRING,
+ /* '{' */ STRINGPARSER_STATE_INT_STRING,
+ /* '|' */ STRINGPARSER_STATE_INT_STRING,
+ /* '}' */ STRINGPARSER_STATE_INT_STRING,
+ /* '~' */ STRINGPARSER_STATE_INT_STRING,
+ /* '\x7f' */ STRINGPARSER_STATE_INT_STRING,
+ /* '\x80' */ STRINGPARSER_STATE_INT_STRING,
+ /* '\x81' */ STRINGPARSER_STATE_INT_STRING,
+ /* '\x82' */ STRINGPARSER_STATE_INT_STRING,
+ /* '\x83' */ STRINGPARSER_STATE_INT_STRING,
+ /* '\x84' */ STRINGPARSER_STATE_INT_STRING,
+ /* '\x85' */ STRINGPARSER_STATE_INT_STRING,
+ /* '\x86' */ STRINGPARSER_STATE_INT_STRING,
+ /* '\x87' */ STRINGPARSER_STATE_INT_STRING,
+ /* '\x88' */ STRINGPARSER_STATE_INT_STRING,
+ /* '\x89' */ STRINGPARSER_STATE_INT_STRING,
+ /* '\x8a' */ STRINGPARSER_STATE_INT_STRING,
+ /* '\x8b' */ STRINGPARSER_STATE_INT_STRING,
+ /* '\x8c' */ STRINGPARSER_STATE_INT_STRING,
+ /* '\x8d' */ STRINGPARSER_STATE_INT_STRING,
+ /* '\x8e' */ STRINGPARSER_STATE_INT_STRING,
+ /* '\x8f' */ STRINGPARSER_STATE_INT_STRING,
+ /* '\x90' */ STRINGPARSER_STATE_INT_STRING,
+ /* '\x91' */ STRINGPARSER_STATE_INT_STRING,
+ /* '\x92' */ STRINGPARSER_STATE_INT_STRING,
+ /* '\x93' */ STRINGPARSER_STATE_INT_STRING,
+ /* '\x94' */ STRINGPARSER_STATE_INT_STRING,
+ /* '\x95' */ STRINGPARSER_STATE_INT_STRING,
+ /* '\x96' */ STRINGPARSER_STATE_INT_STRING,
+ /* '\x97' */ STRINGPARSER_STATE_INT_STRING,
+ /* '\x98' */ STRINGPARSER_STATE_INT_STRING,
+ /* '\x99' */ STRINGPARSER_STATE_INT_STRING,
+ /* '\x9a' */ STRINGPARSER_STATE_INT_STRING,
+ /* '\x9b' */ STRINGPARSER_STATE_INT_STRING,
+ /* '\x9c' */ STRINGPARSER_STATE_INT_STRING,
+ /* '\x9d' */ STRINGPARSER_STATE_INT_STRING,
+ /* '\x9e' */ STRINGPARSER_STATE_INT_STRING,
+ /* '\x9f' */ STRINGPARSER_STATE_INT_STRING,
+ /* '\xa0' */ STRINGPARSER_STATE_INT_STRING,
+ /* '\xa1' */ STRINGPARSER_STATE_INT_STRING,
+ /* '\xa2' */ STRINGPARSER_STATE_INT_STRING,
+ /* '\xa3' */ STRINGPARSER_STATE_INT_STRING,
+ /* '\xa4' */ STRINGPARSER_STATE_INT_STRING,
+ /* '\xa5' */ STRINGPARSER_STATE_INT_STRING,
+ /* '\xa6' */ STRINGPARSER_STATE_INT_STRING,
+ /* '\xa7' */ STRINGPARSER_STATE_INT_STRING,
+ /* '\xa8' */ STRINGPARSER_STATE_INT_STRING,
+ /* '\xa9' */ STRINGPARSER_STATE_INT_STRING,
+ /* '\xaa' */ STRINGPARSER_STATE_INT_STRING,
+ /* '\xab' */ STRINGPARSER_STATE_INT_STRING,
+ /* '\xac' */ STRINGPARSER_STATE_INT_STRING,
+ /* '\xad' */ STRINGPARSER_STATE_INT_STRING,
+ /* '\xae' */ STRINGPARSER_STATE_INT_STRING,
+ /* '\xaf' */ STRINGPARSER_STATE_INT_STRING,
+ /* '\xb0' */ STRINGPARSER_STATE_INT_STRING,
+ /* '\xb1' */ STRINGPARSER_STATE_INT_STRING,
+ /* '\xb2' */ STRINGPARSER_STATE_INT_STRING,
+ /* '\xb3' */ STRINGPARSER_STATE_INT_STRING,
+ /* '\xb4' */ STRINGPARSER_STATE_INT_STRING,
+ /* '\xb5' */ STRINGPARSER_STATE_INT_STRING,
+ /* '\xb6' */ STRINGPARSER_STATE_INT_STRING,
+ /* '\xb7' */ STRINGPARSER_STATE_INT_STRING,
+ /* '\xb8' */ STRINGPARSER_STATE_INT_STRING,
+ /* '\xb9' */ STRINGPARSER_STATE_INT_STRING,
+ /* '\xba' */ STRINGPARSER_STATE_INT_STRING,
+ /* '\xbb' */ STRINGPARSER_STATE_INT_STRING,
+ /* '\xbc' */ STRINGPARSER_STATE_INT_STRING,
+ /* '\xbd' */ STRINGPARSER_STATE_INT_STRING,
+ /* '\xbe' */ STRINGPARSER_STATE_INT_STRING,
+ /* '\xbf' */ STRINGPARSER_STATE_INT_STRING,
+ /* '\xc0' */ STRINGPARSER_STATE_INT_STRING,
+ /* '\xc1' */ STRINGPARSER_STATE_INT_STRING,
+ /* '\xc2' */ STRINGPARSER_STATE_INT_STRING,
+ /* '\xc3' */ STRINGPARSER_STATE_INT_STRING,
+ /* '\xc4' */ STRINGPARSER_STATE_INT_STRING,
+ /* '\xc5' */ STRINGPARSER_STATE_INT_STRING,
+ /* '\xc6' */ STRINGPARSER_STATE_INT_STRING,
+ /* '\xc7' */ STRINGPARSER_STATE_INT_STRING,
+ /* '\xc8' */ STRINGPARSER_STATE_INT_STRING,
+ /* '\xc9' */ STRINGPARSER_STATE_INT_STRING,
+ /* '\xca' */ STRINGPARSER_STATE_INT_STRING,
+ /* '\xcb' */ STRINGPARSER_STATE_INT_STRING,
+ /* '\xcc' */ STRINGPARSER_STATE_INT_STRING,
+ /* '\xcd' */ STRINGPARSER_STATE_INT_STRING,
+ /* '\xce' */ STRINGPARSER_STATE_INT_STRING,
+ /* '\xcf' */ STRINGPARSER_STATE_INT_STRING,
+ /* '\xd0' */ STRINGPARSER_STATE_INT_STRING,
+ /* '\xd1' */ STRINGPARSER_STATE_INT_STRING,
+ /* '\xd2' */ STRINGPARSER_STATE_INT_STRING,
+ /* '\xd3' */ STRINGPARSER_STATE_INT_STRING,
+ /* '\xd4' */ STRINGPARSER_STATE_INT_STRING,
+ /* '\xd5' */ STRINGPARSER_STATE_INT_STRING,
+ /* '\xd6' */ STRINGPARSER_STATE_INT_STRING,
+ /* '\xd7' */ STRINGPARSER_STATE_INT_STRING,
+ /* '\xd8' */ STRINGPARSER_STATE_INT_STRING,
+ /* '\xd9' */ STRINGPARSER_STATE_INT_STRING,
+ /* '\xda' */ STRINGPARSER_STATE_INT_STRING,
+ /* '\xdb' */ STRINGPARSER_STATE_INT_STRING,
+ /* '\xdc' */ STRINGPARSER_STATE_INT_STRING,
+ /* '\xdd' */ STRINGPARSER_STATE_INT_STRING,
+ /* '\xde' */ STRINGPARSER_STATE_INT_STRING,
+ /* '\xdf' */ STRINGPARSER_STATE_INT_STRING,
+ /* '\xe0' */ STRINGPARSER_STATE_INT_STRING,
+ /* '\xe1' */ STRINGPARSER_STATE_INT_STRING,
+ /* '\xe2' */ STRINGPARSER_STATE_INT_STRING,
+ /* '\xe3' */ STRINGPARSER_STATE_INT_STRING,
+ /* '\xe4' */ STRINGPARSER_STATE_INT_STRING,
+ /* '\xe5' */ STRINGPARSER_STATE_INT_STRING,
+ /* '\xe6' */ STRINGPARSER_STATE_INT_STRING,
+ /* '\xe7' */ STRINGPARSER_STATE_INT_STRING,
+ /* '\xe8' */ STRINGPARSER_STATE_INT_STRING,
+ /* '\xe9' */ STRINGPARSER_STATE_INT_STRING,
+ /* '\xea' */ STRINGPARSER_STATE_INT_STRING,
+ /* '\xeb' */ STRINGPARSER_STATE_INT_STRING,
+ /* '\xec' */ STRINGPARSER_STATE_INT_STRING,
+ /* '\xed' */ STRINGPARSER_STATE_INT_STRING,
+ /* '\xee' */ STRINGPARSER_STATE_INT_STRING,
+ /* '\xef' */ STRINGPARSER_STATE_INT_STRING,
+ /* '\xf0' */ STRINGPARSER_STATE_INT_STRING,
+ /* '\xf1' */ STRINGPARSER_STATE_INT_STRING,
+ /* '\xf2' */ STRINGPARSER_STATE_INT_STRING,
+ /* '\xf3' */ STRINGPARSER_STATE_INT_STRING,
+ /* '\xf4' */ STRINGPARSER_STATE_INT_STRING,
+ /* '\xf5' */ STRINGPARSER_STATE_INT_STRING,
+ /* '\xf6' */ STRINGPARSER_STATE_INT_STRING,
+ /* '\xf7' */ STRINGPARSER_STATE_INT_STRING,
+ /* '\xf8' */ STRINGPARSER_STATE_INT_STRING,
+ /* '\xf9' */ STRINGPARSER_STATE_INT_STRING,
+ /* '\xfa' */ STRINGPARSER_STATE_INT_STRING,
+ /* '\xfb' */ STRINGPARSER_STATE_INT_STRING,
+ /* '\xfc' */ STRINGPARSER_STATE_INT_STRING,
+ /* '\xfd' */ STRINGPARSER_STATE_INT_STRING,
+ /* '\xfe' */ STRINGPARSER_STATE_INT_STRING
+};
+
+static const int stringparser_transition_row_string_escape[] = {
+ /* '\x00' */ STRINGPARSER_STATE_INT_STRING,
+ /* '\x01' */ STRINGPARSER_STATE_INT_STRING,
+ /* '\x02' */ STRINGPARSER_STATE_INT_STRING,
+ /* '\x03' */ STRINGPARSER_STATE_INT_STRING,
+ /* '\x04' */ STRINGPARSER_STATE_INT_STRING,
+ /* '\x05' */ STRINGPARSER_STATE_INT_STRING,
+ /* '\x06' */ STRINGPARSER_STATE_INT_STRING,
+ /* '\x07' */ STRINGPARSER_STATE_INT_STRING,
+ /* '\x08' */ STRINGPARSER_STATE_INT_STRING,
+ /* '\t' */ STRINGPARSER_STATE_INT_STRING,
+ /* '\n' */ STRINGPARSER_STATE_INT_STRING,
+ /* '\x0b' */ STRINGPARSER_STATE_INT_STRING,
+ /* '\x0c' */ STRINGPARSER_STATE_INT_STRING,
+ /* '\r' */ STRINGPARSER_STATE_INT_STRING,
+ /* '\x0e' */ STRINGPARSER_STATE_INT_STRING,
+ /* '\x0f' */ STRINGPARSER_STATE_INT_STRING,
+ /* '\x10' */ STRINGPARSER_STATE_INT_STRING,
+ /* '\x11' */ STRINGPARSER_STATE_INT_STRING,
+ /* '\x12' */ STRINGPARSER_STATE_INT_STRING,
+ /* '\x13' */ STRINGPARSER_STATE_INT_STRING,
+ /* '\x14' */ STRINGPARSER_STATE_INT_STRING,
+ /* '\x15' */ STRINGPARSER_STATE_INT_STRING,
+ /* '\x16' */ STRINGPARSER_STATE_INT_STRING,
+ /* '\x17' */ STRINGPARSER_STATE_INT_STRING,
+ /* '\x18' */ STRINGPARSER_STATE_INT_STRING,
+ /* '\x19' */ STRINGPARSER_STATE_INT_STRING,
+ /* '\x1a' */ STRINGPARSER_STATE_INT_STRING,
+ /* '\x1b' */ STRINGPARSER_STATE_INT_STRING,
+ /* '\x1c' */ STRINGPARSER_STATE_INT_STRING,
+ /* '\x1d' */ STRINGPARSER_STATE_INT_STRING,
+ /* '\x1e' */ STRINGPARSER_STATE_INT_STRING,
+ /* '\x1f' */ STRINGPARSER_STATE_INT_STRING,
+ /* ' ' */ STRINGPARSER_STATE_INT_STRING,
+ /* '!' */ STRINGPARSER_STATE_INT_STRING,
+ /* '"' */ STRINGPARSER_STATE_INT_STRING,
+ /* '#' */ STRINGPARSER_STATE_INT_STRING,
+ /* '$' */ STRINGPARSER_STATE_INT_STRING,
+ /* '%' */ STRINGPARSER_STATE_INT_STRING,
+ /* '&' */ STRINGPARSER_STATE_INT_STRING,
+ /* "'" */ STRINGPARSER_STATE_INT_STRING,
+ /* '(' */ STRINGPARSER_STATE_INT_STRING,
+ /* ')' */ STRINGPARSER_STATE_INT_STRING,
+ /* '*' */ STRINGPARSER_STATE_INT_STRING,
+ /* '+' */ STRINGPARSER_STATE_INT_STRING,
+ /* ',' */ STRINGPARSER_STATE_INT_STRING,
+ /* '-' */ STRINGPARSER_STATE_INT_STRING,
+ /* '.' */ STRINGPARSER_STATE_INT_STRING,
+ /* '/' */ STRINGPARSER_STATE_INT_STRING,
+ /* '0' */ STRINGPARSER_STATE_INT_STRING,
+ /* '1' */ STRINGPARSER_STATE_INT_STRING,
+ /* '2' */ STRINGPARSER_STATE_INT_STRING,
+ /* '3' */ STRINGPARSER_STATE_INT_STRING,
+ /* '4' */ STRINGPARSER_STATE_INT_STRING,
+ /* '5' */ STRINGPARSER_STATE_INT_STRING,
+ /* '6' */ STRINGPARSER_STATE_INT_STRING,
+ /* '7' */ STRINGPARSER_STATE_INT_STRING,
+ /* '8' */ STRINGPARSER_STATE_INT_STRING,
+ /* '9' */ STRINGPARSER_STATE_INT_STRING,
+ /* ':' */ STRINGPARSER_STATE_INT_STRING,
+ /* ';' */ STRINGPARSER_STATE_INT_STRING,
+ /* '<' */ STRINGPARSER_STATE_INT_STRING,
+ /* '=' */ STRINGPARSER_STATE_INT_STRING,
+ /* '>' */ STRINGPARSER_STATE_INT_STRING,
+ /* '?' */ STRINGPARSER_STATE_INT_STRING,
+ /* '@' */ STRINGPARSER_STATE_INT_STRING,
+ /* 'A' */ STRINGPARSER_STATE_INT_STRING,
+ /* 'B' */ STRINGPARSER_STATE_INT_STRING,
+ /* 'C' */ STRINGPARSER_STATE_INT_STRING,
+ /* 'D' */ STRINGPARSER_STATE_INT_STRING,
+ /* 'E' */ STRINGPARSER_STATE_INT_STRING,
+ /* 'F' */ STRINGPARSER_STATE_INT_STRING,
+ /* 'G' */ STRINGPARSER_STATE_INT_STRING,
+ /* 'H' */ STRINGPARSER_STATE_INT_STRING,
+ /* 'I' */ STRINGPARSER_STATE_INT_STRING,
+ /* 'J' */ STRINGPARSER_STATE_INT_STRING,
+ /* 'K' */ STRINGPARSER_STATE_INT_STRING,
+ /* 'L' */ STRINGPARSER_STATE_INT_STRING,
+ /* 'M' */ STRINGPARSER_STATE_INT_STRING,
+ /* 'N' */ STRINGPARSER_STATE_INT_STRING,
+ /* 'O' */ STRINGPARSER_STATE_INT_STRING,
+ /* 'P' */ STRINGPARSER_STATE_INT_STRING,
+ /* 'Q' */ STRINGPARSER_STATE_INT_STRING,
+ /* 'R' */ STRINGPARSER_STATE_INT_STRING,
+ /* 'S' */ STRINGPARSER_STATE_INT_STRING,
+ /* 'T' */ STRINGPARSER_STATE_INT_STRING,
+ /* 'U' */ STRINGPARSER_STATE_INT_STRING,
+ /* 'V' */ STRINGPARSER_STATE_INT_STRING,
+ /* 'W' */ STRINGPARSER_STATE_INT_STRING,
+ /* 'X' */ STRINGPARSER_STATE_INT_STRING,
+ /* 'Y' */ STRINGPARSER_STATE_INT_STRING,
+ /* 'Z' */ STRINGPARSER_STATE_INT_STRING,
+ /* '[' */ STRINGPARSER_STATE_INT_STRING,
+ /* '\\' */ STRINGPARSER_STATE_INT_STRING,
+ /* ']' */ STRINGPARSER_STATE_INT_STRING,
+ /* '^' */ STRINGPARSER_STATE_INT_STRING,
+ /* '_' */ STRINGPARSER_STATE_INT_STRING,
+ /* '`' */ STRINGPARSER_STATE_INT_STRING,
+ /* 'a' */ STRINGPARSER_STATE_INT_STRING,
+ /* 'b' */ STRINGPARSER_STATE_INT_STRING,
+ /* 'c' */ STRINGPARSER_STATE_INT_STRING,
+ /* 'd' */ STRINGPARSER_STATE_INT_STRING,
+ /* 'e' */ STRINGPARSER_STATE_INT_STRING,
+ /* 'f' */ STRINGPARSER_STATE_INT_STRING,
+ /* 'g' */ STRINGPARSER_STATE_INT_STRING,
+ /* 'h' */ STRINGPARSER_STATE_INT_STRING,
+ /* 'i' */ STRINGPARSER_STATE_INT_STRING,
+ /* 'j' */ STRINGPARSER_STATE_INT_STRING,
+ /* 'k' */ STRINGPARSER_STATE_INT_STRING,
+ /* 'l' */ STRINGPARSER_STATE_INT_STRING,
+ /* 'm' */ STRINGPARSER_STATE_INT_STRING,
+ /* 'n' */ STRINGPARSER_STATE_INT_STRING,
+ /* 'o' */ STRINGPARSER_STATE_INT_STRING,
+ /* 'p' */ STRINGPARSER_STATE_INT_STRING,
+ /* 'q' */ STRINGPARSER_STATE_INT_STRING,
+ /* 'r' */ STRINGPARSER_STATE_INT_STRING,
+ /* 's' */ STRINGPARSER_STATE_INT_STRING,
+ /* 't' */ STRINGPARSER_STATE_INT_STRING,
+ /* 'u' */ STRINGPARSER_STATE_INT_STRING,
+ /* 'v' */ STRINGPARSER_STATE_INT_STRING,
+ /* 'w' */ STRINGPARSER_STATE_INT_STRING,
+ /* 'x' */ STRINGPARSER_STATE_INT_STRING,
+ /* 'y' */ STRINGPARSER_STATE_INT_STRING,
+ /* 'z' */ STRINGPARSER_STATE_INT_STRING,
+ /* '{' */ STRINGPARSER_STATE_INT_STRING,
+ /* '|' */ STRINGPARSER_STATE_INT_STRING,
+ /* '}' */ STRINGPARSER_STATE_INT_STRING,
+ /* '~' */ STRINGPARSER_STATE_INT_STRING,
+ /* '\x7f' */ STRINGPARSER_STATE_INT_STRING,
+ /* '\x80' */ STRINGPARSER_STATE_INT_STRING,
+ /* '\x81' */ STRINGPARSER_STATE_INT_STRING,
+ /* '\x82' */ STRINGPARSER_STATE_INT_STRING,
+ /* '\x83' */ STRINGPARSER_STATE_INT_STRING,
+ /* '\x84' */ STRINGPARSER_STATE_INT_STRING,
+ /* '\x85' */ STRINGPARSER_STATE_INT_STRING,
+ /* '\x86' */ STRINGPARSER_STATE_INT_STRING,
+ /* '\x87' */ STRINGPARSER_STATE_INT_STRING,
+ /* '\x88' */ STRINGPARSER_STATE_INT_STRING,
+ /* '\x89' */ STRINGPARSER_STATE_INT_STRING,
+ /* '\x8a' */ STRINGPARSER_STATE_INT_STRING,
+ /* '\x8b' */ STRINGPARSER_STATE_INT_STRING,
+ /* '\x8c' */ STRINGPARSER_STATE_INT_STRING,
+ /* '\x8d' */ STRINGPARSER_STATE_INT_STRING,
+ /* '\x8e' */ STRINGPARSER_STATE_INT_STRING,
+ /* '\x8f' */ STRINGPARSER_STATE_INT_STRING,
+ /* '\x90' */ STRINGPARSER_STATE_INT_STRING,
+ /* '\x91' */ STRINGPARSER_STATE_INT_STRING,
+ /* '\x92' */ STRINGPARSER_STATE_INT_STRING,
+ /* '\x93' */ STRINGPARSER_STATE_INT_STRING,
+ /* '\x94' */ STRINGPARSER_STATE_INT_STRING,
+ /* '\x95' */ STRINGPARSER_STATE_INT_STRING,
+ /* '\x96' */ STRINGPARSER_STATE_INT_STRING,
+ /* '\x97' */ STRINGPARSER_STATE_INT_STRING,
+ /* '\x98' */ STRINGPARSER_STATE_INT_STRING,
+ /* '\x99' */ STRINGPARSER_STATE_INT_STRING,
+ /* '\x9a' */ STRINGPARSER_STATE_INT_STRING,
+ /* '\x9b' */ STRINGPARSER_STATE_INT_STRING,
+ /* '\x9c' */ STRINGPARSER_STATE_INT_STRING,
+ /* '\x9d' */ STRINGPARSER_STATE_INT_STRING,
+ /* '\x9e' */ STRINGPARSER_STATE_INT_STRING,
+ /* '\x9f' */ STRINGPARSER_STATE_INT_STRING,
+ /* '\xa0' */ STRINGPARSER_STATE_INT_STRING,
+ /* '\xa1' */ STRINGPARSER_STATE_INT_STRING,
+ /* '\xa2' */ STRINGPARSER_STATE_INT_STRING,
+ /* '\xa3' */ STRINGPARSER_STATE_INT_STRING,
+ /* '\xa4' */ STRINGPARSER_STATE_INT_STRING,
+ /* '\xa5' */ STRINGPARSER_STATE_INT_STRING,
+ /* '\xa6' */ STRINGPARSER_STATE_INT_STRING,
+ /* '\xa7' */ STRINGPARSER_STATE_INT_STRING,
+ /* '\xa8' */ STRINGPARSER_STATE_INT_STRING,
+ /* '\xa9' */ STRINGPARSER_STATE_INT_STRING,
+ /* '\xaa' */ STRINGPARSER_STATE_INT_STRING,
+ /* '\xab' */ STRINGPARSER_STATE_INT_STRING,
+ /* '\xac' */ STRINGPARSER_STATE_INT_STRING,
+ /* '\xad' */ STRINGPARSER_STATE_INT_STRING,
+ /* '\xae' */ STRINGPARSER_STATE_INT_STRING,
+ /* '\xaf' */ STRINGPARSER_STATE_INT_STRING,
+ /* '\xb0' */ STRINGPARSER_STATE_INT_STRING,
+ /* '\xb1' */ STRINGPARSER_STATE_INT_STRING,
+ /* '\xb2' */ STRINGPARSER_STATE_INT_STRING,
+ /* '\xb3' */ STRINGPARSER_STATE_INT_STRING,
+ /* '\xb4' */ STRINGPARSER_STATE_INT_STRING,
+ /* '\xb5' */ STRINGPARSER_STATE_INT_STRING,
+ /* '\xb6' */ STRINGPARSER_STATE_INT_STRING,
+ /* '\xb7' */ STRINGPARSER_STATE_INT_STRING,
+ /* '\xb8' */ STRINGPARSER_STATE_INT_STRING,
+ /* '\xb9' */ STRINGPARSER_STATE_INT_STRING,
+ /* '\xba' */ STRINGPARSER_STATE_INT_STRING,
+ /* '\xbb' */ STRINGPARSER_STATE_INT_STRING,
+ /* '\xbc' */ STRINGPARSER_STATE_INT_STRING,
+ /* '\xbd' */ STRINGPARSER_STATE_INT_STRING,
+ /* '\xbe' */ STRINGPARSER_STATE_INT_STRING,
+ /* '\xbf' */ STRINGPARSER_STATE_INT_STRING,
+ /* '\xc0' */ STRINGPARSER_STATE_INT_STRING,
+ /* '\xc1' */ STRINGPARSER_STATE_INT_STRING,
+ /* '\xc2' */ STRINGPARSER_STATE_INT_STRING,
+ /* '\xc3' */ STRINGPARSER_STATE_INT_STRING,
+ /* '\xc4' */ STRINGPARSER_STATE_INT_STRING,
+ /* '\xc5' */ STRINGPARSER_STATE_INT_STRING,
+ /* '\xc6' */ STRINGPARSER_STATE_INT_STRING,
+ /* '\xc7' */ STRINGPARSER_STATE_INT_STRING,
+ /* '\xc8' */ STRINGPARSER_STATE_INT_STRING,
+ /* '\xc9' */ STRINGPARSER_STATE_INT_STRING,
+ /* '\xca' */ STRINGPARSER_STATE_INT_STRING,
+ /* '\xcb' */ STRINGPARSER_STATE_INT_STRING,
+ /* '\xcc' */ STRINGPARSER_STATE_INT_STRING,
+ /* '\xcd' */ STRINGPARSER_STATE_INT_STRING,
+ /* '\xce' */ STRINGPARSER_STATE_INT_STRING,
+ /* '\xcf' */ STRINGPARSER_STATE_INT_STRING,
+ /* '\xd0' */ STRINGPARSER_STATE_INT_STRING,
+ /* '\xd1' */ STRINGPARSER_STATE_INT_STRING,
+ /* '\xd2' */ STRINGPARSER_STATE_INT_STRING,
+ /* '\xd3' */ STRINGPARSER_STATE_INT_STRING,
+ /* '\xd4' */ STRINGPARSER_STATE_INT_STRING,
+ /* '\xd5' */ STRINGPARSER_STATE_INT_STRING,
+ /* '\xd6' */ STRINGPARSER_STATE_INT_STRING,
+ /* '\xd7' */ STRINGPARSER_STATE_INT_STRING,
+ /* '\xd8' */ STRINGPARSER_STATE_INT_STRING,
+ /* '\xd9' */ STRINGPARSER_STATE_INT_STRING,
+ /* '\xda' */ STRINGPARSER_STATE_INT_STRING,
+ /* '\xdb' */ STRINGPARSER_STATE_INT_STRING,
+ /* '\xdc' */ STRINGPARSER_STATE_INT_STRING,
+ /* '\xdd' */ STRINGPARSER_STATE_INT_STRING,
+ /* '\xde' */ STRINGPARSER_STATE_INT_STRING,
+ /* '\xdf' */ STRINGPARSER_STATE_INT_STRING,
+ /* '\xe0' */ STRINGPARSER_STATE_INT_STRING,
+ /* '\xe1' */ STRINGPARSER_STATE_INT_STRING,
+ /* '\xe2' */ STRINGPARSER_STATE_INT_STRING,
+ /* '\xe3' */ STRINGPARSER_STATE_INT_STRING,
+ /* '\xe4' */ STRINGPARSER_STATE_INT_STRING,
+ /* '\xe5' */ STRINGPARSER_STATE_INT_STRING,
+ /* '\xe6' */ STRINGPARSER_STATE_INT_STRING,
+ /* '\xe7' */ STRINGPARSER_STATE_INT_STRING,
+ /* '\xe8' */ STRINGPARSER_STATE_INT_STRING,
+ /* '\xe9' */ STRINGPARSER_STATE_INT_STRING,
+ /* '\xea' */ STRINGPARSER_STATE_INT_STRING,
+ /* '\xeb' */ STRINGPARSER_STATE_INT_STRING,
+ /* '\xec' */ STRINGPARSER_STATE_INT_STRING,
+ /* '\xed' */ STRINGPARSER_STATE_INT_STRING,
+ /* '\xee' */ STRINGPARSER_STATE_INT_STRING,
+ /* '\xef' */ STRINGPARSER_STATE_INT_STRING,
+ /* '\xf0' */ STRINGPARSER_STATE_INT_STRING,
+ /* '\xf1' */ STRINGPARSER_STATE_INT_STRING,
+ /* '\xf2' */ STRINGPARSER_STATE_INT_STRING,
+ /* '\xf3' */ STRINGPARSER_STATE_INT_STRING,
+ /* '\xf4' */ STRINGPARSER_STATE_INT_STRING,
+ /* '\xf5' */ STRINGPARSER_STATE_INT_STRING,
+ /* '\xf6' */ STRINGPARSER_STATE_INT_STRING,
+ /* '\xf7' */ STRINGPARSER_STATE_INT_STRING,
+ /* '\xf8' */ STRINGPARSER_STATE_INT_STRING,
+ /* '\xf9' */ STRINGPARSER_STATE_INT_STRING,
+ /* '\xfa' */ STRINGPARSER_STATE_INT_STRING,
+ /* '\xfb' */ STRINGPARSER_STATE_INT_STRING,
+ /* '\xfc' */ STRINGPARSER_STATE_INT_STRING,
+ /* '\xfd' */ STRINGPARSER_STATE_INT_STRING,
+ /* '\xfe' */ STRINGPARSER_STATE_INT_STRING
+};
+
+static const int * stringparser_state_transitions[] = {
+ stringparser_transition_row_text,
+ stringparser_transition_row_string,
+ stringparser_transition_row_string_escape
+};
+
diff --git a/src/tests/htmlparser_testdata/sample_fsm.config b/src/tests/htmlparser_testdata/sample_fsm.config
new file mode 100644
index 0000000..df66e69
--- /dev/null
+++ b/src/tests/htmlparser_testdata/sample_fsm.config
@@ -0,0 +1,64 @@
+# Copyright (c) 2008, Google Inc.
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are
+# met:
+#
+# * Redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer.
+# * Redistributions in binary form must reproduce the above
+# copyright notice, this list of conditions and the following disclaimer
+# in the documentation and/or other materials provided with the
+# distribution.
+# * Neither the name of Google Inc. nor the names of its
+# contributors may be used to endorse or promote products derived from
+# this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+# ---
+#
+# Author: falmeida@google.com (Filipe Almeida)
+
+name = 'stringparser'
+
+comment = 'Parses C style strings'
+
+condition('dq', '\\"'),
+condition('backslash', '\\\\'),
+condition('default', '[:default:]')
+
+# Outside a string
+state(name = 'text',
+ external = 'text',
+ transitions = [
+ ['dq', 'string'],
+ ['default', 'text']
+ ])
+
+# String literal
+state(name = 'string',
+ external = 'string',
+ transitions = [
+ ['backslash', 'string_escape'],
+ ['dq', 'text'],
+ ['default', 'string']
+ ])
+
+# Escaped character in a string literal. Ignore the next character
+state(name = 'string_escape',
+ external = 'string',
+ transitions = [
+ ['default', 'string']
+ ])
+
diff --git a/src/tests/htmlparser_testdata/simple.html b/src/tests/htmlparser_testdata/simple.html
new file mode 100644
index 0000000..555928f
--- /dev/null
+++ b/src/tests/htmlparser_testdata/simple.html
@@ -0,0 +1,26 @@
+<html>
+ <body>
+ <?state state=text,tag=body?>
+ <a href="<?state state=value,tag=a?>">test</a>
+
+ <test test123=<?state state=value, tag=test, attr=test123,
+ attr_type=regular ?>>
+
+ <?state state=text?>
+
+ <body blah='<?state state=value, tag=body, attr=blah, attr_type=regular
+ ?>'>
+
+ <style>
+ <?state in_css=true?>
+ </style>
+ <?state in_css=false?>
+
+ <h1 onclick="<?state state=value, tag=h1, attr=onclick, attr_type=js,
+ in_js=true ?>" style="<?state in_css=true?>" <?state in_css=false?>>
+ <?state state=text, tag=h1?>
+ </h1>
+
+
+ </body>
+</html>
diff --git a/src/tests/htmlparser_testdata/tags.html b/src/tests/htmlparser_testdata/tags.html
new file mode 100644
index 0000000..1caf68d
--- /dev/null
+++ b/src/tests/htmlparser_testdata/tags.html
@@ -0,0 +1,214 @@
+<html>
+
+<body blah='<?state state=value, tag=body, attr=blah, attr_type=regular,
+attr_quoted=true ?>'>
+
+<?state state=text, tag=body ?>
+<a href=<?state state=value, tag=a, attr=href, attr_type=uri ?>><?state state=text, tag=a ?></a>
+<a href=
+ "<?state state=value, tag=a, attr=href, attr_type=uri, attr_quoted=true ?>"></a>
+
+<a href=<?state state=value, tag=a, attr=href, attr_type=uri ?> blah=x></a>
+<a href=
+ "<?state state=value, tag=a, attr=href, attr_type=uri ?>" blah=x></a>
+
+<a href=
+ <?state state=value, tag=a, attr=href, attr_type=uri, attr_quoted=false ?> blah=x></a>
+
+<a href><?state state=text, tag=a ?></a>
+
+<a href=x<?state state=value, tag=a, attr=href, attr_type=uri ?> <?state state=tag, tag=a ?>></a>
+
+<a href =<?state state=value, tag=a, attr=href, attr_type=uri ?>></a>
+<a href
+=<?state state=value, tag=a, attr=href, attr_type=uri ?>></a>
+<a href
+ =<?state state=value, tag=a, attr=href, attr_type=uri ?>></a>
+
+<?state state=text?>
+
+<b font=<?state state=value, value_index=0?>></b>
+<b font=x<?state state=value, value_index=1?>></b>
+<b font='<?state state=value, value_index=0?>'></b>
+<b font='x<?state state=value, value_index=1?>'></b>
+
+<!-- XML Processing instruction -->
+
+<?example <?state state=text?> <a href=<?state state=text?>></a
+ <script>
+ <?state state=text, in_js=false?>
+ </script>
+?>
+
+<a href=http://www.google.com/<?state state=value, tag=a, attr=href, attr_type=uri ?>?q=tt<?state state=value, tag=a, attr=href, attr_type=uri ?>>test</a>
+
+<!-- Test javascript url handling -->
+<a href="test<?state value=test, in_js=false ?>">test</a>
+<a href="javascript<?state value=javascript, in_js=false ?>">test</a>
+<a href="javascript:<?state value=javascript:, in_js=false ?>">test</a>
+<a href="javascript:alert('<?state in_js=false ?>">test</a>
+<a href="http:<?state value=http:, in_js=false ?>">test</a>
+<a href="http://www.google.com"
+ alt="javascript:<?state value=javascript:, in_js=false ?>">test</a>
+
+<!-- Test calls to TemplateDirective() -->
+<b font=<?state state=value?>
+ color<?state state=value?>></b>
+
+<b font=<?state state=value?><?state insert_text=true?>
+ color<?state state=attr?>></b>
+
+<b font="<?state state=value?><?state insert_text=true?>
+ color<?state state=value?>"></b>
+
+<a href=
+ <?state state=value?><?state insert_text=true?> alt<?state state=attr?>>
+ link
+</a>
+
+<b font=<?state state=value?>><?state state=text, tag=b?></b>
+
+<!-- Large invalid HTML entity -->
+<a onclick="&testtesttesttesttesttesttesttesttesttesttesttest;"
+ href="http://www.google.com/"></a>
+
+<!-- URI attributes. The attribute list can be found in
+ htmlparser.c:is_uri_attribute() -->
+<a target="<?state attr_type=regular?>"></a>
+<!-- -->
+<form action="<?state attr_type=uri?>"></form>
+<applet archive="<?state attr_type=uri?>"></applet>
+<blockquote cite="<?state attr_type=uri?>"></blockquote>
+<object classid="<?state attr_type=uri?>"></object>
+<object codebase="<?state attr_type=uri?>"></object>
+<object data="<?state attr_type=uri?>"></object>
+<img dynsrc="<?state attr_type=uri?>"></img>
+<a href="<?state attr_type=uri?>"></a>
+<img longdesc="<?state attr_type=uri?>"></img>
+<img src="<?state attr_type=uri?>"></img>
+<img usemap="<?state attr_type=uri?>"></img>
+<!-- -->
+<form style="x" action="<?state attr_type=uri?>"></form>
+<applet style="x" archive="<?state attr_type=uri?>"></applet>
+<blockquote style="x" cite="<?state attr_type=uri?>"></blockquote>
+<object style="x" classid="<?state attr_type=uri?>"></object>
+<object style="x" codebase="<?state attr_type=uri?>"></object>
+<object style="x" data="<?state attr_type=uri?>"></object>
+<img style="x" dynsrc="<?state attr_type=uri?>"></img>
+<a style="x" href="<?state attr_type=uri?>"></a>
+<img style="x" longdesc="<?state attr_type=uri?>"></img>
+<img style="x" src="<?state attr_type=uri?>"></img>
+<img style="x" usemap="<?state attr_type=uri?>"></img>
+<!-- -->
+<img alt="<?state attr_type=regular?>"></a>
+
+
+<!-- Style attributes as returned by htmlparser.c:is_style_attribute() -->
+<a target="<?state attr_type=regular?>"></a>
+<!-- -->
+<b style="<?state attr_type=style?>"></b>
+<!-- -->
+<a target="<?state attr_type=regular?>"></a>
+
+<!-- Big attribute value. We can't do prefix checking right now so we can't
+ validate the contents of the value here, although statemachine_test.c has
+ a test for that. -->
+
+<a href="http://www.google.com/"
+ alt="01234567890123456789012345678901234567890123456789
+ 01234567890123456789012345678901234567890123456789
+ 01234567890123456789012345678901234567890123456789
+ 01234567890123456789012345678901234567890123456789
+ 01234567890123456789012345678901234567890123456789
+ 01234567890123456789012345678901234567890123456789
+ 01234567890123456789012345678901234567890123456789
+ 01234567890123456789012345678901234567890123456789
+ 01234567890123456789012345678901234567890123456789
+ 01234567890123456789012345678901234567890123456789
+ <?state state=value, attr_quoted=true, tag=a, attr=alt?>"></a>
+
+<?state state=text?>
+
+<!-- is_url_start tests -->
+
+<a href="<?state is_url_start=true?>"></a>
+<a href="http://<?state is_url_start=false?>"></a>
+<a href="http://www.google.com?q=<?state is_url_start=false?>"></a>
+<b font="<?state is_url_start=false?>"></b>
+<b font="http://www.google.com?q=<?state is_url_start=false?>"></b>
+<?state is_url_start=false?>
+
+<!-- <?state is_url_start=false?> -->
+
+<!-- Tag opening tests -->
+
+<a <?state state=tag?>></a><?state state=text?>
+<br <?state state=tag?>></br><?state state=text?>
+< br <?state state=text?>></br><?state state=text?>
+<< <?state state=text?>><?state state=text?>
+< <?state state=text?> alt="<?state state=text?>">
+</blah <?state state=tag?>><?state state=text?>
+<<i<?state state=tag?>><?state state=text?></i>
+
+
+<!-- We do allow numbers to open html tags, which is not how most browsers
+behave. We still test this anyway. -->
+<0 <?state state=tag?>><?state state=text?>
+<1 <?state state=tag?>><?state state=text?>
+
+<!-- meta redirect url tests. -->
+<meta http-equiv="refresh" content="5;URL=<?state attr_type=uri, is_url_start=true?>">
+<meta http-equiv="refresh" content="10;URL=<?state attr_type=uri, is_url_start=true?>">
+<meta http-equiv="refresh" content="5 ;URL=<?state attr_type=uri, is_url_start=true?>">
+<meta http-equiv="refresh" content=" 5 ;URL=<?state attr_type=uri, is_url_start=true?>">
+<?state attr_type=none, is_url_start=false?>
+<meta http-equiv="refresh" content=" 5 ; url = <?state attr_type=uri, is_url_start=true?>">
+<meta http-equiv="refresh" content="5;Url=<?state attr_type=uri, is_url_start=true?>">
+<meta http-equiv="refresh" content="5;UrL=<?state attr_type=uri, is_url_start=true?>">
+<meta http-equiv="refresh" content="5;uRL=<?state attr_type=uri, is_url_start=true?>">
+<?state attr_type=none, is_url_start=false?>
+<meta http-equiv="refresh" content="5;uRL=http://<?state attr_type=uri, is_url_start=false?>">
+<meta http-equiv="refresh" content="5 ; URL=http://www.google.com/<?state attr_type=uri, is_url_start=false?>">
+<meta http-equiv="refresh" content="5;URL=/<?state attr_type=uri, is_url_start=false?>">
+<meta http-equiv="refresh" content="5;URL=../<?state attr_type=uri, is_url_start=false?>">
+<meta http-equiv="refresh" content=" 123456789 ; url = ../<?state attr_type=uri, is_url_start=false?>">
+
+<!-- Quoted URLs -->
+<meta http-equiv="refresh" content="5;URL = '<?state attr_type=uri, is_url_start=true?>">
+<meta http-equiv="refresh" content='5;URL = "<?state attr_type=uri, is_url_start=true?>"'>
+<meta http-equiv="refresh" content="5;URL = ' <?state attr_type=uri, is_url_start=false?>">
+<meta http-equiv="refresh" content='5;URL = " <?state attr_type=uri, is_url_start=false?>"'>
+
+<?state attr_type=none, is_url_start=false?>
+
+<meta http-equiv="refresh" content="5x;URL=<?state attr_type=regular, is_url_start=false?>">
+<meta http-equiv="refresh" content="<?state attr_type=regular, is_url_start=false?>">
+<meta http-equiv="refresh" content="5<?state attr_type=regular, is_url_start=false?>">
+<meta http-equiv="refresh" content="5;<?state attr_type=regular, is_url_start=false?>">
+<?state attr_type=none, is_url_start=false?>
+<meta http-equiv="refresh" content="5;U<?state attr_type=regular, is_url_start=false?>">
+<meta http-equiv="refresh" content="5;UR<?state attr_type=regular, is_url_start=false?>">
+<meta http-equiv="refresh" content="5;URL<?state attr_type=regular, is_url_start=false?>">
+<meta http-equiv="refresh" content="5;URL <?state attr_type=regular, is_url_start=false?>">
+<?state attr_type=none, is_url_start=false?>
+<meta http-equiv="refresh" content="5x;URL= <?state attr_type=regular, is_url_start=false?>">
+<meta http-equiv="refresh" content="5;UR L <?state attr_type=regular, is_url_start=false?>">
+<meta http-equiv="refresh" content="URL = <?state attr_type=regular, is_url_start=false?>">
+
+<meta http-equiv="refresh" content="AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA <?state attr_type=regular?>">
+
+<span a:type="<?state state=value, attr=a:type?>"
+ a:abc.abc="<?state state=value, attr=a:abc.abc?>"
+ b:a.b.c.d.e.f=<?state state=value, attr=b:a.b.c.d.e.f?>>
+
+<tag.test>
+<?state state=text, tag=tag.test?>
+</tag.test>
+
+<!-- Tests regarding our specific implementation -->
+<meta content="5;URL=<?state attr_type=uri, is_url_start=true?>">
+
+</body>
+
+</html>
+<?state state=text ?>