Squashed 'third_party/ctemplate/' content from commit 6742f62

Change-Id: I828e4e4c906f13ba19944d78a8a78652b62949af
git-subtree-dir: third_party/ctemplate
git-subtree-split: 6742f6233db12f545e90baa8f34f5c29c4eb396a
diff --git a/src/tests/htmlparser_testdata/cdata.html b/src/tests/htmlparser_testdata/cdata.html
new file mode 100644
index 0000000..817938b
--- /dev/null
+++ b/src/tests/htmlparser_testdata/cdata.html
@@ -0,0 +1,112 @@
+<html>
+<?state state=text, tag=html ?>
+
+  <head>
+    <?state state=text, tag=head ?>
+    <!-- Title element with markup -->
+    <title>
+      <?state state=text, tag=title ?>
+      <h1>
+        <?state state=text, tag=title ?>
+      </h1>
+      <!--
+        <?state state=text, tag=title ?>
+        </title>
+        <?state state=text, tag=title ?>
+      -->
+      <?state state=text, tag=title ?>
+    </title>
+    <?state state=text ?>
+
+    <!-- Style element with attributes -->
+    <style a=b>
+      <b><?state state=text, tag=style, in_js=false, in_css=true?></b>
+    </style>
+    <?state in_css=false?>
+  </head>
+<body>
+<?state state=text, in_js=false ?>
+  <!-- PCDATA nested block -->
+  <b>
+    <?state state=text, tag=b ?>
+    <i>
+      <?state state=text, tag=i ?>
+    </i>
+    <?state state=text ?>
+  </b>
+  <?state state=text ?>
+
+  <!-- Textarea element with space at the end of the closing tag -->
+  <textarea>
+  <?state state=text, tag=textarea ?>
+    <b>
+    <?state state=text, tag=textarea ?>
+      <i>
+      <?state state=text, tag=textarea, in_css=false ?>
+      <!--
+        <?state state=text, tag=textarea ?>
+        </textarea>
+        <?state state=text, tag=textarea ?>
+      -->
+      </i>
+      <?state state=text, tag=textarea ?>
+    </b>
+    <?state state=text, tag=textarea ?>
+  </textarea >
+
+<?state state=text ?>
+
+  <!-- script tag with other tags inside -->
+  <script>
+    document.write("
+      <?state in_js=true, js_quoted=true, tag=script ?>
+      <style>
+        .none { display:none }
+      </style>
+      <?state in_js=true, js_quoted=true ?>
+    ");
+    <?state in_js=true, js_quoted=false ?>
+  </script>
+
+  <?state in_js=false ?>
+
+  <!-- script tag with a backslash quoted script tag -->
+  <script>
+    <?state in_js=true, js_quoted=false ?>
+    document.body.innerHTML = '<script><\/script>'
+    <?state in_js=true, js_quoted=false ?>
+  </script>
+
+  <?state in_js=false ?>
+
+  <!-- </script> appearing between javascript comments -->
+  <script>
+  <!--
+    <?state in_js=true, js_quoted=false ?>
+    document.body.innerHTML = '<script></script>'
+    <?state in_js=true, js_quoted=false ?>
+  -->
+  </script>
+
+  <?state in_js=false ?>
+
+  <!-- Closing script with an extra space at the end of the tag. Some browsers
+  ignore this tag and some browsers honour it. We honour it. -->
+  <script>
+    <?state in_js=true, js_quoted=false ?>
+    document.body.innerHTML = '<script><\/script>'
+    <?state in_js=true, js_quoted=false ?>
+  </script >
+
+  <script>
+    <?state in_js=true, js_quoted=false ?>
+    </script%>
+    <?state in_js=true, js_quoted=false ?>
+  </script >
+
+  <?state in_js=false ?>
+
+</body>
+<?state in_js=false ?>
+</html>
+
diff --git a/src/tests/htmlparser_testdata/comments.html b/src/tests/htmlparser_testdata/comments.html
new file mode 100644
index 0000000..391f3f0
--- /dev/null
+++ b/src/tests/htmlparser_testdata/comments.html
@@ -0,0 +1,61 @@
+<!-- Tests for HTML comments and cdata escaping text spans. -->
+<html>
+
+<body>
+
+<?state state=text, tag=body ?>
+
+<!-- HTML doctype declaration -->
+<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN"
+<?state state=text, tag=body?>
+"http://www.w3.org/TR/html4/strict.dtd">
+<?state state=text, tag=body ?>
+
+<!-- Regular HTML comment -->
+<!-- <?state state=comment, tag=body ?> -->
+<?state state=text, tag=body ?>
+
+<!-- HTML comment with tags -->
+<!-- > -> </b> <a href="<?state state=comment, tag=body ?>"></a>-->
+<?state state=text, tag=body ?>
+
+<!-- Should not be interpreted as an SGML comment -->
+<?state state=text, tag=body ?>
+<!-- -- -->
+<?state state=text, tag=body ?>
+
+<!-- -- Sync back the SGML comment for editors who parse SGML comments
+(ie: vim) -->
+<?state state=text, tag=body ?>
+
+<!-- Multiple dashes at the end. -->
+<!----- <?state state=comment, tag=body ?> --><?state state=text, tag=body ?>
+<!----- <?state state=comment, tag=body ?> ---><?state state=text, tag=body ?>
+<!----- <?state state=comment, tag=body ?> ----><?state state=text, tag=body ?>
+<!----- <?state state=comment, tag=body ?> -----><?state state=text, tag=body ?>
+
+<!-- Some more misc tests -->
+<!-- test <?state state=comment?> --><?state state=text?> test test --><?state state=text?>
+<!-- test -> test test --><?state state=text?>
+<!-- test test='--><?state state=text?>'
+<!----><?state state=text?>
+<!-----><?state state=text?>
+
+<!-- Make sure the double dash sequence is not interpreted as an SGML comment
+by introducing a legit postfix decrement operator -->
+<?state state=text, in_js=false ?>
+<script>
+<!--
+<?state state=text, in_js=true ?>
+
+var x = 1;
+x--;
+<?state state=text, in_js=true ?>
+-->
+</script>
+<?state state=text, in_js=false ?>
+
+</body>
+
+</html>
+<?state state=text ?>
diff --git a/src/tests/htmlparser_testdata/context.html b/src/tests/htmlparser_testdata/context.html
new file mode 100644
index 0000000..aaaaa46
--- /dev/null
+++ b/src/tests/htmlparser_testdata/context.html
@@ -0,0 +1,79 @@
+<!-- Tests for CopyFrom() -->
+<html>
+  <body>
+    <?state save_context=body?>
+    <?state tag=body?>
+    <h1>
+      <?state save_context=h1?>
+      <?state tag=h1?>
+      <?state load_context=body?>
+    <?state tag=body?>
+
+    <a href="http://www.google.com<?state save_context=href?>"></a>
+
+    <script>
+      <?state save_context=js?>
+      var x ='<?state save_context=js_str_literal?>
+      <?state load_context=href?><?state state=value,
+                                         tag=a,
+                                         attr=href,
+                                         in_js=false,
+                                         value=http://www.google.com?>
+    <?state load_context=js_str_literal?>
+    <?state state=text,
+            tag=script,
+            in_js=true,
+            js_quoted=true?>';
+
+    // Regexp handling
+    var expression = 10 / <?state save_context=js_expression?> / <?state save_context=js_regexp?> /;
+
+    <?state load_context=js_expression?><?state js_state=text?>
+    <?state load_context=js_regexp?><?state js_state=regexp?> /;
+    <?state js_state=text?>
+
+  </script>
+  <?state in_js=false?>
+  <?state load_context=js?>
+  <?state tag=script, js_state=text, in_js=true?>
+  </script>
+
+  <!-- html encoded script attribute -->
+  <a onclick="alert(&#39;<?state save_context=onclick_str_literal?>'"></a>
+  <?state in_js=false?>
+  <?state load_context=onclick_str_literal?><?state state=value,
+                                                    tag=a,
+                                                    attr=onclick,
+                                                    attr_type=js,
+                                                    in_js=true,
+                                                    js_quoted=true?>'">
+    <?state state=text, tag=a?>
+  </a>
+
+<!-- ResetMode() tests -->
+
+<?state reset_mode=css?>
+<?state in_css=true?>
+<?state state=css_file?>
+<?state save_context=mode_css?>
+
+<?state reset_mode=html?>
+<?state state=text?>
+<?state in_css=false?>
+<?state load_context=mode_css?>
+<?state in_css=true?>
+<?state state=css_file?>
+
+<?state reset_mode=html_in_tag?>blah=<?state save_context=in_tag?>
+<?state load_context=onclick_str_literal?><?state state=value,
+                                                  tag=a,
+                                                  attr=onclick,
+                                                  attr_type=js,
+                                                  in_js=true,
+                                                  js_quoted=true?>'">
+<?state load_context=in_tag?>
+<?state attr=blah?>xpto<?state value=xpto?>
+
+
+  </body>
+</html>
diff --git a/src/tests/htmlparser_testdata/google.html b/src/tests/htmlparser_testdata/google.html
new file mode 100644
index 0000000..45dddd8
--- /dev/null
+++ b/src/tests/htmlparser_testdata/google.html
@@ -0,0 +1,3 @@
+<html><head><meta http-equiv="content-type" content="text/html; charset=ISO-8859-1"><title>Google</title><style>body,td,a,p,.h{font-family:arial,sans-serif}.h{font-size:20px}.h{color:#3366cc}.q{color:#00c}.ts td{padding:0}.ts{border-collapse:collapse}#gbar{height:22px;padding-left:2px}.gbh,.gbd{border-top:1px solid #c9d7f1;font-size:1px}.gbh{height:0;position:absolute;top:24px;width:100%}#gbi,#gbs{background:#fff;left:0;position:absolute;top:24px;visibility:hidden;z-index:1000}#gbi{border:1px solid;border-color:#c9d7f1 #36c #36c #a2bae7;z-index:1001}#guser{padding-bottom:7px !important}#gbar,#guser{font-size:13px;padding-top:1px !important}@media all{.gb1,.gb3{height:22px;margin-right:.73em;vertical-align:top}#gbar{float:left}}.gb2{display:block;padding:.2em .5em}a.gb1,a.gb2,a.gb3{color:#00c !important}.gb2,.gb3{text-decoration:none}a.gb2:hover{background:#36c;color:#fff !important}</style><script>window.google={kEI:"jigHScf6BKDwswP7-eSsAw",kEXPI:"17259,19016",kHL:"en"};
+function sf(){document.f.q.focus()}
+window.gbar={};(function(){var b=window.gbar,f,h;b.qs=function(a){var c=window.encodeURIComponent&&(document.forms[0].q||"").value;if(c)a.href=a.href.replace(/([?&])q=[^&]*|$/,function(i,g){return(g||"&")+"q="+encodeURIComponent(c)})};function j(a,c){a.visibility=h?"hidden":"visible";a.left=c+"px"}b.tg=function(a){a=a||window.event;var c=0,i,g=window.navExtra,d=document.getElementById("gbi"),e=a.target||a.srcElement;a.cancelBubble=true;if(!f){f=document.createElement(Array.every||window.createPopup?"iframe":"div");f.frameBorder="0";f.src="#";d.parentNode.appendChild(f).id="gbs";if(g)for(i in g)d.insertBefore(g[i],d.firstChild).className="gb2";document.onclick=b.close}if(e.className!="gb3")e=e.parentNode;do c+=e.offsetLeft;while(e=e.offsetParent);j(d.style,c);f.style.width=d.offsetWidth+"px";f.style.height=d.offsetHeight+"px";j(f.style,c);h=!h};b.close=function(a){h&&b.tg(a)}})();</script></head><body bgcolor=#ffffff text=#000000 link=#0000cc vlink=#551a8b alink=#ff0000 onload="sf();if(document.images){new Image().src='/images/nav_logo3.png'}" topmargin=3 marginheight=3><div id=gbar><nobr><b class=gb1>Web</b> <a href="http://images.google.com/imghp?hl=en&tab=wi" onclick=gbar.qs(this) class=gb1>Images</a> <a href="http://maps.google.com/maps?hl=en&tab=wl" onclick=gbar.qs(this) class=gb1>Maps</a> <a href="http://news.google.com/nwshp?hl=en&tab=wn" onclick=gbar.qs(this) class=gb1>News</a> <a href="http://www.google.com/prdhp?hl=en&tab=wf" onclick=gbar.qs(this) class=gb1>Shopping</a> <a href="http://mail.google.com/mail/?hl=en&tab=wm" class=gb1>Gmail</a> <a href="http://www.google.com/intl/en/options/" onclick="this.blur();gbar.tg(event);return !1" class=gb3><u>more</u> <small>&#9660;</small></a><div id=gbi> <a href="http://video.google.com/?hl=en&tab=wv" onclick=gbar.qs(this) class=gb2>Video</a> <a href="http://groups.google.com/grphp?hl=en&tab=wg" onclick=gbar.qs(this) class=gb2>Groups</a> <a href="http://books.google.com/bkshp?hl=en&tab=wp" onclick=gbar.qs(this) 
class=gb2>Books</a> <a href="http://scholar.google.com/schhp?hl=en&tab=ws" onclick=gbar.qs(this) class=gb2>Scholar</a> <a href="http://finance.google.com/finance?hl=en&tab=we" onclick=gbar.qs(this) class=gb2>Finance</a> <a href="http://blogsearch.google.com/?hl=en&tab=wb" onclick=gbar.qs(this) class=gb2>Blogs</a> <div class=gb2><div class=gbd></div></div> <a href="http://www.youtube.com/?hl=en&tab=w1" onclick=gbar.qs(this) class=gb2>YouTube</a> <a href="http://www.google.com/calendar/render?hl=en&tab=wc" class=gb2>Calendar</a> <a href="http://picasaweb.google.com/home?hl=en&tab=wq" onclick=gbar.qs(this) class=gb2>Photos</a> <a href="http://docs.google.com/?hl=en&tab=wo" class=gb2>Documents</a> <a href="http://www.google.com/reader/view/?hl=en&tab=wy" class=gb2>Reader</a> <a href="http://sites.google.com/?hl=en&tab=w3" class=gb2>Sites</a> <div class=gb2><div class=gbd></div></div> <a href="http://www.google.com/intl/en/options/" class=gb2>even more &raquo;</a></div> </nobr></div><div class=gbh style=left:0></div><div class=gbh style=right:0></div><div align=right id=guser style="font-size:84%;padding:0 0 4px" width=100%><nobr><a href="/url?sa=p&pref=ig&pval=3&q=http://www.google.com/ig%3Fhl%3Den%26source%3Diglk&usg=AFQjCNFA18XPfgb7dKnXfKz7x7g1GDH1tg">iGoogle</a> | <a href="https://www.google.com/accounts/Login?continue=http://www.google.com/&hl=en">Sign in</a></nobr></div><center><br clear=all id=lgpd><img alt="Google" height=110 src="/intl/en_ALL/images/logo.gif" width=276><br><br><form action="/search" name=f><table cellpadding=0 cellspacing=0><tr valign=top><td width=25%>&nbsp;</td><td align=center nowrap><input name=hl type=hidden value=en><input type=hidden name=ie value="ISO-8859-1"><input autocomplete="off" maxlength=2048 name=q size=55 title="Google Search" value=""><br><input name=btnG type=submit value="Google Search"><input name=btnI type=submit value="I'm Feeling Lucky"></td><td nowrap width=25%><font size=-2>&nbsp;&nbsp;<a 
href=/advanced_search?hl=en>Advanced Search</a><br>&nbsp;&nbsp;<a href=/preferences?hl=en>Preferences</a><br>&nbsp;&nbsp;<a href=/language_tools?hl=en>Language Tools</a></font></td></tr></table></form><br><font size=-1><font color=red>New!</font> The G1 phone is on sale now. <a href="/aclk?sa=L&ai=BuJQcgigHSbvbCqDUsAPGm6X7DvPUz3en34zVCcHZnNkT0IYDEAEYASDBVDgAUJL0-Mb8_____wFgyQY&num=1&sig=AGiWqtxZNijZyCsNtIwkfSx_S1WSW0Uh8A&q=http://www.google.com/intl/en_us/mobile/android/hpp.html">Learn more</a>.</font><br><br><br><font size=-1><a href="/intl/en/ads/">Advertising&nbsp;Programs</a> - <a href="/services/">Business Solutions</a> - <a href="/intl/en/about.html">About Google</a></font><p><font size=-2>&copy;2008 - <a href="/intl/en/privacy.html">Privacy</a></font></p></center></body><script>google.y={first:[]};window.setTimeout(function(){var xjs=document.createElement('script');xjs.src='/extern_js/f/CgJlbhICdXMgACswCjgILCswGDgDLA/Vh5nhw3Xn6A.js';document.getElementsByTagName('head')[0].appendChild(xjs)},0);google.y.first.push(function(){google.ac.i(document.f,document.f.q,'','')})</script></html>
\ No newline at end of file
diff --git a/src/tests/htmlparser_testdata/javascript_attribute.html b/src/tests/htmlparser_testdata/javascript_attribute.html
new file mode 100644
index 0000000..db096f0
--- /dev/null
+++ b/src/tests/htmlparser_testdata/javascript_attribute.html
@@ -0,0 +1,27 @@
+<html>
+<body>
+
+<a onclick="alert(&#39;<?state state=value, tag=a, attr=onclick, attr_type=js,
+in_js=true, js_quoted=true?> x&#x27;) &; &a; &x;/*blah <?state state=value,
+tag=a, attr=onclick, attr_type=js, in_js=true ?> */ "></a>
+
+<?state state=text, in_js=false ?>
+
+<a onmouseover='alert(document.domain<?state state=value, tag=a,
+attr=onmouseover, attr_type=js, in_js=true ?>)'>test</a>
+
+<?state state=text, in_js=false ?>
+
+<a onmouseover="">test</a>
+
+<?state state=text, in_js=false ?>
+
+<a onclick="<?state in_js=true, js_quoted=false?>">test</a>
+<?state state=text, in_js=false ?>
+
+<a onclick="'<?state in_js=true, js_quoted=true?>">test</a>
+<?state state=text, in_js=false ?>
+
+</body>
+</html>
+<?state state=text ?><?state state=text ?>
diff --git a/src/tests/htmlparser_testdata/javascript_block.html b/src/tests/htmlparser_testdata/javascript_block.html
new file mode 100644
index 0000000..539c1a6
--- /dev/null
+++ b/src/tests/htmlparser_testdata/javascript_block.html
@@ -0,0 +1,50 @@
+<html>
+<body>
+
+<script>
+
+x < 1;
+
+<?state state=text, tag=script, in_js=true ?>
+
+</script>
+
+<?state state=text?>
+
+<script>
+//<!--
+var x = 1;
+<?state state=text, tag=script, in_js=true ?>
+//--> </script>
+
+<?state state=text?>
+
+<script> //<!--
+var x = 1;
+<?state state=text, tag=script, in_js=true ?>
+</script>
+<?state state=text, tag=script, in_js=true ?>
+//--> </script>
+
+<?state state=text?>
+
+<script>
+<!--
+var x = 1;
+<?state state=text, tag=script, in_js=true ?>
+</script>
+<?state state=text, tag=script, in_js=true ?>
+-->
+</script>
+
+<?state state=text?>
+
+<script><?state tag=script, in_js=true?> </script><?state in_js=false?>
+<script><?state tag=script, in_js=true, js_quoted=false?></script><?state in_js=false?>
+<script>'<?state tag=script, in_js=true, js_quoted=true?></script><?state in_js=false?>
+<script>"<?state tag=script, in_js=true, js_quoted=true?></script><?state in_js=false?>
+
+</body>
+</html>
+<?state state=text ?>
+<?state state=text ?>
diff --git a/src/tests/htmlparser_testdata/javascript_regexp.html b/src/tests/htmlparser_testdata/javascript_regexp.html
new file mode 100644
index 0000000..7c1f88d
--- /dev/null
+++ b/src/tests/htmlparser_testdata/javascript_regexp.html
@@ -0,0 +1,171 @@
+<html>
+<body>
+
+
+<script>
+
+// General regular expression literal synching tests.
+
+var regexp = /x'/;
+<?state state=text, in_js=true, js_quoted=false?>
+
+var string = '<?state state=text, in_js=true, js_quoted=true?>';
+<?state state=text, in_js=true, js_quoted=false?>
+
+var op = 1 / 2;
+var string2 = '<?state state=text, in_js=true, js_quoted=true?>';
+<?state state=text, in_js=true, js_quoted=false?>
+
+return /x'/;
+<?state state=text, in_js=true, js_quoted=false?>
+
+
+// General regular expression state tests
+
+var regexp = / <?state js_state=regexp?> /; <?state js_state=text?>
+
+var a = /"hello/.exec("<?state state=text, in_js=true, js_quoted=true ?>");
+var a = /"hello"/.exec("<?state state=text, in_js=true, js_quoted=true ?>");
+
+var expression = 10 / <?state js_state=text?> / <?state js_state=regexp?> /;
+
+<?state js_state=text?>
+
+var expression2 = / <?state js_state=regexp?> /;
+
+if (window.frames.length < /\d+<?state js_state=regexp?>/.exec(<?state js_state=text?>)[0]) {
+  alert(/ '" <?state js_state=regexp?>/.exec(<?state js_state=text?>)/);
+  var quoted_string = "<?state js_state=dq?>" <?state js_state=text?>;
+}
+
+switch(/ <?state js_state=regexp?> /) { <?state js_state=text?>
+  case / <?state js_state=regexp?> /: <?state js_state=text?>
+    break;
+  case / \/<?state js_state=regexp?> /: <?state js_state=text?>
+    break;
+}
+
+delete / <?state js_state=regexp?> x / <?state js_state=text?>;
+id / <?state js_state=text?> x / <?state js_state=text?>;
+
+function test(/ <?state js_state=regexp?> /) {
+  return / <?state js_state=regexp?> /.exec(<?state js_state=text?>);
+}
+
+function test2(/ <?state js_state=regexp?> /, <?state js_state=text?>) {
+  return / '"<?state js_state=regexp?> /.exec(<?state js_state=text?>);
+}
+
+var a = "/<?state js_state=dq?>"/<?state js_state=text?>;
+
+test in / <?state js_state=regexp?>/;
+min / <?state js_state=text?>;
+IN / <?state js_state=text?>;
+
+3.. /<?state js_state=text?>/;
+0x3./<?state js_state=text?>/;
+
+// Escaping in regular expressions
+
+var a = / blah\/<?state js_state=regexp?>/<?state js_state=text?>,
+/\//<?state js_state=text?>,
+/\/*/<?state js_state=text?> /**/ <?state js_state=text?>,
+
+// Bracket expressions
+var a = [/[/] <?state js_state=regexp?> / <?state js_state=text?>,
+var a = /[/\]/ <?state js_state=regexp?> ]/ <?state js_state=text?>,
+var a = /[/\\]/ <?state js_state=text?>];
+
+/* Unary incremented/decremented variable, followed by a division. */
+
+var w = w++ / 1 <?state js_state=text?>;
+var w = w-- / 1 <?state js_state=text?>;
+
+/* Division after array accessor. */
+var test = xpto[2] / <?state js_state=text?>;
+
+/* Division after parenthesis expression. */
+var test = (2 + 2) / <?state js_state=text?>;
+
+/* Division with comments before the previous token. */
+var test = x/* test *// <?state js_state=text?>;
+var test = x /* test *// <?state js_state=text?>;
+var test = x/* test */ / <?state js_state=text?>;
+var test = x /* test */ / <?state js_state=text?>;
+var test = x /* test */
+/ <?state js_state=text?>;
+
+var test = x // test
+/ <?state js_state=text?>;
+
+var test = x // test
+ / <?state js_state=text?>;
+
+var test = x // test
+
+/ <?state js_state=text?>;
+
+/* Regexp with multi line comment before the previous token. */
+var test =/* test *// <?state js_state=regexp?> /;
+var test = /* test *// <?state js_state=regexp?> /;
+var test = /* test *// <?state js_state=regexp?> /;
+var test = /* test */ / <?state js_state=regexp?> /;
+var test = /* test */
+/ <?state js_state=regexp?> /;
+
+var test = // test
+/ <?state js_state=regexp?> /;
+
+var test = // test
+ / <?state js_state=regexp?> /;
+
+var test = // test
+
+/ <?state js_state=regexp?> /;
+
+
+/* Semicolon insertion after a code block */
+function() {} / <?state js_state=regexp?>/
+
+/****************************************************************************
+  Tests that won't pass right now due to design or implementation choices.
+*/
+
+/* Division after a regular expression.
+
+var test = <?nopstate js_state=text?>
+/ <?nopstate js_state=regexp?>
+/ <?nopstate js_state=text?>
+/ <?nopstate js_state=text?>
+/ <?nopstate js_state=regexp?>
+/ <?nopstate js_state=text?>;
+
+*/
+
+/* Division of an object literal
+
+{
+ a: 1,
+ b : 2
+} / <?nopstate js_state=text?>/
+
+*/
+
+/* Unary increment and decrement of regular expressions.
+
+var w = ++/ <?nopstate js_state=regexp?>/i;
+var x = --/ <?nopstate js_state=regexp?>/i
+
+*/
+
+
+</script>
+
+<script>
+
+/ <?state js_state=regexp?> /;
+
+</script>
+
+</body>
+</html>
diff --git a/src/tests/htmlparser_testdata/position.html b/src/tests/htmlparser_testdata/position.html
new file mode 100644
index 0000000..120ca4e
--- /dev/null
+++ b/src/tests/htmlparser_testdata/position.html
@@ -0,0 +1,33 @@
+<?state line_number=1?>
+<?state line_number=2?>
+<html>
+<?state column_number=1?>
+  <body><?state column_number=9?>
+    <?state line_number=6?><?state column_number=28?>
+    <?state
+
+      line_number=7
+    ?><?state column_number=7?>
+    <?state line_number=11?><?state column_number=29?>
+  </body>
+  <?state line_number=13?>
+
+
+
+<?state column_number=1?>
+ <?state column_number=2?>
+  <?state column_number=3?>
+
+
+
+
+
+<a href="http://ww.google.com" onclick="var x=<?state column_number=47?>">
+</a>
+
+<img src="http://www.google.com" onerror="var w = &qu<?state column_number=54?>ot;test&quot;">
+
+
+
+  <?state line_number=32?>
+</html>
diff --git a/src/tests/htmlparser_testdata/reset.html b/src/tests/htmlparser_testdata/reset.html
new file mode 100644
index 0000000..cd0d070
--- /dev/null
+++ b/src/tests/htmlparser_testdata/reset.html
@@ -0,0 +1,31 @@
+<html>
+  <body>
+    <?state state=text, attr_type=none?>
+    <b font="<?state state=value, tag=b, attr=font, attr_quoted=true,
+      in_js=false, attr_type=regular ?>
+<?state reset=true ?>
+<?state state=text, attr_type=none ?>
+<b <?state state=tag, tag=b ?>
+<?state reset_mode=js ?>
+<?state state=js_file?>
+var unquoted =<?state js_quoted=false, in_js=true ?>;
+var single_quoted ='<?state js_quoted=true, in_js=true ?>';
+var unquoted =<?state js_quoted=false, in_js=true ?>;
+<?state reset_mode=html_in_tag?>blah=<?state attr=blah?>xpto<?state value=xpto?>
+test<?state state=attr?>
+<?state reset_mode=html_in_tag?>
+test="test123<?state attr=test, value=test123?>">
+<?state state=text?>
+<?state reset_mode=css?>
+<?state in_css=true?>
+<?state state=css_file?>
+
+<a href="<?state in_css=true?>"></style><?state in_css=true?>
+
+<123 <script><?state in_css=true?>
+
+<?state reset_mode=html?>
+<?state in_css=false?>
+    <?state state=text, attr_type=none?>
+    <b font="<?state state=value, tag=b, attr=font, attr_quoted=true,
+      in_js=false, attr_type=regular ?>
diff --git a/src/tests/htmlparser_testdata/sample_fsm.c b/src/tests/htmlparser_testdata/sample_fsm.c
new file mode 100644
index 0000000..ed85c5f
--- /dev/null
+++ b/src/tests/htmlparser_testdata/sample_fsm.c
@@ -0,0 +1,802 @@
+/* Parses C style strings
+ * Auto generated by generate_fsm.py. Please do not edit.
+ */
+#define STRINGPARSER_NUM_STATES 4
+enum stringparser_state_internal_enum {
+  STRINGPARSER_STATE_INT_TEXT,
+  STRINGPARSER_STATE_INT_STRING,
+  STRINGPARSER_STATE_INT_STRING_ESCAPE
+};
+
+static const int stringparser_states_external[] = {
+  STRINGPARSER_STATE_TEXT,
+  STRINGPARSER_STATE_STRING,
+  STRINGPARSER_STATE_STRING
+};
+
+static const char * stringparser_states_internal_names[] = {
+  "text",
+  "string",
+  "string_escape"
+};
+
+static const int stringparser_transition_row_text[] = {
+      /* '\x00' */ STRINGPARSER_STATE_INT_TEXT,
+      /* '\x01' */ STRINGPARSER_STATE_INT_TEXT,
+      /* '\x02' */ STRINGPARSER_STATE_INT_TEXT,
+      /* '\x03' */ STRINGPARSER_STATE_INT_TEXT,
+      /* '\x04' */ STRINGPARSER_STATE_INT_TEXT,
+      /* '\x05' */ STRINGPARSER_STATE_INT_TEXT,
+      /* '\x06' */ STRINGPARSER_STATE_INT_TEXT,
+      /* '\x07' */ STRINGPARSER_STATE_INT_TEXT,
+      /* '\x08' */ STRINGPARSER_STATE_INT_TEXT,
+      /*   '\t' */ STRINGPARSER_STATE_INT_TEXT,
+      /*   '\n' */ STRINGPARSER_STATE_INT_TEXT,
+      /* '\x0b' */ STRINGPARSER_STATE_INT_TEXT,
+      /* '\x0c' */ STRINGPARSER_STATE_INT_TEXT,
+      /*   '\r' */ STRINGPARSER_STATE_INT_TEXT,
+      /* '\x0e' */ STRINGPARSER_STATE_INT_TEXT,
+      /* '\x0f' */ STRINGPARSER_STATE_INT_TEXT,
+      /* '\x10' */ STRINGPARSER_STATE_INT_TEXT,
+      /* '\x11' */ STRINGPARSER_STATE_INT_TEXT,
+      /* '\x12' */ STRINGPARSER_STATE_INT_TEXT,
+      /* '\x13' */ STRINGPARSER_STATE_INT_TEXT,
+      /* '\x14' */ STRINGPARSER_STATE_INT_TEXT,
+      /* '\x15' */ STRINGPARSER_STATE_INT_TEXT,
+      /* '\x16' */ STRINGPARSER_STATE_INT_TEXT,
+      /* '\x17' */ STRINGPARSER_STATE_INT_TEXT,
+      /* '\x18' */ STRINGPARSER_STATE_INT_TEXT,
+      /* '\x19' */ STRINGPARSER_STATE_INT_TEXT,
+      /* '\x1a' */ STRINGPARSER_STATE_INT_TEXT,
+      /* '\x1b' */ STRINGPARSER_STATE_INT_TEXT,
+      /* '\x1c' */ STRINGPARSER_STATE_INT_TEXT,
+      /* '\x1d' */ STRINGPARSER_STATE_INT_TEXT,
+      /* '\x1e' */ STRINGPARSER_STATE_INT_TEXT,
+      /* '\x1f' */ STRINGPARSER_STATE_INT_TEXT,
+      /*    ' ' */ STRINGPARSER_STATE_INT_TEXT,
+      /*    '!' */ STRINGPARSER_STATE_INT_TEXT,
+      /*    '"' */ STRINGPARSER_STATE_INT_STRING,
+      /*    '#' */ STRINGPARSER_STATE_INT_TEXT,
+      /*    '$' */ STRINGPARSER_STATE_INT_TEXT,
+      /*    '%' */ STRINGPARSER_STATE_INT_TEXT,
+      /*    '&' */ STRINGPARSER_STATE_INT_TEXT,
+      /*    "'" */ STRINGPARSER_STATE_INT_TEXT,
+      /*    '(' */ STRINGPARSER_STATE_INT_TEXT,
+      /*    ')' */ STRINGPARSER_STATE_INT_TEXT,
+      /*    '*' */ STRINGPARSER_STATE_INT_TEXT,
+      /*    '+' */ STRINGPARSER_STATE_INT_TEXT,
+      /*    ',' */ STRINGPARSER_STATE_INT_TEXT,
+      /*    '-' */ STRINGPARSER_STATE_INT_TEXT,
+      /*    '.' */ STRINGPARSER_STATE_INT_TEXT,
+      /*    '/' */ STRINGPARSER_STATE_INT_TEXT,
+      /*    '0' */ STRINGPARSER_STATE_INT_TEXT,
+      /*    '1' */ STRINGPARSER_STATE_INT_TEXT,
+      /*    '2' */ STRINGPARSER_STATE_INT_TEXT,
+      /*    '3' */ STRINGPARSER_STATE_INT_TEXT,
+      /*    '4' */ STRINGPARSER_STATE_INT_TEXT,
+      /*    '5' */ STRINGPARSER_STATE_INT_TEXT,
+      /*    '6' */ STRINGPARSER_STATE_INT_TEXT,
+      /*    '7' */ STRINGPARSER_STATE_INT_TEXT,
+      /*    '8' */ STRINGPARSER_STATE_INT_TEXT,
+      /*    '9' */ STRINGPARSER_STATE_INT_TEXT,
+      /*    ':' */ STRINGPARSER_STATE_INT_TEXT,
+      /*    ';' */ STRINGPARSER_STATE_INT_TEXT,
+      /*    '<' */ STRINGPARSER_STATE_INT_TEXT,
+      /*    '=' */ STRINGPARSER_STATE_INT_TEXT,
+      /*    '>' */ STRINGPARSER_STATE_INT_TEXT,
+      /*    '?' */ STRINGPARSER_STATE_INT_TEXT,
+      /*    '@' */ STRINGPARSER_STATE_INT_TEXT,
+      /*    'A' */ STRINGPARSER_STATE_INT_TEXT,
+      /*    'B' */ STRINGPARSER_STATE_INT_TEXT,
+      /*    'C' */ STRINGPARSER_STATE_INT_TEXT,
+      /*    'D' */ STRINGPARSER_STATE_INT_TEXT,
+      /*    'E' */ STRINGPARSER_STATE_INT_TEXT,
+      /*    'F' */ STRINGPARSER_STATE_INT_TEXT,
+      /*    'G' */ STRINGPARSER_STATE_INT_TEXT,
+      /*    'H' */ STRINGPARSER_STATE_INT_TEXT,
+      /*    'I' */ STRINGPARSER_STATE_INT_TEXT,
+      /*    'J' */ STRINGPARSER_STATE_INT_TEXT,
+      /*    'K' */ STRINGPARSER_STATE_INT_TEXT,
+      /*    'L' */ STRINGPARSER_STATE_INT_TEXT,
+      /*    'M' */ STRINGPARSER_STATE_INT_TEXT,
+      /*    'N' */ STRINGPARSER_STATE_INT_TEXT,
+      /*    'O' */ STRINGPARSER_STATE_INT_TEXT,
+      /*    'P' */ STRINGPARSER_STATE_INT_TEXT,
+      /*    'Q' */ STRINGPARSER_STATE_INT_TEXT,
+      /*    'R' */ STRINGPARSER_STATE_INT_TEXT,
+      /*    'S' */ STRINGPARSER_STATE_INT_TEXT,
+      /*    'T' */ STRINGPARSER_STATE_INT_TEXT,
+      /*    'U' */ STRINGPARSER_STATE_INT_TEXT,
+      /*    'V' */ STRINGPARSER_STATE_INT_TEXT,
+      /*    'W' */ STRINGPARSER_STATE_INT_TEXT,
+      /*    'X' */ STRINGPARSER_STATE_INT_TEXT,
+      /*    'Y' */ STRINGPARSER_STATE_INT_TEXT,
+      /*    'Z' */ STRINGPARSER_STATE_INT_TEXT,
+      /*    '[' */ STRINGPARSER_STATE_INT_TEXT,
+      /*   '\\' */ STRINGPARSER_STATE_INT_STRING,
+      /*    ']' */ STRINGPARSER_STATE_INT_TEXT,
+      /*    '^' */ STRINGPARSER_STATE_INT_TEXT,
+      /*    '_' */ STRINGPARSER_STATE_INT_TEXT,
+      /*    '`' */ STRINGPARSER_STATE_INT_TEXT,
+      /*    'a' */ STRINGPARSER_STATE_INT_TEXT,
+      /*    'b' */ STRINGPARSER_STATE_INT_TEXT,
+      /*    'c' */ STRINGPARSER_STATE_INT_TEXT,
+      /*    'd' */ STRINGPARSER_STATE_INT_TEXT,
+      /*    'e' */ STRINGPARSER_STATE_INT_TEXT,
+      /*    'f' */ STRINGPARSER_STATE_INT_TEXT,
+      /*    'g' */ STRINGPARSER_STATE_INT_TEXT,
+      /*    'h' */ STRINGPARSER_STATE_INT_TEXT,
+      /*    'i' */ STRINGPARSER_STATE_INT_TEXT,
+      /*    'j' */ STRINGPARSER_STATE_INT_TEXT,
+      /*    'k' */ STRINGPARSER_STATE_INT_TEXT,
+      /*    'l' */ STRINGPARSER_STATE_INT_TEXT,
+      /*    'm' */ STRINGPARSER_STATE_INT_TEXT,
+      /*    'n' */ STRINGPARSER_STATE_INT_TEXT,
+      /*    'o' */ STRINGPARSER_STATE_INT_TEXT,
+      /*    'p' */ STRINGPARSER_STATE_INT_TEXT,
+      /*    'q' */ STRINGPARSER_STATE_INT_TEXT,
+      /*    'r' */ STRINGPARSER_STATE_INT_TEXT,
+      /*    's' */ STRINGPARSER_STATE_INT_TEXT,
+      /*    't' */ STRINGPARSER_STATE_INT_TEXT,
+      /*    'u' */ STRINGPARSER_STATE_INT_TEXT,
+      /*    'v' */ STRINGPARSER_STATE_INT_TEXT,
+      /*    'w' */ STRINGPARSER_STATE_INT_TEXT,
+      /*    'x' */ STRINGPARSER_STATE_INT_TEXT,
+      /*    'y' */ STRINGPARSER_STATE_INT_TEXT,
+      /*    'z' */ STRINGPARSER_STATE_INT_TEXT,
+      /*    '{' */ STRINGPARSER_STATE_INT_TEXT,
+      /*    '|' */ STRINGPARSER_STATE_INT_TEXT,
+      /*    '}' */ STRINGPARSER_STATE_INT_TEXT,
+      /*    '~' */ STRINGPARSER_STATE_INT_TEXT,
+      /* '\x7f' */ STRINGPARSER_STATE_INT_TEXT,
+      /* '\x80' */ STRINGPARSER_STATE_INT_TEXT,
+      /* '\x81' */ STRINGPARSER_STATE_INT_TEXT,
+      /* '\x82' */ STRINGPARSER_STATE_INT_TEXT,
+      /* '\x83' */ STRINGPARSER_STATE_INT_TEXT,
+      /* '\x84' */ STRINGPARSER_STATE_INT_TEXT,
+      /* '\x85' */ STRINGPARSER_STATE_INT_TEXT,
+      /* '\x86' */ STRINGPARSER_STATE_INT_TEXT,
+      /* '\x87' */ STRINGPARSER_STATE_INT_TEXT,
+      /* '\x88' */ STRINGPARSER_STATE_INT_TEXT,
+      /* '\x89' */ STRINGPARSER_STATE_INT_TEXT,
+      /* '\x8a' */ STRINGPARSER_STATE_INT_TEXT,
+      /* '\x8b' */ STRINGPARSER_STATE_INT_TEXT,
+      /* '\x8c' */ STRINGPARSER_STATE_INT_TEXT,
+      /* '\x8d' */ STRINGPARSER_STATE_INT_TEXT,
+      /* '\x8e' */ STRINGPARSER_STATE_INT_TEXT,
+      /* '\x8f' */ STRINGPARSER_STATE_INT_TEXT,
+      /* '\x90' */ STRINGPARSER_STATE_INT_TEXT,
+      /* '\x91' */ STRINGPARSER_STATE_INT_TEXT,
+      /* '\x92' */ STRINGPARSER_STATE_INT_TEXT,
+      /* '\x93' */ STRINGPARSER_STATE_INT_TEXT,
+      /* '\x94' */ STRINGPARSER_STATE_INT_TEXT,
+      /* '\x95' */ STRINGPARSER_STATE_INT_TEXT,
+      /* '\x96' */ STRINGPARSER_STATE_INT_TEXT,
+      /* '\x97' */ STRINGPARSER_STATE_INT_TEXT,
+      /* '\x98' */ STRINGPARSER_STATE_INT_TEXT,
+      /* '\x99' */ STRINGPARSER_STATE_INT_TEXT,
+      /* '\x9a' */ STRINGPARSER_STATE_INT_TEXT,
+      /* '\x9b' */ STRINGPARSER_STATE_INT_TEXT,
+      /* '\x9c' */ STRINGPARSER_STATE_INT_TEXT,
+      /* '\x9d' */ STRINGPARSER_STATE_INT_TEXT,
+      /* '\x9e' */ STRINGPARSER_STATE_INT_TEXT,
+      /* '\x9f' */ STRINGPARSER_STATE_INT_TEXT,
+      /* '\xa0' */ STRINGPARSER_STATE_INT_TEXT,
+      /* '\xa1' */ STRINGPARSER_STATE_INT_TEXT,
+      /* '\xa2' */ STRINGPARSER_STATE_INT_TEXT,
+      /* '\xa3' */ STRINGPARSER_STATE_INT_TEXT,
+      /* '\xa4' */ STRINGPARSER_STATE_INT_TEXT,
+      /* '\xa5' */ STRINGPARSER_STATE_INT_TEXT,
+      /* '\xa6' */ STRINGPARSER_STATE_INT_TEXT,
+      /* '\xa7' */ STRINGPARSER_STATE_INT_TEXT,
+      /* '\xa8' */ STRINGPARSER_STATE_INT_TEXT,
+      /* '\xa9' */ STRINGPARSER_STATE_INT_TEXT,
+      /* '\xaa' */ STRINGPARSER_STATE_INT_TEXT,
+      /* '\xab' */ STRINGPARSER_STATE_INT_TEXT,
+      /* '\xac' */ STRINGPARSER_STATE_INT_TEXT,
+      /* '\xad' */ STRINGPARSER_STATE_INT_TEXT,
+      /* '\xae' */ STRINGPARSER_STATE_INT_TEXT,
+      /* '\xaf' */ STRINGPARSER_STATE_INT_TEXT,
+      /* '\xb0' */ STRINGPARSER_STATE_INT_TEXT,
+      /* '\xb1' */ STRINGPARSER_STATE_INT_TEXT,
+      /* '\xb2' */ STRINGPARSER_STATE_INT_TEXT,
+      /* '\xb3' */ STRINGPARSER_STATE_INT_TEXT,
+      /* '\xb4' */ STRINGPARSER_STATE_INT_TEXT,
+      /* '\xb5' */ STRINGPARSER_STATE_INT_TEXT,
+      /* '\xb6' */ STRINGPARSER_STATE_INT_TEXT,
+      /* '\xb7' */ STRINGPARSER_STATE_INT_TEXT,
+      /* '\xb8' */ STRINGPARSER_STATE_INT_TEXT,
+      /* '\xb9' */ STRINGPARSER_STATE_INT_TEXT,
+      /* '\xba' */ STRINGPARSER_STATE_INT_TEXT,
+      /* '\xbb' */ STRINGPARSER_STATE_INT_TEXT,
+      /* '\xbc' */ STRINGPARSER_STATE_INT_TEXT,
+      /* '\xbd' */ STRINGPARSER_STATE_INT_TEXT,
+      /* '\xbe' */ STRINGPARSER_STATE_INT_TEXT,
+      /* '\xbf' */ STRINGPARSER_STATE_INT_TEXT,
+      /* '\xc0' */ STRINGPARSER_STATE_INT_TEXT,
+      /* '\xc1' */ STRINGPARSER_STATE_INT_TEXT,
+      /* '\xc2' */ STRINGPARSER_STATE_INT_TEXT,
+      /* '\xc3' */ STRINGPARSER_STATE_INT_TEXT,
+      /* '\xc4' */ STRINGPARSER_STATE_INT_TEXT,
+      /* '\xc5' */ STRINGPARSER_STATE_INT_TEXT,
+      /* '\xc6' */ STRINGPARSER_STATE_INT_TEXT,
+      /* '\xc7' */ STRINGPARSER_STATE_INT_TEXT,
+      /* '\xc8' */ STRINGPARSER_STATE_INT_TEXT,
+      /* '\xc9' */ STRINGPARSER_STATE_INT_TEXT,
+      /* '\xca' */ STRINGPARSER_STATE_INT_TEXT,
+      /* '\xcb' */ STRINGPARSER_STATE_INT_TEXT,
+      /* '\xcc' */ STRINGPARSER_STATE_INT_TEXT,
+      /* '\xcd' */ STRINGPARSER_STATE_INT_TEXT,
+      /* '\xce' */ STRINGPARSER_STATE_INT_TEXT,
+      /* '\xcf' */ STRINGPARSER_STATE_INT_TEXT,
+      /* '\xd0' */ STRINGPARSER_STATE_INT_TEXT,
+      /* '\xd1' */ STRINGPARSER_STATE_INT_TEXT,
+      /* '\xd2' */ STRINGPARSER_STATE_INT_TEXT,
+      /* '\xd3' */ STRINGPARSER_STATE_INT_TEXT,
+      /* '\xd4' */ STRINGPARSER_STATE_INT_TEXT,
+      /* '\xd5' */ STRINGPARSER_STATE_INT_TEXT,
+      /* '\xd6' */ STRINGPARSER_STATE_INT_TEXT,
+      /* '\xd7' */ STRINGPARSER_STATE_INT_TEXT,
+      /* '\xd8' */ STRINGPARSER_STATE_INT_TEXT,
+      /* '\xd9' */ STRINGPARSER_STATE_INT_TEXT,
+      /* '\xda' */ STRINGPARSER_STATE_INT_TEXT,
+      /* '\xdb' */ STRINGPARSER_STATE_INT_TEXT,
+      /* '\xdc' */ STRINGPARSER_STATE_INT_TEXT,
+      /* '\xdd' */ STRINGPARSER_STATE_INT_TEXT,
+      /* '\xde' */ STRINGPARSER_STATE_INT_TEXT,
+      /* '\xdf' */ STRINGPARSER_STATE_INT_TEXT,
+      /* '\xe0' */ STRINGPARSER_STATE_INT_TEXT,
+      /* '\xe1' */ STRINGPARSER_STATE_INT_TEXT,
+      /* '\xe2' */ STRINGPARSER_STATE_INT_TEXT,
+      /* '\xe3' */ STRINGPARSER_STATE_INT_TEXT,
+      /* '\xe4' */ STRINGPARSER_STATE_INT_TEXT,
+      /* '\xe5' */ STRINGPARSER_STATE_INT_TEXT,
+      /* '\xe6' */ STRINGPARSER_STATE_INT_TEXT,
+      /* '\xe7' */ STRINGPARSER_STATE_INT_TEXT,
+      /* '\xe8' */ STRINGPARSER_STATE_INT_TEXT,
+      /* '\xe9' */ STRINGPARSER_STATE_INT_TEXT,
+      /* '\xea' */ STRINGPARSER_STATE_INT_TEXT,
+      /* '\xeb' */ STRINGPARSER_STATE_INT_TEXT,
+      /* '\xec' */ STRINGPARSER_STATE_INT_TEXT,
+      /* '\xed' */ STRINGPARSER_STATE_INT_TEXT,
+      /* '\xee' */ STRINGPARSER_STATE_INT_TEXT,
+      /* '\xef' */ STRINGPARSER_STATE_INT_TEXT,
+      /* '\xf0' */ STRINGPARSER_STATE_INT_TEXT,
+      /* '\xf1' */ STRINGPARSER_STATE_INT_TEXT,
+      /* '\xf2' */ STRINGPARSER_STATE_INT_TEXT,
+      /* '\xf3' */ STRINGPARSER_STATE_INT_TEXT,
+      /* '\xf4' */ STRINGPARSER_STATE_INT_TEXT,
+      /* '\xf5' */ STRINGPARSER_STATE_INT_TEXT,
+      /* '\xf6' */ STRINGPARSER_STATE_INT_TEXT,
+      /* '\xf7' */ STRINGPARSER_STATE_INT_TEXT,
+      /* '\xf8' */ STRINGPARSER_STATE_INT_TEXT,
+      /* '\xf9' */ STRINGPARSER_STATE_INT_TEXT,
+      /* '\xfa' */ STRINGPARSER_STATE_INT_TEXT,
+      /* '\xfb' */ STRINGPARSER_STATE_INT_TEXT,
+      /* '\xfc' */ STRINGPARSER_STATE_INT_TEXT,
+      /* '\xfd' */ STRINGPARSER_STATE_INT_TEXT,
+      /* '\xfe' */ STRINGPARSER_STATE_INT_TEXT
+};
+
+static const int stringparser_transition_row_string[] = {  /* Next state for each input byte while in the string state; entries are in ascending byte order, as labelled on each row. */
+      /* '\x00' */ STRINGPARSER_STATE_INT_STRING,
+      /* '\x01' */ STRINGPARSER_STATE_INT_STRING,
+      /* '\x02' */ STRINGPARSER_STATE_INT_STRING,
+      /* '\x03' */ STRINGPARSER_STATE_INT_STRING,
+      /* '\x04' */ STRINGPARSER_STATE_INT_STRING,
+      /* '\x05' */ STRINGPARSER_STATE_INT_STRING,
+      /* '\x06' */ STRINGPARSER_STATE_INT_STRING,
+      /* '\x07' */ STRINGPARSER_STATE_INT_STRING,
+      /* '\x08' */ STRINGPARSER_STATE_INT_STRING,
+      /*   '\t' */ STRINGPARSER_STATE_INT_STRING,
+      /*   '\n' */ STRINGPARSER_STATE_INT_STRING,
+      /* '\x0b' */ STRINGPARSER_STATE_INT_STRING,
+      /* '\x0c' */ STRINGPARSER_STATE_INT_STRING,
+      /*   '\r' */ STRINGPARSER_STATE_INT_STRING,
+      /* '\x0e' */ STRINGPARSER_STATE_INT_STRING,
+      /* '\x0f' */ STRINGPARSER_STATE_INT_STRING,
+      /* '\x10' */ STRINGPARSER_STATE_INT_STRING,
+      /* '\x11' */ STRINGPARSER_STATE_INT_STRING,
+      /* '\x12' */ STRINGPARSER_STATE_INT_STRING,
+      /* '\x13' */ STRINGPARSER_STATE_INT_STRING,
+      /* '\x14' */ STRINGPARSER_STATE_INT_STRING,
+      /* '\x15' */ STRINGPARSER_STATE_INT_STRING,
+      /* '\x16' */ STRINGPARSER_STATE_INT_STRING,
+      /* '\x17' */ STRINGPARSER_STATE_INT_STRING,
+      /* '\x18' */ STRINGPARSER_STATE_INT_STRING,
+      /* '\x19' */ STRINGPARSER_STATE_INT_STRING,
+      /* '\x1a' */ STRINGPARSER_STATE_INT_STRING,
+      /* '\x1b' */ STRINGPARSER_STATE_INT_STRING,
+      /* '\x1c' */ STRINGPARSER_STATE_INT_STRING,
+      /* '\x1d' */ STRINGPARSER_STATE_INT_STRING,
+      /* '\x1e' */ STRINGPARSER_STATE_INT_STRING,
+      /* '\x1f' */ STRINGPARSER_STATE_INT_STRING,
+      /*    ' ' */ STRINGPARSER_STATE_INT_STRING,
+      /*    '!' */ STRINGPARSER_STATE_INT_STRING,
+      /*    '"' */ STRINGPARSER_STATE_INT_TEXT,  /* a double quote moves back to the text state */
+      /*    '#' */ STRINGPARSER_STATE_INT_STRING,
+      /*    '$' */ STRINGPARSER_STATE_INT_STRING,
+      /*    '%' */ STRINGPARSER_STATE_INT_STRING,
+      /*    '&' */ STRINGPARSER_STATE_INT_STRING,
+      /*    "'" */ STRINGPARSER_STATE_INT_STRING,
+      /*    '(' */ STRINGPARSER_STATE_INT_STRING,
+      /*    ')' */ STRINGPARSER_STATE_INT_STRING,
+      /*    '*' */ STRINGPARSER_STATE_INT_STRING,
+      /*    '+' */ STRINGPARSER_STATE_INT_STRING,
+      /*    ',' */ STRINGPARSER_STATE_INT_STRING,
+      /*    '-' */ STRINGPARSER_STATE_INT_STRING,
+      /*    '.' */ STRINGPARSER_STATE_INT_STRING,
+      /*    '/' */ STRINGPARSER_STATE_INT_STRING,
+      /*    '0' */ STRINGPARSER_STATE_INT_STRING,
+      /*    '1' */ STRINGPARSER_STATE_INT_STRING,
+      /*    '2' */ STRINGPARSER_STATE_INT_STRING,
+      /*    '3' */ STRINGPARSER_STATE_INT_STRING,
+      /*    '4' */ STRINGPARSER_STATE_INT_STRING,
+      /*    '5' */ STRINGPARSER_STATE_INT_STRING,
+      /*    '6' */ STRINGPARSER_STATE_INT_STRING,
+      /*    '7' */ STRINGPARSER_STATE_INT_STRING,
+      /*    '8' */ STRINGPARSER_STATE_INT_STRING,
+      /*    '9' */ STRINGPARSER_STATE_INT_STRING,
+      /*    ':' */ STRINGPARSER_STATE_INT_STRING,
+      /*    ';' */ STRINGPARSER_STATE_INT_STRING,
+      /*    '<' */ STRINGPARSER_STATE_INT_STRING,
+      /*    '=' */ STRINGPARSER_STATE_INT_STRING,
+      /*    '>' */ STRINGPARSER_STATE_INT_STRING,
+      /*    '?' */ STRINGPARSER_STATE_INT_STRING,
+      /*    '@' */ STRINGPARSER_STATE_INT_STRING,
+      /*    'A' */ STRINGPARSER_STATE_INT_STRING,
+      /*    'B' */ STRINGPARSER_STATE_INT_STRING,
+      /*    'C' */ STRINGPARSER_STATE_INT_STRING,
+      /*    'D' */ STRINGPARSER_STATE_INT_STRING,
+      /*    'E' */ STRINGPARSER_STATE_INT_STRING,
+      /*    'F' */ STRINGPARSER_STATE_INT_STRING,
+      /*    'G' */ STRINGPARSER_STATE_INT_STRING,
+      /*    'H' */ STRINGPARSER_STATE_INT_STRING,
+      /*    'I' */ STRINGPARSER_STATE_INT_STRING,
+      /*    'J' */ STRINGPARSER_STATE_INT_STRING,
+      /*    'K' */ STRINGPARSER_STATE_INT_STRING,
+      /*    'L' */ STRINGPARSER_STATE_INT_STRING,
+      /*    'M' */ STRINGPARSER_STATE_INT_STRING,
+      /*    'N' */ STRINGPARSER_STATE_INT_STRING,
+      /*    'O' */ STRINGPARSER_STATE_INT_STRING,
+      /*    'P' */ STRINGPARSER_STATE_INT_STRING,
+      /*    'Q' */ STRINGPARSER_STATE_INT_STRING,
+      /*    'R' */ STRINGPARSER_STATE_INT_STRING,
+      /*    'S' */ STRINGPARSER_STATE_INT_STRING,
+      /*    'T' */ STRINGPARSER_STATE_INT_STRING,
+      /*    'U' */ STRINGPARSER_STATE_INT_STRING,
+      /*    'V' */ STRINGPARSER_STATE_INT_STRING,
+      /*    'W' */ STRINGPARSER_STATE_INT_STRING,
+      /*    'X' */ STRINGPARSER_STATE_INT_STRING,
+      /*    'Y' */ STRINGPARSER_STATE_INT_STRING,
+      /*    'Z' */ STRINGPARSER_STATE_INT_STRING,
+      /*    '[' */ STRINGPARSER_STATE_INT_STRING,
+      /*   '\\' */ STRINGPARSER_STATE_INT_STRING_ESCAPE,  /* a backslash moves to the string_escape state */
+      /*    ']' */ STRINGPARSER_STATE_INT_STRING,
+      /*    '^' */ STRINGPARSER_STATE_INT_STRING,
+      /*    '_' */ STRINGPARSER_STATE_INT_STRING,
+      /*    '`' */ STRINGPARSER_STATE_INT_STRING,
+      /*    'a' */ STRINGPARSER_STATE_INT_STRING,
+      /*    'b' */ STRINGPARSER_STATE_INT_STRING,
+      /*    'c' */ STRINGPARSER_STATE_INT_STRING,
+      /*    'd' */ STRINGPARSER_STATE_INT_STRING,
+      /*    'e' */ STRINGPARSER_STATE_INT_STRING,
+      /*    'f' */ STRINGPARSER_STATE_INT_STRING,
+      /*    'g' */ STRINGPARSER_STATE_INT_STRING,
+      /*    'h' */ STRINGPARSER_STATE_INT_STRING,
+      /*    'i' */ STRINGPARSER_STATE_INT_STRING,
+      /*    'j' */ STRINGPARSER_STATE_INT_STRING,
+      /*    'k' */ STRINGPARSER_STATE_INT_STRING,
+      /*    'l' */ STRINGPARSER_STATE_INT_STRING,
+      /*    'm' */ STRINGPARSER_STATE_INT_STRING,
+      /*    'n' */ STRINGPARSER_STATE_INT_STRING,
+      /*    'o' */ STRINGPARSER_STATE_INT_STRING,
+      /*    'p' */ STRINGPARSER_STATE_INT_STRING,
+      /*    'q' */ STRINGPARSER_STATE_INT_STRING,
+      /*    'r' */ STRINGPARSER_STATE_INT_STRING,
+      /*    's' */ STRINGPARSER_STATE_INT_STRING,
+      /*    't' */ STRINGPARSER_STATE_INT_STRING,
+      /*    'u' */ STRINGPARSER_STATE_INT_STRING,
+      /*    'v' */ STRINGPARSER_STATE_INT_STRING,
+      /*    'w' */ STRINGPARSER_STATE_INT_STRING,
+      /*    'x' */ STRINGPARSER_STATE_INT_STRING,
+      /*    'y' */ STRINGPARSER_STATE_INT_STRING,
+      /*    'z' */ STRINGPARSER_STATE_INT_STRING,
+      /*    '{' */ STRINGPARSER_STATE_INT_STRING,
+      /*    '|' */ STRINGPARSER_STATE_INT_STRING,
+      /*    '}' */ STRINGPARSER_STATE_INT_STRING,
+      /*    '~' */ STRINGPARSER_STATE_INT_STRING,
+      /* '\x7f' */ STRINGPARSER_STATE_INT_STRING,
+      /* '\x80' */ STRINGPARSER_STATE_INT_STRING,
+      /* '\x81' */ STRINGPARSER_STATE_INT_STRING,
+      /* '\x82' */ STRINGPARSER_STATE_INT_STRING,
+      /* '\x83' */ STRINGPARSER_STATE_INT_STRING,
+      /* '\x84' */ STRINGPARSER_STATE_INT_STRING,
+      /* '\x85' */ STRINGPARSER_STATE_INT_STRING,
+      /* '\x86' */ STRINGPARSER_STATE_INT_STRING,
+      /* '\x87' */ STRINGPARSER_STATE_INT_STRING,
+      /* '\x88' */ STRINGPARSER_STATE_INT_STRING,
+      /* '\x89' */ STRINGPARSER_STATE_INT_STRING,
+      /* '\x8a' */ STRINGPARSER_STATE_INT_STRING,
+      /* '\x8b' */ STRINGPARSER_STATE_INT_STRING,
+      /* '\x8c' */ STRINGPARSER_STATE_INT_STRING,
+      /* '\x8d' */ STRINGPARSER_STATE_INT_STRING,
+      /* '\x8e' */ STRINGPARSER_STATE_INT_STRING,
+      /* '\x8f' */ STRINGPARSER_STATE_INT_STRING,
+      /* '\x90' */ STRINGPARSER_STATE_INT_STRING,
+      /* '\x91' */ STRINGPARSER_STATE_INT_STRING,
+      /* '\x92' */ STRINGPARSER_STATE_INT_STRING,
+      /* '\x93' */ STRINGPARSER_STATE_INT_STRING,
+      /* '\x94' */ STRINGPARSER_STATE_INT_STRING,
+      /* '\x95' */ STRINGPARSER_STATE_INT_STRING,
+      /* '\x96' */ STRINGPARSER_STATE_INT_STRING,
+      /* '\x97' */ STRINGPARSER_STATE_INT_STRING,
+      /* '\x98' */ STRINGPARSER_STATE_INT_STRING,
+      /* '\x99' */ STRINGPARSER_STATE_INT_STRING,
+      /* '\x9a' */ STRINGPARSER_STATE_INT_STRING,
+      /* '\x9b' */ STRINGPARSER_STATE_INT_STRING,
+      /* '\x9c' */ STRINGPARSER_STATE_INT_STRING,
+      /* '\x9d' */ STRINGPARSER_STATE_INT_STRING,
+      /* '\x9e' */ STRINGPARSER_STATE_INT_STRING,
+      /* '\x9f' */ STRINGPARSER_STATE_INT_STRING,
+      /* '\xa0' */ STRINGPARSER_STATE_INT_STRING,
+      /* '\xa1' */ STRINGPARSER_STATE_INT_STRING,
+      /* '\xa2' */ STRINGPARSER_STATE_INT_STRING,
+      /* '\xa3' */ STRINGPARSER_STATE_INT_STRING,
+      /* '\xa4' */ STRINGPARSER_STATE_INT_STRING,
+      /* '\xa5' */ STRINGPARSER_STATE_INT_STRING,
+      /* '\xa6' */ STRINGPARSER_STATE_INT_STRING,
+      /* '\xa7' */ STRINGPARSER_STATE_INT_STRING,
+      /* '\xa8' */ STRINGPARSER_STATE_INT_STRING,
+      /* '\xa9' */ STRINGPARSER_STATE_INT_STRING,
+      /* '\xaa' */ STRINGPARSER_STATE_INT_STRING,
+      /* '\xab' */ STRINGPARSER_STATE_INT_STRING,
+      /* '\xac' */ STRINGPARSER_STATE_INT_STRING,
+      /* '\xad' */ STRINGPARSER_STATE_INT_STRING,
+      /* '\xae' */ STRINGPARSER_STATE_INT_STRING,
+      /* '\xaf' */ STRINGPARSER_STATE_INT_STRING,
+      /* '\xb0' */ STRINGPARSER_STATE_INT_STRING,
+      /* '\xb1' */ STRINGPARSER_STATE_INT_STRING,
+      /* '\xb2' */ STRINGPARSER_STATE_INT_STRING,
+      /* '\xb3' */ STRINGPARSER_STATE_INT_STRING,
+      /* '\xb4' */ STRINGPARSER_STATE_INT_STRING,
+      /* '\xb5' */ STRINGPARSER_STATE_INT_STRING,
+      /* '\xb6' */ STRINGPARSER_STATE_INT_STRING,
+      /* '\xb7' */ STRINGPARSER_STATE_INT_STRING,
+      /* '\xb8' */ STRINGPARSER_STATE_INT_STRING,
+      /* '\xb9' */ STRINGPARSER_STATE_INT_STRING,
+      /* '\xba' */ STRINGPARSER_STATE_INT_STRING,
+      /* '\xbb' */ STRINGPARSER_STATE_INT_STRING,
+      /* '\xbc' */ STRINGPARSER_STATE_INT_STRING,
+      /* '\xbd' */ STRINGPARSER_STATE_INT_STRING,
+      /* '\xbe' */ STRINGPARSER_STATE_INT_STRING,
+      /* '\xbf' */ STRINGPARSER_STATE_INT_STRING,
+      /* '\xc0' */ STRINGPARSER_STATE_INT_STRING,
+      /* '\xc1' */ STRINGPARSER_STATE_INT_STRING,
+      /* '\xc2' */ STRINGPARSER_STATE_INT_STRING,
+      /* '\xc3' */ STRINGPARSER_STATE_INT_STRING,
+      /* '\xc4' */ STRINGPARSER_STATE_INT_STRING,
+      /* '\xc5' */ STRINGPARSER_STATE_INT_STRING,
+      /* '\xc6' */ STRINGPARSER_STATE_INT_STRING,
+      /* '\xc7' */ STRINGPARSER_STATE_INT_STRING,
+      /* '\xc8' */ STRINGPARSER_STATE_INT_STRING,
+      /* '\xc9' */ STRINGPARSER_STATE_INT_STRING,
+      /* '\xca' */ STRINGPARSER_STATE_INT_STRING,
+      /* '\xcb' */ STRINGPARSER_STATE_INT_STRING,
+      /* '\xcc' */ STRINGPARSER_STATE_INT_STRING,
+      /* '\xcd' */ STRINGPARSER_STATE_INT_STRING,
+      /* '\xce' */ STRINGPARSER_STATE_INT_STRING,
+      /* '\xcf' */ STRINGPARSER_STATE_INT_STRING,
+      /* '\xd0' */ STRINGPARSER_STATE_INT_STRING,
+      /* '\xd1' */ STRINGPARSER_STATE_INT_STRING,
+      /* '\xd2' */ STRINGPARSER_STATE_INT_STRING,
+      /* '\xd3' */ STRINGPARSER_STATE_INT_STRING,
+      /* '\xd4' */ STRINGPARSER_STATE_INT_STRING,
+      /* '\xd5' */ STRINGPARSER_STATE_INT_STRING,
+      /* '\xd6' */ STRINGPARSER_STATE_INT_STRING,
+      /* '\xd7' */ STRINGPARSER_STATE_INT_STRING,
+      /* '\xd8' */ STRINGPARSER_STATE_INT_STRING,
+      /* '\xd9' */ STRINGPARSER_STATE_INT_STRING,
+      /* '\xda' */ STRINGPARSER_STATE_INT_STRING,
+      /* '\xdb' */ STRINGPARSER_STATE_INT_STRING,
+      /* '\xdc' */ STRINGPARSER_STATE_INT_STRING,
+      /* '\xdd' */ STRINGPARSER_STATE_INT_STRING,
+      /* '\xde' */ STRINGPARSER_STATE_INT_STRING,
+      /* '\xdf' */ STRINGPARSER_STATE_INT_STRING,
+      /* '\xe0' */ STRINGPARSER_STATE_INT_STRING,
+      /* '\xe1' */ STRINGPARSER_STATE_INT_STRING,
+      /* '\xe2' */ STRINGPARSER_STATE_INT_STRING,
+      /* '\xe3' */ STRINGPARSER_STATE_INT_STRING,
+      /* '\xe4' */ STRINGPARSER_STATE_INT_STRING,
+      /* '\xe5' */ STRINGPARSER_STATE_INT_STRING,
+      /* '\xe6' */ STRINGPARSER_STATE_INT_STRING,
+      /* '\xe7' */ STRINGPARSER_STATE_INT_STRING,
+      /* '\xe8' */ STRINGPARSER_STATE_INT_STRING,
+      /* '\xe9' */ STRINGPARSER_STATE_INT_STRING,
+      /* '\xea' */ STRINGPARSER_STATE_INT_STRING,
+      /* '\xeb' */ STRINGPARSER_STATE_INT_STRING,
+      /* '\xec' */ STRINGPARSER_STATE_INT_STRING,
+      /* '\xed' */ STRINGPARSER_STATE_INT_STRING,
+      /* '\xee' */ STRINGPARSER_STATE_INT_STRING,
+      /* '\xef' */ STRINGPARSER_STATE_INT_STRING,
+      /* '\xf0' */ STRINGPARSER_STATE_INT_STRING,
+      /* '\xf1' */ STRINGPARSER_STATE_INT_STRING,
+      /* '\xf2' */ STRINGPARSER_STATE_INT_STRING,
+      /* '\xf3' */ STRINGPARSER_STATE_INT_STRING,
+      /* '\xf4' */ STRINGPARSER_STATE_INT_STRING,
+      /* '\xf5' */ STRINGPARSER_STATE_INT_STRING,
+      /* '\xf6' */ STRINGPARSER_STATE_INT_STRING,
+      /* '\xf7' */ STRINGPARSER_STATE_INT_STRING,
+      /* '\xf8' */ STRINGPARSER_STATE_INT_STRING,
+      /* '\xf9' */ STRINGPARSER_STATE_INT_STRING,
+      /* '\xfa' */ STRINGPARSER_STATE_INT_STRING,
+      /* '\xfb' */ STRINGPARSER_STATE_INT_STRING,
+      /* '\xfc' */ STRINGPARSER_STATE_INT_STRING,
+      /* '\xfd' */ STRINGPARSER_STATE_INT_STRING,
+      /* '\xfe' */ STRINGPARSER_STATE_INT_STRING  /* NOTE(review): the row stops at 0xfe (255 entries), so byte 0xff has no entry -- confirm the lookup never indexes 0xff */
+};
+
+static const int stringparser_transition_row_string_escape[] = {  /* Next state for each input byte while in the string_escape state: every entry, including '"' and '\\', is the string state. */
+      /* '\x00' */ STRINGPARSER_STATE_INT_STRING,
+      /* '\x01' */ STRINGPARSER_STATE_INT_STRING,
+      /* '\x02' */ STRINGPARSER_STATE_INT_STRING,
+      /* '\x03' */ STRINGPARSER_STATE_INT_STRING,
+      /* '\x04' */ STRINGPARSER_STATE_INT_STRING,
+      /* '\x05' */ STRINGPARSER_STATE_INT_STRING,
+      /* '\x06' */ STRINGPARSER_STATE_INT_STRING,
+      /* '\x07' */ STRINGPARSER_STATE_INT_STRING,
+      /* '\x08' */ STRINGPARSER_STATE_INT_STRING,
+      /*   '\t' */ STRINGPARSER_STATE_INT_STRING,
+      /*   '\n' */ STRINGPARSER_STATE_INT_STRING,
+      /* '\x0b' */ STRINGPARSER_STATE_INT_STRING,
+      /* '\x0c' */ STRINGPARSER_STATE_INT_STRING,
+      /*   '\r' */ STRINGPARSER_STATE_INT_STRING,
+      /* '\x0e' */ STRINGPARSER_STATE_INT_STRING,
+      /* '\x0f' */ STRINGPARSER_STATE_INT_STRING,
+      /* '\x10' */ STRINGPARSER_STATE_INT_STRING,
+      /* '\x11' */ STRINGPARSER_STATE_INT_STRING,
+      /* '\x12' */ STRINGPARSER_STATE_INT_STRING,
+      /* '\x13' */ STRINGPARSER_STATE_INT_STRING,
+      /* '\x14' */ STRINGPARSER_STATE_INT_STRING,
+      /* '\x15' */ STRINGPARSER_STATE_INT_STRING,
+      /* '\x16' */ STRINGPARSER_STATE_INT_STRING,
+      /* '\x17' */ STRINGPARSER_STATE_INT_STRING,
+      /* '\x18' */ STRINGPARSER_STATE_INT_STRING,
+      /* '\x19' */ STRINGPARSER_STATE_INT_STRING,
+      /* '\x1a' */ STRINGPARSER_STATE_INT_STRING,
+      /* '\x1b' */ STRINGPARSER_STATE_INT_STRING,
+      /* '\x1c' */ STRINGPARSER_STATE_INT_STRING,
+      /* '\x1d' */ STRINGPARSER_STATE_INT_STRING,
+      /* '\x1e' */ STRINGPARSER_STATE_INT_STRING,
+      /* '\x1f' */ STRINGPARSER_STATE_INT_STRING,
+      /*    ' ' */ STRINGPARSER_STATE_INT_STRING,
+      /*    '!' */ STRINGPARSER_STATE_INT_STRING,
+      /*    '"' */ STRINGPARSER_STATE_INT_STRING,
+      /*    '#' */ STRINGPARSER_STATE_INT_STRING,
+      /*    '$' */ STRINGPARSER_STATE_INT_STRING,
+      /*    '%' */ STRINGPARSER_STATE_INT_STRING,
+      /*    '&' */ STRINGPARSER_STATE_INT_STRING,
+      /*    "'" */ STRINGPARSER_STATE_INT_STRING,
+      /*    '(' */ STRINGPARSER_STATE_INT_STRING,
+      /*    ')' */ STRINGPARSER_STATE_INT_STRING,
+      /*    '*' */ STRINGPARSER_STATE_INT_STRING,
+      /*    '+' */ STRINGPARSER_STATE_INT_STRING,
+      /*    ',' */ STRINGPARSER_STATE_INT_STRING,
+      /*    '-' */ STRINGPARSER_STATE_INT_STRING,
+      /*    '.' */ STRINGPARSER_STATE_INT_STRING,
+      /*    '/' */ STRINGPARSER_STATE_INT_STRING,
+      /*    '0' */ STRINGPARSER_STATE_INT_STRING,
+      /*    '1' */ STRINGPARSER_STATE_INT_STRING,
+      /*    '2' */ STRINGPARSER_STATE_INT_STRING,
+      /*    '3' */ STRINGPARSER_STATE_INT_STRING,
+      /*    '4' */ STRINGPARSER_STATE_INT_STRING,
+      /*    '5' */ STRINGPARSER_STATE_INT_STRING,
+      /*    '6' */ STRINGPARSER_STATE_INT_STRING,
+      /*    '7' */ STRINGPARSER_STATE_INT_STRING,
+      /*    '8' */ STRINGPARSER_STATE_INT_STRING,
+      /*    '9' */ STRINGPARSER_STATE_INT_STRING,
+      /*    ':' */ STRINGPARSER_STATE_INT_STRING,
+      /*    ';' */ STRINGPARSER_STATE_INT_STRING,
+      /*    '<' */ STRINGPARSER_STATE_INT_STRING,
+      /*    '=' */ STRINGPARSER_STATE_INT_STRING,
+      /*    '>' */ STRINGPARSER_STATE_INT_STRING,
+      /*    '?' */ STRINGPARSER_STATE_INT_STRING,
+      /*    '@' */ STRINGPARSER_STATE_INT_STRING,
+      /*    'A' */ STRINGPARSER_STATE_INT_STRING,
+      /*    'B' */ STRINGPARSER_STATE_INT_STRING,
+      /*    'C' */ STRINGPARSER_STATE_INT_STRING,
+      /*    'D' */ STRINGPARSER_STATE_INT_STRING,
+      /*    'E' */ STRINGPARSER_STATE_INT_STRING,
+      /*    'F' */ STRINGPARSER_STATE_INT_STRING,
+      /*    'G' */ STRINGPARSER_STATE_INT_STRING,
+      /*    'H' */ STRINGPARSER_STATE_INT_STRING,
+      /*    'I' */ STRINGPARSER_STATE_INT_STRING,
+      /*    'J' */ STRINGPARSER_STATE_INT_STRING,
+      /*    'K' */ STRINGPARSER_STATE_INT_STRING,
+      /*    'L' */ STRINGPARSER_STATE_INT_STRING,
+      /*    'M' */ STRINGPARSER_STATE_INT_STRING,
+      /*    'N' */ STRINGPARSER_STATE_INT_STRING,
+      /*    'O' */ STRINGPARSER_STATE_INT_STRING,
+      /*    'P' */ STRINGPARSER_STATE_INT_STRING,
+      /*    'Q' */ STRINGPARSER_STATE_INT_STRING,
+      /*    'R' */ STRINGPARSER_STATE_INT_STRING,
+      /*    'S' */ STRINGPARSER_STATE_INT_STRING,
+      /*    'T' */ STRINGPARSER_STATE_INT_STRING,
+      /*    'U' */ STRINGPARSER_STATE_INT_STRING,
+      /*    'V' */ STRINGPARSER_STATE_INT_STRING,
+      /*    'W' */ STRINGPARSER_STATE_INT_STRING,
+      /*    'X' */ STRINGPARSER_STATE_INT_STRING,
+      /*    'Y' */ STRINGPARSER_STATE_INT_STRING,
+      /*    'Z' */ STRINGPARSER_STATE_INT_STRING,
+      /*    '[' */ STRINGPARSER_STATE_INT_STRING,
+      /*   '\\' */ STRINGPARSER_STATE_INT_STRING,
+      /*    ']' */ STRINGPARSER_STATE_INT_STRING,
+      /*    '^' */ STRINGPARSER_STATE_INT_STRING,
+      /*    '_' */ STRINGPARSER_STATE_INT_STRING,
+      /*    '`' */ STRINGPARSER_STATE_INT_STRING,
+      /*    'a' */ STRINGPARSER_STATE_INT_STRING,
+      /*    'b' */ STRINGPARSER_STATE_INT_STRING,
+      /*    'c' */ STRINGPARSER_STATE_INT_STRING,
+      /*    'd' */ STRINGPARSER_STATE_INT_STRING,
+      /*    'e' */ STRINGPARSER_STATE_INT_STRING,
+      /*    'f' */ STRINGPARSER_STATE_INT_STRING,
+      /*    'g' */ STRINGPARSER_STATE_INT_STRING,
+      /*    'h' */ STRINGPARSER_STATE_INT_STRING,
+      /*    'i' */ STRINGPARSER_STATE_INT_STRING,
+      /*    'j' */ STRINGPARSER_STATE_INT_STRING,
+      /*    'k' */ STRINGPARSER_STATE_INT_STRING,
+      /*    'l' */ STRINGPARSER_STATE_INT_STRING,
+      /*    'm' */ STRINGPARSER_STATE_INT_STRING,
+      /*    'n' */ STRINGPARSER_STATE_INT_STRING,
+      /*    'o' */ STRINGPARSER_STATE_INT_STRING,
+      /*    'p' */ STRINGPARSER_STATE_INT_STRING,
+      /*    'q' */ STRINGPARSER_STATE_INT_STRING,
+      /*    'r' */ STRINGPARSER_STATE_INT_STRING,
+      /*    's' */ STRINGPARSER_STATE_INT_STRING,
+      /*    't' */ STRINGPARSER_STATE_INT_STRING,
+      /*    'u' */ STRINGPARSER_STATE_INT_STRING,
+      /*    'v' */ STRINGPARSER_STATE_INT_STRING,
+      /*    'w' */ STRINGPARSER_STATE_INT_STRING,
+      /*    'x' */ STRINGPARSER_STATE_INT_STRING,
+      /*    'y' */ STRINGPARSER_STATE_INT_STRING,
+      /*    'z' */ STRINGPARSER_STATE_INT_STRING,
+      /*    '{' */ STRINGPARSER_STATE_INT_STRING,
+      /*    '|' */ STRINGPARSER_STATE_INT_STRING,
+      /*    '}' */ STRINGPARSER_STATE_INT_STRING,
+      /*    '~' */ STRINGPARSER_STATE_INT_STRING,
+      /* '\x7f' */ STRINGPARSER_STATE_INT_STRING,
+      /* '\x80' */ STRINGPARSER_STATE_INT_STRING,
+      /* '\x81' */ STRINGPARSER_STATE_INT_STRING,
+      /* '\x82' */ STRINGPARSER_STATE_INT_STRING,
+      /* '\x83' */ STRINGPARSER_STATE_INT_STRING,
+      /* '\x84' */ STRINGPARSER_STATE_INT_STRING,
+      /* '\x85' */ STRINGPARSER_STATE_INT_STRING,
+      /* '\x86' */ STRINGPARSER_STATE_INT_STRING,
+      /* '\x87' */ STRINGPARSER_STATE_INT_STRING,
+      /* '\x88' */ STRINGPARSER_STATE_INT_STRING,
+      /* '\x89' */ STRINGPARSER_STATE_INT_STRING,
+      /* '\x8a' */ STRINGPARSER_STATE_INT_STRING,
+      /* '\x8b' */ STRINGPARSER_STATE_INT_STRING,
+      /* '\x8c' */ STRINGPARSER_STATE_INT_STRING,
+      /* '\x8d' */ STRINGPARSER_STATE_INT_STRING,
+      /* '\x8e' */ STRINGPARSER_STATE_INT_STRING,
+      /* '\x8f' */ STRINGPARSER_STATE_INT_STRING,
+      /* '\x90' */ STRINGPARSER_STATE_INT_STRING,
+      /* '\x91' */ STRINGPARSER_STATE_INT_STRING,
+      /* '\x92' */ STRINGPARSER_STATE_INT_STRING,
+      /* '\x93' */ STRINGPARSER_STATE_INT_STRING,
+      /* '\x94' */ STRINGPARSER_STATE_INT_STRING,
+      /* '\x95' */ STRINGPARSER_STATE_INT_STRING,
+      /* '\x96' */ STRINGPARSER_STATE_INT_STRING,
+      /* '\x97' */ STRINGPARSER_STATE_INT_STRING,
+      /* '\x98' */STRINGPARSER_STATE_INT_STRING,
+      /* '\x99' */ STRINGPARSER_STATE_INT_STRING,
+      /* '\x9a' */ STRINGPARSER_STATE_INT_STRING,
+      /* '\x9b' */ STRINGPARSER_STATE_INT_STRING,
+      /* '\x9c' */ STRINGPARSER_STATE_INT_STRING,
+      /* '\x9d' */ STRINGPARSER_STATE_INT_STRING,
+      /* '\x9e' */ STRINGPARSER_STATE_INT_STRING,
+      /* '\x9f' */ STRINGPARSER_STATE_INT_STRING,
+      /* '\xa0' */ STRINGPARSER_STATE_INT_STRING,
+      /* '\xa1' */ STRINGPARSER_STATE_INT_STRING,
+      /* '\xa2' */ STRINGPARSER_STATE_INT_STRING,
+      /* '\xa3' */ STRINGPARSER_STATE_INT_STRING,
+      /* '\xa4' */ STRINGPARSER_STATE_INT_STRING,
+      /* '\xa5' */ STRINGPARSER_STATE_INT_STRING,
+      /* '\xa6' */ STRINGPARSER_STATE_INT_STRING,
+      /* '\xa7' */ STRINGPARSER_STATE_INT_STRING,
+      /* '\xa8' */ STRINGPARSER_STATE_INT_STRING,
+      /* '\xa9' */ STRINGPARSER_STATE_INT_STRING,
+      /* '\xaa' */ STRINGPARSER_STATE_INT_STRING,
+      /* '\xab' */ STRINGPARSER_STATE_INT_STRING,
+      /* '\xac' */ STRINGPARSER_STATE_INT_STRING,
+      /* '\xad' */ STRINGPARSER_STATE_INT_STRING,
+      /* '\xae' */ STRINGPARSER_STATE_INT_STRING,
+      /* '\xaf' */ STRINGPARSER_STATE_INT_STRING,
+      /* '\xb0' */ STRINGPARSER_STATE_INT_STRING,
+      /* '\xb1' */ STRINGPARSER_STATE_INT_STRING,
+      /* '\xb2' */ STRINGPARSER_STATE_INT_STRING,
+      /* '\xb3' */ STRINGPARSER_STATE_INT_STRING,
+      /* '\xb4' */ STRINGPARSER_STATE_INT_STRING,
+      /* '\xb5' */ STRINGPARSER_STATE_INT_STRING,
+      /* '\xb6' */ STRINGPARSER_STATE_INT_STRING,
+      /* '\xb7' */ STRINGPARSER_STATE_INT_STRING,
+      /* '\xb8' */ STRINGPARSER_STATE_INT_STRING,
+      /* '\xb9' */ STRINGPARSER_STATE_INT_STRING,
+      /* '\xba' */ STRINGPARSER_STATE_INT_STRING,
+      /* '\xbb' */ STRINGPARSER_STATE_INT_STRING,
+      /* '\xbc' */ STRINGPARSER_STATE_INT_STRING,
+      /* '\xbd' */ STRINGPARSER_STATE_INT_STRING,
+      /* '\xbe' */ STRINGPARSER_STATE_INT_STRING,
+      /* '\xbf' */ STRINGPARSER_STATE_INT_STRING,
+      /* '\xc0' */ STRINGPARSER_STATE_INT_STRING,
+      /* '\xc1' */ STRINGPARSER_STATE_INT_STRING,
+      /* '\xc2' */ STRINGPARSER_STATE_INT_STRING,
+      /* '\xc3' */ STRINGPARSER_STATE_INT_STRING,
+      /* '\xc4' */ STRINGPARSER_STATE_INT_STRING,
+      /* '\xc5' */ STRINGPARSER_STATE_INT_STRING,
+      /* '\xc6' */ STRINGPARSER_STATE_INT_STRING,
+      /* '\xc7' */ STRINGPARSER_STATE_INT_STRING,
+      /* '\xc8' */ STRINGPARSER_STATE_INT_STRING,
+      /* '\xc9' */ STRINGPARSER_STATE_INT_STRING,
+      /* '\xca' */ STRINGPARSER_STATE_INT_STRING,
+      /* '\xcb' */ STRINGPARSER_STATE_INT_STRING,
+      /* '\xcc' */ STRINGPARSER_STATE_INT_STRING,
+      /* '\xcd' */ STRINGPARSER_STATE_INT_STRING,
+      /* '\xce' */ STRINGPARSER_STATE_INT_STRING,
+      /* '\xcf' */ STRINGPARSER_STATE_INT_STRING,
+      /* '\xd0' */ STRINGPARSER_STATE_INT_STRING,
+      /* '\xd1' */ STRINGPARSER_STATE_INT_STRING,
+      /* '\xd2' */ STRINGPARSER_STATE_INT_STRING,
+      /* '\xd3' */ STRINGPARSER_STATE_INT_STRING,
+      /* '\xd4' */ STRINGPARSER_STATE_INT_STRING,
+      /* '\xd5' */ STRINGPARSER_STATE_INT_STRING,
+      /* '\xd6' */ STRINGPARSER_STATE_INT_STRING,
+      /* '\xd7' */ STRINGPARSER_STATE_INT_STRING,
+      /* '\xd8' */ STRINGPARSER_STATE_INT_STRING,
+      /* '\xd9' */ STRINGPARSER_STATE_INT_STRING,
+      /* '\xda' */ STRINGPARSER_STATE_INT_STRING,
+      /* '\xdb' */ STRINGPARSER_STATE_INT_STRING,
+      /* '\xdc' */ STRINGPARSER_STATE_INT_STRING,
+      /* '\xdd' */ STRINGPARSER_STATE_INT_STRING,
+      /* '\xde' */ STRINGPARSER_STATE_INT_STRING,
+      /* '\xdf' */ STRINGPARSER_STATE_INT_STRING,
+      /* '\xe0' */ STRINGPARSER_STATE_INT_STRING,
+      /* '\xe1' */ STRINGPARSER_STATE_INT_STRING,
+      /* '\xe2' */ STRINGPARSER_STATE_INT_STRING,
+      /* '\xe3' */ STRINGPARSER_STATE_INT_STRING,
+      /* '\xe4' */ STRINGPARSER_STATE_INT_STRING,
+      /* '\xe5' */ STRINGPARSER_STATE_INT_STRING,
+      /* '\xe6' */ STRINGPARSER_STATE_INT_STRING,
+      /* '\xe7' */ STRINGPARSER_STATE_INT_STRING,
+      /* '\xe8' */ STRINGPARSER_STATE_INT_STRING,
+      /* '\xe9' */ STRINGPARSER_STATE_INT_STRING,
+      /* '\xea' */ STRINGPARSER_STATE_INT_STRING,
+      /* '\xeb' */ STRINGPARSER_STATE_INT_STRING,
+      /* '\xec' */ STRINGPARSER_STATE_INT_STRING,
+      /* '\xed' */ STRINGPARSER_STATE_INT_STRING,
+      /* '\xee' */ STRINGPARSER_STATE_INT_STRING,
+      /* '\xef' */ STRINGPARSER_STATE_INT_STRING,
+      /* '\xf0' */ STRINGPARSER_STATE_INT_STRING,
+      /* '\xf1' */ STRINGPARSER_STATE_INT_STRING,
+      /* '\xf2' */ STRINGPARSER_STATE_INT_STRING,
+      /* '\xf3' */ STRINGPARSER_STATE_INT_STRING,
+      /* '\xf4' */ STRINGPARSER_STATE_INT_STRING,
+      /* '\xf5' */ STRINGPARSER_STATE_INT_STRING,
+      /* '\xf6' */ STRINGPARSER_STATE_INT_STRING,
+      /* '\xf7' */ STRINGPARSER_STATE_INT_STRING,
+      /* '\xf8' */ STRINGPARSER_STATE_INT_STRING,
+      /* '\xf9' */ STRINGPARSER_STATE_INT_STRING,
+      /* '\xfa' */ STRINGPARSER_STATE_INT_STRING,
+      /* '\xfb' */ STRINGPARSER_STATE_INT_STRING,
+      /* '\xfc' */ STRINGPARSER_STATE_INT_STRING,
+      /* '\xfd' */ STRINGPARSER_STATE_INT_STRING,
+      /* '\xfe' */ STRINGPARSER_STATE_INT_STRING  /* NOTE(review): the row stops at 0xfe (255 entries), so byte 0xff has no entry -- confirm the lookup never indexes 0xff */
+};
+
+static const int * stringparser_state_transitions[] = {  /* One transition row per state, listed as text, string, string_escape. */
+  stringparser_transition_row_text,
+  stringparser_transition_row_string,
+  stringparser_transition_row_string_escape  /* NOTE(review): presumably this order matches the STRINGPARSER_STATE_INT_* values -- confirm */
+};
+
diff --git a/src/tests/htmlparser_testdata/sample_fsm.config b/src/tests/htmlparser_testdata/sample_fsm.config
new file mode 100644
index 0000000..df66e69
--- /dev/null
+++ b/src/tests/htmlparser_testdata/sample_fsm.config
@@ -0,0 +1,64 @@
+# Copyright (c) 2008, Google Inc.
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are
+# met:
+#
+#     * Redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer.
+#     * Redistributions in binary form must reproduce the above
+# copyright notice, this list of conditions and the following disclaimer
+# in the documentation and/or other materials provided with the
+# distribution.
+#     * Neither the name of Google Inc. nor the names of its
+# contributors may be used to endorse or promote products derived from
+# this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+# ---
+#
+# Author: falmeida@google.com (Filipe Almeida)
+
+name = 'stringparser'  # state machine name; presumably prefixes the generated identifiers -- TODO confirm
+
+comment = 'Parses C style strings'
+
+condition('dq', '\\"'),  # 'dq': the double quote character (escaped in the pattern)
+condition('backslash', '\\\\'),  # 'backslash': the backslash character (escaped in the pattern)
+condition('default', '[:default:]')  # fallback; presumably any input not matched by another condition -- TODO confirm
+
+# Outside a string: a double quote ('dq') opens a string literal, any other input stays in 'text'.
+state(name = 'text',
+      external = 'text',
+      transitions = [
+        ['dq', 'string'],
+        ['default', 'text']
+      ])
+
+# Inside a string literal: a backslash enters 'string_escape', a double quote ends the literal (back to 'text').
+state(name = 'string',
+      external = 'string',
+      transitions = [
+        ['backslash', 'string_escape'],
+        ['dq', 'text'],
+        ['default', 'string']
+      ])
+
+# Escaped character in a string literal: the next character is ignored and parsing returns to 'string' (external state is still 'string').
+state(name = 'string_escape',
+      external = 'string',
+      transitions = [
+        ['default', 'string']
+      ])
+
diff --git a/src/tests/htmlparser_testdata/simple.html b/src/tests/htmlparser_testdata/simple.html
new file mode 100644
index 0000000..555928f
--- /dev/null
+++ b/src/tests/htmlparser_testdata/simple.html
@@ -0,0 +1,26 @@
+<html>
+  <body>
+    <?state state=text,tag=body?>
+    <a href="<?state state=value,tag=a?>">test</a>
+
+    <test test123=<?state state=value, tag=test, attr=test123,
+    attr_type=regular ?>>
+
+    <?state state=text?>
+
+    <body blah='<?state state=value, tag=body, attr=blah, attr_type=regular
+      ?>'>
+
+      <style>
+        <?state in_css=true?>
+      </style>
+      <?state in_css=false?>
+
+      <h1 onclick="<?state state=value, tag=h1, attr=onclick, attr_type=js,
+        in_js=true ?>" style="<?state in_css=true?>" <?state in_css=false?>>
+        <?state state=text, tag=h1?>
+      </h1>
+
+
+  </body>
+</html>
diff --git a/src/tests/htmlparser_testdata/tags.html b/src/tests/htmlparser_testdata/tags.html
new file mode 100644
index 0000000..1caf68d
--- /dev/null
+++ b/src/tests/htmlparser_testdata/tags.html
@@ -0,0 +1,214 @@
+<html>
+
+<body blah='<?state state=value, tag=body, attr=blah, attr_type=regular,
+attr_quoted=true ?>'>
+
+<?state state=text, tag=body ?>
+<a href=<?state state=value, tag=a, attr=href, attr_type=uri ?>><?state state=text, tag=a ?></a>
+<a href=
+  "<?state state=value, tag=a, attr=href, attr_type=uri, attr_quoted=true ?>"></a>
+
+<a href=<?state state=value, tag=a, attr=href, attr_type=uri ?> blah=x></a>
+<a href=
+  "<?state state=value, tag=a, attr=href, attr_type=uri ?>" blah=x></a>
+
+<a href=
+  <?state state=value, tag=a, attr=href, attr_type=uri, attr_quoted=false ?> blah=x></a>
+
+<a href><?state state=text, tag=a ?></a>
+
+<a href=x<?state state=value, tag=a, attr=href, attr_type=uri ?> <?state state=tag, tag=a ?>></a>
+
+<a href =<?state state=value, tag=a, attr=href, attr_type=uri ?>></a>
+<a href
+=<?state state=value, tag=a, attr=href, attr_type=uri ?>></a>
+<a href
+  =<?state state=value, tag=a, attr=href, attr_type=uri ?>></a>
+
+<?state state=text?>
+
+<b font=<?state state=value, value_index=0?>></b>
+<b font=x<?state state=value, value_index=1?>></b>
+<b font='<?state state=value, value_index=0?>'></b>
+<b font='x<?state state=value, value_index=1?>'></b>
+
+<!-- XML Processing instruction -->
+
+<?example <?state state=text?> <a href=<?state state=text?>></a
+  <script>
+  <?state state=text, in_js=false?>
+  </script>
+?>
+
+<a href=http://www.google.com/<?state state=value, tag=a, attr=href, attr_type=uri ?>?q=tt<?state state=value, tag=a, attr=href, attr_type=uri ?>>test</a>
+
+<!-- Test javascript url handling -->
+<a href="test<?state value=test, in_js=false ?>">test</a>
+<a href="javascript<?state value=javascript, in_js=false ?>">test</a>
+<a href="javascript:<?state value=javascript:, in_js=false ?>">test</a>
+<a href="javascript:alert('<?state in_js=false ?>">test</a>
+<a href="http:<?state value=http:, in_js=false ?>">test</a>
+<a href="http://www.google.com"
+   alt="javascript:<?state value=javascript:, in_js=false ?>">test</a>
+
+<!-- Test calls to  TemplateDirective() -->
+<b font=<?state state=value?>
+   color<?state state=value?>></b>
+
+<b font=<?state state=value?><?state insert_text=true?>
+   color<?state state=attr?>></b>
+
+<b font="<?state state=value?><?state insert_text=true?>
+   color<?state state=value?>"></b>
+
+<a href=
+  <?state state=value?><?state insert_text=true?> alt<?state state=attr?>>
+  link
+</a>
+
+<b font=<?state state=value?>><?state state=text, tag=b?></b>
+
+<!-- Large invalid HTML entity -->
+<a onclick="&testtesttesttesttesttesttesttesttesttesttesttest;"
+   href="http://www.google.com/"></a>
+
+<!-- URI attributes.  The attribute list can be found in
+     htmlparser.c:is_uri_attribute() -->
+<a target="<?state attr_type=regular?>"></a>
+<!-- -->
+<form action="<?state attr_type=uri?>"></form>
+<applet archive="<?state attr_type=uri?>"></applet>
+<blockquote cite="<?state attr_type=uri?>"></blockquote>
+<object classid="<?state attr_type=uri?>"></object>
+<object codebase="<?state attr_type=uri?>"></object>
+<object data="<?state attr_type=uri?>"></object>
+<img dynsrc="<?state attr_type=uri?>"></img>
+<a href="<?state attr_type=uri?>"></a>
+<img longdesc="<?state attr_type=uri?>"></img>
+<img src="<?state attr_type=uri?>"></img>
+<img usemap="<?state attr_type=uri?>"></img>
+<!-- -->
+<form style="x" action="<?state attr_type=uri?>"></form>
+<applet style="x" archive="<?state attr_type=uri?>"></applet>
+<blockquote style="x" cite="<?state attr_type=uri?>"></blockquote>
+<object style="x" classid="<?state attr_type=uri?>"></object>
+<object style="x" codebase="<?state attr_type=uri?>"></object>
+<object style="x" data="<?state attr_type=uri?>"></object>
+<img style="x" dynsrc="<?state attr_type=uri?>"></img>
+<a style="x" href="<?state attr_type=uri?>"></a>
+<img style="x" longdesc="<?state attr_type=uri?>"></img>
+<img style="x" src="<?state attr_type=uri?>"></img>
+<img style="x" usemap="<?state attr_type=uri?>"></img>
+<!-- -->
+<img alt="<?state attr_type=regular?>"></a>
+
+
+<!-- Style attributes as returned by htmlparser.c:is_style_attribute() -->
+<a target="<?state attr_type=regular?>"></a>
+<!-- -->
+<b style="<?state attr_type=style?>"></b>
+<!-- -->
+<a target="<?state attr_type=regular?>"></a>
+
+<!-- Big attribute value. We can't do prefix checking right now so we can't
+     validate the contents of the value here, although statemachine_test.c has
+     a test for that. -->
+
+<a href="http://www.google.com/"
+   alt="01234567890123456789012345678901234567890123456789
+        01234567890123456789012345678901234567890123456789
+        01234567890123456789012345678901234567890123456789
+        01234567890123456789012345678901234567890123456789
+        01234567890123456789012345678901234567890123456789
+        01234567890123456789012345678901234567890123456789
+        01234567890123456789012345678901234567890123456789
+        01234567890123456789012345678901234567890123456789
+        01234567890123456789012345678901234567890123456789
+        01234567890123456789012345678901234567890123456789
+        <?state state=value, attr_quoted=true, tag=a, attr=alt?>"></a>
+
+<?state state=text?>
+
+<!-- is_url_start tests -->
+
+<a href="<?state is_url_start=true?>"></a>
+<a href="http://<?state is_url_start=false?>"></a>
+<a href="http://www.google.com?q=<?state is_url_start=false?>"></a>
+<b font="<?state is_url_start=false?>"></b>
+<b font="http://www.google.com?q=<?state is_url_start=false?>"></b>
+<?state is_url_start=false?>
+
+<!-- <?state is_url_start=false?> -->
+
+<!-- Tag opening tests -->
+
+<a <?state state=tag?>></a><?state state=text?>
+<br <?state state=tag?>></br><?state state=text?>
+< br <?state state=text?>></br><?state state=text?>
+<< <?state state=text?>><?state state=text?>
+<  <?state state=text?> alt="<?state state=text?>">
+</blah <?state state=tag?>><?state state=text?>
+<<i<?state state=tag?>><?state state=text?></i>
+
+
+<!-- We do allow numbers to open html tags, which is not how most browsers
+behave. We still test this anyway. -->
+<0 <?state state=tag?>><?state state=text?>
+<1 <?state state=tag?>><?state state=text?>
+
+<!-- meta redirect url tests. -->
+<meta http-equiv="refresh" content="5;URL=<?state attr_type=uri, is_url_start=true?>">
+<meta http-equiv="refresh" content="10;URL=<?state attr_type=uri, is_url_start=true?>">
+<meta http-equiv="refresh" content="5 ;URL=<?state attr_type=uri, is_url_start=true?>">
+<meta http-equiv="refresh" content=" 5 ;URL=<?state attr_type=uri, is_url_start=true?>">
+<?state attr_type=none, is_url_start=false?>
+<meta http-equiv="refresh" content=" 5 ;    url   =   <?state attr_type=uri, is_url_start=true?>">
+<meta http-equiv="refresh" content="5;Url=<?state attr_type=uri, is_url_start=true?>">
+<meta http-equiv="refresh" content="5;UrL=<?state attr_type=uri, is_url_start=true?>">
+<meta http-equiv="refresh" content="5;uRL=<?state attr_type=uri, is_url_start=true?>">
+<?state attr_type=none, is_url_start=false?>
+<meta http-equiv="refresh" content="5;uRL=http://<?state attr_type=uri, is_url_start=false?>">
+<meta http-equiv="refresh" content="5 ; URL=http://www.google.com/<?state attr_type=uri, is_url_start=false?>">
+<meta http-equiv="refresh" content="5;URL=/<?state attr_type=uri, is_url_start=false?>">
+<meta http-equiv="refresh" content="5;URL=../<?state attr_type=uri, is_url_start=false?>">
+<meta http-equiv="refresh" content="             123456789    ;    url    =  ../<?state attr_type=uri, is_url_start=false?>">
+
+<!-- Quoted url's -->
+<meta http-equiv="refresh" content="5;URL = '<?state attr_type=uri, is_url_start=true?>">
+<meta http-equiv="refresh" content='5;URL = "<?state attr_type=uri, is_url_start=true?>"'>
+<meta http-equiv="refresh" content="5;URL = ' <?state attr_type=uri, is_url_start=false?>">
+<meta http-equiv="refresh" content='5;URL = " <?state attr_type=uri, is_url_start=false?>"'>
+
+<?state attr_type=none, is_url_start=false?>
+
+<meta http-equiv="refresh" content="5x;URL=<?state attr_type=regular, is_url_start=false?>">
+<meta http-equiv="refresh" content="<?state attr_type=regular, is_url_start=false?>">
+<meta http-equiv="refresh" content="5<?state attr_type=regular, is_url_start=false?>">
+<meta http-equiv="refresh" content="5;<?state attr_type=regular, is_url_start=false?>">
+<?state attr_type=none, is_url_start=false?>
+<meta http-equiv="refresh" content="5;U<?state attr_type=regular, is_url_start=false?>">
+<meta http-equiv="refresh" content="5;UR<?state attr_type=regular, is_url_start=false?>">
+<meta http-equiv="refresh" content="5;URL<?state attr_type=regular, is_url_start=false?>">
+<meta http-equiv="refresh" content="5;URL <?state attr_type=regular, is_url_start=false?>">
+<?state attr_type=none, is_url_start=false?>
+<meta http-equiv="refresh" content="5x;URL= <?state attr_type=regular, is_url_start=false?>">
+<meta http-equiv="refresh" content="5;UR L <?state attr_type=regular, is_url_start=false?>">
+<meta http-equiv="refresh" content="URL = <?state attr_type=regular, is_url_start=false?>">
+
+<meta http-equiv="refresh" content="<?state attr_type=regular?>">
+
+<span a:type="<?state state=value, attr=a:type?>"
+  a:abc.abc="<?state state=value, attr=a:abc.abc?>"
+  b:a.b.c.d.e.f=<?state state=value, attr=b:a.b.c.d.e.f?>>
+
+<tag.test>
+<?state state=text, tag=tag.test?>
+</tag.test>
+
+<!-- Tests regarding our specific implementation -->
+<meta content="5;URL=<?state attr_type=uri, is_url_start=true?>">
+
+</body>
+
+</html>
+<?state state=text ?>