Brian Silverman | cbe6df2 | 2015-09-26 17:32:32 -0400 | [diff] [blame] | 1 | // Copyright (c) 2007, Google Inc. |
| 2 | // All rights reserved. |
| 3 | // |
| 4 | // Redistribution and use in source and binary forms, with or without |
| 5 | // modification, are permitted provided that the following conditions are |
| 6 | // met: |
| 7 | // |
| 8 | // * Redistributions of source code must retain the above copyright |
| 9 | // notice, this list of conditions and the following disclaimer. |
| 10 | // * Redistributions in binary form must reproduce the above |
| 11 | // copyright notice, this list of conditions and the following disclaimer |
| 12 | // in the documentation and/or other materials provided with the |
| 13 | // distribution. |
| 14 | // * Neither the name of Google Inc. nor the names of its |
| 15 | // contributors may be used to endorse or promote products derived from |
| 16 | // this software without specific prior written permission. |
| 17 | // |
| 18 | // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS |
| 19 | // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT |
| 20 | // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR |
| 21 | // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT |
| 22 | // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, |
| 23 | // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT |
| 24 | // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, |
| 25 | // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY |
| 26 | // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT |
| 27 | // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE |
| 28 | // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
| 29 | |
| 30 | // --- |
| 31 | // Author: csilvers@google.com (Craig Silverstein) |
| 32 | // |
| 33 | // We allow template variables to have modifiers, each possibly with a |
| 34 | // value associated with it. Format is |
| 35 | // {{VARNAME:modname[=modifier-value]:modname[=modifier-value]:...}} |
| 36 | // Modname refers to a functor that takes the variable's value |
| 37 | // and modifier-value (empty-string if no modifier-value was |
| 38 | // specified), and returns a munged value. Modifiers are applied |
| 39 | // left-to-right. We define the legal modnames here, and the |
| 40 | // functors they refer to. |
| 41 | // |
| 42 | // Modifiers have a long-name, an optional short-name (one char; |
| 43 | // may be \0 if you don't want a shortname), and a functor that's |
| 44 | // applied to the variable. |
| 45 | // |
// In addition to the list of modifiers hard-coded in the source code
// here, it is possible to dynamically register modifiers using a long
// name starting with "x-".  If you wish to define your own modifier
// class, in your own source code, just subclass TemplateModifier --
// see template_modifiers.cc for details of how to do that.
| 51 | // |
// Adding a new built-in modifier, to this file, takes several steps,
// both in this .h file and in the corresponding .cc file:
// 1) .h file: Define a struct for the modifier.  It must subclass
//    TemplateModifier.
// 2) .h file: declare a variable that's an instance of the struct.
//    This is used for people who want to modify the string themselves,
//    via TemplateDictionary::SetEscapedValue.
// 3) .cc file: define the new modifier's Modify method.
// 4) .cc file: give storage for the variable declared in the .h file (in 2).
// 5) .cc file: add the modifier to the g_modifiers array.
| 62 | |
| 63 | #ifndef TEMPLATE_TEMPLATE_MODIFIERS_H_ |
| 64 | #define TEMPLATE_TEMPLATE_MODIFIERS_H_ |
| 65 | |
#include <sys/types.h>   // for size_t
#include <string.h>      // for strlen
#include <string>
#include <ctemplate/template_emitter.h>   // so we can inline operator()
#include <ctemplate/per_expand_data.h>    // could probably just forward-declare
| 70 | |
| 71 | |
| 72 | |
| 73 | namespace ctemplate { |
| 74 | |
| 75 | class Template; |
| 76 | |
// Expands to the declaration of the virtual Modify() member that each
// built-in modifier class in this file repeats.  The expansion begins
// with "public:", so whatever follows the macro inside a class is
// public.  It does not end with a semicolon; write "MODIFY_SIGNATURE_;".
#define MODIFY_SIGNATURE_                                          \
 public:                                                           \
  virtual void Modify(const char* in, size_t inlen,                \
                      const PerExpandData*, ExpandEmitter* outbuf, \
                      const std::string& arg) const
| 82 | |
| 83 | // If you wish to write your own modifier, it should subclass this |
| 84 | // method. Your subclass should only define Modify(); for efficiency, |
| 85 | // we do not make operator() virtual. |
| 86 | class TemplateModifier { |
| 87 | public: |
| 88 | // This function takes a string as input, a char*/size_t pair, and |
| 89 | // appends the modified version to the end of outbuf. In addition |
| 90 | // to the variable-value to modify (specified via in/inlen), each |
| 91 | // Modify passes in two pieces of user-supplied data: |
| 92 | // 1) arg: this is the modifier-value, for modifiers that take a |
| 93 | // value (e.g. "{{VAR:modifier=value}}"). This value |
| 94 | // comes from the template file. For modifiers that take |
| 95 | // no modval argument, arg will always be "". For modifiers |
| 96 | // that do take such an argument, arg will always start with "=". |
| 97 | // 2) per_expand_data: this is a set of data that the application can |
| 98 | // associate with a TemplateDictionary, and is passed in to |
| 99 | // every variable expanded using that dictionary. This value |
| 100 | // comes from the source code. |
| 101 | virtual void Modify(const char* in, size_t inlen, |
| 102 | const PerExpandData* per_expand_data, |
| 103 | ExpandEmitter* outbuf, |
| 104 | const std::string& arg) const = 0; |
| 105 | |
| 106 | // This function can be used to speed up modification. If Modify() |
| 107 | // is often a noop, you can implement MightModify() to indicate |
| 108 | // situations where it's safe to avoid the call to Modify(), because |
| 109 | // Modify() won't do any modifications in this case. Note it's |
| 110 | // always safe to return true here; you should just return false if |
| 111 | // you're certain Modify() can be ignored. This function is |
| 112 | // advisory; the template system is not required to call |
| 113 | // MightModify() before Modify(). |
| 114 | virtual bool MightModify(const PerExpandData* /*per_expand_data*/, |
| 115 | const std::string& /*arg*/) const { |
| 116 | return true; |
| 117 | } |
| 118 | |
| 119 | // We support both modifiers that take an argument, and those that don't. |
| 120 | // We also support passing in a string, or a char*/int pair. |
| 121 | std::string operator()(const char* in, size_t inlen, const std::string& arg="") const { |
| 122 | std::string out; |
| 123 | // we'll reserve some space to account for minimal escaping: say 12% |
| 124 | out.reserve(inlen + inlen/8 + 16); |
| 125 | StringEmitter outbuf(&out); |
| 126 | Modify(in, inlen, NULL, &outbuf, arg); |
| 127 | return out; |
| 128 | } |
| 129 | std::string operator()(const std::string& in, const std::string& arg="") const { |
| 130 | return operator()(in.data(), in.size(), arg); |
| 131 | } |
| 132 | |
| 133 | virtual ~TemplateModifier(); // always need a virtual destructor! |
| 134 | }; |
| 135 | |
| 136 | |
| 137 | // Returns the input verbatim (for testing) |
| 138 | class NullModifier : public TemplateModifier { |
| 139 | MODIFY_SIGNATURE_; |
| 140 | }; |
| 141 | extern NullModifier null_modifier; |
| 142 | |
| 143 | // Escapes < > " ' & <non-space whitespace> to < > " |
| 144 | // ' & <space> |
| 145 | class HtmlEscape : public TemplateModifier { |
| 146 | MODIFY_SIGNATURE_; |
| 147 | }; |
| 148 | extern HtmlEscape html_escape; |
| 149 | |
| 150 | // Same as HtmlEscape but leaves all whitespace alone. Eg. for <pre>..</pre> |
| 151 | class PreEscape : public TemplateModifier { |
| 152 | MODIFY_SIGNATURE_; |
| 153 | }; |
| 154 | extern PreEscape pre_escape; |
| 155 | |
| 156 | // Like HtmlEscape but allows HTML entities, <br> tags, <wbr> tags, |
| 157 | // matched <b> and </b> tags, matched <i> and </i> tags, matched <em> and </em> |
| 158 | // tags, and matched <span dir=(rtl|ltr)> tags. |
| 159 | class SnippetEscape : public TemplateModifier { |
| 160 | MODIFY_SIGNATURE_; |
| 161 | }; |
| 162 | extern SnippetEscape snippet_escape; |
| 163 | |
| 164 | // Replaces characters not safe for an unquoted attribute with underscore. |
| 165 | // Safe characters are alphanumeric, underscore, dash, period, and colon. |
| 166 | // The equal sign is also considered safe unless it is at the start |
| 167 | // or end of the input in which case it is replaced with underscore. |
| 168 | // |
| 169 | // We added the equal sign to the safe characters to allow this modifier |
| 170 | // to be used on attribute name/value pairs in HTML tags such as |
| 171 | // <div {{CLASS:H=attribute}}> |
| 172 | // where CLASS is expanded to "class=bla". |
| 173 | // |
| 174 | // Note: The equal sign is replaced when found at either boundaries of the |
| 175 | // string due to the concern it may be lead to XSS under some special |
| 176 | // circumstances: Say, if this string is the value of an attribute in an |
| 177 | // HTML tag and ends with an equal sign, a browser may possibly end up |
| 178 | // interpreting the next token as the value of this string rather than |
| 179 | // a new attribute (esoteric). |
| 180 | class CleanseAttribute : public TemplateModifier { |
| 181 | MODIFY_SIGNATURE_; |
| 182 | }; |
| 183 | extern CleanseAttribute cleanse_attribute; |
| 184 | |
| 185 | // Removes characters not safe for a CSS value. Safe characters are |
| 186 | // alphanumeric, space, underscore, period, coma, exclamation mark, |
| 187 | // pound, percent, and dash. |
| 188 | class CleanseCss : public TemplateModifier { |
| 189 | MODIFY_SIGNATURE_; |
| 190 | }; |
| 191 | extern CleanseCss cleanse_css; |
| 192 | |
| 193 | // Checks that a url is either an absolute http(s) URL or a relative |
| 194 | // url that doesn't have a protocol hidden in it (ie [foo.html] is |
| 195 | // fine, but not [javascript:foo]) and then performs another type of |
| 196 | // escaping. Returns the url escaped with the specified modifier if |
| 197 | // good, otherwise returns a safe replacement URL. |
| 198 | // This is normally "#", but for <img> tags, it is not safe to set |
| 199 | // the src attribute to "#". This is because this causes some browsers |
| 200 | // to reload the page, which can cause a DoS. |
| 201 | class ValidateUrl : public TemplateModifier { |
| 202 | public: |
| 203 | explicit ValidateUrl(const TemplateModifier& chained_modifier, |
| 204 | const char* unsafe_url_replacement) |
| 205 | : chained_modifier_(chained_modifier), |
| 206 | unsafe_url_replacement_(unsafe_url_replacement), |
| 207 | unsafe_url_replacement_length_(strlen(unsafe_url_replacement)) { } |
| 208 | MODIFY_SIGNATURE_; |
| 209 | static const char* const kUnsafeUrlReplacement; |
| 210 | static const char* const kUnsafeImgSrcUrlReplacement; |
| 211 | private: |
| 212 | const TemplateModifier& chained_modifier_; |
| 213 | const char* unsafe_url_replacement_; |
| 214 | int unsafe_url_replacement_length_; |
| 215 | }; |
| 216 | extern ValidateUrl validate_url_and_html_escape; |
| 217 | extern ValidateUrl validate_url_and_javascript_escape; |
| 218 | extern ValidateUrl validate_url_and_css_escape; |
| 219 | extern ValidateUrl validate_img_src_url_and_html_escape; |
| 220 | extern ValidateUrl validate_img_src_url_and_javascript_escape; |
| 221 | extern ValidateUrl validate_img_src_url_and_css_escape; |
| 222 | |
| 223 | // Escapes < > & " ' to < > & " ' (same as in HtmlEscape). |
| 224 | // If you use it within a CDATA section, you may be escaping more characters |
| 225 | // than strictly necessary. If this turns out to be an issue, we will need |
| 226 | // to add a variant just for CDATA. |
| 227 | class XmlEscape : public TemplateModifier { |
| 228 | MODIFY_SIGNATURE_; |
| 229 | }; |
| 230 | extern XmlEscape xml_escape; |
| 231 | |
| 232 | // Escapes characters that cannot appear unescaped in a javascript string |
| 233 | // assuming UTF-8 encoded input. |
| 234 | // This does NOT escape all characters that cannot appear unescaped in a |
| 235 | // javascript regular expression literal. |
| 236 | class JavascriptEscape : public TemplateModifier { |
| 237 | MODIFY_SIGNATURE_; |
| 238 | }; |
| 239 | extern JavascriptEscape javascript_escape; |
| 240 | |
| 241 | // Checks that the input is a valid javascript non-string literal |
| 242 | // meaning a boolean (true, false) or a numeric value (decimal, hex or octal). |
| 243 | // If valid, we output the input as is, otherwise we output null instead. |
| 244 | // Input of zero length is considered valid and nothing is output. |
| 245 | // |
| 246 | // The emphasis is on safety against injection of javascript code rather |
| 247 | // than perfect validation, as such it is possible for non-valid literals to |
| 248 | // pass through. |
| 249 | // |
| 250 | // You would use this modifier for javascript variables that are not |
| 251 | // enclosed in quotes such as: |
| 252 | // <script>var a = {{VALUE}};</script> OR |
| 253 | // <a href="url" onclick="doSubmit({{ID}})"> |
| 254 | // For variables that are quoted (i.e. string literals) use javascript_escape. |
| 255 | // |
| 256 | // Limitations: |
| 257 | // . NaN, +/-Infinity and null are not recognized. |
| 258 | // . Output is not guaranteed to be a valid literal, |
| 259 | // e.g: +55+-e34 will output as is. |
| 260 | // e.g: trueeee will output nothing as it is not a valid boolean. |
| 261 | // |
| 262 | // Details: |
| 263 | // . For Hex numbers, it checks for case-insensitive 0x[0-9A-F]+ |
| 264 | // that should be a proper check. |
| 265 | // . For other numbers, it checks for case-insensitive [0-9eE+-.]* |
| 266 | // so can also accept invalid numbers such as the number 5..45--10. |
| 267 | // . "true" and "false" (without quotes) are also accepted and that's it. |
| 268 | // |
| 269 | class JavascriptNumber : public TemplateModifier { |
| 270 | MODIFY_SIGNATURE_; |
| 271 | }; |
| 272 | extern JavascriptNumber javascript_number; |
| 273 | |
| 274 | // Escapes characters not in [0-9a-zA-Z.,_:*/~!()-] as %-prefixed hex. |
| 275 | // Space is encoded as a +. |
| 276 | class UrlQueryEscape : public TemplateModifier { |
| 277 | MODIFY_SIGNATURE_; |
| 278 | }; |
| 279 | extern UrlQueryEscape url_query_escape; |
| 280 | |
| 281 | // Escapes " \ / <FF> <CR> <LF> <BS> <TAB> to \" \\ \/ \f \r \n \b \t |
| 282 | // Also escapes < > & to their corresponding \uXXXX representation |
| 283 | // (\u003C, \u003E, \u0026 respectively). |
| 284 | class JsonEscape : public TemplateModifier { |
| 285 | MODIFY_SIGNATURE_; |
| 286 | }; |
| 287 | extern JsonEscape json_escape; |
| 288 | |
| 289 | // Inserts the given prefix (given as the argument to this modifier) |
| 290 | // after every newline in the text. Note that it does *not* insert |
| 291 | // prefix at the very beginning of the text -- in its expected use, |
| 292 | // that prefix will already be present before this text, in the |
| 293 | // template. This is meant to be used internally, and is not exported |
| 294 | // via the g_modifiers list. |
| 295 | class PrefixLine : public TemplateModifier { |
| 296 | MODIFY_SIGNATURE_; |
| 297 | }; |
| 298 | extern PrefixLine prefix_line; |
| 299 | |
| 300 | |
| 301 | #undef MODIFY_SIGNATURE_ |
| 302 | |
| 303 | |
| 304 | // Registers a new template modifier. |
| 305 | // long_name must start with "x-". |
| 306 | // If the modifier takes a value (eg "{{VAR:x-name=value}}"), then |
| 307 | // long_name should end with "=". This is similar to getopt(3) syntax. |
| 308 | // We also allow value-specializations, with specific values specified |
| 309 | // as part of long-name. For instance: |
| 310 | // AddModifier("x-mod=", &my_modifierA); |
| 311 | // AddModifier("x-mod=bar", &my_modifierB); |
| 312 | // AddModifier("x-mod2", &my_modifierC); |
| 313 | // For the template |
| 314 | // {{VAR1:x-mod=foo}} {{VAR2:x-mod=bar}} {{VAR3:x-mod=baz}} {{VAR4:x-mod2}} |
| 315 | // VAR1 and VAR3 would get modified by my_modifierA, VAR2 by my_modifierB, |
| 316 | // and VAR4 by my_modifierC. The order of the AddModifier calls is not |
| 317 | // significant. |
| 318 | extern |
| 319 | bool AddModifier(const char* long_name, const TemplateModifier* modifier); |
| 320 | |
| 321 | // Same as AddModifier() above except that the modifier is considered |
| 322 | // to produce safe output that can be inserted in any context without |
| 323 | // the need for additional escaping. This difference only impacts |
| 324 | // the Auto-Escape mode: In that mode, when a variable (or template-include) |
| 325 | // has a modifier added via AddXssSafeModifier(), it is excluded from |
| 326 | // further escaping, effectively treated as though it had the :none modifier. |
| 327 | // Because Auto-Escape is disabled for any variable and template-include |
| 328 | // that includes such a modifier, use this function with care and ensure |
| 329 | // that it may not emit harmful output that could lead to XSS. |
| 330 | // |
| 331 | // Some valid uses of AddXssSafeModifier: |
| 332 | // . A modifier that converts a string to an integer since |
| 333 | // an integer is generally safe in any context. |
| 334 | // . A modifier that returns one of a fixed number of safe values |
| 335 | // depending on properties of the input. |
| 336 | // |
| 337 | // Some not recommended uses of AddXssSafeModifier: |
| 338 | // . A modifier that applies some extra formatting to the input |
| 339 | // before returning it since the output will still contain |
| 340 | // harmful content if the input does. |
| 341 | // . A modifier that applies one type of escaping to the input |
| 342 | // (say HTML-escape). This may be dangerous when the modifier |
| 343 | // is used in a different context (say Javascript) where this |
| 344 | // escaping may be inadequate. |
| 345 | extern |
| 346 | bool AddXssSafeModifier(const char* long_name, const TemplateModifier* modifier); |
| 347 | |
| 348 | } |
| 349 | |
| 350 | #endif // TEMPLATE_TEMPLATE_MODIFIERS_H_ |