| // Copyright (c) 2007, Google Inc. |
| // All rights reserved. |
| // |
| // Redistribution and use in source and binary forms, with or without |
| // modification, are permitted provided that the following conditions are |
| // met: |
| // |
| // * Redistributions of source code must retain the above copyright |
| // notice, this list of conditions and the following disclaimer. |
| // * Redistributions in binary form must reproduce the above |
| // copyright notice, this list of conditions and the following disclaimer |
| // in the documentation and/or other materials provided with the |
| // distribution. |
| // * Neither the name of Google Inc. nor the names of its |
| // contributors may be used to endorse or promote products derived from |
| // this software without specific prior written permission. |
| // |
| // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS |
| // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT |
| // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR |
| // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT |
| // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, |
| // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT |
| // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, |
| // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY |
| // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT |
| // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE |
| // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
| |
| // --- |
| // Author: csilvers@google.com (Craig Silverstein) |
| // |
| // We allow template variables to have modifiers, each possibly with a |
| // value associated with it. Format is |
| // {{VARNAME:modname[=modifier-value]:modname[=modifier-value]:...}} |
| // Modname refers to a functor that takes the variable's value |
| // and modifier-value (empty-string if no modifier-value was |
| // specified), and returns a munged value. Modifiers are applied |
| // left-to-right. We define the legal modnames here, and the |
| // functors they refer to. |
| // |
| // Modifiers have a long-name, an optional short-name (one char; |
| // may be \0 if you don't want a shortname), and a functor that's |
| // applied to the variable. |
| // |
| // In addition to the list of modifiers hard-coded in the source code |
| // here, it is possible to dynamicly register modifiers using a long |
| // name starting with "x-". If you wish to define your own modifier |
| // class, in your own source code, just subclass TemplateModifier -- |
| // see template_modifiers.cc for details of how to do that. |
| // |
| // Adding a new built-in modifier, to this file, takes several steps, |
| // both in this .h file and in the corresponding .cc file: |
| // 1) .h file: Define a struct for the modifier. It must subclass |
| // TemplateModifier. |
| // 2) .h file: declare a variable that's an instance of the struct. |
| // This is used for people who want to modify the string themselves, |
| // via TemplateDictionary::SetEscapedValue. |
| // 5) .cc file: define the new modifier's Modify method. |
| // 6) .cc file: give storage for the variable declared in the .h file (in 2). |
| // 7) .cc file: add the modifier to the g_modifiers array. |
| |
| #ifndef TEMPLATE_TEMPLATE_MODIFIERS_H_ |
| #define TEMPLATE_TEMPLATE_MODIFIERS_H_ |
| |
| #include <sys/types.h> // for size_t |
| #include <string> |
| #include <ctemplate/template_emitter.h> // so we can inline operator() |
| #include <ctemplate/per_expand_data.h> // could probably just forward-declare |
| |
| |
| |
| namespace ctemplate { |
| |
| class Template; |
| |
| #define MODIFY_SIGNATURE_ \ |
| public: \ |
| virtual void Modify(const char* in, size_t inlen, \ |
| const PerExpandData*, ExpandEmitter* outbuf, \ |
| const std::string& arg) const |
| |
| // If you wish to write your own modifier, it should subclass this |
| // method. Your subclass should only define Modify(); for efficiency, |
| // we do not make operator() virtual. |
| class TemplateModifier { |
| public: |
| // This function takes a string as input, a char*/size_t pair, and |
| // appends the modified version to the end of outbuf. In addition |
| // to the variable-value to modify (specified via in/inlen), each |
| // Modify passes in two pieces of user-supplied data: |
| // 1) arg: this is the modifier-value, for modifiers that take a |
| // value (e.g. "{{VAR:modifier=value}}"). This value |
| // comes from the template file. For modifiers that take |
| // no modval argument, arg will always be "". For modifiers |
| // that do take such an argument, arg will always start with "=". |
| // 2) per_expand_data: this is a set of data that the application can |
| // associate with a TemplateDictionary, and is passed in to |
| // every variable expanded using that dictionary. This value |
| // comes from the source code. |
| virtual void Modify(const char* in, size_t inlen, |
| const PerExpandData* per_expand_data, |
| ExpandEmitter* outbuf, |
| const std::string& arg) const = 0; |
| |
| // This function can be used to speed up modification. If Modify() |
| // is often a noop, you can implement MightModify() to indicate |
| // situations where it's safe to avoid the call to Modify(), because |
| // Modify() won't do any modifications in this case. Note it's |
| // always safe to return true here; you should just return false if |
| // you're certain Modify() can be ignored. This function is |
| // advisory; the template system is not required to call |
| // MightModify() before Modify(). |
| virtual bool MightModify(const PerExpandData* /*per_expand_data*/, |
| const std::string& /*arg*/) const { |
| return true; |
| } |
| |
| // We support both modifiers that take an argument, and those that don't. |
| // We also support passing in a string, or a char*/int pair. |
| std::string operator()(const char* in, size_t inlen, const std::string& arg="") const { |
| std::string out; |
| // we'll reserve some space to account for minimal escaping: say 12% |
| out.reserve(inlen + inlen/8 + 16); |
| StringEmitter outbuf(&out); |
| Modify(in, inlen, NULL, &outbuf, arg); |
| return out; |
| } |
| std::string operator()(const std::string& in, const std::string& arg="") const { |
| return operator()(in.data(), in.size(), arg); |
| } |
| |
| virtual ~TemplateModifier(); // always need a virtual destructor! |
| }; |
| |
| |
| // Returns the input verbatim (for testing) |
| class NullModifier : public TemplateModifier { |
| MODIFY_SIGNATURE_; |
| }; |
| extern NullModifier null_modifier; |
| |
| // Escapes < > " ' & <non-space whitespace> to < > " |
| // ' & <space> |
| class HtmlEscape : public TemplateModifier { |
| MODIFY_SIGNATURE_; |
| }; |
| extern HtmlEscape html_escape; |
| |
| // Same as HtmlEscape but leaves all whitespace alone. Eg. for <pre>..</pre> |
| class PreEscape : public TemplateModifier { |
| MODIFY_SIGNATURE_; |
| }; |
| extern PreEscape pre_escape; |
| |
| // Like HtmlEscape but allows HTML entities, <br> tags, <wbr> tags, |
| // matched <b> and </b> tags, matched <i> and </i> tags, matched <em> and </em> |
| // tags, and matched <span dir=(rtl|ltr)> tags. |
| class SnippetEscape : public TemplateModifier { |
| MODIFY_SIGNATURE_; |
| }; |
| extern SnippetEscape snippet_escape; |
| |
| // Replaces characters not safe for an unquoted attribute with underscore. |
| // Safe characters are alphanumeric, underscore, dash, period, and colon. |
| // The equal sign is also considered safe unless it is at the start |
| // or end of the input in which case it is replaced with underscore. |
| // |
| // We added the equal sign to the safe characters to allow this modifier |
| // to be used on attribute name/value pairs in HTML tags such as |
| // <div {{CLASS:H=attribute}}> |
| // where CLASS is expanded to "class=bla". |
| // |
| // Note: The equal sign is replaced when found at either boundaries of the |
| // string due to the concern it may be lead to XSS under some special |
| // circumstances: Say, if this string is the value of an attribute in an |
| // HTML tag and ends with an equal sign, a browser may possibly end up |
| // interpreting the next token as the value of this string rather than |
| // a new attribute (esoteric). |
| class CleanseAttribute : public TemplateModifier { |
| MODIFY_SIGNATURE_; |
| }; |
| extern CleanseAttribute cleanse_attribute; |
| |
| // Removes characters not safe for a CSS value. Safe characters are |
| // alphanumeric, space, underscore, period, coma, exclamation mark, |
| // pound, percent, and dash. |
| class CleanseCss : public TemplateModifier { |
| MODIFY_SIGNATURE_; |
| }; |
| extern CleanseCss cleanse_css; |
| |
| // Checks that a url is either an absolute http(s) URL or a relative |
| // url that doesn't have a protocol hidden in it (ie [foo.html] is |
| // fine, but not [javascript:foo]) and then performs another type of |
| // escaping. Returns the url escaped with the specified modifier if |
| // good, otherwise returns a safe replacement URL. |
| // This is normally "#", but for <img> tags, it is not safe to set |
| // the src attribute to "#". This is because this causes some browsers |
| // to reload the page, which can cause a DoS. |
| class ValidateUrl : public TemplateModifier { |
| public: |
| explicit ValidateUrl(const TemplateModifier& chained_modifier, |
| const char* unsafe_url_replacement) |
| : chained_modifier_(chained_modifier), |
| unsafe_url_replacement_(unsafe_url_replacement), |
| unsafe_url_replacement_length_(strlen(unsafe_url_replacement)) { } |
| MODIFY_SIGNATURE_; |
| static const char* const kUnsafeUrlReplacement; |
| static const char* const kUnsafeImgSrcUrlReplacement; |
| private: |
| const TemplateModifier& chained_modifier_; |
| const char* unsafe_url_replacement_; |
| int unsafe_url_replacement_length_; |
| }; |
| extern ValidateUrl validate_url_and_html_escape; |
| extern ValidateUrl validate_url_and_javascript_escape; |
| extern ValidateUrl validate_url_and_css_escape; |
| extern ValidateUrl validate_img_src_url_and_html_escape; |
| extern ValidateUrl validate_img_src_url_and_javascript_escape; |
| extern ValidateUrl validate_img_src_url_and_css_escape; |
| |
| // Escapes < > & " ' to < > & " ' (same as in HtmlEscape). |
| // If you use it within a CDATA section, you may be escaping more characters |
| // than strictly necessary. If this turns out to be an issue, we will need |
| // to add a variant just for CDATA. |
| class XmlEscape : public TemplateModifier { |
| MODIFY_SIGNATURE_; |
| }; |
| extern XmlEscape xml_escape; |
| |
| // Escapes characters that cannot appear unescaped in a javascript string |
| // assuming UTF-8 encoded input. |
| // This does NOT escape all characters that cannot appear unescaped in a |
| // javascript regular expression literal. |
| class JavascriptEscape : public TemplateModifier { |
| MODIFY_SIGNATURE_; |
| }; |
| extern JavascriptEscape javascript_escape; |
| |
| // Checks that the input is a valid javascript non-string literal |
| // meaning a boolean (true, false) or a numeric value (decimal, hex or octal). |
| // If valid, we output the input as is, otherwise we output null instead. |
| // Input of zero length is considered valid and nothing is output. |
| // |
| // The emphasis is on safety against injection of javascript code rather |
| // than perfect validation, as such it is possible for non-valid literals to |
| // pass through. |
| // |
| // You would use this modifier for javascript variables that are not |
| // enclosed in quotes such as: |
| // <script>var a = {{VALUE}};</script> OR |
| // <a href="url" onclick="doSubmit({{ID}})"> |
| // For variables that are quoted (i.e. string literals) use javascript_escape. |
| // |
| // Limitations: |
| // . NaN, +/-Infinity and null are not recognized. |
| // . Output is not guaranteed to be a valid literal, |
| // e.g: +55+-e34 will output as is. |
| // e.g: trueeee will output nothing as it is not a valid boolean. |
| // |
| // Details: |
| // . For Hex numbers, it checks for case-insensitive 0x[0-9A-F]+ |
| // that should be a proper check. |
| // . For other numbers, it checks for case-insensitive [0-9eE+-.]* |
| // so can also accept invalid numbers such as the number 5..45--10. |
| // . "true" and "false" (without quotes) are also accepted and that's it. |
| // |
| class JavascriptNumber : public TemplateModifier { |
| MODIFY_SIGNATURE_; |
| }; |
| extern JavascriptNumber javascript_number; |
| |
| // Escapes characters not in [0-9a-zA-Z.,_:*/~!()-] as %-prefixed hex. |
| // Space is encoded as a +. |
| class UrlQueryEscape : public TemplateModifier { |
| MODIFY_SIGNATURE_; |
| }; |
| extern UrlQueryEscape url_query_escape; |
| |
| // Escapes " \ / <FF> <CR> <LF> <BS> <TAB> to \" \\ \/ \f \r \n \b \t |
| // Also escapes < > & to their corresponding \uXXXX representation |
| // (\u003C, \u003E, \u0026 respectively). |
| class JsonEscape : public TemplateModifier { |
| MODIFY_SIGNATURE_; |
| }; |
| extern JsonEscape json_escape; |
| |
| // Inserts the given prefix (given as the argument to this modifier) |
| // after every newline in the text. Note that it does *not* insert |
| // prefix at the very beginning of the text -- in its expected use, |
| // that prefix will already be present before this text, in the |
| // template. This is meant to be used internally, and is not exported |
| // via the g_modifiers list. |
| class PrefixLine : public TemplateModifier { |
| MODIFY_SIGNATURE_; |
| }; |
| extern PrefixLine prefix_line; |
| |
| |
| #undef MODIFY_SIGNATURE_ |
| |
| |
| // Registers a new template modifier. |
| // long_name must start with "x-". |
| // If the modifier takes a value (eg "{{VAR:x-name=value}}"), then |
| // long_name should end with "=". This is similar to getopt(3) syntax. |
| // We also allow value-specializations, with specific values specified |
| // as part of long-name. For instance: |
| // AddModifier("x-mod=", &my_modifierA); |
| // AddModifier("x-mod=bar", &my_modifierB); |
| // AddModifier("x-mod2", &my_modifierC); |
| // For the template |
| // {{VAR1:x-mod=foo}} {{VAR2:x-mod=bar}} {{VAR3:x-mod=baz}} {{VAR4:x-mod2}} |
| // VAR1 and VAR3 would get modified by my_modifierA, VAR2 by my_modifierB, |
| // and VAR4 by my_modifierC. The order of the AddModifier calls is not |
| // significant. |
| extern |
| bool AddModifier(const char* long_name, const TemplateModifier* modifier); |
| |
| // Same as AddModifier() above except that the modifier is considered |
| // to produce safe output that can be inserted in any context without |
| // the need for additional escaping. This difference only impacts |
| // the Auto-Escape mode: In that mode, when a variable (or template-include) |
| // has a modifier added via AddXssSafeModifier(), it is excluded from |
| // further escaping, effectively treated as though it had the :none modifier. |
| // Because Auto-Escape is disabled for any variable and template-include |
| // that includes such a modifier, use this function with care and ensure |
| // that it may not emit harmful output that could lead to XSS. |
| // |
| // Some valid uses of AddXssSafeModifier: |
| // . A modifier that converts a string to an integer since |
| // an integer is generally safe in any context. |
| // . A modifier that returns one of a fixed number of safe values |
| // depending on properties of the input. |
| // |
| // Some not recommended uses of AddXssSafeModifier: |
| // . A modifier that applies some extra formatting to the input |
| // before returning it since the output will still contain |
| // harmful content if the input does. |
| // . A modifier that applies one type of escaping to the input |
| // (say HTML-escape). This may be dangerous when the modifier |
| // is used in a different context (say Javascript) where this |
| // escaping may be inadequate. |
| extern |
| bool AddXssSafeModifier(const char* long_name, const TemplateModifier* modifier); |
| |
| } |
| |
| #endif // TEMPLATE_TEMPLATE_MODIFIERS_H_ |