blob: b981a60616518826badda0a029c70d8de36244fa [file] [log] [blame]
Brian Silverman70325d62015-09-20 17:00:43 -04001// Copyright (c) 2007, Google Inc.
2// All rights reserved.
3//
4// Redistribution and use in source and binary forms, with or without
5// modification, are permitted provided that the following conditions are
6// met:
7//
8// * Redistributions of source code must retain the above copyright
9// notice, this list of conditions and the following disclaimer.
10// * Redistributions in binary form must reproduce the above
11// copyright notice, this list of conditions and the following disclaimer
12// in the documentation and/or other materials provided with the
13// distribution.
14// * Neither the name of Google Inc. nor the names of its
15// contributors may be used to endorse or promote products derived from
16// this software without specific prior written permission.
17//
18// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
19// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
20// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
21// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
22// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
23// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
24// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
25// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
26// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
27// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
28// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29
30// ---
31// Author: csilvers@google.com (Craig Silverstein)
32//
33// We allow template variables to have modifiers, each possibly with a
34// value associated with it. Format is
35// {{VARNAME:modname[=modifier-value]:modname[=modifier-value]:...}}
36// Modname refers to a functor that takes the variable's value
37// and modifier-value (empty-string if no modifier-value was
38// specified), and returns a munged value. Modifiers are applied
39// left-to-right. We define the legal modnames here, and the
40// functors they refer to.
41//
42// Modifiers have a long-name, an optional short-name (one char;
43// may be \0 if you don't want a shortname), and a functor that's
44// applied to the variable.
45//
46// In addition to the list of modifiers hard-coded in the source code
// here, it is possible to dynamically register modifiers using a long
48// name starting with "x-". If you wish to define your own modifier
49// class, in your own source code, just subclass TemplateModifier --
50// see template_modifiers.cc for details of how to do that.
51//
// Adding a new built-in modifier, to this file, takes several steps,
// both in this .h file and in the corresponding .cc file:
// 1) .h file: Define a class for the modifier.  It must subclass
//    TemplateModifier.
// 2) .h file: declare a variable that's an instance of the class.
//    This is used for people who want to modify the string themselves,
//    via TemplateDictionary::SetEscapedValue.
// 3) .cc file: define the new modifier's Modify method.
// 4) .cc file: give storage for the variable declared in the .h file (in 2).
// 5) .cc file: add the modifier to the g_modifiers array.

62
63#ifndef TEMPLATE_TEMPLATE_MODIFIERS_H_
64#define TEMPLATE_TEMPLATE_MODIFIERS_H_
65
#include <sys/types.h>   // for size_t
#include <string.h>      // for strlen
#include <string>
#include <ctemplate/template_emitter.h>   // so we can inline operator()
#include <ctemplate/per_expand_data.h>    // could probably just forward-declare
70
71// NOTE: if you are statically linking the template library into your binary
72// (rather than using the template .dll), set '/D CTEMPLATE_DLL_DECL='
73// as a compiler flag in your project file to turn off the dllimports.
74#ifndef CTEMPLATE_DLL_DECL
75# define CTEMPLATE_DLL_DECL __declspec(dllimport)
76#endif
77
78namespace ctemplate {
79
80class Template;
81
// Declares the public, virtual Modify() member inside a built-in modifier
// class, so that each class body below is simply "MODIFY_SIGNATURE_;".
// The parameter list mirrors TemplateModifier::Modify() and must be kept
// in sync with it.  The macro is #undef'ed once the built-in modifiers
// have been declared.
#define MODIFY_SIGNATURE_ \
 public: \
  virtual void Modify(const char* in, size_t inlen, \
                      const PerExpandData*, ExpandEmitter* outbuf, \
                      const std::string& arg) const
87
88// If you wish to write your own modifier, it should subclass this
89// method. Your subclass should only define Modify(); for efficiency,
90// we do not make operator() virtual.
91class CTEMPLATE_DLL_DECL TemplateModifier {
92 public:
93 // This function takes a string as input, a char*/size_t pair, and
94 // appends the modified version to the end of outbuf. In addition
95 // to the variable-value to modify (specified via in/inlen), each
96 // Modify passes in two pieces of user-supplied data:
97 // 1) arg: this is the modifier-value, for modifiers that take a
98 // value (e.g. "{{VAR:modifier=value}}"). This value
99 // comes from the template file. For modifiers that take
100 // no modval argument, arg will always be "". For modifiers
101 // that do take such an argument, arg will always start with "=".
102 // 2) per_expand_data: this is a set of data that the application can
103 // associate with a TemplateDictionary, and is passed in to
104 // every variable expanded using that dictionary. This value
105 // comes from the source code.
106 virtual void Modify(const char* in, size_t inlen,
107 const PerExpandData* per_expand_data,
108 ExpandEmitter* outbuf,
109 const std::string& arg) const = 0;
110
111 // This function can be used to speed up modification. If Modify()
112 // is often a noop, you can implement MightModify() to indicate
113 // situations where it's safe to avoid the call to Modify(), because
114 // Modify() won't do any modifications in this case. Note it's
115 // always safe to return true here; you should just return false if
116 // you're certain Modify() can be ignored. This function is
117 // advisory; the template system is not required to call
118 // MightModify() before Modify().
119 virtual bool MightModify(const PerExpandData* /*per_expand_data*/,
120 const std::string& /*arg*/) const {
121 return true;
122 }
123
124 // We support both modifiers that take an argument, and those that don't.
125 // We also support passing in a string, or a char*/int pair.
126 std::string operator()(const char* in, size_t inlen, const std::string& arg="") const {
127 std::string out;
128 // we'll reserve some space to account for minimal escaping: say 12%
129 out.reserve(inlen + inlen/8 + 16);
130 StringEmitter outbuf(&out);
131 Modify(in, inlen, NULL, &outbuf, arg);
132 return out;
133 }
134 std::string operator()(const std::string& in, const std::string& arg="") const {
135 return operator()(in.data(), in.size(), arg);
136 }
137
138 virtual ~TemplateModifier(); // always need a virtual destructor!
139};
140
141
// Returns the input verbatim (for testing).
class CTEMPLATE_DLL_DECL NullModifier : public TemplateModifier {
  MODIFY_SIGNATURE_;   // declares the Modify() override
};
// Shared NullModifier instance; its storage is provided outside this header.
extern CTEMPLATE_DLL_DECL NullModifier null_modifier;
147
// Escapes < > " ' & <non-space whitespace> to &lt; &gt; &quot;
// &#39; &amp; <space>
class CTEMPLATE_DLL_DECL HtmlEscape : public TemplateModifier {
  MODIFY_SIGNATURE_;   // declares the Modify() override
};
// Shared HtmlEscape instance; its storage is provided outside this header.
extern CTEMPLATE_DLL_DECL HtmlEscape html_escape;
154
// Same as HtmlEscape but leaves all whitespace alone. Eg. for <pre>..</pre>
class CTEMPLATE_DLL_DECL PreEscape : public TemplateModifier {
  MODIFY_SIGNATURE_;   // declares the Modify() override
};
// Shared PreEscape instance; its storage is provided outside this header.
extern CTEMPLATE_DLL_DECL PreEscape pre_escape;
160
// Like HtmlEscape but allows HTML entities, <br> tags, <wbr> tags,
// matched <b> and </b> tags, matched <i> and </i> tags, matched <em> and </em>
// tags, and matched <span dir=(rtl|ltr)> tags.
class CTEMPLATE_DLL_DECL SnippetEscape : public TemplateModifier {
  MODIFY_SIGNATURE_;   // declares the Modify() override
};
// Shared SnippetEscape instance; its storage is provided outside this header.
extern CTEMPLATE_DLL_DECL SnippetEscape snippet_escape;
168
// Replaces characters not safe for an unquoted attribute with underscore.
// Safe characters are alphanumeric, underscore, dash, period, and colon.
// The equal sign is also considered safe unless it is at the start
// or end of the input in which case it is replaced with underscore.
//
// We added the equal sign to the safe characters to allow this modifier
// to be used on attribute name/value pairs in HTML tags such as
//   <div {{CLASS:H=attribute}}>
// where CLASS is expanded to "class=bla".
//
// Note: The equal sign is replaced when found at either boundary of the
// string due to the concern that it may lead to XSS under some special
// circumstances: Say, if this string is the value of an attribute in an
// HTML tag and ends with an equal sign, a browser may possibly end up
// interpreting the next token as the value of this string rather than
// a new attribute (esoteric).
class CTEMPLATE_DLL_DECL CleanseAttribute : public TemplateModifier {
  MODIFY_SIGNATURE_;   // declares the Modify() override
};
// Shared CleanseAttribute instance; its storage is provided outside this
// header.
extern CTEMPLATE_DLL_DECL CleanseAttribute cleanse_attribute;
189
// Removes characters not safe for a CSS value. Safe characters are
// alphanumeric, space, underscore, period, comma, exclamation mark,
// pound, percent, and dash.
class CTEMPLATE_DLL_DECL CleanseCss : public TemplateModifier {
  MODIFY_SIGNATURE_;   // declares the Modify() override
};
// Shared CleanseCss instance; its storage is provided outside this header.
extern CTEMPLATE_DLL_DECL CleanseCss cleanse_css;
197
198// Checks that a url is either an absolute http(s) URL or a relative
199// url that doesn't have a protocol hidden in it (ie [foo.html] is
200// fine, but not [javascript:foo]) and then performs another type of
201// escaping. Returns the url escaped with the specified modifier if
202// good, otherwise returns a safe replacement URL.
203// This is normally "#", but for <img> tags, it is not safe to set
204// the src attribute to "#". This is because this causes some browsers
205// to reload the page, which can cause a DoS.
206class CTEMPLATE_DLL_DECL ValidateUrl : public TemplateModifier {
207 public:
208 explicit ValidateUrl(const TemplateModifier& chained_modifier,
209 const char* unsafe_url_replacement)
210 : chained_modifier_(chained_modifier),
211 unsafe_url_replacement_(unsafe_url_replacement),
212 unsafe_url_replacement_length_(strlen(unsafe_url_replacement)) { }
213 MODIFY_SIGNATURE_;
214 static const char* const kUnsafeUrlReplacement;
215 static const char* const kUnsafeImgSrcUrlReplacement;
216 private:
217 const TemplateModifier& chained_modifier_;
218 const char* unsafe_url_replacement_;
219 int unsafe_url_replacement_length_;
220};
221extern CTEMPLATE_DLL_DECL ValidateUrl validate_url_and_html_escape;
222extern CTEMPLATE_DLL_DECL ValidateUrl validate_url_and_javascript_escape;
223extern CTEMPLATE_DLL_DECL ValidateUrl validate_url_and_css_escape;
224extern CTEMPLATE_DLL_DECL ValidateUrl validate_img_src_url_and_html_escape;
225extern CTEMPLATE_DLL_DECL ValidateUrl validate_img_src_url_and_javascript_escape;
226extern CTEMPLATE_DLL_DECL ValidateUrl validate_img_src_url_and_css_escape;
227
// Escapes < > & " ' to &lt; &gt; &amp; &quot; &#39; (same as in HtmlEscape).
// If you use it within a CDATA section, you may be escaping more characters
// than strictly necessary. If this turns out to be an issue, we will need
// to add a variant just for CDATA.
class CTEMPLATE_DLL_DECL XmlEscape : public TemplateModifier {
  MODIFY_SIGNATURE_;   // declares the Modify() override
};
// Shared XmlEscape instance; its storage is provided outside this header.
extern CTEMPLATE_DLL_DECL XmlEscape xml_escape;
236
// Escapes characters that cannot appear unescaped in a javascript string
// assuming UTF-8 encoded input.
// This does NOT escape all characters that cannot appear unescaped in a
// javascript regular expression literal.
class CTEMPLATE_DLL_DECL JavascriptEscape : public TemplateModifier {
  MODIFY_SIGNATURE_;   // declares the Modify() override
};
// Shared JavascriptEscape instance; its storage is provided outside this
// header.
extern CTEMPLATE_DLL_DECL JavascriptEscape javascript_escape;
245
// Checks that the input is a valid javascript non-string literal
// meaning a boolean (true, false) or a numeric value (decimal, hex or octal).
// If valid, we output the input as is, otherwise we output null instead.
// Input of zero length is considered valid and nothing is output.
//
// The emphasis is on safety against injection of javascript code rather
// than perfect validation, as such it is possible for non-valid literals to
// pass through.
//
// You would use this modifier for javascript variables that are not
// enclosed in quotes such as:
//    <script>var a = {{VALUE}};</script> OR
//    <a href="url" onclick="doSubmit({{ID}})">
// For variables that are quoted (i.e. string literals) use javascript_escape.
//
// Limitations:
// . NaN, +/-Infinity and null are not recognized.
// . Output is not guaranteed to be a valid literal,
//   e.g: +55+-e34 will output as is.
//   e.g: trueeee will not be output as is (null is output instead, per
//        the rule above), as it is not a valid boolean.
//
// Details:
// . For Hex numbers, it checks for case-insensitive 0x[0-9A-F]+
//   that should be a proper check.
// . For other numbers, it checks for case-insensitive [0-9eE+-.]*
//   so can also accept invalid numbers such as the number 5..45--10.
// . "true" and "false" (without quotes) are also accepted and that's it.
//
class CTEMPLATE_DLL_DECL JavascriptNumber : public TemplateModifier {
  MODIFY_SIGNATURE_;   // declares the Modify() override
};
// Shared JavascriptNumber instance; its storage is provided outside this
// header.
extern CTEMPLATE_DLL_DECL JavascriptNumber javascript_number;
278
// Escapes characters not in [0-9a-zA-Z.,_:*/~!()-] as %-prefixed hex.
// Space is encoded as a +.
class CTEMPLATE_DLL_DECL UrlQueryEscape : public TemplateModifier {
  MODIFY_SIGNATURE_;   // declares the Modify() override
};
// Shared UrlQueryEscape instance; its storage is provided outside this
// header.
extern CTEMPLATE_DLL_DECL UrlQueryEscape url_query_escape;
285
// Escapes " \ / <FF> <CR> <LF> <BS> <TAB> to \" \\ \/ \f \r \n \b \t
// Also escapes < > & to their corresponding \uXXXX representation
// (\u003C, \u003E, \u0026 respectively).
class CTEMPLATE_DLL_DECL JsonEscape : public TemplateModifier {
  MODIFY_SIGNATURE_;   // declares the Modify() override
};
// Shared JsonEscape instance; its storage is provided outside this header.
extern CTEMPLATE_DLL_DECL JsonEscape json_escape;
293
// Inserts the given prefix (given as the argument to this modifier)
// after every newline in the text.  Note that it does *not* insert
// prefix at the very beginning of the text -- in its expected use,
// that prefix will already be present before this text, in the
// template.  This is meant to be used internally, and is not exported
// via the g_modifiers list.
class CTEMPLATE_DLL_DECL PrefixLine : public TemplateModifier {
  MODIFY_SIGNATURE_;   // declares the Modify() override
};
// Shared PrefixLine instance; its storage is provided outside this header.
extern CTEMPLATE_DLL_DECL PrefixLine prefix_line;
304
305
306#undef MODIFY_SIGNATURE_
307
308
// Registers a new template modifier.
// long_name must start with "x-".
// If the modifier takes a value (eg "{{VAR:x-name=value}}"), then
// long_name should end with "=".  This is similar to getopt(3) syntax.
// We also allow value-specializations, with specific values specified
// as part of long-name.  For instance:
//    AddModifier("x-mod=", &my_modifierA);
//    AddModifier("x-mod=bar", &my_modifierB);
//    AddModifier("x-mod2", &my_modifierC);
// For the template
//    {{VAR1:x-mod=foo}} {{VAR2:x-mod=bar}} {{VAR3:x-mod=baz}} {{VAR4:x-mod2}}
// VAR1 and VAR3 would get modified by my_modifierA, VAR2 by my_modifierB,
// and VAR4 by my_modifierC.  The order of the AddModifier calls is not
// significant.
//
// long_name: the name to register, as described above.
// modifier:  the modifier object to associate with long_name.
// NOTE(review): the meaning of the bool result is not stated in this
// header -- presumably true when the registration succeeded; confirm
// against the implementation.
extern CTEMPLATE_DLL_DECL
bool AddModifier(const char* long_name, const TemplateModifier* modifier);
325
// Same as AddModifier() above except that the modifier is considered
// to produce safe output that can be inserted in any context without
// the need for additional escaping.  This difference only impacts
// the Auto-Escape mode: In that mode, when a variable (or template-include)
// has a modifier added via AddXssSafeModifier(), it is excluded from
// further escaping, effectively treated as though it had the :none modifier.
// Because Auto-Escape is disabled for any variable and template-include
// that includes such a modifier, use this function with care and ensure
// that it may not emit harmful output that could lead to XSS.
//
// Some valid uses of AddXssSafeModifier:
// . A modifier that converts a string to an integer since
//   an integer is generally safe in any context.
// . A modifier that returns one of a fixed number of safe values
//   depending on properties of the input.
//
// Some not recommended uses of AddXssSafeModifier:
// . A modifier that applies some extra formatting to the input
//   before returning it since the output will still contain
//   harmful content if the input does.
// . A modifier that applies one type of escaping to the input
//   (say HTML-escape).  This may be dangerous when the modifier
//   is used in a different context (say Javascript) where this
//   escaping may be inadequate.
//
// long_name: the name to register; same rules as for AddModifier().
// modifier:  the modifier object to associate with long_name.
// NOTE(review): the meaning of the bool result is not stated in this
// header -- presumably true when the registration succeeded; confirm
// against the implementation.
extern CTEMPLATE_DLL_DECL
bool AddXssSafeModifier(const char* long_name,
                        const TemplateModifier* modifier);
353
354}
355
356
357#endif // TEMPLATE_TEMPLATE_MODIFIERS_H_