blob: de20d069b842791f13b53502f16fb4b0b4cb3a29 [file] [log] [blame]
Brian Silverman70325d62015-09-20 17:00:43 -04001// Copyright (c) 2006, Google Inc.
2// All rights reserved.
3//
4// Redistribution and use in source and binary forms, with or without
5// modification, are permitted provided that the following conditions are
6// met:
7//
8// * Redistributions of source code must retain the above copyright
9// notice, this list of conditions and the following disclaimer.
10// * Redistributions in binary form must reproduce the above
11// copyright notice, this list of conditions and the following disclaimer
12// in the documentation and/or other materials provided with the
13// distribution.
14// * Neither the name of Google Inc. nor the names of its
15// contributors may be used to endorse or promote products derived from
16// this software without specific prior written permission.
17//
18// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
19// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
20// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
21// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
22// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
23// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
24// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
25// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
26// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
27// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
28// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29
30// ---
31
32#include <config.h>
33#include "base/mutex.h" // This must go first so we get _XOPEN_SOURCE
34#include <ctemplate/template.h>
35
36#include <assert.h>
37#include <errno.h>
38#include <stdio.h> // for fwrite, fflush
39#include <stdlib.h>
40#include <string.h>
41#include <sys/stat.h>
42#include <time.h>
43#ifdef HAVE_UNISTD_H
44# include <unistd.h>
45#endif // for stat() and open() and getcwd()
46#include <algorithm> // for binary_search()
47#include <functional> // for binary_function()
48#include HASH_MAP_H
49#include <iterator>
50#include <list>
51#include <string>
52#include <utility> // for pair
53#include <vector>
54
55#include "base/thread_annotations.h"
56#include "htmlparser/htmlparser_cpp.h"
57#include <ctemplate/per_expand_data.h>
58#include <ctemplate/template_annotator.h>
59#include <ctemplate/template_cache.h>
60#include <ctemplate/template_dictionary.h>
61#include <ctemplate/template_dictionary_interface.h> // also gets kIndent
62#include <ctemplate/template_modifiers.h>
63#include "template_modifiers_internal.h"
64#include <ctemplate/template_pathops.h>
65#include <ctemplate/template_string.h>
66#include "base/fileutil.h"
67#include <ctype.h>
68#include <iostream>
69#include <sstream> // for ostringstream
70
71#ifndef PATH_MAX
72#ifdef MAXPATHLEN
73#define PATH_MAX MAXPATHLEN
74#else
75#define PATH_MAX 4096 // seems conservative for max filename len!
76#endif
77#endif
78
// Element count of a statically sized array: total bytes divided by the
// bytes of its first element.
#define arraysize(x) ( sizeof(x) / sizeof((x)[0]) )
80
81#define AS_STR1(x) #x
82#define AS_STR(x) AS_STR1(x)
83
// A very simple logging system.
//   LOG(level)  streams to std::cerr, prefixed with the stringified level.
//   VLOG(level) does the same, but only when kVerbosity >= level.
#undef LOG   // a non-working version is provided in base/util.h; redefine it
static int kVerbosity = 0;   // you can change this by hand to get vlogs
#define LOG(level)   std::cerr << #level ": "
// VLOG is written as "if (too quiet) {} else <stream>" rather than a bare
// "if (loud enough) <stream>".  The explicit else closes the macro's own
// if-statement, so an 'else' written by the caller after an unbraced
// "if (cond) VLOG(n) << ...;" still pairs with the caller's 'if'.
#define VLOG(level)  if (kVerbosity < (level)) {} else LOG(level)
89
90// TODO(csilvers): use our own tables for these?
// True only for 7-bit ASCII characters that isalnum() accepts.  Characters
// with the high bit set are rejected before isalnum() is consulted.
static bool ascii_isalnum(char c) {
  if ((c & 0x80) != 0)
    return false;   // not 7-bit ascii
  return isalnum(c) != 0;
}
94
// True only for 7-bit ASCII characters that isspace() accepts.  Characters
// with the high bit set are rejected before isspace() is consulted.
static bool ascii_isspace(char c) {
  if ((c & 0x80) != 0)
    return false;   // not 7-bit ascii
  return isspace(c) != 0;
}
98
// True when the C string 'str' ends with 'suffix' AND is strictly longer
// than it (a string equal to the suffix does not match).  'suffix' must be
// a string literal: the "" suffix "" concatenation only compiles for one.
// Note that 'str' is evaluated more than once.
#define strsuffix(str, suffix)                                            \
  ( strlen(str) > (sizeof("" suffix "") - 1) &&                           \
    strcmp((str) + strlen(str) - (sizeof("" suffix "") - 1), suffix) == 0 )
102
103using std::endl;
104using std::string;
105using std::list;
106using std::vector;
107using std::pair;
108using std::binary_search;
109#ifdef HAVE_UNORDERED_MAP
110using HASH_NAMESPACE::unordered_map;
111// This is totally cheap, but minimizes the need for #ifdef's below...
112#define hash_map unordered_map
113#else
114using HASH_NAMESPACE::hash_map;
115#endif
116
117namespace ctemplate {
118
119using ctemplate_htmlparser::HtmlParser;
120
121TemplateId GlobalIdForSTS_INIT(const TemplateString& s) {
122 return s.GetGlobalId(); // normally this method is private
123}
124
125int Template::num_deletes_ = 0;
126
127namespace {
128// Mutex for protecting Expand calls against ReloadIfChanged, which
129// might change a template while it's being expanded. This mutex used
130// to be a per-template mutex, rather than a global mutex, which seems
131// like it would be strictly better, but we ran into subtle problems
132// with deadlocks when a template would sub-include itself (thus
133// requiring a recursive read-lock during Expand), and the template
134// was Expanded and ReloadIfChanged at the same time. Rather than
135// deal with that complication, we just go with a global mutex. Since
136// ReloadIfChanged is deprecated, in most applications all the mutex
137// uses will be as read-locks, so this shouldn't cause much contention.
138static Mutex g_template_mutex(base::LINKER_INITIALIZED);
139
140// Mutex for protecting vars_seen in WriteOneHeaderEntry, below.
141// g_template_mutex and g_header_mutex are never held at the same time.
142// TODO(csilvers): assert this in the codebase.
143static Mutex g_header_mutex(base::LINKER_INITIALIZED);
144
145// It's not great to have a global variable with a constructor, but
146// it's safe in this case: the constructor is trivial and does not
147// depend on any other global constructors running first, and the
148// variable is used in only one place below, always after main() has
149// started.
150// It is ok for this modifier to be in XssClass XSS_WEB_STANDARD because
151// it only adds indentation characters - typically whitespace - iff these
152// are already present in the text. If such characters were XSS-harmful
153// in a given context, they would have already been escaped or replaced
154// by earlier escaping such as H=attribute.
155static const ModifierInfo g_prefix_line_info("", '\0', XSS_WEB_STANDARD,
156 &prefix_line);
157
158const char * const kDefaultTemplateDirectory = kCWD; // "./"
159// Note this name is syntactically impossible for a user to accidentally use.
160const char * const kMainSectionName = "__{{MAIN}}__";
161
162// A TemplateString object that precomputes its hash. This can be
163// useful in places like template filling code, where we'd like to
164// hash the string once then reuse it many times. This should not be
165// used for filling any part of a template dictionary, since we don't
166// map the id to its corresponding string or manage memory for the
167// string - it is for lookups *only*.
168class HashedTemplateString : public TemplateString {
169 public:
170 HashedTemplateString(const char* s, size_t slen) : TemplateString(s, slen) {
171 CacheGlobalId();
172 }
173};
174
175#define LOG_TEMPLATE_NAME(severity, template) \
176 LOG(severity) << "Template " << template->template_file() << ": "
177
178#define LOG_AUTO_ESCAPE_ERROR(error_msg, my_template) do { \
179 LOG_TEMPLATE_NAME(ERROR, my_template); \
180 LOG(ERROR) << "Auto-Escape: " << error_msg << endl; \
181 } while (0)
182
183// We are in auto-escape mode.
184#define AUTO_ESCAPE_MODE(context) ((context) != TC_MANUAL)
185
186// Auto-Escape contexts which utilize the HTML Parser.
187#define AUTO_ESCAPE_PARSING_CONTEXT(context) \
188 ((context) == TC_HTML || (context) == TC_JS || (context) == TC_CSS)
189
190// ----------------------------------------------------------------------
191// PragmaId
192// PragmaDefinition
193// PragmaMarker
194// Functionality to support the PRAGMA marker in the template, i.e.
195// the {{%IDENTIFIER [name1="value1" [name2="value2"]...]}} syntax:
196// . IDENTIFIER as well as all attribute names are case-insensitive
197// whereas attribute values are case-sensitive.
198// . No extraneous whitespace is allowed (e.g. between name and '=').
199// . Double quotes inside an attribute value need to be backslash
200// escaped, i.e. " -> \". We unescape them during parsing.
201//
202// The only identifier currently supported is AUTOESCAPE which is
203// used to auto-escape a given template. Its syntax is:
204// {{%AUTOESCAPE context="context" [state="state"]}} where:
205// . context is one of: "HTML", "JAVASCRIPT", "CSS", "XML", "JSON".
206// . state may be omitted or equivalently, it may be set to "default".
207// It also accepts the value "IN_TAG" in the HTML context to
208// indicate the template contains HTML attribute name/value
209// pairs that are enclosed in a tag specified in a parent template.
210// e.g: Consider the parent template:
211// <a href="/bla" {{>INC}}>text</a>
212// and the included template:
213// class="{{CLASS}}" target="{{TARGET}}"
214// Then, for the included template to be auto-escaped properly, it
215// must have the pragma: {{%AUTOESCAPE context="HTML" state="IN_TAG"}}.
216// This is a very uncommon template structure.
217//
218// To add a new pragma identifier, you'll have to at least:
219// 1. Add a new id for it in PragmaId enum.
220// 2. Add the corresponding definition in static g_pragmas array
221// 3. If you accept more than 2 attributes, increase the size
222// of attribute_names in the PragmaDefinition struct.
223// 4. Add handling of that pragma in SectionTemplateNode::GetNextToken()
224// and possibly SectionTemplateNode::AddPragmaNode()
225// ----------------------------------------------------------------------
226
// PragmaId
//   Enumerates the pragma identifiers we support.  Currently there is only
//   one real pragma (AutoEscape).
enum PragmaId {
  PI_UNUSED,       // placeholder for the zero value
  PI_ERROR,        // internal error reporting only; not a valid pragma
  PI_AUTOESCAPE,   // {{%AUTOESCAPE ...}}
  NUM_PRAGMA_IDS   // count of the ids above; keep last
};
232
233// Each pragma definition has a unique identifier as well as a list of
234// attribute names it accepts. This allows initial error checking while
235// parsing a pragma definition. Such error checking will need supplementing
236// with more pragma-specific logic in SectionTemplateNode::GetNextToken().
237static struct PragmaDefinition {
238 PragmaId pragma_id;
239 const char* identifier;
240 const char* attribute_names[2]; // Increase as needed.
241} g_pragmas[NUM_PRAGMA_IDS] = {
242 /* PI_UNUSED */ { PI_UNUSED, NULL, {} },
243 /* PI_ERROR */ { PI_ERROR, NULL, {} },
244 /* PI_AUTOESCAPE */ { PI_AUTOESCAPE, "AUTOESCAPE", {"context", "state"} }
245};
246
247// PragmaMarker
248// Functionality to parse the {{%...}} syntax and extract the
249// provided attribute values. We store the PragmaId as well
250// as a vector of all the attribute names and values provided.
251class PragmaMarker {
252 public:
253 // Constructs a PragmaMarker object from the PRAGMA marker
254 // {{%ID [[name1=\"value1"] ...]}}. On error (unable to parse
255 // the marker), returns an error description in error_msg. On
256 // success, error_msg is cleared.
257 PragmaMarker(const char* token_start, const char* token_end,
258 string* error_msg);
259
260 // Returns the attribute value for the corresponding attribute name
261 // or NULL if none is found (as is the case with optional attributes).
262 // Ensure you only call it on attribute names registered in g_pragmas
263 // for that PragmaId.
264 const string* GetAttributeValue(const char* attribute_name) const;
265
266 private:
267 // Checks that the identifier given matches one of the pragma
268 // identifiers we know of, in which case returns the corresponding
269 // PragmaId. In case of error, returns PI_ERROR.
270 static PragmaId GetPragmaId(const char* id, size_t id_len);
271
272 // Parses an attribute value enclosed in double quotes and updates
273 // value_end to point at ending double quotes. Returns the attribute
274 // value. If an error occurred, error_msg is set with information.
275 // It is cleared on success.
276 // Unescapes backslash-escaped double quotes ('\"' -> '"') if present.
277 static string ParseAttributeValue(const char* value_start,
278 const char** value_end,
279 string* error_msg);
280
281 // Returns true if the attribute name is an accepted one for that
282 // given PragmaId. Otherwise returns false.
283 static bool IsValidAttribute(PragmaId pragma_id, const char* name,
284 size_t namelen);
285
286 PragmaId pragma_id_;
287 // A vector of attribute (name, value) pairs.
288 vector<pair<string, string> > names_and_values_;
289};
290
291PragmaId PragmaMarker::GetPragmaId(const char* id, size_t id_len) {
292 for (int i = 0; i < NUM_PRAGMA_IDS; ++i) {
293 if (g_pragmas[i].identifier == NULL) // PI_UNUSED, PI_ERROR
294 continue;
295 if ((strlen(g_pragmas[i].identifier) == id_len) &&
296 (strncasecmp(id, g_pragmas[i].identifier, id_len) == 0))
297 return g_pragmas[i].pragma_id;
298 }
299 return PI_ERROR;
300}
301
302bool PragmaMarker::IsValidAttribute(PragmaId pragma_id, const char* name,
303 size_t namelen) {
304 const int kMaxAttributes = sizeof(g_pragmas[0].attribute_names) /
305 sizeof(*g_pragmas[0].attribute_names);
306 for (int i = 0; i < kMaxAttributes; ++i) {
307 const char* attr_name = g_pragmas[pragma_id].attribute_names[i];
308 if (attr_name == NULL)
309 break;
310 if ((strlen(attr_name) == namelen) &&
311 (strncasecmp(attr_name, name, namelen) == 0))
312 // We found the given name in our accepted attribute list.
313 return true;
314 }
315 return false; // We did not find the name.
316}
317
318const string* PragmaMarker::GetAttributeValue(
319 const char* attribute_name) const {
320 // Developer error if assert triggers.
321 assert(IsValidAttribute(pragma_id_, attribute_name, strlen(attribute_name)));
322 for (vector<pair<string, string> >::const_iterator it =
323 names_and_values_.begin(); it != names_and_values_.end(); ++it) {
324 if (strcasecmp(attribute_name, it->first.c_str()) == 0)
325 return &it->second;
326 }
327 return NULL;
328}
329
330string PragmaMarker::ParseAttributeValue(const char* value_start,
331 const char** value_end,
332 string* error_msg) {
333 assert(error_msg);
334 if (*value_start != '"') {
335 error_msg->append("Attribute value is not enclosed in double quotes.");
336 return "";
337 }
338 const char* current = ++value_start; // Advance past the leading '"'
339 const char* val_end;
340 do {
341 if (current >= *value_end ||
342 ((val_end =
343 (const char*)memchr(current, '"', *value_end - current)) == NULL)) {
344 error_msg->append("Attribute value not terminated.");
345 return "";
346 }
347 current = val_end + 1; // Advance past the current '"'
348 } while (val_end[-1] == '\\');
349
350 string attribute_value(value_start, val_end - value_start);
351 // Now replace \" with "
352 size_t found;
353 while ((found = attribute_value.find("\\\"")) != string::npos)
354 attribute_value.erase(found, 1);
355 *value_end = val_end;
356 error_msg->clear();
357 return attribute_value;
358}
359
360PragmaMarker::PragmaMarker(const char* token_start, const char* token_end,
361 string* error_msg) {
362 assert(error_msg);
363 string error;
364 const char* identifier_end =
365 (const char*)memchr(token_start, ' ', token_end - token_start);
366 if (identifier_end == NULL)
367 identifier_end = token_end;
368 pragma_id_ = PragmaMarker::GetPragmaId(token_start,
369 identifier_end - token_start);
370 if (pragma_id_ == PI_ERROR) {
371 error = "Unrecognized pragma identifier.";
372 } else {
373 const char* val_end;
374 // Loop through attribute name/value pairs.
375 for (const char* nameval = identifier_end; nameval < token_end;
376 nameval = val_end + 1) {
377 // Either after identifier or afer a name/value pair. Must be whitespace.
378 if (*nameval++ != ' ') {
379 error = "Extraneous text.";
380 break;
381 }
382 const char* val = (const char*)memchr(nameval, '=', token_end - nameval);
383 if (val == NULL || val == nameval) {
384 error = "Missing attribute name or value";
385 break;
386 }
387 const string attribute_name(nameval, val - nameval);
388 if (!PragmaMarker::IsValidAttribute(pragma_id_, attribute_name.data(),
389 attribute_name.length())) {
390 error = "Unrecognized attribute name: " + attribute_name;
391 break;
392 }
393 ++val; // Advance past '='
394 val_end = token_end;
395 const string attribute_value = ParseAttributeValue(val, &val_end, &error);
396 if (!error.empty()) // Failed to parse attribute value.
397 break;
398 names_and_values_.push_back(pair<const string, const string>(
399 attribute_name, attribute_value));
400 }
401 }
402 if (error.empty()) // Success
403 error_msg->clear();
404 else // Error
405 error_msg->append("In PRAGMA directive '" +
406 string(token_start, token_end - token_start) +
407 "' Error: " + error);
408}
409
// ----------------------------------------------------------------------
// memmatch()
//    The length-delimited counterpart of strstr: returns a pointer to the
//    first occurrence of needle (needle_len bytes) inside haystack
//    (haystack_len bytes), or NULL if there is none.  An empty needle
//    matches at the start of the haystack, even when the haystack is empty.
// ----------------------------------------------------------------------

static const char *memmatch(const char *haystack, size_t haystack_len,
                            const char *needle, size_t needle_len) {
  if (needle_len == 0)
    return haystack;
  if (haystack_len < needle_len)
    return NULL;

  // One past the last position at which a complete needle could start.
  const char* const last_start = haystack + haystack_len - needle_len + 1;
  for (const char* cursor = haystack; cursor < last_start; ) {
    // Jump to the next byte equal to the needle's first byte...
    const void* const hit = memchr(cursor, needle[0], last_start - cursor);
    if (hit == NULL)
      break;
    // ...and check whether the whole needle is there.
    const char* const candidate = static_cast<const char*>(hit);
    if (memcmp(candidate, needle, needle_len) == 0)
      return candidate;
    cursor = candidate + 1;
  }
  return NULL;
}
435
436// ----------------------------------------------------------------------
437// FilenameValidForContext()
438// GetTemplateContextFromPragma()
439// GetModifierForContext()
440// FindLongestMatch()
441// PrettyPrintTokenModifiers()
442// Static methods for the auto-escape mode specifically.
443
444// Perfoms matching of filename against the TemplateContext
445// and warns in the log on mismatch using "unwritten" filename
446// conventions below for templates in our codebase:
447// 1. If filename contains "css", "stylesheet" or "style"
448// check that it has type TC_CSS.
449// 2. If filename contains "js" or "javascript" check that
450// it has type TC_JS.
451// Returns false if there was a mismatch although currently
452// we ignore it and just rely on the LOG(WARNING) in the logs.
453static bool FilenameValidForContext(const string& filename,
454 TemplateContext context) {
455 string stripped_filename = Basename(filename);
456
457 if (GOOGLE_NAMESPACE::ContainsFullWord(stripped_filename, "css") ||
458 GOOGLE_NAMESPACE::ContainsFullWord(stripped_filename, "stylesheet") ||
459 GOOGLE_NAMESPACE::ContainsFullWord(stripped_filename, "style")) {
460 if (context != TC_CSS) {
461 LOG(WARNING) << "Template filename " << filename
462 << " indicates CSS but given TemplateContext"
463 << " was not TC_CSS." << endl;
464 return false;
465 }
466 } else if (GOOGLE_NAMESPACE::ContainsFullWord(stripped_filename, "js") ||
467 GOOGLE_NAMESPACE::ContainsFullWord(stripped_filename, "javascript")) {
468 if (context != TC_JS) {
469 LOG(WARNING) << "Template filename " << filename
470 << " indicates javascript but given TemplateContext"
471 << " was not TC_JS." << endl;
472 return false;
473 }
474 }
475 return true;
476}
477
478// Returns a string containing a human-readable description of
479// the modifiers in the vector. The format is:
480// :modifier1[=val1][:modifier2][=val2]...
481static string PrettyPrintTokenModifiers(
482 const vector<ModifierAndValue>& modvals) {
483 string out;
484 for (vector<ModifierAndValue>::const_iterator it =
485 modvals.begin(); it != modvals.end(); ++it) {
486 string one_mod = PrettyPrintOneModifier(*it);
487 out.append(one_mod);
488 }
489 return out;
490}
491
492// Returns the TemplateContext corresponding to the "context" attribute
493// of the AUTOESCAPE pragma. Returns TC_MANUAL to indicate an error,
494// meaning an invalid context was given in the pragma.
495static TemplateContext GetTemplateContextFromPragma(
496 const PragmaMarker& pragma) {
497 const string* context = pragma.GetAttributeValue("context");
498 if (context == NULL)
499 return TC_MANUAL;
500 if (*context == "HTML" || *context == "html")
501 return TC_HTML;
502 else if (*context == "JAVASCRIPT" || *context == "javascript")
503 return TC_JS;
504 else if (*context == "CSS" || *context == "css")
505 return TC_CSS;
506 else if (*context == "JSON" || *context == "json")
507 return TC_JSON;
508 else if (*context == "XML" || *context == "xml")
509 return TC_XML;
510 return TC_MANUAL;
511}
512
513// Based on the state of the parser, determines the appropriate escaping
514// directive and returns a pointer to the corresponding
515// global ModifierAndValue vector. Called when a variable template node
516// is traversed.
517// Returns NULL if there is no suitable modifier for that context in
518// which the case the caller is expected to fail the template initialization.
519static const vector<const ModifierAndValue*> GetModifierForContext(
520 TemplateContext my_context, HtmlParser *htmlparser,
521 const Template* my_template) {
522 assert(AUTO_ESCAPE_MODE(my_context));
523 vector<const ModifierAndValue*> modvals;
524 string error_msg;
525
526 switch (my_context) {
527 case TC_XML:
528 modvals = GetModifierForXml(htmlparser, &error_msg);
529 break;
530 case TC_JSON:
531 modvals = GetModifierForJson(htmlparser, &error_msg);
532 break;
533 case TC_CSS:
534 assert(htmlparser); // Parser is active in CSS
535 modvals = GetModifierForCss(htmlparser, &error_msg);
536 break;
537 default:
538 // Must be in TC_HTML or TC_JS. Parser is active in these modes.
539 assert(AUTO_ESCAPE_PARSING_CONTEXT(my_context));
540 assert(htmlparser);
541 modvals = GetModifierForHtmlJs(htmlparser, &error_msg);
542 }
543 // In any mode, there should be at least one modifier.
544 if (modvals.empty())
545 LOG_AUTO_ESCAPE_ERROR(error_msg, my_template);
546 return modvals;
547}
548
549// Returns the largest int N indicating how many XSS safe alternative
550// modifiers are in the in-template modifiers already.
551// . If N is equal to the number of modifiers determined by the Auto Escaper,
552// we have a full match and the in-template modifiers were safe. We leave
553// them untouched.
554// . Otherwise, N is less (or zero) and we have a partial match (or none).
555// The in-template modifiers are not XSS safe and need the missing ones,
556// i.e. those in the auto escape modifiers which are not in the first N.
557//
558// We allow in-template modifiers to have extra modifiers than we deem
559// necessary, for e.g. :j:h when :j would have sufficed. But to make sure
560// these modifiers do not introduce XSS concerns we require that they
561// be in the same XssClass as the modifier we had.
562// For example :h:x-bla is not safe in HTML context because x-bla is
563// in a different XssClass as our :h whereas :h:j would be safe.
564static size_t FindLongestMatch(
565 const vector<ModifierAndValue>& modvals_man,
566 const vector<const ModifierAndValue*>& modvals_auto) {
567 if (modvals_auto.empty())
568 return 0;
569
570 // See if modvals_auto is "consistent" with the modifiers that are
571 // already present (modvals_man). This is true if all the
572 // modifiers in auto also occur in man, and any gaps between them
573 // (if any) are filled by "neutral" modifiers that do not affect
574 // xss-safety. We go through the vectors backwards.
575 // If all of modvals_auto is not consistent, maybe a prefix of it
576 // is; that's better than nothing, since we only need to auto-apply
577 // the suffix that's not already in modvals_man.
578 typedef vector<const ModifierAndValue*>::const_reverse_iterator
579 ModAutoIterator;
580 typedef vector<ModifierAndValue>::const_reverse_iterator ModManIterator;
581 for (ModAutoIterator end_of_prefix = modvals_auto.rbegin();
582 end_of_prefix != modvals_auto.rend();
583 ++end_of_prefix) {
584 ModAutoIterator curr_auto = end_of_prefix;
585 ModManIterator curr_man = modvals_man.rbegin();
586 while (curr_auto != modvals_auto.rend() &&
587 curr_man != modvals_man.rend()) {
588 if (IsSafeXSSAlternative(*(*curr_auto)->modifier_info,
589 *curr_man->modifier_info)) {
590 ++curr_auto;
591 ++curr_man;
592 } else if ((curr_man->modifier_info->xss_class ==
593 (*curr_auto)->modifier_info->xss_class) &&
594 (curr_man->modifier_info->xss_class != XSS_UNIQUE)) {
595 ++curr_man; // Ignore this modifier: it's harmless.
596 } else {
597 break; // An incompatible modifier; we've failed
598 }
599 }
600 if (curr_auto == modvals_auto.rend()) // got through them all, full match!
601 return curr_auto - end_of_prefix;
602 }
603 return 0;
604}
605
606// ----------------------------------------------------------------------
607// WriteOneHeaderEntry()
608// This dumps information about a template that is useful to
609// make_tpl_varnames_h -- information about the variable and
610// section names used in a template, so we can define constants
611// to refer to them instead of having to type them in by hand.
612// Output is *appended* to outstring.
613// ----------------------------------------------------------------------
614
615static void WriteOneHeaderEntry(
616 string *outstring, const string& variable, const string& full_pathname)
617 LOCKS_EXCLUDED(g_header_mutex) {
618 MutexLock ml(&g_header_mutex);
619
620 // we use hash_map instead of hash_set just to keep the stl size down
621 static hash_map<string, bool, StringHash> vars_seen
622 GUARDED_BY(g_header_mutex);
623 static string current_file GUARDED_BY(g_header_mutex);
624 static string prefix GUARDED_BY(g_header_mutex);
625
626 if (full_pathname != current_file) {
627 // changed files so re-initialize the static variables
628 vars_seen.clear();
629 current_file = full_pathname;
630
631 // remove the path before the filename
632 string filename(Basename(full_pathname));
633
634 prefix = "k";
635 bool take_next = true;
636
637 for (string::size_type i = 0; i < filename.length(); i++) {
638 if (filename[i] == '.') {
639 // stop when we find the dot
640 break;
641 }
642 if (take_next) {
643 if (filename.substr(i, 4) == "post") {
644 // stop before we process post...
645 break;
646 }
647 prefix = prefix + filename[i];
648 take_next = false;
649 }
650 if (filename[i] == '_') {
651 take_next = true;
652 }
653 }
654 prefix = prefix + "_";
655 }
656
657 // print out the variable, but only if we haven't seen it before.
658 if (!vars_seen.count(variable)) {
659 if (variable == kMainSectionName || variable.find("BI_") == 0) {
660 // We don't want to write entries for __MAIN__ or the built-ins
661 } else {
662 const TemplateId id = GlobalIdForSTS_INIT(TemplateString(variable));
663 std::ostringstream outstream;
664 outstream << "static const "
665 << AS_STR(GOOGLE_NAMESPACE) << "::StaticTemplateString "
666 << prefix << variable << " = STS_INIT_WITH_HASH("
667 << prefix << variable << ", \"" << variable << "\", "
668 << id << "ULL);\n";
669 outstring->append(outstream.str());
670 }
671 vars_seen[variable] = true;
672 }
673}
674
675// ----------------------------------------------------------------------
676// TemplateToken
677// A TemplateToken is a string marked with a token type enum. The string
678// has different meanings for different token types. For text, the
679// string is the text itself. For variable and template types, the
680// string is the name of the variable holding the value or the
681// template name, resp. For section types, the string is the name
682// of the section, used to retrieve the hidden/visible state and
683// the associated list of dictionaries, if any. For pragma type,
684// the string is the full text of the marker and is only used for
685// debug information.
686// ----------------------------------------------------------------------
687
// The kinds of token a template can contain, numbered consecutively from 0.
enum TemplateTokenType {
  TOKENTYPE_UNUSED,
  TOKENTYPE_TEXT,
  TOKENTYPE_VARIABLE,
  TOKENTYPE_SECTION_START,
  TOKENTYPE_SECTION_END,
  TOKENTYPE_TEMPLATE,
  TOKENTYPE_COMMENT,
  TOKENTYPE_SET_DELIMITERS,
  TOKENTYPE_PRAGMA,
  TOKENTYPE_NULL,
  TOKENTYPE_HIDDEN_DEFAULT_SECTION,
};
695
696} // unnamed namespace
697
698// A sorted array of Template variable names that Auto-Escape should
699// not escape. Variables that you may want to add here typically
700// satisfy all the following conditions:
701// 1. Are "trusted" variables, meaning variables you know to not
702// contain potentially harmful content.
703// 2. Contain some markup that gets broken when escaping is
704// applied to them.
705// 3. Are used often such that requiring developers to add
706// ":none" to each use is error-prone and inconvenient.
707//
708// Note: Keep this array sorted as you add new elements!
709//
710const char * const Template::kSafeWhitelistedVariables[] = {
711 "" // a placekeeper element: replace with your real values!
712};
713const size_t Template::kNumSafeWhitelistedVariables =
714 arraysize(Template::kSafeWhitelistedVariables);
715
716// A TemplateToken is a typed string. The semantics of the string depends on the
717// token type, as follows:
718// TOKENTYPE_TEXT - the text
719// TOKENTYPE_VARIABLE - the name of the variable
720// TOKENTYPE_SECTION_START - the name of the section being started
721// TOKENTYPE_SECTION_END - the name of the section being ended
722// TOKENTYPE_TEMPLATE - the name of the variable whose value will be
723// the template filename
724// TOKENTYPE_COMMENT - the empty string, not used
725// TOKENTYPE_SET_DELIMITERS- the empty string, not used
726// TOKENTYPE_PRAGMA - identifier and optional set of name/value pairs
727// - exactly as given in the template
728// TOKENTYPE_NULL - the empty string
729// TOKENTYPE_HIDDEN_DEFAULT_SECTION
730// - like TOKENTYPE_SECTION_START, but defaults to
731// hidden
732// All non-comment tokens may also have modifiers, which follow the name
733// of the token: the syntax is {{<PREFIX><NAME>:<mod>:<mod>:<mod>...}}
734// The modifiers are also stored as a string, starting with the first :
735struct TemplateToken {
736 TemplateTokenType type;
737 const char* text;
738 size_t textlen;
739 vector<ModifierAndValue> modvals;
740 TemplateToken(TemplateTokenType t, const char* txt, size_t len,
741 const vector<ModifierAndValue>* m)
742 : type(t), text(txt), textlen(len) {
743 if (m) modvals = *m;
744 }
745
746 string ToString() const { // used for debugging (annotations)
747 string retval(text, textlen);
748 for (vector<ModifierAndValue>::const_iterator it = modvals.begin();
749 it != modvals.end(); ++it) {
750 const string& modname = it->modifier_info->long_name;
751 retval += string(":") + modname;
752 if (!it->modifier_info->is_registered)
753 retval += "<not registered>";
754 }
755 return retval;
756 }
757
758 // Updates the correct modifiers for the token (variable or template node)
759 // based on our computed modifiers from the HTML parser context as well
760 // as the in-template modifiers that may have been provided.
761 // If the in-template modifiers are considered safe, we use them
762 // without modification. This could happen in one of three cases:
763 // 1. The token has the ":none" modifier as one of the modifiers.
764 // 2. The token has a custom modifier considered XSS-Safe as one of
765 // the modifiers. The modifier was added via AddXssSafeModifier()
766 // and has the XSS_SAFE XssClass.
767 // 3. The escaping modifiers are XSS-equivalent to the ones we computed.
768 //
769 // If the in-template modifiers are not found to be safe, we add
770 // the escaping modifiers we determine missing. This is done based on a
771 // longest match search between the two modifiers vectors, refer to comment
772 // in FindLongestMatch. We also issue a warning in the log, unless the
773 // in-template modifiers were all not escaping related (e.g. custom)
774 // since that case is similar to that of not providing any modifiers.
775 void UpdateModifier(const vector<const ModifierAndValue*>& auto_modvals) {
776 // Common case: no modifiers given in template. Assign our own. No warning.
777 if (modvals.empty()) {
778 for (vector<const ModifierAndValue*>::const_iterator it
779 = auto_modvals.begin(); it != auto_modvals.end(); ++it) {
780 modvals.push_back(**it);
781 }
782 return;
783 }
784
785 // Look for any XSS-Safe modifiers (added via AddXssSafeModifier or :none).
786 // If one is found anywhere in the vector, consider the variable safe.
787 for (vector<ModifierAndValue>::const_iterator it = modvals.begin();
788 it != modvals.end(); ++it) {
789 if (it->modifier_info->xss_class == XSS_SAFE)
790 return;
791 }
792
793 size_t longest_match = FindLongestMatch(modvals, auto_modvals);
794 if (longest_match == auto_modvals.size()) {
795 return; // We have a complete match, nothing to do.
796 } else { // Copy missing ones and issue warning.
797 assert(longest_match >= 0 && longest_match < auto_modvals.size());
798 // We only log if one or more of the in-template modifiers was
799 // escaping-related which we infer from the XssClass. Currently,
800 // all escaping modifiers are in XSS_WEB_STANDARD except for 'none'
801 // but that one is handled above.
802 bool do_log = false;
803 for (vector<ModifierAndValue>::const_iterator it = modvals.begin();
804 it != modvals.end(); ++it) {
805 if (it->modifier_info->xss_class == XSS_WEB_STANDARD) {
806 do_log = true;
807 break;
808 }
809 }
810 string before = PrettyPrintTokenModifiers(modvals); // for logging
811 for (vector<const ModifierAndValue*>::const_iterator it
812 = auto_modvals.begin() + longest_match;
813 it != auto_modvals.end(); ++it) {
814 modvals.push_back(**it);
815 }
816 if (do_log)
817 LOG(ERROR)
818 << "Token: " << string(text, textlen)
819 << " has missing in-template modifiers. You gave " << before
820 << " and we computed " << PrettyPrintModifiers(auto_modvals, "")
821 << ". We changed to " << PrettyPrintTokenModifiers(modvals) << endl;
822 }
823 }
824};
825
826static bool AnyMightModify(const vector<ModifierAndValue>& modifiers,
827 const PerExpandData* data) {
828 for (vector<ModifierAndValue>::const_iterator it = modifiers.begin();
829 it != modifiers.end(); ++it) {
830 string value_string(it->value, it->value_len);
831 if (it->modifier_info->modifier->MightModify(data, value_string)) {
832 return true;
833 }
834 }
835 return false;
836}
837
838// This applies the modifiers to the string in/inlen, and writes the end
839// result directly to the end of outbuf. Precondition: |modifiers| > 0.
840//
841// TODO(user): In the case of multiple modifiers, we are applying
842// all of them if any of them MightModify the output. We can do
843// better. We should store the MightModify values that we use to
844// compute AnyMightModify and respect them here.
845static void EmitModifiedString(const vector<ModifierAndValue>& modifiers,
846 const char* in, size_t inlen,
847 const PerExpandData* data,
848 ExpandEmitter* outbuf) {
849 string result;
850 string value_string;
851 if (modifiers.size() > 1) {
852 // If there's more than one modifiers, we need to store the
853 // intermediate results in a temp-buffer. We use a string.
854 // We'll assume that each modifier adds about 12% to the input
855 // size.
856 result.reserve((inlen + inlen/8) + 16);
857 StringEmitter scratchbuf(&result);
858 value_string = string(modifiers.front().value, modifiers.front().value_len);
859 modifiers.front().modifier_info->modifier->Modify(in, inlen, data,
860 &scratchbuf,
861 value_string);
862 // Only used when modifiers.size() > 2
863 for (vector<ModifierAndValue>::const_iterator it = modifiers.begin() + 1;
864 it != modifiers.end()-1; ++it) {
865 string output_of_this_modifier;
866 output_of_this_modifier.reserve(result.size() + result.size()/8 + 16);
867 StringEmitter scratchbuf2(&output_of_this_modifier);
868 value_string = string(it->value, it->value_len);
869 it->modifier_info->modifier->Modify(result.c_str(), result.size(), data,
870 &scratchbuf2, value_string);
871 result.swap(output_of_this_modifier);
872 }
873 in = result.data();
874 inlen = result.size();
875 }
876 // For the last modifier, we can write directly into outbuf
877 assert(!modifiers.empty());
878 value_string = string(modifiers.back().value, modifiers.back().value_len);
879 modifiers.back().modifier_info->modifier->Modify(in, inlen, data, outbuf,
880 value_string);
881}
882
883static void AppendTokenWithIndent(int level, string *out, const string& before,
884 const TemplateToken& token,
885 const string& after) {
886 out->append(string(level * kIndent, ' '));
887 string token_string(token.text, token.textlen);
888 out->append(before + token_string + after);
889}
890
891// ----------------------------------------------------------------------
892// TemplateNode
893// When we read a template, we decompose it into its components:
894// variables, sections, include-templates, and runs of raw text.
895// Each of these we see becomes one TemplateNode. TemplateNode
896// is the abstract base class; each component has its own type.
897// ----------------------------------------------------------------------
898
899class TemplateNode {
900 public:
901 TemplateNode() {}
902 virtual ~TemplateNode() {}
903
904 // Expands the template node using the supplied dictionary. The
905 // result is placed into output_buffer. If
906 // per_expand_data->annotate() is true, the output is annotated.
907 // Returns true iff all the template files load and parse correctly.
908 virtual bool Expand(ExpandEmitter *output_buffer,
909 const TemplateDictionaryInterface *dictionary,
910 PerExpandData *per_expand_data,
911 const TemplateCache *cache) const = 0;
912
913 // Writes entries to a header file to provide syntax checking at
914 // compile time.
915 virtual void WriteHeaderEntries(string *outstring,
916 const string& filename) const = 0;
917
918 // Appends a representation of the node and its subnodes to a string
919 // as a debugging aid.
920 virtual void DumpToString(int level, string *out) const = 0;
921
922 protected:
923 typedef list<TemplateNode *> NodeList;
924
925 private:
926 TemplateNode(const TemplateNode&); // disallow copying
927 void operator=(const TemplateNode&);
928};
929
930// ----------------------------------------------------------------------
931// TextTemplateNode
932// The simplest template-node: it holds runs of raw template text,
933// that should be emitted verbatim. The text points into
934// template_text_.
935// ----------------------------------------------------------------------
936
937class TextTemplateNode : public TemplateNode {
938 public:
939 explicit TextTemplateNode(const TemplateToken& token)
940 : token_(token) {
941 VLOG(2) << "Constructing TextTemplateNode: "
942 << string(token_.text, token_.textlen) << endl;
943 }
944 virtual ~TextTemplateNode() {
945 VLOG(2) << "Deleting TextTemplateNode: "
946 << string(token_.text, token_.textlen) << endl;
947 }
948
949 // Expands the text node by simply outputting the text string. This
950 // virtual method does not use TemplateDictionaryInterface or PerExpandData.
951 // Returns true iff all the template files load and parse correctly.
952 virtual bool Expand(ExpandEmitter *output_buffer,
953 const TemplateDictionaryInterface *,
954 PerExpandData *,
955 const TemplateCache *) const {
956 output_buffer->Emit(token_.text, token_.textlen);
957 return true;
958 }
959
960 // A noop for text nodes
961 virtual void WriteHeaderEntries(string *outstring,
962 const string& filename) const {
963 return;
964 }
965
966 // Appends a representation of the text node to a string.
967 virtual void DumpToString(int level, string *out) const {
968 assert(out);
969 AppendTokenWithIndent(level, out, "Text Node: -->|", token_, "|<--\n");
970 }
971
972 private:
973 TemplateToken token_; // The text held by this node.
974};
975
976// ----------------------------------------------------------------------
977// VariableTemplateNode
978// Holds a variable to be replaced when the template is expanded.
979// The variable is stored in a token object, which has a char*
980// that points into template_text_. There may also be modifiers,
981// which are applied at Expand time.
982// ----------------------------------------------------------------------
983
984class VariableTemplateNode : public TemplateNode {
985 public:
986 explicit VariableTemplateNode(const TemplateToken& token)
987 : token_(token),
988 variable_(token_.text, token_.textlen) {
989 VLOG(2) << "Constructing VariableTemplateNode: "
990 << string(token_.text, token_.textlen) << endl;
991 }
992 virtual ~VariableTemplateNode() {
993 VLOG(2) << "Deleting VariableTemplateNode: "
994 << string(token_.text, token_.textlen) << endl;
995 }
996
997 // Expands the variable node by outputting the value (if there is one)
998 // of the node variable which is retrieved from the dictionary
999 // Returns true iff all the template files load and parse correctly.
1000 virtual bool Expand(ExpandEmitter *output_buffer,
1001 const TemplateDictionaryInterface *dictionary,
1002 PerExpandData *per_expand_data,
1003 const TemplateCache *cache) const;
1004
1005 virtual void WriteHeaderEntries(string *outstring,
1006 const string& filename) const {
1007 WriteOneHeaderEntry(outstring, string(token_.text, token_.textlen),
1008 filename);
1009 }
1010
1011 // Appends a representation of the variable node to a string. We
1012 // also append the modifiers for that variable in the form:
1013 // :modifier1[=val1][:modifier2][=val2]...\n
1014 virtual void DumpToString(int level, string *out) const {
1015 assert(out);
1016 AppendTokenWithIndent(level, out, "Variable Node: ", token_,
1017 PrettyPrintTokenModifiers(token_.modvals) + "\n");
1018 }
1019
1020 private:
1021 const TemplateToken token_;
1022 const HashedTemplateString variable_;
1023};
1024
1025bool VariableTemplateNode::Expand(ExpandEmitter *output_buffer,
1026 const TemplateDictionaryInterface *dictionary,
1027 PerExpandData* per_expand_data,
1028 const TemplateCache *cache) const {
1029 if (per_expand_data->annotate()) {
1030 per_expand_data->annotator()->EmitOpenVariable(output_buffer,
1031 token_.ToString());
1032 }
1033
1034 const TemplateString value = dictionary->GetValue(variable_);
1035
1036 if (AnyMightModify(token_.modvals, per_expand_data)) {
1037 EmitModifiedString(token_.modvals, value.data(), value.size(),
1038 per_expand_data, output_buffer);
1039 } else {
1040 // No need to modify value, so just emit it.
1041 output_buffer->Emit(value.data(), value.size());
1042 }
1043
1044 if (per_expand_data->annotate()) {
1045 per_expand_data->annotator()->EmitCloseVariable(output_buffer);
1046 }
1047
1048 return true;
1049}
1050
1051// ----------------------------------------------------------------------
1052// PragmaTemplateNode
1053// It simply stores the text given inside the pragma marker
1054// {{%...}} for possible use in DumpToString().
1055// ----------------------------------------------------------------------
1056
1057class PragmaTemplateNode : public TemplateNode {
1058 public:
1059 explicit PragmaTemplateNode(const TemplateToken& token)
1060 : token_(token) {
1061 VLOG(2) << "Constructing PragmaTemplateNode: "
1062 << string(token_.text, token_.textlen) << endl;
1063 }
1064 virtual ~PragmaTemplateNode() {
1065 VLOG(2) << "Deleting PragmaTemplateNode: "
1066 << string(token_.text, token_.textlen) << endl;
1067 }
1068
1069 // A no-op for pragma nodes.
1070 virtual bool Expand(ExpandEmitter *output_buffer,
1071 const TemplateDictionaryInterface *,
1072 PerExpandData *,
1073 const TemplateCache *) const {
1074 return true;
1075 };
1076
1077 // A no-op for pragma nodes.
1078 virtual void WriteHeaderEntries(string *outstring,
1079 const string& filename) const { }
1080
1081 // Appends a representation of the pragma node to a string. We output
1082 // the full text given in {{%...}} verbatim.
1083 virtual void DumpToString(int level, string *out) const {
1084 assert(out);
1085 AppendTokenWithIndent(level, out, "Pragma Node: -->|", token_, "|<--\n");
1086 }
1087
1088 private:
1089 TemplateToken token_; // The text of the pragma held by this node.
1090};
1091
1092// ----------------------------------------------------------------------
1093// TemplateTemplateNode
1094// Holds a variable to be replaced by an expanded (included)
1095// template whose filename is the value of the variable in the
1096// dictionary.
1097// Also holds the TemplateContext which it passes on to
1098// GetTemplateCommon when this included template is initialized.
1099// The indentation_ string is used by the PrefixLine modifier so be
1100// careful not to perform any operation on it that might invalidate
1101// its character array (indentation_.data()).
1102//
1103// In the Auto Escape mode, the PrefixLine modifier is added *after*
1104// auto-escape has updated the modifiers that may be present for that
1105// template include, but that is ok because PrefixLine does not invalidate
1106// their XSS-safety.
1107// ----------------------------------------------------------------------
1108
1109class TemplateTemplateNode : public TemplateNode {
1110 public:
1111 explicit TemplateTemplateNode(const TemplateToken& token, Strip strip,
1112 const string& indentation)
1113 : token_(token),
1114 variable_(token_.text, token_.textlen),
1115 strip_(strip), indentation_(indentation) {
1116 VLOG(2) << "Constructing TemplateTemplateNode: "
1117 << string(token_.text, token_.textlen) << endl;
1118
1119 // If this template is indented (eg, " {{>SUBTPL}}"), make sure
1120 // every line of the expanded template is indented, not just the
1121 // first one. We do this by adding a modifier that applies to
1122 // the entire template node, that inserts spaces after newlines.
1123 if (!indentation_.empty()) {
1124 token_.modvals.push_back(ModifierAndValue(&g_prefix_line_info,
1125 indentation_.data(),
1126 indentation_.length()));
1127 }
1128 }
1129 virtual ~TemplateTemplateNode() {
1130 VLOG(2) << "Deleting TemplateTemplateNode: "
1131 << string(token_.text, token_.textlen) << endl;
1132 }
1133
1134 // Expands the template node by retrieving the name of a template
1135 // file from the supplied dictionary, expanding it (using this
1136 // dictionary if none other is provided in the TemplateDictionary),
1137 // and then outputting this newly expanded template in place of the
1138 // original variable.
1139 // Returns true iff all the template files load and parse correctly.
1140 virtual bool Expand(ExpandEmitter *output_buffer,
1141 const TemplateDictionaryInterface *dictionary,
1142 PerExpandData *per_expand_data,
1143 const TemplateCache *cache) const;
1144
1145 virtual void WriteHeaderEntries(string *outstring,
1146 const string& filename) const {
1147 WriteOneHeaderEntry(outstring, string(token_.text, token_.textlen),
1148 filename);
1149 }
1150
1151 virtual void DumpToString(int level, string *out) const {
1152 assert(out);
1153 AppendTokenWithIndent(level, out, "Template Node: ", token_, "\n");
1154 }
1155
1156 private:
1157 TemplateToken token_; // text is the name of a template file.
1158 const HashedTemplateString variable_;
1159 Strip strip_; // Flag to pass from parent template to included template.
1160 const string indentation_; // Used by ModifierAndValue for g_prefix_line.
1161
1162 // A helper used for expanding one child dictionary.
1163 bool ExpandOnce(ExpandEmitter *output_buffer,
1164 const TemplateDictionaryInterface &dictionary,
1165 const char* const filename,
1166 PerExpandData *per_expand_data,
1167 const TemplateCache *cache) const;
1168};
1169
1170// If no value is found in the dictionary for the template variable
1171// in this node, then no output is generated in place of this variable.
1172bool TemplateTemplateNode::Expand(ExpandEmitter *output_buffer,
1173 const TemplateDictionaryInterface *dictionary,
1174 PerExpandData *per_expand_data,
1175 const TemplateCache *cache) const {
1176 if (dictionary->IsHiddenTemplate(variable_)) {
1177 // if this "template include" section is "hidden", do nothing
1178 return true;
1179 }
1180
1181 TemplateDictionaryInterface::Iterator* di =
1182 dictionary->CreateTemplateIterator(variable_);
1183
1184 if (!di->HasNext()) { // empty dict means 'expand once using containing dict'
1185 delete di;
1186 // TODO(csilvers): have this return a TemplateString instead?
1187 const char* const filename =
1188 dictionary->GetIncludeTemplateName(variable_, 0);
1189 // If the filename wasn't set then treat it as if it were "hidden", i.e, do
1190 // nothing
1191 if (filename && *filename) {
1192 return ExpandOnce(output_buffer, *dictionary, filename, per_expand_data,
1193 cache);
1194 } else {
1195 return true;
1196 }
1197 }
1198
1199 bool error_free = true;
1200 for (int dict_num = 0; di->HasNext(); ++dict_num) {
1201 const TemplateDictionaryInterface& child = di->Next();
1202 // We do this in the loop, because maybe one day we'll support
1203 // each expansion having its own template dictionary. That's also
1204 // why we pass in the dictionary-index as an argument.
1205 const char* const filename = dictionary->GetIncludeTemplateName(
1206 variable_, dict_num);
1207 // If the filename wasn't set then treat it as if it were "hidden", i.e, do
1208 // nothing
1209 if (filename && *filename) {
1210 error_free &= ExpandOnce(output_buffer, child, filename, per_expand_data,
1211 cache);
1212 }
1213 }
1214 delete di;
1215
1216 return error_free;
1217}
1218
1219static void EmitMissingInclude(const char* const filename,
1220 ExpandEmitter *output_buffer,
1221 PerExpandData *per_expand_data) {
1222 // if there was a problem retrieving the template, bail!
1223 if (per_expand_data->annotate()) {
1224 TemplateAnnotator* annotator = per_expand_data->annotator();
1225 annotator->EmitFileIsMissing(output_buffer, filename);
1226 }
1227 LOG(ERROR) << "Failed to load included template: \"" << filename << "\"\n";
1228}
1229
1230bool TemplateTemplateNode::ExpandOnce(
1231 ExpandEmitter *output_buffer,
1232 const TemplateDictionaryInterface &dictionary,
1233 const char* const filename,
1234 PerExpandData *per_expand_data,
1235 const TemplateCache *cache) const {
1236 bool error_free = true;
1237 // NOTE: Although we do this const_cast here, if the cache is frozen
1238 // the expansion doesn't mutate the cache, and is effectively 'const'.
1239 TemplateCache* cache_ptr = const_cast<TemplateCache*>(cache);
1240
1241 // Expand the included template once for each "template specific"
1242 // dictionary. Normally this will only iterate once, but it's
1243 // possible to supply a list of more than one sub-dictionary and
1244 // then the template explansion will be iterative, just as though
1245 // the included template were an iterated section.
1246 if (per_expand_data->annotate()) {
1247 per_expand_data->annotator()->EmitOpenInclude(output_buffer,
1248 token_.ToString());
1249 }
1250 // sub-dictionary NULL means 'just use the current dictionary instead'.
1251 // We force children to annotate the output if we have to.
1252 // If the include-template has modifiers, we need to expand to a string,
1253 // modify the string, and append to output_buffer. Otherwise (common
1254 // case), we can just expand into the output-buffer directly.
1255 if (AnyMightModify(token_.modvals, per_expand_data)) {
1256 string sub_template;
1257 StringEmitter subtemplate_buffer(&sub_template);
1258 if (!cache_ptr->ExpandLocked(filename, strip_,
1259 &subtemplate_buffer,
1260 &dictionary,
1261 per_expand_data)) {
1262 EmitMissingInclude(filename, output_buffer, per_expand_data);
1263 error_free = false;
1264 } else {
1265 EmitModifiedString(token_.modvals,
1266 sub_template.data(), sub_template.size(),
1267 per_expand_data, output_buffer);
1268 }
1269 } else {
1270 // No need to modify sub-template
1271 if (!cache_ptr->ExpandLocked(filename, strip_,
1272 output_buffer,
1273 &dictionary,
1274 per_expand_data)) {
1275 EmitMissingInclude(filename, output_buffer, per_expand_data);
1276 error_free = false;
1277 }
1278 }
1279 if (per_expand_data->annotate()) {
1280 per_expand_data->annotator()->EmitCloseInclude(output_buffer);
1281 }
1282 return error_free;
1283}
1284
1285// ----------------------------------------------------------------------
1286// SectionTemplateNode
1287// Holds the name of a section and a list of subnodes contained
1288// in that section.
1289// ----------------------------------------------------------------------
1290
1291class SectionTemplateNode : public TemplateNode {
1292 public:
1293 SectionTemplateNode(const TemplateToken& token, bool hidden_by_default);
1294 virtual ~SectionTemplateNode();
1295
1296 // The highest level parsing method. Reads a single token from the
1297 // input -- taken from my_template->parse_state_ -- and adds the
1298 // corresponding type of node to the template's parse
1299 // tree. It may add a node of any type, whether text, variable,
1300 // section, or template to the list of nodes contained in this
1301 // section. Returns true iff we really added a node and didn't just
1302 // end a section or hit a syntax error in the template file.
1303 // You should hold the g_template_mutex write-lock when calling this
1304 // (unless you're calling it from a constructor).
1305 bool AddSubnode(Template *my_template);
1306
1307 // Expands a section node as follows:
1308 // - Checks to see if the section is hidden and if so, does nothing but
1309 // return
1310 // - Tries to retrieve a list of dictionaries from the supplied dictionary
1311 // stored under this section's name
1312 // - If it finds a non-empty list of dictionaries, it iterates over the
1313 // list and calls itself recursively to expand the section once for
1314 // each dictionary
1315 // - If there is no dictionary list (or an empty dictionary list somehow)
1316 // is found, then the section is expanded once using the supplied
1317 // dictionary. (This is the mechanism used to expand each single
1318 // iteration of the section as well as to show a non-hidden section,
1319 // allowing the section template syntax to be used for both conditional
1320 // and iterative text).
1321 // Returns true iff all the template files load and parse correctly.
1322 virtual bool Expand(ExpandEmitter *output_buffer,
1323 const TemplateDictionaryInterface *dictionary,
1324 PerExpandData* per_expand_data,
1325 const TemplateCache *cache) const;
1326
1327 // Writes a header entry for the section name and calls the same
1328 // method on all the nodes in the section
1329 virtual void WriteHeaderEntries(string *outstring,
1330 const string& filename) const;
1331
1332 virtual void DumpToString(int level, string *out) const;
1333
1334 private:
1335 const TemplateToken token_; // text is the name of the section
1336 const HashedTemplateString variable_;
1337 NodeList node_list_; // The list of subnodes in the section
1338 // A sub-section named "OURNAME_separator" is special. If we see it
1339 // when parsing our section, store a pointer to it for ease of use.
1340 SectionTemplateNode* separator_section_;
1341
1342 // When the last node read was literal text that ends with "\n? +"
1343 // (that is, leading whitespace on a line), this stores the leading
1344 // whitespace. This is used to properly indent included
1345 // sub-templates.
1346 string indentation_;
1347
1348 // If true, hide sections that have not explicitly had their hidden/visible
1349 // state set. If false, use the underlying template dictionary's default
1350 // behavior for hiding.
1351 // This bool is currently always set to true.
1352 bool hidden_by_default_;
1353
1354 // A protected method used in parsing the template file
1355 // Finds the next token in the file and return it. Anything not inside
1356 // a template marker is just text. Each template marker type, delimited
1357 // by "{{" and "}}" (or parser_state_->marker_delimiters.start_marker
1358 // and .end_marker, more precisely) is a different type of token. The
1359 // first character inside the opening curly braces indicates the type
1360 // of the marker, as follows:
1361 // # - Start a section
1362 // / - End a section
1363 // > - A template file variable (the "include" directive)
1364 // ! - A template comment
1365 // % - A pragma such as AUTOESCAPE
1366 // = - Change marker delimiters (from the default of '{{' and '}}')
1367 // <alnum or _> - A scalar variable
1368 // One more thing. Before a name token is returned, if it happens to be
1369 // any type other than a scalar variable, and if the next character after
1370 // the closing curly braces is a newline, then the newline is eliminated
1371 // from the output. This reduces the number of extraneous blank
1372 // lines in the output. If the template author desires a newline to be
1373 // retained after a final marker on a line, they must add a space character
1374 // between the marker and the linefeed character.
1375 TemplateToken GetNextToken(Template* my_template);
1376
1377 // Helper routine used by Expand
1378 virtual bool ExpandOnce(
1379 ExpandEmitter *output_buffer,
1380 const TemplateDictionaryInterface *dictionary,
1381 PerExpandData* per_expand_data,
1382 bool is_last_child_dict,
1383 const TemplateCache *cache) const;
1384
1385 // The specific methods called used by AddSubnode to add the
1386 // different types of nodes to this section node.
1387 // Currently only reasons to fail (return false) are if the
1388 // HTML parser failed to parse in auto-escape mode or the
1389 // PRAGMA marker was invalid in the template.
1390 bool AddTextNode(const TemplateToken* token, Template* my_template);
1391 bool AddVariableNode(TemplateToken* token, Template* my_template);
1392 bool AddPragmaNode(TemplateToken* token, Template* my_template);
1393 bool AddTemplateNode(TemplateToken* token, Template* my_template,
1394 const string& indentation);
1395 bool AddSectionNode(const TemplateToken* token, Template* my_template,
1396 bool hidden_by_default);
1397 bool AddSectionNode(const TemplateToken* token, Template* my_template);
1398};
1399
1400// --- constructor and destructor, Expand, Dump, and WriteHeaderEntries
1401
1402SectionTemplateNode::SectionTemplateNode(const TemplateToken& token,
1403 bool hidden_by_default)
1404
1405 : token_(token),
1406 variable_(token_.text, token_.textlen),
1407 separator_section_(NULL), indentation_("\n"),
1408 hidden_by_default_(hidden_by_default) {
1409 VLOG(2) << "Constructing SectionTemplateNode: "
1410 << string(token_.text, token_.textlen) << endl;
1411}
1412
1413SectionTemplateNode::~SectionTemplateNode() {
1414 VLOG(2) << "Deleting SectionTemplateNode: "
1415 << string(token_.text, token_.textlen) << " and its subnodes"
1416 << endl;
1417
1418 // Need to delete the member of the list because the list is a list
1419 // of pointers to these instances.
1420 NodeList::iterator iter = node_list_.begin();
1421 for (; iter != node_list_.end(); ++iter) {
1422 delete (*iter);
1423 }
1424 VLOG(2) << "Finished deleting subnodes of SectionTemplateNode: "
1425 << string(token_.text, token_.textlen) << endl;
1426}
1427
1428bool SectionTemplateNode::ExpandOnce(
1429 ExpandEmitter *output_buffer,
1430 const TemplateDictionaryInterface *dictionary,
1431 PerExpandData *per_expand_data,
1432 bool is_last_child_dict,
1433 const TemplateCache* cache) const {
1434 bool error_free = true;
1435
1436 if (per_expand_data->annotate()) {
1437 per_expand_data->annotator()->EmitOpenSection(output_buffer,
1438 token_.ToString());
1439 }
1440
1441 // Expand using the section-specific dictionary.
1442 // We force children to annotate the output if we have to.
1443 NodeList::const_iterator iter = node_list_.begin();
1444 for (; iter != node_list_.end(); ++iter) {
1445 error_free &=
1446 (*iter)->Expand(output_buffer, dictionary, per_expand_data, cache);
1447 // If this sub-node is a "separator section" -- a subsection
1448 // with the name "OURNAME_separator" -- expand it every time
1449 // through but the last.
1450 if (*iter == separator_section_ && !is_last_child_dict) {
1451 // We call ExpandOnce to make sure we always expand,
1452 // even if *iter would normally be hidden.
1453 error_free &= separator_section_->ExpandOnce(output_buffer, dictionary,
1454 per_expand_data, true,
1455 cache);
1456 }
1457 }
1458
1459 if (per_expand_data->annotate()) {
1460 per_expand_data->annotator()->EmitCloseSection(output_buffer);
1461 }
1462
1463 return error_free;
1464}
1465
1466bool SectionTemplateNode::Expand(
1467 ExpandEmitter *output_buffer,
1468 const TemplateDictionaryInterface *dictionary,
1469 PerExpandData *per_expand_data,
1470 const TemplateCache *cache) const {
1471 // The section named __{{MAIN}}__ is special: you always expand it
1472 // exactly once using the containing (main) dictionary.
1473 if (token_.text == kMainSectionName) {
1474 return ExpandOnce(output_buffer, dictionary, per_expand_data, true, cache);
1475 } else if (hidden_by_default_ ?
1476 !dictionary->IsUnhiddenSection(variable_) :
1477 dictionary->IsHiddenSection(variable_)) {
1478 // Some dictionaries might have sections that can be explicitly hidden
1479 // and unhidden, so by default both IsHidden() and IsUnhidden() are false,
1480 // in which case hidden_by_default_ controls the behavior.
1481 return true; // if this section is "hidden", do nothing
1482 }
1483
1484 TemplateDictionaryInterface::Iterator* di =
1485 dictionary->CreateSectionIterator(variable_);
1486
1487 // If there are no child dictionaries, that means we should expand with the
1488 // current dictionary instead. This corresponds to the situation where
1489 // template variables within a section are set on the template-wide dictionary
1490 // instead of adding a dictionary to the section and setting them there.
1491 if (!di->HasNext()) {
1492 delete di;
1493 return ExpandOnce(output_buffer, dictionary, per_expand_data,
1494 true, cache);
1495 }
1496
1497 // Otherwise, there's at least one child dictionary, and when expanding this
1498 // section, we should use the child dictionaries instead of the current one.
1499 bool error_free = true;
1500 while (di->HasNext()) {
1501 const TemplateDictionaryInterface& child = di->Next();
1502 error_free &= ExpandOnce(output_buffer, &child, per_expand_data,
1503 !di->HasNext(), cache);
1504 }
1505 delete di;
1506 return error_free;
1507}
1508
1509void SectionTemplateNode::WriteHeaderEntries(string *outstring,
1510 const string& filename) const {
1511 WriteOneHeaderEntry(outstring, string(token_.text, token_.textlen),
1512 filename);
1513
1514 NodeList::const_iterator iter = node_list_.begin();
1515 for (; iter != node_list_.end(); ++iter) {
1516 (*iter)->WriteHeaderEntries(outstring, filename);
1517 }
1518}
1519
1520void SectionTemplateNode::DumpToString(int level, string *out) const {
1521 assert(out);
1522 AppendTokenWithIndent(level, out, "Section Start: ", token_, "\n");
1523 NodeList::const_iterator iter = node_list_.begin();
1524 for (; iter != node_list_.end(); ++iter) {
1525 (*iter)->DumpToString(level + 1, out);
1526 }
1527 AppendTokenWithIndent(level, out, "Section End: ", token_, "\n");
1528}
1529
1530// --- AddSubnode and its sub-routines
1531
1532// Under auto-escape (and parsing-enabled modes) advance the parser state.
1533// TextTemplateNode is the only TemplateNode type that can change
1534// the state of the parser.
1535// Returns false only if the HTML parser failed to parse in
1536// auto-escape mode.
1537bool SectionTemplateNode::AddTextNode(const TemplateToken* token,
1538 Template* my_template) {
1539 assert(token);
1540 bool success = true;
1541 HtmlParser *htmlparser = my_template->htmlparser_;
1542
1543 if (token->textlen > 0) { // ignore null text sections
1544 node_list_.push_back(new TextTemplateNode(*token));
1545 if (AUTO_ESCAPE_PARSING_CONTEXT(my_template->initial_context_)) {
1546 assert(htmlparser);
1547 if (htmlparser->state() == HtmlParser::STATE_ERROR ||
1548 htmlparser->Parse(token->text, static_cast<int>(token->textlen)) ==
1549 HtmlParser::STATE_ERROR) {
1550 string error_msg = "Failed parsing: " +
1551 string(token->text, token->textlen) +
1552 "\nIn: " + string(token_.text, token_.textlen);
1553 LOG_AUTO_ESCAPE_ERROR(error_msg, my_template);
1554 success = false;
1555 }
1556 }
1557 }
1558 return success;
1559}
1560
1561// In Auto Escape mode, we update the variable modifiers based on what
1562// modifiers are specified in the template and what Auto-Escape computes
1563// for that context. Returns false only if the HTML parser failed to parse
1564// in auto-escape mode.
1565//
1566// We also have special logic for BI_SPACE and BI_NEWLINE.
1567// Even though they look like variables, they're really not: the user
1568// is expected to use them in situations where they'd normally put
1569// a space character or a newline character, but can't for technical
1570// reasons (namely, that the template parser would strip these
1571// characters because of the STRIP mode it's in). So unlike other
1572// variables, we want to treat these variables as literal text. This
1573// means that we never add modifiers to them, but we do let the
1574// htmlparser know about them in order to update its state. Existing
1575// modifiers will be honored.
1576//
1577// Finally, we check if the variable is whitelisted, in which case
1578// Auto-Escape does not apply escaping to it. See comment for global
1579// array kSafeWhitelistedVariables[].
1580bool SectionTemplateNode::AddVariableNode(TemplateToken* token,
1581 Template* my_template) {
1582 assert(token);
1583 bool success = true;
1584 HtmlParser *htmlparser = my_template->htmlparser_;
1585 TemplateContext initial_context = my_template->initial_context_;
1586
1587 if (AUTO_ESCAPE_MODE(initial_context)) {
1588 // Determines modifiers for the variable in auto escape mode.
1589 string variable_name(token->text, token->textlen);
1590 // We declare in the documentation that if the user changes the
1591 // value of these variables, they must only change it to a value
1592 // that's "equivalent" from the point of view of an html parser.
1593 // So it's ok to hard-code in that these are " " and "\n",
1594 // respectively, even though in theory the user could change them
1595 // (to say, BI_NEWLINE == "\r\n").
1596 if (variable_name == "BI_SPACE" || variable_name == "BI_NEWLINE") {
1597 if (AUTO_ESCAPE_PARSING_CONTEXT(initial_context)) {
1598 assert(htmlparser);
1599 if (htmlparser->state() == HtmlParser::STATE_ERROR ||
1600 htmlparser->Parse(variable_name == "BI_SPACE" ? " " : "\n") ==
1601 HtmlParser::STATE_ERROR)
1602 success = false;
1603 }
1604 } else if (binary_search(Template::kSafeWhitelistedVariables,
1605 Template::kSafeWhitelistedVariables +
1606 arraysize(Template::kSafeWhitelistedVariables),
1607 variable_name.c_str(),
1608 // Luckily, StringHash(a, b) is defined as "a < b"
1609 StringHash())) {
1610 // Do not escape the variable, it is whitelisted.
1611 } else {
1612 vector<const ModifierAndValue*> modvals =
1613 GetModifierForContext(initial_context, htmlparser, my_template);
1614 // There should always be at least one modifier in any Auto-Escape mode.
1615 if (modvals.empty())
1616 success = false;
1617 else
1618 token->UpdateModifier(modvals);
1619 }
1620 }
1621 node_list_.push_back(new VariableTemplateNode(*token));
1622 return success;
1623}
1624
1625// AddPragmaNode
1626// Create a pragma node from the given token and add it
1627// to the node list.
1628// The AUTOESCAPE pragma is only allowed at the top of a template
1629// file (above any non-comment node) to minimize the chance of the
1630// HTML parser being out of sync with the template text. So we check
1631// that the section is the MAIN section and we are the first node.
1632// Note: Since currently we only support one pragma, we apply the check
1633// always but when other pragmas are added we'll need to propagate the
1634// Pragma identifier from GetNextToken().
1635bool SectionTemplateNode::AddPragmaNode(TemplateToken* token,
1636 Template* my_template) {
1637 if (token_.text != kMainSectionName || !node_list_.empty())
1638 return false;
1639
1640 node_list_.push_back(new PragmaTemplateNode(*token));
1641 return true;
1642}
1643
1644// AddSectionNode
1645bool SectionTemplateNode::AddSectionNode(const TemplateToken* token,
1646 Template* my_template,
1647 bool hidden_by_default) {
1648 assert(token);
1649 SectionTemplateNode *new_node = new SectionTemplateNode(*token,
1650 hidden_by_default);
1651
1652 // Not only create a new section node, but fill it with all *its*
1653 // subnodes by repeatedly calling AddSubNode until it returns false
1654 // (indicating either the end of the section or a syntax error)
1655 while (new_node->AddSubnode(my_template)) {
1656 // Found a new subnode to add
1657 }
1658 node_list_.push_back(new_node);
1659 // Check the name of new_node. If it's "OURNAME_separator", store it
1660 // as a special "separator" section.
1661 if (token->textlen == token_.textlen + sizeof("_separator")-1 &&
1662 memcmp(token->text, token_.text, token_.textlen) == 0 &&
1663 memcmp(token->text + token_.textlen, "_separator", sizeof("_separator")-1)
1664 == 0)
1665 separator_section_ = new_node;
1666 return true;
1667}
1668
1669// Note: indentation will be used in constructor of TemplateTemplateNode.
1670// Note on Auto-Escape: Each template is Auto-Escaped independently of
1671// the template it may be included from or templates it may include.
1672// The context does not carry on and hence does not need to be provided
1673// to the new TemplateNode.
1674bool SectionTemplateNode::AddTemplateNode(TemplateToken* token,
1675 Template* my_template,
1676 const string& indentation) {
1677 assert(token);
1678 bool success = true;
1679 node_list_.push_back(
1680 new TemplateTemplateNode(*token, my_template->strip_, indentation));
1681 return success;
1682}
1683
// Returns the indentation that "text" ends with: the run of spaces and
// tabs following its last newline, provided that run is non-empty and
// reaches the end of the text.  In every other case the empty string
// is returned.  If implicit_newline is true, a text without any
// newline is treated as if it were preceded by one, so a text made up
// entirely of whitespace counts as indentation too; this is set when
// we know that right before this text there was a newline, or this
// text is the beginning of a document.
static std::string GetIndentation(const char* text, size_t textlen,
                                  bool implicit_newline) {
  const char* const text_end = text + textlen;

  // Walk backwards to just past the last newline; line_start stays at
  // 'text' when the text contains no newline at all.
  const char* line_start = text_end;
  while (line_start > text && line_start[-1] != '\n')
    --line_start;
  if (line_start == text && !implicit_newline)
    return "";    // no newline found, so no indentation

  if (line_start == text_end)
    return "";    // nothing follows the newline
  for (const char* p = line_start; p != text_end; ++p) {
    if (*p != ' ' && *p != '\t')
      return "";  // the last line holds more than whitespace
  }
  return std::string(line_start, text_end - line_start);
}
1710
1711bool SectionTemplateNode::AddSubnode(Template *my_template) {
1712 bool auto_escape_success = true;
1713 // Don't proceed if we already found an error
1714 if (my_template->state() == TS_ERROR) {
1715 return false;
1716 }
1717
1718 // Stop when the buffer is empty.
1719 if (my_template->parse_state_.bufstart >= my_template->parse_state_.bufend) {
1720 // running out of file contents ends the section too
1721 if (token_.text != kMainSectionName) {
1722 // if we are not in the main section, we have a syntax error in the file
1723 LOG_TEMPLATE_NAME(ERROR, my_template);
1724 LOG(ERROR) << "File ended before all sections were closed" << endl;
1725 my_template->set_state(TS_ERROR);
1726 }
1727 return false;
1728 }
1729
1730 TemplateToken token = GetNextToken(my_template);
1731
1732 switch (token.type) {
1733 case TOKENTYPE_TEXT:
1734 auto_escape_success = this->AddTextNode(&token, my_template);
1735 // Store the indentation (trailing whitespace after a newline), if any.
1736 this->indentation_ = GetIndentation(token.text, token.textlen,
1737 indentation_ == "\n");
1738 break;
1739 case TOKENTYPE_VARIABLE:
1740 auto_escape_success = this->AddVariableNode(&token, my_template);
1741 this->indentation_.clear(); // clear whenever last read wasn't whitespace
1742 break;
1743 case TOKENTYPE_SECTION_START:
1744 auto_escape_success = this->AddSectionNode(&token, my_template, false);
1745 this->indentation_.clear(); // clear whenever last read wasn't whitespace
1746 break;
1747 case TOKENTYPE_HIDDEN_DEFAULT_SECTION:
1748 auto_escape_success = this->AddSectionNode(&token, my_template, true);
1749 this->indentation_.clear(); // clear whenever last read wasn't whitespace
1750 break;
1751 case TOKENTYPE_SECTION_END:
1752 // Don't add a node. Just make sure we are ending the right section
1753 // and return false to indicate the section is complete
1754 if (token.textlen != token_.textlen ||
1755 memcmp(token.text, token_.text, token.textlen)) {
1756 LOG_TEMPLATE_NAME(ERROR, my_template);
1757 LOG(ERROR) << "Found end of different section than the one I am in"
1758 << "\nFound: " << string(token.text, token.textlen)
1759 << "\nIn: " << string(token_.text, token_.textlen) << endl;
1760 my_template->set_state(TS_ERROR);
1761 }
1762 this->indentation_.clear(); // clear whenever last read wasn't whitespace
1763 return false;
1764 break;
1765 case TOKENTYPE_TEMPLATE:
1766 auto_escape_success = this->AddTemplateNode(&token, my_template,
1767 this->indentation_);
1768 this->indentation_.clear(); // clear whenever last read wasn't whitespace
1769 break;
1770 case TOKENTYPE_COMMENT:
1771 // Do nothing. Comments just drop out of the file altogether.
1772 break;
1773 case TOKENTYPE_SET_DELIMITERS:
1774 if (!Template::ParseDelimiters(
1775 token.text, token.textlen,
1776 &my_template->parse_state_.current_delimiters)) {
1777 LOG_TEMPLATE_NAME(ERROR, my_template);
1778 LOG(ERROR) << "Invalid delimiter-setting command."
1779 << "\nFound: " << string(token.text, token.textlen)
1780 << "\nIn: " << string(token_.text, token_.textlen) << endl;
1781 my_template->set_state(TS_ERROR);
1782 }
1783 break;
1784 case TOKENTYPE_PRAGMA:
1785 // We can do nothing and simply drop the pragma of the file as is done
1786 // for comments. But, there is value in keeping it for debug purposes
1787 // (via DumpToString) so add it as a pragma node.
1788 if (!this->AddPragmaNode(&token, my_template)) {
1789 LOG_TEMPLATE_NAME(ERROR, my_template);
1790 LOG(ERROR) << "Pragma marker must be at the top of the template: '"
1791 << string(token.text, token.textlen) << "'" << endl;
1792 my_template->set_state(TS_ERROR);
1793 }
1794 break;
1795 case TOKENTYPE_NULL:
1796 // GetNextToken either hit the end of the file or a syntax error
1797 // in the file. Do nothing more here. Just return false to stop
1798 // processing.
1799 return false;
1800 break;
1801 default:
1802 // This shouldn't happen. If it does, it's a programmer error.
1803 LOG_TEMPLATE_NAME(ERROR, my_template);
1804 LOG(ERROR) << "Invalid token type returned from GetNextToken" << endl;
1805 }
1806
1807 if (!auto_escape_success) {
1808 // The error is logged where it happens. Here indicate
1809 // the initialization failed.
1810 my_template->set_state(TS_ERROR);
1811 return false;
1812 }
1813
1814 // for all the cases where we did not return false
1815 return true;
1816}
1817
1818// --- GetNextToken and its subroutines
1819
1820// A valid marker name is made up of alphanumerics and underscores...
1821// nothing else.
1822static bool IsValidName(const char* name, int namelen) {
1823 for (const char *cur_char = name; cur_char - name < namelen; ++cur_char) {
1824 if (!ascii_isalnum(*cur_char) && *cur_char != '_')
1825 return false;
1826 }
1827 return true;
1828}
1829
1830// If we're pointing to the end of a line, and in a high enough strip mode,
1831// pass over the newline. If the line ends in a \, we skip over the \ and
1832// keep the newline. Returns a pointer to the new 'start' location, which
1833// is either 'start' or after a newline.
1834static const char* MaybeEatNewline(const char* start, const char* end,
1835 Strip strip) {
1836 // first, see if we have the escaped linefeed sequence
1837 if (end - start >= 2 && start[0] == '\\' && start[1] == '\n') {
1838 ++start; // skip over the \, which keeps the \n
1839 } else if (end - start >= 1 && start[0] == '\n' &&
1840 strip >= STRIP_WHITESPACE) {
1841 ++start; // skip over the \n in high strip_ modes
1842 }
1843 return start;
1844}
1845
// Bail-out helper for the token parser.  It logs the template's name
// followed by 'msg' (which is streamed, so it may be a chain of
// <<-joined operands), puts the template into TS_ERROR, exhausts the
// parse buffer so nothing more gets parsed, and returns the null token
// from the enclosing function.  It expects a Template* named
// 'my_template' to be in scope at the point of use.
#define FAIL(msg)                                                          \
  do {                                                                     \
    LOG_TEMPLATE_NAME(ERROR, my_template);                                 \
    LOG(ERROR) << msg << endl;                                             \
    my_template->set_state(TS_ERROR);                                      \
    /* make extra-sure we never try to parse anything more */              \
    my_template->parse_state_.bufstart = my_template->parse_state_.bufend; \
    return TemplateToken(TOKENTYPE_NULL, "", 0, NULL);                     \
  } while (0)
1855
1856// Parses the text of the template file in the input_buffer as
1857// follows: If the buffer is empty, return the null token. If getting
1858// text, search for the next "{{" sequence (more precisely, for
1859// parse_state_->marker_delimiters.start_marker). If one is found,
1860// return all the text collected up to that sequence in a TextToken
1861// and change the token-parsing phase variable to GETTING_NAME, so the
1862// next call will know to look for a named marker, instead of more
1863// text. If getting a name, read the next character to learn what
1864// kind of marker it is. Then collect the characters of the name up
1865// to the "}}" sequence. If the "name" is a template comment, then we
1866// do not return the text of the comment in the token. If it is any
1867// other valid type of name, we return the token with the appropriate
1868// type and the name. If any syntax errors are discovered (like
1869// inappropriate characters in a name, not finding the closing curly
1870// braces, etc.) an error message is logged, the error state of the
1871// template is set, and a NULL token is returned. Updates
1872// parse_state_. You should hold the g_template_mutex write-lock
1873// when calling this (unless you're calling it from a constructor).
1874TemplateToken SectionTemplateNode::GetNextToken(Template *my_template) {
1875 Template::ParseState* ps = &my_template->parse_state_; // short abbrev.
1876 const char* token_start = ps->bufstart;
1877
1878 if (ps->bufstart >= ps->bufend) { // at end of buffer
1879 return TemplateToken(TOKENTYPE_NULL, "", 0, NULL);
1880 }
1881
1882 switch (ps->phase) {
1883 case Template::ParseState::GETTING_TEXT: {
1884 const char* token_end = memmatch(ps->bufstart, ps->bufend - ps->bufstart,
1885 ps->current_delimiters.start_marker,
1886 ps->current_delimiters.start_marker_len);
1887 if (!token_end) {
1888 // Didn't find the start-marker ('{{'), so just grab all the
1889 // rest of the buffer.
1890 token_end = ps->bufend;
1891 ps->bufstart = ps->bufend; // next token will start at EOF
1892 } else {
1893 // If we see code like this: "{{{VAR}}, we want to match the
1894 // second "{{", not the first.
1895 while ((token_end + 1 + ps->current_delimiters.start_marker_len
1896 <= ps->bufend) &&
1897 memcmp(token_end + 1, ps->current_delimiters.start_marker,
1898 ps->current_delimiters.start_marker_len) == 0)
1899 token_end++;
1900 ps->phase = Template::ParseState::GETTING_NAME;
1901 ps->bufstart = token_end + ps->current_delimiters.start_marker_len;
1902 }
1903 return TemplateToken(TOKENTYPE_TEXT, token_start,
1904 token_end - token_start, NULL);
1905 }
1906
1907 case Template::ParseState::GETTING_NAME: {
1908 TemplateTokenType ttype;
1909 const char* token_end = NULL;
1910 // Find out what type of name we are getting
1911 switch (token_start[0]) {
1912 case '#':
1913 ttype = TOKENTYPE_SECTION_START;
1914 ++token_start;
1915 break;
1916 case '/':
1917 ttype = TOKENTYPE_SECTION_END;
1918 ++token_start;
1919 break;
1920 case '!':
1921 ttype = TOKENTYPE_COMMENT;
1922 ++token_start;
1923 break;
1924 case '=':
1925 ttype = TOKENTYPE_SET_DELIMITERS;
1926 // Keep token_start the same; the token includes the leading '='.
1927 // But we have to figure token-end specially: it should be "=}}".
1928 if (ps->bufend > (token_start + 1))
1929 token_end = (char*)memchr(token_start + 1, '=',
1930 ps->bufend - (token_start + 1));
1931 if (!token_end ||
1932 token_end + ps->current_delimiters.end_marker_len > ps->bufend ||
1933 memcmp(token_end + 1, ps->current_delimiters.end_marker,
1934 ps->current_delimiters.end_marker_len) != 0)
1935 token_end = NULL; // didn't find it, fall through to code below
1936 else
1937 token_end++; // advance past the "=" to the "}}".
1938 break;
1939 case '>':
1940 ttype = TOKENTYPE_TEMPLATE;
1941 ++token_start;
1942 break;
1943 case '%':
1944 ttype = TOKENTYPE_PRAGMA;
1945 ++token_start;
1946 break;
1947 default:
1948 // the assumption that the next char is alnum or _ will be
1949 // tested below in the call to IsValidName().
1950 ttype = TOKENTYPE_VARIABLE;
1951 }
1952
1953 // Now get the name (or the comment, as the case may be)
1954 if (!token_end) // that is, it wasn't set in special-case code above
1955 token_end = memmatch(token_start, ps->bufend - token_start,
1956 ps->current_delimiters.end_marker,
1957 ps->current_delimiters.end_marker_len);
1958 if (!token_end) { // Didn't find the '}}', so name never ended. Error!
1959 FAIL("No ending '" << string(ps->current_delimiters.end_marker,
1960 ps->current_delimiters.end_marker_len)
1961 << "' when parsing name starting with "
1962 << "'" << string(token_start, ps->bufend-token_start) << "'");
1963 }
1964
1965 if (ttype == TOKENTYPE_PRAGMA) {
1966 string error_msg;
1967 const PragmaMarker pragma(token_start, token_end, &error_msg);
1968 if (!error_msg.empty())
1969 FAIL(error_msg);
1970 TemplateContext context = GetTemplateContextFromPragma(pragma);
1971 if (context == TC_MANUAL) // TC_MANUAL is used to indicate error.
1972 FAIL("Invalid context in Pragma directive.");
1973 const string* parser_state = pragma.GetAttributeValue("state");
1974 bool in_tag = false;
1975 if (parser_state != NULL) {
1976 if (context == TC_HTML && (*parser_state == "IN_TAG" ||
1977 *parser_state == "in_tag"))
1978 in_tag = true;
1979 else if (*parser_state != "default")
1980 FAIL("Unsupported state '" + *parser_state +
1981 "'in Pragma directive.");
1982 }
1983 // Only an AUTOESCAPE pragma can change the initial_context
1984 // away from TC_MANUAL and we do not support multiple such pragmas.
1985 assert(my_template->initial_context_ == TC_MANUAL);
1986 my_template->initial_context_ = context;
1987 my_template->MaybeInitHtmlParser(in_tag);
1988 // ParseState change will happen below.
1989 }
1990
1991 // Comments are a special case, since they don't have a name or action.
1992 // The set-delimiters command is the same way.
1993 if (ttype == TOKENTYPE_COMMENT || ttype == TOKENTYPE_SET_DELIMITERS ||
1994 ttype == TOKENTYPE_PRAGMA) {
1995 ps->phase = Template::ParseState::GETTING_TEXT;
1996 ps->bufstart = token_end + ps->current_delimiters.end_marker_len;
1997 // If requested, remove any unescaped linefeed following a comment
1998 ps->bufstart = MaybeEatNewline(ps->bufstart, ps->bufend,
1999 my_template->strip_);
2000 // For comments, don't bother returning the text
2001 if (ttype == TOKENTYPE_COMMENT)
2002 token_start = token_end;
2003 return TemplateToken(ttype, token_start, token_end - token_start, NULL);
2004 }
2005
2006 // Now we have the name, possibly with following modifiers.
2007 // Find the modifier-start.
2008 const char* mod_start = (const char*)memchr(token_start, ':',
2009 token_end - token_start);
2010 if (mod_start == NULL)
2011 mod_start = token_end;
2012
2013 // Make sure the name is legal.
2014 if (!IsValidName(token_start, mod_start - token_start)) {
2015 FAIL("Illegal name in template '"
2016 << string(token_start, mod_start-token_start) << "'");
2017 }
2018
2019 // Figure out what all the modifiers are. Mods are colon-separated.
2020 vector<ModifierAndValue> modifiers;
2021 const char* mod_end;
2022 for (const char* mod = mod_start; mod < token_end; mod = mod_end) {
2023 assert(*mod == ':');
2024 ++mod; // skip past the starting colon
2025 mod_end = (const char*)memchr(mod, ':', token_end - mod);
2026 if (mod_end == NULL)
2027 mod_end = token_end;
2028 // Modifiers can be of the form :modname=value. Extract out value
2029 const char* value = (const char*)memchr(mod, '=', mod_end - mod);
2030 if (value == NULL)
2031 value = mod_end;
2032 string value_string(value, mod_end - value);
2033 // Convert the string to a functor, and error out if we can't.
2034 const ModifierInfo* modstruct = FindModifier(mod, value - mod,
2035 value, mod_end - value);
2036 // There are various ways a modifier syntax can be illegal.
2037 if (modstruct == NULL) {
2038 FAIL("Unknown modifier for variable "
2039 << string(token_start, mod_start - token_start) << ": "
2040 << "'" << string(mod, value - mod) << "'");
2041 } else if (!modstruct->modval_required && value < mod_end) {
2042 FAIL("Modifier for variable "
2043 << string(token_start, mod_start - token_start) << ":"
2044 << string(mod, value - mod) << " "
2045 << "has illegal mod-value '" << value_string << "'");
2046 } else if (modstruct->modval_required && value == mod_end) {
2047 FAIL("Modifier for variable "
2048 << string(token_start, mod_start - token_start) << ":"
2049 << string(mod, value - mod) << " "
2050 << "is missing a required mod-value");
2051 }
2052
2053 // We rely on the fact that the memory pointed to by 'value'
2054 // remains valid throughout the life of this token since
2055 // ModifierAndValue does not itself manage its memory.
2056 modifiers.push_back(
2057 ModifierAndValue(modstruct, value, mod_end - value));
2058 }
2059
2060 // For now, we only allow variable and include nodes to have
2061 // modifiers. I think it's better not to have this for
2062 // sections, but instead to modify all the text and vars in the
2063 // section appropriately, but I could be convinced otherwise.
2064 if (!modifiers.empty() &&
2065 ttype != TOKENTYPE_VARIABLE && ttype != TOKENTYPE_TEMPLATE) {
2066 FAIL(string(token_start, token_end - token_start)
2067 << "malformed: only variables and template-includes "
2068 << "are allowed to have modifiers");
2069 }
2070
2071 // Whew! We passed the gauntlet. Get ready for the next token
2072 ps->phase = Template::ParseState::GETTING_TEXT;
2073 ps->bufstart = token_end + ps->current_delimiters.end_marker_len;
2074 // If requested, remove any linefeed following a comment,
2075 // or section start or end, or template marker, unless
2076 // it is escaped by '\'
2077 if (ttype != TOKENTYPE_VARIABLE) {
2078 ps->bufstart = MaybeEatNewline(ps->bufstart, ps->bufend,
2079 my_template->strip_);
2080 }
2081
2082 // create and return the TEXT token that we found
2083 return TemplateToken(ttype, token_start, mod_start - token_start,
2084 &modifiers);
2085 }
2086
2087 default: {
2088 FAIL("Programming error: Unexpected parse phase while "
2089 << "parsing template: " << ps->phase);
2090 }
2091 }
2092}
2093
2094// ----------------------------------------------------------------------
2095// CreateTemplateCache()
2096// default_template_cache()
2097// mutable_default_template_cache()
2098// These create the default TemplateCache object, that Template
2099// often just delegates (deprecated) operations to.
2100// ----------------------------------------------------------------------
2101
2102static TemplateCache* g_default_template_cache = NULL;
2103GoogleOnceType g_default_cache_init_once = GOOGLE_ONCE_INIT;
2104
2105static void CreateTemplateCache() {
2106 g_default_template_cache = new TemplateCache();
2107}
2108
2109const TemplateCache* default_template_cache() {
2110 GoogleOnceInit(&g_default_cache_init_once, &CreateTemplateCache);
2111 return g_default_template_cache;
2112}
2113
2114TemplateCache* mutable_default_template_cache() {
2115 GoogleOnceInit(&g_default_cache_init_once, &CreateTemplateCache);
2116 return g_default_template_cache;
2117}
2118
2119// ----------------------------------------------------------------------
2120// Template::StringToTemplate()
2121// StringToTemplate reads a string representing a template (eg
2122// "Hello {{WORLD}}"), and parses it to a Template*. It returns
2123// the parsed template, or NULL if there was a parsing error.
2124// StringToTemplateCache does the same, but then inserts the
2125// resulting Template* into the template cache, for future retrieval
2126// via GetTemplate. You pass in the key to use with GetTemplate.
2127// It returns a bool indicating success or failure of template
2128// creation/insertion. (Insertion will fail if a string or file
2129// with that key already exists in the cache.)
2130// RemoveStringFromTemplateCache() lets you remove a string that
2131// you had previously interned via StringToTemplateCache().
2132// ----------------------------------------------------------------------
2133
2134Template* Template::StringToTemplate(const TemplateString& content,
2135 Strip strip) {
2136 // An empty original_filename_ keeps ReloadIfChangedLocked from performing
2137 // file operations.
2138
2139 Template *tpl = new Template("", strip, NULL);
2140
2141 // But we have to do the "loading" and parsing ourselves:
2142
2143 // BuildTree deletes the buffer when done, so we need a copy for it.
2144 char* buffer = new char[content.size()];
2145 size_t content_len = content.size();
2146 memcpy(buffer, content.data(), content_len);
2147 tpl->StripBuffer(&buffer, &content_len);
2148 if ( tpl->BuildTree(buffer, buffer + content_len) ) {
2149 assert(tpl->state() == TS_READY);
2150 } else {
2151 assert(tpl->state() != TS_READY);
2152 delete tpl;
2153 return NULL;
2154 }
2155 return tpl;
2156}
2157
2158// ----------------------------------------------------------------------
2159// Template::Template()
2160// Template::~Template()
2161// Template::MaybeInitHtmlParser()
2162// Calls ReloadIfChanged to load the template the first time.
2163// The constructor is private; GetTemplate() is the factory
2164// method used to actually construct a new template if needed.
2165// GetTemplateCommon() first looks in the two caches -- the
2166// cache of parsed template trees, and the cache of raw
2167// template-file contents -- before trying to load the
2168// template-file from disk.
2169// ----------------------------------------------------------------------
2170
2171Template::Template(const TemplateString& filename, Strip strip,
2172 TemplateCache* owner)
2173 // TODO(csilvers): replace ToString() with an is_immutable() check
2174 : original_filename_(filename.data(), filename.size()), resolved_filename_(),
2175 filename_mtime_(0), strip_(strip), state_(TS_EMPTY),
2176 template_cache_(owner), template_text_(NULL), template_text_len_(0),
2177 tree_(NULL), parse_state_(),
2178 initial_context_(TC_MANUAL), htmlparser_(NULL) {
2179 VLOG(2) << "Constructing Template for " << template_file()
2180 << "; with context " << initial_context_
2181 << "; and strip " << strip_ << endl;
2182
2183 // Preserve whitespace in Javascript files because carriage returns
2184 // can convey meaning for comment termination and closures
2185 if (strsuffix(original_filename_.c_str(), ".js") &&
2186 strip_ == STRIP_WHITESPACE) {
2187 strip_ = STRIP_BLANK_LINES;
2188 }
2189 ReloadIfChangedLocked();
2190}
2191
2192Template::~Template() {
2193 VLOG(2) << endl << "Deleting Template for " << template_file()
2194 << "; with context " << initial_context_
2195 << "; and strip " << strip_ << endl;
2196 // Since this is only used by tests, we don't bother with locking
2197 num_deletes_++;
2198 delete tree_;
2199 // Delete this last, since tree has pointers into template_text_
2200 delete[] template_text_;
2201 delete htmlparser_;
2202}
2203
2204// In TemplateContexts where the HTML parser is needed, we initialize it in
2205// the appropriate mode. Also we do a sanity check (cannot fail) on the
2206// template filename. This function is invoked when an AUTOESCAPE pragma is
2207// found during template parsing and should at most be called once per template.
2208//
2209// In_tag is only meaningful for TC_HTML: It is true for templates that
2210// start inside an HTML tag and hence are expected to contain HTML attribute
2211// name/value pairs only. It is false for standard HTML templates.
2212void Template::MaybeInitHtmlParser(bool in_tag) {
2213 assert(!htmlparser_);
2214 if (AUTO_ESCAPE_PARSING_CONTEXT(initial_context_)) {
2215 htmlparser_ = new HtmlParser();
2216 switch (initial_context_) {
2217 case TC_JS:
2218 htmlparser_->ResetMode(HtmlParser::MODE_JS);
2219 break;
2220 case TC_CSS:
2221 htmlparser_->ResetMode(HtmlParser::MODE_CSS);
2222 break;
2223 default:
2224 if (in_tag)
2225 htmlparser_->ResetMode(HtmlParser::MODE_HTML_IN_TAG);
2226 break;
2227 }
2228 FilenameValidForContext(original_filename_, initial_context_);
2229 }
2230}
2231
// ----------------------------------------------------------------------
// Template::BuildTree()
// Template::WriteHeaderEntries()
// Template::Dump()
//    These kick off their various parsers -- BuildTree for the
//    main task of parsing a Template when it's read from memory,
//    WriteHeaderEntries for parsing for make_tpl_varnames_h, and
//    Dump() for when Dump() is called by the caller.
// ----------------------------------------------------------------------
2241
2242// NOTE: BuildTree takes over ownership of input_buffer, and will delete it.
2243// It should have been created via new[].
2244// You should hold a write-lock on g_template_mutex before calling this
2245// (unless you're calling it from a constructor).
2246// In auto-escape mode, the HTML context is tracked as the tree is being
2247// built, in a single pass. When this function completes, all variables
2248// will have the proper modifiers set.
2249bool Template::BuildTree(const char* input_buffer,
2250 const char* input_buffer_end) {
2251 set_state(TS_EMPTY);
2252 parse_state_.bufstart = input_buffer;
2253 parse_state_.bufend = input_buffer_end;
2254 parse_state_.phase = ParseState::GETTING_TEXT;
2255 parse_state_.current_delimiters = Template::MarkerDelimiters();
2256 // Assign an arbitrary name to the top-level node
2257 SectionTemplateNode *top_node = new SectionTemplateNode(
2258 TemplateToken(TOKENTYPE_SECTION_START,
2259 kMainSectionName, strlen(kMainSectionName), NULL),
2260 false);
2261 while (top_node->AddSubnode(this)) {
2262 // Add the rest of the template in.
2263 }
2264
2265 // get rid of the old tree, whenever we try to build a new one.
2266 delete tree_;
2267 delete[] template_text_;
2268 tree_ = top_node;
2269 template_text_ = input_buffer;
2270 template_text_len_ = input_buffer_end - input_buffer;
2271
2272 // TS_ERROR can also be set by the auto-escape mode, at the point
2273 // where the parser failed to parse.
2274 if (state() != TS_ERROR) {
2275 set_state(TS_READY);
2276 return true;
2277 } else {
2278 delete tree_;
2279 tree_ = NULL;
2280 delete[] template_text_;
2281 template_text_ = NULL;
2282 template_text_len_ = 0;
2283 return false;
2284 }
2285}
2286
2287void Template::WriteHeaderEntries(string *outstring) const {
2288 if (state() == TS_READY) { // only write header entries for 'good' tpls
2289 outstring->append("#include <ctemplate/template_string.h>\n");
2290 tree_->WriteHeaderEntries(outstring, template_file());
2291 }
2292}
2293
2294// Dumps the parsed structure of the template for debugging assistance.
2295// It goes to stdout instead of LOG to avoid possible truncation due to size.
2296void Template::Dump(const char *filename) const {
2297 string out;
2298 DumpToString(filename, &out);
2299 fwrite(out.data(), 1, out.length(), stdout);
2300 fflush(stdout);
2301}
2302
2303void Template::DumpToString(const char *filename, string *out) const {
2304 if (!out)
2305 return;
2306 out->append("------------Start Template Dump [" + string(filename) +
2307 "]--------------\n");
2308 if (tree_) {
2309 tree_->DumpToString(1, out);
2310 } else {
2311 out->append("No parse tree has been produced for this template\n");
2312 }
2313 out->append("------------End Template Dump----------------\n");
2314}
2315
2316// -------------------------------------------------------------------------
2317// Template::state()
2318// Template::set_state()
2319// Template::template_file()
2320// Template::original_filename()
2321// Template::strip()
2322// Template::mtime()
2323// Various introspection methods. state() is the parse-state
2324// (success, error). template_file() is the resolved filename of a
2325// given template object's input. original_filename() is the unresolved,
2326// original filename, strip() is the Strip type. mtime() is
2327// the lastmod time. For string-based templates, not backed by a file,
2328// mtime() returns 0.
2329// -------------------------------------------------------------------------
2330
2331void Template::set_state(TemplateState new_state) {
2332 state_ = new_state;
2333}
2334
2335TemplateState Template::state() const {
2336 return state_;
2337}
2338
2339const char *Template::template_file() const {
2340 return resolved_filename_.c_str();
2341}
2342
2343const char *Template::original_filename() const {
2344 return original_filename_.c_str();
2345}
2346
2347Strip Template::strip() const {
2348 return strip_;
2349}
2350
2351time_t Template::mtime() const {
2352 return filename_mtime_;
2353}
2354
2355// ----------------------------------------------------------------------
2356// Template::GetTemplate()
2357// Template::StringToTemplateCache()
2358// Template::SetTemplateRootDirectory()
2359// Template::AddAlternateTemplateRootDirectory()
2360// Template::template_root_directory()
2361// Template::FindTemplateFilename()
2362// Template::RemoveStringFromTemplateCache()
2363// Template::ClearCache()
2364// Template::ReloadAllIfChanged()
2365// These are deprecated static methods that have been moved to
2366// template_cache.h. We just forward to them, using the global
2367// default template cache.
2368// ----------------------------------------------------------------------
2369
2370Template *Template::GetTemplate(const TemplateString& filename, Strip strip) {
2371 // Until I've resolved the TODO that lets me return a const Template*
2372 // here, I have to do an ugly cast. :-(
2373 return const_cast<Template*>(
2374 mutable_default_template_cache()->GetTemplate(filename, strip));
2375}
2376
2377// This method is deprecated (and slow). Instead, use the above
2378// StringToTemplateCache method that takes a Strip argument.
2379bool Template::StringToTemplateCache(const TemplateString& key,
2380 const TemplateString& content) {
2381 // We say the insert succeeded only if it succeded for all strip values.
2382 bool retval = true;
2383 for (int i = 0; i < static_cast<int>(NUM_STRIPS); ++i) {
2384 if (!GOOGLE_NAMESPACE::StringToTemplateCache(key, content, static_cast<Strip>(i)))
2385 retval = false;
2386 }
2387 return retval;
2388}
2389
2390// ----------------------------------------------------------------------
2391// Template::ParseDelimiters()
2392// Given an input that looks like =XXX YYY=, set the
2393// MarkerDelimiters to point to XXX and YYY. This is used to parse
2394// {{=XXX YYY=}} markers, which reset the marker delimiters.
2395// Returns true if successfully parsed (starts and ends with =,
2396// exactly one space, no internal ='s), false else.
2397// ----------------------------------------------------------------------
2398
2399bool Template::ParseDelimiters(const char* text, size_t textlen,
2400 MarkerDelimiters* delim) {
2401 const char* space = (const char*)memchr(text, ' ', textlen);
2402 if (textlen < 3 ||
2403 text[0] != '=' || text[textlen - 1] != '=' || // no = at ends
2404 memchr(text + 1, '=', textlen - 2) || // = in the middle
2405 !space || // no interior space
2406 memchr(space + 1, ' ', text + textlen - (space+1))) // too many spaces
2407 return false;
2408
2409 delim->start_marker = text + 1;
2410 delim->start_marker_len = space - delim->start_marker;
2411 delim->end_marker = space + 1;
2412 delim->end_marker_len = text + textlen - 1 - delim->end_marker;
2413 return true;
2414}
2415
2416// ----------------------------------------------------------------------
2417// StripTemplateWhiteSpace()
2418// Template::IsBlankOrOnlyHasOneRemovableMarker()
2419// Template::InsertLine()
2420// Template::StripBuffer()
2421// This mini-parser modifies an input buffer, replacing it with a
2422// new buffer that is the same as the old, but with whitespace
2423// removed as is consistent with the given strip-mode:
2424// STRIP_WHITESPACE, STRIP_BLANK_LINES, DO_NOT_STRIP (the last
2425// of these is a no-op). (This parser may work by allocating
2426// a new buffer and deleting the input buffer when it's done.)
2427// The trickiest bit is in STRIP_BLANK_LINES mode, if we see
2428// a line that consists entirely of one "removable" marker on it,
2429// and nothing else other than whitespace. ("Removable" markers
2430// are comments, start sections, end sections, pragmas and
2431// template-include.) In such a case, we elide the newline at
2432// the end of that line.
2433// ----------------------------------------------------------------------
2434
2435// We define our own version rather than using the one in strutil, mostly
2436// so we can take a size_t instead of an int. The code is simple enough.
2437static void StripTemplateWhiteSpace(const char** str, size_t* len) {
2438 // Strip off trailing whitespace.
2439 while ((*len) > 0 && ascii_isspace((*str)[(*len)-1])) {
2440 (*len)--;
2441 }
2442
2443 // Strip off leading whitespace.
2444 while ((*len) > 0 && ascii_isspace((*str)[0])) {
2445 (*len)--;
2446 (*str)++;
2447 }
2448}
2449
2450// Adjusts line and length iff condition is met, and RETURNS true.
2451// MarkerDelimiters are {{ and }}, or equivalent.
2452bool Template::IsBlankOrOnlyHasOneRemovableMarker(
2453 const char** line, size_t* len, const Template::MarkerDelimiters& delim) {
2454 const char *clean_line = *line;
2455 size_t new_len = *len;
2456 StripTemplateWhiteSpace(&clean_line, &new_len);
2457
2458 // If there was only white space on the line, new_len will now be zero.
2459 // In that case the line should be removed, so return true.
2460 if (new_len == 0) {
2461 *line = clean_line;
2462 *len = new_len;
2463 return true;
2464 }
2465
2466 // The smallest removable marker is at least start_marker_len +
2467 // end_marker_len + 1 characters long. If there aren't enough
2468 // characters, then keep the line by returning false.
2469 if (new_len < delim.start_marker_len + delim.end_marker_len + 1) {
2470 return false;
2471 }
2472
2473 // Only {{#...}}, {{/....}, {{>...}, {{!...}, {{%...}} and {{=...=}}
2474 // are "removable"
2475 if (memcmp(clean_line, delim.start_marker, delim.start_marker_len) != 0 ||
2476 !strchr("#/>!%=", clean_line[delim.start_marker_len])) {
2477 return false;
2478 }
2479
2480 const char *found_end_marker = memmatch(clean_line + delim.start_marker_len,
2481 new_len - delim.start_marker_len,
2482 delim.end_marker,
2483 delim.end_marker_len);
2484
2485 // Make sure the end marker comes at the end of the line.
2486 if (!found_end_marker ||
2487 found_end_marker + delim.end_marker_len != clean_line + new_len) {
2488 return false;
2489 }
2490
2491 // else return the line stripped of its white space chars so when the
2492 // marker is removed in expansion, no white space is left from the line
2493 // that has now been removed
2494 *line = clean_line;
2495 *len = new_len;
2496 return true;
2497}
2498
2499size_t Template::InsertLine(const char *line, size_t len, Strip strip,
2500 const MarkerDelimiters& delim, char* buffer) {
2501 bool add_newline = (len > 0 && line[len-1] == '\n');
2502 if (add_newline)
2503 len--; // so we ignore the newline from now on
2504
2505 if (strip >= STRIP_WHITESPACE) {
2506 StripTemplateWhiteSpace(&line, &len);
2507 add_newline = false;
2508
2509 // IsBlankOrOnlyHasOneRemovableMarker may modify the two input
2510 // parameters if the line contains only spaces or only one input
2511 // marker. This modification must be done before the line is
2512 // written to the input buffer. Hence the need for the boolean flag
2513 // add_newline to be referenced after the Write statement.
2514 } else if (strip >= STRIP_BLANK_LINES
2515 && IsBlankOrOnlyHasOneRemovableMarker(&line, &len, delim)) {
2516 add_newline = false;
2517 }
2518
2519 memcpy(buffer, line, len);
2520
2521 if (add_newline) {
2522 buffer[len++] = '\n';
2523 }
2524 return len;
2525}
2526
2527void Template::StripBuffer(char **buffer, size_t* len) {
2528 if (strip_ == DO_NOT_STRIP)
2529 return;
2530
2531 char* bufend = *buffer + *len;
2532 char* retval = new char[*len];
2533 char* write_pos = retval;
2534
2535 MarkerDelimiters delim;
2536
2537 const char* next_pos = NULL;
2538 for (const char* prev_pos = *buffer; prev_pos < bufend; prev_pos = next_pos) {
2539 next_pos = (char*)memchr(prev_pos, '\n', bufend - prev_pos);
2540 if (next_pos)
2541 next_pos++; // include the newline
2542 else
2543 next_pos = bufend; // for the last line, when it has no newline
2544
2545 write_pos += InsertLine(prev_pos, next_pos - prev_pos, strip_, delim,
2546 write_pos);
2547 assert(write_pos >= retval &&
2548 static_cast<size_t>(write_pos-retval) <= *len);
2549
2550 // Before looking at the next line, see if the current line
2551 // changed the marker-delimiter. We care for
2552 // IsBlankOrOnlyHasOneRemovableMarker, so we don't need to be
2553 // perfect -- we don't have to handle the delimiter changing in
2554 // the middle of a line -- just make sure that the next time
2555 // there's only one marker on a line, we notice because we know
2556 // the right delim.
2557 const char* end_marker = NULL;
2558 for (const char* marker = prev_pos; marker; marker = end_marker) {
2559 marker = memmatch(marker, next_pos - marker,
2560 delim.start_marker, delim.start_marker_len);
2561 if (!marker) break;
2562 end_marker = memmatch(marker + delim.start_marker_len,
2563 next_pos - (marker + delim.start_marker_len),
2564 delim.end_marker, delim.end_marker_len);
2565 if (!end_marker) break;
2566 end_marker += delim.end_marker_len; // needed for the for loop
2567 // This tries to parse the marker as a set-delimiters marker.
2568 // If it succeeds, it updates delim. If not, it ignores it.
2569 assert(((end_marker - delim.end_marker_len)
2570 - (marker + delim.start_marker_len)) >= 0);
2571 Template::ParseDelimiters(marker + delim.start_marker_len,
2572 ((end_marker - delim.end_marker_len)
2573 - (marker + delim.start_marker_len)),
2574 &delim);
2575 }
2576 }
2577 assert(write_pos >= retval);
2578
2579 // Replace the input retval with our new retval.
2580 delete[] *buffer;
2581 *buffer = retval;
2582 *len = static_cast<size_t>(write_pos - retval);
2583}
2584
2585// ----------------------------------------------------------------------
2586// Template::ReloadIfChanged()
2587// Template::ReloadIfChangedLocked()
2588// If one template, try immediately to reload it from disk. If all
2589// templates, just set all their reload statuses to true, so next time
2590// GetTemplate() is called on the template, it will be reloaded from disk if
2591// the disk version is newer than the one currently in memory.
2592// ReloadIfChanged() returns true if the file changed on disk *and* we
2593// successfully reloaded and parsed it. It never returns true if
2594// original_filename_ is "".
2595// ----------------------------------------------------------------------
2596
2597// Besides being called when locked, it's also ok to call this from
2598// the constructor, when you know nobody else will be messing with
2599// this object.
2600bool Template::ReloadIfChangedLocked()
2601 EXCLUSIVE_LOCKS_REQUIRED(g_template_mutex) {
2602 // TODO(panicker): Remove this duplicate code when constructing the template,
2603 // after deprecating this method.
2604 // TemplateCache::GetTemplate() already checks if the template filename is
2605 // valid and resolvable. It also checks if the file needs to be reloaded
2606 // based on mtime.
2607
2608 // NOTE(panicker): we should not be using original_filename_ to determine
2609 // if a template is string-based, instead use the boolean 'string_based'
2610 // in the template cache.
2611 if (original_filename_.empty()) {
2612 // string-based templates don't reload
2613 return false;
2614 }
2615
2616 FileStat statbuf;
2617 if (resolved_filename_.empty()) {
2618 if (!template_cache_->ResolveTemplateFilename(original_filename_,
2619 &resolved_filename_,
2620 &statbuf)) {
2621 LOG(WARNING) << "Unable to locate file " << original_filename_ << endl;
2622 set_state(TS_ERROR);
2623 return false;
2624 }
2625 } else {
2626 if (!File::Stat(resolved_filename_, &statbuf)) {
2627 LOG(WARNING) << "Unable to stat file " << resolved_filename_ << endl;
2628 // We keep the old tree if there is one, otherwise we're in error
2629 set_state(TS_ERROR);
2630 return false;
2631 }
2632 }
2633
2634 if (statbuf.IsDirectory()) {
2635 LOG(WARNING) << resolved_filename_
2636 << "is a directory and thus not readable" << endl;
2637 // We keep the old tree if there is one, otherwise we're in error
2638 set_state(TS_ERROR);
2639 return false;
2640 }
2641 if (statbuf.mtime == filename_mtime_ && filename_mtime_ > 0
2642 && tree_) { // force a reload if we don't already have a tree_
2643 VLOG(1) << "Not reloading file " << resolved_filename_
2644 << ": no new mod-time" << endl;
2645 set_state(TS_READY);
2646 return false; // file's timestamp hasn't changed, so no need to reload
2647 }
2648
2649 File* fp = File::Open(resolved_filename_.c_str(), "r");
2650 if (fp == NULL) {
2651 LOG(ERROR) << "Can't find file " << resolved_filename_
2652 << "; skipping" << endl;
2653 // We keep the old tree if there is one, otherwise we're in error
2654 set_state(TS_ERROR);
2655 return false;
2656 }
2657 size_t buflen = statbuf.length;
2658 char* file_buffer = new char[buflen];
2659 if (fp->Read(file_buffer, buflen) != buflen) {
2660 LOG(ERROR) << "Error reading file " << resolved_filename_
2661 << ": " << strerror(errno) << endl;
2662 fp->Close();
2663 delete[] file_buffer;
2664 // We could just keep the old tree, but probably safer to say 'error'
2665 set_state(TS_ERROR);
2666 return false;
2667 }
2668 fp->Close();
2669
2670 // Now that we know we've read the file ok, mark the new mtime
2671 filename_mtime_ = statbuf.mtime;
2672
2673 // Parse the input one line at a time to get the "stripped" input.
2674 StripBuffer(&file_buffer, &buflen);
2675
2676 // Re-initialize Auto-Escape data. Delete the parser and reset the template
2677 // context back to TC_MANUAL. If the new content has the AUTOESCAPE pragma,
2678 // the parser will then be re-created.
2679 initial_context_ = TC_MANUAL;
2680 delete htmlparser_;
2681 htmlparser_ = NULL;
2682
2683 // Now parse the template we just read. BuildTree takes over ownership
2684 // of input_buffer in every case, and will eventually delete it.
2685 if ( BuildTree(file_buffer, file_buffer + buflen) ) {
2686 assert(state() == TS_READY);
2687 return true;
2688 } else {
2689 assert(state() != TS_READY);
2690 return false;
2691 }
2692}
2693
2694// ----------------------------------------------------------------------
2695// Template::ExpandLocked()
2696// Template::ExpandWithDataAndCache()
2697// This is the main function clients call: it expands a template
2698// by expanding its parse tree (which starts with a top-level
2699// section node). For each variable/section/include-template it
2700// sees, it replaces the name stored in the parse-tree with the
2701// appropriate value from the passed-in dictionary.
2702// ----------------------------------------------------------------------
2703
2704bool Template::ExpandLocked(ExpandEmitter *expand_emitter,
2705 const TemplateDictionaryInterface *dict,
2706 PerExpandData *per_expand_data,
2707 const TemplateCache *cache) const
2708 SHARED_LOCKS_REQUIRED(g_template_mutex) {
2709 // Accumulator for the results of Expand for each sub-tree.
2710 bool error_free = true;
2711
2712 // TODO(csilvers): could make this static if it's expensive to construct.
2713 PerExpandData empty_per_expand_data;
2714 if (per_expand_data == NULL)
2715 per_expand_data = &empty_per_expand_data;
2716
2717 if (state() != TS_READY) {
2718 // We'd like to reload if reload status is true, but ExpandWD() is const
2719 return false;
2720 }
2721
2722 if (per_expand_data->annotate()) {
2723 // Remove the machine dependent prefix from the template file name.
2724 const char* file = template_file();
2725 const char* short_file = strstr(file, per_expand_data->annotate_path());
2726 if (short_file != NULL) {
2727 file = short_file;
2728 }
2729 per_expand_data->annotator()->EmitOpenFile(expand_emitter,
2730 string(file));
2731 }
2732
2733 // If the client registered an expand-modifier, which is a modifier
2734 // meant to modify all templates after they are expanded, apply it
2735 // now.
2736 const TemplateModifier* modifier =
2737 per_expand_data->template_expansion_modifier();
2738 if (modifier && modifier->MightModify(per_expand_data, template_file())) {
2739 // We found a expand TemplateModifier. Apply it.
2740 //
2741 // Since the expand-modifier doesn't ever have an arg (it doesn't
2742 // have a name and can't be applied in the text of a template), we
2743 // pass the template name in as the string arg in this case.
2744 string value;
2745 StringEmitter tmp_emitter(&value);
2746 error_free &= tree_->Expand(&tmp_emitter, dict, per_expand_data, cache);
2747 modifier->Modify(value.data(), value.size(), per_expand_data,
2748 expand_emitter, template_file());
2749 } else {
2750 // No need to modify this template.
2751 error_free &= tree_->Expand(expand_emitter, dict, per_expand_data, cache);
2752 }
2753
2754 if (per_expand_data->annotate()) {
2755 per_expand_data->annotator()->EmitCloseFile(expand_emitter);
2756 }
2757
2758 return error_free;
2759}
2760
2761bool Template::ExpandWithDataAndCache(
2762 ExpandEmitter *expand_emitter,
2763 const TemplateDictionaryInterface *dict,
2764 PerExpandData *per_expand_data,
2765 const TemplateCache *cache) const LOCKS_EXCLUDED(g_template_mutex) {
2766 // We hold g_template_mutex the entire time we expand, because
2767 // ReloadIfChanged(), which also holds template_mutex, is allowed to
2768 // delete tree_, and we want to make sure it doesn't do that (in another
2769 // thread) while we're expanding. We also protect state_, etc.
2770 // Note we only need a read-lock here, so many expands can go on at once.
2771 // TODO(csilvers): We can remove this once we delete ReloadIfChanged.
2772 // When we do that, ExpandLocked() can go away as well.
2773 ReaderMutexLock ml(&g_template_mutex);
2774 return ExpandLocked(expand_emitter, dict, per_expand_data, cache);
2775}
2776
2777}