Squashed 'third_party/ctemplate/' content from commit 6742f62
Change-Id: I828e4e4c906f13ba19944d78a8a78652b62949af
git-subtree-dir: third_party/ctemplate
git-subtree-split: 6742f6233db12f545e90baa8f34f5c29c4eb396a
diff --git a/src/base/arena-inl.h b/src/base/arena-inl.h
new file mode 100644
index 0000000..d4aee74
--- /dev/null
+++ b/src/base/arena-inl.h
@@ -0,0 +1,351 @@
+// Copyright (c) 2000, Google Inc.
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+// * Neither the name of Google Inc. nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+// ---
+//
+// Reorganized by Craig Silverstein
+//
+// In this file we define the arena template code. This includes the
+// ArenaAllocator, which is meant only to be used with STL, and also
+// the Gladiator (which needs to know how to new and delete various
+// types of objects).
+//
+// If you're only using the MALLOC-LIKE functionality of the arena,
+// you don't need to include this file at all! You do need to include
+// it (in your own .cc file) if you want to use the STRING, STL, or
+// NEW aspects of the arena. See arena.h for details on these types.
+//
+// ArenaAllocator is an STL allocator, but because it relies on unequal
+// instances, it may not work with all standards-conforming STL
+// implementations. But it works with SGI STL so we're happy.
+//
+// Here's an example of how the ArenaAllocator would be used.
+// Say we have a vector of ints that we want to have use the arena
+// for memory allocation. Here's one way to do it:
+// UnsafeArena* arena = new UnsafeArena(1000); // or SafeArena(), or 10000
+// vector<int, ArenaAllocator<int, UnsafeArena> > v(arena);
+//
+// Note that every STL type always allows the allocator (in this case,
+// the arena, which is automatically promoted to an allocator) as the last
+// arg to the constructor. So if you would normally do
+// vector<...> v(foo, bar),
+// with the arena you can do
+// vector<...> v(foo, bar, arena);
+
+#ifndef BASE_ARENA_INL_H_
+#define BASE_ARENA_INL_H_
+
+#include <config.h>
+#include "base/arena.h"
+#include <assert.h>
+#include <stddef.h>
+#include <new>
+#include <memory>
+
+namespace ctemplate {
+
+// STL-style allocator that draws storage for objects of type T from an arena
+// of type C.  ArenaAllocator has the thread-safety characteristics of C.
+template <class T, class C> class ArenaAllocator {
+ public:
+  typedef T value_type;
+  typedef size_t size_type;
+  typedef ptrdiff_t difference_type;
+
+  typedef T* pointer;
+  typedef const T* const_pointer;
+  typedef T& reference;
+  typedef const T& const_reference;
+  pointer address(reference r) const { return &r; }
+  const_pointer address(const_reference r) const { return &r; }
+  size_type max_size() const { return size_t(-1) / sizeof(T); }
+
+  // DO NOT USE!  The default constructor is for gcc3 compatibility only.
+  ArenaAllocator() : arena_(0) { }
+  // This is not an explicit constructor!  So you can pass in an arena*
+  // to functions needing an ArenaAllocator (like the astring constructor)
+  // and everything will work ok.
+  ArenaAllocator(C* arena) : arena_(arena) { }  // NOLINT
+  ~ArenaAllocator() { }
+
+  // Returns arena storage for n objects of type T.  The hint is ignored; it is
+  // spelled const void* since allocator<void>::const_pointer is gone in C++20.
+  pointer allocate(size_type n, const void* /*hint*/ = 0) {
+    assert(arena_ && "No arena to allocate from!");
+    return reinterpret_cast<T*>(arena_->AllocAligned(n * sizeof(T),
+                                                     kAlignment));
+  }
+  // Hands the n-object region starting at p to the arena's Free().
+  void deallocate(pointer p, size_type n) {
+    arena_->Free(p, n * sizeof(T));
+  }
+  void construct(pointer p, const T & val) {
+    new(reinterpret_cast<void*>(p)) T(val);
+  }
+  void construct(pointer p) { new(reinterpret_cast<void*>(p)) T(); }
+  void destroy(pointer p) { p->~T(); }
+
+  C* arena(void) const { return arena_; }
+
+  template<class U> struct rebind {
+    typedef ArenaAllocator<U, C> other;
+  };
+
+  template<class U> ArenaAllocator(const ArenaAllocator<U, C>& other)
+      : arena_(other.arena()) { }
+
+  template<class U> bool operator==(const ArenaAllocator<U, C>& other) const {
+    return arena_ == other.arena();
+  }
+
+  template<class U> bool operator!=(const ArenaAllocator<U, C>& other) const {
+    return arena_ != other.arena();
+  }
+
+ protected:
+  static const int kAlignment;
+  C* arena_;
+};
+
+template<class T, class C> const int ArenaAllocator<T, C>::kAlignment =
+    (sizeof(T) == 1) ? 1 : BaseArena::kDefaultAlignment;  // bytes align to 1
+
+
+// 'new' must be in the global namespace.
+}
+using GOOGLE_NAMESPACE::UnsafeArena;
+
+
+// Operators for allocation on the arena
+// Syntax: new (AllocateInArena, arena) MyClass;
+// new (AllocateInArena, arena) MyClass[num];
+// Useful for classes you can't descend from Gladiator, such as POD,
+// STL containers, etc.
+enum AllocateInArenaType { AllocateInArena };  // tag argument for arena 'new'
+
+// Scalar arena new: the object's storage is obtained from arena->Alloc().
+inline void* operator new(size_t size, AllocateInArenaType /* unused */,
+                          UnsafeArena *arena) {
+  return arena->Alloc(size);
+}
+
+// Array arena new: the array's storage is obtained from arena->Alloc().
+inline void* operator new[](size_t size, AllocateInArenaType /* unused */,
+                            UnsafeArena *arena) {
+  return arena->Alloc(size);
+}
+
+namespace ctemplate {
+
+// Ordinarily in C++, one allocates all instances of a class from an
+// arena. If that's what you want to do, you don't need Gladiator.
+// (However you may find ArenaOnlyGladiator useful.)
+//
+// However, for utility classes that are used by multiple clients, the
+// everything-in-one-arena model may not work. Some clients may wish
+// not to use an arena at all. Or perhaps a composite structure
+// (tree) will contain multiple objects (nodes) and some of those
+// objects will be created by a factory, using an arena, while other
+// objects will be created on-the-fly by an unsuspecting user who
+// doesn't know anything about the arena.
+//
+// To support that, have the arena-allocated class inherit from
+// Gladiator. The ordinary operator new will continue to allocate
+// from the heap. To allocate from an arena, do
+// Myclass * m = new (AllocateInArena, a) Myclass (args, to, constructor);
+// where a is either an arena or an allocator. Now you can call
+// delete on all the objects, whether they are allocated from an arena
+// or on the heap. Heap memory will be released, while arena memory will
+// not be.
+//
+// If a client knows that no objects were allocated on the heap, it
+// need not delete any objects (but it may if it wishes). The only
+// objects that must be deleted are those that were actually allocated
+// from the heap.
+//
+// NOTE: an exception to the google C++ style guide rule for "No multiple
+// implementation inheritance" is granted for this class: you can treat this
+// class as an "Interface" class, and use it in a multiple inheritance context,
+// even though it implements operator new/delete.
+
+class Gladiator {
+ public:
+  Gladiator() { }
+  virtual ~Gladiator() { }
+
+  // Only the scalar allocation functions are overridden, so array
+  // allocation and deallocation always go through the heap.  Arrays are
+  // typically larger, which makes arena allocation costlier and less
+  // beneficial, and they are typically allocated and deallocated very
+  // differently from scalars, so this may not interfere too much with
+  // the arena concept.  If it does pose a problem, flesh out the
+  // ArrayGladiator class below.
+
+  void* operator new(size_t size) {
+    char* const heap_mem = static_cast<char*>(::operator new(1 + size));
+    heap_mem[size] = 1;  // extra trailing byte: 1 means heap-allocated
+    return heap_mem;
+  }
+  // the ignored parameter keeps us from stepping on placement new
+  template<class T> void* operator new(size_t size, const int ignored,
+                                       T* allocator) {
+    if (!allocator) {
+      return operator new(size);  // no allocator: use the heap form above
+    }
+    void* const arena_mem =
+        allocator->AllocAligned(1 + size, BaseArena::kDefaultAlignment);
+    static_cast<char*>(arena_mem)[size] = 0;  // 0 means arena-allocated
+    return arena_mem;
+  }
+  // Releases heap objects; objects marked as arena-allocated are left alone.
+  void operator delete(void* memory, size_t size) {
+    const char origin = static_cast<char*>(memory)[size];
+    if (!origin) {
+      // We never call the allocator's Free method.  If we need to do
+      // that someday, we can store a pointer to the arena instead of
+      // the Boolean marker flag.
+      return;
+    }
+    assert(1 == origin);
+    ::operator delete(memory);
+  }
+  // Placement form of delete: frees via the allocator if one is given.
+  template<class T> void operator delete(void* memory, size_t size,
+                                         const int ign, T* allocator) {
+    if (!allocator) {
+      ::operator delete(memory);
+    } else {
+      allocator->Free(memory, 1 + size);
+    }
+  }
+};
+
+// This avoids the space overhead of Gladiator if you just want to
+// override new and delete. It helps avoid some of the more common
+// problems that can occur when overriding new and delete.
+
+class ArenaOnlyGladiator {
+ public:
+ ArenaOnlyGladiator() { }
+ // No virtual destructor is needed because we ignore the size
+ // parameter in all the delete functions.
+ // virtual ~ArenaOnlyGladiator() { }
+
+ // Plain new hits assert(0); can't just return NULL -- compiler warns. :-|
+ void* operator new(size_t /*size*/) {
+ assert(0);
+ return reinterpret_cast<void *>(1); // dummy non-NULL value
+ }
+ void* operator new[](size_t /*size*/) {
+ assert(0);
+ return reinterpret_cast<void *>(1); // dummy non-NULL value
+ }
+
+ // the ignored parameter keeps us from stepping on placement new
+ template<class T> void* operator new(size_t size, const int ignored,
+ T* allocator) {
+ assert(allocator);
+ return allocator->AllocAligned(size, BaseArena::kDefaultAlignment);
+ }
+ template<class T> void* operator new[](size_t size,
+ const int ignored, T* allocator) {
+ assert(allocator);
+ return allocator->AllocAligned (size, BaseArena::kDefaultAlignment);
+ }
+ void operator delete(void* /*memory*/, size_t /*size*/) { } // no-op
+ template<class T> void operator delete(void* memory, size_t size,
+ const int ign, T* allocator) { } // no-op
+ void operator delete [](void* /*memory*/) { } // no-op
+ template<class T> void operator delete(void* memory,
+ const int ign, T* allocator) { } // no-op
+};
+
+#if 0 // ********** for example purposes only; 100% untested.
+
+// Note that this implementation incurs an overhead of kHeaderSize for
+// every array that is allocated. *Before* the space is returned to the
+// user, we store the address of the Arena that owns the space, and
+// the length of the space itself.
+
+class ArrayGladiator : public Gladiator { // example only; inside "#if 0"
+ public:
+ void * operator new[] (size_t size) {
+ const int sizeplus = size + kHeaderSize;
+ void * const ret = ::operator new(sizeplus);
+ *static_cast<Arena **>(ret) = NULL; // mark as heap-allocated
+ *static_cast<size_t *>(ret + sizeof(Arena *)) = sizeplus; // NOTE(review): arithmetic on void* is not standard C++
+ return ret + kHeaderSize;
+ }
+ // the ignored parameter keeps us from stepping on placement new
+ template<class T> void * operator new[] (size_t size,
+ const int ignored, T * allocator) {
+ if (allocator) {
+ const int sizeplus = size + kHeaderSize;
+ void * const ret =
+ allocator->AllocAligned(sizeplus, BaseArena::kDefaultAlignment);
+ *static_cast<Arena **>(ret) = allocator->arena();
+ *static_cast<size_t *>(ret + sizeof(Arena *)) = sizeplus;
+ return ret + kHeaderSize;
+ } else {
+ return operator new[](size); // this is the function above
+ }
+ }
+ void operator delete [] (void * memory) {
+ memory -= kHeaderSize; // NOTE(review): arithmetic on void* again
+ Arena * const arena = *static_cast<Arena **>(memory);
+ size_t sizeplus = *static_cast<size_t *>(memory + sizeof(arena));
+ if (arena) {
+ arena->SlowFree(memory, sizeplus);
+ } else {
+ ::operator delete (memory);
+ }
+ }
+ template<class T> void * operator delete (void * memory,
+ const int ign, T * allocator) {
+ // This "placement delete" can only be called if the constructor
+ // throws an exception.  NOTE(review): declared void* but returns nothing.
+ memory -= kHeaderSize;
+ size_t sizeplus = *static_cast<size_t *>(memory + sizeof(Arena *));
+ if (allocator) {
+ allocator->Free(memory, 1 + size); // NOTE(review): "size" is not declared in this function
+ } else {
+ operator delete (memory);
+ }
+ }
+
+ protected:
+ static const int kMinSize = sizeof size_t + sizeof(Arena *); // NOTE(review): sizeof on a type needs parentheses
+ static const int kHeaderSize = kMinSize > BaseArena::kDefaultAlignment ?
+ 2 * BaseArena::kDefaultAlignment : BaseArena::kDefaultAlignment;
+};
+
+#endif // ********** example
+
+}
+
+#endif // BASE_ARENA_INL_H_
diff --git a/src/base/arena.cc b/src/base/arena.cc
new file mode 100644
index 0000000..62df770
--- /dev/null
+++ b/src/base/arena.cc
@@ -0,0 +1,505 @@
+// Copyright (c) 2000, Google Inc.
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+// * Neither the name of Google Inc. nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+// ---
+//
+// Reorganized by Craig Silverstein
+// "Handles" by Ilan Horn
+//
+// This approach to arenas overcomes many of the limitations described
+// in the "Specialized allocators" section of
+// http://www.pdos.lcs.mit.edu/~dm/c++-new.html
+//
+// A somewhat similar approach to Gladiator, but for heap-detection, was
+// suggested by Ron van der Wal and Scott Meyers at
+// http://www.aristeia.com/BookErrata/M27Comments_frames.html
+
+#include <config.h>
+#include "base/arena.h"
+#include "base/arena-inl.h"
+#include <assert.h>
+#include <algorithm>
+#ifdef HAVE_UNISTD_H
+# include <unistd.h>
+#endif
+#include <vector>
+#include <sys/types.h> // one place uintptr_t might be
+#ifdef HAVE_INTTYPES_H
+# include <inttypes.h>
+#endif // another place uintptr_t might be
+#ifdef HAVE_UNISTD_H
+# include <unistd.h>
+#endif // last place uintptr_t might be
+#include "base/macros.h" // for uint64
+#include "base/mutex.h"
+#include "base/util.h" // for DCHECK_*
+
+using std::min;
+using std::vector;
+
+// TODO(csilvers): add in a portable implementation of aligned_malloc
+static void* aligned_malloc(size_t size, size_t alignment) {
+  LOG(FATAL) << "page_aligned_ not currently supported\n";
+  return NULL;  // callers PCHECK for NULL; also avoids falling off the end
+}
+// Placeholder: the value here doesn't matter until page_aligned_ is supported.
+static const int kPageSize = 8192; // should be getpagesize()
+
+namespace ctemplate {
+
+// ARENASET(x) wraps the statements that update the arena's space
+// accounting.  We used to keep track of how much space has been allocated
+// only in debug mode; now we track it for optimized builds as well.  To see
+// whether the old scheme helps performance, change ARENASET() to a NOP.
+// However, NOTE: some applications of arenas depend on this space
+// information (exported via bytes_allocated()).
+#define ARENASET(x) (x)
+
+// ----------------------------------------------------------------------
+// BaseArena::BaseArena()
+// BaseArena::~BaseArena()
+// Destroying the arena automatically calls Reset()
+// ----------------------------------------------------------------------
+
+
+BaseArena::BaseArena(char* first, const size_t block_size, bool align_to_page)
+  : remaining_(0),
+    first_block_we_own_(first ? 1 : 0),  // a caller-supplied block is not ours
+    block_size_(block_size),
+    freestart_(NULL),                    // set for real in Reset()
+    last_alloc_(NULL),
+    blocks_alloced_(1),
+    overflow_blocks_(NULL),
+    page_aligned_(align_to_page),
+    handle_alignment_(1),
+    handle_alignment_bits_(0),
+    block_size_bits_(0) {
+  // Installs the first block: "first" if given, else block_size fresh bytes.
+  assert(block_size > kDefaultAlignment);
+  // block_size_bits_ becomes the smallest n with (1 << n) >= block_size_.
+  while ((static_cast<size_t>(1) << block_size_bits_) < block_size_) {
+    ++block_size_bits_;
+  }
+
+  if (page_aligned_) {
+    // kPageSize must be power of 2, so make sure of this.
+    CHECK(kPageSize > 0 && 0 == (kPageSize & (kPageSize - 1)))
+        << "kPageSize[ " << kPageSize << "] is not "
+        << "correctly initialized: not a power of 2.";
+  }
+
+  if (first) {
+    CHECK(!page_aligned_ ||
+          (reinterpret_cast<uintptr_t>(first) & (kPageSize - 1)) == 0);
+    first_blocks_[0].mem = first;
+  } else {
+    if (page_aligned_) {
+      // Make sure the blocksize is page multiple, as we need to end on a page
+      // boundary.
+      CHECK_EQ(block_size & (kPageSize - 1), 0) << "block_size is not a "
+                                                << "multiple of kPageSize";
+      first_blocks_[0].mem = reinterpret_cast<char*>(aligned_malloc(block_size_,
+                                                                   kPageSize));
+    } else {
+      first_blocks_[0].mem = reinterpret_cast<char*>(malloc(block_size_));
+    }
+    PCHECK(NULL != first_blocks_[0].mem);  // both allocation paths can fail
+  }
+  first_blocks_[0].size = block_size_;
+  Reset();
+}
+
+BaseArena::~BaseArena() {
+  FreeBlocks();  // frees all blocks except first_blocks_[0]
+  assert(overflow_blocks_ == NULL);  // FreeBlocks() should do that
+  for (int idx = first_block_we_own_; idx < blocks_alloced_; ++idx) {
+    free(first_blocks_[idx].mem);  // first blocks we own are deleted now
+  }
+}
+
+// ----------------------------------------------------------------------
+// BaseArena::block_count()
+// Only reason this is in .cc file is because it involves STL.
+// ----------------------------------------------------------------------
+
+int BaseArena::block_count() const {
+  if (!overflow_blocks_) return blocks_alloced_ + 0;  // no overflow vector
+  return blocks_alloced_ + static_cast<int>(overflow_blocks_->size());
+}
+
+// ----------------------------------------------------------------------
+// BaseArena::Reset()
+// Clears all the memory an arena is using.
+// ----------------------------------------------------------------------
+
+void BaseArena::Reset() {
+  FreeBlocks();
+  last_alloc_ = NULL;
+  ARENASET(status_.bytes_allocated_ = block_size_);
+
+  freestart_ = first_blocks_[0].mem;
+  remaining_ = first_blocks_[0].size;
+
+  // The first block is not guaranteed to start on a kDefaultAlignment
+  // boundary, so skip ahead to the next boundary if necessary.
+  const int misalignment = reinterpret_cast<uintptr_t>(freestart_) &
+                           (kDefaultAlignment-1);
+  if (misalignment > 0) {
+    const int padding = kDefaultAlignment - misalignment;
+    freestart_ += padding;
+    remaining_ -= padding;
+  }
+  freestart_when_empty_ = freestart_;
+  assert(!(reinterpret_cast<uintptr_t>(freestart_)&(kDefaultAlignment-1)));
+}
+
+// ----------------------------------------------------------------------
+// BaseArena::MakeNewBlock()
+// Our sbrk() equivalent. We always make blocks of the same size
+// (though GetMemory() can also make a new block for really big
+// data).
+// ----------------------------------------------------------------------
+
+void BaseArena::MakeNewBlock() {
+  const AllocatedBlock* const fresh = AllocNewBlock(block_size_);
+  remaining_ = fresh->size;  // the whole new block is available...
+  freestart_ = fresh->mem;   // ...starting at its first byte
+}
+
+// -------------------------------------------------------------
+// BaseArena::AllocNewBlock()
+// Adds and returns an AllocatedBlock.
+// The returned AllocatedBlock* is valid until the next call
+// to AllocNewBlock or Reset. (i.e. anything that might
+// affect overflow_blocks_).
+// -------------------------------------------------------------
+
+BaseArena::AllocatedBlock* BaseArena::AllocNewBlock(const size_t block_size) {
+  AllocatedBlock *block;
+  // Find the next block.
+  if ( blocks_alloced_ < ARRAYSIZE(first_blocks_) ) {
+    // Use one of the pre-allocated blocks
+    block = &first_blocks_[blocks_alloced_++];
+  } else {                   // oops, out of space, move to the vector
+    if (overflow_blocks_ == NULL) overflow_blocks_ = new vector<AllocatedBlock>;
+    // Adds another block to the vector.
+    overflow_blocks_->resize(overflow_blocks_->size()+1);
+    // block points to the last block of the vector.
+    block = &overflow_blocks_->back();
+  }
+
+  if (page_aligned_) {
+    // We need the size to be multiple of kPageSize to mprotect it later.
+    size_t num_pages = ((block_size - 1) / kPageSize) + 1;
+    size_t new_block_size = num_pages * kPageSize;
+    block->mem = reinterpret_cast<char*>(aligned_malloc(new_block_size,
+                                                        kPageSize));
+    block->size = new_block_size;
+  } else {
+    block->mem = reinterpret_cast<char*>(malloc(block_size));
+    block->size = block_size;
+  }
+  // Check both paths: plain malloc() can come back NULL as well.
+  PCHECK(NULL != block->mem);
+  ARENASET(status_.bytes_allocated_ += block_size);
+
+  return block;
+}
+
+// ----------------------------------------------------------------------
+// BaseArena::IndexToBlock()
+// Index encoding is as follows:
+// For blocks in the first_blocks_ array, we use index of the block in
+// the array.
+// For blocks in the overflow_blocks_ vector, we use the index of the
+// block in overflow_blocks_, plus the size of the first_blocks_ array.
+// ----------------------------------------------------------------------
+
+const BaseArena::AllocatedBlock *BaseArena::IndexToBlock(int index) const {
+  // Small indices address the fixed first_blocks_ array directly.
+  if (index < ARRAYSIZE(first_blocks_)) return first_blocks_ + index;
+  CHECK(overflow_blocks_ != NULL);  // anything else lives in the vector
+  int index_in_overflow_blocks = index - ARRAYSIZE(first_blocks_);
+  CHECK_GE(index_in_overflow_blocks, 0);
+  CHECK_LT(static_cast<size_t>(index_in_overflow_blocks),
+           overflow_blocks_->size());
+  const vector<AllocatedBlock>& overflow = *overflow_blocks_;
+  return &overflow[index_in_overflow_blocks];
+}
+
+// ----------------------------------------------------------------------
+// BaseArena::GetMemoryFallback()
+// We take memory out of our pool, aligned on the byte boundary
+// requested. If we don't have space in our current pool, we
+// allocate a new block (wasting the remaining space in the
+// current block) and give you that. If your memory needs are
+// too big for a single block, we make a special your-memory-only
+// allocation -- this is equivalent to not using the arena at all.
+// ----------------------------------------------------------------------
+
+void* BaseArena::GetMemoryFallback(const size_t size, const int align_as_int) {
+  // Zero-byte requests yield NULL; stl/stl_alloc.h says this is okay.
+  if (0 == size) {
+    return NULL;
+  }
+
+  // Work with an unsigned copy; this makes the type-checker happy.
+  const size_t align = static_cast<size_t>(align_as_int);
+  assert(align_as_int > 0 && 0 == (align & (align - 1))); // must be power of 2
+
+  // Anything larger than a quarter of the block size is given a block of
+  // its own, to avoid wasting too much space in leftover bytes.
+  const bool wants_own_block = (block_size_ == 0 || size > block_size_/4);
+  if (wants_own_block) {
+    assert(align <= kDefaultAlignment); // because that's what new gives us
+    // This block stays separate from the rest of the world; in particular
+    // we don't update last_alloc_ so you can't reclaim space on this block.
+    return AllocNewBlock(size)->mem;
+  }
+
+  // Skip the bytes needed to bring freestart_ up to the requested alignment.
+  const size_t misalignment =
+      (reinterpret_cast<uintptr_t>(freestart_) & (align-1));
+  if (misalignment) {
+    const size_t padding = align - misalignment;
+    freestart_ += padding;
+    remaining_ = (padding < remaining_) ? remaining_ - padding : 0;
+  }
+  // If the request no longer fits in the current block, start a new one
+  // (the space left in the current block is simply abandoned).
+  if (size > remaining_) {
+    MakeNewBlock();
+  }
+  remaining_ -= size;
+  last_alloc_ = freestart_;
+  freestart_ += size;
+  assert(0 == (reinterpret_cast<uintptr_t>(last_alloc_) & (align-1)));
+  return reinterpret_cast<void*>(last_alloc_);
+}
+
+// ----------------------------------------------------------------------
+// BaseArena::ReturnMemoryFallback()
+// BaseArena::FreeBlocks()
+// Unlike GetMemory(), which does actual work, ReturnMemory() is a
+// no-op: we don't "free" memory until Reset() is called. We do
+// update some stats, though. Note we do no checking that the
+// pointer you pass in was actually allocated by us, or that it
+// was allocated for the size you say, so be careful here!
+// FreeBlocks() does the work for Reset(), actually freeing all
+// memory allocated in one fell swoop.
+// ----------------------------------------------------------------------
+
+void BaseArena::FreeBlocks() {
+  // Release every first block except number 0, which stays allocated.
+  for (int idx = 1; idx < blocks_alloced_; ++idx) {
+    AllocatedBlock& blk = first_blocks_[idx];
+    free(blk.mem);
+    blk.mem = NULL;
+    blk.size = 0;
+  }
+  blocks_alloced_ = 1;
+  if (overflow_blocks_ == NULL) return;
+  // The overflow vector should be used very rarely; drop it completely.
+  const size_t overflow_count = overflow_blocks_->size();
+  for (size_t k = 0; k < overflow_count; ++k) free((*overflow_blocks_)[k].mem);
+  delete overflow_blocks_;
+  overflow_blocks_ = NULL;
+}
+
+// ----------------------------------------------------------------------
+// BaseArena::AdjustLastAlloc()
+// If you realize you didn't want your last alloc to be for
+// the size you asked, after all, you can fix it by calling
+// this. We'll grow or shrink the last-alloc region if we
+// can (we can always shrink, but we might not be able to
+// grow if you want to grow too big).
+// RETURNS true if we successfully modified the last-alloc
+// region, false if the pointer you passed in wasn't actually
+// the last alloc or if you tried to grow bigger than we could.
+// ----------------------------------------------------------------------
+
+bool BaseArena::AdjustLastAlloc(void *last_alloc, const size_t newsize) {
+  // Only the most recent allocation may be resized.
+  if (!(last_alloc != NULL && last_alloc == last_alloc_)) return false;
+  // last_alloc_ should never point into a "big" block, w/ size >= block_size_
+  assert(freestart_ >= last_alloc_ && freestart_ <= last_alloc_ + block_size_);
+  assert(remaining_ >= 0); // should be: it's a size_t!
+  // Room available: what the last alloc occupies now plus what is still free.
+  if (newsize > (freestart_ - last_alloc_) + remaining_) return false;
+  const char* const previous_end = freestart_;
+  freestart_ = last_alloc_ + newsize;          // where the last alloc ends now
+  remaining_ -= (freestart_ - previous_end);   // charge (or refund) the delta
+  return true;
+}
+
+// ----------------------------------------------------------------------
+// BaseArena::GetMemoryWithHandle()
+// First, memory is allocated using GetMemory, using handle_alignment_.
+// Since using different alignments for different handles would make
+// the handles incompatible (e.g., we could end up with the same handle
+// value referencing two different allocations), the alignment is not passed
+// as an argument to GetMemoryWithHandle, and handle_alignment_ is used
+// automatically for all GetMemoryWithHandle calls.
+// Then we go about building a handle to reference the allocated memory.
+// The block index used for the allocation, along with the offset inside
+// the block, are encoded into the handle as follows:
+// (block_index*block_size)+offset
+// offset is simply the difference between the pointer returned by
+// GetMemory and the starting pointer of the block.
+// The above value is then divided by the alignment. As we know that
+// both offset and the block_size are divisible by the alignment (this is
+// enforced by set_handle_alignment() for block_size, and by GetMemory()
+// for the offset), this does not lose any information, but allows to cram
+// more into the limited space in handle.
+// If the result does not fit into an unsigned 32-bit integer, we
+// have run out of space that the handle can represent, and return
+// an invalid handle. Note that the returned pointer is still usable,
+// but this allocation cannot be referenced by a handle.
+// ----------------------------------------------------------------------
+
+void* BaseArena::GetMemoryWithHandle(
+    const size_t size, BaseArena::Handle* handle) {
+  CHECK(handle != NULL);
+  // Handle allocations always use the arena-wide power-of-2 handle alignment.
+  const int handle_align = 1 << handle_alignment_bits_;
+  void* p = GetMemory(size, handle_align);
+  // Locate the block that p lies in, scanning from the newest block
+  // backwards.  In most cases the newest block is the one, so the loop
+  // body runs exactly once.
+  int block_index = block_count() - 1;
+  const AllocatedBlock* block = NULL;
+  while (block_index >= 0) {
+    block = IndexToBlock(block_index);
+    const bool inside = (p >= block->mem) && (p < (block->mem + block->size));
+    if (inside) break;
+    --block_index;
+  }
+  CHECK_GE(block_index, 0) << "Failed to find block that was allocated from";
+  CHECK(block != NULL) << "Failed to find block that was allocated from";
+  const uint64 offset = reinterpret_cast<char*>(p) - block->mem;
+  DCHECK_LT(offset, block_size_);
+  DCHECK((offset & ((1 << handle_alignment_bits_) - 1)) == 0);
+  DCHECK((block_size_ & ((1 << handle_alignment_bits_) - 1)) == 0);
+  // Encode (block index, offset), with the alignment factor divided out.
+  const uint64 base = static_cast<uint64>(block_index) << block_size_bits_;
+  uint64 encoded = (base + offset) >> handle_alignment_bits_;
+  if (encoded >= static_cast<uint64>(0xFFFFFFFF)) {
+    // Too large for the handle to represent, so hand back an invalid handle.
+    encoded = Handle::kInvalidValue;
+  }
+  handle->handle_ = static_cast<uint32>(encoded);
+  return p;
+}
+
+// ----------------------------------------------------------------------
+// BaseArena::set_handle_alignment()
+// Set the alignment to be used when Handles are requested. This can only
+// be set for an arena that is empty - it cannot be changed on the fly.
+// The alignment must be a power of 2 that the block size is divisible by.
+// The default alignment is 1.
+// Trying to set an alignment that does not meet the above constraints will
+// cause a CHECK-failure.
+// ----------------------------------------------------------------------
+
+void BaseArena::set_handle_alignment(int align) {
+  CHECK(align > 0 && 0 == (align & (align - 1))); // must be power of 2
+  CHECK(static_cast<size_t>(align) < block_size_);
+  CHECK((block_size_ % align) == 0);
+  CHECK(is_empty());
+  handle_alignment_ = align;
+  // Recompute log2 of the (power-of-2) alignment by counting up from zero.
+  for (handle_alignment_bits_ = 0;
+       (1 << handle_alignment_bits_) < handle_alignment_;
+       ++handle_alignment_bits_) { }
+}
+
+// ----------------------------------------------------------------------
+// BaseArena::HandleToPointer()
+//    Restores the alignment factor that GetMemoryWithHandle divided out of
+//    the handle value, then splits the result into a block index and an
+//    offset within that block.  The offset mask is built in 64 bits, since
+//    an int-typed "1 << block_size_bits_" overflows from 31 bits upward.
+// ----------------------------------------------------------------------
+
+void* BaseArena::HandleToPointer(const Handle& h) const {
+  CHECK(h.valid());
+  uint64 handle = static_cast<uint64>(h.handle_) << handle_alignment_bits_;
+  int block_index = static_cast<int>(handle >> block_size_bits_);
+  const uint64 offset_mask = (static_cast<uint64>(1) << block_size_bits_) - 1;
+  size_t block_offset = static_cast<size_t>(handle & offset_mask);
+  const AllocatedBlock* block = IndexToBlock(block_index);
+  CHECK(block != NULL);
+  return reinterpret_cast<void*>(block->mem + block_offset);
+}
+
+
+// ----------------------------------------------------------------------
+// UnsafeArena::Realloc()
+// SafeArena::Realloc()
+// If you decide you want to grow -- or shrink -- a memory region,
+// we'll do it for you here. Typically this will involve copying
+// the existing memory to somewhere else on the arena that has
+// more space reserved. But if you're reallocing the last-allocated
+// block, we may be able to accommodate you just by updating a
+// pointer. In any case, we return a pointer to the new memory
+// location, which may be the same as the pointer you passed in.
+// Here's an example of how you might use Realloc():
+//
+// compr_buf = arena->Alloc(uncompr_size); // get too-much space
+// int compr_size;
+// zlib.Compress(uncompr_buf, uncompr_size, compr_buf, &compr_size);
+// compr_buf = arena->Realloc(compr_buf, uncompr_size, compr_size);
+// ----------------------------------------------------------------------
+
+char* UnsafeArena::Realloc(char* s, size_t oldsize, size_t newsize) {
+  assert(oldsize >= 0 && newsize >= 0);
+  // If s was the last alloc, it is resized where it stands.
+  if (AdjustLastAlloc(s, newsize)) return s;
+  // Shrinking anything else is a no-op: we aren't reclaiming any memory!
+  if (newsize <= oldsize) return s;
+  // Growing: take a fresh region and carry the old contents over.
+  char* const grown = Alloc(newsize);
+  memcpy(grown, s, min(oldsize, newsize));
+  return grown;
+}
+
+char* SafeArena::Realloc(char* s, size_t oldsize, size_t newsize) {
+  assert(oldsize >= 0 && newsize >= 0);
+  bool resized_in_place;
+  {
+    MutexLock lock(&mutex_);  // held only around AdjustLastAlloc()
+    resized_in_place = AdjustLastAlloc(s, newsize);  // in case s was last alloc
+  }
+  // In-place resize or a shrink: nothing to copy (no memory is reclaimed).
+  if (resized_in_place || newsize <= oldsize) return s;
+  char* const grown = Alloc(newsize);
+  memcpy(grown, s, min(oldsize, newsize));
+  return grown;
+}
+
+}
diff --git a/src/base/arena.h b/src/base/arena.h
new file mode 100644
index 0000000..049a6b5
--- /dev/null
+++ b/src/base/arena.h
@@ -0,0 +1,698 @@
+// Copyright (c) 2000, Google Inc.
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+// * Neither the name of Google Inc. nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+// ---
+//
+// Reorganized by Craig Silverstein
+// "Handles" by Ilan Horn
+//
+// Sometimes it is necessary to allocate a large number of small
+// objects. Doing this the usual way (malloc, new) is slow,
+// especially for multithreaded programs. A BaseArena provides a
+// mark/release method of memory management: it asks for a large chunk
+// from the operating system and doles it out bit by bit as required.
+// Then you free all the memory at once by calling BaseArena::Reset().
+//
+// Use SafeArena for multi-threaded programs where multiple threads
+// could access the same arena at once. Use UnsafeArena otherwise.
+// Usually you'll want UnsafeArena.
+//
+// There are four ways to use the arena. arena.h and arena.cc are
+// sufficient for the MALLOC and STRINGS uses. For NEW and STL you'll
+// also need to include arena-inl.h in the appropriate .cc file.
+// However, we do *declare* (but not define) the template types here.
+//
+// LIKE MALLOC: --Uses UnsafeArena (or SafeArena)--
+// This is the simplest way. Just create an arena, and whenever you
+// need a block of memory to put something in, call BaseArena::Alloc(). eg
+// s = arena.Alloc(100);
+// snprintf(s, 100, "%s:%d", host, port);
+// arena.Shrink(s, strlen(s)+1); // optional; see below for use
+//
+// You'll probably use the convenience routines more often:
+// s = arena.Strdup(host); // a copy of host lives in the arena
+// s = arena.Strndup(host, 100); // we guarantee to NUL-terminate!
+// s = arena.Memdup(protobuf, sizeof(protobuf));
+//
+// If you go the Alloc() route, you'll probably allocate too-much-space.
+// You can reclaim the extra space by calling Shrink() before the next
+// Alloc() (or Strdup(), or whatever), with the #bytes you actually used.
+// If you use this method, memory management is easy: just call Alloc()
+// and friends a lot, and call Reset() when you're done with the data.
+//
+// FOR STRINGS: --Uses UnsafeArena (or SafeArena)--
+// This is a special case of STL (below), but is simpler. Use an
+// astring, which acts like a string but allocates from the passed-in
+// arena:
+// astring s(arena); // or "sastring" to use a SafeArena
+// s.assign(host);
+// astring s2(host, hostlen, arena);
+//
+// WITH NEW: --Uses BaseArena, Gladiator (or ArenaOnlyGladiator)--
+// Use this to allocate a C++ class object (or any other object you
+// have to get via new/delete rather than malloc/free).
+// There are several things you have to do in this case:
+// 1) Your class (the one you new) must inherit from Gladiator.
+// 2) To actually allocate this class on the arena, use
+// myclass = new (AllocateInArena, arena) MyClass(constructor, args)
+//
+// Note that MyClass doesn't need to have the arena passed in.
+// But if it, in turn, wants to call "new" on some of its member
+// variables, and you want those member vars to be on the arena
+// too, you better pass in an arena so it can call new(0,arena).
+//
+// If you can guarantee that everyone who ever calls new on
+// MyClass uses the new(0,arena) form (ie nobody ever just says
+// new), you can have MyClass subclass from ArenaOnlyGladiator
+// rather than from Gladiator. ArenaOnlyGladiator is a bit more
+// efficient (faster and smaller), but is otherwise identical.
+//
+// If you allocate myclass using new(0,arena), and MyClass only
+// does memory management in the destructor, it's not necessary
+// to even call "delete myclass;", you can just call arena.Reset();
+// If the destructor does something else (closes a file, logs
+// a message, whatever), you'll have to call destructor and Reset()
+// both: "delete myclass; arena.Reset();"
+//
+// Note that you can not allocate an array of classes this way:
+// noway = new (AllocateInArena, arena) MyClass[5]; // not supported!
+// It's not difficult to program, we just haven't done it. Arrays
+// are typically big and so there's little point to arena-izing them.
+//
+// WITH NEW: --Uses UnsafeArena--
+// There are cases where you can't inherit the class from Gladiator,
+// or inheriting would be too expensive. Examples of this include
+// plain-old-data (allocated using new) and third-party classes (such
+// as STL containers). arena-inl.h provides a global operator new
+// that can be used as follows:
+//
+// #include "base/arena-inl.h"
+//
+// UnsafeArena arena(1000);
+// Foo* foo = new (AllocateInArena, &arena) Foo;
+// Foo* foo_array = new (AllocateInArena, &arena) Foo[10];
+//
+// IN STL: --Uses BaseArena, ArenaAllocator--
+// All STL containers (vector, hash_map, etc) take an allocator.
+// You can use the arena as an allocator. Then whenever the vector
+// (or whatever) wants to allocate memory, it will take it from the
+// arena. To use, you just indicate in the type that you want to use the
+// arena, and then actually give a pointer to the arena as the last
+// constructor arg:
+// vector<int, ArenaAllocator<int, UnsafeArena> > v(&arena);
+// v.push_back(3);
+//
+// WARNING: Careless use of STL within an arena-allocated object can
+// result in memory leaks if you rely on arena.Reset() to free
+// memory and do not call the object destructor. This is actually
+// a subclass of a more general hazard: If an arena-allocated
+// object creates (and owns) objects that are not also
+// arena-allocated, then the creating object must have a
+// destructor that deletes them, or they will not be deleted.
+// However, since the outer object is arena allocated, it's easy to
+// forget to call delete on it, and needing to do so may seem to
+// negate much of the benefit of arena allocation. A specific
+// example is use of vector<string> in an arena-allocated object,
+// since type string is not atomic and is always allocated by the
+// default runtime allocator. The arena definition provided here
+// allows for much flexibility, but you ought to carefully consider
+// before defining arena-allocated objects which in turn create
+// non-arena allocated objects.
+//
+// WITH HANDLES:
+// The various arena classes can supply compact handles to data kept
+// in the arena. These handles consume only 4 bytes each, and are thus
+// more efficient than pointers - this may be interesting in cases
+// where a very large number of references to memory in the arena need
+// to be kept.
+// Note that handles are limited in the amount of data that can be referenced
+// in the arena, typically to 4GB*the number given to set_handle_alignment()
+// (which defaults to 1). The number of allocations that can have handles
+// is, of course, smaller than 4G (that's what's representable by 32 bits).
+// It does depend on their sizes, however. In a worst-case scenario each
+// allocation consumes a page of its own, and we will run out of handles
+// after approximately (4G/block_size)*handle_alignment allocations.
+// When we run out of handles or allocate data over the amount of memory
+// that handles can reference, an invalid handle will be returned (but
+// the requested memory will still be allocated in the arena).
+// Handles memory use is most efficient when the arena block size is a power
+// of two. When this is not the case, we can run out of handles while up
+// to half of the addressable space (as described above) is still unused.
+// At worst handles can reference at least 2GB*handle_alignment.
+// Example use:
+// UnsafeArena arena(16384);
+// arena.set_handle_alignment(4);
+// // Assume you want to keep the string s in the arena.
+// Handle h = arena.MemdupWithHandle(s.c_str(), s.length());
+// // Later, to get the memory from the handle, use:
+// void* p = arena.HandleToPointer(h);
+// // Note that there's no way to retrieve the size from the handle.
+// // It probably makes sense to encode the size into the buffer saved,
+// // unless the size is known/fixed.
+// Internal machinery of handles:
+// The handle consists of the block index in the arena and the offset
+// inside the block, encoded into a single unsigned uint32 value.
+// Note that, the rightmost alignment bits (controlled by
+// set_handle_alignment()) are shaved off the saved offset in the Handle,
+// to give some extra capacity :)
+// set_handle_alignment() can only be called when the arena is empty,
+// as changing it invalidates any handles that are still in flight.
+//
+//
+// PUTTING IT ALL TOGETHER
+// Here's a program that uses all of the above. Note almost all the
+// examples are the various ways to use "new" and STL. Using the
+// malloc-like features and the string type are much easier!
+//
+// class A : public Gladiator {
+// public:
+// int i;
+// vector<int> v1;
+// vector<int, ArenaAllocator<int, UnsafeArena> >* v3;
+// vector<int, ArenaAllocator<int, UnsafeArena> >* v4;
+// vector<int>* v5;
+// vector<string> vs;
+// vector<astring> va;
+// char *s;
+// A() : v1(), v3(NULL), v4(NULL), vs(), va(), s(NULL) {
+// // v1 is allocated on the arena whenever A is. Its ints never are.
+// v5 = new vector<int>;
+// // v5 is not allocated on the arena, and neither are any of its ints.
+// }
+// ~A() {
+// delete v5; // needed since v5 wasn't allocated on the arena
+// printf("I'm done!\n");
+// }
+// };
+//
+// class B : public A { // we inherit from Gladiator, but indirectly
+// public:
+// UnsafeArena* arena_;
+// vector<int, ArenaAllocator<int, UnsafeArena> > v2;
+// vector<A> va1;
+// vector<A, ArenaAllocator<A, UnsafeArena> > va2;
+// vector<A>* pva;
+// vector<A, ArenaAllocator<A, UnsafeArena> >* pva2;
+// astring a;
+//
+// B(UnsafeArena * arena)
+// : arena_(arena), v2(arena_), va1(), va2(arena_), a("initval", arena_) {
+// v3 = new vector<int, ArenaAllocator<int, UnsafeArena> >(arena_);
+// v4 = new (AllocateInArena, arena_) vector<int, ArenaAllocator<int, UnsafeArena> >(arena_);
+// v5 = new (AllocateInArena, arena_) vector<int>;
+// // v2 is allocated on the arena whenever B is. Its ints always are.
+// // v3 is not allocated on the arena, but the ints you give it are
+// // v4 is allocated on the arena, and so are the ints you give it
+// // v5 is allocated on the arena, but the ints you give it are not
+// // va1 is allocated on the arena whenever B is. No A ever is.
+// // va2 is allocated on the arena whenever B is. Its A's always are.
+// pva = new (AllocateInArena, arena_) vector<A>;
+// pva2 = new (AllocateInArena, arena_) vector<A, ArenaAllocator<A, UnsafeArena> >(arena_);
+// // pva is allocated on the arena, but its A's are not
+// // pva2 is allocated on the arena, and so are its A's.
+// // a's value "initval" is stored on the arena. If we reassign a,
+// // the new value will be stored on the arena too.
+// }
+// ~B() {
+// delete v3; // necessary to free v3's memory, though not its ints'
+// // don't need to delete v4: arena_->Reset() will do just as well
+// delete v5; // necessary to free v5's ints memory, though not v5 itself
+// delete pva; // necessary to make sure you reclaim space used by A's
+// delete pva2; // safe to call this; needed if you want to see the printfs
+// // pva2->clear() -- not necessary, arena_->Reset() will do just as well
+// }
+// };
+//
+// main() {
+// UnsafeArena arena(1000);
+// A a1; // a1 is not on the arena
+// a1.vs.push_back(string("hello")); // hello is not copied onto the arena
+// a1.va.push_back(astring("hello", &arena)); // hello is on the arena,
+// // astring container isn't
+// a1.s = arena.Strdup("hello"); // hello is on the arena
+//
+// A* a2 = new (AllocateInArena, &arena) A; // a2 is on the arena
+// a2->vs.push_back(string("hello")); // hello is *still* not on the arena
+// a2->s = arena.Strdup("world"); // world is on the arena. a1.s is ok
+//
+// B b1(&arena); // B is not allocated on the arena
+// b1.a.assign("hello"); // hello is on the arena
+// b1.pva2->push_back(a1); // our copy of a1 will be stored on
+// // the arena, though a1 itself wasn't
+// arena.Reset(); // all done with our memory!
+// }
+
+#ifndef BASE_ARENA_H_
+#define BASE_ARENA_H_
+
+#include <config.h>
+#include "base/mutex.h" // must go first to get _XOPEN_SOURCE
+#include <assert.h>
+#include <string.h>
+#include <vector>
+#include "base/thread_annotations.h"
+#include "base/macros.h" // for uint32
+#include "base/util.h" // for CHECK, etc
+
+namespace ctemplate {
+
+// Annoying stuff for windows -- make sure clients (in this case
+// unittests) can import the class definitions and variables.
+#ifndef CTEMPLATE_DLL_DECL  // keep any definition that was supplied earlier
+# ifdef _MSC_VER
+# define CTEMPLATE_DLL_DECL __declspec(dllimport)
+# else
+# define CTEMPLATE_DLL_DECL /* should be the empty string for non-windows */
+# endif
+#endif  // CTEMPLATE_DLL_DECL
+
+// This class is "thread-compatible": different threads can access the
+// arena at the same time without locking, as long as they use only
+// const methods.
+class CTEMPLATE_DLL_DECL BaseArena {
+ protected: // You can't make an arena directly; only a subclass of one
+ BaseArena(char* first_block, const size_t block_size, bool align_to_page);
+ public:
+ virtual ~BaseArena();
+
+ virtual void Reset(); // frees all the arena's memory at once (see file comment)
+
+ // A handle to a pointer in an arena. An opaque type, with default
+ // copy and assignment semantics.
+ class Handle {
+ public:
+ static const uint32 kInvalidValue = 0xFFFFFFFF; // uint32-max (all 32 bits set)
+
+ Handle() : handle_(kInvalidValue) { } // default-constructed handles are invalid
+ // Default copy constructors are fine here.
+ bool operator==(const Handle& h) const { return handle_ == h.handle_; }
+ bool operator!=(const Handle& h) const { return handle_ != h.handle_; }
+
+ uint32 hash() const { return handle_; } // the raw handle value doubles as its hash
+ bool valid() const { return handle_ != kInvalidValue; }
+
+ private:
+ // Arena needs to be able to access the internal data.
+ friend class BaseArena;
+
+ explicit Handle(uint32 handle) : handle_(handle) { }
+
+ uint32 handle_;
+ };
+
+ // they're "slow" only 'cause they're virtual (subclasses define "fast" ones)
+ virtual char* SlowAlloc(size_t size) = 0;
+ virtual void SlowFree(void* memory, size_t size) = 0;
+ virtual char* SlowRealloc(char* memory, size_t old_size, size_t new_size) = 0;
+ virtual char* SlowAllocWithHandle(const size_t size, Handle* handle) = 0;
+
+ // Set the alignment to be used when Handles are requested. This can only
+ // be set for an arena that is empty - it cannot be changed on the fly.
+ // The alignment must be a power of 2 that the block size is divisible by.
+ // The default alignment is 1.
+ // Trying to set an alignment that does not meet the above constraints will
+ // cause a CHECK-failure.
+ void set_handle_alignment(int align);
+
+ // Retrieve the memory pointer that the supplied handle refers to.
+ // Calling this with an invalid handle will CHECK-fail.
+ void* HandleToPointer(const Handle& h) const;
+
+
+ class Status {
+ private:
+ friend class BaseArena;
+ size_t bytes_allocated_;
+ public:
+ Status() : bytes_allocated_(0) { }
+ size_t bytes_allocated() const {
+ return bytes_allocated_;
+ }
+ };
+
+ // Accessors and stats counters
+ // This accessor isn't so useful here, but is included so we can be
+ // type-compatible with ArenaAllocator (in arena-inl.h). That is,
+ // we define arena() because ArenaAllocator does, and that way you
+ // can template on either of these and know it's safe to call arena().
+ virtual BaseArena* arena() { return this; }
+ size_t block_size() const { return block_size_; }
+ int block_count() const;
+ bool is_empty() const {
+ // must check block count in case we allocated a block larger than blksize
+ return freestart_ == freestart_when_empty_ && 1 == block_count();
+ }
+
+ // This should be the worst-case alignment for any type. This is
+ // good for IA-32, SPARC version 7 (the last one I know), and
+ // supposedly Alpha. i386 would be more time-efficient with a
+ // default alignment of 8, but ::operator new() uses alignment of 4,
+ // and an assertion will fail below after the call to MakeNewBlock()
+ // if you try to use a larger alignment.
+#ifdef __i386__
+ static const int kDefaultAlignment = 4;
+#else
+ static const int kDefaultAlignment = 8;
+#endif
+
+ protected:
+ void MakeNewBlock();
+ void* GetMemoryFallback(const size_t size, const int align);
+ void* GetMemory(const size_t size, const int align) {
+ assert(remaining_ <= block_size_); // an invariant
+ if ( size > 0 && size < remaining_ && align == 1 ) { // common case: bump freestart_
+ last_alloc_ = freestart_;
+ freestart_ += size;
+ remaining_ -= size;
+ return reinterpret_cast<void*>(last_alloc_);
+ }
+ return GetMemoryFallback(size, align); // zero size, aligned, or not enough room
+ }
+
+ // This doesn't actually free any memory except for the last piece allocated
+ void ReturnMemory(void* memory, const size_t size) {
+ if ( memory == last_alloc_ && size == freestart_ - last_alloc_ ) {
+ remaining_ += size;
+ freestart_ = last_alloc_;
+ }
+ }
+
+ // This is used by Realloc() -- usually we Realloc just by copying to a
+ // bigger space, but for the last alloc we can realloc by growing the region.
+ bool AdjustLastAlloc(void* last_alloc, const size_t newsize);
+
+ // Since using different alignments for different handles would make
+ // the handles incompatible (e.g., we could end up with the same handle
+ // value referencing two different allocations), the alignment is not passed
+ // as an argument to GetMemoryWithHandle, and handle_alignment_ is used
+ // automatically for all GetMemoryWithHandle calls.
+ void* GetMemoryWithHandle(const size_t size, Handle* handle);
+
+ Status status_;
+ size_t remaining_; // bytes left before another block must be allocated
+
+ private:
+ struct AllocatedBlock {
+ char *mem;
+ size_t size;
+ };
+
+ // The returned AllocatedBlock* is valid until the next call to AllocNewBlock
+ // or Reset (i.e. anything that might affect overflow_blocks_).
+ AllocatedBlock *AllocNewBlock(const size_t block_size);
+
+ const AllocatedBlock *IndexToBlock(int index) const;
+
+ const int first_block_we_own_; // 1 if they pass in 1st block, 0 else
+ const size_t block_size_;
+ char* freestart_; // beginning of the free space in most recent block
+ char* freestart_when_empty_; // beginning of the free space when we're empty
+ char* last_alloc_; // used to make sure ReturnMemory() is safe
+ // STL vector isn't as efficient as it could be, so we use an array at first
+ int blocks_alloced_; // how many of the first_blocks_ have been alloced
+ AllocatedBlock first_blocks_[16]; // the length of this array is arbitrary
+ // if the first_blocks_ aren't enough, expand into overflow_blocks_.
+ std::vector<AllocatedBlock>* overflow_blocks_;
+ const bool page_aligned_; // when true, all blocks need to be page aligned
+ int handle_alignment_; // Alignment to be used when Handles are requested.
+ int handle_alignment_bits_; // log2(handle_alignment_).
+ // The amount of bits required to keep block_size_ (ceil(log2(block_size_))).
+ size_t block_size_bits_;
+
+ void FreeBlocks(); // Frees all except first block
+
+ // This subclass needs to alter permissions for all allocated blocks.
+ friend class ProtectableUnsafeArena;
+
+ DISALLOW_COPY_AND_ASSIGN(BaseArena);
+};
+
+class CTEMPLATE_DLL_DECL UnsafeArena : public BaseArena {
+ public:
+  // Allocates a thread-compatible arena (no locking) with this block size.
+  explicit UnsafeArena(const size_t block_size)
+    : BaseArena(NULL, block_size, false) { }
+  UnsafeArena(const size_t block_size, bool align)
+    : BaseArena(NULL, block_size, align) { }
+
+  // Allocates a thread-compatible arena with the specified block
+  // size. "first_block" must have size "block_size". Memory is
+  // allocated from "first_block" until it is exhausted; after that
+  // memory is allocated by allocating new blocks from the heap.
+  UnsafeArena(char* first_block, const size_t block_size)
+    : BaseArena(first_block, block_size, false) { }
+  UnsafeArena(char* first_block, const size_t block_size, bool align)
+    : BaseArena(first_block, block_size, align) { }
+
+  char* Alloc(const size_t size) {  // unaligned (align == 1) allocation
+    return reinterpret_cast<char*>(GetMemory(size, 1));
+  }
+  void* AllocAligned(const size_t size, const int align) {
+    return GetMemory(size, align);
+  }
+  char* Calloc(const size_t size) {  // Alloc() plus zero-fill
+    void* return_value = Alloc(size);
+    memset(return_value, 0, size);
+    return reinterpret_cast<char*>(return_value);
+  }
+  void* CallocAligned(const size_t size, const int align) {
+    void* return_value = AllocAligned(size, align);
+    memset(return_value, 0, size);
+    return return_value;
+  }
+  // Free does nothing except for the last piece allocated.
+  void Free(void* memory, size_t size) {
+    ReturnMemory(memory, size);
+  }
+  typedef BaseArena::Handle Handle;
+  char* AllocWithHandle(const size_t size, Handle* handle) {
+    return reinterpret_cast<char*>(GetMemoryWithHandle(size, handle));
+  }
+  virtual char* SlowAlloc(size_t size) {  // "slow" 'cause it's virtual
+    return Alloc(size);
+  }
+  virtual void SlowFree(void* memory, size_t size) {  // "slow" 'cause it's virt
+    Free(memory, size);
+  }
+  virtual char* SlowRealloc(char* memory, size_t old_size, size_t new_size) {
+    return Realloc(memory, old_size, new_size);
+  }
+  virtual char* SlowAllocWithHandle(const size_t size, Handle* handle) {
+    return AllocWithHandle(size, handle);
+  }
+
+  char* Memdup(const char* s, size_t bytes) {  // arena copy of s[0..bytes)
+    char* newstr = Alloc(bytes);
+    memcpy(newstr, s, bytes);
+    return newstr;
+  }
+  char* MemdupPlusNUL(const char* s, size_t bytes) {  // like "string(s, len)"
+    char* newstr = Alloc(bytes+1);
+    memcpy(newstr, s, bytes);
+    newstr[bytes] = '\0';
+    return newstr;
+  }
+  Handle MemdupWithHandle(const char* s, size_t bytes) {
+    Handle handle;
+    char* newstr = AllocWithHandle(bytes, &handle);
+    memcpy(newstr, s, bytes);
+    return handle;
+  }
+  char* Strdup(const char* s) {  // copies s including its terminating NUL
+    return Memdup(s, strlen(s) + 1);
+  }
+  // Unlike libc's strncpy, I always NUL-terminate. libc's semantics are dumb.
+  // This will allocate at most n+1 bytes (+1 is for the NUL terminator).
+  char* Strndup(const char* s, size_t n) {
+    // Use memchr so we don't walk past n.
+    // We can't use the one in //strings since this is the base library,
+    // so we have to reinterpret_cast from the libc void *.
+    const char* eos = reinterpret_cast<const char*>(memchr(s, '\0', n));
+    // Copy up to (not including) the first NUL, or all n bytes if there is
+    // none.  We must not read s[n]: the caller only promises n readable
+    // bytes, so let MemdupPlusNUL() supply the terminator instead.
+    const size_t len = (eos == NULL) ? n : static_cast<size_t>(eos - s);
+    return MemdupPlusNUL(s, len);
+  }
+
+  // You can realloc a previously-allocated string either bigger or smaller.
+  // We can be more efficient if you realloc a string right after you allocate
+  // it (eg allocate way-too-much space, fill it, realloc to just-big-enough)
+  char* Realloc(char* s, size_t oldsize, size_t newsize);
+  // If you know the new size is smaller (or equal), you don't need to know
+  // oldsize. We don't check that newsize is smaller, so you'd better be sure!
+  char* Shrink(char* s, size_t newsize) {
+    AdjustLastAlloc(s, newsize);  // reclaim space if we can
+    return s;  // never need to move if we go smaller
+  }
+
+  // We make a copy so you can keep track of status at a given point in time
+  Status status() const { return status_; }
+
+  // Number of bytes remaining before the arena has to allocate another block.
+  size_t bytes_until_next_allocation() const { return remaining_; }
+
+ private:
+  DISALLOW_COPY_AND_ASSIGN(UnsafeArena);
+};
+
+
+
+// we inherit from BaseArena instead of UnsafeArena so that we don't need
+// virtual methods for allocation/deallocation. This means, however,
+// I have to copy the definitions of strdup, strndup, etc. :-(
+
+class CTEMPLATE_DLL_DECL SafeArena : public BaseArena {
+ public:
+  // Allocates a thread-safe arena with the specified block size.
+  explicit SafeArena(const size_t block_size)
+    : BaseArena(NULL, block_size, false) { }
+
+  // Allocates a thread-safe arena with the specified block size.
+  // "first_block" must have size "block_size". Memory is allocated
+  // from "first_block" until it is exhausted; after that memory is
+  // allocated by allocating new blocks from the heap.
+  SafeArena(char* first_block, const size_t block_size)
+    : BaseArena(first_block, block_size, false) { }
+
+  virtual void Reset() LOCKS_EXCLUDED(mutex_) {
+    MutexLock lock(&mutex_);  // in case two threads Reset() at same time
+    BaseArena::Reset();
+  }
+
+  char* Alloc(const size_t size) LOCKS_EXCLUDED(mutex_) {
+    MutexLock lock(&mutex_);
+    return reinterpret_cast<char*>(GetMemory(size, 1));
+  }
+  void* AllocAligned(const size_t size, const int align)
+      LOCKS_EXCLUDED(mutex_) {
+    MutexLock lock(&mutex_);
+    return GetMemory(size, align);
+  }
+  char* Calloc(const size_t size) {  // Alloc() takes the lock; memset needs none
+    void* return_value = Alloc(size);
+    memset(return_value, 0, size);
+    return reinterpret_cast<char*>(return_value);
+  }
+  void* CallocAligned(const size_t size, const int align) {
+    void* return_value = AllocAligned(size, align);
+    memset(return_value, 0, size);
+    return return_value;
+  }
+  // Free does nothing except for the last piece allocated.
+  void Free(void* memory, size_t size) LOCKS_EXCLUDED(mutex_) {
+    MutexLock lock(&mutex_);
+    ReturnMemory(memory, size);
+  }
+  typedef BaseArena::Handle Handle;
+  char* AllocWithHandle(const size_t size, Handle* handle)
+      LOCKS_EXCLUDED(mutex_) {
+    MutexLock lock(&mutex_);
+    return reinterpret_cast<char*>(GetMemoryWithHandle(size, handle));
+  }
+  virtual char* SlowAlloc(size_t size) {  // "slow" 'cause it's virtual
+    return Alloc(size);
+  }
+  virtual void SlowFree(void* memory, size_t size) {  // "slow" 'cause it's virt
+    Free(memory, size);
+  }
+  virtual char* SlowRealloc(char* memory, size_t old_size, size_t new_size) {
+    return Realloc(memory, old_size, new_size);
+  }
+  virtual char* SlowAllocWithHandle(const size_t size, Handle* handle) {
+    return AllocWithHandle(size, handle);
+  }
+
+  char* Memdup(const char* s, size_t bytes) {  // arena copy of s[0..bytes)
+    char* newstr = Alloc(bytes);
+    memcpy(newstr, s, bytes);
+    return newstr;
+  }
+  char* MemdupPlusNUL(const char* s, size_t bytes) {  // like "string(s, len)"
+    char* newstr = Alloc(bytes+1);
+    memcpy(newstr, s, bytes);
+    newstr[bytes] = '\0';
+    return newstr;
+  }
+  Handle MemdupWithHandle(const char* s, size_t bytes) {
+    Handle handle;
+    char* newstr = AllocWithHandle(bytes, &handle);
+    memcpy(newstr, s, bytes);
+    return handle;
+  }
+  char* Strdup(const char* s) {  // copies s including its terminating NUL
+    return Memdup(s, strlen(s) + 1);
+  }
+  // Unlike libc's strncpy, I always NUL-terminate. libc's semantics are dumb.
+  // This will allocate at most n+1 bytes (+1 is for the NUL terminator).
+  char* Strndup(const char* s, size_t n) {
+    // Use memchr so we don't walk past n.
+    // We can't use the one in //strings since this is the base library,
+    // so we have to reinterpret_cast from the libc void *.
+    const char* eos = reinterpret_cast<const char*>(memchr(s, '\0', n));
+    // Copy up to (not including) the first NUL, or all n bytes if there is
+    // none.  We must not read s[n]: the caller only promises n readable
+    // bytes, so let MemdupPlusNUL() supply the terminator instead.
+    const size_t len = (eos == NULL) ? n : static_cast<size_t>(eos - s);
+    return MemdupPlusNUL(s, len);
+  }
+
+  // You can realloc a previously-allocated string either bigger or smaller.
+  // We can be more efficient if you realloc a string right after you allocate
+  // it (eg allocate way-too-much space, fill it, realloc to just-big-enough)
+  char* Realloc(char* s, size_t oldsize, size_t newsize)
+      LOCKS_EXCLUDED(mutex_);
+  // If you know the new size is smaller (or equal), you don't need to know
+  // oldsize. We don't check that newsize is smaller, so you'd better be sure!
+  char* Shrink(char* s, size_t newsize) LOCKS_EXCLUDED(mutex_) {
+    MutexLock lock(&mutex_);
+    AdjustLastAlloc(s, newsize);  // reclaim space if we can
+    return s;  // we never need to move if we go smaller
+  }
+
+  Status status() LOCKS_EXCLUDED(mutex_) {  // returns a copy taken under the lock
+    MutexLock lock(&mutex_);
+    return status_;
+  }
+
+  // Number of bytes remaining before the arena has to allocate another block.
+  size_t bytes_until_next_allocation() LOCKS_EXCLUDED(mutex_) {
+    MutexLock lock(&mutex_);
+    return remaining_;
+  }
+
+ protected:
+  Mutex mutex_;  // guards the BaseArena state touched by the methods above
+
+ private:
+  DISALLOW_COPY_AND_ASSIGN(SafeArena);
+};
+
+}
+
+#endif // BASE_ARENA_H_
diff --git a/src/base/fileutil.h b/src/base/fileutil.h
new file mode 100644
index 0000000..4a207bb
--- /dev/null
+++ b/src/base/fileutil.h
@@ -0,0 +1,106 @@
+// Copyright (c) 2011, Google Inc.
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+// * Neither the name of Google Inc. nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+// ---
+// Author: csilvers@google.com (Craig Silverstein)
+//
+// A tiny wrapper around struct stat and FILE*.
+
+#ifndef TEMPLATE_OPENSOURCE_FILEUTIL_H_
+#define TEMPLATE_OPENSOURCE_FILEUTIL_H_
+
+#include <config.h>
+#include <stdio.h>
+#include <sys/stat.h>
+#include <time.h>
+#ifdef HAVE_UNISTD_H
+# include <unistd.h>
+#endif
+#include <string>
+
+namespace ctemplate {
+
+class FileStat {  // the pieces of a stat() result that File::Stat() exposes
+ public:
+  time_t mtime;  // copied from st_mtime by File::Stat()
+  off_t length;  // copied from st_size by File::Stat()
+  bool IsDirectory() const { return S_ISDIR(internal_statbuf.st_mode); }
+
+ private:
+  friend class File;  // File::Stat() fills in internal_statbuf directly
+  struct stat internal_statbuf;  // raw buffer written by stat()
+};
+
+class File {
+ public:
+  // Stats filename into *statbuf; returns false (leaving mtime and length
+  // untouched) when stat() fails.
+  static bool Stat(const std::string& filename, FileStat* statbuf) {
+    struct stat* const raw = &statbuf->internal_statbuf;
+    if (stat(filename.c_str(), raw) != 0) return false;
+    statbuf->mtime = raw->st_mtime;
+    statbuf->length = raw->st_size;
+    return true;
+  }
+
+  // True when access() reports that filename can be read.
+  static bool Readable(const char* filename) {
+    return 0 == access(filename, R_OK);
+  }
+
+  // Opens filename via fopen().  A bare "r" or "w" mode gets a 'b' appended
+  // so we do the right thing even on Windows (on unix this is a noop).
+  // Returns NULL if fopen() fails, else a heap-allocated File.
+  static File* Open(const char* filename, const char* mode) {
+    const bool bare_rw = (mode[0] == 'r' || mode[0] == 'w') && mode[1] == '\0';
+    const char with_b[3] = { mode[0], 'b', '\0' };
+    FILE* const stream = fopen(filename, bare_rw ? with_b : mode);
+    return stream ? new File(stream) : NULL;
+  }
+
+  // Reads up to size bytes into buf; returns whatever fread() reports.
+  size_t Read(char* buf, size_t size) {
+    return fread(buf, 1, size, fp_);
+  }
+
+  // Closes the stream and destroys this object; the File must not be
+  // used after Close() returns.
+  void Close() {
+    fclose(fp_);
+    delete this;  // naughty naughty!
+  }
+
+ private:
+  explicit File(FILE* fp) : fp_(fp) { }  // instances come from Open()
+  FILE* fp_;  // the stream wrapped by this File; closed in Close()
+};
+
+}
+
+#endif // TEMPLATE_OPENSOURCE_FILEUTIL_H_
diff --git a/src/base/macros.h b/src/base/macros.h
new file mode 100644
index 0000000..9d0327c
--- /dev/null
+++ b/src/base/macros.h
@@ -0,0 +1,114 @@
+// Copyright 2011 Google Inc. All Rights Reserved.
+// Author: csilvers@google.com (Craig Silverstein)
+//
+// Provides macros and typedefs based on config.h settings.
+// Provides the following macros:
+// UNALIGNED_LOAD32 (may be an inline function on some architectures)
+// and the following typedefs:
+// uint32
+// uint64
+
+#ifndef CTEMPLATE_MACROS_H_
+#define CTEMPLATE_MACROS_H_
+
+#include <config.h>
+#include <string.h>      // memcpy(), used by the UNALIGNED_LOAD32 inline functions
+#ifdef HAVE_STDINT_H
+#include <stdint.h> // the normal place uint32_t is defined
+#endif
+#ifdef HAVE_SYS_TYPES_H
+#include <sys/types.h> // the normal place u_int32_t is defined
+#endif
+#ifdef HAVE_INTTYPES_H
+# include <inttypes.h> // a third place for uint32_t or u_int32_t
+#endif
+
+
+#if defined(HAVE_U_INT32_T)  // first match wins: u_int32_t, then uint32_t, then __int32
+typedef u_int32_t uint32;
+#elif defined(HAVE_UINT32_T)
+typedef uint32_t uint32;
+#elif defined(HAVE___INT32)
+typedef unsigned __int32 uint32;
+#endif  // NOTE(review): there is no #else, so uint32 is simply not declared if none matched
+
+#if defined(HAVE_U_INT64_T)  // same preference order for the 64-bit typedef
+typedef u_int64_t uint64;
+#elif defined(HAVE_UINT64_T)
+typedef uint64_t uint64;
+#elif defined(HAVE___INT64)
+typedef unsigned __int64 uint64;
+#endif  // NOTE(review): likewise, uint64 is not declared if none matched
+
+
+// This is all to figure out endian-ness and byte-swapping on various systems
+#if defined(HAVE_ENDIAN_H)
+#include <endian.h> // for the __BYTE_ORDER use below
+#elif defined(HAVE_SYS_ENDIAN_H)
+#include <sys/endian.h> // location on FreeBSD
+#elif defined(HAVE_MACHINE_ENDIAN_H)
+#include <machine/endian.h> // location on OS X
+#endif
+#if defined(HAVE_SYS_BYTEORDER_H)
+#include <sys/byteorder.h> // BSWAP_32 on Solaris 10
+#endif
+#ifdef HAVE_SYS_ISA_DEFS_H
+#include <sys/isa_defs.h> // _BIG_ENDIAN/_LITTLE_ENDIAN on Solaris 10
+#endif
+
+// MurmurHash does a lot of 4-byte unaligned integer access. It
+// interprets these integers in little-endian order. This is perfect
+// on x86, for which this is a natural memory access; for other systems
+// we do what we can to make this as efficient as possible.
+#if defined(HAVE_BYTESWAP_H)
+# include <byteswap.h>              // GNU (especially linux)
+# define BSWAP32(x) bswap_32(x)
+#elif defined(HAVE_LIBKERN_OSBYTEORDER_H)
+# include <libkern/OSByteOrder.h>   // OS X
+# define BSWAP32(x) OSSwapInt32(x)
+#elif defined(bswap32)              // FreeBSD
+  // FreeBSD defines bswap32 as a macro in sys/endian.h (already #included)
+# define BSWAP32(x) bswap32(x)
+#elif defined(BSWAP_32)             // Solaris 10
+  // Solaris defines BSWAP_32 as a macro in sys/byteorder.h (already #included)
+# define BSWAP32(x) BSWAP_32(x)
+#elif !defined(BSWAP32)             // portable fallback: a pure expression, so no trailing ';'
+# define BSWAP32(x) ((((x) & 0x000000ff) << 24) | \
+                     (((x) & 0x0000ff00) << 8)  | \
+                     (((x) & 0x00ff0000) >> 8)  | \
+                     (((x) & 0xff000000) >> 24))
+#else
+# define CTEMPLATE_BSWAP32_ALREADY_DEFINED  // BSWAP32 came from outside; the cleanup below keeps it
+#endif
+
+#if defined(__i386__) || defined(__x86_64__) || defined(_M_IX86) || defined(_M_X64)
+ // We know they allow unaligned memory access and are little-endian
+# define UNALIGNED_LOAD32(_p) (*reinterpret_cast<const uint32 *>(_p))
+#elif defined(__ppc__) || defined(__ppc64__)
+ // We know they allow unaligned memory access and are big-endian
+# define UNALIGNED_LOAD32(_p) BSWAP32(*reinterpret_cast<const uint32 *>(_p))
+#elif (BYTE_ORDER == 1234) || (_BYTE_ORDER == 1234) || defined(_LITTLE_ENDIAN)
+ // Use memcpy to align the memory properly
+ inline uint32 UNALIGNED_LOAD32(const void *p) {
+ uint32 t;
+ memcpy(&t, p, sizeof(t));
+ return t;  // host order is already little-endian, so no swap
+ }
+#elif (BYTE_ORDER == 4321) || (_BYTE_ORDER == 4321) || defined(_BIG_ENDIAN)
+ inline uint32 UNALIGNED_LOAD32(const void *p) {  // big-endian host: memcpy, then swap
+ uint32 t;
+ memcpy(&t, p, sizeof(t));
+ return BSWAP32(t);  // yields the little-endian interpretation described above
+ }
+#else
+ // Means we can't find endian.h on this machine:
+# error Need to define UNALIGNED_LOAD32 for this architecture
+#endif
+
+#ifndef CTEMPLATE_BSWAP32_ALREADY_DEFINED
+# undef BSWAP32 // don't leak outside this file
+#else
+# undef CTEMPLATE_BSWAP32_ALREADY_DEFINED // just cleaning up
+#endif
+
+#endif // CTEMPLATE_MACROS_H_
diff --git a/src/base/manual_constructor.h b/src/base/manual_constructor.h
new file mode 100644
index 0000000..a5d430c
--- /dev/null
+++ b/src/base/manual_constructor.h
@@ -0,0 +1,237 @@
+// Copyright (c) 2006, Google Inc.
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+// * Neither the name of Google Inc. nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+// ---
+//
+// Author: kenton@google.com (Kenton Varda)
+//
+// ManualConstructor statically-allocates space in which to store some
+// object, but does not initialize it. You can then call the constructor
+// and destructor for the object yourself as you see fit. This is useful
+// for memory management optimizations, where you want to initialize and
+// destroy an object multiple times but only allocate it once.
+//
+// (When I say ManualConstructor statically allocates space, I mean that
+// the ManualConstructor object itself is forced to be the right size.)
+//
+// For example usage, check out util/gtl/small_map.h.
+
+#ifndef UTIL_GTL_MANUAL_CONSTRUCTOR_H_
+#define UTIL_GTL_MANUAL_CONSTRUCTOR_H_
+
+#include <config.h>
+
+namespace ctemplate {
+
+namespace util {
+namespace gtl {
+namespace internal {
+
+//
+// Provides a char array with the exact same alignment as another type. The
+// first parameter must be a complete type, the second parameter is how many
+// of that type to provide space for.
+//
+// UTIL_GTL_ALIGNED_CHAR_ARRAY(struct stat, 16) storage_;
+//
+// Because MSVC and older GCCs require that the argument to their alignment
+// construct to be a literal constant integer, we use a template instantiated
+// at all the possible powers of two.
+#ifndef SWIG
+template<int alignment, int size> struct AlignType { };  // primary has no 'result': only specialized alignments work
+template<int size> struct AlignType<0, size> { typedef char result[size]; };  // alignment 0: plain char array
+#if defined(_MSC_VER)
+#define UTIL_GTL_ALIGN_ATTRIBUTE(X) __declspec(align(X))
+#define UTIL_GTL_ALIGN_OF(T) __alignof(T)
+#elif defined(__GNUC__) || defined(__APPLE__) || defined(__INTEL_COMPILER) \
+ || defined(__nacl__)
+#define UTIL_GTL_ALIGN_ATTRIBUTE(X) __attribute__((aligned(X)))
+#define UTIL_GTL_ALIGN_OF(T) __alignof__(T)
+#endif
+
+#if defined(UTIL_GTL_ALIGN_ATTRIBUTE)
+
+#define UTIL_GTL_ALIGNTYPE_TEMPLATE(X) \
+ template<int size> struct AlignType<X, size> { \
+ typedef UTIL_GTL_ALIGN_ATTRIBUTE(X) char result[size]; \
+ }
+
+UTIL_GTL_ALIGNTYPE_TEMPLATE(1);
+UTIL_GTL_ALIGNTYPE_TEMPLATE(2);
+UTIL_GTL_ALIGNTYPE_TEMPLATE(4);
+UTIL_GTL_ALIGNTYPE_TEMPLATE(8);
+UTIL_GTL_ALIGNTYPE_TEMPLATE(16);
+UTIL_GTL_ALIGNTYPE_TEMPLATE(32);
+UTIL_GTL_ALIGNTYPE_TEMPLATE(64);
+UTIL_GTL_ALIGNTYPE_TEMPLATE(128);
+UTIL_GTL_ALIGNTYPE_TEMPLATE(256);
+UTIL_GTL_ALIGNTYPE_TEMPLATE(512);
+UTIL_GTL_ALIGNTYPE_TEMPLATE(1024);
+UTIL_GTL_ALIGNTYPE_TEMPLATE(2048);
+UTIL_GTL_ALIGNTYPE_TEMPLATE(4096);
+UTIL_GTL_ALIGNTYPE_TEMPLATE(8192);
+// Any larger and MSVC++ will complain.
+
+#define UTIL_GTL_ALIGNED_CHAR_ARRAY(T, Size) \
+ typename util::gtl::internal::AlignType<UTIL_GTL_ALIGN_OF(T), \
+ sizeof(T) * Size>::result
+
+#undef UTIL_GTL_ALIGNTYPE_TEMPLATE
+#undef UTIL_GTL_ALIGN_ATTRIBUTE
+
+#else // defined(UTIL_GTL_ALIGN_ATTRIBUTE)
+#error "You must define UTIL_GTL_ALIGNED_CHAR_ARRAY for your compiler."
+#endif // defined(UTIL_GTL_ALIGN_ATTRIBUTE)
+
+#else // !SWIG
+
+// SWIG can't represent alignment and doesn't care about alignment on data
+// members (it works fine without it).
+template<int Size>  // a byte count, so it must be a non-type parameter to act as an array bound
+struct AlignType { typedef char result[Size]; };
+#define UTIL_GTL_ALIGNED_CHAR_ARRAY(T, Size) \
+ util::gtl::internal::AlignType<Size * sizeof(T)>::result
+
+#endif // !SWIG
+
+} // namespace internal
+} // namespace gtl
+} // namespace util
+
+template <typename Type>
+class ManualConstructor {  // Raw storage sized and aligned for one Type; lifetime is driven by Init()/Destroy().
+ public:
+ // No constructor or destructor because one of the most useful uses of
+ // this class is as part of a union, and members of a union cannot have
+ // constructors or destructors. And, anyway, the whole point of this
+ // class is to bypass these.
+
+ inline Type* get() {  // views the raw storage as a Type; performs no "was Init() called" check
+ return reinterpret_cast<Type*>(space_);
+ }
+ inline const Type* get() const {
+ return reinterpret_cast<const Type*>(space_);
+ }
+
+ inline Type* operator->() { return get(); }
+ inline const Type* operator->() const { return get(); }
+
+ inline Type& operator*() { return *get(); }
+ inline const Type& operator*() const { return *get(); }
+
+ // You can pass up to eleven constructor arguments as arguments of Init().
+ inline void Init() {
+ new(space_) Type;  // placement new into space_; nothing is allocated
+ }
+
+ template <typename T1>
+ inline void Init(const T1& p1) {  // all arguments are taken by const reference and handed to Type's constructor
+ new(space_) Type(p1);
+ }
+
+ template <typename T1, typename T2>
+ inline void Init(const T1& p1, const T2& p2) {
+ new(space_) Type(p1, p2);
+ }
+
+ template <typename T1, typename T2, typename T3>
+ inline void Init(const T1& p1, const T2& p2, const T3& p3) {
+ new(space_) Type(p1, p2, p3);
+ }
+
+ template <typename T1, typename T2, typename T3, typename T4>
+ inline void Init(const T1& p1, const T2& p2, const T3& p3, const T4& p4) {
+ new(space_) Type(p1, p2, p3, p4);
+ }
+
+ template <typename T1, typename T2, typename T3, typename T4, typename T5>
+ inline void Init(const T1& p1, const T2& p2, const T3& p3, const T4& p4,
+ const T5& p5) {
+ new(space_) Type(p1, p2, p3, p4, p5);
+ }
+
+ template <typename T1, typename T2, typename T3, typename T4, typename T5,
+ typename T6>
+ inline void Init(const T1& p1, const T2& p2, const T3& p3, const T4& p4,
+ const T5& p5, const T6& p6) {
+ new(space_) Type(p1, p2, p3, p4, p5, p6);
+ }
+
+ template <typename T1, typename T2, typename T3, typename T4, typename T5,
+ typename T6, typename T7>
+ inline void Init(const T1& p1, const T2& p2, const T3& p3, const T4& p4,
+ const T5& p5, const T6& p6, const T7& p7) {
+ new(space_) Type(p1, p2, p3, p4, p5, p6, p7);
+ }
+
+ template <typename T1, typename T2, typename T3, typename T4, typename T5,
+ typename T6, typename T7, typename T8>
+ inline void Init(const T1& p1, const T2& p2, const T3& p3, const T4& p4,
+ const T5& p5, const T6& p6, const T7& p7, const T8& p8) {
+ new(space_) Type(p1, p2, p3, p4, p5, p6, p7, p8);
+ }
+
+ template <typename T1, typename T2, typename T3, typename T4, typename T5,
+ typename T6, typename T7, typename T8, typename T9>
+ inline void Init(const T1& p1, const T2& p2, const T3& p3, const T4& p4,
+ const T5& p5, const T6& p6, const T7& p7, const T8& p8,
+ const T9& p9) {
+ new(space_) Type(p1, p2, p3, p4, p5, p6, p7, p8, p9);
+ }
+
+ template <typename T1, typename T2, typename T3, typename T4, typename T5,
+ typename T6, typename T7, typename T8, typename T9, typename T10>
+ inline void Init(const T1& p1, const T2& p2, const T3& p3, const T4& p4,
+ const T5& p5, const T6& p6, const T7& p7, const T8& p8,
+ const T9& p9, const T10& p10) {
+ new(space_) Type(p1, p2, p3, p4, p5, p6, p7, p8, p9, p10);
+ }
+
+ template <typename T1, typename T2, typename T3, typename T4, typename T5,
+ typename T6, typename T7, typename T8, typename T9, typename T10,
+ typename T11>
+ inline void Init(const T1& p1, const T2& p2, const T3& p3, const T4& p4,
+ const T5& p5, const T6& p6, const T7& p7, const T8& p8,
+ const T9& p9, const T10& p10, const T11& p11) {
+ new(space_) Type(p1, p2, p3, p4, p5, p6, p7, p8, p9, p10, p11);
+ }
+
+ inline void Destroy() {  // runs Type's destructor in place; the storage itself stays
+ get()->~Type();
+ }
+
+ private:
+ UTIL_GTL_ALIGNED_CHAR_ARRAY(Type, 1) space_;  // char array with room for exactly one Type
+};
+
+#undef UTIL_GTL_ALIGNED_CHAR_ARRAY
+#undef UTIL_GTL_ALIGN_OF
+
+}
+
+#endif // UTIL_GTL_MANUAL_CONSTRUCTOR_H_
diff --git a/src/base/mutex.h b/src/base/mutex.h
new file mode 100644
index 0000000..3962a6d
--- /dev/null
+++ b/src/base/mutex.h
@@ -0,0 +1,408 @@
+// Copyright (c) 2007, Google Inc.
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+// * Neither the name of Google Inc. nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+// ---
+//
+// A simple mutex wrapper, supporting locks and read-write locks.
+// You should assume the locks are *not* re-entrant.
+//
+// To use: you should define the following macros in your configure.ac:
+// ACX_PTHREAD
+// AC_RWLOCK
+// The latter is defined in ../autoconf.
+//
+// This class is meant to be internal-only and should be wrapped by an
+// internal namespace. Before you use this module, please give the
+// name of your internal namespace for this module. Or, if you want
+// to expose it, you'll want to move it to the Google namespace. We
+// cannot put this class in global namespace because there can be some
+// problems when we have multiple versions of Mutex in each shared object.
+//
+// NOTE: by default, we have #ifdef'ed out the TryLock() method.
+// This is for two reasons:
+// 1) TryLock() under Windows is a bit annoying (it requires a
+// #define to be defined very early).
+// 2) TryLock() is broken for NO_THREADS mode, at least in NDEBUG
+// mode.
+// If you need TryLock(), and either these two caveats are not a
+// problem for you, or you're willing to work around them, then
+// feel free to #define GMUTEX_TRYLOCK, or to remove the #ifdefs
+// in the code below.
+//
+// CYGWIN NOTE: Cygwin support for rwlock seems to be buggy:
+// http://www.cygwin.com/ml/cygwin/2008-12/msg00017.html
+// Because of that, we might as well use windows locks for
+// cygwin. They seem to be more reliable than the cygwin pthreads layer.
+//
+// TRICKY IMPLEMENTATION NOTE:
+// This class is designed to be safe to use during
+// dynamic-initialization -- that is, by global constructors that are
+// run before main() starts. The issue in this case is that
+// dynamic-initialization happens in an unpredictable order, and it
+// could be that someone else's dynamic initializer could call a
+// function that tries to acquire this mutex -- but that all happens
+// before this mutex's constructor has run. (This can happen even if
+// the mutex and the function that uses the mutex are in the same .cc
+// file.) Basically, because Mutex does non-trivial work in its
+// constructor, it's not, in the naive implementation, safe to use
+// before dynamic initialization has run on it.
+//
+// The solution used here is to pair the actual mutex primitive with a
+// bool that is set to true when the mutex is dynamically initialized.
+// (Before that it's false.) Then we modify all mutex routines to
+// look at the bool, and not try to lock/unlock until the bool makes
+// it to true (which happens after the Mutex constructor has run.)
+//
+// This works because before main() starts -- particularly, during
+// dynamic initialization -- there are no threads, so a) it's ok that
+// the mutex operations are a no-op, since we don't need locking then
+// anyway; and b) we can be quite confident our bool won't change
+// state between a call to Lock() and a call to Unlock() (that would
+// require a global constructor in one translation unit to call Lock()
+// and another global constructor in another translation unit to call
+// Unlock() later, which is pretty perverse).
+//
+// That said, it's tricky, and can conceivably fail; it's safest to
+// avoid trying to acquire a mutex in a global constructor, if you
+// can. One way it can fail is that a really smart compiler might
+// initialize the bool to true at static-initialization time (too
+// early) rather than at dynamic-initialization time. To discourage
+// that, we set is_safe_ to true in code (not the constructor
+// colon-initializer) and set it to true via a function that always
+// evaluates to true, but that the compiler can't know always
+// evaluates to true. This should be good enough.
+//
+// A related issue is code that could try to access the mutex
+// after it's been destroyed in the global destructors (because
+// the Mutex global destructor runs before some other global
+// destructor, that tries to acquire the mutex). The way we
+// deal with this is by taking a constructor arg that global
+// mutexes should pass in, that causes the destructor to do no
+// work. We still depend on the compiler not doing anything
+// weird to a Mutex's memory after it is destroyed, but for a
+// static global variable, that's pretty safe.
+
+#ifndef GOOGLE_MUTEX_H_
+#define GOOGLE_MUTEX_H_
+
+#include <config.h>
+#if defined(NO_THREADS)
+ typedef int MutexType; // to keep a lock-count
+#elif defined(_WIN32) || defined(__CYGWIN32__) || defined(__CYGWIN64__)
+# ifndef WIN32_LEAN_AND_MEAN
+# define WIN32_LEAN_AND_MEAN // We only need minimal includes
+# endif
+# ifndef NOMINMAX
+# define NOMINMAX // Don't want windows to override min()/max()
+# endif
+# ifdef GMUTEX_TRYLOCK
+ // We need Windows NT or later for TryEnterCriticalSection(). If you
+ // don't need that functionality, you can remove these _WIN32_WINNT
+ // lines, and change TryLock() to assert(0) or something.
+# ifndef _WIN32_WINNT
+# define _WIN32_WINNT 0x0400
+# endif
+# endif
+# include <windows.h>
+ typedef CRITICAL_SECTION MutexType;
+#elif defined(HAVE_PTHREAD) && defined(HAVE_RWLOCK)
+ // Needed for pthread_rwlock_*. If it causes problems, you could take it
+ // out, but then you'd have to unset HAVE_RWLOCK (at least on linux -- it
+ // *does* cause problems for FreeBSD, or MacOSX, but isn't needed
+ // for locking there.)
+# ifdef __linux__
+# if _XOPEN_SOURCE < 500 // including not being defined at all
+# undef _XOPEN_SOURCE
+# define _XOPEN_SOURCE 500 // may be needed to get the rwlock calls
+# endif
+# endif
+#if defined(HAVE_PTHREAD) && !defined(NO_THREADS)
+# include <pthread.h>
+#endif
+ typedef pthread_rwlock_t MutexType;
+#elif defined(HAVE_PTHREAD)
+#if defined(HAVE_PTHREAD) && !defined(NO_THREADS)
+# include <pthread.h>
+#endif
+ typedef pthread_mutex_t MutexType;
+#else
+# error Need to implement mutex.h for your architecture, or #define NO_THREADS
+#endif
+
+#include <assert.h>
+#include <stdlib.h> // for abort()
+
+namespace ctemplate {
+
+namespace base {
+// Tag type whose only purpose is to select Mutex's single-arg constructor.
+enum LinkerInitialized { LINKER_INITIALIZED };
+}
+
+class Mutex {
+ public:
+ // Create a Mutex that is not held by anybody. This constructor is
+ // typically used for Mutexes allocated on the heap or the stack.
+ inline Mutex();
+ // This constructor should be used for global, static Mutex objects.
+ // It inhibits work being done by the destructor, which makes it
+ // safer for code that tries to acquire this mutex in its global
+ // destructor.
+ inline Mutex(base::LinkerInitialized);
+
+ // Destructor
+ inline ~Mutex();
+
+ inline void Lock(); // Block if needed until free then acquire exclusively
+ inline void Unlock(); // Release a lock acquired via Lock()
+#ifdef GMUTEX_TRYLOCK
+ inline bool TryLock(); // If free, Lock() and return true, else return false
+#endif
+ // Note that on systems that don't support read-write locks, these may
+ // be implemented as synonyms to Lock() and Unlock(). So you can use
+ // these for efficiency, but don't use them anyplace where being able
+ // to do shared reads is necessary to avoid deadlock.
+ inline void ReaderLock(); // Block until free or shared then acquire a share
+ inline void ReaderUnlock(); // Release a read share of this Mutex
+ inline void WriterLock() { Lock(); } // Acquire an exclusive lock
+ inline void WriterUnlock() { Unlock(); } // Release a lock from WriterLock()
+
+ private:
+ MutexType mutex_;  // the platform primitive selected above (a plain lock count under NO_THREADS)
+ // We want to make sure that the compiler sets is_safe_ to true only
+ // when we tell it to, and never makes assumptions is_safe_ is
+ // always true. volatile is the most reliable way to do that.
+ volatile bool is_safe_;
+ // This indicates which constructor was called.
+ bool destroy_;  // threaded builds: true from Mutex(), false from Mutex(LinkerInitialized)
+
+ inline void SetIsSafe() { is_safe_ = true; }
+
+ // Catch the error of writing Mutex when intending MutexLock.
+ Mutex(Mutex* /*ignored*/) {}  // private, so such a mistake is rejected at the call site
+ // Disallow "evil" constructors
+ Mutex(const Mutex&);
+ void operator=(const Mutex&);
+};
+
+// We will also define GoogleOnceType, GOOGLE_ONCE_INIT, and
+// GoogleOnceInit, which are portable versions of pthread_once_t,
+// PTHREAD_ONCE_INIT, and pthread_once.
+
+// Now the implementation of Mutex for various systems
+#if defined(NO_THREADS)
+
+// When we don't have threads, we can be either reading or writing,
+// but not both. We can have lots of readers at once (in no-threads
+// mode, that's most likely to happen in recursive function calls),
+// but only one writer. We represent this by having mutex_ be -1 when
+// writing and a number > 0 when reading (and 0 when no lock is held).
+//
+// In debug mode, we assert these invariants, while in non-debug mode
+// we do nothing, for efficiency. That's why everything is in an
+// assert.
+
+Mutex::Mutex() : mutex_(0) { }  // is_safe_ and destroy_ stay unset; no method in this branch reads them
+Mutex::Mutex(base::LinkerInitialized) : mutex_(0) { }
+Mutex::~Mutex() { assert(mutex_ == 0); }  // debug check: nothing may still hold the lock
+void Mutex::Lock() { assert(--mutex_ == -1); }  // the decrement is inside assert(), so NDEBUG removes it too
+void Mutex::Unlock() { assert(mutex_++ == -1); }
+#ifdef GMUTEX_TRYLOCK
+bool Mutex::TryLock() { if (mutex_) return false; Lock(); return true; }  // reads mutex_, which NDEBUG builds never update
+#endif
+void Mutex::ReaderLock() { assert(++mutex_ > 0); }  // readers count upward from 0
+void Mutex::ReaderUnlock() { assert(mutex_-- > 0); }
+
+typedef int GoogleOnceType;                 // holds how many GoogleOnceInit() calls were made
+const GoogleOnceType GOOGLE_ONCE_INIT = 0;  // zero calls so far: init_routine has not run
+inline int GoogleOnceInit(GoogleOnceType* once_control,
+                          void (*init_routine)(void)) {
+  const GoogleOnceType calls_before = (*once_control)++;  // counter is bumped before the routine runs
+  if (calls_before == 0) init_routine();                  // only the very first call runs it
+  return 0;                                               // unconditionally 0
+}
+
+#elif defined(_WIN32) || defined(__CYGWIN32__) || defined(__CYGWIN64__)
+
+Mutex::Mutex() : destroy_(true) {
+ InitializeCriticalSection(&mutex_);
+ SetIsSafe();  // flipped only after mutex_ is initialized, so Lock()/Unlock() never touch a raw section
+}
+Mutex::Mutex(base::LinkerInitialized) : destroy_(false) {  // destroy_ false: ~Mutex() leaves mutex_ alone
+ InitializeCriticalSection(&mutex_);
+ SetIsSafe();
+}
+Mutex::~Mutex() { if (destroy_) DeleteCriticalSection(&mutex_); }
+void Mutex::Lock() { if (is_safe_) EnterCriticalSection(&mutex_); }  // no-op while is_safe_ is false
+void Mutex::Unlock() { if (is_safe_) LeaveCriticalSection(&mutex_); }
+#ifdef GMUTEX_TRYLOCK
+bool Mutex::TryLock() { return is_safe_ ?
+ TryEnterCriticalSection(&mutex_) != 0 : true; }  // reports success when is_safe_ is false
+#endif
+void Mutex::ReaderLock() { Lock(); } // we don't have read-write locks
+void Mutex::ReaderUnlock() { Unlock(); }
+
+// We do a simple spinlock for pthread_once_t. See
+// http://www.ddj.com/cpp/199203083?pgno=3
+#ifdef INTERLOCKED_EXCHANGE_NONVOLATILE
+typedef LONG GoogleOnceType;
+#else
+typedef volatile LONG GoogleOnceType;
+#endif
+const GoogleOnceType GOOGLE_ONCE_INIT = 0;  // states used below: 0 = not started, 1 = init running, 2 = done
+inline int GoogleOnceInit(GoogleOnceType* once_control,
+ void (*init_routine)(void)) {
+ while (1) {
+ LONG prev = InterlockedCompareExchange(once_control, 1, 0);  // try to claim the 0 -> 1 transition
+ if (prev == 2) { // We've successfully initted in the past.
+ return 0;
+ } else if (prev == 0) { // No init yet, but we have the lock.
+ (*init_routine)();
+ InterlockedExchange(once_control, 2);  // mark done so the waiting callers return
+ return 0;
+ } else { // Someone else is holding the lock, so wait.
+ assert(1 == prev);
+ Sleep(1); // sleep for 1ms
+ }
+ }
+ return 1; // unreachable
+}
+
+#elif defined(HAVE_PTHREAD) && defined(HAVE_RWLOCK)
+
+#define SAFE_PTHREAD(fncall) do { /* run fncall if is_safe_ is true */ \
+ if (is_safe_ && fncall(&mutex_) != 0) abort(); \
+} while (0)  // any non-zero pthread return code aborts the process
+
+Mutex::Mutex() : destroy_(true) {
+ SetIsSafe();  // set in the body, not the initializer list (see the file header note)
+ if (is_safe_ && pthread_rwlock_init(&mutex_, NULL) != 0) abort();
+}
+Mutex::Mutex(base::LinkerInitialized) : destroy_(false) {  // destroy_ false: ~Mutex() skips the destroy call
+ SetIsSafe();
+ if (is_safe_ && pthread_rwlock_init(&mutex_, NULL) != 0) abort();
+}
+Mutex::~Mutex() { if (destroy_) SAFE_PTHREAD(pthread_rwlock_destroy); }
+void Mutex::Lock() { SAFE_PTHREAD(pthread_rwlock_wrlock); }  // exclusive (writer) side of the rwlock
+void Mutex::Unlock() { SAFE_PTHREAD(pthread_rwlock_unlock); }
+#ifdef GMUTEX_TRYLOCK
+bool Mutex::TryLock() { return is_safe_ ?
+ pthread_rwlock_trywrlock(&mutex_) == 0 : true; }  // reports success when is_safe_ is false
+#endif
+void Mutex::ReaderLock() { SAFE_PTHREAD(pthread_rwlock_rdlock); }  // shared (reader) side of the rwlock
+void Mutex::ReaderUnlock() { SAFE_PTHREAD(pthread_rwlock_unlock); }
+#undef SAFE_PTHREAD
+
+typedef pthread_once_t GoogleOnceType;
+const GoogleOnceType GOOGLE_ONCE_INIT = PTHREAD_ONCE_INIT;
+inline int GoogleOnceInit(GoogleOnceType* once_control,
+ void (*init_routine)(void)) {
+ return pthread_once(once_control, init_routine);  // direct passthrough, including the return code
+}
+
+#elif defined(HAVE_PTHREAD)
+
+#define SAFE_PTHREAD(fncall) do { /* run fncall if is_safe_ is true */ \
+ if (is_safe_ && fncall(&mutex_) != 0) abort(); \
+} while (0)  // any non-zero pthread return code aborts the process
+
+Mutex::Mutex() : destroy_(true) {
+ SetIsSafe();  // set in the body, not the initializer list (see the file header note)
+ if (is_safe_ && pthread_mutex_init(&mutex_, NULL) != 0) abort();
+}
+Mutex::Mutex(base::LinkerInitialized) : destroy_(false) {  // destroy_ false: ~Mutex() skips the destroy call
+ SetIsSafe();
+ if (is_safe_ && pthread_mutex_init(&mutex_, NULL) != 0) abort();
+}
+Mutex::~Mutex() { if (destroy_) SAFE_PTHREAD(pthread_mutex_destroy); }
+void Mutex::Lock() { SAFE_PTHREAD(pthread_mutex_lock); }
+void Mutex::Unlock() { SAFE_PTHREAD(pthread_mutex_unlock); }
+#ifdef GMUTEX_TRYLOCK
+bool Mutex::TryLock() { return is_safe_ ?
+ pthread_mutex_trylock(&mutex_) == 0 : true; }  // reports success when is_safe_ is false
+#endif
+void Mutex::ReaderLock() { Lock(); }  // no rwlock in this branch: readers take the exclusive lock
+void Mutex::ReaderUnlock() { Unlock(); }
+#undef SAFE_PTHREAD
+
+typedef pthread_once_t GoogleOnceType;
+const GoogleOnceType GOOGLE_ONCE_INIT = PTHREAD_ONCE_INIT;
+inline int GoogleOnceInit(GoogleOnceType* once_control,
+ void (*init_routine)(void)) {
+ return pthread_once(once_control, init_routine);  // direct passthrough, including the return code
+}
+
+#endif
+
+// --------------------------------------------------------------------------
+// Some helper classes
+
+// MutexLock(mu) acquires mu when constructed and releases it when destroyed.
+class MutexLock {  // Scoped exclusive lock: Lock() in the constructor, Unlock() in the destructor.
+ public:
+  explicit MutexLock(Mutex* mutex) : held_(mutex) { held_->Lock(); }
+  ~MutexLock() { held_->Unlock(); }
+ private:
+  Mutex* const held_;  // the mutex kept locked for this guard's lifetime; never deleted here
+  // Copying and assignment are private and have no body in this class.
+  MutexLock(const MutexLock&);
+  void operator=(const MutexLock&);
+};
+
+// ReaderMutexLock and WriterMutexLock do the same, for rwlocks
+class ReaderMutexLock {  // Scoped shared lock: ReaderLock() in the constructor, ReaderUnlock() in the destructor.
+ public:
+  explicit ReaderMutexLock(Mutex* mutex) : held_(mutex) { held_->ReaderLock(); }
+  ~ReaderMutexLock() { held_->ReaderUnlock(); }
+ private:
+  Mutex* const held_;  // the mutex held for reading during this guard's lifetime; never deleted here
+  // Copying and assignment are private and have no body in this class.
+  ReaderMutexLock(const ReaderMutexLock&);
+  void operator=(const ReaderMutexLock&);
+};
+
+class WriterMutexLock {  // Scoped exclusive lock: WriterLock() in the constructor, WriterUnlock() in the destructor.
+ public:
+  explicit WriterMutexLock(Mutex* mutex) : held_(mutex) { held_->WriterLock(); }
+  ~WriterMutexLock() { held_->WriterUnlock(); }
+ private:
+  Mutex* const held_;  // the mutex held for writing during this guard's lifetime; never deleted here
+  // Copying and assignment are private and have no body in this class.
+  WriterMutexLock(const WriterMutexLock&);
+  void operator=(const WriterMutexLock&);
+};
+
+// Catch bug where variable name is omitted, e.g. MutexLock (&mu);
+#define MutexLock(x) COMPILE_ASSERT(0, mutex_lock_decl_missing_var_name)
+#define ReaderMutexLock(x) COMPILE_ASSERT(0, rmutex_lock_decl_missing_var_name)
+#define WriterMutexLock(x) COMPILE_ASSERT(0, wmutex_lock_decl_missing_var_name)
+
+}
+
+#endif /* GOOGLE_MUTEX_H_ */
diff --git a/src/base/small_map.h b/src/base/small_map.h
new file mode 100644
index 0000000..3e17d71
--- /dev/null
+++ b/src/base/small_map.h
@@ -0,0 +1,569 @@
+// Copyright (c) 2006, Google Inc.
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+// * Neither the name of Google Inc. nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+// ---
+//
+// Author: kenton@google.com (Kenton Varda)
+//
+// small_map is a drop-in replacement for map or hash_map. It uses a fixed
+// array to store a certain number of elements, then reverts to using a
+// map or hash_map when it runs out of space. For maps that are typically
+// small, this can be considerably faster than using something like hash_map
+// directly, as hash_map is optimized for large data sets. Of course, in
+// order for this to be a significant win, you have to have a situation where
+// you are using lots and lots of these small maps. One such situation is
+// MessageSet: A set of search results may contain thousands of MessageSets,
+// each containing only a couple items.
+//
+// TODO(kenton): This is very minimal, and was originally written for a
+// very specific use (MessageSet). It only implements a few core methods
+// of the STL associative container interface, though you are welcome to
+// extend it.
+
+#ifndef UTIL_GTL_SMALL_MAP_H_
+#define UTIL_GTL_SMALL_MAP_H_
+
+#include <config.h>
+#include <assert.h>
+#include <utility> // for make_pair()
+#include "base/manual_constructor.h"
+
+namespace ctemplate {
+
+template <bool> struct CompileAssert { };  // tag type used only as the element type of the COMPILE_ASSERT typedef
+#define COMPILE_ASSERT(expr, msg) \
+ typedef CompileAssert<(bool(expr))> msg[bool(expr) ? 1 : -1]  // array size is -1 (ill-formed) when expr is false; msg names the typedef
+
+// An STL-like associative container which starts out backed by a simple
+// array but switches to some other container type if it grows beyond a
+// fixed size.
+//
+// NormalMap: The map type to fall back to. This also defines the key
+// and value types for the small_map.
+// kArraySize: The size of the initial array of results. Once the map
+// grows beyond this size, the map type will be used instead.
+// EqualKey: A functor which tests two keys for equality. If the wrapped
+// map type has a "key_equal" member (hash_map does), then that
+// will be used by default. Otherwise you must specify this
+// manually.
+// MapInit: A functor that takes a ManualConstructor<NormalMap>* and uses it to
+// initialize the map. This functor will be called at most once per
+// small_map, when the map exceeds the threshold of kArraySize and we
+// are about to copy values from the array to the map. The functor
+// *must* call one of the Init() methods provided by
+// ManualConstructor, since after it runs we assume that the NormalMap
+// has been initialized.
+//
+// example:
+// small_map<hash_map<string, int> > days;
+// days["sunday" ] = 0;
+// days["monday" ] = 1;
+// days["tuesday" ] = 2;
+// days["wednesday"] = 3;
+// days["thursday" ] = 4;
+// days["friday" ] = 5;
+// days["saturday" ] = 6;
+//
+// You should assume that small_map might invalidate all the iterators
+// on any call to erase(), insert() and operator[].
+template <typename NormalMap>
+class small_map_default_init {  // the default MapInit functor of small_map
+ public:
+ void operator ()(ManualConstructor<NormalMap>* map) const {
+ map->Init();  // argument-less Init(); presumably default-constructs the NormalMap in place -- see ManualConstructor
+ }
+};
+
+template <typename NormalMap,
+ int kArraySize = 4,
+ typename EqualKey = typename NormalMap::key_equal,
+ typename MapInit = small_map_default_init<NormalMap> >
+class small_map {
+ // We cannot rely on the compiler to reject array of size 0. In
+ // particular, gcc 2.95.3 does it but later versions allow 0-length
+ // arrays. Therefore, we explicitly reject non-positive kArraySize
+ // here.
+ COMPILE_ASSERT(kArraySize > 0, default_initial_size_should_be_positive);
+
+ public:
+ typedef typename NormalMap::key_type key_type;
+ typedef typename NormalMap::mapped_type data_type;
+ typedef typename NormalMap::mapped_type mapped_type;
+ typedef typename NormalMap::value_type value_type;
+ typedef EqualKey key_equal;
+
+ small_map() : size_(0), functor_(MapInit()) {}  // starts empty and array-backed (size_ >= 0)
+
+ explicit small_map(const MapInit& functor) : size_(0), functor_(functor) {}  // same, with a caller-supplied map initializer
+
+ // Allow copy-constructor and assignment, since STL allows them too.
+ small_map(const small_map& src) {
+ // size_ and functor_ are initted in InitFrom(); functor_ is default-initialized before that
+ InitFrom(src);
+ }
+  small_map& operator=(const small_map& src) {
+    if (&src == this) return *this;  // self-assignment: nothing to do
+    // Tears down the current contents, then copy-constructs them from src.
+    // This is not optimal: if src and dest are both using the small array,
+    // we could skip the teardown and reconstruct, but value_type itself is
+    // pair<const K, V>, and const K is not assignable.
+    Destroy();
+    InitFrom(src);
+    return *this;  // conventional result of copy assignment (allows a = b = c)
+  }
+ ~small_map() {
+ Destroy();  // destroys the array slots in use, or the full map, whichever is active
+ }
+
+ class const_iterator;
+
+ class iterator {  // mutable iterator: wraps either a pointer into array_ or a NormalMap::iterator
+ public:
+ typedef typename NormalMap::iterator::iterator_category iterator_category;
+ typedef typename NormalMap::iterator::value_type value_type;
+ typedef typename NormalMap::iterator::difference_type difference_type;
+ typedef typename NormalMap::iterator::pointer pointer;
+ typedef typename NormalMap::iterator::reference reference;
+
+ inline iterator(): array_iter_(NULL) {}  // no array position; hash_iter_ is default-constructed
+
+ inline iterator& operator++() {
+ if (array_iter_ != NULL) {  // non-NULL array_iter_ means this iterator walks the inline array
+ ++array_iter_;
+ } else {
+ ++hash_iter_;
+ }
+ return *this;
+ }
+ inline iterator operator++(int) {
+ iterator result(*this);  // post-increment: hand back the pre-increment position
+ ++(*this);
+ return result;
+ }
+ inline iterator& operator--() {
+ if (array_iter_ != NULL) {
+ --array_iter_;
+ } else {
+ --hash_iter_;
+ }
+ return *this;
+ }
+ inline iterator operator--(int) {
+ iterator result(*this);  // post-decrement: hand back the pre-decrement position
+ --(*this);
+ return result;
+ }
+ inline value_type* operator->() const {
+ if (array_iter_ != NULL) {
+ return array_iter_->get();  // get() on the ManualConstructor slot yields the element's address
+ } else {
+ return hash_iter_.operator->();
+ }
+ }
+
+ inline value_type& operator*() const {
+ if (array_iter_ != NULL) {
+ return *array_iter_->get();
+ } else {
+ return *hash_iter_;
+ }
+ }
+
+ inline bool operator==(const iterator& other) const {
+ if (array_iter_ != NULL) {
+ return array_iter_ == other.array_iter_;  // array-backed: equal iff both point at the same slot
+ } else {
+ return other.array_iter_ == NULL && hash_iter_ == other.hash_iter_;  // map-backed: other must be map-backed too
+ }
+ }
+
+ inline bool operator!=(const iterator& other) const {
+ return !(*this == other);
+ }
+
+ bool operator==(const const_iterator& other) const;  // defined out of line, after const_iterator is complete
+ bool operator!=(const const_iterator& other) const;  // likewise
+
+ private:
+ friend class small_map;
+ friend class const_iterator;
+ inline explicit iterator(ManualConstructor<value_type>* init)  // array-backed position
+ : array_iter_(init) {}
+ inline explicit iterator(const typename NormalMap::iterator& init)  // map-backed position
+ : array_iter_(NULL), hash_iter_(init) {}
+
+ ManualConstructor<value_type>* array_iter_;  // non-NULL only for an array-backed position
+ typename NormalMap::iterator hash_iter_;  // consulted only when array_iter_ is NULL
+ };
+
+ class const_iterator {  // read-only counterpart of iterator, with the same dual representation
+ public:
+ typedef typename NormalMap::const_iterator::iterator_category iterator_category;
+ typedef typename NormalMap::const_iterator::value_type value_type;
+ typedef typename NormalMap::const_iterator::difference_type difference_type;
+ typedef typename NormalMap::const_iterator::pointer pointer;
+ typedef typename NormalMap::const_iterator::reference reference;
+
+ inline const_iterator(): array_iter_(NULL) {}  // no array position; hash_iter_ is default-constructed
+ inline const_iterator(const iterator& other)  // implicit conversion from the mutable iterator
+ : array_iter_(other.array_iter_), hash_iter_(other.hash_iter_) {}
+
+ inline const_iterator& operator++() {
+ if (array_iter_ != NULL) {  // non-NULL array_iter_ means this iterator walks the inline array
+ ++array_iter_;
+ } else {
+ ++hash_iter_;
+ }
+ return *this;
+ }
+ inline const_iterator operator++(int) {
+ const_iterator result(*this);  // post-increment: hand back the pre-increment position
+ ++(*this);
+ return result;
+ }
+
+ inline const_iterator& operator--() {
+ if (array_iter_ != NULL) {
+ --array_iter_;
+ } else {
+ --hash_iter_;
+ }
+ return *this;
+ }
+ inline const_iterator operator--(int) {
+ const_iterator result(*this);  // post-decrement: hand back the pre-decrement position
+ --(*this);
+ return result;
+ }
+
+ inline const value_type* operator->() const {
+ if (array_iter_ != NULL) {
+ return array_iter_->get();  // get() on the ManualConstructor slot yields the element's address
+ } else {
+ return hash_iter_.operator->();
+ }
+ }
+
+ inline const value_type& operator*() const {
+ if (array_iter_ != NULL) {
+ return *array_iter_->get();
+ } else {
+ return *hash_iter_;
+ }
+ }
+
+ inline bool operator==(const const_iterator& other) const {
+ if (array_iter_ != NULL) {
+ return array_iter_ == other.array_iter_;  // array-backed: equal iff both point at the same slot
+ } else {
+ return other.array_iter_ == NULL && hash_iter_ == other.hash_iter_;  // map-backed: other must be map-backed too
+ }
+ }
+
+ inline bool operator!=(const const_iterator& other) const {
+ return !(*this == other);
+ }
+
+ private:
+ friend class small_map;
+ inline explicit const_iterator(  // array-backed position
+ const ManualConstructor<value_type>* init)
+ : array_iter_(init) {}
+ inline explicit const_iterator(  // map-backed position
+ const typename NormalMap::const_iterator& init)
+ : array_iter_(NULL), hash_iter_(init) {}
+
+ const ManualConstructor<value_type>* array_iter_;  // non-NULL only for an array-backed position
+ typename NormalMap::const_iterator hash_iter_;  // consulted only when array_iter_ is NULL
+ };
+
+  // Looks up key; the result compares equal to end() when key is absent.
+  iterator find(const key_type& key) {
+    key_equal keys_match;
+    if (size_ < 0) {
+      return iterator(map()->find(key));
+    }
+    // Array mode: linear scan that stops at size_, i.e. at end().
+    int slot = 0;
+    while (slot < size_ && !keys_match(array_[slot]->first, key)) {
+      ++slot;
+    }
+    return iterator(array_ + slot);
+  }
+
+  // Const overload of find(); same lookup, yielding a const_iterator.
+  const_iterator find(const key_type& key) const {
+    key_equal keys_match;
+    if (size_ < 0) {
+      return const_iterator(map()->find(key));
+    }
+    // Array mode: linear scan that stops at size_, i.e. at end().
+    int slot = 0;
+    while (slot < size_ && !keys_match(array_[slot]->first, key)) {
+      ++slot;
+    }
+    return const_iterator(array_ + slot);
+  }
+
+ // Returns the value mapped to key; in array mode an absent key is added with a data_type() value. Invalidates iterators.
+ data_type& operator[](const key_type& key) {
+ key_equal compare;
+
+ if (size_ >= 0) {
+ // operator[] searches backwards, favoring recently-added
+ // elements.
+ for (int i = size_-1; i >= 0; --i) {
+ if (compare(array_[i]->first, key)) {
+ return array_[i]->second;
+ }
+ }
+ if (size_ == kArraySize) {  // array is full: migrate to the real map before adding the key
+ ConvertToRealMap();
+ return (*map_)[key];
+ } else {
+ array_[size_].Init(key, data_type());  // construct the new element in the first unused slot
+ return array_[size_++]->second;  // size_ grows only after the slot has been constructed
+ }
+ } else {
+ return (*map_)[key];  // already map-backed: defer to NormalMap::operator[]
+ }
+ }
+
+ // Inserts x unless its key is already present; returns (position, whether an insertion happened). Invalidates iterators.
+ std::pair<iterator, bool> insert(const value_type& x) {
+ key_equal compare;
+
+ if (size_ >= 0) {
+ for (int i = 0; i < size_; i++) {
+ if (compare(array_[i]->first, x.first)) {
+ return std::make_pair(iterator(array_ + i), false);  // key already stored: existing element is left untouched
+ }
+ }
+ if (size_ == kArraySize) {  // array is full: migrate to the real map, then insert there
+ ConvertToRealMap(); // Invalidates all iterators!
+ std::pair<typename NormalMap::iterator, bool> ret = map_->insert(x);
+ return std::make_pair(iterator(ret.first), ret.second);
+ } else {
+ array_[size_].Init(x);  // copy-construct x in the first unused slot
+ return std::make_pair(iterator(array_ + size_++), true);  // iterator refers to the new slot; size_ grows afterwards
+ }
+ } else {
+ std::pair<typename NormalMap::iterator, bool> ret = map_->insert(x);  // already map-backed: defer to NormalMap::insert
+ return std::make_pair(iterator(ret.first), ret.second);
+ }
+ }
+
+  // Inserts every element of [f, l), in order. Invalidates iterators.
+  template <class InputIterator>
+  void insert(InputIterator f, InputIterator l) {
+    // Each element is handed to the single-value insert() overload.
+    for (; f != l; ++f) {
+      insert(*f);
+    }
+  }
+
+  // Iterator to the first element of the active representation.
+  iterator begin() {
+    if (size_ < 0) {
+      return iterator(map_->begin());  // negative size_: the full map is in use
+    }
+    return iterator(array_);  // array mode: the first slot
+  }
+  // Const overload of begin().
+  const_iterator begin() const {
+    if (size_ < 0) {
+      return const_iterator(map_->begin());
+    }
+    return const_iterator(array_);
+  }
+
+  // Past-the-end iterator of the active representation.
+  iterator end() {
+    if (size_ < 0) {
+      return iterator(map_->end());  // negative size_: the full map is in use
+    }
+    return iterator(array_ + size_);  // array mode: one past the last used slot
+  }
+  // Const overload of end().
+  const_iterator end() const {
+    if (size_ < 0) {
+      return const_iterator(map_->end());
+    }
+    return const_iterator(array_ + size_);
+  }
+
+  // Removes every element and returns the container to the empty,
+  // array-backed state (size_ == 0), whichever representation was active.
+  void clear() {
+    // Destroy() runs the destructors of the array slots in use, or of the
+    // full map when that representation is active -- the same teardown
+    // the destructor performs.
+    Destroy();
+    // A non-negative size_ selects array mode again.
+    size_ = 0;
+  }
+
+ // Erases the element at position, which must refer to an element (end() is not checked for). Invalidates iterators.
+ void erase(const iterator& position) {
+ if (size_ >= 0) {
+ int i = position.array_iter_ - array_;  // slot index of the element being erased
+ array_[i].Destroy();
+ --size_;
+ if (i != size_) {  // not the last slot: fill the hole with a copy of the last element
+ array_[i].Init(*array_[size_]);
+ array_[size_].Destroy();  // the old last slot is now unused
+ }
+ } else {
+ map_->erase(position.hash_iter_);  // map-backed: defer to NormalMap::erase
+ }
+ }
+
+  // Erases the element stored under key, if any; returns how many were removed.
+  int erase(const key_type& key) {
+    iterator victim = find(key);
+    if (victim != end()) { erase(victim); return 1; }
+    return 0;
+  }
+
+  int count(const key_type& key) const {
+    return (find(key) != end()) ? 1 : 0;  // 1 when key is present, 0 otherwise
+  }
+
+  // Number of elements held by the active representation.
+  int size() const {
+    if (using_full_map()) {
+      return map_->size();
+    }
+    return size_;  // array mode: size_ is the element count
+  }
+
+  // True when the active representation holds no elements.
+  bool empty() const {
+    if (size_ < 0) {
+      return map_->empty();
+    }
+    return size_ == 0;
+  }
+
+ // Returns true if we have fallen back to using the underlying map
+ // representation.
+ bool using_full_map() const {
+ return size_ < 0;  // a negative size_ is the marker for "map_ is the live union member"
+ }
+
+ inline NormalMap* map() {
+ assert(using_full_map());  // only meaningful once the container is map-backed
+ return map_.get();  // pointer to the NormalMap held in the union
+ }
+ inline const NormalMap* map() const {
+ assert(using_full_map());  // only meaningful once the container is map-backed
+ return map_.get();
+ }
+
+ private:
+ int size_; // negative = using hash_map
+
+ MapInit functor_;
+
+ // We want to call constructors and destructors manually, but we don't
+ // want to allocate and deallocate the memory used for them separately.
+ // So, we use this crazy ManualConstructor class.
+ //
+ // Since array_ and map_ are mutually exclusive, we'll put them in a
+ // union, too. We add in a dummy_ value which quiets MSVC (both
+ // 7.1 and 8.0) from otherwise giving an erroneous "union member has
+ // copy constructor" error message (C2621). This dummy member has
+ // to come before array_ to quiet the compiler. Shrug.
+ union {
+ ManualConstructor<value_type> dummy_;
+ ManualConstructor<value_type> array_[kArraySize];
+ ManualConstructor<NormalMap> map_;
+ };
+
+ void ConvertToRealMap() {  // switches from the inline array to NormalMap; callers invoke it when size_ == kArraySize
+ // Move the current elements into a temporary array.
+ ManualConstructor<value_type> temp_array[kArraySize];  // needed because array_ and map_ share storage in the union
+
+ for (int i = 0; i < kArraySize; i++) {
+ temp_array[i].Init(*array_[i]);  // copy-construct into the temporary slot
+ array_[i].Destroy();  // then release the original slot
+ }
+
+ // Initialize the map.
+ size_ = -1;  // a negative size_ marks the map as the active representation
+ functor_(&map_);  // MapInit constructs the NormalMap in the storage the array just vacated
+
+ // Insert elements into it.
+ for (int i = 0; i < kArraySize; i++) {
+ map_->insert(*temp_array[i]);
+ temp_array[i].Destroy();  // the temporaries are manually constructed, so destroy them by hand
+ }
+ }
+
+ // Helpers for constructors and destructors. InitFrom() expects *this to hold no constructed elements.
+ void InitFrom(const small_map& src) {
+ functor_ = src.functor_;
+ size_ = src.size_;  // also copies the representation: negative means map-backed
+ if (src.size_ >= 0) {
+ for (int i = 0; i < size_; i++) {
+ array_[i].Init(*src.array_[i]);  // copy-construct each used slot
+ }
+ } else {
+ functor_(&map_);  // construct our own NormalMap first...
+ (*map_.get()) = (*src.map_.get());  // ...then copy-assign the contents of src's map into it
+ }
+ }
+  // Runs the destructors of whatever is currently constructed in the union.
+  void Destroy() {
+    if (size_ < 0) {
+      map_.Destroy();
+      return;
+    }
+    // Array mode: only the first size_ slots hold live objects.
+    for (int slot = 0; slot < size_; ++slot) array_[slot].Destroy();
+  }
+};
+
+template <typename NormalMap, int kArraySize, typename EqualKey,
+ typename Functor>
+inline bool small_map<NormalMap, kArraySize, EqualKey,
+ Functor>::iterator::operator==(
+ const const_iterator& other) const {
+ return other == *this;  // uses const_iterator::operator==; *this converts through const_iterator(const iterator&)
+}
+template <typename NormalMap, int kArraySize, typename EqualKey,
+ typename Functor>
+inline bool small_map<NormalMap, kArraySize, EqualKey,
+ Functor>::iterator::operator!=(
+ const const_iterator& other) const {
+ return other != *this;  // uses const_iterator::operator!=; *this converts through const_iterator(const iterator&)
+}
+
+}
+
+#endif // UTIL_GTL_SMALL_MAP_H_
diff --git a/src/base/thread_annotations.h b/src/base/thread_annotations.h
new file mode 100644
index 0000000..b6db61b
--- /dev/null
+++ b/src/base/thread_annotations.h
@@ -0,0 +1,130 @@
+// Copyright (c) 2008, Google Inc.
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+// * Neither the name of Google Inc. nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+// ---
+//
+//
+// This header file contains the macro definitions for thread safety
+// annotations that allow the developers to document the locking policies
+// of their multi-threaded code. The annotations can also help program
+// analysis tools to identify potential thread safety issues.
+//
+//
+// The annotations are implemented using GCC's "attributes" extension.
+// Using the macros defined here instead of the raw GCC attributes allows
+// for portability and future compatibility.
+//
+
+#ifndef BASE_THREAD_ANNOTATIONS_H_
+#define BASE_THREAD_ANNOTATIONS_H_
+
+
+#include <config.h>
+#if defined(__GNUC__) && defined(__SUPPORT_TS_ANNOTATION__) && !defined(SWIG)
+#define THREAD_ANNOTATION_ATTRIBUTE__(x) __attribute__((x))  // annotations become real GCC attributes
+#else  // any other configuration: every annotation macro built on this expands to nothing
+#define THREAD_ANNOTATION_ATTRIBUTE__(x) // no-op
+#endif
+
+
+// Document if a shared variable/field needs to be protected by a lock.
+// GUARDED_BY allows the user to specify a particular lock that should be
+// held when accessing the annotated variable, while GUARDED_VAR only
+// indicates a shared variable should be guarded (by any lock). GUARDED_VAR
+// is primarily used when the client cannot express the name of the lock.
+#define GUARDED_BY(x) THREAD_ANNOTATION_ATTRIBUTE__(guarded_by(x))
+#define GUARDED_VAR THREAD_ANNOTATION_ATTRIBUTE__(guarded)
+
+// Document if the memory location pointed to by a pointer should be guarded
+// by a lock when dereferencing the pointer. Similar to GUARDED_VAR,
+// PT_GUARDED_VAR is primarily used when the client cannot express the name
+// of the lock. Note that a pointer variable to a shared memory location
+// could itself be a shared variable. For example, if a shared global pointer
+// q, which is guarded by mu1, points to a shared memory location that is
+// guarded by mu2, q should be annotated as follows:
+// int *q GUARDED_BY(mu1) PT_GUARDED_BY(mu2);
+#define PT_GUARDED_BY(x) \
+ THREAD_ANNOTATION_ATTRIBUTE__(point_to_guarded_by(x))
+#define PT_GUARDED_VAR \
+ THREAD_ANNOTATION_ATTRIBUTE__(point_to_guarded)
+
+// Document the acquisition order between locks that can be held
+// simultaneously by a thread. For any two locks that need to be annotated
+// to establish an acquisition order, only one of them needs the annotation.
+// (i.e. You don't have to annotate both locks with both ACQUIRED_AFTER
+// and ACQUIRED_BEFORE.)
+#define ACQUIRED_AFTER(x) \
+ THREAD_ANNOTATION_ATTRIBUTE__(acquired_after(x))
+#define ACQUIRED_BEFORE(x) \
+ THREAD_ANNOTATION_ATTRIBUTE__(acquired_before(x))
+
+// The following three annotations document the lock requirements for
+// functions/methods.
+
+// Document if a function expects certain locks to be held before it is called
+#define EXCLUSIVE_LOCKS_REQUIRED(x) \
+ THREAD_ANNOTATION_ATTRIBUTE__(exclusive_locks_required(x))
+
+#define SHARED_LOCKS_REQUIRED(x) \
+ THREAD_ANNOTATION_ATTRIBUTE__(shared_locks_required(x))
+
+// Document the locks acquired in the body of the function. These locks cannot
+// be held when calling this function (as Mutex locks are non-reentrant).
+#define LOCKS_EXCLUDED(x) \
+ THREAD_ANNOTATION_ATTRIBUTE__(locks_excluded(x))
+
+// Document the lock the annotated function returns without acquiring it.
+#define LOCK_RETURNED(x) THREAD_ANNOTATION_ATTRIBUTE__(lock_returned(x))
+
+// Document if a class/type is a lockable type (such as the Mutex class).
+#define LOCKABLE THREAD_ANNOTATION_ATTRIBUTE__(lockable)
+
+// Document if a class is a scoped lockable type (such as the MutexLock class).
+#define SCOPED_LOCKABLE THREAD_ANNOTATION_ATTRIBUTE__(scoped_lockable)
+
+// The following annotations specify lock and unlock primitives.
+#define EXCLUSIVE_LOCK_FUNCTION(x) \
+ THREAD_ANNOTATION_ATTRIBUTE__(exclusive_lock(x))
+
+#define SHARED_LOCK_FUNCTION(x) \
+ THREAD_ANNOTATION_ATTRIBUTE__(shared_lock(x))
+
+#define EXCLUSIVE_TRYLOCK_FUNCTION(x) \
+ THREAD_ANNOTATION_ATTRIBUTE__(exclusive_trylock(x))
+
+#define SHARED_TRYLOCK_FUNCTION(x) \
+ THREAD_ANNOTATION_ATTRIBUTE__(shared_trylock(x))
+
+#define UNLOCK_FUNCTION(x) \
+ THREAD_ANNOTATION_ATTRIBUTE__(unlock(x))
+
+// An escape hatch for thread safety analysis to ignore the annotated function.
+#define NO_THREAD_SAFETY_ANALYSIS \
+ THREAD_ANNOTATION_ATTRIBUTE__(no_thread_safety_analysis)
+
+#endif // BASE_THREAD_ANNOTATIONS_H_
diff --git a/src/base/util.h b/src/base/util.h
new file mode 100644
index 0000000..2d9f1db
--- /dev/null
+++ b/src/base/util.h
@@ -0,0 +1,235 @@
+// Copyright (c) 2011, Google Inc.
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+// * Neither the name of Google Inc. nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+// ---
+//
+// Some generically useful utility routines that in google-land would
+// be their own projects. We make a shortened version here.
+
+#ifndef TEMPLATE_UTIL_H_
+#define TEMPLATE_UTIL_H_
+
+#include <config.h>
+#include <ctype.h>
+#include <errno.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+#include <iostream>
+#include <string>
+#include <utility>
+#include <vector>
+
+// -- utility macros ---------------------------------------------------------
+
+#define DISALLOW_COPY_AND_ASSIGN(TypeName) \
+ TypeName(const TypeName&); /* declares the copy constructor */ \
+ void operator=(const TypeName&)  // declares copy assignment; expands without a trailing ';', so write one after the macro
+
+// Starting with Visual C++ 2005, WinNT.h includes ARRAYSIZE.
+#if !defined(_MSC_VER) || _MSC_VER < 1400
+#define ARRAYSIZE(a) \
+ ((sizeof(a) / sizeof(*(a))) / \
+ static_cast<size_t>(!(sizeof(a) % sizeof(*(a)))))  // divisor is 0 when sizeof(a) is not a multiple of the element size, flagging misuse
+#endif
+
+template<typename To, typename From> // use like this: down_cast<T*>(foo);
+inline To down_cast(From* f) { // so we only accept pointers
+ return static_cast<To>(f);  // plain static_cast: no runtime type check is performed
+}
+
+// -- CHECK macros ---------------------------------------------------------
+
+// CHECK dies with a fatal error if condition is not true. It is *not*
+// controlled by NDEBUG, so the check will be executed regardless of
+// compilation mode. Therefore, it is safe to do things like:
+// CHECK(fp->Write(x) == 4)
+// We allow stream-like objects after this for debugging, but they're ignored.
+#define CHECK(condition) \
+ if (true) { \
+ if (!(condition)) { \
+ fprintf(stderr, "Check failed: %s\n", #condition); \
+ exit(1); \
+ } \
+ } else std::cerr << ""  // never executed; exists only so a trailing "<< x" has something to attach to
+
+#define CHECK_OP(op, val1, val2) \
+ if (true) { \
+ if (!((val1) op (val2))) { \
+ fprintf(stderr, "Check failed: %s %s %s\n", #val1, #op, #val2); \
+ exit(1); \
+ } \
+ } else std::cerr << ""  // never executed; exists only so a trailing "<< x" has something to attach to
+
+#define CHECK_EQ(val1, val2) CHECK_OP(==, val1, val2)
+#define CHECK_NE(val1, val2) CHECK_OP(!=, val1, val2)
+#define CHECK_LE(val1, val2) CHECK_OP(<=, val1, val2)
+#define CHECK_LT(val1, val2) CHECK_OP(< , val1, val2)
+#define CHECK_GE(val1, val2) CHECK_OP(>=, val1, val2)
+#define CHECK_GT(val1, val2) CHECK_OP(> , val1, val2)
+// Synonyms for CHECK_* that are used in some unittests.
+#define EXPECT_EQ(val1, val2) CHECK_EQ(val1, val2)
+#define EXPECT_NE(val1, val2) CHECK_NE(val1, val2)
+#define EXPECT_LE(val1, val2) CHECK_LE(val1, val2)
+#define EXPECT_LT(val1, val2) CHECK_LT(val1, val2)
+#define EXPECT_GE(val1, val2) CHECK_GE(val1, val2)
+#define EXPECT_GT(val1, val2) CHECK_GT(val1, val2)
+#define EXPECT_TRUE(cond) CHECK(cond)
+#define EXPECT_FALSE(cond) CHECK(!(cond))
+#define EXPECT_STREQ(a, b) CHECK(strcmp(a, b) == 0)
+#define ASSERT_TRUE(cond) EXPECT_TRUE(cond)
+// LOG(FATAL) is an alias for CHECK(false). We define FATAL, but no
+// other value that is reasonable inside LOG(), so the compile will
+// fail if someone tries to use LOG(DEBUG) or the like.
+#define LOG(x) INTERNAL_DO_LOG_ ## x
+#define INTERNAL_DO_LOG_FATAL CHECK(false)
+
+// DCHECK*: checked only in debug mode (NDEBUG not defined); with NDEBUG they compile to no-ops.
+#ifndef NDEBUG  // debug build: forward to the always-on CHECK macros
+#define DCHECK(condition) CHECK(condition)
+#define DCHECK_EQ(val1, val2) CHECK_EQ(val1, val2)
+#define DCHECK_NE(val1, val2) CHECK_NE(val1, val2)
+#define DCHECK_LE(val1, val2) CHECK_LE(val1, val2)
+#define DCHECK_LT(val1, val2) CHECK_LT(val1, val2)
+#define DCHECK_GE(val1, val2) CHECK_GE(val1, val2)
+#define DCHECK_GT(val1, val2) CHECK_GT(val1, val2)
+#else  // NDEBUG build: the arguments are not evaluated; the dead else-branch still accepts "<< x"
+#define DCHECK(condition) if (true) {} else std::cerr << ""
+#define DCHECK_EQ(val1, val2) if (true) {} else std::cerr << ""
+#define DCHECK_NE(val1, val2) if (true) {} else std::cerr << ""
+#define DCHECK_LE(val1, val2) if (true) {} else std::cerr << ""
+#define DCHECK_LT(val1, val2) if (true) {} else std::cerr << ""
+#define DCHECK_GE(val1, val2) if (true) {} else std::cerr << ""
+#define DCHECK_GT(val1, val2) if (true) {} else std::cerr << ""
+#endif
+
+#define PCHECK(cond) CHECK(cond) << ": " << strerror(errno)  // NOTE: the << operands bind to CHECK's dead else-branch, so the errno text is never printed
+#define PFATAL(s) do { perror(s); exit(1); } while (0)  // prints s plus the errno description via perror(), then exits with status 1
+
+// -- testing-related macros --------------------------------------------------
+
+// Call this in a .cc file where you will later call RUN_ALL_TESTS in main(). Defines the file-local g_testlist registry and RUN_ALL_TESTS().
+#define TEST_INIT \
+ static std::vector<void (*)()> g_testlist; /* the tests to run */ \
+ static int RUN_ALL_TESTS() { \
+ std::vector<void (*)()>::const_iterator it; \
+ for (it = g_testlist.begin(); it != g_testlist.end(); ++it) { \
+ (*it)(); /* The test will error-exit if there's a problem. */ \
+ } \
+ fprintf(stderr, "\nPassed %d tests\n\nPASS\n", \
+ static_cast<int>(g_testlist.size())); \
+ return 0; \
+ }
+
+#define TEST(a, b) \
+ class Test_##a##_##b { \
+ public: \
+ Test_##a##_##b() { g_testlist.push_back(&Run); } /* constructing an instance registers Run */ \
+ static void Run(); \
+ }; \
+ static Test_##a##_##b g_test_##a##_##b; /* one static instance per test, so registration happens during static initialization */ \
+ void Test_##a##_##b::Run()  // the braces written after TEST(a, b) become the body of Run()
+
+// This is a dummy class that eases the google->opensource transition.
+namespace testing {
+class Test {};
+}
+
+// -- template-related macros ----------------------------------------------
+
+#ifndef DEFAULT_TEMPLATE_ROOTDIR
+# define DEFAULT_TEMPLATE_ROOTDIR "."
+#endif
+
+// -- string-related functions ----------------------------------------------
+
+inline bool safe_strto32(const std::string& s, int* i) {  // base-10 parse; true only if all of s is a number that fits in int
+  char* error_pos;
+  if (s.empty()) return false;       // no input at all
+  errno = 0;                         // strtol reports long overflow only through errno
+  const long parsed = strtol(s.c_str(), &error_pos, 10);
+  return (*i = static_cast<int>(parsed)) == parsed && *error_pos == '\0' && errno == 0;  // first test rejects long->int truncation
+}
+
+inline int atoi32(const char* s) {
+ return atoi(s);  // thin wrapper: atoi() offers no error reporting, use safe_strto32 when that matters
+}
+
+inline void StripWhiteSpace(std::string* str) {  // trims leading and trailing whitespace from *str, in place
+  int str_length = str->length();
+  // isspace() is undefined for negative char values, hence the unsigned char casts below.
+  // Strip off leading whitespace.
+  int first = 0;
+  while (first < str_length && isspace(static_cast<unsigned char>(str->at(first)))) {
+    ++first;
+  }
+  // If entire string is white space.
+  if (first == str_length) {
+    str->clear();
+    return;
+  }
+  if (first > 0) {
+    str->erase(0, first);
+    str_length -= first;  // keep the cached length in step with the shortened string
+  }
+
+  // Strip off trailing whitespace.
+  int last = str_length - 1;
+  while (last >= 0 && isspace(static_cast<unsigned char>(str->at(last)))) {
+    --last;
+  }
+  if (last != (str_length - 1) && last >= 0) {  // something was trailing: cut after the last non-space
+    str->erase(last + 1, std::string::npos);
+  }
+}
+
+inline void SplitStringIntoKeyValuePairs(
+    const std::string& s,
+    const char* kv_split,    // only its first character is used, e.g. "="
+    const char* pair_split,  // only its first character is used, e.g. ","
+    std::vector< std::pair<std::string, std::string> > *pairs) {
+  // Single pass over s: characters accumulate into the key until the kv
+  // separator is seen, then into the value until the pair separator.
+  std::string key, value;
+  bool in_value = false;  // false: appending to key; true: appending to value
+  for (std::string::const_iterator it = s.begin(); it != s.end(); ++it) {
+    if (*it == kv_split[0]) {
+      in_value = true;
+    } else if (*it != pair_split[0]) {
+      (in_value ? value : key) += *it;
+    } else {
+      if (!key.empty()) pairs->push_back(std::make_pair(key, value));
+      key.clear();
+      value.clear();
+      in_value = false;
+    }
+  }
+  if (!key.empty()) pairs->push_back(std::make_pair(key, value));
+}
+
+#endif // TEMPLATE_UTIL_H_
diff --git a/src/config.h.in b/src/config.h.in
new file mode 100644
index 0000000..440f123
--- /dev/null
+++ b/src/config.h.in
@@ -0,0 +1,195 @@
+/* src/config.h.in. Generated from configure.ac by autoheader. */
+
+/* Namespace for Google classes */
+#undef GOOGLE_NAMESPACE
+
+/* the location of <unordered_map> or <hash_map> */
+#undef HASH_MAP_H
+
+/* the namespace of hash_map/hash_set */
+#undef HASH_NAMESPACE
+
+/* the location of <unordered_set> or <hash_set> */
+#undef HASH_SET_H
+
+/* Define to 1 if you have the <byteswap.h> header file. */
+#undef HAVE_BYTESWAP_H
+
+/* Define to 1 if you have the <dirent.h> header file, and it defines `DIR'.
+ */
+#undef HAVE_DIRENT_H
+
+/* Define to 1 if you have the <dlfcn.h> header file. */
+#undef HAVE_DLFCN_H
+
+/* Define to 1 if you have the <endian.h> header file. */
+#undef HAVE_ENDIAN_H
+
+/* Define to 1 if you have the `getopt' function. */
+#undef HAVE_GETOPT
+
+/* Define to 1 if you have the <getopt.h> header file. */
+#undef HAVE_GETOPT_H
+
+/* Define to 1 if you have the `getopt_long' function. */
+#undef HAVE_GETOPT_LONG
+
+/* define if the compiler has hash_map */
+#undef HAVE_HASH_MAP
+
+/* define if the compiler has hash_set */
+#undef HAVE_HASH_SET
+
+/* Define to 1 if you have the <inttypes.h> header file. */
+#undef HAVE_INTTYPES_H
+
+/* Define to 1 if you have the <libkern/OSByteOrder.h> header file. */
+#undef HAVE_LIBKERN_OSBYTEORDER_H
+
+/* Define to 1 if you have the <machine/endian.h> header file. */
+#undef HAVE_MACHINE_ENDIAN_H
+
+/* Define to 1 if you have the <memory.h> header file. */
+#undef HAVE_MEMORY_H
+
+/* define if the compiler implements namespaces */
+#undef HAVE_NAMESPACES
+
+/* Define to 1 if you have the <ndir.h> header file, and it defines `DIR'. */
+#undef HAVE_NDIR_H
+
+/* Define if you have POSIX threads libraries and header files. */
+#undef HAVE_PTHREAD
+
+/* define if the compiler implements pthread_rwlock_* */
+#undef HAVE_RWLOCK
+
+/* Define to 1 if you have the <stdint.h> header file. */
+#undef HAVE_STDINT_H
+
+/* Define to 1 if you have the <stdlib.h> header file. */
+#undef HAVE_STDLIB_H
+
+/* Define to 1 if you have the <strings.h> header file. */
+#undef HAVE_STRINGS_H
+
+/* Define to 1 if you have the <string.h> header file. */
+#undef HAVE_STRING_H
+
+/* Define to 1 if you have the <sys/byteorder.h> header file. */
+#undef HAVE_SYS_BYTEORDER_H
+
+/* Define to 1 if you have the <sys/dir.h> header file, and it defines `DIR'.
+ */
+#undef HAVE_SYS_DIR_H
+
+/* Define to 1 if you have the <sys/endian.h> header file. */
+#undef HAVE_SYS_ENDIAN_H
+
+/* Define to 1 if you have the <sys/isa_defs.h> header file. */
+#undef HAVE_SYS_ISA_DEFS_H
+
+/* Define to 1 if you have the <sys/ndir.h> header file, and it defines `DIR'.
+ */
+#undef HAVE_SYS_NDIR_H
+
+/* Define to 1 if you have the <sys/stat.h> header file. */
+#undef HAVE_SYS_STAT_H
+
+/* Define to 1 if you have the <sys/types.h> header file. */
+#undef HAVE_SYS_TYPES_H
+
+/* Define to 1 if the system has the type `uint32_t'. */
+#undef HAVE_UINT32_T
+
+/* Define to 1 if the system has the type `uint64_t'. */
+#undef HAVE_UINT64_T
+
+/* Define to 1 if you have the <unistd.h> header file. */
+#undef HAVE_UNISTD_H
+
+/* define if the compiler supports unordered_{map,set} */
+#undef HAVE_UNORDERED_MAP
+
+/* Define to 1 if you have the <utime.h> header file. */
+#undef HAVE_UTIME_H
+
+/* Define to 1 if the system has the type `u_int32_t'. */
+#undef HAVE_U_INT32_T
+
+/* Define to 1 if the system has the type `u_int64_t'. */
+#undef HAVE_U_INT64_T
+
+/* define if your compiler has __attribute__ */
+#undef HAVE___ATTRIBUTE__
+
+/* Define to 1 if the system has the type `__int32'. */
+#undef HAVE___INT32
+
+/* Define to 1 if the system has the type `__int64'. */
+#undef HAVE___INT64
+
+/* The namespace to put the htmlparser code. */
+#undef HTMLPARSER_NAMESPACE
+
+/* define if first argument to InterlockedExchange is just LONG */
+#undef INTERLOCKED_EXCHANGE_NONVOLATILE
+
+/* Define to the sub-directory in which libtool stores uninstalled libraries.
+ */
+#undef LT_OBJDIR
+
+/* Name of package */
+#undef PACKAGE
+
+/* Define to the address where bug reports for this package should be sent. */
+#undef PACKAGE_BUGREPORT
+
+/* Define to the full name of this package. */
+#undef PACKAGE_NAME
+
+/* Define to the full name and version of this package. */
+#undef PACKAGE_STRING
+
+/* Define to the one symbol short name of this package. */
+#undef PACKAGE_TARNAME
+
+/* Define to the home page for this package. */
+#undef PACKAGE_URL
+
+/* Define to the version of this package. */
+#undef PACKAGE_VERSION
+
+/* printf format code for printing a size_t and ssize_t */
+#undef PRIdS
+
+/* printf format code for printing a size_t and ssize_t */
+#undef PRIuS
+
+/* printf format code for printing a size_t and ssize_t */
+#undef PRIxS
+
+/* Define to necessary symbol if this constant uses a non-standard name on
+ your system. */
+#undef PTHREAD_CREATE_JOINABLE
+
+/* Define to 1 if you have the ANSI C header files. */
+#undef STDC_HEADERS
+
+/* the namespace where STL code like vector<> is defined */
+#undef STL_NAMESPACE
+
+/* Version number of package */
+#undef VERSION
+
+/* Stops putting the code inside the Google namespace */
+#undef _END_GOOGLE_NAMESPACE_
+
+/* Puts following code inside the Google namespace */
+#undef _START_GOOGLE_NAMESPACE_
+
+
+#if defined( __MINGW32__) || defined(__MINGW64__)
+#include "windows/port.h"
+#endif
+
diff --git a/src/ctemplate/find_ptr.h.in b/src/ctemplate/find_ptr.h.in
new file mode 100644
index 0000000..e67e532
--- /dev/null
+++ b/src/ctemplate/find_ptr.h.in
@@ -0,0 +1,79 @@
+// Copyright (c) 2012, Olaf van der Spek <olafvdspek@gmail.com>
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+// * Neither the name of Google Inc. nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+// ---
+// Author: Olaf van der Spek <olafvdspek@gmail.com>
+
+#ifndef TEMPLATE_FIND_PTR_H_
+#define TEMPLATE_FIND_PTR_H_
+
+#include <cstddef>
+
+@ac_windows_dllexport_defines@
+
+namespace ctemplate {
+
+// Looks v up with c.find(v) and returns the address of the matching element
+// (for a map, the whole key/value pair), or NULL when nothing matches.  The
+// pointer refers into c, so it is only usable while that element exists.
+template <class T, class U>
+const typename T::value_type* find_ptr0(const T& c, U v)
+{
+  const typename T::const_iterator pos = c.find(v);
+  if (pos == c.end())
+    return NULL;
+  return &*pos;
+}
+
+// Looks v up in the map-like container c and returns a pointer to the mapped
+// value (the element's `second`), or NULL when the key is absent.  A missing
+// key is not inserted.  The pointer refers into c.
+template <class T, class U>
+typename T::value_type::second_type* find_ptr(T& c, U v)
+{
+  typename T::iterator pos = c.find(v);
+  if (pos != c.end())
+    return &pos->second;
+  return NULL;
+}
+
+// Read-only overload: looks v up in the map-like container c and returns a
+// pointer-to-const to the mapped value (the element's `second`), or NULL
+// when the key is absent.  The pointer refers into c.
+template <class T, class U>
+const typename T::value_type::second_type* find_ptr(const T& c, U v)
+{
+  const typename T::const_iterator pos = c.find(v);
+  if (pos != c.end())
+    return &pos->second;
+  return NULL;
+}
+
+// For map-like containers whose mapped type can itself be NULL (typically a
+// pointer): returns a copy of the mapped value for key v, or NULL when the
+// key is absent.  A missing key is not inserted.
+template <class T, class U>
+typename T::value_type::second_type find_ptr2(T& c, U v)
+{
+  typename T::iterator pos = c.find(v);
+  if (pos == c.end())
+    return NULL;
+  return pos->second;
+}
+
+// Read-only overload for map-like containers whose mapped type can itself be
+// NULL (typically a pointer): returns a copy of the mapped value for key v,
+// or NULL when the key is absent.
+template <class T, class U>
+const typename T::value_type::second_type find_ptr2(const T& c, U v)
+{
+  const typename T::const_iterator pos = c.find(v);
+  if (pos == c.end())
+    return NULL;
+  return pos->second;
+}
+
+}
+
+#endif // TEMPLATE_FIND_PTR_H_
diff --git a/src/ctemplate/per_expand_data.h.in b/src/ctemplate/per_expand_data.h.in
new file mode 100644
index 0000000..96e628b
--- /dev/null
+++ b/src/ctemplate/per_expand_data.h.in
@@ -0,0 +1,149 @@
+// Copyright (c) 2008, Google Inc.
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+// * Neither the name of Google Inc. nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+// ---
+// Author: csilvers@google.com (Craig Silverstein)
+//
+// In addition to a TemplateDictionary, there is also a PerExpandData
+// dictionary. This dictionary holds information that applies to one
+// call to Expand, such as whether to annotate the template expansion
+// output. A template dictionary is associated with a template (.tpl)
+// file; a per-expand dictionary is associated to a particular call to
+// Expand() in a .cc file.
+//
+// For (many) more details, see the doc/ directory.
+
+#ifndef TEMPLATE_PER_EXPAND_DATA_H_
+#define TEMPLATE_PER_EXPAND_DATA_H_
+
+#include <stdlib.h> // for NULL
+#include <string.h> // for strcmp
+#include <sys/types.h>
+#include @ac_cv_cxx_hash_map@
+#include <ctemplate/template_string.h> // for StringHash
+
+@ac_windows_dllexport_defines@
+
+namespace ctemplate {
+
+class TemplateModifier;
+class TemplateAnnotator;
+
+// Holds the settings that apply to a single Expand() call: the annotation
+// path, an optional custom annotator, an optional expansion-wide modifier,
+// and a key/value store readable by template modifiers.  The inline setters
+// below store the pointers they are given without copying what they point
+// to.  Copying and assignment are disabled.
+class @ac_windows_dllexport@ PerExpandData {
+ public:
+ // Starts with annotation off, no custom annotator, no expansion modifier
+ // and no modifier data.
+ PerExpandData()
+ : annotate_path_(NULL),
+ annotator_(NULL),
+ expand_modifier_(NULL),
+ map_(NULL) { }
+
+ ~PerExpandData();
+
+ // Indicate that annotations should be inserted during template expansion.
+ // template_path_start - the start of a template path. When
+ // printing the filename for template-includes, anything before and
+ // including template_path_start is elided. This can make the
+ // output less dependent on filesystem location for template files.
+ // Only the pointer is stored, so the string must stay valid for as long
+ // as this object uses it. Passing NULL turns annotation off again.
+ void SetAnnotateOutput(const char* template_path_start) {
+ annotate_path_ = template_path_start;
+ }
+
+ // Whether to annotate the expanded output; true once a non-NULL path has
+ // been given to SetAnnotateOutput().
+ bool annotate() const { return annotate_path_ != NULL; }
+
+ // The annotate-path given to SetAnnotateOutput(). It is NULL whenever
+ // annotate() is false, so only rely on it when annotate() is true.
+ const char* annotate_path() const { return annotate_path_; }
+
+ // This sets the TemplateAnnotator to be used when annotating is on.
+ // This allows you to override the default text-based annotator
+ // that will be used if you do not call this. The passed annotator
+ // will be aliased by this object and returned by annotator().
+ // Passing NULL has the special behavior of causing annotator() to
+ // revert to returning its built-in instance.
+ void SetAnnotator(TemplateAnnotator* annotator) {
+ annotator_ = annotator;
+ }
+
+ // This returns the TemplateAnnotator to be used when annotating is on.
+ // The value returned will be either an instance previously provided
+ // to SetAnnotator() or the callable built-in text-based annotator.
+ TemplateAnnotator* annotator() const;
+
+ // This is a TemplateModifier to be applied to all templates
+ // expanded via this call to Expand(). That is, this modifier is
+ // applied to the template (.tpl) file we expand, as well as to
+ // sub-templates that are expanded due to {{>INCLUDE}} directives.
+ // Caller is responsible for ensuring that modifier exists for the
+ // lifetime of this object.
+ void SetTemplateExpansionModifier(const TemplateModifier* modifier) {
+ expand_modifier_ = modifier;
+ }
+
+ // The modifier set by SetTemplateExpansionModifier(), or NULL if none
+ // has been set.
+ const TemplateModifier* template_expansion_modifier() const {
+ return expand_modifier_;
+ }
+
+ // Store data in this structure, to be used by template modifiers
+ // (see template_modifiers.h). Call with value set to NULL to clear
+ // any value previously set. Caller is responsible for ensuring key
+ // and value point to valid data for the lifetime of this object.
+ void InsertForModifiers(const char* key, const void* value);
+
+ // Retrieve data specific to this Expand call. Returns NULL if key
+ // is not found. This should only be used by template modifiers.
+ const void* LookupForModifiers(const char* key) const;
+
+ // Same as LookupForModifiers(), but casts the result to a C string.
+ // The cast is unchecked: the stored value must really be a const char*.
+ const char* LookupForModifiersAsString(const char* key) const {
+ return static_cast<const char*>(LookupForModifiers(key));
+ }
+
+ private:
+ // Map from C-string key to opaque value, backing InsertForModifiers() and
+ // LookupForModifiers().
+ // NOTE(review): the MSVC variant passes no equality functor; presumably
+ // StringHash also supplies the key comparison there -- confirm.
+#ifdef _MSC_VER
+ typedef @ac_cv_cxx_hash_map_class@<const char*, const void*, StringHash> DataMap;
+#else
+ struct DataEq {
+ bool operator()(const char* s1, const char* s2) const;
+ };
+ typedef @ac_cv_cxx_hash_map_class@<const char*, const void*, StringHash, DataEq>
+ DataMap;
+#endif
+
+ const char* annotate_path_; // NULL means "do not annotate"
+ TemplateAnnotator* annotator_; // NULL means "use the built-in annotator"
+ const TemplateModifier* expand_modifier_; // NULL means "no extra modifier"
+ DataMap* map_; // NULL after construction; managed outside this header
+
+ PerExpandData(const PerExpandData&); // disallow evil copy constructor
+ void operator=(const PerExpandData&); // disallow evil operator=
+};
+
+}
+
+#endif // TEMPLATE_PER_EXPAND_DATA_H_
diff --git a/src/ctemplate/str_ref.h.in b/src/ctemplate/str_ref.h.in
new file mode 100644
index 0000000..68b591a
--- /dev/null
+++ b/src/ctemplate/str_ref.h.in
@@ -0,0 +1,129 @@
+// Copyright (c) 2012, Olaf van der Spek <olafvdspek@gmail.com>
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+// * Neither the name of Google Inc. nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+// ---
+// Author: Olaf van der Spek <olafvdspek@gmail.com>
+
+#ifndef TEMPLATE_STR_REF_H_
+#define TEMPLATE_STR_REF_H_
+
+#include <cstddef>
+#include <cstring>  // for strlen
+
+@ac_windows_dllexport_defines@
+
+namespace ctemplate {
+
+// A non-owning reference to a contiguous run of characters, held as a
+// [begin, end) pair of pointers of type T.  T is expected to be a pointer to
+// const, e.g. const char*.  Nothing is copied: the referenced memory must
+// outlive the str_ref_basic that points at it.
+template <class T>
+class str_ref_basic
+{
+public:
+  // Constructs an empty reference (both pointers NULL).
+  str_ref_basic()
+  {
+    clear();
+  }
+
+  // Refers to the elements of the container c.  The range is computed from
+  // the address of the first element plus the element count, so c must store
+  // its elements contiguously (std::string, std::vector, ...).  An empty
+  // container yields an empty reference without dereferencing begin().
+  template <class U>
+  str_ref_basic(const U& c)
+  {
+    if (c.end() != c.begin())
+      assign(&*c.begin(), c.end() - c.begin() + &*c.begin());
+    else
+      clear();
+  }
+
+  // Refers to the range [b, e).
+  str_ref_basic(const void* b, const void* e)
+  {
+    assign(b, e);
+  }
+
+  // Refers to the sz elements starting at b.
+  str_ref_basic(const void* b, size_t sz)
+  {
+    assign(b, sz);
+  }
+
+  // Refers to the NUL-terminated string b, excluding the terminator.  A NULL
+  // pointer yields an empty reference.
+  str_ref_basic(const char* b)
+  {
+    if (b)
+      assign(b, std::strlen(b));
+    else
+      clear();
+  }
+
+  // Resets to the empty reference (both pointers NULL).
+  void clear()
+  {
+    begin_ = end_ = NULL;
+  }
+
+  // Re-points this reference at [b, e).  The pointers are reinterpreted as T
+  // without any checking.
+  void assign(const void* b, const void* e)
+  {
+    begin_ = reinterpret_cast<T>(b);
+    end_ = reinterpret_cast<T>(e);
+  }
+
+  // Re-points this reference at the sz elements starting at b; sz counts
+  // elements of T's pointee type, not bytes.
+  void assign(const void* b, size_t sz)
+  {
+    begin_ = reinterpret_cast<T>(b);
+    end_ = begin_ + sz;
+  }
+
+  // Pointer to the first referenced element (NULL when default-constructed
+  // or cleared).
+  T begin() const
+  {
+    return begin_;
+  }
+
+  // Pointer one past the last referenced element.
+  T end() const
+  {
+    return end_;
+  }
+
+  // Same as begin().  The data is not guaranteed to be NUL-terminated.
+  T data() const
+  {
+    return begin();
+  }
+
+  // Number of referenced elements.
+  size_t size() const
+  {
+    return end() - begin();
+  }
+
+  // True when the reference covers no elements.
+  bool empty() const
+  {
+    return begin() == end();
+  }
+private:
+  T begin_;  // first referenced element
+  T end_;    // one past the last referenced element
+};
+
+typedef str_ref_basic<const unsigned char*> data_ref;
+typedef str_ref_basic<const char*> str_ref;
+
+}
+
+#endif // TEMPLATE_STR_REF_H_
diff --git a/src/ctemplate/template.h.in b/src/ctemplate/template.h.in
new file mode 100644
index 0000000..c2201f8
--- /dev/null
+++ b/src/ctemplate/template.h.in
@@ -0,0 +1,483 @@
+// Copyright (c) 2006, Google Inc.
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+// * Neither the name of Google Inc. nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+// ---
+//
+// This file implements the Template class. For information about
+// how to use this class, and to write the templates it takes as input,
+// see the doc/ directory.
+
+#ifndef CTEMPLATE_TEMPLATE_H_
+#define CTEMPLATE_TEMPLATE_H_
+
+#include <time.h> // for time_t
+#include <string>
+#include <ctemplate/template_cache.h>
+#include <ctemplate/template_enums.h>
+#include <ctemplate/template_string.h>
+
+// We include this just so folks don't have to include both template.h
+// and template_dictionary.h, or template_namelist.h etc, to use the
+// template system; we don't actually use anything in these files
+// ourselves.
+#if 1
+#include <ctemplate/template_dictionary.h>
+#include <ctemplate/template_namelist.h>
+#include <ctemplate/per_expand_data.h>
+#else
+@ac_google_start_namespace@
+class TemplateDictionaryInterface;
+class PerExpandData;
+}
+#endif
+
+namespace ctemplate_htmlparser {
+class HtmlParser;
+}
+
+@ac_windows_dllexport_defines@
+
+namespace ctemplate {
+
+// These free functions form the "simple" template API, and support
+// the most common operations (expanding a template from a file, and
+// from a string). They all just delegate to a default instance of
+// the TemplateCache object.
+//
+// For more sophisticated use of the template system, you may need
+// to create your own TemplateCache object, and work directly with
+// it. See template_cache.h for details.
+
+extern @ac_windows_dllexport@ const TemplateCache* default_template_cache();
+extern @ac_windows_dllexport@ TemplateCache* mutable_default_template_cache();
+
+
+// ---- EXPANDING A TEMPLATE -------
+// ExpandTemplate
+// ExpandWithData
+
+// Expands the template named filename using dictionary, with no per-expand
+// data, and sends the result to output.  The work is delegated to the default
+// template cache's ExpandWithData() with a NULL PerExpandData, so the
+// template comes from that cache when it is already there (loaded earlier or
+// inserted via StringToTemplateCache()) and is loaded from disk otherwise.
+// This is the most general form; the std::string overload covers the common
+// case.
+inline bool ExpandTemplate(const TemplateString& filename, Strip strip,
+                           const TemplateDictionaryInterface *dictionary,
+                           ExpandEmitter* output) {
+  TemplateCache* const cache = mutable_default_template_cache();
+  return cache->ExpandWithData(filename, strip, dictionary, NULL, output);
+}
+// Common-case overload of ExpandTemplate(): the same delegation to the
+// default template cache (with a NULL PerExpandData), except that the
+// expansion goes to the std::string output_buffer instead of an
+// ExpandEmitter.
+inline bool ExpandTemplate(const TemplateString& filename, Strip strip,
+                           const TemplateDictionaryInterface* dictionary,
+                           std::string* output_buffer) {
+  TemplateCache* const cache = mutable_default_template_cache();
+  return cache->ExpandWithData(filename, strip, dictionary, NULL,
+                               output_buffer);
+}
+
+// Like ExpandTemplate(), but also takes a PerExpandData structure (see
+// per_expand_data.h) carrying settings for this one expansion: whether the
+// output should be annotated, and any data to pass on to template modifiers.
+// With a NULL per_expand_data this is exactly the same as ExpandTemplate().
+// The call is forwarded to the default template cache.  This is the most
+// general form; the std::string overload covers the common case.
+inline bool ExpandWithData(const TemplateString& filename, Strip strip,
+                           const TemplateDictionaryInterface *dictionary,
+                           PerExpandData* per_expand_data,
+                           ExpandEmitter* output) {
+  TemplateCache* const cache = mutable_default_template_cache();
+  return cache->ExpandWithData(filename, strip, dictionary, per_expand_data,
+                               output);
+}
+// Common-case overload of ExpandWithData(): forwards the same arguments to
+// the default template cache, with the expansion going to the std::string
+// output_buffer instead of an ExpandEmitter.
+inline bool ExpandWithData(const TemplateString& filename, Strip strip,
+                           const TemplateDictionaryInterface* dictionary,
+                           PerExpandData* per_expand_data,
+                           std::string* output_buffer) {
+  TemplateCache* const cache = mutable_default_template_cache();
+  return cache->ExpandWithData(filename, strip, dictionary, per_expand_data,
+                               output_buffer);
+}
+
+// ---- INSERTING INTO THE CACHE -------
+// LoadTemplate
+// StringToTemplateCache
+
+// Reads a template file from disk and inserts it into the default template
+// cache, unless it is already there.  Returns true on success, or false if
+// the template could not be found or could not be parsed.  Calling this is
+// never required, since expansion loads templates lazily when needed.  It is
+// useful for checking up front that templates exist, or for controlling
+// when disk access happens.
+inline bool LoadTemplate(const TemplateString& filename, Strip strip) {
+  TemplateCache* const cache = mutable_default_template_cache();
+  return cache->LoadTemplate(filename, strip);
+}
+
+// Inserts content into the default template cache under key, as if it were a
+// file read from disk.  Afterwards key can be used wherever a template
+// filename is expected: as the first argument when expanding, and as the
+// 'filename' of a sub-included template in
+// TemplateDictionary::SetFilename().
+inline bool StringToTemplateCache(const TemplateString& key,
+                                  const TemplateString& content,
+                                  Strip strip) {
+  TemplateCache* const cache = mutable_default_template_cache();
+  return cache->StringToTemplateCache(key, content, strip);
+}
+// Overload of StringToTemplateCache() for template text given as a pointer
+// plus length; forwards both to the default template cache unchanged.
+inline bool StringToTemplateCache(const TemplateString& key,
+                                  const char* content, size_t content_len,
+                                  Strip strip) {
+  TemplateCache* const cache = mutable_default_template_cache();
+  return cache->StringToTemplateCache(key, content, content_len, strip);
+}
+
+
+// ---------------------------------------------------------------------
+// The following are deprecated.
+// TODO(csilvers): move to parsed_template.h
+
+// The TemplateState of a template is:
+// - TS_EMPTY before parsing is complete,
+// - TS_ERROR if a syntax error was found during parsing, and
+// - TS_READY if parsing has completed successfully.
+// (TS_UNUSED is not used; as the first enumerator it has the value 0.)
+enum TemplateState { TS_UNUSED, TS_EMPTY, TS_ERROR, TS_READY };
+
+// Used for Auto-Escape. It represents the different contexts a template may
+// be initialized in via the AUTOESCAPE pragma in the template file
+// (or string). It is only public for testing. The contexts are:
+// - TC_HTML: The template contains HTML code. It need not be a complete
+// HTML page, just content the browser interprets in the context
+// of HTML parsing. This should be the most common context to use.
+// This mode activates our HTML parser.
+// - TC_JS: The template contains raw javascript. If your template
+// starts with a <script> tag, it is of type TC_HTML, not TC_JS.
+// TC_JS is typically associated with a content-type of
+// text/javascript. This mode activates our HTML parser.
+// - TC_CSS: The template contains CSS (Cascading Style Sheets). If your
+// template starts with a <style> tag, it is of type TC_HTML,
+// not TC_CSS. A TC_CSS template is typically associated with a
+// text/css content-type header. Currently treated the same as
+// TC_HTML, but don't rely on that. We may later develop
+// CSS-specific sanitizers and parsers.
+// - TC_JSON: The template contains raw JSON. Applies javascript_escape
+// to variables. Note: javascript_escape is safer than
+// json_escape, which we may want to remove.
+// - TC_XML: The template contains raw XML. Applies xml_escape to variables.
+// CAUTION: This mode is not suitable for cases where the
+// application data encapsulated in XML requires special
+// escaping, such as the case of XHTML.
+// TC_XML is typically associated with text/xml content-type.
+// - TC_MANUAL: Equivalent to not specifying auto-escaping at all.
+// TC_UNUSED is not described in the list above.
+//
+// TODO(csilvers): Make this a private part of the Template class.
+enum TemplateContext { TC_UNUSED, TC_HTML, TC_JS, TC_CSS, TC_JSON,
+ TC_XML, TC_MANUAL };
+
+
+// This class is deprecated. Old code uses this class heavily (via
+// GetTemplate() to obtain a Template*, and then methods on that
+// Template*) but new code should use the free functions above.
+class @ac_windows_dllexport@ Template {
+ public:
+ // ---- METHODS FOR TOOLS ----
+ // These are not intended for normal use, but are public so a
+ // tool can use them.
+
+ // Used by make_tpl_varnames_h.cc.
+ void WriteHeaderEntries(std::string *outstring) const;
+
+ // ---- DEPRECATED METHODS ----
+ // These methods used to be the primary way of using the Template
+ // object, but have been deprecated in favor of the (static)
+ // methods above. If you are using these deprecated methods,
+ // consider moving to the above methods instead, or to moving to
+ // using your own TemplateCache (which supports richer operations
+ // on parsed templates).
+
+ // Loads a template from disk or cache or string, and returns the Template*.
+ // INSTEAD, use the static Expand that takes a filename.
+ static Template *GetTemplate(const TemplateString& filename, Strip strip);
+ virtual ~Template(); // when the time comes to delete these Template*'s.
+
+ // Parses a string immediately and returns the resulting Template*.
+ // You can call the (deprecated) non-static Expand() method on this
+ // template in order to expand it with a dictionary. You are
+ // responsible for deleting the Template* when you are done with it.
+ // INSTEAD, use StringToTemplateCache (with a key) plus the static Expand().
+ // TOOO(csilvers): return a const Template* instead.
+ static Template* StringToTemplate(const TemplateString& content,
+ Strip strip);
+ static Template* StringToTemplate(const char* content, size_t content_len,
+ Strip strip) {
+ return StringToTemplate(TemplateString(content, content_len), strip);
+ }
+
+ // Non-static Expand*() works on a Template* returned from GetTemplate().
+ // INSTEAD, use static expand with a filename (or key-name for strings).
+ bool ExpandWithData(ExpandEmitter* output,
+ const TemplateDictionaryInterface* dictionary,
+ PerExpandData* per_expand_data) const {
+ return ExpandWithDataAndCache(output, dictionary, per_expand_data,
+ default_template_cache());
+ }
+ bool ExpandWithData(std::string* output_buffer,
+ const TemplateDictionaryInterface* dictionary,
+ PerExpandData* per_expand_data) const {
+ if (output_buffer == NULL) return false;
+ StringEmitter e(output_buffer);
+ return ExpandWithData(&e, dictionary, per_expand_data);
+ }
+ bool Expand(ExpandEmitter* output,
+ const TemplateDictionaryInterface* dictionary) const {
+ return ExpandWithData(output, dictionary, NULL);
+ }
+ bool Expand(std::string* output_buffer,
+ const TemplateDictionaryInterface* dictionary) const {
+ return ExpandWithData(output_buffer, dictionary, NULL);
+ }
+
+ // Dump to stdout or a string. filename is just used to annotate output.
+ void Dump(const char *filename) const;
+ void DumpToString(const char *filename, std::string *out) const;
+
+ // Retrieves the state, template-file, or strip mode of this Template.
+ TemplateState state() const;
+ const char *template_file() const;
+ const char *original_filename() const;
+ Strip strip() const;
+
+ // Work at the level of groups of templates, so just call through to
+ // the default TemplateCache; see template_cache.h for what these do.
+ // INSTEAD, create your own TemplateCache and call these methods on that.
+ static bool SetTemplateRootDirectory(const std::string& dir) {
+ return mutable_default_template_cache()->SetTemplateRootDirectory(dir);
+ }
+ static bool AddAlternateTemplateRootDirectory(const std::string& dir) {
+ return mutable_default_template_cache()->AddAlternateTemplateRootDirectory(
+ dir);
+ }
+ static std::string template_root_directory() {
+ return default_template_cache()->template_root_directory();
+ }
+ static std::string FindTemplateFilename(
+ const std::string& unresolved) {
+ return default_template_cache()->FindTemplateFilename(unresolved);
+ }
+ static void RemoveStringFromTemplateCache(const std::string& key) {
+ mutable_default_template_cache()->Delete(key);
+ }
+ static void ClearCache() {
+ mutable_default_template_cache()->ClearCache();
+ }
+ static void ReloadAllIfChanged() {
+ mutable_default_template_cache()->ReloadAllIfChanged(
+ TemplateCache::LAZY_RELOAD);
+ }
+
+ // ---- EXTRA-DEPRECATED METHODS ----
+ // These methods were deprecated even before the move to
+ // TemplateCache. We'd really like you to move from these to one
+ // of the "approved" methods, or even one of the deprecated
+ // methods. Comments here don't even describe what these
+ // functions do, just how to transition off of using them.
+
+ // INSTEAD, use the StringToTemplateCache function that takes the strip mode.
+ static bool StringToTemplateCache(const TemplateString& key,
+ const TemplateString& content);
+ static bool StringToTemplateCache(const TemplateString& key,
+ const char* content, size_t content_len) {
+ return StringToTemplateCache(key, TemplateString(content, content_len));
+ }
+ // This is to prevent against typos: you want the global (free-function)
+ // StringToTemplateCache here, not the one in Template.
+ static bool StringToTemplateCache(const TemplateString& key,
+ const char* content, Strip);
+
+ protected:
+ friend class SectionTemplateNode; // for access to set_state(), ParseState
+ friend class TemplateTemplateNode; // for recursive call to Expand()
+
+ // Template constructor
+ // Reads the template file and parses it into a parse tree of TemplateNodes
+ // by calling the method ReloadIfChanged
+ // The top node is a section node with the arbitrary name "__{{MAIN}}__"
+ // 'Strip' indicates how to handle whitespace when expanding the
+ // template. DO_NOT_STRIP keeps the template exactly as-is.
+ // STRIP_BLANK_LINES elides all blank lines in the template.
+ // STRIP_WHITESPACE elides all blank lines, and also all whitespace
+ // at either the beginning or end of a line. It also removes
+ // any linefeed (possibly following whitespace) that follows a closing
+ // '}}' of any kind of template marker EXCEPT a template variable.
+ // This means a linefeed may be removed anywhere by simply placing
+ // a comment marker as the last element on the line.
+ // These two options allow the template to include whitespace for
+ // readability without adding to the expanded output.
+ Template(const TemplateString& filename, Strip strip, TemplateCache* owner);
+
+ // MaybeInitHtmlParser
+ // In TemplateContexts where the HTML parser is needed, we
+ // initialize it in the appropriate mode. Also we do a sanity
+ // check (cannot fail) on the template filename. This function is
+ // called at most once for a Template. In_tag is only meaningful
+ // for TC_HTML: It is true for templates that start inside an HTML
+ // tag and hence are expected to contain HTML attribute name/value
+ // pairs only. It is false for standard HTML templates.
+ void MaybeInitHtmlParser(bool in_tag);
+
+ // BuildTree
+ // Parses the contents of the file (retrieved via ReloadIfChanged)
+ // and stores the resulting parse structure in tree_. Returns true
+ // iff the tree-builder encountered no errors. Note: takes
+ // ownership of input_buffer, and will delete it. It should have
+ // been created via new[].
+ bool BuildTree(const char *input_buffer, const char* input_buffer_end);
+
+ // Internal version of ReloadIfChanged, used when the function already
+ // has a write-lock on g_template_mutex.
+ bool ReloadIfChangedLocked();
+
+ // set_state
+ // Sets the state of the template. Used during BuildTree().
+ void set_state(TemplateState new_state);
+
+ // StripBuffer
+ // Modifies buffer in-place based on the strip_ mode, to remove
+ // extra whitespace. May delete[] the input buffer and replace
+ // it with a new buffer. Used by ReloadIfChanged().
+ void StripBuffer(char **buffer, size_t* len);
+
+ // The file we originally got from the Template() constructor
+ const std::string original_filename_;
+ // The pathname as fully resolved on the filesystem
+ std::string resolved_filename_;
+ time_t filename_mtime_; // lastmod time for filename last time we loaded it
+
+ // What to do with whitespace at template-expand time
+ Strip strip_;
+
+ // Keeps track of where we are in reloading, or if there was an error loading
+ TemplateState state_;
+
+ // The cache we got this template from. This is not well-defined: a
+ // Template can be in more than one cache.
+ // TODO(csilvers): remove this once we deprecate the one user, which
+ // is ReloadIfChanged.
+ TemplateCache* template_cache_;
+
+ // The current template-contents, as read from the file
+ const char* template_text_;
+ int template_text_len_;
+
+ // The current parsed template structure. Has pointers into template_text_.
+ class SectionTemplateNode *tree_; // defined in template.cc
+
+ // Template markers have the form {{VARIABLE}}, etc. These constants
+ // define the {{ and }} that delimit template markers.
+ struct @ac_windows_dllexport@ MarkerDelimiters {
+ const char* start_marker;
+ size_t start_marker_len;
+ const char* end_marker;
+ size_t end_marker_len;
+
+ MarkerDelimiters()  // Members are initialized in declaration order.
+ : start_marker("{{"),  // The default start-marker
+ start_marker_len(strlen(start_marker)),
+ end_marker("}}"),  // The default end-marker
+ end_marker_len(strlen(end_marker)) {
+ }
+ };
+
+ // The current parsing state. Used in BuildTree() and subroutines
+ struct @ac_windows_dllexport@ ParseState {
+ const char* bufstart;  // NULL after default construction
+ const char* bufend;  // NULL after default construction
+ enum { PS_UNUSED, GETTING_TEXT, GETTING_NAME } phase;  // starts as PS_UNUSED
+ MarkerDelimiters current_delimiters;  // default-constructed, i.e. the default markers
+ ParseState()
+ : bufstart(NULL), bufend(NULL), phase(PS_UNUSED), current_delimiters()
+ {}
+ };
+ ParseState parse_state_;
+
+ // All templates are initialized to TC_MANUAL (no Auto-Escape). Then,
+ // during template parsing (BuildTree()), if an AUTOESCAPE pragma is
+ // encountered, the context changes appropriately.
+ TemplateContext initial_context_;
+ // Non-null if the template was initialized in an Auto-Escape mode that
+ // requires a parser (currently TC_HTML, TC_CSS and TC_JS).
+ ctemplate_htmlparser::HtmlParser *htmlparser_;
+
+ // A sorted list of trusted variable names, declared here because a unittest
+ // needs to verify that it is appropriately sorted (an unsorted array would
+ // lead to the binary search of this array failing).
+ static const char * const kSafeWhitelistedVariables[];
+ static const size_t kNumSafeWhitelistedVariables;
+
+ private:
+ friend class TemplateCache;
+ friend class TemplateCachePeer; // to access num_deletes_
+
+ // Internal implementation of Expand
+ bool ExpandWithDataAndCache(ExpandEmitter* output,
+ const TemplateDictionaryInterface *dictionary,
+ PerExpandData* per_expand_data,
+ const TemplateCache* cache) const;
+
+ // This is called for recursive expands, when we already hold template_lock.
+ bool ExpandLocked(ExpandEmitter* output,
+ const TemplateDictionaryInterface *dictionary,
+ PerExpandData* per_expand_data,
+ const TemplateCache* cache) const;
+
+ // Returns the lastmod time stored in filename_mtime_
+ // For string-based templates, not backed by a file, this returns 0
+ time_t mtime() const;
+
+ // These are helper routines to StripFile. I would make them static
+ // inside template.cc, but they use the MarkerDelimiters struct.
+ static bool ParseDelimiters(const char* text, size_t textlen,
+ MarkerDelimiters* delim);
+ static bool IsBlankOrOnlyHasOneRemovableMarker(const char** line, size_t* len,
+ const MarkerDelimiters& delim);
+ static size_t InsertLine(const char *line, size_t len, Strip strip,
+ const MarkerDelimiters& delim, char* buffer);
+
+ // This is only used by template_cache_test, via TemplateCachePeer.
+ static int num_deletes() { return num_deletes_; }
+
+ static int num_deletes_; // how many times the destructor has been called
+
+ // Can't invoke copy constructor or assignment operator
+ Template(const Template&);
+ void operator=(const Template &);
+};
+
+}
+
+#endif // CTEMPLATE_TEMPLATE_H_
diff --git a/src/ctemplate/template_annotator.h.in b/src/ctemplate/template_annotator.h.in
new file mode 100644
index 0000000..29c703a
--- /dev/null
+++ b/src/ctemplate/template_annotator.h.in
@@ -0,0 +1,136 @@
+// Copyright (c) 2009, Google Inc.
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+// * Neither the name of Google Inc. nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+// ---
+//
+// The template expansion system provides a set of hooks that allow for
+// the insertion of diagnostic content into the output stream for the use
+// by content developers and testers. For instance, the default text
+// annotation mode inserts strings bracketed by {{ }} to describe every
+// occurrence of a dynamic substitution feature. That mode turns the
+// rendering into a logical text description of the construction of
+// template-based output. It is useful for regression testing of output
+// in conjunction with text-based diffing tools.
+//
+// An annotation mode is implemented with the TemplateAnnotator interface.
+// When template annotation is turned on, then during template expansion
+// whenever a dynamic substitution feature is encountered, a call is made
+// to one of the TemplateAnnotator functions. In response to a call
+// an implementation can render any additional content into the passed
+// emitter, which is the same emitter that the rendering output is going
+// to.
+//
+// Template annotation is turned on and the template annotator subclass
+// set by methods in @ac_google_namespace@::PerExpandData.
+
+#ifndef TEMPLATE_TEMPLATE_ANNOTATOR_H_
+#define TEMPLATE_TEMPLATE_ANNOTATOR_H_
+
+#include <string>
+
+@ac_windows_dllexport_defines@
+
+namespace ctemplate {
+
+class ExpandEmitter;
+
+// This is the abstract interface for an annotation mode. A new annotation
+// mode is introduced by subclassing and implementing each function
+// to add annotation content. There is one function for each internal
+// template expansion event type. The emitter argument passed to the
+// function is the same stream that the expanding content is being output to;
+// so the action of an implementation will be to add additional inline
+// content. The emitter argument is never to be remembered beyond each
+// function call.
+class @ac_windows_dllexport@ TemplateAnnotator {
+ public:
+ TemplateAnnotator() { }
+ virtual ~TemplateAnnotator() { }  // virtual, so deleting through a TemplateAnnotator* is well-defined
+
+ // Called before processing a subtemplate include marker.
+ // Passed value is the include marker name.
+ virtual void EmitOpenInclude(ExpandEmitter* emitter, const std::string& value) = 0;
+ // Called after processing a subtemplate include marker.
+ virtual void EmitCloseInclude(ExpandEmitter* emitter) = 0;
+
+ // Called before opening a template or subtemplate file for processing.
+ // Passed value is the filename.
+ virtual void EmitOpenFile(ExpandEmitter* emitter, const std::string& value) = 0;
+ // Called after processing a template or subtemplate file.
+ virtual void EmitCloseFile(ExpandEmitter* emitter) = 0;
+
+ // Called before processing a section.
+ // Passed value is the section name.
+ virtual void EmitOpenSection(ExpandEmitter* emitter, const std::string& value) = 0;
+ // Called after processing a section.
+ virtual void EmitCloseSection(ExpandEmitter* emitter) = 0;
+
+ // Called before processing a variable marker.
+ // Passed value is the variable name.
+ virtual void EmitOpenVariable(ExpandEmitter* emitter,
+ const std::string& value) = 0;
+ // Called after processing a variable marker.
+ virtual void EmitCloseVariable(ExpandEmitter* emitter) = 0;
+
+ virtual void EmitFileIsMissing(ExpandEmitter* emitter,  // NOTE(review): undocumented upstream.
+ const std::string& value) = 0;  // Presumably value is the missing filename -- confirm in template.cc.
+
+ private:
+ // Private and declared without a body here: not copyable or assignable.
+ TemplateAnnotator(const TemplateAnnotator&);
+ void operator=(const TemplateAnnotator&);
+};
+
+// This is a concrete template annotator class that inserts annotations
+// that have a standard text form bracketed by {{ }}. It is used as
+// the default annotation implementation when annotation is turned on
+// by PerExpandData and no annotator type is specified.
+class @ac_windows_dllexport@ TextTemplateAnnotator : public TemplateAnnotator {
+ public:  // Each Emit* method below overrides the matching TemplateAnnotator pure virtual.
+ TextTemplateAnnotator() { }
+ virtual void EmitOpenInclude(ExpandEmitter* emitter, const std::string& value);
+ virtual void EmitCloseInclude(ExpandEmitter* emitter);
+ virtual void EmitOpenFile(ExpandEmitter* emitter, const std::string& value);
+ virtual void EmitCloseFile(ExpandEmitter* emitter);
+ virtual void EmitOpenSection(ExpandEmitter* emitter, const std::string& value);
+ virtual void EmitCloseSection(ExpandEmitter* emitter);
+ virtual void EmitOpenVariable(ExpandEmitter* emitter, const std::string& value);
+ virtual void EmitCloseVariable(ExpandEmitter* emitter);
+ virtual void EmitFileIsMissing(ExpandEmitter* emitter,
+ const std::string& value);
+
+ private:
+ // Private and declared without a body here: not copyable or assignable.
+ TextTemplateAnnotator(const TextTemplateAnnotator&);
+ void operator=(const TextTemplateAnnotator&);
+};
+
+}
+
+#endif // TEMPLATE_TEMPLATE_ANNOTATOR_H_
diff --git a/src/ctemplate/template_cache.h.in b/src/ctemplate/template_cache.h.in
new file mode 100644
index 0000000..998eff2
--- /dev/null
+++ b/src/ctemplate/template_cache.h.in
@@ -0,0 +1,368 @@
+// Copyright (c) 2009, Google Inc.
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+// * Neither the name of Google Inc. nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+// ---
+//
+// This file implements the Template Cache used to store templates.
+
+#ifndef TEMPLATE_TEMPLATE_CACHE_H_
+#define TEMPLATE_TEMPLATE_CACHE_H_
+
+#include @ac_cv_cxx_hash_map@ // for @ac_cv_cxx_hash_map_class@<>
+#include <string> // for string
+#include <utility> // for pair
+#include <vector> // for vector<>
+#include <ctemplate/template_emitter.h> // for ExpandEmitter, etc
+#include <ctemplate/template_enums.h> // for Strip
+#include <ctemplate/template_string.h>
+#include <ctemplate/per_expand_data.h>
+@ac_google_start_namespace@
+class FileStat;
+}
+class Mutex;
+class TemplateCacheUnittest;
+
+@ac_windows_dllexport_defines@
+
+namespace ctemplate {
+
+class PerExpandData;
+class Template;
+class TemplateCachePeer;
+class TemplateDictionaryInterface;
+
+// A cache to store parsed templates.
+class @ac_windows_dllexport@ TemplateCache {
+ public:
+ TemplateCache();
+ ~TemplateCache();
+
+ // ---- CREATING A TEMPLATE OBJECT -------
+ // LoadTemplate
+ // StringToTemplateCache
+
+ // Attempts to load the template object stored under its filename,
+ // into the template cache. It first checks if the object is already
+ // in the cache. Any object retrieved from the cache is then
+ // checked to see if its status is marked for "reload if changed."
+ // If so, ReloadIfChanged is called on the retrieved object. Returns
+ // true if the object is loaded. Also returns true if the object
+ // already exists, and no reload was required.
+ //
+ // When it fails to retrieve one from the cache, it creates a new
+ // template object, passing the filename and 'strip' values to the
+ // constructor. (See the Template constructor for the meaning of
+ // 'strip'.) If it succeeds in creating an object, including loading
+ // and parsing the associated template file, the object is stored in
+ // the cache, and the method returns true.
+ //
+ // If it fails in loading and parsing the template file, either
+ // because the file was not found or it contained syntax errors,
+ // then the newly created object is deleted and the method returns
+ // false. (NOTE: This description is much longer and less precise
+ // and probably harder to understand than the method itself. Read
+ // the code.)
+ //
+ // To enable Auto-Escape on that template, place the corresponding
+ // AUTOESCAPE pragma at the top of the template file. The template
+ // will then be Auto-Escaped independently of the template it may be
+ // included from or the templates it may include.
+ //
+ // 'Strip' indicates how to handle whitespace when expanding the
+ // template. DO_NOT_STRIP keeps the template exactly as-is.
+ // STRIP_BLANK_LINES elides all blank lines in the template.
+ // STRIP_WHITESPACE elides all blank lines, and also all whitespace
+ // at either the beginning or end of a line. See template constructor
+ // for more details.
+ bool LoadTemplate(const TemplateString& filename, Strip strip);
+
+ // Parses the string as a template file (e.g. "Hello {{WORLD}}"),
+ // and inserts it into the parsed template cache, so it can later be
+ // used by the user. The user specifies a key and a strip, which are
+ // later passed in to expand the template.
+ // Returns true if the template was successfully parsed and
+ // inserted into the template cache, or false otherwise. In particular,
+ // we return false if a string was already cached with the given key.
+ // NOTE: to include this template from within another template (via
+ // "{{>TEMPLATE_THAT_COMES_FROM_A_STRING}}"), the argument you pass
+ // to TemplateDictionary::SetFilename() is the key you used to register
+ // the string-template.
+ bool StringToTemplateCache(const TemplateString& key,
+ const TemplateString& content,
+ Strip strip);
+ bool StringToTemplateCache(const TemplateString& key,
+ const char* content,
+ size_t content_len,
+ Strip strip) {
+ return StringToTemplateCache(key,
+ TemplateString(content, content_len),
+ strip);
+ }
+
+ // ---- EXPANDING A TEMPLATE -------
+ // ExpandWithData
+ // ExpandNoLoad
+
+ // This returns false if the expand failed for some reason: filename
+ // could not be found on disk (and isn't already in the cache), or
+ // the template is mal-formed, or a sub-included template couldn't
+ // be found. Note that even if it returns false, it may have emitted
+ // some output to ExpandEmitter, before it noticed the problem.
+ bool ExpandWithData(const TemplateString& filename, Strip strip,
+ const TemplateDictionaryInterface *dictionary,
+ PerExpandData* per_expand_data,
+ ExpandEmitter* output);
+ bool ExpandWithData(const TemplateString& filename, Strip strip,
+ const TemplateDictionaryInterface* dictionary,
+ PerExpandData* per_expand_data,
+ std::string* output_buffer) {
+ if (output_buffer == NULL) return false;
+ StringEmitter e(output_buffer);
+ return ExpandWithData(filename, strip, dictionary, per_expand_data, &e);
+ }
+
+ // Const version of ExpandWithData, intended for use with frozen
+ // caches. This method returns false if the requested
+ // template-filename is not found in the cache, rather than fetching
+ // the template from disk and continuing, as ExpandWithData does.
+ // (That is why the method can be const.) Likewise, it will return
+ // false, rather than fetch, if any sub-included template filename
+ // is not found in the cache.
+ // Unfortunately, the only way to enforce this last requirement at
+ // the moment is to have the template-cache be Frozen(). So that
+ // is a pre-requisite for calling this method. It may be relaxed
+ // in the future (if we rewrite the control flow to pass around the
+ // necessary state).
+ // Like ExpandWithData(), this may write partial results into output
+ // even if it returns false (due to template error or file not found).
+ bool ExpandNoLoad(const TemplateString& filename, Strip strip,
+ const TemplateDictionaryInterface *dictionary,
+ PerExpandData* per_expand_data,
+ ExpandEmitter* output) const;
+ bool ExpandNoLoad(const TemplateString& filename, Strip strip,
+ const TemplateDictionaryInterface* dictionary,
+ PerExpandData* per_expand_data,
+ std::string* output_buffer) const {
+ if (output_buffer == NULL) return false;
+ StringEmitter e(output_buffer);
+ return ExpandNoLoad(filename, strip, dictionary, per_expand_data, &e);
+ }
+
+ // ---- FINDING A TEMPLATE FILE -------
+
+ // Sets the root directory for all templates used by the program.
+ // After calling this method, the filename passed to GetTemplate may
+ // be a relative pathname (no leading '/'), in which case this
+ // root-directory is prepended to the filename. This clears the old
+ // 'main' root directory, and also all alternate root directories
+ // that may have been added previously.
+ bool SetTemplateRootDirectory(const std::string& directory);
+
+ // Adds an additional search path for all templates used by the
+ // program. You may call this multiple times.
+ bool AddAlternateTemplateRootDirectory(const std::string& directory);
+
+ // Returns the 'main' root directory set by SetTemplateRootDirectory().
+ std::string template_root_directory() const;
+
+ // Given an unresolved filename, look through the template search
+ // path to see if the template can be found. If so, return the path
+ // of the resolved filename, otherwise return an empty string.
+ std::string FindTemplateFilename(const std::string& unresolved)
+ const;
+
+ // ---- MANAGING THE CACHE -------
+ // Freeze
+ // Delete
+ // ClearCache
+ // ReloadAllIfChanged
+ // Clone
+
+ // Marks the template cache as immutable. After this method is called,
+ // the cache can no longer be modified by loading new templates or
+ // reloading existing templates. During expansion only cached
+ // included templates will be used, they won't be loaded on-demand.
+ void Freeze();
+
+ // Delete
+ // Deletes one template object from the cache, if it exists.
+ // This can be used for either file- or string-based templates.
+ // Returns true if the object was deleted, false otherwise.
+ bool Delete(const TemplateString& key);
+
+ // ClearCache
+ // Deletes all the template objects in the cache and all raw
+ // contents cached from StringToTemplateCache. This should only
+ // be done once, just before exiting the program and after all
+ // template expansions are completed. (If you want to refresh the
+ // cache, the correct method to use is ReloadAllIfChanged, not
+ // this one.) Note: this method is not necessary unless you are
+ // testing for memory leaks. Calling this before exiting the
+ // program will prevent unnecessary reporting in that case.
+ void ClearCache();
+
+ // ReloadAllIfChanged
+ // If IMMEDIATE_RELOAD, reloads and parses all templates right away,
+ // if the corresponding template files have changed.
+ // If LAZY_RELOAD, then sets the reload bit on all templates.
+ // A later call to GetTemplate() checks if the file has changed, and if so
+ // reloads and parses the file into the cache.
+ //
+ // IMMEDIATE_RELOAD gives a more consistent snapshot of the current
+ // templates, since all templates in the cache are reloaded at
+ // (approximately) the same time. On the other hand, LAZY_RELOAD
+ // causes less of a latency spike, since it does not require
+ // loading lots of templates from disk at the same time. If in
+ // doubt, LAZY_RELOAD is probably a better choice.
+
+ // If a file with the same name as an existing template-file is added
+ // in another search path, ReloadAllIfChanged will pick up the file in the
+ // earlier search-path.
+ enum ReloadType { LAZY_RELOAD, IMMEDIATE_RELOAD };
+ void ReloadAllIfChanged(ReloadType reload_type);
+
+ // Clone
+ // Returns a copy of the cache. It makes a shallow copy of the
+ // parsed_template_cache_, incrementing refcount of templates.
+ // The caller is responsible for deallocating the returned TemplateCache.
+ // NOTE(user): Annotalysis expects this method to have a lock for
+ // a TemplateCache instance local to the method, but we
+ // know that no other threads will have access to the
+ // instance, so ignore thread safety errors.
+ TemplateCache* Clone() const;
+
+ // ---- INSPECTING THE CACHE -------
+ // Dump
+ // DumpToString
+ // TODO(csilvers): implement these?
+
+ private:
+ // TODO(csilvers): nix Template friend once Template::ReloadIfChanged is gone
+ friend class Template; // for ResolveTemplateFilename
+ friend class TemplateTemplateNode; // for ExpandLocked
+ friend class TemplateCachePeer; // for unittests
+ friend class ::TemplateCacheUnittest; // for unittests
+
+ class RefcountedTemplate;
+ struct CachedTemplate;
+ class TemplateCacheHash;
+ class RefTplPtrHash;
+ // due to a bug(?) in MSVC, TemplateCachePeer won't compile unless this
+ // particular typedef is public. Ugh.
+ public:
+ typedef std::pair<TemplateId, int> TemplateCacheKey;
+ private:
+ typedef @ac_cv_cxx_hash_map_class@<TemplateCacheKey, CachedTemplate, TemplateCacheHash>
+ TemplateMap;
+ typedef @ac_cv_cxx_hash_map_class@<RefcountedTemplate*, int, RefTplPtrHash> TemplateCallMap;
+ // Where to search for files.
+ typedef std::vector<std::string> TemplateSearchPath;
+
+ // GetTemplate
+ // This method is deprecated. It exists here because it is called by
+ // Template::GetTemplate. Also this is used in tests.
+ const Template* GetTemplate(const TemplateString& key, Strip strip);
+
+ bool ResolveTemplateFilename(const std::string& unresolved,
+ std::string* resolved,
+ FileStat* statbuf) const;
+
+ // This is used only for internal (recursive) calls to Expand due
+ // to internal template-includes. It doesn't try to acquire the
+ // global template_lock again, in template.cc.
+ // TODO(csilvers): remove this when template.cc's g_template_lock goes away.
+ bool ExpandLocked(const TemplateString& filename, Strip strip,
+ ExpandEmitter* output,
+ const TemplateDictionaryInterface *dictionary,
+ PerExpandData* per_expand_data);
+
+ bool AddAlternateTemplateRootDirectoryHelper(
+ const std::string& directory,
+ bool clear_template_search_path);
+
+ // DoneWithGetTemplatePtrs
+ // For historical reasons, GetTemplate() returns a raw Template
+ // pointer rather than a refcounted pointer. So it's impossible
+ // for the user to call DecRef on the template when it's done
+ // using it. To make up for that, we provide this routine, which
+ // says "call DecRef()" on *all* Templates ever used by
+ // GetTemplate(). It's safe for the client to call this when it's
+ // done using all templates it's ever retrieved before (via
+ // GetTemplate). Most likely, the user will call this indirectly,
+ // via ClearCache().
+ // TODO(panicker): Consider making this method public.
+ void DoneWithGetTemplatePtrs();
+
+ // IsValidTemplateFilename
+ // Validates the user provided filename before constructing the template
+ bool IsValidTemplateFilename(const std::string& filename,
+ std::string* resolved_filename,
+ FileStat* statbuf) const;
+
+ // GetTemplateLocked
+ // Internal version of GetTemplate. It's used when the function already
+ // has a write-lock on mutex_. It returns a pointer to a refcounted
+ // template (in the cache), or NULL if the template is not found.
+ // It's used by GetTemplate & ForceReloadAllIfChanged.
+ RefcountedTemplate* GetTemplateLocked(
+ const TemplateString& filename,
+ Strip strip,
+ const TemplateCacheKey& key);
+
+ // Refcount
+ // Testing only. Returns the refcount of a template, given its cache key.
+ int Refcount(const TemplateCacheKey template_cache_key) const;
+
+ // TemplateIsCached
+ // Debug only. Returns whether the cache key is in the parsed cache.
+ bool TemplateIsCached(const TemplateCacheKey template_cache_key) const;
+
+ TemplateMap* parsed_template_cache_;
+ bool is_frozen_;
+ TemplateSearchPath search_path_;
+
+ // Since GetTemplate() returns a raw pointer, it's impossible for
+ // the caller to call DecRef() on the returned template when it's
+ // done using it. To make up for that, we store each retval of
+ // GetTemplate in this data structure. Then the user can call
+ // DecRef() on all of them at once, via a DoneWithGetTemplatePtrs()
+ // (which they will probably get at via a call to ClearCache()).
+ TemplateCallMap* get_template_calls_;
+
+ Mutex* const mutex_;
+ Mutex* const search_path_mutex_;
+
+ // Can't invoke copy constructor or assignment operator
+ TemplateCache(const TemplateCache&);
+ void operator=(const TemplateCache &);
+};
+
+}
+
+#endif // TEMPLATE_TEMPLATE_CACHE_H_
diff --git a/src/ctemplate/template_dictionary.h.in b/src/ctemplate/template_dictionary.h.in
new file mode 100644
index 0000000..e0c9942
--- /dev/null
+++ b/src/ctemplate/template_dictionary.h.in
@@ -0,0 +1,454 @@
+// Copyright (c) 2006, Google Inc.
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+// * Neither the name of Google Inc. nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+// ---
+// Author: csilvers@google.com (Craig Silverstein)
+//
+// Based on the 'old' TemplateDictionary by Frank Jernigan.
+//
+// A template dictionary maps names (as found in template files)
+// to their values. There are three types of names:
+// variables: value is a string.
+// sections: value is a list of sub-dicts to use when expanding the section;
+// the section is expanded once per sub-dict.
+// template-include: value is a list of pairs: name of the template file
+// to include, and the sub-dict to use when expanding it.
+// TemplateDictionary has routines for setting these values.
+//
+// For (many) more details, see the doc/ directory.
+
+#ifndef TEMPLATE_TEMPLATE_DICTIONARY_H_
+#define TEMPLATE_TEMPLATE_DICTIONARY_H_
+
+#include <stdarg.h> // for StringAppendV()
+#include <stddef.h> // for size_t and ptrdiff_t
+#include <stdlib.h> // for NULL
+#include <sys/types.h>
+#include <functional> // for less<>
+#include <map>
+#include <string>
+#include <vector>
+
+#include <ctemplate/str_ref.h>
+#include <ctemplate/template_dictionary_interface.h>
+#include <ctemplate/template_modifiers.h>
+#include <ctemplate/template_string.h>
+
+@ac_windows_dllexport_defines@
+
+@ac_google_start_namespace@
+template <class T, class C> class ArenaAllocator;
+class UnsafeArena;
+template<typename A, int B, typename C, typename D> class small_map;
+template<typename NormalMap> class small_map_default_init; // in small_map.h
+}
+
+namespace ctemplate {
+
+
+class @ac_windows_dllexport@ TemplateDictionary : public TemplateDictionaryInterface {
+ public:
+ // name is used only for debugging.
+ // arena is used to store all names and values. It can be NULL (the
+ // default), in which case we create our own arena.
+ explicit TemplateDictionary(const TemplateString& name,
+ UnsafeArena* arena=NULL);
+ ~TemplateDictionary();
+
+ // If you want to be explicit, you can use NO_ARENA as a synonym to NULL.
+ static UnsafeArena* const NO_ARENA;
+
+ std::string name() const { // copies the dictionary's name_ into a std::string
+ return std::string(name_.data(), name_.data() + name_.size());
+ }
+
+ // Returns a recursive copy of this dictionary. This dictionary
+ // *must* be a "top-level" dictionary (that is, not created via
+ // AddSectionDictionary() or AddIncludeDictionary()). Caller owns
+ // the resulting dict, and must delete it. If arena is NULL, we
+ // create our own. Returns NULL if the copy fails (probably because
+ // the "top-level" rule was violated).
+ TemplateDictionary* MakeCopy(const TemplateString& name_of_copy,
+ UnsafeArena* arena=NULL);
+
+ // --- Routines for VARIABLES
+ // These are the main routines used to set the value of a variable.
+ // As always, wherever you see TemplateString, you can also pass in
+ // either a char* or a C++ string, or a TemplateString(s, slen).
+
+ void SetValue(const TemplateString variable, const TemplateString value);
+ void SetIntValue(const TemplateString variable, long value);
+ void SetFormattedValue(const TemplateString variable, const char* format, ...)
+#if @ac_google_attribute@
+ __attribute__((__format__ (__printf__, 3, 4)))
+#endif
+ ; // starts at 3 because of implicit 1st arg 'this'
+
+ // Proxy returned by operator[]: assigning to it sets the named variable.
+ // NOTE(review): the name's character data presumably must outlive the proxy.
+ class SetProxy {
+ public:
+ SetProxy(TemplateDictionary& dict, const TemplateString& variable)
+ : dict_(dict), variable_(variable) { }
+ // Sets the variable to a string value, via SetValue().
+ void operator=(str_ref value) {
+ dict_.SetValue(variable_, TemplateString(value.data(), value.size()));
+ }
+ // Sets the variable to an integer value, via SetIntValue().
+ void operator=(long value) {
+ dict_.SetIntValue(variable_, value);
+ }
+
+ private:
+ TemplateDictionary& dict_;
+ const TemplateString variable_; // by value: a reference could dangle
+ };
+ // dict[variable] = value forwards to SetValue() / SetIntValue() on this dict.
+ SetProxy operator[](const TemplateString& variable) {
+ return SetProxy(*this, variable);
+ }
+
+ // We also let you set values in the 'global' dictionary which is
+ // referenced when all other dictionaries fail. Note this is a
+ // static method: no TemplateDictionary instance needed. Since
+ // this routine is rarely used, we don't provide variants.
+ static void SetGlobalValue(const TemplateString variable,
+ const TemplateString value);
+
+ // This is used for a value that you want to be 'global', but only
+ // in the scope of a given template, including all its sections and
+ // all its sub-included dictionaries. The main difference between
+ // SetTemplateGlobalValue() and SetValue(), is that
+ // SetTemplateGlobalValue() values persist across template-includes.
+ // This is intended for session-global data; since that should be
+ // fairly rare, we don't provide variants.
+ void SetTemplateGlobalValue(const TemplateString variable,
+ const TemplateString value);
+
+ // Similar to SetTemplateGlobalValue above, this method shows a section in this
+ // template, all its sections, and all its template-includes. This is intended
+ // for session-global data, for example allowing you to show variant portions
+ // of your template for certain browsers/languages without having to call
+ // ShowSection on each template you use.
+ void ShowTemplateGlobalSection(const TemplateString variable);
+
+ // These routines are like SetValue and SetTemplateGlobalValue, but
+ // they do not make a copy of the input data. THE CALLER IS
+ // RESPONSIBLE FOR ENSURING THE PASSED-IN STRINGS LIVE FOR AT LEAST
+ // AS LONG AS THIS DICTIONARY! In general, they yield a quite minor
+ // performance increase for significant increased code fragility,
+ // so do not use them unless you really need the speed improvements.
+ void SetValueWithoutCopy(const TemplateString variable,
+ const TemplateString value);
+ void SetTemplateGlobalValueWithoutCopy(const TemplateString variable,
+ const TemplateString value);
+
+
+ // --- Routines for SECTIONS
+ // We show a section once per dictionary that is added with its name.
+ // Recall that lookups are hierarchical: if a section tried to look
+ // up a variable in its sub-dictionary and fails, it will look next
+ // in its parent dictionary (us). So it's perfectly appropriate to
+ // keep the sub-dictionary empty: that will show the section once,
+ // and take all var definitions from us. ShowSection() is a
+ // convenience routine that does exactly that.
+
+ // Creates an empty dictionary whose parent is us, and returns it.
+ // As always, wherever you see TemplateString, you can also pass in
+ // either a char* or a C++ string, or a TemplateString(s, slen).
+ TemplateDictionary* AddSectionDictionary(const TemplateString section_name);
+ void ShowSection(const TemplateString section_name);
+
+ // A convenience method. Often a single variable is surrounded by
+ // some HTML that should not be printed if the variable has no
+ // value. The way to do this is to put that html in a section.
+ // This method makes it so the section is shown exactly once, with a
+ // dictionary that maps the variable to the proper value. If the
+ // value is "", on the other hand, this method does nothing, so the
+ // section remains hidden.
+ void SetValueAndShowSection(const TemplateString variable,
+ const TemplateString value,
+ const TemplateString section_name);
+
+
+ // --- Routines for TEMPLATE-INCLUDES
+ // Included templates are treated like sections, but they require
+ // the name of the include-file to go along with each dictionary.
+
+ TemplateDictionary* AddIncludeDictionary(const TemplateString variable);
+
+ // This is required for include-templates; it specifies what template
+ // to include. But feel free to call this on any dictionary, to
+ // document what template-file the dictionary is intended to go with.
+ void SetFilename(const TemplateString filename);
+
+ // --- DEBUGGING TOOLS
+
+ // Logs the contents of a dictionary and its sub-dictionaries.
+ // Dump goes to stdout/stderr, while DumpToString goes to the given string.
+ // 'indent' is how much to indent each line of the output.
+ void Dump(int indent=0) const;
+ virtual void DumpToString(std::string* out, int indent=0) const;
+
+
+ // --- DEPRECATED ESCAPING FUNCTIONALITY
+
+ // Escaping in the binary has been deprecated in favor of using modifiers
+ // to do the escaping in the template:
+ // "...{{MYVAR:html_escape}}..."
+ void SetEscapedValue(const TemplateString variable, const TemplateString value,
+ const TemplateModifier& escfn);
+ void SetEscapedFormattedValue(const TemplateString variable,
+ const TemplateModifier& escfn,
+ const char* format, ...)
+#if @ac_google_attribute@
+ __attribute__((__format__ (__printf__, 4, 5)))
+#endif
+ ; // starts at 4 because of implicit 1st arg 'this'
+
+
+ private:
+ friend class SectionTemplateNode; // for access to GetSectionValue(), etc.
+ friend class TemplateTemplateNode; // for access to GetSectionValue(), etc.
+ friend class VariableTemplateNode; // for access to GetSectionValue(), etc.
+ // For unittesting code using a TemplateDictionary.
+ friend class TemplateDictionaryPeer;
+
+ class DictionaryPrinter; // nested class
+ friend class DictionaryPrinter;
+
+ // We need this functor to tell small_map how to create a map<> when
+ // it decides to do so: we want it to create that map on the arena.
+ class map_arena_init;
+
+ typedef std::vector<TemplateDictionary*,
+ ArenaAllocator<TemplateDictionary*, UnsafeArena> >
+ DictVector;
+ // The '4' here is the size where small_map switches from vector<> to map<>.
+ typedef small_map<std::map<TemplateId, TemplateString, std::less<TemplateId>,
+ ArenaAllocator<std::pair<const TemplateId, TemplateString>,
+ UnsafeArena> >,
+ 4, std::equal_to<TemplateId>, map_arena_init>
+ VariableDict;
+ typedef small_map<std::map<TemplateId, DictVector*, std::less<TemplateId>,
+ ArenaAllocator<std::pair<const TemplateId, DictVector*>,
+ UnsafeArena> >,
+ 4, std::equal_to<TemplateId>, map_arena_init>
+ SectionDict;
+ typedef small_map<std::map<TemplateId, DictVector*, std::less<TemplateId>,
+ ArenaAllocator<std::pair<const TemplateId, DictVector*>,
+ UnsafeArena> >,
+ 4, std::equal_to<TemplateId>, map_arena_init>
+ IncludeDict;
+ // This is used only for global_dict_, which is just like a VariableDict
+ // but does not bother with an arena (since this memory lives forever).
+ typedef small_map<std::map<TemplateId, TemplateString, std::less<TemplateId> >,
+ 4, std::equal_to<TemplateId>,
+ small_map_default_init<
+ std::map<TemplateId, TemplateString,
+ std::less<TemplateId> > > >
+ GlobalDict;
+
+
+ // These are helper functions to allocate the parts of the dictionary
+ // on the arena.
+ template<typename T> inline void LazilyCreateDict(T** dict);
+ inline void LazyCreateTemplateGlobalDict();
+ inline DictVector* CreateDictVector();
+ inline TemplateDictionary* CreateTemplateSubdict(
+ const TemplateString& name,
+ UnsafeArena* arena,
+ TemplateDictionary* parent_dict,
+ TemplateDictionary* template_global_dict_owner);
+
+ // This is a helper function to insert <key,value> into m.
+ // Normally, we'd just use m[key] = value, but map rules
+ // require default constructor to be public for that to compile, and
+ // for some types we'd rather not allow that. HashInsert also inserts
+ // the key into an id(key)->key map, to allow for id-lookups later.
+ template<typename MapType, typename ValueType>
+ static void HashInsert(MapType* m, TemplateString key, ValueType value);
+
+ // Constructor created for all children dictionaries. This includes
+ // both a pointer to the parent dictionary and also the
+ // template-global dictionary from which all children (both
+ // IncludeDictionary and SectionDictionary) inherit. Values are
+ // filled into template_global_dict_ via SetTemplateGlobalValue.
+ explicit TemplateDictionary(const TemplateString& name,
+ class UnsafeArena* arena,
+ TemplateDictionary* parent_dict,
+ TemplateDictionary* template_global_dict_owner);
+
+ // Helps set up the static stuff. Must be called exactly once before
+ // accessing global_dict_. GoogleOnceInit() is used to manage that
+ // initialization in a thread-safe way.
+ static void SetupGlobalDict();
+
+ // Utility functions for copying a string into the arena.
+ // Memdup also copies in a trailing NUL, which is why we have the
+ // trailing-NUL check in the TemplateString version of Memdup.
+ TemplateString Memdup(const char* s, size_t slen);
+ TemplateString Memdup(const TemplateString& s) {
+ if (!s.is_immutable() || s.data()[s.size()] != '\0') { // needs an arena copy
+ return Memdup(s.data(), s.size());
+ }
+ return s; // immutable and already NUL-terminated: reuse without copying
+ }
+
+ // Used for recursive MakeCopy calls.
+ TemplateDictionary* InternalMakeCopy(
+ const TemplateString& name_of_copy,
+ UnsafeArena* arena,
+ TemplateDictionary* parent_dict,
+ TemplateDictionary* template_global_dict_owner);
+
+ // A helper for creating section and include dicts.
+ static std::string CreateSubdictName(
+ const TemplateString& dict_name, const TemplateString& sub_name,
+ size_t index, const char* suffix);
+
+ // Must be called whenever we add a value to one of the dictionaries above,
+ // to ensure that we can reconstruct the id -> string mapping.
+ static void AddToIdToNameMap(TemplateId id, const TemplateString& str);
+
+ // Used to do the formatting for the SetFormatted*() functions
+ static int StringAppendV(char* space, char** out,
+ const char* format, va_list ap);
+
+ // How Template::Expand() and its children access the template-dictionary.
+ // These fill the API required by TemplateDictionaryInterface.
+ virtual TemplateString GetValue(const TemplateString& variable) const;
+ virtual bool IsHiddenSection(const TemplateString& name) const;
+ virtual bool IsUnhiddenSection(const TemplateString& name) const {
+ return IsHiddenSection(name) == false; // exact complement of IsHiddenSection()
+ }
+ virtual bool IsHiddenTemplate(const TemplateString& name) const;
+ virtual const char* GetIncludeTemplateName(
+ const TemplateString& variable, int dictnum) const;
+
+ // Determine whether there's anything set in this dictionary
+ bool Empty() const;
+
+ // DictionaryPrinter is not a friend of TemplateString, but we are, so
+ // these helpers read a TemplateString's contents on its behalf.
+ static std::string PrintableTemplateString(const TemplateString& ts) {
+ const char* const first = ts.data();
+ return std::string(first, first + ts.size());
+ }
+ static bool InvalidTemplateString(const TemplateString& ts) {
+ return !ts.data();
+ }
+ // Nested classes do not inherit our friendship under every compiler, and
+ // IdToString is all DictionaryPrinter needs, so forward it from here.
+ static TemplateString IdToString(TemplateId id) { // for DictionaryPrinter
+ return TemplateString::IdToString(id);
+ }
+
+ // CreateTemplateIterator
+ // This is SectionIterator exactly, just with a different name to
+ // self-document the fact the value applies to a template include.
+ // Caller frees return value.
+ virtual TemplateDictionaryInterface::Iterator* CreateTemplateIterator(
+ const TemplateString& section_name) const;
+
+ // CreateSectionIterator
+ // Factory method implementation that constructs an iterator representing the
+ // set of dictionaries associated with a section name, if any. This
+ // implementation checks the local dictionary itself, not the template-wide
+ // dictionary or the global dictionary.
+ // Caller frees return value.
+ virtual TemplateDictionaryInterface::Iterator* CreateSectionIterator(
+ const TemplateString& section_name) const;
+
+ // TemplateDictionary's dictionary iterator, built from a begin/end iterator pair.
+ template <typename T> // T is *TemplateDictionary::const_iterator
+ class Iterator : public TemplateDictionaryInterface::Iterator {
+ friend class TemplateDictionary; // lets it call the protected constructor
+ public:
+ virtual ~Iterator() { }
+ virtual bool HasNext() const;
+ virtual const TemplateDictionaryInterface& Next();
+ protected:
+ Iterator(T first, T last) : begin_(first), end_(last) { }
+ private:
+ T begin_; // non-const, unlike end_
+ const T end_;
+ };
+
+ template <typename T> // factory helper: new'ed Iterator over dv.begin()/dv.end()
+ static Iterator<typename T::const_iterator>* MakeIterator(const T& dv) {
+ typedef Iterator<typename T::const_iterator> RangeIterator;
+ return new RangeIterator(dv.begin(), dv.end());
+ }
+
+
+ // The "name" of the dictionary for debugging output (Dump, etc.)
+ // The arena, also set at construction time.
+ class UnsafeArena* const arena_;
+ bool should_delete_arena_; // only true if we 'new arena' in constructor
+ TemplateString name_; // points into the arena, or to static memory
+
+ // The three dictionaries that I own -- for vars, sections, and template-incs
+ VariableDict* variable_dict_;
+ SectionDict* section_dict_;
+ IncludeDict* include_dict_;
+
+
+ // The template_global_dict is consulted if a lookup in the variable, section,
+ // or include dicts named above fails. It forms a convenient place to store
+ // session-specific data that's applicable to all templates in the dictionary
+ // tree.
+ // For the parent-template, template_global_dict_ is not NULL, and
+ // template_global_dict_owner_ is this. For all of its children,
+ // template_global_dict_ is NULL, and template_global_dict_owner_ points to
+ // the root parent-template (the one with the non-NULL template_global_dict_).
+ TemplateDictionary* template_global_dict_;
+ TemplateDictionary* template_global_dict_owner_;
+
+ // My parent dictionary, used when variable lookups at this level fail.
+ // Note this is only for *variables* and *sections*, not templates.
+ TemplateDictionary* parent_dict_;
+ // The static, global dictionary, at the top of the parent-dictionary chain
+ static GlobalDict* global_dict_;
+ static TemplateString* empty_string_; // what is returned on lookup misses
+
+ // The filename associated with this dictionary. If set, this declares
+ // what template the dictionary is supposed to be expanded with. Required
+ // for template-includes, optional (but useful) for 'normal' dicts.
+ const char* filename_;
+
+ private:
+ // Can't invoke copy constructor or assignment operator
+ TemplateDictionary(const TemplateDictionary&);
+ void operator=(const TemplateDictionary&);
+};
+
+}
+
+#endif // TEMPLATE_TEMPLATE_DICTIONARY_H_
diff --git a/src/ctemplate/template_dictionary_interface.h.in b/src/ctemplate/template_dictionary_interface.h.in
new file mode 100644
index 0000000..0cc9171
--- /dev/null
+++ b/src/ctemplate/template_dictionary_interface.h.in
@@ -0,0 +1,143 @@
+// Copyright (c) 2008, Google Inc.
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+// * Neither the name of Google Inc. nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+// ---
+// Author: williasr@google.com (Scott Williams)
+//
+// This file implements the TemplateDictionaryInterface class. This interface
+// forms the root of the TemplateDictionary class tree, but the interface is
+// minimal enough to allow other sources of template data. Note that the
+// TemplateDictionaryInterface class enumerates the properties expected by
+// Template: it doesn't constrain how data gets into the
+// TemplateDictionaryInterface class to begin with. For these methods, see
+// TemplateDictionary.
+//
+
+#ifndef TEMPLATE_TEMPLATE_DICTIONARY_INTERFACE_H_
+#define TEMPLATE_TEMPLATE_DICTIONARY_INTERFACE_H_
+
+#include <stdlib.h>
+#include <string>
+#include <ctemplate/template_string.h>
+
+@ac_windows_dllexport_defines@
+
+namespace ctemplate {
+
+const int kIndent = 2; // num spaces to indent each level -- used with dump
+
+// TemplateDictionaryInterface
+// The template data contains the associated values for
+// variables, the hidden/visible state for sections and included
+// templates, the associated set of dictionaries for sections and
+// included templates, and the template filenames to be expanded in
+// place of template-include nodes.
+ class @ac_windows_dllexport@ TemplateDictionaryInterface {
+ public:
+ // Virtual destructor, so subclasses are destroyed correctly via a base pointer.
+ virtual ~TemplateDictionaryInterface() {}
+
+ protected:
+ // The interface as follows is used at expand-time by Expand.
+ friend class VariableTemplateNode;
+ friend class SectionTemplateNode;
+ friend class TemplateTemplateNode;
+ // These classes reach into our internals for testing.
+ friend class TemplateDictionaryPeer;
+ friend class TemplateDictionaryPeerIterator;
+
+ // GetValue
+ // Returns the value of a variable.
+ virtual TemplateString GetValue(const TemplateString& variable) const = 0;
+
+ // IsHiddenSection
+ // A predicate to indicate the current hidden/visible state of a section
+ // whose name is passed to it.
+ virtual bool IsHiddenSection(const TemplateString& name) const = 0;
+
+ // Dump a string representation of this dictionary to the supplied string.
+ virtual void DumpToString(std::string* out, int level) const = 0;
+
+ // TemplateDictionaryInterface is an abstract class, so its constructor is
+ // only visible to its subclasses.
+ TemplateDictionaryInterface() {}
+
+ class Iterator {
+ protected:
+ Iterator() { }
+ public:
+ virtual ~Iterator() { }
+
+ // Returns false if the iterator is exhausted.
+ virtual bool HasNext() const = 0;
+
+ // Returns the current referent and increments the iterator to the next.
+ virtual const TemplateDictionaryInterface& Next() = 0;
+ };
+
+ // IsHiddenTemplate
+ // Returns true if the template include is hidden. This is analogous to
+ // IsHiddenSection, but for template nodes.
+ virtual bool IsHiddenTemplate(const TemplateString& name) const = 0;
+
+ // GetIncludeTemplateName
+ // Returns the name of the template associated with the given template
+ // include variable. If more than one dictionary is attached to the include
+ // symbol, dictnum can be used to disambiguate which include name you mean.
+ virtual const char* GetIncludeTemplateName(
+ const TemplateString& variable, int dictnum) const = 0;
+
+ // CreateTemplateIterator
+ // A factory method for constructing an iterator representing the
+ // subdictionaries of the given include node. The caller is
+ // responsible for deleting the return value when it's done with it.
+ virtual Iterator* CreateTemplateIterator(
+ const TemplateString& section) const = 0;
+
+ // CreateSectionIterator
+ // A factory method for constructing an iterator representing the
+ // subdictionaries of the given section node. The caller is
+ // responsible for deleting the return value when it's done with it.
+ virtual Iterator* CreateSectionIterator(
+ const TemplateString& section) const = 0;
+
+ // IsUnhiddenSection
+ // Returns true if the section has been marked visible and false otherwise.
+ virtual bool IsUnhiddenSection(
+ const TemplateString& name) const = 0;
+
+ private:
+ // Disallow copy and assign.
+ TemplateDictionaryInterface(const TemplateDictionaryInterface&);
+ void operator=(const TemplateDictionaryInterface&);
+ };
+
+}
+
+#endif // TEMPLATE_TEMPLATE_DICTIONARY_INTERFACE_H_
diff --git a/src/ctemplate/template_emitter.h.in b/src/ctemplate/template_emitter.h.in
new file mode 100644
index 0000000..fc29098
--- /dev/null
+++ b/src/ctemplate/template_emitter.h.in
@@ -0,0 +1,70 @@
+// Copyright (c) 2007, Google Inc.
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+// * Neither the name of Google Inc. nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+// ---
+// Author: csilvers@google.com (Craig Silverstein)
+//
+// When we expand a template, we expand into an abstract "emitter".
+// This is typically a string, but could be a file-wrapper, or any
+// other data structure that supports this very simple "append" API.
+
+#ifndef TEMPLATE_TEMPLATE_EMITTER_H_
+#define TEMPLATE_TEMPLATE_EMITTER_H_
+
+#include <sys/types.h> // for size_t
+#include <string>
+
+@ac_windows_dllexport_defines@
+
+namespace ctemplate {
+
+ class @ac_windows_dllexport@ ExpandEmitter { // abstract append-only output sink
+ public:
+ ExpandEmitter() {}
+ virtual ~ExpandEmitter() {} // virtual, so deleting through the base type is safe
+ virtual void Emit(char c) = 0; // append a single character
+ virtual void Emit(const std::string& s) = 0; // append a std::string
+ virtual void Emit(const char* s) = 0; // append a C string (presumably NUL-terminated)
+ virtual void Emit(const char* s, size_t slen) = 0; // append slen chars starting at s
+ };
+
+
+ class @ac_windows_dllexport@ StringEmitter : public ExpandEmitter {
+ std::string* const outbuf_; // destination string; never deleted by this class
+ public:
+ StringEmitter(std::string* outbuf) : outbuf_(outbuf) {}
+ virtual void Emit(char c) { outbuf_->push_back(c); }
+ virtual void Emit(const std::string& s) { outbuf_->append(s); }
+ virtual void Emit(const char* s) { outbuf_->append(s); }
+ virtual void Emit(const char* s, size_t slen) { outbuf_->append(s, slen); }
+ };
+
+}
+
+#endif // TEMPLATE_TEMPLATE_EMITTER_H_
diff --git a/src/ctemplate/template_enums.h.in b/src/ctemplate/template_enums.h.in
new file mode 100644
index 0000000..57db6ca
--- /dev/null
+++ b/src/ctemplate/template_enums.h.in
@@ -0,0 +1,46 @@
+// Copyright (c) 2006, Google Inc.
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+// * Neither the name of Google Inc. nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+// ---
+//
+// Alas that we can't forward-declare enums! These are the ones
+// used by multiple files
+
+#ifndef TEMPLATE_TEMPLATE_ENUMS_H_
+#define TEMPLATE_TEMPLATE_ENUMS_H_
+
+namespace ctemplate {
+
+ // Strip flag values for GetTemplate; NUM_STRIPS is a sentinel (count), not a flag.
+ enum Strip { DO_NOT_STRIP = 0, STRIP_BLANK_LINES = 1,
+ STRIP_WHITESPACE = 2, NUM_STRIPS = 3 };
+
+}
+
+#endif // TEMPLATE_TEMPLATE_ENUMS_H_
diff --git a/src/ctemplate/template_modifiers.h.in b/src/ctemplate/template_modifiers.h.in
new file mode 100644
index 0000000..dd7b158
--- /dev/null
+++ b/src/ctemplate/template_modifiers.h.in
@@ -0,0 +1,350 @@
+// Copyright (c) 2007, Google Inc.
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+// * Neither the name of Google Inc. nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+// ---
+// Author: csilvers@google.com (Craig Silverstein)
+//
+// We allow template variables to have modifiers, each possibly with a
+// value associated with it. Format is
+// {{VARNAME:modname[=modifier-value]:modname[=modifier-value]:...}}
+// Modname refers to a functor that takes the variable's value
+// and modifier-value (empty-string if no modifier-value was
+// specified), and returns a munged value. Modifiers are applied
+// left-to-right. We define the legal modnames here, and the
+// functors they refer to.
+//
+// Modifiers have a long-name, an optional short-name (one char;
+// may be \0 if you don't want a shortname), and a functor that's
+// applied to the variable.
+//
+// In addition to the list of modifiers hard-coded in the source code
+// here, it is possible to dynamically register modifiers using a long
+// name starting with "x-". If you wish to define your own modifier
+// class, in your own source code, just subclass TemplateModifier --
+// see template_modifiers.cc for details of how to do that.
+//
+// Adding a new built-in modifier, to this file, takes several steps,
+// both in this .h file and in the corresponding .cc file:
+// 1) .h file: Define a struct for the modifier. It must subclass
+// TemplateModifier.
+// 2) .h file: declare a variable that's an instance of the struct.
+// This is used for people who want to modify the string themselves,
+// via TemplateDictionary::SetEscapedValue.
+// 3) .cc file: define the new modifier's Modify method.
+// 4) .cc file: give storage for the variable declared in the .h file (in 2).
+// 5) .cc file: add the modifier to the g_modifiers array.
+
+#ifndef TEMPLATE_TEMPLATE_MODIFIERS_H_
+#define TEMPLATE_TEMPLATE_MODIFIERS_H_
+
+#include <sys/types.h> // for size_t
+#include <string>
+#include <ctemplate/template_emitter.h> // so we can inline operator()
+#include <ctemplate/per_expand_data.h> // could probably just forward-declare
+
+@ac_windows_dllexport_defines@
+
+namespace ctemplate {
+
+class Template;
+
+#define MODIFY_SIGNATURE_ \
+ public: \
+ virtual void Modify(const char* in, size_t inlen, \
+ const PerExpandData*, ExpandEmitter* outbuf, \
+ const std::string& arg) const
+
+// If you wish to write your own modifier, it should subclass this
+// class. Your subclass must define Modify() and may override
+// MightModify(); operator() is deliberately non-virtual for efficiency.
+class @ac_windows_dllexport@ TemplateModifier {
+ public:
+ // This function takes a string as input, a char*/size_t pair, and
+ // appends the modified version to the end of outbuf. In addition
+ // to the variable-value to modify (specified via in/inlen), each
+ // Modify passes in two pieces of user-supplied data:
+ // 1) arg: this is the modifier-value, for modifiers that take a
+ // value (e.g. "{{VAR:modifier=value}}"). This value
+ // comes from the template file. For modifiers that take
+ // no modval argument, arg will always be "". For modifiers
+ // that do take such an argument, arg will always start with "=".
+ // 2) per_expand_data: this is a set of data that the application can
+ // associate with a TemplateDictionary, and is passed in to
+ // every variable expanded using that dictionary. This value
+ // comes from the source code.
+ virtual void Modify(const char* in, size_t inlen,
+ const PerExpandData* per_expand_data,
+ ExpandEmitter* outbuf,
+ const std::string& arg) const = 0;
+
+ // This function can be used to speed up modification. If Modify()
+ // is often a noop, you can implement MightModify() to indicate
+ // situations where it's safe to avoid the call to Modify(), because
+ // Modify() won't do any modifications in this case. Note it's
+ // always safe to return true here; you should just return false if
+ // you're certain Modify() can be ignored. This function is
+ // advisory; the template system is not required to call
+ // MightModify() before Modify().
+ virtual bool MightModify(const PerExpandData* /*per_expand_data*/,
+ const std::string& /*arg*/) const {
+ return true;
+ }
+
+ // We support both modifiers that take an argument, and those that don't.
+ // We also support passing in a string, or a char*/size_t pair.
+ std::string operator()(const char* in, size_t inlen, const std::string& arg="") const {
+ std::string out;
+ // we'll reserve some space to account for minimal escaping: 12.5% + 16 bytes
+ out.reserve(inlen + inlen/8 + 16);
+ StringEmitter outbuf(&out);
+ Modify(in, inlen, NULL, &outbuf, arg); // no per-expand data on this path
+ return out;
+ }
+ std::string operator()(const std::string& in, const std::string& arg="") const {
+ return operator()(in.data(), in.size(), arg);
+ }
+
+ virtual ~TemplateModifier(); // always need a virtual destructor!
+};
+
+
+// Returns the input verbatim (for testing)
+class @ac_windows_dllexport@ NullModifier : public TemplateModifier {
+ MODIFY_SIGNATURE_;
+};
+extern @ac_windows_dllexport@ NullModifier null_modifier;
+
+// Escapes < > " ' & <non-space whitespace> to &lt; &gt; &quot;
+// &#39; &amp; <space>
+class @ac_windows_dllexport@ HtmlEscape : public TemplateModifier {
+ MODIFY_SIGNATURE_;
+};
+extern @ac_windows_dllexport@ HtmlEscape html_escape;
+
+// Same as HtmlEscape but leaves all whitespace alone. Eg. for <pre>..</pre>
+class @ac_windows_dllexport@ PreEscape : public TemplateModifier {
+ MODIFY_SIGNATURE_;
+};
+extern @ac_windows_dllexport@ PreEscape pre_escape;
+
+// Like HtmlEscape but allows HTML entities, <br> tags, <wbr> tags,
+// matched <b> and </b> tags, matched <i> and </i> tags, matched <em> and </em>
+// tags, and matched <span dir=(rtl|ltr)> tags.
+class @ac_windows_dllexport@ SnippetEscape : public TemplateModifier {
+ MODIFY_SIGNATURE_;
+};
+extern @ac_windows_dllexport@ SnippetEscape snippet_escape;
+
+// Replaces characters not safe for an unquoted attribute with underscore.
+// Safe characters are alphanumeric, underscore, dash, period, and colon.
+// The equal sign is also considered safe unless it is at the start
+// or end of the input in which case it is replaced with underscore.
+//
+// We added the equal sign to the safe characters to allow this modifier
+// to be used on attribute name/value pairs in HTML tags such as
+// <div {{CLASS:H=attribute}}>
+// where CLASS is expanded to "class=bla".
+//
+// Note: The equal sign is replaced when found at either boundary of the
+// string due to the concern that it may lead to XSS under some special
+// circumstances: Say, if this string is the value of an attribute in an
+// HTML tag and ends with an equal sign, a browser may possibly end up
+// interpreting the next token as the value of this string rather than
+// a new attribute (esoteric).
+class @ac_windows_dllexport@ CleanseAttribute : public TemplateModifier {
+ MODIFY_SIGNATURE_;
+};
+extern @ac_windows_dllexport@ CleanseAttribute cleanse_attribute;
+
+// Removes characters not safe for a CSS value. Safe characters are
+// alphanumeric, space, underscore, period, comma, exclamation mark,
+// pound, percent, and dash.
+class @ac_windows_dllexport@ CleanseCss : public TemplateModifier {
+ MODIFY_SIGNATURE_;
+};
+extern @ac_windows_dllexport@ CleanseCss cleanse_css;
+
+// Checks that a url is either an absolute http(s) URL or a relative
+// url that doesn't have a protocol hidden in it (ie [foo.html] is
+// fine, but not [javascript:foo]) and then performs another type of
+// escaping. Returns the url escaped with the specified modifier if
+// good, otherwise returns a safe replacement URL.
+// This is normally "#", but for <img> tags, it is not safe to set
+// the src attribute to "#". This is because this causes some browsers
+// to reload the page, which can cause a DoS.
+class @ac_windows_dllexport@ ValidateUrl : public TemplateModifier {
+ public:
+ explicit ValidateUrl(const TemplateModifier& chained_modifier,
+ const char* unsafe_url_replacement)
+ : chained_modifier_(chained_modifier),
+ unsafe_url_replacement_(unsafe_url_replacement),
+ unsafe_url_replacement_length_(strlen(unsafe_url_replacement)) { } // NOTE(review): strlen() needs <string.h>; this header does not include it directly -- presumably pulled in transitively, confirm
+ MODIFY_SIGNATURE_;
+ static const char* const kUnsafeUrlReplacement;
+ static const char* const kUnsafeImgSrcUrlReplacement;
+ private:
+ const TemplateModifier& chained_modifier_; // stored by reference, not copied
+ const char* unsafe_url_replacement_; // pointer is stored; the string is not copied
+ int unsafe_url_replacement_length_; // strlen() of the replacement, narrowed from size_t to int
+};
+extern @ac_windows_dllexport@ ValidateUrl validate_url_and_html_escape;
+extern @ac_windows_dllexport@ ValidateUrl validate_url_and_javascript_escape;
+extern @ac_windows_dllexport@ ValidateUrl validate_url_and_css_escape;
+extern @ac_windows_dllexport@ ValidateUrl validate_img_src_url_and_html_escape;
+extern @ac_windows_dllexport@ ValidateUrl validate_img_src_url_and_javascript_escape;
+extern @ac_windows_dllexport@ ValidateUrl validate_img_src_url_and_css_escape;
+
+// Escapes < > & " ' to &lt; &gt; &amp; &quot; &#39; (same as in HtmlEscape).
+// If you use it within a CDATA section, you may be escaping more characters
+// than strictly necessary. If this turns out to be an issue, we will need
+// to add a variant just for CDATA.
+class @ac_windows_dllexport@ XmlEscape : public TemplateModifier {
+ MODIFY_SIGNATURE_;
+};
+extern @ac_windows_dllexport@ XmlEscape xml_escape;
+
+// Escapes characters that cannot appear unescaped in a javascript string
+// assuming UTF-8 encoded input.
+// This does NOT escape all characters that cannot appear unescaped in a
+// javascript regular expression literal.
+class @ac_windows_dllexport@ JavascriptEscape : public TemplateModifier {
+ MODIFY_SIGNATURE_;
+};
+extern @ac_windows_dllexport@ JavascriptEscape javascript_escape;
+
+// Checks that the input is a valid javascript non-string literal
+// meaning a boolean (true, false) or a numeric value (decimal, hex or octal).
+// If valid, we output the input as is, otherwise we output null instead.
+// Input of zero length is considered valid and nothing is output.
+//
+// The emphasis is on safety against injection of javascript code rather
+// than perfect validation, as such it is possible for non-valid literals to
+// pass through.
+//
+// You would use this modifier for javascript variables that are not
+// enclosed in quotes such as:
+// <script>var a = {{VALUE}};</script> OR
+// <a href="url" onclick="doSubmit({{ID}})">
+// For variables that are quoted (i.e. string literals) use javascript_escape.
+//
+// Limitations:
+// . NaN, +/-Infinity and null are not recognized.
+// . Output is not guaranteed to be a valid literal,
+// e.g: +55+-e34 will output as is.
+// e.g: trueeee will output null as it is not a valid boolean.
+//
+// Details:
+// . For Hex numbers, it checks for case-insensitive 0x[0-9A-F]+
+// that should be a proper check.
+// . For other numbers, it checks for case-insensitive [0-9eE+-.]*
+// so can also accept invalid numbers such as the number 5..45--10.
+// . "true" and "false" (without quotes) are also accepted and that's it.
+//
+class @ac_windows_dllexport@ JavascriptNumber : public TemplateModifier {
+ MODIFY_SIGNATURE_;
+};
+extern @ac_windows_dllexport@ JavascriptNumber javascript_number;
+
+// Escapes characters not in [0-9a-zA-Z.,_:*/~!()-] as %-prefixed hex.
+// Space is encoded as a +.
+class @ac_windows_dllexport@ UrlQueryEscape : public TemplateModifier {
+ MODIFY_SIGNATURE_;
+};
+extern @ac_windows_dllexport@ UrlQueryEscape url_query_escape;
+
+// Escapes " \ / <FF> <CR> <LF> <BS> <TAB> to \" \\ \/ \f \r \n \b \t
+// Also escapes < > & to their corresponding \uXXXX representation
+// (\u003C, \u003E, \u0026 respectively).
+class @ac_windows_dllexport@ JsonEscape : public TemplateModifier {
+ MODIFY_SIGNATURE_;
+};
+extern @ac_windows_dllexport@ JsonEscape json_escape;
+
+// Inserts the given prefix (given as the argument to this modifier)
+// after every newline in the text. Note that it does *not* insert
+// prefix at the very beginning of the text -- in its expected use,
+// that prefix will already be present before this text, in the
+// template. This is meant to be used internally, and is not exported
+// via the g_modifiers list.
+class @ac_windows_dllexport@ PrefixLine : public TemplateModifier {
+ MODIFY_SIGNATURE_;
+};
+extern @ac_windows_dllexport@ PrefixLine prefix_line;
+
+
+#undef MODIFY_SIGNATURE_
+
+
+// Registers a new template modifier.
+// long_name must start with "x-".
+// If the modifier takes a value (eg "{{VAR:x-name=value}}"), then
+// long_name should end with "=". This is similar to getopt(3) syntax.
+// We also allow value-specializations, with specific values specified
+// as part of long-name. For instance:
+// AddModifier("x-mod=", &my_modifierA);
+// AddModifier("x-mod=bar", &my_modifierB);
+// AddModifier("x-mod2", &my_modifierC);
+// For the template
+// {{VAR1:x-mod=foo}} {{VAR2:x-mod=bar}} {{VAR3:x-mod=baz}} {{VAR4:x-mod2}}
+// VAR1 and VAR3 would get modified by my_modifierA, VAR2 by my_modifierB,
+// and VAR4 by my_modifierC. The order of the AddModifier calls is not
+// significant.
+extern @ac_windows_dllexport@
+bool AddModifier(const char* long_name, const TemplateModifier* modifier);
+
+// Same as AddModifier() above except that the modifier is considered
+// to produce safe output that can be inserted in any context without
+// the need for additional escaping. This difference only impacts
+// the Auto-Escape mode: In that mode, when a variable (or template-include)
+// has a modifier added via AddXssSafeModifier(), it is excluded from
+// further escaping, effectively treated as though it had the :none modifier.
+// Because Auto-Escape is disabled for any variable and template-include
+// that includes such a modifier, use this function with care and ensure
+// that it may not emit harmful output that could lead to XSS.
+//
+// Some valid uses of AddXssSafeModifier:
+// . A modifier that converts a string to an integer since
+// an integer is generally safe in any context.
+// . A modifier that returns one of a fixed number of safe values
+// depending on properties of the input.
+//
+// Some not recommended uses of AddXssSafeModifier:
+// . A modifier that applies some extra formatting to the input
+// before returning it since the output will still contain
+// harmful content if the input does.
+// . A modifier that applies one type of escaping to the input
+// (say HTML-escape). This may be dangerous when the modifier
+// is used in a different context (say Javascript) where this
+// escaping may be inadequate.
+extern @ac_windows_dllexport@
+bool AddXssSafeModifier(const char* long_name, const TemplateModifier* modifier);
+
+}
+
+#endif // TEMPLATE_TEMPLATE_MODIFIERS_H_
diff --git a/src/ctemplate/template_namelist.h.in b/src/ctemplate/template_namelist.h.in
new file mode 100644
index 0000000..2758a8b
--- /dev/null
+++ b/src/ctemplate/template_namelist.h.in
@@ -0,0 +1,163 @@
+// Copyright (c) 2006, Google Inc.
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+// * Neither the name of Google Inc. nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+// ---
+//
+// This class implements some template helper classes, that manage
+// template files and make it easier to monitor them.
+//
+// For information about how to use these classes and macros, and to
+// write the templates it takes as input,
+// see the doc/ directory.
+
+#ifndef TEMPLATE_TEMPLATE_NAMELIST_H_
+#define TEMPLATE_TEMPLATE_NAMELIST_H_
+
+#include <time.h> // for time_t
+#include @ac_cv_cxx_hash_set@
+#include <string>
+#include <vector>
+#include <ctemplate/template_enums.h> // for Strip
+#include <ctemplate/template_string.h> // for StringHash
+
+@ac_windows_dllexport_defines@
+
+namespace ctemplate {
+
+// RegisterTemplateFilename
+// Used to define a reference variable for the name of a template file. E.g:
+// RegisterTemplateFilename(EXAMPLE_FN, "example.tpl");
+// This should be used rather than the seemingly equivalent:
+// #define EXAMPLE_FN "example.tpl"
+// As in the latter, any call to GetTemplate may then reference the name
+// via the first argument. In the example, that would be:
+// Template::GetTemplate(EXAMPLE_FN, DO_NOT_STRIP);
+// By using this macro, rather than the #define, all templates can
+// later be tested for either existence or correct syntax after program
+// start-up.
+// TODO (we wish): Make this macro produce the #include for the auto-generated
+// header files, when and if the macro pre-processor supports that
+#define RegisterTemplateFilename(var, name) \
+ const char* const var = @ac_google_namespace@::TemplateNamelist::RegisterTemplate(name);
+
+// Class: TemplateNamelist
+// Each time RegisterTemplate() is called (typically through the
+// RegisterTemplateFilename macro), the name passed in is added to the
+// class's static list of names. The entire list may be retrieved later
+// by calling the GetList accessor method. Or the names may all be checked
+// for existence or for correct syntax via the other methods. We use this
+// in our sanity-checking code to make sure all the templates used by a
+// program exist and are syntactically correct.
+
+class @ac_windows_dllexport@ TemplateNamelist {
+ friend class TemporaryRegisterTemplate;
+ public:
+ // These types should be taken as 'generic' containers. The only
+ // thing you should do with them is call size() and/or iterate
+ // between begin() and end(), and the only operations we promise
+ // the iterators will support are operator* and operator++.
+ typedef @ac_cv_cxx_hash_set_class@<std::string, StringHash> NameListType;
+ typedef std::vector<std::string> MissingListType;
+ typedef std::vector<std::string> SyntaxListType;
+
+ public:
+ // Takes a name and pushes it onto the static namelist
+ // Returns: a pointer to the entry in namelist which holds the name
+ static const char* RegisterTemplate(const char* name);
+
+ // GetList
+ // Description: Returns the collected list of names.
+ static const NameListType& GetList();
+
+ // GetMissingList
+ // If refresh is true or if it is the first time the function is called
+ // in the execution of the program, it creates (or clears) the missing
+ // list and then fills it with the list of
+ // templates that the program knows about but are missing from
+ // the template directory.
+ // If refresh is false and it is not the first time the function is
+ // called, it merely returns the list created in the
+ // call when the last refresh was done.
+ // NOTE: The templates are NOT read, parsed, or cached
+ // by this function.
+ static const MissingListType& GetMissingList(bool refresh);
+
+ // GetBadSyntaxList
+ // If refresh is true or if it is the first time the function is called
+ // in the execution of the program, it creates (or clears) the "bad
+ // syntax" list and then fills it with the list of
+ // templates that the program knows about but contain syntax errors.
+ // A missing file is not considered a syntax error, and thus is
+ // not included in this list.
+ // If refresh is false and it is not the first time the function is
+ // called, it merely returns the list created in the
+ // call when the last refresh was done.
+ // NOTE: The side effect of calling this the first time or
+ // with refresh equal true is that all templates are parsed and cached.
+ // Hence they need to be retrieved with the flags that
+ // the program needs them loaded with (i.e, the strip parameter
+ // passed to Template::GetTemplate.)
+ static const SyntaxListType& GetBadSyntaxList(bool refresh, Strip strip);
+
+ // GetLastmodTime
+ // Iterates through all non-missing templates, and returns the latest
+ // last-modification time for the template files, as returned by stat().
+ // This can be used to make sure template files are getting refreshed.
+ static time_t GetLastmodTime();
+
+ // AllDoExist
+ // Retrieves the missing list (always refreshing the list)
+ // and returns true if it is empty, i.e. no template is missing.
+ // Else, returns false.
+ static bool AllDoExist();
+
+ // IsAllSyntaxOkay
+ // Retrieves the "bad syntax" list (always refreshing the list)
+ // and returns true if it is empty, i.e. no template has bad syntax.
+ // Else, returns false.
+ // NOTE: The side effect of calling this is that all templates are parsed
+ // and cached, hence they need to be retrieved with the flags that
+ // the program needs them loaded with. (I.e, the strip parameter
+ // ultimately passed to Template::GetTemplate.)
+ static bool IsAllSyntaxOkay(Strip strip);
+
+ protected:
+ // The static list of names
+ static NameListType *namelist_;
+ static MissingListType *missing_list_;
+ static SyntaxListType *bad_syntax_list_;
+
+ private:
+ TemplateNamelist(const TemplateNamelist&); // disallow copying
+ void operator=(const TemplateNamelist&);
+};
+
+}
+
+#endif // TEMPLATE_TEMPLATE_NAMELIST_H_
diff --git a/src/ctemplate/template_pathops.h.in b/src/ctemplate/template_pathops.h.in
new file mode 100644
index 0000000..7a11ade
--- /dev/null
+++ b/src/ctemplate/template_pathops.h.in
@@ -0,0 +1,73 @@
+// Copyright (c) 2007, Google Inc.
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+// * Neither the name of Google Inc. nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+// ---
+// Author: csilvers@google.com (Craig Silverstein)
+
+#ifndef TEMPLATE_TEMPLATE_PATHOPS_H_
+#define TEMPLATE_TEMPLATE_PATHOPS_H_
+
+#include <string>
+
+@ac_windows_dllexport_defines@
+
+namespace ctemplate {
+
+extern @ac_windows_dllexport@
+const char kCWD[]; // a string that's equivalent to "./"
+extern @ac_windows_dllexport@
+const char kRootdir[]; // a string that's equivalent to "/"
+
+extern @ac_windows_dllexport@
+std::string PathJoin(const std::string& a, const std::string& b);
+
+extern @ac_windows_dllexport@
+bool IsAbspath(const std::string& path);
+
+extern @ac_windows_dllexport@
+bool IsDirectory(const std::string& path); // checks if path ends with /
+
+extern @ac_windows_dllexport@
+void NormalizeDirectory(std::string* dir); // adds trailing / if needed
+
+extern @ac_windows_dllexport@
+std::string Basename(const std::string& path); // everything after last /
+
+// Returns true iff text contains the word as a full word, i.e. delimited by one
+// of [.,_-#*?:] on both sides.
+// This is used while loading a template, to check that the file's name matches
+// the auto-escape mode specified by it.
+// NOTE: This assumes that the word doesn't contain any of the delimiter
+// characters.
+extern @ac_windows_dllexport@
+bool ContainsFullWord(const std::string& text, const std::string& word);
+
+}
+
+#endif // TEMPLATE_TEMPLATE_PATHOPS_H_
diff --git a/src/ctemplate/template_string.h.in b/src/ctemplate/template_string.h.in
new file mode 100644
index 0000000..e151a35
--- /dev/null
+++ b/src/ctemplate/template_string.h.in
@@ -0,0 +1,357 @@
+// Copyright (c) 2008, Google Inc.
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+// * Neither the name of Google Inc. nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+// ---
+// Author: csilvers@google.com (Craig Silverstein)
+
+#ifndef TEMPLATE_TEMPLATE_STRING_H_
+#define TEMPLATE_TEMPLATE_STRING_H_
+
+#include <string.h> // for memcmp() and size_t
+#include @ac_cv_cxx_hash_map@
+#include <string>
+#include <vector>
+
+#include <assert.h>
+#if @ac_cv_have_stdint_h@
+#include <stdint.h>         // one place @ac_cv_uint64@ might live
+#endif
+#if @ac_cv_have_inttypes_h@
+#include <inttypes.h>       // another place @ac_cv_uint64@ might live
+#endif
+#include <sys/types.h>      // final place @ac_cv_uint64@ might live
+
+class TemplateStringTest; // needed for friendship declaration
+class StaticTemplateStringTest;
+
+#if @ac_have_attribute_weak@
+extern char _start[] __attribute__((weak)); // linker emits: start of .text
+extern char data_start[] __attribute__((weak)); // start of .data
+#endif
+
+@ac_windows_dllexport_defines@
+
+namespace ctemplate {
+
+// Most methods of TemplateDictionary take a TemplateString rather than a
+// C++ string. This is for efficiency: it can avoid extra string copies.
+// For any argument that takes a TemplateString, you can pass in any of:
+// * A C++ string
+// * A char*
+// * A StringPiece
+// * TemplateString(char*, length)
+// The last of these is the most efficient, though it requires more work
+// on the call site (you have to create the TemplateString explicitly).
+class TemplateString;
+
+// If you have a string constant (e.g. the string literal "foo") that
+// you need to pass into template routines repeatedly, it is more
+// efficient if you convert it into a TemplateString only once. The
+// way to do this is to use a global StaticTemplateString via STS_INIT
+// (note: do this at global scope *only*!):
+// static const StaticTemplateString kMyVar = STS_INIT(kMyVar, "MY_VALUE");
+struct StaticTemplateString;
+
+#define STS_INIT(name, str) STS_INIT_WITH_HASH(name, str, 0)
+
+// Let's define a convenient hash function for hashing 'normal'
+// strings: char* and string. We'll use MurmurHash, which is probably
+// better than the STL default. We don't include TemplateString or
+// StaticTemplateString here, since they are hashed more efficiently
+// based on their id.
+struct @ac_windows_dllexport@ StringHash {
+ inline size_t operator()(const char* s) const {
+ return Hash(s, strlen(s));
+ };
+
+ inline size_t operator()(const std::string& s) const {
+ return Hash(s.data(), s.size());
+ }
+
+ inline bool operator()(const char* a, const char* b) const {
+ return (a != b) && (strcmp(a, b) < 0); // <, for MSVC
+ }
+
+ inline bool operator()(const std::string& a, const std::string& b) const {
+ return a < b;
+ }
+
+ static const size_t bucket_size = 4; // These are required by MSVC
+ static const size_t min_buckets = 8; // 4 and 8 are the defaults
+ private:
+ size_t Hash(const char* s, size_t slen) const;
+};
+
+// ----------------------- THE CLASSES -------------------------------
+
+typedef @ac_cv_uint64@ TemplateId;
+
+const TemplateId kIllegalTemplateId = 0;
+
+struct @ac_windows_dllexport@ StaticTemplateString {
+ // Do not define a constructor! We use only brace-initialization,
+ // so the data is constructed at static-initialization time.
+ // Anything you want to put in a constructor, put in
+ // StaticTemplateStringInitializer instead.
+
+ // These members shouldn't be accessed directly, except in the
+ // internals of the template code. They are public because that is
+ // the only way we can brace-initialize them. NOTE: MSVC (at least
+ // up to 8.0) has a bug where it ignores 'mutable' when it's buried
+ // in an internal struct. To fix that, we have to make this whole
+ // internal struct mutable. We only do this on MSVC, so on other
+ // compilers we get the full constness we want.
+#ifdef _MSC_VER
+ mutable
+#endif
+ struct {
+ const char* ptr_;
+ size_t length_;
+ mutable TemplateId id_; // sometimes lazily-initialized.
+ } do_not_use_directly_;
+
+ // This class is a good hash functor to pass in as the third
+ // argument to @ac_cv_cxx_hash_map_class@<>, when creating a map whose keys are
+ // StaticTemplateString. NOTE: This class isn't that safe to use,
+ // because it requires that StaticTemplateStringInitializer has done
+ // its job. Unfortunately, even when you use the STS_INIT macro
+  // (which is always, right??), dynamic initialization does not happen
+ // in a particular order, and objects in different .cc files may
+ // reference a StaticTemplateString before the corresponding
+ // StaticTemplateStringInitializer sets the id.
+ struct Hasher {
+ inline size_t operator()(const StaticTemplateString& sts) const;
+ inline bool operator()(const StaticTemplateString& a, // <, for MSVC
+ const StaticTemplateString& b) const;
+ static const size_t bucket_size = 4; // These are required by MSVC
+ static const size_t min_buckets = 8; // 4 and 8 are the defaults
+ };
+
+ inline bool empty() const {
+ return do_not_use_directly_.length_ == 0;
+ }
+
+ // Allows comparisons of StaticTemplateString objects as if they were
+ // strings. This is useful for STL.
+ inline bool operator==(const StaticTemplateString& x) const;
+};
+
+class @ac_windows_dllexport@ TemplateString {
+ public:
+ TemplateString(const char* s)
+ : ptr_(s ? s : ""), length_(strlen(ptr_)),
+ is_immutable_(InTextSegment(ptr_)), id_(kIllegalTemplateId) {
+ }
+ TemplateString(const std::string& s)
+ : ptr_(s.data()), length_(s.size()),
+ is_immutable_(false), id_(kIllegalTemplateId) {
+ }
+ TemplateString(const char* s, size_t slen)
+ : ptr_(s), length_(slen),
+ is_immutable_(InTextSegment(s)), id_(kIllegalTemplateId) {
+ }
+ TemplateString(const StaticTemplateString& s)
+ : ptr_(s.do_not_use_directly_.ptr_),
+ length_(s.do_not_use_directly_.length_),
+ is_immutable_(true), id_(s.do_not_use_directly_.id_) {
+ }
+
+ const char* begin() const {
+ return ptr_;
+ }
+
+ const char* end() const {
+ return ptr_ + length_;
+ }
+
+ const char* data() const {
+ return ptr_;
+ }
+
+ size_t size() const {
+ return length_;
+ }
+
+ inline bool empty() const {
+ return length_ == 0;
+ };
+
+ inline bool is_immutable() const {
+ return is_immutable_;
+ }
+
+ // STL requires this to be public for hash_map, though I'd rather not.
+ inline bool operator==(const TemplateString& x) const {
+ return GetGlobalId() == x.GetGlobalId();
+ }
+
+ private:
+ // Only TemplateDictionaries and template expansion code can read these.
+ friend class TemplateDictionary;
+ friend class TemplateCache; // for GetGlobalId
+ friend class StaticTemplateStringInitializer; // for AddToGlo...
+ friend struct TemplateStringHasher; // for GetGlobalId
+ friend TemplateId GlobalIdForTest(const char* ptr, int len);
+ friend TemplateId GlobalIdForSTS_INIT(const TemplateString& s);
+
+ TemplateString(const char* s, size_t slen, bool is_immutable, TemplateId id)
+ : ptr_(s), length_(slen), is_immutable_(is_immutable), id_(id) {
+ }
+
+ // This returns true if s is in the .text segment of the binary.
+ // (Note this only checks .text of the main executable, not of
+ // shared libraries. So it may not be all that useful.)
+ // This requires the gnu linker (and probably elf), to define
+ // _start and data_start.
+ static bool InTextSegment(const char* s) {
+#if @ac_have_attribute_weak@
+ return (s >= _start && s < data_start); // in .text
+#else
+ return false; // the conservative choice: assume it's not static memory
+#endif
+ }
+
+ protected:
+ inline void CacheGlobalId() { // used by HashedTemplateString
+ id_ = GetGlobalId();
+ };
+
+ private:
+ // Returns the global id, computing it for the first time if
+ // necessary. Note that since this is a const method, we don't
+ // store the computed value in id_, even if id_ is 0.
+ TemplateId GetGlobalId() const;
+ // Adds this TemplateString to the map from global-id to name.
+ void AddToGlobalIdToNameMap();
+
+ // Use sparingly. Converting to a string loses information about the
+ // id of the template string, making operations require extra hash
+ // computations.
+ std::string ToString() const { return std::string(ptr_, length_); }
+
+ // Does the reverse map from TemplateId to TemplateString contents.
+ // Returns a TemplateString(kStsEmpty) if id isn't found. Note that
+ // the TemplateString returned is not necessarily NUL terminated.
+ static TemplateString IdToString(TemplateId id);
+
+ const char* ptr_;
+ size_t length_;
+ // Do we need to manage memory for this string?
+ bool is_immutable_;
+ // Id for hash lookups. If 0, we don't have one and it should be
+ // computed as-needed.
+ TemplateId id_;
+};
+
+// ----------------------- THE CODE -------------------------------
+
+// Use the low-bit from TemplateId as the "initialized" flag.  Note
+// that since all initialized TemplateId have the lower bit set, it's
+// safe to have used 0 for kIllegalTemplateId, as we did above.
+const TemplateId kTemplateStringInitializedFlag = 1;
+// Returns true iff the "initialized" (lowest) bit of id is set.
+inline bool IsTemplateIdInitialized(TemplateId id) {
+  return (id & kTemplateStringInitializedFlag) != 0;
+}
+
+// A helper struct used by StaticTemplateString::Hasher and TemplateStringHasher
+struct TemplateIdHasher {
+ size_t operator()(TemplateId id) const {
+ // The shift has two effects: it randomizes the "initialized" flag,
+ // and slightly improves the randomness of the low bits. This is
+    // slightly useful when size_t is 32 bits, or when using small
+    // hash tables with power-of-2 sizes.
+ return static_cast<size_t>(id ^ (id >> 33));
+ }
+ bool operator()(TemplateId a, TemplateId b) const { // <, for MSVC
+ return a < b;
+ }
+ static const size_t bucket_size = 4; // These are required by MSVC
+ static const size_t min_buckets = 8; // 4 and 8 are the defaults
+};
+
+
+inline size_t StaticTemplateString::Hasher::operator()(
+ const StaticTemplateString& sts) const {
+ TemplateId id = sts.do_not_use_directly_.id_;
+ assert(IsTemplateIdInitialized(id));
+ return TemplateIdHasher()(id);
+}
+
+inline bool StaticTemplateString::Hasher::operator()(
+ const StaticTemplateString& a, const StaticTemplateString& b) const {
+ TemplateId id_a = a.do_not_use_directly_.id_;
+ TemplateId id_b = b.do_not_use_directly_.id_;
+ assert(IsTemplateIdInitialized(id_a));
+ assert(IsTemplateIdInitialized(id_b));
+ return TemplateIdHasher()(id_a, id_b);
+}
+
+inline bool StaticTemplateString::operator==(
+ const StaticTemplateString& x) const {
+ return (do_not_use_directly_.length_ == x.do_not_use_directly_.length_ &&
+ (do_not_use_directly_.ptr_ == x.do_not_use_directly_.ptr_ ||
+ memcmp(do_not_use_directly_.ptr_, x.do_not_use_directly_.ptr_,
+ do_not_use_directly_.length_) == 0));
+}
+
+// We set up as much of StaticTemplateString as we can at
+// static-initialization time (using brace-initialization), but some
+// things can't be set up then. This class is for those things; it
+// runs at dynamic-initialization time. If you add logic here, only
+// do so as an optimization: this may be called rather late (though
+// before main), so other code should not depend on this being called
+// before them.
+class @ac_windows_dllexport@ StaticTemplateStringInitializer {
+ public:
+ // This constructor operates on a const StaticTemplateString - we should
+ // only change those things that are mutable.
+ explicit StaticTemplateStringInitializer(const StaticTemplateString* sts);
+};
+
+// Don't use this. This is used only in auto-generated .varnames.h files.
+#define STS_INIT_WITH_HASH(name, str, hash) \
+ { { str, sizeof("" str "")-1, hash } }; \
+ namespace ctemplate_sts_init { \
+ static const @ac_google_namespace@::StaticTemplateStringInitializer name##_init(&name); \
+ }
+
+// We computed this hash value for the empty string online. In debug
+// mode, we verify it's correct during runtime (that is, that we
+// verify the hash function used by make_tpl_varnames_h hasn't changed
+// since we computed this number). Note this struct is logically
+// static, but since it's in a .h file, we don't say 'static' but
+// instead rely on the linker to provide the POD-with-internal-linkage
+// magic.
+const StaticTemplateString kStsEmpty =
+ STS_INIT_WITH_HASH(kStsEmpty, "", 1457976849674613049ULL);
+
+}
+
+#endif // TEMPLATE_TEMPLATE_STRING_H_
diff --git a/src/diff_tpl_auto_escape.cc b/src/diff_tpl_auto_escape.cc
new file mode 100644
index 0000000..5a833f8
--- /dev/null
+++ b/src/diff_tpl_auto_escape.cc
@@ -0,0 +1,337 @@
+// Copyright (c) 2006, Google Inc.
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+// * Neither the name of Google Inc. nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+// ---
+// Heavily inspired from make_tpl_varnames_h.cc
+//
+// A utility for evaluating the changes in escaping modifiers
+// applied to variables between two versions of a template file.
+// This may come in handy when converting a template to Auto-Escape:
+// If the template previously had escaping modifiers, this tool will show
+// the variables for which Auto-Escape determined a different escaping.
+//
+// How it works:
+// . You provide two template files, assumed to be identical in content
+// (same variables in the same order) except for escaping modifiers
+// and possibly the AUTOESCAPE pragma. You also provide the Strip mode
+// or a default of STRIP_WHITESPACE is assumed.
+//
+// . The tool loads both files and invokes DumpToString on both. It then
+// compares the escaping modifiers for each variable and when they do
+// not match, it prints a line with the variable name as well as
+// the differing modifiers.
+//
+// . We accept some command-line flags, the most notable are:
+// --template_dir to set a template root directory other than cwd
+// --strip to set the Strip mode to other than STRIP_WHITESPACE.
+// For correct operation of Auto-Escape, ensure this matches
+// the Strip mode you normally use on these templates.
+//
+//
+// Exit code is zero if there were no differences. It is non-zero
+// if we failed to load the templates or we found one or more
+// differences.
+//
+// TODO(jad): Add flag to optionally report differences when a variable
+// does not have modifiers in either template.
+
+// This is for opensource ctemplate on windows. Even though we
+// #include config.h, just like the files used to compile the dll, we
+// are actually a *client* of the dll, so we don't get to decl anything.
+#include <config.h>
+#undef CTEMPLATE_DLL_DECL
+
+#include <stdlib.h>
+#include <stdio.h>
+#ifdef HAVE_UNISTD_H
+# include <unistd.h>
+#endif
+#include <stdarg.h>
+#ifdef HAVE_GETOPT_H
+# include <getopt.h>
+#endif
+#include <string.h>
+#include <string>
+#include <ctemplate/template.h>
+#include <ctemplate/template_pathops.h>
+using std::string;
+using std::vector;
+using GOOGLE_NAMESPACE::Template;
+using GOOGLE_NAMESPACE::TemplateContext;
+using GOOGLE_NAMESPACE::Strip;
+using GOOGLE_NAMESPACE::STRIP_WHITESPACE;
+using GOOGLE_NAMESPACE::STRIP_BLANK_LINES;
+using GOOGLE_NAMESPACE::DO_NOT_STRIP;
+
+enum {LOG_VERBOSE, LOG_INFO, LOG_WARNING, LOG_ERROR, LOG_FATAL};
+
+// A variable name and optional modifiers.
+// For example: in {{NAME:j:x-bla}}
+// variable_name is "NAME" and modifiers is "j:x-bla".
+struct VariableAndMod {
+ VariableAndMod(string name, string mods)
+ : variable_name(name), modifiers(mods) { }
+ string variable_name;
+ string modifiers;
+};
+typedef vector<VariableAndMod> VariableAndMods;
+
+static string FLAG_template_dir(GOOGLE_NAMESPACE::kCWD); // "./"
+static string FLAG_strip = ""; // cmd-line arg -s
+static bool FLAG_verbose = false; // cmd-line arg -v
+
+static void LogPrintf(int severity, const char* pat, ...) {
+ if (severity == LOG_VERBOSE && !FLAG_verbose)
+ return;
+ if (severity == LOG_FATAL)
+ fprintf(stderr, "FATAL ERROR: ");
+ if (severity == LOG_VERBOSE)
+ fprintf(stdout, "[VERBOSE] ");
+ va_list ap;
+ va_start(ap, pat);
+ vfprintf(severity == LOG_INFO || severity == LOG_VERBOSE ? stdout: stderr,
+ pat, ap);
+ va_end(ap);
+ if (severity == LOG_FATAL)
+ exit(1);
+}
+
+// Prints to outfile -- usually stdout or stderr -- and then exits
+static int Usage(const char* argv0, FILE* outfile) {
+ fprintf(outfile, "USAGE: %s [-t<dir>] [-v] [-b] [-s<n>] <file1> <file2>\n",
+ argv0);
+
+ fprintf(outfile,
+ " -t --template_dir=<dir> Root directory of templates\n"
+ " -s --strip=<strip> STRIP_WHITESPACE [default],\n"
+ " STRIP_BLANK_LINES, DO_NOT_STRIP\n"
+ " -h --help This help\n"
+ " -v --verbose For a bit more output\n"
+ " -V --version Version information\n");
+ fprintf(outfile, "\n"
+ "This program reports changes to modifiers between two template\n"
+ "files assumed to be identical except for modifiers applied\n"
+ "to variables. One use case is converting a template to\n"
+ "Auto-Escape and using this program to obtain the resulting\n"
+ "changes in escaping modifiers.\n"
+ "The Strip value should match what you provide in\n"
+ "Template::GetTemplate.\n"
+ "NOTE: Variables that do not have escaping modifiers in one of\n"
+ "two templates are ignored and do not count in the differences.\n");
+ exit(0);
+}
+
+static int Version(FILE* outfile) {
+ fprintf(outfile,
+ "diff_tpl_auto_escape (part of google-template 0.9x)\n"
+ "\n"
+ "Copyright 2008 Google Inc.\n"
+ "\n"
+ "This is BSD licensed software; see the source for copying conditions\n"
+ "and license information.\n"
+ "There is NO warranty; not even for MERCHANTABILITY or FITNESS FOR A\n"
+ "PARTICULAR PURPOSE.\n"
+ );
+ exit(0);
+}
+
+// Populates the vector of VariableAndMods from the DumpToString
+// representation of the template file.
+//
+// Each VariableAndMod represents a variable node found in the template
+// along with the optional modifiers attached to it (or empty string).
+// The parsing is very simple. It looks for lines of the form:
+// "Variable Node: <VAR_NAME>[:<VAR_MODS>]\n"
+// as outputted by DumpToString() and extracts from each such line the
+// variable name and modifiers when present.
+// Because DumpToString also outputs text nodes, it is possible
+// to trip this function. Probably ok since this is just a helper tool.
+bool LoadVariables(const char* filename, Strip strip,
+ VariableAndMods& vars_and_mods) {
+ const string kVariablePreambleText = "Variable Node: ";
+ Template *tpl;
+ tpl = Template::GetTemplate(filename, strip);
+ if (tpl == NULL) {
+ LogPrintf(LOG_FATAL, "Could not load file: %s\n", filename);
+ return false;
+ }
+
+ string output;
+ tpl->DumpToString(filename, &output);
+
+ string::size_type index = 0;
+ string::size_type delim, end;
+ // TODO(jad): Switch to using regular expressions.
+ while((index = output.find(kVariablePreambleText, index)) != string::npos) {
+ index += kVariablePreambleText.length();
+ end = output.find('\n', index);
+ if (end == string::npos) {
+ // Should never happen but no need to LOG_FATAL.
+ LogPrintf(LOG_ERROR, "%s: Did not find terminating newline...\n",
+ filename);
+ end = output.length();
+ }
+ string name_and_mods = output.substr(index, end - index);
+ delim = name_and_mods.find(":");
+ if (delim == string::npos) // no modifiers.
+ delim = name_and_mods.length();
+ VariableAndMod var_mod(name_and_mods.substr(0, delim),
+ name_and_mods.substr(delim));
+ vars_and_mods.push_back(var_mod);
+ }
+ return true;
+}
+
+// Returns true if the difference between the two modifier strings is
+// insignificant and can safely be omitted from the report.  This is the
+// case when one is ":j:h" and the other is ":j", since the :h is a
+// no-op after a :j.  The strings are taken by const reference so that
+// calling this once per variable does not copy them.
+bool SuppressLameDiff(const string& modifiers_a, const string& modifiers_b) {
+  const bool a_has_extra_h = (modifiers_a == ":j:h" && modifiers_b == ":j");
+  const bool b_has_extra_h = (modifiers_a == ":j" && modifiers_b == ":j:h");
+  return a_has_extra_h || b_has_extra_h;
+}
+
+// Main function to analyze differences in escaping modifiers between
+// two template files. These files are assumed to be identical in
+// content [strictly speaking: same number of variables in the same order].
+// If that is not the case, we fail (LOG_FATAL terminates the program).
+// We return true if there were no differences, false if we failed
+// or we found one or more differences.
+bool DiffTemplates(const char* filename_a, const char* filename_b,
+                   Strip strip) {
+  vector<VariableAndMod> vars_and_mods_a, vars_and_mods_b;
+
+  if (!LoadVariables(filename_a, strip, vars_and_mods_a) ||
+      !LoadVariables(filename_b, strip, vars_and_mods_b))
+    return false;
+
+  // size() is cast to int: LogPrintf is printf-style and %d requires an int.
+  if (vars_and_mods_a.size() != vars_and_mods_b.size())
+    LogPrintf(LOG_FATAL, "Templates differ: %s [%d vars] vs. %s [%d vars].\n",
+              filename_a, static_cast<int>(vars_and_mods_a.size()),
+              filename_b, static_cast<int>(vars_and_mods_b.size()));
+
+  int mismatch_count = 0;      // How many differences there were.
+  int no_modifiers_count = 0;  // How many variables without modifiers.
+  VariableAndMods::const_iterator iter_a, iter_b;
+  for (iter_a = vars_and_mods_a.begin(), iter_b = vars_and_mods_b.begin();
+       iter_a != vars_and_mods_a.end() && iter_b != vars_and_mods_b.end();
+       ++iter_a, ++iter_b) {
+    // The templates have different variables, we fail!
+    if (iter_a->variable_name != iter_b->variable_name)
+      LogPrintf(LOG_FATAL, "Variable name mismatch: %s vs. %s\n",
+                iter_a->variable_name.c_str(),
+                iter_b->variable_name.c_str());
+    // Variables without modifiers are ignored from the diff. They simply
+    // get counted and the count is shown in verbose logging.
+    if (iter_a->modifiers == "" || iter_b->modifiers == "") {
+      no_modifiers_count++;
+    } else if (iter_a->modifiers != iter_b->modifiers &&
+               !SuppressLameDiff(iter_a->modifiers, iter_b->modifiers)) {
+      mismatch_count++;
+      LogPrintf(LOG_INFO, "Difference for variable %s -- %s vs. %s\n",
+                iter_a->variable_name.c_str(),
+                iter_a->modifiers.c_str(), iter_b->modifiers.c_str());
+    }
+  }
+
+  LogPrintf(LOG_VERBOSE, "Variables Found: Total=%d; Diffs=%d; NoMods=%d\n",
+            static_cast<int>(vars_and_mods_a.size()), mismatch_count,
+            no_modifiers_count);
+
+  return (mismatch_count == 0);
+}
+
+int main(int argc, char **argv) {
+#if defined(HAVE_GETOPT_LONG)
+ static struct option longopts[] = {
+ {"help", 0, NULL, 'h'},
+ {"strip", 1, NULL, 's'},
+ {"template_dir", 1, NULL, 't'},
+ {"verbose", 0, NULL, 'v'},
+ {"version", 0, NULL, 'V'},
+ {0, 0, 0, 0}
+ };
+ int option_index;
+# define GETOPT(argc, argv) getopt_long(argc, argv, "t:s:hvV", \
+ longopts, &option_index)
+#elif defined(HAVE_GETOPT_H)
+# define GETOPT(argc, argv) getopt(argc, argv, "t:s:hvV")
+#else
+ // TODO(csilvers): implement something reasonable for windows/etc
+# define GETOPT(argc, argv) -1
+ int optind = 1; // first non-opt argument
+ const char* optarg = ""; // not used
+#endif
+
+ int r = 0;
+ while (r != -1) { // getopt()/getopt_long() return -1 upon no-more-input
+ r = GETOPT(argc, argv);
+ switch (r) {
+ case 's': FLAG_strip.assign(optarg); break;
+ case 't': FLAG_template_dir.assign(optarg); break;
+ case 'v': FLAG_verbose = true; break;
+ case 'V': Version(stdout); break;
+ case -1: break; // means 'no more input'
+ default: Usage(argv[0], stderr);
+ }
+ }
+
+ Template::SetTemplateRootDirectory(FLAG_template_dir);
+
+
+ if (argc != (optind + 2))
+ LogPrintf(LOG_FATAL,
+ "Must specify exactly two template files on the command line.\n");
+
+ // Validate the Strip value. Default is STRIP_WHITESPACE.
+ Strip strip = STRIP_WHITESPACE; // To avoid compiler warnings.
+ if (FLAG_strip == "STRIP_WHITESPACE" || FLAG_strip == "")
+ strip = STRIP_WHITESPACE;
+ else if (FLAG_strip == "STRIP_BLANK_LINES")
+ strip = STRIP_BLANK_LINES;
+ else if (FLAG_strip == "DO_NOT_STRIP")
+ strip = DO_NOT_STRIP;
+ else
+ LogPrintf(LOG_FATAL, "Unrecognized Strip: %s. Must be one of: "
+ "STRIP_WHITESPACE, STRIP_BLANK_LINES or DO_NOT_STRIP\n",
+ FLAG_strip.c_str());
+
+ const char* filename_a = argv[optind];
+ const char* filename_b = argv[optind + 1];
+ LogPrintf(LOG_VERBOSE, "------ Diff of [%s, %s] ------\n",
+ filename_a, filename_b);
+
+ if (DiffTemplates(filename_a, filename_b, strip))
+ return 0;
+ else
+ return 1;
+}
diff --git a/src/htmlparser/README b/src/htmlparser/README
new file mode 100644
index 0000000..83ff6bb
--- /dev/null
+++ b/src/htmlparser/README
@@ -0,0 +1,13 @@
+This directory contains an implementation of an html context scanner with no
+lookahead. Its purpose is to scan an html stream and provide context
+information at any point within the input stream. This is used in the
+auto escaping functionality of the templating system, which would require
+html context information at very specific points within the html stream. The
+implementation is based on a simplified state machine of HTML 4.01.
+
+The implementation is written in C, with the idea of making it usable
+in other contexts besides this html parser; however, we compile all
+the files as c++ files in order to be able to use a namespace (and
+avoid global namespace pollution). There's also a c++ shim to give an
+object-oriented look at the html parser state, which is what the
+ctemplate system actually uses.
diff --git a/src/htmlparser/fsm_config.py b/src/htmlparser/fsm_config.py
new file mode 100755
index 0000000..ff27c89
--- /dev/null
+++ b/src/htmlparser/fsm_config.py
@@ -0,0 +1,221 @@
+#!/usr/bin/env python
+#
+# Copyright (c) 2008, Google Inc.
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are
+# met:
+#
+# * Redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer.
+# * Redistributions in binary form must reproduce the above
+# copyright notice, this list of conditions and the following disclaimer
+# in the documentation and/or other materials provided with the
+# distribution.
+# * Neither the name of Google Inc. nor the names of its
+# contributors may be used to endorse or promote products derived from
+# this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+# ---
+#
+# Create a state machine object based on a definition file.
+#
+
+__author__ = 'falmeida@google.com (Filipe Almeida)'
+
+class OrderedDict:
+ """Ordered dictionary implementation."""
+
+ # Define the minimum functionality we need for our application.
+ # Easiser would be to subclass from UserDict.DictMixin, and only
+ # define __getitem__, __setitem__, __delitem__, and keys, but that's
+ # not as portable. We don't need to define much more, so we just do.
+
+ def __init__(self):
+ self._dict = {}
+ self._keys = []
+
+ def __getitem__(self, key):
+ return self._dict[key]
+
+ def __setitem__(self, key, value):
+ if key not in self._keys:
+ self._keys.append(key)
+ self._dict[key] = value
+
+ def __delitem__(self, key):
+ self._keys.remove(key)
+ del self._dict[key]
+
+ def keys(self):
+ return self._keys
+
+ # Below are all we have to define in addition to what DictMixin would need
+ def __len__(self):
+ return len(self.keys())
+
+ def __contains__(self, key):
+ return self.has_key(key)
+
+ def __iter__(self):
+ # It's not as portable -- though it would be more space-efficient -- to do
+ # for k in self.keys(): yield k
+ return iter(self.keys())
+
+class State(object):
+ """Contains information about a specific state."""
+
+ def __init__(self):
+ pass
+
+ name = None
+ external_name = None
+ transitions = []
+
+
+class Transition(object):
+ """Contains information about a specific transition."""
+
+ def __init__(self, condition, source, destination):
+ self.condition = condition
+ self.source = source
+ self.destination = destination
+
+
+class FSMConfig(object):
+ """Container for the statemachine definition."""
+
+ sm = {} # dictionary that contains the finite state machine definition
+ # loaded from a config file.
+ transitions = [] # List of transitions.
+ conditions = {} # Mapping between the condition name and the bracket
+ # expression.
+ states = OrderedDict() # Ordered dictionary of states.
+ name = None
+ comment = None
+
+ def AddState(self, **dic):
+ """Called from the definition file with the description of the state.
+
+ Receives a dictionary and populates internal structures based on it. The
+ dictionary is in the following format:
+
+ {'name': state_name,
+ 'external': exposed state name,
+ 'transitions': [
+ [condition, destination_state ],
+ [condition, destination_state ]
+ ]
+ }
+
+ """
+
+ state = State()
+ state.name = dic['name']
+ state.external_name = dic['external']
+
+ state_transitions = []
+
+ for (condition, destination) in dic['transitions']:
+ transition = Transition(condition, state.name, destination)
+ state_transitions.append(transition)
+
+ self.transitions.extend(state_transitions)
+ state.transitions = state_transitions
+ self.states[state.name] = state
+
+ def AddCondition(self, name, expression):
+ """Called from the definition file with the definition of a condition.
+
+ Receives the name of the condition and it's expression.
+ """
+ self.conditions[name] = expression
+
+ def Load(self, filename):
+ """Load the state machine definition file.
+
+ In the definition file, which is based on the python syntax, the following
+ variables and functions are defined.
+
+ name: Name of the state machine
+ comment: Comment line on the generated file.
+ condition(): A mapping between condition names and bracket expressions.
+ state(): Defines a state and it's transitions. It accepts the following
+ attributes:
+ name: name of the state
+ external: exported name of the state. The exported name can be used
+ multiple times in order to create a super state.
+ transitions: List of pairs containing the condition for the transition
+ and the destination state. Transitions are ordered so if
+ a default rule is used, it must be the last one in the list.
+
+ Example:
+
+ name = 'c comment parser'
+
+ condition('/', '/')
+ condition('*', '*')
+ condition('linefeed', '\\n')
+ condition('default', '[:default:]')
+
+ state(name = 'text',
+ external = 'comment',
+ transitions = [
+ [ '/', 'comment_start' ],
+ [ 'default', 'text' ]
+ ])
+
+ state(name = 'comment_start',
+ external = 'comment',
+ transitions = [
+ [ '/', 'comment_line' ],
+ [ '*', 'comment_multiline' ],
+ [ 'default', 'text' ]
+ ])
+
+ state(name = 'comment_line',
+ external = 'comment',
+ transitions = [
+ [ 'linefeed', 'text' ],
+ [ 'default', 'comment_line' ]
+ ])
+
+ state(name = 'comment_multiline',
+ external = 'comment',
+ transitions = [
+ [ '*', 'comment_multiline_close' ],
+ [ 'default', 'comment_multiline' ]
+ ])
+
+ state(name = 'comment_multiline_close',
+ external = 'comment',
+ transitions = [
+ [ '/', 'text' ],
+ [ 'default', 'comment_multiline' ]
+ ])
+
+ """
+
+ self.sm['state'] = self.AddState
+ self.sm['condition'] = self.AddCondition
+ execfile(filename, self.sm)
+ self.name = self.sm['name']
+ if not self.name.isalnum():
+ raise Exception("State machine name must consist of only alphanumeric"
+ "characters.")
+ self.comment = self.sm['comment']
+
+ def __init__(self):
+ pass
diff --git a/src/htmlparser/generate_fsm.py b/src/htmlparser/generate_fsm.py
new file mode 100755
index 0000000..9106b96
--- /dev/null
+++ b/src/htmlparser/generate_fsm.py
@@ -0,0 +1,330 @@
+#!/usr/bin/env python
+#
+# Copyright (c) 2008, Google Inc.
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are
+# met:
+#
+# * Redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer.
+# * Redistributions in binary form must reproduce the above
+# copyright notice, this list of conditions and the following disclaimer
+# in the documentation and/or other materials provided with the
+# distribution.
+# * Neither the name of Google Inc. nor the names of its
+# contributors may be used to endorse or promote products derived from
+# this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+# ---
+#
+# Generate a C include file from a finite state machine definition.
+#
+# Right now the form is the one expected by htmlparser.c so this file is pretty
+# tightly coupled with htmlparser.c.
+#
+
+__author__ = 'falmeida@google.com (Filipe Almeida)'
+
+import sys
+
+from fsm_config import FSMConfig
+
+
+class FSMGenerateAbstract(object):
+  """Base class for generators that turn an FSM configuration into code.
+
+  Subclasses override Generate() to produce output for a specific language.
+  """
+
+  def __init__(self, config):
+    """Stores the configuration object used by the generator.
+
+    Args:
+      config: state machine configuration object, kept in self._config.
+    """
+    self._config = config
+
+  def Generate(self):
+    """Returns the generated FSM description for the specified language.
+
+    Raises a TypeError, because abstract methods can not be called.
+
+    Raises:
+      TypeError
+    """
+    # The class name is looked up at call time so the message names the
+    # concrete subclass that forgot to override this method.
+    raise TypeError('Abstract method %s.%s called' % (self.__class__.__name__,
+                                                      'Generate'))
+
+
+class FSMGenerateC(FSMGenerateAbstract):
+  """Generate the C definition from a statemachine configuration object.
+
+  The configuration object must provide the attributes read by this class:
+  name, comment, states, transitions and conditions.
+  """
+
+  TABSTOP_ = 2
+
+  # Number of columns in every transition row: one per possible value of an
+  # 8 bit input character (0 to 255 inclusive).
+  NUM_INPUT_CHARS_ = 256
+
+  def _Prefix(self):
+    """Return a c declaration prefix."""
+
+    return self._config.name.lower() + '_'
+
+  def _StateInternalC(self, st):
+    """Return the internal name of the state."""
+
+    return '%sSTATE_INT_%s' % (self._Prefix().upper(), st.upper())
+
+  def _StateExternalC(self, st):
+    """Return the external name of the state."""
+
+    return '%sSTATE_%s' % (self._Prefix().upper(), st.upper())
+
+  def _MakeTuple(self, data):
+    """Converts data to a string representation of a C tuple."""
+
+    return '{ %s }' % ', '.join(data)
+
+  def _CreateHeader(self):
+    """Return the include file header comment.
+
+    The configured comment is used as the first line when it is set, otherwise
+    a default line naming the state machine is used.
+    """
+
+    out = []
+
+    if self._config.comment:
+      out.append('/* ' + self._config.comment)
+    else:
+      out.append('/* State machine definition for ' + self._config.name)
+    out.append(' * Auto generated by generate_fsm.py. Please do not edit.')
+    out.append(' */')
+
+    return '\n'.join(out)
+
+  def _ListToIndentedString(self, list):
+    """Return the elements indented and joined with ',' and a newline."""
+    indented_list = [' ' + e for e in list]
+    return ',\n'.join(indented_list)
+
+  def _CreateEnum(self, name, data):
+    """Return a c enum definition."""
+
+    return 'enum %s {\n%s\n};\n' % (name,
+                                    self._ListToIndentedString(data))
+
+  def _CreateStructList(self, name, type, data):
+    """Return a c flat list.
+
+    Generic function to format a list in c in the form of a static array.
+
+    Args:
+      name: name of the structure.
+      type: type of the struct.
+      data: contents of the struct as a list of elements
+
+    Returns:
+      String with the generated list.
+    """
+
+    return "static const %s %s[] = {\n%s\n};\n" % (
+        type,
+        name,
+        self._ListToIndentedString(data))
+
+  def _CreateStatesEnum(self):
+    """Return the internal states enum.
+
+    Builds an enum containing all the valid states.
+
+    Returns:
+      String containing a C enumeration of the states.
+    """
+    names = [self._StateInternalC(state) for state in self._config.states]
+    return self._CreateEnum(self._Prefix() + 'state_internal_enum', names)
+
+  def _CreateStatesExternal(self):
+    """Return a struct with a mapping from internal to external states."""
+    names = []
+
+    for state_name in self._config.states:
+      names.append(self._StateExternalC(
+          self._config.states[state_name].external_name))
+
+    return self._CreateStructList(self._Prefix() + 'states_external',
+                                  'int',
+                                  names)
+
+  def _CreateStatesInternalNames(self):
+    """Return a struct mapping internal states to a strings."""
+    out = ['"' + state_name + '"' for state_name in self._config.states]
+
+    return self._CreateStructList(self._Prefix() + 'states_internal_names',
+                                  'char *',
+                                  out)
+
+  def _CreateNumStates(self):
+    """Return a Macro defining the number of states.
+
+    The value is one more than the number of configured states.
+    """
+    # NOTE(review): the consumer of the extra slot is not visible from this
+    # file; the +1 is preserved from the original generator.
+    return "#define %s_NUM_STATES %s" % (self._config.name.upper(),
+                                         str(len(self._config.states) + 1))
+
+  def _ExpandBracketExpression(self, expression):
+    """Expand ranges in a regexp bracket expression.
+
+    Returns a string with the ranges in a bracket expression expanded.
+
+    The bracket expression is similar to grep(1) or regular expression bracket
+    expressions but it does not support the negation (^) modifier or named
+    character classes like [:alpha:] or [:alnum:].
+
+    The special character class [:default:] expands to every possible input
+    character, that is all values from 0 to 255 inclusive.
+
+    For example, the expression 'a-c13A-D' will expand to 'abc13ABCD'.
+
+    Args:
+      expression: A regexp bracket expression. Ie: 'A-Z0-9'.
+
+    Returns:
+      A string with the ranges in the bracket expression expanded.
+    """
+
+    def ExpandRange(start, end):
+      """Return the characters between start and end, both included.
+
+      Args:
+        start: first character of the sequence.
+        end: last character of the sequence.
+
+      Returns:
+        list of one character strings from start to end.
+      """
+      return [chr(c) for c in range(ord(start), ord(end) + 1)]
+
+    # Special case for the character class [:default:]
+    if expression == '[:default:]':
+      return ''.join([chr(c) for c in range(self.NUM_INPUT_CHARS_)])
+
+    out = []  # List containing the output
+    length = len(expression)
+    pos = 0
+
+    while pos < length:
+      current = expression[pos]
+      if pos + 1 < length and expression[pos + 1] == '-':
+        if pos + 2 < length:
+          # 'a-c' form: expand the whole range.
+          out.extend(ExpandRange(current, expression[pos + 2]))
+        else:
+          # A trailing '-' has no end character, so it is taken literally.
+          out.append(current)
+          out.append('-')
+        pos += 3
+      else:
+        out.append(current)
+        pos += 1
+
+    return ''.join(out)
+
+  def _CreateTransitionTable(self):
+    """Return the state transition list.
+
+    Returns a set of C structures that define the transition table for the state
+    machine. This structure is a list of lists of ints (int **). The outer list
+    indexes the source state and the inner list contains the destination state
+    for each of the possible input characters:
+
+    const int * const* transitions[source][input] == destination.
+
+    Every inner list has NUM_INPUT_CHARS_ entries so that any 8 bit input
+    character is a valid index.
+
+    The conditions are mapped from the conditions variable.
+
+    Returns:
+      String containing the generated transition table in a C struct.
+    """
+    out = []  # output list
+    default_state = 'STATEMACHINE_ERROR'
+    state_table = {}
+
+    for state in self._config.states:
+      state_table[state] = [default_state] * self.NUM_INPUT_CHARS_
+
+    # We process the transition in reverse order while updating the table, so
+    # that transitions listed first overwrite the ones listed after them.
+    for transition in reversed(self._config.transitions):
+      condition = self._config.conditions[transition.condition]
+      destination = self._StateInternalC(transition.destination)
+
+      for c in self._ExpandBracketExpression(condition):
+        state_table[transition.source][ord(c)] = destination
+
+    # Create the inner lists which map input characters to destination states.
+    for state in self._config.states:
+      transition_row = []
+      for c in xrange(self.NUM_INPUT_CHARS_):
+        transition_row.append(' /* %06s */ %s' % (repr(chr(c)),
+                                                  state_table[state][c]))
+
+      out.append(self._CreateStructList('%stransition_row_%s' %
+                                        (self._Prefix(),
+                                         state),
+                                        'int',
+                                        transition_row))
+      out.append('\n')
+
+    # Create the outer list, which map source states to input characters.
+    out.append('static const %s %s[] = {\n' % ('int *', self._Prefix() +
+                                               'state_transitions'))
+
+    row_list = [' %stransition_row_%s' %
+                (self._Prefix(), row) for row in self._config.states]
+    out.append(',\n'.join(row_list))
+    out.append('\n};\n')
+
+    return ''.join(out)
+
+  def Generate(self):
+    """Prints the generated C include file for the statemachine to stdout."""
+
+    print '\n'.join((self._CreateHeader(),
+                     self._CreateNumStates(),
+                     self._CreateStatesEnum(),
+                     self._CreateStatesExternal(),
+                     self._CreateStatesInternalNames(),
+                     self._CreateTransitionTable()))
+
+
+def main():
+  """Reads the config file named on the command line and prints the C code.
+
+  Exits with status 1 after printing a usage line when the number of
+  arguments is not exactly one.
+  """
+  arguments = sys.argv[1:]
+  if len(arguments) != 1:
+    print "usage: generate_fsm.py config_file"
+    sys.exit(1)
+
+  fsm_config = FSMConfig()
+  fsm_config.Load(arguments[0])
+
+  FSMGenerateC(fsm_config).Generate()
+
+
+if __name__ == "__main__":
+  main()
diff --git a/src/htmlparser/htmlparser.cc b/src/htmlparser/htmlparser.cc
new file mode 100644
index 0000000..749a74e
--- /dev/null
+++ b/src/htmlparser/htmlparser.cc
@@ -0,0 +1,1096 @@
+/*
+ * Copyright (c) 2007, Google Inc.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following disclaimer
+ * in the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Google Inc. nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ * ---
+ *
+ * Author: falmeida@google.com (Filipe Almeida)
+ */
+
+/* TODO(falmeida): Breaks on NUL ('\0') characters in the stream. Fix.
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <ctype.h>
+#include <assert.h>
+
+#include "htmlparser/statemachine.h"
+#include "htmlparser/htmlparser.h"
+#include "htmlparser/jsparser.h"
+
+/* So we can support both C and C++ compilers, we use the CAST() macro instead
+ * of using C style casts or static_cast<>() directly.
+ *
+ * CAST(type, expression) expands to static_cast<type>(expression) when the
+ * file is compiled as C++ and to a C style cast otherwise.
+ */
+#ifdef __cplusplus
+ #define CAST(type, expression) (static_cast<type>(expression))
+#else
+ #define CAST(type, expression) ((type)(expression))
+#endif
+
+#ifdef __cplusplus
+namespace ctemplate_htmlparser {
+#endif
+
+/* Generated state machine definition. */
+#include "htmlparser/htmlparser_fsm.h"
+
+/* True if the attribute name starts with the two characters "on". The second
+ * character is only read when the first one matches. */
+#define is_js_attribute(attr) ((attr)[0] == 'o' && (attr)[1] == 'n')
+/* True if the attribute name is exactly "style" (case sensitive). */
+#define is_style_attribute(attr) (strcmp((attr), "style") == 0)
+
+/* html entity filter
+ *
+ * Maps an entity name (without the leading '&' and the trailing ';') to the
+ * text it decodes to. The list is terminated by an entry whose fields are
+ * both NULL.
+ */
+static struct entityfilter_table_s {
+ const char *entity;
+ const char *value;
+} entityfilter_table[] = {
+ { "lt", "<" },
+ { "gt", ">" },
+ { "quot", "\"" },
+ { "amp", "&" },
+ { "apos", "\'" },
+ { NULL, NULL }
+};
+
+/* Utility functions */
+
+/* Copies src into dst like strncpy(), but limits the copy to the smaller of
+ * dst_size and src_size so the rest of dst is not padded with NUL characters.
+ *
+ * The last copied position is always overwritten with a NUL terminator. When
+ * either size is zero dst is left untouched.
+ */
+static inline void nopad_strncpy(char *dst, const char *src, size_t dst_size,
+                                 size_t src_size)
+{
+  const size_t limit = (src_size < dst_size) ? src_size : dst_size;
+
+  if (limit == 0)
+    return;
+
+  strncpy(dst, src, limit);
+  dst[limit - 1] = '\0';
+}
+
+/* Maps an internal state to the external superstate exported to callers.
+ *
+ * STATEMACHINE_ERROR maps to HTMLPARSER_STATE_ERROR; any other value is used
+ * as an index into the generated htmlparser_states_external table.
+ */
+static int state_external(int st)
+{
+  if (st != STATEMACHINE_ERROR)
+    return htmlparser_states_external[st];
+
+  return HTMLPARSER_STATE_ERROR;
+}
+
+/* Returns 1 if the character is an html whitespace character (space, tab,
+ * line feed or carriage return) and 0 otherwise.
+ *
+ * From: http://www.w3.org/TR/html401/struct/text.html#h-9.1
+ */
+static inline int html_isspace(char chr)
+{
+  switch (chr) {
+    case ' ':
+    case '\t':
+    case '\n':
+    case '\r':
+      return 1;
+    default:
+      return 0;
+  }
+}
+
+/* Returns 1 if the attribute is expected to contain a url and 0 otherwise,
+ * including when attr is NULL. The comparison is case sensitive.
+ *
+ * This list was taken from: http://www.w3.org/TR/html4/index/attributes.html
+ */
+static int is_uri_attribute(char *attr)
+{
+  /* TODO(falmeida): "archive" is a uri list. Should we treat it diferently? */
+  static const char *const uri_attributes[] = {
+    "action",
+    "archive",
+    "background",
+    "cite",
+    "classid",
+    "codebase",
+    "data",
+    "dynsrc",  /* from msdn */
+    "href",
+    "longdesc",
+    "src",
+    "usemap"
+  };
+  size_t i;
+
+  if (attr == NULL)
+    return 0;
+
+  for (i = 0; i < sizeof(uri_attributes) / sizeof(uri_attributes[0]); i++) {
+    if (strcmp(attr, uri_attributes[i]) == 0)
+      return 1;
+  }
+
+  return 0;
+}
+
+/* Lower cases every character of the NUL terminated string s in place.
+ */
+static void tolower_str(char *s)
+{
+  char *cursor;
+
+  /* tolower() takes the character as an unsigned char value. */
+  for (cursor = s; *cursor != '\0'; cursor++)
+    *cursor = CAST(char, tolower(CAST(unsigned char, *cursor)));
+}
+
+/* Returns a pointer to the first character of value that is neither an html
+ * whitespace character nor a decimal digit.
+ */
+static const char *ignore_spaces_or_digits(const char *value) {
+  for (;;) {
+    const char c = *value;
+
+    if (!html_isspace(c) && !(c >= '0' && c <= '9'))
+      return value;
+    value++;
+  }
+}
+
+/* Returns a pointer to the first character of value that is not an html
+ * whitespace character.
+ */
+static const char *ignore_spaces(const char *value) {
+  const char *cursor;
+
+  for (cursor = value; html_isspace(*cursor); cursor++) {
+    /* Nothing to do, the loop header advances the cursor. */
+  }
+
+  return cursor;
+}
+
+/* Return type of the function meta_redirect_type.
+ */
+enum meta_redirect_type_enum {
+ META_REDIRECT_TYPE_NONE, /* A url was not identified in the input. */
+ META_REDIRECT_TYPE_URL_START, /* The input ends exactly where a url starts. */
+ META_REDIRECT_TYPE_URL /* The input ends in the middle or end of a url. */
+};
+
+/* Analyzes a string for the presence of a meta refresh type url.
+ *
+ * This function receives the value of the content attribute of a meta tag and
+ * parses it in order to identify if a url is going to be present. This is the
+ * format of such tag:
+ *
+ * <meta http-equiv="refresh" content="5; URL=http://www.google.com">
+ *
+ * Using a regular expression library would be the most obvious way to implement
+ * this functionality, but introducing such a dependency is undesirable. We
+ * opted instead to parse programmatically since the expression is simple enough.
+ *
+ * For reference, this is the spec on the meta http refresh tag:
+ * http://dev.w3.org/html5/spec/Overview.html#attr-meta-http-equiv-refresh
+ *
+ * If the value has no content after the expression, we know we are at the start
+ * of the URL. Otherwise we are past the start of the URL.
+ *
+ *
+ * Returns:
+ *
+ * This functions returns one of the following values:
+ * META_REDIRECT_TYPE_NONE - A url was not identified in the input string.
+ * META_REDIRECT_TYPE_URL_START - The input string ends exactly at the start
+ * of the url.
+ * META_REDIRECT_TYPE_URL - The input string ends somewhere in the middle or
+ * the end of the url.
+ *
+ * A few examples:
+ * "5"
+ * Returns META_REDIRECT_TYPE_NONE since we don't expect a url to follow.
+ *
+ * "5; URL = "
+ * The function returns META_REDIRECT_TYPE_URL_START since we expect a url to
+ * follow.
+ *
+ * "5; URL = http://www.google.com/?"
+ * Returns META_REDIRECT_TYPE_URL since the input value terminates in the
+ * middle or end of a url.
+ *
+ *
+ * Caveats: We are only recording up to 256 characters of attribute values, so
+ * our analysis is limited to that. This shouldn't be an issue in practice
+ * though as it would be unexpected for the part of the string that we are
+ * matching to be so long.
+ */
+enum meta_redirect_type_enum meta_redirect_type(const char *value) {
+  const char *cursor;
+
+  if (value == NULL)
+    return META_REDIRECT_TYPE_NONE;
+
+  /* Leading delay: [ \t\r\n0-9]* followed by a mandatory semi-colon. */
+  cursor = ignore_spaces_or_digits(value);
+  if (*cursor != ';')
+    return META_REDIRECT_TYPE_NONE;
+
+  /* Optional whitespace, then the case insensitive keyword 'URL'. */
+  cursor = ignore_spaces(cursor + 1);
+  if (strncasecmp(cursor, "url", strlen("url")) != 0)
+    return META_REDIRECT_TYPE_NONE;
+
+  /* Optional whitespace, then a mandatory equals sign. */
+  cursor = ignore_spaces(cursor + strlen("url"));
+  if (*cursor != '=')
+    return META_REDIRECT_TYPE_NONE;
+
+  cursor = ignore_spaces(cursor + 1);
+
+  /* The HTML5 spec allows for the url to be quoted, so a single leading
+   * single or double quote is skipped.
+   */
+  if (*cursor == '"' || *cursor == '\'')
+    cursor++;
+
+  /* Nothing left means the url is about to start, otherwise we are in it. */
+  return (*cursor == '\0') ? META_REDIRECT_TYPE_URL_START
+                           : META_REDIRECT_TYPE_URL;
+}
+
+
+/* Puts the entityfilter back into its initial state so it can be reused:
+ * not inside an entity, with an empty buffer.
+ */
+void entityfilter_reset(entityfilter_ctx *ctx)
+{
+  ctx->in_entity = 0;
+  ctx->buffer_pos = 0;
+  ctx->buffer[0] = 0;
+}
+
+/* Allocates a new entity filter object in its initial state.
+ *
+ * Returns NULL if the allocation fails. The object is released with
+ * entityfilter_delete().
+ */
+entityfilter_ctx *entityfilter_new()
+{
+  entityfilter_ctx *filter = CAST(entityfilter_ctx *,
+                                  malloc(sizeof(entityfilter_ctx)));
+
+  if (filter != NULL) {
+    /* Same initial values that entityfilter_reset() sets. */
+    filter->buffer[0] = 0;
+    filter->buffer_pos = 0;
+    filter->in_entity = 0;
+  }
+
+  return filter;
+}
+
+/* Overwrites the entityfilter dst with a byte for byte copy of src.
+ *
+ * Both pointers must be non NULL and must refer to different objects.
+ */
+void entityfilter_copy(entityfilter_ctx *dst, entityfilter_ctx *src)
+{
+  assert(src != NULL);
+  assert(dst != NULL);
+  assert(src != dst);
+
+  memcpy(dst, src, sizeof(*dst));
+}
+
+
+/* Deallocates an entity filter object by passing it to free().
+ */
+void entityfilter_delete(entityfilter_ctx *ctx)
+{
+ free(ctx);
+}
+
+/* Parses the hexadecimal number at the start of s and stores the character
+ * with that value in output, followed by a NUL terminator.
+ *
+ * The provided output char array must be at least 2 chars long. Returns
+ * output.
+ */
+static const char *parse_hex(const char *s, char *output)
+{
+  const int code = strtol(s, NULL, 16);
+
+  output[0] = code;
+  output[1] = 0;
+  /* TODO(falmeida): Make this function return void */
+  return output;
+}
+
+/* Parses the decimal number at the start of s and stores the character with
+ * that value in output, followed by a NUL terminator.
+ *
+ * The provided output char array must be at least 2 chars long. Returns
+ * output.
+ */
+static const char *parse_dec(const char *s, char *output)
+{
+  const int code = strtol(s, NULL, 10);
+
+  output[0] = code;
+  output[1] = 0;
+  return output;
+}
+
+/* Decodes the html entity named by s, given without the leading '&'.
+ *
+ * Numeric entities ("#..." and "#x...") are written to output. Named entities
+ * found in entityfilter_table return the table value. Anything else is
+ * written back to output as "&", the name and the terminator character.
+ *
+ * Returns a pointer to the decoded text, which is either output or a string
+ * from entityfilter_table.
+ */
+static const char *entity_convert(const char *s, char *output, char terminator)
+{
+  /* TODO(falmeida): Handle wide char encodings */
+  const struct entityfilter_table_s *entry;
+
+  if (s[0] == '#') {
+    if (s[1] == 'x' || s[1] == 'X')
+      return parse_hex(s + 2, output);  /* hex */
+
+    return parse_dec(s + 1, output);  /* decimal */
+  }
+
+  for (entry = entityfilter_table; entry->entity != NULL; entry++) {
+    if (strcasecmp(entry->entity, s) == 0)
+      return entry->value;
+  }
+
+  /* Unknown entity: emit it unchanged. */
+  snprintf(output, HTMLPARSER_MAX_ENTITY_SIZE, "&%s%c", s, terminator);
+  output[HTMLPARSER_MAX_ENTITY_SIZE - 1] = '\0';
+
+  return output;
+}
+
+
+/* Feeds one character of the input stream to the entity filter.
+ *
+ * Returns the decoded text produced by this character: the character itself
+ * outside an entity, an empty string while an entity is being accumulated,
+ * or the decoded entity once it is terminated by ';' or html whitespace.
+ */
+const char *entityfilter_process(entityfilter_ctx *ctx, char c)
+{
+  if (!ctx->in_entity) {
+    if (c == '&') {
+      /* Start accumulating a new entity, nothing to emit yet. */
+      ctx->in_entity = 1;
+      ctx->buffer_pos = 0;
+      return "";
+    }
+
+    ctx->output[0] = c;
+    ctx->output[1] = 0;
+    return ctx->output;
+  }
+
+  if (c == ';' || html_isspace(c)) {
+    /* End of the entity: terminate the name and decode it. */
+    ctx->in_entity = 0;
+    ctx->buffer[ctx->buffer_pos] = '\0';
+    ctx->buffer_pos = 0;
+    return entity_convert(ctx->buffer, ctx->output, c);
+  }
+
+  ctx->buffer[ctx->buffer_pos++] = c;
+  if (ctx->buffer_pos < HTMLPARSER_MAX_ENTITY_SIZE - 2)
+    return "";
+
+  /* No more buffer to use, finalize and return.
+   * We need two characters left, one for the '&' character and
+   * another for the NULL termination. */
+  ctx->buffer[ctx->buffer_pos] = '\0';
+  ctx->in_entity = 0;
+  ctx->buffer_pos = 0;
+  snprintf(ctx->output, HTMLPARSER_MAX_ENTITY_SIZE, "&%s",
+           ctx->buffer);
+  ctx->output[HTMLPARSER_MAX_ENTITY_SIZE - 1] = '\0';
+  return ctx->output;
+}
+
+/* State machine callback run when the parser enters a tag name.
+ *
+ * Clears html->tag and starts recording the name. The start, chr and end
+ * arguments are not used.
+ */
+static void enter_tag_name(statemachine_ctx *ctx, int start, char chr, int end)
+{
+  htmlparser_ctx *parser = CAST(htmlparser_ctx *, ctx->user);
+
+  assert(parser != NULL);
+  parser->tag[0] = '\0';
+  statemachine_start_record(ctx);
+}
+
+/* State machine callback run when the parser leaves the tag name.
+ *
+ * Stops the recording, copies the recorded name into html->tag and lower
+ * cases it. A name starting with '/' (a closing tag) leaves html->tag empty.
+ */
+static void exit_tag_name(statemachine_ctx *ctx, int start, char chr, int end)
+{
+  htmlparser_ctx *parser = CAST(htmlparser_ctx *, ctx->user);
+
+  assert(parser != NULL);
+
+  nopad_strncpy(parser->tag, statemachine_stop_record(ctx),
+                HTMLPARSER_MAX_STRING, statemachine_record_length(ctx));
+  tolower_str(parser->tag);
+
+  /* Closing tags are reported as "no current tag". */
+  if (parser->tag[0] == '/')
+    parser->tag[0] = '\0';
+}
+
+/* State machine callback run when the parser enters an attribute name.
+ *
+ * Clears html->attr and starts recording the name.
+ */
+static void enter_attr(statemachine_ctx *ctx, int start, char chr, int end)
+{
+  htmlparser_ctx *parser = CAST(htmlparser_ctx *, ctx->user);
+
+  assert(parser != NULL);
+  parser->attr[0] = '\0';
+  statemachine_start_record(ctx);
+}
+
+/* State machine callback run when the parser leaves the attribute name.
+ *
+ * Stops the recording, copies the recorded name into html->attr and converts
+ * the attribute name to lowercase.
+ */
+static void exit_attr(statemachine_ctx *ctx, int start, char chr, int end)
+{
+  htmlparser_ctx *parser = CAST(htmlparser_ctx *, ctx->user);
+
+  assert(parser != NULL);
+
+  nopad_strncpy(parser->attr, statemachine_stop_record(ctx),
+                HTMLPARSER_MAX_STRING, statemachine_record_length(ctx));
+  tolower_str(parser->attr);
+}
+
+/* State machine callback run when the parser enters an attribute value.
+ *
+ * Resets the position index inside the value. For attributes that accept
+ * javascript it also resets the entity filter and the javascript parser and
+ * flags the parser as being inside javascript.
+ */
+static void enter_value(statemachine_ctx *ctx, int start, char chr, int end)
+{
+  htmlparser_ctx *parser = CAST(htmlparser_ctx *, ctx->user);
+
+  assert(parser != NULL);
+  parser->value_index = 0;
+
+  if (!is_js_attribute(parser->attr)) {
+    parser->in_js = 0;
+    return;
+  }
+
+  entityfilter_reset(parser->entityfilter);
+  jsparser_reset(parser->jsparser);
+  parser->in_js = 1;
+}
+
+/* State machine callback run when the parser enters the contents of an
+ * attribute value.
+ *
+ * Clears html->value and starts recording the contents.
+ */
+static void enter_value_content(statemachine_ctx *ctx, int start, char chr,
+                                int end)
+{
+  htmlparser_ctx *parser = CAST(htmlparser_ctx *, ctx->user);
+
+  assert(parser != NULL);
+  parser->value[0] = '\0';
+  statemachine_start_record(ctx);
+}
+
+/* State machine callback run when the parser leaves the contents of an
+ * attribute value.
+ *
+ * Stops the recording, copies the recorded contents into html->value and
+ * clears the javascript flag.
+ */
+static void exit_value_content(statemachine_ctx *ctx, int start, char chr,
+                               int end)
+{
+  htmlparser_ctx *parser = CAST(htmlparser_ctx *, ctx->user);
+
+  assert(parser != NULL);
+
+  nopad_strncpy(parser->value, statemachine_stop_record(ctx),
+                HTMLPARSER_MAX_STRING, statemachine_record_length(ctx));
+  parser->in_js = 0;
+}
+
+/* State machine callback run for every character inside an attribute value.
+ *
+ * Advances the position index inside the value. When the value holds
+ * javascript, the character goes through the entity filter and the decoded
+ * text is handed to the javascript parser.
+ */
+static void in_state_value(statemachine_ctx *ctx, int start, char chr, int end)
+{
+  htmlparser_ctx *parser = CAST(htmlparser_ctx *, ctx->user);
+
+  assert(parser != NULL);
+  parser->value_index++;
+
+  if (parser->in_js != 1)
+    return;
+
+  jsparser_parse_str(parser->jsparser,
+                     entityfilter_process(parser->entityfilter, chr));
+}
+
+/* State machine callback run every time the parser leaves a tag definition.
+ *
+ * After a script tag the next state becomes cdata, the js parser is reset
+ * and the parser is flagged as being inside javascript. After any other
+ * CDATA/RCDATA tag (style, title or textarea) the next state also becomes
+ * cdata, but the javascript flag is cleared. Other tags change nothing.
+ *
+ * To simplify the code, we treat RCDATA and CDATA sections the same since the
+ * differences between them don't affect the context we are in.
+ */
+static void tag_close(statemachine_ctx *ctx, int start, char chr, int end)
+{
+  htmlparser_ctx *parser = CAST(htmlparser_ctx *, ctx->user);
+
+  assert(parser != NULL);
+
+  if (strcmp(parser->tag, "script") == 0) {
+    ctx->next_state = HTMLPARSER_STATE_INT_CDATA_TEXT;
+    jsparser_reset(parser->jsparser);
+    parser->in_js = 1;
+    return;
+  }
+
+  if (strcmp(parser->tag, "style") != 0 &&
+      strcmp(parser->tag, "title") != 0 &&
+      strcmp(parser->tag, "textarea") != 0)
+    return;
+
+  ctx->next_state = HTMLPARSER_STATE_INT_CDATA_TEXT;
+  parser->in_js = 0;
+}
+
+/* State machine callback run for every character inside cdata blocks.
+ *
+ * Hands the character to the javascript parser when the parser is flagged
+ * as being inside javascript, and does nothing otherwise.
+ */
+static void in_state_cdata(statemachine_ctx *ctx, int start, char chr, int end)
+{
+  htmlparser_ctx *parser = CAST(htmlparser_ctx *, ctx->user);
+
+  assert(parser != NULL);
+
+  if (!parser->in_js)
+    return;
+
+  jsparser_parse_chr(parser->jsparser, chr);
+}
+
+/* Called if we encounter a '<' character in a cdata section.
+ *
+ * When encountering a '<' character inside cdata, we need to find the closing
+ * tag name in order to know if the tag is going to be closed or not, so we
+ * start recording the name of what could be the closing tag.
+ *
+ * The start, chr and end arguments are not used.
+ */
+static void enter_state_cdata_may_close(statemachine_ctx *ctx, int start,
+ char chr, int end)
+{
+ statemachine_start_record(ctx);
+}
+
+/* State machine callback run when we finish reading what could be a closing
+ * cdata tag.
+ *
+ * The recorded text must start with '/'. If the name after it matches the
+ * current tag (ignoring case) and is followed by '>' or html whitespace, the
+ * element is closed. Otherwise the parser goes back to the cdata text state.
+ */
+static void exit_state_cdata_may_close(statemachine_ctx *ctx, int start,
+                                       char chr, int end)
+{
+  htmlparser_ctx *parser = CAST(htmlparser_ctx *, ctx->user);
+  const char *recorded;
+
+  assert(parser != NULL);
+
+  recorded = statemachine_stop_record(ctx);
+  assert(recorded[0] == '/');
+
+  if (strcasecmp(&recorded[1], parser->tag) != 0 ||
+      (chr != '>' && !html_isspace(chr))) {  /* Name differs or no delimiter */
+    /* Does not close the CDATA section. Go back to CDATA. */
+    ctx->next_state = HTMLPARSER_STATE_INT_CDATA_TEXT;
+    return;
+  }
+
+  parser->tag[0] = '\0';  /* Empty tag mimicking exit_tag_name(). */
+  parser->in_js = 0;  /* In case this was a script tag. */
+}
+
+/* Resets the parser to its initial state and changes the parser mode.
+ *
+ * The state machine and the javascript parser are reset, the recorded tag,
+ * attribute and value are cleared, and the start state is picked from mode.
+ * An unknown mode triggers an assertion.
+ */
+void htmlparser_reset_mode(htmlparser_ctx *ctx, int mode)
+{
+  assert(ctx != NULL);
+
+  statemachine_reset(ctx->statemachine);
+  ctx->in_js = 0;
+  ctx->tag[0] = '\0';
+  ctx->attr[0] = '\0';
+  ctx->value[0] = '\0';
+
+  jsparser_reset(ctx->jsparser);
+
+  if (mode == HTMLPARSER_MODE_HTML) {
+    ctx->statemachine->current_state = HTMLPARSER_STATE_INT_TEXT;
+  } else if (mode == HTMLPARSER_MODE_JS) {
+    ctx->statemachine->current_state = HTMLPARSER_STATE_INT_JS_FILE;
+    ctx->in_js = 1;
+  } else if (mode == HTMLPARSER_MODE_CSS) {
+    ctx->statemachine->current_state = HTMLPARSER_STATE_INT_CSS_FILE;
+  } else if (mode == HTMLPARSER_MODE_HTML_IN_TAG) {
+    ctx->statemachine->current_state = HTMLPARSER_STATE_INT_TAG_SPACE;
+  } else {
+    assert("Invalid mode in htmlparser_reset_mode()." && 0);
+  }
+}
+
+/* Resets the parser to its initial state and to the default mode, which
+ * is MODE_HTML.
+ *
+ * Equivalent to htmlparser_reset_mode(ctx, HTMLPARSER_MODE_HTML). ctx must
+ * not be NULL.
+ */
+void htmlparser_reset(htmlparser_ctx *ctx)
+{
+ assert(ctx != NULL);
+ htmlparser_reset_mode(ctx, HTMLPARSER_MODE_HTML);
+}
+
+/* Creates a new state machine definition and initializes the events for the
+ * state transitions.
+ *
+ * Although each instance of the parser has its own private instance of a
+ * statemachine definition, they are still identical across html parser objects
+ * and are never modified after creation. As such, changes to this definition
+ * should not occur outside this function and should not depend on properties
+ * of this particular parser instance as in the future we may opt to use a
+ * single public definition across parser objects.
+ *
+ * Returns the new definition, or NULL if it could not be allocated.
+ */
+static statemachine_definition *create_statemachine_definition()
+{
+ statemachine_definition *def;
+ def = statemachine_definition_new(HTMLPARSER_NUM_STATES);
+ if (def == NULL)
+ return NULL;
+
+ /* Load the generated transition table and state names. */
+ statemachine_definition_populate(def, htmlparser_state_transitions,
+ htmlparser_states_internal_names);
+
+ /* Tag name recording. */
+ statemachine_enter_state(def, HTMLPARSER_STATE_INT_TAG_NAME,
+ enter_tag_name);
+ statemachine_exit_state(def, HTMLPARSER_STATE_INT_TAG_NAME, exit_tag_name);
+
+ /* Attribute name recording. */
+ statemachine_enter_state(def, HTMLPARSER_STATE_INT_ATTR, enter_attr);
+ statemachine_exit_state(def, HTMLPARSER_STATE_INT_ATTR, exit_attr);
+
+ /* End of a tag definition, may switch the parser to cdata. */
+ statemachine_enter_state(def, HTMLPARSER_STATE_INT_TAG_CLOSE, tag_close);
+
+ /* CDATA states. We must list all cdata and javascript states here. */
+ /* TODO(falmeida): Declare this list in htmlparser_fsm.config so it doesn't
+ * go out of sync.
+ */
+ statemachine_in_state(def, HTMLPARSER_STATE_INT_CDATA_TEXT, in_state_cdata);
+ statemachine_in_state(def, HTMLPARSER_STATE_INT_CDATA_COMMENT_START,
+ in_state_cdata);
+ statemachine_in_state(def, HTMLPARSER_STATE_INT_CDATA_COMMENT_START_DASH,
+ in_state_cdata);
+ statemachine_in_state(def, HTMLPARSER_STATE_INT_CDATA_COMMENT_BODY,
+ in_state_cdata);
+ statemachine_in_state(def, HTMLPARSER_STATE_INT_CDATA_COMMENT_DASH,
+ in_state_cdata);
+ statemachine_in_state(def, HTMLPARSER_STATE_INT_CDATA_COMMENT_DASH_DASH,
+ in_state_cdata);
+ statemachine_in_state(def, HTMLPARSER_STATE_INT_CDATA_LT, in_state_cdata);
+ statemachine_in_state(def, HTMLPARSER_STATE_INT_CDATA_MAY_CLOSE,
+ in_state_cdata);
+
+ /* For simplification, we treat the javascript mode as if it were cdata. */
+ statemachine_in_state(def, HTMLPARSER_STATE_INT_JS_FILE, in_state_cdata);
+
+ /* Recording of a possible closing tag inside cdata. */
+ statemachine_enter_state(def, HTMLPARSER_STATE_INT_CDATA_MAY_CLOSE,
+ enter_state_cdata_may_close);
+ statemachine_exit_state(def, HTMLPARSER_STATE_INT_CDATA_MAY_CLOSE,
+ exit_state_cdata_may_close);
+ /* value states */
+ statemachine_enter_state(def, HTMLPARSER_STATE_INT_VALUE, enter_value);
+
+ /* Called when we enter the content of the value */
+ statemachine_enter_state(def, HTMLPARSER_STATE_INT_VALUE_TEXT,
+ enter_value_content);
+ statemachine_enter_state(def, HTMLPARSER_STATE_INT_VALUE_Q,
+ enter_value_content);
+ statemachine_enter_state(def, HTMLPARSER_STATE_INT_VALUE_DQ,
+ enter_value_content);
+
+ /* Called when we exit the content of the value */
+ statemachine_exit_state(def, HTMLPARSER_STATE_INT_VALUE_TEXT,
+ exit_value_content);
+ statemachine_exit_state(def, HTMLPARSER_STATE_INT_VALUE_Q,
+ exit_value_content);
+ statemachine_exit_state(def, HTMLPARSER_STATE_INT_VALUE_DQ,
+ exit_value_content);
+
+ /* Called for every character of the content of the value */
+ statemachine_in_state(def, HTMLPARSER_STATE_INT_VALUE_TEXT, in_state_value);
+ statemachine_in_state(def, HTMLPARSER_STATE_INT_VALUE_Q, in_state_value);
+ statemachine_in_state(def, HTMLPARSER_STATE_INT_VALUE_DQ, in_state_value);
+
+ return def;
+}
+
+
+/* Initializes a new htmlparser instance.
+ *
+ * Returns a pointer to the new instance, or NULL if any allocation fails. On
+ * failure every sub-object created so far is released before returning, so a
+ * failed call does not leak memory.
+ */
+htmlparser_ctx *htmlparser_new()
+{
+  htmlparser_ctx *html;
+
+  html = CAST(htmlparser_ctx *, calloc(1, sizeof(htmlparser_ctx)));
+  if (html == NULL)
+    return NULL;
+  html->statemachine_def = create_statemachine_definition();
+  if (html->statemachine_def == NULL)
+    goto fail_statemachine_def;
+  html->statemachine = statemachine_new(html->statemachine_def, html);
+  if (html->statemachine == NULL)
+    goto fail_statemachine;
+  html->jsparser = jsparser_new();
+  if (html->jsparser == NULL)
+    goto fail_jsparser;
+  html->entityfilter = entityfilter_new();
+  if (html->entityfilter == NULL)
+    goto fail_entityfilter;
+  htmlparser_reset(html);
+  return html;
+  /* Unwind in reverse order: each label releases what was built before it. */
+fail_entityfilter: jsparser_delete(html->jsparser);
+fail_jsparser: statemachine_delete(html->statemachine);
+fail_statemachine: statemachine_definition_delete(html->statemachine_def);
+fail_statemachine_def: free(html);
+  return NULL;
+}
+
+/* Copies the context of the htmlparser pointed to by src to the htmlparser dst:
+ * the scalar fields, the tag/attr/value strings and the three sub-contexts. */
+void htmlparser_copy(htmlparser_ctx *dst, const htmlparser_ctx *src)
+{
+ dst->value_index = src->value_index;
+ dst->in_js = src->in_js;
+ strcpy(dst->tag, src->tag);
+ strcpy(dst->attr, src->attr);
+ strcpy(dst->value, src->value);
+ /* dst keeps its own statemachine_def, which is handed to statemachine_copy(). */
+ statemachine_copy(dst->statemachine,
+ src->statemachine,
+ dst->statemachine_def,
+ dst);
+
+ jsparser_copy(dst->jsparser, src->jsparser);
+
+ entityfilter_copy(dst->entityfilter, src->entityfilter);
+
+}
+
+/* Returns the current html state of ctx: the statemachine's current internal
+ * state converted to its external form by state_external(). */
+int htmlparser_state(htmlparser_ctx *ctx)
+{
+ return state_external(ctx->statemachine->current_state);
+}
+
+/* Parses size bytes of the input html stream str and returns the finishing
+ * state, converted from the internal state by state_external(). */
+int htmlparser_parse(htmlparser_ctx *ctx, const char *str, int size)
+{
+ int internal_state;
+ internal_state = statemachine_parse(ctx->statemachine, str, size);
+ return state_external(internal_state);
+}
+
+
+/* Returns 1 if the parser is inside an attribute value surrounded by single or
+ * double quotes (states VALUE_Q, VALUE_DQ and their _START variants), else 0. */
+int htmlparser_is_attr_quoted(htmlparser_ctx *ctx) {
+ int st = statemachine_get_state(ctx->statemachine);
+ if (st == HTMLPARSER_STATE_INT_VALUE_Q_START ||
+ st == HTMLPARSER_STATE_INT_VALUE_Q ||
+ st == HTMLPARSER_STATE_INT_VALUE_DQ_START ||
+ st == HTMLPARSER_STATE_INT_VALUE_DQ)
+ return 1;
+ else
+ return 0;
+}
+
+/* Returns 1 if the parser is currently in javascript, which requires ctx->in_js
+ * to be set and the state to be a CDATA/JS_FILE state or a value; 0 otherwise. */
+int htmlparser_in_js(htmlparser_ctx *ctx) {
+ int st = statemachine_get_state(ctx->statemachine);
+
+/* CDATA states plus JS_FILE. We must list all cdata and javascript states
+ * here. */
+/* TODO(falmeida): Declare this list in htmlparser_fsm.config so it doesn't go
+ * out of sync. */
+ if (ctx->in_js &&
+ (st == HTMLPARSER_STATE_INT_CDATA_TEXT ||
+ st == HTMLPARSER_STATE_INT_CDATA_COMMENT_START ||
+ st == HTMLPARSER_STATE_INT_CDATA_COMMENT_START_DASH ||
+ st == HTMLPARSER_STATE_INT_CDATA_COMMENT_BODY ||
+ st == HTMLPARSER_STATE_INT_CDATA_COMMENT_DASH ||
+ st == HTMLPARSER_STATE_INT_CDATA_COMMENT_DASH_DASH ||
+ st == HTMLPARSER_STATE_INT_CDATA_LT ||
+ st == HTMLPARSER_STATE_INT_CDATA_MAY_CLOSE ||
+ st == HTMLPARSER_STATE_INT_JS_FILE))
+ return 1;
+ /* An attribute value also counts as javascript while ctx->in_js is set. */
+ if (state_external(st) == HTMLPARSER_STATE_VALUE && ctx->in_js)
+ return 1;
+ else
+ return 0;
+}
+
+/* Returns the current tag name, or NULL when ctx->tag is empty (no tag is
+ * available or we haven't seen the entire tag yet).
+ */
+const char *htmlparser_tag(htmlparser_ctx *ctx)
+{
+ if (ctx->tag[0] != '\0')
+ return ctx->tag;
+ else
+ return NULL;
+}
+
+/* Returns true if inside an attribute or a value (external state ATTR or VALUE) */
+int htmlparser_in_attr(htmlparser_ctx *ctx)
+{
+ int ext_state = state_external(statemachine_get_state(ctx->statemachine));
+ return ext_state == HTMLPARSER_STATE_ATTR ||
+ ext_state == HTMLPARSER_STATE_VALUE;
+}
+
+/* Returns the current attribute name (ctx->attr) while htmlparser_in_attr() is
+ * true, i.e. after an attribute name or in a value. Returns NULL otherwise. */
+const char *htmlparser_attr(htmlparser_ctx *ctx)
+{
+ if (htmlparser_in_attr(ctx))
+ return ctx->attr;
+ else
+ return NULL;
+}
+
+/* Returns 1 if the parser is inside a CSS construct: the CSS_FILE state, the
+ * value of a style attribute, or while the current tag is "style"; 0 otherwise. */
+int htmlparser_in_css(htmlparser_ctx *ctx) {
+ int state = statemachine_get_state(ctx->statemachine);
+ const char *tag = htmlparser_tag(ctx);
+ int external_state = state_external(state);
+
+ if (state == HTMLPARSER_STATE_INT_CSS_FILE ||
+ (external_state == HTMLPARSER_STATE_VALUE &&
+ htmlparser_attr_type(ctx) == HTMLPARSER_ATTR_STYLE) ||
+ (tag && strcmp(tag, "style") == 0)) {
+ return 1;
+ } else {
+ return 0;
+ }
+}
+
+/* Returns the contents of the current attribute value, copied from the record
+ * buffer into ctx->value and NUL-terminated; NULL if not inside a value. */
+const char *htmlparser_value(htmlparser_ctx *ctx)
+{
+ int ext_state = state_external(statemachine_get_state(ctx->statemachine));
+ if (ext_state == HTMLPARSER_STATE_VALUE) {
+ strncpy(ctx->value, statemachine_record_buffer(ctx->statemachine),
+ HTMLPARSER_MAX_STRING);
+ ctx->value[HTMLPARSER_MAX_STRING - 1] = '\0';
+ return ctx->value;
+ } else {
+ return NULL;
+ }
+}
+
+
+/* Returns the current state of the javascript state machine, as reported by
+ * jsparser_state() for the embedded jsparser.
+ * Currently only present for testing purposes.
+ */
+int htmlparser_js_state(htmlparser_ctx *ctx)
+{
+ return jsparser_state(ctx->jsparser);
+}
+
+/* Returns 1 if currently inside a javascript string literal (jsparser state Q
+ * or DQ while htmlparser_in_js() is true) and 0 otherwise. */
+int htmlparser_is_js_quoted(htmlparser_ctx *ctx)
+{
+ if (htmlparser_in_js(ctx)) {
+ int st = jsparser_state(ctx->jsparser);
+ if (st == JSPARSER_STATE_Q ||
+ st == JSPARSER_STATE_DQ)
+ return 1;
+ }
+ return 0;
+}
+
+/* Returns true if currently inside an attribute value, i.e. the external state
+ * is HTMLPARSER_STATE_VALUE. */
+int htmlparser_in_value(htmlparser_ctx *ctx)
+{
+ int ext_state = state_external(statemachine_get_state(ctx->statemachine));
+ return ext_state == HTMLPARSER_STATE_VALUE;
+}
+
+/* Returns the position inside the current attribute value (ctx->value_index),
+ * or -1 if the parser is not inside an attribute value. */
+int htmlparser_value_index(htmlparser_ctx *ctx)
+{
+ if (htmlparser_in_value(ctx))
+ return ctx->value_index;
+
+ return -1;
+}
+
+/* Returns 1 if this is the first character of a url inside a URI attribute:
+ * value index 0, or a meta tag value at its redirect URL start. 0 otherwise. */
+int htmlparser_is_url_start(htmlparser_ctx *ctx)
+{
+ if (htmlparser_attr_type(ctx) == HTMLPARSER_ATTR_URI) {
+ const char* tag = htmlparser_tag(ctx);
+ /*const char* attr =*/ htmlparser_attr(ctx);
+
+ if ((tag && strcmp(tag, "meta") == 0 &&
+ meta_redirect_type(htmlparser_value(ctx)) ==
+ META_REDIRECT_TYPE_URL_START) ||
+ htmlparser_value_index(ctx) == 0)
+ return 1;
+
+ }
+ return 0;
+}
+
+/* Returns the current attribute type: NONE outside an attribute, otherwise JS,
+ * URI or STYLE (checked in that order by attribute name), else REGULAR. */
+int htmlparser_attr_type(htmlparser_ctx *ctx)
+{
+ if (!htmlparser_in_attr(ctx))
+ return HTMLPARSER_ATTR_NONE;
+
+ if (is_js_attribute(ctx->attr))
+ return HTMLPARSER_ATTR_JS;
+
+ if (is_uri_attribute(ctx->attr))
+ return HTMLPARSER_ATTR_URI;
+
+ if (is_style_attribute(ctx->attr))
+ return HTMLPARSER_ATTR_STYLE;
+
+ const char* tag = htmlparser_tag(ctx);
+ const char* attr = htmlparser_attr(ctx);
+
+ /* Special logic to handle meta redirect type tags. */
+ if (tag && strcmp(tag, "meta") == 0 &&
+ attr && strcmp(attr, "content") == 0) {
+
+ const char* value = htmlparser_value(ctx);
+ meta_redirect_type_enum redirect_type = meta_redirect_type(value);
+
+ if (redirect_type == META_REDIRECT_TYPE_URL ||
+ redirect_type == META_REDIRECT_TYPE_URL_START)
+ return HTMLPARSER_ATTR_URI;
+ }
+
+ return HTMLPARSER_ATTR_REGULAR;
+}
+
+/* Returns the current line number, as reported by the statemachine. */
+int htmlparser_get_line_number(htmlparser_ctx *ctx) {
+ return statemachine_get_line_number(ctx->statemachine);
+}
+
+/* Sets the current line number on the underlying statemachine. */
+void htmlparser_set_line_number(htmlparser_ctx *ctx, int line) {
+ statemachine_set_line_number(ctx->statemachine, line);
+}
+
+/* Returns the current column number, as reported by the statemachine. */
+int htmlparser_get_column_number(htmlparser_ctx *ctx) {
+ return statemachine_get_column_number(ctx->statemachine);
+}
+
+/* Sets the current column number on the underlying statemachine. */
+void htmlparser_set_column_number(htmlparser_ctx *ctx, int column) {
+ statemachine_set_column_number(ctx->statemachine, column);
+}
+
+/* Retrieves the human readable error message held by the statemachine.
+ *
+ * NULL is returned if the parser didn't encounter an error.
+ */
+const char *htmlparser_get_error_msg(htmlparser_ctx *ctx) {
+ return statemachine_get_error_msg(ctx->statemachine);
+}
+
+/* Invoked by the caller when it expands text that the parser does not see.
+ * Moves the VALUE state to VALUE_TEXT; every other state is left as is. */
+int htmlparser_insert_text(htmlparser_ctx *ctx)
+{
+ /* TODO(falmeida): Generalize and use a table for these values. */
+
+ if (statemachine_get_state(ctx->statemachine) == HTMLPARSER_STATE_INT_VALUE) {
+ statemachine_set_state(ctx->statemachine, HTMLPARSER_STATE_INT_VALUE_TEXT);
+ }
+ return 1;
+}
+
+/* Deallocates an htmlparser context object together with its statemachine
+ * definition, statemachine, jsparser and entityfilter. ctx must not be NULL. */
+void htmlparser_delete(htmlparser_ctx *ctx)
+{
+ assert(ctx != NULL);
+ statemachine_definition_delete(ctx->statemachine_def);
+ statemachine_delete(ctx->statemachine);
+ jsparser_delete(ctx->jsparser);
+ entityfilter_delete(ctx->entityfilter);
+ free(ctx);
+}
+
+#ifdef __cplusplus
+} /* namespace ctemplate_htmlparser */
+#endif
diff --git a/src/htmlparser/htmlparser.h b/src/htmlparser/htmlparser.h
new file mode 100644
index 0000000..120fb3e
--- /dev/null
+++ b/src/htmlparser/htmlparser.h
@@ -0,0 +1,441 @@
+/*
+ * Copyright (c) 2007, Google Inc.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following disclaimer
+ * in the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Google Inc. nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ * ---
+ *
+ * Author: falmeida@google.com (Filipe Almeida)
+ */
+
+#ifndef SECURITY_STREAMHTMLPARSER_HTMLPARSER_H
+#define SECURITY_STREAMHTMLPARSER_HTMLPARSER_H
+
+#include <config.h>
+#include "htmlparser/statemachine.h"
+#include "htmlparser/jsparser.h"
+
+// Annoying stuff for windows in opensource-land -- make sure clients
+// (in this case unittests) can import the functions.
+#ifndef CTEMPLATE_DLL_DECL
+# ifdef _MSC_VER
+# define CTEMPLATE_DLL_DECL __declspec(dllimport)
+# else
+# define CTEMPLATE_DLL_DECL /* should be the empty string for non-windows */
+# endif
+#endif
+
+#ifdef __cplusplus
+namespace ctemplate_htmlparser {
+#endif
+
+/* entity filter */
+
+/* String sizes used in htmlparser and entityfilter structures including the
+ * NULL terminator.
+ */
+#define HTMLPARSER_MAX_STRING STATEMACHINE_RECORD_BUFFER_SIZE
+#define HTMLPARSER_MAX_ENTITY_SIZE 10
+
+
+enum htmlparser_state_external_enum { /* values returned by htmlparser_state() */
+ HTMLPARSER_STATE_TEXT,
+ HTMLPARSER_STATE_TAG,
+ HTMLPARSER_STATE_ATTR,
+ HTMLPARSER_STATE_VALUE, /* inside an attribute value */
+ HTMLPARSER_STATE_COMMENT,
+ HTMLPARSER_STATE_JS_FILE,
+ HTMLPARSER_STATE_CSS_FILE,
+ HTMLPARSER_STATE_ERROR /* the input could not be parsed */
+};
+
+enum htmlparser_mode { /* modes accepted by htmlparser_reset_mode() */
+ HTMLPARSER_MODE_HTML, /* Parses html text (the default mode). */
+ HTMLPARSER_MODE_JS, /* Parses javascript files. */
+ HTMLPARSER_MODE_CSS, /* CSS files; htmlparser_in_css() always returns true. */
+ HTMLPARSER_MODE_HTML_IN_TAG /* Parses an attribute list inside a tag. */
+};
+
+enum htmlparser_attr_type { /* values returned by htmlparser_attr_type() */
+ HTMLPARSER_ATTR_NONE, /* Not inside an attribute. */
+ HTMLPARSER_ATTR_REGULAR, /* Inside a normal attribute. */
+ HTMLPARSER_ATTR_URI, /* Inside an attribute that accepts a uri. */
+ HTMLPARSER_ATTR_JS, /* Inside a javascript attribute. */
+ HTMLPARSER_ATTR_STYLE /* Inside a css style attribute. */
+};
+
+
+/* TODO(falmeida): Maybe move some of these declaration to the .c and only keep
+ * a forward declaration in here, since these structures are private.
+ */
+
+/* entityfilter context structure.
+ *
+ * The entity filter collection of routines provide a way to decode html
+ * entities from an html document in a streaming way.
+ *
+ * The entityfilter_process() function receives a character at a time from the
+ * input stream and returns 0 or more characters which should be appended to
+ * the resulting decoded document.
+ *
+ * Currently this collection of functions is only exported for testing purposes
+ * and shouldn't be called from outside of htmlparser.c.
+ *
+ * Since we really only use these functions with the very specific purpose of
+ * decoding html entities for javascript attributes, only a small subset of
+ * entities are supported: &lt;, &gt;, &quot;, &amp;, &apos;, and the numeric
+ * character references for both decimal and hexadecimal.
+ */
+typedef struct entityfilter_ctx_s {
+
+ /* Current position into the buffer. */
+ int buffer_pos;
+
+ /* True if currently processing an html entity. */
+ int in_entity;
+
+ /* Temporary character buffer that is used while processing html entities.
+ */
+ char buffer[HTMLPARSER_MAX_ENTITY_SIZE];
+
+ /* String buffer returned to the application after we decoded an html
+ * entity.
+ */
+ char output[HTMLPARSER_MAX_ENTITY_SIZE];
+} entityfilter_ctx;
+
+/* Resets the entityfilter to its initial state so it can be reused.
+ */
+void entityfilter_reset(entityfilter_ctx *ctx);
+
+/* Initializes a new entity filter object.
+ */
+entityfilter_ctx *entityfilter_new(void);
+
+/* Deallocates an entity filter object.
+ */
+void entityfilter_delete(entityfilter_ctx *ctx);
+
+/* Copies the context of the entityfilter pointed to by src to the entityfilter
+ * dst.
+ */
+void entityfilter_copy(entityfilter_ctx *dst, entityfilter_ctx *src);
+
+/* Processes a character from the input stream and decodes any html entities
+ * in the accumulated buffer.
+ *
+ * Returns a reference to a string that points to an internal buffer. This
+ * buffer will be changed after every call to entityfilter_process(). As
+ * such this string should be duplicated before subsequent calls to
+ * entityfilter_process().
+ */
+const char *entityfilter_process(entityfilter_ctx *ctx, char c);
+
+
+/* html parser */
+
+/* Stores the context of the html parser.
+ * If this structure is changed, htmlparser_new(), htmlparser_copy() and
+ * htmlparser_reset() should be updated accordingly.
+ */
+typedef struct htmlparser_ctx_s {
+
+ /* Holds a reference to the statemachine context. */
+ statemachine_ctx *statemachine;
+
+ /* Holds a reference to the statemachine definition in use. It is kept so it
+ * can be handed to statemachine_copy() and deallocated at the end.
+ *
+ * It should be readonly and contain the same values across htmlparser
+ * instances.
+ */
+ /* TODO(falmeida): Change statemachine_def to const. */
+ statemachine_definition *statemachine_def;
+
+ /* Holds a reference to the javascript parser. */
+ jsparser_ctx *jsparser;
+
+ /* Holds a reference to the entity filter. Used for decoding html entities
+ * inside javascript attributes. */
+ entityfilter_ctx *entityfilter;
+
+ /* Offset into the current attribute value where 0 is the first character in
+ * the value. */
+ int value_index;
+
+ /* True if currently processing javascript. */
+ int in_js;
+
+ /* Current tag name. */
+ char tag[HTMLPARSER_MAX_STRING];
+
+ /* Current attribute name. */
+ char attr[HTMLPARSER_MAX_STRING];
+
+ /* Contents of the current value capped to HTMLPARSER_MAX_STRING. */
+ char value[HTMLPARSER_MAX_STRING];
+
+} htmlparser_ctx;
+
+/* Resets the parser to its initial state and to the default mode, which
+ * is MODE_HTML.
+ *
+ * All internal context like tag name, attribute name or the state of the
+ * statemachine are reset to its original values as if the object was just
+ * created.
+ */
+extern CTEMPLATE_DLL_DECL
+void htmlparser_reset(htmlparser_ctx *ctx);
+
+/* Resets the parser to its initial state and changes the parser mode.
+ * All internal context like tag name, attribute name or the state of the
+ * statemachine are reset to their original values as if the object was just
+ * created.
+ *
+ * Available modes:
+ * HTMLPARSER_MODE_HTML - Parses html text
+ * HTMLPARSER_MODE_JS - Parses javascript files
+ * HTMLPARSER_MODE_CSS - Parses CSS files. No actual parsing is done,
+ * but htmlparser_in_css() always returns true.
+ * HTMLPARSER_MODE_HTML_IN_TAG - Parses an attribute list inside a tag. To
+ * be used in a template expanded in the
+ * following context: <a $template>
+ *
+ */
+extern CTEMPLATE_DLL_DECL
+void htmlparser_reset_mode(htmlparser_ctx *ctx, int mode);
+
+/* Initializes a new htmlparser instance.
+ *
+ * Returns a pointer to the new instance or NULL if the initialization fails.
+ * Initialization failure is fatal, and if this function fails it may not
+ * deallocate all previously allocated memory.
+ */
+extern CTEMPLATE_DLL_DECL
+htmlparser_ctx *htmlparser_new(void);
+
+/* Copies the context of the htmlparser pointed to by src to the htmlparser dst.
+ *
+ * Also copies over the instances of the state machine, the jsparser and the
+ * entity filter but not the statemachine definition since this one is read
+ * only.
+ */
+extern CTEMPLATE_DLL_DECL
+void htmlparser_copy(htmlparser_ctx *dst, const htmlparser_ctx *src);
+
+/* Receives an htmlparser context and returns the current html state.
+ *
+ * The return value will be one of the states of htmlparser_state_external_enum.
+ */
+extern CTEMPLATE_DLL_DECL
+int htmlparser_state(htmlparser_ctx *ctx);
+
+/* Parses the input html stream and returns the finishing state.
+ *
+ * Returns HTMLPARSER_STATE_ERROR if unable to parse the input. If
+ * htmlparser_parse() is called after an error situation was encountered the
+ * behaviour is unspecified. At this point, htmlparser_reset() or
+ * htmlparser_reset_mode() can be called to reset the state.
+ */
+extern CTEMPLATE_DLL_DECL
+int htmlparser_parse(htmlparser_ctx *ctx, const char *str, int size);
+
+/* Returns true if the parser is inside an attribute value and the value is
+ * surrounded by single or double quotes. */
+extern CTEMPLATE_DLL_DECL
+int htmlparser_is_attr_quoted(htmlparser_ctx *ctx);
+
+/* Returns true if the parser is currently in javascript. This can be an
+ * attribute that takes javascript, a javascript block or the parser
+ * can just be in MODE_JS. */
+extern CTEMPLATE_DLL_DECL
+int htmlparser_in_js(htmlparser_ctx *ctx);
+
+/* Returns the current tag or NULL if not available or we haven't seen the
+ * entire tag yet.
+ *
+ * There is no stack implemented because we currently don't have a need for
+ * it, which means tag names are tracked only one level deep.
+ *
+ * This is better understood by looking at the following example:
+ *
+ * <b [tag=b]>
+ * [tag=b]
+ * <i>
+ * [tag=i]
+ * </i>
+ * [tag=NULL]
+ * </b>
+ *
+ * The tag is correctly filled inside the tag itself and before any new inner
+ * tag is closed, at which point the tag will be null.
+ *
+ * For our current purposes this is not a problem, but we may implement a tag
+ * tracking stack in the future for completeness.
+ *
+ */
+extern CTEMPLATE_DLL_DECL
+const char *htmlparser_tag(htmlparser_ctx *ctx);
+
+/* Returns the current attribute name if after an attribute name or in an
+ * attribute value. Returns NULL otherwise. */
+extern CTEMPLATE_DLL_DECL
+const char *htmlparser_attr(htmlparser_ctx *ctx);
+
+/* Returns the contents of the current attribute value.
+ *
+ * Returns NULL if not inside an attribute value.
+ */
+extern CTEMPLATE_DLL_DECL
+const char *htmlparser_value(htmlparser_ctx *ctx);
+
+/* Returns true if the parser is currently inside a CSS construct.
+ *
+ * Currently this can be either a STYLE tag, a STYLE attribute or the fact that
+ * the parser was reset in HTMLPARSER_MODE_CSS using
+ * htmlparser_reset_mode().
+ */
+extern CTEMPLATE_DLL_DECL
+int htmlparser_in_css(htmlparser_ctx *ctx);
+
+/* Returns the current state of the javascript state machine.
+ *
+ * Currently only present for testing purposes.
+ */
+extern CTEMPLATE_DLL_DECL
+int htmlparser_js_state(htmlparser_ctx *ctx);
+
+/* Returns non-zero if currently inside a javascript string literal and zero
+ * otherwise.
+ */
+extern CTEMPLATE_DLL_DECL
+int htmlparser_is_js_quoted(htmlparser_ctx *ctx);
+
+/* Returns the position inside the current attribute value, or -1 if the parser
+ * is not currently inside an attribute value.
+extern CTEMPLATE_DLL_DECL
+int htmlparser_value_index(htmlparser_ctx *ctx);
+
+/* Returns true if this is the first character of a url inside an attribute.
+ *
+ * This function can be used by an html sanitizer or auto escaping system as a
+ * hint that it should validate the url for a whitelist of protocol handlers and
+ * for well-formedness, or that it should just escape a component of it.
+ *
+ * For attributes that expect a URL, this will return true if we are at the
+ * first character of the URL, false otherwise.
+ * For most attributes, this is the same as checking that we are at the first
+ * character of the attribute value but it also works correctly for the
+ * "content" attribute of the "meta" tag where the URL follows some earlier
+ * content.
+ * e.g: <meta http-equiv="refresh" content="5; URL=http://bla.">
+ *
+ * For any other attributes, the result will always be false.
+ */
+extern CTEMPLATE_DLL_DECL
+int htmlparser_is_url_start(htmlparser_ctx *ctx);
+
+/* Returns the current attribute type.
+ *
+ * The attribute type can be one of:
+ * HTMLPARSER_ATTR_NONE - not inside an attribute.
+ * HTMLPARSER_ATTR_REGULAR - Inside a normal attribute.
+ * HTMLPARSER_ATTR_URI - Inside an attribute that accepts a uri.
+ * HTMLPARSER_ATTR_JS - Inside a javascript attribute.
+ * HTMLPARSER_ATTR_STYLE - Inside a css style attribute.
+ */
+extern CTEMPLATE_DLL_DECL
+int htmlparser_attr_type(htmlparser_ctx *ctx);
+
+/* Return the current line number. */
+extern CTEMPLATE_DLL_DECL
+int htmlparser_get_line_number(htmlparser_ctx *ctx);
+
+/* Set the current line number. */
+extern CTEMPLATE_DLL_DECL
+void htmlparser_set_line_number(htmlparser_ctx *ctx, int line);
+
+/* Return the current column number. */
+extern CTEMPLATE_DLL_DECL
+int htmlparser_get_column_number(htmlparser_ctx *ctx);
+
+/* Set the current column number. */
+extern CTEMPLATE_DLL_DECL
+void htmlparser_set_column_number(htmlparser_ctx *ctx, int column);
+
+/* Retrieve a human readable error message in case an error occurred.
+ *
+ * NULL is returned if the parser didn't encounter an error.
+ */
+extern CTEMPLATE_DLL_DECL
+const char *htmlparser_get_error_msg(htmlparser_ctx *ctx);
+
+/* Invoked by the caller when text is expanded by the caller.
+ *
+ * Should be invoked when a template directive that expands to content is
+ * executed but we don't provide this content to the parser itself. This changes
+ * the current state by following the default rule, ensuring we stay in sync
+ * with the template.
+ *
+ * Returns 1 if template directives are accepted for this state and 0 if they
+ * are not, which should result in an error condition.
+ *
+ * Right now the only case being handled are unquoted attribute values and it
+ * always returns 1. When insert_text() is called after the equals sign, we
+ * assume some text was consumed and we are now in the middle of the attribute
+ * value itself. Example:
+ *
+ * <a href=$HREF_VALUE alt=alternate_text>
+ *
+ * The template calls insert_text() when it encounters $HREF_VALUE. If it didn't
+ * the parser would only have seen the following html:
+ *
+ * <a href= alt=alternate_text>
+ *
+ * and would interpret alt=alternate_text as the value of the href attribute.
+ */
+extern CTEMPLATE_DLL_DECL
+int htmlparser_insert_text(htmlparser_ctx *ctx);
+
+/* Deallocates an htmlparser context object.
+ */
+extern CTEMPLATE_DLL_DECL
+void htmlparser_delete(htmlparser_ctx *ctx);
+
+#define htmlparser_parse_chr(a,b) htmlparser_parse(a, &(b), 1);
+#ifdef __cplusplus
+#define htmlparser_parse_str(a,b) htmlparser_parse(a, b, \
+ static_cast<int>(strlen(b)));
+#else
+#define htmlparser_parse_str(a,b) htmlparser_parse(a, b, (int)strlen(b));
+#endif
+
+#ifdef __cplusplus
+} /* namespace ctemplate_htmlparser */
+#endif
+
+#endif /* SECURITY_STREAMHTMLPARSER_HTMLPARSER_H */
diff --git a/src/htmlparser/htmlparser_cpp.h b/src/htmlparser/htmlparser_cpp.h
new file mode 100644
index 0000000..0557783
--- /dev/null
+++ b/src/htmlparser/htmlparser_cpp.h
@@ -0,0 +1,318 @@
+// Copyright (c) 2007, Google Inc.
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+// * Neither the name of Google Inc. nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+// ---
+// Author: falmeida@google.com (Filipe Almeida)
+//
+// c++ bindings for htmlparser.
+
+#ifndef SECURITY_STREAMHTMLPARSER_HTMLPARSER_CPP_H__
+#define SECURITY_STREAMHTMLPARSER_HTMLPARSER_CPP_H__
+
+#include <config.h>
+#include <string>
+#include "htmlparser/htmlparser.h"
+#include "htmlparser/jsparser.h"
+#include "base/util.h"
+
+namespace ctemplate_htmlparser {
+
+class JavascriptParser {  // C++ names for the JSPARSER_STATE_ constants.
+ public:
+ enum State {
+ STATE_TEXT = JSPARSER_STATE_TEXT,
+ STATE_Q = JSPARSER_STATE_Q,
+ STATE_DQ = JSPARSER_STATE_DQ,
+ STATE_REGEXP = JSPARSER_STATE_REGEXP,
+ STATE_COMMENT = JSPARSER_STATE_COMMENT,
+ };
+};
+
+class HtmlParser {
+ public:
+
+ /* html states */
+ enum State {
+ STATE_TEXT = HTMLPARSER_STATE_TEXT,
+ STATE_TAG = HTMLPARSER_STATE_TAG,
+ STATE_ATTR = HTMLPARSER_STATE_ATTR,
+ STATE_VALUE = HTMLPARSER_STATE_VALUE,
+ STATE_COMMENT = HTMLPARSER_STATE_COMMENT,
+ STATE_JS_FILE = HTMLPARSER_STATE_JS_FILE,
+ STATE_CSS_FILE = HTMLPARSER_STATE_CSS_FILE,
+ STATE_ERROR = HTMLPARSER_STATE_ERROR
+ };
+
+ /* attribute types */
+ enum AttributeType {
+ ATTR_NONE = HTMLPARSER_ATTR_NONE,
+ ATTR_REGULAR = HTMLPARSER_ATTR_REGULAR,
+ ATTR_URI = HTMLPARSER_ATTR_URI,
+ ATTR_JS = HTMLPARSER_ATTR_JS,
+ ATTR_STYLE = HTMLPARSER_ATTR_STYLE
+ };
+
+ /* Parser modes */
+ enum Mode {
+ MODE_HTML = HTMLPARSER_MODE_HTML,
+ MODE_JS = HTMLPARSER_MODE_JS,
+ MODE_CSS = HTMLPARSER_MODE_CSS,
+ MODE_HTML_IN_TAG = HTMLPARSER_MODE_HTML_IN_TAG
+ };
+
+ HtmlParser() {
+ parser_ = htmlparser_new();
+ CHECK(parser_ != NULL);
+ };
+
+ /* Parses the input html stream and returns the finishing state.
+ *
+ * Returns HtmlParser::STATE_ERROR if unable to parse the input. If
+ * htmlparser_parse() is called after an error situation was encountered
+ * the behaviour is unspecified. At this point, Reset() or ResetMode()
+ * can be called to reset the state so it can be used to parse a new file.
+ */
+ int Parse(const char *str, int len) {
+ return htmlparser_parse(parser_, str, len);
+ };
+
+ int Parse(const std::string &str) {
+ return Parse(str.c_str(), static_cast<int>(str.length()));
+ };
+
+ /* Returns the current state the parser is in */
+ int state() const {
+ return htmlparser_state(parser_);
+ };
+
+ /* Returns the current tag or NULL if not available.
+ *
+ * There is no stack implemented because we currently don't have a need for
+ * it, which means tag names are tracked only one level deep.
+ *
+ * This is better understood by looking at the following example:
+ *
+ * <b [tag=b]>
+ * [tag=b]
+ * <i>
+ * [tag=i]
+ * </i>
+ * [tag=NULL]
+ * </b>
+ *
+ * The tag is correctly filled inside the tag itself and before any new
+ * inner tag is closed, at which point the tag will be set to NULL.
+ *
+ * For our current purposes this is not a problem, but we may implement a
+ * tag tracking stack in the future for completeness.
+ */
+ const char *tag() const {
+ return htmlparser_tag(parser_);
+ }
+
+ /* Returns the current attribute name if inside an attribute name or an
+ * attribute value. Returns NULL otherwise. */
+ const char *attribute() const {
+ return htmlparser_attr(parser_);
+ }
+
+ /* Returns the contents of the current attribute value. */
+ const char *value() const {
+ return htmlparser_value(parser_);
+ }
+
+ /* Returns true if inside javascript. This can be a javascript block, a
+ * javascript attribute value or the parser may just be in javascript mode
+ * (HtmlParser::MODE_JS) */
+ bool InJavascript() const {
+ return static_cast<bool>(htmlparser_in_js(parser_));
+ }
+
+ /* Returns true if the parser is currently inside a CSS construct.
+ *
+ * Currently this can be either a STYLE tag, a STYLE attribute or the fact
+ * that the parser was reset using MODE_CSS using ResetMode().
+ */
+ bool InCss() const {
+ return static_cast<bool>(htmlparser_in_css(parser_));
+ }
+
+ /* Returns true if the current attribute is quoted */
+ bool IsAttributeQuoted() const {
+ return static_cast<bool>(htmlparser_is_attr_quoted(parser_));
+ }
+
+ /* Returns true if the parser is inside a js string literal.
+ */
+ bool IsJavascriptQuoted() const {
+ return static_cast<bool>(htmlparser_is_js_quoted(parser_));
+ }
+
+ /* Returns the index within the current value or -1 if the parser is not
+ * inside an attribute value */
+ int ValueIndex() const {
+ return htmlparser_value_index(parser_);
+ }
+
+ /* Returns true if this is the first character of a url inside an attribute.
+ *
+ * This function can be used by an html sanitizer or auto escaping system as
+ * a hint that it should validate the url for a whitelist of protocol
+ * handlers and for well-formedness, or that it should just escape a
+ * component of it.
+ *
+ * For attributes that expect a url this will return true if we are at the
+ * first character of the attribute, but for the special case of a meta
+ * redirect tag some analysis is made in order to verify if we are at the
+ * start of a url or not.
+ *
+ * For any other attributes, the result will always be false.
+ *
+ */
+ bool IsUrlStart() const {
+ return htmlparser_is_url_start(parser_);
+ }
+
+ /* Returns the current attribute type.
+ *
+ * The attribute type can be one of:
+ * ATTR_NONE - not inside an attribute
+ * ATTR_REGULAR - Inside a normal attribute
+ * ATTR_URI - Inside an attribute that accepts a uri
+ * ATTR_JS - Inside a javascript attribute
+ * ATTR_STYLE - Inside a css style attribute
+ * */
+ int AttributeType() const {
+ return htmlparser_attr_type(parser_);
+ }
+
+ /* Return the current line number. */
+ int line_number() const {
+ return htmlparser_get_line_number(parser_);
+ }
+
+ /* Set the current line number. */
+ void set_line_number(int line) {
+ return htmlparser_set_line_number(parser_, line);
+ }
+
+ /* Return the current column number. */
+ int column_number() const {
+ return htmlparser_get_column_number(parser_);
+ }
+
+ /* Set the current column number. */
+ void set_column_number(int column) {
+ return htmlparser_set_column_number(parser_, column);
+ }
+
+ /* Retrieve a human readable error message in case an error occurred.
+ *
+ * NULL is returned if the parser didn't encounter an error.
+ */
+ const char *GetErrorMessage() {
+ return htmlparser_get_error_msg(parser_);
+ }
+
+ /* Returns the current state the javascript parser is in.
+ *
+ * Should only be used for testing.
+ */
+ int javascript_state() const {
+ return htmlparser_js_state(parser_);
+ };
+
+ /* Resets the parser to its initial state and changes the parser mode.
+ *
+ * Internal state (tag name, attribute name, state of statemachine) is
+ * reset as though the object was just created.
+ *
+ * Available modes:
+ * MODE_HTML - Parses html text
+ * MODE_JS - Parses javascript files
+ * MODE_CSS - Parses CSS files. No actual parsing is done
+ * but InCss() always returns true.
+ * MODE_HTML_IN_TAG - Parses an attribute list inside a tag. To
+ * be used in a template expanded in the
+ * following context: <a $template>
+ */
+ void ResetMode(enum Mode mode) {
+ return htmlparser_reset_mode(parser_, mode);
+ }
+
+ /* Resets the parser to its initial state and to the default mode, which is
+ * MODE_HTML.
+ *
+ * All internal context like tag name, attribute name or the state of the
+ * statemachine are reset to their original values as if the object was just
+ * created.
+ */
+ void Reset() {
+ return htmlparser_reset(parser_);
+ }
+
+ /* Invoked when text is inserted by the caller.
+ *
+ * Should be called before a template directive that expands to content is
+ * found. This changes the current state by following the default rule,
+ * ensuring we stay in sync with the template.
+ *
+ * Returns true if template directives are accepted for this state and
+ * false if they are not, which should result in an error condition.
+ *
+ * Right now the only case being handled are unquoted attribute values and
+ * it always returns true. In the future we can handle more cases and
+ * restrict the states where we allow template directives by returning false
+ * for those.
+ */
+ bool InsertText() {
+ return static_cast<bool>(htmlparser_insert_text(parser_));
+ }
+
+ /* Copies the context of the HtmlParser object referenced in source to the
+ * current object.
+ */
+ void CopyFrom(const HtmlParser *source) {
+ CHECK(this != source);
+ CHECK(source != NULL);
+ htmlparser_copy(parser_, source->parser_);
+ }
+
+ ~HtmlParser() {
+ htmlparser_delete(parser_);
+ };
+
+
+ private:
+ htmlparser_ctx *parser_;
+ DISALLOW_COPY_AND_ASSIGN(HtmlParser);
+};
+
+} // namespace ctemplate_htmlparser
+
+#endif // SECURITY_STREAMHTMLPARSER_HTMLPARSER_CPP_H__
diff --git a/src/htmlparser/htmlparser_fsm.config b/src/htmlparser/htmlparser_fsm.config
new file mode 100644
index 0000000..8ca4b7b
--- /dev/null
+++ b/src/htmlparser/htmlparser_fsm.config
@@ -0,0 +1,365 @@
+# Copyright (c) 2008, Google Inc.
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are
+# met:
+#
+# * Redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer.
+# * Redistributions in binary form must reproduce the above
+# copyright notice, this list of conditions and the following disclaimer
+# in the documentation and/or other materials provided with the
+# distribution.
+# * Neither the name of Google Inc. nor the names of its
+# contributors may be used to endorse or promote products derived from
+# this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+# ---
+#
+# Author: falmeida@google.com (Filipe Almeida)
+
+# TODO(falmeida): Add more descriptive names to the states and drop the
+# abbreviations.
+# TODO(falmeida): Reorder the states so that it's easier to read.
+# TODO(falmeida): Support CDATA blocks in the form: <![CDATA[.
+
+name = 'htmlparser'
+
+comment = 'Definition of a finite state machine for a subset of HTTP 4.1'
+
+condition('<', '<')
+condition('>', '>')
+condition('=', '=')
+
+# TODO(falmeida): This is not the correct expression. tag and attribute names
+# can only consist of alpha character.
+condition('id', 'A-Za-z0-9_:.-')
+condition('idtag', 'A-Za-z0-9/_:.-')
+
+# Whitespace according to: http://www.w3.org/TR/html401/struct/text.html#h-9.1
+condition('space', ' \t\n\r')
+condition('!', '!')
+condition('q', '\'')
+condition('dq', '\"')
+condition('/', '/')
+condition('*', '*')
+condition('-', '-')
+condition('?', '?')
+condition('lf', '\n')
+condition('quote', '\\')
+
+# TODO(falmeida): This default rule is a hack and shouldn't be here.
+condition('default', '[:default:]')
+
+state(name = 'text',
+ external = 'text',
+ transitions = [
+ ['<', 'tag_start'],
+ ['default', 'text']
+ ])
+
+# Entered when we find the '<' character in text.
+# Tag opening is defined in the HTML5 draft here:
+# http://www.whatwg.org/specs/web-apps/current-work/#tag-open-state
+# We don't exactly follow this and are much more loose in order to mimic the way
+# the major browsers behave.
+state(name = 'tag_start',
+ external = 'tag',
+ transitions = [
+ ['idtag', 'tag_name'],
+ ['?', 'pi'],
+ ['!', 'declaration_start'],
+ ['<', 'tag_start'],
+ ['default', 'text']
+ ])
+
+# Name of the tag. Includes the closing tag character '/'.
+state(name = 'tag_name',
+ external = 'tag',
+ transitions = [
+ ['idtag', 'tag_name'],
+ ['space', 'tag_space'],
+ ['>', 'tag_close']
+ ])
+
+# HTML declaration and comment parsing
+#
+# We don't expose declaration state because at this point we only want to
+# ensure that we are parsing them correctly so we don't get out of sync.
+# This is specifically made for DOCTYPE declarations and won't work if DTD's
+# are defined inside the declaration.
+# The HTML5 spec says we should specifically look for the string '<!DOCTYPE HTML'
+# but that will add a lot of unnecessary states, and unless we build a simple
+# declarative way to unfold a string match into multiple states, I don't
+# think it's worth worrying about for now.
+
+# Got '<!'. The next character will decide if we open a declaration or a
+# comment.
+state(name = 'declaration_start',
+ external = 'text',
+ transitions = [
+ ['-', 'comment_open'],
+ ['>', 'text'],
+ ['default', 'declaration_body']
+ ])
+
+# Inside a declaration. Ie: <!DOCTYPE. We close when we see a '>'
+state(name = 'declaration_body',
+ external = 'text',
+ transitions = [
+ ['>', 'text'],
+ ['default', 'declaration_body']
+ ])
+
+# Got '<!-'.
+state(name = 'comment_open',
+ external = 'text',
+ transitions = [
+ ['-', 'comment_body'],
+ ['default', 'text']
+ ])
+
+# Inside a comment. We only close when we see '-->'
+state(name = 'comment_body',
+ external = 'comment',
+ transitions = [
+ ['-', 'comment_dash'],
+ ['default', 'comment_body']
+ ])
+
+# Got '-' inside a comment.
+state(name = 'comment_dash',
+ external = 'comment',
+ transitions = [
+ ['-', 'comment_dash_dash'],
+ ['default', 'comment_body']
+ ])
+
+# Got '--' inside a comment.
+state(name = 'comment_dash_dash',
+ external = 'comment',
+ transitions = [
+ ['-', 'comment_dash_dash'],
+ ['>', 'text'],
+ ['default', 'comment_body']
+ ])
+
+# XML Processing instruction parsing according to:
+# http://www.w3.org/TR/REC-xml/#sec-pi
+#
+# Everything between the characters <? and ?> is considered to be part of the
+# processing instruction.
+state(name = 'pi',
+ external = 'text',
+ transitions = [
+ ['?', 'pi_may_end'],
+ ['default', 'pi']
+ ])
+
+state(name = 'pi_may_end',
+ external = 'text',
+ transitions = [
+ ['>', 'text'],
+ ['default', 'pi']
+ ])
+
+# Whitespace between tag name, attributes.
+state(name = 'tag_space',
+ external = 'tag',
+ transitions = [
+ ['>', 'tag_close'],
+ ['space', 'tag_space'],
+ ['id', 'attr'],
+ ['/', 'tag_space']
+ ])
+
+state(name = 'tag_close',
+ external = 'text',
+ transitions = [
+ ['<', 'tag_start'],
+ ['default', 'text']
+ ])
+
+# Name of the attribute.
+state(name = 'attr',
+ external = 'attr',
+ transitions = [
+ ['id', 'attr'],
+ ['>', 'tag_close'],
+ ['/', 'tag_space'],
+ ['=', 'value'],
+ ['space', 'attr_space']
+ ])
+
+# After the attribute name.
+state(name = 'attr_space',
+ external = 'attr',
+ transitions = [
+ ['>', 'tag_close'],
+ ['space', 'attr_space'],
+ ['id', 'attr'],
+ ['/', 'tag_space'],
+ ['=', 'value']
+ ])
+
+# Expecting a value, after attribute=
+state(name = 'value',
+ external = 'value',
+ transitions = [
+ ['q', 'value_q_start'],
+ ['dq', 'value_dq_start'],
+ ['space', 'value'],
+ ['>', 'tag_close'],
+ ['default', 'value_text']
+ ])
+
+# Unquoted attribute value.
+state(name = 'value_text',
+ external = 'value',
+ transitions = [
+ ['>', 'tag_close'],
+ ['space', 'tag_space'],
+ ['default', 'value_text']
+ ])
+
+# First character of a single quoted attribute value.
+state(name = 'value_q_start',
+ external = 'value',
+ transitions = [
+ ['q', 'tag_space'],
+ ['default', 'value_q']
+ ])
+
+# In the middle of a single quoted attribute value.
+state(name = 'value_q',
+ external = 'value',
+ transitions = [
+ ['q', 'tag_space'],
+ ['default', 'value_q']
+ ])
+
+# First character of a double quoted attribute value.
+state(name = 'value_dq_start',
+ external = 'value',
+ transitions = [
+ ['dq', 'tag_space'],
+ ['default', 'value_dq']
+ ])
+
+# In the middle of a double quoted attribute value.
+state(name = 'value_dq',
+ external = 'value',
+ transitions = [
+ ['dq', 'tag_space'],
+ ['default', 'value_dq']
+ ])
+
+# CDATA escaping text spans.
+# TODO(falmeida): These states should go after cdata_text.
+
+# Got '<!'
+state(name = 'cdata_comment_start',
+ external = 'text',
+ transitions = [
+ ['-', 'cdata_comment_start_dash'],
+ ['default', 'cdata_text'],
+ ])
+
+# Got '<!-'.
+state(name = 'cdata_comment_start_dash',
+ external = 'text',
+ transitions = [
+ ['-', 'cdata_comment_body'],
+ ['default', 'cdata_text']
+ ])
+
+# Inside a comment
+state(name = 'cdata_comment_body',
+ external = 'text',
+ transitions = [
+ ['-', 'cdata_comment_dash'],
+ ['default', 'cdata_comment_body']
+ ])
+
+# Got '-' inside a comment.
+state(name = 'cdata_comment_dash',
+ external = 'text',
+ transitions = [
+ ['-', 'cdata_comment_dash_dash'],
+ ['default', 'cdata_comment_body']
+ ])
+
+# Got '--' inside a comment.
+state(name = 'cdata_comment_dash_dash',
+ external = 'text',
+ transitions = [
+ ['-', 'cdata_comment_dash_dash'],
+ ['>', 'cdata_text'],
+ ['default', 'cdata_comment_body']
+ ])
+
+# CDATA processing
+#
+# To simplify the code, we treat RCDATA and CDATA sections the same since the
+# differences between them don't affect the context we are in.
+state(name = 'cdata_text',
+ external = 'text',
+ transitions = [
+ ['<', 'cdata_lt'],
+ ['default', 'cdata_text']
+ ])
+
+# Possible beginning of the closing tag.
+state(name = 'cdata_lt',
+ external = 'text',
+ transitions = [
+ ['/', 'cdata_may_close'],
+ ['!', 'cdata_comment_start'],
+ ['default', 'cdata_text']
+ ])
+
+# If we encounter </tag where tag matches the last opened tag, we exit the
+# CDATA section. Part of this logic is handled in the code.
+state(name = 'cdata_may_close',
+ external = 'text',
+ transitions = [
+ ['idtag', 'cdata_may_close'],
+ ['>', 'text'],
+ ['space', 'tag_space'],
+ ['default', 'cdata_text']
+ ])
+
+# The next states are used for specialized parser modes.
+state(name = 'js_file',
+ external = 'js_file',
+ transitions = [
+ ['default', 'js_file']
+ ])
+
+# TODO(falmeida): Having css_file and js_file as the external name doesn't make
+# sense. This should instead be text and the js/css state be
+# returned by in_js() and in_css().
+state(name = 'css_file',
+ external = 'css_file',
+ transitions = [
+ ['default', 'css_file']
+ ])
+
+state(name = 'null',
+ external = 'text',
+ transitions = [
+ ['default', 'null']
+ ])
+
diff --git a/src/htmlparser/jsparser.cc b/src/htmlparser/jsparser.cc
new file mode 100644
index 0000000..15be35d
--- /dev/null
+++ b/src/htmlparser/jsparser.cc
@@ -0,0 +1,659 @@
+/* Copyright (c) 2007, Google Inc.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following disclaimer
+ * in the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Google Inc. nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ * ---
+ *
+ * Author: falmeida@google.com (Filipe Almeida)
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <assert.h>
+
+#include "htmlparser/statemachine.h"
+#include "htmlparser/jsparser.h"
+
+/* CAST(type, expression) converts expression to type. It expands to
+ * static_cast<>() under a C++ compiler and to a C style cast otherwise, so
+ * that this file can be built by both C and C++ compilers. */
+#ifdef __cplusplus
+ #define CAST(type, expression) (static_cast<type>(expression))
+#else
+ #define CAST(type, expression) ((type)(expression))
+#endif
+
+#ifdef __cplusplus
+namespace ctemplate_htmlparser {
+#endif /* __cplusplus */
+
+/* Generated state machine definition. */
+#include "htmlparser/jsparser_fsm.h"
+
+/* List of keywords that can precede a regular expression literal. Taken from:
+ * http://www.mozilla.org/js/language/js20-2000-07/rationale/syntax.html
+ *
+ * 'void' was added to this list.
+ * The list is searched with bsearch() (see is_regexp_token_prefix()), so it
+ * must be kept sorted in strcmp() order.
+ * There are a large number of keywords in here that don't exist in
+ * ECMAScript 3, either because they are reserved or because they are part of
+ * ECMAScript 4. However, they weren't removed in order to keep the list in
+ * sync with the previous document.
+ */
+static const char *regexp_token_prefix[] = {
+ "abstract",
+ "break",
+ "case",
+ "catch",
+ "class",
+ "const",
+ "continue",
+ "debugger",
+ "default",
+ "delete",
+ "do",
+ "else",
+ "enum",
+ "eval",
+ "export",
+ "extends",
+ "field",
+ "final",
+ "finally",
+ "for",
+ "function",
+ "goto",
+ "if",
+ "implements",
+ "import",
+ "in",
+ "instanceof",
+ "native",
+ "new",
+ "package",
+ "private",
+ "protected",
+ "public",
+ "return",
+ "static",
+ "switch",
+ "synchronized",
+ "throw",
+ "throws",
+ "transient",
+ "try",
+ "typeof",
+ "var",
+ "void",
+ "volatile",
+ "while",
+ "with"
+};
+
+/* Utility functions */
+
+/* Maps an internal state number onto the external superstate it belongs to.
+ */
+static inline int state_external(int state)
+{
+  assert(state >= 0);
+  assert(state < JSPARSER_NUM_STATES);
+  return jsparser_states_external[state];
+}
+
+/* Tells whether c is an ecmascript whitespace or line terminator character,
+ * that is, one of the characters in sections 7.2 and 7.3 of ECMAScript 3 with
+ * the exception of the unicode space and line terminators. Returns 1 or 0.
+ * http://www.ecma-international.org/publications/files/ECMA-ST/Ecma-262.pdf
+ */
+static inline int js_is_whitespace(char c)
+{
+  switch (c) {
+    case '\t': case '\v': case '\f':  /* Tab 0x09, Vertical Tab 0x0B, FF 0x0C */
+    case ' ': case '\xa0':            /* Space 0x20, No-Break Space 0xA0 */
+    case '\n': case '\r':             /* Line Feed 0x0A, Carriage Return 0x0D */
+      return 1;
+  }
+  return 0;
+}
+
+/* Tells whether c can be part of a javascript identifier. Returns 1 if so and
+ * 0 otherwise. The rules are deliberately relaxed: only ASCII letters, digits,
+ * '_' and '$' are accepted (no unicode), and a digit is accepted in any
+ * position, including the first one.
+ *
+ * The consequences of not supporting unicode here are discussed in the
+ * comments of enter_state_js_slash().
+ */
+static inline int js_is_identifier(char c) {
+  const int is_lower = (c >= 'a' && c <= 'z');
+  const int is_upper = (c >= 'A' && c <= 'Z');
+  const int is_digit = (c >= '0' && c <= '9');
+  return is_lower || is_upper || is_digit || c == '_' || c == '$';
+}
+
+/* Appends a character to the ring buffer. Runs of whitespace and newlines are
+ * folded into a single character.
+ *
+ * js->buffer_start is the index of the oldest character in the buffer and
+ * js->buffer_end is the index where the next character will be written (one
+ * past the newest character). One slot is always left unused so that an empty
+ * buffer can be told apart from a full one:
+ *
+ *   empty: js->buffer_start == js->buffer_end
+ *   full:  (js->buffer_end + 1) % JSPARSER_RING_BUFFER_SIZE == js->buffer_start
+ *
+ * When the buffer is full, the oldest character is dropped to make room.
+ */
+void jsparser_buffer_append_chr(jsparser_ctx *js, char chr)
+{
+  int next_end;
+
+  /* Skip a whitespace that follows another one, to save buffer space. */
+  if (js_is_whitespace(chr) &&
+      js_is_whitespace(jsparser_buffer_get(js, -1))) {
+    return;
+  }
+
+  js->buffer[js->buffer_end] = chr;
+  next_end = (js->buffer_end + 1) % JSPARSER_RING_BUFFER_SIZE;
+  js->buffer_end = next_end;
+  if (next_end == js->buffer_start) {
+    js->buffer_start = (next_end + 1) % JSPARSER_RING_BUFFER_SIZE;
+  }
+}
+
+/* Appends every character of the NUL terminated string str to the ring
+ * buffer, folding runs of whitespace and newlines into one character.
+ */
+void jsparser_buffer_append_str(jsparser_ctx *js, const char *str)
+{
+  const char *cursor;
+  assert(js != NULL);
+  assert(str != NULL);
+  for (cursor = str; *cursor != '\0'; ++cursor) {
+    jsparser_buffer_append_chr(js, *cursor);
+  }
+}
+
+/* Translates pos, a negative index relative to the end of the buffer (-1 is the
+ * last character appended), into an index into js->buffer. Returns -1 if pos
+ * reaches beyond the characters currently stored in the buffer. */
+static inline int jsparser_buffer_absolute_pos(jsparser_ctx *js, int pos)
+{
+ int absolute_pos;
+ int buffer_len;
+ assert(pos < 0);
+
+ if(pos <= -JSPARSER_RING_BUFFER_SIZE) {
+ return -1;
+ }
+
+ buffer_len = js->buffer_end - js->buffer_start; /* characters stored */
+ if (buffer_len < 0) { /* buffer_end is numerically before buffer_start */
+ buffer_len += JSPARSER_RING_BUFFER_SIZE;
+ }
+
+ if (pos < -buffer_len) { /* further back than what is stored */
+ return -1;
+ }
+
+ absolute_pos = (pos + js->buffer_end) % JSPARSER_RING_BUFFER_SIZE;
+ if (absolute_pos < 0) { /* % can yield a negative value; wrap it around */
+ absolute_pos += JSPARSER_RING_BUFFER_SIZE;
+ }
+
+ return absolute_pos;
+}
+
+/* Removes the most recently appended character from the buffer and returns
+ * it. If the buffer is empty, nothing is removed and ASCII 0 ('\0') is returned.
+ */
+char jsparser_buffer_pop(jsparser_ctx *js)
+{
+  int last;
+  if (js->buffer_end == js->buffer_start) {
+    return '\0';
+  }
+  last = js->buffer_end - 1;
+  if (last < 0) {
+    last += JSPARSER_RING_BUFFER_SIZE;
+  }
+  js->buffer_end = last;
+  return js->buffer[last];
+}
+
+/* Reads the character stored at index pos of the buffer.
+ *
+ * pos is a negative index counted from the end of the buffer, where -1 is the
+ * last character appended, -2 the one before it, and so on. pos must be
+ * negative (this is asserted); passing zero or a positive value results in
+ * undefined behaviour.
+ *
+ * Returns the character found, or ASCII 0 ('\0') if pos falls outside the
+ * buffer boundaries.
+ */
+char jsparser_buffer_get(jsparser_ctx *js, int pos)
+{
+  int index;
+
+  assert(pos < 0);
+  index = jsparser_buffer_absolute_pos(js, pos);
+
+  return (index < 0) ? '\0' : js->buffer[index];
+}
+
+/* Overwrites the character stored at index pos of the buffer with value.
+ *
+ * pos is a negative index counted from the end of the buffer, where -1 is the
+ * last character appended. pos must be negative (this is asserted); passing
+ * zero or a positive value results in undefined behaviour.
+ *
+ * Returns 1 on success, or 0 (writing nothing) if pos is outside the buffer.
+ */
+int jsparser_buffer_set(jsparser_ctx *js, int pos, char value)
+{
+  int index;
+  assert(pos < 0);
+
+  index = jsparser_buffer_absolute_pos(js, pos);
+  if (index >= 0) {
+    js->buffer[index] = value;
+    return 1;
+  }
+
+  return 0;
+}
+
+/* Copies the characters between indexes start and end (both negative and
+ * inclusive, with start <= end) into output as a NUL terminated string.
+ * Indexes that fall before the beginning of the buffer are skipped, so the
+ * result may be shorter; output must hold up to end - start + 2 bytes.
+ */
+void jsparser_buffer_slice(jsparser_ctx *js, char *output, int start, int end)
+{
+  int pos;
+
+  assert(start <= end);
+  assert(start < 0);
+  assert(end < 0);
+
+  for (pos = start; pos <= end; ++pos) {
+    const char c = jsparser_buffer_get(js, pos);
+    if (c == '\0') {
+      continue;
+    }
+    *output++ = c;
+  }
+  *output = '\0';
+}
+
+/* Copies the last javascript identifier or keyword found in the buffer into
+ * the string pointed to by identifier, skipping one optional trailing
+ * whitespace character. JSPARSER_RING_BUFFER_SIZE bytes are always enough for it.
+ *
+ * For simplicity, an identifier is any run of one or more identifier characters
+ * (see js_is_identifier()), even one that starts with a digit.
+ *
+ * Returns non 0 if an identifier was found, or 0 if none was, in which case
+ * identifier is set to the empty string.
+ */
+int jsparser_buffer_last_identifier(jsparser_ctx *js, char *identifier)
+{
+  int end;
+  int pos;
+
+  assert(identifier != NULL);
+
+  end = -1;
+  /* Ignore the optional whitespace delimiter */
+  if (js_is_whitespace(jsparser_buffer_get(js, -1))) {
+    --end;
+  }
+
+  /* Walk backwards to the first character that can't be part of an identifier;
+   * '\0' is one of them and means we reached the beginning of the buffer. */
+  for (pos = end; js_is_identifier(jsparser_buffer_get(js, pos)); --pos) {
+  }
+  /* pos == end: nothing matched. One character identifiers are valid too. */
+  if (pos == end) {
+    identifier[0] = '\0';
+    return 0;
+  }
+
+  jsparser_buffer_slice(js, identifier, pos + 1, end);
+  return 1;
+}
+
+/* bsearch() comparison callback: a is the key, a C string, and b points to an
+ * element of an array of C strings. Returns what strcmp() returns for them. */
+static int bsearch_strcmp(const void *a, const void *b)
+{
+  const char * const *element = CAST(const char * const *, b);
+  return strcmp(CAST(const char*, a), *element);
+}
+
+/* Tells whether token may come right before a javascript regular expression
+ * literal.
+ *
+ * token is looked up in regexp_token_prefix[], the list of identifiers that can
+ * precede a regular expression in the grammar; returns non 0 if it is there.
+ */
+static inline int is_regexp_token_prefix(char *token)
+{
+  const size_t entry_size = sizeof(regexp_token_prefix[0]);
+  const size_t entry_count = sizeof(regexp_token_prefix) / entry_size;
+
+  assert(token != NULL);
+
+  return bsearch(token, regexp_token_prefix, entry_count, entry_size,
+                 bsearch_strcmp) != NULL;
+}
+
+/* Called for every character seen while in state text.
+ *
+ * Each of those characters is appended to the ring buffer. Slash characters
+ * that are part of comments end up in the buffer as well, although for parsing
+ * purposes they should be treated as whitespace. That issue is addressed in
+ * enter_state_js_comment_after(). The start and end arguments are not used.
+ */
+static void in_state_js_text(statemachine_ctx *ctx, int start, char chr,
+                             int end)
+{
+  jsparser_ctx *js;
+
+  js = CAST(jsparser_ctx *, ctx->user);
+  assert(js != NULL);
+  jsparser_buffer_append_chr(js, chr);
+}
+
+/* This function is called every time we find a slash ('/') character in the
+ * javascript text (except for slashes that close comments or regexp literals).
+ *
+ * Implements the logic to figure out if this slash character is a division
+ * operator or if it opens a regular expression literal. This is heavily
+ * inspired by the syntactic resynchronization for javascript 2.0:
+ * http://www.mozilla.org/js/language/js20-2000-07/rationale/syntax.html
+ *
+ * When we receive a '/', we look at the previous non space character to figure
+ * out if it's the ending of a punctuator that can precede a regexp literal, in
+ * which case we assume the current '/' is part of a regular expression literal
+ * (or the opening of a javascript comment, but that part is dealt with in the
+ * state machine). The exceptions to this are unary operators, so we look back
+ * a second character to rule out '++' and '--'. Although it is not
+ * straightforward to figure out if the unary operator is a postfix of the
+ * previous expression or a prefix of the regular expression, we rule out the
+ * latter as it is an uncommon practice.
+ *
+ * If we ruled out the previous token to be a valid regexp preceding
+ * punctuator, we extract the last identifier in the buffer and match against a
+ * list of keywords that are known to precede expressions in the grammar. If we
+ * get a match on any of these keywords, then we are opening a regular
+ * expression, if not, then we have a division operator.
+ *
+ * Known cases that are accepted by the grammar but we handle differently,
+ * although I don't believe there is a legitimate usage for those:
+ *
+ * Division of a regular expression:
+ * var result = /test/ / 5;
+ *
+ * Prefix unary increment of a regular expression:
+ * var result = ++/test/;
+ *
+ * Division of an object literal:
+ * { a: 1 } /x/.exec('x');
+ *
+ * We only support ascii right now, so unicode characters in identifiers will
+ * be treated as delimiters, effectively breaking the identifier name where
+ * they appear, and this may cause issues in very specific situations. Namely,
+ * if we have a unicode character in an identifier directly preceding a suffix
+ * that matches one of the keywords in regexp_token_prefix[], if this
+ * identifier precedes a / (slash) character:
+ *
+ * var x = test<unicode char>return / 5;
+ *
+ * We will interpret that slash as the start of a regular expression, when in
+ * reality it is a division operator.
+ */
+static void enter_state_js_slash(statemachine_ctx *ctx, int start, char chr,
+ int end)
+{
+ jsparser_ctx *js;
+ char buffer[JSPARSER_RING_BUFFER_SIZE]; /* receives the last identifier */
+ int pos;
+
+ assert(ctx != NULL);
+ assert(ctx->user != NULL);
+
+ js = CAST(jsparser_ctx *, ctx->user);
+ assert(js != NULL);
+
+ pos = -1;
+ /* Skip the last whitespace, if any, so pos is the last non space character. */
+ if (js_is_whitespace(jsparser_buffer_get(js, pos))) {
+ --pos;
+ }
+
+ switch (jsparser_buffer_get(js, pos)) {
+ /* A single '+' can precede a regexp; '++' (unary increment) is ignored. */
+ case '+':
+ if (jsparser_buffer_get(js, pos - 1) != '+') {
+ ctx->next_state = JSPARSER_STATE_INT_JS_REGEXP_SLASH;
+ }
+ break;
+
+ /* A single '-' can precede a regexp; '--' (unary decrement) is ignored. */
+ case '-':
+ if (jsparser_buffer_get(js, pos - 1) != '-') {
+ ctx->next_state = JSPARSER_STATE_INT_JS_REGEXP_SLASH;
+ }
+ break;
+
+ /* Punctuator endings except ), ], + and -; '\0' means an empty buffer. */
+ case '=':
+ case '<':
+ case '>':
+ case '&':
+ case '|':
+ case '!':
+ case '%':
+ case '*':
+ case '/':
+ case ',':
+ case ';':
+ case '?':
+ case ':':
+ case '^':
+ case '~':
+ case '{':
+ case '(':
+ case '[':
+ case '}':
+ case '\0':
+ ctx->next_state = JSPARSER_STATE_INT_JS_REGEXP_SLASH;
+ break;
+
+ default: /* not a punctuator: a regexp only if preceded by a listed keyword */
+ if (jsparser_buffer_last_identifier(js, buffer) &&
+ is_regexp_token_prefix(buffer)) {
+ ctx->next_state = JSPARSER_STATE_INT_JS_REGEXP_SLASH;
+ }
+ }
+
+ jsparser_buffer_append_chr(js, chr); /* appended last: lookups above exclude it */
+}
+
+/* Called at the end of a javascript comment.
+ *
+ * The '/' that opened the comment was appended to the ring buffer when it was
+ * first seen, because at that point it could still have been a division
+ * operator or the start of a regexp. It is not a token though, and for
+ * parsing purposes it has to be considered whitespace.
+ *
+ * This function fixes the buffer up: the initial '/' is replaced with a
+ * space, unless the character before it is already whitespace, in which case
+ * the '/' is simply removed. That way whitespace in the buffer stays
+ * correctly folded.
+ *
+ * The start, chr and end arguments are not used.
+ */
+static void enter_state_js_comment_after(statemachine_ctx *ctx, int start,
+                                         char chr, int end)
+{
+  jsparser_ctx *js;
+
+  assert(ctx != NULL);
+  assert(ctx->user != NULL);
+
+  js = CAST(jsparser_ctx *, ctx->user);
+
+  if (!js_is_whitespace(jsparser_buffer_get(js, -2))) {
+    jsparser_buffer_set(js, -1, ' ');
+  } else {
+    (void)jsparser_buffer_pop(js);
+  }
+}
+
+/* Builds the jsparser state machine definition and registers its callbacks.
+ * Returns NULL if statemachine_definition_new() fails. */
+static statemachine_definition *create_statemachine_definition()
+{
+  statemachine_definition *definition;
+  definition = statemachine_definition_new(JSPARSER_NUM_STATES);
+  if (definition == NULL) {
+    return NULL;
+  }
+
+  /* TODO(falmeida): Check return value. */
+  statemachine_definition_populate(definition, jsparser_state_transitions,
+                                   jsparser_states_internal_names);
+  statemachine_in_state(definition, JSPARSER_STATE_INT_JS_TEXT, in_state_js_text);
+  statemachine_enter_state(definition, JSPARSER_STATE_INT_JS_SLASH,
+                           enter_state_js_slash);
+  statemachine_enter_state(definition, JSPARSER_STATE_INT_JS_COMMENT_AFTER,
+                           enter_state_js_comment_after);
+  return definition;
+}
+
+
+/* Initializes a new jsparser instance.
+ *
+ * Returns a pointer to the new instance, or NULL if the initialization fails.
+ * On failure, everything allocated up to that point is released again.
+ */
+jsparser_ctx *jsparser_new()
+{
+  jsparser_ctx *js = CAST(jsparser_ctx *, calloc(1, sizeof(jsparser_ctx)));
+  if (js == NULL) {
+    return NULL;
+  }
+
+  js->statemachine_def = create_statemachine_definition();
+  if (js->statemachine_def == NULL) {
+    free(js);
+    return NULL;
+  }
+
+  js->statemachine = statemachine_new(js->statemachine_def, js);
+  if (js->statemachine == NULL) {
+    statemachine_definition_delete(js->statemachine_def);
+    free(js);
+    return NULL;
+  }
+
+  jsparser_reset(js);
+
+  return js;
+}
+
+/* Creates a new jsparser context that is a duplicate of src. Returns a pointer
+ * to it, or NULL if the new context could not be created.
+ */
+jsparser_ctx *jsparser_duplicate(jsparser_ctx *src)
+{
+  jsparser_ctx *copy;
+  assert(src != NULL);
+
+  copy = jsparser_new();
+  if (copy != NULL) {
+    jsparser_copy(copy, src);
+  }
+
+  return copy;
+}
+
+/* Copies the context of the jsparser pointed to by src into the jsparser dst:
+ * the ring buffer (contents and start/end indexes) and the state machine.
+ *
+ * dst keeps its own state machine definition, which is read only.
+ */
+void jsparser_copy(jsparser_ctx *dst, jsparser_ctx *src)
+{
+  /* Ring buffer: the contents first, then the indexes into it. */
+  memcpy(dst->buffer, src->buffer, sizeof(src->buffer));
+  dst->buffer_end = src->buffer_end;
+  dst->buffer_start = src->buffer_start;
+
+  /* State machine: dst's own definition and dst itself are passed along. */
+  statemachine_copy(dst->statemachine,
+                    src->statemachine,
+                    dst->statemachine_def, dst);
+}
+
+/* Resets the parser: empties the ring buffer and goes back to state 0. */
+void jsparser_reset(jsparser_ctx *ctx)
+{
+  assert(ctx != NULL);
+  ctx->buffer_start = ctx->buffer_end = 0;
+  ctx->statemachine->current_state = 0;
+}
+
+/* Returns the external (superstate) value of the parser's current state. */
+int jsparser_state(jsparser_ctx *ctx) {
+  return state_external(ctx->statemachine->current_state);
+}
+
+/* Runs the state machine over (str, size); returns the resulting external state. */
+int jsparser_parse(jsparser_ctx *ctx, const char *str, int size)
+{
+  const int reached = statemachine_parse(ctx->statemachine, str, size);
+  return state_external(reached);
+}
+
+void jsparser_delete(jsparser_ctx *ctx) /* Destroys ctx and what it holds. */
+{
+ assert(ctx != NULL);
+ statemachine_delete(ctx->statemachine);
+ statemachine_definition_delete(ctx->statemachine_def);
+ free(ctx); /* ctx itself comes from calloc() in jsparser_new() */
+}
+
+#ifdef __cplusplus
+} /* namespace ctemplate_htmlparser */
+#endif /* __cplusplus */
diff --git a/src/htmlparser/jsparser.h b/src/htmlparser/jsparser.h
new file mode 100644
index 0000000..6987cb4
--- /dev/null
+++ b/src/htmlparser/jsparser.h
@@ -0,0 +1,172 @@
+/* Copyright (c) 2007, Google Inc.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following disclaimer
+ * in the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Google Inc. nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ * ---
+ *
+ * Author: falmeida@google.com (Filipe Almeida)
+ */
+
+#ifndef SECURITY_STREAMHTMLPARSER_JSPARSER_H
+#define SECURITY_STREAMHTMLPARSER_JSPARSER_H
+
+#include <config.h>
+#include "htmlparser/statemachine.h"
+
+#ifdef __cplusplus
+namespace ctemplate_htmlparser {
+#endif /* __cplusplus */
+
+/* Size of the ring buffer used to look up the last token in the javascript
+ * stream. The size is pretty much arbitrary at this point but must be bigger
+ * than the biggest token we want to look up plus 3: two delimiters plus an
+ * empty ring buffer slot. */
+#define JSPARSER_RING_BUFFER_SIZE 18
+
+enum js_state_external_enum {
+ JSPARSER_STATE_TEXT, /* javascript code outside strings, regexps and comments */
+ JSPARSER_STATE_Q, /* inside a single quoted string literal */
+ JSPARSER_STATE_DQ, /* inside a double quoted string literal */
+ JSPARSER_STATE_REGEXP, /* inside a regular expression literal */
+ JSPARSER_STATE_COMMENT /* inside a single line or multiline comment */
+};
+
+/* Stores the context of the javascript parser.
+ *
+ * If this structure is changed, jsparser_new(), jsparser_copy() and
+ * jsparser_reset() should be updated accordingly.
+ */
+typedef struct jsparser_ctx_s {
+
+ /* Reference to the statemachine context (released by jsparser_delete()). */
+ statemachine_ctx *statemachine;
+
+ /* Reference to the statemachine definition.
+ *
+ * It should be readonly and contain the same values across jsparser
+ * instances.
+ */
+ /* TODO(falmeida): Change statemachine_def to const. */
+ statemachine_definition *statemachine_def;
+
+ /* Index to the start of the buffer. */
+ int buffer_start;
+
+ /* Index of the current writing position (end of the buffer plus one). */
+ int buffer_end;
+
+ /* Ring buffer used to look up the last token. */
+ char buffer[JSPARSER_RING_BUFFER_SIZE];
+
+} jsparser_ctx;
+
+
+void jsparser_reset(jsparser_ctx *ctx);
+jsparser_ctx *jsparser_new(void);
+
+/* Returns a pointer to a context which is a duplicate of the jsparser src.
+ */
+jsparser_ctx *jsparser_duplicate(jsparser_ctx *src);
+
+/* Copies the context of the jsparser pointed to by src to the jsparser dst.
+ */
+void jsparser_copy(jsparser_ctx *dst, jsparser_ctx *src);
+int jsparser_state(jsparser_ctx *ctx);
+int jsparser_parse(jsparser_ctx *ctx, const char *str, int size);
+
+void jsparser_delete(jsparser_ctx *ctx);
+
+/**
+ * Ring buffer functions.
+ *
+ * These functions are only exported for testing and should not be called from
+ * outside of jsparser.c in production code.
+ */
+
+/* Appends a character to the ring buffer.
+ *
+ * Sequences of whitespaces and newlines are folded into one character.
+ */
+void jsparser_buffer_append_chr(jsparser_ctx *js, char chr);
+
+/* Appends a string to the ring buffer.
+ *
+ * Sequences of whitespaces and newlines are folded into one character.
+ */
+void jsparser_buffer_append_str(jsparser_ctx *js, const char *str);
+
+/* Returns the last appended character and removes it from the buffer. If the
+ * buffer is empty, then it returns ASCII 0 ('\0').
+ */
+char jsparser_buffer_pop(jsparser_ctx *js);
+
+/* Returns the value of the character at a certain index in the buffer or an
+ * ASCII 0 ('\0') character if the index extends beyond the size of the
+ * buffer, either because we don't have as many characters in the buffer, or
+ * because the index points to a place bigger than the size of the buffer.
+ *
+ * Index positions must be negative, where -1 is the last character appended to
+ * the buffer.
+ */
+char jsparser_buffer_get(jsparser_ctx *js, int pos);
+
+/* Sets the value of the character at a certain index in the buffer. Returns
+ * true if the write was successful or false if there was an attempt to write
+ * outside of the buffer boundaries.
+ *
+ * Index positions are negative, where -1 is the last character appended to the
+ * buffer. Using positive integers for the index will result in undefined
+ * behaviour.
+ */
+int jsparser_buffer_set(jsparser_ctx *js, int pos, char value);
+
+/* Copies a slice of the ring buffer to the string pointed to by buffer. start
+ * and end are the indexes of the sliced region. If the start argument extends
+ * beyond the beginning of the buffer, the slice will only contain characters
+ * starting from the beginning of the buffer.
+ */
+void jsparser_buffer_slice(jsparser_ctx *js, char *buffer, int start, int end);
+
+/* Copy the last javascript identifier or keyword found in the buffer to the
+ * string pointed by identifier.
+ */
+int jsparser_buffer_last_identifier(jsparser_ctx *js, char *identifier);
+
+/* Expression wrappers around jsparser_parse(); b is an lvalue for _chr. */
+#define jsparser_parse_chr(a,b) jsparser_parse(a, &(b), 1)
+#ifdef __cplusplus
+#define jsparser_parse_str(a,b) jsparser_parse(a, b, \
+                                               static_cast<int>(strlen(b)))
+#else
+#define jsparser_parse_str(a,b) jsparser_parse(a, b, (int)strlen(b))
+#endif
+
+#ifdef __cplusplus
+} /* namespace ctemplate_htmlparser */
+#endif /* __cplusplus */
+
+#endif /* SECURITY_STREAMHTMLPARSER_JSPARSER_H */
diff --git a/src/htmlparser/jsparser_fsm.config b/src/htmlparser/jsparser_fsm.config
new file mode 100644
index 0000000..874b735
--- /dev/null
+++ b/src/htmlparser/jsparser_fsm.config
@@ -0,0 +1,186 @@
+# Copyright (c) 2008, Google Inc.
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are
+# met:
+#
+# * Redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer.
+# * Redistributions in binary form must reproduce the above
+# copyright notice, this list of conditions and the following disclaimer
+# in the documentation and/or other materials provided with the
+# distribution.
+# * Neither the name of Google Inc. nor the names of its
+# contributors may be used to endorse or promote products derived from
+# this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+# ---
+#
+# Author: falmeida@google.com (Filipe Almeida)
+
+name = 'jsparser'
+
+comment = 'Simplified finite state machine for tracking of javascript states'
+
+condition('q', '\''),
+condition('dq', '\"'),
+condition('/', '/'),
+condition('*', '*'),
+condition('[', '['),
+condition(']', ']'),
+condition('lf', '\n'),
+condition('backslash', '\\'),
+condition('default', '[:default:]')
+
+# Main javascript body.
+state(name = 'js_text',
+ external = 'text',
+ transitions = [
+ ['q', 'js_q'],
+ ['dq', 'js_dq'],
+ ['/', 'js_slash'],
+ ['default', 'js_text']
+ ])
+
+# Single quoted string literal.
+state(name = 'js_q',
+ external = 'q',
+ transitions = [
+ ['backslash', 'js_q_e'],
+ ['q', 'js_text'],
+ ['default', 'js_q']
+ ])
+
+# Javascript escaped character in a single quoted string literal.
+state(name = 'js_q_e',
+ external = 'q',
+ transitions = [
+ ['default', 'js_q']
+ ])
+
+# Double quoted string literal
+state(name = 'js_dq',
+ external = 'dq',
+ transitions = [
+ ['backslash', 'js_dq_e'],
+ ['dq', 'js_text'],
+ ['default', 'js_dq']
+ ])
+
+# Javascript escaped character in a double quoted string literal.
+state(name = 'js_dq_e',
+ external = 'dq',
+ transitions = [
+ ['default', 'js_dq']
+ ])
+
+# Possible start of a javascript comment.
+state(name = 'js_slash',
+ external = 'text',
+ transitions = [
+ ['/', 'js_comment_ln'],
+ ['*', 'js_comment_ml'],
+ ['default', 'js_text']
+ ])
+
+# Possible start of a regular expression literal.
+#
+# The state diagram does not reach this state directly. When js_slash is
+# reached, the function enter_state_js_slash() is called, which checks if the
+# last token belongs to the set of tokens that can precede a regular
+# expression, in which case it changes the state to js_regexp_slash.
+#
+# For more information please read the comments in
+# jsparser.c:enter_state_js_slash().
+state(name = 'js_regexp_slash',
+ external = 'text',
+ transitions = [
+ ['/', 'js_comment_ln'],
+ ['*', 'js_comment_ml'],
+ ['backslash', 'js_regexp_e'],
+ ['[', 'js_regexp_bracket'],
+ ['default', 'js_regexp']
+ ])
+
+# Regular expression literal.
+state(name = 'js_regexp',
+ external = 'regexp',
+ transitions = [
+ ['backslash', 'js_regexp_e'],
+ ['[', 'js_regexp_bracket'],
+ ['/', 'js_text'],
+ ['default', 'js_regexp']
+ ])
+
+# Regexp bracket expression
+state(name = 'js_regexp_bracket',
+ external = 'regexp',
+ transitions = [
+ ['backslash', 'js_regexp_bracket_e'],
+ [']', 'js_regexp'],
+ ['default', 'js_regexp_bracket']
+ ])
+
+# Backslash escaped regexp bracket expression
+state(name = 'js_regexp_bracket_e',
+ external = 'regexp',
+ transitions = [
+ ['default', 'js_regexp_bracket']
+ ])
+
+# Escaped regular expression char.
+state(name = 'js_regexp_e',
+ external = 'regexp',
+ transitions = [
+ ['default', 'js_regexp']
+ ])
+
+# Start of a single line javascript comment (//).
+state(name = 'js_comment_ln',
+ external = 'comment',
+ transitions = [
+ ['lf', 'js_comment_after'],
+ ['default', 'js_comment_ln']
+ ])
+
+# Start of a multiline javascript comment (/*).
+state(name = 'js_comment_ml',
+ external = 'comment',
+ transitions = [
+ ['*', 'js_comment_ml_close'],
+ ['default', 'js_comment_ml']
+ ])
+
+# Close of a multiline javascript comment (*/).
+state(name = 'js_comment_ml_close',
+ external = 'comment',
+ transitions = [
+ ['/', 'js_comment_after'],
+ ['default', 'js_comment_ml']
+ ])
+
+# Ending character of a javascript comment.
+# It can either be a '/' in the case of a multiline comment, or a line
+# terminator in the case of a single line comment.
+# This is needed so we don't insert the '/' or the new line character into the
+# ring buffer.
+state(name = 'js_comment_after',
+ external = 'text',
+ transitions = [
+ ['q', 'js_q'],
+ ['dq', 'js_dq'],
+ ['/', 'js_slash'],
+ ['default', 'js_text']
+ ])
diff --git a/src/htmlparser/statemachine.cc b/src/htmlparser/statemachine.cc
new file mode 100644
index 0000000..00fbe26
--- /dev/null
+++ b/src/htmlparser/statemachine.cc
@@ -0,0 +1,457 @@
+/* Copyright (c) 2007, Google Inc.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following disclaimer
+ * in the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Google Inc. nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ * ---
+ *
+ * Author: falmeida@google.com (Filipe Almeida)
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <assert.h>
+
+#include "htmlparser/statemachine.h"
+
+/* So we can support both C and C++ compilers, we use the CAST() macro instead
+ * of using C style casts or static_cast<>() directly.
+ */
+#ifdef __cplusplus
+ #define CAST(type, expression) (static_cast<type>(expression))
+#else
+ #define CAST(type, expression) ((type)(expression))
+#endif
+
+#ifdef __cplusplus
+namespace ctemplate_htmlparser {
+#endif
+
+#define MAX_CHAR_8BIT 256
+
+/* Stores the transition table and the optional state names in def.
+ * Only the pointers are stored; the tables themselves are not copied. */
+void statemachine_definition_populate(statemachine_definition *def,
+                                      const int* const* transition_table,
+                                      const char* const* state_names)
+{
+  assert(def != NULL);
+  assert(transition_table != NULL);
+
+  /* state_names is optional and is stored as given, NULL included. */
+  def->state_names = state_names;
+  def->transition_table = transition_table;
+}
+
+/* Adds the callback for the event in_state that is called when the
+ * statemachine is in state st.
+ *
+ * This event is called every time the statemachine is in the specified
+ * state for every character in the input stream even if the state remains
+ * the same.
+ *
+ * This event is the last event to be called and is fired after both events
+ * exit_state and enter_state. st must be in the range [0, num_states).
+ */
+void statemachine_in_state(statemachine_definition *def, int st,
+                           state_event_function func)
+{
+  assert(def != NULL);
+  assert(st >= 0 && st < def->num_states);  /* Reject negative indexes too. */
+  def->in_state_events[st] = func;
+}
+
+/* Adds the callback for the event enter_state that is called when the
+ * statemachine enters state st. st must be in the range [0, num_states).
+ *
+ * This event is fired after the event exit_state but before the event
+ * in_state.
+ */
+void statemachine_enter_state(statemachine_definition *def, int st,
+                              state_event_function func)
+{
+  assert(def != NULL);
+  assert(st >= 0 && st < def->num_states);  /* Reject negative indexes too. */
+  def->enter_state_events[st] = func;
+}
+
+/* Adds the callback for the event exit_state that is called when the
+ * statemachine exits from state st. st must be in the range [0, num_states).
+ *
+ * This is the first event to be called and is fired before both the events
+ * enter_state and in_state.
+ */
+void statemachine_exit_state(statemachine_definition *def, int st,
+                             state_event_function func)
+{
+  assert(def != NULL);
+  assert(st >= 0 && st < def->num_states);  /* Reject negative indexes too. */
+  def->exit_state_events[st] = func;
+}
+
+/* Initializes a new statemachine definition with a defined number of states.
+ *
+ * All event callback tables start out zeroed (no callbacks registered) and the
+ * transition table and state names are left zeroed until
+ * statemachine_definition_populate() is called.
+ *
+ * Returns NULL if initialization fails, in which case every partial
+ * allocation has already been released.
+ */
+statemachine_definition *statemachine_definition_new(int states)
+{
+  statemachine_definition *def;
+
+  /* calloc() zero-fills the struct, so the failure path below only ever hands
+   * zeroed (NULL) or valid table pointers to free(). */
+  def = CAST(statemachine_definition *,
+             calloc(1, sizeof(statemachine_definition)));
+  if (def == NULL)
+    return NULL;
+
+  def->in_state_events = CAST(state_event_function *,
+                              calloc(states, sizeof(state_event_function)));
+  def->enter_state_events = CAST(state_event_function *,
+                                 calloc(states, sizeof(state_event_function)));
+  def->exit_state_events = CAST(state_event_function *,
+                                calloc(states, sizeof(state_event_function)));
+  if (def->in_state_events == NULL || def->enter_state_events == NULL ||
+      def->exit_state_events == NULL) {
+    statemachine_definition_delete(def);
+    return NULL;
+  }
+
+  def->num_states = states;
+  return def;
+}
+
+/* Deallocates a statemachine definition object: the three event callback
+ * tables and def itself. transition_table and state_names are not freed. */
+void statemachine_definition_delete(statemachine_definition *def)
+{
+ assert(def != NULL);
+ free(def->in_state_events);
+ free(def->enter_state_events);
+ free(def->exit_state_events);
+ free(def);
+}
+
+/* Returns the state the statemachine is currently in (ctx->current_state);
+ * ctx is dereferenced without a NULL check. */
+int statemachine_get_state(statemachine_ctx *ctx) {
+ return ctx->current_state;
+}
+
+/* Sets the current state.
+ *
+ * It calls the exit event for the old state and the enter event for the state
+ * we intend to move into; neither fires when the state does not change.
+ *
+ * Since this state change was not initiated by a character in the input stream
+ * we pass a null char to the event functions. state must be in the range
+ * [0, num_states).
+ */
+void statemachine_set_state(statemachine_ctx *ctx, int state)
+{
+  statemachine_definition *def;
+
+  assert(ctx != NULL);
+  assert(ctx->definition != NULL);
+
+  def = ctx->definition;
+
+  assert(state >= 0 && state < def->num_states);
+
+  ctx->next_state = state;
+
+  if (ctx->current_state != ctx->next_state) {
+    if (def->exit_state_events[ctx->current_state])
+      def->exit_state_events[ctx->current_state](ctx,
+                                                 ctx->current_state,
+                                                 '\0',
+                                                 ctx->next_state);
+
+    if (def->enter_state_events[ctx->next_state])
+      def->enter_state_events[ctx->next_state](ctx,
+                                               ctx->current_state,
+                                               '\0',
+                                               ctx->next_state);
+  }
+
+  ctx->current_state = state;
+}
+
+/* Reset the statemachine to its initialization values: default state (0),
+ * recording stopped with an empty record buffer, no stale error text or
+ * input character, and line and column number 1.
+ */
+void statemachine_reset(statemachine_ctx *ctx)
+{
+  ctx->current_state = 0;
+  ctx->next_state = 0;
+  ctx->current_char = '\0';
+  ctx->record_buffer[0] = '\0';
+  ctx->record_pos = 0;
+  ctx->recording = 0;
+  ctx->error_msg[0] = '\0';
+  ctx->line_number = 1;
+  ctx->column_number = 1;
+}
+
+/* Creates a statemachine context bound to the definition def.
+ *
+ * def should have been initialized with statemachine_definition_new(). The
+ * context only keeps a reference to it; it does not take ownership.
+ *
+ * user is an opaque pointer stored in the context for the benefit of the
+ * caller, typically to hand instance specific data to the event callbacks.
+ * It may be NULL when the caller has no use for it.
+ *
+ * The new context starts out in the state established by
+ * statemachine_reset().
+ *
+ * Returns NULL if the context cannot be allocated.
+ */
+statemachine_ctx *statemachine_new(statemachine_definition *def,
+                                   void *user)
+{
+  statemachine_ctx *machine;
+
+  assert(def != NULL);
+
+  machine = CAST(statemachine_ctx *, malloc(sizeof(statemachine_ctx)));
+  if (machine != NULL) {
+    statemachine_reset(machine);
+    machine->definition = def;
+    machine->user = user;
+  }
+
+  return machine;
+}
+
+/* Creates a new statemachine context that duplicates src.
+ *
+ * def and user are supplied by the caller because the statemachine does not
+ * own them; the duplicate merely points at them (shallow copy).
+ * Returns NULL when the new context cannot be allocated.
+ */
+statemachine_ctx *statemachine_duplicate(statemachine_ctx *src,
+                                         statemachine_definition *def,
+                                         void *user)
+{
+  statemachine_ctx *clone;
+
+  assert(src != NULL);
+  clone = statemachine_new(def, user);
+  if (clone != NULL)
+    statemachine_copy(clone, src, def, user);
+
+  return clone;
+}
+
+/* Copies the context of the statemachine pointed to by src to the statemachine
+ * provided by dst. The statemachine definition and the user pointer have to be
+ * provided since these references are not owned by the statemachine itself.
+ * Copying a context onto itself only rebinds def and user. */
+void statemachine_copy(statemachine_ctx *dst,
+                       statemachine_ctx *src,
+                       statemachine_definition *def,
+                       void *user)
+{
+  if (dst != src)  /* memcpy() on overlapping objects is undefined. */
+    memcpy(dst, src, sizeof(statemachine_ctx));
+  dst->definition = def;
+  dst->user = user;
+}
+
+/* Deallocates a statemachine object. Only ctx itself is freed; its
+ * definition and user pointer are not touched. */
+void statemachine_delete(statemachine_ctx *ctx)
+{
+ assert(ctx != NULL);
+ free(ctx);
+}
+
+/* Begins recording the input stream into the internal record buffer,
+ * discarding anything recorded earlier. The current input character is
+ * included in the recording. */
+void statemachine_start_record(statemachine_ctx *ctx)
+{
+  assert(ctx != NULL);
+  ctx->recording = 1;
+  ctx->record_pos = 0;
+  ctx->record_buffer[0] = '\0';
+}
+
+/* Ends the recording of the input stream and NUL terminates what was
+ * captured. The last input character is not included in the recording.
+ * Returns a pointer to the recorded string buffer.
+ */
+const char *statemachine_stop_record(statemachine_ctx *ctx)
+{
+  assert(ctx != NULL);
+  assert(ctx->recording);
+  ctx->recording = 0;
+  ctx->record_buffer[ctx->record_pos] = '\0';
+  return ctx->record_buffer;
+}
+
+/* Returns a pointer to the record string buffer, whether or not a recording
+ * is currently in progress. */
+const char *statemachine_record_buffer(statemachine_ctx *ctx)
+{
+ return ctx->record_buffer;
+}
+
+/* Encodes schr as an escaped C string; writes nothing when len is 0. */
+void statemachine_encode_char(char schr, char *output, size_t len)
+{
+  unsigned char chr = schr;
+  if (len == 0)  /* output[len - 1] below would land outside the buffer. */
+    return;
+  if (chr == '\'') {
+    strncpy(output, "\\'", len);
+  } else if (chr == '\\') {
+    strncpy(output, "\\\\", len);
+  } else if (chr >= 32 && chr <= 126) {  /* isprint() minus the locale. */
+    snprintf(output, len, "%c", chr);
+  } else if (chr == '\n') {
+    strncpy(output, "\\n", len);
+  } else if (chr == '\r') {
+    strncpy(output, "\\r", len);
+  } else if (chr == '\t') {
+    strncpy(output, "\\t", len);
+  } else {
+    snprintf(output, len, "\\x%.2x", chr);
+  }
+  output[len - 1] = '\0';  /* strncpy() does not terminate on truncation. */
+}
+
+/* Builds the human readable message for a failed transition in
+ * ctx->error_msg.
+ *
+ * Called from statemachine_parse(). The offending character is escaped and,
+ * when the definition carries state names, the current state is named too. */
+static void statemachine_set_transition_error_message(statemachine_ctx *ctx)
+{
+  const char* const* names = ctx->definition->state_names;
+  char escaped[10];
+
+  statemachine_encode_char(ctx->current_char, escaped, sizeof(escaped));
+
+  if (names == NULL) {
+    snprintf(ctx->error_msg, STATEMACHINE_MAX_STR_ERROR,
+             "Unexpected character '%s'", escaped);
+    return;
+  }
+
+  snprintf(ctx->error_msg, STATEMACHINE_MAX_STR_ERROR,
+           "Unexpected character '%s' in state '%s'",
+           escaped, names[ctx->current_state]);
+}
+
+/* Parses the input html stream and returns the finishing state.
+ *
+ * Returns STATEMACHINE_ERROR if unable to parse the input; the reason is then
+ * available from statemachine_get_error_msg(). Calling statemachine_parse()
+ * again after an error leaves the behaviour unspecified.
+ */
+/* TODO(falmeida): change int size to size_t size */
+int statemachine_parse(statemachine_ctx *ctx, const char *str, int size)
+{
+  int i;
+  const int* const* state_table;
+  statemachine_definition *def;
+
+  assert(ctx != NULL &&
+         ctx->definition != NULL &&
+         ctx->definition->transition_table != NULL);
+
+  if (size < 0) {
+    snprintf(ctx->error_msg, STATEMACHINE_MAX_STR_ERROR, "%s",
+             "Negative size in statemachine_parse().");
+    ctx->next_state = STATEMACHINE_ERROR;  /* So get_error_msg() reports it. */
+    return STATEMACHINE_ERROR;
+  }
+
+  def = ctx->definition;  /* ctx is only read after the assert above. */
+  state_table = def->transition_table;
+
+  for (i = 0; i < size; i++) {
+    ctx->current_char = *str;
+    ctx->next_state =
+        state_table[ctx->current_state][CAST(unsigned char, *str)];
+    if (ctx->next_state == STATEMACHINE_ERROR) {
+      statemachine_set_transition_error_message(ctx);
+      return STATEMACHINE_ERROR;
+    }
+
+    if (ctx->current_state != ctx->next_state) {
+      if (def->exit_state_events[ctx->current_state])
+        def->exit_state_events[ctx->current_state](ctx,
+                                                   ctx->current_state,
+                                                   *str,
+                                                   ctx->next_state);
+    }
+    if (ctx->current_state != ctx->next_state) {  /* Events may alter it. */
+      if (def->enter_state_events[ctx->next_state])
+        def->enter_state_events[ctx->next_state](ctx,
+                                                 ctx->current_state,
+                                                 *str,
+                                                 ctx->next_state);
+    }
+
+    if (def->in_state_events[ctx->next_state])
+      def->in_state_events[ctx->next_state](ctx,
+                                            ctx->current_state,
+                                            *str,
+                                            ctx->next_state);
+
+    /* We need two bytes left so we can NULL terminate the string. */
+    if (ctx->recording &&
+        STATEMACHINE_RECORD_BUFFER_SIZE - 1 > ctx->record_pos) {
+      ctx->record_buffer[ctx->record_pos++] = *str;
+      ctx->record_buffer[ctx->record_pos] = '\0';
+    }
+
+    /* TODO(falmeida): Should clarify the contract here, since an event can
+     * change ctx->next_state and we need this functionality */
+    ctx->current_state = ctx->next_state;
+    ctx->column_number++;
+    if (*str == '\n') {
+      ctx->line_number++;
+      ctx->column_number = 1;
+    }
+    str++;
+  }
+
+  return ctx->current_state;
+}
+
+#ifdef __cplusplus
+} /* namespace ctemplate_htmlparser */
+#endif
diff --git a/src/htmlparser/statemachine.h b/src/htmlparser/statemachine.h
new file mode 100644
index 0000000..2784604
--- /dev/null
+++ b/src/htmlparser/statemachine.h
@@ -0,0 +1,234 @@
+/* Copyright (c) 2007, Google Inc.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following disclaimer
+ * in the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Google Inc. nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ * ---
+ *
+ * Author: falmeida@google.com (Filipe Almeida)
+ */
+
+#ifndef SECURITY_STREAMHTMLPARSER_STATEMACHINE_H
+#define SECURITY_STREAMHTMLPARSER_STATEMACHINE_H
+
+#include <config.h>
+#ifdef __cplusplus
+namespace ctemplate_htmlparser {
+#endif
+
+/* TODO(falmeida): I'm not sure about these limits, but since right now we only
+ * have 24 states it should be fine */
+
+enum {
+ STATEMACHINE_ERROR = 127
+};
+
+#define STATEMACHINE_RECORD_BUFFER_SIZE 256
+
+#define STATEMACHINE_MAX_STR_ERROR 80
+
+struct statemachine_ctx_s;
+
+typedef void(*state_event_function)(struct statemachine_ctx_s *, int, char,
+ int);
+
+typedef struct statemachine_definition_s {
+ int num_states; /* number of states; sizes the three event tables below */
+ const int* const* transition_table; /* transition_table[state][input char] -> next state */
+
+ /* Array containing the name of the states as a C string.
+ * This field is optional and if not in use it should be set to NULL.
+ */
+ const char* const* state_names;
+ state_event_function *in_state_events; /* per-state callbacks, see statemachine_in_state() */
+ state_event_function *enter_state_events; /* see statemachine_enter_state() */
+ state_event_function *exit_state_events; /* see statemachine_exit_state() */
+} statemachine_definition;
+
+typedef struct statemachine_ctx_s {
+ int current_state; /* state the machine is in */
+ int next_state; /* state being moved to; STATEMACHINE_ERROR after a failed transition */
+ statemachine_definition *definition; /* not freed by statemachine_delete() */
+ char current_char; /* input character being processed by statemachine_parse() */
+
+ /* Current line number. */
+ int line_number;
+
+ /* Current column number. */
+ int column_number;
+ char record_buffer[STATEMACHINE_RECORD_BUFFER_SIZE]; /* NUL terminated recording */
+ size_t record_pos; /* index of the terminating NUL in record_buffer */
+
+ /* True if we are recording the stream to record_buffer. */
+ int recording;
+
+ /* In case there was an error (we are in state STATEMACHINE_ERROR), it will
+ * contain a human readable description of the error.
+ */
+ char error_msg[STATEMACHINE_MAX_STR_ERROR];
+
+ /* Storage space for the layer above. */
+ void *user;
+} statemachine_ctx;
+
+/* Populates the statemachine definition.
+ *
+ * Receives a transition table and an optional array of state names. It uses
+ * this data to populate the state machine definition.
+ *
+ * The transition table structure is a list of lists of ints (int **). The
+ * outer list indexes the source state and the inner list contains the
+ * destination state for each of the possible input characters:
+ *
+ * const int* const* transitions[source][input] == destination.
+ *
+ * The optional argument state_names points to a list of strings containing
+ * human readable state names. These strings are used when reporting error
+ * messages.
+ */
+void statemachine_definition_populate(statemachine_definition *def,
+ const int* const* transition_table,
+ const char* const* state_names);
+
+void statemachine_in_state(statemachine_definition *def, int st,
+ state_event_function func);
+void statemachine_enter_state(statemachine_definition *def, int st,
+ state_event_function func);
+void statemachine_exit_state(statemachine_definition *def, int st,
+ state_event_function func);
+
+statemachine_definition *statemachine_definition_new(int states);
+void statemachine_definition_delete(statemachine_definition *def);
+
+int statemachine_get_state(statemachine_ctx *ctx);
+void statemachine_set_state(statemachine_ctx *ctx, int state);
+
+void statemachine_start_record(statemachine_ctx *ctx);
+const char *statemachine_stop_record(statemachine_ctx *ctx);
+const char *statemachine_record_buffer(statemachine_ctx *ctx);
+
+/* Returns the number of bytes in use in the record buffer, counting the
+ * terminating '\0' (that is, record_pos + 1). */
+static inline size_t statemachine_record_length(statemachine_ctx *ctx) {
+ return ctx->record_pos + 1;
+}
+
+/* Return the current line number (1 after a reset, bumped on every '\n'). */
+static inline int statemachine_get_line_number(statemachine_ctx *ctx) {
+ return ctx->line_number;
+}
+
+/* Set the current line number; the value is stored without validation. */
+static inline void statemachine_set_line_number(statemachine_ctx *ctx,
+ int line) {
+ ctx->line_number = line;
+}
+
+/* Return the current column number (set back to 1 after every '\n'). */
+static inline int statemachine_get_column_number(statemachine_ctx *ctx) {
+ return ctx->column_number;
+}
+
+/* Set the current column number; the value is stored without validation. */
+static inline void statemachine_set_column_number(statemachine_ctx *ctx,
+ int column) {
+ ctx->column_number = column;
+}
+
+
+/* Retrieve a human readable error message in case an error occurred.
+ *
+ * NULL is returned if the parser didn't encounter an error.
+ */
+static inline const char *statemachine_get_error_msg(statemachine_ctx *ctx) {
+ if (ctx->next_state == STATEMACHINE_ERROR) {
+ return ctx->error_msg;
+ } else {
+ return NULL;
+ }
+}
+
+/* Reset the statemachine.
+ *
+ * The state is set to the initialization values. This includes setting the
+ * state to the default state (0), stopping recording and setting the line
+ * number to 1.
+ */
+void statemachine_reset(statemachine_ctx *ctx);
+
+/* Initializes a new statemachine. Receives a statemachine definition object
+ * that should have been initialized with statemachine_definition_new() and a
+ * user reference to be used by the caller.
+ *
+ * Returns NULL if initialization fails.
+ *
+ * Initialization failure is fatal, and if this function fails it may not
+ * deallocate all previously allocated memory.
+ */
+statemachine_ctx *statemachine_new(statemachine_definition *def,
+ void *user);
+
+/* Returns a pointer to a context which is a duplicate of the statemachine src.
+ * The statemachine definition and the user pointer have to be provided since
+ * these references are not owned by the statemachine itself.
+ */
+statemachine_ctx *statemachine_duplicate(statemachine_ctx *ctx,
+ statemachine_definition *def,
+ void *user);
+
+/* Copies the context of the statemachine pointed to by src to the statemachine
+ * provided by dst.
+ * The statemachine definition and the user pointer have to be provided since
+ * these references are not owned by the statemachine itself.
+ */
+void statemachine_copy(statemachine_ctx *dst,
+ statemachine_ctx *src,
+ statemachine_definition *def,
+ void *user);
+
+int statemachine_parse(statemachine_ctx *ctx, const char *str, int size);
+
+void statemachine_delete(statemachine_ctx *ctx);
+
+
+/*****
+ * The following functions are only exported for testing purposes and should
+ * be treated as private. */
+
+
+/* Encode the character as an escaped C string.
+ *
+ * Encode the character chr into the string output. Writes at most len
+ * characters to the output string but makes sure output is NULL terminated.
+ */
+void statemachine_encode_char(char chr, char *output, size_t len);
+
+
+#ifdef __cplusplus
+} /* namespace ctemplate_htmlparser */
+#endif
+
+#endif /* SECURITY_STREAMHTMLPARSER_STATEMACHINE_H */
diff --git a/src/indented_writer.h b/src/indented_writer.h
new file mode 100644
index 0000000..6df190e
--- /dev/null
+++ b/src/indented_writer.h
@@ -0,0 +1,134 @@
+// Copyright (c) 2009, Google Inc.
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+// * Neither the name of Google Inc. nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+// ---
+// Author: williasr@google.com (Scott Williams)
+
+#ifndef TEMPLATE_INDENTED_WRITER_H_
+#define TEMPLATE_INDENTED_WRITER_H_
+
+#include <config.h>
+#include <string>
+
+namespace ctemplate {
+
+using std::string;
+
+// An indented writer is a wrapper around a string buffer. It takes care of
+// tracking and applying leading whitespace to the buffer at the beginning of
+// new lines.
+class IndentedWriter {
+ public:
+  // Writes into *out, which is not owned and must outlive this object.
+  IndentedWriter(string* out, int starting_indentation)
+      : out_(out), current_indentation_(starting_indentation),
+        original_indentation_(starting_indentation), line_state_(AT_BEGINNING) { }
+
+  // Every Indent() must have been balanced by a Dedent() by now.
+  ~IndentedWriter() {
+    assert(original_indentation_ == current_indentation_);
+  }
+
+  // Append some output to the buffer. If the string ends with a newline, then
+  // the output buffer will be indented before the next Write() call. If the
+  // output contains embedded newlines, these won't have proper indentation, so
+  // call Write() at least once per physical line of output. s1 is always
+  // written; s2..s7 are skipped when empty. Arguments are taken by const
+  // reference so that a call does not copy every string it is handed.
+  void Write(const string& s1,
+             const string& s2 = string(),
+             const string& s3 = string(),
+             const string& s4 = string(),
+             const string& s5 = string(),
+             const string& s6 = string(),
+             const string& s7 = string()) {
+    DoWrite(s1);
+    if (!s2.empty()) DoWrite(s2);
+    if (!s3.empty()) DoWrite(s3);
+    if (!s4.empty()) DoWrite(s4);
+    if (!s5.empty()) DoWrite(s5);
+    if (!s6.empty()) DoWrite(s6);
+    if (!s7.empty()) DoWrite(s7);
+  }
+
+  // Increment the indentation level. This only has a meaning after outputting a
+  // complete line (otherwise, are you saying you want to modify the indentation
+  // of the current line or the next line?)
+  void Indent() {
+    assert(line_state_ == AT_BEGINNING);
+    current_indentation_ += kIndent;
+  }
+
+  // Decrement the indentation level. This only has a meaning after outputting a
+  // complete line (otherwise, are you saying you want to modify the indentation
+  // of the current line or the next line?)
+  void Dedent() {
+    assert(line_state_ == AT_BEGINNING);
+    current_indentation_ -= kIndent;
+    assert(current_indentation_ >= original_indentation_);
+  }
+
+  // Get access to the underlying indentation level and string buffer. Most
+  // useful for interfacing with non-IndentedWriter printing code.
+  int GetIndent() const { return current_indentation_; }
+  string* GetBuffer() { return out_; }
+
+ private:
+  // Appends line (indenting first when at the start of a line) and records
+  // whether the buffer now sits at the beginning of a line or in the middle.
+  void DoWrite(const string& line) {
+    if (line_state_ == AT_BEGINNING) IndentLine();
+    out_->append(line);
+    line_state_ = EndsWithNewline(line) ? AT_BEGINNING : MID_LINE;
+  }
+
+  static bool EndsWithNewline(const string& line) {
+    return !line.empty() && (*line.rbegin() == '\n');
+  }
+
+  // Emits current_indentation_ spaces, plus one more if that count is non-zero.
+  void IndentLine() {
+    assert(line_state_ == AT_BEGINNING);
+    out_->append(string(current_indentation_, ' ') +
+                 (current_indentation_ ? " " : ""));
+  }
+
+  string* out_;               // destination buffer, never deleted here
+  int current_indentation_;   // changed by Indent() / Dedent()
+  int original_indentation_;  // value passed to the constructor
+  enum LineState {
+    AT_BEGINNING,
+    MID_LINE
+  } line_state_;
+
+  const static int kIndent = 2; // num spaces to indent each level
+};
+
+}
+
+#endif // TEMPLATE_INDENTED_WRITER_H_
diff --git a/src/make_tpl_varnames_h.cc b/src/make_tpl_varnames_h.cc
new file mode 100644
index 0000000..1dcb069
--- /dev/null
+++ b/src/make_tpl_varnames_h.cc
@@ -0,0 +1,468 @@
+// Copyright (c) 2006, Google Inc.
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+// * Neither the name of Google Inc. nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+// ---
+//
+// A utility for checking syntax and generating headers to
+// use with Google Templates.
+//
+// For example:
+//
+// > <path_to>/make_tpl_varnames_h some_template_file.tpl
+//
+// This creates the header file some_template_file.tpl.varnames.h. If
+// there are any syntax errors they are reported to stderr (in which
+// case, no header file is created).
+//
+//
+// Exit code is the number of templates we were unable to parse.
+//
+// Headers can be all written to one output file (via --outputfile)
+// or written to one output file per template processed (via --header_dir).
+// As such, we have a first stage where we load each template and generate
+// its headers and a second stage where we write the headers to disk.
+//
+// TODO(jad): Prevent -f and -o from being used together.
+// Previously -o would be silently ignored.
+
+// This is for windows. Even though we #include config.h, just like
+// the files used to compile the dll, we are actually a *client* of
+// the dll, so we don't get to decl anything.
+#include <config.h>
+#undef CTEMPLATE_DLL_DECL
+#include <ctype.h> // for toupper(), isalnum()
+#include <errno.h>
+#ifdef HAVE_GETOPT_H
+# include <getopt.h>
+#endif
+#include <stdarg.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#ifdef HAVE_UNISTD_H
+# include <unistd.h>
+#endif
+#include <string>
+#include <set>
+#include <vector>
+
+#include <ctemplate/template_pathops.h>
+#include <ctemplate/template.h>
+using std::set;
+using std::string;
+using std::vector;
+using GOOGLE_NAMESPACE::Template;
+
+enum {LOG_INFO, LOG_WARNING, LOG_ERROR, LOG_FATAL};
+
+// Holds information on each template we process.
+struct TemplateRecord {
+ const string name; // filename given on cmd-line (may be relative)
+ bool error; // true iff an error occurred during template loading
+ string header_entries; // output of tpl->WriteHeaderEntries()
+
+ explicit TemplateRecord(const string& aname)
+ : name(aname), error(false) {
+ }
+};
+
+// Prints a printf-style message plus a newline to stderr.  LOG_INFO is dropped
+// unless should_log_info is set; LOG_FATAL adds a prefix and then exit(1)s.
+static void LogPrintf(int severity, int should_log_info, const char* pat, ...) {
+  if (severity == LOG_INFO && !should_log_info) return;
+  const bool fatal = (severity == LOG_FATAL);
+  if (fatal) fputs("FATAL ERROR: ", stderr);
+  va_list args;
+  va_start(args, pat);
+  vfprintf(stderr, pat, args);
+  va_end(args);
+  fputc('\n', stderr);
+  if (fatal) exit(1);
+}
+
+// Prints the synopsis and option summary to outfile -- usually stdout or stderr.
+static void Usage(const char* argv0, FILE* outfile) {
+  fprintf(outfile, "USAGE: %s [-t<dir>] [-o<dir>] [-s<suffix>] [-f<filename>]"
+          " [-n] [-d] [-q] <template_filename> ...\n", argv0);
+  fprintf(outfile,
+          " -t<dir> --template_dir=<dir> Root directory of templates\n"
+          " -o<dir> --header_dir=<dir> Where to place output files\n"
+          " -s<suffix> --outputfile_suffix=<suffix>\n"
+          " outname = inname + suffix\n"
+          " -f<filename> --outputfile=<filename>\n"
+          " outname = filename (when given, \n"
+          " --header_dir is ignored)\n"
+          " -n --noheader Just check syntax, no output\n"
+          " -d --dump_templates Cause templates dump contents\n"
+          " -q --nolog_info Only log on error\n"
+          " --v=-1 Obsolete, confusing synonym for -q\n"
+          " -h --help This help\n"
+          " -V --version Version information\n");
+  fprintf(outfile, "\n"
+          "This program checks the syntax of one or more google templates.\n"
+          "By default (without -n) it also emits a header file to an output\n"
+          "directory that defines all valid template keys. This can be used\n"
+          "in programs to minimize the probability of typos in template code.\n");
+}
+
+static void Version(FILE* outfile) {  // Writes the version/license banner.
+ fprintf(outfile,
+ "make_tpl_varnames_h "
+ " (part of " PACKAGE_STRING ")"
+ "\n\n"
+ "Copyright 1998 Google Inc.\n"
+ "\n"
+ "This is BSD licensed software; see the source for copying conditions\n"
+ "and license information.\n"
+ "There is NO warranty; not even for MERCHANTABILITY or FITNESS FOR A\n"
+ "PARTICULAR PURPOSE.\n"
+ );
+}
+
+// Rewrites *s in place so it only holds characters legal in a C identifier:
+// alphanumerics are upper-cased, everything else becomes '_'.
+static void ConvertToIdentifier(string* s) {
+  for (string::size_type i = 0; i < s->size(); i++) {
+    // <ctype.h> functions are undefined for negative values other than EOF,
+    // so go through unsigned char (matters for non-ASCII bytes in file names).
+    const unsigned char c = static_cast<unsigned char>((*s)[i]);
+    (*s)[i] = isalnum(c) ? static_cast<char>(toupper(c)) : '_';
+  }
+}
+
+// Wraps header_entries in an #ifndef/#define/#endif include guard.  The
+// guard macro is "TPL_" + output_file passed through ConvertToIdentifier(),
+// with "_H_" appended.
+static string WrapWithGuard(const string& output_file,
+                            const string& header_entries) {
+  string guard_name = "TPL_" + output_file;
+  ConvertToIdentifier(&guard_name);
+  guard_name += "_H_";
+
+  string wrapped;
+  wrapped += "#ifndef " + guard_name + "\n";
+  wrapped += "#define " + guard_name + "\n\n";
+
+  // The entries sit between the opening and closing guard lines.
+  wrapped += header_entries;
+  wrapped += "\n#endif // " + guard_name + "\n";
+  return wrapped;
+}
+
+// Produces the comment banner that opens a generated header.  The banner
+// names each contributing template file on its own line, then the program.
+static string Boilerplate(const string& progname,
+                          const vector<string>& filenames) {
+  string banner("//\n");
+  // Singular or plural wording, depending on how many templates there are.
+  banner += (filenames.size() > 1)
+      ? "// This header file auto-generated for the templates\n"
+      : "// This header file auto-generated for the template\n";
+
+  for (vector<string>::const_iterator name = filenames.begin();
+       name != filenames.end(); ++name) {
+    banner += "// " + *name + "\n";
+  }
+  banner += "// by " + progname + "\n";
+  banner += "// DO NOT MODIFY THIS FILE DIRECTLY\n" "//\n";
+  return banner;
+}
+
+// Reports whether input is empty or made up solely of whitespace characters
+// (space, form feed, horizontal tab, vertical tab, carriage return).
+static bool LineIsAllWhitespace(const string& input) {
+  const string::size_type first_visible = input.find_first_not_of(" \f\t\v\r");
+  return first_visible == string::npos;
+}
+
+// Splits the input string into lines using the newline (\n)
+// as delimiter. The newlines are discarded.
+// An empty string input results in one empty line output.
+//
+// Examples: "Hello\nWorld\n" input results in two lines,
+// "Hello" and "World".
+// Same result for "Hello\nWorld" (not newline terminated).
+//
+static vector<string> SplitIntoLines(const string &input) {
+  vector<string> pieces;
+  const string::size_type total = input.length();
+  string::size_type start = 0;
+  // Runs at least once, so an empty input still yields one empty line.
+  do {
+    const string::size_type newline_pos = input.find('\n', start);
+    if (newline_pos == string::npos) {
+      // No further delimiter: the remainder is the last line.
+      pieces.push_back(input.substr(start));
+      break;
+    }
+    pieces.push_back(input.substr(start, newline_pos - start));
+    start = newline_pos + 1;
+    // Stop right after a terminating newline so that no trailing empty
+    // line is produced.
+  } while (start < total);
+  return pieces;
+}
+
+// Receives header entries concatenated together from one or more
+// templates and returns a string with the duplicate lines removed.
+//
+// Duplicate lines that contain only whitespace are not removed,
+// all other duplicate lines (identical #include directives and
+// identical variable definitions) are removed. If the last
+// (or only) input line did not terminate with newline, we add one.
+//
+// Consider the following two templates:
+// ex1.tpl: <p>{{USER}}</p>
+// ex2.tpl: <a href="{{URL}}">{{USER}}</a>
+//
+// The header entries for ex1.tpl are:
+// #include "template/template_string.h"
+// static const ::GOOGLE_NAMESPACE::StaticTemplateString ke_USER =
+// STS_INIT_WITH_HASH(ke_USER, "USER", 3254611514008215315LLU);
+//
+// The header entries for ex2.tpl are:
+// #include "template/template_string.h"
+// static const ::GOOGLE_NAMESPACE::StaticTemplateString ke_URL =
+// STS_INIT_WITH_HASH(ke_URL, "URL", 1026025273225241985LLU);
+// static const ::GOOGLE_NAMESPACE::StaticTemplateString ke_USER =
+// STS_INIT_WITH_HASH(ke_USER, "USER", 3254611514008215315LLU);
+//
+// Simply concatenating both header entries will result in
+// duplicate #include directives and duplicate definitions of
+// the ke_USER variable. This function instead outputs:
+//
+// #include "template/template_string.h"
+// static const ::GOOGLE_NAMESPACE::StaticTemplateString ke_USER =
+// STS_INIT_WITH_HASH(ke_USER, "USER", 3254611514008215315LLU);
+// static const ::GOOGLE_NAMESPACE::StaticTemplateString ke_URL =
+// STS_INIT_WITH_HASH(ke_URL, "URL", 1026025273225241985LLU);
+//
+static string TextWithDuplicateLinesRemoved(const string& header_entries) {
+  const vector<string> all_lines = SplitIntoLines(header_entries);
+  set<string> already_emitted;
+  string deduped;
+  for (vector<string>::const_iterator it = all_lines.begin();
+       it != all_lines.end(); ++it) {
+    // Skip a line only if it has visible content and was emitted before;
+    // whitespace-only lines are always kept.
+    if (!LineIsAllWhitespace(*it) && already_emitted.count(*it) != 0)
+      continue;
+    deduped += *it;
+    deduped += '\n';  // also terminates a final line that lacked a newline
+    already_emitted.insert(*it);
+  }
+  return deduped;
+}
+
+// Writes text to output_file, truncating any existing file.  Returns true only
+// if the file was opened, completely written and closed without error.
+static bool WriteToDisk(bool log_info, const string& output_file,
+                        const string& text) {
+  FILE* outfile = fopen(output_file.c_str(), "wb");
+  if (!outfile) {
+    LogPrintf(LOG_ERROR, log_info, "Can't open %s", output_file.c_str());
+    return false;
+  }
+  LogPrintf(LOG_INFO, log_info, "Creating %s", output_file.c_str());
+  bool ok = fwrite(text.c_str(), 1, text.length(), outfile) == text.length();
+  int saved_errno = errno;  // fclose() below may overwrite errno
+  if (fclose(outfile) != 0 && ok) { ok = false; saved_errno = errno; }
+  if (!ok) LogPrintf(LOG_ERROR, log_info, "Can't write %s: %s",
+                     output_file.c_str(), strerror(saved_errno));
+  return ok;  // a short write or failed close is reported to the caller
+}
+
+int main(int argc, char **argv) {
+ string FLAG_template_dir(GOOGLE_NAMESPACE::kCWD); // "./"
+ string FLAG_header_dir(GOOGLE_NAMESPACE::kCWD);
+ GOOGLE_NAMESPACE::NormalizeDirectory(&FLAG_header_dir); // adds trailing slash
+ string FLAG_outputfile_suffix(".varnames.h");
+ string FLAG_outputfile("");
+ bool FLAG_header = true;
+ bool FLAG_dump_templates = false;
+ bool FLAG_log_info = true;
+
+#if defined(HAVE_GETOPT_LONG)
+ static struct option longopts[] = {
+ {"help", 0, NULL, 'h'},
+ {"version", 0, NULL, 'V'},
+ {"template_dir", 1, NULL, 't'},
+ {"header_dir", 1, NULL, 'o'},
+ {"outputfile_suffix", 1, NULL, 's'},
+ {"outputfile", 1, NULL, 'f'},
+ {"noheader", 0, NULL, 'n'},
+ {"dump_templates", 0, NULL, 'd'},
+ {"nolog_info", 0, NULL, 'q'},
+ {"v", 1, NULL, 'q'}, // obsolete --v=<n>: any value behaves like -q
+ {0, 0, 0, 0}
+ };
+ int option_index;
+# define GETOPT(argc, argv) getopt_long(argc, argv, "t:o:s:f:ndqhV", \
+ longopts, &option_index)
+#elif defined(HAVE_GETOPT_H)
+# define GETOPT(argc, argv) getopt(argc, argv, "t:o:s:f:ndqhV")
+#else // TODO(csilvers): implement something reasonable for windows
+# define GETOPT(argc, argv) -1
+ int optind = 1; // first non-opt argument
+ const char* optarg = ""; // not used
+#endif
+
+ int r = 0;
+ while (r != -1) { // getopt()/getopt_long() return -1 upon no-more-input
+ r = GETOPT(argc, argv);
+ switch (r) {
+ case 't': FLAG_template_dir.assign(optarg); break;
+ case 'o': FLAG_header_dir.assign(optarg); break;
+ case 's': FLAG_outputfile_suffix.assign(optarg); break;
+ case 'f': FLAG_outputfile.assign(optarg); break;
+ case 'n': FLAG_header = false; break;
+ case 'd': FLAG_dump_templates = true; break;
+ case 'q': FLAG_log_info = false; break;
+ case 'V': Version(stdout); return 0; break;
+ case 'h': Usage(argv[0], stderr); return 0; break;
+ case -1: break; // means 'no more input'
+ default: Usage(argv[0], stderr); return 1; break;
+ }
+ }
+
+ if (optind >= argc) { // no template names given; LOG_FATAL makes LogPrintf exit
+ LogPrintf(LOG_FATAL, FLAG_log_info,
+ "Must specify at least one template file on the command line.");
+ }
+
+ Template::SetTemplateRootDirectory(FLAG_template_dir);
+
+
+ // Initialize the TemplateRecord array. It holds one element per
+ // template given on the command-line.
+ vector<TemplateRecord*> template_records;
+ for (int i = optind; i < argc; ++i) {
+ TemplateRecord *template_rec = new TemplateRecord(argv[i]);
+ template_records.push_back(template_rec);
+ }
+
+ // Iterate through each template and (unless -n is given), store
+ // its header entries in the matching TemplateRecord.
+ int num_errors = 0;
+ for (vector<TemplateRecord*>::iterator it = template_records.begin();
+ it != template_records.end(); ++it) {
+ const char* tplname = (*it)->name.c_str();
+ LogPrintf(LOG_INFO, FLAG_log_info, "\n------ Checking %s ------", tplname);
+
+ // The strip mode passed in the following call does not matter
+ // since it controls how the template gets expanded and we never
+ // expand the template after loading it here
+ Template * tpl = Template::GetTemplate(tplname, GOOGLE_NAMESPACE::DO_NOT_STRIP);
+
+ // The call to GetTemplate (above) loads the template from disk
+ // and attempts to parse it. If it cannot find the file or if it
+ // detects any template syntax errors, the parsing routines
+ // report the error and GetTemplate returns NULL. Syntax errors
+ // include such things as mismatched double-curly-bracket pairs,
+ // e.g. '{{VAR}', Invalid characters in template variables or
+ // section names, e.g. '{{BAD_VAR?}}' [the question mark is
+ // illegal], improperly nested section/end section markers,
+ // e.g. a section close marker with no section start marker or a
+ // section start of a different name.
+ // If that happens, since the parsing errors have already been reported
+ // we just continue on to the next one.
+ if (!tpl) {
+ LogPrintf(LOG_ERROR, FLAG_log_info, "Could not load file: %s", tplname);
+ num_errors++;
+ (*it)->error = true;
+ continue;
+ } else {
+ LogPrintf(LOG_INFO, FLAG_log_info, "No syntax errors detected in %s",
+ tplname);
+ if (FLAG_dump_templates)
+ tpl->Dump(tpl->template_file());
+ }
+
+ // The rest of the loop creates the header file
+ if (!FLAG_header)
+ continue; // They don't want header files
+
+ tpl->WriteHeaderEntries(&((*it)->header_entries));
+ }
+
+ // We have headers to emit:
+ // . If --outputfile was given, we combine all the header entries and
+ // write them to the given output file. If any template had errors,
+ // we fail and do not generate an output file.
+ // . Otherwise, we write one output file per template we processed.
+ // . In both cases, we add proper boilerplate first.
+ if (FLAG_header) {
+ string progname = argv[0];
+
+ if (!FLAG_outputfile.empty()) { // All header entries written to one file.
+ // If any template had an error, we do not produce an output file.
+ if (num_errors == 0) {
+ vector<string> template_filenames;
+ string all_header_entries;
+ for (vector<TemplateRecord*>::const_iterator
+ it = template_records.begin(); it != template_records.end(); ++it) {
+ all_header_entries.append((*it)->header_entries);
+ template_filenames.push_back((*it)->name);
+ }
+ string output = Boilerplate(progname, template_filenames);
+ const string cleantext =
+ TextWithDuplicateLinesRemoved(all_header_entries);
+ output.append(WrapWithGuard(FLAG_outputfile, cleantext));
+ if (!WriteToDisk(FLAG_log_info, FLAG_outputfile, output))
+ num_errors++;
+ }
+ } else {
+ // Each template will have its own output file. Skip any that had errors.
+ for (vector<TemplateRecord*>::const_iterator
+ it = template_records.begin(); it != template_records.end(); ++it) {
+ if ((*it)->error)
+ continue;
+ string basename = GOOGLE_NAMESPACE::Basename((*it)->name);
+ string output_file =
+ GOOGLE_NAMESPACE::PathJoin(FLAG_header_dir,
+ basename + FLAG_outputfile_suffix);
+ vector<string> template_filenames; // Contains one template filename.
+ template_filenames.push_back((*it)->name);
+ string output = Boilerplate(progname, template_filenames);
+ output.append(WrapWithGuard(output_file, (*it)->header_entries));
+ if (!WriteToDisk(FLAG_log_info, output_file, output))
+ num_errors++;
+ }
+ }
+ }
+
+ // Free dynamic memory
+ for (vector<TemplateRecord*>::iterator it = template_records.begin();
+ it != template_records.end(); ++it) {
+ delete *it;
+ }
+
+ // Cap at 127 to avoid causing problems with return code
+ return num_errors > 127 ? 127 : num_errors;
+}
diff --git a/src/per_expand_data.cc b/src/per_expand_data.cc
new file mode 100644
index 0000000..0a468cf
--- /dev/null
+++ b/src/per_expand_data.cc
@@ -0,0 +1,78 @@
+// Copyright (c) 2009, Google Inc.
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+// * Neither the name of Google Inc. nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+// ---
+//
+// This contains some implementation of PerExpandData that is still simple
+// but is not conveniently defined in the header file, e.g., because it would
+// introduce new include dependencies.
+
+#include <config.h>
+#include <ctemplate/find_ptr.h>
+#include <ctemplate/per_expand_data.h>
+#include <ctemplate/template_annotator.h>
+
+namespace ctemplate {
+
+using std::string;
+
+#ifndef _MSC_VER
+bool PerExpandData::DataEq::operator()(const char* s1, const char* s2) const {
+  if (s1 == 0 || s2 == 0) return s1 == s2;  // equal only when both are null
+  return *s1 == *s2 && strcmp(s1, s2) == 0;  // first-char check, then strcmp
+}
+#endif
+
+PerExpandData::~PerExpandData() {
+ delete map_; // releases the map that InsertForModifiers() allocates on demand
+}
+
+TemplateAnnotator* PerExpandData::annotator() const {
+  // Without an explicitly set annotator, hand out a shared default instance.
+  // TextTemplateAnnotator has no static state, so a static one should be safe.
+  if (annotator_ == NULL) {
+    static TextTemplateAnnotator g_default_annotator;
+    return &g_default_annotator;
+  }
+  return annotator_;
+}
+
+void PerExpandData::InsertForModifiers(const char* key, const void* value) {
+  // The map is only allocated once the first entry is stored.
+  if (map_ == NULL) map_ = new DataMap;
+  (*map_)[key] = value;
+}
+
+ // Retrieve data specific to this Expand call. Returns NULL if key
+ // is not found. This should only be used by template modifiers.
+const void* PerExpandData::LookupForModifiers(const char* key) const {
+  return (map_ == NULL) ? NULL : find_ptr2(*map_, key);  // no map: no data
+}
+
+}
diff --git a/src/solaris/libstdc++.la b/src/solaris/libstdc++.la
new file mode 100644
index 0000000..3edf425
--- /dev/null
+++ b/src/solaris/libstdc++.la
@@ -0,0 +1,51 @@
+# libstdc++.la - a libtool library file
+# Generated by ltmain.sh - GNU libtool 1.4a-GCC3.0 (1.641.2.256 2001/05/28 20:09:07 with GCC-local changes)
+#
+# Please DO NOT delete this file!
+# It is necessary for linking the library.
+
+# ---
+# NOTE: This file lives in /usr/sfw/lib on Solaris 10. Unfortunately,
+# due to an apparent bug in the Solaris 10 6/06 release,
+# /usr/sfw/lib/libstdc++.la is empty. Below is the correct content,
+# according to
+# http://forum.java.sun.com/thread.jspa?threadID=5073150
+# By passing LDFLAGS='-Lsrc/solaris' to configure, make will pick up
+# this copy of the file rather than the empty copy in /usr/sfw/lib.
+#
+# Also see
+# http://www.technicalarticles.org/index.php/Compiling_MySQL_5.0_on_Solaris_10
+#
+# Note: this is for 32-bit systems. If you have a 64-bit system,
+# uncomment the appropriate dependency_libs line below.
+# ----
+
+# The name that we can dlopen(3).
+dlname='libstdc++.so.6'
+
+# Names of this library.
+library_names='libstdc++.so.6.0.3 libstdc++.so.6 libstdc++.so'
+
+# The name of the static archive.
+old_library='libstdc++.a'
+
+# Libraries that this one depends upon.
+# 32-bit version:
+dependency_libs='-lc -lm -L/usr/sfw/lib -lgcc_s'
+# 64-bit version:
+#dependency_libs='-L/lib/64 -lc -lm -L/usr/sfw/lib/64 -lgcc_s'
+
+# Version information for libstdc++.
+current=6
+age=0
+revision=3
+
+# Is this an already installed library?
+installed=yes
+
+# Files to dlopen/dlpreopen
+dlopen=''
+dlpreopen=''
+
+# Directory that this library needs to be installed in:
+libdir='/usr/sfw/lib'
diff --git a/src/template-converter b/src/template-converter
new file mode 100755
index 0000000..7aedaed
--- /dev/null
+++ b/src/template-converter
@@ -0,0 +1,111 @@
+#!/usr/bin/perl -w
+#
+# Copyright 2001 Google Inc.
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are
+# met:
+#
+# * Redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer.
+# * Redistributions in binary form must reproduce the above
+# copyright notice, this list of conditions and the following disclaimer
+# in the documentation and/or other materials provided with the
+# distribution.
+# * Neither the name of Google Inc. nor the names of its
+# contributors may be used to endorse or promote products derived from
+# this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+# ---
+# Author: Andrew Fikes
+#
+# Tool to convert a template file (.tpl) into a C++ header
+# file with a const string defining the same template. This string
+# can then be used to create/retrieve a template using
+# Template::StringToTemplate()/StringToTemplateCache() (see template.h).
+#
+# Usage: template-converter <template_name> [infile [outfile]]  (default: stdin/stdout)
+#
+# template_name is the name of the variable we export. A good choice
+# is to pass in the outfile name. To make that easier, we treat
+# <template_name> as a pathname, and take the basename, strip the
+# suffix if it's .h, and sanitize the rest of the name to be a legal
+# C variable name.
+# MOE:insert # NOTE: See doc/index.html for a general description of Google ctemplate.
+
+
+# Store the input argv.
+my $argv = join(" ", $0, @ARGV);
+
+# First argument (required): name of the template variable to export.
+(my $template_name = shift) || usage("Need to specify template variable name.");
+
+# If a second argument is supplied, treat it as an input filename.
+if (my $infile = shift) {
+ open(STDIN, "<", $infile) or usage("Can't open $infile for reading.");
+}
+
+# If a third argument is supplied, treat it as an output filename.
+if (my $outfile = shift) {
+ open(STDOUT, ">", $outfile) or usage("Can't open $outfile for writing.");
+}
+
+# Derive the C variable name from the template name argument
+$base_name = $template_name;
+$base_name =~ s|^.*/([^/]*)$|$1|; # Strip out directory name
+$base_name =~ s|\.h$||; # Strip out suffix, if it's .h
+$base_name =~ tr|A-Za-z0-9_|_|c; # Sanitize name to remove non-letters/nums
+
+# Print header
+print "// This file automatically generated by template-converter:\n";
+print "// $argv\n";
+print "//\n";
+print "// DO NOT EDIT!\n\n";
+print "#ifndef " . uc($base_name) . "_H_\n";
+print "#define " . uc($base_name) . "_H_\n\n";
+print "#include <string>\n\n";
+
+# Read in template file and print template as a string
+# MOE:begin_strip
+print "const string ${base_name} (\n";
+# MOE:end_strip_and_replace print "const std::string ${base_name} (\n";
+while (<>) {
+ chomp;
+ my $escaped_line = escape_line($_);
+ print "\"$escaped_line\\n\"\n";
+}
+print ");\n\n";
+
+# Print footer and exit
+print "#endif /* " . uc($base_name) . "_H_ */\n";
+exit(0);
+
+# Prints the given message and a usage summary to STDERR, then exits with 1.
+sub usage {
+  my ($msg) = @_;
+  print STDERR "\n$msg\n",
+               "Usage: template-converter <template-varname>",
+               " [infile] [outfile]\n\n";
+  exit(1);
+}
+
+# Escapes a line for a C++ string literal (backslash-prefixes '\' and '"').
+sub escape_line {
+  my ($text) = @_;
+  $text =~ s/\\/\\\\/g;  # backslashes first, so the ones added below stay single
+  $text =~ s/"/\\"/g;
+  return $text;
+}
diff --git a/src/template.cc b/src/template.cc
new file mode 100644
index 0000000..de20d06
--- /dev/null
+++ b/src/template.cc
@@ -0,0 +1,2777 @@
+// Copyright (c) 2006, Google Inc.
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+// * Neither the name of Google Inc. nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+// ---
+
+#include <config.h>
+#include "base/mutex.h" // This must go first so we get _XOPEN_SOURCE
+#include <ctemplate/template.h>
+
+#include <assert.h>
+#include <errno.h>
+#include <stdio.h> // for fwrite, fflush
+#include <stdlib.h>
+#include <string.h>
+#include <sys/stat.h>
+#include <time.h>
+#ifdef HAVE_UNISTD_H
+# include <unistd.h>
+#endif // for stat() and open() and getcwd()
+#include <algorithm> // for binary_search()
+#include <functional> // for binary_function()
+#include HASH_MAP_H
+#include <iterator>
+#include <list>
+#include <string>
+#include <utility> // for pair
+#include <vector>
+
+#include "base/thread_annotations.h"
+#include "htmlparser/htmlparser_cpp.h"
+#include <ctemplate/per_expand_data.h>
+#include <ctemplate/template_annotator.h>
+#include <ctemplate/template_cache.h>
+#include <ctemplate/template_dictionary.h>
+#include <ctemplate/template_dictionary_interface.h> // also gets kIndent
+#include <ctemplate/template_modifiers.h>
+#include "template_modifiers_internal.h"
+#include <ctemplate/template_pathops.h>
+#include <ctemplate/template_string.h>
+#include "base/fileutil.h"
+#include <ctype.h>
+#include <iostream>
+#include <sstream> // for ostringstream
+
+#ifndef PATH_MAX
+#ifdef MAXPATHLEN
+#define PATH_MAX MAXPATHLEN
+#else
+#define PATH_MAX 4096 // seems conservative for max filename len!
+#endif
+#endif
+
+#define arraysize(x) ( sizeof(x) / sizeof(*(x)) )
+
+#define AS_STR1(x) #x
+#define AS_STR(x) AS_STR1(x)
+
+// A very simple logging system
+#undef LOG // a non-working version is provided in base/util.h; redefine it
+static int kVerbosity = 0; // you can change this by hand to get vlogs
+#define LOG(level) std::cerr << #level ": "
+#define VLOG(level) if (kVerbosity >= level) LOG(level)
+
+// TODO(csilvers): use our own tables for these?
+// True iff c is a 7-bit ASCII character that isalnum() accepts.  Bytes with
+// the high bit set are rejected before isalnum() is consulted.
+static bool ascii_isalnum(char c) {
+  if ((c & 0x80) != 0)
+    return false;   // not 7-bit ascii
+  return isalnum(c) != 0;
+}
+
+// True iff c is a 7-bit ASCII character that isspace() accepts.  Bytes with
+// the high bit set are rejected before isspace() is consulted.
+static bool ascii_isspace(char c) {
+  const bool is_seven_bit = (c & 0x80) == 0;
+  return is_seven_bit ? (isspace(c) != 0) : false;
+}
+
+// strsuffix(str, suffix): true iff the C string `str` ends with `suffix`
+// AND is strictly longer than it (a string equal to the suffix itself does
+// not match, because the length test uses '>').
+// `suffix` must be a string literal: it is pasted between "" tokens and
+// its length is taken with sizeof.  `str` is evaluated more than once.
+#define strsuffix(str, suffix) \
+ ( strlen(str) > (sizeof("" suffix "") - 1) && \
+ strcmp(str + strlen(str) - (sizeof(suffix) - 1), suffix) == 0 )
+
+using std::endl;
+using std::string;
+using std::list;
+using std::vector;
+using std::pair;
+using std::binary_search;
+#ifdef HAVE_UNORDERED_MAP
+using HASH_NAMESPACE::unordered_map;
+// This is totally cheap, but minimizes the need for #ifdef's below...
+#define hash_map unordered_map
+#else
+using HASH_NAMESPACE::hash_map;
+#endif
+
+namespace ctemplate {
+
+using ctemplate_htmlparser::HtmlParser;
+
+// Exposes the global id of a TemplateString.  GetGlobalId() is normally a
+// private method, so this free function is the access point for it.
+TemplateId GlobalIdForSTS_INIT(const TemplateString& s) {
+  const TemplateId global_id = s.GetGlobalId();
+  return global_id;
+}
+
+int Template::num_deletes_ = 0;
+
+namespace {
+// Mutex for protecting Expand calls against ReloadIfChanged, which
+// might change a template while it's being expanded. This mutex used
+// to be a per-template mutex, rather than a global mutex, which seems
+// like it would be strictly better, but we ran into subtle problems
+// with deadlocks when a template would sub-include itself (thus
+// requiring a recursive read-lock during Expand), and the template
+// was Expanded and ReloadIfChanged at the same time. Rather than
+// deal with that complication, we just go with a global mutex. Since
+// ReloadIfChanged is deprecated, in most applications all the mutex
+// uses will be as read-locks, so this shouldn't cause much contention.
+static Mutex g_template_mutex(base::LINKER_INITIALIZED);
+
+// Mutex for protecting vars_seen in WriteOneHeaderEntry, below.
+// g_template_mutex and g_header_mutex are never held at the same time.
+// TODO(csilvers): assert this in the codebase.
+static Mutex g_header_mutex(base::LINKER_INITIALIZED);
+
+// It's not great to have a global variable with a constructor, but
+// it's safe in this case: the constructor is trivial and does not
+// depend on any other global constructors running first, and the
+// variable is used in only one place below, always after main() has
+// started.
+// It is ok for this modifier to be in XssClass XSS_WEB_STANDARD because
+// it only adds indentation characters - typically whitespace - iff these
+// are already present in the text. If such characters were XSS-harmful
+// in a given context, they would have already been escaped or replaced
+// by earlier escaping such as H=attribute.
+static const ModifierInfo g_prefix_line_info("", '\0', XSS_WEB_STANDARD,
+ &prefix_line);
+
+const char * const kDefaultTemplateDirectory = kCWD; // "./"
+// Note this name is syntactically impossible for a user to accidentally use.
+const char * const kMainSectionName = "__{{MAIN}}__";
+
+// A TemplateString whose hash (global id) is computed once, at construction
+// time, so that the same string can be used for many lookups without
+// re-hashing -- handy in template filling code.  It is meant for lookups
+// *only*: do not use it to fill any part of a template dictionary, since
+// nothing maps the id back to its string or manages the string's memory.
+class HashedTemplateString : public TemplateString {
+ public:
+  HashedTemplateString(const char* s, size_t slen)
+      : TemplateString(s, slen) {
+    this->CacheGlobalId();
+  }
+};
+
+// Starts a log statement at the given severity, prefixed with the name of
+// the file of `template` (a pointer on which template_file() is called).
+// Further text can be streamed onto the result with <<.
+#define LOG_TEMPLATE_NAME(severity, template) \
+ LOG(severity) << "Template " << template->template_file() << ": "
+
+// Logs an auto-escape error for `my_template`: first the template-name
+// prefix, then "Auto-Escape: " followed by error_msg and a newline.
+// Wrapped in do { } while (0) so it behaves as a single statement.
+#define LOG_AUTO_ESCAPE_ERROR(error_msg, my_template) do { \
+ LOG_TEMPLATE_NAME(ERROR, my_template); \
+ LOG(ERROR) << "Auto-Escape: " << error_msg << endl; \
+ } while (0)
+
+// We are in auto-escape mode.  (Any context other than TC_MANUAL.)
+#define AUTO_ESCAPE_MODE(context) ((context) != TC_MANUAL)
+
+// Auto-Escape contexts which utilize the HTML Parser.
+#define AUTO_ESCAPE_PARSING_CONTEXT(context) \
+ ((context) == TC_HTML || (context) == TC_JS || (context) == TC_CSS)
+
+// ----------------------------------------------------------------------
+// PragmaId
+// PragmaDefinition
+// PragmaMarker
+// Functionality to support the PRAGMA marker in the template, i.e
+// the {{%IDENTIFIER [name1="value1" [name2="value2"]...]}} syntax:
+// . IDENTIFIER as well as all attribute names are case-insensitive
+// whereas attribute values are case-sensitive.
+// . No extraneous whitespace is allowed (e.g. between name and '=').
+// . Double quotes inside an attribute value need to be backslash
+// escaped, i.e. " -> \". We unescape them during parsing.
+//
+// The only identifier currently supported is AUTOESCAPE which is
+// used to auto-escape a given template. Its syntax is:
+// {{%AUTOESCAPE context="context" [state="state"]}} where:
+// . context is one of: "HTML", "JAVASCRIPT", "CSS", "XML", "JSON".
+// . state may be omitted or equivalently, it may be set to "default".
+// It also accepts the value "IN_TAG" in the HTML context to
+// indicate the template contains HTML attribute name/value
+// pairs that are enclosed in a tag specified in a parent template.
+// e.g: Consider the parent template:
+// <a href="/bla" {{>INC}}>text</a>
+// and the included template:
+// class="{{CLASS}}" target="{{TARGET}}"
+// Then, for the included template to be auto-escaped properly, it
+// must have the pragma: {{%AUTOESCAPE context="HTML" state="IN_TAG"}}.
+// This is a very uncommon template structure.
+//
+// To add a new pragma identifier, you'll have to at least:
+// 1. Add a new id for it in PragmaId enum.
+// 2. Add the corresponding definition in static g_pragmas array
+// 3. If you accept more than 2 attributes, increase the size
+// of attribute_names in the PragmaDefinition struct.
+// 4. Add handling of that pragma in SectionTemplateNode::GetNextToken()
+// and possibly SectionTemplateNode::AddPragmaNode()
+// ----------------------------------------------------------------------
+
+// PragmaId
+// Identify all the pragma identifiers we support. Currently only
+// one (for AutoEscape). PI_ERROR is only for internal error reporting,
+// and is not a valid pragma identifier.
+// The enumerators double as indices into g_pragmas (see
+// PragmaMarker::IsValidAttribute), and NUM_PRAGMA_IDS is the array size,
+// so the order here must match the order of the g_pragmas entries.
+enum PragmaId { PI_UNUSED, PI_ERROR, PI_AUTOESCAPE, NUM_PRAGMA_IDS };
+
+// Each pragma definition has a unique identifier as well as a list of
+// attribute names it accepts. This allows initial error checking while
+// parsing a pragma definition. Such error checking will need supplementing
+// with more pragma-specific logic in SectionTemplateNode::GetNextToken().
+// Entries whose identifier is NULL (PI_UNUSED, PI_ERROR) are skipped by
+// PragmaMarker::GetPragmaId.  Unused attribute_names slots are left NULL
+// by the brace initializers; IsValidAttribute stops at the first NULL.
+static struct PragmaDefinition {
+ PragmaId pragma_id;
+ const char* identifier;
+ const char* attribute_names[2]; // Increase as needed.
+} g_pragmas[NUM_PRAGMA_IDS] = {
+ /* PI_UNUSED */ { PI_UNUSED, NULL, {} },
+ /* PI_ERROR */ { PI_ERROR, NULL, {} },
+ /* PI_AUTOESCAPE */ { PI_AUTOESCAPE, "AUTOESCAPE", {"context", "state"} }
+};
+
+// PragmaMarker
+// Functionality to parse the {{%...}} syntax and extract the
+// provided attribute values. We store the PragmaId as well
+// as a vector of all the attribute names and values provided.
+class PragmaMarker {
+ public:
+ // Constructs a PragmaMarker object from the PRAGMA marker
+ // {{%ID [name1="value1" [name2="value2"] ...]}}. The range
+ // [token_start, token_end) is the marker text, beginning at the
+ // identifier. On error (unable to parse the marker), a description
+ // of the problem is appended to error_msg. On success, error_msg
+ // is cleared.
+ PragmaMarker(const char* token_start, const char* token_end,
+ string* error_msg);
+
+ // Returns the attribute value for the corresponding attribute name
+ // or NULL if none is found (as is the case with optional attributes).
+ // The name is compared case-insensitively.
+ // Ensure you only call it on attribute names registered in g_pragmas
+ // for that PragmaId (this is assert()ed).
+ const string* GetAttributeValue(const char* attribute_name) const;
+
+ private:
+ // Checks that the identifier given matches one of the pragma
+ // identifiers we know of (case-insensitively), in which case returns
+ // the corresponding PragmaId. In case of error, returns PI_ERROR.
+ static PragmaId GetPragmaId(const char* id, size_t id_len);
+
+ // Parses an attribute value enclosed in double quotes. On entry,
+ // *value_end is the end of the text that may be scanned; on success
+ // it is updated to point at the closing double quote. Returns the
+ // attribute value. If an error occurred, a description is appended
+ // to error_msg and the empty string is returned.
+ // error_msg is cleared on success.
+ // Unescapes backslash-escaped double quotes ('\"' -> '"') if present.
+ static string ParseAttributeValue(const char* value_start,
+ const char** value_end,
+ string* error_msg);
+
+ // Returns true if the attribute name is an accepted one for that
+ // given PragmaId. Otherwise returns false. The comparison is
+ // case-insensitive; name need not be NUL-terminated (namelen is used).
+ static bool IsValidAttribute(PragmaId pragma_id, const char* name,
+ size_t namelen);
+
+ PragmaId pragma_id_;
+ // A vector of attribute (name, value) pairs, in the order in which
+ // they were parsed.
+ vector<pair<string, string> > names_and_values_;
+};
+
+// Maps the identifier text [id, id + id_len) to its PragmaId by scanning
+// g_pragmas.  The match is case-insensitive and must cover the whole
+// identifier.  Returns PI_ERROR when nothing matches.
+PragmaId PragmaMarker::GetPragmaId(const char* id, size_t id_len) {
+  for (int idx = 0; idx < NUM_PRAGMA_IDS; ++idx) {
+    const PragmaDefinition& def = g_pragmas[idx];
+    // Entries without an identifier (PI_UNUSED, PI_ERROR) can never match.
+    if (def.identifier != NULL &&
+        strlen(def.identifier) == id_len &&
+        strncasecmp(id, def.identifier, id_len) == 0) {
+      return def.pragma_id;
+    }
+  }
+  return PI_ERROR;
+}
+
+// Reports whether [name, name + namelen) is one of the attribute names that
+// g_pragmas registers for pragma_id.  Names are compared case-insensitively;
+// the registered list ends at the first NULL slot (or at the array's end).
+bool PragmaMarker::IsValidAttribute(PragmaId pragma_id, const char* name,
+                                    size_t namelen) {
+  const char* const* const accepted = g_pragmas[pragma_id].attribute_names;
+  const int num_slots = sizeof(g_pragmas[0].attribute_names) /
+                        sizeof(*g_pragmas[0].attribute_names);
+  for (int slot = 0; slot < num_slots && accepted[slot] != NULL; ++slot) {
+    if (strlen(accepted[slot]) != namelen)
+      continue;   // different length, cannot be the same name
+    if (strncasecmp(accepted[slot], name, namelen) == 0)
+      return true;   // found the given name in our accepted attribute list
+  }
+  return false;   // the name is not registered for this pragma
+}
+
+// Looks up the value parsed for attribute_name (case-insensitive match on
+// the name).  Returns a pointer into names_and_values_, or NULL when the
+// attribute was not present in the marker.
+const string* PragmaMarker::GetAttributeValue(
+    const char* attribute_name) const {
+  // Asking for a name that is not registered for this pragma is a
+  // developer error.
+  assert(IsValidAttribute(pragma_id_, attribute_name, strlen(attribute_name)));
+  typedef vector<pair<string, string> >::const_iterator AttrIter;
+  const AttrIter last = names_and_values_.end();
+  AttrIter attr = names_and_values_.begin();
+  while (attr != last) {
+    if (strcasecmp(attribute_name, attr->first.c_str()) == 0)
+      return &attr->second;
+    ++attr;
+  }
+  return NULL;
+}
+
+// Parses a double-quoted attribute value starting at value_start.
+//   value_start: must point at the opening '"'.
+//   value_end:   in: end of the text that may be scanned;
+//                out (success only): points at the closing '"'.
+//   error_msg:   cleared on success; on failure a description is appended
+//                and the empty string is returned.
+// A '"' immediately preceded by a backslash does not terminate the value;
+// each such \" pair is unescaped to a plain '"' in the returned string.
+string PragmaMarker::ParseAttributeValue(const char* value_start,
+                                         const char** value_end,
+                                         string* error_msg) {
+  assert(error_msg);
+  // Check the bound first so we never read past the end of the token.
+  if (value_start >= *value_end || *value_start != '"') {
+    error_msg->append("Attribute value is not enclosed in double quotes.");
+    return "";
+  }
+  const char* current = ++value_start;   // Advance past the leading '"'
+  const char* val_end;
+  do {
+    if (current >= *value_end ||
+        ((val_end =
+          (const char*)memchr(current, '"', *value_end - current)) == NULL)) {
+      error_msg->append("Attribute value not terminated.");
+      return "";
+    }
+    current = val_end + 1;   // Advance past the current '"'
+  } while (val_end[-1] == '\\');   // an escaped quote: keep looking
+
+  string attribute_value(value_start, val_end - value_start);
+  // Now replace \" with ".  The search resumes just after the quote that
+  // was unescaped, so every pair is processed exactly once: text that ends
+  // up looking like \" after an erase is not unescaped a second time, and
+  // the scan stays linear in the length of the value.
+  size_t found = 0;
+  while ((found = attribute_value.find("\\\"", found)) != string::npos) {
+    attribute_value.erase(found, 1);   // drop the backslash
+    ++found;                           // step over the now-plain quote
+  }
+  *value_end = val_end;
+  error_msg->clear();
+  return attribute_value;
+}
+
+// Parses the marker text [token_start, token_end): an identifier, then
+// zero or more ' name="value"' pairs, each introduced by one space.
+// Accepted pairs are stored in names_and_values_.  On failure a message
+// naming the whole directive is appended to error_msg; on success
+// error_msg is cleared.
+PragmaMarker::PragmaMarker(const char* token_start, const char* token_end,
+                           string* error_msg) {
+  assert(error_msg);
+  string error;
+  const size_t token_len = token_end - token_start;
+
+  // The identifier runs up to the first space, or to the end of the token.
+  const char* identifier_end =
+      static_cast<const char*>(memchr(token_start, ' ', token_len));
+  if (!identifier_end)
+    identifier_end = token_end;
+  pragma_id_ = PragmaMarker::GetPragmaId(token_start,
+                                         identifier_end - token_start);
+
+  if (pragma_id_ != PI_ERROR) {
+    // Walk the attribute name/value pairs.
+    const char* cursor = identifier_end;
+    while (cursor < token_end) {
+      // Either after identifier or after a name/value pair. Must be
+      // whitespace.
+      if (*cursor != ' ') {
+        error = "Extraneous text.";
+        break;
+      }
+      const char* const name_start = cursor + 1;
+      const char* equals = static_cast<const char*>(
+          memchr(name_start, '=', token_end - name_start));
+      if (equals == NULL || equals == name_start) {
+        error = "Missing attribute name or value";
+        break;
+      }
+      const string attribute_name(name_start, equals - name_start);
+      if (!PragmaMarker::IsValidAttribute(pragma_id_, attribute_name.data(),
+                                          attribute_name.length())) {
+        error = "Unrecognized attribute name: " + attribute_name;
+        break;
+      }
+      // The value starts right after '='; val_end goes in as the scan
+      // limit and comes back pointing at the closing quote.
+      const char* val_end = token_end;
+      const string attribute_value =
+          ParseAttributeValue(equals + 1, &val_end, &error);
+      if (!error.empty())   // Failed to parse attribute value.
+        break;
+      names_and_values_.push_back(
+          pair<string, string>(attribute_name, attribute_value));
+      cursor = val_end + 1;   // continue just past the closing quote
+    }
+  } else {
+    error = "Unrecognized pragma identifier.";
+  }
+
+  if (!error.empty()) {   // Error
+    error_msg->append("In PRAGMA directive '" +
+                      string(token_start, token_len) +
+                      "' Error: " + error);
+  } else {                // Success
+    error_msg->clear();
+  }
+}
+
+// ----------------------------------------------------------------------
+// memmatch()
+//    Returns a pointer to the first occurrence of the length-delimited
+//    string `needle` inside the length-delimited string `haystack`, or
+//    NULL if there is none.  The mem version of strstr.  An empty needle
+//    matches at the start of the haystack, even an empty one.
+// ----------------------------------------------------------------------
+
+static const char *memmatch(const char *haystack, size_t haystack_len,
+                            const char *needle, size_t needle_len) {
+  if (needle_len == 0)
+    return haystack;   // even if haystack_len is 0
+  if (needle_len > haystack_len)
+    return NULL;
+
+  // One past the last position at which a complete needle still fits.
+  const char* const last_start = haystack + haystack_len - needle_len + 1;
+  for (const char* scan = haystack; ; ) {
+    // Jump to the next place where the needle's first byte occurs.
+    const void* hit = memchr(scan, needle[0], last_start - scan);
+    if (hit == NULL)
+      return NULL;
+    const char* candidate = static_cast<const char*>(hit);
+    if (memcmp(candidate, needle, needle_len) == 0)
+      return candidate;
+    scan = candidate + 1;
+  }
+}
+
+// ----------------------------------------------------------------------
+// FilenameValidForContext()
+// GetTemplateContextFromPragma()
+// GetModifierForContext()
+// FindLongestMatch()
+// PrettyPrintTokenModifiers()
+// Static methods for the auto-escape mode specifically.
+
+// Performs matching of the filename against the TemplateContext and
+// warns in the log on a mismatch, using these "unwritten" filename
+// conventions for templates in our codebase:
+//   1. If the basename contains the full word "css", "stylesheet" or
+//      "style", the context is expected to be TC_CSS.
+//   2. Otherwise, if it contains the full word "js" or "javascript",
+//      the context is expected to be TC_JS.
+// Returns false if there was a mismatch, although currently
+// we ignore it and just rely on the LOG(WARNING) in the logs.
+static bool FilenameValidForContext(const string& filename,
+                                    TemplateContext context) {
+  const string stripped_filename = Basename(filename);
+
+  const bool looks_like_css =
+      GOOGLE_NAMESPACE::ContainsFullWord(stripped_filename, "css") ||
+      GOOGLE_NAMESPACE::ContainsFullWord(stripped_filename, "stylesheet") ||
+      GOOGLE_NAMESPACE::ContainsFullWord(stripped_filename, "style");
+  if (looks_like_css) {
+    if (context == TC_CSS)
+      return true;
+    LOG(WARNING) << "Template filename " << filename
+                 << " indicates CSS but given TemplateContext"
+                 << " was not TC_CSS." << endl;
+    return false;
+  }
+
+  // The javascript words are only examined when no CSS word was found.
+  const bool looks_like_js =
+      GOOGLE_NAMESPACE::ContainsFullWord(stripped_filename, "js") ||
+      GOOGLE_NAMESPACE::ContainsFullWord(stripped_filename, "javascript");
+  if (looks_like_js && context != TC_JS) {
+    LOG(WARNING) << "Template filename " << filename
+                 << " indicates javascript but given TemplateContext"
+                 << " was not TC_JS." << endl;
+    return false;
+  }
+  return true;
+}
+
+// Builds a human-readable description of the modifiers in the vector by
+// concatenating the pretty-printed form of each one, in order:
+//   :modifier1[=val1][:modifier2][=val2]...
+static string PrettyPrintTokenModifiers(
+    const vector<ModifierAndValue>& modvals) {
+  string description;
+  for (size_t i = 0; i < modvals.size(); ++i)
+    description.append(PrettyPrintOneModifier(modvals[i]));
+  return description;
+}
+
+// Translates the "context" attribute of an AUTOESCAPE pragma into a
+// TemplateContext.  Each context is accepted spelled entirely in upper
+// case or entirely in lower case.  Returns TC_MANUAL to indicate an
+// error: the attribute is missing or names no known context.
+static TemplateContext GetTemplateContextFromPragma(
+    const PragmaMarker& pragma) {
+  static const struct {
+    const char* upper;
+    const char* lower;
+    TemplateContext context;
+  } kKnownContexts[] = {
+    { "HTML",       "html",       TC_HTML },
+    { "JAVASCRIPT", "javascript", TC_JS },
+    { "CSS",        "css",        TC_CSS },
+    { "JSON",       "json",       TC_JSON },
+    { "XML",        "xml",        TC_XML },
+  };
+
+  const string* const requested = pragma.GetAttributeValue("context");
+  if (requested == NULL)
+    return TC_MANUAL;
+  const size_t num_known = sizeof(kKnownContexts) / sizeof(kKnownContexts[0]);
+  for (size_t i = 0; i < num_known; ++i) {
+    if (*requested == kKnownContexts[i].upper ||
+        *requested == kKnownContexts[i].lower)
+      return kKnownContexts[i].context;
+  }
+  return TC_MANUAL;
+}
+
+// Based on the template context and the state of the parser, determines
+// the appropriate escaping directive(s) and returns them as a vector of
+// pointers to ModifierAndValue objects.  Called when a variable template
+// node is traversed.
+// An empty vector means no suitable modifier exists for that context; the
+// error is logged here and the caller is expected to fail the template
+// initialization.
+static const vector<const ModifierAndValue*> GetModifierForContext(
+    TemplateContext my_context, HtmlParser *htmlparser,
+    const Template* my_template) {
+  assert(AUTO_ESCAPE_MODE(my_context));
+  string error_msg;
+  vector<const ModifierAndValue*> modvals;
+
+  if (my_context == TC_XML) {
+    modvals = GetModifierForXml(htmlparser, &error_msg);
+  } else if (my_context == TC_JSON) {
+    modvals = GetModifierForJson(htmlparser, &error_msg);
+  } else if (my_context == TC_CSS) {
+    assert(htmlparser);   // Parser is active in CSS
+    modvals = GetModifierForCss(htmlparser, &error_msg);
+  } else {
+    // Must be in TC_HTML or TC_JS. Parser is active in these modes.
+    assert(AUTO_ESCAPE_PARSING_CONTEXT(my_context));
+    assert(htmlparser);
+    modvals = GetModifierForHtmlJs(htmlparser, &error_msg);
+  }
+
+  // In any mode, there should be at least one modifier.
+  if (modvals.empty()) {
+    LOG_AUTO_ESCAPE_ERROR(error_msg, my_template);
+  }
+  return modvals;
+}
+
+// Returns the largest int N indicating how many XSS safe alternative
+// modifiers are in the in-template modifiers already.
+// . If N is equal to the number of modifiers determined by the Auto Escaper,
+// we have a full match and the in-template modifiers were safe. We leave
+// them untouched.
+// . Otherwise, N is less (or zero) and we have a partial match (or none).
+// The in-template modifiers are not XSS safe and need the missing ones,
+// i.e. those in the auto escape modifiers which are not in the first N.
+//
+// We allow in-template modifiers to have extra modifiers than we deem
+// necessary, for e.g. :j:h when :j would have sufficed. But to make sure
+// these modifiers do not introduce XSS concerns we require that they
+// be in the same XssClass as the modifier we had.
+// For example :h:x-bla is not safe in HTML context because x-bla is
+// in a different XssClass as our :h whereas :h:j would be safe.
+//
+// modvals_man holds the modifiers written in the template; modvals_auto
+// holds the ones computed by the auto-escaper. Returns 0 when
+// modvals_auto is empty.
+static size_t FindLongestMatch(
+ const vector<ModifierAndValue>& modvals_man,
+ const vector<const ModifierAndValue*>& modvals_auto) {
+ if (modvals_auto.empty())
+ return 0;
+
+ // See if modvals_auto is "consistent" with the modifiers that are
+ // already present (modvals_man). This is true if all the
+ // modifiers in auto also occur in man, and any gaps between them
+ // (if any) are filled by "neutral" modifiers that do not affect
+ // xss-safety. We go through the vectors backwards.
+ // If all of modvals_auto is not consistent, maybe a prefix of it
+ // is; that's better than nothing, since we only need to auto-apply
+ // the suffix that's not already in modvals_man.
+ typedef vector<const ModifierAndValue*>::const_reverse_iterator
+ ModAutoIterator;
+ typedef vector<ModifierAndValue>::const_reverse_iterator ModManIterator;
+ // end_of_prefix marks the last element of the prefix of modvals_auto
+ // being tried. Starting at rbegin() tries the whole vector first; each
+ // outer iteration then drops one more element from the end, so the
+ // first prefix that matches is the longest one.
+ for (ModAutoIterator end_of_prefix = modvals_auto.rbegin();
+ end_of_prefix != modvals_auto.rend();
+ ++end_of_prefix) {
+ ModAutoIterator curr_auto = end_of_prefix;
+ // Matching always starts from the last in-template modifier.
+ ModManIterator curr_man = modvals_man.rbegin();
+ while (curr_auto != modvals_auto.rend() &&
+ curr_man != modvals_man.rend()) {
+ if (IsSafeXSSAlternative(*(*curr_auto)->modifier_info,
+ *curr_man->modifier_info)) {
+ // This in-template modifier covers the current auto modifier.
+ ++curr_auto;
+ ++curr_man;
+ } else if ((curr_man->modifier_info->xss_class ==
+ (*curr_auto)->modifier_info->xss_class) &&
+ (curr_man->modifier_info->xss_class != XSS_UNIQUE)) {
+ ++curr_man; // Ignore this modifier: it's harmless.
+ } else {
+ break; // An incompatible modifier; we've failed
+ }
+ }
+ // The distance from end_of_prefix to rend() is the number of elements
+ // in the prefix that was just matched.
+ if (curr_auto == modvals_auto.rend()) // got through them all, full match!
+ return curr_auto - end_of_prefix;
+ }
+ return 0;
+}
+
+// ----------------------------------------------------------------------
+// WriteOneHeaderEntry()
+// This dumps information about a template that is useful to
+// make_tpl_varnames_h -- information about the variable and
+// section names used in a template, so we can define constants
+// to refer to them instead of having to type them in by hand.
+// Output is *appended* to outstring.
+// ----------------------------------------------------------------------
+
+// Appends to *outstring one "static const ... StaticTemplateString" line
+// for `variable`, unless an entry for that name was already handled for
+// the current file, or the name is the main section or starts with "BI_".
+// The constant's name is a per-file prefix followed by the variable name.
+// The prefix is "k", then the first character of each '_'-separated word
+// of the file's basename (stopping at the first '.' or at a word starting
+// with "post"), then "_".  The per-file state lives in function-local
+// statics guarded by g_header_mutex and is reset whenever full_pathname
+// differs from the previous call's.
+static void WriteOneHeaderEntry(
+    string *outstring, const string& variable, const string& full_pathname)
+    LOCKS_EXCLUDED(g_header_mutex) {
+  MutexLock ml(&g_header_mutex);
+
+  // we use hash_map instead of hash_set just to keep the stl size down
+  static hash_map<string, bool, StringHash> vars_seen
+      GUARDED_BY(g_header_mutex);
+  static string current_file GUARDED_BY(g_header_mutex);
+  static string prefix GUARDED_BY(g_header_mutex);
+
+  if (full_pathname != current_file) {
+    // A different file: start over with fresh per-file state.
+    vars_seen.clear();
+    current_file = full_pathname;
+
+    // Only the filename proper (no directory part) feeds the prefix.
+    const string filename(Basename(full_pathname));
+
+    prefix = "k";
+    bool at_word_start = true;
+    const string::size_type len = filename.length();
+    // Stop at the first dot (or at the end of the name).
+    for (string::size_type pos = 0;
+         pos < len && filename[pos] != '.'; ++pos) {
+      if (at_word_start) {
+        if (filename.compare(pos, 4, "post") == 0)
+          break;   // stop before we process post...
+        prefix += filename[pos];
+        at_word_start = false;
+      }
+      if (filename[pos] == '_')
+        at_word_start = true;
+    }
+    prefix += "_";
+  }
+
+  // Each variable is handled at most once per file.
+  if (vars_seen.count(variable))
+    return;
+
+  // We don't want to write entries for __MAIN__ or the built-ins.
+  const bool skip_entry =
+      variable == kMainSectionName || variable.compare(0, 3, "BI_") == 0;
+  if (!skip_entry) {
+    const TemplateId id = GlobalIdForSTS_INIT(TemplateString(variable));
+    std::ostringstream entry;
+    entry << "static const "
+          << AS_STR(GOOGLE_NAMESPACE) << "::StaticTemplateString "
+          << prefix << variable << " = STS_INIT_WITH_HASH("
+          << prefix << variable << ", \"" << variable << "\", "
+          << id << "ULL);\n";
+    outstring->append(entry.str());
+  }
+  vars_seen[variable] = true;
+}
+
+// ----------------------------------------------------------------------
+// TemplateToken
+// A TemplateToken is a string marked with a token type enum. The string
+// has different meanings for different token types. For text, the
+// string is the text itself. For variable and template types, the
+// string is the name of the variable holding the value or the
+// template name, resp. For section types, the string is the name
+// of the section, used to retrieve the hidden/visible state and
+// the associated list of dictionaries, if any. For pragma type,
+// the string is the full text of the marker and is only used for
+// debug information.
+// ----------------------------------------------------------------------
+
+enum TemplateTokenType { TOKENTYPE_UNUSED, TOKENTYPE_TEXT,
+ TOKENTYPE_VARIABLE, TOKENTYPE_SECTION_START,
+ TOKENTYPE_SECTION_END, TOKENTYPE_TEMPLATE,
+ TOKENTYPE_COMMENT, TOKENTYPE_SET_DELIMITERS,
+ TOKENTYPE_PRAGMA, TOKENTYPE_NULL,
+ TOKENTYPE_HIDDEN_DEFAULT_SECTION,
+ };
+
+} // unnamed namespace
+
+// A sorted array of Template variable names that Auto-Escape should
+// not escape. Variables that you may want to add here typically
+// satisfy all the following conditions:
+// 1. Are "trusted" variables, meaning variables you know to not
+// contain potentially harmful content.
+// 2. Contain some markup that gets broken when escaping is
+// applied to them.
+// 3. Are used often such that requiring developers to add
+// ":none" to each use is error-prone and inconvenient.
+//
+// Note: Keep this array sorted as you add new elements!
+//
+const char * const Template::kSafeWhitelistedVariables[] = {
+ "" // a placekeeper element: replace with your real values!
+};
+const size_t Template::kNumSafeWhitelistedVariables =
+ arraysize(Template::kSafeWhitelistedVariables);
+
+// A TemplateToken is a typed string. The semantics of the string depends on the
+// token type, as follows:
+// TOKENTYPE_TEXT - the text
+// TOKENTYPE_VARIABLE - the name of the variable
+// TOKENTYPE_SECTION_START - the name of the section being started
+// TOKENTYPE_SECTION_END - the name of the section being ended
+// TOKENTYPE_TEMPLATE - the name of the variable whose value will be
+// the template filename
+// TOKENTYPE_COMMENT - the empty string, not used
+// TOKENTYPE_SET_DELIMITERS- the empty string, not used
+// TOKENTYPE_PRAGMA - identifier and optional set of name/value pairs
+// - exactly as given in the template
+// TOKENTYPE_NULL - the empty string
+// TOKENTYPE_HIDDEN_DEFAULT_SECTION
+// - like TOKENTYPE_SECTION_START, but defaults to
+// hidden
+// All non-comment tokens may also have modifiers, which follow the name
+// of the token: the syntax is {{<PREFIX><NAME>:<mod>:<mod>:<mod>...}}
+// The modifiers are also stored as a string, starting with the first :
+struct TemplateToken {
+  TemplateTokenType type;
+  const char* text;     // not owned; meaning depends on `type`
+  size_t textlen;       // length of `text` (need not be NUL-terminated)
+  vector<ModifierAndValue> modvals;
+
+  // Builds a token of type t over [txt, txt + len).  The modifiers are
+  // copied from *m when m is non-NULL; otherwise the token has none.
+  TemplateToken(TemplateTokenType t, const char* txt, size_t len,
+                const vector<ModifierAndValue>* m)
+      : type(t), text(txt), textlen(len) {
+    if (m) modvals = *m;
+  }
+
+  // Returns the token text followed by ":<long modifier name>" for each
+  // modifier; modifiers that are not registered are tagged as such.
+  string ToString() const {   // used for debugging (annotations)
+    string retval(text, textlen);
+    for (vector<ModifierAndValue>::const_iterator it = modvals.begin();
+         it != modvals.end(); ++it) {
+      const string& modname = it->modifier_info->long_name;
+      retval += ':';
+      retval += modname;
+      if (!it->modifier_info->is_registered)
+        retval += "<not registered>";
+    }
+    return retval;
+  }
+
+  // Updates the correct modifiers for the token (variable or template node)
+  // based on our computed modifiers from the HTML parser context as well
+  // as the in-template modifiers that may have been provided.
+  // If the in-template modifiers are considered safe, we use them
+  // without modification. This could happen in one of three cases:
+  //   1. The token has the ":none" modifier as one of the modifiers.
+  //   2. The token has a custom modifier considered XSS-Safe as one of
+  //      the modifiers. The modifier was added via AddXssSafeModifier()
+  //      and has the XSS_SAFE XssClass.
+  //   3. The escaping modifiers are XSS-equivalent to the ones we computed.
+  //
+  // If the in-template modifiers are not found to be safe, we add
+  // the escaping modifiers we determine missing. This is done based on a
+  // longest match search between the two modifiers vectors, refer to comment
+  // in FindLongestMatch. We also issue a warning in the log, unless the
+  // in-template modifiers were all not escaping related (e.g. custom)
+  // since that case is similar to that of not providing any modifiers.
+  void UpdateModifier(const vector<const ModifierAndValue*>& auto_modvals) {
+    typedef vector<const ModifierAndValue*>::const_iterator AutoIter;
+    typedef vector<ModifierAndValue>::const_iterator ManIter;
+
+    // Common case: no modifiers given in template. Assign our own. No warning.
+    if (modvals.empty()) {
+      modvals.reserve(auto_modvals.size());
+      for (AutoIter it = auto_modvals.begin(); it != auto_modvals.end(); ++it)
+        modvals.push_back(**it);
+      return;
+    }
+
+    // Look for any XSS-Safe modifiers (added via AddXssSafeModifier or :none).
+    // If one is found anywhere in the vector, consider the variable safe.
+    for (ManIter it = modvals.begin(); it != modvals.end(); ++it) {
+      if (it->modifier_info->xss_class == XSS_SAFE)
+        return;
+    }
+
+    const size_t longest_match = FindLongestMatch(modvals, auto_modvals);
+    if (longest_match == auto_modvals.size())
+      return;   // We have a complete match, nothing to do.
+
+    // Partial (or no) match: copy the missing ones and issue a warning.
+    // longest_match is unsigned, so only the upper bound needs checking.
+    assert(longest_match < auto_modvals.size());
+
+    // We only log if one or more of the in-template modifiers was
+    // escaping-related which we infer from the XssClass. Currently,
+    // all escaping modifiers are in XSS_WEB_STANDARD except for 'none'
+    // but that one is handled above.
+    bool do_log = false;
+    for (ManIter it = modvals.begin(); it != modvals.end(); ++it) {
+      if (it->modifier_info->xss_class == XSS_WEB_STANDARD) {
+        do_log = true;
+        break;
+      }
+    }
+
+    // The "before" picture must be captured prior to appending, but it is
+    // only needed (and therefore only built) when we are going to log.
+    string before;
+    if (do_log)
+      before = PrettyPrintTokenModifiers(modvals);
+
+    for (AutoIter it = auto_modvals.begin() + longest_match;
+         it != auto_modvals.end(); ++it) {
+      modvals.push_back(**it);
+    }
+
+    if (do_log) {
+      LOG(ERROR)
+          << "Token: " << string(text, textlen)
+          << " has missing in-template modifiers. You gave " << before
+          << " and we computed " << PrettyPrintModifiers(auto_modvals, "")
+          << ". We changed to " << PrettyPrintTokenModifiers(modvals) << endl;
+    }
+  }
+};
+
+ // Returns true as soon as MightModify() returns true for any entry of
+ // |modifiers| (each is queried with |data| and its own value string);
+ // returns false if none does, including when |modifiers| is empty.
+ static bool AnyMightModify(const vector<ModifierAndValue>& modifiers,
+                            const PerExpandData* data) {
+   typedef vector<ModifierAndValue>::const_iterator ModvalIter;
+   ModvalIter cur = modifiers.begin();
+   while (cur != modifiers.end()) {
+     string modifier_arg(cur->value, cur->value_len);
+     if (cur->modifier_info->modifier->MightModify(data, modifier_arg))
+       return true;
+     ++cur;
+   }
+   return false;
+ }
+
+ // Runs the string in/inlen through every entry of |modifiers|, in order,
+ // and appends the final result to outbuf. All modifiers but the last
+ // write into a temporary string; the last one writes straight into outbuf.
+ // Precondition: |modifiers| > 0.
+ //
+ // TODO(user): In the case of multiple modifiers, we are applying
+ // all of them if any of them MightModify the output. We can do
+ // better. We should store the MightModify values that we use to
+ // compute AnyMightModify and respect them here.
+ static void EmitModifiedString(const vector<ModifierAndValue>& modifiers,
+                                const char* in, size_t inlen,
+                                const PerExpandData* data,
+                                ExpandEmitter* outbuf) {
+   assert(!modifiers.empty());
+   const size_t num_modifiers = modifiers.size();
+   string result;  // output of the most recently applied non-final modifier
+
+   if (num_modifiers > 1) {
+     // With several modifiers the intermediate results have to live
+     // somewhere; we keep them in a string. The reservation assumes
+     // each modifier adds about 12% to the size of its input.
+     result.reserve((inlen + inlen/8) + 16);
+     {
+       const ModifierAndValue& first = modifiers.front();
+       StringEmitter scratchbuf(&result);
+       string first_arg(first.value, first.value_len);
+       first.modifier_info->modifier->Modify(in, inlen, data,
+                                             &scratchbuf, first_arg);
+     }
+     // Middle modifiers (present only when there are more than two):
+     // each reads the previous result and produces the next one.
+     for (size_t i = 1; i + 1 < num_modifiers; ++i) {
+       const ModifierAndValue& middle = modifiers[i];
+       string output_of_this_modifier;
+       output_of_this_modifier.reserve(result.size() + result.size()/8 + 16);
+       StringEmitter scratchbuf2(&output_of_this_modifier);
+       string middle_arg(middle.value, middle.value_len);
+       middle.modifier_info->modifier->Modify(result.c_str(), result.size(),
+                                              data, &scratchbuf2, middle_arg);
+       result.swap(output_of_this_modifier);
+     }
+     // The last modifier consumes the accumulated intermediate result.
+     in = result.data();
+     inlen = result.size();
+   }
+
+   // The final (possibly only) modifier can write directly into outbuf.
+   const ModifierAndValue& last = modifiers.back();
+   string last_arg(last.value, last.value_len);
+   last.modifier_info->modifier->Modify(in, inlen, data, outbuf, last_arg);
+ }
+
+ // Appends one piece of debug output to |out|: level * kIndent spaces,
+ // followed by |before|, the text of |token|, and |after|.
+ // |before| and |after| must not alias |*out|.
+ static void AppendTokenWithIndent(int level, string *out, const string& before,
+                                   const TemplateToken& token,
+                                   const string& after) {
+   // Append each piece in place rather than building temporary strings
+   // for the indentation and for the concatenated text.
+   out->append(level * kIndent, ' ');
+   out->append(before);
+   out->append(token.text, token.textlen);
+   out->append(after);
+ }
+
+// ----------------------------------------------------------------------
+// TemplateNode
+// When we read a template, we decompose it into its components:
+// variables, sections, include-templates, and runs of raw text.
+// Each of these we see becomes one TemplateNode. TemplateNode
+// is the abstract base class; each component has its own type.
+// ----------------------------------------------------------------------
+
+ class TemplateNode {
+ public:
+ TemplateNode() {}
+ virtual ~TemplateNode() {}
+
+ // Expands this node using |dictionary| and emits the result to
+ // |output_buffer|. If per_expand_data->annotate() is true, the
+ // output is annotated. |cache| is used to expand included templates.
+ // Returns true iff all the template files load and parse correctly.
+ virtual bool Expand(ExpandEmitter *output_buffer,
+ const TemplateDictionaryInterface *dictionary,
+ PerExpandData *per_expand_data,
+ const TemplateCache *cache) const = 0;
+
+ // Appends to |outstring| this node's entries for a header file that
+ // provides syntax checking at compile time.
+ virtual void WriteHeaderEntries(string *outstring,
+ const string& filename) const = 0;
+
+ // Appends a representation of the node and its subnodes to |out|
+ // as a debugging aid. |level| is the indentation (nesting) level.
+ virtual void DumpToString(int level, string *out) const = 0;
+
+ protected:
+ typedef list<TemplateNode *> NodeList;
+
+ private:
+ TemplateNode(const TemplateNode&); // private: disallow copying
+ void operator=(const TemplateNode&);
+ };
+
+// ----------------------------------------------------------------------
+// TextTemplateNode
+// The simplest template-node: it holds runs of raw template text,
+// that should be emitted verbatim. The text points into
+// template_text_.
+// ----------------------------------------------------------------------
+
+class TextTemplateNode : public TemplateNode {
+ public:
+ explicit TextTemplateNode(const TemplateToken& token)
+ : token_(token) {
+ VLOG(2) << "Constructing TextTemplateNode: "
+ << string(token_.text, token_.textlen) << endl;
+ }
+ virtual ~TextTemplateNode() {
+ VLOG(2) << "Deleting TextTemplateNode: "
+ << string(token_.text, token_.textlen) << endl;
+ }
+
+ // Expands the text node by simply outputting the text string. This
+ // virtual method does not use TemplateDictionaryInterface or PerExpandData.
+ // Returns true iff all the template files load and parse correctly.
+ virtual bool Expand(ExpandEmitter *output_buffer,
+ const TemplateDictionaryInterface *,
+ PerExpandData *,
+ const TemplateCache *) const {
+ output_buffer->Emit(token_.text, token_.textlen);
+ return true;
+ }
+
+ // A noop for text nodes
+ virtual void WriteHeaderEntries(string *outstring,
+ const string& filename) const {
+ return;
+ }
+
+ // Appends a representation of the text node to a string.
+ virtual void DumpToString(int level, string *out) const {
+ assert(out);
+ AppendTokenWithIndent(level, out, "Text Node: -->|", token_, "|<--\n");
+ }
+
+ private:
+ TemplateToken token_; // The text held by this node.
+};
+
+// ----------------------------------------------------------------------
+// VariableTemplateNode
+// Holds a variable to be replaced when the template is expanded.
+// The variable is stored in a token object, which has a char*
+// that points into template_text_. There may also be modifiers,
+// which are applied at Expand time.
+// ----------------------------------------------------------------------
+
+class VariableTemplateNode : public TemplateNode {
+ public:
+ explicit VariableTemplateNode(const TemplateToken& token)
+ : token_(token),
+ variable_(token_.text, token_.textlen) {
+ VLOG(2) << "Constructing VariableTemplateNode: "
+ << string(token_.text, token_.textlen) << endl;
+ }
+ virtual ~VariableTemplateNode() {
+ VLOG(2) << "Deleting VariableTemplateNode: "
+ << string(token_.text, token_.textlen) << endl;
+ }
+
+ // Expands the variable node by outputting the value (if there is one)
+ // of the node variable which is retrieved from the dictionary
+ // Returns true iff all the template files load and parse correctly.
+ virtual bool Expand(ExpandEmitter *output_buffer,
+ const TemplateDictionaryInterface *dictionary,
+ PerExpandData *per_expand_data,
+ const TemplateCache *cache) const;
+
+ virtual void WriteHeaderEntries(string *outstring,
+ const string& filename) const {
+ WriteOneHeaderEntry(outstring, string(token_.text, token_.textlen),
+ filename);
+ }
+
+ // Appends a representation of the variable node to a string. We
+ // also append the modifiers for that variable in the form:
+ // :modifier1[=val1][:modifier2][=val2]...\n
+ virtual void DumpToString(int level, string *out) const {
+ assert(out);
+ AppendTokenWithIndent(level, out, "Variable Node: ", token_,
+ PrettyPrintTokenModifiers(token_.modvals) + "\n");
+ }
+
+ private:
+ const TemplateToken token_;
+ const HashedTemplateString variable_;
+};
+
+ // Emits the dictionary value of this node's variable, bracketed by
+ // open/close-variable annotations when annotation is enabled.
+ // Always returns true. |cache| is not used.
+ bool VariableTemplateNode::Expand(ExpandEmitter *output_buffer,
+                                   const TemplateDictionaryInterface *dictionary,
+                                   PerExpandData* per_expand_data,
+                                   const TemplateCache *cache) const {
+   if (per_expand_data->annotate()) {
+     per_expand_data->annotator()->EmitOpenVariable(output_buffer,
+                                                    token_.ToString());
+   }
+
+   const TemplateString var_value = dictionary->GetValue(variable_);
+
+   if (!AnyMightModify(token_.modvals, per_expand_data)) {
+     // No modifier would change the value, so emit it untouched.
+     output_buffer->Emit(var_value.data(), var_value.size());
+   } else {
+     EmitModifiedString(token_.modvals, var_value.data(), var_value.size(),
+                        per_expand_data, output_buffer);
+   }
+
+   if (per_expand_data->annotate()) {
+     per_expand_data->annotator()->EmitCloseVariable(output_buffer);
+   }
+
+   return true;
+ }
+
+// ----------------------------------------------------------------------
+// PragmaTemplateNode
+// It simply stores the text given inside the pragma marker
+// {{%...}} for possible use in DumpToString().
+// ----------------------------------------------------------------------
+
+class PragmaTemplateNode : public TemplateNode {
+ public:
+ explicit PragmaTemplateNode(const TemplateToken& token)
+ : token_(token) {
+ VLOG(2) << "Constructing PragmaTemplateNode: "
+ << string(token_.text, token_.textlen) << endl;
+ }
+ virtual ~PragmaTemplateNode() {
+ VLOG(2) << "Deleting PragmaTemplateNode: "
+ << string(token_.text, token_.textlen) << endl;
+ }
+
+ // A no-op for pragma nodes.
+ virtual bool Expand(ExpandEmitter *output_buffer,
+ const TemplateDictionaryInterface *,
+ PerExpandData *,
+ const TemplateCache *) const {
+ return true;
+ };
+
+ // A no-op for pragma nodes.
+ virtual void WriteHeaderEntries(string *outstring,
+ const string& filename) const { }
+
+ // Appends a representation of the pragma node to a string. We output
+ // the full text given in {{%...}} verbatim.
+ virtual void DumpToString(int level, string *out) const {
+ assert(out);
+ AppendTokenWithIndent(level, out, "Pragma Node: -->|", token_, "|<--\n");
+ }
+
+ private:
+ TemplateToken token_; // The text of the pragma held by this node.
+};
+
+// ----------------------------------------------------------------------
+// TemplateTemplateNode
+// Holds a variable to be replaced by an expanded (included)
+// template whose filename is the value of the variable in the
+// dictionary.
+// Also holds the TemplateContext which it passes on to
+// GetTemplateCommon when this included template is initialized.
+// The indentation_ string is used by the PrefixLine modifier so be
+// careful not to perform any operation on it that might invalidate
+// its character array (indentation_.data()).
+//
+// In the Auto Escape mode, the PrefixLine modifier is added *after*
+// auto-escape has updated the modifiers that may be present for that
+// template include, but that is ok because PrefixLine does not invalidate
+// their XSS-safety.
+// ----------------------------------------------------------------------
+
+ class TemplateTemplateNode : public TemplateNode {
+ public:
+ explicit TemplateTemplateNode(const TemplateToken& token, Strip strip,
+ const string& indentation)
+ : token_(token),
+ variable_(token_.text, token_.textlen),
+ strip_(strip), indentation_(indentation) {
+ VLOG(2) << "Constructing TemplateTemplateNode: "
+ << string(token_.text, token_.textlen) << endl;
+
+ // If this include is indented in the template, every line of the
+ // expanded template must be indented, not just the first one. We
+ // do this by adding the g_prefix_line_info modifier to the token;
+ // it is handed a pointer into indentation_, which must stay valid.
+ if (!indentation_.empty()) {
+ token_.modvals.push_back(ModifierAndValue(&g_prefix_line_info,
+ indentation_.data(),
+ indentation_.length()));
+ }
+ }
+ virtual ~TemplateTemplateNode() {
+ VLOG(2) << "Deleting TemplateTemplateNode: "
+ << string(token_.text, token_.textlen) << endl;
+ }
+
+ // Expands the template node by retrieving the name of a template
+ // file from the supplied dictionary and expanding that template in
+ // place of this node: once per child dictionary when the dictionary
+ // supplies any, otherwise once using the supplied dictionary itself.
+ // Hidden includes and unset or empty filenames produce no output.
+ // Returns true iff all the template files load and parse correctly.
+ virtual bool Expand(ExpandEmitter *output_buffer,
+ const TemplateDictionaryInterface *dictionary,
+ PerExpandData *per_expand_data,
+ const TemplateCache *cache) const;
+
+ virtual void WriteHeaderEntries(string *outstring,
+ const string& filename) const {
+ WriteOneHeaderEntry(outstring, string(token_.text, token_.textlen),
+ filename);
+ }
+
+ virtual void DumpToString(int level, string *out) const {
+ assert(out);
+ AppendTokenWithIndent(level, out, "Template Node: ", token_, "\n");
+ }
+
+ private:
+ TemplateToken token_; // text is the dictionary key naming the include.
+ const HashedTemplateString variable_;
+ Strip strip_; // Flag to pass from parent template to included template.
+ const string indentation_; // token_.modvals may point into this string.
+
+ // Expands |filename| once with |dictionary|, applying token_.modvals.
+ bool ExpandOnce(ExpandEmitter *output_buffer,
+ const TemplateDictionaryInterface &dictionary,
+ const char* const filename,
+ PerExpandData *per_expand_data,
+ const TemplateCache *cache) const;
+ };
+
+ // Expands the include once for every child dictionary the dictionary
+ // supplies for this node or, when it supplies none, once using the
+ // containing dictionary. No output is generated when the include is
+ // hidden or when no (or an empty) filename is set for it.
+ bool TemplateTemplateNode::Expand(ExpandEmitter *output_buffer,
+                                   const TemplateDictionaryInterface *dictionary,
+                                   PerExpandData *per_expand_data,
+                                   const TemplateCache *cache) const {
+   // A "hidden" template include expands to nothing.
+   if (dictionary->IsHiddenTemplate(variable_))
+     return true;
+
+   TemplateDictionaryInterface::Iterator* child_dicts =
+       dictionary->CreateTemplateIterator(variable_);
+
+   // No child dictionaries means 'expand once using containing dict'.
+   if (!child_dicts->HasNext()) {
+     delete child_dicts;
+     // TODO(csilvers): have this return a TemplateString instead?
+     const char* const filename =
+         dictionary->GetIncludeTemplateName(variable_, 0);
+     // An unset filename is treated like a hidden include: do nothing.
+     if (!filename || !*filename)
+       return true;
+     return ExpandOnce(output_buffer, *dictionary, filename, per_expand_data,
+                       cache);
+   }
+
+   bool all_ok = true;
+   int dict_num = 0;
+   while (child_dicts->HasNext()) {
+     const TemplateDictionaryInterface& child = child_dicts->Next();
+     // The filename is looked up inside the loop because maybe one day
+     // each expansion will have its own template name. That's also why
+     // the dictionary-index is passed along.
+     const char* const filename = dictionary->GetIncludeTemplateName(
+         variable_, dict_num);
+     ++dict_num;
+     // An unset filename is treated like a hidden include: do nothing.
+     if (filename && *filename) {
+       all_ok &= ExpandOnce(output_buffer, child, filename, per_expand_data,
+                            cache);
+     }
+   }
+   delete child_dicts;
+
+   return all_ok;
+ }
+
+ // Reports that the included template |filename| could not be expanded:
+ // emits a "file is missing" annotation into |output_buffer| when
+ // annotation is enabled, and always logs an error.
+ static void EmitMissingInclude(const char* const filename,
+                                ExpandEmitter *output_buffer,
+                                PerExpandData *per_expand_data) {
+   if (per_expand_data->annotate()) {
+     per_expand_data->annotator()->EmitFileIsMissing(output_buffer, filename);
+   }
+   LOG(ERROR) << "Failed to load included template: \"" << filename << "\"\n";
+ }
+
+ // Expands the template named |filename| a single time using |dictionary|
+ // and emits the result, run through this node's modifiers if needed, to
+ // |output_buffer|. Open/close-include annotations are emitted around it
+ // when annotation is enabled. Returns false if the expansion failed.
+ bool TemplateTemplateNode::ExpandOnce(
+     ExpandEmitter *output_buffer,
+     const TemplateDictionaryInterface &dictionary,
+     const char* const filename,
+     PerExpandData *per_expand_data,
+     const TemplateCache *cache) const {
+   // NOTE: Although we do this const_cast here, if the cache is frozen
+   // the expansion doesn't mutate the cache, and is effectively 'const'.
+   TemplateCache* const mutable_cache = const_cast<TemplateCache*>(cache);
+   bool expanded_ok = true;
+
+   if (per_expand_data->annotate()) {
+     per_expand_data->annotator()->EmitOpenInclude(output_buffer,
+                                                   token_.ToString());
+   }
+
+   // We force children to annotate the output if we have to.
+   if (!AnyMightModify(token_.modvals, per_expand_data)) {
+     // Common case: no modifier applies, so the sub-template can be
+     // expanded straight into the output buffer.
+     if (!mutable_cache->ExpandLocked(filename, strip_,
+                                      output_buffer,
+                                      &dictionary,
+                                      per_expand_data)) {
+       EmitMissingInclude(filename, output_buffer, per_expand_data);
+       expanded_ok = false;
+     }
+   } else {
+     // The include has modifiers: expand into a string first, then
+     // modify that string and append the result to output_buffer.
+     string expansion;
+     StringEmitter expansion_emitter(&expansion);
+     if (mutable_cache->ExpandLocked(filename, strip_,
+                                     &expansion_emitter,
+                                     &dictionary,
+                                     per_expand_data)) {
+       EmitModifiedString(token_.modvals,
+                          expansion.data(), expansion.size(),
+                          per_expand_data, output_buffer);
+     } else {
+       EmitMissingInclude(filename, output_buffer, per_expand_data);
+       expanded_ok = false;
+     }
+   }
+
+   if (per_expand_data->annotate()) {
+     per_expand_data->annotator()->EmitCloseInclude(output_buffer);
+   }
+   return expanded_ok;
+ }
+
+// ----------------------------------------------------------------------
+// SectionTemplateNode
+// Holds the name of a section and a list of subnodes contained
+// in that section.
+// ----------------------------------------------------------------------
+
+ class SectionTemplateNode : public TemplateNode {
+ public:
+ SectionTemplateNode(const TemplateToken& token, bool hidden_by_default);
+ virtual ~SectionTemplateNode();
+
+ // The highest level parsing method. Reads a single token from the
+ // input -- taken from my_template->parse_state_ -- and adds the
+ // corresponding type of node to the template's parse
+ // tree. It may add a node of any type, whether text, variable,
+ // section, or template to the list of nodes contained in this
+ // section. Returns true iff we really added a node and didn't just
+ // end a section or hit a syntax error in the template file.
+ // You should hold the g_template_mutex write-lock when calling this
+ // (unless you're calling it from a constructor).
+ bool AddSubnode(Template *my_template);
+
+ // Expands a section node as follows:
+ // - Checks to see if the section is hidden and, if so, does nothing
+ //   but return true
+ // - Tries to retrieve a list of dictionaries from the supplied dictionary
+ //   stored under this section's name
+ // - If it finds a non-empty list of dictionaries, it iterates over the
+ //   list and calls ExpandOnce to expand the section once for
+ //   each dictionary
+ // - If no dictionary list (or, somehow, an empty dictionary list)
+ //   is found, then the section is expanded once using the supplied
+ //   dictionary. (This is the mechanism used to expand each single
+ //   iteration of the section as well as to show a non-hidden section,
+ //   allowing the section template syntax to be used for both conditional
+ //   and iterative text).
+ // Returns true iff all the template files load and parse correctly.
+ virtual bool Expand(ExpandEmitter *output_buffer,
+ const TemplateDictionaryInterface *dictionary,
+ PerExpandData* per_expand_data,
+ const TemplateCache *cache) const;
+
+ // Writes a header entry for the section name and calls the same
+ // method on all the nodes in the section.
+ virtual void WriteHeaderEntries(string *outstring,
+ const string& filename) const;
+
+ virtual void DumpToString(int level, string *out) const;
+
+ private:
+ const TemplateToken token_; // text is the name of the section
+ const HashedTemplateString variable_;
+ NodeList node_list_; // The list of subnodes in the section
+ // A sub-section named "OURNAME_separator" is special. If we see it
+ // when parsing our section, store a pointer to it for ease of use.
+ SectionTemplateNode* separator_section_;
+
+ // When the last node read was literal text that ends with "\n? +"
+ // (that is, leading whitespace on a line), this stores the leading
+ // whitespace. This is used to properly indent included
+ // sub-templates.
+ string indentation_;
+
+ // If true, hide sections that have not explicitly had their hidden/visible
+ // state set. If false, use the underlying template dictionary's default
+ // behavior for hiding.
+ // This bool is currently always set to true.
+ bool hidden_by_default_;
+
+ // A private helper used in parsing the template file.
+ // Finds the next token in the file and returns it. Anything not inside
+ // a template marker is just text. Each template marker type, delimited
+ // by "{{" and "}}" (or parser_state_->marker_delimiters.start_marker
+ // and .end_marker, more precisely) is a different type of token. The
+ // first character inside the opening curly braces indicates the type
+ // of the marker, as follows:
+ // # - Start a section
+ // / - End a section
+ // > - A template file variable (the "include" directive)
+ // ! - A template comment
+ // % - A pragma such as AUTOESCAPE
+ // = - Change marker delimiters (from the default of '{{' and '}}')
+ // <alnum or _> - A scalar variable
+ // One more thing. Before a name token is returned, if it happens to be
+ // any type other than a scalar variable, and if the next character after
+ // the closing curly braces is a newline, then the newline is eliminated
+ // from the output. This reduces the number of extraneous blank
+ // lines in the output. If the template author desires a newline to be
+ // retained after a final marker on a line, they must add a space character
+ // between the marker and the linefeed character.
+ TemplateToken GetNextToken(Template* my_template);
+
+ // Helper used by Expand: expands every subnode once using |dictionary|.
+ virtual bool ExpandOnce(
+ ExpandEmitter *output_buffer,
+ const TemplateDictionaryInterface *dictionary,
+ PerExpandData* per_expand_data,
+ bool is_last_child_dict,
+ const TemplateCache *cache) const;
+
+ // The specific methods used by AddSubnode to add the
+ // different types of nodes to this section node.
+ // Currently only reasons to fail (return false) are if the
+ // HTML parser failed to parse in auto-escape mode or the
+ // PRAGMA marker was invalid in the template.
+ bool AddTextNode(const TemplateToken* token, Template* my_template);
+ bool AddVariableNode(TemplateToken* token, Template* my_template);
+ bool AddPragmaNode(TemplateToken* token, Template* my_template);
+ bool AddTemplateNode(TemplateToken* token, Template* my_template,
+ const string& indentation);
+ bool AddSectionNode(const TemplateToken* token, Template* my_template,
+ bool hidden_by_default);
+ bool AddSectionNode(const TemplateToken* token, Template* my_template);
+ };
+
+// --- constructor and destructor, Expand, Dump, and WriteHeaderEntries
+
+ // Creates a section named by |token|, with no subnodes and no separator
+ // sub-section yet. indentation_ starts out as "\n".
+ SectionTemplateNode::SectionTemplateNode(const TemplateToken& token,
+                                          bool hidden_by_default)
+     : token_(token),
+       variable_(token_.text, token_.textlen),
+       separator_section_(NULL),
+       indentation_("\n"),
+       hidden_by_default_(hidden_by_default) {
+   VLOG(2) << "Constructing SectionTemplateNode: "
+           << string(token_.text, token_.textlen) << endl;
+ }
+
+ // Destroys the section together with every subnode it holds.
+ SectionTemplateNode::~SectionTemplateNode() {
+   VLOG(2) << "Deleting SectionTemplateNode: "
+           << string(token_.text, token_.textlen) << " and its subnodes"
+           << endl;
+
+   // node_list_ stores pointers, so the pointed-to nodes have to be
+   // deleted explicitly; destroying the list alone would not free them.
+   for (NodeList::iterator node = node_list_.begin();
+        node != node_list_.end(); ++node) {
+     delete *node;
+   }
+
+   VLOG(2) << "Finished deleting subnodes of SectionTemplateNode: "
+           << string(token_.text, token_.textlen) << endl;
+ }
+
+ // Expands every subnode of this section once, in order, using
+ // |dictionary|. Unless |is_last_child_dict| is true, the separator
+ // sub-section (if any) is additionally force-expanded right after its
+ // regular expansion. Returns true iff every expansion succeeded.
+ bool SectionTemplateNode::ExpandOnce(
+     ExpandEmitter *output_buffer,
+     const TemplateDictionaryInterface *dictionary,
+     PerExpandData *per_expand_data,
+     bool is_last_child_dict,
+     const TemplateCache* cache) const {
+   if (per_expand_data->annotate()) {
+     per_expand_data->annotator()->EmitOpenSection(output_buffer,
+                                                   token_.ToString());
+   }
+
+   // Expand using the section-specific dictionary.
+   // We force children to annotate the output if we have to.
+   bool all_ok = true;
+   for (NodeList::const_iterator node = node_list_.begin();
+        node != node_list_.end(); ++node) {
+     all_ok &=
+         (*node)->Expand(output_buffer, dictionary, per_expand_data, cache);
+
+     // Only a "separator section" -- a subsection with the name
+     // "OURNAME_separator" -- gets extra treatment, and only when
+     // this is not the last time through.
+     if (is_last_child_dict || *node != separator_section_)
+       continue;
+
+     // Call ExpandOnce (rather than Expand) so the separator is always
+     // expanded, even if it would normally be hidden.
+     all_ok &= separator_section_->ExpandOnce(output_buffer, dictionary,
+                                              per_expand_data, true,
+                                              cache);
+   }
+
+   if (per_expand_data->annotate()) {
+     per_expand_data->annotator()->EmitCloseSection(output_buffer);
+   }
+
+   return all_ok;
+ }
+
+ // Expands the section: not at all when it is hidden, once per child
+ // dictionary when the dictionary supplies any for this section, and
+ // otherwise once using |dictionary| itself.
+ bool SectionTemplateNode::Expand(
+     ExpandEmitter *output_buffer,
+     const TemplateDictionaryInterface *dictionary,
+     PerExpandData *per_expand_data,
+     const TemplateCache *cache) const {
+   // The section named __{{MAIN}}__ is special: you always expand it
+   // exactly once using the containing (main) dictionary.
+   // NOTE(review): this compares pointers, not characters -- presumably
+   // the main section's token is built from kMainSectionName itself.
+   if (token_.text == kMainSectionName)
+     return ExpandOnce(output_buffer, dictionary, per_expand_data, true, cache);
+
+   // Some dictionaries might have sections that can be explicitly hidden
+   // and unhidden, so by default both IsHidden() and IsUnhidden() are false,
+   // in which case hidden_by_default_ controls the behavior.
+   const bool is_hidden = hidden_by_default_
+       ? !dictionary->IsUnhiddenSection(variable_)
+       : dictionary->IsHiddenSection(variable_);
+   if (is_hidden)
+     return true;  // a "hidden" section produces nothing
+
+   TemplateDictionaryInterface::Iterator* child_dicts =
+       dictionary->CreateSectionIterator(variable_);
+
+   // No child dictionaries: expand with the current dictionary instead.
+   // This corresponds to the situation where template variables within a
+   // section are set on the template-wide dictionary instead of adding a
+   // dictionary to the section and setting them there.
+   if (!child_dicts->HasNext()) {
+     delete child_dicts;
+     return ExpandOnce(output_buffer, dictionary, per_expand_data,
+                       true, cache);
+   }
+
+   // At least one child dictionary: expand once per child, using the
+   // child dictionaries instead of the current one.
+   bool all_ok = true;
+   while (child_dicts->HasNext()) {
+     const TemplateDictionaryInterface& child = child_dicts->Next();
+     const bool is_last_child = !child_dicts->HasNext();
+     all_ok &= ExpandOnce(output_buffer, &child, per_expand_data,
+                          is_last_child, cache);
+   }
+   delete child_dicts;
+   return all_ok;
+ }
+
+ // Writes the header entry for this section's own name, then lets each
+ // subnode, in order, write its entries.
+ void SectionTemplateNode::WriteHeaderEntries(string *outstring,
+                                              const string& filename) const {
+   const string section_name(token_.text, token_.textlen);
+   WriteOneHeaderEntry(outstring, section_name, filename);
+
+   for (NodeList::const_iterator node = node_list_.begin();
+        node != node_list_.end(); ++node) {
+     (*node)->WriteHeaderEntries(outstring, filename);
+   }
+ }
+
+ // Appends a "Section Start" line, the dump of every subnode one level
+ // deeper, and a "Section End" line to |out|.
+ void SectionTemplateNode::DumpToString(int level, string *out) const {
+   assert(out);
+   AppendTokenWithIndent(level, out, "Section Start: ", token_, "\n");
+
+   const int child_level = level + 1;
+   for (NodeList::const_iterator node = node_list_.begin();
+        node != node_list_.end(); ++node) {
+     (*node)->DumpToString(child_level, out);
+   }
+
+   AppendTokenWithIndent(level, out, "Section End: ", token_, "\n");
+ }
+
+// --- AddSubnode and its sub-routines
+
+ // Adds a text node for |token|, unless its text is empty. In the
+ // auto-escape modes that parse the template, the text is also fed to
+ // the HTML parser to advance its state; TextTemplateNode is the only
+ // TemplateNode type that can change the state of the parser.
+ // Returns false only if the HTML parser failed to parse in
+ // auto-escape mode.
+ bool SectionTemplateNode::AddTextNode(const TemplateToken* token,
+                                       Template* my_template) {
+   assert(token);
+   HtmlParser *htmlparser = my_template->htmlparser_;
+
+   if (!(token->textlen > 0))  // ignore null text sections
+     return true;
+
+   node_list_.push_back(new TextTemplateNode(*token));
+
+   if (!(AUTO_ESCAPE_PARSING_CONTEXT(my_template->initial_context_)))
+     return true;
+
+   assert(htmlparser);
+   // A parser that is already in the error state is not fed any more text.
+   const bool parse_failed =
+       htmlparser->state() == HtmlParser::STATE_ERROR ||
+       htmlparser->Parse(token->text, static_cast<int>(token->textlen)) ==
+       HtmlParser::STATE_ERROR;
+   if (!parse_failed)
+     return true;
+
+   string error_msg = "Failed parsing: " +
+       string(token->text, token->textlen) +
+       "\nIn: " + string(token_.text, token_.textlen);
+   LOG_AUTO_ESCAPE_ERROR(error_msg, my_template);
+   return false;
+ }
+
+ // Adds a variable node for |token|. In Auto Escape mode the token's
+ // modifiers are first reconciled with the ones Auto-Escape computes
+ // for the current context. Returns false if the HTML parser failed to
+ // parse in auto-escape mode, or if no modifier could be computed.
+ //
+ // BI_SPACE and BI_NEWLINE get special treatment. Even though they
+ // look like variables, they're really not: the user is expected to
+ // use them where they'd normally put a space or a newline character
+ // but can't for technical reasons (namely, that the template parser
+ // would strip these characters because of the STRIP mode it's in).
+ // So we treat them as literal text: we never add modifiers to them,
+ // but we do let the htmlparser know about them in order to update
+ // its state. Existing modifiers will be honored.
+ //
+ // Finally, a whitelisted variable is not escaped by Auto-Escape at
+ // all. See comment for global array kSafeWhitelistedVariables[].
+ bool SectionTemplateNode::AddVariableNode(TemplateToken* token,
+                                           Template* my_template) {
+   assert(token);
+   HtmlParser *htmlparser = my_template->htmlparser_;
+   TemplateContext initial_context = my_template->initial_context_;
+   bool success = true;
+
+   if (AUTO_ESCAPE_MODE(initial_context)) {
+     // Determines modifiers for the variable in auto escape mode.
+     string variable_name(token->text, token->textlen);
+     const bool is_space = (variable_name == "BI_SPACE");
+
+     if (is_space || variable_name == "BI_NEWLINE") {
+       // We declare in the documentation that if the user changes the
+       // value of these variables, they must only change it to a value
+       // that's "equivalent" from the point of view of an html parser.
+       // So it's ok to hard-code in that these are " " and "\n",
+       // respectively, even though in theory the user could change them
+       // (to say, BI_NEWLINE == "\r\n").
+       if (AUTO_ESCAPE_PARSING_CONTEXT(initial_context)) {
+         assert(htmlparser);
+         if (htmlparser->state() == HtmlParser::STATE_ERROR ||
+             htmlparser->Parse(is_space ? " " : "\n") ==
+             HtmlParser::STATE_ERROR) {
+           success = false;
+         }
+       }
+     } else if (!binary_search(Template::kSafeWhitelistedVariables,
+                               Template::kSafeWhitelistedVariables +
+                               arraysize(Template::kSafeWhitelistedVariables),
+                               variable_name.c_str(),
+                               // Luckily, StringHash(a, b) is defined as "a < b"
+                               StringHash())) {
+       // Not whitelisted (whitelisted variables are left unescaped):
+       // compute the modifiers this context calls for.
+       vector<const ModifierAndValue*> computed_modvals =
+           GetModifierForContext(initial_context, htmlparser, my_template);
+       // There should always be at least one modifier in any Auto-Escape mode.
+       if (computed_modvals.empty()) {
+         success = false;
+       } else {
+         token->UpdateModifier(computed_modvals);
+       }
+     }
+   }
+
+   // The node is added even when a failure is being reported.
+   node_list_.push_back(new VariableTemplateNode(*token));
+   return success;
+ }
+
+// AddPragmaNode
+// Create a pragma node from the given token and add it
+// to the node list.
+// The AUTOESCAPE pragma is only allowed at the top of a template
+// file (above any non-comment node) to minimize the chance of the
+// HTML parser being out of sync with the template text. So we check
+// that the section is the MAIN section and we are the first node.
+// Note: Since currently we only support one pragma, we apply the check
+// always but when other pragmas are added we'll need to propagate the
+// Pragma identifier from GetNextToken().
+ bool SectionTemplateNode::AddPragmaNode(TemplateToken* token,
+                                         Template* my_template) {
+   // A pragma is accepted only when this node is the main section (its
+   // token text is kMainSectionName) and nothing has been added to it yet.
+   const bool in_main_section = (token_.text == kMainSectionName);
+   const bool is_first_node = node_list_.empty();
+   if (!(in_main_section && is_first_node))
+     return false;
+
+   PragmaTemplateNode* pragma_node = new PragmaTemplateNode(*token);
+   node_list_.push_back(pragma_node);
+   return true;
+ }
+
+ // AddSectionNode
+ //    Creates a section node for the given token, lets that node parse
+ //    all of its own subnodes, and appends it to this node's list.  A
+ //    child whose name is this section's name followed by "_separator"
+ //    is additionally remembered as this section's separator section.
+ //    Always returns true.
+ bool SectionTemplateNode::AddSectionNode(const TemplateToken* token,
+                                          Template* my_template,
+                                          bool hidden_by_default) {
+   assert(token);
+   SectionTemplateNode* child =
+       new SectionTemplateNode(*token, hidden_by_default);
+
+   // Fill the child with all *its* subnodes: keep calling AddSubnode
+   // until it returns false (end of the section, or a syntax error).
+   for (;;) {
+     if (!child->AddSubnode(my_template))
+       break;
+   }
+   node_list_.push_back(child);
+
+   // A child named "OURNAME_separator" is stored as the special
+   // "separator" section of this node.
+   const char kSeparatorSuffix[] = "_separator";
+   const size_t suffix_len = sizeof(kSeparatorSuffix) - 1;
+   const bool has_separator_name =
+       token->textlen == token_.textlen + suffix_len &&
+       memcmp(token->text, token_.text, token_.textlen) == 0 &&
+       memcmp(token->text + token_.textlen, kSeparatorSuffix, suffix_len) == 0;
+   if (has_separator_name)
+     separator_section_ = child;
+   return true;
+ }
+
+// Note: indentation will be used in constructor of TemplateTemplateNode.
+// Note on Auto-Escape: Each template is Auto-Escaped independently of
+// the template it may be included from or templates it may include.
+// The context does not carry on and hence does not need to be provided
+// to the new TemplateNode.
+ bool SectionTemplateNode::AddTemplateNode(TemplateToken* token,
+                                           Template* my_template,
+                                           const string& indentation) {
+   assert(token);
+   // The include node is built with the including template's strip mode
+   // and the indentation that preceded the include marker.
+   TemplateTemplateNode* include_node =
+       new TemplateTemplateNode(*token, my_template->strip_, indentation);
+   node_list_.push_back(include_node);
+   // There is no failure path here; the bool return only mirrors the
+   // other Add*Node methods.
+   return true;
+ }
+
+ // Returns the indentation that "text" ends with: the run of spaces
+ // and tabs following the last newline in text.  The result is the
+ // empty string when that run is empty, when anything other than a
+ // space or tab follows the last newline, or when text has no newline
+ // at all.  If implicit_newline is true, text is treated as if a
+ // newline came right before it (the caller knows the previous text
+ // ended in a newline, or that this is the beginning of a document),
+ // so a text consisting entirely of whitespace counts as indentation.
+ static string GetIndentation(const char* text, size_t textlen,
+                              bool implicit_newline) {
+   const char* const text_end = text + textlen;
+
+   // Walk backwards to one char past the last newline (or to text).
+   const char* line_start = text_end;
+   while (line_start > text && line_start[-1] != '\n')
+     --line_start;
+   if (line_start == text && !implicit_newline)
+     return "";       // no newline found, so no indentation
+
+   if (line_start == text_end)
+     return "";       // nothing at all after the newline
+
+   // Anything other than blanks on the last line means no indentation.
+   for (const char* cur = line_start; cur != text_end; ++cur) {
+     if (*cur != ' ' && *cur != '\t')
+       return "";
+   }
+   return string(line_start, text_end - line_start);
+ }
+
+ // Reads the next token from my_template's parse buffer and adds the
+ // corresponding node (if any) to this section.  Returns true if the
+ // caller should keep calling AddSubnode for this section, and false
+ // when the section is finished: the template is already in TS_ERROR,
+ // the buffer is exhausted, a section-end marker was read, GetNextToken
+ // returned TOKENTYPE_NULL, or adding a node failed.  Errors are
+ // reported by setting my_template's state to TS_ERROR.
+ bool SectionTemplateNode::AddSubnode(Template *my_template) {
+ bool auto_escape_success = true;
+ // Don't proceed if we already found an error
+ if (my_template->state() == TS_ERROR) {
+ return false;
+ }
+
+ // Stop when the buffer is empty.
+ if (my_template->parse_state_.bufstart >= my_template->parse_state_.bufend) {
+ // running out of file contents ends the section too
+ if (token_.text != kMainSectionName) {
+ // if we are not in the main section, we have a syntax error in the file
+ LOG_TEMPLATE_NAME(ERROR, my_template);
+ LOG(ERROR) << "File ended before all sections were closed" << endl;
+ my_template->set_state(TS_ERROR);
+ }
+ return false;
+ }
+
+ TemplateToken token = GetNextToken(my_template);
+
+ switch (token.type) {
+ case TOKENTYPE_TEXT:
+ auto_escape_success = this->AddTextNode(&token, my_template);
+ // Store the indentation (trailing whitespace after a newline), if any.
+ // NOTE(review): GetIndentation only ever returns spaces/tabs, so the
+ // comparison with "\n" presumably detects an initial sentinel value
+ // of indentation_ set outside this function -- confirm.
+ this->indentation_ = GetIndentation(token.text, token.textlen,
+ indentation_ == "\n");
+ break;
+ case TOKENTYPE_VARIABLE:
+ auto_escape_success = this->AddVariableNode(&token, my_template);
+ this->indentation_.clear(); // clear whenever last read wasn't whitespace
+ break;
+ case TOKENTYPE_SECTION_START:
+ auto_escape_success = this->AddSectionNode(&token, my_template, false);
+ this->indentation_.clear(); // clear whenever last read wasn't whitespace
+ break;
+ case TOKENTYPE_HIDDEN_DEFAULT_SECTION:
+ auto_escape_success = this->AddSectionNode(&token, my_template, true);
+ this->indentation_.clear(); // clear whenever last read wasn't whitespace
+ break;
+ case TOKENTYPE_SECTION_END:
+ // Don't add a node. Just make sure we are ending the right section
+ // and return false to indicate the section is complete
+ if (token.textlen != token_.textlen ||
+ memcmp(token.text, token_.text, token.textlen)) {
+ LOG_TEMPLATE_NAME(ERROR, my_template);
+ LOG(ERROR) << "Found end of different section than the one I am in"
+ << "\nFound: " << string(token.text, token.textlen)
+ << "\nIn: " << string(token_.text, token_.textlen) << endl;
+ my_template->set_state(TS_ERROR);
+ }
+ this->indentation_.clear(); // clear whenever last read wasn't whitespace
+ return false;
+ break;
+ case TOKENTYPE_TEMPLATE:
+ // The include node receives the indentation seen just before it.
+ auto_escape_success = this->AddTemplateNode(&token, my_template,
+ this->indentation_);
+ this->indentation_.clear(); // clear whenever last read wasn't whitespace
+ break;
+ case TOKENTYPE_COMMENT:
+ // Do nothing. Comments just drop out of the file altogether.
+ // (indentation_ is deliberately left untouched in this case.)
+ break;
+ case TOKENTYPE_SET_DELIMITERS:
+ // Install the new delimiters for all tokens read after this one.
+ if (!Template::ParseDelimiters(
+ token.text, token.textlen,
+ &my_template->parse_state_.current_delimiters)) {
+ LOG_TEMPLATE_NAME(ERROR, my_template);
+ LOG(ERROR) << "Invalid delimiter-setting command."
+ << "\nFound: " << string(token.text, token.textlen)
+ << "\nIn: " << string(token_.text, token_.textlen) << endl;
+ my_template->set_state(TS_ERROR);
+ }
+ break;
+ case TOKENTYPE_PRAGMA:
+ // We can do nothing and simply drop the pragma of the file as is done
+ // for comments. But, there is value in keeping it for debug purposes
+ // (via DumpToString) so add it as a pragma node.
+ if (!this->AddPragmaNode(&token, my_template)) {
+ LOG_TEMPLATE_NAME(ERROR, my_template);
+ LOG(ERROR) << "Pragma marker must be at the top of the template: '"
+ << string(token.text, token.textlen) << "'" << endl;
+ my_template->set_state(TS_ERROR);
+ }
+ break;
+ case TOKENTYPE_NULL:
+ // GetNextToken either hit the end of the file or a syntax error
+ // in the file. Do nothing more here. Just return false to stop
+ // processing.
+ return false;
+ break;
+ default:
+ // This shouldn't happen. If it does, it's a programmer error.
+ // Note that the error state is not set here: the problem is only
+ // logged and the function falls through to the common exit below.
+ LOG_TEMPLATE_NAME(ERROR, my_template);
+ LOG(ERROR) << "Invalid token type returned from GetNextToken" << endl;
+ }
+
+ if (!auto_escape_success) {
+ // The error is logged where it happens. Here indicate
+ // the initialization failed.
+ my_template->set_state(TS_ERROR);
+ return false;
+ }
+
+ // for all the cases where we did not return false
+ return true;
+ }
+
+// --- GetNextToken and its subroutines
+
+ // Returns true iff each of the first namelen characters of name is an
+ // ASCII alphanumeric or an underscore -- the only characters allowed
+ // in a marker name.  A name of length zero passes trivially.
+ static bool IsValidName(const char* name, int namelen) {
+   for (int i = 0; i < namelen; ++i) {
+     const char c = name[i];
+     if (c != '_' && !ascii_isalnum(c))
+       return false;
+   }
+   return true;
+ }
+
+ // Decides how to treat a line ending found at 'start' (the text right
+ // after a marker); 'end' is one past the last readable character.
+ //   - "\\\n" (backslash, newline): skip the backslash only, which
+ //     keeps the newline.  This applies in every strip mode.
+ //   - "\n" alone: skip it, but only when strip >= STRIP_WHITESPACE.
+ // Returns the new 'start' location, which is either 'start' itself or
+ // one character past it.
+ static const char* MaybeEatNewline(const char* start, const char* end,
+                                    Strip strip) {
+   const bool has_escaped_newline =
+       end - start >= 2 && start[0] == '\\' && start[1] == '\n';
+   if (has_escaped_newline)
+     return start + 1;   // drop the '\', leaving the '\n' in place
+
+   const bool has_plain_newline = end - start >= 1 && start[0] == '\n';
+   if (has_plain_newline && strip >= STRIP_WHITESPACE)
+     return start + 1;   // drop the '\n' in high strip modes
+
+   return start;
+ }
+
+ // When the parse fails, we take several actions.  msg is anything that
+ // may follow "LOG(ERROR) <<" (it can itself contain "<<" operators).
+ // The macro logs the template name and msg, puts my_template into
+ // TS_ERROR, empties the remaining parse buffer so nothing more is
+ // parsed, and returns a TOKENTYPE_NULL token from the enclosing
+ // function.  It can therefore only be used inside a function that
+ // returns a TemplateToken and has a variable named my_template in scope.
+ #define FAIL(msg) do { \
+ LOG_TEMPLATE_NAME(ERROR, my_template); \
+ LOG(ERROR) << msg << endl; \
+ my_template->set_state(TS_ERROR); \
+ /* make extra-sure we never try to parse anything more */ \
+ my_template->parse_state_.bufstart = my_template->parse_state_.bufend; \
+ return TemplateToken(TOKENTYPE_NULL, "", 0, NULL); \
+ } while (0)
+
+// Parses the text of the template file in the input_buffer as
+// follows: If the buffer is empty, return the null token. If getting
+ // text, search for the next "{{" sequence (more precisely, for
+ // parse_state_.current_delimiters.start_marker). If one is found,
+// return all the text collected up to that sequence in a TextToken
+// and change the token-parsing phase variable to GETTING_NAME, so the
+// next call will know to look for a named marker, instead of more
+// text. If getting a name, read the next character to learn what
+// kind of marker it is. Then collect the characters of the name up
+// to the "}}" sequence. If the "name" is a template comment, then we
+// do not return the text of the comment in the token. If it is any
+// other valid type of name, we return the token with the appropriate
+// type and the name. If any syntax errors are discovered (like
+// inappropriate characters in a name, not finding the closing curly
+// braces, etc.) an error message is logged, the error state of the
+// template is set, and a NULL token is returned. Updates
+// parse_state_. You should hold the g_template_mutex write-lock
+// when calling this (unless you're calling it from a constructor).
+ TemplateToken SectionTemplateNode::GetNextToken(Template *my_template) {
+   Template::ParseState* ps = &my_template->parse_state_;   // short abbrev.
+   const char* token_start = ps->bufstart;
+
+   if (ps->bufstart >= ps->bufend) {    // at end of buffer
+     return TemplateToken(TOKENTYPE_NULL, "", 0, NULL);
+   }
+
+   switch (ps->phase) {
+     case Template::ParseState::GETTING_TEXT: {
+       const char* token_end = memmatch(ps->bufstart, ps->bufend - ps->bufstart,
+                                        ps->current_delimiters.start_marker,
+                                        ps->current_delimiters.start_marker_len);
+       if (!token_end) {
+         // Didn't find the start-marker ('{{'), so just grab all the
+         // rest of the buffer.
+         token_end = ps->bufend;
+         ps->bufstart = ps->bufend;   // next token will start at EOF
+       } else {
+         // If we see code like this: "{{{VAR}}, we want to match the
+         // second "{{", not the first.
+         while ((token_end + 1 + ps->current_delimiters.start_marker_len
+                 <= ps->bufend) &&
+                memcmp(token_end + 1, ps->current_delimiters.start_marker,
+                       ps->current_delimiters.start_marker_len) == 0)
+           token_end++;
+         ps->phase = Template::ParseState::GETTING_NAME;
+         ps->bufstart = token_end + ps->current_delimiters.start_marker_len;
+       }
+       return TemplateToken(TOKENTYPE_TEXT, token_start,
+                            token_end - token_start, NULL);
+     }
+
+     case Template::ParseState::GETTING_NAME: {
+       TemplateTokenType ttype;
+       const char* token_end = NULL;
+       // Find out what type of name we are getting
+       switch (token_start[0]) {
+         case '#':
+           ttype = TOKENTYPE_SECTION_START;
+           ++token_start;
+           break;
+         case '/':
+           ttype = TOKENTYPE_SECTION_END;
+           ++token_start;
+           break;
+         case '!':
+           ttype = TOKENTYPE_COMMENT;
+           ++token_start;
+           break;
+         case '=':
+           ttype = TOKENTYPE_SET_DELIMITERS;
+           // Keep token_start the same; the token includes the leading '='.
+           // But we have to figure token-end specially: it should be "=}}".
+           if (ps->bufend > (token_start + 1))
+             token_end = (const char*)memchr(token_start + 1, '=',
+                                             ps->bufend - (token_start + 1));
+           // The end-marker is compared starting one past the '=', so the
+           // '=' plus the whole end-marker must fit inside the buffer.
+           // (Checking only token_end + end_marker_len would let memcmp
+           // read one byte beyond bufend.)
+           if (!token_end ||
+               token_end + 1 + ps->current_delimiters.end_marker_len
+                   > ps->bufend ||
+               memcmp(token_end + 1, ps->current_delimiters.end_marker,
+                      ps->current_delimiters.end_marker_len) != 0)
+             token_end = NULL;   // didn't find it, fall through to code below
+           else
+             token_end++;        // advance past the "=" to the "}}".
+           break;
+         case '>':
+           ttype = TOKENTYPE_TEMPLATE;
+           ++token_start;
+           break;
+         case '%':
+           ttype = TOKENTYPE_PRAGMA;
+           ++token_start;
+           break;
+         default:
+           // the assumption that the next char is alnum or _ will be
+           // tested below in the call to IsValidName().
+           ttype = TOKENTYPE_VARIABLE;
+       }
+
+       // Now get the name (or the comment, as the case may be)
+       if (!token_end)   // that is, it wasn't set in special-case code above
+         token_end = memmatch(token_start, ps->bufend - token_start,
+                              ps->current_delimiters.end_marker,
+                              ps->current_delimiters.end_marker_len);
+       if (!token_end) {   // Didn't find the '}}', so name never ended.  Error!
+         FAIL("No ending '" << string(ps->current_delimiters.end_marker,
+                                      ps->current_delimiters.end_marker_len)
+              << "' when parsing name starting with "
+              << "'" << string(token_start, ps->bufend-token_start) << "'");
+       }
+
+       if (ttype == TOKENTYPE_PRAGMA) {
+         string error_msg;
+         const PragmaMarker pragma(token_start, token_end, &error_msg);
+         if (!error_msg.empty())
+           FAIL(error_msg);
+         TemplateContext context = GetTemplateContextFromPragma(pragma);
+         if (context == TC_MANUAL)  // TC_MANUAL is used to indicate error.
+           FAIL("Invalid context in Pragma directive.");
+         const string* parser_state = pragma.GetAttributeValue("state");
+         bool in_tag = false;
+         if (parser_state != NULL) {
+           if (context == TC_HTML && (*parser_state == "IN_TAG" ||
+                                      *parser_state == "in_tag"))
+             in_tag = true;
+           else if (*parser_state != "default")
+             FAIL("Unsupported state '" + *parser_state +
+                  "'in Pragma directive.");
+         }
+         // Only an AUTOESCAPE pragma can change the initial_context
+         // away from TC_MANUAL and we do not support multiple such pragmas.
+         assert(my_template->initial_context_ == TC_MANUAL);
+         my_template->initial_context_ = context;
+         my_template->MaybeInitHtmlParser(in_tag);
+         // ParseState change will happen below.
+       }
+
+       // Comments are a special case, since they don't have a name or action.
+       // The set-delimiters command is the same way.
+       if (ttype == TOKENTYPE_COMMENT || ttype == TOKENTYPE_SET_DELIMITERS ||
+           ttype == TOKENTYPE_PRAGMA) {
+         ps->phase = Template::ParseState::GETTING_TEXT;
+         ps->bufstart = token_end + ps->current_delimiters.end_marker_len;
+         // If requested, remove any unescaped linefeed following a comment
+         ps->bufstart = MaybeEatNewline(ps->bufstart, ps->bufend,
+                                        my_template->strip_);
+         // For comments, don't bother returning the text
+         if (ttype == TOKENTYPE_COMMENT)
+           token_start = token_end;
+         return TemplateToken(ttype, token_start, token_end - token_start, NULL);
+       }
+
+       // Now we have the name, possibly with following modifiers.
+       // Find the modifier-start.
+       const char* mod_start = (const char*)memchr(token_start, ':',
+                                                   token_end - token_start);
+       if (mod_start == NULL)
+         mod_start = token_end;
+
+       // Make sure the name is legal.
+       if (!IsValidName(token_start, mod_start - token_start)) {
+         FAIL("Illegal name in template '"
+              << string(token_start, mod_start-token_start) << "'");
+       }
+
+       // Figure out what all the modifiers are.  Mods are colon-separated.
+       vector<ModifierAndValue> modifiers;
+       const char* mod_end;
+       for (const char* mod = mod_start; mod < token_end; mod = mod_end) {
+         assert(*mod == ':');
+         ++mod;   // skip past the starting colon
+         mod_end = (const char*)memchr(mod, ':', token_end - mod);
+         if (mod_end == NULL)
+           mod_end = token_end;
+         // Modifiers can be of the form :modname=value.  Extract out value
+         const char* value = (const char*)memchr(mod, '=', mod_end - mod);
+         if (value == NULL)
+           value = mod_end;
+         string value_string(value, mod_end - value);
+         // Convert the string to a functor, and error out if we can't.
+         const ModifierInfo* modstruct = FindModifier(mod, value - mod,
+                                                      value, mod_end - value);
+         // There are various ways a modifier syntax can be illegal.
+         if (modstruct == NULL) {
+           FAIL("Unknown modifier for variable "
+                << string(token_start, mod_start - token_start) << ": "
+                << "'" << string(mod, value - mod) << "'");
+         } else if (!modstruct->modval_required && value < mod_end) {
+           FAIL("Modifier for variable "
+                << string(token_start, mod_start - token_start) << ":"
+                << string(mod, value - mod) << " "
+                << "has illegal mod-value '" << value_string << "'");
+         } else if (modstruct->modval_required && value == mod_end) {
+           FAIL("Modifier for variable "
+                << string(token_start, mod_start - token_start) << ":"
+                << string(mod, value - mod) << " "
+                << "is missing a required mod-value");
+         }
+
+         // We rely on the fact that the memory pointed to by 'value'
+         // remains valid throughout the life of this token since
+         // ModifierAndValue does not itself manage its memory.
+         modifiers.push_back(
+             ModifierAndValue(modstruct, value, mod_end - value));
+       }
+
+       // For now, we only allow variable and include nodes to have
+       // modifiers.  I think it's better not to have this for
+       // sections, but instead to modify all the text and vars in the
+       // section appropriately, but I could be convinced otherwise.
+       if (!modifiers.empty() &&
+           ttype != TOKENTYPE_VARIABLE && ttype != TOKENTYPE_TEMPLATE) {
+         FAIL(string(token_start, token_end - token_start)
+              << "malformed: only variables and template-includes "
+              << "are allowed to have modifiers");
+       }
+
+       // Whew!  We passed the gauntlet.  Get ready for the next token
+       ps->phase = Template::ParseState::GETTING_TEXT;
+       ps->bufstart = token_end + ps->current_delimiters.end_marker_len;
+       // If requested, remove any linefeed following a comment,
+       // or section start or end, or template marker, unless
+       // it is escaped by '\'
+       if (ttype != TOKENTYPE_VARIABLE) {
+         ps->bufstart = MaybeEatNewline(ps->bufstart, ps->bufend,
+                                        my_template->strip_);
+       }
+
+       // create and return the named token (variable, section start/end,
+       // or template-include) that we found, along with its modifiers
+       return TemplateToken(ttype, token_start, mod_start - token_start,
+                            &modifiers);
+     }
+
+     default: {
+       FAIL("Programming error: Unexpected parse phase while "
+            << "parsing template: " << ps->phase);
+     }
+   }
+ }
+
+// ----------------------------------------------------------------------
+// CreateTemplateCache()
+// default_template_cache()
+// mutable_default_template_cache()
+// These create the default TemplateCache object, that Template
+// often just delegates (deprecated) operations to.
+// ----------------------------------------------------------------------
+
+ // The process-wide default cache.  It is created on first use by
+ // CreateTemplateCache() and is never deleted in the code shown here.
+ static TemplateCache* g_default_template_cache = NULL;
+ // Once-flag passed to GoogleOnceInit by both accessors below, so that
+ // they share a single initialization of the default cache.
+ GoogleOnceType g_default_cache_init_once = GOOGLE_ONCE_INIT;
+
+ // Initialization callback handed to GoogleOnceInit: allocates the
+ // default cache.
+ static void CreateTemplateCache() {
+ g_default_template_cache = new TemplateCache();
+ }
+
+ // Returns the default cache for read-only use, creating it if needed.
+ const TemplateCache* default_template_cache() {
+ GoogleOnceInit(&g_default_cache_init_once, &CreateTemplateCache);
+ return g_default_template_cache;
+ }
+
+ // Returns the same default cache as default_template_cache(), but
+ // through a non-const pointer, creating it if needed.
+ TemplateCache* mutable_default_template_cache() {
+ GoogleOnceInit(&g_default_cache_init_once, &CreateTemplateCache);
+ return g_default_template_cache;
+ }
+
+// ----------------------------------------------------------------------
+// Template::StringToTemplate()
+// StringToTemplate reads a string representing a template (eg
+// "Hello {{WORLD}}"), and parses it to a Template*. It returns
+// the parsed template, or NULL if there was a parsing error.
+// StringToTemplateCache does the same, but then inserts the
+// resulting Template* into the template cache, for future retrieval
+// via GetTemplate. You pass in the key to use with GetTemplate.
+// It returns a bool indicating success or failure of template
+// creation/insertion. (Insertion will fail if a string or file
+// with that key already exists in the cache.)
+// RemoveStringFromTemplateCache() lets you remove a string that
+// you had previously interned via StringToTemplateCache().
+// ----------------------------------------------------------------------
+
+ Template* Template::StringToTemplate(const TemplateString& content,
+                                      Strip strip) {
+   // The template is constructed with an empty filename: an empty
+   // original_filename_ keeps ReloadIfChangedLocked from performing
+   // file operations, so "loading" and parsing are done by hand below.
+   Template* parsed = new Template("", strip, NULL);
+
+   // BuildTree deletes the buffer it is given when done, so it gets its
+   // own copy of the content.
+   size_t text_len = content.size();
+   char* text = new char[content.size()];
+   memcpy(text, content.data(), text_len);
+   parsed->StripBuffer(&text, &text_len);
+
+   if (!parsed->BuildTree(text, text + text_len)) {
+     // Parse error: the caller gets NULL rather than a broken template.
+     assert(parsed->state() != TS_READY);
+     delete parsed;
+     return NULL;
+   }
+   assert(parsed->state() == TS_READY);
+   return parsed;
+ }
+
+// ----------------------------------------------------------------------
+// Template::Template()
+// Template::~Template()
+// Template::MaybeInitHtmlParser()
+// Calls ReloadIfChanged to load the template the first time.
+// The constructor is private; GetTemplate() is the factory
+// method used to actually construct a new template if needed.
+// GetTemplateCommon() first looks in the two caches -- the
+// cache of parsed template trees, and the cache of raw
+// template-file contents -- before trying to load the
+// template-file from disk.
+// ----------------------------------------------------------------------
+
+ // Constructs a template for the given filename and strip mode, owned
+ // by the given cache (StringToTemplate passes NULL).  The object starts
+ // out in state TS_EMPTY with context TC_MANUAL and no parse tree, text
+ // buffer, or HTML parser; ReloadIfChangedLocked() is then called to
+ // load it the first time.
+ Template::Template(const TemplateString& filename, Strip strip,
+ TemplateCache* owner)
+ // TODO(csilvers): replace ToString() with an is_immutable() check
+ : original_filename_(filename.data(), filename.size()), resolved_filename_(),
+ filename_mtime_(0), strip_(strip), state_(TS_EMPTY),
+ template_cache_(owner), template_text_(NULL), template_text_len_(0),
+ tree_(NULL), parse_state_(),
+ initial_context_(TC_MANUAL), htmlparser_(NULL) {
+ // template_file() returns resolved_filename_, which is still empty at
+ // this point, so this message does not show the requested filename.
+ VLOG(2) << "Constructing Template for " << template_file()
+ << "; with context " << initial_context_
+ << "; and strip " << strip_ << endl;
+
+ // Preserve whitespace in Javascript files because carriage returns
+ // can convey meaning for comment termination and closures
+ // (only STRIP_WHITESPACE is downgraded; other strip modes are kept).
+ if (strsuffix(original_filename_.c_str(), ".js") &&
+ strip_ == STRIP_WHITESPACE) {
+ strip_ = STRIP_BLANK_LINES;
+ }
+ ReloadIfChangedLocked();
+ }
+
+ // Releases the parse tree, the template text buffer, and the HTML
+ // parser (if any), and bumps the num_deletes_ counter.
+ Template::~Template() {
+ VLOG(2) << endl << "Deleting Template for " << template_file()
+ << "; with context " << initial_context_
+ << "; and strip " << strip_ << endl;
+ // Since this is only used by tests, we don't bother with locking
+ num_deletes_++;
+ delete tree_;
+ // Delete this after tree_, since tree has pointers into template_text_
+ delete[] template_text_;
+ delete htmlparser_;
+ }
+
+// In TemplateContexts where the HTML parser is needed, we initialize it in
+// the appropriate mode. Also we do a sanity check (cannot fail) on the
+// template filename. This function is invoked when an AUTOESCAPE pragma is
+// found during template parsing and should at most be called once per template.
+//
+// In_tag is only meaningful for TC_HTML: It is true for templates that
+// start inside an HTML tag and hence are expected to contain HTML attribute
+// name/value pairs only. It is false for standard HTML templates.
+ void Template::MaybeInitHtmlParser(bool in_tag) {
+   // A parser must not exist yet: this runs at most once per template.
+   assert(!htmlparser_);
+   if (AUTO_ESCAPE_PARSING_CONTEXT(initial_context_)) {
+     htmlparser_ = new HtmlParser();
+     // JS and CSS contexts get their own parser mode.  Every other
+     // parsing context keeps the mode of a freshly constructed parser,
+     // unless the template is declared to start inside an HTML tag.
+     if (initial_context_ == TC_JS) {
+       htmlparser_->ResetMode(HtmlParser::MODE_JS);
+     } else if (initial_context_ == TC_CSS) {
+       htmlparser_->ResetMode(HtmlParser::MODE_CSS);
+     } else if (in_tag) {
+       htmlparser_->ResetMode(HtmlParser::MODE_HTML_IN_TAG);
+     }
+     // Sanity check on the filename; its result is not used here.
+     FilenameValidForContext(original_filename_, initial_context_);
+   }
+ }
+
+// ----------------------------------------------------------------------
+// Template::BuildTree()
+// Template::WriteHeaderEntry()
+// Template::Dump()
+// These kick off their various parsers -- BuildTree for the
+// main task of parsing a Template when it's read from memory,
+// WriteHeaderEntry for parsing for make_tpl_varnames_h, and
+// Dump() for when Dump() is called by the caller.
+// ----------------------------------------------------------------------
+
+// NOTE: BuildTree takes over ownership of input_buffer, and will delete it.
+// It should have been created via new[].
+// You should hold a write-lock on g_template_mutex before calling this
+// (unless you're calling it from a constructor).
+// In auto-escape mode, the HTML context is tracked as the tree is being
+// built, in a single pass. When this function completes, all variables
+// will have the proper modifiers set.
+ // Parses [input_buffer, input_buffer_end) into a new tree and installs
+ // it as tree_.  Returns true and sets the state to TS_READY on success.
+ // On a parse error, returns false and leaves tree_ and template_text_
+ // NULL.  In both cases the previous tree and text buffer are deleted.
+ bool Template::BuildTree(const char* input_buffer,
+ const char* input_buffer_end) {
+ // Start from a clean parse state: text phase, default delimiters.
+ set_state(TS_EMPTY);
+ parse_state_.bufstart = input_buffer;
+ parse_state_.bufend = input_buffer_end;
+ parse_state_.phase = ParseState::GETTING_TEXT;
+ parse_state_.current_delimiters = Template::MarkerDelimiters();
+ // Assign an arbitrary name to the top-level node
+ SectionTemplateNode *top_node = new SectionTemplateNode(
+ TemplateToken(TOKENTYPE_SECTION_START,
+ kMainSectionName, strlen(kMainSectionName), NULL),
+ false);
+ // AddSubnode returns false once the buffer is exhausted or an error
+ // has been recorded in the template state.
+ while (top_node->AddSubnode(this)) {
+ // Add the rest of the template in.
+ }
+
+ // get rid of the old tree, whenever we try to build a new one.
+ delete tree_;
+ delete[] template_text_;
+ tree_ = top_node;
+ template_text_ = input_buffer;
+ template_text_len_ = input_buffer_end - input_buffer;
+
+ // TS_ERROR can also be set by the auto-escape mode, at the point
+ // where the parser failed to parse.
+ if (state() != TS_ERROR) {
+ set_state(TS_READY);
+ return true;
+ } else {
+ // Parse failed: throw away the tree and buffer installed just above
+ // (this is where input_buffer itself gets deleted on failure).
+ delete tree_;
+ tree_ = NULL;
+ delete[] template_text_;
+ template_text_ = NULL;
+ template_text_len_ = 0;
+ return false;
+ }
+ }
+
+ // Appends to *outstring an #include line for template_string.h followed
+ // by whatever the parse tree's WriteHeaderEntries emits for this
+ // template file.  Appends nothing unless the template is in TS_READY.
+ void Template::WriteHeaderEntries(string *outstring) const {
+ if (state() == TS_READY) { // only write header entries for 'good' tpls
+ outstring->append("#include <ctemplate/template_string.h>\n");
+ tree_->WriteHeaderEntries(outstring, template_file());
+ }
+ }
+
+ // Dumps the parsed structure of the template for debugging assistance.
+ // It goes to stdout instead of LOG to avoid possible truncation due to size.
+ // The text is produced by DumpToString(); filename is only used as a
+ // label in the dump's first line.
+ void Template::Dump(const char *filename) const {
+ string out;
+ DumpToString(filename, &out);
+ fwrite(out.data(), 1, out.length(), stdout);
+ fflush(stdout);
+ }
+
+ // Appends a debugging dump of this template to *out: a start banner
+ // containing filename, then either the parse tree's own dump or a note
+ // that no tree exists, then an end banner.  Does nothing if out is NULL.
+ void Template::DumpToString(const char *filename, string *out) const {
+   if (out == NULL)
+     return;
+
+   // Copy the label before touching *out, then emit the start banner.
+   const string label(filename);
+   out->append("------------Start Template Dump [");
+   out->append(label);
+   out->append("]--------------\n");
+
+   if (tree_ == NULL) {
+     out->append("No parse tree has been produced for this template\n");
+   } else {
+     tree_->DumpToString(1, out);
+   }
+   out->append("------------End Template Dump----------------\n");
+ }
+
+// -------------------------------------------------------------------------
+// Template::state()
+// Template::set_state()
+// Template::template_file()
+// Template::original_filename()
+// Template::strip()
+// Template::mtime()
+// Various introspection methods. state() is the parse-state
+// (success, error). template_file() is the resolved filename of a
+// given template object's input. original_filename() is the unresolved,
+// original filename, strip() is the Strip type. mtime() is
+// the lastmod time. For string-based templates, not backed by a file,
+// mtime() returns 0.
+// -------------------------------------------------------------------------
+
+ // Overwrites the parse-state of this template.
+ void Template::set_state(TemplateState new_state) {
+ state_ = new_state;
+ }
+
+ // Returns the current parse-state of this template.
+ TemplateState Template::state() const {
+ return state_;
+ }
+
+ // Returns the resolved filename as a C string; the pointer refers to
+ // storage owned by resolved_filename_.
+ const char *Template::template_file() const {
+ return resolved_filename_.c_str();
+ }
+
+ // Returns the filename this template was constructed with, as a C
+ // string; the pointer refers to storage owned by original_filename_.
+ const char *Template::original_filename() const {
+ return original_filename_.c_str();
+ }
+
+ // Returns the strip mode in effect (the constructor may have changed
+ // it from the requested one for ".js" files).
+ Strip Template::strip() const {
+ return strip_;
+ }
+
+ // Returns the stored modification time (initialized to 0 by the
+ // constructor).
+ time_t Template::mtime() const {
+ return filename_mtime_;
+ }
+
+// ----------------------------------------------------------------------
+// Template::GetTemplate()
+// Template::StringToTemplateCache()
+// Template::SetTemplateRootDirectory()
+// Template::AddAlternateTemplateRootDirectory()
+// Template::template_root_directory()
+// Template::FindTemplateFilename()
+// Template::RemoveStringFromTemplateCache()
+// Template::ClearCache()
+// Template::ReloadAllIfChanged()
+// These are deprecated static methods that have been moved to
+// template_cache.h. We just forward to them, using the global
+// default template cache.
+// ----------------------------------------------------------------------
+
+ // Deprecated forwarder: looks the template up in the default template
+ // cache and returns whatever that cache's GetTemplate returns.
+ Template *Template::GetTemplate(const TemplateString& filename, Strip strip) {
+ // Until I've resolved the TODO that lets me return a const Template*
+ // here, I have to do an ugly cast. :-(
+ return const_cast<Template*>(
+ mutable_default_template_cache()->GetTemplate(filename, strip));
+ }
+
+ // This method is deprecated (and slow).  Instead, use the
+ // StringToTemplateCache function that takes a Strip argument.
+ // Inserts content under key once for every strip mode; every mode is
+ // attempted even after an earlier one has failed.
+ bool Template::StringToTemplateCache(const TemplateString& key,
+                                      const TemplateString& content) {
+   // We say the insert succeeded only if it succeeded for all strip values.
+   bool all_inserted = true;
+   for (int strip_index = 0; strip_index < static_cast<int>(NUM_STRIPS);
+        ++strip_index) {
+     const Strip strip = static_cast<Strip>(strip_index);
+     const bool inserted =
+         GOOGLE_NAMESPACE::StringToTemplateCache(key, content, strip);
+     all_inserted = all_inserted && inserted;
+   }
+   return all_inserted;
+ }
+
+// ----------------------------------------------------------------------
+// Template::ParseDelimiters()
+// Given an input that looks like =XXX YYY=, set the
+// MarkerDelimiters to point to XXX and YYY. This is used to parse
+// {{=XXX YYY=}} markers, which reset the marker delimiters.
+// Returns true if successfully parsed (starts and ends with =,
+// exactly one space, no internal ='s), false else.
+// ----------------------------------------------------------------------
+
+ // Parses text of the form "=XXX YYY=" (textlen bytes, not necessarily
+ // NUL-terminated).  On success, points delim's start marker at XXX and
+ // its end marker at YYY -- both pointing into text, not copies -- and
+ // returns true.  Returns false, leaving *delim untouched, if the text
+ // does not start and end with '=', has an '=' in the middle, does not
+ // contain exactly one space, or if either XXX or YYY is empty.
+ bool Template::ParseDelimiters(const char* text, size_t textlen,
+                                MarkerDelimiters* delim) {
+   const char* space = (const char*)memchr(text, ' ', textlen);
+   if (textlen < 3 ||
+       text[0] != '=' || text[textlen - 1] != '=' ||        // no = at ends
+       memchr(text + 1, '=', textlen - 2) ||                // = in the middle
+       !space ||                                            // no interior space
+       memchr(space + 1, ' ', text + textlen - (space+1)))  // too many spaces
+     return false;
+
+   const char* const start_marker = text + 1;
+   const char* const end_marker = space + 1;
+   const size_t start_marker_len = space - start_marker;
+   const size_t end_marker_len = text + textlen - 1 - end_marker;
+   // "= =", "=a =" and "= b=" pass every check above but would yield an
+   // empty marker, which matches at every position of the template.
+   if (start_marker_len == 0 || end_marker_len == 0)
+     return false;
+
+   delim->start_marker = start_marker;
+   delim->start_marker_len = start_marker_len;
+   delim->end_marker = end_marker;
+   delim->end_marker_len = end_marker_len;
+   return true;
+ }
+
+// ----------------------------------------------------------------------
+// StripTemplateWhiteSpace()
+// Template::IsBlankOrOnlyHasOneRemovableMarker()
+// Template::InsertLine()
+// Template::StripBuffer()
+// This mini-parser modifies an input buffer, replacing it with a
+// new buffer that is the same as the old, but with whitespace
+// removed as is consistent with the given strip-mode:
+// STRIP_WHITESPACE, STRIP_BLANK_LINES, DO_NOT_STRIP (the last
+ // of these is a no-op). (This parser may work by allocating
+ // a new buffer and deleting the input buffer when it's done.)
+ // The trickiest bit is in STRIP_BLANK_LINES mode: if we see
+ // a line that consists entirely of one "removable" marker,
+// and nothing else other than whitespace. ("Removable" markers
+// are comments, start sections, end sections, pragmas and
+// template-include.) In such a case, we elide the newline at
+// the end of that line.
+// ----------------------------------------------------------------------
+
+ // Trims whitespace (as judged by ascii_isspace) from both ends of the
+ // character range described by *str and *len, updating both in place;
+ // the characters themselves are not modified.  We define our own
+ // version rather than using the one in strutil, mostly so we can take
+ // a size_t instead of an int.
+ static void StripTemplateWhiteSpace(const char** str, size_t* len) {
+   const char* first = *str;
+   size_t remaining = *len;
+
+   // Drop trailing whitespace first...
+   while (remaining > 0 && ascii_isspace(first[remaining - 1]))
+     --remaining;
+
+   // ...then leading whitespace.
+   while (remaining > 0 && ascii_isspace(first[0])) {
+     ++first;
+     --remaining;
+   }
+
+   *str = first;
+   *len = remaining;
+ }
+
+ // Returns true iff the line is blank, or consists -- apart from
+ // surrounding whitespace -- of exactly one "removable" marker.  Only in
+ // that case are *line and *len adjusted, to describe the line with its
+ // surrounding whitespace stripped.  MarkerDelimiters are {{ and }}, or
+ // equivalent.
+ bool Template::IsBlankOrOnlyHasOneRemovableMarker(
+     const char** line, size_t* len, const Template::MarkerDelimiters& delim) {
+   const char* trimmed = *line;
+   size_t trimmed_len = *len;
+   StripTemplateWhiteSpace(&trimmed, &trimmed_len);
+
+   // A line holding nothing but whitespace is removable as it stands;
+   // anything else has to pass all of the marker checks below.
+   if (trimmed_len != 0) {
+     // The smallest removable marker is start_marker_len +
+     // end_marker_len + 1 characters long; a shorter line is kept.
+     if (trimmed_len < delim.start_marker_len + delim.end_marker_len + 1)
+       return false;
+
+     // The line must open with the start marker...
+     if (memcmp(trimmed, delim.start_marker, delim.start_marker_len) != 0)
+       return false;
+
+     // ...followed by one of the "removable" type characters.  Only
+     // {{#...}}, {{/...}}, {{>...}}, {{!...}}, {{%...}} and {{=...=}}
+     // are removable.
+     if (strchr("#/>!%=", trimmed[delim.start_marker_len]) == NULL)
+       return false;
+
+     // The first end marker found after the start marker must be the
+     // very last thing on the line.
+     const char* closing = memmatch(trimmed + delim.start_marker_len,
+                                    trimmed_len - delim.start_marker_len,
+                                    delim.end_marker,
+                                    delim.end_marker_len);
+     if (closing == NULL ||
+         closing + delim.end_marker_len != trimmed + trimmed_len)
+       return false;
+   }
+
+   // Hand back the line stripped of its white space chars so when the
+   // marker is removed in expansion, no white space is left from the
+   // line that has now been removed.
+   *line = trimmed;
+   *len = trimmed_len;
+   return true;
+ }
+
+// Copies one line into buffer, stripped per the strip mode (delim matters
+// only for STRIP_BLANK_LINES); returns how many bytes were written.
+size_t Template::InsertLine(const char *line, size_t len, Strip strip,
+                            const MarkerDelimiters& delim, char* buffer) {
+  // Set the newline aside; it is re-added below unless stripped.
+  const bool had_newline = (len > 0 && line[len-1] == '\n');
+  if (had_newline)
+    --len;
+  bool keep_newline = had_newline;
+
+  if (strip >= STRIP_WHITESPACE) {
+    // Trim both ends of the line and always drop the newline.
+    StripTemplateWhiteSpace(&line, &len);
+    keep_newline = false;
+  } else if (strip >= STRIP_BLANK_LINES
+             && IsBlankOrOnlyHasOneRemovableMarker(&line, &len, delim)) {
+    // The call above narrowed line/len (blank line or lone removable
+    // marker), which is why it has to run before the memcpy below.
+    keep_newline = false;
+  }
+
+  memcpy(buffer, line, len);
+  if (!keep_newline)
+    return len;
+  buffer[len] = '\n';
+  return len + 1;
+}
+
+// Replaces *buffer (*len bytes) with a newly allocated copy stripped per
+// strip_, delete[]s the old array, and updates *len.  No-op if DO_NOT_STRIP.
+void Template::StripBuffer(char **buffer, size_t* len) {
+  if (strip_ == DO_NOT_STRIP)
+    return;
+
+  const char* const input_end = *buffer + *len;
+  char* const stripped = new char[*len];   // sized like the input
+  char* out = stripped;
+
+  MarkerDelimiters delim;
+
+  const char* line_begin = *buffer;
+  while (line_begin < input_end) {
+    // A line runs through its '\n', or to the end of input if it has none.
+    const char* const newline = static_cast<const char*>(
+        memchr(line_begin, '\n', input_end - line_begin));
+    const char* const line_end = newline ? newline + 1 : input_end;
+
+    out += InsertLine(line_begin, line_end - line_begin, strip_, delim, out);
+    assert(out >= stripped &&
+           static_cast<size_t>(out - stripped) <= *len);
+
+    // Before looking at the next line, see if the current line
+    // changed the marker-delimiter.  We care for
+    // IsBlankOrOnlyHasOneRemovableMarker, so we don't need to be
+    // perfect -- we don't have to handle the delimiter changing in
+    // the middle of a line -- just make sure that the next time
+    // there's only one marker on a line, we notice because we know
+    // the right delim.
+    const char* scan = line_begin;
+    for (;;) {
+      const char* const open = memmatch(scan, line_end - scan,
+                                        delim.start_marker,
+                                        delim.start_marker_len);
+      if (!open) break;
+      const char* const inner = open + delim.start_marker_len;
+      const char* const close = memmatch(inner, line_end - inner,
+                                         delim.end_marker,
+                                         delim.end_marker_len);
+      if (!close) break;
+      // Resume after this marker; set now, as the call below may change delim.
+      scan = close + delim.end_marker_len;
+      assert(close - inner >= 0);
+      // This tries to parse the marker as a set-delimiters marker.
+      // If it succeeds, it updates delim.  If not, it ignores it.
+      Template::ParseDelimiters(inner, close - inner, &delim);
+    }
+    line_begin = line_end;
+  }
+  assert(out >= stripped);
+
+  // Hand the stripped copy back in place of the input buffer.
+  delete[] *buffer;
+  *buffer = stripped;
+  *len = static_cast<size_t>(out - stripped);
+}
+
+// ----------------------------------------------------------------------
+// Template::ReloadIfChanged()
+// Template::ReloadIfChangedLocked()
+// If one template, try immediately to reload it from disk. If all
+// templates, just set all their reload statuses to true, so next time
+// GetTemplate() is called on the template, it will be reloaded from disk if
+// the disk version is newer than the one currently in memory.
+// ReloadIfChanged() returns true if the file changed on disk *and* we
+// successfully reloaded and parsed it. It never returns true if
+// original_filename_ is "".
+// ----------------------------------------------------------------------
+
+// Reloads the template from disk if the file changed; returns true only if
+// a new version was read and parsed.  Besides being called when locked, it's
+// also ok to call this from the constructor (nobody else has the object yet).
+bool Template::ReloadIfChangedLocked()
+    EXCLUSIVE_LOCKS_REQUIRED(g_template_mutex) {
+  // TODO(panicker): Remove this duplicate code when constructing the template,
+  // after deprecating this method.
+  // TemplateCache::GetTemplate() already checks if the template filename is
+  // valid and resolvable. It also checks if the file needs to be reloaded
+  // based on mtime.
+
+  // NOTE(panicker): we should not be using original_filename_ to determine
+  // if a template is string-based, instead use the boolean 'string_based'
+  // in the template cache.
+  if (original_filename_.empty()) {
+    // string-based templates don't reload
+    return false;
+  }
+
+  FileStat statbuf;
+  if (resolved_filename_.empty()) {
+    if (!template_cache_->ResolveTemplateFilename(original_filename_,
+                                                  &resolved_filename_,
+                                                  &statbuf)) {
+      LOG(WARNING) << "Unable to locate file " << original_filename_ << endl;
+      set_state(TS_ERROR);
+      return false;
+    }
+  } else {
+    if (!File::Stat(resolved_filename_, &statbuf)) {
+      LOG(WARNING) << "Unable to stat file " << resolved_filename_ << endl;
+      // We keep the old tree if there is one, otherwise we're in error
+      set_state(TS_ERROR);
+      return false;
+    }
+  }
+
+  if (statbuf.IsDirectory()) {
+    LOG(WARNING) << resolved_filename_
+                 << " is a directory and thus not readable" << endl;
+    // We keep the old tree if there is one, otherwise we're in error
+    set_state(TS_ERROR);
+    return false;
+  }
+  if (statbuf.mtime == filename_mtime_ && filename_mtime_ > 0
+      && tree_) {   // force a reload if we don't already have a tree_
+    VLOG(1) << "Not reloading file " << resolved_filename_
+            << ": no new mod-time" << endl;
+    set_state(TS_READY);
+    return false;   // file's timestamp hasn't changed, so no need to reload
+  }
+
+  File* fp = File::Open(resolved_filename_.c_str(), "r");
+  if (fp == NULL) {
+    LOG(ERROR) << "Can't find file " << resolved_filename_
+               << "; skipping" << endl;
+    // We keep the old tree if there is one, otherwise we're in error
+    set_state(TS_ERROR);
+    return false;
+  }
+  size_t buflen = statbuf.length;
+  char* file_buffer = new char[buflen];
+  if (fp->Read(file_buffer, buflen) != buflen) {
+    LOG(ERROR) << "Error reading file " << resolved_filename_
+               << ": " << strerror(errno) << endl;
+    fp->Close();
+    delete[] file_buffer;
+    // We could just keep the old tree, but probably safer to say 'error'
+    set_state(TS_ERROR);
+    return false;
+  }
+  fp->Close();
+
+  // Now that we know we've read the file ok, mark the new mtime
+  filename_mtime_ = statbuf.mtime;
+
+  // Parse the input one line at a time to get the "stripped" input.
+  StripBuffer(&file_buffer, &buflen);
+
+  // Re-initialize Auto-Escape data. Delete the parser and reset the template
+  // context back to TC_MANUAL. If the new content has the AUTOESCAPE pragma,
+  // the parser will then be re-created.
+  initial_context_ = TC_MANUAL;
+  delete htmlparser_;
+  htmlparser_ = NULL;
+
+  // Now parse the template we just read.  BuildTree takes over ownership
+  // of file_buffer in every case, and will eventually delete it.
+  if ( BuildTree(file_buffer, file_buffer + buflen) ) {
+    assert(state() == TS_READY);
+    return true;
+  } else {
+    assert(state() != TS_READY);
+    return false;
+  }
+}
+
+// ----------------------------------------------------------------------
+// Template::ExpandLocked()
+// Template::ExpandWithDataAndCache()
+// This is the main function clients call: it expands a template
+// by expanding its parse tree (which starts with a top-level
+// section node). For each variable/section/include-template it
+// sees, it replaces the name stored in the parse-tree with the
+// appropriate value from the passed-in dictionary.
+// ----------------------------------------------------------------------
+
+bool Template::ExpandLocked(ExpandEmitter *expand_emitter,
+                            const TemplateDictionaryInterface *dict,
+                            PerExpandData *per_expand_data,
+                            const TemplateCache *cache) const
+    SHARED_LOCKS_REQUIRED(g_template_mutex) {
+  // A NULL per_expand_data is allowed; stand in a default-constructed
+  // one so everything below can use the pointer unconditionally.
+  // TODO(csilvers): could make this static if it's expensive to construct.
+  PerExpandData empty_per_expand_data;
+  if (per_expand_data == NULL)
+    per_expand_data = &empty_per_expand_data;
+
+  // Only a template in TS_READY state is expanded.  (We'd like to reload
+  // if reload status is true, but ExpandWD() is const.)
+  if (state() != TS_READY)
+    return false;
+
+  if (per_expand_data->annotate()) {
+    // Remove the machine dependent prefix from the template file name:
+    // if annotate_path() occurs in the name, report the name starting
+    // at its first occurrence; otherwise report the name unchanged.
+    const char* const full_name = template_file();
+    const char* const tail = strstr(full_name,
+                                    per_expand_data->annotate_path());
+    per_expand_data->annotator()->EmitOpenFile(
+        expand_emitter, string(tail != NULL ? tail : full_name));
+  }
+
+  // If the client registered an expand-modifier, which is a modifier
+  // meant to modify all templates after they are expanded, apply it
+  // now.
+  const TemplateModifier* const modifier =
+      per_expand_data->template_expansion_modifier();
+  bool error_free;   // what tree_->Expand() reported
+  if (modifier == NULL ||
+      !modifier->MightModify(per_expand_data, template_file())) {
+    // No need to modify this template: expand straight into the output.
+    error_free = tree_->Expand(expand_emitter, dict, per_expand_data, cache);
+  } else {
+    // Expand into a scratch string first, then let the modifier write
+    // the final output.  Since the expand-modifier doesn't ever have an
+    // arg (it doesn't have a name and can't be applied in the text of a
+    // template), the template name is passed as the string arg.
+    string value;
+    StringEmitter tmp_emitter(&value);
+    error_free = tree_->Expand(&tmp_emitter, dict, per_expand_data, cache);
+    modifier->Modify(value.data(), value.size(), per_expand_data,
+                     expand_emitter, template_file());
+  }
+
+  if (per_expand_data->annotate()) {
+    per_expand_data->annotator()->EmitCloseFile(expand_emitter);
+  }
+
+  return error_free;
+}
+
+bool Template::ExpandWithDataAndCache(
+ ExpandEmitter *expand_emitter,
+ const TemplateDictionaryInterface *dict,
+ PerExpandData *per_expand_data,
+ const TemplateCache *cache) const LOCKS_EXCLUDED(g_template_mutex) {
+ // We hold g_template_mutex the entire time we expand, because
+ // ReloadIfChanged(), which also holds g_template_mutex, is allowed to
+ // delete tree_, and we want to make sure it doesn't do that (in another
+ // thread) while we're expanding. We also protect state_, etc.
+ // Note we only need a read-lock here, so many expands can go on at once.
+ // TODO(csilvers): We can remove this once we delete ReloadIfChanged.
+ // When we do that, ExpandLocked() can go away as well.
+ ReaderMutexLock ml(&g_template_mutex);
+ return ExpandLocked(expand_emitter, dict, per_expand_data, cache);
+}
+
+}
diff --git a/src/template_annotator.cc b/src/template_annotator.cc
new file mode 100644
index 0000000..57d5a2b
--- /dev/null
+++ b/src/template_annotator.cc
@@ -0,0 +1,108 @@
+// Copyright (c) 2009, Google Inc.
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+// * Neither the name of Google Inc. nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+// ---
+
+#include <config.h>
+#include <ctemplate/template_annotator.h>
+#include <string>
+#include <ctemplate/template_emitter.h>
+
+// Emits an open annotation string, "{{#<name>=<value>}}".  'name' must be
+// a string literal.  Expands to ONE expression, so it is safe in if/else.
+#define EMIT_OPEN_ANNOTATION(emitter, name, value)            \
+  ((emitter)->Emit("{{#" name "=", 4 + sizeof(name)-1),       \
+   (emitter)->Emit(value), (emitter)->Emit("}}", 2))
+
+// Emits a close annotation string. 'name' must be a string literal.
+#define EMIT_CLOSE_ANNOTATION(emitter, name) \
+  (emitter)->Emit("{{/" name "}}", 5 + sizeof(name)-1)
+
+// Emits a missing-file annotation, "{{<name>=<value>}}" (one expression).
+#define EMIT_MISSING_ANNOTATION(emitter, name, value)         \
+  ((emitter)->Emit("{{" name "=", 3 + sizeof(name)-1),        \
+   (emitter)->Emit(value), (emitter)->Emit("}}", 2))
+
+namespace ctemplate {
+
+using std::string;
+
+// Implementation note: TextTemplateAnnotator contains no state, and
+// code elsewhere is depending on this. E.g., a statically allocated
+// instance is used as the default annotator in the implementation of
+// PerExpandData. If you add state to this class, please revisit
+// the setup of such static instances.
+
+// This implements precisely the same annotation that was originally
+// built into the template.cc. Many upstream tools depend on the
+// exact formatting that this implementation happens to produce--
+// so do not consider changes to this lightly.
+
+void TextTemplateAnnotator::EmitOpenInclude(ExpandEmitter* emitter,
+ const string& value) {
+ EMIT_OPEN_ANNOTATION(emitter, "INC", value); // emits {{#INC=<value>}}
+}
+
+void TextTemplateAnnotator::EmitCloseInclude(ExpandEmitter* emitter) {
+ EMIT_CLOSE_ANNOTATION(emitter, "INC"); // emits {{/INC}}
+}
+
+void TextTemplateAnnotator::EmitOpenFile(ExpandEmitter* emitter,
+ const string& value) {
+ EMIT_OPEN_ANNOTATION(emitter, "FILE", value); // emits {{#FILE=<value>}}
+}
+
+void TextTemplateAnnotator::EmitCloseFile(ExpandEmitter* emitter) {
+ EMIT_CLOSE_ANNOTATION(emitter, "FILE"); // emits {{/FILE}}
+}
+
+void TextTemplateAnnotator::EmitOpenSection(ExpandEmitter* emitter,
+ const string& value) {
+ EMIT_OPEN_ANNOTATION(emitter, "SEC", value); // emits {{#SEC=<value>}}
+}
+
+void TextTemplateAnnotator::EmitCloseSection(ExpandEmitter* emitter) {
+ EMIT_CLOSE_ANNOTATION(emitter, "SEC"); // emits {{/SEC}}
+}
+
+void TextTemplateAnnotator::EmitOpenVariable(ExpandEmitter* emitter,
+ const string& value) {
+ EMIT_OPEN_ANNOTATION(emitter, "VAR", value); // emits {{#VAR=<value>}}
+}
+
+void TextTemplateAnnotator::EmitCloseVariable(ExpandEmitter* emitter) {
+ EMIT_CLOSE_ANNOTATION(emitter, "VAR"); // emits {{/VAR}}
+}
+
+void TextTemplateAnnotator::EmitFileIsMissing(ExpandEmitter* emitter,
+ const string& value) {
+ EMIT_MISSING_ANNOTATION(emitter,"MISSING_FILE", value); // emits {{MISSING_FILE=<value>}}
+}
+
+}
diff --git a/src/template_cache.cc b/src/template_cache.cc
new file mode 100644
index 0000000..819a0b6
--- /dev/null
+++ b/src/template_cache.cc
@@ -0,0 +1,772 @@
+// Copyright (c) 2009, Google Inc.
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+// * Neither the name of Google Inc. nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+// ---
+
+#include <config.h>
+#include "base/mutex.h" // This must go first so we get _XOPEN_SOURCE
+#include <ctemplate/template_cache.h>
+#include <assert.h> // for assert()
+#include <errno.h>
+#include <stddef.h> // for size_t
+#include <stdlib.h> // for strerror()
+#include <sys/stat.h>
+#ifdef HAVE_UNISTD_H
+# include <unistd.h>
+#endif // for getcwd()
+#include HASH_MAP_H // for hash_map<>::iterator, hash_map<>, etc
+#include <utility> // for pair<>, make_pair()
+#include <vector> // for vector<>::size_type, vector<>, etc
+#include "base/thread_annotations.h" // for GUARDED_BY
+#include <ctemplate/find_ptr.h>
+#include <ctemplate/template.h> // for Template, TemplateState
+#include <ctemplate/template_enums.h> // for Strip, DO_NOT_STRIP
+#include <ctemplate/template_pathops.h> // for PathJoin(), IsAbspath(), etc
+#include <ctemplate/template_string.h> // for StringHash
+#include "base/fileutil.h"
+#include <iostream> // for cerr
+
+#ifndef PATH_MAX
+#ifdef MAXPATHLEN
+#define PATH_MAX MAXPATHLEN
+#else
+#define PATH_MAX 4096 // seems conservative for max filename len!
+#endif
+#endif
+
+using std::endl;
+using std::string;
+using std::vector;
+using std::pair;
+using std::make_pair;
+#ifdef HAVE_UNORDERED_MAP
+using HASH_NAMESPACE::unordered_map;
+// This is totally cheap, but minimizes the need for #ifdef's below...
+#define hash_map unordered_map
+#else
+using HASH_NAMESPACE::hash_map;
+#endif
+
+static int kVerbosity = 0; // VLOG(n) prints only when n <= kVerbosity; change this by hand to get vlogs
+#define LOG(level) std::cerr << #level ": "
+#define PLOG(level) std::cerr << #level ": [" << strerror(errno) << "] "
+#define VLOG(level) if (kVerbosity >= level) std::cerr << "V" #level ": "
+
+namespace ctemplate {
+
+// ----------------------------------------------------------------------
+// TemplateCache::RefcountedTemplate
+// A simple refcounting class to keep track of templates, which
+// might be shared between caches. It also owns the pointer to
+// the template itself.
+// ----------------------------------------------------------------------
+
+class TemplateCache::RefcountedTemplate {
+ public:
+  // Takes ownership of ptr; the creator holds the first reference.
+  explicit RefcountedTemplate(const Template* ptr) : ptr_(ptr), refcount_(1) { }
+  // Adds one reference.  The object must still be alive (count > 0).
+  void IncRef() {
+    MutexLock ml(&mutex_);
+    assert(refcount_ > 0);
+    refcount_ += 1;
+  }
+  // Drops n references at once; deletes this object when none remain.
+  void DecRefN(int n) {
+    int remaining;
+    {
+      MutexLock ml(&mutex_);
+      assert(refcount_ >= n);
+      remaining = (refcount_ -= n);
+    }
+    // The delete has to wait until the MutexLock above is gone: its
+    // destructor would otherwise unlock a mutex that had already been
+    // destroyed along with this object.  Nothing is lost by deleting
+    // outside the lock -- anybody touching the object after its count
+    // reached zero would be in trouble either way.
+    if (remaining == 0)
+      delete this;
+  }
+  void DecRef() { DecRefN(1); }
+  int refcount() const {
+    MutexLock ml(&mutex_);   // could be ReaderMutexLock, but whatever
+    return refcount_;
+  }
+  const Template* tpl() const { return ptr_; }
+
+ private:
+  // Private: instances are only destroyed through DecRef()/DecRefN().
+  ~RefcountedTemplate() { delete ptr_; }
+  const Template* const ptr_;
+  int refcount_ GUARDED_BY(mutex_);
+  mutable Mutex mutex_;
+};
+
+// ----------------------------------------------------------------------
+// TemplateCache::RefTplPtrHash
+// TemplateCache::TemplateCacheHash
+// TemplateCache::CachedTemplate
+// These are used for the cache-map. CachedTemplate is what is
+// actually stored in the map: the Template* and some information
+// about it (whether we need to reload it, etc.). Refcount is
+// a simple refcounting class, used to keep track of templates.
+// ----------------------------------------------------------------------
+
+// Hash functor for RefcountedTemplate pointers.  Needed only because many
+// STLs (eg FreeBSD's) are unable to hash pointers by default.
+class TemplateCache::RefTplPtrHash {
+ public:
+  size_t operator()(const RefcountedTemplate* tpl) const {
+    return reinterpret_cast<size_t>(tpl);   // the address is the hash
+  }
+  // Less operator for MSVC's hash containers: orders by address.
+  bool operator()(const RefcountedTemplate* lhs,
+                  const RefcountedTemplate* rhs) const {
+    return rhs > lhs;
+  }
+  // These two public members are required by msvc. 4 and 8 are defaults.
+  static const size_t bucket_size = 4;
+  static const size_t min_buckets = 8;
+};
+
+class TemplateCache::TemplateCacheHash {
+ public:
+  size_t operator()(const TemplateCacheKey& key) const {
+    // Using + here is silly, but should work ok in practice.
+    return key.first + key.second;
+  }
+  // Less operator for MSVC's hash containers: first, then second.
+  bool operator()(const TemplateCacheKey& lhs,
+                  const TemplateCacheKey& rhs) const {
+    if (lhs.first != rhs.first)
+      return lhs.first < rhs.first;
+    return lhs.second < rhs.second;
+  }
+  // These two public members are required by msvc. 4 and 8 are defaults.
+  static const size_t bucket_size = 4;
+  static const size_t min_buckets = 8;
+};
+
+struct TemplateCache::CachedTemplate {
+ enum TemplateType { UNUSED, FILE_BASED, STRING_BASED };
+ CachedTemplate() // empty entry: no template, type UNUSED
+ : refcounted_tpl(NULL),
+ should_reload(false),
+ template_type(UNUSED) {
+ }
+ CachedTemplate(const Template* tpl_ptr, TemplateType type) // wraps tpl_ptr in a new RefcountedTemplate
+ : refcounted_tpl(new TemplateCache::RefcountedTemplate(tpl_ptr)),
+ should_reload(false),
+ template_type(type) {
+ }
+
+ // The refcounted wrapper around the template. We won't remove the template from the cache until refcount drops to 0
+ TemplateCache::RefcountedTemplate* refcounted_tpl; // shared across Clone()
+ // reload status: when true, GetTemplateLocked() re-checks the template before returning it
+ bool should_reload;
+ // indicates if the template is string-based or file-based (UNUSED for an empty entry)
+ TemplateType template_type;
+};
+
+
+// ----------------------------------------------------------------------
+// TemplateCache::TemplateCache()
+// TemplateCache::~TemplateCache()
+// ----------------------------------------------------------------------
+
+TemplateCache::TemplateCache()
+ : parsed_template_cache_(new TemplateMap), // heap-allocated; freed in the destructor
+ is_frozen_(false), // a new cache starts out unfrozen
+ search_path_(),
+ get_template_calls_(new TemplateCallMap), // heap-allocated; freed in the destructor
+ mutex_(new Mutex),
+ search_path_mutex_(new Mutex) {
+}
+
+TemplateCache::~TemplateCache() {
+ ClearCache(); // runs first, while the maps and mutexes below still exist
+ delete parsed_template_cache_; // the four members allocated with new in the constructor
+ delete get_template_calls_;
+ delete mutex_;
+ delete search_path_mutex_;
+}
+
+
+// ----------------------------------------------------------------------
+// HasTemplateChangedOnDisk
+// Indicates whether the template has changed, based on the
+// backing file's last modtime.
+// ----------------------------------------------------------------------
+
+bool HasTemplateChangedOnDisk(const char* resolved_filename,
+                              time_t mtime,
+                              FileStat* statbuf) {
+  const bool stat_ok = File::Stat(resolved_filename, statbuf);
+  if (!stat_ok) {
+    // If we can't Stat the file then the file may have been deleted,
+    // so reload the template.
+    LOG(WARNING) << "Unable to stat file " << resolved_filename << endl;
+    return true;
+  }
+  // Unchanged (no reload needed yet) only when the recorded mtime is
+  // positive and the file on disk still carries exactly that mtime.
+  const bool unchanged = (statbuf->mtime == mtime && mtime > 0);
+  return !unchanged;
+}
+
+
+// ----------------------------------------------------------------------
+// TemplateCache::LoadTemplate()
+// TemplateCache::GetTemplate()
+// TemplateCache::GetTemplateLocked()
+// TemplateCache::StringToTemplateCache()
+// The routines for adding a template to the cache. LoadTemplate
+// loads the template into the cache and returns true if the
+// template was successfully loaded or if it already exists in the
+// cache. GetTemplate loads the template into the cache from disk
+// and returns the parsed template. StringToTemplateCache parses
+// and loads the template from the given string into the parsed
+// cache, or returns false if an older version already exists in
+// the cache.
+// ----------------------------------------------------------------------
+
+bool TemplateCache::LoadTemplate(const TemplateString& filename, Strip strip) {
+  const TemplateCacheKey key(filename.GetGlobalId(), strip);
+  WriterMutexLock ml(mutex_);   // GetTemplateLocked() needs the write lock
+  return NULL != GetTemplateLocked(filename, strip, key);
+}
+
+const Template *TemplateCache::GetTemplate(const TemplateString& filename,
+                                           Strip strip) {
+  // No need to have the cache-mutex acquired for this step
+  TemplateCacheKey cache_key = TemplateCacheKey(filename.GetGlobalId(), strip);
+  WriterMutexLock ml(mutex_);
+  // NULL here means: frozen-cache miss, or the template is not TS_READY.
+  RefcountedTemplate* refcounted_tpl =
+      GetTemplateLocked(filename, strip, cache_key);
+  if (!refcounted_tpl)
+    return NULL;
+
+  refcounted_tpl->IncRef();   // DecRef() is in DoneWithGetTemplatePtrs()
+  (*get_template_calls_)[refcounted_tpl]++;   // set up for DoneWith...()
+  return refcounted_tpl->tpl();
+}
+
+TemplateCache::RefcountedTemplate* TemplateCache::GetTemplateLocked(
+ const TemplateString& filename,
+ Strip strip,
+ const TemplateCacheKey& template_cache_key) {
+ // NOTE: A write-lock must be held on mutex_ when this method is called.
+ CachedTemplate* it = find_ptr(*parsed_template_cache_, template_cache_key);
+ if (!it) {
+ // If the cache is frozen and the template doesn't already exist in cache,
+ // do not load the template, return NULL.
+ if (is_frozen_) {
+ return NULL;
+ }
+ // TODO(panicker): Validate the filename here, and if the file can't be
+ // resolved then insert a NULL in the cache.
+ // If validation succeeds then pass in resolved filename, mtime &
+ // file length (from statbuf) to the constructor.
+ const Template* tpl = new Template(filename, strip, this);
+ it = &(*parsed_template_cache_)[template_cache_key]; // creates the map entry
+ *it = CachedTemplate(tpl, CachedTemplate::FILE_BASED);
+ assert(it);
+ }
+ if (it->should_reload) {
+ // check if the template has changed on disk or if a new template with the
+ // same name has been added earlier in the search path:
+ const string resolved = FindTemplateFilename(
+ it->refcounted_tpl->tpl()->original_filename());
+ FileStat statbuf;
+ if (it->template_type == CachedTemplate::FILE_BASED &&
+ (resolved != it->refcounted_tpl->tpl()->template_file() ||
+ HasTemplateChangedOnDisk(
+ it->refcounted_tpl->tpl()->template_file(),
+ it->refcounted_tpl->tpl()->mtime(),
+ &statbuf))) {
+ // Create a new template, insert it into the cache under
+ // template_cache_key, and DecRef() the old one to indicate
+ // the cache no longer has a reference to it.
+ const Template* tpl = new Template(filename, strip, this);
+ // DecRef after creating the new template since DecRef may free up
+ // the storage for filename.
+ it->refcounted_tpl->DecRef();
+ *it = CachedTemplate(tpl, CachedTemplate::FILE_BASED);
+ }
+ it->should_reload = false; // the reload request has now been handled
+ }
+
+ // If the state is TS_ERROR, we leave the state as is, but return
+ // NULL. We won't try to load the template file again until the
+ // reload status is set to true by another call to ReloadAllIfChanged.
+ return it->refcounted_tpl->tpl()->state() == TS_READY ? it->refcounted_tpl : NULL;
+}
+
+bool TemplateCache::StringToTemplateCache(const TemplateString& key,
+ const TemplateString& content,
+ Strip strip) {
+ TemplateCacheKey template_cache_key = TemplateCacheKey(key.GetGlobalId(), strip);
+ {
+ ReaderMutexLock ml(mutex_);
+ if (is_frozen_) { // a frozen cache accepts no new templates
+ return false;
+ }
+ // If the key is already in the parsed-cache with a non-error template, we just return false.
+ CachedTemplate* it = find_ptr(*parsed_template_cache_, template_cache_key);
+ if (it && it->refcounted_tpl->tpl()->state() != TS_ERROR) {
+ return false;
+ }
+ }
+ Template* tpl = Template::StringToTemplate(content, strip); // parsed without holding mutex_
+ if (tpl == NULL) {
+ return false;
+ }
+ if (tpl->state() != TS_READY) { // content did not parse to a ready template: discard it
+ delete tpl;
+ return false;
+ }
+
+ WriterMutexLock ml(mutex_);
+ // Double-check it wasn't just inserted while we were parsing without the lock.
+ CachedTemplate* it = find_ptr(*parsed_template_cache_, template_cache_key);
+ if (it) {
+ if (it->refcounted_tpl->tpl()->state() == TS_ERROR) {
+ // drop the cache's reference to the old entry; it is replaced below
+ it->refcounted_tpl->DecRef();
+ } else {
+ delete tpl;
+ return false;
+ }
+ }
+ // Insert into cache.
+ (*parsed_template_cache_)[template_cache_key] =
+ CachedTemplate(tpl, CachedTemplate::STRING_BASED);
+ return true;
+}
+
+// ----------------------------------------------------------------------
+// TemplateCache::ExpandWithData()
+// TemplateCache::ExpandNoLoad()
+// TemplateCache::ExpandLocked()
+// ExpandWithData gets the template from the parsed-cache, possibly
+// loading the template on-demand, and then expands the template.
+// ExpandNoLoad is for frozen caches only -- if the filename isn't
+// in the cache, the routine fails (returns false) rather than trying
+// to fetch the template. ExpandLocked is used for recursive
+// sub-template includes, and just tells template.cc it doesn't
+// need to recursively acquire any locks.
+// ----------------------------------------------------------------------
+
+bool TemplateCache::ExpandWithData(const TemplateString& filename,
+                                   Strip strip,
+                                   const TemplateDictionaryInterface *dict,
+                                   PerExpandData *per_expand_data,
+                                   ExpandEmitter *expand_emitter) {
+  TemplateCacheKey template_cache_key(filename.GetGlobalId(), strip);
+  // Hold our own reference on the template so that it stays alive no
+  // matter what happens to the cache while we expand without the lock.
+  RefcountedTemplate* pinned = NULL;
+  {
+    WriterMutexLock ml(mutex_);
+    // Optionally load the template (depending on whether the cache is frozen,
+    // the reload bit is set etc.)
+    pinned = GetTemplateLocked(filename, strip, template_cache_key);
+    if (pinned == NULL)
+      return false;
+    pinned->IncRef();
+  }
+  const bool result = pinned->tpl()->ExpandWithDataAndCache(
+      expand_emitter, dict, per_expand_data, this);
+  {
+    WriterMutexLock ml(mutex_);
+    pinned->DecRef();
+  }
+  return result;
+}
+
+bool TemplateCache::ExpandNoLoad(
+    const TemplateString& filename,
+    Strip strip,
+    const TemplateDictionaryInterface *dict,
+    PerExpandData *per_expand_data,
+    ExpandEmitter *expand_emitter) const {
+  TemplateCacheKey template_cache_key(filename.GetGlobalId(), strip);
+  RefcountedTemplate* pinned = NULL;
+  {
+    ReaderMutexLock ml(mutex_);
+    if (!is_frozen_) {
+      LOG(DFATAL) << ": ExpandNoLoad() only works on frozen caches.";
+      return false;
+    }
+    CachedTemplate* it = find_ptr(*parsed_template_cache_, template_cache_key);
+    if (!it)
+      return false;   // not in the cache, and we never load here
+    // Take our own reference so the template outlives the lock scope.
+    pinned = it->refcounted_tpl;
+    pinned->IncRef();
+  }
+  const bool result = pinned->tpl()->ExpandWithDataAndCache(
+      expand_emitter, dict, per_expand_data, this);
+  {
+    WriterMutexLock ml(mutex_);
+    pinned->DecRef();
+  }
+  return result;
+}
+
+// Note: "Locked" in this name refers to the template object, not to
+// us; we still need to acquire our locks as per normal.
+bool TemplateCache::ExpandLocked(const TemplateString& filename,
+ Strip strip,
+ ExpandEmitter *expand_emitter,
+ const TemplateDictionaryInterface *dict,
+ PerExpandData *per_expand_data) {
+ TemplateCacheKey template_cache_key(filename.GetGlobalId(), strip);
+ RefcountedTemplate* refcounted_tpl = NULL;
+ {
+ WriterMutexLock ml(mutex_);
+ refcounted_tpl = GetTemplateLocked(filename, strip, template_cache_key);
+ if (!refcounted_tpl)
+ return false; // GetTemplateLocked() gave us no template to expand
+ refcounted_tpl->IncRef(); // hold a reference across the expand
+ }
+ const bool result = refcounted_tpl->tpl()->ExpandLocked(
+ expand_emitter, dict, per_expand_data, this); // runs without mutex_ held
+ {
+ WriterMutexLock ml(mutex_);
+ refcounted_tpl->DecRef(); // balances the IncRef() above
+ }
+ return result;
+}
+
+// ----------------------------------------------------------------------
+// TemplateCache::SetTemplateRootDirectory()
+// TemplateCache::AddAlternateTemplateRootDirectory()
+// TemplateCache::template_root_directory()
+// TemplateCache::FindTemplateFilename()
+// The template-root-directory is where we look for template
+// files (in GetTemplate and include templates) when they're
+// given with a relative rather than absolute name. You can
+// set a 'main' root directory (where we look first), as well
+// as alternates.
+// ----------------------------------------------------------------------
+
+bool TemplateCache::AddAlternateTemplateRootDirectoryHelper(
+ const string& directory,
+ bool clear_template_search_path) {
+ {
+ ReaderMutexLock ml(mutex_);
+ if (is_frozen_) { // Cannot set root-directory on a frozen cache.
+ return false;
+ }
+ }
+ string normalized = directory;
+ // make sure it ends with '/'
+ NormalizeDirectory(&normalized);
+ // Make the directory absolute if it isn't already. This makes code
+ // safer if client later does a chdir.
+ if (!IsAbspath(normalized)) {
+ char* cwdbuf = new char[PATH_MAX]; // new to avoid stack overflow
+ const char* cwd = getcwd(cwdbuf, PATH_MAX);
+ if (!cwd) { // cwdbuf's contents are unspecified on failure: don't log them
+ PLOG(WARNING) << "Unable to convert '" << normalized
+ << "' to an absolute path: getcwd() failed";
+ } else {
+ normalized = PathJoin(cwd, normalized);
+ }
+ delete[] cwdbuf;
+ }
+
+ VLOG(2) << "Setting Template directory to " << normalized << endl;
+ {
+ WriterMutexLock ml(search_path_mutex_);
+ if (clear_template_search_path) { // "set" semantics: replace the whole path
+ search_path_.clear();
+ }
+ search_path_.push_back(normalized);
+ }
+
+ // NOTE(williasr): The template root is not part of the template
+ // cache key, so we need to invalidate the cache contents.
+ ReloadAllIfChanged(LAZY_RELOAD);
+ return true; // false is returned only for a frozen cache (see above)
+}
+
+bool TemplateCache::SetTemplateRootDirectory(const string& directory) {
+ return AddAlternateTemplateRootDirectoryHelper(directory, true); // true: clear the search path first
+}
+
+bool TemplateCache::AddAlternateTemplateRootDirectory(
+ const string& directory) {
+ return AddAlternateTemplateRootDirectoryHelper(directory, false); // false: append to the existing search path
+}
+
+string TemplateCache::template_root_directory() const {
+ ReaderMutexLock ml(search_path_mutex_);
+ if (search_path_.empty()) {
+ return kCWD; // no root directory has been added to the search path
+ }
+ return search_path_[0]; // first search-path entry
+}
+
+// Given an unresolved filename, look through the template search path
+// to see if the template can be found. If so, resolved contains the
+// resolved filename, statbuf contains the stat structure for the file
+// (to avoid double-statting the file), and the function returns
+// true. Otherwise, the function returns false.
+bool TemplateCache::ResolveTemplateFilename(const string& unresolved,
+ string* resolved,
+ FileStat* statbuf) const {
+ ReaderMutexLock ml(search_path_mutex_);
+ if (search_path_.empty() || IsAbspath(unresolved)) { // use the name as given
+ *resolved = unresolved;
+ if (File::Stat(*resolved, statbuf)) {
+ VLOG(1) << "Resolved " << unresolved << " to " << *resolved << endl;
+ return true;
+ }
+ } else {
+ for (TemplateSearchPath::const_iterator path = search_path_.begin();
+ path != search_path_.end();
+ ++path) { // try each search-path entry in order; first hit wins
+ *resolved = PathJoin(*path, unresolved);
+ if (File::Stat(*resolved, statbuf)) {
+ VLOG(1) << "Resolved " << unresolved << " to " << *resolved << endl;
+ return true;
+ }
+ }
+ }
+
+ resolved->clear(); // no candidate could be stat'ed: report failure with an empty name
+ return false;
+}
+
+string TemplateCache::FindTemplateFilename(const string& unresolved)
+ const {
+ string resolved;
+ FileStat statbuf; // required by ResolveTemplateFilename(); not used afterwards
+ if (!ResolveTemplateFilename(unresolved, &resolved, &statbuf))
+ resolved.clear(); // ResolveTemplateFilename() also clears it on failure
+ return resolved; // empty string means "not found"
+}
+
+
+// ----------------------------------------------------------------------
+// TemplateCache::Delete()
+// TemplateCache::ClearCache()
+// Delete deletes one entry from the cache.
+// ----------------------------------------------------------------------
+
+bool TemplateCache::Delete(const TemplateString& key) {
+ WriterMutexLock ml(mutex_);
+ if (is_frozen_) { // Cannot delete from a frozen cache.
+ return false;
+ }
+ vector<TemplateCacheKey> to_erase; // keys collected now, erased after the scan
+ const TemplateId key_id = key.GetGlobalId();
+ for (TemplateMap::iterator it = parsed_template_cache_->begin();
+ it != parsed_template_cache_->end(); ++it) {
+ if (it->first.first == key_id) { // compare only the id part of the cache key
+ // We'll delete the content pointed to by the entry here, since
+ // it's handy, but we won't delete the entry itself quite yet.
+ it->second.refcounted_tpl->DecRef();
+ to_erase.push_back(it->first);
+ }
+ }
+ for (vector<TemplateCacheKey>::iterator it = to_erase.begin();
+ it != to_erase.end(); ++it) {
+ parsed_template_cache_->erase(*it);
+ }
+ return !to_erase.empty(); // true iff at least one entry was removed
+}
+
+void TemplateCache::ClearCache() {
+ // NOTE: We allow a frozen cache to be cleared with this method, although
+ // no other changes can be made to the cache.
+ // We clear the cache by swapping it with an empty cache. This lets
+ // us delete the items in the cache at our leisure without needing
+ // to hold mutex_.
+ TemplateMap tmp_cache;
+ {
+ WriterMutexLock ml(mutex_);
+ parsed_template_cache_->swap(tmp_cache);
+ is_frozen_ = false; // clearing also unfreezes the cache
+ }
+ for (TemplateMap::iterator it = tmp_cache.begin();
+ it != tmp_cache.end();
+ ++it) {
+ it->second.refcounted_tpl->DecRef(); // drop the reference each old entry held
+ }
+
+ // Do a decref for all templates ever returned by GetTemplate().
+ DoneWithGetTemplatePtrs();
+}
+
+// ----------------------------------------------------------------------
+// TemplateCache::DoneWithGetTemplatePtrs()
+// DoneWithGetTemplatePtrs() DecRefs every template in the
+// get_template_calls_ list. This is because the user of
+// GetTemplate() didn't have a pointer to the refcounted Template
+// to do this themselves. Note we only provide this as a batch
+// operation, so the user should be careful to only call this when
+// they are no longer using *any* template ever retrieved by
+// this cache's GetTemplate().
+// ----------------------------------------------------------------------
+
+void TemplateCache::DoneWithGetTemplatePtrs() {
+ WriterMutexLock ml(mutex_);
+ for (TemplateCallMap::iterator it = get_template_calls_->begin();
+ it != get_template_calls_->end(); ++it) {
+ it->first->DecRefN(it->second); // it.second: # of times GetTpl was called
+ }
+ get_template_calls_->clear(); // every recorded call has now been DecRef'd
+}
+
+// ----------------------------------------------------------------------
+// TemplateCache::ReloadAllIfChanged()
+// IMMEDIATE_RELOAD attempts to immediately reload and parse
+// all templates if the corresponding template files have changed.
+// LAZY_RELOAD just sets the reload bit in the cache so that the next
+// GetTemplate will reload and parse the template, if it changed.
+
+// NOTE: Suppose the search path is "dira:dirb", and a template is
+// created with name "foo", which resolves to "dirb/foo" because
+// dira/foo does not exist. Then suppose dira/foo is created and then
+// ReloadAllIfChanged() is called. Then ReloadAllIfChanged() will replace
+// the contents of the template with dira/foo, *not* dirb/foo, even if
+// dirb/foo hasn't changed.
+// ----------------------------------------------------------------------
+
+void TemplateCache::ReloadAllIfChanged(ReloadType reload_type) {
+ WriterMutexLock ml(mutex_);
+ if (is_frozen_) { // do not reload a frozen cache.
+ return;
+ }
+ for (TemplateMap::iterator it = parsed_template_cache_->begin();
+ it != parsed_template_cache_->end();
+ ++it) {
+ it->second.should_reload = true; // set for both LAZY_RELOAD and IMMEDIATE_RELOAD
+ if (reload_type == IMMEDIATE_RELOAD) {
+ const Template* tpl = it->second.refcounted_tpl->tpl();
+ // Reload should always use the original filename.
+ // For instance on reload, we may replace an existing template with a
+ // new one that came earlier on the search path.
+ GetTemplateLocked(tpl->original_filename(), tpl->strip(), it->first); // return value intentionally unused
+ }
+ }
+}
+
+// ----------------------------------------------------------------------
+// TemplateCache::Freeze()
+// This method marks the cache as 'frozen'. After this method is called,
+// the cache is immutable, and cannot be modified. New templates cannot be
+// loaded and existing templates cannot be reloaded.
+// ----------------------------------------------------------------------
+
+void TemplateCache::Freeze() {
+ {
+ ReaderMutexLock ml(mutex_);
+ if (is_frozen_) { // if already frozen, then this is a no-op.
+ return;
+ }
+ }
+ // A final reload before freezing the cache.
+ ReloadAllIfChanged(IMMEDIATE_RELOAD); // takes mutex_ itself, so no lock is held here
+ {
+ WriterMutexLock ml(mutex_);
+ is_frozen_ = true; // from now on the is_frozen_ checks in this class reject changes
+ }
+}
+
+// ----------------------------------------------------------------------
+// TemplateCache::Clone()
+// Clone makes a shallow copy of the parsed cache by incrementing
+// templates' refcount.
+// The caller is responsible for deallocating the returned TemplateCache.
+// ----------------------------------------------------------------------
+
+TemplateCache* TemplateCache::Clone() const {
+ ReaderMutexLock ml(mutex_);
+ TemplateCache* new_cache = new TemplateCache(); // owned by the caller
+ *(new_cache->parsed_template_cache_) = *parsed_template_cache_; // only the parsed cache is copied here
+ for (TemplateMap::iterator it = parsed_template_cache_->begin();
+ it != parsed_template_cache_->end(); ++it) {
+ it->second.refcounted_tpl->IncRef(); // one extra reference per template, for new_cache's entry
+ }
+
+ return new_cache;
+}
+
+// ----------------------------------------------------------------------
+// TemplateCache::Refcount()
+// This routine is DEBUG-only. It returns the refcount of a template,
+// given the TemplateCacheKey.
+// ----------------------------------------------------------------------
+
+int TemplateCache::Refcount(const TemplateCacheKey template_cache_key) const {
+ ReaderMutexLock ml(mutex_);
+ CachedTemplate* it = find_ptr(*parsed_template_cache_, template_cache_key);
+ return it ? it->refcounted_tpl->refcount() : 0; // 0 when the key is not cached
+}
+
+// ----------------------------------------------------------------------
+// TemplateCache::TemplateIsCached()
+// This routine is for testing only -- it says whether a given
+// template is already in the cache or not.
+// ----------------------------------------------------------------------
+
+bool TemplateCache::TemplateIsCached(const TemplateCacheKey template_cache_key)
+ const {
+ ReaderMutexLock ml(mutex_);
+ return parsed_template_cache_->count(template_cache_key); // count() converted to bool
+}
+
+// ----------------------------------------------------------------------
+// TemplateCache::IsValidTemplateFilename()
+// Validates the filename before constructing the template.
+// ----------------------------------------------------------------------
+
+bool TemplateCache::IsValidTemplateFilename(const string& filename,
+ string* resolved_filename,
+ FileStat* statbuf) const {
+ if (!ResolveTemplateFilename(filename,
+ resolved_filename,
+ statbuf)) { // on success, fills *resolved_filename and *statbuf
+ LOG(WARNING) << "Unable to locate file " << filename << endl;
+ return false;
+ }
+ if (statbuf->IsDirectory()) { // resolved, but to a directory: reject it
+ LOG(WARNING) << *resolved_filename
+ << " is a directory and thus not readable" << endl;
+ return false;
+ }
+ return true;
+}
+
+}
diff --git a/src/template_dictionary.cc b/src/template_dictionary.cc
new file mode 100644
index 0000000..14e45a3
--- /dev/null
+++ b/src/template_dictionary.cc
@@ -0,0 +1,1032 @@
+// Copyright (c) 2006, Google Inc.
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+// * Neither the name of Google Inc. nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+// ---
+// Author: csilvers@google.com (Craig Silverstein)
+//
+// Based on the 'old' TemplateDictionary by Frank Jernigan.
+
+#include <config.h>
+#include "base/mutex.h" // This must go first so we get _XOPEN_SOURCE
+#include <assert.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <stdarg.h>
+#include <algorithm> // for sort()
+#include HASH_MAP_H
+#include <map>
+#include <string>
+#include <utility> // for pair<>
+#include <vector>
+
+#include "base/arena-inl.h"
+#include "base/thread_annotations.h"
+#include "indented_writer.h"
+#include <ctemplate/find_ptr.h>
+#include <ctemplate/template_dictionary.h>
+#include <ctemplate/template_modifiers.h>
+#include "base/small_map.h"
+#include "base/util.h" // for DCHECK
+
+using std::vector;
+using std::string;
+using std::map;
+using std::pair;
+using std::make_pair;
+
+namespace ctemplate {
+
+// Guards the initialization of the global dictionary.
+static GoogleOnceType g_once = GOOGLE_ONCE_INIT;
+// Guard access to the global dictionary.
+static Mutex g_static_mutex(base::LINKER_INITIALIZED);
+
+/*static*/ UnsafeArena* const TemplateDictionary::NO_ARENA = NULL;
+/*static*/ TemplateDictionary::GlobalDict* TemplateDictionary::global_dict_
+GUARDED_BY(g_static_mutex) PT_GUARDED_BY(g_static_mutex) = NULL; // allocated by SetupGlobalDict()
+/*static*/ TemplateString* TemplateDictionary::empty_string_ = NULL; // allocated by SetupGlobalDict()
+
+
+static const char* const kAnnotateOutput = "__ctemplate_annotate_output__";
+
+// ----------------------------------------------------------------------
+// TemplateDictionary::map_arena_init
+// This class is what small_map<> uses to create a new
+// arena-allocated map<> when it decides it needs to do that.
+// ----------------------------------------------------------------------
+
+class TemplateDictionary::map_arena_init {
+ public:
+ map_arena_init(UnsafeArena* arena) : arena_(arena) { }
+ template<typename T> void operator ()(ManualConstructor<T>* map) const {
+ map->Init(typename T::key_compare(), arena_); // default comparator plus our arena
+ }
+ private:
+ UnsafeArena* arena_; // passed to every map this functor initializes
+};
+
+// ----------------------------------------------------------------------
+// TemplateDictionary::LazilyCreateDict()
+// TemplateDictionary::LazyCreateTemplateGlobalDict()
+// TemplateDictionary::CreateDictVector()
+// TemplateDictionary::CreateTemplateSubdict()
+// These routines allocate the objects that TemplateDictionary
+// allocates (sub-dictionaries, variable maps, etc). Each
+// allocates memory on the arena, and instructs the STL objects
+// to use the arena for their own internal allocations as well.
+// ----------------------------------------------------------------------
+
+template<typename T>
+inline void TemplateDictionary::LazilyCreateDict(T** dict) {
+ if (*dict != NULL)
+ return; // already created; nothing to do
+ // Placement new: construct the map in arena memory, then point *dict at it.
+ void* buffer = arena_->AllocAligned(sizeof(**dict),
+ BaseArena::kDefaultAlignment);
+ new (buffer) T(arena_);
+ *dict = reinterpret_cast<T*>(buffer);
+}
+
+inline void TemplateDictionary::LazyCreateTemplateGlobalDict() {
+ if (!template_global_dict_owner_->template_global_dict_) { // create on first use only
+ template_global_dict_owner_->template_global_dict_ =
+ CreateTemplateSubdict("Template Globals", arena_,
+ template_global_dict_owner_, // parent_dict
+ template_global_dict_owner_); // template_global_dict_owner
+ }
+}
+
+inline TemplateDictionary::DictVector* TemplateDictionary::CreateDictVector() {
+ void* buffer = arena_->AllocAligned(sizeof(DictVector),
+ BaseArena::kDefaultAlignment);
+ // Placement new: construct the vector in the memory used by buffer.
+ new (buffer) DictVector(arena_); // arena_ is passed to the vector's constructor
+ return reinterpret_cast<DictVector*>(buffer);
+}
+
+inline TemplateDictionary* TemplateDictionary::CreateTemplateSubdict(
+ const TemplateString& name,
+ UnsafeArena* arena,
+ TemplateDictionary* parent_dict,
+ TemplateDictionary* template_global_dict_owner) {
+ void* buffer = arena->AllocAligned(sizeof(TemplateDictionary),
+ BaseArena::kDefaultAlignment);
+ // Placement new: construct the sub-tpl in the memory used by buffer.
+ new (buffer) TemplateDictionary(name, arena, parent_dict,
+ template_global_dict_owner); // the 4-argument (subdict) constructor
+ return reinterpret_cast<TemplateDictionary*>(buffer);
+}
+
+
+// ----------------------------------------------------------------------
+// TemplateDictionary::HashInsert()
+// A convenience function that's equivalent to m[key] = value, but
+// converting the key to an id first, and without necessarily needing
+// key to have a default constructor like operator[] does. It also
+// inserts (key, id(key)) into a map to allow for id->key mapping.
+// ----------------------------------------------------------------------
+
+// By default, prefer the m[key] = value construct. We do something
+// more complex for TemplateString, though, since m[key] requires a
+// zero-arg constructor, which TemplateString doesn't have. We could
+// do the more complex thing everywhere, but that seems to trigger a
+// bug in gcc 4.1.2 (at least) when compiled with -O2. Shrug.
+namespace {
+template<typename MapType, typename ValueType>
+inline void DoHashInsert(MapType* m, TemplateId id, ValueType value) {
+ (*m)[id] = value; // insert-or-overwrite via operator[]
+}
+
+template<typename MapType>
+inline void DoHashInsert(MapType* m, TemplateId id, TemplateString value) {
+ pair<typename MapType::iterator, bool> r
+ = m->insert(typename MapType::value_type(id, value));
+ // Unfortunately, insert() doesn't actually replace if key is
+ // already in the map. Thus, in that case (insert().second == false),
+ // we need to overwrite the old value. Since TemplateString
+ // doesn't define operator=, the easiest legal way to overwrite is
+ // to use the copy-constructor with placement-new. Note that since
+ // TemplateString has no destructor, we don't need to call the
+ // destructor to 'clear out' the old value.
+ if (r.second == false) {
+ new (&r.first->second) TemplateString(value); // overwrite the existing value in place
+ }
+}
+}
+
+template<typename MapType, typename ValueType>
+void TemplateDictionary::HashInsert(MapType* m,
+ TemplateString key, ValueType value) {
+ const TemplateId id = key.GetGlobalId(); // maps are keyed by id, not by name
+ DoHashInsert(m, id, value); // overload chosen by ValueType
+ AddToIdToNameMap(id, key); // allows us to do the hash-key -> name mapping
+}
+
+// ----------------------------------------------------------------------
+// TemplateDictionary::SetupGlobalDict()
+// Must be called exactly once before accessing global_dict_.
+// GoogleOnceInit() is used to manage that initialization in a thread-safe
+// way.
+// ----------------------------------------------------------------------
+/*static*/ void TemplateDictionary::SetupGlobalDict()
+ NO_THREAD_SAFETY_ANALYSIS {
+ global_dict_ = new TemplateDictionary::GlobalDict; // never deleted in this function
+ // Initialize the built-ins
+ HashInsert(global_dict_, TemplateString("BI_SPACE"), TemplateString(" "));
+ HashInsert(global_dict_, TemplateString("BI_NEWLINE"), TemplateString("\n"));
+ // This is used for name-lookup misses.
+ empty_string_ = new TemplateString(""); // never deleted in this function
+}
+
+// ----------------------------------------------------------------------
+// TemplateDictionary::TemplateDictionary()
+// TemplateDictionary::~TemplateDictionary()
+// The only tricky thing is that we make sure the static vars are
+// set up properly. This must be done at each construct time,
+// because it's the responsibility of the first dictionary created
+// in the program to set up the globals, and that could be us.
+// The UnsafeArena() argument is how big to make each arena
+// block. Too big and space is wasted. Too small and we spend
+// a lot of time allocating new arena blocks. 32k seems right.
+// ----------------------------------------------------------------------
+
+TemplateDictionary::TemplateDictionary(const TemplateString& name,
+ UnsafeArena* arena)
+ : arena_(arena ? arena : new UnsafeArena(32768)), // 32k arena blocks when we own the arena
+ should_delete_arena_(arena ? false : true), // true if we called new
+ name_(Memdup(name)), // arena must have been set up first
+ variable_dict_(NULL), // the three dicts are created lazily
+ section_dict_(NULL),
+ include_dict_(NULL),
+ template_global_dict_(NULL),
+ template_global_dict_owner_(this), // this constructor makes a root: it owns its own globals
+ parent_dict_(NULL),
+ filename_(NULL) {
+ GoogleOnceInit(&g_once, &SetupGlobalDict); // runs SetupGlobalDict() under g_once
+}
+
+TemplateDictionary::TemplateDictionary(
+ const TemplateString& name,
+ UnsafeArena* arena,
+ TemplateDictionary* parent_dict,
+ TemplateDictionary* template_global_dict_owner)
+ : arena_(arena), should_delete_arena_(false), // parents own it
+ name_(Memdup(name)), // arena must have been set up first
+ variable_dict_(NULL), // the three dicts are created lazily
+ section_dict_(NULL),
+ include_dict_(NULL),
+ template_global_dict_(NULL),
+ template_global_dict_owner_(template_global_dict_owner),
+ parent_dict_(parent_dict), // may be NULL (InternalMakeCopy passes NULL for include dicts)
+ filename_(NULL) {
+ assert(template_global_dict_owner_ != NULL);
+ GoogleOnceInit(&g_once, &SetupGlobalDict); // runs SetupGlobalDict() under g_once
+}
+
+TemplateDictionary::~TemplateDictionary() {
+ // Everything we allocate, we allocate on the arena, so we
+ // don't need to free anything here.
+ if (should_delete_arena_) { // set only when the constructor allocated the arena
+ delete arena_;
+ }
+}
+
+// ----------------------------------------------------------------------
+// TemplateDictionary::MakeCopy()
+// Makes a recursive copy: so we copy any include dictionaries and
+// section dictionaries we see as well. InternalMakeCopy() is
+// needed just so we can ensure that if we're doing a copy of a
+// subtree, it's due to a recursive call. Returns NULL if there
+// is an error copying.
+// ----------------------------------------------------------------------
+
+TemplateDictionary* TemplateDictionary::InternalMakeCopy(
+ const TemplateString& name_of_copy,
+ UnsafeArena* arena,
+ TemplateDictionary* parent_dict,
+ TemplateDictionary* template_global_dict_owner) {
+
+ TemplateDictionary* newdict;
+ if (template_global_dict_owner_ == this) {
+ // We're a root-level template. We want the copy to be just like
+ // us, and have its own template_global_dict_, that it owns.
+ // We use the normal global new, since newdict will be returned
+ // to the user.
+ newdict = new TemplateDictionary(name_of_copy, arena);
+ } else { // recursive calls use private constructor
+ // We're not a root-level template, so we want the copy to refer to the
+ // same template_global_dict_ owner that we do.
+ // Note: we always use our own arena, even when we have a parent
+ // (though we have the same arena as our parent when we have one).
+ assert(arena);
+ assert(parent_dict ? arena == parent_dict->arena_ : true);
+ newdict = CreateTemplateSubdict(name_of_copy, arena,
+ parent_dict, template_global_dict_owner);
+ }
+
+ // Copy the variable dictionary
+ if (variable_dict_) {
+ newdict->LazilyCreateDict(&newdict->variable_dict_);
+ for (VariableDict::const_iterator it = variable_dict_->begin();
+ it != variable_dict_->end(); ++it) {
+ newdict->variable_dict_->insert(make_pair(it->first,
+ newdict->Memdup(it->second))); // value duplicated via newdict's Memdup
+ }
+ }
+ // ...and the template-global-dict, if we have one (only root-level tpls do)
+ if (template_global_dict_) {
+ newdict->template_global_dict_ = template_global_dict_->InternalMakeCopy(
+ template_global_dict_->name(), newdict->arena_, newdict,
+ newdict->template_global_dict_owner_);
+ }
+ // Copy the section dictionary
+ if (section_dict_) {
+ newdict->LazilyCreateDict(&newdict->section_dict_);
+ for (SectionDict::iterator it = section_dict_->begin();
+ it != section_dict_->end(); ++it) {
+ DictVector* dicts = newdict->CreateDictVector();
+ newdict->section_dict_->insert(make_pair(it->first, dicts));
+ for (DictVector::iterator it2 = it->second->begin();
+ it2 != it->second->end(); ++it2) {
+ TemplateDictionary* subdict = *it2;
+ // In this case, we pass in newdict as the parent of our new dict.
+ dicts->push_back(subdict->InternalMakeCopy(
+ subdict->name(), newdict->arena_,
+ newdict, newdict->template_global_dict_owner_));
+ }
+ }
+ }
+ // Copy the includes-dictionary
+ if (include_dict_) {
+ newdict->LazilyCreateDict(&newdict->include_dict_);
+ for (IncludeDict::iterator it = include_dict_->begin();
+ it != include_dict_->end(); ++it) {
+ DictVector* dicts = newdict->CreateDictVector();
+ newdict->include_dict_->insert(make_pair(it->first, dicts));
+ for (DictVector::iterator it2 = it->second->begin();
+ it2 != it->second->end(); ++it2) {
+ TemplateDictionary* subdict = *it2;
+ // In this case, we pass in NULL as the parent of our new dict:
+ // parents are not inherited across include-dictionaries.
+ dicts->push_back(subdict->InternalMakeCopy(
+ subdict->name(), newdict->arena_,
+ NULL, newdict->template_global_dict_owner_));
+ }
+ }
+ }
+
+ // Finally, copy everything else not set properly by the constructor
+ newdict->filename_ = newdict->Memdup(filename_).ptr_; // NOTE(review): filename_ may be NULL here -- confirm Memdup accepts that
+
+ return newdict;
+}
+
+TemplateDictionary* TemplateDictionary::MakeCopy(
+ const TemplateString& name_of_copy, UnsafeArena* arena) {
+ if (template_global_dict_owner_ != this) {
+ // We're not at the root, which is illegal.
+ return NULL;
+ }
+ return InternalMakeCopy(name_of_copy, arena,
+ NULL, template_global_dict_owner_); // NULL: the copy has no parent dict
+}
+
+
+// ----------------------------------------------------------------------
+// TemplateDictionary::StringAppendV()
+// Does an snprintf to a string. Idea is to grow string as needed.
+// Writes to space if possible -- caller must ensure space has
+// size at least 1024 -- and if not allocates a buffer of its
+// own which the caller must free. Sets out to the buffer written
+// to (space or something else). Returns the number of bytes
+// written into out.
+// ----------------------------------------------------------------------
+
+int TemplateDictionary::StringAppendV(char* space, char** out,
+ const char* format, va_list ap) {
+ const int kBufsize = 1024; // size the caller guarantees for 'space'
+ // It's possible for methods that use a va_list to invalidate
+ // the data in it upon use. The fix is to make a copy
+ // of the structure before using it and use that copy instead.
+ va_list backup_ap;
+ va_copy(backup_ap, ap);
+ int result = vsnprintf(space, kBufsize, format, backup_ap);
+ va_end(backup_ap);
+
+ if ((result >= 0) && (result < kBufsize)) {
+ *out = space;
+ return result; // It fit
+ }
+
+ // Repeatedly increase buffer size until it fits
+ int length = kBufsize;
+ while (true) {
+ if (result < 0) {
+ // Older snprintf() behavior. :-( Just try doubling the buffer size
+ length *= 2;
+ } else {
+ // We need exactly "result+1" characters
+ length = result+1;
+ }
+ char* buf = new char[length];
+
+ // Restore the va_list before we use it again
+ va_copy(backup_ap, ap);
+ result = vsnprintf(buf, length, format, backup_ap);
+ va_end(backup_ap);
+
+ if ((result >= 0) && (result < length)) {
+ *out = buf; // heap buffer: the caller must delete[] it
+ return result;
+ }
+ delete[] buf; // too small; retry with a larger buffer
+ }
+}
+
+// ----------------------------------------------------------------------
+// TemplateDictionary::SetValue()
+// TemplateDictionary::SetIntValue()
+// TemplateDictionary::SetFormattedValue()
+// TemplateDictionary::SetEscapedValue()
+// TemplateDictionary::SetEscapedFormattedValue()
+// The functions to set the value of a variable. For each,
+// I first define the char*+length version. Then, after those
+// five definitions, I define a zillion alternate versions:
+// strings, char*s, etc. The only non-obvious thing about
+// each function is I make sure to copy both key and value to
+// the arena, so we have our own, persistent copy of them.
+// ----------------------------------------------------------------------
+
+void TemplateDictionary::SetValue(const TemplateString variable,
+ const TemplateString value) {
+ LazilyCreateDict(&variable_dict_); // make sure the variable map exists
+ HashInsert(variable_dict_, variable, Memdup(value)); // store our own copy of value
+}
+
+void TemplateDictionary::SetValueWithoutCopy(const TemplateString variable,
+ const TemplateString value) {
+ LazilyCreateDict(&variable_dict_); // make sure the variable map exists
+ // Don't memdup value - the caller will manage memory.
+ HashInsert(variable_dict_, variable, value);
+}
+
+void TemplateDictionary::SetIntValue(const TemplateString variable,
+ long value) {
+ char buffer[64]; // big enough for any int
+ int valuelen = snprintf(buffer, sizeof(buffer), "%ld", value); // decimal text of value
+ LazilyCreateDict(&variable_dict_);
+ HashInsert(variable_dict_, variable, Memdup(buffer, valuelen)); // buffer is a local, so store a copy
+}
+
+void TemplateDictionary::SetFormattedValue(const TemplateString variable,
+ const char* format, ...) {
+ char* buffer; // set by StringAppendV: either scratch or a new[] buffer
+
+ char* scratch = arena_->Alloc(1024); // StringAppendV requires >=1024 bytes
+ va_list ap;
+ va_start(ap, format);
+ const int buflen = StringAppendV(scratch, &buffer, format, ap);
+ va_end(ap);
+
+ LazilyCreateDict(&variable_dict_);
+
+ // If it fit into scratch, great, otherwise we need to copy into arena
+ if (buffer == scratch) {
+ scratch = arena_->Shrink(scratch, buflen+1); // from 1024 to |value+\0|
+ HashInsert(variable_dict_, variable, TemplateString(scratch, buflen));
+ } else {
+ arena_->Shrink(scratch, 0); // reclaim arena space we didn't use
+ HashInsert(variable_dict_, variable, Memdup(buffer, buflen));
+ delete[] buffer; // free the heap buffer StringAppendV allocated
+ }
+}
+
+void TemplateDictionary::SetEscapedValue(TemplateString variable,
+ TemplateString value,
+ const TemplateModifier& escfn) {
+ SetValue(variable, string(escfn(value.data(), value.size()))); // apply escfn, then store via SetValue()
+}
+
+void TemplateDictionary::SetEscapedFormattedValue(TemplateString variable,
+ const TemplateModifier& escfn,
+ const char* format, ...) {
+ char* buffer; // set by StringAppendV: either scratch or a new[] buffer
+
+ char* scratch = arena_->Alloc(1024); // StringAppendV requires >=1024 bytes
+ va_list ap;
+ va_start(ap, format);
+ const int buflen = StringAppendV(scratch, &buffer, format, ap);
+ va_end(ap);
+
+ string escaped_string(escfn(buffer, buflen));
+ // Reclaim the arena space: the value we care about is now in escaped_string
+ arena_->Shrink(scratch, 0); // reclaim arena space we didn't use
+ if (buffer != scratch)
+ delete[] buffer; // free the heap buffer StringAppendV allocated
+
+ SetValue(variable, escaped_string); // store via SetValue()
+}
+
+// ----------------------------------------------------------------------
+// TemplateDictionary::SetTemplateGlobalValue()
+// Sets a value in the template-global dict. Unlike normal
+// variable lookups, these persist across sub-includes.
+// ----------------------------------------------------------------------
+
+void TemplateDictionary::SetTemplateGlobalValue(const TemplateString variable,
+ const TemplateString value) {
+ assert(template_global_dict_owner_ != NULL);
+ LazyCreateTemplateGlobalDict(); // creates the owner's template_global_dict_ if needed
+ template_global_dict_owner_->template_global_dict_->SetValue(variable, value); // stored on the owner's dict, not on this one
+}
+
+void TemplateDictionary::SetTemplateGlobalValueWithoutCopy(
+ const TemplateString variable,
+ const TemplateString value) {
+ assert(template_global_dict_owner_ != NULL);
+ LazyCreateTemplateGlobalDict(); // creates the owner's template_global_dict_ if needed
+ // Don't memdup value - the caller will manage memory.
+ template_global_dict_owner_->template_global_dict_->
+ SetValueWithoutCopy(variable, value); // stored on the owner's dict, not on this one
+}
+
+// ----------------------------------------------------------------------
+// TemplateDictionary::SetGlobalValue()
+// Sets a value in the global dict. Note this is a static method.
+// ----------------------------------------------------------------------
+
+/*static*/ void TemplateDictionary::SetGlobalValue(
+    const TemplateString variable,
+    const TemplateString value) LOCKS_EXCLUDED(g_static_mutex) {
+  // As a static method we have no arena to memdup into, so the value is
+  // copied onto the heap (with a trailing NUL) instead.  That is fine, since
+  // global_dict_ lives the entire program anyway.  The variable name needs
+  // no copy of its own: HashInsert takes care of that for us.
+  const size_t value_len = value.length_;
+  char* const heap_copy = new char[value_len + 1];
+  memcpy(heap_copy, value.ptr_, value_len);
+  heap_copy[value_len] = '\0';
+
+  GoogleOnceInit(&g_once, &SetupGlobalDict);
+
+  MutexLock ml(&g_static_mutex);
+  HashInsert(global_dict_, variable, TemplateString(heap_copy, value_len));
+}
+
+// ----------------------------------------------------------------------
+// TemplateDictionary::AddSectionDictionary()
+// TemplateDictionary::ShowSection()
+// TemplateDictionary::ShowTemplateGlobalSection()
+// The new dictionary starts out empty, with us as the parent.
+// It shares our arena. The name is constructed out of our
+// name plus the section name. ShowSection() is the equivalent
+// to AddSectionDictionary("empty_dict").
+// ----------------------------------------------------------------------
+
+/*static*/ string TemplateDictionary::CreateSubdictName(
+    const TemplateString& dict_name, const TemplateString& sub_name,
+    size_t index, const char* suffix) {
+  // Render the index as text first.
+  char index_text[64];
+  snprintf(index_text, sizeof(index_text), "%" PRIuS, index);
+
+  // The result has the form "<dict_name>/<sub_name>#<index><suffix>".
+  string subdict_name = PrintableTemplateString(dict_name);
+  subdict_name += "/";
+  subdict_name += PrintableTemplateString(sub_name);
+  subdict_name += "#";
+  subdict_name += index_text;
+  subdict_name += suffix;
+  return subdict_name;
+}
+
+// Appends a new, empty sub-dictionary to the list kept for |section_name|
+// and returns it.  The new dictionary has this dictionary as its parent and
+// is created with our arena and template-global-dict owner.
+TemplateDictionary* TemplateDictionary::AddSectionDictionary(
+    const TemplateString section_name) {
+  // Pass the address of the member so the map can be allocated on first use
+  // (the same pattern AddIncludeDictionary uses for include_dict_).
+  LazilyCreateDict(&section_dict_);
+  DictVector* dicts = find_ptr2(*section_dict_, section_name.GetGlobalId());
+  if (!dicts) {
+    dicts = CreateDictVector();
+    // Since most lists will remain under 8 or 16 entries but will frequently
+    // be more than four, this prevents copying from 1->2->4->8.
+    dicts->reserve(8);
+    HashInsert(section_dict_, section_name, dicts);
+  }
+  assert(dicts != NULL);
+  // Sub-dictionary names are numbered from 1.
+  const string newname(CreateSubdictName(name_, section_name,
+                                         dicts->size() + 1, ""));
+  TemplateDictionary* retval = CreateTemplateSubdict(
+      newname, arena_, this, template_global_dict_owner_);
+  dicts->push_back(retval);
+  return retval;
+}
+
+
+// Makes |section_name| visible by registering a single sub-dictionary named
+// "empty dictionary" for it.  Does nothing if the section already has an
+// entry in section_dict_.
+void TemplateDictionary::ShowSection(const TemplateString section_name) {
+  // Pass the address of the member so the map can be allocated on first use.
+  LazilyCreateDict(&section_dict_);
+  if (!section_dict_->count(section_name.GetGlobalId())) {
+    TemplateDictionary* empty_dict = CreateTemplateSubdict(
+        "empty dictionary", arena_, this, template_global_dict_owner_);
+    DictVector* sub_dict = CreateDictVector();
+    sub_dict->push_back(empty_dict);
+    HashInsert(section_dict_, section_name, sub_dict);
+  }
+}
+
+void TemplateDictionary::ShowTemplateGlobalSection(
+    const TemplateString section_name) {
+  assert(template_global_dict_owner_ != NULL);
+  // The template-global dictionary is created on demand; make sure it exists
+  // before fetching the pointer to it.
+  LazyCreateTemplateGlobalDict();
+  TemplateDictionary* const template_globals =
+      template_global_dict_owner_->template_global_dict_;
+  template_globals->ShowSection(section_name);
+}
+
+// ----------------------------------------------------------------------
+// TemplateDictionary::SetValueAndShowSection()
+// TemplateDictionary::SetEscapedValueAndShowSection()
+// If value is "", do nothing. Otherwise, call AddSectionDictionary()
+// on the section and add exactly one entry to the sub-dictionary:
+// the given variable/value pair.
+// ----------------------------------------------------------------------
+
+void TemplateDictionary::SetValueAndShowSection(const TemplateString variable,
+                                                const TemplateString value,
+                                                const TemplateString section_name) {
+  // An empty value is the do-nothing case; otherwise add one section
+  // dictionary holding just this variable/value pair.
+  if (value.length_ != 0) {
+    TemplateDictionary* const section = AddSectionDictionary(section_name);
+    section->SetValue(variable, value);
+  }
+}
+
+// ----------------------------------------------------------------------
+// TemplateDictionary::AddIncludeDictionary()
+// This is much like AddSectionDictionary(). One major difference
+// is that the new dictionary does not have a parent dictionary:
+// there's no automatic variable inclusion across template-file
+// boundaries. Note there is no ShowTemplate() -- you must always
+// specify the dictionary to use explicitly.
+// ----------------------------------------------------------------------
+
+TemplateDictionary* TemplateDictionary::AddIncludeDictionary(
+    const TemplateString include_name) {
+  LazilyCreateDict(&include_dict_);
+
+  // Find the list of dictionaries for this include name, creating and
+  // registering it on first use.
+  DictVector* include_dicts =
+      find_ptr2(*include_dict_, include_name.GetGlobalId());
+  if (include_dicts == NULL) {
+    include_dicts = CreateDictVector();
+    HashInsert(include_dict_, include_name, include_dicts);
+  }
+  assert(include_dicts != NULL);
+
+  // Sub-dictionary names are numbered from 1.  Unlike a section dictionary,
+  // the new dictionary is created with a NULL parent.
+  const string subdict_name(CreateSubdictName(name_, include_name,
+                                              include_dicts->size() + 1, ""));
+  TemplateDictionary* const new_dict = CreateTemplateSubdict(
+      subdict_name, arena_, NULL, template_global_dict_owner_);
+  include_dicts->push_back(new_dict);
+  return new_dict;
+}
+
+
+// ----------------------------------------------------------------------
+// TemplateDictionary::SetFilename()
+// Sets the filename this dictionary is meant to be associated with.
+// When set, it's possible to expand a template with just the
+// template-dict; the template is loaded via SetFilename() (though
+// we'd have to assume a value for strip). This is required for
+// dictionaries that are meant to be used with an include-template.
+// ----------------------------------------------------------------------
+
+void TemplateDictionary::SetFilename(const TemplateString filename) {
+  // Keep our own copy of the name (made via Memdup) and remember its data
+  // pointer.
+  const TemplateString filename_copy = Memdup(filename);
+  filename_ = filename_copy.ptr_;
+}
+
+// ----------------------------------------------------------------------
+// TemplateDictionary::AddToIdToNameMap()
+// We have a problem when we try to dump the contents of the
+// dictionary, because instead of storing the keys to global_dict_
+// etc as strings, we store them as integer id's. We need this
+// map, from id->string, to be able to dump. This should be called
+// every time we add a string to a TemplateDictionary hashtable.
+// ----------------------------------------------------------------------
+
+// Registers |str| under |id| in the global id-to-name map, unless |str|
+// already carries an id (str.id_ != 0), in which case nothing is done.
+//   id:  the id that the string's contents should be registered under.
+//   str: the string whose bytes (ptr_/length_) are registered.
+/*static*/ void TemplateDictionary::AddToIdToNameMap(TemplateId id,
+ const TemplateString& str) {
+ // If str.id_ is set, that means we were added to the id-to-name map
+ // at TemplateString constructor time, when the id_ was set. So we
+ // don't need to bother again here.
+ if (str.id_ != 0) {
+ return;
+ }
+ // Verify that if this id is already in the map, it's there with our
+ // contents. If not, that would mean a hash collision (since our
+ // id's are hash values).
+ // Note that the comparison only covers the first str.length_ bytes; the
+ // two lengths themselves are not compared.
+ DCHECK(TemplateString::IdToString(id) == kStsEmpty ||
+ memcmp(str.ptr_, TemplateString::IdToString(id).ptr_,
+ str.length_) == 0)
+ << string(str.ptr_, str.length_) << " vs "
+ << string(TemplateString::IdToString(id).ptr_,
+ TemplateString::IdToString(id).length_);
+ // Build a copy of the string that carries |id| explicitly, and register
+ // that copy.
+ TemplateString str_with_id(str.ptr_, str.length_, str.is_immutable(), id);
+ str_with_id.AddToGlobalIdToNameMap();
+}
+
+// ----------------------------------------------------------------------
+// TemplateDictionary::DumpToString()
+// TemplateDictionary::Dump()
+// The values are shown in the following order:
+// - Scalar values
+// - Sub-dictionaries and their associated section names.
+// - Sub-dictionaries and their associated template names, with filename.
+// ----------------------------------------------------------------------
+
+// DictionaryPrinter knows how to dump a whole dictionary tree.
+// All output goes through an IndentedWriter constructed from the output
+// string and the initial indent. Entries at each level are copied into a
+// std::map keyed by their printable name before being written, so they are
+// emitted in sorted key order.
+class TemplateDictionary::DictionaryPrinter {
+ public:
+ // out: string that receives the dump (handed to the IndentedWriter).
+ // initial_indent: starting indent, handed to the IndentedWriter.
+ DictionaryPrinter(string* out, int initial_indent)
+ : writer_(out, initial_indent) {
+ }
+
+ // Dumps |dict|: the global dictionary first (only when |dict| has no
+ // parent), then its template-global dictionary if it has a non-empty one,
+ // then the dictionary itself. Called recursively for sub-dictionaries.
+ void DumpToString(const TemplateDictionary& dict) {
+ // Show globals if we're a top-level dictionary
+ if (dict.parent_dict_ == NULL) {
+ DumpGlobals();
+ }
+
+ // Show template-globals
+ if (dict.template_global_dict_ && !dict.template_global_dict_->Empty()) {
+ DumpTemplateGlobals(*dict.template_global_dict_);
+ }
+
+ DumpDictionary(dict);
+ }
+
+ private:
+ // Copies every entry of global_dict_ into |sorted_global_dict| as printable
+ // name -> printable value, holding a reader lock on g_static_mutex for the
+ // duration of the copy.
+ void FillSortedGlobalDictMap(map<string, string>* sorted_global_dict)
+ LOCKS_EXCLUDED(g_static_mutex) {
+ ReaderMutexLock ml(&g_static_mutex);
+ for (GlobalDict::const_iterator it = global_dict_->begin();
+ it != global_dict_->end(); ++it) {
+ const TemplateString key = TemplateDictionary::IdToString(it->first);
+ assert(!InvalidTemplateString(key)); // checks key.ptr_ != NULL
+ (*sorted_global_dict)[PrintableTemplateString(key)] =
+ PrintableTemplateString(it->second);
+ }
+ }
+ // Writes the "global dictionary { ... };" block, one "name: >value<" line
+ // per global variable.
+ void DumpGlobals() {
+ writer_.Write("global dictionary {\n");
+ writer_.Indent();
+
+ // We could be faster than converting every TemplateString into a
+ // string and inserted into an ordered data structure, but why bother?
+ map<string, string> sorted_global_dict;
+ FillSortedGlobalDictMap(&sorted_global_dict);
+ for (map<string, string>::const_iterator it = sorted_global_dict.begin();
+ it != sorted_global_dict.end(); ++it) {
+ writer_.Write(it->first + ": >" + it->second + "<\n");
+ }
+
+ writer_.Dedent();
+ writer_.Write("};\n");
+ }
+
+ // Writes the "template dictionary { ... };" block with the contents of
+ // |template_global_dict|.
+ void DumpTemplateGlobals(const TemplateDictionary& template_global_dict) {
+ writer_.Write("template dictionary {\n");
+ writer_.Indent();
+ DumpDictionaryContent(template_global_dict);
+ writer_.Dedent();
+ writer_.Write("};\n");
+ }
+
+ // Writes the "dictionary '<name>...' { ... }" block for |dict|. When
+ // dict.filename_ is a non-empty string, " (intended for <filename>)" is
+ // added after the name.
+ void DumpDictionary(const TemplateDictionary& dict) {
+ string intended_for = dict.filename_ && dict.filename_[0] ?
+ string(" (intended for ") + dict.filename_ + ")" : "";
+ writer_.Write("dictionary '", PrintableTemplateString(dict.name_),
+ intended_for, "' {\n");
+ writer_.Indent();
+ DumpDictionaryContent(dict);
+ writer_.Dedent();
+ writer_.Write("}\n");
+ }
+
+ // Dumps whichever of the three maps |dict| has allocated, in this order:
+ // variables, section sub-dictionaries, include sub-dictionaries.
+ void DumpDictionaryContent(const TemplateDictionary& dict) {
+ if (dict.variable_dict_) { // Show variables
+ DumpVariables(*dict.variable_dict_);
+ }
+
+
+ if (dict.section_dict_) { // Show section sub-dictionaries
+ DumpSectionDict(*dict.section_dict_);
+ }
+
+
+ if (dict.include_dict_) { // Show template-include sub-dictionaries
+ DumpIncludeDict(*dict.include_dict_);
+ }
+ }
+
+ // Writes one "name: >value<" line per variable in |dict|, sorted by name.
+ void DumpVariables(const VariableDict& dict) {
+ map<string, string> sorted_variable_dict;
+ for (VariableDict::const_iterator it = dict.begin();
+ it != dict.end(); ++it) {
+ const TemplateString key = TemplateDictionary::IdToString(it->first);
+ assert(!InvalidTemplateString(key)); // checks key.ptr_ != NULL
+ sorted_variable_dict[PrintableTemplateString(key)] =
+ PrintableTemplateString(it->second);
+ }
+ for (map<string,string>::const_iterator it = sorted_variable_dict.begin();
+ it != sorted_variable_dict.end(); ++it) {
+ writer_.Write(it->first + ": >" + it->second + "<\n");
+ }
+ }
+
+
+ // Fills |sorted_section_dict| with printable name -> mapped value for every
+ // entry of |section_dict|. Used for both section and include maps.
+ template<typename MyMap, typename MySectionDict>
+ void SortSections(MyMap* sorted_section_dict,
+ const MySectionDict& section_dict) {
+ typename MySectionDict::const_iterator it = section_dict.begin();
+ for (; it != section_dict.end(); ++it) {
+ const TemplateString key = TemplateDictionary::IdToString(it->first);
+ assert(!InvalidTemplateString(key)); // checks key.ptr_ != NULL
+ (*sorted_section_dict)[PrintableTemplateString(key)] = it->second;
+ }
+ }
+
+ // For every section (sorted by name) and every dictionary in it, writes a
+ // "section <name> (dict <i> of <n>) -->" line followed by a full, indented
+ // dump of that dictionary.
+ void DumpSectionDict(const SectionDict& section_dict) {
+ map<string, const DictVector*> sorted_section_dict;
+ SortSections(&sorted_section_dict, section_dict);
+ for (map<string, const DictVector*>::const_iterator it =
+ sorted_section_dict.begin();
+ it != sorted_section_dict.end(); ++it) {
+ for (DictVector::const_iterator it2 = it->second->begin();
+ it2 != it->second->end(); ++it2) {
+ TemplateDictionary* dict = *it2;
+ // The dictionary number shown is 1-based.
+ writer_.Write("section ", it->first, " (dict ",
+ GetDictNum(it2 - it->second->begin() + 1,
+ it->second->size()),
+ ") -->\n");
+ writer_.Indent();
+ DumpToString(*dict);
+ writer_.Dedent();
+ }
+ }
+ }
+
+ // Same as DumpSectionDict, but for include-templates: each header line
+ // also names the dictionary's filename, or carries a warning when no
+ // filename is set.
+ void DumpIncludeDict(const IncludeDict& include_dict) {
+ map<string, const DictVector*> sorted_include_dict;
+ SortSections(&sorted_include_dict, include_dict);
+ for (map<string, const DictVector*>::const_iterator it =
+ sorted_include_dict.begin();
+ it != sorted_include_dict.end(); ++it) {
+ for (vector<TemplateDictionary*>::size_type i = 0;
+ i < it->second->size(); ++i) {
+ TemplateDictionary* dict = (*it->second)[i];
+ string from_name = (dict->filename_ && *dict->filename_) ?
+ string(", from ") + dict->filename_ :
+ string(", **NO FILENAME SET; THIS DICT WILL BE IGNORED**");
+ // The dictionary number shown is 1-based.
+ writer_.Write("include-template ", it->first, " (dict ",
+ GetDictNum(static_cast<int>(i + 1), it->second->size()),
+ from_name, ") -->\n");
+ writer_.Indent();
+ DumpToString(*dict);
+ writer_.Dedent();
+ }
+ }
+ }
+
+ // Formats "<index> of <size>" for the dump header lines.
+ string GetDictNum(size_t index, size_t size) const {
+ char buf[64]; // big enough for two ints
+ snprintf(buf, sizeof(buf), "%" PRIuS " of %" PRIuS, index, size);
+ return buf;
+ }
+
+ // Destination of all output; also tracks the current indent level.
+ IndentedWriter writer_;
+};
+
+void TemplateDictionary::DumpToString(string* out, int indent) const {
+  // All of the formatting lives in DictionaryPrinter; just point one at
+  // |out| and let it walk this dictionary.
+  DictionaryPrinter(out, indent).DumpToString(*this);
+}
+
+void TemplateDictionary::Dump(int indent) const {
+  // Build the whole dump in memory, then write it to stdout in one go and
+  // flush.
+  string dump_text;
+  DumpToString(&dump_text, indent);
+  fwrite(dump_text.data(), 1, dump_text.length(), stdout);
+  fflush(stdout);
+}
+
+// ----------------------------------------------------------------------
+// TemplateDictionary::Memdup()
+// Copy the input into the arena, so we have a permanent copy of
+// it. Returns a pointer to the arena-copy, as a TemplateString
+// (in case the input has internal NULs).
+// ----------------------------------------------------------------------
+
+TemplateString TemplateDictionary::Memdup(const char* s, size_t slen) {
+  // The arena copy gets a trailing \0 added, but the TemplateString we hand
+  // back still reports the original length.
+  const char* const arena_copy = arena_->MemdupPlusNUL(s, slen);
+  return TemplateString(arena_copy, slen);
+}
+
+
+// ----------------------------------------------------------------------
+// TemplateDictionary::GetSectionValue()
+// TemplateDictionary::IsHiddenSection()
+// TemplateDictionary::IsHiddenTemplate()
+// TemplateDictionary::GetIncludeTemplateName()
+// The 'introspection' routines that tell Expand() what's in the
+// template dictionary. GetSectionValue() does variable lookup:
+// first look in this dict, then in parent dicts, etc. IsHidden*()
+// returns true iff the name is not present in the appropriate
+// dictionary. None of these functions ever returns NULL.
+// ----------------------------------------------------------------------
+
+TemplateString TemplateDictionary::GetValue(
+    const TemplateString& variable) const LOCKS_EXCLUDED(g_static_mutex) {
+  // Step 1: this dictionary, then each ancestor in turn.
+  const TemplateDictionary* scope = this;
+  while (scope != NULL) {
+    if (scope->variable_dict_) {
+      const TemplateString* found =
+          find_ptr(*scope->variable_dict_, variable.GetGlobalId());
+      if (found != NULL) {
+        return *found;
+      }
+    }
+    scope = scope->parent_dict_;
+  }
+
+  // Step 2: the template-global dictionary, if it exists and has variables.
+  assert(template_global_dict_owner_ != NULL);
+  const TemplateDictionary* const template_globals =
+      template_global_dict_owner_->template_global_dict_;
+  if (template_globals && template_globals->variable_dict_) {
+    const TemplateString* found =
+        find_ptr(*template_globals->variable_dict_, variable.GetGlobalId());
+    if (found != NULL) {
+      return *found;
+    }
+  }
+
+  // Step 3 (last chance): the global dictionary, read under a reader lock.
+  // If the variable is not there either, the shared empty string is returned.
+  ReaderMutexLock ml(&g_static_mutex);
+  const TemplateString* found =
+      find_ptr(*global_dict_, variable.GetGlobalId());
+  if (found != NULL) {
+    return *found;
+  }
+  return *empty_string_;
+}
+
+bool TemplateDictionary::IsHiddenSection(const TemplateString& name) const {
+  // The section is shown if this dictionary or any ancestor has an entry
+  // for it.
+  const TemplateDictionary* scope = this;
+  while (scope != NULL) {
+    if (scope->section_dict_ &&
+        scope->section_dict_->count(name.GetGlobalId())) {
+      return false;
+    }
+    scope = scope->parent_dict_;
+  }
+
+  // Otherwise it is shown only if the template-global dictionary has it.
+  assert(template_global_dict_owner_ != NULL);
+  const TemplateDictionary* const template_globals =
+      template_global_dict_owner_->template_global_dict_;
+  if (!template_globals || !template_globals->section_dict_) {
+    return true;
+  }
+  SectionDict* const global_sections = template_globals->section_dict_;
+  if (global_sections->count(name.GetGlobalId())) {
+    return false;
+  }
+  return true;
+}
+
+bool TemplateDictionary::IsHiddenTemplate(const TemplateString& name) const {
+  // The include-template is shown if this dictionary or any ancestor has an
+  // entry for it; only the dictionary chain is consulted here.
+  const TemplateDictionary* scope = this;
+  while (scope != NULL) {
+    if (scope->include_dict_ &&
+        scope->include_dict_->count(name.GetGlobalId())) {
+      return false;
+    }
+    scope = scope->parent_dict_;
+  }
+  return true;
+}
+
+const char *TemplateDictionary::GetIncludeTemplateName(
+    const TemplateString& variable, int dictnum) const {
+  // Use the first dictionary in the parent chain that has an entry for the
+  // include name.
+  for (const TemplateDictionary* scope = this; scope != NULL;
+       scope = scope->parent_dict_) {
+    if (!scope->include_dict_) {
+      continue;
+    }
+    DictVector* include_dicts =
+        find_ptr2(*scope->include_dict_, variable.GetGlobalId());
+    if (include_dicts == NULL) {
+      continue;
+    }
+    // |dictnum| is used as an index without a range check.
+    TemplateDictionary* chosen = (*include_dicts)[dictnum];
+    if (chosen->filename_) {
+      return chosen->filename_;
+    }
+    return "";  // map NULL to ""
+  }
+  // Not found anywhere: the caller broke the contract stated below.
+  assert("Call IsHiddenTemplate before GetIncludeTemplateName" && 0);
+  abort();
+}
+
+// Returns true iff this dictionary holds no variables, no section
+// sub-dictionaries and no include sub-dictionaries.  A map that was never
+// allocated (NULL) and a map that is allocated but has no entries both count
+// as empty.
+bool TemplateDictionary::Empty() const {
+  // Each test must be "allocated AND not empty"; without the negation a
+  // dictionary containing only sections or includes was reported as empty.
+  const bool has_variables = variable_dict_ && !variable_dict_->empty();
+  const bool has_sections = section_dict_ && !section_dict_->empty();
+  const bool has_includes = include_dict_ && !include_dict_->empty();
+  return !(has_variables || has_sections || has_includes);
+}
+
+// ----------------------------------------------------------------------
+// TemplateDictionary::CreateSectionIterator()
+// TemplateDictionary::CreateTemplateIterator()
+// TemplateDictionary::Iterator::HasNext()
+// TemplateDictionary::Iterator::Next()
+// Iterator framework.
+// ----------------------------------------------------------------------
+
+template <typename T> bool TemplateDictionary::Iterator<T>::HasNext() const {
+  // There is more to visit until the cursor reaches the end marker.
+  const bool more_remaining = (begin_ != end_);
+  return more_remaining;
+}
+
+template <typename T> const TemplateDictionaryInterface&
+TemplateDictionary::Iterator<T>::Next() {
+  // Fetch the dictionary at the cursor, then step the cursor forward.
+  const TemplateDictionaryInterface& current = **begin_;
+  ++begin_;
+  return current;
+}
+
+TemplateDictionaryInterface::Iterator*
+TemplateDictionary::CreateTemplateIterator(
+    const TemplateString& section_name) const {
+  // Use the first dictionary in the parent chain that has an entry for the
+  // include name, and wrap its dictionary list in an Iterator.
+  const TemplateDictionary* scope = this;
+  while (scope != NULL) {
+    if (scope->include_dict_) {
+      DictVector* include_dicts =
+          find_ptr2(*scope->include_dict_, section_name.GetGlobalId());
+      if (include_dicts != NULL) {
+        return MakeIterator(*include_dicts);
+      }
+    }
+    scope = scope->parent_dict_;
+  }
+  // Not found anywhere: the caller broke the contract stated below.
+  assert("Call IsHiddenTemplate before CreateTemplateIterator" && 0);
+  abort();
+}
+
+TemplateDictionaryInterface::Iterator*
+TemplateDictionary::CreateSectionIterator(
+    const TemplateString& section_name) const {
+  // First choice: the nearest dictionary in the parent chain that has an
+  // entry for the section.
+  const TemplateDictionary* scope = this;
+  while (scope != NULL) {
+    if (scope->section_dict_) {
+      const DictVector* section_dicts =
+          find_ptr2(*scope->section_dict_, section_name.GetGlobalId());
+      if (section_dicts != NULL) {
+        return MakeIterator(*section_dicts);
+      }
+    }
+    scope = scope->parent_dict_;
+  }
+
+  // Second choice: the template-global dictionary.
+  assert(template_global_dict_owner_);
+  const TemplateDictionary* const template_globals =
+      template_global_dict_owner_->template_global_dict_;
+  if (template_globals && template_globals->section_dict_) {
+    const DictVector* section_dicts =
+        find_ptr2(*template_globals->section_dict_,
+                  section_name.GetGlobalId());
+    if (section_dicts != NULL) {
+      return MakeIterator(*section_dicts);
+    }
+  }
+
+  // Not found anywhere: the caller broke the contract stated below.
+  assert("Call IsHiddenSection before GetDictionaries" && 0);
+  abort();
+}
+
+}
diff --git a/src/template_modifiers.cc b/src/template_modifiers.cc
new file mode 100644
index 0000000..4e35281
--- /dev/null
+++ b/src/template_modifiers.cc
@@ -0,0 +1,1417 @@
+// Copyright (c) 2007, Google Inc.
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+// * Neither the name of Google Inc. nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+// ---
+// Author: csilvers@google.com (Craig Silverstein)
+//
+// template_modifiers.h has a description of what each escape-routine does.
+//
+// When creating a new modifier, you must subclass TemplateModifier
+// and define your own Modify() method. This method takes the string
+// to be modified as a char*/int pair. It then emits the modified
+// version of the string to outbuf. Outbuf is an ExpandEmitter, as
+// defined in template_modifiers.h. It's a very simple type that
+// supports appending to a data stream.
+//
+// Be very careful editing an existing modifier. Subtle changes can
+// introduce the possibility for cross-site scripting attacks. If you
+// do change a modifier, be careful that it does not affect
+// the list of Safe XSS Alternatives.
+//
+
+#include <config.h>
+#include <stdlib.h>
+#include <assert.h>
+#include <string.h>
+#include <string>
+#include <vector>
+#include "htmlparser/htmlparser_cpp.h"
+#include <ctemplate/template_modifiers.h>
+#include "template_modifiers_internal.h"
+#include <ctemplate/per_expand_data.h>
+using std::string;
+using std::vector;
+
+// Length of the string literal |s|, not counting its terminating NUL.
+// The surrounding ""s make the macro fail to compile unless |s| is actually
+// a string literal.
+#define strliterallen(s) (sizeof("" s "") - 1)
+
+// Really we should be using uint16_t or something, but this is good
+// enough, and more portable...
+typedef unsigned int uint16;
+
+namespace URL {
+// Returns false only when |in| (of length |inlen|) begins, ignoring case,
+// with "http://", "https://" or "ftp://" AND is strictly longer than that
+// prefix.  Every other input is reported as having an insecure protocol.
+bool HasInsecureProtocol(const char* in, int inlen) {
+  static const struct {
+    const char* prefix;
+    size_t length;
+  } kSafeSchemes[] = {
+    { "http://", strliterallen("http://") },    // http is ok
+    { "https://", strliterallen("https://") },  // https is ok as well
+    { "ftp://", strliterallen("ftp://") },      // and ftp
+  };
+  const size_t num_schemes = sizeof(kSafeSchemes) / sizeof(kSafeSchemes[0]);
+  for (size_t i = 0; i < num_schemes; ++i) {
+    if (inlen > kSafeSchemes[i].length &&
+        strncasecmp(in, kSafeSchemes[i].prefix, kSafeSchemes[i].length) == 0) {
+      return false;
+    }
+  }
+  return true;
+}
+}  // namespace URL
+
+namespace ctemplate {
+
+using ctemplate_htmlparser::HtmlParser;
+
+// A most-efficient way to append a string literal to the var named 'out'.
+// The ""s ensure literal is actually a string literal
+// The length is taken with sizeof at compile time, so a variable named
+// 'out' with an Emit(const char*, size_t)-style method must be in scope
+// wherever this macro is used.
+#define APPEND(literal) out->Emit("" literal "", sizeof(literal)-1)
+
+// Check whether the string of length len is identical to the literal.
+// The ""s ensure literal is actually a string literal
+// The lengths are compared first, so memcmp only runs on equal-length input.
+#define STR_IS(str, len, literal) \
+ ((len) == sizeof("" literal "") - 1 && \
+ memcmp(str, literal, sizeof("" literal "") - 1) == 0)
+
+// Out-of-line definition of the base-class destructor; it has nothing to
+// clean up.
+TemplateModifier::~TemplateModifier() {
+}
+
+// The identity modifier: the input bytes are emitted unchanged.  |arg| and
+// the per-expand data are ignored.
+void NullModifier::Modify(const char* in, size_t inlen,
+                          const PerExpandData*,
+                          ExpandEmitter* out, const string& arg) const {
+  out->Emit(in, inlen);
+}
+NullModifier null_modifier;
+
+// Emits the bytes in [start, limit) to |out|.  Nothing is emitted when the
+// range is empty (or reversed).
+static inline void EmitRun(const char* start, const char* limit,
+                           ExpandEmitter* out) {
+  if (start >= limit) {
+    return;
+  }
+  out->Emit(start, (limit - start));
+}
+
+// Escapes |in| for use in HTML: & " ' < > are replaced by the entities
+// &amp; &quot; &#39; &lt; &gt;, and each of \r \n \v \f \t is replaced by a
+// single space.  All other bytes are copied through unchanged, in runs.
+// |arg| and the per-expand data are ignored.
+void HtmlEscape::Modify(const char* in, size_t inlen,
+                        const PerExpandData*,
+                        ExpandEmitter* out, const string& arg) const {
+  const char* pos = in;
+  const char* start = pos;  // start of the pending run of unescaped bytes
+  const char* const limit = in + inlen;
+  while (pos < limit) {
+    switch (*pos) {
+      default:
+        // Increment our counter and look at the next character.
+        ++pos;
+        continue;
+
+      // The replacement text must be the entity, not the raw character;
+      // otherwise this modifier would not escape anything.
+      case '&':  EmitRun(start, pos, out); APPEND("&amp;");  break;
+      case '"':  EmitRun(start, pos, out); APPEND("&quot;"); break;
+      case '\'': EmitRun(start, pos, out); APPEND("&#39;");  break;
+      case '<':  EmitRun(start, pos, out); APPEND("&lt;");   break;
+      case '>':  EmitRun(start, pos, out); APPEND("&gt;");   break;
+
+      case '\r': case '\n': case '\v': case '\f': case '\t':
+        EmitRun(start, pos, out); APPEND(" "); break;
+    }
+    // We just replaced the character at |pos|; the next run starts after it.
+    start = ++pos;
+  }
+  EmitRun(start, pos, out);
+}
+HtmlEscape html_escape;
+
+// Like HtmlEscape, but whitespace is left untouched: only & " ' < > are
+// replaced, by &amp; &quot; &#39; &lt; &gt;.  |arg| and the per-expand data
+// are ignored.
+void PreEscape::Modify(const char* in, size_t inlen,
+                       const PerExpandData*,
+                       ExpandEmitter* out, const string& arg) const {
+  const char* pos = in;
+  const char* start = pos;  // start of the pending run of unescaped bytes
+  const char* const limit = in + inlen;
+  while (pos < limit) {
+    switch (*pos) {
+      default:
+        // Increment our counter and look at the next character.
+        ++pos;
+        continue;
+
+      // Unlike HtmlEscape, we leave whitespace as is.
+      // The replacement text must be the entity, not the raw character.
+      case '&':  EmitRun(start, pos, out); APPEND("&amp;");  break;
+      case '"':  EmitRun(start, pos, out); APPEND("&quot;"); break;
+      case '\'': EmitRun(start, pos, out); APPEND("&#39;");  break;
+      case '<':  EmitRun(start, pos, out); APPEND("&lt;");   break;
+      case '>':  EmitRun(start, pos, out); APPEND("&gt;");   break;
+    }
+    // We just replaced the character at |pos|; the next run starts after it.
+    start = ++pos;
+  }
+  EmitRun(start, pos, out);
+}
+PreEscape pre_escape;
+
+// We encode the presence and ordering of unclosed tags in a string, using the
+// letters b, i, s, and e to stand for <b>, <i>, <span>, and <em> respectively.
+// The most recently opened tag is appended onto the end of the string, so in
+// the common case of properly nested tags, we need only look at the last
+// character. If we don't find it there, we need to continue looking at
+// everything until we find it, because tags may not necessarily be in order.
+// Similarly, when we add a tag, we need to check each existing tag for a match
+// so that we don't nest.
+// Tracks which of the permitted snippet tags are currently open, in the
+// order they were opened, as a NUL-terminated string of one-letter codes.
+class UnclosedSnippetTags {
+ public:
+  // We could use ordinary ints for the enum values, but using mnemonic
+  // characters potentially makes debugging easier.
+  typedef enum {
+    TAG_B = 'b',
+    TAG_I = 'i',
+    TAG_EM = 'e',
+    TAG_SPAN = 's',
+  } Tag;
+
+  UnclosedSnippetTags() : tag_length(0) {
+    // Zero the whole array so that it is always a valid C string.
+    memset(tags, 0, sizeof(tags));
+  }
+
+  // Adds a tag to the set of open tags if it's not already open, or otherwise
+  // return false.  Because duplicates are refused and there are four tag
+  // kinds, at most four slots are ever used and the last slot stays NUL.
+  inline bool MaybeAdd(Tag tag) {
+    if (strchr(tags, tag)) {
+      return false;
+    } else {
+      tags[tag_length++] = tag;
+      return true;
+    }
+  }
+
+  // Removes a tag from the set of open tags if it's open, or otherwise return
+  // false.
+  inline bool MaybeRemove(Tag tag) {
+    char* tag_location = strchr(tags, tag);
+    if (tag_location) {
+      for (char* c = tag_location; *c; ++c) {
+        // Have to copy all later tags down by one so we don't leave a gap in
+        // the array.  The terminating NUL is copied down as well.
+        *c = *(c + 1);
+      }
+      --tag_length;
+      return true;
+    } else {
+      return false;
+    }
+  }
+
+  // Emits a closing tag for every tag that is still open, most recently
+  // opened first.
+  inline void PrintClosingTags(ExpandEmitter* out) {
+    // The last open tag lives at index tag_length - 1; index tag_length is
+    // the NUL terminator, so start one below it.
+    for (int i = tag_length - 1; i >= 0; --i) {
+      switch (tags[i]) {
+        case TAG_B:
+          out->Emit("</b>"); break;
+        case TAG_I:
+          out->Emit("</i>"); break;
+        case TAG_EM:
+          out->Emit("</em>"); break;
+        case TAG_SPAN:
+          out->Emit("</span>"); break;
+      }
+    }
+  }
+
+ private:
+  char tags[5];     // open tags, oldest first, NUL-terminated
+  int tag_length;   // number of open tags currently stored in |tags|
+};
+
+// Escapes |in| like HtmlEscape, except that a small set of tags is passed
+// through unescaped: <b>, <i>, <em>, <span dir=ltr>, <span dir=rtl>, their
+// closing tags, <br> and <wbr>.  An opening tag is only passed through if
+// that tag is not already open, and a closing tag only if it is; anything
+// else starting with '<' is escaped.  Tags still open at the end of the
+// input are closed.  '&' is left alone unless it is followed by '{'.
+// |arg| and the per-expand data are ignored.
+void SnippetEscape::Modify(const char* in, size_t inlen,
+                           const PerExpandData*,
+                           ExpandEmitter* out, const string& arg) const {
+  UnclosedSnippetTags unclosed;
+  const char* pos = in;
+  const char* start = pos;  // start of the pending run of unescaped bytes
+  const char* const limit = in + inlen;
+  while (pos < limit) {
+    switch (*pos) {
+      default:
+        // Increment our counter and look at the next character.
+        ++pos;
+        continue;
+
+      case '<': {
+        // If there is a permissible tag, just advance pos past it to
+        // make it part of the current run.  Notice the use of
+        // "continue" below.
+        const char* const next_pos = pos + 1;
+        const int chars_left = limit - next_pos;
+        if ((chars_left >= 2) && !memcmp(next_pos, "b>", 2)
+            && unclosed.MaybeAdd(UnclosedSnippetTags::TAG_B)) {
+          pos += strliterallen("<b>");
+          continue;
+        } else if ((chars_left >= 2) && !memcmp(next_pos, "i>", 2)
+                   && unclosed.MaybeAdd(UnclosedSnippetTags::TAG_I)) {
+          pos += strliterallen("<i>");
+          continue;
+        } else if ((chars_left >= 3) && !memcmp(next_pos, "em>", 3)
+                   && unclosed.MaybeAdd(UnclosedSnippetTags::TAG_EM)) {
+          pos += strliterallen("<em>");
+          continue;
+        } else if ((chars_left >= 13) && !memcmp(next_pos, "span dir=", 9)
+                   && (!memcmp(next_pos + 9, "ltr>", 4) ||
+                       !memcmp(next_pos + 9, "rtl>", 4))
+                   && unclosed.MaybeAdd(UnclosedSnippetTags::TAG_SPAN)) {
+          // Both directions have the same length, so one literal serves.
+          pos += strliterallen("<span dir=ltr>");
+          continue;
+        } else if ((chars_left >= 3) && !memcmp(next_pos, "/b>", 3)
+                   && unclosed.MaybeRemove(UnclosedSnippetTags::TAG_B)) {
+          pos += strliterallen("</b>");
+          continue;
+        } else if ((chars_left >= 3) && !memcmp(next_pos, "/i>", 3)
+                   && unclosed.MaybeRemove(UnclosedSnippetTags::TAG_I)) {
+          pos += strliterallen("</i>");
+          continue;
+        } else if ((chars_left >= 4) && !memcmp(next_pos, "/em>", 4)
+                   && unclosed.MaybeRemove(UnclosedSnippetTags::TAG_EM)) {
+          pos += strliterallen("</em>");
+          continue;
+        } else if ((chars_left >= 6) && !memcmp(next_pos, "/span>", 6)
+                   && unclosed.MaybeRemove(UnclosedSnippetTags::TAG_SPAN)) {
+          pos += strliterallen("</span>");
+          continue;
+        } else if ((chars_left >= 3) && !memcmp(next_pos, "br>", 3)) {
+          pos += strliterallen("<br>");
+          continue;
+        } else if ((chars_left >= 4) && !memcmp(next_pos, "wbr>", 4)) {
+          pos += strliterallen("<wbr>");
+          continue;
+        }
+
+        // Emit the entity and break out of the switch.
+        EmitRun(start, pos, out);
+        APPEND("&lt;");
+        break;
+      }
+
+      case '&':
+        EmitRun(start, pos, out);
+        if (pos + 1 < limit && pos[1] == '{') {
+          // Could be a javascript entity, so we need to escape.
+          // (Javascript entities are an xss risk in Netscape 4.)
+          APPEND("&amp;");
+        } else {
+          // Any other ampersand is passed through as is.
+          APPEND("&");
+        }
+        break;
+
+      case '"':  EmitRun(start, pos, out); APPEND("&quot;"); break;
+      case '\'': EmitRun(start, pos, out); APPEND("&#39;");  break;
+      case '>':  EmitRun(start, pos, out); APPEND("&gt;");   break;
+
+      case '\r': case '\n': case '\v': case '\f': case '\t':
+        // non-space whitespace
+        EmitRun(start, pos, out); APPEND(" "); break;
+
+    }
+    // We just replaced the character at |pos|; the next run starts after it.
+    start = ++pos;
+  }
+  EmitRun(start, pos, out);
+  unclosed.PrintClosingTags(out);
+}
+SnippetEscape snippet_escape;
+
+// Emits one character per input character: ASCII letters, digits and
+// - . _ : pass through, '=' passes through unless it is the first or last
+// character, and everything else becomes '_'.  |arg| and the per-expand
+// data are ignored.
+void CleanseAttribute::Modify(const char* in, size_t inlen,
+                              const PerExpandData*,
+                              ExpandEmitter* out, const string& arg) const {
+  for (size_t i = 0; i < inlen; ++i) {
+    const char c = in[i];
+    if (c == '=') {
+      const bool at_either_end = (i == 0 || i == (inlen - 1));
+      if (at_either_end) {
+        out->Emit('_');
+      } else {
+        out->Emit(c);
+      }
+      continue;
+    }
+
+    const bool is_safe_punctuation =
+        (c == '-' || c == '.' || c == '_' || c == ':');
+    const bool is_alphanumeric = (c >= 'a' && c <= 'z') ||
+                                 (c >= 'A' && c <= 'Z') ||
+                                 (c >= '0' && c <= '9');
+    if (is_safe_punctuation || is_alphanumeric) {
+      out->Emit(c);
+    } else {
+      APPEND("_");
+    }
+  }
+}
+CleanseAttribute cleanse_attribute;
+
+// Copies through only ASCII letters, digits, space and _ . , ! # % -;
+// every other character is dropped.  |arg| and the per-expand data are
+// ignored.
+void CleanseCss::Modify(const char* in, size_t inlen,
+                        const PerExpandData*,
+                        ExpandEmitter* out, const string& arg) const {
+  for (size_t i = 0; i < inlen; ++i) {
+    const char c = in[i];
+    const bool is_safe_punctuation =
+        (c == ' ' || c == '_' || c == '.' || c == ',' ||
+         c == '!' || c == '#' || c == '%' || c == '-');
+    const bool is_alphanumeric = (c >= 'a' && c <= 'z') ||
+                                 (c >= 'A' && c <= 'Z') ||
+                                 (c >= '0' && c <= '9');
+    if (is_safe_punctuation || is_alphanumeric) {
+      out->Emit(c);
+    }
+  }
+}
+CleanseCss cleanse_css;
+
+// CssUrlEscape is not directly exposed: it exists to be chained behind
+// ValidateUrl (see validate_url_and_css_escape).
+class CssUrlEscape : public TemplateModifier {
+ public:
+  virtual void Modify(const char* in, size_t inlen,
+                      const PerExpandData*, ExpandEmitter* outbuf,
+                      const string& arg) const;
+};
+
+// Percent-encodes the characters [\n\r\\'"()<>*] so that a URL can be placed
+// safely in a CSS context, for example:
+//   . an '@import url("URL");' statement
+//   . a CSS property such as 'background: url("URL");'
+// In both places the enclosing quotes are optional but the parens are not.
+// The goal is that the URL cannot leave the parens enclosure, close a STYLE
+// tag, or reset the browser's CSS parser (via comments or newlines).
+// All other bytes are emitted unchanged.  |arg| is unused.
+//
+// References:
+//   . CSS 2.1 URLs: http://www.w3.org/TR/CSS21/syndata.html#url
+//   . CSS 1 URLs: http://www.w3.org/TR/REC-CSS1/#url
+void CssUrlEscape::Modify(const char* in, size_t inlen,
+                          const PerExpandData*,
+                          ExpandEmitter* out, const string& arg) const {
+  const char* const end = in + inlen;
+  for (const char* p = in; p < end; ++p) {
+    const char ch = *p;
+    if (ch == '\n') {
+      APPEND("%0A");
+    } else if (ch == '\r') {
+      APPEND("%0D");
+    } else if (ch == '"') {
+      APPEND("%22");
+    } else if (ch == '\'') {
+      APPEND("%27");
+    } else if (ch == '(') {
+      APPEND("%28");
+    } else if (ch == ')') {
+      APPEND("%29");
+    } else if (ch == '*') {
+      APPEND("%2A");
+    } else if (ch == '<') {
+      APPEND("%3C");
+    } else if (ch == '>') {
+      APPEND("%3E");
+    } else if (ch == '\\') {
+      APPEND("%5C");
+    } else {
+      out->Emit(ch);
+    }
+  }
+}
+CssUrlEscape css_url_escape;
+
+// These URLs replace unsafe URLs for :U and :I url-escaping modes.
+// kUnsafeUrlReplacement is the one passed to the validate_url_and_* instances
+// and kUnsafeImgSrcUrlReplacement the one passed to the
+// validate_img_src_url_and_* instances defined later in this file.
+const char* const ValidateUrl::kUnsafeUrlReplacement = "#";
+const char* const ValidateUrl::kUnsafeImgSrcUrlReplacement =
+ "/images/cleardot.gif";
+
+// Passes |in| through the chained modifier unless it looks like a URL with
+// an insecure protocol, in which case the configured replacement URL is
+// passed through the chained modifier instead.
+//
+// A URL is only checked for an insecure protocol when a ':' occurs before
+// the first '/' (or anywhere, if there is no '/'), since only then can the
+// text before the colon be a protocol.  |arg| is ignored; the chained
+// modifier is always invoked with an empty argument.
+void ValidateUrl::Modify(const char* in, size_t inlen,
+                         const PerExpandData* per_expand_data,
+                         ExpandEmitter* out, const string& arg) const {
+  // Length of the prefix that precedes the first slash (whole input if none).
+  const void* const first_slash = memchr(in, '/', inlen);
+  const size_t prefix_len =
+      (first_slash != NULL)
+          ? static_cast<size_t>(static_cast<const char*>(first_slash) - in)
+          : inlen;
+  const bool colon_in_prefix = (memchr(in, ':', prefix_len) != NULL);
+
+  if (!colon_in_prefix || !URL::HasInsecureProtocol(in, inlen)) {
+    // Nothing suspicious about the protocol: just escape the URL itself.
+    chained_modifier_.Modify(in, inlen, per_expand_data, out, "");
+    return;
+  }
+
+  // Bad protocol: escape and emit the safe replacement instead.
+  chained_modifier_.Modify(unsafe_url_replacement_,
+                           unsafe_url_replacement_length_,
+                           per_expand_data,
+                           out,
+                           "");
+}
+// Global ValidateUrl instances.  Each pairs a chained escaper (first
+// argument) with the URL substituted for inputs that fail validation
+// (second argument).
+ValidateUrl validate_url_and_html_escape(
+ html_escape,
+ ValidateUrl::kUnsafeUrlReplacement);
+ValidateUrl validate_url_and_javascript_escape(
+ javascript_escape,
+ ValidateUrl::kUnsafeUrlReplacement);
+ValidateUrl validate_url_and_css_escape(
+ css_url_escape,
+ ValidateUrl::kUnsafeUrlReplacement);
+// Same three chained escapers, but with the image-specific replacement URL.
+ValidateUrl validate_img_src_url_and_html_escape(
+ html_escape,
+ ValidateUrl::kUnsafeImgSrcUrlReplacement);
+ValidateUrl validate_img_src_url_and_javascript_escape(
+ javascript_escape,
+ ValidateUrl::kUnsafeImgSrcUrlReplacement);
+ValidateUrl validate_img_src_url_and_css_escape(
+ css_url_escape,
+ ValidateUrl::kUnsafeImgSrcUrlReplacement);
+
+// Escapes |in| for use in XML text:
+//   . '&' '"' '\'' '<' '>' are replaced by the character references
+//     &amp; &quot; &#39; &lt; &gt; respectively;
+//   . control characters in 0x00-0x1F other than \t, \r and \n are replaced
+//     by a single space;
+//   . every other byte is copied unchanged.
+// Unmodified bytes are accumulated in the run [start, pos) and flushed with
+// EmitRun() just before each replacement and once at the end.
+// |arg| is unused.
+void XmlEscape::Modify(const char* in, size_t inlen,
+                       const PerExpandData*,
+                       ExpandEmitter* out, const string& arg) const {
+  const char* pos = in;
+  const char* start = pos;
+  const char* const limit = in + inlen;
+  while (pos < limit) {
+    char ch = *pos;
+
+    // According to section 2.2 of the spec
+    // http://www.w3.org/TR/REC-xml/#charsets control characters in range
+    // 0x00-0x1F (except \t, \r and \n) are not valid XML characters. In
+    // particular, conformant parsers are allowed to die when encountering a FF
+    // char in PCDATA sections. These chars are replaced by a space.
+    if (ch >= 0x00 && ch < 0x20 && ch != '\t' && ch != '\r' && ch != '\n') {
+      EmitRun(start, pos, out);
+      out->Emit(' ');
+      start = ++pos;
+      continue;
+    }
+
+    switch (ch) {
+      default:
+        // Ordinary character: extend the pending run and keep scanning.
+        ++pos;
+        continue;
+
+      // Each replacement must be the escaped character reference; emitting
+      // the raw character here would leave the output unescaped.
+      case '&':  EmitRun(start, pos, out); APPEND("&amp;");  break;
+      case '"':  EmitRun(start, pos, out); APPEND("&quot;"); break;
+      case '\'': EmitRun(start, pos, out); APPEND("&#39;");  break;
+      case '<':  EmitRun(start, pos, out); APPEND("&lt;");   break;
+      case '>':  EmitRun(start, pos, out); APPEND("&gt;");   break;
+    }
+    // Skip the character that was just replaced and begin a new run.
+    start = ++pos;
+  }
+  EmitRun(start, pos, out);
+}
+XmlEscape xml_escape;
+
+// This table maps initial characters to code lengths. This could be
+// done with a 16-byte table and a shift, but there's a substantial
+// performance increase by eliminating the shift.
+// Indexed by the first byte of a sequence (as an unsigned char).  Only
+// 0xC0-0xDF (length 2) and 0xE0-0xEF (length 3) map to multi-byte lengths;
+// every other byte value, including 0x80-0xBF and 0xF0-0xFF, maps to 1.
+static const char kCodeLengths[256] = {
+ // 0x00 - 0x3F
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+
+ // 0x40 - 0x7F
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+
+ // 0x80 - 0xBF
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+
+ // 0xC0 - 0xFF
+ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+ 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+};
+
+// Returns the UTF-8 code-unit starting at start, or the special codepoint
+// 0xFFFD if the input ends abruptly or is not well-formed UTF-8.
+// start -- address of the start of the code unit which also receives the
+// address past the end of the code unit returned.
+// end -- exclusive end of the string
+//
+// The result is the decoded 16-bit value of a 2- or 3-byte sequence, or the
+// raw byte itself when kCodeLengths gives the first byte a length of 1.
+// On the 0xFFFD paths *start is advanced by exactly one byte.
+// **start is read unconditionally, so the caller must ensure *start < end.
+static inline uint16 UTF8CodeUnit(const char** start, const char *end) {
+ // Use kCodeLengths table to calculate the length of the code unit
+ // from the first character.
+ unsigned char first_char = static_cast<unsigned char>(**start);
+ size_t code_unit_len = kCodeLengths[first_char];
+ if (code_unit_len == 1) {
+ // Return the current byte as a codepoint.
+ // Either it is a valid single byte codepoint, or it's not part of a valid
+ // UTF-8 sequence, and so has to be handled individually.
+ ++*start;
+ return first_char;
+ }
+ const char *code_unit_end = *start + code_unit_len;
+ // The first comparison guards against pointer wrap-around, the second
+ // against a sequence that would run past the end of the input.
+ if (code_unit_end < *start || code_unit_end > end) { // Truncated code unit.
+ ++*start;
+ return 0xFFFDU;
+ }
+ const char* pos = *start;
+ // Keep only the payload bits of the lead byte: the mask is 0x3F for a
+ // 2-byte sequence and 0x1F for a 3-byte sequence.
+ uint16 code_unit = *pos & (0xFFU >> code_unit_len);
+ while (--code_unit_len) {
+ uint16 tail_byte = *(++pos);
+ // Continuation bytes must have the form 10xxxxxx.
+ if ((tail_byte & 0xC0U) != 0x80U) { // Malformed code unit.
+ ++*start;
+ return 0xFFFDU;
+ }
+ // Append the 6 payload bits of this continuation byte.
+ code_unit = (code_unit << 6) | (tail_byte & 0x3FU);
+ }
+ *start = code_unit_end;
+ return code_unit;
+}
+
+// A good reference is the ECMA standard (3rd ed), section 7.8.4:
+// http://www.ecma-international.org/publications/files/ECMA-ST/Ecma-262.pdf
+//
+// Emits |in| with the characters listed in the switch below replaced by
+// backslash escapes, and with U+2028 / U+2029 replaced by \u2028 / \u2029.
+// The input is decoded one unit at a time with UTF8CodeUnit(); everything
+// that needs no escaping is copied through byte-for-byte in runs.
+// |arg| is unused.
+void JavascriptEscape::Modify(const char* in, size_t inlen,
+ const PerExpandData*,
+ ExpandEmitter* out, const string& arg) const {
+ // [start, pos) is the pending run of input that needs no escaping.
+ const char* pos = in;
+ const char* start = pos;
+ const char* const limit = in + inlen;
+
+ // in + inlen wrapped around: nothing sensible can be emitted.
+ if (limit < in) { return; }
+
+ while (pos < limit) {
+ // next_pos is advanced past the unit that begins at pos.
+ const char* next_pos = pos;
+ uint16 code_unit = UTF8CodeUnit(&next_pos, limit);
+
+ // Test for 16-bit values outside the switch below, because gcc
+ // will emit chained branches rather than a jump table for such a
+ // wide range of values.
+ if (code_unit & 0xFF00) {
+ // Linebreaks according to EcmaScript 262 which cannot appear in strings.
+ if (code_unit == 0x2028) {
+ // Line separator
+ EmitRun(start, pos, out); APPEND("\\u2028");
+ } else if (code_unit == 0x2029) {
+ // Paragraph separator
+ EmitRun(start, pos, out); APPEND("\\u2029");
+ } else {
+ // Any other multi-byte unit stays in the pending run unchanged.
+ pos = next_pos;
+ continue;
+ }
+ } else {
+ switch (code_unit) {
+ default:
+ // Increment our counter and look at the next character.
+ pos = next_pos;
+ continue;
+
+ case '\0': EmitRun(start, pos, out); APPEND("\\x00"); break;
+ case '"': EmitRun(start, pos, out); APPEND("\\x22"); break;
+ case '\'': EmitRun(start, pos, out); APPEND("\\x27"); break;
+ case '\\': EmitRun(start, pos, out); APPEND("\\\\"); break;
+ case '\t': EmitRun(start, pos, out); APPEND("\\t"); break;
+ case '\r': EmitRun(start, pos, out); APPEND("\\r"); break;
+ case '\n': EmitRun(start, pos, out); APPEND("\\n"); break;
+ case '\b': EmitRun(start, pos, out); APPEND("\\b"); break;
+ case '\f': EmitRun(start, pos, out); APPEND("\\f"); break;
+ case '&': EmitRun(start, pos, out); APPEND("\\x26"); break;
+ case '<': EmitRun(start, pos, out); APPEND("\\x3c"); break;
+ case '>': EmitRun(start, pos, out); APPEND("\\x3e"); break;
+ case '=': EmitRun(start, pos, out); APPEND("\\x3d"); break;
+
+ case '\v':
+ // Do not escape vertical tabs to "\\v" since it is interpreted as 'v'
+ // by JScript according to section 2.1 of
+ // http://wiki.ecmascript.org/lib/exe/fetch.php?
+ // id=resources%3Aresources&cache=cache&
+ // media=resources:jscriptdeviationsfromes3.pdf
+ EmitRun(start, pos, out); APPEND("\\x0b"); break;
+ }
+ }
+ // Reached only after an escape was emitted: skip the escaped unit and
+ // start a new pending run right after it.
+ start = pos = next_pos;
+ }
+ // Flush whatever unescaped input remains.
+ EmitRun(start, pos, out);
+}
+JavascriptEscape javascript_escape;
+
+
+// Emits |in| unchanged when it is a plausible javascript numeric or boolean
+// literal, and the text "null" otherwise.  Empty input emits nothing.
+// Accepted inputs are:
+//   . exactly "true" or "false";
+//   . 0x / 0X followed by one or more hex digits;
+//   . any non-empty string made up only of the characters [0-9+-.eE].
+// The last form is a character-set check only; it does not verify that the
+// text is a well-formed number.  |arg| is unused.
+void JavascriptNumber::Modify(const char* in, size_t inlen,
+                              const PerExpandData*,
+                              ExpandEmitter* out, const string& arg) const {
+  if (inlen == 0)
+    return;
+
+  if (STR_IS(in, inlen, "true") || STR_IS(in, inlen, "false")) {
+    out->Emit(in, inlen);
+    return;
+  }
+
+  // Hex form requires at least one digit after the prefix, hence inlen > 2.
+  // Anything shorter falls through to the decimal character check.
+  const bool is_hex =
+      in[0] == '0' && inlen > 2 && (in[1] == 'x' || in[1] == 'X');
+
+  bool all_chars_ok = true;
+  for (size_t idx = is_hex ? 2 : 0; all_chars_ok && idx < inlen; ++idx) {
+    const char ch = in[idx];
+    const bool is_decimal_digit = (ch >= '0' && ch <= '9');
+    if (is_hex) {
+      all_chars_ok = is_decimal_digit ||
+                     (ch >= 'a' && ch <= 'f') ||
+                     (ch >= 'A' && ch <= 'F');
+    } else {
+      // Base-10 (or octal) number: [0-9+-.eE]+
+      all_chars_ok = is_decimal_digit ||
+                     ch == '+' || ch == '-' || ch == '.' ||
+                     ch == 'e' || ch == 'E';
+    }
+  }
+
+  if (!all_chars_ok) {
+    APPEND("null");        // Not a valid number: output null instead.
+    return;
+  }
+  out->Emit(in, inlen);    // Valid number: output it verbatim.
+}
+JavascriptNumber javascript_number;
+
+// Returns true iff |c| may appear unescaped in a URL query component, i.e.
+// iff it matches [0-9a-zA-Z.,_*/~!()-].  Everything else is escaped.
+static inline bool IsUrlQueryEscapeSafeChar(unsigned char c) {
+  // 256-bit set, one bit per byte value: word index is c / 32, bit index is
+  // c % 32.  Bytes >= 0x80 are never safe.
+  static const unsigned long kSafeCharacters[8] = {
+    0x00000000L, 0x03fff702L, 0x87fffffeL, 0x47fffffeL,
+    0x00000000L, 0x00000000L, 0x00000000L, 0x00000000L
+  };
+
+  // Use an unsigned long mask so that selecting bit 31 (needed for '_')
+  // never shifts into the sign bit of an int.
+  return (kSafeCharacters[c >> 5] & (1UL << (c & 31))) != 0;
+}
+
+// Escapes |in| for use in a URL query: bytes accepted by
+// IsUrlQueryEscapeSafeChar() are copied through, a space becomes '+', and
+// every other byte becomes '%' followed by two upper-case hex digits.
+// |arg| is unused.
+void UrlQueryEscape::Modify(const char* in, size_t inlen,
+                            const PerExpandData*,
+                            ExpandEmitter* out, const string& arg) const {
+  static const char kUpperHex[] = "0123456789ABCDEF";
+  const char* cursor = in;
+  const char* const end = in + inlen;
+  for (;;) {
+    // Consume the longest run of safe characters and emit it in one go.
+    const char* const run_begin = cursor;
+    for (; cursor < end && IsUrlQueryEscapeSafeChar(*cursor); ++cursor) {
+    }
+    EmitRun(run_begin, cursor, out);
+
+    if (cursor >= end)
+      break;  // We're done!
+
+    // Exactly one unsafe byte follows the run; encode it.
+    const unsigned char byte = *cursor++;
+    if (byte == ' ') {
+      out->Emit('+');
+      continue;
+    }
+    out->Emit('%');
+    out->Emit(kUpperHex[byte >> 4]);
+    out->Emit(kUpperHex[byte & 0xf]);
+  }
+}
+UrlQueryEscape url_query_escape;
+
+// JSON string escaping; see section 2.5 of
+// http://www.ietf.org/rfc/rfc4627.txt.
+// Escaping '&', '<' and '>' is optional in the proposed JSON RFC, but doing
+// so alleviates content-sniffing concerns when the JSON ends up somewhere a
+// browser may try to interpret it as HTML.
+// Only the characters listed in the switch below are rewritten; all other
+// bytes are copied through in runs.  |arg| is unused.
+void JsonEscape::Modify(const char* in, size_t inlen,
+                        const PerExpandData*,
+                        ExpandEmitter* out, const string& arg) const {
+  const char* const limit = in + inlen;
+  const char* start = in;  // First byte of the run not yet emitted.
+  const char* pos = in;
+  for (; pos < limit; ++pos) {
+    switch (*pos) {
+      case '"':  EmitRun(start, pos, out); APPEND("\\\"");    break;
+      case '\\': EmitRun(start, pos, out); APPEND("\\\\");    break;
+      case '/':  EmitRun(start, pos, out); APPEND("\\/");     break;
+      case '\b': EmitRun(start, pos, out); APPEND("\\b");     break;
+      case '\f': EmitRun(start, pos, out); APPEND("\\f");     break;
+      case '\n': EmitRun(start, pos, out); APPEND("\\n");     break;
+      case '\r': EmitRun(start, pos, out); APPEND("\\r");     break;
+      case '\t': EmitRun(start, pos, out); APPEND("\\t");     break;
+      case '&':  EmitRun(start, pos, out); APPEND("\\u0026"); break;
+      case '<':  EmitRun(start, pos, out); APPEND("\\u003C"); break;
+      case '>':  EmitRun(start, pos, out); APPEND("\\u003E"); break;
+
+      default:
+        // Ordinary byte: it simply extends the current run.
+        continue;
+    }
+    // An escape was emitted for *pos; the next run begins right after it.
+    start = pos + 1;
+  }
+  EmitRun(start, pos, out);
+}
+JsonEscape json_escape;
+
+// Emits |in| with |arg| inserted after every line terminator, so that each
+// line after the first starts with the prefix |arg|.  A terminator is "\n",
+// "\r" or "\r\n" (treated as a single terminator).  The text before the
+// first terminator gets no prefix, and a terminator at the very end of the
+// input is still followed by |arg|.
+void PrefixLine::Modify(const char* in, size_t inlen,
+                        const PerExpandData*,
+                        ExpandEmitter* out, const string& arg) const {
+  while (inlen > 0) {
+    const char* nl = static_cast<const char*>(memchr(in, '\n', inlen));
+    // Only look for \r in front of the first \n (or in everything that is
+    // left when there is no \n).  Consequently, when both are found, cr
+    // always precedes nl.
+    const char* cr = static_cast<const char*>(
+        memchr(in, '\r', nl ? nl - in : inlen));
+    if (nl == NULL && cr == NULL) {
+      // We're at the last line
+      out->Emit(in, inlen);
+      break;
+    }
+
+    // Pick the last character of the terminator that ends this line.
+    const char* eol;
+    if (cr == NULL)
+      eol = nl;             // bare \n
+    else if (nl == cr + 1)
+      eol = nl;             // \r\n: the line ends after the \n
+    else
+      eol = cr;             // bare \r (no \n, or the \n belongs to a later line)
+
+    const size_t linelen = eol + 1 - in;
+    // inlen is unsigned, so check the bound before subtracting rather than
+    // testing the (always non-negative) result afterwards.
+    assert(linelen <= inlen);
+    out->Emit(in, linelen);
+    out->Emit(arg);               // a new line, so emit the prefix
+    in += linelen;
+    inlen -= linelen;
+  }
+}
+PrefixLine prefix_line;
+
+
+// Must be at least one more than the maximum number of alternative modifiers
+// specified in any given element of g_modifiers.
+// The spare slot is left NULL by aggregate initialization, and
+// IsSafeXSSAlternative stops scanning a safe_alt_mods list at the first NULL.
+# define MAX_SAFE_ALTERNATIVES 10 // If the compiler complains, increase it.
+
+// Use the empty string if you want a modifier not to have a long-name.
+// Use '\0' if you want a modifier not to have a short-name.
+// Note: not all modifiers are in this array:
+// 1) SnippetEscape: use html_escape_with_arg=snippet to get this
+// 2) CleanseAttribute: use html_escape_with_arg=attribute to get this
+// 3) ValidateUrl: use html_escape_with_arg=url to get this
+//
+// Some modifiers define other modifiers that are safe replacements
+// from an XSS perspective. Replacements are not commutative so for
+// example H=pre considers H=attribute a safe replacement to it
+// but H=attribute has no safe replacements.
+// This struct is not pretty but allows the definitions to be
+// done without the need for a global initialization method.
+// Be very careful making a change to g_modifiers as modifiers
+// point to other ones within that same array so elements
+// may not be re-ordered easily. Also you need to change
+// the global g_am_dirs correspondingly.
+//
+// Table of built-in modifiers.  The leading /* N */ comments give each
+// element's index; other elements and the g_am_* arrays refer to entries by
+// these indices, so the order must not change.
+static struct ModifierWithAlternatives {
+ // The modifier itself: long name, short name, XSS class and implementation.
+ ModifierInfo modifier_info;
+ // NULL-terminated list of modifiers accepted as XSS-safe replacements for
+ // modifier_info; slots not listed in the initializer are left NULL.
+ ModifierInfo* safe_alt_mods[MAX_SAFE_ALTERNATIVES];
+} g_modifiers[] = {
+ /* 0 */ { ModifierInfo("cleanse_css", 'c',
+ XSS_WEB_STANDARD, &cleanse_css),
+ {&g_modifiers[16].modifier_info, // url_escape_with_arg=css
+ // img_src_url_escape_with_arg=css
+ &g_modifiers[19].modifier_info} },
+ /* 1 */ { ModifierInfo("html_escape", 'h',
+ XSS_WEB_STANDARD, &html_escape),
+ {&g_modifiers[2].modifier_info, // html_escape_with_arg=snippet
+ &g_modifiers[3].modifier_info, // html_escape_with_arg=pre
+ &g_modifiers[4].modifier_info, // html_escape_with_arg=attribute
+ &g_modifiers[5].modifier_info, // html_escape_with_arg=url
+ &g_modifiers[8].modifier_info, // pre_escape
+ &g_modifiers[9].modifier_info, // url_query_escape
+ &g_modifiers[11].modifier_info, // url_escape_with_arg=html
+ &g_modifiers[12].modifier_info, // url_escape_with_arg=query
+ // img_src_url_escape_with_arg=html
+ &g_modifiers[18].modifier_info} },
+ /* 2 */ { ModifierInfo("html_escape_with_arg=snippet", 'H',
+ XSS_WEB_STANDARD, &snippet_escape),
+ {&g_modifiers[1].modifier_info, // html_escape
+ &g_modifiers[3].modifier_info, // html_escape_with_arg=pre
+ &g_modifiers[4].modifier_info, // html_escape_with_arg=attribute
+ &g_modifiers[8].modifier_info, // pre_escape
+ &g_modifiers[9].modifier_info, // url_query_escape
+ &g_modifiers[12].modifier_info} }, // url_escape_with_arg=query
+ /* 3 */ { ModifierInfo("html_escape_with_arg=pre", 'H',
+ XSS_WEB_STANDARD, &pre_escape),
+ {&g_modifiers[1].modifier_info, // html_escape
+ &g_modifiers[2].modifier_info, // html_escape_with_arg=snippet
+ &g_modifiers[4].modifier_info, // html_escape_with_arg=attribute
+ &g_modifiers[8].modifier_info, // pre_escape
+ &g_modifiers[9].modifier_info, // url_query_escape
+ &g_modifiers[12].modifier_info} }, // url_escape_with_arg=query
+ /* 4 */ { ModifierInfo("html_escape_with_arg=attribute", 'H',
+ XSS_WEB_STANDARD, &cleanse_attribute), {} },
+ /* 5 */ { ModifierInfo("html_escape_with_arg=url", 'H',
+ XSS_WEB_STANDARD, &validate_url_and_html_escape),
+ // img_src_url_escape_with_arg=html
+ {&g_modifiers[18].modifier_info} },
+ /* 6 */ { ModifierInfo("javascript_escape", 'j',
+ XSS_WEB_STANDARD, &javascript_escape),
+ {&g_modifiers[7].modifier_info, // json_escape
+ &g_modifiers[10].modifier_info, // url_escape_with_arg=javascript
+ // img_src_url_escape_with_arg=javascript
+ &g_modifiers[17].modifier_info} },
+ /* 7 */ { ModifierInfo("json_escape", 'o', XSS_WEB_STANDARD, &json_escape),
+ {&g_modifiers[6].modifier_info} }, // javascript_escape
+ /* 8 */ { ModifierInfo("pre_escape", 'p', XSS_WEB_STANDARD, &pre_escape),
+ {&g_modifiers[1].modifier_info, // html_escape
+ &g_modifiers[2].modifier_info, // html_escape_with_arg=snippet
+ &g_modifiers[3].modifier_info, // html_escape_with_arg=pre
+ &g_modifiers[4].modifier_info, // html_escape_with_arg=attr...
+ &g_modifiers[9].modifier_info, // url_query_escape
+ &g_modifiers[12].modifier_info} }, // url_escape_with_arg=query
+ /* 9 */ { ModifierInfo("url_query_escape", 'u',
+ XSS_WEB_STANDARD, &url_query_escape), {} },
+ /* 10 */ { ModifierInfo("url_escape_with_arg=javascript", 'U',
+ XSS_WEB_STANDARD,
+ &validate_url_and_javascript_escape),
+ // img_src_url_escape_with_arg=javascript
+ {&g_modifiers[17].modifier_info} },
+ /* 11 */ { ModifierInfo("url_escape_with_arg=html", 'U',
+ XSS_WEB_STANDARD, &validate_url_and_html_escape),
+ // img_src_url_escape_with_arg=html
+ {&g_modifiers[18].modifier_info} },
+ /* 12 */ { ModifierInfo("url_escape_with_arg=query", 'U',
+ XSS_WEB_STANDARD, &url_query_escape), {} },
+ /* 13 */ { ModifierInfo("none", '\0', XSS_SAFE, &null_modifier), {} },
+ /* 14 */ { ModifierInfo("xml_escape", '\0', XSS_WEB_STANDARD, &xml_escape),
+ {&g_modifiers[1].modifier_info, // html_escape
+ &g_modifiers[4].modifier_info,} }, // H=attribute
+ /* 15 */ { ModifierInfo("javascript_escape_with_arg=number", 'J',
+ XSS_WEB_STANDARD, &javascript_number), {} },
+ /* 16 */ { ModifierInfo("url_escape_with_arg=css", 'U',
+ XSS_WEB_STANDARD, &validate_url_and_css_escape), {} },
+ /* 17 */ { ModifierInfo("img_src_url_escape_with_arg=javascript", 'I',
+ XSS_WEB_STANDARD,
+ &validate_img_src_url_and_javascript_escape), {} },
+ /* 18 */ { ModifierInfo("img_src_url_escape_with_arg=html", 'I',
+ XSS_WEB_STANDARD,
+ &validate_img_src_url_and_html_escape), {} },
+ /* 19 */ { ModifierInfo("img_src_url_escape_with_arg=css", 'I',
+ XSS_WEB_STANDARD,
+ &validate_img_src_url_and_css_escape), {} },
+};
+
+// Modifiers registered through AddModifierCommon (names starting with "x-").
+// Entries are heap-allocated there and never freed in this part of the file.
+static vector<const ModifierInfo*> g_extension_modifiers;
+// Placeholder entries (with a NULL modifier) that FindModifier creates for
+// "x-" names that match no registered extension modifier.
+static vector<const ModifierInfo*> g_unknown_modifiers;
+
+// Returns whether or not candidate can be safely (w.r.t XSS)
+// used in lieu of our ModifierInfo. This is true iff:
+// 1. Both have the same modifier function OR
+// 2. Candidate's modifier function is in our ModifierInfo's
+// list (vector) of safe alternative modifier functions.
+//
+// This is used with the auto-escaping code, which automatically
+// figures out which modifier to apply to a variable based on the
+// variable's context (in an html "<A HREF", for instance). Some
+// built-in modifiers are considered safe alternatives from the perspective
+// of preventing XSS (cross-site-scripting) attacks, in which case
+// the auto-escaper should allow the choice of which to use in the
+// template. This is intended only for internal use as it is dangerous
+// and complicated to figure out which modifier is an XSS-safe
+// replacement for a given one. Custom modifiers currently may not
+// indicate safe replacements, only built-in ones may do so.
+//
+// Note that this function is not commutative therefore
+// IsSafeXSSAlternative(a, b) may not be equal to IsSafeXSSAlternative(b, a).
+bool IsSafeXSSAlternative(const ModifierInfo& our,
+                          const ModifierInfo& candidate) {
+  // Succeeds even for non built-in modifiers but no harm.
+  if (our.modifier == candidate.modifier)
+    return true;
+
+  const ModifierWithAlternatives* const mods_end =
+      g_modifiers + sizeof(g_modifiers) / sizeof(*g_modifiers);
+  for (const ModifierWithAlternatives* mod_with_alts = g_modifiers;
+       mod_with_alts < mods_end; ++mod_with_alts) {
+    if (mod_with_alts->modifier_info.long_name != our.long_name)
+      continue;
+    // We found our Modifier in the built-in array g_modifiers.  Walk its
+    // NULL-terminated list of safe alternatives.  The index is bounds-checked
+    // *before* the slot is read so that a completely full list can never
+    // cause a read one past the end of safe_alt_mods.
+    for (int i = 0;
+         i < MAX_SAFE_ALTERNATIVES && mod_with_alts->safe_alt_mods[i] != NULL;
+         ++i) {
+      if (mod_with_alts->safe_alt_mods[i]->long_name == candidate.long_name) {
+        // We found candidate in our Modifier's list of safe alternatives.
+        return true;
+      }
+    }
+  }
+  // our is not built-in or candidate is not a safe replacement to our.
+  return false;
+}
+
+// Returns true iff |long_name| starts with the extension prefix "x-".
+// |long_name| must either be NUL-terminated or have at least two readable
+// bytes.  strncmp stops at a terminating NUL, so names shorter than the
+// prefix (including the empty string) are rejected without reading past
+// their end.
+static inline bool IsExtensionModifier(const char* long_name) {
+  return strncmp(long_name, "x-", 2) == 0;
+}
+
+// Registers |modifier| under |long_name| in g_extension_modifiers.
+// Returns false, registering nothing, when |long_name| does not start with
+// "x-" or when it clashes with an already registered extension modifier.
+// On success a new ModifierInfo is allocated with XSS_SAFE or XSS_UNIQUE,
+// depending on |xss_safe|, and true is returned.
+static bool AddModifierCommon(const char* long_name,
+                              const TemplateModifier* modifier, bool xss_safe) {
+  if (!IsExtensionModifier(long_name))
+    return false;
+
+  // The base name is everything before the '=' (the whole name if none).
+  const size_t new_namelen = strcspn(long_name, "=");
+  const bool new_has_modval = (long_name[new_namelen] == '=');
+
+  // TODO(csilvers): store in a map or multimap, rather than a vector
+  typedef vector<const ModifierInfo*>::const_iterator ModIter;
+  for (ModIter it = g_extension_modifiers.begin();
+       it != g_extension_modifiers.end(); ++it) {
+    const string& existing = (*it)->long_name;
+    const size_t existing_namelen = strcspn(existing.c_str(), "=");
+    const bool same_base_name =
+        new_namelen == existing_namelen &&
+        memcmp(long_name, existing.c_str(), new_namelen) == 0;
+    if (!same_base_name)
+      continue;
+
+    // Sharing a base name is only acceptable for two different modval
+    // specializations: "foo=bar" and "foo=baz" may coexist, whereas "foo"
+    // and "foo=bar" may not (one takes a modval, the other does not), and
+    // neither may two identical names.
+    const bool different_specializations =
+        new_has_modval &&
+        existing[existing_namelen] == '=' &&
+        existing != long_name;
+    if (!different_specializations)
+      return false;
+  }
+
+  const XssClass xss_class = xss_safe ? XSS_SAFE : XSS_UNIQUE;
+  g_extension_modifiers.push_back(
+      new ModifierInfo(long_name, '\0', xss_class, modifier));
+  return true;
+}
+
+// Registers an extension modifier with the XSS_UNIQUE XssClass.
+// Returns whatever AddModifierCommon returns for the same name.
+bool AddModifier(const char* long_name,
+                 const TemplateModifier* modifier) {
+  const bool kXssSafe = false;
+  return AddModifierCommon(long_name, modifier, kXssSafe);
+}
+
+// Registers an extension modifier with the XSS_SAFE XssClass.
+// Returns whatever AddModifierCommon returns for the same name.
+bool AddXssSafeModifier(const char* long_name,
+                        const TemplateModifier* modifier) {
+  const bool kXssSafe = true;
+  return AddModifierCommon(long_name, modifier, kXssSafe);
+}
+
+// If candidate_match is a better match for modname/modval than bestmatch,
+// update bestmatch. To be a better match, three conditions must be met:
+// 1) The candidate's name must match modname
+// 2) If the candidate is a specialization (that is, name is of the form
+// "foo=bar", then modval matches the specialization value).
+// 3) If the candidate is not a specialization, bestmatch isn't a
+// specialization either.
+// Condition (3) makes sure that if we match the ModifierInfo with name
+// "foo=bar", we don't claim the ModifierInfo "foo=" is a better match.
+// Recall that by definition, modval will always start with a '=' if present.
+// modname/modname_len -- the modifier name being looked up (short or long).
+// modval/modval_len   -- the modifier value including its leading '=', or
+//                        modval_len == 0 when no value was given.
+// candidate_match     -- the registered modifier to test.
+// best_match          -- in/out: replaced by candidate_match when the
+//                        candidate is a better match; otherwise untouched.
+static void UpdateBestMatch(const char* modname, size_t modname_len,
+ const char* modval, size_t modval_len,
+ const ModifierInfo* candidate_match,
+ const ModifierInfo** best_match) {
+ // It's easiest to handle the two cases differently: (1) candidate_match
+ // refers to a modifier that expects a modifier-value; (2) it doesn't.
+ if (candidate_match->modval_required) {
+ // To be a match, we have to fulfill three requirements: we have a
+ // modval, our modname matches candidate_match's modname (either
+ // shortname or longname), and our modval is consistent with the
+ // value specified in the longname (whatever might follow the =).
+ const char* const longname_start = candidate_match->long_name.c_str();
+ const char* const equals = strchr(longname_start, '=');
+ assert(equals != NULL);
+ // The last clause compares modval against the "=value" tail of the
+ // candidate's long name, including the '=' itself.
+ if (modval_len > 0 &&
+ ((modname_len == 1 && *modname == candidate_match->short_name) ||
+ (modname_len == equals - longname_start &&
+ memcmp(modname, longname_start, modname_len) == 0)) &&
+ ((equals[1] == '\0') || // name is "foo=" (not a specialization)
+ (modval_len
+ == longname_start + candidate_match->long_name.size() - equals &&
+ memcmp(modval, equals, modval_len) == 0))) {
+ // Condition (3) above is satisfied iff our longname is longer than
+ // best-match's longname (so we prefer "foo=bar" to "foo=").
+ if (*best_match == NULL ||
+ candidate_match->long_name.size() > (*best_match)->long_name.size())
+ *best_match = candidate_match;
+ }
+ } else {
+ // In this case, to be a match: we must *not* have a modval. Our
+ // modname still must match modinfo's modname (either short or long).
+ if (modval_len == 0 &&
+ ((modname_len == 1 && *modname == candidate_match->short_name) ||
+ (modname_len == candidate_match->long_name.size() &&
+ !memcmp(modname, candidate_match->long_name.data(), modname_len)))) {
+ // In the no-modval case, only one match should exist.
+ assert(*best_match == NULL);
+ *best_match = candidate_match;
+ }
+ }
+}
+
+// Looks up the modifier named |modname| (with optional value |modval|).
+//
+// Names that do not start with "x-" are resolved against the built-in table
+// g_modifiers only; NULL is returned when nothing matches.
+//
+// Names that start with "x-" are resolved against the registered extension
+// modifiers first and the previously seen unknown modifiers second.  If
+// neither matches, a new ModifierInfo with a NULL modifier is recorded in
+// g_unknown_modifiers and returned, so this path never returns NULL.
+//
+// More than one modifier can match in the case of modval specializations
+// (e.g. "foo=" and "foo=bar" both match modname="foo", modval="=bar");
+// UpdateBestMatch then keeps the one with the longest long name, which is
+// the most specialized match.
+const ModifierInfo* FindModifier(const char* modname, size_t modname_len,
+                                 const char* modval, size_t modval_len) {
+  typedef vector<const ModifierInfo*>::const_iterator ModIter;
+  const ModifierInfo* best_match = NULL;
+
+  const bool is_extension = modname_len >= 2 && IsExtensionModifier(modname);
+  if (!is_extension) {
+    // Built-in modifier: scan the static table.
+    const size_t num_builtins = sizeof(g_modifiers) / sizeof(*g_modifiers);
+    for (size_t i = 0; i < num_builtins; ++i) {
+      UpdateBestMatch(modname, modname_len, modval, modval_len,
+                      &g_modifiers[i].modifier_info, &best_match);
+    }
+    return best_match;
+  }
+
+  // Registered extension modifiers take precedence...
+  for (ModIter it = g_extension_modifiers.begin();
+       it != g_extension_modifiers.end(); ++it) {
+    UpdateBestMatch(modname, modname_len, modval, modval_len,
+                    *it, &best_match);
+  }
+  if (best_match != NULL)
+    return best_match;
+
+  // ...followed by unknown modifiers recorded by earlier lookups.
+  for (ModIter it = g_unknown_modifiers.begin();
+       it != g_unknown_modifiers.end(); ++it) {
+    UpdateBestMatch(modname, modname_len, modval, modval_len,
+                    *it, &best_match);
+  }
+  if (best_match != NULL)
+    return best_match;
+
+  // This is the only situation where we can pass in a modifier of NULL.
+  // It means "we don't know about this modifier-name."
+  string fullname(modname, modname_len);
+  if (modval_len) {
+    fullname.append(modval, modval_len);
+  }
+  // TODO(csilvers): store in a map or multimap, rather than a vector
+  g_unknown_modifiers.push_back(
+      new ModifierInfo(fullname, '\0', XSS_UNIQUE, NULL));
+  return g_unknown_modifiers.back();
+}
+
+// For escaping variables under the auto-escape mode:
+// Each directive below maps to a distinct sequence of
+// escaping directives (i.e a vector<ModifierAndValue>) applied
+// to a variable during run-time substitution.
+// The directives are stored in a global array (g_mods_ae)
+// initialized under lock in InitializeGlobalModifiers.
+// Index into g_am_dirs[].  The trailing comments name the built-in modifier
+// that the corresponding g_am_* array points at.
+enum AutoModifyDirective {
+ AM_EMPTY, // Unused, kept as marker.
+ AM_HTML, // html_escape
+ AM_HTML_UNQUOTED, // html_escape_with_arg=attribute
+ AM_JS, // javascript_escape
+ AM_JS_NUMBER, // javascript_escape_with_arg=number
+ AM_URL_HTML, // url_escape_with_arg=html
+ AM_URL_QUERY, // url_query_escape
+ AM_STYLE, // cleanse_css
+ AM_XML, // xml_escape
+ NUM_ENTRIES_AM, // Number of directives; used as the size of g_am_dirs[].
+};
+
+// Populates the global vector of hard-coded modifiers that
+// Auto-Escape may pick. We point to the appropriate modifier in
+// the global g_modifiers.
+// Reference these globals via the global array g_am_dirs[] for consistency.
+// Note: We allow for more than one ModifierAndValue in the array hence
+// the need to terminate with a Null marker. However currently all the
+// escaping directives have exactly one ModifierAndValue.
+// Each array below is terminated by an entry whose ModifierInfo is NULL.
+// The index comments name the g_modifiers entry being referenced; the value
+// arguments are the modifier value (with its leading '=') and its length.
+static const ModifierAndValue g_am_empty[] = {
+ ModifierAndValue(NULL, "", 0)
+};
+// g_modifiers[1]: html_escape
+static const ModifierAndValue g_am_html[] = {
+ ModifierAndValue(&g_modifiers[1].modifier_info, "", 0),
+ ModifierAndValue(NULL, "", 0)
+};
+// g_modifiers[4]: html_escape_with_arg=attribute
+static const ModifierAndValue g_am_html_unquoted[] = {
+ ModifierAndValue(&g_modifiers[4].modifier_info, "=attribute", 10),
+ ModifierAndValue(NULL, "", 0)
+};
+// g_modifiers[6]: javascript_escape
+static const ModifierAndValue g_am_js[] = {
+ ModifierAndValue(&g_modifiers[6].modifier_info, "", 0),
+ ModifierAndValue(NULL, "", 0)
+};
+// g_modifiers[15]: javascript_escape_with_arg=number
+static const ModifierAndValue g_am_js_number[] = {
+ ModifierAndValue(&g_modifiers[15].modifier_info, "=number", 7),
+ ModifierAndValue(NULL, "", 0)
+};
+// g_modifiers[11]: url_escape_with_arg=html
+static const ModifierAndValue g_am_url_html[] = {
+ ModifierAndValue(&g_modifiers[11].modifier_info, "=html", 5),
+ ModifierAndValue(NULL, "", 0)
+};
+// g_modifiers[9]: url_query_escape
+static const ModifierAndValue g_am_url_query[] = {
+ ModifierAndValue(&g_modifiers[9].modifier_info, "", 0),
+ ModifierAndValue(NULL, "", 0)
+};
+// g_modifiers[0]: cleanse_css
+static const ModifierAndValue g_am_style[] = {
+ ModifierAndValue(&g_modifiers[0].modifier_info, "", 0),
+ ModifierAndValue(NULL, "", 0)
+};
+// g_modifiers[14]: xml_escape
+static const ModifierAndValue g_am_xml[] = {
+ ModifierAndValue(&g_modifiers[14].modifier_info, "", 0),
+ ModifierAndValue(NULL, "", 0)
+};
+
+// Maps each AutoModifyDirective to its NULL-terminated ModifierAndValue
+// array.  The entries must stay in the same order as the enum values.
+static const ModifierAndValue* g_am_dirs[NUM_ENTRIES_AM] = {
+ g_am_empty, /* AM_EMPTY */
+ g_am_html, /* AM_HTML */
+ g_am_html_unquoted, /* AM_HTML_UNQUOTED */
+ g_am_js, /* AM_JS */
+ g_am_js_number, /* AM_JS_NUMBER */
+ g_am_url_html, /* AM_URL_HTML */
+ g_am_url_query, /* AM_URL_QUERY */
+ g_am_style, /* AM_STYLE */
+ g_am_xml, /* AM_XML */
+};
+
+// Formats one modifier as it would be written in a template: a ':' followed
+// by the modifier's short name (or its long name when it has no short name)
+// and then the modifier value, if any.
+string PrettyPrintOneModifier(const ModifierAndValue& modval) {
+  const ModifierInfo& info = *modval.modifier_info;
+  string pretty(":");
+  if (info.short_name) {  // short_name is a char; '\0' means "none".
+    pretty += info.short_name;
+  } else {
+    pretty += info.long_name;
+  }
+  if (modval.value_len != 0) {
+    pretty.append(modval.value, modval.value_len);
+  }
+  return pretty;
+}
+
+// Formats every modifier in |modvals| with PrettyPrintOneModifier and joins
+// the results with |separator|.  Returns an empty string for an empty vector.
+string PrettyPrintModifiers(const vector<const ModifierAndValue*>& modvals,
+                            const string& separator) {
+  string joined;
+  for (size_t i = 0; i < modvals.size(); ++i) {
+    if (i > 0)
+      joined += separator;
+    joined += PrettyPrintOneModifier(*modvals[i]);
+  }
+  return joined;
+}
+
+// Return the sequence of escaping directives to apply for the given context.
+// An empty vector indicates an error occurred. Currently we never need
+// to chain escaping directives hence on success, the vector is always of
+// size 1. This may change in the future.
+vector<const ModifierAndValue*> GetModifierForHtmlJs(
+ HtmlParser* htmlparser, string* error_msg) {
+ assert(htmlparser);
+ assert(error_msg);
+ vector<const ModifierAndValue*> modvals;
+
+ // Two cases of being inside javascript:
+ // 1. Inside raw javascript (within a <script> tag). If the value
+ // is quoted we apply javascript_escape, if not we have to coerce
+ // it to a safe value due to the risk of javascript code execution
+ // hence apply :J=number. If arbitrary code needs to be inserted
+ // at run-time, the developer must use :none.
+ // 2. In the value of an attribute that takes javascript such
+ // as onmouseevent in '<a href="someUrl" onmousevent="{{EVENT}}">'.
+ // That will be covered in the STATE_VALUE state logic below.
+ if (htmlparser->InJavascript() &&
+ htmlparser->state() != HtmlParser::STATE_VALUE) {
+ if (htmlparser->IsJavascriptQuoted()) {
+ modvals.push_back(g_am_dirs[AM_JS]);
+ assert(modvals.size() == 1);
+ return modvals;
+ } else {
+ modvals.push_back(g_am_dirs[AM_JS_NUMBER]);
+ assert(modvals.size() == 1);
+ return modvals;
+ }
+ }
+ switch (htmlparser->state()) {
+ case HtmlParser::STATE_VALUE:{
+ string attribute_name = htmlparser->attribute();
+ switch (htmlparser->AttributeType()) {
+ case HtmlParser::ATTR_URI:
+ // Case 1: The URL is quoted:
+ // . Apply :U=html if it is a complete URL or :h if it is a fragment.
+ // Case 2: The URL is not quoted:
+ // . If it is a complete URL, we have no safe modifiers that
+ // won't break it so we have to fail.
+ // . If it is a URL fragment, then :u is safe and not likely to
+ // break the URL.
+ if (!htmlparser->IsAttributeQuoted()) {
+ if (htmlparser->IsUrlStart()) { // Complete URL.
+ error_msg->append("Value of URL attribute \"" + attribute_name +
+ "\" must be enclosed in quotes.");
+ assert(modvals.empty());
+ return modvals; // Empty
+ } else { // URL fragment.
+ modvals.push_back(g_am_dirs[AM_URL_QUERY]);
+ }
+ } else {
+ // Only validate the URL if we have a complete URL,
+ // otherwise simply html_escape.
+ if (htmlparser->IsUrlStart())
+ modvals.push_back(g_am_dirs[AM_URL_HTML]);
+ else
+ modvals.push_back(g_am_dirs[AM_HTML]);
+ }
+ break;
+ case HtmlParser::ATTR_REGULAR:
+ // If the value is quoted, simply HTML escape, otherwise
+ // apply stricter escaping using H=attribute.
+ if (htmlparser->IsAttributeQuoted())
+ modvals.push_back(g_am_dirs[AM_HTML]);
+ else
+ modvals.push_back(g_am_dirs[AM_HTML_UNQUOTED]);
+ break;
+ case HtmlParser::ATTR_STYLE:
+ // If the value is quoted apply :c, otherwise fail.
+ if (htmlparser->IsAttributeQuoted()) {
+ modvals.push_back(g_am_dirs[AM_STYLE]);
+ } else {
+ error_msg->append("Value of style attribute \"" + attribute_name +
+ "\" must be enclosed in quotes.");
+ assert(modvals.empty());
+ return modvals; // Empty
+ }
+ break;
+ case HtmlParser::ATTR_JS:
+ // We require javascript accepting attributes (such as onclick)
+ // to be HTML quoted, otherwise they are vulnerable to
+ // HTML attribute insertion via the use of whitespace.
+ if (!htmlparser->IsAttributeQuoted()) {
+ error_msg->append("Value of javascript attribute \"" +
+ attribute_name +
+ "\" must be enclosed in quotes.");
+ assert(modvals.empty());
+ return modvals; // Empty
+ }
+ // If the variable is quoted apply javascript_escape otherwise
+ // apply javascript_number which will ensure it is safe against
+ // code injection.
+ // Note: We normally need to HTML escape after javascript escape
+ // but the javascript escape implementation provided makes the
+ // HTML escape redundant so simply javascript escape.
+ if (htmlparser->IsJavascriptQuoted())
+ modvals.push_back(g_am_dirs[AM_JS]);
+ else
+ modvals.push_back(g_am_dirs[AM_JS_NUMBER]);
+ break;
+ case HtmlParser::ATTR_NONE:
+ assert("We should be in attribute!" && 0);
+ default:
+ assert("Should not be able to get here." && 0);
+ return modvals; // Empty
+ }
+ // In STATE_VALUE particularly, the parser may get out of sync with
+ // the correct state - that the browser sees - due to the fact that
+ // it does not get to parse run-time content (variables). So we tell
+ // the parser there is content that will be expanded here.
+ // A good example is:
+ // <a href={{URL}} alt={{NAME}}>
+ // The parser sees <a href= alt=> and interprets 'alt=' to be
+ // the value of href.
+ htmlparser->InsertText(); // Ignore return value.
+ assert(modvals.size() == 1);
+ return modvals;
+ }
+ case HtmlParser::STATE_TAG:{
+ // Apply H=attribute to tag names since they are alphabetic.
+ // Examples of tag names: TITLE, BODY, A and BR.
+ modvals.push_back(g_am_dirs[AM_HTML_UNQUOTED]);
+ assert(modvals.size() == 1);
+ return modvals;
+ }
+ case HtmlParser::STATE_ATTR:{
+ // Apply H=attribute to attribute names since they are alphabetic.
+ // Examples of attribute names: HREF, SRC and WIDTH.
+ modvals.push_back(g_am_dirs[AM_HTML_UNQUOTED]);
+ assert(modvals.size() == 1);
+ return modvals;
+ }
+ case HtmlParser::STATE_COMMENT:
+ case HtmlParser::STATE_TEXT:{
+ // Apply :h to regular HTML text and :c if within a style tag.
+ if (htmlparser->InCss())
+ modvals.push_back(g_am_dirs[AM_STYLE]);
+ else
+ modvals.push_back(g_am_dirs[AM_HTML]);
+ assert(modvals.size() == 1);
+ return modvals;
+ }
+ default:{
+ assert("Should not be able to get here." && 0);
+ return modvals; // Empty
+ }
+ }
+ assert("Should not be able to get here." && 0);
+ return modvals; // Empty
+}
+
+// TODO(jad): Memoize all GetModifierForXXX functions below.
+// They don't depend on parser context (from csilvers).
+vector<const ModifierAndValue*> GetModifierForCss(HtmlParser* htmlparser,
+ string* error_msg) {
+ vector<const ModifierAndValue*> modvals;
+ modvals.push_back(g_am_dirs[AM_STYLE]);
+ return modvals;
+}
+
+vector<const ModifierAndValue*> GetModifierForXml(HtmlParser* htmlparser,
+ string* error_msg) {
+ vector<const ModifierAndValue*> modvals;
+ modvals.push_back(g_am_dirs[AM_XML]);
+ return modvals;
+}
+
+vector<const ModifierAndValue*> GetModifierForJson(HtmlParser* htmlparser,
+ string* error_msg) {
+ vector<const ModifierAndValue*> modvals;
+ modvals.push_back(g_am_dirs[AM_JS]);
+ return modvals;
+}
+
+vector<const ModifierAndValue*> GetDefaultModifierForHtml() {
+ vector<const ModifierAndValue*> modvals;
+ modvals.push_back(g_am_dirs[AM_HTML]);
+ return modvals;
+}
+
+vector<const ModifierAndValue*> GetDefaultModifierForJs() {
+ vector<const ModifierAndValue*> modvals;
+ modvals.push_back(g_am_dirs[AM_JS]);
+ return modvals;
+}
+
+vector<const ModifierAndValue*> GetDefaultModifierForCss() {
+ return GetModifierForCss(NULL, NULL);
+}
+
+vector<const ModifierAndValue*> GetDefaultModifierForXml() {
+ return GetModifierForXml(NULL, NULL);
+}
+
+vector<const ModifierAndValue*> GetDefaultModifierForJson() {
+ return GetModifierForJson(NULL, NULL);
+}
+
+}
diff --git a/src/template_modifiers_internal.h b/src/template_modifiers_internal.h
new file mode 100644
index 0000000..b453e18
--- /dev/null
+++ b/src/template_modifiers_internal.h
@@ -0,0 +1,246 @@
+// Copyright (c) 2007, Google Inc.
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+// * Neither the name of Google Inc. nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+// ---
+// Author: csilvers@google.com (Craig Silverstein)
+//
+// These are used by template.cc and when registering new modifiers.
+// (Or more exactly, registering new modifier/value pairs.)
+// They are not intended for any other users.
+//
+// If you do find yourself needing to use them directly, please email
+// template-users.
+//
+// Known outside-template users of this class:
+// template/bidi/bidi_modifiers_test.cc
+//
+// These routines are implemented in template_modifiers.cc.
+
+#ifndef TEMPLATE_TEMPLATE_MODIFIERS_INTERNAL_H_
+#define TEMPLATE_TEMPLATE_MODIFIERS_INTERNAL_H_
+
+#include <config.h>
+#include <sys/types.h> // for size_t
+#include <string.h> // for strchr
+#include <string>
+#include <vector>
+#include <ctemplate/template_modifiers.h> // for null_modifier
+
+// Annoying stuff for windows -- make sure clients (in this case
+// unittests) can import the class definitions and variables.
+#ifndef CTEMPLATE_DLL_DECL
+# ifdef _MSC_VER
+# define CTEMPLATE_DLL_DECL __declspec(dllimport)
+# else
+# define CTEMPLATE_DLL_DECL /* should be the empty string for non-windows */
+# endif
+#endif
+
+using std::string;
+using std::vector;
+
+namespace ctemplate_htmlparser {
+class HtmlParser;
+}
+
+namespace ctemplate {
+
+class TemplateModifier;
+
+// A Modifier belongs to an XssClass which determines whether
+// it is an XSS safe addition to a modifier chain or not. This
+// is used by the Auto-Escape mode when determining how to handle
+// extra modifiers provided in template. For example, :j is a safe
+// addition to :h because they are both in the same class (XSS_WEB_STANDARD).
+//
+// XssClass is not exposed in any API and cannot be set in custom
+// modifiers, it is for internal use only (for Auto-Escape). We currently
+// have only three classes.
+//
+// XSS_UNUSED: not used.
+//   XSS_WEB_STANDARD: All the current built-in escaping modifiers.
+// XSS_UNIQUE: Set for all custom modifiers added via AddModifier()
+// and may need to be escaped.
+// XSS_SAFE: Set for all custom modifiers added via AddXssSafeModifier()
+// that are considered to produce safe output and hence
+// do not need further escaping. Also includes the :none modifier.
+enum XssClass {
+ XSS_UNUSED,
+ XSS_WEB_STANDARD,
+ XSS_UNIQUE,
+ XSS_SAFE,
+};
+
+// TODO(csilvers): collapse this into the TemplateModifier class?
+struct ModifierInfo {
+ // longname should end in an '=' iff the modifier takes a value
+ // (same as in getopt(3)).
+ // To specialize -- add a modifier that applies only when we see the name
+ // with a particular value -- specify longname like so: "longname=value".
+ // (See example in the comment-doc below, for AddModifier.)
+ // sn can be '\0' if there is no associated shortname.
+ // m should be NULL *only if* default-registering a user-defined longname
+ // that the user neglected to register themselves. In this case, we
+ // use the null modifier as the actual modifier.
+ // xss_class indicates an equivalence class this modifier is
+ // in, such that any other modifier in the class could be applied
+ // after this modifier without affecting its XSS-safety. If in
+ // doubt, say XSS_UNIQUE, which is the most conservative choice.
+ ModifierInfo(string ln, char sn, XssClass xc, const TemplateModifier* m)
+ : long_name(ln), short_name(sn),
+ modval_required(strchr(ln.c_str(), '=') != NULL),
+ is_registered(m != NULL), xss_class(xc),
+ modifier(m ? m : &null_modifier) { }
+ string long_name;
+ char short_name;
+ bool modval_required; // true iff ln has an '=' in it
+ bool is_registered; // true for built-in and AddModifier mods
+ XssClass xss_class;
+ const TemplateModifier* modifier;
+};
+
+// An escaping directive is completely defined by the escaping function to use
+// (ModifierInfo.modifier) as well as the optional value it may require. This
+// structure is a small wrapper on ModifierInfo to convey that needed value.
+// Note: The given value pointer must be valid for the life of the struct.
+// Also, value is not null-terminated.
+struct ModifierAndValue {
+ ModifierAndValue(const ModifierInfo* mod_info, const char* val,
+ size_t val_len)
+ : modifier_info(mod_info), value(val), value_len(val_len) { }
+ const ModifierInfo* modifier_info;
+ const char* value;
+ size_t value_len;
+};
+
+// Returns whether or not candidate can be safely (w.r.t XSS)
+// used in lieu of our ModifierInfo. This is true iff:
+// 1. Both have the same modifier function OR
+// 2. Candidate's modifier function is in our ModifierInfo's
+// list (vector) of safe alternative modifier functions.
+// Note that this function is not commutative therefore
+// IsSafeXSSAlternative(a, b) may not be equal to IsSafeXSSAlternative(b, a).
+extern CTEMPLATE_DLL_DECL
+bool IsSafeXSSAlternative(const ModifierInfo& our,
+ const ModifierInfo& candidate);
+
+// modname is the name of the modifier (shortname or longname).
+// value is the modifier-value (empty string if there is no modval).
+// Returns a pointer into g_modifiers, or NULL if not found.
+extern CTEMPLATE_DLL_DECL
+const ModifierInfo* FindModifier(const char* modname, size_t modname_len,
+ const char* modval, size_t modval_len);
+
+
+// Convenience function to dump the (zero or more) modifiers (and values)
+// in the format:
+//   :<modifier1>[=<val1>]<separator>[:<modifier2>][=<val2>]...
+// If the modifier does not have a short_name, we print its long_name instead.
+// The separator may be an empty string.
+extern CTEMPLATE_DLL_DECL
+string PrettyPrintModifiers(
+ const vector<const ModifierAndValue*>& modvals,
+ const string& separator);
+
+extern CTEMPLATE_DLL_DECL
+string PrettyPrintOneModifier(const ModifierAndValue& modval);
+
+// Returns the appropriate escaping directives to escape content in an
+// HTML or Javascript context. HTML and Javascript contexts exercise
+// the same parser APIs and hence are combined here.
+// On error, we return an empty vector and append an error to error_msg.
+// The htmlparser and error_msg arguments must be non-NULL.
+// Currently, on success, we always return a vector of length 1, meaning
+// we never need to chain escaping directives. However, this is subject
+// to change.
+extern CTEMPLATE_DLL_DECL
+vector<const ModifierAndValue*> GetModifierForHtmlJs(
+ ctemplate_htmlparser::HtmlParser* htmlparser, string* error_msg);
+
+// Returns the appropriate escaping directives to escape content
+// in a CSS context.
+// Currently always returns cleanse_css and hence does not require the
+// parser nor can it fail. This will change once the parser is able to
+// distinguish between different CSS contexts, in particular CSS properties
+// that take URLs, which require a different escaping function (non-existent).
+extern CTEMPLATE_DLL_DECL
+vector<const ModifierAndValue*> GetModifierForCss(
+ ctemplate_htmlparser::HtmlParser* htmlparser, string* error_msg);
+
+// Returns the appropriate escaping directives to escape content
+// in an XML context.
+// Currently always returns xml_escape and hence does not require the
+// parser nor can it fail. This may change once the parser can parse XML.
+extern CTEMPLATE_DLL_DECL
+vector<const ModifierAndValue*> GetModifierForXml(
+ ctemplate_htmlparser::HtmlParser* htmlparser, string* error_msg);
+
+// Returns the appropriate escaping directives to escape content
+// in a JSON context.
+// Currently always returns javascript_escape and hence does not require the
+// parser nor can it fail. This may change once the parser can parse
+// and distinguish different contexts within JSON.
+extern CTEMPLATE_DLL_DECL
+vector<const ModifierAndValue*> GetModifierForJson(
+ ctemplate_htmlparser::HtmlParser* htmlparser, string* error_msg);
+
+// Return the default escaping directives to escape content for the given
+// context. These methods are useful when the caller does not have
+// access to a parser or when the parser failed to parse.
+
+// GetDefaultModifierForHtml
+// GetDefaultModifierForJs
+// GetDefaultModifierForCss
+//   GetDefaultModifierForXml
+// GetDefaultModifierForJson
+// These functions are different from the GetModifierForXXX functions
+// in that they do not take a parser and cannot fail. They simply
+// return the most common escaping directive for the context they refer to.
+//
+// Some of these contexts (currently HTML and Javascript) have more than
+// one escaping directive associated with them and so we usually rely on
+// the current state of the parser to determine which directive to choose.
+// However, in some cases, the parser may fail to parse a given input
+// and so we may want to select the most likely escaping directive that
+// applies to the given context. Hence we use these functions instead of
+// the corresponding GetModifierForXXX ones.
+extern CTEMPLATE_DLL_DECL
+std::vector<const ModifierAndValue*> GetDefaultModifierForHtml();
+extern CTEMPLATE_DLL_DECL
+std::vector<const ModifierAndValue*> GetDefaultModifierForJs();
+extern CTEMPLATE_DLL_DECL
+std::vector<const ModifierAndValue*> GetDefaultModifierForCss();
+extern CTEMPLATE_DLL_DECL
+std::vector<const ModifierAndValue*> GetDefaultModifierForXml();
+extern CTEMPLATE_DLL_DECL
+std::vector<const ModifierAndValue*> GetDefaultModifierForJson();
+
+}
+
+#endif // TEMPLATE_TEMPLATE_MODIFIERS_INTERNAL_H_
diff --git a/src/template_namelist.cc b/src/template_namelist.cc
new file mode 100644
index 0000000..c834120
--- /dev/null
+++ b/src/template_namelist.cc
@@ -0,0 +1,194 @@
+// Copyright (c) 2006, Google Inc.
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+// * Neither the name of Google Inc. nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+// ---
+
+#include <config.h>
+#include <stdlib.h>
+#include <sys/stat.h> // for stat()
+#include <time.h> // for time_t
+#ifdef HAVE_UNISTD_H
+# include <unistd.h>
+#endif
+#include <algorithm> // for binary_search
+#include HASH_SET_H // that's NameListType
+#include <string>
+#include <vector> // that's MissingListType, SyntaxListType
+#include <ctemplate/template_namelist.h>
+#include <ctemplate/template_pathops.h>
+#include <ctemplate/template.h> // for Strip, GetTemplate(), etc.
+#include <assert.h>
+#include <iostream> // for cerr
+#include "base/fileutil.h"
+
+using std::max;
+using std::pair;
+using std::string;
+using std::vector;
+
+#define LOG(level) std::cerr << #level << ": "
+
+namespace ctemplate {
+
+TemplateNamelist::NameListType *TemplateNamelist::namelist_ = NULL;
+TemplateNamelist::MissingListType *TemplateNamelist::missing_list_ = NULL;
+TemplateNamelist::SyntaxListType *TemplateNamelist::bad_syntax_list_ = NULL;
+
+// Make sure there is a namelist_ and then insert the name onto it
+const char* TemplateNamelist::RegisterTemplate(const char* name) {
+ if (!namelist_) {
+ namelist_ = new NameListType;
+ }
+ pair<NameListType::iterator, bool> insert_result = namelist_->insert(name);
+ // return a pointer to the entry corresponding to name;
+ return insert_result.first->c_str();
+}
+
+// GetList
+// Make sure there is a namelist_ and return a reference to it.
+const TemplateNamelist::NameListType& TemplateNamelist::GetList() {
+ if ( !namelist_ ) {
+ namelist_ = new NameListType;
+ }
+ return *namelist_;
+}
+
+// GetMissingList
+// On the first invocation, it creates a new missing list and sets
+// refresh to true.
+// If refresh is true, whether from being passed to the function
+// or being set when the list is created the first time, it iterates
+// through the complete list of registered template files
+// and adds to the list any that are missing
+// On subsequent calls, if refresh is false it merely returns the
+// list created in the prior call that refreshed the list.
+// Returns a sorted list of missing templates.
+const TemplateNamelist::MissingListType& TemplateNamelist::GetMissingList(
+ bool refresh) {
+ if (!missing_list_) {
+ missing_list_ = new MissingListType;
+ refresh = true; // always refresh the first time
+ }
+
+ if (refresh) {
+ const NameListType& the_list = TemplateNamelist::GetList();
+ missing_list_->clear();
+
+ for (NameListType::const_iterator iter = the_list.begin();
+ iter != the_list.end();
+ ++iter) {
+ const string path = Template::FindTemplateFilename(*iter);
+ if (path.empty() || !File::Readable(path.c_str())) {
+ missing_list_->push_back(*iter);
+ LOG(ERROR) << "Template file missing: " << *iter
+ << " at path: " << (path.empty() ? "(empty path)" : path)
+ << "\n";
+ }
+ }
+ }
+
+ sort(missing_list_->begin(), missing_list_->end());
+ return *missing_list_;
+}
+
+// GetBadSyntaxList
+// On the first invocation, it creates a new "bad syntax" list and
+// sets refresh to true.
+// If refresh is true, whether from being passed to the function
+// or being set when the list is created the first time, it
+// iterates through the complete list of registered template files
+// and adds to the list any that cannot be loaded. In the process, it
+// calls GetMissingList, refreshing it. It does not include any
+// files in the bad syntax list which are in the missing list.
+// On subsequent calls, if refresh is false it merely returns the
+// list created in the prior call that refreshed the list.
+const TemplateNamelist::SyntaxListType& TemplateNamelist::GetBadSyntaxList(
+ bool refresh, Strip strip) {
+ if (!bad_syntax_list_) {
+ bad_syntax_list_ = new SyntaxListType;
+ refresh = true; // always refresh the first time
+ }
+
+ if (refresh) {
+ const NameListType& the_list = TemplateNamelist::GetList();
+
+ bad_syntax_list_->clear();
+
+ const MissingListType& missing_list = GetMissingList(true);
+ for (NameListType::const_iterator iter = the_list.begin();
+ iter != the_list.end();
+ ++iter) {
+ Template *tpl = Template::GetTemplate((*iter), strip);
+ if (!tpl) {
+ if (!binary_search(missing_list.begin(), missing_list.end(), *iter)) {
+ // If it's not in the missing list, then we're here because
+ // it caused an error during parsing
+ bad_syntax_list_->push_back(*iter);
+ LOG(ERROR) << "Error loading template: " << (*iter) << "\n";
+ }
+ }
+ }
+ }
+ return *bad_syntax_list_;
+}
+
+// Look at all the existing template files, and get their lastmod time via stat()
+time_t TemplateNamelist::GetLastmodTime() {
+ time_t retval = -1;
+
+ const NameListType& the_list = TemplateNamelist::GetList();
+ for (NameListType::const_iterator iter = the_list.begin();
+ iter != the_list.end();
+ ++iter) {
+ // Only prepend root_dir if *iter isn't an absolute path:
+ const string path = Template::FindTemplateFilename(*iter);
+ struct stat statbuf;
+ if (path.empty() || stat(path.c_str(), &statbuf) != 0)
+ continue; // ignore files we can't find
+ retval = max(retval, statbuf.st_mtime);
+ }
+ return retval;
+}
+
+// AllDoExist
+bool TemplateNamelist::AllDoExist() {
+ // AllDoExist always refreshes the list, hence the "true"
+ const MissingListType& missing_list = TemplateNamelist::GetMissingList(true);
+ return missing_list.empty();
+}
+
+// IsAllSyntaxOkay
+bool TemplateNamelist::IsAllSyntaxOkay(Strip strip) {
+ // IsAllSyntaxOkay always refreshes the list, hence the "true"
+ const SyntaxListType& bad_syntax_list =
+ TemplateNamelist::GetBadSyntaxList(true, strip);
+ return bad_syntax_list.empty();
+}
+
+}
diff --git a/src/template_pathops.cc b/src/template_pathops.cc
new file mode 100644
index 0000000..177d730
--- /dev/null
+++ b/src/template_pathops.cc
@@ -0,0 +1,145 @@
+// Copyright (c) 2007, Google Inc.
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+// * Neither the name of Google Inc. nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+// ---
+// Author: csilvers@google.com (Craig Silverstein)
+//
+// Routines for dealing with filesystem paths. Mostly to make porting
+// to windows easier, though it's nice to have an API for this kind of
+// thing.
+
+#include <config.h>
+#include <string>
+#include <ctype.h> // for isalpha, used on windows
+#include <string.h> // for strchr
+#include <ctemplate/template_pathops.h>
+
+#ifndef PATH_SEP
+# ifdef _WIN32
+# define PATH_SEP '\\'
+# else
+# define PATH_SEP '/' // assume a unix-like system
+# endif
+#endif
+
+namespace ctemplate {
+
+using std::string;
+
+const char kCWD[] = { '.', PATH_SEP, '\0' };
+const char kRootdir[] = { PATH_SEP, '\0' };
+
+// Windows is bi-slashual: we always write separators using PATH_SEP (\),
+// but accept either PATH_SEP or the unix / as a separator on input.
+inline bool IsPathSep(char c) {
+#ifdef _WIN32
+ if (c == '/') return true;
+#endif
+ return c == PATH_SEP;
+}
+
+// ----------------------------------------------------------------------
+// PathJoin()
+// Joins a and b together to form a path. If 'b' starts with '/'
+// then we just return b, otherwise a + b. If 'a' does not end in
+// a slash we put a slash in the middle. Does *not* resolve ..'s
+// and stuff like that, for now. Not very efficient.
+// Returns a string which is the joining.
+// ----------------------------------------------------------------------
+
+string PathJoin(const string& a, const string& b) {
+ if (b.empty()) return a; // degenerate case 1
+ if (a.empty()) return b; // degenerate case 2
+ if (IsAbspath(b)) return b; // absolute path
+ if (IsDirectory(a)) return a + b; // 'well-formed' case
+ return a + PATH_SEP + b;
+}
+
+bool IsAbspath(const string& path) {
+#ifdef _WIN32
+  // c:\ is absolute on windows. Cast: isalpha() of a negative char is undefined.
+  if (path.size() > 2 && path[1] == ':' && IsPathSep(path[2]) &&
+      isalpha(static_cast<unsigned char>(path[0])))
+    return true;
+#endif
+  return !path.empty() && IsPathSep(path[0]);   // leading separator => absolute
+}
+
+bool IsDirectory(const string& path) {
+ return !path.empty() && IsPathSep(path[path.size()-1]);
+}
+
+void NormalizeDirectory(string* dir) {
+ if (dir->empty()) return; // I guess "" means 'current directory'
+ if (!IsPathSep((*dir)[dir->size()-1]))
+ *dir += PATH_SEP;
+}
+
+string Basename(const string& path) {
+  // Returns what follows the last separator. Scan by index from the end: a
+  // pointer loop would have to step below path.data(), which is undefined.
+  for (string::size_type end = path.size(); end > 0; --end) {
+    if (IsPathSep(path[end - 1])) return path.substr(end);
+  }
+  return path;   // no path-separator found, so whole string is the basename
+}
+
+bool ContainsFullWord(const string& text, const string& word) {
+  // Returns true iff word occurs in text with a delimiter character (or the
+  // start/end of text) on both sides. Delimiters are listed below; please
+  // update the comment in the header file if you change this list.
+  static const char* delim = ".,_-#*?:";
+  const string::size_type inputlength = text.length();
+  const string::size_type wordlength = word.length();
+
+  // corner cases: empty text, empty word, or word longer than text.
+  if (inputlength == 0 || wordlength == 0 || wordlength > inputlength) {
+    return false;
+  }
+
+  string::size_type nextmatchpos = 0;  // where to resume searching in text
+  while (nextmatchpos < inputlength) {
+    const string::size_type pos = text.find(word, nextmatchpos);
+    if (pos == string::npos) {
+      return false;  // no match at all
+    }
+
+    // if found, check that it is surrounded by delimiter characters.
+    bool pre_delimited = (pos == 0) ||
+        (strchr(delim, text.at(pos - 1)) != NULL);
+    bool post_delimited = (pos >= inputlength - wordlength) ||
+        (strchr(delim, text.at(pos + wordlength)) != NULL);
+    if (pre_delimited && post_delimited) return true;
+    // Advance by one only: a valid match may overlap the one just rejected.
+    nextmatchpos = pos + 1;
+  }
+
+  return false;
+}
+
+}
diff --git a/src/template_string.cc b/src/template_string.cc
new file mode 100644
index 0000000..a77d37f
--- /dev/null
+++ b/src/template_string.cc
@@ -0,0 +1,256 @@
+// Copyright (c) 2008, Google Inc.
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+// * Neither the name of Google Inc. nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+// ---
+// Authors: jcrim@google.com (Jay Crim)
+// csilvers@google.com (Craig Silverstein)
+
+#include <config.h>
+#include "base/mutex.h" // This has to come first to get _XOPEN_SOURCE
+#include <ctemplate/find_ptr.h>
+#include <ctemplate/template_string.h>
+#include HASH_SET_H
+#include "base/arena.h"
+#include "base/thread_annotations.h"
+#include <assert.h>
+#include "base/macros.h" // for uint32, uint64, UNALIGNED_LOAD32
+#include "base/util.h"
+
+#ifdef HAVE_UNORDERED_MAP
+using HASH_NAMESPACE::unordered_set;
+// This is totally cheap, but minimizes the need for #ifdef's below...
+#define hash_set unordered_set
+#else
+using HASH_NAMESPACE::hash_set;
+#endif
+
+namespace ctemplate {
+
+// Based on public domain MurmurHashUnaligned2, by Austin Appleby.
+// http://murmurhash.googlepages.com/
+// This variation:
+// - interleaves odd/even 32-bit words to improve performance and
+// to generate more random bits,
+// - has a more complex final mix to combine the 32-bit hashes into
+// 64-bits,
+// - uses a fixed seed.
+// This is not static because template_string_test accesses it directly.
+uint64 MurmurHash64(const char* ptr, size_t len) { // 64-bit hash of the len bytes at ptr
+ const uint32 kMultiplyVal = 0x5bd1e995;
+ const int kShiftVal = 24;
+ const uint32 kHashSeed1 = 0xc86b14f7;
+ const uint32 kHashSeed2 = 0x650f5c4d;
+
+ uint32 h1 = kHashSeed1 ^ len, h2 = kHashSeed2; // the length is folded into h1's seed
+ while (len >= 8) { // each pass mixes one 32-bit word into h1 and the next one into h2
+ uint32 k1 = UNALIGNED_LOAD32(ptr);
+ k1 *= kMultiplyVal;
+ k1 ^= k1 >> kShiftVal;
+ k1 *= kMultiplyVal;
+
+ h1 *= kMultiplyVal;
+ h1 ^= k1;
+ ptr += 4;
+
+ uint32 k2 = UNALIGNED_LOAD32(ptr);
+ k2 *= kMultiplyVal;
+ k2 ^= k2 >> kShiftVal;
+ k2 *= kMultiplyVal;
+
+ h2 *= kMultiplyVal;
+ h2 ^= k2;
+ ptr += 4;
+
+ len -= 8;
+ }
+
+ if (len >= 4) { // one whole 32-bit word is left over; it goes into h1
+ uint32 k1 = UNALIGNED_LOAD32(ptr);
+ k1 *= kMultiplyVal;
+ k1 ^= k1 >> kShiftVal;
+ k1 *= kMultiplyVal;
+ // NOTE(review): the main loop multiplies h1 by kMultiplyVal at this step, so kShiftVal
+ h1 *= kShiftVal; // looks like a slip; changing it alters every hash value - confirm first
+ h1 ^= k1;
+
+ ptr += 4;
+ len -= 4;
+ }
+
+ switch(len) { // 0-3 bytes remain and go into h2; NOTE(review): read via plain char, so sign-dependent - confirm
+ case 3:
+ h2 ^= ptr[2] << 16; // fall through.
+ case 2:
+ h2 ^= ptr[1] << 8; // fall through.
+ case 1:
+ h2 ^= ptr[0]; // fall through.
+ default:
+ h2 *= kMultiplyVal;
+ }
+
+ h1 ^= h2 >> 18; // final mix: h1 and h2 are stirred into each other three times
+ h1 *= kMultiplyVal;
+ h2 ^= h1 >> 22;
+ h2 *= kMultiplyVal;
+ h1 ^= h2 >> 17;
+ h1 *= kMultiplyVal;
+
+ uint64 h = h1;
+ h = (h << 32) | h2; // h1 ends up as the high 32 bits, h2 as the low 32 bits
+ return h;
+}
+
+// Unlike for StaticTemplateString, a default TemplateString::Hasher would
+// be a bad idea, because a TemplateString gives no lifetime guarantee for
+// the characters it refers to.  The global template_string_set is the
+// obvious exception, and this is the functor it is instantiated with.
+struct TemplateStringHasher {
+  // Hash operator: the hash of a string is the hash of its global id.
+  size_t operator()(const TemplateString& ts) const {
+    const TemplateId ts_id = ts.GetGlobalId();
+    DCHECK(IsTemplateIdInitialized(ts_id));
+    return hasher(ts_id);
+  }
+  // Less operator for MSVC's hash containers.
+  bool operator()(const TemplateString& a, const TemplateString& b) const {
+    const TemplateId lhs = a.GetGlobalId(), rhs = b.GetGlobalId();
+    assert(IsTemplateIdInitialized(lhs));
+    assert(IsTemplateIdInitialized(rhs));
+    return hasher(lhs, rhs);
+  }
+  TemplateIdHasher hasher;
+  // These two public members are required by msvc.  4 and 8 are defaults.
+  static const size_t bucket_size = 4;
+  static const size_t min_buckets = 8;
+};
+
+namespace {
+Mutex mutex(base::LINKER_INITIALIZED); // guards the two globals declared below
+
+typedef hash_set<TemplateString, TemplateStringHasher> TemplateStringSet;
+
+TemplateStringSet* template_string_set // created on first use by AddToGlobalIdToNameMap
+GUARDED_BY(mutex) PT_GUARDED_BY(mutex) = NULL;
+
+UnsafeArena* arena // holds the immutable copies that AddToGlobalIdToNameMap stores in the set
+GUARDED_BY(mutex) PT_GUARDED_BY(mutex) = NULL;
+} // unnamed namespace
+
+
+size_t StringHash::Hash(const char* s, size_t slen) const { // hashes the slen bytes at s
+ return static_cast<size_t>(MurmurHash64(s, slen)); // the 64-bit hash is narrowed to size_t
+}
+
+void TemplateString::AddToGlobalIdToNameMap() LOCKS_EXCLUDED(mutex) {
+  // An id is a precondition: callers without one should not be here.
+  CHECK(IsTemplateIdInitialized(id_));
+  {
+    // First look under the reader lock only: there is nothing to do if
+    // this id is already in the set.
+    ReaderMutexLock reader_lock(&mutex);
+    const TemplateString* const existing =
+        template_string_set ? find_ptr0(*template_string_set, *this) : NULL;
+    if (existing) {
+      DCHECK_EQ(TemplateString(ptr_, length_),
+                TemplateString(existing->ptr_, existing->length_))
+          << "TemplateId collision!";
+      return;
+    }
+  }
+  // Not found: take the writer lock, creating the set and arena on demand.
+  WriterMutexLock writer_lock(&mutex);
+  if (template_string_set == NULL) {
+    template_string_set = new TemplateStringSet;
+  }
+  if (arena == NULL) {
+    arena = new UnsafeArena(1024);  // 1024 was picked out of a hat.
+  }
+
+  // Re-check: presumably another thread can add us between the two locks.
+  if (template_string_set->count(*this) != 0) {
+    return;
+  }
+
+  // An immutable string can be stored in the set directly.  Anything
+  // else is stored as an immutable copy whose bytes live in the arena.
+  if (!is_immutable()) {
+    const char* const arena_copy = arena->Memdup(ptr_, length_);
+    template_string_set->insert(
+        TemplateString(arena_copy, length_, true, id_));
+  } else {
+    template_string_set->insert(*this);
+  }
+}
+
+TemplateId TemplateString::GetGlobalId() const {
+  if (!IsTemplateIdInitialized(id_)) {
+    // No usable id_: hash the contents and set the "initialized" flag bit.
+    const uint64 hash = MurmurHash64(ptr_, length_);
+    return static_cast<TemplateId>(hash | kTemplateStringInitializedFlag);
+  }
+  return id_;
+}
+
+// static
+TemplateString TemplateString::IdToString(TemplateId id) LOCKS_EXCLUDED(mutex) {
+  ReaderMutexLock reader_lock(&mutex);
+  if (template_string_set != NULL) {
+    // The lookup key is a dummy TemplateString that carries only the id.
+    // Weird, but it lets us use a hash_set instead of a hash_map.
+    TemplateString probe(NULL, 0, false, id);
+    const TemplateString* const hit = find_ptr0(*template_string_set, probe);
+    if (hit != NULL) return *hit;
+  }
+  return TemplateString(kStsEmpty);  // no set yet, or the id is not in it
+}
+
+StaticTemplateStringInitializer::StaticTemplateStringInitializer(
+    const StaticTemplateString* sts) {
+  // An id that was supplied at static-init time is, by nature,
+  // pre-computed, and the id-generating algorithm may have changed since
+  // then, so such an id is verified.  An id of 0 is taken to mean that
+  // none was supplied; in that case it is computed here.
+  if (sts->do_not_use_directly_.id_ != 0) {
+    // Hash the raw pointer/length pair here.  Going through the
+    // TemplateString(const StaticTemplateString&) constructor would make
+    // GetGlobalId() just return the stored id, and the check would be
+    // pointless.
+    DCHECK_EQ(TemplateString(sts->do_not_use_directly_.ptr_,
+                             sts->do_not_use_directly_.length_).GetGlobalId(),
+              sts->do_not_use_directly_.id_);
+  } else {
+    sts->do_not_use_directly_.id_ = TemplateString(*sts).GetGlobalId();
+  }
+
+  // Finally, record the id/name pair in the backwards (id to name) map.
+  TemplateString registered_copy(*sts);
+  registered_copy.AddToGlobalIdToNameMap();
+}
+
+}
diff --git a/src/tests/compile_test.cc b/src/tests/compile_test.cc
new file mode 100644
index 0000000..4951e56
--- /dev/null
+++ b/src/tests/compile_test.cc
@@ -0,0 +1,76 @@
+// Copyright (c) 2008, Google Inc.
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+// * Neither the name of Google Inc. nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+// ---
+// Author: Craig Silverstein
+//
+// Most other tests use "config_for_unittests.h" to make testing easier.
+// This brings in some code most users won't use. This test is meant
+// entirely to use ctemplate as users will, just #including the public
+// .h files directly. It does hardly any work, and is mainly intended
+// as a compile check for the .h files. It will not work if you use
+// a non-standard name for the package namespace (via
+// ./configure --enable-namespace=foo
+// ), though you can fix that by changing the namespace alias below.
+
+// These are all the .h files that we export
+#include <ctemplate/per_expand_data.h>
+#include <ctemplate/template.h>
+#include <ctemplate/template_dictionary.h>
+#include <ctemplate/template_dictionary_interface.h>
+#include <ctemplate/template_emitter.h>
+#include <ctemplate/template_enums.h>
+#include <ctemplate/template_modifiers.h>
+#include <ctemplate/template_namelist.h>
+#include <ctemplate/template_pathops.h>
+#include <ctemplate/template_string.h>
+#include <stdio.h>
+#include <string>
+
+// If you used ./configure --enable-namespace=foo, replace 'ctemplate'
+// here with 'foo'.
+namespace template_ns = ctemplate;
+
+int main() {  // smoke test: drives the library through the exported headers only
+  template_ns::Template::StringToTemplateCache("key", "example");
+  template_ns::Template* tpl = template_ns::Template::GetTemplate(
+      "key", template_ns::DO_NOT_STRIP);
+  template_ns::TemplateDictionary dict("my dict");
+  std::string nothing_will_come_of_nothing;  // receives the expanded output
+  tpl->Expand(&nothing_will_come_of_nothing, &dict);
+
+  // Try using a bit more functionality.
+  template_ns::PerExpandData data;
+  nothing_will_come_of_nothing.clear();  // reuse the same buffer for the second expansion
+  template_ns::ExpandWithData("key", template_ns::DO_NOT_STRIP, &dict, &data,
+                              &nothing_will_come_of_nothing);
+
+  printf("PASS.\n");
+  return 0;
+}
diff --git a/src/tests/config_for_unittests.h b/src/tests/config_for_unittests.h
new file mode 100644
index 0000000..aa20d9f
--- /dev/null
+++ b/src/tests/config_for_unittests.h
@@ -0,0 +1,63 @@
+// Copyright (c) 2007, Google Inc.
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+// * Neither the name of Google Inc. nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+// ---
+// All Rights Reserved.
+//
+//
+// This file is needed for windows -- unittests are not part of the
+// ctemplate dll, but still want to include config.h just like the
+// dll does, so they can use internal tools and APIs for testing.
+//
+// The problem is that config.h declares CTEMPLATE_DLL_DECL to be
+// for exporting symbols, but the unittest needs to *import* symbols
+// (since it's not the dll).
+//
+// The solution is to have this file, which is just like config.h but
+// sets CTEMPLATE_DLL_DECL to do a dllimport instead of a dllexport.
+//
+// The reason we need this extra CTEMPLATE_DLL_DECL_FOR_UNITTESTS
+// variable is in case people want to set CTEMPLATE_DLL_DECL explicitly
+// to something other than __declspec(dllexport). In that case, they
+// may want to use something other than __declspec(dllimport) for the
+// unittest case. For that, we allow folks to define both
+// CTEMPLATE_DLL_DECL and CTEMPLATE_DLL_DECL_FOR_UNITTESTS explicitly.
+//
+// NOTE: This file is equivalent to config.h on non-windows systems,
+// which never define CTEMPLATE_DLL_DECL_FOR_UNITTESTS and always
+// define CTEMPLATE_DLL_DECL to the empty string.
+
+#include "config.h"
+
+#undef CTEMPLATE_DLL_DECL // discard config.h's definition; it is replaced just below
+#ifdef CTEMPLATE_DLL_DECL_FOR_UNITTESTS
+# define CTEMPLATE_DLL_DECL CTEMPLATE_DLL_DECL_FOR_UNITTESTS // use the unittest-specific declaration
+#else
+# define CTEMPLATE_DLL_DECL // if DLL_DECL_FOR_UNITTESTS isn't defined, use ""
+#endif
diff --git a/src/tests/diff_tpl_auto_escape_unittest.sh b/src/tests/diff_tpl_auto_escape_unittest.sh
new file mode 100755
index 0000000..b74f112
--- /dev/null
+++ b/src/tests/diff_tpl_auto_escape_unittest.sh
@@ -0,0 +1,196 @@
+#!/bin/sh
+
+# Copyright (c) 2008, Google Inc.
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are
+# met:
+#
+# * Redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer.
+# * Redistributions in binary form must reproduce the above
+# copyright notice, this list of conditions and the following disclaimer
+# in the documentation and/or other materials provided with the
+# distribution.
+# * Neither the name of Google Inc. nor the names of its
+# contributors may be used to endorse or promote products derived from
+# this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+# ---
+# Author: jad@google.com (Jad Boutros)
+# Heavily inspired from make_tpl_varnames_h_unittest.sh
+#
+#
+# TODO(jad): Add better testing of Strip mode.
+# TODO(jad): Add testing for (lame) suppressed diffs.
+
+die() {  # report why the test failed (on stderr) and abort with status 1
+  echo "Test failed: $*" >&2
+  exit 1
+}
+
+TEST_SRCDIR=${TEST_SRCDIR-"."} # falls back to the current directory when unset
+TEST_TMPDIR=${TMPDIR-"/tmp"} # falls back to /tmp when TMPDIR is unset
+
+# Optional first argument is where the executable lives
+DIFFTPL=${1-"$TEST_SRCDIR/diff_tpl_auto_escape"}
+
+# Optional second argument is tmpdir to use
+TMPDIR=${2-"$TEST_TMPDIR/difftpl"}
+
+rm -rf $TMPDIR # start from an empty scratch directory on every run
+mkdir $TMPDIR || die "$LINENO: Can't make $TMPDIR"
+
+# Let's make some templates
+# ok1.tpl is valid HTML and has one correct modifier.
+echo '<a href="{{URL}}">{{USER:h}}</a>' > $TMPDIR/ok1.tpl
+
+# ok2.tpl is valid HTML and has one right and one wrong modifier.
+echo '<a href="{{URL:U=html}}">{{USER:j}}</a>' > $TMPDIR/ok2.tpl
+
+# ok3.tpl is valid HTML and has both wrong modifiers.
+echo '<a href="{{URL:h}}">{{USER:c}}</a>' > $TMPDIR/ok3.tpl
+
+# ok4.tpl is valid HTML and is auto-escaped.
+echo '{{%AUTOESCAPE context="HTML"}}' \
+ '<a href="{{URL}}">{{USER}}</a>' > $TMPDIR/ok4.tpl
+# bad1.tpl will fail auto-escape parsing.
+echo '{{%AUTOESCAPE context="HTML"}}<a href={{QC' > $TMPDIR/bad1.tpl
+
+# First, test commandline flags
+$DIFFTPL >/dev/null 2>&1 \
+ && die "$LINENO: $DIFFTPL with no args didn't give an error"
+$DIFFTPL --strip=STRIP_WHITESPACE >/dev/null 2>&1 \
+ && die "$LINENO: $DIFFTPL with no template didn't give an error"
+$DIFFTPL $TMPDIR/ok1.tpl >/dev/null 2>&1 \
+ && die "$LINENO: $DIFFTPL with only one template didn't give an error"
+$DIFFTPL -sFOO $TMPDIR/ok1.tpl $TMPDIR/ok1.tpl >/dev/null 2>&1 \
+ && die "$LINENO: $DIFFTPL with bad strip didn't given an error"
+$DIFFTPL --strip=BLA $TMPDIR/ok1.tpl $TMPDIR/ok1.tpl >/dev/null 2>&1 \
+ && die "$LINENO: $DIFFTPL with other bad strip didn't given an error"
+$DIFFTPL -h >/dev/null 2>&1 \
+ || die "$LINENO: $DIFFTPL -h failed"
+$DIFFTPL --help >/dev/null 2>&1 \
+ || die "$LINENO: $DIFFTPL --help failed"
+
+# Some weird (broken) shells leave the ending EOF in the here-document,
+# hence the grep.
+# Diff between ok1.tpl and ok4.tpl. No differences.
+expected_test1_verbose=`cat <<EOF | grep -v '^EOF$'
+[VERBOSE] ------ Diff of [$TMPDIR/ok1.tpl, $TMPDIR/ok4.tpl] ------
+[VERBOSE] Variables Found: Total=2; Diffs=0; NoMods=1
+EOF`
+
+expected_test1=`cat <<EOF | grep -v '^EOF$'
+EOF`
+
+# Diff between ok1.tpl and ok2.tpl. Expect one difference.
+expected_test2_verbose=`cat <<EOF | grep -v '^EOF$'
+[VERBOSE] ------ Diff of [$TMPDIR/ok1.tpl, $TMPDIR/ok2.tpl] ------
+Difference for variable USER -- :h vs. :j
+[VERBOSE] Variables Found: Total=2; Diffs=1; NoMods=1
+EOF`
+
+expected_test2=`cat <<EOF | grep -v '^EOF$'
+Difference for variable USER -- :h vs. :j
+EOF`
+
+# Diff between ok3.tpl and ok4.tpl. Expect two differences.
+expected_test3_verbose=`cat <<EOF | grep -v '^EOF$'
+[VERBOSE] ------ Diff of [$TMPDIR/ok3.tpl, $TMPDIR/ok4.tpl] ------
+Difference for variable URL -- :h vs. :U=html
+Difference for variable USER -- :c vs. :h
+[VERBOSE] Variables Found: Total=2; Diffs=2; NoMods=0
+EOF`
+
+expected_test3=`cat <<EOF | grep -v '^EOF$'
+Difference for variable URL -- :h vs. :U=html
+Difference for variable USER -- :c vs. :h
+EOF`
+
+# Diff between ok2.tpl and ok3.tpl. Expect two differences.
+expected_test4_verbose=`cat <<EOF | grep -v '^EOF$'
+[VERBOSE] ------ Diff of [$TMPDIR/ok2.tpl, $TMPDIR/ok3.tpl] ------
+Difference for variable URL -- :U=html vs. :h
+Difference for variable USER -- :j vs. :c
+[VERBOSE] Variables Found: Total=2; Diffs=2; NoMods=0
+EOF`
+
+expected_test4=`cat <<EOF | grep -v '^EOF$'
+Difference for variable URL -- :U=html vs. :h
+Difference for variable USER -- :j vs. :c
+EOF`
+
+
+# syntax-check these templates
+echo "TMPDIR is: $TMPDIR"
+$DIFFTPL $TMPDIR/ok1.tpl $TMPDIR/ok1.tpl >/dev/null 2>&1 \
+ || die "$LINENO: $DIFFTPL gave error parsing identical templates"
+$DIFFTPL $TMPDIR/ok1.tpl $TMPDIR/bad1.tpl >/dev/null 2>&1 \
+ && die "$LINENO: $DIFFTPL gave no error parsing bad template"
+$DIFFTPL $TMPDIR/ok100.tpl $TMPDIR/ok1.tpl >/dev/null 2>&1 \
+ && die "$LINENO: $DIFFTPL gave no error parsing non-existent template"
+
+# Now try the same thing, but use template-root so we don't need absdirs
+$DIFFTPL --template_root=$TMPDIR ok2.tpl ok2.tpl >/dev/null 2>&1 \
+ || die "$LINENO: $DIFFTPL gave error parsing identical templates"
+
+# Diffing the same template produces exit code 0. Check with all Strip values.
+$DIFFTPL -sSTRIP_WHITESPACE $TMPDIR/ok1.tpl $TMPDIR/ok1.tpl >/dev/null 2>&1
+[ $? = 0 ] || die "$LINENO: $DIFFTPL: wrong error-code on same template: $?"
+$DIFFTPL -sSTRIP_BLANK_LINES $TMPDIR/ok1.tpl $TMPDIR/ok1.tpl >/dev/null 2>&1
+[ $? = 0 ] || die "$LINENO: $DIFFTPL: wrong error-code on same template: $?"
+$DIFFTPL -sDO_NOT_STRIP $TMPDIR/ok1.tpl $TMPDIR/ok1.tpl >/dev/null 2>&1
+[ $? = 0 ] || die "$LINENO: $DIFFTPL: wrong error-code on same template: $?"
+
+# Diffing two templates that differ should produce exit code 1.
+$DIFFTPL $TMPDIR/ok1.tpl $TMPDIR/ok2.tpl >/dev/null 2>&1
+[ $? = 1 ] || die "$LINENO: $DIFFTPL: wrong error-code on diff. templates: $?"
+
+# Diffing against a template that cannot be loaded (ok100.tpl is never created) should also produce exit code 1.
+$DIFFTPL $TMPDIR/ok1.tpl $TMPDIR/ok100.tpl >/dev/null 2>&1
+[ $? = 1 ] || die "$LINENO: $DIFFTPL: wrong error-code on failed template: $?"
+
+# The expected outputs above embed the $TMPDIR-based filenames; if you use relative filenames here, you must first fix the expected outputs.
+out=`$DIFFTPL -v $TMPDIR/ok1.tpl $TMPDIR/ok4.tpl 2>&1`
+[ "$out" != "$expected_test1_verbose" ] &&\
+ die "$LINENO: $DIFFTPL: bad output for test1_verbose: $out\n"
+out=`$DIFFTPL $TMPDIR/ok1.tpl $TMPDIR/ok4.tpl 2>&1`
+[ "$out" != "$expected_test1" ] &&\
+ die "$LINENO: $DIFFTPL: bad output for test1: $out\n"
+
+out=`$DIFFTPL -v $TMPDIR/ok1.tpl $TMPDIR/ok2.tpl 2>&1`
+[ "$out" != "$expected_test2_verbose" ] &&\
+ die "$LINENO: $DIFFTPL: bad output for test2_verbose: $out\n"
+out=`$DIFFTPL $TMPDIR/ok1.tpl $TMPDIR/ok2.tpl 2>&1`
+[ "$out" != "$expected_test2" ] &&\
+ die "$LINENO: $DIFFTPL: bad output for test2: $out\n"
+
+out=`$DIFFTPL -v $TMPDIR/ok3.tpl $TMPDIR/ok4.tpl 2>&1`
+[ "$out" != "$expected_test3_verbose" ] &&\
+ die "$LINENO: $DIFFTPL: bad output for test3_verbose: $out\n"
+out=`$DIFFTPL $TMPDIR/ok3.tpl $TMPDIR/ok4.tpl 2>&1`
+[ "$out" != "$expected_test3" ] &&\
+ die "$LINENO: $DIFFTPL: bad output for test3: $out\n"
+
+out=`$DIFFTPL -v $TMPDIR/ok2.tpl $TMPDIR/ok3.tpl 2>&1`
+[ "$out" != "$expected_test4_verbose" ] &&\
+ die "$LINENO: $DIFFTPL: bad output for test4_verbose: $out\n"
+out=`$DIFFTPL $TMPDIR/ok2.tpl $TMPDIR/ok3.tpl 2>&1`
+[ "$out" != "$expected_test4" ] &&\
+ die "$LINENO: $DIFFTPL: bad output for test4: $out\n"
+
+echo "PASSED"
diff --git a/src/tests/generate_fsm_c_test.c b/src/tests/generate_fsm_c_test.c
new file mode 100644
index 0000000..83ca730
--- /dev/null
+++ b/src/tests/generate_fsm_c_test.c
@@ -0,0 +1,53 @@
+/* Copyright (c) 2007, Google Inc.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following disclaimer
+ * in the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Google Inc. nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ * ---
+ *
+ * Author: falmeida@google.com (Filipe Almeida)
+ *
+ * Validate that sample_fsm.c compiles.
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include "htmlparser/statemachine.h"
+
+enum states { /* state ids; presumably the ones the included sample_fsm.c refers to - TODO confirm */
+ STRINGPARSER_STATE_TEXT,
+ STRINGPARSER_STATE_STRING
+};
+
+#include "tests/htmlparser_testdata/sample_fsm.c"
+
+int main()
+{
+ (void)stringparser_states_internal_names; /* mention both tables (presumably defined by the */
+ (void)stringparser_state_transitions; /* included sample_fsm.c) so that they count as used */
+ printf("DONE.\n");
+ exit(0);
+}
diff --git a/src/tests/generate_fsm_test.sh b/src/tests/generate_fsm_test.sh
new file mode 100755
index 0000000..6d26947
--- /dev/null
+++ b/src/tests/generate_fsm_test.sh
@@ -0,0 +1,77 @@
+#!/bin/bash
+#
+# Copyright (c) 2008, Google Inc.
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are
+# met:
+#
+# * Redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer.
+# * Redistributions in binary form must reproduce the above
+# copyright notice, this list of conditions and the following disclaimer
+# in the documentation and/or other materials provided with the
+# distribution.
+# * Neither the name of Google Inc. nor the names of its
+# contributors may be used to endorse or promote products derived from
+# this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+# ---
+#
+# Author: falmeida@google.com (Filipe Almeida)
+
+die() {  # report why the test failed (on stderr) and abort with status 1
+  echo "Test failed: $*" >&2
+  exit 1
+}
+TEST_SRCDIR=${1:-$TEST_SRCDIR} # a non-empty first argument overrides $TEST_SRCDIR
+TOOLS_DIR="$TEST_SRCDIR/src/htmlparser"
+TESTDATA_DIR="$TEST_SRCDIR/src/tests/htmlparser_testdata"
+
+# Find input files
+INPUT_FILE="$TESTDATA_DIR/sample_fsm.config"
+OUTPUT_FILE="$TESTDATA_DIR/sample_fsm.c" # the output each generator run is compared against
+GENERATE_FSM="$TOOLS_DIR/generate_fsm.py"
+
+EXPECTED="`cat $OUTPUT_FILE`"
+if [ -z "$EXPECTED" ]; then die "Error reading $OUTPUT_FILE"; fi
+
+# Run the generator under every available python (2.2 and above) and compare its output with $EXPECTED.
+for PYTHON in "" "python2.2" "python2.3" "python2.4" "python2.5" "python2.6"; do
+  # Skip the versions of python that are not installed.
+  if [ -n "$PYTHON" ]; then
+    $PYTHON -h >/dev/null 2>/dev/null || continue
+  else   # use the python that's in the shebang line
+    SHEBANG_PYTHON=`head -n1 "$GENERATE_FSM" | tr -d '#!'`
+    # SHEBANG_PYTHON could be something like "env python" so don't quotify it
+    $SHEBANG_PYTHON -h >/dev/null 2>/dev/null || continue
+  fi
+  echo "-- Running $PYTHON $GENERATE_FSM $INPUT_FILE"
+  # The tr is to get rid of windows-style line endings (\r)
+  GENERATED="`$PYTHON $GENERATE_FSM $INPUT_FILE | tr -d '\015'`"
+  if [ -z "$GENERATED" ]; then die "Error running $GENERATE_FSM"; fi
+
+  if [ "$EXPECTED" != "$GENERATED" ]; then
+    echo "Test failed ($PYTHON $GENERATE_FSM $INPUT_FILE)" 1>&2
+    echo "-- EXPECTED --" 1>&2
+    echo "$EXPECTED" 1>&2
+    echo "-- GENERATED --" 1>&2
+    echo "$GENERATED" 1>&2
+    echo "--" 1>&2  # keep the closing marker on stderr with the rest of the report
+    exit 1
+  fi
+done
+
+echo "PASS"
diff --git a/src/tests/htmlparser_cpp_test.cc b/src/tests/htmlparser_cpp_test.cc
new file mode 100644
index 0000000..a8bcf79
--- /dev/null
+++ b/src/tests/htmlparser_cpp_test.cc
@@ -0,0 +1,629 @@
+// Copyright (c) 2007, Google Inc.
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+// * Neither the name of Google Inc. nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+// ---
+// Author: falmeida@google.com (Filipe Almeida)
+//
+// Verify at different points during HTML processing that the parser is in the
+// correct state.
+//
+// The annotated file consists of regular html blocks and html processing
+// instructions with a target name of "state" and a list of comma separated key
+// value pairs describing the expected state or invoking a parser method.
+// Example:
+//
+// <html><body><?state state=text, tag=body ?>
+//
+// For a more detailed explanation of the acceptable values please consult
+// htmlparser_cpp.h. Following is a list of the possible keys:
+//
+// state: Current parser state as returned by HtmlParser::state().
+// Possible values: text, tag, attr, value, comment, js_file, css_file or error.
+// tag: Current tag name as returned by HtmlParser::tag()
+// attr: Current attribute name as returned by HtmlParser::attr()
+// attr_type: Current attribute type as returned by HtmlParser::attr_type()
+// Possible values: none, regular, uri, js or style.
+// attr_quoted: True if the attribute is quoted, false if it's not.
+// in_js: True if currently processing javascript (either an attribute value
+// that expects javascript, a script block or the parser being in
+// MODE_JS)
+// js_quoted: True if inside a javascript string literal.
+// js_state: Current javascript state as returned by
+// HtmlParser::javascript_state().
+// Possible values: text, q, dq, regexp or comment.
+// in_css: True if currently inside a CSS section or attribute.
+// line_number: Integer value containing the current line count.
+// column_number: Integer value containing the current column count.
+// value_index: Integer value containing the current character index in the
+// current value starting from 0.
+// is_url_start: True if this is the first character of a url attribute.
+// reset: If true, resets the parser state to its initial values.
+// reset_mode: Similar to reset but receives an argument that changes the
+// parser mode into either mode html or mode js.
+// insert_text: Executes HtmlParser::InsertText() if the argument is true.
+
+#include "config_for_unittests.h"
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+#include <string>
+#include <utility>
+#include <vector>
+#include <map>
+#include "htmlparser/htmlparser_cpp.h"
+#include "ctemplate/template_pathops.h"
+#include "base/util.h"
+
+#define FAIL() EXPECT_TRUE(false)
+TEST_INIT // Among other things, defines RUN_ALL_TESTS
+
+using std::map;
+using std::pair;
+using std::string;
+using std::vector;
+using GOOGLE_NAMESPACE::PathJoin;
+
+namespace ctemplate_htmlparser {
+
+// Maximum file size limit. NOTE(review): not referenced elsewhere in this file.
+static const int kMaxFileSize = 1000000;
+
+// Appends the contents of filename to *s; open/read failures go to PFATAL.
+static void ReadToString(const char* filename, string* s) {
+  char buffer[8092];
+  FILE* fp = fopen(filename, "rb");
+  if (!fp) PFATAL(filename);
+  size_t n;
+  while ((n = fread(buffer, 1, sizeof(buffer), fp)) > 0) {
+    s->append(buffer, n);
+  }
+  if (ferror(fp)) PFATAL(filename);  // fread() yields 0 on error as well as EOF
+  fclose(fp);
+}
+
+// Test fixture that checks HtmlParser state against annotated html files.
+class HtmlparserCppTest : public testing::Test {
+ protected:
+  typedef map<string, HtmlParser *> ContextMap;
+
+  // Structure that stores the mapping between an id and a name.
+  struct IdNameMap {
+    int id;
+    const char *name;
+  };
+
+  // Mapping between the enum and the string representation of the state.
+  static const struct IdNameMap kStateMap[];
+
+  // Mapping between the enum and the string representation of the javascript
+  // state.
+  static const struct IdNameMap kJavascriptStateMap[];
+
+  // Mapping between the enum and the string representation of the attribute
+  // type.
+  static const struct IdNameMap kAttributeTypeMap[];
+
+  // Mapping between the enum and the string representation of the reset mode.
+  static const struct IdNameMap kResetModeMap[];
+
+  // String that marks the start of an annotation.
+  static const char kDirectiveBegin[];
+
+  // String that marks the end of an annotation.
+  static const char kDirectiveEnd[];
+
+  // Count the number of lines in a string.
+  static int UpdateLines(const string &str, int line);
+
+  // Count the number of columns in a string.
+  static int UpdateColumns(const string &str, int column);
+
+  // Converts a string to a boolean.
+  static bool StringToBool(const string &value);
+
+  // Returns the name of the corresponding enum_id by consulting an array of
+  // type IdNameMap.
+  const char *IdToName(const struct IdNameMap *list, int enum_id);
+
+  // Returns the enum_id of the correspondent name by consulting an array of
+  // type IdNameMap.
+  int NameToId(const struct IdNameMap *list, const string &name);
+
+  // Reads the filename of an annotated html file and validates the
+  // annotations against the html parser state.
+  void ValidateFile(string filename);
+
+  // Validate an annotation string against the current parser state.
+  void ProcessAnnotation(const string &dir);
+
+  // Validate the parser state against the provided state.
+  void ValidateState(const string &expected_state);
+
+  // Validate the parser tag name against the provided tag name.
+  void ValidateTag(const string &tag);
+
+  // Validate the parser attribute name against the provided attribute name.
+  void ValidateAttribute(const string &attr);
+
+  // Validate the parser attribute value contents against the provided string.
+  void ValidateValue(const string &contents);
+
+  // Validate the parser attribute type against the provided attribute type.
+  void ValidateAttributeType(const string &attr);
+
+  // Validate the parser attribute quoted state against the provided
+  // boolean.
+  void ValidateAttributeQuoted(const string &quoted);
+
+  // Validates the parser in javascript state against the provided boolean.
+  void ValidateInJavascript(const string &quoted);
+
+  // Validate the current parser javascript quoted state against the provided
+  // boolean.
+  void ValidateJavascriptQuoted(const string &quoted);
+
+  // Validate the javascript parser state against the provided state.
+  void ValidateJavascriptState(const string &expected_state);
+
+  // Validates the parser css state against the provided boolean.
+  void ValidateInCss(const string &quoted);
+
+  // Validate the line count against the expected count.
+  void ValidateLine(const string &expected_line);
+
+  // Validate the column count against the expected count.
+  void ValidateColumn(const string &expected_column);
+
+  // Validate the current parser value index against the provided index.
+  void ValidateValueIndex(const string &value_index);
+
+  // Validate the parser is_url_start value against the provided one.
+  void ValidateIsUrlStart(const string &expected_is_url_start);
+
+  void SetUp() {
+    parser_.Reset();
+  }
+
+  void TearDown() {
+    // Delete all parser instances from the context map
+    for (ContextMap::iterator iter = contextMap.begin();
+         iter != contextMap.end(); ++iter) {
+      delete iter->second;
+    }
+    contextMap.clear();
+  }
+
+  // Map containing the registers where the parser context is saved.
+  ContextMap contextMap;
+
+  // Parser instance
+  HtmlParser parser_;
+
+  friend class Test_HtmlparserTest_TestFiles;
+};
+
+const char HtmlparserCppTest::kDirectiveBegin[] = "<?state";  // opens an annotation
+const char HtmlparserCppTest::kDirectiveEnd[] = "?>";  // closes an annotation
+
+const struct HtmlparserCppTest::IdNameMap
+ HtmlparserCppTest::kStateMap[] = {  // HtmlParser state ids -> annotation names
+ { HtmlParser::STATE_TEXT, "text" },
+ { HtmlParser::STATE_TAG, "tag" },
+ { HtmlParser::STATE_ATTR, "attr" },
+ { HtmlParser::STATE_VALUE, "value" },
+ { HtmlParser::STATE_COMMENT, "comment" },
+ { HtmlParser::STATE_JS_FILE, "js_file" },
+ { HtmlParser::STATE_CSS_FILE, "css_file" },
+ { HtmlParser::STATE_ERROR, "error" },
+ { 0, NULL }  // end marker: IdToName()/NameToId() stop at a NULL name
+};
+
+const struct HtmlparserCppTest::IdNameMap
+ HtmlparserCppTest::kAttributeTypeMap[] = {  // attribute type ids -> names
+ { HtmlParser::ATTR_NONE, "none" },
+ { HtmlParser::ATTR_REGULAR, "regular" },
+ { HtmlParser::ATTR_URI, "uri" },
+ { HtmlParser::ATTR_JS, "js" },
+ { HtmlParser::ATTR_STYLE, "style" },
+ { 0, NULL }  // end marker: IdToName()/NameToId() stop at a NULL name
+};
+
+const struct HtmlparserCppTest::IdNameMap
+ HtmlparserCppTest::kJavascriptStateMap[] = {  // javascript state ids -> names
+ { JavascriptParser::STATE_TEXT, "text" },
+ { JavascriptParser::STATE_Q, "q" },
+ { JavascriptParser::STATE_DQ, "dq" },
+ { JavascriptParser::STATE_REGEXP, "regexp" },
+ { JavascriptParser::STATE_COMMENT, "comment" },
+ { 0, NULL }  // end marker: IdToName()/NameToId() stop at a NULL name
+};
+
+const struct HtmlparserCppTest::IdNameMap
+ HtmlparserCppTest::kResetModeMap[] = {  // names accepted by reset_mode
+ { HtmlParser::MODE_HTML, "html" },
+ { HtmlParser::MODE_JS, "js" },
+ { HtmlParser::MODE_CSS, "css" },
+ { HtmlParser::MODE_HTML_IN_TAG, "html_in_tag" },
+ { 0, NULL }  // end marker: IdToName()/NameToId() stop at a NULL name
+};
+
+
+// Returns line advanced by the number of '\n' characters found in str.
+int HtmlparserCppTest::UpdateLines(const string &str, int line) {
+  for (string::const_iterator it = str.begin(); it != str.end(); ++it) {
+    if (*it == '\n') {
+      ++line;
+    }
+  }
+  return line;
+}
+
+// Returns the column count to use after an annotation whose text is str,
+// given that the count was column when the annotation started.
+int HtmlparserCppTest::UpdateColumns(const string &str, int column) {
+  const size_t newline_pos = str.rfind('\n');
+  const size_t end_len = strlen(kDirectiveEnd);
+
+  if (newline_pos != string::npos) {
+    // The annotation spans several lines: the count restarts at its last
+    // newline and still has to include the closing marker.
+    return static_cast<int>(str.size() + end_len - newline_pos);
+  }
+
+  // Single-line annotation: its text plus both markers extend the column.
+  return static_cast<int>(column + str.size() +
+                          strlen(kDirectiveBegin) + end_len);
+}
+
+
+// Converts "true"/"false" (compared case-insensitively) to a boolean.
+bool HtmlparserCppTest::StringToBool(const string &value) {
+  if (strcasecmp(value.c_str(), "true") == 0) {
+    return true;
+  }
+  if (strcasecmp(value.c_str(), "false") != 0) {
+    LOG(FATAL) << "Unknown boolean value";
+  }
+  return false;  // every path returns, even if LOG(FATAL) were to come back
+}
+
+// Returns the name paired with enum_id in list, an IdNameMap array that is
+// terminated by an entry whose name is NULL.
+const char *HtmlparserCppTest::IdToName(const struct IdNameMap *list,
+                                        int enum_id) {
+  CHECK(list != NULL);
+  for (; list->name != NULL; ++list) {
+    if (enum_id == list->id) {
+      return list->name;
+    }
+  }
+  LOG(FATAL) << "Unknown id";
+  return NULL;  // gives every path a return value after the fatal log
+}
+
+// Returns the id paired with name in list, an IdNameMap array that is
+// terminated by an entry whose name is NULL.
+int HtmlparserCppTest::NameToId(const struct IdNameMap *list,
+                                const string &name) {
+  CHECK(list != NULL);
+  for (; list->name != NULL; ++list) {
+    if (name == list->name) {
+      return list->id;
+    }
+  }
+  LOG(FATAL) << "Unknown name";
+  return -1;  // gives every path a return value after the fatal log
+}
+
+// Checks that the name of the parser's current state equals expected_state.
+void HtmlparserCppTest::ValidateState(const string &expected_state) {
+  const char* const state_name = IdToName(kStateMap, parser_.state());
+  EXPECT_TRUE(state_name != NULL);
+  EXPECT_TRUE(!expected_state.empty());
+  EXPECT_EQ(expected_state, string(state_name))
+      << "Unexpected state at line " << parser_.line_number();
+}
+
+// Checks that the tag name reported by the parser equals expected_tag.
+void HtmlparserCppTest::ValidateTag(const string &expected_tag) {
+  EXPECT_TRUE(parser_.tag() != NULL);
+  EXPECT_EQ(expected_tag, parser_.tag())
+      << "Unexpected tag name at line " << parser_.line_number();
+}
+
+// Checks that the attribute name reported by the parser equals expected_attr.
+void HtmlparserCppTest::ValidateAttribute(const string &expected_attr) {
+ EXPECT_TRUE(parser_.attribute() != NULL);
+ EXPECT_EQ(expected_attr, parser_.attribute())
+ << "Unexpected attr name value at line " << parser_.line_number();
+}
+
+// Checks that the attribute value held by the parser equals expected_value.
+void HtmlparserCppTest::ValidateValue(const string &expected_value) {
+  EXPECT_TRUE(parser_.value() != NULL);
+  const string actual_value = parser_.value();
+  EXPECT_EQ(expected_value, actual_value)
+      << "Unexpected value at line " << parser_.line_number();
+}
+
+// Checks that the parser's current attribute type is named expected_attr_type.
+void HtmlparserCppTest::ValidateAttributeType(
+    const string &expected_attr_type) {
+  const char *const type_name =
+      IdToName(kAttributeTypeMap, parser_.AttributeType());
+  EXPECT_TRUE(type_name != NULL);
+  EXPECT_TRUE(!expected_attr_type.empty());
+  EXPECT_EQ(expected_attr_type, string(type_name))
+      << "Unexpected attr_type value at line " << parser_.line_number();
+}
+
+// Checks that the parser's attribute-quoted flag matches the boolean spelled
+// by expected_attr_quoted.
+void HtmlparserCppTest::ValidateAttributeQuoted(
+    const string &expected_attr_quoted) {
+  const bool want_quoted = StringToBool(expected_attr_quoted);
+  EXPECT_EQ(want_quoted, parser_.IsAttributeQuoted())
+      << "Unexpected attr_quoted value at line " << parser_.line_number();
+}
+
+// Checks that parser_.InJavascript() matches the boolean in expected_in_js.
+void HtmlparserCppTest::ValidateInJavascript(const string &expected_in_js) {
+  const bool want_in_js = StringToBool(expected_in_js);
+  EXPECT_EQ(want_in_js, parser_.InJavascript())
+      << "Unexpected in_js value at line " << parser_.line_number();
+}
+
+// Checks that parser_.IsJavascriptQuoted() matches the boolean spelled by
+// expected_js_quoted.
+void HtmlparserCppTest::ValidateJavascriptQuoted(
+    const string &expected_js_quoted) {
+  const bool want_js_quoted = StringToBool(expected_js_quoted);
+  EXPECT_EQ(want_js_quoted, parser_.IsJavascriptQuoted())
+      << "Unexpected js_quoted value at line " << parser_.line_number();
+}
+
+// Checks that the parser's javascript state is named expected_state.
+void HtmlparserCppTest::ValidateJavascriptState(const string &expected_state) {
+  const char* const js_state_name =
+      IdToName(kJavascriptStateMap, parser_.javascript_state());
+  EXPECT_TRUE(js_state_name != NULL);
+  EXPECT_TRUE(!expected_state.empty());
+  EXPECT_EQ(expected_state, string(js_state_name))
+      << "Unexpected javascript state at line " << parser_.line_number();
+}
+
+// Checks that parser_.InCss() matches the boolean in expected_in_css.
+void HtmlparserCppTest::ValidateInCss(const string &expected_in_css) {
+  const bool want_in_css = StringToBool(expected_in_css);
+  EXPECT_EQ(want_in_css, parser_.InCss())
+      << "Unexpected in_css value at line " << parser_.line_number();
+}
+
+// Checks that the parser's line number equals the integer in expected_line.
+void HtmlparserCppTest::ValidateLine(const string &expected_line) {
+  int wanted_line;
+  CHECK(safe_strto32(expected_line, &wanted_line));
+  EXPECT_EQ(wanted_line, parser_.line_number())
+      << "Unexpected line count at line " << parser_.line_number();
+}
+
+// Checks that the parser's column number equals the integer in expected_column.
+void HtmlparserCppTest::ValidateColumn(const string &expected_column) {
+  int wanted_column;
+  CHECK(safe_strto32(expected_column, &wanted_column));
+  EXPECT_EQ(wanted_column, parser_.column_number())
+      << "Unexpected column count at line " << parser_.line_number();
+}
+
+// Checks that parser_.ValueIndex() equals the integer in expected_value_index.
+void HtmlparserCppTest::ValidateValueIndex(const string &expected_value_index) {
+  int wanted_index;
+  CHECK(safe_strto32(expected_value_index, &wanted_index));
+  EXPECT_EQ(wanted_index, parser_.ValueIndex())
+      << "Unexpected value_index value at line " << parser_.line_number();
+}
+
+// Checks that parser_.IsUrlStart() matches the boolean given as a string.
+void HtmlparserCppTest::ValidateIsUrlStart(
+    const string &expected_is_url_start) {
+  const bool want_url_start = StringToBool(expected_is_url_start);
+  EXPECT_EQ(want_url_start, parser_.IsUrlStart())
+      << "Unexpected is_url_start value at line " << parser_.line_number();
+}
+
+// Validate an annotation string against the current parser state.
+//
+// The annotation is split into comma separated key=value pairs; each key
+// selects the validator or parser operation that receives the value.
+void HtmlparserCppTest::ProcessAnnotation(const string &annotation) {
+  typedef vector< pair< string, string > > PairList;
+  PairList pairs;
+  SplitStringIntoKeyValuePairs(annotation, "=", ",", &pairs);
+
+  for (PairList::iterator iter = pairs.begin(); iter != pairs.end(); ++iter) {
+    string &key = iter->first;
+    string &value = iter->second;
+    StripWhiteSpace(&key);
+    StripWhiteSpace(&value);
+
+    if (key == "state") {
+      ValidateState(value);
+    } else if (key == "tag") {
+      ValidateTag(value);
+    } else if (key == "attr") {
+      ValidateAttribute(value);
+    } else if (key == "value") {
+      ValidateValue(value);
+    } else if (key == "attr_type") {
+      ValidateAttributeType(value);
+    } else if (key == "attr_quoted") {
+      ValidateAttributeQuoted(value);
+    } else if (key == "in_js") {
+      ValidateInJavascript(value);
+    } else if (key == "js_quoted") {
+      ValidateJavascriptQuoted(value);
+    } else if (key == "js_state") {
+      ValidateJavascriptState(value);
+    } else if (key == "in_css") {
+      ValidateInCss(value);
+    } else if (key == "line_number") {
+      ValidateLine(value);
+    } else if (key == "column_number") {
+      ValidateColumn(value);
+    } else if (key == "value_index") {
+      ValidateValueIndex(value);
+    } else if (key == "is_url_start") {
+      ValidateIsUrlStart(value);
+    } else if (key == "save_context") {
+      if (!contextMap.count(value)) {
+        contextMap[value] = new HtmlParser();
+      }
+      contextMap[value]->CopyFrom(&parser_);
+    } else if (key == "load_context") {
+      CHECK(contextMap.count(value));
+      parser_.CopyFrom(contextMap[value]);
+    } else if (key == "reset") {
+      if (StringToBool(value)) {
+        parser_.Reset();
+      }
+    } else if (key == "reset_mode") {
+      HtmlParser::Mode mode =
+          static_cast<HtmlParser::Mode>(NameToId(kResetModeMap, value));
+      parser_.ResetMode(mode);
+    } else if (key == "insert_text") {
+      if (StringToBool(value)) {
+        parser_.InsertText();
+      }
+    } else {
+      FAIL() << "Unknown test directive: " << key;
+    }
+  }
+}
+
+// Validates an html annotated file against the parser state.
+//
+// It iterates over the html file splitting it into html blocks and annotation
+// blocks. It sends the html block to the parser and uses the annotation block
+// to validate the parser state.
+void HtmlparserCppTest::ValidateFile(string filename) {
+  // If TEMPLATE_ROOTDIR is set in the environment, it overrides the
+  // default of ".".  We use an env-var rather than argv because
+  // that's what automake supports most easily.
+  const char* template_rootdir = getenv("TEMPLATE_ROOTDIR");
+  if (template_rootdir == NULL)
+    template_rootdir = DEFAULT_TEMPLATE_ROOTDIR;   // probably "."
+  string dir = PathJoin(template_rootdir, "src");
+  dir = PathJoin(dir, "tests");
+  dir = PathJoin(dir, "htmlparser_testdata");
+  const string fullpath = PathJoin(dir, filename);
+  fprintf(stderr, "Validating %s", fullpath.c_str());
+  string buffer;
+  ReadToString(fullpath.c_str(), &buffer);
+
+  // Start of the current html block.
+  size_t start_html = 0;
+
+  // Start of the next annotation.
+  size_t start_annotation = buffer.find(kDirectiveBegin, 0);
+
+  // Ending of the current annotation.
+  size_t end_annotation = buffer.find(kDirectiveEnd, start_annotation);
+
+  while (start_annotation != string::npos) {
+    // Check for an unclosed annotation before end_annotation is used, so
+    // that the first annotation of the file is covered as well.
+    CHECK(end_annotation != string::npos);
+
+    string html_block(buffer, start_html, start_annotation - start_html);
+    parser_.Parse(html_block);
+
+    start_annotation += strlen(kDirectiveBegin);
+
+    string annotation_block(buffer, start_annotation,
+                            end_annotation - start_annotation);
+    ProcessAnnotation(annotation_block);
+
+    // Update line and column count.
+    parser_.set_line_number(UpdateLines(annotation_block,
+                                        parser_.line_number()));
+    parser_.set_column_number(UpdateColumns(annotation_block,
+                                            parser_.column_number()));
+
+    start_html = end_annotation + strlen(kDirectiveEnd);
+    start_annotation = buffer.find(kDirectiveBegin, start_html);
+    end_annotation = buffer.find(kDirectiveEnd, start_annotation);
+  }
+}
+
+static vector<string> g_filenames;  // filled by TEST_FILE, read by TestFiles
+#define TEST_FILE(testname, filename) \
+ struct Register_##testname { \
+ Register_##testname() { g_filenames.push_back(filename); } \
+ }; \
+ static Register_##testname g_register_##testname
+
+// Validates each file registered through TEST_FILE, resetting in between.
+TEST(HtmlparserTest, TestFiles) {
+  HtmlparserCppTest tester;
+  for (vector<string>::size_type i = 0; i < g_filenames.size(); ++i) {
+    tester.SetUp();
+    tester.ValidateFile(g_filenames[i]);
+    tester.TearDown();
+  }
+}
+
+TEST_FILE(SimpleHtml, "simple.html");  // names are joined to the testdata dir
+TEST_FILE(Comments, "comments.html");
+TEST_FILE(JavascriptBlock, "javascript_block.html");
+TEST_FILE(JavascriptAttribute, "javascript_attribute.html");
+TEST_FILE(JavascriptRegExp, "javascript_regexp.html");
+TEST_FILE(Tags, "tags.html");
+TEST_FILE(Context, "context.html");
+TEST_FILE(Reset, "reset.html");
+TEST_FILE(CData, "cdata.html");
+TEST_FILE(LineCount, "position.html");
+
+TEST(Htmlparser, Error) {
+ HtmlParser html;
+
+ EXPECT_EQ(html.GetErrorMessage(), (const char *)NULL);
+ EXPECT_EQ(html.Parse("<a href='http://www.google.com' ''>\n"),
+ HtmlParser::STATE_ERROR);
+
+ EXPECT_STREQ(html.GetErrorMessage(),
+ "Unexpected character '\\'' in state 'tag_space'");
+ html.Reset();
+ EXPECT_EQ(html.GetErrorMessage(), (const char *)NULL);
+}
+
+}  // namespace ctemplate_htmlparser
+
+int main(int argc, char **argv) {
+ // argc/argv are not used; the exit status is what RUN_ALL_TESTS() returns.
+ return RUN_ALL_TESTS();
+}
diff --git a/src/tests/htmlparser_testdata/cdata.html b/src/tests/htmlparser_testdata/cdata.html
new file mode 100644
index 0000000..817938b
--- /dev/null
+++ b/src/tests/htmlparser_testdata/cdata.html
@@ -0,0 +1,112 @@
+<html>
+<?state state=text, tag=html ?>
+
+ <head>
+ <?state state=text, tag=head ?>
+ <!-- Title element with markup -->
+ <title>
+ <?state state=text, tag=title ?>
+ <h1>
+ <?state state=text, tag=title ?>
+ </h1>
+ <!--
+ <?state state=text, tag=title ?>
+ </title>
+ <?state state=text, tag=title ?>
+ -->
+ <?state state=text, tag=title ?>
+ </title>
+ <?state state=text ?>
+
+ <!-- Style element with attributes -->
+ <style a=b>
+ <b><?state state=text, tag=style, in_js=false, in_css=true?></b>
+ </style>
+ <?state in_css=false?>
+ </head>
+<body>
+<?state state=text, in_js=false ?>
+ <!-- PCDATA nested block -->
+ <b>
+ <?state state=text, tag=b ?>
+ <i>
+ <?state state=text, tag=i ?>
+ </i>
+ <?state state=text ?>
+ </b>
+ <?state state=text ?>
+
+ <!-- Textarea element with space at the end of the closing tag -->
+ <textarea>
+ <?state state=text, tag=textarea ?>
+ <b>
+ <?state state=text, tag=textarea ?>
+ <i>
+ <?state state=text, tag=textarea, in_css=false ?>
+ <!--
+ <?state state=text, tag=textarea ?>
+ </textarea>
+ <?state state=text, tag=textarea ?>
+ -->
+ </i>
+ <?state state=text, tag=textarea ?>
+ </b>
+ <?state state=text, tag=textarea ?>
+ </textarea >
+
+<?state state=text ?>
+
+ <!-- script tag with other tags inside -->
+ <script>
+ document.write("
+ <?state in_js=true, js_quoted=true, tag=script ?>
+ <style>
+ .none { display:none }
+ </style>
+ <?state in_js=true, js_quoted=true ?>
+ ");
+ <?state in_js=true, js_quoted=false ?>
+ </script>
+
+ <?state in_js=false ?>
+
+ <!-- script tag with a backslash quoted script tag -->
+ <script>
+ <?state in_js=true, js_quoted=false ?>
+ document.body.innerHTML = '<script><\/script>'
+ <?state in_js=true, js_quoted=false ?>
+ </script>
+
+ <?state in_js=false ?>
+
+ <!-- </script> appearing between javascript comments -->
+ <script>
+ <!--
+ <?state in_js=true, js_quoted=false ?>
+ document.body.innerHTML = '<script></script>'
+ <?state in_js=true, js_quoted=false ?>
+ -->
+ </script>
+
+ <?state in_js=false ?>
+
+ <!-- Closing script with an extra space at the end of the tag. Some browsers
+ ignore this tag and some browsers honour it. We honour it. -->
+ <script>
+ <?state in_js=true, js_quoted=false ?>
+ document.body.innerHTML = '<script><\/script>'
+ <?state in_js=true, js_quoted=false ?>
+ </script >
+
+ <script>
+ <?state in_js=true, js_quoted=false ?>
+ </script%>
+ <?state in_js=true, js_quoted=false ?>
+ </script >
+
+ <?state in_js=false ?>
+
+</body>
+<?state in_js=false ?>
+</html>
+
diff --git a/src/tests/htmlparser_testdata/comments.html b/src/tests/htmlparser_testdata/comments.html
new file mode 100644
index 0000000..391f3f0
--- /dev/null
+++ b/src/tests/htmlparser_testdata/comments.html
@@ -0,0 +1,61 @@
+<!-- Tests for HTML comments and cdata escaping text spans. -->
+<html>
+
+<body>
+
+<?state state=text, tag=body ?>
+
+<!-- HTML doctype declaration -->
+<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN"
+<?state state=text, tag=body?>
+"http://www.w3.org/TR/html4/strict.dtd">
+<?state state=text, tag=body ?>
+
+<!-- Regular HTML comment -->
+<!-- <?state state=comment, tag=body ?> -->
+<?state state=text, tag=body ?>
+
+<!-- HTML comment with tags -->
+<!-- > -> </b> <a href="<?state state=comment, tag=body ?>"></a>-->
+<?state state=text, tag=body ?>
+
+<!-- Should not be interpreted as an SGML comment -->
+<?state state=text, tag=body ?>
+<!-- -- -->
+<?state state=text, tag=body ?>
+
+<!-- -- Sync back the SGML comment for editors who parse SGML comments
+(ie: vim) -->
+<?state state=text, tag=body ?>
+
+<!-- Multiple dashes at the end. -->
+<!----- <?state state=comment, tag=body ?> --><?state state=text, tag=body ?>
+<!----- <?state state=comment, tag=body ?> ---><?state state=text, tag=body ?>
+<!----- <?state state=comment, tag=body ?> ----><?state state=text, tag=body ?>
+<!----- <?state state=comment, tag=body ?> -----><?state state=text, tag=body ?>
+
+<!-- Some more misc tests -->
+<!-- test <?state state=comment?> --><?state state=text?> test test --><?state state=text?>
+<!-- test -> test test --><?state state=text?>
+<!-- test test='--><?state state=text?>'
+<!----><?state state=text?>
+<!-----><?state state=text?>
+
+<!-- Make sure the double dash sequence is not interpreted as an SGML comment
+by introducing a legit postfix decrement operator -->
+<?state state=text, in_js=false ?>
+<script>
+<!--
+<?state state=text, in_js=true ?>
+
+var x = 1;
+x--;
+<?state state=text, in_js=true ?>
+-->
+</script>
+<?state state=text, in_js=false ?>
+
+</body>
+
+</html>
+<?state state=text ?>
diff --git a/src/tests/htmlparser_testdata/context.html b/src/tests/htmlparser_testdata/context.html
new file mode 100644
index 0000000..aaaaa46
--- /dev/null
+++ b/src/tests/htmlparser_testdata/context.html
@@ -0,0 +1,79 @@
+<!-- Tests for CopyFrom() -->
+<html>
+ <body>
+ <?state save_context=body?>
+ <?state tag=body?>
+ <h1>
+ <?state save_context=h1?>
+ <?state tag=h1?>
+ <?state load_context=body?>
+ <?state tag=body?>
+
+ <a href="http://www.google.com<?state save_context=href?>"></a>
+
+ <script>
+ <?state save_context=js?>
+ var x ='<?state save_context=js_str_literal?>
+ <?state load_context=href?><?state state=value,
+ tag=a,
+ attr=href,
+ in_js=false,
+ value=http://www.google.com?>
+ <?state load_context=js_str_literal?>
+ <?state state=text,
+ tag=script,
+ in_js=true,
+ js_quoted=true?>';
+
+ // Regexp handling
+ var expression = 10 / <?state save_context=js_expression?> / <?state save_context=js_regexp?> /;
+
+ <?state load_context=js_expression?><?state js_state=text?>
+ <?state load_context=js_regexp?><?state js_state=regexp?> /;
+ <?state js_state=text?>
+
+ </script>
+ <?state in_js=false?>
+ <?state load_context=js?>
+ <?state tag=script, js_state=text, in_js=true?>
+ </script>
+
+ <!-- html encoded script attribute -->
+ <a onclick="alert('<?state save_context=onclick_str_literal?>'"></a>
+ <?state in_js=false?>
+ <?state load_context=onclick_str_literal?><?state state=value,
+ tag=a,
+ attr=onclick,
+ attr_type=js,
+ in_js=true,
+ js_quoted=true?>'">
+ <?state state=text, tag=a?>
+ </a>
+
+<!-- ResetMode() tests -->
+
+<?state reset_mode=css?>
+<?state in_css=true?>
+<?state state=css_file?>
+<?state save_context=mode_css?>
+
+<?state reset_mode=html?>
+<?state state=text?>
+<?state in_css=false?>
+<?state load_context=mode_css?>
+<?state in_css=true?>
+<?state state=css_file?>
+
+<?state reset_mode=html_in_tag?>blah=<?state save_context=in_tag?>
+<?state load_context=onclick_str_literal?><?state state=value,
+ tag=a,
+ attr=onclick,
+ attr_type=js,
+ in_js=true,
+ js_quoted=true?>'">
+<?state load_context=in_tag?>
+<?state attr=blah?>xpto<?state value=xpto?>
+
+
+ </body>
+</html>
diff --git a/src/tests/htmlparser_testdata/google.html b/src/tests/htmlparser_testdata/google.html
new file mode 100644
index 0000000..45dddd8
--- /dev/null
+++ b/src/tests/htmlparser_testdata/google.html
@@ -0,0 +1,3 @@
+<html><head><meta http-equiv="content-type" content="text/html; charset=ISO-8859-1"><title>Google</title><style>body,td,a,p,.h{font-family:arial,sans-serif}.h{font-size:20px}.h{color:#3366cc}.q{color:#00c}.ts td{padding:0}.ts{border-collapse:collapse}#gbar{height:22px;padding-left:2px}.gbh,.gbd{border-top:1px solid #c9d7f1;font-size:1px}.gbh{height:0;position:absolute;top:24px;width:100%}#gbi,#gbs{background:#fff;left:0;position:absolute;top:24px;visibility:hidden;z-index:1000}#gbi{border:1px solid;border-color:#c9d7f1 #36c #36c #a2bae7;z-index:1001}#guser{padding-bottom:7px !important}#gbar,#guser{font-size:13px;padding-top:1px !important}@media all{.gb1,.gb3{height:22px;margin-right:.73em;vertical-align:top}#gbar{float:left}}.gb2{display:block;padding:.2em .5em}a.gb1,a.gb2,a.gb3{color:#00c !important}.gb2,.gb3{text-decoration:none}a.gb2:hover{background:#36c;color:#fff !important}</style><script>window.google={kEI:"jigHScf6BKDwswP7-eSsAw",kEXPI:"17259,19016",kHL:"en"};
+function sf(){document.f.q.focus()}
+window.gbar={};(function(){var b=window.gbar,f,h;b.qs=function(a){var c=window.encodeURIComponent&&(document.forms[0].q||"").value;if(c)a.href=a.href.replace(/([?&])q=[^&]*|$/,function(i,g){return(g||"&")+"q="+encodeURIComponent(c)})};function j(a,c){a.visibility=h?"hidden":"visible";a.left=c+"px"}b.tg=function(a){a=a||window.event;var c=0,i,g=window.navExtra,d=document.getElementById("gbi"),e=a.target||a.srcElement;a.cancelBubble=true;if(!f){f=document.createElement(Array.every||window.createPopup?"iframe":"div");f.frameBorder="0";f.src="#";d.parentNode.appendChild(f).id="gbs";if(g)for(i in g)d.insertBefore(g[i],d.firstChild).className="gb2";document.onclick=b.close}if(e.className!="gb3")e=e.parentNode;do c+=e.offsetLeft;while(e=e.offsetParent);j(d.style,c);f.style.width=d.offsetWidth+"px";f.style.height=d.offsetHeight+"px";j(f.style,c);h=!h};b.close=function(a){h&&b.tg(a)}})();</script></head><body bgcolor=#ffffff text=#000000 link=#0000cc vlink=#551a8b alink=#ff0000 onload="sf();if(document.images){new Image().src='/images/nav_logo3.png'}" topmargin=3 marginheight=3><div id=gbar><nobr><b class=gb1>Web</b> <a href="http://images.google.com/imghp?hl=en&tab=wi" onclick=gbar.qs(this) class=gb1>Images</a> <a href="http://maps.google.com/maps?hl=en&tab=wl" onclick=gbar.qs(this) class=gb1>Maps</a> <a href="http://news.google.com/nwshp?hl=en&tab=wn" onclick=gbar.qs(this) class=gb1>News</a> <a href="http://www.google.com/prdhp?hl=en&tab=wf" onclick=gbar.qs(this) class=gb1>Shopping</a> <a href="http://mail.google.com/mail/?hl=en&tab=wm" class=gb1>Gmail</a> <a href="http://www.google.com/intl/en/options/" onclick="this.blur();gbar.tg(event);return !1" class=gb3><u>more</u> <small>▼</small></a><div id=gbi> <a href="http://video.google.com/?hl=en&tab=wv" onclick=gbar.qs(this) class=gb2>Video</a> <a href="http://groups.google.com/grphp?hl=en&tab=wg" onclick=gbar.qs(this) class=gb2>Groups</a> <a href="http://books.google.com/bkshp?hl=en&tab=wp" onclick=gbar.qs(this) 
class=gb2>Books</a> <a href="http://scholar.google.com/schhp?hl=en&tab=ws" onclick=gbar.qs(this) class=gb2>Scholar</a> <a href="http://finance.google.com/finance?hl=en&tab=we" onclick=gbar.qs(this) class=gb2>Finance</a> <a href="http://blogsearch.google.com/?hl=en&tab=wb" onclick=gbar.qs(this) class=gb2>Blogs</a> <div class=gb2><div class=gbd></div></div> <a href="http://www.youtube.com/?hl=en&tab=w1" onclick=gbar.qs(this) class=gb2>YouTube</a> <a href="http://www.google.com/calendar/render?hl=en&tab=wc" class=gb2>Calendar</a> <a href="http://picasaweb.google.com/home?hl=en&tab=wq" onclick=gbar.qs(this) class=gb2>Photos</a> <a href="http://docs.google.com/?hl=en&tab=wo" class=gb2>Documents</a> <a href="http://www.google.com/reader/view/?hl=en&tab=wy" class=gb2>Reader</a> <a href="http://sites.google.com/?hl=en&tab=w3" class=gb2>Sites</a> <div class=gb2><div class=gbd></div></div> <a href="http://www.google.com/intl/en/options/" class=gb2>even more »</a></div> </nobr></div><div class=gbh style=left:0></div><div class=gbh style=right:0></div><div align=right id=guser style="font-size:84%;padding:0 0 4px" width=100%><nobr><a href="/url?sa=p&pref=ig&pval=3&q=http://www.google.com/ig%3Fhl%3Den%26source%3Diglk&usg=AFQjCNFA18XPfgb7dKnXfKz7x7g1GDH1tg">iGoogle</a> | <a href="https://www.google.com/accounts/Login?continue=http://www.google.com/&hl=en">Sign in</a></nobr></div><center><br clear=all id=lgpd><img alt="Google" height=110 src="/intl/en_ALL/images/logo.gif" width=276><br><br><form action="/search" name=f><table cellpadding=0 cellspacing=0><tr valign=top><td width=25%> </td><td align=center nowrap><input name=hl type=hidden value=en><input type=hidden name=ie value="ISO-8859-1"><input autocomplete="off" maxlength=2048 name=q size=55 title="Google Search" value=""><br><input name=btnG type=submit value="Google Search"><input name=btnI type=submit value="I'm Feeling Lucky"></td><td nowrap width=25%><font size=-2> <a href=/advanced_search?hl=en>Advanced Search</a><br> 
<a href=/preferences?hl=en>Preferences</a><br> <a href=/language_tools?hl=en>Language Tools</a></font></td></tr></table></form><br><font size=-1><font color=red>New!</font> The G1 phone is on sale now. <a href="/aclk?sa=L&ai=BuJQcgigHSbvbCqDUsAPGm6X7DvPUz3en34zVCcHZnNkT0IYDEAEYASDBVDgAUJL0-Mb8_____wFgyQY&num=1&sig=AGiWqtxZNijZyCsNtIwkfSx_S1WSW0Uh8A&q=http://www.google.com/intl/en_us/mobile/android/hpp.html">Learn more</a>.</font><br><br><br><font size=-1><a href="/intl/en/ads/">Advertising Programs</a> - <a href="/services/">Business Solutions</a> - <a href="/intl/en/about.html">About Google</a></font><p><font size=-2>©2008 - <a href="/intl/en/privacy.html">Privacy</a></font></p></center></body><script>google.y={first:[]};window.setTimeout(function(){var xjs=document.createElement('script');xjs.src='/extern_js/f/CgJlbhICdXMgACswCjgILCswGDgDLA/Vh5nhw3Xn6A.js';document.getElementsByTagName('head')[0].appendChild(xjs)},0);google.y.first.push(function(){google.ac.i(document.f,document.f.q,'','')})</script></html>
\ No newline at end of file
diff --git a/src/tests/htmlparser_testdata/javascript_attribute.html b/src/tests/htmlparser_testdata/javascript_attribute.html
new file mode 100644
index 0000000..db096f0
--- /dev/null
+++ b/src/tests/htmlparser_testdata/javascript_attribute.html
@@ -0,0 +1,27 @@
+<html>
+<body>
+
+<a onclick="alert('<?state state=value, tag=a, attr=onclick, attr_type=js,
+in_js=true, js_quoted=true?> x') &; &a; &x;/*blah <?state state=value,
+tag=a, attr=onclick, attr_type=js, in_js=true ?> */ "></a>
+
+<?state state=text, in_js=false ?>
+
+<a onmouseover='alert(document.domain<?state state=value, tag=a,
+attr=onmouseover, attr_type=js, in_js=true ?>)'>test</a>
+
+<?state state=text, in_js=false ?>
+
+<a onmouseover="">test</a>
+
+<?state state=text, in_js=false ?>
+
+<a onclick="<?state in_js=true, js_quoted=false?>">test</a>
+<?state state=text, in_js=false ?>
+
+<a onclick="'<?state in_js=true, js_quoted=true?>">test</a>
+<?state state=text, in_js=false ?>
+
+</body>
+</html>
+<?state state=text ?><?state state=text ?>
diff --git a/src/tests/htmlparser_testdata/javascript_block.html b/src/tests/htmlparser_testdata/javascript_block.html
new file mode 100644
index 0000000..539c1a6
--- /dev/null
+++ b/src/tests/htmlparser_testdata/javascript_block.html
@@ -0,0 +1,50 @@
+<html>
+<body>
+
+<script>
+
+x < 1;
+
+<?state state=text, tag=script, in_js=true ?>
+
+</script>
+
+<?state state=text?>
+
+<script>
+//<!--
+var x = 1;
+<?state state=text, tag=script, in_js=true ?>
+//--> </script>
+
+<?state state=text?>
+
+<script> //<!--
+var x = 1;
+<?state state=text, tag=script, in_js=true ?>
+</script>
+<?state state=text, tag=script, in_js=true ?>
+//--> </script>
+
+<?state state=text?>
+
+<script>
+<!--
+var x = 1;
+<?state state=text, tag=script, in_js=true ?>
+</script>
+<?state state=text, tag=script, in_js=true ?>
+-->
+</script>
+
+<?state state=text?>
+
+<script><?state tag=script, in_js=true?> </script><?state in_js=false?>
+<script><?state tag=script, in_js=true, js_quoted=false?></script><?state in_js=false?>
+<script>'<?state tag=script, in_js=true, js_quoted=true?></script><?state in_js=false?>
+<script>"<?state tag=script, in_js=true, js_quoted=true?></script><?state in_js=false?>
+
+</body>
+</html>
+<?state state=text ?>
+<?state state=text ?>
diff --git a/src/tests/htmlparser_testdata/javascript_regexp.html b/src/tests/htmlparser_testdata/javascript_regexp.html
new file mode 100644
index 0000000..7c1f88d
--- /dev/null
+++ b/src/tests/htmlparser_testdata/javascript_regexp.html
@@ -0,0 +1,171 @@
+<html>
+<body>
+
+
+<script>
+
+// General regular expression literal synching tests.
+
+var regexp = /x'/;
+<?state state=text, in_js=true, js_quoted=false?>
+
+var string = '<?state state=text, in_js=true, js_quoted=true?>';
+<?state state=text, in_js=true, js_quoted=false?>
+
+var op = 1 / 2;
+var string2 = '<?state state=text, in_js=true, js_quoted=true?>';
+<?state state=text, in_js=true, js_quoted=false?>
+
+return /x'/;
+<?state state=text, in_js=true, js_quoted=false?>
+
+
+// General regular expression state tests
+
+var regexp = / <?state js_state=regexp?> /; <?state js_state=text?>
+
+var a = /"hello/.exec("<?state state=text, in_js=true, js_quoted=true ?>");
+var a = /"hello"/.exec("<?state state=text, in_js=true, js_quoted=true ?>");
+
+var expression = 10 / <?state js_state=text?> / <?state js_state=regexp?> /;
+
+<?state js_state=text?>
+
+var expression2 = / <?state js_state=regexp?> /;
+
+if (window.frames.length < /\d+<?state js_state=regexp?>/.exec(<?state js_state=text?>)[0]) {
+ alert(/ '" <?state js_state=regexp?>/.exec(<?state js_state=text?>)/);
+ var quoted_string = "<?state js_state=dq?>" <?state js_state=text?>;
+}
+
+switch(/ <?state js_state=regexp?> /) { <?state js_state=text?>
+ case / <?state js_state=regexp?> /: <?state js_state=text?>
+ break;
+ case / \/<?state js_state=regexp?> /: <?state js_state=text?>
+ break;
+}
+
+delete / <?state js_state=regexp?> x / <?state js_state=text?>;
+id / <?state js_state=text?> x / <?state js_state=text?>;
+
+function test(/ <?state js_state=regexp?> /) {
+ return / <?state js_state=regexp?> /.exec(<?state js_state=text?>);
+}
+
+function test2(/ <?state js_state=regexp?> /, <?state js_state=text?>) {
+ return / '"<?state js_state=regexp?> /.exec(<?state js_state=text?>);
+}
+
+var a = "/<?state js_state=dq?>"/<?state js_state=text?>;
+
+test in / <?state js_state=regexp?>/;
+min / <?state js_state=text?>;
+IN / <?state js_state=text?>;
+
+3.. /<?state js_state=text?>/;
+0x3./<?state js_state=text?>/;
+
+// Escaping in regular expressions
+
+var a = / blah\/<?state js_state=regexp?>/<?state js_state=text?>,
+/\//<?state js_state=text?>,
+/\/*/<?state js_state=text?> /**/ <?state js_state=text?>,
+
+// Bracket expressions
+var a = [/[/] <?state js_state=regexp?> / <?state js_state=text?>,
+var a = /[/\]/ <?state js_state=regexp?> ]/ <?state js_state=text?>,
+var a = /[/\\]/ <?state js_state=text?>];
+
+/* Unary incremented/decremented variable, followed by a division. */
+
+var w = w++ / 1 <?state js_state=text?>;
+var w = w-- / 1 <?state js_state=text?>;
+
+/* Division after array acessor. */
+var test = xpto[2] / <?state js_state=text?>;
+
+/* Division after parenthesis expression. */
+var test = (2 + 2) / <?state js_state=text?>;
+
+/* Division with comments before the the previous token. */
+var test = x/* test *// <?state js_state=text?>;
+var test = x /* test *// <?state js_state=text?>;
+var test = x/* test */ / <?state js_state=text?>;
+var test = x /* test */ / <?state js_state=text?>;
+var test = x /* test */
+/ <?state js_state=text?>;
+
+var test = x // test
+/ <?state js_state=text?>;
+
+var test = x // test
+ / <?state js_state=text?>;
+
+var test = x // test
+
+/ <?state js_state=text?>;
+
+/* Regexp with multi line comment before the the previous token. */
+var test =/* test *// <?state js_state=regexp?> /;
+var test = /* test *// <?state js_state=regexp?> /;
+var test = /* test *// <?state js_state=regexp?> /;
+var test = /* test */ / <?state js_state=regexp?> /;
+var test = /* test */
+/ <?state js_state=regexp?> /;
+
+var test = // test
+/ <?state js_state=regexp?> /;
+
+var test = // test
+ / <?state js_state=regexp?> /;
+
+var test = // test
+
+/ <?state js_state=regexp?> /;
+
+
+/* Semicolon insertion after a code block */
+function() {} / <?state js_state=regexp?>/
+
+/****************************************************************************
+ Tests that won't pass right now due to design or implementation choices.
+*/
+
+/* Division after a regular expression.
+
+var test = <?nopstate js_state=text?>
+/ <?nopstate js_state=regexp?>
+/ <?nopstate js_state=text?>
+/ <?nopstate js_state=text?>
+/ <?nopstate js_state=regexp?>
+/ <?nopstate js_state=text?>;
+
+*/
+
+/* Division of an object literal
+
+{
+ a: 1,
+ b : 2
+} / <?nopstate js_state=text?>/
+
+*/
+
+/* Unary increment and decrement of regular expressions.
+
+var w = ++/ <?nopstate js_state=regexp?>/i;
+var x = --/ <?nopstate js_state=regexp?>/i
+
+*/
+
+
+</script>
+
+<script>
+
+/ <?state js_state=regexp?> /;
+
+</script>
+
+</body>
+</html>
diff --git a/src/tests/htmlparser_testdata/position.html b/src/tests/htmlparser_testdata/position.html
new file mode 100644
index 0000000..120ca4e
--- /dev/null
+++ b/src/tests/htmlparser_testdata/position.html
@@ -0,0 +1,33 @@
+<?state line_number=1?>
+<?state line_number=2?>
+<html>
+<?state column_number=1?>
+ <body><?state column_number=9?>
+ <?state line_number=6?><?state column_number=28?>
+ <?state
+
+ line_number=7
+ ?><?state column_number=7?>
+ <?state line_number=11?><?state column_number=29?>
+ </body>
+ <?state line_number=13?>
+
+
+
+<?state column_number=1?>
+ <?state column_number=2?>
+ <?state column_number=3?>
+
+
+
+
+
+<a href="http://ww.google.com" onclick="var x=<?state column_number=47?>">
+</a>
+
+<img src="http://www.google.com" onerror="var w = &qu<?state column_number=54?>ot;test"">
+
+
+
+ <?state line_number=32?>
+</html>
diff --git a/src/tests/htmlparser_testdata/reset.html b/src/tests/htmlparser_testdata/reset.html
new file mode 100644
index 0000000..cd0d070
--- /dev/null
+++ b/src/tests/htmlparser_testdata/reset.html
@@ -0,0 +1,31 @@
+<html>
+ <body>
+ <?state state=text, attr_type=none?>
+ <b font="<?state state=value, tag=b, attr=font, attr_quoted=true,
+ in_js=false, attr_type=regular ?>
+<?state reset=true ?>
+<?state state=text, attr_type=none ?>
+<b <?state state=tag, tag=b ?>
+<?state reset_mode=js ?>
+<?state state=js_file?>
+var unquoted =<?state js_quoted=false, in_js=true ?>;
+var single_quoted ='<?state js_quoted=true, in_js=true ?>';
+var unquoted =<?state js_quoted=false, in_js=true ?>;
+<?state reset_mode=html_in_tag?>blah=<?state attr=blah?>xpto<?state value=xpto?>
+test<?state state=attr?>
+<?state reset_mode=html_in_tag?>
+test="test123<?state attr=test, value=test123?>">
+<?state state=text?>
+<?state reset_mode=css?>
+<?state in_css=true?>
+<?state state=css_file?>
+
+<a href="<?state in_css=true?>"></style><?state in_css=true?>
+
+<123 <script><?state in_css=true?>
+
+<?state reset_mode=html?>
+<?state in_css=false?>
+ <?state state=text, attr_type=none?>
+ <b font="<?state state=value, tag=b, attr=font, attr_quoted=true,
+ in_js=false, attr_type=regular ?>
diff --git a/src/tests/htmlparser_testdata/sample_fsm.c b/src/tests/htmlparser_testdata/sample_fsm.c
new file mode 100644
index 0000000..ed85c5f
--- /dev/null
+++ b/src/tests/htmlparser_testdata/sample_fsm.c
@@ -0,0 +1,802 @@
+/* Parses C style strings
+ * Auto generated by generate_fsm.py. Please do not edit.
+ */
+#define STRINGPARSER_NUM_STATES 4
+enum stringparser_state_internal_enum {
+ STRINGPARSER_STATE_INT_TEXT,
+ STRINGPARSER_STATE_INT_STRING,
+ STRINGPARSER_STATE_INT_STRING_ESCAPE
+};
+
+static const int stringparser_states_external[] = {
+ STRINGPARSER_STATE_TEXT,
+ STRINGPARSER_STATE_STRING,
+ STRINGPARSER_STATE_STRING
+};
+
+static const char * stringparser_states_internal_names[] = {
+ "text",
+ "string",
+ "string_escape"
+};
+
+static const int stringparser_transition_row_text[] = {
+ /* '\x00' */ STRINGPARSER_STATE_INT_TEXT,
+ /* '\x01' */ STRINGPARSER_STATE_INT_TEXT,
+ /* '\x02' */ STRINGPARSER_STATE_INT_TEXT,
+ /* '\x03' */ STRINGPARSER_STATE_INT_TEXT,
+ /* '\x04' */ STRINGPARSER_STATE_INT_TEXT,
+ /* '\x05' */ STRINGPARSER_STATE_INT_TEXT,
+ /* '\x06' */ STRINGPARSER_STATE_INT_TEXT,
+ /* '\x07' */ STRINGPARSER_STATE_INT_TEXT,
+ /* '\x08' */ STRINGPARSER_STATE_INT_TEXT,
+ /* '\t' */ STRINGPARSER_STATE_INT_TEXT,
+ /* '\n' */ STRINGPARSER_STATE_INT_TEXT,
+ /* '\x0b' */ STRINGPARSER_STATE_INT_TEXT,
+ /* '\x0c' */ STRINGPARSER_STATE_INT_TEXT,
+ /* '\r' */ STRINGPARSER_STATE_INT_TEXT,
+ /* '\x0e' */ STRINGPARSER_STATE_INT_TEXT,
+ /* '\x0f' */ STRINGPARSER_STATE_INT_TEXT,
+ /* '\x10' */ STRINGPARSER_STATE_INT_TEXT,
+ /* '\x11' */ STRINGPARSER_STATE_INT_TEXT,
+ /* '\x12' */ STRINGPARSER_STATE_INT_TEXT,
+ /* '\x13' */ STRINGPARSER_STATE_INT_TEXT,
+ /* '\x14' */ STRINGPARSER_STATE_INT_TEXT,
+ /* '\x15' */ STRINGPARSER_STATE_INT_TEXT,
+ /* '\x16' */ STRINGPARSER_STATE_INT_TEXT,
+ /* '\x17' */ STRINGPARSER_STATE_INT_TEXT,
+ /* '\x18' */ STRINGPARSER_STATE_INT_TEXT,
+ /* '\x19' */ STRINGPARSER_STATE_INT_TEXT,
+ /* '\x1a' */ STRINGPARSER_STATE_INT_TEXT,
+ /* '\x1b' */ STRINGPARSER_STATE_INT_TEXT,
+ /* '\x1c' */ STRINGPARSER_STATE_INT_TEXT,
+ /* '\x1d' */ STRINGPARSER_STATE_INT_TEXT,
+ /* '\x1e' */ STRINGPARSER_STATE_INT_TEXT,
+ /* '\x1f' */ STRINGPARSER_STATE_INT_TEXT,
+ /* ' ' */ STRINGPARSER_STATE_INT_TEXT,
+ /* '!' */ STRINGPARSER_STATE_INT_TEXT,
+ /* '"' */ STRINGPARSER_STATE_INT_STRING,
+ /* '#' */ STRINGPARSER_STATE_INT_TEXT,
+ /* '$' */ STRINGPARSER_STATE_INT_TEXT,
+ /* '%' */ STRINGPARSER_STATE_INT_TEXT,
+ /* '&' */ STRINGPARSER_STATE_INT_TEXT,
+ /* "'" */ STRINGPARSER_STATE_INT_TEXT,
+ /* '(' */ STRINGPARSER_STATE_INT_TEXT,
+ /* ')' */ STRINGPARSER_STATE_INT_TEXT,
+ /* '*' */ STRINGPARSER_STATE_INT_TEXT,
+ /* '+' */ STRINGPARSER_STATE_INT_TEXT,
+ /* ',' */ STRINGPARSER_STATE_INT_TEXT,
+ /* '-' */ STRINGPARSER_STATE_INT_TEXT,
+ /* '.' */ STRINGPARSER_STATE_INT_TEXT,
+ /* '/' */ STRINGPARSER_STATE_INT_TEXT,
+ /* '0' */ STRINGPARSER_STATE_INT_TEXT,
+ /* '1' */ STRINGPARSER_STATE_INT_TEXT,
+ /* '2' */ STRINGPARSER_STATE_INT_TEXT,
+ /* '3' */ STRINGPARSER_STATE_INT_TEXT,
+ /* '4' */ STRINGPARSER_STATE_INT_TEXT,
+ /* '5' */ STRINGPARSER_STATE_INT_TEXT,
+ /* '6' */ STRINGPARSER_STATE_INT_TEXT,
+ /* '7' */ STRINGPARSER_STATE_INT_TEXT,
+ /* '8' */ STRINGPARSER_STATE_INT_TEXT,
+ /* '9' */ STRINGPARSER_STATE_INT_TEXT,
+ /* ':' */ STRINGPARSER_STATE_INT_TEXT,
+ /* ';' */ STRINGPARSER_STATE_INT_TEXT,
+ /* '<' */ STRINGPARSER_STATE_INT_TEXT,
+ /* '=' */ STRINGPARSER_STATE_INT_TEXT,
+ /* '>' */ STRINGPARSER_STATE_INT_TEXT,
+ /* '?' */ STRINGPARSER_STATE_INT_TEXT,
+ /* '@' */ STRINGPARSER_STATE_INT_TEXT,
+ /* 'A' */ STRINGPARSER_STATE_INT_TEXT,
+ /* 'B' */ STRINGPARSER_STATE_INT_TEXT,
+ /* 'C' */ STRINGPARSER_STATE_INT_TEXT,
+ /* 'D' */ STRINGPARSER_STATE_INT_TEXT,
+ /* 'E' */ STRINGPARSER_STATE_INT_TEXT,
+ /* 'F' */ STRINGPARSER_STATE_INT_TEXT,
+ /* 'G' */ STRINGPARSER_STATE_INT_TEXT,
+ /* 'H' */ STRINGPARSER_STATE_INT_TEXT,
+ /* 'I' */ STRINGPARSER_STATE_INT_TEXT,
+ /* 'J' */ STRINGPARSER_STATE_INT_TEXT,
+ /* 'K' */ STRINGPARSER_STATE_INT_TEXT,
+ /* 'L' */ STRINGPARSER_STATE_INT_TEXT,
+ /* 'M' */ STRINGPARSER_STATE_INT_TEXT,
+ /* 'N' */ STRINGPARSER_STATE_INT_TEXT,
+ /* 'O' */ STRINGPARSER_STATE_INT_TEXT,
+ /* 'P' */ STRINGPARSER_STATE_INT_TEXT,
+ /* 'Q' */ STRINGPARSER_STATE_INT_TEXT,
+ /* 'R' */ STRINGPARSER_STATE_INT_TEXT,
+ /* 'S' */ STRINGPARSER_STATE_INT_TEXT,
+ /* 'T' */ STRINGPARSER_STATE_INT_TEXT,
+ /* 'U' */ STRINGPARSER_STATE_INT_TEXT,
+ /* 'V' */ STRINGPARSER_STATE_INT_TEXT,
+ /* 'W' */ STRINGPARSER_STATE_INT_TEXT,
+ /* 'X' */ STRINGPARSER_STATE_INT_TEXT,
+ /* 'Y' */ STRINGPARSER_STATE_INT_TEXT,
+ /* 'Z' */ STRINGPARSER_STATE_INT_TEXT,
+ /* '[' */ STRINGPARSER_STATE_INT_TEXT,
+ /* '\\' */ STRINGPARSER_STATE_INT_STRING,
+ /* ']' */ STRINGPARSER_STATE_INT_TEXT,
+ /* '^' */ STRINGPARSER_STATE_INT_TEXT,
+ /* '_' */ STRINGPARSER_STATE_INT_TEXT,
+ /* '`' */ STRINGPARSER_STATE_INT_TEXT,
+ /* 'a' */ STRINGPARSER_STATE_INT_TEXT,
+ /* 'b' */ STRINGPARSER_STATE_INT_TEXT,
+ /* 'c' */ STRINGPARSER_STATE_INT_TEXT,
+ /* 'd' */ STRINGPARSER_STATE_INT_TEXT,
+ /* 'e' */ STRINGPARSER_STATE_INT_TEXT,
+ /* 'f' */ STRINGPARSER_STATE_INT_TEXT,
+ /* 'g' */ STRINGPARSER_STATE_INT_TEXT,
+ /* 'h' */ STRINGPARSER_STATE_INT_TEXT,
+ /* 'i' */ STRINGPARSER_STATE_INT_TEXT,
+ /* 'j' */ STRINGPARSER_STATE_INT_TEXT,
+ /* 'k' */ STRINGPARSER_STATE_INT_TEXT,
+ /* 'l' */ STRINGPARSER_STATE_INT_TEXT,
+ /* 'm' */ STRINGPARSER_STATE_INT_TEXT,
+ /* 'n' */ STRINGPARSER_STATE_INT_TEXT,
+ /* 'o' */ STRINGPARSER_STATE_INT_TEXT,
+ /* 'p' */ STRINGPARSER_STATE_INT_TEXT,
+ /* 'q' */ STRINGPARSER_STATE_INT_TEXT,
+ /* 'r' */ STRINGPARSER_STATE_INT_TEXT,
+ /* 's' */ STRINGPARSER_STATE_INT_TEXT,
+ /* 't' */ STRINGPARSER_STATE_INT_TEXT,
+ /* 'u' */ STRINGPARSER_STATE_INT_TEXT,
+ /* 'v' */ STRINGPARSER_STATE_INT_TEXT,
+ /* 'w' */ STRINGPARSER_STATE_INT_TEXT,
+ /* 'x' */ STRINGPARSER_STATE_INT_TEXT,
+ /* 'y' */ STRINGPARSER_STATE_INT_TEXT,
+ /* 'z' */ STRINGPARSER_STATE_INT_TEXT,
+ /* '{' */ STRINGPARSER_STATE_INT_TEXT,
+ /* '|' */ STRINGPARSER_STATE_INT_TEXT,
+ /* '}' */ STRINGPARSER_STATE_INT_TEXT,
+ /* '~' */ STRINGPARSER_STATE_INT_TEXT,
+ /* '\x7f' */ STRINGPARSER_STATE_INT_TEXT,
+ /* '\x80' */ STRINGPARSER_STATE_INT_TEXT,
+ /* '\x81' */ STRINGPARSER_STATE_INT_TEXT,
+ /* '\x82' */ STRINGPARSER_STATE_INT_TEXT,
+ /* '\x83' */ STRINGPARSER_STATE_INT_TEXT,
+ /* '\x84' */ STRINGPARSER_STATE_INT_TEXT,
+ /* '\x85' */ STRINGPARSER_STATE_INT_TEXT,
+ /* '\x86' */ STRINGPARSER_STATE_INT_TEXT,
+ /* '\x87' */ STRINGPARSER_STATE_INT_TEXT,
+ /* '\x88' */ STRINGPARSER_STATE_INT_TEXT,
+ /* '\x89' */ STRINGPARSER_STATE_INT_TEXT,
+ /* '\x8a' */ STRINGPARSER_STATE_INT_TEXT,
+ /* '\x8b' */ STRINGPARSER_STATE_INT_TEXT,
+ /* '\x8c' */ STRINGPARSER_STATE_INT_TEXT,
+ /* '\x8d' */ STRINGPARSER_STATE_INT_TEXT,
+ /* '\x8e' */ STRINGPARSER_STATE_INT_TEXT,
+ /* '\x8f' */ STRINGPARSER_STATE_INT_TEXT,
+ /* '\x90' */ STRINGPARSER_STATE_INT_TEXT,
+ /* '\x91' */ STRINGPARSER_STATE_INT_TEXT,
+ /* '\x92' */ STRINGPARSER_STATE_INT_TEXT,
+ /* '\x93' */ STRINGPARSER_STATE_INT_TEXT,
+ /* '\x94' */ STRINGPARSER_STATE_INT_TEXT,
+ /* '\x95' */ STRINGPARSER_STATE_INT_TEXT,
+ /* '\x96' */ STRINGPARSER_STATE_INT_TEXT,
+ /* '\x97' */ STRINGPARSER_STATE_INT_TEXT,
+ /* '\x98' */ STRINGPARSER_STATE_INT_TEXT,
+ /* '\x99' */ STRINGPARSER_STATE_INT_TEXT,
+ /* '\x9a' */ STRINGPARSER_STATE_INT_TEXT,
+ /* '\x9b' */ STRINGPARSER_STATE_INT_TEXT,
+ /* '\x9c' */ STRINGPARSER_STATE_INT_TEXT,
+ /* '\x9d' */ STRINGPARSER_STATE_INT_TEXT,
+ /* '\x9e' */ STRINGPARSER_STATE_INT_TEXT,
+ /* '\x9f' */ STRINGPARSER_STATE_INT_TEXT,
+ /* '\xa0' */ STRINGPARSER_STATE_INT_TEXT,
+ /* '\xa1' */ STRINGPARSER_STATE_INT_TEXT,
+ /* '\xa2' */ STRINGPARSER_STATE_INT_TEXT,
+ /* '\xa3' */ STRINGPARSER_STATE_INT_TEXT,
+ /* '\xa4' */ STRINGPARSER_STATE_INT_TEXT,
+ /* '\xa5' */ STRINGPARSER_STATE_INT_TEXT,
+ /* '\xa6' */ STRINGPARSER_STATE_INT_TEXT,
+ /* '\xa7' */ STRINGPARSER_STATE_INT_TEXT,
+ /* '\xa8' */ STRINGPARSER_STATE_INT_TEXT,
+ /* '\xa9' */ STRINGPARSER_STATE_INT_TEXT,
+ /* '\xaa' */ STRINGPARSER_STATE_INT_TEXT,
+ /* '\xab' */ STRINGPARSER_STATE_INT_TEXT,
+ /* '\xac' */ STRINGPARSER_STATE_INT_TEXT,
+ /* '\xad' */ STRINGPARSER_STATE_INT_TEXT,
+ /* '\xae' */ STRINGPARSER_STATE_INT_TEXT,
+ /* '\xaf' */ STRINGPARSER_STATE_INT_TEXT,
+ /* '\xb0' */ STRINGPARSER_STATE_INT_TEXT,
+ /* '\xb1' */ STRINGPARSER_STATE_INT_TEXT,
+ /* '\xb2' */ STRINGPARSER_STATE_INT_TEXT,
+ /* '\xb3' */ STRINGPARSER_STATE_INT_TEXT,
+ /* '\xb4' */ STRINGPARSER_STATE_INT_TEXT,
+ /* '\xb5' */ STRINGPARSER_STATE_INT_TEXT,
+ /* '\xb6' */ STRINGPARSER_STATE_INT_TEXT,
+ /* '\xb7' */ STRINGPARSER_STATE_INT_TEXT,
+ /* '\xb8' */ STRINGPARSER_STATE_INT_TEXT,
+ /* '\xb9' */ STRINGPARSER_STATE_INT_TEXT,
+ /* '\xba' */ STRINGPARSER_STATE_INT_TEXT,
+ /* '\xbb' */ STRINGPARSER_STATE_INT_TEXT,
+ /* '\xbc' */ STRINGPARSER_STATE_INT_TEXT,
+ /* '\xbd' */ STRINGPARSER_STATE_INT_TEXT,
+ /* '\xbe' */ STRINGPARSER_STATE_INT_TEXT,
+ /* '\xbf' */ STRINGPARSER_STATE_INT_TEXT,
+ /* '\xc0' */ STRINGPARSER_STATE_INT_TEXT,
+ /* '\xc1' */ STRINGPARSER_STATE_INT_TEXT,
+ /* '\xc2' */ STRINGPARSER_STATE_INT_TEXT,
+ /* '\xc3' */ STRINGPARSER_STATE_INT_TEXT,
+ /* '\xc4' */ STRINGPARSER_STATE_INT_TEXT,
+ /* '\xc5' */ STRINGPARSER_STATE_INT_TEXT,
+ /* '\xc6' */ STRINGPARSER_STATE_INT_TEXT,
+ /* '\xc7' */ STRINGPARSER_STATE_INT_TEXT,
+ /* '\xc8' */ STRINGPARSER_STATE_INT_TEXT,
+ /* '\xc9' */ STRINGPARSER_STATE_INT_TEXT,
+ /* '\xca' */ STRINGPARSER_STATE_INT_TEXT,
+ /* '\xcb' */ STRINGPARSER_STATE_INT_TEXT,
+ /* '\xcc' */ STRINGPARSER_STATE_INT_TEXT,
+ /* '\xcd' */ STRINGPARSER_STATE_INT_TEXT,
+ /* '\xce' */ STRINGPARSER_STATE_INT_TEXT,
+ /* '\xcf' */ STRINGPARSER_STATE_INT_TEXT,
+ /* '\xd0' */ STRINGPARSER_STATE_INT_TEXT,
+ /* '\xd1' */ STRINGPARSER_STATE_INT_TEXT,
+ /* '\xd2' */ STRINGPARSER_STATE_INT_TEXT,
+ /* '\xd3' */ STRINGPARSER_STATE_INT_TEXT,
+ /* '\xd4' */ STRINGPARSER_STATE_INT_TEXT,
+ /* '\xd5' */ STRINGPARSER_STATE_INT_TEXT,
+ /* '\xd6' */ STRINGPARSER_STATE_INT_TEXT,
+ /* '\xd7' */ STRINGPARSER_STATE_INT_TEXT,
+ /* '\xd8' */ STRINGPARSER_STATE_INT_TEXT,
+ /* '\xd9' */ STRINGPARSER_STATE_INT_TEXT,
+ /* '\xda' */ STRINGPARSER_STATE_INT_TEXT,
+ /* '\xdb' */ STRINGPARSER_STATE_INT_TEXT,
+ /* '\xdc' */ STRINGPARSER_STATE_INT_TEXT,
+ /* '\xdd' */ STRINGPARSER_STATE_INT_TEXT,
+ /* '\xde' */ STRINGPARSER_STATE_INT_TEXT,
+ /* '\xdf' */ STRINGPARSER_STATE_INT_TEXT,
+ /* '\xe0' */ STRINGPARSER_STATE_INT_TEXT,
+ /* '\xe1' */ STRINGPARSER_STATE_INT_TEXT,
+ /* '\xe2' */ STRINGPARSER_STATE_INT_TEXT,
+ /* '\xe3' */ STRINGPARSER_STATE_INT_TEXT,
+ /* '\xe4' */ STRINGPARSER_STATE_INT_TEXT,
+ /* '\xe5' */ STRINGPARSER_STATE_INT_TEXT,
+ /* '\xe6' */ STRINGPARSER_STATE_INT_TEXT,
+ /* '\xe7' */ STRINGPARSER_STATE_INT_TEXT,
+ /* '\xe8' */ STRINGPARSER_STATE_INT_TEXT,
+ /* '\xe9' */ STRINGPARSER_STATE_INT_TEXT,
+ /* '\xea' */ STRINGPARSER_STATE_INT_TEXT,
+ /* '\xeb' */ STRINGPARSER_STATE_INT_TEXT,
+ /* '\xec' */ STRINGPARSER_STATE_INT_TEXT,
+ /* '\xed' */ STRINGPARSER_STATE_INT_TEXT,
+ /* '\xee' */ STRINGPARSER_STATE_INT_TEXT,
+ /* '\xef' */ STRINGPARSER_STATE_INT_TEXT,
+ /* '\xf0' */ STRINGPARSER_STATE_INT_TEXT,
+ /* '\xf1' */ STRINGPARSER_STATE_INT_TEXT,
+ /* '\xf2' */ STRINGPARSER_STATE_INT_TEXT,
+ /* '\xf3' */ STRINGPARSER_STATE_INT_TEXT,
+ /* '\xf4' */ STRINGPARSER_STATE_INT_TEXT,
+ /* '\xf5' */ STRINGPARSER_STATE_INT_TEXT,
+ /* '\xf6' */ STRINGPARSER_STATE_INT_TEXT,
+ /* '\xf7' */ STRINGPARSER_STATE_INT_TEXT,
+ /* '\xf8' */ STRINGPARSER_STATE_INT_TEXT,
+ /* '\xf9' */ STRINGPARSER_STATE_INT_TEXT,
+ /* '\xfa' */ STRINGPARSER_STATE_INT_TEXT,
+ /* '\xfb' */ STRINGPARSER_STATE_INT_TEXT,
+ /* '\xfc' */ STRINGPARSER_STATE_INT_TEXT,
+ /* '\xfd' */ STRINGPARSER_STATE_INT_TEXT,
+ /* '\xfe' */ STRINGPARSER_STATE_INT_TEXT
+};
+
+static const int stringparser_transition_row_string[] = {
+ /* '\x00' */ STRINGPARSER_STATE_INT_STRING,
+ /* '\x01' */ STRINGPARSER_STATE_INT_STRING,
+ /* '\x02' */ STRINGPARSER_STATE_INT_STRING,
+ /* '\x03' */ STRINGPARSER_STATE_INT_STRING,
+ /* '\x04' */ STRINGPARSER_STATE_INT_STRING,
+ /* '\x05' */ STRINGPARSER_STATE_INT_STRING,
+ /* '\x06' */ STRINGPARSER_STATE_INT_STRING,
+ /* '\x07' */ STRINGPARSER_STATE_INT_STRING,
+ /* '\x08' */ STRINGPARSER_STATE_INT_STRING,
+ /* '\t' */ STRINGPARSER_STATE_INT_STRING,
+ /* '\n' */ STRINGPARSER_STATE_INT_STRING,
+ /* '\x0b' */ STRINGPARSER_STATE_INT_STRING,
+ /* '\x0c' */ STRINGPARSER_STATE_INT_STRING,
+ /* '\r' */ STRINGPARSER_STATE_INT_STRING,
+ /* '\x0e' */ STRINGPARSER_STATE_INT_STRING,
+ /* '\x0f' */ STRINGPARSER_STATE_INT_STRING,
+ /* '\x10' */ STRINGPARSER_STATE_INT_STRING,
+ /* '\x11' */ STRINGPARSER_STATE_INT_STRING,
+ /* '\x12' */ STRINGPARSER_STATE_INT_STRING,
+ /* '\x13' */ STRINGPARSER_STATE_INT_STRING,
+ /* '\x14' */ STRINGPARSER_STATE_INT_STRING,
+ /* '\x15' */ STRINGPARSER_STATE_INT_STRING,
+ /* '\x16' */ STRINGPARSER_STATE_INT_STRING,
+ /* '\x17' */ STRINGPARSER_STATE_INT_STRING,
+ /* '\x18' */ STRINGPARSER_STATE_INT_STRING,
+ /* '\x19' */ STRINGPARSER_STATE_INT_STRING,
+ /* '\x1a' */ STRINGPARSER_STATE_INT_STRING,
+ /* '\x1b' */ STRINGPARSER_STATE_INT_STRING,
+ /* '\x1c' */ STRINGPARSER_STATE_INT_STRING,
+ /* '\x1d' */ STRINGPARSER_STATE_INT_STRING,
+ /* '\x1e' */ STRINGPARSER_STATE_INT_STRING,
+ /* '\x1f' */ STRINGPARSER_STATE_INT_STRING,
+ /* ' ' */ STRINGPARSER_STATE_INT_STRING,
+ /* '!' */ STRINGPARSER_STATE_INT_STRING,
+ /* '"' */ STRINGPARSER_STATE_INT_TEXT,
+ /* '#' */ STRINGPARSER_STATE_INT_STRING,
+ /* '$' */ STRINGPARSER_STATE_INT_STRING,
+ /* '%' */ STRINGPARSER_STATE_INT_STRING,
+ /* '&' */ STRINGPARSER_STATE_INT_STRING,
+ /* "'" */ STRINGPARSER_STATE_INT_STRING,
+ /* '(' */ STRINGPARSER_STATE_INT_STRING,
+ /* ')' */ STRINGPARSER_STATE_INT_STRING,
+ /* '*' */ STRINGPARSER_STATE_INT_STRING,
+ /* '+' */ STRINGPARSER_STATE_INT_STRING,
+ /* ',' */ STRINGPARSER_STATE_INT_STRING,
+ /* '-' */ STRINGPARSER_STATE_INT_STRING,
+ /* '.' */ STRINGPARSER_STATE_INT_STRING,
+ /* '/' */ STRINGPARSER_STATE_INT_STRING,
+ /* '0' */ STRINGPARSER_STATE_INT_STRING,
+ /* '1' */ STRINGPARSER_STATE_INT_STRING,
+ /* '2' */ STRINGPARSER_STATE_INT_STRING,
+ /* '3' */ STRINGPARSER_STATE_INT_STRING,
+ /* '4' */ STRINGPARSER_STATE_INT_STRING,
+ /* '5' */ STRINGPARSER_STATE_INT_STRING,
+ /* '6' */ STRINGPARSER_STATE_INT_STRING,
+ /* '7' */ STRINGPARSER_STATE_INT_STRING,
+ /* '8' */ STRINGPARSER_STATE_INT_STRING,
+ /* '9' */ STRINGPARSER_STATE_INT_STRING,
+ /* ':' */ STRINGPARSER_STATE_INT_STRING,
+ /* ';' */ STRINGPARSER_STATE_INT_STRING,
+ /* '<' */ STRINGPARSER_STATE_INT_STRING,
+ /* '=' */ STRINGPARSER_STATE_INT_STRING,
+ /* '>' */ STRINGPARSER_STATE_INT_STRING,
+ /* '?' */ STRINGPARSER_STATE_INT_STRING,
+ /* '@' */ STRINGPARSER_STATE_INT_STRING,
+ /* 'A' */ STRINGPARSER_STATE_INT_STRING,
+ /* 'B' */ STRINGPARSER_STATE_INT_STRING,
+ /* 'C' */ STRINGPARSER_STATE_INT_STRING,
+ /* 'D' */ STRINGPARSER_STATE_INT_STRING,
+ /* 'E' */ STRINGPARSER_STATE_INT_STRING,
+ /* 'F' */ STRINGPARSER_STATE_INT_STRING,
+ /* 'G' */ STRINGPARSER_STATE_INT_STRING,
+ /* 'H' */ STRINGPARSER_STATE_INT_STRING,
+ /* 'I' */ STRINGPARSER_STATE_INT_STRING,
+ /* 'J' */ STRINGPARSER_STATE_INT_STRING,
+ /* 'K' */ STRINGPARSER_STATE_INT_STRING,
+ /* 'L' */ STRINGPARSER_STATE_INT_STRING,
+ /* 'M' */ STRINGPARSER_STATE_INT_STRING,
+ /* 'N' */ STRINGPARSER_STATE_INT_STRING,
+ /* 'O' */ STRINGPARSER_STATE_INT_STRING,
+ /* 'P' */ STRINGPARSER_STATE_INT_STRING,
+ /* 'Q' */ STRINGPARSER_STATE_INT_STRING,
+ /* 'R' */ STRINGPARSER_STATE_INT_STRING,
+ /* 'S' */ STRINGPARSER_STATE_INT_STRING,
+ /* 'T' */ STRINGPARSER_STATE_INT_STRING,
+ /* 'U' */ STRINGPARSER_STATE_INT_STRING,
+ /* 'V' */ STRINGPARSER_STATE_INT_STRING,
+ /* 'W' */ STRINGPARSER_STATE_INT_STRING,
+ /* 'X' */ STRINGPARSER_STATE_INT_STRING,
+ /* 'Y' */ STRINGPARSER_STATE_INT_STRING,
+ /* 'Z' */ STRINGPARSER_STATE_INT_STRING,
+ /* '[' */ STRINGPARSER_STATE_INT_STRING,
+ /* '\\' */ STRINGPARSER_STATE_INT_STRING_ESCAPE,
+ /* ']' */ STRINGPARSER_STATE_INT_STRING,
+ /* '^' */ STRINGPARSER_STATE_INT_STRING,
+ /* '_' */ STRINGPARSER_STATE_INT_STRING,
+ /* '`' */ STRINGPARSER_STATE_INT_STRING,
+ /* 'a' */ STRINGPARSER_STATE_INT_STRING,
+ /* 'b' */ STRINGPARSER_STATE_INT_STRING,
+ /* 'c' */ STRINGPARSER_STATE_INT_STRING,
+ /* 'd' */ STRINGPARSER_STATE_INT_STRING,
+ /* 'e' */ STRINGPARSER_STATE_INT_STRING,
+ /* 'f' */ STRINGPARSER_STATE_INT_STRING,
+ /* 'g' */ STRINGPARSER_STATE_INT_STRING,
+ /* 'h' */ STRINGPARSER_STATE_INT_STRING,
+ /* 'i' */ STRINGPARSER_STATE_INT_STRING,
+ /* 'j' */ STRINGPARSER_STATE_INT_STRING,
+ /* 'k' */ STRINGPARSER_STATE_INT_STRING,
+ /* 'l' */ STRINGPARSER_STATE_INT_STRING,
+ /* 'm' */ STRINGPARSER_STATE_INT_STRING,
+ /* 'n' */ STRINGPARSER_STATE_INT_STRING,
+ /* 'o' */ STRINGPARSER_STATE_INT_STRING,
+ /* 'p' */ STRINGPARSER_STATE_INT_STRING,
+ /* 'q' */ STRINGPARSER_STATE_INT_STRING,
+ /* 'r' */ STRINGPARSER_STATE_INT_STRING,
+ /* 's' */ STRINGPARSER_STATE_INT_STRING,
+ /* 't' */ STRINGPARSER_STATE_INT_STRING,
+ /* 'u' */ STRINGPARSER_STATE_INT_STRING,
+ /* 'v' */ STRINGPARSER_STATE_INT_STRING,
+ /* 'w' */ STRINGPARSER_STATE_INT_STRING,
+ /* 'x' */ STRINGPARSER_STATE_INT_STRING,
+ /* 'y' */ STRINGPARSER_STATE_INT_STRING,
+ /* 'z' */ STRINGPARSER_STATE_INT_STRING,
+ /* '{' */ STRINGPARSER_STATE_INT_STRING,
+ /* '|' */ STRINGPARSER_STATE_INT_STRING,
+ /* '}' */ STRINGPARSER_STATE_INT_STRING,
+ /* '~' */ STRINGPARSER_STATE_INT_STRING,
+ /* '\x7f' */ STRINGPARSER_STATE_INT_STRING,
+ /* '\x80' */ STRINGPARSER_STATE_INT_STRING,
+ /* '\x81' */ STRINGPARSER_STATE_INT_STRING,
+ /* '\x82' */ STRINGPARSER_STATE_INT_STRING,
+ /* '\x83' */ STRINGPARSER_STATE_INT_STRING,
+ /* '\x84' */ STRINGPARSER_STATE_INT_STRING,
+ /* '\x85' */ STRINGPARSER_STATE_INT_STRING,
+ /* '\x86' */ STRINGPARSER_STATE_INT_STRING,
+ /* '\x87' */ STRINGPARSER_STATE_INT_STRING,
+ /* '\x88' */ STRINGPARSER_STATE_INT_STRING,
+ /* '\x89' */ STRINGPARSER_STATE_INT_STRING,
+ /* '\x8a' */ STRINGPARSER_STATE_INT_STRING,
+ /* '\x8b' */ STRINGPARSER_STATE_INT_STRING,
+ /* '\x8c' */ STRINGPARSER_STATE_INT_STRING,
+ /* '\x8d' */ STRINGPARSER_STATE_INT_STRING,
+ /* '\x8e' */ STRINGPARSER_STATE_INT_STRING,
+ /* '\x8f' */ STRINGPARSER_STATE_INT_STRING,
+ /* '\x90' */ STRINGPARSER_STATE_INT_STRING,
+ /* '\x91' */ STRINGPARSER_STATE_INT_STRING,
+ /* '\x92' */ STRINGPARSER_STATE_INT_STRING,
+ /* '\x93' */ STRINGPARSER_STATE_INT_STRING,
+ /* '\x94' */ STRINGPARSER_STATE_INT_STRING,
+ /* '\x95' */ STRINGPARSER_STATE_INT_STRING,
+ /* '\x96' */ STRINGPARSER_STATE_INT_STRING,
+ /* '\x97' */ STRINGPARSER_STATE_INT_STRING,
+ /* '\x98' */ STRINGPARSER_STATE_INT_STRING,
+ /* '\x99' */ STRINGPARSER_STATE_INT_STRING,
+ /* '\x9a' */ STRINGPARSER_STATE_INT_STRING,
+ /* '\x9b' */ STRINGPARSER_STATE_INT_STRING,
+ /* '\x9c' */ STRINGPARSER_STATE_INT_STRING,
+ /* '\x9d' */ STRINGPARSER_STATE_INT_STRING,
+ /* '\x9e' */ STRINGPARSER_STATE_INT_STRING,
+ /* '\x9f' */ STRINGPARSER_STATE_INT_STRING,
+ /* '\xa0' */ STRINGPARSER_STATE_INT_STRING,
+ /* '\xa1' */ STRINGPARSER_STATE_INT_STRING,
+ /* '\xa2' */ STRINGPARSER_STATE_INT_STRING,
+ /* '\xa3' */ STRINGPARSER_STATE_INT_STRING,
+ /* '\xa4' */ STRINGPARSER_STATE_INT_STRING,
+ /* '\xa5' */ STRINGPARSER_STATE_INT_STRING,
+ /* '\xa6' */ STRINGPARSER_STATE_INT_STRING,
+ /* '\xa7' */ STRINGPARSER_STATE_INT_STRING,
+ /* '\xa8' */ STRINGPARSER_STATE_INT_STRING,
+ /* '\xa9' */ STRINGPARSER_STATE_INT_STRING,
+ /* '\xaa' */ STRINGPARSER_STATE_INT_STRING,
+ /* '\xab' */ STRINGPARSER_STATE_INT_STRING,
+ /* '\xac' */ STRINGPARSER_STATE_INT_STRING,
+ /* '\xad' */ STRINGPARSER_STATE_INT_STRING,
+ /* '\xae' */ STRINGPARSER_STATE_INT_STRING,
+ /* '\xaf' */ STRINGPARSER_STATE_INT_STRING,
+ /* '\xb0' */ STRINGPARSER_STATE_INT_STRING,
+ /* '\xb1' */ STRINGPARSER_STATE_INT_STRING,
+ /* '\xb2' */ STRINGPARSER_STATE_INT_STRING,
+ /* '\xb3' */ STRINGPARSER_STATE_INT_STRING,
+ /* '\xb4' */ STRINGPARSER_STATE_INT_STRING,
+ /* '\xb5' */ STRINGPARSER_STATE_INT_STRING,
+ /* '\xb6' */ STRINGPARSER_STATE_INT_STRING,
+ /* '\xb7' */ STRINGPARSER_STATE_INT_STRING,
+ /* '\xb8' */ STRINGPARSER_STATE_INT_STRING,
+ /* '\xb9' */ STRINGPARSER_STATE_INT_STRING,
+ /* '\xba' */ STRINGPARSER_STATE_INT_STRING,
+ /* '\xbb' */ STRINGPARSER_STATE_INT_STRING,
+ /* '\xbc' */ STRINGPARSER_STATE_INT_STRING,
+ /* '\xbd' */ STRINGPARSER_STATE_INT_STRING,
+ /* '\xbe' */ STRINGPARSER_STATE_INT_STRING,
+ /* '\xbf' */ STRINGPARSER_STATE_INT_STRING,
+ /* '\xc0' */ STRINGPARSER_STATE_INT_STRING,
+ /* '\xc1' */ STRINGPARSER_STATE_INT_STRING,
+ /* '\xc2' */ STRINGPARSER_STATE_INT_STRING,
+ /* '\xc3' */ STRINGPARSER_STATE_INT_STRING,
+ /* '\xc4' */ STRINGPARSER_STATE_INT_STRING,
+ /* '\xc5' */ STRINGPARSER_STATE_INT_STRING,
+ /* '\xc6' */ STRINGPARSER_STATE_INT_STRING,
+ /* '\xc7' */ STRINGPARSER_STATE_INT_STRING,
+ /* '\xc8' */ STRINGPARSER_STATE_INT_STRING,
+ /* '\xc9' */ STRINGPARSER_STATE_INT_STRING,
+ /* '\xca' */ STRINGPARSER_STATE_INT_STRING,
+ /* '\xcb' */ STRINGPARSER_STATE_INT_STRING,
+ /* '\xcc' */ STRINGPARSER_STATE_INT_STRING,
+ /* '\xcd' */ STRINGPARSER_STATE_INT_STRING,
+ /* '\xce' */ STRINGPARSER_STATE_INT_STRING,
+ /* '\xcf' */ STRINGPARSER_STATE_INT_STRING,
+ /* '\xd0' */ STRINGPARSER_STATE_INT_STRING,
+ /* '\xd1' */ STRINGPARSER_STATE_INT_STRING,
+ /* '\xd2' */ STRINGPARSER_STATE_INT_STRING,
+ /* '\xd3' */ STRINGPARSER_STATE_INT_STRING,
+ /* '\xd4' */ STRINGPARSER_STATE_INT_STRING,
+ /* '\xd5' */ STRINGPARSER_STATE_INT_STRING,
+ /* '\xd6' */ STRINGPARSER_STATE_INT_STRING,
+ /* '\xd7' */ STRINGPARSER_STATE_INT_STRING,
+ /* '\xd8' */ STRINGPARSER_STATE_INT_STRING,
+ /* '\xd9' */ STRINGPARSER_STATE_INT_STRING,
+ /* '\xda' */ STRINGPARSER_STATE_INT_STRING,
+ /* '\xdb' */ STRINGPARSER_STATE_INT_STRING,
+ /* '\xdc' */ STRINGPARSER_STATE_INT_STRING,
+ /* '\xdd' */ STRINGPARSER_STATE_INT_STRING,
+ /* '\xde' */ STRINGPARSER_STATE_INT_STRING,
+ /* '\xdf' */ STRINGPARSER_STATE_INT_STRING,
+ /* '\xe0' */ STRINGPARSER_STATE_INT_STRING,
+ /* '\xe1' */ STRINGPARSER_STATE_INT_STRING,
+ /* '\xe2' */ STRINGPARSER_STATE_INT_STRING,
+ /* '\xe3' */ STRINGPARSER_STATE_INT_STRING,
+ /* '\xe4' */ STRINGPARSER_STATE_INT_STRING,
+ /* '\xe5' */ STRINGPARSER_STATE_INT_STRING,
+ /* '\xe6' */ STRINGPARSER_STATE_INT_STRING,
+ /* '\xe7' */ STRINGPARSER_STATE_INT_STRING,
+ /* '\xe8' */ STRINGPARSER_STATE_INT_STRING,
+ /* '\xe9' */ STRINGPARSER_STATE_INT_STRING,
+ /* '\xea' */ STRINGPARSER_STATE_INT_STRING,
+ /* '\xeb' */ STRINGPARSER_STATE_INT_STRING,
+ /* '\xec' */ STRINGPARSER_STATE_INT_STRING,
+ /* '\xed' */ STRINGPARSER_STATE_INT_STRING,
+ /* '\xee' */ STRINGPARSER_STATE_INT_STRING,
+ /* '\xef' */ STRINGPARSER_STATE_INT_STRING,
+ /* '\xf0' */ STRINGPARSER_STATE_INT_STRING,
+ /* '\xf1' */ STRINGPARSER_STATE_INT_STRING,
+ /* '\xf2' */ STRINGPARSER_STATE_INT_STRING,
+ /* '\xf3' */ STRINGPARSER_STATE_INT_STRING,
+ /* '\xf4' */ STRINGPARSER_STATE_INT_STRING,
+ /* '\xf5' */ STRINGPARSER_STATE_INT_STRING,
+ /* '\xf6' */ STRINGPARSER_STATE_INT_STRING,
+ /* '\xf7' */ STRINGPARSER_STATE_INT_STRING,
+ /* '\xf8' */ STRINGPARSER_STATE_INT_STRING,
+ /* '\xf9' */ STRINGPARSER_STATE_INT_STRING,
+ /* '\xfa' */ STRINGPARSER_STATE_INT_STRING,
+ /* '\xfb' */ STRINGPARSER_STATE_INT_STRING,
+ /* '\xfc' */ STRINGPARSER_STATE_INT_STRING,
+ /* '\xfd' */ STRINGPARSER_STATE_INT_STRING,
+ /* '\xfe' */ STRINGPARSER_STATE_INT_STRING
+};
+
+static const int stringparser_transition_row_string_escape[] = { /* Transition row for the string_escape state: every listed byte value ('\x00'..'\xfe', in ascending order) leads back to STRINGPARSER_STATE_INT_STRING. */
+ /* '\x00' */ STRINGPARSER_STATE_INT_STRING,
+ /* '\x01' */ STRINGPARSER_STATE_INT_STRING,
+ /* '\x02' */ STRINGPARSER_STATE_INT_STRING,
+ /* '\x03' */ STRINGPARSER_STATE_INT_STRING,
+ /* '\x04' */ STRINGPARSER_STATE_INT_STRING,
+ /* '\x05' */ STRINGPARSER_STATE_INT_STRING,
+ /* '\x06' */ STRINGPARSER_STATE_INT_STRING,
+ /* '\x07' */ STRINGPARSER_STATE_INT_STRING,
+ /* '\x08' */ STRINGPARSER_STATE_INT_STRING,
+ /* '\t' */ STRINGPARSER_STATE_INT_STRING,
+ /* '\n' */ STRINGPARSER_STATE_INT_STRING,
+ /* '\x0b' */ STRINGPARSER_STATE_INT_STRING,
+ /* '\x0c' */ STRINGPARSER_STATE_INT_STRING,
+ /* '\r' */ STRINGPARSER_STATE_INT_STRING,
+ /* '\x0e' */ STRINGPARSER_STATE_INT_STRING,
+ /* '\x0f' */ STRINGPARSER_STATE_INT_STRING,
+ /* '\x10' */ STRINGPARSER_STATE_INT_STRING,
+ /* '\x11' */ STRINGPARSER_STATE_INT_STRING,
+ /* '\x12' */ STRINGPARSER_STATE_INT_STRING,
+ /* '\x13' */ STRINGPARSER_STATE_INT_STRING,
+ /* '\x14' */ STRINGPARSER_STATE_INT_STRING,
+ /* '\x15' */ STRINGPARSER_STATE_INT_STRING,
+ /* '\x16' */ STRINGPARSER_STATE_INT_STRING,
+ /* '\x17' */ STRINGPARSER_STATE_INT_STRING,
+ /* '\x18' */ STRINGPARSER_STATE_INT_STRING,
+ /* '\x19' */ STRINGPARSER_STATE_INT_STRING,
+ /* '\x1a' */ STRINGPARSER_STATE_INT_STRING,
+ /* '\x1b' */ STRINGPARSER_STATE_INT_STRING,
+ /* '\x1c' */ STRINGPARSER_STATE_INT_STRING,
+ /* '\x1d' */ STRINGPARSER_STATE_INT_STRING,
+ /* '\x1e' */ STRINGPARSER_STATE_INT_STRING,
+ /* '\x1f' */ STRINGPARSER_STATE_INT_STRING,
+ /* ' ' */ STRINGPARSER_STATE_INT_STRING,
+ /* '!' */ STRINGPARSER_STATE_INT_STRING,
+ /* '"' */ STRINGPARSER_STATE_INT_STRING,
+ /* '#' */ STRINGPARSER_STATE_INT_STRING,
+ /* '$' */ STRINGPARSER_STATE_INT_STRING,
+ /* '%' */ STRINGPARSER_STATE_INT_STRING,
+ /* '&' */ STRINGPARSER_STATE_INT_STRING,
+ /* "'" */ STRINGPARSER_STATE_INT_STRING,
+ /* '(' */ STRINGPARSER_STATE_INT_STRING,
+ /* ')' */ STRINGPARSER_STATE_INT_STRING,
+ /* '*' */ STRINGPARSER_STATE_INT_STRING,
+ /* '+' */ STRINGPARSER_STATE_INT_STRING,
+ /* ',' */ STRINGPARSER_STATE_INT_STRING,
+ /* '-' */ STRINGPARSER_STATE_INT_STRING,
+ /* '.' */ STRINGPARSER_STATE_INT_STRING,
+ /* '/' */ STRINGPARSER_STATE_INT_STRING,
+ /* '0' */ STRINGPARSER_STATE_INT_STRING,
+ /* '1' */ STRINGPARSER_STATE_INT_STRING,
+ /* '2' */ STRINGPARSER_STATE_INT_STRING,
+ /* '3' */ STRINGPARSER_STATE_INT_STRING,
+ /* '4' */ STRINGPARSER_STATE_INT_STRING,
+ /* '5' */ STRINGPARSER_STATE_INT_STRING,
+ /* '6' */ STRINGPARSER_STATE_INT_STRING,
+ /* '7' */ STRINGPARSER_STATE_INT_STRING,
+ /* '8' */ STRINGPARSER_STATE_INT_STRING,
+ /* '9' */ STRINGPARSER_STATE_INT_STRING,
+ /* ':' */ STRINGPARSER_STATE_INT_STRING,
+ /* ';' */ STRINGPARSER_STATE_INT_STRING,
+ /* '<' */ STRINGPARSER_STATE_INT_STRING,
+ /* '=' */ STRINGPARSER_STATE_INT_STRING,
+ /* '>' */ STRINGPARSER_STATE_INT_STRING,
+ /* '?' */ STRINGPARSER_STATE_INT_STRING,
+ /* '@' */ STRINGPARSER_STATE_INT_STRING,
+ /* 'A' */ STRINGPARSER_STATE_INT_STRING,
+ /* 'B' */ STRINGPARSER_STATE_INT_STRING,
+ /* 'C' */ STRINGPARSER_STATE_INT_STRING,
+ /* 'D' */ STRINGPARSER_STATE_INT_STRING,
+ /* 'E' */ STRINGPARSER_STATE_INT_STRING,
+ /* 'F' */ STRINGPARSER_STATE_INT_STRING,
+ /* 'G' */ STRINGPARSER_STATE_INT_STRING,
+ /* 'H' */ STRINGPARSER_STATE_INT_STRING,
+ /* 'I' */ STRINGPARSER_STATE_INT_STRING,
+ /* 'J' */ STRINGPARSER_STATE_INT_STRING,
+ /* 'K' */ STRINGPARSER_STATE_INT_STRING,
+ /* 'L' */ STRINGPARSER_STATE_INT_STRING,
+ /* 'M' */ STRINGPARSER_STATE_INT_STRING,
+ /* 'N' */ STRINGPARSER_STATE_INT_STRING,
+ /* 'O' */ STRINGPARSER_STATE_INT_STRING,
+ /* 'P' */ STRINGPARSER_STATE_INT_STRING,
+ /* 'Q' */ STRINGPARSER_STATE_INT_STRING,
+ /* 'R' */ STRINGPARSER_STATE_INT_STRING,
+ /* 'S' */ STRINGPARSER_STATE_INT_STRING,
+ /* 'T' */ STRINGPARSER_STATE_INT_STRING,
+ /* 'U' */ STRINGPARSER_STATE_INT_STRING,
+ /* 'V' */ STRINGPARSER_STATE_INT_STRING,
+ /* 'W' */ STRINGPARSER_STATE_INT_STRING,
+ /* 'X' */ STRINGPARSER_STATE_INT_STRING,
+ /* 'Y' */ STRINGPARSER_STATE_INT_STRING,
+ /* 'Z' */ STRINGPARSER_STATE_INT_STRING,
+ /* '[' */ STRINGPARSER_STATE_INT_STRING,
+ /* '\\' */ STRINGPARSER_STATE_INT_STRING,
+ /* ']' */ STRINGPARSER_STATE_INT_STRING,
+ /* '^' */ STRINGPARSER_STATE_INT_STRING,
+ /* '_' */ STRINGPARSER_STATE_INT_STRING,
+ /* '`' */ STRINGPARSER_STATE_INT_STRING,
+ /* 'a' */ STRINGPARSER_STATE_INT_STRING,
+ /* 'b' */ STRINGPARSER_STATE_INT_STRING,
+ /* 'c' */ STRINGPARSER_STATE_INT_STRING,
+ /* 'd' */ STRINGPARSER_STATE_INT_STRING,
+ /* 'e' */ STRINGPARSER_STATE_INT_STRING,
+ /* 'f' */ STRINGPARSER_STATE_INT_STRING,
+ /* 'g' */ STRINGPARSER_STATE_INT_STRING,
+ /* 'h' */ STRINGPARSER_STATE_INT_STRING,
+ /* 'i' */ STRINGPARSER_STATE_INT_STRING,
+ /* 'j' */ STRINGPARSER_STATE_INT_STRING,
+ /* 'k' */ STRINGPARSER_STATE_INT_STRING,
+ /* 'l' */ STRINGPARSER_STATE_INT_STRING,
+ /* 'm' */ STRINGPARSER_STATE_INT_STRING,
+ /* 'n' */ STRINGPARSER_STATE_INT_STRING,
+ /* 'o' */ STRINGPARSER_STATE_INT_STRING,
+ /* 'p' */ STRINGPARSER_STATE_INT_STRING,
+ /* 'q' */ STRINGPARSER_STATE_INT_STRING,
+ /* 'r' */ STRINGPARSER_STATE_INT_STRING,
+ /* 's' */ STRINGPARSER_STATE_INT_STRING,
+ /* 't' */ STRINGPARSER_STATE_INT_STRING,
+ /* 'u' */ STRINGPARSER_STATE_INT_STRING,
+ /* 'v' */ STRINGPARSER_STATE_INT_STRING,
+ /* 'w' */ STRINGPARSER_STATE_INT_STRING,
+ /* 'x' */ STRINGPARSER_STATE_INT_STRING,
+ /* 'y' */ STRINGPARSER_STATE_INT_STRING,
+ /* 'z' */ STRINGPARSER_STATE_INT_STRING,
+ /* '{' */ STRINGPARSER_STATE_INT_STRING,
+ /* '|' */ STRINGPARSER_STATE_INT_STRING,
+ /* '}' */ STRINGPARSER_STATE_INT_STRING,
+ /* '~' */ STRINGPARSER_STATE_INT_STRING,
+ /* '\x7f' */ STRINGPARSER_STATE_INT_STRING,
+ /* '\x80' */ STRINGPARSER_STATE_INT_STRING,
+ /* '\x81' */ STRINGPARSER_STATE_INT_STRING,
+ /* '\x82' */ STRINGPARSER_STATE_INT_STRING,
+ /* '\x83' */ STRINGPARSER_STATE_INT_STRING,
+ /* '\x84' */ STRINGPARSER_STATE_INT_STRING,
+ /* '\x85' */ STRINGPARSER_STATE_INT_STRING,
+ /* '\x86' */ STRINGPARSER_STATE_INT_STRING,
+ /* '\x87' */ STRINGPARSER_STATE_INT_STRING,
+ /* '\x88' */ STRINGPARSER_STATE_INT_STRING,
+ /* '\x89' */ STRINGPARSER_STATE_INT_STRING,
+ /* '\x8a' */ STRINGPARSER_STATE_INT_STRING,
+ /* '\x8b' */ STRINGPARSER_STATE_INT_STRING,
+ /* '\x8c' */ STRINGPARSER_STATE_INT_STRING,
+ /* '\x8d' */ STRINGPARSER_STATE_INT_STRING,
+ /* '\x8e' */ STRINGPARSER_STATE_INT_STRING,
+ /* '\x8f' */ STRINGPARSER_STATE_INT_STRING,
+ /* '\x90' */ STRINGPARSER_STATE_INT_STRING,
+ /* '\x91' */ STRINGPARSER_STATE_INT_STRING,
+ /* '\x92' */ STRINGPARSER_STATE_INT_STRING,
+ /* '\x93' */ STRINGPARSER_STATE_INT_STRING,
+ /* '\x94' */ STRINGPARSER_STATE_INT_STRING,
+ /* '\x95' */ STRINGPARSER_STATE_INT_STRING,
+ /* '\x96' */ STRINGPARSER_STATE_INT_STRING,
+ /* '\x97' */ STRINGPARSER_STATE_INT_STRING,
+ /* '\x98' */ STRINGPARSER_STATE_INT_STRING,
+ /* '\x99' */ STRINGPARSER_STATE_INT_STRING,
+ /* '\x9a' */ STRINGPARSER_STATE_INT_STRING,
+ /* '\x9b' */ STRINGPARSER_STATE_INT_STRING,
+ /* '\x9c' */ STRINGPARSER_STATE_INT_STRING,
+ /* '\x9d' */ STRINGPARSER_STATE_INT_STRING,
+ /* '\x9e' */ STRINGPARSER_STATE_INT_STRING,
+ /* '\x9f' */ STRINGPARSER_STATE_INT_STRING,
+ /* '\xa0' */ STRINGPARSER_STATE_INT_STRING,
+ /* '\xa1' */ STRINGPARSER_STATE_INT_STRING,
+ /* '\xa2' */ STRINGPARSER_STATE_INT_STRING,
+ /* '\xa3' */ STRINGPARSER_STATE_INT_STRING,
+ /* '\xa4' */ STRINGPARSER_STATE_INT_STRING,
+ /* '\xa5' */ STRINGPARSER_STATE_INT_STRING,
+ /* '\xa6' */ STRINGPARSER_STATE_INT_STRING,
+ /* '\xa7' */ STRINGPARSER_STATE_INT_STRING,
+ /* '\xa8' */ STRINGPARSER_STATE_INT_STRING,
+ /* '\xa9' */ STRINGPARSER_STATE_INT_STRING,
+ /* '\xaa' */ STRINGPARSER_STATE_INT_STRING,
+ /* '\xab' */ STRINGPARSER_STATE_INT_STRING,
+ /* '\xac' */ STRINGPARSER_STATE_INT_STRING,
+ /* '\xad' */ STRINGPARSER_STATE_INT_STRING,
+ /* '\xae' */ STRINGPARSER_STATE_INT_STRING,
+ /* '\xaf' */ STRINGPARSER_STATE_INT_STRING,
+ /* '\xb0' */ STRINGPARSER_STATE_INT_STRING,
+ /* '\xb1' */ STRINGPARSER_STATE_INT_STRING,
+ /* '\xb2' */ STRINGPARSER_STATE_INT_STRING,
+ /* '\xb3' */ STRINGPARSER_STATE_INT_STRING,
+ /* '\xb4' */ STRINGPARSER_STATE_INT_STRING,
+ /* '\xb5' */ STRINGPARSER_STATE_INT_STRING,
+ /* '\xb6' */ STRINGPARSER_STATE_INT_STRING,
+ /* '\xb7' */ STRINGPARSER_STATE_INT_STRING,
+ /* '\xb8' */ STRINGPARSER_STATE_INT_STRING,
+ /* '\xb9' */ STRINGPARSER_STATE_INT_STRING,
+ /* '\xba' */ STRINGPARSER_STATE_INT_STRING,
+ /* '\xbb' */ STRINGPARSER_STATE_INT_STRING,
+ /* '\xbc' */ STRINGPARSER_STATE_INT_STRING,
+ /* '\xbd' */ STRINGPARSER_STATE_INT_STRING,
+ /* '\xbe' */ STRINGPARSER_STATE_INT_STRING,
+ /* '\xbf' */ STRINGPARSER_STATE_INT_STRING,
+ /* '\xc0' */ STRINGPARSER_STATE_INT_STRING,
+ /* '\xc1' */ STRINGPARSER_STATE_INT_STRING,
+ /* '\xc2' */ STRINGPARSER_STATE_INT_STRING,
+ /* '\xc3' */ STRINGPARSER_STATE_INT_STRING,
+ /* '\xc4' */ STRINGPARSER_STATE_INT_STRING,
+ /* '\xc5' */ STRINGPARSER_STATE_INT_STRING,
+ /* '\xc6' */ STRINGPARSER_STATE_INT_STRING,
+ /* '\xc7' */ STRINGPARSER_STATE_INT_STRING,
+ /* '\xc8' */ STRINGPARSER_STATE_INT_STRING,
+ /* '\xc9' */ STRINGPARSER_STATE_INT_STRING,
+ /* '\xca' */ STRINGPARSER_STATE_INT_STRING,
+ /* '\xcb' */ STRINGPARSER_STATE_INT_STRING,
+ /* '\xcc' */ STRINGPARSER_STATE_INT_STRING,
+ /* '\xcd' */ STRINGPARSER_STATE_INT_STRING,
+ /* '\xce' */ STRINGPARSER_STATE_INT_STRING,
+ /* '\xcf' */ STRINGPARSER_STATE_INT_STRING,
+ /* '\xd0' */ STRINGPARSER_STATE_INT_STRING,
+ /* '\xd1' */ STRINGPARSER_STATE_INT_STRING,
+ /* '\xd2' */ STRINGPARSER_STATE_INT_STRING,
+ /* '\xd3' */ STRINGPARSER_STATE_INT_STRING,
+ /* '\xd4' */ STRINGPARSER_STATE_INT_STRING,
+ /* '\xd5' */ STRINGPARSER_STATE_INT_STRING,
+ /* '\xd6' */ STRINGPARSER_STATE_INT_STRING,
+ /* '\xd7' */ STRINGPARSER_STATE_INT_STRING,
+ /* '\xd8' */ STRINGPARSER_STATE_INT_STRING,
+ /* '\xd9' */ STRINGPARSER_STATE_INT_STRING,
+ /* '\xda' */ STRINGPARSER_STATE_INT_STRING,
+ /* '\xdb' */ STRINGPARSER_STATE_INT_STRING,
+ /* '\xdc' */ STRINGPARSER_STATE_INT_STRING,
+ /* '\xdd' */ STRINGPARSER_STATE_INT_STRING,
+ /* '\xde' */ STRINGPARSER_STATE_INT_STRING,
+ /* '\xdf' */ STRINGPARSER_STATE_INT_STRING,
+ /* '\xe0' */ STRINGPARSER_STATE_INT_STRING,
+ /* '\xe1' */ STRINGPARSER_STATE_INT_STRING,
+ /* '\xe2' */ STRINGPARSER_STATE_INT_STRING,
+ /* '\xe3' */ STRINGPARSER_STATE_INT_STRING,
+ /* '\xe4' */ STRINGPARSER_STATE_INT_STRING,
+ /* '\xe5' */ STRINGPARSER_STATE_INT_STRING,
+ /* '\xe6' */ STRINGPARSER_STATE_INT_STRING,
+ /* '\xe7' */ STRINGPARSER_STATE_INT_STRING,
+ /* '\xe8' */ STRINGPARSER_STATE_INT_STRING,
+ /* '\xe9' */ STRINGPARSER_STATE_INT_STRING,
+ /* '\xea' */ STRINGPARSER_STATE_INT_STRING,
+ /* '\xeb' */ STRINGPARSER_STATE_INT_STRING,
+ /* '\xec' */ STRINGPARSER_STATE_INT_STRING,
+ /* '\xed' */ STRINGPARSER_STATE_INT_STRING,
+ /* '\xee' */ STRINGPARSER_STATE_INT_STRING,
+ /* '\xef' */ STRINGPARSER_STATE_INT_STRING,
+ /* '\xf0' */ STRINGPARSER_STATE_INT_STRING,
+ /* '\xf1' */ STRINGPARSER_STATE_INT_STRING,
+ /* '\xf2' */ STRINGPARSER_STATE_INT_STRING,
+ /* '\xf3' */ STRINGPARSER_STATE_INT_STRING,
+ /* '\xf4' */ STRINGPARSER_STATE_INT_STRING,
+ /* '\xf5' */ STRINGPARSER_STATE_INT_STRING,
+ /* '\xf6' */ STRINGPARSER_STATE_INT_STRING,
+ /* '\xf7' */ STRINGPARSER_STATE_INT_STRING,
+ /* '\xf8' */ STRINGPARSER_STATE_INT_STRING,
+ /* '\xf9' */ STRINGPARSER_STATE_INT_STRING,
+ /* '\xfa' */ STRINGPARSER_STATE_INT_STRING,
+ /* '\xfb' */ STRINGPARSER_STATE_INT_STRING,
+ /* '\xfc' */ STRINGPARSER_STATE_INT_STRING,
+ /* '\xfd' */ STRINGPARSER_STATE_INT_STRING,
+ /* '\xfe' */ STRINGPARSER_STATE_INT_STRING /* NOTE(review): last of 255 entries; there is no slot for '\xff' -- confirm the consumer never indexes this row with 0xff. */
+};
+
+static const int * stringparser_state_transitions[] = { /* One transition row per state. NOTE(review): presumably indexed by the STRINGPARSER_STATE_INT_* value -- confirm the order matches those constants. */
+ stringparser_transition_row_text, /* row for the 'text' state */
+ stringparser_transition_row_string, /* row for the 'string' state */
+ stringparser_transition_row_string_escape /* row for the 'string_escape' state */
+};
+
diff --git a/src/tests/htmlparser_testdata/sample_fsm.config b/src/tests/htmlparser_testdata/sample_fsm.config
new file mode 100644
index 0000000..df66e69
--- /dev/null
+++ b/src/tests/htmlparser_testdata/sample_fsm.config
@@ -0,0 +1,64 @@
+# Copyright (c) 2008, Google Inc.
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are
+# met:
+#
+# * Redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer.
+# * Redistributions in binary form must reproduce the above
+# copyright notice, this list of conditions and the following disclaimer
+# in the documentation and/or other materials provided with the
+# distribution.
+# * Neither the name of Google Inc. nor the names of its
+# contributors may be used to endorse or promote products derived from
+# this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+# ---
+#
+# Author: falmeida@google.com (Filipe Almeida)
+
+name = 'stringparser'  # State machine name. NOTE(review): presumably the prefix of the generated C symbols -- confirm in the generator.
+
+comment = 'Parses C style strings'  # Free-text description of this state machine.
+
+condition('dq', '\\"'),  # 'dq': pattern for the double-quote character.
+condition('backslash', '\\\\'),  # 'backslash': pattern for the backslash character.
+condition('default', '[:default:]')  # 'default': NOTE(review): presumably the catch-all for input no other condition matches -- confirm.
+
+# State 'text': outside a string literal.
+state(name = 'text',
+ external = 'text',  # NOTE(review): presumably the state name exposed to callers -- confirm in the generator.
+ transitions = [
+ ['dq', 'string'],  # A double quote opens a string literal.
+ ['default', 'text']  # Any other input stays in 'text'.
+ ])
+
+# State 'string': inside a string literal.
+state(name = 'string',
+ external = 'string',
+ transitions = [
+ ['backslash', 'string_escape'],  # A backslash starts an escape sequence.
+ ['dq', 'text'],  # A double quote closes the literal.
+ ['default', 'string']  # Any other input stays inside the literal.
+ ])
+
+# State 'string_escape': the character right after a backslash inside a string literal. That character is ignored.
+state(name = 'string_escape',
+ external = 'string',  # Shares the external name 'string' with the 'string' state.
+ transitions = [
+ ['default', 'string']  # Whatever follows the backslash returns to 'string'.
+ ])
+
diff --git a/src/tests/htmlparser_testdata/simple.html b/src/tests/htmlparser_testdata/simple.html
new file mode 100644
index 0000000..555928f
--- /dev/null
+++ b/src/tests/htmlparser_testdata/simple.html
@@ -0,0 +1,26 @@
+<html>
+ <body>
+ <?state state=text,tag=body?>
+ <a href="<?state state=value,tag=a?>">test</a>
+
+ <test test123=<?state state=value, tag=test, attr=test123,
+ attr_type=regular ?>>
+
+ <?state state=text?>
+
+ <body blah='<?state state=value, tag=body, attr=blah, attr_type=regular
+ ?>'>
+
+ <style>
+ <?state in_css=true?>
+ </style>
+ <?state in_css=false?>
+
+ <h1 onclick="<?state state=value, tag=h1, attr=onclick, attr_type=js,
+ in_js=true ?>" style="<?state in_css=true?>" <?state in_css=false?>>
+ <?state state=text, tag=h1?>
+ </h1>
+
+
+ </body>
+</html>
diff --git a/src/tests/htmlparser_testdata/tags.html b/src/tests/htmlparser_testdata/tags.html
new file mode 100644
index 0000000..1caf68d
--- /dev/null
+++ b/src/tests/htmlparser_testdata/tags.html
@@ -0,0 +1,214 @@
+<html>
+
+<body blah='<?state state=value, tag=body, attr=blah, attr_type=regular,
+attr_quoted=true ?>'>
+
+<?state state=text, tag=body ?>
+<a href=<?state state=value, tag=a, attr=href, attr_type=uri ?>><?state state=text, tag=a ?></a>
+<a href=
+ "<?state state=value, tag=a, attr=href, attr_type=uri, attr_quoted=true ?>"></a>
+
+<a href=<?state state=value, tag=a, attr=href, attr_type=uri ?> blah=x></a>
+<a href=
+ "<?state state=value, tag=a, attr=href, attr_type=uri ?>" blah=x></a>
+
+<a href=
+ <?state state=value, tag=a, attr=href, attr_type=uri, attr_quoted=false ?> blah=x></a>
+
+<a href><?state state=text, tag=a ?></a>
+
+<a href=x<?state state=value, tag=a, attr=href, attr_type=uri ?> <?state state=tag, tag=a ?>></a>
+
+<a href =<?state state=value, tag=a, attr=href, attr_type=uri ?>></a>
+<a href
+=<?state state=value, tag=a, attr=href, attr_type=uri ?>></a>
+<a href
+ =<?state state=value, tag=a, attr=href, attr_type=uri ?>></a>
+
+<?state state=text?>
+
+<b font=<?state state=value, value_index=0?>></b>
+<b font=x<?state state=value, value_index=1?>></b>
+<b font='<?state state=value, value_index=0?>'></b>
+<b font='x<?state state=value, value_index=1?>'></b>
+
+<!-- XML Processing instruction -->
+
+<?example <?state state=text?> <a href=<?state state=text?>></a
+ <script>
+ <?state state=text, in_js=false?>
+ </script>
+?>
+
+<a href=http://www.google.com/<?state state=value, tag=a, attr=href, attr_type=uri ?>?q=tt<?state state=value, tag=a, attr=href, attr_type=uri ?>>test</a>
+
+<!-- Test javascript url handling -->
+<a href="test<?state value=test, in_js=false ?>">test</a>
+<a href="javascript<?state value=javascript, in_js=false ?>">test</a>
+<a href="javascript:<?state value=javascript:, in_js=false ?>">test</a>
+<a href="javascript:alert('<?state in_js=false ?>">test</a>
+<a href="http:<?state value=http:, in_js=false ?>">test</a>
+<a href="http://www.google.com"
+ alt="javascript:<?state value=javascript:, in_js=false ?>">test</a>
+
+<!-- Test calls to TemplateDirective() -->
+<b font=<?state state=value?>
+ color<?state state=value?>></b>
+
+<b font=<?state state=value?><?state insert_text=true?>
+ color<?state state=attr?>></b>
+
+<b font="<?state state=value?><?state insert_text=true?>
+ color<?state state=value?>"></b>
+
+<a href=
+ <?state state=value?><?state insert_text=true?> alt<?state state=attr?>>
+ link
+</a>
+
+<b font=<?state state=value?>><?state state=text, tag=b?></b>
+
+<!-- Large invalid HTML entity -->
+<a onclick="&testtesttesttesttesttesttesttesttesttesttesttest;"
+ href="http://www.google.com/"></a>
+
+<!-- URI attributes. The attribute list can be found in
+ htmlparser.c:is_uri_attribute() -->
+<a target="<?state attr_type=regular?>"></a>
+<!-- -->
+<form action="<?state attr_type=uri?>"></form>
+<applet archive="<?state attr_type=uri?>"></applet>
+<blockquote cite="<?state attr_type=uri?>"></blockquote>
+<object classid="<?state attr_type=uri?>"></object>
+<object codebase="<?state attr_type=uri?>"></object>
+<object data="<?state attr_type=uri?>"></object>
+<img dynsrc="<?state attr_type=uri?>"></img>
+<a href="<?state attr_type=uri?>"></a>
+<img longdesc="<?state attr_type=uri?>"></img>
+<img src="<?state attr_type=uri?>"></img>
+<img usemap="<?state attr_type=uri?>"></img>
+<!-- -->
+<form style="x" action="<?state attr_type=uri?>"></form>
+<applet style="x" archive="<?state attr_type=uri?>"></applet>
+<blockquote style="x" cite="<?state attr_type=uri?>"></blockquote>
+<object style="x" classid="<?state attr_type=uri?>"></object>
+<object style="x" codebase="<?state attr_type=uri?>"></object>
+<object style="x" data="<?state attr_type=uri?>"></object>
+<img style="x" dynsrc="<?state attr_type=uri?>"></img>
+<a style="x" href="<?state attr_type=uri?>"></a>
+<img style="x" longdesc="<?state attr_type=uri?>"></img>
+<img style="x" src="<?state attr_type=uri?>"></img>
+<img style="x" usemap="<?state attr_type=uri?>"></img>
+<!-- -->
+<img alt="<?state attr_type=regular?>"></a>
+
+
+<!-- Style attributes as returned by htmlparser.c:is_style_attribute() -->
+<a target="<?state attr_type=regular?>"></a>
+<!-- -->
+<b style="<?state attr_type=style?>"></b>
+<!-- -->
+<a target="<?state attr_type=regular?>"></a>
+
+<!-- Big attribute value. We can't do prefix checking right now so we can't
+ validate the contents of the value here, although statemachine_test.c has
+ a test for that. -->
+
+<a href="http://www.google.com/"
+ alt="01234567890123456789012345678901234567890123456789
+ 01234567890123456789012345678901234567890123456789
+ 01234567890123456789012345678901234567890123456789
+ 01234567890123456789012345678901234567890123456789
+ 01234567890123456789012345678901234567890123456789
+ 01234567890123456789012345678901234567890123456789
+ 01234567890123456789012345678901234567890123456789
+ 01234567890123456789012345678901234567890123456789
+ 01234567890123456789012345678901234567890123456789
+ 01234567890123456789012345678901234567890123456789
+ <?state state=value, attr_quoted=true, tag=a, attr=alt?>"></a>
+
+<?state state=text?>
+
+<!-- is_url_start tests -->
+
+<a href="<?state is_url_start=true?>"></a>
+<a href="http://<?state is_url_start=false?>"></a>
+<a href="http://www.google.com?q=<?state is_url_start=false?>"></a>
+<b font="<?state is_url_start=false?>"></b>
+<b font="http://www.google.com?q=<?state is_url_start=false?>"></b>
+<?state is_url_start=false?>
+
+<!-- <?state is_url_start=false?> -->
+
+<!-- Tag opening tests -->
+
+<a <?state state=tag?>></a><?state state=text?>
+<br <?state state=tag?>></br><?state state=text?>
+< br <?state state=text?>></br><?state state=text?>
+<< <?state state=text?>><?state state=text?>
+< <?state state=text?> alt="<?state state=text?>">
+</blah <?state state=tag?>><?state state=text?>
+<<i<?state state=tag?>><?state state=text?></i>
+
+
+<!-- We do allow numbers to open html tags, which is not how most browsers
+behave. We still test this anyway. -->
+<0 <?state state=tag?>><?state state=text?>
+<1 <?state state=tag?>><?state state=text?>
+
+<!-- meta redirect url tests. -->
+<meta http-equiv="refresh" content="5;URL=<?state attr_type=uri, is_url_start=true?>">
+<meta http-equiv="refresh" content="10;URL=<?state attr_type=uri, is_url_start=true?>">
+<meta http-equiv="refresh" content="5 ;URL=<?state attr_type=uri, is_url_start=true?>">
+<meta http-equiv="refresh" content=" 5 ;URL=<?state attr_type=uri, is_url_start=true?>">
+<?state attr_type=none, is_url_start=false?>
+<meta http-equiv="refresh" content=" 5 ; url = <?state attr_type=uri, is_url_start=true?>">
+<meta http-equiv="refresh" content="5;Url=<?state attr_type=uri, is_url_start=true?>">
+<meta http-equiv="refresh" content="5;UrL=<?state attr_type=uri, is_url_start=true?>">
+<meta http-equiv="refresh" content="5;uRL=<?state attr_type=uri, is_url_start=true?>">
+<?state attr_type=none, is_url_start=false?>
+<meta http-equiv="refresh" content="5;uRL=http://<?state attr_type=uri, is_url_start=false?>">
+<meta http-equiv="refresh" content="5 ; URL=http://www.google.com/<?state attr_type=uri, is_url_start=false?>">
+<meta http-equiv="refresh" content="5;URL=/<?state attr_type=uri, is_url_start=false?>">
+<meta http-equiv="refresh" content="5;URL=../<?state attr_type=uri, is_url_start=false?>">
+<meta http-equiv="refresh" content=" 123456789 ; url = ../<?state attr_type=uri, is_url_start=false?>">
+
+<!-- Quoted URLs -->
+<meta http-equiv="refresh" content="5;URL = '<?state attr_type=uri, is_url_start=true?>">
+<meta http-equiv="refresh" content='5;URL = "<?state attr_type=uri, is_url_start=true?>"'>
+<meta http-equiv="refresh" content="5;URL = ' <?state attr_type=uri, is_url_start=false?>">
+<meta http-equiv="refresh" content='5;URL = " <?state attr_type=uri, is_url_start=false?>"'>
+
+<?state attr_type=none, is_url_start=false?>
+
+<meta http-equiv="refresh" content="5x;URL=<?state attr_type=regular, is_url_start=false?>">
+<meta http-equiv="refresh" content="<?state attr_type=regular, is_url_start=false?>">
+<meta http-equiv="refresh" content="5<?state attr_type=regular, is_url_start=false?>">
+<meta http-equiv="refresh" content="5;<?state attr_type=regular, is_url_start=false?>">
+<?state attr_type=none, is_url_start=false?>
+<meta http-equiv="refresh" content="5;U<?state attr_type=regular, is_url_start=false?>">
+<meta http-equiv="refresh" content="5;UR<?state attr_type=regular, is_url_start=false?>">
+<meta http-equiv="refresh" content="5;URL<?state attr_type=regular, is_url_start=false?>">
+<meta http-equiv="refresh" content="5;URL <?state attr_type=regular, is_url_start=false?>">
+<?state attr_type=none, is_url_start=false?>
+<meta http-equiv="refresh" content="5x;URL= <?state attr_type=regular, is_url_start=false?>">
+<meta http-equiv="refresh" content="5;UR L <?state attr_type=regular, is_url_start=false?>">
+<meta http-equiv="refresh" content="URL = <?state attr_type=regular, is_url_start=false?>">
+
+<meta http-equiv="refresh" content="AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA <?state attr_type=regular?>">
+
+<span a:type="<?state state=value, attr=a:type?>"
+ a:abc.abc="<?state state=value, attr=a:abc.abc?>"
+ b:a.b.c.d.e.f=<?state state=value, attr=b:a.b.c.d.e.f?>>
+
+<tag.test>
+<?state state=text, tag=tag.test?>
+</tag.test>
+
+<!-- Tests regarding our specific implementation -->
+<meta content="5;URL=<?state attr_type=uri, is_url_start=true?>">
+
+</body>
+
+</html>
+<?state state=text ?>
diff --git a/src/tests/make_tpl_varnames_h_unittest.sh b/src/tests/make_tpl_varnames_h_unittest.sh
new file mode 100755
index 0000000..5d541cf
--- /dev/null
+++ b/src/tests/make_tpl_varnames_h_unittest.sh
@@ -0,0 +1,239 @@
+#!/bin/sh
+
+# Copyright (c) 2006, Google Inc.
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are
+# met:
+#
+# * Redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer.
+# * Redistributions in binary form must reproduce the above
+# copyright notice, this list of conditions and the following disclaimer
+# in the documentation and/or other materials provided with the
+# distribution.
+# * Neither the name of Google Inc. nor the names of its
+# contributors may be used to endorse or promote products derived from
+# this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+# ---
+# Author: csilvers@google.com (Craig Silverstein)
+#
+
+die() {
+ echo "Test failed: $@" 1>&2
+ exit 1
+}
+
+MAKETPL=${1-"$TEST_SRCDIR/make_tpl_varnames_h"}
+
+# Optional second argument is tmpdir to use
+TMPDIR=${2-"$TEST_TMPDIR/maketpl"}
+
+rm -rf $TMPDIR
+mkdir $TMPDIR || die "$LINENO: Can't make $TMPDIR"
+
+# Let's make some templates: three that are ok, and three that are not
+echo '<a href={{QCHAR}}{{HREF}}{{QCHAR}} {{PARAMS}}>' > $TMPDIR/ok1.tpl
+echo '<img {{#ATTRIBUTES}}{{ATTRIBUTE}}{{/ATTRIBUTES}}>' > $TMPDIR/ok2.tpl
+echo '<html><head><title>{{TITLE}}</title></head></html>' > $TMPDIR/ok3.tpl
+echo '<a href={{QCHAR}}{{HREF}}{{QC' > $TMPDIR/bad1.tpl
+echo '<img {{#ATTRIBUTES}}{{ATTRIBUTE}}>' > $TMPDIR/bad2.tpl
+echo '<html><head><title>{{TITLE?}}</title></head></html>' > $TMPDIR/bad3.tpl
+
+# We'll make some templates with modifiers as well.
+echo '<a href={{HREF:h}} {{PARAMS}}>' > $TMPDIR/ok4.tpl
+echo '<a href={{HREF:html_escape_with_arg=url}} {{PARAMS}}>' > $TMPDIR/ok5.tpl
+echo '<a href={{HREF:x-custom-modifier}} {{PARAMS}}>' > $TMPDIR/ok6.tpl
+echo '<a href={{HREF:x-custom-modifier=arg}} {{PARAMS}}>' > $TMPDIR/ok7.tpl
+echo '<a href={{HREF:x-custom-modifier=}} {{PARAMS}}>' > $TMPDIR/ok8.tpl
+
+
+# First, test commandline flags
+$MAKETPL >/dev/null 2>&1 \
+ && die "$LINENO: $MAKETPL with no args didn't give an error"
+$MAKETPL -o$TMPDIR/foo >/dev/null 2>&1 \
+ && die "$LINENO: $MAKETPL with no template didn't give an error"
+$MAKETPL -h >/dev/null 2>&1 \
+ || die "$LINENO: $MAKETPL -h failed"
+$MAKETPL --help >/dev/null 2>&1 \
+ || die "$LINENO: $MAKETPL --help failed"
+$MAKETPL --nonsense >/dev/null 2>&1 \
+ && die "$LINENO: $MAKETPL --nonsense didn't give an error"
+$MAKETPL -f$TMPDIR/bar.h >/dev/null 2>&1 \
+ && die "$LINENO: $MAKETPL -f with no templates didn't give an error"
+
+# Some weird (broken) shells leave the ending EOF in the here-document,
+# hence the grep.
+expected_ok1=`cat <<EOF | grep -v '^EOF$'
+#ifndef %%%OUTPUT_NAME%%%
+#define %%%OUTPUT_NAME%%%
+
+#include <ctemplate/template_string.h>
+static const ::ctemplate::StaticTemplateString ko_QCHAR = STS_INIT_WITH_HASH(ko_QCHAR, "QCHAR", 13739615363438531061ULL);
+static const ::ctemplate::StaticTemplateString ko_HREF = STS_INIT_WITH_HASH(ko_HREF, "HREF", 4441707909033668369ULL);
+static const ::ctemplate::StaticTemplateString ko_PARAMS = STS_INIT_WITH_HASH(ko_PARAMS, "PARAMS", 10755877064288701757ULL);
+
+#endif // %%%OUTPUT_NAME%%%
+EOF`
+
+expected_ok2=`cat <<EOF | grep -v '^EOF$'
+#ifndef %%%OUTPUT_NAME%%%
+#define %%%OUTPUT_NAME%%%
+
+#include <ctemplate/template_string.h>
+static const ::ctemplate::StaticTemplateString ko_ATTRIBUTES = STS_INIT_WITH_HASH(ko_ATTRIBUTES, "ATTRIBUTES", 11813232524653503831ULL);
+static const ::ctemplate::StaticTemplateString ko_ATTRIBUTE = STS_INIT_WITH_HASH(ko_ATTRIBUTE, "ATTRIBUTE", 14959290143384361001ULL);
+
+#endif // %%%OUTPUT_NAME%%%
+EOF`
+
+expected_ok3=`cat <<EOF | grep -v '^EOF$'
+#ifndef %%%OUTPUT_NAME%%%
+#define %%%OUTPUT_NAME%%%
+
+#include <ctemplate/template_string.h>
+static const ::ctemplate::StaticTemplateString ko_TITLE = STS_INIT_WITH_HASH(ko_TITLE, "TITLE", 8931122033088041025ULL);
+
+#endif // %%%OUTPUT_NAME%%%
+EOF`
+
+expected_ok4=`cat <<EOF | grep -v '^EOF$'
+#ifndef %%%OUTPUT_NAME%%%
+#define %%%OUTPUT_NAME%%%
+
+#include <ctemplate/template_string.h>
+static const ::ctemplate::StaticTemplateString ko_HREF = STS_INIT_WITH_HASH(ko_HREF, "HREF", 4441707909033668369ULL);
+static const ::ctemplate::StaticTemplateString ko_PARAMS = STS_INIT_WITH_HASH(ko_PARAMS, "PARAMS", 10755877064288701757ULL);
+
+#endif // %%%OUTPUT_NAME%%%
+EOF`
+
+expected_ok5=`echo "$expected_ok4" | sed s/ok4/ok5/g`
+expected_ok6=`echo "$expected_ok4" | sed s/ok4/ok6/g`
+expected_ok7=`echo "$expected_ok4" | sed s/ok4/ok7/g`
+expected_ok8=`echo "$expected_ok4" | sed s/ok4/ok8/g`
+
+# When -f (--output-file) is used on ok1.tpl and ok2.tpl
+# Note that there are no variables in common in these two templates.
+# All should be returned.
+expected_ok1and2=`cat <<EOF | grep -v '^EOF$'
+#ifndef %%%OUTPUT_NAME%%%
+#define %%%OUTPUT_NAME%%%
+
+#include <ctemplate/template_string.h>
+static const ::ctemplate::StaticTemplateString ko_QCHAR = STS_INIT_WITH_HASH(ko_QCHAR, "QCHAR", 13739615363438531061ULL);
+static const ::ctemplate::StaticTemplateString ko_HREF = STS_INIT_WITH_HASH(ko_HREF, "HREF", 4441707909033668369ULL);
+static const ::ctemplate::StaticTemplateString ko_PARAMS = STS_INIT_WITH_HASH(ko_PARAMS, "PARAMS", 10755877064288701757ULL);
+static const ::ctemplate::StaticTemplateString ko_ATTRIBUTES = STS_INIT_WITH_HASH(ko_ATTRIBUTES, "ATTRIBUTES", 11813232524653503831ULL);
+static const ::ctemplate::StaticTemplateString ko_ATTRIBUTE = STS_INIT_WITH_HASH(ko_ATTRIBUTE, "ATTRIBUTE", 14959290143384361001ULL);
+
+#endif // %%%OUTPUT_NAME%%%
+EOF`
+
+# When -f (--output-file) is used on ok1.tpl and ok4.tpl
+# Note that both variables in ok4.tpl will be duplicates and hence not returned.
+expected_ok1and4=`echo "$expected_ok1" | sed s/ok1/ok1and4/g`
+
+# Suppress unimportant aspects of the make_tpl_varnames_h output.
+Cleanse() {
+ # Replace the file name guard with %%%OUTPUT_NAME%%% so we can use
+ # the same expected_ok* variables for different file names.
+ # Note that we only append 'H_' to the end of the string, instead
+ # of '_H_'. This is because the first call to 'tr' is already
+ # adding a '_' at the end of the converted $1 (due to the newline
+ # emitted by echo).
+ n="`basename $1 | sed -e 's/[^0-9a-zA-Z]/_/g' | tr 'abcdefghijklmnopqrstuvwxyz' 'ABCDEFGHIJKLMNOPQRSTUVWXYZ'`"
+ grep -v '^//' "$1" | sed -e "s:TPL_.*${n}_H_:%%%OUTPUT_NAME%%%:" > "$1.cleansed"
+}
+
+# syntax-check these templates
+$MAKETPL -n $TMPDIR/ok1.tpl $TMPDIR/ok2.tpl $TMPDIR/ok3.tpl >/dev/null 2>&1 \
+ || die "$LINENO: $MAKETPL gave error parsing good templates"
+$MAKETPL -n $TMPDIR/ok1.tpl $TMPDIR/ok2.tpl $TMPDIR/bad3.tpl >/dev/null 2>&1 \
+ && die "$LINENO: $MAKETPL gave no error parsing bad template"
+$MAKETPL -n $TMPDIR/ok1.tpl $TMPDIR/ok2.tpl $TMPDIR/ok100.tpl >/dev/null 2>&1 \
+ && die "$LINENO: $MAKETPL gave no error parsing non-existent template"
+
+# Now try the same thing, but use template-root so we don't need absdirs
+$MAKETPL -n --template_dir=$TMPDIR ok1.tpl ok2.tpl ok3.tpl >/dev/null 2>&1 \
+ || die "$LINENO: $MAKETPL gave error parsing good templates"
+
+# Parse the templates. Bad one in the middle should be ignored.
+$MAKETPL --header_dir=$TMPDIR $TMPDIR/ok1.tpl $TMPDIR/bad2.tpl $TMPDIR/ok3.tpl >/dev/null 2>&1
+[ $? = 1 ] || die "$LINENO: $MAKETPL gave wrong error-code parsing 1 bad template: $?"
+Cleanse "$TMPDIR/ok1.tpl.varnames.h"
+echo "$expected_ok1" | diff - "$TMPDIR/ok1.tpl.varnames.h.cleansed" \
+ || die "$LINENO: $MAKETPL didn't make ok1 output correctly"
+[ -f "$TMPDIR/bad2.tpl.varnames.h" ] \
+ && die "$LINENO: $MAKETPL >did< make bad2 output"
+Cleanse "$TMPDIR/ok3.tpl.varnames.h"
+echo "$expected_ok3" | diff - "$TMPDIR/ok3.tpl.varnames.h.cleansed" \
+ || die "$LINENO: $MAKETPL didn't make ok3 output correctly"
+
+# Now try the same but with a different suffix. Try an alternate -t/-o form too.
+# Also test not being able to output the file for some reason.
+$MAKETPL -t$TMPDIR -o$TMPDIR -s.out ok1.tpl bad2.tpl ok3.tpl >/dev/null 2>&1
+[ $? = 1 ] || die "$LINENO: $MAKETPL gave wrong error-code parsing 1 bad template: $?"
+Cleanse "$TMPDIR/ok1.tpl.out"
+echo "$expected_ok1" | diff - "$TMPDIR/ok1.tpl.out.cleansed" \
+ || die "$LINENO: $MAKETPL didn't make ok1 output correctly"
+[ -f "$TMPDIR/bad2.tpl.out" ] && die "$LINENO: $MAKETPL >did< make bad2 output"
+Cleanse "$TMPDIR/ok3.tpl.out"
+echo "$expected_ok3" | diff - "$TMPDIR/ok3.tpl.out.cleansed" \
+ || die "$LINENO: $MAKETPL didn't make ok3 output correctly"
+
+# Verify that -f generates the requested output file:
+# -f with one file
+$MAKETPL -t$TMPDIR -f$TMPDIR/ok1.h ok1.tpl >/dev/null 2>&1
+Cleanse "$TMPDIR/ok1.h"
+echo "$expected_ok1" | diff - "$TMPDIR/ok1.h.cleansed" \
+ || die "$LINENO: $MAKETPL didn't make ok1.h output correctly"
+# -f with two files - no common template variables
+$MAKETPL -t$TMPDIR -f$TMPDIR/ok1and2.h ok1.tpl ok2.tpl >/dev/null 2>&1
+Cleanse "$TMPDIR/ok1and2.h"
+echo "$expected_ok1and2" | diff - "$TMPDIR/ok1and2.h.cleansed" \
+ || die "$LINENO: $MAKETPL didn't make ok1and2.h output correctly"
+# -f with two files - two common template variables
+$MAKETPL -t$TMPDIR -f$TMPDIR/ok1and4.h ok1.tpl ok4.tpl >/dev/null 2>&1
+Cleanse "$TMPDIR/ok1and4.h"
+echo "$expected_ok1and4" | diff - "$TMPDIR/ok1and4.h.cleansed" \
+ || die "$LINENO: $MAKETPL didn't make ok1and4.h output correctly"
+# -f with a bad file should not produce an output
+$MAKETPL -t$TMPDIR -f$TMPDIR/bar.h ok1.tpl bad1.tpl >/dev/null 2>&1 \
+ && die "$LINENO: $MAKETPL -f gave no error parsing bad template"
+
+# Verify we don't give any output iff everything works, with -q flag.
+# Also test using a different output dir. Also, test *every* ok template.
+mkdir $TMPDIR/output
+# Normally I'd do {1,2,3,4,...}, but solaris sh doesn't understand that syntax
+out=`$MAKETPL -q -t$TMPDIR -o$TMPDIR/output -s"#" \
+ ok1.tpl ok2.tpl ok3.tpl ok4.tpl ok5.tpl ok6.tpl ok7.tpl ok8.tpl \
+ 2>&1`
+[ -z "$out" ] || die "$LINENO: $MAKETPL -q wasn't so quiet: '$out'"
+for i in 1 2 3 4 5 6 7 8; do
+ Cleanse "$TMPDIR/output/ok$i.tpl#"
+ eval "echo \"\$expected_ok$i\"" | diff - "$TMPDIR/output/ok$i.tpl#.cleansed" \
+ || die "$LINENO: $MAKETPL didn't make ok$i output correctly"
+done
+# Name each bad template explicitly: plain sh does not expand bad{1,2,3}.tpl.
+out=`$MAKETPL -q --outputfile_suffix=2 $TMPDIR/bad1.tpl $TMPDIR/bad2.tpl $TMPDIR/bad3.tpl 2>&1`
+[ -z "$out" ] && die "$LINENO: $MAKETPL -q was too quiet"
+for i in 1 2 3; do
+ [ -f "$TMPDIR/output/bad$i.tpl2" ] && die "$LINENO: $MAKETPL made bad$i output"
+done
+
+echo "PASSED"
diff --git a/src/tests/statemachine_test.c b/src/tests/statemachine_test.c
new file mode 100644
index 0000000..33481d6
--- /dev/null
+++ b/src/tests/statemachine_test.c
@@ -0,0 +1,365 @@
+/* Copyright (c) 2007, Google Inc.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following disclaimer
+ * in the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Google Inc. nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <assert.h>
+
+#include "htmlparser/statemachine.h"
+
+enum {
+ SIMPLE_STATE_A,
+ SIMPLE_STATE_B,
+ SIMPLE_STATE_C,
+ SIMPLE_STATE_D,
+ SIMPLE_STATE_ERROR_TEST
+};
+
+/* Include the test state machine definition. */
+#include "tests/statemachine_test_fsm.h"
+
+/* Taken from google templates */
+
+#define ASSERT(cond) do { \
+ if (!(cond)) { \
+ printf("%s: %d: ASSERT FAILED: %s\n", __FILE__, __LINE__, \
+ #cond); \
+ assert(cond); \
+ exit(1); \
+ } \
+} while (0)
+
+#define ASSERT_STREQ(a, b) do { \
+ if (strcmp((a), (b))) { \
+ printf("%s: %d: ASSERT FAILED: '%s' != '%s'\n", __FILE__, __LINE__, \
+ (a), (b)); \
+ assert(!strcmp((a), (b))); \
+ exit(1); \
+ } \
+} while (0)
+
+#define ASSERT_STRSTR(text, substr) do { \
+ if (!strstr((text), (substr))) { \
+ printf("%s: %d: ASSERT FAILED: '%s' not in '%s'\n", \
+ __FILE__, __LINE__, (substr), (text)); \
+ assert(strstr((text), (substr))); \
+ exit(1); \
+ } \
+} while (0)
+
+
+#define NUM_STATES 10
+
+/* To simplify the tests; no trailing ';' here -- each call site adds its own. */
+#define statemachine_parse_str(a,b) statemachine_parse(a, b, strlen(b))
+
+/* Simple state machine test. */
+int test_simple()
+{
+ statemachine_definition *def;
+ statemachine_ctx *sm;
+ def = statemachine_definition_new(NUM_STATES);
+ sm = statemachine_new(def, NULL);
+
+ statemachine_definition_populate(def, simple_state_transitions,
+ simple_states_internal_names);
+ ASSERT(sm->current_state == SIMPLE_STATE_A);
+
+ statemachine_parse(sm, "001", 3);
+ ASSERT(sm->current_state == SIMPLE_STATE_B);
+
+ statemachine_parse(sm, "001", 3);
+ ASSERT(sm->current_state == SIMPLE_STATE_C);
+
+ statemachine_parse(sm, "2", 1);
+ ASSERT(sm->current_state == SIMPLE_STATE_B);
+
+ statemachine_parse(sm, "11", 2);
+ ASSERT(sm->current_state == SIMPLE_STATE_D);
+
+ statemachine_delete(sm);
+ return 0;
+}
+
+/* Tests error handling logic when we try to follow non existent transitions. */
+int test_error()
+{
+ statemachine_definition *def;
+ statemachine_ctx *sm;
+ int res;
+
+ def = statemachine_definition_new(NUM_STATES);
+ sm = statemachine_new(def, NULL);
+
+ statemachine_definition_populate(def, simple_state_transitions,
+ NULL);
+ ASSERT(sm->current_state == SIMPLE_STATE_A);
+
+ ASSERT(statemachine_get_error_msg(sm) == NULL);
+
+ res = statemachine_parse_str(sm, "00E");
+ ASSERT(sm->current_state == SIMPLE_STATE_ERROR_TEST);
+ ASSERT(sm->current_state == res);
+
+ res = statemachine_parse(sm, "3", 1);
+ ASSERT(res == STATEMACHINE_ERROR);
+ ASSERT_STREQ(statemachine_get_error_msg(sm),
+ "Unexpected character '3'");
+
+ statemachine_reset(sm);
+ ASSERT(statemachine_get_error_msg(sm) == NULL);
+
+ statemachine_delete(sm);
+
+ def = statemachine_definition_new(NUM_STATES);
+ sm = statemachine_new(def, NULL);
+
+ statemachine_definition_populate(def, simple_state_transitions,
+ simple_states_internal_names);
+ ASSERT(sm->current_state == SIMPLE_STATE_A);
+
+ res = statemachine_parse_str(sm, "00E");
+ ASSERT(sm->current_state == SIMPLE_STATE_ERROR_TEST);
+ ASSERT(sm->current_state == res);
+
+ res = statemachine_parse(sm, "3", 1);
+ ASSERT(res == STATEMACHINE_ERROR);
+ ASSERT_STREQ(statemachine_get_error_msg(sm),
+ "Unexpected character '3' in state 'error_test'");
+
+ statemachine_delete(sm);
+
+ return 0;
+}
+
+/* Tests statemachine_start_record() and statemachine_stop_record() logic. */
+
+int test_record()
+{
+ statemachine_definition *def;
+ statemachine_ctx *sm;
+ const char *actual;
+ char expected[STATEMACHINE_RECORD_BUFFER_SIZE];
+ int res;
+ int counter;
+ def = statemachine_definition_new(NUM_STATES);
+ sm = statemachine_new(def, NULL);
+
+ statemachine_definition_populate(def, simple_state_transitions,
+ simple_states_internal_names);
+
+ ASSERT(sm->current_state == SIMPLE_STATE_A);
+
+ res = statemachine_parse_str(sm, "001");
+ ASSERT(sm->current_state == SIMPLE_STATE_B);
+ ASSERT(sm->current_state == res);
+
+ statemachine_start_record(sm);
+ statemachine_parse_str(sm, "121212");
+ ASSERT_STREQ("121212", statemachine_stop_record(sm));
+
+ statemachine_parse_str(sm, "not recorded");
+
+ statemachine_start_record(sm);
+ statemachine_parse_str(sm, "121212000");
+ ASSERT_STREQ("121212000", statemachine_stop_record(sm));
+
+ statemachine_start_record(sm);
+ statemachine_parse_str(sm, "1234567890");
+ ASSERT_STREQ("1234567890", statemachine_record_buffer(sm));
+
+ statemachine_parse_str(sm, "test");
+ ASSERT_STREQ("1234567890test", statemachine_stop_record(sm));
+
+ statemachine_start_record(sm);
+
+ /* Record 1000 chars + strlen("beginning-") */
+ statemachine_parse_str(sm, "beginning-");
+ for (counter = 0; counter < 100; counter++) {
+ statemachine_parse_str(sm, "1234567890");
+ }
+
+ /* Make sure we preserved the start of the buffer. */
+ ASSERT_STRSTR(statemachine_record_buffer(sm), "beginning-");
+
+ /* And make sure the size is what we expect. */
+ ASSERT(STATEMACHINE_RECORD_BUFFER_SIZE - 1 ==
+ strlen(statemachine_stop_record(sm)));
+
+ statemachine_start_record(sm);
+ for (counter = 0; counter < 100; counter++) {
+ statemachine_parse_str(sm, "0123456789ABCDEF");
+ }
+
+ expected[0] = '\0';
+ /* Fill the buffer with a pattern 255 chars long (16 * 15 + 15). */
+ for (counter = 0; counter < 15; counter++) {
+ strcat(expected, "0123456789ABCDEF");
+ }
+ strcat(expected, "0123456789ABCDE");
+ actual = statemachine_stop_record(sm);
+ ASSERT_STREQ(expected, actual);
+
+ statemachine_delete(sm);
+ return 0;
+}
+
+/* Test with characters outside of the ascii range */
+int test_no_ascii()
+{
+ statemachine_definition *def;
+ statemachine_ctx *sm;
+ def = statemachine_definition_new(NUM_STATES);
+ sm = statemachine_new(def, NULL);
+
+ statemachine_definition_populate(def, simple_state_transitions,
+ simple_states_internal_names);
+
+ ASSERT(sm->current_state == SIMPLE_STATE_A);
+
+ statemachine_parse(sm, "\xf0\xf0\xf1", 3);
+ ASSERT(sm->current_state == SIMPLE_STATE_B);
+
+ statemachine_parse(sm, "\xf0\xf0\xf1", 3);
+ ASSERT(sm->current_state == SIMPLE_STATE_C);
+
+ statemachine_parse(sm, "\xf2", 1);
+ ASSERT(sm->current_state == SIMPLE_STATE_B);
+
+ statemachine_parse(sm, "\xf1\xf1", 2);
+ ASSERT(sm->current_state == SIMPLE_STATE_D);
+
+ statemachine_delete(sm);
+ return 0;
+
+}
+
+int test_copy()
+{
+ statemachine_definition *def;
+ statemachine_ctx *sm1;
+ statemachine_ctx *sm2;
+ statemachine_ctx *sm3;
+ def = statemachine_definition_new(NUM_STATES);
+ sm1 = statemachine_new(def, NULL);
+
+ statemachine_definition_populate(def, simple_state_transitions,
+ simple_states_internal_names);
+
+ ASSERT(sm1->current_state == SIMPLE_STATE_A);
+
+ sm2 = statemachine_duplicate(sm1, def, NULL);
+ ASSERT(sm2->current_state == SIMPLE_STATE_A);
+
+ statemachine_parse(sm1, "001", 3);
+ ASSERT(sm1->current_state == SIMPLE_STATE_B);
+ ASSERT(sm2->current_state == SIMPLE_STATE_A);
+
+
+ statemachine_parse(sm1, "001", 3);
+ statemachine_parse(sm2, "001", 3);
+ ASSERT(sm1->current_state == SIMPLE_STATE_C);
+ ASSERT(sm2->current_state == SIMPLE_STATE_B);
+
+ sm3 = statemachine_duplicate(sm2, def, NULL);
+ ASSERT(sm3->current_state == SIMPLE_STATE_B);
+
+ statemachine_parse(sm1, "001", 3);
+ statemachine_parse(sm2, "001", 3);
+ statemachine_parse(sm3, "002", 3);
+ ASSERT(sm1->current_state == SIMPLE_STATE_D);
+ ASSERT(sm2->current_state == SIMPLE_STATE_C);
+ ASSERT(sm3->current_state == SIMPLE_STATE_A);
+
+ statemachine_delete(sm1);
+ statemachine_delete(sm2);
+ statemachine_delete(sm3);
+
+ return 0;
+}
+
+/* Tests statemachine_encode_char(): table-driven checks of the encoded form,
+ * then checks truncation for output buffer sizes 1 through 5. */
+int test_encode_char()
+{
+ char encoded_char[10];
+ int i;
+
+ struct {
+ char chr;
+ const char *result;
+ } encode_map[] = {
+ { 'x', "x" },
+ { '0', "0" },
+ { '\n', "\\n" },
+ { '\r', "\\r" },
+ { '\t', "\\t" },
+ { '\\', "\\\\" },
+ { '\0', "\\x00" },
+ { '\xF0', "\\xf0" },
+ { '\0', NULL} /* Sentinel: the loop below stops at result == NULL. */
+ };
+
+ for (i = 0; encode_map[i].result; i++) {
+ statemachine_encode_char(encode_map[i].chr, encoded_char,
+ sizeof(encoded_char) / sizeof(*encoded_char));
+ ASSERT_STREQ(encoded_char, encode_map[i].result);
+ }
+
+ statemachine_encode_char('\xFF', encoded_char, 1);
+ ASSERT_STREQ(encoded_char, "");
+
+ statemachine_encode_char('\xFF', encoded_char, 2);
+ ASSERT_STREQ(encoded_char, "\\");
+
+ statemachine_encode_char('\xFF', encoded_char, 3);
+ ASSERT_STREQ(encoded_char, "\\x");
+
+ statemachine_encode_char('\xFF', encoded_char, 4);
+ ASSERT_STREQ(encoded_char, "\\xf");
+
+ statemachine_encode_char('\xFF', encoded_char, 5);
+ ASSERT_STREQ(encoded_char, "\\xff");
+
+ return 0;
+}
+
+int main(int argc, char **argv)
+{
+ test_simple();
+ test_error();
+ test_record();
+ test_no_ascii();
+ test_copy();
+ test_encode_char();
+ printf("DONE.\n");
+ return 0;
+}
diff --git a/src/tests/statemachine_test_fsm.config b/src/tests/statemachine_test_fsm.config
new file mode 100644
index 0000000..c781c0a
--- /dev/null
+++ b/src/tests/statemachine_test_fsm.config
@@ -0,0 +1,79 @@
+# Copyright (c) 2008, Google Inc.
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are
+# met:
+#
+# * Redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer.
+# * Redistributions in binary form must reproduce the above
+# copyright notice, this list of conditions and the following disclaimer
+# in the documentation and/or other materials provided with the
+# distribution.
+# * Neither the name of Google Inc. nor the names of its
+# contributors may be used to endorse or promote products derived from
+# this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+# ---
+#
+# Author: falmeida@google.com (Filipe Almeida)
+#
+# Simple state machine definition used for testing the state machine.
+
+name = 'simple'
+
+comment = 'Simple state machine'
+
+condition('1', '1\xf1')
+condition('2', '2\xf2')
+condition('E', 'E')
+condition('default', '[:default:]')
+
+state(name = 'A',
+ external = 'A',
+ transitions = [
+ ['1', 'B'],
+ ['E', 'error_test'],
+ ['default', 'A'],
+ ])
+
+state(name = 'B',
+ external = 'B',
+ transitions = [
+ ['1', 'C'],
+ ['2', 'A'],
+ ['default', 'B'],
+ ])
+
+state(name = 'C',
+ external = 'C',
+ transitions = [
+ ['1', 'D'],
+ ['2', 'B'],
+ ['default', 'C'],
+ ])
+
+state(name = 'D',
+ external = 'D',
+ transitions = [
+ ['2', 'C'],
+ ['default', 'D'],
+ ])
+
+state(name = 'error_test',
+ external = 'error_test',
+ transitions = [
+ ['2', 'A'],
+ ])
diff --git a/src/tests/template_cache_test.cc b/src/tests/template_cache_test.cc
new file mode 100644
index 0000000..5a23716
--- /dev/null
+++ b/src/tests/template_cache_test.cc
@@ -0,0 +1,1064 @@
+// Copyright (c) 2009, Google Inc.
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+// * Neither the name of Google Inc. nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+// ---
+// Author: csilvers@google.com (Craig Silverstein)
+//
+
+#include "config_for_unittests.h"
+#include <ctemplate/template_cache.h>
+#include <assert.h> // for assert()
+#include <stdio.h> // for printf()
+#include <stdlib.h> // for exit()
+#include <string.h> // for strcmp()
+#include <sys/types.h> // for mode_t
+#ifdef HAVE_UNISTD_H
+# include <unistd.h>
+#endif // for unlink()
+#include <ctemplate/template.h> // for Template
+#include <ctemplate/template_dictionary.h> // for TemplateDictionary
+#include <ctemplate/template_enums.h> // for DO_NOT_STRIP, etc
+#include <ctemplate/template_pathops.h> // for PathJoin(), kCWD
+#include <ctemplate/template_string.h> // for TemplateString
+#include "tests/template_test_util.h" // for AssertExpandIs(), etc
+using std::string;
+using GOOGLE_NAMESPACE::FLAGS_test_tmpdir;
+using GOOGLE_NAMESPACE::AssertExpandIs;
+using GOOGLE_NAMESPACE::CreateOrCleanTestDir;
+using GOOGLE_NAMESPACE::CreateOrCleanTestDirAndSetAsTmpdir;
+using GOOGLE_NAMESPACE::DO_NOT_STRIP;
+using GOOGLE_NAMESPACE::PathJoin;
+using GOOGLE_NAMESPACE::STRIP_BLANK_LINES;
+using GOOGLE_NAMESPACE::STRIP_WHITESPACE;
+using GOOGLE_NAMESPACE::StaticTemplateString;
+using GOOGLE_NAMESPACE::StringToFile;
+using GOOGLE_NAMESPACE::StringToTemplateCache;
+using GOOGLE_NAMESPACE::StringToTemplateFile;
+using GOOGLE_NAMESPACE::Template;
+using GOOGLE_NAMESPACE::TemplateCache;
+using GOOGLE_NAMESPACE::TemplateCachePeer;
+using GOOGLE_NAMESPACE::TemplateDictionary;
+using GOOGLE_NAMESPACE::kCWD;
+
+#define ASSERT(cond) do { \
+ if (!(cond)) { \
+ printf("ASSERT FAILED, line %d: %s\n", __LINE__, #cond); \
+ assert(cond); \
+ exit(1); \
+ } \
+} while (0)
+
+#define ASSERT_STREQ(a, b) ASSERT(strcmp(a, b) == 0)
+
+static const StaticTemplateString kKey = STS_INIT(kKey, "MY_KEY");
+static const StaticTemplateString kContent = STS_INIT(kContent, "content");
+
+// It would be nice to use the TEST framework, but it makes friendship
+// more difficult. (TemplateCache befriends TemplateCacheUnittest.)
+class TemplateCacheUnittest {
+ public:
+ static void TestGetTemplate() {
+ // Tests the cache
+ TemplateCache cache1;
+ const char* text = "{This is perfectly valid} yay!";
+ TemplateDictionary empty_dict("dict");
+
+ string filename = StringToTemplateFile(text);
+ const Template* tpl1 = cache1.GetTemplate(filename, DO_NOT_STRIP);
+ const Template* tpl2 = cache1.GetTemplate(filename.c_str(), DO_NOT_STRIP);
+ const Template* tpl3 = cache1.GetTemplate(filename, STRIP_WHITESPACE);
+ ASSERT(tpl1 && tpl2 && tpl3);
+ ASSERT(tpl1 == tpl2);
+ ASSERT(tpl1 != tpl3);
+ AssertExpandIs(tpl1, &empty_dict, text, true);
+ AssertExpandIs(tpl2, &empty_dict, text, true);
+ AssertExpandIs(tpl3, &empty_dict, text, true);
+
+ // Tests that a nonexistent template returns NULL
+ const Template* tpl4 = cache1.GetTemplate("/yakakak", STRIP_WHITESPACE);
+ ASSERT(!tpl4);
+
+ // Make sure we get different results if we use a different cache.
+ TemplateCache cache2;
+ const Template* tpl5 = cache2.GetTemplate(filename, DO_NOT_STRIP);
+ ASSERT(tpl5);
+ ASSERT(tpl5 != tpl1);
+ AssertExpandIs(tpl5, &empty_dict, text, true);
+
+ // And different results yet if we use the default cache.
+ const Template* tpl6 = Template::GetTemplate(filename, DO_NOT_STRIP);
+ ASSERT(tpl6);
+ ASSERT(tpl6 != tpl1);
+ AssertExpandIs(tpl6, &empty_dict, text, true);
+ }
+
+ static void TestLoadTemplate() {
+ // Tests the cache
+ TemplateCache cache1;
+ const char* text = "{This is perfectly valid} yay!";
+ TemplateDictionary empty_dict("dict");
+ string filename = StringToTemplateFile(text);
+
+ ASSERT(cache1.LoadTemplate(filename, DO_NOT_STRIP));
+
+ // Tests that a nonexistent template returns false
+ ASSERT(!cache1.LoadTemplate("/yakakak", STRIP_WHITESPACE));
+ }
+
+ static void TestStringGetTemplate() {
+ // If you use these same cache keys somewhere else,
+ // call Template::ClearCache first.
+ const string cache_key_a = "cache key a";
+ const string text = "Test template 1";
+ TemplateDictionary empty_dict("dict");
+
+ TemplateCache cache1;
+ const Template *tpl1;
+ ASSERT(cache1.StringToTemplateCache(cache_key_a, text, DO_NOT_STRIP));
+ tpl1 = cache1.GetTemplate(cache_key_a, DO_NOT_STRIP);
+ AssertExpandIs(tpl1, &empty_dict, text, true);
+
+ // A different cache should give different templates.
+ TemplateCache cache2;
+ const Template *tpl3;
+ ASSERT(cache2.StringToTemplateCache(cache_key_a, text, DO_NOT_STRIP));
+ tpl3 = cache2.GetTemplate(cache_key_a, DO_NOT_STRIP);
+ ASSERT(tpl3 != tpl1);
+ AssertExpandIs(tpl3, &empty_dict, text, true);
+
+ // And the main cache different still
+ const Template *tpl4;
+ ASSERT(StringToTemplateCache(cache_key_a, text, DO_NOT_STRIP));
+ tpl4 = Template::GetTemplate(cache_key_a, DO_NOT_STRIP);
+ ASSERT(tpl4 != tpl1);
+ AssertExpandIs(tpl4, &empty_dict, text, true);
+
+ // If we register a new string with the same key, it should be ignored.
+ ASSERT(!cache1.StringToTemplateCache(cache_key_a, "new text",
+ DO_NOT_STRIP));
+
+ Template::ClearCache();
+ }
+
+ static void TestStringToTemplateCacheWithStrip() {
+ const string cache_key_a = "cache key a";
+ const string text = "Test template 1";
+ TemplateDictionary empty_dict("dict");
+
+ TemplateCache cache;
+ ASSERT(cache.StringToTemplateCache(cache_key_a, text, DO_NOT_STRIP));
+
+ TemplateCachePeer cache_peer(&cache);
+ TemplateCachePeer::TemplateCacheKey cache_key1(cache_key_a, DO_NOT_STRIP);
+ ASSERT(cache_peer.TemplateIsCached(cache_key1));
+ const Template* tpl1 = cache_peer.GetTemplate(cache_key_a, DO_NOT_STRIP);
+ ASSERT(tpl1);
+ AssertExpandIs(tpl1, &empty_dict, text, true);
+
+ // Different strip: when a string template is registered via
+ // StringToTemplateCache with a strip, we cannot use a different
+ // strip later to fetch the template.
+ TemplateCachePeer::TemplateCacheKey cache_key2(cache_key_a,
+ STRIP_WHITESPACE);
+ ASSERT(!cache_peer.TemplateIsCached(cache_key2));
+ }
+
+ static void TestExpandNoLoad() {
+ TemplateCache cache;
+ string filename = StringToTemplateFile("alone");
+ string top_filename = StringToTemplateFile("Hello, {{>WORLD}}");
+ string inc_filename = StringToTemplateFile("world");
+
+ TemplateDictionary dict("ExpandNoLoad");
+ dict.AddIncludeDictionary("WORLD")->SetFilename(inc_filename);
+ string out;
+
+ // This should fail because the cache is empty.
+ cache.Freeze();
+ ASSERT(!cache.ExpandNoLoad(filename, DO_NOT_STRIP, &dict, NULL, &out));
+
+ cache.ClearCache(); // also clears the "frozen" state
+ // This should succeed -- it loads filename from disk.
+ ASSERT(cache.ExpandWithData(filename, DO_NOT_STRIP, &dict, NULL, &out));
+ ASSERT(out == "alone");
+ out.clear();
+ // Now this should succeed -- it's in the cache.
+ cache.Freeze();
+ ASSERT(cache.ExpandNoLoad(filename, DO_NOT_STRIP, &dict, NULL, &out));
+ ASSERT(out == "alone");
+ out.clear();
+
+ // This should fail because neither top nor inc are in the cache.
+ cache.ClearCache();
+ cache.Freeze();
+ ASSERT(!cache.ExpandNoLoad(top_filename, DO_NOT_STRIP, &dict, NULL, &out));
+ cache.ClearCache();
+ ASSERT(cache.LoadTemplate(top_filename, DO_NOT_STRIP));
+ // This should fail too, but now only because inc_filename isn't cached.
+ cache.Freeze();
+ ASSERT(!cache.ExpandNoLoad(top_filename, DO_NOT_STRIP, &dict, NULL, &out));
+ // TODO(csilvers): this should not be necessary. But expand writes
+ // to its output even before it fails.
+ out.clear();
+ cache.ClearCache();
+ ASSERT(cache.LoadTemplate(top_filename, DO_NOT_STRIP));
+ ASSERT(cache.LoadTemplate(inc_filename, DO_NOT_STRIP));
+ cache.Freeze();
+ // *Now* it should succeed, with everything it needs loaded.
+ ASSERT(cache.ExpandNoLoad(top_filename, DO_NOT_STRIP, &dict, NULL, &out));
+ ASSERT(out == "Hello, world");
+ out.clear();
+ // This should succeed too, of course.
+ ASSERT(cache.ExpandWithData(top_filename, DO_NOT_STRIP, &dict, NULL, &out));
+ ASSERT(out == "Hello, world");
+ out.clear();
+
+ cache.ClearCache();
+ ASSERT(cache.ExpandWithData(top_filename, DO_NOT_STRIP, &dict, NULL, &out));
+ ASSERT(out == "Hello, world");
+ out.clear();
+ // Now everything NoLoad needs should be in the cache again.
+ cache.Freeze();
+ ASSERT(cache.ExpandNoLoad(top_filename, DO_NOT_STRIP, &dict, NULL, &out));
+ ASSERT(out == "Hello, world");
+ out.clear();
+
+ cache.ClearCache();
+ ASSERT(cache.LoadTemplate(top_filename, DO_NOT_STRIP));
+ cache.Freeze();
+ // This fails, of course, because we're frozen.
+ ASSERT(!cache.LoadTemplate(inc_filename, DO_NOT_STRIP));
+ // And thus, this fails too.
+ ASSERT(!cache.ExpandNoLoad(top_filename, DO_NOT_STRIP, &dict, NULL, &out));
+ }
+
+ static void TestTemplateSearchPath() {
+ TemplateCache cache1;
+
+ const string pathA = PathJoin(FLAGS_test_tmpdir, "a/");
+ const string pathB = PathJoin(FLAGS_test_tmpdir, "b/");
+ CreateOrCleanTestDir(pathA);
+ CreateOrCleanTestDir(pathB);
+
+ TemplateDictionary dict("");
+ cache1.SetTemplateRootDirectory(pathA);
+ cache1.AddAlternateTemplateRootDirectory(pathB);
+ ASSERT(cache1.template_root_directory() == pathA);
+
+ // 1. Show that a template in the secondary path can be found.
+ const string path_b_bar = PathJoin(pathB, "template_bar");
+ StringToFile("b/template_bar", path_b_bar);
+ ASSERT_STREQ(path_b_bar.c_str(),
+ cache1.FindTemplateFilename("template_bar").c_str());
+ const Template* b_bar = cache1.GetTemplate("template_bar", DO_NOT_STRIP);
+ ASSERT(b_bar);
+ AssertExpandIs(b_bar, &dict, "b/template_bar", true);
+
+ // 2. Show that the search stops once the first match is found.
+ // Create two templates in separate directories with the same name.
+ const string path_a_foo = PathJoin(pathA, "template_foo");
+ const string path_b_foo = PathJoin(pathB, "template_foo");
+ StringToFile("a/template_foo", path_a_foo);
+ StringToFile("b/template_foo", path_b_foo);
+ ASSERT_STREQ(path_a_foo.c_str(),
+ cache1.FindTemplateFilename("template_foo").c_str());
+ const Template* a_foo = cache1.GetTemplate("template_foo", DO_NOT_STRIP);
+ ASSERT(a_foo);
+ AssertExpandIs(a_foo, &dict, "a/template_foo", true);
+
+ // 3. Show that attempting to find a non-existent template gives an
+ // empty path.
+ ASSERT(cache1.FindTemplateFilename("baz").empty());
+
+ // 4. If we make a new cache, its path will be followed.
+ TemplateCache cache2;
+ cache2.SetTemplateRootDirectory(pathB);
+ ASSERT_STREQ(path_b_foo.c_str(),
+ cache2.FindTemplateFilename("template_foo").c_str());
+ const Template* b_foo = cache2.GetTemplate("template_foo", DO_NOT_STRIP);
+ ASSERT(b_foo);
+ AssertExpandIs(b_foo, &dict, "b/template_foo", true);
+
+ // 5. Neither path will work for the default cache, which has no path.
+ ASSERT(Template::template_root_directory() == kCWD);
+ ASSERT(Template::FindTemplateFilename("template_foo").empty());
+ ASSERT(!Template::GetTemplate("template_foo", DO_NOT_STRIP));
+
+ CreateOrCleanTestDir(pathA);
+ CreateOrCleanTestDir(pathB);
+ }
+
+ static void TestDelete() {
+ Template::ClearCache(); // just for exercise.
+ const string cache_key = "TestRemoveStringFromTemplateCache";
+ const string text = "<html>here today...</html>";
+ const string text2 = "<html>on disk tomorrow</html>";
+
+ TemplateDictionary dict("test");
+ TemplateCache cache1;
+
+ ASSERT(cache1.StringToTemplateCache(cache_key, text, DO_NOT_STRIP));
+ const Template* tpl = cache1.GetTemplate(cache_key, DO_NOT_STRIP);
+ ASSERT(tpl);
+ AssertExpandIs(tpl, &dict, text, true);
+
+ cache1.Delete(cache_key);
+ tpl = cache1.GetTemplate(cache_key, DO_NOT_STRIP);
+ ASSERT(!tpl);
+ tpl = cache1.GetTemplate(cache_key, STRIP_WHITESPACE);
+ ASSERT(!tpl);
+ tpl = cache1.GetTemplate(cache_key, STRIP_BLANK_LINES);
+ ASSERT(!tpl);
+
+ // Try delete on a file-based template as well.
+ string filename = StringToTemplateFile(text2);
+ tpl = cache1.GetTemplate(filename, DO_NOT_STRIP);
+ ASSERT(tpl);
+ AssertExpandIs(tpl, &dict, text2, true);
+ cache1.Delete(filename);
+ tpl = cache1.GetTemplate(filename, DO_NOT_STRIP);
+ ASSERT(tpl);
+ AssertExpandIs(tpl, &dict, text2, true);
+
+ // Try re-adding a cache key after deleting it.
+ ASSERT(cache1.StringToTemplateCache(cache_key, text, DO_NOT_STRIP));
+ tpl = cache1.GetTemplate(cache_key, DO_NOT_STRIP);
+ ASSERT(tpl);
+ AssertExpandIs(tpl, &dict, text, true);
+
+ // Try ClearCache while we're at it.
+ cache1.ClearCache();
+ tpl = cache1.GetTemplate(cache_key, STRIP_BLANK_LINES);
+ ASSERT(!tpl);
+
+ // Test on the Template class, which has a different function name.
+ ASSERT(StringToTemplateCache(cache_key, text, DO_NOT_STRIP));
+ tpl = Template::GetTemplate(cache_key, DO_NOT_STRIP);
+ ASSERT(tpl);
+ AssertExpandIs(tpl, &dict, text, true);
+
+ Template::RemoveStringFromTemplateCache(cache_key);
+ tpl = Template::GetTemplate(cache_key, DO_NOT_STRIP);
+ ASSERT(!tpl);
+ tpl = Template::GetTemplate(cache_key, STRIP_WHITESPACE);
+ ASSERT(!tpl);
+ tpl = Template::GetTemplate(cache_key, STRIP_BLANK_LINES);
+ ASSERT(!tpl);
+ }
+
+ static void TestTemplateCache() {
+ const string filename_a = StringToTemplateFile("Test template 1");
+ const string filename_b = StringToTemplateFile("Test template 2.");
+
+ TemplateCache cache1;
+ const Template *tpl, *tpl2;
+ ASSERT(tpl = cache1.GetTemplate(filename_a, DO_NOT_STRIP));
+
+ ASSERT(tpl2 = cache1.GetTemplate(filename_b, DO_NOT_STRIP));
+ ASSERT(tpl2 != tpl); // different filenames.
+ ASSERT(tpl2 = cache1.GetTemplate(filename_a, STRIP_BLANK_LINES));
+ ASSERT(tpl2 != tpl); // different strip.
+ ASSERT(tpl2 = cache1.GetTemplate(filename_b, STRIP_BLANK_LINES));
+ ASSERT(tpl2 != tpl); // different filenames and strip.
+ ASSERT(tpl2 = cache1.GetTemplate(filename_a, DO_NOT_STRIP));
+ ASSERT(tpl2 == tpl); // same filename and strip.
+ }
+
+ static void TestReloadAllIfChangedLazyLoad() {
+ TemplateDictionary dict("empty");
+ TemplateCache cache1;
+
+ string filename = StringToTemplateFile("{valid template}");
+ string nonexistent = StringToTemplateFile("dummy");
+ unlink(nonexistent.c_str());
+
+ const Template* tpl = cache1.GetTemplate(filename, STRIP_WHITESPACE);
+ ASSERT(tpl); // not assert(): that is compiled out under NDEBUG
+ const Template* tpl2 = cache1.GetTemplate(nonexistent, STRIP_WHITESPACE);
+ ASSERT(!tpl2);
+
+ StringToFile("exists now!", nonexistent);
+ tpl2 = cache1.GetTemplate(nonexistent, STRIP_WHITESPACE);
+ ASSERT(!tpl2); // not picked up until a reload is requested
+ cache1.ReloadAllIfChanged(TemplateCache::LAZY_RELOAD);
+ tpl = cache1.GetTemplate(filename, STRIP_WHITESPACE); // force the reload
+ tpl2 = cache1.GetTemplate(nonexistent, STRIP_WHITESPACE);
+ ASSERT(tpl2); // file exists now
+
+ unlink(nonexistent.c_str()); // here today...
+ cache1.ReloadAllIfChanged(TemplateCache::LAZY_RELOAD);
+ ASSERT(cache1.GetTemplate(filename, STRIP_WHITESPACE));
+ ASSERT(!cache1.GetTemplate(nonexistent, STRIP_WHITESPACE));
+
+ StringToFile("lazarus", nonexistent);
+ StringToFile("{new template}", filename);
+ tpl = cache1.GetTemplate(filename, STRIP_WHITESPACE);
+ AssertExpandIs(tpl, &dict, "{valid template}", true); // haven't reloaded
+ // But a different cache (say, the default) should load the new content.
+ const Template* tpl3 = Template::GetTemplate(filename, STRIP_WHITESPACE);
+ AssertExpandIs(tpl3, &dict, "{new template}", true);
+
+ cache1.ReloadAllIfChanged(TemplateCache::LAZY_RELOAD);
+ tpl = cache1.GetTemplate(filename, STRIP_WHITESPACE); // needed
+ AssertExpandIs(tpl, &dict, "{new template}", true);
+ tpl2 = cache1.GetTemplate(nonexistent, STRIP_WHITESPACE);
+ ASSERT(tpl2);
+ AssertExpandIs(tpl2, &dict, "lazarus", true);
+
+ // Ensure that string templates don't reload
+ const string cache_key_a = "cache key a";
+ const string text = "Test template 1";
+ const Template *str_tpl;
+ ASSERT(cache1.StringToTemplateCache(cache_key_a, text, DO_NOT_STRIP));
+ str_tpl = cache1.GetTemplate(cache_key_a, DO_NOT_STRIP);
+ AssertExpandIs(str_tpl, &dict, text, true);
+ cache1.ReloadAllIfChanged(TemplateCache::LAZY_RELOAD);
+ ASSERT(cache1.GetTemplate(cache_key_a, DO_NOT_STRIP) == str_tpl);
+
+ cache1.ClearCache();
+ }
+
+ static void TestReloadAllIfChangedImmediateLoad() {
+ TemplateDictionary dict("empty");
+ TemplateCache cache1;
+ TemplateCachePeer cache_peer(&cache1);
+
+ // Add templates
+ string filename1 = StringToTemplateFile("{valid template}");
+ string filename2 = StringToTemplateFile("{another valid template}");
+
+ const Template* tpl1 = cache1.GetTemplate(filename1,
+ STRIP_WHITESPACE);
+ ASSERT(tpl1); // not assert(): that is compiled out under NDEBUG
+ const Template* tpl2 = cache1.GetTemplate(filename2,
+ STRIP_WHITESPACE);
+ ASSERT(tpl2);
+
+ StringToFile("{file1 contents changed}", filename1);
+ cache1.ReloadAllIfChanged(TemplateCache::IMMEDIATE_RELOAD);
+
+ TemplateCachePeer::TemplateCacheKey cache_key1(filename1, STRIP_WHITESPACE);
+ ASSERT(cache_peer.TemplateIsCached(cache_key1));
+ const Template* tpl1_post_reload = cache_peer.GetTemplate(filename1,
+ STRIP_WHITESPACE);
+ ASSERT(tpl1_post_reload != tpl1);
+ // Check that cache1's tpl1 has the new contents
+ AssertExpandIs(tpl1_post_reload, &dict, "{file1 contents changed}",
+ true);
+
+ // Ensure tpl2 is unchanged
+ TemplateCachePeer::TemplateCacheKey cache_key2(filename2, STRIP_WHITESPACE);
+ ASSERT(cache_peer.TemplateIsCached(cache_key2));
+ const Template* tpl2_post_reload = cache_peer.GetTemplate(filename2,
+ STRIP_WHITESPACE);
+ ASSERT(tpl2_post_reload == tpl2);
+
+ // Test delete & re-add: delete tpl2, and reload.
+ unlink(filename2.c_str());
+ cache1.ReloadAllIfChanged(TemplateCache::IMMEDIATE_RELOAD);
+ ASSERT(!cache_peer.GetTemplate(filename2, STRIP_WHITESPACE));
+ // Re-add tpl2 and ensure it reloads.
+ StringToFile("{re-add valid template contents}", filename2);
+ cache1.ReloadAllIfChanged(TemplateCache::IMMEDIATE_RELOAD);
+ ASSERT(cache_peer.GetTemplate(filename2, STRIP_WHITESPACE));
+
+ // Ensure that string templates don't reload
+ const string cache_key_a = "cache key a";
+ const string text = "Test template 1";
+ const Template *str_tpl;
+ ASSERT(cache1.StringToTemplateCache(cache_key_a, text, DO_NOT_STRIP));
+ str_tpl = cache1.GetTemplate(cache_key_a, DO_NOT_STRIP);
+ AssertExpandIs(str_tpl, &dict, text, true);
+ cache1.ReloadAllIfChanged(TemplateCache::IMMEDIATE_RELOAD);
+ ASSERT(cache1.GetTemplate(cache_key_a, DO_NOT_STRIP) == str_tpl);
+
+ cache1.ClearCache();
+ }
+
+ static void TestReloadImmediateWithDifferentSearchPaths() {
+ TemplateDictionary dict("empty");
+ TemplateCache cache1;
+ TemplateCachePeer cache_peer(&cache1);
+
+ const string pathA = PathJoin(FLAGS_test_tmpdir, "a/");
+ const string pathB = PathJoin(FLAGS_test_tmpdir, "b/");
+ CreateOrCleanTestDir(pathA);
+ CreateOrCleanTestDir(pathB);
+
+ cache1.SetTemplateRootDirectory(pathA);
+ cache1.AddAlternateTemplateRootDirectory(pathB);
+ ASSERT(cache1.template_root_directory() == pathA);
+
+ // Add b/foo
+ const string path_b_foo = PathJoin(pathB, "template_foo");
+ StringToFile("b/template_foo", path_b_foo);
+ ASSERT_STREQ(path_b_foo.c_str(),
+ cache1.FindTemplateFilename("template_foo").c_str());
+ // Add b/foo to the template cache.
+ cache1.GetTemplate("template_foo", DO_NOT_STRIP);
+
+ // Add a/foo
+ const string path_a_foo = PathJoin(pathA, "template_foo");
+ StringToFile("a/template_foo", path_a_foo);
+ ASSERT_STREQ(path_a_foo.c_str(),
+ cache1.FindTemplateFilename("template_foo").c_str());
+
+ // Now, on reload we pick up foo from the earlier search path: a/foo
+ cache1.ReloadAllIfChanged(TemplateCache::IMMEDIATE_RELOAD);
+ const Template* foo_post_reload = cache_peer.GetTemplate("template_foo",
+ STRIP_WHITESPACE);
+ AssertExpandIs(foo_post_reload, &dict, "a/template_foo",
+ true);
+
+ // Delete a/foo and reload. Now we pick up the next available foo: b/foo
+ unlink(path_a_foo.c_str());
+ cache1.ReloadAllIfChanged(TemplateCache::IMMEDIATE_RELOAD);
+ foo_post_reload = cache_peer.GetTemplate("template_foo",
+ STRIP_WHITESPACE);
+ AssertExpandIs(foo_post_reload, &dict, "b/template_foo",
+ true);
+ }
+
+ static void TestReloadLazyWithDifferentSearchPaths() {
+ // Identical to the test above, but with LAZY_RELOAD
+ TemplateDictionary dict("empty");
+ TemplateCache cache1;
+ TemplateCachePeer cache_peer(&cache1);
+
+ const string pathA = PathJoin(FLAGS_test_tmpdir, "a/");
+ const string pathB = PathJoin(FLAGS_test_tmpdir, "b/");
+ CreateOrCleanTestDir(pathA);
+ CreateOrCleanTestDir(pathB);
+
+ cache1.SetTemplateRootDirectory(pathA);
+ cache1.AddAlternateTemplateRootDirectory(pathB);
+ ASSERT(cache1.template_root_directory() == pathA);
+
+ // Add b/foo
+ const string path_b_foo = PathJoin(pathB, "template_foo");
+ StringToFile("b/template_foo", path_b_foo);
+ ASSERT_STREQ(path_b_foo.c_str(),
+ cache1.FindTemplateFilename("template_foo").c_str());
+ // Add b/foo to the template cache.
+ cache1.GetTemplate("template_foo", DO_NOT_STRIP);
+
+ // Add a/foo
+ const string path_a_foo = PathJoin(pathA, "template_foo");
+ StringToFile("a/template_foo", path_a_foo);
+ ASSERT_STREQ(path_a_foo.c_str(),
+ cache1.FindTemplateFilename("template_foo").c_str());
+
+ // Now, on reload we pick up foo from the earlier search path: a/foo
+ cache1.ReloadAllIfChanged(TemplateCache::LAZY_RELOAD);
+ const Template* foo_post_reload = cache_peer.GetTemplate("template_foo",
+ STRIP_WHITESPACE);
+ AssertExpandIs(foo_post_reload, &dict, "a/template_foo",
+ true);
+
+ // Delete a/foo and reload. Now we pick up the next available foo: b/foo
+ unlink(path_a_foo.c_str());
+ cache1.ReloadAllIfChanged(TemplateCache::LAZY_RELOAD);
+ foo_post_reload = cache_peer.GetTemplate("template_foo",
+ STRIP_WHITESPACE);
+ AssertExpandIs(foo_post_reload, &dict, "b/template_foo",
+ true);
+ }
+
+ static void TestRefcounting() {
+ TemplateCache cache1;
+ TemplateCachePeer cache_peer(&cache1);
+ TemplateDictionary dict("dict");
+
+ // Add templates
+ string filename1 = StringToTemplateFile("{valid template}");
+ string filename2 = StringToTemplateFile("{another valid template}");
+
+ const Template* cache1_tpl1 = cache1.GetTemplate(filename1,
+ STRIP_WHITESPACE);
+ ASSERT(cache1_tpl1); // not assert(): that is compiled out under NDEBUG
+ const Template* cache1_tpl2 = cache1.GetTemplate(filename2,
+ STRIP_WHITESPACE);
+ ASSERT(cache1_tpl2);
+
+ // Check refcount. It should be 2 -- one for the original value
+ // when it's constructed, and one for the call to GetTemplate.
+ TemplateCachePeer::TemplateCacheKey cache_key1(filename1, STRIP_WHITESPACE);
+ ASSERT(cache_peer.Refcount(cache_key1) == 2);
+ TemplateCachePeer::TemplateCacheKey cache_key2(filename2, STRIP_WHITESPACE);
+ ASSERT(cache_peer.Refcount(cache_key2) == 2);
+
+ // Clone cache2 from cache1
+ TemplateCache* cache2 = cache1.Clone();
+ TemplateCachePeer cache_peer2(cache2);
+
+ // Check refcount was incremented. It should be the same for both caches.
+ ASSERT(cache_peer.Refcount(cache_key1) == 3);
+ ASSERT(cache_peer2.Refcount(cache_key1) == 3);
+ ASSERT(cache_peer.Refcount(cache_key2) == 3);
+ ASSERT(cache_peer2.Refcount(cache_key2) == 3);
+
+ // Check that the template ptrs in both caches are the same.
+ const Template* cache2_tpl1 = cache2->GetTemplate(filename1,
+ STRIP_WHITESPACE);
+ const Template* cache2_tpl2 = cache2->GetTemplate(filename2,
+ STRIP_WHITESPACE);
+ ASSERT(cache2_tpl1 == cache1_tpl1);
+ ASSERT(cache2_tpl2 == cache1_tpl2);
+
+ // GetTemplate should have augmented the refcount.
+ ASSERT(cache_peer.Refcount(cache_key1) == 4);
+ ASSERT(cache_peer2.Refcount(cache_key1) == 4);
+ ASSERT(cache_peer.Refcount(cache_key2) == 4);
+ ASSERT(cache_peer2.Refcount(cache_key2) == 4);
+
+ // Change tpl1 file contents and reload.
+ StringToFile("{file1 contents changed}", filename1);
+ cache2->ReloadAllIfChanged(TemplateCache::LAZY_RELOAD);
+ // Since the template will be reloaded into a new instance,
+ // GetTemplate will return new pointers. The older template
+ // pointer was moved to the freelist.
+ const Template* cache2_tpl1_post_reload = cache2->GetTemplate(
+ filename1, STRIP_WHITESPACE);
+ ASSERT(cache2_tpl1_post_reload != cache2_tpl1);
+ // Check that cache2's tpl1 has the new contents
+ AssertExpandIs(cache2_tpl1_post_reload, &dict, "{file1 contents changed}",
+ true);
+
+ // Ensure tpl2 is unchanged
+ const Template* cache2_tpl2_post_reload = cache2->GetTemplate(
+ filename2, STRIP_WHITESPACE);
+ ASSERT(cache2_tpl2_post_reload == cache2_tpl2);
+
+ // Now key1 points to different templates in cache1 and cache2.
+ // cache1's version should have a refcount of 3 (was 4, went down
+ // by 1 when cache2 dropped its reference to it). cache2's
+ // version should be 2 (one for the new file, 1 for the call to
+ // GetTemplate() that followed it), while key2 should have a
+ // refcount of 5 in both caches (due to the new call, above, to
+ // GetTemplate()).
+ ASSERT(cache_peer.Refcount(cache_key1) == 3);
+ ASSERT(cache_peer2.Refcount(cache_key1) == 2);
+ ASSERT(cache_peer.Refcount(cache_key2) == 5);
+ ASSERT(cache_peer2.Refcount(cache_key2) == 5);
+
+ const int old_delete_count = cache_peer.NumTotalTemplateDeletes();
+
+ // Clear up the cache2's freelist, this should drop all refcounts,
+ // due to the calls cache_peer2 made to
+ // GetTemplate(the-old-filename1), GetTemplate(the-new-filename1),
+ // and GetTemplate(filename2) (twice!)
+ cache_peer2.DoneWithGetTemplatePtrs();
+ ASSERT(cache_peer.Refcount(cache_key1) == 2);
+ ASSERT(cache_peer2.Refcount(cache_key1) == 1);
+ ASSERT(cache_peer.Refcount(cache_key2) == 3);
+ ASSERT(cache_peer2.Refcount(cache_key2) == 3);
+
+ // Make sure that deleting from the cache causes deletion.
+ // ClearCache() on peer1 should finally get rid of the old filename1.
+ cache_peer.ClearCache();
+ ASSERT(cache_peer.NumTotalTemplateDeletes() == old_delete_count + 1);
+ cache_peer2.ClearCache();
+ // Delete-count should go up by 2 as both the new tpl1, and tpl2, go away.
+ ASSERT(cache_peer.NumTotalTemplateDeletes() == old_delete_count + 3);
+
+ delete cache2;
+ }
+
+ static void TestDoneWithGetTemplatePtrs() {
+ TemplateCache cache1;
+ TemplateCachePeer cache_peer1(&cache1);
+ TemplateDictionary dict("dict");
+
+ // Add templates
+ string fname = StringToTemplateFile("{valid template}");
+ TemplateCachePeer::TemplateCacheKey cache_key(fname, STRIP_WHITESPACE);
+ string out;
+
+ int old_delete_count = cache_peer1.NumTotalTemplateDeletes();
+
+ // OK, let's get the templates in the cache.
+ ASSERT(cache1.ExpandWithData(fname, STRIP_WHITESPACE, &dict, NULL, &out));
+ // This should not have changed the delete-count.
+ ASSERT(cache_peer1.NumTotalTemplateDeletes() == old_delete_count);
+ // And the refcount should be 1.
+ ASSERT(cache_peer1.Refcount(cache_key) == 1);
+ // Same holds if we expand again.
+ ASSERT(cache1.ExpandWithData(fname, STRIP_WHITESPACE, &dict, NULL, &out));
+ ASSERT(cache_peer1.NumTotalTemplateDeletes() == old_delete_count);
+ ASSERT(cache_peer1.Refcount(cache_key) == 1);
+
+ // Now we delete from the cache. Should up the delete_count.
+ ASSERT(cache1.Delete(fname));
+ ASSERT(cache_peer1.NumTotalTemplateDeletes() == ++old_delete_count);
+
+ // Calling DoneWithGetTemplatePtrs() should be a noop -- we
+ // haven't called GetTemplate() yet.
+ cache1.DoneWithGetTemplatePtrs();
+ ASSERT(cache_peer1.NumTotalTemplateDeletes() == old_delete_count);
+
+ // Now do the same thing, but throw in a GetTemplate(). Now
+ // DoneWithGetTemplatePtrs() should still cause a delete, but only
+ // after a call to Delete() deletes the cache's refcount too.
+ ASSERT(cache1.ExpandWithData(fname, STRIP_WHITESPACE, &dict, NULL, &out));
+ cache1.GetTemplate(fname, STRIP_WHITESPACE);
+ cache1.DoneWithGetTemplatePtrs();
+ ASSERT(cache_peer1.NumTotalTemplateDeletes() == old_delete_count);
+ ASSERT(cache1.Delete(fname));
+ ASSERT(cache_peer1.NumTotalTemplateDeletes() == ++old_delete_count);
+ cache1.ClearCache();
+ ASSERT(cache_peer1.NumTotalTemplateDeletes() == old_delete_count);
+
+ // Now load in a replacement. The loading itself should cause a
+ // delete (no GetTemplate calls, so no need to involve the freelist).
+ ASSERT(cache1.ExpandWithData(fname, STRIP_WHITESPACE, &dict, NULL, &out));
+ StringToFile("{file1 contents changed}", fname);
+ cache1.ReloadAllIfChanged(TemplateCache::IMMEDIATE_RELOAD);
+ ASSERT(cache_peer1.NumTotalTemplateDeletes() == ++old_delete_count);
+ // DoneWithGetTemplatePtrs() should just be a noop.
+ cache1.DoneWithGetTemplatePtrs();
+ ASSERT(cache_peer1.NumTotalTemplateDeletes() == old_delete_count);
+ // Delete the new version of fname too!
+ cache1.Delete(fname);
+ ASSERT(cache_peer1.NumTotalTemplateDeletes() == ++old_delete_count);
+
+ // Now load in a replacement, but having done a GetTemplate() first.
+ // We need DoneWithGetTemplatePtrs() to delete, in this case.
+ ASSERT(cache1.ExpandWithData(fname, STRIP_WHITESPACE, &dict, NULL, &out));
+ cache1.GetTemplate(fname, STRIP_WHITESPACE);
+ ASSERT(cache_peer1.Refcount(cache_key) == 2);
+ StringToFile("{file1 contents changed}", fname);
+ cache1.ReloadAllIfChanged(TemplateCache::IMMEDIATE_RELOAD);
+ ASSERT(cache_peer1.NumTotalTemplateDeletes() == old_delete_count);
+ cache1.DoneWithGetTemplatePtrs();
+ ASSERT(cache_peer1.NumTotalTemplateDeletes() == ++old_delete_count);
+ // Delete the new version of fname too!
+ cache1.Delete(fname);
+ ASSERT(cache_peer1.NumTotalTemplateDeletes() == ++old_delete_count);
+
+ // Add a Clone() into the mix. Now Delete() calls, even from both
+ // caches, won't up the delete-count until we DoneWithGetTemplatePtrs()
+ // -- but only from the cache that called GetTemplate().
+ ASSERT(cache1.ExpandWithData(fname, STRIP_WHITESPACE, &dict, NULL, &out));
+ cache1.GetTemplate(fname, STRIP_WHITESPACE);
+ ASSERT(cache_peer1.Refcount(cache_key) == 2);
+ {
+ TemplateCache* cache2 = cache1.Clone();
+ TemplateCachePeer cache_peer2(cache2);
+ ASSERT(cache_peer1.Refcount(cache_key) == 3);
+ ASSERT(cache_peer2.Refcount(cache_key) == 3);
+ // Do all sorts of Delete()s.
+ StringToFile("{file1 contents changed}", fname);
+ cache1.ReloadAllIfChanged(TemplateCache::IMMEDIATE_RELOAD);
+ ASSERT(cache_peer1.Refcount(cache_key) == 1); // the new file
+ ASSERT(cache_peer2.Refcount(cache_key) == 2); // the old file
+ cache2->ReloadAllIfChanged(TemplateCache::IMMEDIATE_RELOAD);
+ // Each cache has a different copy of the new file.
+ ASSERT(cache_peer1.Refcount(cache_key) == 1); // the new file
+ ASSERT(cache_peer2.Refcount(cache_key) == 1); // the new file
+ ASSERT(cache1.Delete(fname)); // should delete the new file
+ ASSERT(cache_peer1.NumTotalTemplateDeletes() == ++old_delete_count);
+ ASSERT(cache2->Delete(fname));
+ ASSERT(cache_peer1.NumTotalTemplateDeletes() == ++old_delete_count);
+ cache2->DoneWithGetTemplatePtrs();
+ ASSERT(cache_peer1.NumTotalTemplateDeletes() == old_delete_count);
+ cache1.DoneWithGetTemplatePtrs();
+ ASSERT(cache_peer1.NumTotalTemplateDeletes() == ++old_delete_count);
+ cache1.ClearCache();
+ ASSERT(cache_peer1.NumTotalTemplateDeletes() == old_delete_count);
+ delete cache2;
+ }
+
+ // If we call DoneWithGetTemplatePtrs() while a clone points to the
+ // template, it won't delete the template yet.
+ ASSERT(cache1.ExpandWithData(fname, STRIP_WHITESPACE, &dict, NULL, &out));
+ {
+ TemplateCache* cache2 = cache1.Clone();
+ TemplateCachePeer cache_peer2(cache2);
+ StringToFile("{file1 contents changed}", fname);
+ cache1.ReloadAllIfChanged(TemplateCache::IMMEDIATE_RELOAD);
+ ASSERT(cache_peer1.NumTotalTemplateDeletes() == old_delete_count);
+ delete cache2;
+ ASSERT(cache_peer1.NumTotalTemplateDeletes() == ++old_delete_count);
+ }
+ cache1.ClearCache();
+ ASSERT(cache_peer1.NumTotalTemplateDeletes() == ++old_delete_count);
+
+ // If we throw an explicit GetTemplate() in, we still need
+ // DoneWithGetTemplatePtrs().
+ ASSERT(cache1.ExpandWithData(fname, STRIP_WHITESPACE, &dict, NULL, &out));
+ cache1.GetTemplate(fname, STRIP_WHITESPACE);
+ {
+ TemplateCache* cache2 = cache1.Clone();
+ TemplateCachePeer cache_peer2(cache2);
+ StringToFile("{file1 contents changed}", fname);
+ cache1.ReloadAllIfChanged(TemplateCache::IMMEDIATE_RELOAD);
+ cache1.DoneWithGetTemplatePtrs();
+ ASSERT(cache_peer1.NumTotalTemplateDeletes() == old_delete_count);
+ delete cache2;
+ ASSERT(cache_peer1.NumTotalTemplateDeletes() == ++old_delete_count);
+ }
+ cache1.ClearCache();
+ ASSERT(cache_peer1.NumTotalTemplateDeletes() == ++old_delete_count);
+
+ // Multiple GetTemplate()s should still all be cleared by
+ // DoneWithGetTemplatePtrs().
+ ASSERT(cache1.ExpandWithData(fname, STRIP_WHITESPACE, &dict, NULL, &out));
+ ASSERT(cache1.ExpandWithData(fname, STRIP_WHITESPACE, &dict, NULL, &out));
+ cache1.GetTemplate(fname, STRIP_WHITESPACE);
+ cache1.GetTemplate(fname, STRIP_WHITESPACE);
+ ASSERT(cache_peer1.Refcount(cache_key) == 3);
+ StringToFile("{file1 contents changed}", fname);
+ cache1.ReloadAllIfChanged(TemplateCache::IMMEDIATE_RELOAD);
+ cache1.DoneWithGetTemplatePtrs();
+ ASSERT(cache_peer1.NumTotalTemplateDeletes() == ++old_delete_count);
+ cache1.ClearCache();
+ ASSERT(cache_peer1.NumTotalTemplateDeletes() == ++old_delete_count);
+
+ // Calling ClearCache() deletes old templates too -- we don't even
+ // need to change the content.
+ ASSERT(cache1.ExpandWithData(fname, STRIP_WHITESPACE, &dict, NULL, &out));
+ cache1.GetTemplate(fname, STRIP_WHITESPACE);
+ cache1.GetTemplate(fname, STRIP_WHITESPACE);
+ cache1.ClearCache();
+ ASSERT(cache_peer1.NumTotalTemplateDeletes() == ++old_delete_count);
+
+ // So does deleting the cache object.
+ ASSERT(cache1.ExpandWithData(fname, STRIP_WHITESPACE, &dict, NULL, &out));
+ {
+ TemplateCache* cache2 = cache1.Clone();
+ TemplateCachePeer cache_peer2(cache2);
+ ASSERT(cache_peer1.Refcount(cache_key) == 2);
+ cache2->GetTemplate(fname, STRIP_WHITESPACE);
+ ASSERT(cache_peer1.Refcount(cache_key) == 3);
+ ASSERT(cache_peer2.Refcount(cache_key) == 3);
+ ASSERT(cache1.Delete(fname));
+ ASSERT(cache_peer1.NumTotalTemplateDeletes() == old_delete_count);
+ ASSERT(cache_peer2.Refcount(cache_key) == 2);
+ delete cache2;
+ }
+ ASSERT(cache_peer1.NumTotalTemplateDeletes() == ++old_delete_count);
+ }
+
+ static void TestCloneStringTemplates() {
+ TemplateCache cache1;
+
+ // Create & insert a string template
+ const string cache_key_a = "cache key a";
+ const string text = "Test template 1";
+ TemplateDictionary empty_dict("dict");
+
+ ASSERT(cache1.StringToTemplateCache(cache_key_a, text, DO_NOT_STRIP));
+
+ // Clone cache2 from cache1
+ TemplateCache* cache2 = cache1.Clone();
+
+ // Check that the string template was copied into cache2
+ const Template* cache2_tpl = cache2->GetTemplate(cache_key_a,
+ DO_NOT_STRIP);
+ ASSERT(cache2_tpl);
+ AssertExpandIs(cache2_tpl, &empty_dict, text, true);
+
+ delete cache2;
+ }
+
+ static void TestInclude() {
+ TemplateCache cache;
+ string incname = StringToTemplateFile("include & print file\n");
+ string tpl_file = StringToTemplateFile("hi {{>INC:h}} bar\n");
+ const Template* tpl = cache.GetTemplate(tpl_file, DO_NOT_STRIP);
+ ASSERT(tpl);
+
+ TemplateDictionary dict("dict");
+ AssertExpandWithCacheIs(&cache, tpl_file, DO_NOT_STRIP, &dict, NULL,
+ "hi bar\n", true);
+ dict.AddIncludeDictionary("INC")->SetFilename(incname);
+ AssertExpandWithCacheIs(&cache, tpl_file, DO_NOT_STRIP, &dict, NULL,
+ "hi include & print file bar\n",
+ true);
+ }
+
+ // Make sure we don't deadlock when a template includes itself.
+ // This also tests we handle recursive indentation properly.
+ static void TestRecursiveInclude() {
+ TemplateCache cache;
+ string incname = StringToTemplateFile("hi {{>INC}} bar\n {{>INC}}!");
+ const Template* tpl = cache.GetTemplate(incname, DO_NOT_STRIP);
+ ASSERT(tpl);
+ TemplateDictionary dict("dict");
+ dict.AddIncludeDictionary("INC")->SetFilename(incname);
+ // Note the last line is indented 4 spaces instead of 2. This is
+ // because the last sub-include is indented.
+ AssertExpandWithCacheIs(&cache, incname, DO_NOT_STRIP, &dict, NULL,
+ "hi hi bar\n ! bar\n hi bar\n !!",
+ true);
+ }
+
+ static void TestStringTemplateInclude() {
+ const string cache_key = "TestStringTemplateInclude";
+ const string cache_key_inc = "TestStringTemplateInclude-inc";
+ const string text = "<html>{{>INC}}</html>";
+ const string text_inc = "<div>\n<p>\nUser {{USER}}\n</div>";
+
+ TemplateCache cache;
+ ASSERT(cache.StringToTemplateCache(cache_key, text, DO_NOT_STRIP));
+ ASSERT(cache.StringToTemplateCache(cache_key_inc, text_inc, DO_NOT_STRIP));
+
+ const Template *tpl = cache.GetTemplate(cache_key, DO_NOT_STRIP);
+ ASSERT(tpl);
+
+ TemplateDictionary dict("dict");
+ TemplateDictionary* sub_dict = dict.AddIncludeDictionary("INC");
+ sub_dict->SetFilename(cache_key_inc);
+
+ sub_dict->SetValue("USER", "John<>Doe");
+ string expected = "<html><div>\n<p>\nUser John<>Doe\n</div></html>";
+ AssertExpandWithCacheIs(&cache, cache_key, DO_NOT_STRIP, &dict, NULL,
+ expected, true);
+ }
+
+ static void TestTemplateString() {
+ TemplateCache cache;
+ ASSERT(cache.StringToTemplateCache(kKey, kContent, DO_NOT_STRIP));
+ const Template *tpl = cache.GetTemplate(kKey, DO_NOT_STRIP);
+ ASSERT(tpl);
+
+ TemplateDictionary dict("dict");
+ AssertExpandWithCacheIs(&cache, "MY_KEY", DO_NOT_STRIP, &dict, NULL,
+ "content", true);
+
+ // Try retrieving with a char* rather than a TemplateString*.
+ tpl = cache.GetTemplate("MY_KEY", DO_NOT_STRIP);
+ ASSERT(tpl);
+ AssertExpandWithCacheIs(&cache, "MY_KEY", DO_NOT_STRIP, &dict, NULL,
+ "content", true);
+
+ // Delete with a char* rather than a TemplateString*.
+ cache.Delete("MY_KEY");
+ tpl = cache.GetTemplate("MY_KEY", DO_NOT_STRIP);
+ ASSERT(!tpl);
+
+ ASSERT(cache.StringToTemplateCache("MY_KEY", "content", DO_NOT_STRIP));
+ tpl = cache.GetTemplate(kKey, DO_NOT_STRIP);
+ ASSERT(tpl);
+ cache.Delete(kKey);
+ tpl = cache.GetTemplate("MY_KEY", DO_NOT_STRIP);
+ ASSERT(!tpl);
+ }
+
+ static void TestFreeze() {
+ TemplateCache cache;
+ TemplateDictionary dict("dict");
+
+ // Load some templates
+ string filename1 = StringToTemplateFile("{valid template}");
+ string filename2 = StringToTemplateFile("hi {{>INC:h}} bar\n");
+
+ const Template* cache_tpl1 = cache.GetTemplate(filename1, STRIP_WHITESPACE);
+ ASSERT(cache_tpl1); // not assert(): that is compiled out under NDEBUG
+ AssertExpandIs(cache_tpl1, &dict, "{valid template}", true);
+ const Template* cache_tpl2 = cache.GetTemplate(filename2, DO_NOT_STRIP);
+ ASSERT(cache_tpl2);
+ static_cast<void>(cache_tpl2); // avoid unused var warning in opt mode
+ AssertExpandWithCacheIs(&cache, filename2, DO_NOT_STRIP, &dict, NULL,
+ "hi bar\n", true);
+
+ // Set the root directory
+ const string pathA = PathJoin(FLAGS_test_tmpdir, "a/");
+ CreateOrCleanTestDir(pathA);
+ cache.SetTemplateRootDirectory(pathA);
+ ASSERT(cache.template_root_directory() == pathA);
+
+ // Freeze the cache now, and test its impact.
+ cache.Freeze();
+
+ // 1. Loading new templates fails.
+ string filename3 = StringToTemplateFile("{yet another valid template}");
+ const Template* cache_tpl3 = cache.GetTemplate(filename3, STRIP_WHITESPACE);
+ ASSERT(!cache_tpl3); // must be checked in opt builds too
+ static_cast<void>(cache_tpl3); // avoid unused var warning in opt mode
+
+ // 2. Reloading existing templates fails.
+ StringToFile("{file1 contents changed}", filename1);
+ cache.ReloadAllIfChanged(TemplateCache::LAZY_RELOAD);
+ const Template* cache_tpl1_post_reload = cache.GetTemplate(
+ filename1, STRIP_WHITESPACE);
+ ASSERT(cache_tpl1_post_reload == cache_tpl1);
+ // Check that cache's tpl1 has the same old contents
+ AssertExpandIs(cache_tpl1_post_reload, &dict, "{valid template}",
+ true);
+ // 3. Cannot delete from a frozen cache.
+ cache.Delete(filename1);
+ ASSERT(cache.GetTemplate(filename1, STRIP_WHITESPACE));
+
+ // 4. Expand won't load an included template on-demand.
+ string incname = StringToTemplateFile("include & print file\n");
+ dict.AddIncludeDictionary("INC")->SetFilename(incname);
+ AssertExpandWithCacheIs(&cache, filename2, DO_NOT_STRIP, &dict, NULL,
+ "hi bar\n", false);
+
+ // 5. Cannot change template root directory.
+ const string pathB = PathJoin(FLAGS_test_tmpdir, "b/");
+ CreateOrCleanTestDir(pathB);
+ cache.SetTemplateRootDirectory(pathB);
+ ASSERT(cache.template_root_directory() == pathA); // Still the old path
+
+ CreateOrCleanTestDir(pathA);
+ CreateOrCleanTestDir(pathB);
+ }
+};
+
+
+int main(int argc, char** argv) {
+
+ CreateOrCleanTestDirAndSetAsTmpdir(FLAGS_test_tmpdir);
+
+ TemplateCacheUnittest::TestGetTemplate();
+ TemplateCacheUnittest::TestLoadTemplate();
+ TemplateCacheUnittest::TestStringGetTemplate();
+ TemplateCacheUnittest::TestStringToTemplateCacheWithStrip();
+ TemplateCacheUnittest::TestExpandNoLoad();
+ TemplateCacheUnittest::TestTemplateSearchPath();
+ TemplateCacheUnittest::TestDelete();
+ TemplateCacheUnittest::TestTemplateCache();
+ TemplateCacheUnittest::TestReloadAllIfChangedLazyLoad();
+ TemplateCacheUnittest::TestReloadAllIfChangedImmediateLoad();
+ TemplateCacheUnittest::TestReloadImmediateWithDifferentSearchPaths();
+ TemplateCacheUnittest::TestReloadLazyWithDifferentSearchPaths();
+ TemplateCacheUnittest::TestRefcounting();
+ TemplateCacheUnittest::TestDoneWithGetTemplatePtrs();
+ TemplateCacheUnittest::TestCloneStringTemplates();
+ TemplateCacheUnittest::TestInclude();
+ TemplateCacheUnittest::TestRecursiveInclude();
+ TemplateCacheUnittest::TestStringTemplateInclude();
+ TemplateCacheUnittest::TestTemplateString();
+ TemplateCacheUnittest::TestFreeze();
+
+ printf("DONE\n");
+ return 0;
+}
diff --git a/src/tests/template_dictionary_unittest.cc b/src/tests/template_dictionary_unittest.cc
new file mode 100644
index 0000000..f524a21
--- /dev/null
+++ b/src/tests/template_dictionary_unittest.cc
@@ -0,0 +1,1012 @@
+// Copyright (c) 2005, Google Inc.
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+// * Neither the name of Google Inc. nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+// ---
+// Author: csilvers@google.com (Craig Silverstein)
+//
+// This code avoids the google testing framework as much as possible,
+// to make it easier to open-source.
+
+#include "config_for_unittests.h"
+#include <stdio.h>
+#include <string.h>
+#include <assert.h>
+#include <vector>
+#include "base/arena.h"
+#include <ctemplate/template_dictionary.h>
+#include <ctemplate/template_modifiers.h>
+#include <ctemplate/per_expand_data.h>
+#include "tests/template_test_util.h"
+#include "base/util.h"
+TEST_INIT // defines RUN_ALL_TESTS
+
+using std::string;
+using std::vector;
+using GOOGLE_NAMESPACE::UnsafeArena;
+using GOOGLE_NAMESPACE::DO_NOT_STRIP;
+using GOOGLE_NAMESPACE::ExpandEmitter;
+using GOOGLE_NAMESPACE::PerExpandData;
+using GOOGLE_NAMESPACE::StaticTemplateString;
+using GOOGLE_NAMESPACE::StringToTemplateCache;
+using GOOGLE_NAMESPACE::TemplateDictionary;
+using GOOGLE_NAMESPACE::TemplateDictionaryInterface;
+using GOOGLE_NAMESPACE::TemplateDictionaryPeer;
+using GOOGLE_NAMESPACE::TemplateString;
+
+#define ASSERT_STRSTR(text, substr) do { /* fail unless substr occurs in text */ \
+ if (!strstr((text), (substr))) { /* note: both arguments are evaluated more than once */ \
+ printf("%s: %d: ASSERT FAILED: '%s' not in '%s'\n", \
+ __FILE__, __LINE__, (substr), (text)); \
+ assert(strstr((text), (substr))); /* aborts here unless NDEBUG is defined */ \
+ exit(1); /* fallback so the failure still ends the process */ \
+ } \
+} while (0)
+
+
+// Test escape-functor: ignores its input and always emits the literal "foo".
+class FooEscaper : public GOOGLE_NAMESPACE::TemplateModifier {
+ public:
+ void Modify(const char* in, size_t inlen, // input text is not read
+ const PerExpandData*,
+ ExpandEmitter* outbuf, const string& arg) const {
+ assert(arg.empty()); // we don't take an argument
+ outbuf->Emit("foo");
+ }
+};
+
+// Test escape-functor: discards its input and emits nothing at all.
+class NullEscaper : public GOOGLE_NAMESPACE::TemplateModifier {
+ public:
+ void Modify(const char* in, size_t inlen, // input text is not read
+ const PerExpandData*,
+ ExpandEmitter* outbuf, const string& arg) const { // outbuf is never written
+ assert(arg.empty()); // we don't take an argument
+ }
+};
+
+// Test escape-functor: javascript-escapes the input, then html-escapes that result.
+class DoubleEscaper : public GOOGLE_NAMESPACE::TemplateModifier {
+ public:
+ void Modify(const char* in, size_t inlen,
+ const PerExpandData* data,
+ ExpandEmitter* outbuf, const string& arg) const {
+ assert(arg.empty()); // we don't take an argument
+ string tmp = GOOGLE_NAMESPACE::javascript_escape(in, inlen); // stage 1: js-escape into a temporary
+ GOOGLE_NAMESPACE::html_escape.Modify(tmp.data(), tmp.size(), data, outbuf, ""); // stage 2: html-escape into outbuf, "" = no modifier argument
+ }
+};
+
+namespace {
+
+static const TemplateDictionary* GetSectionDict( // Returns the i-th (0-based) dictionary stored under section `name` of d.
+ const TemplateDictionary* d, const char* name, int i) {
+ TemplateDictionaryPeer peer(d);
+ vector<const TemplateDictionary*> dicts;
+ EXPECT_GE(peer.GetSectionDictionaries(name, &dicts), i + 1); // index i needs at least i + 1 entries
+ return dicts[i];
+}
+static const TemplateDictionary* GetIncludeDict( // Returns the i-th (0-based) dictionary stored under include `name` of d.
+ const TemplateDictionary* d, const char* name, int i) {
+ TemplateDictionaryPeer peer(d);
+ vector<const TemplateDictionary*> dicts;
+ EXPECT_GE(peer.GetIncludeDictionaries(name, &dicts), i + 1); // index i needs at least i + 1 entries
+ return dicts[i];
+}
+
+static void SetUp() { // Registers the global value that the expected dumps below show as "GLOBAL: >top<".
+ TemplateDictionary::SetGlobalValue("GLOBAL", "top");
+}
+
+TEST(TemplateDictionary, SetValueAndTemplateStringAndArena) { // Sets values through SetValue and operator[] and checks the resulting dump.
+ // Try both with the arena, and without.
+ UnsafeArena arena(100);
+ // We run the test with arena twice to double-check we don't ever delete it
+ UnsafeArena* arenas[] = {&arena, &arena, NULL};
+ for (int i = 0; i < sizeof(arenas)/sizeof(*arenas); ++i) {
+ TemplateDictionary dict(string("test_arena") + char('0'+i), arenas[i]); // dict name carries the loop index
+
+ // Test copying char*s, strings, and explicit TemplateStrings
+ dict.SetValue("FOO", "foo");
+ dict.SetValue(string("FOO2"), TemplateString("foo2andmore", 4)); // explicit length 4: only "foo2" is stored
+ dict["FOO3"] = "foo3";
+ dict[string("FOO4")] = TemplateString("foo4andmore", 4); // explicit length 4: only "foo4" is stored
+ dict["FOO5"] = string("Olaf");
+ dict["FOO6"] = 6;
+ dict["FOO7"] = long(7);
+
+ TemplateDictionaryPeer peer(&dict);
+ // verify what happened
+ EXPECT_TRUE(peer.ValueIs("FOO", "foo"));
+ EXPECT_TRUE(peer.ValueIs("FOO2", "foo2"));
+ string dump;
+ dict.DumpToString(&dump);
+ char expected[256];
+ snprintf(expected, sizeof(expected), // %d below is filled with the loop index to match the dict name
+ ("global dictionary {\n"
+ " BI_NEWLINE: >\n"
+ "<\n"
+ " BI_SPACE: > <\n"
+ " GLOBAL: >top<\n"
+ "};\n"
+ "dictionary 'test_arena%d' {\n"
+ " FOO: >foo<\n"
+ " FOO2: >foo2<\n"
+ " FOO3: >foo3<\n"
+ " FOO4: >foo4<\n"
+ " FOO5: >Olaf<\n"
+ " FOO6: >6<\n"
+ " FOO7: >7<\n"
+ "}\n"), i);
+ EXPECT_STREQ(dump.c_str(), expected);
+ }
+}
+
+TEST(TemplateDictionary, SetValueWithoutCopy) { // Checks that SetValueWithoutCopy keeps pointing at the caller's buffer.
+ UnsafeArena arena(100);
+ TemplateDictionary dict("Test arena", &arena);
+
+ char value[32];
+ snprintf(value, sizeof(value), "%s", "value");
+
+ const void* const ptr = arena.Alloc(0); // marker: compared below to detect any arena allocation
+ dict.SetValueWithoutCopy("key", value);
+ // We shouldn't have copied the value string.
+ EXPECT_EQ(ptr, arena.Alloc(0));
+
+ TemplateDictionaryPeer peer(&dict);
+ EXPECT_TRUE(peer.ValueIs("key", "value"));
+ // If our content changes, so does what's in the dictionary -- but
+ // only the contents of the buffer, not its length!
+ snprintf(value, sizeof(value), "%s", "not_value");
+ EXPECT_TRUE(peer.ValueIs("key", "not_v")); // length stays 5: strlen("not_v") == strlen("value")
+}
+
+TEST(TemplateDictionary, SetIntValue) { // Checks that integers are stored as their decimal text.
+ TemplateDictionary dict("test_SetIntValue", NULL);
+ TemplateDictionaryPeer peer(&dict);
+
+ dict.SetIntValue("INT", 5);
+ // - is an illegal varname in templates, but perfectly fine in dicts
+ dict.SetIntValue("-INT", -5);
+
+ EXPECT_TRUE(peer.ValueIs("INT", "5"));
+ EXPECT_TRUE(peer.ValueIs("-INT", "-5"));
+ string dump;
+ dict.DumpToString(&dump);
+ ASSERT_STRSTR(dump.c_str(), "\n INT: >5<\n"); // both entries must also appear in the dump
+ ASSERT_STRSTR(dump.c_str(), "\n -INT: >-5<\n");
+
+}
+
+TEST(TemplateDictionary, SetFormattedValue) { // Checks printf-style value formatting, for a short and a very long result.
+ TemplateDictionary dict("test_SetFormattedValue", NULL);
+ TemplateDictionaryPeer peer(&dict);
+
+ dict.SetFormattedValue(TemplateString("PRINTF", sizeof("PRINTF")-1), // -1 drops the terminating NUL from the length
+ "%s test %04d", "template test", 1);
+
+ EXPECT_TRUE(peer.ValueIs("PRINTF", "template test test 0001"));
+ string dump;
+ dict.DumpToString(&dump);
+ ASSERT_STRSTR(dump.c_str(), "\n PRINTF: >template test test 0001<\n");
+
+ // Now test something of size 4k or so, where we can't use scratchbuf
+ dict.SetFormattedValue(TemplateString("PRINTF", sizeof("PRINTF")-1),
+ "%s test %04444d", "template test", 2); // zero-pads the integer to 4444 digits
+ string expected("template test test ");
+ for (int i = 0; i < 4443; ++i) // 4443 zeros followed by the digit "2" make 4444 digits
+ expected.append("0");
+ expected.append("2");
+ EXPECT_TRUE(peer.ValueIs("PRINTF", expected));
+ string dump2;
+ dict.DumpToString(&dump2);
+ expected = string("\n PRINTF: >") + expected + string("<\n"); // wrap the value the way the dump prints it
+ ASSERT_STRSTR(dump2.c_str(), expected.c_str());
+}
+
+TEST(TemplateDictionary, SetEscapedValue) { // Checks SetEscapedValue with built-in modifiers and with hand-made ones.
+ TemplateDictionary dict("test_SetEscapedValue", NULL);
+ TemplateDictionaryPeer peer(&dict);
+
+ dict.SetEscapedValue("hardest HTML",
+ "<A HREF='foo'\nid=\"bar\t\t&&\vbaz\">",
+ GOOGLE_NAMESPACE::html_escape);
+ dict.SetEscapedValue("hardest JS",
+ ("f = 'foo';\r\n\tprint \"\\&foo = \b\", \"foo\""),
+ GOOGLE_NAMESPACE::javascript_escape);
+ dict.SetEscapedValue("query escape 0", "",
+ GOOGLE_NAMESPACE::url_query_escape);
+
+ EXPECT_TRUE(peer.ValueIs("hardest HTML", // NOTE(review): assumes each of \n, \t, \v becomes one space -- confirm
+ "&lt;A HREF=&#39;foo&#39; id=&quot;bar  &amp;&amp; "
+ "baz&quot;&gt;"));
+ EXPECT_TRUE(peer.ValueIs("hardest JS",
+ "f \\x3d \\x27foo\\x27;\\r\\n\\tprint \\x22\\\\\\x26"
+ "foo \\x3d \\b\\x22, \\x22foo\\x22"));
+ EXPECT_TRUE(peer.ValueIs("query escape 0", "")); // empty input stays empty
+
+ // Test using hand-made modifiers.
+ FooEscaper foo_escaper;
+ dict.SetEscapedValue("easy foo", "hello there!",
+ FooEscaper()); // modifier passed as a temporary
+ dict.SetEscapedValue("harder foo", "so much to say\nso many foos",
+ foo_escaper); // modifier passed as a named object
+ DoubleEscaper double_escaper;
+ dict.SetEscapedValue("easy double", "doo",
+ double_escaper);
+ dict.SetEscapedValue("harder double", "<A HREF='foo'>\n",
+ DoubleEscaper());
+ dict.SetEscapedValue("hardest double",
+ "print \"<A HREF='foo'>\";\r\n\\1;",
+ double_escaper);
+
+ EXPECT_TRUE(peer.ValueIs("easy foo", "foo")); // FooEscaper always yields "foo"
+ EXPECT_TRUE(peer.ValueIs("harder foo", "foo"));
+ EXPECT_TRUE(peer.ValueIs("easy double", "doo"));
+ EXPECT_TRUE(peer.ValueIs("harder double",
+ "\\x3cA HREF\\x3d\\x27foo\\x27\\x3e\\n"));
+ EXPECT_TRUE(peer.ValueIs("hardest double",
+ "print \\x22\\x3cA HREF\\x3d\\x27foo\\x27\\x3e\\x22;"
+ "\\r\\n\\\\1;"));
+}
+
+TEST(TemplateDictionary, SetEscapedFormattedValue) { // Checks that values are printf-formatted first and then escaped.
+ TemplateDictionary dict("test_SetEscapedFormattedValue", NULL);
+ TemplateDictionaryPeer peer(&dict);
+
+ dict.SetEscapedFormattedValue("HTML", GOOGLE_NAMESPACE::html_escape,
+ "This is <%s> #%.4f", "a & b", 1.0/3);
+ dict.SetEscapedFormattedValue("PRE", GOOGLE_NAMESPACE::pre_escape,
+ "if %s x = %.4f;", "(a < 1 && b > 2)\n\t", 1.0/3);
+ dict.SetEscapedFormattedValue("URL", GOOGLE_NAMESPACE::url_query_escape,
+ "pageviews-%s", "r?egex");
+ dict.SetEscapedFormattedValue("XML", GOOGLE_NAMESPACE::xml_escape,
+ "This&is%s -- ok?", "just&");
+
+ EXPECT_TRUE(peer.ValueIs("HTML", // markup characters must come back as entities
+ "This is &lt;a &amp; b&gt; #0.3333"));
+ EXPECT_TRUE(peer.ValueIs("PRE", // NOTE(review): assumes pre_escape leaves \n and \t untouched -- confirm
+ "if (a &lt; 1 &amp;&amp; b &gt; 2)\n\t x = 0.3333;"));
+ EXPECT_TRUE(peer.ValueIs("URL", "pageviews-r%3Fegex"));
+
+ EXPECT_TRUE(peer.ValueIs("XML", "This&amp;isjust&amp; -- ok?"));
+}
+
+static const StaticTemplateString kSectName = // static name handed to the top-level dict in the AddSectionDictionary test
+ STS_INIT(kSectName, "test_SetAddSectionDictionary");
+
+TEST(TemplateDictionary, AddSectionDictionary) { // Checks section sub-dictionaries: lookup precedence, visibility, naming, dump.
+ // For fun, we'll make this constructor take a static template string.
+ TemplateDictionary dict(kSectName, NULL);
+ TemplateDictionaryPeer peer(&dict);
+ dict.SetValue("TOPLEVEL", "foo");
+ dict.SetValue("TOPLEVEL2", "foo2");
+
+ TemplateDictionary* subdict_1a = dict.AddSectionDictionary("section1");
+ // This is the same dict, but name is specified a different way.
+ TemplateDictionary* subdict_1b = dict.AddSectionDictionary(
+ TemplateString("section1__ignored__", strlen("section1"))); // explicit length cuts the name to "section1"
+ TemplateDictionaryPeer subdict_1a_peer(subdict_1a);
+ TemplateDictionaryPeer subdict_1b_peer(subdict_1b);
+ subdict_1a->SetValue("SUBLEVEL", "subfoo");
+ subdict_1b->SetValue("SUBLEVEL", "subbar");
+
+ TemplateDictionary* subdict_2 = dict.AddSectionDictionary("section2");
+ TemplateDictionaryPeer subdict_2_peer(subdict_2);
+ subdict_2->SetValue("TOPLEVEL", "bar"); // overriding top dict
+ TemplateDictionary* subdict_2_1 = subdict_2->AddSectionDictionary("sub");
+ TemplateDictionaryPeer subdict_2_1_peer(subdict_2_1);
+ subdict_2_1->SetIntValue("GLOBAL", 21); // overrides value in SetUp()
+
+ // Verify that all variables that should be look-up-able are, and that
+ // we have proper precedence.
+ EXPECT_TRUE(peer.ValueIs("GLOBAL", "top"));
+ EXPECT_TRUE(peer.ValueIs("TOPLEVEL", "foo"));
+ EXPECT_TRUE(peer.ValueIs("TOPLEVEL2", "foo2"));
+ EXPECT_TRUE(peer.ValueIs("SUBLEVEL", "")); // parent does not see values set in its children
+
+ EXPECT_TRUE(subdict_1a_peer.ValueIs("GLOBAL", "top"));
+ EXPECT_TRUE(subdict_1a_peer.ValueIs("TOPLEVEL", "foo"));
+ EXPECT_TRUE(subdict_1a_peer.ValueIs("TOPLEVEL2", "foo2"));
+ EXPECT_TRUE(subdict_1a_peer.ValueIs("SUBLEVEL", "subfoo"));
+
+ EXPECT_TRUE(subdict_1b_peer.ValueIs("GLOBAL", "top"));
+ EXPECT_TRUE(subdict_1b_peer.ValueIs("TOPLEVEL", "foo"));
+ EXPECT_TRUE(subdict_1b_peer.ValueIs("TOPLEVEL2", "foo2"));
+ EXPECT_TRUE(subdict_1b_peer.ValueIs("SUBLEVEL", "subbar"));
+
+ EXPECT_TRUE(subdict_2_peer.ValueIs("GLOBAL", "top"));
+ EXPECT_TRUE(subdict_2_peer.ValueIs("TOPLEVEL", "bar"));
+ EXPECT_TRUE(subdict_2_peer.ValueIs("TOPLEVEL2", "foo2"));
+ EXPECT_TRUE(subdict_2_peer.ValueIs("SUBLEVEL", ""));
+
+ EXPECT_TRUE(subdict_2_1_peer.ValueIs("GLOBAL", "21"));
+ EXPECT_TRUE(subdict_2_1_peer.ValueIs("TOPLEVEL", "bar"));
+ EXPECT_TRUE(subdict_2_1_peer.ValueIs("TOPLEVEL2", "foo2"));
+ EXPECT_TRUE(subdict_2_1_peer.ValueIs("SUBLEVEL", ""));
+
+ // Verify that everyone knows about its sub-dictionaries, and also
+ // that these go 'up the chain' on lookup failure
+ EXPECT_FALSE(peer.IsHiddenSection("section1"));
+ EXPECT_FALSE(peer.IsHiddenSection("section2"));
+ EXPECT_TRUE(peer.IsHiddenSection("section3"));
+ EXPECT_TRUE(peer.IsHiddenSection("sub"));
+ EXPECT_FALSE(subdict_1a_peer.IsHiddenSection("section1"));
+ EXPECT_TRUE(subdict_1a_peer.IsHiddenSection("sub"));
+ EXPECT_FALSE(subdict_2_peer.IsHiddenSection("sub"));
+ EXPECT_FALSE(subdict_2_1_peer.IsHiddenSection("sub"));
+
+ // We should get the dictionary-lengths right as well
+ vector<const TemplateDictionary*> dummy;
+ EXPECT_EQ(2, peer.GetSectionDictionaries("section1", &dummy));
+ EXPECT_EQ(1, peer.GetSectionDictionaries("section2", &dummy));
+ EXPECT_EQ(1, subdict_2_peer.GetSectionDictionaries("sub", &dummy));
+ // Test some of the values
+ EXPECT_TRUE(TemplateDictionaryPeer(GetSectionDict(&dict, "section1", 0))
+ .ValueIs("SUBLEVEL", "subfoo"));
+ EXPECT_TRUE(TemplateDictionaryPeer(GetSectionDict(&dict, "section1", 1))
+ .ValueIs("SUBLEVEL", "subbar"));
+ EXPECT_TRUE(TemplateDictionaryPeer(GetSectionDict(&dict, "section2", 0))
+ .ValueIs("TOPLEVEL", "bar"));
+ EXPECT_TRUE(TemplateDictionaryPeer(
+ GetSectionDict(GetSectionDict(&dict, "section2", 0), "sub", 0))
+ .ValueIs("TOPLEVEL", "bar"));
+ EXPECT_TRUE(TemplateDictionaryPeer(
+ GetSectionDict(GetSectionDict(&dict, "section2", 0), "sub", 0))
+ .ValueIs("GLOBAL", "21"));
+
+ // Make sure we're making descriptive names
+ EXPECT_STREQ(dict.name().c_str(),
+ "test_SetAddSectionDictionary");
+ EXPECT_STREQ(subdict_1a->name().c_str(),
+ "test_SetAddSectionDictionary/section1#1");
+ EXPECT_STREQ(subdict_1b->name().c_str(),
+ "test_SetAddSectionDictionary/section1#2");
+ EXPECT_STREQ(subdict_2->name().c_str(),
+ "test_SetAddSectionDictionary/section2#1");
+ EXPECT_STREQ(subdict_2_1->name().c_str(),
+ "test_SetAddSectionDictionary/section2#1/sub#1");
+
+ // Finally, we can test the whole kit and caboodle
+ string dump;
+ dict.DumpToString(&dump);
+ const char* const expected =
+ ("global dictionary {\n"
+ " BI_NEWLINE: >\n"
+ "<\n"
+ " BI_SPACE: > <\n"
+ " GLOBAL: >top<\n"
+ "};\n"
+ "dictionary 'test_SetAddSectionDictionary' {\n"
+ " TOPLEVEL: >foo<\n"
+ " TOPLEVEL2: >foo2<\n"
+ " section section1 (dict 1 of 2) -->\n"
+ " dictionary 'test_SetAddSectionDictionary/section1#1' {\n"
+ " SUBLEVEL: >subfoo<\n"
+ " }\n"
+ " section section1 (dict 2 of 2) -->\n"
+ " dictionary 'test_SetAddSectionDictionary/section1#2' {\n"
+ " SUBLEVEL: >subbar<\n"
+ " }\n"
+ " section section2 (dict 1 of 1) -->\n"
+ " dictionary 'test_SetAddSectionDictionary/section2#1' {\n"
+ " TOPLEVEL: >bar<\n"
+ " section sub (dict 1 of 1) -->\n"
+ " dictionary 'test_SetAddSectionDictionary/section2#1/sub#1' {\n"
+ " GLOBAL: >21<\n"
+ " }\n"
+ " }\n"
+ "}\n");
+ EXPECT_STREQ(dump.c_str(), expected);
+}
+
+TEST(TemplateDictionary, ShowSection) { // Checks ShowSection alone, repeated, and after AddSectionDictionary.
+ TemplateDictionary dict("test_SetShowSection", NULL);
+ // Let's say what filename dict is associated with
+ dict.SetFilename("bigmamainclude!.tpl");
+ dict.SetValue("TOPLEVEL", "foo");
+ dict.SetValue("TOPLEVEL2", "foo2");
+ dict.ShowSection("section1");
+ dict.ShowSection("section2");
+ // Test calling ShowSection twice on the same section
+ dict.ShowSection("section2");
+ // Test that ShowSection is a no-op if called after AddSectionDictionary()
+ TemplateDictionary* subdict = dict.AddSectionDictionary("section3");
+ TemplateDictionaryPeer subdict_peer(subdict);
+ subdict->SetValue("TOPLEVEL", "bar");
+ dict.ShowSection("section3");
+
+ EXPECT_TRUE(subdict_peer.ValueIs("TOPLEVEL", "bar"));
+
+ // Since ShowSection() doesn't return a sub-dict, the only way to
+ // probe what the dicts look like is via Dump()
+ string dump;
+ dict.DumpToString(&dump);
+ const char* const expected = // each shown section appears exactly once ("dict 1 of 1")
+ ("global dictionary {\n"
+ " BI_NEWLINE: >\n"
+ "<\n"
+ " BI_SPACE: > <\n"
+ " GLOBAL: >top<\n"
+ "};\n"
+ "dictionary 'test_SetShowSection (intended for bigmamainclude!.tpl)' {\n"
+ " TOPLEVEL: >foo<\n"
+ " TOPLEVEL2: >foo2<\n"
+ " section section1 (dict 1 of 1) -->\n"
+ " dictionary 'empty dictionary' {\n"
+ " }\n"
+ " section section2 (dict 1 of 1) -->\n"
+ " dictionary 'empty dictionary' {\n"
+ " }\n"
+ " section section3 (dict 1 of 1) -->\n"
+ " dictionary 'test_SetShowSection/section3#1' {\n"
+ " TOPLEVEL: >bar<\n"
+ " }\n"
+ "}\n");
+ EXPECT_STREQ(dump.c_str(), expected);
+}
+
+TEST(TemplateDictionary, SetValueAndShowSection) { // Checks that a section is shown only when the value is non-empty.
+ TemplateDictionary dict("test_SetValueAndShowSection");
+ TemplateDictionaryPeer peer(&dict);
+ dict.SetValue("TOPLEVEL", "foo");
+
+ dict.SetValueAndShowSection("INSEC", "bar", "SEC1");
+ dict.SetValueAndShowSection("NOTINSEC", "", "SEC2"); // empty value: SEC2 is expected to stay hidden
+ dict.SetValueAndShowSection("NOTINSEC2", NULL, "SEC3"); // NULL value: SEC3 is expected to stay hidden
+
+ EXPECT_FALSE(peer.IsHiddenSection("SEC1"));
+ EXPECT_TRUE(peer.IsHiddenSection("SEC2"));
+ EXPECT_TRUE(peer.IsHiddenSection("SEC3"));
+
+ // Again, we don't get subdicts, so we have to dump to check values
+ string dump;
+ dict.DumpToString(&dump);
+ const char* const expected = // only SEC1 and its INSEC value show up in the dump
+ ("global dictionary {\n"
+ " BI_NEWLINE: >\n"
+ "<\n"
+ " BI_SPACE: > <\n"
+ " GLOBAL: >top<\n"
+ "};\n"
+ "dictionary 'test_SetValueAndShowSection' {\n"
+ " TOPLEVEL: >foo<\n"
+ " section SEC1 (dict 1 of 1) -->\n"
+ " dictionary 'test_SetValueAndShowSection/SEC1#1' {\n"
+ " INSEC: >bar<\n"
+ " }\n"
+ "}\n");
+ EXPECT_STREQ(dump.c_str(), expected);
+}
+
+TEST(TemplateDictionary, SetTemplateGlobalValue) { // Checks template-global values and sections across a dictionary tree.
+ // How template-global values pass across included dictionaries
+ // is also tested in the AddIncludeDictionary test.
+ TemplateDictionary dict("test_SetTemplateGlobalValue", NULL);
+ TemplateDictionary* subdict = dict.AddSectionDictionary("section1");
+ TemplateDictionary* subsubdict =
+ subdict->AddSectionDictionary("section1's child");
+ TemplateDictionary* includedict = dict.AddIncludeDictionary("include1");
+
+ TemplateDictionaryPeer peer(&dict);
+ TemplateDictionaryPeer subdict_peer(subdict);
+ TemplateDictionaryPeer subsubdict_peer(subsubdict);
+ TemplateDictionaryPeer includedict_peer(includedict);
+
+ // Setting a template value after sub dictionaries are created should
+ // affect the sub dictionaries as well.
+ dict.SetTemplateGlobalValue("TEMPLATEVAL", "templateval");
+ EXPECT_TRUE(peer.ValueIs("TEMPLATEVAL", "templateval"));
+ EXPECT_TRUE(subdict_peer.ValueIs("TEMPLATEVAL", "templateval"));
+ EXPECT_TRUE(subsubdict_peer.ValueIs("TEMPLATEVAL", "templateval"));
+ EXPECT_TRUE(includedict_peer.ValueIs("TEMPLATEVAL", "templateval"));
+
+ // Sub dictionaries created after you set the template value should also
+ // get the template value.
+ TemplateDictionary* subdict2 = dict.AddSectionDictionary("section2");
+ TemplateDictionary* includedict2 = dict.AddIncludeDictionary("include2");
+ TemplateDictionaryPeer subdict2_peer(subdict2);
+ TemplateDictionaryPeer includedict2_peer(includedict2);
+
+ EXPECT_TRUE(subdict2_peer.ValueIs("TEMPLATEVAL", "templateval"));
+ EXPECT_TRUE(includedict2_peer.ValueIs("TEMPLATEVAL", "templateval"));
+
+ // Setting a template value on a sub dictionary should affect all the other
+ // sub dictionaries and the parent as well.
+ subdict->SetTemplateGlobalValue("TEMPLATEVAL2", "templateval2");
+ EXPECT_TRUE(peer.ValueIs("TEMPLATEVAL2", "templateval2"));
+ EXPECT_TRUE(subdict_peer.ValueIs("TEMPLATEVAL2", "templateval2"));
+ EXPECT_TRUE(subsubdict_peer.ValueIs("TEMPLATEVAL2", "templateval2"));
+ EXPECT_TRUE(includedict_peer.ValueIs("TEMPLATEVAL2", "templateval2"));
+ EXPECT_TRUE(subdict2_peer.ValueIs("TEMPLATEVAL2", "templateval2"));
+ EXPECT_TRUE(includedict2_peer.ValueIs("TEMPLATEVAL2", "templateval2"));
+
+ includedict->SetTemplateGlobalValue("TEMPLATEVAL3", "templateval3"); // same check, this time set from an include dict
+ EXPECT_TRUE(peer.ValueIs("TEMPLATEVAL3", "templateval3"));
+ EXPECT_TRUE(subdict_peer.ValueIs("TEMPLATEVAL3", "templateval3"));
+ EXPECT_TRUE(subsubdict_peer.ValueIs("TEMPLATEVAL3", "templateval3"));
+ EXPECT_TRUE(includedict_peer.ValueIs("TEMPLATEVAL3", "templateval3"));
+ EXPECT_TRUE(subdict2_peer.ValueIs("TEMPLATEVAL3", "templateval3"));
+ EXPECT_TRUE(includedict2_peer.ValueIs("TEMPLATEVAL3", "templateval3"));
+
+ // You should be able to override a template value with a regular value,
+ // and the overriding regular value should pass on to its children.
+ subdict->SetValue("TEMPLATEVAL2", "subdictval");
+ includedict->SetValue("TEMPLATEVAL2", "includedictval");
+ EXPECT_TRUE(peer.ValueIs("TEMPLATEVAL2", "templateval2"));
+ EXPECT_TRUE(subdict_peer.ValueIs("TEMPLATEVAL2", "subdictval"));
+ EXPECT_TRUE(subsubdict_peer.ValueIs("TEMPLATEVAL2", "subdictval"));
+ EXPECT_TRUE(includedict_peer.ValueIs("TEMPLATEVAL2", "includedictval"));
+ EXPECT_TRUE(subdict2_peer.ValueIs("TEMPLATEVAL2", "templateval2"));
+ EXPECT_TRUE(includedict2_peer.ValueIs("TEMPLATEVAL2", "templateval2"));
+
+ // A section shown template-globally will be shown in all its children.
+ dict.ShowTemplateGlobalSection("ShownTemplateGlobalSection");
+ EXPECT_FALSE(peer.IsHiddenSection("ShownTemplateGlobalSection"));
+
+ EXPECT_FALSE(subdict2_peer.IsHiddenSection("ShownTemplateGlobalSection"));
+ EXPECT_FALSE(subsubdict_peer.IsHiddenSection("ShownTemplateGlobalSection"));
+
+ // Showing a template-global section in a child will show it in all templates
+ // in the tree
+ subdict->ShowTemplateGlobalSection("ShownFromAChild");
+ EXPECT_FALSE(peer.IsHiddenSection("ShownFromAChild"));
+ EXPECT_FALSE(subsubdict_peer.IsHiddenSection("ShownFromAChild"));
+
+ // Asking for a section that doesn't exist shouldn't cause infinite recursion
+ peer.IsHiddenSection("NAVBAR_SECTION"); // result deliberately unused; the call only has to return
+}
+
+TEST(TemplateDictionary, SetTemplateGlobalValueWithoutCopy) { // Checks that the template-global no-copy setter keeps pointing at the caller's buffer.
+ UnsafeArena arena(100);
+ TemplateDictionary dict("Test arena", &arena);
+ TemplateDictionaryPeer peer(&dict);
+
+ char value[32];
+ snprintf(value, sizeof(value), "%s", "value");
+
+ const void* const ptr = arena.Alloc(0); // marker: compared below to detect any arena allocation
+ dict.SetTemplateGlobalValueWithoutCopy("key", value);
+ // We shouldn't have copied the value string.
+ EXPECT_EQ(ptr, arena.Alloc(0));
+
+ EXPECT_TRUE(peer.ValueIs("key", "value"));
+ // If our content changes, so does what's in the dictionary -- but
+ // only the contents of the buffer, not its length!
+ snprintf(value, sizeof(value), "%s", "not_value");
+ EXPECT_TRUE(peer.ValueIs("key", "not_v")); // length stays 5: strlen("not_v") == strlen("value")
+}
+
+TEST(TemplateDictionary, AddIncludeDictionary) { // Checks include sub-dictionaries: lookup isolation, filenames, naming, dump.
+ TemplateDictionary dict("test_SetAddIncludeDictionary", NULL);
+ TemplateDictionaryPeer peer(&dict);
+ dict.SetValue("TOPLEVEL", "foo");
+ dict.SetValue("TOPLEVEL2", "foo2");
+ dict.SetTemplateGlobalValue("TEMPLATELEVEL", "foo3");
+
+ TemplateDictionary* subdict_1a = dict.AddIncludeDictionary("include1");
+ TemplateDictionaryPeer subdict_1a_peer(subdict_1a);
+ subdict_1a->SetFilename("incfile1a");
+ // This is the same dict, but name is specified a different way.
+ TemplateDictionary* subdict_1b = dict.AddIncludeDictionary(
+ TemplateString("include1__ignored__", strlen("include1"))); // explicit length cuts the name to "include1"
+ TemplateDictionaryPeer subdict_1b_peer(subdict_1b);
+ // Let's try not calling SetFilename on this one.
+ subdict_1a->SetValue("SUBLEVEL", "subfoo");
+ subdict_1b->SetValue("SUBLEVEL", "subbar");
+
+ TemplateDictionary* subdict_2 = dict.AddIncludeDictionary("include2");
+ TemplateDictionaryPeer subdict_2_peer(subdict_2);
+ subdict_2->SetFilename("foo/bar");
+ subdict_2->SetValue("TOPLEVEL", "bar"); // overriding top dict
+ // overriding template dict
+ subdict_2->SetValue("TEMPLATELEVEL", "subfoo3");
+ TemplateDictionary* subdict_2_1 = subdict_2->AddIncludeDictionary("sub");
+ TemplateDictionaryPeer subdict_2_1_peer(subdict_2_1);
+ subdict_2_1->SetFilename("baz");
+ subdict_2_1->SetIntValue("GLOBAL", 21); // overrides value in SetUp()
+
+ // Verify that all variables that should be look-up-able are, and that
+ // we have proper precedence. Unlike with sections, include lookups
+ // do not go 'up the chain'.
+ EXPECT_TRUE(peer.ValueIs("GLOBAL", "top"));
+ EXPECT_TRUE(peer.ValueIs("TOPLEVEL", "foo"));
+ EXPECT_TRUE(peer.ValueIs("TOPLEVEL2", "foo2"));
+ EXPECT_TRUE(peer.ValueIs("TEMPLATELEVEL", "foo3"));
+ EXPECT_TRUE(peer.ValueIs("SUBLEVEL", ""));
+
+ EXPECT_TRUE(subdict_1a_peer.ValueIs("GLOBAL", "top"));
+ EXPECT_TRUE(subdict_1a_peer.ValueIs("TOPLEVEL", "")); // parent's plain values are not visible here
+ EXPECT_TRUE(subdict_1a_peer.ValueIs("TOPLEVEL2", ""));
+ EXPECT_TRUE(subdict_1a_peer.ValueIs("TEMPLATELEVEL", "foo3")); // template-global values are visible
+ EXPECT_TRUE(subdict_1a_peer.ValueIs("SUBLEVEL", "subfoo"));
+
+ EXPECT_TRUE(subdict_1b_peer.ValueIs("GLOBAL", "top"));
+ EXPECT_TRUE(subdict_1b_peer.ValueIs("TOPLEVEL", ""));
+ EXPECT_TRUE(subdict_1b_peer.ValueIs("TOPLEVEL2", ""));
+ EXPECT_TRUE(subdict_1b_peer.ValueIs("SUBLEVEL", "subbar"));
+
+ EXPECT_TRUE(subdict_2_peer.ValueIs("GLOBAL", "top"));
+ EXPECT_TRUE(subdict_2_peer.ValueIs("TOPLEVEL", "bar"));
+ EXPECT_TRUE(subdict_2_peer.ValueIs("TOPLEVEL2", ""));
+ EXPECT_TRUE(subdict_2_peer.ValueIs("TEMPLATELEVEL", "subfoo3"));
+ EXPECT_TRUE(subdict_2_peer.ValueIs("SUBLEVEL", ""));
+
+ EXPECT_TRUE(subdict_2_1_peer.ValueIs("GLOBAL", "21"));
+ EXPECT_TRUE(subdict_2_1_peer.ValueIs("TOPLEVEL", ""));
+ EXPECT_TRUE(subdict_2_1_peer.ValueIs("TOPLEVEL2", ""));
+ EXPECT_TRUE(subdict_2_1_peer.ValueIs("SUBLEVEL", ""));
+
+ // Verify that everyone knows about its sub-dictionaries, but that
+ // these do not try to go 'up the chain' on lookup failure
+ EXPECT_FALSE(peer.IsHiddenTemplate("include1"));
+ EXPECT_FALSE(peer.IsHiddenTemplate("include2"));
+ EXPECT_TRUE(peer.IsHiddenTemplate("include3"));
+ EXPECT_TRUE(peer.IsHiddenTemplate("sub"));
+ EXPECT_TRUE(subdict_1a_peer.IsHiddenTemplate("include1"));
+ EXPECT_TRUE(subdict_1a_peer.IsHiddenTemplate("sub"));
+ EXPECT_FALSE(subdict_2_peer.IsHiddenTemplate("sub"));
+ EXPECT_TRUE(subdict_2_1_peer.IsHiddenTemplate("sub"));
+
+ // We should get the dictionary-lengths right as well
+ vector<const TemplateDictionary*> dummy;
+ EXPECT_EQ(2, peer.GetIncludeDictionaries("include1", &dummy));
+ EXPECT_EQ(1, peer.GetIncludeDictionaries("include2", &dummy));
+ EXPECT_EQ(1, subdict_2_peer.GetIncludeDictionaries("sub", &dummy));
+
+ // NOTE(review): the next three checks repeat the length checks just above.
+ EXPECT_EQ(2, peer.GetIncludeDictionaries("include1", &dummy));
+ EXPECT_EQ(1, peer.GetIncludeDictionaries("include2", &dummy));
+ EXPECT_EQ(1, subdict_2_peer.GetIncludeDictionaries("sub", &dummy));
+ // Test some of the values
+ EXPECT_TRUE(TemplateDictionaryPeer(GetIncludeDict(&dict, "include1", 0))
+ .ValueIs("SUBLEVEL", "subfoo"));
+ EXPECT_TRUE(TemplateDictionaryPeer(GetIncludeDict(&dict, "include1", 1))
+ .ValueIs("SUBLEVEL", "subbar"));
+ EXPECT_TRUE(TemplateDictionaryPeer(GetIncludeDict(&dict, "include2", 0))
+ .ValueIs("TOPLEVEL", "bar"));
+ EXPECT_TRUE(TemplateDictionaryPeer(
+ GetIncludeDict(GetIncludeDict(&dict, "include2", 0), "sub", 0))
+ .ValueIs("TOPLEVEL", ""));
+ EXPECT_TRUE(TemplateDictionaryPeer(
+ GetIncludeDict(GetIncludeDict(&dict, "include2", 0), "sub", 0))
+ .ValueIs("GLOBAL", "21"));
+ // We can test the include-names as well
+ EXPECT_STREQ(peer.GetIncludeTemplateName("include1", 0), "incfile1a");
+ EXPECT_STREQ(peer.GetIncludeTemplateName("include1", 1), ""); // no SetFilename call was made for this one
+ EXPECT_STREQ(peer.GetIncludeTemplateName("include2", 0), "foo/bar");
+ EXPECT_STREQ(TemplateDictionaryPeer(GetIncludeDict(&dict, "include2", 0))
+ .GetIncludeTemplateName("sub", 0),
+ "baz");
+
+ // Make sure we're making descriptive names
+ EXPECT_STREQ(dict.name().c_str(),
+ "test_SetAddIncludeDictionary");
+ EXPECT_STREQ(subdict_1a->name().c_str(),
+ "test_SetAddIncludeDictionary/include1#1");
+ EXPECT_STREQ(subdict_1b->name().c_str(),
+ "test_SetAddIncludeDictionary/include1#2");
+ EXPECT_STREQ(subdict_2->name().c_str(),
+ "test_SetAddIncludeDictionary/include2#1");
+ EXPECT_STREQ(subdict_2_1->name().c_str(),
+ "test_SetAddIncludeDictionary/include2#1/sub#1");
+
+ // Finally, we can test the whole kit and caboodle
+ string dump;
+ dict.DumpToString(&dump);
+ const char* const expected =
+ ("global dictionary {\n"
+ " BI_NEWLINE: >\n"
+ "<\n"
+ " BI_SPACE: > <\n"
+ " GLOBAL: >top<\n"
+ "};\n"
+ "template dictionary {\n"
+ " TEMPLATELEVEL: >foo3<\n"
+ "};\n"
+ "dictionary 'test_SetAddIncludeDictionary' {\n"
+ " TOPLEVEL: >foo<\n"
+ " TOPLEVEL2: >foo2<\n"
+ " include-template include1 (dict 1 of 2, from incfile1a) -->\n"
+ " global dictionary {\n"
+ " BI_NEWLINE: >\n"
+ "<\n"
+ " BI_SPACE: > <\n"
+ " GLOBAL: >top<\n"
+ " };\n"
+ " dictionary 'test_SetAddIncludeDictionary/include1#1 (intended for incfile1a)' {\n"
+ " SUBLEVEL: >subfoo<\n"
+ " }\n"
+ " include-template include1 (dict 2 of 2, **NO FILENAME SET; THIS DICT WILL BE IGNORED**) -->\n"
+ " global dictionary {\n"
+ " BI_NEWLINE: >\n"
+ "<\n"
+ " BI_SPACE: > <\n"
+ " GLOBAL: >top<\n"
+ " };\n"
+ " dictionary 'test_SetAddIncludeDictionary/include1#2' {\n"
+ " SUBLEVEL: >subbar<\n"
+ " }\n"
+ " include-template include2 (dict 1 of 1, from foo/bar) -->\n"
+ " global dictionary {\n"
+ " BI_NEWLINE: >\n"
+ "<\n"
+ " BI_SPACE: > <\n"
+ " GLOBAL: >top<\n"
+ " };\n"
+ " dictionary 'test_SetAddIncludeDictionary/include2#1 (intended for foo/bar)' {\n"
+ " TEMPLATELEVEL: >subfoo3<\n"
+ " TOPLEVEL: >bar<\n"
+ " include-template sub (dict 1 of 1, from baz) -->\n"
+ " global dictionary {\n"
+ " BI_NEWLINE: >\n"
+ "<\n"
+ " BI_SPACE: > <\n"
+ " GLOBAL: >top<\n"
+ " };\n"
+ " dictionary 'test_SetAddIncludeDictionary/include2#1/sub#1 (intended for baz)' {\n"
+ " GLOBAL: >21<\n"
+ " }\n"
+ " }\n"
+ "}\n");
+ EXPECT_STREQ(dump.c_str(), expected);
+}
+
+static void TestMakeCopy(bool use_local_arena) { // Builds a dict tree, copies it, deletes the original, and compares the two dumps.
+ UnsafeArena local_arena(1024);
+ UnsafeArena* arena = NULL; // NULL arena unless use_local_arena is set
+ if (use_local_arena)
+ arena = &local_arena;
+
+ // First, let's make a non-trivial template dictionary (We use
+ // 'new' because later we'll test deleting this dict but keeping
+ // around the copy.)
+ TemplateDictionary* dict = new TemplateDictionary("testdict", arena);
+
+ dict->SetValue("TOPLEVEL", "foo");
+
+ dict->SetTemplateGlobalValue("TEMPLATELEVEL", "foo3");
+
+ TemplateDictionary* subdict_1a = dict->AddIncludeDictionary("include1");
+ subdict_1a->SetFilename("incfile1a");
+ subdict_1a->SetValue("SUBLEVEL", "subfoo");
+ TemplateDictionary* subdict_1b = dict->AddIncludeDictionary("include1");
+ // Let's try not calling SetFilename on this one.
+ subdict_1b->SetValue("SUBLEVEL", "subbar");
+
+ TemplateDictionary* subdict_2a = dict->AddSectionDictionary("section1");
+ TemplateDictionary* subdict_2b = dict->AddSectionDictionary("section1");
+ subdict_2a->SetValue("SUBLEVEL", "subfoo");
+ subdict_2b->SetValue("SUBLEVEL", "subbar");
+ TemplateDictionary* subdict_3 = dict->AddSectionDictionary("section2");
+ subdict_3->SetValue("TOPLEVEL", "bar"); // overriding top dict
+ TemplateDictionary* subdict_3_1 = subdict_3->AddSectionDictionary("sub");
+ subdict_3_1->SetIntValue("GLOBAL", 21); // overrides value in SetUp()
+
+ string orig;
+ dict->DumpToString(&orig); // dump taken while the original is still alive
+
+ // Make a copy
+ TemplateDictionary* dict_copy = dict->MakeCopy("testdict", NULL);
+ // Make sure it doesn't work to copy a sub-dictionary
+ EXPECT_TRUE(subdict_1a->MakeCopy("copy of subdict") == NULL);
+ EXPECT_TRUE(subdict_2a->MakeCopy("copy of subdict") == NULL);
+
+ // Delete the original dict, to make sure the copy really is independent
+ delete dict;
+ dict = NULL;
+ string copy;
+ dict_copy->DumpToString(&copy); // dump of the copy, taken after the original is gone
+ delete dict_copy;
+
+ EXPECT_STREQ(orig.c_str(), copy.c_str());
+}
+
+TEST(MakeCopy, UseLocalArena) {
+ TestMakeCopy(true);  // use_local_arena=true: a local UnsafeArena is passed to the dictionary
+}
+
+TEST(MakeCopy, DoNotUseLocalArena) {
+ TestMakeCopy(false);  // use_local_arena=false: a NULL arena is passed to the dictionary
+}
+
+TEST(TemplateDictionary, SetModifierData) {
+  const void* payload = "test";
+  PerExpandData expand_data;
+  expand_data.InsertForModifiers("a", payload);  // store the pointer under "a"
+  EXPECT_EQ(payload, expand_data.LookupForModifiers("a"));  // same pointer back
+}
+
+TEST(TemplateDictionary, Iterator) {
+  // Populate one root dictionary with include- and section-children.
+  TemplateDictionary farm("Farm");
+  TemplateDictionaryPeer inspector(&farm);
+  TemplateDictionaryInterface* grey_barn =
+      farm.AddIncludeDictionary("BARN");
+  TemplateDictionaryInterface* duck_pond =
+      farm.AddIncludeDictionary("POND");
+  TemplateDictionaryInterface* cattle_pond =
+      farm.AddIncludeDictionary("POND");
+  TemplateDictionaryInterface* irrigation_pond =
+      farm.AddIncludeDictionary("POND");
+
+  // One section name that is added three times
+  TemplateDictionaryInterface* lillies = farm.AddSectionDictionary("FLOWERS");
+  TemplateDictionaryInterface* lilacs = farm.AddSectionDictionary("FLOWERS");
+  TemplateDictionaryInterface* daisies = farm.AddSectionDictionary("FLOWERS");
+  // One section name that is added once
+  TemplateDictionaryInterface* wheat = farm.AddSectionDictionary("WHEAT");
+  // One section that is only shown, never added explicitly
+  farm.ShowSection("CORN");
+
+  // Each iterator must hand back its dictionaries in the order added, then stop.
+  TemplateDictionaryPeer::Iterator* barn_iter =
+      inspector.CreateTemplateIterator("BARN");
+  EXPECT_TRUE(barn_iter->HasNext());
+  EXPECT_EQ(&barn_iter->Next(), grey_barn);
+  EXPECT_FALSE(barn_iter->HasNext());
+  delete barn_iter;
+
+  TemplateDictionaryPeer::Iterator* pond_iter =
+      inspector.CreateTemplateIterator("POND");
+  EXPECT_TRUE(pond_iter->HasNext());
+  EXPECT_EQ(&pond_iter->Next(), duck_pond);
+  EXPECT_TRUE(pond_iter->HasNext());
+  EXPECT_EQ(&pond_iter->Next(), cattle_pond);
+  EXPECT_TRUE(pond_iter->HasNext());
+  EXPECT_EQ(&pond_iter->Next(), irrigation_pond);
+  EXPECT_FALSE(pond_iter->HasNext());
+  delete pond_iter;
+
+  TemplateDictionaryPeer::Iterator* flower_iter =
+      inspector.CreateSectionIterator("FLOWERS");
+  EXPECT_TRUE(flower_iter->HasNext());
+  EXPECT_EQ(&flower_iter->Next(), lillies);
+  EXPECT_TRUE(flower_iter->HasNext());
+  EXPECT_EQ(&flower_iter->Next(), lilacs);
+  EXPECT_TRUE(flower_iter->HasNext());
+  EXPECT_EQ(&flower_iter->Next(), daisies);
+  EXPECT_FALSE(flower_iter->HasNext());
+  delete flower_iter;
+
+  TemplateDictionaryPeer::Iterator* wheat_iter =
+      inspector.CreateSectionIterator("WHEAT");
+  EXPECT_TRUE(wheat_iter->HasNext());
+  EXPECT_EQ(&wheat_iter->Next(), wheat);
+  EXPECT_FALSE(wheat_iter->HasNext());
+  delete wheat_iter;
+
+  TemplateDictionaryPeer::Iterator* corn_iter =
+      inspector.CreateSectionIterator("CORN");
+  EXPECT_TRUE(corn_iter->HasNext());
+  EXPECT_TRUE(&corn_iter->Next());  // ShowSection doesn't give us the dict back
+  EXPECT_FALSE(corn_iter->HasNext());
+  delete corn_iter;
+}
+
+TEST(TemplateDictionary, IsHiddenSectionDefault) {
+  TemplateDictionary visibility("dict");
+  TemplateDictionaryPeer inspector(&visibility);
+  EXPECT_TRUE(inspector.IsHiddenSection("UNDEFINED"));    // never mentioned: hidden
+  EXPECT_FALSE(inspector.IsUnhiddenSection("UNDEFINED"));
+  visibility.ShowSection("VISIBLE");
+  EXPECT_FALSE(inspector.IsHiddenSection("VISIBLE"));     // shown: no longer hidden
+  EXPECT_TRUE(inspector.IsUnhiddenSection("VISIBLE"));
+}
+
+// This has to run last, since its SetGlobalValue modifies the global
+// state, which can affect other tests (especially given the embedded
+// NUL!) So we don't use the normal TEST() here, and call it manually
+// in main().
+
+void TestSetValueWithNUL() {
+  TemplateDictionary dict("test_SetValueWithNUL", NULL);
+  TemplateDictionaryPeer peer(&dict);
+
+  // Keys and values are std::strings with an embedded NUL in the middle.
+  dict.SetValue(string("FOO\0BAR", 7), string("QUX\0QUUX", 8));
+  dict.SetGlobalValue(string("GOO\0GAR", 7), string("GUX\0GUUX", 8));
+
+  // FOO should not match FOO\0BAR
+  EXPECT_TRUE(peer.ValueIs("FOO", ""));
+  EXPECT_TRUE(peer.ValueIs("GOO", ""));
+
+  EXPECT_TRUE(peer.ValueIs(string("FOO\0BAR", 7), string("QUX\0QUUX", 8)));
+  EXPECT_TRUE(peer.ValueIs(string("GOO\0GAR", 7), string("GUX\0GUUX", 8)));
+
+  string dump;
+  dict.DumpToString(&dump);
+  // We can't use EXPECT_STREQ here because of the embedded NULs.  The length
+  // comes from sizeof (minus the trailing NUL) instead of a hand count.
+  static const char kExpected[] = "global dictionary {\n"
+                                  "   BI_NEWLINE: >\n"
+                                  "<\n"
+                                  "   BI_SPACE: > <\n"
+                                  "   GLOBAL: >top<\n"
+                                  "   GOO\0GAR: >GUX\0GUUX<\n"
+                                  "};\n"
+                                  "dictionary 'test_SetValueWithNUL' {\n"
+                                  "   FOO\0BAR: >QUX\0QUUX<\n"
+                                  "}\n";
+  string expected(kExpected, sizeof(kExpected) - 1);
+  EXPECT_EQ(dump, expected);
+}
+
+TEST(TemplateDictionary, TestShowTemplateGlobalSection) {
+  StringToTemplateCache("test.tpl", "{{#sect}}OK{{/sect}}", DO_NOT_STRIP);
+  // No assertion follows: the test only requires the expansion to complete.
+  TemplateDictionary shown("mydict");
+  shown.ShowTemplateGlobalSection("sect");
+
+  string rendered;
+  ExpandTemplate("test.tpl", DO_NOT_STRIP, &shown, &rendered);
+}
+
+TEST(TemplateDictionary, TestShowTemplateGlobalSection_Child) {
+  // TemplateDictionary::template_global_dict_ behaves differently for child
+  // dictionaries than for the root, so this expands through a child section.
+  StringToTemplateCache("test2.tpl",
+                        "{{#foo}}{{#sect}}OK{{/sect}}{{/foo}}",
+                        DO_NOT_STRIP);
+
+  TemplateDictionary root("mydict");
+  root.ShowTemplateGlobalSection("sect");
+
+  root.AddSectionDictionary("foo");
+
+  string rendered;
+  ExpandTemplate("test2.tpl", DO_NOT_STRIP, &root, &rendered);
+}
+
+TEST(TemplateDictionary, TestShowTemplateGlobalSection_SectionDoesntExist) {
+  StringToTemplateCache("test3.tpl",
+                        "{{#bad}}bad{{/bad}}",
+                        DO_NOT_STRIP);
+  // The dictionary never mentions "bad"; expansion just has to complete.
+  TemplateDictionary bare("mydict");
+
+  string rendered;
+  ExpandTemplate("test3.tpl", DO_NOT_STRIP, &bare, &rendered);
+}
+
+
+} // unnamed namespace
+
+
+int main(int argc, char** argv) {
+  // Runs SetUp() first, then the registered tests, then the NUL test by hand.
+  SetUp();
+
+  const int test_status = RUN_ALL_TESTS();
+
+  // This has to run last, so we run it manually
+  TestSetValueWithNUL();
+
+  return test_status;
+}
diff --git a/src/tests/template_modifiers_unittest.cc b/src/tests/template_modifiers_unittest.cc
new file mode 100644
index 0000000..781d3d3
--- /dev/null
+++ b/src/tests/template_modifiers_unittest.cc
@@ -0,0 +1,1117 @@
+// Copyright (c) 2007, Google Inc.
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+// * Neither the name of Google Inc. nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+// ---
+// Author: csilvers@google.com (Craig Silverstein)
+//
+// This code is written to avoid the google testing framework
+// as much as possible, to make it easier to opensource.
+
+#include "config_for_unittests.h"
+#include <stdio.h>
+#include <string.h>
+#include <assert.h>
+#include <string>
+#include <vector>
+#include <ctemplate/template.h>
+#include <ctemplate/template_dictionary.h>
+#include <ctemplate/template_emitter.h>
+#include <ctemplate/template_modifiers.h>
+#include "template_modifiers_internal.h"
+#include "tests/template_test_util.h"
+#include "base/util.h"
+TEST_INIT // defines RUN_ALL_TESTS
+
+using std::string;
+using std::vector;
+
+// Rather than put all these tests in the ctemplate namespace, or use
+// using-declarations, for this test I've decided to manually prepend
+// GOOGLE_NAMESPACE:: everywhere it's needed. This test can serve as an
+// example of how that approach looks.
+
+TEST(TemplateModifiers, HtmlEscape) {
+  // Expects html_escape to entity-escape & " ' < > and turn \n \t \v into spaces.
+  GOOGLE_NAMESPACE::TemplateDictionary dict("TestHtmlEscape", NULL);
+  dict.SetEscapedValue("easy HTML", "foo", GOOGLE_NAMESPACE::html_escape);
+  dict.SetEscapedValue("harder HTML", "foo & bar",
+                       GOOGLE_NAMESPACE::html_escape);
+  dict.SetEscapedValue("hardest HTML",
+                       "<A HREF='foo'\nid=\"bar\t\t&&\vbaz\">",
+                       GOOGLE_NAMESPACE::html_escape);
+
+  GOOGLE_NAMESPACE::TemplateDictionaryPeer peer(&dict);  // peer can look inside dicts
+  // TODO(csilvers): change this (and all other expect_*'s in all files
+  //                 in this directory) to take the expected value first, not second.
+  EXPECT_STREQ(peer.GetSectionValue("easy HTML"), "foo");
+  EXPECT_STREQ(peer.GetSectionValue("harder HTML"), "foo &amp; bar");
+  EXPECT_STREQ(peer.GetSectionValue("hardest HTML"),
+               "&lt;A HREF=&#39;foo&#39; id=&quot;bar  &amp;&amp; "
+               "baz&quot;&gt;");
+}
+
+TEST(TemplateModifiers, SnippetEscape) {
+  GOOGLE_NAMESPACE::TemplateDictionary dict("TestSnippetEscape", NULL);
+  dict.SetEscapedValue("easy snippet", "foo",
+                       GOOGLE_NAMESPACE::snippet_escape);
+  dict.SetEscapedValue("valid snippet",
+                       "<b>foo<br> &amp; b<wbr>&shy;ar</b>",
+                       GOOGLE_NAMESPACE::snippet_escape);
+  dict.SetEscapedValue("invalid snippet",
+                       "<b><A HREF='foo'\nid=\"bar\t\t&&{\vbaz\">",
+                       GOOGLE_NAMESPACE::snippet_escape);
+  dict.SetEscapedValue("snippet with italics",
+                       "<i>foo<br> &amp; b<wbr>&shy;ar</i>",
+                       GOOGLE_NAMESPACE::snippet_escape);
+  dict.SetEscapedValue("unclosed snippet",
+                       "<b>foo",
+                       GOOGLE_NAMESPACE::snippet_escape);
+  dict.SetEscapedValue("snippet with interleaving",
+                       "<b><i>foo</b></i>",
+                       GOOGLE_NAMESPACE::snippet_escape);
+  dict.SetEscapedValue("unclosed interleaving",
+                       "<b><i><b>foo</b>",
+                       GOOGLE_NAMESPACE::snippet_escape);
+  dict.SetEscapedValue("unclosed",
+                       "<b><i>foo",
+                       GOOGLE_NAMESPACE::snippet_escape);
+  dict.SetEscapedValue("unterminated 1",
+                       "foo<",
+                       GOOGLE_NAMESPACE::snippet_escape);
+  dict.SetEscapedValue("unterminated 2",
+                       "foo<b",
+                       GOOGLE_NAMESPACE::snippet_escape);
+  dict.SetEscapedValue("unterminated 3",
+                       "foo</",
+                       GOOGLE_NAMESPACE::snippet_escape);
+  dict.SetEscapedValue("unterminated 4",
+                       "foo</b",
+                       GOOGLE_NAMESPACE::snippet_escape);
+  dict.SetEscapedValue("unterminated 5",
+                       "<b>foo</b",
+                       GOOGLE_NAMESPACE::snippet_escape);
+  dict.SetEscapedValue("close b i",
+                       "<i><b>foo",
+                       GOOGLE_NAMESPACE::snippet_escape);
+  dict.SetEscapedValue("close i b",
+                       "<b><i>foo",
+                       GOOGLE_NAMESPACE::snippet_escape);
+  dict.SetEscapedValue("em",
+                       "<em>foo</em>",
+                       GOOGLE_NAMESPACE::snippet_escape);
+  dict.SetEscapedValue("nested em",
+                       "<b>This is foo<em>...</em></b>",
+                       GOOGLE_NAMESPACE::snippet_escape);
+  dict.SetEscapedValue("unclosed em",
+                       "<em>foo",
+                       GOOGLE_NAMESPACE::snippet_escape);
+  dict.SetEscapedValue("wrongly closed em",
+                       "foo</em>",
+                       GOOGLE_NAMESPACE::snippet_escape);
+  dict.SetEscapedValue("misnested em",
+                       "<i><em>foo</i></em>",
+                       GOOGLE_NAMESPACE::snippet_escape);
+  dict.SetEscapedValue("span ltr",
+                       "<span dir=ltr>bidi text</span>",
+                       GOOGLE_NAMESPACE::snippet_escape);
+  dict.SetEscapedValue("span rtl",
+                       "<span dir=rtl>bidi text</span>",
+                       GOOGLE_NAMESPACE::snippet_escape);
+  dict.SetEscapedValue("span garbage dir attr",
+                       "<span dir=foo>bidi text</span>",
+                       GOOGLE_NAMESPACE::snippet_escape);
+  dict.SetEscapedValue("span no dir",
+                       "<span>bidi text</span>",
+                       GOOGLE_NAMESPACE::snippet_escape);
+  dict.SetEscapedValue("span bad attribute",
+                       "<span onclick=alert('foo')>bidi text</span>",
+                       GOOGLE_NAMESPACE::snippet_escape);
+  dict.SetEscapedValue("span quotes",
+                       "<span dir=\"rtl\">bidi text</span>",
+                       GOOGLE_NAMESPACE::snippet_escape);
+  dict.SetEscapedValue("nested span",
+                       "<b>This is <span dir=rtl>bidi text</span></b>",
+                       GOOGLE_NAMESPACE::snippet_escape);
+  dict.SetEscapedValue("doubly-nested span",
+                       "<span dir=rtl>This is <span dir=rtl>"
+                       "bidi text</span></span>",
+                       GOOGLE_NAMESPACE::snippet_escape);
+  dict.SetEscapedValue("two spans",
+                       "<b>This is <span dir=rtl>text</span> that is "
+                       "<span dir=rtl>bidi.</span></b>",
+                       GOOGLE_NAMESPACE::snippet_escape);
+  dict.SetEscapedValue("unclosed span",
+                       "<b>This is <span dir=rtl>bidi text",
+                       GOOGLE_NAMESPACE::snippet_escape);
+
+  // Expected: accepted markup passes through (unclosed tags get closed); the rest is escaped.
+  GOOGLE_NAMESPACE::TemplateDictionaryPeer peer(&dict);  // peer can look inside dicts
+  EXPECT_STREQ(peer.GetSectionValue("easy snippet"), "foo");
+  EXPECT_STREQ(peer.GetSectionValue("valid snippet"),
+               "<b>foo<br> &amp; b<wbr>&shy;ar</b>");
+  EXPECT_STREQ(peer.GetSectionValue("invalid snippet"),
+               "<b>&lt;A HREF=&#39;foo&#39; id=&quot;bar  &&amp;{ "
+               "baz&quot;&gt;</b>");
+  EXPECT_STREQ(peer.GetSectionValue("snippet with italics"),
+               "<i>foo<br> &amp; b<wbr>&shy;ar</i>");
+  EXPECT_STREQ(peer.GetSectionValue("unclosed snippet"),
+               "<b>foo</b>");
+  EXPECT_STREQ(peer.GetSectionValue("snippet with interleaving"),
+               "<b><i>foo</b></i>");
+  EXPECT_STREQ(peer.GetSectionValue("unclosed interleaving"),
+               "<b><i><b>foo</b></i>");
+  EXPECT_STREQ(peer.GetSectionValue("unclosed"),
+               "<b><i>foo</i></b>");
+  EXPECT_STREQ(peer.GetSectionValue("unterminated 1"), "foo&lt;");
+  EXPECT_STREQ(peer.GetSectionValue("unterminated 2"), "foo&lt;b");
+  EXPECT_STREQ(peer.GetSectionValue("unterminated 3"), "foo&lt;/");
+  EXPECT_STREQ(peer.GetSectionValue("unterminated 4"), "foo&lt;/b");
+  EXPECT_STREQ(peer.GetSectionValue("unterminated 5"), "<b>foo&lt;/b</b>");
+  EXPECT_STREQ(peer.GetSectionValue("close b i"), "<i><b>foo</b></i>");
+  EXPECT_STREQ(peer.GetSectionValue("close i b"), "<b><i>foo</i></b>");
+  EXPECT_STREQ(peer.GetSectionValue("em"), "<em>foo</em>");
+  EXPECT_STREQ(peer.GetSectionValue("nested em"),
+               "<b>This is foo<em>...</em></b>");
+  EXPECT_STREQ(peer.GetSectionValue("unclosed em"), "<em>foo</em>");
+  EXPECT_STREQ(peer.GetSectionValue("wrongly closed em"), "foo&lt;/em&gt;");
+  EXPECT_STREQ(peer.GetSectionValue("misnested em"), "<i><em>foo</i></em>");
+  EXPECT_STREQ(peer.GetSectionValue("span ltr"),
+               "<span dir=ltr>bidi text</span>");
+  EXPECT_STREQ(peer.GetSectionValue("span rtl"),
+               "<span dir=rtl>bidi text</span>");
+  EXPECT_STREQ(peer.GetSectionValue("span garbage dir attr"),
+               "&lt;span dir=foo&gt;bidi text&lt;/span&gt;");
+  EXPECT_STREQ(peer.GetSectionValue("span no dir"),
+               "&lt;span&gt;bidi text&lt;/span&gt;");
+  EXPECT_STREQ(peer.GetSectionValue("span bad attribute"),
+               "&lt;span onclick=alert(&#39;foo&#39;)&gt;bidi text&lt;/span&gt;");
+  EXPECT_STREQ(peer.GetSectionValue("span quotes"),
+               "&lt;span dir=&quot;rtl&quot;&gt;bidi text&lt;/span&gt;");
+  EXPECT_STREQ(peer.GetSectionValue("nested span"),
+               "<b>This is <span dir=rtl>bidi text</span></b>");
+  EXPECT_STREQ(peer.GetSectionValue("doubly-nested span"),
+               "<span dir=rtl>This is <span dir=rtl>bidi text"
+               "</span></span>");
+  EXPECT_STREQ(peer.GetSectionValue("two spans"),
+               "<b>This is <span dir=rtl>text</span> that is "
+               "<span dir=rtl>bidi.</span></b>");
+  EXPECT_STREQ(peer.GetSectionValue("unclosed span"),
+               "<b>This is <span dir=rtl>bidi text</span></b>");
+}
+
+TEST(TemplateModifiers, PreEscape) {
+  // Expects pre_escape to entity-escape & " ' < > but keep \v \f \n \t as they are.
+  GOOGLE_NAMESPACE::TemplateDictionary dict("TestPreEscape", NULL);
+  dict.SetEscapedValue("easy PRE", "foo", GOOGLE_NAMESPACE::pre_escape);
+  dict.SetEscapedValue("harder PRE", "foo & bar",
+                       GOOGLE_NAMESPACE::pre_escape);
+  dict.SetEscapedValue("hardest PRE",
+                       " \"--\v--\f--\n--\t--&--<-->--'--\"",
+                       GOOGLE_NAMESPACE::pre_escape);
+
+  GOOGLE_NAMESPACE::TemplateDictionaryPeer peer(&dict);  // peer can look inside dicts
+  EXPECT_STREQ(peer.GetSectionValue("easy PRE"), "foo");
+  EXPECT_STREQ(peer.GetSectionValue("harder PRE"), "foo &amp; bar");
+  EXPECT_STREQ(peer.GetSectionValue("hardest PRE"),
+               " &quot;--\v--\f--\n--\t--&amp;--&lt;--&gt;--&#39;--&quot;");
+}
+
+TEST(TemplateModifiers, XmlEscape) {
+  // Expects xml_escape to entity-escape & " ' < > and blank out control characters.
+  GOOGLE_NAMESPACE::TemplateDictionary dict("TestXmlEscape", NULL);
+  dict.SetEscapedValue("no XML", "", GOOGLE_NAMESPACE::xml_escape);
+  dict.SetEscapedValue("easy XML", "xoo",
+                       GOOGLE_NAMESPACE::xml_escape);
+  dict.SetEscapedValue("harder XML-1", "<>&'\"",
+                       GOOGLE_NAMESPACE::xml_escape);
+  dict.SetEscapedValue("harder XML-2", "Hello<script>alert('&')</script>",
+                       GOOGLE_NAMESPACE::xml_escape);
+  dict.SetEscapedValue("hardest XML", "<<b>>&!''\"\"foo",
+                       GOOGLE_NAMESPACE::xml_escape);
+  // Characters 0x00-0x1F (except \t, \r and \n) are not valid for XML and
+  // compliant parsers are allowed to die when they encounter them. They
+  // should be replaced with spaces.
+  dict.SetEscapedValue("Spacey XML", " \r\n\f",
+                       GOOGLE_NAMESPACE::xml_escape);
+  dict.SetEscapedValue("XML with control chars",
+                       "\x01\x02\x03\x09\x0A\x0B\x0D\x15\x16\x1F",
+                       GOOGLE_NAMESPACE::xml_escape);
+
+  GOOGLE_NAMESPACE::TemplateDictionaryPeer peer(&dict);  // peer can look inside dicts
+  EXPECT_STREQ(peer.GetSectionValue("no XML"), "");
+  EXPECT_STREQ(peer.GetSectionValue("easy XML"), "xoo");
+  EXPECT_STREQ(peer.GetSectionValue("harder XML-1"),
+               "&lt;&gt;&amp;&#39;&quot;");
+  EXPECT_STREQ(peer.GetSectionValue("harder XML-2"),
+               "Hello&lt;script&gt;alert(&#39;&amp;&#39;)&lt;/script&gt;");
+  EXPECT_STREQ(peer.GetSectionValue("hardest XML"),
+               "&lt;&lt;b&gt;&gt;&amp;!&#39;&#39;&quot;&quot;foo");
+  EXPECT_STREQ(peer.GetSectionValue("Spacey XML"),
+               " \r\n ");
+  EXPECT_STREQ(peer.GetSectionValue("XML with control chars"),
+               "   \t\n \r   ");
+}
+
+TEST(TemplateModifiers, ValidateUrlHtmlEscape) {
+  // Expects accepted URLs HTML-escaped and rejected ones replaced by kUnsafeUrlReplacement.
+  GOOGLE_NAMESPACE::TemplateDictionary dict("TestValidateUrlHtmlEscape", NULL);
+  dict.SetEscapedValue("easy http URL", "http://www.google.com",
+                       GOOGLE_NAMESPACE::validate_url_and_html_escape);
+  dict.SetEscapedValue("harder https URL",
+                       "https://www.google.com/search?q=f&hl=en",
+                       GOOGLE_NAMESPACE::validate_url_and_html_escape);
+  dict.SetEscapedValue("easy javascript URL",
+                       "javascript:alert(document.cookie)",
+                       GOOGLE_NAMESPACE::validate_url_and_html_escape);
+  dict.SetEscapedValue("harder javascript URL",
+                       "javascript:alert(10/5)",
+                       GOOGLE_NAMESPACE::validate_url_and_html_escape);
+  dict.SetEscapedValue("easy relative URL",
+                       "foobar.html",
+                       GOOGLE_NAMESPACE::validate_url_and_html_escape);
+  dict.SetEscapedValue("harder relative URL",
+                       "/search?q=green flowers&hl=en",
+                       GOOGLE_NAMESPACE::validate_url_and_html_escape);
+  dict.SetEscapedValue("ftp URL",
+                       "ftp://ftp.example.org/pub/file.txt",
+                       GOOGLE_NAMESPACE::validate_url_and_html_escape);
+
+  GOOGLE_NAMESPACE::TemplateDictionaryPeer peer(&dict);  // peer can look inside dicts
+  EXPECT_STREQ(peer.GetSectionValue("easy http URL"),
+               "http://www.google.com");
+  EXPECT_STREQ(peer.GetSectionValue("harder https URL"),
+               "https://www.google.com/search?q=f&amp;hl=en");
+  EXPECT_STREQ(peer.GetSectionValue("easy javascript URL"),
+               GOOGLE_NAMESPACE::ValidateUrl::kUnsafeUrlReplacement);
+  EXPECT_STREQ(peer.GetSectionValue("harder javascript URL"),
+               GOOGLE_NAMESPACE::ValidateUrl::kUnsafeUrlReplacement);
+  EXPECT_STREQ(peer.GetSectionValue("easy relative URL"), "foobar.html");
+  EXPECT_STREQ(peer.GetSectionValue("harder relative URL"),
+               "/search?q=green flowers&amp;hl=en");
+  EXPECT_STREQ(peer.GetSectionValue("ftp URL"),
+               "ftp://ftp.example.org/pub/file.txt");
+}
+
+TEST(TemplateModifiers, ValidateImgSrcUrlHtmlEscape) {
+  // Expects accepted URLs HTML-escaped, rejected ones -> kUnsafeImgSrcUrlReplacement.
+  GOOGLE_NAMESPACE::TemplateDictionary dict("TestValidateImgSrcUrlHtmlEscape", NULL);
+  dict.SetEscapedValue("easy http URL", "http://www.google.com",
+                       GOOGLE_NAMESPACE::validate_img_src_url_and_html_escape);
+  dict.SetEscapedValue("harder https URL",
+                       "https://www.google.com/search?q=f&hl=en",
+                       GOOGLE_NAMESPACE::validate_img_src_url_and_html_escape);
+  dict.SetEscapedValue("easy javascript URL",
+                       "javascript:alert(document.cookie)",
+                       GOOGLE_NAMESPACE::validate_img_src_url_and_html_escape);
+  dict.SetEscapedValue("harder javascript URL",
+                       "javascript:alert(10/5)",
+                       GOOGLE_NAMESPACE::validate_img_src_url_and_html_escape);
+  dict.SetEscapedValue("easy relative URL",
+                       "foobar.html",
+                       GOOGLE_NAMESPACE::validate_img_src_url_and_html_escape);
+  dict.SetEscapedValue("harder relative URL",
+                       "/search?q=green flowers&hl=en",
+                       GOOGLE_NAMESPACE::validate_img_src_url_and_html_escape);
+  dict.SetEscapedValue("ftp URL",
+                       "ftp://ftp.example.org/pub/file.txt",
+                       GOOGLE_NAMESPACE::validate_img_src_url_and_html_escape);
+
+  GOOGLE_NAMESPACE::TemplateDictionaryPeer peer(&dict);  // peer can look inside dicts
+  EXPECT_STREQ(peer.GetSectionValue("easy http URL"),
+               "http://www.google.com");
+  EXPECT_STREQ(peer.GetSectionValue("harder https URL"),
+               "https://www.google.com/search?q=f&amp;hl=en");
+  EXPECT_STREQ(peer.GetSectionValue("easy javascript URL"),
+               GOOGLE_NAMESPACE::ValidateUrl::kUnsafeImgSrcUrlReplacement);
+  EXPECT_STREQ(peer.GetSectionValue("harder javascript URL"),
+               GOOGLE_NAMESPACE::ValidateUrl::kUnsafeImgSrcUrlReplacement);
+  EXPECT_STREQ(peer.GetSectionValue("easy relative URL"), "foobar.html");
+  EXPECT_STREQ(peer.GetSectionValue("harder relative URL"),
+               "/search?q=green flowers&amp;hl=en");
+  EXPECT_STREQ(peer.GetSectionValue("ftp URL"),
+               "ftp://ftp.example.org/pub/file.txt");
+}
+
+TEST(TemplateModifiers, ValidateUrlJavascriptEscape) {
+  GOOGLE_NAMESPACE::TemplateDictionary dict("TestValidateUrlJavascriptEscape", NULL);
+  dict.SetEscapedValue(
+      "easy http URL", "http://www.google.com",
+      GOOGLE_NAMESPACE::validate_url_and_javascript_escape);
+  dict.SetEscapedValue(
+      "harder https URL",
+      "https://www.google.com/search?q=f&hl=en",
+      GOOGLE_NAMESPACE::validate_url_and_javascript_escape);
+  dict.SetEscapedValue(
+      "mangled http URL", "HTTP://www.google.com",
+      GOOGLE_NAMESPACE::validate_url_and_javascript_escape);
+  dict.SetEscapedValue(
+      "easy javascript URL",
+      "javascript:alert(document.cookie)",
+      GOOGLE_NAMESPACE::validate_url_and_javascript_escape);
+  dict.SetEscapedValue(
+      "harder javascript URL",
+      "javascript:alert(10/5)",
+      GOOGLE_NAMESPACE::validate_url_and_javascript_escape);
+  dict.SetEscapedValue(
+      "easy relative URL",
+      "foobar.html",
+      GOOGLE_NAMESPACE::validate_url_and_javascript_escape);
+  dict.SetEscapedValue(
+      "harder relative URL",
+      "/search?q=green flowers&hl=en",
+      GOOGLE_NAMESPACE::validate_url_and_javascript_escape);
+  dict.SetEscapedValue(
+      "data URL",
+      "data: text/html",
+      GOOGLE_NAMESPACE::validate_url_and_javascript_escape);
+  dict.SetEscapedValue(
+      "mangled javascript URL",
+      "javaSCRIPT:alert(5)",
+      GOOGLE_NAMESPACE::validate_url_and_javascript_escape);
+  dict.SetEscapedValue(
+      "harder mangled javascript URL",
+      "java\nSCRIPT:alert(5)",
+      GOOGLE_NAMESPACE::validate_url_and_javascript_escape);
+
+  // Rejected URLs are compared against kUnsafeUrlReplacement, never a literal.
+  GOOGLE_NAMESPACE::TemplateDictionaryPeer peer(&dict);  // peer can look inside dicts
+  EXPECT_STREQ(peer.GetSectionValue("easy http URL"),
+               "http://www.google.com");
+  EXPECT_STREQ(peer.GetSectionValue("harder https URL"),
+               "https://www.google.com/search?q\\x3df\\x26hl\\x3den");
+  EXPECT_STREQ(peer.GetSectionValue("mangled http URL"),
+               "HTTP://www.google.com");
+  EXPECT_STREQ(peer.GetSectionValue("easy javascript URL"),
+               GOOGLE_NAMESPACE::ValidateUrl::kUnsafeUrlReplacement);
+  EXPECT_STREQ(peer.GetSectionValue("harder javascript URL"),
+               GOOGLE_NAMESPACE::ValidateUrl::kUnsafeUrlReplacement);
+  EXPECT_STREQ(peer.GetSectionValue("easy relative URL"), "foobar.html");
+  EXPECT_STREQ(peer.GetSectionValue("harder relative URL"),
+               "/search?q\\x3dgreen flowers\\x26hl\\x3den");
+  EXPECT_STREQ(peer.GetSectionValue("data URL"),
+               GOOGLE_NAMESPACE::ValidateUrl::kUnsafeUrlReplacement);
+  EXPECT_STREQ(peer.GetSectionValue("mangled javascript URL"),
+               GOOGLE_NAMESPACE::ValidateUrl::kUnsafeUrlReplacement);
+  EXPECT_STREQ(peer.GetSectionValue("harder mangled javascript URL"),
+               GOOGLE_NAMESPACE::ValidateUrl::kUnsafeUrlReplacement);
+}
+
+TEST(TemplateModifiers, ValidateImgSrcUrlJavascriptEscape) {
+  GOOGLE_NAMESPACE::TemplateDictionary dict("TestValidateImgSrcUrlJavascriptEscape", NULL);
+  dict.SetEscapedValue(
+      "easy http URL", "http://www.google.com",
+      GOOGLE_NAMESPACE::validate_img_src_url_and_javascript_escape);
+  dict.SetEscapedValue(
+      "harder https URL",
+      "https://www.google.com/search?q=f&hl=en",
+      GOOGLE_NAMESPACE::validate_img_src_url_and_javascript_escape);
+  dict.SetEscapedValue(
+      "mangled http URL", "HTTP://www.google.com",
+      GOOGLE_NAMESPACE::validate_img_src_url_and_javascript_escape);
+  dict.SetEscapedValue(
+      "easy javascript URL",
+      "javascript:alert(document.cookie)",
+      GOOGLE_NAMESPACE::validate_img_src_url_and_javascript_escape);
+  dict.SetEscapedValue(
+      "harder javascript URL",
+      "javascript:alert(10/5)",
+      GOOGLE_NAMESPACE::validate_img_src_url_and_javascript_escape);
+  dict.SetEscapedValue(
+      "easy relative URL",
+      "foobar.html",
+      GOOGLE_NAMESPACE::validate_img_src_url_and_javascript_escape);
+  dict.SetEscapedValue(
+      "harder relative URL",
+      "/search?q=green flowers&hl=en",
+      GOOGLE_NAMESPACE::validate_img_src_url_and_javascript_escape);
+  dict.SetEscapedValue(
+      "data URL",
+      "data: text/html",
+      GOOGLE_NAMESPACE::validate_img_src_url_and_javascript_escape);
+  dict.SetEscapedValue(
+      "mangled javascript URL",
+      "javaSCRIPT:alert(5)",
+      GOOGLE_NAMESPACE::validate_img_src_url_and_javascript_escape);
+  dict.SetEscapedValue(
+      "harder mangled javascript URL",
+      "java\nSCRIPT:alert(5)",
+      GOOGLE_NAMESPACE::validate_img_src_url_and_javascript_escape);
+
+  // Rejected URLs are compared against kUnsafeImgSrcUrlReplacement, never a literal.
+  GOOGLE_NAMESPACE::TemplateDictionaryPeer peer(&dict);  // peer can look inside dicts
+  EXPECT_STREQ(peer.GetSectionValue("easy http URL"),
+               "http://www.google.com");
+  EXPECT_STREQ(peer.GetSectionValue("harder https URL"),
+               "https://www.google.com/search?q\\x3df\\x26hl\\x3den");
+  EXPECT_STREQ(peer.GetSectionValue("mangled http URL"),
+               "HTTP://www.google.com");
+  EXPECT_STREQ(peer.GetSectionValue("easy javascript URL"),
+               GOOGLE_NAMESPACE::ValidateUrl::kUnsafeImgSrcUrlReplacement);
+  EXPECT_STREQ(peer.GetSectionValue("harder javascript URL"),
+               GOOGLE_NAMESPACE::ValidateUrl::kUnsafeImgSrcUrlReplacement);
+  EXPECT_STREQ(peer.GetSectionValue("easy relative URL"),
+               "foobar.html");
+  EXPECT_STREQ(peer.GetSectionValue("harder relative URL"),
+               "/search?q\\x3dgreen flowers\\x26hl\\x3den");
+  EXPECT_STREQ(peer.GetSectionValue("data URL"),
+               GOOGLE_NAMESPACE::ValidateUrl::kUnsafeImgSrcUrlReplacement);
+  EXPECT_STREQ(peer.GetSectionValue("mangled javascript URL"),
+               GOOGLE_NAMESPACE::ValidateUrl::kUnsafeImgSrcUrlReplacement);
+  EXPECT_STREQ(peer.GetSectionValue("harder mangled javascript URL"),
+               GOOGLE_NAMESPACE::ValidateUrl::kUnsafeImgSrcUrlReplacement);
+}
+
+TEST(TemplateModifiers, ValidateUrlCssEscape) {
+  GOOGLE_NAMESPACE::TemplateDictionary urls("TestValidateUrlCssEscape", NULL);
+  urls.SetEscapedValue("easy http URL", "http://www.google.com",
+                       GOOGLE_NAMESPACE::validate_url_and_css_escape);
+  urls.SetEscapedValue("harder https URL",
+                       "https://www.google.com/search?q=f&hl=en",
+                       GOOGLE_NAMESPACE::validate_url_and_css_escape);
+  urls.SetEscapedValue("javascript URL",
+                       "javascript:alert(document.cookie)",
+                       GOOGLE_NAMESPACE::validate_url_and_css_escape);
+  urls.SetEscapedValue("relative URL", "/search?q=green flowers&hl=en",
+                       GOOGLE_NAMESPACE::validate_url_and_css_escape);
+  urls.SetEscapedValue("hardest URL", "http://www.google.com/s?q='bla'"
+                       "&a=\"\"&b=(<tag>)&c=*\r\n\\\\bla",
+                       GOOGLE_NAMESPACE::validate_url_and_css_escape);
+  // Expected: quotes, parens, <>, *, CR/LF and backslashes come back as %XX.
+  GOOGLE_NAMESPACE::TemplateDictionaryPeer inspector(&urls);  // can look inside dicts
+  EXPECT_STREQ(inspector.GetSectionValue("easy http URL"),
+               "http://www.google.com");
+  EXPECT_STREQ(inspector.GetSectionValue("harder https URL"),
+               "https://www.google.com/search?q=f&hl=en");
+  EXPECT_STREQ(inspector.GetSectionValue("javascript URL"),
+               GOOGLE_NAMESPACE::ValidateUrl::kUnsafeUrlReplacement);
+  EXPECT_STREQ(inspector.GetSectionValue("relative URL"),
+               "/search?q=green flowers&hl=en");
+  EXPECT_STREQ(inspector.GetSectionValue("hardest URL"),
+               "http://www.google.com/s?q=%27bla%27"
+               "&a=%22%22&b=%28%3Ctag%3E%29&c=%2A%0D%0A%5C%5Cbla");
+}
+
+TEST(TemplateModifiers, ValidateImgSrcUrlCssEscape) {
+  GOOGLE_NAMESPACE::TemplateDictionary urls("TestValidateImgSrcUrlCssEscape", NULL);
+  urls.SetEscapedValue("easy http URL", "http://www.google.com",
+                       GOOGLE_NAMESPACE::validate_img_src_url_and_css_escape);
+  urls.SetEscapedValue("harder https URL",
+                       "https://www.google.com/search?q=f&hl=en",
+                       GOOGLE_NAMESPACE::validate_img_src_url_and_css_escape);
+  urls.SetEscapedValue("javascript URL",
+                       "javascript:alert(document.cookie)",
+                       GOOGLE_NAMESPACE::validate_img_src_url_and_css_escape);
+  urls.SetEscapedValue("relative URL", "/search?q=green flowers&hl=en",
+                       GOOGLE_NAMESPACE::validate_img_src_url_and_css_escape);
+  urls.SetEscapedValue("hardest URL", "http://www.google.com/s?q='bla'"
+                       "&a=\"\"&b=(<tag>)&c=*\r\n\\\\bla",
+                       GOOGLE_NAMESPACE::validate_img_src_url_and_css_escape);
+  // Expected: quotes, parens, <>, *, CR/LF and backslashes come back as %XX.
+  GOOGLE_NAMESPACE::TemplateDictionaryPeer inspector(&urls);  // can look inside dicts
+  EXPECT_STREQ(inspector.GetSectionValue("easy http URL"),
+               "http://www.google.com");
+  EXPECT_STREQ(inspector.GetSectionValue("harder https URL"),
+               "https://www.google.com/search?q=f&hl=en");
+  EXPECT_STREQ(inspector.GetSectionValue("javascript URL"),
+               GOOGLE_NAMESPACE::ValidateUrl::kUnsafeImgSrcUrlReplacement);
+  EXPECT_STREQ(inspector.GetSectionValue("relative URL"),
+               "/search?q=green flowers&hl=en");
+  EXPECT_STREQ(inspector.GetSectionValue("hardest URL"),
+               "http://www.google.com/s?q=%27bla%27"
+               "&a=%22%22&b=%28%3Ctag%3E%29&c=%2A%0D%0A%5C%5Cbla");
+}
+
+TEST(TemplateModifiers, CleanseAttribute) {
+  GOOGLE_NAMESPACE::TemplateDictionary attrs("TestCleanseAttribute", NULL);
+  attrs.SetEscapedValue("easy attribute", "top",
+                        GOOGLE_NAMESPACE::cleanse_attribute);
+  attrs.SetEscapedValue("harder attribute", "foo & bar",
+                        GOOGLE_NAMESPACE::cleanse_attribute);
+  attrs.SetEscapedValue("hardest attribute",
+                        "top onclick='alert(document.cookie)'",
+                        GOOGLE_NAMESPACE::cleanse_attribute);
+  attrs.SetEscapedValue("equal in middle", "foo = bar",
+                        GOOGLE_NAMESPACE::cleanse_attribute);
+  attrs.SetEscapedValue("leading equal", "=foo",
+                        GOOGLE_NAMESPACE::cleanse_attribute);
+  attrs.SetEscapedValue("trailing equal", "foo=",
+                        GOOGLE_NAMESPACE::cleanse_attribute);
+  attrs.SetEscapedValue("all equals", "===foo===bar===",
+                        GOOGLE_NAMESPACE::cleanse_attribute);
+  attrs.SetEscapedValue("just equals", "===",
+                        GOOGLE_NAMESPACE::cleanse_attribute);
+  // Expected: space, &, ', ( and ) come back as '_'.
+  GOOGLE_NAMESPACE::TemplateDictionaryPeer inspector(&attrs);  // can look inside dicts
+  EXPECT_STREQ(inspector.GetSectionValue("easy attribute"), "top");
+  EXPECT_STREQ(inspector.GetSectionValue("harder attribute"), "foo___bar");
+  EXPECT_STREQ(inspector.GetSectionValue("hardest attribute"),
+               "top_onclick=_alert_document.cookie__");
+  // Expected: a leading or trailing '=' becomes '_'; interior '=' is kept.
+  EXPECT_STREQ(inspector.GetSectionValue("equal in middle"), "foo_=_bar");
+  EXPECT_STREQ(inspector.GetSectionValue("leading equal"), "_foo");
+  EXPECT_STREQ(inspector.GetSectionValue("trailing equal"), "foo_");
+  EXPECT_STREQ(inspector.GetSectionValue("all equals"), "_==foo===bar==_");
+  EXPECT_STREQ(inspector.GetSectionValue("just equals"), "_=_");
+}
+
+TEST(TemplateModifiers, CleanseCss) {
+  // Expects cleanse_css to delete & ; : ( ) outright, leaving neighbours untouched.
+  GOOGLE_NAMESPACE::TemplateDictionary dict("TestCleanseCss", NULL);
+  dict.SetEscapedValue("easy css", "top",
+                       GOOGLE_NAMESPACE::cleanse_css);
+  dict.SetEscapedValue("harder css", "foo & bar",
+                       GOOGLE_NAMESPACE::cleanse_css);
+  dict.SetEscapedValue("hardest css",
+                       ";width:expression(document.cookie)",
+                       GOOGLE_NAMESPACE::cleanse_css);
+
+  GOOGLE_NAMESPACE::TemplateDictionaryPeer peer(&dict);  // peer can look inside dicts
+  EXPECT_STREQ(peer.GetSectionValue("easy css"), "top");
+  EXPECT_STREQ(peer.GetSectionValue("harder css"),
+               "foo  bar");
+  EXPECT_STREQ(peer.GetSectionValue("hardest css"),
+               "widthexpressiondocument.cookie");
+}
+
+TEST(TemplateModifiers, JavascriptEscape) {
+  GOOGLE_NAMESPACE::TemplateDictionary scripts("TestJavascriptEscape", NULL);
+  scripts.SetEscapedValue("easy JS", "joo",
+                          GOOGLE_NAMESPACE::javascript_escape);
+  scripts.SetEscapedValue("harder JS", "f = 'joo';",
+                          GOOGLE_NAMESPACE::javascript_escape);
+  scripts.SetEscapedValue("hardest JS",
+                          ("f = 'foo\f';\r\n\tprint \"\\&foo = \b\", \"foo\""),
+                          GOOGLE_NAMESPACE::javascript_escape);
+  scripts.SetEscapedValue("close script JS",
+                          "//--></script><script>alert(123);</script>",
+                          GOOGLE_NAMESPACE::javascript_escape);
+  scripts.SetEscapedValue("unicode codepoints",
+                          ("line1" "\xe2\x80\xa8" "line2" "\xe2\x80\xa9" "line3"
+                           /* \u2027 */ "\xe2\x80\xa7"
+                           /* \u202A */ "\xe2\x80\xaa"
+                           /* malformed */ "\xe2" "\xe2\x80\xa8"
+                           /* truncated */ "\xe2\x80"),
+                          GOOGLE_NAMESPACE::javascript_escape);
+  // Expected: = ' " & < > as \xNN, control chars as \f \r \n \t \b, U+2028/9 as \uNNNN.
+  GOOGLE_NAMESPACE::TemplateDictionaryPeer inspector(&scripts);  // can look inside dicts
+  EXPECT_STREQ(inspector.GetSectionValue("easy JS"), "joo");
+  EXPECT_STREQ(inspector.GetSectionValue("harder JS"), "f \\x3d \\x27joo\\x27;");
+  EXPECT_STREQ(inspector.GetSectionValue("hardest JS"),
+               "f \\x3d \\x27foo\\f\\x27;\\r\\n\\tprint \\x22\\\\\\x26foo "
+               "\\x3d \\b\\x22, \\x22foo\\x22");
+  EXPECT_STREQ(inspector.GetSectionValue("close script JS"),
+               "//--\\x3e\\x3c/script\\x3e\\x3cscript\\x3e"
+               "alert(123);\\x3c/script\\x3e");
+  EXPECT_STREQ(inspector.GetSectionValue("unicode codepoints"),
+               "line1" "\\u2028" "line2" "\\u2029" "line3"
+               "\xe2\x80\xa7"
+               "\xe2\x80\xaa"
+               "\xe2" "\\u2028"
+               "\xe2\x80");
+}
+
+TEST(TemplateModifiers, JavascriptNumber) { // Checks javascript_number: booleans, numeric literals and "" pass through; other input becomes "null".
+ GOOGLE_NAMESPACE::TemplateDictionary dict("TestJavascriptNumber", NULL);
+ dict.SetEscapedValue("empty string", "",
+ GOOGLE_NAMESPACE::javascript_number);
+ dict.SetEscapedValue("boolean true", "true",
+ GOOGLE_NAMESPACE::javascript_number);
+ dict.SetEscapedValue("boolean false", "false",
+ GOOGLE_NAMESPACE::javascript_number);
+ dict.SetEscapedValue("bad boolean 1", "tfalse",
+ GOOGLE_NAMESPACE::javascript_number);
+ dict.SetEscapedValue("bad boolean 2", "tru",
+ GOOGLE_NAMESPACE::javascript_number);
+ dict.SetEscapedValue("bad boolean 3", "truee",
+ GOOGLE_NAMESPACE::javascript_number);
+ dict.SetEscapedValue("bad boolean 4", "invalid",
+ GOOGLE_NAMESPACE::javascript_number);
+
+ // Check that our string comparisons for booleans do not
+ // assume input is null terminated.
+ dict.SetEscapedValue("good boolean 5", GOOGLE_NAMESPACE::TemplateString("truee", 4), // value is the first 4 bytes: "true"
+ GOOGLE_NAMESPACE::javascript_number);
+ dict.SetEscapedValue("bad boolean 6", GOOGLE_NAMESPACE::TemplateString("true", 3), // value is the first 3 bytes: "tru"
+ GOOGLE_NAMESPACE::javascript_number);
+
+ dict.SetEscapedValue("hex number 1", "0x123456789ABCDEF",
+ GOOGLE_NAMESPACE::javascript_number);
+ dict.SetEscapedValue("hex number 2", "0X123456789ABCDEF",
+ GOOGLE_NAMESPACE::javascript_number);
+ dict.SetEscapedValue("bad hex number 1", "0x123GAC",
+ GOOGLE_NAMESPACE::javascript_number);
+ dict.SetEscapedValue("bad hex number 2", "0x",
+ GOOGLE_NAMESPACE::javascript_number);
+ dict.SetEscapedValue("number zero", "0",
+ GOOGLE_NAMESPACE::javascript_number);
+ dict.SetEscapedValue("invalid number", "A9",
+ GOOGLE_NAMESPACE::javascript_number);
+ dict.SetEscapedValue("decimal zero", "0.0",
+ GOOGLE_NAMESPACE::javascript_number);
+ dict.SetEscapedValue("octal number", "01234567",
+ GOOGLE_NAMESPACE::javascript_number);
+ dict.SetEscapedValue("decimal number", "799.123",
+ GOOGLE_NAMESPACE::javascript_number);
+ dict.SetEscapedValue("negative number", "-244",
+ GOOGLE_NAMESPACE::javascript_number);
+ dict.SetEscapedValue("positive number", "+244",
+ GOOGLE_NAMESPACE::javascript_number);
+ dict.SetEscapedValue("valid float 1", ".55",
+ GOOGLE_NAMESPACE::javascript_number);
+ dict.SetEscapedValue("valid float 2", "8.55e-12",
+ GOOGLE_NAMESPACE::javascript_number);
+ dict.SetEscapedValue("invalid float", "8.55ABC",
+ GOOGLE_NAMESPACE::javascript_number);
+
+ GOOGLE_NAMESPACE::TemplateDictionaryPeer peer(&dict); // peer can look inside dicts
+ EXPECT_STREQ(peer.GetSectionValue("empty string"), ""); // empty input stays empty rather than turning into "null"
+ EXPECT_STREQ(peer.GetSectionValue("boolean true"), "true");
+ EXPECT_STREQ(peer.GetSectionValue("boolean false"), "false");
+ EXPECT_STREQ(peer.GetSectionValue("bad boolean 1"), "null");
+ EXPECT_STREQ(peer.GetSectionValue("bad boolean 2"), "null");
+ EXPECT_STREQ(peer.GetSectionValue("bad boolean 3"), "null");
+ EXPECT_STREQ(peer.GetSectionValue("bad boolean 4"), "null");
+ EXPECT_STREQ(peer.GetSectionValue("good boolean 5"), "true");
+ EXPECT_STREQ(peer.GetSectionValue("bad boolean 6"), "null");
+ EXPECT_STREQ(peer.GetSectionValue("hex number 1"), "0x123456789ABCDEF");
+ EXPECT_STREQ(peer.GetSectionValue("hex number 2"), "0X123456789ABCDEF");
+ EXPECT_STREQ(peer.GetSectionValue("bad hex number 1"), "null");
+ EXPECT_STREQ(peer.GetSectionValue("bad hex number 2"), "null");
+ EXPECT_STREQ(peer.GetSectionValue("number zero"), "0");
+ EXPECT_STREQ(peer.GetSectionValue("invalid number"), "null");
+ EXPECT_STREQ(peer.GetSectionValue("decimal zero"), "0.0");
+ EXPECT_STREQ(peer.GetSectionValue("octal number"), "01234567");
+ EXPECT_STREQ(peer.GetSectionValue("decimal number"), "799.123");
+ EXPECT_STREQ(peer.GetSectionValue("negative number"), "-244");
+ EXPECT_STREQ(peer.GetSectionValue("positive number"), "+244");
+ EXPECT_STREQ(peer.GetSectionValue("valid float 1"), ".55");
+ EXPECT_STREQ(peer.GetSectionValue("valid float 2"), "8.55e-12");
+ EXPECT_STREQ(peer.GetSectionValue("invalid float"), "null");
+}
+
+// Checks json_escape: double quotes, backslashes, control characters, '/', '<', '>' and '&' must come out escaped.
+TEST(TemplateModifiers, JsonEscape) {
+ GOOGLE_NAMESPACE::TemplateDictionary dict("TestJsonEscape", NULL);
+ dict.SetEscapedValue("easy JSON", "joo", GOOGLE_NAMESPACE::json_escape);
+ dict.SetEscapedValue("harder JSON", "f = \"joo\"; e = 'joo';",
+ GOOGLE_NAMESPACE::json_escape);
+ dict.SetEscapedValue("hardest JSON",
+ "f = 'foo<>';\r\n\t\fprint \"\\&foo = /\b\", \"foo\"",
+ GOOGLE_NAMESPACE::json_escape);
+ // The input must spell out the "&nbsp;" entity: the expectation below is an escaped '&' followed by "nbsp;".
+ dict.SetEscapedValue("html in JSON", "<html>&nbsp;</html>", GOOGLE_NAMESPACE::json_escape);
+
+ GOOGLE_NAMESPACE::TemplateDictionaryPeer peer(&dict); // peer can look inside dicts
+ EXPECT_STREQ(peer.GetSectionValue("easy JSON"), "joo");
+ EXPECT_STREQ(peer.GetSectionValue("harder JSON"), "f = \\\"joo\\\"; "
+ "e = 'joo';");
+ EXPECT_STREQ(peer.GetSectionValue("html in JSON"),
+ "\\u003Chtml\\u003E\\u0026nbsp;\\u003C\\/html\\u003E");
+ // There's a bug in MSVC 7.1 where you can't pass a literal string
+ // with more than one \" in it to a macro (!) -- see
+ // http://marc.info/?t=110853662500001&r=1&w=2
+ // We work around this by assigning the string to a variable first.
+ const char* expected = ("f = 'foo\\u003C\\u003E';\\r\\n\\t\\fprint \\\""
+ "\\\\\\u0026foo = \\/\\b\\\", \\\"foo\\\"");
+ EXPECT_STREQ(peer.GetSectionValue("hardest JSON"), expected);
+}
+
+TEST(TemplateModifiers, UrlQueryEscape) { // Checks url_query_escape: space becomes '+', reserved and non-ASCII bytes become %XX, safe characters are untouched.
+ GOOGLE_NAMESPACE::TemplateDictionary dict("TestUrlQueryEscape", NULL);
+ // The first three tests do not need escaping.
+ dict.SetEscapedValue("query escape 0", "",
+ GOOGLE_NAMESPACE::url_query_escape);
+ dict.SetEscapedValue("query escape 1", "noop",
+ GOOGLE_NAMESPACE::url_query_escape);
+ dict.SetEscapedValue("query escape 2",
+ "0123456789abcdefghjijklmnopqrstuvwxyz" // sic: the alphabet contains "ghjij"; the expectation below repeats the same text
+ "ABCDEFGHIJKLMNOPQRSTUVWXYZ.-_*/~!(),",
+ GOOGLE_NAMESPACE::url_query_escape);
+ dict.SetEscapedValue("query escape 3", " ?a=b;c#d ",
+ GOOGLE_NAMESPACE::url_query_escape);
+ dict.SetEscapedValue("query escape 4", "#$%&+<=>?@[\\]^`{|}",
+ GOOGLE_NAMESPACE::url_query_escape);
+ dict.SetEscapedValue("query escape 5", "\xDE\xAD\xCA\xFE",
+ GOOGLE_NAMESPACE::url_query_escape);
+ dict.SetEscapedValue("query escape 6", "\"':",
+ GOOGLE_NAMESPACE::url_query_escape);
+
+ GOOGLE_NAMESPACE::TemplateDictionaryPeer peer(&dict); // peer can look inside dicts
+ EXPECT_STREQ(peer.GetSectionValue("query escape 0"), "");
+ EXPECT_STREQ(peer.GetSectionValue("query escape 1"), "noop");
+ EXPECT_STREQ(peer.GetSectionValue("query escape 2"),
+ "0123456789abcdefghjijklmnopqrstuvwxyz"
+ "ABCDEFGHIJKLMNOPQRSTUVWXYZ.-_*/~!(),");
+ EXPECT_STREQ(peer.GetSectionValue("query escape 3"), "+%3Fa%3Db%3Bc%23d+"); // leading and trailing spaces each turn into '+'
+ EXPECT_STREQ(peer.GetSectionValue("query escape 4"),
+ "%23%24%25%26%2B%3C%3D%3E%3F%40%5B%5C%5D%5E%60%7B%7C%7D");
+ EXPECT_STREQ(peer.GetSectionValue("query escape 5"), "%DE%AD%CA%FE"); // bytes above 0x7F are percent-encoded one byte at a time
+ EXPECT_STREQ(peer.GetSectionValue("query escape 6"), "%22%27%3A");
+}
+
+TEST(TemplateModifiers, PrefixLine) { // Checks prefix_line: the prefix is inserted after every line break, never before the first line.
+ GOOGLE_NAMESPACE::TemplateDictionary dict("TestPrefixLine", NULL); // NOTE(review): dict is never referenced below
+ // These don't escape: we don't put the prefix before the first line
+ EXPECT_STREQ(GOOGLE_NAMESPACE::prefix_line("pt 1", " ").c_str(),
+ "pt 1");
+ EXPECT_STREQ(GOOGLE_NAMESPACE::prefix_line("pt 1", "::").c_str(),
+ "pt 1");
+
+ EXPECT_STREQ(GOOGLE_NAMESPACE::prefix_line("pt 1\npt 2", ":").c_str(),
+ "pt 1\n:pt 2");
+ EXPECT_STREQ(GOOGLE_NAMESPACE::prefix_line("pt 1\npt 2", " ").c_str(),
+ "pt 1\n pt 2");
+ EXPECT_STREQ(GOOGLE_NAMESPACE::prefix_line("pt 1\npt 2", "\n").c_str(),
+ "pt 1\n\npt 2");
+ EXPECT_STREQ(GOOGLE_NAMESPACE::prefix_line("pt 1\npt 2\n", " ").c_str(), // a trailing line break is followed by the prefix too
+ "pt 1\n pt 2\n ");
+
+ EXPECT_STREQ(GOOGLE_NAMESPACE::prefix_line("pt 1\rpt 2\n", ":").c_str(), // a lone CR counts as a line break
+ "pt 1\r:pt 2\n:");
+ EXPECT_STREQ(GOOGLE_NAMESPACE::prefix_line("pt 1\npt 2\r", ":").c_str(),
+ "pt 1\n:pt 2\r:");
+ EXPECT_STREQ(GOOGLE_NAMESPACE::prefix_line("pt 1\r\npt 2\r", ":").c_str(), // CR LF is treated as one line break
+ "pt 1\r\n:pt 2\r:");
+}
+
+TEST(TemplateModifiers, FindModifier) { // Checks FindModifier lookups by long name, short name and "=value", for built-in and added x- modifiers.
+ const GOOGLE_NAMESPACE::ModifierInfo* info;
+ EXPECT_TRUE((info = GOOGLE_NAMESPACE::FindModifier("html_escape", 11, "", 0)));
+ EXPECT_EQ(info->modifier, &GOOGLE_NAMESPACE::html_escape);
+ EXPECT_TRUE((info = GOOGLE_NAMESPACE::FindModifier("h", 1, "", 0)));
+ EXPECT_EQ(info->modifier, &GOOGLE_NAMESPACE::html_escape);
+
+ EXPECT_TRUE((info = GOOGLE_NAMESPACE::FindModifier("html_escape_with_arg", 20,
+ "=pre", 4)));
+ EXPECT_EQ(info->modifier, &GOOGLE_NAMESPACE::pre_escape);
+ EXPECT_TRUE((info = GOOGLE_NAMESPACE::FindModifier("H", 1, "=pre", 4)));
+ EXPECT_EQ(info->modifier, &GOOGLE_NAMESPACE::pre_escape);
+
+ EXPECT_TRUE((info = GOOGLE_NAMESPACE::FindModifier("javascript_escape_with_arg",
+ 26, "=number", 7)));
+ EXPECT_EQ(info->modifier, &GOOGLE_NAMESPACE::javascript_number); // verify the long-name lookup before info is reassigned
+ EXPECT_TRUE((info = GOOGLE_NAMESPACE::FindModifier("J", 1, "=number", 7)));
+ EXPECT_EQ(info->modifier, &GOOGLE_NAMESPACE::javascript_number);
+
+ // html_escape_with_arg doesn't have a default value, so these should fail.
+ EXPECT_FALSE(GOOGLE_NAMESPACE::FindModifier("H", 1, "=pre", 2)); // "=p"
+ EXPECT_FALSE(GOOGLE_NAMESPACE::FindModifier("H", 1, "=pree", 5));
+ EXPECT_FALSE(GOOGLE_NAMESPACE::FindModifier("H", 1, "=notpresent", 11));
+
+ // If we don't have a modifier-value when we ought, we should fail.
+ EXPECT_FALSE(GOOGLE_NAMESPACE::FindModifier("html_escape", 11, "=p", 2));
+ EXPECT_FALSE(GOOGLE_NAMESPACE::FindModifier("h", 1, "=p", 2));
+
+ EXPECT_FALSE(GOOGLE_NAMESPACE::FindModifier("html_escape_with_arg", 20, "", 0));
+ EXPECT_FALSE(GOOGLE_NAMESPACE::FindModifier("H", 1, "", 0));
+
+ // Test with added modifiers as well.
+ GOOGLE_NAMESPACE::NullModifier foo_modifier1;
+ GOOGLE_NAMESPACE::NullModifier foo_modifier2;
+ GOOGLE_NAMESPACE::NullModifier foo_modifier3;
+ GOOGLE_NAMESPACE::NullModifier foo_modifier4;
+ EXPECT_TRUE(GOOGLE_NAMESPACE::AddModifier("x-test", &foo_modifier1));
+ EXPECT_TRUE(GOOGLE_NAMESPACE::AddModifier("x-test-arg=", &foo_modifier2));
+ EXPECT_TRUE(GOOGLE_NAMESPACE::AddModifier("x-test-arg=h", &foo_modifier3));
+ EXPECT_TRUE(GOOGLE_NAMESPACE::AddModifier("x-test-arg=json", &foo_modifier4));
+
+ EXPECT_TRUE((info = GOOGLE_NAMESPACE::FindModifier("x-test", 6, "", 0)));
+ EXPECT_TRUE(info->is_registered);
+ EXPECT_EQ(info->modifier, &foo_modifier1);
+ EXPECT_EQ(info->xss_class, GOOGLE_NAMESPACE::XSS_UNIQUE);
+ EXPECT_TRUE((info = GOOGLE_NAMESPACE::FindModifier("x-test", 6, "=h", 2)));
+ EXPECT_FALSE(info->is_registered);
+ // This tests default values
+ EXPECT_TRUE((info = GOOGLE_NAMESPACE::FindModifier("x-test-arg", 10, "=p", 2)));
+ EXPECT_TRUE(info->is_registered);
+ EXPECT_EQ(info->modifier, &foo_modifier2);
+ EXPECT_TRUE((info = GOOGLE_NAMESPACE::FindModifier("x-test-arg", 10, "=h", 2)));
+ EXPECT_TRUE(info->is_registered);
+ EXPECT_EQ(info->modifier, &foo_modifier3);
+ EXPECT_TRUE((info = GOOGLE_NAMESPACE::FindModifier("x-test-arg", 10, "=html", 5)));
+ EXPECT_TRUE(info->is_registered);
+ EXPECT_EQ(info->modifier, &foo_modifier2);
+ EXPECT_TRUE((info = GOOGLE_NAMESPACE::FindModifier("x-test-arg", 10, "=json", 5)));
+ EXPECT_TRUE(info->is_registered);
+ EXPECT_EQ(info->modifier, &foo_modifier4);
+ // The value is required to start with an '=' to match the
+ // specialization. If it doesn't, it will match the default.
+ EXPECT_TRUE((info = GOOGLE_NAMESPACE::FindModifier("x-test-arg", 10, "json", 4)));
+ EXPECT_TRUE(info->is_registered);
+ EXPECT_EQ(info->modifier, &foo_modifier2);
+ EXPECT_TRUE((info = GOOGLE_NAMESPACE::FindModifier("x-test-arg", 10,
+ "=jsonnabbe", 5))); // only the first 5 bytes count: "=json"
+ EXPECT_TRUE(info->is_registered);
+ EXPECT_EQ(info->modifier, &foo_modifier4);
+ EXPECT_TRUE((info = GOOGLE_NAMESPACE::FindModifier("x-test-arg", 10,
+ "=jsonnabbe", 6))); // "=jsonn" matches no specialization, so the default is expected
+ EXPECT_TRUE(info->is_registered);
+ EXPECT_EQ(info->modifier, &foo_modifier2);
+ EXPECT_TRUE((info = GOOGLE_NAMESPACE::FindModifier("x-test-arg", 10,
+ "=jsonnabbe", 4))); // "=jso" likewise falls back to the default
+ EXPECT_TRUE(info->is_registered);
+ EXPECT_EQ(info->modifier, &foo_modifier2);
+
+ // If we try to find an x- modifier that wasn't added, we should get
+ // a legit but "unknown" modifier back.
+ EXPECT_TRUE((info = GOOGLE_NAMESPACE::FindModifier("x-foo", 5, "", 0)));
+ EXPECT_FALSE(info->is_registered);
+ EXPECT_TRUE((info = GOOGLE_NAMESPACE::FindModifier("x-bar", 5, "=p", 2)));
+ EXPECT_FALSE(info->is_registered);
+
+ // Basic test with added XssSafe modifier.
+ GOOGLE_NAMESPACE::NullModifier foo_modifier5;
+ EXPECT_TRUE(GOOGLE_NAMESPACE::AddXssSafeModifier("x-safetest",
+ &foo_modifier5));
+ EXPECT_TRUE((info = GOOGLE_NAMESPACE::FindModifier("x-safetest", 10, "", 0)));
+ EXPECT_TRUE(info->is_registered);
+ EXPECT_EQ(info->xss_class, GOOGLE_NAMESPACE::XSS_SAFE);
+ EXPECT_EQ(info->modifier, &foo_modifier5);
+}
+
+TEST(TemplateModifiers, AddModifier) { // Checks AddModifier: new x- names are accepted; the name "test" and repeated registrations are rejected.
+ EXPECT_TRUE(GOOGLE_NAMESPACE::AddModifier("x-atest", &GOOGLE_NAMESPACE::html_escape));
+ EXPECT_TRUE(GOOGLE_NAMESPACE::AddModifier("x-atest-arg=", &GOOGLE_NAMESPACE::html_escape));
+ EXPECT_TRUE(GOOGLE_NAMESPACE::AddModifier("x-atest-arg=h", &GOOGLE_NAMESPACE::html_escape));
+ EXPECT_TRUE(GOOGLE_NAMESPACE::AddModifier("x-atest-arg=html", &GOOGLE_NAMESPACE::html_escape));
+ EXPECT_TRUE(GOOGLE_NAMESPACE::AddModifier("x-atest-arg=json", &GOOGLE_NAMESPACE::json_escape));
+ EXPECT_TRUE(GOOGLE_NAMESPACE::AddModifier("x-atest-arg=j", &GOOGLE_NAMESPACE::json_escape));
+ EXPECT_TRUE(GOOGLE_NAMESPACE::AddModifier("x-atest-arg=J", &GOOGLE_NAMESPACE::json_escape)); // differs from "=j" only by case and is still accepted
+
+ // Make sure AddModifier fails with an invalid name.
+ EXPECT_FALSE(GOOGLE_NAMESPACE::AddModifier("test", &GOOGLE_NAMESPACE::html_escape));
+
+ // Make sure AddModifier fails with a duplicate name.
+ EXPECT_FALSE(GOOGLE_NAMESPACE::AddModifier("x-atest", &GOOGLE_NAMESPACE::html_escape));
+ EXPECT_FALSE(GOOGLE_NAMESPACE::AddModifier("x-atest-arg=", &GOOGLE_NAMESPACE::html_escape));
+ EXPECT_FALSE(GOOGLE_NAMESPACE::AddModifier("x-atest-arg=h", &GOOGLE_NAMESPACE::html_escape));
+ EXPECT_FALSE(GOOGLE_NAMESPACE::AddModifier("x-atest-arg=html", &GOOGLE_NAMESPACE::html_escape));
+
+ const GOOGLE_NAMESPACE::ModifierInfo* info;
+ EXPECT_TRUE((info = GOOGLE_NAMESPACE::FindModifier("x-atest", 7, "", 0)));
+ EXPECT_FALSE(info->modval_required);
+
+ // Make sure we can still add a modifier after having already
+ // searched for it.
+ EXPECT_TRUE((info = GOOGLE_NAMESPACE::FindModifier("x-foo", 5, "", 0)));
+ EXPECT_FALSE(info->is_registered);
+
+ GOOGLE_NAMESPACE::NullModifier foo_modifier;
+ EXPECT_TRUE(GOOGLE_NAMESPACE::AddModifier("x-foo", &foo_modifier));
+ EXPECT_TRUE((info = GOOGLE_NAMESPACE::FindModifier("x-foo", 5, "", 0)));
+ EXPECT_EQ(info->modifier, &foo_modifier); // the lookup must now return the newly added modifier
+}
+
+TEST(TemplateModifiers, AddXssSafeModifier) { // Checks AddXssSafeModifier, and that a name added through it or through AddModifier cannot be added again by the other.
+ // For shorter lines.
+ const GOOGLE_NAMESPACE::TemplateModifier* esc_fn =
+ &GOOGLE_NAMESPACE::html_escape;
+
+ EXPECT_TRUE(GOOGLE_NAMESPACE::AddXssSafeModifier("x-asafetest", esc_fn));
+ EXPECT_TRUE(GOOGLE_NAMESPACE::AddXssSafeModifier("x-asafetest-arg=", esc_fn));
+ EXPECT_TRUE(GOOGLE_NAMESPACE::AddXssSafeModifier("x-asafetest-arg=h", esc_fn));
+
+ // Make sure AddXssSafeModifier fails with an invalid name.
+ EXPECT_FALSE(GOOGLE_NAMESPACE::AddXssSafeModifier("test", esc_fn));
+
+ // Make sure AddXssSafeModifier fails with a duplicate name.
+ EXPECT_FALSE(GOOGLE_NAMESPACE::AddXssSafeModifier("x-asafetest", esc_fn));
+ EXPECT_FALSE(GOOGLE_NAMESPACE::AddXssSafeModifier("x-asafetest-arg=", esc_fn));
+ EXPECT_FALSE(GOOGLE_NAMESPACE::AddXssSafeModifier("x-asafetest-arg=h",
+ esc_fn));
+
+ // Make sure AddXssSafeModifier fails if the same modifier was
+ // previously added via AddModifier.
+ EXPECT_TRUE(GOOGLE_NAMESPACE::AddModifier("x-safetest2", esc_fn));
+ EXPECT_TRUE(GOOGLE_NAMESPACE::AddModifier("x-safetest2-arg=", esc_fn));
+ EXPECT_TRUE(GOOGLE_NAMESPACE::AddModifier("x-safetest2-arg=h", esc_fn));
+ EXPECT_FALSE(GOOGLE_NAMESPACE::AddXssSafeModifier("x-safetest2", esc_fn));
+ EXPECT_FALSE(GOOGLE_NAMESPACE::AddXssSafeModifier("x-safetest2-arg=", esc_fn));
+ EXPECT_FALSE(GOOGLE_NAMESPACE::AddXssSafeModifier("x-safetest2-arg=h", esc_fn));
+
+ // and vice versa.
+ EXPECT_FALSE(GOOGLE_NAMESPACE::AddModifier("x-asafetest", esc_fn)); // these names were added via AddXssSafeModifier at the top of this test
+ EXPECT_FALSE(GOOGLE_NAMESPACE::AddModifier("x-asafetest-arg=", esc_fn));
+ EXPECT_FALSE(GOOGLE_NAMESPACE::AddModifier("x-asafetest-arg=h", esc_fn));
+}
+
+// Helper function. Determines whether the Modifier specified by
+// alt_modname/alt_modval is a safe XSS alternative to
+// the Modifier specified by modname/modval.
+static bool CheckXSSAlternative(const string& modname, const string& modval,
+ const string& alt_modname,
+ const string& alt_modval) {
+ // Resolve the original modifier and its proposed replacement by name and value.
+ const GOOGLE_NAMESPACE::ModifierInfo* const wanted =
+ GOOGLE_NAMESPACE::FindModifier(modname.c_str(), modname.length(),
+ modval.c_str(), modval.length());
+ const GOOGLE_NAMESPACE::ModifierInfo* const candidate =
+ GOOGLE_NAMESPACE::FindModifier(alt_modname.c_str(), alt_modname.length(),
+ alt_modval.c_str(), alt_modval.length());
+ EXPECT_TRUE(wanted != NULL && candidate != NULL); // both lookups must succeed
+ return IsSafeXSSAlternative(*wanted, *candidate);
+}
+
+TEST(TemplateModifiers, XSSAlternatives) { // Checks IsSafeXSSAlternative through CheckXSSAlternative(modifier, value, alternative, alternative value).
+ // A modifier is always a safe replacement to itself, even non built-in.
+ EXPECT_TRUE(CheckXSSAlternative("h", "", "h", ""));
+ EXPECT_TRUE(CheckXSSAlternative("url_escape_with_arg", "=javascript",
+ "url_escape_with_arg", "=javascript"));
+ EXPECT_TRUE(CheckXSSAlternative("x-bla", "", "x-bla", ""));
+
+ // A built-in modifier is always a safe replacement to
+ // another with the same function.
+ EXPECT_TRUE(CheckXSSAlternative("H", "=pre", "p", ""));
+ EXPECT_TRUE(CheckXSSAlternative("url_query_escape", "",
+ "url_escape_with_arg", "=query"));
+
+ // H=(pre|snippet|attribute), p, u, U=query, U=html (a.k.a H=url)
+ // and I=html are all alternatives to h.
+ EXPECT_TRUE(CheckXSSAlternative("h", "", "H", "=pre"));
+ EXPECT_TRUE(CheckXSSAlternative("h", "", "H", "=snippet"));
+ EXPECT_TRUE(CheckXSSAlternative("h", "", "H", "=attribute"));
+ EXPECT_TRUE(CheckXSSAlternative("h", "", "H", "=url"));
+ EXPECT_TRUE(CheckXSSAlternative("h", "", "p", ""));
+ EXPECT_TRUE(CheckXSSAlternative("h", "", "u", ""));
+ EXPECT_TRUE(CheckXSSAlternative("h", "", "U", "=query"));
+ EXPECT_TRUE(CheckXSSAlternative("h", "", "U", "=html"));
+ EXPECT_TRUE(CheckXSSAlternative("h", "", "I", "=html"));
+
+ // But h is not an alternative to H=attribute and I=html,
+ // nor is json_escape an alternative to h.
+ EXPECT_FALSE(CheckXSSAlternative("H", "=attribute", "h", ""));
+ EXPECT_FALSE(CheckXSSAlternative("I", "=html", "h", ""));
+ EXPECT_FALSE(CheckXSSAlternative("h", "", "json_escape", ""));
+
+ // H=snippet and H=attribute are alternatives to H=pre
+ // But H=pre is not an alternative to H=attribute.
+ EXPECT_TRUE(CheckXSSAlternative("H", "=pre", "H", "=snippet"));
+ EXPECT_TRUE(CheckXSSAlternative("H", "=pre", "H", "=attribute"));
+ EXPECT_FALSE(CheckXSSAlternative("H", "=attribute", "H", "=pre"));
+
+ // javascript_escape is an alternative to json_escape and vice versa
+ EXPECT_TRUE(CheckXSSAlternative("json_escape", "", "javascript_escape", ""));
+ EXPECT_TRUE(CheckXSSAlternative("javascript_escape", "", "json_escape", ""));
+
+ // I=javascript is an alternative to :j and :U=javascript but not
+ // vice versa
+ EXPECT_TRUE(CheckXSSAlternative("javascript_escape", "", "I", "=javascript"));
+ EXPECT_TRUE(CheckXSSAlternative("U", "=javascript", "I", "=javascript"));
+ EXPECT_FALSE(CheckXSSAlternative("I", "=javascript", "javascript_escape", ""));
+ EXPECT_FALSE(CheckXSSAlternative("I", "=javascript", "U", "=javascript"));
+
+ // U=css and I=css are alternatives to :c but not vice versa
+ EXPECT_TRUE(CheckXSSAlternative("c", "", "U", "=css"));
+ EXPECT_TRUE(CheckXSSAlternative("c", "", "I", "=css"));
+ EXPECT_FALSE(CheckXSSAlternative("U", "=css", "c", ""));
+ EXPECT_FALSE(CheckXSSAlternative("I", "=css", "c", ""));
+
+ // Extended modifier should not match any other except itself.
+ EXPECT_FALSE(CheckXSSAlternative("x-bla", "", "x-foo", "")); // neither x- name is registered in this test
+}
+
+// This is a basic sanity check for the GetDefaultModifierForXXX() functions.
+// More testing happens in AutoEscaper code which uses them.
+TEST(TemplateModifiers, DefaultModifiersForContext) { // Each context (HTML, JS, XML, JSON) is expected to yield exactly one default modifier.
+ const GOOGLE_NAMESPACE::ModifierAndValue* modval;
+ string print_mods; // pretty-printed modifier list, reused for every context
+
+ const vector<const GOOGLE_NAMESPACE::ModifierAndValue*> modvals_html =
+ GOOGLE_NAMESPACE::GetDefaultModifierForHtml();
+ EXPECT_EQ(1, modvals_html.size());
+ print_mods = GOOGLE_NAMESPACE::PrettyPrintModifiers(modvals_html, ";");
+ EXPECT_STREQ(":h", print_mods.c_str());
+ modval = modvals_html.front();
+ EXPECT_EQ(modval->modifier_info->modifier, &GOOGLE_NAMESPACE::html_escape);
+
+ const vector<const GOOGLE_NAMESPACE::ModifierAndValue*> modvals_js =
+ GOOGLE_NAMESPACE::GetDefaultModifierForJs();
+ EXPECT_EQ(1, modvals_js.size());
+ print_mods = GOOGLE_NAMESPACE::PrettyPrintModifiers(modvals_js, ";");
+ EXPECT_STREQ(":j", print_mods.c_str());
+ modval = modvals_js.front();
+ EXPECT_EQ(modval->modifier_info->modifier, &GOOGLE_NAMESPACE::javascript_escape);
+
+ const vector<const GOOGLE_NAMESPACE::ModifierAndValue*> modvals_xml =
+ GOOGLE_NAMESPACE::GetDefaultModifierForXml();
+ EXPECT_EQ(1, modvals_xml.size());
+ print_mods = GOOGLE_NAMESPACE::PrettyPrintModifiers(modvals_xml, ";");
+ EXPECT_STREQ(":xml_escape", print_mods.c_str());
+ modval = modvals_xml.front();
+ EXPECT_EQ(modval->modifier_info->modifier, &GOOGLE_NAMESPACE::xml_escape);
+
+ const vector<const GOOGLE_NAMESPACE::ModifierAndValue*> modvals_json =
+ GOOGLE_NAMESPACE::GetDefaultModifierForJson();
+ EXPECT_EQ(1, modvals_json.size());
+ print_mods = GOOGLE_NAMESPACE::PrettyPrintModifiers(modvals_json, ";");
+ EXPECT_STREQ(":j", print_mods.c_str()); // the JSON context is expected to default to :j, the same as the JS context
+ modval = modvals_json.front();
+ EXPECT_EQ(modval->modifier_info->modifier, &GOOGLE_NAMESPACE::javascript_escape);
+}
+
+// This tests for a bug we had where we were returning a pointer into
+// a vector that became invalid after the vector was resized.
+TEST(TemplateModifiers, ManyUnknownModifiers) { // A template parsed before many new unknown x- modifiers appear must still expand correctly afterwards.
+ string tpl_str1 = "{{from_name:x-test=4}} sent you a message";
+ const GOOGLE_NAMESPACE::Template* tpl1 = GOOGLE_NAMESPACE::Template::StringToTemplate(
+ tpl_str1, GOOGLE_NAMESPACE::DO_NOT_STRIP);
+
+ string tpl_str2 = "{{from_name:x-test=4}} sent you a message:";
+ string expected_out = "me sent you a message:";
+ // All those new unknown varnames should cause g_unknown_modifiers
+ // to resize. 1111 is an arbitrary large number.
+ for (int i = 0; i < 1111; i++) {
+ tpl_str2.append("{{from_name:x-" + string(i, 't') + "=4}}"); // string(i, 't') is i copies of 't', so every iteration uses a distinct modifier name
+ expected_out.append("me");
+ }
+ const GOOGLE_NAMESPACE::Template* tpl2 = GOOGLE_NAMESPACE::Template::StringToTemplate(
+ tpl_str2, GOOGLE_NAMESPACE::DO_NOT_STRIP);
+
+ // Even after the resizing, the references to the unknown
+ // modifiers in tpl1 and tpl2 should still be valid.
+ GOOGLE_NAMESPACE::TemplateDictionary dict("test");
+ dict.SetValue("from_name", "me");
+ string out;
+
+ out.clear();
+ tpl1->Expand(&out, &dict);
+ EXPECT_STREQ("me sent you a message", out.c_str());
+ delete tpl1; // the templates returned by StringToTemplate are released by the test itself
+
+ out.clear();
+ tpl2->Expand(&out, &dict);
+ EXPECT_STREQ(expected_out.c_str(), out.c_str());
+ delete tpl2;
+}
+
+
+int main(int argc, char** argv) { // argc and argv are unused; the process exit status is whatever RUN_ALL_TESTS() returns
+
+ return RUN_ALL_TESTS();
+}
diff --git a/src/tests/template_regtest.cc b/src/tests/template_regtest.cc
new file mode 100644
index 0000000..bc1297f
--- /dev/null
+++ b/src/tests/template_regtest.cc
@@ -0,0 +1,498 @@
+// Copyright (c) 2005, Google Inc.
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+// * Neither the name of Google Inc. nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+// ---
+// Author: csilvers@google.com (Craig Silverstein)
+//
+// This test consists of creating a pretty complicated
+// dictionary, and then applying it to a bunch of templates
+// (specified in the testdata dir) and making sure the output
+// is as expected. We actually support testing with multiple
+// dictionaries. We glob the testdata dir, so it's possible to
+// add a new test just by creating a template and expected-output
+// file in the testdata directory. Files are named
+// template_unittest_testXX.in
+// template_unittest_testXX_dictYY.out
+// YY should start with 01 (not 00). XX can be an arbitrary string.
+
+#include "config_for_unittests.h"
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <assert.h>
+#include <sys/types.h>
+#ifdef HAVE_DIRENT_H
+# include <dirent.h> // for opendir() etc
+#else
+# define dirent direct
+# ifdef HAVE_SYS_NDIR_H
+# include <sys/ndir.h>
+# endif
+# ifdef HAVE_SYS_DIR_H
+# include <sys/dir.h>
+# endif
+# ifdef HAVE_NDIR_H
+# include <ndir.h>
+# endif
+#endif // for opendir() etc
+#include <algorithm> // for sort() and stable_partition
+#include <string>
+#include <vector>
+#include <ctemplate/template.h>
+#include <ctemplate/template_dictionary.h>
+#include <ctemplate/template_modifiers.h>
+#include <ctemplate/template_pathops.h>
+#include "base/util.h"
+
+using std::vector;
+using std::string;
+using std::sort;
+
+using GOOGLE_NAMESPACE::DO_NOT_STRIP;
+using GOOGLE_NAMESPACE::PerExpandData;
+using GOOGLE_NAMESPACE::STRIP_BLANK_LINES;
+using GOOGLE_NAMESPACE::STRIP_WHITESPACE;
+using GOOGLE_NAMESPACE::TC_HTML;
+using GOOGLE_NAMESPACE::TC_MANUAL;
+using GOOGLE_NAMESPACE::Template;
+using GOOGLE_NAMESPACE::TemplateDictionary;
+
+#define ASSERT(cond) do { \
+ if (!(cond)) { \
+ printf("%s: %d: ASSERT FAILED: %s\n", __FILE__, __LINE__, \
+ #cond); \
+ assert(cond); \
+ exit(1); \
+ } \
+} while (0) // On failure: prints file, line and the condition text, then stops via assert(), or via exit(1) when assert() is compiled out.
+
+#define ASSERT_STRING_EQ(a, b) do { \
+ if (!StringEq(a, b, __FILE__, __LINE__, #a, #b)) { assert(false); exit(1); } \
+} while (0) // Fails (StringEq prints both strings) when a != b; unlike a bare assert(), the comparison still runs under NDEBUG.
+
+bool StringEq(const string& a, const string& b,
+ const char* filename, int lineno,
+ const char* namea, const char* nameb) {
+ // Equal strings need no diagnostics.
+ if (a == b) return true;
+ // On a mismatch, report the call site and dump both operands (a as expected, b as actual).
+ printf("%s: %d: ASSERT FAILED: %s == %s:\n", filename, lineno,
+ namea, nameb);
+ printf("EXPECTED:\n%s\n", a.c_str());
+ printf("ACTUAL:\n%s\n", b.c_str());
+ return false;
+}
+
+#define ASSERT_STREQ_EXCEPT(a, b, except) ASSERT(StreqExcept(a, b, except)) // a and b must match once the characters listed in except are ignored
+#define ASSERT_STREQ(a, b) ASSERT(strcmp(a, b) == 0) // the two C strings must compare equal
+#define ASSERT_NOT_STREQ(a, b) ASSERT(strcmp(a, b) != 0) // the two C strings must differ
+
+// First, (conceptually) remove all chars in "except" from both a and b.
+// Then return true iff munged_a == munged_b.
+bool StreqExcept(const char* a, const char* b, const char* except) {
+ // Walk both strings in lock-step, skipping any character listed in "except" before each comparison.
+ const size_t skiplen = strlen(except);
+ const char* lhs = a;
+ const char* rhs = b;
+ for (;; ++lhs, ++rhs) {
+ // memchr over skiplen bytes (not strchr) so the terminating NUL is never treated as skippable.
+ while (memchr(except, *lhs, skiplen) != NULL) ++lhs;
+ while (memchr(except, *rhs, skiplen) != NULL) ++rhs;
+ if (*lhs != *rhs) return false; // mismatch, or only one side has ended
+ if (*lhs == '\0') return true; // both sides ended together
+ }
+}
+
+
+RegisterTemplateFilename(VALID1_FN, "template_unittest_test_valid1.in"); // NOTE(review): macro is defined outside this chunk; presumably ties each symbolic name to its template file -- confirm
+RegisterTemplateFilename(INVALID1_FN, "template_unittest_test_invalid1.in");
+RegisterTemplateFilename(INVALID2_FN, "template_unittest_test_invalid2.in");
+RegisterTemplateFilename(NONEXISTENT_FN, "nonexistent__file.tpl");
+
+struct Testdata { // One regression case: an input template plus the outputs read for it from the test data directory.
+ string input_template_name; // the filename of the input template
+ string input_template; // the contents of the input template
+ vector<string> output; // entry i is the output of using dict i.
+ vector<string> annotated_output; // used to test annotations; filled from the .anno_out files with the same indexing as output
+};
+
+static void ReadToString(const string& filename, string* s) { // Appends the whole contents of filename to *s; any I/O failure goes to PFATAL.
+ const int bufsize = 8092;
+ char buffer[bufsize];
+ size_t n;
+ FILE* fp = fopen(filename.c_str(), "rb");
+ if (!fp) PFATAL(filename.c_str());
+ while ((n=fread(buffer, 1, bufsize, fp)) > 0)
+ s->append(buffer, n); // append straight from the buffer, no temporary string
+ // fread() returns 0 on error as well as at EOF, so the error check must come after the loop.
+ if (ferror(fp)) PFATAL(filename.c_str());
+ fclose(fp);
+}
+
+static bool EndsWith(const string& s, const string& suffix) {
+ if (suffix.length() > s.length()) return false; // a longer suffix can never match
+ return s.compare(s.length() - suffix.length(), string::npos, suffix) == 0; // compare the tail of s in place
+}
+
+#ifndef USING_PORT_CC /* windows defines its own version in windows/port.cc */
+static void GetNamelist(const char* testdata_dir, vector<string>* namelist) {
+ // Collect every directory entry whose name starts with the test-file prefix.
+ DIR* const testdir = opendir(testdata_dir);
+ if (testdir == NULL) PFATAL("opendir");
+ for (struct dirent* entry = readdir(testdir); entry != NULL;
+ entry = readdir(testdir)) {
+ const bool is_test_file = strncmp(entry->d_name, "template_unittest_test",
+ sizeof("template_unittest_test")-1) == 0;
+ if (is_test_file) namelist->push_back(entry->d_name);
+ }
+ if (closedir(testdir) != 0) PFATAL("closedir");
+}
+#endif
+
+// Loads the test files found under testdata_dir, producing one Testdata per ".in" template.
+// The vector is returned by value: expensive to resize and copy it and all, but that's ok.
+static vector<Testdata> ReadDataFiles(const char* testdata_dir) {
+ vector<Testdata> retval;
+ vector<string> namelist;
+
+ GetNamelist(testdata_dir, &namelist);
+ sort(namelist.begin(), namelist.end()); // puts foo.in ahead of its foo_dictYY.* outputs ('.' sorts before '_')
+
+ for (vector<string>::const_iterator it = namelist.begin();
+ it != namelist.end(); ++it) {
+ vector<string>* new_output = NULL;
+ const string fname = string(testdata_dir) + "/" + it->c_str();
+ if (EndsWith(fname, ".in")) {
+ retval.push_back(Testdata());
+ retval.back().input_template_name = *it;
+ ReadToString(fname, &retval.back().input_template);
+ } else {
+ const bool is_annotated = EndsWith(fname, ".anno_out");
+ ASSERT(is_annotated || EndsWith(fname, ".out")); // Filename must end in either .in, .out, or .anno_out.
+ // Must be checked before retval.back() is used: back() on an empty vector is undefined.
+ ASSERT(!retval.empty()); // an .out without any corresponding .in?
+ new_output = is_annotated ? &retval.back().annotated_output : &retval.back().output;
+ }
+ if (new_output) { // the .out and .anno_out cases
+ ASSERT(it->length() > retval.back().input_template_name.length() + 4);
+ // input file is foo.in, and output is foo_dictYY.out. This gets to YY.
+ const char* dictnum_pos = (it->c_str() +
+ retval.back().input_template_name.length() + 2);
+ int dictnum = atoi32(dictnum_pos); // just ignore chars after the YY
+ ASSERT(dictnum > 0); // dictnums should start with 01; a negative value would wrap in the cast below
+ while (new_output->size() <
+ static_cast<vector<string>::size_type>(dictnum))
+ new_output->push_back(string());
+ ReadToString(fname, &((*new_output)[dictnum-1]));
+ }
+ }
+ return retval;
+}
+
+
+// Creates a complicated dictionary, using every TemplateDictionary
+// command under the sun. Returns a pointer to the new dictionary-root.
+// Should be freed by the caller.
+static TemplateDictionary* MakeDict1() {
+ TemplateDictionary* dict = new TemplateDictionary("dict1", NULL);
+ dict->SetFilename("just used for debugging, so doesn't matter.txt");
+
+ // --- These are used by template_unittest_test_simple.in
+ dict->SetValue("HEAD", " This is the head ");
+ // We leave BODY undefined, to make sure that expansion works properly.
+
+ // --- These are used by template_unittest_test_footer.in
+ TemplateDictionary* fbt = dict->AddSectionDictionary("FOOTER_BAR_TEXT");
+ fbt->SetValue("BODY", "Should never be shown"); // this is part of simple
+ fbt->SetEscapedValue("HOME_LINK", "<b>Time to go home!</b>",
+ GOOGLE_NAMESPACE::html_escape);
+ // Note: you should never have code like this in real life! The <b>
+ // and </b> should be part of the template proper.
+ fbt->SetFormattedValue("ADVERTISE_LINK", "<b>Be advertiser #%d</b>", 2);
+ fbt->SetValue("ABOUT_GOOGLE_LINK", "<A HREF=/>About Google!</A>");
+
+ // We show PROMO_LICENSING_SECTION in the main dict, even though
+ // it's defined in the fbt subsection. This will still work: section
+ // showing goes to the parent dict if not found in the current dict.
+ dict->ShowSection("PROMO_LICENSING_SECTION");
+ dict->SetValue("PROMO_LICENSING_LINK", "<A HREF='foo'>");
+
+ // We don't show the TRIM_LINE section, so these vars shouldn't be seen
+ dict->SetValue("TRIM_LINE_COLOR", "Who cares?");
+ dict->SetIntValue("TRIM_LINE_HEIGHT", 10);
+
+ dict->SetIntValue("MODIFIED_BY_GOOGLE", 2005);
+ dict->SetValue("MSG_copyright", "© Google Inc. (all rights reserved)");
+ // We don't set ODP_ATTRIBUTION, so this include is ignored.
+
+ dict->ShowSection("CLOSING_DIV_SECTION");
+
+ // We won't set any of the includes that follow, just to keep things simple
+
+ // First, call SetValueAndShowSection on a nonexistent section; should be a no-op
+ dict->SetValueAndShowSection("LATENCY_PREFETCH_URL", "/huh?",
+ "UNUSED_SECTION_NAME");
+ // Now try the real URL
+ dict->SetValueAndShowSection("LATENCY_PREFETCH_URL", string("/latency"),
+ "LATENCY_PREFETCH");
+
+ // JAVASCRIPT_FOOTER_SECTION was meant to be either shown or hidden, but
+ // hey, let's try showing it several times, each with a different include.
+ // And let's include each one several times.
+ TemplateDictionary* jfs1 = dict->AddSectionDictionary(
+ "JAVASCRIPT_FOOTER_SECTION");
+ // This first dictionary should have an empty HEAD and BODY
+ TemplateDictionary* inc1a = jfs1->AddIncludeDictionary("FAST_NEXT_JAVASCRIPT");
+ inc1a->SetFilename("template_unittest_test_simple.in");
+ // For the second dict, let's set an illegal filename: should be ignored
+ TemplateDictionary* inc1b = jfs1->AddIncludeDictionary("FAST_NEXT_JAVASCRIPT");
+ inc1b->SetFilename(INVALID1_FN);
+ // For the third dict, let's do the same as the first, but with a HEAD
+ TemplateDictionary* inc1c = jfs1->AddIncludeDictionary("FAST_NEXT_JAVASCRIPT");
+ inc1c->SetFilename("template_unittest_test_simple.in");
+ inc1c->SetValue("HEAD", "head");
+
+ // Let's expand the section again with two different includes, and again a
+ // third template not meant to be expanded (in this case, don't set filename)
+ TemplateDictionary* jfs2 = dict->AddSectionDictionary(
+ "JAVASCRIPT_FOOTER_SECTION");
+ TemplateDictionary* inc2a = jfs2->AddIncludeDictionary("FAST_NEXT_JAVASCRIPT");
+ inc2a->SetFilename("template_unittest_test_simple.in");
+ inc2a->SetValue("HEAD", "include-head");
+ inc2a->SetEscapedFormattedValue("BODY", GOOGLE_NAMESPACE::html_escape,
+ "<b>%s</b>: %.4f", "<A HREF=/>", 1.0/3);
+ inc2a->SetValue("BI_NEWLINE", ""); // override the global default
+ TemplateDictionary* inc2b = jfs2->AddIncludeDictionary("FAST_NEXT_JAVASCRIPT");
+ inc2b->SetFilename("template_unittest_test_html.in");
+ inc2b->SetValue("HEAD", "should be ignored");
+ jfs2->AddIncludeDictionary("FAST_NEXT_JAVASCRIPT"); // ignored: no filename
+
+ // --- These are used by template_unittest_test_html.in
+
+ // This should result in NO_MOUSEOVER_FUNCTIONS remaining hidden
+ dict->SetValueAndShowSection("DUMMY", "", "NO_MOUSEOVER_FUNCTIONS");
+
+ dict->ShowSection("MOUSEOVER_FUNCTIONS");
+ TemplateDictionary* foo = dict->AddIncludeDictionary("MOUSEOVER_JAVASCRIPT");
+ foo->SetFilename(string("not_a_template"));
+ foo->SetValue("BI_NEWLINE", "not gonna matter");
+
+ dict->SetEscapedValue("GOTO_MESSAGE", "print \"Go home\"",
+ GOOGLE_NAMESPACE::javascript_escape);
+
+ dict->SetEscapedValue("UPDATE", "monday & tuesday",
+ GOOGLE_NAMESPACE::html_escape);
+ dict->ShowSection("UPDATE_SECTION");
+
+ dict->SetValue("ALIGNMENT", "\"right\""); // all results sections see this
+ for (int i = 0; i < 3; ++i) { // we'll do three results
+ TemplateDictionary* result = dict->AddSectionDictionary("RESULTS");
+ if (i % 2 == 0)
+ result->ShowSection("WHITE_BG"); // gives us striped results!
+ const char* res = "<&>\"result\" #%d'&'";
+ result->SetFormattedValue("RESULT", res, i);
+ result->SetEscapedFormattedValue("XML_RESULT",
+ GOOGLE_NAMESPACE::xml_escape,
+ res, i);
+ result->SetIntValue("GOODNESS", i + 5);
+ }
+
+ // For testing auto-escape.
+ dict->SetValue("AE_TITLE_GOOD", "Hello World!");
+ dict->SetValue("AE_TITLE_BAD", "Hello <script>alert(1)</script> World!");
+ dict->SetValue("AE_URL_GOOD", "http://www.google.com/");
+ dict->SetValue("AE_URL_BAD", "javascript:alert(1)");
+ dict->SetValue("AE_BG_COLOR_GOOD", "red");
+ dict->SetValue("AE_BG_COLOR_BAD", "evil! &");
+ dict->SetValue("AE_JS_GOOD", "your text here");
+ dict->SetValue("AE_JS_BAD", "your text'is clever'thanks");
+ dict->SetValue("AE_USERNAME_GOOD", "Mr. Nice");
+ dict->SetValue("AE_USERNAME_BAD", "Doctor<script>alert(2)</script>Evil");
+ dict->SetValue("AE_START_EDGE", "left");
+ dict->SetValue("AE_END_EDGE", ";:center()$$"); // Some invalid chars.
+ dict->SetValue("AE_FONT_SIZE_PC", "120%");
+ dict->SetValue("AE_FONT_SIZE_PT", "12pt");
+ dict->SetValue("AE_MAUVE_RGB", "#FF7BD5");
+ dict->SetValue("AE_ITALIC", "italic");
+
+ // This won't see any of the vars *we* set
+ TemplateDictionary* footer_dict = dict->AddIncludeDictionary("FOOTER");
+ footer_dict->SetFilename("template_unittest_test_footer.in");
+
+ // --- These are used by template_unittest_test_modifiers.in
+
+ // UPDATE and UPDATE_SECTION we inherit from test_html.in
+ TemplateDictionary* inc_simple = dict->AddIncludeDictionary("SIMPLE");
+ inc_simple->SetFilename("template_unittest_test_simple.in");
+
+ return dict;
+}
+
+
+// Quite opposite of dict1, dict2 is a dictionary that has nothing in it
+static TemplateDictionary* MakeDict2() {
+ return new TemplateDictionary("dict2");  // heap-allocated; TestExpand deletes it
+}
+
+
+// dict3 tests just the handling of whitespace
+static TemplateDictionary* MakeDict3() {
+  // Both values are pure whitespace: a blank HEAD and a CR-LF BODY.
+  TemplateDictionary* whitespace_dict = new TemplateDictionary("dict3");
+  whitespace_dict->SetValue("HEAD", " ");
+  whitespace_dict->SetValue("BODY", "\r\n");
+  return whitespace_dict;  // heap-allocated; the caller deletes it
+}
+
+static TemplateDictionary* MakeDictionary(int i) {
+  // Builds dictionary number i (1-based); the caller must delete the result.
+  if (i == 1) return MakeDict1();
+  if (i == 2) return MakeDict2();
+  if (i == 3) return MakeDict3();
+  ASSERT(false);  // No dictionary with this number yet.
+  // Only reached if ASSERT returns.
+  return NULL;
+}
+
+
+static void TestExpand(const vector<Testdata>::const_iterator& begin,
+                       const vector<Testdata>::const_iterator& end) {
+  // For each test in [begin, end): loads the input template in all three strip
+  // modes (by filename and from the in-memory text), expands it with every
+  // dictionary that has an expected-output file, and checks the results.
+  for (vector<Testdata>::const_iterator one_test = begin;
+       one_test != end; ++one_test) {
+    Template* tpl_none = Template::GetTemplate(one_test->input_template_name,
+                                               DO_NOT_STRIP);
+    Template* tpl_lines = Template::GetTemplate(one_test->input_template_name,
+                                                STRIP_BLANK_LINES);
+    Template* tpl_ws = Template::GetTemplate(one_test->input_template_name,
+                                             STRIP_WHITESPACE);
+
+    // Test StringToTemplate while we're at it.
+    Template* tplstr_none = Template::StringToTemplate(
+        one_test->input_template, DO_NOT_STRIP);
+    Template* tplstr_lines = Template::StringToTemplate(
+        one_test->input_template, STRIP_BLANK_LINES);
+    Template* tplstr_ws = Template::StringToTemplate(
+        one_test->input_template, STRIP_WHITESPACE);
+
+    for (vector<string>::const_iterator out = one_test->output.begin();
+         out != one_test->output.end(); ++out) {
+      int dictnum = out - one_test->output.begin() + 1;  // first dict is 01
+      // If output is the empty string, we assume the file does not exist
+      if (out->empty())
+        continue;
+
+      printf("Testing template %s on dict #%d\n",
+             one_test->input_template_name.c_str(), dictnum);
+      // If we're expecting output, the template better not have had an error
+      ASSERT(tpl_none && tpl_lines && tpl_ws);
+      ASSERT(tplstr_none && tplstr_lines && tplstr_ws);
+
+      TemplateDictionary* dict = MakeDictionary(dictnum);
+      string stroutput_none, stroutput_lines, stroutput_ws;
+      string stroutput_strnone, stroutput_strlines, stroutput_strws;
+
+      tpl_none->Expand(&stroutput_none, dict);
+      tpl_lines->Expand(&stroutput_lines, dict);
+      tpl_ws->Expand(&stroutput_ws, dict);
+      tplstr_none->Expand(&stroutput_strnone, dict);
+      tplstr_lines->Expand(&stroutput_strlines, dict);
+      tplstr_ws->Expand(&stroutput_strws, dict);
+
+      // "out" is the output for STRIP_WHITESPACE mode.
+      ASSERT_STRING_EQ(*out, stroutput_ws);
+
+      // Now compare the variants against each other.
+      // NONE and STRIP_LINES may actually be the same on simple inputs
+      //ASSERT(output_none != output_lines);
+      ASSERT(stroutput_none != stroutput_ws);
+      ASSERT(stroutput_lines != stroutput_ws);
+      ASSERT_STREQ_EXCEPT(stroutput_none.c_str(), stroutput_lines.c_str(),
+                          " \t\v\f\r\n");
+      ASSERT_STREQ_EXCEPT(stroutput_none.c_str(), stroutput_ws.c_str(),
+                          " \t\v\f\r\n");
+
+      // It shouldn't matter if we read stuff from a file or a string.
+      ASSERT(stroutput_none == stroutput_strnone);
+      ASSERT(stroutput_lines == stroutput_strlines);
+      ASSERT(stroutput_ws == stroutput_strws);
+      delete dict;  // it's our responsibility
+    }
+
+    // The annotation test is a bit simpler; we only strip one way
+    for (vector<string>::const_iterator out = one_test->annotated_output.begin();
+         out != one_test->annotated_output.end(); ++out) {
+      int dictnum = out - one_test->annotated_output.begin() + 1;
+      // If output is the empty string, we assume the file does not exist
+      if (out->empty())
+        continue;
+
+      printf("Testing template %s on dict #%d (annotated)\n",
+             one_test->input_template_name.c_str(), dictnum);
+      ASSERT(tpl_lines);  // not yet checked if this test has no plain .out file
+      TemplateDictionary* dict = MakeDictionary(dictnum);
+      PerExpandData per_expand_data;
+      per_expand_data.SetAnnotateOutput("template_unittest_test");
+      string output;
+      tpl_lines->ExpandWithData(&output, dict, &per_expand_data);
+      ASSERT_STREQ_EXCEPT(out->c_str(), output.c_str(), "\r\n");
+      delete dict;  // it's our responsibility
+    }
+    delete tplstr_none;  // these are our responsibility too
+    delete tplstr_lines;
+    delete tplstr_ws;
+  }
+}
+
+
+int main(int argc, char** argv) {
+  // If TEMPLATE_ROOTDIR is set in the environment, it overrides the
+  // default of ".".  We use an env-var rather than argv because
+  // that's what automake supports most easily.
+  const char* const env_rootdir = getenv("TEMPLATE_ROOTDIR");
+  const char* const template_rootdir =
+      env_rootdir ? env_rootdir : DEFAULT_TEMPLATE_ROOTDIR;  // probably "."
+  // The test data lives in <rootdir>/src/tests.
+  const string tests_dir = GOOGLE_NAMESPACE::PathJoin(
+      GOOGLE_NAMESPACE::PathJoin(template_rootdir, "src"), "tests");
+  Template::SetTemplateRootDirectory(tests_dir);
+
+  const vector<Testdata> testdata = ReadDataFiles(
+      Template::template_root_directory().c_str());
+  if (testdata.empty()) {
+    printf("FATAL ERROR: No test files found for template_regtest\n");
+    return 1;
+  }
+
+  TestExpand(testdata.begin(), testdata.end());
+  printf("DONE\n");
+  return 0;
+}
diff --git a/src/tests/template_setglobals_unittest.cc b/src/tests/template_setglobals_unittest.cc
new file mode 100644
index 0000000..4d59e22
--- /dev/null
+++ b/src/tests/template_setglobals_unittest.cc
@@ -0,0 +1,70 @@
+// Copyright (c) 2002, Google Inc.
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+// * Neither the name of Google Inc. nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+// ---
+
+#include "config_for_unittests.h"
+#include <assert.h>
+#include <stdio.h>
+#include <ctemplate/template.h>
+#include <ctemplate/template_dictionary.h>
+#include "tests/template_test_util.h"
+#include "base/util.h"
+TEST_INIT // defines RUN_ALL_TESTS()
+
+using GOOGLE_NAMESPACE::Template;
+using GOOGLE_NAMESPACE::TemplateDictionary;
+
+TEST(SetGlobalValue, TemplateDictionary) {
+ // Test to see that the global dictionary object gets created when you
+ // first call the static function TemplateDictionary::SetGlobalValue().
+ TemplateDictionary::SetGlobalValue("TEST_GLOBAL_VAR", "test_value");
+ TemplateDictionary tpl("empty");  // no values are set on this dictionary
+ GOOGLE_NAMESPACE::TemplateDictionaryPeer peer(&tpl);
+ EXPECT_STREQ(peer.GetSectionValue("TEST_GLOBAL_VAR"),
+ "test_value");
+ // tpl itself holds nothing, so the value above came from the global setting.
+}
+
+TEST(SetGlobalValue, SetRootDirectory) {
+ // Test to see that the Template static variables get created when you
+ // first call the static function Template::SetTemplateRootDirectory().
+ Template::SetTemplateRootDirectory("/some/directory/path");
+ // We don't know if we appended a / or a \, so we test indirectly
+ EXPECT_EQ(strlen("/some/directory/path")+1, // assert they added a char
+ Template::template_root_directory().size());
+ EXPECT_EQ(0, memcmp(Template::template_root_directory().c_str(),
+ "/some/directory/path",
+ strlen("/some/directory/path")));  // prefix is the path we passed in
+}
+
+int main(int argc, char **argv) {
+ // argc/argv are not used; RUN_ALL_TESTS() comes from TEST_INIT above.
+ return RUN_ALL_TESTS();
+}
diff --git a/src/tests/template_test_util.cc b/src/tests/template_test_util.cc
new file mode 100644
index 0000000..57f7f91
--- /dev/null
+++ b/src/tests/template_test_util.cc
@@ -0,0 +1,309 @@
+// Copyright (c) 2006, Google Inc.
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+// * Neither the name of Google Inc. nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+// ---
+
+#include "config_for_unittests.h"
+#include "base/mutex.h" // must come first, for _XOPEN_SOURCE
+#include "tests/template_test_util.h"
+#include <assert.h> // for assert()
+#ifdef HAVE_DIRENT_H
+# include <dirent.h> // for opendir() etc
+#else
+# define dirent direct
+# ifdef HAVE_SYS_NDIR_H
+# include <sys/ndir.h>
+# endif
+# ifdef HAVE_SYS_DIR_H
+# include <sys/dir.h>
+# endif
+# ifdef HAVE_NDIR_H
+# include <ndir.h>
+# endif
+#endif // for DIR, dirent, closedir(), opendir(), etc
+#include <stdio.h> // for printf(), FILE, fclose(), fopen(), etc
+#include <stdlib.h> // for exit()
+#include <string.h> // for strcmp(), strcpy(), strstr()
+#include <sys/stat.h> // for mkdir()
+#include <sys/types.h> // for mode_t
+#include <time.h> // for time_t
+#ifdef HAVE_UTIME_H
+# include <utime.h>
+#endif // for utime()
+#ifdef HAVE_UNISTD_H
+# include <unistd.h>
+#endif // for unlink()
+#include <vector> // for vector<>, vector<>::size_type
+#include <ctemplate/template.h> // for Template
+#include <ctemplate/template_dictionary.h> // for TemplateDictionary
+#include <ctemplate/template_dictionary_interface.h>
+#include <ctemplate/template_enums.h> // for Strip
+#include <ctemplate/template_namelist.h> // for TemplateNamelist, etc
+#include <ctemplate/template_pathops.h> // for PathJoin()
+#include "base/util.h" // for down_cast()
+
+using std::string;
+using std::vector;
+
+#ifdef ASSERT
+# undef ASSERT
+#endif
+#define ASSERT(cond) do { /* print the failed condition, then terminate */ \
+ if (!(cond)) { \
+ printf("ASSERT FAILED, line %d: %s\n", __LINE__, #cond); \
+ assert(cond); /* evaluates cond again; a no-op when NDEBUG is defined */ \
+ exit(1); /* reached only if assert() above did not abort */ \
+ } \
+} while (0)
+
+namespace ctemplate {
+
+// Directory where StringToTemplateFile() writes its files.  Stays NULL until
+// CreateOrCleanTestDirAndSetAsTmpdir() cleans a directory and records it here.
+static char* g_tmpdir = NULL;
+
+#ifndef USING_PORT_CC /* windows defines its own version in windows/port.cc */
+void CreateOrCleanTestDir(const string& dirname) {
+  // Best effort: make dirname if it can't be opened, else unlink every entry
+  // in it whose name contains "template".  No return values are checked.
+  DIR* const dir = opendir(dirname.c_str());
+  if (dir == NULL) {  // directory doesn't exist or something like that
+    mkdir(dirname.c_str(), 0755);  // make the dir if we can
+    return;
+  }
+  for (struct dirent* d = readdir(dir); d != NULL; d = readdir(dir))
+    if (strstr(d->d_name, "template") != NULL)
+      unlink(PathJoin(dirname, d->d_name).c_str());
+  closedir(dir);
+}
+
+static string TmpFile(const char* basename) {
+  return string("/tmp/").append(basename);  // i.e. "/tmp/<basename>"
+}
+
+#endif // #ifndef USING_PORT_CC
+
+void CreateOrCleanTestDirAndSetAsTmpdir(const string& dirname) {
+  CreateOrCleanTestDir(dirname);
+  delete[] g_tmpdir;  // drop whatever directory was recorded before
+  g_tmpdir = new char[dirname.size() + 1];  // +1 for the trailing NUL
+  strcpy(g_tmpdir, dirname.c_str());
+}
+
+const string FLAGS_test_tmpdir(TmpFile("template_unittest_dir"));  // built once, via TmpFile()
+
+// This writes s to the given file. We want to make sure that every
+// time we create a file, it has a different mtime (just like would
+// be the case in real life), so we use a mock clock.
+static Mutex g_time_mutex(base::LINKER_INITIALIZED);  // guards mock_time
+static time_t mock_time = 946713600; // jan 1, 2000, in california (08:00 UTC); StringToFile adds 1 per file
+
+void StringToFile(const string& s, const string& filename) {
+ FILE* fp = fopen(filename.c_str(), "wb");  // binary mode: bytes written as-is
+ ASSERT(fp);
+ size_t r = fwrite(s.data(), 1, s.length(), fp);
+ ASSERT(r == s.length());  // a short write is fatal
+ fclose(fp);
+ // Take the next tick of the mock clock and stamp the file with it.
+ g_time_mutex.Lock();
+ const time_t file_time = mock_time++;
+ g_time_mutex.Unlock();
+ struct utimbuf timbuf = { file_time, file_time };  // same value for both fields
+ utime(filename.c_str(), &timbuf);  // return value is not checked
+}
+
+time_t Now() {  // current mock-clock value; StringToFile() advances it
+  g_time_mutex.Lock();
+  const time_t snapshot = mock_time;  // read under the lock
+  g_time_mutex.Unlock();
+  return snapshot;
+}
+
+// This writes s to a file and returns the filename.
+string StringToTemplateFile(const string& s) {
+  static int filenum = 0;  // yields template.001, template.002, ...
+  char suffix[16];
+  snprintf(suffix, sizeof(suffix), "%03d", ++filenum);
+  const string dir = g_tmpdir ? g_tmpdir : "";  // "" until a tmpdir is set
+  string filename = PathJoin(dir, string("template.") + suffix);
+  StringToFile(s, filename);
+  return filename;
+}
+
+// This writes s to a file and then loads it into a template object.
+Template* StringToTemplate(const string& s, Strip strip) {
+ return Template::GetTemplate(StringToTemplateFile(s), strip);  // loads the file just written
+}
+
+// This is esp. useful for calling from within gdb.
+// The gdb nice-ness is balanced by the need for the caller to delete the buf.
+
+const char* ExpandIs(const Template* tpl, const TemplateDictionary *dict,
+                     PerExpandData* per_expand_data, bool expected) {
+  // Expands once (ASSERT re-evaluates its argument on failure) and returns a
+  // new[]'d C-string copy of the output; the caller must delete[] it.
+  string outstring;
+  const bool ok = per_expand_data
+      ? tpl->ExpandWithData(&outstring, dict, per_expand_data)
+      : tpl->Expand(&outstring, dict);
+  ASSERT(expected == ok);
+  char* buf = new char[outstring.size()+1];
+  strcpy(buf, outstring.c_str());
+  return buf;
+}
+
+const char* ExpandWithCacheIs(TemplateCache* cache,
+                              const string& filename, Strip strip,
+                              const TemplateDictionary *dict,
+                              PerExpandData* per_expand_data, bool expected) {
+  // Expand once (ASSERT re-runs its argument on failure); caller delete[]s buf.
+  string outstring;
+  const bool ok = cache->ExpandWithData(filename, strip, dict,
+                                        per_expand_data, &outstring);
+  ASSERT(expected == ok);
+  char* buf = new char[outstring.size()+1];
+  strcpy(buf, outstring.c_str());
+  return buf;
+}
+
+void AssertExpandWithDataIs(const Template* tpl,
+ const TemplateDictionary *dict,
+ PerExpandData* per_expand_data,
+ const string& is, bool expected) {
+ const char* buf = ExpandIs(tpl, dict, per_expand_data, expected);  // we own buf
+ if (strcmp(buf, is.c_str())) {  // mismatch: show both before asserting
+ printf("expected = '%s'\n", is.c_str());
+ printf("actual = '%s'\n", buf);
+ }
+ ASSERT(string(buf) == is);
+ delete [] buf;  // allocated with new[] by ExpandIs
+}
+
+void AssertExpandIs(const Template* tpl, const TemplateDictionary *dict,
+ const string& is, bool expected) {
+ AssertExpandWithDataIs(tpl, dict, NULL, is, expected);  // NULL: no PerExpandData
+}
+
+void AssertExpandWithCacheIs(TemplateCache* cache,
+ const string& filename, Strip strip,
+ const TemplateDictionary *dict,
+ PerExpandData* per_expand_data,
+ const string& is, bool expected) {
+ const char* buf = ExpandWithCacheIs(cache, filename, strip, dict,
+ per_expand_data, expected);  // we own buf
+ if (strcmp(buf, is.c_str())) {  // mismatch: show both before asserting
+ printf("expected = '%s'\n", is.c_str());
+ printf("actual = '%s'\n", buf);
+ }
+ ASSERT(string(buf) == is);
+ delete [] buf;  // allocated with new[] by ExpandWithCacheIs
+}
+
+TemporaryRegisterTemplate::TemporaryRegisterTemplate(const char* name) {
+  // Work on a private copy of the current global namelist (if there is one),
+  // add |name| to the copy, and make the copy the global namelist.  The
+  // previous pointer is kept so the destructor can put it back.
+  old_namelist_ = TemplateNamelist::namelist_;
+  if (old_namelist_ != NULL) namelist_ = *old_namelist_;
+  namelist_.insert(name);
+  TemplateNamelist::namelist_ = &namelist_;
+}
+
+TemporaryRegisterTemplate::~TemporaryRegisterTemplate() {
+ TemplateNamelist::namelist_ = old_namelist_;  // restore what the ctor replaced
+}
+
+const char* TemplateDictionaryPeer::GetSectionValue(
+    const TemplateString& variable) const {
+  // Luckily, TemplateDictionary stores all values with a trailing NUL, so the
+  // raw data pointer can be handed out as a C string.
+  return dict_->GetValue(variable).data();
+}
+
+bool TemplateDictionaryPeer::ValueIs(const TemplateString& variable,
+ const TemplateString& expected) const {
+ return dict_->GetValue(variable) == expected;  // compares as TemplateString, not as C strings
+}
+
+bool TemplateDictionaryPeer::IsHiddenSection(
+ const TemplateString& name) const {
+ return dict_->IsHiddenSection(name);  // forwards to the wrapped dictionary
+}
+
+bool TemplateDictionaryPeer::IsUnhiddenSection(
+ const TemplateString& name) const {
+ return dict_->IsUnhiddenSection(name);  // forwards to the wrapped dictionary
+}
+
+bool TemplateDictionaryPeer::IsHiddenTemplate(
+ const TemplateString& name) const {
+ return dict_->IsHiddenTemplate(name);  // forwards to the wrapped dictionary
+}
+
+int TemplateDictionaryPeer::GetSectionDictionaries(
+    const TemplateString& section_name,
+    vector<const TemplateDictionary*>* dicts) const {
+  dicts->clear();
+  if (!dict_->IsHiddenSection(section_name)) {
+    // The iterator is ours to delete once its dictionaries are collected.
+    TemplateDictionaryInterface::Iterator* const walker =
+        dict_->CreateSectionIterator(section_name);
+    while (walker->HasNext())
+      dicts->push_back(down_cast<const TemplateDictionary*>(&walker->Next()));
+    delete walker;
+  }
+  // A hidden section leaves the vector empty, so this returns 0 for it.
+  return static_cast<int>(dicts->size());
+}
+
+int TemplateDictionaryPeer::GetIncludeDictionaries(
+    const TemplateString& section_name,
+    vector<const TemplateDictionary*>* dicts) const {
+  dicts->clear();
+  if (!dict_->IsHiddenTemplate(section_name)) {
+    // The iterator is ours to delete once its dictionaries are collected.
+    TemplateDictionaryInterface::Iterator* const walker =
+        dict_->CreateTemplateIterator(section_name);
+    while (walker->HasNext())
+      dicts->push_back(down_cast<const TemplateDictionary*>(&walker->Next()));
+    delete walker;
+  }
+  // A hidden template leaves the vector empty, so this returns 0 for it.
+  return static_cast<int>(dicts->size());
+}
+
+const char* TemplateDictionaryPeer::GetIncludeTemplateName(
+ const TemplateString& variable, int dictnum) const {
+ return dict_->GetIncludeTemplateName(variable, dictnum);  // forwards to the wrapped dictionary
+}
+
+const char* TemplateDictionaryPeer::GetFilename() const {
+ return dict_->filename_;  // reads the dictionary's member directly, not via an accessor
+}
+
+}
diff --git a/src/tests/template_test_util.h b/src/tests/template_test_util.h
new file mode 100644
index 0000000..ec3cc84
--- /dev/null
+++ b/src/tests/template_test_util.h
@@ -0,0 +1,283 @@
+// Copyright (c) 2006, Google Inc.
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+// * Neither the name of Google Inc. nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+// ---
+//
+// Intended usage of TemplateDictionaryPeer:
+// Use this class if you need to TEST that a dictionary has certain
+// expected contents. This should be fairly uncommon outside the
+// template directory.
+//
+
+
+#ifndef TEMPLATE_TEMPLATE_TEST_UTIL_H_
+#define TEMPLATE_TEMPLATE_TEST_UTIL_H_
+
+#include "config_for_unittests.h"
+#include <time.h> // for time_t
+#include <string> // for string
+#include <vector> // for vector<>
+#include HASH_MAP_H // UNUSED
+#include <ctemplate/template.h> // for Template::num_deletes_
+#include <ctemplate/template_cache.h> // for TemplateCache
+#include <ctemplate/template_dictionary.h> // for TemplateDictionary
+#include <ctemplate/template_dictionary_interface.h>
+#include <ctemplate/template_enums.h> // for Strip
+#include <ctemplate/template_namelist.h>
+#include <ctemplate/template_string.h> // for TemplateString, TemplateId
+
+namespace ctemplate {
+
+using std::string;
+
+class PerExpandData;
+class TemplateCache;
+class TemplateDictionary;
+
+inline TemplateId GlobalIdForTest(const char* ptr, int len) {
+ return TemplateString(ptr, len).GetGlobalId();  // id of the len bytes at ptr, via a temporary TemplateString
+}
+
+// Call this to create a StaticTemplateString for testing when the ptr is
+// not guaranteed to outlive the test (the bytes are Memdup'd into arena).
+#define STS_INIT_FOR_TEST(ptr, len, arena) \
+ { { arena->Memdup(ptr, len), len, GOOGLE_NAMESPACE::GlobalIdForTest(ptr, len) } };
+
+extern const std::string FLAGS_test_tmpdir;
+
+// These are routines that are useful for creating template files for testing.
+
+// Deletes all files named *template* in dirname, or tries to create dirname if it can't be opened.
+void CreateOrCleanTestDir(const string& dirname);
+// This deletes all files named *template*, and also sets dirname to be
+// the directory where all future StringToTemplateFile calls will place
+// their templates.
+void CreateOrCleanTestDirAndSetAsTmpdir(const string& dirname);
+
+// This writes s to the given file. We want to make sure that every
+// time we create a file, it has a different mtime (just like would
+// be the case in real life), so we use a mock clock. Filenames created
+// by this routine will all have an mtime of around Jan 1, 2000.
+void StringToFile(const string& s, const string& filename);
+
+// Returns the mock clock: one tick past the mtime StringToFile last assigned.
+time_t Now();
+
+// This writes s to a file and returns the filename.
+string StringToTemplateFile(const string& s);
+
+// This writes s to a file and then loads it into a template object.
+Template* StringToTemplate(const string& s, Strip strip);
+
+// This is esp. useful for calling from within gdb.
+// The gdb nice-ness is balanced by the need for the caller to delete the buf.
+const char* ExpandIs(const Template* tpl, const TemplateDictionary *dict,
+ PerExpandData* per_expand_data, bool expected);
+
+void AssertExpandWithDataIs(const Template* tpl,
+ const TemplateDictionary *dict,
+ PerExpandData* per_expand_data,
+ const string& is, bool expected);
+
+void AssertExpandIs(const Template* tpl, const TemplateDictionary *dict,
+ const string& is, bool expected);
+
+void AssertExpandWithCacheIs(TemplateCache* cache,
+ const string& filename, Strip strip,
+ const TemplateDictionary *dict,
+ PerExpandData* per_expand_data,
+ const string& is, bool expected);
+
+class TemporaryRegisterTemplate {
+ public:
+ explicit TemporaryRegisterTemplate(const char* name);  // adds name to the global namelist
+ ~TemporaryRegisterTemplate();  // puts the previous namelist back
+ private:
+ GOOGLE_NAMESPACE::TemplateNamelist::NameListType* old_namelist_;  // pointer that was installed before us
+ GOOGLE_NAMESPACE::TemplateNamelist::NameListType namelist_;  // copy of the old list plus name
+
+ // disallow copy constructor and assignment
+ TemporaryRegisterTemplate(const TemporaryRegisterTemplate&);
+ void operator=(const TemporaryRegisterTemplate&);
+};
+
+// For friendship reasons, we make this a top-level class rather
+// than a nested class. It's used only in TemplateDictionaryPeer.
+// We take ownership of the iterator passed to us. To make sure that
+// isn't a problem, we make this class not-copyable.
+class TemplateDictionaryPeerIterator {
+ public:
+ explicit TemplateDictionaryPeerIterator(
+ TemplateDictionaryInterface::Iterator* it) : it_(it) { }  // takes ownership of it
+ ~TemplateDictionaryPeerIterator() { delete it_; }
+ bool HasNext() const { return it_->HasNext(); }
+ const TemplateDictionaryInterface& Next() { return it_->Next(); }
+ private:
+ TemplateDictionaryInterface::Iterator* it_;  // owned; deleted in the destructor
+ TemplateDictionaryPeerIterator(const TemplateDictionaryPeerIterator&);  // not copyable
+ TemplateDictionaryPeerIterator& operator=(
+ const TemplateDictionaryPeerIterator&);
+};
+
+// This class is meant for use in unittests. This class wraps the
+// TemplateDictionary and provides access to internal data that should
+// not be used in production code. If you need this kind of
+// functionality in production, use TemplateDictionaryWrapper or
+// TemplateDictionaryInterface; see top of file for details.
+//
+// Example Usage:
+// TemplateDictionary dict("test dictionary");
+// FillDictionaryValues(&dict);
+//
+// TemplateDictionaryPeer peer(&dict);
+// EXPECT_STREQ("5", peer.GetSectionValue("width"));
+class TemplateDictionaryPeer {
+ public:
+ explicit TemplateDictionaryPeer(const TemplateDictionary* dict)
+ : dict_(dict) {}
+
+ // Returns whether the named variable has value equal to "expected".
+ bool ValueIs(const TemplateString& variable,
+ const TemplateString& expected) const;
+
+ // DEPRECATED: Returns the value of the named variable. Does not
+ // deal properly with values that have an internal NUL. Use ValueIs
+ // for new code.
+ const char* GetSectionValue(const TemplateString& variable) const;
+
+ // Returns true if the named section is hidden.
+ bool IsHiddenSection(const TemplateString& name) const;
+
+ // IsUnhiddenSection
+ // Returns true if the section has been marked visible and false otherwise.
+ bool IsUnhiddenSection(const TemplateString& name) const;
+
+ // Returns true if the named sub-template is hidden.
+ bool IsHiddenTemplate(const TemplateString& name) const;
+
+ // Retrieves TemplateDictionary instances for the given section name. The
+ // caller does not assume ownership of the returned TemplateDictionary
+ // instances. The number of instances is returned. All prior entries in
+ // the dicts vector are cleared.
+ //
+ // NOTE: This method assumes that old-style template dictionaries are not in
+ // use. That is, it assumes that all section dictionaries have been added
+ // with AddSectionDictionary rather than AddOldstyleSectionDictionary.
+ int GetSectionDictionaries(const TemplateString& section_name,
+ std::vector<const TemplateDictionary*>* dicts)
+ const;
+
+ // Retrieves included TemplateDictionary instances for the given name. The
+ // caller does not assume ownership of the returned TemplateDictionary
+ // instances. The number of instances is returned. All prior entries in
+ // the dicts vector are cleared.
+ //
+ // NOTE: This method assumes that old-style template dictionaries are not in
+ // use. That is, it assumes that all include dictionaries have been added
+ // with AddIncludeDictionary rather than AddOldstyleIncludeDictionary.
+ int GetIncludeDictionaries(const TemplateString& section_name,
+ std::vector<const TemplateDictionary*>* dicts)
+ const;
+ // NOTE(review): presumably the filename set on the dictnum-th include dictionary of "variable" -- confirm in the .cc.
+ const char* GetIncludeTemplateName(const TemplateString& variable,
+ int dictnum) const;
+
+ typedef TemplateDictionaryPeerIterator Iterator;
+ // Both factories below return a heap-allocated Iterator; the caller must delete it.
+ Iterator* CreateTemplateIterator(const TemplateString& section)
+ const {
+ return new Iterator(dict_->CreateTemplateIterator(section));
+ }
+
+ Iterator* CreateSectionIterator(const TemplateString& section)
+ const {
+ return new Iterator(dict_->CreateSectionIterator(section));
+ }
+
+ // Returns the filename associated with the TemplateDictionary.
+ const char* GetFilename() const;
+
+ private:
+ const TemplateDictionary* dict_; // Not owned.
+
+ // disallow copy constructor and assignment
+ TemplateDictionaryPeer(const TemplateDictionaryPeer&);
+ void operator=(const TemplateDictionaryPeer&);
+};
+
+// Unit-test wrapper around a TemplateCache: forwards lookups, refcount
+// queries and cache-clearing calls to the wrapped cache, which it does not own.
+class TemplateCachePeer {
+ public:
+  // explicit: a TemplateCache* must never silently convert into a peer.
+  explicit TemplateCachePeer(TemplateCache* cache)
+      : cache_(cache) {}
+
+  // Cache key built from GlobalIdForTest() of key's bytes plus the strip mode.
+  struct TemplateCacheKey : public TemplateCache::TemplateCacheKey {
+    TemplateCacheKey(const string& key, int strip) {
+      this->first = GlobalIdForTest(key.data(), key.length());
+      this->second = strip;
+    }
+  };
+
+  TemplateCache::TemplateMap* parsed_template_cache() {
+    return cache_->parsed_template_cache_;
+  }
+
+  bool TemplateIsCached(const TemplateCacheKey& key) const {
+    return cache_->TemplateIsCached(key);
+  }
+
+  const Template* GetTemplate(const TemplateString& key, Strip strip) const {
+    return cache_->GetTemplate(key, strip);
+  }
+
+  int Refcount(const TemplateCacheKey& key) const {
+    return cache_->Refcount(key);
+  }
+
+  void DoneWithGetTemplatePtrs() { cache_->DoneWithGetTemplatePtrs(); }
+  void ClearCache() { cache_->ClearCache(); }
+
+  static int NumTotalTemplateDeletes() {
+    return Template::num_deletes();
+  }
+
+ private:
+  TemplateCache* cache_;  // Not owned.
+
+  // Don't allow copying
+  TemplateCachePeer(const TemplateCachePeer&);
+  void operator=(const TemplateCachePeer&);
+};
+
+}
+
+#endif // TEMPLATE_TEMPLATE_TEST_UTIL_H_
diff --git a/src/tests/template_test_util_test.cc b/src/tests/template_test_util_test.cc
new file mode 100644
index 0000000..190ac95
--- /dev/null
+++ b/src/tests/template_test_util_test.cc
@@ -0,0 +1,262 @@
+// Copyright (c) 2008, Google Inc.
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+// * Neither the name of Google Inc. nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+#include "config_for_unittests.h"
+#include "tests/template_test_util.h"
+
+#include <stdio.h>
+#include <string>
+#include <vector>
+
+#include "base/arena.h"
+#include <ctemplate/template_dictionary.h>
+#include <ctemplate/template_string.h>
+#include "base/util.h"
+TEST_INIT // defines RUN_ALL_TESTS()
+
+#define ASSERT_EQ(a, b) EXPECT_EQ(a, b)
+
+using std::vector;
+using std::string;
+using GOOGLE_NAMESPACE::UnsafeArena;
+
+using GOOGLE_NAMESPACE::TemplateDictionary;
+using GOOGLE_NAMESPACE::TemplateDictionaryPeer;
+using GOOGLE_NAMESPACE::TemplateString;
+using GOOGLE_NAMESPACE::StaticTemplateString;
+
+namespace {
+
+TEST(TemplateTestUtilTest, GetSectionValue) {
+ TemplateDictionary dict("test_GetSectionValue");
+ dict.SetValue("VALUE", "value");
+ // A value set on the dictionary must read back unchanged through the peer.
+ TemplateDictionaryPeer peer(&dict);
+ EXPECT_STREQ("value", peer.GetSectionValue("VALUE"));
+}
+
+TEST(TemplateTestUtilTest, IsHiddenSection) {
+ TemplateDictionary dict("test_IsHiddenSection");
+ // Nothing has been added under "SECTION" yet, so it must report as hidden.
+ {
+ TemplateDictionaryPeer peer(&dict);
+ EXPECT_TRUE(peer.IsHiddenSection("SECTION"));
+ }
+
+ dict.AddSectionDictionary("SECTION");
+ // After AddSectionDictionary the same query must report not-hidden.
+ {
+ TemplateDictionaryPeer peer(&dict);
+ EXPECT_FALSE(peer.IsHiddenSection("SECTION"));
+ }
+}
+
+TEST(TemplateTestUtilTest, GetSectionDictionaries) {
+ TemplateDictionary dict("test_GetSectionDictionaries");
+ // Case 1: no section dictionaries exist yet.
+ {
+ TemplateDictionaryPeer peer(&dict);
+ vector<const TemplateDictionary*> dicts;
+ // Add some dummy value into the vector to confirm that the call to
+ // GetSectionDictionaries will correctly clear the vector.
+ dicts.push_back(NULL);
+ EXPECT_EQ(0, peer.GetSectionDictionaries("SECTION", &dicts));
+ EXPECT_TRUE(dicts.empty());
+ }
+
+ dict.AddSectionDictionary("SECTION")->SetValue("SECTION_VALUE", "0");
+ // Case 2: exactly one dictionary under SECTION, carrying the value set above.
+ {
+ TemplateDictionaryPeer peer(&dict);
+ vector<const TemplateDictionary*> dicts;
+ ASSERT_EQ(1, peer.GetSectionDictionaries("SECTION", &dicts));
+
+ TemplateDictionaryPeer peer_section(dicts[0]);
+ EXPECT_STREQ("0", peer_section.GetSectionValue("SECTION_VALUE"));
+ }
+
+ dict.AddSectionDictionary("SECTION")->SetValue("SECTION_VALUE", "1");
+ dict.AddSectionDictionary("ANOTHER_SECTION")->SetValue("ANOTHER_VALUE", "2");
+ // Case 3: two dictionaries under SECTION, expected in insertion order; ANOTHER_SECTION is not counted.
+ {
+ TemplateDictionaryPeer peer(&dict);
+ vector<const TemplateDictionary*> dicts;
+ ASSERT_EQ(2, peer.GetSectionDictionaries("SECTION", &dicts));
+
+ TemplateDictionaryPeer peer_section0(dicts[0]);
+ EXPECT_STREQ("0", peer_section0.GetSectionValue("SECTION_VALUE"));
+
+ TemplateDictionaryPeer peer_section1(dicts[1]);
+ EXPECT_STREQ("1", peer_section1.GetSectionValue("SECTION_VALUE"));
+ }
+}
+
+TEST(TemplateTestUtilTest, GetIncludeDictionaries) {
+ TemplateDictionary dict("test_GetIncludeDictionaries");
+ // Case 1: no include dictionaries exist yet.
+ {
+ TemplateDictionaryPeer peer(&dict);
+ vector<const TemplateDictionary*> dicts;
+ // Add some dummy value into the vector to confirm that the call to
+ // GetIncludeDictionaries will correctly clear the vector.
+ dicts.push_back(NULL);
+ EXPECT_EQ(0, peer.GetIncludeDictionaries("SECTION", &dicts));
+ EXPECT_TRUE(dicts.empty());
+ }
+
+ dict.AddIncludeDictionary("SECTION")->SetValue("SECTION_VALUE", "0");
+ // Case 2: exactly one include dictionary under SECTION, carrying the value set above.
+ {
+ TemplateDictionaryPeer peer(&dict);
+ vector<const TemplateDictionary*> dicts;
+ ASSERT_EQ(1, peer.GetIncludeDictionaries("SECTION", &dicts));
+
+ TemplateDictionaryPeer peer_section(dicts[0]);
+ EXPECT_STREQ("0", peer_section.GetSectionValue("SECTION_VALUE"));
+ }
+
+ dict.AddIncludeDictionary("SECTION")->SetValue("SECTION_VALUE", "1");
+ dict.AddIncludeDictionary("ANOTHER_SECTION")->SetValue("ANOTHER_VALUE", "2");
+ // Case 3: two include dictionaries under SECTION, expected in insertion order; ANOTHER_SECTION is not counted.
+ {
+ TemplateDictionaryPeer peer(&dict);
+ vector<const TemplateDictionary*> dicts;
+ ASSERT_EQ(2, peer.GetIncludeDictionaries("SECTION", &dicts));
+
+ TemplateDictionaryPeer peer_section0(dicts[0]);
+ EXPECT_STREQ("0", peer_section0.GetSectionValue("SECTION_VALUE"));
+
+ TemplateDictionaryPeer peer_section1(dicts[1]);
+ EXPECT_STREQ("1", peer_section1.GetSectionValue("SECTION_VALUE"));
+ }
+}
+
+TEST(TemplateTestUtilTest, GetIncludeAndSectionDictionaries) {
+ TemplateDictionary dict("test_GetIncludeAndSectionDictionaries");
+ // Include and section dictionaries that share a name must be reported separately.
+ {
+ TemplateDictionaryPeer peer(&dict);
+ vector<const TemplateDictionary*> dicts;
+ EXPECT_EQ(0, peer.GetIncludeDictionaries("SECTION", &dicts));
+ EXPECT_EQ(0, peer.GetSectionDictionaries("SECTION", &dicts));
+ }
+
+ dict.AddIncludeDictionary("SECTION")->SetValue("SECTION_VALUE", "0");
+ dict.AddSectionDictionary("SECTION")->SetValue("SECTION_VALUE", "1");
+ // One of each kind under the same name: each query sees only its own kind.
+ {
+ TemplateDictionaryPeer peer(&dict);
+ vector<const TemplateDictionary*> include_dicts;
+ ASSERT_EQ(1, peer.GetIncludeDictionaries("SECTION", &include_dicts));
+
+ TemplateDictionaryPeer include_peer(include_dicts[0]);
+ EXPECT_STREQ("0", include_peer.GetSectionValue("SECTION_VALUE"));
+
+ vector<const TemplateDictionary*> section_dicts;
+ ASSERT_EQ(1, peer.GetSectionDictionaries("SECTION", &section_dicts));
+
+ TemplateDictionaryPeer section_peer(section_dicts[0]);
+ EXPECT_STREQ("1", section_peer.GetSectionValue("SECTION_VALUE"));
+ }
+
+ dict.AddIncludeDictionary("SECTION")->SetValue("SECTION_VALUE", "2");
+ dict.AddIncludeDictionary("ANOTHER_SECTION")->SetValue("ANOTHER_VALUE", "3");
+
+ dict.AddSectionDictionary("SECTION")->SetValue("SECTION_VALUE", "4");
+ dict.AddSectionDictionary("ONE_MORE_SECTION")->SetValue("ANOTHER_VALUE", "5");
+ // Two of each kind under SECTION, plus one name that exists only as an include and one only as a section.
+ {
+ TemplateDictionaryPeer peer(&dict);
+ vector<const TemplateDictionary*> dicts;
+ ASSERT_EQ(2, peer.GetIncludeDictionaries("SECTION", &dicts));
+
+ TemplateDictionaryPeer include_peer0(dicts[0]);
+ EXPECT_STREQ("0", include_peer0.GetSectionValue("SECTION_VALUE"));
+
+ TemplateDictionaryPeer include_peer1(dicts[1]);
+ EXPECT_STREQ("2", include_peer1.GetSectionValue("SECTION_VALUE"));
+
+ EXPECT_EQ(1, peer.GetIncludeDictionaries("ANOTHER_SECTION", &dicts));
+ EXPECT_EQ(0, peer.GetIncludeDictionaries("ONE_MORE_SECTION", &dicts));
+
+ vector<const TemplateDictionary*> section_dicts;
+ ASSERT_EQ(2, peer.GetSectionDictionaries("SECTION", &section_dicts));
+
+ TemplateDictionaryPeer section_peer0(section_dicts[0]);
+ EXPECT_STREQ("1", section_peer0.GetSectionValue("SECTION_VALUE"));
+
+ TemplateDictionaryPeer section_peer1(section_dicts[1]);
+ EXPECT_STREQ("4", section_peer1.GetSectionValue("SECTION_VALUE"));
+
+ EXPECT_EQ(0, peer.GetSectionDictionaries("ANOTHER_SECTION", &dicts));
+ EXPECT_EQ(1, peer.GetSectionDictionaries("ONE_MORE_SECTION", &dicts));
+ }
+}
+
+TEST(TemplateTestUtilTest, GetFilename) {
+ TemplateDictionary parent("test_GetFilename");
+ TemplateDictionary* child = parent.AddIncludeDictionary("INCLUDE_marker");
+ child->SetFilename("included_filename");
+ // SetFilename was called on the child only, so the parent must report no filename.
+ TemplateDictionaryPeer parent_peer(&parent);
+ EXPECT_EQ(NULL, parent_peer.GetFilename());
+
+ TemplateDictionaryPeer child_peer(child);
+ EXPECT_STREQ("included_filename", child_peer.GetFilename());
+}
+
+StaticTemplateString GetTestTemplateString(UnsafeArena* arena) {
+ string will_go_out_of_scope("VALUE");
+ // We want to ensure that the STS_INIT_FOR_TEST macro:
+ // - Can produce a StaticTemplateString (guard against its format changing).
+ // - Produces a StaticTemplateString that is still valid after the string
+ // used to initialize it goes out-of-scope.
+ StaticTemplateString sts = STS_INIT_FOR_TEST(will_go_out_of_scope.c_str(),
+ will_go_out_of_scope.length(),
+ arena);
+ return sts;
+}
+
+TEST(TemplateUtilTest, InitStaticTemplateStringForTest) {
+ UnsafeArena arena(1024);
+ StaticTemplateString kValue = GetTestTemplateString(&arena);
+ // The std::string that seeded kValue is already destroyed; kValue must still work as a key.
+ TemplateDictionary dict("test_GetSectionValue");
+ dict.SetValue(kValue, "value");
+
+ TemplateDictionaryPeer peer(&dict);
+ EXPECT_STREQ("value", peer.GetSectionValue(kValue));
+}
+
+} // namespace anonymous
+
+int main(int argc, char **argv) {
+ // argc/argv are not used; RUN_ALL_TESTS() comes from TEST_INIT and its result is the exit status.
+ return RUN_ALL_TESTS();
+}
diff --git a/src/tests/template_unittest.cc b/src/tests/template_unittest.cc
new file mode 100644
index 0000000..8d615c0
--- /dev/null
+++ b/src/tests/template_unittest.cc
@@ -0,0 +1,2149 @@
+// Copyright (c) 2005, Google Inc.
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+// * Neither the name of Google Inc. nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+// ---
+// Author: csilvers@google.com (Craig Silverstein)
+
+#include "config_for_unittests.h"
+#include <ctemplate/template.h>
+#include <assert.h> // for assert()
+#if defined(HAVE_PTHREAD) && !defined(NO_THREADS)
+# include <pthread.h>
+#endif // for pthread_t, pthread_create(), etc
+#include <stddef.h> // for size_t
+#include <stdio.h> // for printf(), FILE, snprintf(), fclose(), etc
+#include <stdlib.h> // for exit()
+#include <string.h> // for strcmp(), memchr(), strlen(), strstr()
+#include <sys/types.h> // for mode_t
+#include <time.h> // for time_t, time()
+#ifdef HAVE_UNISTD_H
+# include <unistd.h>
+#endif // for link(), unlink()
+#include <list> // for list<>::size_type
+#include <vector> // for vector<>
+#include <ctemplate/per_expand_data.h> // for PerExpandData
+#include <ctemplate/template_annotator.h> // for TextTemplateAnnotator
+#include <ctemplate/template_dictionary.h> // for TemplateDictionary
+#include <ctemplate/template_emitter.h> // for ExpandEmitter
+#include <ctemplate/template_enums.h> // for STRIP_WHITESPACE, Strip, etc
+#include <ctemplate/template_modifiers.h> // for AddModifier(), HtmlEscape, etc
+#include <ctemplate/template_namelist.h> // for TemplateNamelist, etc
+#include <ctemplate/template_pathops.h> // for PathJoin(), IsAbspath(), etc
+#include <ctemplate/template_string.h> // for TemplateString, StringHash, etc
+#include "tests/template_test_util.h" // for StringToTemplate(), etc
+#include "base/util.h"
+TEST_INIT // defines RUN_ALL_TESTS()
+
+using std::vector;
+using std::string;
+using GOOGLE_NAMESPACE::FLAGS_test_tmpdir;
+
+using GOOGLE_NAMESPACE::AssertExpandIs;
+using GOOGLE_NAMESPACE::AssertExpandWithDataIs;
+using GOOGLE_NAMESPACE::CreateOrCleanTestDir;
+using GOOGLE_NAMESPACE::CreateOrCleanTestDirAndSetAsTmpdir;
+using GOOGLE_NAMESPACE::DO_NOT_STRIP;
+using GOOGLE_NAMESPACE::ExpandEmitter;
+using GOOGLE_NAMESPACE::IsAbspath;
+using GOOGLE_NAMESPACE::Now;
+using GOOGLE_NAMESPACE::PathJoin;
+using GOOGLE_NAMESPACE::PerExpandData;
+using GOOGLE_NAMESPACE::STRIP_BLANK_LINES;
+using GOOGLE_NAMESPACE::STRIP_WHITESPACE;
+using GOOGLE_NAMESPACE::StaticTemplateString;
+using GOOGLE_NAMESPACE::StringToFile;
+using GOOGLE_NAMESPACE::StringToTemplate;
+using GOOGLE_NAMESPACE::StringToTemplateFile;
+using GOOGLE_NAMESPACE::Strip;
+using GOOGLE_NAMESPACE::TC_CSS;
+using GOOGLE_NAMESPACE::TC_HTML;
+using GOOGLE_NAMESPACE::TC_JS;
+using GOOGLE_NAMESPACE::TC_JSON;
+using GOOGLE_NAMESPACE::TC_MANUAL;
+using GOOGLE_NAMESPACE::TC_UNUSED;
+using GOOGLE_NAMESPACE::TC_XML;
+using GOOGLE_NAMESPACE::Template;
+using GOOGLE_NAMESPACE::TemplateContext;
+using GOOGLE_NAMESPACE::TemplateDictionary;
+using GOOGLE_NAMESPACE::TemplateNamelist;
+using GOOGLE_NAMESPACE::TemplateString;
+using GOOGLE_NAMESPACE::kRootdir;
+
+using GOOGLE_NAMESPACE::ExpandTemplate;
+using GOOGLE_NAMESPACE::ExpandWithData;
+using GOOGLE_NAMESPACE::StringToTemplateCache;
+
+static const StaticTemplateString kHello = STS_INIT(kHello, "Hello");
+static const StaticTemplateString kWorld = STS_INIT(kWorld, "World");
+// AUTOESCAPE pragma lines, one per auto-escape context. The pointers are const as well, so a constant cannot be reseated.
+static const char* const kPragmaHtml = "{{%AUTOESCAPE context=\"HTML\"}}\n";
+static const char* const kPragmaJs = "{{%AUTOESCAPE context=\"JAVASCRIPT\"}}\n";
+static const char* const kPragmaCss = "{{%AUTOESCAPE context=\"CSS\"}}\n";
+static const char* const kPragmaXml = "{{%AUTOESCAPE context=\"XML\"}}\n";
+static const char* const kPragmaJson = "{{%AUTOESCAPE context=\"JSON\"}}\n";
+
+// How many threads to use for our threading test.
+// This is a #define instead of a const int so we can use it in array-sizes
+// even on c++ compilers that don't support var-length arrays.
+#define kNumThreads 10
+// Reports the current errno text for s via perror(), then exits with status 1.
+#define PFATAL(s) do { perror(s); exit(1); } while (0)
+
+// TODO(csilvers): rewrite to be more gunit-like: use expectations
+// instead of asserts, and move assert-checking out of helper routines
+// and into tests proper. Ideally, replace AssertExpandIs() with
+// VerifyExpandIs().
+#define ASSERT(cond) do { \
+ if (!(cond)) { \
+ printf("ASSERT FAILED, line %d: %s\n", __LINE__, #cond); \
+ assert(cond); /* evaluates cond a second time; aborts if still false */ \
+ exit(1); /* still fails the test when NDEBUG compiles assert() away */ \
+ } \
+} while (0)
+// Shorthands that route string/int comparisons through ASSERT().
+#define ASSERT_STREQ_EXCEPT(a, b, except) ASSERT(StreqExcept(a, b, except))
+#define ASSERT_STREQ(a, b) ASSERT(strcmp(a, b) == 0)
+#define ASSERT_NOT_STREQ(a, b) ASSERT(strcmp(a, b) != 0)
+#define ASSERT_STREQ_VERBOSE(a, b, c) ASSERT(StrEqVerbose(a, b, c))
+#define ASSERT_INTEQ(a, b) ASSERT(IntEqVerbose(a, b))
+
+namespace {
+
+// Compares a and b while ignoring every character that appears in
+// "except". Returns true iff the remaining characters match exactly.
+bool StreqExcept(const char* a, const char* b, const char* except) {
+  const size_t num_ignored = strlen(except);
+  for (;; ++a, ++b) {
+    // memchr over num_ignored bytes never matches '\0' (strchr would), so
+    // these skip loops always stop at the end of the string.
+    while (memchr(except, *a, num_ignored) != NULL) ++a;
+    while (memchr(except, *b, num_ignored) != NULL) ++b;
+    if (*a != *b)  // also covers exactly one side being at '\0'
+      return false;
+    if (*a == '\0')  // both strings ended together
+      return true;
+  }
+}
+
+// Returns true when a and b are equal. Otherwise prints both strings,
+// plus "text" for context, and returns false.
+bool StrEqVerbose(const string& a, const string& b,
+                  const string& text) {
+  if (a == b)
+    return true;
+  // Mismatch: report what was expected, what we got, and the input text.
+  printf("EXPECTED: %s\n", a.c_str());
+  printf("ACTUAL: %s\n", b.c_str());
+  printf("TEXT: %s\n", text.c_str());
+  return false;
+}
+
+// Returns true iff a == b. On mismatch, prints the expected (a) and
+// actual (b) values before returning false.
+bool IntEqVerbose(int a, int b) {
+  if (a == b) return true;
+  printf("EXPECTED: %d\n", a);
+  printf("ACTUAL: %d\n", b);
+  return false;
+}
+
+// Test emitter: appends one 'X' per emitted byte, ignoring the content.
+class SizeofEmitter : public ExpandEmitter {
+ public:
+  SizeofEmitter(string* outbuf) : outbuf_(outbuf) {}
+  virtual void Emit(const char*, size_t slen) { outbuf_->append(slen, 'X'); }
+  virtual void Emit(const char* s) { Emit(s, strlen(s)); }
+  virtual void Emit(const string& s) { Emit(s.data(), s.length()); }
+  virtual void Emit(char c) { Emit(&c, 1); }
+ private:
+  string* const outbuf_;
+};
+
+} // unnamed namespace
+
+RegisterTemplateFilename(VALID1_FN, "template_unittest_test_valid1.in");
+RegisterTemplateFilename(INVALID1_FN, "template_unittest_test_invalid1.in");
+RegisterTemplateFilename(INVALID2_FN, "template_unittest_test_invalid2.in");
+RegisterTemplateFilename(NONEXISTENT_FN, "nonexistent__file.tpl");
+
+// Maps a TemplateContext onto the AUTOESCAPE pragma line that selects it.
+// TC_MANUAL yields an empty string; TC_UNUSED or an unknown value trips ASSERT.
+static string GetPragmaForContext(TemplateContext context) {
+  switch(context) {
+    case TC_MANUAL:
+      return "";  // No AUTOESCAPE pragma.
+    case TC_XML:
+      return kPragmaXml;
+    case TC_JSON:
+      return kPragmaJson;
+    case TC_CSS:
+      return kPragmaCss;
+    case TC_JS:
+      return kPragmaJs;
+    case TC_HTML:
+      return kPragmaHtml;
+    case TC_UNUSED:
+      ASSERT(false);  // Developer error, this TC is not to be used.
+  }
+  ASSERT(false);  // Developer error - invalid TemplateContext.
+  return "";
+}
+
+// Prepends the AUTOESCAPE pragma for "context" to s, writes the result to
+// a template file, and returns the Template loaded from that file.
+static Template* StringToTemplateWithAutoEscaping(const string& s,
+                                                  Strip strip,
+                                                  TemplateContext context) {
+  const string path = StringToTemplateFile(GetPragmaForContext(context) + s);
+  return Template::GetTemplate(path, strip);
+}
+
+// Helper for TestCorrectModifiersForAutoEscape. Dumps the template and
+// collects, one per line, whatever follows each "Variable Node: " marker
+// (i.e. VARNAME:mod1[=val1][:mod2[=val2]]...), then asserts that the
+// collected text equals expected_out. "text" is printed on mismatch.
+static void AssertCorrectModifiersInTemplate(Template* tpl,
+                                             const string& text,
+                                             const string& expected_out) {
+  ASSERT(tpl);
+  static const char kMarker[] = "Variable Node: ";
+  string dump, out;
+  tpl->DumpToString("bogus_filename", &dump);
+  for (string::size_type pos = dump.find(kMarker); pos != string::npos;
+       pos = dump.find(kMarker, pos)) {
+    pos += sizeof(kMarker) - 1;  // step over the marker itself
+    const string::size_type eol = dump.find("\n", pos);
+    // If eol is npos the count is clamped to the end of the dump.
+    out.append(dump, pos, eol - pos).append("\n");
+  }
+  ASSERT_STREQ_VERBOSE(expected_out, out, text);
+}
+
+// Builds a template from "text" for the given context (whitespace
+// stripped) and hands it to AssertCorrectModifiersInTemplate.
+static void AssertCorrectModifiers(TemplateContext template_type,
+                                   const string& text,
+                                   const string& expected_out) {
+  Template* const tpl =
+      StringToTemplateWithAutoEscaping(text, STRIP_WHITESPACE, template_type);
+  AssertCorrectModifiersInTemplate(tpl, text, expected_out);
+}
+
+// Helper for TestCorrectModifiersForAutoEscape. Loads "text" as an
+// auto-escaped template for the given TemplateContext (whitespace
+// stripped), expands it with "dict", and asserts that the expansion
+// equals expected_out. "text" is printed on mismatch.
+static void AssertCorrectEscaping(TemplateContext template_type,
+                                  const TemplateDictionary& dict,
+                                  const string& text,
+                                  const string& expected_out) {
+  Template* const tpl =
+      StringToTemplateWithAutoEscaping(text, STRIP_WHITESPACE, template_type);
+  string outstring;
+  tpl->Expand(&outstring, &dict);
+  ASSERT_STREQ_VERBOSE(expected_out, outstring, text);
+}
+
+class DynamicModifier : public GOOGLE_NAMESPACE::TemplateModifier {
+ public:
+  void Modify(const char* in, size_t inlen,
+              const PerExpandData* per_expand_data,
+              ExpandEmitter* outbuf, const string& arg) const {
+    assert(arg.empty()); // we don't take an argument
+    assert(per_expand_data);
+    // Emits the per-expand "value" entry (if set) in place of the input.
+    if (const char* v = per_expand_data->LookupForModifiersAsString("value"))
+      outbuf->Emit(v);
+  }
+};
+
+// Modifier that wraps its input in ">>" and "<<". MightModify() reports
+// true only when the modifier argument contains the "match" substring.
+class EmphasizeTemplateModifier : public GOOGLE_NAMESPACE::TemplateModifier {
+ public:
+  EmphasizeTemplateModifier(const string& match) : match_(match) {}
+
+  bool MightModify(const PerExpandData* per_expand_data,
+                   const string& arg) const {
+    return strstr(arg.c_str(), match_.c_str()) != NULL;
+  }
+
+  void Modify(const char* in, size_t inlen,
+              const PerExpandData* per_expand_data,
+              ExpandEmitter* outbuf, const string& arg) const {
+    outbuf->Emit(">>");
+    outbuf->Emit(in, inlen);
+    outbuf->Emit("<<");
+  }
+
+ private:
+  string match_;
+};
+
+// Used by TestAnnotation() to check that annotation can be customized.
+// Every hook first emits "{{EVENT=n}}", where n counts the hooks called
+// since construction or the last Reset(), and then defers to the stock
+// TextTemplateAnnotator implementation of the same hook.
+class CustomTestAnnotator : public GOOGLE_NAMESPACE::TextTemplateAnnotator {
+  typedef GOOGLE_NAMESPACE::TextTemplateAnnotator Base;
+ public:
+  CustomTestAnnotator() : event_count_(0) { }
+  void Reset() { event_count_ = 0; }
+
+  virtual void EmitOpenInclude(ExpandEmitter* emitter, const string& value) {
+    EmitTestPrefix(emitter);
+    Base::EmitOpenInclude(emitter, value);
+  }
+  virtual void EmitCloseInclude(ExpandEmitter* emitter) {
+    EmitTestPrefix(emitter);
+    Base::EmitCloseInclude(emitter);
+  }
+  virtual void EmitOpenFile(ExpandEmitter* emitter, const string& value) {
+    EmitTestPrefix(emitter);
+    Base::EmitOpenFile(emitter, value);
+  }
+  virtual void EmitCloseFile(ExpandEmitter* emitter) {
+    EmitTestPrefix(emitter);
+    Base::EmitCloseFile(emitter);
+  }
+  virtual void EmitOpenSection(ExpandEmitter* emitter, const string& value) {
+    EmitTestPrefix(emitter);
+    Base::EmitOpenSection(emitter, value);
+  }
+  virtual void EmitCloseSection(ExpandEmitter* emitter) {
+    EmitTestPrefix(emitter);
+    Base::EmitCloseSection(emitter);
+  }
+  virtual void EmitOpenVariable(ExpandEmitter* emitter, const string& value) {
+    EmitTestPrefix(emitter);
+    Base::EmitOpenVariable(emitter, value);
+  }
+  virtual void EmitCloseVariable(ExpandEmitter* emitter) {
+    EmitTestPrefix(emitter);
+    Base::EmitCloseVariable(emitter);
+  }
+  virtual void EmitFileIsMissing(ExpandEmitter* emitter, const string& value) {
+    EmitTestPrefix(emitter);
+    Base::EmitFileIsMissing(emitter, value);
+  }
+
+ private:
+  void EmitTestPrefix(ExpandEmitter* emitter) {
+    char prefix[128];
+    snprintf(prefix, sizeof(prefix), "{{EVENT=%d}}", ++event_count_);
+    emitter->Emit(prefix);
+  }
+  int event_count_;
+  DISALLOW_COPY_AND_ASSIGN(CustomTestAnnotator);
+};
+
+class TemplateForTest : public Template {
+ private:
+  // Silences gcc3: "base `Template' with only non-default constructor
+  // in class without a constructor".
+  TemplateForTest();
+ public:  // re-exported from Template so the tests can name them
+  using Template::kNumSafeWhitelistedVariables;
+  using Template::kSafeWhitelistedVariables;
+};
+
+// Tests annotation, in particular inheriting annotation among children.
+// This should be called first: the expected output hard-codes the generated
+// template file numbers (001..004), which change as more tests are added.
+static void TestAnnotation() {
+ string incname = StringToTemplateFile("include {{#ISEC}}file{{/ISEC}}\n");
+ string incname2 = StringToTemplateFile("include #2\n");
+ Template* tpl = StringToTemplate(
+ "boo!\n{{>INC}}\nhi {{#SEC}}lo{{#SUBSEC}}jo{{/SUBSEC}}{{/SEC}} bar "
+ "{{VAR:x-foo}}",
+ DO_NOT_STRIP);
+ TemplateDictionary dict("dict");
+ PerExpandData per_expand_data;
+
+ dict.ShowSection("SEC");
+ TemplateDictionary* incdict = dict.AddIncludeDictionary("INC");
+ incdict->SetFilename(incname);
+ incdict->ShowSection("ISEC");
+ dict.AddIncludeDictionary("INC")->SetFilename(incname2);
+ dict.SetValue("VAR", "var");
+
+ // This string is equivalent to "/template." (at least on unix)
+ string slash_tpl(PathJoin(kRootdir, "template."));
+ per_expand_data.SetAnnotateOutput(""); // with "", the expected FILE paths keep the tmpdir prefix
+ char expected[10240]; // 10k should be big enough!
+ snprintf(expected, sizeof(expected),
+ "{{#FILE=%s003}}{{#SEC=__{{MAIN}}__}}boo!\n"
+ "{{#INC=INC}}{{#FILE=%s001}}"
+ "{{#SEC=__{{MAIN}}__}}include {{#SEC=ISEC}}file{{/SEC}}\n"
+ "{{/SEC}}{{/FILE}}{{/INC}}"
+ "{{#INC=INC}}{{#FILE=%s002}}"
+ "{{#SEC=__{{MAIN}}__}}include #2\n{{/SEC}}{{/FILE}}{{/INC}}"
+ "\nhi {{#SEC=SEC}}lo{{/SEC}} bar "
+ "{{#VAR=VAR:x-foo<not registered>}}var{{/VAR}}{{/SEC}}{{/FILE}}",
+ (FLAGS_test_tmpdir + slash_tpl).c_str(),
+ (FLAGS_test_tmpdir + slash_tpl).c_str(),
+ (FLAGS_test_tmpdir + slash_tpl).c_str());
+ AssertExpandWithDataIs(tpl, &dict, &per_expand_data, expected, true);
+
+ // Test ability to set custom annotator.
+ CustomTestAnnotator custom_annotator;
+ per_expand_data.SetAnnotator(&custom_annotator);
+ snprintf(expected, sizeof(expected),
+ "{{EVENT=1}}{{#FILE=%s003}}"
+ "{{EVENT=2}}{{#SEC=__{{MAIN}}__}}boo!\n"
+ "{{EVENT=3}}{{#INC=INC}}"
+ "{{EVENT=4}}{{#FILE=%s001}}"
+ "{{EVENT=5}}{{#SEC=__{{MAIN}}__}}include "
+ "{{EVENT=6}}{{#SEC=ISEC}}file"
+ "{{EVENT=7}}{{/SEC}}\n"
+ "{{EVENT=8}}{{/SEC}}"
+ "{{EVENT=9}}{{/FILE}}"
+ "{{EVENT=10}}{{/INC}}"
+ "{{EVENT=11}}{{#INC=INC}}"
+ "{{EVENT=12}}{{#FILE=%s002}}"
+ "{{EVENT=13}}{{#SEC=__{{MAIN}}__}}include #2\n"
+ "{{EVENT=14}}{{/SEC}}"
+ "{{EVENT=15}}{{/FILE}}"
+ "{{EVENT=16}}{{/INC}}\nhi "
+ "{{EVENT=17}}{{#SEC=SEC}}lo"
+ "{{EVENT=18}}{{/SEC}} bar "
+ "{{EVENT=19}}{{#VAR=VAR:x-foo<not registered>}}var"
+ "{{EVENT=20}}{{/VAR}}"
+ "{{EVENT=21}}{{/SEC}}"
+ "{{EVENT=22}}{{/FILE}}",
+ (FLAGS_test_tmpdir + slash_tpl).c_str(),
+ (FLAGS_test_tmpdir + slash_tpl).c_str(),
+ (FLAGS_test_tmpdir + slash_tpl).c_str());
+ // We can't use AssertExpandWithDataIs() on our deliberately stateful
+ // test annotator because it internally does a second expansion
+ // assuming no state change between calls.
+ string custom_outstring;
+ ASSERT(tpl->ExpandWithData(&custom_outstring, &dict, &per_expand_data));
+ ASSERT_STREQ(custom_outstring.c_str(), expected);
+
+ // Unset annotator and continue with next test as test of ability
+ // to revert to built-in annotator.
+ per_expand_data.SetAnnotator(NULL);
+ // With slash_tpl as the argument, the expected FILE paths below start at slash_tpl (no tmpdir prefix).
+ per_expand_data.SetAnnotateOutput(slash_tpl.c_str());
+ snprintf(expected, sizeof(expected),
+ "{{#FILE=%s003}}{{#SEC=__{{MAIN}}__}}boo!\n"
+ "{{#INC=INC}}{{#FILE=%s001}}"
+ "{{#SEC=__{{MAIN}}__}}include {{#SEC=ISEC}}file{{/SEC}}\n"
+ "{{/SEC}}{{/FILE}}{{/INC}}"
+ "{{#INC=INC}}{{#FILE=%s002}}"
+ "{{#SEC=__{{MAIN}}__}}include #2\n{{/SEC}}{{/FILE}}{{/INC}}"
+ "\nhi {{#SEC=SEC}}lo{{/SEC}} bar "
+ "{{#VAR=VAR:x-foo<not registered>}}var{{/VAR}}{{/SEC}}{{/FILE}}",
+ (slash_tpl).c_str(),
+ (slash_tpl).c_str(),
+ (slash_tpl).c_str());
+ AssertExpandWithDataIs(tpl, &dict, &per_expand_data, expected, true);
+
+ per_expand_data.SetAnnotateOutput(NULL); // should turn off annotations
+ AssertExpandWithDataIs(tpl, &dict, &per_expand_data,
+ "boo!\ninclude file\ninclude #2\n\nhi lo bar var",
+ true);
+
+ // Test that even if we set an annotator we shouldn't get annotation
+ // if it is not turned on with SetAnnotateOutput().
+ per_expand_data.SetAnnotator(&custom_annotator);
+ AssertExpandWithDataIs(tpl, &dict, &per_expand_data,
+ "boo!\ninclude file\ninclude #2\n\nhi lo bar var",
+ true);
+
+ // Test annotation of "missing include" condition.
+ Template* one_inc_tpl =
+ StringToTemplate("File contents: {{>INC}}\n", DO_NOT_STRIP);
+ TemplateDictionary dict_missing_file("dict_with_missing_file");
+ dict_missing_file.AddIncludeDictionary("INC")->SetFilename("missing.tpl");
+
+ per_expand_data.SetAnnotateOutput("");
+ per_expand_data.SetAnnotator(NULL);
+ snprintf(expected, sizeof(expected),
+ "{{#FILE=%s004}}{{#SEC=__{{MAIN}}__}}File contents: "
+ "{{#INC=INC}}{{MISSING_FILE=missing.tpl}}{{/INC}}\n"
+ "{{/SEC}}{{/FILE}}",
+ (FLAGS_test_tmpdir + slash_tpl).c_str());
+ // We expect a false return value because of the missing file.
+ AssertExpandWithDataIs(one_inc_tpl, &dict_missing_file, &per_expand_data,
+ expected, false);
+
+ // Same missing include test with custom annotator
+ custom_annotator.Reset();
+ per_expand_data.SetAnnotator(&custom_annotator);
+ snprintf(expected, sizeof(expected),
+ "{{EVENT=1}}{{#FILE=%s004}}"
+ "{{EVENT=2}}{{#SEC=__{{MAIN}}__}}File contents: "
+ "{{EVENT=3}}{{#INC=INC}}"
+ "{{EVENT=4}}{{MISSING_FILE=missing.tpl}}"
+ "{{EVENT=5}}{{/INC}}\n"
+ "{{EVENT=6}}{{/SEC}}"
+ "{{EVENT=7}}{{/FILE}}",
+ (FLAGS_test_tmpdir + slash_tpl).c_str());
+ // See comment above on why we can't use AssertExpandWithDataIs() for
+ // our stateful test annotator.
+ custom_outstring.clear();
+ ASSERT(!one_inc_tpl->ExpandWithData(&custom_outstring,
+ &dict_missing_file,
+ &per_expand_data));
+ ASSERT_STREQ(custom_outstring.c_str(), expected);
+}
+
+TEST(Template, CheckWhitelistedVariablesSorted) {
+ // NOTE(williasr): kSafeWhitelistedVariables must be sorted, it's accessed using binary search.
+ // Use the file's ASSERT rather than assert(), which compiles to nothing under NDEBUG.
+ for (size_t i = 1; i < TemplateForTest::kNumSafeWhitelistedVariables; i++) {
+ ASSERT(strcmp(TemplateForTest::kSafeWhitelistedVariables[i-1],
+ TemplateForTest::kSafeWhitelistedVariables[i]) < 0);  // strictly ascending, so no duplicates
+ }
+}
+
+
+// The following tests test various aspects of how Expand() should behave.
+TEST(Template, WeirdSyntax) {
+ TemplateDictionary empty_dict("dict");
+
+ // On {{{ the marker opens at the second {{, so the first { stays as text.
+ Template* triple_open = StringToTemplate("hi {{{! VAR {{!VAR} }} lo",
+ STRIP_WHITESPACE);
+ AssertExpandIs(triple_open, &empty_dict, "hi { lo", true);
+
+ // The same kind of check for a run of three closing braces.
+ Template* triple_close = StringToTemplate("fn(){{{BI_NEWLINE}} x=4;{{BI_NEWLINE}}}",
+ DO_NOT_STRIP);
+ AssertExpandIs(triple_close, &empty_dict, "fn(){\n x=4;\n}", true);
+
+ // Pile up braces on both sides of a variable that has no value.
+ Template* many_braces = StringToTemplate("{{{{{{VAR}}}}}}}}", DO_NOT_STRIP);
+ AssertExpandIs(many_braces, &empty_dict, "{{{{}}}}}}", true);
+}
+
+TEST(Template, Comment) {  // {{!...}} markers are expected to contribute nothing to the output.
+ TemplateDictionary empty_dict("dict");
+ Template* plain_comment = StringToTemplate("hi {{!VAR}} lo",
+ STRIP_WHITESPACE);
+ AssertExpandIs(plain_comment, &empty_dict, "hi lo", true);
+
+ Template* comment_with_braces = StringToTemplate("hi {{!VAR {VAR} }} lo",  // single braces inside
+ STRIP_WHITESPACE);
+ AssertExpandIs(comment_with_braces, &empty_dict, "hi lo", true);
+
+ Template* comment_with_marker = StringToTemplate("hi {{! VAR {{!VAR} }} lo",  // nested "{{!" inside
+ STRIP_WHITESPACE);
+ AssertExpandIs(comment_with_marker, &empty_dict, "hi lo", true);
+}
+
+TEST(Template, SetMarkerDelimiters) {  // Exercises the {{=OPEN CLOSE=}} delimiter-changing marker.
+ TemplateDictionary dict("dict");
+ dict.SetValue("VAR", "yo");
+ Template* tpl1 = StringToTemplate("{{=| |=}}\nhi |VAR| {{lo}}",  // switch to |...| delimiters
+ STRIP_WHITESPACE);
+ AssertExpandIs(tpl1, &dict, "hi yo {{lo}}", true);  // the old {{...}} form is now emitted literally
+
+ Template* tpl2 = StringToTemplate("{{=| |=}}hi |VAR| {{lo}}",  // same, without a newline after the marker
+ STRIP_WHITESPACE);
+ AssertExpandIs(tpl2, &dict, "hi yo {{lo}}", true);
+
+ Template* tpl3 = StringToTemplate("{{=| ||=}}hi ||VAR|||VAR|| {{lo}}",  // start and end delimiters differ
+ STRIP_WHITESPACE);
+ AssertExpandIs(tpl3, &dict, "hi |yoyo {{lo}}", true);
+
+ Template* tpl4 = StringToTemplate("{{=< >=}}hi <<VAR>> {{lo}}",  // one-char delimiters, doubled in the text
+ STRIP_WHITESPACE);
+ AssertExpandIs(tpl4, &dict, "hi <yo> {{lo}}", true);
+
+ Template* tpl4b = StringToTemplate("{{=<< >>=}}hi <<VAR>> {{lo}}",  // two-char delimiters
+ STRIP_WHITESPACE);
+ AssertExpandIs(tpl4b, &dict, "hi yo {{lo}}", true);
+
+ Template* tpl4c = StringToTemplate("{{=<< <<=}}hi <<VAR<< {{lo}}",  // identical start and end delimiter
+ STRIP_WHITESPACE);
+ AssertExpandIs(tpl4c, &dict, "hi yo {{lo}}", true);
+
+ Template* tpl5 = StringToTemplate("hi {{VAR}} lo\n{{=< >=}}\n"  // delimiters are changed three times below
+ "hi {{VAR}} lo\n"
+ "hi <VAR> lo\n<={ }=>\n"
+ "hi {{VAR}} lo\n{={{ }}=}\n"
+ "hi {{VAR}} lo\n",
+ STRIP_WHITESPACE);
+ AssertExpandIs(tpl5, &dict,
+ "hi yo lohi {{VAR}} lohi yo lohi {yo} lohi yo lo",
+ true);
+
+ Template* tpl6 = StringToTemplate("hi {{VAR}} lo\n{{=< >}}\n",  // no '=' before the closing }}
+ STRIP_WHITESPACE);
+ ASSERT(tpl6 == NULL);  // malformed delimiter markers are expected to fail to parse
+
+ Template* tpl7 = StringToTemplate("hi {{VAR}} lo\n{{=<>}}\n",  // no space between delimiters, no trailing '='
+ STRIP_WHITESPACE);
+ ASSERT(tpl7 == NULL);
+
+ Template* tpl8 = StringToTemplate("hi {{VAR}} lo\n{{=< >=}}\n",  // NOTE(review): reads like the valid form above; confirm exact spacing
+ STRIP_WHITESPACE);
+ ASSERT(tpl8 == NULL);
+
+ Template* tpl9 = StringToTemplate("hi {{VAR}} lo\n{{==}}\n",  // no delimiters given at all
+ STRIP_WHITESPACE);
+ ASSERT(tpl9 == NULL);
+
+ Template* tpl10 = StringToTemplate("hi {{VAR}} lo\n{{=}}\n",  // a lone '='
+ STRIP_WHITESPACE);
+ ASSERT(tpl10 == NULL);
+
+ // Test that {{= =}} is a "removable" marker: under STRIP_BLANK_LINES its line leaves no output.
+ Template* tpl11 = StringToTemplate("line\n {{=| |=}} \nhi |VAR| {{lo}}\n",
+ STRIP_BLANK_LINES);
+ AssertExpandIs(tpl11, &dict, "line\nhi yo {{lo}}\n", true);
+
+ // Test that "removable" markers survive marker-modification.
+ Template* tpl12 = StringToTemplate(" {{#SEC1}} \n"
+ "{{=| |=}} |VAR|\n"
+ " |/SEC1|\ntada! |VAR|\n"
+ "hello|=<< >>=|\n"
+ " <<! a blank line>> \n"
+ "done",
+ STRIP_BLANK_LINES);
+ AssertExpandIs(tpl12, &dict, "tada! yo\nhello\ndone", true);  // SEC1 is never shown, so its body is absent
+}
+
+TEST(Template, Variable) {
+ Template* var_tpl = StringToTemplate("hi {{VAR}} lo", STRIP_WHITESPACE);
+ TemplateDictionary values("dict");
+ AssertExpandIs(var_tpl, &values, "hi lo", true);  // unset variable expands to nothing
+ values.SetValue("VAR", "yo");
+ AssertExpandIs(var_tpl, &values, "hi yo lo", true);
+ values.SetValue("VAR", "yoyo");
+ AssertExpandIs(var_tpl, &values, "hi yoyo lo", true);  // a later SetValue replaces the earlier one
+ values.SetValue("VA", "noyo");
+ values.SetValue("VAR ", "noyo2");
+ values.SetValue("var", "noyo3");
+ AssertExpandIs(var_tpl, &values, "hi yoyo lo", true);  // near-miss keys leave VAR untouched
+
+ // Run the same sequence against Template::StringToTemplate called directly.
+ Template* owned_tpl = Template::StringToTemplate("hi {{VAR}} lo",
+ STRIP_WHITESPACE);
+ TemplateDictionary values2("dict");
+ AssertExpandIs(owned_tpl, &values2, "hi lo", true);
+ values2.SetValue("VAR", "yo");
+ AssertExpandIs(owned_tpl, &values2, "hi yo lo", true);
+ values2.SetValue("VAR", "yoyo");
+ AssertExpandIs(owned_tpl, &values2, "hi yoyo lo", true);
+ values2.SetValue("VA", "noyo");
+ values2.SetValue("VAR ", "noyo2");
+ values2.SetValue("var", "noyo3");
+ AssertExpandIs(owned_tpl, &values2, "hi yoyo lo", true);
+ delete owned_tpl; // Templates made by Template::StringToTemplate must be deleted by the caller
+}
+
+TEST(Template, VariableWithModifiers) {  // Built-in, custom and per-expand-data modifiers on variables.
+ Template* tpl = StringToTemplate("hi {{VAR:html_escape}} lo",
+ STRIP_WHITESPACE);
+ TemplateDictionary dict("dict");
+
+ // Test the html_escape modifier, spelled out by its full name.
+ dict.SetValue("VAR", "yo");
+ AssertExpandIs(tpl, &dict, "hi yo lo", true);
+ dict.SetValue("VAR", "yo&yo");
+ AssertExpandIs(tpl, &dict, "hi yo&yo lo", true);
+
+ // Test with URL escaping (short name :u and full name :url_query_escape).
+ tpl = StringToTemplate("<a href=\"/servlet?param={{VAR:u}}\">",
+ STRIP_WHITESPACE);
+ AssertExpandIs(tpl, &dict, "<a href=\"/servlet?param=yo%26yo\">", true);
+ tpl = StringToTemplate("<a href='/servlet?param={{VAR:url_query_escape}}'>",
+ STRIP_WHITESPACE);
+ AssertExpandIs(tpl, &dict, "<a href='/servlet?param=yo%26yo'>", true);
+
+ // Test with multiple URL escaping: the second pass escapes the '%' of the first.
+ tpl = StringToTemplate("<a href=\"/servlet?param={{VAR:u:u}}\">",
+ STRIP_WHITESPACE);
+ AssertExpandIs(tpl, &dict, "<a href=\"/servlet?param=yo%2526yo\">", true);
+
+ // Test HTML escaping.
+ tpl = StringToTemplate("hi {{VAR:h}} lo", STRIP_WHITESPACE);
+ AssertExpandIs(tpl, &dict, "hi yo&yo lo", true);
+
+ tpl = StringToTemplate("hi {{VAR:h:h}} lo", STRIP_WHITESPACE);
+ AssertExpandIs(tpl, &dict, "hi yo&amp;yo lo", true);
+
+ // Test special HTML escaping (the :H modifier with an =value argument)
+ dict.SetValue("URL_VAR", "javascript:void");
+ dict.SetValue("SNIPPET_VAR", "<b>foo & bar</b>");
+ tpl = StringToTemplate("hi {{VAR:H=attribute}} {{URL_VAR:H=url}} "
+ "{{SNIPPET_VAR:H=snippet}} lo", STRIP_WHITESPACE);
+ AssertExpandIs(tpl, &dict, "hi yo_yo # <b>foo & bar</b> lo", true);
+
+ // Test with custom modifiers [regular or XssSafe should not matter].
+ ASSERT(GOOGLE_NAMESPACE::AddModifier("x-test",
+ &GOOGLE_NAMESPACE::html_escape));
+ ASSERT(GOOGLE_NAMESPACE::AddModifier("x-test-arg=",
+ &GOOGLE_NAMESPACE::html_escape));
+ ASSERT(GOOGLE_NAMESPACE::AddXssSafeModifier("x-test-arg=snippet",
+ &GOOGLE_NAMESPACE::snippet_escape));
+
+ tpl = StringToTemplate("hi {{VAR:x-test}} lo", STRIP_WHITESPACE);
+ AssertExpandIs(tpl, &dict, "hi yo&yo lo", true);
+ tpl = StringToTemplate("hi {{SNIPPET_VAR:x-test-arg=snippet}} lo",
+ STRIP_WHITESPACE);
+ AssertExpandIs(tpl, &dict, "hi <b>foo & bar</b> lo", true);
+ tpl = StringToTemplate("hi {{VAR:x-unknown}} lo", STRIP_WHITESPACE);  // an x- modifier that was never registered
+ AssertExpandIs(tpl, &dict, "hi yo&yo lo", true);
+
+ // Test with a modifier taking per-expand data
+ DynamicModifier dynamic_modifier;
+ ASSERT(GOOGLE_NAMESPACE::AddModifier("x-dynamic", &dynamic_modifier));
+ PerExpandData per_expand_data;
+ tpl = StringToTemplate("hi {{VAR:x-dynamic}} lo", STRIP_WHITESPACE);
+ AssertExpandWithDataIs(tpl, &dict, &per_expand_data, "hi lo", true);  // no "value" entry yet
+ per_expand_data.InsertForModifiers("value", "foo");
+ AssertExpandWithDataIs(tpl, &dict, &per_expand_data, "hi foo lo", true);
+ per_expand_data.InsertForModifiers("value", "bar");
+ AssertExpandWithDataIs(tpl, &dict, &per_expand_data, "hi bar lo", true);
+ per_expand_data.InsertForModifiers("value", NULL);
+ AssertExpandWithDataIs(tpl, &dict, &per_expand_data, "hi lo", true);
+
+ // Test with no modifiers.
+ tpl = StringToTemplate("hi {{VAR}} lo", STRIP_WHITESPACE);
+ AssertExpandIs(tpl, &dict, "hi yo&yo lo", true);
+
+ // Check that ordering is right: chained modifiers are applied left to right.
+ dict.SetValue("VAR", "yo\nyo");
+ tpl = StringToTemplate("hi {{VAR:h}} lo", STRIP_WHITESPACE);
+ AssertExpandIs(tpl, &dict, "hi yo yo lo", true);
+ tpl = StringToTemplate("hi {{VAR:p}} lo", STRIP_WHITESPACE);
+ AssertExpandIs(tpl, &dict, "hi yo\nyo lo", true);
+ tpl = StringToTemplate("hi {{VAR:j}} lo", STRIP_WHITESPACE);
+ AssertExpandIs(tpl, &dict, "hi yo\\nyo lo", true);
+ tpl = StringToTemplate("hi {{VAR:h:j}} lo", STRIP_WHITESPACE);
+ AssertExpandIs(tpl, &dict, "hi yo yo lo", true);
+ tpl = StringToTemplate("hi {{VAR:j:h}} lo", STRIP_WHITESPACE);
+ AssertExpandIs(tpl, &dict, "hi yo\\nyo lo", true);
+
+ // Check more complicated modifiers using fullname
+ tpl = StringToTemplate("hi {{VAR:javascript_escape:h}} lo",
+ STRIP_WHITESPACE);
+ AssertExpandIs(tpl, &dict, "hi yo\\nyo lo", true);
+ tpl = StringToTemplate("hi {{VAR:j:html_escape}} lo",
+ STRIP_WHITESPACE);
+ AssertExpandIs(tpl, &dict, "hi yo\\nyo lo", true);
+ tpl = StringToTemplate("hi {{VAR:pre_escape:j}} lo",
+ STRIP_WHITESPACE);
+ AssertExpandIs(tpl, &dict, "hi yo\\nyo lo", true);
+
+ // Check that illegal modifiers are rejected (the template fails to parse)
+ tpl = StringToTemplate("hi {{VAR:j:h2}} lo", STRIP_WHITESPACE);
+ ASSERT(tpl == NULL);
+ tpl = StringToTemplate("hi {{VAR:html_ecap}} lo", STRIP_WHITESPACE);
+ ASSERT(tpl == NULL);
+ tpl = StringToTemplate("hi {{VAR:javascript_escaper}} lo",
+ STRIP_WHITESPACE);
+ ASSERT(tpl == NULL);
+ tpl = StringToTemplate("hi {{VAR:js:j}} lo", STRIP_WHITESPACE);
+ ASSERT(tpl == NULL);
+ tpl = StringToTemplate("hi {{VAR:}} lo", STRIP_WHITESPACE);
+ ASSERT(tpl == NULL);
+
+ // Check we reject modifier-values when we ought to
+ tpl = StringToTemplate("hi {{VAR:j=4}} lo", STRIP_WHITESPACE);
+ ASSERT(tpl == NULL);
+ tpl = StringToTemplate("hi {{VAR:html_escape=yes}} lo", STRIP_WHITESPACE);
+ ASSERT(tpl == NULL);
+ tpl = StringToTemplate("hi {{VAR:url_query_escape=wombats}} lo",
+ STRIP_WHITESPACE);
+ ASSERT(tpl == NULL);
+
+ // Check we don't allow modifiers on sections
+ tpl = StringToTemplate("hi {{#VAR:h}} lo {{/VAR}}", STRIP_WHITESPACE);
+ ASSERT(tpl == NULL);
+
+ // Test when expanded grows by more than 12% per modifier.
+ dict.SetValue("VAR", "http://a.com?b=c&d=e&f=g&q=a>b");
+ tpl = StringToTemplate("{{VAR:u:j:h}}",
+ STRIP_WHITESPACE);
+ AssertExpandIs(tpl, &dict,
+ "http%3A//a.com%3Fb%3Dc%26d%3De%26f%3Dg%26q%3Da%3Eb",
+ true);
+
+ // As above with 4 modifiers.
+ dict.SetValue("VAR", "http://a.com?b=c&d=e&f=g&q=a>b");
+ tpl = StringToTemplate("{{VAR:u:j:h:h}}",
+ STRIP_WHITESPACE);
+ AssertExpandIs(tpl, &dict,
+ "http%3A//a.com%3Fb%3Dc%26d%3De%26f%3Dg%26q%3Da%3Eb",
+ true);
+}
+
+TEST(Template, Section) {
+ Template* nested_tpl = StringToTemplate(
+ "boo!\nhi {{#SEC}}lo{{#SUBSEC}}jo{{/SUBSEC}}{{/SEC}} bar",
+ STRIP_WHITESPACE);
+ TemplateDictionary shown("dict");
+ AssertExpandIs(nested_tpl, &shown, "boo!hi bar", true);  // hidden sections emit nothing
+ shown.ShowSection("SEC");
+ AssertExpandIs(nested_tpl, &shown, "boo!hi lo bar", true);
+ shown.ShowSection("SEC");
+ AssertExpandIs(nested_tpl, &shown, "boo!hi lo bar", true);  // showing twice changes nothing
+ // SUBSEC is shown on the top-level dict rather than on a SEC child; it must still expand.
+ shown.ShowSection("SUBSEC");
+ AssertExpandIs(nested_tpl, &shown, "boo!hi lojo bar", true);
+
+ TemplateDictionary repeated("dict2");
+ repeated.AddSectionDictionary("SEC");
+ AssertExpandIs(nested_tpl, &repeated, "boo!hi lo bar", true);
+ repeated.AddSectionDictionary("SEC");
+ AssertExpandIs(nested_tpl, &repeated, "boo!hi lolo bar", true);  // one copy per section dictionary
+ repeated.AddSectionDictionary("sec");
+ AssertExpandIs(nested_tpl, &repeated, "boo!hi lolo bar", true);  // lowercase "sec" is a different name
+ repeated.ShowSection("SUBSEC");
+ AssertExpandIs(nested_tpl, &repeated, "boo!hi lojolojo bar", true);
+}
+
+
+TEST(Template, SectionSeparator) {  // Behaviour of the {{#SEC_separator}} section nested inside SEC.
+ Template* tpl = StringToTemplate(
+ "hi {{#SEC}}lo{{#SEC_separator}}jo{{JO}}{{/SEC_separator}}{{/SEC}} bar",
+ STRIP_WHITESPACE);
+ TemplateDictionary dict("dict");
+ AssertExpandIs(tpl, &dict, "hi bar", true);
+ // Since SEC is only expanded once, the separator section shouldn't show.
+ dict.ShowSection("SEC");
+ AssertExpandIs(tpl, &dict, "hi lo bar", true);
+ dict.ShowSection("SEC");
+ AssertExpandIs(tpl, &dict, "hi lo bar", true);
+ // This should work even though SEC_separator isn't a child of the
+ // main dict. It verifies SEC_separator is just a normal section, too.
+ dict.ShowSection("SEC_separator");
+ AssertExpandIs(tpl, &dict, "hi lojo bar", true);
+
+ TemplateDictionary dict2("dict2");
+ dict2.AddSectionDictionary("SEC");
+ AssertExpandIs(tpl, &dict2, "hi lo bar", true);
+ dict2.AddSectionDictionary("SEC");
+ AssertExpandIs(tpl, &dict2, "hi lojolo bar", true);  // separator shows between the two copies, not after the last
+ // This is a weird case: using separator and specifying manually.
+ dict2.ShowSection("SEC_separator");
+ AssertExpandIs(tpl, &dict2, "hi lojojolojo bar", true);
+
+ TemplateDictionary dict3("dict3");
+ TemplateDictionary* sec1 = dict3.AddSectionDictionary("SEC");
+ TemplateDictionary* sec2 = dict3.AddSectionDictionary("SEC");
+ TemplateDictionary* sec3 = dict3.AddSectionDictionary("SEC");
+ dict3.SetValue("JO", "J");  // value set on the parent; the sec1/sec2 overrides below take precedence
+ AssertExpandIs(tpl, &dict3, "hi lojoJlojoJlo bar", true);
+ sec1->SetValue("JO", "JO");
+ AssertExpandIs(tpl, &dict3, "hi lojoJOlojoJlo bar", true);
+ sec2->SetValue("JO", "JOO");
+ AssertExpandIs(tpl, &dict3, "hi lojoJOlojoJOOlo bar", true);
+ dict3.AddSectionDictionary("SEC");  // a fourth copy, so sec3 is no longer the last one
+ AssertExpandIs(tpl, &dict3, "hi lojoJOlojoJOOlojoJlo bar", true);
+ sec3->AddSectionDictionary("SEC_separator");
+ AssertExpandIs(tpl, &dict3, "hi lojoJOlojoJOOlojoJjoJlo bar", true);
+
+ // Make sure we don't do anything special with var or include names
+ Template* tpl2 = StringToTemplate(
+ "hi {{#SEC}}lo{{>SEC_separator}}{{/SEC}} bar",
+ STRIP_WHITESPACE);
+ AssertExpandIs(tpl2, &dict2, "hi lolo bar", true);  // used as an include name here
+
+ Template* tpl3 = StringToTemplate(
+ "hi {{#SEC}}lo{{SEC_separator}}{{/SEC}} bar",
+ STRIP_WHITESPACE);
+ dict2.SetValue("SEC_separator", "-");
+ AssertExpandIs(tpl3, &dict2, "hi lo-lo- bar", true);  // as a plain variable it appears after every copy
+}
+
+TEST(Template, Include) {  // Exercises {{>INC}} include markers: filenames, strip modes and indentation.
+ string incname = StringToTemplateFile("include file\n");
+ string incname2 = StringToTemplateFile("inc2a\ninc2b\n");
+ string incname_bad = StringToTemplateFile("{{syntax_error");  // marker is never closed
+ Template* tpl = StringToTemplate("hi {{>INC}} bar\n", STRIP_WHITESPACE);
+ TemplateDictionary dict("dict");
+ AssertExpandIs(tpl, &dict, "hi bar", true);
+ dict.AddIncludeDictionary("INC");
+ AssertExpandIs(tpl, &dict, "hi bar", true); // noop: no filename was set
+ dict.AddIncludeDictionary("INC")->SetFilename("/notarealfile ");
+ AssertExpandIs(tpl, &dict, "hi bar", false); // noop: illegal filename
+ dict.AddIncludeDictionary("INC")->SetFilename(incname);
+ AssertExpandIs(tpl, &dict, "hi include file bar", false);  // still false: presumably because the bad include above stays in dict
+ dict.AddIncludeDictionary("INC")->SetFilename(incname_bad);
+ AssertExpandIs(tpl, &dict, "hi include file bar",
+ false); // noop: syntax error
+ dict.AddIncludeDictionary("INC")->SetFilename(incname);
+ AssertExpandIs(tpl, &dict, "hi include fileinclude file bar", false);
+ dict.AddIncludeDictionary("inc")->SetFilename(incname);  // lowercase "inc" is a different name; output is unchanged
+ AssertExpandIs(tpl, &dict, "hi include fileinclude file bar", false);
+ dict.AddIncludeDictionary("INC")->SetFilename(incname2);
+ AssertExpandIs(tpl, &dict,
+ "hi include fileinclude fileinc2ainc2b bar", false);
+
+ // Now test that includes preserve Strip
+ Template* tpl2 = StringToTemplate("hi {{>INC}} bar", DO_NOT_STRIP);
+ AssertExpandIs(tpl2, &dict,
+ "hi include file\ninclude file\ninc2a\ninc2b\n bar", false);
+
+ // Test that if we indent the include, every line on the include
+ // is indented.
+ Template* tpl3 = StringToTemplate("hi\n {{>INC}} bar", DO_NOT_STRIP);
+ AssertExpandIs(tpl3, &dict,
+ "hi\n include file\n include file\n"
+ " inc2a\n inc2b\n bar",
+ false);
+ // But obviously, if we strip leading whitespace, no indentation.
+ Template* tpl4 = StringToTemplate("hi\n {{>INC}} bar", STRIP_WHITESPACE);
+ AssertExpandIs(tpl4, &dict,
+ "hiinclude fileinclude fileinc2ainc2b bar", false);
+ // And if it's not a whitespace indent, we don't indent either.
+ Template* tpl5 = StringToTemplate("hi\n - {{>INC}} bar", DO_NOT_STRIP);
+ AssertExpandIs(tpl5, &dict,
+ "hi\n - include file\ninclude file\n"
+ "inc2a\ninc2b\n bar",
+ false);
+ // Make sure we indent properly at the beginning.
+ Template* tpl6 = StringToTemplate(" {{>INC}}\nbar", DO_NOT_STRIP);
+ AssertExpandIs(tpl6, &dict,
+ " include file\n include file\n"
+ " inc2a\n inc2b\n \nbar",
+ false);
+ // And deal correctly when we include twice in a row.
+ Template* tpl7 = StringToTemplate(" {{>INC}}-{{>INC}}", DO_NOT_STRIP);
+ AssertExpandIs(tpl7, &dict,
+ " include file\n include file\n inc2a\n inc2b\n "
+ "-include file\ninclude file\ninc2a\ninc2b\n",
+ false);
+}
+
+TEST(Template, IncludeWithModifiers) {  // Modifiers attached to {{>INC:...}} apply to the included output.
+ string incname = StringToTemplateFile("include & print file\n");
+ string incname2 = StringToTemplateFile("inc2\n");
+ string incname3 = StringToTemplateFile("yo&yo");
+ // Note this also tests that html-escape, but not javascript-escape or
+ // pre-escape, escapes \n to <space>
+ Template* tpl1 = StringToTemplate("hi {{>INC:h}} bar\n", DO_NOT_STRIP);
+ Template* tpl2 = StringToTemplate("hi {{>INC:javascript_escape}} bar\n",
+ DO_NOT_STRIP);
+ Template* tpl3 = StringToTemplate("hi {{>INC:pre_escape}} bar\n",
+ DO_NOT_STRIP);
+ Template* tpl4 = StringToTemplate("hi {{>INC:u}} bar\n", DO_NOT_STRIP);
+ // Test that if we include the same template twice, once with a modifier
+ // and once without, they each get applied properly.
+ Template* tpl5 = StringToTemplate("hi {{>INC:h}} bar {{>INC}} baz\n",
+ DO_NOT_STRIP);
+
+ TemplateDictionary dict("dict");
+ AssertExpandIs(tpl1, &dict, "hi bar\n", true);  // no include dictionaries yet
+ dict.AddIncludeDictionary("INC")->SetFilename(incname);
+ AssertExpandIs(tpl1, &dict, "hi include & print file bar\n", true);
+ dict.AddIncludeDictionary("INC")->SetFilename(incname2);
+ AssertExpandIs(tpl1, &dict, "hi include & print file inc2 bar\n",
+ true);
+ AssertExpandIs(tpl2, &dict, "hi include \\x26 print file\\ninc2\\n bar\n",
+ true);
+ AssertExpandIs(tpl3, &dict, "hi include & print file\ninc2\n bar\n",
+ true);
+ dict.AddIncludeDictionary("INC")->SetFilename(incname3);  // third include; appears in the tpl4 and tpl5 checks
+ AssertExpandIs(tpl4, &dict,
+ "hi include+%26+print+file%0Ainc2%0Ayo%26yo bar\n",
+ true);
+ AssertExpandIs(tpl5, &dict,
+ "hi include & print file inc2 yo&yo bar "
+ "include & print file\ninc2\nyo&yo baz\n",
+ true);
+
+ // Don't test modifier syntax here; that's in TEST(Template, VariableWithModifiers)
+}
+
+// A template that includes itself must still expand without deadlocking.
+// The expected output also pins down how indentation nests across levels.
+TEST(Template, RecursiveInclude) {
+ string self_name = StringToTemplateFile("hi {{>INC}} bar\n {{>INC}}!");
+ Template* self_tpl = Template::GetTemplate(self_name, DO_NOT_STRIP);
+ TemplateDictionary top("dict");
+ top.AddIncludeDictionary("INC")->SetFilename(self_name);
+ // The final line carries 4 spaces of indent rather than 2, since the
+ // last sub-include is itself indented.
+ AssertExpandIs(self_tpl, &top, "hi hi bar\n ! bar\n hi bar\n !!", true);
+}
+
+// Variables set on a parent are visible in child sections unless a child overrides them.
+TEST(Template, Inheritence) {
+ Template* current_tpl = StringToTemplate("{{FOO}}{{#SEC}}{{FOO}}{{#SEC}}{{FOO}}{{/SEC}}{{/SEC}}",
+ STRIP_WHITESPACE);
+ TemplateDictionary flat("dict");
+ flat.SetValue("FOO", "foo");
+ flat.ShowSection("SEC");
+ AssertExpandIs(current_tpl, &flat, "foofoofoo", true);
+
+ TemplateDictionary root("dict2");
+ root.SetValue("FOO", "foo");
+ TemplateDictionary* outer_sec = root.AddSectionDictionary("SEC");
+ AssertExpandIs(current_tpl, &root, "foofoofoo", true);  // child has no FOO yet
+ outer_sec->SetValue("FOO", "bar");
+ AssertExpandIs(current_tpl, &root, "foobarbar", true);  // override applies to both nesting levels
+ TemplateDictionary* inner_sec = outer_sec->AddSectionDictionary("SEC");
+ AssertExpandIs(current_tpl, &root, "foobarbar", true);
+ inner_sec->SetValue("FOO", "baz");
+ AssertExpandIs(current_tpl, &root, "foobarbaz", true);  // innermost override wins
+
+ // An included template, by contrast, must see nothing from the including dictionary.
+ current_tpl = StringToTemplate("{{FOO}}{{#SEC}}hi{{/SEC}}\n{{>INC}}",
+ STRIP_WHITESPACE);
+ string inc_path = StringToTemplateFile(
+ "include {{FOO}}{{#SEC}}invisible{{/SEC}}file\n");
+ TemplateDictionary including("dict");
+ including.ShowSection("SEC");
+ including.SetValue("FOO", "foo");
+ including.AddIncludeDictionary("INC")->SetFilename(inc_path);
+ AssertExpandIs(current_tpl, &including, "foohiinclude file", true);
+}
+
+TEST(Template, TemplateString) {
+ // Values passed as TemplateString or StaticTemplateString must expand
+ // exactly as they would when passed as char*.
+ Template* var_tpl = StringToTemplate("hi {{VAR}} lo", STRIP_WHITESPACE);
+ TemplateDictionary values("dict");
+ values.SetValue("VAR", TemplateString("short-lived", strlen("short")));  // explicit length: only "short" is used
+ AssertExpandIs(var_tpl, &values, "hi short lo", true);
+ values.SetValue("VAR", kHello);
+ AssertExpandIs(var_tpl, &values, "hi Hello lo", true);
+}
+
+// Expand() must add to whatever the output string already holds, not replace it.
+TEST(Template, Expand) {
+ Template* current_tpl = StringToTemplate("hi", STRIP_WHITESPACE);
+ TemplateDictionary no_values("test_expand");
+ string accumulated("premade");
+ ASSERT(current_tpl->Expand(&accumulated, &no_values));
+ ASSERT_STREQ(accumulated.c_str(), "premadehi");
+
+ current_tpl = StringToTemplate(" lo ", STRIP_WHITESPACE);
+ ASSERT(current_tpl->Expand(&accumulated, &no_values));
+ ASSERT_STREQ(accumulated.c_str(), "premadehilo");
+}
+
+TEST(Template, ExpandTemplate) {  // Free-function ExpandTemplate / ExpandWithData taking a filename.
+ string tpl_path = StringToTemplateFile(" hi {{THERE}}");
+ TemplateDictionary values("test_expand");
+ values.SetValue("THERE", "test");
+ string accumulated;
+ ASSERT(ExpandTemplate(tpl_path, STRIP_WHITESPACE, &values, &accumulated));
+ ASSERT_STREQ(accumulated.c_str(), "hi test");
+
+ // The second expansion is appended, so both results appear back to back.
+ ASSERT(ExpandWithData(tpl_path, DO_NOT_STRIP, &values, NULL, &accumulated));
+ ASSERT_STREQ(accumulated.c_str(), "hi test hi test");
+
+ ASSERT(!ExpandTemplate(tpl_path + " not found", DO_NOT_STRIP, &values,  // no such file
+ &accumulated));
+}
+
+TEST(Template, ExpandWithCustomEmitter) {  // Expand() into a caller-supplied emitter instead of a string.
+ Template* two_vars = StringToTemplate("{{VAR}} {{VAR}}", STRIP_WHITESPACE);
+ TemplateDictionary values("test_expand");
+ values.SetValue("VAR", "this song is just six words long");
+ string emitted;
+ SizeofEmitter size_emitter(&emitted);
+ ASSERT(two_vars->Expand(&size_emitter, &values));
+ ASSERT_STREQ("XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX"
+ "XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX",
+ emitted.c_str());
+}
+
+TEST(Template, TemplateExpansionModifier) {  // A per-expand modifier wraps only the template it names.
+ string parent_path = StringToTemplateFile("before {{>INC}} after");
+ string child1_path = StringToTemplateFile("child1");
+ string child2_path = StringToTemplateFile("child2");
+ Template* parent_tpl = Template::GetTemplate(parent_path, DO_NOT_STRIP);
+
+ TemplateDictionary parent_dict("parent dict");
+ parent_dict.AddIncludeDictionary("INC")->SetFilename(child1_path);
+ parent_dict.AddIncludeDictionary("INC")->SetFilename(child2_path);
+
+ PerExpandData expand_data;
+
+ EmphasizeTemplateModifier mark_child1(child1_path);
+ expand_data.SetTemplateExpansionModifier(&mark_child1);
+ AssertExpandWithDataIs(parent_tpl, &parent_dict, &expand_data,
+ "before >>child1<<child2 after", true);
+
+ EmphasizeTemplateModifier mark_child2(child2_path);
+ expand_data.SetTemplateExpansionModifier(&mark_child2);
+ AssertExpandWithDataIs(parent_tpl, &parent_dict, &expand_data,
+ "before child1>>child2<< after", true);
+
+ EmphasizeTemplateModifier mark_parent(parent_path);
+ expand_data.SetTemplateExpansionModifier(&mark_parent);
+ AssertExpandWithDataIs(parent_tpl, &parent_dict, &expand_data,
+ ">>before child1child2 after<<", true);
+
+ expand_data.SetTemplateExpansionModifier(NULL);  // clearing the modifier restores plain output
+ AssertExpandWithDataIs(parent_tpl, &parent_dict, &expand_data,
+ "before child1child2 after", true);
+}
+
+TEST(Template, GetTemplate) {
+ // Cache behaviour: same (file, strip) gives the same object; a different strip gives another.
+ string tpl_path = StringToTemplateFile("{This is perfectly valid} yay!");
+ Template* by_string = Template::GetTemplate(tpl_path, DO_NOT_STRIP);
+ Template* by_cstr = Template::GetTemplate(tpl_path.c_str(), DO_NOT_STRIP);
+ Template* other_strip = Template::GetTemplate(tpl_path, STRIP_WHITESPACE);
+ ASSERT(by_string && by_cstr && other_strip);
+ ASSERT(by_string == by_cstr);
+ ASSERT(by_string != other_strip);
+
+ // A file that does not exist must yield NULL.
+ Template* missing = Template::GetTemplate("/yakakak", STRIP_WHITESPACE);
+ ASSERT(!missing);
+
+ // Templates with syntax errors must yield NULL as well.
+ Template* spaces_in_name = StringToTemplate("{{This has spaces in it}}", DO_NOT_STRIP);
+ ASSERT(!spaces_in_name);
+ Template* unclosed_section = StringToTemplate("{{#SEC}}foo", DO_NOT_STRIP);
+ ASSERT(!unclosed_section);
+ Template* mismatched_close = StringToTemplate("{{#S1}}foo{{/S2}}", DO_NOT_STRIP);
+ ASSERT(!mismatched_close);
+ Template* bad_nesting = StringToTemplate("{{#S1}}foo{{#S2}}bar{{/S1}{{/S2}",
+ DO_NOT_STRIP);
+ ASSERT(!bad_nesting);
+ Template* unterminated = StringToTemplate("{{noend", DO_NOT_STRIP);
+ ASSERT(!unterminated);
+}
+
+TEST(Template, StringCacheKey) {
+ // If you use these same cache keys somewhere else,
+ // call Template::ClearCache first.
+ const string cache_key_a = "cache key a";
+ const string text = "Test template 1";
+ TemplateDictionary empty_dict("dict");
+
+ // When a string template is registered via StringToTemplateCache,
+ // we can use GetTemplate for that same cache-key under any other
+ // Strip because we cache the contents.
+ Template *tpl1, *tpl2;
+ ASSERT(Template::StringToTemplateCache(cache_key_a, text));
+ ASSERT(tpl1 = Template::GetTemplate(cache_key_a, DO_NOT_STRIP));  // check for NULL before use, as for tpl2 below
+ AssertExpandIs(tpl1, &empty_dict, text, true);
+
+ // Different strip: a distinct Template object that expands to the same text.
+ ASSERT(tpl2 = Template::GetTemplate(cache_key_a, STRIP_BLANK_LINES));
+ ASSERT(tpl2 != tpl1);
+ AssertExpandIs(tpl2, &empty_dict, text, true);
+
+ Template::ClearCache();  // drop the key registered above so later tests can reuse it
+}
+
+TEST(Template, StringGetTemplate) {
+ TemplateDictionary no_values("dict");
+
+ // Look up a string-registered template under two strip modes.
+ const char* const registered_text = "{This is perfectly valid} yay!";
+ ASSERT(Template::StringToTemplateCache("tgt", registered_text));
+
+ Template* unstripped = Template::GetTemplate("tgt", DO_NOT_STRIP);
+ Template* stripped = Template::GetTemplate("tgt", STRIP_WHITESPACE);
+ ASSERT(unstripped && stripped);
+ ASSERT(unstripped != stripped);
+ AssertExpandIs(unstripped, &no_values, registered_text, true);
+ AssertExpandIs(stripped, &no_values, registered_text, true);
+
+ // Registering again under a key that is already taken must be refused,
+ // whether the text is the same or new.
+ ASSERT(!Template::StringToTemplateCache("tgt", registered_text));
+ ASSERT(!Template::StringToTemplateCache("tgt", "new text"));
+ Template* looked_up_again = Template::GetTemplate("tgt", DO_NOT_STRIP);
+ ASSERT(looked_up_again == unstripped);
+ AssertExpandIs(looked_up_again, &no_values, registered_text, true);
+
+ // Text with syntax errors must be rejected at registration time
+ ASSERT(!Template::StringToTemplateCache("tgt2", "{{This has spaces}}"));
+ ASSERT(!Template::StringToTemplateCache("tgt3", "{{#SEC}}foo"));
+ ASSERT(!Template::StringToTemplateCache("tgt4", "{{#S1}}foo{{/S2}}"));
+ ASSERT(!Template::StringToTemplateCache("tgt5",
+ "{{#S1}}foo{{#S2}}bar{{/S1}{{/S2}"));
+ ASSERT(!Template::StringToTemplateCache("tgt6", "{{noend"));
+ // ...and a rejected key must not be retrievable afterwards
+ ASSERT(!Template::GetTemplate("tgt2", STRIP_WHITESPACE));
+
+ Template::ClearCache();
+}
+
+TEST(Template, StringTemplateInclude) {
+ Template::ClearCache(); // just for exercise.
+ const string parent_key = "TestStringTemplateInclude";
+ const string child_key = "TestStringTemplateInclude-inc";
+ const string indented_key = "TestStringTemplateInclude-indent";
+ const string parent_text = "<html>{{>INC}}</html>";
+ const string child_text = "<div>\n<p>\nUser {{USER}}\n</div>";
+ const string indented_text = "<html>\n {{>INC}}</html>";
+
+ ASSERT(Template::StringToTemplateCache(parent_key, parent_text));
+ ASSERT(Template::StringToTemplateCache(child_key, child_text));
+ ASSERT(Template::StringToTemplateCache(indented_key, indented_text));
+
+ Template *current_tpl = Template::GetTemplate(parent_key, DO_NOT_STRIP);
+ ASSERT(current_tpl);
+
+ TemplateDictionary top("dict");
+ TemplateDictionary* inc_dict = top.AddIncludeDictionary("INC");
+ inc_dict->SetFilename(child_key);  // the include is resolved by cache key, not by a file on disk
+
+ inc_dict->SetValue("USER", "John<>Doe");
+ string want = "<html><div>\n<p>\nUser John<>Doe\n</div></html>";
+ AssertExpandIs(current_tpl, &top, want, true);
+
+ // Same expansion again, but the parent now indents its include marker,
+ // so every line of the included text is expected to pick up that indent.
+ current_tpl = Template::GetTemplate(indented_key, DO_NOT_STRIP);
+ ASSERT(current_tpl);
+ want =
+ "<html>\n"
+ " <div>\n"
+ " <p>\n"
+ " User John<>Doe\n"
+ " </div>"
+ "</html>";
+ AssertExpandIs(current_tpl, &top, want, true);
+
+ Template::ClearCache();
+}
+
+TEST(Template, TemplateSearchPath) {
+ const string primary_dir = PathJoin(FLAGS_test_tmpdir, "a/");
+ const string secondary_dir = PathJoin(FLAGS_test_tmpdir, "b/");
+ CreateOrCleanTestDir(primary_dir);
+ CreateOrCleanTestDir(secondary_dir);
+
+ TemplateDictionary no_values("");
+ Template::SetTemplateRootDirectory(primary_dir);
+ Template::AddAlternateTemplateRootDirectory(secondary_dir);
+
+ // 1. A template that exists only in the alternate directory is still found.
+ const string bar_in_secondary = PathJoin(secondary_dir, "template_bar");
+ StringToFile("b/template_bar", bar_in_secondary);
+ ASSERT_STREQ(bar_in_secondary.c_str(),
+ Template::FindTemplateFilename("template_bar").c_str());
+ Template* bar_tpl = Template::GetTemplate("template_bar", DO_NOT_STRIP);
+ ASSERT(bar_tpl);
+ AssertExpandIs(bar_tpl, &no_values, "b/template_bar", true);
+
+ // 2. When both directories hold a file of the same name, the search
+ // stops at the first match, which is the one in the root directory.
+ const string foo_in_primary = PathJoin(primary_dir, "template_foo");
+ StringToFile("a/template_foo", foo_in_primary);
+ StringToFile("b/template_foo", PathJoin(secondary_dir, "template_foo"));
+ ASSERT_STREQ(foo_in_primary.c_str(),
+ Template::FindTemplateFilename("template_foo").c_str());
+ Template* foo_tpl = Template::GetTemplate("template_foo", DO_NOT_STRIP);
+ ASSERT(foo_tpl);
+ AssertExpandIs(foo_tpl, &no_values, "a/template_foo", true);
+
+ // 3. A name present in neither directory resolves to an
+ // empty path.
+ ASSERT(Template::FindTemplateFilename("baz").empty());
+
+ CreateOrCleanTestDir(primary_dir);
+ CreateOrCleanTestDir(secondary_dir);
+}
+
+TEST(Template, RemoveStringFromTemplateCache) {
+ Template::ClearCache(); // just for exercise.
+ const string removable_key = "TestRemoveStringFromTemplateCache";
+ const string registered_text = "<html>here today...</html>";
+
+ TemplateDictionary no_values("test");
+ ASSERT(Template::StringToTemplateCache(removable_key, registered_text));
+ Template* looked_up = Template::GetTemplate(removable_key, DO_NOT_STRIP);
+ ASSERT(looked_up);
+ AssertExpandIs(looked_up, &no_values, registered_text, true);
+ looked_up = Template::GetTemplate(removable_key, STRIP_WHITESPACE);
+ ASSERT(looked_up);
+ AssertExpandIs(looked_up, &no_values, registered_text, true);
+
+ Template::RemoveStringFromTemplateCache(removable_key);  // afterwards the key must be gone for every strip mode
+ looked_up = Template::GetTemplate(removable_key, DO_NOT_STRIP);
+ ASSERT(!looked_up);
+ looked_up = Template::GetTemplate(removable_key, STRIP_WHITESPACE);
+ ASSERT(!looked_up);
+ looked_up = Template::GetTemplate(removable_key, STRIP_BLANK_LINES);
+ ASSERT(!looked_up);
+}
+
+TEST(Template, TemplateCache) {
+  const string file_one = StringToTemplateFile("Test template 1");
+  const string file_two = StringToTemplateFile("Test template 2.");
+  Template* const baseline = Template::GetTemplate(file_one, DO_NOT_STRIP);
+  ASSERT(baseline != NULL);
+  // A different filename and/or strip mode must yield a different object...
+  Template* other = Template::GetTemplate(file_two, DO_NOT_STRIP);
+  ASSERT(other != NULL && other != baseline);  // filename differs
+  other = Template::GetTemplate(file_one, STRIP_BLANK_LINES);
+  ASSERT(other != NULL && other != baseline);  // strip differs
+  other = Template::GetTemplate(file_two, STRIP_BLANK_LINES);
+  ASSERT(other != NULL && other != baseline);  // both differ
+  // ...whereas repeating the original pair returns the same object.
+  other = Template::GetTemplate(file_one, DO_NOT_STRIP);
+  ASSERT(other != NULL && other == baseline);
+}
+
+// Expands each input under every Strip mode and checks the per-mode expected output.
+TEST(Template, Strip) {
+ TemplateDictionary dict("dict");
+ dict.SetValue("FOO", "foo");
+
+ const char* tests[][4] = { // 0: in, 1: do-not-strip, 2: blanklines, 3: ws
+ {"hi!\n", "hi!\n", "hi!\n", "hi!"},
+ {"hi!", "hi!", "hi!", "hi!"},
+ // These test strip-blank-lines, primarily
+ {"{{FOO}}\n\n{{FOO}}", "foo\n\nfoo", "foo\nfoo", "foofoo"},
+ {"{{FOO}}\r\n\r\n{{FOO}}", "foo\r\n\r\nfoo", "foo\r\nfoo", "foofoo"},
+ {"{{FOO}}\n \n{{FOO}}\n", "foo\n \nfoo\n", "foo\nfoo\n", "foofoo"},
+ {"{{FOO}}\n{{BI_NEWLINE}}\nb", "foo\n\n\nb", "foo\n\n\nb", "foo\nb"},
+ {"{{FOO}}\n{{!comment}}\nb", "foo\n\nb", "foo\nb", "foob"},
+ {"{{FOO}}\n{{!comment}}{{!comment2}}\nb", "foo\n\nb", "foo\n\nb", "foob"},
+ {"{{FOO}}\n{{>ONE_INC}}\nb", "foo\n\nb", "foo\nb", "foob"},
+ {"{{FOO}}\n\t{{>ONE_INC}} \nb", "foo\n\t \nb", "foo\nb", "foob"},
+ {"{{FOO}}\n{{>ONE_INC}}{{>TWO_INC}}\nb", "foo\n\nb", "foo\n\nb", "foob"},
+ {"{{FOO}}\n {{#SEC}}\ntext \n {{/SEC}}\n", "foo\n \n", "foo\n", "foo"},
+ {"{{%AUTOESCAPE context=\"HTML\"}}\nBLA", "\nBLA", "BLA", "BLA"},
+ // These test strip-whitespace
+ {"foo\nbar\n", "foo\nbar\n", "foo\nbar\n", "foobar"},
+ {"{{FOO}}\nbar\n", "foo\nbar\n", "foo\nbar\n", "foobar"},
+ {" {{FOO}} {{!comment}}\nb", " foo \nb", " foo \nb", "foo b"},
+ {" {{FOO}} {{BI_SPACE}}\n", " foo \n", " foo \n", "foo "},
+ {" \t \f\v \n\r\n ", " \t \f\v \n\r\n ", "", ""},
+ };
+ // sizeof() is unsigned, so the row index is size_t (avoids a signed/unsigned compare).
+ for (size_t i = 0; i < sizeof(tests)/sizeof(*tests); ++i) {
+ Template* tpl1 = StringToTemplate(tests[i][0], DO_NOT_STRIP);
+ Template* tpl2 = StringToTemplate(tests[i][0], STRIP_BLANK_LINES);
+ Template* tpl3 = StringToTemplate(tests[i][0], STRIP_WHITESPACE);
+ AssertExpandIs(tpl1, &dict, tests[i][1], true);
+ AssertExpandIs(tpl2, &dict, tests[i][2], true);
+ AssertExpandIs(tpl3, &dict, tests[i][3], true);
+ }
+}
+
+TEST(Template, TemplateRootDirectory) {
+  // Absolute filenames ignore the template root; relative ones resolve against it.
+  string filename = StringToTemplateFile("Test template");
+  ASSERT(IsAbspath(filename));
+  Template* tpl1 = Template::GetTemplate(filename, DO_NOT_STRIP);
+  Template::SetTemplateRootDirectory(kRootdir);   // "/"
+  Template* tpl2 = Template::GetTemplate(filename, DO_NOT_STRIP);
+  Template::SetTemplateRootDirectory("/sadfadsf/waerfsa/safdg");
+  Template* tpl3 = Template::GetTemplate(filename, DO_NOT_STRIP);
+  ASSERT(tpl1 != NULL);
+  ASSERT(tpl1 == tpl2);
+  ASSERT(tpl1 == tpl3);
+
+  // Now check relative lookups by splitting the abspath at every separator.
+  // Both "/" and "\" are tried since the platform separator is unknown; this
+  // assumes filename uses them only as separators (no "/var/tmp/foo\a/file").
+  const char* const kPathSeps = "/\\";
+  for (const char* path_sep = kPathSeps; *path_sep; path_sep++) {
+    for (string::size_type sep_pos = filename.find(*path_sep, 0);
+         sep_pos != string::npos;
+         sep_pos = filename.find(*path_sep, sep_pos + 1)) {
+      Template::SetTemplateRootDirectory(filename.substr(0, sep_pos + 1));
+      Template* tpl = Template::GetTemplate(filename.substr(sep_pos + 1),
+                                            DO_NOT_STRIP);
+      ASSERT(tpl != NULL);  // fail cleanly instead of dereferencing NULL
+      ASSERT(string(tpl->template_file()) == tpl1->template_file());
+    }
+  }
+}
+
+#if defined(HAVE_PTHREAD) && !defined(NO_THREADS)
+struct ThreadReturn { // per-thread results filled in by RunThread()
+ Template* file_template; // GetTemplate() result for the filename passed to the thread
+ bool string_to_template_cache_return; // value StringToTemplateCache() returned in this thread
+ Template* string_template; // GetTemplate() result for the "RunThread key" string template
+};
+
+// Thread entry point. The caller must delete the ThreadReturn* it returns.
+static void* RunThread(void* vfilename) {
+  const char* const path = static_cast<const char*>(vfilename);
+  const char* const cache_key = "RunThread key";  // same key in every thread
+  ThreadReturn* const result = new ThreadReturn;
+  result->file_template = Template::GetTemplate(path, DO_NOT_STRIP);
+  ASSERT(result->file_template != NULL);
+  result->string_to_template_cache_return =
+      StringToTemplateCache(cache_key, " RunThread text ", STRIP_WHITESPACE);
+  result->string_template = Template::GetTemplate(cache_key, STRIP_WHITESPACE);
+  ASSERT(result->string_template != NULL);
+  return result;
+}
+
+TEST(Template, ThreadSafety) {
+  string filename = StringToTemplateFile("(testing thread-safety)");
+
+  // GetTemplate() is the routine most contended between threads, so call it
+  // from many threads at once; each must end up with the same templates.
+  pthread_t workers[kNumThreads];
+  for (int t = 0; t < kNumThreads; ++t) {
+    const int rc = pthread_create(&workers[t], NULL, RunThread,
+                                  (void*)filename.c_str());
+    ASSERT(rc == 0);
+  }
+
+  // Join every thread (they should finish quickly), comparing each result
+  // with the first one joined.
+  ThreadReturn* reference = NULL;
+  int cache_insert_successes = 0;
+  for (int t = 0; t < kNumThreads; ++t) {
+    void* raw_result;
+    const int rc = pthread_join(workers[t], &raw_result);
+    ASSERT(rc == 0);
+    ThreadReturn* const current = reinterpret_cast<ThreadReturn*>(raw_result);
+    if (current->string_to_template_cache_return) {
+      ++cache_insert_successes;
+    }
+    if (reference == NULL) {
+      reference = current;  // freed after the loop; others compare against it
+      continue;
+    }
+    ASSERT(current->file_template == reference->file_template);
+    ASSERT(current->string_template == reference->string_template);
+    delete current;
+  }
+  delete reference;
+
+  // StringToTemplateCache() should have returned true in exactly one thread.
+  ASSERT_INTEQ(1, cache_insert_successes);
+  Template::ClearCache();
+}
+#endif // #if defined(HAVE_PTHREAD) && !defined(NO_THREADS)
+
+// Tests the static TemplateNamelist methods (name list, missing, bad syntax, lastmod, register).
+TEST(Template, TemplateNamelist) {
+ time_t before_time = Now(); // in template_test_util.cc
+ string f1 = StringToTemplateFile("{{This has spaces in it}}"); // marker name contains spaces
+ string f2 = StringToTemplateFile("{{#SEC}}foo"); // section is opened but never closed
+ string f3 = StringToTemplateFile("{This is ok"); // single brace only, so no template marker
+ // Where we'll copy f1 - f3 to: these are names known at compile-time
+ string f1_copy = PathJoin(FLAGS_test_tmpdir, INVALID1_FN);
+ string f2_copy = PathJoin(FLAGS_test_tmpdir, INVALID2_FN);
+ string f3_copy = PathJoin(FLAGS_test_tmpdir, VALID1_FN);
+ Template::SetTemplateRootDirectory(FLAGS_test_tmpdir);
+ time_t after_time = Now(); // f1, f2, f3 all written by now
+
+ TemplateNamelist::NameListType names = TemplateNamelist::GetList();
+ ASSERT(names.size() == 4);
+ ASSERT(names.count(NONEXISTENT_FN));
+ ASSERT(names.count(INVALID1_FN));
+ ASSERT(names.count(INVALID2_FN));
+ ASSERT(names.count(VALID1_FN));
+
+ // Before creating the files INVALID1_FN, etc., all should be missing.
+ for (int i = 0; i < 3; ++i) { // should be consistent all 3 times
+ const TemplateNamelist::MissingListType& missing =
+ TemplateNamelist::GetMissingList(false);
+ ASSERT(missing.size() == 4);
+ }
+ // Everyone is missing, but nobody should have bad syntax
+ ASSERT(!TemplateNamelist::AllDoExist());
+ ASSERT(TemplateNamelist::IsAllSyntaxOkay(DO_NOT_STRIP));
+
+ // Now create those files
+ ASSERT(link(f1.c_str(), f1_copy.c_str()) == 0);
+ ASSERT(link(f2.c_str(), f2_copy.c_str()) == 0);
+ ASSERT(link(f3.c_str(), f3_copy.c_str()) == 0);
+ // We also have to clear the template cache, since we created a new file.
+ // ReloadAllIfChanged() would probably work, too.
+ Template::ClearCache();
+
+ // When the GetMissingList argument is false, we don't reload, so you still get all-gone
+ TemplateNamelist::MissingListType missing =
+ TemplateNamelist::GetMissingList(false);
+ ASSERT(missing.size() == 4);
+ // But with true, we should have a different story
+ missing = TemplateNamelist::GetMissingList(true);
+ ASSERT(missing.size() == 1);
+ missing = TemplateNamelist::GetMissingList(false);
+ ASSERT(missing.size() == 1);
+ ASSERT(missing[0] == NONEXISTENT_FN);
+ ASSERT(!TemplateNamelist::AllDoExist());
+
+ // IsAllSyntaxOkay did a badsyntax check, before the files were created.
+ // So with a false arg, should still say everything is ok
+ TemplateNamelist::SyntaxListType badsyntax =
+ TemplateNamelist::GetBadSyntaxList(false, DO_NOT_STRIP);
+ ASSERT(badsyntax.size() == 0);
+ // But IsAllSyntaxOkay forces a refresh
+ ASSERT(!TemplateNamelist::IsAllSyntaxOkay(DO_NOT_STRIP));
+ badsyntax = TemplateNamelist::GetBadSyntaxList(false, DO_NOT_STRIP);
+ ASSERT(badsyntax.size() == 2);
+ ASSERT(badsyntax[0] == INVALID1_FN || badsyntax[1] == INVALID1_FN);
+ ASSERT(badsyntax[0] == INVALID2_FN || badsyntax[1] == INVALID2_FN);
+ ASSERT(!TemplateNamelist::IsAllSyntaxOkay(DO_NOT_STRIP));
+ badsyntax = TemplateNamelist::GetBadSyntaxList(true, DO_NOT_STRIP);
+ ASSERT(badsyntax.size() == 2);
+
+ time_t modtime = TemplateNamelist::GetLastmodTime();
+ ASSERT(modtime >= before_time && modtime <= after_time);
+ // Now update a file and make sure lastmod time is updated.
+ // Note that since TemplateToFile uses "fake" timestamps way
+ // in the past, this append should definitely give a time
+ // that's after after_time.
+ FILE* fp = fopen(f1_copy.c_str(), "ab");
+ ASSERT(fp);
+ fwrite("\n", 1, 1, fp);
+ fclose(fp);
+ modtime = TemplateNamelist::GetLastmodTime();
+ ASSERT(modtime > after_time);
+
+ // Checking if we can register templates at run time.
+ string f4 = StringToTemplateFile("{{ONE_GOOD_TEMPLATE}}");
+ TemplateNamelist::RegisterTemplate(f4.c_str());
+ names = TemplateNamelist::GetList();
+ ASSERT(names.size() == 5);
+
+ string f5 = StringToTemplateFile("{{ONE BAD TEMPLATE}}");
+ TemplateNamelist::RegisterTemplate(f5.c_str());
+ names = TemplateNamelist::GetList();
+ ASSERT(names.size() == 6);
+ badsyntax = TemplateNamelist::GetBadSyntaxList(false, DO_NOT_STRIP);
+ ASSERT(badsyntax.size() == 2); // we did not refresh the bad syntax list
+ badsyntax = TemplateNamelist::GetBadSyntaxList(true, DO_NOT_STRIP);
+ // After refresh, the file we just registered is also in the bad syntax list
+ ASSERT(badsyntax.size() == 3);
+
+ TemplateNamelist::RegisterTemplate("A_non_existant_file.tpl");
+ names = TemplateNamelist::GetList();
+ ASSERT(names.size() == 7);
+ missing = TemplateNamelist::GetMissingList(false);
+ ASSERT(missing.size() == 1); // we did not refresh the missing list
+ missing = TemplateNamelist::GetMissingList(true);
+ // After refresh, the file we just registered is also in the missing list
+ ASSERT(missing.size() == 2);
+}
+
+// Not an "end-to-end" test: no dictionary is used. For each context and
+// template text it only checks, through AssertCorrectModifiers(), the
+// modifiers the template system reports for every variable.
+TEST(Template, CorrectModifiersForAutoEscape) {
+ string text, expected_out;
+
+ // template with no variable, nothing to emit.
+ text = "Static template.";
+ AssertCorrectModifiers(TC_HTML, text, "");
+
+ // Simple templates with one variable substitution.
+
+ // 1. No in-template modifiers. Auto Escaper sets correct ones.
+ text = "Hello {{USER}}";
+ AssertCorrectModifiers(TC_HTML, text, "USER:h\n");
+
+ // Complete URLs in different attributes that take URLs.
+ text = "<a href=\"{{URL}}\">bla</a>";
+ AssertCorrectModifiers(TC_HTML, text, "URL:U=html\n");
+ text = "<script src=\"{{URL}}\"></script>";
+ AssertCorrectModifiers(TC_HTML, text, "URL:U=html\n");
+ text = "<img src=\"{{URL}}\">";
+ AssertCorrectModifiers(TC_HTML, text, "URL:U=html\n");
+ // URL fragment only so just html_escape.
+ text = "<img src=\"/bla?q={{QUERY}}\">";
+ AssertCorrectModifiers(TC_HTML, text, "QUERY:h\n");
+ // URL fragment not quoted, so url_escape.
+ text = "<img src=/bla?q={{QUERY}}>";
+ AssertCorrectModifiers(TC_HTML, text, "QUERY:u\n");
+
+ text = "<br class=\"{{CLASS}}\">";
+ AssertCorrectModifiers(TC_HTML, text, "CLASS:h\n");
+ text = "<br class={{CLASS}}>";
+ AssertCorrectModifiers(TC_HTML, text, "CLASS:H=attribute\n");
+ text = "<br {{CLASS}}>"; // CLASS here is name/value pair.
+ AssertCorrectModifiers(TC_HTML, text, "CLASS:H=attribute\n");
+ text = "<br style=\"display:{{DISPLAY}}\">"; // Style attribute.
+ AssertCorrectModifiers(TC_HTML, text, "DISPLAY:c\n");
+
+ // Content inside a style tag should have :c regardless of quoting.
+ text = "<style>color:{{COLOR}}; font:\"{{FONT}}\"</style>";
+ AssertCorrectModifiers(TC_HTML, text, "COLOR:c\nFONT:c\n");
+
+ // onMouseEvent and onKeyUp accept javascript.
+ text = "<a href=\"url\" onkeyup=\"doX('{{ID}}');\">"; // ID quoted
+ AssertCorrectModifiers(TC_HTML, text, "ID:j\n");
+ text = "<a href=\"url\" onclick=\"doX({{ID}});\">"; // ID not quoted
+ AssertCorrectModifiers(TC_HTML, text, "ID:J=number\n");
+ text = "<a href=\"url\" onclick=\"'{{ID}}'\">"; // not common
+ AssertCorrectModifiers(TC_HTML, text, "ID:j\n");
+ // If ID is javascript code, J=number will break it, for good and bad.
+ text = "<a href=\"url\" onclick=\"{{ID}}\">";
+ AssertCorrectModifiers(TC_HTML, text, "ID:J=number\n");
+
+ // Target just needs html escaping.
+ text = "<a href=\"url\" target=\"{{TARGET}}\">";
+ AssertCorrectModifiers(TC_HTML, text, "TARGET:h\n");
+
+ // Test a parsing corner case which uses TemplateDirective
+ // call in the parser to change state properly. To reproduce
+ // both variables should be unquoted and the first should
+ // have no value except the variable substitution.
+ text = "<img class={{CLASS}} src=/bla?q={{QUERY}}>";
+ AssertCorrectModifiers(TC_HTML, text, "CLASS:H=attribute\nQUERY:u\n");
+
+ // TODO(jad): Once we have a fix for it in code, fix me.
+ // Javascript URL is not properly supported, we currently
+ // apply :h which is not sufficient.
+ text = "<a href=\"javascript:foo('{{VAR}}')>bla</a>";
+ AssertCorrectModifiers(TC_HTML, text, "VAR:h\n");
+
+ // Special handling for BI_SPACE and BI_NEWLINE.
+ text = "{{BI_SPACE}}";
+ AssertCorrectModifiers(TC_HTML, text, "BI_SPACE\n"); // Untouched.
+ text = "{{BI_NEWLINE}}";
+ AssertCorrectModifiers(TC_HTML, text, "BI_NEWLINE\n"); // Untouched.
+ // Check that the parser is parsing BI_SPACE, if not, it would have failed.
+ text = "<a href=/bla{{BI_SPACE}}style=\"{{VAR}}\">text</a>";
+ AssertCorrectModifiers(TC_HTML, text, "BI_SPACE\nVAR:c\n");
+
+
+ // XML and JSON modes.
+ text = "<PARAM name=\"{{VAL}}\">{{DATA}}";
+ AssertCorrectModifiers(TC_XML, text, "VAL:xml_escape\nDATA:xml_escape\n");
+ text = "{ x = \"{{VAL}}\"}";
+ AssertCorrectModifiers(TC_JSON, text, "VAL:j\n");
+
+ // 2. Escaping modifiers were set, handle them.
+
+ // 2a: Modifier :none is honored whether the escaping is correct or not.
+ text = "Hello {{USER:none}}"; // :none on its own.
+ AssertCorrectModifiers(TC_HTML, text, "USER:none\n");
+ text = "Hello {{USER:h:none}}"; // correct escaping.
+ AssertCorrectModifiers(TC_HTML, text, "USER:h:none\n");
+ text = "Hello {{USER:j:none}}"; // incorrect escaping.
+ AssertCorrectModifiers(TC_HTML, text, "USER:j:none\n");
+ text = "<a href=\"url\" onkeyup=\"doX('{{ID:none}}');\">";
+ AssertCorrectModifiers(TC_HTML, text, "ID:none\n");
+
+ // 2b: Correct modifiers, nothing to change.
+ text = "Hello {{USER:h}}";
+ AssertCorrectModifiers(TC_HTML, text, "USER:h\n");
+ text = "Hello {{USER:U=html}}"; // :U=html is a valid replacement for :h
+ AssertCorrectModifiers(TC_HTML, text, "USER:U=html\n");
+ text = "Hello {{USER:H=url}}"; // :H=url (a.k.a. U=html) is valid too
+ AssertCorrectModifiers(TC_HTML, text, "USER:H=url\n");
+ text = "Hello {{USER:h:j}}"; // Extra :j, honor it.
+ AssertCorrectModifiers(TC_HTML, text, "USER:h:j\n");
+ text = "<a href=\"{{URL:U=html}}\">bla</a>";
+ AssertCorrectModifiers(TC_HTML, text, "URL:U=html\n");
+ text = "<a href=\"/bla?q={{QUERY:h}}\">bla</a>"; // :h is valid.
+ AssertCorrectModifiers(TC_HTML, text, "QUERY:h\n");
+ text = "<a href=\"/bla?q={{QUERY:u}}\">bla</a>"; // so is :u.
+ AssertCorrectModifiers(TC_HTML, text, "QUERY:u\n");
+ text = "<a href=\"url\" onclick=\"doX('{{ID:j}}');\">";
+ AssertCorrectModifiers(TC_HTML, text, "ID:j\n");
+ text = "<a href=\"url\" onclick=\"doX({{ID:J=number}});\">";
+ AssertCorrectModifiers(TC_HTML, text, "ID:J=number\n");
+ text = "<style>@import url(\"{{URL:U=css}}\")</style>"; // correct :U=css
+ AssertCorrectModifiers(TC_HTML, text, "URL:U=css\n");
+
+ // 2c: Incorrect modifiers, add our own.
+ text = "Hello {{USER:j}}"; // Missing :h
+ AssertCorrectModifiers(TC_HTML, text, "USER:j:h\n");
+ text = "Hello {{USER:c:c:c:c:c:j}}"; // Still missing :h
+ AssertCorrectModifiers(TC_HTML, text, "USER:c:c:c:c:c:j:h\n");
+ text = "<script>var a = \"{{VAR:h}}\";</script>"; // Missing :j
+ AssertCorrectModifiers(TC_HTML, text, "VAR:h:j\n");
+ text = "<script>var a = \"{{VAR:j:h:j}}\";</script>"; // Extra :h:j
+ AssertCorrectModifiers(TC_HTML, text, "VAR:j:h:j\n");
+ text = "<a href=\"url\" onclick=\"doX({{ID:j}});\">"; // Unquoted
+ AssertCorrectModifiers(TC_HTML, text, "ID:j:J=number\n");
+
+ // 2d: Custom modifiers are maintained.
+ text = "Hello {{USER:x-bla}}"; // Missing :h
+ AssertCorrectModifiers(TC_HTML, text, "USER:x-bla:h\n");
+ text = "Hello {{USER:x-bla:h}}"; // Correct, accept it.
+ AssertCorrectModifiers(TC_HTML, text, "USER:x-bla:h\n");
+ text = "Hello {{USER:x-bla:x-foo}}"; // Missing :h
+ AssertCorrectModifiers(TC_HTML, text, "USER:x-bla:x-foo:h\n");
+ text = "Hello {{USER:x-bla:none}}"; // Complete due to :none
+ AssertCorrectModifiers(TC_HTML, text, "USER:x-bla:none\n");
+ text = "Hello {{USER:h:x-bla}}"; // Still missing :h.
+ AssertCorrectModifiers(TC_HTML, text, "USER:h:x-bla:h\n");
+ text = "Hello {{USER:x-bla:h:x-foo}}"; // Still missing :h
+ AssertCorrectModifiers(TC_HTML, text, "USER:x-bla:h:x-foo:h\n");
+ text = "Hello {{USER:x-bla:h:x-foo:h}}"; // Valid, accept it.
+ AssertCorrectModifiers(TC_HTML, text, "USER:x-bla:h:x-foo:h\n");
+
+ // 2e: Equivalent modifiers are honored. All HTML Escapes.
+ text = "Hello {{USER:p}}";
+ AssertCorrectModifiers(TC_HTML, text, "USER:p\n");
+ text = "Hello {{USER:H=attribute}}";
+ AssertCorrectModifiers(TC_HTML, text, "USER:H=attribute\n");
+ text = "Hello {{USER:H=snippet}}";
+ AssertCorrectModifiers(TC_HTML, text, "USER:H=snippet\n");
+ text = "Hello {{USER:H=pre}}";
+ AssertCorrectModifiers(TC_HTML, text, "USER:H=pre\n");
+ // All URL + HTML Escapes.
+ text = "<a href=\"{{URL:H=url}}\">bla</a>";
+ AssertCorrectModifiers(TC_HTML, text, "URL:H=url\n");
+ text = "<a href=\"{{URL:U=html}}\">bla</a>";
+ AssertCorrectModifiers(TC_HTML, text, "URL:U=html\n");
+
+ // 2f: Initialize template in Javascript Context.
+ text = "var a = '{{VAR}}'"; // Escaping not given.
+ AssertCorrectModifiers(TC_JS, text, "VAR:j\n");
+ text = "var a = '{{VAR:none}}'"; // Variable safe.
+ AssertCorrectModifiers(TC_JS, text, "VAR:none\n");
+ text = "var a = '{{VAR:j}}'"; // Escaping correct.
+ AssertCorrectModifiers(TC_JS, text, "VAR:j\n");
+ text = "var a = '{{VAR:h}}'"; // Escaping incorrect.
+ AssertCorrectModifiers(TC_JS, text, "VAR:h:j\n");
+ text = "var a = '{{VAR:J=number}}'"; // Not considered equiv.
+ AssertCorrectModifiers(TC_JS, text, "VAR:J=number:j\n");
+
+ // 2g: Honor any modifiers for BI_SPACE and BI_NEWLINE.
+ text = "{{BI_NEWLINE:j}}"; // An invalid modifier for the context.
+ AssertCorrectModifiers(TC_HTML, text, "BI_NEWLINE:j\n");
+ text = "{{BI_SPACE:h}}"; // An otherwise valid modifier.
+ AssertCorrectModifiers(TC_HTML, text, "BI_SPACE:h\n");
+ text = "{{BI_SPACE:x-bla}}"; // Also support custom modifiers.
+ AssertCorrectModifiers(TC_HTML, text, "BI_SPACE:x-bla\n");
+
+ // 2h: TC_CSS, TC_XML and TC_JSON
+ text = "H1{margin-{{START_EDGE}}:0;\n text-align:{{END_EDGE}}\n}";
+ AssertCorrectModifiers(TC_CSS, text, "START_EDGE:c\nEND_EDGE:c\n");
+ text = "body{background:url('{{URL:U=css}}')}"; // :U=css valid substitute
+ AssertCorrectModifiers(TC_CSS, text, "URL:U=css\n");
+ text = "body{background:url('{{URL:U=html}}')}"; // Not valid, will add :c.
+ AssertCorrectModifiers(TC_CSS, text, "URL:U=html:c\n");
+ text = "<PARAM name=\"{{VAL:xml_escape}}\">"; // Correct escaping
+ AssertCorrectModifiers(TC_XML, text, "VAL:xml_escape\n");
+ text = "<PARAM name=\"{{VAL:H=attribute}}\">"; // XSS equivalent
+ AssertCorrectModifiers(TC_XML, text, "VAL:H=attribute\n");
+ text = "<PARAM name=\"{{VAL:h}}\">"; // XSS equivalent
+ AssertCorrectModifiers(TC_XML, text, "VAL:h\n");
+ text = "<PARAM name=\"{{VAL:H=pre}}\">"; // Not XSS equivalent
+ AssertCorrectModifiers(TC_XML, text, "VAL:H=pre:xml_escape\n");
+ text = "<PARAM name=\"{{VAL:c}}\">"; // Not XSS equivalent
+ AssertCorrectModifiers(TC_XML, text, "VAL:c:xml_escape\n");
+ text = "{user={{USER:j}}"; // Correct escaping
+ AssertCorrectModifiers(TC_JSON, text, "USER:j\n");
+ text = "{user={{USER:o}}"; // json_escape is XSS equivalent
+ AssertCorrectModifiers(TC_JSON, text, "USER:o\n");
+ text = "{user={{USER:h}}"; // but html_escape is not
+ AssertCorrectModifiers(TC_JSON, text, "USER:h:j\n");
+
+ // 2i: Variables with XssSafe Custom modifiers are untouched.
+ ASSERT(GOOGLE_NAMESPACE::AddXssSafeModifier("x-test-cm",
+ &GOOGLE_NAMESPACE::html_escape));
+ text = "Hello {{USER:x-test-cm}}"; // Missing :h
+ AssertCorrectModifiers(TC_HTML, text, "USER:x-test-cm\n");
+ text = "Hello {{USER:x-test-cm:j}}"; // Extra :j
+ AssertCorrectModifiers(TC_HTML, text, "USER:x-test-cm:j\n");
+ text = "Hello {{USER:x-test-cm:x-foo}}"; // Non-safe modifier
+ AssertCorrectModifiers(TC_HTML, text, "USER:x-test-cm:x-foo\n");
+ text = "Hello {{USER:x-foo:x-test-cm}}"; // Non-safe modifier
+ AssertCorrectModifiers(TC_HTML, text, "USER:x-foo:x-test-cm\n");
+ text = "Hello {{USER:x-test-cm:none}}"; // Complete due to :none
+ AssertCorrectModifiers(TC_HTML, text, "USER:x-test-cm:none\n");
+ text = "Hello {{USER:h:x-test-cm}}"; // Prior escaping
+ AssertCorrectModifiers(TC_HTML, text, "USER:h:x-test-cm\n");
+
+ // 3. Larger test with close to every escaping case.
+
+ text = "<html><head>\n"
+ "<style>\n"
+ "@import url(\"{{CSS_URL:U=css}}\");\n"
+ "color:{{COLOR}}</style></head><body>\n"
+ "<h1>{{TITLE}}</h1>\n"
+ "<img src=\"{{IMG_URL}}\">\n"
+ "<form action=\"/search\">\n"
+ " <input name=\"hl\" value={{HL}}>\n"
+ " <input name=\"m\" value=\"{{FORM_MSG}}\">\n"
+ "</form>\n"
+ "<div style=\"background:{{BG_COLOR}}\">\n"
+ "</div>\n"
+ "<script>\n"
+ " var msg_text = '{{MSG_TEXT}}';\n"
+ "</script>\n"
+ "<a href=\"url\" onmouseover=\"'{{MOUSE}}'\">bla</a>\n"
+ "Goodbye friend {{USER}}!\n</body></html>\n";
+ expected_out = "CSS_URL:U=css\n"
+ "COLOR:c\n"
+ "TITLE:h\n"
+ "IMG_URL:U=html\n"
+ "HL:H=attribute\n"
+ "FORM_MSG:h\n"
+ "BG_COLOR:c\n"
+ "MSG_TEXT:j\n"
+ "MOUSE:j\n" // :j also escapes html entities
+ "USER:h\n";
+ AssertCorrectModifiers(TC_HTML, text, expected_out);
+}
+
+// End-to-end check that auto-escape applies the expected escaping to variable
+// values in several contexts (HTML, CSS, XML, JSON). Far from exhaustive.
+TEST(Template, VariableWithAutoEscape) {
+ string text, expected_out;
+ TemplateDictionary dict("dict");
+ string good_url("http://www.google.com/");
+ string bad_url("javascript:alert();");
+
+ text = "hi {{VAR}} lo";
+ dict.SetValue("VAR", "<bad>yo");
+ AssertCorrectEscaping(TC_HTML, dict, text, "hi &lt;bad&gt;yo lo");
+
+ text = "<a href=\"{{URL}}\">bla</a>";
+ dict.SetValue("URL", good_url);
+ expected_out = "<a href=\"" + good_url + "\">bla</a>";
+ AssertCorrectEscaping(TC_HTML, dict, text, expected_out);
+ dict.SetValue("URL", bad_url);
+ expected_out = "<a href=\"#\">bla</a>";
+ AssertCorrectEscaping(TC_HTML, dict, text, expected_out);
+
+ text = "<br style=\"display:{{DISPLAY}}\">";
+ dict.SetValue("DISPLAY", "none");
+ expected_out = "<br style=\"display:none\">";
+ AssertCorrectEscaping(TC_HTML, dict, text, expected_out);
+ // NOTE(review): sets URL, which this template never reads; DISPLAY was probably meant -- confirm.
+ dict.SetValue("URL", "!#none_ ");
+ expected_out = "<br style=\"display:none\">";
+ AssertCorrectEscaping(TC_HTML, dict, text, expected_out);
+
+ text = "<a href=\"url\" onkeyup=\"'{{EVENT}}'\">";
+ dict.SetValue("EVENT", "safe");
+ expected_out = "<a href=\"url\" onkeyup=\"'safe'\">";
+ AssertCorrectEscaping(TC_HTML, dict, text, expected_out);
+ dict.SetValue("EVENT", "f = 'y';");
+ expected_out = "<a href=\"url\" onkeyup=\"'f \\x3d \\x27y\\x27;'\">";
+ AssertCorrectEscaping(TC_HTML, dict, text, expected_out); // expectation was set but never checked
+
+ // Check special handling of BI_SPACE and BI_NEWLINE.
+ text = "Hello\n{{BI_SPACE}}bla{{BI_NEWLINE}}foo.";
+ expected_out = "Hello bla\nfoo.";
+ AssertCorrectEscaping(TC_HTML, dict, text, expected_out);
+
+ // TC_CSS
+ text = "H1{margin-{{EDGE}}:0; text-align:{{BAD_EDGE}}}";
+ dict.SetValue("EDGE", "left");
+ dict.SetValue("BAD_EDGE", "$$center()!!"); // Bad chars are removed.
+ AssertCorrectEscaping(TC_CSS, dict, text,
+ "H1{margin-left:0; text-align:center!!}");
+
+ // TC_XML and TC_JSON
+ text = "<Q>{{DATA}}</Q>";
+ dict.SetValue("DATA", "good-data");
+ AssertCorrectEscaping(TC_XML, dict, text, "<Q>good-data</Q>");
+ dict.SetValue("DATA", "<BAD>FOO</BAD>");
+ AssertCorrectEscaping(TC_XML, dict, text,
+ "<Q>&lt;BAD&gt;FOO&lt;/BAD&gt;</Q>");
+ text = "{user = \"{{USER}}\"}";
+ dict.SetValue("USER", "good-user");
+ AssertCorrectEscaping(TC_JSON, dict, text, "{user = \"good-user\"}");
+ dict.SetValue("USER", "evil'<>\"");
+ AssertCorrectEscaping(TC_JSON, dict, text,
+ "{user = \"evil\\x27\\x3c\\x3e\\x22\"}");
+}
+
+// Template initialization must fail in auto-escape mode if the parser rejects the input.
+TEST(Template, FailedInitWithAutoEscape) {
+  Strip strip = STRIP_WHITESPACE;
+  const char* const kRejected[] = {
+    // Taken from HTML Parser test suite.
+    "<a href='http://www.google.com' ''>\n",
+    // Missing quotes around URL, not accepted in URL-taking attributes.
+    "<a href={{URL}}>bla</a>",
+    // Missing quotes around STYLE, not accepted in style-taking attributes.
+    "<div style={{STYLE}}>",
+  };
+  for (int i = 0; i < 3; ++i) {
+    string bad_html = kRejected[i];
+    ASSERT(NULL == StringToTemplateWithAutoEscaping(bad_html, strip, TC_HTML));
+  }
+}
+
+TEST(Template, AutoEscaping) {
+ Strip strip = STRIP_WHITESPACE;
+ Template *tpl;
+ string filename;
+ string text;
+ string user = "John<>Doe";
+ string user_esc = "John<>Doe";
+
+ // Positive test cases -- template initialization succeeds.
+ // We also check that modifiers that were missing or given incorrect
+ // have been updated as expected.
+ // TODO(jad): Cut-down redundancy by merging with
+ // TestCorrectModifiersForAutoEscape.
+ text = "{{%AUTOESCAPE context=\"HTML\"}}" // HTML
+ "{{USER:o}}<a href=\"{{URL}}\" class={{CLASS:h}}</a>";
+ ASSERT(tpl = StringToTemplate(text, strip));
+ string expected_mods = "USER:o:h\nURL:U=html\nCLASS:h:H=attribute\n";
+ AssertCorrectModifiersInTemplate(tpl, text, expected_mods);
+
+ text = "{{%AUTOESCAPE context=\"HTML\" state=\"IN_TAG\"}}" // HTML in tag
+ "href=\"{{URL}}\" class={{CLASS:h}} style=\"font:{{COLOR}}\"";
+ ASSERT(tpl = StringToTemplate(text, strip));
+ expected_mods = "URL:U=html\nCLASS:h:H=attribute\nCOLOR:c\n";
+ AssertCorrectModifiersInTemplate(tpl, text, expected_mods);
+
+ text = "{{%AUTOESCAPE context=\"HTML\" state=\"in_tag\"}}" // lowercase ok
+ "href=\"{{URL}}\" class={{CLASS:h}} style=\"font:{{COLOR}}\"";
+ ASSERT(tpl = StringToTemplate(text, strip));
+ expected_mods = "URL:U=html\nCLASS:h:H=attribute\nCOLOR:c\n";
+ AssertCorrectModifiersInTemplate(tpl, text, expected_mods);
+
+ // Repeat the test with trailing HTML that closes the tag. This is
+ // undefined behavior. We test it to ensure the parser does not choke.
+ text += ">Hello</a><span>Some text</span></body></html>";
+ ASSERT(tpl = StringToTemplate(text, strip));
+ expected_mods = "URL:U=html\nCLASS:h:H=attribute\nCOLOR:c\n";
+ AssertCorrectModifiersInTemplate(tpl, text, expected_mods);
+
+ text = "{{%AUTOESCAPE context=\"JAVASCRIPT\"}}" // JAVASCRIPT
+ "var a = {{A}}; var b = '{{B:h}}';";
+ ASSERT(tpl = StringToTemplate(text, strip));
+ expected_mods = "A:J=number\nB:h:j\n";
+ AssertCorrectModifiersInTemplate(tpl, text, expected_mods);
+
+ text = "{{%AUTOESCAPE context=\"CSS\"}}" // CSS
+ "body {color:\"{{COLOR}}\"; font-size:{{SIZE:j}}";
+ ASSERT(tpl = StringToTemplate(text, strip));
+ expected_mods = "COLOR:c\nSIZE:j:c\n";
+ AssertCorrectModifiersInTemplate(tpl, text, expected_mods);
+
+ text = "{{%AUTOESCAPE context=\"JSON\"}}" // JSON
+ "{ 'id': {{ID:j}}, 'value': {{VALUE:h}} }";
+ ASSERT(tpl = StringToTemplate(text, strip));
+ expected_mods = "ID:j\nVALUE:h:j\n";
+ AssertCorrectModifiersInTemplate(tpl, text, expected_mods);
+
+ text = "{{%AUTOESCAPE context=\"XML\"}}" // XML
+ "<PARAM name=\"{{VAL}}\">{{DATA:h}}";
+ ASSERT(tpl = StringToTemplate(text, strip));
+ expected_mods = "VAL:xml_escape\nDATA:h\n";
+ AssertCorrectModifiersInTemplate(tpl, text, expected_mods);
+
+ text = "{{%AUTOESCAPE context=\"xml\"}}" // lower-case XML
+ "<PARAM name=\"{{VAL}}\">{{DATA:h}}";
+ ASSERT(tpl = StringToTemplate(text, strip));
+ expected_mods = "VAL:xml_escape\nDATA:h\n";
+ AssertCorrectModifiersInTemplate(tpl, text, expected_mods);
+
+ text = "{{!bla}}{{%AUTOESCAPE context=\"HTML\"}}"; // after comment
+ ASSERT(tpl = StringToTemplate(text, strip));
+ text = "{{%AUTOESCAPE context=\"HTML\" state=\"default\"}}";
+ ASSERT(tpl = StringToTemplate(text, strip)); // adding state
+
+ // Negative test cases - template initialization fails due to errors
+ // in the marker. Also checks that our parsing is defensive.
+ text = "{{%AUTOESCAPE}}"; // missing context
+ ASSERT((tpl = StringToTemplate(text, strip)) == NULL);
+ text = "{{%AUTOESCAPER context=\"HTML\"}}"; // invalid id
+ ASSERT((tpl = StringToTemplate(text, strip)) == NULL);
+ text = "{{%}}"; // missing id
+ ASSERT((tpl = StringToTemplate(text, strip)) == NULL);
+ text = "{{% }}"; // missing id
+ ASSERT((tpl = StringToTemplate(text, strip)) == NULL);
+ text = "{{% =}}"; // missing id
+ ASSERT((tpl = StringToTemplate(text, strip)) == NULL);
+ text = "{{%AUTOESCAPE =\"HTML\"}}"; // missing name
+ ASSERT((tpl = StringToTemplate(text, strip)) == NULL);
+ text = "{{%AUTOESCAPE foo=\"HTML\"}}"; // bogus name
+ ASSERT((tpl = StringToTemplate(text, strip)) == NULL);
+ text = "{{%AUTOESCAPE =}}"; // lone '='
+ ASSERT((tpl = StringToTemplate(text, strip)) == NULL);
+ text = "{{%AUTOESCAPE context=HTML}}"; // val not quoted
+ ASSERT((tpl = StringToTemplate(text, strip)) == NULL);
+ text = "{{%AUTOESCAPE context=\"HTML}}"; // no end quotes
+ ASSERT((tpl = StringToTemplate(text, strip)) == NULL);
+ text = "{{%AUTOESCAPE context=\"\\\"HTML\"}}"; // Unescape val
+ ASSERT((tpl = StringToTemplate(text, strip)) == NULL);
+ text = "{{%AUTOESCAPE context=\"\\\"HT\\\"\\\"ML\\\"\"}}"; // more complex
+ ASSERT((tpl = StringToTemplate(text, strip)) == NULL);
+ text = "{{%AUTOESCAPE context=\"\"HTML\"}}"; // Unescape val
+ ASSERT((tpl = StringToTemplate(text, strip)) == NULL);
+ text = "{{%AUTOESCAPE context=\"JAVASCRIPT\" bla}}"; // extra attr
+ ASSERT((tpl = StringToTemplate(text, strip)) == NULL);
+ text = "{{%AUTOESCAPE context=\"JAVASCRIPT\"bla}}"; // invalid value
+ ASSERT((tpl = StringToTemplate(text, strip)) == NULL);
+ text = "{{%AUTOESCAPE context=\"JAVASCRIPT\" foo=bla}}"; // extra attr/val
+ ASSERT((tpl = StringToTemplate(text, strip)) == NULL);
+ text = "{{%AUTOESCAPE context=\"HTML\"}}"; // extra whitesp
+ ASSERT((tpl = StringToTemplate(text, strip)) == NULL);
+ text = "{{%AUTOESCAPE context =\"HTML\"}}"; // extra whitesp
+ ASSERT((tpl = StringToTemplate(text, strip)) == NULL);
+ text = "{{%AUTOESCAPE context= \"HTML\"}}"; // extra whitesp
+ ASSERT((tpl = StringToTemplate(text, strip)) == NULL);
+ text = "{{%AUTOESCAPE context=\"HTML\" }}"; // extra whitesp
+ ASSERT((tpl = StringToTemplate(text, strip)) == NULL);
+ text = "{{%AUTOESCAPE context=\"Xml\"}}"; // mixed-case xml
+ ASSERT((tpl = StringToTemplate(text, strip)) == NULL);
+ text = "{{%AUTOESCAPE context=\"HTML\" state=\"tag\"}}"; // bad state
+ ASSERT((tpl = StringToTemplate(text, strip)) == NULL);
+ text = "{{%AUTOESCAPE context=\"CSS\" state=\"IN_TAG\"}}"; // invalid state
+ ASSERT((tpl = StringToTemplate(text, strip)) == NULL);
+ text = "Hello{{%AUTOESCAPE context=\"HTML\"}}"; // after text
+ ASSERT((tpl = StringToTemplate(text, strip)) == NULL);
+ text = "{{USER}}{{%AUTOESCAPE context=\"HTML\"}}"; // after variable
+ ASSERT((tpl = StringToTemplate(text, strip)) == NULL);
+ text = "{{#SEC}}{{%AUTOESCAPE context=\"HTML\"}}{{/SEC}}"; // not in MAIN
+ ASSERT((tpl = StringToTemplate(text, strip)) == NULL);
+
+ string kAutoescapeHtmlPragma = "{{%AUTOESCAPE context=\"HTML\"}}";
+
+ // Check that Selective Auto-Escape does not auto-escape included templates
+ // unless these are also marked for auto-escape. To attest that,
+ // we check that when no escaping was given in the included template, none
+ // will be applied to it. USER will not get html-escaped.
+ text = kAutoescapeHtmlPragma + "{{>INC}}";
+ tpl = StringToTemplate(text, strip);
+ ASSERT(tpl);
+ string inc_text = "{{USER}}"; // missing :h escaping.
+ TemplateDictionary dict("dict");
+ TemplateDictionary *inc_dict = dict.AddIncludeDictionary("INC");
+ inc_dict->SetFilename(StringToTemplateFile(inc_text));
+ inc_dict->SetValue("USER", user);
+ AssertExpandIs(tpl, &dict, user, true);
+
+ // Add AUTOESCAPE pragma to included template and check that it works.
+ inc_text = kAutoescapeHtmlPragma + inc_text;
+ filename = StringToTemplateFile(inc_text);
+ inc_dict->SetFilename(filename);
+ AssertExpandIs(tpl, &dict, user_esc, true);
+
+ // Check that Selective Auto-Escape works with Template::StringToTemplate.
+ tpl = Template::StringToTemplate(inc_text, strip);
+ ASSERT(tpl);
+ TemplateDictionary dict2("dict2");
+ dict2.SetValue("USER", user);
+ AssertExpandIs(tpl, &dict2, user_esc, true);
+ delete tpl;
+
+ // Test that Selective AutoEscape follows included templates: Included
+ // templates 2 and 4 are registered for auto-escaping but not included
+ // templates 1 and 3. Check that only templates 2 and 4 get escaped.
+ text = "Parent: {{USER}}; {{>INCONE}}";
+ string text_inc1 = "INC1: {{USER1}}; {{>INCTWO}}";
+ string text_inc2 = kAutoescapeHtmlPragma + "INC2: {{USER2}}; {{>INCTHREE}}";
+ string text_inc3 = "INC3: {{USER3}}; {{>INCFOUR}}";
+ string text_inc4 = kAutoescapeHtmlPragma + "INC4: {{USER4}}";
+ dict.SetValue("USER", user);
+
+ TemplateDictionary *dict_inc1 = dict.AddIncludeDictionary("INCONE");
+ dict_inc1->SetFilename(StringToTemplateFile(text_inc1));
+ dict_inc1->SetValue("USER1", user);
+
+ TemplateDictionary *dict_inc2 = dict_inc1->AddIncludeDictionary("INCTWO");
+ filename = StringToTemplateFile(text_inc2);
+ dict_inc2->SetFilename(filename);
+ dict_inc2->SetValue("USER2", user);
+
+ TemplateDictionary *dict_inc3 = dict_inc2->AddIncludeDictionary("INCTHREE");
+ dict_inc3->SetFilename(StringToTemplateFile(text_inc3));
+ dict_inc3->SetValue("USER3", user);
+
+ TemplateDictionary *dict_inc4 = dict_inc3->AddIncludeDictionary("INCFOUR");
+ filename = StringToTemplateFile(text_inc4);
+ dict_inc4->SetFilename(filename);
+ dict_inc4->SetValue("USER4", user);
+
+ tpl = StringToTemplate(text, strip);
+ string expected_out = "Parent: " + user + "; INC1: " + user +
+ "; INC2: " + user_esc + "; INC3: " + user + "; INC4: " + user_esc;
+ AssertExpandIs(tpl, &dict, expected_out, true);
+
+ // Check that we do not modify template-includes.
+ // Here, xml_escape would have been changed to :h:xml_escape
+ // causing a double-escaping of the USER.
+ text = kAutoescapeHtmlPragma + "{{>INC:xml_escape}}";
+ inc_text = "{{USER}}";
+ tpl = StringToTemplate(text, strip);
+ ASSERT(tpl);
+ TemplateDictionary dict3("dict");
+ inc_dict = dict3.AddIncludeDictionary("INC");
+ inc_dict->SetFilename(StringToTemplateFile(inc_text));
+ inc_dict->SetValue("USER", user);
+ AssertExpandIs(tpl, &dict3, user_esc, true);
+
+ // Test that {{%...}} is a "removable" marker. A related test is
+ // also added to TestStrip().
+ tpl = StringToTemplate("{{%AUTOESCAPE context=\"HTML\"}}\nText\n Text",
+ STRIP_BLANK_LINES);
+ AssertExpandIs(tpl, &dict, "Text\n Text", true);
+}
+
+TEST(Template, RegisterString) {  // templates registered from in-memory strings
+ ASSERT(Template::StringToTemplateCache("file1", "Some text"));
+ Template* tpl = Template::GetTemplate("file1", STRIP_WHITESPACE);
+ ASSERT(tpl);
+ ASSERT(Template::GetTemplate("file1", STRIP_WHITESPACE) == tpl);  // same name + strip mode => same object
+ // Register a second string template whose body includes another template via {{>INC}}.
+ ASSERT(Template::StringToTemplateCache("file2", "Top {{>INC}}"));
+ // Expanding "file1" on its own must yield exactly the text it was registered with.
+ TemplateDictionary dict("dict");
+ string expected = "Some text";
+ AssertExpandIs(tpl, &dict, expected, true);
+ // Point INC at the string-registered "file1"; expanding "file2" must then splice in its text.
+ TemplateDictionary* sub_dict = dict.AddIncludeDictionary("INC");
+ sub_dict->SetFilename("file1");
+ tpl = Template::GetTemplate("file2", STRIP_WHITESPACE);
+ expected = "Top Some text";
+ AssertExpandIs(tpl, &dict, expected, true);
+}
+
+// This tests that StaticTemplateString is sufficiently initialized at
+// static-initialization time (as opposed to dynamic-initialization
+// time, which comes later), that we can safely expand templates
+// during dynamic initialization. This is worth testing, because some
+// parts of a StaticTemplateString -- especially the hash value, *do*
+// get computed later at dynamic-initialization time, and we want to
+// make sure that things still work properly even if we access the
+// StaticTemplateString before that happens.
+extern const StaticTemplateString kLateDefine;  // declared here, defined only after sts_tester below
+class DynamicInitializationTemplateExpander {
+ public:
+ DynamicInitializationTemplateExpander() {  // all checks run from this constructor
+ Template* tpl = Template::StringToTemplate("hi {{VAR}} lo",
+ STRIP_WHITESPACE);
+ TemplateDictionary dict("dict");
+ dict.SetValue("VAR", TemplateString("short-lived", strlen("short")));  // explicit length: only "short" is expected below
+ AssertExpandIs(tpl, &dict, "hi short lo", true);
+ dict.SetValue("VAR", kHello);  // kHello is declared outside this block
+ AssertExpandIs(tpl, &dict, "hi Hello lo", true);
+ dict.SetValue("VAR", kLateDefine);  // its definition appears after the object that runs this code
+ AssertExpandIs(tpl, &dict, "hi laterz lo", true);
+ delete tpl;  // release the template created at the top of the constructor
+ }
+};
+DynamicInitializationTemplateExpander sts_tester; // this runs before main()
+const StaticTemplateString kLateDefine = STS_INIT(kLateDefine, "laterz");  // must stay after sts_tester for the test to be meaningful
+
+int main(int argc, char** argv) {  // argc/argv are not used in this body
+ // Presumably (per the helper's name) creates or empties FLAGS_test_tmpdir and makes it the temp dir.
+ CreateOrCleanTestDirAndSetAsTmpdir(FLAGS_test_tmpdir);
+
+ // This goes first so that future tests don't mess up the filenames.
+ // So we make it a normal function rather than using TEST() on it.
+ TestAnnotation();
+ return RUN_ALL_TESTS();  // the process exit status is whatever RUN_ALL_TESTS() yields
+}
diff --git a/src/tests/template_unittest_test_footer.in b/src/tests/template_unittest_test_footer.in
new file mode 100644
index 0000000..c81be51
--- /dev/null
+++ b/src/tests/template_unittest_test_footer.in
@@ -0,0 +1,49 @@
+<center><p><hr class=z>
+
+{{#TRIM_LINE}}
+<table width=100% border=0 cellpadding=0 cellspacing=0>
+ <tr>
+ <td bgcolor={{TRIM_LINE_COLOR}} colspan=2{{TRIM_LINE_HEIGHT}}>{{CLEARDOT}}</td>
+ </tr>
+</table>
+{{/TRIM_LINE}}
+ <table width=100% cellpadding=2 cellspacing=0 border=0>
+ <tr>
+ <td align=center{{FOOTER_BAR_ATTRIBUTES}}><font size=-1>
+ {{#FOOTER_BAR_TEXT}}
+ {{HOME_LINK}} -{{BI_SPACE}}
+ {{ADVERTISE_LINK}} -{{BI_SPACE}}
+ {{#PROMO_LICENSING_SECTION}}
+ {{PROMO_LICENSING_LINK}} -{{BI_SPACE}}
+ {{/PROMO_LICENSING_SECTION}}
+ {{ABOUT_GOOGLE_LINK}}
+ {{/FOOTER_BAR_TEXT}}
+ {{#EMPTY_FOOTER_BAR_TEXT}}
+ {{/EMPTY_FOOTER_BAR_TEXT}}
+ </font>
+ </table>
+ <br>
+ <font size=-1 class=p>
+ {{MODIFIED_BY_GOOGLE}}{{MSG_copyright}}
+ </font>
+ {{>ODP_ATTRIBUTION}}
+</center>
+
+{{#CLOSING_DIV_SECTION}}
+ </div>
+{{/CLOSING_DIV_SECTION}}
+
+{{>GOOGLE_COMPLETE_JS}}
+{{>SITE_SPEED_SCRIPT_FOOTER}}
+{{>AD_WIDE_WRAP_JAVASCRIPT}}
+{{>BROWSER_STATS_INCLUDE}}
+{{#LATENCY_PREFETCH}}
+ <link rel=prefetch href="{{LATENCY_PREFETCH_URL}}">
+{{/LATENCY_PREFETCH}}
+
+{{#JAVASCRIPT_FOOTER_SECTION}}
+ <script><!--{{BI_NEWLINE}}
+ {{>FAST_NEXT_JAVASCRIPT}}
+ //-->{{BI_NEWLINE}}
+ </script>
+{{/JAVASCRIPT_FOOTER_SECTION}}
diff --git a/src/tests/template_unittest_test_footer_dict01.out b/src/tests/template_unittest_test_footer_dict01.out
new file mode 100644
index 0000000..9bd3bf3
--- /dev/null
+++ b/src/tests/template_unittest_test_footer_dict01.out
@@ -0,0 +1,10 @@
+<center><p><hr class=z><table width=100% cellpadding=2 cellspacing=0 border=0><tr><td align=center><font size=-1><b>Time to go home!</b> - <b>Be advertiser #2</b> - <A HREF='foo'> - <A HREF=/>About Google!</A></font></table><br><font size=-1 class=p>2005© Google Inc. (all rights reserved)</font></center></div><link rel=prefetch href="/latency"><script><!--
+<html><head></head><body></body></html>
+<html><head>head</head><body></body></html>
+//-->
+</script><script><!--
+<html><head>include-head</head><body><b><A HREF=/></b>: 0.3333</body></html><html><head><script>
+<!--
+</script>
+</head><html><a id="aw" onMouseOver="return ss('')"><b></b></a></html></body>//-->
+</script>
\ No newline at end of file
diff --git a/src/tests/template_unittest_test_footer_dict02.out b/src/tests/template_unittest_test_footer_dict02.out
new file mode 100644
index 0000000..dcdb111
--- /dev/null
+++ b/src/tests/template_unittest_test_footer_dict02.out
@@ -0,0 +1 @@
+<center><p><hr class=z><table width=100% cellpadding=2 cellspacing=0 border=0><tr><td align=center><font size=-1></font></table><br><font size=-1 class=p></font></center>
\ No newline at end of file
diff --git a/src/tests/template_unittest_test_html.in b/src/tests/template_unittest_test_html.in
new file mode 100644
index 0000000..5d8ad35
--- /dev/null
+++ b/src/tests/template_unittest_test_html.in
@@ -0,0 +1,65 @@
+<html>
+<head>
+<script>{{BI_NEWLINE}}
+ <!--{{BI_NEWLINE}}
+
+ {{! Include the JS code to do query tracking. }}
+ {{! javascript_query_tracking_post*.tpl }}
+ {{>JAVASCRIPT_QUERY_TRACKING_FUNCTION}}
+
+ {{! netscape requires "window.status"; IE allows just "status" here }}
+ {{! must return true when mousing over a link; not necessary when over }}
+ {{! the table cell in general }}
+
+ {{! The "id" parameter here refers to the value of the id attribute }}
+ {{! of the anchor element for the ad (of the form "aw[POS]"). }}
+ {{! This is used by some spam-protection JavaScript to modify }}
+ {{! parameters in the URL of the link. }}
+
+ {{#NO_MOUSEOVER_FUNCTIONS}}
+ function ss(w,id){
+ window.status=w;
+ return true;
+ }{{BI_NEWLINE}}
+ {{/NO_MOUSEOVER_FUNCTIONS}}
+
+ {{#MOUSEOVER_FUNCTIONS}}
+ {{! If any ads are mouseover ads, this is used to import
+ increment_mouseover_js.tpl, which redefines function ss to count
+ mouseovers. Otherwise it produces no output. }}
+ {{>MOUSEOVER_JAVASCRIPT}}
+ {{! Since JSCompiler renames all functions/variables not beginning with an
+ underscore, we use _ss as our compiled function name,
+ then set ss to _ss }}
+ ss = _ss;
+ {{/MOUSEOVER_FUNCTIONS}}
+</script>{{BI_NEWLINE}}
+</head>
+
+<html>
+
+<a id="aw" onMouseOver="return ss('{{GOTO_MESSAGE}}')"{{TARGET}}>
+<b>{{TAG_LINE}}</b>
+</a>
+
+{{#UPDATE_SECTION}}Last updated: {{UPDATE}}<br>{{/UPDATE_SECTION}}
+
+{{#RESULTS}}
+<table cellspacing=0 cellpadding=0{{TABLE_WIDTH}} align={{ALIGNMENT}}{{BI_SPACE}}
+ {{#WHITE_BG}}bgColor=#ffffff {{/WHITE_BG}}border=0>
+ <tr>
+ <td>
+ <ol>
+ <li> Result: {{RESULT}}
+ <li> Goodness of result: {{GOODNESS}}
+ <li> xml-safe result: {{XML_RESULT}}
+ </ol>
+ </td>
+ </tr>
+</table>
+{{/RESULTS}}
+
+{{>FOOTER}}
+
+</html>
+</body>
\ No newline at end of file
diff --git a/src/tests/template_unittest_test_html_dict01.anno_out b/src/tests/template_unittest_test_html_dict01.anno_out
new file mode 100644
index 0000000..40012d9
--- /dev/null
+++ b/src/tests/template_unittest_test_html_dict01.anno_out
@@ -0,0 +1,66 @@
+{{#FILE=template_unittest_test_html.in}}{{#SEC=__{{MAIN}}__}}<html>
+<head>
+<script>{{#VAR=BI_NEWLINE}}
+{{/VAR}}
+ <!--{{#VAR=BI_NEWLINE}}
+{{/VAR}}
+{{#SEC=MOUSEOVER_FUNCTIONS}}
+{{#INC=MOUSEOVER_JAVASCRIPT}}{{MISSING_FILE=not_a_template}}{{/INC}}
+ ss = _ss;
+{{/SEC}}</script>{{#VAR=BI_NEWLINE}}
+{{/VAR}}
+</head>
+<html>
+<a id="aw" onMouseOver="return ss('{{#VAR=GOTO_MESSAGE}}print \x22Go home\x22{{/VAR}}')"{{#VAR=TARGET}}{{/VAR}}>
+<b>{{#VAR=TAG_LINE}}{{/VAR}}</b>
+</a>
+{{#SEC=UPDATE_SECTION}}Last updated: {{#VAR=UPDATE}}monday & tuesday{{/VAR}}<br>{{/SEC}}
+{{#SEC=RESULTS}}<table cellspacing=0 cellpadding=0{{#VAR=TABLE_WIDTH}}{{/VAR}} align={{#VAR=ALIGNMENT}}"right"{{/VAR}}{{#VAR=BI_SPACE}} {{/VAR}}
+ {{#SEC=WHITE_BG}}bgColor=#ffffff {{/SEC}}border=0>
+ <tr>
+ <td>
+ <ol>
+ <li> Result: {{#VAR=RESULT}}<&>"result" #0'&'{{/VAR}}
+ <li> Goodness of result: {{#VAR=GOODNESS}}5{{/VAR}}
+ <li> xml-safe result: {{#VAR=XML_RESULT}}<&>"result" #0'&'{{/VAR}}
+ </ol>
+ </td>
+ </tr>
+</table>
+{{/SEC}}{{#SEC=RESULTS}}<table cellspacing=0 cellpadding=0{{#VAR=TABLE_WIDTH}}{{/VAR}} align={{#VAR=ALIGNMENT}}"right"{{/VAR}}{{#VAR=BI_SPACE}} {{/VAR}}
+ border=0>
+ <tr>
+ <td>
+ <ol>
+ <li> Result: {{#VAR=RESULT}}<&>"result" #1'&'{{/VAR}}
+ <li> Goodness of result: {{#VAR=GOODNESS}}6{{/VAR}}
+ <li> xml-safe result: {{#VAR=XML_RESULT}}<&>"result" #1'&'{{/VAR}}
+ </ol>
+ </td>
+ </tr>
+</table>
+{{/SEC}}{{#SEC=RESULTS}}<table cellspacing=0 cellpadding=0{{#VAR=TABLE_WIDTH}}{{/VAR}} align={{#VAR=ALIGNMENT}}"right"{{/VAR}}{{#VAR=BI_SPACE}} {{/VAR}}
+ {{#SEC=WHITE_BG}}bgColor=#ffffff {{/SEC}}border=0>
+ <tr>
+ <td>
+ <ol>
+ <li> Result: {{#VAR=RESULT}}<&>"result" #2'&'{{/VAR}}
+ <li> Goodness of result: {{#VAR=GOODNESS}}7{{/VAR}}
+ <li> xml-safe result: {{#VAR=XML_RESULT}}<&>"result" #2'&'{{/VAR}}
+ </ol>
+ </td>
+ </tr>
+</table>
+{{/SEC}}{{#INC=FOOTER}}{{#FILE=template_unittest_test_footer.in}}{{#SEC=__{{MAIN}}__}}<center><p><hr class=z>
+ <table width=100% cellpadding=2 cellspacing=0 border=0>
+ <tr>
+ <td align=center{{#VAR=FOOTER_BAR_ATTRIBUTES}}{{/VAR}}><font size=-1>
+ </font>
+ </table>
+ <br>
+ <font size=-1 class=p>
+ {{#VAR=MODIFIED_BY_GOOGLE}}{{/VAR}}{{#VAR=MSG_copyright}}{{/VAR}}
+ </font>
+</center>
+{{/SEC}}{{/FILE}}{{/INC}}</html>
+</body>{{/SEC}}{{/FILE}}
diff --git a/src/tests/template_unittest_test_html_dict01.out b/src/tests/template_unittest_test_html_dict01.out
new file mode 100644
index 0000000..1abe7b7
--- /dev/null
+++ b/src/tests/template_unittest_test_html_dict01.out
@@ -0,0 +1,4 @@
+<html><head><script>
+<!--
+ss = _ss;</script>
+</head><html><a id="aw" onMouseOver="return ss('print \x22Go home\x22')"><b></b></a>Last updated: monday & tuesday<br><table cellspacing=0 cellpadding=0 align="right" bgColor=#ffffff border=0><tr><td><ol><li> Result: <&>"result" #0'&'<li> Goodness of result: 5<li> xml-safe result: <&>"result" #0'&'</ol></td></tr></table><table cellspacing=0 cellpadding=0 align="right" border=0><tr><td><ol><li> Result: <&>"result" #1'&'<li> Goodness of result: 6<li> xml-safe result: <&>"result" #1'&'</ol></td></tr></table><table cellspacing=0 cellpadding=0 align="right" bgColor=#ffffff border=0><tr><td><ol><li> Result: <&>"result" #2'&'<li> Goodness of result: 7<li> xml-safe result: <&>"result" #2'&'</ol></td></tr></table><center><p><hr class=z><table width=100% cellpadding=2 cellspacing=0 border=0><tr><td align=center><font size=-1></font></table><br><font size=-1 class=p></font></center></html></body>
\ No newline at end of file
diff --git a/src/tests/template_unittest_test_html_dict02.out b/src/tests/template_unittest_test_html_dict02.out
new file mode 100644
index 0000000..2fed87b
--- /dev/null
+++ b/src/tests/template_unittest_test_html_dict02.out
@@ -0,0 +1,4 @@
+<html><head><script>
+<!--
+</script>
+</head><html><a id="aw" onMouseOver="return ss('')"><b></b></a></html></body>
\ No newline at end of file
diff --git a/src/tests/template_unittest_test_invalid1.in b/src/tests/template_unittest_test_invalid1.in
new file mode 100644
index 0000000..cf0e80a
--- /dev/null
+++ b/src/tests/template_unittest_test_invalid1.in
@@ -0,0 +1,3 @@
+This is html
+{This is fine}
+{{This is not so fine}} {{! Can't have spaces in a variable name!}}
diff --git a/src/tests/template_unittest_test_invalid2.in b/src/tests/template_unittest_test_invalid2.in
new file mode 100644
index 0000000..48383a6
--- /dev/null
+++ b/src/tests/template_unittest_test_invalid2.in
@@ -0,0 +1,7 @@
+This is html.
+
+{{#SECTION}}
+Happy section
+{{/SEC}}
+
+But the section never ends!
diff --git a/src/tests/template_unittest_test_markerdelim.in b/src/tests/template_unittest_test_markerdelim.in
new file mode 100644
index 0000000..e3762e6
--- /dev/null
+++ b/src/tests/template_unittest_test_markerdelim.in
@@ -0,0 +1,67 @@
+{{! Use <<< and >>> to delimit template stuff, rather than {{ and }}
+{{=<<< >>>=}}
+<html>
+<head>
+<script><<<BI_NEWLINE>>>
+ <!--<<<BI_NEWLINE>>>
+
+ <<<! Include the JS code to do query tracking. >>>
+ <<<! javascript_query_tracking_post*.tpl >>>
+ <<<>JAVASCRIPT_QUERY_TRACKING_FUNCTION>>>
+
+ <<<! netscape requires "window.status"; IE allows just "status" here >>>
+ <<<! must return true when mousing over a link; not necessary when over >>>
+ <<<! the table cell in general >>>
+
+ <<<! The "id" parameter here refers to the value of the id attribute >>>
+ <<<! of the anchor element for the ad (of the form "aw[POS]"). >>>
+ <<<! This is used by some spam-protection JavaScript to modify >>>
+ <<<! parameters in the URL of the link. >>>
+
+ <<<#NO_MOUSEOVER_FUNCTIONS>>>
+ function ss(w,id){
+ window.status=w;
+ return true;
+ }<<<BI_NEWLINE>>>
+ <<</NO_MOUSEOVER_FUNCTIONS>>>
+
+ <<<#MOUSEOVER_FUNCTIONS>>>
+ <<<! If any ads are mouseover ads, this is used to import
+ increment_mouseover_js.tpl, which redefines function ss to count
+ mouseovers. Otherwise it produces no output. >>>
+ <<<>MOUSEOVER_JAVASCRIPT>>>
+ <<<! Since JSCompiler renames all functions/variables not beginning with an
+ underscore, we use _ss as our compiled function name,
+ then set ss to _ss >>>
+ ss = _ss;
+ <<</MOUSEOVER_FUNCTIONS>>>
+</script><<<BI_NEWLINE>>>
+</head>
+
+<html>
+
+<a id="aw" onMouseOver="return ss('<<<GOTO_MESSAGE>>>')"<<<TARGET>>>>
+<b><<<TAG_LINE>>></b>
+</a>
+
+<<<#UPDATE_SECTION>>>Last updated: <<<UPDATE>>><br><<</UPDATE_SECTION>>>
+
+<<<#RESULTS>>>
+<table cellspacing=0 cellpadding=0<<<TABLE_WIDTH>>> align=<<<ALIGNMENT>>><<<BI_SPACE>>>
+ <<<#WHITE_BG>>>bgColor=#ffffff <<</WHITE_BG>>>border=0>
+ <tr>
+ <td>
+ <ol>
+ <li> Result: <<<RESULT>>>
+ <li> Goodness of result: <<<GOODNESS>>>
+ <li> xml-safe result: <<<XML_RESULT>>>
+ </ol>
+ </td>
+ </tr>
+</table>
+<<</RESULTS>>>
+
+<<<>FOOTER>>>
+
+</html>
+</body>
\ No newline at end of file
diff --git a/src/tests/template_unittest_test_markerdelim_dict01.out b/src/tests/template_unittest_test_markerdelim_dict01.out
new file mode 100644
index 0000000..1abe7b7
--- /dev/null
+++ b/src/tests/template_unittest_test_markerdelim_dict01.out
@@ -0,0 +1,4 @@
+<html><head><script>
+<!--
+ss = _ss;</script>
+</head><html><a id="aw" onMouseOver="return ss('print \x22Go home\x22')"><b></b></a>Last updated: monday & tuesday<br><table cellspacing=0 cellpadding=0 align="right" bgColor=#ffffff border=0><tr><td><ol><li> Result: <&>"result" #0'&'<li> Goodness of result: 5<li> xml-safe result: <&>"result" #0'&'</ol></td></tr></table><table cellspacing=0 cellpadding=0 align="right" border=0><tr><td><ol><li> Result: <&>"result" #1'&'<li> Goodness of result: 6<li> xml-safe result: <&>"result" #1'&'</ol></td></tr></table><table cellspacing=0 cellpadding=0 align="right" bgColor=#ffffff border=0><tr><td><ol><li> Result: <&>"result" #2'&'<li> Goodness of result: 7<li> xml-safe result: <&>"result" #2'&'</ol></td></tr></table><center><p><hr class=z><table width=100% cellpadding=2 cellspacing=0 border=0><tr><td align=center><font size=-1></font></table><br><font size=-1 class=p></font></center></html></body>
\ No newline at end of file
diff --git a/src/tests/template_unittest_test_markerdelim_dict02.out b/src/tests/template_unittest_test_markerdelim_dict02.out
new file mode 100644
index 0000000..2fed87b
--- /dev/null
+++ b/src/tests/template_unittest_test_markerdelim_dict02.out
@@ -0,0 +1,4 @@
+<html><head><script>
+<!--
+</script>
+</head><html><a id="aw" onMouseOver="return ss('')"><b></b></a></html></body>
\ No newline at end of file
diff --git a/src/tests/template_unittest_test_modifiers.in b/src/tests/template_unittest_test_modifiers.in
new file mode 100644
index 0000000..b70e93f
--- /dev/null
+++ b/src/tests/template_unittest_test_modifiers.in
@@ -0,0 +1,15 @@
+<html>
+<body>
+{{#UPDATE_SECTION}}
+ {{UPDATE}}
+ {{UPDATE:h}}
+ {{UPDATE:javascript_escape}}
+ {{UPDATE:h:u}}
+{{/UPDATE_SECTION}}
+{{! There should be no problem with this comment having a : in it. }}
+ <IMG src=foo.jpg align={{ALIGNMENT}}>
+ <IMG src="mouseover() {img=\'foo.jpg\' align={{ALIGNMENT:j}}}">
+
+{{>SIMPLE:html_escape}}
+</body>
+</html>{{BI_NEWLINE}}
\ No newline at end of file
diff --git a/src/tests/template_unittest_test_modifiers_dict01.anno_out b/src/tests/template_unittest_test_modifiers_dict01.anno_out
new file mode 100644
index 0000000..e14adb6
--- /dev/null
+++ b/src/tests/template_unittest_test_modifiers_dict01.anno_out
@@ -0,0 +1,11 @@
+{{#FILE=template_unittest_test_modifiers.in}}{{#SEC=__{{MAIN}}__}}<html>
+<body>
+{{#SEC=UPDATE_SECTION}} {{#VAR=UPDATE}}monday & tuesday{{/VAR}}
+ {{#VAR=UPDATE:html_escape}}monday &amp; tuesday{{/VAR}}
+ {{#VAR=UPDATE:javascript_escape}}monday \x26amp; tuesday{{/VAR}}
+ {{#VAR=UPDATE:html_escape:url_query_escape}}monday+%26amp%3Bamp%3B+tuesday{{/VAR}}
+{{/SEC}} <IMG src=foo.jpg align={{#VAR=ALIGNMENT}}"right"{{/VAR}}>
+ <IMG src="mouseover() {img=\'foo.jpg\' align={{#VAR=ALIGNMENT:javascript_escape}}\x22right\x22{{/VAR}}}">
+{{#INC=SIMPLE:html_escape}}{{#FILE=template_unittest_test_simple.in}}{{#SEC=__{{MAIN}}__}}<html> <head> {{#VAR=HEAD}}{{/VAR}} </head> <body> {{#VAR=BODY}}{{/VAR}} </body> </html>{{#VAR=BI_NEWLINE}} {{/VAR}}{{/SEC}}{{/FILE}}{{/INC}}</body>
+</html>{{#VAR=BI_NEWLINE}}
+{{/VAR}}{{/SEC}}{{/FILE}}
\ No newline at end of file
diff --git a/src/tests/template_unittest_test_modifiers_dict01.out b/src/tests/template_unittest_test_modifiers_dict01.out
new file mode 100644
index 0000000..ea3cbad
--- /dev/null
+++ b/src/tests/template_unittest_test_modifiers_dict01.out
@@ -0,0 +1 @@
+<html><body>monday & tuesdaymonday &amp; tuesdaymonday \x26amp; tuesdaymonday+%26amp%3Bamp%3B+tuesday<IMG src=foo.jpg align="right"><IMG src="mouseover() {img=\'foo.jpg\' align=\x22right\x22}"><html><head></head><body></body></html> </body></html>
diff --git a/src/tests/template_unittest_test_nul.in b/src/tests/template_unittest_test_nul.in
new file mode 100644
index 0000000..82a536b
--- /dev/null
+++ b/src/tests/template_unittest_test_nul.in
Binary files differ
diff --git a/src/tests/template_unittest_test_nul_dict01.out b/src/tests/template_unittest_test_nul_dict01.out
new file mode 100644
index 0000000..90e9c58
--- /dev/null
+++ b/src/tests/template_unittest_test_nul_dict01.out
Binary files differ
diff --git a/src/tests/template_unittest_test_selective_css.in b/src/tests/template_unittest_test_selective_css.in
new file mode 100644
index 0000000..96e8246
--- /dev/null
+++ b/src/tests/template_unittest_test_selective_css.in
@@ -0,0 +1,15 @@
+{{%AUTOESCAPE context="CSS"}}
+
+P.abstract {
+ margin-{{AE_START_EDGE}}:0;
+ text-align:{{AE_END_EDGE}};
+ font-size:{{AE_FONT_SIZE_PC}};
+}
+.italic {font-style:{{AE_ITALIC}}}
+
+H1 {
+ font-size:{{AE_FONT_SIZE_PT}};
+ color:{{AE_MAUVE_RGB}};
+}
+
+BODY {background:transparent url('{{AE_URL_GOOD}}');}
\ No newline at end of file
diff --git a/src/tests/template_unittest_test_selective_css_dict01.out b/src/tests/template_unittest_test_selective_css_dict01.out
new file mode 100644
index 0000000..1abf6f8
--- /dev/null
+++ b/src/tests/template_unittest_test_selective_css_dict01.out
@@ -0,0 +1 @@
+P.abstract {margin-left:0;text-align:center;font-size:120%;}.italic {font-style:italic}H1 {font-size:12pt;color:#FF7BD5;}BODY {background:transparent url('httpwww.google.com');}
\ No newline at end of file
diff --git a/src/tests/template_unittest_test_selective_css_dict02.out b/src/tests/template_unittest_test_selective_css_dict02.out
new file mode 100644
index 0000000..f86da6f
--- /dev/null
+++ b/src/tests/template_unittest_test_selective_css_dict02.out
@@ -0,0 +1 @@
+P.abstract {margin-:0;text-align:;font-size:;}.italic {font-style:}H1 {font-size:;color:;}BODY {background:transparent url('');}
\ No newline at end of file
diff --git a/src/tests/template_unittest_test_selective_html.in b/src/tests/template_unittest_test_selective_html.in
new file mode 100644
index 0000000..e005fbc
--- /dev/null
+++ b/src/tests/template_unittest_test_selective_html.in
@@ -0,0 +1,15 @@
+{{%AUTOESCAPE context="HTML"}}
+{{!Is a copy of template_unittest_test_autoescape_simple.in}}
+<h1>{{AE_TITLE_GOOD}}</h1>
+<h1>{{AE_TITLE_BAD}}</h1>
+<img src="{{AE_URL_GOOD}}">
+<img src="{{AE_URL_BAD}}">
+<div style="background:{{AE_BG_COLOR_GOOD}}">
+<div style="background:{{AE_BG_COLOR_BAD}}">
+</div>
+<script>
+ var msg_text = '{{AE_JS_GOOD}}';
+ var msg_text = '{{AE_JS_BAD}}';
+</script>
+Goodbye {{AE_USERNAME_GOOD}}!
+Goodbye {{AE_USERNAME_BAD}}!
\ No newline at end of file
diff --git a/src/tests/template_unittest_test_selective_html_dict01.out b/src/tests/template_unittest_test_selective_html_dict01.out
new file mode 100644
index 0000000..4bec106
--- /dev/null
+++ b/src/tests/template_unittest_test_selective_html_dict01.out
@@ -0,0 +1 @@
+<h1>Hello World!</h1><h1>Hello <script>alert(1)</script> World!</h1><img src="http://www.google.com/"><img src="#"><div style="background:red"><div style="background:evil! "></div><script>var msg_text = 'your text here';var msg_text = 'your text\x27is clever\x27thanks';</script>Goodbye Mr. Nice!Goodbye Doctor<script>alert(2)</script>Evil!
\ No newline at end of file
diff --git a/src/tests/template_unittest_test_selective_html_dict02.out b/src/tests/template_unittest_test_selective_html_dict02.out
new file mode 100644
index 0000000..2aa9b17
--- /dev/null
+++ b/src/tests/template_unittest_test_selective_html_dict02.out
@@ -0,0 +1 @@
+<h1></h1><h1></h1><img src=""><img src=""><div style="background:"><div style="background:"></div><script>var msg_text = '';var msg_text = '';</script>Goodbye !Goodbye !
\ No newline at end of file
diff --git a/src/tests/template_unittest_test_selective_js.in b/src/tests/template_unittest_test_selective_js.in
new file mode 100644
index 0000000..de511db
--- /dev/null
+++ b/src/tests/template_unittest_test_selective_js.in
@@ -0,0 +1,7 @@
+{{%AUTOESCAPE context="JAVASCRIPT"}}
+ var msg_text1 = '{{AE_JS_GOOD}}';
+ var msg_text2 = '{{AE_JS_BAD}}';
+{{!Below variable is not quoted}}
+ var msg_text3 = {{AE_JS_BAD}};
+{{!Below variable ends up with :h:j}}
+ var msg_text4 = '{{AE_JS_BAD:h}}';
\ No newline at end of file
diff --git a/src/tests/template_unittest_test_selective_js_dict01.out b/src/tests/template_unittest_test_selective_js_dict01.out
new file mode 100644
index 0000000..75fce98
--- /dev/null
+++ b/src/tests/template_unittest_test_selective_js_dict01.out
@@ -0,0 +1 @@
+var msg_text1 = 'your text here';var msg_text2 = 'your text\x27is clever\x27thanks';var msg_text3 = null;var msg_text4 = 'your text\x26#39;is clever\x26#39;thanks';
\ No newline at end of file
diff --git a/src/tests/template_unittest_test_selective_js_dict02.out b/src/tests/template_unittest_test_selective_js_dict02.out
new file mode 100644
index 0000000..440a14b
--- /dev/null
+++ b/src/tests/template_unittest_test_selective_js_dict02.out
@@ -0,0 +1 @@
+var msg_text1 = '';var msg_text2 = '';var msg_text3 = ;var msg_text4 = '';
\ No newline at end of file
diff --git a/src/tests/template_unittest_test_simple.in b/src/tests/template_unittest_test_simple.in
new file mode 100644
index 0000000..386fb63
--- /dev/null
+++ b/src/tests/template_unittest_test_simple.in
@@ -0,0 +1,8 @@
+<html>
+<head>
+ {{HEAD}}
+</head>
+<body>
+ {{BODY}}
+</body>
+</html>{{BI_NEWLINE}}
\ No newline at end of file
diff --git a/src/tests/template_unittest_test_simple_dict01.out b/src/tests/template_unittest_test_simple_dict01.out
new file mode 100644
index 0000000..e91f0cc
--- /dev/null
+++ b/src/tests/template_unittest_test_simple_dict01.out
@@ -0,0 +1 @@
+<html><head> This is the head </head><body></body></html>
diff --git a/src/tests/template_unittest_test_simple_dict02.out b/src/tests/template_unittest_test_simple_dict02.out
new file mode 100644
index 0000000..30e84fd
--- /dev/null
+++ b/src/tests/template_unittest_test_simple_dict02.out
@@ -0,0 +1 @@
+<html><head></head><body></body></html>
diff --git a/src/tests/template_unittest_test_simple_dict03.out b/src/tests/template_unittest_test_simple_dict03.out
new file mode 100644
index 0000000..5106739
--- /dev/null
+++ b/src/tests/template_unittest_test_simple_dict03.out
@@ -0,0 +1,2 @@
+<html><head> </head><body>
+</body></html>
diff --git a/src/tests/template_unittest_test_valid1.in b/src/tests/template_unittest_test_valid1.in
new file mode 100644
index 0000000..57f72b3
--- /dev/null
+++ b/src/tests/template_unittest_test_valid1.in
@@ -0,0 +1,3 @@
+This is ok.
+{This is also ok.}
+Look ma!, no template substitutions at all!
diff --git a/src/tests/template_unittest_test_valid1_dict01.out b/src/tests/template_unittest_test_valid1_dict01.out
new file mode 100644
index 0000000..c8e3d30
--- /dev/null
+++ b/src/tests/template_unittest_test_valid1_dict01.out
@@ -0,0 +1 @@
+This is ok.{This is also ok.}Look ma!, no template substitutions at all!
\ No newline at end of file
diff --git a/src/windows/config.h b/src/windows/config.h
new file mode 100644
index 0000000..d8a1e94
--- /dev/null
+++ b/src/windows/config.h
@@ -0,0 +1,222 @@
+/* A manual version of config.h fit for windows machines. */
+
+/* Sometimes we accidentally #include this config.h instead of the one
+ in .. -- this is particularly true for msys/mingw, which uses the
+ unix config.h but also runs code in the windows directory.
+*/
+#if defined(__MINGW32__) || defined(__MINGW64__)
+#define CTEMPLATE_DLL_DECL
+// Read the unix-style config.h, then define our own include guard so the
+// windows-style settings below are skipped -- reading both would be bad!
+#include "../config.h"
+#define GOOGLE_CTEMPLATE_WINDOWS_CONFIG_H_
+#endif
+
+#ifndef GOOGLE_CTEMPLATE_WINDOWS_CONFIG_H_
+#define GOOGLE_CTEMPLATE_WINDOWS_CONFIG_H_
+
+/* Namespace for Google classes */
+#define GOOGLE_NAMESPACE ctemplate
+
+/* the location of <unordered_map> or <hash_map> */
+#define HASH_MAP_H <hash_map>
+
+/* the namespace of hash_map/hash_set */
+#define HASH_NAMESPACE stdext
+
+/* the location of <unordered_set> or <hash_set> */
+#define HASH_SET_H <hash_set>
+
+/* Define to 1 if you have the <byteswap.h> header file. */
+#undef HAVE_BYTESWAP_H
+
+/* Define to 1 if you have the <dirent.h> header file, and it defines `DIR'.
+ */
+#undef HAVE_DIRENT_H
+
+/* Define to 1 if you have the <dlfcn.h> header file. */
+#undef HAVE_DLFCN_H
+
+/* Define to 1 if you have the <endian.h> header file. */
+#undef HAVE_ENDIAN_H
+
+/* Define to 1 if you have the `getopt' function. */
+#undef HAVE_GETOPT
+
+/* Define to 1 if you have the <getopt.h> header file. */
+#undef HAVE_GETOPT_H
+
+/* Define to 1 if you have the `getopt_long' function. */
+#undef HAVE_GETOPT_LONG
+
+/* define if the compiler has hash_map */
+#define HAVE_HASH_MAP 1
+
+/* define if the compiler has hash_set */
+#define HAVE_HASH_SET 1
+
+/* Define to 1 if you have the <inttypes.h> header file. */
+#undef HAVE_INTTYPES_H
+
+/* Define to 1 if you have the <libkern/OSByteOrder.h> header file. */
+#undef HAVE_LIBKERN_OSBYTEORDER_H
+
+/* Define to 1 if you have the <machine/endian.h> header file. */
+#undef HAVE_MACHINE_ENDIAN_H
+
+/* Define to 1 if you have the <memory.h> header file. */
+#define HAVE_MEMORY_H 1
+
+/* define if the compiler implements namespaces */
+#define HAVE_NAMESPACES 1
+
+/* Define to 1 if you have the <ndir.h> header file, and it defines `DIR'. */
+#undef HAVE_NDIR_H
+
+/* Define if you have POSIX threads libraries and header files. */
+#undef HAVE_PTHREAD
+
+/* define if the compiler implements pthread_rwlock_* */
+#undef HAVE_RWLOCK
+
+/* Define to 1 if you have the <stdint.h> header file. */
+#undef HAVE_STDINT_H
+
+/* Define to 1 if you have the <stdlib.h> header file. */
+#define HAVE_STDLIB_H 1
+
+/* Define to 1 if you have the <strings.h> header file. */
+#undef HAVE_STRINGS_H
+
+/* Define to 1 if you have the <string.h> header file. */
+#define HAVE_STRING_H 1
+
+/* Define to 1 if you have the <sys/byteorder.h> header file. */
+#undef HAVE_SYS_BYTEORDER_H
+
+/* Define to 1 if you have the <sys/dir.h> header file, and it defines `DIR'.
+ */
+#undef HAVE_SYS_DIR_H
+
+/* Define to 1 if you have the <sys/endian.h> header file. */
+#undef HAVE_SYS_ENDIAN_H
+
+/* Define to 1 if you have the <sys/isa_defs.h> header file. */
+#undef HAVE_SYS_ISA_DEFS_H
+
+/* Define to 1 if you have the <sys/ndir.h> header file, and it defines `DIR'.
+ */
+#undef HAVE_SYS_NDIR_H
+
+/* Define to 1 if you have the <sys/stat.h> header file. */
+#define HAVE_SYS_STAT_H 1
+
+/* Define to 1 if you have the <sys/types.h> header file. */
+#define HAVE_SYS_TYPES_H 1
+
+/* Define to 1 if the system has the type `uint32_t'. */
+#undef HAVE_UINT32_T
+
+/* Define to 1 if the system has the type `uint64_t'. */
+#undef HAVE_UINT64_T
+
+/* Define to 1 if you have the <unistd.h> header file. */
+#undef HAVE_UNISTD_H
+
+/* define if the compiler supports unordered_{map,set} */
+#undef HAVE_UNORDERED_MAP
+
+/* Define to 1 if you have the <utime.h> header file. */
+#undef HAVE_UTIME_H
+
+/* Define to 1 if the system has the type `u_int32_t'. */
+#undef HAVE_U_INT32_T
+
+/* Define to 1 if the system has the type `u_int64_t'. */
+#undef HAVE_U_INT64_T
+
+/* define if your compiler has __attribute__ */
+#undef HAVE___ATTRIBUTE__
+
+/* Define to 1 if the system has the type `__int32'. */
+#define HAVE___INT32 1
+
+/* Define to 1 if the system has the type `__int64'. */
+#define HAVE___INT64 1
+
+/* The namespace to put the htmlparser code. */
+#define HTMLPARSER_NAMESPACE google_ctemplate_streamhtmlparser
+
+/* define if first argument to InterlockedExchange is just LONG */
+#undef INTERLOCKED_EXCHANGE_NONVOLATILE
+
+/* Define to the sub-directory in which libtool stores uninstalled libraries.
+ */
+#undef LT_OBJDIR
+
+/* Name of package */
+#undef PACKAGE
+
+/* Define to the address where bug reports for this package should be sent. */
+#undef PACKAGE_BUGREPORT
+
+/* Define to the full name of this package. */
+#undef PACKAGE_NAME
+
+/* Define to the full name and version of this package. */
+#define PACKAGE_STRING "ctemplate 1.1"
+
+/* Define to the one symbol short name of this package. */
+#undef PACKAGE_TARNAME
+
+/* Define to the home page for this package. */
+#undef PACKAGE_URL
+
+/* Define to the version of this package. */
+#undef PACKAGE_VERSION
+
+/* printf format code for printing an ssize_t in signed decimal */
+#define PRIdS "Id"
+
+/* printf format code for printing a size_t in unsigned decimal */
+#define PRIuS "Iu"
+
+/* printf format code for printing a size_t in hexadecimal */
+#define PRIxS "Ix"
+
+/* Define to necessary symbol if this constant uses a non-standard name on
+ your system. */
+#undef PTHREAD_CREATE_JOINABLE
+
+/* Define to 1 if you have the ANSI C header files. */
+#define STDC_HEADERS 1
+
+/* the namespace where STL code like vector<> is defined */
+#define STL_NAMESPACE std
+
+/* Version number of package */
+#undef VERSION
+
+/* Stops putting the code inside the Google namespace */
+#define _END_GOOGLE_NAMESPACE_ }
+
+/* Puts following code inside the Google namespace */
+#define _START_GOOGLE_NAMESPACE_ namespace ctemplate {
+
+// ---------------------------------------------------------------------
+// Extra stuff not found in config.h.in
+
+// This must be defined before anything else in our project: make sure
+// that when compiling the dll, we export our functions/classes (the
+// _FOR_UNITTESTS variant is dllimport instead). Safe to define here
+// because this file is only used internally, to compile the DLL, and
+// every dll source file #includes "config.h" before anything else.
+#ifndef CTEMPLATE_DLL_DECL
+# define CTEMPLATE_DLL_DECL __declspec(dllexport)
+# define CTEMPLATE_DLL_DECL_FOR_UNITTESTS __declspec(dllimport)
+#endif
+
+// TODO(csilvers): include windows/port.h in every relevant source file instead?
+#include "windows/port.h"
+
+#endif /* GOOGLE_CTEMPLATE_WINDOWS_CONFIG_H_ */
diff --git a/src/windows/ctemplate/find_ptr.h b/src/windows/ctemplate/find_ptr.h
new file mode 100644
index 0000000..8e6fec5
--- /dev/null
+++ b/src/windows/ctemplate/find_ptr.h
@@ -0,0 +1,75 @@
+// Copyright (c) 2012, Olaf van der Spek <olafvdspek@gmail.com>
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+// * Neither the name of Google Inc. nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+// ---
+// Author: Olaf van der Spek <olafvdspek@gmail.com>
+
+#ifndef TEMPLATE_FIND_PTR_H_
+#define TEMPLATE_FIND_PTR_H_
+
+// NOTE: if you are statically linking the template library into your binary
+// (rather than using the template .dll), set '/D CTEMPLATE_DLL_DECL='
+// as a compiler flag in your project file to turn off the dllimports.
+#ifndef CTEMPLATE_DLL_DECL
+# define CTEMPLATE_DLL_DECL __declspec(dllimport)
+#endif
+
+namespace ctemplate {
+
+// Returns a pointer to the value stored under key v in c, or NULL if absent.
+template <class T, class U>
+typename T::value_type::second_type* find_ptr(T& c, U v) {
+  const typename T::iterator pos = c.find(v);
+  return pos != c.end() ? &pos->second : NULL;
+}
+
+// Const overload: returns a pointer-to-const to the value under v, or NULL.
+template <class T, class U>
+const typename T::value_type::second_type* find_ptr(const T& c, U v) {
+  const typename T::const_iterator pos = c.find(v);
+  return pos != c.end() ? &pos->second : NULL;
+}
+
+// Returns the value stored under v in c by value, or NULL if v is absent.
+template <class T, class U>
+typename T::value_type::second_type find_ptr2(T& c, U v) {
+  const typename T::iterator pos = c.find(v);
+  return pos != c.end() ? pos->second : NULL;
+}
+
+// Const overload: returns the value stored under v by value, or NULL.
+template <class T, class U>
+const typename T::value_type::second_type find_ptr2(const T& c, U v) {
+  const typename T::const_iterator pos = c.find(v);
+  return pos != c.end() ? pos->second : NULL;
+}
+
+}
+
+#endif // TEMPLATE_FIND_PTR_H_
diff --git a/src/windows/ctemplate/per_expand_data.h b/src/windows/ctemplate/per_expand_data.h
new file mode 100644
index 0000000..6abce97
--- /dev/null
+++ b/src/windows/ctemplate/per_expand_data.h
@@ -0,0 +1,154 @@
+// Copyright (c) 2008, Google Inc.
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+// * Neither the name of Google Inc. nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+// ---
+// Author: csilvers@google.com (Craig Silverstein)
+//
+// In addition to a TemplateDictionary, there is also a PerExpandData
+// dictionary. This dictionary holds information that applies to one
+// call to Expand, such as whether to annotate the template expansion
+// output. A template dictionary is associated with a template (.tpl)
+// file; a per-expand dictionary is associated to a particular call to
+// Expand() in a .cc file.
+//
+// For (many) more details, see the doc/ directory.
+
+#ifndef TEMPLATE_PER_EXPAND_DATA_H_
+#define TEMPLATE_PER_EXPAND_DATA_H_
+
+#include <stdlib.h> // for NULL
+#include <string.h> // for strcmp
+#include <sys/types.h>
+#include <hash_map>
+#include <ctemplate/template_string.h> // for StringHash
+
+// NOTE: if you are statically linking the template library into your binary
+// (rather than using the template .dll), set '/D CTEMPLATE_DLL_DECL='
+// as a compiler flag in your project file to turn off the dllimports.
+#ifndef CTEMPLATE_DLL_DECL
+# define CTEMPLATE_DLL_DECL __declspec(dllimport)
+#endif
+
+namespace ctemplate {
+
+class TemplateModifier;
+class TemplateAnnotator;
+
+class CTEMPLATE_DLL_DECL PerExpandData {
+ public:
+  // All members start out NULL, so annotation is off by default.
+  PerExpandData()
+      : annotate_path_(NULL), annotator_(NULL),
+        expand_modifier_(NULL), map_(NULL) {}
+
+  ~PerExpandData();
+
+  // Turns on annotation of the expanded output.  template_path_start
+  // marks the start of a template path: when the filename of a
+  // template-include is printed, everything up to and including
+  // template_path_start is elided, which makes the output depend less
+  // on where the template files live in the filesystem.
+  void SetAnnotateOutput(const char* template_path_start) {
+    annotate_path_ = template_path_start;
+  }
+
+  // True when the expanded output should be annotated.
+  bool annotate() const { return NULL != annotate_path_; }
+
+  // The path given to SetAnnotateOutput(); undefined unless annotate().
+  const char* annotate_path() const { return annotate_path_; }
+
+  // Installs the TemplateAnnotator used while annotating is on, in
+  // place of the default text-based annotator that is used when this
+  // is never called.  The annotator is aliased (not copied) by this
+  // object and is what annotator() returns afterwards.  Passing NULL
+  // is special: it makes annotator() go back to returning its
+  // built-in instance.
+  void SetAnnotator(TemplateAnnotator* annotator) {
+    annotator_ = annotator;
+  }
+
+  // The TemplateAnnotator to use while annotating is on: either the
+  // instance previously handed to SetAnnotator(), or the callable
+  // built-in text-based annotator.
+  TemplateAnnotator* annotator() const;
+
+  // Sets a TemplateModifier that is applied to every template expanded
+  // via this call to Expand(): the template (.tpl) file being
+  // expanded, as well as any sub-templates expanded because of
+  // {{>INCLUDE}} directives.  The caller must keep the modifier alive
+  // for the lifetime of this object.
+  void SetTemplateExpansionModifier(const TemplateModifier* modifier) {
+    expand_modifier_ = modifier;
+  }
+
+  // The modifier set by SetTemplateExpansionModifier(), or NULL if none.
+  const TemplateModifier* template_expansion_modifier() const {
+    return expand_modifier_;
+  }
+
+  // Stores a value for template modifiers to read (see
+  // template_modifiers.h).  Passing a NULL value clears whatever was
+  // stored under key before.  The caller must keep key and value
+  // pointing at valid data for the lifetime of this object.
+  void InsertForModifiers(const char* key, const void* value);
+
+  // Fetches data specific to this Expand call, or NULL when key is not
+  // found.  Meant to be used by template modifiers only.
+  const void* LookupForModifiers(const char* key) const;
+
+  // LookupForModifiers(), with the result cast to a C string.
+  const char* LookupForModifiersAsString(const char* key) const {
+    const void* const value = LookupForModifiers(key);
+    return static_cast<const char*>(value);
+  }
+
+ private:
+#ifdef _MSC_VER
+  typedef stdext::hash_map<const char*, const void*, StringHash> DataMap;
+#else
+  struct DataEq {
+    bool operator()(const char* s1, const char* s2) const;
+  };
+  typedef stdext::hash_map<const char*, const void*, StringHash, DataEq>
+      DataMap;
+#endif
+
+  const char* annotate_path_;                // NULL while annotation is off
+  TemplateAnnotator* annotator_;             // set by SetAnnotator()
+  const TemplateModifier* expand_modifier_;  // expansion modifier, if any
+  DataMap* map_;                             // modifier data (presumably)
+
+  PerExpandData(const PerExpandData&);   // disallow evil copy constructor
+  void operator=(const PerExpandData&);  // disallow evil operator=
+};
+
+}
+
+#endif // TEMPLATE_PER_EXPAND_DATA_H_
diff --git a/src/windows/ctemplate/str_ref.h b/src/windows/ctemplate/str_ref.h
new file mode 100644
index 0000000..20a2663
--- /dev/null
+++ b/src/windows/ctemplate/str_ref.h
@@ -0,0 +1,134 @@
+// Copyright (c) 2012, Olaf van der Spek <olafvdspek@gmail.com>
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+// * Neither the name of Google Inc. nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+// ---
+// Author: Olaf van der Spek <olafvdspek@gmail.com>
+
+#ifndef TEMPLATE_STR_REF_H_
+#define TEMPLATE_STR_REF_H_
+#include <cstddef>  // for NULL, size_t
+#include <cstring>  // for strlen
+
+// NOTE: if you are statically linking the template library into your binary
+// (rather than using the template .dll), set '/D CTEMPLATE_DLL_DECL='
+// as a compiler flag in your project file to turn off the dllimports.
+#ifndef CTEMPLATE_DLL_DECL
+# define CTEMPLATE_DLL_DECL __declspec(dllimport)
+#endif
+
+namespace ctemplate {
+
+// A non-owning reference to a contiguous range [begin, end) of elements.
+// T is the pointer type used to walk the range (see the typedefs below).
+// Only the two end pointers are stored; the elements are never copied.
+template <class T>
+class str_ref_basic {
+ public:
+  // Creates an empty reference (both ends NULL).
+  str_ref_basic() {
+    clear();
+  }
+
+  // References the elements of a container-like object c, which must
+  // provide begin()/end() with subtractable iterators.  An empty c yields
+  // an empty reference, so *c.begin() is never evaluated for it.
+  template <class U>
+  str_ref_basic(const U& c) {
+    if (c.end() != c.begin())
+      assign(&*c.begin(), c.end() - c.begin() + &*c.begin());
+    else
+      clear();
+  }
+
+  // References the range [b, e).
+  str_ref_basic(const void* b, const void* e) {
+    assign(b, e);
+  }
+
+  // References the sz elements starting at b.
+  str_ref_basic(const void* b, size_t sz) {
+    assign(b, sz);
+  }
+
+  // References the NUL-terminated string b (terminator excluded).
+  // A NULL b yields an empty reference.
+  str_ref_basic(const char* b) {
+    if (b)
+      assign(b, std::strlen(b));  // std::strlen is declared by <cstring>
+    else
+      clear();
+  }
+
+  // Resets to the empty reference.
+  void clear() {
+    begin_ = end_ = NULL;
+  }
+
+  // Points this reference at [b, e); both are reinterpreted as T.
+  void assign(const void* b, const void* e) {
+    begin_ = reinterpret_cast<T>(b);
+    end_ = reinterpret_cast<T>(e);
+  }
+
+  // Points this reference at the sz elements starting at b.
+  void assign(const void* b, size_t sz) {
+    begin_ = reinterpret_cast<T>(b);
+    end_ = begin_ + sz;
+  }
+
+  // Start of the referenced range.
+  T begin() const { return begin_; }
+
+  // One past the last referenced element.
+  T end() const { return end_; }
+
+  // Same as begin().
+  T data() const { return begin(); }
+
+  // Number of referenced elements.
+  size_t size() const {
+    return end() - begin();
+  }
+
+  // True when the range holds no elements.
+  bool empty() const {
+    return begin() == end();
+  }
+
+ private:
+  T begin_;  // first element; NULL after clear()
+  T end_;    // one past the last element; NULL after clear()
+};
+
+typedef str_ref_basic<const unsigned char*> data_ref;  // range of bytes
+typedef str_ref_basic<const char*> str_ref;            // range of chars
+
+}
+
+#endif // TEMPLATE_STR_REF_H_
diff --git a/src/windows/ctemplate/template.h b/src/windows/ctemplate/template.h
new file mode 100644
index 0000000..4d12cb7
--- /dev/null
+++ b/src/windows/ctemplate/template.h
@@ -0,0 +1,492 @@
+// Copyright (c) 2006, Google Inc.
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+// * Neither the name of Google Inc. nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+// ---
+//
+// This file implements the Template class. For information about
+// how to use this class, and to write the templates it takes as input,
+// see the doc/ directory.
+
+#ifndef CTEMPLATE_TEMPLATE_H_
+#define CTEMPLATE_TEMPLATE_H_
+
+#include <time.h> // for time_t
+#include <string>
+#include <ctemplate/template_cache.h>
+#include <ctemplate/template_enums.h>
+#include <ctemplate/template_string.h>
+
+// We include this just so folks don't have to include both template.h
+// and template_dictionary.h, or template_namelist.h etc, to use the
+// template system; we don't actually use anything in these files
+// ourselves.
+#if 1
+#include <ctemplate/template_dictionary.h>
+#include <ctemplate/template_namelist.h>
+#include <ctemplate/per_expand_data.h>
+#else
+namespace ctemplate {
+class TemplateDictionaryInterface;
+class PerExpandData;
+}
+#endif
+
+namespace google_ctemplate_streamhtmlparser {
+class HtmlParser;
+}
+
+// NOTE: if you are statically linking the template library into your binary
+// (rather than using the template .dll), set '/D CTEMPLATE_DLL_DECL='
+// as a compiler flag in your project file to turn off the dllimports.
+#ifndef CTEMPLATE_DLL_DECL
+# define CTEMPLATE_DLL_DECL __declspec(dllimport)
+#endif
+
+namespace ctemplate {
+
+// These free functions form the "simple" template API, and support
+// the most common operations (expanding a template from a file, and
+// from a string). They all just delegate to a default instance of
+// the TemplateCache object.
+//
+// For more sophisticated use of the template system, you may need
+// to create your own TemplateCache object, and work directly with
+// it. See template_cache.h for details.
+
+extern CTEMPLATE_DLL_DECL const TemplateCache* default_template_cache();
+extern CTEMPLATE_DLL_DECL TemplateCache* mutable_default_template_cache();
+
+
+// ---- EXPANDING A TEMPLATE -------
+// ExpandTemplate
+// ExpandWithData
+
+// Expands the template named filename using the given dictionary, with
+// no per-expand data.  The template is read from disk only if needed:
+// it comes from the cache when it was loaded before or was put there
+// explicitly by a call to StringToTemplateCache().
+// The first version is the most general, followed by common-case code.
+inline bool ExpandTemplate(const TemplateString& filename, Strip strip,
+                           const TemplateDictionaryInterface* dictionary,
+                           ExpandEmitter* output) {
+  TemplateCache* const cache = mutable_default_template_cache();
+  return cache->ExpandWithData(filename, strip, dictionary, NULL, output);
+}
+inline bool ExpandTemplate(const TemplateString& filename, Strip strip,
+                           const TemplateDictionaryInterface* dictionary,
+                           std::string* output_buffer) {
+  TemplateCache* const cache = mutable_default_template_cache();
+  return cache->ExpandWithData(filename, strip, dictionary, NULL,
+                               output_buffer);
+}
+
+// Call this instead of Expand() when per-expand data should be used at
+// expand time.  The extra PerExpandData structure (see
+// per_expand_data.h) says whether the template should be annotated and
+// holds any data for template modifiers; a NULL per_expand_data makes
+// this exactly the same as Expand().  Most general version comes first.
+inline bool ExpandWithData(const TemplateString& filename, Strip strip,
+                           const TemplateDictionaryInterface* dictionary,
+                           PerExpandData* per_expand_data,
+                           ExpandEmitter* output) {
+  TemplateCache* const cache = mutable_default_template_cache();
+  return cache->ExpandWithData(filename, strip, dictionary, per_expand_data,
+                               output);
+}
+inline bool ExpandWithData(const TemplateString& filename, Strip strip,
+                           const TemplateDictionaryInterface* dictionary,
+                           PerExpandData* per_expand_data,
+                           std::string* output_buffer) {
+  TemplateCache* const cache = mutable_default_template_cache();
+  return cache->ExpandWithData(filename, strip, dictionary, per_expand_data,
+                               output_buffer);
+}
+
+// ---- INSERTING INTO THE CACHE -------
+// LoadTemplate
+// StringToTemplateCache
+
+// Reads a file from disk and inserts it into the template cache, if it
+// is not already there.  Returns true on success, or false if the
+// template could not be found or could not be parsed.  Calling this is
+// never required -- Expand() loads templates lazily when needed -- but
+// it is useful for making sure templates exist before trying to expand
+// them, for controlling disk access patterns, or for other reasons.
+inline bool LoadTemplate(const TemplateString& filename, Strip strip) {
+  TemplateCache* const cache = mutable_default_template_cache();
+  return cache->LoadTemplate(filename, strip);
+}
+
+// Inserts the given string into the default template cache, as though
+// it were a file read from disk.  The key used here can later be given
+// to Expand() as its first argument (filename), and can also serve as
+// the 'filename' of sub-included templates, in
+// TemplateDictionary::SetFilename().
+inline bool StringToTemplateCache(const TemplateString& key,
+                                  const TemplateString& content,
+                                  Strip strip) {
+  TemplateCache* const cache = mutable_default_template_cache();
+  return cache->StringToTemplateCache(key, content, strip);
+}
+inline bool StringToTemplateCache(const TemplateString& key,
+                                  const char* content, size_t content_len,
+                                  Strip strip) {
+  TemplateCache* const cache = mutable_default_template_cache();
+  return cache->StringToTemplateCache(key, content, content_len, strip);
+}
+
+
+// ---------------------------------------------------------------------
+// The following are deprecated.
+// TODO(csilvers): move to parsed_template.h
+
+// TemplateState describes how far the parsing of a template has come.
+enum TemplateState {
+  TS_UNUSED, TS_EMPTY,  // (not used); before parsing is complete
+  TS_ERROR,             // a syntax error was found during parsing
+  TS_READY              // parsing has completed successfully
+};
+
+// Used for Auto-Escape. It represents the different contexts a template may
+// be initialized in via the AUTOESCAPE pragma in the template file
+// (or string). It is only public for testing. The contexts are:
+// - TC_HTML: The template contains HTML code. Need not be a complete HTML
+// page just content the browser interprets in the context of
+// HTML parsing. This should be the most common context to use.
+// This mode activates our HTML parser.
+// - TC_JS: The template contains raw javascript. If your template
+// starts with <script> tag, it is of type TC_HTML not TC_JS.
+// TC_JS is typically associated with a content-type of
+// text/javascript. This mode activates our HTML parser.
+// - TC_CSS: The template contains CSS (Cascading Style Sheets). If your
+// template starts with a <style> tag, it is of type TC_HTML
+// not TC_CSS. A TC_CSS template is typically associated with a
+// text/css content-type header. Currently treated same as
+// TC_HTML but don't rely on that. We may later develop
+// CSS-specific sanitizers and parsers.
+// - TC_JSON: The template contains raw JSON. Applies javascript_escape
+// to variables. Note: javascript_escape is safer than
+// json_escape which we may want to remove.
+// - TC_XML: The template contains raw XML. Applies xml_escape to variables.
+// CAUTION: This mode is not suitable for cases where the
+// application data encapsulated in XML requires special
+// escaping, such as the case of XHTML.
+// TC_XML is typically associated with text/xml content-type.
+// - TC_MANUAL: Equivalent to not specifying auto-escaping at all.
+//
+// TODO(csilvers): Make this a private part of the Template class.
+enum TemplateContext {
+  TC_UNUSED, TC_HTML, TC_JS, TC_CSS, TC_JSON, TC_XML, TC_MANUAL };
+
+
+// This class is deprecated. Old code uses this class heavily (via
+// GetTemplate() to obtain a Template*, and then methods on that
+// Template*) but new code should use the free functions above.
+class CTEMPLATE_DLL_DECL Template {
+ public:
+ // ---- METHODS FOR TOOLS ----
+ // These are not intended for normal use, but are public so a
+ // tool can use them.
+
+ // Used by make_tpl_varnames_h.cc.
+ void WriteHeaderEntries(std::string *outstring) const;
+
+ // ---- DEPRECATED METHODS ----
+ // These methods used to be the primary way of using the Template
+ // object, but have been deprecated in favor of the (static)
+ // methods above. If you are using these deprecated methods,
+ // consider moving to the above methods instead, or to moving to
+ // using your own TemplateCache (which supports richer operations
+ // on parsed templates).
+
+ // Loads a template from disk or cache or string, and returns the Template*.
+ // INSTEAD, use the static Expand that takes a filename.
+ static Template *GetTemplate(const TemplateString& filename, Strip strip);
+ virtual ~Template(); // when the time comes to delete these Template*'s.
+
+ // Parses a string immediately and returns the resulting Template*.
+ // You can call the (deprecated) non-static Expand() method on this
+ // template in order to expand it with a dictionary. You are
+ // responsible for deleting the Template* when you are done with it.
+ // INSTEAD, use StringToTemplateCache (with a key) plus the static Expand().
+ // TODO(csilvers): return a const Template* instead.
+ static Template* StringToTemplate(const TemplateString& content,
+ Strip strip);
+ static Template* StringToTemplate(const char* content, size_t content_len,
+ Strip strip) {
+ return StringToTemplate(TemplateString(content, content_len), strip);
+ }
+
+ // Non-static Expand*() works on a Template* returned from GetTemplate().
+ // INSTEAD, use static expand with a filename (or key-name for strings).
+ bool ExpandWithData(ExpandEmitter* output,
+ const TemplateDictionaryInterface* dictionary,
+ PerExpandData* per_expand_data) const {
+ return ExpandWithDataAndCache(output, dictionary, per_expand_data,
+ default_template_cache());
+ }
+ bool ExpandWithData(std::string* output_buffer,
+ const TemplateDictionaryInterface* dictionary,
+ PerExpandData* per_expand_data) const {
+ if (output_buffer == NULL) return false;
+ StringEmitter e(output_buffer);
+ return ExpandWithData(&e, dictionary, per_expand_data);
+ }
+ bool Expand(ExpandEmitter* output,
+ const TemplateDictionaryInterface* dictionary) const {
+ return ExpandWithData(output, dictionary, NULL);
+ }
+ bool Expand(std::string* output_buffer,
+ const TemplateDictionaryInterface* dictionary) const {
+ return ExpandWithData(output_buffer, dictionary, NULL);
+ }
+
+ // Dump to stdout or a string. filename is just used to annotate output.
+ void Dump(const char *filename) const;
+ void DumpToString(const char *filename, std::string *out) const;
+
+ // Retrieves the state, filenames, or strip mode of this Template.
+ TemplateState state() const;
+ const char *template_file() const;  // NOTE(review): presumably resolved_filename_
+ const char *original_filename() const;
+ Strip strip() const;
+
+ // These work at the level of groups of templates, so each one just calls
+ // through to the default TemplateCache; see template_cache.h for details.
+ // INSTEAD, create your own TemplateCache and call these methods on that.
+ static bool SetTemplateRootDirectory(const std::string& dir) {
+ return mutable_default_template_cache()->SetTemplateRootDirectory(dir);
+ }
+ static bool AddAlternateTemplateRootDirectory(const std::string& dir) {
+ return mutable_default_template_cache()->AddAlternateTemplateRootDirectory(
+ dir);
+ }
+ static std::string template_root_directory() {
+ return default_template_cache()->template_root_directory();
+ }
+ static std::string FindTemplateFilename(
+ const std::string& unresolved) {
+ return default_template_cache()->FindTemplateFilename(unresolved);
+ }
+ static void RemoveStringFromTemplateCache(const std::string& key) {
+ mutable_default_template_cache()->Delete(key);  // Delete()'s result is dropped
+ }
+ static void ClearCache() {
+ mutable_default_template_cache()->ClearCache();
+ }
+ static void ReloadAllIfChanged() {
+ mutable_default_template_cache()->ReloadAllIfChanged(
+ TemplateCache::LAZY_RELOAD);  // always lazy, never IMMEDIATE_RELOAD
+ }
+
+ // ---- EXTRA-DEPRECATED METHODS ----
+ // These methods were deprecated even before the move to
+ // TemplateCache. We'd really like you to move from these to one
+ // of the "approved" methods, or even one of the deprecated
+ // methods. Comments here don't even describe what these
+ // functions do, just how to transition off of using them.
+
+ // INSTEAD, use the StringToTemplateCache function that takes the strip mode.
+ static bool StringToTemplateCache(const TemplateString& key,
+ const TemplateString& content);
+ static bool StringToTemplateCache(const TemplateString& key,
+ const char* content, size_t content_len) {  // (pointer, length) form
+ return StringToTemplateCache(key, TemplateString(content, content_len));
+ }
+ // This guards against typos: you want the global (free-function)
+ // StringToTemplateCache here, not the one in Template.
+ static bool StringToTemplateCache(const TemplateString& key,
+ const char* content, Strip);
+
+ // INSTEAD, use ReloadAllIfChanged (static here, or on a TemplateCache).
+ bool ReloadIfChanged();
+
+ protected:
+ friend class SectionTemplateNode; // for access to set_state(), ParseState
+ friend class TemplateTemplateNode; // for recursive call to Expand()
+
+ // Template constructor
+ // Reads the template file and parses it into a parse tree of
+ // TemplateNodes by calling the method ReloadIfChanged. The top node
+ // is a section node with the arbitrary name "__{{MAIN}}__".
+ // 'strip' says how to handle whitespace when expanding the template:
+ // DO_NOT_STRIP keeps the template exactly as-is.
+ // STRIP_BLANK_LINES elides all blank lines in the template.
+ // STRIP_WHITESPACE elides all blank lines, and also all whitespace
+ // at either the beginning or end of a line. It also removes any
+ // linefeed (possibly following whitespace) that follows a closing
+ // '}}' of any kind of template marker EXCEPT a template variable,
+ // so a linefeed can be removed anywhere by simply placing a comment
+ // marker as the last element on the line.
+ // The two stripping modes allow the template to include whitespace
+ // for readability without adding to the expanded output.
+ Template(const TemplateString& filename, Strip strip, TemplateCache* owner);
+
+ // MaybeInitHtmlParser
+ // In TemplateContexts where the HTML parser is needed, we
+ // initialize it in the appropriate mode. We also do a sanity
+ // check (cannot fail) on the template filename. This function is
+ // called at most once for a Template. 'in_tag' is only meaningful
+ // for TC_HTML: it is true for templates that start inside an HTML
+ // tag and hence are expected to contain HTML attribute name/value
+ // pairs only. It is false for standard HTML templates.
+ void MaybeInitHtmlParser(bool in_tag);
+
+ // BuildTree
+ // Parses the contents of the file (retrieved via ReloadIfChanged)
+ // and stores the resulting parse structure in tree_. Returns true
+ // iff the tree-builder encountered no errors. NOTE: this takes
+ // ownership of input_buffer, which should have been created via new[],
+ // and will delete it. (input_buffer_end: presumably one past the end.)
+ bool BuildTree(const char *input_buffer, const char* input_buffer_end);
+
+ // Internal version of ReloadIfChanged, used when the caller already
+ // holds a write-lock on g_template_mutex.
+ bool ReloadIfChangedLocked();
+
+ // set_state
+ // Sets the state of the template to new_state. Used during BuildTree().
+ void set_state(TemplateState new_state);
+
+ // StripBuffer
+ // Modifies *buffer in-place based on the strip_ mode, to remove
+ // extra whitespace. May delete[] the input buffer and replace it
+ // with a new buffer (hence the char**). Used by ReloadIfChanged().
+ void StripBuffer(char **buffer, size_t* len);
+
+ // The filename we were originally given in the Template() constructor
+ const std::string original_filename_;
+ // The pathname as fully resolved on the filesystem
+ std::string resolved_filename_;
+ time_t filename_mtime_; // lastmod time for filename last time we loaded it
+
+ // What to do with whitespace at template-expand time
+ Strip strip_;
+
+ // Keeps track of where we are in reloading, or if there was an error loading
+ TemplateState state_;
+
+ // The cache we got this template from. This is not well-defined: a
+ // Template can be in more than one cache.
+ // TODO(csilvers): remove this once we deprecate the one user, which
+ // is ReloadIfChanged.
+ TemplateCache* template_cache_;
+
+ // The current template-contents, as read from the file
+ const char* template_text_;
+ int template_text_len_;  // NOTE(review): presumably the length of template_text_
+
+ // The current parsed template structure. Has pointers into template_text_.
+ class SectionTemplateNode *tree_; // defined in template.cc
+
+ // Template markers have the form {{VARIABLE}}, etc. These constants
+ // define the {{ and }} that delimit template markers.
+ struct CTEMPLATE_DLL_DECL MarkerDelimiters {
+   const char* start_marker;  // text that opens a marker
+   size_t start_marker_len;   // strlen(start_marker)
+   const char* end_marker;    // text that closes a marker
+   size_t end_marker_len;     // strlen(end_marker)
+
+   // Begins with the default pair; initializers follow declaration order.
+   MarkerDelimiters()
+       : start_marker("{{"),  // The default start-marker
+         start_marker_len(strlen(start_marker)),
+         end_marker("}}"),
+         end_marker_len(strlen(end_marker)) {}
+ };
+
+ // The current parsing state. Used in BuildTree() and its subroutines.
+ struct CTEMPLATE_DLL_DECL ParseState {
+ const char* bufstart;  // NOTE(review): presumably the start of the unparsed text
+ const char* bufend;  // NOTE(review): presumably the end of the buffer -- confirm
+ enum { PS_UNUSED, GETTING_TEXT, GETTING_NAME } phase;
+ MarkerDelimiters current_delimiters;  // default-constructed: "{{" and "}}"
+ ParseState()
+ : bufstart(NULL), bufend(NULL), phase(PS_UNUSED), current_delimiters()
+ {}
+ };
+ ParseState parse_state_;
+
+ // All templates are initialized to TC_MANUAL (no Auto-Escape). Then,
+ // during template parsing (BuildTree()), if an AUTOESCAPE pragma is
+ // encountered, the context changes appropriately.
+ TemplateContext initial_context_;
+ // Non-null if the template was initialized in an Auto-Escape mode that
+ // requires a parser (currently TC_HTML, TC_CSS and TC_JS).
+ google_ctemplate_streamhtmlparser::HtmlParser *htmlparser_;
+
+ // A sorted list of trusted variable names, declared here because a unittest
+ // needs to verify that it is appropriately sorted (if it were unsorted,
+ // the binary search of this array would fail).
+ static const char * const kSafeWhitelistedVariables[];
+ static const size_t kNumSafeWhitelistedVariables;  // presumably its element count
+
+ private:
+ friend class TemplateCache;
+ friend class TemplateCachePeer; // to access num_deletes_
+
+ // Internal implementation of Expand*(), with the cache passed explicitly
+ bool ExpandWithDataAndCache(ExpandEmitter* output,
+ const TemplateDictionaryInterface *dictionary,
+ PerExpandData* per_expand_data,
+ const TemplateCache* cache) const;
+
+ // Called for recursive expands, when template_lock is already held.
+ bool ExpandLocked(ExpandEmitter* output,
+ const TemplateDictionaryInterface *dictionary,
+ PerExpandData* per_expand_data,
+ const TemplateCache* cache) const;
+
+ // Returns the lastmod time (presumably filename_mtime_; confirm in the .cc).
+ // For string-based templates, not backed by a file, this returns 0
+ time_t mtime() const;
+
+ // These are helper routines to StripFile (presumably now StripBuffer). I'd
+ // make them static in template.cc, but they use the MarkerDelimiters struct.
+ static bool ParseDelimiters(const char* text, size_t textlen,
+ MarkerDelimiters* delim);
+ static bool IsBlankOrOnlyHasOneRemovableMarker(const char** line, size_t* len,
+ const MarkerDelimiters& delim);
+ static size_t InsertLine(const char *line, size_t len, Strip strip,
+ const MarkerDelimiters& delim, char* buffer);
+
+ // Only used by template_cache_test, via TemplateCachePeer (a friend class).
+ static int num_deletes() { return num_deletes_; }
+
+ static int num_deletes_; // how many times the destructor has been called
+
+ // Declared private so outside code can't copy-construct or assign a Template
+ Template(const Template&);
+ void operator=(const Template &);
+};
+
+}
+
+
+#endif // CTEMPLATE_TEMPLATE_H_
diff --git a/src/windows/ctemplate/template_annotator.h b/src/windows/ctemplate/template_annotator.h
new file mode 100644
index 0000000..e7daf6d
--- /dev/null
+++ b/src/windows/ctemplate/template_annotator.h
@@ -0,0 +1,142 @@
+// Copyright (c) 2009, Google Inc.
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+// * Neither the name of Google Inc. nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+// ---
+//
+// The template expansion system provides a set of hooks that allow for
+// the insertion of diagnostic content into the output stream for the use
+// by content developers and testers. For instance, the default text
+// annotation mode inserts strings bracketed by {{ }} to describe every
+// occurrence of a dynamic substitution feature. That mode turns the
+// rendering into a logical text description of the construction of
+// template-based output. It is useful for regression testing of output
+// in conjunction with text-based diffing tools.
+//
+// An annotation mode is implemented with the TemplateAnnotator interface.
+// When template annotation is turned on, then during template expansion
+// whenever a dynamic substitution feature is encountered, a call is made
+// to one of the TemplateAnnotator functions. In response to a call
+// an implementation can render any additional content into the passed
+// emitter, which is the same emitter that the rendering output is going
+// to.
+//
+// Template annotation is turned on and the template annotator subclass
+// set by methods in ctemplate::PerExpandData.
+
+#ifndef TEMPLATE_TEMPLATE_ANNOTATOR_H_
+#define TEMPLATE_TEMPLATE_ANNOTATOR_H_
+
+#include <string>
+
+// NOTE: if you are statically linking the template library into your binary
+// (rather than using the template .dll), set '/D CTEMPLATE_DLL_DECL='
+// as a compiler flag in your project file to turn off the dllimports.
+#ifndef CTEMPLATE_DLL_DECL
+# define CTEMPLATE_DLL_DECL __declspec(dllimport)
+#endif
+
+namespace ctemplate {
+
+class ExpandEmitter;
+
+// This is the abstract interface for an annotation mode. A new annotation
+// mode is introduced by subclassing and implementing each function
+// to add annotation content. There is one function for each internal
+// template expansion event type. The emitter argument passed to the
+// function is the same stream that the expanding content is being output to;
+// so the action of an implementation will be to add additional inline
+// content. The emitter argument is never to be remembered beyond each
+// function call.
+class CTEMPLATE_DLL_DECL TemplateAnnotator {
+ public:
+ TemplateAnnotator() { }
+ virtual ~TemplateAnnotator() { }  // virtual: this is a polymorphic base class
+
+ // Called before processing a subtemplate include marker.
+ // 'value' is the include marker name.
+ virtual void EmitOpenInclude(ExpandEmitter* emitter, const std::string& value) = 0;
+ // Called after processing a subtemplate include marker.
+ virtual void EmitCloseInclude(ExpandEmitter* emitter) = 0;
+
+ // Called before opening a template or subtemplate file for processing.
+ // 'value' is the filename.
+ virtual void EmitOpenFile(ExpandEmitter* emitter, const std::string& value) = 0;
+ // Called after processing a template or subtemplate file.
+ virtual void EmitCloseFile(ExpandEmitter* emitter) = 0;
+
+ // Called before processing a section.
+ // 'value' is the section name.
+ virtual void EmitOpenSection(ExpandEmitter* emitter, const std::string& value) = 0;
+ // Called after processing a section.
+ virtual void EmitCloseSection(ExpandEmitter* emitter) = 0;
+
+ // Called before processing a variable marker; 'value' is the variable name.
+ virtual void EmitOpenVariable(ExpandEmitter* emitter,
+ const std::string& value) = 0;
+ // Called after processing a variable marker.
+ virtual void EmitCloseVariable(ExpandEmitter* emitter) = 0;
+
+ // NOTE(review): undocumented upstream; presumably reports a missing file.
+ virtual void EmitFileIsMissing(ExpandEmitter* emitter,
+ const std::string& value) = 0;
+
+ private:
+ // Declared private so outside code can't copy-construct or assign
+ TemplateAnnotator(const TemplateAnnotator&);
+ void operator=(const TemplateAnnotator&);
+};
+
+// A concrete TemplateAnnotator that inserts annotations which have a
+// standard text form bracketed by {{ }}. It is used as the default
+// annotation implementation when annotation is turned on by
+// PerExpandData and no annotator type is specified.
+class CTEMPLATE_DLL_DECL TextTemplateAnnotator : public TemplateAnnotator {
+ public:
+ TextTemplateAnnotator() { }
+ virtual void EmitOpenInclude(ExpandEmitter* emitter, const std::string& value);
+ virtual void EmitCloseInclude(ExpandEmitter* emitter);
+ virtual void EmitOpenFile(ExpandEmitter* emitter, const std::string& value);
+ virtual void EmitCloseFile(ExpandEmitter* emitter);
+ virtual void EmitOpenSection(ExpandEmitter* emitter, const std::string& value);
+ virtual void EmitCloseSection(ExpandEmitter* emitter);
+ virtual void EmitOpenVariable(ExpandEmitter* emitter, const std::string& value);
+ virtual void EmitCloseVariable(ExpandEmitter* emitter);
+ virtual void EmitFileIsMissing(ExpandEmitter* emitter,
+ const std::string& value);
+
+ private:
+ // Declared private so outside code can't copy-construct or assign
+ TextTemplateAnnotator(const TextTemplateAnnotator&);
+ void operator=(const TextTemplateAnnotator&);
+};
+
+}
+
+
+#endif // TEMPLATE_TEMPLATE_ANNOTATOR_H_
diff --git a/src/windows/ctemplate/template_cache.h b/src/windows/ctemplate/template_cache.h
new file mode 100644
index 0000000..2ee6b67
--- /dev/null
+++ b/src/windows/ctemplate/template_cache.h
@@ -0,0 +1,378 @@
+// Copyright (c) 2009, Google Inc.
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+// * Neither the name of Google Inc. nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+// ---
+//
+// This file implements the Template Cache used to store templates.
+
+#ifndef TEMPLATE_TEMPLATE_CACHE_H_
+#define TEMPLATE_TEMPLATE_CACHE_H_
+
+#include <hash_map> // for stdext::hash_map<>
+#include <string> // for string
+#include <utility> // for pair
+#include <vector> // for vector<>
+#include <ctemplate/template_emitter.h> // for ExpandEmitter, etc
+#include <ctemplate/template_enums.h> // for Strip
+#include <ctemplate/template_string.h>
+#include <ctemplate/per_expand_data.h>
+namespace ctemplate {
+class FileStat;
+}
+class Mutex;
+class TemplateCacheUnittest;
+
+// NOTE: if you are statically linking the template library into your binary
+// (rather than using the template .dll), set '/D CTEMPLATE_DLL_DECL='
+// as a compiler flag in your project file to turn off the dllimports.
+#ifndef CTEMPLATE_DLL_DECL
+# define CTEMPLATE_DLL_DECL __declspec(dllimport)
+extern template class __declspec(dllimport) std::allocator<std::string>;
+extern template class __declspec(dllimport) std::vector<std::string>;
+#else
+template class __declspec(dllexport) std::allocator<std::string>;
+template class __declspec(dllexport) std::vector<std::string>;
+#endif
+
+namespace ctemplate {
+
+class PerExpandData;
+class Template;
+class TemplateCachePeer;
+class TemplateDictionaryInterface;
+
+// A cache to store parsed templates.
+class CTEMPLATE_DLL_DECL TemplateCache {
+ public:
+ TemplateCache();
+ ~TemplateCache();
+
+ // ---- CREATING A TEMPLATE OBJECT -------
+ // LoadTemplate
+ // StringToTemplateCache
+
+ // Attempts to load the template object stored under its filename,
+ // into the template cache. It first checks if the object is already
+ // in the cache. Any object retrieved from the cache is then
+ // checked to see if its status is marked for "reload if changed."
+ // If so, ReloadIfChanged is called on the retrieved object. Returns
+ // true if the object is loaded. Also returns true if the object
+ // already exists, and no reload was required.
+ //
+ // When it fails to retrieve one from the cache, it creates a new
+ // template object, passing the filename and 'strip' values to the
+ // constructor. (See constructor below for the meaning of the
+ // flags.) If it succeeds in creating an object, including loading
+ // and parsing the associated template file, the object is stored in
+ // the cache, and the method returns true.
+ //
+ // If it fails in loading and parsing the template file, either
+ // because the file was not found or it contained syntax errors,
+ // then the newly created object is deleted and the method returns
+ // false. (NOTE: This description is much longer and less precise
+ // and probably harder to understand than the method itself. Read
+ // the code.)
+ //
+ // To enable Auto-Escape on that template, place the corresponding
+ // AUTOESCAPE pragma at the top of the template file. The template
+ // will then be Auto-Escaped independently of the template it may be
+ // included from or the templates it may include.
+ //
+ // 'Strip' indicates how to handle whitespace when expanding the
+ // template. DO_NOT_STRIP keeps the template exactly as-is.
+ // STRIP_BLANK_LINES elides all blank lines in the template.
+ // STRIP_WHITESPACE elides all blank lines, and also all whitespace
+ // at either the beginning or end of a line. See template constructor
+ // for more details.
+ bool LoadTemplate(const TemplateString& filename, Strip strip);
+
+ // Parses the string as a template file (e.g. "Hello {{WORLD}}"),
+ // and inserts it into the parsed template cache, so it can later be
+ // used by the user. The user specifies a key and a strip, which are
+ // later passed in to expand the template.
+ // Returns true if the template was successfully parsed and
+ // inserted to the template cache, or false otherwise. In particular,
+ // we return false if a string was already cached with the given key.
+ // NOTE: to include this template from within another template (via
+ // "{{>TEMPLATE_THAT_COMES_FROM_A_STRING}}"), the argument you pass
+ // to TemplateDictionary::SetFilename() is the key you used to register
+ // the string-template.
+ bool StringToTemplateCache(const TemplateString& key,
+ const TemplateString& content,
+ Strip strip);
+ bool StringToTemplateCache(const TemplateString& key,
+ const char* content,
+ size_t content_len,
+ Strip strip) {
+ return StringToTemplateCache(key,
+ TemplateString(content, content_len),
+ strip);
+ }
+
+ // ---- EXPANDING A TEMPLATE -------
+ // ExpandWithData
+ // ExpandNoLoad
+
+ // This returns false if the expand failed for some reason: filename
+ // could not be found on disk (and isn't already in the cache), or
+ // the template is mal-formed, or a sub-included template couldn't
+ // be found. Note that even if it returns false, it may have emitted
+ // some output to ExpandEmitter, before it noticed the problem.
+ bool ExpandWithData(const TemplateString& filename, Strip strip,
+ const TemplateDictionaryInterface *dictionary,
+ PerExpandData* per_expand_data,
+ ExpandEmitter* output);
+ bool ExpandWithData(const TemplateString& filename, Strip strip,
+ const TemplateDictionaryInterface* dictionary,
+ PerExpandData* per_expand_data,
+ std::string* output_buffer) {
+ if (output_buffer == NULL) return false;
+ StringEmitter e(output_buffer);
+ return ExpandWithData(filename, strip, dictionary, per_expand_data, &e);
+ }
+
+ // Const version of ExpandWithData, intended for use with frozen
+ // caches. This method returns false if the requested
+ // template-filename is not found in the cache, rather than fetching
+ // the template from disk and continuing, as ExpandWithData does.
+ // (That is why the method can be const.) Likewise, it will return
+ // false, rather than fetch, if any sub-included template filename
+ // is not found in the cache.
+ // Unfortunately, the only way to enforce this last requirement at
+ // the moment is to have the template-cache be Frozen(). So that
+ // is a pre-requisite for calling this method. It may be relaxed
+ // in the future (if we rewrite the control flow to pass around the
+ // necessary state).
+ // Like ExpandWithData(), this may write partial results into output
+ // even if it returns false (due to template error or file not found).
+ bool ExpandNoLoad(const TemplateString& filename, Strip strip,
+ const TemplateDictionaryInterface *dictionary,
+ PerExpandData* per_expand_data,
+ ExpandEmitter* output) const;
+ bool ExpandNoLoad(const TemplateString& filename, Strip strip,
+ const TemplateDictionaryInterface* dictionary,
+ PerExpandData* per_expand_data,
+ std::string* output_buffer) const {
+ if (output_buffer == NULL) return false;
+ StringEmitter e(output_buffer);
+ return ExpandNoLoad(filename, strip, dictionary, per_expand_data, &e);
+ }
+
+ // ---- FINDING A TEMPLATE FILE -------
+
+ // Sets the root directory for all templates used by the program.
+ // After calling this method, the filename passed to GetTemplate may
+ // be a relative pathname (no leading '/'), in which case this
+ // root-directory is prepended to the filename. This clears the old
+ // 'main' root directory, and also all alternate root directories
+ // that may have been added previously.
+ bool SetTemplateRootDirectory(const std::string& directory);
+
+ // Adds an additional search path for all templates used by the
+ // program. You may call this multiple times.
+ bool AddAlternateTemplateRootDirectory(const std::string& directory);
+
+ // Returns the 'main' root directory set by SetTemplateRootDirectory().
+ std::string template_root_directory() const;
+
+ // Given an unresolved filename, look through the template search
+ // path to see if the template can be found. If so, return the path
+ // of the resolved filename, otherwise return an empty string.
+ std::string FindTemplateFilename(const std::string& unresolved)
+ const;
+
+ // ---- MANAGING THE CACHE -------
+ // Freeze
+ // Delete
+ // ClearCache
+ // ReloadAllIfChanged
+ // Clone
+
+ // Marks the template cache as immutable. After this method is called,
+ // the cache can no longer be modified by loading new templates or
+ // reloading existing templates. During expansion only cached
+ // included templates will be used, they won't be loaded on-demand.
+ void Freeze();
+
+ // Delete
+ // Deletes one template object from the cache, if it exists.
+ // This can be used for either file- or string-based templates.
+ // Returns true if the object was deleted, false otherwise.
+ bool Delete(const TemplateString& key);
+
+ // ClearCache
+ // Deletes all the template objects in the cache and all raw
+ // contents cached from StringToTemplateCache. This should only
+ // be done once, just before exiting the program and after all
+ // template expansions are completed. (If you want to refresh the
+ // cache, the correct method to use is ReloadAllIfChanged, not
+ // this one.) Note: this method is not necessary unless you are
+ // testing for memory leaks. Calling this before exiting the
+ // program will prevent unnecessary reporting in that case.
+ void ClearCache();
+
+ // ReloadAllIfChanged
+ // If IMMEDIATE_RELOAD, reloads and parses all templates right away,
+ // if the corresponding template files have changed.
+ // If LAZY_RELOAD, then sets the reload bit on all templates.
+ // Subsequent call to GetTemplate() checks if file has changed, and if so
+ // reloads and parses the file into the cache.
+ //
+ // IMMEDIATE_RELOAD gives a more consistent snapshot of the current
+ // templates, since all templates in the cache are reloaded at
+ // (approximately) the same time. On the other hand, LAZY_RELOAD
+ // causes less of a latency spike, since it does not require
+ // loading lots of templates from disk at the same time. If in
+ // doubt, LAZY_RELOAD is probably a better choice.
+
+ // If a file with the same name as an existing template-file is added
+ // in another search path, ReloadAllIfChanged will pick up the file in
+ // the earlier search-path.
+ enum ReloadType { LAZY_RELOAD, IMMEDIATE_RELOAD };
+ void ReloadAllIfChanged(ReloadType reload_type);
+
+ // Clone
+ // Returns a copy of the cache. It makes a shallow copy of the
+ // parsed_template_cache_, incrementing refcount of templates.
+ // The caller is responsible for deallocating the returned TemplateCache.
+ // NOTE(user): Annotalysis expects this method to have a lock for
+ // a TemplateCache instance local to the method, but we
+ // know that no other threads will have access to the
+ // instance, so ignore thread safety errors.
+ TemplateCache* Clone() const;
+
+ // ---- INSPECTING THE CACHE -------
+ // Dump
+ // DumpToString
+ // TODO(csilvers): implement these?
+
+ private:
+ // TODO(csilvers): nix Template friend once Template::ReloadIfChanged is gone
+ friend class Template; // for ResolveTemplateFilename
+ friend class TemplateTemplateNode; // for ExpandLocked
+ friend class TemplateCachePeer; // for unittests
+ friend class ::TemplateCacheUnittest; // for unittests
+
+ class RefcountedTemplate;
+ struct CachedTemplate;
+ class TemplateCacheHash;
+ class RefTplPtrHash;
+ // due to a bug(?) in MSVC, TemplateCachePeer won't compile unless this
+ // particular typedef is public. Ugh.
+ public:
+ typedef std::pair<TemplateId, int> TemplateCacheKey;
+ private:
+ typedef stdext::hash_map<TemplateCacheKey, CachedTemplate, TemplateCacheHash>
+ TemplateMap;
+ typedef stdext::hash_map<RefcountedTemplate*, int, RefTplPtrHash> TemplateCallMap;
+ // Where to search for files.
+ typedef std::vector<std::string> TemplateSearchPath;
+
+ // GetTemplate
+ // This method is deprecated. It exists here because it is called by
+ // Template::GetTemplate. Also this is used in tests.
+ const Template* GetTemplate(const TemplateString& key, Strip strip);
+
+ bool ResolveTemplateFilename(const std::string& unresolved,
+ std::string* resolved,
+ FileStat* statbuf) const;
+
+ // This is used only for internal (recursive) calls to Expand due
+ // to internal template-includes. It doesn't try to acquire the
+ // global template_lock again, in template.cc.
+ // TODO(csilvers): remove this when template.cc's g_template_lock goes away.
+ bool ExpandLocked(const TemplateString& filename, Strip strip,
+ ExpandEmitter* output,
+ const TemplateDictionaryInterface *dictionary,
+ PerExpandData* per_expand_data);
+
+ bool AddAlternateTemplateRootDirectoryHelper(
+ const std::string& directory,
+ bool clear_template_search_path);
+
+ // DoneWithGetTemplatePtrs
+ // For historical reasons, GetTemplate() returns a raw Template
+ // pointer rather than a refcounted pointer. So it's impossible
+ // for the user to call DecRef on the template when it's done
+ // using it. To make up for that, we provide this routine, which
+ // says "call DecRef()" on *all* Templates ever used by
+ // GetTemplate(). It's safe for the client to call this when it's
+ // done using all templates it's ever retrieved before (via
+ // GetTemplate). Most likely, the user will call this indirectly,
+ // via ClearCache().
+ // TODO(panicker): Consider making this method public.
+ void DoneWithGetTemplatePtrs();
+
+ // IsValidTemplateFilename
+ // Validates the user provided filename before constructing the template
+ bool IsValidTemplateFilename(const std::string& filename,
+ std::string* resolved_filename,
+ FileStat* statbuf) const;
+
+ // GetTemplateLocked
+ // Internal version of GetTemplate. It's used when the function already
+ // has a write-lock on mutex_. It returns a pointer to a refcounted
+ // template (in the cache), or NULL if the template is not found.
+ // It's used by GetTemplate & ForceReloadAllIfChanged.
+ RefcountedTemplate* GetTemplateLocked(
+ const TemplateString& filename,
+ Strip strip,
+ const TemplateCacheKey& key);
+
+ // Refcount
+ // Testing only. Returns the refcount of a template, given its cache key.
+ int Refcount(const TemplateCacheKey template_cache_key) const;
+
+ // TemplateIsCached
+ // Debug only. Returns whether the cache key is in the parsed cache.
+ bool TemplateIsCached(const TemplateCacheKey template_cache_key) const;
+
+ TemplateMap* parsed_template_cache_;
+ bool is_frozen_;
+ TemplateSearchPath search_path_;
+
+ // Since GetTemplate() returns a raw pointer, it's impossible for
+ // the caller to call DecRef() on the returned template when it's
+ // done using it. To make up for that, we store each retval of
+ // GetTemplate in this data structure. Then the user can call
+ // DecRef() on all of them at once, via a DoneWithGetTemplatePtrs()
+ // (which they will probably get at via a call to ClearCache()).
+ TemplateCallMap* get_template_calls_;
+
+ Mutex* const mutex_;
+ Mutex* const search_path_mutex_;
+
+ // Can't invoke copy constructor or assignment operator
+ TemplateCache(const TemplateCache&);
+ void operator=(const TemplateCache &);
+};
+
+}
+
+#endif // TEMPLATE_TEMPLATE_CACHE_H_
diff --git a/src/windows/ctemplate/template_dictionary.h b/src/windows/ctemplate/template_dictionary.h
new file mode 100644
index 0000000..c1f3869
--- /dev/null
+++ b/src/windows/ctemplate/template_dictionary.h
@@ -0,0 +1,464 @@
+// Copyright (c) 2006, Google Inc.
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+// * Neither the name of Google Inc. nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+// ---
+// Author: csilvers@google.com (Craig Silverstein)
+//
+// Based on the 'old' TemplateDictionary by Frank Jernigan.
+//
+// A template dictionary maps names (as found in template files)
+// to their values. There are three types of names:
+// variables: value is a string.
+// sections: value is a list of sub-dicts to use when expanding the section;
+// the section is expanded once per sub-dict.
+// template-include: value is a list of pairs: name of the template file
+// to include, and the sub-dict to use when expanding it.
+// TemplateDictionary has routines for setting these values.
+//
+// For (many) more details, see the doc/ directory.
+
+#ifndef TEMPLATE_TEMPLATE_DICTIONARY_H_
+#define TEMPLATE_TEMPLATE_DICTIONARY_H_
+
+#include <stdarg.h> // for StringAppendV()
+#include <stddef.h> // for size_t and ptrdiff_t
+#include <stdlib.h> // for NULL
+#include <sys/types.h>
+#include <functional> // for less<>
+#include <map>
+#include <string>
+#include <vector>
+
+#include <ctemplate/str_ref.h>
+#include <ctemplate/template_dictionary_interface.h>
+#include <ctemplate/template_modifiers.h>
+#include <ctemplate/template_string.h>
+
+// NOTE: if you are statically linking the template library into your binary
+// (rather than using the template .dll), set '/D CTEMPLATE_DLL_DECL='
+// as a compiler flag in your project file to turn off the dllimports.
+#ifndef CTEMPLATE_DLL_DECL
+# define CTEMPLATE_DLL_DECL __declspec(dllimport)
+#endif
+
+namespace ctemplate {
+template <class T, class C> class ArenaAllocator;
+class UnsafeArena;
+template<typename A, int B, typename C, typename D> class small_map;
+template<typename NormalMap> class small_map_default_init; // in small_map.h
+}
+
+namespace ctemplate {
+
+
+class CTEMPLATE_DLL_DECL TemplateDictionary : public TemplateDictionaryInterface {
+ public:
+ // name is used only for debugging.
+ // arena is used to store all names and values. It can be NULL (the
+ // default), in which case we create our own arena.
+ explicit TemplateDictionary(const TemplateString& name,
+ UnsafeArena* arena=NULL);
+ ~TemplateDictionary();
+
+ // If you want to be explicit, you can use NO_ARENA as a synonym to NULL.
+ static UnsafeArena* const NO_ARENA;
+
+ std::string name() const {
+ return std::string(name_.data(), name_.size());
+ }
+
+ // Returns a recursive copy of this dictionary. This dictionary
+ // *must* be a "top-level" dictionary (that is, not created via
+ // AddSectionDictionary() or AddIncludeDictionary()). Caller owns
+ // the resulting dict, and must delete it. If arena is NULL, we
+ // create our own. Returns NULL if the copy fails (probably because
+ // the "top-level" rule was violated).
+ TemplateDictionary* MakeCopy(const TemplateString& name_of_copy,
+ UnsafeArena* arena=NULL);
+
+ // --- Routines for VARIABLES
+ // These are the five main routines used to set the value of a variable.
+ // As always, wherever you see TemplateString, you can also pass in
+ // either a char* or a C++ string, or a TemplateString(s, slen).
+
+ void SetValue(const TemplateString variable, const TemplateString value);
+ void SetIntValue(const TemplateString variable, long value);
+ void SetFormattedValue(const TemplateString variable, const char* format, ...)
+#if 0
+ __attribute__((__format__ (__printf__, 3, 4)))
+#endif
+ ; // starts at 3 because of implicit 1st arg 'this'
+
+ // Proxy returned by operator[]: assigning to it sets the named variable.
+ class SetProxy {
+ public:
+ SetProxy(TemplateDictionary& dict, const TemplateString& variable)
+ : dict_(dict), variable_(variable) {}
+ // Stores a string value through TemplateDictionary::SetValue().
+ void operator=(str_ref value) {
+ dict_.SetValue(variable_, TemplateString(value.data(), value.size()));
+ }
+ // Stores an integer value through TemplateDictionary::SetIntValue().
+ void operator=(long value) {
+ dict_.SetIntValue(variable_, value);
+ }
+ private:
+ TemplateDictionary& dict_;
+ // Copied, not referenced: the caller's TemplateString may be a temporary.
+ const TemplateString variable_;
+ };
+
+ // Enables dict[variable] = value; see SetProxy for accepted value types.
+ SetProxy operator[](const TemplateString& variable) {
+ return SetProxy(*this, variable);
+ }
+
+ // We also let you set values in the 'global' dictionary which is
+ // referenced when all other dictionaries fail. Note this is a
+ // static method: no TemplateDictionary instance needed. Since
+ // this routine is rarely used, we don't provide variants.
+ static void SetGlobalValue(const TemplateString variable,
+ const TemplateString value);
+
+ // This is used for a value that you want to be 'global', but only
+ // in the scope of a given template, including all its sections and
+ // all its sub-included dictionaries. The main difference between
+ // SetTemplateGlobalValue() and SetValue(), is that
+ // SetTemplateGlobalValue() values persist across template-includes.
+ // This is intended for session-global data; since that should be
+ // fairly rare, we don't provide variants.
+ void SetTemplateGlobalValue(const TemplateString variable,
+ const TemplateString value);
+
+ // Similar to SetTemplateGlobalValue above, this method shows a section in this
+ // template, all its sections, and all its template-includes. This is intended
+ // for session-global data, for example allowing you to show variant portions
+ // of your template for certain browsers/languages without having to call
+ // ShowSection on each template you use.
+ void ShowTemplateGlobalSection(const TemplateString variable);
+
+ // These routines are like SetValue and SetTemplateGlobalValue, but
+ // they do not make a copy of the input data. THE CALLER IS
+ // RESPONSIBLE FOR ENSURING THE PASSED-IN STRINGS LIVE FOR AT LEAST
+ // AS LONG AS THIS DICTIONARY! In general, they yield a quite minor
+ // performance increase for significant increased code fragility,
+ // so do not use them unless you really need the speed improvements.
+ void SetValueWithoutCopy(const TemplateString variable,
+ const TemplateString value);
+ void SetTemplateGlobalValueWithoutCopy(const TemplateString variable,
+ const TemplateString value);
+
+
+ // --- Routines for SECTIONS
+ // We show a section once per dictionary that is added with its name.
+ // Recall that lookups are hierarchical: if a section tried to look
+ // up a variable in its sub-dictionary and fails, it will look next
+ // in its parent dictionary (us). So it's perfectly appropriate to
+ // keep the sub-dictionary empty: that will show the section once,
+ // and take all var definitions from us. ShowSection() is a
+ // convenience routine that does exactly that.
+
+ // Creates an empty dictionary whose parent is us, and returns it.
+ // As always, wherever you see TemplateString, you can also pass in
+ // either a char* or a C++ string, or a TemplateString(s, slen).
+ TemplateDictionary* AddSectionDictionary(const TemplateString section_name);
+ void ShowSection(const TemplateString section_name);
+
+ // A convenience method. Often a single variable is surrounded by
+ // some HTML that should not be printed if the variable has no
+ // value. The way to do this is to put that html in a section.
+ // This method makes it so the section is shown exactly once, with a
+ // dictionary that maps the variable to the proper value. If the
+ // value is "", on the other hand, this method does nothing, so the
+ // section remains hidden.
+ void SetValueAndShowSection(const TemplateString variable,
+ const TemplateString value,
+ const TemplateString section_name);
+
+
+ // --- Routines for TEMPLATE-INCLUDES
+ // Included templates are treated like sections, but they require
+ // the name of the include-file to go along with each dictionary.
+
+ TemplateDictionary* AddIncludeDictionary(const TemplateString variable);
+
+ // This is required for include-templates; it specifies what template
+ // to include. But feel free to call this on any dictionary, to
+ // document what template-file the dictionary is intended to go with.
+ void SetFilename(const TemplateString filename);
+
+ // --- DEBUGGING TOOLS
+
+ // Logs the contents of a dictionary and its sub-dictionaries.
+ // Dump goes to stdout/stderr, while DumpToString goes to the given string.
+ // 'indent' is how much to indent each line of the output.
+ void Dump(int indent=0) const;
+ virtual void DumpToString(std::string* out, int indent=0) const;
+
+
+ // --- DEPRECATED ESCAPING FUNCTIONALITY
+
+ // Escaping in the binary has been deprecated in favor of using modifiers
+ // to do the escaping in the template:
+ // "...{{MYVAR:html_escape}}..."
+ void SetEscapedValue(const TemplateString variable, const TemplateString value,
+ const TemplateModifier& escfn);
+ void SetEscapedFormattedValue(const TemplateString variable,
+ const TemplateModifier& escfn,
+ const char* format, ...)
+#if 0
+ __attribute__((__format__ (__printf__, 4, 5)))
+#endif
+ ; // starts at 4 because of implicit 1st arg 'this'
+ void SetEscapedValueAndShowSection(const TemplateString variable,
+ const TemplateString value,
+ const TemplateModifier& escfn,
+ const TemplateString section_name);
+
+
+ private:
+ friend class SectionTemplateNode; // for access to GetValue(), etc.
+ friend class TemplateTemplateNode; // for access to GetValue(), etc.
+ friend class VariableTemplateNode; // for access to GetValue(), etc.
+ // For unittesting code using a TemplateDictionary.
+ friend class TemplateDictionaryPeer;
+
+ class DictionaryPrinter; // nested class
+ friend class DictionaryPrinter;
+
+ // We need this functor to tell small_map how to create a map<> when
+ // it decides to do so: we want it to create that map on the arena.
+ class map_arena_init;
+
+ typedef std::vector<TemplateDictionary*,
+ ArenaAllocator<TemplateDictionary*, UnsafeArena> >
+ DictVector;
+ // The '4' here is the size where small_map switches from vector<> to map<>.
+ typedef small_map<std::map<TemplateId, TemplateString, std::less<TemplateId>,
+ ArenaAllocator<std::pair<const TemplateId, TemplateString>,
+ UnsafeArena> >,
+ 4, std::equal_to<TemplateId>, map_arena_init>
+ VariableDict;
+ typedef small_map<std::map<TemplateId, DictVector*, std::less<TemplateId>,
+ ArenaAllocator<std::pair<const TemplateId, DictVector*>,
+ UnsafeArena> >,
+ 4, std::equal_to<TemplateId>, map_arena_init>
+ SectionDict;
+ typedef small_map<std::map<TemplateId, DictVector*, std::less<TemplateId>,
+ ArenaAllocator<std::pair<const TemplateId, DictVector*>,
+ UnsafeArena> >,
+ 4, std::equal_to<TemplateId>, map_arena_init>
+ IncludeDict;
+ // This is used only for global_dict_, which is just like a VariableDict
+ // but does not bother with an arena (since this memory lives forever).
+ typedef small_map<std::map<TemplateId, TemplateString, std::less<TemplateId> >,
+ 4, std::equal_to<TemplateId>,
+ small_map_default_init<
+ std::map<TemplateId, TemplateString,
+ std::less<TemplateId> > > >
+ GlobalDict;
+
+
+ // These are helper functions to allocate the parts of the dictionary
+ // on the arena.
+ template<typename T> inline void LazilyCreateDict(T** dict);
+ inline void LazyCreateTemplateGlobalDict();
+ inline DictVector* CreateDictVector();
+ inline TemplateDictionary* CreateTemplateSubdict(
+ const TemplateString& name,
+ UnsafeArena* arena,
+ TemplateDictionary* parent_dict,
+ TemplateDictionary* template_global_dict_owner);
+
+ // This is a helper function to insert <key,value> into m.
+ // Normally, we'd just use m[key] = value, but map rules
+ // require default constructor to be public for that to compile, and
+ // for some types we'd rather not allow that. HashInsert also inserts
+ // the key into an id(key)->key map, to allow for id-lookups later.
+ template<typename MapType, typename ValueType>
+ static void HashInsert(MapType* m, TemplateString key, ValueType value);
+
+ // Constructor created for all children dictionaries. This includes
+ // both a pointer to the parent dictionary and also the
+ // template-global dictionary from which all children (both
+ // IncludeDictionary and SectionDictionary) inherit. Values are
+ // filled into global_template_dict via SetTemplateGlobalValue.
+ explicit TemplateDictionary(const TemplateString& name,
+ class UnsafeArena* arena,
+ TemplateDictionary* parent_dict,
+ TemplateDictionary* template_global_dict_owner);
+
+ // Helps set up the static stuff. Must be called exactly once before
+ // accessing global_dict_. GoogleOnceInit() is used to manage that
+ // initialization in a thread-safe way.
+ static void SetupGlobalDict();
+
+ // Utility functions for copying a string into the arena.
+ // Memdup also copies in a trailing NUL, which is why we have the
+ // trailing-NUL check in the TemplateString version of Memdup.
+ TemplateString Memdup(const char* s, size_t slen);
+ TemplateString Memdup(const TemplateString& s) {
+ // An immutable string that already ends in NUL can be reused as-is.
+ const bool reusable = s.is_immutable() && s.data()[s.size()] == '\0';
+ if (reusable) return s;
+ return Memdup(s.data(), s.size());
+ }
+
+ // Used for recursive MakeCopy calls.
+ TemplateDictionary* InternalMakeCopy(
+ const TemplateString& name_of_copy,
+ UnsafeArena* arena,
+ TemplateDictionary* parent_dict,
+ TemplateDictionary* template_global_dict_owner);
+
+ // A helper for creating section and include dicts.
+ static std::string CreateSubdictName(
+ const TemplateString& dict_name, const TemplateString& sub_name,
+ size_t index, const char* suffix);
+
+ // Must be called whenever we add a value to one of the dictionaries above,
+ // to ensure that we can reconstruct the id -> string mapping.
+ static void AddToIdToNameMap(TemplateId id, const TemplateString& str);
+
+ // Used to do the formatting for the SetFormatted*() functions
+ static int StringAppendV(char* space, char** out,
+ const char* format, va_list ap);
+
+ // How Template::Expand() and its children access the template-dictionary.
+ // These fill the API required by TemplateDictionaryInterface.
+ virtual TemplateString GetValue(const TemplateString& variable) const;
+ virtual bool IsHiddenSection(const TemplateString& name) const;
+ virtual bool IsUnhiddenSection(const TemplateString& name) const {
+ return !IsHiddenSection(name);
+ }
+ virtual bool IsHiddenTemplate(const TemplateString& name) const;
+ virtual const char* GetIncludeTemplateName(
+ const TemplateString& variable, int dictnum) const;
+
+ // Determine whether there's anything set in this dictionary
+ bool Empty() const;
+
+ // This is needed by DictionaryPrinter because it's not a friend
+ // of TemplateString, but we are
+ static std::string PrintableTemplateString(
+ const TemplateString& ts) {
+ return std::string(ts.data(), ts.size());
+ }
+ static bool InvalidTemplateString(const TemplateString& ts) {
+ return ts.data() == NULL;
+ }
+ // Compilers differ about whether nested classes inherit our friendship.
+ // The only thing DictionaryPrinter needs is IdToString, so just re-export.
+ static TemplateString IdToString(TemplateId id) { // for DictionaryPrinter
+ return TemplateString::IdToString(id);
+ }
+
+ // CreateTemplateIterator
+ // This is SectionIterator exactly, just with a different name to
+ // self-document the fact the value applies to a template include.
+ // Caller frees return value.
+ virtual TemplateDictionaryInterface::Iterator* CreateTemplateIterator(
+ const TemplateString& section_name) const;
+
+ // CreateSectionIterator
+ // Factory method implementation that constructs an iterator representing the
+ // set of dictionaries associated with a section name, if any. This
+ // implementation checks the local dictionary itself, not the template-wide
+ // dictionary or the global dictionary.
+ // Caller frees return value.
+ virtual TemplateDictionaryInterface::Iterator* CreateSectionIterator(
+ const TemplateString& section_name) const;
+
+ // TemplateDictionary-specific implementation of dictionary iterators.
+ template <typename T> // T is *TemplateDictionary::const_iterator
+ class Iterator : public TemplateDictionaryInterface::Iterator {
+ protected:
+ friend class TemplateDictionary;  // lets MakeIterator() reach the constructor
+ Iterator(T begin, T end) : begin_(begin), end_(end) { }
+ public:
+ virtual ~Iterator() { }
+ virtual bool HasNext() const;  // defined out of line
+ virtual const TemplateDictionaryInterface& Next();  // defined out of line
+ private:
+ T begin_;  // non-const; presumably advanced by Next() -- confirm in the .cc
+ const T end_;  // end of the range, fixed at construction
+ };
+
+ // Factory: news an Iterator over [dv.begin(), dv.end()); caller deletes it.
+ template <typename T>
+ static Iterator<typename T::const_iterator>* MakeIterator(const T& dv) {
+ return new Iterator<typename T::const_iterator>(dv.begin(), dv.end());
+ }
+
+
+ // The "name" of the dictionary for debugging output (Dump, etc.)
+ // The arena, also set at construction time.
+ class UnsafeArena* const arena_;
+ bool should_delete_arena_; // only true if we 'new arena' in constructor
+ TemplateString name_; // points into the arena, or to static memory
+
+ // The three dictionaries that I own -- for vars, sections, and template-incs
+ VariableDict* variable_dict_;
+ SectionDict* section_dict_;
+ IncludeDict* include_dict_;
+
+
+ // The template_global_dict is consulted if a lookup in the variable, section,
+ // or include dicts named above fails. It forms a convenient place to store
+ // session-specific data that's applicable to all templates in the dictionary
+ // tree.
+ // For the parent-template, template_global_dict_ is not NULL, and
+ // template_global_dict_owner_ is this. For all of its children,
+ // template_global_dict_ is NULL, and template_global_dict_owner_ points to
+ // the root parent-template (the one with the non-NULL template_global_dict_).
+ TemplateDictionary* template_global_dict_;
+ TemplateDictionary* template_global_dict_owner_;
+
+ // My parent dictionary, used when variable lookups at this level fail.
+ // Note this is only for *variables* and *sections*, not templates.
+ TemplateDictionary* parent_dict_;
+ // The static, global dictionary, at the top of the parent-dictionary chain
+ static GlobalDict* global_dict_;
+ static TemplateString* empty_string_; // what is returned on lookup misses
+
+ // The filename associated with this dictionary. If set, this declares
+ // what template the dictionary is supposed to be expanded with. Required
+ // for template-includes, optional (but useful) for 'normal' dicts.
+ const char* filename_;
+
+ private:
+ // Can't invoke copy constructor or assignment operator
+ TemplateDictionary(const TemplateDictionary&);
+ void operator=(const TemplateDictionary&);
+};
+
+}
+
+
+#endif // TEMPLATE_TEMPLATE_DICTIONARY_H_
diff --git a/src/windows/ctemplate/template_dictionary_interface.h b/src/windows/ctemplate/template_dictionary_interface.h
new file mode 100644
index 0000000..03bae41
--- /dev/null
+++ b/src/windows/ctemplate/template_dictionary_interface.h
@@ -0,0 +1,149 @@
+// Copyright (c) 2008, Google Inc.
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+// * Neither the name of Google Inc. nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+// ---
+// Author: williasr@google.com (Scott Williams)
+//
+// This file implements the TemplateDictionaryInterface class. This interface
+// forms the root of the TemplateDictionary class tree, but the interface is
+// minimal enough to allow other sources of template data. Note that the
+// TemplateDictionaryInterface class enumerates the properties expected by
+// Template: it doesn't constrain how data gets into the
+// TemplateDictionaryInterface class to begin with. For these methods, see
+// TemplateDictionary.
+//
+
+#ifndef TEMPLATE_TEMPLATE_DICTIONARY_INTERFACE_H_
+#define TEMPLATE_TEMPLATE_DICTIONARY_INTERFACE_H_
+
+#include <stdlib.h>
+#include <string>
+#include <ctemplate/template_string.h>
+
+// NOTE: if you are statically linking the template library into your binary
+// (rather than using the template .dll), set '/D CTEMPLATE_DLL_DECL='
+// as a compiler flag in your project file to turn off the dllimports.
+#ifndef CTEMPLATE_DLL_DECL
+# define CTEMPLATE_DLL_DECL __declspec(dllimport)
+#endif
+
+namespace ctemplate {
+
+const int kIndent = 2; // num spaces to indent each level -- used with dump
+
+// TemplateDictionaryInterface
+// The template data contains the associated values for
+// variables, the hidden/visible state for sections and included
+// templates, the associated set of dictionaries for sections and
+// included templates, and the template filenames to be expanded in
+// place of template-include nodes.
+class CTEMPLATE_DLL_DECL TemplateDictionaryInterface {
+ public:
+ // Virtual destructor: subclasses may be destroyed through this interface.
+ virtual ~TemplateDictionaryInterface() {}
+
+ protected:
+ // The interface that follows is used at expand-time by Expand.
+ friend class VariableTemplateNode;
+ friend class SectionTemplateNode;
+ friend class TemplateTemplateNode;
+ // These classes reach into our internals for testing.
+ friend class TemplateDictionaryPeer;
+ friend class TemplateDictionaryPeerIterator;
+
+ // GetValue
+ // Returns the value of a variable.
+ virtual TemplateString GetValue(const TemplateString& variable) const = 0;
+
+ // IsHiddenSection
+ // A predicate to indicate the current hidden/visible state of a section
+ // whose name is passed to it.
+ virtual bool IsHiddenSection(const TemplateString& name) const = 0;
+
+ // Dumps a string form of this dictionary into *out, indented by `level`.
+ virtual void DumpToString(std::string* out, int level) const = 0;
+
+ // TemplateDictionaryInterface is an abstract class, so its constructor is
+ // only visible to its subclasses.
+ TemplateDictionaryInterface() {}
+
+ class Iterator {  // abstract iterator over a node's sub-dictionaries
+ protected:
+ Iterator() { }
+ public:
+ virtual ~Iterator() { }
+
+ // Returns false if the iterator is exhausted.
+ virtual bool HasNext() const = 0;
+
+ // Returns the current referent and advances the iterator to the next one.
+ virtual const TemplateDictionaryInterface& Next() = 0;
+ };
+
+ // IsHiddenTemplate
+ // Returns true if the template include is hidden. This is analogous to
+ // IsHiddenSection, but for template nodes.
+ virtual bool IsHiddenTemplate(const TemplateString& name) const = 0;
+
+ // GetIncludeTemplateName
+ // Returns the name of the template associated with the given template
+ // include variable. If more than one dictionary is attached to the include
+ // symbol, dictnum can be used to disambiguate which include name you mean.
+ virtual const char* GetIncludeTemplateName(
+ const TemplateString& variable, int dictnum) const = 0;
+
+ // CreateTemplateIterator
+ // A factory method for constructing an iterator representing the
+ // subdictionaries of the given include node. The caller is
+ // responsible for deleting the return value when it's done with it.
+ virtual Iterator* CreateTemplateIterator(
+ const TemplateString& section) const = 0;
+
+ // CreateSectionIterator
+ // A factory method for constructing an iterator representing the
+ // subdictionaries of the given section node. The caller is
+ // responsible for deleting the return value when it's done with it.
+ virtual Iterator* CreateSectionIterator(
+ const TemplateString& section) const = 0;
+
+ // IsUnhiddenSection
+ // Returns true if the section has been marked visible and false otherwise.
+ virtual bool IsUnhiddenSection(
+ const TemplateString& name) const = 0;
+
+ private:
+ // Disallow copy and assign.
+ TemplateDictionaryInterface(const TemplateDictionaryInterface&);
+ void operator=(const TemplateDictionaryInterface&);
+};
+
+}
+
+
+#endif // TEMPLATE_TEMPLATE_DICTIONARY_INTERFACE_H_
diff --git a/src/windows/ctemplate/template_emitter.h b/src/windows/ctemplate/template_emitter.h
new file mode 100644
index 0000000..58d038a
--- /dev/null
+++ b/src/windows/ctemplate/template_emitter.h
@@ -0,0 +1,76 @@
+// Copyright (c) 2007, Google Inc.
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+// * Neither the name of Google Inc. nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+// ---
+// Author: csilvers@google.com (Craig Silverstein)
+//
+// When we expand a template, we expand into an abstract "emitter".
+// This is typically a string, but could be a file-wrapper, or any
+// other data structure that supports this very simple "append" API.
+
+#ifndef TEMPLATE_TEMPLATE_EMITTER_H_
+#define TEMPLATE_TEMPLATE_EMITTER_H_
+
+#include <sys/types.h> // for size_t
+#include <string>
+
+// NOTE: if you are statically linking the template library into your binary
+// (rather than using the template .dll), set '/D CTEMPLATE_DLL_DECL='
+// as a compiler flag in your project file to turn off the dllimports.
+#ifndef CTEMPLATE_DLL_DECL
+# define CTEMPLATE_DLL_DECL __declspec(dllimport)
+#endif
+
+namespace ctemplate {
+
+class CTEMPLATE_DLL_DECL ExpandEmitter {  // abstract "append" sink for expansion
+ public:
+ ExpandEmitter() {}
+ virtual ~ExpandEmitter() {}  // virtual so emitters can be destroyed via the base
+ virtual void Emit(char c) = 0;  // append a single character
+ virtual void Emit(const std::string& s) = 0;  // append the whole string
+ virtual void Emit(const char* s) = 0;  // append a NUL-terminated C string
+ virtual void Emit(const char* s, size_t slen) = 0;  // append slen bytes from s
+};
+
+
+class CTEMPLATE_DLL_DECL StringEmitter : public ExpandEmitter {
+ std::string* const outbuf_;  // destination string; never deleted by this class
+ public:
+ StringEmitter(std::string* outbuf) : outbuf_(outbuf) {}
+ virtual void Emit(char c) { outbuf_->push_back(c); }
+ virtual void Emit(const std::string& s) { outbuf_->append(s); }
+ virtual void Emit(const char* s) { outbuf_->append(s); }  // s is NUL-terminated
+ virtual void Emit(const char* s, size_t slen) { outbuf_->append(s, slen); }
+};
+
+}
+
+
+#endif // TEMPLATE_TEMPLATE_EMITTER_H_
diff --git a/src/windows/ctemplate/template_enums.h b/src/windows/ctemplate/template_enums.h
new file mode 100644
index 0000000..a240e75
--- /dev/null
+++ b/src/windows/ctemplate/template_enums.h
@@ -0,0 +1,47 @@
+// Copyright (c) 2006, Google Inc.
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+// * Neither the name of Google Inc. nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+// ---
+//
+// Alas that we can't forward-declare enums! These are the ones
+// used by multiple files
+
+#ifndef TEMPLATE_TEMPLATE_ENUMS_H_
+#define TEMPLATE_TEMPLATE_ENUMS_H_
+
+namespace ctemplate {
+
+// Enums for GetTemplate flag values (passed as the `strip` argument)
+enum Strip { DO_NOT_STRIP, STRIP_BLANK_LINES, STRIP_WHITESPACE,
+ NUM_STRIPS }; // sentinel: equals the number of modes above; not a mode itself
+
+}
+
+
+#endif // TEMPLATE_TEMPLATE_ENUMS_H_
diff --git a/src/windows/ctemplate/template_modifiers.h b/src/windows/ctemplate/template_modifiers.h
new file mode 100644
index 0000000..b981a60
--- /dev/null
+++ b/src/windows/ctemplate/template_modifiers.h
@@ -0,0 +1,357 @@
+// Copyright (c) 2007, Google Inc.
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+// * Neither the name of Google Inc. nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+// ---
+// Author: csilvers@google.com (Craig Silverstein)
+//
+// We allow template variables to have modifiers, each possibly with a
+// value associated with it. Format is
+// {{VARNAME:modname[=modifier-value]:modname[=modifier-value]:...}}
+// Modname refers to a functor that takes the variable's value
+// and modifier-value (empty-string if no modifier-value was
+// specified), and returns a munged value. Modifiers are applied
+// left-to-right. We define the legal modnames here, and the
+// functors they refer to.
+//
+// Modifiers have a long-name, an optional short-name (one char;
+// may be \0 if you don't want a shortname), and a functor that's
+// applied to the variable.
+//
+// In addition to the list of modifiers hard-coded in the source code
+// here, it is possible to dynamically register modifiers using a long
+// name starting with "x-". If you wish to define your own modifier
+// class, in your own source code, just subclass TemplateModifier --
+// see template_modifiers.cc for details of how to do that.
+//
+// Adding a new built-in modifier, to this file, takes several steps,
+// both in this .h file and in the corresponding .cc file:
+// 1) .h file: Define a struct for the modifier. It must subclass
+// TemplateModifier.
+// 2) .h file: declare a variable that's an instance of the struct.
+// This is used for people who want to modify the string themselves,
+// via TemplateDictionary::SetEscapedValue.
+// 3) .cc file: define the new modifier's Modify method.
+// 4) .cc file: give storage for the variable declared in the .h file (in 2).
+// 5) .cc file: add the modifier to the g_modifiers array.
+
+#ifndef TEMPLATE_TEMPLATE_MODIFIERS_H_
+#define TEMPLATE_TEMPLATE_MODIFIERS_H_
+
+#include <sys/types.h> // for size_t
+#include <string>
+#include <ctemplate/template_emitter.h> // so we can inline operator()
+#include <ctemplate/per_expand_data.h> // could probably just forward-declare
+
+// NOTE: if you are statically linking the template library into your binary
+// (rather than using the template .dll), set '/D CTEMPLATE_DLL_DECL='
+// as a compiler flag in your project file to turn off the dllimports.
+#ifndef CTEMPLATE_DLL_DECL
+# define CTEMPLATE_DLL_DECL __declspec(dllimport)
+#endif
+
+namespace ctemplate {
+
+class Template;
+// Declares the public, virtual Modify() override of each class below.
+#define MODIFY_SIGNATURE_ \
+ public: \
+ virtual void Modify(const char* in, size_t inlen, \
+ const PerExpandData*, ExpandEmitter* outbuf, \
+ const std::string& arg) const
+
+// If you wish to write your own modifier, it should subclass this
+// class. Your subclass should only define Modify(); for efficiency,
+// we do not make operator() virtual.
+class CTEMPLATE_DLL_DECL TemplateModifier {
+ public:
+ // This function takes a string as input, a char*/size_t pair, and
+ // appends the modified version to the end of outbuf. In addition
+ // to the variable-value to modify (specified via in/inlen), each
+ // Modify call receives two pieces of user-supplied data:
+ // 1) arg: this is the modifier-value, for modifiers that take a
+ // value (e.g. "{{VAR:modifier=value}}"). This value
+ // comes from the template file. For modifiers that take
+ // no modval argument, arg will always be "". For modifiers
+ // that do take such an argument, arg will always start with "=".
+ // 2) per_expand_data: this is a set of data that the application can
+ // associate with a TemplateDictionary, and is passed in to
+ // every variable expanded using that dictionary. This value
+ // comes from the source code.
+ virtual void Modify(const char* in, size_t inlen,
+ const PerExpandData* per_expand_data,
+ ExpandEmitter* outbuf,
+ const std::string& arg) const = 0;
+
+ // This function can be used to speed up modification. If Modify()
+ // is often a noop, you can implement MightModify() to indicate
+ // situations where it's safe to avoid the call to Modify(), because
+ // Modify() won't do any modifications in this case. Note it's
+ // always safe to return true here; you should just return false if
+ // you're certain Modify() can be ignored. This function is
+ // advisory; the template system is not required to call
+ // MightModify() before Modify().
+ virtual bool MightModify(const PerExpandData* /*per_expand_data*/,
+ const std::string& /*arg*/) const {
+ return true;
+ }
+
+ // Convenience wrappers around Modify(), with or without an argument, taking
+ // a std::string or a char*/size_t pair; per_expand_data is passed as NULL.
+ std::string operator()(const char* in, size_t inlen, const std::string& arg="") const {
+ std::string out;
+ // reserve room for minimal escaping: inlen/8 (about 12%) plus 16 bytes
+ out.reserve(inlen + inlen/8 + 16);
+ StringEmitter outbuf(&out);
+ Modify(in, inlen, NULL, &outbuf, arg);
+ return out;
+ }
+ std::string operator()(const std::string& in, const std::string& arg="") const {
+ return operator()(in.data(), in.size(), arg);
+ }
+
+ virtual ~TemplateModifier(); // always need a virtual destructor!
+};
+
+
+// Modifier that returns the input verbatim; intended for testing.
+class CTEMPLATE_DLL_DECL NullModifier : public TemplateModifier {
+ MODIFY_SIGNATURE_;
+};
+extern CTEMPLATE_DLL_DECL NullModifier null_modifier;
+
+// Escapes < > " ' & <non-space whitespace> to &lt; &gt; &quot;
+// &#39; &amp; <space>
+class CTEMPLATE_DLL_DECL HtmlEscape : public TemplateModifier {
+ MODIFY_SIGNATURE_;
+};
+extern CTEMPLATE_DLL_DECL HtmlEscape html_escape;
+
+// Same as HtmlEscape but leaves all whitespace alone, e.g. for <pre>...</pre>.
+class CTEMPLATE_DLL_DECL PreEscape : public TemplateModifier {
+ MODIFY_SIGNATURE_;
+};
+extern CTEMPLATE_DLL_DECL PreEscape pre_escape;
+
+// Like HtmlEscape, but allows HTML entities, <br> tags, <wbr> tags,
+// matched <b> and </b> tags, matched <i> and </i> tags, matched <em> and
+// </em> tags, and matched <span dir=(rtl|ltr)> tags.
+class CTEMPLATE_DLL_DECL SnippetEscape : public TemplateModifier {
+ MODIFY_SIGNATURE_;
+};
+extern CTEMPLATE_DLL_DECL SnippetEscape snippet_escape;
+
+// Replaces characters not safe for an unquoted attribute with underscore.
+// Safe characters are alphanumeric, underscore, dash, period, and colon.
+// The equal sign is also considered safe unless it is at the start
+// or end of the input in which case it is replaced with underscore.
+//
+// We added the equal sign to the safe characters to allow this modifier
+// to be used on attribute name/value pairs in HTML tags such as
+// <div {{CLASS:H=attribute}}>
+// where CLASS is expanded to "class=bla".
+//
+// Note: The equal sign is replaced when found at either boundary of the
+// string due to the concern that it may lead to XSS under some special
+// circumstances: Say, if this string is the value of an attribute in an
+// HTML tag and ends with an equal sign, a browser may possibly end up
+// interpreting the next token as the value of this string rather than
+// a new attribute (esoteric).
+class CTEMPLATE_DLL_DECL CleanseAttribute : public TemplateModifier {
+ MODIFY_SIGNATURE_;
+};
+extern CTEMPLATE_DLL_DECL CleanseAttribute cleanse_attribute;
+
+// Removes characters not safe for a CSS value. Safe characters are
+// alphanumeric, space, underscore, period, comma, exclamation mark,
+// pound, percent, and dash.
+class CTEMPLATE_DLL_DECL CleanseCss : public TemplateModifier {
+ MODIFY_SIGNATURE_;
+};
+extern CTEMPLATE_DLL_DECL CleanseCss cleanse_css;
+
+// Checks that a url is either an absolute http(s) URL or a relative
+// url that doesn't have a protocol hidden in it (i.e. [foo.html] is
+// fine, but not [javascript:foo]) and then performs another type of
+// escaping. Returns the url escaped with the chained modifier if
+// good, otherwise returns a safe replacement URL. That replacement
+// is normally "#", but for <img> tags it is not safe to set the src
+// attribute to "#": some browsers then reload the page, which can
+// cause a DoS.
+class CTEMPLATE_DLL_DECL ValidateUrl : public TemplateModifier {
+ public:
+ explicit ValidateUrl(const TemplateModifier& chained_modifier,
+ const char* unsafe_url_replacement)
+ : chained_modifier_(chained_modifier),
+ unsafe_url_replacement_(unsafe_url_replacement),
+ unsafe_url_replacement_length_(strlen(unsafe_url_replacement)) { }
+ MODIFY_SIGNATURE_;
+ static const char* const kUnsafeUrlReplacement;
+ static const char* const kUnsafeImgSrcUrlReplacement;
+ private:
+ const TemplateModifier& chained_modifier_; // held by reference, not copied
+ const char* unsafe_url_replacement_; // pointer is stored, string not copied
+ int unsafe_url_replacement_length_; // strlen() of the above, narrowed to int
+};
+extern CTEMPLATE_DLL_DECL ValidateUrl validate_url_and_html_escape;
+extern CTEMPLATE_DLL_DECL ValidateUrl validate_url_and_javascript_escape;
+extern CTEMPLATE_DLL_DECL ValidateUrl validate_url_and_css_escape;
+extern CTEMPLATE_DLL_DECL ValidateUrl validate_img_src_url_and_html_escape;
+extern CTEMPLATE_DLL_DECL ValidateUrl validate_img_src_url_and_javascript_escape;
+extern CTEMPLATE_DLL_DECL ValidateUrl validate_img_src_url_and_css_escape;
+
+// Escapes < > & " ' to &lt; &gt; &amp; &quot; &#39; (same as in HtmlEscape).
+// If you use it within a CDATA section, you may be escaping more characters
+// than strictly necessary. If this turns out to be an issue, we will need
+// to add a variant just for CDATA.
+class CTEMPLATE_DLL_DECL XmlEscape : public TemplateModifier {
+ MODIFY_SIGNATURE_;
+};
+extern CTEMPLATE_DLL_DECL XmlEscape xml_escape;
+
+// Escapes characters that cannot appear unescaped in a javascript string
+// literal, assuming UTF-8 encoded input.
+// This does NOT escape all characters that cannot appear unescaped in a
+// javascript regular expression literal.
+class CTEMPLATE_DLL_DECL JavascriptEscape : public TemplateModifier {
+ MODIFY_SIGNATURE_;
+};
+extern CTEMPLATE_DLL_DECL JavascriptEscape javascript_escape;
+
+// Checks that the input is a valid javascript non-string literal,
+// meaning a boolean (true, false) or a numeric value (decimal, hex or octal).
+// If valid, we output the input as is, otherwise we output null instead.
+// Input of zero length is considered valid and nothing is output.
+//
+// The emphasis is on safety against injection of javascript code rather
+// than perfect validation; as such it is possible for non-valid literals to
+// pass through.
+//
+// You would use this modifier for javascript variables that are not
+// enclosed in quotes such as:
+// <script>var a = {{VALUE}};</script> OR
+// <a href="url" onclick="doSubmit({{ID}})">
+// For variables that are quoted (i.e. string literals) use javascript_escape.
+//
+// Limitations:
+// . NaN, +/-Infinity and null are not recognized.
+// . Output is not guaranteed to be a valid literal,
+// e.g. +55+-e34 will be output as is.
+// e.g. trueeee is rejected, as it is not a valid boolean.
+//
+// Details:
+// . For Hex numbers, it checks for case-insensitive 0x[0-9A-F]+
+// that should be a proper check.
+// . For other numbers, it checks for case-insensitive [0-9eE+-.]*
+// so can also accept invalid numbers such as the number 5..45--10.
+// . "true" and "false" (without quotes) are also accepted and that's it.
+//
+class CTEMPLATE_DLL_DECL JavascriptNumber : public TemplateModifier {
+ MODIFY_SIGNATURE_;
+};
+extern CTEMPLATE_DLL_DECL JavascriptNumber javascript_number;
+
+// Escapes characters not in [0-9a-zA-Z.,_:*/~!()-] as %-prefixed hex.
+// A space is encoded as "+".
+class CTEMPLATE_DLL_DECL UrlQueryEscape : public TemplateModifier {
+ MODIFY_SIGNATURE_;
+};
+extern CTEMPLATE_DLL_DECL UrlQueryEscape url_query_escape;
+
+// Escapes " \ / <FF> <CR> <LF> <BS> <TAB> to \" \\ \/ \f \r \n \b \t
+// Also escapes < > & to their corresponding \uXXXX representations
+// (\u003C, \u003E, \u0026 respectively).
+class CTEMPLATE_DLL_DECL JsonEscape : public TemplateModifier {
+ MODIFY_SIGNATURE_;
+};
+extern CTEMPLATE_DLL_DECL JsonEscape json_escape;
+
+// Inserts the given prefix (given as the argument to this modifier)
+// after every newline in the text. Note that it does *not* insert
+// the prefix at the very beginning of the text -- in its expected use,
+// that prefix will already be present before this text, in the
+// template. This is meant to be used internally, and is not exported
+// via the g_modifiers list.
+class CTEMPLATE_DLL_DECL PrefixLine : public TemplateModifier {
+ MODIFY_SIGNATURE_;
+};
+extern CTEMPLATE_DLL_DECL PrefixLine prefix_line;
+
+
+#undef MODIFY_SIGNATURE_
+
+
+// Registers a new template modifier; long_name must start with "x-".
+// If the modifier takes a value (e.g. "{{VAR:x-name=value}}"), then
+// long_name should end with "=". This is similar to getopt(3) syntax.
+// We also allow value-specializations, with specific values specified
+// as part of long_name. For instance:
+// AddModifier("x-mod=", &my_modifierA);
+// AddModifier("x-mod=bar", &my_modifierB);
+// AddModifier("x-mod2", &my_modifierC);
+// For the template
+// {{VAR1:x-mod=foo}} {{VAR2:x-mod=bar}} {{VAR3:x-mod=baz}} {{VAR4:x-mod2}}
+// VAR1 and VAR3 would get modified by my_modifierA, VAR2 by my_modifierB,
+// and VAR4 by my_modifierC. The order of the AddModifier calls is not
+// significant.
+// NOTE(review): presumably returns false if registration fails -- confirm.
+extern CTEMPLATE_DLL_DECL
+bool AddModifier(const char* long_name, const TemplateModifier* modifier);
+
+// Same as AddModifier() above except that the modifier is considered
+// to produce safe output that can be inserted in any context without
+// the need for additional escaping. This difference only impacts
+// the Auto-Escape mode: In that mode, when a variable (or template-include)
+// has a modifier added via AddXssSafeModifier(), it is excluded from
+// further escaping, effectively treated as though it had the :none modifier.
+// Because Auto-Escape is disabled for any variable and template-include
+// that includes such a modifier, use this function with care and ensure
+// that it cannot emit harmful output that could lead to XSS.
+//
+// Some valid uses of AddXssSafeModifier:
+// . A modifier that converts a string to an integer since
+// an integer is generally safe in any context.
+// . A modifier that returns one of a fixed number of safe values
+// depending on properties of the input.
+//
+// Some uses of AddXssSafeModifier that are not recommended:
+// . A modifier that applies some extra formatting to the input
+// before returning it since the output will still contain
+// harmful content if the input does.
+// . A modifier that applies one type of escaping to the input
+// (say HTML-escape). This may be dangerous when the modifier
+// is used in a different context (say Javascript) where this
+// escaping may be inadequate.
+extern CTEMPLATE_DLL_DECL
+bool AddXssSafeModifier(const char* long_name,
+ const TemplateModifier* modifier);
+
+}
+
+
+#endif // TEMPLATE_TEMPLATE_MODIFIERS_H_
diff --git a/src/windows/ctemplate/template_namelist.h b/src/windows/ctemplate/template_namelist.h
new file mode 100644
index 0000000..26bd03a
--- /dev/null
+++ b/src/windows/ctemplate/template_namelist.h
@@ -0,0 +1,169 @@
+// Copyright (c) 2006, Google Inc.
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+// * Neither the name of Google Inc. nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+// ---
+//
+// This file declares some template helper classes that manage
+// template files and make it easier to monitor them.
+//
+// For information about how to use these classes and macros, and to
+// write the templates it takes as input,
+// see the doc/ directory.
+
+#ifndef TEMPLATE_TEMPLATE_NAMELIST_H_
+#define TEMPLATE_TEMPLATE_NAMELIST_H_
+
+#include <time.h> // for time_t
+#include <hash_set>
+#include <string>
+#include <vector>
+#include <ctemplate/template_enums.h> // for Strip
+#include <ctemplate/template_string.h> // for StringHash
+
+// NOTE: if you are statically linking the template library into your binary
+// (rather than using the template .dll), set '/D CTEMPLATE_DLL_DECL='
+// as a compiler flag in your project file to turn off the dllimports.
+#ifndef CTEMPLATE_DLL_DECL
+# define CTEMPLATE_DLL_DECL __declspec(dllimport)
+#endif
+
+namespace ctemplate {
+
+// RegisterTemplateFilename
+// Defines a const char* const variable for the name of a template file. E.g.:
+// RegisterTemplateFilename(EXAMPLE_FN, "example.tpl");
+// This should be used rather than the seemingly equivalent:
+// #define EXAMPLE_FN "example.tpl"
+// As with the latter, any call to GetTemplate may then reference the name
+// via the first argument. In the example, that would be:
+// Template::GetTemplate(EXAMPLE_FN, DO_NOT_STRIP);
+// Because this macro calls TemplateNamelist::RegisterTemplate, which the
+// #define would not, all templates can later be tested for either
+// existence or correct syntax after program start-up.
+// TODO (we wish): Make this macro produce the #include for the auto-generated
+// header files, when and if the macro pre-processor supports that
+#define RegisterTemplateFilename(var, name) \
+ const char* const var = ctemplate::TemplateNamelist::RegisterTemplate(name);
+
+// Class: TemplateNamelist
+// Each call to RegisterTemplate() (normally made through the
+// RegisterTemplateFilename macro) adds the given name to the class's
+// static list of names. The entire list may be retrieved later by
+// calling the GetList accessor method. Or the names may all be checked
+// for existence or for correct syntax via the other methods. We use
+// this in our sanity-checking code to make sure all the templates used
+// by a program exist and are syntactically correct.
+
+class CTEMPLATE_DLL_DECL TemplateNamelist {
+ friend class TemporaryRegisterTemplate;
+ public:
+ // These types should be taken as 'generic' containers. The only
+ // thing you should do with them is call size() and/or iterate
+ // between begin() and end(), and the only operations we promise
+ // the iterators will support are operator* and operator++.
+ typedef stdext::hash_set<std::string, StringHash> NameListType;
+ typedef std::vector<std::string> MissingListType;
+ typedef std::vector<std::string> SyntaxListType;
+
+ public:
+ // Takes a name and pushes it onto the static namelist.
+ // Returns: a pointer to the entry in namelist which holds the name
+ static const char* RegisterTemplate(const char* name);
+
+ // GetList
+ // Description: Returns the collected list of names.
+ static const NameListType& GetList();
+
+ // GetMissingList
+ // If refresh is true or if it is the first time the function is called
+ // in the execution of the program, it creates (or clears) the missing
+ // list and then fills it with the list of
+ // templates that the program knows about but are missing from
+ // the template directory.
+ // If refresh is false and it is not the first time the function is
+ // called, it merely returns the list created in the
+ // call when the last refresh was done.
+ // NOTE: The templates are NOT read, parsed, or cached
+ // by this function.
+ static const MissingListType& GetMissingList(bool refresh);
+
+ // GetBadSyntaxList
+ // If refresh is true or if it is the first time the function is called
+ // in the execution of the program, it creates (or clears) the "bad
+ // syntax" list and then fills it with the list of
+ // templates that the program knows about but contain syntax errors.
+ // A missing file is not considered a syntax error, and thus is
+ // not included in this list.
+ // If refresh is false and it is not the first time the function is
+ // called, it merely returns the list created in the
+ // call when the last refresh was done.
+ // NOTE: The side effect of calling this the first time or
+ // with refresh equal true is that all templates are parsed and cached.
+ // Hence they need to be retrieved with the flags that
+ // the program needs them loaded with (i.e., the strip parameter
+ // passed to Template::GetTemplate.)
+ static const SyntaxListType& GetBadSyntaxList(bool refresh, Strip strip);
+
+ // GetLastmodTime
+ // Iterates through all non-missing templates, and returns the latest
+ // last-modification time for the template files, as returned by stat().
+ // This can be used to make sure template files are getting refreshed.
+ static time_t GetLastmodTime();
+
+ // AllDoExist
+ // Retrieves the missing list (always refreshing the list).
+ // NOTE(review): presumably returns true iff that list is empty (nothing
+ // missing), not "if it contains any names" -- confirm in the .cc file.
+ static bool AllDoExist();
+
+ // IsAllSyntaxOkay
+ // Retrieves the "bad syntax" list (always refreshing the list).
+ // NOTE(review): presumably returns true iff that list is empty, not
+ // "if it contains any names" as this comment used to say -- confirm.
+ // NOTE: The side effect of calling this is that all templates are parsed
+ // and cached, hence they need to be retrieved with the flags that
+ // the program needs them loaded with. (I.e., the strip parameter
+ // ultimately passed to Template::GetTemplate.)
+ static bool IsAllSyntaxOkay(Strip strip);
+
+ protected:
+ // The static lists of names
+ static NameListType *namelist_;
+ static MissingListType *missing_list_;
+ static SyntaxListType *bad_syntax_list_;
+
+ private:
+ TemplateNamelist(const TemplateNamelist&); // disallow copying
+ void operator=(const TemplateNamelist&); // disallow assignment
+};
+
+}
+
+
+#endif // TEMPLATE_TEMPLATE_NAMELIST_H_
diff --git a/src/windows/ctemplate/template_pathops.h b/src/windows/ctemplate/template_pathops.h
new file mode 100644
index 0000000..79f7bf7
--- /dev/null
+++ b/src/windows/ctemplate/template_pathops.h
@@ -0,0 +1,78 @@
+// Copyright (c) 2007, Google Inc.
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+// * Neither the name of Google Inc. nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+// ---
+// Author: csilvers@google.com (Craig Silverstein)
+
+#ifndef TEMPLATE_TEMPLATE_PATHOPS_H_
+#define TEMPLATE_TEMPLATE_PATHOPS_H_
+
+#include <string>
+
+// NOTE: if you are statically linking the template library into your binary
+// (rather than using the template .dll), set '/D CTEMPLATE_DLL_DECL='
+// as a compiler flag in your project file to turn off the dllimports.
+#ifndef CTEMPLATE_DLL_DECL
+# define CTEMPLATE_DLL_DECL __declspec(dllimport)
+#endif
+
+namespace ctemplate {
+
+extern CTEMPLATE_DLL_DECL
+const char kCWD[]; // a string that's equivalent to "./"
+extern CTEMPLATE_DLL_DECL
+const char kRootdir[]; // a string that's equivalent to "/"
+
+extern CTEMPLATE_DLL_DECL
+std::string PathJoin(const std::string& a, const std::string& b);
+
+extern CTEMPLATE_DLL_DECL
+bool IsAbspath(const std::string& path);
+
+extern CTEMPLATE_DLL_DECL
+bool IsDirectory(const std::string& path); // checks if path ends with /
+
+extern CTEMPLATE_DLL_DECL
+void NormalizeDirectory(std::string* dir); // adds trailing / if needed
+
+extern CTEMPLATE_DLL_DECL
+std::string Basename(const std::string& path); // everything after last /
+
+// Returns true iff text contains the word as a full word, i.e. delimited by
+// one of [.,_-#*?:] on both sides.
+// This is used while loading a template, to check that the file's name matches
+// the auto-escape mode specified by it.
+// NOTE: This assumes that the word doesn't contain any of the delimiter
+// characters.
+extern CTEMPLATE_DLL_DECL
+bool ContainsFullWord(const std::string& text, const std::string& word);
+
+} // namespace ctemplate
+
+#endif // TEMPLATE_TEMPLATE_PATHOPS_H_
diff --git a/src/windows/ctemplate/template_string.h b/src/windows/ctemplate/template_string.h
new file mode 100644
index 0000000..f1bd38b
--- /dev/null
+++ b/src/windows/ctemplate/template_string.h
@@ -0,0 +1,363 @@
+// Copyright (c) 2008, Google Inc.
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+// * Neither the name of Google Inc. nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+// ---
+// Author: csilvers@google.com (Craig Silverstein)
+
+#ifndef TEMPLATE_TEMPLATE_STRING_H_
+#define TEMPLATE_TEMPLATE_STRING_H_
+
+#include <string.h> // for memcmp() and size_t
+#include <hash_map>
+#include <string>
+#include <vector>
+
+#include <assert.h>
+#if 0
+#include <stdint.h> // one place @ac_cv_unit64@ might live
+#endif
+#if 0
+#include <inttypes.h> // another place @ac_cv_unit64@ might live
+#endif
+#include <sys/types.h> // final place @ac_cv_unit64@ might live
+
+class TemplateStringTest; // needed for friendship declaration
+class StaticTemplateStringTest;
+
+#if 0
+extern char _start[] __attribute__((weak)); // linker emits: start of .text
+extern char data_start[] __attribute__((weak)); // start of .data
+#endif
+
+// NOTE: if you are statically linking the template library into your binary
+// (rather than using the template .dll), set '/D CTEMPLATE_DLL_DECL='
+// as a compiler flag in your project file to turn off the dllimports.
+#ifndef CTEMPLATE_DLL_DECL
+# define CTEMPLATE_DLL_DECL __declspec(dllimport)
+#endif
+
+namespace ctemplate {
+
+// Most methods of TemplateDictionary take a TemplateString rather than a
+// C++ string. This is for efficiency: it can avoid extra string copies.
+// For any argument that takes a TemplateString, you can pass in any of:
+// * A C++ string
+// * A char*
+// * A StringPiece
+// * TemplateString(char*, length)
+// The last of these is the most efficient, though it requires more work
+// on the call site (you have to create the TemplateString explicitly).
+class TemplateString;
+
+// If you have a string constant (e.g. the string literal "foo") that
+// you need to pass into template routines repeatedly, it is more
+// efficient if you convert it into a TemplateString only once. The
+// way to do this is to use a global StaticTemplateString via STS_INIT
+// (note: do this at global scope *only*!):
+// static const StaticTemplateString kMyVar = STS_INIT(kMyVar, "MY_VALUE");
+struct StaticTemplateString;
+
+#define STS_INIT(name, str) STS_INIT_WITH_HASH(name, str, 0)
+
+// Let's define a convenient hash_compare function for hashing 'normal'
+// strings: char* and string. We'll use MurmurHash, which is probably
+// better than the STL default. We don't include TemplateString or
+// StaticTemplateString here, since they are hashed more efficiently
+// based on their id.
+struct CTEMPLATE_DLL_DECL StringHash {
+  // Hashing half of the functor: one overload per supported key type.
+  size_t operator()(const char* s) const { return Hash(s, strlen(s)); }
+
+  size_t operator()(const std::string& s) const {
+    return Hash(s.data(), s.size());
+  }
+
+  // Ordering half of the functor: a strict less-than, for MSVC.
+  bool operator()(const char* a, const char* b) const {
+    if (a == b) return false;  // same pointer: equal, hence not less-than
+    return strcmp(a, b) < 0;
+  }
+  bool operator()(const std::string& a, const std::string& b) const {
+    return a.compare(b) < 0;
+  }
+
+  static const size_t bucket_size = 4;  // These are required by MSVC
+  static const size_t min_buckets = 8;  // 4 and 8 are the defaults
+ private:
+  size_t Hash(const char* s, size_t slen) const;
+};
+
+// ----------------------- THE CLASSES -------------------------------
+
+typedef unsigned __int64 TemplateId;
+
+const TemplateId kIllegalTemplateId = 0;
+
+struct CTEMPLATE_DLL_DECL StaticTemplateString {
+ // Do not define a constructor! We use only brace-initialization,
+ // so the data is constructed at static-initialization time.
+ // Anything you want to put in a constructor, put in
+ // StaticTemplateStringInitializer instead.
+
+ // These members shouldn't be accessed directly, except in the
+ // internals of the template code. They are public because that is
+ // the only way we can brace-initialize them. NOTE: MSVC (at least
+ // up to 8.0) has a bug where it ignores 'mutable' when it's buried
+ // in an internal struct. To fix that, we have to make this whole
+ // internal struct mutable. We only do this on MSVC, so on other
+ // compilers we get the full constness we want.
+#ifdef _MSC_VER
+ mutable
+#endif
+ struct {
+ const char* ptr_; // start of the string's bytes
+ size_t length_; // number of bytes at ptr_
+ mutable TemplateId id_; // sometimes lazily-initialized.
+ } do_not_use_directly_;
+
+ // This class is a good hash_compare functor to pass in as the third
+ // argument to stdext::hash_map<>, when creating a map whose keys are
+ // StaticTemplateString. NOTE: This class isn't that safe to use,
+ // because it requires that StaticTemplateStringInitializer has done
+ // its job. Unfortunately, even when you use the STS_INIT macro
+ // (which is always, right??), dynamic initialization does not happen
+ // in a particular order, and objects in different .cc files may
+ // reference a StaticTemplateString before the corresponding
+ // StaticTemplateStringInitializer sets the id.
+ struct Hasher {
+ inline size_t operator()(const StaticTemplateString& sts) const;
+ inline bool operator()(const StaticTemplateString& a, // <, for MSVC
+ const StaticTemplateString& b) const;
+ static const size_t bucket_size = 4; // These are required by MSVC
+ static const size_t min_buckets = 8; // 4 and 8 are the defaults
+ };
+
+ inline bool empty() const { // true iff length_ is 0
+ return do_not_use_directly_.length_ == 0;
+ }
+
+ // Allows comparisons of StaticTemplateString objects as if they were
+ // strings. This is useful for STL.
+ inline bool operator==(const StaticTemplateString& x) const;
+};
+
+class CTEMPLATE_DLL_DECL TemplateString {
+ public:
+ TemplateString(const char* s) // a NULL s is treated as ""
+ : ptr_(s ? s : ""), length_(strlen(ptr_)),
+ is_immutable_(InTextSegment(ptr_)), id_(kIllegalTemplateId) {
+ }
+ TemplateString(const std::string& s) // keeps s.data(); no copy is made
+ : ptr_(s.data()), length_(s.size()),
+ is_immutable_(false), id_(kIllegalTemplateId) {
+ }
+ TemplateString(const char* s, size_t slen) // keeps s; no copy is made
+ : ptr_(s), length_(slen),
+ is_immutable_(InTextSegment(s)), id_(kIllegalTemplateId) {
+ }
+ TemplateString(const StaticTemplateString& s)
+ : ptr_(s.do_not_use_directly_.ptr_),
+ length_(s.do_not_use_directly_.length_),
+ is_immutable_(true), id_(s.do_not_use_directly_.id_) {
+ }
+
+ const char* begin() const {
+ return ptr_;
+ }
+
+ const char* end() const {
+ return ptr_ + length_;
+ }
+
+ const char* data() const {
+ return ptr_;
+ }
+
+ size_t size() const {
+ return length_;
+ }
+
+ inline bool empty() const {
+ return length_ == 0;
+ };
+
+ inline bool is_immutable() const {
+ return is_immutable_;
+ }
+
+ // STL requires this to be public for hash_map, though I'd rather not.
+ inline bool operator==(const TemplateString& x) const {
+ return GetGlobalId() == x.GetGlobalId();
+ }
+
+ private:
+ // Only TemplateDictionaries and template expansion code can read these.
+ friend class TemplateDictionary;
+ friend class TemplateCache; // for GetGlobalId
+ friend class StaticTemplateStringInitializer; // for AddToGlobalIdToNameMap
+ friend struct TemplateStringHasher; // for GetGlobalId
+ friend TemplateId GlobalIdForTest(const char* ptr, int len);
+ friend TemplateId GlobalIdForSTS_INIT(const TemplateString& s);
+
+ TemplateString(const char* s, size_t slen, bool is_immutable, TemplateId id)
+ : ptr_(s), length_(slen), is_immutable_(is_immutable), id_(id) {
+ }
+
+ // This returns true if s is in the .text segment of the binary.
+ // (Note this only checks .text of the main executable, not of
+ // shared libraries. So it may not be all that useful.)
+ // This requires the gnu linker (and probably elf), to define
+ // _start and data_start.
+ static bool InTextSegment(const char* s) {
+#if 0
+ return (s >= _start && s < data_start); // in .text
+#else
+ return false; // the conservative choice: assume it's not static memory
+#endif
+ }
+
+ protected:
+ inline void CacheGlobalId() { // used by HashedTemplateString
+ id_ = GetGlobalId();
+ };
+
+ private:
+ // Returns the global id, computing it for the first time if
+ // necessary. Note that since this is a const method, we don't
+ // store the computed value in id_, even if id_ is 0.
+ TemplateId GetGlobalId() const;
+ // Adds this TemplateString to the map from global-id to name.
+ void AddToGlobalIdToNameMap();
+
+ // Use sparingly. Converting to a string loses information about the
+ // id of the template string, making operations require extra hash_compare
+ // computations.
+ std::string ToString() const { return std::string(ptr_, length_); }
+
+ // Does the reverse map from TemplateId to TemplateString contents.
+ // Returns a TemplateString(kStsEmpty) if id isn't found. Note that
+ // the TemplateString returned is not necessarily NUL terminated.
+ static TemplateString IdToString(TemplateId id);
+
+ const char* ptr_; // start of the string's bytes
+ size_t length_; // number of bytes at ptr_
+ // True when ptr_ is known to point at immutable (static) data.
+ bool is_immutable_;
+ // Id for hash_compare lookups. If 0, we don't have one and it should be
+ // computed as-needed.
+ TemplateId id_;
+};
+
+// ----------------------- THE CODE -------------------------------
+
+// Use the low-bit from TemplateId as the "initialized" flag. Note
+// that since all initialized TemplateId have the lower bit set, it's
+// safe to have used 0 for kIllegalTemplateId, as we did above.
+const TemplateId kTemplateStringInitializedFlag = 1;
+
+inline bool IsTemplateIdInitialized(TemplateId id) {
+  return (id & kTemplateStringInitializedFlag) != 0;
+}
+
+// This is a helper struct used in TemplateString::Hasher/TemplateStringHasher
+struct TemplateIdHasher {
+  // Hash: fold the high bits of the id into the low ones.  This randomizes
+  // the "initialized" flag bit and slightly improves the randomness of the
+  // low bits, which is slightly useful when size_t is 32 bits or when the
+  // table is small with a power-of-2 size.
+  size_t operator()(TemplateId id) const {
+    const TemplateId folded = id ^ (id >> 33);
+    return static_cast<size_t>(folded);
+  }
+  // Strict less-than ordering on ids, for MSVC.
+  bool operator()(TemplateId a, TemplateId b) const { return a < b; }
+  static const size_t bucket_size = 4;  // These are required by MSVC
+  static const size_t min_buckets = 8;  // 4 and 8 are the defaults
+};
+
+
+inline size_t StaticTemplateString::Hasher::operator()(
+    const StaticTemplateString& sts) const {
+  const TemplateId sts_id = sts.do_not_use_directly_.id_;
+  assert(IsTemplateIdInitialized(sts_id));  // id must carry the init flag
+  return TemplateIdHasher()(sts_id);
+}
+
+inline bool StaticTemplateString::Hasher::operator()(
+    const StaticTemplateString& a, const StaticTemplateString& b) const {
+  const TemplateId lhs = a.do_not_use_directly_.id_;  // both ids must be set
+  assert(IsTemplateIdInitialized(lhs));
+  const TemplateId rhs = b.do_not_use_directly_.id_;
+  assert(IsTemplateIdInitialized(rhs));
+  return TemplateIdHasher()(lhs, rhs);
+}
+
+inline bool StaticTemplateString::operator==(
+    const StaticTemplateString& x) const {
+  const size_t n = do_not_use_directly_.length_;       // our byte count
+  const char* const p = do_not_use_directly_.ptr_;     // our bytes
+  const char* const q = x.do_not_use_directly_.ptr_;   // x's bytes
+  return n == x.do_not_use_directly_.length_ && (p == q || !memcmp(p, q, n));
+}
+
+// We set up as much of StaticTemplateString as we can at
+// static-initialization time (using brace-initialization), but some
+// things can't be set up then. This class is for those things; it
+// runs at dynamic-initialization time. If you add logic here, only
+// do so as an optimization: this may be called rather late (though
+// before main), so other code should not depend on this being called
+// before them.
+class CTEMPLATE_DLL_DECL StaticTemplateStringInitializer {
+ public:
+ // This constructor operates on a const StaticTemplateString - we should
+ // only change those things that are mutable.
+ explicit StaticTemplateStringInitializer(const StaticTemplateString* sts);
+};
+
+// Don't use this. This is used only in auto-generated .varnames.h files.
+#define STS_INIT_WITH_HASH(name, str, hash_compare) \
+  { { str, sizeof("" str "")-1, hash_compare } }; /* spaces: no UDL suffix */ \
+  namespace ctemplate_sts_init { \
+  static const ctemplate::StaticTemplateStringInitializer name##_init(&name); \
+  }
+
+// We computed this hash_compare value for the empty string online. In debug
+// mode, we verify it's correct during runtime (that is, that we
+// verify the hash_compare function used by make_tpl_varnames_h hasn't changed
+// since we computed this number). Note this struct is logically
+// static, but since it's in a .h file, we don't say 'static' but
+// instead rely on the linker to provide the POD-with-internal-linkage
+// magic.
+const StaticTemplateString kStsEmpty =
+ STS_INIT_WITH_HASH(kStsEmpty, "", 1457976849674613049ULL);
+
+}
+
+
+#endif // TEMPLATE_TEMPLATE_STRING_H_
diff --git a/src/windows/port.cc b/src/windows/port.cc
new file mode 100644
index 0000000..73bb580
--- /dev/null
+++ b/src/windows/port.cc
@@ -0,0 +1,117 @@
+/* Copyright (c) 2007, Google Inc.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following disclaimer
+ * in the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Google Inc. nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * ---
+ * Author: Craig Silverstein
+ */
+
+#include "windows/config.h"
+#ifndef _WIN32
+# error You should only be including windows/port.cc in a windows environment!
+#endif
+
+#include <stdarg.h> // for va_list, va_start, va_end
+#include <string.h> // for strstr()
+#include <assert.h>
+#include "port.h"
+
+// These call the windows _vsnprintf, but always NUL-terminate.
+#if !defined(__MINGW32__) && !defined(__MINGW64__) /* mingw already defines */
+int safe_vsnprintf(char *str, size_t size, const char *format, va_list ap) {
+  if (size == 0) return -1;  // no room for a NUL; windows (not C99) behavior
+  const size_t last = size - 1;  // slot reserved for the terminating NUL
+  str[last] = '\0';
+  return _vsnprintf(str, last, format, ap);
+}
+
+int snprintf(char *str, size_t size, const char *format, ...) {
+  // Gather the variadic arguments and forward them to vsnprintf().
+  va_list args;
+  va_start(args, format);
+  const int result = vsnprintf(str, size, format, args);
+  va_end(args);
+  return result;
+}
+#endif /* #if !defined(__MINGW32__) && !defined(__MINGW64__) */
+
+#ifdef __cplusplus
+#include <string>
+#include <vector>
+#include <ctemplate/template_pathops.h>
+
+using std::string;
+using std::vector;
+
+namespace ctemplate {
+
+// defined (for unix) in template_test_utils.cc
+string TmpFile(const char* basename) {
+  char tmpdir[1024];
+  const int tmpdir_len = GetTempPathA(sizeof(tmpdir), tmpdir);
+  // Non-positive or too long for the buffer: an error, so bail on tmppath.
+  const bool usable = tmpdir_len > 0 && tmpdir_len < sizeof(tmpdir);
+  if (!usable) return basename;
+  assert(tmpdir[tmpdir_len - 1] == '\\');  // API guarantees it
+  return string(tmpdir) + basename;
+}
+
+// A replacement for template_unittest.cc:CleanTestDir()
+void CreateOrCleanTestDir(const string& dirname) {
+  const string pattern(PathJoin(dirname, "*"));
+  WIN32_FIND_DATAA entry;  // the final A is for Ansi (as opposed to Unicode)
+  HANDLE search = FindFirstFileA(pattern.c_str(), &entry);
+  if (search == INVALID_HANDLE_VALUE) {  // directory doesn't exist or some such
+    _mkdir(dirname.c_str());
+    search = FindFirstFileA(pattern.c_str(), &entry);  // try again
+  }
+  if (search == INVALID_HANDLE_VALUE)
+    return;  // still nothing to enumerate, so nothing to clean
+  do {
+    if (strstr(entry.cFileName, "template") != NULL)
+      _unlink(PathJoin(dirname, entry.cFileName).c_str());
+  } while (FindNextFileA(search, &entry) != FALSE);
+  FindClose(search);
+}
+
+}
+
+void GetNamelist(const char* testdata_dir, vector<string>* namelist) {
+  // Append to *namelist the cFileName of every match of the glob below.
+  const string pattern(
+      GOOGLE_NAMESPACE::PathJoin(testdata_dir, "template_unittest_test*"));
+  WIN32_FIND_DATAA entry;  // the final A is for Ansi (as opposed to Unicode)
+  const HANDLE search = FindFirstFileA(pattern.c_str(), &entry);
+  if (search == INVALID_HANDLE_VALUE) return;  // no matches: nothing to add
+  do {
+    namelist->push_back(entry.cFileName);
+  } while (FindNextFileA(search, &entry) != FALSE);
+  FindClose(search);
+}
+
+#endif /* __cplusplus */
diff --git a/src/windows/port.h b/src/windows/port.h
new file mode 100644
index 0000000..7edc6f0
--- /dev/null
+++ b/src/windows/port.h
@@ -0,0 +1,140 @@
+/* Copyright (c) 2007, Google Inc.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following disclaimer
+ * in the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Google Inc. nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * ---
+ * Author: Craig Silverstein
+ *
+ * These are some portability typedefs and defines to make it a bit
+ * easier to compile this code under VC++.
+ *
+ * Several of these are taken from glib:
+ * http://developer.gnome.org/doc/API/glib/glib-windows-compatability-functions.html
+ */
+
+#ifndef CTEMPLATE_WINDOWS_PORT_H_
+#define CTEMPLATE_WINDOWS_PORT_H_
+
+#include "windows/config.h"
+#ifdef _WIN32
+
+#define USING_PORT_CC
+
+#define WIN32_LEAN_AND_MEAN /* We always want minimal includes */
+#include <windows.h>
+#include <io.h> /* because we so often use open/close/etc */
+#include <direct.h> /* for _getcwd() */
+#include <sys/utime.h> /* for _utime() */
+#include <stdio.h> /* read in vsnprintf decl. before redefining it */
+#include <stdarg.h> /* template_dictionary.cc uses va_copy */
+#include <string.h> /* for _strnicmp */
+/* Note: the C++ #includes are all together at the bottom. This file is
+ * used by both C and C++ code, so we put all the C++ together.
+ */
+
+/* 4244: otherwise we get problems when subtracting two size_t's to an int
+ * 4251: it's complaining about a private struct I've chosen not to dllexport
+ * 4355: we use this in a constructor, but we do it safely
+ * 4715: for some reason VC++ stopped realizing you can't return after abort()
+ * 4800: we know we're casting ints/char*'s to bools, and we're ok with that
+ * 4996: Yes, we're ok using "unsafe" functions like fopen() and strerror()
+ */
+#ifdef _MSC_VER
+#pragma warning(disable:4244 4251 4355 4715 4800 4996)
+#endif
+
+/* file I/O */
+#define PATH_MAX 1024
+#define access _access
+#define getcwd _getcwd
+#define open _open
+#define read _read
+#define write _write
+#define lseek _lseek
+#define close _close
+#define popen _popen
+#define pclose _pclose
+#ifndef R_OK /* mingw defines this, for instance */
+#define R_OK 04 /* read-only (for access()) */
+#endif
+#define S_ISDIR(m) (((m) & _S_IFMT) == _S_IFDIR)
+
+#define utime _utime
+#define utimbuf _utimbuf
+
+/* A copy, not a true hard-link, but good enough. FALSE (not false): C-safe. */
+#define link(oldpath, newpath) (!CopyFileA(oldpath, newpath, FALSE))
+
+#define strcasecmp _stricmp
+#define strncasecmp _strnicmp
+
+/* Sleep is in ms, on windows */
+#define sleep(secs) Sleep((secs) * 1000)
+
+/* We can't just use _vsnprintf and _snprintf as drop-in-replacements,
+ * because they don't always NUL-terminate. :-( We also can't use the
+ * name vsnprintf, since windows defines that (but not snprintf (!)).
+ */
+#if !defined(__MINGW32__) && !defined(__MINGW64__) /* mingw already defines */
+extern CTEMPLATE_DLL_DECL int snprintf(char *str, size_t size,
+ const char *format, ...);
+extern int CTEMPLATE_DLL_DECL safe_vsnprintf(char *str, size_t size,
+ const char *format, va_list ap);
+#define vsnprintf(str, size, format, ap) safe_vsnprintf(str, size, format, ap)
+#define va_copy(dst, src) (dst) = (src)
+#endif /* #if !defined(__MINGW32__) && !defined(__MINGW64__) */
+
+/* Windows doesn't support specifying the number of buckets as a
+ * hash_map constructor arg, so we leave this blank.
+ */
+#define CTEMPLATE_SMALL_HASHTABLE
+
+#define DEFAULT_TEMPLATE_ROOTDIR ".."
+
+
+/* These are functions we have to override because they're O/S-specific */
+#ifdef __cplusplus
+#include <string>
+#include <vector>
+
+namespace ctemplate {
+extern CTEMPLATE_DLL_DECL std::string TmpFile(const char* basename);
+void CTEMPLATE_DLL_DECL CreateOrCleanTestDir(const std::string& dirname);
+}
+void CTEMPLATE_DLL_DECL GetNamelist(const char* testdata_dir,
+ std::vector<std::string>* namelist);
+#endif /* __cplusplus */
+
+#ifndef __cplusplus
+/* I don't see how to get inlining for C code in MSVC. Ah well. */
+#define inline
+#endif
+
+#endif /* _WIN32 */
+
+#endif /* CTEMPLATE_WINDOWS_PORT_H_ */
diff --git a/src/windows/preprocess.sh b/src/windows/preprocess.sh
new file mode 100755
index 0000000..b089180
--- /dev/null
+++ b/src/windows/preprocess.sh
@@ -0,0 +1,118 @@
+#!/bin/sh
+
+# Copyright (c) 2007, Google Inc.
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are
+# met:
+#
+# * Redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer.
+# * Redistributions in binary form must reproduce the above
+# copyright notice, this list of conditions and the following disclaimer
+# in the documentation and/or other materials provided with the
+# distribution.
+# * Neither the name of Google Inc. nor the names of its
+# contributors may be used to endorse or promote products derived from
+# this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+# ---
+# Author: Craig Silverstein
+#
+# This script is meant to be run at distribution-generation time, for
+# instance by autogen.sh. It does some of the work configure would
+# normally do, for windows systems. In particular, it expands all the
+# @...@ variables found in .in files, and puts them here, in the windows
+# directory.
+#
+# This script should be run before any new release.
+
+if [ -z "$1" ]; then
+ echo "USAGE: $0 <src/ directory>"
+ exit 1
+fi
+
+DLLDEF_MACRO_NAME="CTEMPLATE_DLL_DECL"
+
+# The text we put in every .h file we create. As a courtesy, we'll
+# include a helpful comment for windows users as to how to use
+# CTEMPLATE_DLL_DECL. Apparently sed expands \n into a newline. Good!
+DLLDEF_DEFINES="\
+// NOTE: if you are statically linking the template library into your binary\n\
+// (rather than using the template .dll), set '/D $DLLDEF_MACRO_NAME='\n\
+// as a compiler flag in your project file to turn off the dllimports.\n\
+#ifndef $DLLDEF_MACRO_NAME\n\
+# define $DLLDEF_MACRO_NAME __declspec(dllimport)\n\
+#endif"
+
+# template_cache.h gets a special DEFINE to work around the
+# difficulties in dll-exporting stl containers. Ugh.
+TEMPLATE_CACHE_DLLDEF_DEFINES="\
+// NOTE: if you are statically linking the template library into your binary\n\
+// (rather than using the template .dll), set '/D $DLLDEF_MACRO_NAME='\n\
+// as a compiler flag in your project file to turn off the dllimports.\n\
+#ifndef $DLLDEF_MACRO_NAME\n\
+# define $DLLDEF_MACRO_NAME __declspec(dllimport)\n\
+extern template class __declspec(dllimport) std::allocator<std::string>;\n\
+extern template class __declspec(dllimport) std::vector<std::string>;\n\
+#else\n\
+template class __declspec(dllexport) std::allocator<std::string>;\n\
+template class __declspec(dllexport) std::vector<std::string>;\n\
+#endif"
+
+# Read all the windows config info into variables
+# In order for the 'set' to take, this requires putting all in a subshell.
+(
+  while read define varname value; do
+    [ "$define" != "#define" ] && continue
+    eval "$varname='$value'"
+  done
+
+  # Process all the .in files in the "ctemplate" subdirectory
+  mkdir -p "$1/windows/ctemplate"
+  for file in "$1"/ctemplate/*.in; do
+    echo "Processing $file"
+    outfile="$1/windows/ctemplate/$(basename "$file" .in)"
+
+    if test "$(basename "$file")" = template_cache.h.in; then
+      MY_DLLDEF_DEFINES=$TEMPLATE_CACHE_DLLDEF_DEFINES
+    else
+      MY_DLLDEF_DEFINES=$DLLDEF_DEFINES
+    fi
+
+    # Besides replacing @...@, we also need to turn on dllimport
+    # We also need to replace hash by hash_compare (annoying we hard-code :-( )
+    sed -e "s!@ac_windows_dllexport@!$DLLDEF_MACRO_NAME!g" \
+        -e "s!@ac_windows_dllexport_defines@!$MY_DLLDEF_DEFINES!g" \
+        -e "s!@ac_cv_cxx_hash_map@!$HASH_MAP_H!g" \
+        -e "s!@ac_cv_cxx_hash_set@!$HASH_SET_H!g" \
+        -e "s!@ac_cv_cxx_hash_map_class@!$HASH_NAMESPACE::hash_map!g" \
+        -e "s!@ac_cv_cxx_hash_set_class@!$HASH_NAMESPACE::hash_set!g" \
+        -e "s!@ac_google_attribute@!${HAVE___ATTRIBUTE__:-0}!g" \
+        -e "s!@ac_google_end_namespace@!$_END_GOOGLE_NAMESPACE_!g" \
+        -e "s!@ac_google_namespace@!$GOOGLE_NAMESPACE!g" \
+        -e "s!@ac_google_start_namespace@!$_START_GOOGLE_NAMESPACE_!g" \
+        -e "s!@ac_htmlparser_namespace@!$HTMLPARSER_NAMESPACE!g" \
+        -e "s!@ac_cv_uint64@!unsigned __int64!g" \
+        -e "s!@ac_cv_have_stdint_h@!0!g" \
+        -e "s!@ac_cv_have_inttypes_h@!0!g" \
+        -e "s!@ac_have_attribute_weak@!0!g" \
+        -e "s!\\bhash\\b!hash_compare!g" \
+        "$file" > "$outfile"
+  done
+) < "$1/windows/config.h"
+
+echo "DONE"