Blame - include/boost/detail/utf8_codecvt_facet.hpp - RealtimeRoboticsGroup/test

blob: 12ae19ba93a8748714a1124470000ab35387dc44 [file] [log] [blame]

Brian Silverman	2987761	2018-08-05 00:42:41 -0700	[diff] [blame^]	1	// Copyright (c) 2001 Ronald Garcia, Indiana University (garcia@osl.iu.edu)
				2	// Andrew Lumsdaine, Indiana University (lums@osl.iu.edu).
				3	// Distributed under the Boost Software License, Version 1.0. (See accompany-
				4	// ing file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
				5
				6	#ifndef BOOST_UTF8_CODECVT_FACET_HPP
				7	#define BOOST_UTF8_CODECVT_FACET_HPP
				8
				9	// MS compatible compilers support #pragma once
				10	#if defined(_MSC_VER) && (_MSC_VER >= 1020)
				11	# pragma once
				12	#endif
				13
				14	/////////1/////////2/////////3/////////4/////////5/////////6/////////7/////////8
				15	// utf8_codecvt_facet.hpp
				16
				17	// This header defines class utf8_codecvt_facet, derived from
				18	// std::codecvt<wchar_t, char>, which can be used to convert utf8 data in
				19	// files into wchar_t strings in the application.
				20	//
				21	// The header is NOT STANDALONE, and is not to be included by the USER.
				22	// There are at least two libraries which want to use this functionality, and
				23	// we want to avoid code duplication. It would be possible to create utf8
				24	// library, but:
				25	// - this requires review process first
				26	// - in the case, when linking a library which uses utf8
				27	// (say 'program_options'), user should also link to the utf8 library.
				28	// This seems inconvenient, and asking a user to link to an unreviewed
				29	// library is strange.
				30	// Until the above points are fixed, a library which wants to use utf8 must:
				31	// - include this header in one of its headers or sources
				32	// - include the corresponding boost/detail/utf8_codecvt_facet.ipp file in one
				33	// of its sources
				34	// - before including either file, the library must define
				35	// - BOOST_UTF8_BEGIN_NAMESPACE to the namespace declaration that must be used
				36	// - BOOST_UTF8_END_NAMESPACE to the code to close the previous namespace
				37	// declaration.
				38	// - BOOST_UTF8_DECL -- to the code which must be used for all 'exportable'
				39	// symbols.
				40	//
				41	// For example, program_options library might contain:
				42	// #define BOOST_UTF8_BEGIN_NAMESPACE <backslash character>
				43	// namespace boost { namespace program_options {
				44	// #define BOOST_UTF8_END_NAMESPACE }}
				45	// #define BOOST_UTF8_DECL BOOST_PROGRAM_OPTIONS_DECL
				46	// #include <boost/detail/utf8_codecvt_facet.ipp>
				47	//
				48	// Essentially, each library will have its own copy of utf8 code, in
				49	// different namespaces.
				50
				51	// Note:(Robert Ramey). I have made the following alterations in the original
				52	// code.
				53	// a) Rendered utf8_codecvt<wchar_t, char> with using templates
				54	// b) Move longer functions outside class definition to prevent inlining
				55	// and make code smaller
				56	// c) added on a derived class to permit translation to/from current
				57	// locale to utf8
				58
				59	// See http://www.boost.org for updates, documentation, and revision history.
				60
				61	// archives stored as text - note these are templated on the basic
				62	// stream templates to accommodate wide (and other?) kind of characters
				63	//
				64	// note the fact that on libraries without wide characters, ostream is
				65	// not a specialization of basic_ostream which in fact is not defined
				66	// in such cases. So we can't use basic_ostream<OStream::char_type> but rather
				67	// use two template parameters
				68	//
				69	// utf8_codecvt_facet
				70	// This is an implementation of a std::codecvt facet for translating
				71	// from UTF-8 externally to UCS-4. Note that this is not tied to
				72	// any specific types in order to allow customization on platforms
				73	// where wchar_t is not big enough.
				74	//
				75	// NOTES: The current implementation jumps through some unpleasant hoops in
				76	// order to deal with signed character types. As a std::codecvt_base::result,
				77	// it is necessary for the ExternType to be convertible to unsigned char.
				78	// I chose not to tie the extern_type explicitly to char. But if any combination
				79	// of types other than <wchar_t,char_t> is used, then std::codecvt must be
				80	// specialized on those types for this to work.
				81
				82	#include <locale>
				83	#include <cwchar> // for mbstate_t
				84	#include <cstddef> // for std::size_t
				85
				86	#include <boost/config.hpp>
				87	#include <boost/detail/workaround.hpp>
				88
				89	#if defined(BOOST_NO_STDC_NAMESPACE)
				90	namespace std {
				91	using ::mbstate_t;
				92	using ::size_t;
				93	}
				94	#endif
				95
				96	// maximum length of a multibyte string
				97	#define MB_LENGTH_MAX 8
				98
				99	BOOST_UTF8_BEGIN_NAMESPACE
				100
				101	//----------------------------------------------------------------------------//
				102	// //
				103	// utf8_codecvt_facet //
				104	// //
				105	// See utf8_codecvt_facet.ipp for the implementation. //
				106	//----------------------------------------------------------------------------//
				107
				108	#ifndef BOOST_UTF8_DECL
				109	#define BOOST_UTF8_DECL
				110	#endif
				111
				112	struct BOOST_UTF8_DECL utf8_codecvt_facet :
				113	public std::codecvt<wchar_t, char, std::mbstate_t>
				114	{
				115	public:
				116	explicit utf8_codecvt_facet(std::size_t no_locale_manage=0);
				117	virtual ~utf8_codecvt_facet(){}
				118	protected:
				119	virtual std::codecvt_base::result do_in(
				120	std::mbstate_t& state,
				121	const char * from,
				122	const char * from_end,
				123	const char * & from_next,
				124	wchar_t * to,
				125	wchar_t * to_end,
				126	wchar_t*& to_next
				127	) const;
				128
				129	virtual std::codecvt_base::result do_out(
				130	std::mbstate_t & state,
				131	const wchar_t * from,
				132	const wchar_t * from_end,
				133	const wchar_t* & from_next,
				134	char * to,
				135	char * to_end,
				136	char * & to_next
				137	) const;
				138
				139	bool invalid_continuing_octet(unsigned char octet_1) const {
				140	return (octet_1 < 0x80\|\| 0xbf< octet_1);
				141	}
				142
				143	bool invalid_leading_octet(unsigned char octet_1) const {
				144	return (0x7f < octet_1 && octet_1 < 0xc0) \|\|
				145	(octet_1 > 0xfd);
				146	}
				147
				148	// continuing octets = octets except for the leading octet
				149	static unsigned int get_cont_octet_count(unsigned char lead_octet) {
				150	return get_octet_count(lead_octet) - 1;
				151	}
				152
				153	static unsigned int get_octet_count(unsigned char lead_octet);
				154
				155	// How many "continuing octets" will be needed for this word
				156	// == total octets - 1.
				157	int get_cont_octet_out_count(wchar_t word) const ;
				158
				159	virtual bool do_always_noconv() const BOOST_NOEXCEPT_OR_NOTHROW {
				160	return false;
				161	}
				162
				163	// UTF-8 isn't really stateful since we rewind on partial conversions
				164	virtual std::codecvt_base::result do_unshift(
				165	std::mbstate_t&,
				166	char * from,
				167	char * /to/,
				168	char * & next
				169	) const {
				170	next = from;
				171	return ok;
				172	}
				173
				174	virtual int do_encoding() const BOOST_NOEXCEPT_OR_NOTHROW {
				175	const int variable_byte_external_encoding=0;
				176	return variable_byte_external_encoding;
				177	}
				178
				179	// How many char objects can I process to get <= max_limit
				180	// wchar_t objects?
				181	virtual int do_length(
				182	std::mbstate_t &,
				183	const char * from,
				184	const char * from_end,
				185	std::size_t max_limit
				186	) const
				187	#if BOOST_WORKAROUND(__IBMCPP__, BOOST_TESTED_AT(600))
				188	throw()
				189	#endif
				190	;
				191
				192	// Nonstandard override
				193	virtual int do_length(
				194	const std::mbstate_t & s,
				195	const char * from,
				196	const char * from_end,
				197	std::size_t max_limit
				198	) const
				199	#if BOOST_WORKAROUND(__IBMCPP__, BOOST_TESTED_AT(600))
				200	throw()
				201	#endif
				202	{
				203	return do_length(
				204	const_cast<std::mbstate_t &>(s),
				205	from,
				206	from_end,
				207	max_limit
				208	);
				209	}
				210
				211	// Largest possible value do_length(state,from,from_end,1) could return.
				212	virtual int do_max_length() const BOOST_NOEXCEPT_OR_NOTHROW {
				213	return 6; // largest UTF-8 encoding of a UCS-4 character
				214	}
				215	};
				216
				217	BOOST_UTF8_END_NAMESPACE
				218
				219	#endif // BOOST_UTF8_CODECVT_FACET_HPP