Brian Silverman | f7bd1c2 | 2015-12-24 16:07:11 -0800 | [diff] [blame^] | 1 | //===-- llvm/ADT/StringExtras.h - Useful string functions -------*- C++ -*-===// |
| 2 | // |
| 3 | // The LLVM Compiler Infrastructure |
| 4 | // |
| 5 | // This file is distributed under the University of Illinois Open Source |
| 6 | // License. See LICENSE.TXT for details. |
| 7 | // |
| 8 | //===----------------------------------------------------------------------===// |
| 9 | // |
| 10 | // This file contains some functions that are useful when dealing with strings. |
| 11 | // |
| 12 | //===----------------------------------------------------------------------===// |
| 13 | |
| 14 | #ifndef LLVM_ADT_STRINGEXTRAS_H |
| 15 | #define LLVM_ADT_STRINGEXTRAS_H |
| 16 | |
| 17 | #include "llvm/StringRef.h" |
| 18 | #include <cstdint> |
| 19 | #include <iterator> |
| 20 | |
| 21 | namespace llvm { |
| 22 | template<typename T> class SmallVectorImpl; |
| 23 | |
/// hexdigit - Convert a small number into the character of the matching
/// hexadecimal digit.
///
/// \param X the value to convert; it should be less than 16.
/// \param LowerCase if true, values 10 through 15 produce 'a'-'f' instead of
/// 'A'-'F'.
/// \returns the hexadecimal digit character for \p X.
static inline char hexdigit(unsigned X, bool LowerCase = false) {
  // Values below ten map straight onto the decimal digits.
  if (X < 10)
    return '0' + X;

  // Everything else is offset from the first letter of the requested case.
  const char AlphaBase = LowerCase ? 'a' : 'A';
  return AlphaBase + X - 10;
}
| 30 | |
| 31 | /// Construct a string ref from a boolean. |
| 32 | static inline StringRef toStringRef(bool B) { |
| 33 | return StringRef(B ? "true" : "false"); |
| 34 | } |
| 35 | |
/// Decode the character \p C as a single hexadecimal digit.
///
/// Both upper-case and lower-case letters are accepted.
///
/// \returns the digit's numeric value (0-15), or -1U if \p C is not a valid
/// hex digit.
static inline unsigned hexDigitValue(char C) {
  unsigned Value = -1U; // Sentinel reported for non-hex characters.
  if (C >= '0' && C <= '9')
    Value = C - '0';
  else if (C >= 'a' && C <= 'f')
    Value = C - 'a' + 10U;
  else if (C >= 'A' && C <= 'F')
    Value = C - 'A' + 10U;
  return Value;
}
| 46 | |
| 47 | /// utohex_buffer - Emit the specified number into the buffer specified by |
| 48 | /// BufferEnd, returning a pointer to the start of the string. This can be used |
| 49 | /// like this: (note that the buffer must be large enough to handle any number): |
| 50 | /// char Buffer[40]; |
| 51 | /// printf("0x%s", utohex_buffer(X, Buffer+40)); |
| 52 | /// |
| 53 | /// This should only be used with unsigned types. |
| 54 | /// |
| 55 | template<typename IntTy> |
| 56 | static inline char *utohex_buffer(IntTy X, char *BufferEnd, bool LowerCase = false) { |
| 57 | char *BufPtr = BufferEnd; |
| 58 | *--BufPtr = 0; // Null terminate buffer. |
| 59 | if (X == 0) { |
| 60 | *--BufPtr = '0'; // Handle special case. |
| 61 | return BufPtr; |
| 62 | } |
| 63 | |
| 64 | while (X) { |
| 65 | unsigned char Mod = static_cast<unsigned char>(X) & 15; |
| 66 | *--BufPtr = hexdigit(Mod, LowerCase); |
| 67 | X >>= 4; |
| 68 | } |
| 69 | return BufPtr; |
| 70 | } |
| 71 | |
| 72 | static inline std::string utohexstr(uint64_t X, bool LowerCase = false) { |
| 73 | char Buffer[17]; |
| 74 | return utohex_buffer(X, Buffer+17, LowerCase); |
| 75 | } |
| 76 | |
/// Format the 32-bit unsigned value \p X in decimal.
///
/// \param isNeg if true, a leading '-' is placed in front of the digits.
static inline std::string utostr_32(uint32_t X, bool isNeg = false) {
  // Ten digits at most for a 32-bit value, plus room for an optional sign.
  char Buffer[11];
  char *const BufEnd = Buffer + 11;
  char *Cursor = BufEnd;

  // Digits are generated least-significant first, so fill the buffer from the
  // back. At least one digit is always written, which covers X == 0.
  do {
    *--Cursor = '0' + char(X % 10);
    X /= 10;
  } while (X);

  if (isNeg)
    *--Cursor = '-';

  return std::string(Cursor, BufEnd);
}
| 92 | |
/// Format the 64-bit unsigned value \p X in decimal.
///
/// \param isNeg if true, a leading '-' is placed in front of the digits.
static inline std::string utostr(uint64_t X, bool isNeg = false) {
  // Twenty digits at most for a 64-bit value, plus room for an optional sign.
  char Buffer[21];
  char *const BufEnd = Buffer + 21;
  char *Cursor = BufEnd;

  // Digits are generated least-significant first, so fill the buffer from the
  // back. At least one digit is always written, which covers X == 0.
  do {
    *--Cursor = '0' + char(X % 10);
    X /= 10;
  } while (X);

  if (isNeg)
    *--Cursor = '-';

  return std::string(Cursor, BufEnd);
}
| 107 | |
| 108 | |
| 109 | static inline std::string itostr(int64_t X) { |
| 110 | if (X < 0) |
| 111 | return utostr(static_cast<uint64_t>(-X), true); |
| 112 | else |
| 113 | return utostr(static_cast<uint64_t>(X)); |
| 114 | } |
| 115 | |
/// StrInStrNoCase - Portable version of strcasestr. Locates the first
/// occurrence of string 's2' in string 's1', ignoring case. Returns
/// the offset of s2 in s1 or npos if s2 cannot be found.
| 119 | StringRef::size_type StrInStrNoCase(StringRef s1, StringRef s2); |
| 120 | |
| 121 | /// getToken - This function extracts one token from source, ignoring any |
| 122 | /// leading characters that appear in the Delimiters string, and ending the |
| 123 | /// token at any of the characters that appear in the Delimiters string. If |
| 124 | /// there are no tokens in the source string, an empty string is returned. |
| 125 | /// The function returns a pair containing the extracted token and the |
| 126 | /// remaining tail string. |
| 127 | std::pair<StringRef, StringRef> getToken(StringRef Source, |
| 128 | StringRef Delimiters = " \t\n\v\f\r"); |
| 129 | |
| 130 | /// SplitString - Split up the specified string according to the specified |
| 131 | /// delimiters, appending the result fragments to the output list. |
| 132 | void SplitString(StringRef Source, |
| 133 | SmallVectorImpl<StringRef> &OutFragments, |
| 134 | StringRef Delimiters = " \t\n\v\f\r"); |
| 135 | |
| 136 | /// HashString - Hash function for strings. |
| 137 | /// |
| 138 | /// This is the Bernstein hash function. |
| 139 | // |
| 140 | // FIXME: Investigate whether a modified bernstein hash function performs |
| 141 | // better: http://eternallyconfuzzled.com/tuts/algorithms/jsw_tut_hashing.aspx |
| 142 | // X*33+c -> X*33^c |
| 143 | static inline unsigned HashString(StringRef Str, unsigned Result = 0) { |
| 144 | for (StringRef::size_type i = 0, e = Str.size(); i != e; ++i) |
| 145 | Result = Result * 33 + (unsigned char)Str[i]; |
| 146 | return Result; |
| 147 | } |
| 148 | |
| 149 | /// Returns the English suffix for an ordinal integer (-st, -nd, -rd, -th). |
| 150 | static inline StringRef getOrdinalSuffix(unsigned Val) { |
| 151 | // It is critically important that we do this perfectly for |
| 152 | // user-written sequences with over 100 elements. |
| 153 | switch (Val % 100) { |
| 154 | case 11: |
| 155 | case 12: |
| 156 | case 13: |
| 157 | return "th"; |
| 158 | default: |
| 159 | switch (Val % 10) { |
| 160 | case 1: return "st"; |
| 161 | case 2: return "nd"; |
| 162 | case 3: return "rd"; |
| 163 | default: return "th"; |
| 164 | } |
| 165 | } |
| 166 | } |
| 167 | |
| 168 | template <typename IteratorT> |
| 169 | inline std::string join_impl(IteratorT Begin, IteratorT End, |
| 170 | StringRef Separator, std::input_iterator_tag) { |
| 171 | std::string S; |
| 172 | if (Begin == End) |
| 173 | return S; |
| 174 | |
| 175 | S += (*Begin); |
| 176 | while (++Begin != End) { |
| 177 | S += Separator; |
| 178 | S += (*Begin); |
| 179 | } |
| 180 | return S; |
| 181 | } |
| 182 | |
| 183 | template <typename IteratorT> |
| 184 | inline std::string join_impl(IteratorT Begin, IteratorT End, |
| 185 | StringRef Separator, std::forward_iterator_tag) { |
| 186 | std::string S; |
| 187 | if (Begin == End) |
| 188 | return S; |
| 189 | |
| 190 | size_t Len = (std::distance(Begin, End) - 1) * Separator.size(); |
| 191 | for (IteratorT I = Begin; I != End; ++I) |
| 192 | Len += (*Begin).size(); |
| 193 | S.reserve(Len); |
| 194 | S += (*Begin); |
| 195 | while (++Begin != End) { |
| 196 | S += Separator; |
| 197 | S += (*Begin); |
| 198 | } |
| 199 | return S; |
| 200 | } |
| 201 | |
| 202 | /// Joins the strings in the range [Begin, End), adding Separator between |
| 203 | /// the elements. |
| 204 | template <typename IteratorT> |
| 205 | inline std::string join(IteratorT Begin, IteratorT End, StringRef Separator) { |
| 206 | typedef typename std::iterator_traits<IteratorT>::iterator_category tag; |
| 207 | return join_impl(Begin, End, Separator, tag()); |
| 208 | } |
| 209 | |
| 210 | } // namespace llvm |
| 211 | |
| 212 | #endif |