Brian Silverman | 1f5d398 | 2018-08-04 23:37:52 -0700 | [diff] [blame^] | 1 | ////////////////////////////////////////////////////////////////////////////// |
| 2 | // |
| 3 | // (C) Copyright Ion Gaztanaga 2015-2016. |
| 4 | // Distributed under the Boost Software License, Version 1.0. |
| 5 | // (See accompanying file LICENSE_1_0.txt or copy at |
| 6 | // http://www.boost.org/LICENSE_1_0.txt) |
| 7 | // |
| 8 | // See http://www.boost.org/libs/move for documentation. |
| 9 | // |
| 10 | ////////////////////////////////////////////////////////////////////////////// |
| 11 | |
| 12 | #ifndef BOOST_MOVE_ADAPTIVE_MERGE_HPP |
| 13 | #define BOOST_MOVE_ADAPTIVE_MERGE_HPP |
| 14 | |
| 15 | #include <boost/move/detail/config_begin.hpp> |
| 16 | #include <boost/move/algo/detail/adaptive_sort_merge.hpp> |
| 17 | |
| 18 | namespace boost { |
| 19 | namespace movelib { |
| 20 | |
| 21 | ///@cond |
| 22 | namespace detail_adaptive { |
| 23 | |
| 24 | template<class RandIt, class Compare, class XBuf> |
| 25 | inline void adaptive_merge_combine_blocks( RandIt first |
| 26 | , typename iterator_traits<RandIt>::size_type len1 |
| 27 | , typename iterator_traits<RandIt>::size_type len2 |
| 28 | , typename iterator_traits<RandIt>::size_type collected |
| 29 | , typename iterator_traits<RandIt>::size_type n_keys |
| 30 | , typename iterator_traits<RandIt>::size_type l_block |
| 31 | , bool use_internal_buf |
| 32 | , bool xbuf_used |
| 33 | , Compare comp |
| 34 | , XBuf & xbuf |
| 35 | ) |
| 36 | { |
| 37 | typedef typename iterator_traits<RandIt>::size_type size_type; |
| 38 | size_type const len = len1+len2; |
| 39 | size_type const l_combine = len-collected; |
| 40 | size_type const l_combine1 = len1-collected; |
| 41 | |
| 42 | if(n_keys){ |
| 43 | RandIt const first_data = first+collected; |
| 44 | RandIt const keys = first; |
| 45 | BOOST_MOVE_ADAPTIVE_SORT_PRINT_L2(" A combine: ", len); |
| 46 | if(xbuf_used){ |
| 47 | if(xbuf.size() < l_block){ |
| 48 | xbuf.initialize_until(l_block, *first); |
| 49 | } |
| 50 | BOOST_ASSERT(xbuf.size() >= l_block); |
| 51 | size_type n_block_a, n_block_b, l_irreg1, l_irreg2; |
| 52 | combine_params( keys, comp, l_combine |
| 53 | , l_combine1, l_block, xbuf |
| 54 | , n_block_a, n_block_b, l_irreg1, l_irreg2); //Outputs |
| 55 | op_merge_blocks_with_buf |
| 56 | (keys, comp, first_data, l_block, l_irreg1, n_block_a, n_block_b, l_irreg2, comp, move_op(), xbuf.data()); |
| 57 | BOOST_MOVE_ADAPTIVE_SORT_PRINT_L1(" A mrg xbf: ", len); |
| 58 | } |
| 59 | else{ |
| 60 | size_type n_block_a, n_block_b, l_irreg1, l_irreg2; |
| 61 | combine_params( keys, comp, l_combine |
| 62 | , l_combine1, l_block, xbuf |
| 63 | , n_block_a, n_block_b, l_irreg1, l_irreg2); //Outputs |
| 64 | if(use_internal_buf){ |
| 65 | op_merge_blocks_with_buf |
| 66 | (keys, comp, first_data, l_block, l_irreg1, n_block_a, n_block_b, l_irreg2, comp, swap_op(), first_data-l_block); |
| 67 | BOOST_MOVE_ADAPTIVE_SORT_PRINT_L2(" A mrg buf: ", len); |
| 68 | } |
| 69 | else{ |
| 70 | merge_blocks_bufferless |
| 71 | (keys, comp, first_data, l_block, l_irreg1, n_block_a, n_block_b, l_irreg2, comp); |
| 72 | BOOST_MOVE_ADAPTIVE_SORT_PRINT_L1(" A mrg nbf: ", len); |
| 73 | } |
| 74 | } |
| 75 | } |
| 76 | else{ |
| 77 | xbuf.shrink_to_fit(l_block); |
| 78 | if(xbuf.size() < l_block){ |
| 79 | xbuf.initialize_until(l_block, *first); |
| 80 | } |
| 81 | size_type *const uint_keys = xbuf.template aligned_trailing<size_type>(l_block); |
| 82 | size_type n_block_a, n_block_b, l_irreg1, l_irreg2; |
| 83 | combine_params( uint_keys, less(), l_combine |
| 84 | , l_combine1, l_block, xbuf |
| 85 | , n_block_a, n_block_b, l_irreg1, l_irreg2, true); //Outputs |
| 86 | BOOST_MOVE_ADAPTIVE_SORT_PRINT_L2(" A combine: ", len); |
| 87 | BOOST_ASSERT(xbuf.size() >= l_block); |
| 88 | op_merge_blocks_with_buf |
| 89 | (uint_keys, less(), first, l_block, l_irreg1, n_block_a, n_block_b, l_irreg2, comp, move_op(), xbuf.data()); |
| 90 | xbuf.clear(); |
| 91 | BOOST_MOVE_ADAPTIVE_SORT_PRINT_L1(" A mrg buf: ", len); |
| 92 | } |
| 93 | } |
| 94 | |
| 95 | template<class RandIt, class Compare, class XBuf> |
| 96 | inline void adaptive_merge_final_merge( RandIt first |
| 97 | , typename iterator_traits<RandIt>::size_type len1 |
| 98 | , typename iterator_traits<RandIt>::size_type len2 |
| 99 | , typename iterator_traits<RandIt>::size_type collected |
| 100 | , typename iterator_traits<RandIt>::size_type l_intbuf |
| 101 | , typename iterator_traits<RandIt>::size_type l_block |
| 102 | , bool use_internal_buf |
| 103 | , bool xbuf_used |
| 104 | , Compare comp |
| 105 | , XBuf & xbuf |
| 106 | ) |
| 107 | { |
| 108 | typedef typename iterator_traits<RandIt>::size_type size_type; |
| 109 | (void)l_block; |
| 110 | size_type n_keys = collected-l_intbuf; |
| 111 | size_type len = len1+len2; |
| 112 | if(use_internal_buf){ |
| 113 | if(xbuf_used){ |
| 114 | xbuf.clear(); |
| 115 | //Nothing to do |
| 116 | if(n_keys){ |
| 117 | unstable_sort(first, first+n_keys, comp, xbuf); |
| 118 | stable_merge(first, first+n_keys, first+len, comp, xbuf); |
| 119 | BOOST_MOVE_ADAPTIVE_SORT_PRINT_L2(" A key mrg: ", len); |
| 120 | } |
| 121 | } |
| 122 | else{ |
| 123 | xbuf.clear(); |
| 124 | unstable_sort(first, first+collected, comp, xbuf); |
| 125 | BOOST_MOVE_ADAPTIVE_SORT_PRINT_L2(" A k/b srt: ", len); |
| 126 | stable_merge(first, first+collected, first+len, comp, xbuf); |
| 127 | BOOST_MOVE_ADAPTIVE_SORT_PRINT_L2(" A k/b mrg: ", len); |
| 128 | } |
| 129 | } |
| 130 | else{ |
| 131 | xbuf.clear(); |
| 132 | unstable_sort(first, first+collected, comp, xbuf); |
| 133 | BOOST_MOVE_ADAPTIVE_SORT_PRINT_L2(" A k/b srt: ", len); |
| 134 | stable_merge(first, first+collected, first+len1+len2, comp, xbuf); |
| 135 | BOOST_MOVE_ADAPTIVE_SORT_PRINT_L2(" A k/b mrg: ", len); |
| 136 | } |
| 137 | BOOST_MOVE_ADAPTIVE_SORT_PRINT_L1(" A fin mrg: ", len); |
| 138 | } |
| 139 | |
| 140 | template<class SizeType> |
| 141 | inline static SizeType adaptive_merge_n_keys_without_external_keys(SizeType l_block, SizeType len1, SizeType len2, SizeType l_intbuf) |
| 142 | { |
| 143 | typedef SizeType size_type; |
| 144 | //This is the minimum number of keys to implement the ideal algorithm |
| 145 | size_type n_keys = len1/l_block+len2/l_block; |
| 146 | const size_type second_half_blocks = len2/l_block; |
| 147 | const size_type first_half_aux = len1-l_intbuf; |
| 148 | while(n_keys >= ((first_half_aux-n_keys)/l_block + second_half_blocks)){ |
| 149 | --n_keys; |
| 150 | } |
| 151 | ++n_keys; |
| 152 | return n_keys; |
| 153 | } |
| 154 | |
| 155 | template<class SizeType> |
| 156 | inline static SizeType adaptive_merge_n_keys_with_external_keys(SizeType l_block, SizeType len1, SizeType len2, SizeType l_intbuf) |
| 157 | { |
| 158 | typedef SizeType size_type; |
| 159 | //This is the minimum number of keys to implement the ideal algorithm |
| 160 | size_type n_keys = (len1-l_intbuf)/l_block + len2/l_block; |
| 161 | return n_keys; |
| 162 | } |
| 163 | |
| 164 | template<class SizeType, class Xbuf> |
| 165 | inline SizeType adaptive_merge_n_keys_intbuf(SizeType &rl_block, SizeType len1, SizeType len2, Xbuf & xbuf, SizeType &l_intbuf_inout) |
| 166 | { |
| 167 | typedef SizeType size_type; |
| 168 | size_type l_block = rl_block; |
| 169 | size_type l_intbuf = xbuf.capacity() >= l_block ? 0u : l_block; |
| 170 | |
| 171 | while(xbuf.capacity() >= l_block*2){ |
| 172 | l_block *= 2; |
| 173 | } |
| 174 | |
| 175 | //This is the minimum number of keys to implement the ideal algorithm |
| 176 | size_type n_keys = adaptive_merge_n_keys_without_external_keys(l_block, len1, len2, l_intbuf); |
| 177 | BOOST_ASSERT(n_keys >= ((len1-l_intbuf-n_keys)/l_block + len2/l_block)); |
| 178 | |
| 179 | if(xbuf.template supports_aligned_trailing<size_type> |
| 180 | ( l_block |
| 181 | , adaptive_merge_n_keys_with_external_keys(l_block, len1, len2, l_intbuf))) |
| 182 | { |
| 183 | n_keys = 0u; |
| 184 | } |
| 185 | l_intbuf_inout = l_intbuf; |
| 186 | rl_block = l_block; |
| 187 | return n_keys; |
| 188 | } |
| 189 | |
| 190 | // Main explanation of the merge algorithm. |
| 191 | // |
| 192 | // csqrtlen = ceil(sqrt(len)); |
| 193 | // |
| 194 | // * First, csqrtlen [to be used as buffer] + (len/csqrtlen - 1) [to be used as keys] => to_collect |
| 195 | // unique elements are extracted from elements to be sorted and placed in the beginning of the range. |
| 196 | // |
| 197 | // * Step "combine_blocks": the leading (len1-to_collect) elements plus trailing len2 elements |
| 198 | // are merged with a non-trivial ("smart") algorithm to form an ordered range trailing "len-to_collect" elements. |
| 199 | // |
| 200 | // Explanation of the "combine_blocks" step: |
| 201 | // |
| 202 | // * Trailing [first+to_collect, first+len1) elements are divided in groups of cqrtlen elements. |
| 203 | // Remaining elements that can't form a group are grouped in front of those elements. |
| 204 | // * Trailing [first+len1, first+len1+len2) elements are divided in groups of cqrtlen elements. |
| 205 | // Remaining elements that can't form a group are grouped in the back of those elements. |
| 206 | // * In parallel the following two steps are performed: |
| 207 | // * Groups are selection-sorted by first or last element (depending whether they are going |
| 208 | // to be merged to left or right) and keys are reordered accordingly as an imitation-buffer. |
| 209 | // * Elements of each block pair are merged using the csqrtlen buffer taking into account |
| 210 | // if they belong to the first half or second half (marked by the key). |
| 211 | // |
| 212 | // * In the final merge step leading "to_collect" elements are merged with rotations |
| 213 | // with the rest of merged elements in the "combine_blocks" step. |
| 214 | // |
| 215 | // Corner cases: |
| 216 | // |
| 217 | // * If no "to_collect" elements can be extracted: |
| 218 | // |
| 219 | // * If more than a minimum number of elements is extracted |
| 220 | // then reduces the number of elements used as buffer and keys in the |
| 221 | // and "combine_blocks" steps. If "combine_blocks" has no enough keys due to this reduction |
| 222 | // then uses a rotation based smart merge. |
| 223 | // |
| 224 | // * If the minimum number of keys can't be extracted, a rotation-based merge is performed. |
| 225 | // |
| 226 | // * If auxiliary memory is more or equal than min(len1, len2), a buffered merge is performed. |
| 227 | // |
| 228 | // * If the len1 or len2 are less than 2*csqrtlen then a rotation-based merge is performed. |
| 229 | // |
| 230 | // * If auxiliary memory is more than csqrtlen+n_keys*sizeof(std::size_t), |
| 231 | // then no csqrtlen need to be extracted and "combine_blocks" will use integral |
| 232 | // keys to combine blocks. |
| 233 | template<class RandIt, class Compare, class XBuf> |
| 234 | void adaptive_merge_impl |
| 235 | ( RandIt first |
| 236 | , typename iterator_traits<RandIt>::size_type len1 |
| 237 | , typename iterator_traits<RandIt>::size_type len2 |
| 238 | , Compare comp |
| 239 | , XBuf & xbuf |
| 240 | ) |
| 241 | { |
| 242 | typedef typename iterator_traits<RandIt>::size_type size_type; |
| 243 | |
| 244 | if(xbuf.capacity() >= min_value<size_type>(len1, len2)){ |
| 245 | buffered_merge(first, first+len1, first+(len1+len2), comp, xbuf); |
| 246 | } |
| 247 | else{ |
| 248 | const size_type len = len1+len2; |
| 249 | //Calculate ideal parameters and try to collect needed unique keys |
| 250 | size_type l_block = size_type(ceil_sqrt(len)); |
| 251 | |
| 252 | //One range is not big enough to extract keys and the internal buffer so a |
| 253 | //rotation-based based merge will do just fine |
| 254 | if(len1 <= l_block*2 || len2 <= l_block*2){ |
| 255 | merge_bufferless(first, first+len1, first+len1+len2, comp); |
| 256 | return; |
| 257 | } |
| 258 | |
| 259 | //Detail the number of keys and internal buffer. If xbuf has enough memory, no |
| 260 | //internal buffer is needed so l_intbuf will remain 0. |
| 261 | size_type l_intbuf = 0; |
| 262 | size_type n_keys = adaptive_merge_n_keys_intbuf(l_block, len1, len2, xbuf, l_intbuf); |
| 263 | size_type const to_collect = l_intbuf+n_keys; |
| 264 | //Try to extract needed unique values from the first range |
| 265 | size_type const collected = collect_unique(first, first+len1, to_collect, comp, xbuf); |
| 266 | BOOST_MOVE_ADAPTIVE_SORT_PRINT_L1("\n A collect: ", len); |
| 267 | |
| 268 | //Not the minimum number of keys is not available on the first range, so fallback to rotations |
| 269 | if(collected != to_collect && collected < 4){ |
| 270 | merge_bufferless(first, first+collected, first+len1, comp); |
| 271 | merge_bufferless(first, first + len1, first + len1 + len2, comp); |
| 272 | return; |
| 273 | } |
| 274 | |
| 275 | //If not enough keys but more than minimum, adjust the internal buffer and key count |
| 276 | bool use_internal_buf = collected == to_collect; |
| 277 | if (!use_internal_buf){ |
| 278 | l_intbuf = 0u; |
| 279 | n_keys = collected; |
| 280 | l_block = lblock_for_combine(l_intbuf, n_keys, len, use_internal_buf); |
| 281 | //If use_internal_buf is false, then then internal buffer will be zero and rotation-based combination will be used |
| 282 | l_intbuf = use_internal_buf ? l_block : 0u; |
| 283 | } |
| 284 | |
| 285 | bool const xbuf_used = collected == to_collect && xbuf.capacity() >= l_block; |
| 286 | //Merge trailing elements using smart merges |
| 287 | adaptive_merge_combine_blocks(first, len1, len2, collected, n_keys, l_block, use_internal_buf, xbuf_used, comp, xbuf); |
| 288 | //Merge buffer and keys with the rest of the values |
| 289 | adaptive_merge_final_merge (first, len1, len2, collected, l_intbuf, l_block, use_internal_buf, xbuf_used, comp, xbuf); |
| 290 | } |
| 291 | } |
| 292 | |
| 293 | } //namespace detail_adaptive { |
| 294 | |
| 295 | ///@endcond |
| 296 | |
| 297 | //! <b>Effects</b>: Merges two consecutive sorted ranges [first, middle) and [middle, last) |
| 298 | //! into one sorted range [first, last) according to the given comparison function comp. |
| 299 | //! The algorithm is stable (if there are equivalent elements in the original two ranges, |
| 300 | //! the elements from the first range (preserving their original order) precede the elements |
| 301 | //! from the second range (preserving their original order). |
| 302 | //! |
| 303 | //! <b>Requires</b>: |
| 304 | //! - RandIt must meet the requirements of ValueSwappable and RandomAccessIterator. |
| 305 | //! - The type of dereferenced RandIt must meet the requirements of MoveAssignable and MoveConstructible. |
| 306 | //! |
| 307 | //! <b>Parameters</b>: |
| 308 | //! - first: the beginning of the first sorted range. |
| 309 | //! - middle: the end of the first sorted range and the beginning of the second |
| 310 | //! - last: the end of the second sorted range |
| 311 | //! - comp: comparison function object which returns true if the first argument is is ordered before the second. |
| 312 | //! - uninitialized, uninitialized_len: raw storage starting on "uninitialized", able to hold "uninitialized_len" |
| 313 | //! elements of type iterator_traits<RandIt>::value_type. Maximum performance is achieved when uninitialized_len |
| 314 | //! is min(std::distance(first, middle), std::distance(middle, last)). |
| 315 | //! |
| 316 | //! <b>Throws</b>: If comp throws or the move constructor, move assignment or swap of the type |
| 317 | //! of dereferenced RandIt throws. |
| 318 | //! |
| 319 | //! <b>Complexity</b>: Always K x O(N) comparisons and move assignments/constructors/swaps. |
| 320 | //! Constant factor for comparisons and data movement is minimized when uninitialized_len |
| 321 | //! is min(std::distance(first, middle), std::distance(middle, last)). |
| 322 | //! Pretty good enough performance is achieved when uninitialized_len is |
| 323 | //! ceil(sqrt(std::distance(first, last)))*2. |
| 324 | //! |
| 325 | //! <b>Caution</b>: Experimental implementation, not production-ready. |
| 326 | template<class RandIt, class Compare> |
| 327 | void adaptive_merge( RandIt first, RandIt middle, RandIt last, Compare comp |
| 328 | , typename iterator_traits<RandIt>::value_type* uninitialized = 0 |
| 329 | , std::size_t uninitialized_len = 0) |
| 330 | { |
| 331 | typedef typename iterator_traits<RandIt>::size_type size_type; |
| 332 | typedef typename iterator_traits<RandIt>::value_type value_type; |
| 333 | |
| 334 | ::boost::movelib::detail_adaptive::adaptive_xbuf<value_type> xbuf(uninitialized, uninitialized_len); |
| 335 | ::boost::movelib::detail_adaptive::adaptive_merge_impl(first, size_type(middle - first), size_type(last - middle), comp, xbuf); |
| 336 | } |
| 337 | |
| 338 | } //namespace movelib { |
| 339 | } //namespace boost { |
| 340 | |
| 341 | #include <boost/move/detail/config_end.hpp> |
| 342 | |
| 343 | #endif //#define BOOST_MOVE_ADAPTIVE_MERGE_HPP |