Brian Silverman | 1f5d398 | 2018-08-04 23:37:52 -0700 | [diff] [blame^] | 1 | ////////////////////////////////////////////////////////////////////////////// |
| 2 | // |
| 3 | // (C) Copyright Ion Gaztanaga 2015-2016. |
| 4 | // Distributed under the Boost Software License, Version 1.0. |
| 5 | // (See accompanying file LICENSE_1_0.txt or copy at |
| 6 | // http://www.boost.org/LICENSE_1_0.txt) |
| 7 | // |
| 8 | // See http://www.boost.org/libs/move for documentation. |
| 9 | // |
| 10 | ////////////////////////////////////////////////////////////////////////////// |
| 11 | |
| 12 | #include <algorithm> //std::inplace_merge |
| 13 | #include <cstdio> //std::printf |
| 14 | #include <iostream> //std::cout |
| 15 | |
| 16 | #include <boost/config.hpp> |
| 17 | |
| 18 | #include <boost/move/unique_ptr.hpp> |
| 19 | #include <boost/timer/timer.hpp> |
| 20 | |
| 21 | #include "order_type.hpp" |
| 22 | #include "random_shuffle.hpp" |
| 23 | |
| 24 | using boost::timer::cpu_timer; |
| 25 | using boost::timer::cpu_times; |
| 26 | using boost::timer::nanosecond_type; |
| 27 | |
| 28 | //#define BOOST_MOVE_ADAPTIVE_SORT_STATS |
| 29 | //#define BOOST_MOVE_ADAPTIVE_SORT_STATS_LEVEL 2 |
| 30 | void print_stats(const char *str, boost::ulong_long_type element_count) |
| 31 | { |
| 32 | std::printf("%sCmp:%8.04f Cpy:%9.04f\n", str, double(order_perf_type::num_compare)/element_count, double(order_perf_type::num_copy)/element_count ); |
| 33 | } |
| 34 | |
| 35 | #include <boost/move/algo/adaptive_merge.hpp> |
| 36 | #include <boost/move/algo/detail/merge.hpp> |
| 37 | #include <boost/move/core.hpp> |
| 38 | |
| 39 | template<class T, class Compare> |
| 40 | std::size_t generate_elements(T elements[], std::size_t element_count, std::size_t key_reps[], std::size_t key_len, Compare comp) |
| 41 | { |
| 42 | std::srand(0); |
| 43 | for(std::size_t i = 0; i < (key_len ? key_len : element_count); ++i){ |
| 44 | key_reps[i]=0; |
| 45 | } |
| 46 | for(std::size_t i=0; i < element_count; ++i){ |
| 47 | std::size_t key = key_len ? (i % key_len) : i; |
| 48 | elements[i].key=key; |
| 49 | } |
| 50 | ::random_shuffle(elements, elements + element_count); |
| 51 | ::random_shuffle(elements, elements + element_count); |
| 52 | ::random_shuffle(elements, elements + element_count); |
| 53 | for(std::size_t i = 0; i < element_count; ++i){ |
| 54 | elements[i].val = key_reps[elements[i].key]++; |
| 55 | } |
| 56 | std::size_t split_count = element_count/2; |
| 57 | std::stable_sort(elements, elements+split_count, comp); |
| 58 | std::stable_sort(elements+split_count, elements+element_count, comp); |
| 59 | return split_count; |
| 60 | } |
| 61 | |
| 62 | |
| 63 | |
| 64 | template<class T, class Compare> |
| 65 | void adaptive_merge_buffered(T *elements, T *mid, T *last, Compare comp, std::size_t BufLen) |
| 66 | { |
| 67 | boost::movelib::unique_ptr<char[]> mem(new char[sizeof(T)*BufLen]); |
| 68 | boost::movelib::adaptive_merge(elements, mid, last, comp, reinterpret_cast<T*>(mem.get()), BufLen); |
| 69 | } |
| 70 | |
// Selector for the merge implementation timed by measure_algo.
// The enumerator order must match the order of the AlgoNames table.
enum AlgoType
{
   StdMerge,            // std::inplace_merge
   AdaptiveMerge,       // boost::movelib::adaptive_merge, no caller-supplied buffer
   SqrtHAdaptiveMerge,  // adaptive_merge, buffer of ceil_sqrt_multiple(N)/2+1 elements
   SqrtAdaptiveMerge,   // adaptive_merge, buffer of ceil_sqrt_multiple(N) elements
   Sqrt2AdaptiveMerge,  // adaptive_merge, buffer of 2*ceil_sqrt_multiple(N) elements
   QuartAdaptiveMerge,  // adaptive_merge, buffer of (N-1)/4+1 elements
   StdInplaceMerge,     // boost::movelib::merge_bufferless_ONlogN
   MaxMerge             // number of algorithms; not a valid selector
};
| 82 | |
| 83 | const char *AlgoNames [] = { "StdMerge " |
| 84 | , "AdaptMerge " |
| 85 | , "SqrtHAdaptMerge " |
| 86 | , "SqrtAdaptMerge " |
| 87 | , "Sqrt2AdaptMerge " |
| 88 | , "QuartAdaptMerge " |
| 89 | , "StdInplaceMerge " |
| 90 | }; |
| 91 | |
| 92 | BOOST_STATIC_ASSERT((sizeof(AlgoNames)/sizeof(*AlgoNames)) == MaxMerge); |
| 93 | |
| 94 | template<class T> |
| 95 | bool measure_algo(T *elements, std::size_t key_reps[], std::size_t element_count, std::size_t key_len, unsigned alg, nanosecond_type &prev_clock) |
| 96 | { |
| 97 | std::size_t const split_pos = generate_elements(elements, element_count, key_reps, key_len, order_type_less()); |
| 98 | |
| 99 | std::printf("%s ", AlgoNames[alg]); |
| 100 | order_perf_type::num_compare=0; |
| 101 | order_perf_type::num_copy=0; |
| 102 | order_perf_type::num_elements = element_count; |
| 103 | cpu_timer timer; |
| 104 | timer.resume(); |
| 105 | switch(alg) |
| 106 | { |
| 107 | case StdMerge: |
| 108 | std::inplace_merge(elements, elements+split_pos, elements+element_count, order_type_less()); |
| 109 | break; |
| 110 | case AdaptiveMerge: |
| 111 | boost::movelib::adaptive_merge(elements, elements+split_pos, elements+element_count, order_type_less()); |
| 112 | break; |
| 113 | case SqrtHAdaptiveMerge: |
| 114 | adaptive_merge_buffered( elements, elements+split_pos, elements+element_count, order_type_less() |
| 115 | , boost::movelib::detail_adaptive::ceil_sqrt_multiple(element_count)/2+1); |
| 116 | break; |
| 117 | case SqrtAdaptiveMerge: |
| 118 | adaptive_merge_buffered( elements, elements+split_pos, elements+element_count, order_type_less() |
| 119 | , boost::movelib::detail_adaptive::ceil_sqrt_multiple(element_count)); |
| 120 | break; |
| 121 | case Sqrt2AdaptiveMerge: |
| 122 | adaptive_merge_buffered( elements, elements+split_pos, elements+element_count, order_type_less() |
| 123 | , 2*boost::movelib::detail_adaptive::ceil_sqrt_multiple(element_count)); |
| 124 | break; |
| 125 | case QuartAdaptiveMerge: |
| 126 | adaptive_merge_buffered( elements, elements+split_pos, elements+element_count, order_type_less() |
| 127 | , (element_count-1)/4+1); |
| 128 | break; |
| 129 | case StdInplaceMerge: |
| 130 | boost::movelib::merge_bufferless_ONlogN(elements, elements+split_pos, elements+element_count, order_type_less()); |
| 131 | break; |
| 132 | } |
| 133 | timer.stop(); |
| 134 | |
| 135 | if(order_perf_type::num_elements == element_count){ |
| 136 | std::printf(" Tmp Ok "); |
| 137 | } else{ |
| 138 | std::printf(" Tmp KO "); |
| 139 | } |
| 140 | nanosecond_type new_clock = timer.elapsed().wall; |
| 141 | |
| 142 | //std::cout << "Cmp:" << order_perf_type::num_compare << " Cpy:" << order_perf_type::num_copy; //for old compilers without ll size argument |
| 143 | std::printf("Cmp:%8.04f Cpy:%9.04f", double(order_perf_type::num_compare)/element_count, double(order_perf_type::num_copy)/element_count ); |
| 144 | |
| 145 | double time = double(new_clock); |
| 146 | |
| 147 | const char *units = "ns"; |
| 148 | if(time >= 1000000000.0){ |
| 149 | time /= 1000000000.0; |
| 150 | units = " s"; |
| 151 | } |
| 152 | else if(time >= 1000000.0){ |
| 153 | time /= 1000000.0; |
| 154 | units = "ms"; |
| 155 | } |
| 156 | else if(time >= 1000.0){ |
| 157 | time /= 1000.0; |
| 158 | units = "us"; |
| 159 | } |
| 160 | |
| 161 | std::printf(" %6.02f%s (%6.02f)\n" |
| 162 | , time |
| 163 | , units |
| 164 | , prev_clock ? double(new_clock)/double(prev_clock): 1.0); |
| 165 | prev_clock = new_clock; |
| 166 | bool res = is_order_type_ordered(elements, element_count, true); |
| 167 | return res; |
| 168 | } |
| 169 | |
| 170 | template<class T> |
| 171 | bool measure_all(std::size_t L, std::size_t NK) |
| 172 | { |
| 173 | boost::movelib::unique_ptr<T[]> pdata(new T[L]); |
| 174 | boost::movelib::unique_ptr<std::size_t[]> pkeys(new std::size_t[NK ? NK : L]); |
| 175 | T *A = pdata.get(); |
| 176 | std::size_t *Keys = pkeys.get(); |
| 177 | std::printf("\n - - N: %u, NK: %u - -\n", (unsigned)L, (unsigned)NK); |
| 178 | |
| 179 | nanosecond_type prev_clock = 0; |
| 180 | nanosecond_type back_clock; |
| 181 | bool res = true; |
| 182 | res = res && measure_algo(A,Keys,L,NK,StdMerge, prev_clock); |
| 183 | back_clock = prev_clock; |
| 184 | // |
| 185 | prev_clock = back_clock; |
| 186 | res = res && measure_algo(A,Keys,L,NK,QuartAdaptiveMerge, prev_clock); |
| 187 | // |
| 188 | prev_clock = back_clock; |
| 189 | res = res && measure_algo(A,Keys,L,NK,Sqrt2AdaptiveMerge, prev_clock); |
| 190 | // |
| 191 | prev_clock = back_clock; |
| 192 | res = res && measure_algo(A,Keys,L,NK,SqrtAdaptiveMerge, prev_clock); |
| 193 | // |
| 194 | prev_clock = back_clock; |
| 195 | res = res && measure_algo(A,Keys,L,NK,SqrtHAdaptiveMerge, prev_clock); |
| 196 | // |
| 197 | prev_clock = back_clock; |
| 198 | res = res && measure_algo(A,Keys,L,NK,AdaptiveMerge, prev_clock); |
| 199 | // |
| 200 | prev_clock = back_clock; |
| 201 | res = res && measure_algo(A,Keys,L,NK,StdInplaceMerge, prev_clock); |
| 202 | // |
| 203 | if(!res) |
| 204 | throw int(0); |
| 205 | return res; |
| 206 | } |
| 207 | |
| 208 | //Undefine BENCH_MERGE_SHORT to run the long test; undefine BENCH_SORT_UNIQUE_VALUES to also run the repeated-key cases |
| 209 | #define BENCH_MERGE_SHORT |
| 210 | #define BENCH_SORT_UNIQUE_VALUES |
| 211 | |
| 212 | int main() |
| 213 | { |
| 214 | try{ |
| 215 | #ifndef BENCH_SORT_UNIQUE_VALUES |
| 216 | measure_all<order_perf_type>(101,1); |
| 217 | measure_all<order_perf_type>(101,7); |
| 218 | measure_all<order_perf_type>(101,31); |
| 219 | #endif |
| 220 | measure_all<order_perf_type>(101,0); |
| 221 | |
| 222 | // |
| 223 | #ifndef BENCH_SORT_UNIQUE_VALUES |
| 224 | measure_all<order_perf_type>(1101,1); |
| 225 | measure_all<order_perf_type>(1001,7); |
| 226 | measure_all<order_perf_type>(1001,31); |
| 227 | measure_all<order_perf_type>(1001,127); |
| 228 | measure_all<order_perf_type>(1001,511); |
| 229 | #endif |
| 230 | measure_all<order_perf_type>(1001,0); |
| 231 | // |
| 232 | #ifndef BENCH_MERGE_SHORT |
| 233 | #ifndef BENCH_SORT_UNIQUE_VALUES |
| 234 | measure_all<order_perf_type>(10001,65); |
| 235 | measure_all<order_perf_type>(10001,255); |
| 236 | measure_all<order_perf_type>(10001,1023); |
| 237 | measure_all<order_perf_type>(10001,4095); |
| 238 | #endif |
| 239 | measure_all<order_perf_type>(10001,0); |
| 240 | |
| 241 | // |
| 242 | #ifndef BENCH_SORT_UNIQUE_VALUES |
| 243 | measure_all<order_perf_type>(100001,511); |
| 244 | measure_all<order_perf_type>(100001,2047); |
| 245 | measure_all<order_perf_type>(100001,8191); |
| 246 | measure_all<order_perf_type>(100001,32767); |
| 247 | #endif |
| 248 | measure_all<order_perf_type>(100001,0); |
| 249 | |
| 250 | // |
| 251 | #ifdef NDEBUG |
| 252 | #ifndef BENCH_SORT_UNIQUE_VALUES |
| 253 | measure_all<order_perf_type>(1000001,1); |
| 254 | measure_all<order_perf_type>(1000001,1024); |
| 255 | measure_all<order_perf_type>(1000001,32768); |
| 256 | measure_all<order_perf_type>(1000001,524287); |
| 257 | #endif |
| 258 | measure_all<order_perf_type>(1000001,0); |
| 259 | measure_all<order_perf_type>(3000001,0); |
| 260 | measure_all<order_perf_type>(5000001,0); |
| 261 | #endif //NDEBUG |
| 262 | |
| 263 | #endif //#ifndef BENCH_MERGE_SHORT |
| 264 | |
| 265 | //measure_all<order_perf_type>(100000001,0); |
| 266 | } |
| 267 | catch(...) |
| 268 | { |
| 269 | return 1; |
| 270 | } |
| 271 | |
| 272 | return 0; |
| 273 | } |
| 274 | |