The C and C++ Include Header Files
/usr/include/c++/11/pstl/unseq_backend_simd.h
$ cat -n /usr/include/c++/11/pstl/unseq_backend_simd.h 1 // -*- C++ -*- 2 //===-- unseq_backend_simd.h ----------------------------------------------===// 3 // 4 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 5 // See https://llvm.org/LICENSE.txt for license information. 6 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 7 // 8 //===----------------------------------------------------------------------===// 9 10 #ifndef _PSTL_UNSEQ_BACKEND_SIMD_H 11 #define _PSTL_UNSEQ_BACKEND_SIMD_H 12 13 #include
14 15 #include "utils.h" 16 17 // This header defines the minimum set of vector routines required 18 // to support parallel STL. 19 namespace __pstl 20 { 21 namespace __unseq_backend 22 { 23 24 // Expect vector width up to 64 (or 512 bit) 25 const std::size_t __lane_size = 64; 26 27 template
28 _Iterator 29 __simd_walk_1(_Iterator __first, _DifferenceType __n, _Function __f) noexcept 30 { 31 _PSTL_PRAGMA_SIMD 32 for (_DifferenceType __i = 0; __i < __n; ++__i) 33 __f(__first[__i]); 34 35 return __first + __n; 36 } 37 38 template
39 _Iterator2 40 __simd_walk_2(_Iterator1 __first1, _DifferenceType __n, _Iterator2 __first2, _Function __f) noexcept 41 { 42 _PSTL_PRAGMA_SIMD 43 for (_DifferenceType __i = 0; __i < __n; ++__i) 44 __f(__first1[__i], __first2[__i]); 45 return __first2 + __n; 46 } 47 48 template
49 _Iterator3 50 __simd_walk_3(_Iterator1 __first1, _DifferenceType __n, _Iterator2 __first2, _Iterator3 __first3, 51 _Function __f) noexcept 52 { 53 _PSTL_PRAGMA_SIMD 54 for (_DifferenceType __i = 0; __i < __n; ++__i) 55 __f(__first1[__i], __first2[__i], __first3[__i]); 56 return __first3 + __n; 57 } 58 59 // TODO: check whether __simd_first() can be used here 60 template
61 bool 62 __simd_or(_Index __first, _DifferenceType __n, _Pred __pred) noexcept 63 { 64 #if _PSTL_EARLYEXIT_PRESENT 65 _DifferenceType __i; 66 _PSTL_PRAGMA_VECTOR_UNALIGNED 67 _PSTL_PRAGMA_SIMD_EARLYEXIT 68 for (__i = 0; __i < __n; ++__i) 69 if (__pred(__first[__i])) 70 break; 71 return __i < __n; 72 #else 73 _DifferenceType __block_size = 4 < __n ? 4 : __n; 74 const _Index __last = __first + __n; 75 while (__last != __first) 76 { 77 int32_t __flag = 1; 78 _PSTL_PRAGMA_SIMD_REDUCTION(& : __flag) 79 for (_DifferenceType __i = 0; __i < __block_size; ++__i) 80 if (__pred(*(__first + __i))) 81 __flag = 0; 82 if (!__flag) 83 return true; 84 85 __first += __block_size; 86 if (__last - __first >= __block_size << 1) 87 { 88 // Double the block _Size. Any unnecessary iterations can be amortized against work done so far. 89 __block_size <<= 1; 90 } 91 else 92 { 93 __block_size = __last - __first; 94 } 95 } 96 return false; 97 #endif 98 } 99 100 template
101 _Index 102 __simd_first(_Index __first, _DifferenceType __begin, _DifferenceType __end, _Compare __comp) noexcept 103 { 104 #if _PSTL_EARLYEXIT_PRESENT 105 _DifferenceType __i = __begin; 106 _PSTL_PRAGMA_VECTOR_UNALIGNED // Do not generate peel loop part 107 _PSTL_PRAGMA_SIMD_EARLYEXIT for (; __i < __end; ++__i) 108 { 109 if (__comp(__first, __i)) 110 { 111 break; 112 } 113 } 114 return __first + __i; 115 #else 116 // Experiments show good block sizes like this 117 const _DifferenceType __block_size = 8; 118 alignas(__lane_size) _DifferenceType __lane[__block_size] = {0}; 119 while (__end - __begin >= __block_size) 120 { 121 _DifferenceType __found = 0; 122 _PSTL_PRAGMA_VECTOR_UNALIGNED // Do not generate peel loop part 123 _PSTL_PRAGMA_SIMD_REDUCTION(| 124 : __found) for (_DifferenceType __i = __begin; __i < __begin + __block_size; 125 ++__i) 126 { 127 const _DifferenceType __t = __comp(__first, __i); 128 __lane[__i - __begin] = __t; 129 __found |= __t; 130 } 131 if (__found) 132 { 133 _DifferenceType __i; 134 // This will vectorize 135 for (__i = 0; __i < __block_size; ++__i) 136 { 137 if (__lane[__i]) 138 { 139 break; 140 } 141 } 142 return __first + __begin + __i; 143 } 144 __begin += __block_size; 145 } 146 147 //Keep remainder scalar 148 while (__begin != __end) 149 { 150 if (__comp(__first, __begin)) 151 { 152 return __first + __begin; 153 } 154 ++__begin; 155 } 156 return __first + __end; 157 #endif //_PSTL_EARLYEXIT_PRESENT 158 } 159 160 template
161 std::pair<_Index1, _Index2> 162 __simd_first(_Index1 __first1, _DifferenceType __n, _Index2 __first2, _Pred __pred) noexcept 163 { 164 #if _PSTL_EARLYEXIT_PRESENT 165 _DifferenceType __i = 0; 166 _PSTL_PRAGMA_VECTOR_UNALIGNED 167 _PSTL_PRAGMA_SIMD_EARLYEXIT 168 for (; __i < __n; ++__i) 169 if (__pred(__first1[__i], __first2[__i])) 170 break; 171 return std::make_pair(__first1 + __i, __first2 + __i); 172 #else 173 const _Index1 __last1 = __first1 + __n; 174 const _Index2 __last2 = __first2 + __n; 175 // Experiments show good block sizes like this 176 const _DifferenceType __block_size = 8; 177 alignas(__lane_size) _DifferenceType __lane[__block_size] = {0}; 178 while (__last1 - __first1 >= __block_size) 179 { 180 _DifferenceType __found = 0; 181 _DifferenceType __i; 182 _PSTL_PRAGMA_VECTOR_UNALIGNED // Do not generate peel loop part 183 _PSTL_PRAGMA_SIMD_REDUCTION(| 184 : __found) for (__i = 0; __i < __block_size; ++__i) 185 { 186 const _DifferenceType __t = __pred(__first1[__i], __first2[__i]); 187 __lane[__i] = __t; 188 __found |= __t; 189 } 190 if (__found) 191 { 192 _DifferenceType __i2; 193 // This will vectorize 194 for (__i2 = 0; __i2 < __block_size; ++__i2) 195 { 196 if (__lane[__i2]) 197 break; 198 } 199 return std::make_pair(__first1 + __i2, __first2 + __i2); 200 } 201 __first1 += __block_size; 202 __first2 += __block_size; 203 } 204 205 //Keep remainder scalar 206 for (; __last1 != __first1; ++__first1, ++__first2) 207 if (__pred(*(__first1), *(__first2))) 208 return std::make_pair(__first1, __first2); 209 210 return std::make_pair(__last1, __last2); 211 #endif //_PSTL_EARLYEXIT_PRESENT 212 } 213 214 template
215 _DifferenceType 216 __simd_count(_Index __index, _DifferenceType __n, _Pred __pred) noexcept 217 { 218 _DifferenceType __count = 0; 219 _PSTL_PRAGMA_SIMD_REDUCTION(+ : __count) 220 for (_DifferenceType __i = 0; __i < __n; ++__i) 221 if (__pred(*(__index + __i))) 222 ++__count; 223 224 return __count; 225 } 226 227 template
228 _OutputIterator 229 __simd_unique_copy(_InputIterator __first, _DifferenceType __n, _OutputIterator __result, 230 _BinaryPredicate __pred) noexcept 231 { 232 if (__n == 0) 233 return __result; 234 235 _DifferenceType __cnt = 1; 236 __result[0] = __first[0]; 237 238 _PSTL_PRAGMA_SIMD 239 for (_DifferenceType __i = 1; __i < __n; ++__i) 240 { 241 _PSTL_PRAGMA_SIMD_ORDERED_MONOTONIC(__cnt : 1) 242 if (!__pred(__first[__i], __first[__i - 1])) 243 { 244 __result[__cnt] = __first[__i]; 245 ++__cnt; 246 } 247 } 248 return __result + __cnt; 249 } 250 251 template
252 _OutputIterator 253 __simd_assign(_InputIterator __first, _DifferenceType __n, _OutputIterator __result, _Assigner __assigner) noexcept 254 { 255 _PSTL_USE_NONTEMPORAL_STORES_IF_ALLOWED 256 _PSTL_PRAGMA_SIMD 257 for (_DifferenceType __i = 0; __i < __n; ++__i) 258 __assigner(__first + __i, __result + __i); 259 return __result + __n; 260 } 261 262 template
263 _OutputIterator 264 __simd_copy_if(_InputIterator __first, _DifferenceType __n, _OutputIterator __result, _UnaryPredicate __pred) noexcept 265 { 266 _DifferenceType __cnt = 0; 267 268 _PSTL_PRAGMA_SIMD 269 for (_DifferenceType __i = 0; __i < __n; ++__i) 270 { 271 _PSTL_PRAGMA_SIMD_ORDERED_MONOTONIC(__cnt : 1) 272 if (__pred(__first[__i])) 273 { 274 __result[__cnt] = __first[__i]; 275 ++__cnt; 276 } 277 } 278 return __result + __cnt; 279 } 280 281 template
282 _DifferenceType 283 __simd_calc_mask_2(_InputIterator __first, _DifferenceType __n, bool* __mask, _BinaryPredicate __pred) noexcept 284 { 285 _DifferenceType __count = 0; 286 287 _PSTL_PRAGMA_SIMD_REDUCTION(+ : __count) 288 for (_DifferenceType __i = 0; __i < __n; ++__i) 289 { 290 __mask[__i] = !__pred(__first[__i], __first[__i - 1]); 291 __count += __mask[__i]; 292 } 293 return __count; 294 } 295 296 template
297 _DifferenceType 298 __simd_calc_mask_1(_InputIterator __first, _DifferenceType __n, bool* __mask, _UnaryPredicate __pred) noexcept 299 { 300 _DifferenceType __count = 0; 301 302 _PSTL_PRAGMA_SIMD_REDUCTION(+ : __count) 303 for (_DifferenceType __i = 0; __i < __n; ++__i) 304 { 305 __mask[__i] = __pred(__first[__i]); 306 __count += __mask[__i]; 307 } 308 return __count; 309 } 310 311 template
312 void 313 __simd_copy_by_mask(_InputIterator __first, _DifferenceType __n, _OutputIterator __result, bool* __mask, 314 _Assigner __assigner) noexcept 315 { 316 _DifferenceType __cnt = 0; 317 _PSTL_PRAGMA_SIMD 318 for (_DifferenceType __i = 0; __i < __n; ++__i) 319 { 320 if (__mask[__i]) 321 { 322 _PSTL_PRAGMA_SIMD_ORDERED_MONOTONIC(__cnt : 1) 323 { 324 __assigner(__first + __i, __result + __cnt); 325 ++__cnt; 326 } 327 } 328 } 329 } 330 331 template
332 void 333 __simd_partition_by_mask(_InputIterator __first, _DifferenceType __n, _OutputIterator1 __out_true, 334 _OutputIterator2 __out_false, bool* __mask) noexcept 335 { 336 _DifferenceType __cnt_true = 0, __cnt_false = 0; 337 _PSTL_PRAGMA_SIMD 338 for (_DifferenceType __i = 0; __i < __n; ++__i) 339 { 340 _PSTL_PRAGMA_SIMD_ORDERED_MONOTONIC_2ARGS(__cnt_true : 1, __cnt_false : 1) 341 if (__mask[__i]) 342 { 343 __out_true[__cnt_true] = __first[__i]; 344 ++__cnt_true; 345 } 346 else 347 { 348 __out_false[__cnt_false] = __first[__i]; 349 ++__cnt_false; 350 } 351 } 352 } 353 354 template
355 _Index 356 __simd_fill_n(_Index __first, _DifferenceType __n, const _Tp& __value) noexcept 357 { 358 _PSTL_USE_NONTEMPORAL_STORES_IF_ALLOWED 359 _PSTL_PRAGMA_SIMD 360 for (_DifferenceType __i = 0; __i < __n; ++__i) 361 __first[__i] = __value; 362 return __first + __n; 363 } 364 365 template
366 _Index 367 __simd_generate_n(_Index __first, _DifferenceType __size, _Generator __g) noexcept 368 { 369 _PSTL_USE_NONTEMPORAL_STORES_IF_ALLOWED 370 _PSTL_PRAGMA_SIMD 371 for (_DifferenceType __i = 0; __i < __size; ++__i) 372 __first[__i] = __g(); 373 return __first + __size; 374 } 375 376 template
377 _Index 378 __simd_adjacent_find(_Index __first, _Index __last, _BinaryPredicate __pred, bool __or_semantic) noexcept 379 { 380 if (__last - __first < 2) 381 return __last; 382 383 typedef typename std::iterator_traits<_Index>::difference_type _DifferenceType; 384 _DifferenceType __i = 0; 385 386 #if _PSTL_EARLYEXIT_PRESENT 387 //Some compiler versions fail to compile the following loop when iterators are used. Indices are used instead 388 const _DifferenceType __n = __last - __first - 1; 389 _PSTL_PRAGMA_VECTOR_UNALIGNED 390 _PSTL_PRAGMA_SIMD_EARLYEXIT 391 for (; __i < __n; ++__i) 392 if (__pred(__first[__i], __first[__i + 1])) 393 break; 394 395 return __i < __n ? __first + __i : __last; 396 #else 397 // Experiments show good block sizes like this 398 //TODO: to consider tuning block_size for various data types 399 const _DifferenceType __block_size = 8; 400 alignas(__lane_size) _DifferenceType __lane[__block_size] = {0}; 401 while (__last - __first >= __block_size) 402 { 403 _DifferenceType __found = 0; 404 _PSTL_PRAGMA_VECTOR_UNALIGNED // Do not generate peel loop part 405 _PSTL_PRAGMA_SIMD_REDUCTION(| 406 : __found) for (__i = 0; __i < __block_size - 1; ++__i) 407 { 408 //TODO: to improve SIMD vectorization 409 const _DifferenceType __t = __pred(*(__first + __i), *(__first + __i + 1)); 410 __lane[__i] = __t; 411 __found |= __t; 412 } 413 414 //Process a pair of elements on a boundary of a data block 415 if (__first + __block_size < __last && __pred(*(__first + __i), *(__first + __i + 1))) 416 __lane[__i] = __found = 1; 417 418 if (__found) 419 { 420 if (__or_semantic) 421 return __first; 422 423 // This will vectorize 424 for (__i = 0; __i < __block_size; ++__i) 425 if (__lane[__i]) 426 break; 427 return __first + __i; //As far as found is true a __result (__lane[__i] is true) is guaranteed 428 } 429 __first += __block_size; 430 } 431 //Process the rest elements 432 for (; __last - __first > 1; ++__first) 433 if (__pred(*__first, *(__first + 1))) 434 return __first; 435 436 return __last; 437 #endif 438 } 439 440 // It was created to reduce the code inside std::enable_if 441 template
442 using is_arithmetic_plus = std::integral_constant
::value && 443 std::is_same<_BinaryOperation, std::plus<_Tp>>::value>; 444 445 template
446 typename std::enable_if
::value, _Tp>::type 447 __simd_transform_reduce(_DifferenceType __n, _Tp __init, _BinaryOperation, _UnaryOperation __f) noexcept 448 { 449 _PSTL_PRAGMA_SIMD_REDUCTION(+ : __init) 450 for (_DifferenceType __i = 0; __i < __n; ++__i) 451 __init += __f(__i); 452 return __init; 453 } 454 455 template
456 typename std::enable_if::value, _Tp>::type 457 __simd_transform_reduce(_Size __n, _Tp __init, _BinaryOperation __binary_op, _UnaryOperation __f) noexcept 458 { 459 const _Size __block_size = __lane_size / sizeof(_Tp); 460 if (__n > 2 * __block_size && __block_size > 1) 461 { 462 alignas(__lane_size) char __lane_[__lane_size]; 463 _Tp* __lane = reinterpret_cast<_Tp*>(__lane_); 464 465 // initializer 466 _PSTL_PRAGMA_SIMD 467 for (_Size __i = 0; __i < __block_size; ++__i) 468 { 469 ::new (__lane + __i) _Tp(__binary_op(__f(__i), __f(__block_size + __i))); 470 } 471 // main loop 472 _Size __i = 2 * __block_size; 473 const _Size last_iteration = __block_size * (__n / __block_size); 474 for (; __i < last_iteration; __i += __block_size) 475 { 476 _PSTL_PRAGMA_SIMD 477 for (_Size __j = 0; __j < __block_size; ++__j) 478 { 479 __lane[__j] = __binary_op(__lane[__j], __f(__i + __j)); 480 } 481 } 482 // remainder 483 _PSTL_PRAGMA_SIMD 484 for (_Size __j = 0; __j < __n - last_iteration; ++__j) 485 { 486 __lane[__j] = __binary_op(__lane[__j], __f(last_iteration + __j)); 487 } 488 // combiner 489 for (_Size __j = 0; __j < __block_size; ++__j) 490 { 491 __init = __binary_op(__init, __lane[__j]); 492 } 493 // destroyer 494 _PSTL_PRAGMA_SIMD 495 for (_Size __j = 0; __j < __block_size; ++__j) 496 { 497 __lane[__j].~_Tp(); 498 } 499 } 500 else 501 { 502 for (_Size __i = 0; __i < __n; ++__i) 503 { 504 __init = __binary_op(__init, __f(__i)); 505 } 506 } 507 return __init; 508 } 509 510 // Exclusive scan for "+" and arithmetic types 511 template
513 typename std::enable_if
::value, std::pair<_OutputIterator, _Tp>>::type 514 __simd_scan(_InputIterator __first, _Size __n, _OutputIterator __result, _UnaryOperation __unary_op, _Tp __init, 515 _BinaryOperation, /*Inclusive*/ std::false_type) 516 { 517 _PSTL_PRAGMA_SIMD_SCAN(+ : __init) 518 for (_Size __i = 0; __i < __n; ++__i) 519 { 520 __result[__i] = __init; 521 _PSTL_PRAGMA_SIMD_EXCLUSIVE_SCAN(__init) 522 __init += __unary_op(__first[__i]); 523 } 524 return std::make_pair(__result + __n, __init); 525 } 526 527 // As soon as we cannot call __binary_op in "combiner" we create a wrapper over _Tp to encapsulate __binary_op 528 template
529 struct _Combiner 530 { 531 _Tp __value; 532 _BinaryOp* __bin_op; // Here is a pointer to function because of default ctor 533 534 _Combiner() : __value{}, __bin_op(nullptr) {} 535 _Combiner(const _Tp& value, const _BinaryOp* bin_op) : __value(value), __bin_op(const_cast<_BinaryOp*>(bin_op)) {} 536 _Combiner(const _Combiner& __obj) : __value{}, __bin_op(__obj.__bin_op) {} 537 538 void 539 operator()(const _Combiner& __obj) 540 { 541 __value = (*__bin_op)(__value, __obj.__value); 542 } 543 }; 544 545 // Exclusive scan for other binary operations and types 546 template
548 typename std::enable_if::value, std::pair<_OutputIterator, _Tp>>::type 549 __simd_scan(_InputIterator __first, _Size __n, _OutputIterator __result, _UnaryOperation __unary_op, _Tp __init, 550 _BinaryOperation __binary_op, /*Inclusive*/ std::false_type) 551 { 552 typedef _Combiner<_Tp, _BinaryOperation> _CombinerType; 553 _CombinerType __init_{__init, &__binary_op}; 554 555 _PSTL_PRAGMA_DECLARE_REDUCTION(__bin_op, _CombinerType) 556 557 _PSTL_PRAGMA_SIMD_SCAN(__bin_op : __init_) 558 for (_Size __i = 0; __i < __n; ++__i) 559 { 560 __result[__i] = __init_.__value; 561 _PSTL_PRAGMA_SIMD_EXCLUSIVE_SCAN(__init_) 562 _PSTL_PRAGMA_FORCEINLINE 563 __init_.__value = __binary_op(__init_.__value, __unary_op(__first[__i])); 564 } 565 return std::make_pair(__result + __n, __init_.__value); 566 } 567 568 // Inclusive scan for "+" and arithmetic types 569 template
571 typename std::enable_if
::value, std::pair<_OutputIterator, _Tp>>::type 572 __simd_scan(_InputIterator __first, _Size __n, _OutputIterator __result, _UnaryOperation __unary_op, _Tp __init, 573 _BinaryOperation, /*Inclusive*/ std::true_type) 574 { 575 _PSTL_PRAGMA_SIMD_SCAN(+ : __init) 576 for (_Size __i = 0; __i < __n; ++__i) 577 { 578 __init += __unary_op(__first[__i]); 579 _PSTL_PRAGMA_SIMD_INCLUSIVE_SCAN(__init) 580 __result[__i] = __init; 581 } 582 return std::make_pair(__result + __n, __init); 583 } 584 585 // Inclusive scan for other binary operations and types 586 template
588 typename std::enable_if::value, std::pair<_OutputIterator, _Tp>>::type 589 __simd_scan(_InputIterator __first, _Size __n, _OutputIterator __result, _UnaryOperation __unary_op, _Tp __init, 590 _BinaryOperation __binary_op, std::true_type) 591 { 592 typedef _Combiner<_Tp, _BinaryOperation> _CombinerType; 593 _CombinerType __init_{__init, &__binary_op}; 594 595 _PSTL_PRAGMA_DECLARE_REDUCTION(__bin_op, _CombinerType) 596 597 _PSTL_PRAGMA_SIMD_SCAN(__bin_op : __init_) 598 for (_Size __i = 0; __i < __n; ++__i) 599 { 600 _PSTL_PRAGMA_FORCEINLINE 601 __init_.__value = __binary_op(__init_.__value, __unary_op(__first[__i])); 602 _PSTL_PRAGMA_SIMD_INCLUSIVE_SCAN(__init_) 603 __result[__i] = __init_.__value; 604 } 605 return std::make_pair(__result + __n, __init_.__value); 606 } 607 608 // [restriction] - std::iterator_traits<_ForwardIterator>::value_type should be DefaultConstructible. 609 // complexity [violation] - We will have at most (__n-1 + number_of_lanes) comparisons instead of at most __n-1. 610 template
611 _ForwardIterator 612 __simd_min_element(_ForwardIterator __first, _Size __n, _Compare __comp) noexcept 613 { 614 if (__n == 0) 615 { 616 return __first; 617 } 618 619 typedef typename std::iterator_traits<_ForwardIterator>::value_type _ValueType; 620 struct _ComplexType 621 { 622 _ValueType __min_val; 623 _Size __min_ind; 624 _Compare* __min_comp; 625 626 _ComplexType() : __min_val{}, __min_ind{}, __min_comp(nullptr) {} 627 _ComplexType(const _ValueType& val, const _Compare* comp) 628 : __min_val(val), __min_ind(0), __min_comp(const_cast<_Compare*>(comp)) 629 { 630 } 631 _ComplexType(const _ComplexType& __obj) 632 : __min_val(__obj.__min_val), __min_ind(__obj.__min_ind), __min_comp(__obj.__min_comp) 633 { 634 } 635 636 _PSTL_PRAGMA_DECLARE_SIMD 637 void 638 operator()(const _ComplexType& __obj) 639 { 640 if (!(*__min_comp)(__min_val, __obj.__min_val) && 641 ((*__min_comp)(__obj.__min_val, __min_val) || __obj.__min_ind - __min_ind < 0)) 642 { 643 __min_val = __obj.__min_val; 644 __min_ind = __obj.__min_ind; 645 } 646 } 647 }; 648 649 _ComplexType __init{*__first, &__comp}; 650 651 _PSTL_PRAGMA_DECLARE_REDUCTION(__min_func, _ComplexType) 652 653 _PSTL_PRAGMA_SIMD_REDUCTION(__min_func : __init) 654 for (_Size __i = 1; __i < __n; ++__i) 655 { 656 const _ValueType __min_val = __init.__min_val; 657 const _ValueType __current = __first[__i]; 658 if (__comp(__current, __min_val)) 659 { 660 __init.__min_val = __current; 661 __init.__min_ind = __i; 662 } 663 } 664 return __first + __init.__min_ind; 665 } 666 667 // [restriction] - std::iterator_traits<_ForwardIterator>::value_type should be DefaultConstructible. 668 // complexity [violation] - We will have at most (2*(__n-1) + 4*number_of_lanes) comparisons instead of at most [1.5*(__n-1)]. 669 template
670 std::pair<_ForwardIterator, _ForwardIterator> 671 __simd_minmax_element(_ForwardIterator __first, _Size __n, _Compare __comp) noexcept 672 { 673 if (__n == 0) 674 { 675 return std::make_pair(__first, __first); 676 } 677 typedef typename std::iterator_traits<_ForwardIterator>::value_type _ValueType; 678 679 struct _ComplexType 680 { 681 _ValueType __min_val; 682 _ValueType __max_val; 683 _Size __min_ind; 684 _Size __max_ind; 685 _Compare* __minmax_comp; 686 687 _ComplexType() : __min_val{}, __max_val{}, __min_ind{}, __max_ind{}, __minmax_comp(nullptr) {} 688 _ComplexType(const _ValueType& min_val, const _ValueType& max_val, const _Compare* comp) 689 : __min_val(min_val), __max_val(max_val), __min_ind(0), __max_ind(0), 690 __minmax_comp(const_cast<_Compare*>(comp)) 691 { 692 } 693 _ComplexType(const _ComplexType& __obj) 694 : __min_val(__obj.__min_val), __max_val(__obj.__max_val), __min_ind(__obj.__min_ind), 695 __max_ind(__obj.__max_ind), __minmax_comp(__obj.__minmax_comp) 696 { 697 } 698 699 void 700 operator()(const _ComplexType& __obj) 701 { 702 // min 703 if ((*__minmax_comp)(__obj.__min_val, __min_val)) 704 { 705 __min_val = __obj.__min_val; 706 __min_ind = __obj.__min_ind; 707 } 708 else if (!(*__minmax_comp)(__min_val, __obj.__min_val)) 709 { 710 __min_val = __obj.__min_val; 711 __min_ind = (__min_ind - __obj.__min_ind < 0) ? __min_ind : __obj.__min_ind; 712 } 713 714 // max 715 if ((*__minmax_comp)(__max_val, __obj.__max_val)) 716 { 717 __max_val = __obj.__max_val; 718 __max_ind = __obj.__max_ind; 719 } 720 else if (!(*__minmax_comp)(__obj.__max_val, __max_val)) 721 { 722 __max_val = __obj.__max_val; 723 __max_ind = (__max_ind - __obj.__max_ind < 0) ? __obj.__max_ind : __max_ind; 724 } 725 } 726 }; 727 728 _ComplexType __init{*__first, *__first, &__comp}; 729 730 _PSTL_PRAGMA_DECLARE_REDUCTION(__min_func, _ComplexType); 731 732 _PSTL_PRAGMA_SIMD_REDUCTION(__min_func : __init) 733 for (_Size __i = 1; __i < __n; ++__i) 734 { 735 auto __min_val = __init.__min_val; 736 auto __max_val = __init.__max_val; 737 auto __current = __first + __i; 738 if (__comp(*__current, __min_val)) 739 { 740 __init.__min_val = *__current; 741 __init.__min_ind = __i; 742 } 743 else if (!__comp(*__current, __max_val)) 744 { 745 __init.__max_val = *__current; 746 __init.__max_ind = __i; 747 } 748 } 749 return std::make_pair(__first + __init.__min_ind, __first + __init.__max_ind); 750 } 751 752 template
754 std::pair<_OutputIterator1, _OutputIterator2> 755 __simd_partition_copy(_InputIterator __first, _DifferenceType __n, _OutputIterator1 __out_true, 756 _OutputIterator2 __out_false, _UnaryPredicate __pred) noexcept 757 { 758 _DifferenceType __cnt_true = 0, __cnt_false = 0; 759 760 _PSTL_PRAGMA_SIMD 761 for (_DifferenceType __i = 0; __i < __n; ++__i) 762 { 763 _PSTL_PRAGMA_SIMD_ORDERED_MONOTONIC_2ARGS(__cnt_true : 1, __cnt_false : 1) 764 if (__pred(__first[__i])) 765 { 766 __out_true[__cnt_true] = __first[__i]; 767 ++__cnt_true; 768 } 769 else 770 { 771 __out_false[__cnt_false] = __first[__i]; 772 ++__cnt_false; 773 } 774 } 775 return std::make_pair(__out_true + __cnt_true, __out_false + __cnt_false); 776 } 777 778 template
779 _ForwardIterator1 780 __simd_find_first_of(_ForwardIterator1 __first, _ForwardIterator1 __last, _ForwardIterator2 __s_first, 781 _ForwardIterator2 __s_last, _BinaryPredicate __pred) noexcept 782 { 783 typedef typename std::iterator_traits<_ForwardIterator1>::difference_type _DifferencType; 784 785 const _DifferencType __n1 = __last - __first; 786 const _DifferencType __n2 = __s_last - __s_first; 787 if (__n1 == 0 || __n2 == 0) 788 { 789 return __last; // according to the standard 790 } 791 792 // Common case 793 // If first sequence larger than second then we'll run simd_first with parameters of first sequence. 794 // Otherwise, vice versa. 795 if (__n1 < __n2) 796 { 797 for (; __first != __last; ++__first) 798 { 799 if (__unseq_backend::__simd_or( 800 __s_first, __n2, 801 __internal::__equal_value_by_pred
(*__first, __pred))) 802 { 803 return __first; 804 } 805 } 806 } 807 else 808 { 809 for (; __s_first != __s_last; ++__s_first) 810 { 811 const auto __result = __unseq_backend::__simd_first( 812 __first, _DifferencType(0), __n1, [__s_first, &__pred](_ForwardIterator1 __it, _DifferencType __i) { 813 return __pred(__it[__i], *__s_first); 814 }); 815 if (__result != __last) 816 { 817 return __result; 818 } 819 } 820 } 821 return __last; 822 } 823 824 template
825 _RandomAccessIterator 826 __simd_remove_if(_RandomAccessIterator __first, _DifferenceType __n, _UnaryPredicate __pred) noexcept 827 { 828 // find first element we need to remove 829 auto __current = __unseq_backend::__simd_first( 830 __first, _DifferenceType(0), __n, 831 [&__pred](_RandomAccessIterator __it, _DifferenceType __i) { return __pred(__it[__i]); }); 832 __n -= __current - __first; 833 834 // if we have in sequence only one element that pred(__current[1]) != false we can exit the function 835 if (__n < 2) 836 { 837 return __current; 838 } 839 840 _DifferenceType __cnt = 0; 841 _PSTL_PRAGMA_SIMD 842 for (_DifferenceType __i = 1; __i < __n; ++__i) 843 { 844 _PSTL_PRAGMA_SIMD_ORDERED_MONOTONIC(__cnt : 1) 845 if (!__pred(__current[__i])) 846 { 847 __current[__cnt] = std::move(__current[__i]); 848 ++__cnt; 849 } 850 } 851 return __current + __cnt; 852 } 853 } // namespace __unseq_backend 854 } // namespace __pstl 855 856 #endif /* _PSTL_UNSEQ_BACKEND_SIMD_H */
Contact us
|
About us
|
Term of use
|
Copyright © 2000-2024 MyWebUniversity.com ™