| /* |
| |
| Copyright (c) 2017, NVIDIA Corporation |
| All rights reserved. |
| |
| Redistribution and use in source and binary forms, with or without modification, |
| are permitted provided that the following conditions are met: |
| |
| 1. Redistributions of source code must retain the above copyright notice, this |
| list of conditions and the following disclaimer. |
| |
| 2. Redistributions in binary form must reproduce the above copyright notice, |
| this list of conditions and the following disclaimer in the documentation |
| and/or other materials provided with the distribution. |
| |
| THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND |
| ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED |
| WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. |
| IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, |
| INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, |
| BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, |
| DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF |
| LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE |
| OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED |
| OF THE POSSIBILITY OF SUCH DAMAGE. |
| |
| */ |
| |
| #ifndef __semaphore__ |
| #define __semaphore__ |
| |
| #include "details/config.hpp" |
| |
| #ifdef __semaphore_cuda |
| namespace cuda |
| { |
| #else |
| namespace std |
| { |
| #endif |
| namespace experimental |
| { |
| inline namespace v1 |
| { |
| |
| // 32.10, semaphore type and operations |
| enum semaphore_notify |
| { |
| semaphore_notify_all, |
| semaphore_notify_one, |
| semaphore_notify_none |
| }; |
| struct binary_semaphore; |
| struct counting_semaphore; |
| struct synchronic; |
| |
| // 32.10.2, initialization |
| #define ATOMIC_SEM_INIT 0 //see below |
| |
| // 32.10.3, atomic free functions for waiting |
| template <class T> |
| __semaphore_abi void atomic_signal_explicit(const atomic<T> *, semaphore_notify); |
| template <class T> |
| __semaphore_abi void atomic_signal(const atomic<T> *); |
| template <class T, class V> |
| __semaphore_abi void atomic_wait_explicit(const atomic<T> *, V, std::memory_order); |
| template <class T, class V> |
| __semaphore_abi void atomic_wait(const atomic<T> *, V); |
| template <class T, class V> |
| __semaphore_abi void atomic_wait_cas_explicit(atomic<T> *, V, V, std::memory_order); |
| template <class T, class V> |
| __semaphore_abi void atomic_wait_cas(atomic<T> *, V, V); |
| template <class T, class V> |
| __semaphore_abi bool atomic_try_wait_explicit(const atomic<T> *, V, std::memory_order); |
| template <class T, class V> |
| __semaphore_abi bool atomic_try_wait(const atomic<T> *, V); |
| |
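| // A one-slot semaphore. __atom packs the "held" flag (__valubit) together with |
| // contention flags consumed by the out-of-line slow paths; __ticket/__tocket appear |
| // to form a turnstile used by __acquire_slow(), and __stolen records a fast-path |
| // acquisition so the matching release() skips the turnstile increment. |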
| struct binary_semaphore |
| { |
| using count_type = uint32_t; |
| |
| static constexpr count_type max = 1; |
| |
| __semaphore_abi inline void release() noexcept |
| { |
| if(__stolen) |
| __stolen = false; |
| else |
| #ifdef __semaphore_cuda |
| __tocket.fetch_add(1, std::memory_order_relaxed); |
| #else |
| __tocket.store(__tocket.load(std::memory_order_relaxed)+1, std::memory_order_relaxed); |
| #endif |
| #ifdef __semaphore_fast_path |
| count_type old = __valubit; |
| if (__semaphore_expect(!__atom.compare_exchange_strong(old, 0, std::memory_order_release, std::memory_order_relaxed), 0)) |
| __release_slow(old); |
| #else |
| __atom.fetch_and(~__valubit, std::memory_order_release); |
| #endif |
| } |
| __semaphore_abi inline bool try_acquire() noexcept |
| { |
| count_type old = 0; |
| for (int i = 0; i < 64; ++i) { |
| // speculate that the semaphore is free and try to claim the value bit |
| if(__semaphore_expect(__atom.compare_exchange_strong(old = 0, __valubit, std::memory_order_acquire, std::memory_order_relaxed),1)) |
| return __stolen = true; |
| // otherwise spin, within the same iteration budget, until the bit reads clear |
| for(; old != 0 && i < 64; ++i) { |
| details::__semaphore_yield(); |
| old = __atom.load(std::memory_order_relaxed); |
| } |
| } |
| return false; |
| } |
| __semaphore_abi inline void acquire() noexcept |
| { |
| if (__semaphore_expect(try_acquire(), 1)) |
| return; |
| __acquire_slow(); |
| } |
| template <class Clock, class Duration> |
| __semaphore_abi bool try_acquire_until(const std::chrono::time_point<Clock, Duration>& abs_time) |
| { |
| if (__semaphore_expect(try_acquire(), 1)) |
| return true; |
| return __acquire_slow_timed(abs_time); |
| } |
| template <class Rep, class Period> |
| __semaphore_abi bool try_acquire_for(const std::chrono::duration<Rep, Period>& rel_time) |
| { |
| if (__semaphore_expect(try_acquire(), 1)) |
| return true; |
| return __acquire_slow_timed(details::__semaphore_clock::now() + rel_time); |
| } |
| |
| __semaphore_abi constexpr binary_semaphore(count_type desired) noexcept : __atom(desired ? 0 : 1), __ticket(0), __tocket(0), __stolen(false) |
| { |
| } |
| |
| binary_semaphore(const binary_semaphore&) = delete; |
| binary_semaphore &operator=(const binary_semaphore&) = delete; |
| |
| static constexpr count_type __valubit = 1; |
| static constexpr count_type __lockbit = 2; |
| static constexpr count_type __slowbit = 4; |
| static constexpr count_type __contbit = 8; |
| |
| #ifdef __semaphore_fast_path |
| __semaphore_abi void __release_slow(count_type old) noexcept; |
| #endif |
| |
| __semaphore_abi void __acquire_slow() noexcept; |
| |
| bool __acquire_slow_timed(std::chrono::time_point<details::__semaphore_clock, details::__semaphore_duration> const &) noexcept; |
| |
| atomic<count_type> __atom; |
| atomic<count_type> __ticket; |
| atomic<count_type> __tocket; |
| bool __stolen; |
| }; |
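| |
| // Illustrative sketch only (not part of the interface): binary_semaphore used as a |
| // mutex-like lock. Assumes a host build where this header lives in std::experimental |
| // and <thread> is available. |
| // |
| // std::experimental::binary_semaphore lock(1); // 1 = initially available |
| // int shared_value = 0; |
| // auto worker = [&] { |
| //     lock.acquire();      // blocks until the slot is free |
| //     ++shared_value;      // critical section |
| //     lock.release(); |
| // }; |
| // std::thread t1(worker), t2(worker); |
| // t1.join(); t2.join(); |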
| |
| #ifndef __semaphore_sem |
| |
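| // Futex-backed counting_semaphore: the count lives in the upper bits of a single |
| // atomic word (shifted by __shift); when __semaphore_fast_path is enabled the low |
| // bits carry contention/lock flags consumed by the out-of-line slow paths. |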
| struct counting_semaphore |
| { |
| using count_type = int32_t; |
| |
| private: |
| #ifdef __semaphore_fast_path |
| static constexpr count_type __valumask = ~3, |
| __contmask = 2, |
| __lockmask = 1, |
| __shift = 2; |
| #else |
| static constexpr count_type __valumask = ~0, |
| __contmask = 0, |
| __lockmask = 0, |
| __shift = 0; |
| #endif |
| public: |
| static constexpr count_type max = count_type(uint32_t(__valumask) >> (__shift ? __shift : 1)); |
| |
| __semaphore_abi inline void release(std::memory_order order = std::memory_order_seq_cst) noexcept |
| { |
| release(1, semaphore_notify_all, order); |
| } |
| __semaphore_abi inline void release(count_type term, std::memory_order order = std::memory_order_seq_cst) noexcept |
| { |
| release(term, semaphore_notify_all, order); |
| } |
| __semaphore_abi inline void release(count_type term, semaphore_notify notify, std::memory_order order = std::memory_order_seq_cst) noexcept |
| { |
| |
| #ifdef __semaphore_fast_path |
| count_type old = 0, set = term << __shift; |
| bool success = atom.compare_exchange_weak(old, set, order, std::memory_order_relaxed); |
| while (__semaphore_expect(!success && !(old & (__contmask | __lockmask)), 0)) |
| { |
| set = old + (term << __shift); |
| success = atom.compare_exchange_weak(old, set, order, std::memory_order_relaxed); |
| } |
| if (__semaphore_expect(!success, 0)) |
| __fetch_add_slow(term, old, order, notify); |
| #else |
| atom.fetch_add(term, order); |
| #endif |
| } |
| |
| __semaphore_abi inline void acquire(std::memory_order order = std::memory_order_seq_cst) noexcept |
| { |
| |
| while (__semaphore_expect(!__fetch_sub_if(order), 0)) |
| { |
| bool const success = __acquire_fast(order); |
| if (__semaphore_expect(!success, 0)) |
| __acquire_slow(order); |
| } |
| } |
| __semaphore_abi inline bool try_acquire(std::memory_order order = std::memory_order_seq_cst) noexcept |
| { |
| |
| return __fetch_sub_if(order); |
| } |
| template <class Clock, class Duration> |
| __semaphore_abi bool try_acquire_until(std::chrono::time_point<Clock, Duration> const &abs_time, std::memory_order order = std::memory_order_seq_cst) |
| { |
| |
| while (__semaphore_expect(!__fetch_sub_if(order), 0)) |
| { |
| bool success = __acquire_fast(order); |
| if (__semaphore_expect(!success, 0)) |
| success = __acquire_slow_timed(abs_time, order); |
| if (__semaphore_expect(!success, 0)) |
| return false; |
| } |
| return true; |
| } |
| template <class Rep, class Period> |
| __semaphore_abi bool try_acquire_for(std::chrono::duration<Rep, Period> const &rel_time, std::memory_order order = std::memory_order_seq_cst) |
| { |
| |
| if (__semaphore_expect(__fetch_sub_if(order), 1)) |
| return true; |
| else |
| return try_acquire_until(details::__semaphore_clock::now() + std::chrono::duration_cast<std::chrono::nanoseconds>(rel_time), order); |
| } |
| |
| __semaphore_abi constexpr counting_semaphore(count_type desired) noexcept : atom(desired << __shift) |
| { |
| } |
| counting_semaphore(const counting_semaphore &) = delete; |
| counting_semaphore &operator=(const counting_semaphore &) = delete; |
| |
| private: |
| __semaphore_abi bool __fetch_sub_if_slow(count_type old, std::memory_order order) noexcept; |
| |
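| // Decrement the packed count if at least one unit is available: the first CAS |
| // speculates the common count==1 case, the second retries with the observed value, |
| // and __fetch_sub_if_slow resolves any remaining contention. |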
| __semaphore_abi bool __fetch_sub_if(std::memory_order order) noexcept |
| { |
| |
| count_type old = 1 << __shift, set = 0; |
| bool retcode = atom.compare_exchange_weak(old, set, order, std::memory_order_relaxed); |
| if (__semaphore_expect(!retcode && (old >> __shift) >= 1, 0)) |
| { |
| old &= __valumask; |
| set = old - (1 << __shift); |
| retcode = atom.compare_exchange_weak(old, set, order, std::memory_order_relaxed); |
| } |
| if (__semaphore_expect(!retcode && (old >> __shift) >= 1, 0)) |
| retcode = __fetch_sub_if_slow(old, order); |
| return retcode; |
| } |
| |
| #ifdef __semaphore_fast_path |
| __semaphore_abi void __fetch_add_slow(count_type term, count_type old, std::memory_order order, semaphore_notify notify) noexcept; |
| #endif |
| |
| __semaphore_abi bool __acquire_slow_timed(std::chrono::time_point<details::__semaphore_clock, details::__semaphore_duration> const &abs_time, std::memory_order order) noexcept; |
| |
| __semaphore_abi void __acquire_slow(std::memory_order order) noexcept; |
| |
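| // Spin briefly waiting for the packed count to become positive, so that acquire() |
| // can usually retry the decrement without entering the slow path. |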
| __semaphore_abi inline bool __acquire_fast(std::memory_order order) noexcept |
| { |
| |
| auto value = (atom.load(order) >> __shift); |
| if (__semaphore_expect(value >= 1, 1)) |
| return true; |
| for (int i = 0; i < 32; ++i) |
| { |
| details::__semaphore_yield(); |
| value = (atom.load(order) >> __shift); |
| if (__semaphore_expect(value >= 1, 1)) |
| return true; |
| } |
| return false; |
| } |
| |
| atomic<count_type> atom; |
| |
| friend struct synchronic; |
| }; |
| |
| #else |
| |
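| // Fallback counting_semaphore layered over an OS semaphore (__semaphore_sem_t). |
| // __frontbuffer holds the logical count shifted left by one, with the low bit used |
| // as a release-in-progress flag; a negative count means threads are blocked (or |
| // about to block) on the OS semaphore. With __semaphore_back_buffered, releases post |
| // at most one unit at a time and park the remainder in __backbuffer (see __backfill()). |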
| struct counting_semaphore |
| { |
| using count_type = int32_t; |
| |
| static constexpr count_type max = (std::numeric_limits<count_type>::max)() >> 1; |
| |
| __semaphore_abi inline void release(std::memory_order order = std::memory_order_seq_cst) noexcept |
| { |
| release(1, semaphore_notify_all, order); |
| } |
| __semaphore_abi inline void release(count_type term, std::memory_order order = std::memory_order_seq_cst) noexcept |
| { |
| release(term, semaphore_notify_all, order); |
| } |
| __semaphore_abi inline void release(count_type term, semaphore_notify, std::memory_order order = std::memory_order_seq_cst) noexcept |
| { |
| count_type old = __frontbuffer.load(std::memory_order_relaxed); |
| while (1) |
| { |
| old &= ~1; |
| if (__frontbuffer.compare_exchange_weak(old, old + (term << 1) + 1, order, std::memory_order_relaxed)) |
| break; |
| } |
| if (old >> 1 < 0) |
| { // was it depleted? |
| count_type inc = (std::min)(-(old >> 1), term); |
| #ifdef __semaphore_back_buffered |
| __backbuffer.fetch_add(inc - 1); |
| inc = 1; |
| #endif |
| count_type const result = details::__semaphore_sem_post(__semaphore, inc); |
| #ifdef WIN32 |
| if (!result) |
| { |
| auto d = GetLastError(); |
| assert(d == ERROR_SUCCESS); |
| } |
| #endif |
| assert(result); |
| } |
| __frontbuffer.fetch_sub(1); |
| } |
| |
| inline void acquire(std::memory_order order = std::memory_order_seq_cst) noexcept |
| { |
| __acquire_fast(); |
| __acquire_slow(order); |
| } |
| |
| template <class Rep, class Period> |
| bool try_acquire_for(std::chrono::duration<Rep, Period> const &rel_time, std::memory_order order = std::memory_order_seq_cst) |
| { |
| |
| __acquire_fast(); |
| if (__frontbuffer.fetch_sub(2, order) >> 1 > 0) |
| return true; |
| auto const result = details::__semaphore_sem_wait_timed(__semaphore, rel_time); |
| if (result) |
| __backfill(); |
| return result; |
| } |
| template <class Clock, class Duration> |
| bool try_acquire_until(std::chrono::time_point<Clock, Duration> const &abs_time, std::memory_order order = std::memory_order_seq_cst) |
| { |
| |
| return try_acquire_for(abs_time - Clock::now(), order); |
| } |
| inline bool try_acquire(std::memory_order order = std::memory_order_seq_cst) noexcept |
| { |
| |
| __acquire_fast(); |
| if (__frontbuffer.fetch_sub(2, order) >> 1 > 0) |
| return true; |
| return try_acquire_for(std::chrono::nanoseconds(0), order); |
| } |
| counting_semaphore(count_type desired) noexcept |
| : __frontbuffer { desired << 1 } |
| #ifdef __semaphore_back_buffered |
| , __backbuffer{ 0 } |
| #endif |
| { |
| auto const result = details::__semaphore_sem_init(__semaphore, desired); |
| #ifdef WIN32 |
| if (!result) |
| { |
| auto d = GetLastError(); |
| assert(d == ERROR_SUCCESS); |
| } |
| #endif |
| assert(result); |
| } |
| ~counting_semaphore() |
| { |
| |
| while (__frontbuffer.load(std::memory_order_acquire) & 1) |
| ; |
| auto const result = details::__semaphore_sem_destroy(__semaphore); |
| assert(result); |
| } |
| counting_semaphore(const counting_semaphore &) = delete; |
| counting_semaphore &operator=(const counting_semaphore &) = delete; |
| |
| private: |
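| // Spin briefly waiting for __frontbuffer to show an available unit, so that the |
| // caller's fetch_sub can usually succeed without touching the OS semaphore. |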
| inline void __acquire_fast() noexcept |
| { |
| |
| if (__semaphore_expect(__frontbuffer.load(std::memory_order_relaxed) > 1, 1)) |
| return; |
| for (int i = 0; i < 32; ++i) |
| { |
| details::__semaphore_yield(); |
| if (__semaphore_expect(__frontbuffer.load(std::memory_order_relaxed) > 1, 1)) |
| return; |
| } |
| } |
| |
| inline void __acquire_slow(std::memory_order order) noexcept { |
| if (__frontbuffer.fetch_sub(2, order) >> 1 > 0) |
| return; |
| count_type const result = details::__semaphore_sem_wait(__semaphore); |
| #ifdef WIN32 |
| if (!result) |
| { |
| auto d = GetLastError(); |
| assert(d == ERROR_SUCCESS); |
| } |
| #endif |
| assert(result); |
| __backfill(); |
| } |
| |
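| // After a successful wait on the OS semaphore, transfer one deferred post from |
| // __backbuffer back to the OS semaphore (releases post at most one unit at a time |
| // when back-buffering is enabled). |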
| inline void __backfill() |
| { |
| #ifdef __semaphore_back_buffered |
| if (__semaphore_expect(__backbuffer.load(std::memory_order_relaxed) == 0, 1)) |
| return; |
| if (__semaphore_expect(__backbuffer.fetch_sub(1, std::memory_order_relaxed) == 0, 0)) |
| { |
| __backbuffer.fetch_add(1, std::memory_order_relaxed); // put back |
| return; |
| } |
| auto const result = details::__semaphore_sem_post(__semaphore, 1); |
| assert(result); |
| #endif |
| } |
| |
| details::__semaphore_sem_t __semaphore; |
| atomic<count_type> __frontbuffer; |
| #ifdef __semaphore_back_buffered |
| atomic<count_type> __backbuffer; |
| #endif |
| |
| friend struct synchronic; |
| }; |
| |
| #endif |
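| |
| // Illustrative sketch only (not part of the interface): bounding concurrency with |
| // counting_semaphore. Assumes a host build where this header lives in |
| // std::experimental. |
| // |
| // std::experimental::counting_semaphore slots(4); // at most 4 concurrent users |
| // auto use_resource = [&] { |
| //     slots.acquire();    // take a slot; blocks when none are free |
| //     /* ... use the shared resource ... */ |
| //     slots.release();    // return the slot |
| // }; |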
| |
| #ifndef __semaphore_cuda |
| #define __semaphore_abi_a __semaphore_abi |
| #else |
| #define __semaphore_abi_a __semaphore_abi static |
| #endif |
| |
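| // synchronic backs the atomic_signal/atomic_wait free functions below. On the host, |
| // waiters register themselves in __reversebuffer and block on __sem; a signaler that |
| // observes registered waiters releases exactly that many units. In the CUDA build the |
| // members are static and the wait paths fall back to spinning with exponential backoff. |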
| struct synchronic { |
| |
| template <class T> |
| __semaphore_abi_a void signal(const atomic<T>*, semaphore_notify notify = semaphore_notify_all) noexcept |
| { |
| #ifndef __semaphore_cuda |
| if (__semaphore_expect(!__reversebuffer.load(std::memory_order_relaxed), 1)) |
| return; |
| (atomic_thread_fence)(std::memory_order_seq_cst); |
| int const waiting = __reversebuffer.exchange(0, std::memory_order_relaxed); |
| if (__semaphore_expect(waiting, 0)) |
| __sem.release(waiting, notify, std::memory_order_release); |
| #endif |
| } |
| |
| template <class T, class V> |
| __semaphore_abi_a void wait_cas(atomic<T>* a, V oldval, V newval, std::memory_order order = std::memory_order_seq_cst) noexcept |
| { |
| for (int i = 0; i < 16; ++i, details::__semaphore_yield()) { |
| auto old = oldval; |
| if (__semaphore_expect(a->compare_exchange_weak(old, newval, order), 1)) |
| return; |
| } |
| details::__semaphore_exponential_backoff b; |
| while (1) |
| { |
| b.sleep(); |
| auto old = oldval; |
| if (__semaphore_expect(a->compare_exchange_weak(old, newval, order), 1)) |
| return; |
| } |
| } |
| |
| template <class T, class V> |
| __semaphore_abi_a void wait(atomic<T> const *a, V oldval, std::memory_order order = std::memory_order_seq_cst) noexcept |
| { |
| for (int i = 0; i < 16; ++i, details::__semaphore_yield()) |
| if (__semaphore_expect(a->load(order) != oldval, 1)) |
| return; |
| details::__semaphore_exponential_backoff b; |
| do |
| { |
| #ifndef __semaphore_cuda |
| __reversebuffer.fetch_add(1, std::memory_order_relaxed); |
| (atomic_thread_fence)(std::memory_order_seq_cst); |
| if (__semaphore_expect(a->load(order) != oldval, 0)) |
| { |
| auto const waiting = __reversebuffer.exchange(0, std::memory_order_relaxed); |
| switch (waiting) |
| { |
| case 0: |
| // a signaler already claimed our registration and posted __sem for it, |
| // so absorb that post here (blocking at this point is awkward for the |
| // timed for/until variants). |
| __sem.acquire(std::memory_order_relaxed); |
| // fall through |
| case 1: |
| break; |
| default: |
| __sem.release(waiting - 1, semaphore_notify_all, std::memory_order_relaxed); break; |
| } |
| break; |
| } |
| __sem.__acquire_slow(std::memory_order_relaxed); |
| #else |
| b.sleep(); |
| #endif |
| } while (a->load(order) == oldval); |
| } |
| |
| template <class T, class V> |
| __semaphore_abi_a bool try_wait(atomic<T> const *a, V oldval, std::memory_order order = std::memory_order_seq_cst) noexcept |
| { |
| |
| for (int i = 0; i < 128; ++i, details::__semaphore_yield()) |
| if (__semaphore_expect(a->load(order) != oldval, 1)) |
| return true; |
| return false; |
| } |
| |
| __semaphore_abi inline synchronic(counting_semaphore::count_type desired) noexcept |
| #ifndef __semaphore_cuda |
| : __sem(desired), __reversebuffer{0} |
| #endif |
| { |
| } |
| synchronic(const synchronic &) = delete; |
| synchronic &operator=(const synchronic &) = delete; |
| |
| #ifndef __semaphore_cuda |
| counting_semaphore __sem; |
| atomic<counting_semaphore::count_type> __reversebuffer{ 0 }; |
| #endif |
| }; |
| |
| #ifndef __semaphore_cuda |
| extern __semaphore_abi synchronic *__atomic_wait_get_semaphore(void const *ptr); |
| #endif |
| |
| template <class T> |
| __semaphore_abi void atomic_signal_explicit(const atomic<T>*a, semaphore_notify notify) |
| { |
| #ifndef __semaphore_cuda |
| __atomic_wait_get_semaphore(a)-> |
| #else |
| synchronic:: |
| #endif |
| signal(a, notify); |
| } |
| template <class T> |
| __semaphore_abi void atomic_signal(const atomic<T>* a) |
| { |
| #ifndef __semaphore_cuda |
| __atomic_wait_get_semaphore(a)-> |
| #else |
| synchronic:: |
| #endif |
| signal(a); |
| } |
| template <class T, class V> |
| __semaphore_abi void atomic_wait_cas_explicit(atomic<T>* a, V oldval, V newval, std::memory_order order) |
| { |
| #ifndef __semaphore_cuda |
| __atomic_wait_get_semaphore(a)-> |
| #else |
| synchronic:: |
| #endif |
| wait_cas(a, oldval, newval, order); |
| } |
| template <class T, class V> |
| __semaphore_abi void atomic_wait_cas(atomic<T>* a, V oldval, V newval) |
| { |
| #ifndef __semaphore_cuda |
| __atomic_wait_get_semaphore(a)-> |
| #else |
| synchronic:: |
| #endif |
| wait_cas(a, oldval, newval); |
| } |
| template <class T, class V> |
| __semaphore_abi void atomic_wait_explicit(const atomic<T>* a, V oldval, std::memory_order order) |
| { |
| #ifndef __semaphore_cuda |
| __atomic_wait_get_semaphore(a)-> |
| #else |
| synchronic:: |
| #endif |
| wait(a, oldval, order); |
| } |
| template <class T, class V> |
| __semaphore_abi void atomic_wait(const atomic<T>* a, V oldval) |
| { |
| #ifndef __semaphore_cuda |
| __atomic_wait_get_semaphore(a)-> |
| #else |
| synchronic:: |
| #endif |
| wait(a, oldval); |
| } |
| template <class T, class V> |
| __semaphore_abi bool atomic_try_wait_explicit(const atomic<T>* a, V oldval, std::memory_order order) |
| { |
| #ifndef __semaphore_cuda |
| return __atomic_wait_get_semaphore(a)-> |
| #else |
| return synchronic:: |
| #endif |
| try_wait(a, oldval, order); |
| } |
| template <class T, class V> |
| __semaphore_abi bool atomic_try_wait(const atomic<T>* a, V oldval) |
| { |
| #ifndef __semaphore_cuda |
| return __atomic_wait_get_semaphore(a)-> |
| #else |
| return synchronic:: |
| #endif |
| try_wait(a, oldval); |
| } |
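| |
| // Illustrative sketch only (not part of the interface): waiting on an atomic flag |
| // with the free functions above. Assumes a host build, where the calls route through |
| // the synchronic object returned by __atomic_wait_get_semaphore. |
| // |
| // std::atomic<int> flag{0}; |
| // // waiter: |
| // std::experimental::atomic_wait(&flag, 0); // blocks while flag == 0 |
| // // signaler: |
| // flag.store(1, std::memory_order_release); |
| // std::experimental::atomic_signal(&flag); // wake any waiter |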
| |
| } // namespace v1 |
| } // namespace experimental |
| } // namespace cuda / std |
| |
| #endif //__semaphore__ |