OrcaSlicer-bambulab/deps_src/mcut/include/mcut/internal/tpool.h
2026-05-11 19:29:55 +01:00

1010 lines
No EOL
29 KiB
C++

/**
* Copyright (c) 2021-2022 Floyd M. Chitalu.
* All rights reserved.
*
* NOTE: This file is licensed under GPL-3.0-or-later (default).
* A commercial license can be purchased from Floyd M. Chitalu.
*
* License details:
*
* (A) GNU General Public License ("GPL"); a copy of which you should have
* received with this file.
* - see also: <http://www.gnu.org/licenses/>
* (B) Commercial license.
* - email: floyd.m.chitalu@gmail.com
*
* The commercial license option is for users who wish to use MCUT in
* their products for commercial purposes but do not wish to release their
* software products under the GPL license.
*
* Author(s) : Floyd M. Chitalu
*/
#ifndef MCUT_SCHEDULER_H_
#define MCUT_SCHEDULER_H_
#include <algorithm>
#include <atomic>
#include <condition_variable>
#include <functional>
#include <future>
#include <iterator>
#include <list>
#include <memory>
#include <mutex>
#include <stack>
#include <thread>
#include <utility>
#include <vector>

#include "mcut/internal/utils.h"
// Move-only, type-erased holder for a callable that takes no arguments.
// It exists so that move-only callables (e.g. std::packaged_task) can be
// stored in a container of a single element type.
class function_wrapper {
private:
    // Type-erased interface through which the stored callable is invoked.
    struct impl_base {
        virtual ~impl_base() { }
        virtual void call() = 0;
    };

    // Concrete holder: owns a callable of type F that was moved into it.
    template <typename F>
    struct impl_type : impl_base {
        F f;

        impl_type(F&& source)
            : f(std::move(source))
        {
        }

        void call() override { return f(); }
    };

    // The erased callable; null for a default-constructed or moved-from wrapper.
    std::unique_ptr<impl_base> impl;

public:
    // Creates an empty wrapper (invoking it is not valid).
    function_wrapper() = default;

    // Takes ownership of the given callable by moving it into the wrapper.
    template <typename F>
    function_wrapper(F&& f)
        : impl(new impl_type<F>(std::move(f)))
    {
    }

    // Ownership transfer; the source wrapper is left empty.
    function_wrapper(function_wrapper&& other) noexcept
        : impl(std::move(other.impl))
    {
    }

    function_wrapper& operator=(function_wrapper&& other) noexcept
    {
        impl = std::move(other.impl);
        return *this;
    }

    // Copying is disabled. The non-const overload is deleted explicitly so the
    // templated constructor is not selected for lvalue wrappers.
    function_wrapper(const function_wrapper&) = delete;
    function_wrapper(function_wrapper&) = delete;
    function_wrapper& operator=(const function_wrapper&) = delete;

    // Invokes the stored callable.
    void operator()() { impl->call(); }
};
// Unbounded FIFO queue implemented as a singly linked list with a dummy tail
// node. The head and the tail are protected by separate mutexes so that a
// producer (push) and a consumer (pop) can work at the same time.
//
// Blocking pops can be interrupted through an optional, externally owned
// "done" flag (see set_done_ptr / disrupt_wait_for_data).
template <typename T>
class thread_safe_queue {
private:
    // Externally owned shutdown flag; null until set_done_ptr() is called, in
    // which case the queue behaves as "never done".
    std::atomic<bool>* m_done = nullptr;

    struct node {
        std::shared_ptr<T> data;
        std::unique_ptr<node> next;
    };

    std::mutex head_mutex;
    std::unique_ptr<node> head;
    std::mutex tail_mutex;
    node* tail;
    std::condition_variable data_cond;

    // True once a done flag has been registered and set.
    bool is_done() const
    {
        return m_done != nullptr && m_done->load();
    }

    // Non-blocking pop: returns the removed node, or null if the queue is empty.
    std::unique_ptr<node> try_pop_head(T& value)
    {
        std::lock_guard<std::mutex> head_lock(head_mutex);
        if (head.get() == get_tail()) {
            return std::unique_ptr<node>(nullptr);
        }
        value = std::move(*head->data);
        return pop_head();
    }

    // Reads the tail pointer under the tail mutex.
    node* get_tail()
    {
        std::lock_guard<std::mutex> tail_lock(tail_mutex);
        return tail;
    }

    // dont call directly (caller must hold head_mutex and the queue must be non-empty)
    std::unique_ptr<node> pop_head()
    {
        std::unique_ptr<node> old_head = std::move(head);
        head = std::move(old_head->next);
        return (old_head);
    }

#if _WIN32 // on windows
    // there is a C26115 warning here, which is by design. The warning is caused by
    // the fact that our function here has the side effect of locking the mutex "head_mutex"
    // via the construction of the unique_lock.
    // The warning is removed by annotating the function with "_Acquires_lock_(...)".
    // See here: https://developercommunity.visualstudio.com/t/unexpected-warning-c26115-for-returning-a-unique-l/1077322
    _Acquires_lock_(return)
#endif
    // Blocks until the queue is non-empty or the done flag is set; returns
    // with head_mutex still locked.
    std::unique_lock<std::mutex> wait_for_data()
    {
        std::unique_lock<std::mutex> head_lock(head_mutex);
        auto until = [&]() { return is_done() || head.get() != get_tail(); };
        data_cond.wait(head_lock, until);
        return head_lock;
    }

    // Blocking pop: returns null (leaving "value" untouched) if the wait was
    // ended by the done flag.
    std::unique_ptr<node> wait_pop_head(T& value)
    {
        std::unique_lock<std::mutex> head_lock(wait_for_data());
        if (is_done()) {
            return std::unique_ptr<node>(nullptr);
        }
        value = std::move(*head->data);
        return pop_head();
    }

public:
    thread_safe_queue()
        : head(new node)
        , tail(head.get())
    {
    }

    thread_safe_queue(const thread_safe_queue& other) = delete;
    thread_safe_queue& operator=(const thread_safe_queue& other) = delete;

    // Registers the flag that, once set, makes blocked/blocking pops return
    // without data. The flag must outlive every use of the queue.
    void set_done_ptr(std::atomic<bool>* done)
    {
        m_done = done;
    }

    // Wakes one waiter so that it can re-check the done flag.
    void disrupt_wait_for_data()
    {
        // https://stackoverflow.com/questions/60658842/condition-variable-doesnt-get-notified-to-wake-up-even-with-a-predicate
        //
        // Need to do this to prevent a data race, which can happen when the
        // waiting thread has evaluated the predicate but has not yet blocked on
        // the condition variable at the time notify_one() is called.
        std::unique_lock<std::mutex> lock(head_mutex);
        data_cond.notify_one();
    }

    void notify_one()
    {
        data_cond.notify_one();
    }

    // Appends a value and wakes one waiting consumer.
    void push(T new_value)
    {
        std::shared_ptr<T> new_data(std::make_shared<T>(std::move(new_value)));
        std::unique_ptr<node> p(new node);
        {
            std::lock_guard<std::mutex> tail_lock(tail_mutex);
            tail->data = std::move(new_data);
            node* const new_tail = p.get();
            tail->next = std::move(p);
            tail = new_tail;
        }
        {
            // Consumers evaluate the wait predicate while holding head_mutex.
            // Passing through that mutex guarantees that a consumer has either
            // already observed the new tail or is already blocked on the
            // condition variable, so the notification below cannot be lost.
            // (tail_mutex is released first, so the lock order head -> tail
            // used by the consumers is never inverted.)
            std::lock_guard<std::mutex> head_lock(head_mutex);
        }
        data_cond.notify_one();
    }

    // Blocks until a value is available (moved into "value") or the done flag
    // is set (in which case "value" is left unchanged).
    void wait_and_pop(T& value)
    {
        std::unique_ptr<node> const old_head = wait_pop_head(value);
    }

    // Returns true and fills "value" if an element was available.
    bool try_pop(T& value)
    {
        std::unique_ptr<node> const old_head = try_pop_head(value);
        return old_head != nullptr;
    }

    bool empty()
    {
        std::lock_guard<std::mutex> head_lock(head_mutex);
        return (head.get() == get_tail());
    }
};
// Scope guard that joins every still-joinable thread of the referenced
// vector when the guard is destroyed. The vector is not owned.
class join_threads {
    std::vector<std::thread>& threads;

public:
    explicit join_threads(std::vector<std::thread>& threads_)
        : threads(threads_)
    {
    }

    ~join_threads()
    {
        for (std::thread& worker : threads) {
            if (!worker.joinable()) {
                continue;
            }
            worker.join();
        }
    }
};
// Fixed-size pool of worker threads. Every worker owns one task queue and
// runs the tasks pushed to that queue (see submit()) until the pool is
// destroyed.
class thread_pool {
    // Set to true to ask the workers to exit.
    std::atomic<bool> m_done;
    // One queue per worker thread; indexed by worker id.
    std::vector<thread_safe_queue<function_wrapper>> work_queues;
    std::vector<std::thread> threads; // NOTE: must be declared after "m_done" and "work_queues"
    // Joins "threads" on destruction (declared after "threads" so that it is destroyed first).
    join_threads joiner;

    // Main loop of a worker: pop a task from its own queue (blocking if the
    // queue is empty) and run it, until m_done is set.
    void worker_thread(int thread_id)
    {
        log_msg("[MCUT] Launch helper thread " << std::this_thread::get_id() << " (" << thread_id << ")");
        do {
            function_wrapper task;
            if (!(work_queues[thread_id].try_pop(task))) {
                work_queues[thread_id].wait_and_pop(task);
            }
            if (m_done) {
                break; // finished (i.e. MCUT context was destroyed)
            }
            task(); // run the task
        } while (true);
        log_msg("[MCUT] Shutdown helper thread " << std::this_thread::get_id() << " (" << thread_id << ")");
    }

    // Value reported by std::thread::hardware_concurrency() at construction.
    uint32_t machine_thread_count;

public:
    // nthreads   : requested number of worker threads.
    // used_cores : number of hardware threads already in use by the caller;
    //              the pool never creates more than the remaining ones.
    thread_pool(uint32_t nthreads, uint32_t used_cores)
        : m_done(false)
        , joiner(threads)
        , machine_thread_count(0)
    {
        log_msg("[MCUT] Create threadpool " << this);
        machine_thread_count = (uint32_t)std::thread::hardware_concurrency();
        // Number of hardware threads left for the pool. Clamped at zero: a plain
        // unsigned subtraction would wrap around when "used_cores" exceeds the
        // reported hardware concurrency and thereby disable the cap below.
        uint32_t const spare_threads = (machine_thread_count > used_cores)
            ? (machine_thread_count - used_cores)
            : 0;
        uint32_t const pool_thread_count = std::min(
            nthreads,
            // prevents over-subscription given that "N=used_cores" API threads are already in use
            spare_threads);
        try {
            work_queues = std::vector<thread_safe_queue<function_wrapper>>(pool_thread_count);
            for (unsigned i = 0; i < pool_thread_count; ++i) {
                // the queue must know the done flag before its worker starts waiting on it
                work_queues[i].set_done_ptr(&m_done);
                threads.push_back(std::thread(&thread_pool::worker_thread, this, i));
            }
        } catch (...) {
            // stop the workers that were already launched; "joiner" joins them
            m_done.store(true);
            wakeup_and_shutdown();
            throw;
        }
    }

    ~thread_pool()
    {
        log_msg("[MCUT] Destroy threadpool " << this);
        m_done.store(true);
        wakeup_and_shutdown();
    }

    // Wakes every worker that is blocked on its (empty) queue so that it can
    // observe the done flag and exit.
    void wakeup_and_shutdown()
    {
        for (unsigned i = 0; i < get_num_threads(); ++i) {
            work_queues[i].disrupt_wait_for_data();
        }
    }

public:
    /*
        The thread pool takes care of the exception safety too. Any exception thrown by the
        task gets propagated through the std::future returned from submit() , and if the function
        exits with an exception, the thread pool destructor abandons any not-yet-completed
        tasks and waits for the pool threads to finish.

        "worker_thread_id" selects the queue (i.e. the worker) that runs the task and
        must be less than get_num_threads().
    */
    template <typename FunctionType>
    std::future<typename std::result_of<FunctionType()>::type> submit(uint32_t worker_thread_id, FunctionType f)
    {
        typedef typename std::result_of<FunctionType()>::type result_type;
        std::packaged_task<result_type()> task(std::move(f));
        std::future<result_type> res(task.get_future());
        work_queues[worker_thread_id].push(std::move(task));
        return res;
    }

    // Number of worker threads that were launched.
    size_t get_num_threads() const
    {
        return threads.size();
    }

    // Hardware concurrency as reported when the pool was created (may be 0).
    uint32_t get_num_hardware_threads() const
    {
        return machine_thread_count;
    }
};
// Computes how a range of "length" elements is split between threads.
//
// Throws std::runtime_error if the resulting configuration is unusable
// (e.g. length == 0, available_threads == 0 or min_per_thread == 0).
static void get_scheduling_parameters(
    // the number of thread that will actually do some computation (including master)
    uint32_t& num_threads,
    // maximum possible number of threads that can be scheduled to perform the task.
    uint32_t& max_threads,
    // the size of each block of elements assigned to a thread (note: last thread,
    // which is the master thread might have less)
    uint32_t& block_size,
    // number of data elements to process
    const uint32_t length,
    const uint32_t available_threads, // number of worker threads and master master thread
    // minimum number of element assigned to a thread (below this threshold and we
    // run just one thread)
    const uint32_t min_per_thread = (1 << 10))
{
    if (min_per_thread == 0) {
        // would otherwise divide by zero below
        throw std::runtime_error("invalid minimum number of elements per thread (" + std::to_string(min_per_thread) + ")");
    }

    // maximum possible number of threads that can be scheduled to perform the task,
    // i.e. ceil(length / min_per_thread), written so that it cannot overflow.
    max_threads = (length / min_per_thread) + ((length % min_per_thread) != 0 ? 1u : 0u);

    // the number of thread that will actually do some computation (including master)
    num_threads = std::min(available_threads != 0 ? available_threads : 2u, max_threads);

    // the size of each block of elements assigned to a thread (note: last thread,
    // which is the master thread might have less). Guarded so that an empty
    // range is reported through the exception below instead of a division by zero.
    block_size = (num_threads != 0) ? (length / num_threads) : 0;

    if (num_threads == 0 || num_threads > available_threads) {
        throw std::runtime_error("invalid number of threads (" + std::to_string(num_threads) + ")");
    }
    if (max_threads == 0) {
        throw std::runtime_error("invalid maximum posible number of threads (" + std::to_string(max_threads) + ")");
    }
    if (block_size == 0) {
        throw std::runtime_error("invalid work block-size per thread (" + std::to_string(block_size) + ")");
    }
}
// Reusable spinning barrier for a fixed number of participants: wait()
// returns once "count" threads have called it for the current generation.
class barrier_t {
    unsigned const count; // number of participants per generation
    std::atomic<unsigned> spaces; // participants still missing in this generation
    std::atomic<unsigned> generation; // incremented every time the barrier opens

public:
    explicit barrier_t(unsigned count_)
        : count(count_)
        , spaces(count)
        , generation(0)
    {
    }

    void wait()
    {
        unsigned const entered_generation = generation.load();
        bool const is_last_arrival = (spaces.fetch_sub(1) == 1);
        if (is_last_arrival) {
            // re-arm the barrier first, then release the waiting threads
            spaces.store(count);
            generation.fetch_add(1);
            return;
        }
        // not the last one: yield until the last arrival opens the barrier
        while (generation.load() == entered_generation) {
            std::this_thread::yield();
        }
    }
};
// Splits [first, last) into blocks, submits all but the last block to the
// pool and processes the last block on the calling ("master") thread.
// The function does NOT wait for the workers: the caller collects their
// results from "futures".
template <typename InputStorageIteratorType, typename OutputStorageType, typename FunctionType>
void parallel_for(
    thread_pool& pool,
    // start of data elements to be processed in parallel
    const InputStorageIteratorType& first,
    // end of of data elements to be processed in parallel (e.g. std::map::end())
    const InputStorageIteratorType& last,
    // the function that is executed on a sub-block of element within the range [first, last)
    FunctionType& task_func,
    // the part of the result/output that is computed by the master thread (i.e. the one that is scheduling)
    // NOTE: this result must be merged with the output computed for the other
    // sub-blocks of the input range. That other data is accessed through the std::futures
    OutputStorageType& master_thread_output,
    // Future promises of data (to be merged) that is computed by worker threads
    std::vector<std::future<OutputStorageType>>& futures,
    // minimum number of element assigned to a thread (below this threshold and we
    // run just one thread)
    const uint32_t min_per_thread = (1 << 10))
{
    uint32_t const length_ = std::distance(first, last);
    MCUT_ASSERT(length_ != 0);

    // workers and master (+1)
    const uint32_t available_threads = pool.get_num_threads() + 1;
    uint32_t num_threads = 0;
    uint32_t max_threads = 0;
    uint32_t block_size = 0;
    get_scheduling_parameters(
        num_threads,
        max_threads,
        block_size,
        length_,
        available_threads,
        min_per_thread);

    // every thread except the master gets one future
    uint32_t const worker_count = num_threads - 1;
    futures.resize(worker_count);

    InputStorageIteratorType chunk_begin = first;
    for (uint32_t worker = 0; worker < worker_count; ++worker) {
        InputStorageIteratorType chunk_end = chunk_begin;
        std::advance(chunk_end, block_size);
        // the chunk bounds are copied into the task; the functor is shared by reference
        futures[worker] = pool.submit(worker,
            [&task_func, chunk_begin, chunk_end]() -> OutputStorageType {
                return task_func(chunk_begin, chunk_end);
            });
        chunk_begin = chunk_end;
    }

    // the master handles whatever is left (possibly more/less than block_size elements)
    master_thread_output = task_func(chunk_begin, last);
}
// Variant of parallel_for for tasks without a result: splits [first, last)
// into blocks, runs all but the last block on the pool, the last block on the
// calling ("master") thread, and then waits for all workers to finish.
template <typename InputStorageIteratorType, typename FunctionType>
void parallel_for(
    thread_pool& pool,
    // start of data elements to be processed in parallel
    const InputStorageIteratorType& first,
    // end of of data elements to be processed in parallel (e.g. std::map::end())
    const InputStorageIteratorType& last,
    // the function that is executed on a sub-block of element within the range [first, last)
    // return void
    FunctionType& task_func,
    // minimum number of element assigned to a thread (below this threshold and we
    // run just one thread)
    const uint32_t min_per_thread = (1 << 10))
{
    uint32_t const length_ = std::distance(first, last);
    MCUT_ASSERT(length_ != 0);

    // workers and master (+1)
    const uint32_t available_threads = pool.get_num_threads() + 1;
    uint32_t num_threads = 0;
    uint32_t max_threads = 0;
    uint32_t block_size; // filled in by get_scheduling_parameters
    get_scheduling_parameters(
        num_threads,
        max_threads,
        block_size,
        length_,
        available_threads,
        min_per_thread);

    uint32_t const worker_count = num_threads - 1;
    std::vector<std::future<void>> futures;
    futures.resize(worker_count);

    InputStorageIteratorType chunk_begin = first;
    for (uint32_t worker = 0; worker < worker_count; ++worker) {
        InputStorageIteratorType chunk_end = chunk_begin;
        std::advance(chunk_end, block_size);
        futures[worker] = pool.submit(worker,
            [&task_func, chunk_begin, chunk_end]() {
                task_func(chunk_begin, chunk_end);
            });
        chunk_begin = chunk_end;
    }

    // master thread work: the remaining elements
    task_func(chunk_begin, last);

    // block until every worker has processed its chunk
    for (std::future<void>& pending : futures) {
        pending.wait();
    }
}
// Sequential inclusive prefix sum: writes, for every element of
// [first, last), the sum of that element and all elements before it to the
// range starting at d_first. The output may alias the input
// (d_first == first), which is how it is used for in-place scans.
//
// The running sum is kept in the iterator's value type so that values wider
// than 32 bits and floating-point values are not truncated.
template <typename Iterator>
void partial_sum(Iterator first, Iterator last, Iterator d_first)
{
    typedef typename std::iterator_traits<Iterator>::value_type value_type;

    if (first == last) {
        return; // empty range: nothing to write
    }

    value_type sum = *first;
    *d_first = sum;
    while (++first != last) {
        sum = sum + *first;
        *++d_first = sum;
    }
}
// In-place inclusive prefix sum of [first, last) computed by several threads.
//
// The range is split into consecutive blocks. Every block is scanned
// independently; the last value of a block is then passed on to the next
// block through a promise/future pair so that the next block can add it as
// an offset. The calling ("master") thread processes the final block.
//
// The function returns only after every worker has finished, i.e. the whole
// range holds its final values on return.
template <typename Iterator>
void parallel_partial_sum(thread_pool& pool, Iterator first, Iterator last)
{
    typedef typename Iterator::value_type value_type;

    struct process_chunk {
        // begin/last          : first and LAST (inclusive) element of the block
        // previous_end_value  : final value of the preceding block (null for the first block)
        // end_value           : where to publish this block's final value (null for the last block)
        void operator()(Iterator begin, Iterator last,
            std::future<value_type>* previous_end_value,
            std::promise<value_type>* end_value)
        {
            try {
                Iterator end = last;
                ++end;
                partial_sum(begin, end, begin);
                if (previous_end_value) {
                    const value_type addend = previous_end_value->get();
                    // fix up and publish the last element first, so that the next
                    // block is unblocked before the rest of this block is updated
                    *last += addend;
                    if (end_value) {
                        end_value->set_value(*last);
                    }
                    std::for_each(begin, last, [addend](value_type& item) {
                        item += addend;
                    });
                } else if (end_value) {
                    end_value->set_value(*last);
                }
            } catch (...) {
                if (end_value) {
                    // forward the failure down the chain of blocks
                    end_value->set_exception(std::current_exception());
                } else {
                    throw;
                }
            }
        }
    };

    // number of elements in range
    unsigned long const length = std::distance(first, last);
    if (!length)
        return;

    uint32_t max_threads = 0;
    const uint32_t available_threads = pool.get_num_threads() + 1; // workers and master (+1)
    uint32_t num_threads = 0;
    uint32_t block_size = 0;
    get_scheduling_parameters(
        num_threads,
        max_threads,
        block_size,
        static_cast<uint32_t>(length),
        available_threads);

    // promises holding the value of the last element of a block. Only the
    // first N-1 blocks (i.e. threads) will have to update this value.
    std::vector<std::promise<value_type>> end_values(num_threads - 1);

    // futures that are used to wait for the end value (the last element) of
    // previous block. Only the last N-1 blocks (i.e. threads) will have to wait
    // on this value. That is, the first thread has no "previous" block to wait
    // on. All futures are created up-front so that this vector is never
    // modified while worker threads are reading from it.
    std::vector<std::future<value_type>> previous_end_values;
    previous_end_values.reserve(num_threads - 1);
    for (std::promise<value_type>& end_value : end_values) {
        previous_end_values.push_back(end_value.get_future());
    }

    std::vector<std::future<void>> futures;
    futures.resize(num_threads - 1);

    // The worker tasks reference the local vectors above and keep writing to
    // the range after publishing their end value, so they must all have
    // finished before this function is left (normally or by exception).
    auto wait_for_workers = [&futures]() {
        for (std::future<void>& pending : futures) {
            if (pending.valid()) {
                pending.wait();
            }
        }
    };

    Iterator block_start = first;

    // for each pool-thread
    for (unsigned long i = 0; i < (num_threads - 1); ++i) {
        Iterator block_last = block_start;
        std::advance(block_last, block_size - 1);

        // "process_chunk()" handles all synchronisation
        futures[i] = pool.submit(i,
            [i /*NOTE: capture by-value*/, &previous_end_values, &end_values, block_start, block_last]() {
                process_chunk()(block_start, block_last, (i != 0) ? &previous_end_values[i - 1] : nullptr, &end_values[i]);
            });

        block_start = block_last;
        ++block_start;
    }

    Iterator final_element = block_start;
    std::advance(final_element, std::distance(block_start, last) - 1);

    try {
        // NOTE: this is a blocking call since the master thread has to wait
        // for worker-threads assigned to preceeding chunks to finish before it can
        // process its own chunk.
        process_chunk()(block_start, final_element,
            (num_threads > 1) ? &previous_end_values.back() : nullptr,
            nullptr);
    } catch (...) {
        wait_for_workers();
        throw;
    }

    wait_for_workers();
}
// Linear search of [begin, end) for an element equal to "match".
// On success the position is stored in "result" and "done_flag" is raised;
// the search also stops early as soon as "done_flag" is raised by anyone.
// Exceptions are forwarded through "result" (best effort) instead of being
// propagated to the caller.
template <typename Iterator, typename MatchType>
void find_element(Iterator begin, Iterator end,
    MatchType match,
    std::promise<Iterator>* result,
    std::atomic<bool>* done_flag)
{
    try {
        while ((begin != end) && !done_flag->load()) {
            if (*begin == match) {
                result->set_value(begin);
                done_flag->store(true);
                return;
            }
            ++begin;
        }
    } catch (...) {
        try {
            result->set_exception(std::current_exception());
            done_flag->store(true);
        } catch (...) {
            // the promise may already hold a value/exception: nothing left to report
        }
    }
}
// Searches [first, last) for an element equal to "match" using the pool.
// Returns an iterator to a matching element, or "last" if there is none.
// If several elements match, the one that is returned is the one whose
// thread stores its result first (not necessarily the first in the range).
template <typename Iterator, typename MatchType>
Iterator parallel_find(thread_pool& pool, Iterator first, Iterator last, MatchType match)
{
    unsigned long const length = std::distance(first, last);
    if (!length)
        return last;

    uint32_t max_threads = 0;
    const uint32_t available_threads = pool.get_num_threads() + 1; // workers and master (+1)
    uint32_t num_threads = 0;
    uint32_t block_size = 0;
    get_scheduling_parameters(
        num_threads,
        max_threads,
        block_size,
        static_cast<uint32_t>(length),
        available_threads);

    std::promise<Iterator> result;
    std::atomic<bool> done_flag(false);
    std::vector<std::future<void>> futures;
    futures.resize(num_threads - 1);

    Iterator block_start = first;
    for (unsigned long i = 0; i < (num_threads - 1); ++i) {
        Iterator block_end = block_start;
        std::advance(block_end, block_size);

        // "find_element()" handles all synchronisation of the result
        futures[i] = pool.submit(i,
            [&result, &done_flag, block_start, block_end, &match]() {
                // NOTE: this must be an actual call. A comma-separated list of
                // the function name and its arguments compiles but does nothing.
                find_element<Iterator, MatchType>(
                    block_start, block_end, match,
                    &result, &done_flag);
            });
        block_start = block_end;
    }

    // the master searches the remaining elements
    find_element<Iterator, MatchType>(block_start, last, match, &result, &done_flag);

    // The worker tasks reference "result", "done_flag" and "match", which live
    // on this stack frame, so wait for all of them before reading the outcome.
    for (std::future<void>& pending : futures) {
        pending.wait();
    }

    if (!done_flag.load()) {
        return last; // if nothing found (by any thread), return "end"
    }
    return result.get_future().get();
}
// Linear search of [begin, end) for an entry whose key ("->first") equals
// "match". On success the position is stored in "result" and "done_flag" is
// raised; the search stops early once "done_flag" is raised by anyone.
// Exceptions are forwarded through "result" (best effort). In every case the
// function finishes by waiting on the barrier.
template <typename Iterator, typename KeyType>
void find_map_element_by_key(Iterator begin, Iterator end,
    KeyType match,
    std::promise<Iterator>* result,
    std::atomic<bool>* done_flag,
    barrier_t* barrier_ptr)
{
    try {
        while ((begin != end) && !done_flag->load()) {
            if (begin->first == match) {
                result->set_value(begin);
                done_flag->store(true);
                break; // do not return: the barrier below must still be reached
            }
            ++begin;
        }
    } catch (...) {
        try {
            result->set_exception(std::current_exception());
            done_flag->store(true);
        } catch (...) {
            // the promise may already hold a value/exception: nothing left to report
        }
    }

    barrier_ptr->wait();
}
// Searches the map range [first, last) for an entry whose key equals "match"
// using the pool. Returns an iterator to a matching entry, or "last" if no
// thread found one.
template <typename Iterator, typename KeyType>
Iterator parallel_find_in_map_by_key(thread_pool& pool, Iterator first, Iterator last, KeyType match)
{
    unsigned long const length = std::distance(first, last);
    if (length == 0) {
        return last;
    }

    // workers and master (+1)
    const uint32_t available_threads = pool.get_num_threads() + 1;
    uint32_t num_threads = 0;
    uint32_t max_threads = 0;
    uint32_t block_size = 0;
    get_scheduling_parameters(
        num_threads,
        max_threads,
        block_size,
        length,
        available_threads);

    // every participating thread (workers and master) meets at this barrier
    barrier_t barrier(num_threads);
    std::promise<Iterator> result;
    std::atomic<bool> done_flag(false);

    {
        unsigned long const worker_count = num_threads - 1;
        std::vector<std::future<void>> futures;
        futures.resize(worker_count);

        Iterator chunk_begin = first;
        for (unsigned long worker = 0; worker < worker_count; ++worker) {
            Iterator chunk_end = chunk_begin;
            std::advance(chunk_end, block_size);
            futures[worker] = pool.submit(worker,
                [&result, &done_flag, chunk_begin, chunk_end, &match, &barrier]() {
                    find_map_element_by_key<Iterator, KeyType>(
                        chunk_begin, chunk_end, match,
                        &result, &done_flag, &barrier);
                });
            chunk_begin = chunk_end;
        }

        // the master searches the remaining entries
        find_map_element_by_key<Iterator, KeyType>(chunk_begin, last, match, &result, &done_flag, &barrier);

        // make sure every worker task has completely finished
        for (std::future<void>& pending : futures) {
            pending.wait();
        }
    }

    if (done_flag.load()) {
        return result.get_future().get();
    }
    return last; // if nothing found (by any thread), return "end"
}
// Linear search of [begin, end) for an element for which "pred" returns
// true. On success the position is stored in "result" and "done_flag" is
// raised; the search stops early once "done_flag" is raised by anyone.
// Exceptions are forwarded through "result" (best effort). In every case the
// function finishes by waiting on the barrier.
template <typename Iterator, typename UnaryPredicate>
void find_element_with_pred(Iterator begin, Iterator end,
    UnaryPredicate pred,
    std::promise<Iterator>* result,
    std::atomic<bool>* done_flag,
    barrier_t* barrier_ptr)
{
    try {
        while ((begin != end) && !done_flag->load()) {
            const bool found = pred(*begin);
            if (found) {
                result->set_value(begin);
                done_flag->store(true);
                break; // do not return: the barrier below must still be reached
            }
            ++begin;
        }
    } catch (...) {
        try {
            result->set_exception(std::current_exception());
            done_flag->store(true);
        } catch (...) {
            // the promise may already hold a value/exception: nothing left to report
        }
    }

    barrier_ptr->wait();
}
// Searches [first, last) for an element that satisfies "predicate" using the
// pool. Returns an iterator to such an element, or "last" if no thread found
// one.
template <typename Iterator, class UnaryPredicate>
Iterator parallel_find_if(thread_pool& pool, Iterator first, Iterator last, UnaryPredicate predicate)
{
    unsigned long const length = std::distance(first, last);
    if (!length)
        return last;

    uint32_t max_threads = 0;
    const uint32_t available_threads = pool.get_num_threads() + 1; // workers and master (+1)
    uint32_t num_threads = 0;
    uint32_t block_size = 0;
    get_scheduling_parameters(
        num_threads,
        max_threads,
        block_size,
        static_cast<uint32_t>(length),
        available_threads);

    // every participating thread (workers and master) meets at this barrier
    barrier_t barrier(num_threads);
    std::promise<Iterator> result;
    std::atomic<bool> done_flag(false);
    std::vector<std::future<void>> futures;
    futures.resize(num_threads - 1);

    Iterator block_start = first;
    for (uint32_t i = 0; i < (num_threads - 1); ++i) {
        Iterator block_end = block_start;
        std::advance(block_end, block_size);
        futures[i] = pool.submit(i,
            [&result, &done_flag, block_start, block_end, &predicate, &barrier]() {
                find_element_with_pred<Iterator, UnaryPredicate>(
                    block_start, block_end, predicate,
                    &result, &done_flag, &barrier);
            });
        block_start = block_end;
    }

    // the master searches the remaining elements
    find_element_with_pred<Iterator, UnaryPredicate>(block_start, last, predicate, &result, &done_flag, &barrier);

    // Passing the barrier only tells us that every worker has ARRIVED at it; a
    // worker may still be spinning inside barrier_t::wait(). Since "barrier",
    // "result", "done_flag" and "predicate" live on this stack frame, wait for
    // the worker tasks to finish completely before leaving the function.
    for (std::future<void>& pending : futures) {
        pending.wait();
    }

    if (!done_flag.load()) {
        return last; // if nothing found (by any thread), return "end"
    }
    return result.get_future().get();
}
// Singly linked list with one mutex per node. Traversals lock the next node
// before releasing the current one, so several threads can walk and modify
// different parts of the list at the same time.
template<typename T>
class threadsafe_list
{
    struct node
    {
        std::mutex m; // protects "next" of this node
        T data;
        std::unique_ptr<node> next;

        node()
            : next()
        {
        }

        node(T const& value)
            : data(value)
        {
        }
    };

    // Sentinel node; the first real element is head.next.
    node head;

public:
    threadsafe_list()
    {
    }

    // Removes the elements one at a time.
    ~threadsafe_list()
    {
        remove_if([](T const&) { return true; });
    }

    threadsafe_list(threadsafe_list const& other) = delete;
    threadsafe_list& operator=(threadsafe_list const& other) = delete;

    // Inserts a copy of "value" at the front of the list.
    void push_front(T /*const&*/ value)
    {
        // allocate before taking the lock to keep the critical section short
        std::unique_ptr<node> fresh(new node(value));
        std::lock_guard<std::mutex> head_guard(head.m);
        fresh->next = std::move(head.next);
        head.next = std::move(fresh);
    }

    // Calls f(element) for every element, front to back.
    template<typename Function>
    void for_each(Function f)
    {
        node* cursor = &head;
        std::unique_lock<std::mutex> cursor_lock(head.m);
        for (node* succ = cursor->next.get(); succ != nullptr; succ = cursor->next.get())
        {
            std::unique_lock<std::mutex> succ_lock(succ->m);
            cursor_lock.unlock();
            f(succ->data);
            cursor = succ;
            cursor_lock = std::move(succ_lock);
        }
    }

    // Returns a copy of the first element that satisfies p, or a
    // default-constructed T if there is none.
    template<typename Predicate>
    T find_first_if(Predicate p)
    {
        node* cursor = &head;
        std::unique_lock<std::mutex> cursor_lock(head.m);
        for (node* succ = cursor->next.get(); succ != nullptr; succ = cursor->next.get())
        {
            std::unique_lock<std::mutex> succ_lock(succ->m);
            cursor_lock.unlock();
            if (p(succ->data))
            {
                return succ->data;
            }
            cursor = succ;
            cursor_lock = std::move(succ_lock);
        }
        return T();
    }

    // Removes every element that satisfies p.
    template<typename Predicate>
    void remove_if(Predicate p)
    {
        node* cursor = &head;
        std::unique_lock<std::mutex> cursor_lock(head.m);
        while (node* const succ = cursor->next.get())
        {
            std::unique_lock<std::mutex> succ_lock(succ->m);
            if (!p(succ->data))
            {
                // keep the element: advance to it
                cursor_lock.unlock();
                cursor = succ;
                cursor_lock = std::move(succ_lock);
                continue;
            }
            // unlink the element while still holding the lock of its predecessor;
            // its mutex is released before the node is destroyed at the end of
            // this iteration
            std::unique_ptr<node> removed = std::move(cursor->next);
            cursor->next = std::move(succ->next);
            succ_lock.unlock();
        }
    }
};
// Exception thrown when popping from an empty threadsafe_stack.
struct empty_stack : std::exception
{
    // "noexcept" replaces the dynamic exception specification "throw()",
    // which was deprecated in C++11 and removed in C++20.
    const char* what() const noexcept override
    {
        return "empty stack";
    }
};
// LIFO stack in which every operation is serialised by a single mutex.
// Popping from an empty stack throws empty_stack.
template<typename T>
class threadsafe_stack
{
private:
    std::stack<T> data;
    mutable std::mutex m; // mutable so that const members can lock it

public:
    threadsafe_stack()
    {
    }

    // Copies the elements of "other" while holding other's mutex.
    threadsafe_stack(const threadsafe_stack& other)
    {
        std::lock_guard<std::mutex> other_guard(other.m);
        data = other.data;
    }

    threadsafe_stack& operator=(const threadsafe_stack&) = delete;

    // Pushes a value on top of the stack.
    void push(T new_value)
    {
        std::lock_guard<std::mutex> guard(m);
        data.push(std::move(new_value));
    }

    // Removes the top element and returns it through a shared pointer.
    std::shared_ptr<T> pop()
    {
        std::lock_guard<std::mutex> guard(m);
        if (data.empty())
        {
            throw empty_stack();
        }
        std::shared_ptr<T> top_value = std::make_shared<T>(std::move(data.top()));
        data.pop();
        return top_value;
    }

    // Removes the top element and moves it into "value".
    void pop(T& value)
    {
        std::lock_guard<std::mutex> guard(m);
        if (data.empty())
        {
            throw empty_stack();
        }
        value = std::move(data.top());
        data.pop();
    }

    bool empty() const
    {
        std::lock_guard<std::mutex> guard(m);
        return data.empty();
    }
};
#endif // MCUT_SCHEDULER_H_