libstdc++
|
00001 // -*- C++ -*- 00002 00003 // Copyright (C) 2007, 2008, 2009 Free Software Foundation, Inc. 00004 // 00005 // This file is part of the GNU ISO C++ Library. This library is free 00006 // software; you can redistribute it and/or modify it under the terms 00007 // of the GNU General Public License as published by the Free Software 00008 // Foundation; either version 3, or (at your option) any later 00009 // version. 00010 00011 // This library is distributed in the hope that it will be useful, but 00012 // WITHOUT ANY WARRANTY; without even the implied warranty of 00013 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 00014 // General Public License for more details. 00015 00016 // Under Section 7 of GPL version 3, you are granted additional 00017 // permissions described in the GCC Runtime Library Exception, version 00018 // 3.1, as published by the Free Software Foundation. 00019 00020 // You should have received a copy of the GNU General Public License and 00021 // a copy of the GCC Runtime Library Exception along with this program; 00022 // see the files COPYING3 and COPYING.RUNTIME respectively. If not, see 00023 // <http://www.gnu.org/licenses/>. 00024 00025 /** @file parallel/partial_sum.h 00026 * @brief Parallel implementation of std::partial_sum(), i. e. prefix 00027 * sums. 00028 * This file is a GNU parallel extension to the Standard C++ Library. 00029 */ 00030 00031 // Written by Johannes Singler. 00032 00033 #ifndef _GLIBCXX_PARALLEL_PARTIAL_SUM_H 00034 #define _GLIBCXX_PARALLEL_PARTIAL_SUM_H 1 00035 00036 #include <omp.h> 00037 #include <new> 00038 #include <bits/stl_algobase.h> 00039 #include <parallel/parallel.h> 00040 #include <parallel/numericfwd.h> 00041 00042 namespace __gnu_parallel 00043 { 00044 // Problem: there is no 0-element given. 00045 00046 /** @brief Base case prefix sum routine. 00047 * @param begin Begin iterator of input sequence. 00048 * @param end End iterator of input sequence. 00049 * @param result Begin iterator of output sequence. 00050 * @param bin_op Associative binary function. 00051 * @param value Start value. Must be passed since the neutral 00052 * element is unknown in general. 00053 * @return End iterator of output sequence. */ 00054 template<typename InputIterator, 00055 typename OutputIterator, 00056 typename BinaryOperation> 00057 OutputIterator 00058 parallel_partial_sum_basecase(InputIterator begin, InputIterator end, 00059 OutputIterator result, BinaryOperation bin_op, 00060 typename std::iterator_traits 00061 <InputIterator>::value_type value) 00062 { 00063 if (begin == end) 00064 return result; 00065 00066 while (begin != end) 00067 { 00068 value = bin_op(value, *begin); 00069 *result = value; 00070 ++result; 00071 ++begin; 00072 } 00073 return result; 00074 } 00075 00076 /** @brief Parallel partial sum implementation, two-phase approach, 00077 no recursion. 00078 * @param begin Begin iterator of input sequence. 00079 * @param end End iterator of input sequence. 00080 * @param result Begin iterator of output sequence. 00081 * @param bin_op Associative binary function. 00082 * @param n Length of sequence. 00083 * @param num_threads Number of threads to use. 00084 * @return End iterator of output sequence. 00085 */ 00086 template<typename InputIterator, 00087 typename OutputIterator, 00088 typename BinaryOperation> 00089 OutputIterator 00090 parallel_partial_sum_linear(InputIterator begin, InputIterator end, 00091 OutputIterator result, BinaryOperation bin_op, 00092 typename std::iterator_traits 00093 <InputIterator>::difference_type n) 00094 { 00095 typedef std::iterator_traits<InputIterator> traits_type; 00096 typedef typename traits_type::value_type value_type; 00097 typedef typename traits_type::difference_type difference_type; 00098 00099 if (begin == end) 00100 return result; 00101 00102 thread_index_t num_threads = 00103 std::min<difference_type>(get_max_threads(), n - 1); 00104 00105 if (num_threads < 2) 00106 { 00107 *result = *begin; 00108 return parallel_partial_sum_basecase( 00109 begin + 1, end, result + 1, bin_op, *begin); 00110 } 00111 00112 difference_type* borders; 00113 value_type* sums; 00114 00115 const _Settings& __s = _Settings::get(); 00116 00117 # pragma omp parallel num_threads(num_threads) 00118 { 00119 # pragma omp single 00120 { 00121 num_threads = omp_get_num_threads(); 00122 00123 borders = new difference_type[num_threads + 2]; 00124 00125 if (__s.partial_sum_dilation == 1.0f) 00126 equally_split(n, num_threads + 1, borders); 00127 else 00128 { 00129 difference_type chunk_length = 00130 ((double)n 00131 / ((double)num_threads + __s.partial_sum_dilation)), 00132 borderstart = n - num_threads * chunk_length; 00133 borders[0] = 0; 00134 for (int i = 1; i < (num_threads + 1); ++i) 00135 { 00136 borders[i] = borderstart; 00137 borderstart += chunk_length; 00138 } 00139 borders[num_threads + 1] = n; 00140 } 00141 00142 sums = static_cast<value_type*>(::operator new(sizeof(value_type) 00143 * num_threads)); 00144 OutputIterator target_end; 00145 } //single 00146 00147 thread_index_t iam = omp_get_thread_num(); 00148 if (iam == 0) 00149 { 00150 *result = *begin; 00151 parallel_partial_sum_basecase(begin + 1, begin + borders[1], 00152 result + 1, bin_op, *begin); 00153 ::new(&(sums[iam])) value_type(*(result + borders[1] - 1)); 00154 } 00155 else 00156 { 00157 ::new(&(sums[iam])) 00158 value_type(__gnu_parallel::accumulate(begin + borders[iam] + 1, 00159 begin + borders[iam + 1], 00160 *(begin + borders[iam]), 00161 bin_op, 00162 __gnu_parallel::sequential_tag())); 00163 } 00164 00165 # pragma omp barrier 00166 00167 # pragma omp single 00168 parallel_partial_sum_basecase( 00169 sums + 1, sums + num_threads, sums + 1, bin_op, sums[0]); 00170 00171 # pragma omp barrier 00172 00173 // Still same team. 00174 parallel_partial_sum_basecase(begin + borders[iam + 1], 00175 begin + borders[iam + 2], 00176 result + borders[iam + 1], bin_op, 00177 sums[iam]); 00178 } //parallel 00179 00180 ::operator delete(sums); 00181 delete[] borders; 00182 00183 return result + n; 00184 } 00185 00186 /** @brief Parallel partial sum front-end. 00187 * @param begin Begin iterator of input sequence. 00188 * @param end End iterator of input sequence. 00189 * @param result Begin iterator of output sequence. 00190 * @param bin_op Associative binary function. 00191 * @return End iterator of output sequence. */ 00192 template<typename InputIterator, 00193 typename OutputIterator, 00194 typename BinaryOperation> 00195 OutputIterator 00196 parallel_partial_sum(InputIterator begin, InputIterator end, 00197 OutputIterator result, BinaryOperation bin_op) 00198 { 00199 _GLIBCXX_CALL(begin - end) 00200 00201 typedef std::iterator_traits<InputIterator> traits_type; 00202 typedef typename traits_type::value_type value_type; 00203 typedef typename traits_type::difference_type difference_type; 00204 00205 difference_type n = end - begin; 00206 00207 switch (_Settings::get().partial_sum_algorithm) 00208 { 00209 case LINEAR: 00210 // Need an initial offset. 00211 return parallel_partial_sum_linear(begin, end, result, bin_op, n); 00212 default: 00213 // Partial_sum algorithm not implemented. 00214 _GLIBCXX_PARALLEL_ASSERT(0); 00215 return result + n; 00216 } 00217 } 00218 } 00219 00220 #endif /* _GLIBCXX_PARALLEL_PARTIAL_SUM_H */