Intel® OpenMP* Runtime Library
kmp_tasking.c
1 /*
2  * kmp_tasking.c -- OpenMP 3.0 tasking support.
3  * $Revision: 42522 $
4  * $Date: 2013-07-16 05:28:49 -0500 (Tue, 16 Jul 2013) $
5  */
6 
7 /* <copyright>
8  Copyright (c) 1997-2013 Intel Corporation. All Rights Reserved.
9 
10  Redistribution and use in source and binary forms, with or without
11  modification, are permitted provided that the following conditions
12  are met:
13 
14  * Redistributions of source code must retain the above copyright
15  notice, this list of conditions and the following disclaimer.
16  * Redistributions in binary form must reproduce the above copyright
17  notice, this list of conditions and the following disclaimer in the
18  documentation and/or other materials provided with the distribution.
19  * Neither the name of Intel Corporation nor the names of its
20  contributors may be used to endorse or promote products derived
21  from this software without specific prior written permission.
22 
23  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
24  "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
25  LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
26  A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
27  HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
28  SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
29  LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
30  DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
31  THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
32  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
33  OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
34 
35 </copyright> */
36 
37 #include "kmp.h"
38 #include "kmp_i18n.h"
39 #include "kmp_itt.h"
40 
41 
42 #if OMP_30_ENABLED
43 
44 /* ------------------------------------------------------------------------ */
45 /* ------------------------------------------------------------------------ */
46 
47 
48 /* forward declaration */
49 static void __kmp_enable_tasking( kmp_task_team_t *task_team, kmp_info_t *this_thr );
50 static void __kmp_alloc_task_deque( kmp_info_t *thread, kmp_thread_data_t *thread_data );
51 static int __kmp_realloc_task_threads_data( kmp_info_t *thread, kmp_task_team_t *task_team );
52 
53 #ifndef KMP_DEBUG
54 # define __kmp_static_delay( arg ) /* nothing to do */
55 #else
56 
57 static void
58 __kmp_static_delay( int arg )
59 {
60 /* Work around weird code-gen bug that causes assert to trip */
61 # if KMP_ARCH_X86_64 && KMP_OS_LINUX
62  KMP_ASSERT( arg != 0 );
63 # else
64  KMP_ASSERT( arg >= 0 );
65 # endif
66 }
67 #endif /* KMP_DEBUG */
68 
69 static void
70 __kmp_static_yield( int arg )
71 {
72  __kmp_yield( arg );
73 }
74 
75 #ifdef BUILD_TIED_TASK_STACK
76 
77 //---------------------------------------------------------------------------
78 // __kmp_trace_task_stack: print the tied tasks from the task stack in order
79 // from top to bottom
80 //
81 // gtid: global thread identifier for thread containing stack
82 // thread_data: thread data for task team thread containing stack
83 // threshold: value above which the trace statement triggers
84 // location: string identifying call site of this function (for trace)
85 
86 static void
87 __kmp_trace_task_stack( kmp_int32 gtid, kmp_thread_data_t *thread_data, int threshold, char *location )
88 {
89  kmp_task_stack_t *task_stack = & thread_data->td.td_susp_tied_tasks;
90  kmp_taskdata_t **stack_top = task_stack -> ts_top;
91  kmp_int32 entries = task_stack -> ts_entries;
92  kmp_taskdata_t *tied_task;
93 
94  KA_TRACE(threshold, ("__kmp_trace_task_stack(start): location = %s, gtid = %d, entries = %d, "
95  "first_block = %p, stack_top = %p \n",
96  location, gtid, entries, task_stack->ts_first_block, stack_top ) );
97 
98  KMP_DEBUG_ASSERT( stack_top != NULL );
99  KMP_DEBUG_ASSERT( entries > 0 );
100 
101  while ( entries != 0 )
102  {
103  KMP_DEBUG_ASSERT( stack_top != & task_stack->ts_first_block.sb_block[0] );
104  // fix up ts_top if we need to pop from previous block
105  if ( ( entries & TASK_STACK_INDEX_MASK ) == 0 )
106  {
107  kmp_stack_block_t *stack_block = (kmp_stack_block_t *) (stack_top) ;
108 
109  stack_block = stack_block -> sb_prev;
110  stack_top = & stack_block -> sb_block[TASK_STACK_BLOCK_SIZE];
111  }
112 
113  // finish bookkeeping
114  stack_top--;
115  entries--;
116 
117  tied_task = * stack_top;
118 
119  KMP_DEBUG_ASSERT( tied_task != NULL );
120  KMP_DEBUG_ASSERT( tied_task -> td_flags.tasktype == TASK_TIED );
121 
122  KA_TRACE(threshold, ("__kmp_trace_task_stack(%s): gtid=%d, entry=%d, "
123  "stack_top=%p, tied_task=%p\n",
124  location, gtid, entries, stack_top, tied_task ) );
125  }
126  KMP_DEBUG_ASSERT( stack_top == & task_stack->ts_first_block.sb_block[0] );
127 
128  KA_TRACE(threshold, ("__kmp_trace_task_stack(exit): location = %s, gtid = %d\n",
129  location, gtid ) );
130 }
131 
132 //---------------------------------------------------------------------------
133 // __kmp_init_task_stack: initialize the task stack for the first time
134 // after a thread_data structure is created.
135 // It should not be necessary to do this again (assuming the stack works).
136 //
137 // gtid: global thread identifier of calling thread
138 // thread_data: thread data for task team thread containing stack
139 
140 static void
141 __kmp_init_task_stack( kmp_int32 gtid, kmp_thread_data_t *thread_data )
142 {
143  kmp_task_stack_t *task_stack = & thread_data->td.td_susp_tied_tasks;
144  kmp_stack_block_t *first_block;
145 
146  // set up the first block of the stack
147  first_block = & task_stack -> ts_first_block;
148  task_stack -> ts_top = (kmp_taskdata_t **) first_block;
149  memset( (void *) first_block, '\0', TASK_STACK_BLOCK_SIZE * sizeof(kmp_taskdata_t *));
150 
151  // initialize the stack to be empty
152  task_stack -> ts_entries = TASK_STACK_EMPTY;
153  first_block -> sb_next = NULL;
154  first_block -> sb_prev = NULL;
155 }
156 
157 
158 //---------------------------------------------------------------------------
159 // __kmp_free_task_stack: free the task stack when thread_data is destroyed.
160 //
161 // gtid: global thread identifier for calling thread
162 // thread_data: thread info for thread containing stack
163 
164 static void
165 __kmp_free_task_stack( kmp_int32 gtid, kmp_thread_data_t *thread_data )
166 {
167  kmp_task_stack_t *task_stack = & thread_data->td.td_susp_tied_tasks;
168  kmp_stack_block_t *stack_block = & task_stack -> ts_first_block;
169 
170  KMP_DEBUG_ASSERT( task_stack -> ts_entries == TASK_STACK_EMPTY );
171  // free from the second block of the stack
172  while ( stack_block != NULL ) {
173  kmp_stack_block_t *next_block = (stack_block) ? stack_block -> sb_next : NULL;
174 
175  stack_block -> sb_next = NULL;
176  stack_block -> sb_prev = NULL;
177  if (stack_block != & task_stack -> ts_first_block) {
178  __kmp_thread_free( __kmp_threads[ gtid ], stack_block ); // free the block, if not the first
179  }
180  stack_block = next_block;
181  }
182  // initialize the stack to be empty
183  task_stack -> ts_entries = 0;
184  task_stack -> ts_top = NULL;
185 }
186 
187 
188 //---------------------------------------------------------------------------
189 // __kmp_push_task_stack: Push the tied task onto the task stack.
190 // Grow the stack if necessary by allocating another block.
191 //
192 // gtid: global thread identifier for calling thread
193 // thread: thread info for thread containing stack
194 // tied_task: the task to push on the stack
195 
196 static void
197 __kmp_push_task_stack( kmp_int32 gtid, kmp_info_t *thread, kmp_taskdata_t * tied_task )
198 {
199  // GEH - need to consider what to do if tt_threads_data not allocated yet
200  kmp_thread_data_t *thread_data = & thread -> th.th_task_team ->
201  tt.tt_threads_data[ __kmp_tid_from_gtid( gtid ) ];
202  kmp_task_stack_t *task_stack = & thread_data->td.td_susp_tied_tasks ;
203 
204  if ( tied_task->td_flags.team_serial || tied_task->td_flags.tasking_ser ) {
205  return; // Don't push anything on stack if team or team tasks are serialized
206  }
207 
208  KMP_DEBUG_ASSERT( tied_task -> td_flags.tasktype == TASK_TIED );
209  KMP_DEBUG_ASSERT( task_stack -> ts_top != NULL );
210 
211  KA_TRACE(20, ("__kmp_push_task_stack(enter): GTID: %d; THREAD: %p; TASK: %p\n",
212  gtid, thread, tied_task ) );
213  // Store entry
214  * (task_stack -> ts_top) = tied_task;
215 
216  // Do bookkeeping for next push
217  task_stack -> ts_top++;
218  task_stack -> ts_entries++;
219 
220  if ( ( task_stack -> ts_entries & TASK_STACK_INDEX_MASK ) == 0 )
221  {
222  // Find beginning of this task block
223  kmp_stack_block_t *stack_block =
224  (kmp_stack_block_t *) (task_stack -> ts_top - TASK_STACK_BLOCK_SIZE);
225 
226  // Check if we already have a block
227  if ( stack_block -> sb_next != NULL )
228  { // reset ts_top to beginning of next block
229  task_stack -> ts_top = & stack_block -> sb_next -> sb_block[0];
230  }
231  else
232  { // Alloc new block and link it up
233  kmp_stack_block_t *new_block = (kmp_stack_block_t *)
234  __kmp_thread_calloc(thread, sizeof(kmp_stack_block_t));
235 
236  task_stack -> ts_top = & new_block -> sb_block[0];
237  stack_block -> sb_next = new_block;
238  new_block -> sb_prev = stack_block;
239  new_block -> sb_next = NULL;
240 
241  KA_TRACE(30, ("__kmp_push_task_stack(): GTID: %d; TASK: %p; Alloc new block: %p\n",
242  gtid, tied_task, new_block ) );
243  }
244  }
245  KA_TRACE(20, ("__kmp_push_task_stack(exit): GTID: %d; TASK: %p\n", gtid, tied_task ) );
246 }
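/*
 * Editorial note: the tied-task stack is a chain of fixed-size blocks.
 * ts_top always points at the next free slot; whenever the entry count
 * crosses a block boundary, i.e. ( ts_entries & TASK_STACK_INDEX_MASK ) == 0,
 * the push either reuses an already-linked sb_next block or allocates a new
 * one and links it in, as shown above.  This code is only compiled when
 * BUILD_TIED_TASK_STACK is defined.
 */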
247 
248 //---------------------------------------------------------------------------
249 // __kmp_pop_task_stack: Pop the tied task from the task stack. Don't return
250 // the task, just check to make sure it matches the ending task passed in.
251 //
252 // gtid: global thread identifier for the calling thread
253 // thread: thread info structure containing stack
254 // tied_task: the task popped off the stack
255 // ending_task: the task that is ending (should match popped task)
256 
257 static void
258 __kmp_pop_task_stack( kmp_int32 gtid, kmp_info_t *thread, kmp_taskdata_t *ending_task )
259 {
260  // GEH - need to consider what to do if tt_threads_data not allocated yet
261  kmp_thread_data_t *thread_data = & thread -> th.th_task_team -> tt.tt_threads_data[ __kmp_tid_from_gtid( gtid ) ];
262  kmp_task_stack_t *task_stack = & thread_data->td.td_susp_tied_tasks ;
263  kmp_taskdata_t *tied_task;
264 
265  if ( ending_task->td_flags.team_serial || ending_task->td_flags.tasking_ser ) {
266  return; // Don't pop anything from stack if team or team tasks are serialized
267  }
268 
269  KMP_DEBUG_ASSERT( task_stack -> ts_top != NULL );
270  KMP_DEBUG_ASSERT( task_stack -> ts_entries > 0 );
271 
272  KA_TRACE(20, ("__kmp_pop_task_stack(enter): GTID: %d; THREAD: %p\n", gtid, thread ) );
273 
274  // fix up ts_top if we need to pop from previous block
275  if ( ( task_stack -> ts_entries & TASK_STACK_INDEX_MASK ) == 0 )
276  {
277  kmp_stack_block_t *stack_block =
278  (kmp_stack_block_t *) (task_stack -> ts_top) ;
279 
280  stack_block = stack_block -> sb_prev;
281  task_stack -> ts_top = & stack_block -> sb_block[TASK_STACK_BLOCK_SIZE];
282  }
283 
284  // finish bookkeeping
285  task_stack -> ts_top--;
286  task_stack -> ts_entries--;
287 
288  tied_task = * (task_stack -> ts_top );
289 
290  KMP_DEBUG_ASSERT( tied_task != NULL );
291  KMP_DEBUG_ASSERT( tied_task -> td_flags.tasktype == TASK_TIED );
292  KMP_DEBUG_ASSERT( tied_task == ending_task ); // If we built the stack correctly
293 
294  KA_TRACE(20, ("__kmp_pop_task_stack(exit): GTID: %d; TASK: %p\n", gtid, tied_task ) );
295  return;
296 }
297 #endif /* BUILD_TIED_TASK_STACK */
298 
299 //---------------------------------------------------
300 // __kmp_push_task: Add a task to the thread's deque
301 
302 static kmp_int32
303 __kmp_push_task(kmp_int32 gtid, kmp_task_t * task )
304 {
305  kmp_info_t * thread = __kmp_threads[ gtid ];
306  kmp_taskdata_t * taskdata = KMP_TASK_TO_TASKDATA(task);
307  kmp_task_team_t * task_team = thread->th.th_task_team;
308  kmp_int32 tid = __kmp_tid_from_gtid( gtid );
309  kmp_thread_data_t * thread_data;
310 
311  KA_TRACE(20, ("__kmp_push_task: T#%d trying to push task %p.\n", gtid, taskdata ) );
312 
313  // The first check avoids building task_team thread data if serialized
314  if ( taskdata->td_flags.task_serial ) {
315  KA_TRACE(20, ( "__kmp_push_task: T#%d team serialized; returning TASK_NOT_PUSHED for task %p\n",
316  gtid, taskdata ) );
317  return TASK_NOT_PUSHED;
318  }
319 
320  // Now that serialized tasks have returned, we can assume that we are not in immediate exec mode
321  KMP_DEBUG_ASSERT( __kmp_tasking_mode != tskm_immediate_exec );
322  if ( ! KMP_TASKING_ENABLED( task_team, thread->th.th_task_state ) ) {
323  __kmp_enable_tasking( task_team, thread );
324  }
325  KMP_DEBUG_ASSERT( TCR_4(task_team -> tt.tt_found_tasks) == TRUE );
326  KMP_DEBUG_ASSERT( TCR_PTR(task_team -> tt.tt_threads_data) != NULL );
327 
328  // Find tasking deque specific to encountering thread
329  thread_data = & task_team -> tt.tt_threads_data[ tid ];
330 
331  // No lock needed since only owner can allocate
332  if (thread_data -> td.td_deque == NULL ) {
333  __kmp_alloc_task_deque( thread, thread_data );
334  }
335 
336  // Check if deque is full
337  if ( TCR_4(thread_data -> td.td_deque_ntasks) >= TASK_DEQUE_SIZE )
338  {
339  KA_TRACE(20, ( "__kmp_push_task: T#%d deque is full; returning TASK_NOT_PUSHED for task %p\n",
340  gtid, taskdata ) );
341  return TASK_NOT_PUSHED;
342  }
343 
344  // Lock the deque for the task push operation
345  __kmp_acquire_bootstrap_lock( & thread_data -> td.td_deque_lock );
346 
347  // Must have room, since no thread other than the calling thread can add tasks to this deque
348  KMP_DEBUG_ASSERT( TCR_4(thread_data -> td.td_deque_ntasks) < TASK_DEQUE_SIZE );
349 
350  thread_data -> td.td_deque[ thread_data -> td.td_deque_tail ] = taskdata; // Push taskdata
351  // Wrap index.
352  thread_data -> td.td_deque_tail = ( thread_data -> td.td_deque_tail + 1 ) & TASK_DEQUE_MASK;
353  TCW_4(thread_data -> td.td_deque_ntasks, TCR_4(thread_data -> td.td_deque_ntasks) + 1); // Adjust task count
354 
355  __kmp_release_bootstrap_lock( & thread_data -> td.td_deque_lock );
356 
357  KA_TRACE(20, ("__kmp_push_task: T#%d returning TASK_SUCCESSFULLY_PUSHED: "
358  "task=%p ntasks=%d head=%u tail=%u\n",
359  gtid, taskdata, thread_data->td.td_deque_ntasks,
360  thread_data->td.td_deque_tail, thread_data->td.td_deque_head) );
361 
362  return TASK_SUCCESSFULLY_PUSHED;
363 }
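/*
 * Editorial note: the per-thread deque above is a fixed-size ring buffer
 * indexed with TASK_DEQUE_MASK.  A minimal sketch of the same wrap-around
 * push, using illustrative names that are not part of this runtime and
 * assuming the capacity is a power of two:
 *
 *     enum { RING_SIZE = 256, RING_MASK = RING_SIZE - 1 };
 *     typedef struct { void *slot[ RING_SIZE ]; unsigned head, tail, ntasks; } ring_t;
 *
 *     static int ring_push( ring_t *r, void *item )
 *     {
 *         if ( r->ntasks >= RING_SIZE )
 *             return 0;                           // full: caller runs the task immediately
 *         r->slot[ r->tail ] = item;              // store at the tail (owner only)
 *         r->tail = ( r->tail + 1 ) & RING_MASK;  // wrap index, like td_deque_tail above
 *         r->ntasks++;
 *         return 1;
 *     }
 */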
364 
365 
366 //-----------------------------------------------------------------------------------------
367 // __kmp_pop_current_task_from_thread: restore the current task of the calling thread when a team ends
368 // this_thr: thread structure to set current_task in.
369 
370 void
371 __kmp_pop_current_task_from_thread( kmp_info_t *this_thr )
372 {
373  KF_TRACE( 10, ("__kmp_pop_current_task_from_thread(enter): T#%d this_thread=%p, curtask=%p, "
374  "curtask_parent=%p\n",
375  0, this_thr, this_thr -> th.th_current_task,
376  this_thr -> th.th_current_task -> td_parent ) );
377 
378  this_thr -> th.th_current_task = this_thr -> th.th_current_task -> td_parent;
379 
380  KF_TRACE( 10, ("__kmp_pop_current_task_from_thread(exit): T#%d this_thread=%p, curtask=%p, "
381  "curtask_parent=%p\n",
382  0, this_thr, this_thr -> th.th_current_task,
383  this_thr -> th.th_current_task -> td_parent ) );
384 }
385 
386 
387 //---------------------------------------------------------------------------------------
388 // __kmp_push_current_task_to_thread: set up current task in called thread for a new team
389 // this_thr: thread structure to set up
390 // team: team for implicit task data
391 // tid: thread within team to set up
392 
393 void
394 __kmp_push_current_task_to_thread( kmp_info_t *this_thr, kmp_team_t *team, int tid )
395 {
396  // The thread's current task becomes the parent of the newly created implicit tasks of the new team
397  KF_TRACE( 10, ( "__kmp_push_current_task_to_thread(enter): T#%d this_thread=%p curtask=%p "
398  "parent_task=%p\n",
399  tid, this_thr, this_thr->th.th_current_task,
400  team->t.t_implicit_task_taskdata[tid].td_parent ) );
401 
402  KMP_DEBUG_ASSERT (this_thr != NULL);
403 
404  if( tid == 0 ) {
405  if( this_thr->th.th_current_task != & team -> t.t_implicit_task_taskdata[ 0 ] ) {
406  team -> t.t_implicit_task_taskdata[ 0 ].td_parent = this_thr->th.th_current_task;
407  this_thr->th.th_current_task = & team -> t.t_implicit_task_taskdata[ 0 ];
408  }
409  } else {
410  team -> t.t_implicit_task_taskdata[ tid ].td_parent = team -> t.t_implicit_task_taskdata[ 0 ].td_parent;
411  this_thr->th.th_current_task = & team -> t.t_implicit_task_taskdata[ tid ];
412  }
413 
414  KF_TRACE( 10, ( "__kmp_push_current_task_to_thread(exit): T#%d this_thread=%p curtask=%p "
415  "parent_task=%p\n",
416  tid, this_thr, this_thr->th.th_current_task,
417  team->t.t_implicit_task_taskdata[tid].td_parent ) );
418 }
419 
420 
421 //----------------------------------------------------------------------
422 // __kmp_task_start: bookkeeping for a task starting execution
423 // GTID: global thread id of calling thread
424 // task: task starting execution
425 // current_task: task suspending
426 
427 static void
428 __kmp_task_start( kmp_int32 gtid, kmp_task_t * task, kmp_taskdata_t * current_task )
429 {
430  kmp_taskdata_t * taskdata = KMP_TASK_TO_TASKDATA(task);
431  kmp_info_t * thread = __kmp_threads[ gtid ];
432 
433  KA_TRACE(10, ("__kmp_task_start(enter): T#%d starting task %p: current_task=%p\n",
434  gtid, taskdata, current_task) );
435 
436  KMP_DEBUG_ASSERT( taskdata -> td_flags.tasktype == TASK_EXPLICIT );
437 
438  // mark currently executing task as suspended
439  // TODO: GEH - make sure root team implicit task is initialized properly.
440  // KMP_DEBUG_ASSERT( current_task -> td_flags.executing == 1 );
441  current_task -> td_flags.executing = 0;
442 
443  // Add task to stack if tied
444 #ifdef BUILD_TIED_TASK_STACK
445  if ( taskdata -> td_flags.tiedness == TASK_TIED )
446  {
447  __kmp_push_task_stack( gtid, thread, taskdata );
448  }
449 #endif /* BUILD_TIED_TASK_STACK */
450 
451  // mark starting task as executing and as current task
452  thread -> th.th_current_task = taskdata;
453 
454  KMP_DEBUG_ASSERT( taskdata -> td_flags.started == 0 );
455  KMP_DEBUG_ASSERT( taskdata -> td_flags.executing == 0 );
456  taskdata -> td_flags.started = 1;
457  taskdata -> td_flags.executing = 1;
458  KMP_DEBUG_ASSERT( taskdata -> td_flags.complete == 0 );
459  KMP_DEBUG_ASSERT( taskdata -> td_flags.freed == 0 );
460 
461  // GEH TODO: shouldn't we pass some sort of location identifier here?
462  // APT: yes, we will pass location here.
463  // need to store current thread state (in a thread or taskdata structure)
464  // before setting work_state, otherwise wrong state is set after end of task
465 
466  KA_TRACE(10, ("__kmp_task_start(exit): T#%d task=%p\n",
467  gtid, taskdata ) );
468 
469  return;
470 }
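/*
 * Editorial note: __kmp_task_start and __kmp_task_finish (below) drive a
 * small per-task state machine recorded in td_flags, which the debug
 * asserts in both routines check:
 *     allocated:  started=0 executing=0 complete=0 freed=0
 *     started:    started=1 executing=1  (the suspended parent gets executing=0)
 *     finished:   executing=0 complete=1
 *     freed:      freed=1  (set in __kmp_free_task)
 */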
471 
472 
473 //----------------------------------------------------------------------
474 // __kmpc_omp_task_begin_if0: report that a given serialized task has started execution
475 // loc_ref: source location information; points to beginning of task block.
476 // gtid: global thread number.
477 // task: task thunk for the started task.
478 
479 void
480 __kmpc_omp_task_begin_if0( ident_t *loc_ref, kmp_int32 gtid, kmp_task_t * task )
481 {
482  kmp_taskdata_t * taskdata = KMP_TASK_TO_TASKDATA(task);
483  kmp_taskdata_t * current_task = __kmp_threads[ gtid ] -> th.th_current_task;
484 
485  KA_TRACE(10, ("__kmpc_omp_task_begin_if0(enter): T#%d loc=%p task=%p current_task=%p\n",
486  gtid, loc_ref, taskdata, current_task ) );
487 
488  taskdata -> td_flags.task_serial = 1; // Execute this task immediately, not deferred.
489  __kmp_task_start( gtid, task, current_task );
490 
491  KA_TRACE(10, ("__kmpc_omp_task_begin_if0(exit): T#%d loc=%p task=%p,\n",
492  gtid, loc_ref, taskdata ) );
493 
494  return;
495 }
496 
497 #ifdef TASK_UNUSED
498 //----------------------------------------------------------------------
499 // __kmpc_omp_task_begin: report that a given task has started execution
500 // NEVER GENERATED BY COMPILER, DEPRECATED!!!
501 
502 void
503 __kmpc_omp_task_begin( ident_t *loc_ref, kmp_int32 gtid, kmp_task_t * task )
504 {
505  kmp_taskdata_t * current_task = __kmp_threads[ gtid ] -> th.th_current_task;
506 
507  KA_TRACE(10, ("__kmpc_omp_task_begin(enter): T#%d loc=%p task=%p current_task=%p\n",
508  gtid, loc_ref, KMP_TASK_TO_TASKDATA(task), current_task ) );
509 
510  __kmp_task_start( gtid, task, current_task );
511 
512  KA_TRACE(10, ("__kmpc_omp_task_begin(exit): T#%d loc=%p task=%p,\n",
513  gtid, loc_ref, KMP_TASK_TO_TASKDATA(task) ) );
514 
515  return;
516 }
517 #endif // TASK_UNUSED
518 
519 
520 //-------------------------------------------------------------------------------------
521 // __kmp_free_task: free the current task space and the space for shareds
522 // gtid: Global thread ID of calling thread
523 // taskdata: task to free
524 // thread: thread data structure of caller
525 
526 static void
527 __kmp_free_task( kmp_int32 gtid, kmp_taskdata_t * taskdata, kmp_info_t * thread )
528 {
529  KA_TRACE(30, ("__kmp_free_task: T#%d freeing data from task %p\n",
530  gtid, taskdata) );
531 
532  // Check to make sure all flags and counters have the correct values
533  KMP_DEBUG_ASSERT( taskdata->td_flags.tasktype == TASK_EXPLICIT );
534  KMP_DEBUG_ASSERT( taskdata->td_flags.executing == 0 );
535  KMP_DEBUG_ASSERT( taskdata->td_flags.complete == 1 );
536  KMP_DEBUG_ASSERT( taskdata->td_flags.freed == 0 );
537  KMP_DEBUG_ASSERT( TCR_4(taskdata->td_allocated_child_tasks) == 0 || taskdata->td_flags.task_serial == 1);
538  KMP_DEBUG_ASSERT( TCR_4(taskdata->td_incomplete_child_tasks) == 0 );
539 
540  taskdata->td_flags.freed = 1;
541  // deallocate the taskdata and shared variable blocks associated with this task
542  #if USE_FAST_MEMORY
543  __kmp_fast_free( thread, taskdata );
544  #else /* ! USE_FAST_MEMORY */
545  __kmp_thread_free( thread, taskdata );
546  #endif
547 
548  KA_TRACE(20, ("__kmp_free_task: T#%d freed task %p\n",
549  gtid, taskdata) );
550 }
551 
552 //-------------------------------------------------------------------------------------
553 // __kmp_free_task_and_ancestors: free the current task and ancestors without children
554 //
555 // gtid: Global thread ID of calling thread
556 // taskdata: task to free
557 // thread: thread data structure of caller
558 
559 static void
560 __kmp_free_task_and_ancestors( kmp_int32 gtid, kmp_taskdata_t * taskdata, kmp_info_t * thread )
561 {
562  kmp_int32 children = 0;
563  kmp_int32 team_or_tasking_serialized = taskdata -> td_flags.team_serial || taskdata -> td_flags.tasking_ser;
564 
565  KMP_DEBUG_ASSERT( taskdata -> td_flags.tasktype == TASK_EXPLICIT );
566 
567  if ( !team_or_tasking_serialized ) {
568  children = KMP_TEST_THEN_DEC32( (kmp_int32 *)(& taskdata -> td_allocated_child_tasks) ) - 1;
569  KMP_DEBUG_ASSERT( children >= 0 );
570  }
571 
572  // Now, go up the ancestor tree to see if any ancestors can now be freed.
573  while ( children == 0 )
574  {
575  kmp_taskdata_t * parent_taskdata = taskdata -> td_parent;
576 
577  KA_TRACE(20, ("__kmp_free_task_and_ancestors(enter): T#%d task %p complete "
578  "and freeing itself\n", gtid, taskdata) );
579 
580  // --- Deallocate my ancestor task ---
581  __kmp_free_task( gtid, taskdata, thread );
582 
583  taskdata = parent_taskdata;
584 
585  // Stop checking ancestors at implicit task or if tasking serialized
586  // instead of walking up ancestor tree to avoid premature deallocation of ancestors.
587  if ( team_or_tasking_serialized || taskdata -> td_flags.tasktype == TASK_IMPLICIT )
588  return;
589 
590  if ( !team_or_tasking_serialized ) {
591  // Predecrement simulated by "- 1" calculation
592  children = KMP_TEST_THEN_DEC32( (kmp_int32 *)(& taskdata -> td_allocated_child_tasks) ) - 1;
593  KMP_DEBUG_ASSERT( children >= 0 );
594  }
595  }
596 
597  KA_TRACE(20, ("__kmp_free_task_and_ancestors(exit): T#%d task %p has %d children; "
598  "not freeing it yet\n", gtid, taskdata, children) );
599 }
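/*
 * Editorial note: td_allocated_child_tasks acts as a reference count that
 * starts at 1 for the task itself (see __kmp_task_alloc below) and is
 * incremented for each child allocated.  The "children == 0" loop above
 * therefore frees a task only after it has completed and its last child has
 * been freed, then walks up td_parent to see whether the ancestor can be
 * freed for the same reason.
 */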
600 
601 //---------------------------------------------------------------------
602 // __kmp_task_finish: bookkeeping to do when a task finishes execution
603 // gtid: global thread ID for calling thread
604 // task: task to be finished
605 // resumed_task: task to be resumed. (may be NULL if task is serialized)
606 
607 static void
608 __kmp_task_finish( kmp_int32 gtid, kmp_task_t *task, kmp_taskdata_t *resumed_task )
609 {
610  kmp_taskdata_t * taskdata = KMP_TASK_TO_TASKDATA(task);
611  kmp_info_t * thread = __kmp_threads[ gtid ];
612  kmp_int32 children = 0;
613 
614  KA_TRACE(10, ("__kmp_task_finish(enter): T#%d finishing task %p and resuming task %p\n",
615  gtid, taskdata, resumed_task) );
616 
617  KMP_DEBUG_ASSERT( taskdata -> td_flags.tasktype == TASK_EXPLICIT );
618 
619  // Pop task from stack if tied
620 #ifdef BUILD_TIED_TASK_STACK
621  if ( taskdata -> td_flags.tiedness == TASK_TIED )
622  {
623  __kmp_pop_task_stack( gtid, thread, taskdata );
624  }
625 #endif /* BUILD_TIED_TASK_STACK */
626 
627  KMP_DEBUG_ASSERT( taskdata -> td_flags.executing == 1 );
628  KMP_DEBUG_ASSERT( taskdata -> td_flags.complete == 0 );
629  taskdata -> td_flags.executing = 0; // suspend the finishing task
630  taskdata -> td_flags.complete = 1; // mark the task as completed
631  KMP_DEBUG_ASSERT( taskdata -> td_flags.started == 1 );
632  KMP_DEBUG_ASSERT( taskdata -> td_flags.freed == 0 );
633 
634  // Only need to keep track of count if team parallel and tasking not serialized
635  if ( !( taskdata -> td_flags.team_serial || taskdata -> td_flags.tasking_ser ) ) {
636  // Predecrement simulated by "- 1" calculation
637  children = KMP_TEST_THEN_DEC32( (kmp_int32 *)(& taskdata -> td_parent -> td_incomplete_child_tasks) ) - 1;
638  KMP_DEBUG_ASSERT( children >= 0 );
639 #if OMP_40_ENABLED
640  if ( taskdata->td_taskgroup )
641  KMP_TEST_THEN_DEC32( (kmp_int32 *)(& taskdata->td_taskgroup->count) );
642  __kmp_release_deps(gtid,taskdata);
643 #endif
644  }
645 
646  KA_TRACE(20, ("__kmp_task_finish: T#%d finished task %p, %d incomplete children\n",
647  gtid, taskdata, children) );
648 
649  // bookkeeping for resuming task:
650  // GEH - note tasking_ser => task_serial
651  KMP_DEBUG_ASSERT( (taskdata->td_flags.tasking_ser || taskdata->td_flags.task_serial) ==
652  taskdata->td_flags.task_serial);
653  if ( taskdata->td_flags.task_serial )
654  {
655  if (resumed_task == NULL) {
656  resumed_task = taskdata->td_parent; // In a serialized task, the resumed task is the parent
657  }
658  else {
659  // verify resumed task passed in points to parent
660  KMP_DEBUG_ASSERT( resumed_task == taskdata->td_parent );
661  }
662  }
663  else {
664  KMP_DEBUG_ASSERT( resumed_task != NULL ); // verify that resumed task is passed as argument
665  }
666 
667  // Free this task and then ancestor tasks if they have no children.
668  __kmp_free_task_and_ancestors(gtid, taskdata, thread);
669 
670  __kmp_threads[ gtid ] -> th.th_current_task = resumed_task; // restore current_task
671 
672  // TODO: GEH - make sure root team implicit task is initialized properly.
673  // KMP_DEBUG_ASSERT( resumed_task->td_flags.executing == 0 );
674  resumed_task->td_flags.executing = 1; // resume previous task
675 
676  KA_TRACE(10, ("__kmp_task_finish(exit): T#%d finished task %p, resuming task %p\n",
677  gtid, taskdata, resumed_task) );
678 
679  return;
680 }
681 
682 //---------------------------------------------------------------------
683 // __kmpc_omp_task_complete_if0: report that a task has completed execution
684 // loc_ref: source location information; points to end of task block.
685 // gtid: global thread number.
686 // task: task thunk for the completed task.
687 
688 void
689 __kmpc_omp_task_complete_if0( ident_t *loc_ref, kmp_int32 gtid, kmp_task_t *task )
690 {
691  KA_TRACE(10, ("__kmpc_omp_task_complete_if0(enter): T#%d loc=%p task=%p\n",
692  gtid, loc_ref, KMP_TASK_TO_TASKDATA(task) ) );
693 
694  __kmp_task_finish( gtid, task, NULL ); // this routine will provide task to resume
695 
696  KA_TRACE(10, ("__kmpc_omp_task_complete_if0(exit): T#%d loc=%p task=%p\n",
697  gtid, loc_ref, KMP_TASK_TO_TASKDATA(task) ) );
698 
699  return;
700 }
701 
702 #ifdef TASK_UNUSED
703 //---------------------------------------------------------------------
704 // __kmpc_omp_task_complete: report that a task has completed execution
705 // NEVER GENERATED BY COMPILER, DEPRECATED!!!
706 
707 void
708 __kmpc_omp_task_complete( ident_t *loc_ref, kmp_int32 gtid, kmp_task_t *task )
709 {
710  KA_TRACE(10, ("__kmpc_omp_task_complete(enter): T#%d loc=%p task=%p\n",
711  gtid, loc_ref, KMP_TASK_TO_TASKDATA(task) ) );
712 
713  __kmp_task_finish( gtid, task, NULL ); // Not sure how to find task to resume
714 
715  KA_TRACE(10, ("__kmpc_omp_task_complete(exit): T#%d loc=%p task=%p\n",
716  gtid, loc_ref, KMP_TASK_TO_TASKDATA(task) ) );
717  return;
718 }
719 #endif // TASK_UNUSED
720 
721 
722 //----------------------------------------------------------------------------------------------------
723 // __kmp_init_implicit_task: Initialize the appropriate fields in the implicit task for a given thread
724 //
725 // loc_ref: reference to source location of parallel region
726 // this_thr: thread data structure corresponding to implicit task
727 // team: team for this_thr
728 // tid: thread id of given thread within team
729 // set_curr_task: TRUE if need to push current task to thread
730 // NOTE: Routine does not set up the implicit task ICVs. This is assumed to have already been done elsewhere.
731 // TODO: Get better loc_ref. Value passed in may be NULL
732 
733 void
734 __kmp_init_implicit_task( ident_t *loc_ref, kmp_info_t *this_thr, kmp_team_t *team, int tid, int set_curr_task )
735 {
736  kmp_taskdata_t * task = & team->t.t_implicit_task_taskdata[ tid ];
737 
738  KF_TRACE(10, ("__kmp_init_implicit_task(enter): T#:%d team=%p task=%p, reinit=%s\n",
739  tid, team, task, set_curr_task ? "TRUE" : "FALSE" ) );
740 
741  task->td_task_id = KMP_GEN_TASK_ID();
742  task->td_team = team;
743 // task->td_parent = NULL; // fix for CQ230101 (broken parent task info in debugger)
744  task->td_ident = loc_ref;
745  task->td_taskwait_ident = NULL;
746  task->td_taskwait_counter = 0;
747  task->td_taskwait_thread = 0;
748 
749  task->td_flags.tiedness = TASK_TIED;
750  task->td_flags.tasktype = TASK_IMPLICIT;
751  // All implicit tasks are executed immediately, not deferred
752  task->td_flags.task_serial = 1;
753  task->td_flags.tasking_ser = ( __kmp_tasking_mode == tskm_immediate_exec );
754  task->td_flags.team_serial = ( team->t.t_serialized ) ? 1 : 0;
755 
756  task->td_flags.started = 1;
757  task->td_flags.executing = 1;
758  task->td_flags.complete = 0;
759  task->td_flags.freed = 0;
760 
761 #if OMP_40_ENABLED
762  task->td_dephash = NULL;
763  task->td_depnode = NULL;
764 #endif
765 
766  if (set_curr_task) { // only do this initialization the first time a thread is created
767  task->td_incomplete_child_tasks = 0;
768  task->td_allocated_child_tasks = 0; // Not used because do not need to deallocate implicit task
769 #if OMP_40_ENABLED
770  task->td_taskgroup = NULL; // An implicit task does not have taskgroup
771 #endif
772  __kmp_push_current_task_to_thread( this_thr, team, tid );
773  } else {
774  KMP_DEBUG_ASSERT(task->td_incomplete_child_tasks == 0);
775  KMP_DEBUG_ASSERT(task->td_allocated_child_tasks == 0);
776  }
777 
778  KF_TRACE(10, ("__kmp_init_implicit_task(exit): T#:%d team=%p task=%p\n",
779  tid, team, task ) );
780 }
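/*
 * Editorial note on the three "serial" flags used throughout this file:
 *     tasking_ser - the runtime is in immediate-execution tasking mode
 *     team_serial - the enclosing team is serialized
 *     task_serial - this particular task runs immediately and is never deferred
 * Implicit tasks always set task_serial (above); __kmp_task_alloc() derives
 * task_serial for explicit tasks from final || team_serial || tasking_ser.
 */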
781 
782 // Round up a size to a multiple of val, where val must be a power of two.
783 // Used to insert padding between structures co-allocated using a single malloc() call
784 static size_t
785 __kmp_round_up_to_val( size_t size, size_t val ) {
786  if ( size & ( val - 1 ) ) {
787  size &= ~ ( val - 1 );
788  if ( size <= KMP_SIZE_T_MAX - val ) {
789  size += val; // Round up if there is no overflow.
790  }; // if
791  }; // if
792  return size;
793 } // __kmp_round_up_to_val
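/*
 * Editorial note: a worked example of the rounding above, assuming
 * val == sizeof(void *) == 8 on a 64-bit target:
 *     size = 52:  52 & 7 != 0, so size = (52 & ~7) + 8 = 48 + 8 = 56
 *     size = 64:  64 & 7 == 0, so 64 is returned unchanged
 * This is what aligns the shareds block that __kmp_task_alloc() places
 * after the co-allocated kmp_taskdata_t / kmp_task_t pair.
 */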
794 
795 
796 //---------------------------------------------------------------------------------
797 // __kmp_task_alloc: Allocate the taskdata and task data structures for a task
798 //
799 // loc_ref: source location information
800 // gtid: global thread number.
801 // flags: include tiedness & task type (explicit vs. implicit) of the ''new'' task encountered.
802 // Converted from kmp_int32 to kmp_tasking_flags_t in routine.
803 // sizeof_kmp_task_t: Size in bytes of kmp_task_t data structure including private vars accessed in task.
804 // sizeof_shareds: Size in bytes of array of pointers to shared vars accessed in task.
805 // task_entry: Pointer to task code entry point generated by compiler.
806 // returns: a pointer to the allocated kmp_task_t structure (task).
807 
808 kmp_task_t *
809 __kmp_task_alloc( ident_t *loc_ref, kmp_int32 gtid, kmp_tasking_flags_t *flags,
810  size_t sizeof_kmp_task_t, size_t sizeof_shareds,
811  kmp_routine_entry_t task_entry )
812 {
813  kmp_task_t *task;
814  kmp_taskdata_t *taskdata;
815  kmp_info_t *thread = __kmp_threads[ gtid ];
816  kmp_team_t *team = thread->th.th_team;
817  kmp_taskdata_t *parent_task = thread->th.th_current_task;
818  size_t shareds_offset;
819 
820  KA_TRACE(10, ("__kmp_task_alloc(enter): T#%d loc=%p, flags=(0x%x) "
821  "sizeof_task=%ld sizeof_shared=%ld entry=%p\n",
822  gtid, loc_ref, *((kmp_int32 *)flags), sizeof_kmp_task_t,
823  sizeof_shareds, task_entry) );
824 
825  if ( parent_task->td_flags.final ) {
826  if (flags->merged_if0) {
827  }
828  flags->final = 1;
829  }
830 
831  // Calculate shared structure offset including padding after kmp_task_t struct
832  // to align pointers in shared struct
833  shareds_offset = sizeof( kmp_taskdata_t ) + sizeof_kmp_task_t;
834  shareds_offset = __kmp_round_up_to_val( shareds_offset, sizeof( void * ));
835 
836  // Allocate a kmp_taskdata_t block and a kmp_task_t block.
837  KA_TRACE(30, ("__kmp_task_alloc: T#%d First malloc size: %ld\n",
838  gtid, shareds_offset) );
839  KA_TRACE(30, ("__kmp_task_alloc: T#%d Second malloc size: %ld\n",
840  gtid, sizeof_shareds) );
841 
842  // Avoid double allocation here by combining shareds with taskdata
843  #if USE_FAST_MEMORY
844  taskdata = (kmp_taskdata_t *) __kmp_fast_allocate( thread, shareds_offset + sizeof_shareds );
845  #else /* ! USE_FAST_MEMORY */
846  taskdata = (kmp_taskdata_t *) __kmp_thread_malloc( thread, shareds_offset + sizeof_shareds );
847  #endif /* USE_FAST_MEMORY */
848 
849  task = KMP_TASKDATA_TO_TASK(taskdata);
850 
851  // Make sure task & taskdata are aligned appropriately
852 #if KMP_ARCH_X86
853  KMP_DEBUG_ASSERT( ( ((kmp_uintptr_t)taskdata) & (sizeof(double)-1) ) == 0 );
854  KMP_DEBUG_ASSERT( ( ((kmp_uintptr_t)task) & (sizeof(double)-1) ) == 0 );
855 #else
856  KMP_DEBUG_ASSERT( ( ((kmp_uintptr_t)taskdata) & (sizeof(_Quad)-1) ) == 0 );
857  KMP_DEBUG_ASSERT( ( ((kmp_uintptr_t)task) & (sizeof(_Quad)-1) ) == 0 );
858 #endif
859  if (sizeof_shareds > 0) {
860  // Avoid double allocation here by combining shareds with taskdata
861  task->shareds = & ((char *) taskdata)[ shareds_offset ];
862  // Make sure shareds struct is aligned to pointer size
863  KMP_DEBUG_ASSERT( ( ((kmp_uintptr_t)task->shareds) & (sizeof(void *)-1) ) == 0 );
864  } else {
865  task->shareds = NULL;
866  }
867  task->routine = task_entry;
868  task->part_id = 0; // AC: Always start with 0 part id
869 
870  taskdata->td_task_id = KMP_GEN_TASK_ID();
871  taskdata->td_team = team;
872  taskdata->td_alloc_thread = thread;
873  taskdata->td_parent = parent_task;
874  taskdata->td_level = parent_task->td_level + 1; // increment nesting level
875  taskdata->td_ident = loc_ref;
876  taskdata->td_taskwait_ident = NULL;
877  taskdata->td_taskwait_counter = 0;
878  taskdata->td_taskwait_thread = 0;
879  KMP_DEBUG_ASSERT( taskdata->td_parent != NULL );
880  copy_icvs( &taskdata->td_icvs, &taskdata->td_parent->td_icvs );
881 
882  taskdata->td_flags.tiedness = flags->tiedness;
883  taskdata->td_flags.final = flags->final;
884  taskdata->td_flags.merged_if0 = flags->merged_if0;
885  taskdata->td_flags.tasktype = TASK_EXPLICIT;
886 
887  // GEH - TODO: fix this to copy parent task's value of tasking_ser flag
888  taskdata->td_flags.tasking_ser = ( __kmp_tasking_mode == tskm_immediate_exec );
889 
890  // GEH - TODO: fix this to copy parent task's value of team_serial flag
891  taskdata->td_flags.team_serial = ( team->t.t_serialized ) ? 1 : 0;
892 
893  // GEH - Note we serialize the task if the team is serialized to make sure implicit parallel region
894  // tasks are not left until program termination to execute. Also, it helps locality to execute
895  // immediately.
896  taskdata->td_flags.task_serial = ( taskdata->td_flags.final
897  || taskdata->td_flags.team_serial || taskdata->td_flags.tasking_ser );
898 
899  taskdata->td_flags.started = 0;
900  taskdata->td_flags.executing = 0;
901  taskdata->td_flags.complete = 0;
902  taskdata->td_flags.freed = 0;
903 
904  taskdata->td_flags.native = flags->native;
905 
906  taskdata->td_incomplete_child_tasks = 0;
907  taskdata->td_allocated_child_tasks = 1; // start at one because counts current task and children
908 #if OMP_40_ENABLED
909  taskdata->td_taskgroup = parent_task->td_taskgroup; // task inherits the taskgroup from the parent task
910  taskdata->td_dephash = NULL;
911  taskdata->td_depnode = NULL;
912 #endif
913  // Only need to keep track of child task counts if team parallel and tasking not serialized
914  if ( !( taskdata -> td_flags.team_serial || taskdata -> td_flags.tasking_ser ) ) {
915  KMP_TEST_THEN_INC32( (kmp_int32 *)(& parent_task->td_incomplete_child_tasks) );
916 #if OMP_40_ENABLED
917  if ( parent_task->td_taskgroup )
918  KMP_TEST_THEN_INC32( (kmp_int32 *)(& parent_task->td_taskgroup->count) );
919 #endif
920  // Only need to keep track of allocated child tasks for explicit tasks since implicit not deallocated
921  if ( taskdata->td_parent->td_flags.tasktype == TASK_EXPLICIT ) {
922  KMP_TEST_THEN_INC32( (kmp_int32 *)(& taskdata->td_parent->td_allocated_child_tasks) );
923  }
924  }
925 
926  KA_TRACE(20, ("__kmp_task_alloc(exit): T#%d created task %p parent=%p\n",
927  gtid, taskdata, taskdata->td_parent) );
928 
929  return task;
930 }
931 
932 
933 kmp_task_t *
934 __kmpc_omp_task_alloc( ident_t *loc_ref, kmp_int32 gtid, kmp_int32 flags,
935  size_t sizeof_kmp_task_t, size_t sizeof_shareds,
936  kmp_routine_entry_t task_entry )
937 {
938  kmp_task_t *retval;
939  kmp_tasking_flags_t *input_flags = (kmp_tasking_flags_t *) & flags;
940 
941  input_flags->native = FALSE;
942  // __kmp_task_alloc() sets up all other runtime flags
943 
944  KA_TRACE(10, ("__kmpc_omp_task_alloc(enter): T#%d loc=%p, flags=(%s) "
945  "sizeof_task=%ld sizeof_shared=%ld entry=%p\n",
946  gtid, loc_ref, input_flags->tiedness ? "tied " : "untied",
947  sizeof_kmp_task_t, sizeof_shareds, task_entry) );
948 
949  retval = __kmp_task_alloc( loc_ref, gtid, input_flags, sizeof_kmp_task_t,
950  sizeof_shareds, task_entry );
951 
952  KA_TRACE(20, ("__kmpc_omp_task_alloc(exit): T#%d retval %p\n", gtid, retval) );
953 
954  return retval;
955 }
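/*
 * Editorial note: a hedged sketch of how a compiler-generated caller might
 * drive this entry point together with __kmpc_omp_task() below for a basic
 * "#pragma omp task".  The outlined routine name, the shareds layout and the
 * literal flag value are illustrative only; real values come from the
 * compiler (flag bit 0 is the tiedness bit, 1 = tied), and loc is an ident_t
 * describing the call site.
 *
 *     kmp_int32 my_task_entry( kmp_int32 gtid, void *task )   // outlined task body
 *     {
 *         void **shareds = (void **) ((kmp_task_t *) task)->shareds;
 *         ... user code using *shareds ...
 *         return 0;
 *     }
 *
 *     kmp_int32   gtid = __kmpc_global_thread_num( &loc );
 *     kmp_task_t *t    = __kmpc_omp_task_alloc( &loc, gtid, 1,
 *                                               sizeof(kmp_task_t), sizeof(void *),
 *                                               (kmp_routine_entry_t) my_task_entry );
 *     ... fill in t->shareds ...
 *     __kmpc_omp_task( &loc, gtid, t );   // queue it, or run it if the deque is full
 */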
956 
957 //-----------------------------------------------------------
958 // __kmp_invoke_task: invoke the specified task
959 //
960 // gtid: global thread ID of caller
961 // task: the task to invoke
962 // current_task: the task to resume after task invocation
963 
964 static void
965 __kmp_invoke_task( kmp_int32 gtid, kmp_task_t *task, kmp_taskdata_t * current_task )
966 {
967  kmp_taskdata_t * taskdata = KMP_TASK_TO_TASKDATA(task);
968  KA_TRACE(30, ("__kmp_invoke_task(enter): T#%d invoking task %p, current_task=%p\n",
969  gtid, taskdata, current_task) );
970 
971  __kmp_task_start( gtid, task, current_task );
972 
973  //
974  // Invoke the task routine and pass in relevant data.
975  // Thunks generated by gcc take a different argument list.
976  //
977 #ifdef KMP_GOMP_COMPAT
978  if (taskdata->td_flags.native) {
979  ((void (*)(void *))(*(task->routine)))(task->shareds);
980  }
981  else
982 #endif /* KMP_GOMP_COMPAT */
983  {
984  (*(task->routine))(gtid, task);
985  }
986 
987  __kmp_task_finish( gtid, task, current_task );
988 
989  KA_TRACE(30, ("__kmp_invoke_task(exit): T#%d completed task %p, resuming task %p\n",
990  gtid, taskdata, current_task) );
991  return;
992 }
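/*
 * Editorial note: the two call shapes above correspond to the two thunk ABIs
 * this routine accepts.  Illustrative signatures (names are not part of the
 * runtime):
 *
 *     kmp_int32 entry_intel( kmp_int32 gtid, void *task );  // invoked as (*routine)(gtid, task)
 *     void      entry_gomp ( void *shareds );               // native thunk, invoked as (*routine)(task->shareds)
 */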
993 
994 //-----------------------------------------------------------------------
995 // __kmpc_omp_task_parts: Schedule a thread-switchable task for execution
996 //
997 // loc_ref: location of original task pragma (ignored)
998 // gtid: Global Thread ID of encountering thread
999 // new_task: task thunk allocated by __kmpc_omp_task_alloc() for the ''new task''
1000 // Returns:
1001 // TASK_CURRENT_NOT_QUEUED (0) if did not suspend and queue current task to be resumed later.
1002 // TASK_CURRENT_QUEUED (1) if suspended and queued the current task to be resumed later.
1003 
1004 kmp_int32
1005 __kmpc_omp_task_parts( ident_t *loc_ref, kmp_int32 gtid, kmp_task_t * new_task)
1006 {
1007  kmp_taskdata_t * new_taskdata = KMP_TASK_TO_TASKDATA(new_task);
1008 
1009  KA_TRACE(10, ("__kmpc_omp_task_parts(enter): T#%d loc=%p task=%p\n",
1010  gtid, loc_ref, new_taskdata ) );
1011 
1012  /* Should we execute the new task or queue it? For now, let's just always try to
1013  queue it. If the queue fills up, then we'll execute it. */
1014 
1015  if ( __kmp_push_task( gtid, new_task ) == TASK_NOT_PUSHED ) // if cannot defer
1016  { // Execute this task immediately
1017  kmp_taskdata_t * current_task = __kmp_threads[ gtid ] -> th.th_current_task;
1018  new_taskdata->td_flags.task_serial = 1;
1019  __kmp_invoke_task( gtid, new_task, current_task );
1020  }
1021 
1022  KA_TRACE(10, ("__kmpc_omp_task_parts(exit): T#%d returning TASK_CURRENT_NOT_QUEUED: "
1023  "loc=%p task=%p\n", gtid, loc_ref,
1024  new_taskdata ) );
1025 
1026  return TASK_CURRENT_NOT_QUEUED;
1027 }
1028 
1029 
1030 //---------------------------------------------------------------------
1031 // __kmpc_omp_task: Schedule a non-thread-switchable task for execution
1032 // loc_ref: location of original task pragma (ignored)
1033 // gtid: Global Thread ID of encountering thread
1034 // new_task: non-thread-switchable task thunk allocated by __kmpc_omp_task_alloc()
1035 // returns:
1036 //
1037 // TASK_CURRENT_NOT_QUEUED (0) if did not suspend and queue current task to be resumed later.
1038 // TASK_CURRENT_QUEUED (1) if suspended and queued the current task to be resumed later.
1039 
1040 kmp_int32
1041 __kmpc_omp_task( ident_t *loc_ref, kmp_int32 gtid, kmp_task_t * new_task)
1042 {
1043  kmp_taskdata_t * new_taskdata = KMP_TASK_TO_TASKDATA(new_task);
1044  kmp_int32 rc;
1045 
1046  KA_TRACE(10, ("__kmpc_omp_task(enter): T#%d loc=%p task=%p\n",
1047  gtid, loc_ref, new_taskdata ) );
1048 
1049  /* Should we execute the new task or queue it? For now, let's just always try to
1050  queue it. If the queue fills up, then we'll execute it. */
1051 
1052  if ( __kmp_push_task( gtid, new_task ) == TASK_NOT_PUSHED ) // if cannot defer
1053  { // Execute this task immediately
1054  kmp_taskdata_t * current_task = __kmp_threads[ gtid ] -> th.th_current_task;
1055  new_taskdata -> td_flags.task_serial = 1;
1056  __kmp_invoke_task( gtid, new_task, current_task );
1057  }
1058 
1059  KA_TRACE(10, ("__kmpc_omp_task(exit): T#%d returning TASK_CURRENT_NOT_QUEUED: loc=%p task=%p\n",
1060  gtid, loc_ref, new_taskdata ) );
1061 
1062  return TASK_CURRENT_NOT_QUEUED;
1063 }
1064 
1065 
1066 //-------------------------------------------------------------------------------------
1067 // __kmpc_omp_taskwait: Wait until all tasks generated by the current task are complete
1068 
1069 kmp_int32
1070 __kmpc_omp_taskwait( ident_t *loc_ref, kmp_int32 gtid )
1071 {
1072  kmp_taskdata_t * taskdata;
1073  kmp_info_t * thread;
1074  int thread_finished = FALSE;
1075 
1076  KA_TRACE(10, ("__kmpc_omp_taskwait(enter): T#%d loc=%p\n",
1077  gtid, loc_ref) );
1078 
1079  if ( __kmp_tasking_mode != tskm_immediate_exec ) {
1080  // GEH TODO: shouldn't we have some sort of OMPRAP API calls here to mark begin wait?
1081 
1082  thread = __kmp_threads[ gtid ];
1083  taskdata = thread -> th.th_current_task;
1084 #if USE_ITT_BUILD
1085  // Note: These values are used by ITT events as well.
1086 #endif /* USE_ITT_BUILD */
1087  taskdata->td_taskwait_counter += 1;
1088  taskdata->td_taskwait_ident = loc_ref;
1089  taskdata->td_taskwait_thread = gtid + 1;
1090 
1091 #if USE_ITT_BUILD
1092  void * itt_sync_obj = __kmp_itt_taskwait_object( gtid );
1093  if ( itt_sync_obj != NULL )
1094  __kmp_itt_taskwait_starting( gtid, itt_sync_obj );
1095 #endif /* USE_ITT_BUILD */
1096 
1097  if ( ! taskdata->td_flags.team_serial ) {
1098  // GEH: if team serialized, avoid reading the volatile variable below.
1099  while ( TCR_4(taskdata -> td_incomplete_child_tasks) != 0 ) {
1100  __kmp_execute_tasks( thread, gtid, &(taskdata->td_incomplete_child_tasks),
1101  0, FALSE, &thread_finished,
1102 #if USE_ITT_BUILD
1103  itt_sync_obj,
1104 #endif /* USE_ITT_BUILD */
1105  __kmp_task_stealing_constraint );
1106  }
1107  }
1108 #if USE_ITT_BUILD
1109  if ( itt_sync_obj != NULL )
1110  __kmp_itt_taskwait_finished( gtid, itt_sync_obj );
1111 #endif /* USE_ITT_BUILD */
1112 
1113  // GEH TODO: shouldn't we have some sort of OMPRAP API calls here to mark end of wait?
1114  taskdata->td_taskwait_thread = - taskdata->td_taskwait_thread;
1115  }
1116 
1117  KA_TRACE(10, ("__kmpc_omp_taskwait(exit): T#%d task %p finished waiting, "
1118  "returning TASK_CURRENT_NOT_QUEUED\n", gtid, taskdata) );
1119 
1120  return TASK_CURRENT_NOT_QUEUED;
1121 }
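/*
 * Editorial note: an illustrative user-level fragment that reaches this
 * entry point (compilers targeting this runtime lower the taskwait to a
 * __kmpc_omp_taskwait call on behalf of the encountering thread):
 *
 *     #pragma omp task
 *     { work_a(); }
 *     #pragma omp task
 *     { work_b(); }
 *     #pragma omp taskwait   // spins here, executing queued tasks via
 *                            // __kmp_execute_tasks() until the parent's
 *                            // td_incomplete_child_tasks drops to zero
 */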
1122 
1123 
1124 //-------------------------------------------------
1125 // __kmpc_omp_taskyield: switch to a different task
1126 
1127 kmp_int32
1128 __kmpc_omp_taskyield( ident_t *loc_ref, kmp_int32 gtid, int end_part )
1129 {
1130  kmp_taskdata_t * taskdata;
1131  kmp_info_t * thread;
1132  int thread_finished = FALSE;
1133 
1134  KA_TRACE(10, ("__kmpc_omp_taskyield(enter): T#%d loc=%p end_part = %d\n",
1135  gtid, loc_ref, end_part) );
1136 
1137  if ( __kmp_tasking_mode != tskm_immediate_exec ) {
1138  // GEH TODO: shouldn't we have some sort of OMPRAP API calls here to mark begin wait?
1139 
1140  thread = __kmp_threads[ gtid ];
1141  taskdata = thread -> th.th_current_task;
1142  // Should we model this as a task wait or not?
1143 #if USE_ITT_BUILD
1144  // Note: These values are used by ITT events as well.
1145 #endif /* USE_ITT_BUILD */
1146  taskdata->td_taskwait_counter += 1;
1147  taskdata->td_taskwait_ident = loc_ref;
1148  taskdata->td_taskwait_thread = gtid + 1;
1149 
1150 #if USE_ITT_BUILD
1151  void * itt_sync_obj = __kmp_itt_taskwait_object( gtid );
1152  if ( itt_sync_obj != NULL )
1153  __kmp_itt_taskwait_starting( gtid, itt_sync_obj );
1154 #endif /* USE_ITT_BUILD */
1155  if ( ! taskdata->td_flags.team_serial ) {
1156  __kmp_execute_tasks( thread, gtid, NULL, 0, FALSE, &thread_finished,
1157 #if USE_ITT_BUILD
1158  itt_sync_obj,
1159 #endif /* USE_ITT_BUILD */
1160  __kmp_task_stealing_constraint );
1161  }
1162 
1163 #if USE_ITT_BUILD
1164  if ( itt_sync_obj != NULL )
1165  __kmp_itt_taskwait_finished( gtid, itt_sync_obj );
1166 #endif /* USE_ITT_BUILD */
1167 
1168  // GEH TODO: shouldn't we have some sort of OMPRAP API calls here to mark end of wait?
1169  taskdata->td_taskwait_thread = - taskdata->td_taskwait_thread;
1170  }
1171 
1172  KA_TRACE(10, ("__kmpc_omp_taskyield(exit): T#%d task %p resuming, "
1173  "returning TASK_CURRENT_NOT_QUEUED\n", gtid, taskdata) );
1174 
1175  return TASK_CURRENT_NOT_QUEUED;
1176 }
1177 
1178 
1179 #if OMP_40_ENABLED
1180 //-------------------------------------------------------------------------------------
1181 // __kmpc_taskgroup: Start a new taskgroup
1182 
1183 void
1184 __kmpc_taskgroup( ident* loc, int gtid )
1185 {
1186  kmp_info_t * thread = __kmp_threads[ gtid ];
1187  kmp_taskdata_t * taskdata = thread->th.th_current_task;
1188  kmp_taskgroup_t * tg_new =
1189  (kmp_taskgroup_t *)__kmp_thread_malloc( thread, sizeof( kmp_taskgroup_t ) );
1190  KA_TRACE(10, ("__kmpc_taskgroup: T#%d loc=%p group=%p\n", gtid, loc, tg_new) );
1191  tg_new->count = 0;
1192  tg_new->parent = taskdata->td_taskgroup;
1193  taskdata->td_taskgroup = tg_new;
1194 }
1195 
1196 
1197 //-------------------------------------------------------------------------------------
1198 // __kmpc_end_taskgroup: Wait until all tasks generated by the current task
1199 // and its descendants are complete
1200 
1201 void
1202 __kmpc_end_taskgroup( ident* loc, int gtid )
1203 {
1204  kmp_info_t * thread = __kmp_threads[ gtid ];
1205  kmp_taskdata_t * taskdata = thread->th.th_current_task;
1206  kmp_taskgroup_t * taskgroup = taskdata->td_taskgroup;
1207  int thread_finished = FALSE;
1208 
1209  KA_TRACE(10, ("__kmpc_end_taskgroup(enter): T#%d loc=%p\n", gtid, loc) );
1210  KMP_DEBUG_ASSERT( taskgroup != NULL );
1211 
1212  if ( __kmp_tasking_mode != tskm_immediate_exec ) {
1213 #if USE_ITT_BUILD
1214  // For ITT the taskgroup wait is similar to taskwait until we need to distinguish them
1215  void * itt_sync_obj = __kmp_itt_taskwait_object( gtid );
1216  if ( itt_sync_obj != NULL )
1217  __kmp_itt_taskwait_starting( gtid, itt_sync_obj );
1218 #endif /* USE_ITT_BUILD */
1219 
1220  if ( ! taskdata->td_flags.team_serial ) {
1221  while ( TCR_4(taskgroup->count) != 0 ) {
1222  __kmp_execute_tasks( thread, gtid, &(taskgroup->count),
1223  0, FALSE, &thread_finished,
1224 #if USE_ITT_BUILD
1225  itt_sync_obj,
1226 #endif /* USE_ITT_BUILD */
1227  __kmp_task_stealing_constraint );
1228  }
1229  }
1230 
1231 #if USE_ITT_BUILD
1232  if ( itt_sync_obj != NULL )
1233  __kmp_itt_taskwait_finished( gtid, itt_sync_obj );
1234 #endif /* USE_ITT_BUILD */
1235  }
1236  KMP_DEBUG_ASSERT( taskgroup->count == 0 );
1237 
1238  // Restore parent taskgroup for the current task
1239  taskdata->td_taskgroup = taskgroup->parent;
1240  __kmp_thread_free( thread, taskgroup );
1241 
1242  KA_TRACE(10, ("__kmpc_end_taskgroup(exit): T#%d task %p finished waiting\n", gtid, taskdata) );
1243 }
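/*
 * Editorial note: taskgroups nest by chaining kmp_taskgroup_t records
 * through their parent pointers, so the two entry points above behave like
 * push/pop on a per-task stack:
 *
 *     #pragma omp taskgroup          // __kmpc_taskgroup: push a new record
 *     {
 *         #pragma omp task ...       // child tasks increment the innermost count
 *         #pragma omp taskgroup      // nested push
 *         { ... }                    // inner __kmpc_end_taskgroup: wait, pop
 *     }                              // outer __kmpc_end_taskgroup: wait for count==0, pop
 */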
1244 #endif
1245 
1246 
1247 //------------------------------------------------------
1248 // __kmp_remove_my_task: remove a task from my own deque
1249 
1250 static kmp_task_t *
1251 __kmp_remove_my_task( kmp_info_t * thread, kmp_int32 gtid, kmp_task_team_t *task_team,
1252  kmp_int32 is_constrained )
1253 {
1254  kmp_task_t * task;
1255  kmp_taskdata_t * taskdata;
1256  kmp_thread_data_t *thread_data;
1257  kmp_uint32 tail;
1258 
1259  KMP_DEBUG_ASSERT( __kmp_tasking_mode != tskm_immediate_exec );
1260  KMP_DEBUG_ASSERT( task_team -> tt.tt_threads_data != NULL ); // Caller should check this condition
1261 
1262  thread_data = & task_team -> tt.tt_threads_data[ __kmp_tid_from_gtid( gtid ) ];
1263 
1264  KA_TRACE(10, ("__kmp_remove_my_task(enter): T#%d ntasks=%d head=%u tail=%u\n",
1265  gtid, thread_data->td.td_deque_ntasks, thread_data->td.td_deque_head,
1266  thread_data->td.td_deque_tail) );
1267 
1268  if (TCR_4(thread_data -> td.td_deque_ntasks) == 0) {
1269  KA_TRACE(10, ("__kmp_remove_my_task(exit #1): T#%d No tasks to remove: ntasks=%d head=%u tail=%u\n",
1270  gtid, thread_data->td.td_deque_ntasks, thread_data->td.td_deque_head,
1271  thread_data->td.td_deque_tail) );
1272  return NULL;
1273  }
1274 
1275  __kmp_acquire_bootstrap_lock( & thread_data -> td.td_deque_lock );
1276 
1277  if (TCR_4(thread_data -> td.td_deque_ntasks) == 0) {
1278  __kmp_release_bootstrap_lock( & thread_data -> td.td_deque_lock );
1279  KA_TRACE(10, ("__kmp_remove_my_task(exit #2): T#%d No tasks to remove: ntasks=%d head=%u tail=%u\n",
1280  gtid, thread_data->td.td_deque_ntasks, thread_data->td.td_deque_head,
1281  thread_data->td.td_deque_tail) );
1282  return NULL;
1283  }
1284 
1285  tail = ( thread_data -> td.td_deque_tail - 1 ) & TASK_DEQUE_MASK; // Wrap index.
1286  taskdata = thread_data -> td.td_deque[ tail ];
1287 
1288  if (is_constrained) {
1289  // we need to check if the candidate obeys task scheduling constraint:
1290  // only child of current task can be scheduled
1291  kmp_taskdata_t * current = thread->th.th_current_task;
1292  kmp_int32 level = current->td_level;
1293  kmp_taskdata_t * parent = taskdata->td_parent;
1294  while ( parent != current && parent->td_level > level ) {
1295  parent = parent->td_parent; // check generation up to the level of the current task
1296  KMP_DEBUG_ASSERT(parent != NULL);
1297  }
1298  if ( parent != current ) {
1299  // If the tail task is not a child, then no other children can appear in the deque.
1300  __kmp_release_bootstrap_lock( & thread_data -> td.td_deque_lock );
1301  KA_TRACE(10, ("__kmp_remove_my_task(exit #2): T#%d No tasks to remove: ntasks=%d head=%u tail=%u\n",
1302  gtid, thread_data->td.td_deque_ntasks, thread_data->td.td_deque_head,
1303  thread_data->td.td_deque_tail) );
1304  return NULL;
1305  }
1306  }
1307 
1308  thread_data -> td.td_deque_tail = tail;
1309  TCW_4(thread_data -> td.td_deque_ntasks, thread_data -> td.td_deque_ntasks - 1);
1310 
1311  __kmp_release_bootstrap_lock( & thread_data->td.td_deque_lock );
1312 
1313  KA_TRACE(10, ("__kmp_remove_my_task(exit #2): T#%d task %p removed: ntasks=%d head=%u tail=%u\n",
1314  gtid, taskdata, thread_data->td.td_deque_ntasks, thread_data->td.td_deque_head,
1315  thread_data->td.td_deque_tail) );
1316 
1317  task = KMP_TASKDATA_TO_TASK( taskdata );
1318  return task;
1319 }
1320 
1321 
1322 //-----------------------------------------------------------
1323 // __kmp_steal_task: remove a task from another thread's deque
1324 // Assume that calling thread has already checked existence of
1325 // task_team thread_data before calling this routine.
1326 
1327 static kmp_task_t *
1328 __kmp_steal_task( kmp_info_t *victim, kmp_int32 gtid, kmp_task_team_t *task_team,
1329  volatile kmp_uint32 *unfinished_threads, int *thread_finished,
1330  kmp_int32 is_constrained )
1331 {
1332  kmp_task_t * task;
1333  kmp_taskdata_t * taskdata;
1334  kmp_thread_data_t *victim_td, *threads_data;
1335  kmp_int32 victim_tid, thread_tid;
1336 
1337  KMP_DEBUG_ASSERT( __kmp_tasking_mode != tskm_immediate_exec );
1338 
1339  threads_data = task_team -> tt.tt_threads_data;
1340  KMP_DEBUG_ASSERT( threads_data != NULL ); // Caller should check this condition
1341 
1342  victim_tid = victim->th.th_info.ds.ds_tid;
1343  victim_td = & threads_data[ victim_tid ];
1344 
1345  KA_TRACE(10, ("__kmp_steal_task(enter): T#%d try to steal from T#%d: task_team=%p ntasks=%d "
1346  "head=%u tail=%u\n",
1347  gtid, __kmp_gtid_from_thread( victim ), task_team, victim_td->td.td_deque_ntasks,
1348  victim_td->td.td_deque_head, victim_td->td.td_deque_tail) );
1349 
1350  if ( (TCR_4(victim_td -> td.td_deque_ntasks) == 0) || // Caller should not check this condition
1351  (TCR_PTR(victim->th.th_task_team) != task_team)) // GEH: why would this happen?
1352  {
1353  KA_TRACE(10, ("__kmp_steal_task(exit #1): T#%d could not steal from T#%d: task_team=%p "
1354  "ntasks=%d head=%u tail=%u\n",
1355  gtid, __kmp_gtid_from_thread( victim ), task_team, victim_td->td.td_deque_ntasks,
1356  victim_td->td.td_deque_head, victim_td->td.td_deque_tail) );
1357  return NULL;
1358  }
1359 
1360  __kmp_acquire_bootstrap_lock( & victim_td -> td.td_deque_lock );
1361 
1362  // Check again after we acquire the lock
1363  if ( (TCR_4(victim_td -> td.td_deque_ntasks) == 0) ||
1364  (TCR_PTR(victim->th.th_task_team) != task_team)) // GEH: why would this happen?
1365  {
1366  __kmp_release_bootstrap_lock( & victim_td -> td.td_deque_lock );
1367  KA_TRACE(10, ("__kmp_steal_task(exit #2): T#%d could not steal from T#%d: task_team=%p "
1368  "ntasks=%d head=%u tail=%u\n",
1369  gtid, __kmp_gtid_from_thread( victim ), task_team, victim_td->td.td_deque_ntasks,
1370  victim_td->td.td_deque_head, victim_td->td.td_deque_tail) );
1371  return NULL;
1372  }
1373 
1374  KMP_DEBUG_ASSERT( victim_td -> td.td_deque != NULL );
1375 
1376  if ( !is_constrained ) {
1377  taskdata = victim_td -> td.td_deque[ victim_td -> td.td_deque_head ];
1378  // Bump head pointer and Wrap.
1379  victim_td -> td.td_deque_head = ( victim_td -> td.td_deque_head + 1 ) & TASK_DEQUE_MASK;
1380  } else {
1381  // While we have postponed tasks let's steal from tail of the deque (smaller tasks)
1382  kmp_int32 tail = ( victim_td -> td.td_deque_tail - 1 ) & TASK_DEQUE_MASK; // Wrap index.
1383  taskdata = victim_td -> td.td_deque[ tail ];
1384  // we need to check if the candidate obeys the task scheduling constraint:
1385  // only a child (descendant) of the current task can be scheduled
1386  kmp_taskdata_t * current = __kmp_threads[ gtid ]->th.th_current_task;
1387  kmp_int32 level = current->td_level;
1388  kmp_taskdata_t * parent = taskdata->td_parent;
1389  while ( parent != current && parent->td_level > level ) {
1390  parent = parent->td_parent; // check generation up to the level of the current task
1391  KMP_DEBUG_ASSERT(parent != NULL);
1392  }
1393  if ( parent != current ) {
1394  // If the tail task is not a child, then no other children can appear in the deque (?).
1395  __kmp_release_bootstrap_lock( & victim_td -> td.td_deque_lock );
1396  KA_TRACE(10, ("__kmp_steal_task(exit #2): T#%d could not steal from T#%d: task_team=%p "
1397  "ntasks=%d head=%u tail=%u\n",
1398  gtid, __kmp_gtid_from_thread( threads_data[victim_tid].td.td_thr ),
1399  task_team, victim_td->td.td_deque_ntasks,
1400  victim_td->td.td_deque_head, victim_td->td.td_deque_tail) );
1401  return NULL;
1402  }
1403  victim_td -> td.td_deque_tail = tail;
1404  }
1405  if (*thread_finished) {
1406  // We need to un-mark this victim as a finished victim. This must be done before
1407  // releasing the lock, or else other threads (starting with the master victim)
1408  // might be prematurely released from the barrier!!!
1409  kmp_uint32 count = KMP_TEST_THEN_INC32( (kmp_int32 *)unfinished_threads );
1410 
1411  KA_TRACE(20, ("__kmp_steal_task: T#%d inc unfinished_threads to %d: task_team=%p\n",
1412  gtid, count + 1, task_team) );
1413 
1414  *thread_finished = FALSE;
1415  }
1416  TCW_4(victim_td -> td.td_deque_ntasks, TCR_4(victim_td -> td.td_deque_ntasks) - 1);
1417 
1418  __kmp_release_bootstrap_lock( & victim_td -> td.td_deque_lock );
1419 
1420  KA_TRACE(10, ("__kmp_steal_task(exit #3): T#%d stole task %p from T#%d: task_team=%p "
1421  "ntasks=%d head=%u tail=%u\n",
1422  gtid, taskdata, __kmp_gtid_from_thread( victim ), task_team,
1423  victim_td->td.td_deque_ntasks, victim_td->td.td_deque_head,
1424  victim_td->td.td_deque_tail) );
1425 
1426  task = KMP_TASKDATA_TO_TASK( taskdata );
1427  return task;
1428 }
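
/*
 * Illustrative sketch (not part of the runtime): the constrained branch above
 * accepts a candidate task only if walking its td_parent chain, without
 * climbing above the stealing thread's current nesting level, reaches the
 * current task. The standalone code below (hypothetical demo_* names,
 * simplified fields) shows that ancestor walk; it is excluded from
 * compilation of this file.
 */
#if 0
#include <stddef.h>

typedef struct demo_task {
    struct demo_task *parent;  /* corresponds to td_parent */
    int               level;   /* corresponds to td_level  */
} demo_task_t;

/* Returns 1 if 'candidate' is a descendant of 'current', i.e. stealing it
 * would respect the (simplified) task scheduling constraint. */
static int demo_obeys_tsc(const demo_task_t *candidate, const demo_task_t *current) {
    const demo_task_t *p = candidate->parent;
    /* Climb towards the root, but never above current's nesting level. */
    while (p != current && p != NULL && p->level > current->level)
        p = p->parent;
    return p == current;
}
#endif
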
1429 
1430 
1431 //-----------------------------------------------------------------------------
1432 // __kmp_execute_tasks: Choose and execute tasks until either the condition
1433 // is satisfied (return true) or there are none left (return false).
1434 // final_spin is TRUE if this is the spin at the release barrier.
1435 // thread_finished indicates whether the thread is finished executing all
1436 // the tasks it has on its deque, and is at the release barrier.
1437 // spinner is the location on which to spin.
1438 // spinner == NULL means only execute a single task and return.
1439 // checker is the value to check to terminate the spin.
1440 
1441 int
1442 __kmp_execute_tasks( kmp_info_t *thread,
1443  kmp_int32 gtid,
1444  volatile kmp_uint *spinner,
1445  kmp_uint checker,
1446  int final_spin,
1447  int *thread_finished,
1448 #if USE_ITT_BUILD
1449  void * itt_sync_obj,
1450 #endif /* USE_ITT_BUILD */
1451  kmp_int32 is_constrained )
1452 {
1453  kmp_task_team_t * task_team;
1454  kmp_team_t * team;
1455  kmp_thread_data_t * threads_data;
1456  kmp_task_t * task;
1457  kmp_taskdata_t * current_task = thread -> th.th_current_task;
1458  volatile kmp_uint32 * unfinished_threads;
1459  kmp_int32 nthreads, last_stolen, k, tid;
1460 
1461  KMP_DEBUG_ASSERT( __kmp_tasking_mode != tskm_immediate_exec );
1462  KMP_DEBUG_ASSERT( thread == __kmp_threads[ gtid ] );
1463 
1464  task_team = thread -> th.th_task_team;
1465  KMP_DEBUG_ASSERT( task_team != NULL );
1466 
1467  KA_TRACE(15, ("__kmp_execute_tasks(enter): T#%d final_spin=%d *thread_finished=%d\n",
1468  gtid, final_spin, *thread_finished) );
1469 
1470  threads_data = (kmp_thread_data_t *)TCR_PTR(task_team -> tt.tt_threads_data);
1471  KMP_DEBUG_ASSERT( threads_data != NULL );
1472 
1473  nthreads = task_team -> tt.tt_nproc;
1474  unfinished_threads = &(task_team -> tt.tt_unfinished_threads);
1475  KMP_DEBUG_ASSERT( nthreads > 1 );
1476  KMP_DEBUG_ASSERT( TCR_4((int)*unfinished_threads) >= 0 );
1477 
1478  // Choose tasks from our own work queue.
1479  start:
1480  while (( task = __kmp_remove_my_task( thread, gtid, task_team, is_constrained )) != NULL ) {
1481 #if USE_ITT_BUILD && USE_ITT_NOTIFY
1482  if ( __itt_sync_create_ptr || KMP_ITT_DEBUG ) {
1483  if ( itt_sync_obj == NULL ) {
1484  // we are at fork barrier where we could not get the object reliably
1485  itt_sync_obj = __kmp_itt_barrier_object( gtid, bs_forkjoin_barrier );
1486  }
1487  __kmp_itt_task_starting( itt_sync_obj );
1488  }
1489 #endif /* USE_ITT_BUILD && USE_ITT_NOTIFY */
1490  __kmp_invoke_task( gtid, task, current_task );
1491 #if USE_ITT_BUILD
1492  if ( itt_sync_obj != NULL )
1493  __kmp_itt_task_finished( itt_sync_obj );
1494 #endif /* USE_ITT_BUILD */
1495 
1496  // If this thread is only partway through the barrier and the condition
1497  // is met, then return now, so that the barrier gather/release pattern can proceed.
1498  // If this thread is in the last spin loop in the barrier, waiting to be
1499  // released, we know that the termination condition will not be satisfied,
1500  // so don't waste any cycles checking it.
1501  if ((spinner == NULL) || ((!final_spin) && (TCR_4(*spinner) == checker))) {
1502  KA_TRACE(15, ("__kmp_execute_tasks(exit #1): T#%d spin condition satisfied\n", gtid) );
1503  return TRUE;
1504  }
1505  KMP_YIELD( __kmp_library == library_throughput ); // Yield before executing next task
1506  }
1507 
1508  // This thread's work queue is empty. If we are in the final spin loop
1509  // of the barrier, check and see if the termination condition is satisfied.
1510  if (final_spin) {
1511  // First, decrement the #unfinished threads, if that has not already
1512  // been done. This decrement might be to the spin location, and
1513  // result in the termination condition being satisfied.
1514  if (! *thread_finished) {
1515  kmp_uint32 count = KMP_TEST_THEN_DEC32( (kmp_int32 *)unfinished_threads ) - 1;
1516  KA_TRACE(20, ("__kmp_execute_tasks(dec #1): T#%d dec unfinished_threads to %d task_team=%p\n",
1517  gtid, count, task_team) );
1518  *thread_finished = TRUE;
1519  }
1520 
1521  // It is now unsafe to reference thread->th.th_team !!!
1522  // Decrementing task_team->tt.tt_unfinished_threads can allow the master
1523  // thread to pass through the barrier, where it might reset each thread's
1524  // th.th_team field for the next parallel region.
1525  // If we can steal more work, we know that this has not happened yet.
1526  if ((spinner != NULL) && (TCR_4(*spinner) == checker)) {
1527  KA_TRACE(15, ("__kmp_execute_tasks(exit #2): T#%d spin condition satisfied\n", gtid) );
1528  return TRUE;
1529  }
1530  }
1531 
1532  // Try to steal from the last place I stole from successfully.
1533  tid = thread -> th.th_info.ds.ds_tid;//__kmp_tid_from_gtid( gtid );
1534  last_stolen = threads_data[ tid ].td.td_deque_last_stolen;
1535 
1536  if (last_stolen != -1) {
1537  kmp_info_t *other_thread = threads_data[last_stolen].td.td_thr;
1538 
1539  while ((task = __kmp_steal_task( other_thread, gtid, task_team, unfinished_threads,
1540  thread_finished, is_constrained )) != NULL)
1541  {
1542 #if USE_ITT_BUILD && USE_ITT_NOTIFY
1543  if ( __itt_sync_create_ptr || KMP_ITT_DEBUG ) {
1544  if ( itt_sync_obj == NULL ) {
1545  // we are at fork barrier where we could not get the object reliably
1546  itt_sync_obj = __kmp_itt_barrier_object( gtid, bs_forkjoin_barrier );
1547  }
1548  __kmp_itt_task_starting( itt_sync_obj );
1549  }
1550 #endif /* USE_ITT_BUILD && USE_ITT_NOTIFY */
1551  __kmp_invoke_task( gtid, task, current_task );
1552 #if USE_ITT_BUILD
1553  if ( itt_sync_obj != NULL )
1554  __kmp_itt_task_finished( itt_sync_obj );
1555 #endif /* USE_ITT_BUILD */
1556 
1557  // Check to see if this thread can proceed.
1558  if ((spinner == NULL) || ((!final_spin) && (TCR_4(*spinner) == checker))) {
1559  KA_TRACE(15, ("__kmp_execute_tasks(exit #3): T#%d spin condition satisfied\n",
1560  gtid) );
1561  return TRUE;
1562  }
1563 
1564  KMP_YIELD( __kmp_library == library_throughput ); // Yield before executing next task
1565  // If the execution of the stolen task resulted in more tasks being
1566  // placed on our run queue, then restart the whole process.
1567  if (TCR_4(threads_data[ tid ].td.td_deque_ntasks) != 0) {
1568  KA_TRACE(20, ("__kmp_execute_tasks: T#%d stolen task spawned other tasks, restart\n",
1569  gtid) );
1570  goto start;
1571  }
1572  }
1573 
1574  // Don't give priority to stealing from this thread anymore.
1575  threads_data[ tid ].td.td_deque_last_stolen = -1;
1576 
1577  // The victim's work queue is empty. If we are in the final spin loop
1578  // of the barrier, check and see if the termination condition is satisfied.
1579  if (final_spin) {
1580  // First, decrement the #unfinished threads, if that has not already
1581  // been done. This decrement might be to the spin location, and
1582  // result in the termination condition being satisfied.
1583  if (! *thread_finished) {
1584  kmp_uint32 count = KMP_TEST_THEN_DEC32( (kmp_int32 *)unfinished_threads ) - 1;
1585  KA_TRACE(20, ("__kmp_execute_tasks(dec #2): T#%d dec unfinished_threads to %d "
1586  "task_team=%p\n", gtid, count, task_team) );
1587  *thread_finished = TRUE;
1588  }
1589 
1590  // If __kmp_tasking_mode != tskm_immediate_exec
1591  // then it is now unsafe to reference thread->th.th_team !!!
1592  // Decrementing task_team->tt.tt_unfinished_threads can allow the master
1593  // thread to pass through the barrier, where it might reset each thread's
1594  // th.th_team field for the next parallel region.
1595  // If we can steal more work, we know that this has not happened yet.
1596  if ((spinner != NULL) && (TCR_4(*spinner) == checker)) {
1597  KA_TRACE(15, ("__kmp_execute_tasks(exit #4): T#%d spin condition satisfied\n",
1598  gtid) );
1599  return TRUE;
1600  }
1601  }
1602  }
1603 
1604  // Find a different thread to steal work from. Pick a random thread.
1605  // My initial plan was to cycle through all the threads, and only return
1606  // if we tried to steal from every thread, and failed. Arch says that's
1607  // not such a great idea.
1608  // GEH - need yield code in this loop for throughput library mode?
1609  new_victim:
1610  k = __kmp_get_random( thread ) % (nthreads - 1);
1611  if ( k >= thread -> th.th_info.ds.ds_tid ) {
1612  ++k; // Adjusts random distribution to exclude self
1613  }
1614  {
1615  kmp_info_t *other_thread = threads_data[k].td.td_thr;
1616  int first;
1617 
1618  // There is a slight chance that __kmp_enable_tasking() did not wake up
1619  // all threads waiting at the barrier. If the selected thread is sleeping,
1620  // then wake it up. Since we were going to pay the cache miss penalty
1621  // for referencing another thread's kmp_info_t struct anyway, the check
1622  // shouldn't cost too much performance at this point.
1623  // In extra barrier mode, tasks do not sleep at the separate tasking
1624  // barrier, so this isn't a problem.
1625  if ( ( __kmp_tasking_mode == tskm_task_teams ) &&
1626  (__kmp_dflt_blocktime != KMP_MAX_BLOCKTIME) &&
1627  (TCR_PTR(other_thread->th.th_sleep_loc) != NULL))
1628  {
1629  __kmp_resume( __kmp_gtid_from_thread( other_thread ), NULL );
1630 
1631  // A sleeping thread should not have any tasks on its queue.
1632  // There is a slight possibility that it resumes, steals a task from
1633  // another thread, which spawns more tasks, all in the time that it takes
1634  // this thread to check => don't write an assertion that the victim's
1635  // queue is empty. Try stealing from a different thread.
1636  goto new_victim;
1637  }
1638 
1639  // Now try to steal work from the selected thread
1640  first = TRUE;
1641  while ((task = __kmp_steal_task( other_thread, gtid, task_team, unfinished_threads,
1642  thread_finished, is_constrained )) != NULL)
1643  {
1644 #if USE_ITT_BUILD && USE_ITT_NOTIFY
1645  if ( __itt_sync_create_ptr || KMP_ITT_DEBUG ) {
1646  if ( itt_sync_obj == NULL ) {
1647  // we are at fork barrier where we could not get the object reliably
1648  itt_sync_obj = __kmp_itt_barrier_object( gtid, bs_forkjoin_barrier );
1649  }
1650  __kmp_itt_task_starting( itt_sync_obj );
1651  }
1652 #endif /* USE_ITT_BUILD && USE_ITT_NOTIFY */
1653  __kmp_invoke_task( gtid, task, current_task );
1654 #if USE_ITT_BUILD
1655  if ( itt_sync_obj != NULL )
1656  __kmp_itt_task_finished( itt_sync_obj );
1657 #endif /* USE_ITT_BUILD */
1658 
1659  // Try stealing from this victim again, in the future.
1660  if (first) {
1661  threads_data[ tid ].td.td_deque_last_stolen = k;
1662  first = FALSE;
1663  }
1664 
1665  // Check to see if this thread can proceed.
1666  if ((spinner == NULL) || ((!final_spin) && (TCR_4(*spinner) == checker))) {
1667  KA_TRACE(15, ("__kmp_execute_tasks(exit #5): T#%d spin condition satisfied\n",
1668  gtid) );
1669  return TRUE;
1670  }
1671  KMP_YIELD( __kmp_library == library_throughput ); // Yield before executing next task
1672 
1673  // If the execution of the stolen task resulted in more tasks being
1674  // placed on our run queue, then restart the whole process.
1675  if (TCR_4(threads_data[ tid ].td.td_deque_ntasks) != 0) {
1676  KA_TRACE(20, ("__kmp_execute_tasks: T#%d stolen task spawned other tasks, restart\n",
1677  gtid) );
1678  goto start;
1679  }
1680  }
1681 
1682  // The victim's work queue is empty. If we are in the final spin loop
1683  // of the barrier, check and see if the termination condition is satisfied.
1684  // Going on and finding a new victim to steal from is expensive, as it
1685  // involves a lot of cache misses, so we definitely want to re-check the
1686  // termination condition before doing that.
1687  if (final_spin) {
1688  // First, decrement the #unfinished threads, if that has not already
1689  // been done. This decrement might be to the spin location, and
1690  // result in the termination condition being satisfied.
1691  if (! *thread_finished) {
1692  kmp_uint32 count = KMP_TEST_THEN_DEC32( (kmp_int32 *)unfinished_threads ) - 1;
1693  KA_TRACE(20, ("__kmp_execute_tasks(dec #3): T#%d dec unfinished_threads to %d; "
1694  "task_team=%p\n",
1695  gtid, count, task_team) );
1696  *thread_finished = TRUE;
1697  }
1698 
1699  // If __kmp_tasking_mode != tskm_immediate_exec,
1700  // then it is now unsafe to reference thread->th.th_team !!!
1701  // Decrementing task_team->tt.tt_unfinished_threads can allow the master
1702  // thread to pass through the barrier, where it might reset each thread's
1703  // th.th_team field for the next parallel region.
1704  // If we can steal more work, we know that this has not happened yet.
1705  if ((spinner != NULL) && (TCR_4(*spinner) == checker)) {
1706  KA_TRACE(15, ("__kmp_execute_tasks(exit #6): T#%d spin condition satisfied\n",
1707  gtid) );
1708  return TRUE;
1709  }
1710  }
1711  }
1712 
1713  KA_TRACE(15, ("__kmp_execute_tasks(exit #7): T#%d can't find work\n", gtid) );
1714  return FALSE;
1715 }
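
/*
 * Illustrative sketch (not part of the runtime): when no remembered victim is
 * available, the routine above picks a random victim with
 *     k = __kmp_get_random( thread ) % (nthreads - 1);  if (k >= tid) ++k;
 * which chooses uniformly among the other nthreads - 1 threads while never
 * selecting the caller itself. The standalone code below (hypothetical demo_*
 * names, rand() standing in for __kmp_get_random) demonstrates the mapping;
 * it is excluded from compilation of this file.
 */
#if 0
#include <stdio.h>
#include <stdlib.h>

/* Pick a victim thread id in [0, nthreads) that is never equal to my_tid.
 * Requires nthreads > 1. */
static int demo_pick_victim(int my_tid, int nthreads) {
    int k = rand() % (nthreads - 1);
    if (k >= my_tid)
        ++k;    /* shift the upper part of the range past my_tid */
    return k;
}

int main(void) {
    int counts[4] = { 0, 0, 0, 0 };
    int i;
    /* With my_tid = 2 and nthreads = 4, k in {0,1,2} maps to victims {0,1,3}. */
    for (i = 0; i < 1000; i++)
        counts[demo_pick_victim(2, 4)]++;
    printf("victim counts: %d %d %d %d (own slot 2 stays 0)\n",
           counts[0], counts[1], counts[2], counts[3]);
    return 0;
}
#endif
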
1716 
1717 
1718 //-----------------------------------------------------------------------------
1719 // __kmp_enable_tasking: Set up the task team's threads_data array and resume
1720 // threads sleeping at the next barrier so they can assist in executing enqueued tasks.
1721 // The first thread in sets up the threads_data array atomically.
1722 
1723 static void
1724 __kmp_enable_tasking( kmp_task_team_t *task_team, kmp_info_t *this_thr )
1725 {
1726  kmp_team_t *team = this_thr->th.th_team;
1727  kmp_thread_data_t *threads_data;
1728  int nthreads, i, is_init_thread;
1729 
1730  KA_TRACE( 10, ( "__kmp_enable_tasking(enter): T#%d\n",
1731  __kmp_gtid_from_thread( this_thr ) ) );
1732 
1733  KMP_DEBUG_ASSERT(task_team != NULL);
1734  KMP_DEBUG_ASSERT(team != NULL);
1735 
1736  nthreads = task_team->tt.tt_nproc;
1737  KMP_DEBUG_ASSERT(nthreads > 0);
1738  KMP_DEBUG_ASSERT(nthreads == team->t.t_nproc);
1739 
1740  // Allocate or increase the size of threads_data if necessary
1741  is_init_thread = __kmp_realloc_task_threads_data( this_thr, task_team );
1742 
1743  if (!is_init_thread) {
1744  // Some other thread already set up the array.
1745  KA_TRACE( 20, ( "__kmp_enable_tasking(exit): T#%d: threads array already set up.\n",
1746  __kmp_gtid_from_thread( this_thr ) ) );
1747  return;
1748  }
1749  threads_data = (kmp_thread_data_t *)TCR_PTR(task_team -> tt.tt_threads_data);
1750  KMP_DEBUG_ASSERT( threads_data != NULL );
1751 
1752  if ( ( __kmp_tasking_mode == tskm_task_teams ) &&
1753  ( __kmp_dflt_blocktime != KMP_MAX_BLOCKTIME ) )
1754  {
1755  // Release any threads sleeping at the barrier, so that they can steal
1756  // tasks and execute them. In extra barrier mode, tasks do not sleep
1757  // at the separate tasking barrier, so this isn't a problem.
1758  for (i = 0; i < nthreads; i++) {
1759  volatile kmp_uint *sleep_loc;
1760  kmp_info_t *thread = threads_data[i].td.td_thr;
1761 
1762  if (i == this_thr->th.th_info.ds.ds_tid) {
1763  continue;
1764  }
1765  // Since we haven't locked the thread's suspend mutex lock at this
1766  // point, there is a small window where a thread might be putting
1767  // itself to sleep, but hasn't set the th_sleep_loc field yet.
1768  // To work around this, __kmp_execute_tasks() periodically checks to
1769  // see if other threads are sleeping (using the same random
1770  // mechanism that is used for task stealing) and awakens them if
1771  // they are.
1772  if ( ( sleep_loc = (volatile kmp_uint *)
1773  TCR_PTR( thread -> th.th_sleep_loc) ) != NULL )
1774  {
1775  KF_TRACE( 50, ( "__kmp_enable_tasking: T#%d waking up thread T#%d\n",
1776  __kmp_gtid_from_thread( this_thr ),
1777  __kmp_gtid_from_thread( thread ) ) );
1778  __kmp_resume( __kmp_gtid_from_thread( thread ), sleep_loc );
1779  }
1780  else {
1781  KF_TRACE( 50, ( "__kmp_enable_tasking: T#%d don't wake up thread T#%d\n",
1782  __kmp_gtid_from_thread( this_thr ),
1783  __kmp_gtid_from_thread( thread ) ) );
1784  }
1785  }
1786  }
1787 
1788  KA_TRACE( 10, ( "__kmp_enable_tasking(exit): T#%d\n",
1789  __kmp_gtid_from_thread( this_thr ) ) );
1790 }
1791 
1792 
1793 /* ------------------------------------------------------------------------ */
1794 /*
1795  * Utility routines for "task teams". A task team (kmp_task_team_t) is kind of
1796  * like a shadow of the kmp_team_t data struct, with a different lifetime.
1797  * After a child thread checks into a barrier and calls __kmp_release() from
1798  * the particular variant of __kmp_<barrier_kind>_barrier_gather(), it can no
1799  * longer assume that the kmp_team_t structure is intact (at any moment, the
1800  * master thread may exit the barrier code and free the team data structure,
1801  * and return the threads to the thread pool).
1802  *
1803  * This does not work with the tasking code, as the thread is still
1804  * expected to participate in the execution of any tasks that may have been
1805  * spawned by a member of the team, and the thread still needs access to
1806  * each thread in the team, so that it can steal work from it.
1807  *
1808  * Enter the existence of the kmp_task_team_t struct. It employs a reference
1809  * counting mechanism, and is allocated by the master thread before calling
1810  * __kmp_<barrier_kind>_release, and then is released by the last thread to
1811  * exit __kmp_<barrier_kind>_release at the next barrier. I.e. the lifetimes
1812  * of the kmp_task_team_t structs for consecutive barriers can overlap
1813  * (and will, unless the master thread is the last thread to exit the barrier
1814  * release phase, which is not typical).
1815  *
1816  * The existence of such a struct is useful outside the context of tasking,
1817  * but for now, I'm trying to keep it specific to the OMP_30_ENABLED macro,
1818  * so that any performance differences show up when comparing the 2.5 vs. 3.0
1819  * libraries.
1820  *
1821  * We currently use the existence of the threads array as an indicator that
1822  * tasks were spawned since the last barrier. If the structure is to be
1823  * useful outside the context of tasking, then this will have to change, but
1824  * not setting the field minimizes the performance impact of tasking on
1825  * barriers, when no explicit tasks were spawned (pushed, actually).
1826  */
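
/*
 * Illustrative sketch (not part of the runtime): the reference-counting
 * lifetime described above boils down to "allocate with ref count
 * nthreads - 1, each worker decrements on its way out, the last one frees".
 * The standalone code below (hypothetical demo_* names; C11 atomic_fetch_sub
 * standing in for KMP_TEST_THEN_DEC32, free() standing in for returning the
 * struct to the free list) shows that pattern; it is excluded from
 * compilation of this file.
 */
#if 0
#include <stdatomic.h>
#include <stdlib.h>

typedef struct demo_task_team {
    atomic_int ref_ct;   /* corresponds to tt.tt_ref_ct */
    /* ... per-team tasking state would live here ... */
} demo_task_team_t;

/* The master allocates before the release phase; the workers (not the master)
 * hold the references, so the count starts at nthreads - 1. */
static demo_task_team_t *demo_task_team_alloc(int nthreads) {
    demo_task_team_t *tt = (demo_task_team_t *)malloc(sizeof(*tt));
    if (tt != NULL)
        atomic_init(&tt->ref_ct, nthreads - 1);
    return tt;
}

/* Each worker drops its reference when it leaves the release phase of the
 * next barrier; the last one out frees the struct. */
static void demo_task_team_unref(demo_task_team_t *tt) {
    if (atomic_fetch_sub(&tt->ref_ct, 1) - 1 == 0)
        free(tt);
}
#endif
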
1827 
1828 static kmp_task_team_t *__kmp_free_task_teams = NULL; // Free list for task_team data structures
1829 // Lock for task team data structures
1830 static kmp_bootstrap_lock_t __kmp_task_team_lock = KMP_BOOTSTRAP_LOCK_INITIALIZER( __kmp_task_team_lock );
1831 
1832 
1833 //------------------------------------------------------------------------------
1834 // __kmp_alloc_task_deque:
1835 // Allocates a task deque for a particular thread, and initializes the necessary
1836 // data structures relating to the deque. This only happens once per thread
1837 // per task team since task teams are recycled.
1838 // No lock is needed during allocation since each thread allocates its own
1839 // deque.
1840 
1841 static void
1842 __kmp_alloc_task_deque( kmp_info_t *thread, kmp_thread_data_t *thread_data )
1843 {
1844  __kmp_init_bootstrap_lock( & thread_data -> td.td_deque_lock );
1845  KMP_DEBUG_ASSERT( thread_data -> td.td_deque == NULL );
1846 
1847  // Initialize last stolen task field to "none"
1848  thread_data -> td.td_deque_last_stolen = -1;
1849 
1850  KMP_DEBUG_ASSERT( TCR_4(thread_data -> td.td_deque_ntasks) == 0 );
1851  KMP_DEBUG_ASSERT( thread_data -> td.td_deque_head == 0 );
1852  KMP_DEBUG_ASSERT( thread_data -> td.td_deque_tail == 0 );
1853 
1854  KE_TRACE( 10, ( "__kmp_alloc_task_deque: T#%d allocating deque[%d] for thread_data %p\n",
1855  __kmp_gtid_from_thread( thread ), TASK_DEQUE_SIZE, thread_data ) );
1856  // Allocate space for task deque, and zero the deque
1857  // Cannot use __kmp_thread_calloc() because threads not around for
1858  // kmp_reap_task_team( ).
1859  thread_data -> td.td_deque = (kmp_taskdata_t **)
1860  __kmp_allocate( TASK_DEQUE_SIZE * sizeof(kmp_taskdata_t *));
1861 }
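
/*
 * Illustrative sketch (not part of the runtime): the deque allocated above is
 * used as a fixed-size ring buffer whose head/tail indices wrap with
 * TASK_DEQUE_MASK, which relies on TASK_DEQUE_SIZE being a power of two.
 * The standalone code below (hypothetical demo_* names, no locking, plain
 * void* payloads) shows that indexing scheme in isolation; it is excluded
 * from compilation of this file.
 */
#if 0
#include <stdio.h>

#define DEMO_DEQUE_SIZE 8                      /* must be a power of two */
#define DEMO_DEQUE_MASK (DEMO_DEQUE_SIZE - 1)  /* index wrap mask        */

typedef struct demo_deque {
    void *slots[DEMO_DEQUE_SIZE];
    int   head;     /* steal side            */
    int   tail;     /* owner side            */
    int   ntasks;   /* current element count */
} demo_deque_t;

/* Owner pushes at the tail; returns 0 if the deque is full. */
static int demo_push(demo_deque_t *d, void *task) {
    if (d->ntasks >= DEMO_DEQUE_SIZE)
        return 0;
    d->slots[d->tail] = task;
    d->tail = (d->tail + 1) & DEMO_DEQUE_MASK;   /* wrap, as in the runtime */
    d->ntasks++;
    return 1;
}

/* Owner pops at the tail (LIFO); a thief would take from the head instead. */
static void *demo_pop(demo_deque_t *d) {
    if (d->ntasks == 0)
        return NULL;
    d->tail = (d->tail - 1) & DEMO_DEQUE_MASK;   /* back up and wrap */
    d->ntasks--;
    return d->slots[d->tail];
}

int main(void) {
    demo_deque_t d = { {0}, 0, 0, 0 };
    int a = 1, b = 2;
    demo_push(&d, &a);
    demo_push(&d, &b);
    printf("popped %d\n", *(int *)demo_pop(&d));  /* prints 2: LIFO for the owner */
    return 0;
}
#endif
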
1862 
1863 
1864 //------------------------------------------------------------------------------
1865 // __kmp_free_task_deque:
1866 // Deallocates a task deque for a particular thread.
1867 // Happens at library deallocation, so there is no need to reset all thread data fields.
1868 
1869 static void
1870 __kmp_free_task_deque( kmp_thread_data_t *thread_data )
1871 {
1872  __kmp_acquire_bootstrap_lock( & thread_data -> td.td_deque_lock );
1873 
1874  if ( thread_data -> td.td_deque != NULL ) {
1875  TCW_4(thread_data -> td.td_deque_ntasks, 0);
1876  __kmp_free( thread_data -> td.td_deque );
1877  thread_data -> td.td_deque = NULL;
1878  }
1879  __kmp_release_bootstrap_lock( & thread_data -> td.td_deque_lock );
1880 
1881 #ifdef BUILD_TIED_TASK_STACK
1882  // GEH: Figure out what to do here for td_susp_tied_tasks
1883  if ( thread_data -> td.td_susp_tied_tasks.ts_entries != TASK_STACK_EMPTY ) {
1884  __kmp_free_task_stack( __kmp_thread_from_gtid( gtid ), thread_data );
1885  }
1886 #endif // BUILD_TIED_TASK_STACK
1887 }
1888 
1889 
1890 //------------------------------------------------------------------------------
1891 // __kmp_realloc_task_threads_data:
1892 // Allocates a threads_data array for a task team, either by allocating an initial
1893 // array or enlarging an existing array. Only the first thread to get the lock
1894 // allocates or enlarges the array and re-initializes the array elements.
1895 // That thread returns "TRUE", the rest return "FALSE".
1896 // Assumes that the new array size is given by task_team -> tt.tt_nproc.
1897 // The current size is given by task_team -> tt.tt_max_threads.
1898 
1899 static int
1900 __kmp_realloc_task_threads_data( kmp_info_t *thread, kmp_task_team_t *task_team )
1901 {
1902  kmp_thread_data_t ** threads_data_p;
1903  kmp_int32 nthreads, maxthreads;
1904  int is_init_thread = FALSE;
1905 
1906  if ( TCR_4(task_team -> tt.tt_found_tasks) ) {
1907  // Already reallocated and initialized.
1908  return FALSE;
1909  }
1910 
1911  threads_data_p = & task_team -> tt.tt_threads_data;
1912  nthreads = task_team -> tt.tt_nproc;
1913  maxthreads = task_team -> tt.tt_max_threads;
1914 
1915  // All threads must lock when they encounter the first task of the implicit task
1916  // region to make sure threads_data fields are (re)initialized before used.
1917  __kmp_acquire_bootstrap_lock( & task_team -> tt.tt_threads_lock );
1918 
1919  if ( ! TCR_4(task_team -> tt.tt_found_tasks) ) {
1920  // first thread to enable tasking
1921  kmp_team_t *team = thread -> th.th_team;
1922  int i;
1923 
1924  is_init_thread = TRUE;
1925  if ( maxthreads < nthreads ) {
1926 
1927  if ( *threads_data_p != NULL ) {
1928  kmp_thread_data_t *old_data = *threads_data_p;
1929  kmp_thread_data_t *new_data = NULL;
1930 
1931  KE_TRACE( 10, ( "__kmp_realloc_task_threads_data: T#%d reallocating "
1932  "threads data for task_team %p, new_size = %d, old_size = %d\n",
1933  __kmp_gtid_from_thread( thread ), task_team,
1934  nthreads, maxthreads ) );
1935  // Reallocate threads_data to have more elements than current array
1936  // Cannot use __kmp_thread_realloc() because threads not around for
1937  // kmp_reap_task_team( ). Note all new array entries are initialized
1938  // to zero by __kmp_allocate().
1939  new_data = (kmp_thread_data_t *)
1940  __kmp_allocate( nthreads * sizeof(kmp_thread_data_t) );
1941  // copy old data to new data (elements are of type kmp_thread_data_t)
1942  memcpy( (void *) new_data, (void *) old_data,
1943  maxthreads * sizeof(kmp_thread_data_t) );
1944 
1945 #ifdef BUILD_TIED_TASK_STACK
1946  // GEH: Figure out if this is the right thing to do
1947  for (i = maxthreads; i < nthreads; i++) {
1948  kmp_thread_data_t *thread_data = & (*threads_data_p)[i];
1949  __kmp_init_task_stack( __kmp_gtid_from_thread( thread ), thread_data );
1950  }
1951 #endif // BUILD_TIED_TASK_STACK
1952  // Install the new data and free the old data
1953  (*threads_data_p) = new_data;
1954  __kmp_free( old_data );
1955  }
1956  else {
1957  KE_TRACE( 10, ( "__kmp_realloc_task_threads_data: T#%d allocating "
1958  "threads data for task_team %p, size = %d\n",
1959  __kmp_gtid_from_thread( thread ), task_team, nthreads ) );
1960  // Make the initial allocate for threads_data array, and zero entries
1961  // Cannot use __kmp_thread_calloc() because threads not around for
1962  // kmp_reap_task_team( ).
1963  *threads_data_p = (kmp_thread_data_t *)
1964  __kmp_allocate( nthreads * sizeof(kmp_thread_data_t) );
1965 #ifdef BUILD_TIED_TASK_STACK
1966  // GEH: Figure out if this is the right thing to do
1967  for (i = 0; i < nthreads; i++) {
1968  kmp_thread_data_t *thread_data = & (*threads_data_p)[i];
1969  __kmp_init_task_stack( __kmp_gtid_from_thread( thread ), thread_data );
1970  }
1971 #endif // BUILD_TIED_TASK_STACK
1972  }
1973  task_team -> tt.tt_max_threads = nthreads;
1974  }
1975  else {
1976  // If array has (more than) enough elements, go ahead and use it
1977  KMP_DEBUG_ASSERT( *threads_data_p != NULL );
1978  }
1979 
1980  // initialize threads_data pointers back to thread_info structures
1981  for (i = 0; i < nthreads; i++) {
1982  kmp_thread_data_t *thread_data = & (*threads_data_p)[i];
1983  thread_data -> td.td_thr = team -> t.t_threads[i];
1984 
1985  if ( thread_data -> td.td_deque_last_stolen >= nthreads) {
1986  // The last stolen field survives across teams / barrier, and the number
1987  // of threads may have changed. It's possible (likely?) that a new
1988  // parallel region will exhibit the same behavior as the previous region.
1989  thread_data -> td.td_deque_last_stolen = -1;
1990  }
1991  }
1992 
1993  KMP_MB();
1994  TCW_SYNC_4(task_team -> tt.tt_found_tasks, TRUE);
1995  }
1996 
1997  __kmp_release_bootstrap_lock( & task_team -> tt.tt_threads_lock );
1998  return is_init_thread;
1999 }
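
/*
 * Illustrative sketch (not part of the runtime): the grow path above
 * allocates a larger zeroed array, copies the old elements (sized as
 * kmp_thread_data_t elements), installs the new array, and frees the old
 * one. The standalone code below (hypothetical demo_* names, calloc/free
 * standing in for __kmp_allocate/__kmp_free) shows the same grow-and-copy
 * pattern; it is excluded from compilation of this file.
 */
#if 0
#include <stdlib.h>
#include <string.h>

typedef struct demo_thread_data {
    void *deque;
    int   last_stolen;
} demo_thread_data_t;

/* Grow 'arr' from old_n to new_n elements, zeroing the new tail.
 * Note the copy size is per-element sizeof(demo_thread_data_t), not a
 * pointer size. */
static demo_thread_data_t *demo_grow(demo_thread_data_t *arr, int old_n, int new_n) {
    demo_thread_data_t *grown = (demo_thread_data_t *)calloc((size_t)new_n, sizeof(*grown));
    if (grown == NULL)
        return arr;                 /* keep the old array if allocation fails */
    if (arr != NULL) {
        memcpy(grown, arr, (size_t)old_n * sizeof(*grown));  /* element-sized copy */
        free(arr);
    }
    return grown;
}
#endif
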
2000 
2001 
2002 //------------------------------------------------------------------------------
2003 // __kmp_free_task_threads_data:
2004 // Deallocates a threads_data array for a task team, including any attached
2005 // tasking deques. Only occurs at library shutdown.
2006 
2007 static void
2008 __kmp_free_task_threads_data( kmp_task_team_t *task_team )
2009 {
2010  __kmp_acquire_bootstrap_lock( & task_team -> tt.tt_threads_lock );
2011  if ( task_team -> tt.tt_threads_data != NULL ) {
2012  int i;
2013  for (i = 0; i < task_team->tt.tt_max_threads; i++ ) {
2014  __kmp_free_task_deque( & task_team -> tt.tt_threads_data[i] );
2015  }
2016  __kmp_free( task_team -> tt.tt_threads_data );
2017  task_team -> tt.tt_threads_data = NULL;
2018  }
2019  __kmp_release_bootstrap_lock( & task_team -> tt.tt_threads_lock );
2020 }
2021 
2022 
2023 //------------------------------------------------------------------------------
2024 // __kmp_allocate_task_team:
2025 // Allocates a task team associated with a specific team, taking it from
2026 // the global task team free list if possible. Also initializes data structures.
2027 
2028 static kmp_task_team_t *
2029 __kmp_allocate_task_team( kmp_info_t *thread, kmp_team_t *team )
2030 {
2031  kmp_task_team_t *task_team = NULL;
2032  int nthreads;
2033 
2034  KA_TRACE( 20, ( "__kmp_allocate_task_team: T#%d entering; team = %p\n",
2035  (thread ? __kmp_gtid_from_thread( thread ) : -1), team ) );
2036 
2037  if (TCR_PTR(__kmp_free_task_teams) != NULL) {
2038  // Take a task team from the task team pool
2039  __kmp_acquire_bootstrap_lock( &__kmp_task_team_lock );
2040  if (__kmp_free_task_teams != NULL) {
2041  task_team = __kmp_free_task_teams;
2042  TCW_PTR(__kmp_free_task_teams, task_team -> tt.tt_next);
2043  task_team -> tt.tt_next = NULL;
2044  }
2045  __kmp_release_bootstrap_lock( &__kmp_task_team_lock );
2046  }
2047 
2048  if (task_team == NULL) {
2049  KE_TRACE( 10, ( "__kmp_allocate_task_team: T#%d allocating "
2050  "task team for team %p\n",
2051  __kmp_gtid_from_thread( thread ), team ) );
2052  // Allocate a new task team if one is not available.
2053  // Cannot use __kmp_thread_malloc() because threads not around for
2054  // kmp_reap_task_team( ).
2055  task_team = (kmp_task_team_t *) __kmp_allocate( sizeof(kmp_task_team_t) );
2056  __kmp_init_bootstrap_lock( & task_team -> tt.tt_threads_lock );
2057  //task_team -> tt.tt_threads_data = NULL; // AC: __kmp_allocate zeroes returned memory
2058  //task_team -> tt.tt_max_threads = 0;
2059  //task_team -> tt.tt_next = NULL;
2060  }
2061 
2062  TCW_4(task_team -> tt.tt_found_tasks, FALSE);
2063  task_team -> tt.tt_nproc = nthreads = team->t.t_nproc;
2064 
2065  task_team -> tt.tt_state = 0;
2066  TCW_4( task_team -> tt.tt_unfinished_threads, nthreads );
2067  TCW_4( task_team -> tt.tt_active, TRUE );
2068  TCW_4( task_team -> tt.tt_ref_ct, nthreads - 1);
2069 
2070  KA_TRACE( 20, ( "__kmp_allocate_task_team: T#%d exiting; task_team = %p\n",
2071  (thread ? __kmp_gtid_from_thread( thread ) : -1), task_team ) );
2072  return task_team;
2073 }
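
/*
 * Illustrative sketch (not part of the runtime): task teams are recycled
 * through a lock-protected singly linked free list -- pop a node under the
 * lock if one is available, otherwise allocate fresh, and push nodes back
 * under the lock when they are retired. The standalone code below
 * (hypothetical demo_* names, a pthread mutex standing in for the bootstrap
 * lock) shows that pattern; it is excluded from compilation of this file.
 */
#if 0
#include <pthread.h>
#include <stdlib.h>

typedef struct demo_team { struct demo_team *next; } demo_team_t;

static demo_team_t    *demo_free_list = NULL;
static pthread_mutex_t demo_free_lock = PTHREAD_MUTEX_INITIALIZER;

/* Reuse a recycled struct if one is available, otherwise allocate fresh. */
static demo_team_t *demo_team_get(void) {
    demo_team_t *t = NULL;
    pthread_mutex_lock(&demo_free_lock);
    if (demo_free_list != NULL) {
        t = demo_free_list;
        demo_free_list = t->next;
        t->next = NULL;
    }
    pthread_mutex_unlock(&demo_free_lock);
    if (t == NULL)
        t = (demo_team_t *)calloc(1, sizeof(*t));
    return t;
}

/* Return a struct to the free list for later reuse. */
static void demo_team_put(demo_team_t *t) {
    pthread_mutex_lock(&demo_free_lock);
    t->next = demo_free_list;
    demo_free_list = t;
    pthread_mutex_unlock(&demo_free_lock);
}
#endif
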
2074 
2075 
2076 //------------------------------------------------------------------------------
2077 // __kmp_free_task_team:
2078 // Frees the task team associated with a specific thread, and adds it
2079 // to the global task team free list.
2080 //
2081 
2082 static void
2083 __kmp_free_task_team( kmp_info_t *thread, kmp_task_team_t *task_team )
2084 {
2085  KA_TRACE( 20, ( "__kmp_free_task_team: T#%d task_team = %p\n",
2086  thread ? __kmp_gtid_from_thread( thread ) : -1, task_team ) );
2087 
2088  KMP_DEBUG_ASSERT( TCR_4(task_team -> tt.tt_ref_ct) == 0 );
2089 
2090  // Put task team back on free list
2091  __kmp_acquire_bootstrap_lock( & __kmp_task_team_lock );
2092 
2093  KMP_DEBUG_ASSERT( task_team -> tt.tt_next == NULL );
2094  task_team -> tt.tt_next = __kmp_free_task_teams;
2095  TCW_4(task_team -> tt.tt_found_tasks, FALSE);
2096  TCW_PTR(__kmp_free_task_teams, task_team);
2097 
2098  __kmp_release_bootstrap_lock( & __kmp_task_team_lock );
2099 }
2100 
2101 
2102 //------------------------------------------------------------------------------
2103 // __kmp_reap_task_teams:
2104 // Free all the task teams on the task team free list.
2105 // Should only be done during library shutdown.
2106 // Cannot do anything that needs a thread structure or gtid since they are already gone.
2107 
2108 void
2109 __kmp_reap_task_teams( void )
2110 {
2111  kmp_task_team_t *task_team;
2112 
2113  if ( TCR_PTR(__kmp_free_task_teams) != NULL ) {
2114  // Free all task_teams on the free list
2115  __kmp_acquire_bootstrap_lock( &__kmp_task_team_lock );
2116  while ( ( task_team = __kmp_free_task_teams ) != NULL ) {
2117  __kmp_free_task_teams = task_team -> tt.tt_next;
2118  task_team -> tt.tt_next = NULL;
2119 
2120  // Free threads_data if necessary
2121  if ( task_team -> tt.tt_threads_data != NULL ) {
2122  __kmp_free_task_threads_data( task_team );
2123  }
2124  __kmp_free( task_team );
2125  }
2126  __kmp_release_bootstrap_lock( &__kmp_task_team_lock );
2127  }
2128 }
2129 
2130 
2131 //------------------------------------------------------------------------------
2132 // __kmp_unref_task_team:
2133 // Remove one thread from referencing the task team structure by
2134 // decreasing the reference count, and deallocate the task team if there are
2135 // no more references to it.
2136 //
2137 void
2138 __kmp_unref_task_team( kmp_task_team_t *task_team, kmp_info_t *thread )
2139 {
2140  kmp_uint ref_ct;
2141 
2142  ref_ct = KMP_TEST_THEN_DEC32( (kmp_int32 *)(& task_team->tt.tt_ref_ct) ) - 1;
2143 
2144  KA_TRACE( 20, ( "__kmp_unref_task_team: T#%d task_team = %p ref_ct = %d\n",
2145  __kmp_gtid_from_thread( thread ), task_team, ref_ct ) );
2146 
2147 
2148  if ( ref_ct == 0 ) {
2149  __kmp_free_task_team( thread, task_team );
2150  }
2151 
2152  TCW_PTR( *((volatile kmp_task_team_t **)(&thread->th.th_task_team)), NULL );
2153 }
2154 
2155 
2156 //------------------------------------------------------------------------------
2157 // __kmp_wait_to_unref_task_teams:
2158 // Some threads could still be in the fork barrier release code, possibly
2159 // trying to steal tasks. Wait for each thread to unreference its task team.
2160 //
2161 void
2162 __kmp_wait_to_unref_task_teams(void)
2163 {
2164  kmp_info_t *thread;
2165  kmp_uint32 spins;
2166  int done;
2167 
2168  KMP_INIT_YIELD( spins );
2169 
2170 
2171  for (;;) {
2172  done = TRUE;
2173 
2174  // TODO: GEH - this may be wrong because some sync would be necessary
2175  // in case threads are added to the pool during the traversal.
2176  // Need to verify that lock for thread pool is held when calling
2177  // this routine.
2178  for (thread = (kmp_info_t *)__kmp_thread_pool;
2179  thread != NULL;
2180  thread = thread->th.th_next_pool)
2181  {
2182  volatile kmp_uint *sleep_loc;
2183 #if KMP_OS_WINDOWS
2184  DWORD exit_val;
2185 #endif
2186  if ( TCR_PTR(thread->th.th_task_team) == NULL ) {
2187  KA_TRACE( 10, ("__kmp_wait_to_unref_task_team: T#%d task_team == NULL\n",
2188  __kmp_gtid_from_thread( thread ) ) );
2189  continue;
2190  }
2191 #if KMP_OS_WINDOWS
2192  // TODO: GEH - add this check for Linux* OS / OS X* as well?
2193  if (!__kmp_is_thread_alive(thread, &exit_val)) {
2194  if (TCR_PTR(thread->th.th_task_team) != NULL) {
2195  __kmp_unref_task_team( thread->th.th_task_team, thread );
2196  }
2197  continue;
2198  }
2199 #endif
2200 
2201  done = FALSE; // Because th_task_team pointer is not NULL for this thread
2202 
2203  KA_TRACE( 10, ("__kmp_wait_to_unref_task_team: Waiting for T#%d to unreference task_team\n",
2204  __kmp_gtid_from_thread( thread ) ) );
2205 
2206  if ( __kmp_dflt_blocktime != KMP_MAX_BLOCKTIME ) {
2207  // If the thread is sleeping, awaken it.
2208  if ( ( sleep_loc = (volatile kmp_uint *) TCR_PTR( thread->th.th_sleep_loc) ) != NULL ) {
2209  KA_TRACE( 10, ( "__kmp_wait_to_unref_task_team: T#%d waking up thread T#%d\n",
2210  __kmp_gtid_from_thread( thread ), __kmp_gtid_from_thread( thread ) ) );
2211  __kmp_resume( __kmp_gtid_from_thread( thread ), sleep_loc );
2212  }
2213  }
2214  }
2215  if (done) {
2216  break;
2217  }
2218 
2219  // If we are oversubscribed,
2220  // or have waited a bit (and library mode is throughput), yield.
2221  // Pause is in the following code.
2222  KMP_YIELD( TCR_4(__kmp_nth) > __kmp_avail_proc );
2223  KMP_YIELD_SPIN( spins ); // Yields only if KMP_LIBRARY=throughput
2224  }
2225 
2226 
2227 }
2228 
2229 
2230 //------------------------------------------------------------------------------
2231 // __kmp_task_team_setup: Create a task_team for the current team, but use
2232 // an already created, unused one if it already exists.
2233 // This may be called by any thread, but only for teams with # threads > 1.
2234 
2235 void
2236 __kmp_task_team_setup( kmp_info_t *this_thr, kmp_team_t *team )
2237 {
2238  KMP_DEBUG_ASSERT( __kmp_tasking_mode != tskm_immediate_exec );
2239 
2240  if ( ( team->t.t_task_team == NULL ) && ( team->t.t_nproc > 1 ) ) {
2241  // Allocate a new task team, which will be propagated to
2242  // all of the worker threads after the barrier. As they
2243  // spin in the barrier release phase, they will continue
2244  // to use the previous task team struct, until they receive
2245  // the signal to stop checking for tasks (they can't safely
2246  // reference the kmp_team_t struct, which could be reallocated
2247  // by the master thread).
2248  team->t.t_task_team = __kmp_allocate_task_team( this_thr, team );
2249  KA_TRACE( 20, ( "__kmp_task_team_setup: Master T#%d created new "
2250  "task_team %p for team %d\n",
2251  __kmp_gtid_from_thread( this_thr ), team->t.t_task_team,
2252  ((team != NULL) ? team->t.t_id : -1)) );
2253  }
2254  else {
2255  // All threads have reported in, and no tasks were spawned
2256  // for this release->gather region. Leave the old task
2257  // team struct in place for the upcoming region. No task
2258  // teams are formed for serialized teams.
2259  }
2260  if ( team->t.t_task_team != NULL ) {
2261  // Toggle the state flag so that we can tell which side of
2262  // the barrier we are on.
2263  team->t.t_task_team->tt.tt_state = 1 - this_thr->th.th_task_state;
2264  }
2265 }
2266 
2267 
2268 //------------------------------------------------------------------------------
2269 // __kmp_task_team_sync: Propagation of task team data from team to threads
2270 // which happens just after the release phase of a team barrier. This may be
2271 // called by any thread, but only for teams with # threads > 1.
2272 
2273 void
2274 __kmp_task_team_sync( kmp_info_t *this_thr, kmp_team_t *team )
2275 {
2276  KMP_DEBUG_ASSERT( __kmp_tasking_mode != tskm_immediate_exec );
2277 
2278  // On the rare chance that this thread never saw that the task
2279  // team was no longer active, unref/deallocate it now.
2280  if ( this_thr->th.th_task_team != NULL ) {
2281  if ( ! TCR_SYNC_4( this_thr->th.th_task_team->tt.tt_active ) ) {
2282  KMP_DEBUG_ASSERT( ! KMP_MASTER_TID( __kmp_tid_from_gtid( __kmp_gtid_from_thread( this_thr ) ) ) );
2283  __kmp_unref_task_team( this_thr->th.th_task_team, this_thr );
2284  } else {
2285  //
2286  // We are re-using a task team that was never enabled.
2287  //
2288  KMP_DEBUG_ASSERT( this_thr->th.th_task_team == team->t.t_task_team );
2289  }
2290  }
2291 
2292  //
2293  // It is now safe to propagate the task team pointer from the
2294  // team struct to the current thread.
2295  //
2296  TCW_PTR(this_thr->th.th_task_team, team->t.t_task_team);
2297  if ( this_thr->th.th_task_team != NULL ) {
2298  //
2299  // Toggle the th_task_state field, instead of reading it from
2300  // the task team. Reading the tt_state field at this point
2301  // causes a 30% regression on EPCC parallel - toggling it
2302  // is much cheaper.
2303  //
2304  this_thr->th.th_task_state = 1 - this_thr->th.th_task_state;
2305  KMP_DEBUG_ASSERT( this_thr->th.th_task_state == TCR_4(team->t.t_task_team->tt.tt_state) );
2306  }
2307  KA_TRACE( 20, ( "__kmp_task_team_sync: Thread T#%d task team assigned pointer (%p) from Team #%d task team\n",
2308  __kmp_gtid_from_thread( this_thr ), &this_thr->th.th_task_team,
2309  this_thr->th.th_task_team, ((team != NULL) ? (team->t.t_id) : -1) ) );
2310 }
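
/*
 * Illustrative sketch (not part of the runtime): __kmp_task_team_setup sets
 * tt_state to 1 - th_task_state and __kmp_task_team_sync toggles
 * th_task_state locally, so both sides flip a 0/1 parity bit once per
 * barrier and stay in agreement without re-reading shared state. The
 * standalone code below models that from a single thread's point of view;
 * it is excluded from compilation of this file.
 */
#if 0
#include <assert.h>

int main(void) {
    int tt_state = 0;        /* set by the master: 1 - thread state  */
    int th_task_state = 0;   /* per-thread copy, toggled locally     */
    int barrier;

    for (barrier = 0; barrier < 4; barrier++) {
        tt_state = 1 - th_task_state;        /* master, in ..._setup */
        th_task_state = 1 - th_task_state;   /* worker, in ..._sync  */
        assert(th_task_state == tt_state);   /* parities agree every barrier */
    }
    return 0;
}
#endif
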
2311 
2312 
2313 //------------------------------------------------------------------------------
2314 // __kmp_task_team_wait: Master thread waits for outstanding tasks after
2315 // the barrier gather phase. Only called by master thread if #threads
2316 // in team > 1 !
2317 
2318 void
2319 __kmp_task_team_wait( kmp_info_t *this_thr,
2320  kmp_team_t *team
2321 #if USE_ITT_BUILD
2322  , void * itt_sync_obj
2323 #endif /* USE_ITT_BUILD */
2324  )
2325 {
2326  kmp_task_team_t *task_team = team->t.t_task_team;
2327 
2328  KMP_DEBUG_ASSERT( __kmp_tasking_mode != tskm_immediate_exec );
2329  KMP_DEBUG_ASSERT( task_team == this_thr->th.th_task_team );
2330 
2331  if ( ( task_team != NULL ) && KMP_TASKING_ENABLED( task_team, this_thr->th.th_task_state ) ) {
2332  KA_TRACE( 20, ( "__kmp_task_team_wait: Master T#%d waiting for all tasks: task_team = %p\n",
2333  __kmp_gtid_from_thread( this_thr ), task_team ) );
2334  //
2335  // All worker threads might have dropped through to the
2336  // release phase, but could still be executing tasks.
2337  // Wait here for all tasks to complete. To avoid memory
2338  // contention, only the master thread checks for the
2339  // termination condition.
2340  //
2341  __kmp_wait_sleep( this_thr, &task_team->tt.tt_unfinished_threads, 0, TRUE
2342 #if USE_ITT_BUILD
2343  , itt_sync_obj
2344 #endif /* USE_ITT_BUILD */
2345  );
2346 
2347  //
2348  // Kill the old task team, so that the worker threads will
2349  // stop referencing it while spinning. They will
2350  // deallocate it when the reference count reaches zero.
2351  // The master thread is not included in the ref count.
2352  //
2353  KA_TRACE( 20, ( "__kmp_task_team_wait: Master T#%d deactivating task_team %p\n",
2354  __kmp_gtid_from_thread( this_thr ), task_team ) );
2355  KMP_DEBUG_ASSERT( task_team->tt.tt_nproc > 1 );
2356  TCW_SYNC_4( task_team->tt.tt_active, FALSE );
2357  KMP_MB();
2358 
2359  TCW_PTR(this_thr->th.th_task_team, NULL);
2360  team->t.t_task_team = NULL;
2361  }
2362 }
2363 
2364 
2365 //------------------------------------------------------------------------------
2366 // __kmp_tasking_barrier:
2367 // Internal function to execute all tasks prior to a regular barrier or a
2368 // join barrier. It is a full barrier itself, which unfortunately turns
2369 // regular barriers into double barriers and join barriers into 1 1/2
2370 // barriers.
2371 // This routine may only be called when __kmp_tasking_mode == tskm_extra_barrier.
2372 
2373 void
2374 __kmp_tasking_barrier( kmp_team_t *team, kmp_info_t *thread, int gtid )
2375 {
2376  volatile kmp_uint32 *spin = &team->t.t_task_team->tt.tt_unfinished_threads;
2377  int flag = FALSE;
2378  KMP_DEBUG_ASSERT( __kmp_tasking_mode == tskm_extra_barrier );
2379 
2380 #if USE_ITT_BUILD
2381  KMP_FSYNC_SPIN_INIT( spin, (kmp_uint32*) NULL );
2382 #endif /* USE_ITT_BUILD */
2383  while (! __kmp_execute_tasks( thread, gtid, spin, 0, TRUE, &flag, NULL ) ) {
2384 #if USE_ITT_BUILD
2385  // TODO: What about itt_sync_obj??
2386  KMP_FSYNC_SPIN_PREPARE( spin );
2387 #endif /* USE_ITT_BUILD */
2388 
2389  if( TCR_4(__kmp_global.g.g_done) ) {
2390  if( __kmp_global.g.g_abort )
2391  __kmp_abort_thread( );
2392  break;
2393  }
2394  KMP_YIELD( TRUE ); // GH: We always yield here
2395  }
2396 #if USE_ITT_BUILD
2397  KMP_FSYNC_SPIN_ACQUIRED( (void*) spin );
2398 #endif /* USE_ITT_BUILD */
2399 }
2400 
2401 #endif // OMP_30_ENABLED
2402