StarPU Internal Handbook
starpu_mpi_fxt.h
Go to the documentation of this file.
1/* StarPU --- Runtime system for heterogeneous multicore architectures.
2 *
3 * Copyright (C) 2010-2022 Université de Bordeaux, CNRS (LaBRI UMR 5800), Inria
4 * Copyright (C) 2019 Federal University of Rio Grande do Sul (UFRGS)
5 *
6 * StarPU is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU Lesser General Public License as published by
8 * the Free Software Foundation; either version 2.1 of the License, or (at
9 * your option) any later version.
10 *
11 * StarPU is distributed in the hope that it will be useful, but
12 * WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
14 *
15 * See the GNU Lesser General Public License in COPYING.LGPL for more details.
16 */
17
18#ifndef __STARPU_MPI_FXT_H__
19#define __STARPU_MPI_FXT_H__
20
21#include <starpu.h>
22#include <common/config.h>
23#include <common/fxt.h>
24
27#ifdef __cplusplus
28extern "C"
29{
30#endif
31
/* Communication kinds. Presumably these are the values passed as the `type'
 * argument of _STARPU_MPI_TRACE_ISEND_SUBMIT_END -- TODO confirm at callers. */
#define _STARPU_MPI_FUT_POINT_TO_POINT_SEND	0x100
#define _STARPU_MPI_FUT_COLLECTIVE_SEND		0x101
34
/* Event codes handed to the FUT_* probe macros by the _STARPU_MPI_TRACE_*
 * macros of this header. They are listed in increasing numeric order; this
 * header uses every value from 0x5201 to 0x5223 exactly once. */
#define _STARPU_MPI_FUT_START			0x5201
#define _STARPU_MPI_FUT_STOP			0x5202
#define _STARPU_MPI_FUT_BARRIER			0x5203
#define _STARPU_MPI_FUT_ISEND_SUBMIT_BEGIN	0x5204
#define _STARPU_MPI_FUT_ISEND_SUBMIT_END	0x5205
#define _STARPU_MPI_FUT_IRECV_SUBMIT_BEGIN	0x5206
#define _STARPU_MPI_FUT_IRECV_SUBMIT_END	0x5207
#define _STARPU_MPI_FUT_ISEND_COMPLETE_BEGIN	0x5208
#define _STARPU_MPI_FUT_ISEND_COMPLETE_END	0x5209
#define _STARPU_MPI_FUT_IRECV_COMPLETE_BEGIN	0x520a
#define _STARPU_MPI_FUT_IRECV_COMPLETE_END	0x520b
#define _STARPU_MPI_FUT_SLEEP_BEGIN		0x520c
#define _STARPU_MPI_FUT_SLEEP_END		0x520d
#define _STARPU_MPI_FUT_DTESTING_BEGIN		0x520e
#define _STARPU_MPI_FUT_DTESTING_END		0x520f
#define _STARPU_MPI_FUT_UTESTING_BEGIN		0x5210
#define _STARPU_MPI_FUT_UTESTING_END		0x5211
#define _STARPU_MPI_FUT_UWAIT_BEGIN		0x5212
#define _STARPU_MPI_FUT_UWAIT_END		0x5213
#define _STARPU_MPI_FUT_POLLING_BEGIN		0x5214
#define _STARPU_MPI_FUT_POLLING_END		0x5215
#define _STARPU_MPI_FUT_DRIVER_RUN_BEGIN	0x5216
#define _STARPU_MPI_FUT_DRIVER_RUN_END		0x5217
#define _STARPU_MPI_FUT_DATA_SET_TAG		0x5218
#define _STARPU_MPI_FUT_IRECV_NUMA_NODE		0x5219
#define _STARPU_MPI_FUT_DATA_SET_RANK		0x521a
#define _STARPU_MPI_FUT_IRECV_TERMINATED	0x521b
#define _STARPU_MPI_FUT_ISEND_TERMINATED	0x521c
#define _STARPU_MPI_FUT_TESTING_DETACHED_BEGIN	0x521d
#define _STARPU_MPI_FUT_TESTING_DETACHED_END	0x521e
#define _STARPU_MPI_FUT_TEST_BEGIN		0x521f
#define _STARPU_MPI_FUT_TEST_END		0x5220
#define _STARPU_MPI_FUT_ISEND_NUMA_NODE		0x5221
#define _STARPU_MPI_FUT_CHECKPOINT_BEGIN	0x5222
#define _STARPU_MPI_FUT_CHECKPOINT_END		0x5223
70
71#ifdef STARPU_USE_FXT
72
/* Emit the _STARPU_MPI_FUT_START event carrying (rank, worldsize, thread id)
 * through FUT_DO_ALWAYS_PROBE3.
 * The expansion deliberately has no trailing semicolon: write
 * `_STARPU_MPI_TRACE_START(r, n);' at the call site, so that the macro is a
 * single statement and can be used in an unbraced if/else. */
#define _STARPU_MPI_TRACE_START(rank, worldsize) \
	FUT_DO_ALWAYS_PROBE3(_STARPU_MPI_FUT_START, (rank), (worldsize), _starpu_gettid())
/* Same as _STARPU_MPI_TRACE_START, for the _STARPU_MPI_FUT_STOP event. */
#define _STARPU_MPI_TRACE_STOP(rank, worldsize) \
	FUT_DO_ALWAYS_PROBE3(_STARPU_MPI_FUT_STOP, (rank), (worldsize), _starpu_gettid())
/* Emit the _STARPU_MPI_FUT_BARRIER event carrying (rank, worldsize, key,
 * local_time, thread id), but only while _starpu_fxt_started is non-zero.
 * Expands to a single statement; the caller supplies the semicolon. */
#define _STARPU_MPI_TRACE_BARRIER(rank, worldsize, key, local_time) \
	do \
	{ \
		if (_starpu_fxt_started) \
		{ \
			FUT_DO_ALWAYS_PROBE5(_STARPU_MPI_FUT_BARRIER, (rank), (worldsize), (key), (local_time), _starpu_gettid()); \
		} \
	} while (0)
/* Emit _STARPU_MPI_FUT_ISEND_SUBMIT_BEGIN under _STARPU_FUT_KEYMASK_MPI with
 * (dest, data_tag, size, thread id).
 * No trailing semicolon in the expansion: the caller writes `MACRO(...);',
 * which keeps the macro usable as the body of an unbraced if/else. */
#define _STARPU_MPI_TRACE_ISEND_SUBMIT_BEGIN(dest, data_tag, size) \
	FUT_FULL_PROBE4(_STARPU_FUT_KEYMASK_MPI, _STARPU_MPI_FUT_ISEND_SUBMIT_BEGIN, (dest), (data_tag), (size), _starpu_gettid())
/* Emit the two events that close an isend submission:
 *  - _STARPU_MPI_FUT_ISEND_SUBMIT_END under _STARPU_FUT_KEYMASK_MPI with
 *    (type, peer rank, data tag, data size, pre_sync_jobid, data handle,
 *    prio, thread id);
 *  - _STARPU_MPI_FUT_ISEND_NUMA_NODE under
 *    _STARPU_FUT_KEYMASK_MPI_VERBOSE_EXTRA with (peer rank, pre_sync_jobid,
 *    memory location bitmap of the buffer, thread id).
 * Both probes are wrapped in do { } while (0) so that the macro is ONE
 * statement: without the wrapper, `if (c) MACRO(...);' would guard only the
 * first probe and always run the second.
 * `req' is evaluated several times; pass an expression without side effects.
 * The caller supplies the terminating semicolon. */
#define _STARPU_MPI_TRACE_ISEND_SUBMIT_END(type, req, prio) \
	do { \
		FUT_FULL_PROBE8(_STARPU_FUT_KEYMASK_MPI, _STARPU_MPI_FUT_ISEND_SUBMIT_END, (type), (req)->node_tag.node.rank, (req)->node_tag.data_tag, starpu_data_get_size((req)->data_handle), (req)->pre_sync_jobid, (req)->data_handle, (prio), _starpu_gettid()); \
		FUT_FULL_PROBE4(_STARPU_FUT_KEYMASK_MPI_VERBOSE_EXTRA, _STARPU_MPI_FUT_ISEND_NUMA_NODE, (req)->node_tag.node.rank, (req)->pre_sync_jobid, starpu_get_memory_location_bitmap((req)->ptr, starpu_data_get_size((req)->data_handle)), _starpu_gettid()); \
	} while (0)
/* The three macros below each emit one event under _STARPU_FUT_KEYMASK_MPI,
 * with _starpu_gettid() appended as the last argument. None of them ends with
 * a semicolon: write `MACRO(...);' at the call site, so that each one is a
 * single statement usable in an unbraced if/else. */

/* _STARPU_MPI_FUT_IRECV_SUBMIT_BEGIN with (src, data_tag, thread id). */
#define _STARPU_MPI_TRACE_IRECV_SUBMIT_BEGIN(src, data_tag) \
	FUT_FULL_PROBE3(_STARPU_FUT_KEYMASK_MPI, _STARPU_MPI_FUT_IRECV_SUBMIT_BEGIN, (src), (data_tag), _starpu_gettid())
/* _STARPU_MPI_FUT_IRECV_SUBMIT_END with (src, data_tag, thread id). */
#define _STARPU_MPI_TRACE_IRECV_SUBMIT_END(src, data_tag) \
	FUT_FULL_PROBE3(_STARPU_FUT_KEYMASK_MPI, _STARPU_MPI_FUT_IRECV_SUBMIT_END, (src), (data_tag), _starpu_gettid())
/* _STARPU_MPI_FUT_ISEND_COMPLETE_BEGIN with (dest, data_tag, size, thread id). */
#define _STARPU_MPI_TRACE_ISEND_COMPLETE_BEGIN(dest, data_tag, size) \
	FUT_FULL_PROBE4(_STARPU_FUT_KEYMASK_MPI, _STARPU_MPI_FUT_ISEND_COMPLETE_BEGIN, (dest), (data_tag), (size), _starpu_gettid())
/* Dispatch on the request type: RECV_REQ emits the IRECV_COMPLETE_BEGIN
 * event, SEND_REQ emits the ISEND_COMPLETE_BEGIN event with a size of 0, any
 * other value emits nothing.
 * The if/else chain is wrapped in do { } while (0) so that it cannot capture
 * an `else' that follows the call site, and `type' is parenthesized so that
 * an argument such as `a & b' is compared as a whole. `type' may be evaluated
 * twice. The caller supplies the terminating semicolon. */
#define _STARPU_MPI_TRACE_COMPLETE_BEGIN(type, rank, data_tag) \
	do { \
		if ((type) == RECV_REQ) \
		{ \
			_STARPU_MPI_TRACE_IRECV_COMPLETE_BEGIN((rank), (data_tag)); \
		} \
		else if ((type) == SEND_REQ) \
		{ \
			_STARPU_MPI_TRACE_ISEND_COMPLETE_BEGIN((rank), (data_tag), 0); \
		} \
	} while (0)
/* The three macros below each emit one event under _STARPU_FUT_KEYMASK_MPI,
 * with _starpu_gettid() appended as the last argument. None of them ends with
 * a semicolon: write `MACRO(...);' at the call site, so that each one is a
 * single statement usable in an unbraced if/else. */

/* _STARPU_MPI_FUT_ISEND_COMPLETE_END with (dest, data_tag, size, thread id). */
#define _STARPU_MPI_TRACE_ISEND_COMPLETE_END(dest, data_tag, size) \
	FUT_FULL_PROBE4(_STARPU_FUT_KEYMASK_MPI, _STARPU_MPI_FUT_ISEND_COMPLETE_END, (dest), (data_tag), (size), _starpu_gettid())
/* _STARPU_MPI_FUT_IRECV_COMPLETE_BEGIN with (src, data_tag, thread id). */
#define _STARPU_MPI_TRACE_IRECV_COMPLETE_BEGIN(src, data_tag) \
	FUT_FULL_PROBE3(_STARPU_FUT_KEYMASK_MPI, _STARPU_MPI_FUT_IRECV_COMPLETE_BEGIN, (src), (data_tag), _starpu_gettid())
/* _STARPU_MPI_FUT_IRECV_COMPLETE_END with (src, data_tag, thread id). */
#define _STARPU_MPI_TRACE_IRECV_COMPLETE_END(src, data_tag) \
	FUT_FULL_PROBE3(_STARPU_FUT_KEYMASK_MPI, _STARPU_MPI_FUT_IRECV_COMPLETE_END, (src), (data_tag), _starpu_gettid())
/* Dispatch on the request type: RECV_REQ emits the IRECV_COMPLETE_END event,
 * SEND_REQ emits the ISEND_COMPLETE_END event with a size of 0, any other
 * value emits nothing.
 * The if/else chain is wrapped in do { } while (0) so that it cannot capture
 * an `else' that follows the call site, and `type' is parenthesized so that
 * an argument such as `a & b' is compared as a whole. `type' may be evaluated
 * twice. The caller supplies the terminating semicolon. */
#define _STARPU_MPI_TRACE_COMPLETE_END(type, rank, data_tag) \
	do { \
		if ((type) == RECV_REQ) \
		{ \
			_STARPU_MPI_TRACE_IRECV_COMPLETE_END((rank), (data_tag)); \
		} \
		else if ((type) == SEND_REQ) \
		{ \
			_STARPU_MPI_TRACE_ISEND_COMPLETE_END((rank), (data_tag), 0); \
		} \
	} while (0)
/* Emit the termination event(s) matching (req)->request_type:
 *  - RECV_REQ: _STARPU_MPI_FUT_IRECV_TERMINATED under _STARPU_FUT_KEYMASK_MPI
 *    with (peer rank, data tag, post_sync_jobid, thread id, data handle),
 *    followed by _STARPU_MPI_FUT_IRECV_NUMA_NODE under
 *    _STARPU_FUT_KEYMASK_MPI_VERBOSE_EXTRA with (peer rank, post_sync_jobid,
 *    memory location bitmap of the buffer, thread id);
 *  - SEND_REQ: _STARPU_MPI_FUT_ISEND_TERMINATED under _STARPU_FUT_KEYMASK_MPI
 *    with (peer rank, data tag, thread id);
 *  - any other type: nothing.
 * Wrapped in do { } while (0) and fully braced so that the expansion is one
 * statement and cannot capture an `else' following the call site.
 * `req' is evaluated several times; pass an expression without side effects.
 * The caller supplies the terminating semicolon. */
#define _STARPU_MPI_TRACE_TERMINATED(req) \
	do { \
		if ((req)->request_type == RECV_REQ) \
		{ \
			FUT_FULL_PROBE5(_STARPU_FUT_KEYMASK_MPI, _STARPU_MPI_FUT_IRECV_TERMINATED, (req)->node_tag.node.rank, (req)->node_tag.data_tag, (req)->post_sync_jobid, _starpu_gettid(), (req)->data_handle); \
			FUT_FULL_PROBE4(_STARPU_FUT_KEYMASK_MPI_VERBOSE_EXTRA, _STARPU_MPI_FUT_IRECV_NUMA_NODE, (req)->node_tag.node.rank, (req)->post_sync_jobid, starpu_get_memory_location_bitmap((req)->ptr, starpu_data_get_size((req)->data_handle)), _starpu_gettid()); \
		} \
		else if ((req)->request_type == SEND_REQ) \
		{ \
			FUT_FULL_PROBE3(_STARPU_FUT_KEYMASK_MPI, _STARPU_MPI_FUT_ISEND_TERMINATED, (req)->node_tag.node.rank, (req)->node_tag.data_tag, _starpu_gettid()); \
		} \
	} while (0)
/* Each macro below emits exactly one event under _STARPU_FUT_KEYMASK_MPI; the
 * event code is the _STARPU_MPI_FUT_* constant of the same name and the last
 * argument is always _starpu_gettid(). None of them ends with a semicolon:
 * write `MACRO(...);' at the call site, so that each one is a single statement
 * usable in an unbraced if/else. */

/* Events carrying only the thread id. */
#define _STARPU_MPI_TRACE_SLEEP_BEGIN() \
	FUT_FULL_PROBE1(_STARPU_FUT_KEYMASK_MPI, _STARPU_MPI_FUT_SLEEP_BEGIN, _starpu_gettid())
#define _STARPU_MPI_TRACE_SLEEP_END() \
	FUT_FULL_PROBE1(_STARPU_FUT_KEYMASK_MPI, _STARPU_MPI_FUT_SLEEP_END, _starpu_gettid())
#define _STARPU_MPI_TRACE_DTESTING_BEGIN() \
	FUT_FULL_PROBE1(_STARPU_FUT_KEYMASK_MPI, _STARPU_MPI_FUT_DTESTING_BEGIN, _starpu_gettid())
#define _STARPU_MPI_TRACE_DTESTING_END() \
	FUT_FULL_PROBE1(_STARPU_FUT_KEYMASK_MPI, _STARPU_MPI_FUT_DTESTING_END, _starpu_gettid())
/* Events carrying (src, data_tag, thread id). */
#define _STARPU_MPI_TRACE_UTESTING_BEGIN(src, data_tag) \
	FUT_FULL_PROBE3(_STARPU_FUT_KEYMASK_MPI, _STARPU_MPI_FUT_UTESTING_BEGIN, (src), (data_tag), _starpu_gettid())
#define _STARPU_MPI_TRACE_UTESTING_END(src, data_tag) \
	FUT_FULL_PROBE3(_STARPU_FUT_KEYMASK_MPI, _STARPU_MPI_FUT_UTESTING_END, (src), (data_tag), _starpu_gettid())
#define _STARPU_MPI_TRACE_UWAIT_BEGIN(src, data_tag) \
	FUT_FULL_PROBE3(_STARPU_FUT_KEYMASK_MPI, _STARPU_MPI_FUT_UWAIT_BEGIN, (src), (data_tag), _starpu_gettid())
#define _STARPU_MPI_TRACE_UWAIT_END(src, data_tag) \
	FUT_FULL_PROBE3(_STARPU_FUT_KEYMASK_MPI, _STARPU_MPI_FUT_UWAIT_END, (src), (data_tag), _starpu_gettid())
/* Events carrying (handle, rank or data_tag, thread id). */
#define _STARPU_MPI_TRACE_DATA_SET_RANK(handle, rank) \
	FUT_FULL_PROBE3(_STARPU_FUT_KEYMASK_MPI, _STARPU_MPI_FUT_DATA_SET_RANK, (handle), (rank), _starpu_gettid())
#define _STARPU_MPI_TRACE_DATA_SET_TAG(handle, data_tag) \
	FUT_FULL_PROBE3(_STARPU_FUT_KEYMASK_MPI, _STARPU_MPI_FUT_DATA_SET_TAG, (handle), (data_tag), _starpu_gettid())
/* Probes for detached-request testing and for individual tests. They are
 * compiled out by default (the #else branch defines them as empty
 * statements); switch the `#if 0' to `#if 1' to enable them.
 * Neither variant ends with a semicolon, so flipping the switch never
 * changes how a call site `MACRO(...);' parses. */
#if 0
/* This is very expensive in the trace, only enable for debugging */
#define _STARPU_MPI_TRACE_TESTING_DETACHED_BEGIN() \
	FUT_DO_PROBE1(_STARPU_MPI_FUT_TESTING_DETACHED_BEGIN, _starpu_gettid())
#define _STARPU_MPI_TRACE_TESTING_DETACHED_END() \
	FUT_DO_PROBE1(_STARPU_MPI_FUT_TESTING_DETACHED_END, _starpu_gettid())
#define _STARPU_MPI_TRACE_TEST_BEGIN(peer, data_tag) \
	FUT_DO_PROBE3(_STARPU_MPI_FUT_TEST_BEGIN, (peer), (data_tag), _starpu_gettid())
#define _STARPU_MPI_TRACE_TEST_END(peer, data_tag) \
	FUT_DO_PROBE3(_STARPU_MPI_FUT_TEST_END, (peer), (data_tag), _starpu_gettid())
#else
#define _STARPU_MPI_TRACE_TESTING_DETACHED_BEGIN()	do {} while (0)
#define _STARPU_MPI_TRACE_TESTING_DETACHED_END()	do {} while (0)
#define _STARPU_MPI_TRACE_TEST_BEGIN(peer, data_tag)	do {} while (0)
#define _STARPU_MPI_TRACE_TEST_END(peer, data_tag)	do {} while (0)
#endif
/* Emit _STARPU_MPI_FUT_POLLING_BEGIN (with the thread id) the first time it is
 * reached while `trace_loop' is zero, and set `trace_loop' to 1 so that
 * further calls emit nothing until _STARPU_MPI_TRACE_POLLING_END() runs.
 * `trace_loop' is not declared by this header: an assignable integer variable
 * of that name must be in scope at the call site.
 * Wrapped in do { } while (0) so that the inner `if' cannot capture an `else'
 * following the call site. The caller supplies the terminating semicolon. */
#define _STARPU_MPI_TRACE_POLLING_BEGIN() \
	do { \
		if (!trace_loop) \
		{ \
			trace_loop = 1; \
			FUT_FULL_PROBE1(_STARPU_FUT_KEYMASK_MPI, _STARPU_MPI_FUT_POLLING_BEGIN, _starpu_gettid()); \
		} \
	} while (0)
/* Counterpart of _STARPU_MPI_TRACE_POLLING_BEGIN(): when `trace_loop' is
 * non-zero, reset it to 0 and emit _STARPU_MPI_FUT_POLLING_END; otherwise do
 * nothing. */
#define _STARPU_MPI_TRACE_POLLING_END() \
	do { \
		if (trace_loop) \
		{ \
			trace_loop = 0; \
			FUT_FULL_PROBE1(_STARPU_FUT_KEYMASK_MPI, _STARPU_MPI_FUT_POLLING_END, _starpu_gettid()); \
		} \
	} while (0)
/* None of the probe macros below ends with a semicolon: write `MACRO(...);'
 * at the call site, so that each one is a single statement usable in an
 * unbraced if/else (the no-op checkpoint variants already behave that way). */

/* Driver-run events: emitted under _STARPU_FUT_KEYMASK_MPI with the thread id. */
#define _STARPU_MPI_TRACE_DRIVER_RUN_BEGIN() \
	FUT_FULL_PROBE1(_STARPU_FUT_KEYMASK_MPI, _STARPU_MPI_FUT_DRIVER_RUN_BEGIN, _starpu_gettid())
#define _STARPU_MPI_TRACE_DRIVER_RUN_END() \
	FUT_FULL_PROBE1(_STARPU_FUT_KEYMASK_MPI, _STARPU_MPI_FUT_DRIVER_RUN_END, _starpu_gettid())
/* Checkpoint events: note that these are emitted under
 * _STARPU_FUT_KEYMASK_EVENT (not the MPI keymask) with
 * (cp_instance, cp_domain, thread id). */
#define _STARPU_MPI_TRACE_CHECKPOINT_BEGIN(cp_instance, cp_domain) \
	FUT_FULL_PROBE3(_STARPU_FUT_KEYMASK_EVENT, _STARPU_MPI_FUT_CHECKPOINT_BEGIN, (cp_instance), (cp_domain), _starpu_gettid())
#define _STARPU_MPI_TRACE_CHECKPOINT_END(cp_instance, cp_domain) \
	FUT_FULL_PROBE3(_STARPU_FUT_KEYMASK_EVENT, _STARPU_MPI_FUT_CHECKPOINT_END, (cp_instance), (cp_domain), _starpu_gettid())
/* Defined (empty) only when STARPU_USE_FXT is set; presumably tested by code
 * that needs to know whether tracing is compiled in -- TODO confirm users. */
#define TRACE
163#else
/* No-op variants used when STARPU_USE_FXT is not defined. Every macro expands
 * to the empty statement `do {} while (0)' WITHOUT a trailing semicolon, so
 * that a call site `MACRO(...);' is exactly one statement and remains valid
 * in an unbraced if/else. The arguments are discarded without being
 * evaluated. */
#define _STARPU_MPI_TRACE_START(a, b)				do {} while (0)
#define _STARPU_MPI_TRACE_STOP(a, b)				do {} while (0)
#define _STARPU_MPI_TRACE_BARRIER(a, b, c, d)			do {} while (0)
#define _STARPU_MPI_TRACE_ISEND_SUBMIT_BEGIN(a, b, c)		do {} while (0)
#define _STARPU_MPI_TRACE_ISEND_SUBMIT_END(a, b, c)		do {} while (0)
#define _STARPU_MPI_TRACE_IRECV_SUBMIT_BEGIN(a, b)		do {} while (0)
#define _STARPU_MPI_TRACE_IRECV_SUBMIT_END(a, b)		do {} while (0)
#define _STARPU_MPI_TRACE_ISEND_COMPLETE_BEGIN(a, b, c)		do {} while (0)
#define _STARPU_MPI_TRACE_COMPLETE_BEGIN(a, b, c)		do {} while (0)
#define _STARPU_MPI_TRACE_COMPLETE_END(a, b, c)			do {} while (0)
#define _STARPU_MPI_TRACE_TERMINATED(a)				do {} while (0)
#define _STARPU_MPI_TRACE_ISEND_COMPLETE_END(a, b, c)		do {} while (0)
#define _STARPU_MPI_TRACE_IRECV_COMPLETE_BEGIN(a, b)		do {} while (0)
#define _STARPU_MPI_TRACE_IRECV_COMPLETE_END(a, b)		do {} while (0)
#define _STARPU_MPI_TRACE_SLEEP_BEGIN()				do {} while (0)
#define _STARPU_MPI_TRACE_SLEEP_END()				do {} while (0)
#define _STARPU_MPI_TRACE_DTESTING_BEGIN()			do {} while (0)
#define _STARPU_MPI_TRACE_DTESTING_END()			do {} while (0)
#define _STARPU_MPI_TRACE_UTESTING_BEGIN(a, b)			do {} while (0)
#define _STARPU_MPI_TRACE_UTESTING_END(a, b)			do {} while (0)
#define _STARPU_MPI_TRACE_UWAIT_BEGIN(a, b)			do {} while (0)
#define _STARPU_MPI_TRACE_UWAIT_END(a, b)			do {} while (0)
#define _STARPU_MPI_TRACE_DATA_SET_RANK(a, b)			do {} while (0)
#define _STARPU_MPI_TRACE_DATA_SET_TAG(a, b)			do {} while (0)
#define _STARPU_MPI_TRACE_TESTING_DETACHED_BEGIN()		do {} while (0)
#define _STARPU_MPI_TRACE_TESTING_DETACHED_END()		do {} while (0)
#define _STARPU_MPI_TRACE_TEST_BEGIN(peer, data_tag)		do {} while (0)
#define _STARPU_MPI_TRACE_TEST_END(peer, data_tag)		do {} while (0)
#define _STARPU_MPI_TRACE_POLLING_BEGIN()			do {} while (0)
#define _STARPU_MPI_TRACE_POLLING_END()				do {} while (0)
#define _STARPU_MPI_TRACE_DRIVER_RUN_BEGIN()			do {} while (0)
#define _STARPU_MPI_TRACE_DRIVER_RUN_END()			do {} while (0)
#define _STARPU_MPI_TRACE_CHECKPOINT_BEGIN(cp_instance, cp_domain)	do {} while (0)
#define _STARPU_MPI_TRACE_CHECKPOINT_END(cp_instance, cp_domain)	do {} while (0)
198#endif
199
/* Entry points implemented outside this header. Judging by their names they
 * set up and tear down FxT tracing for StarPU-MPI; the meaning of `arg' is
 * not visible from here -- see the definition. */
void _starpu_mpi_fxt_init(void *arg);
/* Declared with (void) so that this is a real prototype: in C, an empty
 * parameter list `()' leaves the arguments unchecked. */
void _starpu_mpi_fxt_shutdown(void);
202
203#ifdef __cplusplus
204}
205#endif
206
207
208#endif // __STARPU_MPI_FXT_H__