2006-01-28 18:38:37 +03:00
|
|
|
/*
|
|
|
|
* Copyright (c) 2004-2005 The Trustees of Indiana University.
|
|
|
|
* All rights reserved.
|
2006-08-24 20:38:08 +04:00
|
|
|
* Copyright (c) 2004-2006 The Trustees of the University of Tennessee.
|
2006-01-28 18:38:37 +03:00
|
|
|
* All rights reserved.
|
|
|
|
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
|
|
|
|
* University of Stuttgart. All rights reserved.
|
|
|
|
* Copyright (c) 2004-2005 The Regents of the University of California.
|
|
|
|
* All rights reserved.
|
2009-02-24 20:17:33 +03:00
|
|
|
* Copyright (c) 2008-2009 Sun Microsystems, Inc. All rights reserved.
|
2006-01-28 18:38:37 +03:00
|
|
|
* $COPYRIGHT$
|
|
|
|
*
|
|
|
|
* Additional copyrights may follow
|
|
|
|
*
|
|
|
|
* $HEADER$
|
|
|
|
*/
|
|
|
|
|
|
|
|
#include "ompi_config.h"
|
|
|
|
|
|
|
|
#include "osc_pt2pt.h"
|
|
|
|
#include "osc_pt2pt_sendreq.h"
|
|
|
|
#include "osc_pt2pt_header.h"
|
|
|
|
#include "osc_pt2pt_data_move.h"
|
2006-08-03 04:10:19 +04:00
|
|
|
#include "osc_pt2pt_buffer.h"
|
2006-01-28 18:38:37 +03:00
|
|
|
|
2008-04-18 00:43:56 +04:00
|
|
|
#include "opal/util/arch.h"
|
2009-02-14 05:26:12 +03:00
|
|
|
#include "opal/util/output.h"
|
2006-02-12 04:33:29 +03:00
|
|
|
#include "opal/sys/atomic.h"
|
2006-08-03 04:10:19 +04:00
|
|
|
#include "ompi/mca/pml/pml.h"
|
- Split the datatype engine into two parts: an MPI specific part in
OMPI
and a language agnostic part in OPAL. The convertor is completely
moved into OPAL. This offers several benefits as described in RFC
http://www.open-mpi.org/community/lists/devel/2009/07/6387.php
namely:
- Fewer basic types (int* and float* types, boolean and wchar
- Fixing naming scheme to ompi-nomenclature.
- Usability outside of the ompi-layer.
- Due to the fixed nature of simple opal types, their information is
completely
known at compile time and therefore constified
- With fewer datatypes (22), the actual sizes of bit-field types may be
reduced
from 64 to 32 bits, allowing reorganizing the opal_datatype
structure, eliminating holes and keeping data required in convertor
(upon send/recv) in one cacheline...
This has implications to the convertor-datastructure and other parts
of the code.
- Several performance tests have been run, the netpipe latency does not
change with
this patch on Linux/x86-64 on the smoky cluster.
- Extensive tests have been done to verify correctness (no new
regressions) using:
1. mpi_test_suite on linux/x86-64 using clean ompi-trunk and
ompi-ddt:
a. running both trunk and ompi-ddt resulted in no differences
(except for MPI_SHORT_INT and MPI_TYPE_MIX_LB_UB do now run
correctly).
b. with --enable-memchecker and running under valgrind (one buglet
when run with static found in test-suite, committed)
2. ibm testsuite on linux/x86-64 using clean ompi-trunk and ompi-ddt:
all passed (except for the dynamic/ tests failed!! as trunk/MTT)
3. compilation and usage of HDF5 tests on Jaguar using PGI and
PathScale compilers.
4. compilation and usage on Scicortex.
- Please note, that for the heterogeneous case, (-m32 compiled
binaries/ompi), neither
ompi-trunk, nor ompi-ddt branch would successfully launch.
This commit was SVN r21641.
2009-07-13 08:56:31 +04:00
|
|
|
#include "ompi/datatype/ompi_datatype.h"
|
2009-02-24 20:17:33 +03:00
|
|
|
#include "ompi/op/op.h"
|
2006-08-17 18:52:20 +04:00
|
|
|
#include "ompi/mca/osc/base/base.h"
|
2007-07-14 00:46:12 +04:00
|
|
|
#include "ompi/mca/osc/base/osc_base_obj_convert.h"
|
2008-02-12 21:01:17 +03:00
|
|
|
#include "ompi/memchecker.h"
|
2006-01-28 18:38:37 +03:00
|
|
|
|
2006-08-03 04:10:19 +04:00
|
|
|
|
2006-01-28 18:38:37 +03:00
|
|
|
static inline int32_t
|
|
|
|
create_send_tag(ompi_osc_pt2pt_module_t *module)
|
|
|
|
{
|
2011-03-19 00:36:35 +03:00
|
|
|
#if OPAL_ENABLE_MULTI_THREADS && OPAL_HAVE_ATOMIC_CMPSET_32
|
2006-01-30 15:16:52 +03:00
|
|
|
int32_t newval, oldval;
|
2006-01-28 18:38:37 +03:00
|
|
|
do {
|
|
|
|
oldval = module->p2p_tag_counter;
|
|
|
|
newval = (oldval + 1) % mca_pml.pml_max_tag;
|
|
|
|
} while (0 == opal_atomic_cmpset_32(&module->p2p_tag_counter, oldval, newval));
|
|
|
|
return newval;
|
2007-05-21 06:21:25 +04:00
|
|
|
#else
|
2006-01-28 18:38:37 +03:00
|
|
|
int32_t ret;
|
|
|
|
/* no compare and swap - have to lock the module */
|
|
|
|
OPAL_THREAD_LOCK(&module->p2p_lock);
|
|
|
|
module->p2p_tag_counter = (module->p2p_tag_counter + 1) % mca_pml.pml_max_tag;
|
|
|
|
ret = module->p2p_tag_counter;
|
|
|
|
OPAL_THREAD_UNLOCK(&module->p2p_lock);
|
|
|
|
return ret;
|
|
|
|
#endif
|
|
|
|
}
|
|
|
|
|
|
|
|
|
2007-05-21 06:21:25 +04:00
|
|
|
static inline void
|
|
|
|
inmsg_mark_complete(ompi_osc_pt2pt_module_t *module)
|
|
|
|
{
|
2007-05-22 00:53:02 +04:00
|
|
|
int32_t count;
|
|
|
|
bool need_unlock = false;
|
|
|
|
|
|
|
|
OPAL_THREAD_LOCK(&module->p2p_lock);
|
|
|
|
count = (module->p2p_num_pending_in -= 1);
|
|
|
|
if ((0 != module->p2p_lock_status) &&
|
|
|
|
(opal_list_get_size(&module->p2p_unlocks_pending) != 0)) {
|
|
|
|
need_unlock = true;
|
|
|
|
}
|
|
|
|
OPAL_THREAD_UNLOCK(&module->p2p_lock);
|
|
|
|
|
2008-04-16 17:24:35 +04:00
|
|
|
MEMCHECKER(
|
|
|
|
/* Here we need restore the initial states of memory. */
|
|
|
|
opal_memchecker_base_mem_defined( module->p2p_win->w_baseptr, module->p2p_win->w_size);
|
|
|
|
);
|
2007-05-21 06:21:25 +04:00
|
|
|
if (0 == count) {
|
2007-05-22 00:53:02 +04:00
|
|
|
if (need_unlock) ompi_osc_pt2pt_passive_unlock_complete(module);
|
2007-05-21 06:21:25 +04:00
|
|
|
opal_condition_broadcast(&module->p2p_cond);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
|
2006-01-28 18:38:37 +03:00
|
|
|
/**********************************************************************
|
|
|
|
*
|
|
|
|
* Sending a sendreq to target
|
|
|
|
*
|
|
|
|
**********************************************************************/
|
|
|
|
static void
|
2007-05-21 06:21:25 +04:00
|
|
|
ompi_osc_pt2pt_sendreq_send_long_cb(ompi_osc_pt2pt_mpireq_t *mpireq)
|
2006-01-28 18:38:37 +03:00
|
|
|
{
|
2007-05-21 06:21:25 +04:00
|
|
|
ompi_osc_pt2pt_longreq_t *longreq =
|
|
|
|
(ompi_osc_pt2pt_longreq_t*) mpireq;
|
2006-01-28 18:38:37 +03:00
|
|
|
ompi_osc_pt2pt_sendreq_t *sendreq =
|
2007-05-21 06:21:25 +04:00
|
|
|
(ompi_osc_pt2pt_sendreq_t*) longreq->mpireq.cbdata;
|
|
|
|
int32_t count;
|
2006-01-28 18:38:37 +03:00
|
|
|
|
2008-06-09 18:53:58 +04:00
|
|
|
OPAL_OUTPUT_VERBOSE((50, ompi_osc_base_output,
|
2007-02-25 04:03:19 +03:00
|
|
|
"%d completed long sendreq to %d",
|
2007-07-11 21:16:06 +04:00
|
|
|
ompi_comm_rank(sendreq->req_module->p2p_comm),
|
2007-02-25 04:03:19 +03:00
|
|
|
sendreq->req_target_rank));
|
2006-02-22 08:14:34 +03:00
|
|
|
|
2007-05-23 16:56:34 +04:00
|
|
|
OPAL_THREAD_LOCK(&sendreq->req_module->p2p_lock);
|
2007-05-22 00:53:02 +04:00
|
|
|
count = (sendreq->req_module->p2p_num_pending_out -= 1);
|
2007-05-23 16:56:34 +04:00
|
|
|
OPAL_THREAD_UNLOCK(&sendreq->req_module->p2p_lock);
|
2006-01-28 18:38:37 +03:00
|
|
|
|
|
|
|
ompi_osc_pt2pt_longreq_free(longreq);
|
|
|
|
ompi_osc_pt2pt_sendreq_free(sendreq);
|
2007-05-21 06:21:25 +04:00
|
|
|
|
|
|
|
if (0 == count) opal_condition_broadcast(&sendreq->req_module->p2p_cond);
|
2006-01-28 18:38:37 +03:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
static void
|
2007-05-21 06:21:25 +04:00
|
|
|
ompi_osc_pt2pt_sendreq_send_cb(ompi_osc_pt2pt_mpireq_t *mpireq)
|
2006-01-28 18:38:37 +03:00
|
|
|
{
|
2007-05-21 06:21:25 +04:00
|
|
|
ompi_osc_pt2pt_buffer_t *buffer =
|
|
|
|
(ompi_osc_pt2pt_buffer_t*) mpireq;
|
2006-01-28 18:38:37 +03:00
|
|
|
ompi_osc_pt2pt_sendreq_t *sendreq =
|
2007-05-21 06:21:25 +04:00
|
|
|
(ompi_osc_pt2pt_sendreq_t*) mpireq->cbdata;
|
2006-01-28 18:38:37 +03:00
|
|
|
ompi_osc_pt2pt_send_header_t *header =
|
2006-08-03 04:10:19 +04:00
|
|
|
(ompi_osc_pt2pt_send_header_t*) buffer->payload;
|
2007-05-21 06:21:25 +04:00
|
|
|
int32_t count;
|
2006-01-28 18:38:37 +03:00
|
|
|
|
2006-02-23 09:02:10 +03:00
|
|
|
/* have to look at header, and not the sendreq because in the case
|
|
|
|
of get, it's possible that the sendreq has been freed already
|
|
|
|
(if the remote side replies before we get our send completion
|
|
|
|
callback) and already allocated to another request. We don't
|
|
|
|
wait for this completion before exiting a synchronization point
|
|
|
|
in the case of get, as we really don't care when it completes -
|
|
|
|
only when the data arrives. */
|
|
|
|
if (OMPI_OSC_PT2PT_HDR_GET != header->hdr_base.hdr_type) {
|
2009-05-07 00:11:28 +04:00
|
|
|
#if !defined(WORDS_BIGENDIAN) && OPAL_ENABLE_HETEROGENEOUS_SUPPORT
|
2006-02-27 21:47:00 +03:00
|
|
|
if (header->hdr_base.hdr_flags & OMPI_OSC_PT2PT_HDR_FLAG_NBO) {
|
|
|
|
OMPI_OSC_PT2PT_SEND_HDR_NTOH(*header);
|
|
|
|
}
|
|
|
|
#endif
|
2006-01-28 18:38:37 +03:00
|
|
|
/* do we need to post a send? */
|
|
|
|
if (header->hdr_msg_length != 0) {
|
|
|
|
/* sendreq is done. Mark it as so and get out of here */
|
2007-05-23 16:56:34 +04:00
|
|
|
OPAL_THREAD_LOCK(&sendreq->req_module->p2p_lock);
|
2007-05-22 00:53:02 +04:00
|
|
|
count = (sendreq->req_module->p2p_num_pending_out -= 1);
|
2007-05-23 16:56:34 +04:00
|
|
|
OPAL_THREAD_UNLOCK(&sendreq->req_module->p2p_lock);
|
2006-01-28 18:38:37 +03:00
|
|
|
ompi_osc_pt2pt_sendreq_free(sendreq);
|
2007-05-21 06:21:25 +04:00
|
|
|
if (0 == count) opal_condition_broadcast(&sendreq->req_module->p2p_cond);
|
2006-01-28 18:38:37 +03:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2006-08-03 04:10:19 +04:00
|
|
|
/* release the buffer */
|
|
|
|
OPAL_FREE_LIST_RETURN(&mca_osc_pt2pt_component.p2p_c_buffers,
|
2007-05-21 06:21:25 +04:00
|
|
|
&mpireq->super);
|
2006-01-28 18:38:37 +03:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
/* create the initial fragment, pack header, datatype, and payload (if
|
|
|
|
size fits) and send */
|
|
|
|
int
|
|
|
|
ompi_osc_pt2pt_sendreq_send(ompi_osc_pt2pt_module_t *module,
|
|
|
|
ompi_osc_pt2pt_sendreq_t *sendreq)
|
|
|
|
{
|
|
|
|
int ret = OMPI_SUCCESS;
|
2009-05-01 02:36:09 +04:00
|
|
|
opal_free_list_item_t *item = NULL;
|
2006-01-28 18:38:37 +03:00
|
|
|
ompi_osc_pt2pt_send_header_t *header = NULL;
|
2006-08-03 04:10:19 +04:00
|
|
|
ompi_osc_pt2pt_buffer_t *buffer = NULL;
|
2006-01-28 18:38:37 +03:00
|
|
|
size_t written_data = 0;
|
2006-02-07 15:16:23 +03:00
|
|
|
size_t needed_len = sizeof(ompi_osc_pt2pt_send_header_t);
|
|
|
|
const void *packed_ddt;
|
- Split the datatype engine into two parts: an MPI specific part in
OMPI
and a language agnostic part in OPAL. The convertor is completely
moved into OPAL. This offers several benefits as described in RFC
http://www.open-mpi.org/community/lists/devel/2009/07/6387.php
namely:
- Fewer basic types (int* and float* types, boolean and wchar
- Fixing naming scheme to ompi-nomenclature.
- Usability outside of the ompi-layer.
- Due to the fixed nature of simple opal types, their information is
completely
known at compile time and therefore constified
- With fewer datatypes (22), the actual sizes of bit-field types may be
reduced
from 64 to 32 bits, allowing reorganizing the opal_datatype
structure, eliminating holes and keeping data required in convertor
(upon send/recv) in one cacheline...
This has implications to the convertor-datastructure and other parts
of the code.
- Several performance tests have been run, the netpipe latency does not
change with
this patch on Linux/x86-64 on the smoky cluster.
- Extensive tests have been done to verify correctness (no new
regressions) using:
1. mpi_test_suite on linux/x86-64 using clean ompi-trunk and
ompi-ddt:
a. running both trunk and ompi-ddt resulted in no differences
(except for MPI_SHORT_INT and MPI_TYPE_MIX_LB_UB do now run
correctly).
b. with --enable-memchecker and running under valgrind (one buglet
when run with static found in test-suite, commited)
2. ibm testsuite on linux/x86-64 using clean ompi-trunk and ompi-ddt:
all passed (except for the dynamic/ tests failed!! as trunk/MTT)
3. compilation and usage of HDF5 tests on Jaguar using PGI and
PathScale compilers.
4. compilation and usage on Scicortex.
- Please note, that for the heterogeneous case, (-m32 compiled
binaries/ompi), neither
ompi-trunk, nor ompi-ddt branch would successfully launch.
This commit was SVN r21641.
2009-07-13 08:56:31 +04:00
|
|
|
size_t packed_ddt_len = ompi_datatype_pack_description_length(sendreq->req_target_datatype);
|
2006-02-07 15:16:23 +03:00
|
|
|
|
|
|
|
/* we always need to send the ddt */
|
|
|
|
needed_len += packed_ddt_len;
|
|
|
|
if (OMPI_OSC_PT2PT_GET != sendreq->req_type) {
|
|
|
|
needed_len += sendreq->req_origin_bytes_packed;
|
|
|
|
}
|
|
|
|
|
2009-05-01 02:36:09 +04:00
|
|
|
/* verify at least enough space for header */
|
|
|
|
if (mca_osc_pt2pt_component.p2p_c_eager_size
|
|
|
|
< sizeof(ompi_osc_pt2pt_send_header_t) + packed_ddt_len) {
|
|
|
|
ret = MPI_ERR_TRUNCATE;
|
|
|
|
goto cleanup;
|
|
|
|
}
|
|
|
|
|
2006-08-03 04:10:19 +04:00
|
|
|
/* Get a buffer */
|
|
|
|
OPAL_FREE_LIST_GET(&mca_osc_pt2pt_component.p2p_c_buffers,
|
|
|
|
item, ret);
|
|
|
|
if (NULL == item) {
|
2006-01-28 18:38:37 +03:00
|
|
|
ret = OMPI_ERR_TEMP_OUT_OF_RESOURCE;
|
|
|
|
goto cleanup;
|
|
|
|
}
|
2006-08-03 04:10:19 +04:00
|
|
|
buffer = (ompi_osc_pt2pt_buffer_t*) item;
|
2006-01-28 18:38:37 +03:00
|
|
|
|
2006-08-03 04:10:19 +04:00
|
|
|
/* setup buffer */
|
2007-05-21 06:21:25 +04:00
|
|
|
buffer->mpireq.cbfunc = ompi_osc_pt2pt_sendreq_send_cb;
|
|
|
|
buffer->mpireq.cbdata = (void*) sendreq;
|
2006-01-28 18:38:37 +03:00
|
|
|
|
|
|
|
/* pack header */
|
2006-08-03 04:10:19 +04:00
|
|
|
header = (ompi_osc_pt2pt_send_header_t*) buffer->payload;
|
2006-01-28 18:38:37 +03:00
|
|
|
written_data += sizeof(ompi_osc_pt2pt_send_header_t);
|
2006-02-27 21:47:00 +03:00
|
|
|
header->hdr_base.hdr_flags = 0;
|
2007-07-11 21:16:06 +04:00
|
|
|
header->hdr_origin = ompi_comm_rank(sendreq->req_module->p2p_comm);
|
2007-01-05 01:07:37 +03:00
|
|
|
header->hdr_origin_sendreq.pval = (void*) sendreq;
|
2006-01-28 18:38:37 +03:00
|
|
|
header->hdr_origin_tag = 0;
|
|
|
|
header->hdr_target_disp = sendreq->req_target_disp;
|
|
|
|
header->hdr_target_count = sendreq->req_target_count;
|
|
|
|
|
|
|
|
switch (sendreq->req_type) {
|
|
|
|
case OMPI_OSC_PT2PT_PUT:
|
2006-02-07 15:16:23 +03:00
|
|
|
header->hdr_base.hdr_type = OMPI_OSC_PT2PT_HDR_PUT;
|
2009-05-07 00:11:28 +04:00
|
|
|
#if OPAL_ENABLE_MEM_DEBUG
|
2006-01-28 18:38:37 +03:00
|
|
|
header->hdr_target_op = 0;
|
|
|
|
#endif
|
|
|
|
break;
|
|
|
|
|
|
|
|
case OMPI_OSC_PT2PT_ACC:
|
2006-02-07 15:16:23 +03:00
|
|
|
header->hdr_base.hdr_type = OMPI_OSC_PT2PT_HDR_ACC;
|
2006-01-28 18:38:37 +03:00
|
|
|
header->hdr_target_op = sendreq->req_op_id;
|
|
|
|
break;
|
|
|
|
|
|
|
|
case OMPI_OSC_PT2PT_GET:
|
2006-02-07 15:16:23 +03:00
|
|
|
header->hdr_base.hdr_type = OMPI_OSC_PT2PT_HDR_GET;
|
2009-05-07 00:11:28 +04:00
|
|
|
#if OPAL_ENABLE_MEM_DEBUG
|
2006-01-28 18:38:37 +03:00
|
|
|
header->hdr_target_op = 0;
|
|
|
|
#endif
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* Set datatype id and / or pack datatype */
|
- Split the datatype engine into two parts: an MPI specific part in
OMPI
and a language agnostic part in OPAL. The convertor is completely
moved into OPAL. This offers several benefits as described in RFC
http://www.open-mpi.org/community/lists/devel/2009/07/6387.php
namely:
- Fewer basic types (int* and float* types, boolean and wchar
- Fixing naming scheme to ompi-nomenclature.
- Usability outside of the ompi-layer.
- Due to the fixed nature of simple opal types, their information is
completely
known at compile time and therefore constified
- With fewer datatypes (22), the actual sizes of bit-field types may be
reduced
from 64 to 32 bits, allowing reorganizing the opal_datatype
structure, eliminating holes and keeping data required in convertor
(upon send/recv) in one cacheline...
This has implications to the convertor-datastructure and other parts
of the code.
- Several performance tests have been run, the netpipe latency does not
change with
this patch on Linux/x86-64 on the smoky cluster.
- Extensive tests have been done to verify correctness (no new
regressions) using:
1. mpi_test_suite on linux/x86-64 using clean ompi-trunk and
ompi-ddt:
a. running both trunk and ompi-ddt resulted in no differences
(except for MPI_SHORT_INT and MPI_TYPE_MIX_LB_UB do now run
correctly).
b. with --enable-memchecker and running under valgrind (one buglet
when run with static found in test-suite, commited)
2. ibm testsuite on linux/x86-64 using clean ompi-trunk and ompi-ddt:
all passed (except for the dynamic/ tests failed!! as trunk/MTT)
3. compilation and usage of HDF5 tests on Jaguar using PGI and
PathScale compilers.
4. compilation and usage on Scicortex.
- Please note, that for the heterogeneous case, (-m32 compiled
binaries/ompi), neither
ompi-trunk, nor ompi-ddt branch would successfully launch.
This commit was SVN r21641.
2009-07-13 08:56:31 +04:00
|
|
|
ret = ompi_datatype_get_pack_description(sendreq->req_target_datatype, &packed_ddt);
|
2006-02-07 15:16:23 +03:00
|
|
|
if (OMPI_SUCCESS != ret) goto cleanup;
|
2006-08-03 04:10:19 +04:00
|
|
|
memcpy((unsigned char*) buffer->payload + written_data,
|
2006-02-07 15:16:23 +03:00
|
|
|
packed_ddt, packed_ddt_len);
|
|
|
|
written_data += packed_ddt_len;
|
2009-05-01 02:36:09 +04:00
|
|
|
|
2006-01-28 18:38:37 +03:00
|
|
|
if (OMPI_OSC_PT2PT_GET != sendreq->req_type) {
|
|
|
|
/* if sending data and it fits, pack payload */
|
2006-08-03 04:10:19 +04:00
|
|
|
if (mca_osc_pt2pt_component.p2p_c_eager_size >=
|
2006-01-28 18:38:37 +03:00
|
|
|
written_data + sendreq->req_origin_bytes_packed) {
|
|
|
|
struct iovec iov;
|
|
|
|
uint32_t iov_count = 1;
|
|
|
|
size_t max_data = sendreq->req_origin_bytes_packed;
|
|
|
|
|
|
|
|
iov.iov_len = max_data;
|
2006-08-24 20:38:08 +04:00
|
|
|
iov.iov_base = (IOVBASE_TYPE*)((unsigned char*) buffer->payload + written_data);
|
2008-02-12 11:46:27 +03:00
|
|
|
MEMCHECKER(
|
2008-04-16 17:24:35 +04:00
|
|
|
memchecker_convertor_call(&opal_memchecker_base_mem_defined,
|
|
|
|
&sendreq->req_origin_convertor);
|
2008-02-12 11:46:27 +03:00
|
|
|
);
|
- Split the datatype engine into two parts: an MPI specific part in
OMPI
and a language agnostic part in OPAL. The convertor is completely
moved into OPAL. This offers several benefits as described in RFC
http://www.open-mpi.org/community/lists/devel/2009/07/6387.php
namely:
- Fewer basic types (int* and float* types, boolean and wchar
- Fixing naming scheme to ompi-nomenclature.
- Usability outside of the ompi-layer.
- Due to the fixed nature of simple opal types, their information is
completely
known at compile time and therefore constified
- With fewer datatypes (22), the actual sizes of bit-field types may be
reduced
from 64 to 32 bits, allowing reorganizing the opal_datatype
structure, eliminating holes and keeping data required in convertor
(upon send/recv) in one cacheline...
This has implications to the convertor-datastructure and other parts
of the code.
- Several performance tests have been run, the netpipe latency does not
change with
this patch on Linux/x86-64 on the smoky cluster.
- Extensive tests have been done to verify correctness (no new
regressions) using:
1. mpi_test_suite on linux/x86-64 using clean ompi-trunk and
ompi-ddt:
a. running both trunk and ompi-ddt resulted in no differences
(except for MPI_SHORT_INT and MPI_TYPE_MIX_LB_UB do now run
correctly).
b. with --enable-memchecker and running under valgrind (one buglet
when run with static found in test-suite, commited)
2. ibm testsuite on linux/x86-64 using clean ompi-trunk and ompi-ddt:
all passed (except for the dynamic/ tests failed!! as trunk/MTT)
3. compilation and usage of HDF5 tests on Jaguar using PGI and
PathScale compilers.
4. compilation and usage on Scicortex.
- Please note, that for the heterogeneous case, (-m32 compiled
binaries/ompi), neither
ompi-trunk, nor ompi-ddt branch would successfully launch.
This commit was SVN r21641.
2009-07-13 08:56:31 +04:00
|
|
|
ret = opal_convertor_pack(&sendreq->req_origin_convertor, &iov, &iov_count,
|
2006-10-27 03:11:26 +04:00
|
|
|
&max_data );
|
2008-02-12 11:46:27 +03:00
|
|
|
MEMCHECKER(
|
2008-04-16 17:24:35 +04:00
|
|
|
memchecker_convertor_call(&opal_memchecker_base_mem_noaccess,
|
|
|
|
&sendreq->req_origin_convertor);
|
2008-02-12 11:46:27 +03:00
|
|
|
);
|
2006-01-28 18:38:37 +03:00
|
|
|
if (ret < 0) {
|
|
|
|
ret = OMPI_ERR_FATAL;
|
|
|
|
goto cleanup;
|
|
|
|
}
|
|
|
|
|
|
|
|
assert(max_data == sendreq->req_origin_bytes_packed);
|
|
|
|
written_data += max_data;
|
|
|
|
|
|
|
|
header->hdr_msg_length = sendreq->req_origin_bytes_packed;
|
|
|
|
} else {
|
|
|
|
header->hdr_msg_length = 0;
|
|
|
|
header->hdr_origin_tag = create_send_tag(module);
|
|
|
|
}
|
|
|
|
} else {
|
|
|
|
header->hdr_msg_length = 0;
|
|
|
|
}
|
|
|
|
|
2006-08-03 04:10:19 +04:00
|
|
|
buffer->len = written_data;
|
|
|
|
|
2006-02-27 21:47:00 +03:00
|
|
|
#ifdef WORDS_BIGENDIAN
|
|
|
|
header->hdr_base.hdr_flags |= OMPI_OSC_PT2PT_HDR_FLAG_NBO;
|
2009-05-07 00:11:28 +04:00
|
|
|
#elif OPAL_ENABLE_HETEROGENEOUS_SUPPORT
|
2008-04-18 00:43:56 +04:00
|
|
|
if (sendreq->req_target_proc->proc_arch & OPAL_ARCH_ISBIGENDIAN) {
|
2006-02-27 21:47:00 +03:00
|
|
|
header->hdr_base.hdr_flags |= OMPI_OSC_PT2PT_HDR_FLAG_NBO;
|
|
|
|
OMPI_OSC_PT2PT_SEND_HDR_HTON(*header);
|
|
|
|
}
|
2006-01-28 18:38:37 +03:00
|
|
|
#endif
|
|
|
|
|
|
|
|
/* send fragment */
|
2008-06-09 18:53:58 +04:00
|
|
|
OPAL_OUTPUT_VERBOSE((51, ompi_osc_base_output,
|
2007-02-25 04:03:19 +03:00
|
|
|
"%d sending sendreq to %d",
|
2007-07-11 21:16:06 +04:00
|
|
|
ompi_comm_rank(sendreq->req_module->p2p_comm),
|
2007-02-25 04:03:19 +03:00
|
|
|
sendreq->req_target_rank));
|
2006-08-03 04:10:19 +04:00
|
|
|
ret = MCA_PML_CALL(isend(buffer->payload,
|
|
|
|
buffer->len,
|
|
|
|
MPI_BYTE,
|
|
|
|
sendreq->req_target_rank,
|
2006-11-28 00:55:41 +03:00
|
|
|
CONTROL_MSG_TAG,
|
2006-08-03 04:10:19 +04:00
|
|
|
MCA_PML_BASE_SEND_STANDARD,
|
|
|
|
module->p2p_comm,
|
2007-05-21 06:21:25 +04:00
|
|
|
&buffer->mpireq.request));
|
2008-02-12 11:46:27 +03:00
|
|
|
|
2007-05-21 06:21:25 +04:00
|
|
|
OPAL_THREAD_LOCK(&mca_osc_pt2pt_component.p2p_c_lock);
|
|
|
|
opal_list_append(&mca_osc_pt2pt_component.p2p_c_pending_requests,
|
|
|
|
&buffer->mpireq.super.super);
|
|
|
|
OPAL_THREAD_UNLOCK(&mca_osc_pt2pt_component.p2p_c_lock);
|
2008-04-16 17:24:35 +04:00
|
|
|
/* Need to be fixed.
|
|
|
|
* The payload is made undefined due to the isend call.
|
|
|
|
*/
|
|
|
|
MEMCHECKER(
|
|
|
|
opal_memchecker_base_mem_defined(buffer->payload, buffer->len);
|
|
|
|
);
|
2007-01-26 02:07:03 +03:00
|
|
|
if (OMPI_OSC_PT2PT_GET != sendreq->req_type &&
|
|
|
|
header->hdr_msg_length == 0) {
|
|
|
|
ompi_osc_pt2pt_longreq_t *longreq;
|
|
|
|
ompi_osc_pt2pt_longreq_alloc(&longreq);
|
|
|
|
|
2007-05-21 06:21:25 +04:00
|
|
|
longreq->mpireq.cbfunc = ompi_osc_pt2pt_sendreq_send_long_cb;
|
|
|
|
longreq->mpireq.cbdata = sendreq;
|
2008-06-09 18:53:58 +04:00
|
|
|
OPAL_OUTPUT_VERBOSE((50, ompi_osc_base_output,
|
2007-02-25 04:03:19 +03:00
|
|
|
"%d starting long sendreq to %d (%d)",
|
2007-07-11 21:16:06 +04:00
|
|
|
ompi_comm_rank(sendreq->req_module->p2p_comm),
|
2007-02-25 04:03:19 +03:00
|
|
|
sendreq->req_target_rank,
|
|
|
|
header->hdr_origin_tag));
|
2007-01-26 02:07:03 +03:00
|
|
|
|
|
|
|
mca_pml.pml_isend(sendreq->req_origin_convertor.pBaseBuf,
|
|
|
|
sendreq->req_origin_convertor.count,
|
|
|
|
sendreq->req_origin_datatype,
|
|
|
|
sendreq->req_target_rank,
|
|
|
|
header->hdr_origin_tag,
|
|
|
|
MCA_PML_BASE_SEND_STANDARD,
|
|
|
|
sendreq->req_module->p2p_comm,
|
2007-05-21 06:21:25 +04:00
|
|
|
&(longreq->mpireq.request));
|
2007-01-26 02:07:03 +03:00
|
|
|
|
|
|
|
/* put the send request in the waiting list */
|
2007-05-21 06:21:25 +04:00
|
|
|
OPAL_THREAD_LOCK(&mca_osc_pt2pt_component.p2p_c_lock);
|
|
|
|
opal_list_append(&mca_osc_pt2pt_component.p2p_c_pending_requests,
|
|
|
|
&(longreq->mpireq.super.super));
|
|
|
|
OPAL_THREAD_UNLOCK(&mca_osc_pt2pt_component.p2p_c_lock);
|
2007-01-26 02:07:03 +03:00
|
|
|
}
|
|
|
|
|
2006-01-28 18:38:37 +03:00
|
|
|
goto done;
|
|
|
|
|
|
|
|
cleanup:
|
2006-08-03 04:10:19 +04:00
|
|
|
if (item != NULL) {
|
|
|
|
OPAL_FREE_LIST_RETURN(&mca_osc_pt2pt_component.p2p_c_buffers,
|
|
|
|
item);
|
2006-01-28 18:38:37 +03:00
|
|
|
}
|
|
|
|
|
|
|
|
done:
|
|
|
|
return ret;
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
/**********************************************************************
|
|
|
|
*
|
|
|
|
* Sending a replyreq back to origin
|
|
|
|
*
|
|
|
|
**********************************************************************/
|
|
|
|
static void
|
2007-05-21 06:21:25 +04:00
|
|
|
ompi_osc_pt2pt_replyreq_send_long_cb(ompi_osc_pt2pt_mpireq_t *mpireq)
|
2006-01-28 18:38:37 +03:00
|
|
|
{
|
2007-05-21 06:21:25 +04:00
|
|
|
ompi_osc_pt2pt_longreq_t *longreq =
|
|
|
|
(ompi_osc_pt2pt_longreq_t*) mpireq;
|
2006-01-28 18:38:37 +03:00
|
|
|
ompi_osc_pt2pt_replyreq_t *replyreq =
|
2007-05-21 06:21:25 +04:00
|
|
|
(ompi_osc_pt2pt_replyreq_t*) mpireq->cbdata;
|
2006-01-28 18:38:37 +03:00
|
|
|
|
2007-05-21 06:21:25 +04:00
|
|
|
inmsg_mark_complete(replyreq->rep_module);
|
2006-01-28 18:38:37 +03:00
|
|
|
|
|
|
|
ompi_osc_pt2pt_longreq_free(longreq);
|
|
|
|
ompi_osc_pt2pt_replyreq_free(replyreq);
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
static void
|
2007-05-21 06:21:25 +04:00
|
|
|
ompi_osc_pt2pt_replyreq_send_cb(ompi_osc_pt2pt_mpireq_t *mpireq)
|
2006-01-28 18:38:37 +03:00
|
|
|
{
|
2007-05-21 06:21:25 +04:00
|
|
|
ompi_osc_pt2pt_buffer_t *buffer =
|
|
|
|
(ompi_osc_pt2pt_buffer_t*) mpireq;
|
2006-01-28 18:38:37 +03:00
|
|
|
ompi_osc_pt2pt_replyreq_t *replyreq =
|
2007-05-21 06:21:25 +04:00
|
|
|
(ompi_osc_pt2pt_replyreq_t*) mpireq->cbdata;
|
2006-01-28 18:38:37 +03:00
|
|
|
ompi_osc_pt2pt_reply_header_t *header =
|
2006-08-03 04:10:19 +04:00
|
|
|
(ompi_osc_pt2pt_reply_header_t*) buffer->payload;
|
2006-01-28 18:38:37 +03:00
|
|
|
|
2009-05-07 00:11:28 +04:00
|
|
|
#if !defined(WORDS_BIGENDIAN) && OPAL_ENABLE_HETEROGENEOUS_SUPPORT
|
2006-02-27 21:47:00 +03:00
|
|
|
if (header->hdr_base.hdr_flags & OMPI_OSC_PT2PT_HDR_FLAG_NBO) {
|
|
|
|
OMPI_OSC_PT2PT_REPLY_HDR_NTOH(*header);
|
|
|
|
}
|
|
|
|
#endif
|
|
|
|
|
2006-01-28 18:38:37 +03:00
|
|
|
/* do we need to post a send? */
|
|
|
|
if (header->hdr_msg_length != 0) {
|
|
|
|
/* sendreq is done. Mark it as so and get out of here */
|
2007-05-21 06:21:25 +04:00
|
|
|
inmsg_mark_complete(replyreq->rep_module);
|
2006-01-28 18:38:37 +03:00
|
|
|
ompi_osc_pt2pt_replyreq_free(replyreq);
|
|
|
|
}
|
|
|
|
|
|
|
|
/* release the descriptor and replyreq */
|
2006-08-03 04:10:19 +04:00
|
|
|
OPAL_FREE_LIST_RETURN(&mca_osc_pt2pt_component.p2p_c_buffers,
|
2007-05-21 06:21:25 +04:00
|
|
|
&mpireq->super);
|
2006-01-28 18:38:37 +03:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
int
|
|
|
|
ompi_osc_pt2pt_replyreq_send(ompi_osc_pt2pt_module_t *module,
|
|
|
|
ompi_osc_pt2pt_replyreq_t *replyreq)
|
|
|
|
{
|
|
|
|
int ret = OMPI_SUCCESS;
|
2006-08-03 04:10:19 +04:00
|
|
|
opal_free_list_item_t *item;
|
|
|
|
ompi_osc_pt2pt_buffer_t *buffer = NULL;
|
2006-01-28 18:38:37 +03:00
|
|
|
ompi_osc_pt2pt_reply_header_t *header = NULL;
|
|
|
|
size_t written_data = 0;
|
2006-08-03 04:10:19 +04:00
|
|
|
|
|
|
|
/* Get a buffer */
|
|
|
|
OPAL_FREE_LIST_GET(&mca_osc_pt2pt_component.p2p_c_buffers,
|
|
|
|
item, ret);
|
|
|
|
if (NULL == item) {
|
2006-01-28 18:38:37 +03:00
|
|
|
ret = OMPI_ERR_TEMP_OUT_OF_RESOURCE;
|
|
|
|
goto cleanup;
|
|
|
|
}
|
2006-08-03 04:10:19 +04:00
|
|
|
buffer = (ompi_osc_pt2pt_buffer_t*) item;
|
2006-01-28 18:38:37 +03:00
|
|
|
|
|
|
|
/* verify at least enough space for header */
|
2006-08-03 04:10:19 +04:00
|
|
|
if (mca_osc_pt2pt_component.p2p_c_eager_size < sizeof(ompi_osc_pt2pt_reply_header_t)) {
|
2006-01-28 18:38:37 +03:00
|
|
|
ret = OMPI_ERR_OUT_OF_RESOURCE;
|
|
|
|
goto cleanup;
|
|
|
|
}
|
|
|
|
|
2006-08-03 04:10:19 +04:00
|
|
|
/* setup buffer */
|
2007-05-21 06:21:25 +04:00
|
|
|
buffer->mpireq.cbfunc = ompi_osc_pt2pt_replyreq_send_cb;
|
|
|
|
buffer->mpireq.cbdata = (void*) replyreq;
|
2006-01-28 18:38:37 +03:00
|
|
|
|
|
|
|
/* pack header */
|
2006-08-03 04:10:19 +04:00
|
|
|
header = (ompi_osc_pt2pt_reply_header_t*) buffer->payload;
|
2006-01-28 18:38:37 +03:00
|
|
|
written_data += sizeof(ompi_osc_pt2pt_reply_header_t);
|
2006-02-07 15:16:23 +03:00
|
|
|
header->hdr_base.hdr_type = OMPI_OSC_PT2PT_HDR_REPLY;
|
2006-02-27 21:47:00 +03:00
|
|
|
header->hdr_base.hdr_flags = 0;
|
2006-01-28 18:38:37 +03:00
|
|
|
header->hdr_origin_sendreq = replyreq->rep_origin_sendreq;
|
|
|
|
header->hdr_target_tag = 0;
|
|
|
|
|
|
|
|
/* if sending data fits, pack payload */
|
2006-08-03 04:10:19 +04:00
|
|
|
if (mca_osc_pt2pt_component.p2p_c_eager_size >=
|
2006-01-28 18:38:37 +03:00
|
|
|
written_data + replyreq->rep_target_bytes_packed) {
|
|
|
|
struct iovec iov;
|
|
|
|
uint32_t iov_count = 1;
|
|
|
|
size_t max_data = replyreq->rep_target_bytes_packed;
|
|
|
|
|
2008-04-16 17:24:35 +04:00
|
|
|
iov.iov_len = max_data;
|
2006-08-24 20:38:08 +04:00
|
|
|
iov.iov_base = (IOVBASE_TYPE*)((unsigned char*) buffer->payload + written_data);
|
2008-02-12 11:46:27 +03:00
|
|
|
/*
|
|
|
|
* Before copy to the target buffer, make the target part
|
|
|
|
* accessable.
|
|
|
|
*/
|
|
|
|
MEMCHECKER(
|
2008-04-16 17:24:35 +04:00
|
|
|
memchecker_convertor_call(&opal_memchecker_base_mem_defined,
|
|
|
|
&replyreq->rep_target_convertor);
|
2008-02-12 11:46:27 +03:00
|
|
|
);
|
- Split the datatype engine into two parts: an MPI specific part in
OMPI
and a language agnostic part in OPAL. The convertor is completely
moved into OPAL. This offers several benefits as described in RFC
http://www.open-mpi.org/community/lists/devel/2009/07/6387.php
namely:
- Fewer basic types (int* and float* types, boolean and wchar
- Fixing naming scheme to ompi-nomenclature.
- Usability outside of the ompi-layer.
- Due to the fixed nature of simple opal types, their information is
completely
known at compile time and therefore constified
- With fewer datatypes (22), the actual sizes of bit-field types may be
reduced
from 64 to 32 bits, allowing reorganizing the opal_datatype
structure, eliminating holes and keeping data required in convertor
(upon send/recv) in one cacheline...
This has implications to the convertor-datastructure and other parts
of the code.
- Several performance tests have been run, the netpipe latency does not
change with
this patch on Linux/x86-64 on the smoky cluster.
- Extensive tests have been done to verify correctness (no new
regressions) using:
1. mpi_test_suite on linux/x86-64 using clean ompi-trunk and
ompi-ddt:
a. running both trunk and ompi-ddt resulted in no differences
(except for MPI_SHORT_INT and MPI_TYPE_MIX_LB_UB do now run
correctly).
b. with --enable-memchecker and running under valgrind (one buglet
when run with static found in test-suite, commited)
2. ibm testsuite on linux/x86-64 using clean ompi-trunk and ompi-ddt:
all passed (except for the dynamic/ tests failed!! as trunk/MTT)
3. compilation and usage of HDF5 tests on Jaguar using PGI and
PathScale compilers.
4. compilation and usage on Scicortex.
- Please note, that for the heterogeneous case, (-m32 compiled
binaries/ompi), neither
ompi-trunk, nor ompi-ddt branch would successfully launch.
This commit was SVN r21641.
2009-07-13 08:56:31 +04:00
|
|
|
ret = opal_convertor_pack(&replyreq->rep_target_convertor, &iov, &iov_count,
|
2006-10-27 03:11:26 +04:00
|
|
|
&max_data );
|
2008-02-12 11:46:27 +03:00
|
|
|
/* Copy finished, make the target buffer unaccessable. */
|
|
|
|
MEMCHECKER(
|
2008-04-16 17:24:35 +04:00
|
|
|
memchecker_convertor_call(&opal_memchecker_base_mem_noaccess,
|
|
|
|
&replyreq->rep_target_convertor);
|
2008-02-12 11:46:27 +03:00
|
|
|
);
|
|
|
|
|
2006-01-28 18:38:37 +03:00
|
|
|
if (ret < 0) {
|
|
|
|
ret = OMPI_ERR_FATAL;
|
|
|
|
goto cleanup;
|
|
|
|
}
|
|
|
|
|
|
|
|
assert(max_data == replyreq->rep_target_bytes_packed);
|
|
|
|
written_data += max_data;
|
|
|
|
|
|
|
|
header->hdr_msg_length = replyreq->rep_target_bytes_packed;
|
|
|
|
} else {
|
|
|
|
header->hdr_msg_length = 0;
|
|
|
|
header->hdr_target_tag = create_send_tag(module);
|
|
|
|
}
|
|
|
|
|
2006-08-03 04:10:19 +04:00
|
|
|
buffer->len = written_data;
|
|
|
|
|
2006-02-27 21:47:00 +03:00
|
|
|
#ifdef WORDS_BIGENDIAN
|
|
|
|
header->hdr_base.hdr_flags |= OMPI_OSC_PT2PT_HDR_FLAG_NBO;
|
2009-05-07 00:11:28 +04:00
|
|
|
#elif OPAL_ENABLE_HETEROGENEOUS_SUPPORT
|
2008-04-18 00:43:56 +04:00
|
|
|
if (replyreq->rep_origin_proc->proc_arch & OPAL_ARCH_ISBIGENDIAN) {
|
2006-02-27 21:47:00 +03:00
|
|
|
header->hdr_base.hdr_flags |= OMPI_OSC_PT2PT_HDR_FLAG_NBO;
|
|
|
|
OMPI_OSC_PT2PT_REPLY_HDR_HTON(*header);
|
|
|
|
}
|
2006-01-28 18:38:37 +03:00
|
|
|
#endif
|
|
|
|
|
|
|
|
/* send fragment */
|
2006-08-03 04:10:19 +04:00
|
|
|
ret = MCA_PML_CALL(isend(buffer->payload,
|
|
|
|
buffer->len,
|
|
|
|
MPI_BYTE,
|
|
|
|
replyreq->rep_origin_rank,
|
2006-11-28 00:55:41 +03:00
|
|
|
CONTROL_MSG_TAG,
|
2006-08-03 04:10:19 +04:00
|
|
|
MCA_PML_BASE_SEND_STANDARD,
|
|
|
|
module->p2p_comm,
|
2007-05-21 06:21:25 +04:00
|
|
|
&buffer->mpireq.request));
|
|
|
|
OPAL_THREAD_LOCK(&mca_osc_pt2pt_component.p2p_c_lock);
|
|
|
|
opal_list_append(&mca_osc_pt2pt_component.p2p_c_pending_requests,
|
|
|
|
&buffer->mpireq.super.super);
|
|
|
|
OPAL_THREAD_UNLOCK(&mca_osc_pt2pt_component.p2p_c_lock);
|
2006-08-03 04:10:19 +04:00
|
|
|
|
2008-04-16 17:24:35 +04:00
|
|
|
/* Need to be fixed.
|
|
|
|
* The payload is made undefined due to the isend call.
|
|
|
|
*/
|
|
|
|
MEMCHECKER(
|
|
|
|
opal_memchecker_base_mem_defined(buffer->payload, buffer->len);
|
|
|
|
);
|
2007-01-26 02:07:03 +03:00
|
|
|
if (header->hdr_msg_length == 0) {
|
|
|
|
ompi_osc_pt2pt_longreq_t *longreq;
|
|
|
|
ompi_osc_pt2pt_longreq_alloc(&longreq);
|
|
|
|
|
2007-05-21 06:21:25 +04:00
|
|
|
longreq->mpireq.cbfunc = ompi_osc_pt2pt_replyreq_send_long_cb;
|
|
|
|
longreq->mpireq.cbdata = replyreq;
|
2007-01-26 02:07:03 +03:00
|
|
|
|
|
|
|
mca_pml.pml_isend(replyreq->rep_target_convertor.pBaseBuf,
|
|
|
|
replyreq->rep_target_convertor.count,
|
|
|
|
replyreq->rep_target_datatype,
|
|
|
|
replyreq->rep_origin_rank,
|
|
|
|
header->hdr_target_tag,
|
|
|
|
MCA_PML_BASE_SEND_STANDARD,
|
2007-05-21 06:21:25 +04:00
|
|
|
module->p2p_comm,
|
|
|
|
&(longreq->mpireq.request));
|
2007-01-26 02:07:03 +03:00
|
|
|
|
|
|
|
/* put the send request in the waiting list */
|
2007-05-21 06:21:25 +04:00
|
|
|
OPAL_THREAD_LOCK(&mca_osc_pt2pt_component.p2p_c_lock);
|
|
|
|
opal_list_append(&mca_osc_pt2pt_component.p2p_c_pending_requests,
|
|
|
|
&longreq->mpireq.super.super);
|
|
|
|
OPAL_THREAD_UNLOCK(&mca_osc_pt2pt_component.p2p_c_lock);
|
2007-01-26 02:07:03 +03:00
|
|
|
}
|
2006-01-28 18:38:37 +03:00
|
|
|
goto done;
|
|
|
|
|
|
|
|
cleanup:
|
2006-08-03 04:10:19 +04:00
|
|
|
if (item != NULL) {
|
|
|
|
OPAL_FREE_LIST_RETURN(&mca_osc_pt2pt_component.p2p_c_buffers,
|
|
|
|
item);
|
2006-01-28 18:38:37 +03:00
|
|
|
}
|
|
|
|
|
|
|
|
done:
|
|
|
|
return ret;
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
/**********************************************************************
|
|
|
|
*
|
|
|
|
* Receive a put on the target side
|
|
|
|
*
|
|
|
|
**********************************************************************/
|
|
|
|
static void
|
2007-05-21 06:21:25 +04:00
|
|
|
ompi_osc_pt2pt_sendreq_recv_put_long_cb(ompi_osc_pt2pt_mpireq_t *mpireq)
|
2006-01-28 18:38:37 +03:00
|
|
|
{
|
2007-05-21 06:21:25 +04:00
|
|
|
ompi_osc_pt2pt_longreq_t *longreq =
|
|
|
|
(ompi_osc_pt2pt_longreq_t*) mpireq;
|
2006-01-28 18:38:37 +03:00
|
|
|
|
|
|
|
OBJ_RELEASE(longreq->req_datatype);
|
|
|
|
ompi_osc_pt2pt_longreq_free(longreq);
|
|
|
|
|
2007-05-21 06:21:25 +04:00
|
|
|
inmsg_mark_complete(longreq->req_module);
|
2006-01-28 18:38:37 +03:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
int
|
|
|
|
ompi_osc_pt2pt_sendreq_recv_put(ompi_osc_pt2pt_module_t *module,
|
|
|
|
ompi_osc_pt2pt_send_header_t *header,
|
|
|
|
void *inbuf)
|
|
|
|
{
|
|
|
|
int ret = OMPI_SUCCESS;
|
|
|
|
void *target = (unsigned char*) module->p2p_win->w_baseptr +
|
2008-02-07 21:45:35 +03:00
|
|
|
((unsigned long)header->hdr_target_disp * module->p2p_win->w_disp_unit);
|
2006-09-21 02:14:46 +04:00
|
|
|
ompi_proc_t *proc = ompi_comm_peer_lookup( module->p2p_comm, header->hdr_origin );
|
2006-02-07 15:16:23 +03:00
|
|
|
struct ompi_datatype_t *datatype =
|
2007-07-14 00:46:12 +04:00
|
|
|
ompi_osc_base_datatype_create(proc, &inbuf);
|
2006-01-28 18:38:37 +03:00
|
|
|
|
2007-05-31 00:32:02 +04:00
|
|
|
if (NULL == datatype) {
|
2008-06-09 18:53:58 +04:00
|
|
|
opal_output(ompi_osc_base_output,
|
2007-05-31 00:32:02 +04:00
|
|
|
"Error recreating datatype. Aborting.");
|
2007-05-31 00:33:08 +04:00
|
|
|
ompi_mpi_abort(module->p2p_comm, 1, false);
|
2007-05-31 00:32:02 +04:00
|
|
|
}
|
|
|
|
|
2006-01-28 18:38:37 +03:00
|
|
|
if (header->hdr_msg_length > 0) {
|
- Split the datatype engine into two parts: an MPI specific part in
OMPI
and a language agnostic part in OPAL. The convertor is completely
moved into OPAL. This offers several benefits as described in RFC
http://www.open-mpi.org/community/lists/devel/2009/07/6387.php
namely:
- Fewer basic types (int* and float* types, boolean and wchar
- Fixing naming scheme to ompi-nomenclature.
- Usability outside of the ompi-layer.
- Due to the fixed nature of simple opal types, their information is
completely
known at compile time and therefore constified
- With fewer datatypes (22), the actual sizes of bit-field types may be
reduced
from 64 to 32 bits, allowing reorganizing the opal_datatype
structure, eliminating holes and keeping data required in convertor
(upon send/recv) in one cacheline...
This has implications to the convertor-datastructure and other parts
of the code.
- Several performance tests have been run, the netpipe latency does not
change with
this patch on Linux/x86-64 on the smoky cluster.
- Extensive tests have been done to verify correctness (no new
regressions) using:
1. mpi_test_suite on linux/x86-64 using clean ompi-trunk and
ompi-ddt:
a. running both trunk and ompi-ddt resulted in no differences
(except for MPI_SHORT_INT and MPI_TYPE_MIX_LB_UB do now run
correctly).
b. with --enable-memchecker and running under valgrind (one buglet
when run with static found in test-suite, commited)
2. ibm testsuite on linux/x86-64 using clean ompi-trunk and ompi-ddt:
all passed (except for the dynamic/ tests failed!! as trunk/MTT)
3. compilation and usage of HDF5 tests on Jaguar using PGI and
PathScale compilers.
4. compilation and usage on Scicortex.
- Please note, that for the heterogeneous case, (-m32 compiled
binaries/ompi), neither
ompi-trunk, nor ompi-ddt branch would successfully launch.
This commit was SVN r21641.
2009-07-13 08:56:31 +04:00
|
|
|
opal_convertor_t convertor;
|
2006-01-28 18:38:37 +03:00
|
|
|
struct iovec iov;
|
|
|
|
uint32_t iov_count = 1;
|
|
|
|
size_t max_data;
|
|
|
|
ompi_proc_t *proc;
|
|
|
|
|
|
|
|
/* create convertor */
|
- Split the datatype engine into two parts: an MPI specific part in
OMPI
and a language agnostic part in OPAL. The convertor is completely
moved into OPAL. This offers several benefits as described in RFC
http://www.open-mpi.org/community/lists/devel/2009/07/6387.php
namely:
- Fewer basic types (int* and float* types, boolean and wchar
- Fixing naming scheme to ompi-nomenclature.
- Usability outside of the ompi-layer.
- Due to the fixed nature of simple opal types, their information is
completely
known at compile time and therefore constified
- With fewer datatypes (22), the actual sizes of bit-field types may be
reduced
from 64 to 32 bits, allowing reorganizing the opal_datatype
structure, eliminating holes and keeping data required in convertor
(upon send/recv) in one cacheline...
This has implications to the convertor-datastructure and other parts
of the code.
- Several performance tests have been run, the netpipe latency does not
change with
this patch on Linux/x86-64 on the smoky cluster.
- Extensive tests have been done to verify correctness (no new
regressions) using:
1. mpi_test_suite on linux/x86-64 using clean ompi-trunk and
ompi-ddt:
a. running both trunk and ompi-ddt resulted in no differences
(except for MPI_SHORT_INT and MPI_TYPE_MIX_LB_UB do now run
correctly).
b. with --enable-memchecker and running under valgrind (one buglet
when run with static found in test-suite, commited)
2. ibm testsuite on linux/x86-64 using clean ompi-trunk and ompi-ddt:
all passed (except for the dynamic/ tests failed!! as trunk/MTT)
3. compilation and usage of HDF5 tests on Jaguar using PGI and
PathScale compilers.
4. compilation and usage on Scicortex.
- Please note, that for the heterogeneous case, (-m32 compiled
binaries/ompi), neither
ompi-trunk, nor ompi-ddt branch would successfully launch.
This commit was SVN r21641.
2009-07-13 08:56:31 +04:00
|
|
|
OBJ_CONSTRUCT(&convertor, opal_convertor_t);
|
2006-01-28 18:38:37 +03:00
|
|
|
|
|
|
|
/* initialize convertor */
|
|
|
|
proc = ompi_comm_peer_lookup(module->p2p_comm, header->hdr_origin);
|
- Split the datatype engine into two parts: an MPI specific part in
OMPI
and a language agnostic part in OPAL. The convertor is completely
moved into OPAL. This offers several benefits as described in RFC
http://www.open-mpi.org/community/lists/devel/2009/07/6387.php
namely:
- Fewer basic types (int* and float* types, boolean and wchar
- Fixing naming scheme to ompi-nomenclature.
- Usability outside of the ompi-layer.
- Due to the fixed nature of simple opal types, their information is
completely
known at compile time and therefore constified
- With fewer datatypes (22), the actual sizes of bit-field types may be
reduced
from 64 to 32 bits, allowing reorganizing the opal_datatype
structure, eliminating holes and keeping data required in convertor
(upon send/recv) in one cacheline...
This has implications to the convertor-datastructure and other parts
of the code.
- Several performance tests have been run, the netpipe latency does not
change with
this patch on Linux/x86-64 on the smoky cluster.
- Extensive tests have been done to verify correctness (no new
regressions) using:
1. mpi_test_suite on linux/x86-64 using clean ompi-trunk and
ompi-ddt:
a. running both trunk and ompi-ddt resulted in no differences
(except for MPI_SHORT_INT and MPI_TYPE_MIX_LB_UB do now run
correctly).
b. with --enable-memchecker and running under valgrind (one buglet
when run with static found in test-suite, commited)
2. ibm testsuite on linux/x86-64 using clean ompi-trunk and ompi-ddt:
all passed (except for the dynamic/ tests failed!! as trunk/MTT)
3. compilation and usage of HDF5 tests on Jaguar using PGI and
PathScale compilers.
4. compilation and usage on Scicortex.
- Please note, that for the heterogeneous case, (-m32 compiled
binaries/ompi), neither
ompi-trunk, nor ompi-ddt branch would successfully launch.
This commit was SVN r21641.
2009-07-13 08:56:31 +04:00
|
|
|
opal_convertor_copy_and_prepare_for_recv(proc->proc_convertor,
|
|
|
|
&(datatype->super),
|
2006-01-28 18:38:37 +03:00
|
|
|
header->hdr_target_count,
|
|
|
|
target,
|
2006-03-17 21:46:48 +03:00
|
|
|
0,
|
2006-01-28 18:38:37 +03:00
|
|
|
&convertor);
|
|
|
|
iov.iov_len = header->hdr_msg_length;
|
2006-08-24 20:38:08 +04:00
|
|
|
iov.iov_base = (IOVBASE_TYPE*)inbuf;
|
2006-01-28 18:38:37 +03:00
|
|
|
max_data = iov.iov_len;
|
2008-02-12 11:46:27 +03:00
|
|
|
/*
|
|
|
|
* Before copy to the user buffer, make the target part
|
|
|
|
* accessable.
|
|
|
|
*/
|
|
|
|
MEMCHECKER(
|
2008-04-16 17:24:35 +04:00
|
|
|
memchecker_convertor_call(&opal_memchecker_base_mem_defined,
|
|
|
|
&convertor);
|
2008-02-12 11:46:27 +03:00
|
|
|
);
|
- Split the datatype engine into two parts: an MPI specific part in
OMPI
and a language agnostic part in OPAL. The convertor is completely
moved into OPAL. This offers several benefits as described in RFC
http://www.open-mpi.org/community/lists/devel/2009/07/6387.php
namely:
- Fewer basic types (int* and float* types, boolean and wchar
- Fixing naming scheme to ompi-nomenclature.
- Usability outside of the ompi-layer.
- Due to the fixed nature of simple opal types, their information is
completely
known at compile time and therefore constified
- With fewer datatypes (22), the actual sizes of bit-field types may be
reduced
from 64 to 32 bits, allowing reorganizing the opal_datatype
structure, eliminating holes and keeping data required in convertor
(upon send/recv) in one cacheline...
This has implications to the convertor-datastructure and other parts
of the code.
- Several performance tests have been run, the netpipe latency does not
change with
this patch on Linux/x86-64 on the smoky cluster.
- Extensive tests have been done to verify correctness (no new
regressions) using:
1. mpi_test_suite on linux/x86-64 using clean ompi-trunk and
ompi-ddt:
a. running both trunk and ompi-ddt resulted in no differences
(except for MPI_SHORT_INT and MPI_TYPE_MIX_LB_UB do now run
correctly).
b. with --enable-memchecker and running under valgrind (one buglet
when run with static found in test-suite, commited)
2. ibm testsuite on linux/x86-64 using clean ompi-trunk and ompi-ddt:
all passed (except for the dynamic/ tests failed!! as trunk/MTT)
3. compilation and usage of HDF5 tests on Jaguar using PGI and
PathScale compilers.
4. compilation and usage on Scicortex.
- Please note, that for the heterogeneous case, (-m32 compiled
binaries/ompi), neither
ompi-trunk, nor ompi-ddt branch would successfully launch.
This commit was SVN r21641.
2009-07-13 08:56:31 +04:00
|
|
|
opal_convertor_unpack(&convertor,
|
2006-01-28 18:38:37 +03:00
|
|
|
&iov,
|
|
|
|
&iov_count,
|
2006-10-27 03:11:26 +04:00
|
|
|
&max_data );
|
2008-02-12 11:46:27 +03:00
|
|
|
/* Copy finished, make the user buffer unaccessable. */
|
|
|
|
MEMCHECKER(
|
2008-04-16 17:24:35 +04:00
|
|
|
memchecker_convertor_call(&opal_memchecker_base_mem_noaccess,
|
|
|
|
&convertor);
|
2008-02-12 11:46:27 +03:00
|
|
|
);
|
2006-01-28 18:38:37 +03:00
|
|
|
OBJ_DESTRUCT(&convertor);
|
|
|
|
OBJ_RELEASE(datatype);
|
2007-05-21 06:21:25 +04:00
|
|
|
inmsg_mark_complete(module);
|
2006-01-28 18:38:37 +03:00
|
|
|
} else {
|
|
|
|
ompi_osc_pt2pt_longreq_t *longreq;
|
|
|
|
ompi_osc_pt2pt_longreq_alloc(&longreq);
|
|
|
|
|
2007-05-21 06:21:25 +04:00
|
|
|
longreq->mpireq.cbfunc = ompi_osc_pt2pt_sendreq_recv_put_long_cb;
|
|
|
|
longreq->mpireq.cbdata = NULL;
|
2006-01-28 18:38:37 +03:00
|
|
|
longreq->req_datatype = datatype;
|
|
|
|
longreq->req_module = module;
|
|
|
|
|
|
|
|
ret = mca_pml.pml_irecv(target,
|
|
|
|
header->hdr_target_count,
|
|
|
|
datatype,
|
|
|
|
header->hdr_origin,
|
|
|
|
header->hdr_origin_tag,
|
|
|
|
module->p2p_comm,
|
2007-05-21 06:21:25 +04:00
|
|
|
&(longreq->mpireq.request));
|
2006-01-28 18:38:37 +03:00
|
|
|
|
|
|
|
/* put the send request in the waiting list */
|
2007-05-21 06:21:25 +04:00
|
|
|
OPAL_THREAD_LOCK(&mca_osc_pt2pt_component.p2p_c_lock);
|
|
|
|
opal_list_append(&mca_osc_pt2pt_component.p2p_c_pending_requests,
|
|
|
|
&(longreq->mpireq.super.super));
|
|
|
|
OPAL_THREAD_UNLOCK(&mca_osc_pt2pt_component.p2p_c_lock);
|
2006-01-28 18:38:37 +03:00
|
|
|
}
|
|
|
|
|
|
|
|
return ret;
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
/**********************************************************************
|
|
|
|
*
|
|
|
|
* Receive an accumulate on the target side
|
|
|
|
*
|
|
|
|
**********************************************************************/
|
|
|
|
static void
|
2007-05-21 06:21:25 +04:00
|
|
|
ompi_osc_pt2pt_sendreq_recv_accum_long_cb(ompi_osc_pt2pt_mpireq_t *mpireq)
|
2006-01-28 18:38:37 +03:00
|
|
|
{
|
2007-05-21 06:21:25 +04:00
|
|
|
ompi_osc_pt2pt_longreq_t *longreq =
|
|
|
|
(ompi_osc_pt2pt_longreq_t*) mpireq;
|
2007-07-14 00:46:12 +04:00
|
|
|
ompi_osc_pt2pt_module_t *module = longreq->req_module;
|
2006-01-28 18:38:37 +03:00
|
|
|
ompi_osc_pt2pt_send_header_t *header =
|
2007-05-21 06:21:25 +04:00
|
|
|
(ompi_osc_pt2pt_send_header_t*) mpireq->cbdata;
|
2006-01-28 18:38:37 +03:00
|
|
|
void *payload = (void*) (header + 1);
|
|
|
|
int ret;
|
2007-07-14 00:46:12 +04:00
|
|
|
void *target = (unsigned char*) module->p2p_win->w_baseptr +
|
2008-02-07 21:45:35 +03:00
|
|
|
((unsigned long)header->hdr_target_disp * module->p2p_win->w_disp_unit);
|
2006-01-28 18:38:37 +03:00
|
|
|
|
|
|
|
/* lock the window for accumulates */
|
|
|
|
OPAL_THREAD_LOCK(&longreq->req_module->p2p_acc_lock);
|
|
|
|
|
2009-02-24 20:17:33 +03:00
|
|
|
if (longreq->req_op == &ompi_mpi_op_replace.op) {
|
- Split the datatype engine into two parts: an MPI specific part in
OMPI
and a language agnostic part in OPAL. The convertor is completely
moved into OPAL. This offers several benefits as described in RFC
http://www.open-mpi.org/community/lists/devel/2009/07/6387.php
namely:
- Fewer basic types (int* and float* types, boolean and wchar
- Fixing naming scheme to ompi-nomenclature.
- Usability outside of the ompi-layer.
- Due to the fixed nature of simple opal types, their information is
completely
known at compile time and therefore constified
- With fewer datatypes (22), the actual sizes of bit-field types may be
reduced
from 64 to 32 bits, allowing reorganizing the opal_datatype
structure, eliminating holes and keeping data required in convertor
(upon send/recv) in one cacheline...
This has implications to the convertor-datastructure and other parts
of the code.
- Several performance tests have been run, the netpipe latency does not
change with
this patch on Linux/x86-64 on the smoky cluster.
- Extensive tests have been done to verify correctness (no new
regressions) using:
1. mpi_test_suite on linux/x86-64 using clean ompi-trunk and
ompi-ddt:
a. running both trunk and ompi-ddt resulted in no differences
(except for MPI_SHORT_INT and MPI_TYPE_MIX_LB_UB do now run
correctly).
b. with --enable-memchecker and running under valgrind (one buglet
when run with static found in test-suite, commited)
2. ibm testsuite on linux/x86-64 using clean ompi-trunk and ompi-ddt:
all passed (except for the dynamic/ tests failed!! as trunk/MTT)
3. compilation and usage of HDF5 tests on Jaguar using PGI and
PathScale compilers.
4. compilation and usage on Scicortex.
- Please note, that for the heterogeneous case, (-m32 compiled
binaries/ompi), neither
ompi-trunk, nor ompi-ddt branch would successfully launch.
This commit was SVN r21641.
2009-07-13 08:56:31 +04:00
|
|
|
opal_convertor_t convertor;
|
2007-07-14 00:46:12 +04:00
|
|
|
struct iovec iov;
|
|
|
|
uint32_t iov_count = 1;
|
|
|
|
size_t max_data;
|
|
|
|
|
|
|
|
/* create convertor */
|
- Split the datatype engine into two parts: an MPI specific part in
OMPI
and a language agnostic part in OPAL. The convertor is completely
moved into OPAL. This offers several benefits as described in RFC
http://www.open-mpi.org/community/lists/devel/2009/07/6387.php
namely:
- Fewer basic types (int* and float* types, boolean and wchar
- Fixing naming scheme to ompi-nomenclature.
- Usability outside of the ompi-layer.
- Due to the fixed nature of simple opal types, their information is
completely
known at compile time and therefore constified
- With fewer datatypes (22), the actual sizes of bit-field types may be
reduced
from 64 to 32 bits, allowing reorganizing the opal_datatype
structure, eliminating holes and keeping data required in convertor
(upon send/recv) in one cacheline...
This has implications to the convertor-datastructure and other parts
of the code.
- Several performance tests have been run, the netpipe latency does not
change with
this patch on Linux/x86-64 on the smoky cluster.
- Extensive tests have been done to verify correctness (no new
regressions) using:
1. mpi_test_suite on linux/x86-64 using clean ompi-trunk and
ompi-ddt:
a. running both trunk and ompi-ddt resulted in no differences
(except for MPI_SHORT_INT and MPI_TYPE_MIX_LB_UB do now run
correctly).
b. with --enable-memchecker and running under valgrind (one buglet
when run with static found in test-suite, commited)
2. ibm testsuite on linux/x86-64 using clean ompi-trunk and ompi-ddt:
all passed (except for the dynamic/ tests failed!! as trunk/MTT)
3. compilation and usage of HDF5 tests on Jaguar using PGI and
PathScale compilers.
4. compilation and usage on Scicortex.
- Please note, that for the heterogeneous case, (-m32 compiled
binaries/ompi), neither
ompi-trunk, nor ompi-ddt branch would successfully launch.
This commit was SVN r21641.
2009-07-13 08:56:31 +04:00
|
|
|
OBJ_CONSTRUCT(&convertor, opal_convertor_t);
|
2007-07-14 00:46:12 +04:00
|
|
|
|
|
|
|
/* initialize convertor */
|
- Split the datatype engine into two parts: an MPI specific part in
OMPI
and a language agnostic part in OPAL. The convertor is completely
moved into OPAL. This offers several benefits as described in RFC
http://www.open-mpi.org/community/lists/devel/2009/07/6387.php
namely:
- Fewer basic types (int* and float* types, boolean and wchar
- Fixing naming scheme to ompi-nomenclature.
- Usability outside of the ompi-layer.
- Due to the fixed nature of simple opal types, their information is
completely
known at compile time and therefore constified
- With fewer datatypes (22), the actual sizes of bit-field types may be
reduced
from 64 to 32 bits, allowing reorganizing the opal_datatype
structure, eliminating holes and keeping data required in convertor
(upon send/recv) in one cacheline...
This has implications to the convertor-datastructure and other parts
of the code.
- Several performance tests have been run, the netpipe latency does not
change with
this patch on Linux/x86-64 on the smoky cluster.
- Extensive tests have been done to verify correctness (no new
regressions) using:
1. mpi_test_suite on linux/x86-64 using clean ompi-trunk and
ompi-ddt:
a. running both trunk and ompi-ddt resulted in no differences
(except for MPI_SHORT_INT and MPI_TYPE_MIX_LB_UB do now run
correctly).
b. with --enable-memchecker and running under valgrind (one buglet
when run with static found in test-suite, commited)
2. ibm testsuite on linux/x86-64 using clean ompi-trunk and ompi-ddt:
all passed (except for the dynamic/ tests failed!! as trunk/MTT)
3. compilation and usage of HDF5 tests on Jaguar using PGI and
PathScale compilers.
4. compilation and usage on Scicortex.
- Please note, that for the heterogeneous case, (-m32 compiled
binaries/ompi), neither
ompi-trunk, nor ompi-ddt branch would successfully launch.
This commit was SVN r21641.
2009-07-13 08:56:31 +04:00
|
|
|
opal_convertor_copy_and_prepare_for_recv(ompi_proc_local()->proc_convertor,
|
|
|
|
&(longreq->req_datatype->super),
|
2007-07-14 00:46:12 +04:00
|
|
|
header->hdr_target_count,
|
|
|
|
target,
|
|
|
|
0,
|
|
|
|
&convertor);
|
|
|
|
|
|
|
|
iov.iov_len = header->hdr_msg_length;
|
|
|
|
iov.iov_base = (IOVBASE_TYPE*) payload;
|
|
|
|
max_data = iov.iov_len;
|
- Split the datatype engine into two parts: an MPI specific part in
OMPI
and a language agnostic part in OPAL. The convertor is completely
moved into OPAL. This offers several benefits as described in RFC
http://www.open-mpi.org/community/lists/devel/2009/07/6387.php
namely:
- Fewer basic types (int* and float* types, boolean and wchar
- Fixing naming scheme to ompi-nomenclature.
- Usability outside of the ompi-layer.
- Due to the fixed nature of simple opal types, their information is
completely
known at compile time and therefore constified
- With fewer datatypes (22), the actual sizes of bit-field types may be
reduced
from 64 to 32 bits, allowing reorganizing the opal_datatype
structure, eliminating holes and keeping data required in convertor
(upon send/recv) in one cacheline...
This has implications to the convertor-datastructure and other parts
of the code.
- Several performance tests have been run, the netpipe latency does not
change with
this patch on Linux/x86-64 on the smoky cluster.
- Extensive tests have been done to verify correctness (no new
regressions) using:
1. mpi_test_suite on linux/x86-64 using clean ompi-trunk and
ompi-ddt:
a. running both trunk and ompi-ddt resulted in no differences
(except for MPI_SHORT_INT and MPI_TYPE_MIX_LB_UB do now run
correctly).
b. with --enable-memchecker and running under valgrind (one buglet
when run with static found in test-suite, commited)
2. ibm testsuite on linux/x86-64 using clean ompi-trunk and ompi-ddt:
all passed (except for the dynamic/ tests failed!! as trunk/MTT)
3. compilation and usage of HDF5 tests on Jaguar using PGI and
PathScale compilers.
4. compilation and usage on Scicortex.
- Please note, that for the heterogeneous case, (-m32 compiled
binaries/ompi), neither
ompi-trunk, nor ompi-ddt branch would successfully launch.
This commit was SVN r21641.
2009-07-13 08:56:31 +04:00
|
|
|
opal_convertor_unpack(&convertor,
|
2007-07-14 00:46:12 +04:00
|
|
|
&iov,
|
|
|
|
&iov_count,
|
|
|
|
&max_data);
|
|
|
|
OBJ_DESTRUCT(&convertor);
|
2008-02-12 11:46:27 +03:00
|
|
|
} else {
|
|
|
|
/*
|
|
|
|
* Before copy to the user buffer, make the target part
|
|
|
|
* accessable.
|
|
|
|
*/
|
|
|
|
MEMCHECKER(
|
|
|
|
opal_memchecker_base_mem_defined( target, header->hdr_msg_length );
|
|
|
|
);
|
2007-07-14 00:46:12 +04:00
|
|
|
/* copy the data from the temporary buffer into the user window */
|
|
|
|
ret = ompi_osc_base_process_op(target,
|
|
|
|
payload,
|
|
|
|
header->hdr_msg_length,
|
|
|
|
longreq->req_datatype,
|
|
|
|
header->hdr_target_count,
|
|
|
|
longreq->req_op);
|
2008-02-12 11:46:27 +03:00
|
|
|
/* Copy finished, make the user buffer unaccessable. */
|
|
|
|
MEMCHECKER(
|
|
|
|
opal_memchecker_base_mem_noaccess( target, header->hdr_msg_length );
|
|
|
|
);
|
2007-07-14 00:46:12 +04:00
|
|
|
}
|
2006-01-28 18:38:37 +03:00
|
|
|
|
|
|
|
/* unlock the window for accumulates */
|
|
|
|
OPAL_THREAD_UNLOCK(&longreq->req_module->p2p_acc_lock);
|
2006-02-22 19:20:13 +03:00
|
|
|
|
2008-06-09 18:53:58 +04:00
|
|
|
OPAL_OUTPUT_VERBOSE((50, ompi_osc_base_output,
|
2007-02-25 04:03:19 +03:00
|
|
|
"%d finished receiving long accum message from %d",
|
2007-07-11 21:16:06 +04:00
|
|
|
ompi_comm_rank(longreq->req_module->p2p_comm),
|
2007-02-25 04:03:19 +03:00
|
|
|
header->hdr_origin));
|
2006-01-28 18:38:37 +03:00
|
|
|
|
|
|
|
/* free the temp buffer */
|
2007-05-21 06:21:25 +04:00
|
|
|
free(mpireq->cbdata);
|
2006-01-28 18:38:37 +03:00
|
|
|
|
|
|
|
/* Release datatype & op */
|
|
|
|
OBJ_RELEASE(longreq->req_datatype);
|
|
|
|
OBJ_RELEASE(longreq->req_op);
|
|
|
|
|
2007-05-21 06:21:25 +04:00
|
|
|
inmsg_mark_complete(longreq->req_module);
|
2006-01-28 18:38:37 +03:00
|
|
|
|
|
|
|
ompi_osc_pt2pt_longreq_free(longreq);
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
int
|
|
|
|
ompi_osc_pt2pt_sendreq_recv_accum(ompi_osc_pt2pt_module_t *module,
|
|
|
|
ompi_osc_pt2pt_send_header_t *header,
|
|
|
|
void *payload)
|
|
|
|
{
|
|
|
|
int ret = OMPI_SUCCESS;
|
2007-07-14 00:46:12 +04:00
|
|
|
struct ompi_op_t *op = ompi_osc_base_op_create(header->hdr_target_op);
|
2006-09-21 02:14:46 +04:00
|
|
|
ompi_proc_t *proc = ompi_comm_peer_lookup( module->p2p_comm, header->hdr_origin );
|
2006-02-07 15:16:23 +03:00
|
|
|
struct ompi_datatype_t *datatype =
|
2007-07-14 00:46:12 +04:00
|
|
|
ompi_osc_base_datatype_create(proc, &payload);
|
|
|
|
void *target = (unsigned char*) module->p2p_win->w_baseptr +
|
2008-02-07 21:45:35 +03:00
|
|
|
((unsigned long)header->hdr_target_disp * module->p2p_win->w_disp_unit);
|
2006-01-28 18:38:37 +03:00
|
|
|
|
2007-05-31 00:32:02 +04:00
|
|
|
if (NULL == datatype) {
|
2008-06-09 18:53:58 +04:00
|
|
|
opal_output(ompi_osc_base_output,
|
2007-05-31 00:32:02 +04:00
|
|
|
"Error recreating datatype. Aborting.");
|
2007-05-31 00:33:08 +04:00
|
|
|
ompi_mpi_abort(module->p2p_comm, 1, false);
|
2007-05-31 00:32:02 +04:00
|
|
|
}
|
|
|
|
|
2006-01-28 18:38:37 +03:00
|
|
|
if (header->hdr_msg_length > 0) {
|
|
|
|
/* lock the window for accumulates */
|
|
|
|
OPAL_THREAD_LOCK(&module->p2p_acc_lock);
|
|
|
|
|
2009-02-24 20:17:33 +03:00
|
|
|
if (op == &ompi_mpi_op_replace.op) {
|
- Split the datatype engine into two parts: an MPI specific part in
OMPI
and a language agnostic part in OPAL. The convertor is completely
moved into OPAL. This offers several benefits as described in RFC
http://www.open-mpi.org/community/lists/devel/2009/07/6387.php
namely:
- Fewer basic types (int* and float* types, boolean and wchar
- Fixing naming scheme to ompi-nomenclature.
- Usability outside of the ompi-layer.
- Due to the fixed nature of simple opal types, their information is
completely
known at compile time and therefore constified
- With fewer datatypes (22), the actual sizes of bit-field types may be
reduced
from 64 to 32 bits, allowing reorganizing the opal_datatype
structure, eliminating holes and keeping data required in convertor
(upon send/recv) in one cacheline...
This has implications to the convertor-datastructure and other parts
of the code.
- Several performance tests have been run, the netpipe latency does not
change with
this patch on Linux/x86-64 on the smoky cluster.
- Extensive tests have been done to verify correctness (no new
regressions) using:
1. mpi_test_suite on linux/x86-64 using clean ompi-trunk and
ompi-ddt:
a. running both trunk and ompi-ddt resulted in no differences
(except for MPI_SHORT_INT and MPI_TYPE_MIX_LB_UB do now run
correctly).
b. with --enable-memchecker and running under valgrind (one buglet
when run with static found in test-suite, commited)
2. ibm testsuite on linux/x86-64 using clean ompi-trunk and ompi-ddt:
all passed (except for the dynamic/ tests failed!! as trunk/MTT)
3. compilation and usage of HDF5 tests on Jaguar using PGI and
PathScale compilers.
4. compilation and usage on Scicortex.
- Please note, that for the heterogeneous case, (-m32 compiled
binaries/ompi), neither
ompi-trunk, nor ompi-ddt branch would successfully launch.
This commit was SVN r21641.
2009-07-13 08:56:31 +04:00
|
|
|
opal_convertor_t convertor;
|
2007-07-14 00:46:12 +04:00
|
|
|
struct iovec iov;
|
|
|
|
uint32_t iov_count = 1;
|
|
|
|
size_t max_data;
|
|
|
|
|
|
|
|
/* create convertor */
|
- Split the datatype engine into two parts: an MPI specific part in
OMPI
and a language agnostic part in OPAL. The convertor is completely
moved into OPAL. This offers several benefits as described in RFC
http://www.open-mpi.org/community/lists/devel/2009/07/6387.php
namely:
- Fewer basic types (int* and float* types, boolean and wchar
- Fixing naming scheme to ompi-nomenclature.
- Usability outside of the ompi-layer.
- Due to the fixed nature of simple opal types, their information is
completely
known at compile time and therefore constified
- With fewer datatypes (22), the actual sizes of bit-field types may be
reduced
from 64 to 32 bits, allowing reorganizing the opal_datatype
structure, eliminating holes and keeping data required in convertor
(upon send/recv) in one cacheline...
This has implications to the convertor-datastructure and other parts
of the code.
- Several performance tests have been run, the netpipe latency does not
change with
this patch on Linux/x86-64 on the smoky cluster.
- Extensive tests have been done to verify correctness (no new
regressions) using:
1. mpi_test_suite on linux/x86-64 using clean ompi-trunk and
ompi-ddt:
a. running both trunk and ompi-ddt resulted in no differences
(except for MPI_SHORT_INT and MPI_TYPE_MIX_LB_UB do now run
correctly).
b. with --enable-memchecker and running under valgrind (one buglet
when run with static found in test-suite, commited)
2. ibm testsuite on linux/x86-64 using clean ompi-trunk and ompi-ddt:
all passed (except for the dynamic/ tests failed!! as trunk/MTT)
3. compilation and usage of HDF5 tests on Jaguar using PGI and
PathScale compilers.
4. compilation and usage on Scicortex.
- Please note, that for the heterogeneous case, (-m32 compiled
binaries/ompi), neither
ompi-trunk, nor ompi-ddt branch would successfully launch.
This commit was SVN r21641.
2009-07-13 08:56:31 +04:00
|
|
|
OBJ_CONSTRUCT(&convertor, opal_convertor_t);
|
2007-07-14 00:46:12 +04:00
|
|
|
|
|
|
|
/* initialize convertor */
|
- Split the datatype engine into two parts: an MPI specific part in
OMPI
and a language agnostic part in OPAL. The convertor is completely
moved into OPAL. This offers several benefits as described in RFC
http://www.open-mpi.org/community/lists/devel/2009/07/6387.php
namely:
- Fewer basic types (int* and float* types, boolean and wchar
- Fixing naming scheme to ompi-nomenclature.
- Usability outside of the ompi-layer.
- Due to the fixed nature of simple opal types, their information is
completely
known at compile time and therefore constified
- With fewer datatypes (22), the actual sizes of bit-field types may be
reduced
from 64 to 32 bits, allowing reorganizing the opal_datatype
structure, eliminating holes and keeping data required in convertor
(upon send/recv) in one cacheline...
This has implications to the convertor-datastructure and other parts
of the code.
- Several performance tests have been run, the netpipe latency does not
change with
this patch on Linux/x86-64 on the smoky cluster.
- Extensive tests have been done to verify correctness (no new
regressions) using:
1. mpi_test_suite on linux/x86-64 using clean ompi-trunk and
ompi-ddt:
a. running both trunk and ompi-ddt resulted in no differences
(except for MPI_SHORT_INT and MPI_TYPE_MIX_LB_UB do now run
correctly).
b. with --enable-memchecker and running under valgrind (one buglet
when run with static found in test-suite, commited)
2. ibm testsuite on linux/x86-64 using clean ompi-trunk and ompi-ddt:
all passed (except for the dynamic/ tests failed!! as trunk/MTT)
3. compilation and usage of HDF5 tests on Jaguar using PGI and
PathScale compilers.
4. compilation and usage on Scicortex.
- Please note, that for the heterogeneous case, (-m32 compiled
binaries/ompi), neither
ompi-trunk, nor ompi-ddt branch would successfully launch.
This commit was SVN r21641.
2009-07-13 08:56:31 +04:00
|
|
|
opal_convertor_copy_and_prepare_for_recv(proc->proc_convertor,
|
|
|
|
&(datatype->super),
|
2007-07-14 00:46:12 +04:00
|
|
|
header->hdr_target_count,
|
|
|
|
target,
|
|
|
|
0,
|
|
|
|
&convertor);
|
|
|
|
|
|
|
|
iov.iov_len = header->hdr_msg_length;
|
|
|
|
iov.iov_base = (IOVBASE_TYPE*)payload;
|
|
|
|
max_data = iov.iov_len;
|
- Split the datatype engine into two parts: an MPI specific part in
OMPI
and a language agnostic part in OPAL. The convertor is completely
moved into OPAL. This offers several benefits as described in RFC
http://www.open-mpi.org/community/lists/devel/2009/07/6387.php
namely:
- Fewer basic types (int* and float* types, boolean and wchar
- Fixing naming scheme to ompi-nomenclature.
- Usability outside of the ompi-layer.
- Due to the fixed nature of simple opal types, their information is
completely
known at compile time and therefore constified
- With fewer datatypes (22), the actual sizes of bit-field types may be
reduced
from 64 to 32 bits, allowing reorganizing the opal_datatype
structure, eliminating holes and keeping data required in convertor
(upon send/recv) in one cacheline...
This has implications to the convertor-datastructure and other parts
of the code.
- Several performance tests have been run, the netpipe latency does not
change with
this patch on Linux/x86-64 on the smoky cluster.
- Extensive tests have been done to verify correctness (no new
regressions) using:
1. mpi_test_suite on linux/x86-64 using clean ompi-trunk and
ompi-ddt:
a. running both trunk and ompi-ddt resulted in no differences
(except for MPI_SHORT_INT and MPI_TYPE_MIX_LB_UB do now run
correctly).
b. with --enable-memchecker and running under valgrind (one buglet
when run with static found in test-suite, commited)
2. ibm testsuite on linux/x86-64 using clean ompi-trunk and ompi-ddt:
all passed (except for the dynamic/ tests failed!! as trunk/MTT)
3. compilation and usage of HDF5 tests on Jaguar using PGI and
PathScale compilers.
4. compilation and usage on Scicortex.
- Please note, that for the heterogeneous case, (-m32 compiled
binaries/ompi), neither
ompi-trunk, nor ompi-ddt branch would successfully launch.
This commit was SVN r21641.
2009-07-13 08:56:31 +04:00
|
|
|
opal_convertor_unpack(&convertor,
|
2007-07-14 00:46:12 +04:00
|
|
|
&iov,
|
|
|
|
&iov_count,
|
|
|
|
&max_data);
|
|
|
|
OBJ_DESTRUCT(&convertor);
|
|
|
|
} else {
|
|
|
|
void *buffer = NULL;
|
|
|
|
|
2009-05-07 00:11:28 +04:00
|
|
|
#if OPAL_ENABLE_HETEROGENEOUS_SUPPORT
|
2007-07-14 00:46:12 +04:00
|
|
|
if (proc->proc_arch != ompi_proc_local()->proc_arch) {
|
- Split the datatype engine into two parts: an MPI specific part in
OMPI
and a language agnostic part in OPAL. The convertor is completely
moved into OPAL. This offers several benefits as described in RFC
http://www.open-mpi.org/community/lists/devel/2009/07/6387.php
namely:
- Fewer basic types (int* and float* types, boolean and wchar
- Fixing naming scheme to ompi-nomenclature.
- Usability outside of the ompi-layer.
- Due to the fixed nature of simple opal types, their information is
completely
known at compile time and therefore constified
- With fewer datatypes (22), the actual sizes of bit-field types may be
reduced
from 64 to 32 bits, allowing reorganizing the opal_datatype
structure, eliminating holes and keeping data required in convertor
(upon send/recv) in one cacheline...
This has implications to the convertor-datastructure and other parts
of the code.
- Several performance tests have been run, the netpipe latency does not
change with
this patch on Linux/x86-64 on the smoky cluster.
- Extensive tests have been done to verify correctness (no new
regressions) using:
1. mpi_test_suite on linux/x86-64 using clean ompi-trunk and
ompi-ddt:
a. running both trunk and ompi-ddt resulted in no differences
(except for MPI_SHORT_INT and MPI_TYPE_MIX_LB_UB do now run
correctly).
b. with --enable-memchecker and running under valgrind (one buglet
when run with static found in test-suite, commited)
2. ibm testsuite on linux/x86-64 using clean ompi-trunk and ompi-ddt:
all passed (except for the dynamic/ tests failed!! as trunk/MTT)
3. compilation and usage of HDF5 tests on Jaguar using PGI and
PathScale compilers.
4. compilation and usage on Scicortex.
- Please note, that for the heterogeneous case, (-m32 compiled
binaries/ompi), neither
ompi-trunk, nor ompi-ddt branch would successfully launch.
This commit was SVN r21641.
2009-07-13 08:56:31 +04:00
|
|
|
opal_convertor_t convertor;
|
2007-07-14 00:46:12 +04:00
|
|
|
struct iovec iov;
|
|
|
|
uint32_t iov_count = 1;
|
|
|
|
size_t max_data;
|
|
|
|
struct ompi_datatype_t *primitive_datatype = NULL;
|
|
|
|
uint32_t primitive_count;
|
|
|
|
size_t buflen;
|
|
|
|
|
|
|
|
ompi_osc_base_get_primitive_type_info(datatype, &primitive_datatype, &primitive_count);
|
|
|
|
primitive_count *= header->hdr_target_count;
|
|
|
|
|
|
|
|
/* figure out how big a buffer we need */
|
- Split the datatype engine into two parts: an MPI specific part in
OMPI
and a language agnostic part in OPAL. The convertor is completely
moved into OPAL. This offers several benefits as described in RFC
http://www.open-mpi.org/community/lists/devel/2009/07/6387.php
namely:
- Fewer basic types (int* and float* types, boolean and wchar
- Fixing naming scheme to ompi-nomenclature.
- Usability outside of the ompi-layer.
- Due to the fixed nature of simple opal types, their information is
completely
known at compile time and therefore constified
- With fewer datatypes (22), the actual sizes of bit-field types may be
reduced
from 64 to 32 bits, allowing reorganizing the opal_datatype
structure, eliminating holes and keeping data required in convertor
(upon send/recv) in one cacheline...
This has implications to the convertor-datastructure and other parts
of the code.
- Several performance tests have been run, the netpipe latency does not
change with
this patch on Linux/x86-64 on the smoky cluster.
- Extensive tests have been done to verify correctness (no new
regressions) using:
1. mpi_test_suite on linux/x86-64 using clean ompi-trunk and
ompi-ddt:
a. running both trunk and ompi-ddt resulted in no differences
(except for MPI_SHORT_INT and MPI_TYPE_MIX_LB_UB do now run
correctly).
b. with --enable-memchecker and running under valgrind (one buglet
when run with static found in test-suite, commited)
2. ibm testsuite on linux/x86-64 using clean ompi-trunk and ompi-ddt:
all passed (except for the dynamic/ tests failed!! as trunk/MTT)
3. compilation and usage of HDF5 tests on Jaguar using PGI and
PathScale compilers.
4. compilation and usage on Scicortex.
- Please note, that for the heterogeneous case, (-m32 compiled
binaries/ompi), neither
ompi-trunk, nor ompi-ddt branch would successfully launch.
This commit was SVN r21641.
2009-07-13 08:56:31 +04:00
|
|
|
ompi_datatype_type_size(primitive_datatype, &buflen);
|
2007-07-14 00:46:12 +04:00
|
|
|
buflen *= primitive_count;
|
|
|
|
|
|
|
|
/* create convertor */
|
- Split the datatype engine into two parts: an MPI specific part in
OMPI
and a language agnostic part in OPAL. The convertor is completely
moved into OPAL. This offers several benefits as described in RFC
http://www.open-mpi.org/community/lists/devel/2009/07/6387.php
namely:
- Fewer basic types (int* and float* types, boolean and wchar
- Fixing naming scheme to ompi-nomenclature.
- Usability outside of the ompi-layer.
- Due to the fixed nature of simple opal types, their information is
completely
known at compile time and therefore constified
- With fewer datatypes (22), the actual sizes of bit-field types may be
reduced
from 64 to 32 bits, allowing reorganizing the opal_datatype
structure, eliminating holes and keeping data required in convertor
(upon send/recv) in one cacheline...
This has implications to the convertor-datastructure and other parts
of the code.
- Several performance tests have been run, the netpipe latency does not
change with
this patch on Linux/x86-64 on the smoky cluster.
- Extensive tests have been done to verify correctness (no new
regressions) using:
1. mpi_test_suite on linux/x86-64 using clean ompi-trunk and
ompi-ddt:
a. running both trunk and ompi-ddt resulted in no differences
(except for MPI_SHORT_INT and MPI_TYPE_MIX_LB_UB do now run
correctly).
b. with --enable-memchecker and running under valgrind (one buglet
when run with static found in test-suite, commited)
2. ibm testsuite on linux/x86-64 using clean ompi-trunk and ompi-ddt:
all passed (except for the dynamic/ tests failed!! as trunk/MTT)
3. compilation and usage of HDF5 tests on Jaguar using PGI and
PathScale compilers.
4. compilation and usage on Scicortex.
- Please note, that for the heterogeneous case, (-m32 compiled
binaries/ompi), neither
ompi-trunk, nor ompi-ddt branch would successfully launch.
This commit was SVN r21641.
2009-07-13 08:56:31 +04:00
|
|
|
OBJ_CONSTRUCT(&convertor, opal_convertor_t);
|
2007-07-14 00:46:12 +04:00
|
|
|
|
2008-07-17 23:04:40 +04:00
|
|
|
buffer = (void*) malloc(buflen);
|
|
|
|
if (NULL == buffer) return OMPI_ERR_TEMP_OUT_OF_RESOURCE;
|
2007-07-14 00:46:12 +04:00
|
|
|
|
|
|
|
/* initialize convertor */
|
- Split the datatype engine into two parts: an MPI specific part in
OMPI
and a language agnostic part in OPAL. The convertor is completely
moved into OPAL. This offers several benefits as described in RFC
http://www.open-mpi.org/community/lists/devel/2009/07/6387.php
namely:
- Fewer basic types (int* and float* types, boolean and wchar
- Fixing naming scheme to ompi-nomenclature.
- Usability outside of the ompi-layer.
- Due to the fixed nature of simple opal types, their information is
completely
known at compile time and therefore constified
- With fewer datatypes (22), the actual sizes of bit-field types may be
reduced
from 64 to 32 bits, allowing reorganizing the opal_datatype
structure, eliminating holes and keeping data required in convertor
(upon send/recv) in one cacheline...
This has implications to the convertor-datastructure and other parts
of the code.
- Several performance tests have been run, the netpipe latency does not
change with
this patch on Linux/x86-64 on the smoky cluster.
- Extensive tests have been done to verify correctness (no new
regressions) using:
1. mpi_test_suite on linux/x86-64 using clean ompi-trunk and
ompi-ddt:
a. running both trunk and ompi-ddt resulted in no differences
(except for MPI_SHORT_INT and MPI_TYPE_MIX_LB_UB do now run
correctly).
b. with --enable-memchecker and running under valgrind (one buglet
when run with static found in test-suite, commited)
2. ibm testsuite on linux/x86-64 using clean ompi-trunk and ompi-ddt:
all passed (except for the dynamic/ tests failed!! as trunk/MTT)
3. compilation and usage of HDF5 tests on Jaguar using PGI and
PathScale compilers.
4. compilation and usage on Scicortex.
- Please note, that for the heterogeneous case, (-m32 compiled
binaries/ompi), neither
ompi-trunk, nor ompi-ddt branch would successfully launch.
This commit was SVN r21641.
2009-07-13 08:56:31 +04:00
|
|
|
opal_convertor_copy_and_prepare_for_recv(proc->proc_convertor,
|
|
|
|
&(primitive_datatype->super),
|
2007-07-14 00:46:12 +04:00
|
|
|
primitive_count,
|
|
|
|
buffer,
|
|
|
|
0,
|
|
|
|
&convertor);
|
|
|
|
|
|
|
|
iov.iov_len = header->hdr_msg_length;
|
|
|
|
iov.iov_base = (IOVBASE_TYPE*)payload;
|
|
|
|
max_data = iov.iov_len;
|
- Split the datatype engine into two parts: an MPI specific part in
OMPI
and a language agnostic part in OPAL. The convertor is completely
moved into OPAL. This offers several benefits as described in RFC
http://www.open-mpi.org/community/lists/devel/2009/07/6387.php
namely:
- Fewer basic types (int* and float* types, boolean and wchar
- Fixing naming scheme to ompi-nomenclature.
- Usability outside of the ompi-layer.
- Due to the fixed nature of simple opal types, their information is
completely
known at compile time and therefore constified
- With fewer datatypes (22), the actual sizes of bit-field types may be
reduced
from 64 to 32 bits, allowing reorganizing the opal_datatype
structure, eliminating holes and keeping data required in convertor
(upon send/recv) in one cacheline...
This has implications to the convertor-datastructure and other parts
of the code.
- Several performance tests have been run, the netpipe latency does not
change with
this patch on Linux/x86-64 on the smoky cluster.
- Extensive tests have been done to verify correctness (no new
regressions) using:
1. mpi_test_suite on linux/x86-64 using clean ompi-trunk and
ompi-ddt:
a. running both trunk and ompi-ddt resulted in no differences
(except for MPI_SHORT_INT and MPI_TYPE_MIX_LB_UB do now run
correctly).
b. with --enable-memchecker and running under valgrind (one buglet
when run with static found in test-suite, commited)
2. ibm testsuite on linux/x86-64 using clean ompi-trunk and ompi-ddt:
all passed (except for the dynamic/ tests failed!! as trunk/MTT)
3. compilation and usage of HDF5 tests on Jaguar using PGI and
PathScale compilers.
4. compilation and usage on Scicortex.
- Please note, that for the heterogeneous case, (-m32 compiled
binaries/ompi), neither
ompi-trunk, nor ompi-ddt branch would successfully launch.
This commit was SVN r21641.
2009-07-13 08:56:31 +04:00
|
|
|
opal_convertor_unpack(&convertor,
|
2007-07-14 00:46:12 +04:00
|
|
|
&iov,
|
|
|
|
&iov_count,
|
|
|
|
&max_data);
|
|
|
|
OBJ_DESTRUCT(&convertor);
|
|
|
|
} else {
|
|
|
|
buffer = payload;
|
|
|
|
}
|
|
|
|
#else
|
|
|
|
buffer = payload;
|
|
|
|
#endif
|
2008-02-12 11:46:27 +03:00
|
|
|
/*
|
|
|
|
* Before copy to the user buffer, make the target part
|
|
|
|
* accessible.
|
|
|
|
*/
|
|
|
|
MEMCHECKER(
|
|
|
|
opal_memchecker_base_mem_defined( target, header->hdr_msg_length );
|
|
|
|
);
|
2007-07-14 00:46:12 +04:00
|
|
|
/* copy the data from the temporary buffer into the user window */
|
|
|
|
ret = ompi_osc_base_process_op(target,
|
|
|
|
buffer,
|
|
|
|
header->hdr_msg_length,
|
|
|
|
datatype,
|
|
|
|
header->hdr_target_count,
|
|
|
|
op);
|
2008-02-12 11:46:27 +03:00
|
|
|
/* Copy finished, make the user buffer inaccessible. */
|
|
|
|
MEMCHECKER(
|
|
|
|
opal_memchecker_base_mem_noaccess( target, header->hdr_msg_length );
|
|
|
|
);
|
2007-07-14 00:46:12 +04:00
|
|
|
|
2009-05-07 00:11:28 +04:00
|
|
|
#if OPAL_ENABLE_HETEROGENEOUS_SUPPORT
|
2007-07-14 00:46:12 +04:00
|
|
|
if (proc->proc_arch != ompi_proc_local()->proc_arch) {
|
|
|
|
if (NULL == buffer) free(buffer);
|
|
|
|
}
|
|
|
|
#endif
|
|
|
|
}
|
2006-01-28 18:38:37 +03:00
|
|
|
|
|
|
|
/* unlock the window for accumulates */
|
|
|
|
OPAL_THREAD_UNLOCK(&module->p2p_acc_lock);
|
|
|
|
|
|
|
|
/* Release datatype & op */
|
|
|
|
OBJ_RELEASE(datatype);
|
|
|
|
OBJ_RELEASE(op);
|
|
|
|
|
2007-05-21 06:21:25 +04:00
|
|
|
inmsg_mark_complete(module);
|
2006-02-22 08:14:34 +03:00
|
|
|
|
2008-06-09 18:53:58 +04:00
|
|
|
OPAL_OUTPUT_VERBOSE((50, ompi_osc_base_output,
|
2007-02-25 04:03:19 +03:00
|
|
|
"%d received accum message from %d",
|
2007-07-11 21:16:06 +04:00
|
|
|
ompi_comm_rank(module->p2p_comm),
|
2007-02-25 04:03:19 +03:00
|
|
|
header->hdr_origin));
|
2006-01-28 18:38:37 +03:00
|
|
|
} else {
|
|
|
|
ompi_osc_pt2pt_longreq_t *longreq;
|
|
|
|
size_t buflen;
|
2007-07-14 00:46:12 +04:00
|
|
|
struct ompi_datatype_t *primitive_datatype = NULL;
|
|
|
|
uint32_t primitive_count;
|
|
|
|
|
|
|
|
/* get underlying type... */
|
|
|
|
ompi_osc_base_get_primitive_type_info(datatype, &primitive_datatype, &primitive_count);
|
|
|
|
primitive_count *= header->hdr_target_count;
|
2006-01-28 18:38:37 +03:00
|
|
|
|
|
|
|
/* figure out how big a buffer we need */
|
- Split the datatype engine into two parts: an MPI specific part in
OMPI
and a language agnostic part in OPAL. The convertor is completely
moved into OPAL. This offers several benefits as described in RFC
http://www.open-mpi.org/community/lists/devel/2009/07/6387.php
namely:
- Fewer basic types (int* and float* types, boolean and wchar
- Fixing naming scheme to ompi-nomenclature.
- Usability outside of the ompi-layer.
- Due to the fixed nature of simple opal types, their information is
completely
known at compile time and therefore constified
- With fewer datatypes (22), the actual sizes of bit-field types may be
reduced
from 64 to 32 bits, allowing reorganizing the opal_datatype
structure, eliminating holes and keeping data required in convertor
(upon send/recv) in one cacheline...
This has implications to the convertor-datastructure and other parts
of the code.
- Several performance tests have been run, the netpipe latency does not
change with
this patch on Linux/x86-64 on the smoky cluster.
- Extensive tests have been done to verify correctness (no new
regressions) using:
1. mpi_test_suite on linux/x86-64 using clean ompi-trunk and
ompi-ddt:
a. running both trunk and ompi-ddt resulted in no differences
(except for MPI_SHORT_INT and MPI_TYPE_MIX_LB_UB do now run
correctly).
b. with --enable-memchecker and running under valgrind (one buglet
when run with static found in test-suite, commited)
2. ibm testsuite on linux/x86-64 using clean ompi-trunk and ompi-ddt:
all passed (except for the dynamic/ tests failed!! as trunk/MTT)
3. compilation and usage of HDF5 tests on Jaguar using PGI and
PathScale compilers.
4. compilation and usage on Scicortex.
- Please note, that for the heterogeneous case, (-m32 compiled
binaries/ompi), neither
ompi-trunk, nor ompi-ddt branch would successfully launch.
This commit was SVN r21641.
2009-07-13 08:56:31 +04:00
|
|
|
ompi_datatype_type_size(primitive_datatype, &buflen);
|
2007-07-14 00:46:12 +04:00
|
|
|
buflen *= primitive_count;
|
2006-01-28 18:38:37 +03:00
|
|
|
|
|
|
|
/* get a longreq and fill it in */
|
|
|
|
ompi_osc_pt2pt_longreq_alloc(&longreq);
|
|
|
|
|
2007-05-21 06:21:25 +04:00
|
|
|
longreq->mpireq.cbfunc = ompi_osc_pt2pt_sendreq_recv_accum_long_cb;
|
2006-01-28 18:38:37 +03:00
|
|
|
longreq->req_datatype = datatype;
|
|
|
|
longreq->req_op = op;
|
|
|
|
longreq->req_module = module;
|
|
|
|
|
|
|
|
/* allocate a buffer to receive into ... */
|
2007-05-21 06:21:25 +04:00
|
|
|
longreq->mpireq.cbdata = malloc(buflen + sizeof(ompi_osc_pt2pt_send_header_t));
|
2006-01-28 18:38:37 +03:00
|
|
|
|
2007-05-21 06:21:25 +04:00
|
|
|
if (NULL == longreq->mpireq.cbdata) return OMPI_ERR_TEMP_OUT_OF_RESOURCE;
|
2006-02-22 08:14:34 +03:00
|
|
|
/* fill in tmp header */
|
2007-05-21 06:21:25 +04:00
|
|
|
memcpy(longreq->mpireq.cbdata, header,
|
2006-02-22 08:14:34 +03:00
|
|
|
sizeof(ompi_osc_pt2pt_send_header_t));
|
2007-05-21 06:21:25 +04:00
|
|
|
((ompi_osc_pt2pt_send_header_t*) longreq->mpireq.cbdata)->hdr_msg_length = buflen;
|
2006-01-28 18:38:37 +03:00
|
|
|
|
2007-05-21 06:21:25 +04:00
|
|
|
ret = mca_pml.pml_irecv(((char*) longreq->mpireq.cbdata) + sizeof(ompi_osc_pt2pt_send_header_t),
|
2007-07-14 00:46:12 +04:00
|
|
|
primitive_count,
|
|
|
|
primitive_datatype,
|
2006-01-28 18:38:37 +03:00
|
|
|
header->hdr_origin,
|
|
|
|
header->hdr_origin_tag,
|
|
|
|
module->p2p_comm,
|
2007-05-21 06:21:25 +04:00
|
|
|
&(longreq->mpireq.request));
|
2006-02-22 08:14:34 +03:00
|
|
|
|
2008-06-09 18:53:58 +04:00
|
|
|
OPAL_OUTPUT_VERBOSE((50, ompi_osc_base_output,
|
2007-02-25 04:03:19 +03:00
|
|
|
"%d started long recv accum message from %d (%d)",
|
2007-07-11 21:16:06 +04:00
|
|
|
ompi_comm_rank(module->p2p_comm),
|
2007-02-25 04:03:19 +03:00
|
|
|
header->hdr_origin,
|
|
|
|
header->hdr_origin_tag));
|
2006-02-22 08:14:34 +03:00
|
|
|
|
|
|
|
/* put the send request in the waiting list */
|
2007-05-21 06:21:25 +04:00
|
|
|
OPAL_THREAD_LOCK(&mca_osc_pt2pt_component.p2p_c_lock);
|
|
|
|
opal_list_append(&mca_osc_pt2pt_component.p2p_c_pending_requests,
|
|
|
|
&(longreq->mpireq.super.super));
|
|
|
|
OPAL_THREAD_UNLOCK(&mca_osc_pt2pt_component.p2p_c_lock);
|
2006-01-28 18:38:37 +03:00
|
|
|
}
|
|
|
|
|
|
|
|
return ret;
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
/**********************************************************************
|
|
|
|
*
|
|
|
|
* Receive a get on the origin side
|
|
|
|
*
|
|
|
|
**********************************************************************/
|
|
|
|
static void
|
2007-05-21 06:21:25 +04:00
|
|
|
ompi_osc_pt2pt_replyreq_recv_long_cb(ompi_osc_pt2pt_mpireq_t *mpireq)
|
2006-01-28 18:38:37 +03:00
|
|
|
{
|
2007-05-21 06:21:25 +04:00
|
|
|
ompi_osc_pt2pt_longreq_t *longreq =
|
|
|
|
(ompi_osc_pt2pt_longreq_t*) mpireq;
|
2006-01-28 18:38:37 +03:00
|
|
|
ompi_osc_pt2pt_sendreq_t *sendreq =
|
2007-05-21 06:21:25 +04:00
|
|
|
(ompi_osc_pt2pt_sendreq_t*) longreq->mpireq.cbdata;
|
|
|
|
int32_t count;
|
2006-01-28 18:38:37 +03:00
|
|
|
|
2007-05-23 16:56:34 +04:00
|
|
|
OPAL_THREAD_LOCK(&sendreq->req_module->p2p_lock);
|
2007-05-22 00:53:02 +04:00
|
|
|
count = (sendreq->req_module->p2p_num_pending_out -= 1);
|
2007-05-23 16:56:34 +04:00
|
|
|
OPAL_THREAD_UNLOCK(&sendreq->req_module->p2p_lock);
|
2006-01-28 18:38:37 +03:00
|
|
|
|
2007-05-22 00:53:02 +04:00
|
|
|
ompi_osc_pt2pt_longreq_free(longreq);
|
2006-01-28 18:38:37 +03:00
|
|
|
ompi_osc_pt2pt_sendreq_free(sendreq);
|
2007-05-22 00:53:02 +04:00
|
|
|
|
2007-05-21 06:21:25 +04:00
|
|
|
if (0 == count) opal_condition_broadcast(&sendreq->req_module->p2p_cond);
|
2006-01-28 18:38:37 +03:00
|
|
|
}
|
|
|
|
|
|
|
|
int
|
|
|
|
ompi_osc_pt2pt_replyreq_recv(ompi_osc_pt2pt_module_t *module,
|
|
|
|
ompi_osc_pt2pt_sendreq_t *sendreq,
|
|
|
|
ompi_osc_pt2pt_reply_header_t *header,
|
|
|
|
void *payload)
|
|
|
|
{
|
|
|
|
int ret = OMPI_SUCCESS;
|
2007-05-21 06:21:25 +04:00
|
|
|
int32_t count;
|
2006-01-28 18:38:37 +03:00
|
|
|
|
|
|
|
/* receive into user buffer */
|
|
|
|
if (header->hdr_msg_length > 0) {
|
|
|
|
/* short message. woo! */
|
|
|
|
|
|
|
|
struct iovec iov;
|
|
|
|
uint32_t iov_count = 1;
|
|
|
|
size_t max_data;
|
|
|
|
|
2008-04-16 17:24:35 +04:00
|
|
|
iov.iov_len = header->hdr_msg_length;
|
2006-08-24 20:38:08 +04:00
|
|
|
iov.iov_base = (IOVBASE_TYPE*)payload;
|
2008-04-16 17:24:35 +04:00
|
|
|
max_data = iov.iov_len;
|
2008-02-12 11:46:27 +03:00
|
|
|
/*
|
|
|
|
* Before copy to the target buffer, make the target part
|
|
|
|
* accessable.
|
|
|
|
*/
|
|
|
|
MEMCHECKER(
|
2008-04-16 17:24:35 +04:00
|
|
|
memchecker_convertor_call(&opal_memchecker_base_mem_defined,
|
|
|
|
&sendreq->req_origin_convertor);
|
2008-02-12 11:46:27 +03:00
|
|
|
);
|
- Split the datatype engine into two parts: an MPI specific part in
OMPI
and a language agnostic part in OPAL. The convertor is completely
moved into OPAL. This offers several benefits as described in RFC
http://www.open-mpi.org/community/lists/devel/2009/07/6387.php
namely:
- Fewer basic types (int* and float* types, boolean and wchar
- Fixing naming scheme to ompi-nomenclature.
- Usability outside of the ompi-layer.
- Due to the fixed nature of simple opal types, their information is
completely
known at compile time and therefore constified
- With fewer datatypes (22), the actual sizes of bit-field types may be
reduced
from 64 to 32 bits, allowing reorganizing the opal_datatype
structure, eliminating holes and keeping data required in convertor
(upon send/recv) in one cacheline...
This has implications to the convertor-datastructure and other parts
of the code.
- Several performance tests have been run, the netpipe latency does not
change with
this patch on Linux/x86-64 on the smoky cluster.
- Extensive tests have been done to verify correctness (no new
regressions) using:
1. mpi_test_suite on linux/x86-64 using clean ompi-trunk and
ompi-ddt:
a. running both trunk and ompi-ddt resulted in no differences
(except for MPI_SHORT_INT and MPI_TYPE_MIX_LB_UB do now run
correctly).
b. with --enable-memchecker and running under valgrind (one buglet
when run with static found in test-suite, commited)
2. ibm testsuite on linux/x86-64 using clean ompi-trunk and ompi-ddt:
all passed (except for the dynamic/ tests failed!! as trunk/MTT)
3. compilation and usage of HDF5 tests on Jaguar using PGI and
PathScale compilers.
4. compilation and usage on Scicortex.
- Please note, that for the heterogeneous case, (-m32 compiled
binaries/ompi), neither
ompi-trunk, nor ompi-ddt branch would successfully launch.
This commit was SVN r21641.
2009-07-13 08:56:31 +04:00
|
|
|
opal_convertor_unpack(&sendreq->req_origin_convertor,
|
2006-01-28 18:38:37 +03:00
|
|
|
&iov,
|
|
|
|
&iov_count,
|
2006-10-27 03:11:26 +04:00
|
|
|
&max_data );
|
2008-02-12 11:46:27 +03:00
|
|
|
/*
|
|
|
|
* Copy finished, make the target buffer unaccessable.(Or just leave it accessable?)
|
|
|
|
*/
|
|
|
|
MEMCHECKER(
|
2008-04-16 17:24:35 +04:00
|
|
|
memchecker_convertor_call(&opal_memchecker_base_mem_noaccess,
|
|
|
|
&sendreq->req_origin_convertor);
|
2008-02-12 11:46:27 +03:00
|
|
|
);
|
2007-05-22 00:53:02 +04:00
|
|
|
|
|
|
|
OPAL_THREAD_LOCK(&module->p2p_lock);
|
|
|
|
count = (sendreq->req_module->p2p_num_pending_out -= 1);
|
|
|
|
OPAL_THREAD_UNLOCK(&module->p2p_lock);
|
|
|
|
|
2006-01-28 18:38:37 +03:00
|
|
|
ompi_osc_pt2pt_sendreq_free(sendreq);
|
2007-05-22 00:53:02 +04:00
|
|
|
|
2007-05-21 06:21:25 +04:00
|
|
|
if (0 == count) opal_condition_broadcast(&module->p2p_cond);
|
|
|
|
|
2006-01-28 18:38:37 +03:00
|
|
|
} else {
|
|
|
|
ompi_osc_pt2pt_longreq_t *longreq;
|
|
|
|
ompi_osc_pt2pt_longreq_alloc(&longreq);
|
|
|
|
|
2007-05-21 06:21:25 +04:00
|
|
|
longreq->mpireq.cbfunc = ompi_osc_pt2pt_replyreq_recv_long_cb;
|
|
|
|
longreq->mpireq.cbdata = sendreq;
|
2006-01-28 18:38:37 +03:00
|
|
|
longreq->req_module = module;
|
|
|
|
|
|
|
|
/* BWB - FIX ME - George is going to kill me for this */
|
|
|
|
ret = mca_pml.pml_irecv(sendreq->req_origin_convertor.pBaseBuf,
|
|
|
|
sendreq->req_origin_convertor.count,
|
|
|
|
sendreq->req_origin_datatype,
|
|
|
|
sendreq->req_target_rank,
|
|
|
|
header->hdr_target_tag,
|
|
|
|
module->p2p_comm,
|
2007-05-21 06:21:25 +04:00
|
|
|
&(longreq->mpireq.request));
|
|
|
|
|
2006-01-28 18:38:37 +03:00
|
|
|
/* put the send request in the waiting list */
|
2007-05-21 06:21:25 +04:00
|
|
|
OPAL_THREAD_LOCK(&mca_osc_pt2pt_component.p2p_c_lock);
|
|
|
|
opal_list_append(&mca_osc_pt2pt_component.p2p_c_pending_requests,
|
|
|
|
&(longreq->mpireq.super.super));
|
|
|
|
OPAL_THREAD_UNLOCK(&mca_osc_pt2pt_component.p2p_c_lock);
|
2006-01-28 18:38:37 +03:00
|
|
|
}
|
|
|
|
|
|
|
|
return ret;
|
|
|
|
}
|
2006-01-31 05:44:08 +03:00
|
|
|
|
|
|
|
|
|
|
|
/**********************************************************************
|
|
|
|
*
|
2006-02-23 09:02:10 +03:00
|
|
|
* Control message communication
|
2006-01-31 05:44:08 +03:00
|
|
|
*
|
|
|
|
**********************************************************************/
|
|
|
|
static void
|
2007-05-21 06:21:25 +04:00
|
|
|
ompi_osc_pt2pt_control_send_cb(ompi_osc_pt2pt_mpireq_t *mpireq)
|
2006-01-31 05:44:08 +03:00
|
|
|
{
|
|
|
|
/* release the descriptor and sendreq */
|
2006-08-03 04:10:19 +04:00
|
|
|
OPAL_FREE_LIST_RETURN(&mca_osc_pt2pt_component.p2p_c_buffers,
|
2007-05-21 06:21:25 +04:00
|
|
|
&mpireq->super);
|
2006-01-31 05:44:08 +03:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
int
|
|
|
|
ompi_osc_pt2pt_control_send(ompi_osc_pt2pt_module_t *module,
|
|
|
|
ompi_proc_t *proc,
|
2006-02-07 21:45:18 +03:00
|
|
|
uint8_t type, int32_t value0, int32_t value1)
|
2006-01-31 05:44:08 +03:00
|
|
|
{
|
|
|
|
int ret = OMPI_SUCCESS;
|
2006-08-03 04:10:19 +04:00
|
|
|
opal_free_list_item_t *item;
|
|
|
|
ompi_osc_pt2pt_buffer_t *buffer = NULL;
|
2006-01-31 05:44:08 +03:00
|
|
|
ompi_osc_pt2pt_control_header_t *header = NULL;
|
2006-08-03 18:42:05 +04:00
|
|
|
int rank = -1, i;
|
2006-08-03 04:10:19 +04:00
|
|
|
|
|
|
|
/* find the rank */
|
2007-07-11 21:16:06 +04:00
|
|
|
for (i = 0 ; i < ompi_comm_size(module->p2p_comm) ; ++i) {
|
|
|
|
if (proc == ompi_comm_peer_lookup(module->p2p_comm, i)) {
|
2006-08-03 04:10:19 +04:00
|
|
|
rank = i;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
/* Get a buffer */
|
|
|
|
OPAL_FREE_LIST_GET(&mca_osc_pt2pt_component.p2p_c_buffers,
|
|
|
|
item, ret);
|
|
|
|
if (NULL == item) {
|
2006-01-31 05:44:08 +03:00
|
|
|
ret = OMPI_ERR_TEMP_OUT_OF_RESOURCE;
|
|
|
|
goto cleanup;
|
|
|
|
}
|
2006-08-03 04:10:19 +04:00
|
|
|
buffer = (ompi_osc_pt2pt_buffer_t*) item;
|
2006-01-31 05:44:08 +03:00
|
|
|
|
|
|
|
/* verify at least enough space for header */
|
2006-08-03 04:10:19 +04:00
|
|
|
if (mca_osc_pt2pt_component.p2p_c_eager_size < sizeof(ompi_osc_pt2pt_control_header_t)) {
|
2006-01-31 05:44:08 +03:00
|
|
|
ret = OMPI_ERR_OUT_OF_RESOURCE;
|
|
|
|
goto cleanup;
|
|
|
|
}
|
|
|
|
|
2006-08-03 04:10:19 +04:00
|
|
|
/* setup buffer */
|
2007-05-21 06:21:25 +04:00
|
|
|
buffer->mpireq.cbfunc = ompi_osc_pt2pt_control_send_cb;
|
|
|
|
buffer->mpireq.cbdata = NULL;
|
2006-08-03 04:10:19 +04:00
|
|
|
buffer->len = sizeof(ompi_osc_pt2pt_control_header_t);
|
2006-01-31 05:44:08 +03:00
|
|
|
|
|
|
|
/* pack header */
|
2006-08-03 04:10:19 +04:00
|
|
|
header = (ompi_osc_pt2pt_control_header_t*) buffer->payload;
|
2006-02-07 15:16:23 +03:00
|
|
|
header->hdr_base.hdr_type = type;
|
2006-11-27 06:22:44 +03:00
|
|
|
header->hdr_base.hdr_flags = 0;
|
2006-02-07 21:45:18 +03:00
|
|
|
header->hdr_value[0] = value0;
|
|
|
|
header->hdr_value[1] = value1;
|
2006-01-31 05:44:08 +03:00
|
|
|
|
2006-02-27 21:47:00 +03:00
|
|
|
#ifdef WORDS_BIGENDIAN
|
|
|
|
header->hdr_base.hdr_flags |= OMPI_OSC_PT2PT_HDR_FLAG_NBO;
|
2009-05-07 00:11:28 +04:00
|
|
|
#elif OPAL_ENABLE_HETEROGENEOUS_SUPPORT
|
2008-04-18 00:43:56 +04:00
|
|
|
if (proc->proc_arch & OPAL_ARCH_ISBIGENDIAN) {
|
2006-02-27 21:47:00 +03:00
|
|
|
header->hdr_base.hdr_flags |= OMPI_OSC_PT2PT_HDR_FLAG_NBO;
|
|
|
|
OMPI_OSC_PT2PT_CONTROL_HDR_HTON(*header);
|
|
|
|
}
|
2006-01-31 05:44:08 +03:00
|
|
|
#endif
|
|
|
|
|
|
|
|
/* send fragment */
|
2006-08-03 04:10:19 +04:00
|
|
|
ret = MCA_PML_CALL(isend(buffer->payload,
|
|
|
|
buffer->len,
|
|
|
|
MPI_BYTE,
|
|
|
|
rank,
|
2006-11-28 00:55:41 +03:00
|
|
|
CONTROL_MSG_TAG,
|
2006-08-03 04:10:19 +04:00
|
|
|
MCA_PML_BASE_SEND_STANDARD,
|
|
|
|
module->p2p_comm,
|
2007-05-21 06:21:25 +04:00
|
|
|
&buffer->mpireq.request));
|
|
|
|
OPAL_THREAD_LOCK(&mca_osc_pt2pt_component.p2p_c_lock);
|
|
|
|
opal_list_append(&mca_osc_pt2pt_component.p2p_c_pending_requests,
|
|
|
|
&(buffer->mpireq.super.super));
|
|
|
|
OPAL_THREAD_UNLOCK(&mca_osc_pt2pt_component.p2p_c_lock);
|
|
|
|
|
2006-01-31 05:44:08 +03:00
|
|
|
goto done;
|
|
|
|
|
|
|
|
cleanup:
|
2006-08-03 04:10:19 +04:00
|
|
|
if (item != NULL) {
|
|
|
|
OPAL_FREE_LIST_RETURN(&mca_osc_pt2pt_component.p2p_c_buffers,
|
|
|
|
item);
|
2006-01-31 05:44:08 +03:00
|
|
|
}
|
|
|
|
|
|
|
|
done:
|
|
|
|
return ret;
|
|
|
|
}
|