Support real RDMA operations for networks that support it
This commit was SVN r15288.
Этот коммит содержится в:
родитель
41afd4ebee
Коммит
74008aac53
@ -78,6 +78,7 @@ struct ompi_osc_rdma_btl_t {
|
|||||||
uint64_t peer_seg_key;
|
uint64_t peer_seg_key;
|
||||||
mca_bml_base_btl_t *bml_btl;
|
mca_bml_base_btl_t *bml_btl;
|
||||||
int rdma_order;
|
int rdma_order;
|
||||||
|
int32_t num_sent;
|
||||||
};
|
};
|
||||||
typedef struct ompi_osc_rdma_btl_t ompi_osc_rdma_btl_t;
|
typedef struct ompi_osc_rdma_btl_t ompi_osc_rdma_btl_t;
|
||||||
|
|
||||||
@ -178,10 +179,7 @@ struct ompi_osc_rdma_module_t {
|
|||||||
bool m_use_rdma;
|
bool m_use_rdma;
|
||||||
ompi_osc_rdma_setup_info_t *m_setup_info;
|
ompi_osc_rdma_setup_info_t *m_setup_info;
|
||||||
ompi_osc_rdma_peer_info_t *m_peer_info;
|
ompi_osc_rdma_peer_info_t *m_peer_info;
|
||||||
|
int32_t m_rdma_num_pending;
|
||||||
int32_t m_num_pending_rdma;
|
|
||||||
|
|
||||||
volatile int32_t m_num_complete_rdma;
|
|
||||||
|
|
||||||
/* ********************* FENCE data ************************ */
|
/* ********************* FENCE data ************************ */
|
||||||
/* an array of <sizeof(m_comm)> ints, each containing the value
|
/* an array of <sizeof(m_comm)> ints, each containing the value
|
||||||
|
@ -40,7 +40,6 @@ enqueue_sendreq(ompi_osc_rdma_module_t *module,
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
int
|
int
|
||||||
ompi_osc_rdma_module_accumulate(void *origin_addr, int origin_count,
|
ompi_osc_rdma_module_accumulate(void *origin_addr, int origin_count,
|
||||||
struct ompi_datatype_t *origin_dt,
|
struct ompi_datatype_t *origin_dt,
|
||||||
@ -223,8 +222,6 @@ ompi_osc_rdma_module_put(void *origin_addr, int origin_count,
|
|||||||
&sendreq);
|
&sendreq);
|
||||||
if (OMPI_SUCCESS != ret) return ret;
|
if (OMPI_SUCCESS != ret) return ret;
|
||||||
|
|
||||||
/* if we're doing fence synchronization, try to actively send
|
|
||||||
right now */
|
|
||||||
if (module->m_eager_send_active) {
|
if (module->m_eager_send_active) {
|
||||||
OPAL_THREAD_LOCK(&module->m_lock);
|
OPAL_THREAD_LOCK(&module->m_lock);
|
||||||
sendreq->req_module->m_num_pending_out += 1;
|
sendreq->req_module->m_num_pending_out += 1;
|
||||||
@ -234,6 +231,7 @@ ompi_osc_rdma_module_put(void *origin_addr, int origin_count,
|
|||||||
ret = ompi_osc_rdma_sendreq_send(module, sendreq);
|
ret = ompi_osc_rdma_sendreq_send(module, sendreq);
|
||||||
|
|
||||||
if (OMPI_SUCCESS != ret) {
|
if (OMPI_SUCCESS != ret) {
|
||||||
|
opal_output(0, "rdma_senreq_send from put failed: %d", ret);
|
||||||
OPAL_THREAD_LOCK(&module->m_lock);
|
OPAL_THREAD_LOCK(&module->m_lock);
|
||||||
sendreq->req_module->m_num_pending_out -= 1;
|
sendreq->req_module->m_num_pending_out -= 1;
|
||||||
opal_list_append(&(module->m_pending_sendreqs),
|
opal_list_append(&(module->m_pending_sendreqs),
|
||||||
|
@ -784,6 +784,12 @@ component_fragment_cb(struct mca_btl_base_module_t *btl,
|
|||||||
descriptor->des_dst[0].seg_addr.pval;
|
descriptor->des_dst[0].seg_addr.pval;
|
||||||
int32_t count;
|
int32_t count;
|
||||||
|
|
||||||
|
#if !defined(WORDS_BIGENDIAN) && OMPI_ENABLE_HETEROGENEOUS_SUPPORT
|
||||||
|
if (header->hdr_base.hdr_flags & OMPI_OSC_RDMA_HDR_FLAG_NBO) {
|
||||||
|
OMPI_OSC_RDMA_CONTROL_HDR_NTOH(*header);
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
/* get our module pointer */
|
/* get our module pointer */
|
||||||
module = ompi_osc_rdma_windx_to_module(header->hdr_windx);
|
module = ompi_osc_rdma_windx_to_module(header->hdr_windx);
|
||||||
if (NULL == module) return;
|
if (NULL == module) return;
|
||||||
@ -795,6 +801,30 @@ component_fragment_cb(struct mca_btl_base_module_t *btl,
|
|||||||
}
|
}
|
||||||
break;
|
break;
|
||||||
|
|
||||||
|
case OMPI_OSC_RDMA_HDR_RDMA_COMPLETE:
|
||||||
|
{
|
||||||
|
ompi_osc_rdma_control_header_t *header =
|
||||||
|
(ompi_osc_rdma_control_header_t*)
|
||||||
|
descriptor->des_dst[0].seg_addr.pval;
|
||||||
|
int32_t count;
|
||||||
|
|
||||||
|
#if !defined(WORDS_BIGENDIAN) && OMPI_ENABLE_HETEROGENEOUS_SUPPORT
|
||||||
|
if (header->hdr_base.hdr_flags & OMPI_OSC_RDMA_HDR_FLAG_NBO) {
|
||||||
|
OMPI_OSC_RDMA_CONTROL_HDR_NTOH(*header);
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
|
/* get our module pointer */
|
||||||
|
module = ompi_osc_rdma_windx_to_module(header->hdr_windx);
|
||||||
|
if (NULL == module) return;
|
||||||
|
|
||||||
|
OPAL_THREAD_LOCK(&module->m_lock);
|
||||||
|
count = (module->m_num_pending_in -= header->hdr_value[0]);
|
||||||
|
OPAL_THREAD_UNLOCK(&module->m_lock);
|
||||||
|
if (count == 0) opal_condition_broadcast(&module->m_cond);
|
||||||
|
}
|
||||||
|
break;
|
||||||
|
|
||||||
case OMPI_OSC_RDMA_HDR_RDMA_INFO:
|
case OMPI_OSC_RDMA_HDR_RDMA_INFO:
|
||||||
{
|
{
|
||||||
ompi_osc_rdma_rdma_info_header_t *header =
|
ompi_osc_rdma_rdma_info_header_t *header =
|
||||||
@ -836,6 +866,7 @@ component_fragment_cb(struct mca_btl_base_module_t *btl,
|
|||||||
rdma_btl->peer_seg_key = header->hdr_segkey;
|
rdma_btl->peer_seg_key = header->hdr_segkey;
|
||||||
rdma_btl->bml_btl = bml_btl;
|
rdma_btl->bml_btl = bml_btl;
|
||||||
rdma_btl->rdma_order = MCA_BTL_NO_ORDER;
|
rdma_btl->rdma_order = MCA_BTL_NO_ORDER;
|
||||||
|
rdma_btl->num_sent = 0;
|
||||||
|
|
||||||
module->m_setup_info->num_btls_callin++;
|
module->m_setup_info->num_btls_callin++;
|
||||||
OPAL_THREAD_UNLOCK(&module->m_lock);
|
OPAL_THREAD_UNLOCK(&module->m_lock);
|
||||||
@ -996,6 +1027,7 @@ rdma_send_info_send(ompi_osc_rdma_module_t *module,
|
|||||||
/* pack header */
|
/* pack header */
|
||||||
header = (ompi_osc_rdma_rdma_info_header_t*) descriptor->des_src[0].seg_addr.pval;
|
header = (ompi_osc_rdma_rdma_info_header_t*) descriptor->des_src[0].seg_addr.pval;
|
||||||
header->hdr_base.hdr_type = OMPI_OSC_RDMA_HDR_RDMA_INFO;
|
header->hdr_base.hdr_type = OMPI_OSC_RDMA_HDR_RDMA_INFO;
|
||||||
|
header->hdr_base.hdr_flags = 0;
|
||||||
header->hdr_segkey = peer_send_info->seg_key;
|
header->hdr_segkey = peer_send_info->seg_key;
|
||||||
header->hdr_origin = ompi_comm_rank(module->m_comm);
|
header->hdr_origin = ompi_comm_rank(module->m_comm);
|
||||||
header->hdr_windx = module->m_comm->c_contextid;
|
header->hdr_windx = module->m_comm->c_contextid;
|
||||||
@ -1219,7 +1251,7 @@ setup_rdma(ompi_osc_rdma_module_t *module)
|
|||||||
module->m_comm);
|
module->m_comm);
|
||||||
if (OMPI_SUCCESS != ret) goto cleanup;
|
if (OMPI_SUCCESS != ret) goto cleanup;
|
||||||
for (i = 0 ; i < ompi_comm_size(module->m_comm) ; ++i) {
|
for (i = 0 ; i < ompi_comm_size(module->m_comm) ; ++i) {
|
||||||
module->m_peer_info[i].peer_base = remote[i];
|
module->m_peer_info[i].peer_len = remote[i];
|
||||||
}
|
}
|
||||||
|
|
||||||
/* get number of btls we're expecting from everyone */
|
/* get number of btls we're expecting from everyone */
|
||||||
|
@ -76,6 +76,136 @@ inmsg_mark_complete(ompi_osc_rdma_module_t *module)
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
static void
|
||||||
|
rdma_cb(struct mca_btl_base_module_t* btl,
|
||||||
|
struct mca_btl_base_endpoint_t* endpoint,
|
||||||
|
struct mca_btl_base_descriptor_t* descriptor,
|
||||||
|
int status)
|
||||||
|
{
|
||||||
|
ompi_osc_rdma_sendreq_t *sendreq =
|
||||||
|
(ompi_osc_rdma_sendreq_t*) descriptor->des_cbdata;
|
||||||
|
int32_t out_count, rdma_count;
|
||||||
|
|
||||||
|
assert(OMPI_SUCCESS == status);
|
||||||
|
|
||||||
|
OPAL_THREAD_LOCK(&sendreq->req_module->m_lock);
|
||||||
|
out_count = (sendreq->req_module->m_num_pending_out -= 1);
|
||||||
|
rdma_count = (sendreq->req_module->m_rdma_num_pending -= 1);
|
||||||
|
OPAL_THREAD_UNLOCK(&sendreq->req_module->m_lock);
|
||||||
|
|
||||||
|
btl->btl_free(btl, descriptor);
|
||||||
|
ompi_osc_rdma_sendreq_free(sendreq);
|
||||||
|
|
||||||
|
if ((0 == out_count) || (0 == rdma_count)) {
|
||||||
|
opal_condition_broadcast(&sendreq->req_module->m_cond);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
static int
|
||||||
|
ompi_osc_rdma_sendreq_rdma(ompi_osc_rdma_module_t *module,
|
||||||
|
ompi_osc_rdma_sendreq_t *sendreq)
|
||||||
|
{
|
||||||
|
mca_btl_base_descriptor_t* descriptor;
|
||||||
|
size_t size = sendreq->req_origin_bytes_packed;
|
||||||
|
ompi_osc_rdma_btl_t *rdma_btl = NULL;
|
||||||
|
int index, target, ret;
|
||||||
|
|
||||||
|
target = sendreq->req_target_rank;
|
||||||
|
|
||||||
|
if (module->m_peer_info[target].peer_num_btls > 0) {
|
||||||
|
|
||||||
|
index = ++(module->m_peer_info[target].peer_index_btls);
|
||||||
|
if (index >= module->m_peer_info[target].peer_num_btls) {
|
||||||
|
module->m_peer_info[target].peer_index_btls = 0;
|
||||||
|
index = 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
rdma_btl = &(module->m_peer_info[target].peer_btls[index]);
|
||||||
|
|
||||||
|
if (sendreq->req_type == OMPI_OSC_RDMA_PUT) {
|
||||||
|
descriptor = rdma_btl->bml_btl->
|
||||||
|
btl_prepare_src(rdma_btl->bml_btl->btl,
|
||||||
|
rdma_btl->bml_btl->btl_endpoint,
|
||||||
|
NULL, /* BWB - fix me */
|
||||||
|
&sendreq->req_origin_convertor,
|
||||||
|
rdma_btl->rdma_order,
|
||||||
|
0,
|
||||||
|
&size);
|
||||||
|
|
||||||
|
assert(NULL != descriptor);
|
||||||
|
|
||||||
|
descriptor->des_dst = sendreq->remote_segs;
|
||||||
|
descriptor->des_dst_cnt = 1;
|
||||||
|
descriptor->des_dst[0].seg_addr.lval =
|
||||||
|
module->m_peer_info[target].peer_base +
|
||||||
|
(sendreq->req_target_disp * module->m_win->w_disp_unit);
|
||||||
|
descriptor->des_dst[0].seg_len =
|
||||||
|
sendreq->req_origin_bytes_packed;
|
||||||
|
descriptor->des_dst[0].seg_key.key64 =
|
||||||
|
rdma_btl->peer_seg_key;
|
||||||
|
#if 0
|
||||||
|
opal_output(0, "putting to %d: 0x%lx(%d), %d, %d",
|
||||||
|
target, descriptor->des_dst[0].seg_addr.lval,
|
||||||
|
descriptor->des_dst[0].seg_len,
|
||||||
|
rdma_btl->rdma_order,
|
||||||
|
descriptor->order);
|
||||||
|
#endif
|
||||||
|
descriptor->des_cbdata = sendreq;
|
||||||
|
descriptor->des_cbfunc = rdma_cb;
|
||||||
|
|
||||||
|
ret = rdma_btl->bml_btl->
|
||||||
|
btl_put(rdma_btl->bml_btl->btl,
|
||||||
|
rdma_btl->bml_btl->btl_endpoint,
|
||||||
|
descriptor);
|
||||||
|
} else {
|
||||||
|
descriptor = rdma_btl->bml_btl->
|
||||||
|
btl_prepare_dst(rdma_btl->bml_btl->btl,
|
||||||
|
rdma_btl->bml_btl->btl_endpoint,
|
||||||
|
NULL, /* BWB - fix me */
|
||||||
|
&sendreq->req_origin_convertor,
|
||||||
|
rdma_btl->rdma_order,
|
||||||
|
0,
|
||||||
|
&size);
|
||||||
|
|
||||||
|
assert(NULL != descriptor);
|
||||||
|
|
||||||
|
descriptor->des_src = sendreq->remote_segs;
|
||||||
|
descriptor->des_src_cnt = 1;
|
||||||
|
descriptor->des_src[0].seg_addr.lval =
|
||||||
|
module->m_peer_info[target].peer_base +
|
||||||
|
(sendreq->req_target_disp * module->m_win->w_disp_unit);
|
||||||
|
descriptor->des_src[0].seg_len =
|
||||||
|
sendreq->req_origin_bytes_packed;
|
||||||
|
descriptor->des_src[0].seg_key.key64 =
|
||||||
|
rdma_btl->peer_seg_key;
|
||||||
|
|
||||||
|
descriptor->des_cbdata = sendreq;
|
||||||
|
descriptor->des_cbfunc = rdma_cb;
|
||||||
|
|
||||||
|
ret = rdma_btl->bml_btl->
|
||||||
|
btl_get(rdma_btl->bml_btl->btl,
|
||||||
|
rdma_btl->bml_btl->btl_endpoint,
|
||||||
|
descriptor);
|
||||||
|
}
|
||||||
|
rdma_btl->rdma_order = descriptor->order;
|
||||||
|
|
||||||
|
if (OPAL_UNLIKELY(OMPI_SUCCESS != ret)) {
|
||||||
|
return ret;
|
||||||
|
} else {
|
||||||
|
OPAL_THREAD_LOCK(&module->m_lock);
|
||||||
|
rdma_btl->num_sent++;
|
||||||
|
sendreq->req_module->m_rdma_num_pending += 1;
|
||||||
|
OPAL_THREAD_UNLOCK(&module->m_lock);
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
return OMPI_ERR_NOT_SUPPORTED;
|
||||||
|
}
|
||||||
|
|
||||||
|
return OMPI_SUCCESS;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
/**********************************************************************
|
/**********************************************************************
|
||||||
*
|
*
|
||||||
* Sending a sendreq to target
|
* Sending a sendreq to target
|
||||||
@ -140,7 +270,9 @@ ompi_osc_rdma_sendreq_send_cb(struct mca_btl_base_module_t* btl,
|
|||||||
/* do we need to post a send? */
|
/* do we need to post a send? */
|
||||||
if (header->hdr_msg_length != 0) {
|
if (header->hdr_msg_length != 0) {
|
||||||
/* sendreq is done. Mark it as so and get out of here */
|
/* sendreq is done. Mark it as so and get out of here */
|
||||||
|
OPAL_THREAD_LOCK(&sendreq->req_module->m_lock);
|
||||||
count = sendreq->req_module->m_num_pending_out -= 1;
|
count = sendreq->req_module->m_num_pending_out -= 1;
|
||||||
|
OPAL_THREAD_UNLOCK(&sendreq->req_module->m_lock);
|
||||||
ompi_osc_rdma_sendreq_free(sendreq);
|
ompi_osc_rdma_sendreq_free(sendreq);
|
||||||
if (0 == count) opal_condition_broadcast(&sendreq->req_module->m_cond);
|
if (0 == count) opal_condition_broadcast(&sendreq->req_module->m_cond);
|
||||||
} else {
|
} else {
|
||||||
@ -209,7 +341,19 @@ ompi_osc_rdma_sendreq_send(ompi_osc_rdma_module_t *module,
|
|||||||
size_t written_data = 0;
|
size_t written_data = 0;
|
||||||
size_t needed_len = sizeof(ompi_osc_rdma_send_header_t);
|
size_t needed_len = sizeof(ompi_osc_rdma_send_header_t);
|
||||||
const void *packed_ddt;
|
const void *packed_ddt;
|
||||||
size_t packed_ddt_len = ompi_ddt_pack_description_length(sendreq->req_target_datatype);
|
size_t packed_ddt_len;
|
||||||
|
|
||||||
|
if ((module->m_eager_send_active) &&
|
||||||
|
(module->m_use_rdma) &&
|
||||||
|
(ompi_ddt_is_contiguous_memory_layout(sendreq->req_target_datatype,
|
||||||
|
sendreq->req_target_count)) &&
|
||||||
|
(!ompi_convertor_need_buffers(&sendreq->req_origin_convertor)) &&
|
||||||
|
(sendreq->req_type != OMPI_OSC_RDMA_ACC)) {
|
||||||
|
ret = ompi_osc_rdma_sendreq_rdma(module, sendreq);
|
||||||
|
if (OPAL_LIKELY(OMPI_SUCCESS == ret)) return ret;
|
||||||
|
}
|
||||||
|
|
||||||
|
packed_ddt_len = ompi_ddt_pack_description_length(sendreq->req_target_datatype);
|
||||||
|
|
||||||
/* we always need to send the ddt */
|
/* we always need to send the ddt */
|
||||||
needed_len += packed_ddt_len;
|
needed_len += packed_ddt_len;
|
||||||
@ -821,6 +965,10 @@ ompi_osc_rdma_control_send_cb(struct mca_btl_base_module_t* btl,
|
|||||||
struct mca_btl_base_descriptor_t* descriptor,
|
struct mca_btl_base_descriptor_t* descriptor,
|
||||||
int status)
|
int status)
|
||||||
{
|
{
|
||||||
|
ompi_osc_rdma_control_header_t *header = NULL;
|
||||||
|
|
||||||
|
header = (ompi_osc_rdma_control_header_t*) descriptor->des_src[0].seg_addr.pval;
|
||||||
|
|
||||||
/* release the descriptor and sendreq */
|
/* release the descriptor and sendreq */
|
||||||
btl->btl_free(btl, descriptor);
|
btl->btl_free(btl, descriptor);
|
||||||
}
|
}
|
||||||
@ -863,6 +1011,7 @@ ompi_osc_rdma_control_send(ompi_osc_rdma_module_t *module,
|
|||||||
/* pack header */
|
/* pack header */
|
||||||
header = (ompi_osc_rdma_control_header_t*) descriptor->des_src[0].seg_addr.pval;
|
header = (ompi_osc_rdma_control_header_t*) descriptor->des_src[0].seg_addr.pval;
|
||||||
header->hdr_base.hdr_type = type;
|
header->hdr_base.hdr_type = type;
|
||||||
|
header->hdr_base.hdr_flags = 0;
|
||||||
header->hdr_value[0] = value0;
|
header->hdr_value[0] = value0;
|
||||||
header->hdr_value[1] = value1;
|
header->hdr_value[1] = value1;
|
||||||
header->hdr_windx = module->m_comm->c_contextid;
|
header->hdr_windx = module->m_comm->c_contextid;
|
||||||
@ -888,3 +1037,67 @@ ompi_osc_rdma_control_send(ompi_osc_rdma_module_t *module,
|
|||||||
done:
|
done:
|
||||||
return ret;
|
return ret;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
int
|
||||||
|
ompi_osc_rdma_rdma_ack_send(ompi_osc_rdma_module_t *module,
|
||||||
|
ompi_proc_t *proc,
|
||||||
|
ompi_osc_rdma_btl_t *rdma_btl)
|
||||||
|
{
|
||||||
|
int ret = OMPI_SUCCESS;
|
||||||
|
mca_bml_base_btl_t *bml_btl = rdma_btl->bml_btl;
|
||||||
|
mca_btl_base_descriptor_t *descriptor = NULL;
|
||||||
|
ompi_osc_rdma_control_header_t *header = NULL;
|
||||||
|
|
||||||
|
/* Get a BTL and a fragment to go with it */
|
||||||
|
descriptor = bml_btl->btl_alloc(bml_btl->btl,
|
||||||
|
rdma_btl->rdma_order,
|
||||||
|
sizeof(ompi_osc_rdma_control_header_t));
|
||||||
|
if (NULL == descriptor) {
|
||||||
|
ret = OMPI_ERR_TEMP_OUT_OF_RESOURCE;
|
||||||
|
goto cleanup;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* verify at least enough space for header */
|
||||||
|
if (descriptor->des_src[0].seg_len < sizeof(ompi_osc_rdma_control_header_t)) {
|
||||||
|
ret = OMPI_ERR_OUT_OF_RESOURCE;
|
||||||
|
goto cleanup;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* setup descriptor */
|
||||||
|
descriptor->des_cbfunc = ompi_osc_rdma_control_send_cb;
|
||||||
|
descriptor->des_cbdata = NULL;
|
||||||
|
descriptor->des_flags = MCA_BTL_DES_FLAGS_PRIORITY;
|
||||||
|
descriptor->des_src[0].seg_len = sizeof(ompi_osc_rdma_control_header_t);
|
||||||
|
|
||||||
|
/* pack header */
|
||||||
|
header = (ompi_osc_rdma_control_header_t*) descriptor->des_src[0].seg_addr.pval;
|
||||||
|
header->hdr_base.hdr_type = OMPI_OSC_RDMA_HDR_RDMA_COMPLETE;
|
||||||
|
header->hdr_base.hdr_flags = 0;
|
||||||
|
header->hdr_value[0] = rdma_btl->num_sent;
|
||||||
|
header->hdr_value[1] = 0;
|
||||||
|
header->hdr_windx = module->m_comm->c_contextid;
|
||||||
|
|
||||||
|
#ifdef WORDS_BIGENDIAN
|
||||||
|
header->hdr_base.hdr_flags |= OMPI_OSC_RDMA_HDR_FLAG_NBO;
|
||||||
|
#elif OMPI_ENABLE_HETEROGENEOUS_SUPPORT
|
||||||
|
if (proc->proc_arch & OMPI_ARCH_ISBIGENDIAN) {
|
||||||
|
header->hdr_base.hdr_flags |= OMPI_OSC_RDMA_HDR_FLAG_NBO;
|
||||||
|
OMPI_OSC_RDMA_CONTROL_HDR_HTON(*header);
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
|
assert(header->hdr_base.hdr_flags == 0);
|
||||||
|
|
||||||
|
/* send fragment */
|
||||||
|
ret = mca_bml_base_send(bml_btl, descriptor, MCA_BTL_TAG_OSC_RDMA);
|
||||||
|
goto done;
|
||||||
|
|
||||||
|
cleanup:
|
||||||
|
if (descriptor != NULL) {
|
||||||
|
mca_bml_base_free(bml_btl, descriptor);
|
||||||
|
}
|
||||||
|
|
||||||
|
done:
|
||||||
|
return ret;
|
||||||
|
}
|
||||||
|
@ -52,4 +52,8 @@ int ompi_osc_rdma_control_send(ompi_osc_rdma_module_t *module,
|
|||||||
ompi_proc_t *proc,
|
ompi_proc_t *proc,
|
||||||
uint8_t type, int32_t value0, int32_t value1);
|
uint8_t type, int32_t value0, int32_t value1);
|
||||||
|
|
||||||
|
int ompi_osc_rdma_rdma_ack_send(ompi_osc_rdma_module_t *module,
|
||||||
|
ompi_proc_t *proc,
|
||||||
|
ompi_osc_rdma_btl_t *rdma_btl);
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
@ -25,16 +25,18 @@
|
|||||||
|
|
||||||
#include "opal/types.h"
|
#include "opal/types.h"
|
||||||
|
|
||||||
#define OMPI_OSC_RDMA_HDR_PUT 0x01
|
/* Note -- 0x05 to 0x0A are of control_hdr type */
|
||||||
#define OMPI_OSC_RDMA_HDR_ACC 0x02
|
#define OMPI_OSC_RDMA_HDR_PUT 0x01
|
||||||
#define OMPI_OSC_RDMA_HDR_GET 0x03
|
#define OMPI_OSC_RDMA_HDR_ACC 0x02
|
||||||
#define OMPI_OSC_RDMA_HDR_REPLY 0x04
|
#define OMPI_OSC_RDMA_HDR_GET 0x03
|
||||||
#define OMPI_OSC_RDMA_HDR_POST 0x05
|
#define OMPI_OSC_RDMA_HDR_REPLY 0x04
|
||||||
#define OMPI_OSC_RDMA_HDR_COMPLETE 0x06
|
#define OMPI_OSC_RDMA_HDR_POST 0x05
|
||||||
#define OMPI_OSC_RDMA_HDR_LOCK_REQ 0x07
|
#define OMPI_OSC_RDMA_HDR_COMPLETE 0x06
|
||||||
#define OMPI_OSC_RDMA_HDR_UNLOCK_REQ 0x08
|
#define OMPI_OSC_RDMA_HDR_LOCK_REQ 0x07
|
||||||
#define OMPI_OSC_RDMA_HDR_UNLOCK_REPLY 0x09
|
#define OMPI_OSC_RDMA_HDR_UNLOCK_REQ 0x08
|
||||||
#define OMPI_OSC_RDMA_HDR_RDMA_INFO 0x0A
|
#define OMPI_OSC_RDMA_HDR_UNLOCK_REPLY 0x09
|
||||||
|
#define OMPI_OSC_RDMA_HDR_RDMA_COMPLETE 0x0A
|
||||||
|
#define OMPI_OSC_RDMA_HDR_RDMA_INFO 0x0B
|
||||||
|
|
||||||
#define OMPI_OSC_RDMA_HDR_FLAG_NBO 0x01
|
#define OMPI_OSC_RDMA_HDR_FLAG_NBO 0x01
|
||||||
|
|
||||||
|
@ -68,6 +68,8 @@ struct ompi_osc_rdma_sendreq_t {
|
|||||||
|
|
||||||
/** op index on the target */
|
/** op index on the target */
|
||||||
int req_op_id;
|
int req_op_id;
|
||||||
|
|
||||||
|
mca_btl_base_segment_t remote_segs[1];
|
||||||
};
|
};
|
||||||
typedef struct ompi_osc_rdma_sendreq_t ompi_osc_rdma_sendreq_t;
|
typedef struct ompi_osc_rdma_sendreq_t ompi_osc_rdma_sendreq_t;
|
||||||
OBJ_CLASS_DECLARATION(ompi_osc_rdma_sendreq_t);
|
OBJ_CLASS_DECLARATION(ompi_osc_rdma_sendreq_t);
|
||||||
|
@ -128,6 +128,31 @@ ompi_osc_rdma_module_fence(int assert, ompi_win_t *win)
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (module->m_use_rdma) {
|
||||||
|
OPAL_THREAD_LOCK(&module->m_lock);
|
||||||
|
while (module->m_rdma_num_pending != 0) {
|
||||||
|
opal_condition_wait(&module->m_cond, &module->m_lock);
|
||||||
|
}
|
||||||
|
OPAL_THREAD_UNLOCK(&module->m_lock);
|
||||||
|
|
||||||
|
for (i = 0 ; i < ompi_comm_size(module->m_comm) ; ++i) {
|
||||||
|
int j;
|
||||||
|
for (j = 0 ; j < module->m_peer_info[i].peer_num_btls ; ++j) {
|
||||||
|
if (module->m_peer_info[i].peer_btls[j].num_sent > 0) {
|
||||||
|
ret = ompi_osc_rdma_rdma_ack_send(module,
|
||||||
|
ompi_comm_peer_lookup(module->m_comm, i),
|
||||||
|
&(module->m_peer_info[i].peer_btls[j]));
|
||||||
|
if (OPAL_LIKELY(OMPI_SUCCESS == ret)) {
|
||||||
|
module->m_peer_info[i].peer_btls[j].num_sent = 0;
|
||||||
|
} else {
|
||||||
|
/* BWB - fix me */
|
||||||
|
abort();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
OPAL_THREAD_LOCK(&module->m_lock);
|
OPAL_THREAD_LOCK(&module->m_lock);
|
||||||
/* if some requests couldn't be started, push into the
|
/* if some requests couldn't be started, push into the
|
||||||
"queued" list, where we will try to restart them later. */
|
"queued" list, where we will try to restart them later. */
|
||||||
@ -241,7 +266,7 @@ ompi_osc_rdma_module_start(ompi_group_t *group,
|
|||||||
int
|
int
|
||||||
ompi_osc_rdma_module_complete(ompi_win_t *win)
|
ompi_osc_rdma_module_complete(ompi_win_t *win)
|
||||||
{
|
{
|
||||||
int i;
|
int i, j;
|
||||||
int ret = OMPI_SUCCESS;
|
int ret = OMPI_SUCCESS;
|
||||||
ompi_group_t *group;
|
ompi_group_t *group;
|
||||||
opal_list_item_t *item;
|
opal_list_item_t *item;
|
||||||
@ -263,6 +288,27 @@ ompi_osc_rdma_module_complete(ompi_win_t *win)
|
|||||||
|
|
||||||
for (i = 0 ; i < ompi_group_size(module->m_sc_group) ; ++i) {
|
for (i = 0 ; i < ompi_group_size(module->m_sc_group) ; ++i) {
|
||||||
int comm_rank = module->m_sc_remote_ranks[i];
|
int comm_rank = module->m_sc_remote_ranks[i];
|
||||||
|
if (module->m_use_rdma) {
|
||||||
|
OPAL_THREAD_LOCK(&module->m_lock);
|
||||||
|
while (module->m_rdma_num_pending != 0) {
|
||||||
|
opal_condition_wait(&module->m_cond, &module->m_lock);
|
||||||
|
}
|
||||||
|
OPAL_THREAD_UNLOCK(&module->m_lock);
|
||||||
|
|
||||||
|
for (j = 0 ; j < module->m_peer_info[comm_rank].peer_num_btls ; ++j) {
|
||||||
|
if (module->m_peer_info[comm_rank].peer_btls[j].num_sent > 0) {
|
||||||
|
ret = ompi_osc_rdma_rdma_ack_send(module,
|
||||||
|
module->m_sc_group->grp_proc_pointers[i],
|
||||||
|
&(module->m_peer_info[comm_rank].peer_btls[j]));
|
||||||
|
if (OPAL_LIKELY(OMPI_SUCCESS == ret)) {
|
||||||
|
module->m_peer_info[comm_rank].peer_btls[j].num_sent = 0;
|
||||||
|
} else {
|
||||||
|
/* BWB - fix me */
|
||||||
|
abort();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
ret = ompi_osc_rdma_control_send(module,
|
ret = ompi_osc_rdma_control_send(module,
|
||||||
module->m_sc_group->grp_proc_pointers[i],
|
module->m_sc_group->grp_proc_pointers[i],
|
||||||
OMPI_OSC_RDMA_HDR_COMPLETE,
|
OMPI_OSC_RDMA_HDR_COMPLETE,
|
||||||
|
Загрузка…
x
Ссылка в новой задаче
Block a user