
* Make rdma the default. Somehow, the code didn't match what was
  supposed to happen.
* Properly error out (rather than causing a buffer overflow) in the case
  where the datatype's packed description is larger than our control
  fragments. This still isn't standards-conforming, but at least we know
  what happened. (See the sketch after this list.)
* Expose win_set_name to external libraries (like the osc modules)
* Set default window name to the CID of the communicator it's using
  for communication
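
For the truncation bullet above, the shape of the added check (visible in the
pt2pt and rdma hunks below) is a length validation done before anything is
packed, returning MPI_ERR_TRUNCATE instead of writing past the end of the
control fragment. A minimal standalone sketch, with hypothetical constants
standing in for the component's eager size and send-header size:

    #include <stdio.h>
    #include <string.h>

    #define MPI_ERR_TRUNCATE 15   /* stand-in; the real value comes from mpi.h */

    /* Hypothetical sizes standing in for the control-fragment (eager)
     * limit and the OSC send header. */
    #define EAGER_SIZE  64
    #define HEADER_SIZE 16

    /* Pack a datatype description into a fixed-size control fragment,
     * erroring out instead of overflowing when it does not fit. */
    static int pack_ddt(char *frag, const char *ddt, size_t ddt_len)
    {
        /* the check this commit adds: fail before touching the buffer */
        if (EAGER_SIZE < HEADER_SIZE + ddt_len) {
            return MPI_ERR_TRUNCATE;
        }
        memcpy(frag + HEADER_SIZE, ddt, ddt_len);
        return 0;
    }

    int main(void)
    {
        char frag[EAGER_SIZE];
        char big[128] = {0};
        printf("small: %d\n", pack_ddt(frag, "desc", 4));  /* prints 0 */
        printf("big:   %d\n", pack_ddt(frag, big, 128));   /* prints 15 */
        return 0;
    }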

Refs trac:1905

This commit was SVN r21134.

The following Trac tickets were found above:
  Ticket 1905 --> https://svn.open-mpi.org/trac/ompi/ticket/1905
This commit is contained in:
Brian Barrett 2009-04-30 22:36:09 +00:00
parent 736debcffc
commit 7f898d4e2b
8 changed files with 63 additions and 44 deletions

View file

@@ -270,6 +270,7 @@ ompi_osc_pt2pt_component_select(ompi_win_t *win,
int ret, i;
ompi_osc_pt2pt_buffer_t *buffer = NULL;
opal_free_list_item_t *item = NULL;
char *tmp = NULL;
/* create module structure */
module = (ompi_osc_pt2pt_module_t*)
@@ -298,6 +299,10 @@ ompi_osc_pt2pt_component_select(ompi_win_t *win,
"pt2pt component creating window with id %d",
ompi_comm_get_cid(module->p2p_comm));
asprintf(&tmp, "%d", ompi_comm_get_cid(module->p2p_comm));
ompi_win_set_name(win, tmp);
free(tmp);
module->p2p_num_pending_sendreqs = (unsigned int*)
malloc(sizeof(unsigned int) * ompi_comm_size(module->p2p_comm));
if (NULL == module->p2p_num_pending_sendreqs) {
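
The added calls above are the whole of the default-naming change: format the
communicator's CID as a decimal string, hand it to ompi_win_set_name(), and
free the temporary. A standalone sketch of the same pattern; the window type
and win_set_name() here are hypothetical stand-ins for the OMPI equivalents:

    #define _GNU_SOURCE   /* for asprintf */
    #include <stdio.h>
    #include <stdlib.h>

    /* Hypothetical stand-in for ompi_win_t and ompi_win_set_name(). */
    struct win { char name[64]; };

    static void win_set_name(struct win *w, const char *name)
    {
        snprintf(w->name, sizeof(w->name), "%s", name);
    }

    int main(void)
    {
        struct win w;
        int cid = 42;      /* would come from ompi_comm_get_cid() */
        char *tmp = NULL;

        /* same shape as the diff: format the CID, set the name, free */
        if (asprintf(&tmp, "%d", cid) < 0) return 1;
        win_set_name(&w, tmp);
        free(tmp);

        printf("window name: %s\n", w.name);
        return 0;
    }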

View file

@@ -159,7 +159,7 @@ ompi_osc_pt2pt_sendreq_send(ompi_osc_pt2pt_module_t *module,
ompi_osc_pt2pt_sendreq_t *sendreq)
{
int ret = OMPI_SUCCESS;
opal_free_list_item_t *item;
opal_free_list_item_t *item = NULL;
ompi_osc_pt2pt_send_header_t *header = NULL;
ompi_osc_pt2pt_buffer_t *buffer = NULL;
size_t written_data = 0;
@@ -173,6 +173,13 @@ ompi_osc_pt2pt_sendreq_send(ompi_osc_pt2pt_module_t *module,
needed_len += sendreq->req_origin_bytes_packed;
}
/* verify at least enough space for header */
if (mca_osc_pt2pt_component.p2p_c_eager_size
< sizeof(ompi_osc_pt2pt_send_header_t) + packed_ddt_len) {
ret = MPI_ERR_TRUNCATE;
goto cleanup;
}
/* Get a buffer */
OPAL_FREE_LIST_GET(&mca_osc_pt2pt_component.p2p_c_buffers,
item, ret);
@@ -182,12 +189,6 @@ ompi_osc_pt2pt_sendreq_send(ompi_osc_pt2pt_module_t *module,
}
buffer = (ompi_osc_pt2pt_buffer_t*) item;
/* verify at least enough space for header */
if (mca_osc_pt2pt_component.p2p_c_eager_size < sizeof(ompi_osc_pt2pt_send_header_t)) {
ret = OMPI_ERR_OUT_OF_RESOURCE;
goto cleanup;
}
/* setup buffer */
buffer->mpireq.cbfunc = ompi_osc_pt2pt_sendreq_send_cb;
buffer->mpireq.cbdata = (void*) sendreq;
@@ -229,7 +230,7 @@ ompi_osc_pt2pt_sendreq_send(ompi_osc_pt2pt_module_t *module,
memcpy((unsigned char*) buffer->payload + written_data,
packed_ddt, packed_ddt_len);
written_data += packed_ddt_len;
if (OMPI_OSC_PT2PT_GET != sendreq->req_type) {
/* if sending data and it fits, pack payload */
if (mca_osc_pt2pt_component.p2p_c_eager_size >=

View file

@@ -122,13 +122,14 @@ ompi_osc_pt2pt_module_fence(int assert, ompi_win_t *win)
ret = ompi_osc_pt2pt_sendreq_send(module, req);
if (OMPI_SUCCESS != ret) {
if (OMPI_ERR_TEMP_OUT_OF_RESOURCE == ret ) {
opal_output_verbose(5, ompi_osc_base_output,
"fence: failure in starting sendreq (%d). "
"Will try later.",
"complete: failure in starting sendreq (%d). Will try later.",
ret);
opal_list_append(&(module->p2p_copy_pending_sendreqs), item);
}
} else if (OMPI_SUCCESS != ret) {
return ret;
}
}
OPAL_THREAD_LOCK(&module->p2p_lock);
@@ -266,12 +267,14 @@ ompi_osc_pt2pt_module_complete(ompi_win_t *win)
ret = ompi_osc_pt2pt_sendreq_send(module, req);
if (OMPI_SUCCESS != ret) {
if (OMPI_ERR_TEMP_OUT_OF_RESOURCE == ret ) {
opal_output_verbose(5, ompi_osc_base_output,
"complete: failure in starting sendreq (%d). Will try later.",
ret);
opal_list_append(&(module->p2p_copy_pending_sendreqs), item);
}
} else if (OMPI_SUCCESS != ret) {
return ret;
}
}
/* wait for all the requests */
@@ -477,12 +480,14 @@ ompi_osc_pt2pt_module_unlock(int target,
ret = ompi_osc_pt2pt_sendreq_send(module, req);
if (OMPI_SUCCESS != ret) {
if (OMPI_ERR_TEMP_OUT_OF_RESOURCE == ret ) {
opal_output_verbose(5, ompi_osc_base_output,
"unlock: failure in starting sendreq (%d). Will try later.",
"complete: failure in starting sendreq (%d). Will try later.",
ret);
opal_list_append(&(module->p2p_copy_pending_sendreqs), item);
}
} else if (OMPI_SUCCESS != ret) {
return ret;
}
}
/* wait for all the requests */
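
The sync-path hunks in this file all apply one pattern: a send that fails with
OMPI_ERR_TEMP_OUT_OF_RESOURCE is put back on the copy-pending list to be
retried later, while any other failure now propagates up instead of being
silently dropped. A simplified sketch of that control flow; the error codes
and the queue counter are stand-ins, not the OMPI types:

    #include <stdio.h>

    /* Stand-ins mirroring OMPI_SUCCESS and OMPI_ERR_TEMP_OUT_OF_RESOURCE. */
    enum { SUCCESS = 0, TEMP_OUT_OF_RESOURCE = -1, FATAL = -2 };

    static int queued = 0;   /* stand-in for the copy-pending sendreq list */

    /* Try to start a send: queue it on transient resource exhaustion,
     * propagate anything else as a hard error. */
    static int try_send(int (*send)(void))
    {
        int ret = send();
        if (SUCCESS != ret) {
            if (TEMP_OUT_OF_RESOURCE == ret) {
                queued++;        /* will be retried later */
            } else {
                return ret;      /* hard failure propagates */
            }
        }
        return SUCCESS;
    }

    static int send_busy(void)  { return TEMP_OUT_OF_RESOURCE; }
    static int send_fatal(void) { return FATAL; }

    int main(void)
    {
        int r = try_send(send_busy);
        printf("busy:  %d (queued=%d)\n", r, queued);
        r = try_send(send_fatal);
        printf("fatal: %d (queued=%d)\n", r, queued);
        return 0;
    }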

View file

@@ -134,7 +134,7 @@ ompi_osc_rdma_module_accumulate(void *origin_addr, int origin_count,
ret = ompi_osc_rdma_sendreq_send(module, sendreq);
if (OMPI_SUCCESS != ret) {
if (OMPI_ERR_TEMP_OUT_OF_RESOURCE == ret) {
OPAL_THREAD_LOCK(&module->m_lock);
sendreq->req_module->m_num_pending_out -= 1;
opal_list_append(&(module->m_pending_sendreqs),
@@ -206,7 +206,7 @@ ompi_osc_rdma_module_get(void *origin_addr,
ret = ompi_osc_rdma_sendreq_send(module, sendreq);
if (OMPI_SUCCESS != ret) {
if (OMPI_ERR_TEMP_OUT_OF_RESOURCE == ret) {
OPAL_THREAD_LOCK(&module->m_lock);
sendreq->req_module->m_num_pending_out -= 1;
opal_list_append(&(module->m_pending_sendreqs),
@@ -274,8 +274,7 @@ ompi_osc_rdma_module_put(void *origin_addr, int origin_count,
ret = ompi_osc_rdma_sendreq_send(module, sendreq);
if (OMPI_SUCCESS != ret) {
opal_output(0, "rdma_senreq_send from put failed: %d", ret);
if (OMPI_ERR_TEMP_OUT_OF_RESOURCE == ret) {
OPAL_THREAD_LOCK(&module->m_lock);
sendreq->req_module->m_num_pending_out -= 1;
opal_list_append(&(module->m_pending_sendreqs),

View file

@@ -152,7 +152,7 @@ component_open(void)
"Coalesce messages during an epoch to reduce "
"network utilization. Info key of same name "
"overrides this value.",
false, false, 0, NULL);
false, false, 1, NULL);
mca_base_param_reg_int(&mca_osc_rdma_component.super.osc_version,
"use_rdma",
@@ -282,10 +282,7 @@ ompi_osc_rdma_component_query(ompi_win_t *win,
/* if we inited, then the BMLs are available and we have a path to
each peer. Return slightly higher priority than the
point-to-point code */
/* lower priority below that of the pt2pt component until the btl
redesign */
return 0;
return 10;
}
@@ -296,6 +293,7 @@ ompi_osc_rdma_component_select(ompi_win_t *win,
{
ompi_osc_rdma_module_t *module = NULL;
int ret, i;
char *tmp;
/* create module structure */
module = (ompi_osc_rdma_module_t*)
@@ -329,6 +327,10 @@ ompi_osc_rdma_component_select(ompi_win_t *win,
"rdma component creating window with id %d",
ompi_comm_get_cid(module->m_comm));
asprintf(&tmp, "%d", ompi_comm_get_cid(module->m_comm));
ompi_win_set_name(win, tmp);
free(tmp);
module->m_num_pending_sendreqs = (unsigned int*)
malloc(sizeof(unsigned int) * ompi_comm_size(module->m_comm));
if (NULL == module->m_num_pending_sendreqs) {

View file

@@ -437,11 +437,15 @@ ompi_osc_rdma_sendreq_send(ompi_osc_rdma_module_t *module,
needed_len += sendreq->req_origin_bytes_packed;
}
/* see if we already have a buffer */
if ((module->m_pending_buffers[sendreq->req_target_rank].remain_len >=
sizeof(ompi_osc_rdma_send_header_t) + sendreq->req_origin_bytes_packed) ||
(0 < module->m_pending_buffers[sendreq->req_target_rank].remain_len &&
sendreq->req_origin_bytes_packed > 2048)) {
/* Reuse the buffer if:
* - The whole message will fit
* - The header and datatype will fit AND the payload would be long anyway
* Note that if the datatype is too big for an eager, we'll fall
* through and return an error out of the new buffer case */
if ((module->m_pending_buffers[sendreq->req_target_rank].remain_len >= needed_len) ||
((sizeof(ompi_osc_rdma_send_header_t) + packed_ddt_len <
module->m_pending_buffers[sendreq->req_target_rank].remain_len) &&
(needed_len > module->m_pending_buffers[sendreq->req_target_rank].bml_btl->btl->btl_eager_limit))) {
bml_btl = module->m_pending_buffers[sendreq->req_target_rank].bml_btl;
descriptor = module->m_pending_buffers[sendreq->req_target_rank].descriptor;
remain = module->m_pending_buffers[sendreq->req_target_rank].remain_len;
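
The comment above spells out the two reuse cases. Restated as a standalone
predicate (a hypothetical helper, with the lengths passed in rather than read
from the module's pending-buffer table and the BTL):

    #include <stdbool.h>
    #include <stddef.h>
    #include <stdio.h>

    /* Reuse the pending buffer when the whole message fits in what
     * remains, or when the header plus packed datatype description fit
     * and the message exceeds the eager limit anyway (so the payload
     * travels separately no matter what). */
    static bool reuse_pending_buffer(size_t remain_len, size_t needed_len,
                                     size_t header_len, size_t packed_ddt_len,
                                     size_t eager_limit)
    {
        bool whole_fits   = remain_len >= needed_len;
        bool long_message = (header_len + packed_ddt_len < remain_len) &&
                            (needed_len > eager_limit);
        return whole_fits || long_message;
    }

    int main(void)
    {
        /* small message that fits entirely in the remaining space */
        printf("%d\n", reuse_pending_buffer(512, 100, 24, 8, 256));  /* 1 */
        /* huge message: header and ddt fit, payload is past eager anyway */
        printf("%d\n", reuse_pending_buffer(64, 4096, 24, 8, 256));  /* 1 */
        /* neither case holds: take the new-buffer path */
        printf("%d\n", reuse_pending_buffer(64, 200, 24, 8, 256));   /* 0 */
        return 0;
    }
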
@@ -450,7 +454,6 @@ ompi_osc_rdma_sendreq_send(ompi_osc_rdma_module_t *module,
if (module->m_pending_buffers[sendreq->req_target_rank].descriptor) {
send_multi_buffer(module, sendreq->req_target_rank);
}
assert(OMPI_SUCCESS == ret);
/* get a buffer... */
endpoint = (mca_bml_base_endpoint_t*) sendreq->req_target_proc->proc_bml;
@@ -466,8 +469,8 @@ ompi_osc_rdma_sendreq_send(ompi_osc_rdma_module_t *module,
}
/* verify at least enough space for header */
if (descriptor->des_src[0].seg_len < sizeof(ompi_osc_rdma_send_header_t)) {
ret = OMPI_ERR_OUT_OF_RESOURCE;
if (descriptor->des_src[0].seg_len < sizeof(ompi_osc_rdma_send_header_t) + packed_ddt_len) {
ret = MPI_ERR_TRUNCATE;
goto cleanup;
}

View file

@@ -139,8 +139,10 @@ ompi_osc_rdma_module_fence(int assert, ompi_win_t *win)
opal_list_remove_first(&(module->m_copy_pending_sendreqs));
ret = ompi_osc_rdma_sendreq_send(module, req);
if (OMPI_SUCCESS != ret) {
if (OMPI_ERR_TEMP_OUT_OF_RESOURCE == ret) {
opal_list_append(&(module->m_copy_pending_sendreqs), (opal_list_item_t*)req);
} else if (OMPI_SUCCESS != ret) {
return ret;
} else {
started_send = 1;
}
@@ -168,8 +170,7 @@ ompi_osc_rdma_module_fence(int assert, ompi_win_t *win)
if (OPAL_LIKELY(OMPI_SUCCESS == ret)) {
module->m_peer_info[i].peer_btls[j].num_sent = 0;
} else {
/* BWB - fix me */
abort();
return ret;
}
}
}
@@ -331,8 +332,7 @@ ompi_osc_rdma_module_complete(ompi_win_t *win)
if (OPAL_LIKELY(OMPI_SUCCESS == ret)) {
module->m_peer_info[comm_rank].peer_btls[j].num_sent = 0;
} else {
/* BWB - fix me */
abort();
return ret;
}
}
}
@@ -354,9 +354,11 @@ ompi_osc_rdma_module_complete(ompi_win_t *win)
(ompi_osc_rdma_sendreq_t*) item;
ret = ompi_osc_rdma_sendreq_send(module, req);
if (OMPI_SUCCESS != ret) {
if (OMPI_ERR_TEMP_OUT_OF_RESOURCE == ret) {
opal_list_append(&(module->m_copy_pending_sendreqs), item);
break;
} else if (OMPI_SUCCESS != ret) {
return ret;
}
}
@@ -576,9 +578,11 @@ ompi_osc_rdma_module_unlock(int target,
(ompi_osc_rdma_sendreq_t*) item;
ret = ompi_osc_rdma_sendreq_send(module, req);
if (OMPI_SUCCESS != ret) {
if (OMPI_ERR_TEMP_OUT_OF_RESOURCE == ret) {
opal_list_append(&(module->m_copy_pending_sendreqs), item);
break;
} else if (OMPI_SUCCESS != ret) {
return ret;
}
}

View file

@@ -118,10 +118,10 @@ int ompi_win_create(void *base, size_t size, int disp_unit,
int ompi_win_free(ompi_win_t *win);
int ompi_win_set_name(ompi_win_t *win, char *win_name);
int ompi_win_get_name(ompi_win_t *win, char *win_name, int *length);
OMPI_DECLSPEC int ompi_win_set_name(ompi_win_t *win, char *win_name);
OMPI_DECLSPEC int ompi_win_get_name(ompi_win_t *win, char *win_name, int *length);
int ompi_win_group(ompi_win_t *win, ompi_group_t **group);
OMPI_DECLSPEC int ompi_win_group(ompi_win_t *win, ompi_group_t **group);
/* Note that the defintion of an "invalid" window is closely related
to the defintion of an "invalid" communicator. See a big comment