osc/rdma: fix accumulate fragment size calculation
The wrong type was used when calculating the amount of space needed for an accumulate fragment. Fixed the calculation and took the opportunity to eliminate the get_acc header as it is identical to the acc header. This fixes trac:4719 and #4718 Tracking these fixes for 1.8.2 in this CMR. Throwing this to Brad for review as he is the one who ran into the issue. cmr=v1.8.2:reviewer=bbenton This commit was SVN r32015. The following Trac tickets were found above: Ticket 4719 --> https://svn.open-mpi.org/trac/ompi/ticket/4719
Этот коммит содержится в:
родитель
c330778903
Коммит
7f6de57653
@ -496,14 +496,14 @@ ompi_osc_rdma_accumulate_w_req (void *origin_addr, int origin_count,
|
||||
|
||||
OPAL_THREAD_LOCK(&module->lock);
|
||||
|
||||
frag_len = sizeof(ompi_osc_rdma_header_acc_t) + ddt_len + payload_len;
|
||||
frag_len = sizeof(*header) + ddt_len + payload_len;
|
||||
ret = ompi_osc_rdma_frag_alloc(module, target, frag_len, &frag, &ptr);
|
||||
if (OMPI_SUCCESS != ret) {
|
||||
frag_len = sizeof(ompi_osc_rdma_header_acc_t) + ddt_len;
|
||||
frag_len = sizeof(*header) + ddt_len;
|
||||
ret = ompi_osc_rdma_frag_alloc(module, target, frag_len, &frag, &ptr);
|
||||
if (OMPI_SUCCESS != ret) {
|
||||
/* allocate space for the header plus space to store ddt_len */
|
||||
frag_len = sizeof(ompi_osc_rdma_header_put_t) + 8;
|
||||
frag_len = sizeof(*header) + 8;
|
||||
ret = ompi_osc_rdma_frag_alloc(module, target, frag_len, &frag, &ptr);
|
||||
if (OPAL_UNLIKELY(OMPI_SUCCESS != ret)) {
|
||||
OPAL_THREAD_UNLOCK(&module->lock);
|
||||
@ -525,7 +525,7 @@ ompi_osc_rdma_accumulate_w_req (void *origin_addr, int origin_count,
|
||||
header->count = target_count;
|
||||
header->displacement = target_disp;
|
||||
header->op = op->o_f_to_c_index;
|
||||
ptr += sizeof(ompi_osc_rdma_header_acc_t);
|
||||
ptr += sizeof (*header);
|
||||
|
||||
do {
|
||||
ret = ompi_datatype_get_pack_description(target_dt, &packed_ddt);
|
||||
@ -983,7 +983,7 @@ int ompi_osc_rdma_rget_accumulate_internal (void *origin_addr, int origin_count,
|
||||
bool is_long_datatype = false;
|
||||
bool is_long_msg = false;
|
||||
ompi_osc_rdma_frag_t *frag;
|
||||
ompi_osc_rdma_header_get_acc_t *header;
|
||||
ompi_osc_rdma_header_acc_t *header;
|
||||
size_t ddt_len, payload_len, frag_len;
|
||||
char *ptr;
|
||||
const void *packed_ddt;
|
||||
@ -1046,7 +1046,7 @@ int ompi_osc_rdma_rget_accumulate_internal (void *origin_addr, int origin_count,
|
||||
ret = ompi_osc_rdma_frag_alloc(module, target_rank, frag_len, &frag, &ptr);
|
||||
if (OMPI_SUCCESS != ret) {
|
||||
/* allocate space for the header plus space to store ddt_len */
|
||||
frag_len = sizeof(ompi_osc_rdma_header_put_t) + 8;
|
||||
frag_len = sizeof(*header) + 8;
|
||||
ret = ompi_osc_rdma_frag_alloc(module, target_rank, frag_len, &frag, &ptr);
|
||||
if (OPAL_UNLIKELY(OMPI_SUCCESS != ret)) {
|
||||
OPAL_THREAD_UNLOCK(&module->lock);
|
||||
@ -1070,7 +1070,7 @@ int ompi_osc_rdma_rget_accumulate_internal (void *origin_addr, int origin_count,
|
||||
|
||||
OPAL_THREAD_UNLOCK(&module->lock);
|
||||
|
||||
header = (ompi_osc_rdma_header_get_acc_t *) ptr;
|
||||
header = (ompi_osc_rdma_header_acc_t *) ptr;
|
||||
header->base.flags = 0;
|
||||
header->len = frag_len;
|
||||
header->count = target_count;
|
||||
|
@ -740,11 +740,9 @@ static int ompi_osc_rdma_acc_op_queue (ompi_osc_rdma_module_t *module, ompi_osc_
|
||||
switch (header->base.type) {
|
||||
case OMPI_OSC_RDMA_HDR_TYPE_ACC:
|
||||
case OMPI_OSC_RDMA_HDR_TYPE_ACC_LONG:
|
||||
pending_acc->header.acc = header->acc;
|
||||
break;
|
||||
case OMPI_OSC_RDMA_HDR_TYPE_GET_ACC:
|
||||
case OMPI_OSC_RDMA_HDR_TYPE_GET_ACC_LONG:
|
||||
pending_acc->header.get_acc = header->get_acc;
|
||||
pending_acc->header.acc = header->acc;
|
||||
break;
|
||||
case OMPI_OSC_RDMA_HDR_TYPE_CSWAP:
|
||||
pending_acc->header.cswap = header->cswap;
|
||||
@ -904,11 +902,11 @@ static int ompi_osc_rdma_acc_long_start (ompi_osc_rdma_module_t *module, int sou
|
||||
* function. It will release the lock when the operation is complete.
|
||||
*/
|
||||
static int ompi_osc_rdma_gacc_start (ompi_osc_rdma_module_t *module, int source, void *data, size_t data_len,
|
||||
ompi_datatype_t *datatype, ompi_osc_rdma_header_get_acc_t *get_acc_header)
|
||||
ompi_datatype_t *datatype, ompi_osc_rdma_header_acc_t *acc_header)
|
||||
{
|
||||
void *target = (unsigned char*) module->baseptr +
|
||||
((unsigned long) get_acc_header->displacement * module->disp_unit);
|
||||
struct ompi_op_t *op = ompi_osc_base_op_create(get_acc_header->op);
|
||||
((unsigned long) acc_header->displacement * module->disp_unit);
|
||||
struct ompi_op_t *op = ompi_osc_base_op_create(acc_header->op);
|
||||
struct osc_rdma_accumulate_data_t *acc_data;
|
||||
ompi_proc_t *proc;
|
||||
int ret;
|
||||
@ -917,13 +915,13 @@ static int ompi_osc_rdma_gacc_start (ompi_osc_rdma_module_t *module, int source,
|
||||
assert (NULL != proc);
|
||||
|
||||
do {
|
||||
ret = osc_rdma_accumulate_allocate (module, source, target, data, data_len, proc, get_acc_header->count,
|
||||
ret = osc_rdma_accumulate_allocate (module, source, target, data, data_len, proc, acc_header->count,
|
||||
datatype, op, 1, &acc_data);
|
||||
if (OPAL_UNLIKELY(OMPI_SUCCESS != ret)) {
|
||||
break;
|
||||
}
|
||||
|
||||
ret = ompi_osc_rdma_isend_w_cb (target, get_acc_header->count, datatype, source, get_acc_header->tag,
|
||||
ret = ompi_osc_rdma_isend_w_cb (target, acc_header->count, datatype, source, acc_header->tag,
|
||||
module->comm, accumulate_cb, acc_data);
|
||||
if (OPAL_UNLIKELY(OMPI_SUCCESS != ret)) {
|
||||
OBJ_RELEASE(acc_data);
|
||||
@ -947,17 +945,17 @@ static int ompi_osc_rdma_gacc_start (ompi_osc_rdma_module_t *module, int source,
|
||||
* @param[in] module - OSC RDMA module
|
||||
* @param[in] source - Source rank
|
||||
* @param[in] datatype - Accumulation datatype
|
||||
* @param[in] get_acc_header - Accumulate header
|
||||
* @param[in] acc_header - Accumulate header
|
||||
*
|
||||
* The module's accumulation lock must be held before calling this
|
||||
* function. It will release the lock when the operation is complete.
|
||||
*/
|
||||
static int ompi_osc_gacc_long_start (ompi_osc_rdma_module_t *module, int source, ompi_datatype_t *datatype,
|
||||
ompi_osc_rdma_header_get_acc_t *get_acc_header)
|
||||
ompi_osc_rdma_header_acc_t *acc_header)
|
||||
{
|
||||
void *target = (unsigned char*) module->baseptr +
|
||||
((unsigned long) get_acc_header->displacement * module->disp_unit);
|
||||
struct ompi_op_t *op = ompi_osc_base_op_create(get_acc_header->op);
|
||||
((unsigned long) acc_header->displacement * module->disp_unit);
|
||||
struct ompi_op_t *op = ompi_osc_base_op_create(acc_header->op);
|
||||
struct osc_rdma_accumulate_data_t *acc_data;
|
||||
ompi_datatype_t *primitive_datatype;
|
||||
ompi_request_t *recv_request;
|
||||
@ -971,7 +969,7 @@ static int ompi_osc_gacc_long_start (ompi_osc_rdma_module_t *module, int source,
|
||||
assert (NULL != proc);
|
||||
|
||||
/* allocate a temporary buffer to receive the accumulate data */
|
||||
buflen = datatype_buffer_length (datatype, get_acc_header->count);
|
||||
buflen = datatype_buffer_length (datatype, acc_header->count);
|
||||
|
||||
do {
|
||||
buffer = malloc (buflen);
|
||||
@ -985,22 +983,22 @@ static int ompi_osc_gacc_long_start (ompi_osc_rdma_module_t *module, int source,
|
||||
break;
|
||||
}
|
||||
|
||||
primitive_count *= get_acc_header->count;
|
||||
primitive_count *= acc_header->count;
|
||||
|
||||
ret = osc_rdma_accumulate_allocate (module, source, target, buffer, buflen, proc, get_acc_header->count,
|
||||
ret = osc_rdma_accumulate_allocate (module, source, target, buffer, buflen, proc, acc_header->count,
|
||||
datatype, op, 2, &acc_data);
|
||||
if (OPAL_UNLIKELY(OMPI_SUCCESS != ret)) {
|
||||
break;
|
||||
}
|
||||
|
||||
ret = ompi_osc_rdma_irecv_w_cb (buffer, get_acc_header->count, datatype, source, get_acc_header->tag,
|
||||
ret = ompi_osc_rdma_irecv_w_cb (buffer, acc_header->count, datatype, source, acc_header->tag,
|
||||
module->comm, &recv_request, accumulate_cb, acc_data);
|
||||
if (OPAL_UNLIKELY(OMPI_SUCCESS != ret)) {
|
||||
OBJ_RELEASE(acc_data);
|
||||
break;
|
||||
}
|
||||
|
||||
ret = ompi_osc_rdma_isend_w_cb (target, primitive_count, primitive_datatype, source, get_acc_header->tag,
|
||||
ret = ompi_osc_rdma_isend_w_cb (target, primitive_count, primitive_datatype, source, acc_header->tag,
|
||||
module->comm, accumulate_cb, acc_data);
|
||||
if (OPAL_UNLIKELY(OMPI_SUCCESS != ret)) {
|
||||
/* cancel the receive and free the accumulate data */
|
||||
@ -1115,11 +1113,11 @@ int ompi_osc_rdma_progress_pending_acc (ompi_osc_rdma_module_t *module)
|
||||
case OMPI_OSC_RDMA_HDR_TYPE_GET_ACC:
|
||||
ret = ompi_osc_rdma_gacc_start (module, pending_acc->source, pending_acc->data,
|
||||
pending_acc->data_len, pending_acc->datatype,
|
||||
&pending_acc->header.get_acc);
|
||||
&pending_acc->header.acc);
|
||||
break;
|
||||
case OMPI_OSC_RDMA_HDR_TYPE_GET_ACC_LONG:
|
||||
ret = ompi_osc_gacc_long_start (module, pending_acc->source, pending_acc->datatype,
|
||||
&pending_acc->header.get_acc);
|
||||
&pending_acc->header.acc);
|
||||
break;
|
||||
case OMPI_OSC_RDMA_HDR_TYPE_CSWAP:
|
||||
ret = ompi_osc_rdma_cswap_start (module, pending_acc->source, pending_acc->data,
|
||||
@ -1210,9 +1208,9 @@ static inline int process_acc_long (ompi_osc_rdma_module_t* module, int source,
|
||||
}
|
||||
|
||||
static inline int process_get_acc(ompi_osc_rdma_module_t *module, int source,
|
||||
ompi_osc_rdma_header_get_acc_t *get_acc_header)
|
||||
ompi_osc_rdma_header_acc_t *acc_header)
|
||||
{
|
||||
char *data = (char *) (get_acc_header + 1);
|
||||
char *data = (char *) (acc_header + 1);
|
||||
struct ompi_datatype_t *datatype;
|
||||
void *buffer = NULL;
|
||||
uint64_t data_len;
|
||||
@ -1228,7 +1226,7 @@ static inline int process_get_acc(ompi_osc_rdma_module_t *module, int source,
|
||||
return ret;
|
||||
}
|
||||
|
||||
data_len = get_acc_header->len - ((char*) data - (char*) get_acc_header);
|
||||
data_len = acc_header->len - ((char*) data - (char*) acc_header);
|
||||
|
||||
if (0 == ompi_osc_rdma_accumulate_trylock (module)) {
|
||||
/* make a copy of the data since the buffer needs to be returned */
|
||||
@ -1243,23 +1241,23 @@ static inline int process_get_acc(ompi_osc_rdma_module_t *module, int source,
|
||||
}
|
||||
|
||||
ret = ompi_osc_rdma_gacc_start (module, source, buffer, data_len, datatype,
|
||||
get_acc_header);
|
||||
acc_header);
|
||||
} else {
|
||||
/* queue the operation */
|
||||
ret = ompi_osc_rdma_acc_op_queue (module, (ompi_osc_rdma_header_t *) get_acc_header,
|
||||
ret = ompi_osc_rdma_acc_op_queue (module, (ompi_osc_rdma_header_t *) acc_header,
|
||||
source, data, data_len, datatype);
|
||||
}
|
||||
|
||||
/* Release datatype & op */
|
||||
OBJ_RELEASE(datatype);
|
||||
|
||||
return (OMPI_SUCCESS == ret) ? (int) get_acc_header->len : ret;
|
||||
return (OMPI_SUCCESS == ret) ? (int) acc_header->len : ret;
|
||||
}
|
||||
|
||||
static inline int process_get_acc_long(ompi_osc_rdma_module_t *module, int source,
|
||||
ompi_osc_rdma_header_get_acc_t *get_acc_header)
|
||||
ompi_osc_rdma_header_acc_t *acc_header)
|
||||
{
|
||||
char *data = (char *) (get_acc_header + 1);
|
||||
char *data = (char *) (acc_header + 1);
|
||||
struct ompi_datatype_t *datatype;
|
||||
int ret;
|
||||
|
||||
@ -1274,17 +1272,17 @@ static inline int process_get_acc_long(ompi_osc_rdma_module_t *module, int sourc
|
||||
}
|
||||
|
||||
if (0 == ompi_osc_rdma_accumulate_trylock (module)) {
|
||||
ret = ompi_osc_gacc_long_start (module, source, datatype, get_acc_header);
|
||||
ret = ompi_osc_gacc_long_start (module, source, datatype, acc_header);
|
||||
} else {
|
||||
/* queue the operation */
|
||||
ret = ompi_osc_rdma_acc_op_queue (module, (ompi_osc_rdma_header_t *) get_acc_header,
|
||||
ret = ompi_osc_rdma_acc_op_queue (module, (ompi_osc_rdma_header_t *) acc_header,
|
||||
source, NULL, 0, datatype);
|
||||
}
|
||||
|
||||
/* Release datatype & op */
|
||||
OBJ_RELEASE(datatype);
|
||||
|
||||
return OMPI_SUCCESS == ret ? (int) get_acc_header->len : ret;
|
||||
return OMPI_SUCCESS == ret ? (int) acc_header->len : ret;
|
||||
}
|
||||
|
||||
|
||||
@ -1440,7 +1438,7 @@ static int process_large_datatype_request_cb (ompi_request_t *request)
|
||||
(void) process_acc_long (module, source, &header->acc);
|
||||
break;
|
||||
case OMPI_OSC_RDMA_HDR_TYPE_GET_ACC_LONG:
|
||||
(void) process_get_acc_long (module, source, &header->get_acc);
|
||||
(void) process_get_acc_long (module, source, &header->acc);
|
||||
break;
|
||||
default:
|
||||
/* developer error */
|
||||
@ -1490,8 +1488,8 @@ static int process_large_datatype_request (ompi_osc_rdma_module_t *module, int s
|
||||
tag = header->acc.tag;
|
||||
break;
|
||||
case OMPI_OSC_RDMA_HDR_TYPE_GET_ACC_LONG:
|
||||
header_len = sizeof (header->get_acc);
|
||||
tag = header->get_acc.tag;
|
||||
header_len = sizeof (header->acc);
|
||||
tag = header->acc.tag;
|
||||
break;
|
||||
default:
|
||||
/* developer error */
|
||||
@ -1588,11 +1586,11 @@ static inline int process_frag (ompi_osc_rdma_module_t *module,
|
||||
break;
|
||||
|
||||
case OMPI_OSC_RDMA_HDR_TYPE_GET_ACC:
|
||||
ret = process_get_acc (module, frag->source, &header->get_acc);
|
||||
ret = process_get_acc (module, frag->source, &header->acc);
|
||||
break;
|
||||
|
||||
case OMPI_OSC_RDMA_HDR_TYPE_GET_ACC_LONG:
|
||||
ret = process_get_acc_long (module, frag->source, &header->get_acc);
|
||||
ret = process_get_acc_long (module, frag->source, &header->acc);
|
||||
break;
|
||||
|
||||
case OMPI_OSC_RDMA_HDR_TYPE_FLUSH_REQ:
|
||||
|
@ -101,17 +101,6 @@ struct ompi_osc_rdma_header_complete_t {
|
||||
};
|
||||
typedef struct ompi_osc_rdma_header_complete_t ompi_osc_rdma_header_complete_t;
|
||||
|
||||
struct ompi_osc_rdma_header_get_acc_t {
|
||||
ompi_osc_rdma_header_base_t base;
|
||||
|
||||
uint16_t tag;
|
||||
uint32_t count;
|
||||
uint32_t op;
|
||||
uint64_t len;
|
||||
uint64_t displacement;
|
||||
};
|
||||
typedef struct ompi_osc_rdma_header_get_acc_t ompi_osc_rdma_header_get_acc_t;
|
||||
|
||||
struct ompi_osc_rdma_header_cswap_t {
|
||||
ompi_osc_rdma_header_base_t base;
|
||||
|
||||
@ -183,7 +172,6 @@ union ompi_osc_rdma_header_t {
|
||||
ompi_osc_rdma_header_acc_t acc;
|
||||
ompi_osc_rdma_header_get_t get;
|
||||
ompi_osc_rdma_header_complete_t complete;
|
||||
ompi_osc_rdma_header_get_acc_t get_acc;
|
||||
ompi_osc_rdma_header_cswap_t cswap;
|
||||
ompi_osc_rdma_header_post_t post;
|
||||
ompi_osc_rdma_header_lock_t lock;
|
||||
|
Загрузка…
x
Ссылка в новой задаче
Block a user