1
1

osc/rdma: fix accumulate fragment size calculation

The wrong type was used when calculating the amount of space needed
for an accumulate fragment. Fixed the calculation and took the
opportunity to eliminate the get_acc header as it is identical to the
acc header.

This fixes trac:4719 and #4718

Tracking these fixes for 1.8.2 in this CMR.

Throwing this to Brad for review as he is the one who ran into the issue.

cmr=v1.8.2:reviewer=bbenton

This commit was SVN r32015.

The following Trac tickets were found above:
  Ticket 4719 --> https://svn.open-mpi.org/trac/ompi/ticket/4719
Этот коммит содержится в:
Nathan Hjelm 2014-06-17 14:53:24 +00:00
родитель c330778903
Коммит 7f6de57653
3 изменённых файлов: 40 добавлений и 54 удалений

Просмотреть файл

@ -496,14 +496,14 @@ ompi_osc_rdma_accumulate_w_req (void *origin_addr, int origin_count,
OPAL_THREAD_LOCK(&module->lock);
frag_len = sizeof(ompi_osc_rdma_header_acc_t) + ddt_len + payload_len;
frag_len = sizeof(*header) + ddt_len + payload_len;
ret = ompi_osc_rdma_frag_alloc(module, target, frag_len, &frag, &ptr);
if (OMPI_SUCCESS != ret) {
frag_len = sizeof(ompi_osc_rdma_header_acc_t) + ddt_len;
frag_len = sizeof(*header) + ddt_len;
ret = ompi_osc_rdma_frag_alloc(module, target, frag_len, &frag, &ptr);
if (OMPI_SUCCESS != ret) {
/* allocate space for the header plus space to store ddt_len */
frag_len = sizeof(ompi_osc_rdma_header_put_t) + 8;
frag_len = sizeof(*header) + 8;
ret = ompi_osc_rdma_frag_alloc(module, target, frag_len, &frag, &ptr);
if (OPAL_UNLIKELY(OMPI_SUCCESS != ret)) {
OPAL_THREAD_UNLOCK(&module->lock);
@ -525,7 +525,7 @@ ompi_osc_rdma_accumulate_w_req (void *origin_addr, int origin_count,
header->count = target_count;
header->displacement = target_disp;
header->op = op->o_f_to_c_index;
ptr += sizeof(ompi_osc_rdma_header_acc_t);
ptr += sizeof (*header);
do {
ret = ompi_datatype_get_pack_description(target_dt, &packed_ddt);
@ -983,7 +983,7 @@ int ompi_osc_rdma_rget_accumulate_internal (void *origin_addr, int origin_count,
bool is_long_datatype = false;
bool is_long_msg = false;
ompi_osc_rdma_frag_t *frag;
ompi_osc_rdma_header_get_acc_t *header;
ompi_osc_rdma_header_acc_t *header;
size_t ddt_len, payload_len, frag_len;
char *ptr;
const void *packed_ddt;
@ -1046,7 +1046,7 @@ int ompi_osc_rdma_rget_accumulate_internal (void *origin_addr, int origin_count,
ret = ompi_osc_rdma_frag_alloc(module, target_rank, frag_len, &frag, &ptr);
if (OMPI_SUCCESS != ret) {
/* allocate space for the header plus space to store ddt_len */
frag_len = sizeof(ompi_osc_rdma_header_put_t) + 8;
frag_len = sizeof(*header) + 8;
ret = ompi_osc_rdma_frag_alloc(module, target_rank, frag_len, &frag, &ptr);
if (OPAL_UNLIKELY(OMPI_SUCCESS != ret)) {
OPAL_THREAD_UNLOCK(&module->lock);
@ -1070,7 +1070,7 @@ int ompi_osc_rdma_rget_accumulate_internal (void *origin_addr, int origin_count,
OPAL_THREAD_UNLOCK(&module->lock);
header = (ompi_osc_rdma_header_get_acc_t *) ptr;
header = (ompi_osc_rdma_header_acc_t *) ptr;
header->base.flags = 0;
header->len = frag_len;
header->count = target_count;

Просмотреть файл

@ -740,11 +740,9 @@ static int ompi_osc_rdma_acc_op_queue (ompi_osc_rdma_module_t *module, ompi_osc_
switch (header->base.type) {
case OMPI_OSC_RDMA_HDR_TYPE_ACC:
case OMPI_OSC_RDMA_HDR_TYPE_ACC_LONG:
pending_acc->header.acc = header->acc;
break;
case OMPI_OSC_RDMA_HDR_TYPE_GET_ACC:
case OMPI_OSC_RDMA_HDR_TYPE_GET_ACC_LONG:
pending_acc->header.get_acc = header->get_acc;
pending_acc->header.acc = header->acc;
break;
case OMPI_OSC_RDMA_HDR_TYPE_CSWAP:
pending_acc->header.cswap = header->cswap;
@ -904,11 +902,11 @@ static int ompi_osc_rdma_acc_long_start (ompi_osc_rdma_module_t *module, int sou
* function. It will release the lock when the operation is complete.
*/
static int ompi_osc_rdma_gacc_start (ompi_osc_rdma_module_t *module, int source, void *data, size_t data_len,
ompi_datatype_t *datatype, ompi_osc_rdma_header_get_acc_t *get_acc_header)
ompi_datatype_t *datatype, ompi_osc_rdma_header_acc_t *acc_header)
{
void *target = (unsigned char*) module->baseptr +
((unsigned long) get_acc_header->displacement * module->disp_unit);
struct ompi_op_t *op = ompi_osc_base_op_create(get_acc_header->op);
((unsigned long) acc_header->displacement * module->disp_unit);
struct ompi_op_t *op = ompi_osc_base_op_create(acc_header->op);
struct osc_rdma_accumulate_data_t *acc_data;
ompi_proc_t *proc;
int ret;
@ -917,13 +915,13 @@ static int ompi_osc_rdma_gacc_start (ompi_osc_rdma_module_t *module, int source,
assert (NULL != proc);
do {
ret = osc_rdma_accumulate_allocate (module, source, target, data, data_len, proc, get_acc_header->count,
ret = osc_rdma_accumulate_allocate (module, source, target, data, data_len, proc, acc_header->count,
datatype, op, 1, &acc_data);
if (OPAL_UNLIKELY(OMPI_SUCCESS != ret)) {
break;
}
ret = ompi_osc_rdma_isend_w_cb (target, get_acc_header->count, datatype, source, get_acc_header->tag,
ret = ompi_osc_rdma_isend_w_cb (target, acc_header->count, datatype, source, acc_header->tag,
module->comm, accumulate_cb, acc_data);
if (OPAL_UNLIKELY(OMPI_SUCCESS != ret)) {
OBJ_RELEASE(acc_data);
@ -947,17 +945,17 @@ static int ompi_osc_rdma_gacc_start (ompi_osc_rdma_module_t *module, int source,
* @param[in] module - OSC RDMA module
* @param[in] source - Source rank
* @param[in] datatype - Accumulation datatype
* @param[in] get_acc_header - Accumulate header
* @param[in] acc_header - Accumulate header
*
* The module's accumulation lock must be held before calling this
* function. It will release the lock when the operation is complete.
*/
static int ompi_osc_gacc_long_start (ompi_osc_rdma_module_t *module, int source, ompi_datatype_t *datatype,
ompi_osc_rdma_header_get_acc_t *get_acc_header)
ompi_osc_rdma_header_acc_t *acc_header)
{
void *target = (unsigned char*) module->baseptr +
((unsigned long) get_acc_header->displacement * module->disp_unit);
struct ompi_op_t *op = ompi_osc_base_op_create(get_acc_header->op);
((unsigned long) acc_header->displacement * module->disp_unit);
struct ompi_op_t *op = ompi_osc_base_op_create(acc_header->op);
struct osc_rdma_accumulate_data_t *acc_data;
ompi_datatype_t *primitive_datatype;
ompi_request_t *recv_request;
@ -971,7 +969,7 @@ static int ompi_osc_gacc_long_start (ompi_osc_rdma_module_t *module, int source,
assert (NULL != proc);
/* allocate a temporary buffer to receive the accumulate data */
buflen = datatype_buffer_length (datatype, get_acc_header->count);
buflen = datatype_buffer_length (datatype, acc_header->count);
do {
buffer = malloc (buflen);
@ -985,22 +983,22 @@ static int ompi_osc_gacc_long_start (ompi_osc_rdma_module_t *module, int source,
break;
}
primitive_count *= get_acc_header->count;
primitive_count *= acc_header->count;
ret = osc_rdma_accumulate_allocate (module, source, target, buffer, buflen, proc, get_acc_header->count,
ret = osc_rdma_accumulate_allocate (module, source, target, buffer, buflen, proc, acc_header->count,
datatype, op, 2, &acc_data);
if (OPAL_UNLIKELY(OMPI_SUCCESS != ret)) {
break;
}
ret = ompi_osc_rdma_irecv_w_cb (buffer, get_acc_header->count, datatype, source, get_acc_header->tag,
ret = ompi_osc_rdma_irecv_w_cb (buffer, acc_header->count, datatype, source, acc_header->tag,
module->comm, &recv_request, accumulate_cb, acc_data);
if (OPAL_UNLIKELY(OMPI_SUCCESS != ret)) {
OBJ_RELEASE(acc_data);
break;
}
ret = ompi_osc_rdma_isend_w_cb (target, primitive_count, primitive_datatype, source, get_acc_header->tag,
ret = ompi_osc_rdma_isend_w_cb (target, primitive_count, primitive_datatype, source, acc_header->tag,
module->comm, accumulate_cb, acc_data);
if (OPAL_UNLIKELY(OMPI_SUCCESS != ret)) {
/* cancel the receive and free the accumulate data */
@ -1115,11 +1113,11 @@ int ompi_osc_rdma_progress_pending_acc (ompi_osc_rdma_module_t *module)
case OMPI_OSC_RDMA_HDR_TYPE_GET_ACC:
ret = ompi_osc_rdma_gacc_start (module, pending_acc->source, pending_acc->data,
pending_acc->data_len, pending_acc->datatype,
&pending_acc->header.get_acc);
&pending_acc->header.acc);
break;
case OMPI_OSC_RDMA_HDR_TYPE_GET_ACC_LONG:
ret = ompi_osc_gacc_long_start (module, pending_acc->source, pending_acc->datatype,
&pending_acc->header.get_acc);
&pending_acc->header.acc);
break;
case OMPI_OSC_RDMA_HDR_TYPE_CSWAP:
ret = ompi_osc_rdma_cswap_start (module, pending_acc->source, pending_acc->data,
@ -1210,9 +1208,9 @@ static inline int process_acc_long (ompi_osc_rdma_module_t* module, int source,
}
static inline int process_get_acc(ompi_osc_rdma_module_t *module, int source,
ompi_osc_rdma_header_get_acc_t *get_acc_header)
ompi_osc_rdma_header_acc_t *acc_header)
{
char *data = (char *) (get_acc_header + 1);
char *data = (char *) (acc_header + 1);
struct ompi_datatype_t *datatype;
void *buffer = NULL;
uint64_t data_len;
@ -1228,7 +1226,7 @@ static inline int process_get_acc(ompi_osc_rdma_module_t *module, int source,
return ret;
}
data_len = get_acc_header->len - ((char*) data - (char*) get_acc_header);
data_len = acc_header->len - ((char*) data - (char*) acc_header);
if (0 == ompi_osc_rdma_accumulate_trylock (module)) {
/* make a copy of the data since the buffer needs to be returned */
@ -1243,23 +1241,23 @@ static inline int process_get_acc(ompi_osc_rdma_module_t *module, int source,
}
ret = ompi_osc_rdma_gacc_start (module, source, buffer, data_len, datatype,
get_acc_header);
acc_header);
} else {
/* queue the operation */
ret = ompi_osc_rdma_acc_op_queue (module, (ompi_osc_rdma_header_t *) get_acc_header,
ret = ompi_osc_rdma_acc_op_queue (module, (ompi_osc_rdma_header_t *) acc_header,
source, data, data_len, datatype);
}
/* Release datatype & op */
OBJ_RELEASE(datatype);
return (OMPI_SUCCESS == ret) ? (int) get_acc_header->len : ret;
return (OMPI_SUCCESS == ret) ? (int) acc_header->len : ret;
}
static inline int process_get_acc_long(ompi_osc_rdma_module_t *module, int source,
ompi_osc_rdma_header_get_acc_t *get_acc_header)
ompi_osc_rdma_header_acc_t *acc_header)
{
char *data = (char *) (get_acc_header + 1);
char *data = (char *) (acc_header + 1);
struct ompi_datatype_t *datatype;
int ret;
@ -1274,17 +1272,17 @@ static inline int process_get_acc_long(ompi_osc_rdma_module_t *module, int sourc
}
if (0 == ompi_osc_rdma_accumulate_trylock (module)) {
ret = ompi_osc_gacc_long_start (module, source, datatype, get_acc_header);
ret = ompi_osc_gacc_long_start (module, source, datatype, acc_header);
} else {
/* queue the operation */
ret = ompi_osc_rdma_acc_op_queue (module, (ompi_osc_rdma_header_t *) get_acc_header,
ret = ompi_osc_rdma_acc_op_queue (module, (ompi_osc_rdma_header_t *) acc_header,
source, NULL, 0, datatype);
}
/* Release datatype & op */
OBJ_RELEASE(datatype);
return OMPI_SUCCESS == ret ? (int) get_acc_header->len : ret;
return OMPI_SUCCESS == ret ? (int) acc_header->len : ret;
}
@ -1440,7 +1438,7 @@ static int process_large_datatype_request_cb (ompi_request_t *request)
(void) process_acc_long (module, source, &header->acc);
break;
case OMPI_OSC_RDMA_HDR_TYPE_GET_ACC_LONG:
(void) process_get_acc_long (module, source, &header->get_acc);
(void) process_get_acc_long (module, source, &header->acc);
break;
default:
/* developer error */
@ -1490,8 +1488,8 @@ static int process_large_datatype_request (ompi_osc_rdma_module_t *module, int s
tag = header->acc.tag;
break;
case OMPI_OSC_RDMA_HDR_TYPE_GET_ACC_LONG:
header_len = sizeof (header->get_acc);
tag = header->get_acc.tag;
header_len = sizeof (header->acc);
tag = header->acc.tag;
break;
default:
/* developer error */
@ -1588,11 +1586,11 @@ static inline int process_frag (ompi_osc_rdma_module_t *module,
break;
case OMPI_OSC_RDMA_HDR_TYPE_GET_ACC:
ret = process_get_acc (module, frag->source, &header->get_acc);
ret = process_get_acc (module, frag->source, &header->acc);
break;
case OMPI_OSC_RDMA_HDR_TYPE_GET_ACC_LONG:
ret = process_get_acc_long (module, frag->source, &header->get_acc);
ret = process_get_acc_long (module, frag->source, &header->acc);
break;
case OMPI_OSC_RDMA_HDR_TYPE_FLUSH_REQ:

Просмотреть файл

@ -101,17 +101,6 @@ struct ompi_osc_rdma_header_complete_t {
};
typedef struct ompi_osc_rdma_header_complete_t ompi_osc_rdma_header_complete_t;
struct ompi_osc_rdma_header_get_acc_t {
ompi_osc_rdma_header_base_t base;
uint16_t tag;
uint32_t count;
uint32_t op;
uint64_t len;
uint64_t displacement;
};
typedef struct ompi_osc_rdma_header_get_acc_t ompi_osc_rdma_header_get_acc_t;
struct ompi_osc_rdma_header_cswap_t {
ompi_osc_rdma_header_base_t base;
@ -183,7 +172,6 @@ union ompi_osc_rdma_header_t {
ompi_osc_rdma_header_acc_t acc;
ompi_osc_rdma_header_get_t get;
ompi_osc_rdma_header_complete_t complete;
ompi_osc_rdma_header_get_acc_t get_acc;
ompi_osc_rdma_header_cswap_t cswap;
ompi_osc_rdma_header_post_t post;
ompi_osc_rdma_header_lock_t lock;