osc/pt2pt: enable heterogeneous support
Этот коммит содержится в:
родитель
c4ebdba035
Коммит
1488e82efd
@ -12,6 +12,8 @@
|
|||||||
* reserved.
|
* reserved.
|
||||||
* Copyright (c) 2010 IBM Corporation. All rights reserved.
|
* Copyright (c) 2010 IBM Corporation. All rights reserved.
|
||||||
* Copyright (c) 2012-2013 Sandia National Laboratories. All rights reserved.
|
* Copyright (c) 2012-2013 Sandia National Laboratories. All rights reserved.
|
||||||
|
* Copyright (c) 2015 Research Organization for Information Science
|
||||||
|
* and Technology (RIST). All rights reserved.
|
||||||
* $COPYRIGHT$
|
* $COPYRIGHT$
|
||||||
*
|
*
|
||||||
* Additional copyrights may follow
|
* Additional copyrights may follow
|
||||||
@ -274,7 +276,6 @@ ompi_osc_pt2pt_complete(ompi_win_t *win)
|
|||||||
int i;
|
int i;
|
||||||
int *ranks = NULL;
|
int *ranks = NULL;
|
||||||
ompi_group_t *group;
|
ompi_group_t *group;
|
||||||
int my_rank = ompi_comm_rank (module->comm);
|
|
||||||
|
|
||||||
OPAL_OUTPUT_VERBOSE((50, ompi_osc_base_framework.framework_output,
|
OPAL_OUTPUT_VERBOSE((50, ompi_osc_base_framework.framework_output,
|
||||||
"ompi_osc_pt2pt_complete entering..."));
|
"ompi_osc_pt2pt_complete entering..."));
|
||||||
@ -307,7 +308,8 @@ ompi_osc_pt2pt_complete(ompi_win_t *win)
|
|||||||
At the same time, clean out the outgoing count for the next
|
At the same time, clean out the outgoing count for the next
|
||||||
round. */
|
round. */
|
||||||
for (i = 0 ; i < ompi_group_size(module->sc_group) ; ++i) {
|
for (i = 0 ; i < ompi_group_size(module->sc_group) ; ++i) {
|
||||||
if (my_rank == ranks[i]) {
|
ompi_proc_t *proc = ompi_comm_peer_lookup(module->comm, ranks[i]);
|
||||||
|
if (ompi_proc_local() == proc) {
|
||||||
/* shortcut for self */
|
/* shortcut for self */
|
||||||
OPAL_OUTPUT_VERBOSE((50, ompi_osc_base_framework.framework_output, "ompi_osc_pt2pt_complete self complete"));
|
OPAL_OUTPUT_VERBOSE((50, ompi_osc_base_framework.framework_output, "ompi_osc_pt2pt_complete self complete"));
|
||||||
module->num_complete_msgs++;
|
module->num_complete_msgs++;
|
||||||
@ -316,7 +318,12 @@ ompi_osc_pt2pt_complete(ompi_win_t *win)
|
|||||||
|
|
||||||
complete_req.base.type = OMPI_OSC_PT2PT_HDR_TYPE_COMPLETE;
|
complete_req.base.type = OMPI_OSC_PT2PT_HDR_TYPE_COMPLETE;
|
||||||
complete_req.base.flags = OMPI_OSC_PT2PT_HDR_FLAG_VALID;
|
complete_req.base.flags = OMPI_OSC_PT2PT_HDR_FLAG_VALID;
|
||||||
|
#if OPAL_ENABLE_HETEROGENEOUS_SUPPORT && OPAL_ENABLE_DEBUG
|
||||||
|
complete_req.padding[0] = 0;
|
||||||
|
complete_req.padding[1] = 0;
|
||||||
|
#endif
|
||||||
complete_req.frag_count = module->epoch_outgoing_frag_count[ranks[i]];
|
complete_req.frag_count = module->epoch_outgoing_frag_count[ranks[i]];
|
||||||
|
osc_pt2pt_hton(&complete_req, proc);
|
||||||
|
|
||||||
peer = module->peers + ranks[i];
|
peer = module->peers + ranks[i];
|
||||||
|
|
||||||
@ -388,7 +395,6 @@ ompi_osc_pt2pt_post(ompi_group_t *group,
|
|||||||
int ret = OMPI_SUCCESS;
|
int ret = OMPI_SUCCESS;
|
||||||
ompi_osc_pt2pt_module_t *module = GET_MODULE(win);
|
ompi_osc_pt2pt_module_t *module = GET_MODULE(win);
|
||||||
ompi_osc_pt2pt_header_post_t post_req;
|
ompi_osc_pt2pt_header_post_t post_req;
|
||||||
int my_rank = ompi_comm_rank(module->comm);
|
|
||||||
|
|
||||||
/* can't check for all access epoch here due to fence */
|
/* can't check for all access epoch here due to fence */
|
||||||
if (module->pw_group) {
|
if (module->pw_group) {
|
||||||
@ -430,17 +436,19 @@ ompi_osc_pt2pt_post(ompi_group_t *group,
|
|||||||
/* send a hello counter to everyone in group */
|
/* send a hello counter to everyone in group */
|
||||||
for (int i = 0 ; i < ompi_group_size(module->pw_group) ; ++i) {
|
for (int i = 0 ; i < ompi_group_size(module->pw_group) ; ++i) {
|
||||||
OPAL_OUTPUT_VERBOSE((50, ompi_osc_base_framework.framework_output, "Sending post message to rank %d", ranks[i]));
|
OPAL_OUTPUT_VERBOSE((50, ompi_osc_base_framework.framework_output, "Sending post message to rank %d", ranks[i]));
|
||||||
|
ompi_proc_t *proc = ompi_comm_peer_lookup(module->comm, ranks[i]);
|
||||||
|
|
||||||
/* shortcut for self */
|
/* shortcut for self */
|
||||||
if (my_rank == ranks[i]) {
|
if (ompi_proc_local() == proc) {
|
||||||
OPAL_OUTPUT_VERBOSE((50, ompi_osc_base_framework.framework_output, "ompi_osc_pt2pt_complete self post"));
|
OPAL_OUTPUT_VERBOSE((50, ompi_osc_base_framework.framework_output, "ompi_osc_pt2pt_complete self post"));
|
||||||
osc_pt2pt_incoming_post (module, my_rank);
|
osc_pt2pt_incoming_post (module, ompi_comm_rank(module->comm));
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
post_req.base.type = OMPI_OSC_PT2PT_HDR_TYPE_POST;
|
post_req.base.type = OMPI_OSC_PT2PT_HDR_TYPE_POST;
|
||||||
post_req.base.flags = OMPI_OSC_PT2PT_HDR_FLAG_VALID;
|
post_req.base.flags = OMPI_OSC_PT2PT_HDR_FLAG_VALID;
|
||||||
post_req.windx = ompi_comm_get_cid(module->comm);
|
post_req.windx = ompi_comm_get_cid(module->comm);
|
||||||
|
osc_pt2pt_hton(&post_req, proc);
|
||||||
|
|
||||||
/* we don't want to send any data, since we're the exposure
|
/* we don't want to send any data, since we're the exposure
|
||||||
epoch only, so use an unbuffered send */
|
epoch only, so use an unbuffered send */
|
||||||
|
@ -12,6 +12,8 @@
|
|||||||
* reserved.
|
* reserved.
|
||||||
* Copyright (c) 2010 Cisco Systems, Inc. All rights reserved.
|
* Copyright (c) 2010 Cisco Systems, Inc. All rights reserved.
|
||||||
* Copyright (c) 2012-2013 Sandia National Laboratories. All rights reserved.
|
* Copyright (c) 2012-2013 Sandia National Laboratories. All rights reserved.
|
||||||
|
* Copyright (c) 2015 Research Organization for Information Science
|
||||||
|
* and Technology (RIST). All rights reserved.
|
||||||
* $COPYRIGHT$
|
* $COPYRIGHT$
|
||||||
*
|
*
|
||||||
* Additional copyrights may follow
|
* Additional copyrights may follow
|
||||||
@ -403,6 +405,7 @@ static inline int ompi_osc_pt2pt_put_w_req (void *origin_addr, int origin_count,
|
|||||||
|
|
||||||
if (!is_long_msg) {
|
if (!is_long_msg) {
|
||||||
header->base.type = OMPI_OSC_PT2PT_HDR_TYPE_PUT;
|
header->base.type = OMPI_OSC_PT2PT_HDR_TYPE_PUT;
|
||||||
|
osc_pt2pt_hton(header, proc);
|
||||||
|
|
||||||
osc_pt2pt_copy_for_send (ptr, payload_len, origin_addr, proc, origin_count,
|
osc_pt2pt_copy_for_send (ptr, payload_len, origin_addr, proc, origin_count,
|
||||||
origin_dt);
|
origin_dt);
|
||||||
@ -414,8 +417,8 @@ static inline int ompi_osc_pt2pt_put_w_req (void *origin_addr, int origin_count,
|
|||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
header->base.type = OMPI_OSC_PT2PT_HDR_TYPE_PUT_LONG;
|
header->base.type = OMPI_OSC_PT2PT_HDR_TYPE_PUT_LONG;
|
||||||
|
|
||||||
header->tag = tag;
|
header->tag = tag;
|
||||||
|
osc_pt2pt_hton(header, proc);
|
||||||
|
|
||||||
/* increase the outgoing signal count */
|
/* increase the outgoing signal count */
|
||||||
ompi_osc_signal_outgoing (module, target, 1);
|
ompi_osc_signal_outgoing (module, target, 1);
|
||||||
@ -580,6 +583,7 @@ ompi_osc_pt2pt_accumulate_w_req (void *origin_addr, int origin_count,
|
|||||||
|
|
||||||
if (!is_long_msg) {
|
if (!is_long_msg) {
|
||||||
header->base.type = OMPI_OSC_PT2PT_HDR_TYPE_ACC;
|
header->base.type = OMPI_OSC_PT2PT_HDR_TYPE_ACC;
|
||||||
|
osc_pt2pt_hton(header, proc);
|
||||||
|
|
||||||
osc_pt2pt_copy_for_send (ptr, payload_len, origin_addr, proc,
|
osc_pt2pt_copy_for_send (ptr, payload_len, origin_addr, proc,
|
||||||
origin_count, origin_dt);
|
origin_count, origin_dt);
|
||||||
@ -591,8 +595,8 @@ ompi_osc_pt2pt_accumulate_w_req (void *origin_addr, int origin_count,
|
|||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
header->base.type = OMPI_OSC_PT2PT_HDR_TYPE_ACC_LONG;
|
header->base.type = OMPI_OSC_PT2PT_HDR_TYPE_ACC_LONG;
|
||||||
|
|
||||||
header->tag = tag;
|
header->tag = tag;
|
||||||
|
osc_pt2pt_hton(header, proc);
|
||||||
|
|
||||||
OPAL_OUTPUT_VERBOSE((25, ompi_osc_base_framework.framework_output,
|
OPAL_OUTPUT_VERBOSE((25, ompi_osc_base_framework.framework_output,
|
||||||
"acc: starting long accumulate with tag %d", tag));
|
"acc: starting long accumulate with tag %d", tag));
|
||||||
@ -708,6 +712,7 @@ int ompi_osc_pt2pt_compare_and_swap (void *origin_addr, void *compare_addr,
|
|||||||
header->len = frag_len;
|
header->len = frag_len;
|
||||||
header->displacement = target_disp;
|
header->displacement = target_disp;
|
||||||
header->tag = tag;
|
header->tag = tag;
|
||||||
|
osc_pt2pt_hton(header, proc);
|
||||||
ptr += sizeof(ompi_osc_pt2pt_header_cswap_t);
|
ptr += sizeof(ompi_osc_pt2pt_header_cswap_t);
|
||||||
|
|
||||||
ret = ompi_datatype_get_pack_description(dt, &packed_ddt);
|
ret = ompi_datatype_get_pack_description(dt, &packed_ddt);
|
||||||
@ -880,6 +885,7 @@ static inline int ompi_osc_pt2pt_rget_internal (void *origin_addr, int origin_co
|
|||||||
header->count = target_count;
|
header->count = target_count;
|
||||||
header->displacement = target_disp;
|
header->displacement = target_disp;
|
||||||
header->tag = tag;
|
header->tag = tag;
|
||||||
|
OSC_PT2PT_HTON(header, module, target);
|
||||||
ptr += sizeof(ompi_osc_pt2pt_header_get_t);
|
ptr += sizeof(ompi_osc_pt2pt_header_get_t);
|
||||||
|
|
||||||
do {
|
do {
|
||||||
@ -1115,6 +1121,7 @@ int ompi_osc_pt2pt_rget_accumulate_internal (void *origin_addr, int origin_count
|
|||||||
header->displacement = target_disp;
|
header->displacement = target_disp;
|
||||||
header->op = op->o_f_to_c_index;
|
header->op = op->o_f_to_c_index;
|
||||||
header->tag = tag;
|
header->tag = tag;
|
||||||
|
|
||||||
ptr = (char *)(header + 1);
|
ptr = (char *)(header + 1);
|
||||||
|
|
||||||
do {
|
do {
|
||||||
@ -1151,6 +1158,7 @@ int ompi_osc_pt2pt_rget_accumulate_internal (void *origin_addr, int origin_count
|
|||||||
|
|
||||||
if (!is_long_msg) {
|
if (!is_long_msg) {
|
||||||
header->base.type = OMPI_OSC_PT2PT_HDR_TYPE_GET_ACC;
|
header->base.type = OMPI_OSC_PT2PT_HDR_TYPE_GET_ACC;
|
||||||
|
osc_pt2pt_hton(header, proc);
|
||||||
|
|
||||||
if (&ompi_mpi_op_no_op.op != op) {
|
if (&ompi_mpi_op_no_op.op != op) {
|
||||||
osc_pt2pt_copy_for_send (ptr, payload_len, origin_addr, proc, origin_count,
|
osc_pt2pt_copy_for_send (ptr, payload_len, origin_addr, proc, origin_count,
|
||||||
@ -1158,6 +1166,7 @@ int ompi_osc_pt2pt_rget_accumulate_internal (void *origin_addr, int origin_count
|
|||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
header->base.type = OMPI_OSC_PT2PT_HDR_TYPE_GET_ACC_LONG;
|
header->base.type = OMPI_OSC_PT2PT_HDR_TYPE_GET_ACC_LONG;
|
||||||
|
osc_pt2pt_hton(header, proc);
|
||||||
|
|
||||||
ret = ompi_osc_pt2pt_isend_w_cb (origin_addr, origin_count, origin_datatype, target_rank,
|
ret = ompi_osc_pt2pt_isend_w_cb (origin_addr, origin_count, origin_datatype, target_rank,
|
||||||
tag, module->comm, ompi_osc_pt2pt_req_comm_complete, pt2pt_request);
|
tag, module->comm, ompi_osc_pt2pt_req_comm_complete, pt2pt_request);
|
||||||
|
@ -12,7 +12,7 @@
|
|||||||
* reserved.
|
* reserved.
|
||||||
* Copyright (c) 2009-2011 Oracle and/or its affiliates. All rights reserved.
|
* Copyright (c) 2009-2011 Oracle and/or its affiliates. All rights reserved.
|
||||||
* Copyright (c) 2012-2013 Sandia National Laboratories. All rights reserved.
|
* Copyright (c) 2012-2013 Sandia National Laboratories. All rights reserved.
|
||||||
* Copyright (c) 2014 Research Organization for Information Science
|
* Copyright (c) 2014-2015 Research Organization for Information Science
|
||||||
* and Technology (RIST). All rights reserved.
|
* and Technology (RIST). All rights reserved.
|
||||||
* $COPYRIGHT$
|
* $COPYRIGHT$
|
||||||
*
|
*
|
||||||
@ -561,7 +561,6 @@ static inline int process_get (ompi_osc_pt2pt_module_t* module, int target,
|
|||||||
static inline int osc_pt2pt_accumulate_buffer (void *target, void *source, size_t source_len, ompi_proc_t *proc,
|
static inline int osc_pt2pt_accumulate_buffer (void *target, void *source, size_t source_len, ompi_proc_t *proc,
|
||||||
int count, ompi_datatype_t *datatype, ompi_op_t *op)
|
int count, ompi_datatype_t *datatype, ompi_op_t *op)
|
||||||
{
|
{
|
||||||
void *buffer = source;
|
|
||||||
int ret;
|
int ret;
|
||||||
|
|
||||||
assert (NULL != target && NULL != source);
|
assert (NULL != target && NULL != source);
|
||||||
@ -576,6 +575,7 @@ static inline int osc_pt2pt_accumulate_buffer (void *target, void *source, size_
|
|||||||
ompi_datatype_t *primitive_datatype = NULL;
|
ompi_datatype_t *primitive_datatype = NULL;
|
||||||
uint32_t primitive_count;
|
uint32_t primitive_count;
|
||||||
size_t buflen;
|
size_t buflen;
|
||||||
|
void *buffer;
|
||||||
|
|
||||||
ompi_osc_base_get_primitive_type_info(datatype, &primitive_datatype, &primitive_count);
|
ompi_osc_base_get_primitive_type_info(datatype, &primitive_datatype, &primitive_count);
|
||||||
primitive_count *= count;
|
primitive_count *= count;
|
||||||
@ -589,20 +589,19 @@ static inline int osc_pt2pt_accumulate_buffer (void *target, void *source, size_
|
|||||||
return OMPI_ERR_OUT_OF_RESOURCE;
|
return OMPI_ERR_OUT_OF_RESOURCE;
|
||||||
}
|
}
|
||||||
|
|
||||||
osc_pt2pt_copy_on_recv (buffer, source, source_len, proc, count, datatype);
|
osc_pt2pt_copy_on_recv (buffer, source, source_len, proc, primitive_count, primitive_datatype);
|
||||||
}
|
|
||||||
|
ret = ompi_osc_base_process_op(target, buffer, source_len, datatype,
|
||||||
|
count, op);
|
||||||
|
|
||||||
|
free(buffer);
|
||||||
|
} else
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
/* copy the data from the temporary buffer into the user window */
|
/* copy the data from the temporary buffer into the user window */
|
||||||
ret = ompi_osc_base_process_op(target, buffer, source_len, datatype,
|
ret = ompi_osc_base_process_op(target, source, source_len, datatype,
|
||||||
count, op);
|
count, op);
|
||||||
|
|
||||||
#if OPAL_ENABLE_HETEROGENEOUS_SUPPORT
|
|
||||||
if (proc->super.proc_arch != ompi_proc_local()->super.proc_arch) {
|
|
||||||
free(buffer);
|
|
||||||
}
|
|
||||||
#endif
|
|
||||||
|
|
||||||
return ret;
|
return ret;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -682,8 +681,19 @@ static int accumulate_cb (ompi_request_t *request)
|
|||||||
/* no more requests needed before the buffer can be accumulated */
|
/* no more requests needed before the buffer can be accumulated */
|
||||||
|
|
||||||
if (acc_data->source) {
|
if (acc_data->source) {
|
||||||
ret = osc_pt2pt_accumulate_buffer (acc_data->target, acc_data->source, acc_data->source_len,
|
ompi_datatype_t *primitive_datatype = NULL;
|
||||||
acc_data->proc, acc_data->count, acc_data->datatype, acc_data->op);
|
uint32_t primitive_count;
|
||||||
|
|
||||||
|
assert (NULL != acc_data->target && NULL != acc_data->source);
|
||||||
|
|
||||||
|
ompi_osc_base_get_primitive_type_info(acc_data->datatype, &primitive_datatype, &primitive_count);
|
||||||
|
primitive_count *= acc_data->count;
|
||||||
|
|
||||||
|
if (acc_data->op == &ompi_mpi_op_replace.op) {
|
||||||
|
ret = ompi_datatype_sndrcv(acc_data->source, primitive_count, primitive_datatype, acc_data->target, acc_data->count, acc_data->datatype);
|
||||||
|
} else {
|
||||||
|
ret = ompi_osc_base_process_op(acc_data->target, acc_data->source, acc_data->source_len, acc_data->datatype, acc_data->count, acc_data->op);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/* drop the accumulate lock */
|
/* drop the accumulate lock */
|
||||||
@ -1215,6 +1225,7 @@ static inline int process_get_acc(ompi_osc_pt2pt_module_t *module, int source,
|
|||||||
struct ompi_datatype_t *datatype;
|
struct ompi_datatype_t *datatype;
|
||||||
void *buffer = NULL;
|
void *buffer = NULL;
|
||||||
uint64_t data_len;
|
uint64_t data_len;
|
||||||
|
ompi_proc_t * proc;
|
||||||
int ret;
|
int ret;
|
||||||
|
|
||||||
OPAL_OUTPUT_VERBOSE((50, ompi_osc_base_framework.framework_output,
|
OPAL_OUTPUT_VERBOSE((50, ompi_osc_base_framework.framework_output,
|
||||||
@ -1222,7 +1233,7 @@ static inline int process_get_acc(ompi_osc_pt2pt_module_t *module, int source,
|
|||||||
ompi_comm_rank(module->comm),
|
ompi_comm_rank(module->comm),
|
||||||
source));
|
source));
|
||||||
|
|
||||||
ret = datatype_create (module, source, NULL, &datatype, (void **) &data);
|
ret = datatype_create (module, source, &proc, &datatype, (void **) &data);
|
||||||
if (OPAL_UNLIKELY(OMPI_SUCCESS != ret)) {
|
if (OPAL_UNLIKELY(OMPI_SUCCESS != ret)) {
|
||||||
return ret;
|
return ret;
|
||||||
}
|
}
|
||||||
@ -1232,13 +1243,18 @@ static inline int process_get_acc(ompi_osc_pt2pt_module_t *module, int source,
|
|||||||
if (0 == ompi_osc_pt2pt_accumulate_trylock (module)) {
|
if (0 == ompi_osc_pt2pt_accumulate_trylock (module)) {
|
||||||
/* make a copy of the data since the buffer needs to be returned */
|
/* make a copy of the data since the buffer needs to be returned */
|
||||||
if (data_len) {
|
if (data_len) {
|
||||||
|
ompi_datatype_t *primitive_datatype = NULL;
|
||||||
|
uint32_t primitive_count;
|
||||||
buffer = malloc (data_len);
|
buffer = malloc (data_len);
|
||||||
if (OPAL_UNLIKELY(NULL == buffer)) {
|
if (OPAL_UNLIKELY(NULL == buffer)) {
|
||||||
OBJ_RELEASE(datatype);
|
OBJ_RELEASE(datatype);
|
||||||
return OMPI_ERR_OUT_OF_RESOURCE;
|
return OMPI_ERR_OUT_OF_RESOURCE;
|
||||||
}
|
}
|
||||||
|
|
||||||
memcpy (buffer, data, data_len);
|
ompi_osc_base_get_primitive_type_info(datatype, &primitive_datatype, &primitive_count);
|
||||||
|
primitive_count *= acc_header->count;
|
||||||
|
|
||||||
|
osc_pt2pt_copy_on_recv (buffer, data, data_len, proc, primitive_count, primitive_datatype);
|
||||||
}
|
}
|
||||||
|
|
||||||
ret = ompi_osc_pt2pt_gacc_start (module, source, buffer, data_len, datatype,
|
ret = ompi_osc_pt2pt_gacc_start (module, source, buffer, data_len, datatype,
|
||||||
@ -1543,6 +1559,7 @@ static inline int process_frag (ompi_osc_pt2pt_module_t *module,
|
|||||||
header->base.flags));
|
header->base.flags));
|
||||||
|
|
||||||
if (OPAL_LIKELY(!(header->base.flags & OMPI_OSC_PT2PT_HDR_FLAG_LARGE_DATATYPE))) {
|
if (OPAL_LIKELY(!(header->base.flags & OMPI_OSC_PT2PT_HDR_FLAG_LARGE_DATATYPE))) {
|
||||||
|
osc_pt2pt_ntoh(header);
|
||||||
switch (header->base.type) {
|
switch (header->base.type) {
|
||||||
case OMPI_OSC_PT2PT_HDR_TYPE_PUT:
|
case OMPI_OSC_PT2PT_HDR_TYPE_PUT:
|
||||||
ret = process_put(module, frag->source, &header->put);
|
ret = process_put(module, frag->source, &header->put);
|
||||||
@ -1617,8 +1634,8 @@ static inline int process_frag (ompi_osc_pt2pt_module_t *module,
|
|||||||
static int ompi_osc_pt2pt_callback (ompi_request_t *request)
|
static int ompi_osc_pt2pt_callback (ompi_request_t *request)
|
||||||
{
|
{
|
||||||
ompi_osc_pt2pt_module_t *module = (ompi_osc_pt2pt_module_t *) request->req_complete_cb_data;
|
ompi_osc_pt2pt_module_t *module = (ompi_osc_pt2pt_module_t *) request->req_complete_cb_data;
|
||||||
ompi_osc_pt2pt_header_base_t *base_header =
|
ompi_osc_pt2pt_header_t *base_header =
|
||||||
(ompi_osc_pt2pt_header_base_t *) module->incoming_buffer;
|
(ompi_osc_pt2pt_header_t *) module->incoming_buffer;
|
||||||
size_t incoming_length = request->req_status._ucount;
|
size_t incoming_length = request->req_status._ucount;
|
||||||
int source = request->req_status.MPI_SOURCE;
|
int source = request->req_status.MPI_SOURCE;
|
||||||
|
|
||||||
@ -1628,14 +1645,15 @@ static int ompi_osc_pt2pt_callback (ompi_request_t *request)
|
|||||||
|
|
||||||
OPAL_OUTPUT_VERBOSE((50, ompi_osc_base_framework.framework_output,
|
OPAL_OUTPUT_VERBOSE((50, ompi_osc_base_framework.framework_output,
|
||||||
"received pt2pt callback for fragment. source = %d, count = %u, type = 0x%x",
|
"received pt2pt callback for fragment. source = %d, count = %u, type = 0x%x",
|
||||||
source, (unsigned) incoming_length, base_header->type));
|
source, (unsigned) incoming_length, base_header->base.type));
|
||||||
|
|
||||||
switch (base_header->type) {
|
osc_pt2pt_ntoh(base_header);
|
||||||
|
switch (base_header->base.type) {
|
||||||
case OMPI_OSC_PT2PT_HDR_TYPE_FRAG:
|
case OMPI_OSC_PT2PT_HDR_TYPE_FRAG:
|
||||||
process_frag(module, (ompi_osc_pt2pt_frag_header_t *) base_header);
|
process_frag(module, (ompi_osc_pt2pt_frag_header_t *) base_header);
|
||||||
|
|
||||||
/* only data fragments should be included in the completion counters */
|
/* only data fragments should be included in the completion counters */
|
||||||
mark_incoming_completion (module, (base_header->flags & OMPI_OSC_PT2PT_HDR_FLAG_PASSIVE_TARGET) ? source : MPI_PROC_NULL);
|
mark_incoming_completion (module, (base_header->base.flags & OMPI_OSC_PT2PT_HDR_FLAG_PASSIVE_TARGET) ? source : MPI_PROC_NULL);
|
||||||
break;
|
break;
|
||||||
case OMPI_OSC_PT2PT_HDR_TYPE_POST:
|
case OMPI_OSC_PT2PT_HDR_TYPE_POST:
|
||||||
(void) osc_pt2pt_incoming_post (module, source);
|
(void) osc_pt2pt_incoming_post (module, source);
|
||||||
@ -1652,7 +1670,7 @@ static int ompi_osc_pt2pt_callback (ompi_request_t *request)
|
|||||||
default:
|
default:
|
||||||
OPAL_OUTPUT_VERBOSE((50, ompi_osc_base_framework.framework_output,
|
OPAL_OUTPUT_VERBOSE((50, ompi_osc_base_framework.framework_output,
|
||||||
"received unexpected message of type %x",
|
"received unexpected message of type %x",
|
||||||
(int) base_header->type));
|
(int) base_header->base.type));
|
||||||
}
|
}
|
||||||
|
|
||||||
OPAL_OUTPUT_VERBOSE((50, ompi_osc_base_framework.framework_output,
|
OPAL_OUTPUT_VERBOSE((50, ompi_osc_base_framework.framework_output,
|
||||||
|
@ -59,6 +59,7 @@ static int frag_send (ompi_osc_pt2pt_module_t *module, ompi_osc_pt2pt_frag_t *fr
|
|||||||
"osc pt2pt: frag_send called to %d, frag = %p, count = %d",
|
"osc pt2pt: frag_send called to %d, frag = %p, count = %d",
|
||||||
frag->target, (void *) frag, count));
|
frag->target, (void *) frag, count));
|
||||||
|
|
||||||
|
OSC_PT2PT_HTON(frag->header, module, frag->target);
|
||||||
return ompi_osc_pt2pt_isend_w_cb (frag->buffer, count, MPI_BYTE, frag->target, OSC_PT2PT_FRAG_TAG,
|
return ompi_osc_pt2pt_isend_w_cb (frag->buffer, count, MPI_BYTE, frag->target, OSC_PT2PT_FRAG_TAG,
|
||||||
module->comm, frag_send_cb, frag);
|
module->comm, frag_send_cb, frag);
|
||||||
}
|
}
|
||||||
|
@ -13,6 +13,8 @@
|
|||||||
* Copyright (c) 2010 Cisco Systems, Inc. All rights reserved.
|
* Copyright (c) 2010 Cisco Systems, Inc. All rights reserved.
|
||||||
* Copyright (c) 2010 Oracle and/or its affiliates. All rights reserved.
|
* Copyright (c) 2010 Oracle and/or its affiliates. All rights reserved.
|
||||||
* Copyright (c) 2012-2013 Sandia National Laboratories. All rights reserved.
|
* Copyright (c) 2012-2013 Sandia National Laboratories. All rights reserved.
|
||||||
|
* Copyright (c) 2015 Research Organization for Information Science
|
||||||
|
* and Technology (RIST). All rights reserved.
|
||||||
* $COPYRIGHT$
|
* $COPYRIGHT$
|
||||||
*
|
*
|
||||||
* Additional copyrights may follow
|
* Additional copyrights may follow
|
||||||
@ -28,6 +30,7 @@
|
|||||||
#endif
|
#endif
|
||||||
|
|
||||||
#include "opal/types.h"
|
#include "opal/types.h"
|
||||||
|
#include "opal/util/arch.h"
|
||||||
|
|
||||||
enum ompi_osc_pt2pt_hdr_type_t {
|
enum ompi_osc_pt2pt_hdr_type_t {
|
||||||
OMPI_OSC_PT2PT_HDR_TYPE_PUT = 0x01,
|
OMPI_OSC_PT2PT_HDR_TYPE_PUT = 0x01,
|
||||||
@ -79,9 +82,9 @@ struct ompi_osc_pt2pt_header_acc_t {
|
|||||||
|
|
||||||
uint16_t tag;
|
uint16_t tag;
|
||||||
uint32_t count;
|
uint32_t count;
|
||||||
uint32_t op;
|
|
||||||
uint64_t len;
|
uint64_t len;
|
||||||
uint64_t displacement;
|
uint64_t displacement;
|
||||||
|
uint32_t op;
|
||||||
};
|
};
|
||||||
typedef struct ompi_osc_pt2pt_header_acc_t ompi_osc_pt2pt_header_acc_t;
|
typedef struct ompi_osc_pt2pt_header_acc_t ompi_osc_pt2pt_header_acc_t;
|
||||||
|
|
||||||
@ -97,6 +100,9 @@ typedef struct ompi_osc_pt2pt_header_get_t ompi_osc_pt2pt_header_get_t;
|
|||||||
|
|
||||||
struct ompi_osc_pt2pt_header_complete_t {
|
struct ompi_osc_pt2pt_header_complete_t {
|
||||||
ompi_osc_pt2pt_header_base_t base;
|
ompi_osc_pt2pt_header_base_t base;
|
||||||
|
#if OPAL_ENABLE_HETEROGENEOUS_SUPPORT
|
||||||
|
uint8_t padding[2];
|
||||||
|
#endif
|
||||||
int frag_count;
|
int frag_count;
|
||||||
};
|
};
|
||||||
typedef struct ompi_osc_pt2pt_header_complete_t ompi_osc_pt2pt_header_complete_t;
|
typedef struct ompi_osc_pt2pt_header_complete_t ompi_osc_pt2pt_header_complete_t;
|
||||||
@ -105,7 +111,6 @@ struct ompi_osc_pt2pt_header_cswap_t {
|
|||||||
ompi_osc_pt2pt_header_base_t base;
|
ompi_osc_pt2pt_header_base_t base;
|
||||||
|
|
||||||
uint16_t tag;
|
uint16_t tag;
|
||||||
|
|
||||||
uint32_t len;
|
uint32_t len;
|
||||||
uint64_t displacement;
|
uint64_t displacement;
|
||||||
};
|
};
|
||||||
@ -119,6 +124,9 @@ typedef struct ompi_osc_pt2pt_header_post_t ompi_osc_pt2pt_header_post_t;
|
|||||||
|
|
||||||
struct ompi_osc_pt2pt_header_lock_t {
|
struct ompi_osc_pt2pt_header_lock_t {
|
||||||
ompi_osc_pt2pt_header_base_t base;
|
ompi_osc_pt2pt_header_base_t base;
|
||||||
|
#if OPAL_ENABLE_HETEROGENEOUS_SUPPORT
|
||||||
|
uint8_t padding[2];
|
||||||
|
#endif
|
||||||
int32_t lock_type;
|
int32_t lock_type;
|
||||||
uint64_t lock_ptr;
|
uint64_t lock_ptr;
|
||||||
};
|
};
|
||||||
@ -134,20 +142,29 @@ typedef struct ompi_osc_pt2pt_header_lock_ack_t ompi_osc_pt2pt_header_lock_ack_t
|
|||||||
|
|
||||||
struct ompi_osc_pt2pt_header_unlock_t {
|
struct ompi_osc_pt2pt_header_unlock_t {
|
||||||
ompi_osc_pt2pt_header_base_t base;
|
ompi_osc_pt2pt_header_base_t base;
|
||||||
|
#if OPAL_ENABLE_HETEROGENEOUS_SUPPORT
|
||||||
|
uint8_t padding[2];
|
||||||
|
#endif
|
||||||
int32_t lock_type;
|
int32_t lock_type;
|
||||||
uint32_t frag_count;
|
|
||||||
uint64_t lock_ptr;
|
uint64_t lock_ptr;
|
||||||
|
uint32_t frag_count;
|
||||||
};
|
};
|
||||||
typedef struct ompi_osc_pt2pt_header_unlock_t ompi_osc_pt2pt_header_unlock_t;
|
typedef struct ompi_osc_pt2pt_header_unlock_t ompi_osc_pt2pt_header_unlock_t;
|
||||||
|
|
||||||
struct ompi_osc_pt2pt_header_unlock_ack_t {
|
struct ompi_osc_pt2pt_header_unlock_ack_t {
|
||||||
ompi_osc_pt2pt_header_base_t base;
|
ompi_osc_pt2pt_header_base_t base;
|
||||||
|
#if OPAL_ENABLE_HETEROGENEOUS_SUPPORT
|
||||||
|
uint8_t padding[6];
|
||||||
|
#endif
|
||||||
uint64_t lock_ptr;
|
uint64_t lock_ptr;
|
||||||
};
|
};
|
||||||
typedef struct ompi_osc_pt2pt_header_unlock_ack_t ompi_osc_pt2pt_header_unlock_ack_t;
|
typedef struct ompi_osc_pt2pt_header_unlock_ack_t ompi_osc_pt2pt_header_unlock_ack_t;
|
||||||
|
|
||||||
struct ompi_osc_pt2pt_header_flush_t {
|
struct ompi_osc_pt2pt_header_flush_t {
|
||||||
ompi_osc_pt2pt_header_base_t base;
|
ompi_osc_pt2pt_header_base_t base;
|
||||||
|
#if OPAL_ENABLE_HETEROGENEOUS_SUPPORT
|
||||||
|
uint8_t padding[2];
|
||||||
|
#endif
|
||||||
uint32_t frag_count;
|
uint32_t frag_count;
|
||||||
uint64_t serial_number;
|
uint64_t serial_number;
|
||||||
};
|
};
|
||||||
@ -155,6 +172,9 @@ typedef struct ompi_osc_pt2pt_header_flush_t ompi_osc_pt2pt_header_flush_t;
|
|||||||
|
|
||||||
struct ompi_osc_pt2pt_header_flush_ack_t {
|
struct ompi_osc_pt2pt_header_flush_ack_t {
|
||||||
ompi_osc_pt2pt_header_base_t base;
|
ompi_osc_pt2pt_header_base_t base;
|
||||||
|
#if OPAL_ENABLE_HETEROGENEOUS_SUPPORT
|
||||||
|
uint8_t padding[6];
|
||||||
|
#endif
|
||||||
uint64_t serial_number;
|
uint64_t serial_number;
|
||||||
};
|
};
|
||||||
typedef struct ompi_osc_pt2pt_header_flush_ack_t ompi_osc_pt2pt_header_flush_ack_t;
|
typedef struct ompi_osc_pt2pt_header_flush_ack_t ompi_osc_pt2pt_header_flush_ack_t;
|
||||||
@ -186,4 +206,248 @@ union ompi_osc_pt2pt_header_t {
|
|||||||
};
|
};
|
||||||
typedef union ompi_osc_pt2pt_header_t ompi_osc_pt2pt_header_t;
|
typedef union ompi_osc_pt2pt_header_t ompi_osc_pt2pt_header_t;
|
||||||
|
|
||||||
|
#if OPAL_ENABLE_HETEROGENEOUS_SUPPORT
|
||||||
|
/*
 * Byte-order conversion for a fragment header.
 *
 * windx is converted as a 16-bit value; source, num_ops and pad are
 * converted as 32-bit values.  NTOH goes from network to host order, HTON
 * from host to network order.
 *
 * Each macro expands to exactly one statement (do { } while (0)), so it is
 * safe as the body of an unbraced if/else.  Invoke it with a trailing
 * semicolon.  The argument h is evaluated several times and must be an
 * lvalue expression without side effects.
 */
#define MCA_OSC_PT2PT_FRAG_HDR_NTOH(h) \
    do { \
        (h).windx = ntohs((h).windx); \
        (h).source = ntohl((h).source); \
        (h).num_ops = ntohl((h).num_ops); \
        (h).pad = ntohl((h).pad); \
    } while (0)
#define MCA_OSC_PT2PT_FRAG_HDR_HTON(h) \
    do { \
        (h).windx = htons((h).windx); \
        (h).source = htonl((h).source); \
        (h).num_ops = htonl((h).num_ops); \
        (h).pad = htonl((h).pad); \
    } while (0)
|
||||||
|
|
||||||
|
/*
 * Byte-order conversion for a put header.
 *
 * tag is converted as a 16-bit value, count as a 32-bit value, and len and
 * displacement as 64-bit values (via ntoh64/hton64).
 *
 * Each macro expands to exactly one statement (do { } while (0)), so it is
 * safe as the body of an unbraced if/else.  Invoke it with a trailing
 * semicolon.  The argument h is evaluated several times and must be an
 * lvalue expression without side effects.
 */
#define MCA_OSC_PT2PT_PUT_HDR_NTOH(h) \
    do { \
        (h).tag = ntohs((h).tag); \
        (h).count = ntohl((h).count); \
        (h).len = ntoh64((h).len); \
        (h).displacement = ntoh64((h).displacement); \
    } while (0)
#define MCA_OSC_PT2PT_PUT_HDR_HTON(h) \
    do { \
        (h).tag = htons((h).tag); \
        (h).count = htonl((h).count); \
        (h).len = hton64((h).len); \
        (h).displacement = hton64((h).displacement); \
    } while (0)
|
||||||
|
|
||||||
|
/*
 * Byte-order conversion for a get header.
 *
 * tag is converted as a 16-bit value, count as a 32-bit value, and len and
 * displacement as 64-bit values (via ntoh64/hton64).
 *
 * Each macro expands to exactly one statement (do { } while (0)), so it is
 * safe as the body of an unbraced if/else.  Invoke it with a trailing
 * semicolon.  The argument h is evaluated several times and must be an
 * lvalue expression without side effects.
 */
#define MCA_OSC_PT2PT_GET_HDR_NTOH(h) \
    do { \
        (h).tag = ntohs((h).tag); \
        (h).count = ntohl((h).count); \
        (h).len = ntoh64((h).len); \
        (h).displacement = ntoh64((h).displacement); \
    } while (0)
#define MCA_OSC_PT2PT_GET_HDR_HTON(h) \
    do { \
        (h).tag = htons((h).tag); \
        (h).count = htonl((h).count); \
        (h).len = hton64((h).len); \
        (h).displacement = hton64((h).displacement); \
    } while (0)
|
||||||
|
|
||||||
|
/*
 * Byte-order conversion for an accumulate header.
 *
 * tag is converted as a 16-bit value, count and op as 32-bit values, and
 * len and displacement as 64-bit values (via ntoh64/hton64).
 *
 * Each macro expands to exactly one statement (do { } while (0)), so it is
 * safe as the body of an unbraced if/else.  Invoke it with a trailing
 * semicolon.  The argument h is evaluated several times and must be an
 * lvalue expression without side effects.
 */
#define MCA_OSC_PT2PT_ACC_HDR_NTOH(h) \
    do { \
        (h).tag = ntohs((h).tag); \
        (h).count = ntohl((h).count); \
        (h).len = ntoh64((h).len); \
        (h).displacement = ntoh64((h).displacement); \
        (h).op = ntohl((h).op); \
    } while (0)
#define MCA_OSC_PT2PT_ACC_HDR_HTON(h) \
    do { \
        (h).tag = htons((h).tag); \
        (h).count = htonl((h).count); \
        (h).len = hton64((h).len); \
        (h).displacement = hton64((h).displacement); \
        (h).op = htonl((h).op); \
    } while (0)
|
||||||
|
|
||||||
|
/*
 * Byte-order conversion for a lock request header.
 *
 * lock_type is converted as a 32-bit value and lock_ptr as a 64-bit value
 * (via ntoh64/hton64).
 *
 * Each macro expands to exactly one statement (do { } while (0)); without
 * the wrapper only the first assignment would be governed by an unbraced
 * if.  Invoke it with a trailing semicolon.  The argument h is evaluated
 * several times and must be an lvalue expression without side effects.
 */
#define MCA_OSC_PT2PT_LOCK_HDR_NTOH(h) \
    do { \
        (h).lock_type = ntohl((h).lock_type); \
        (h).lock_ptr = ntoh64((h).lock_ptr); \
    } while (0)
#define MCA_OSC_PT2PT_LOCK_HDR_HTON(h) \
    do { \
        (h).lock_type = htonl((h).lock_type); \
        (h).lock_ptr = hton64((h).lock_ptr); \
    } while (0)
|
||||||
|
|
||||||
|
/*
 * Byte-order conversion for an unlock request header.
 *
 * lock_type and frag_count are converted as 32-bit values and lock_ptr as
 * a 64-bit value (via ntoh64/hton64).
 *
 * Each macro expands to exactly one statement (do { } while (0)); without
 * the wrapper only the first assignment would be governed by an unbraced
 * if.  Invoke it with a trailing semicolon.  The argument h is evaluated
 * several times and must be an lvalue expression without side effects.
 */
#define MCA_OSC_PT2PT_UNLOCK_HDR_NTOH(h) \
    do { \
        (h).lock_type = ntohl((h).lock_type); \
        (h).lock_ptr = ntoh64((h).lock_ptr); \
        (h).frag_count = ntohl((h).frag_count); \
    } while (0)
#define MCA_OSC_PT2PT_UNLOCK_HDR_HTON(h) \
    do { \
        (h).lock_type = htonl((h).lock_type); \
        (h).lock_ptr = hton64((h).lock_ptr); \
        (h).frag_count = htonl((h).frag_count); \
    } while (0)
|
||||||
|
|
||||||
|
/*
 * Byte-order conversion for a lock acknowledgement header.
 *
 * windx is converted as a 16-bit value, source as a 32-bit value and
 * lock_ptr as a 64-bit value (via ntoh64/hton64).  HTON must use the
 * 16-bit htons() for windx so that it mirrors NTOH: a 32-bit swap stored
 * back into a 16-bit field loses the value on little-endian hosts.
 *
 * Each macro expands to exactly one statement (do { } while (0)), so it is
 * safe as the body of an unbraced if/else.  Invoke it with a trailing
 * semicolon.  The argument h is evaluated several times and must be an
 * lvalue expression without side effects.
 */
#define MCA_OSC_PT2PT_LOCK_ACK_HDR_NTOH(h) \
    do { \
        (h).windx = ntohs((h).windx); \
        (h).source = ntohl((h).source); \
        (h).lock_ptr = ntoh64((h).lock_ptr); \
    } while (0)
#define MCA_OSC_PT2PT_LOCK_ACK_HDR_HTON(h) \
    do { \
        (h).windx = htons((h).windx); \
        (h).source = htonl((h).source); \
        (h).lock_ptr = hton64((h).lock_ptr); \
    } while (0)
|
||||||
|
|
||||||
|
/*
 * Byte-order conversion for an unlock acknowledgement header: lock_ptr is
 * converted as a 64-bit value (via ntoh64/hton64).
 *
 * Each macro expands to a single parenthesized assignment expression with
 * no trailing semicolon, so `if (c) M(h); else ...` parses correctly.
 * The argument h is evaluated twice and must be an lvalue expression
 * without side effects.
 */
#define MCA_OSC_PT2PT_UNLOCK_ACK_HDR_NTOH(h) \
    ((h).lock_ptr = ntoh64((h).lock_ptr))
#define MCA_OSC_PT2PT_UNLOCK_ACK_HDR_HTON(h) \
    ((h).lock_ptr = hton64((h).lock_ptr))
|
||||||
|
|
||||||
|
/*
 * Byte-order conversion for a complete header: frag_count is converted as
 * a 32-bit value.
 *
 * Each macro expands to a single parenthesized assignment expression, so
 * it keeps its meaning when used inside a larger expression.  The argument
 * h is evaluated twice and must be an lvalue expression without side
 * effects.
 */
#define MCA_OSC_PT2PT_COMPLETE_HDR_NTOH(h) \
    ((h).frag_count = ntohl((h).frag_count))
#define MCA_OSC_PT2PT_COMPLETE_HDR_HTON(h) \
    ((h).frag_count = htonl((h).frag_count))
|
||||||
|
|
||||||
|
/*
 * Byte-order conversion for a flush header.
 *
 * frag_count is converted as a 32-bit value and serial_number as a 64-bit
 * value.  NTOH uses ntoh64() and HTON uses hton64(), so each direction
 * calls the conversion that matches its name.
 *
 * Each macro expands to exactly one statement (do { } while (0)); without
 * the wrapper only the first assignment would be governed by an unbraced
 * if.  Invoke it with a trailing semicolon.  The argument h is evaluated
 * several times and must be an lvalue expression without side effects.
 */
#define MCA_OSC_PT2PT_FLUSH_HDR_NTOH(h) \
    do { \
        (h).frag_count = ntohl((h).frag_count); \
        (h).serial_number = ntoh64((h).serial_number); \
    } while (0)
#define MCA_OSC_PT2PT_FLUSH_HDR_HTON(h) \
    do { \
        (h).frag_count = htonl((h).frag_count); \
        (h).serial_number = hton64((h).serial_number); \
    } while (0)
|
||||||
|
|
||||||
|
/*
 * Byte-order conversion for a flush acknowledgement header: serial_number
 * is converted as a 64-bit value.  NTOH uses ntoh64() and HTON uses
 * hton64(), so each direction calls the conversion that matches its name.
 *
 * Each macro expands to a single parenthesized assignment expression, so
 * it keeps its meaning when used inside a larger expression.  The argument
 * h is evaluated twice and must be an lvalue expression without side
 * effects.
 */
#define MCA_OSC_PT2PT_FLUSH_ACK_HDR_NTOH(h) \
    ((h).serial_number = ntoh64((h).serial_number))
#define MCA_OSC_PT2PT_FLUSH_ACK_HDR_HTON(h) \
    ((h).serial_number = hton64((h).serial_number))
|
||||||
|
|
||||||
|
#define MCA_OSC_PT2PT_POST_HDR_NTOH(h) \
|
||||||
|
(h).windx = ntohs((h).windx)
|
||||||
|
/*
 * Convert the windx field of a post header from host to network byte
 * order (16-bit swap), in place.  Expands to a single parenthesized
 * expression.
 */
#define MCA_OSC_PT2PT_POST_HDR_HTON(h) \
    ((h).windx = htons((h).windx))
|
||||||
|
|
||||||
|
/*
 * Convert the multi-byte fields of a compare-and-swap header (tag, len and
 * displacement) from network to host byte order, in place.  tag is swapped
 * as a 16-bit value, len as 32-bit and displacement as 64-bit.
 *
 * The assignments are wrapped in do { } while (0) so the macro expands to
 * exactly one statement and stays correct under an unbraced if/else.
 * Note: (h) is evaluated several times; pass a side-effect-free lvalue.
 */
#define MCA_OSC_PT2PT_CSWAP_HDR_NTOH(h)                     \
    do {                                                    \
        (h).tag = ntohs((h).tag);                           \
        (h).len = ntohl((h).len);                           \
        (h).displacement = ntoh64((h).displacement);        \
    } while (0)
|
||||||
|
/*
 * Convert the multi-byte fields of a compare-and-swap header (tag, len and
 * displacement) from host to network byte order, in place.  tag is swapped
 * as a 16-bit value, len as 32-bit and displacement as 64-bit.
 *
 * The assignments are wrapped in do { } while (0) so the macro expands to
 * exactly one statement and stays correct under an unbraced if/else.
 * Note: (h) is evaluated several times; pass a side-effect-free lvalue.
 */
#define MCA_OSC_PT2PT_CSWAP_HDR_HTON(h)                     \
    do {                                                    \
        (h).tag = htons((h).tag);                           \
        (h).len = htonl((h).len);                           \
        (h).displacement = hton64((h).displacement);        \
    } while (0)
|
||||||
|
#endif /* OPAL_ENABLE_HETEROGENEOUS_SUPPORT */
|
||||||
|
|
||||||
|
#if !defined(WORDS_BIGENDIAN) && OPAL_ENABLE_HETEROGENEOUS_SUPPORT
/*
 * Convert a header to host byte order, in place.
 *
 * Only compiled when WORDS_BIGENDIAN is not defined and heterogeneous
 * support is enabled; otherwise osc_pt2pt_ntoh() is a no-op macro.
 *
 * Nothing is touched unless OMPI_OSC_PT2PT_HDR_FLAG_NBO is set in
 * hdr->base.flags.  When it is set, hdr->base.type selects the matching
 * per-header *_HDR_NTOH conversion macro.  An unknown type trips
 * assert(0).
 *
 * @param hdr  header to convert; modified in place.
 */
static inline __opal_attribute_always_inline__ void
osc_pt2pt_ntoh(ompi_osc_pt2pt_header_t *hdr)
{
    /* header is not flagged as network byte order: leave it alone */
    if (0 == (hdr->base.flags & OMPI_OSC_PT2PT_HDR_FLAG_NBO)) {
        return;
    }

    switch (hdr->base.type) {
    /* put */
    case OMPI_OSC_PT2PT_HDR_TYPE_PUT:
    case OMPI_OSC_PT2PT_HDR_TYPE_PUT_LONG:
        MCA_OSC_PT2PT_PUT_HDR_NTOH(hdr->put);
        break;

    /* accumulate and get-accumulate share the acc header conversion */
    case OMPI_OSC_PT2PT_HDR_TYPE_ACC:
    case OMPI_OSC_PT2PT_HDR_TYPE_ACC_LONG:
    case OMPI_OSC_PT2PT_HDR_TYPE_GET_ACC:
    case OMPI_OSC_PT2PT_HDR_TYPE_GET_ACC_LONG:
        MCA_OSC_PT2PT_ACC_HDR_NTOH(hdr->acc);
        break;

    case OMPI_OSC_PT2PT_HDR_TYPE_GET:
        MCA_OSC_PT2PT_GET_HDR_NTOH(hdr->get);
        break;

    case OMPI_OSC_PT2PT_HDR_TYPE_CSWAP:
    case OMPI_OSC_PT2PT_HDR_TYPE_CSWAP_LONG:
        MCA_OSC_PT2PT_CSWAP_HDR_NTOH(hdr->cswap);
        break;

    case OMPI_OSC_PT2PT_HDR_TYPE_COMPLETE:
        MCA_OSC_PT2PT_COMPLETE_HDR_NTOH(hdr->complete);
        break;

    case OMPI_OSC_PT2PT_HDR_TYPE_POST:
        MCA_OSC_PT2PT_POST_HDR_NTOH(hdr->post);
        break;

    case OMPI_OSC_PT2PT_HDR_TYPE_LOCK_REQ:
        MCA_OSC_PT2PT_LOCK_HDR_NTOH(hdr->lock);
        break;

    case OMPI_OSC_PT2PT_HDR_TYPE_LOCK_ACK:
        MCA_OSC_PT2PT_LOCK_ACK_HDR_NTOH(hdr->lock_ack);
        break;

    case OMPI_OSC_PT2PT_HDR_TYPE_UNLOCK_REQ:
        MCA_OSC_PT2PT_UNLOCK_HDR_NTOH(hdr->unlock);
        break;

    case OMPI_OSC_PT2PT_HDR_TYPE_UNLOCK_ACK:
        MCA_OSC_PT2PT_UNLOCK_ACK_HDR_NTOH(hdr->unlock_ack);
        break;

    case OMPI_OSC_PT2PT_HDR_TYPE_FLUSH_REQ:
        MCA_OSC_PT2PT_FLUSH_HDR_NTOH(hdr->flush);
        break;

    case OMPI_OSC_PT2PT_HDR_TYPE_FLUSH_ACK:
        MCA_OSC_PT2PT_FLUSH_ACK_HDR_NTOH(hdr->flush_ack);
        break;

    case OMPI_OSC_PT2PT_HDR_TYPE_FRAG:
        MCA_OSC_PT2PT_FRAG_HDR_NTOH(hdr->frag);
        break;

    default:
        /* unknown header type */
        assert(0);
        break;
    }
}
#else
/* no conversion is needed in this configuration */
#define osc_pt2pt_ntoh(h) \
    do { } while (0)
#endif /* !defined(WORDS_BIGENDIAN) && OPAL_ENABLE_HETEROGENEOUS_SUPPORT */
|
||||||
|
|
||||||
|
#if OPAL_ENABLE_HETEROGENEOUS_SUPPORT
/*
 * Prepare header h for sending to peer process p (see
 * osc_pt2pt_hton_intr()).  Expands to a single call expression without a
 * trailing semicolon so that, like the no-op variant in the #else branch,
 * it can be used as one statement anywhere (including if/else).
 */
#define osc_pt2pt_hton(h, p) \
    osc_pt2pt_hton_intr((ompi_osc_pt2pt_header_t *)(h), (p))

/*
 * Convert an outgoing header to network byte order, in place, when needed.
 *
 * - When WORDS_BIGENDIAN is defined, no field is swapped; the header is
 *   only tagged with OMPI_OSC_PT2PT_HDR_FLAG_NBO.
 * - Otherwise the header is left untouched unless the peer's
 *   proc_arch has OPAL_ARCH_ISBIGENDIAN set.  In that case the header is
 *   tagged with OMPI_OSC_PT2PT_HDR_FLAG_NBO and hdr->base.type selects the
 *   matching per-header *_HDR_HTON conversion macro.  An unknown type
 *   trips assert(0).
 *
 * @param hdr   header to convert; modified in place.
 * @param proc  peer process the header will be sent to.
 */
static inline __opal_attribute_always_inline__ void
osc_pt2pt_hton_intr(ompi_osc_pt2pt_header_t *hdr, const ompi_proc_t *proc)
{
#ifdef WORDS_BIGENDIAN
    /* proc is not consulted in this configuration */
    (void) proc;
    hdr->base.flags |= OMPI_OSC_PT2PT_HDR_FLAG_NBO;
#else
    /* peer is not big endian: send the header as is */
    if (!(proc->super.proc_arch & OPAL_ARCH_ISBIGENDIAN)) {
        return;
    }

    hdr->base.flags |= OMPI_OSC_PT2PT_HDR_FLAG_NBO;

    switch (hdr->base.type) {
    case OMPI_OSC_PT2PT_HDR_TYPE_PUT:
    case OMPI_OSC_PT2PT_HDR_TYPE_PUT_LONG:
        MCA_OSC_PT2PT_PUT_HDR_HTON(hdr->put);
        break;

    /* accumulate and get-accumulate share the acc header conversion */
    case OMPI_OSC_PT2PT_HDR_TYPE_ACC:
    case OMPI_OSC_PT2PT_HDR_TYPE_ACC_LONG:
    case OMPI_OSC_PT2PT_HDR_TYPE_GET_ACC:
    case OMPI_OSC_PT2PT_HDR_TYPE_GET_ACC_LONG:
        MCA_OSC_PT2PT_ACC_HDR_HTON(hdr->acc);
        break;

    case OMPI_OSC_PT2PT_HDR_TYPE_GET:
        MCA_OSC_PT2PT_GET_HDR_HTON(hdr->get);
        break;

    case OMPI_OSC_PT2PT_HDR_TYPE_CSWAP:
    case OMPI_OSC_PT2PT_HDR_TYPE_CSWAP_LONG:
        MCA_OSC_PT2PT_CSWAP_HDR_HTON(hdr->cswap);
        break;

    case OMPI_OSC_PT2PT_HDR_TYPE_COMPLETE:
        MCA_OSC_PT2PT_COMPLETE_HDR_HTON(hdr->complete);
        break;

    case OMPI_OSC_PT2PT_HDR_TYPE_POST:
        MCA_OSC_PT2PT_POST_HDR_HTON(hdr->post);
        break;

    case OMPI_OSC_PT2PT_HDR_TYPE_LOCK_REQ:
        MCA_OSC_PT2PT_LOCK_HDR_HTON(hdr->lock);
        break;

    case OMPI_OSC_PT2PT_HDR_TYPE_LOCK_ACK:
        MCA_OSC_PT2PT_LOCK_ACK_HDR_HTON(hdr->lock_ack);
        break;

    case OMPI_OSC_PT2PT_HDR_TYPE_UNLOCK_REQ:
        MCA_OSC_PT2PT_UNLOCK_HDR_HTON(hdr->unlock);
        break;

    case OMPI_OSC_PT2PT_HDR_TYPE_UNLOCK_ACK:
        MCA_OSC_PT2PT_UNLOCK_ACK_HDR_HTON(hdr->unlock_ack);
        break;

    case OMPI_OSC_PT2PT_HDR_TYPE_FLUSH_REQ:
        MCA_OSC_PT2PT_FLUSH_HDR_HTON(hdr->flush);
        break;

    case OMPI_OSC_PT2PT_HDR_TYPE_FLUSH_ACK:
        MCA_OSC_PT2PT_FLUSH_ACK_HDR_HTON(hdr->flush_ack);
        break;

    case OMPI_OSC_PT2PT_HDR_TYPE_FRAG:
        MCA_OSC_PT2PT_FRAG_HDR_HTON(hdr->frag);
        break;

    default:
        /* unknown header type */
        assert(0);
        break;
    }
#endif /* WORDS_BIGENDIAN */
}

/*
 * Same as osc_pt2pt_hton(), but the peer is given as rank r in the
 * communicator of module m and is resolved with ompi_comm_peer_lookup().
 * Expands to a single call expression without a trailing semicolon.
 */
#define OSC_PT2PT_HTON(h, m, r) \
    osc_pt2pt_hton_intr((ompi_osc_pt2pt_header_t *)(h), ompi_comm_peer_lookup((m)->comm, (r)))
#else
/* heterogeneous support disabled: headers are never converted */
#define osc_pt2pt_hton(h, p) \
    do { } while (0)
#define OSC_PT2PT_HTON(h, m, r) \
    do { } while (0)
#endif /* OPAL_ENABLE_HETEROGENEOUS_SUPPORT */
|
||||||
|
|
||||||
#endif /* OMPI_MCA_OSC_PT2PT_HDR_H */
|
#endif /* OMPI_MCA_OSC_PT2PT_HDR_H */
|
||||||
|
@ -13,6 +13,8 @@
|
|||||||
* Copyright (c) 2010 IBM Corporation. All rights reserved.
|
* Copyright (c) 2010 IBM Corporation. All rights reserved.
|
||||||
* Copyright (c) 2012-2013 Sandia National Laboratories. All rights reserved.
|
* Copyright (c) 2012-2013 Sandia National Laboratories. All rights reserved.
|
||||||
* Copyright (c) 2015 Intel, Inc. All rights reserved.
|
* Copyright (c) 2015 Intel, Inc. All rights reserved.
|
||||||
|
* Copyright (c) 2015 Research Organization for Information Science
|
||||||
|
* and Technology (RIST). All rights reserved.
|
||||||
* $COPYRIGHT$
|
* $COPYRIGHT$
|
||||||
*
|
*
|
||||||
* Additional copyrights may follow
|
* Additional copyrights may follow
|
||||||
@ -170,13 +172,19 @@ static inline void ompi_osc_pt2pt_unlock_self (ompi_osc_pt2pt_module_t *module,
|
|||||||
static inline int ompi_osc_pt2pt_lock_remote (ompi_osc_pt2pt_module_t *module, int target, ompi_osc_pt2pt_outstanding_lock_t *lock)
|
static inline int ompi_osc_pt2pt_lock_remote (ompi_osc_pt2pt_module_t *module, int target, ompi_osc_pt2pt_outstanding_lock_t *lock)
|
||||||
{
|
{
|
||||||
ompi_osc_pt2pt_header_lock_t lock_req;
|
ompi_osc_pt2pt_header_lock_t lock_req;
|
||||||
|
|
||||||
int ret;
|
int ret;
|
||||||
|
|
||||||
/* generate a lock request */
|
/* generate a lock request */
|
||||||
lock_req.base.type = OMPI_OSC_PT2PT_HDR_TYPE_LOCK_REQ;
|
lock_req.base.type = OMPI_OSC_PT2PT_HDR_TYPE_LOCK_REQ;
|
||||||
lock_req.base.flags = OMPI_OSC_PT2PT_HDR_FLAG_VALID | OMPI_OSC_PT2PT_HDR_FLAG_PASSIVE_TARGET;
|
lock_req.base.flags = OMPI_OSC_PT2PT_HDR_FLAG_VALID | OMPI_OSC_PT2PT_HDR_FLAG_PASSIVE_TARGET;
|
||||||
|
#if OPAL_ENABLE_HETEROGENEOUS_SUPPORT && OPAL_ENABLE_DEBUG
|
||||||
|
lock_req.padding[0] = 0;
|
||||||
|
lock_req.padding[1] = 0;
|
||||||
|
#endif
|
||||||
lock_req.lock_type = lock->type;
|
lock_req.lock_type = lock->type;
|
||||||
lock_req.lock_ptr = (uint64_t) (uintptr_t) lock;
|
lock_req.lock_ptr = (uint64_t) (uintptr_t) lock;
|
||||||
|
OSC_PT2PT_HTON(&lock_req, module, target);
|
||||||
|
|
||||||
ret = ompi_osc_pt2pt_control_send (module, target, &lock_req, sizeof (lock_req));
|
ret = ompi_osc_pt2pt_control_send (module, target, &lock_req, sizeof (lock_req));
|
||||||
if (OPAL_UNLIKELY(OMPI_SUCCESS != ret)) {
|
if (OPAL_UNLIKELY(OMPI_SUCCESS != ret)) {
|
||||||
@ -197,9 +205,14 @@ static inline int ompi_osc_pt2pt_unlock_remote (ompi_osc_pt2pt_module_t *module,
|
|||||||
|
|
||||||
unlock_req.base.type = OMPI_OSC_PT2PT_HDR_TYPE_UNLOCK_REQ;
|
unlock_req.base.type = OMPI_OSC_PT2PT_HDR_TYPE_UNLOCK_REQ;
|
||||||
unlock_req.base.flags = OMPI_OSC_PT2PT_HDR_FLAG_VALID | OMPI_OSC_PT2PT_HDR_FLAG_PASSIVE_TARGET;
|
unlock_req.base.flags = OMPI_OSC_PT2PT_HDR_FLAG_VALID | OMPI_OSC_PT2PT_HDR_FLAG_PASSIVE_TARGET;
|
||||||
|
#if OPAL_ENABLE_HETEROGENEOUS_SUPPORT && OPAL_ENABLE_DEBUG
|
||||||
|
unlock_req.padding[0] = 0;
|
||||||
|
unlock_req.padding[1] = 0;
|
||||||
|
#endif
|
||||||
unlock_req.frag_count = frag_count;
|
unlock_req.frag_count = frag_count;
|
||||||
unlock_req.lock_type = lock->type;
|
unlock_req.lock_type = lock->type;
|
||||||
unlock_req.lock_ptr = (uint64_t) (uintptr_t) lock;
|
unlock_req.lock_ptr = (uint64_t) (uintptr_t) lock;
|
||||||
|
OSC_PT2PT_HTON(&unlock_req, module, target);
|
||||||
|
|
||||||
if (peer->active_frag && peer->active_frag->remain_len < sizeof (unlock_req)) {
|
if (peer->active_frag && peer->active_frag->remain_len < sizeof (unlock_req)) {
|
||||||
/* the peer should expect one more packet */
|
/* the peer should expect one more packet */
|
||||||
@ -239,6 +252,7 @@ static inline int ompi_osc_pt2pt_flush_remote (ompi_osc_pt2pt_module_t *module,
|
|||||||
target, flush_req.frag_count));
|
target, flush_req.frag_count));
|
||||||
|
|
||||||
/* send control message with unlock request and count */
|
/* send control message with unlock request and count */
|
||||||
|
OSC_PT2PT_HTON(&flush_req, module, target);
|
||||||
ret = ompi_osc_pt2pt_control_send (module, target, &flush_req, sizeof (flush_req));
|
ret = ompi_osc_pt2pt_control_send (module, target, &flush_req, sizeof (flush_req));
|
||||||
if (OPAL_UNLIKELY(OMPI_SUCCESS != ret)) {
|
if (OPAL_UNLIKELY(OMPI_SUCCESS != ret)) {
|
||||||
return ret;
|
return ret;
|
||||||
@ -682,6 +696,7 @@ static inline int activate_lock (ompi_osc_pt2pt_module_t *module, int requestor,
|
|||||||
lock_ack.source = ompi_comm_rank(module->comm);
|
lock_ack.source = ompi_comm_rank(module->comm);
|
||||||
lock_ack.windx = ompi_comm_get_cid(module->comm);
|
lock_ack.windx = ompi_comm_get_cid(module->comm);
|
||||||
lock_ack.lock_ptr = lock_ptr;
|
lock_ack.lock_ptr = lock_ptr;
|
||||||
|
OSC_PT2PT_HTON(&lock_ack, module, requestor);
|
||||||
|
|
||||||
OPAL_OUTPUT_VERBOSE((25, ompi_osc_base_framework.framework_output,
|
OPAL_OUTPUT_VERBOSE((25, ompi_osc_base_framework.framework_output,
|
||||||
"osc pt2pt: sending lock to %d", requestor));
|
"osc pt2pt: sending lock to %d", requestor));
|
||||||
@ -904,7 +919,16 @@ int ompi_osc_pt2pt_process_unlock (ompi_osc_pt2pt_module_t *module, int source,
|
|||||||
|
|
||||||
unlock_ack.base.type = OMPI_OSC_PT2PT_HDR_TYPE_UNLOCK_ACK;
|
unlock_ack.base.type = OMPI_OSC_PT2PT_HDR_TYPE_UNLOCK_ACK;
|
||||||
unlock_ack.base.flags = OMPI_OSC_PT2PT_HDR_FLAG_VALID;
|
unlock_ack.base.flags = OMPI_OSC_PT2PT_HDR_FLAG_VALID;
|
||||||
|
#if OPAL_ENABLE_HETEROGENEOUS_SUPPORT && OPAL_ENABLE_DEBUG
|
||||||
|
unlock_ack.padding[0] = 0;
|
||||||
|
unlock_ack.padding[1] = 0;
|
||||||
|
unlock_ack.padding[2] = 0;
|
||||||
|
unlock_ack.padding[3] = 0;
|
||||||
|
unlock_ack.padding[4] = 0;
|
||||||
|
unlock_ack.padding[5] = 0;
|
||||||
|
#endif
|
||||||
unlock_ack.lock_ptr = unlock_header->lock_ptr;
|
unlock_ack.lock_ptr = unlock_header->lock_ptr;
|
||||||
|
OSC_PT2PT_HTON(&unlock_ack, module, source);
|
||||||
|
|
||||||
ret = ompi_osc_pt2pt_control_send_unbuffered (module, source, &unlock_ack, sizeof (unlock_ack));
|
ret = ompi_osc_pt2pt_control_send_unbuffered (module, source, &unlock_ack, sizeof (unlock_ack));
|
||||||
if (OPAL_UNLIKELY(OMPI_SUCCESS != ret)) {
|
if (OPAL_UNLIKELY(OMPI_SUCCESS != ret)) {
|
||||||
@ -942,6 +966,7 @@ int ompi_osc_pt2pt_process_flush (ompi_osc_pt2pt_module_t *module, int source,
|
|||||||
flush_ack.base.type = OMPI_OSC_PT2PT_HDR_TYPE_FLUSH_ACK;
|
flush_ack.base.type = OMPI_OSC_PT2PT_HDR_TYPE_FLUSH_ACK;
|
||||||
flush_ack.base.flags = OMPI_OSC_PT2PT_HDR_FLAG_VALID;
|
flush_ack.base.flags = OMPI_OSC_PT2PT_HDR_FLAG_VALID;
|
||||||
flush_ack.serial_number = flush_header->serial_number;
|
flush_ack.serial_number = flush_header->serial_number;
|
||||||
|
OSC_PT2PT_HTON(&flush_ack, module, source);
|
||||||
|
|
||||||
return ompi_osc_pt2pt_control_send_unbuffered (module, source, &flush_ack, sizeof (flush_ack));
|
return ompi_osc_pt2pt_control_send_unbuffered (module, source, &flush_ack, sizeof (flush_ack));
|
||||||
}
|
}
|
||||||
|
Загрузка…
x
Ссылка в новой задаче
Block a user