1
1

osc/pt2pt: enable heterogeneous support

Этот коммит содержится в:
Gilles Gouaillardet 2015-05-08 13:53:03 +09:00
родитель c4ebdba035
Коммит 1488e82efd
6 изменённых файлов: 356 добавлений и 31 удалений

Просмотреть файл

@ -12,6 +12,8 @@
* reserved. * reserved.
* Copyright (c) 2010 IBM Corporation. All rights reserved. * Copyright (c) 2010 IBM Corporation. All rights reserved.
* Copyright (c) 2012-2013 Sandia National Laboratories. All rights reserved. * Copyright (c) 2012-2013 Sandia National Laboratories. All rights reserved.
* Copyright (c) 2015 Research Organization for Information Science
* and Technology (RIST). All rights reserved.
* $COPYRIGHT$ * $COPYRIGHT$
* *
* Additional copyrights may follow * Additional copyrights may follow
@ -274,7 +276,6 @@ ompi_osc_pt2pt_complete(ompi_win_t *win)
int i; int i;
int *ranks = NULL; int *ranks = NULL;
ompi_group_t *group; ompi_group_t *group;
int my_rank = ompi_comm_rank (module->comm);
OPAL_OUTPUT_VERBOSE((50, ompi_osc_base_framework.framework_output, OPAL_OUTPUT_VERBOSE((50, ompi_osc_base_framework.framework_output,
"ompi_osc_pt2pt_complete entering...")); "ompi_osc_pt2pt_complete entering..."));
@ -307,7 +308,8 @@ ompi_osc_pt2pt_complete(ompi_win_t *win)
At the same time, clean out the outgoing count for the next At the same time, clean out the outgoing count for the next
round. */ round. */
for (i = 0 ; i < ompi_group_size(module->sc_group) ; ++i) { for (i = 0 ; i < ompi_group_size(module->sc_group) ; ++i) {
if (my_rank == ranks[i]) { ompi_proc_t *proc = ompi_comm_peer_lookup(module->comm, ranks[i]);
if (ompi_proc_local() == proc) {
/* shortcut for self */ /* shortcut for self */
OPAL_OUTPUT_VERBOSE((50, ompi_osc_base_framework.framework_output, "ompi_osc_pt2pt_complete self complete")); OPAL_OUTPUT_VERBOSE((50, ompi_osc_base_framework.framework_output, "ompi_osc_pt2pt_complete self complete"));
module->num_complete_msgs++; module->num_complete_msgs++;
@ -316,7 +318,12 @@ ompi_osc_pt2pt_complete(ompi_win_t *win)
complete_req.base.type = OMPI_OSC_PT2PT_HDR_TYPE_COMPLETE; complete_req.base.type = OMPI_OSC_PT2PT_HDR_TYPE_COMPLETE;
complete_req.base.flags = OMPI_OSC_PT2PT_HDR_FLAG_VALID; complete_req.base.flags = OMPI_OSC_PT2PT_HDR_FLAG_VALID;
#if OPAL_ENABLE_HETEROGENEOUS_SUPPORT && OPAL_ENABLE_DEBUG
complete_req.padding[0] = 0;
complete_req.padding[1] = 0;
#endif
complete_req.frag_count = module->epoch_outgoing_frag_count[ranks[i]]; complete_req.frag_count = module->epoch_outgoing_frag_count[ranks[i]];
osc_pt2pt_hton(&complete_req, proc);
peer = module->peers + ranks[i]; peer = module->peers + ranks[i];
@ -388,7 +395,6 @@ ompi_osc_pt2pt_post(ompi_group_t *group,
int ret = OMPI_SUCCESS; int ret = OMPI_SUCCESS;
ompi_osc_pt2pt_module_t *module = GET_MODULE(win); ompi_osc_pt2pt_module_t *module = GET_MODULE(win);
ompi_osc_pt2pt_header_post_t post_req; ompi_osc_pt2pt_header_post_t post_req;
int my_rank = ompi_comm_rank(module->comm);
/* can't check for all access epoch here due to fence */ /* can't check for all access epoch here due to fence */
if (module->pw_group) { if (module->pw_group) {
@ -430,17 +436,19 @@ ompi_osc_pt2pt_post(ompi_group_t *group,
/* send a hello counter to everyone in group */ /* send a hello counter to everyone in group */
for (int i = 0 ; i < ompi_group_size(module->pw_group) ; ++i) { for (int i = 0 ; i < ompi_group_size(module->pw_group) ; ++i) {
OPAL_OUTPUT_VERBOSE((50, ompi_osc_base_framework.framework_output, "Sending post message to rank %d", ranks[i])); OPAL_OUTPUT_VERBOSE((50, ompi_osc_base_framework.framework_output, "Sending post message to rank %d", ranks[i]));
ompi_proc_t *proc = ompi_comm_peer_lookup(module->comm, ranks[i]);
/* shortcut for self */ /* shortcut for self */
if (my_rank == ranks[i]) { if (ompi_proc_local() == proc) {
OPAL_OUTPUT_VERBOSE((50, ompi_osc_base_framework.framework_output, "ompi_osc_pt2pt_complete self post")); OPAL_OUTPUT_VERBOSE((50, ompi_osc_base_framework.framework_output, "ompi_osc_pt2pt_complete self post"));
osc_pt2pt_incoming_post (module, my_rank); osc_pt2pt_incoming_post (module, ompi_comm_rank(module->comm));
continue; continue;
} }
post_req.base.type = OMPI_OSC_PT2PT_HDR_TYPE_POST; post_req.base.type = OMPI_OSC_PT2PT_HDR_TYPE_POST;
post_req.base.flags = OMPI_OSC_PT2PT_HDR_FLAG_VALID; post_req.base.flags = OMPI_OSC_PT2PT_HDR_FLAG_VALID;
post_req.windx = ompi_comm_get_cid(module->comm); post_req.windx = ompi_comm_get_cid(module->comm);
osc_pt2pt_hton(&post_req, proc);
/* we don't want to send any data, since we're the exposure /* we don't want to send any data, since we're the exposure
epoch only, so use an unbuffered send */ epoch only, so use an unbuffered send */

Просмотреть файл

@ -12,6 +12,8 @@
* reserved. * reserved.
* Copyright (c) 2010 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2010 Cisco Systems, Inc. All rights reserved.
* Copyright (c) 2012-2013 Sandia National Laboratories. All rights reserved. * Copyright (c) 2012-2013 Sandia National Laboratories. All rights reserved.
* Copyright (c) 2015 Research Organization for Information Science
* and Technology (RIST). All rights reserved.
* $COPYRIGHT$ * $COPYRIGHT$
* *
* Additional copyrights may follow * Additional copyrights may follow
@ -403,6 +405,7 @@ static inline int ompi_osc_pt2pt_put_w_req (void *origin_addr, int origin_count,
if (!is_long_msg) { if (!is_long_msg) {
header->base.type = OMPI_OSC_PT2PT_HDR_TYPE_PUT; header->base.type = OMPI_OSC_PT2PT_HDR_TYPE_PUT;
osc_pt2pt_hton(header, proc);
osc_pt2pt_copy_for_send (ptr, payload_len, origin_addr, proc, origin_count, osc_pt2pt_copy_for_send (ptr, payload_len, origin_addr, proc, origin_count,
origin_dt); origin_dt);
@ -414,8 +417,8 @@ static inline int ompi_osc_pt2pt_put_w_req (void *origin_addr, int origin_count,
} }
} else { } else {
header->base.type = OMPI_OSC_PT2PT_HDR_TYPE_PUT_LONG; header->base.type = OMPI_OSC_PT2PT_HDR_TYPE_PUT_LONG;
header->tag = tag; header->tag = tag;
osc_pt2pt_hton(header, proc);
/* increase the outgoing signal count */ /* increase the outgoing signal count */
ompi_osc_signal_outgoing (module, target, 1); ompi_osc_signal_outgoing (module, target, 1);
@ -580,6 +583,7 @@ ompi_osc_pt2pt_accumulate_w_req (void *origin_addr, int origin_count,
if (!is_long_msg) { if (!is_long_msg) {
header->base.type = OMPI_OSC_PT2PT_HDR_TYPE_ACC; header->base.type = OMPI_OSC_PT2PT_HDR_TYPE_ACC;
osc_pt2pt_hton(header, proc);
osc_pt2pt_copy_for_send (ptr, payload_len, origin_addr, proc, osc_pt2pt_copy_for_send (ptr, payload_len, origin_addr, proc,
origin_count, origin_dt); origin_count, origin_dt);
@ -591,8 +595,8 @@ ompi_osc_pt2pt_accumulate_w_req (void *origin_addr, int origin_count,
} }
} else { } else {
header->base.type = OMPI_OSC_PT2PT_HDR_TYPE_ACC_LONG; header->base.type = OMPI_OSC_PT2PT_HDR_TYPE_ACC_LONG;
header->tag = tag; header->tag = tag;
osc_pt2pt_hton(header, proc);
OPAL_OUTPUT_VERBOSE((25, ompi_osc_base_framework.framework_output, OPAL_OUTPUT_VERBOSE((25, ompi_osc_base_framework.framework_output,
"acc: starting long accumulate with tag %d", tag)); "acc: starting long accumulate with tag %d", tag));
@ -708,6 +712,7 @@ int ompi_osc_pt2pt_compare_and_swap (void *origin_addr, void *compare_addr,
header->len = frag_len; header->len = frag_len;
header->displacement = target_disp; header->displacement = target_disp;
header->tag = tag; header->tag = tag;
osc_pt2pt_hton(header, proc);
ptr += sizeof(ompi_osc_pt2pt_header_cswap_t); ptr += sizeof(ompi_osc_pt2pt_header_cswap_t);
ret = ompi_datatype_get_pack_description(dt, &packed_ddt); ret = ompi_datatype_get_pack_description(dt, &packed_ddt);
@ -880,6 +885,7 @@ static inline int ompi_osc_pt2pt_rget_internal (void *origin_addr, int origin_co
header->count = target_count; header->count = target_count;
header->displacement = target_disp; header->displacement = target_disp;
header->tag = tag; header->tag = tag;
OSC_PT2PT_HTON(header, module, target);
ptr += sizeof(ompi_osc_pt2pt_header_get_t); ptr += sizeof(ompi_osc_pt2pt_header_get_t);
do { do {
@ -1115,6 +1121,7 @@ int ompi_osc_pt2pt_rget_accumulate_internal (void *origin_addr, int origin_count
header->displacement = target_disp; header->displacement = target_disp;
header->op = op->o_f_to_c_index; header->op = op->o_f_to_c_index;
header->tag = tag; header->tag = tag;
ptr = (char *)(header + 1); ptr = (char *)(header + 1);
do { do {
@ -1151,6 +1158,7 @@ int ompi_osc_pt2pt_rget_accumulate_internal (void *origin_addr, int origin_count
if (!is_long_msg) { if (!is_long_msg) {
header->base.type = OMPI_OSC_PT2PT_HDR_TYPE_GET_ACC; header->base.type = OMPI_OSC_PT2PT_HDR_TYPE_GET_ACC;
osc_pt2pt_hton(header, proc);
if (&ompi_mpi_op_no_op.op != op) { if (&ompi_mpi_op_no_op.op != op) {
osc_pt2pt_copy_for_send (ptr, payload_len, origin_addr, proc, origin_count, osc_pt2pt_copy_for_send (ptr, payload_len, origin_addr, proc, origin_count,
@ -1158,6 +1166,7 @@ int ompi_osc_pt2pt_rget_accumulate_internal (void *origin_addr, int origin_count
} }
} else { } else {
header->base.type = OMPI_OSC_PT2PT_HDR_TYPE_GET_ACC_LONG; header->base.type = OMPI_OSC_PT2PT_HDR_TYPE_GET_ACC_LONG;
osc_pt2pt_hton(header, proc);
ret = ompi_osc_pt2pt_isend_w_cb (origin_addr, origin_count, origin_datatype, target_rank, ret = ompi_osc_pt2pt_isend_w_cb (origin_addr, origin_count, origin_datatype, target_rank,
tag, module->comm, ompi_osc_pt2pt_req_comm_complete, pt2pt_request); tag, module->comm, ompi_osc_pt2pt_req_comm_complete, pt2pt_request);

Просмотреть файл

@ -12,7 +12,7 @@
* reserved. * reserved.
* Copyright (c) 2009-2011 Oracle and/or its affiliates. All rights reserved. * Copyright (c) 2009-2011 Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2012-2013 Sandia National Laboratories. All rights reserved. * Copyright (c) 2012-2013 Sandia National Laboratories. All rights reserved.
* Copyright (c) 2014 Research Organization for Information Science * Copyright (c) 2014-2015 Research Organization for Information Science
* and Technology (RIST). All rights reserved. * and Technology (RIST). All rights reserved.
* $COPYRIGHT$ * $COPYRIGHT$
* *
@ -561,7 +561,6 @@ static inline int process_get (ompi_osc_pt2pt_module_t* module, int target,
static inline int osc_pt2pt_accumulate_buffer (void *target, void *source, size_t source_len, ompi_proc_t *proc, static inline int osc_pt2pt_accumulate_buffer (void *target, void *source, size_t source_len, ompi_proc_t *proc,
int count, ompi_datatype_t *datatype, ompi_op_t *op) int count, ompi_datatype_t *datatype, ompi_op_t *op)
{ {
void *buffer = source;
int ret; int ret;
assert (NULL != target && NULL != source); assert (NULL != target && NULL != source);
@ -576,6 +575,7 @@ static inline int osc_pt2pt_accumulate_buffer (void *target, void *source, size_
ompi_datatype_t *primitive_datatype = NULL; ompi_datatype_t *primitive_datatype = NULL;
uint32_t primitive_count; uint32_t primitive_count;
size_t buflen; size_t buflen;
void *buffer;
ompi_osc_base_get_primitive_type_info(datatype, &primitive_datatype, &primitive_count); ompi_osc_base_get_primitive_type_info(datatype, &primitive_datatype, &primitive_count);
primitive_count *= count; primitive_count *= count;
@ -589,20 +589,19 @@ static inline int osc_pt2pt_accumulate_buffer (void *target, void *source, size_
return OMPI_ERR_OUT_OF_RESOURCE; return OMPI_ERR_OUT_OF_RESOURCE;
} }
osc_pt2pt_copy_on_recv (buffer, source, source_len, proc, count, datatype); osc_pt2pt_copy_on_recv (buffer, source, source_len, proc, primitive_count, primitive_datatype);
}
#endif
/* copy the data from the temporary buffer into the user window */
ret = ompi_osc_base_process_op(target, buffer, source_len, datatype, ret = ompi_osc_base_process_op(target, buffer, source_len, datatype,
count, op); count, op);
#if OPAL_ENABLE_HETEROGENEOUS_SUPPORT
if (proc->super.proc_arch != ompi_proc_local()->super.proc_arch) {
free(buffer); free(buffer);
} } else
#endif #endif
/* copy the data from the temporary buffer into the user window */
ret = ompi_osc_base_process_op(target, source, source_len, datatype,
count, op);
return ret; return ret;
} }
@ -682,8 +681,19 @@ static int accumulate_cb (ompi_request_t *request)
/* no more requests needed before the buffer can be accumulated */ /* no more requests needed before the buffer can be accumulated */
if (acc_data->source) { if (acc_data->source) {
ret = osc_pt2pt_accumulate_buffer (acc_data->target, acc_data->source, acc_data->source_len, ompi_datatype_t *primitive_datatype = NULL;
acc_data->proc, acc_data->count, acc_data->datatype, acc_data->op); uint32_t primitive_count;
assert (NULL != acc_data->target && NULL != acc_data->source);
ompi_osc_base_get_primitive_type_info(acc_data->datatype, &primitive_datatype, &primitive_count);
primitive_count *= acc_data->count;
if (acc_data->op == &ompi_mpi_op_replace.op) {
ret = ompi_datatype_sndrcv(acc_data->source, primitive_count, primitive_datatype, acc_data->target, acc_data->count, acc_data->datatype);
} else {
ret = ompi_osc_base_process_op(acc_data->target, acc_data->source, acc_data->source_len, acc_data->datatype, acc_data->count, acc_data->op);
}
} }
/* drop the accumulate lock */ /* drop the accumulate lock */
@ -1215,6 +1225,7 @@ static inline int process_get_acc(ompi_osc_pt2pt_module_t *module, int source,
struct ompi_datatype_t *datatype; struct ompi_datatype_t *datatype;
void *buffer = NULL; void *buffer = NULL;
uint64_t data_len; uint64_t data_len;
ompi_proc_t * proc;
int ret; int ret;
OPAL_OUTPUT_VERBOSE((50, ompi_osc_base_framework.framework_output, OPAL_OUTPUT_VERBOSE((50, ompi_osc_base_framework.framework_output,
@ -1222,7 +1233,7 @@ static inline int process_get_acc(ompi_osc_pt2pt_module_t *module, int source,
ompi_comm_rank(module->comm), ompi_comm_rank(module->comm),
source)); source));
ret = datatype_create (module, source, NULL, &datatype, (void **) &data); ret = datatype_create (module, source, &proc, &datatype, (void **) &data);
if (OPAL_UNLIKELY(OMPI_SUCCESS != ret)) { if (OPAL_UNLIKELY(OMPI_SUCCESS != ret)) {
return ret; return ret;
} }
@ -1232,13 +1243,18 @@ static inline int process_get_acc(ompi_osc_pt2pt_module_t *module, int source,
if (0 == ompi_osc_pt2pt_accumulate_trylock (module)) { if (0 == ompi_osc_pt2pt_accumulate_trylock (module)) {
/* make a copy of the data since the buffer needs to be returned */ /* make a copy of the data since the buffer needs to be returned */
if (data_len) { if (data_len) {
ompi_datatype_t *primitive_datatype = NULL;
uint32_t primitive_count;
buffer = malloc (data_len); buffer = malloc (data_len);
if (OPAL_UNLIKELY(NULL == buffer)) { if (OPAL_UNLIKELY(NULL == buffer)) {
OBJ_RELEASE(datatype); OBJ_RELEASE(datatype);
return OMPI_ERR_OUT_OF_RESOURCE; return OMPI_ERR_OUT_OF_RESOURCE;
} }
memcpy (buffer, data, data_len); ompi_osc_base_get_primitive_type_info(datatype, &primitive_datatype, &primitive_count);
primitive_count *= acc_header->count;
osc_pt2pt_copy_on_recv (buffer, data, data_len, proc, primitive_count, primitive_datatype);
} }
ret = ompi_osc_pt2pt_gacc_start (module, source, buffer, data_len, datatype, ret = ompi_osc_pt2pt_gacc_start (module, source, buffer, data_len, datatype,
@ -1543,6 +1559,7 @@ static inline int process_frag (ompi_osc_pt2pt_module_t *module,
header->base.flags)); header->base.flags));
if (OPAL_LIKELY(!(header->base.flags & OMPI_OSC_PT2PT_HDR_FLAG_LARGE_DATATYPE))) { if (OPAL_LIKELY(!(header->base.flags & OMPI_OSC_PT2PT_HDR_FLAG_LARGE_DATATYPE))) {
osc_pt2pt_ntoh(header);
switch (header->base.type) { switch (header->base.type) {
case OMPI_OSC_PT2PT_HDR_TYPE_PUT: case OMPI_OSC_PT2PT_HDR_TYPE_PUT:
ret = process_put(module, frag->source, &header->put); ret = process_put(module, frag->source, &header->put);
@ -1617,8 +1634,8 @@ static inline int process_frag (ompi_osc_pt2pt_module_t *module,
static int ompi_osc_pt2pt_callback (ompi_request_t *request) static int ompi_osc_pt2pt_callback (ompi_request_t *request)
{ {
ompi_osc_pt2pt_module_t *module = (ompi_osc_pt2pt_module_t *) request->req_complete_cb_data; ompi_osc_pt2pt_module_t *module = (ompi_osc_pt2pt_module_t *) request->req_complete_cb_data;
ompi_osc_pt2pt_header_base_t *base_header = ompi_osc_pt2pt_header_t *base_header =
(ompi_osc_pt2pt_header_base_t *) module->incoming_buffer; (ompi_osc_pt2pt_header_t *) module->incoming_buffer;
size_t incoming_length = request->req_status._ucount; size_t incoming_length = request->req_status._ucount;
int source = request->req_status.MPI_SOURCE; int source = request->req_status.MPI_SOURCE;
@ -1628,14 +1645,15 @@ static int ompi_osc_pt2pt_callback (ompi_request_t *request)
OPAL_OUTPUT_VERBOSE((50, ompi_osc_base_framework.framework_output, OPAL_OUTPUT_VERBOSE((50, ompi_osc_base_framework.framework_output,
"received pt2pt callback for fragment. source = %d, count = %u, type = 0x%x", "received pt2pt callback for fragment. source = %d, count = %u, type = 0x%x",
source, (unsigned) incoming_length, base_header->type)); source, (unsigned) incoming_length, base_header->base.type));
switch (base_header->type) { osc_pt2pt_ntoh(base_header);
switch (base_header->base.type) {
case OMPI_OSC_PT2PT_HDR_TYPE_FRAG: case OMPI_OSC_PT2PT_HDR_TYPE_FRAG:
process_frag(module, (ompi_osc_pt2pt_frag_header_t *) base_header); process_frag(module, (ompi_osc_pt2pt_frag_header_t *) base_header);
/* only data fragments should be included in the completion counters */ /* only data fragments should be included in the completion counters */
mark_incoming_completion (module, (base_header->flags & OMPI_OSC_PT2PT_HDR_FLAG_PASSIVE_TARGET) ? source : MPI_PROC_NULL); mark_incoming_completion (module, (base_header->base.flags & OMPI_OSC_PT2PT_HDR_FLAG_PASSIVE_TARGET) ? source : MPI_PROC_NULL);
break; break;
case OMPI_OSC_PT2PT_HDR_TYPE_POST: case OMPI_OSC_PT2PT_HDR_TYPE_POST:
(void) osc_pt2pt_incoming_post (module, source); (void) osc_pt2pt_incoming_post (module, source);
@ -1652,7 +1670,7 @@ static int ompi_osc_pt2pt_callback (ompi_request_t *request)
default: default:
OPAL_OUTPUT_VERBOSE((50, ompi_osc_base_framework.framework_output, OPAL_OUTPUT_VERBOSE((50, ompi_osc_base_framework.framework_output,
"received unexpected message of type %x", "received unexpected message of type %x",
(int) base_header->type)); (int) base_header->base.type));
} }
OPAL_OUTPUT_VERBOSE((50, ompi_osc_base_framework.framework_output, OPAL_OUTPUT_VERBOSE((50, ompi_osc_base_framework.framework_output,

Просмотреть файл

@ -59,6 +59,7 @@ static int frag_send (ompi_osc_pt2pt_module_t *module, ompi_osc_pt2pt_frag_t *fr
"osc pt2pt: frag_send called to %d, frag = %p, count = %d", "osc pt2pt: frag_send called to %d, frag = %p, count = %d",
frag->target, (void *) frag, count)); frag->target, (void *) frag, count));
OSC_PT2PT_HTON(frag->header, module, frag->target);
return ompi_osc_pt2pt_isend_w_cb (frag->buffer, count, MPI_BYTE, frag->target, OSC_PT2PT_FRAG_TAG, return ompi_osc_pt2pt_isend_w_cb (frag->buffer, count, MPI_BYTE, frag->target, OSC_PT2PT_FRAG_TAG,
module->comm, frag_send_cb, frag); module->comm, frag_send_cb, frag);
} }

Просмотреть файл

@ -13,6 +13,8 @@
* Copyright (c) 2010 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2010 Cisco Systems, Inc. All rights reserved.
* Copyright (c) 2010 Oracle and/or its affiliates. All rights reserved. * Copyright (c) 2010 Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2012-2013 Sandia National Laboratories. All rights reserved. * Copyright (c) 2012-2013 Sandia National Laboratories. All rights reserved.
* Copyright (c) 2015 Research Organization for Information Science
* and Technology (RIST). All rights reserved.
* $COPYRIGHT$ * $COPYRIGHT$
* *
* Additional copyrights may follow * Additional copyrights may follow
@ -28,6 +30,7 @@
#endif #endif
#include "opal/types.h" #include "opal/types.h"
#include "opal/util/arch.h"
enum ompi_osc_pt2pt_hdr_type_t { enum ompi_osc_pt2pt_hdr_type_t {
OMPI_OSC_PT2PT_HDR_TYPE_PUT = 0x01, OMPI_OSC_PT2PT_HDR_TYPE_PUT = 0x01,
@ -79,9 +82,9 @@ struct ompi_osc_pt2pt_header_acc_t {
uint16_t tag; uint16_t tag;
uint32_t count; uint32_t count;
uint32_t op;
uint64_t len; uint64_t len;
uint64_t displacement; uint64_t displacement;
uint32_t op;
}; };
typedef struct ompi_osc_pt2pt_header_acc_t ompi_osc_pt2pt_header_acc_t; typedef struct ompi_osc_pt2pt_header_acc_t ompi_osc_pt2pt_header_acc_t;
@ -97,6 +100,9 @@ typedef struct ompi_osc_pt2pt_header_get_t ompi_osc_pt2pt_header_get_t;
struct ompi_osc_pt2pt_header_complete_t { struct ompi_osc_pt2pt_header_complete_t {
ompi_osc_pt2pt_header_base_t base; ompi_osc_pt2pt_header_base_t base;
#if OPAL_ENABLE_HETEROGENEOUS_SUPPORT
uint8_t padding[2];
#endif
int frag_count; int frag_count;
}; };
typedef struct ompi_osc_pt2pt_header_complete_t ompi_osc_pt2pt_header_complete_t; typedef struct ompi_osc_pt2pt_header_complete_t ompi_osc_pt2pt_header_complete_t;
@ -105,7 +111,6 @@ struct ompi_osc_pt2pt_header_cswap_t {
ompi_osc_pt2pt_header_base_t base; ompi_osc_pt2pt_header_base_t base;
uint16_t tag; uint16_t tag;
uint32_t len; uint32_t len;
uint64_t displacement; uint64_t displacement;
}; };
@ -119,6 +124,9 @@ typedef struct ompi_osc_pt2pt_header_post_t ompi_osc_pt2pt_header_post_t;
struct ompi_osc_pt2pt_header_lock_t { struct ompi_osc_pt2pt_header_lock_t {
ompi_osc_pt2pt_header_base_t base; ompi_osc_pt2pt_header_base_t base;
#if OPAL_ENABLE_HETEROGENEOUS_SUPPORT
uint8_t padding[2];
#endif
int32_t lock_type; int32_t lock_type;
uint64_t lock_ptr; uint64_t lock_ptr;
}; };
@ -134,20 +142,29 @@ typedef struct ompi_osc_pt2pt_header_lock_ack_t ompi_osc_pt2pt_header_lock_ack_t
struct ompi_osc_pt2pt_header_unlock_t { struct ompi_osc_pt2pt_header_unlock_t {
ompi_osc_pt2pt_header_base_t base; ompi_osc_pt2pt_header_base_t base;
#if OPAL_ENABLE_HETEROGENEOUS_SUPPORT
uint8_t padding[2];
#endif
int32_t lock_type; int32_t lock_type;
uint32_t frag_count;
uint64_t lock_ptr; uint64_t lock_ptr;
uint32_t frag_count;
}; };
typedef struct ompi_osc_pt2pt_header_unlock_t ompi_osc_pt2pt_header_unlock_t; typedef struct ompi_osc_pt2pt_header_unlock_t ompi_osc_pt2pt_header_unlock_t;
struct ompi_osc_pt2pt_header_unlock_ack_t { struct ompi_osc_pt2pt_header_unlock_ack_t {
ompi_osc_pt2pt_header_base_t base; ompi_osc_pt2pt_header_base_t base;
#if OPAL_ENABLE_HETEROGENEOUS_SUPPORT
uint8_t padding[6];
#endif
uint64_t lock_ptr; uint64_t lock_ptr;
}; };
typedef struct ompi_osc_pt2pt_header_unlock_ack_t ompi_osc_pt2pt_header_unlock_ack_t; typedef struct ompi_osc_pt2pt_header_unlock_ack_t ompi_osc_pt2pt_header_unlock_ack_t;
struct ompi_osc_pt2pt_header_flush_t { struct ompi_osc_pt2pt_header_flush_t {
ompi_osc_pt2pt_header_base_t base; ompi_osc_pt2pt_header_base_t base;
#if OPAL_ENABLE_HETEROGENEOUS_SUPPORT
uint8_t padding[2];
#endif
uint32_t frag_count; uint32_t frag_count;
uint64_t serial_number; uint64_t serial_number;
}; };
@ -155,6 +172,9 @@ typedef struct ompi_osc_pt2pt_header_flush_t ompi_osc_pt2pt_header_flush_t;
struct ompi_osc_pt2pt_header_flush_ack_t { struct ompi_osc_pt2pt_header_flush_ack_t {
ompi_osc_pt2pt_header_base_t base; ompi_osc_pt2pt_header_base_t base;
#if OPAL_ENABLE_HETEROGENEOUS_SUPPORT
uint8_t padding[6];
#endif
uint64_t serial_number; uint64_t serial_number;
}; };
typedef struct ompi_osc_pt2pt_header_flush_ack_t ompi_osc_pt2pt_header_flush_ack_t; typedef struct ompi_osc_pt2pt_header_flush_ack_t ompi_osc_pt2pt_header_flush_ack_t;
@ -186,4 +206,248 @@ union ompi_osc_pt2pt_header_t {
}; };
typedef union ompi_osc_pt2pt_header_t ompi_osc_pt2pt_header_t; typedef union ompi_osc_pt2pt_header_t ompi_osc_pt2pt_header_t;
#if OPAL_ENABLE_HETEROGENEOUS_SUPPORT
/*
 * Byte-order conversion of a fragment header, in place.
 *
 * windx is converted with ntohs/htons; source, num_ops and pad with
 * ntohl/htonl.  `h` must be an lvalue and is evaluated several times.
 * Each macro expands to a single statement (do { } while (0)) so it is
 * safe inside an unbraced if/else; the caller supplies the ';'.
 */
#define MCA_OSC_PT2PT_FRAG_HDR_NTOH(h)              \
    do {                                            \
        (h).windx = ntohs((h).windx);               \
        (h).source = ntohl((h).source);             \
        (h).num_ops = ntohl((h).num_ops);           \
        (h).pad = ntohl((h).pad);                   \
    } while (0)
#define MCA_OSC_PT2PT_FRAG_HDR_HTON(h)              \
    do {                                            \
        (h).windx = htons((h).windx);               \
        (h).source = htonl((h).source);             \
        (h).num_ops = htonl((h).num_ops);           \
        (h).pad = htonl((h).pad);                   \
    } while (0)
/*
 * Byte-order conversion of a put header, in place.
 *
 * tag uses ntohs/htons, count uses ntohl/htonl, len and displacement use
 * ntoh64/hton64 (not defined in this header; presumably opal/types.h).
 * `h` must be an lvalue and is evaluated several times.  Each macro is a
 * single statement, so it is safe in an unbraced if/else.
 */
#define MCA_OSC_PT2PT_PUT_HDR_NTOH(h)                       \
    do {                                                    \
        (h).tag = ntohs((h).tag);                           \
        (h).count = ntohl((h).count);                       \
        (h).len = ntoh64((h).len);                          \
        (h).displacement = ntoh64((h).displacement);        \
    } while (0)
#define MCA_OSC_PT2PT_PUT_HDR_HTON(h)                       \
    do {                                                    \
        (h).tag = htons((h).tag);                           \
        (h).count = htonl((h).count);                       \
        (h).len = hton64((h).len);                          \
        (h).displacement = hton64((h).displacement);        \
    } while (0)
/*
 * Byte-order conversion of a get header, in place.
 *
 * tag uses ntohs/htons, count uses ntohl/htonl, len and displacement use
 * ntoh64/hton64 (not defined in this header; presumably opal/types.h).
 * `h` must be an lvalue and is evaluated several times.  Each macro is a
 * single statement, so it is safe in an unbraced if/else.
 */
#define MCA_OSC_PT2PT_GET_HDR_NTOH(h)                       \
    do {                                                    \
        (h).tag = ntohs((h).tag);                           \
        (h).count = ntohl((h).count);                       \
        (h).len = ntoh64((h).len);                          \
        (h).displacement = ntoh64((h).displacement);        \
    } while (0)
#define MCA_OSC_PT2PT_GET_HDR_HTON(h)                       \
    do {                                                    \
        (h).tag = htons((h).tag);                           \
        (h).count = htonl((h).count);                       \
        (h).len = hton64((h).len);                          \
        (h).displacement = hton64((h).displacement);        \
    } while (0)
/*
 * Byte-order conversion of an accumulate header, in place.
 *
 * tag uses ntohs/htons; count and op use ntohl/htonl; len and
 * displacement use ntoh64/hton64 (not defined in this header; presumably
 * opal/types.h).  `h` must be an lvalue and is evaluated several times.
 * Each macro is a single statement, so it is safe in an unbraced if/else.
 */
#define MCA_OSC_PT2PT_ACC_HDR_NTOH(h)                       \
    do {                                                    \
        (h).tag = ntohs((h).tag);                           \
        (h).count = ntohl((h).count);                       \
        (h).len = ntoh64((h).len);                          \
        (h).displacement = ntoh64((h).displacement);        \
        (h).op = ntohl((h).op);                             \
    } while (0)
#define MCA_OSC_PT2PT_ACC_HDR_HTON(h)                       \
    do {                                                    \
        (h).tag = htons((h).tag);                           \
        (h).count = htonl((h).count);                       \
        (h).len = hton64((h).len);                          \
        (h).displacement = hton64((h).displacement);        \
        (h).op = htonl((h).op);                             \
    } while (0)
/*
 * Byte-order conversion of a lock request header, in place.
 *
 * lock_type uses ntohl/htonl and lock_ptr uses ntoh64/hton64 (not defined
 * in this header; presumably opal/types.h).  `h` must be an lvalue and is
 * evaluated several times.  Each macro is a single statement, so both
 * assignments stay together under an unbraced if/else.
 */
#define MCA_OSC_PT2PT_LOCK_HDR_NTOH(h)              \
    do {                                            \
        (h).lock_type = ntohl((h).lock_type);       \
        (h).lock_ptr = ntoh64((h).lock_ptr);        \
    } while (0)
#define MCA_OSC_PT2PT_LOCK_HDR_HTON(h)              \
    do {                                            \
        (h).lock_type = htonl((h).lock_type);       \
        (h).lock_ptr = hton64((h).lock_ptr);        \
    } while (0)
/*
 * Byte-order conversion of an unlock request header, in place.
 *
 * lock_type and frag_count use ntohl/htonl; lock_ptr uses ntoh64/hton64
 * (not defined in this header; presumably opal/types.h).  `h` must be an
 * lvalue and is evaluated several times.  Each macro is a single
 * statement, so it is safe in an unbraced if/else.
 */
#define MCA_OSC_PT2PT_UNLOCK_HDR_NTOH(h)            \
    do {                                            \
        (h).lock_type = ntohl((h).lock_type);       \
        (h).lock_ptr = ntoh64((h).lock_ptr);        \
        (h).frag_count = ntohl((h).frag_count);     \
    } while (0)
#define MCA_OSC_PT2PT_UNLOCK_HDR_HTON(h)            \
    do {                                            \
        (h).lock_type = htonl((h).lock_type);       \
        (h).lock_ptr = hton64((h).lock_ptr);        \
        (h).frag_count = htonl((h).frag_count);     \
    } while (0)
/*
 * Byte-order conversion of a lock acknowledgement header, in place.
 *
 * windx uses the 16-bit ntohs/htons pair, source uses ntohl/htonl and
 * lock_ptr uses ntoh64/hton64 (not defined in this header; presumably
 * opal/types.h).  HTON must use htons for windx to mirror NTOH: a 32-bit
 * swap stored back into a 16-bit field loses the value on little-endian
 * hosts.  `h` must be an lvalue and is evaluated several times.  Each
 * macro is a single statement, so it is safe in an unbraced if/else.
 */
#define MCA_OSC_PT2PT_LOCK_ACK_HDR_NTOH(h)          \
    do {                                            \
        (h).windx = ntohs((h).windx);               \
        (h).source = ntohl((h).source);             \
        (h).lock_ptr = ntoh64((h).lock_ptr);        \
    } while (0)
#define MCA_OSC_PT2PT_LOCK_ACK_HDR_HTON(h)          \
    do {                                            \
        (h).windx = htons((h).windx);               \
        (h).source = htonl((h).source);             \
        (h).lock_ptr = hton64((h).lock_ptr);        \
    } while (0)
/*
 * Byte-order conversion of an unlock acknowledgement header, in place.
 *
 * Only lock_ptr is converted, via ntoh64/hton64 (not defined in this
 * header; presumably opal/types.h).  The expansion carries no trailing
 * ';' so that "if (c) MACRO(h); else ..." parses; the caller supplies it.
 */
#define MCA_OSC_PT2PT_UNLOCK_ACK_HDR_NTOH(h)        \
    (h).lock_ptr = ntoh64((h).lock_ptr)
#define MCA_OSC_PT2PT_UNLOCK_ACK_HDR_HTON(h)        \
    (h).lock_ptr = hton64((h).lock_ptr)
/*
 * Byte-order conversion of a complete header, in place: only frag_count
 * is converted (ntohl on receive, htonl on send).  `h` must be an lvalue.
 */
#define MCA_OSC_PT2PT_COMPLETE_HDR_NTOH(h) ((h).frag_count = ntohl((h).frag_count))
#define MCA_OSC_PT2PT_COMPLETE_HDR_HTON(h) ((h).frag_count = htonl((h).frag_count))
/*
 * Byte-order conversion of a flush request header, in place.
 *
 * frag_count uses ntohl/htonl and serial_number uses ntoh64/hton64 (not
 * defined in this header; presumably opal/types.h).  The send direction
 * calls hton64, matching every other *_HTON macro in this file.  `h` must
 * be an lvalue and is evaluated several times.  Each macro is a single
 * statement, so it is safe in an unbraced if/else.
 */
#define MCA_OSC_PT2PT_FLUSH_HDR_NTOH(h)                     \
    do {                                                    \
        (h).frag_count = ntohl((h).frag_count);             \
        (h).serial_number = ntoh64((h).serial_number);      \
    } while (0)
#define MCA_OSC_PT2PT_FLUSH_HDR_HTON(h)                     \
    do {                                                    \
        (h).frag_count = htonl((h).frag_count);             \
        (h).serial_number = hton64((h).serial_number);      \
    } while (0)
/*
 * Byte-order conversion of a flush acknowledgement header, in place.
 *
 * Only serial_number is converted: ntoh64 on receive, hton64 on send
 * (neither is defined in this header; presumably opal/types.h).  The send
 * direction uses hton64, matching the other *_HTON macros in this file.
 */
#define MCA_OSC_PT2PT_FLUSH_ACK_HDR_NTOH(h)         \
    (h).serial_number = ntoh64((h).serial_number)
#define MCA_OSC_PT2PT_FLUSH_ACK_HDR_HTON(h)         \
    (h).serial_number = hton64((h).serial_number)
/*
 * Byte-order conversion of a post header, in place: only windx is
 * converted (ntohs on receive, htons on send).  `h` must be an lvalue.
 */
#define MCA_OSC_PT2PT_POST_HDR_NTOH(h) ((h).windx = ntohs((h).windx))
#define MCA_OSC_PT2PT_POST_HDR_HTON(h) ((h).windx = htons((h).windx))
/*
 * Byte-order conversion of a compare-and-swap header, in place.
 *
 * tag uses ntohs/htons, len uses ntohl/htonl and displacement uses
 * ntoh64/hton64 (not defined in this header; presumably opal/types.h).
 * `h` must be an lvalue and is evaluated several times.  Each macro is a
 * single statement, so it is safe in an unbraced if/else.
 */
#define MCA_OSC_PT2PT_CSWAP_HDR_NTOH(h)                     \
    do {                                                    \
        (h).tag = ntohs((h).tag);                           \
        (h).len = ntohl((h).len);                           \
        (h).displacement = ntoh64((h).displacement);        \
    } while (0)
#define MCA_OSC_PT2PT_CSWAP_HDR_HTON(h)                     \
    do {                                                    \
        (h).tag = htons((h).tag);                           \
        (h).len = htonl((h).len);                           \
        (h).displacement = hton64((h).displacement);        \
    } while (0)
#endif /* OPAL_ENABLE_HETEROGENEOUS_SUPPORT */
#if !defined(WORDS_BIGENDIAN) && OPAL_ENABLE_HETEROGENEOUS_SUPPORT
/*
 * Convert a received header to host byte order, in place.
 *
 * Nothing happens unless OMPI_OSC_PT2PT_HDR_FLAG_NBO is set in
 * hdr->base.flags.  When it is set, the *_NTOH macro matching
 * hdr->base.type is applied to the corresponding union member.  The flag
 * is not cleared afterwards.  An unrecognized type trips assert(0).
 */
static inline __opal_attribute_always_inline__ void
osc_pt2pt_ntoh(ompi_osc_pt2pt_header_t *hdr)
{
    if (0 != (hdr->base.flags & OMPI_OSC_PT2PT_HDR_FLAG_NBO)) {
        switch (hdr->base.type) {
        /* data movement */
        case OMPI_OSC_PT2PT_HDR_TYPE_PUT:
        case OMPI_OSC_PT2PT_HDR_TYPE_PUT_LONG:
            MCA_OSC_PT2PT_PUT_HDR_NTOH(hdr->put);
            break;

        /* every accumulate flavour is converted through the acc member */
        case OMPI_OSC_PT2PT_HDR_TYPE_ACC:
        case OMPI_OSC_PT2PT_HDR_TYPE_ACC_LONG:
        case OMPI_OSC_PT2PT_HDR_TYPE_GET_ACC:
        case OMPI_OSC_PT2PT_HDR_TYPE_GET_ACC_LONG:
            MCA_OSC_PT2PT_ACC_HDR_NTOH(hdr->acc);
            break;

        case OMPI_OSC_PT2PT_HDR_TYPE_GET:
            MCA_OSC_PT2PT_GET_HDR_NTOH(hdr->get);
            break;

        case OMPI_OSC_PT2PT_HDR_TYPE_CSWAP:
        case OMPI_OSC_PT2PT_HDR_TYPE_CSWAP_LONG:
            MCA_OSC_PT2PT_CSWAP_HDR_NTOH(hdr->cswap);
            break;

        /* synchronization / control messages */
        case OMPI_OSC_PT2PT_HDR_TYPE_COMPLETE:
            MCA_OSC_PT2PT_COMPLETE_HDR_NTOH(hdr->complete);
            break;

        case OMPI_OSC_PT2PT_HDR_TYPE_POST:
            MCA_OSC_PT2PT_POST_HDR_NTOH(hdr->post);
            break;

        case OMPI_OSC_PT2PT_HDR_TYPE_LOCK_REQ:
            MCA_OSC_PT2PT_LOCK_HDR_NTOH(hdr->lock);
            break;

        case OMPI_OSC_PT2PT_HDR_TYPE_LOCK_ACK:
            MCA_OSC_PT2PT_LOCK_ACK_HDR_NTOH(hdr->lock_ack);
            break;

        case OMPI_OSC_PT2PT_HDR_TYPE_UNLOCK_REQ:
            MCA_OSC_PT2PT_UNLOCK_HDR_NTOH(hdr->unlock);
            break;

        case OMPI_OSC_PT2PT_HDR_TYPE_UNLOCK_ACK:
            MCA_OSC_PT2PT_UNLOCK_ACK_HDR_NTOH(hdr->unlock_ack);
            break;

        case OMPI_OSC_PT2PT_HDR_TYPE_FLUSH_REQ:
            MCA_OSC_PT2PT_FLUSH_HDR_NTOH(hdr->flush);
            break;

        case OMPI_OSC_PT2PT_HDR_TYPE_FLUSH_ACK:
            MCA_OSC_PT2PT_FLUSH_ACK_HDR_NTOH(hdr->flush_ack);
            break;

        case OMPI_OSC_PT2PT_HDR_TYPE_FRAG:
            MCA_OSC_PT2PT_FRAG_HDR_NTOH(hdr->frag);
            break;

        default:
            /* unknown header type */
            assert(0);
            break;
        }
    }
}
#else
/* Fallback when the inline osc_pt2pt_ntoh() is compiled out: expands to an
 * empty statement and never evaluates its argument. */
#define osc_pt2pt_ntoh(h) do { /* nothing to convert */ } while (0)
#endif /* !defined(WORDS_BIGENDIAN) && OPAL_ENABLE_HETEROGENEOUS_SUPPORT */
#if OPAL_ENABLE_HETEROGENEOUS_SUPPORT
/*
 * Convert header `h` for sending to peer `p`: casts `h` to
 * ompi_osc_pt2pt_header_t * and forwards to osc_pt2pt_hton_intr().
 * The expansion has no trailing ';' (the caller supplies it), so the macro
 * behaves like a function call and works in an unbraced if/else.
 */
#define osc_pt2pt_hton(h, p) \
    osc_pt2pt_hton_intr((ompi_osc_pt2pt_header_t *)(h), (p))
/*
 * Prepare header `hdr` for transmission to `proc`, in place.
 *
 * With WORDS_BIGENDIAN defined, the header is only tagged with
 * OMPI_OSC_PT2PT_HDR_FLAG_NBO and no field is swapped.  Otherwise nothing
 * is done unless proc->super.proc_arch has OPAL_ARCH_ISBIGENDIAN set; in
 * that case the flag is set and the *_HTON macro matching hdr->base.type
 * is applied to the corresponding union member.  An unrecognized type
 * trips assert(0).
 */
static inline __opal_attribute_always_inline__ void
osc_pt2pt_hton_intr(ompi_osc_pt2pt_header_t *hdr, const ompi_proc_t *proc)
{
#ifdef WORDS_BIGENDIAN
    /* no swapping on this side: just mark the header */
    hdr->base.flags |= OMPI_OSC_PT2PT_HDR_FLAG_NBO;
#else
    if (0 != (proc->super.proc_arch & OPAL_ARCH_ISBIGENDIAN)) {
        hdr->base.flags |= OMPI_OSC_PT2PT_HDR_FLAG_NBO;

        switch (hdr->base.type) {
        /* data movement */
        case OMPI_OSC_PT2PT_HDR_TYPE_PUT:
        case OMPI_OSC_PT2PT_HDR_TYPE_PUT_LONG:
            MCA_OSC_PT2PT_PUT_HDR_HTON(hdr->put);
            break;

        /* every accumulate flavour is converted through the acc member */
        case OMPI_OSC_PT2PT_HDR_TYPE_ACC:
        case OMPI_OSC_PT2PT_HDR_TYPE_ACC_LONG:
        case OMPI_OSC_PT2PT_HDR_TYPE_GET_ACC:
        case OMPI_OSC_PT2PT_HDR_TYPE_GET_ACC_LONG:
            MCA_OSC_PT2PT_ACC_HDR_HTON(hdr->acc);
            break;

        case OMPI_OSC_PT2PT_HDR_TYPE_GET:
            MCA_OSC_PT2PT_GET_HDR_HTON(hdr->get);
            break;

        case OMPI_OSC_PT2PT_HDR_TYPE_CSWAP:
        case OMPI_OSC_PT2PT_HDR_TYPE_CSWAP_LONG:
            MCA_OSC_PT2PT_CSWAP_HDR_HTON(hdr->cswap);
            break;

        /* synchronization / control messages */
        case OMPI_OSC_PT2PT_HDR_TYPE_COMPLETE:
            MCA_OSC_PT2PT_COMPLETE_HDR_HTON(hdr->complete);
            break;

        case OMPI_OSC_PT2PT_HDR_TYPE_POST:
            MCA_OSC_PT2PT_POST_HDR_HTON(hdr->post);
            break;

        case OMPI_OSC_PT2PT_HDR_TYPE_LOCK_REQ:
            MCA_OSC_PT2PT_LOCK_HDR_HTON(hdr->lock);
            break;

        case OMPI_OSC_PT2PT_HDR_TYPE_LOCK_ACK:
            MCA_OSC_PT2PT_LOCK_ACK_HDR_HTON(hdr->lock_ack);
            break;

        case OMPI_OSC_PT2PT_HDR_TYPE_UNLOCK_REQ:
            MCA_OSC_PT2PT_UNLOCK_HDR_HTON(hdr->unlock);
            break;

        case OMPI_OSC_PT2PT_HDR_TYPE_UNLOCK_ACK:
            MCA_OSC_PT2PT_UNLOCK_ACK_HDR_HTON(hdr->unlock_ack);
            break;

        case OMPI_OSC_PT2PT_HDR_TYPE_FLUSH_REQ:
            MCA_OSC_PT2PT_FLUSH_HDR_HTON(hdr->flush);
            break;

        case OMPI_OSC_PT2PT_HDR_TYPE_FLUSH_ACK:
            MCA_OSC_PT2PT_FLUSH_ACK_HDR_HTON(hdr->flush_ack);
            break;

        case OMPI_OSC_PT2PT_HDR_TYPE_FRAG:
            MCA_OSC_PT2PT_FRAG_HDR_HTON(hdr->frag);
            break;

        default:
            /* unknown header type */
            assert(0);
            break;
        }
    }
#endif /* WORDS_BIGENDIAN */
}
/*
 * Convert header `h` for sending to rank `r` of module `m`: the peer is
 * looked up with ompi_comm_peer_lookup((m)->comm, (r)) and passed to
 * osc_pt2pt_hton_intr().  The expansion has no trailing ';' (the caller
 * supplies it), so the macro works in an unbraced if/else.
 */
#define OSC_PT2PT_HTON(h, m, r) \
    osc_pt2pt_hton_intr((ompi_osc_pt2pt_header_t *)(h), ompi_comm_peer_lookup((m)->comm, (r)))
#else
/* Heterogeneous support disabled: both send-side conversion macros expand
 * to an empty statement and never evaluate their arguments. */
#define osc_pt2pt_hton(h, p)    do { /* nothing to convert */ } while (0)
#define OSC_PT2PT_HTON(h, m, r) do { /* nothing to convert */ } while (0)
#endif /* OPAL_ENABLE_HETEROGENEOUS_SUPPORT */
#endif /* OMPI_MCA_OSC_PT2PT_HDR_H */ #endif /* OMPI_MCA_OSC_PT2PT_HDR_H */

Просмотреть файл

@ -13,6 +13,8 @@
* Copyright (c) 2010 IBM Corporation. All rights reserved. * Copyright (c) 2010 IBM Corporation. All rights reserved.
* Copyright (c) 2012-2013 Sandia National Laboratories. All rights reserved. * Copyright (c) 2012-2013 Sandia National Laboratories. All rights reserved.
* Copyright (c) 2015 Intel, Inc. All rights reserved. * Copyright (c) 2015 Intel, Inc. All rights reserved.
* Copyright (c) 2015 Research Organization for Information Science
* and Technology (RIST). All rights reserved.
* $COPYRIGHT$ * $COPYRIGHT$
* *
* Additional copyrights may follow * Additional copyrights may follow
@ -170,13 +172,19 @@ static inline void ompi_osc_pt2pt_unlock_self (ompi_osc_pt2pt_module_t *module,
static inline int ompi_osc_pt2pt_lock_remote (ompi_osc_pt2pt_module_t *module, int target, ompi_osc_pt2pt_outstanding_lock_t *lock) static inline int ompi_osc_pt2pt_lock_remote (ompi_osc_pt2pt_module_t *module, int target, ompi_osc_pt2pt_outstanding_lock_t *lock)
{ {
ompi_osc_pt2pt_header_lock_t lock_req; ompi_osc_pt2pt_header_lock_t lock_req;
int ret; int ret;
/* generate a lock request */ /* generate a lock request */
lock_req.base.type = OMPI_OSC_PT2PT_HDR_TYPE_LOCK_REQ; lock_req.base.type = OMPI_OSC_PT2PT_HDR_TYPE_LOCK_REQ;
lock_req.base.flags = OMPI_OSC_PT2PT_HDR_FLAG_VALID | OMPI_OSC_PT2PT_HDR_FLAG_PASSIVE_TARGET; lock_req.base.flags = OMPI_OSC_PT2PT_HDR_FLAG_VALID | OMPI_OSC_PT2PT_HDR_FLAG_PASSIVE_TARGET;
#if OPAL_ENABLE_HETEROGENEOUS_SUPPORT && OPAL_ENABLE_DEBUG
lock_req.padding[0] = 0;
lock_req.padding[1] = 0;
#endif
lock_req.lock_type = lock->type; lock_req.lock_type = lock->type;
lock_req.lock_ptr = (uint64_t) (uintptr_t) lock; lock_req.lock_ptr = (uint64_t) (uintptr_t) lock;
OSC_PT2PT_HTON(&lock_req, module, target);
ret = ompi_osc_pt2pt_control_send (module, target, &lock_req, sizeof (lock_req)); ret = ompi_osc_pt2pt_control_send (module, target, &lock_req, sizeof (lock_req));
if (OPAL_UNLIKELY(OMPI_SUCCESS != ret)) { if (OPAL_UNLIKELY(OMPI_SUCCESS != ret)) {
@ -197,9 +205,14 @@ static inline int ompi_osc_pt2pt_unlock_remote (ompi_osc_pt2pt_module_t *module,
unlock_req.base.type = OMPI_OSC_PT2PT_HDR_TYPE_UNLOCK_REQ; unlock_req.base.type = OMPI_OSC_PT2PT_HDR_TYPE_UNLOCK_REQ;
unlock_req.base.flags = OMPI_OSC_PT2PT_HDR_FLAG_VALID | OMPI_OSC_PT2PT_HDR_FLAG_PASSIVE_TARGET; unlock_req.base.flags = OMPI_OSC_PT2PT_HDR_FLAG_VALID | OMPI_OSC_PT2PT_HDR_FLAG_PASSIVE_TARGET;
#if OPAL_ENABLE_HETEROGENEOUS_SUPPORT && OPAL_ENABLE_DEBUG
unlock_req.padding[0] = 0;
unlock_req.padding[1] = 0;
#endif
unlock_req.frag_count = frag_count; unlock_req.frag_count = frag_count;
unlock_req.lock_type = lock->type; unlock_req.lock_type = lock->type;
unlock_req.lock_ptr = (uint64_t) (uintptr_t) lock; unlock_req.lock_ptr = (uint64_t) (uintptr_t) lock;
OSC_PT2PT_HTON(&unlock_req, module, target);
if (peer->active_frag && peer->active_frag->remain_len < sizeof (unlock_req)) { if (peer->active_frag && peer->active_frag->remain_len < sizeof (unlock_req)) {
/* the peer should expect one more packet */ /* the peer should expect one more packet */
@ -239,6 +252,7 @@ static inline int ompi_osc_pt2pt_flush_remote (ompi_osc_pt2pt_module_t *module,
target, flush_req.frag_count)); target, flush_req.frag_count));
/* send control message with unlock request and count */ /* send control message with unlock request and count */
OSC_PT2PT_HTON(&flush_req, module, target);
ret = ompi_osc_pt2pt_control_send (module, target, &flush_req, sizeof (flush_req)); ret = ompi_osc_pt2pt_control_send (module, target, &flush_req, sizeof (flush_req));
if (OPAL_UNLIKELY(OMPI_SUCCESS != ret)) { if (OPAL_UNLIKELY(OMPI_SUCCESS != ret)) {
return ret; return ret;
@ -682,6 +696,7 @@ static inline int activate_lock (ompi_osc_pt2pt_module_t *module, int requestor,
lock_ack.source = ompi_comm_rank(module->comm); lock_ack.source = ompi_comm_rank(module->comm);
lock_ack.windx = ompi_comm_get_cid(module->comm); lock_ack.windx = ompi_comm_get_cid(module->comm);
lock_ack.lock_ptr = lock_ptr; lock_ack.lock_ptr = lock_ptr;
OSC_PT2PT_HTON(&lock_ack, module, requestor);
OPAL_OUTPUT_VERBOSE((25, ompi_osc_base_framework.framework_output, OPAL_OUTPUT_VERBOSE((25, ompi_osc_base_framework.framework_output,
"osc pt2pt: sending lock to %d", requestor)); "osc pt2pt: sending lock to %d", requestor));
@ -904,7 +919,16 @@ int ompi_osc_pt2pt_process_unlock (ompi_osc_pt2pt_module_t *module, int source,
unlock_ack.base.type = OMPI_OSC_PT2PT_HDR_TYPE_UNLOCK_ACK; unlock_ack.base.type = OMPI_OSC_PT2PT_HDR_TYPE_UNLOCK_ACK;
unlock_ack.base.flags = OMPI_OSC_PT2PT_HDR_FLAG_VALID; unlock_ack.base.flags = OMPI_OSC_PT2PT_HDR_FLAG_VALID;
#if OPAL_ENABLE_HETEROGENEOUS_SUPPORT && OPAL_ENABLE_DEBUG
unlock_ack.padding[0] = 0;
unlock_ack.padding[1] = 0;
unlock_ack.padding[2] = 0;
unlock_ack.padding[3] = 0;
unlock_ack.padding[4] = 0;
unlock_ack.padding[5] = 0;
#endif
unlock_ack.lock_ptr = unlock_header->lock_ptr; unlock_ack.lock_ptr = unlock_header->lock_ptr;
OSC_PT2PT_HTON(&unlock_ack, module, source);
ret = ompi_osc_pt2pt_control_send_unbuffered (module, source, &unlock_ack, sizeof (unlock_ack)); ret = ompi_osc_pt2pt_control_send_unbuffered (module, source, &unlock_ack, sizeof (unlock_ack));
if (OPAL_UNLIKELY(OMPI_SUCCESS != ret)) { if (OPAL_UNLIKELY(OMPI_SUCCESS != ret)) {
@ -942,6 +966,7 @@ int ompi_osc_pt2pt_process_flush (ompi_osc_pt2pt_module_t *module, int source,
flush_ack.base.type = OMPI_OSC_PT2PT_HDR_TYPE_FLUSH_ACK; flush_ack.base.type = OMPI_OSC_PT2PT_HDR_TYPE_FLUSH_ACK;
flush_ack.base.flags = OMPI_OSC_PT2PT_HDR_FLAG_VALID; flush_ack.base.flags = OMPI_OSC_PT2PT_HDR_FLAG_VALID;
flush_ack.serial_number = flush_header->serial_number; flush_ack.serial_number = flush_header->serial_number;
OSC_PT2PT_HTON(&flush_ack, module, source);
return ompi_osc_pt2pt_control_send_unbuffered (module, source, &flush_ack, sizeof (flush_ack)); return ompi_osc_pt2pt_control_send_unbuffered (module, source, &flush_ack, sizeof (flush_ack));
} }