1
1

Merge pull request #1853 from PDeveze/Patchs-on-osc-portals4

Patches on osc portals4
Этот коммит содержится в:
Todd Kordenbrock 2016-07-20 09:22:19 -05:00 коммит произвёл GitHub
родитель 210534adb3 f19a2b961c
Коммит 37ad6aa711
4 изменённых файлов: 431 добавлений и 124 удалений

Просмотреть файл

@ -21,6 +21,8 @@
#include "ompi/mca/mtl/portals4/mtl_portals4.h"
#define REQ_OSC_TABLE_ID 4
#define OSC_PORTALS4_MB_DATA 0x0000000000000000ULL
#define OSC_PORTALS4_MB_CONTROL 0x1000000000000000ULL
@ -51,6 +53,11 @@ struct ompi_osc_portals4_component_t {
ptl_size_t matching_atomic_max;
ptl_size_t matching_fetch_atomic_max;
ptl_size_t matching_atomic_ordered_size;
ptl_size_t ptl_max_msg_size; /* max size given by portals (cf PtlNIInit) */
bool no_locks;
ptl_uid_t uid;
opal_mutex_t lock;
opal_condition_t cond;
opal_free_list_t requests; /* request free list for the r* communication variants */
};
@ -80,6 +87,7 @@ struct ompi_osc_portals4_module_t {
ptl_handle_ni_t ni_h; /* network interface used by this window */
ptl_pt_index_t pt_idx; /* portal table index used by this window (this will be same across window) */
ptl_handle_ct_t ct_h; /* Counting event handle used for completion in this window */
int ct_link; /* PTL_EVENT_LINK flag */
ptl_handle_md_t md_h; /* memory descriptor describing all of memory used by this window */
ptl_handle_md_t req_md_h; /* memory descriptor with event completion used by this window */
ptl_handle_me_t data_me_h; /* data match list entry (MB are CID | OSC_PORTALS4_MB_DATA) */
@ -175,7 +183,7 @@ int ompi_osc_portals4_get_accumulate(const void *origin_addr,
int result_count,
struct ompi_datatype_t *result_datatype,
int target_rank,
MPI_Aint target_disp,
OPAL_PTRDIFF_TYPE target_disp,
int target_count,
struct ompi_datatype_t *target_datatype,
struct ompi_op_t *op,
@ -219,7 +227,7 @@ int ompi_osc_portals4_rget_accumulate(const void *origin_addr,
int result_count,
struct ompi_datatype_t *result_datatype,
int target_rank,
MPI_Aint target_disp,
OPAL_PTRDIFF_TYPE target_disp,
int target_count,
struct ompi_datatype_t *target_datatype,
struct ompi_op_t *op,

Просмотреть файл

@ -178,6 +178,97 @@ ompi_osc_portals4_get_dt(struct ompi_datatype_t *dt, ptl_datatype_t *ptl_dt)
return 0;
}
/**
 * Compute how many fragments are needed to move @p length bytes when one
 * fragment carries at most @p maxlength bytes.
 *
 * A zero-length transfer is counted as one fragment.
 *
 * @param length     total number of bytes to transfer
 * @param maxlength  maximum number of bytes per fragment; it is used as a
 *                   divisor below, so it must be non-zero
 * @return the rounded-up quotient length / maxlength, or 1 when length is 0
 */
static ptl_size_t
number_of_fragment(ptl_size_t length, ptl_size_t maxlength)
{
    /* (length - 1) / maxlength + 1 rounds up without first adding to length */
    ptl_size_t nb_frag = length == 0 ? 1 : (length - 1) / maxlength + 1;

    /* The count is printed as unsigned long, matching the "%lu" + cast
     * convention used by the other verbose messages in this file. */
    OPAL_OUTPUT_VERBOSE((90, ompi_osc_base_framework.framework_output,
                         "%s,%d : %lu fragment(s)", __FUNCTION__, __LINE__,
                         (unsigned long) nb_frag));
    return nb_frag;
}
/**
 * Issue a put of @p length bytes as a sequence of PtlPut calls, each carrying
 * at most mca_osc_portals4_component.ptl_max_msg_size bytes.
 *
 * Fragments are consecutive: fragment k starts at @p loc_offset plus the
 * bytes already submitted on the local side and at @p rem_offset plus the
 * same amount on the remote side.  All other arguments are handed to every
 * PtlPut call unchanged.  At least one PtlPut is always issued, even when
 * @p length is 0.
 *
 * NOTE: if ptl_max_msg_size were 0 while @p length is non-zero, the remaining
 * length would never shrink and the loop would not terminate.
 *
 * @return PTL_OK once every fragment has been submitted, otherwise the return
 *         value of the first failing PtlPut (later fragments are not sent).
 */
static int
splittedPtlPut(ptl_handle_md_t md_h,
               ptl_size_t loc_offset,
               ptl_size_t length,
               ptl_ack_req_t ack_req,
               ptl_process_t target_id,
               ptl_pt_index_t pt_index,
               ptl_match_bits_t match_b,
               ptl_size_t rem_offset,
               void *usr_ptr,
               ptl_hdr_data_t hdr_data)
{
    ptl_size_t done = 0;   /* bytes already handed to PtlPut */

    for (;;) {
        ptl_size_t chunk = length;
        int rc;

        /* clamp this fragment to the configured maximum message size */
        if (chunk > mca_osc_portals4_component.ptl_max_msg_size) {
            chunk = mca_osc_portals4_component.ptl_max_msg_size;
        }

        OPAL_OUTPUT_VERBOSE((90, ompi_osc_base_framework.framework_output,
                             "Put size : %lu/%lu, offset:%lu", chunk, length, done));

        rc = PtlPut(md_h,
                    loc_offset + done,
                    chunk,
                    ack_req,
                    target_id,
                    pt_index,
                    match_b,
                    rem_offset + done,
                    usr_ptr,
                    hdr_data);
        if (PTL_OK != rc) {
            opal_output_verbose(1, ompi_osc_base_framework.framework_output,
                                "%s:%d PtlPut failed with return value %d",
                                __FUNCTION__, __LINE__, rc);
            return rc;
        }

        length -= chunk;
        done += chunk;

        /* stop once nothing is left; checked after the first submission */
        if (0 == length) {
            break;
        }
    }

    return PTL_OK;
}
/**
 * Issue a get of @p length bytes as a sequence of PtlGet calls, each fetching
 * at most mca_osc_portals4_component.ptl_max_msg_size bytes.
 *
 * Fragments are consecutive: each one is offset from @p loc_offset (local
 * side) and @p rem_offset (remote side) by the number of bytes already
 * submitted.  All other arguments are handed to every PtlGet call unchanged.
 * At least one PtlGet is always issued, even when @p length is 0.
 *
 * NOTE: if ptl_max_msg_size were 0 while @p length is non-zero, the remaining
 * length would never shrink and the loop would not terminate.
 *
 * @return PTL_OK once every fragment has been submitted, otherwise the return
 *         value of the first failing PtlGet (later fragments are not issued).
 */
static int
splittedPtlGet(ptl_handle_md_t md_h,
               ptl_size_t loc_offset,
               ptl_size_t length,
               ptl_process_t target_id,
               ptl_pt_index_t pt_index,
               ptl_match_bits_t match_b,
               ptl_size_t rem_offset,
               void *usr_ptr)
{
    ptl_size_t done = 0;   /* bytes already requested through PtlGet */

    OPAL_OUTPUT_VERBOSE((90,ompi_osc_base_framework.framework_output, "Get"));

    for (;;) {
        ptl_size_t chunk = length;
        int rc;

        /* clamp this fragment to the configured maximum message size */
        if (chunk > mca_osc_portals4_component.ptl_max_msg_size) {
            chunk = mca_osc_portals4_component.ptl_max_msg_size;
        }

        rc = PtlGet(md_h,
                    loc_offset + done,
                    chunk,
                    target_id,
                    pt_index,
                    match_b,
                    rem_offset + done,
                    usr_ptr);
        if (PTL_OK != rc) {
            opal_output_verbose(1, ompi_osc_base_framework.framework_output,
                                "%s:%d PtlGet failed with return value %d",
                                __FUNCTION__, __LINE__, rc);
            return rc;
        }

        length -= chunk;
        done += chunk;

        /* stop once nothing is left; checked after the first submission */
        if (0 == length) {
            break;
        }
    }

    return PTL_OK;
}
int
ompi_osc_portals4_rput(const void *origin_addr,
@ -195,13 +286,13 @@ ompi_osc_portals4_rput(const void *origin_addr,
ompi_osc_portals4_module_t *module =
(ompi_osc_portals4_module_t*) win->w_osc_module;
ptl_process_t peer = ompi_osc_portals4_get_peer(module, target);
size_t length;
size_t offset;
OPAL_PTRDIFF_TYPE length, origin_lb, target_lb;
OPAL_OUTPUT_VERBOSE((50, ompi_osc_base_framework.framework_output,
"rput: 0x%lx, %d, %s, %d, %d, %d, %s, 0x%lx",
"rput: 0x%lx, %d, %s, %d, %lu, %d, %s, 0x%lx",
(unsigned long) origin_addr, origin_count,
origin_dt->name, target, (int) target_disp,
origin_dt->name, target, (unsigned long) target_disp,
target_count, target_dt->name,
(unsigned long) win));
@ -218,22 +309,30 @@ ompi_osc_portals4_rput(const void *origin_addr,
"MPI_Rput: transfer of non-contiguous memory is not currently supported.\n");
return OMPI_ERR_NOT_SUPPORTED;
} else {
(void)opal_atomic_add_64(&module->opcount, 1);
request->ops_expected = 1;
ret = ompi_datatype_type_size(origin_dt, &length);
ret = ompi_datatype_get_extent(origin_dt, &origin_lb, &length);
if (OMPI_SUCCESS != ret) {
OMPI_OSC_PORTALS4_REQUEST_RETURN(request);
return ret;
}
ret = ompi_datatype_type_lb(target_dt, &target_lb);
if (OMPI_SUCCESS != ret) {
OMPI_OSC_PORTALS4_REQUEST_RETURN(request);
return ret;
}
length *= origin_count;
ret = PtlPut(module->req_md_h,
(ptl_size_t) origin_addr,
request->ops_expected = number_of_fragment(length, mca_osc_portals4_component.ptl_max_msg_size);
opal_atomic_add_64(&module->opcount, request->ops_expected);
OPAL_OUTPUT_VERBOSE((90,ompi_osc_base_framework.framework_output,
"%s,%d Put", __FUNCTION__, __LINE__));
ret = splittedPtlPut(module->req_md_h,
(ptl_size_t) origin_addr + origin_lb,
length,
PTL_ACK_REQ,
peer,
module->pt_idx,
module->match_bits,
offset,
offset + target_lb,
request,
0);
if (OMPI_SUCCESS != ret) {
@ -262,13 +361,13 @@ ompi_osc_portals4_rget(void *origin_addr,
ompi_osc_portals4_module_t *module =
(ompi_osc_portals4_module_t*) win->w_osc_module;
ptl_process_t peer = ompi_osc_portals4_get_peer(module, target);
size_t length;
size_t offset;
OPAL_PTRDIFF_TYPE length, origin_lb, target_lb;
OPAL_OUTPUT_VERBOSE((50, ompi_osc_base_framework.framework_output,
"rget: 0x%lx, %d, %s, %d, %d, %d, %s, 0x%lx",
"rget: 0x%lx, %d, %s, %d, %lu, %d, %s, 0x%lx",
(unsigned long) origin_addr, origin_count,
origin_dt->name, target, (int) target_disp,
origin_dt->name, target, (unsigned long) target_disp,
target_count, target_dt->name,
(unsigned long) win));
@ -285,21 +384,28 @@ ompi_osc_portals4_rget(void *origin_addr,
"MPI_Rget: transfer of non-contiguous memory is not currently supported.\n");
return OMPI_ERR_NOT_SUPPORTED;
} else {
(void)opal_atomic_add_64(&module->opcount, 1);
request->ops_expected = 1;
ret = ompi_datatype_type_size(origin_dt, &length);
ret = ompi_datatype_get_extent(origin_dt, &origin_lb, &length);
if (OMPI_SUCCESS != ret) {
OMPI_OSC_PORTALS4_REQUEST_RETURN(request);
return ret;
}
ret = ompi_datatype_type_lb(target_dt, &target_lb);
if (OMPI_SUCCESS != ret) {
OMPI_OSC_PORTALS4_REQUEST_RETURN(request);
return ret;
}
length *= origin_count;
ret = PtlGet(module->req_md_h,
(ptl_size_t) origin_addr,
request->ops_expected = number_of_fragment(length, mca_osc_portals4_component.ptl_max_msg_size);
opal_atomic_add_64(&module->opcount, request->ops_expected);
OPAL_OUTPUT_VERBOSE((90,ompi_osc_base_framework.framework_output,
"%s,%d Get", __FUNCTION__, __LINE__));
ret = splittedPtlGet(module->req_md_h,
(ptl_size_t) origin_addr + origin_lb,
length,
peer,
module->pt_idx,
module->match_bits,
offset,
offset + target_lb,
request);
if (OMPI_SUCCESS != ret) {
OMPI_OSC_PORTALS4_REQUEST_RETURN(request);
@ -328,15 +434,15 @@ ompi_osc_portals4_raccumulate(const void *origin_addr,
ompi_osc_portals4_module_t *module =
(ompi_osc_portals4_module_t*) win->w_osc_module;
ptl_process_t peer = ompi_osc_portals4_get_peer(module, target);
size_t length, sent;
size_t offset;
ptl_op_t ptl_op;
ptl_datatype_t ptl_dt;
OPAL_PTRDIFF_TYPE sent, length, origin_lb, target_lb;
OPAL_OUTPUT_VERBOSE((50, ompi_osc_base_framework.framework_output,
"raccumulate: 0x%lx, %d, %s, %d, %d, %d, %s, %s 0x%lx",
"raccumulate: 0x%lx, %d, %s, %d, %lu, %d, %s, %s 0x%lx",
(unsigned long) origin_addr, origin_count,
origin_dt->name, target, (int) target_disp,
origin_dt->name, target, (unsigned long) target_disp,
target_count, target_dt->name,
op->o_name,
(unsigned long) win));
@ -356,7 +462,12 @@ ompi_osc_portals4_raccumulate(const void *origin_addr,
} else {
ptl_size_t md_offset;
ret = ompi_datatype_type_size(origin_dt, &length);
ret = ompi_datatype_get_extent(origin_dt, &origin_lb, &length);
if (OMPI_SUCCESS != ret) {
OMPI_OSC_PORTALS4_REQUEST_RETURN(request);
return ret;
}
ret = ompi_datatype_type_lb(target_dt, &target_lb);
if (OMPI_SUCCESS != ret) {
OMPI_OSC_PORTALS4_REQUEST_RETURN(request);
return ret;
@ -368,35 +479,48 @@ ompi_osc_portals4_raccumulate(const void *origin_addr,
do {
size_t msg_length = MIN(module->atomic_max, length - sent);
(void)opal_atomic_add_64(&module->opcount, 1);
request->ops_expected++;
if (MPI_REPLACE == op) {
ret = PtlPut(module->req_md_h,
md_offset + sent,
request->ops_expected += number_of_fragment(msg_length, mca_osc_portals4_component.ptl_max_msg_size);
opal_atomic_add_64(&module->opcount, number_of_fragment(msg_length, mca_osc_portals4_component.ptl_max_msg_size));
OPAL_OUTPUT_VERBOSE((90, ompi_osc_base_framework.framework_output,
"%s,%d Put", __FUNCTION__, __LINE__));
ret = splittedPtlPut(module->req_md_h,
md_offset + sent + origin_lb,
msg_length,
PTL_ACK_REQ,
peer,
module->pt_idx,
module->match_bits,
offset + sent,
offset + sent + target_lb,
request,
0);
} else {
request->ops_expected++;
opal_atomic_add_64(&module->opcount, 1);
ret = ompi_osc_portals4_get_dt(origin_dt, &ptl_dt);
if (OMPI_SUCCESS != ret) return ret;
if (OMPI_SUCCESS != ret) {
opal_output(ompi_osc_base_framework.framework_output,
"MPI_Raccumulate: datatype is not currently supported");
return OMPI_ERR_NOT_SUPPORTED;
}
ret = ompi_osc_portals4_get_op(op, &ptl_op);
if (OMPI_SUCCESS != ret) return ret;
if (OMPI_SUCCESS != ret) {
opal_output(ompi_osc_base_framework.framework_output,
"MPI_Raccumulate: operation is not currently supported");
return OMPI_ERR_NOT_SUPPORTED;
}
OPAL_OUTPUT_VERBOSE((90,ompi_osc_base_framework.framework_output,
"%s,%d Atomic", __FUNCTION__, __LINE__));
ret = PtlAtomic(module->req_md_h,
offset + sent,
offset + sent + origin_lb,
msg_length,
PTL_ACK_REQ,
peer,
module->pt_idx,
module->match_bits,
offset + sent,
offset + sent + target_lb,
request,
0,
ptl_op,
@ -422,7 +546,7 @@ ompi_osc_portals4_rget_accumulate(const void *origin_addr,
int result_count,
struct ompi_datatype_t *result_dt,
int target,
MPI_Aint target_disp,
OPAL_PTRDIFF_TYPE target_disp,
int target_count,
struct ompi_datatype_t *target_dt,
struct ompi_op_t *op,
@ -434,17 +558,17 @@ ompi_osc_portals4_rget_accumulate(const void *origin_addr,
ompi_osc_portals4_module_t *module =
(ompi_osc_portals4_module_t*) win->w_osc_module;
ptl_process_t peer = ompi_osc_portals4_get_peer(module, target);
size_t length, sent;
size_t offset;
ptl_op_t ptl_op;
ptl_datatype_t ptl_dt;
OPAL_PTRDIFF_TYPE sent, length, origin_lb, target_lb, result_lb;
OPAL_OUTPUT_VERBOSE((50, ompi_osc_base_framework.framework_output,
"rget_accumulate: 0x%lx, %d, %s, 0x%lx, %d, %s, %d, %d, %d, %s, %s, 0x%lx",
"rget_accumulate: 0x%lx, %d, %s, 0x%lx, %d, %s, %d, %lu, %d, %s, %s, 0x%lx",
(unsigned long) origin_addr, origin_count,
origin_dt->name, (unsigned long) result_addr,
result_count, result_dt->name,
target, (int) target_disp,
target, (unsigned long) target_disp,
target_count, target_dt->name,
op->o_name,
(unsigned long) win));
@ -468,16 +592,29 @@ ompi_osc_portals4_rget_accumulate(const void *origin_addr,
if (MPI_REPLACE == op) {
ptl_size_t result_md_offset, origin_md_offset;
ret = ompi_datatype_type_size(origin_dt, &length);
ret = ompi_datatype_get_extent(origin_dt, &origin_lb, &length);
if (OMPI_SUCCESS != ret) {
OMPI_OSC_PORTALS4_REQUEST_RETURN(request);
return ret;
}
ret = ompi_osc_portals4_get_dt(origin_dt, &ptl_dt);
ret = ompi_datatype_type_lb(target_dt, &target_lb);
if (OMPI_SUCCESS != ret) {
OMPI_OSC_PORTALS4_REQUEST_RETURN(request);
return ret;
}
ret = ompi_datatype_type_lb(result_dt, &result_lb);
if (OMPI_SUCCESS != ret) {
OMPI_OSC_PORTALS4_REQUEST_RETURN(request);
return ret;
}
ret = ompi_osc_portals4_get_dt(origin_dt, &ptl_dt);
if (OMPI_SUCCESS != ret) {
OMPI_OSC_PORTALS4_REQUEST_RETURN(request);
opal_output(ompi_osc_base_framework.framework_output,
"MPI_Rget_accumulate: datatype is not currently supported");
return OMPI_ERR_NOT_SUPPORTED;
}
length *= origin_count;
result_md_offset = (ptl_size_t) result_addr;
@ -489,15 +626,17 @@ ompi_osc_portals4_rget_accumulate(const void *origin_addr,
(void)opal_atomic_add_64(&module->opcount, 1);
request->ops_expected++;
OPAL_OUTPUT_VERBOSE((90, ompi_osc_base_framework.framework_output,
"%s,%d Swap", __FUNCTION__, __LINE__));
ret = PtlSwap(module->req_md_h,
result_md_offset + sent,
result_md_offset + sent + result_lb,
module->md_h,
origin_md_offset + sent,
origin_md_offset + sent + origin_lb,
msg_length,
peer,
module->pt_idx,
module->match_bits,
offset + sent,
offset + sent + target_lb,
request,
0,
NULL,
@ -508,7 +647,12 @@ ompi_osc_portals4_rget_accumulate(const void *origin_addr,
} else if (MPI_NO_OP == op) {
ptl_size_t md_offset;
ret = ompi_datatype_type_size(target_dt, &length);
ret = ompi_datatype_get_extent(target_dt, &target_lb, &length);
if (OMPI_SUCCESS != ret) {
OMPI_OSC_PORTALS4_REQUEST_RETURN(request);
return ret;
}
ret = ompi_datatype_type_lb(result_dt, &result_lb);
if (OMPI_SUCCESS != ret) {
OMPI_OSC_PORTALS4_REQUEST_RETURN(request);
return ret;
@ -520,23 +664,34 @@ ompi_osc_portals4_rget_accumulate(const void *origin_addr,
do {
size_t msg_length = MIN(module->fetch_atomic_max, length - sent);
(void)opal_atomic_add_64(&module->opcount, 1);
request->ops_expected++;
ret = PtlGet(module->req_md_h,
md_offset + sent,
opal_atomic_add_64(&module->opcount, number_of_fragment(msg_length, mca_osc_portals4_component.ptl_max_msg_size));
request->ops_expected += number_of_fragment(msg_length, mca_osc_portals4_component.ptl_max_msg_size);
OPAL_OUTPUT_VERBOSE((90, ompi_osc_base_framework.framework_output,
"%s,%d Get", __FUNCTION__, __LINE__));
ret = splittedPtlGet(module->req_md_h,
md_offset + sent + result_lb,
msg_length,
peer,
module->pt_idx,
module->match_bits,
offset + sent,
offset + sent + target_lb,
request);
sent += msg_length;
} while (sent < length);
} else {
ptl_size_t result_md_offset, origin_md_offset;
ret = ompi_datatype_type_size(origin_dt, &length);
ret = ompi_datatype_get_extent(origin_dt, &origin_lb, &length);
if (OMPI_SUCCESS != ret) {
OMPI_OSC_PORTALS4_REQUEST_RETURN(request);
return ret;
}
ret = ompi_datatype_type_lb(target_dt, &target_lb);
if (OMPI_SUCCESS != ret) {
OMPI_OSC_PORTALS4_REQUEST_RETURN(request);
return ret;
}
ret = ompi_datatype_type_lb(result_dt, &result_lb);
if (OMPI_SUCCESS != ret) {
OMPI_OSC_PORTALS4_REQUEST_RETURN(request);
return ret;
@ -547,10 +702,18 @@ ompi_osc_portals4_rget_accumulate(const void *origin_addr,
origin_md_offset = (ptl_size_t) origin_addr;
ret = ompi_osc_portals4_get_dt(origin_dt, &ptl_dt);
if (OMPI_SUCCESS != ret) return ret;
if (OMPI_SUCCESS != ret) {
opal_output(ompi_osc_base_framework.framework_output,
"MPI_Rget_accumulate: datatype is not currently supported");
return OMPI_ERR_NOT_SUPPORTED;
}
ret = ompi_osc_portals4_get_op(op, &ptl_op);
if (OMPI_SUCCESS != ret) return ret;
if (OMPI_SUCCESS != ret) {
opal_output(ompi_osc_base_framework.framework_output,
"MPI_Rget_accumulate: operation is not currently supported");
return OMPI_ERR_NOT_SUPPORTED;
}
do {
size_t msg_length = MIN(module->fetch_atomic_max, length - sent);
@ -558,15 +721,17 @@ ompi_osc_portals4_rget_accumulate(const void *origin_addr,
(void)opal_atomic_add_64(&module->opcount, 1);
request->ops_expected++;
OPAL_OUTPUT_VERBOSE((90, ompi_osc_base_framework.framework_output,
"%s,%d FetchAtomic", __FUNCTION__, __LINE__));
ret = PtlFetchAtomic(module->req_md_h,
result_md_offset + sent,
result_md_offset + sent + result_lb,
module->md_h,
origin_md_offset + sent,
origin_md_offset + sent + origin_lb,
msg_length,
peer,
module->pt_idx,
module->match_bits,
offset + sent,
offset + sent + target_lb,
request,
0,
ptl_op,
@ -598,13 +763,13 @@ ompi_osc_portals4_put(const void *origin_addr,
ompi_osc_portals4_module_t *module =
(ompi_osc_portals4_module_t*) win->w_osc_module;
ptl_process_t peer = ompi_osc_portals4_get_peer(module, target);
size_t length;
size_t offset;
OPAL_PTRDIFF_TYPE length, origin_lb, target_lb;
OPAL_OUTPUT_VERBOSE((50, ompi_osc_base_framework.framework_output,
"put: 0x%lx, %d, %s, %d, %d, %d, %s, 0x%lx",
"put: 0x%lx, %d, %s, %d, %lu, %d, %s, 0x%lx",
(unsigned long) origin_addr, origin_count,
origin_dt->name, target, (int) target_disp,
origin_dt->name, target, (unsigned long) target_disp,
target_count, target_dt->name,
(unsigned long) win));
@ -616,20 +781,26 @@ ompi_osc_portals4_put(const void *origin_addr,
"MPI_Put: transfer of non-contiguous memory is not currently supported.\n");
return OMPI_ERR_NOT_SUPPORTED;
} else {
(void)opal_atomic_add_64(&module->opcount, 1);
ret = ompi_datatype_type_size(origin_dt, &length);
ret = ompi_datatype_get_extent(origin_dt, &origin_lb, &length);
if (OMPI_SUCCESS != ret) {
return ret;
}
ret = ompi_datatype_type_lb(target_dt, &target_lb);
if (OMPI_SUCCESS != ret) {
return ret;
}
length *= origin_count;
ret = PtlPut(module->md_h,
(ptl_size_t) origin_addr,
opal_atomic_add_64(&module->opcount, number_of_fragment(length, mca_osc_portals4_component.ptl_max_msg_size));
OPAL_OUTPUT_VERBOSE((90, ompi_osc_base_framework.framework_output,
"%s,%d Put", __FUNCTION__, __LINE__));
ret = splittedPtlPut(module->md_h,
(ptl_size_t) origin_addr + origin_lb,
length,
PTL_ACK_REQ,
peer,
module->pt_idx,
module->match_bits,
offset,
offset + target_lb,
NULL,
0);
if (OMPI_SUCCESS != ret) {
@ -655,13 +826,13 @@ ompi_osc_portals4_get(void *origin_addr,
ompi_osc_portals4_module_t *module =
(ompi_osc_portals4_module_t*) win->w_osc_module;
ptl_process_t peer = ompi_osc_portals4_get_peer(module, target);
size_t length;
size_t offset;
OPAL_PTRDIFF_TYPE length, origin_lb, target_lb;
OPAL_OUTPUT_VERBOSE((50, ompi_osc_base_framework.framework_output,
"get: 0x%lx, %d, %s, %d, %d, %d, %s, 0x%lx",
"get: 0x%lx, %d, %s, %d, %lu, %d, %s, 0x%lx",
(unsigned long) origin_addr, origin_count,
origin_dt->name, target, (int) target_disp,
origin_dt->name, target, (unsigned long) target_disp,
target_count, target_dt->name,
(unsigned long) win));
@ -673,19 +844,25 @@ ompi_osc_portals4_get(void *origin_addr,
"MPI_Get: transfer of non-contiguous memory is not currently supported.\n");
return OMPI_ERR_NOT_SUPPORTED;
} else {
(void)opal_atomic_add_64(&module->opcount, 1);
ret = ompi_datatype_type_size(origin_dt, &length);
ret = ompi_datatype_get_extent(origin_dt, &origin_lb, &length);
if (OMPI_SUCCESS != ret) {
return ret;
}
ret = ompi_datatype_type_lb(target_dt, &target_lb);
if (OMPI_SUCCESS != ret) {
return ret;
}
length *= origin_count;
ret = PtlGet(module->md_h,
(ptl_size_t) origin_addr,
opal_atomic_add_64(&module->opcount, number_of_fragment(length, mca_osc_portals4_component.ptl_max_msg_size));
OPAL_OUTPUT_VERBOSE((90,ompi_osc_base_framework.framework_output,
"%s,%d Get", __FUNCTION__, __LINE__));
ret = splittedPtlGet(module->md_h,
(ptl_size_t) origin_addr + origin_lb,
length,
peer,
module->pt_idx,
module->match_bits,
offset,
offset + target_lb,
NULL);
if (OMPI_SUCCESS != ret) {
return ret;
@ -711,15 +888,15 @@ ompi_osc_portals4_accumulate(const void *origin_addr,
ompi_osc_portals4_module_t *module =
(ompi_osc_portals4_module_t*) win->w_osc_module;
ptl_process_t peer = ompi_osc_portals4_get_peer(module, target);
size_t length, sent;
size_t offset;
ptl_op_t ptl_op;
ptl_datatype_t ptl_dt;
OPAL_PTRDIFF_TYPE sent, length, origin_lb, target_lb;
OPAL_OUTPUT_VERBOSE((50, ompi_osc_base_framework.framework_output,
"accumulate: 0x%lx, %d, %s, %d, %d, %d, %s, %s, 0x%lx",
"accumulate: 0x%lx, %d, %s, %d, %lu, %d, %s, %s, 0x%lx",
(unsigned long) origin_addr, origin_count,
origin_dt->name, target, (int) target_disp,
origin_dt->name, target, (unsigned long) target_disp,
target_count, target_dt->name,
op->o_name,
(unsigned long) win));
@ -734,7 +911,11 @@ ompi_osc_portals4_accumulate(const void *origin_addr,
} else {
ptl_size_t md_offset;
ret = ompi_datatype_type_size(origin_dt, &length);
ret = ompi_datatype_get_extent(origin_dt, &origin_lb, &length);
if (OMPI_SUCCESS != ret) {
return ret;
}
ret = ompi_datatype_type_lb(target_dt, &target_lb);
if (OMPI_SUCCESS != ret) {
return ret;
}
@ -745,34 +926,47 @@ ompi_osc_portals4_accumulate(const void *origin_addr,
do {
size_t msg_length = MIN(module->atomic_max, length - sent);
(void)opal_atomic_add_64(&module->opcount, 1);
if (MPI_REPLACE == op) {
ret = PtlPut(module->md_h,
md_offset + sent,
opal_atomic_add_64(&module->opcount, number_of_fragment(msg_length, mca_osc_portals4_component.ptl_max_msg_size));
OPAL_OUTPUT_VERBOSE((90, ompi_osc_base_framework.framework_output,
"%s,%d Put", __FUNCTION__, __LINE__));
ret = splittedPtlPut(module->md_h,
md_offset + sent + origin_lb,
msg_length,
PTL_ACK_REQ,
peer,
module->pt_idx,
module->match_bits,
offset + sent,
offset + sent + target_lb,
NULL,
0);
} else {
(void)opal_atomic_add_64(&module->opcount, 1);
ret = ompi_osc_portals4_get_dt(origin_dt, &ptl_dt);
if (OMPI_SUCCESS != ret) return ret;
if (OMPI_SUCCESS != ret) {
opal_output(ompi_osc_base_framework.framework_output,
"MPI_Accumulate: datatype is not currently supported");
return OMPI_ERR_NOT_SUPPORTED;
}
ret = ompi_osc_portals4_get_op(op, &ptl_op);
if (OMPI_SUCCESS != ret) return ret;
if (OMPI_SUCCESS != ret) {
opal_output(ompi_osc_base_framework.framework_output,
"MPI_Accumulate: operation is not currently supported");
return OMPI_ERR_NOT_SUPPORTED;
}
OPAL_OUTPUT_VERBOSE((90, ompi_osc_base_framework.framework_output,
"%s,%d Atomic", __FUNCTION__, __LINE__));
ret = PtlAtomic(module->md_h,
md_offset + sent,
md_offset + sent + origin_lb,
msg_length,
PTL_ACK_REQ,
peer,
module->pt_idx,
module->match_bits,
offset + sent,
offset + sent + target_lb,
NULL,
0,
ptl_op,
@ -797,7 +991,7 @@ ompi_osc_portals4_get_accumulate(const void *origin_addr,
int result_count,
struct ompi_datatype_t *result_dt,
int target,
MPI_Aint target_disp,
OPAL_PTRDIFF_TYPE target_disp,
int target_count,
struct ompi_datatype_t *target_dt,
struct ompi_op_t *op,
@ -807,17 +1001,17 @@ ompi_osc_portals4_get_accumulate(const void *origin_addr,
ompi_osc_portals4_module_t *module =
(ompi_osc_portals4_module_t*) win->w_osc_module;
ptl_process_t peer = ompi_osc_portals4_get_peer(module, target);
size_t length, sent;
size_t offset;
ptl_op_t ptl_op;
ptl_datatype_t ptl_dt;
OPAL_PTRDIFF_TYPE sent, length, origin_lb, target_lb, result_lb;
OPAL_OUTPUT_VERBOSE((50, ompi_osc_base_framework.framework_output,
"get_accumulate: 0x%lx, %d, %s, 0x%lx, %d, %s, %d, %d, %d, %s, %s, 0x%lx",
"get_accumulate: 0x%lx, %d, %s, 0x%lx, %d, %s, %d, %lu, %d, %s, %s, 0x%lx",
(unsigned long) origin_addr, origin_count,
origin_dt->name, (unsigned long) result_addr,
result_count, result_dt->name,
target, (int) target_disp,
target, (unsigned long) target_disp,
target_count, target_dt->name,
op->o_name,
(unsigned long) win));
@ -836,13 +1030,23 @@ ompi_osc_portals4_get_accumulate(const void *origin_addr,
if (MPI_REPLACE == op) {
ptl_size_t result_md_offset, origin_md_offset;
ret = ompi_datatype_type_size(origin_dt, &length);
ret = ompi_datatype_get_extent(origin_dt, &origin_lb, &length);
if (OMPI_SUCCESS != ret) {
return ret;
}
ret = ompi_datatype_type_lb(target_dt, &target_lb);
if (OMPI_SUCCESS != ret) {
return ret;
}
ret = ompi_datatype_type_lb(result_dt, &result_lb);
if (OMPI_SUCCESS != ret) {
return ret;
}
ret = ompi_osc_portals4_get_dt(origin_dt, &ptl_dt);
if (OMPI_SUCCESS != ret) {
return ret;
opal_output(ompi_osc_base_framework.framework_output,
"MPI_Get_accumulate: datatype is not currently supported");
return OMPI_ERR_NOT_SUPPORTED;
}
length *= origin_count;
@ -854,15 +1058,17 @@ ompi_osc_portals4_get_accumulate(const void *origin_addr,
(void)opal_atomic_add_64(&module->opcount, 1);
OPAL_OUTPUT_VERBOSE((90, ompi_osc_base_framework.framework_output,
"%s,%d Swap", __FUNCTION__, __LINE__));
ret = PtlSwap(module->md_h,
result_md_offset + sent,
result_md_offset + sent + result_lb,
module->md_h,
origin_md_offset + sent,
origin_md_offset + sent + origin_lb,
msg_length,
peer,
module->pt_idx,
module->match_bits,
offset + sent,
offset + sent + target_lb,
NULL,
0,
NULL,
@ -873,7 +1079,11 @@ ompi_osc_portals4_get_accumulate(const void *origin_addr,
} else if (MPI_NO_OP == op) {
ptl_size_t md_offset;
ret = ompi_datatype_type_size(target_dt, &length);
ret = ompi_datatype_get_extent(target_dt, &target_lb, &length);
if (OMPI_SUCCESS != ret) {
return ret;
}
ret = ompi_datatype_type_lb(result_dt, &result_lb);
if (OMPI_SUCCESS != ret) {
return ret;
}
@ -884,22 +1094,31 @@ ompi_osc_portals4_get_accumulate(const void *origin_addr,
do {
size_t msg_length = MIN(module->fetch_atomic_max, length - sent);
(void)opal_atomic_add_64(&module->opcount, 1);
ret = PtlGet(module->md_h,
md_offset + sent,
opal_atomic_add_64(&module->opcount, number_of_fragment(msg_length, mca_osc_portals4_component.ptl_max_msg_size));
OPAL_OUTPUT_VERBOSE((90, ompi_osc_base_framework.framework_output,
"%s,%d Get", __FUNCTION__, __LINE__));
ret = splittedPtlGet(module->md_h,
md_offset + sent + result_lb,
msg_length,
peer,
module->pt_idx,
module->match_bits,
offset + sent,
offset + sent + target_lb,
NULL);
sent += msg_length;
} while (sent < length);
} else {
ptl_size_t result_md_offset, origin_md_offset;
ret = ompi_datatype_type_size(origin_dt, &length);
ret = ompi_datatype_get_extent(origin_dt, &origin_lb, &length);
if (OMPI_SUCCESS != ret) {
return ret;
}
ret = ompi_datatype_type_lb(target_dt, &target_lb);
if (OMPI_SUCCESS != ret) {
return ret;
}
ret = ompi_datatype_type_lb(result_dt, &result_lb);
if (OMPI_SUCCESS != ret) {
return ret;
}
@ -909,26 +1128,35 @@ ompi_osc_portals4_get_accumulate(const void *origin_addr,
origin_md_offset = (ptl_size_t) origin_addr;
ret = ompi_osc_portals4_get_dt(origin_dt, &ptl_dt);
if (OMPI_SUCCESS != ret) return ret;
if (OMPI_SUCCESS != ret) {
opal_output(ompi_osc_base_framework.framework_output,
"MPI_Get_accumulate: datatype is not currently supported");
return OMPI_ERR_NOT_SUPPORTED;
}
ret = ompi_osc_portals4_get_op(op, &ptl_op);
if (OMPI_SUCCESS != ret) return ret;
if (OMPI_SUCCESS != ret) {
opal_output(ompi_osc_base_framework.framework_output,
"MPI_Get_accumulate: operation is not currently supported");
return OMPI_ERR_NOT_SUPPORTED;
}
do {
size_t msg_length = MIN(module->fetch_atomic_max, length - sent);
(void)opal_atomic_add_64(&module->opcount, 1);
OPAL_OUTPUT_VERBOSE((90, ompi_osc_base_framework.framework_output,
"%s,%d FetchAtomic", __FUNCTION__, __LINE__));
ret = PtlFetchAtomic(module->md_h,
result_md_offset + sent,
result_md_offset + sent + result_lb,
module->md_h,
origin_md_offset + sent,
origin_md_offset + sent + origin_lb,
msg_length,
peer,
module->pt_idx,
module->match_bits,
offset + sent,
offset + sent + target_lb,
NULL,
0,
ptl_op,
@ -964,15 +1192,19 @@ ompi_osc_portals4_compare_and_swap(const void *origin_addr,
ptl_size_t result_md_offset, origin_md_offset;
OPAL_OUTPUT_VERBOSE((50, ompi_osc_base_framework.framework_output,
"compare_and_swap: 0x%lx, 0x%lx, 0x%lx, %s, %d, %d, 0x%lx",
"compare_and_swap: 0x%lx, 0x%lx, 0x%lx, %s, %d, %lu, 0x%lx",
(unsigned long) origin_addr,
(unsigned long) compare_addr,
(unsigned long) result_addr,
dt->name, target, (int) target_disp,
dt->name, target, (unsigned long) target_disp,
(unsigned long) win));
ret = ompi_osc_portals4_get_dt(dt, &ptl_dt);
if (OMPI_SUCCESS != ret) return ret;
if (OMPI_SUCCESS != ret) {
opal_output(ompi_osc_base_framework.framework_output,
"MPI_Compare_and_swap: datatype is not currently supported");
return OMPI_ERR_NOT_SUPPORTED;
}
offset = get_displacement(module, target) * target_disp;
@ -986,6 +1218,8 @@ ompi_osc_portals4_compare_and_swap(const void *origin_addr,
(void)opal_atomic_add_64(&module->opcount, 1);
OPAL_OUTPUT_VERBOSE((90,ompi_osc_base_framework.framework_output,
"%s,%d Swap", __FUNCTION__, __LINE__));
ret = PtlSwap(module->md_h,
result_md_offset,
module->md_h,
@ -1027,15 +1261,19 @@ ompi_osc_portals4_fetch_and_op(const void *origin_addr,
ptl_datatype_t ptl_dt;
OPAL_OUTPUT_VERBOSE((50, ompi_osc_base_framework.framework_output,
"fetch_and_op: 0x%lx, 0x%lx, %s, %d, %d, %s, 0x%lx",
"fetch_and_op: 0x%lx, 0x%lx, %s, %d, %lu, %s, 0x%lx",
(unsigned long) origin_addr,
(unsigned long) result_addr,
dt->name, target, (int) target_disp,
dt->name, target, (unsigned long) target_disp,
op->o_name,
(unsigned long) win));
ret = ompi_osc_portals4_get_dt(dt, &ptl_dt);
if (OMPI_SUCCESS != ret) return ret;
if (OMPI_SUCCESS != ret) {
opal_output(ompi_osc_base_framework.framework_output,
"MPI_Fetch_and_op: datatype is not currently supported");
return OMPI_ERR_NOT_SUPPORTED;
}
offset = get_displacement(module, target) * target_disp;
@ -1044,14 +1282,15 @@ ompi_osc_portals4_fetch_and_op(const void *origin_addr,
assert(length <= module->fetch_atomic_max);
(void)opal_atomic_add_64(&module->opcount, 1);
if (MPI_REPLACE == op) {
ptl_size_t result_md_offset, origin_md_offset;
result_md_offset = (ptl_size_t) result_addr;
origin_md_offset = (ptl_size_t) origin_addr;
(void)opal_atomic_add_64(&module->opcount, 1);
OPAL_OUTPUT_VERBOSE((90, ompi_osc_base_framework.framework_output,
"%s,%d Swap", __FUNCTION__, __LINE__));
ret = PtlSwap(module->md_h,
result_md_offset,
module->md_h,
@ -1071,7 +1310,10 @@ ompi_osc_portals4_fetch_and_op(const void *origin_addr,
md_offset = (ptl_size_t) result_addr;
ret = PtlGet(module->md_h,
opal_atomic_add_64(&module->opcount, number_of_fragment(length, mca_osc_portals4_component.ptl_max_msg_size));
OPAL_OUTPUT_VERBOSE((90, ompi_osc_base_framework.framework_output,
"%s,%d Get", __FUNCTION__, __LINE__));
ret = splittedPtlGet(module->md_h,
md_offset,
length,
peer,
@ -1081,13 +1323,20 @@ ompi_osc_portals4_fetch_and_op(const void *origin_addr,
NULL);
} else {
ptl_size_t result_md_offset, origin_md_offset;
(void)opal_atomic_add_64(&module->opcount, 1);
ret = ompi_osc_portals4_get_op(op, &ptl_op);
if (OMPI_SUCCESS != ret) return ret;
if (OMPI_SUCCESS != ret) {
opal_output(ompi_osc_base_framework.framework_output,
"MPI_Fetch_and_op: operation is not currently supported");
return OMPI_ERR_NOT_SUPPORTED;
}
result_md_offset = (ptl_size_t) result_addr;
origin_md_offset = (ptl_size_t) origin_addr;
OPAL_OUTPUT_VERBOSE((90, ompi_osc_base_framework.framework_output,
"%s,%d FetchAtomic", __FUNCTION__, __LINE__));
ret = PtlFetchAtomic(module->md_h,
result_md_offset,
module->md_h,

Просмотреть файл

@ -198,7 +198,7 @@ progress_callback(void)
"%s:%d: PtlEQGet reported dropped event",
__FILE__, __LINE__);
goto process;
} else if (PTL_EQ_EMPTY) {
} else if (PTL_EQ_EMPTY == ret) {
return 0;
} else {
opal_output_verbose(1, ompi_osc_base_framework.framework_output,
@ -218,6 +218,13 @@ process:
count++;
if (NULL != ev.user_ptr) {
/* be sure that we receive the PTL_EVENT_LINK */
if (ev.type == PTL_EVENT_LINK) {
*(int *)ev.user_ptr = *(int *)ev.user_ptr + 1;
opal_condition_broadcast(&mca_osc_portals4_component.cond);
continue;
}
req = (ompi_osc_portals4_request_t*) ev.user_ptr;
opal_atomic_add_size_t(&req->super.req_status._ucount, ev.mlength);
ops = opal_atomic_add_32(&req->ops_committed, 1);
@ -243,7 +250,7 @@ component_open(void)
static int
component_register(void)
{
bool ompi_osc_portals4_no_locks = false;
mca_osc_portals4_component.no_locks = false;
(void) mca_base_component_var_register(&mca_osc_portals4_component.super.osc_version,
"no_locks",
"Enable optimizations available only if MPI_LOCK is "
@ -252,7 +259,19 @@ component_register(void)
MCA_BASE_VAR_TYPE_BOOL, NULL, 0, 0,
OPAL_INFO_LVL_9,
MCA_BASE_VAR_SCOPE_READONLY,
&ompi_osc_portals4_no_locks);
&mca_osc_portals4_component.no_locks);
mca_osc_portals4_component.ptl_max_msg_size = PTL_SIZE_MAX;
(void) mca_base_component_var_register(&mca_osc_portals4_component.super.osc_version,
"max_msg_size",
"Max size supported by portals4 (above that, a message is cut into messages less than that size)",
MCA_BASE_VAR_TYPE_UNSIGNED_LONG,
NULL,
0,
0,
OPAL_INFO_LVL_9,
MCA_BASE_VAR_SCOPE_READONLY,
&mca_osc_portals4_component.ptl_max_msg_size);
return OMPI_SUCCESS;
}
@ -287,6 +306,11 @@ component_init(bool enable_progress_threads, bool enable_mpi_threads)
/* BWB: FIX ME: Need to make sure our ID matches with the MTL... */
if (mca_osc_portals4_component.ptl_max_msg_size > actual.max_msg_size)
mca_osc_portals4_component.ptl_max_msg_size = actual.max_msg_size;
OPAL_OUTPUT_VERBOSE((10, ompi_osc_base_framework.framework_output,
"max_size = %lu", mca_osc_portals4_component.ptl_max_msg_size));
mca_osc_portals4_component.matching_atomic_max = actual.max_atomic_size;
mca_osc_portals4_component.matching_fetch_atomic_max = actual.max_fetch_atomic_size;
mca_osc_portals4_component.matching_atomic_ordered_size =
@ -305,7 +329,7 @@ component_init(bool enable_progress_threads, bool enable_mpi_threads)
ret = PtlPTAlloc(mca_osc_portals4_component.matching_ni_h,
0,
mca_osc_portals4_component.matching_eq_h,
4,
REQ_OSC_TABLE_ID,
&mca_osc_portals4_component.matching_pt_idx);
if (PTL_OK != ret) {
opal_output_verbose(1, ompi_osc_base_framework.framework_output,
@ -314,6 +338,13 @@ component_init(bool enable_progress_threads, bool enable_mpi_threads)
return ret;
}
/* PtlPTAlloc reported success, so ret holds PTL_OK at this point; the
 * allocation may still have produced a portal table index other than the
 * one requested.  Fail explicitly in that case: returning ret here would
 * hand a success code back to the caller. */
if (mca_osc_portals4_component.matching_pt_idx != REQ_OSC_TABLE_ID) {
    opal_output_verbose(1, ompi_osc_base_framework.framework_output,
                        "%s:%d: PtlPTAlloc did not allocate the requested PT: %d\n",
                        __FILE__, __LINE__, mca_osc_portals4_component.matching_pt_idx);
    return OMPI_ERROR;
}
OBJ_CONSTRUCT(&mca_osc_portals4_component.requests, opal_free_list_t);
ret = opal_free_list_init (&mca_osc_portals4_component.requests,
sizeof(ompi_osc_portals4_request_t),
@ -353,8 +384,18 @@ component_query(struct ompi_win_t *win, void **base, size_t size, int disp_unit,
struct ompi_communicator_t *comm, struct ompi_info_t *info,
int flavor)
{
int ret;
if (MPI_WIN_FLAVOR_SHARED == flavor) return -1;
ret = PtlGetUid(mca_osc_portals4_component.matching_ni_h, &mca_osc_portals4_component.uid);
if (PTL_OK != ret) {
opal_output_verbose(1, ompi_osc_base_framework.framework_output,
"%s:%d: PtlGetUid failed: %d\n",
__FILE__, __LINE__, ret);
return OMPI_ERROR;
}
return 20;
}
@ -475,7 +516,7 @@ component_select(struct ompi_win_t *win, void **base, size_t size, int disp_unit
me.length = size;
}
me.ct_handle = PTL_CT_NONE;
me.uid = PTL_UID_ANY;
me.uid = mca_osc_portals4_component.uid;
me.options = PTL_ME_OP_PUT | PTL_ME_OP_GET | PTL_ME_NO_TRUNCATE | PTL_ME_EVENT_SUCCESS_DISABLE;
me.match_id.phys.nid = PTL_NID_ANY;
me.match_id.phys.pid = PTL_PID_ANY;
@ -486,7 +527,7 @@ component_select(struct ompi_win_t *win, void **base, size_t size, int disp_unit
module->pt_idx,
&me,
PTL_PRIORITY_LIST,
NULL,
&module->ct_link,
&module->data_me_h);
if (PTL_OK != ret) {
opal_output_verbose(1, ompi_osc_base_framework.framework_output,
@ -498,7 +539,7 @@ component_select(struct ompi_win_t *win, void **base, size_t size, int disp_unit
me.start = &module->state;
me.length = sizeof(module->state);
me.ct_handle = PTL_CT_NONE;
me.uid = PTL_UID_ANY;
me.uid = mca_osc_portals4_component.uid;
me.options = PTL_ME_OP_PUT | PTL_ME_OP_GET | PTL_ME_NO_TRUNCATE | PTL_ME_EVENT_SUCCESS_DISABLE;
me.match_id.phys.nid = PTL_NID_ANY;
me.match_id.phys.pid = PTL_PID_ANY;
@ -509,7 +550,7 @@ component_select(struct ompi_win_t *win, void **base, size_t size, int disp_unit
module->pt_idx,
&me,
PTL_PRIORITY_LIST,
NULL,
&module->ct_link,
&module->control_me_h);
if (PTL_OK != ret) {
opal_output_verbose(1, ompi_osc_base_framework.framework_output,
@ -557,6 +598,13 @@ component_select(struct ompi_win_t *win, void **base, size_t size, int disp_unit
PtlAtomicSync();
/* Block until module->ct_link reaches 2.  &module->ct_link is handed to the
 * two match-list-entry appends above (data and control), apparently as their
 * user pointer, and progress_callback increments the int behind ev.user_ptr
 * and broadcasts the component condition variable for each PTL_EVENT_LINK,
 * so 2 means both entries have reported their link event. */
OPAL_THREAD_LOCK(&mca_osc_portals4_component.lock);
while (module->ct_link != 2) {
opal_condition_wait(&mca_osc_portals4_component.cond,
&mca_osc_portals4_component.lock);
}
OPAL_THREAD_UNLOCK(&mca_osc_portals4_component.lock);
/* Make sure that everyone's ready to receive. */
module->comm->c_coll.coll_barrier(module->comm,
module->comm->c_coll.coll_barrier_module);
@ -601,6 +649,7 @@ ompi_osc_portals4_free(struct ompi_win_t *win)
module->comm->c_coll.coll_barrier_module);
/* cleanup */
PtlMEUnlink(module->control_me_h);
PtlMEUnlink(module->data_me_h);
PtlMDRelease(module->md_h);
PtlMDRelease(module->req_md_h);

Просмотреть файл

@ -33,6 +33,7 @@ OBJ_CLASS_DECLARATION(ompi_osc_portals4_request_t);
req->super.req_mpi_object.win = win; \
req->super.req_complete = false; \
req->super.req_state = OMPI_REQUEST_ACTIVE; \
req->super.req_status.MPI_ERROR = MPI_SUCCESS; \
req->ops_expected = 0; \
req->ops_committed = 0; \
} while (0)