
Merge one-sided updates to the trunk - written by Brian Barrett and Nathan Hjelm

cmr=v1.7.5:reviewer=hjelmn:subject=Update one-sided to MPI-3

This commit was SVN r30816.
This commit is contained in:
Ralph Castain 2014-02-25 17:36:43 +00:00
parent 202bf90287
commit 49d938de29
132 changed files with 13242 additions and 8659 deletions
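For context, a minimal sketch (not part of this commit) of the MPI-3 one-sided calls this update brings in, such as MPI_Win_allocate, MPI_Win_lock_all, MPI_Fetch_and_op and MPI_Win_flush; ranks, buffer names and the final local read (which assumes the unified memory model) are illustrative only:

#include <mpi.h>
#include <stdio.h>

int main(int argc, char **argv)
{
    long *counter;            /* one long exposed by every rank */
    long one = 1, prev;
    int rank;
    MPI_Win win;

    MPI_Init(&argc, &argv);
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);

    /* collectively allocate and expose window memory (MPI_WIN_FLAVOR_ALLOCATE) */
    MPI_Win_allocate(sizeof(long), sizeof(long), MPI_INFO_NULL,
                     MPI_COMM_WORLD, &counter, &win);
    *counter = 0;
    MPI_Barrier(MPI_COMM_WORLD);

    /* passive-target epoch covering all ranks */
    MPI_Win_lock_all(0, win);
    /* atomic fetch-and-add of 1 into rank 0's counter */
    MPI_Fetch_and_op(&one, &prev, MPI_LONG, 0, 0, MPI_SUM, win);
    MPI_Win_flush(0, win);    /* complete the operation at the target */
    MPI_Win_unlock_all(win);

    MPI_Barrier(MPI_COMM_WORLD);
    if (0 == rank) printf("counter = %ld\n", *counter);

    MPI_Win_free(&win);
    MPI_Finalize();
    return 0;
}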

RMA_TODO.txt Normal file (5 additions)
View File

@@ -0,0 +1,5 @@
- Support non-contiguous operations in Portals4 implementation
- Add memory barriers where needed in synchronization primitives of
Portals4 implementation
- Re-implement rdma component

View File

@@ -127,6 +127,8 @@ int ompi_attr_create_predefined(void)
OMPI_SUCCESS != (ret = create_win(MPI_WIN_BASE)) ||
OMPI_SUCCESS != (ret = create_win(MPI_WIN_SIZE)) ||
OMPI_SUCCESS != (ret = create_win(MPI_WIN_DISP_UNIT)) ||
OMPI_SUCCESS != (ret = create_win(MPI_WIN_CREATE_FLAVOR)) ||
OMPI_SUCCESS != (ret = create_win(MPI_WIN_MODEL)) ||
#if 0
/* JMS For when we implement IMPI */
OMPI_SUCCESS != (ret = create_comm(IMPI_CLIENT_SIZE, true)) ||
@@ -193,6 +195,8 @@ int ompi_attr_free_predefined(void)
OMPI_SUCCESS != (ret = free_win(MPI_WIN_BASE)) ||
OMPI_SUCCESS != (ret = free_win(MPI_WIN_SIZE)) ||
OMPI_SUCCESS != (ret = free_win(MPI_WIN_DISP_UNIT)) ||
OMPI_SUCCESS != (ret = free_win(MPI_WIN_CREATE_FLAVOR)) ||
OMPI_SUCCESS != (ret = free_win(MPI_WIN_MODEL)) ||
#if 0
/* JMS For when we implement IMPI */
OMPI_SUCCESS != (ret = free_comm(IMPI_CLIENT_SIZE)) ||

View File

@@ -128,11 +128,7 @@ int main(int argc, char **argv) {
GAP_CHECK("w_f_to_c_index", test_win, w_f_to_c_index, w_keyhash, 1);
GAP_CHECK("error_handler", test_win, error_handler, w_f_to_c_index, 1);
GAP_CHECK("errhandler_type", test_win, errhandler_type, error_handler, 1);
GAP_CHECK("w_disp_unit", test_win, w_disp_unit, errhandler_type, 1);
GAP_CHECK("w_baseptr", test_win, w_baseptr, w_disp_unit, 1);
GAP_CHECK("w_size", test_win, w_size, w_baseptr, 1);
GAP_CHECK("w_mode", test_win, w_mode, w_size, 1);
GAP_CHECK("w_osc_module", test_win, w_osc_module, w_size, 1);
GAP_CHECK("w_osc_module", test_win, w_osc_module, errhandler_type, 1);
/* Test Predefined info sizes */
printf("=============================================\n");

View File

@@ -107,6 +107,9 @@ ompi_mpi_errcode_t ompi_t_err_cvar_set_never;
ompi_mpi_errcode_t ompi_t_err_pvar_no_startstop;
ompi_mpi_errcode_t ompi_t_err_pvar_no_write;
ompi_mpi_errcode_t ompi_t_err_pvar_no_atomic;
ompi_mpi_errcode_t ompi_err_rma_range;
ompi_mpi_errcode_t ompi_err_rma_attach;
ompi_mpi_errcode_t ompi_err_rma_flavor;
static void ompi_mpi_errcode_construct(ompi_mpi_errcode_t* errcode);
static void ompi_mpi_errcode_destruct(ompi_mpi_errcode_t* errcode);
@@ -202,6 +205,9 @@ int ompi_mpi_errcode_init (void)
CONSTRUCT_ERRCODE( ompi_t_err_pvar_no_startstop, MPI_T_ERR_PVAR_NO_STARTSTOP, "MPI_T_ERR_PVAR_NO_STARTSTOP: variable cannot be started or stopped" );
CONSTRUCT_ERRCODE( ompi_t_err_pvar_no_write, MPI_T_ERR_PVAR_NO_WRITE, "MPI_T_ERR_PVAR_NO_WRITE: variable cannot be written or reset" );
CONSTRUCT_ERRCODE( ompi_t_err_pvar_no_atomic, MPI_T_ERR_PVAR_NO_ATOMIC, "MPI_T_ERR_PVAR_NO_ATOMIC: variable cannot be read and written atomically" );
CONSTRUCT_ERRCODE( ompi_err_rma_range, MPI_ERR_RMA_RANGE, "MPI_ERR_RMA_RANGE: invalid RMA address range" );
CONSTRUCT_ERRCODE( ompi_err_rma_attach, MPI_ERR_RMA_ATTACH, "MPI_ERR_RMA_ATTACH: Could not attach RMA segment" );
CONSTRUCT_ERRCODE( ompi_err_rma_flavor, MPI_ERR_RMA_FLAVOR, "MPI_ERR_RMA_FLAVOR: Invalid type of window" );
/* Per MPI-3 p353:27-32, MPI_LASTUSEDCODE must be >=
MPI_ERR_LASTCODE. So just start it as == MPI_ERR_LASTCODE. */
@@ -292,6 +298,9 @@ int ompi_mpi_errcode_finalize(void)
OBJ_DESTRUCT(&ompi_t_err_pvar_no_startstop);
OBJ_DESTRUCT(&ompi_t_err_pvar_no_write);
OBJ_DESTRUCT(&ompi_t_err_pvar_no_atomic);
OBJ_DESTRUCT(&ompi_err_rma_range);
OBJ_DESTRUCT(&ompi_err_rma_attach);
OBJ_DESTRUCT(&ompi_err_rma_flavor);
OBJ_DESTRUCT(&ompi_mpi_errcodes);
return OMPI_SUCCESS;

View File

@@ -13,7 +13,7 @@
* Copyright (c) 2008-2009 Sun Microsystems, Inc. All rights reserved.
* Copyright (c) 2009-2012 Oak Ridge National Laboratory. All rights reserved.
* Copyright (c) 2011 Sandia National Laboratories. All rights reserved.
* Copyright (c) 2012-2013 Los Alamos National Security, LLC. All rights reserved.
* Copyright (c) 2012-2014 Los Alamos National Security, LLC. All rights reserved.
* Copyright (c) 2011-2013 INRIA. All rights reserved.
* $COPYRIGHT$
*
@@ -488,6 +488,13 @@ typedef int (MPI_Grequest_cancel_function)(void *, int);
#define MPI_LOCK_EXCLUSIVE 1
#define MPI_LOCK_SHARED 2
#define MPI_WIN_FLAVOR_CREATE 1
#define MPI_WIN_FLAVOR_ALLOCATE 2
#define MPI_WIN_FLAVOR_DYNAMIC 3
#define MPI_WIN_FLAVOR_SHARED 4
#define MPI_WIN_UNIFIED 0
#define MPI_WIN_SEPARATE 1
/*
* Predefined attribute keyvals
@@ -509,6 +516,8 @@ enum {
MPI_WIN_BASE,
MPI_WIN_SIZE,
MPI_WIN_DISP_UNIT,
MPI_WIN_CREATE_FLAVOR,
MPI_WIN_MODEL,
/* Even though these four are IMPI attributes, they need to be there
for all MPI jobs */
@@ -590,10 +599,14 @@ enum {
#define MPI_T_ERR_PVAR_NO_STARTSTOP 65
#define MPI_T_ERR_PVAR_NO_WRITE 66
#define MPI_T_ERR_PVAR_NO_ATOMIC 67
#define MPI_ERR_RMA_RANGE 68
#define MPI_ERR_RMA_ATTACH 69
#define MPI_ERR_RMA_FLAVOR 70
/* Per MPI-3 p349 47, MPI_ERR_LASTCODE must be >= the last predefined
MPI_ERR_<foo> code. So just set it equal to the last code --
MPI_T_ERR_PVAR_NO_ATOMIC, in this case. */
#define MPI_ERR_LASTCODE MPI_T_ERR_PVAR_NO_ATOMIC
MPI_ERR_RMA_FLAVOR, in this case. */
#define MPI_ERR_LASTCODE MPI_ERR_RMA_FLAVOR
#define MPI_ERR_SYSRESOURCE -2
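A brief usage sketch (not from this diff) of the new window-flavor and memory-model attributes defined above; 'win' is assumed to be an existing MPI_Win, and the attribute values are returned as pointers to int:

int *flavor, *model, flag;

MPI_Win_get_attr(win, MPI_WIN_CREATE_FLAVOR, &flavor, &flag);
if (flag && MPI_WIN_FLAVOR_DYNAMIC == *flavor) {
    /* window came from MPI_Win_create_dynamic; memory must be
       MPI_Win_attach'ed before it can be targeted */
}

MPI_Win_get_attr(win, MPI_WIN_MODEL, &model, &flag);
if (flag && MPI_WIN_UNIFIED == *model) {
    /* public and private copies of the window are kept coherent */
}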
@@ -888,6 +901,7 @@ OMPI_DECLSPEC extern struct ompi_predefined_op_t ompi_mpi_op_bxor;
OMPI_DECLSPEC extern struct ompi_predefined_op_t ompi_mpi_op_maxloc;
OMPI_DECLSPEC extern struct ompi_predefined_op_t ompi_mpi_op_minloc;
OMPI_DECLSPEC extern struct ompi_predefined_op_t ompi_mpi_op_replace;
OMPI_DECLSPEC extern struct ompi_predefined_op_t ompi_mpi_op_no_op;
OMPI_DECLSPEC extern struct ompi_predefined_datatype_t ompi_mpi_datatype_null;
@@ -1019,6 +1033,7 @@ OMPI_DECLSPEC extern MPI_Fint *MPI_F_STATUSES_IGNORE;
#define MPI_MAXLOC OMPI_PREDEFINED_GLOBAL(MPI_Op, ompi_mpi_op_maxloc)
#define MPI_MINLOC OMPI_PREDEFINED_GLOBAL(MPI_Op, ompi_mpi_op_minloc)
#define MPI_REPLACE OMPI_PREDEFINED_GLOBAL(MPI_Op, ompi_mpi_op_replace)
#define MPI_NO_OP OMPI_PREDEFINED_GLOBAL(MPI_Op, ompi_mpi_op_no_op)
/* C datatypes */
#define MPI_DATATYPE_NULL OMPI_PREDEFINED_GLOBAL(MPI_Datatype, ompi_mpi_datatype_null)
@@ -1298,6 +1313,9 @@ OMPI_DECLSPEC int MPI_Comm_spawn_multiple(int count, char *array_of_commands[],
OMPI_DECLSPEC int MPI_Comm_split(MPI_Comm comm, int color, int key, MPI_Comm *newcomm);
OMPI_DECLSPEC int MPI_Comm_split_type(MPI_Comm comm, int split_type, int key, MPI_Info info, MPI_Comm *newcomm);
OMPI_DECLSPEC int MPI_Comm_test_inter(MPI_Comm comm, int *flag);
OMPI_DECLSPEC int MPI_Compare_and_swap(void *origin_addr, void *compare_addr,
void *result_addr, MPI_Datatype datatype, int target_rank,
MPI_Aint target_disp, MPI_Win win);
OMPI_DECLSPEC int MPI_Dims_create(int nnodes, int ndims, int dims[]);
OMPI_DECLSPEC MPI_Fint MPI_Errhandler_c2f(MPI_Errhandler errhandler);
OMPI_DECLSPEC int MPI_Errhandler_create(MPI_Handler_function *function,
@@ -1313,6 +1331,8 @@ OMPI_DECLSPEC int MPI_Error_class(int errorcode, int *errorclass);
OMPI_DECLSPEC int MPI_Error_string(int errorcode, char *string, int *resultlen);
OMPI_DECLSPEC int MPI_Exscan(const void *sendbuf, void *recvbuf, int count,
MPI_Datatype datatype, MPI_Op op, MPI_Comm comm);
OMPI_DECLSPEC int MPI_Fetch_and_op(void *origin_addr, void *result_addr, MPI_Datatype datatype,
int target_rank, MPI_Aint target_disp, MPI_Op op, MPI_Win win);
OMPI_DECLSPEC int MPI_Iexscan(const void *sendbuf, void *recvbuf, int count,
MPI_Datatype datatype, MPI_Op op, MPI_Comm comm, MPI_Request *request);
#if OMPI_PROVIDE_MPI_FILE_INTERFACE
@@ -1428,6 +1448,10 @@ OMPI_DECLSPEC int MPI_Get(void *origin_addr, int origin_count,
MPI_Datatype origin_datatype, int target_rank,
MPI_Aint target_disp, int target_count,
MPI_Datatype target_datatype, MPI_Win win);
OMPI_DECLSPEC int MPI_Get_accumulate(const void *origin_addr, int origin_count, MPI_Datatype origin_datatype,
void *result_addr, int result_count, MPI_Datatype result_datatype,
int target_rank, MPI_Aint target_disp, int target_count,
MPI_Datatype target_datatype, MPI_Op op, MPI_Win win);
OMPI_DECLSPEC int MPI_Get_library_version(char *version, int *resultlen);
OMPI_DECLSPEC int MPI_Get_processor_name(char *name, int *resultlen);
OMPI_DECLSPEC int MPI_Get_version(int *version, int *subversion);
@@ -1575,6 +1599,9 @@ OMPI_DECLSPEC int MPI_Put(const void *origin_addr, int origin_count, MPI_Dataty
int target_rank, MPI_Aint target_disp, int target_count,
MPI_Datatype target_datatype, MPI_Win win);
OMPI_DECLSPEC int MPI_Query_thread(int *provided);
OMPI_DECLSPEC int MPI_Raccumulate(void *origin_addr, int origin_count, MPI_Datatype origin_datatype,
int target_rank, MPI_Aint target_disp, int target_count,
MPI_Datatype target_datatype, MPI_Op op, MPI_Win win, MPI_Request *request);
OMPI_DECLSPEC int MPI_Recv_init(void *buf, int count, MPI_Datatype datatype, int source,
int tag, MPI_Comm comm, MPI_Request *request);
OMPI_DECLSPEC int MPI_Recv(void *buf, int count, MPI_Datatype datatype, int source,
@@ -1603,6 +1630,17 @@ OMPI_DECLSPEC MPI_Request MPI_Request_f2c(MPI_Fint request);
OMPI_DECLSPEC int MPI_Request_free(MPI_Request *request);
OMPI_DECLSPEC int MPI_Request_get_status(MPI_Request request, int *flag,
MPI_Status *status);
OMPI_DECLSPEC int MPI_Rget(void *origin_addr, int origin_count, MPI_Datatype origin_datatype,
int target_rank, MPI_Aint target_disp, int target_count, MPI_Datatype target_datatype,
MPI_Win win, MPI_Request *request);
OMPI_DECLSPEC int MPI_Rget_accumulate(const void *origin_addr, int origin_count, MPI_Datatype origin_datatype,
void *result_addr, int result_count, MPI_Datatype result_datatype,
int target_rank, MPI_Aint target_disp, int target_count,
MPI_Datatype target_datatype, MPI_Op op,
MPI_Win win, MPI_Request *request);
OMPI_DECLSPEC int MPI_Rput(const void *origin_addr, int origin_count, MPI_Datatype origin_datatype,
int target_rank, MPI_Aint target_disp, int target_count,
MPI_Datatype target_datatype, MPI_Win win, MPI_Request *request);
OMPI_DECLSPEC int MPI_Rsend(const void *ibuf, int count, MPI_Datatype datatype, int dest,
int tag, MPI_Comm comm);
OMPI_DECLSPEC int MPI_Rsend_init(const void *buf, int count, MPI_Datatype datatype,
@@ -1768,39 +1806,55 @@ OMPI_DECLSPEC int MPI_Wait(MPI_Request *request, MPI_Status *status);
OMPI_DECLSPEC int MPI_Waitsome(int incount, MPI_Request array_of_requests[],
int *outcount, int array_of_indices[],
MPI_Status array_of_statuses[]);
OMPI_DECLSPEC int MPI_Win_allocate(MPI_Aint size, int disp_unit, MPI_Info info,
MPI_Comm comm, void *baseptr, MPI_Win *win);
OMPI_DECLSPEC int MPI_Win_allocate_shared(MPI_Aint size, int disp_unit, MPI_Info info,
MPI_Comm comm, void *baseptr, MPI_Win *win);
OMPI_DECLSPEC int MPI_Win_attach(MPI_Win win, void *base, MPI_Aint size);
OMPI_DECLSPEC MPI_Fint MPI_Win_c2f(MPI_Win win);
OMPI_DECLSPEC int MPI_Win_call_errhandler(MPI_Win win, int errorcode);
OMPI_DECLSPEC int MPI_Win_complete(MPI_Win win);
OMPI_DECLSPEC int MPI_Win_create(void *base, MPI_Aint size, int disp_unit,
MPI_Info info, MPI_Comm comm, MPI_Win *win);
OMPI_DECLSPEC int MPI_Win_create_dynamic(MPI_Info info, MPI_Comm comm, MPI_Win *win);
OMPI_DECLSPEC int MPI_Win_create_errhandler(MPI_Win_errhandler_function *function,
MPI_Errhandler *errhandler);
OMPI_DECLSPEC int MPI_Win_create_keyval(MPI_Win_copy_attr_function *win_copy_attr_fn,
MPI_Win_delete_attr_function *win_delete_attr_fn,
int *win_keyval, void *extra_state);
OMPI_DECLSPEC int MPI_Win_delete_attr(MPI_Win win, int win_keyval);
OMPI_DECLSPEC int MPI_Win_detach(MPI_Win win, void *base);
OMPI_DECLSPEC MPI_Win MPI_Win_f2c(MPI_Fint win);
OMPI_DECLSPEC int MPI_Win_fence(int assert, MPI_Win win);
OMPI_DECLSPEC int MPI_Win_flush(int rank, MPI_Win win);
OMPI_DECLSPEC int MPI_Win_flush_all(MPI_Win win);
OMPI_DECLSPEC int MPI_Win_flush_local(int rank, MPI_Win win);
OMPI_DECLSPEC int MPI_Win_flush_local_all(MPI_Win win);
OMPI_DECLSPEC int MPI_Win_free(MPI_Win *win);
OMPI_DECLSPEC int MPI_Win_free_keyval(int *win_keyval);
OMPI_DECLSPEC int MPI_Win_get_attr(MPI_Win win, int win_keyval,
void *attribute_val, int *flag);
OMPI_DECLSPEC int MPI_Win_get_errhandler(MPI_Win win, MPI_Errhandler *errhandler);
OMPI_DECLSPEC int MPI_Win_get_group(MPI_Win win, MPI_Group *group);
OMPI_DECLSPEC int MPI_Win_get_info(MPI_Win win, MPI_Info *info_used);
OMPI_DECLSPEC int MPI_Win_get_name(MPI_Win win, char *win_name, int *resultlen);
OMPI_DECLSPEC int MPI_Win_lock(int lock_type, int rank, int assert, MPI_Win win);
OMPI_DECLSPEC int MPI_Win_lock_all(int assert, MPI_Win win);
OMPI_DECLSPEC int MPI_Win_post(MPI_Group group, int assert, MPI_Win win);
OMPI_DECLSPEC int MPI_Win_set_attr(MPI_Win win, int win_keyval, void *attribute_val);
OMPI_DECLSPEC int MPI_Win_set_errhandler(MPI_Win win, MPI_Errhandler errhandler);
OMPI_DECLSPEC int MPI_Win_set_info(MPI_Win win, MPI_Info info);
OMPI_DECLSPEC int MPI_Win_set_name(MPI_Win win, const char *win_name);
OMPI_DECLSPEC int MPI_Win_shared_query(MPI_Win win, int rank, MPI_Aint *size, int *disp_unit, void *baseptr);
OMPI_DECLSPEC int MPI_Win_start(MPI_Group group, int assert, MPI_Win win);
OMPI_DECLSPEC int MPI_Win_sync(MPI_Win win);
OMPI_DECLSPEC int MPI_Win_test(MPI_Win win, int *flag);
OMPI_DECLSPEC int MPI_Win_unlock(int rank, MPI_Win win);
OMPI_DECLSPEC int MPI_Win_unlock_all(MPI_Win win);
OMPI_DECLSPEC int MPI_Win_wait(MPI_Win win);
OMPI_DECLSPEC double MPI_Wtick(void);
OMPI_DECLSPEC double MPI_Wtime(void);
/*
* Profiling MPI API
*/
@@ -1949,6 +2003,9 @@ OMPI_DECLSPEC int PMPI_Comm_spawn_multiple(int count, char *array_of_commands[]
OMPI_DECLSPEC int PMPI_Comm_split(MPI_Comm comm, int color, int key, MPI_Comm *newcomm);
OMPI_DECLSPEC int PMPI_Comm_split_type(MPI_Comm comm, int split_type, int key, MPI_Info info, MPI_Comm *newcomm);
OMPI_DECLSPEC int PMPI_Comm_test_inter(MPI_Comm comm, int *flag);
OMPI_DECLSPEC int PMPI_Compare_and_swap(void *origin_addr, void *compare_addr,
void *result_addr, MPI_Datatype datatype, int target_rank,
MPI_Aint target_disp, MPI_Win win);
OMPI_DECLSPEC int PMPI_Dims_create(int nnodes, int ndims, int dims[]);
OMPI_DECLSPEC MPI_Fint PMPI_Errhandler_c2f(MPI_Errhandler errhandler);
OMPI_DECLSPEC int PMPI_Errhandler_create(MPI_Handler_function *function,
@@ -1964,6 +2021,8 @@ OMPI_DECLSPEC int PMPI_Error_class(int errorcode, int *errorclass);
OMPI_DECLSPEC int PMPI_Error_string(int errorcode, char *string, int *resultlen);
OMPI_DECLSPEC int PMPI_Exscan(const void *sendbuf, void *recvbuf, int count,
MPI_Datatype datatype, MPI_Op op, MPI_Comm comm);
OMPI_DECLSPEC int PMPI_Fetch_and_op(void *origin_addr, void *result_addr, MPI_Datatype datatype,
int target_rank, MPI_Aint target_disp, MPI_Op op, MPI_Win win);
OMPI_DECLSPEC int PMPI_Iexscan(const void *sendbuf, void *recvbuf, int count,
MPI_Datatype datatype, MPI_Op op, MPI_Comm comm, MPI_Request *request);
#if OMPI_PROVIDE_MPI_FILE_INTERFACE
@@ -2081,6 +2140,10 @@ OMPI_DECLSPEC int PMPI_Get(void *origin_addr, int origin_count,
MPI_Datatype origin_datatype, int target_rank,
MPI_Aint target_disp, int target_count,
MPI_Datatype target_datatype, MPI_Win win);
OMPI_DECLSPEC int PMPI_Get_accumulate(const void *origin_addr, int origin_count, MPI_Datatype origin_datatype,
void *result_addr, int result_count, MPI_Datatype result_datatype,
int target_rank, MPI_Aint target_disp, int target_count,
MPI_Datatype target_datatype, MPI_Op op, MPI_Win win);
OMPI_DECLSPEC int PMPI_Get_library_version(char *version, int *resultlen);
OMPI_DECLSPEC int PMPI_Get_processor_name(char *name, int *resultlen);
OMPI_DECLSPEC int PMPI_Get_version(int *version, int *subversion);
@@ -2228,6 +2291,9 @@ OMPI_DECLSPEC int PMPI_Put(const void *origin_addr, int origin_count, MPI_Datat
int target_rank, MPI_Aint target_disp, int target_count,
MPI_Datatype target_datatype, MPI_Win win);
OMPI_DECLSPEC int PMPI_Query_thread(int *provided);
OMPI_DECLSPEC int PMPI_Raccumulate(void *origin_addr, int origin_count, MPI_Datatype origin_datatype,
int target_rank, MPI_Aint target_disp, int target_count,
MPI_Datatype target_datatype, MPI_Op op, MPI_Win win, MPI_Request *request);
OMPI_DECLSPEC int PMPI_Recv_init(void *buf, int count, MPI_Datatype datatype, int source,
int tag, MPI_Comm comm, MPI_Request *request);
OMPI_DECLSPEC int PMPI_Recv(void *buf, int count, MPI_Datatype datatype, int source,
@@ -2256,6 +2322,17 @@ OMPI_DECLSPEC MPI_Request PMPI_Request_f2c(MPI_Fint request);
OMPI_DECLSPEC int PMPI_Request_free(MPI_Request *request);
OMPI_DECLSPEC int PMPI_Request_get_status(MPI_Request request, int *flag,
MPI_Status *status);
OMPI_DECLSPEC int PMPI_Rget(void *origin_addr, int origin_count, MPI_Datatype origin_datatype,
int target_rank, MPI_Aint target_disp, int target_count, MPI_Datatype target_datatype,
MPI_Win win, MPI_Request *request);
OMPI_DECLSPEC int PMPI_Rget_accumulate(const void *origin_addr, int origin_count, MPI_Datatype origin_datatype,
void *result_addr, int result_count, MPI_Datatype result_datatype,
int target_rank, MPI_Aint target_disp, int target_count,
MPI_Datatype target_datatype, MPI_Op op,
MPI_Win win, MPI_Request *request);
OMPI_DECLSPEC int PMPI_Rput(const void *origin_addr, int origin_count, MPI_Datatype origin_datatype,
int target_rank, MPI_Aint target_disp, int target_count,
MPI_Datatype target_datatype, MPI_Win win, MPI_Request *request);
OMPI_DECLSPEC int PMPI_Rsend(const void *ibuf, int count, MPI_Datatype datatype, int dest,
int tag, MPI_Comm comm);
OMPI_DECLSPEC int PMPI_Rsend_init(const void *buf, int count, MPI_Datatype datatype,
@@ -2421,34 +2498,51 @@ OMPI_DECLSPEC int PMPI_Wait(MPI_Request *request, MPI_Status *status);
OMPI_DECLSPEC int PMPI_Waitsome(int incount, MPI_Request array_of_requests[],
int *outcount, int array_of_indices[],
MPI_Status array_of_statuses[]);
OMPI_DECLSPEC int PMPI_Win_allocate(MPI_Aint size, int disp_unit, MPI_Info info,
MPI_Comm comm, void *baseptr, MPI_Win *win);
OMPI_DECLSPEC int PMPI_Win_allocate_shared(MPI_Aint size, int disp_unit, MPI_Info info,
MPI_Comm comm, void *baseptr, MPI_Win *win);
OMPI_DECLSPEC int PMPI_Win_attach(MPI_Win win, void *base, MPI_Aint size);
OMPI_DECLSPEC MPI_Fint PMPI_Win_c2f(MPI_Win win);
OMPI_DECLSPEC int PMPI_Win_call_errhandler(MPI_Win win, int errorcode);
OMPI_DECLSPEC int PMPI_Win_complete(MPI_Win win);
OMPI_DECLSPEC int PMPI_Win_create(void *base, MPI_Aint size, int disp_unit,
MPI_Info info, MPI_Comm comm, MPI_Win *win);
OMPI_DECLSPEC int PMPI_Win_create_dynamic(MPI_Info info, MPI_Comm comm, MPI_Win *win);
OMPI_DECLSPEC int PMPI_Win_create_errhandler(MPI_Win_errhandler_function *function,
MPI_Errhandler *errhandler);
OMPI_DECLSPEC int PMPI_Win_create_keyval(MPI_Win_copy_attr_function *win_copy_attr_fn,
MPI_Win_delete_attr_function *win_delete_attr_fn,
int *win_keyval, void *extra_state);
OMPI_DECLSPEC int PMPI_Win_delete_attr(MPI_Win win, int win_keyval);
OMPI_DECLSPEC int PMPI_Win_detach(MPI_Win win, void *base);
OMPI_DECLSPEC MPI_Win PMPI_Win_f2c(MPI_Fint win);
OMPI_DECLSPEC int PMPI_Win_fence(int assert, MPI_Win win);
OMPI_DECLSPEC int PMPI_Win_flush(int rank, MPI_Win win);
OMPI_DECLSPEC int PMPI_Win_flush_all(MPI_Win win);
OMPI_DECLSPEC int PMPI_Win_flush_local(int rank, MPI_Win win);
OMPI_DECLSPEC int PMPI_Win_flush_local_all(MPI_Win win);
OMPI_DECLSPEC int PMPI_Win_free(MPI_Win *win);
OMPI_DECLSPEC int PMPI_Win_free_keyval(int *win_keyval);
OMPI_DECLSPEC int PMPI_Win_get_attr(MPI_Win win, int win_keyval,
void *attribute_val, int *flag);
OMPI_DECLSPEC int PMPI_Win_get_errhandler(MPI_Win win, MPI_Errhandler *errhandler);
OMPI_DECLSPEC int PMPI_Win_get_group(MPI_Win win, MPI_Group *group);
OMPI_DECLSPEC int PMPI_Win_get_info(MPI_Win win, MPI_Info *info_used);
OMPI_DECLSPEC int PMPI_Win_get_name(MPI_Win win, char *win_name, int *resultlen);
OMPI_DECLSPEC int PMPI_Win_lock(int lock_type, int rank, int assert, MPI_Win win);
OMPI_DECLSPEC int PMPI_Win_lock_all(int assert, MPI_Win win);
OMPI_DECLSPEC int PMPI_Win_post(MPI_Group group, int assert, MPI_Win win);
OMPI_DECLSPEC int PMPI_Win_set_attr(MPI_Win win, int win_keyval, void *attribute_val);
OMPI_DECLSPEC int PMPI_Win_set_errhandler(MPI_Win win, MPI_Errhandler errhandler);
OMPI_DECLSPEC int PMPI_Win_set_info(MPI_Win win, MPI_Info info);
OMPI_DECLSPEC int PMPI_Win_set_name(MPI_Win win, const char *win_name);
OMPI_DECLSPEC int PMPI_Win_shared_query(MPI_Win win, int rank, MPI_Aint *size, int *disp_unit, void *baseptr);
OMPI_DECLSPEC int PMPI_Win_start(MPI_Group group, int assert, MPI_Win win);
OMPI_DECLSPEC int PMPI_Win_sync(MPI_Win win);
OMPI_DECLSPEC int PMPI_Win_test(MPI_Win win, int *flag);
OMPI_DECLSPEC int PMPI_Win_unlock(int rank, MPI_Win win);
OMPI_DECLSPEC int PMPI_Win_unlock_all(MPI_Win win);
OMPI_DECLSPEC int PMPI_Win_wait(MPI_Win win);
OMPI_DECLSPEC double PMPI_Wtick(void);
OMPI_DECLSPEC double PMPI_Wtime(void);
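A small sketch (not part of this commit) of the request-based RMA entry points declared above; 'win', 'buf', 'count' and 'target' are assumed to already exist, and the calls are issued inside a passive-target epoch as MPI-3 requires:

MPI_Request req;

MPI_Win_lock(MPI_LOCK_SHARED, target, 0, win);
MPI_Rput(buf, count, MPI_DOUBLE, target, 0, count, MPI_DOUBLE, win, &req);
MPI_Wait(&req, MPI_STATUS_IGNORE);   /* local completion: buf may be reused */
MPI_Win_unlock(target, win);         /* remote completion at the target */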

View File

@@ -320,7 +320,10 @@ $constants->{MPI_T_ERR_CVAR_SET_NEVER} = 64;
$constants->{MPI_T_ERR_PVAR_NO_STARTSTOP} = 65;
$constants->{MPI_T_ERR_PVAR_NO_WRITE} = 66;
$constants->{MPI_T_ERR_PVAR_NO_ATOMIC} = 67;
$constants->{MPI_ERR_LASTCODE} = $constants->{MPI_T_ERR_PVAR_NO_ATOMIC};
$constants->{MPI_ERR_RMA_RANGE} = 68;
$constants->{MPI_ERR_RMA_ATTACH} = 69;
$constants->{MPI_ERR_RMA_FLAVOR} = 70;
$constants->{MPI_ERR_LASTCODE} = $constants->{MPI_ERR_RMA_FLAVOR};
$constants->{MPI_ERR_SYSRESOURCE} = -2;

View File

@@ -227,6 +227,8 @@ enum {
OMPI_OP_BASE_FORTRAN_MINLOC,
/** Corresponds to Fortran MPI_REPLACE */
OMPI_OP_BASE_FORTRAN_REPLACE,
/** Corresponds to Fortran MPI_NO_OP */
OMPI_OP_BASE_FORTRAN_NO_OP,
/** Maximum value */
OMPI_OP_BASE_FORTRAN_OP_MAX

View File

@@ -37,8 +37,13 @@ int ompi_osc_base_find_available(bool enable_progress_threads,
bool enable_mpi_threads);
int ompi_osc_base_select(ompi_win_t *win,
void **base,
size_t size,
int disp_unit,
ompi_communicator_t *comm,
ompi_info_t *info,
ompi_communicator_t *comm);
int flavor,
int *model);
int ompi_osc_base_finalize(void);

View File

@@ -27,8 +27,13 @@
int
ompi_osc_base_select(ompi_win_t *win,
ompi_info_t *info,
ompi_communicator_t *comm)
void **base,
size_t size,
int disp_unit,
ompi_communicator_t *comm,
ompi_info_t *info,
int flavor,
int *model)
{
opal_list_item_t *item;
ompi_osc_base_component_t *best_component = NULL;
@@ -45,7 +50,7 @@ ompi_osc_base_select(ompi_win_t *win,
ompi_osc_base_component_t *component = (ompi_osc_base_component_t*)
((mca_base_component_list_item_t*) item)->cli_component;
priority = component->osc_query(win, info, comm);
priority = component->osc_query(win, base, size, disp_unit, comm, info, flavor);
if (priority < 0) continue;
if (priority > best_priority) {
best_component = component;
@@ -55,5 +60,5 @@ ompi_osc_base_select(ompi_win_t *win,
if (NULL == best_component) return OMPI_ERR_NOT_SUPPORTED;
return best_component->osc_select(win, info, comm);
return best_component->osc_select(win, base, size, disp_unit, comm, info, flavor, model);
}

View File

@@ -239,3 +239,65 @@ ompi_osc_base_process_op(void *outbuf,
return OMPI_SUCCESS;
}
int
ompi_osc_base_sndrcv_op(void *origin,
int32_t origin_count,
struct ompi_datatype_t *origin_dt,
void *target,
int32_t target_count,
struct ompi_datatype_t *target_dt,
ompi_op_t *op)
{
if (ompi_datatype_is_predefined(origin_dt) && origin_dt == target_dt) {
ompi_op_reduce(op, origin, target, origin_count, origin_dt);
} else {
ompi_osc_base_convertor_t recv_convertor;
opal_convertor_t send_convertor;
struct iovec iov;
uint32_t iov_count = 1;
size_t max_data;
int completed, length;
struct opal_convertor_master_t master = {NULL, 0, 0, 0, {0, }, NULL};
/* initialize send convertor */
OBJ_CONSTRUCT(&send_convertor, opal_convertor_t);
opal_convertor_copy_and_prepare_for_send(ompi_proc_local()->proc_convertor,
&(origin_dt->super), origin_count, origin, 0,
&send_convertor);
/* initialize recv convertor */
OBJ_CONSTRUCT(&recv_convertor, ompi_osc_base_convertor_t);
recv_convertor.op = op;
recv_convertor.datatype = ompi_datatype_get_single_predefined_type_from_args(target_dt);
opal_convertor_copy_and_prepare_for_recv(ompi_proc_local()->proc_convertor,
&(target_dt->super), target_count,
target, 0, &recv_convertor.convertor);
memcpy(&master, recv_convertor.convertor.master, sizeof(struct opal_convertor_master_t));
master.next = recv_convertor.convertor.master;
master.pFunctions = (conversion_fct_t*) &ompi_osc_base_copy_functions;
recv_convertor.convertor.master = &master;
recv_convertor.convertor.fAdvance = opal_unpack_general;
/* copy */
iov.iov_len = length = 64 * 1024;
iov.iov_base = (IOVBASE_TYPE*)malloc( length * sizeof(char) );
completed = 0;
while(0 == completed) {
iov.iov_len = length;
iov_count = 1;
max_data = length;
completed |= opal_convertor_pack( &send_convertor, &iov, &iov_count, &max_data );
completed |= opal_convertor_unpack( &recv_convertor.convertor, &iov, &iov_count, &max_data );
}
free( iov.iov_base );
OBJ_DESTRUCT( &send_convertor );
OBJ_DESTRUCT( &recv_convertor );
}
return OMPI_SUCCESS;
}

View File

@@ -117,4 +117,12 @@ OMPI_DECLSPEC int ompi_osc_base_process_op(void *outbuf,
int count,
ompi_op_t *op);
OMPI_DECLSPEC int ompi_osc_base_sndrcv_op(void *origin,
int32_t origin_count,
struct ompi_datatype_t *origin_dt,
void *target,
int32_t target_count,
struct ompi_datatype_t *target_dt,
ompi_op_t *op);
END_C_DECLS

View File

@@ -49,7 +49,7 @@ struct ompi_communicator_t;
struct ompi_group_t;
struct ompi_datatype_t;
struct ompi_op_t;
struct ompi_request_t;
/* ******************************************************************** */
@@ -111,9 +111,12 @@ typedef int (*ompi_osc_base_component_finalize_fn_t)(void);
* @retval >= 0 The priority of the component for this window
*/
typedef int (*ompi_osc_base_component_query_fn_t)(struct ompi_win_t *win,
void **base,
size_t size,
int disp_unit,
struct ompi_communicator_t *comm,
struct ompi_info_t *info,
struct ompi_communicator_t *comm);
int flavor);
/**
* OSC component select
@@ -140,9 +143,13 @@ typedef int (*ompi_osc_base_component_query_fn_t)(struct ompi_win_t *win,
* @retval OMPI_ERROR An unspecified error occurred
*/
typedef int (*ompi_osc_base_component_select_fn_t)(struct ompi_win_t *win,
void **base,
size_t size,
int disp_unit,
struct ompi_communicator_t *comm,
struct ompi_info_t *info,
struct ompi_communicator_t *comm);
int flavor,
int *model);
/**
* OSC component interface
@@ -171,6 +178,11 @@ typedef ompi_osc_base_component_2_0_0_t ompi_osc_base_component_t;
/* ******************************************************************** */
typedef int (*ompi_osc_base_module_win_shared_query_fn_t)(struct ompi_win_t *win, int rank,
size_t *size, int *disp_unit, void *baseptr);
typedef int (*ompi_osc_base_module_win_attach_fn_t)(struct ompi_win_t *win, void *base, size_t size);
typedef int (*ompi_osc_base_module_win_detach_fn_t)(struct ompi_win_t *win, void *base);
/**
* Free resources associated with win
@@ -220,6 +232,80 @@ typedef int (*ompi_osc_base_module_accumulate_fn_t)(void *origin_addr,
struct ompi_op_t *op,
struct ompi_win_t *win);
typedef int (*ompi_osc_base_module_compare_and_swap_fn_t)(void *origin_addr,
void *compare_addr,
void *result_addr,
struct ompi_datatype_t *dt,
int target,
OPAL_PTRDIFF_TYPE target_disp,
struct ompi_win_t *win);
typedef int (*ompi_osc_base_module_fetch_and_op_fn_t)(void *origin_addr,
void *result_addr,
struct ompi_datatype_t *dt,
int target,
OPAL_PTRDIFF_TYPE target_disp,
struct ompi_op_t *op,
struct ompi_win_t *win);
typedef int (*ompi_osc_base_module_get_accumulate_fn_t)(void *origin_addr,
int origin_count,
struct ompi_datatype_t *origin_datatype,
void *result_addr,
int result_count,
struct ompi_datatype_t *result_datatype,
int target_rank,
OPAL_PTRDIFF_TYPE target_disp,
int target_count,
struct ompi_datatype_t *target_datatype,
struct ompi_op_t *op,
struct ompi_win_t *win);
typedef int (*ompi_osc_base_module_rput_fn_t)(void *origin_addr,
int origin_count,
struct ompi_datatype_t *origin_dt,
int target,
OPAL_PTRDIFF_TYPE target_disp,
int target_count,
struct ompi_datatype_t *target_dt,
struct ompi_win_t *win,
struct ompi_request_t **request);
typedef int (*ompi_osc_base_module_rget_fn_t)(void *origin_addr,
int origin_count,
struct ompi_datatype_t *origin_dt,
int target,
OPAL_PTRDIFF_TYPE target_disp,
int target_count,
struct ompi_datatype_t *target_dt,
struct ompi_win_t *win,
struct ompi_request_t **request);
typedef int (*ompi_osc_base_module_raccumulate_fn_t)(void *origin_addr,
int origin_count,
struct ompi_datatype_t *origin_dt,
int target,
OPAL_PTRDIFF_TYPE target_disp,
int target_count,
struct ompi_datatype_t *target_dt,
struct ompi_op_t *op,
struct ompi_win_t *win,
struct ompi_request_t **request);
typedef int (*ompi_osc_base_module_rget_accumulate_fn_t)(void *origin_addr,
int origin_count,
struct ompi_datatype_t *origin_datatype,
void *result_addr,
int result_count,
struct ompi_datatype_t *result_datatype,
int target_rank,
OPAL_PTRDIFF_TYPE target_disp,
int target_count,
struct ompi_datatype_t *target_datatype,
struct ompi_op_t *op,
struct ompi_win_t *win,
struct ompi_request_t **request);
typedef int (*ompi_osc_base_module_fence_fn_t)(int assert, struct ompi_win_t *win);
@@ -249,10 +335,26 @@ typedef int (*ompi_osc_base_module_lock_fn_t)(int lock_type,
int assert,
struct ompi_win_t *win);
typedef int (*ompi_osc_base_module_unlock_fn_t)(int target,
struct ompi_win_t *win);
typedef int (*ompi_osc_base_module_lock_all_fn_t)(int assert,
struct ompi_win_t *win);
typedef int (*ompi_osc_base_module_unlock_all_fn_t)(struct ompi_win_t *win);
typedef int (*ompi_osc_base_module_sync_fn_t)(struct ompi_win_t *win);
typedef int (*ompi_osc_base_module_flush_fn_t)(int target,
struct ompi_win_t *win);
typedef int (*ompi_osc_base_module_flush_all_fn_t)(struct ompi_win_t *win);
typedef int (*ompi_osc_base_module_flush_local_fn_t)(int target,
struct ompi_win_t *win);
typedef int (*ompi_osc_base_module_flush_local_all_fn_t)(struct ompi_win_t *win);
typedef int (*ompi_osc_base_module_set_info_fn_t)(struct ompi_win_t *win, struct ompi_info_t *info);
typedef int (*ompi_osc_base_module_get_info_fn_t)(struct ompi_win_t *win, struct ompi_info_t **info_used);
/* ******************************************************************** */
@@ -266,47 +368,58 @@ typedef int (*ompi_osc_base_module_unlock_fn_t)(int target,
* free to create a structure that inherits this one for use as the
* module structure.
*/
struct ompi_osc_base_module_1_0_0_t {
/** Free resources associated with the window */
struct ompi_osc_base_module_3_0_0_t {
ompi_osc_base_module_win_shared_query_fn_t osc_win_shared_query;
ompi_osc_base_module_win_attach_fn_t osc_win_attach;
ompi_osc_base_module_win_detach_fn_t osc_win_detach;
ompi_osc_base_module_free_fn_t osc_free;
/** Implement MPI_PUT */
ompi_osc_base_module_put_fn_t osc_put;
/** Implement MPI_GET */
ompi_osc_base_module_get_fn_t osc_get;
/** Implement MPI_ACCUMULATE */
ompi_osc_base_module_accumulate_fn_t osc_accumulate;
ompi_osc_base_module_compare_and_swap_fn_t osc_compare_and_swap;
ompi_osc_base_module_fetch_and_op_fn_t osc_fetch_and_op;
ompi_osc_base_module_get_accumulate_fn_t osc_get_accumulate;
ompi_osc_base_module_rput_fn_t osc_rput;
ompi_osc_base_module_rget_fn_t osc_rget;
ompi_osc_base_module_raccumulate_fn_t osc_raccumulate;
ompi_osc_base_module_rget_accumulate_fn_t osc_rget_accumulate;
/** Implement MPI_WIN_FENCE */
ompi_osc_base_module_fence_fn_t osc_fence;
/* Implement MPI_WIN_START */
ompi_osc_base_module_start_fn_t osc_start;
/* Implement MPI_WIN_COMPLETE */
ompi_osc_base_module_complete_fn_t osc_complete;
/* Implement MPI_WIN_POST */
ompi_osc_base_module_post_fn_t osc_post;
/* Implement MPI_WIN_WAIT */
ompi_osc_base_module_wait_fn_t osc_wait;
/* Implement MPI_WIN_TEST */
ompi_osc_base_module_test_fn_t osc_test;
/* Implement MPI_WIN_LOCK */
ompi_osc_base_module_lock_fn_t osc_lock;
/* Implement MPI_WIN_UNLOCK */
ompi_osc_base_module_unlock_fn_t osc_unlock;
ompi_osc_base_module_lock_all_fn_t osc_lock_all;
ompi_osc_base_module_unlock_all_fn_t osc_unlock_all;
ompi_osc_base_module_sync_fn_t osc_sync;
ompi_osc_base_module_flush_fn_t osc_flush;
ompi_osc_base_module_flush_all_fn_t osc_flush_all;
ompi_osc_base_module_flush_local_fn_t osc_flush_local;
ompi_osc_base_module_flush_local_all_fn_t osc_flush_local_all;
ompi_osc_base_module_set_info_fn_t osc_set_info;
ompi_osc_base_module_get_info_fn_t osc_get_info;
};
typedef struct ompi_osc_base_module_1_0_0_t ompi_osc_base_module_1_0_0_t;
typedef ompi_osc_base_module_1_0_0_t ompi_osc_base_module_t;
typedef struct ompi_osc_base_module_3_0_0_t ompi_osc_base_module_3_0_0_t;
typedef ompi_osc_base_module_3_0_0_t ompi_osc_base_module_t;
/* ******************************************************************** */
/** Macro for use in components that are of type osc */
#define OMPI_OSC_BASE_VERSION_2_0_0 \
#define OMPI_OSC_BASE_VERSION_3_0_0 \
MCA_BASE_VERSION_2_0_0, \
"osc", 2, 0, 0
"osc", 3, 0, 0
/* ******************************************************************** */

View File

@@ -0,0 +1,43 @@
#
# Copyright (c) 2011 Sandia National Laboratories. All rights reserved.
# $COPYRIGHT$
#
# Additional copyrights may follow
#
# $HEADER$
#
EXTRA_DIST =
portals4_sources = \
osc_portals4.h \
osc_portals4_comm.c \
osc_portals4_component.c \
osc_portals4_active_target.c \
osc_portals4_passive_target.c \
osc_portals4_request.c
AM_CPPFLAGS = $(osc_portals4_CPPFLAGS)
# Make the output library in this directory, and name it either
# mca_<type>_<name>.la (for DSO builds) or libmca_<type>_<name>.la
# (for static builds).
if MCA_BUILD_ompi_osc_portals4_DSO
component_noinst =
component_install = mca_osc_portals4.la
else
component_noinst = libmca_osc_portals4.la
component_install =
endif
mcacomponentdir = $(pkglibdir)
mcacomponent_LTLIBRARIES = $(component_install)
mca_osc_portals4_la_SOURCES = $(portals4_sources)
mca_osc_portals4_la_LIBADD = $(osc_portals4_LIBS)
mca_osc_portals4_la_LDFLAGS = -module -avoid-version $(osc_portals4_LDFLAGS)
noinst_LTLIBRARIES = $(component_noinst)
libmca_osc_portals4_la_SOURCES = $(portals4_sources)
libmca_osc_portals4_la_LIBADD = $(osc_portals4_LIBS)
libmca_osc_portals4_la_LDFLAGS = -module -avoid-version $(osc_portals4_LDFLAGS)

View File

@@ -0,0 +1,42 @@
# -*- shell-script -*-
#
# Copyright (c) 2011 Sandia National Laboratories. All rights reserved.
# $COPYRIGHT$
#
# Additional copyrights may follow
#
# $HEADER$
#
# MCA_ompi_osc_portals4_POST_CONFIG(will_build)
# ----------------------------------------
# Only require the tag if we're actually going to be built
AC_DEFUN([MCA_ompi_osc_portals4_POST_CONFIG], [
AS_IF([test "$1" = "1"], [OMPI_REQUIRE_ENDPOINT_TAG([PORTALS4])])
])dnl
# MCA_osc_portals4_CONFIG(action-if-can-compile,
# [action-if-cant-compile])
# ------------------------------------------------
AC_DEFUN([MCA_ompi_osc_portals4_CONFIG],[
AC_CONFIG_FILES([ompi/mca/osc/portals4/Makefile])
OMPI_CHECK_PORTALS4([osc_portals4],
[osc_portals4_happy="yes"],
[osc_portals4_happy="no"])
AS_IF([test "$osc_portals4_happy" = "yes"],
[osc_portals4_WRAPPER_EXTRA_LDFLAGS="$osc_portals4_LDFLAGS"
osc_portals4_WRAPPER_EXTRA_LIBS="$osc_portals4_LIBS"
$1],
[$2])
# need to propagate CPPFLAGS to all of OMPI
AS_IF([test "$DIRECT_osc" = "portals4"],
[CPPFLAGS="$CPPFLAGS $osc_portals4_CPPFLAGS"])
# substitute in the things needed to build portals4
AC_SUBST([osc_portals4_CPPFLAGS])
AC_SUBST([osc_portals4_LDFLAGS])
AC_SUBST([osc_portals4_LIBS])
])dnl

View File

@@ -0,0 +1,338 @@
/*
* Copyright (c) 2011-2013 Sandia National Laboratories. All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
#ifndef OSC_PORTALS4_PORTALS4_H
#define OSC_PORTALS4_PORTALS4_H
#include <portals4.h>
#include "ompi/class/ompi_free_list.h"
#include "ompi/group/group.h"
#include "ompi/communicator/communicator.h"
#define OSC_PORTALS4_MB_DATA 0x0000000000000000ULL
#define OSC_PORTALS4_MB_CONTROL 0x1000000000000000ULL
/* Component structure. There is one of these per process, per process lifetime.
*
* Currently, the Portals 4 one-sided implementation only uses a
* matching interface for all communication. There are plans for
* using a non-matching interface for a few windows (they each need
* their own PTE, which is a precious resource). In anticipation of
* that, we initialize the network interfaces and keep them in the
* component structure (for win create), but then also keep a handle
* copy in the window module, so that we can use the right structures
* once we add the non-matching support.
*
* The sizes are kept in the component structure because we can only
* find them during PtlNIInit, and it would be poor to do that for
* every window creation. Again, the window module has a copy of the
* max sizes, but tweaked to match the window configuration (ie,
* there's one atomic size, instead of an ordered and unordered size,
* since we know the ordering constraints during window creation).
*/
struct ompi_osc_portals4_component_t {
ompi_osc_base_component_t super;
ptl_handle_ni_t matching_ni_h;
ptl_handle_eq_t matching_eq_h;
ptl_pt_index_t matching_pt_idx;
ptl_size_t matching_atomic_max;
ptl_size_t matching_fetch_atomic_max;
ptl_size_t matching_atomic_ordered_size;
ompi_free_list_t requests; /* request free list for the r* communication variants */
};
typedef struct ompi_osc_portals4_component_t ompi_osc_portals4_component_t;
OMPI_DECLSPEC extern ompi_osc_portals4_component_t mca_osc_portals4_component;
/* Data about me exposed to remote peers. Used in generalized active
target and passive target synchronization. */
struct ompi_osc_portals4_node_state_t {
volatile int32_t post_count;
volatile int32_t complete_count;
volatile uint64_t lock;
};
typedef struct ompi_osc_portals4_node_state_t ompi_osc_portals4_node_state_t;
#define LOCK_ILLEGAL (0x4000000000000000ULL)
#define LOCK_UNLOCKED (0x0000000000000000ULL)
#define LOCK_EXCLUSIVE (0x0000000100000000ULL)
/* Module structure. There is one of these per window */
struct ompi_osc_portals4_module_t {
ompi_osc_base_module_t super;
void *free_after; /* if non-null, this pointer should be free()ed when window destroyed */
struct ompi_communicator_t *comm; /* communicator which backs this window (unique to this window) */
int disp_unit; /* if -1, have to look at disp_units */
int *disp_units; /* array (possibly NULL!) of displacement units, per peer */
ptl_handle_ni_t ni_h; /* network interface used by this window */
ptl_pt_index_t pt_idx; /* portal table index used by this window (this will be same across window) */
ptl_handle_ct_t ct_h; /* Counting event handle used for completion in this window */
#if OMPI_PORTALS4_MAX_MD_SIZE < OMPI_PORTALS4_MAX_VA_SIZE
ptl_handle_md_t *md_h; /* memory descriptor describing all of memory used by this window */
ptl_handle_md_t *req_md_h; /* memory descriptor with event completion used by this window */
#else
ptl_handle_md_t md_h[1]; /* memory descriptor describing all of memory used by this window */
ptl_handle_md_t req_md_h[1]; /* memory descriptor with event completion used by this window */
#endif
ptl_handle_me_t data_me_h; /* data match list entry (MB are CID | OSC_PORTALS4_MB_DATA) */
ptl_handle_me_t control_me_h; /* match list entry for control data (node_state_t). Match bits are (CID | OSC_PORTALS4_MB_CONTROL). */
int64_t opcount;
ptl_match_bits_t match_bits; /* match bits for module. Same as cid for comm in most cases. */
ptl_size_t atomic_max; /* max size of atomic messages. Will guarantee ordering IF ordering requested */
ptl_size_t fetch_atomic_max; /* max size of fetchatomic messages. Will guarantee ordering IF ordering requested */
/* variable containing specified value. Needed for atomic
increments so they can be non-blocking */
int32_t zero;
int32_t one;
ompi_group_t *start_group;
ompi_group_t *post_group;
opal_list_t outstanding_locks;
/* things that are remotely accessible */
ompi_osc_portals4_node_state_t state;
};
typedef struct ompi_osc_portals4_module_t ompi_osc_portals4_module_t;
static inline size_t
get_displacement(ompi_osc_portals4_module_t *module,
int target)
{
if (-1 == module->disp_unit) {
return module->disp_units[target];
} else {
return module->disp_unit;
}
}
/*
* See note in ompi/mtl/portals4/mtl_portals4.h for how we deal with
* platforms that don't allow us to create an MD that covers all of
* memory.
*/
static inline void
ompi_osc_portals4_get_md(const void *ptr, const ptl_handle_md_t *array,
ptl_handle_md_t *md_h, void **base_ptr)
{
#if OMPI_PORTALS4_MAX_MD_SIZE < OMPI_PORTALS4_MAX_VA_SIZE
int mask = (1ULL << (OMPI_PORTALS4_MAX_VA_SIZE - OMPI_PORTALS4_MAX_MD_SIZE + 1)) - 1;
int which = (((uintptr_t) ptr) >> (OMPI_PORTALS4_MAX_MD_SIZE - 1)) & mask;
*md_h = array[which];
*base_ptr = (void*) (which * (1ULL << (OMPI_PORTALS4_MAX_MD_SIZE - 1)));
#else
*md_h = array[0];
*base_ptr = 0;
#endif
}
static inline int
ompi_osc_portals4_get_num_mds(void)
{
#if OMPI_PORTALS4_MAX_MD_SIZE < OMPI_PORTALS4_MAX_VA_SIZE
return (1 << (OMPI_PORTALS4_MAX_VA_SIZE - OMPI_PORTALS4_MAX_MD_SIZE + 1));
#else
return 1;
#endif
}
int ompi_osc_portals4_attach(struct ompi_win_t *win, void *base, size_t len);
int ompi_osc_portals4_detach(struct ompi_win_t *win, void *base);
int ompi_osc_portals4_free(struct ompi_win_t *win);
int ompi_osc_portals4_put(void *origin_addr,
int origin_count,
struct ompi_datatype_t *origin_dt,
int target,
OPAL_PTRDIFF_TYPE target_disp,
int target_count,
struct ompi_datatype_t *target_dt,
struct ompi_win_t *win);
int ompi_osc_portals4_get(void *origin_addr,
int origin_count,
struct ompi_datatype_t *origin_dt,
int target,
OPAL_PTRDIFF_TYPE target_disp,
int target_count,
struct ompi_datatype_t *target_dt,
struct ompi_win_t *win);
int ompi_osc_portals4_accumulate(void *origin_addr,
int origin_count,
struct ompi_datatype_t *origin_dt,
int target,
OPAL_PTRDIFF_TYPE target_disp,
int target_count,
struct ompi_datatype_t *target_dt,
struct ompi_op_t *op,
struct ompi_win_t *win);
int ompi_osc_portals4_compare_and_swap(void *origin_addr,
void *compare_addr,
void *result_addr,
struct ompi_datatype_t *dt,
int target,
OPAL_PTRDIFF_TYPE target_disp,
struct ompi_win_t *win);
int ompi_osc_portals4_fetch_and_op(void *origin_addr,
void *result_addr,
struct ompi_datatype_t *dt,
int target,
OPAL_PTRDIFF_TYPE target_disp,
struct ompi_op_t *op,
struct ompi_win_t *win);
int ompi_osc_portals4_get_accumulate(void *origin_addr,
int origin_count,
struct ompi_datatype_t *origin_datatype,
void *result_addr,
int result_count,
struct ompi_datatype_t *result_datatype,
int target_rank,
MPI_Aint target_disp,
int target_count,
struct ompi_datatype_t *target_datatype,
struct ompi_op_t *op,
struct ompi_win_t *win);
int ompi_osc_portals4_rput(void *origin_addr,
int origin_count,
struct ompi_datatype_t *origin_dt,
int target,
OPAL_PTRDIFF_TYPE target_disp,
int target_count,
struct ompi_datatype_t *target_dt,
struct ompi_win_t *win,
struct ompi_request_t **request);
int ompi_osc_portals4_rget(void *origin_addr,
int origin_count,
struct ompi_datatype_t *origin_dt,
int target,
OPAL_PTRDIFF_TYPE target_disp,
int target_count,
struct ompi_datatype_t *target_dt,
struct ompi_win_t *win,
struct ompi_request_t **request);
int ompi_osc_portals4_raccumulate(void *origin_addr,
int origin_count,
struct ompi_datatype_t *origin_dt,
int target,
OPAL_PTRDIFF_TYPE target_disp,
int target_count,
struct ompi_datatype_t *target_dt,
struct ompi_op_t *op,
struct ompi_win_t *win,
struct ompi_request_t **request);
int ompi_osc_portals4_rget_accumulate(void *origin_addr,
int origin_count,
struct ompi_datatype_t *origin_datatype,
void *result_addr,
int result_count,
struct ompi_datatype_t *result_datatype,
int target_rank,
MPI_Aint target_disp,
int target_count,
struct ompi_datatype_t *target_datatype,
struct ompi_op_t *op,
struct ompi_win_t *win,
struct ompi_request_t **request);
int ompi_osc_portals4_fence(int assert, struct ompi_win_t *win);
int ompi_osc_portals4_start(struct ompi_group_t *group,
int assert,
struct ompi_win_t *win);
int ompi_osc_portals4_complete(struct ompi_win_t *win);
int ompi_osc_portals4_post(struct ompi_group_t *group,
int assert,
struct ompi_win_t *win);
int ompi_osc_portals4_wait(struct ompi_win_t *win);
int ompi_osc_portals4_test(struct ompi_win_t *win,
int *flag);
int ompi_osc_portals4_lock(int lock_type,
int target,
int assert,
struct ompi_win_t *win);
int ompi_osc_portals4_unlock(int target,
struct ompi_win_t *win);
int ompi_osc_portals4_lock_all(int assert,
struct ompi_win_t *win);
int ompi_osc_portals4_unlock_all(struct ompi_win_t *win);
int ompi_osc_portals4_sync(struct ompi_win_t *win);
int ompi_osc_portals4_flush(int target,
struct ompi_win_t *win);
int ompi_osc_portals4_flush_all(struct ompi_win_t *win);
int ompi_osc_portals4_flush_local(int target,
struct ompi_win_t *win);
int ompi_osc_portals4_flush_local_all(struct ompi_win_t *win);
int ompi_osc_portals4_set_info(struct ompi_win_t *win, struct ompi_info_t *info);
int ompi_osc_portals4_get_info(struct ompi_win_t *win, struct ompi_info_t **info_used);
static inline int
ompi_osc_portals4_complete_all(ompi_osc_portals4_module_t *module)
{
int ret;
ptl_ct_event_t event;
ret = PtlCTWait(module->ct_h, module->opcount, &event);
if (PTL_OK != ret || 0 != event.failure) {
opal_output_verbose(1, ompi_osc_base_framework.framework_output,
"%s:%d: flush_all ct failure: ret=%d, failure=%d\n",
__FILE__, __LINE__, ret, (int) event.failure);
event.success = event.failure = 0;
PtlCTSet(module->ct_h, event);
module->opcount = 0;
}
assert(event.success == (size_t) module->opcount);
PtlAtomicSync();
return ret;
}
static inline ptl_process_t
ompi_osc_portals4_get_peer(ompi_osc_portals4_module_t *module, int rank)
{
ompi_proc_t *proc = ompi_comm_peer_lookup(module->comm, rank);
return *((ptl_process_t*) proc->proc_endpoints[OMPI_PROC_ENDPOINT_TAG_PORTALS4]);
}
static inline ptl_process_t
ompi_osc_portals4_get_peer_group(struct ompi_group_t *group, int rank)
{
ompi_proc_t *proc = ompi_group_get_proc_ptr(group, rank);
return *((ptl_process_t*) proc->proc_endpoints[OMPI_PROC_ENDPOINT_TAG_PORTALS4]);
}
#endif

View File

@@ -0,0 +1,192 @@
/*
* Copyright (c) 2011 Sandia National Laboratories. All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
#include "ompi_config.h"
#include "ompi/mca/osc/osc.h"
#include "ompi/mca/osc/base/base.h"
#include "ompi/mca/osc/base/osc_base_obj_convert.h"
#include "osc_portals4.h"
#include "ompi/mca/mtl/portals4/mtl_portals4_endpoint.h"
int
ompi_osc_portals4_fence(int assert, struct ompi_win_t *win)
{
ompi_osc_portals4_module_t *module =
(ompi_osc_portals4_module_t*) win->w_osc_module;
int comm_ret, ret;
comm_ret = ompi_osc_portals4_complete_all(module);
ret = module->comm->c_coll.coll_barrier(module->comm,
module->comm->c_coll.coll_barrier_module);
return (OMPI_SUCCESS == comm_ret) ? ret : comm_ret;
}
int
ompi_osc_portals4_start(struct ompi_group_t *group,
int assert,
struct ompi_win_t *win)
{
ompi_osc_portals4_module_t *module =
(ompi_osc_portals4_module_t*) win->w_osc_module;
if (0 == (assert & MPI_MODE_NOCHECK)) {
int size;
OBJ_RETAIN(group);
module->start_group = group;
size = ompi_group_size(module->start_group);
while (module->state.post_count != size) opal_progress();
} else {
module->start_group = NULL;
}
return OMPI_SUCCESS;
}
int
ompi_osc_portals4_complete(struct ompi_win_t *win)
{
ompi_osc_portals4_module_t *module =
(ompi_osc_portals4_module_t*) win->w_osc_module;
int ret, i, size;
ptl_handle_md_t md_h;
void *base;
ret = ompi_osc_portals4_complete_all(module);
if (ret != OMPI_SUCCESS) return ret;
if (NULL != module->start_group) {
module->state.post_count = 0;
PtlAtomicSync();
ompi_osc_portals4_get_md(&module->one, module->md_h, &md_h, &base);
size = ompi_group_size(module->start_group);
for (i = 0 ; i < size ; ++i) {
ret = PtlAtomic(md_h,
(ptl_size_t) ((char*) &module->one - (char*) base),
sizeof(module->one),
PTL_ACK_REQ,
ompi_osc_portals4_get_peer_group(module->start_group, i),
module->pt_idx,
module->match_bits | OSC_PORTALS4_MB_CONTROL,
offsetof(ompi_osc_portals4_node_state_t, complete_count),
NULL,
0,
PTL_SUM,
PTL_INT32_T);
if (ret != OMPI_SUCCESS) return ret;
OPAL_THREAD_ADD64(&module->opcount, 1);
}
ret = ompi_osc_portals4_complete_all(module);
if (ret != OMPI_SUCCESS) return ret;
OBJ_RELEASE(module->start_group);
module->start_group = NULL;
}
return OMPI_SUCCESS;
}
int
ompi_osc_portals4_post(struct ompi_group_t *group,
int assert,
struct ompi_win_t *win)
{
ompi_osc_portals4_module_t *module =
(ompi_osc_portals4_module_t*) win->w_osc_module;
int ret, i, size;
ptl_handle_md_t md_h;
void *base;
if (0 == (assert & MPI_MODE_NOCHECK)) {
OBJ_RETAIN(group);
module->post_group = group;
module->state.complete_count = 0;
PtlAtomicSync();
ompi_osc_portals4_get_md(&module->one, module->md_h, &md_h, &base);
size = ompi_group_size(module->post_group);
for (i = 0 ; i < size ; ++i) {
ret = PtlAtomic(md_h,
(ptl_size_t) ((char*) &module->one - (char*) base),
sizeof(module->one),
PTL_ACK_REQ,
ompi_osc_portals4_get_peer_group(module->post_group, i),
module->pt_idx,
module->match_bits | OSC_PORTALS4_MB_CONTROL,
offsetof(ompi_osc_portals4_node_state_t, post_count),
NULL,
0,
PTL_SUM,
PTL_INT32_T);
if (ret != OMPI_SUCCESS) return ret;
OPAL_THREAD_ADD64(&module->opcount, 1);
}
} else {
module->post_group = NULL;
}
return OMPI_SUCCESS;
}
int
ompi_osc_portals4_wait(struct ompi_win_t *win)
{
ompi_osc_portals4_module_t *module =
(ompi_osc_portals4_module_t*) win->w_osc_module;
if (NULL != module->post_group) {
int size = ompi_group_size(module->post_group);
while (module->state.complete_count != size) opal_progress();
OBJ_RELEASE(module->post_group);
module->post_group = NULL;
}
return OMPI_SUCCESS;
}
int
ompi_osc_portals4_test(struct ompi_win_t *win,
int *flag)
{
ompi_osc_portals4_module_t *module =
(ompi_osc_portals4_module_t*) win->w_osc_module;
if (NULL != module->post_group) {
int size = ompi_group_size(module->post_group);
if (module->state.complete_count == size) {
OBJ_RELEASE(module->post_group);
module->post_group = NULL;
*flag = 1;
}
} else {
*flag = 0;
}
return OMPI_SUCCESS;
}
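For reference, a sketch of the MPI-level generalized active target (post/start/complete/wait) pattern that the functions above implement; 'win', the groups, ranks and buffers are hypothetical:

/* origin processes */
MPI_Win_start(target_group, 0, win);
MPI_Put(buf, n, MPI_INT, target_rank, 0, n, MPI_INT, win);
MPI_Win_complete(win);              /* ends the access epoch */

/* target processes */
MPI_Win_post(origin_group, 0, win);
MPI_Win_wait(win);                  /* or poll with MPI_Win_test(win, &flag) */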

File diff suppressed because it is too large

View File

@@ -0,0 +1,716 @@
/*
* Copyright (c) 2011-2013 Sandia National Laboratories. All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
#include "ompi_config.h"
#include "ompi/mca/osc/osc.h"
#include "ompi/mca/osc/base/base.h"
#include "ompi/mca/osc/base/osc_base_obj_convert.h"
#include "ompi/request/request.h"
#include "ompi/class/ompi_free_list.h"
#include "osc_portals4.h"
#include "osc_portals4_request.h"
static int component_open(void);
static int component_register(void);
static int component_init(bool enable_progress_threads, bool enable_mpi_threads);
static int component_finalize(void);
static int component_query(struct ompi_win_t *win, void **base, size_t size, int disp_unit,
struct ompi_communicator_t *comm, struct ompi_info_t *info,
int flavor);
static int component_select(struct ompi_win_t *win, void **base, size_t size, int disp_unit,
struct ompi_communicator_t *comm, struct ompi_info_t *info,
int flavor, int *model);
ompi_osc_portals4_component_t mca_osc_portals4_component = {
{ /* ompi_osc_base_component_t */
{ /* ompi_base_component_t */
OMPI_OSC_BASE_VERSION_3_0_0,
"portals4",
OMPI_MAJOR_VERSION, /* MCA component major version */
OMPI_MINOR_VERSION, /* MCA component minor version */
OMPI_RELEASE_VERSION, /* MCA component release version */
component_open,
NULL,
NULL,
component_register
},
{ /* mca_base_component_data */
/* The component is not checkpoint ready */
MCA_BASE_METADATA_PARAM_NONE
},
component_init,
component_query,
component_select,
component_finalize
}
};
ompi_osc_portals4_module_t ompi_osc_portals4_module_template = {
{
NULL, /* shared_query */
ompi_osc_portals4_attach,
ompi_osc_portals4_detach,
ompi_osc_portals4_free,
ompi_osc_portals4_put,
ompi_osc_portals4_get,
ompi_osc_portals4_accumulate,
ompi_osc_portals4_compare_and_swap,
ompi_osc_portals4_fetch_and_op,
ompi_osc_portals4_get_accumulate,
ompi_osc_portals4_rput,
ompi_osc_portals4_rget,
ompi_osc_portals4_raccumulate,
ompi_osc_portals4_rget_accumulate,
ompi_osc_portals4_fence,
ompi_osc_portals4_start,
ompi_osc_portals4_complete,
ompi_osc_portals4_post,
ompi_osc_portals4_wait,
ompi_osc_portals4_test,
ompi_osc_portals4_lock,
ompi_osc_portals4_unlock,
ompi_osc_portals4_lock_all,
ompi_osc_portals4_unlock_all,
ompi_osc_portals4_sync,
ompi_osc_portals4_flush,
ompi_osc_portals4_flush_all,
ompi_osc_portals4_flush_local,
ompi_osc_portals4_flush_local_all,
ompi_osc_portals4_set_info,
ompi_osc_portals4_get_info
}
};
/* look up parameters for configuring this window. The code first
looks in the info structure passed by the user, then through mca
parameters. */
static bool
check_config_value_bool(char *key, ompi_info_t *info)
{
char *value_string;
int value_len, ret, flag, param;
const bool *flag_value;
bool result;
ret = ompi_info_get_valuelen(info, key, &value_len, &flag);
if (OMPI_SUCCESS != ret) goto info_not_found;
if (flag == 0) goto info_not_found;
value_len++;
value_string = (char*)malloc(sizeof(char) * value_len + 1); /* extra byte to guarantee room for the NUL terminator */
if (NULL == value_string) goto info_not_found;
ret = ompi_info_get(info, key, value_len, value_string, &flag);
if (OMPI_SUCCESS != ret) {
free(value_string);
goto info_not_found;
}
assert(flag != 0);
ret = ompi_info_value_to_bool(value_string, &result);
free(value_string);
if (OMPI_SUCCESS != ret) goto info_not_found;
return result;
info_not_found:
param = mca_base_var_find("ompi", "osc", "portals4", key);
if (0 > param) return false;
ret = mca_base_var_get_value(param, &flag_value, NULL, NULL);
if (OMPI_SUCCESS != ret) return false;
return flag_value[0];
}
static bool
check_config_value_equal(char *key, ompi_info_t *info, char *value)
{
char *value_string;
int value_len, ret, flag, param;
const bool *flag_value;
bool result = false;
ret = ompi_info_get_valuelen(info, key, &value_len, &flag);
if (OMPI_SUCCESS != ret) goto info_not_found;
if (flag == 0) goto info_not_found;
value_len++;
value_string = (char*)malloc(sizeof(char) * value_len + 1); /* extra byte to guarantee room for the NUL terminator */
if (NULL == value_string) goto info_not_found;
ret = ompi_info_get(info, key, value_len, value_string, &flag);
if (OMPI_SUCCESS != ret) {
free(value_string);
goto info_not_found;
}
assert(flag != 0);
if (0 == strcmp(value_string, value)) result = true;
free(value_string);
return result;
info_not_found:
param = mca_base_var_find("ompi", "osc", "portals4", key);
if (0 > param) return false;
ret = mca_base_var_get_value(param, &flag_value, NULL, NULL);
if (OMPI_SUCCESS != ret) return false;
if (0 == strcmp(value_string, value)) result = true;
return result;
}
static int
progress_callback(void)
{
int ret, count = 0;
ptl_event_t ev;
ompi_osc_portals4_request_t *req;
int32_t ops;
while (true) {
ret = PtlEQGet(mca_osc_portals4_component.matching_eq_h, &ev);
if (PTL_OK == ret) {
goto process;
} else if (PTL_EQ_DROPPED == ret) {
opal_output_verbose(1, ompi_osc_base_framework.framework_output,
"%s:%d: PtlEQGet reported dropped event",
__FILE__, __LINE__);
goto process;
} else if (PTL_EQ_EMPTY == ret) {
return 0;
} else {
opal_output_verbose(1, ompi_osc_base_framework.framework_output,
"%s:%d: PtlEQGet failed: %d\n",
__FILE__, __LINE__, ret);
return 0;
}
process:
if (ev.ni_fail_type != PTL_OK) {
opal_output_verbose(1, ompi_osc_base_framework.framework_output,
"%s:%d: event failure: %d %d",
__FILE__, __LINE__, ev.type, ev.ni_fail_type);
return 0;
}
count++;
if (NULL != ev.user_ptr) {
/* can't disable send events, but they don't count in ops */
if (ev.type == PTL_EVENT_SEND) continue;
req = (ompi_osc_portals4_request_t*) ev.user_ptr;
opal_atomic_add_size_t(&req->super.req_status._ucount, ev.mlength);
ops = opal_atomic_add_32(&req->ops_committed, 1);
if (ops == req->ops_expected) {
OPAL_THREAD_LOCK(&ompi_request_lock);
ompi_request_complete(&req->super, true);
OPAL_THREAD_UNLOCK(&ompi_request_lock);
}
}
}
return count;
}
static int
component_open(void)
{
return OMPI_SUCCESS;
}
static int
component_register(void)
{
bool ompi_osc_portals4_no_locks = false;
(void) mca_base_component_var_register(&mca_osc_portals4_component.super.osc_version,
"no_locks",
"Enable optimizations available only if MPI_LOCK is "
"not used. "
"Info key of same name overrides this value.",
MCA_BASE_VAR_TYPE_BOOL, NULL, 0, 0,
OPAL_INFO_LVL_9,
MCA_BASE_VAR_SCOPE_READONLY,
&ompi_osc_portals4_no_locks);
return OMPI_SUCCESS;
}
static int
component_init(bool enable_progress_threads, bool enable_mpi_threads)
{
int ret;
ptl_ni_limits_t actual;
ret = PtlInit();
if (PTL_OK != ret) {
opal_output_verbose(1, ompi_osc_base_framework.framework_output,
"%s:%d: PtlInit failed: %d\n",
__FILE__, __LINE__, ret);
return OMPI_ERROR;
}
ret = PtlNIInit(PTL_IFACE_DEFAULT,
PTL_NI_PHYSICAL | PTL_NI_MATCHING,
PTL_PID_ANY,
NULL,
&actual,
&mca_osc_portals4_component.matching_ni_h);
if (PTL_OK != ret) {
opal_output_verbose(1, ompi_osc_base_framework.framework_output,
"%s:%d: PtlNIInit failed: %d\n",
__FILE__, __LINE__, ret);
return ret;
}
/* BWB: FIX ME: Need to make sure our ID matches with the MTL... */
mca_osc_portals4_component.matching_atomic_max = actual.max_atomic_size;
mca_osc_portals4_component.matching_fetch_atomic_max = actual.max_fetch_atomic_size;
mca_osc_portals4_component.matching_atomic_ordered_size =
MAX(actual.max_waw_ordered_size, actual.max_war_ordered_size);
ret = PtlEQAlloc(mca_osc_portals4_component.matching_ni_h,
4096,
&mca_osc_portals4_component.matching_eq_h);
if (PTL_OK != ret) {
opal_output_verbose(1, ompi_osc_base_framework.framework_output,
"%s:%d: PtlEQAlloc failed: %d\n",
__FILE__, __LINE__, ret);
return ret;
}
ret = PtlPTAlloc(mca_osc_portals4_component.matching_ni_h,
0,
mca_osc_portals4_component.matching_eq_h,
4,
&mca_osc_portals4_component.matching_pt_idx);
if (PTL_OK != ret) {
opal_output_verbose(1, ompi_osc_base_framework.framework_output,
"%s:%d: PtlPTAlloc failed: %d\n",
__FILE__, __LINE__, ret);
return ret;
}
OBJ_CONSTRUCT(&mca_osc_portals4_component.requests, ompi_free_list_t);
ret = ompi_free_list_init(&mca_osc_portals4_component.requests,
sizeof(ompi_osc_portals4_request_t),
OBJ_CLASS(ompi_osc_portals4_request_t),
8,
0,
8,
NULL);
if (OMPI_SUCCESS != ret) {
opal_output_verbose(1, ompi_osc_base_framework.framework_output,
"%s:%d: ompi_free_list_init failed: %d\n",
__FILE__, __LINE__, ret);
return ret;
}
ret = opal_progress_register(progress_callback);
if (OMPI_SUCCESS != ret) {
opal_output_verbose(1, ompi_osc_base_framework.framework_output,
"%s:%d: opal_progress_register failed: %d\n",
__FILE__, __LINE__, ret);
return ret;
}
return OMPI_SUCCESS;
}
static int
component_finalize(void)
{
PtlNIFini(mca_osc_portals4_component.matching_ni_h);
return OMPI_SUCCESS;
}
static int
component_query(struct ompi_win_t *win, void **base, size_t size, int disp_unit,
struct ompi_communicator_t *comm, struct ompi_info_t *info,
int flavor)
{
if (MPI_WIN_FLAVOR_SHARED == flavor) return -1;
return 20;
}
static int
component_select(struct ompi_win_t *win, void **base, size_t size, int disp_unit,
struct ompi_communicator_t *comm, struct ompi_info_t *info,
int flavor, int *model)
{
ompi_osc_portals4_module_t *module = NULL;
int ret = OMPI_ERROR;
int tmp;
ptl_md_t md;
ptl_me_t me;
char *name;
if (MPI_WIN_FLAVOR_SHARED == flavor) return OMPI_ERR_NOT_SUPPORTED;
/* create module structure */
module = (ompi_osc_portals4_module_t*)
calloc(1, sizeof(ompi_osc_portals4_module_t));
if (NULL == module) return OMPI_ERR_TEMP_OUT_OF_RESOURCE;
/* fill in the function pointer part */
memcpy(module, &ompi_osc_portals4_module_template,
sizeof(ompi_osc_base_module_t));
/* fill in our part */
if (MPI_WIN_FLAVOR_ALLOCATE == flavor) {
module->free_after = *base = malloc(size);
if (NULL == *base) goto error;
} else {
module->free_after = NULL;
}
ret = ompi_comm_dup(comm, &module->comm);
if (OMPI_SUCCESS != ret) goto error;
opal_output_verbose(1, ompi_osc_base_framework.framework_output,
"portals4 component creating window with id %d",
ompi_comm_get_cid(module->comm));
asprintf(&name, "portals4 window %d", ompi_comm_get_cid(module->comm));
ompi_win_set_name(win, name);
free(name);
/* share everyone's displacement units. Only do an allgather if
strictly necessary, since it requires O(p) state. */
tmp = disp_unit;
ret = module->comm->c_coll.coll_bcast(&tmp, 1, MPI_INT, 0,
module->comm,
module->comm->c_coll.coll_bcast_module);
if (OMPI_SUCCESS != ret) {
opal_output_verbose(1, ompi_osc_base_framework.framework_output,
"%s:%d: MPI_Bcast failed: %d\n",
__FILE__, __LINE__, ret);
goto error;
}
tmp = (tmp == disp_unit) ? 1 : 0;
ret = module->comm->c_coll.coll_allreduce(MPI_IN_PLACE, &tmp, 1, MPI_INT, MPI_LAND,
module->comm, module->comm->c_coll.coll_allreduce_module);
if (OMPI_SUCCESS != ret) goto error;
if (tmp == 1) {
module->disp_unit = disp_unit;
module->disp_units = NULL;
} else {
module->disp_unit = -1;
module->disp_units = malloc(sizeof(int) * ompi_comm_size(module->comm));
ret = module->comm->c_coll.coll_allgather(&disp_unit, 1, MPI_INT,
module->disp_units, 1, MPI_INT,
module->comm,
module->comm->c_coll.coll_allgather_module);
if (OMPI_SUCCESS != ret) goto error;
}
module->ni_h = mca_osc_portals4_component.matching_ni_h;
module->pt_idx = mca_osc_portals4_component.matching_pt_idx;
ret = PtlCTAlloc(module->ni_h, &(module->ct_h));
if (PTL_OK != ret) {
opal_output_verbose(1, ompi_osc_base_framework.framework_output,
"%s:%d: PtlCTAlloc failed: %d\n",
__FILE__, __LINE__, ret);
goto error;
}
#if OMPI_PORTALS4_MAX_MD_SIZE < OMPI_PORTALS4_MAX_VA_SIZE
{
int i;
int num_mds = ompi_mtl_portals4_get_num_mds();
ptl_size_t size = 1ULL << OMPI_PORTALS4_MAX_MD_SIZE;
ptl_size_t offset_unit = (1ULL << OMPI_PORTALS4_MAX_MD_SIZE) / 2;
module->md_h = malloc(sizeof(ptl_handle_md_t) * num_mds);
if (NULL == module->md_h) {
ret = OMPI_ERR_TEMP_OUT_OF_RESOURCE;
goto error;
}
for (i = 0 ; i < num_mds ; ++i) {
module->md_h[i] = PTL_INVALID_HANDLE;
}
module->req_md_h = malloc(sizeof(ptl_handle_md_t) * num_mds);
if (NULL == module->req_md_h) {
ret = OMPI_ERR_TEMP_OUT_OF_RESOURCE;
goto error;
}
for (i = 0 ; i < num_mds ; ++i) {
module->req_md_h[i] = PTL_INVALID_HANDLE;
}
for (i = 0 ; i < num_mds ; ++i) {
md.start = (char*) (offset_unit * i);
md.length = (i == num_mds - 1) ? size / 2 : size;
md.options = PTL_MD_EVENT_SUCCESS_DISABLE | PTL_MD_EVENT_CT_REPLY | PTL_MD_EVENT_CT_ACK;
md.eq_handle = mca_osc_portals4_component.matching_eq_h;
md.ct_handle = module->ct_h;
ret = PtlMDBind(module->ni_h, &md, &module->md_h[i]);
if (PTL_OK != ret) {
opal_output_verbose(1, ompi_osc_base_framework.framework_output,
"%s:%d: PtlMDBind failed: %d\n",
__FILE__, __LINE__, ret);
goto error;
}
md.options = PTL_MD_EVENT_CT_REPLY | PTL_MD_EVENT_CT_ACK;
md.eq_handle = mca_osc_portals4_component.matching_eq_h;
md.ct_handle = module->ct_h;
ret = PtlMDBind(module->ni_h, &md, &module->req_md_h[i]);
if (PTL_OK != ret) {
opal_output_verbose(1, ompi_osc_base_framework.framework_output,
"%s:%d: PtlMDBind failed: %d\n",
__FILE__, __LINE__, ret);
goto error;
}
}
#else
md.start = 0;
md.length = PTL_SIZE_MAX;
md.options = PTL_MD_EVENT_SUCCESS_DISABLE | PTL_MD_EVENT_CT_REPLY | PTL_MD_EVENT_CT_ACK;
md.eq_handle = mca_osc_portals4_component.matching_eq_h;
md.ct_handle = module->ct_h;
ret = PtlMDBind(module->ni_h, &md, &module->md_h[0]);
if (PTL_OK != ret) {
opal_output_verbose(1, ompi_osc_base_framework.framework_output,
"%s:%d: PtlMDBind failed: %d\n",
__FILE__, __LINE__, ret);
goto error;
}
md.start = 0;
md.length = PTL_SIZE_MAX;
md.options = PTL_MD_EVENT_CT_REPLY | PTL_MD_EVENT_CT_ACK;
md.eq_handle = mca_osc_portals4_component.matching_eq_h;
md.ct_handle = module->ct_h;
ret = PtlMDBind(module->ni_h, &md, &module->req_md_h[0]);
if (PTL_OK != ret) {
opal_output_verbose(1, ompi_osc_base_framework.framework_output,
"%s:%d: PtlMDBind failed: %d\n",
__FILE__, __LINE__, ret);
goto error;
}
#endif
if (MPI_WIN_FLAVOR_DYNAMIC == flavor) {
me.start = 0;
me.length = SIZE_MAX;
} else {
me.start = *base;
me.length = size;
}
me.ct_handle = PTL_CT_NONE;
me.uid = PTL_UID_ANY;
me.options = PTL_ME_OP_PUT | PTL_ME_OP_GET | PTL_ME_NO_TRUNCATE | PTL_ME_EVENT_SUCCESS_DISABLE;
me.match_id.phys.nid = PTL_NID_ANY;
me.match_id.phys.pid = PTL_PID_ANY;
me.match_bits = module->comm->c_contextid;
me.ignore_bits = 0;
ret = PtlMEAppend(module->ni_h,
module->pt_idx,
&me,
PTL_PRIORITY_LIST,
NULL,
&module->data_me_h);
if (PTL_OK != ret) {
opal_output_verbose(1, ompi_osc_base_framework.framework_output,
"%s:%d: PtlMEAppend failed: %d\n",
__FILE__, __LINE__, ret);
goto error;
}
me.start = &module->state;
me.length = sizeof(module->state);
me.ct_handle = PTL_CT_NONE;
me.uid = PTL_UID_ANY;
me.options = PTL_ME_OP_PUT | PTL_ME_OP_GET | PTL_ME_NO_TRUNCATE | PTL_ME_EVENT_SUCCESS_DISABLE;
me.match_id.phys.nid = PTL_NID_ANY;
me.match_id.phys.pid = PTL_PID_ANY;
me.match_bits = module->comm->c_contextid | OSC_PORTALS4_MB_CONTROL;
me.ignore_bits = 0;
ret = PtlMEAppend(module->ni_h,
module->pt_idx,
&me,
PTL_PRIORITY_LIST,
NULL,
&module->control_me_h);
if (PTL_OK != ret) {
opal_output_verbose(1, ompi_osc_base_framework.framework_output,
"%s:%d: PtlMEAppend failed: %d\n",
__FILE__, __LINE__, ret);
goto error;
}
module->opcount = 0;
module->match_bits = module->comm->c_contextid;
module->atomic_max = (check_config_value_equal("accumulate_ordering", info, "none")) ?
mca_osc_portals4_component.matching_atomic_max :
MIN(mca_osc_portals4_component.matching_atomic_max,
mca_osc_portals4_component.matching_atomic_ordered_size);
module->fetch_atomic_max = (check_config_value_equal("accumulate_ordering", info, "none")) ?
mca_osc_portals4_component.matching_fetch_atomic_max :
MIN(mca_osc_portals4_component.matching_fetch_atomic_max,
mca_osc_portals4_component.matching_atomic_ordered_size);
module->zero = 0;
module->one = 1;
module->start_group = NULL;
module->post_group = NULL;
module->state.post_count = 0;
module->state.complete_count = 0;
if (check_config_value_bool("no_locks", info)) {
module->state.lock = LOCK_ILLEGAL;
} else {
module->state.lock = LOCK_UNLOCKED;
}
OBJ_CONSTRUCT(&module->outstanding_locks, opal_list_t);
#if OPAL_ASSEMBLY_ARCH == OMPI_AMD64 || OPAL_ASSEMBLY_ARCH == OMPI_IA32
*model = MPI_WIN_UNIFIED;
#else
*model = MPI_WIN_SEPARATE;
#endif
win->w_osc_module = &module->super;
PtlAtomicSync();
/* Make sure that everyone's ready to receive. */
module->comm->c_coll.coll_barrier(module->comm,
module->comm->c_coll.coll_barrier_module);
return OMPI_SUCCESS;
error:
/* BWB: FIX ME: This is all wrong... */
if (0 != module->ct_h) PtlCTFree(module->ct_h);
if (0 != module->data_me_h) PtlMEUnlink(module->data_me_h);
#if OMPI_PORTALS4_MAX_MD_SIZE < OMPI_PORTALS4_MAX_VA_SIZE
/* BWB: FIX ME */
#else
if (0 != module->req_md_h) PtlMDRelease(module->req_md_h[0]);
if (0 != module->md_h) PtlMDRelease(module->md_h[0]);
#endif
if (NULL != module->comm) ompi_comm_free(&module->comm);
if (NULL != module) free(module);
return ret;
}
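When the Portals implementation cannot cover the whole virtual address space with a single memory descriptor, the PtlMDBind loop above binds num_mds descriptors that each span 2 * offset_unit bytes and start offset_unit apart, so adjacent descriptors overlap by half; the last one is half length so it does not run past the top of the address space. The payoff is that any buffer shorter than offset_unit lies entirely inside one descriptor. The real selection lives in ompi_osc_portals4_get_md() (declared in osc_portals4.h, not shown in this diff); the index computation below is an illustrative re-derivation, not that helper's actual code:
/* Assumption: descriptor k starts at k * offset_unit and is 2 * offset_unit
   long, matching the bind loop above. */
static inline int
pick_md_index(uintptr_t addr, ptl_size_t offset_unit)
{
    /* addr is in [k*offset_unit, (k+1)*offset_unit), so addr + len stays
       below k*offset_unit + 2*offset_unit for any len <= offset_unit:
       the whole buffer fits in descriptor k. */
    return (int) (addr / offset_unit);
}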
int
ompi_osc_portals4_attach(struct ompi_win_t *win, void *base, size_t len)
{
return OMPI_SUCCESS;
}
int
ompi_osc_portals4_detach(struct ompi_win_t *win, void *base)
{
return OMPI_SUCCESS;
}
int
ompi_osc_portals4_free(struct ompi_win_t *win)
{
ompi_osc_portals4_module_t *module =
(ompi_osc_portals4_module_t*) win->w_osc_module;
int ret = OMPI_SUCCESS;
/* synchronize */
module->comm->c_coll.coll_barrier(module->comm,
module->comm->c_coll.coll_barrier_module);
/* cleanup */
PtlMEUnlink(module->data_me_h);
#if OMPI_PORTALS4_MAX_MD_SIZE < OMPI_PORTALS4_MAX_VA_SIZE
/* BWB: FIX ME */
#else
PtlMDRelease(module->md_h[0]);
PtlMDRelease(module->req_md_h[0]);
#endif
PtlCTFree(module->ct_h);
if (NULL != module->disp_units) free(module->disp_units);
ompi_comm_free(&module->comm);
if (NULL != module->free_after) free(module->free_after);
if (!opal_list_is_empty(&module->outstanding_locks)) {
ret = MPI_ERR_RMA_SYNC;
}
OBJ_DESTRUCT(&module->outstanding_locks);
free(module);
return ret;
}
int
ompi_osc_portals4_set_info(struct ompi_win_t *win, struct ompi_info_t *info)
{
ompi_osc_portals4_module_t *module =
(ompi_osc_portals4_module_t*) win->w_osc_module;
/* enforce collectiveness... */
return module->comm->c_coll.coll_barrier(module->comm,
module->comm->c_coll.coll_barrier_module);
}
int
ompi_osc_portals4_get_info(struct ompi_win_t *win, struct ompi_info_t **info_used)
{
ompi_osc_portals4_module_t *module =
(ompi_osc_portals4_module_t*) win->w_osc_module;
ompi_info_t *info = OBJ_NEW(ompi_info_t);
if (NULL == info) return OMPI_ERR_TEMP_OUT_OF_RESOURCE;
ompi_info_set(info, "no_locks", (module->state.lock == LOCK_ILLEGAL) ? "true" : "false");
if (module->atomic_max < mca_osc_portals4_component.matching_atomic_max) {
ompi_info_set(info, "accumulate_ordering", "none");
} else {
ompi_info_set(info, "accumulate_ordering", "rar,war,raw,waw");
}
*info_used = info;
return OMPI_SUCCESS;
}
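From the application side, the two info keys surfaced by set_info/get_info above map onto ordinary MPI window creation. A minimal sketch, assuming an Open MPI build in which this component is selected; the buffer size and key values are illustrative:
#include <mpi.h>
#include <stdlib.h>

int main(int argc, char **argv)
{
    MPI_Win win;
    MPI_Info info;
    void *buf;
    MPI_Init(&argc, &argv);
    buf = malloc(4096);
    MPI_Info_create(&info);
    MPI_Info_set(info, "no_locks", "true");              /* promise not to use MPI_Win_lock */
    MPI_Info_set(info, "accumulate_ordering", "none");   /* allow unordered accumulates */
    MPI_Win_create(buf, 4096, 1, info, MPI_COMM_WORLD, &win);
    /* ... one-sided communication ... */
    MPI_Win_free(&win);
    MPI_Info_free(&info);
    free(buf);
    MPI_Finalize();
    return 0;
}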

View File

@ -0,0 +1,413 @@
/*
* Copyright (c) 2011-2013 Sandia National Laboratories. All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
#include "ompi_config.h"
#include "ompi/mca/osc/osc.h"
#include "ompi/mca/osc/base/base.h"
#include "ompi/mca/osc/base/osc_base_obj_convert.h"
#include "osc_portals4.h"
#include "ompi/mca/mtl/portals4/mtl_portals4_endpoint.h"
enum locktype_t {
lock_nocheck,
lock_exclusive,
lock_shared
};
struct ompi_osc_portals4_outstanding_lock_t {
opal_list_item_t super;
int target;
enum locktype_t lock_type;
};
typedef struct ompi_osc_portals4_outstanding_lock_t ompi_osc_portals4_outstanding_lock_t;
OBJ_CLASS_INSTANCE(ompi_osc_portals4_outstanding_lock_t, opal_list_item_t,
NULL, NULL);
static inline int
lk_cas64(ompi_osc_portals4_module_t *module,
int target,
int64_t write_val,
int64_t comp_val,
int64_t *result_val)
{
int ret;
size_t offset = offsetof(ompi_osc_portals4_node_state_t, lock);
ptl_handle_md_t result_md_h, write_md_h;
void *result_base, *write_base;
opal_atomic_add_64(&module->opcount, 1);
ompi_osc_portals4_get_md(result_val, module->md_h, &result_md_h, &result_base);
ompi_osc_portals4_get_md(&write_val, module->md_h, &write_md_h, &write_base);
ret = PtlSwap(result_md_h,
(char*) result_val - (char*) result_base,
write_md_h,
(char*) &write_val - (char*) write_base,
sizeof(int64_t),
ompi_osc_portals4_get_peer(module, target),
module->pt_idx,
module->match_bits | OSC_PORTALS4_MB_CONTROL,
offset,
NULL,
0,
&comp_val,
PTL_CSWAP,
PTL_INT64_T);
if (OMPI_SUCCESS != ret) {
return ret;
}
ret = ompi_osc_portals4_complete_all(module);
return ret;
}
static inline int
lk_write64(ompi_osc_portals4_module_t *module,
int target,
int64_t write_val)
{
int ret;
size_t offset = offsetof(ompi_osc_portals4_node_state_t, lock);
ptl_handle_md_t md_h;
void *base;
opal_atomic_add_64(&module->opcount, 1);
ompi_osc_portals4_get_md(&write_val, module->md_h, &md_h, &base);
ret = PtlPut(md_h,
(char*) &write_val - (char*) base,
sizeof(int64_t),
PTL_ACK_REQ,
ompi_osc_portals4_get_peer(module, target),
module->pt_idx,
module->match_bits | OSC_PORTALS4_MB_CONTROL,
offset,
NULL,
0);
if (OMPI_SUCCESS != ret) {
return ret;
}
ret = ompi_osc_portals4_complete_all(module);
return ret;
}
static inline int
lk_add64(ompi_osc_portals4_module_t *module,
int target,
int64_t write_val,
int64_t *result_val)
{
int ret;
size_t offset = offsetof(ompi_osc_portals4_node_state_t, lock);
ptl_handle_md_t result_md_h, write_md_h;
void *result_base, *write_base;
opal_atomic_add_64(&module->opcount, 1);
ompi_osc_portals4_get_md(result_val, module->md_h, &result_md_h, &result_base);
ompi_osc_portals4_get_md(&write_val, module->md_h, &write_md_h, &write_base);
ret = PtlFetchAtomic(result_md_h,
(char*) result_val - (char*) result_base,
write_md_h,
(char*) &write_val - (char*) write_base,
sizeof(int64_t),
ompi_osc_portals4_get_peer(module, target),
module->pt_idx,
module->match_bits | OSC_PORTALS4_MB_CONTROL,
offset,
NULL,
0,
PTL_SUM,
PTL_INT64_T);
if (OMPI_SUCCESS != ret) {
return ret;
}
ret = ompi_osc_portals4_complete_all(module);
return ret;
}
static inline int
start_exclusive(ompi_osc_portals4_module_t *module,
int target)
{
int64_t result;
int ret;
while (true) {
ret = lk_cas64(module, target, LOCK_EXCLUSIVE, 0, &result);
if (OMPI_SUCCESS != ret) return ret;
if (LOCK_ILLEGAL == (LOCK_ILLEGAL & result)) return MPI_ERR_RMA_SYNC;
if (0 == result) break;
}
return OMPI_SUCCESS;
}
static inline int
end_exclusive(ompi_osc_portals4_module_t *module,
int target)
{
int ret;
ret = lk_write64(module, target, LOCK_UNLOCKED);
return ret;
}
static inline int
start_shared(ompi_osc_portals4_module_t *module,
int target)
{
int64_t result;
int ret;
while (true) {
ret = lk_add64(module, target, 1, &result);
if (OMPI_SUCCESS != ret) return ret;
if (result > (int64_t)LOCK_EXCLUSIVE) {
if (LOCK_ILLEGAL == (LOCK_ILLEGAL & result)) return MPI_ERR_RMA_SYNC;
ret = lk_add64(module, target, -1, &result);
if (OMPI_SUCCESS != ret) return ret;
} else {
break;
}
}
return OMPI_SUCCESS;
}
static inline int
end_shared(ompi_osc_portals4_module_t *module,
int target)
{
int64_t result;
int ret;
ret = lk_add64(module, target, -1, &result);
return ret;
}
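The helpers above implement a 64-bit lock word kept in the target's node state: an exclusive lock is taken by compare-and-swapping the word from unlocked to LOCK_EXCLUSIVE, a shared lock by fetch-and-adding 1 and backing off if an exclusive holder is seen, and LOCK_ILLEGAL is pre-loaded when the window was created with no_locks so any lock attempt fails with MPI_ERR_RMA_SYNC. A local model of the same protocol using C11 atomics; the real code performs these operations remotely with PtlSwap/PtlFetchAtomic, and the bit value chosen for the exclusive flag here is an assumption, not the constant from osc_portals4.h:
#include <stdatomic.h>
#include <stdbool.h>
#include <stdint.h>

#define MODEL_EXCLUSIVE (INT64_C(1) << 32)   /* assumed encoding, for illustration only */

static void model_lock_exclusive(_Atomic int64_t *lock)
{
    int64_t expected = 0;
    /* spin until the word goes from "unlocked" to "exclusively held by us" */
    while (!atomic_compare_exchange_weak(lock, &expected, MODEL_EXCLUSIVE)) {
        expected = 0;
    }
}

static void model_lock_shared(_Atomic int64_t *lock)
{
    while (true) {
        int64_t prev = atomic_fetch_add(lock, (int64_t) 1);
        if (prev < MODEL_EXCLUSIVE) {
            break;                            /* no exclusive holder: reader admitted */
        }
        atomic_fetch_sub(lock, (int64_t) 1);  /* back off and retry, cf. start_shared() above */
    }
}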
int
ompi_osc_portals4_lock(int lock_type,
int target,
int assert,
struct ompi_win_t *win)
{
ompi_osc_portals4_module_t *module =
(ompi_osc_portals4_module_t*) win->w_osc_module;
ompi_osc_portals4_outstanding_lock_t* lock;
int ret;
lock = OBJ_NEW(ompi_osc_portals4_outstanding_lock_t);
lock->target = target;
if (0 == (assert & MPI_MODE_NOCHECK)) {
if (MPI_LOCK_EXCLUSIVE == lock_type) {
lock->lock_type = lock_exclusive;
ret = start_exclusive(module, target);
} else {
lock->lock_type = lock_shared;
ret = start_shared(module, target);
}
} else {
lock->lock_type = lock_nocheck;
ret = OMPI_SUCCESS;
}
if (OMPI_SUCCESS == ret) {
opal_list_append(&module->outstanding_locks, &lock->super);
} else {
OBJ_RELEASE(lock);
}
return ret;
}
int
ompi_osc_portals4_unlock(int target,
struct ompi_win_t *win)
{
ompi_osc_portals4_module_t *module =
(ompi_osc_portals4_module_t*) win->w_osc_module;
ompi_osc_portals4_outstanding_lock_t *lock = NULL, *item;
int ret;
OPAL_LIST_FOREACH(item, &module->outstanding_locks,
ompi_osc_portals4_outstanding_lock_t) {
if (item->target == target) {
lock = item;
break;
}
}
if (NULL != lock) {
opal_list_remove_item(&module->outstanding_locks, &lock->super);
} else {
return MPI_ERR_RMA_SYNC;
}
ret = ompi_osc_portals4_complete_all(module);
if (ret != OMPI_SUCCESS) return ret;
if (lock->lock_type == lock_exclusive) {
ret = end_exclusive(module, target);
} else if (lock->lock_type == lock_shared) {
ret = end_shared(module, target);
} else {
ret = OMPI_SUCCESS;
}
OBJ_RELEASE(lock);
return ret;
}
int
ompi_osc_portals4_lock_all(int assert,
struct ompi_win_t *win)
{
ompi_osc_portals4_module_t *module =
(ompi_osc_portals4_module_t*) win->w_osc_module;
ompi_osc_portals4_outstanding_lock_t* lock;
int ret = OMPI_SUCCESS;
lock = OBJ_NEW(ompi_osc_portals4_outstanding_lock_t);
lock->target = -1;
if (0 == (assert & MPI_MODE_NOCHECK)) {
int i, comm_size;
lock->lock_type = lock_shared;
comm_size = ompi_comm_size(module->comm);
for (i = 0 ; i < comm_size ; ++i) {
ret |= start_shared(module, i);
}
} else {
lock->lock_type = lock_nocheck;
ret = OMPI_SUCCESS;
}
if (OMPI_SUCCESS == ret) {
opal_list_append(&module->outstanding_locks, &lock->super);
} else {
OBJ_RELEASE(lock);
}
return ret;
}
int
ompi_osc_portals4_unlock_all(struct ompi_win_t *win)
{
ompi_osc_portals4_module_t *module =
(ompi_osc_portals4_module_t*) win->w_osc_module;
ompi_osc_portals4_outstanding_lock_t *lock = NULL, *item;
int ret;
OPAL_LIST_FOREACH(item, &module->outstanding_locks,
ompi_osc_portals4_outstanding_lock_t) {
if (item->target == -1) {
lock = item;
break;
}
}
if (NULL != lock) {
opal_list_remove_item(&module->outstanding_locks, &lock->super);
} else {
return MPI_ERR_RMA_SYNC;
}
ret = ompi_osc_portals4_complete_all(module);
if (ret != OMPI_SUCCESS) return ret;
if (lock->lock_type == lock_shared) {
int i, comm_size;
comm_size = ompi_comm_size(module->comm);
for (i = 0 ; i < comm_size ; ++i) {
ret |= end_shared(module, i);
}
}
OBJ_RELEASE(lock);
return OMPI_SUCCESS;
}
int
ompi_osc_portals4_sync(struct ompi_win_t *win)
{
/* Not sure this is strictly necessary, but why not? */
opal_atomic_mb();
PtlAtomicSync();
return OMPI_SUCCESS;
}
int
ompi_osc_portals4_flush(int target,
struct ompi_win_t *win)
{
ompi_osc_portals4_module_t *module =
(ompi_osc_portals4_module_t*) win->w_osc_module;
return ompi_osc_portals4_complete_all(module);
}
int
ompi_osc_portals4_flush_all(struct ompi_win_t *win)
{
ompi_osc_portals4_module_t *module =
(ompi_osc_portals4_module_t*) win->w_osc_module;
return ompi_osc_portals4_complete_all(module);
}
int
ompi_osc_portals4_flush_local(int target,
struct ompi_win_t *win)
{
ompi_osc_portals4_module_t *module =
(ompi_osc_portals4_module_t*) win->w_osc_module;
return ompi_osc_portals4_complete_all(module);
}
int
ompi_osc_portals4_flush_local_all(struct ompi_win_t *win)
{
ompi_osc_portals4_module_t *module =
(ompi_osc_portals4_module_t*) win->w_osc_module;
return ompi_osc_portals4_complete_all(module);
}

View File

@ -0,0 +1,56 @@
/*
* Copyright (c) 2011 Sandia National Laboratories. All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
#include "ompi_config.h"
#include "ompi/request/request.h"
#include "ompi/mca/osc/osc.h"
#include "ompi/mca/osc/base/base.h"
#include "ompi/mca/osc/base/osc_base_obj_convert.h"
#include "osc_portals4.h"
#include "osc_portals4_request.h"
static int
request_cancel(struct ompi_request_t *request, int complete)
{
return MPI_ERR_REQUEST;
}
static int
request_free(struct ompi_request_t **ompi_req)
{
ompi_osc_portals4_request_t *request =
(ompi_osc_portals4_request_t*) *ompi_req;
if (true != request->super.req_complete) {
return MPI_ERR_REQUEST;
}
OMPI_OSC_PORTALS4_REQUEST_RETURN(request);
*ompi_req = MPI_REQUEST_NULL;
return OMPI_SUCCESS;
}
static
void
request_construct(ompi_osc_portals4_request_t *request)
{
request->super.req_type = OMPI_REQUEST_WIN;
request->super.req_status._cancelled = 0;
request->super.req_free = request_free;
request->super.req_cancel = request_cancel;
}
OBJ_CLASS_INSTANCE(ompi_osc_portals4_request_t,
ompi_request_t,
request_construct,
NULL);

View File

@ -0,0 +1,46 @@
/*
* Copyright (c) 2011-2013 Sandia National Laboratories. All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
#ifndef OSC_PORTALS4_REQUEST_H
#define OSC_PORTALS4_REQUEST_H
#include "ompi/request/request.h"
struct ompi_osc_portals4_request_t {
ompi_request_t super;
int32_t ops_expected;
volatile int32_t ops_committed;
};
typedef struct ompi_osc_portals4_request_t ompi_osc_portals4_request_t;
OBJ_CLASS_DECLARATION(ompi_osc_portals4_request_t);
#define OMPI_OSC_PORTALS4_REQUEST_ALLOC(win, req) \
do { \
ompi_free_list_item_t *item; \
OMPI_FREE_LIST_WAIT_MT(&mca_osc_portals4_component.requests, \
item); \
req = (ompi_osc_portals4_request_t*) item; \
OMPI_REQUEST_INIT(&req->super, false); \
req->super.req_mpi_object.win = win; \
req->super.req_complete = false; \
req->super.req_state = OMPI_REQUEST_ACTIVE; \
req->ops_expected = 0; \
req->ops_committed = 0; \
} while (0)
#define OMPI_OSC_PORTALS4_REQUEST_RETURN(req) \
do { \
OMPI_REQUEST_FINI(&(req)->super); \
OMPI_FREE_LIST_RETURN_MT(&mca_osc_portals4_component.requests, \
(ompi_free_list_item_t*) req); \
} while (0)
#endif

View File

@ -1,55 +0,0 @@
#
# Copyright (c) 2004-2005 The Trustees of Indiana University.
# All rights reserved.
# Copyright (c) 2004-2005 The Trustees of the University of Tennessee.
# All rights reserved.
# Copyright (c) 2004-2009 High Performance Computing Center Stuttgart,
# University of Stuttgart. All rights reserved.
# Copyright (c) 2004-2005 The Regents of the University of California.
# All rights reserved.
# Copyright (c) 2010 Cisco Systems, Inc. All rights reserved.
# $COPYRIGHT$
#
# Additional copyrights may follow
#
# $HEADER$
#
pt2pt_sources = \
osc_pt2pt.h \
osc_pt2pt.c \
osc_pt2pt_buffer.h \
osc_pt2pt_buffer.c \
osc_pt2pt_comm.c \
osc_pt2pt_component.c \
osc_pt2pt_data_move.h \
osc_pt2pt_data_move.c \
osc_pt2pt_header.h \
osc_pt2pt_longreq.h \
osc_pt2pt_longreq.c \
osc_pt2pt_replyreq.h \
osc_pt2pt_replyreq.c \
osc_pt2pt_sendreq.h \
osc_pt2pt_sendreq.c \
osc_pt2pt_sync.c
# Make the output library in this directory, and name it either
# mca_<type>_<name>.la (for DSO builds) or libmca_<type>_<name>.la
# (for static builds).
if MCA_BUILD_ompi_osc_pt2pt_DSO
component_noinst =
component_install = mca_osc_pt2pt.la
else
component_noinst = libmca_osc_pt2pt.la
component_install =
endif
mcacomponentdir = $(ompilibdir)
mcacomponent_LTLIBRARIES = $(component_install)
mca_osc_pt2pt_la_SOURCES = $(pt2pt_sources)
mca_osc_pt2pt_la_LDFLAGS = -module -avoid-version
noinst_LTLIBRARIES = $(component_noinst)
libmca_osc_pt2pt_la_SOURCES = $(pt2pt_sources)
libmca_osc_pt2pt_la_LDFLAGS = -module -avoid-version

View File

@ -1,81 +0,0 @@
/*
* Copyright (c) 2004-2005 The Trustees of Indiana University.
* All rights reserved.
* Copyright (c) 2004-2005 The Trustees of the University of Tennessee.
* All rights reserved.
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
* University of Stuttgart. All rights reserved.
* Copyright (c) 2004-2005 The Regents of the University of California.
* All rights reserved.
* Copyright (c) 2007 Los Alamos National Security, LLC. All rights
* reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
#include "ompi_config.h"
#include "osc_pt2pt.h"
#include "osc_pt2pt_sendreq.h"
#include "ompi/mca/osc/base/base.h"
#include "opal/threads/mutex.h"
#include "ompi/win/win.h"
#include "ompi/communicator/communicator.h"
#include "ompi/request/request.h"
#include "mpi.h"
int
ompi_osc_pt2pt_module_free(ompi_win_t *win)
{
int ret = OMPI_SUCCESS;
ompi_osc_pt2pt_module_t *module = P2P_MODULE(win);
opal_output_verbose(1, ompi_osc_base_framework.framework_output,
"pt2pt component destroying window with id %d",
ompi_comm_get_cid(module->p2p_comm));
/* finish with a barrier */
if (ompi_group_size(win->w_group) > 1) {
ret = module->p2p_comm->c_coll.coll_barrier(module->p2p_comm,
module->p2p_comm->c_coll.coll_barrier_module);
}
win->w_osc_module = NULL;
OBJ_DESTRUCT(&module->p2p_unlocks_pending);
OBJ_DESTRUCT(&module->p2p_locks_pending);
OBJ_DESTRUCT(&module->p2p_copy_pending_sendreqs);
OBJ_DESTRUCT(&module->p2p_pending_sendreqs);
OBJ_DESTRUCT(&module->p2p_acc_lock);
OBJ_DESTRUCT(&module->p2p_cond);
OBJ_DESTRUCT(&module->p2p_lock);
if (NULL != module->p2p_sc_remote_ranks) {
free(module->p2p_sc_remote_ranks);
}
if (NULL != module->p2p_sc_remote_active_ranks) {
free(module->p2p_sc_remote_active_ranks);
}
if (NULL != module->p2p_fence_coll_counts) {
free(module->p2p_fence_coll_counts);
}
if (NULL != module->p2p_copy_num_pending_sendreqs) {
free(module->p2p_copy_num_pending_sendreqs);
}
if (NULL != module->p2p_num_pending_sendreqs) {
free(module->p2p_num_pending_sendreqs);
}
if (NULL != module->p2p_comm) ompi_comm_free(&module->p2p_comm);
#if OPAL_ENABLE_DEBUG
memset(module, 0, sizeof(ompi_osc_base_module_t));
#endif
if (NULL != module) free(module);
return ret;
}

View File

@ -1,253 +0,0 @@
/*
* Copyright (c) 2004-2005 The Trustees of Indiana University.
* All rights reserved.
* Copyright (c) 2004-2006 The Trustees of the University of Tennessee.
* All rights reserved.
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
* University of Stuttgart. All rights reserved.
* Copyright (c) 2004-2005 The Regents of the University of California.
* All rights reserved.
* Copyright (c) 2007 Los Alamos National Security, LLC. All rights
* reserved.
* Copyright (c) 2010 Cisco Systems, Inc. All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
#ifndef OMPI_OSC_PT2PT_H
#define OMPI_OSC_PT2PT_H
#include "ompi_config.h"
#include "opal/class/opal_list.h"
#include "opal/class/opal_free_list.h"
#include "opal/class/opal_hash_table.h"
#include "opal/threads/threads.h"
#include "ompi/win/win.h"
#include "ompi/communicator/communicator.h"
#include "ompi/request/request.h"
#include "ompi/mca/osc/osc.h"
BEGIN_C_DECLS
#define CONTROL_MSG_TAG (-200)
struct ompi_osc_pt2pt_component_t {
/** Extend the basic osc component interface */
ompi_osc_base_component_t super;
/** max size of eager message */
unsigned long long p2p_c_eager_size;
/** free list of ompi_osc_pt2pt_sendreq_t structures */
opal_free_list_t p2p_c_sendreqs;
/** free list of ompi_osc_pt2pt_replyreq_t structures */
opal_free_list_t p2p_c_replyreqs;
/** free list of ompi_osc_pt2pt_longreq_t structures */
opal_free_list_t p2p_c_longreqs;
/** free list for eager / control messages */
opal_free_list_t p2p_c_buffers;
};
typedef struct ompi_osc_pt2pt_component_t ompi_osc_pt2pt_component_t;
struct ompi_osc_pt2pt_module_t {
/** Extend the basic osc module interface */
ompi_osc_base_module_t super;
/** lock access to data structures in the current module */
opal_mutex_t p2p_lock;
/** condition variable for access to current module */
opal_condition_t p2p_cond;
/** lock for "atomic" window updates from reductions */
opal_mutex_t p2p_acc_lock;
/** pointer back to window */
ompi_win_t *p2p_win;
/** communicator created with this window */
ompi_communicator_t *p2p_comm;
/** list of ompi_osc_pt2pt_sendreq_t structures, and includes all
requests for this access epoch that have not already been
started. p2p_lock must be held when modifying this field. */
opal_list_t p2p_pending_sendreqs;
/** list of unsigned int counters for the number of requests to a
particular rank in p2p_comm for this access epoch. p2p_lock
must be held when modifying this field */
unsigned int *p2p_num_pending_sendreqs;
/** For MPI_Fence synchronization, the number of messages to send
in epoch. For Start/Complete, the number of updates for this
Complete. For lock, the number of
messages waiting for completion on the origin side. Not
protected by p2p_lock - must use atomic counter operations. */
int32_t p2p_num_pending_out;
/** For MPI_Fence synchronization, the number of expected incoming
messages. For Post/Wait, the number of expected updates from
complete. For lock, the number of messages on the passive side
we are waiting for. Not protected by p2p_lock - must use
atomic counter operations. */
int32_t p2p_num_pending_in;
/** Number of "ping" messages from the remote post group we've
received */
int32_t p2p_num_post_msgs;
/** Number of "count" messages from the remote complete group
we've received */
int32_t p2p_num_complete_msgs;
/** cyclic counter for a unique tag for long messages. Not
protected by the p2p_lock - must use create_send_tag() to
create a send tag */
volatile int32_t p2p_tag_counter;
opal_list_t p2p_copy_pending_sendreqs;
unsigned int *p2p_copy_num_pending_sendreqs;
/* ********************* FENCE data ************************ */
/* an array of ompi_comm_size(p2p_comm) ints, each containing the value
1. */
int *p2p_fence_coll_counts;
/* ********************* PWSC data ************************ */
struct ompi_group_t *p2p_pw_group;
struct ompi_group_t *p2p_sc_group;
bool *p2p_sc_remote_active_ranks;
int *p2p_sc_remote_ranks;
/* ********************* LOCK data ************************ */
int32_t p2p_lock_status; /* one of 0, MPI_LOCK_EXCLUSIVE, MPI_LOCK_SHARED */
int32_t p2p_shared_count;
opal_list_t p2p_locks_pending;
opal_list_t p2p_unlocks_pending;
int32_t p2p_lock_received_ack;
};
typedef struct ompi_osc_pt2pt_module_t ompi_osc_pt2pt_module_t;
OMPI_MODULE_DECLSPEC extern ompi_osc_pt2pt_component_t mca_osc_pt2pt_component;
/**
* Helper macro for grabbing the module structure from a window instance
*/
#define P2P_MODULE(win) ((ompi_osc_pt2pt_module_t*) win->w_osc_module)
/*
* Component functions
*/
int ompi_osc_pt2pt_component_init(bool enable_progress_threads,
bool enable_mpi_threads);
int ompi_osc_pt2pt_component_finalize(void);
int ompi_osc_pt2pt_component_query(struct ompi_win_t *win,
struct ompi_info_t *info,
struct ompi_communicator_t *comm);
int ompi_osc_pt2pt_component_select(struct ompi_win_t *win,
struct ompi_info_t *info,
struct ompi_communicator_t *comm);
/* helper function that properly sets up request handling */
int ompi_osc_pt2pt_component_irecv(void *buf,
size_t count,
struct ompi_datatype_t *datatype,
int src,
int tag,
struct ompi_communicator_t *comm,
struct ompi_request_t **request,
ompi_request_complete_fn_t callback,
void *data);
int ompi_osc_pt2pt_component_isend(void *buf,
size_t count,
struct ompi_datatype_t *datatype,
int dest,
int tag,
struct ompi_communicator_t *comm,
struct ompi_request_t **request,
ompi_request_complete_fn_t callback,
void *data);
/*
* Module interface function types
*/
int ompi_osc_pt2pt_module_free(struct ompi_win_t *win);
int ompi_osc_pt2pt_module_put(void *origin_addr,
int origin_count,
struct ompi_datatype_t *origin_dt,
int target,
OPAL_PTRDIFF_TYPE target_disp,
int target_count,
struct ompi_datatype_t *target_dt,
struct ompi_win_t *win);
int ompi_osc_pt2pt_module_accumulate(void *origin_addr,
int origin_count,
struct ompi_datatype_t *origin_dt,
int target,
OPAL_PTRDIFF_TYPE target_disp,
int target_count,
struct ompi_datatype_t *target_dt,
struct ompi_op_t *op,
struct ompi_win_t *win);
int ompi_osc_pt2pt_module_get(void *origin_addr,
int origin_count,
struct ompi_datatype_t *origin_dt,
int target,
OPAL_PTRDIFF_TYPE target_disp,
int target_count,
struct ompi_datatype_t *target_dt,
struct ompi_win_t *win);
int ompi_osc_pt2pt_module_fence(int assert, struct ompi_win_t *win);
int ompi_osc_pt2pt_module_start(struct ompi_group_t *group,
int assert,
struct ompi_win_t *win);
int ompi_osc_pt2pt_module_complete(struct ompi_win_t *win);
int ompi_osc_pt2pt_module_post(struct ompi_group_t *group,
int assert,
struct ompi_win_t *win);
int ompi_osc_pt2pt_module_wait(struct ompi_win_t *win);
int ompi_osc_pt2pt_module_test(struct ompi_win_t *win,
int *flag);
int ompi_osc_pt2pt_module_lock(int lock_type,
int target,
int assert,
struct ompi_win_t *win);
int ompi_osc_pt2pt_module_unlock(int target,
struct ompi_win_t *win);
/*
* passive side sync interface functions
*/
int ompi_osc_pt2pt_passive_lock(ompi_osc_pt2pt_module_t *module,
int32_t origin,
int32_t lock_type);
int ompi_osc_pt2pt_passive_unlock(ompi_osc_pt2pt_module_t *module,
int32_t origin,
int32_t count);
int ompi_osc_pt2pt_passive_unlock_complete(ompi_osc_pt2pt_module_t *module);
END_C_DECLS
#endif /* OMPI_OSC_PT2PT_H */

View File

@ -1,43 +0,0 @@
/*
* Copyright (c) 2004-2005 The Trustees of Indiana University.
* All rights reserved.
* Copyright (c) 2004-2005 The Trustees of the University of Tennessee.
* All rights reserved.
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
* University of Stuttgart. All rights reserved.
* Copyright (c) 2004-2006 The Regents of the University of California.
* All rights reserved.
* Copyright (c) 2008 Sun Microsystems, Inc. All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
#include "ompi_config.h"
#include "opal/class/opal_free_list.h"
#include "opal/types.h"
#include "osc_pt2pt_buffer.h"
static void ompi_osc_pt2pt_buffer_construct(ompi_osc_pt2pt_buffer_t *buf)
{
/* adjust payload location to account for alignment issues */
buf->payload = (void* )(((char*) buf) +
sizeof(ompi_osc_pt2pt_buffer_t) +
(sizeof(ompi_osc_pt2pt_buffer_t) % sizeof(ompi_ptr_t)));
}
static void ompi_osc_pt2pt_buffer_destruct(ompi_osc_pt2pt_buffer_t *buf)
{
buf->payload = NULL;
}
OBJ_CLASS_INSTANCE(ompi_osc_pt2pt_buffer_t, opal_free_list_item_t,
ompi_osc_pt2pt_buffer_construct,
ompi_osc_pt2pt_buffer_destruct);

View File

@ -1,38 +0,0 @@
/*
* Copyright (c) 2004-2005 The Trustees of Indiana University.
* All rights reserved.
* Copyright (c) 2004-2006 The Trustees of the University of Tennessee.
* All rights reserved.
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
* University of Stuttgart. All rights reserved.
* Copyright (c) 2004-2006 The Regents of the University of California.
* All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
#ifndef OMPI_OSC_PT2PT_BUFFER_H
#define OMPI_OSC_PT2PT_BUFFER_H
#include "opal/class/opal_free_list.h"
#include "ompi/request/request.h"
BEGIN_C_DECLS
struct ompi_osc_pt2pt_buffer_t {
ompi_free_list_item_t super;
ompi_request_t *request;
void *data;
void *payload;
size_t len;
};
typedef struct ompi_osc_pt2pt_buffer_t ompi_osc_pt2pt_buffer_t;
OBJ_CLASS_DECLARATION(ompi_osc_pt2pt_buffer_t);
END_C_DECLS
#endif

View File

@ -1,202 +0,0 @@
/*
* Copyright (c) 2004-2005 The Trustees of Indiana University.
* All rights reserved.
* Copyright (c) 2004-2005 The Trustees of the University of Tennessee.
* All rights reserved.
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
* University of Stuttgart. All rights reserved.
* Copyright (c) 2004-2005 The Regents of the University of California.
* All rights reserved.
* Copyright (c) 2010 Cisco Systems, Inc. All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
#include "ompi_config.h"
#include "mpi.h"
#include <stdio.h>
#include "opal/class/opal_list.h"
#include "opal/threads/mutex.h"
#include "osc_pt2pt.h"
#include "osc_pt2pt_sendreq.h"
#include "osc_pt2pt_header.h"
#include "osc_pt2pt_data_move.h"
#include "ompi/datatype/ompi_datatype.h"
#include "ompi/op/op.h"
#include "ompi/win/win.h"
#include "ompi/memchecker.h"
static int
enqueue_sendreq(ompi_osc_pt2pt_module_t *module,
ompi_osc_pt2pt_sendreq_t *sendreq)
{
OPAL_THREAD_LOCK(&(module->p2p_lock));
opal_list_append(&(module->p2p_pending_sendreqs),
(opal_list_item_t*) sendreq);
module->p2p_num_pending_sendreqs[sendreq->req_target_rank]++;
OPAL_THREAD_UNLOCK(&(module->p2p_lock));
return OMPI_SUCCESS;
}
int
ompi_osc_pt2pt_module_accumulate(void *origin_addr, int origin_count,
struct ompi_datatype_t *origin_dt,
int target, OPAL_PTRDIFF_TYPE target_disp,
int target_count,
struct ompi_datatype_t *target_dt,
struct ompi_op_t *op, ompi_win_t *win)
{
int ret;
ompi_osc_pt2pt_sendreq_t *sendreq;
if ((OMPI_WIN_STARTED & ompi_win_get_mode(win)) &&
(!P2P_MODULE(win)->p2p_sc_remote_active_ranks[target])) {
return MPI_ERR_RMA_SYNC;
}
if (OMPI_WIN_FENCE & ompi_win_get_mode(win)) {
/* well, we're definitely in an access epoch now */
ompi_win_set_mode(win, OMPI_WIN_FENCE | OMPI_WIN_ACCESS_EPOCH |
OMPI_WIN_EXPOSE_EPOCH);
}
/* shortcut 0 count case */
if (0 == origin_count || 0 == target_count) {
return OMPI_SUCCESS;
}
/* create sendreq */
ret = ompi_osc_pt2pt_sendreq_alloc_init(OMPI_OSC_PT2PT_ACC,
origin_addr,
origin_count,
origin_dt,
target,
target_disp,
target_count,
target_dt,
P2P_MODULE(win),
&sendreq);
MEMCHECKER(
memchecker_convertor_call(&opal_memchecker_base_mem_noaccess,
&sendreq->req_origin_convertor);
);
if (OMPI_SUCCESS != ret) return ret;
sendreq->req_op_id = op->o_f_to_c_index;
/* enqueue sendreq */
ret = enqueue_sendreq(P2P_MODULE(win), sendreq);
return ret;
}
int
ompi_osc_pt2pt_module_get(void *origin_addr,
int origin_count,
struct ompi_datatype_t *origin_dt,
int target,
OPAL_PTRDIFF_TYPE target_disp,
int target_count,
struct ompi_datatype_t *target_dt,
ompi_win_t *win)
{
int ret;
ompi_osc_pt2pt_sendreq_t *sendreq;
if ((OMPI_WIN_STARTED & ompi_win_get_mode(win)) &&
(!P2P_MODULE(win)->p2p_sc_remote_active_ranks[target])) {
return MPI_ERR_RMA_SYNC;
}
if (OMPI_WIN_FENCE & ompi_win_get_mode(win)) {
/* well, we're definitely in an access epoch now */
ompi_win_set_mode(win, OMPI_WIN_FENCE | OMPI_WIN_ACCESS_EPOCH |
OMPI_WIN_EXPOSE_EPOCH);
}
/* shortcut 0 count case */
if (0 == origin_count || 0 == target_count) {
return OMPI_SUCCESS;
}
/* create sendreq */
ret = ompi_osc_pt2pt_sendreq_alloc_init(OMPI_OSC_PT2PT_GET,
origin_addr,
origin_count,
origin_dt,
target,
target_disp,
target_count,
target_dt,
P2P_MODULE(win),
&sendreq);
MEMCHECKER(
memchecker_convertor_call(&opal_memchecker_base_mem_noaccess,
&sendreq->req_origin_convertor);
);
if (OMPI_SUCCESS != ret) return ret;
/* enqueue sendreq */
ret = enqueue_sendreq(P2P_MODULE(win), sendreq);
return ret;
}
int
ompi_osc_pt2pt_module_put(void *origin_addr, int origin_count,
struct ompi_datatype_t *origin_dt,
int target, OPAL_PTRDIFF_TYPE target_disp,
int target_count,
struct ompi_datatype_t *target_dt, ompi_win_t *win)
{
int ret;
ompi_osc_pt2pt_sendreq_t *sendreq;
if ((OMPI_WIN_STARTED & ompi_win_get_mode(win)) &&
(!P2P_MODULE(win)->p2p_sc_remote_active_ranks[target])) {
return MPI_ERR_RMA_SYNC;
}
if (OMPI_WIN_FENCE & ompi_win_get_mode(win)) {
/* well, we're definitely in an access epoch now */
ompi_win_set_mode(win, OMPI_WIN_FENCE | OMPI_WIN_ACCESS_EPOCH |
OMPI_WIN_EXPOSE_EPOCH);
}
/* shortcut 0 count case */
if (0 == origin_count || 0 == target_count) {
return OMPI_SUCCESS;
}
/* create sendreq */
ret = ompi_osc_pt2pt_sendreq_alloc_init(OMPI_OSC_PT2PT_PUT,
origin_addr,
origin_count,
origin_dt,
target,
target_disp,
target_count,
target_dt,
P2P_MODULE(win),
&sendreq);
MEMCHECKER(
memchecker_convertor_call(&opal_memchecker_base_mem_noaccess,
&sendreq->req_origin_convertor);
);
if (OMPI_SUCCESS != ret) return ret;
/* enqueue sendreq */
ret = enqueue_sendreq(P2P_MODULE(win), sendreq);
return ret;
}

View File

@ -1,661 +0,0 @@
/*
* Copyright (c) 2004-2007 The Trustees of Indiana University.
* All rights reserved.
* Copyright (c) 2004-2010 The University of Tennessee and The University
* of Tennessee Research Foundation. All rights
* reserved.
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
* University of Stuttgart. All rights reserved.
* Copyright (c) 2004-2005 The Regents of the University of California.
* All rights reserved.
* Copyright (c) 2007 Los Alamos National Security, LLC. All rights
* reserved.
* Copyright (c) 2008 Sun Microsystems, Inc. All rights reserved.
* Copyright (c) 2006-2008 University of Houston. All rights reserved.
* Copyright (c) 2010 Sandia National Laboratories. All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
#include "ompi_config.h"
#include <string.h>
#include "osc_pt2pt.h"
#include "osc_pt2pt_sendreq.h"
#include "osc_pt2pt_replyreq.h"
#include "osc_pt2pt_header.h"
#include "osc_pt2pt_data_move.h"
#include "osc_pt2pt_buffer.h"
#include "opal/threads/mutex.h"
#include "ompi/info/info.h"
#include "ompi/communicator/communicator.h"
#include "ompi/mca/osc/osc.h"
#include "ompi/mca/osc/base/base.h"
#include "ompi/mca/osc/base/osc_base_obj_convert.h"
#include "ompi/mca/pml/pml.h"
static int component_register(void);
static int component_fragment_cb(ompi_request_t *request);
ompi_osc_pt2pt_component_t mca_osc_pt2pt_component = {
{ /* ompi_osc_base_component_t */
{ /* ompi_base_component_t */
OMPI_OSC_BASE_VERSION_2_0_0,
"pt2pt",
OMPI_MAJOR_VERSION, /* MCA component major version */
OMPI_MINOR_VERSION, /* MCA component minor version */
OMPI_RELEASE_VERSION, /* MCA component release version */
NULL,
NULL,
NULL,
component_register
},
{ /* mca_base_component_data */
MCA_BASE_METADATA_PARAM_CHECKPOINT
},
ompi_osc_pt2pt_component_init,
ompi_osc_pt2pt_component_query,
ompi_osc_pt2pt_component_select,
ompi_osc_pt2pt_component_finalize
}
};
ompi_osc_pt2pt_module_t ompi_osc_pt2pt_module_template = {
{
ompi_osc_pt2pt_module_free,
ompi_osc_pt2pt_module_put,
ompi_osc_pt2pt_module_get,
ompi_osc_pt2pt_module_accumulate,
ompi_osc_pt2pt_module_fence,
ompi_osc_pt2pt_module_start,
ompi_osc_pt2pt_module_complete,
ompi_osc_pt2pt_module_post,
ompi_osc_pt2pt_module_wait,
ompi_osc_pt2pt_module_test,
ompi_osc_pt2pt_module_lock,
ompi_osc_pt2pt_module_unlock,
}
};
static int
component_register(void)
{
mca_osc_pt2pt_component.p2p_c_eager_size = 16 * 1024;
(void) mca_base_component_var_register(&mca_osc_pt2pt_component.super.osc_version,
"eager_limit",
"Max size of eagerly sent data",
MCA_BASE_VAR_TYPE_UNSIGNED_LONG_LONG,
NULL, 0, 0, OPAL_INFO_LVL_9,
MCA_BASE_VAR_SCOPE_READONLY,
&mca_osc_pt2pt_component.p2p_c_eager_size);
return OMPI_SUCCESS;
}
int
ompi_osc_pt2pt_component_init(bool enable_progress_threads,
bool enable_mpi_threads)
{
size_t aligned_size;
OBJ_CONSTRUCT(&mca_osc_pt2pt_component.p2p_c_sendreqs, opal_free_list_t);
opal_free_list_init(&mca_osc_pt2pt_component.p2p_c_sendreqs,
sizeof(ompi_osc_pt2pt_sendreq_t),
OBJ_CLASS(ompi_osc_pt2pt_sendreq_t),
1, -1, 1);
OBJ_CONSTRUCT(&mca_osc_pt2pt_component.p2p_c_replyreqs, opal_free_list_t);
opal_free_list_init(&mca_osc_pt2pt_component.p2p_c_replyreqs,
sizeof(ompi_osc_pt2pt_replyreq_t),
OBJ_CLASS(ompi_osc_pt2pt_replyreq_t),
1, -1, 1);
OBJ_CONSTRUCT(&mca_osc_pt2pt_component.p2p_c_longreqs, opal_free_list_t);
opal_free_list_init(&mca_osc_pt2pt_component.p2p_c_longreqs,
sizeof(ompi_osc_pt2pt_longreq_t),
OBJ_CLASS(ompi_osc_pt2pt_longreq_t),
1, -1, 1);
/* adjust size to be multiple of ompi_ptr_t to avoid alignment issues*/
aligned_size = sizeof(ompi_osc_pt2pt_buffer_t) +
(sizeof(ompi_osc_pt2pt_buffer_t) % sizeof(ompi_ptr_t)) +
mca_osc_pt2pt_component.p2p_c_eager_size;
OBJ_CONSTRUCT(&mca_osc_pt2pt_component.p2p_c_buffers, opal_free_list_t);
opal_free_list_init(&mca_osc_pt2pt_component.p2p_c_buffers,
aligned_size,
OBJ_CLASS(ompi_osc_pt2pt_buffer_t),
1, -1, 1);
return OMPI_SUCCESS;
}
int
ompi_osc_pt2pt_component_finalize(void)
{
OBJ_DESTRUCT(&mca_osc_pt2pt_component.p2p_c_buffers);
OBJ_DESTRUCT(&mca_osc_pt2pt_component.p2p_c_longreqs);
OBJ_DESTRUCT(&mca_osc_pt2pt_component.p2p_c_replyreqs);
OBJ_DESTRUCT(&mca_osc_pt2pt_component.p2p_c_sendreqs);
return OMPI_SUCCESS;
}
int
ompi_osc_pt2pt_component_query(ompi_win_t *win,
ompi_info_t *info,
ompi_communicator_t *comm)
{
/* we can always run - return a low priority */
return 5;
}
int
ompi_osc_pt2pt_component_select(ompi_win_t *win,
ompi_info_t *info,
ompi_communicator_t *comm)
{
ompi_osc_pt2pt_module_t *module = NULL;
int ret, i;
ompi_osc_pt2pt_buffer_t *buffer = NULL;
opal_free_list_item_t *item = NULL;
char *tmp = NULL;
/* create module structure */
module = (ompi_osc_pt2pt_module_t*)
calloc(1, sizeof(ompi_osc_pt2pt_module_t));
if (NULL == module) return OMPI_ERR_TEMP_OUT_OF_RESOURCE;
/* fill in the function pointer part */
memcpy(module, &ompi_osc_pt2pt_module_template,
sizeof(ompi_osc_base_module_t));
/* initialize the p2p part */
OBJ_CONSTRUCT(&(module->p2p_lock), opal_mutex_t);
OBJ_CONSTRUCT(&(module->p2p_cond), opal_condition_t);
OBJ_CONSTRUCT(&(module->p2p_acc_lock), opal_mutex_t);
OBJ_CONSTRUCT(&module->p2p_pending_sendreqs, opal_list_t);
OBJ_CONSTRUCT(&(module->p2p_copy_pending_sendreqs), opal_list_t);
OBJ_CONSTRUCT(&(module->p2p_locks_pending), opal_list_t);
OBJ_CONSTRUCT(&(module->p2p_unlocks_pending), opal_list_t);
module->p2p_win = win;
ret = ompi_comm_dup(comm, &(module->p2p_comm));
if (ret != OMPI_SUCCESS) goto cleanup;
opal_output_verbose(1, ompi_osc_base_framework.framework_output,
"pt2pt component creating window with id %d",
ompi_comm_get_cid(module->p2p_comm));
asprintf(&tmp, "%d", ompi_comm_get_cid(module->p2p_comm));
ompi_win_set_name(win, tmp);
free(tmp);
module->p2p_num_pending_sendreqs = (unsigned int*)
malloc(sizeof(unsigned int) * ompi_comm_size(module->p2p_comm));
if (NULL == module->p2p_num_pending_sendreqs) {
ret = OMPI_ERR_TEMP_OUT_OF_RESOURCE;
goto cleanup;
}
memset(module->p2p_num_pending_sendreqs, 0,
sizeof(unsigned int) * ompi_comm_size(module->p2p_comm));
module->p2p_num_pending_out = 0;
module->p2p_num_pending_in = 0;
module->p2p_num_post_msgs = 0;
module->p2p_num_complete_msgs = 0;
module->p2p_tag_counter = 0;
module->p2p_copy_num_pending_sendreqs = (unsigned int*)
malloc(sizeof(unsigned int) * ompi_comm_size(module->p2p_comm));
if (NULL == module->p2p_copy_num_pending_sendreqs) {
ret = OMPI_ERR_TEMP_OUT_OF_RESOURCE;
goto cleanup;
}
memset(module->p2p_copy_num_pending_sendreqs, 0,
sizeof(unsigned int) * ompi_comm_size(module->p2p_comm));
/* fence data */
module->p2p_fence_coll_counts = (int*)
malloc(sizeof(int) * ompi_comm_size(module->p2p_comm));
if (NULL == module->p2p_fence_coll_counts) {
ret = OMPI_ERR_TEMP_OUT_OF_RESOURCE;
goto cleanup;
}
for (i = 0 ; i < ompi_comm_size(module->p2p_comm) ; ++i) {
module->p2p_fence_coll_counts[i] = 1;
}
/* pwsc data */
module->p2p_pw_group = NULL;
module->p2p_sc_group = NULL;
module->p2p_sc_remote_active_ranks = (bool*)
malloc(sizeof(bool) * ompi_comm_size(module->p2p_comm));
if (NULL == module->p2p_sc_remote_active_ranks) {
ret = OMPI_ERR_TEMP_OUT_OF_RESOURCE;
goto cleanup;
}
module->p2p_sc_remote_ranks = (int*)
malloc(sizeof(int) * ompi_comm_size(module->p2p_comm));
if (NULL == module->p2p_sc_remote_ranks) {
ret = OMPI_ERR_TEMP_OUT_OF_RESOURCE;
goto cleanup;
}
/* lock data */
module->p2p_lock_status = 0;
module->p2p_shared_count = 0;
module->p2p_lock_received_ack = 0;
/* fill in window information */
win->w_osc_module = (ompi_osc_base_module_t*) module;
/* sync memory - make sure all initialization completed */
opal_atomic_mb();
/* start up receive for protocol headers */
OPAL_FREE_LIST_GET(&mca_osc_pt2pt_component.p2p_c_buffers,
item, ret);
if (OMPI_SUCCESS != ret) goto cleanup;
buffer = (ompi_osc_pt2pt_buffer_t*) item;
buffer->data = (void*) module;
ret = ompi_osc_pt2pt_component_irecv(buffer->payload,
mca_osc_pt2pt_component.p2p_c_eager_size,
MPI_BYTE,
MPI_ANY_SOURCE,
CONTROL_MSG_TAG,
module->p2p_comm,
&(buffer->request),
component_fragment_cb,
buffer);
if (OMPI_SUCCESS != ret) goto cleanup;
return OMPI_SUCCESS;
cleanup:
OBJ_DESTRUCT(&module->p2p_unlocks_pending);
OBJ_DESTRUCT(&module->p2p_locks_pending);
OBJ_DESTRUCT(&module->p2p_copy_pending_sendreqs);
OBJ_DESTRUCT(&module->p2p_pending_sendreqs);
OBJ_DESTRUCT(&module->p2p_acc_lock);
OBJ_DESTRUCT(&module->p2p_cond);
OBJ_DESTRUCT(&module->p2p_lock);
if (NULL != buffer) {
OPAL_FREE_LIST_RETURN(&mca_osc_pt2pt_component.p2p_c_buffers, item);
}
if (NULL != module->p2p_sc_remote_ranks) {
free(module->p2p_sc_remote_ranks);
}
if (NULL != module->p2p_sc_remote_active_ranks) {
free(module->p2p_sc_remote_active_ranks);
}
if (NULL != module->p2p_fence_coll_counts) {
free(module->p2p_fence_coll_counts);
}
if (NULL != module->p2p_copy_num_pending_sendreqs) {
free(module->p2p_copy_num_pending_sendreqs);
}
if (NULL != module->p2p_num_pending_sendreqs) {
free(module->p2p_num_pending_sendreqs);
}
if (NULL != module->p2p_comm) ompi_comm_free(&module->p2p_comm);
#if OPAL_ENABLE_DEBUG
memset(module, 0, sizeof(ompi_osc_base_module_t));
#endif
if (NULL != module) free(module);
return ret;
}
/* dispatch for callback on message completion */
static int
component_fragment_cb(ompi_request_t *request)
{
int ret;
ompi_osc_pt2pt_buffer_t *buffer;
ompi_osc_pt2pt_module_t *module;
if (request->req_status._cancelled) {
opal_output_verbose(5, ompi_osc_base_framework.framework_output,
"pt2pt request was canceled");
return OMPI_ERR_NOT_AVAILABLE;
}
buffer = (ompi_osc_pt2pt_buffer_t*) request->req_complete_cb_data;
module = (ompi_osc_pt2pt_module_t*) buffer->data;
assert(request->req_status._ucount >= (int) sizeof(ompi_osc_pt2pt_base_header_t));
/* handle message */
switch (((ompi_osc_pt2pt_base_header_t*) buffer->payload)->hdr_type) {
case OMPI_OSC_PT2PT_HDR_PUT:
{
/* get our header and payload */
ompi_osc_pt2pt_send_header_t *header =
(ompi_osc_pt2pt_send_header_t*) buffer->payload;
void *payload = (void*) (header + 1);
#if !defined(WORDS_BIGENDIAN) && OPAL_ENABLE_HETEROGENEOUS_SUPPORT
if (header->hdr_base.hdr_flags & OMPI_OSC_PT2PT_HDR_FLAG_NBO) {
OMPI_OSC_PT2PT_SEND_HDR_NTOH(*header);
}
#endif
if (!ompi_win_exposure_epoch(module->p2p_win)) {
if (OMPI_WIN_FENCE & ompi_win_get_mode(module->p2p_win)) {
ompi_win_set_mode(module->p2p_win,
OMPI_WIN_FENCE |
OMPI_WIN_ACCESS_EPOCH |
OMPI_WIN_EXPOSE_EPOCH);
}
}
ret = ompi_osc_pt2pt_sendreq_recv_put(module, header, payload);
}
break;
case OMPI_OSC_PT2PT_HDR_ACC:
{
/* get our header and payload */
ompi_osc_pt2pt_send_header_t *header =
(ompi_osc_pt2pt_send_header_t*) buffer->payload;
void *payload = (void*) (header + 1);
#if !defined(WORDS_BIGENDIAN) && OPAL_ENABLE_HETEROGENEOUS_SUPPORT
if (header->hdr_base.hdr_flags & OMPI_OSC_PT2PT_HDR_FLAG_NBO) {
OMPI_OSC_PT2PT_SEND_HDR_NTOH(*header);
}
#endif
if (!ompi_win_exposure_epoch(module->p2p_win)) {
if (OMPI_WIN_FENCE & ompi_win_get_mode(module->p2p_win)) {
ompi_win_set_mode(module->p2p_win,
OMPI_WIN_FENCE |
OMPI_WIN_ACCESS_EPOCH |
OMPI_WIN_EXPOSE_EPOCH);
}
}
/* receive into temporary buffer */
ret = ompi_osc_pt2pt_sendreq_recv_accum(module, header, payload);
}
break;
case OMPI_OSC_PT2PT_HDR_GET:
{
/* get our header and payload */
ompi_osc_pt2pt_send_header_t *header =
(ompi_osc_pt2pt_send_header_t*) buffer->payload;
void *payload = (void*) (header + 1);
ompi_datatype_t *datatype;
ompi_osc_pt2pt_replyreq_t *replyreq;
ompi_proc_t *proc;
#if !defined(WORDS_BIGENDIAN) && OPAL_ENABLE_HETEROGENEOUS_SUPPORT
if (header->hdr_base.hdr_flags & OMPI_OSC_PT2PT_HDR_FLAG_NBO) {
OMPI_OSC_PT2PT_SEND_HDR_NTOH(*header);
}
#endif
if (!ompi_win_exposure_epoch(module->p2p_win)) {
if (OMPI_WIN_FENCE & ompi_win_get_mode(module->p2p_win)) {
ompi_win_set_mode(module->p2p_win,
OMPI_WIN_FENCE |
OMPI_WIN_ACCESS_EPOCH |
OMPI_WIN_EXPOSE_EPOCH);
}
}
/* create or get a pointer to our datatype */
proc = ompi_comm_peer_lookup( module->p2p_comm, header->hdr_origin );
datatype = ompi_osc_base_datatype_create(proc, &payload);
if (NULL == datatype) {
opal_output(ompi_osc_base_framework.framework_output,
"Error recreating datatype. Aborting.");
ompi_mpi_abort(module->p2p_comm, 1, false);
}
/* create replyreq sendreq */
ret = ompi_osc_pt2pt_replyreq_alloc_init(module,
header->hdr_origin,
header->hdr_origin_sendreq,
header->hdr_target_disp,
header->hdr_target_count,
datatype,
&replyreq);
/* send replyreq */
ompi_osc_pt2pt_replyreq_send(module, replyreq);
/* sendreq does the right retain, so we can release safely */
OBJ_RELEASE(datatype);
}
break;
case OMPI_OSC_PT2PT_HDR_REPLY:
{
ompi_osc_pt2pt_reply_header_t *header =
(ompi_osc_pt2pt_reply_header_t*) buffer->payload;
void *payload = (void*) (header + 1);
ompi_osc_pt2pt_sendreq_t *sendreq;
#if !defined(WORDS_BIGENDIAN) && OPAL_ENABLE_HETEROGENEOUS_SUPPORT
if (header->hdr_base.hdr_flags & OMPI_OSC_PT2PT_HDR_FLAG_NBO) {
OMPI_OSC_PT2PT_REPLY_HDR_NTOH(*header);
}
#endif
/* get original sendreq pointer */
sendreq = (ompi_osc_pt2pt_sendreq_t*) header->hdr_origin_sendreq.pval;
module = sendreq->req_module;
/* receive data */
ompi_osc_pt2pt_replyreq_recv(module, sendreq, header, payload);
}
break;
case OMPI_OSC_PT2PT_HDR_POST:
{
int32_t count;
OPAL_THREAD_LOCK(&module->p2p_lock);
count = (module->p2p_num_post_msgs -= 1);
OPAL_THREAD_UNLOCK(&module->p2p_lock);
if (count == 0) opal_condition_broadcast(&module->p2p_cond);
}
break;
case OMPI_OSC_PT2PT_HDR_COMPLETE:
{
ompi_osc_pt2pt_control_header_t *header =
(ompi_osc_pt2pt_control_header_t*) buffer->payload;
int32_t count;
#if !defined(WORDS_BIGENDIAN) && OPAL_ENABLE_HETEROGENEOUS_SUPPORT
if (header->hdr_base.hdr_flags & OMPI_OSC_PT2PT_HDR_FLAG_NBO) {
OMPI_OSC_PT2PT_CONTROL_HDR_NTOH(*header);
}
#endif
/* we've heard from one more place, and have value reqs to
process */
OPAL_THREAD_LOCK(&module->p2p_lock);
count = (module->p2p_num_complete_msgs -= 1);
count += (module->p2p_num_pending_in += header->hdr_value[0]);
OPAL_THREAD_UNLOCK(&module->p2p_lock);
if (count == 0) opal_condition_broadcast(&module->p2p_cond);
}
break;
case OMPI_OSC_PT2PT_HDR_LOCK_REQ:
{
ompi_osc_pt2pt_control_header_t *header =
(ompi_osc_pt2pt_control_header_t*) buffer->payload;
int32_t count;
#if !defined(WORDS_BIGENDIAN) && OPAL_ENABLE_HETEROGENEOUS_SUPPORT
if (header->hdr_base.hdr_flags & OMPI_OSC_PT2PT_HDR_FLAG_NBO) {
OMPI_OSC_PT2PT_CONTROL_HDR_NTOH(*header);
}
#endif
if (header->hdr_value[1] > 0) {
ompi_osc_pt2pt_passive_lock(module, header->hdr_value[0],
header->hdr_value[1]);
} else {
OPAL_THREAD_LOCK(&module->p2p_lock);
count = (module->p2p_lock_received_ack += 1);
OPAL_THREAD_UNLOCK(&module->p2p_lock);
if (count != 0) opal_condition_broadcast(&module->p2p_cond);
}
}
break;
case OMPI_OSC_PT2PT_HDR_UNLOCK_REQ:
{
ompi_osc_pt2pt_control_header_t *header =
(ompi_osc_pt2pt_control_header_t*) buffer->payload;
#if !defined(WORDS_BIGENDIAN) && OPAL_ENABLE_HETEROGENEOUS_SUPPORT
if (header->hdr_base.hdr_flags & OMPI_OSC_PT2PT_HDR_FLAG_NBO) {
OMPI_OSC_PT2PT_CONTROL_HDR_NTOH(*header);
}
#endif
ompi_osc_pt2pt_passive_unlock(module, header->hdr_value[0],
header->hdr_value[1]);
}
break;
case OMPI_OSC_PT2PT_HDR_UNLOCK_REPLY:
{
int32_t count;
OPAL_THREAD_LOCK(&module->p2p_lock);
count = (module->p2p_num_pending_out -= 1);
OPAL_THREAD_UNLOCK(&module->p2p_lock);
if (count == 0) opal_condition_broadcast(&module->p2p_cond);
}
break;
default:
opal_output_verbose(5, ompi_osc_base_framework.framework_output,
"received one-sided packet for with unknown type");
}
ompi_request_free(&request);
ret = ompi_osc_pt2pt_component_irecv(buffer->payload,
mca_osc_pt2pt_component.p2p_c_eager_size,
MPI_BYTE,
MPI_ANY_SOURCE,
CONTROL_MSG_TAG,
module->p2p_comm,
&buffer->request,
component_fragment_cb,
buffer);
return ret;
}
int
ompi_osc_pt2pt_component_irecv(void *buf,
size_t count,
struct ompi_datatype_t *datatype,
int src,
int tag,
struct ompi_communicator_t *comm,
ompi_request_t **request,
ompi_request_complete_fn_t callback,
void *cbdata)
{
int ret;
bool missed_callback;
ompi_request_complete_fn_t tmp;
ret = MCA_PML_CALL(irecv(buf, count, datatype,
src, tag, comm, request));
if (OMPI_SUCCESS != ret) return ret;
/* lock the giant request mutex to update the callback data so
that the PML can't mark the request as complete while we're
updating the callback data, which means we can
deterministically ensure the callback is only fired once and
that we didn't miss it. */
OPAL_THREAD_LOCK(&ompi_request_lock);
(*request)->req_complete_cb = callback;
(*request)->req_complete_cb_data = cbdata;
missed_callback = (*request)->req_complete;
OPAL_THREAD_UNLOCK(&ompi_request_lock);
if (missed_callback) {
tmp = (*request)->req_complete_cb;
(*request)->req_complete_cb = NULL;
tmp(*request);
}
return OMPI_SUCCESS;
}
int
ompi_osc_pt2pt_component_isend(void *buf,
size_t count,
struct ompi_datatype_t *datatype,
int dest,
int tag,
struct ompi_communicator_t *comm,
ompi_request_t **request,
ompi_request_complete_fn_t callback,
void *cbdata)
{
int ret;
bool missed_callback;
ompi_request_complete_fn_t tmp;
ret = MCA_PML_CALL(isend(buf, count, datatype,
dest, tag, MCA_PML_BASE_SEND_STANDARD, comm, request));
if (OMPI_SUCCESS != ret) return ret;
/* lock the giant request mutex to update the callback data so
that the PML can't mark the request as complete while we're
updating the callback data, which means we can
deterministically ensure the callback is only fired once and
that we didn't miss it. */
OPAL_THREAD_LOCK(&ompi_request_lock);
(*request)->req_complete_cb = callback;
(*request)->req_complete_cb_data = cbdata;
missed_callback = (*request)->req_complete;
OPAL_THREAD_UNLOCK(&ompi_request_lock);
if (missed_callback) {
tmp = (*request)->req_complete_cb;
(*request)->req_complete_cb = NULL;
tmp(*request);
}
return OMPI_SUCCESS;
}

File diff suppressed because it is too large

View File

@ -1,55 +0,0 @@
/*
* Copyright (c) 2004-2005 The Trustees of Indiana University.
* All rights reserved.
* Copyright (c) 2004-2005 The Trustees of the University of Tennessee.
* All rights reserved.
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
* University of Stuttgart. All rights reserved.
* Copyright (c) 2004-2005 The Regents of the University of California.
* All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
#ifndef OMPI_MCA_OSC_PT2PT_DATA_MOVE_H
#define OMPI_MCA_OSC_PT2PT_DATA_MOVE_H
#include "osc_pt2pt_sendreq.h"
#include "osc_pt2pt_replyreq.h"
/* send a sendreq (the request from the origin for a Put, Get, or
Accumulate, including the payload for Put and Accumulate) */
int ompi_osc_pt2pt_sendreq_send(ompi_osc_pt2pt_module_t *module,
ompi_osc_pt2pt_sendreq_t *sendreq);
/* send a replyreq (the request from the target of a Get, with the
payload for the origin) */
int ompi_osc_pt2pt_replyreq_send(ompi_osc_pt2pt_module_t *module,
ompi_osc_pt2pt_replyreq_t *replyreq);
/* receive the target side of a sendreq for a put, directly into the user's window */
int ompi_osc_pt2pt_sendreq_recv_put(ompi_osc_pt2pt_module_t *module,
ompi_osc_pt2pt_send_header_t *header,
void *payload);
/* receive the target side of a sendreq for an accumulate, possibly
using a temporary buffer, then calling the reduction functions */
int ompi_osc_pt2pt_sendreq_recv_accum(ompi_osc_pt2pt_module_t *module,
ompi_osc_pt2pt_send_header_t *header,
void *payload);
/* receive the origin side of a replyreq (the reply part of an
MPI_Get), directly into the user's window */
int ompi_osc_pt2pt_replyreq_recv(ompi_osc_pt2pt_module_t *module,
ompi_osc_pt2pt_sendreq_t *sendreq,
ompi_osc_pt2pt_reply_header_t *header,
void *payload);
int ompi_osc_pt2pt_control_send(ompi_osc_pt2pt_module_t *module,
ompi_proc_t *proc,
uint8_t type, int32_t value0, int32_t value1);
#endif

View File

@ -1,134 +0,0 @@
/*
* Copyright (c) 2004-2005 The Trustees of Indiana University.
* All rights reserved.
* Copyright (c) 2004-2005 The Trustees of the University of Tennessee.
* All rights reserved.
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
* University of Stuttgart. All rights reserved.
* Copyright (c) 2004-2005 The Regents of the University of California.
* All rights reserved.
* Copyright (c) 2006-2007 Los Alamos National Security, LLC. All rights
* reserved.
* Copyright (c) 2010 Cisco Systems, Inc. All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
#ifndef OMPI_MCA_OSC_PT2PT_HDR_H
#define OMPI_MCA_OSC_PT2PT_HDR_H
#ifdef HAVE_NETINET_IN_H
#include <netinet/in.h>
#endif
#include "opal/types.h"
#define OMPI_OSC_PT2PT_HDR_PUT 0x0001
#define OMPI_OSC_PT2PT_HDR_ACC 0x0002
#define OMPI_OSC_PT2PT_HDR_GET 0x0003
#define OMPI_OSC_PT2PT_HDR_REPLY 0x0004
#define OMPI_OSC_PT2PT_HDR_POST 0x0005
#define OMPI_OSC_PT2PT_HDR_COMPLETE 0x0006
#define OMPI_OSC_PT2PT_HDR_LOCK_REQ 0x0007
#define OMPI_OSC_PT2PT_HDR_UNLOCK_REQ 0x0008
#define OMPI_OSC_PT2PT_HDR_UNLOCK_REPLY 0x0009
#define OMPI_OSC_PT2PT_HDR_FLAG_NBO 0x0001
struct ompi_osc_pt2pt_base_header_t {
uint8_t hdr_type;
uint8_t hdr_flags;
#if OPAL_ENABLE_HETEROGENEOUS_SUPPORT
uint8_t padding[2];
#endif
};
typedef struct ompi_osc_pt2pt_base_header_t ompi_osc_pt2pt_base_header_t;
#define OMPI_OSC_PT2PT_BASE_HDR_NTOH(h)
#define OMPI_OSC_PT2PT_BASE_HDR_HTON(h)
struct ompi_osc_pt2pt_send_header_t {
ompi_osc_pt2pt_base_header_t hdr_base;
int32_t hdr_origin;
ompi_ptr_t hdr_origin_sendreq;
int32_t hdr_origin_tag;
uint64_t hdr_target_disp;
int32_t hdr_target_count;
int32_t hdr_target_op;
int32_t hdr_msg_length; /* 0 if payload is not included */
};
typedef struct ompi_osc_pt2pt_send_header_t ompi_osc_pt2pt_send_header_t;
#define OMPI_OSC_PT2PT_SEND_HDR_HTON(hdr) \
do { \
OMPI_OSC_PT2PT_BASE_HDR_HTON((hdr).hdr_base) \
(hdr).hdr_origin = htonl((hdr).hdr_origin); \
(hdr).hdr_origin_tag = htonl((hdr).hdr_origin_tag); \
(hdr).hdr_target_disp = hton64((hdr).hdr_target_disp); \
(hdr).hdr_target_count = htonl((hdr).hdr_target_count); \
(hdr).hdr_target_op = htonl((hdr).hdr_target_op); \
(hdr).hdr_msg_length = htonl((hdr).hdr_msg_length); \
} while (0)
#define OMPI_OSC_PT2PT_SEND_HDR_NTOH(hdr) \
do { \
OMPI_OSC_PT2PT_BASE_HDR_NTOH((hdr).hdr_base) \
(hdr).hdr_origin = ntohl((hdr).hdr_origin); \
(hdr).hdr_origin_tag = ntohl((hdr).hdr_origin_tag); \
(hdr).hdr_target_disp = ntoh64((hdr).hdr_target_disp); \
(hdr).hdr_target_count = ntohl((hdr).hdr_target_count); \
(hdr).hdr_target_op = ntohl((hdr).hdr_target_op); \
(hdr).hdr_msg_length = ntohl((hdr).hdr_msg_length); \
} while (0)
struct ompi_osc_pt2pt_reply_header_t {
ompi_osc_pt2pt_base_header_t hdr_base;
int32_t hdr_target_tag;
ompi_ptr_t hdr_origin_sendreq;
int32_t hdr_msg_length;
};
typedef struct ompi_osc_pt2pt_reply_header_t ompi_osc_pt2pt_reply_header_t;
#define OMPI_OSC_PT2PT_REPLY_HDR_HTON(hdr) \
do { \
OMPI_OSC_PT2PT_BASE_HDR_HTON((hdr).hdr_base) \
(hdr).hdr_target_tag = htonl((hdr).hdr_target_tag); \
(hdr).hdr_msg_length = htonl((hdr).hdr_msg_length); \
} while (0)
#define OMPI_OSC_PT2PT_REPLY_HDR_NTOH(hdr) \
do { \
OMPI_OSC_PT2PT_BASE_HDR_NTOH((hdr).hdr_base) \
(hdr).hdr_target_tag = ntohl((hdr).hdr_target_tag); \
(hdr).hdr_msg_length = ntohl((hdr).hdr_msg_length); \
} while (0)
struct ompi_osc_pt2pt_control_header_t {
ompi_osc_pt2pt_base_header_t hdr_base;
int32_t hdr_value[2];
};
typedef struct ompi_osc_pt2pt_control_header_t ompi_osc_pt2pt_control_header_t;
#define OMPI_OSC_PT2PT_CONTROL_HDR_HTON(hdr) \
do { \
OMPI_OSC_PT2PT_BASE_HDR_HTON((hdr).hdr_base) \
(hdr).hdr_value[0] = htonl((hdr).hdr_value[0]); \
(hdr).hdr_value[1] = htonl((hdr).hdr_value[1]); \
} while (0)
#define OMPI_OSC_PT2PT_CONTROL_HDR_NTOH(hdr) \
do { \
OMPI_OSC_PT2PT_BASE_HDR_NTOH((hdr).hdr_base) \
(hdr).hdr_value[0] = ntohl((hdr).hdr_value[0]); \
(hdr).hdr_value[1] = ntohl((hdr).hdr_value[1]); \
} while (0)
#endif /* OMPI_MCA_OSC_PT2PT_HDR_H */
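In a heterogeneous build the sender sets OMPI_OSC_PT2PT_HDR_FLAG_NBO and applies the HTON macro before the header leaves the host; the receiver (see the HDR_COMPLETE handling earlier) converts back only when that flag is set. The following is a hedged sketch of the sending side, assuming only the definitions above; the peer_differs test is a hypothetical input from the caller:

/* Illustrative sketch, not part of the patch: fill a COMPLETE control
 * header and convert it to network byte order when the peer's endianness
 * differs from ours. */
static void fill_complete_header(ompi_osc_pt2pt_control_header_t *header,
                                 int32_t num_sendreqs, int peer_differs)
{
    header->hdr_base.hdr_type  = OMPI_OSC_PT2PT_HDR_COMPLETE;
    header->hdr_base.hdr_flags = 0;
    header->hdr_value[0] = num_sendreqs;   /* updates headed to the target */
    header->hdr_value[1] = 0;

#if !defined(WORDS_BIGENDIAN) && OPAL_ENABLE_HETEROGENEOUS_SUPPORT
    if (peer_differs) {
        header->hdr_base.hdr_flags |= OMPI_OSC_PT2PT_HDR_FLAG_NBO;
        OMPI_OSC_PT2PT_CONTROL_HDR_HTON(*header);
    }
#endif
}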

View File

@ -1,23 +0,0 @@
/*
* Copyright (c) 2004-2005 The Trustees of Indiana University.
* All rights reserved.
* Copyright (c) 2004-2005 The Trustees of the University of Tennessee.
* All rights reserved.
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
* University of Stuttgart. All rights reserved.
* Copyright (c) 2004-2005 The Regents of the University of California.
* All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
#include "ompi_config.h"
#include "osc_pt2pt_longreq.h"
OBJ_CLASS_INSTANCE(ompi_osc_pt2pt_longreq_t, opal_free_list_item_t,
NULL, NULL);

View File

@ -1,64 +0,0 @@
/*
* Copyright (c) 2004-2005 The Trustees of Indiana University.
* All rights reserved.
* Copyright (c) 2004-2005 The Trustees of the University of Tennessee.
* All rights reserved.
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
* University of Stuttgart. All rights reserved.
* Copyright (c) 2004-2005 The Regents of the University of California.
* All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
#ifndef OSC_PT2PT_LONGREQ_H
#define OSC_PT2PT_LONGREQ_H
#include "opal/class/opal_free_list.h"
#include "osc_pt2pt.h"
struct ompi_osc_pt2pt_longreq_t {
opal_free_list_item_t super;
struct ompi_request_t *req_pml_request; /* PML request */
union {
struct ompi_osc_pt2pt_sendreq_t *req_sendreq;
struct ompi_osc_pt2pt_replyreq_t *req_replyreq;
struct ompi_osc_pt2pt_send_header_t *req_sendhdr;
} req_basereq;
/* This may not always be filled in... */
struct ompi_osc_pt2pt_module_t *req_module;
struct ompi_op_t *req_op;
struct ompi_datatype_t *req_datatype;
};
typedef struct ompi_osc_pt2pt_longreq_t ompi_osc_pt2pt_longreq_t;
OBJ_CLASS_DECLARATION(ompi_osc_pt2pt_longreq_t);
static inline int
ompi_osc_pt2pt_longreq_alloc(ompi_osc_pt2pt_longreq_t **longreq)
{
opal_free_list_item_t *item;
int ret;
OPAL_FREE_LIST_GET(&mca_osc_pt2pt_component.p2p_c_longreqs,
item, ret);
*longreq = (ompi_osc_pt2pt_longreq_t*) item;
return ret;
}
static inline int
ompi_osc_pt2pt_longreq_free(ompi_osc_pt2pt_longreq_t *longreq)
{
OPAL_FREE_LIST_RETURN(&mca_osc_pt2pt_component.p2p_c_longreqs,
&longreq->super);
return OMPI_SUCCESS;
}
#endif

View File

@ -1,80 +0,0 @@
/*
* Copyright (c) 2004-2005 The Trustees of Indiana University.
* All rights reserved.
* Copyright (c) 2004-2005 The Trustees of the University of Tennessee.
* All rights reserved.
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
* University of Stuttgart. All rights reserved.
* Copyright (c) 2004-2005 The Regents of the University of California.
* All rights reserved.
* Copyright (c) 2010 Cisco Systems, Inc. All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
#include "ompi_config.h"
#include "osc_pt2pt_replyreq.h"
#include "opal/class/opal_list.h"
#include "opal/datatype/opal_convertor.h"
int
ompi_osc_pt2pt_replyreq_alloc_init(ompi_osc_pt2pt_module_t *module,
int origin,
ompi_ptr_t origin_request,
OPAL_PTRDIFF_TYPE target_displacement,
int target_count,
struct ompi_datatype_t *datatype,
ompi_osc_pt2pt_replyreq_t **replyreq)
{
int ret;
void *target_addr = (unsigned char*) module->p2p_win->w_baseptr +
(target_displacement * module->p2p_win->w_disp_unit);
/* allocate a replyreq */
ret = ompi_osc_pt2pt_replyreq_alloc(module,
origin,
replyreq);
if (OMPI_SUCCESS != ret) return ret;
/* initialize local side of replyreq */
ret = ompi_osc_pt2pt_replyreq_init_target(*replyreq,
target_addr,
target_count,
datatype);
if (OMPI_SUCCESS != ret) {
ompi_osc_pt2pt_replyreq_free(*replyreq);
return ret;
}
/* initialize remote side of replyreq */
ret = ompi_osc_pt2pt_replyreq_init_origin(*replyreq,
origin_request);
if (OMPI_SUCCESS != ret) {
ompi_osc_pt2pt_replyreq_free(*replyreq);
return ret;
}
return OMPI_SUCCESS;
}
static void ompi_osc_pt2pt_replyreq_construct(ompi_osc_pt2pt_replyreq_t *replyreq)
{
OBJ_CONSTRUCT(&(replyreq->rep_target_convertor), opal_convertor_t);
}
static void ompi_osc_pt2pt_replyreq_destruct(ompi_osc_pt2pt_replyreq_t *replyreq)
{
OBJ_DESTRUCT(&(replyreq->rep_target_convertor));
}
OBJ_CLASS_INSTANCE(ompi_osc_pt2pt_replyreq_t, opal_list_item_t,
ompi_osc_pt2pt_replyreq_construct,
ompi_osc_pt2pt_replyreq_destruct);

View File

@ -1,143 +0,0 @@
/*
* Copyright (c) 2004-2005 The Trustees of Indiana University.
* All rights reserved.
* Copyright (c) 2004-2005 The Trustees of the University of Tennessee.
* All rights reserved.
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
* University of Stuttgart. All rights reserved.
* Copyright (c) 2004-2005 The Regents of the University of California.
* All rights reserved.
* Copyright (c) 2010 Cisco Systems, Inc. All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
#ifndef OMPI_OSC_PT2PT_REPLYREQ_H
#define OMPI_OSC_PT2PT_REPLYREQ_H
#include "osc_pt2pt.h"
#include "osc_pt2pt_longreq.h"
#include "opal/types.h"
#include "opal/class/opal_list.h"
#include "ompi/datatype/ompi_datatype.h"
#include "opal/datatype/opal_convertor.h"
#include "ompi/communicator/communicator.h"
#include "ompi/proc/proc.h"
#include "ompi/memchecker.h"
struct ompi_osc_pt2pt_replyreq_t {
opal_list_item_t super;
/** pointer to the module that created the replyreq */
ompi_osc_pt2pt_module_t *rep_module;
/** Datatype for the target side of the operation */
struct ompi_datatype_t *rep_target_datatype;
/** Convertor for the target. Always setup for send. */
opal_convertor_t rep_target_convertor;
/** packed size of message on the target side */
size_t rep_target_bytes_packed;
/** rank in module's communicator for origin of operation */
int rep_origin_rank;
/** pointer to the proc structure for the origin of the operation */
ompi_proc_t *rep_origin_proc;
ompi_ptr_t rep_origin_sendreq;
};
typedef struct ompi_osc_pt2pt_replyreq_t ompi_osc_pt2pt_replyreq_t;
OBJ_CLASS_DECLARATION(ompi_osc_pt2pt_replyreq_t);
/** allocate and populate a replyreq structure. datatype is
RETAINed for the life of the replyreq */
int
ompi_osc_pt2pt_replyreq_alloc_init(ompi_osc_pt2pt_module_t *module,
int origin,
ompi_ptr_t origin_request,
OPAL_PTRDIFF_TYPE target_displacement,
int target_count,
struct ompi_datatype_t *datatype,
ompi_osc_pt2pt_replyreq_t **replyreq);
static inline int
ompi_osc_pt2pt_replyreq_alloc(ompi_osc_pt2pt_module_t *module,
int origin_rank,
ompi_osc_pt2pt_replyreq_t **replyreq)
{
int ret;
opal_free_list_item_t *item;
ompi_proc_t *proc = ompi_comm_peer_lookup( module->p2p_comm, origin_rank );
/* BWB - FIX ME - is this really the right return code? */
if (NULL == proc) return OMPI_ERR_OUT_OF_RESOURCE;
OPAL_FREE_LIST_GET(&mca_osc_pt2pt_component.p2p_c_replyreqs,
item, ret);
if (OMPI_SUCCESS != ret) return ret;
*replyreq = (ompi_osc_pt2pt_replyreq_t*) item;
(*replyreq)->rep_module = module;
(*replyreq)->rep_origin_rank = origin_rank;
(*replyreq)->rep_origin_proc = proc;
return OMPI_SUCCESS;
}
static inline int
ompi_osc_pt2pt_replyreq_init_target(ompi_osc_pt2pt_replyreq_t *replyreq,
void *target_addr,
int target_count,
struct ompi_datatype_t *target_dt)
{
OBJ_RETAIN(target_dt);
replyreq->rep_target_datatype = target_dt;
opal_convertor_copy_and_prepare_for_send(replyreq->rep_origin_proc->proc_convertor,
&(target_dt->super),
target_count,
target_addr,
0,
&(replyreq->rep_target_convertor));
opal_convertor_get_packed_size(&replyreq->rep_target_convertor,
&replyreq->rep_target_bytes_packed);
return OMPI_SUCCESS;
}
static inline int
ompi_osc_pt2pt_replyreq_init_origin(ompi_osc_pt2pt_replyreq_t *replyreq,
ompi_ptr_t origin_request)
{
replyreq->rep_origin_sendreq = origin_request;
return OMPI_SUCCESS;
}
static inline int
ompi_osc_pt2pt_replyreq_free(ompi_osc_pt2pt_replyreq_t *replyreq)
{
MEMCHECKER(
memchecker_convertor_call(&opal_memchecker_base_mem_defined,
&replyreq->rep_target_convertor);
);
opal_convertor_cleanup(&replyreq->rep_target_convertor);
OBJ_RELEASE(replyreq->rep_target_datatype);
OPAL_FREE_LIST_RETURN(&mca_osc_pt2pt_component.p2p_c_replyreqs,
(opal_list_item_t*) replyreq);
return OMPI_SUCCESS;
}
#endif /* OMPI_OSC_PT2PT_REPLYREQ_H */
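Putting these pieces together, the target side of an MPI_Get would build a replyreq from the fields of the incoming send header and hand it to ompi_osc_pt2pt_replyreq_send. The sketch below is illustrative only and not part of the patch; recovering the target datatype from the payload is elided and assumed to be done by a hypothetical caller:

/* Hypothetical handler sketch showing the replyreq lifecycle on the target
 * side of a GET, using the routines declared above. */
static int handle_get_header(ompi_osc_pt2pt_module_t *module,
                             ompi_osc_pt2pt_send_header_t *header,
                             struct ompi_datatype_t *target_dt /* recovered from payload */)
{
    ompi_osc_pt2pt_replyreq_t *replyreq;
    int ret;

    ret = ompi_osc_pt2pt_replyreq_alloc_init(module,
                                             header->hdr_origin,
                                             header->hdr_origin_sendreq,
                                             (OPAL_PTRDIFF_TYPE) header->hdr_target_disp,
                                             header->hdr_target_count,
                                             target_dt,
                                             &replyreq);
    if (OMPI_SUCCESS != ret) return ret;

    /* pack and ship the reply data back to the origin */
    return ompi_osc_pt2pt_replyreq_send(module, replyreq);
}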

View File

@ -1,85 +0,0 @@
/*
* Copyright (c) 2004-2005 The Trustees of Indiana University.
* All rights reserved.
* Copyright (c) 2004-2005 The Trustees of the University of Tennessee.
* All rights reserved.
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
* University of Stuttgart. All rights reserved.
* Copyright (c) 2004-2005 The Regents of the University of California.
* All rights reserved.
* Copyright (c) 2010 Cisco Systems, Inc. All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
#include "ompi_config.h"
#include "osc_pt2pt_sendreq.h"
#include "opal/datatype/opal_convertor.h"
int
ompi_osc_pt2pt_sendreq_alloc_init(ompi_osc_pt2pt_req_type_t req_type,
void *origin_addr, int origin_count,
struct ompi_datatype_t *origin_dt,
int target,
OPAL_PTRDIFF_TYPE target_disp,
int target_count,
struct ompi_datatype_t *target_dt,
ompi_osc_pt2pt_module_t *module,
ompi_osc_pt2pt_sendreq_t **sendreq)
{
int ret;
/* allocate a sendreq */
ret = ompi_osc_pt2pt_sendreq_alloc(module, target,
sendreq);
if (OMPI_SUCCESS != ret) return ret;
/* initialize local side of sendreq */
ret = ompi_osc_pt2pt_sendreq_init_origin(*sendreq,
req_type,
origin_addr,
origin_count,
origin_dt);
if (OMPI_SUCCESS != ret) {
ompi_osc_pt2pt_sendreq_free(*sendreq);
return ret;
}
/* initialize remote side of sendreq */
ret = ompi_osc_pt2pt_sendreq_init_target(*sendreq,
target_disp,
target_count,
target_dt);
if (OMPI_SUCCESS != ret) {
ompi_osc_pt2pt_sendreq_free(*sendreq);
return ret;
}
return OMPI_SUCCESS;
}
static void ompi_osc_pt2pt_sendreq_construct(ompi_osc_pt2pt_sendreq_t *req)
{
req->super.req_type = OMPI_REQUEST_WIN;
req->super.req_free = NULL;
req->super.req_cancel = NULL;
OBJ_CONSTRUCT(&(req->req_origin_convertor), opal_convertor_t);
}
static void ompi_osc_pt2pt_sendreq_destruct(ompi_osc_pt2pt_sendreq_t *req)
{
OBJ_DESTRUCT(&(req->req_origin_convertor));
}
OBJ_CLASS_INSTANCE(ompi_osc_pt2pt_sendreq_t, ompi_request_t,
ompi_osc_pt2pt_sendreq_construct,
ompi_osc_pt2pt_sendreq_destruct);

View File

@ -1,180 +0,0 @@
/*
* Copyright (c) 2004-2005 The Trustees of Indiana University.
* All rights reserved.
* Copyright (c) 2004-2005 The Trustees of the University of Tennessee.
* All rights reserved.
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
* University of Stuttgart. All rights reserved.
* Copyright (c) 2004-2005 The Regents of the University of California.
* All rights reserved.
* Copyright (c) 2010 Cisco Systems, Inc. All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
#ifndef OMPI_OSC_PT2PT_SENDREQ_H
#define OMPI_OSC_PT2PT_SENDREQ_H
#include "osc_pt2pt.h"
#include "osc_pt2pt_longreq.h"
#include "opal/class/opal_list.h"
#include "ompi/datatype/ompi_datatype.h"
#include "opal/datatype/opal_convertor.h"
#include "ompi/communicator/communicator.h"
#include "ompi/proc/proc.h"
#include "ompi/memchecker.h"
typedef enum {
OMPI_OSC_PT2PT_GET,
OMPI_OSC_PT2PT_ACC,
OMPI_OSC_PT2PT_PUT
} ompi_osc_pt2pt_req_type_t;
struct ompi_osc_pt2pt_sendreq_t {
ompi_request_t super;
/** type of sendreq (from ompi_osc_pt2pt_req_type_t) */
ompi_osc_pt2pt_req_type_t req_type;
/** pointer to the module that created the sendreq */
ompi_osc_pt2pt_module_t *req_module;
/** Datatype for the origin side of the operation */
struct ompi_datatype_t *req_origin_datatype;
/** Convertor for the origin side of the operation. Setup for
either send (Put / Accumulate) or receive (Get) */
opal_convertor_t req_origin_convertor;
/** packed size of message on the origin side */
size_t req_origin_bytes_packed;
/** rank in module's communicator for target of operation */
int req_target_rank;
/** pointer to the proc structure for the target of the operation */
ompi_proc_t *req_target_proc;
/** displacement on target */
OPAL_PTRDIFF_TYPE req_target_disp;
/** datatype count on target */
int req_target_count;
/** datatype on target */
struct ompi_datatype_t *req_target_datatype;
/** op index on the target */
int req_op_id;
};
typedef struct ompi_osc_pt2pt_sendreq_t ompi_osc_pt2pt_sendreq_t;
OBJ_CLASS_DECLARATION(ompi_osc_pt2pt_sendreq_t);
/** allocate and populate a sendreq structure. Both datatypes are
RETAINed for the life of the sendreq */
int
ompi_osc_pt2pt_sendreq_alloc_init(ompi_osc_pt2pt_req_type_t req_type,
void *origin_addr, int origin_count,
struct ompi_datatype_t *origin_dt,
int target,
OPAL_PTRDIFF_TYPE target_disp,
int target_count,
struct ompi_datatype_t *target_datatype,
ompi_osc_pt2pt_module_t *module,
ompi_osc_pt2pt_sendreq_t **sendreq);
static inline int
ompi_osc_pt2pt_sendreq_alloc(ompi_osc_pt2pt_module_t *module,
int target_rank,
ompi_osc_pt2pt_sendreq_t **sendreq)
{
int ret;
opal_free_list_item_t *item;
ompi_proc_t *proc = ompi_comm_peer_lookup( module->p2p_comm, target_rank );
/* BWB - FIX ME - is this really the right return code? */
if (NULL == proc) return OMPI_ERR_OUT_OF_RESOURCE;
OPAL_FREE_LIST_GET(&mca_osc_pt2pt_component.p2p_c_sendreqs,
item, ret);
if (OMPI_SUCCESS != ret) return ret;
*sendreq = (ompi_osc_pt2pt_sendreq_t*) item;
(*sendreq)->req_module = module;
(*sendreq)->req_target_rank = target_rank;
(*sendreq)->req_target_proc = proc;
return OMPI_SUCCESS;
}
static inline int
ompi_osc_pt2pt_sendreq_init_origin(ompi_osc_pt2pt_sendreq_t *sendreq,
ompi_osc_pt2pt_req_type_t req_type,
void *origin_addr,
int origin_count,
struct ompi_datatype_t *origin_dt)
{
OBJ_RETAIN(origin_dt);
sendreq->req_origin_datatype = origin_dt;
sendreq->req_type = req_type;
if (req_type != OMPI_OSC_PT2PT_GET) {
opal_convertor_copy_and_prepare_for_send(sendreq->req_target_proc->proc_convertor,
&(origin_dt->super),
origin_count,
origin_addr,
0,
&(sendreq->req_origin_convertor));
opal_convertor_get_packed_size(&sendreq->req_origin_convertor,
&sendreq->req_origin_bytes_packed);
} else {
opal_convertor_copy_and_prepare_for_recv(sendreq->req_target_proc->proc_convertor,
&(origin_dt->super),
origin_count,
origin_addr,
0,
&(sendreq->req_origin_convertor));
opal_convertor_get_packed_size(&sendreq->req_origin_convertor,
&sendreq->req_origin_bytes_packed);
}
return OMPI_SUCCESS;
}
static inline int
ompi_osc_pt2pt_sendreq_init_target(ompi_osc_pt2pt_sendreq_t *sendreq,
OPAL_PTRDIFF_TYPE target_disp,
int target_count,
struct ompi_datatype_t *target_datatype)
{
OBJ_RETAIN(target_datatype);
sendreq->req_target_disp = target_disp;
sendreq->req_target_count = target_count;
sendreq->req_target_datatype = target_datatype;
return OMPI_SUCCESS;
}
static inline int
ompi_osc_pt2pt_sendreq_free(ompi_osc_pt2pt_sendreq_t *sendreq)
{
MEMCHECKER(
memchecker_convertor_call(&opal_memchecker_base_mem_defined,
&sendreq->req_origin_convertor);
);
opal_convertor_cleanup(&sendreq->req_origin_convertor);
OBJ_RELEASE(sendreq->req_target_datatype);
OBJ_RELEASE(sendreq->req_origin_datatype);
OPAL_FREE_LIST_RETURN(&mca_osc_pt2pt_component.p2p_c_sendreqs,
(opal_list_item_t*) sendreq);
return OMPI_SUCCESS;
}
#endif /* OMPI_OSC_PT2PT_SENDREQ_H */

View File

@ -1,698 +0,0 @@
/*
* Copyright (c) 2004-2005 The Trustees of Indiana University.
* All rights reserved.
* Copyright (c) 2004-2013 The University of Tennessee and The University
* of Tennessee Research Foundation. All rights
* reserved.
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
* University of Stuttgart. All rights reserved.
* Copyright (c) 2004-2005 The Regents of the University of California.
* All rights reserved.
* Copyright (c) 2012 Los Alamos National Security, LLC. All rights
* reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
#include "ompi_config.h"
#include "osc_pt2pt.h"
#include "osc_pt2pt_sendreq.h"
#include "osc_pt2pt_header.h"
#include "osc_pt2pt_data_move.h"
#include "mpi.h"
#include "opal/runtime/opal_progress.h"
#include "opal/threads/mutex.h"
#include "ompi/communicator/communicator.h"
#include "ompi/mca/osc/base/base.h"
/* Must hold module's lock before calling... */
static inline void
ompi_osc_pt2pt_flip_sendreqs(ompi_osc_pt2pt_module_t *module)
{
unsigned int *tmp;
tmp = module->p2p_copy_num_pending_sendreqs;
module->p2p_copy_num_pending_sendreqs =
module->p2p_num_pending_sendreqs;
module->p2p_num_pending_sendreqs = tmp;
memset(module->p2p_num_pending_sendreqs, 0,
sizeof(unsigned int) * ompi_comm_size(module->p2p_comm));
/* Copy in all the pending requests */
opal_list_join(&module->p2p_copy_pending_sendreqs,
opal_list_get_end(&module->p2p_copy_pending_sendreqs),
&module->p2p_pending_sendreqs);
}
int
ompi_osc_pt2pt_module_fence(int assert, ompi_win_t *win)
{
unsigned int incoming_reqs;
int ret = OMPI_SUCCESS, i;
ompi_osc_pt2pt_module_t *module = P2P_MODULE(win);
int num_outgoing = 0;
if (0 != (assert & MPI_MODE_NOPRECEDE)) {
/* check that the user didn't lie to us - since NOPRECEDE
must be specified by all processes if it is specified by
any process, if we see this it is safe to assume that there
are no pending operations anywhere needed to close out this
epoch. No need to lock, since it's a lookup and any
pending modification of the pending_sendreqs during this
time is an erroneous program. */
if (0 != opal_list_get_size(&(module->p2p_pending_sendreqs))) {
return MPI_ERR_RMA_SYNC;
}
} else {
opal_list_item_t *item;
/* "atomically" copy all the data we're going to be modifying
into the copy... */
OPAL_THREAD_LOCK(&(module->p2p_lock));
ompi_osc_pt2pt_flip_sendreqs(module);
OPAL_THREAD_UNLOCK(&(module->p2p_lock));
num_outgoing = opal_list_get_size(&(module->p2p_copy_pending_sendreqs));
/* find out how much data everyone is going to send us. */
ret = module->p2p_comm->
c_coll.coll_reduce_scatter(module->p2p_copy_num_pending_sendreqs,
&incoming_reqs,
module->p2p_fence_coll_counts,
MPI_UNSIGNED,
MPI_SUM,
module->p2p_comm,
module->p2p_comm->c_coll.coll_reduce_scatter_module);
if (OMPI_SUCCESS != ret) {
/* put the stupid data back for the user. This is not
cheap, but the user loses the data if we don't. */
OPAL_THREAD_LOCK(&(module->p2p_lock));
opal_list_join(&module->p2p_pending_sendreqs,
opal_list_get_end(&module->p2p_pending_sendreqs),
&module->p2p_copy_pending_sendreqs);
for (i = 0 ; i < ompi_comm_size(module->p2p_comm) ; ++i) {
module->p2p_num_pending_sendreqs[i] +=
module->p2p_copy_num_pending_sendreqs[i];
}
OPAL_THREAD_UNLOCK(&(module->p2p_lock));
return ret;
}
OPAL_OUTPUT_VERBOSE((50, ompi_osc_base_framework.framework_output,
"fence: waiting on %d in and %d out",
module->p2p_num_pending_in,
module->p2p_num_pending_out));
/* try to start all the requests. We've copied everything we
need out of pending_sendreqs, so don't need the lock
here */
while (NULL !=
(item = opal_list_remove_first(&(module->p2p_copy_pending_sendreqs)))) {
ompi_osc_pt2pt_sendreq_t *req =
(ompi_osc_pt2pt_sendreq_t*) item;
ret = ompi_osc_pt2pt_sendreq_send(module, req);
if (OMPI_ERR_TEMP_OUT_OF_RESOURCE == ret) {
opal_output_verbose(5, ompi_osc_base_framework.framework_output,
"complete: failure in starting sendreq (%d). Will try later.",
ret);
opal_list_append(&(module->p2p_copy_pending_sendreqs), item);
} else if (OMPI_SUCCESS != ret) {
return ret;
}
}
OPAL_THREAD_LOCK(&module->p2p_lock);
/* it's possible we've already received a couple of incoming messages, so
add however many we're going to wait for */
module->p2p_num_pending_in += incoming_reqs;
module->p2p_num_pending_out += num_outgoing;
/* now we know how many things we're waiting for - wait for them... */
while (module->p2p_num_pending_in > 0 ||
0 != module->p2p_num_pending_out) {
opal_condition_wait(&module->p2p_cond, &module->p2p_lock);
}
OPAL_THREAD_UNLOCK(&module->p2p_lock);
}
/* all transfers are done - back to the real world we go */
if (0 == (assert & MPI_MODE_NOSUCCEED)) {
ompi_win_set_mode(win, OMPI_WIN_FENCE);
} else {
ompi_win_set_mode(win, 0);
}
return OMPI_SUCCESS;
}
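At the MPI level this function backs MPI_Win_fence: MPI_MODE_NOPRECEDE lets the component skip the reduce_scatter entirely, and MPI_MODE_NOSUCCEED closes out the access epoch. A minimal, self-contained sketch of the user-level pattern the code above implements:

/* Minimal user-level fence example (illustrative, not part of the patch). */
#include <mpi.h>

void fence_example(MPI_Comm comm)
{
    int buf = 0, rank, size;
    MPI_Win win;

    MPI_Comm_rank(comm, &rank);
    MPI_Comm_size(comm, &size);

    MPI_Win_create(&buf, sizeof(int), sizeof(int), MPI_INFO_NULL, comm, &win);

    MPI_Win_fence(MPI_MODE_NOPRECEDE, win);           /* open the epoch  */
    MPI_Put(&rank, 1, MPI_INT, (rank + 1) % size, 0, 1, MPI_INT, win);
    MPI_Win_fence(MPI_MODE_NOSUCCEED, win);           /* close the epoch */

    MPI_Win_free(&win);
}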
int
ompi_osc_pt2pt_module_start(ompi_group_t *group,
int assert,
ompi_win_t *win)
{
int i, ret = OMPI_SUCCESS;
ompi_osc_pt2pt_module_t *module = P2P_MODULE(win);
OBJ_RETAIN(group);
ompi_group_increment_proc_count(group);
OPAL_THREAD_LOCK(&(module->p2p_lock));
if (NULL != module->p2p_sc_group) {
OPAL_THREAD_UNLOCK(&module->p2p_lock);
ret = MPI_ERR_RMA_SYNC;
goto cleanup;
}
module->p2p_sc_group = group;
/* it's possible we've already received a couple of incoming messages, so
add however many we're going to wait for */
module->p2p_num_post_msgs += ompi_group_size(module->p2p_sc_group);
OPAL_THREAD_UNLOCK(&(module->p2p_lock));
memset(module->p2p_sc_remote_active_ranks, 0,
sizeof(bool) * ompi_comm_size(module->p2p_comm));
/* for each process in the specified group, find its rank in our
communicator, store those indices, and set the true / false flag in
the active ranks table */
for (i = 0 ; i < ompi_group_size(group) ; i++) {
int comm_rank = -1, j;
/* find the rank in the communicator associated with this window */
for (j = 0 ; j < ompi_comm_size(module->p2p_comm) ; ++j) {
if (ompi_group_peer_lookup(module->p2p_sc_group, i) ==
ompi_comm_peer_lookup(module->p2p_comm, j)) {
comm_rank = j;
break;
}
}
if (comm_rank == -1) {
ret = MPI_ERR_RMA_SYNC;
goto cleanup;
}
module->p2p_sc_remote_active_ranks[comm_rank] = true;
module->p2p_sc_remote_ranks[i] = comm_rank;
}
/* Set our mode to access w/ start */
ompi_win_remove_mode(win, OMPI_WIN_FENCE);
ompi_win_append_mode(win, OMPI_WIN_ACCESS_EPOCH | OMPI_WIN_STARTED);
return OMPI_SUCCESS;
cleanup:
ompi_group_decrement_proc_count(group);
OBJ_RELEASE(group);
return ret;
}
int
ompi_osc_pt2pt_module_complete(ompi_win_t *win)
{
int i;
int ret = OMPI_SUCCESS;
ompi_group_t *group;
opal_list_item_t *item;
ompi_osc_pt2pt_module_t *module = P2P_MODULE(win);
/* wait for all the post messages */
OPAL_THREAD_LOCK(&module->p2p_lock);
while (0 != module->p2p_num_post_msgs) {
opal_condition_wait(&module->p2p_cond, &module->p2p_lock);
}
ompi_osc_pt2pt_flip_sendreqs(module);
/* for each process in group, send a control message with number
of updates coming, then start all the requests */
for (i = 0 ; i < ompi_group_size(module->p2p_sc_group) ; ++i) {
int comm_rank = module->p2p_sc_remote_ranks[i];
module->p2p_num_pending_out +=
module->p2p_copy_num_pending_sendreqs[comm_rank];
}
OPAL_THREAD_UNLOCK(&module->p2p_lock);
for (i = 0 ; i < ompi_group_size(module->p2p_sc_group) ; ++i) {
int comm_rank = module->p2p_sc_remote_ranks[i];
ret = ompi_osc_pt2pt_control_send(module,
ompi_group_peer_lookup(module->p2p_sc_group, i),
OMPI_OSC_PT2PT_HDR_COMPLETE,
module->p2p_copy_num_pending_sendreqs[comm_rank],
0);
assert(ret == OMPI_SUCCESS);
}
/* try to start all the requests. We've copied everything we
need out of pending_sendreqs, so don't need the lock
here */
while (NULL !=
(item = opal_list_remove_first(&(module->p2p_copy_pending_sendreqs)))) {
ompi_osc_pt2pt_sendreq_t *req =
(ompi_osc_pt2pt_sendreq_t*) item;
ret = ompi_osc_pt2pt_sendreq_send(module, req);
if (OMPI_ERR_TEMP_OUT_OF_RESOURCE == ret) {
opal_output_verbose(5, ompi_osc_base_framework.framework_output,
"complete: failure in starting sendreq (%d). Will try later.",
ret);
opal_list_append(&(module->p2p_copy_pending_sendreqs), item);
} else if (OMPI_SUCCESS != ret) {
return ret;
}
}
/* wait for all the requests */
OPAL_THREAD_LOCK(&module->p2p_lock);
while (0 != module->p2p_num_pending_out) {
opal_condition_wait(&module->p2p_cond, &module->p2p_lock);
}
group = module->p2p_sc_group;
module->p2p_sc_group = NULL;
OPAL_THREAD_UNLOCK(&module->p2p_lock);
/* remove the started access epoch from our mode */
ompi_win_remove_mode(win, OMPI_WIN_ACCESS_EPOCH | OMPI_WIN_STARTED);
ompi_group_decrement_proc_count(group);
OBJ_RELEASE(group);
return ret;
}
int
ompi_osc_pt2pt_module_post(ompi_group_t *group,
int assert,
ompi_win_t *win)
{
int i;
ompi_osc_pt2pt_module_t *module = P2P_MODULE(win);
OBJ_RETAIN(group);
ompi_group_increment_proc_count(group);
OPAL_THREAD_LOCK(&(module->p2p_lock));
assert(NULL == module->p2p_pw_group);
module->p2p_pw_group = group;
/* Set our mode to expose w/ post */
ompi_win_remove_mode(win, OMPI_WIN_FENCE);
ompi_win_append_mode(win, OMPI_WIN_EXPOSE_EPOCH | OMPI_WIN_POSTED);
/* record how many complete messages we're still waiting on */
module->p2p_num_complete_msgs +=
ompi_group_size(module->p2p_pw_group);
OPAL_THREAD_UNLOCK(&(module->p2p_lock));
/* send a hello counter to everyone in group */
for (i = 0 ; i < ompi_group_size(module->p2p_pw_group) ; ++i) {
ompi_osc_pt2pt_control_send(module,
ompi_group_peer_lookup(group, i),
OMPI_OSC_PT2PT_HDR_POST, 1, 0);
}
return OMPI_SUCCESS;
}
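module_start, module_complete, module_post, and module_wait together implement generalized active-target (PSCW) synchronization; the POST and COMPLETE control headers counted above correspond directly to the window calls in a user program such as the following sketch:

/* Minimal user-level PSCW example (illustrative, not part of the patch):
 * every rank exposes its window to every other rank via the same group. */
#include <mpi.h>

void pscw_example(MPI_Comm comm)
{
    int buf = 0, rank;
    MPI_Group world_group;
    MPI_Win win;

    MPI_Comm_rank(comm, &rank);
    MPI_Comm_group(comm, &world_group);

    MPI_Win_create(&buf, sizeof(int), sizeof(int), MPI_INFO_NULL, comm, &win);

    MPI_Win_post(world_group, 0, win);     /* expose: sends POST headers      */
    MPI_Win_start(world_group, 0, win);    /* access: waits for POST headers  */
    MPI_Put(&rank, 1, MPI_INT, 0, 0, 1, MPI_INT, win);
    MPI_Win_complete(win);                 /* sends COMPLETE headers          */
    MPI_Win_wait(win);                     /* waits for COMPLETE headers      */

    MPI_Group_free(&world_group);
    MPI_Win_free(&win);
}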
int
ompi_osc_pt2pt_module_wait(ompi_win_t *win)
{
ompi_group_t *group;
ompi_osc_pt2pt_module_t *module = P2P_MODULE(win);
OPAL_THREAD_LOCK(&module->p2p_lock);
while (0 != (module->p2p_num_pending_in) ||
0 != (module->p2p_num_complete_msgs)) {
opal_condition_wait(&module->p2p_cond, &module->p2p_lock);
}
group = module->p2p_pw_group;
module->p2p_pw_group = NULL;
OPAL_THREAD_UNLOCK(&module->p2p_lock);
ompi_win_remove_mode(win, OMPI_WIN_EXPOSE_EPOCH | OMPI_WIN_POSTED);
ompi_group_decrement_proc_count(group);
OBJ_RELEASE(group);
return OMPI_SUCCESS;
}
int
ompi_osc_pt2pt_module_test(ompi_win_t *win,
int *flag)
{
ompi_group_t *group;
ompi_osc_pt2pt_module_t *module = P2P_MODULE(win);
#if !OMPI_ENABLE_PROGRESS_THREADS
opal_progress();
#endif
if (0 != (module->p2p_num_pending_in) ||
0 != (module->p2p_num_complete_msgs)) {
*flag = 0;
return OMPI_SUCCESS;
}
*flag = 1;
ompi_win_remove_mode(win, OMPI_WIN_EXPOSE_EPOCH | OMPI_WIN_POSTED);
OPAL_THREAD_LOCK(&(module->p2p_lock));
group = module->p2p_pw_group;
module->p2p_pw_group = NULL;
OPAL_THREAD_UNLOCK(&(module->p2p_lock));
ompi_group_decrement_proc_count(group);
OBJ_RELEASE(group);
return OMPI_SUCCESS;
}
struct ompi_osc_pt2pt_pending_lock_t {
opal_list_item_t super;
ompi_proc_t *proc;
int32_t lock_type;
};
typedef struct ompi_osc_pt2pt_pending_lock_t ompi_osc_pt2pt_pending_lock_t;
OBJ_CLASS_INSTANCE(ompi_osc_pt2pt_pending_lock_t, opal_list_item_t,
NULL, NULL);
int
ompi_osc_pt2pt_module_lock(int lock_type,
int target,
int assert,
ompi_win_t *win)
{
ompi_osc_pt2pt_module_t *module = P2P_MODULE(win);
ompi_proc_t *proc = ompi_comm_peer_lookup( module->p2p_comm, target );
assert(lock_type != 0);
/* set our mode on the window */
ompi_win_remove_mode(win, OMPI_WIN_FENCE);
ompi_win_append_mode(win, OMPI_WIN_ACCESS_EPOCH | OMPI_WIN_LOCK_ACCESS);
OPAL_OUTPUT_VERBOSE((50, ompi_osc_base_framework.framework_output,
"%d: sending lock request to %d",
ompi_comm_rank(module->p2p_comm),
target));
/* generate a lock request */
ompi_osc_pt2pt_control_send(module,
proc,
OMPI_OSC_PT2PT_HDR_LOCK_REQ,
ompi_comm_rank(module->p2p_comm),
lock_type);
if (ompi_comm_rank(module->p2p_comm) == target) {
/* If we're trying to lock locally, we have to wait until we actually
acquire the lock */
OPAL_THREAD_LOCK(&module->p2p_lock);
while (module->p2p_lock_received_ack == 0) {
opal_condition_wait(&module->p2p_cond, &module->p2p_lock);
}
OPAL_THREAD_UNLOCK(&module->p2p_lock);
}
/* return */
return OMPI_SUCCESS;
}
int
ompi_osc_pt2pt_module_unlock(int target,
ompi_win_t *win)
{
int32_t out_count;
opal_list_item_t *item;
int ret;
ompi_osc_pt2pt_module_t *module = P2P_MODULE(win);
ompi_proc_t *proc = ompi_comm_peer_lookup( module->p2p_comm, target );
OPAL_THREAD_LOCK(&module->p2p_lock);
while (0 == module->p2p_lock_received_ack) {
opal_condition_wait(&module->p2p_cond, &module->p2p_lock);
}
module->p2p_lock_received_ack -= 1;
/* start all the requests */
ompi_osc_pt2pt_flip_sendreqs(module);
/* try to start all the requests. We've copied everything we need
out of pending_sendreqs, so don't need the lock here */
out_count = opal_list_get_size(&(module->p2p_copy_pending_sendreqs));
/* we want to send all the requests, plus we wait for one more
completion event for the control message ack from the unlocker
saying we're done */
module->p2p_num_pending_out += (out_count + 1);
OPAL_THREAD_UNLOCK(&module->p2p_lock);
/* send the unlock request */
OPAL_OUTPUT_VERBOSE((50, ompi_osc_base_framework.framework_output,
"%d: sending unlock request to %d with %d requests",
ompi_comm_rank(module->p2p_comm), target,
out_count));
ompi_osc_pt2pt_control_send(module,
proc,
OMPI_OSC_PT2PT_HDR_UNLOCK_REQ,
ompi_comm_rank(module->p2p_comm),
out_count);
while (NULL !=
(item = opal_list_remove_first(&(module->p2p_copy_pending_sendreqs)))) {
ompi_osc_pt2pt_sendreq_t *req =
(ompi_osc_pt2pt_sendreq_t*) item;
ret = ompi_osc_pt2pt_sendreq_send(module, req);
if (OMPI_ERR_TEMP_OUT_OF_RESOURCE == ret) {
opal_output_verbose(5, ompi_osc_base_framework.framework_output,
"complete: failure in starting sendreq (%d). Will try later.",
ret);
opal_list_append(&(module->p2p_copy_pending_sendreqs), item);
} else if (OMPI_SUCCESS != ret) {
return ret;
}
}
/* wait for all the requests */
OPAL_THREAD_LOCK(&module->p2p_lock);
while (0 != module->p2p_num_pending_out) {
opal_condition_wait(&module->p2p_cond, &module->p2p_lock);
}
OPAL_THREAD_UNLOCK(&module->p2p_lock);
OPAL_OUTPUT_VERBOSE((50, ompi_osc_base_framework.framework_output,
"%d: finished unlock to %d",
ompi_comm_rank(module->p2p_comm), target));
/* set our mode on the window */
ompi_win_remove_mode(win, OMPI_WIN_ACCESS_EPOCH | OMPI_WIN_LOCK_ACCESS);
return OMPI_SUCCESS;
}
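From the user's perspective, module_lock and module_unlock back MPI_Win_lock and MPI_Win_unlock: the lock request is sent eagerly, RMA operations are queued locally, and unlock flushes everything and blocks until the target's UNLOCK_REPLY arrives. A minimal sketch:

/* Minimal user-level passive-target example (illustrative, not part of the
 * patch). */
#include <mpi.h>

void passive_target_example(MPI_Win win, int target, int value)
{
    MPI_Win_lock(MPI_LOCK_EXCLUSIVE, target, 0, win);
    MPI_Put(&value, 1, MPI_INT, target, 0, 1, MPI_INT, win);
    MPI_Win_unlock(target, win);   /* blocks until the target acks */
}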
int
ompi_osc_pt2pt_passive_lock(ompi_osc_pt2pt_module_t *module,
int32_t origin,
int32_t lock_type)
{
bool send_ack = false;
ompi_proc_t *proc = ompi_comm_peer_lookup( module->p2p_comm, origin );
ompi_osc_pt2pt_pending_lock_t *new_pending;
OPAL_THREAD_LOCK(&(module->p2p_lock));
if (lock_type == MPI_LOCK_EXCLUSIVE) {
if (module->p2p_lock_status == 0) {
module->p2p_lock_status = MPI_LOCK_EXCLUSIVE;
OPAL_OUTPUT_VERBOSE((50, ompi_osc_base_framework.framework_output,
"%d: setting lock status to EXCLUSIVE (from %d)",
ompi_comm_rank(module->p2p_comm), origin));
ompi_win_append_mode(module->p2p_win, OMPI_WIN_EXPOSE_EPOCH);
send_ack = true;
} else {
OPAL_OUTPUT_VERBOSE((50, ompi_osc_base_framework.framework_output,
"%d: queuing lock request from %d (type=%d)",
ompi_comm_rank(module->p2p_comm), origin, lock_type));
new_pending = OBJ_NEW(ompi_osc_pt2pt_pending_lock_t);
new_pending->proc = proc;
new_pending->lock_type = lock_type;
opal_list_append(&(module->p2p_locks_pending), &(new_pending->super));
}
} else if (lock_type == MPI_LOCK_SHARED) {
if (module->p2p_lock_status != MPI_LOCK_EXCLUSIVE) {
module->p2p_lock_status = MPI_LOCK_SHARED;
module->p2p_shared_count++;
OPAL_OUTPUT_VERBOSE((50, ompi_osc_base_framework.framework_output,
"%d: setting lock status to SHARED (from %d), count %d",
ompi_comm_rank(module->p2p_comm), origin, module->p2p_shared_count));
ompi_win_append_mode(module->p2p_win, OMPI_WIN_EXPOSE_EPOCH);
send_ack = true;
} else {
OPAL_OUTPUT_VERBOSE((50, ompi_osc_base_framework.framework_output,
"%d: queuing lock request from %d (type=%d)",
ompi_comm_rank(module->p2p_comm), origin, lock_type));
new_pending = OBJ_NEW(ompi_osc_pt2pt_pending_lock_t);
new_pending->proc = proc;
new_pending->lock_type = lock_type;
opal_list_append(&(module->p2p_locks_pending), &(new_pending->super));
}
}
OPAL_THREAD_UNLOCK(&(module->p2p_lock));
if (send_ack) {
OPAL_OUTPUT_VERBOSE((50, ompi_osc_base_framework.framework_output,
"%d: sending lock ack to %d",
ompi_comm_rank(module->p2p_comm), origin));
ompi_osc_pt2pt_control_send(module, proc,
OMPI_OSC_PT2PT_HDR_LOCK_REQ,
ompi_comm_rank(module->p2p_comm),
OMPI_SUCCESS);
}
return OMPI_SUCCESS;
}
int
ompi_osc_pt2pt_passive_unlock(ompi_osc_pt2pt_module_t *module,
int32_t origin,
int32_t count)
{
ompi_proc_t *proc = ompi_comm_peer_lookup( module->p2p_comm, origin );
ompi_osc_pt2pt_pending_lock_t *new_pending = NULL;
assert(module->p2p_lock_status != 0);
OPAL_OUTPUT_VERBOSE((50, ompi_osc_base_framework.framework_output,
"%d: received unlock request from %d with %d requests\n",
ompi_comm_rank(module->p2p_comm),
origin, count));
new_pending = OBJ_NEW(ompi_osc_pt2pt_pending_lock_t);
new_pending->proc = proc;
new_pending->lock_type = 0;
OPAL_THREAD_LOCK(&(module->p2p_lock));
module->p2p_num_pending_in += count;
opal_list_append(&module->p2p_unlocks_pending, &(new_pending->super));
OPAL_THREAD_UNLOCK(&(module->p2p_lock));
return ompi_osc_pt2pt_passive_unlock_complete(module);
}
int
ompi_osc_pt2pt_passive_unlock_complete(ompi_osc_pt2pt_module_t *module)
{
ompi_osc_pt2pt_pending_lock_t *new_pending = NULL;
opal_list_t copy_unlock_acks;
if (module->p2p_num_pending_in != 0) return OMPI_SUCCESS;
OPAL_THREAD_LOCK(&(module->p2p_lock));
if (module->p2p_num_pending_in != 0) {
OPAL_THREAD_UNLOCK(&module->p2p_lock);
return OMPI_SUCCESS;
}
if (module->p2p_lock_status == MPI_LOCK_EXCLUSIVE) {
ompi_win_remove_mode(module->p2p_win, OMPI_WIN_EXPOSE_EPOCH);
module->p2p_lock_status = 0;
} else {
module->p2p_shared_count -= opal_list_get_size(&module->p2p_unlocks_pending);
OPAL_OUTPUT_VERBOSE((50, ompi_osc_base_framework.framework_output,
"%d: decrementing shared count to %d",
ompi_comm_rank(module->p2p_comm),
module->p2p_shared_count));
if (module->p2p_shared_count == 0) {
ompi_win_remove_mode(module->p2p_win, OMPI_WIN_EXPOSE_EPOCH);
module->p2p_lock_status = 0;
}
}
OBJ_CONSTRUCT(&copy_unlock_acks, opal_list_t);
/* copy over any unlocks that have been satisfied (possibly
multiple if SHARED) */
opal_list_join(&copy_unlock_acks,
opal_list_get_end(&copy_unlock_acks),
&module->p2p_unlocks_pending);
OPAL_THREAD_UNLOCK(&module->p2p_lock);
/* issue whichever unlock acks we should issue */
while (NULL != (new_pending = (ompi_osc_pt2pt_pending_lock_t*)
opal_list_remove_first(&copy_unlock_acks))) {
OPAL_OUTPUT_VERBOSE((50, ompi_osc_base_framework.framework_output,
"%d: sending unlock ack to proc %d",
ompi_comm_rank(module->p2p_comm),
new_pending->proc->proc_name.vpid));
ompi_osc_pt2pt_control_send(module,
new_pending->proc,
OMPI_OSC_PT2PT_HDR_UNLOCK_REPLY,
OMPI_SUCCESS, OMPI_SUCCESS);
OBJ_RELEASE(new_pending);
}
OBJ_DESTRUCT(&copy_unlock_acks);
/* if we were really unlocked, see if we have another lock request
we can satisfy */
OPAL_THREAD_LOCK(&module->p2p_lock);
if (0 == module->p2p_lock_status) {
new_pending = (ompi_osc_pt2pt_pending_lock_t*)
opal_list_remove_first(&(module->p2p_locks_pending));
if (NULL != new_pending) {
OPAL_OUTPUT_VERBOSE((50, ompi_osc_base_framework.framework_output,
"%d: sending lock ack to proc %d",
ompi_comm_rank(module->p2p_comm),
new_pending->proc->proc_name.vpid));
ompi_win_append_mode(module->p2p_win, OMPI_WIN_EXPOSE_EPOCH);
/* set lock state and generate a lock request */
module->p2p_lock_status = new_pending->lock_type;
if (MPI_LOCK_SHARED == new_pending->lock_type) {
module->p2p_shared_count++;
}
}
} else {
new_pending = NULL;
}
OPAL_THREAD_UNLOCK(&(module->p2p_lock));
if (NULL != new_pending) {
ompi_osc_pt2pt_control_send(module,
new_pending->proc,
OMPI_OSC_PT2PT_HDR_LOCK_REQ,
ompi_comm_rank(module->p2p_comm),
OMPI_SUCCESS);
OBJ_RELEASE(new_pending);
}
return OMPI_SUCCESS;
}

View File

@ -15,22 +15,21 @@
# $HEADER$
#
pt2pt_sources = \
rdma_sources = \
osc_rdma.h \
osc_rdma.c \
osc_rdma_comm.c \
osc_rdma_component.c \
osc_rdma_data_move.h \
osc_rdma_data_move.c \
osc_rdma_frag.h \
osc_rdma_frag.c \
osc_rdma_header.h \
osc_rdma_longreq.h \
osc_rdma_longreq.c \
osc_rdma_obj_convert.h \
osc_rdma_replyreq.h \
osc_rdma_replyreq.c \
osc_rdma_sendreq.h \
osc_rdma_sendreq.c \
osc_rdma_sync.c
osc_rdma_request.h \
osc_rdma_request.c \
osc_rdma_active_target.c \
osc_rdma_passive_target.c
# Make the output library in this directory, and name it either
# mca_<type>_<name>.la (for DSO builds) or libmca_<type>_<name>.la
@ -46,9 +45,9 @@ endif
mcacomponentdir = $(ompilibdir)
mcacomponent_LTLIBRARIES = $(component_install)
mca_osc_rdma_la_SOURCES = $(pt2pt_sources)
mca_osc_rdma_la_SOURCES = $(rdma_sources)
mca_osc_rdma_la_LDFLAGS = -module -avoid-version
noinst_LTLIBRARIES = $(component_noinst)
libmca_osc_rdma_la_SOURCES = $(pt2pt_sources)
libmca_osc_rdma_la_SOURCES = $(rdma_sources)
libmca_osc_rdma_la_LDFLAGS = -module -avoid-version

View File

@ -1,3 +1,4 @@
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
/*
* Copyright (c) 2004-2005 The Trustees of Indiana University.
* All rights reserved.
@ -7,8 +8,9 @@
* University of Stuttgart. All rights reserved.
* Copyright (c) 2004-2005 The Regents of the University of California.
* All rights reserved.
* Copyright (c) 2007 Los Alamos National Security, LLC. All rights
* Copyright (c) 2007-2014 Los Alamos National Security, LLC. All rights
* reserved.
* Copyright (c) 2012-2013 Sandia National Laboratories. All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
@ -19,7 +21,6 @@
#include "ompi_config.h"
#include "osc_rdma.h"
#include "osc_rdma_sendreq.h"
#include "opal/threads/mutex.h"
#include "ompi/win/win.h"
@ -30,108 +31,81 @@
int
ompi_osc_rdma_module_free(ompi_win_t *win)
ompi_osc_rdma_attach(struct ompi_win_t *win, void *base, size_t len)
{
int ret = OMPI_SUCCESS;
int i;
ompi_osc_rdma_module_t *module = GET_MODULE(win);
opal_output_verbose(1, ompi_osc_base_framework.framework_output,
"rdma component destroying window with id %d",
ompi_comm_get_cid(module->m_comm));
/* finish with a barrier */
if (ompi_group_size(win->w_group) > 1) {
ret = module->m_comm->c_coll.coll_barrier(module->m_comm,
module->m_comm->c_coll.coll_barrier_module);
}
/* remove from component information */
OPAL_THREAD_LOCK(&mca_osc_rdma_component.c_lock);
opal_hash_table_remove_value_uint32(&mca_osc_rdma_component.c_modules,
ompi_comm_get_cid(module->m_comm));
OPAL_THREAD_UNLOCK(&mca_osc_rdma_component.c_lock);
win->w_osc_module = NULL;
OBJ_DESTRUCT(&module->m_unlocks_pending);
OBJ_DESTRUCT(&module->m_locks_pending);
OBJ_DESTRUCT(&module->m_queued_sendreqs);
OBJ_DESTRUCT(&module->m_copy_pending_sendreqs);
OBJ_DESTRUCT(&module->m_pending_sendreqs);
OBJ_DESTRUCT(&module->m_acc_lock);
OBJ_DESTRUCT(&module->m_cond);
OBJ_DESTRUCT(&module->m_lock);
if (NULL != module->m_sc_remote_ranks) {
free(module->m_sc_remote_ranks);
}
if (NULL != module->m_sc_remote_active_ranks) {
free(module->m_sc_remote_active_ranks);
}
if (NULL != module->m_pending_buffers) {
free(module->m_pending_buffers);
}
if (NULL != module->m_fence_coll_counts) {
free(module->m_fence_coll_counts);
}
if (NULL != module->m_copy_num_pending_sendreqs) {
free(module->m_copy_num_pending_sendreqs);
}
if (NULL != module->m_num_pending_sendreqs) {
free(module->m_num_pending_sendreqs);
}
if (NULL != module->m_peer_info) {
for (i = 0 ; i < ompi_comm_size(module->m_comm) ; ++i) {
ompi_osc_rdma_peer_info_free(&module->m_peer_info[i]);
}
free(module->m_peer_info);
}
if (NULL != module->m_comm) ompi_comm_free(&module->m_comm);
if (NULL != module) free(module);
return ret;
return OMPI_SUCCESS;
}
int
ompi_osc_rdma_peer_info_free(ompi_osc_rdma_peer_info_t *peer_info)
ompi_osc_rdma_detach(struct ompi_win_t *win, void *base)
{
int i;
if (NULL != peer_info->peer_btls) {
free(peer_info->peer_btls);
}
if (NULL != peer_info->local_descriptors) {
for (i = 0 ; i < peer_info->local_num_btls ; ++i) {
if (NULL != peer_info->local_descriptors[i]) {
mca_bml_base_btl_t *bml_btl = peer_info->local_btls[i];
mca_btl_base_module_t* btl = bml_btl->btl;
btl->btl_free(btl, peer_info->local_descriptors[i]);
}
}
free(peer_info->local_descriptors);
}
if (NULL != peer_info->local_registrations) {
for (i = 0 ; i < peer_info->local_num_btls ; ++i) {
if (NULL != peer_info->local_registrations[i]) {
mca_mpool_base_module_t *module =
peer_info->local_registrations[i]->mpool;
module->mpool_deregister(module,
peer_info->local_registrations[i]);
}
}
free(peer_info->local_registrations);
}
if (NULL != peer_info->local_btls) {
free(peer_info->local_btls);
}
memset(peer_info, 0, sizeof(ompi_osc_rdma_peer_info_t));
return OMPI_SUCCESS;
}
int
ompi_osc_rdma_free(ompi_win_t *win)
{
int ret = OMPI_SUCCESS;
ompi_osc_rdma_module_t *module = GET_MODULE(win);
opal_list_item_t *item;
assert (NULL != module);
opal_output_verbose(1, ompi_osc_base_framework.framework_output,
"rdma component destroying window with id %d",
ompi_comm_get_cid(module->comm));
/* finish with a barrier */
if (ompi_group_size(win->w_group) > 1) {
ret = module->comm->c_coll.coll_barrier(module->comm,
module->comm->c_coll.coll_barrier_module);
}
/* remove from component information */
OPAL_THREAD_LOCK(&mca_osc_rdma_component.lock);
opal_hash_table_remove_value_uint32(&mca_osc_rdma_component.modules,
ompi_comm_get_cid(module->comm));
OPAL_THREAD_UNLOCK(&mca_osc_rdma_component.lock);
win->w_osc_module = NULL;
OBJ_DESTRUCT(&module->outstanding_locks);
OBJ_DESTRUCT(&module->locks_pending);
OBJ_DESTRUCT(&module->acc_lock);
OBJ_DESTRUCT(&module->cond);
OBJ_DESTRUCT(&module->lock);
/* it is erroneous to close a window with active operations on it so we should
* probably produce an error here instead of cleaning up */
while (NULL != (item = opal_list_remove_first (&module->pending_acc))) {
OBJ_RELEASE(item);
}
OBJ_DESTRUCT(&module->pending_acc);
osc_rdma_request_gc_clean (module);
assert (0 == opal_list_get_size (&module->request_gc));
OBJ_DESTRUCT(&module->request_gc);
if (NULL != module->peers) {
free(module->peers);
}
if (NULL != module->passive_eager_send_active) free(module->passive_eager_send_active);
if (NULL != module->passive_incoming_frag_count) free(module->passive_incoming_frag_count);
if (NULL != module->passive_incoming_frag_signal_count) free(module->passive_incoming_frag_signal_count);
if (NULL != module->epoch_outgoing_frag_count) free(module->epoch_outgoing_frag_count);
if (NULL != module->incomming_buffer) free (module->incomming_buffer);
if (NULL != module->comm) ompi_comm_free(&module->comm);
if (NULL != module->free_after) free(module->free_after);
if (NULL != module->frag_request) {
module->frag_request->req_complete_cb = NULL;
ompi_request_cancel (module->frag_request);
}
free (module);
return ret;
}

View File

@ -1,19 +1,21 @@
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
/*
* Copyright (c) 2004-2005 The Trustees of Indiana University.
* All rights reserved.
* Copyright (c) 2004-2006 The Trustees of the University of Tennessee.
* All rights reserved.
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
* University of Stuttgart. All rights reserved.
* Copyright (c) 2004-2005 The Regents of the University of California.
* All rights reserved.
* Copyright (c) 2007 Los Alamos National Security, LLC. All rights
* reserved.
* Copyright (c) 2007-2014 Los Alamos National Security, LLC. All rights
* reserved.
* Copyright (c) 2010 Cisco Systems, Inc. All rights reserved.
* Copyright (c) 2012-2013 Sandia National Laboratories. All rights reserved.
* $COPYRIGHT$
*
*
* Additional copyrights may follow
*
*
* $HEADER$
*/
@ -28,234 +30,194 @@
#include "ompi/win/win.h"
#include "ompi/communicator/communicator.h"
#include "ompi/datatype/ompi_datatype.h"
#include "ompi/request/request.h"
#include "ompi/mca/osc/osc.h"
#include "ompi/mca/osc/base/base.h"
#include "ompi/mca/btl/btl.h"
#include "ompi/mca/bml/bml.h"
#include "ompi/memchecker.h"
#include "osc_rdma_header.h"
BEGIN_C_DECLS
struct ompi_osc_rdma_buffer_t {
mca_btl_base_descriptor_t* descriptor;
size_t remain_len;
mca_bml_base_btl_t *bml_btl;
};
typedef struct ompi_osc_rdma_buffer_t ompi_osc_rdma_buffer_t;
struct ompi_osc_rdma_frag_t;
struct ompi_osc_rdma_component_t {
/** Extend the basic osc component interface */
ompi_osc_base_component_t super;
/** lock access to datastructures in the component structure */
opal_mutex_t c_lock;
/** lock access to modules */
opal_mutex_t lock;
/** List of ompi_osc_rdma_module_ts currently in existence.
Needed so that received fragments can be dispatched to the
correct module */
opal_hash_table_t c_modules;
/** cid -> module mapping */
opal_hash_table_t modules;
/** Lock for request management */
opal_mutex_t c_request_lock;
/** module count */
int module_count;
/** Condition variable for request management */
opal_condition_t c_request_cond;
/** free list of ompi_osc_rdma_sendreq_t structures */
opal_free_list_t c_sendreqs;
/** free list of ompi_osc_rdma_replyreq_t structures */
opal_free_list_t c_replyreqs;
/** free list of ompi_osc_rdma_longreq_t structures */
opal_free_list_t c_longreqs;
/** free list of ompi_osc_rdma_frag_t structures */
opal_free_list_t frags;
bool c_btl_registered;
/** Free list of requests */
ompi_free_list_t requests;
uint32_t c_sequence_number;
/** RDMA component buffer size */
unsigned int buffer_size;
/** List of operations that need to be processed */
opal_list_t pending_operations;
/** Is the progress function enabled? */
bool progress_enable;
};
typedef struct ompi_osc_rdma_component_t ompi_osc_rdma_component_t;
struct ompi_osc_rdma_btl_t {
uint8_t peer_seg[MCA_BTL_SEG_MAX_SIZE];
mca_bml_base_btl_t *bml_btl;
int rdma_order;
int32_t num_sent;
struct ompi_osc_rdma_peer_t {
/** Pointer to the current send fragment for each outgoing target */
struct ompi_osc_rdma_frag_t *active_frag;
/** Number of acks pending. New requests can not be sent out if there are
* acks pending (to fulfill the ordering constraints of accumulate) */
uint32_t num_acks_pending;
};
typedef struct ompi_osc_rdma_btl_t ompi_osc_rdma_btl_t;
struct ompi_osc_rdma_peer_info_t {
uint64_t peer_base;
uint64_t peer_len;
int peer_num_btls;
volatile int peer_index_btls;
ompi_osc_rdma_btl_t *peer_btls;
int local_num_btls;
mca_bml_base_btl_t **local_btls;
mca_mpool_base_registration_t **local_registrations;
mca_btl_base_descriptor_t **local_descriptors;
};
typedef struct ompi_osc_rdma_peer_info_t ompi_osc_rdma_peer_info_t;
struct ompi_osc_rdma_setup_info_t {
volatile int32_t num_btls_callin;
int32_t num_btls_expected;
volatile int32_t num_btls_outgoing;
opal_list_t *outstanding_btl_requests;
};
typedef struct ompi_osc_rdma_setup_info_t ompi_osc_rdma_setup_info_t;
typedef struct ompi_osc_rdma_peer_t ompi_osc_rdma_peer_t;
#define SEQ_INVALID 0xFFFFFFFFFFFFFFFFULL
/** Module structure. Exactly one of these is associated with each
RDMA window */
struct ompi_osc_rdma_module_t {
/** Extend the basic osc module interface */
ompi_osc_base_module_t super;
uint32_t m_sequence_number;
/** window should have accumulate ordering... */
bool accumulate_ordering;
/** lock access to data structures in the current module */
opal_mutex_t m_lock;
/** pointer to free on cleanup (may be NULL) */
void *free_after;
/** condition variable for access to current module */
opal_condition_t m_cond;
/** Base pointer for local window */
void *baseptr;
/** lock for "atomic" window updates from reductions */
opal_mutex_t m_acc_lock;
/** communicator created with this window. This is the cid used
in the component's modules mapping. */
ompi_communicator_t *comm;
/** pointer back to window */
ompi_win_t *m_win;
/** Local displacement unit. */
int disp_unit;
/** communicator created with this window */
ompi_communicator_t *m_comm;
/** Mutex lock protecting module data */
opal_mutex_t lock;
/** list of ompi_osc_rdma_sendreq_t structures, and includes all
requests for this access epoch that have not already been
started. m_lock must be held when modifying this field. */
opal_list_t m_pending_sendreqs;
/** condition variable associated with lock */
opal_condition_t cond;
/** list of unsigned int counters for the number of requests to a
particular rank in m_comm for this access epoch. m_lock
must be held when modifying this field */
unsigned int *m_num_pending_sendreqs;
/** lock for atomic window updates from reductions */
opal_mutex_t acc_lock;
/** For MPI_Fence synchronization, the number of messages to send
in epoch. For Start/Complete, the number of updates for this
Complete. For lock, the number of
messages waiting for completion on the origin side. Not
protected by m_lock - must use atomic counter operations. */
int32_t m_num_pending_out;
/** peer data */
ompi_osc_rdma_peer_t *peers;
/** For MPI_Fence synchronization, the number of expected incoming
messages. For Post/Wait, the number of expected updates from
complete. For lock, the number of messages on the passive side
we are waiting for. Not protected by m_lock - must use
atomic counter operations. */
int32_t m_num_pending_in;
/** Number of communication fragments started for this epoch, by
peer. Not in peer data to make fence more manageable. */
int32_t *epoch_outgoing_frag_count;
/** List of full communication buffers queued to be sent. Should
be maintained in order (at least in per-target order). */
opal_list_t queued_frags;
/** cyclic counter for a unique tag for long messages. */
int tag_counter;
/* Number of outgoing fragments that have completed since the
beginning of time */
int32_t outgoing_frag_count;
/* Next outgoing fragment count at which we want a signal on cond */
int32_t outgoing_frag_signal_count;
/* Number of incoming fragments that have completed since the
beginning of time */
int32_t active_incoming_frag_count;
/* Next incoming buffer count at which we want a signal on cond */
int32_t active_incoming_frag_signal_count;
int32_t *passive_incoming_frag_count;
int32_t *passive_incoming_frag_signal_count;
/* Number of flush ack requests sent since the beginning of time */
uint64_t flush_ack_requested_count;
/* Number of flush ack replies received since the beginning of
time. cond should be signalled on every flush reply
received. */
uint64_t flush_ack_received_count;
/** True if the access epoch is a passive target access epoch */
bool passive_target_access_epoch;
/** start sending data eagerly */
bool active_eager_send_active;
bool *passive_eager_send_active;
/* ********************* PWSC data ************************ */
struct ompi_group_t *pw_group;
struct ompi_group_t *sc_group;
/** Number of "ping" messages from the remote post group we've
received */
int32_t m_num_post_msgs;
int32_t num_post_msgs;
/** Number of "count" messages from the remote complete group
we've received */
int32_t m_num_complete_msgs;
/** cyclic counter for a unique tag for long messages. Not
protected by the m_lock - must use create_send_tag() to
create a send tag */
volatile int32_t m_tag_counter;
opal_list_t m_copy_pending_sendreqs;
unsigned int *m_copy_num_pending_sendreqs;
opal_list_t m_queued_sendreqs;
/** start sending data eagerly */
bool m_eager_send_active;
bool m_eager_send_ok;
/* RDMA data */
bool m_use_rdma;
bool m_rdma_wait_completion;
ompi_osc_rdma_setup_info_t *m_setup_info;
ompi_osc_rdma_peer_info_t *m_peer_info;
int32_t m_rdma_num_pending;
/*** buffering ***/
bool m_use_buffers;
ompi_osc_rdma_buffer_t *m_pending_buffers;
/* ********************* FENCE data ************************ */
/* an array of <sizeof(m_comm)> ints, each containing the value
1. */
int *m_fence_coll_counts;
/* ********************* PWSC data ************************ */
struct ompi_group_t *m_pw_group;
struct ompi_group_t *m_sc_group;
bool *m_sc_remote_active_ranks;
int *m_sc_remote_ranks;
int32_t num_complete_msgs;
/* ********************* LOCK data ************************ */
int32_t m_lock_status; /* one of 0, MPI_LOCK_EXCLUSIVE, MPI_LOCK_SHARED */
int32_t m_shared_count;
opal_list_t m_locks_pending;
opal_list_t m_unlocks_pending;
int32_t m_lock_received_ack;
/** Status of the local window lock. One of 0 (unlocked),
MPI_LOCK_EXCLUSIVE, or MPI_LOCK_SHARED. */
int lock_status;
/** number of peers who hold a shared lock on the local window */
int32_t shared_count;
/** target side list of lock requests we couldn't satisfy yet */
opal_list_t locks_pending;
/** origin side list of locks currently outstanding */
opal_list_t outstanding_locks;
uint64_t lock_serial_number;
unsigned char *incomming_buffer;
ompi_request_t *frag_request;
opal_list_t request_gc;
/* enforce accumulate semantics */
opal_atomic_lock_t accumulate_lock;
opal_list_t pending_acc;
};
typedef struct ompi_osc_rdma_module_t ompi_osc_rdma_module_t;
OMPI_MODULE_DECLSPEC extern ompi_osc_rdma_component_t mca_osc_rdma_component;
struct ompi_osc_rdma_pending_t {
opal_list_item_t super;
ompi_osc_rdma_module_t *module;
int source;
ompi_osc_rdma_header_t header;
};
typedef struct ompi_osc_rdma_pending_t ompi_osc_rdma_pending_t;
OBJ_CLASS_DECLARATION(ompi_osc_rdma_pending_t);
#define GET_MODULE(win) ((ompi_osc_rdma_module_t*) win->w_osc_module)
/*
* Component functions
*/
int ompi_osc_rdma_component_init(bool enable_progress_threads,
bool enable_mpi_threads);
int ompi_osc_rdma_attach(struct ompi_win_t *win, void *base, size_t len);
int ompi_osc_rdma_detach(struct ompi_win_t *win, void *base);
int ompi_osc_rdma_component_finalize(void);
int ompi_osc_rdma_free(struct ompi_win_t *win);
int ompi_osc_rdma_component_query(struct ompi_win_t *win,
struct ompi_info_t *info,
struct ompi_communicator_t *comm);
int ompi_osc_rdma_component_select(struct ompi_win_t *win,
struct ompi_info_t *info,
struct ompi_communicator_t *comm);
/* helper function that properly sets up request handling */
int ompi_osc_rdma_component_irecv(void *buf,
size_t count,
struct ompi_datatype_t *datatype,
int src,
int tag,
struct ompi_communicator_t *comm,
struct ompi_request_t **request,
ompi_request_complete_fn_t callback,
void *data);
int ompi_osc_rdma_component_isend(void *buf,
size_t count,
struct ompi_datatype_t *datatype,
int dest,
int tag,
struct ompi_communicator_t *comm,
struct ompi_request_t **request,
ompi_request_complete_fn_t callback,
void *data);
int ompi_osc_rdma_peer_info_free(ompi_osc_rdma_peer_info_t *peer_info);
/*
* Module interface function types
*/
int ompi_osc_rdma_module_free(struct ompi_win_t *win);
int ompi_osc_rdma_module_put(void *origin_addr,
int ompi_osc_rdma_put(void *origin_addr,
int origin_count,
struct ompi_datatype_t *origin_dt,
int target,
@ -264,7 +226,7 @@ int ompi_osc_rdma_module_put(void *origin_addr,
struct ompi_datatype_t *target_dt,
struct ompi_win_t *win);
int ompi_osc_rdma_module_accumulate(void *origin_addr,
int ompi_osc_rdma_accumulate(void *origin_addr,
int origin_count,
struct ompi_datatype_t *origin_dt,
int target,
@ -274,7 +236,7 @@ int ompi_osc_rdma_module_accumulate(void *origin_addr,
struct ompi_op_t *op,
struct ompi_win_t *win);
int ompi_osc_rdma_module_get(void *origin_addr,
int ompi_osc_rdma_get(void *origin_addr,
int origin_count,
struct ompi_datatype_t *origin_dt,
int target,
@ -283,43 +245,418 @@ int ompi_osc_rdma_module_get(void *origin_addr,
struct ompi_datatype_t *target_dt,
struct ompi_win_t *win);
int ompi_osc_rdma_module_fence(int assert, struct ompi_win_t *win);
int ompi_osc_rdma_compare_and_swap(void *origin_addr,
void *compare_addr,
void *result_addr,
struct ompi_datatype_t *dt,
int target,
OPAL_PTRDIFF_TYPE target_disp,
struct ompi_win_t *win);
int ompi_osc_rdma_module_start(struct ompi_group_t *group,
int assert,
int ompi_osc_rdma_fetch_and_op(void *origin_addr,
void *result_addr,
struct ompi_datatype_t *dt,
int target,
OPAL_PTRDIFF_TYPE target_disp,
struct ompi_op_t *op,
struct ompi_win_t *win);
int ompi_osc_rdma_module_complete(struct ompi_win_t *win);
int ompi_osc_rdma_module_post(struct ompi_group_t *group,
int ompi_osc_rdma_get_accumulate(void *origin_addr,
int origin_count,
struct ompi_datatype_t *origin_datatype,
void *result_addr,
int result_count,
struct ompi_datatype_t *result_datatype,
int target_rank,
MPI_Aint target_disp,
int target_count,
struct ompi_datatype_t *target_datatype,
struct ompi_op_t *op,
struct ompi_win_t *win);
int ompi_osc_rdma_rput(void *origin_addr,
int origin_count,
struct ompi_datatype_t *origin_dt,
int target,
OPAL_PTRDIFF_TYPE target_disp,
int target_count,
struct ompi_datatype_t *target_dt,
struct ompi_win_t *win,
struct ompi_request_t **request);
int ompi_osc_rdma_rget(void *origin_addr,
int origin_count,
struct ompi_datatype_t *origin_dt,
int target,
OPAL_PTRDIFF_TYPE target_disp,
int target_count,
struct ompi_datatype_t *target_dt,
struct ompi_win_t *win,
struct ompi_request_t **request);
int ompi_osc_rdma_raccumulate(void *origin_addr,
int origin_count,
struct ompi_datatype_t *origin_dt,
int target,
OPAL_PTRDIFF_TYPE target_disp,
int target_count,
struct ompi_datatype_t *target_dt,
struct ompi_op_t *op,
struct ompi_win_t *win,
struct ompi_request_t **request);
int ompi_osc_rdma_rget_accumulate(void *origin_addr,
int origin_count,
struct ompi_datatype_t *origin_datatype,
void *result_addr,
int result_count,
struct ompi_datatype_t *result_datatype,
int target_rank,
MPI_Aint target_disp,
int target_count,
struct ompi_datatype_t *target_datatype,
struct ompi_op_t *op,
struct ompi_win_t *win,
struct ompi_request_t **request);
int ompi_osc_rdma_fence(int assert, struct ompi_win_t *win);
/* received a post message */
int osc_rdma_incomming_post (ompi_osc_rdma_module_t *module);
int ompi_osc_rdma_start(struct ompi_group_t *group,
int assert,
struct ompi_win_t *win);
int ompi_osc_rdma_complete(struct ompi_win_t *win);
int ompi_osc_rdma_post(struct ompi_group_t *group,
int assert,
struct ompi_win_t *win);
int ompi_osc_rdma_module_wait(struct ompi_win_t *win);
int ompi_osc_rdma_wait(struct ompi_win_t *win);
int ompi_osc_rdma_module_test(struct ompi_win_t *win,
int ompi_osc_rdma_test(struct ompi_win_t *win,
int *flag);
int ompi_osc_rdma_module_lock(int lock_type,
int ompi_osc_rdma_lock(int lock_type,
int target,
int assert,
struct ompi_win_t *win);
int ompi_osc_rdma_module_unlock(int target,
int ompi_osc_rdma_unlock(int target,
struct ompi_win_t *win);
/*
 * passive side sync interface functions
 */
int ompi_osc_rdma_lock_all(int assert,
struct ompi_win_t *win);
int ompi_osc_rdma_unlock_all(struct ompi_win_t *win);
int ompi_osc_rdma_sync(struct ompi_win_t *win);
int ompi_osc_rdma_flush(int target,
struct ompi_win_t *win);
int ompi_osc_rdma_flush_all(struct ompi_win_t *win);
int ompi_osc_rdma_flush_local(int target,
struct ompi_win_t *win);
int ompi_osc_rdma_flush_local_all(struct ompi_win_t *win);
int ompi_osc_rdma_set_info(struct ompi_win_t *win, struct ompi_info_t *info);
int ompi_osc_rdma_get_info(struct ompi_win_t *win, struct ompi_info_t **info_used);
int ompi_osc_rdma_component_irecv(ompi_osc_rdma_module_t *module,
void *buf,
size_t count,
struct ompi_datatype_t *datatype,
int src,
int tag,
struct ompi_communicator_t *comm);
int ompi_osc_rdma_component_isend(ompi_osc_rdma_module_t *module,
void *buf,
size_t count,
struct ompi_datatype_t *datatype,
int dest,
int tag,
struct ompi_communicator_t *comm);
/**
* ompi_osc_rdma_progress_pending_acc:
*
* @short Progress one pending accumulation or compare and swap operation.
*
* @param[in] module - OSC RDMA module
*
* @long If the accumulation lock can be acquired, progress one pending
* accumulate or compare and swap operation.
*/
int ompi_osc_rdma_passive_lock(ompi_osc_rdma_module_t *module,
int32_t origin,
int32_t lock_type);
int ompi_osc_rdma_progress_pending_acc (ompi_osc_rdma_module_t *module);
int ompi_osc_rdma_passive_unlock(ompi_osc_rdma_module_t *module,
int32_t origin,
int32_t count);
int ompi_osc_rdma_passive_unlock_complete(ompi_osc_rdma_module_t *module);
/**
* mark_incoming_completion:
*
* @short Increment incoming completion count.
*
* @param[in] module - OSC RDMA module
* @param[in] source - Passive target source or MPI_PROC_NULL (active target)
*
* @long This function increments either the passive or active incoming count.
* If the count reaches the signal count we signal the module's condition.
* This function uses atomics if necessary so it is not necessary to hold
* the module lock before calling this function.
*/
static inline void mark_incoming_completion (ompi_osc_rdma_module_t *module, int source)
{
if (MPI_PROC_NULL == source) {
OPAL_OUTPUT_VERBOSE((50, ompi_osc_base_framework.framework_output,
"mark_incoming_completion marking active incomming complete. count = %d",
(int) module->active_incoming_frag_count + 1));
OPAL_THREAD_ADD32(&module->active_incoming_frag_count, 1);
if (module->active_incoming_frag_count >= module->active_incoming_frag_signal_count) {
opal_condition_broadcast(&module->cond);
}
} else {
OPAL_OUTPUT_VERBOSE((50, ompi_osc_base_framework.framework_output,
"mark_incoming_completion marking passive incomming complete. source = %d, count = %d",
source, (int) module->passive_incoming_frag_count[source] + 1));
OPAL_THREAD_ADD32(module->passive_incoming_frag_count + source, 1);
if (module->passive_incoming_frag_count[source] >= module->passive_incoming_frag_signal_count[source]) {
opal_condition_broadcast(&module->cond);
}
}
}
/**
* mark_outgoing_completion:
*
* @short Increment outgoing count.
*
* @param[in] module - OSC RDMA module
*
* @long This function is used to signal that an outgoing send is complete. It
* increments only the outgoing fragment count and signals the module
* condition when the fragment count is >= the signal count. This function
* uses atomics if necessary so it is not necessary to hold the module
* lock before calling this function.
*/
static inline void mark_outgoing_completion (ompi_osc_rdma_module_t *module)
{
OPAL_THREAD_ADD32(&module->outgoing_frag_count, 1);
if (module->outgoing_frag_count >= module->outgoing_frag_signal_count) {
opal_condition_broadcast(&module->cond);
}
}
/**
* ompi_osc_signal_outgoing:
*
* @short Increment outgoing signal counters.
*
* @param[in] module - OSC RDMA module
* @param[in] target - Passive target rank or MPI_PROC_NULL (active target)
* @param[in] count - Number of outgoing messages to signal.
*
* @long This function uses atomics if necessary so it is not necessary to hold
* the module lock before calling this function.
*/
static inline void ompi_osc_signal_outgoing (ompi_osc_rdma_module_t *module, int target, int count)
{
OPAL_THREAD_ADD32(&module->outgoing_frag_signal_count, count);
if (MPI_PROC_NULL != target) {
OPAL_OUTPUT_VERBOSE((50, ompi_osc_base_framework.framework_output,
"ompi_osc_signal_outgoing_passive: target = %d, count = %d, total = %d", target,
count, module->epoch_outgoing_frag_count[target] + count));
OPAL_THREAD_ADD32(module->epoch_outgoing_frag_count + target, count);
}
}
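/**
 * A minimal sketch of how the counters above are expected to pair up
 * (illustration only; "send_complete_cb" is a hypothetical callback and the
 * isend helper is declared in osc_rdma_data_move.h):
 *
 *   static int send_complete_cb (ompi_request_t *request)
 *   {
 *       ompi_osc_rdma_module_t *module =
 *           (ompi_osc_rdma_module_t *) request->req_complete_cb_data;
 *       mark_outgoing_completion (module);
 *       return OMPI_SUCCESS;
 *   }
 *
 *   // before posting the send of one fragment to target:
 *   ompi_osc_signal_outgoing (module, target, 1);
 *   ompi_osc_rdma_isend_w_cb (buf, len, MPI_BYTE, target, tag,
 *                             module->comm, send_complete_cb, module);
 */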
/**
* osc_rdma_copy_on_recv:
*
* @short Helper function. Copies data from source to target through the
* convertor.
*
* @param[in] target - destination for the data
* @param[in] source - packed data
* @param[in] source_len - length of source buffer
* @param[in] proc - proc that packed the source data
* @param[in] count - count of datatype items
* @param[in] datatype - datatype used for unpacking
*
* @long This function unpacks data from the source buffer into the target
* buffer. The copy is done with a convertor generated from proc,
* datatype, and count.
*/
static inline void osc_rdma_copy_on_recv (void *target, void *source, size_t source_len, ompi_proc_t *proc,
int count, ompi_datatype_t *datatype)
{
opal_convertor_t convertor;
uint32_t iov_count = 1;
struct iovec iov;
size_t max_data;
/* create convertor */
OBJ_CONSTRUCT(&convertor, opal_convertor_t);
/* initialize convertor */
opal_convertor_copy_and_prepare_for_recv(proc->proc_convertor, &datatype->super, count, target,
0, &convertor);
iov.iov_len = source_len;
iov.iov_base = (IOVBASE_TYPE *) source;
max_data = iov.iov_len;
MEMCHECKER(memchecker_convertor_call(&opal_memchecker_base_mem_defined, &convertor));
opal_convertor_unpack (&convertor, &iov, &iov_count, &max_data);
MEMCHECKER(memchecker_convertor_call(&opal_memchecker_base_mem_noaccess, &convertor));
OBJ_DESTRUCT(&convertor);
}
/**
* osc_rdma_copy_for_send:
*
* @short Helper function. Copies data from source to target through the
* convertor.
*
* @param[in] target - destination for the packed data
* @param[in] target_len - length of the target buffer
* @param[in] source - original data
* @param[in] proc - proc this data will be sent to
* @param[in] count - count of datatype items
* @param[in] datatype - datatype used for packing
*
* @long This function packs data from the source buffer into the target
* buffer. The copy is done with a convertor generated from proc,
* datatype, and count.
*/
static inline void osc_rdma_copy_for_send (void *target, size_t target_len, void *source, ompi_proc_t *proc,
int count, ompi_datatype_t *datatype)
{
opal_convertor_t convertor;
uint32_t iov_count = 1;
struct iovec iov;
size_t max_data;
OBJ_CONSTRUCT(&convertor, opal_convertor_t);
opal_convertor_copy_and_prepare_for_send(proc->proc_convertor, &datatype->super,
count, source, 0, &convertor);
iov.iov_len = target_len;
iov.iov_base = (IOVBASE_TYPE *) target;
opal_convertor_pack(&convertor, &iov, &iov_count, &max_data);
OBJ_DESTRUCT(&convertor);
}
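/**
 * A minimal sketch of how the two copy helpers pair up around a transfer
 * (illustration only; "packed", "packed_len", "target_proc" and
 * "origin_proc" are hypothetical names):
 *
 *   // origin: pack count elements of datatype destined for target_proc
 *   osc_rdma_copy_for_send (packed, packed_len, user_buf, target_proc,
 *                           count, datatype);
 *   // ... the packed buffer travels inside a fragment to the target ...
 *   // target: unpack straight into the window memory
 *   osc_rdma_copy_on_recv (window_ptr, packed, packed_len, origin_proc,
 *                          count, datatype);
 */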
/**
* osc_rdma_request_gc_clean:
*
* @short Release finished PML requests.
*
* @param[in] module - OSC RDMA module
*
* @long This function exists because it is not possible to free a PML request
* from a request completion callback. We instead put the request on the
* module's garbage collection list and release it at a later time.
*/
static inline void osc_rdma_request_gc_clean (ompi_osc_rdma_module_t *module)
{
ompi_request_t *request;
while (NULL != (request = (ompi_request_t *) opal_list_remove_first (&module->request_gc))) {
ompi_request_free (&request);
}
}
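/**
 * Sketch of the intended use (illustration only): a request-completion
 * callback defers the free, and a later blocking path drains the list.
 *
 *   // inside a completion callback (cannot call ompi_request_free here):
 *   opal_list_append (&module->request_gc, (opal_list_item_t *) request);
 *
 *   // later, from a user-level path outside any callback:
 *   osc_rdma_request_gc_clean (module);
 */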
#define OSC_RDMA_FRAG_TAG 0x10000
#define OSC_RDMA_FRAG_MASK 0x0ffff
/**
* get_tag:
*
* @short Get a send/recv tag for large memory operations.
*
* @param[in] module - OSC RDMA module
*
* @long This function acquires a 16-bit tag for use with large memory operations. The
* tag will be odd or even depending on whether this is a passive target access
* epoch or not.
*/
static inline int get_tag(ompi_osc_rdma_module_t *module)
{
/* the LSB of the tag is used by the receiver to determine if the
message is a passive or active target (i.e., where to mark
completion). */
int tmp = module->tag_counter + !!(module->passive_target_access_epoch);
module->tag_counter = (module->tag_counter + 2) & OSC_RDMA_FRAG_MASK;
return tmp;
}
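/*
 * Because the counter advances by two, the low bit of a tag returned by
 * get_tag() carries the passive/active marker. A sketch of how a receiver
 * of a large-message fragment could decode it (assuming it knows the
 * originating rank in "source"):
 *
 *   bool passive = (0 != (tag & 0x1));
 *   mark_incoming_completion (module, passive ? source : MPI_PROC_NULL);
 */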
/**
* ompi_osc_rdma_accumulate_lock:
*
* @short Internal function that spins until the accumulation lock has
* been acquired.
*
* @param[in] module - OSC RDMA module
*
* @returns 0
*
* @long This function blocks until the accumulation lock has been acquired. This
* behavior is only acceptable from a user-level call as blocking in a
* callback may cause deadlock. If a callback needs the accumulate lock and
* it is not available, the operation should be placed on the pending_acc list of the
* module. It will then be progressed by ompi_osc_rdma_accumulate_unlock().
*/
static inline int ompi_osc_rdma_accumulate_lock (ompi_osc_rdma_module_t *module)
{
while (opal_atomic_trylock (&module->accumulate_lock)) {
opal_progress ();
}
return 0;
}
/**
* ompi_osc_rdma_accumulate_trylock:
*
* @short Try to acquire the accumulation lock.
*
* @param[in] module - OSC RDMA module
*
* @returns 0 if the accumulation lock was acquired
* @returns 1 if the lock was not available
*
* @long This function will try to acquire the accumulation lock. This function
* is safe to call from a callback.
*/
static inline int ompi_osc_rdma_accumulate_trylock (ompi_osc_rdma_module_t *module)
{
return opal_atomic_trylock (&module->accumulate_lock);
}
/**
* ompi_osc_rdma_accumulate_unlock:
*
* @short Unlock the accumulation lock and release a pending accumulation operation.
*
* @param[in] module - OSC RDMA module
*
* @long This function unlocks the accumulation lock and releases a single pending
* accumulation operation if one exists. This function may be called recursively.
*/
static inline void ompi_osc_rdma_accumulate_unlock (ompi_osc_rdma_module_t *module)
{
opal_atomic_unlock (&module->accumulate_lock);
if (0 != opal_list_get_size (&module->pending_acc)) {
ompi_osc_rdma_progress_pending_acc (module);
}
}
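/*
 * Sketch of the intended calling pattern for the three accumulate-lock
 * helpers above (illustration only; "pending" is a hypothetical
 * ompi_osc_rdma_pending_t carrying the deferred operation):
 *
 *   user-level path (blocking allowed):
 *     ompi_osc_rdma_accumulate_lock (module);
 *     ... apply the accumulate operation ...
 *     ompi_osc_rdma_accumulate_unlock (module);
 *
 *   callback path (must not block):
 *     if (ompi_osc_rdma_accumulate_trylock (module)) {
 *         // lock busy: queue the operation; a later unlock replays it
 *         // via ompi_osc_rdma_progress_pending_acc()
 *         opal_list_append (&module->pending_acc, &pending->super);
 *     } else {
 *         ... apply the accumulate operation ...
 *         ompi_osc_rdma_accumulate_unlock (module);
 *     }
 */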
END_C_DECLS

View File

@ -0,0 +1,414 @@
/*
* Copyright (c) 2004-2005 The Trustees of Indiana University.
* All rights reserved.
* Copyright (c) 2004-2005 The Trustees of the University of Tennessee.
* All rights reserved.
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
* University of Stuttgart. All rights reserved.
* Copyright (c) 2004-2005 The Regents of the University of California.
* All rights reserved.
* Copyright (c) 2007 Los Alamos National Security, LLC. All rights
* reserved.
* Copyright (c) 2010 IBM Corporation. All rights reserved.
* Copyright (c) 2012-2013 Sandia National Laboratories. All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
#include "ompi_config.h"
#include "osc_rdma.h"
#include "osc_rdma_header.h"
#include "osc_rdma_data_move.h"
#include "osc_rdma_frag.h"
#include "mpi.h"
#include "opal/runtime/opal_progress.h"
#include "opal/threads/mutex.h"
#include "ompi/communicator/communicator.h"
#include "ompi/mca/osc/base/base.h"
static int*
get_comm_ranks(ompi_osc_rdma_module_t *module,
ompi_group_t *sub_group)
{
int *ranks1 = NULL, *ranks2 = NULL;
bool success = false;
int i, ret;
ranks1 = malloc(sizeof(int) * ompi_group_size(sub_group));
if (NULL == ranks1) goto cleanup;
ranks2 = malloc(sizeof(int) * ompi_group_size(sub_group));
if (NULL == ranks2) goto cleanup;
for (i = 0 ; i < ompi_group_size(sub_group) ; ++i) {
ranks1[i] = i;
}
ret = ompi_group_translate_ranks(sub_group,
ompi_group_size(sub_group),
ranks1,
module->comm->c_local_group,
ranks2);
if (OMPI_SUCCESS != ret) goto cleanup;
success = true;
cleanup:
if (NULL != ranks1) free(ranks1);
if (!success) {
if (NULL != ranks2) free(ranks2);
ranks2 = NULL;
}
return ranks2;
}
int
ompi_osc_rdma_fence(int assert, ompi_win_t *win)
{
ompi_osc_rdma_module_t *module = GET_MODULE(win);
uint32_t incoming_reqs;
int ret = OMPI_SUCCESS;
OPAL_OUTPUT_VERBOSE((25, ompi_osc_base_framework.framework_output,
"osc rdma: fence start"));
/* short-circuit the noprecede case */
if (0 != (assert & MPI_MODE_NOPRECEDE)) {
ret = module->comm->c_coll.coll_barrier(module->comm,
module->comm->c_coll.coll_barrier_module);
OPAL_OUTPUT_VERBOSE((50, ompi_osc_base_framework.framework_output,
"osc rdma: fence end (short circuit)"));
return ret;
}
/* try to start all the requests. */
ret = ompi_osc_rdma_frag_flush_all(module);
if (OMPI_SUCCESS != ret) goto cleanup;
OPAL_OUTPUT_VERBOSE((50, ompi_osc_base_framework.framework_output,
"osc rdma: fence done sending"));
/* find out how much data everyone is going to send us. */
ret = module->comm->c_coll.coll_reduce_scatter_block (module->epoch_outgoing_frag_count,
&incoming_reqs, 1, MPI_UINT32_T,
MPI_SUM, module->comm,
module->comm->c_coll.coll_reduce_scatter_block_module);
if (OMPI_SUCCESS != ret) goto cleanup;
OPAL_THREAD_LOCK(&module->lock);
bzero(module->epoch_outgoing_frag_count,
sizeof(uint32_t) * ompi_comm_size(module->comm));
OPAL_OUTPUT_VERBOSE((50, ompi_osc_base_framework.framework_output,
"osc rdma: fence expects %d requests",
incoming_reqs));
/* set our complete condition for incoming requests */
module->active_incoming_frag_signal_count += incoming_reqs;
/* wait for completion */
while (module->outgoing_frag_count != module->outgoing_frag_signal_count ||
module->active_incoming_frag_count < module->active_incoming_frag_signal_count) {
opal_condition_wait(&module->cond, &module->lock);
}
ret = OMPI_SUCCESS;
if (0 == (assert & MPI_MODE_NOSUCCEED)) {
module->active_eager_send_active = true;
}
cleanup:
OPAL_OUTPUT_VERBOSE((25, ompi_osc_base_framework.framework_output,
"osc rdma: fence end: %d", ret));
OPAL_THREAD_UNLOCK(&module->lock);
return ret;
}
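/* For reference, a hedged application-level view of the epoch the routine
 * above implements (user code, not part of this file):
 *
 *   MPI_Win_fence (MPI_MODE_NOPRECEDE, win);   // opening fence: barrier only
 *   MPI_Put (buf, n, MPI_INT, peer, 0, n, MPI_INT, win);
 *   MPI_Win_fence (0, win);                    // drains outgoing fragments and
 *                                              // waits for the incoming count
 *                                              // learned via reduce_scatter_block
 */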
int
ompi_osc_rdma_start(ompi_group_t *group,
int assert,
ompi_win_t *win)
{
int ret = OMPI_SUCCESS;
ompi_osc_rdma_module_t *module = GET_MODULE(win);
OPAL_OUTPUT_VERBOSE((50, ompi_osc_base_framework.framework_output,
"ompi_osc_rdma_start entering..."));
/* save the group */
OBJ_RETAIN(group);
ompi_group_increment_proc_count(group);
OPAL_THREAD_LOCK(&module->lock);
/* ensure we're not already in a start */
if (NULL != module->sc_group) {
ret = MPI_ERR_RMA_SYNC;
goto cleanup;
}
module->sc_group = group;
/* disable eager sends until we've received the proper number of
post messages, at which time we know all our peers are ready to
receive messages. */
module->active_eager_send_active = false;
OPAL_OUTPUT_VERBOSE((50, ompi_osc_base_framework.framework_output,
"num_post_msgs = %d", module->num_post_msgs));
/* it's possible we've already received a couple of post messages, so
account for however many we're going to wait for */
module->num_post_msgs -= ompi_group_size(module->sc_group);
/* if we've already received all the post messages, we can eager
send. Otherwise, eager send will be enabled when
num_post_msgs reaches 0 */
if (0 == module->num_post_msgs) {
module->active_eager_send_active = true;
}
OPAL_OUTPUT_VERBOSE((50, ompi_osc_base_framework.framework_output,
"ompi_osc_rdma_start complete"));
OPAL_THREAD_UNLOCK(&module->lock);
return OMPI_SUCCESS;
cleanup:
OPAL_THREAD_UNLOCK(&module->lock);
ompi_group_decrement_proc_count(group);
OBJ_RELEASE(group);
return ret;
}
int
ompi_osc_rdma_complete(ompi_win_t *win)
{
ompi_osc_rdma_module_t *module = GET_MODULE(win);
ompi_osc_rdma_header_complete_t complete_req;
int ret = OMPI_SUCCESS;
int i;
int *ranks = NULL;
ompi_group_t *group;
OPAL_OUTPUT_VERBOSE((50, ompi_osc_base_framework.framework_output,
"ompi_osc_rdma_complete entering..."));
ranks = get_comm_ranks(module, module->sc_group);
if (NULL == ranks) return OMPI_ERR_TEMP_OUT_OF_RESOURCE;
OPAL_THREAD_LOCK(&module->lock);
/* wait for all the post messages */
while (0 != module->num_post_msgs) {
OPAL_OUTPUT_VERBOSE((50, ompi_osc_base_framework.framework_output,
"waiting for post messages. num_post_msgs = %d", module->num_post_msgs));
opal_condition_wait(&module->cond, &module->lock);
}
OPAL_OUTPUT_VERBOSE((50, ompi_osc_base_framework.framework_output,
"ompi_osc_rdma_complete sending complete message"));
/* for each process in group, send a control message with number
of updates coming, then start all the requests. Note that the
control send is processed as another message in a fragment, so
this might get queued until the flush_all (which is fine).
At the same time, clean out the outgoing count for the next
round. */
OPAL_THREAD_UNLOCK(&module->lock);
for (i = 0 ; i < ompi_group_size(module->sc_group) ; ++i) {
complete_req.base.type = OMPI_OSC_RDMA_HDR_TYPE_COMPLETE;
complete_req.base.flags = OMPI_OSC_RDMA_HDR_FLAG_VALID;
complete_req.frag_count = module->epoch_outgoing_frag_count[ranks[i]];
module->epoch_outgoing_frag_count[ranks[i]] = 0;
ret = ompi_osc_rdma_control_send(module,
ranks[i],
&complete_req,
sizeof(ompi_osc_rdma_header_complete_t));
if (OMPI_SUCCESS != ret) goto cleanup;
}
OPAL_THREAD_LOCK(&module->lock);
/* start all requests */
ret = ompi_osc_rdma_frag_flush_all(module);
if (OMPI_SUCCESS != ret) goto cleanup;
/* wait for outgoing requests to complete. Don't wait for incoming, as
we're only completing the access epoch, not the exposure epoch */
while (module->outgoing_frag_count != module->outgoing_frag_signal_count) {
opal_condition_wait(&module->cond, &module->lock);
}
/* phase 1 cleanup group */
group = module->sc_group;
module->sc_group = NULL;
/* unlock here, as group cleanup can take a while... */
OPAL_THREAD_UNLOCK(&(module->lock));
/* phase 2 cleanup group */
ompi_group_decrement_proc_count(group);
OBJ_RELEASE(group);
OPAL_OUTPUT_VERBOSE((50, ompi_osc_base_framework.framework_output,
"ompi_osc_rdma_complete complete"));
free (ranks);
return OMPI_SUCCESS;
cleanup:
if (NULL != ranks) free(ranks);
OPAL_THREAD_UNLOCK(&(module->lock));
return ret;
}
int
ompi_osc_rdma_post(ompi_group_t *group,
int assert,
ompi_win_t *win)
{
int *ranks;
int ret = OMPI_SUCCESS;
ompi_osc_rdma_module_t *module = GET_MODULE(win);
ompi_osc_rdma_header_post_t post_req;
OPAL_OUTPUT_VERBOSE((50, ompi_osc_base_framework.framework_output,
"ompi_osc_rdma_post entering..."));
/* save the group */
OBJ_RETAIN(group);
ompi_group_increment_proc_count(group);
OPAL_THREAD_LOCK(&(module->lock));
/* ensure we're not already in a post */
if (NULL != module->pw_group) {
OPAL_THREAD_UNLOCK(&(module->lock));
return MPI_ERR_RMA_SYNC;
}
module->pw_group = group;
/* Update completion counter. Can't have received any completion
messages yet; complete won't send a completion header until
we've sent a post header. */
module->num_complete_msgs = -ompi_group_size(module->pw_group);
OPAL_THREAD_UNLOCK(&(module->lock));
OPAL_OUTPUT_VERBOSE((50, ompi_osc_base_framework.framework_output,
"sending post messages"));
ranks = get_comm_ranks(module, module->pw_group);
if (NULL == ranks) {
return OMPI_ERR_OUT_OF_RESOURCE;
}
/* send a post message to everyone in the group */
for (int i = 0 ; i < ompi_group_size(module->pw_group) ; ++i) {
post_req.base.type = OMPI_OSC_RDMA_HDR_TYPE_POST;
post_req.base.flags = OMPI_OSC_RDMA_HDR_FLAG_VALID;
post_req.windx = ompi_comm_get_cid(module->comm);
/* we don't want to send any data, since this is only the exposure
epoch, so use an unbuffered send */
ret = ompi_osc_rdma_control_send_unbuffered(module, ranks[i], &post_req,
sizeof(ompi_osc_rdma_header_post_t));
if (OMPI_SUCCESS != ret) {
break;
}
}
free (ranks);
return ret;
}
int
ompi_osc_rdma_wait(ompi_win_t *win)
{
ompi_osc_rdma_module_t *module = GET_MODULE(win);
ompi_group_t *group;
OPAL_OUTPUT_VERBOSE((25, ompi_osc_base_framework.framework_output,
"ompi_osc_rdma_wait entering..."));
OPAL_THREAD_LOCK(&module->lock);
OPAL_OUTPUT_VERBOSE((25, ompi_osc_base_framework.framework_output,
"ompi_osc_rdma_wait active_incoming_frag_count = %d, active_incoming_frag_signal_count = %d, num_complete_msgs = %d",
(int) module->active_incoming_frag_count, (int) module->active_incoming_frag_signal_count, module->num_complete_msgs));
while (0 != module->num_complete_msgs ||
module->active_incoming_frag_count < module->active_incoming_frag_signal_count) {
opal_condition_wait(&module->cond, &module->lock);
}
group = module->pw_group;
module->pw_group = NULL;
OPAL_THREAD_UNLOCK(&module->lock);
ompi_group_decrement_proc_count(group);
OBJ_RELEASE(group);
OPAL_OUTPUT_VERBOSE((25, ompi_osc_base_framework.framework_output,
"ompi_osc_rdma_wait complete"));
return OMPI_SUCCESS;
}
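/* A hedged application-level sketch of the post/start/complete/wait epoch
 * served by the routines in this file (user code, not part of it):
 *
 *   origin group:                        target group:
 *     MPI_Win_start (tgt_grp, 0, win);     MPI_Win_post (org_grp, 0, win);
 *     MPI_Put (buf, n, MPI_INT, tgt,       ... local work ...
 *              0, n, MPI_INT, win);
 *     MPI_Win_complete (win);              MPI_Win_wait (win);
 */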
int
ompi_osc_rdma_test(ompi_win_t *win,
int *flag)
{
ompi_osc_rdma_module_t *module = GET_MODULE(win);
ompi_group_t *group;
int ret = OMPI_SUCCESS;
#if !OMPI_ENABLE_PROGRESS_THREADS
opal_progress();
#endif
OPAL_THREAD_LOCK(&(module->lock));
if (0 != module->num_complete_msgs ||
module->active_incoming_frag_count < module->active_incoming_frag_signal_count) {
*flag = 0;
ret = OMPI_SUCCESS;
goto cleanup;
} else {
*flag = 1;
group = module->pw_group;
module->pw_group = NULL;
OPAL_THREAD_UNLOCK(&(module->lock));
ompi_group_decrement_proc_count(group);
OBJ_RELEASE(group);
return OMPI_SUCCESS;
}
cleanup:
OPAL_THREAD_UNLOCK(&(module->lock));
return ret;
}

File diff suppressed because it is too large

File diff suppressed because it is too large

File diff suppressed because it is too large

View File

@ -1,3 +1,4 @@
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
/*
* Copyright (c) 2004-2005 The Trustees of Indiana University.
* All rights reserved.
@ -7,6 +8,9 @@
* University of Stuttgart. All rights reserved.
* Copyright (c) 2004-2005 The Regents of the University of California.
* All rights reserved.
* Copyright (c) 2012 Sandia National Laboratories. All rights reserved.
* Copyright (c) 2014 Los Alamos National Security, LLC. All rights
* reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
@ -17,46 +21,125 @@
#ifndef OMPI_MCA_OSC_RDMA_DATA_MOVE_H
#define OMPI_MCA_OSC_RDMA_DATA_MOVE_H
#include "osc_rdma_sendreq.h"
#include "osc_rdma_replyreq.h"
/* send a sendreq (the request from the origin for a Put, Get, or
Accumulate, including the payload for Put and Accumulate) */
int ompi_osc_rdma_sendreq_send(ompi_osc_rdma_module_t *module,
ompi_osc_rdma_sendreq_t *sendreq);
/* send a replyreq (the request from the target of a Get, with the
payload for the origin) */
int ompi_osc_rdma_replyreq_send(ompi_osc_rdma_module_t *module,
ompi_osc_rdma_replyreq_t *replyreq);
/* receive the target side of a sendreq for a put, directly into the user's window */
int ompi_osc_rdma_sendreq_recv_put(ompi_osc_rdma_module_t *module,
ompi_osc_rdma_send_header_t *header,
void **payload);
/* receive the target side of a sendreq for an accumulate, possibly
using a temporary buffer, then calling the reduction functions */
int ompi_osc_rdma_sendreq_recv_accum(ompi_osc_rdma_module_t *module,
ompi_osc_rdma_send_header_t *header,
void **payload);
/* receive the origin side of a replyreq (the reply part of an
MPI_Get), directly into the user's window */
int ompi_osc_rdma_replyreq_recv(ompi_osc_rdma_module_t *module,
ompi_osc_rdma_sendreq_t *sendreq,
ompi_osc_rdma_reply_header_t *header,
void **payload);
#include "osc_rdma_header.h"
int ompi_osc_rdma_control_send(ompi_osc_rdma_module_t *module,
ompi_proc_t *proc,
uint8_t type,
int32_t value0, int32_t value1);
int target,
void *data,
size_t len);
int ompi_osc_rdma_rdma_ack_send(ompi_osc_rdma_module_t *module,
ompi_proc_t *proc,
ompi_osc_rdma_btl_t *rdma_btl);
/**
* ompi_osc_rdma_control_send_unbuffered:
*
* @short Send an unbuffered control message to a peer.
*
* @param[in] module - OSC RDMA module
* @param[in] target - Target rank
* @param[in] data - Data to send
* @param[in] len - Length of data
*
* @long Directly send a control message. This does not allocate a
* fragment, so should only be used when sending other messages would
* be erroneous (such as complete messages, when there may be queued
* transactions from an overlapping post that has already heard back
* from its peer). The buffer specified by data will be available
* when this call returns.
*/
int ompi_osc_rdma_control_send_unbuffered (ompi_osc_rdma_module_t *module,
int target, void *data, size_t len);
int ompi_osc_rdma_flush(ompi_osc_rdma_module_t *module);
/**
* ompi_osc_rdma_isend_w_cb:
*
* @short Post a non-blocking send with a specified callback.
*
* @param[in] ptr - Source buffer. Will be available when the callback fires
* @param[in] count - Number of elements to send
* @param[in] datatype - Datatype of elements
* @param[in] target - Rank to send data to
* @param[in] tag - Tag to use
* @param[in] comm - Communicator for communicating with rank
* @param[in] cb - Function to call when the request is complete
* @param[in] ctx - Context to store in new request for callback
*
* @long This function posts a new send request. Upon completion the function cb will
* be called with the associated request. The context specified in ctx will be stored in
* the req_complete_cb_data member of the ompi_request_t for use by the callback.
*/
int ompi_osc_rdma_isend_w_cb (void *ptr, int count, ompi_datatype_t *datatype, int target, int tag,
ompi_communicator_t *comm, ompi_request_complete_fn_t cb, void *ctx);
/**
* ompi_osc_rdma_irecv_w_cb:
*
* @short Post a non-blocking receive with a specified callback.
*
* @param[inout] ptr - Destination for incoming data
* @param[in] count - Number of elements to receive
* @param[in] datatype - Datatype of elements
* @param[in] source - Rank to receive data from
* @param[in] tag - Tag to use
* @param[in] comm - Communicator for communicating with rank
* @param[in] request_out - Location to store new receive request (may be NULL)
* @param[in] cb - Function to call when the request is complete
* @param[in] ctx - Context to store in new request for callback
*
* @long This function posts a new request and stores the request in request_out if
* provided. Upon completion the function cb will be called with the associated
* request. The context specified in ctx will be stored in the req_complete_cb_data
* member of the ompi_request_t for use by the callback.
*/
int ompi_osc_rdma_irecv_w_cb (void *ptr, int count, ompi_datatype_t *datatype, int source, int tag,
ompi_communicator_t *comm, ompi_request_t **request_out,
ompi_request_complete_fn_t cb, void *ctx);
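/**
 * A minimal usage sketch for the two callback-based helpers above
 * (illustration only; "frag_recv_cb" and "recv_buf" are hypothetical):
 *
 *   static int frag_recv_cb (ompi_request_t *request)
 *   {
 *       ompi_osc_rdma_module_t *module =
 *           (ompi_osc_rdma_module_t *) request->req_complete_cb_data;
 *       ... process the received buffer, then re-post the receive ...
 *       return OMPI_SUCCESS;
 *   }
 *
 *   ompi_osc_rdma_irecv_w_cb (recv_buf, len, MPI_BYTE, MPI_ANY_SOURCE,
 *                             OSC_RDMA_FRAG_TAG, module->comm, NULL,
 *                             frag_recv_cb, module);
 */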
int ompi_osc_rdma_process_lock(ompi_osc_rdma_module_t* module,
int source,
struct ompi_osc_rdma_header_lock_t* lock_header);
void ompi_osc_rdma_process_lock_ack(ompi_osc_rdma_module_t* module,
struct ompi_osc_rdma_header_lock_ack_t* lock_header);
int ompi_osc_rdma_process_unlock(ompi_osc_rdma_module_t* module,
int source,
struct ompi_osc_rdma_header_unlock_t* lock_header);
int ompi_osc_rdma_process_flush (ompi_osc_rdma_module_t *module, int source,
ompi_osc_rdma_header_flush_t *flush_header);
/**
* ompi_osc_rdma_process_unlock_ack:
*
* @short Process an incoming unlock acknowledgement.
*
* @param[in] module - OSC RDMA module
* @param[in] source - Source rank
* @param[in] unlock_ack_header - Incoming unlock ack header
*/
void ompi_osc_rdma_process_unlock_ack (ompi_osc_rdma_module_t *module, int source,
ompi_osc_rdma_header_unlock_ack_t *unlock_ack_header);
/**
* ompi_osc_rdma_process_flush_ack:
*
* @short Process an incoming flush acknowledgement.
*
* @param[in] module - OSC RDMA module
* @param[in] source - Source rank
* @param[in] flush_ack_header - Incoming flush ack header
*/
void ompi_osc_rdma_process_flush_ack (ompi_osc_rdma_module_t *module, int source,
ompi_osc_rdma_header_flush_ack_t *flush_ack_header);
/**
* ompi_osc_rdma_frag_start_receive:
*
* @short Start receiving fragments on the OSC module.
*
* @param[in] module - OSC module
*
* @long This function starts receiving eager fragments on the module. The current
* implementation uses the pml to transfer eager fragments.
*/
int ompi_osc_rdma_frag_start_receive (ompi_osc_rdma_module_t *module);
#endif

View File

@ -0,0 +1,213 @@
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
/*
* Copyright (c) 2012-2013 Sandia National Laboratories. All rights reserved.
* Copyright (c) 2014 Los Alamos National Security, LLC. All rights
* reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
#include "ompi_config.h"
#include "opal/class/opal_list.h"
#include "ompi/mca/osc/base/base.h"
#include "ompi/mca/pml/pml.h"
#include "osc_rdma.h"
#include "osc_rdma_frag.h"
#include "osc_rdma_data_move.h"
static void ompi_osc_rdma_frag_constructor (ompi_osc_rdma_frag_t *frag){
frag->buffer = malloc (mca_osc_rdma_component.buffer_size + sizeof (ompi_osc_rdma_frag_header_t));
assert (frag->buffer);
}
static void ompi_osc_rdma_frag_destructor (ompi_osc_rdma_frag_t *frag) {
if (NULL != frag->buffer) {
free (frag->buffer);
}
}
OBJ_CLASS_INSTANCE(ompi_osc_rdma_frag_t, opal_list_item_t,
ompi_osc_rdma_frag_constructor, ompi_osc_rdma_frag_destructor);
static int frag_send_cb (ompi_request_t *request)
{
ompi_osc_rdma_frag_t *frag =
(ompi_osc_rdma_frag_t*) request->req_complete_cb_data;
ompi_osc_rdma_module_t *module = frag->module;
OPAL_OUTPUT_VERBOSE((50, ompi_osc_base_framework.framework_output,
"osc rdma: frag_send complete to %d, frag = %p, request = %p",
frag->target, (void *) frag, (void *) request));
mark_outgoing_completion(module);
OPAL_FREE_LIST_RETURN(&mca_osc_rdma_component.frags, &frag->super);
/* put this request on the garbage collection list */
opal_list_append (&module->request_gc, (opal_list_item_t *) request);
return OMPI_SUCCESS;
}
static int
frag_send(ompi_osc_rdma_module_t *module,
ompi_osc_rdma_frag_t *frag)
{
int count;
count = (int)((uintptr_t) frag->top - (uintptr_t) frag->buffer);
OPAL_OUTPUT_VERBOSE((50, ompi_osc_base_framework.framework_output,
"osc rdma: frag_send called to %d, frag = %p, count = %d",
frag->target, (void *) frag, count));
return ompi_osc_rdma_isend_w_cb (frag->buffer, count, MPI_BYTE, frag->target, OSC_RDMA_FRAG_TAG,
module->comm, frag_send_cb, frag);
}
int
ompi_osc_rdma_frag_start(ompi_osc_rdma_module_t *module,
ompi_osc_rdma_frag_t *frag)
{
int ret;
assert(0 == frag->pending);
assert(module->peers[frag->target].active_frag != frag);
/* we need to signal now that a frag is outgoing to ensure the count sent
* with the unlock message is correct */
ompi_osc_signal_outgoing (module, frag->target, 1);
/* if eager sends are not active, can't send yet, so buffer and
get out... */
if (module->passive_target_access_epoch) {
if (!module->passive_eager_send_active[frag->target]) {
opal_list_append(&module->queued_frags, &frag->super);
return OMPI_SUCCESS;
}
} else {
if (!module->active_eager_send_active) {
opal_list_append(&module->queued_frags, &frag->super);
return OMPI_SUCCESS;
}
}
ret = frag_send(module, frag);
opal_condition_broadcast(&module->cond);
return ret;
}
int
ompi_osc_rdma_frag_flush_target(ompi_osc_rdma_module_t *module, int target)
{
int ret = OMPI_SUCCESS;
OPAL_OUTPUT_VERBOSE((50, ompi_osc_base_framework.framework_output,
"osc rdma: frag flush target begin"));
/* flush the active frag */
if (NULL != module->peers[target].active_frag) {
ompi_osc_rdma_frag_t *frag = module->peers[target].active_frag;
if (0 != frag->pending) {
/* communication going on while synchronizing; this is a bug */
return MPI_ERR_RMA_SYNC;
}
module->peers[target].active_frag = NULL;
ret = ompi_osc_rdma_frag_start(module, frag);
if (OMPI_SUCCESS != ret) return ret;
}
OPAL_OUTPUT_VERBOSE((50, ompi_osc_base_framework.framework_output,
"osc rdma: frag flush target finished active frag"));
/* walk through the pending list and send */
ompi_osc_rdma_frag_t *frag, *next;
OPAL_LIST_FOREACH_SAFE(frag, next, &module->queued_frags, ompi_osc_rdma_frag_t) {
if (frag->target == target) {
opal_list_remove_item(&module->queued_frags, &frag->super);
ret = frag_send(module, frag);
if (OMPI_SUCCESS != ret) return ret;
}
}
OPAL_OUTPUT_VERBOSE((50, ompi_osc_base_framework.framework_output,
"osc rdma: frag flush target finished"));
return OMPI_SUCCESS;
}
int
ompi_osc_rdma_frag_flush_all(ompi_osc_rdma_module_t *module)
{
int ret = OMPI_SUCCESS;
int i;
ompi_osc_rdma_frag_t *frag, *next;
OPAL_OUTPUT_VERBOSE((50, ompi_osc_base_framework.framework_output,
"osc rdma: frag flush all begin"));
/* flush the active frag */
for (i = 0 ; i < ompi_comm_size(module->comm) ; ++i) {
if (NULL != module->peers[i].active_frag) {
ompi_osc_rdma_frag_t *frag = module->peers[i].active_frag;
if (0 != frag->pending) {
/* communication going on while synchronizing; this is a bug */
return MPI_ERR_RMA_SYNC;
}
module->peers[i].active_frag = NULL;
ret = ompi_osc_rdma_frag_start(module, frag);
if (OMPI_SUCCESS != ret) return ret;
}
}
OPAL_OUTPUT_VERBOSE((50, ompi_osc_base_framework.framework_output,
"osc rdma: frag flush all finished active frag"));
/* try to start all the queued frags */
OPAL_LIST_FOREACH_SAFE(frag, next, &module->queued_frags, ompi_osc_rdma_frag_t) {
opal_list_remove_item(&module->queued_frags, &frag->super);
ret = frag_send(module, frag);
if (OMPI_SUCCESS != ret) {
OPAL_OUTPUT_VERBOSE((50, ompi_osc_base_framework.framework_output,
"osc rdma: failure for frag send: %d", ret));
return ret;
}
}
OPAL_OUTPUT_VERBOSE((50, ompi_osc_base_framework.framework_output,
"osc rdma: frag flush all done"));
return OMPI_SUCCESS;
}
int osc_rdma_incomming_post (ompi_osc_rdma_module_t *module)
{
OPAL_THREAD_LOCK(&module->lock);
module->num_post_msgs++;
OPAL_OUTPUT_VERBOSE((50, ompi_osc_base_framework.framework_output,
"received post message. num_post_msgs = %d", module->num_post_msgs));
if (0 == module->num_post_msgs) {
module->active_eager_send_active = true;
}
opal_condition_broadcast (&module->cond);
OPAL_THREAD_UNLOCK(&module->lock);
return OMPI_SUCCESS;
}
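/*
 * Worked example of the post counting above (a sketch with a hypothetical
 * access group of size 3): ompi_osc_rdma_start() subtracts the group size,
 * taking num_post_msgs from 0 to -3; each incoming post message increments
 * it (-3 -> -2 -> -1 -> 0), and once it reaches 0 eager sends are enabled
 * and the condition variable is broadcast.
 */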

View File

@ -0,0 +1,131 @@
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
/*
* Copyright (c) 2012 Sandia National Laboratories. All rights reserved.
* Copyright (c) 2014 Los Alamos National Security, LLC. All rights
* reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
#ifndef OSC_RDMA_FRAG_H
#define OSC_RDMA_FRAG_H
#include "ompi/communicator/communicator.h"
#include "osc_rdma_header.h"
#include "osc_rdma_request.h"
/** Communication buffer for packing messages */
struct ompi_osc_rdma_frag_t {
opal_list_item_t super;
/* target rank of buffer */
int target;
unsigned char *buffer;
/* space remaining in buffer */
size_t remain_len;
/* start of unused space */
char *top;
/* Number of operations which have started writing into the frag, but not yet completed doing so */
int pending;
ompi_osc_rdma_frag_header_t *header;
ompi_osc_rdma_module_t *module;
};
typedef struct ompi_osc_rdma_frag_t ompi_osc_rdma_frag_t;
OBJ_CLASS_DECLARATION(ompi_osc_rdma_frag_t);
extern int ompi_osc_rdma_frag_start(ompi_osc_rdma_module_t *module, ompi_osc_rdma_frag_t *buffer);
extern int ompi_osc_rdma_frag_flush_target(ompi_osc_rdma_module_t *module, int target);
extern int ompi_osc_rdma_frag_flush_all(ompi_osc_rdma_module_t *module);
/*
* Note: module lock must be held during this operation
*/
static inline int ompi_osc_rdma_frag_alloc(ompi_osc_rdma_module_t *module, int target,
size_t request_len, ompi_osc_rdma_frag_t **buffer,
char **ptr)
{
ompi_osc_rdma_frag_t *curr = module->peers[target].active_frag;
int ret;
if (request_len > mca_osc_rdma_component.buffer_size) {
return OMPI_ERR_OUT_OF_RESOURCE;
}
if (NULL == curr || curr->remain_len < request_len) {
opal_free_list_item_t *item;
if (NULL != curr) {
curr->remain_len = 0;
/* If there's something pending, the pending finish will
start the buffer. Otherwise, we need to start it now. */
if (0 == curr->pending) {
module->peers[target].active_frag = NULL;
ret = ompi_osc_rdma_frag_start(module, curr);
}
}
OPAL_FREE_LIST_GET(&mca_osc_rdma_component.frags,
item, ret);
if (OMPI_SUCCESS != ret) return ret;
curr = module->peers[target].active_frag =
(ompi_osc_rdma_frag_t*) item;
curr->target = target;
curr->header = (ompi_osc_rdma_frag_header_t*) curr->buffer;
curr->top = (char*) (curr->header + 1);
curr->remain_len = mca_osc_rdma_component.buffer_size;
curr->module = module;
curr->pending = 0;
curr->header->base.type = OMPI_OSC_RDMA_HDR_TYPE_FRAG;
curr->header->base.flags = OMPI_OSC_RDMA_HDR_FLAG_VALID;
if (module->passive_target_access_epoch) {
curr->header->base.flags |= OMPI_OSC_RDMA_HDR_FLAG_PASSIVE_TARGET;
}
curr->header->source = ompi_comm_rank(module->comm);
curr->header->num_ops = 0;
curr->header->windx = ompi_comm_get_cid(module->comm);
if (curr->remain_len < request_len) {
return OMPI_ERR_TEMP_OUT_OF_RESOURCE;
}
}
*ptr = curr->top;
*buffer = curr;
curr->top += request_len;
curr->remain_len -= request_len;
curr->pending++;
curr->header->num_ops++;
return OMPI_SUCCESS;
}
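/*
 * Hedged usage sketch for ompi_osc_rdma_frag_alloc() (illustration only;
 * the packed header layout is elided and "request_len" is whatever the
 * operation needs):
 *
 *   ompi_osc_rdma_frag_t *frag;
 *   char *ptr;
 *   int ret;
 *
 *   OPAL_THREAD_LOCK(&module->lock);             // lock must be held
 *   ret = ompi_osc_rdma_frag_alloc (module, target, request_len, &frag, &ptr);
 *   if (OMPI_SUCCESS == ret) {
 *       ... pack request_len bytes of header/payload at ptr ...
 *   }
 *   OPAL_THREAD_UNLOCK(&module->lock);
 */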
/*
* Note: module lock must be held for this operation