diff --git a/include/mpi.h b/include/mpi.h index 9913ef74cc..2068a994cb 100644 --- a/include/mpi.h +++ b/include/mpi.h @@ -1271,13 +1271,12 @@ extern "C" { int count); int PMPI_Testall(int count, MPI_Request array_of_requests[], int *flag, MPI_Status array_of_statuses[]); - int PMPI_Testany(int count, MPI_Request array_of_requests[], int *index, - MPI_Status *status); + int PMPI_Testany(int count, MPI_Request array_of_requests[], int *index, int *flag, MPI_Status *status); int PMPI_Test(MPI_Request *request, int *flag, MPI_Status *status); int PMPI_Test_cancelled(MPI_Status *status, int *flag); int PMPI_Testsome(int incount, MPI_Request array_of_requests[], - int *outcount, int array_of_indices, - MPI_Status array_of_statuses); + int *outcount, int array_of_indices[], + MPI_Status array_of_statuses[]); int PMPI_Topo_test(MPI_Comm comm, int *status); MPI_Fint PMPI_Type_c2f(MPI_Datatype datatype); int PMPI_Type_commit(MPI_Datatype *type); diff --git a/src/errhandler/errhandler.h b/src/errhandler/errhandler.h index f92486abed..4709f482cd 100644 --- a/src/errhandler/errhandler.h +++ b/src/errhandler/errhandler.h @@ -110,7 +110,7 @@ extern lam_pointer_array_t *lam_errhandler_f_to_c_table; * parallel invocation to LAM_ERRHANDLER_CHECK() and LAM_ERRHANDLER_RETURN(). */ #define LAM_ERRHANDLER_INVOKE(mpi_object, err_code, message) \ - lam_errhandler_invoke((mpi_object)->error_handler, (mpi_object), \ + lam_errhandler_invoke((mpi_object) != NULL ? (mpi_object)->error_handler : NULL, (mpi_object), \ (err_code), (message)); /** @@ -129,7 +129,7 @@ extern lam_pointer_array_t *lam_errhandler_f_to_c_table; */ #define LAM_ERRHANDLER_CHECK(rc, mpi_object, err_code, message) \ if (rc != LAM_SUCCESS) { \ - lam_errhandler_invoke((mpi_object)->error_handler, (mpi_object), \ + lam_errhandler_invoke((mpi_object) != NULL ? (mpi_object)->error_handler : NULL, (mpi_object), \ (err_code), (message)); \ return (err_code); \ } @@ -152,7 +152,7 @@ extern lam_pointer_array_t *lam_errhandler_f_to_c_table; */ #define LAM_ERRHANDLER_RETURN(rc, mpi_object, err_code, message) \ if (rc != LAM_SUCCESS) { \ - lam_errhandler_invoke((mpi_object)->error_handler, (mpi_object), \ + lam_errhandler_invoke((mpi_object != NULL) ? (mpi_object)->error_handler : NULL, (mpi_object), \ (err_code), (message)); \ return (err_code); \ } else { \ diff --git a/src/mca/pml/base/Makefile.am b/src/mca/pml/base/Makefile.am index 9a89525138..aff4132f59 100644 --- a/src/mca/pml/base/Makefile.am +++ b/src/mca/pml/base/Makefile.am @@ -8,7 +8,9 @@ noinst_LTLIBRARIES = libmca_pml_base.la # For VPATH builds, have to specify where static-modules.h will be found -AM_CPPFLAGS = -I$(top_builddir)/src +AM_CPPFLAGS = \ + -pedantic \ + -I$(top_builddir)/src # Source code files diff --git a/src/mca/pml/base/pml_base_request.c b/src/mca/pml/base/pml_base_request.c index 1207c9e3f0..d5c4bd4c67 100644 --- a/src/mca/pml/base/pml_base_request.c +++ b/src/mca/pml/base/pml_base_request.c @@ -5,6 +5,14 @@ #include "mca/pml/base/pml_base_request.h" +static void mca_pml_base_request_construct(mca_pml_base_request_t* req) +{ +} + +static void mca_pml_base_request_destruct(mca_pml_base_request_t* req) +{ +} + lam_class_t mca_pml_base_request_t_class = { "mca_pml_base_request_t", OBJ_CLASS(lam_request_t), @@ -12,11 +20,4 @@ lam_class_t mca_pml_base_request_t_class = { (lam_destruct_t) mca_pml_base_request_destruct }; -void mca_pml_base_request_construct(mca_pml_base_request_t* req) -{ -} - -void mca_pml_base_request_destruct(mca_pml_base_request_t* req) -{ -} diff --git a/src/mca/pml/base/pml_base_request.h b/src/mca/pml/base/pml_base_request.h index 467fe9cd66..31dabed524 100644 --- a/src/mca/pml/base/pml_base_request.h +++ b/src/mca/pml/base/pml_base_request.h @@ -1,8 +1,9 @@ /* * $HEADER$ */ -/*%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%*/ - +/** + * @file + */ #ifndef MCA_PML_BASE_REQUEST_H #define MCA_PML_BASE_REQUEST_H @@ -13,7 +14,9 @@ extern lam_class_t mca_pml_base_request_t_class; -/* request type */ +/** + * Type of request. + */ typedef enum { MCA_PML_REQUEST_NULL, MCA_PML_REQUEST_SEND, @@ -21,40 +24,27 @@ typedef enum { } mca_pml_base_request_type_t; -/* MPI pml (point-to-point) request */ +/** + * Base type for PML P2P requests + */ struct mca_pml_base_request_t { - /* base request */ - lam_request_t super; - /* pointer to application buffer */ - void *req_addr; - /* length of application buffer */ - size_t req_length; - /* peer process - rank w/in this communicator */ - int32_t req_peer; - /* user defined tag */ - int32_t req_tag; - /* communicator pointer */ - lam_communicator_t *req_comm; - /* pointer to data type */ - lam_datatype_t *req_datatype; - /* MPI request type - used for test */ - mca_pml_base_request_type_t req_type; - /* completion status */ - lam_status_public_t req_status; - /* flag indicating if the this is a persistent request */ - bool req_persistent; - /* flag indicating if MPI is done with this request */ - volatile bool req_mpi_done; - /* flag indicating if the pt-2-pt layer is done with this request */ - volatile bool req_pml_done; - /* flag indicating if the user has freed this request */ - volatile bool req_free_called; + lam_request_t super; /**< base request */ + void *req_addr; /**< pointer to application buffer */ + size_t req_count; /**< count of user datatype elements */ + int32_t req_peer; /**< peer process - rank w/in this communicator */ + int32_t req_tag; /**< user defined tag */ + lam_communicator_t *req_comm; /**< communicator pointer */ + lam_proc_t* req_proc; /**< peer process */ + lam_datatype_t *req_datatype; /**< pointer to data type */ + mca_pml_base_request_type_t req_type; /**< MPI request type - used for test */ + lam_status_public_t req_status; /**< completion status */ + bool req_persistent; /**< flag indicating if the this is a persistent request */ + volatile bool req_mpi_done; /**< flag indicating if MPI is done with this request */ + volatile bool req_pml_done; /**< flag indicating if the pt-2-pt layer is done with this request */ + volatile bool req_free_called; /**< flag indicating if the user has freed this request */ }; typedef struct mca_pml_base_request_t mca_pml_base_request_t; -void mca_pml_base_request_construct(mca_pml_base_request_t*); -void mca_pml_base_request_destruct(mca_pml_base_request_t*); - #endif diff --git a/src/mca/pml/pml.h b/src/mca/pml/pml.h index f12805dbc4..ac7fa2f39b 100644 --- a/src/mca/pml/pml.h +++ b/src/mca/pml/pml.h @@ -1,7 +1,48 @@ /* * $HEADER$ */ - +/** + * @file + * + * P2P Management Layer (PML) + * + * An MCA component type that provides the P2P interface functionality required + * by the MPI layer. The PML is a relatively thin layer that primarily provides + * for the fragmentation and scheduling of messages over multiple transports + * (instances of the P2P Transport Layer (PTL) MCA component type) as depicted below: + * + * ------------------------------------ + * | MPI | + * ------------------------------------ + * | PML | + * ------------------------------------ + * | PTL (TCP) | PTL (SM) | PTL (...) | + * ------------------------------------ + * + * A single PML module is selected by the MCA framework during library + * initialization. Initially, all available PMLs are loaded (potentially + * as shared libraries) and their module open and init functions called. + * The MCA framework selects the module returning the highest priority and + * closes/unloads any other PML modules that may have been opened. + * + * After the PML is selected, the MCA framework loads and initalize + * all available PTLs. The PML is notified of the selected PTLs via the + * the mca_pml_base_add_ptls_fn_t downcall from the MCA. + * + * After all of the MCA components are initialized, the MPI/RTE will make + * downcalls into the PML to provide the initial list of processes + * (lam_proc_t instances), and notification of changes (add/delete). + * + * The PML module must select the set of PTL modules that are to be used + * to reach a given destination. These should be cached on a PML specific + * data structure that is hung off the lam_proc_t. + * + * The PML should then apply a scheduling algorithm (round-robin, + * weighted distribution, etc), to schedule the delivery of messages + * over the available PTLs. + * + */ + #ifndef MCA_PML_H #define MCA_PML_H @@ -68,27 +109,114 @@ typedef struct mca_pml_base_module_1_0_0_t mca_pml_base_module_1_0_0_t; typedef mca_pml_base_module_1_0_0_t mca_pml_base_module_t; -/* - * PML instance interface functions and datatype +/** + * MCA management functions. */ -typedef int (*mca_pml_base_add_comm_fn_t)(struct lam_communicator_t*); -typedef int (*mca_pml_base_del_comm_fn_t)(struct lam_communicator_t*); -typedef int (*mca_pml_base_add_procs_fn_t)(struct lam_proc_t **procs, size_t nprocs); -typedef int (*mca_pml_base_del_procs_fn_t)(struct lam_proc_t **procs, size_t nprocs); -typedef int (*mca_pml_base_add_ptls_fn_t)(lam_list_t *ptls); -typedef int (*mca_pml_base_progress_fn_t)(void); +/** + * Downcall from MPI/RTE layer when new processes are created. + * + * @param procs Array of new processes + * @param nprocs Size of process array + * @return LAM_SUCCESS or failure status. + * + * Provides a notification to the PML that new processes have been + * created, and provides the PML the opportunity to cache data + * (e.g. list of PTLs to use) on the lam_proc_t data structure. + */ +typedef int (*mca_pml_base_add_procs_fn_t)(struct lam_proc_t **procs, size_t nprocs); + + +/** + * Downcall from MPI/RTE layer when processes are terminated. + * + * @param procs Array of processes + * @param nprocs Size of process array + * @return LAM_SUCCESS or failure status. + * + * Provides a notification to the PML that processes have + * gone away, and provides the PML the opportunity to cleanup + * any data cached on the lam_proc_t data structure. + */ +typedef int (*mca_pml_base_del_procs_fn_t)(struct lam_proc_t **procs, size_t nprocs); + + +/** + * Downcall from MCA layer after all PTLs have been loaded/selected. + * + * @param ptls List of selected PTLs + * @return LAM_SUCCESS or failure status. + * + * Provides a notification to the PML that processes have + * gone away, and provides the PML the opportunity to cleanup + * any data cached on the lam_proc_t data structure. + */ +typedef int (*mca_pml_base_add_ptls_fn_t)(lam_list_t *ptls); + + +/** + * MPI Interface Functions + */ + + +/** + * Downcall from MPI layer when a new communicator is created. + * + * @param comm Communicator + * @return LAM_SUCCESS or failure status. + * + * Provides the PML the opportunity to initialize/cache a data structure + * on the communicator. + */ +typedef int (*mca_pml_base_add_comm_fn_t)(struct lam_communicator_t* comm); + + +/** + * Downcall from MPI layer when a communicator is destroyed. + * + * @param comm Communicator + * @return LAM_SUCCESS or failure status. + * + * Provides the PML the opportunity to cleanup any datastructures + * associated with the communicator. + */ +typedef int (*mca_pml_base_del_comm_fn_t)(struct lam_communicator_t* comm); + +/** + * Initialize a persistent receive request. + * + * @param buf (IN) User buffer. + * @param count (IN) Number of elements of the specified datatype. + * @param datatype (IN) User defined datatype. + * @param src (IN) Source rank w/in communicator. + * @param tag (IN) User defined tag. + * @param comm (IN) Communicator. + * @param request (OUT) Request handle. + * @return LAM_SUCCESS or failure status. + */ typedef int (*mca_pml_base_irecv_init_fn_t)( - void *buf, - size_t count, - lam_datatype_t *datatype, + void *buf, + size_t count, + lam_datatype_t *datatype, int src, - int tag, + int tag, struct lam_communicator_t* comm, - struct lam_request_t **request + struct lam_request_t **request ); +/** + * Post a receive request. + * + * @param buf (IN) User buffer. + * @param count (IN) Number of elements of the specified datatype. + * @param datatype (IN) User defined datatype. + * @param src (IN) Source rank w/in communicator. + * @param tag (IN) User defined tag. + * @param comm (IN) Communicator. + * @param request (OUT) Request handle. + * @return LAM_SUCCESS or failure status. + */ typedef int (*mca_pml_base_irecv_fn_t)( void *buf, size_t count, @@ -99,6 +227,18 @@ typedef int (*mca_pml_base_irecv_fn_t)( struct lam_request_t **request ); +/** + * Post a receive and wait for completion. + * + * @param buf (IN) User buffer + * @param count (IN) Number of elements of the specified datatype + * @param datatype (IN) User defined datatype + * @param src (IN) Source rank w/in communicator + * @param tag (IN) User defined tag + * @param comm (IN) Communicator + * @param status (OUT) Completion status + * @return LAM_SUCCESS or failure status. + */ typedef int (*mca_pml_base_recv_fn_t)( void *buf, size_t count, @@ -109,6 +249,19 @@ typedef int (*mca_pml_base_recv_fn_t)( lam_status_public_t* status ); +/** + * Initialize a persistent send request. + * + * @param buf (IN) User buffer. + * @param count (IN) Number of elements of the specified datatype. + * @param datatype (IN) User defined datatype. + * @param dst (IN) Peer rank w/in communicator. + * @param tag (IN) User defined tag. + * @param mode (IN) Send mode (STANDARD,BUFFERED,SYNCHRONOUS,READY) + * @param comm (IN) Communicator. + * @param request (OUT) Request handle. + * @return LAM_SUCCESS or failure status. + */ typedef int (*mca_pml_base_isend_init_fn_t)( void *buf, size_t count, @@ -120,6 +273,20 @@ typedef int (*mca_pml_base_isend_init_fn_t)( struct lam_request_t **request ); + +/** + * Post a send request. + * + * @param buf (IN) User buffer. + * @param count (IN) Number of elements of the specified datatype. + * @param datatype (IN) User defined datatype. + * @param dst (IN) Peer rank w/in communicator. + * @param tag (IN) User defined tag. + * @param mode (IN) Send mode (STANDARD,BUFFERED,SYNCHRONOUS,READY) + * @param comm (IN) Communicator. + * @param request (OUT) Request handle. + * @return LAM_SUCCESS or failure status. + */ typedef int (*mca_pml_base_isend_fn_t)( void *buf, size_t count, @@ -131,6 +298,19 @@ typedef int (*mca_pml_base_isend_fn_t)( struct lam_request_t **request ); + +/** + * Post a send request and wait for completion. + * + * @param buf (IN) User buffer. + * @param count (IN) Number of elements of the specified datatype. + * @param datatype (IN) User defined datatype. + * @param dst (IN) Peer rank w/in communicator. + * @param tag (IN) User defined tag. + * @param mode (IN) Send mode (STANDARD,BUFFERED,SYNCHRONOUS,READY) + * @param comm (IN) Communicator. + * @return LAM_SUCCESS or failure status. + */ typedef int (*mca_pml_base_send_fn_t)( void *buf, size_t count, @@ -141,16 +321,72 @@ typedef int (*mca_pml_base_send_fn_t)( struct lam_communicator_t* comm ); +/** + * Initiate one or more persistent requests. + * + * @param count Number of requests + * @param request Array of persistent requests + * @return LAM_SUCCESS or failure status. + */ typedef int (*mca_pml_base_start_fn_t)( - lam_request_t** request + size_t count, + lam_request_t** requests ); +/** + * Non-blocking test for request completion. + * + * @param count (IN) Number of requests + * @param request (IN) Array of requests + * @param index (OUT) Index of first completed request. + * @param complete (OUT) Flag indicating if index is valid (a request completed). + * @param status (OUT) Status of completed request. + * @return LAM_SUCCESS or failure status. + * + * Note that upon completion, the request is freed, and the + * request handle at index set to NULL. + */ typedef int (*mca_pml_base_test_fn_t)( - lam_request_t** request, + size_t count, + lam_request_t** requests, + int *index, int *completed, lam_status_public_t* status ); + +/** + * Non-blocking test for request completion. + * + * @param count (IN) Number of requests + * @param requests (IN) Array of requests + * @param completed (OUT) Flag indicating wether all requests completed. + * @param statuses (OUT) Array of completion statuses. + * @return LAM_SUCCESS or failure status. + * + * This routine returns completed==true if all requests have completed. + * The statuses parameter is only updated if all requests completed. Likewise, + * the requests array is not modified (no requests freed), unless all requests + * have completed. + */ +typedef int (*mca_pml_base_test_all_fn_t)( + size_t count, + lam_request_t** requests, + int *completed, + lam_status_public_t* statuses +); + + +/** + * Wait (blocking-mode) for one of N requests to complete. + * + * @param count (IN) Number of requests + * @param requests (IN) Array of requests + * @param index (OUT) Index into request array of completed request. + * @param status (OUT) Status of completed request. + * @return LAM_SUCCESS or failure status. + * + */ typedef int (*mca_pml_base_wait_fn_t)( size_t count, lam_request_t** request, @@ -158,37 +394,61 @@ typedef int (*mca_pml_base_wait_fn_t)( lam_status_public_t* status ); + +/** + * Wait (blocking-mode) for all of N requests to complete. + * + * @param count (IN) Number of requests + * @param requests (IN) Array of requests + * @param statuses (OUT) Array of completion statuses. + * @return LAM_SUCCESS or failure status. + * + */ typedef int (*mca_pml_base_wait_all_fn_t)( - size_t count, /* count of request/status arrays */ - lam_request_t** request, /* array of requests */ - lam_status_public_t *status /* array of statuses */ + size_t count, + lam_request_t** request, + lam_status_public_t *status ); + +/** + * Release resources held by a persistent mode request. + * + * @param request (IN) Request + * @return LAM_SUCCESS or failure status. + * + */ typedef int (*mca_pml_base_free_fn_t)( lam_request_t** request ); + +/** + * A special NULL request handle. + * + * @param request (OUT) Request + * @return LAM_SUCCESS or failure status. + * + */ typedef int (*mca_pml_base_null_fn_t)( lam_request_t** request ); - /** - * PML instance interface functions. + * PML instance. */ struct mca_pml_1_0_0_t { /* downcalls from MCA to PML */ - mca_pml_base_add_comm_fn_t pml_add_comm; - mca_pml_base_del_comm_fn_t pml_del_comm; mca_pml_base_add_procs_fn_t pml_add_procs; mca_pml_base_del_procs_fn_t pml_del_procs; mca_pml_base_add_ptls_fn_t pml_add_ptls; - mca_pml_base_progress_fn_t pml_progress; /* downcalls from MPI to PML */ + mca_pml_base_add_comm_fn_t pml_add_comm; + mca_pml_base_del_comm_fn_t pml_del_comm; mca_pml_base_irecv_init_fn_t pml_irecv_init; mca_pml_base_irecv_fn_t pml_irecv; mca_pml_base_recv_fn_t pml_recv; @@ -197,6 +457,7 @@ struct mca_pml_1_0_0_t { mca_pml_base_send_fn_t pml_send; mca_pml_base_start_fn_t pml_start; mca_pml_base_test_fn_t pml_test; + mca_pml_base_test_all_fn_t pml_test_all; mca_pml_base_wait_fn_t pml_wait; mca_pml_base_wait_all_fn_t pml_wait_all; mca_pml_base_free_fn_t pml_free; diff --git a/src/mca/pml/teg/src/pml_ptl_array.h b/src/mca/pml/teg/src/pml_ptl_array.h index 3f077ba6a2..7c35e2c713 100644 --- a/src/mca/pml/teg/src/pml_ptl_array.h +++ b/src/mca/pml/teg/src/pml_ptl_array.h @@ -1,7 +1,9 @@ /* * $HEADER$ */ - +/** + * @file + */ #ifndef LAM_PTL_ARRAY_H #define LAM_PTL_ARRAY_H @@ -10,24 +12,40 @@ extern lam_class_t mca_pml_teg_ptl_array_t_class; +/** + * A data structure associated with a lam_proc_t that caches + * addressing/scheduling attributes for a specific PTL instance + * that can be used to reach the process. + */ struct mca_ptl_proc_t { - double ptl_weight; /* PTL weight for scheduling */ - struct mca_ptl_base_peer_t* ptl_peer; /* PTL addressing info */ - mca_ptl_t *ptl; /* PTL implementation */ + double ptl_weight; /**< PTL weight for scheduling */ + struct mca_ptl_base_peer_t* ptl_peer; /**< PTL addressing info */ + mca_ptl_t *ptl; /**< PTL implementation */ }; typedef struct mca_ptl_proc_t mca_ptl_proc_t; +/** + * A dynamically growable array of mca_ptl_proc_t instances. + * Maintains an index into the array that is used for round-robin + * scheduling across contents. + */ struct mca_ptl_array_t { lam_object_t super; - mca_ptl_proc_t* ptl_procs; /* array of ptl procs */ - size_t ptl_size; /* number available */ - size_t ptl_reserve; - size_t ptl_index; /* last used index*/ + mca_ptl_proc_t* ptl_procs; /**< array of ptl procs */ + size_t ptl_size; /**< number available */ + size_t ptl_reserve; /**< size of allocated ptl_proc array */ + size_t ptl_index; /**< last used index*/ }; typedef struct mca_ptl_array_t mca_ptl_array_t; typedef struct mca_ptl_array_t mca_pml_teg_ptl_array_t; +/** + * If required, reallocate (grow) the array to the indicate size. + * + * @param array (IN) + * @param size (IN) + */ int mca_ptl_array_reserve(mca_ptl_array_t*, size_t); static inline size_t mca_ptl_array_get_size(mca_ptl_array_t* array) @@ -35,6 +53,12 @@ static inline size_t mca_ptl_array_get_size(mca_ptl_array_t* array) return array->ptl_size; } +/** + * Grow the array if required, and set the size. + * + * @param array (IN) + * @param size (IN) + */ static inline void mca_ptl_array_set_size(mca_ptl_array_t* array, size_t size) { if(array->ptl_size > array->ptl_reserve) @@ -42,6 +66,11 @@ static inline void mca_ptl_array_set_size(mca_ptl_array_t* array, size_t size) array->ptl_size = size; } +/** + * Grow the array size by one and return the item at that index. + * + * @param array (IN) + */ static inline mca_ptl_proc_t* mca_ptl_array_insert(mca_ptl_array_t* array) { #if LAM_ENABLE_DEBUG @@ -54,6 +83,12 @@ static inline mca_ptl_proc_t* mca_ptl_array_insert(mca_ptl_array_t* array) return &array->ptl_procs[array->ptl_size++]; } +/** + * Return an array item at the specified index. + * + * @param array (IN) + * @param index (IN) + */ static inline mca_ptl_proc_t* mca_ptl_array_get_index(mca_ptl_array_t* array, size_t index) { #if LAM_ENABLE_DEBUG @@ -66,6 +101,12 @@ static inline mca_ptl_proc_t* mca_ptl_array_get_index(mca_ptl_array_t* array, si return &array->ptl_procs[index]; } +/** + * Return the next LRU index in the array. + * + * @param array (IN) + * @param index (IN) + */ static inline mca_ptl_proc_t* mca_ptl_array_get_next(mca_ptl_array_t* array) { mca_ptl_proc_t* ptl_proc; diff --git a/src/mca/pml/teg/src/pml_teg.c b/src/mca/pml/teg/src/pml_teg.c index 658c093341..250bc4bd1f 100644 --- a/src/mca/pml/teg/src/pml_teg.c +++ b/src/mca/pml/teg/src/pml_teg.c @@ -19,12 +19,11 @@ mca_pml_teg_t mca_pml_teg = { { - mca_pml_teg_add_comm, - mca_pml_teg_del_comm, mca_pml_teg_add_procs, mca_pml_teg_del_procs, mca_pml_teg_add_ptls, - mca_pml_teg_progress, + mca_pml_teg_add_comm, + mca_pml_teg_del_comm, mca_pml_teg_irecv_init, mca_pml_teg_irecv, mca_pml_teg_recv, @@ -33,6 +32,7 @@ mca_pml_teg_t mca_pml_teg = { mca_pml_teg_send, mca_pml_teg_start, mca_pml_teg_test, + mca_pml_teg_test_all, mca_pml_teg_wait, mca_pml_teg_wait_all, mca_pml_teg_free, @@ -279,7 +279,7 @@ int mca_pml_teg_module_fini(void) int mca_pml_teg_null(lam_request_t** request) { - *request = (lam_request_t*)&mca_pml_teg.teg_null; + *request = (lam_request_t*)&mca_pml_teg.teg_request_null; return LAM_SUCCESS; } diff --git a/src/mca/pml/teg/src/pml_teg.h b/src/mca/pml/teg/src/pml_teg.h index 34e7de5021..67f885b2d9 100644 --- a/src/mca/pml/teg/src/pml_teg.h +++ b/src/mca/pml/teg/src/pml_teg.h @@ -1,11 +1,9 @@ -/** @file - * - * - */ - /* * $HEADER$ */ +/** + * @file + */ #ifndef MCA_PML_TEG_H #define MCA_PML_TEG_H @@ -51,9 +49,7 @@ struct mca_pml_teg_t { lam_mutex_t teg_request_lock; lam_condition_t teg_request_cond; volatile int teg_request_waiting; - - /* null request */ - mca_pml_base_request_t teg_null; + mca_pml_base_request_t teg_request_null; }; typedef struct mca_pml_teg_t mca_pml_teg_t; @@ -108,7 +104,7 @@ extern int mca_pml_teg_add_ptls( extern int mca_pml_teg_isend_init( void *buf, - size_t size, + size_t count, lam_datatype_t *datatype, int dst, int tag, @@ -119,7 +115,7 @@ extern int mca_pml_teg_isend_init( extern int mca_pml_teg_isend( void *buf, - size_t size, + size_t count, lam_datatype_t *datatype, int dst, int tag, @@ -130,7 +126,7 @@ extern int mca_pml_teg_isend( extern int mca_pml_teg_send( void *buf, - size_t size, + size_t count, lam_datatype_t *datatype, int dst, int tag, @@ -140,7 +136,7 @@ extern int mca_pml_teg_send( extern int mca_pml_teg_irecv_init( void *buf, - size_t size, + size_t count, lam_datatype_t *datatype, int src, int tag, @@ -150,7 +146,7 @@ extern int mca_pml_teg_irecv_init( extern int mca_pml_teg_irecv( void *buf, - size_t size, + size_t count, lam_datatype_t *datatype, int src, int tag, @@ -160,7 +156,7 @@ extern int mca_pml_teg_irecv( extern int mca_pml_teg_recv( void *buf, - size_t size, + size_t count, lam_datatype_t *datatype, int src, int tag, @@ -171,10 +167,20 @@ extern int mca_pml_teg_recv( extern int mca_pml_teg_progress(void); extern int mca_pml_teg_start( - lam_request_t** request + size_t count, + lam_request_t** requests ); extern int mca_pml_teg_test( + size_t count, + lam_request_t** request, + int *index, + int *completed, + lam_status_public_t* status +); + +extern int mca_pml_teg_test_all( + size_t count, lam_request_t** request, int *completed, lam_status_public_t* status diff --git a/src/mca/pml/teg/src/pml_teg_irecv.c b/src/mca/pml/teg/src/pml_teg_irecv.c index 2192bc069a..5033fda7f1 100644 --- a/src/mca/pml/teg/src/pml_teg_irecv.c +++ b/src/mca/pml/teg/src/pml_teg_irecv.c @@ -3,7 +3,7 @@ int mca_pml_teg_irecv_init( void *addr, - size_t length, + size_t count, lam_datatype_t *datatype, int src, int tag, @@ -18,7 +18,7 @@ int mca_pml_teg_irecv_init( mca_ptl_base_recv_request_init( recvreq, addr, - length, + count, datatype, src, tag, @@ -31,7 +31,7 @@ int mca_pml_teg_irecv_init( int mca_pml_teg_irecv( void *addr, - size_t length, + size_t count, lam_datatype_t *datatype, int src, int tag, @@ -40,10 +40,6 @@ int mca_pml_teg_irecv( { int rc; -#if 0 - lam_output(0, "mca_pml_teg_irecv: src=%d tag=%d comm=%d\n", src, tag, comm->c_contextid); -#endif - mca_ptl_base_recv_request_t *recvreq = mca_pml_teg_recv_request_alloc(&rc); if(NULL == recvreq) return rc; @@ -51,7 +47,7 @@ int mca_pml_teg_irecv( mca_ptl_base_recv_request_init( recvreq, addr, - length, + count, datatype, src, tag, @@ -69,16 +65,14 @@ int mca_pml_teg_irecv( int mca_pml_teg_recv( void *addr, - size_t length, + size_t count, lam_datatype_t *datatype, int src, int tag, struct lam_communicator_t* comm, lam_status_public_t* status) { - int rc; - int index; - + int rc, index; mca_ptl_base_recv_request_t *recvreq = mca_pml_teg_recv_request_alloc(&rc); if(NULL == recvreq) return rc; @@ -86,7 +80,7 @@ int mca_pml_teg_recv( mca_ptl_base_recv_request_init( recvreq, addr, - length, + count, datatype, src, tag, diff --git a/src/mca/pml/teg/src/pml_teg_isend.c b/src/mca/pml/teg/src/pml_teg_isend.c index c9da6b5e4f..e93bbed768 100644 --- a/src/mca/pml/teg/src/pml_teg_isend.c +++ b/src/mca/pml/teg/src/pml_teg_isend.c @@ -9,7 +9,7 @@ int mca_pml_teg_isend_init( void *buf, - size_t size, + size_t count, lam_datatype_t *datatype, int dst, int tag, @@ -26,7 +26,7 @@ int mca_pml_teg_isend_init( mca_ptl_base_send_request_init( sendreq, buf, - size, + count, datatype, dst, tag, @@ -42,7 +42,7 @@ int mca_pml_teg_isend_init( int mca_pml_teg_isend( void *buf, - size_t size, + size_t count, lam_datatype_t *datatype, int dst, int tag, @@ -58,7 +58,7 @@ int mca_pml_teg_isend( mca_ptl_base_send_request_init( sendreq, buf, - size, + count, datatype, dst, tag, @@ -76,16 +76,14 @@ int mca_pml_teg_isend( int mca_pml_teg_send( void *buf, - size_t size, + size_t count, lam_datatype_t *datatype, int dst, int tag, mca_pml_base_send_mode_t sendmode, lam_communicator_t* comm) { - int rc; - int index; - + int rc, index; mca_ptl_base_send_request_t* sendreq = mca_pml_teg_send_request_alloc(comm,dst,&rc); if(rc != LAM_SUCCESS) return rc; @@ -93,7 +91,7 @@ int mca_pml_teg_send( mca_ptl_base_send_request_init( sendreq, buf, - size, + count, datatype, dst, tag, diff --git a/src/mca/pml/teg/src/pml_teg_module.c b/src/mca/pml/teg/src/pml_teg_module.c index f4a39f4908..184f65f553 100644 --- a/src/mca/pml/teg/src/pml_teg_module.c +++ b/src/mca/pml/teg/src/pml_teg_module.c @@ -59,7 +59,7 @@ static inline int mca_pml_teg_param_register_int( int mca_pml_teg_module_open(void) { - mca_pml_base_request_t* teg_null = &mca_pml_teg.teg_null; + mca_pml_base_request_t* teg_null = &mca_pml_teg.teg_request_null; OBJ_CONSTRUCT(&mca_pml_teg.teg_lock, lam_mutex_t); OBJ_CONSTRUCT(&mca_pml_teg.teg_recv_requests, lam_free_list_t); OBJ_CONSTRUCT(&mca_pml_teg.teg_procs, lam_list_t); @@ -96,13 +96,6 @@ int mca_pml_teg_module_close(void) } -static void* mca_pml_teg_thread(lam_object_t* thread) -{ - lam_event_dispatch(); - return NULL; -} - - mca_pml_t* mca_pml_teg_module_init(int* priority, bool *allow_multi_user_threads, bool *have_hidden_threads) diff --git a/src/mca/pml/teg/src/pml_teg_proc.h b/src/mca/pml/teg/src/pml_teg_proc.h index 8b388d7852..f2636c3d02 100644 --- a/src/mca/pml/teg/src/pml_teg_proc.h +++ b/src/mca/pml/teg/src/pml_teg_proc.h @@ -1,7 +1,9 @@ /* * $HEADER$ */ - +/** + * @file + */ #ifndef MCA_PML_PROC_H #define MCA_PML_PROC_H @@ -11,17 +13,16 @@ #include "proc/proc.h" #include "pml_ptl_array.h" -/* +/** * Structure associated w/ lam_proc_t that contains data specific * to the PML. Note that this name is not PML specific. */ - struct mca_pml_proc_t { lam_list_item_t super; - lam_proc_t *proc_lam; - lam_mutex_t proc_lock; - mca_ptl_array_t proc_ptl_first; - mca_ptl_array_t proc_ptl_next; + lam_proc_t *proc_lam; /**< back-pointer to lam_proc_t */ + lam_mutex_t proc_lock; /**< lock to protect against concurrent access */ + mca_ptl_array_t proc_ptl_first; /**< array of ptls to use for first fragments */ + mca_ptl_array_t proc_ptl_next; /**< array of ptls to use for remaining fragments */ }; typedef struct mca_pml_proc_t mca_pml_proc_t; @@ -29,6 +30,13 @@ typedef struct mca_pml_proc_t mca_pml_proc_t; extern lam_class_t mca_pml_teg_proc_t_class; typedef struct mca_pml_proc_t mca_pml_teg_proc_t; +/** + * Return the mca_pml_proc_t instance cached in the communicators local group. + * + * @param comm Communicator + * @param rank Peer rank + * @return mca_pml_proc_t instance + */ static inline mca_pml_proc_t* mca_pml_teg_proc_lookup_local(lam_communicator_t* comm, int rank) { @@ -36,6 +44,14 @@ static inline mca_pml_proc_t* mca_pml_teg_proc_lookup_local(lam_communicator_t* return proc->proc_pml; } +/** + * Return the mca_pml_proc_t instance cached on the communicators remote group. + * + * @param comm Communicator + * @param rank Peer rank + * @return mca_pml_proc_t instance + */ + static inline mca_pml_proc_t* mca_pml_teg_proc_lookup_remote(lam_communicator_t* comm, int rank) { lam_proc_t* proc = comm->c_remote_group->grp_proc_pointers[rank]; diff --git a/src/mca/pml/teg/src/pml_teg_progress.c b/src/mca/pml/teg/src/pml_teg_progress.c index 2a50376658..a1897e84fb 100644 --- a/src/mca/pml/teg/src/pml_teg_progress.c +++ b/src/mca/pml/teg/src/pml_teg_progress.c @@ -4,6 +4,7 @@ int mca_pml_teg_progress(void) { +#if 0 mca_ptl_base_tstamp_t tstamp; size_t i; @@ -12,6 +13,7 @@ int mca_pml_teg_progress(void) */ for(i=0; iptlm_progress(tstamp); +#endif return LAM_SUCCESS; } diff --git a/src/mca/pml/teg/src/pml_teg_recvreq.h b/src/mca/pml/teg/src/pml_teg_recvreq.h index 7a1a6070d7..a8e75bb74c 100644 --- a/src/mca/pml/teg/src/pml_teg_recvreq.h +++ b/src/mca/pml/teg/src/pml_teg_recvreq.h @@ -1,7 +1,9 @@ /* * $HEADER$ */ - +/** + * @file + */ #ifndef LAM_PML_TEG_RECV_REQUEST_H #define LAM_PML_TEG_RECV_REQUEST_H @@ -11,38 +13,56 @@ #include "mca/ptl/base/ptl_base_recvfrag.h" -/* - * Allocate a recv request. +/** + * Allocate a recv request from the modules free list. + * + * @param rc (OUT) LAM_SUCCESS or error status on failure. + * @return Receive request. */ static inline mca_ptl_base_recv_request_t* mca_pml_teg_recv_request_alloc(int *rc) { return (mca_ptl_base_recv_request_t*)lam_free_list_get(&mca_pml_teg.teg_recv_requests, rc); } +/** + * Return a recv request to the modules free list. + * + * @param request (IN) Receive request. + */ static inline void mca_pml_teg_recv_request_return(mca_ptl_base_recv_request_t* request) { lam_free_list_return(&mca_pml_teg.teg_recv_requests, (lam_list_item_t*)request); } -/* - * Progress an initialized request. +/** + * Start an initialized request. + * + * @param request Receive request. + * @return LAM_SUCESS or error status on failure. */ -static inline int mca_pml_teg_recv_request_start(mca_ptl_base_recv_request_t* req) +static inline int mca_pml_teg_recv_request_start(mca_ptl_base_recv_request_t* request) { THREAD_SCOPED_LOCK(&mca_pml_teg.teg_lock, - (req->req_sequence = mca_pml_teg.teg_recv_sequence++)); + (request->req_sequence = mca_pml_teg.teg_recv_sequence++)); - if(req->super.req_peer == LAM_ANY_SOURCE) { - mca_ptl_base_recv_request_match_wild(req); + if(request->super.req_peer == LAM_ANY_SOURCE) { + mca_ptl_base_recv_request_match_wild(request); } else { - mca_ptl_base_recv_request_match_specific(req); + mca_ptl_base_recv_request_match_specific(request); } return LAM_SUCCESS; } +/** + * Update status of a recv request based on the completion status of + * the receive fragment. + * + * @param request (IN) Receive request. + * @param frag (IN) Receive fragment. + */ void mca_pml_teg_recv_request_progress( - mca_ptl_base_recv_request_t* recv_request, - mca_ptl_base_recv_frag_t* recv_frag + mca_ptl_base_recv_request_t* request, + mca_ptl_base_recv_frag_t* frag ); diff --git a/src/mca/pml/teg/src/pml_teg_sendreq.c b/src/mca/pml/teg/src/pml_teg_sendreq.c index 4a545d0a13..5a6ae76b2d 100644 --- a/src/mca/pml/teg/src/pml_teg_sendreq.c +++ b/src/mca/pml/teg/src/pml_teg_sendreq.c @@ -25,19 +25,19 @@ void mca_pml_teg_send_request_schedule(mca_ptl_base_send_request_t* req) mca_pml_proc_t* proc_pml = proc->proc_pml; /* allocate remaining bytes to PTLs */ - size_t bytes_remaining = req->super.req_length - req->req_offset; + size_t bytes_remaining = req->req_packed_size - req->req_offset; size_t num_ptl_avail = proc_pml->proc_ptl_next.ptl_size; size_t num_ptl = 0; while(bytes_remaining > 0 && num_ptl++ < num_ptl_avail) { mca_ptl_proc_t* ptl_proc = mca_ptl_array_get_next(&proc_pml->proc_ptl_next); mca_ptl_t* ptl = ptl_proc->ptl; + int rc; /* if this is the last PTL that is available to use, or the number of * bytes remaining in the message is less than the PTLs minimum fragment * size, then go ahead and give the rest of the message to this PTL. */ size_t bytes_to_frag; - int rc; if(num_ptl == num_ptl_avail || bytes_remaining < ptl->ptl_min_frag_size) bytes_to_frag = bytes_remaining; @@ -47,14 +47,14 @@ void mca_pml_teg_send_request_schedule(mca_ptl_base_send_request_t* req) * previously assigned) */ else { - bytes_to_frag = ptl_proc->ptl_weight * req->super.req_length; + bytes_to_frag = ptl_proc->ptl_weight * req->req_packed_size; if(bytes_to_frag > bytes_remaining) bytes_to_frag = bytes_remaining; } rc = ptl->ptl_send(ptl, ptl_proc->ptl_peer, req, bytes_to_frag, 0); if(rc == LAM_SUCCESS) - bytes_remaining = req->super.req_length - req->req_offset; + bytes_remaining = req->req_packed_size - req->req_offset; } /* unable to complete send - signal request failed */ @@ -76,7 +76,7 @@ void mca_pml_teg_send_request_progress( bool complete = false; lam_mutex_lock(&mca_pml_teg.teg_request_lock); req->req_bytes_sent += frag->super.frag_size; - if (req->req_bytes_sent >= req->super.req_length) { + if (req->req_bytes_sent >= req->req_packed_size) { req->super.req_mpi_done = true; req->super.req_pml_done = true; if(mca_pml_teg.teg_request_waiting) { diff --git a/src/mca/pml/teg/src/pml_teg_sendreq.h b/src/mca/pml/teg/src/pml_teg_sendreq.h index ea88fc5ba9..25ff819baf 100644 --- a/src/mca/pml/teg/src/pml_teg_sendreq.h +++ b/src/mca/pml/teg/src/pml_teg_sendreq.h @@ -1,7 +1,9 @@ /* * $HEADER$ */ - +/** + * @file + */ #ifndef LAM_PML_TEG_SEND_REQUEST_H #define LAM_PML_TEG_SEND_REQUEST_H @@ -47,8 +49,8 @@ static inline int mca_pml_teg_send_request_start( int flags, rc; /* start the first fragment */ - if(req->super.req_length <= first_fragment_size) { - first_fragment_size = req->super.req_length; + if(req->req_packed_size <= first_fragment_size) { + first_fragment_size = req->req_packed_size; flags = (req->req_send_mode == MCA_PML_BASE_SEND_SYNCHRONOUS) ? MCA_PTL_FLAGS_ACK_MATCHED : 0; } else { /* require match for first fragment of a multi-fragment message or if synchronous send */ diff --git a/src/mca/pml/teg/src/pml_teg_start.c b/src/mca/pml/teg/src/pml_teg_start.c index bc4c396bd3..b97d738af2 100644 --- a/src/mca/pml/teg/src/pml_teg_start.c +++ b/src/mca/pml/teg/src/pml_teg_start.c @@ -3,16 +3,30 @@ #include "pml_teg_sendreq.h" -int mca_pml_teg_start(lam_request_t** request) +int mca_pml_teg_start(size_t count, lam_request_t** requests) { - mca_pml_base_request_t *pml_request = *(mca_pml_base_request_t**)request; - switch(pml_request->req_type) { - case MCA_PML_REQUEST_SEND: - return mca_pml_teg_send_request_start((mca_ptl_base_send_request_t*)pml_request); - case MCA_PML_REQUEST_RECV: - return mca_pml_teg_recv_request_start((mca_ptl_base_recv_request_t*)pml_request); - default: - return LAM_ERROR; + int rc; + size_t i; + for(i=0; ireq_type) { + case MCA_PML_REQUEST_SEND: + if((rc = mca_pml_teg_send_request_start((mca_ptl_base_send_request_t*)pml_request)) + != LAM_SUCCESS) + return rc; + break; + case MCA_PML_REQUEST_RECV: + if((rc = mca_pml_teg_recv_request_start((mca_ptl_base_recv_request_t*)pml_request)) + != LAM_SUCCESS) + return rc; + break; + default: + return LAM_ERROR; + } } + return LAM_SUCCESS; } diff --git a/src/mca/pml/teg/src/pml_teg_test.c b/src/mca/pml/teg/src/pml_teg_test.c index 722f46b896..555b88179e 100644 --- a/src/mca/pml/teg/src/pml_teg_test.c +++ b/src/mca/pml/teg/src/pml_teg_test.c @@ -2,18 +2,74 @@ int mca_pml_teg_test( - lam_request_t** request, + size_t count, + lam_request_t** requests, + int *index, int *completed, lam_status_public_t* status) { - mca_pml_base_request_t* pml_request = *(mca_pml_base_request_t**)request; - if(pml_request->req_mpi_done) { - *completed = true; - mca_pml_teg_free(request); - if (status != NULL) - *status = pml_request->req_status; - } else { + size_t i; + for(i=0; ireq_mpi_done) { + *index = i; + *completed = true; + if (NULL != status) + *status = pml_request->req_status; + if(false == pml_request->req_persistent) + mca_pml_teg_free(requests+i); + } + } + + *index = MPI_UNDEFINED; + *completed = false; + if(NULL != status) + *status = mca_pml_teg.teg_request_null.req_status; + return LAM_SUCCESS; +} + + +int mca_pml_teg_test_all( + size_t count, + lam_request_t** requests, + int *completed, + lam_status_public_t* statuses) +{ + size_t i; + size_t num_completed; + for(i=0; ireq_mpi_done) + num_completed++; + } + + if(num_completed != count) { *completed = false; + return LAM_SUCCESS; + } + + *completed = true; + if(NULL != statuses) { + /* fill out completion status and free request if required */ + for(i=0; ireq_status; + if(false == pml_request->req_persistent) + mca_pml_teg_free(requests+i); + } + } + } else { + /* free request if required */ + for(i=0; ireq_persistent) + mca_pml_teg_free(requests+i); + } } return LAM_SUCCESS; } diff --git a/src/mca/pml/teg/src/pml_teg_wait.c b/src/mca/pml/teg/src/pml_teg_wait.c index 15e4fbb569..df06e68aed 100644 --- a/src/mca/pml/teg/src/pml_teg_wait.c +++ b/src/mca/pml/teg/src/pml_teg_wait.c @@ -46,15 +46,13 @@ int mca_pml_teg_wait( } /* return request to pool */ - if(pml_request->req_persistent == false) { + if(false == pml_request->req_persistent) { mca_pml_teg_free(request); } - if (status != NULL) { + if (NULL != status) { *status = pml_request->req_status; } - if (index != NULL) { - *index = completed; - } + *index = completed; return LAM_SUCCESS; } @@ -64,46 +62,58 @@ int mca_pml_teg_wait_all( lam_request_t** requests, lam_status_public_t* statuses) { - int completed, i; - - /* - * acquire lock and test for completion - if all requests are not completed - * pend on condition variable until a request completes - */ - lam_mutex_lock(&mca_pml_teg.teg_request_lock); - mca_pml_teg.teg_request_waiting++; - do { - completed = 0; - for(i=0; ireq_mpi_done == true) { - completed++; - continue; - } - } - if(completed != count) - lam_condition_wait(&mca_pml_teg.teg_request_cond, &mca_pml_teg.teg_request_lock); - } while (completed != count); - mca_pml_teg.teg_request_waiting--; - lam_mutex_unlock(&mca_pml_teg.teg_request_lock); - - /* - * fill out completion status and free request if required - */ + int completed = 0, i; for(i=0; ireq_mpi_done == true) { + completed++; + } + } - if (NULL != statuses) { - statuses[i] = pml_request->req_status; + /* if all requests have not completed -- defer requiring lock unless required */ + if(completed != count) { + /* + * acquire lock and test for completion - if all requests are not completed + * pend on condition variable until a request completes + */ + lam_mutex_lock(&mca_pml_teg.teg_request_lock); + mca_pml_teg.teg_request_waiting++; + do { + completed = 0; + for(i=0; ireq_mpi_done == true) { + completed++; + continue; + } + } + if(completed != count) + lam_condition_wait(&mca_pml_teg.teg_request_cond, &mca_pml_teg.teg_request_lock); + } while (completed != count); + mca_pml_teg.teg_request_waiting--; + lam_mutex_unlock(&mca_pml_teg.teg_request_lock); + } + + if(NULL != statuses) { + /* fill out status and free request if required */ + for(i=0; ireq_status; + if (false == pml_request->req_persistent) { + mca_pml_teg_free(&requests[i]); + } + } } - if (false == pml_request->req_persistent) { - /* return request to pool */ - mca_pml_teg_free(&requests[i]); + } else { + /* free request if required */ + for(i=0; ireq_persistent) { + mca_pml_teg_free(&requests[i]); + } } } return LAM_SUCCESS; diff --git a/src/mca/ptl/base/base.h b/src/mca/ptl/base/base.h index 881827f227..1681a743c5 100644 --- a/src/mca/ptl/base/base.h +++ b/src/mca/ptl/base/base.h @@ -1,7 +1,9 @@ /* * $HEADER$ */ - +/** + * @file + */ #ifndef MCA_PTL_BASE_H #define MCA_PTL_BASE_H diff --git a/src/mca/ptl/base/ptl_base_comm.c b/src/mca/ptl/base/ptl_base_comm.c index 6ddab15e47..a29b3944e3 100644 --- a/src/mca/ptl/base/ptl_base_comm.c +++ b/src/mca/ptl/base/ptl_base_comm.c @@ -23,6 +23,7 @@ static void mca_pml_ptl_comm_construct(mca_pml_ptl_comm_t* comm) OBJ_CONSTRUCT(&comm->c_wild_lock, lam_mutex_t); } + static void mca_pml_ptl_comm_destruct(mca_pml_ptl_comm_t* comm) { free(comm->c_msg_seq); diff --git a/src/mca/ptl/base/ptl_base_comm.h b/src/mca/ptl/base/ptl_base_comm.h index 5a90b315bf..0621be3719 100644 --- a/src/mca/ptl/base/ptl_base_comm.h +++ b/src/mca/ptl/base/ptl_base_comm.h @@ -1,3 +1,9 @@ +/* + * $HEADER$ + */ +/** + * @file + */ #ifndef MCA_PML_COMM_H #define MCA_PML_COMM_H @@ -6,49 +12,44 @@ #include "mca/ptl/ptl.h" #include "lfc/lam_list.h" -/* - * Structure associated w/ lam_communicator_t that contains data - * specific to the PML. - */ - extern lam_class_t mca_pml_ptl_comm_t_class; +/** + * Cached on lam_communicator_t to hold queues/state + * used by the PML<->PTL interface for matching logic. + */ struct mca_pml_comm_t { lam_object_t super; - - /* send message sequence-number support - sender side */ - mca_ptl_base_sequence_t *c_msg_seq; - - /* send message sequence-number support - receiver side */ - mca_ptl_base_sequence_t *c_next_msg_seq; - - /* matching lock */ - lam_mutex_t *c_matching_lock; - - /* unexpected fragments queues */ - lam_list_t *c_unexpected_frags; - - /* these locks are needed to avoid a probe interfering with a match */ - lam_mutex_t *c_unexpected_frags_lock; - - /* out-of-order fragments queues */ - lam_list_t *c_frags_cant_match; - - /* queues of unmatched specific (source process specified) receives - * sorted by source process */ - lam_list_t *c_specific_receives; - - /* queue of unmatched wild (source process not specified) receives - * */ - lam_list_t c_wild_receives; - - /* protect access to wild receives */ - lam_mutex_t c_wild_lock; + mca_ptl_base_sequence_t *c_msg_seq; /**< send message sequence number - sender side */ + mca_ptl_base_sequence_t *c_next_msg_seq; /**< send message sequence number - receiver side */ + lam_mutex_t *c_matching_lock; /**< matching lock */ + lam_list_t *c_unexpected_frags; /**< unexpected fragment queues */ + lam_mutex_t *c_unexpected_frags_lock; /**< unexpected fragment locks */ + lam_list_t *c_frags_cant_match; /**< out-of-order fragment queues */ + lam_list_t *c_specific_receives; /**< queues of unmatched specific (source process specified) receives */ + lam_list_t c_wild_receives; /**< queue of unmatched wild (source process not specified) receives */ + lam_mutex_t c_wild_lock; /**< lock to protect access to wild receives */ }; typedef struct mca_pml_comm_t mca_pml_ptl_comm_t; -extern int mca_pml_ptl_comm_init_size(mca_pml_ptl_comm_t*, size_t); +/** + * Initialize an instance of mca_pml_ptl_comm_t based on the communicator size. + * + * @param comm Instance of mca_pml_ptl_comm_t + * @param size Size of communicator + * @return LAM_SUCCESS or error status on failure. + */ + +extern int mca_pml_ptl_comm_init_size(mca_pml_ptl_comm_t* comm, size_t size); + +/** + * Obtain the next sequence number (MPI) for a given destination rank. + * + * @param comm Instance of mca_pml_ptl_comm_t + * @param dst Rank of destination. + * @return Next available sequence number. + */ static inline mca_ptl_base_sequence_t mca_pml_ptl_comm_send_sequence(mca_pml_ptl_comm_t* comm, int dst) { diff --git a/src/mca/ptl/base/ptl_base_fragment.h b/src/mca/ptl/base/ptl_base_fragment.h index 99ec041a8a..f903c1f488 100644 --- a/src/mca/ptl/base/ptl_base_fragment.h +++ b/src/mca/ptl/base/ptl_base_fragment.h @@ -1,25 +1,30 @@ /* * $HEADER$ */ -/*%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%*/ - +/** + * @file + */ #ifndef MCA_PML_BASE_FRAGMENT_H #define MCA_PML_BASE_FRAGMENT_H #include "lfc/lam_list.h" #include "mca/ptl/ptl.h" +#include "datatype/datatype.h" #include "mca/ptl/base/ptl_base_header.h" - extern lam_class_t mca_ptl_base_frag_t_class; +/** + * Base type for fragment descriptors. + */ struct mca_ptl_base_frag_t { - lam_list_item_t super; - mca_ptl_base_header_t frag_header; + lam_list_item_t super; /**< allow the fragment to be placed on a list */ + mca_ptl_base_header_t frag_header; /**< header used for fragment matching */ struct mca_ptl_t* frag_owner; /**< PTL that allocated this fragment */ struct mca_ptl_base_peer_t* frag_peer; /**< PTL specific addressing info */ - void *frag_addr; /* pointer into request buffer at fragment offset */ - size_t frag_size; /* number of bytes available in request buffer */ + void *frag_addr; /**< pointer into request buffer at fragment offset */ + size_t frag_size; /**< number of bytes available in request buffer */ + lam_convertor_t frag_convertor; /**< datatype convertor for fragment packing/unpacking */ }; typedef struct mca_ptl_base_frag_t mca_ptl_base_frag_t; diff --git a/src/mca/ptl/base/ptl_base_header.h b/src/mca/ptl/base/ptl_base_header.h index 7dac91e2eb..2ba8617a52 100644 --- a/src/mca/ptl/base/ptl_base_header.h +++ b/src/mca/ptl/base/ptl_base_header.h @@ -1,7 +1,9 @@ /* * $HEADER$ */ - +/** + * @file + */ #ifndef MCA_PTL_BASE_HEADER_H #define MCA_PTL_BASE_HEADER_H @@ -17,80 +19,60 @@ #define MCA_PTL_FLAGS_ACK_AGGREGATE 2 -/* Defines the common header attributes - must be first element in each header type */ +/** + * Common header attributes - must be first element in each header type + */ struct mca_ptl_base_common_header_t { - /* type of envelope */ - uint8_t hdr_type; - /* flags indicating how fragment should be processed */ - uint8_t hdr_flags; - /* size of header - allow for variable length */ - uint16_t hdr_size; + uint8_t hdr_type; /**< type of envelope */ + uint8_t hdr_flags; /**< flags indicating how fragment should be processed */ + uint16_t hdr_size; /**< size of header - allow for variable length */ }; typedef struct mca_ptl_base_common_header_t mca_ptl_base_common_header_t; -/* - * Common header definition for all fragments. +/** + * Basic header for all fragments. */ - struct mca_ptl_base_frag_header_t { - /* common header */ - mca_ptl_base_common_header_t hdr_common; - /* fragment length */ - uint32_t hdr_frag_length; - /* offset into message */ - uint32_t hdr_frag_offset; - /* fragment sequence number */ - mca_ptl_base_sequence_t hdr_frag_seq; - /* pointer to source fragment */ - lam_ptr_t hdr_src_ptr; - /* pointer to matched receive */ - lam_ptr_t hdr_dst_ptr; + mca_ptl_base_common_header_t hdr_common; /**< common attributes */ + uint32_t hdr_frag_length; /**< fragment length */ + uint32_t hdr_frag_offset; /**< offset into message */ + mca_ptl_base_sequence_t hdr_frag_seq; /**< fragment sequence number */ + lam_ptr_t hdr_src_ptr; /**< pointer to source fragment */ + lam_ptr_t hdr_dst_ptr; /**< pointer to matched receive */ }; typedef struct mca_ptl_base_frag_header_t mca_ptl_base_frag_header_t; -/* +/** * Header definition for the first fragment, contains the additional * attributes required to match the corresponding posted receive. */ - struct mca_ptl_base_match_header_t { - /* fragment info */ - mca_ptl_base_frag_header_t hdr_frag; - /* communicator index */ - uint32_t hdr_contextid; - /* source rank */ - int32_t hdr_src; - /* destination rank */ - int32_t hdr_dst; - /* user tag */ - int32_t hdr_tag; - /* message length */ - uint32_t hdr_msg_length; - /* message sequence number */ - mca_ptl_base_sequence_t hdr_msg_seq; + mca_ptl_base_frag_header_t hdr_frag; /**< fragment attributes */ + uint32_t hdr_contextid; /**< communicator index */ + int32_t hdr_src; /**< source rank */ + int32_t hdr_dst; /**< destination rank */ + int32_t hdr_tag; /**< user tag */ + uint32_t hdr_msg_length; /**< message length */ + mca_ptl_base_sequence_t hdr_msg_seq; /**< message sequence number */ }; typedef struct mca_ptl_base_match_header_t mca_ptl_base_match_header_t; -/* +/** * Header used to acknowledgment outstanding fragment(s). */ - struct mca_ptl_base_ack_header_t { - /* common header */ - mca_ptl_base_common_header_t hdr_common; - /* source fragment */ - lam_ptr_t hdr_src_ptr; - /* matched receive request */ - lam_ptr_t hdr_dst_ptr; - /* sequence range */ + mca_ptl_base_common_header_t hdr_common; /**< common attributes */ + lam_ptr_t hdr_src_ptr; /**< source fragment */ + lam_ptr_t hdr_dst_ptr; /**< matched receive request */ + /* sequence range? */ }; typedef struct mca_ptl_base_ack_header_t mca_ptl_base_ack_header_t; -/* +/** * Union of defined header types. */ union mca_ptl_base_header_t { diff --git a/src/mca/ptl/base/ptl_base_match.c b/src/mca/ptl/base/ptl_base_match.c index c039ab84ae..47e5c44135 100644 --- a/src/mca/ptl/base/ptl_base_match.c +++ b/src/mca/ptl/base/ptl_base_match.c @@ -535,7 +535,7 @@ static void mca_ptl_base_check_cantmatch_for_match(lam_list_t *additional_matche { /* local parameters */ int match_found; - mca_ptl_base_sequence_t next_msg_seq_expected, frag_seqber; + mca_ptl_base_sequence_t next_msg_seq_expected, frag_seq; mca_ptl_base_recv_frag_t *frag_desc; mca_ptl_base_recv_request_t *matched_receive; @@ -567,8 +567,8 @@ static void mca_ptl_base_check_cantmatch_for_match(lam_list_t *additional_matche /* * If the message has the next expected seq from that proc... */ - frag_seqber=frag_desc->super.frag_header.hdr_match.hdr_msg_seq; - if (frag_seqber == next_msg_seq_expected) { + frag_seq=frag_desc->super.frag_header.hdr_match.hdr_msg_seq; + if (frag_seq == next_msg_seq_expected) { /* We're now expecting the next sequence number. */ (pml_comm->c_next_msg_seq[frag_src])++; @@ -617,7 +617,7 @@ static void mca_ptl_base_check_cantmatch_for_match(lam_list_t *additional_matche * and re-start search for next sequence number */ break; - } /* end if (frag_seqber == next_msg_seq_expected) */ + } /* end if (frag_seq == next_msg_seq_expected) */ } /* end for (frag_desc) loop */ diff --git a/src/mca/ptl/base/ptl_base_match.h b/src/mca/ptl/base/ptl_base_match.h index 38d03b2e53..756bdb28c0 100644 --- a/src/mca/ptl/base/ptl_base_match.h +++ b/src/mca/ptl/base/ptl_base_match.h @@ -1,13 +1,23 @@ /* * $HEADER$ */ - +/** + * @file + */ #ifndef MCA_PTL_BASE_MATCH_H #define MCA_PTL_BASE_MATCH_H struct mca_ptl_base_recv_frag_t; - +/** + * Match incoming fragments against posted receives. + * + * @param frag_header (IN) Header of received fragment. + * @param frag_desc (IN) Received fragment descriptor. + * @param match_made (OUT) Flag indicating wether a match was made. + * @param additional_matches (OUT) List of additional matches if a match was made. + * @return LAM_SUCCESS or error status on failure. + */ int mca_ptl_base_match(mca_ptl_base_match_header_t *frag_header, struct mca_ptl_base_recv_frag_t *frag_desc, bool *match_made, lam_list_t *additional_matches); diff --git a/src/mca/ptl/base/ptl_base_recvfrag.h b/src/mca/ptl/base/ptl_base_recvfrag.h index a3ac539ba5..3849bf873c 100644 --- a/src/mca/ptl/base/ptl_base_recvfrag.h +++ b/src/mca/ptl/base/ptl_base_recvfrag.h @@ -1,8 +1,9 @@ /* * $HEADER$ */ -/*%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%*/ - +/** + * @file + */ #ifndef MCA_PTL_BASE_RECVFRAG_H #define MCA_PTL_BASE_RECVFRAG_H @@ -13,43 +14,85 @@ extern lam_class_t mca_ptl_base_recv_frag_t_class; - +/** + * Base type for receive fragment descriptors. + */ struct mca_ptl_base_recv_frag_t { - mca_ptl_base_frag_t super; - mca_ptl_base_recv_request_t *frag_request; /* matched posted receive */ + mca_ptl_base_frag_t super; /**< base fragment descriptor */ + mca_ptl_base_recv_request_t *frag_request; /**< matched posted receive */ + bool frag_is_buffered; /**< does fragment need to be unpacked into users buffer */ }; typedef struct mca_ptl_base_recv_frag_t mca_ptl_base_recv_frag_t; - +/** + * Initialize the receive fragment after a match has been made. + * + * @param frag (IN) Receive fragment descriptor. + * + * If a buffer has not already been allocated, determine the + * offset into the users buffer (if contigous data), or allocate + * a buffer for the non-contigous case. + * + * TODO: may need to pass in an allocator.... + */ static inline void mca_ptl_base_recv_frag_init(mca_ptl_base_recv_frag_t* frag) { mca_ptl_base_recv_request_t* request = frag->frag_request; - mca_ptl_base_frag_header_t* header = &frag->super.frag_header.hdr_frag; + mca_ptl_base_match_header_t* header = &frag->super.frag_header.hdr_match; - /* determine the offset and size of posted buffer */ - if (request->super.req_length < header->hdr_frag_offset) { + /* initialize status */ + request->super.req_status.MPI_SOURCE = header->hdr_src; + request->super.req_status.MPI_TAG = header->hdr_tag; + request->super.req_status.MPI_ERROR = LAM_SUCCESS; + request->super.req_status._count = header->hdr_msg_length; - /* user buffer is to small - discard entire fragment */ - frag->super.frag_addr = 0; - frag->super.frag_size = 0; + if(header->hdr_frag.hdr_frag_length > 0) { - } else if (request->super.req_length < header->hdr_frag_offset + header->hdr_frag_length) { + /* initialize receive convertor */ + lam_proc_t *proc = + lam_comm_peer_lookup(request->super.req_comm, request->super.req_peer); + lam_convertor_copy(proc->proc_convertor, &frag->super.frag_convertor); + lam_convertor_init_for_recv( + &frag->super.frag_convertor, /* convertor */ + 0, /* flags */ + request->super.req_datatype, /* datatype */ + request->super.req_count, /* count elements */ + request->super.req_addr, /* users buffer */ + header->hdr_frag.hdr_frag_offset); /* offset in bytes into packed buffer */ - /* user buffer is to small - discard part of fragment */ - frag->super.frag_addr = ((unsigned char*)request->super.req_addr + header->hdr_frag_offset); - frag->super.frag_size = request->super.req_length - header->hdr_frag_offset; - - } else { - - /* user buffer is large enough for this fragment */ - frag->super.frag_addr = ((unsigned char*)request->super.req_addr + header->hdr_frag_offset); - frag->super.frag_size = header->hdr_frag_length; + /* if buffer has not already been allocated for eager + * send - go ahead and figure out offset into users + * buffer (for contigous data) - or allocate a buffer + * for the receive if required. + */ + if(NULL == frag->super.frag_addr) { + struct iovec iov; + iov.iov_base = NULL; + iov.iov_len = header->hdr_frag.hdr_frag_length; + lam_convertor_unpack(&frag->super.frag_convertor, &iov, 1); + /* non-contiguous - allocate buffer for receive */ + if(NULL == iov.iov_base) { + frag->super.frag_addr = malloc(iov.iov_len); + frag->frag_is_buffered = true; + /* we now have correct offset into users buffer */ + } else { + frag->super.frag_addr = iov.iov_base; + frag->frag_is_buffered = false; + } + frag->super.frag_size = header->hdr_frag.hdr_frag_length; + } } } - +/** + * Called by the PTL to match attempt a match for new fragments. + * + * @param frag (IN) Receive fragment descriptor. + * @param header (IN) Header corresponding to the receive fragment. + * @return LAM_SUCCESS or error status on failure. + */ static inline int mca_ptl_base_recv_frag_match( mca_ptl_base_recv_frag_t* frag, mca_ptl_base_match_header_t* header) @@ -70,7 +113,7 @@ static inline int mca_ptl_base_recv_frag_match( mca_ptl_base_recv_frag_init(frag); /* notify ptl of match */ - ptl->ptl_recv(ptl, frag, &frag->frag_request->super.req_status); + ptl->ptl_recv(ptl, frag); /* process any additional fragments that arrived out of order */ frag = (mca_ptl_base_recv_frag_t*)lam_list_remove_first(&matched_frags); diff --git a/src/mca/ptl/base/ptl_base_recvreq.c b/src/mca/ptl/base/ptl_base_recvreq.c index 0e1824cc9f..f8fa8c1239 100644 --- a/src/mca/ptl/base/ptl_base_recvreq.c +++ b/src/mca/ptl/base/ptl_base_recvreq.c @@ -51,7 +51,7 @@ void mca_ptl_base_recv_request_match_specific(mca_ptl_base_recv_request_t* reque mca_ptl_t* ptl = frag->super.frag_owner; THREAD_UNLOCK(pml_comm->c_matching_lock+req_peer); mca_ptl_base_recv_frag_init(frag); - ptl->ptl_recv(ptl, frag, &request->super.req_status); + ptl->ptl_recv(ptl, frag); return; /* match found */ } @@ -95,8 +95,8 @@ void mca_ptl_base_recv_request_match_wild(mca_ptl_base_recv_request_t* request) if ((frag = mca_ptl_base_recv_request_match_specific_proc(request, proc)) != NULL) { mca_ptl_t* ptl = frag->super.frag_owner; THREAD_UNLOCK(pml_comm->c_matching_lock+proc); - mca_ptl_base_recv_frag_init(frag); - ptl->ptl_recv(ptl, frag, &request->super.req_status); + mca_ptl_base_recv_frag_init(frag); + ptl->ptl_recv(ptl, frag); return; /* match found */ } THREAD_UNLOCK(pml_comm->c_matching_lock+proc); diff --git a/src/mca/ptl/base/ptl_base_recvreq.h b/src/mca/ptl/base/ptl_base_recvreq.h index 9a2904c074..874c90b570 100644 --- a/src/mca/ptl/base/ptl_base_recvreq.h +++ b/src/mca/ptl/base/ptl_base_recvreq.h @@ -1,7 +1,9 @@ /* * $HEADER$ */ - +/** + * @file + */ #ifndef MCA_PML_BASE_RECV_REQUEST_H #define MCA_PML_BASE_RECV_REQUEST_H @@ -11,25 +13,33 @@ extern lam_class_t mca_ptl_base_recv_request_t_class; struct mca_ptl_base_recv_frag_t; - +/** + * Base type for receive requests. + */ struct mca_ptl_base_recv_request_t { - mca_pml_base_request_t super; - /* request sequence number */ - mca_ptl_base_sequence_t req_sequence; - /* number of bytes delivered */ - size_t req_bytes_recvd; + mca_pml_base_request_t super; /**< base request */ + mca_ptl_base_sequence_t req_sequence; /**< request sequence number */ + size_t req_bytes_recvd; /**< number of bytes delivered to user */ }; typedef struct mca_ptl_base_recv_request_t mca_ptl_base_recv_request_t; -void mca_ptl_base_recv_request_match_wild(mca_ptl_base_recv_request_t*); -void mca_ptl_base_recv_request_match_specific(mca_ptl_base_recv_request_t*); - - +/** + * Initialize a receive request with call parameters. + * + * @param request (IN) Receive request. + * @param addr (IN) User buffer. + * @param count (IN) Number of elements of indicated datatype. + * @param datatype (IN) User defined datatype. + * @param src (IN) Source rank w/in the communicator. + * @param tag (IN) User defined tag. + * @param comm (IN) Communicator. + * @param persistent (IN) Is this a ersistent request. + */ static inline void mca_ptl_base_recv_request_init( mca_ptl_base_recv_request_t *request, void *addr, - size_t length, + size_t count, lam_datatype_t* datatype, int src, int tag, @@ -39,11 +49,12 @@ static inline void mca_ptl_base_recv_request_init( request->req_sequence = 0; request->req_bytes_recvd = 0; request->super.req_addr = addr; - request->super.req_length = length; + request->super.req_count = count; request->super.req_datatype = datatype; request->super.req_peer = src; request->super.req_tag = tag; request->super.req_comm = comm; + request->super.req_proc = NULL; request->super.req_type = MCA_PML_REQUEST_RECV; request->super.req_persistent = persistent; request->super.req_mpi_done = false; @@ -52,5 +63,21 @@ static inline void mca_ptl_base_recv_request_init( request->super.super.req_type = LAM_REQUEST_PML; } +/** + * Attempt to match the request against the unexpected fragment list + * for all source ranks w/in the communicator. + * + * @param request (IN) Request to match. + */ +void mca_ptl_base_recv_request_match_wild(mca_ptl_base_recv_request_t* request); + +/** + * Attempt to match the request against the unexpected fragment list + * for a specific source rank. + * + * @param request (IN) Request to match. + */ +void mca_ptl_base_recv_request_match_specific(mca_ptl_base_recv_request_t* request); + #endif diff --git a/src/mca/ptl/base/ptl_base_sendfrag.h b/src/mca/ptl/base/ptl_base_sendfrag.h index d5454ff0c5..ef9305d62a 100644 --- a/src/mca/ptl/base/ptl_base_sendfrag.h +++ b/src/mca/ptl/base/ptl_base_sendfrag.h @@ -1,6 +1,9 @@ /* * $HEADER$ */ +/** + * @file + */ #ifndef MCA_PTL_BASE_SEND_FRAG_H #define MCA_PTL_BASE_SEND_FRAG_H @@ -9,10 +12,12 @@ extern lam_class_t mca_ptl_base_send_frag_t_class; - +/** + * Base type for send fragment descriptors + */ struct mca_ptl_base_send_frag_t { - mca_ptl_base_frag_t super; - struct mca_ptl_base_send_request_t *frag_request; + mca_ptl_base_frag_t super; /**< base fragment descriptor */ + struct mca_ptl_base_send_request_t *frag_request; /**< pointer to send request */ }; typedef struct mca_ptl_base_send_frag_t mca_ptl_base_send_frag_t; diff --git a/src/mca/ptl/base/ptl_base_sendreq.c b/src/mca/ptl/base/ptl_base_sendreq.c index 557282d8be..c38553d112 100644 --- a/src/mca/ptl/base/ptl_base_sendreq.c +++ b/src/mca/ptl/base/ptl_base_sendreq.c @@ -1,6 +1,7 @@ /* * $HEADER$ */ +#include #include "mca/ptl/base/ptl_base_sendreq.h" #include "mca/ptl/base/ptl_base_sendfrag.h" @@ -18,11 +19,9 @@ lam_class_t mca_ptl_base_send_request_t_class = { static void mca_ptl_base_send_request_construct(mca_ptl_base_send_request_t* req) { - OBJ_CONSTRUCT(&req->req_unacked_frags, lam_list_t); } static void mca_ptl_base_send_request_destruct(mca_ptl_base_send_request_t* req) { - OBJ_DESTRUCT(&req->req_unacked_frags); } diff --git a/src/mca/ptl/base/ptl_base_sendreq.h b/src/mca/ptl/base/ptl_base_sendreq.h index c9ac31b0a3..5aa61bc3ec 100644 --- a/src/mca/ptl/base/ptl_base_sendreq.h +++ b/src/mca/ptl/base/ptl_base_sendreq.h @@ -1,12 +1,14 @@ /* * $HEADER$ */ - +/** + * @file + */ #ifndef MCA_PML_BASE_SEND_REQUEST_H #define MCA_PML_BASE_SEND_REQUEST_H #include "lam_config.h" - +#include "datatype/datatype.h" #include "mca/ptl/ptl.h" #include "mca/pml/base/pml_base_request.h" #include "mca/ptl/base/ptl_base_comm.h" @@ -16,72 +18,100 @@ extern lam_class_t mca_ptl_base_send_request_t_class; struct mca_ptl_base_send_frag_t; +/** + * Base type for send requests + */ struct mca_ptl_base_send_request_t { - /* request object - common data structure for use by wait/test */ - mca_pml_base_request_t super; - /* number of bytes that have already been assigned to a fragment */ - size_t req_offset; - /* number of fragments that have been allocated */ - size_t req_frags; - /* number of bytes that have been sent */ - size_t req_bytes_sent; - /* number of bytes that have been acked */ - size_t req_bytes_acked; - /* type of send */ - mca_pml_base_send_mode_t req_send_mode; - /* sequence number for MPI pt-2-pt ordering */ - mca_ptl_base_sequence_t req_msg_seq; - /* queue of fragments that are waiting to be acknowledged */ - mca_ptl_base_queue_t req_unacked_frags; - /* PTL that allocated this descriptor */ - struct mca_ptl_t* req_owner; - /* PTL peer instance that will be used for first fragment */ - struct mca_ptl_base_peer_t* req_peer; - /* peer matched receive */ - lam_ptr_t req_peer_request; + mca_pml_base_request_t super; /** base request type - common data structure for use by wait/test */ + size_t req_offset; /**< number of bytes that have already been assigned to a fragment */ + size_t req_frags; /**< number of fragments that have been allocated */ + size_t req_bytes_sent; /**< number of bytes that have been sent */ + mca_pml_base_send_mode_t req_send_mode; /**< type of send */ + mca_ptl_base_sequence_t req_msg_seq; /**< sequence number for MPI pt-2-pt ordering */ + struct mca_ptl_t* req_owner; /**< PTL that allocated this descriptor */ + struct mca_ptl_base_peer_t* req_peer; /**< PTL peer instance that will be used for first fragment */ + lam_ptr_t req_peer_request; /**< matched receive at peer */ + lam_convertor_t req_convertor; /**< convertor that describes this datatype */ + size_t req_packed_size; /**< packed size of a message given the datatype and count */ }; typedef struct mca_ptl_base_send_request_t mca_ptl_base_send_request_t; -static inline bool mca_ptl_base_send_request_matched( - mca_ptl_base_send_request_t* request) -{ - return (NULL != request->req_peer_request.pval); -} - +/** + * Initialize a send request with call parameters. + * + * @param request (IN) Send request + * @param addr (IN) User buffer + * @param count (IN) Number of elements of indicated datatype. + * @param datatype (IN) User defined datatype + * @param peer (IN) Destination rank + * @param tag (IN) User defined tag + * @param comm (IN) Communicator + * @param mode (IN) Send mode (STANDARD,BUFFERED,SYNCHRONOUS,READY) + * @param persistent (IN) Is request persistent. + */ static inline void mca_ptl_base_send_request_init( mca_ptl_base_send_request_t *request, void *addr, - size_t length, + size_t count, lam_datatype_t* datatype, int peer, int tag, lam_communicator_t* comm, - mca_pml_base_send_mode_t sendmode, + mca_pml_base_send_mode_t mode, bool persistent) { request->req_offset = 0; request->req_frags = 0; request->req_bytes_sent = 0; - request->req_bytes_acked = 0; - request->req_send_mode = sendmode; + request->req_send_mode = mode; request->req_peer_request.lval = 0; request->req_msg_seq = mca_pml_ptl_comm_send_sequence(comm->c_pml_comm, peer); request->super.req_addr = addr; - request->super.req_length = length; + request->super.req_count = count; request->super.req_datatype = datatype; request->super.req_peer = peer; request->super.req_tag = tag; request->super.req_comm = comm; + request->super.req_proc = lam_comm_peer_lookup(comm,peer); request->super.req_type = MCA_PML_REQUEST_SEND; request->super.req_persistent = persistent; request->super.req_mpi_done = false; request->super.req_pml_done = false; request->super.req_free_called = false; request->super.super.req_type = LAM_REQUEST_PML; + + /* initialize datatype convertor for this request */ + if(count > 0) { + int packed_size; + lam_convertor_copy(request->super.req_proc->proc_convertor, &request->req_convertor); + lam_convertor_init_for_send( + &request->req_convertor, + 0, + request->super.req_datatype, + request->super.req_count, + request->super.req_addr, + 0); + lam_convertor_get_packed_size(&request->req_convertor, &packed_size); + request->req_packed_size = packed_size; + } else { + request->req_packed_size = 0; + } } +/** + * Test to check if an acknowledgment has been received, with the match. + * + * @param request (IN) Send request. + * return TRUE if an ack/match has been received from peer. + */ +static inline bool mca_ptl_base_send_request_matched( + mca_ptl_base_send_request_t* request) +{ + return (NULL != request->req_peer_request.pval); +} + #endif diff --git a/src/mca/ptl/ptl.h b/src/mca/ptl/ptl.h index 975e099657..bed9d93dac 100644 --- a/src/mca/ptl/ptl.h +++ b/src/mca/ptl/ptl.h @@ -1,9 +1,127 @@ /* * $HEADER$ */ -/* +/** + * @file + * * P2P Transport Layer (PTL) + * + * An MCA component type that allows the PML (mca_pml_t) to support a variety of + * network transports concurrently. The PTL layer is responsible for the reliable + * delivery of message fragments, while the assignment and scheduling of fragments + * to PTLs is handled by the upper layer. + * + * PTL Initialization: + * + * During library initialization, all available PTL modules are loaded and opened + * via their mca_base_open_module_fn_t function. If possible, the module should + * load and open regardless of wether the transport is available. This allows + * parameters used by the module to be reported on by tools such as XXXinfo. + * + * The mca_ptl_base_module_init_fn_t() is then called for each of the modules that + * are succesfully opened. The module init function may return either: + * + * (1) a NULL list of PTL instances if the transport is not available, + * (2) a list containing a single PTL instance, where the PTL provides + * a layer of abstraction over multiple physical devices (e.g. NICs), + * (3) a list containing multiple PTL instances where each PTL instance + * corresponds to a single physical device. + * + * If multiple network devices are available for a given transport, the preferred + * approach is (3) above. In this case, the PML layer will handle scheduling + * across the available resources, and fail-over in the event of a PTL failure. + * If the second approach is used, and a single PTL instance abstracts multiple + * physical devices, the PTL assumes all responsibility for scheduling/failover + * within those devices. + * + * During module initialization, the module should post any addressing information + * required by its peers. An example would be the TCP listen port opened by the + * TCP module for incoming connection requests. This information is published + * to peers via the mca_base_modex_send() interface. Note that peer information + * will not be available via mca_base_modex_recv() during the modules init + * function. However, it is guaranteed to be available during PTL selection. + * + * PTL Selection: + * + * The PML maintains a list of available PTL instances, sorted by their exclusivity + * ranking. This is a relative ranking that is used to select the set of PTLs that + * may be used to reach a given destination. The PTL modules are queried via their + * mca_ptl_base_add_proc_fn_t() to determine if they are able to reach a given destination. + * The first PTL module that returns success is selected. Subsequent PTL modules are + * queried only if they are at the same exclusivity ranking. + * + * An example of how this might be used: + * + * PTL Exclusivity Comments + * -------- ----------- ------------------ + * LO 100 Selected exclusively for local process + * SM 50 Selected exclusively for other processes on host + * IB 0 Selected based on network reachability + * IB 0 Selected based on network reachability + * TCP 0 Selected based on network reachability + * TCP 0 Selected based on network reachability + * + * When a PTL module is selected, it may choose to optionally return a pointer to an + * an mca_ptl_base_peer_t data structure to the PML. This pointer is cached by the PML + * and returned to the PTL on subsequent data transfer calls to this specific process. + * The actual contents of the data structure are defined on a per PTL basis, and are + * typically used to cache addressing or connection information, such as the TCP socket + * used by the TCP PTL. + * + * Send Path: + * + * When multiple PTLs are available to reach a given destination, a single request + * (that is large enough) will be split across the available PTLs. The PML scheduler + * will determine the PTL to use for the first fragment based on the relative latency + * ranking exported by the PTLs. + * + * To minimize latency, and allow for derived types of the mca_pml_base_send_request_t, + * the PML will call the selected PTLs ptl_request_alloc() method to allocate the send + * request descriptor. The PTL should return a derived type of mca_pml_base_send_request_t, + * that contains space for both the base send request and initial fragment descriptor + * (mca_ptl_base_send_frag_t or derived type). This approach allows all of the descriptors + * required to initiate the send to be allocated from a free list in a single operation. + * + * When the request is started, the PML will call the selected PTL's ptl_send() method + * with up to ptl_first_frag_size bytes of the request. The PTL should attempt to deliver + * up to the requested number of bytes. The number of bytes actually fragmented and queued + * for delivery must be updated on the send request to reflect the current offset into the + * send buffer. + * + * If the request is larger than ptl_first_frag_size, the remainder of the request + * will be scheduled across potentially multiple PTLs, upon an acknowledgment from + * the peer that the request has been matched on the receive side. The PTL must defer + * calling ptl_send_progress() on the initial fragment until an acknowledment is received, + * as this signals to the PML that the remaining fragments may be scheduled. For further + * detail on the scheduling algorithm, refer to the PML (mca_pml_t) documentation. + * + * As subsequent fragments are completed by the PTLs, the ptl_send_progress() method should + * be called to update the status of the send request. Note that each PTL is responsible + * for managing the resources associated with send fragments and their allocation from and + * return to internal caches/free lists. + * + * Recv Path: + * + * The first fragment of a message is sent with a header type of mca_ptl_base_match_header_t. + * When a header of this type is received, to minimize latency the PTL should call the + * ptl_match() method as soon as entire header is available, potentially prior to receiving + * any data associated with the first fragment. If a match is made, the PML will call + * the ptl_recv() method of the fragments PTL. + * + * The ptl_recv() method should generate, if required, an ack to the source process. An + * ack is required if the MCA_PTL_FLAGS_ACK_MATCHED bit is set by the source in the initial + * message header. The ack should contain a pointer to the matched request, along + * with the pointer to the orignal send fragment contained in the initial message header. + * + * On receipt of the ack, the source will schedule any remaining fragments. The selected PTLs + * should generate the remaining fragments with an mca_ptl_base_frag_header_t, which contains + * a placeholder for a pointer to the matched receive request. This allows the receiver to + * avoid calling the matching logic for subsequent fragments. As fragments are completed, + * each PTL calls their ptl_recv_progress() method to update the PML with the request + * status. + * */ + #ifndef MCA_PTL_H #define MCA_PTL_H @@ -13,7 +131,6 @@ #include "proc/proc.h" #include "mca/pml/pml.h" - /* * PTL types */ @@ -36,9 +153,10 @@ typedef lam_list_t mca_ptl_base_queue_t; */ /** - * MCA->PTL Intializes the PTL module and creates specific PTL instance(s). + * MCA->PTL Intializes the PTL module and creates specific PTL instance(s). * - * @param num_ptls (OUT) Returns the number of ptl instances created. + * @param num_ptls (OUT) Returns the number of ptl instances created, or 0 + * if the transport is not available. * * @param allow_multi_user_threads (OUT) Whether this module can run * at MPI_THREAD_MULTIPLE or not. @@ -46,7 +164,14 @@ typedef lam_list_t mca_ptl_base_queue_t; * @param have_hidden_threads (OUT) Whether this module may use * hidden threads (e.g., progress threads) or not. * - * @return Array of pointers to PTL instances. + * @return Array of pointers to PTL instances, or NULL if the transport + * is not available. + * + * During module initialization, the PTL module should discover the physical + * devices that are available for the given transport, and a PTL instance + * created to represent each available device. Any addressing information + * required by peers to reach the available devices should be published during + * the module init via the mca_base_modex_send() interface. */ typedef struct mca_ptl_t** (*mca_ptl_base_module_init_fn_t)( int *num_ptls, @@ -56,53 +181,71 @@ typedef struct mca_ptl_t** (*mca_ptl_base_module_init_fn_t)( /** - * PML->PTL Progresses outstanding requests in each PTL module. - * - * @param timstamp (IN) The current time - used for retransmisstion timers. - */ -typedef void (*mca_ptl_base_module_progress_fn_t)( - mca_ptl_base_tstamp_t timestamp -); - -/** - * PTL module version and interface functions. + * PTL module descriptor. Contains module version information + * and module open/close/init functions. */ struct mca_ptl_base_module_1_0_0_t { mca_base_module_t ptlm_version; mca_base_module_data_1_0_0_t ptlm_data; mca_ptl_base_module_init_fn_t ptlm_init; - mca_ptl_base_module_progress_fn_t ptlm_progress; }; typedef struct mca_ptl_base_module_1_0_0_t mca_ptl_base_module_1_0_0_t; typedef struct mca_ptl_base_module_1_0_0_t mca_ptl_base_module_t; /* - * PTL instance interface functions and datatype. + * PTL instance interface functions and datatype. */ + /** - * PML->PTL notification of change in the process list. + * MCA->PTL Clean up any resources held by PTL instance before the + * module is unloaded. + * + * @param ptl (IN) PTL instance. * - * @param ptl (IN) - * @param procs (IN) - * @param nprocs (IN) - * @return + * Prior to unloading a PTL module, the MCA framework will call the PTL + * finalize method for each PTL instance. + * + */ +typedef int (*mca_ptl_base_finalize_fn_t)( + struct mca_ptl_t* ptl +); + +/** + * PML->PTL notification of change in the process list. + * + * @param ptl (IN) PTL instance + * @param proc (IN) Peer process + * @param peer (OUT) Peer addressing information. + * @return Status indicates wether PTL is reachable. + * + * The mca_ptl_base_add_proc_fn_t() is called by the PML to determine + * the set of PTLs that should be used to reach the specified process. + * A return value of LAM_SUCCESS indicates the PTL should be added to the + * set used to reach the proc. The peers addressing information may be + * obtained by the PTL via the mca_base_modex_recv() function if required. + * The PTL may optionally return a pointer to a mca_ptl_base_peer_t data + * structure, to cache peer addressing or connection information. */ typedef int (*mca_ptl_base_add_proc_fn_t)( struct mca_ptl_t* ptl, struct lam_proc_t* proc, - struct mca_ptl_base_peer_t** + struct mca_ptl_base_peer_t** peer ); /** - * PML->PTL notification of change in the process list. + * PML->PTL notification of change in the process list. * - * @param ptl (IN) - * @param procs (IN) - * @param nprocs (IN) - * @return + * @param ptl (IN) PTL instance + * @param proc (IN) Peer process + * @param peer (IN) Peer addressing information. + * @return Status indicating if cleanup was successful + * + * If the process list shrinks, the PML will notify the PTL of the + * change. Peer addressing information cached by the PML is provided + * for cleanup by the PTL. */ typedef int (*mca_ptl_base_del_proc_fn_t)( struct mca_ptl_t* ptl, @@ -111,29 +254,59 @@ typedef int (*mca_ptl_base_del_proc_fn_t)( ); /** - * MCA->PTL Clean up any resources held by PTL instance before the module is unloaded. - * - * @param ptl (IN) The PTL module instance that is being unloaded. + * PML->PTL Allocate a send request from the PTL modules free list. + * + * @param ptl (IN) PTL instance + * @param request (OUT) Pointer to allocated request. + * @return Status indicating if allocation was successful. + * + * To reduce latency (number of required allocations), a derived + * type of mca_ptl_base_send_request_t is obtained from the PTL that + * is selected to send the first fragment. The derived type should contain + * space for the base request structure, the PTL first fragment, + * and any other required buffer space. */ -typedef int (*mca_ptl_base_finalize_fn_t)( - struct mca_ptl_t* ptl -); - typedef int (*mca_ptl_base_request_alloc_fn_t)( struct mca_ptl_t* ptl, struct mca_ptl_base_send_request_t** request ); +/** + * PML->PTL Return a send request to the PTL modules free list. + * + * @param ptl (IN) PTL instance + * @param request (IN) Pointer to allocated request. + * + * Called when the request has been completed at both the MPI + * and PML layers. + */ typedef void (*mca_ptl_base_request_return_fn_t)( struct mca_ptl_t* ptl, struct mca_ptl_base_send_request_t* request ); -typedef void (*mca_ptl_base_frag_return_fn_t)( - struct mca_ptl_t* ptl, - struct mca_ptl_base_recv_frag_t* frag -); - +/** + * PML->PTL Initiate a send of the specified size. + * + * @param ptl (IN) PTL instance + * @param ptl_base_peer (IN) PTL peer addressing + * @param send_request (IN/OUT) Send request (allocated by PML via mca_ptl_base_request_alloc_fn_t) + * @param size (IN) Number of bytes PML is requesting PTL to deliver + * @param flags (IN) Flags that should be passed to the peer via the message header. + * @param request (OUT) LAM_SUCCESS if the PTL was able to queue one or more fragments + * + * When multiple PTLs are available, a single request (that is large enough) + * will be split across the available PTLs. The PML scheduler will determine + * the percentage given to a PTL based on the bandwidth provided by the transport + * and its current resource usage. The size parameter to the send function indicates + * the number of bytes the PML is requesting the PTL to send. The PTL may choose + * to send 0->size bytes based on available resources. + * + * The current offset into the users buffer is passed into the send function + * via the req_offset member of the send request parameter. The send function + * must update req_offset with the actual number of bytes the PTL is able to + * fragment for delivery. + */ typedef int (*mca_ptl_base_send_fn_t)( struct mca_ptl_t* ptl, struct mca_ptl_base_peer_t* ptl_base_peer, @@ -142,36 +315,77 @@ typedef int (*mca_ptl_base_send_fn_t)( int flags ); +/** + * PML->PTL Notification that a receive fragment has been matched. + * + * @param ptl (IN) PTL instance + * @param recv_frag (IN) Receive fragment + * + * A fragment may be matched either when a new receive is posted, + * or on receipt of a fragment from the network. In either case, + * the PML will downcall into the PTL to provide a notification + * that the match was made. + */ typedef void (*mca_ptl_base_recv_fn_t)( struct mca_ptl_t* ptl, - struct mca_ptl_base_recv_frag_t* recv_frag, - struct lam_status_public_t* recv_status + struct mca_ptl_base_recv_frag_t* recv_frag ); +/** + * PTL->PML Notification from the PTL to the PML that a new fragment + * has arrived and can be matched against posted receives. + * + * @param ptl (IN) PTL instance + * @param recv_frag Receive fragment + * @param header (IN) Message header + * + * A fragment may be matched either when a new receive is posted, + * or on receipt of a fragment from the network. In either case, + * the PML will downcall into the PTL to provide a notification + * that the match was made. + * + * The message header used for matching is not required to be + * contained within the receive fragment. However, if the match is + * not made, the matching code will copy the supplied header into the + * recv fragment so that the match can be made when the receive is posted. + */ typedef int (*mca_ptl_base_match_fn_t)( - struct mca_ptl_base_recv_frag_t* frag, + struct mca_ptl_base_recv_frag_t* recv_frag, struct mca_ptl_base_match_header_t* header ); +/** + * PTL->PML Notification from the PTL to the PML that a fragment + * has completed (e.g. been successfully delivered into users buffer) + * + * @param recv_request (IN) Receive Request + * @param recv_frag (IN) Receive Fragment + */ typedef void (*mca_ptl_base_recv_progress_fn_t)( struct mca_ptl_base_recv_request_t* recv_request, struct mca_ptl_base_recv_frag_t* recv_frag ); +/** + * PTL->PML Notification from the PTL to the PML that a fragment + * has completed (e.g. been successfully delivered to peer) + * + * @param send_request (IN) Send Request + * @param send_frag (IN) Send Fragment + */ typedef void (*mca_ptl_base_send_progress_fn_t)( struct mca_ptl_base_send_request_t* send_request, struct mca_ptl_base_send_frag_t* send_frag ); /** - * PTL instance interface functions and common state. + * PTL instance interface functions and attributes. */ - struct mca_ptl_t { /* PTL common attributes */ - mca_ptl_base_module_t* ptl_module; - size_t ptl_first_frag_size; /**< maximum size of first fragment */ + mca_ptl_base_module_t* ptl_module; /**< pointer back to the PTL module structure */ + size_t ptl_first_frag_size; /**< maximum size of first fragment -- eager send */ size_t ptl_min_frag_size; /**< threshold below which the PTL will not fragment */ size_t ptl_max_frag_size; /**< maximum fragment size supported by the PTL */ uint32_t ptl_exclusivity; /**< indicates this PTL should be used exclusively */ @@ -186,7 +400,6 @@ struct mca_ptl_t { mca_ptl_base_recv_fn_t ptl_recv; mca_ptl_base_request_alloc_fn_t ptl_request_alloc; mca_ptl_base_request_return_fn_t ptl_request_return; - mca_ptl_base_frag_return_fn_t ptl_frag_return; /* PTL->PML function table - filled in by PML at init */ mca_ptl_base_match_fn_t ptl_match; diff --git a/src/mca/ptl/tcp/src/ptl_tcp.c b/src/mca/ptl/tcp/src/ptl_tcp.c index 4a0bd845b3..df3fc8f344 100644 --- a/src/mca/ptl/tcp/src/ptl_tcp.c +++ b/src/mca/ptl/tcp/src/ptl_tcp.c @@ -2,6 +2,7 @@ * $HEADER$ */ +#include #include "util/output.h" #include "util/if.h" #include "mca/pml/pml.h" @@ -35,28 +36,11 @@ mca_ptl_tcp_t mca_ptl_tcp = { mca_ptl_tcp_send, mca_ptl_tcp_recv, mca_ptl_tcp_request_alloc, - mca_ptl_tcp_request_return, - (mca_ptl_base_frag_return_fn_t)mca_ptl_tcp_recv_frag_return + mca_ptl_tcp_request_return } }; -int mca_ptl_tcp_create(int if_index) -{ - mca_ptl_tcp_t* ptl = malloc(sizeof(mca_ptl_tcp_t)); - if(NULL == ptl) - return LAM_ERR_OUT_OF_RESOURCE; - memcpy(ptl, &mca_ptl_tcp, sizeof(mca_ptl_tcp)); - mca_ptl_tcp_module.tcp_ptls[mca_ptl_tcp_module.tcp_num_ptls++] = ptl; - - /* initialize the ptl */ - ptl->ptl_ifindex = if_index; - lam_ifindextoaddr(if_index, (struct sockaddr*)&ptl->ptl_ifaddr, sizeof(ptl->ptl_ifaddr)); - lam_ifindextomask(if_index, (struct sockaddr*)&ptl->ptl_ifmask, sizeof(ptl->ptl_ifmask)); - return LAM_SUCCESS; -} - - int mca_ptl_tcp_add_proc(struct mca_ptl_t* ptl, struct lam_proc_t *lam_proc, struct mca_ptl_base_peer_t** peer_ret) { mca_ptl_tcp_proc_t* ptl_proc = mca_ptl_tcp_proc_create(lam_proc); @@ -130,8 +114,7 @@ void mca_ptl_tcp_request_return(struct mca_ptl_t* ptl, struct mca_ptl_base_send_ void mca_ptl_tcp_recv_frag_return(struct mca_ptl_t* ptl, struct mca_ptl_tcp_recv_frag_t* frag) { - if(frag->frag_buff != NULL && frag->frag_buff != frag->super.super.frag_addr) - free(frag->frag_buff); + /* FIX - need to cleanup convertor */ lam_free_list_return(&mca_ptl_tcp_module.tcp_recv_frags, (lam_list_item_t*)frag); } @@ -139,8 +122,9 @@ void mca_ptl_tcp_recv_frag_return(struct mca_ptl_t* ptl, struct mca_ptl_tcp_recv void mca_ptl_tcp_send_frag_return(struct mca_ptl_t* ptl, struct mca_ptl_tcp_send_frag_t* frag) { if(lam_list_get_size(&mca_ptl_tcp_module.tcp_pending_acks)) { - mca_ptl_tcp_recv_frag_t* pending = (mca_ptl_tcp_recv_frag_t*) - lam_list_remove_first(&mca_ptl_tcp_module.tcp_pending_acks); + mca_ptl_tcp_recv_frag_t* pending; + THREAD_LOCK(&mca_ptl_tcp_module.tcp_lock); + pending = (mca_ptl_tcp_recv_frag_t*)lam_list_remove_first(&mca_ptl_tcp_module.tcp_pending_acks); THREAD_LOCK(&mca_ptl_tcp_module.tcp_lock); if(NULL == pending) { THREAD_UNLOCK(&mca_ptl_tcp_module.tcp_lock); @@ -156,6 +140,12 @@ void mca_ptl_tcp_send_frag_return(struct mca_ptl_t* ptl, struct mca_ptl_tcp_send } } +/* + * Initiate a send. If this is the first fragment, use the fragment + * descriptor allocated with the send requests, otherwise obtain + * one from the free list. Initialize the fragment and foward + * on to the peer. + */ int mca_ptl_tcp_send( struct mca_ptl_t* ptl, @@ -178,20 +168,17 @@ int mca_ptl_tcp_send( } +/* + * A posted receive has been matched - if required send an + * ack back to the peer and process the fragment. + */ + void mca_ptl_tcp_recv( mca_ptl_t* ptl, - mca_ptl_base_recv_frag_t* frag, - lam_status_public_t* status) + mca_ptl_base_recv_frag_t* frag) { + /* send ack back to peer? */ mca_ptl_base_header_t* header = &frag->super.frag_header; - - /* fill in match */ - status->MPI_SOURCE = header->hdr_match.hdr_src; - status->MPI_TAG = header->hdr_match.hdr_tag; - status->MPI_ERROR = LAM_SUCCESS; - status->_count = header->hdr_match.hdr_msg_length; - - /* send ack back to peer */ if(header->hdr_common.hdr_flags & MCA_PTL_FLAGS_ACK_MATCHED) { int rc; mca_ptl_tcp_send_frag_t* ack = mca_ptl_tcp_send_frag_alloc(&rc); diff --git a/src/mca/ptl/tcp/src/ptl_tcp.h b/src/mca/ptl/tcp/src/ptl_tcp.h index 787ab122bd..2c61e953dc 100644 --- a/src/mca/ptl/tcp/src/ptl_tcp.h +++ b/src/mca/ptl/tcp/src/ptl_tcp.h @@ -1,8 +1,9 @@ -/* @file - * +/* * $HEADER$ */ - +/** + * @file + */ #ifndef MCA_PTL_TCP_H #define MCA_PTL_TCP_H @@ -15,31 +16,30 @@ #include "mca/ptl/ptl.h" -/* +/** * TCP PTL module. */ - struct mca_ptl_tcp_module_1_0_0_t { - mca_ptl_base_module_1_0_0_t super; - struct mca_ptl_tcp_t** tcp_ptls; - size_t tcp_num_ptls; /**< number of ptls actually used */ - size_t tcp_max_ptls; /**< maximum number of ptls - available kernel ifs */ - int tcp_listen_sd; - unsigned short tcp_listen_port; - char* tcp_if_include; /**< comma seperated list of interface to include */ - char* tcp_if_exclude; /**< comma seperated list of interface to exclude */ - int tcp_free_list_num; /**< initial size of free lists */ - int tcp_free_list_max; /**< maximum size of free lists */ - int tcp_free_list_inc; /**< number of elements to alloc when growing free lists */ - lam_free_list_t tcp_send_requests; - lam_free_list_t tcp_send_frags; - lam_free_list_t tcp_recv_frags; - lam_list_t tcp_procs; - lam_list_t tcp_pending_acks; - struct mca_ptl_tcp_proc_t* tcp_local; - lam_event_t tcp_send_event; - lam_event_t tcp_recv_event; - lam_mutex_t tcp_lock; + mca_ptl_base_module_1_0_0_t super; /**< base PTL module */ + struct mca_ptl_tcp_t** tcp_ptls; /**< array of available PTLs */ + size_t tcp_num_ptls; /**< number of ptls actually used */ + size_t tcp_max_ptls; /**< maximum number of ptls - available kernel ifs */ + int tcp_listen_sd; /**< listen socket for incoming connection requests */ + unsigned short tcp_listen_port; /**< listen port */ + char* tcp_if_include; /**< comma seperated list of interface to include */ + char* tcp_if_exclude; /**< comma seperated list of interface to exclude */ + int tcp_free_list_num; /**< initial size of free lists */ + int tcp_free_list_max; /**< maximum size of free lists */ + int tcp_free_list_inc; /**< number of elements to alloc when growing free lists */ + lam_free_list_t tcp_send_requests; /**< free list of tcp send requests -- sendreq + sendfrag */ + lam_free_list_t tcp_send_frags; /**< free list of tcp send fragments */ + lam_free_list_t tcp_recv_frags; /**< free list of tcp recv fragments */ + lam_list_t tcp_procs; /**< list of tcp proc structures */ + lam_list_t tcp_pending_acks; /**< list of pending acks - retry as sends complete */ + struct mca_ptl_tcp_proc_t* tcp_local; /**< the tcp proc instance corresponding to the local process */ + lam_event_t tcp_send_event; /**< event structure for sends */ + lam_event_t tcp_recv_event; /**< event structure for recvs */ + lam_mutex_t tcp_lock; /**< lock for accessing module state */ }; typedef struct mca_ptl_tcp_module_1_0_0_t mca_ptl_tcp_module_1_0_0_t; typedef struct mca_ptl_tcp_module_1_0_0_t mca_ptl_tcp_module_t; @@ -48,76 +48,140 @@ struct mca_ptl_tcp_send_frag_t; extern mca_ptl_tcp_module_1_0_0_t mca_ptl_tcp_module; +/** + * Register TCP module parameters with the MCA framework + */ extern int mca_ptl_tcp_module_open(void); + +/** + * Any final cleanup before being unloaded. + */ extern int mca_ptl_tcp_module_close(void); +/** + * TCP module initialization. + * + * @param num_ptls (OUT) Number of PTLs returned in PTL array. + * @param allow_multi_user_threads (OUT) Flag indicating wether PTL supports user threads (TRUE) + * @param have_hidden_threads (OUT) Flag indicating wether PTL uses threads (TRUE) + * + * (1) read interface list from kernel and compare against module parameters + * then create a PTL instance for selected interfaces + * (2) setup TCP listen socket for incoming connection attempts + * (3) publish PTL addressing info + * + */ extern mca_ptl_t** mca_ptl_tcp_module_init( int *num_ptls, bool *allow_multi_user_threads, bool *have_hidden_threads ); -extern void mca_ptl_tcp_module_progress( - mca_ptl_base_tstamp_t tstamp -); - - /** * TCP PTL Interface - * */ - struct mca_ptl_tcp_t { - mca_ptl_t super; - int ptl_ifindex; - struct sockaddr_in ptl_ifaddr; - struct sockaddr_in ptl_ifmask; + mca_ptl_t super; /**< base PTL interface */ + int ptl_ifindex; /**< PTL interface index */ + struct sockaddr_in ptl_ifaddr; /**< PTL interface address */ + struct sockaddr_in ptl_ifmask; /**< PTL interface netmask */ }; typedef struct mca_ptl_tcp_t mca_ptl_tcp_t; extern mca_ptl_tcp_t mca_ptl_tcp; -extern int mca_ptl_tcp_create( - int if_index -); +/** + * Cleanup any resources held by the PTL. + * + * @param ptl PTL instance. + * @return LAM_SUCCESS or error status on failure. + */ extern int mca_ptl_tcp_finalize( struct mca_ptl_t* ptl ); + +/** + * PML->PTL notification of change in the process list. + * + * @param ptl (IN) + * @param proc (IN) + * @param peer (OUT) + * @return LAM_SUCCESS or error status on failure. + * + */ + extern int mca_ptl_tcp_add_proc( struct mca_ptl_t* ptl, - struct lam_proc_t *procs, - struct mca_ptl_base_peer_t** addr + struct lam_proc_t *proc, + struct mca_ptl_base_peer_t** peer ); + +/** + * PML->PTL notification of change in the process list. + * + * @param ptl (IN) PTL instance + * @param proc (IN) Peer process + * @param peer (IN) Peer addressing information. + * @return Status indicating if cleanup was successful + * + */ extern int mca_ptl_tcp_del_proc( struct mca_ptl_t* ptl, struct lam_proc_t *procs, struct mca_ptl_base_peer_t* addr ); +/** + * PML->PTL Allocate a send request from the PTL modules free list. + * + * @param ptl (IN) PTL instance + * @param request (OUT) Pointer to allocated request. + * @return Status indicating if allocation was successful. + * + */ extern int mca_ptl_tcp_request_alloc( struct mca_ptl_t* ptl, struct mca_ptl_base_send_request_t** ); +/** + * PML->PTL Return a send request to the PTL modules free list. + * + * @param ptl (IN) PTL instance + * @param request (IN) Pointer to allocated request. + * + */ extern void mca_ptl_tcp_request_return( struct mca_ptl_t* ptl, struct mca_ptl_base_send_request_t* ); -extern void mca_ptl_tcp_recv_frag_return( +/** + * PML->PTL Notification that a receive fragment has been matched. + * + * @param ptl (IN) PTL instance + * @param recv_frag (IN) Receive fragment + * + */ +extern void mca_ptl_tcp_recv( struct mca_ptl_t* ptl, - struct mca_ptl_tcp_recv_frag_t* + struct mca_ptl_base_recv_frag_t* frag ); - -extern void mca_ptl_tcp_send_frag_return( - struct mca_ptl_t* ptl, - struct mca_ptl_tcp_send_frag_t* -); - + +/** + * PML->PTL Initiate a send of the specified size. + * + * @param ptl (IN) PTL instance + * @param ptl_base_peer (IN) PTL peer addressing + * @param send_request (IN/OUT) Send request (allocated by PML via mca_ptl_base_request_alloc_fn_t) + * @param size (IN) Number of bytes PML is requesting PTL to deliver + * @param flags (IN) Flags that should be passed to the peer via the message header. + * @param request (OUT) LAM_SUCCESS if the PTL was able to queue one or more fragments + */ extern int mca_ptl_tcp_send( struct mca_ptl_t* ptl, struct mca_ptl_base_peer_t* ptl_peer, @@ -125,13 +189,32 @@ extern int mca_ptl_tcp_send( size_t size, int flags ); - -extern void mca_ptl_tcp_recv( + +/** + * Return a recv fragment to the modules free list. + * + * @param ptl (IN) PTL instance + * @param frag (IN) TCP receive fragment + * + */ +extern void mca_ptl_tcp_recv_frag_return( struct mca_ptl_t* ptl, - struct mca_ptl_base_recv_frag_t* frag, - struct lam_status_public_t* status + struct mca_ptl_tcp_recv_frag_t* frag +); + + + +/** + * Return a send fragment to the modules free list. + * + * @param ptl (IN) PTL instance + * @param frag (IN) TCP send fragment + * + */ +extern void mca_ptl_tcp_send_frag_return( + struct mca_ptl_t* ptl, + struct mca_ptl_tcp_send_frag_t* ); - #endif diff --git a/src/mca/ptl/tcp/src/ptl_tcp_addr.h b/src/mca/ptl/tcp/src/ptl_tcp_addr.h index 88b541bc8e..c9ffe9abd6 100644 --- a/src/mca/ptl/tcp/src/ptl_tcp_addr.h +++ b/src/mca/ptl/tcp/src/ptl_tcp_addr.h @@ -1,8 +1,9 @@ -/* @file - * +/* * $HEADER$ */ - +/** + * @file + */ #ifndef MCA_PTL_TCP_ADDR_H #define MCA_PTL_TCP_ADDR_H @@ -11,10 +12,13 @@ #include +/** + * Structure used to publish TCP connection information to peers. + */ struct mca_ptl_tcp_addr_t { - struct in_addr addr_inet; - in_port_t addr_port; - unsigned short addr_inuse; + struct in_addr addr_inet; /**< IPv4 address in network byte order */ + in_port_t addr_port; /**< listen port */ + unsigned short addr_inuse; /**< local meaning only */ }; typedef struct mca_ptl_tcp_addr_t mca_ptl_tcp_addr_t; diff --git a/src/mca/ptl/tcp/src/ptl_tcp_module.c b/src/mca/ptl/tcp/src/ptl_tcp_module.c index 1deca40244..a44e46f161 100644 --- a/src/mca/ptl/tcp/src/ptl_tcp_module.c +++ b/src/mca/ptl/tcp/src/ptl_tcp_module.c @@ -3,6 +3,7 @@ */ #include #include +#include #include #include #include @@ -54,8 +55,7 @@ mca_ptl_tcp_module_1_0_0_t mca_ptl_tcp_module = { false }, - mca_ptl_tcp_module_init, /* module init */ - mca_ptl_tcp_module_progress /* module progress */ + mca_ptl_tcp_module_init /* module init */ } }; @@ -124,7 +124,7 @@ int mca_ptl_tcp_module_open(void) mca_ptl_tcp.super.ptl_exclusivity = mca_ptl_tcp_param_register_int("exclusivity", 0); mca_ptl_tcp.super.ptl_first_frag_size = - mca_ptl_tcp_param_register_int("first_frag_size", 16*1024); + mca_ptl_tcp_param_register_int("first_frag_size", 64*1024); mca_ptl_tcp.super.ptl_min_frag_size = mca_ptl_tcp_param_register_int("min_frag_size", 64*1024); mca_ptl_tcp.super.ptl_max_frag_size = @@ -149,6 +149,27 @@ int mca_ptl_tcp_module_close(void) } +/* + * Create a ptl instance and add to modules list. + */ + +static int mca_ptl_tcp_create(int if_index) +{ + mca_ptl_tcp_t* ptl = malloc(sizeof(mca_ptl_tcp_t)); + if(NULL == ptl) + return LAM_ERR_OUT_OF_RESOURCE; + memcpy(ptl, &mca_ptl_tcp, sizeof(mca_ptl_tcp)); + mca_ptl_tcp_module.tcp_ptls[mca_ptl_tcp_module.tcp_num_ptls++] = ptl; + + /* initialize the ptl */ + ptl->ptl_ifindex = if_index; + lam_ifindextoaddr(if_index, (struct sockaddr*)&ptl->ptl_ifaddr, sizeof(ptl->ptl_ifaddr)); + lam_ifindextomask(if_index, (struct sockaddr*)&ptl->ptl_ifmask, sizeof(ptl->ptl_ifmask)); + return LAM_SUCCESS; +} + + + /* * Create a TCP PTL instance for either: * (1) all interfaces specified by the user @@ -173,7 +194,7 @@ static int mca_ptl_tcp_module_create_instances(void) if(NULL == mca_ptl_tcp_module.tcp_ptls) return LAM_ERR_OUT_OF_RESOURCE; - /* if the user specified an interface list - use these only */ + /* if the user specified an interface list - use these exclusively */ argv = include = lam_argv_split(mca_ptl_tcp_module.tcp_if_include,'\''); while(argv && *argv) { char* if_name = *argv; @@ -217,8 +238,8 @@ static int mca_ptl_tcp_module_create_instances(void) static int mca_ptl_tcp_module_create_listen(void) { int flags; - struct sockaddr_in inaddr; - lam_socklen_t addrlen = sizeof(struct sockaddr_in); + struct sockaddr_in inaddr; + lam_socklen_t addrlen; /* create a listen socket for incoming connections */ mca_ptl_tcp_module.tcp_listen_sd = socket(AF_INET, SOCK_STREAM, 0); @@ -239,6 +260,7 @@ static int mca_ptl_tcp_module_create_listen(void) } /* resolve system assignend port */ + addrlen = sizeof(struct sockaddr_in); if(getsockname(mca_ptl_tcp_module.tcp_listen_sd, (struct sockaddr*)&inaddr, &addrlen) < 0) { lam_output(0, "mca_ptl_tcp_module_init: getsockname() failed with errno=%d", errno); return LAM_ERROR; @@ -281,7 +303,8 @@ static int mca_ptl_tcp_module_create_listen(void) static int mca_ptl_tcp_module_exchange(void) { - size_t i, rc; + int rc; + size_t i; size_t size = mca_ptl_tcp_module.tcp_num_ptls * sizeof(mca_ptl_tcp_addr_t); mca_ptl_tcp_addr_t *addrs = malloc(size); for(i=0; i +#include #include #include #include @@ -331,9 +332,9 @@ static int mca_ptl_tcp_peer_recv_connect_ack(mca_ptl_base_peer_t* ptl_peer) static int mca_ptl_tcp_peer_start_connect(mca_ptl_base_peer_t* ptl_peer) { - int rc; - int flags; + int rc,flags; struct sockaddr_in peer_addr; + ptl_peer->peer_sd = socket(AF_INET, SOCK_STREAM, 0); if (ptl_peer->peer_sd < 0) { ptl_peer->peer_retries++; diff --git a/src/mca/ptl/tcp/src/ptl_tcp_peer.h b/src/mca/ptl/tcp/src/ptl_tcp_peer.h index 1079cf591f..38aab2b855 100644 --- a/src/mca/ptl/tcp/src/ptl_tcp_peer.h +++ b/src/mca/ptl/tcp/src/ptl_tcp_peer.h @@ -1,8 +1,9 @@ -/* @file - * +/* * $HEADER$ */ - +/** + * @file + */ #ifndef MCA_PTL_TCP_PEER_H #define MCA_PTL_TCP_PEER_H @@ -26,7 +27,6 @@ typedef enum { } mca_ptl_tcp_state_t; - /** * An abstraction that represents a connection to a peer process. * An instance of mca_ptl_base_peer_t is associated w/ each process @@ -36,18 +36,18 @@ typedef enum { struct mca_ptl_base_peer_t { lam_list_item_t super; - struct mca_ptl_tcp_t* peer_ptl; - struct mca_ptl_tcp_proc_t* peer_proc; - struct mca_ptl_tcp_addr_t* peer_addr; - int peer_sd; - mca_ptl_tcp_send_frag_t* peer_send_frag; - mca_ptl_tcp_recv_frag_t* peer_recv_frag; - mca_ptl_tcp_state_t peer_state; - size_t peer_retries; - lam_list_t peer_frags; /* list of pending frags to send */ - lam_mutex_t peer_lock; - lam_event_t peer_send_event; - lam_event_t peer_recv_event; + struct mca_ptl_tcp_t* peer_ptl; /**< PTL instance that created this connection */ + struct mca_ptl_tcp_proc_t* peer_proc; /**< proc structure corresponding to peer */ + struct mca_ptl_tcp_addr_t* peer_addr; /**< address of peer */ + int peer_sd; /**< socket connection to peer */ + mca_ptl_tcp_send_frag_t* peer_send_frag; /**< current send frag being processed */ + mca_ptl_tcp_recv_frag_t* peer_recv_frag; /**< current recv frag being processed */ + mca_ptl_tcp_state_t peer_state; /**< current state of the connection */ + size_t peer_retries; /**< number of connection retries attempted */ + lam_list_t peer_frags; /**< list of pending frags to send */ + lam_mutex_t peer_lock; /**< lock for concurrent access to peer state */ + lam_event_t peer_send_event; /**< event for async processing of send frags */ + lam_event_t peer_recv_event; /**< event for async processing of recv frags */ }; typedef struct mca_ptl_base_peer_t mca_ptl_base_peer_t; diff --git a/src/mca/ptl/tcp/src/ptl_tcp_proc.c b/src/mca/ptl/tcp/src/ptl_tcp_proc.c index 1b448dc2ce..83b5ee2db3 100644 --- a/src/mca/ptl/tcp/src/ptl_tcp_proc.c +++ b/src/mca/ptl/tcp/src/ptl_tcp_proc.c @@ -1,6 +1,7 @@ /* * $HEADER$ */ +#include #include "atomic.h" #include "lfc/lam_hash_table.h" #include "mca/base/mca_base_module_exchange.h" @@ -81,7 +82,7 @@ mca_ptl_tcp_proc_t* mca_ptl_tcp_proc_create(lam_proc_t* lam_proc) return 0; } memcpy(ptl_proc->proc_guid, lam_proc->proc_job, size); - memcpy(((char*) ptl_proc->proc_guid) + size, &vpid, sizeof(uint32_t)); + memcpy(((unsigned char*)ptl_proc->proc_guid)+size, &vpid, sizeof(uint32_t)); /* lookup tcp parameters exported by this proc */ rc = mca_base_modex_recv( diff --git a/src/mca/ptl/tcp/src/ptl_tcp_proc.h b/src/mca/ptl/tcp/src/ptl_tcp_proc.h index f1b2a17ecd..d4bba30689 100644 --- a/src/mca/ptl/tcp/src/ptl_tcp_proc.h +++ b/src/mca/ptl/tcp/src/ptl_tcp_proc.h @@ -1,8 +1,9 @@ -/* @file - * +/* * $HEADER$ */ - +/** + * @file + */ #ifndef MCA_PTL_TCP_PROC_H #define MCA_PTL_TCP_PROC_H @@ -19,19 +20,19 @@ extern lam_class_t mca_ptl_tcp_proc_t_class; /** * Represents the state of a remote process and the set of addresses - * that it exports. Also cache an instance or mca_ptl_base_peer_t for each + * that it exports. Also cache an instance of mca_ptl_base_peer_t for each * PTL instance that attempts to open a connection to the process. */ struct mca_ptl_tcp_proc_t { - lam_list_item_t super; - lam_proc_t *proc_lam; - void* proc_guid; - size_t proc_guid_size; - struct mca_ptl_tcp_addr_t *proc_addrs; - size_t proc_addr_count; - struct mca_ptl_base_peer_t **proc_peers; - size_t proc_peer_count; - lam_mutex_t proc_lock; + lam_list_item_t super; /**< allow proc to be placed on a list */ + lam_proc_t *proc_lam; /**< pointer to corresponding lam_proc_t */ + void* proc_guid; /**< globally unique identifier for the process */ + size_t proc_guid_size; /**< size of the guid */ + struct mca_ptl_tcp_addr_t *proc_addrs; /**< array of addresses published by peer */ + size_t proc_addr_count; /**< number of addresses published by peer */ + struct mca_ptl_base_peer_t **proc_peers; /**< array of peers that have been created to access this proc */ + size_t proc_peer_count; /**< number of peers */ + lam_mutex_t proc_lock; /**< lock to protect against concurrent access to proc state */ }; typedef struct mca_ptl_tcp_proc_t mca_ptl_tcp_proc_t; diff --git a/src/mca/ptl/tcp/src/ptl_tcp_recvfrag.c b/src/mca/ptl/tcp/src/ptl_tcp_recvfrag.c index 347b12669a..e086ddebc1 100644 --- a/src/mca/ptl/tcp/src/ptl_tcp_recvfrag.c +++ b/src/mca/ptl/tcp/src/ptl_tcp_recvfrag.c @@ -48,8 +48,10 @@ void mca_ptl_tcp_recv_frag_init(mca_ptl_tcp_recv_frag_t* frag, mca_ptl_base_peer { frag->frag_owner = &peer->peer_ptl->super; frag->super.frag_request = 0; + frag->super.super.frag_addr = NULL; + frag->super.super.frag_size = 0; + frag->super.frag_is_buffered = false; frag->frag_peer = peer; - frag->frag_buff = NULL; frag->frag_hdr_cnt = 0; frag->frag_msg_cnt = 0; frag->frag_ack_pending = false; @@ -140,15 +142,11 @@ static bool mca_ptl_tcp_recv_frag_match(mca_ptl_tcp_recv_frag_t* frag, int sd) /* match was not made - so allocate buffer for eager send */ if (NULL == frag->super.frag_request) { - if(frag->frag_header.hdr_frag.hdr_frag_length > 0) { - frag->frag_buff = malloc(frag->frag_header.hdr_frag.hdr_frag_length); + frag->super.super.frag_addr = malloc(frag->frag_header.hdr_frag.hdr_frag_length); frag->super.super.frag_size = frag->frag_header.hdr_frag.hdr_frag_length; + frag->super.frag_is_buffered = true; } - - /* match was made - use application buffer */ - } else { - frag->frag_buff = (unsigned char*)frag->super.super.frag_addr; } } @@ -178,7 +176,6 @@ static bool mca_ptl_tcp_recv_frag_frag(mca_ptl_tcp_recv_frag_t* frag, int sd) if(frag->frag_msg_cnt == 0) { frag->super.frag_request = frag->frag_header.hdr_frag.hdr_dst_ptr.pval; mca_ptl_base_recv_frag_init(&frag->super); - frag->frag_buff = frag->super.super.frag_addr; } /* continue to receive user data */ @@ -207,7 +204,7 @@ static bool mca_ptl_tcp_recv_frag_data(mca_ptl_tcp_recv_frag_t* frag, int sd) { int cnt = -1; while(cnt < 0) { - cnt = recv(sd, (unsigned char*)frag->frag_buff+frag->frag_msg_cnt, + cnt = recv(sd, (unsigned char*)frag->super.super.frag_addr+frag->frag_msg_cnt, frag->super.super.frag_size-frag->frag_msg_cnt, 0); if(cnt == 0) { mca_ptl_tcp_peer_close(frag->frag_peer); diff --git a/src/mca/ptl/tcp/src/ptl_tcp_recvfrag.h b/src/mca/ptl/tcp/src/ptl_tcp_recvfrag.h index 7c9c7569c3..94eb536329 100644 --- a/src/mca/ptl/tcp/src/ptl_tcp_recvfrag.h +++ b/src/mca/ptl/tcp/src/ptl_tcp_recvfrag.h @@ -1,7 +1,9 @@ -/* @file - * +/* * $HEADER$ */ +/** + * @file + */ #ifndef MCA_PTL_TCP_RECV_FRAG_H #define MCA_PTL_TCP_RECV_FRAG_H @@ -18,13 +20,14 @@ extern lam_class_t mca_ptl_tcp_recv_frag_t_class; +/** + * TCP received fragment derived type. + */ struct mca_ptl_tcp_recv_frag_t { - mca_ptl_base_recv_frag_t super; - mca_ptl_base_ack_header_t frag_ack; - unsigned char* frag_buff; - size_t frag_hdr_cnt; - size_t frag_msg_cnt; - bool frag_ack_pending; + mca_ptl_base_recv_frag_t super; /**< base receive fragment descriptor */ + size_t frag_hdr_cnt; /**< number of header bytes received */ + size_t frag_msg_cnt; /**< number of msg bytes received */ + bool frag_ack_pending; /**< is an ack pending for this fragment */ }; typedef struct mca_ptl_tcp_recv_frag_t mca_ptl_tcp_recv_frag_t; @@ -42,8 +45,11 @@ bool mca_ptl_tcp_recv_frag_send_ack(mca_ptl_tcp_recv_frag_t* frag); static inline void mca_ptl_tcp_recv_frag_progress(mca_ptl_tcp_recv_frag_t* frag) { if(frag->frag_msg_cnt >= frag->super.super.frag_header.hdr_frag.hdr_frag_length) { - if(frag->frag_buff != frag->super.super.frag_addr) { - memcpy(frag->super.super.frag_addr, frag->frag_buff, frag->super.super.frag_size); + if(frag->super.frag_is_buffered) { + struct iovec iov; + iov.iov_base = frag->super.super.frag_addr; + iov.iov_len = frag->super.super.frag_size; + lam_convertor_unpack(&frag->super.super.frag_convertor, &iov, 1); } frag->super.super.frag_owner->ptl_recv_progress(frag->super.frag_request, &frag->super); if(frag->frag_ack_pending == false) { diff --git a/src/mca/ptl/tcp/src/ptl_tcp_sendfrag.c b/src/mca/ptl/tcp/src/ptl_tcp_sendfrag.c index 2cc8f17318..2a7dfcdf24 100644 --- a/src/mca/ptl/tcp/src/ptl_tcp_sendfrag.c +++ b/src/mca/ptl/tcp/src/ptl_tcp_sendfrag.c @@ -5,14 +5,17 @@ #include #include #include "types.h" +#include "datatype/datatype.h" #include "mca/ptl/base/ptl_base_sendreq.h" #include "ptl_tcp.h" #include "ptl_tcp_peer.h" +#include "ptl_tcp_proc.h" #include "ptl_tcp_sendfrag.h" -#define frag_header super.super.frag_header -#define frag_owner super.super.frag_owner -#define frag_peer super.super.frag_peer +#define frag_header super.super.frag_header +#define frag_owner super.super.frag_owner +#define frag_peer super.super.frag_peer +#define frag_convertor super.super.frag_convertor static void mca_ptl_tcp_send_frag_construct(mca_ptl_tcp_send_frag_t* frag); @@ -42,7 +45,7 @@ static void mca_ptl_tcp_send_frag_destruct(mca_ptl_tcp_send_frag_t* frag) * data buffer, and the indicated size. */ -void mca_ptl_tcp_send_frag_init( +int mca_ptl_tcp_send_frag_init( mca_ptl_tcp_send_frag_t* sendfrag, mca_ptl_base_peer_t* ptl_peer, mca_ptl_base_send_request_t* sendreq, @@ -63,7 +66,7 @@ void mca_ptl_tcp_send_frag_init( hdr->hdr_match.hdr_src = sendreq->super.req_comm->c_my_rank; hdr->hdr_match.hdr_dst = sendreq->super.req_peer; hdr->hdr_match.hdr_tag = sendreq->super.req_tag; - hdr->hdr_match.hdr_msg_length = sendreq->super.req_length; + hdr->hdr_match.hdr_msg_length = sendreq->req_packed_size; hdr->hdr_match.hdr_msg_seq = sendreq->req_msg_seq; } else { hdr->hdr_common.hdr_type = MCA_PTL_HDR_TYPE_FRAG; @@ -75,29 +78,61 @@ void mca_ptl_tcp_send_frag_init( hdr->hdr_frag.hdr_dst_ptr = sendreq->req_peer_request; } - /* update request */ - if(sendreq->req_offset + size > sendreq->super.req_length) - size = sendreq->super.req_length - sendreq->req_offset; + /* initialize convertor */ + if(size > 0) { + lam_convertor_t *convertor; + int rc; + + /* first fragment (eager send) and first fragment of long protocol + * can use the convertor initialized on the request, remaining fragments + * must copy/reinit the convertor as the transfer could be in parallel. + */ + if(sendreq->req_frags < 2) { + convertor = &sendreq->req_convertor; + } else { + + convertor = &sendfrag->frag_convertor; + if((rc = lam_convertor_copy(&sendreq->req_convertor, convertor)) != LAM_SUCCESS) + return rc; + + if((rc = lam_convertor_init_for_send( + convertor, + 0, + sendreq->super.req_datatype, + sendreq->super.req_count, + sendreq->super.req_addr, + sendreq->req_offset)) != LAM_SUCCESS) + return rc; + } + + /* if data is contigous convertor will return an offset + * into users buffer - otherwise will return an allocated buffer + * that holds the packed data + */ + sendfrag->frag_vec[1].iov_base = NULL; + sendfrag->frag_vec[1].iov_len = size; + if((rc = lam_convertor_pack(convertor, &sendfrag->frag_vec[1], 1)) != LAM_SUCCESS) + return rc; + + /* adjust size and request offset to reflect actual number of bytes packed by convertor */ + size = sendfrag->frag_vec[1].iov_len; + sendreq->req_offset += size; + } hdr->hdr_frag.hdr_frag_length = size; - sendreq->req_offset += size; sendreq->req_frags++; /* fragment state */ sendfrag->frag_owner = &ptl_peer->peer_ptl->super; sendfrag->super.frag_request = sendreq; - sendfrag->super.super.frag_addr = ((char*) sendreq->super.req_addr) + hdr->hdr_frag.hdr_frag_offset; + sendfrag->super.super.frag_addr = sendfrag->frag_vec[1].iov_base; sendfrag->super.super.frag_size = size; sendfrag->frag_peer = ptl_peer; sendfrag->frag_vec_ptr = sendfrag->frag_vec; + sendfrag->frag_vec_cnt = (size == 0) ? 1 : 2; sendfrag->frag_vec[0].iov_base = (lam_iov_base_ptr_t)hdr; sendfrag->frag_vec[0].iov_len = sizeof(mca_ptl_base_header_t); - sendfrag->frag_vec_cnt = 1; - if(size > 0) { - sendfrag->frag_vec[1].iov_base = (lam_iov_base_ptr_t)sendfrag->super.super.frag_addr; - sendfrag->frag_vec[1].iov_len = sendfrag->super.super.frag_size; - sendfrag->frag_vec_cnt++; - } + return LAM_SUCCESS; } diff --git a/src/mca/ptl/tcp/src/ptl_tcp_sendfrag.h b/src/mca/ptl/tcp/src/ptl_tcp_sendfrag.h index 275fa8592d..0514fb45cd 100644 --- a/src/mca/ptl/tcp/src/ptl_tcp_sendfrag.h +++ b/src/mca/ptl/tcp/src/ptl_tcp_sendfrag.h @@ -1,8 +1,9 @@ -/* @file - * +/* * $HEADER$ */ - +/** + * @file + */ #ifndef MCA_PTL_TCP_SEND_FRAG_H #define MCA_PTL_TCP_SEND_FRAG_H @@ -19,11 +20,14 @@ extern lam_class_t mca_ptl_tcp_send_frag_t_class; struct mca_ptl_base_peer_t; +/** + * TCP send fragment derived type. + */ struct mca_ptl_tcp_send_frag_t { - mca_ptl_base_send_frag_t super; - struct iovec *frag_vec_ptr; - size_t frag_vec_cnt; - struct iovec frag_vec[2]; + mca_ptl_base_send_frag_t super; /**< base send fragment descriptor */ + struct iovec *frag_vec_ptr; /**< pointer into iovec array */ + size_t frag_vec_cnt; /**< number of iovec structs left to process */ + struct iovec frag_vec[2]; /**< array of iovecs for send */ }; typedef struct mca_ptl_tcp_send_frag_t mca_ptl_tcp_send_frag_t; @@ -36,7 +40,8 @@ static inline mca_ptl_tcp_send_frag_t* mca_ptl_tcp_send_frag_alloc(int* rc) bool mca_ptl_tcp_send_frag_handler(mca_ptl_tcp_send_frag_t*, int sd); -void mca_ptl_tcp_send_frag_init( + +int mca_ptl_tcp_send_frag_init( mca_ptl_tcp_send_frag_t*, struct mca_ptl_base_peer_t*, struct mca_ptl_base_send_request_t*, diff --git a/src/mca/ptl/tcp/src/ptl_tcp_sendreq.h b/src/mca/ptl/tcp/src/ptl_tcp_sendreq.h index b2bc15be78..e4216383be 100644 --- a/src/mca/ptl/tcp/src/ptl_tcp_sendreq.h +++ b/src/mca/ptl/tcp/src/ptl_tcp_sendreq.h @@ -1,7 +1,9 @@ -/* @file - * +/* * $HEADER$ */ +/** + * @file + */ #ifndef MCA_PTL_TCP_SEND_REQUEST_H #define MCA_PTL_TCP_SEND_REQUEST_H @@ -13,9 +15,14 @@ #include "mca/ptl/base/ptl_base_sendreq.h" #include "ptl_tcp_sendfrag.h" - extern lam_class_t mca_ptl_tcp_send_request_t_class; +/** + * TCP send request derived type. The send request contains both the + * base send request, and space for the first TCP send fragment descriptor. + * This avoids the overhead of a second allocation for the initial send + * fragment on every send request. + */ struct mca_ptl_tcp_send_request_t { mca_ptl_base_send_request_t super; mca_ptl_tcp_send_frag_t req_frag; /* first fragment */ diff --git a/src/mpi/c/bsend.c b/src/mpi/c/bsend.c index aaf74d07d6..e4cf4e31df 100644 --- a/src/mpi/c/bsend.c +++ b/src/mpi/c/bsend.c @@ -17,20 +17,19 @@ int MPI_Bsend(void *buf, int count, MPI_Datatype type, int dest, int tag, MPI_Comm comm) { + int rc; if (dest == MPI_PROC_NULL) { return MPI_SUCCESS; } - + if ( MPI_PARAM_CHECK ) { - int rc = MPI_SUCCESS; - if (lam_mpi_finalized) { + rc = MPI_SUCCESS; + if ( LAM_MPI_INVALID_STATE ) { rc = MPI_ERR_INTERN; } else if (count < 0) { rc = MPI_ERR_COUNT; -#if 0 } else if (type == MPI_DATATYPE_NULL) { rc = MPI_ERR_TYPE; -#endif } else if (tag < 0 || tag > MPI_TAG_UB_VALUE) { rc = MPI_ERR_TAG; } else if (lam_comm_invalid(comm)) { @@ -38,11 +37,10 @@ int MPI_Bsend(void *buf, int count, MPI_Datatype type, int dest, int tag, MPI_Co } else if (lam_comm_peer_invalid(comm, dest)) { rc = MPI_ERR_RANK; } - if (rc != MPI_SUCCESS) { - return rc; - } + LAM_ERRHANDLER_CHECK(rc, comm, rc, "MPI_Bsend"); } - - return mca_pml.pml_send(buf, count, type, dest, tag, MCA_PML_BASE_SEND_BUFFERED, comm); + + rc = mca_pml.pml_send(buf, count, type, dest, tag, MCA_PML_BASE_SEND_BUFFERED, comm); + LAM_ERRHANDLER_RETURN(rc, comm, rc, "MPI_Bsend"); } diff --git a/src/mpi/c/bsend_init.c b/src/mpi/c/bsend_init.c index 0374a51184..f806c5a1e8 100644 --- a/src/mpi/c/bsend_init.c +++ b/src/mpi/c/bsend_init.c @@ -16,20 +16,19 @@ int MPI_Bsend_init(void *buf, int count, MPI_Datatype type, int dest, int tag, MPI_Comm comm, MPI_Request *request) { + int rc; if (dest == MPI_PROC_NULL) { return MPI_SUCCESS; } - + if ( MPI_PARAM_CHECK ) { - int rc = MPI_SUCCESS; - if (lam_mpi_finalized) { + rc = MPI_SUCCESS; + if ( LAM_MPI_INVALID_STATE ) { rc = MPI_ERR_INTERN; } else if (count < 0) { rc = MPI_ERR_COUNT; -#if 0 } else if (type == MPI_DATATYPE_NULL) { rc = MPI_ERR_TYPE; -#endif } else if (tag < 0 || tag > MPI_TAG_UB_VALUE) { rc = MPI_ERR_TAG; } else if (lam_comm_invalid(comm)) { @@ -39,11 +38,10 @@ int MPI_Bsend_init(void *buf, int count, MPI_Datatype type, } else if (request == NULL) { rc = MPI_ERR_REQUEST; } - if (rc != MPI_SUCCESS) { - return rc; - } + LAM_ERRHANDLER_CHECK(rc, comm, rc, "MPI_Bsend_init"); } - - return mca_pml.pml_isend_init(buf, count, type, dest, tag, MCA_PML_BASE_SEND_BUFFERED, comm, request);; + + rc = mca_pml.pml_isend_init(buf,count,type,dest,tag,MCA_PML_BASE_SEND_BUFFERED,comm,request); + LAM_ERRHANDLER_RETURN(rc, comm, rc, "MPI_Bsend_init"); } diff --git a/src/mpi/c/ibsend.c b/src/mpi/c/ibsend.c index b1756e2e76..befb51522b 100644 --- a/src/mpi/c/ibsend.c +++ b/src/mpi/c/ibsend.c @@ -17,20 +17,19 @@ int MPI_Ibsend(void *buf, int count, MPI_Datatype type, int dest, int tag, MPI_Comm comm, MPI_Request *request) { + int rc; if (dest == MPI_PROC_NULL) { return MPI_SUCCESS; } if ( MPI_PARAM_CHECK ) { - int rc = MPI_SUCCESS; - if (lam_mpi_finalized) { + rc = MPI_SUCCESS; + if ( LAM_MPI_INVALID_STATE ) { rc = MPI_ERR_INTERN; } else if (count < 0) { rc = MPI_ERR_COUNT; -#if 0 } else if (type == MPI_DATATYPE_NULL) { rc = MPI_ERR_TYPE; -#endif } else if (tag < 0 || tag > MPI_TAG_UB_VALUE) { rc = MPI_ERR_TAG; } else if (lam_comm_invalid(comm)) { @@ -40,11 +39,10 @@ int MPI_Ibsend(void *buf, int count, MPI_Datatype type, int dest, } else if (request == NULL) { rc = MPI_ERR_REQUEST; } - if (rc != MPI_SUCCESS) { - return rc; - } + LAM_ERRHANDLER_CHECK(rc, comm, rc, "MPI_Ibsend"); } - return mca_pml.pml_isend(buf,count,type,dest,tag,MCA_PML_BASE_SEND_BUFFERED,comm,request); + rc = mca_pml.pml_isend(buf,count,type,dest,tag,MCA_PML_BASE_SEND_BUFFERED,comm,request); + LAM_ERRHANDLER_RETURN(rc, comm, rc, "MPI_Ibsend"); } diff --git a/src/mpi/c/irecv.c b/src/mpi/c/irecv.c index 5aec85c8de..37cbc45f76 100644 --- a/src/mpi/c/irecv.c +++ b/src/mpi/c/irecv.c @@ -17,32 +17,32 @@ int MPI_Irecv(void *buf, int count, MPI_Datatype type, int source, int tag, MPI_Comm comm, MPI_Request *request) { + int rc; if (source == MPI_PROC_NULL) { return mca_pml.pml_null(request); } if ( MPI_PARAM_CHECK ) { - int rc = MPI_SUCCESS; - if (lam_mpi_finalized) { + rc = MPI_SUCCESS; + if ( LAM_MPI_INVALID_STATE ) { rc = MPI_ERR_INTERN; } else if (count < 0) { rc = MPI_ERR_COUNT; -#if 0 } else if (type == MPI_DATATYPE_NULL) { rc = MPI_ERR_TYPE; -#endif } else if (tag < 0 || tag > MPI_TAG_UB_VALUE) { rc = MPI_ERR_TAG; } else if (lam_comm_invalid(comm)) { rc = MPI_ERR_COMM; - } else if (source != MPI_ANY_SOURCE && source != MPI_PROC_NULL && lam_comm_peer_invalid(comm, source)) { + } else if (source != MPI_ANY_SOURCE && + source != MPI_PROC_NULL && + lam_comm_peer_invalid(comm, source)) { rc = MPI_ERR_RANK; } - if (rc != MPI_SUCCESS) { - return rc; - } + LAM_ERRHANDLER_CHECK(rc, comm, rc, "MPI_Irecv"); } - return mca_pml.pml_irecv(buf,count,type,source,tag,comm,request); + rc = mca_pml.pml_irecv(buf,count,type,source,tag,comm,request); + LAM_ERRHANDLER_RETURN(rc, comm, rc, "MPI_Irecv"); } diff --git a/src/mpi/c/irsend.c b/src/mpi/c/irsend.c index 0426d171bd..f518ed2fb5 100644 --- a/src/mpi/c/irsend.c +++ b/src/mpi/c/irsend.c @@ -17,20 +17,19 @@ int MPI_Irsend(void *buf, int count, MPI_Datatype type, int dest, int tag, MPI_Comm comm, MPI_Request *request) { + int rc; if (dest == MPI_PROC_NULL) { return MPI_SUCCESS; } if ( MPI_PARAM_CHECK ) { - int rc = MPI_SUCCESS; - if (lam_mpi_finalized) { + rc = MPI_SUCCESS; + if ( LAM_MPI_INVALID_STATE ) { rc = MPI_ERR_INTERN; } else if (count < 0) { rc = MPI_ERR_COUNT; -#if 0 } else if (type == MPI_DATATYPE_NULL) { rc = MPI_ERR_TYPE; -#endif } else if (tag < 0 || tag > MPI_TAG_UB_VALUE) { rc = MPI_ERR_TAG; } else if (lam_comm_invalid(comm)) { @@ -40,11 +39,10 @@ int MPI_Irsend(void *buf, int count, MPI_Datatype type, int dest, } else if (request == NULL) { rc = MPI_ERR_REQUEST; } - if (rc != MPI_SUCCESS) { - return rc; - } + LAM_ERRHANDLER_CHECK(rc, comm, rc, "MPI_Irsend"); } - return mca_pml.pml_isend(buf,count,type,dest,tag,MCA_PML_BASE_SEND_READY,comm,request); + rc = mca_pml.pml_isend(buf,count,type,dest,tag,MCA_PML_BASE_SEND_READY,comm,request); + LAM_ERRHANDLER_RETURN(rc, comm, rc, "MPI_Irsend"); } diff --git a/src/mpi/c/isend.c b/src/mpi/c/isend.c index 858242e85b..614c07e9c7 100644 --- a/src/mpi/c/isend.c +++ b/src/mpi/c/isend.c @@ -17,20 +17,19 @@ int MPI_Isend(void *buf, int count, MPI_Datatype type, int dest, int tag, MPI_Comm comm, MPI_Request *request) { + int rc; if (dest == MPI_PROC_NULL) { return MPI_SUCCESS; } if ( MPI_PARAM_CHECK ) { - int rc = MPI_SUCCESS; - if (lam_mpi_finalized) { + rc = MPI_SUCCESS; + if ( LAM_MPI_INVALID_STATE ) { rc = MPI_ERR_INTERN; } else if (count < 0) { rc = MPI_ERR_COUNT; -#if 0 } else if (type == MPI_DATATYPE_NULL) { rc = MPI_ERR_TYPE; -#endif } else if (tag < 0 || tag > MPI_TAG_UB_VALUE) { rc = MPI_ERR_TAG; } else if (lam_comm_invalid(comm)) { @@ -40,11 +39,10 @@ int MPI_Isend(void *buf, int count, MPI_Datatype type, int dest, } else if (request == NULL) { rc = MPI_ERR_REQUEST; } - if (rc != MPI_SUCCESS) { - return rc; - } + LAM_ERRHANDLER_CHECK(rc, comm, rc, "MPI_Isend"); } - return mca_pml.pml_isend(buf,count,type,dest,tag,MCA_PML_BASE_SEND_STANDARD,comm,request); + rc = mca_pml.pml_isend(buf,count,type,dest,tag,MCA_PML_BASE_SEND_STANDARD,comm,request); + LAM_ERRHANDLER_RETURN(rc, comm, rc, "MPI_Isend"); } diff --git a/src/mpi/c/issend.c b/src/mpi/c/issend.c index 4e20a8ecea..a60585d7a9 100644 --- a/src/mpi/c/issend.c +++ b/src/mpi/c/issend.c @@ -5,6 +5,7 @@ #include "lam_config.h" #include "mpi.h" +#include "errhandler/errhandler.h" #include "runtime/runtime.h" #include "mpi/c/bindings.h" #include "mca/pml/pml.h" @@ -17,20 +18,19 @@ int MPI_Issend(void *buf, int count, MPI_Datatype type, int dest, int tag, MPI_Comm comm, MPI_Request *request) { + int rc; if (dest == MPI_PROC_NULL) { return MPI_SUCCESS; } if ( MPI_PARAM_CHECK ) { - int rc = MPI_SUCCESS; - if (lam_mpi_finalized) { + rc = MPI_SUCCESS; + if ( LAM_MPI_INVALID_STATE ) { rc = MPI_ERR_INTERN; } else if (count < 0) { rc = MPI_ERR_COUNT; -#if 0 } else if (type == MPI_DATATYPE_NULL) { rc = MPI_ERR_TYPE; -#endif } else if (tag < 0 || tag > MPI_TAG_UB_VALUE) { rc = MPI_ERR_TAG; } else if (lam_comm_invalid(comm)) { @@ -40,11 +40,10 @@ int MPI_Issend(void *buf, int count, MPI_Datatype type, int dest, } else if (request == NULL) { rc = MPI_ERR_REQUEST; } - if (rc != MPI_SUCCESS) { - return rc; - } + LAM_ERRHANDLER_CHECK(rc, comm, rc, "MPI_Issend"); } - return mca_pml.pml_isend(buf,count,type,dest,tag,MCA_PML_BASE_SEND_SYNCHRONOUS,comm,request); + rc = mca_pml.pml_isend(buf,count,type,dest,tag,MCA_PML_BASE_SEND_SYNCHRONOUS,comm,request); + LAM_ERRHANDLER_RETURN(rc, comm, rc, "MPI_Issend"); } diff --git a/src/mpi/c/recv.c b/src/mpi/c/recv.c index 143be7faaf..4e83c2a95c 100644 --- a/src/mpi/c/recv.c +++ b/src/mpi/c/recv.c @@ -17,6 +17,7 @@ int MPI_Recv(void *buf, int count, MPI_Datatype type, int source, int tag, MPI_Comm comm, MPI_Status *status) { + int rc; if (source == MPI_PROC_NULL) { if (status) { status->MPI_SOURCE = MPI_PROC_NULL; @@ -28,15 +29,13 @@ int MPI_Recv(void *buf, int count, MPI_Datatype type, int source, } if ( MPI_PARAM_CHECK ) { - int rc = MPI_SUCCESS; - if (lam_mpi_finalized) { + rc = MPI_SUCCESS; + if ( LAM_MPI_INVALID_STATE ) { rc = MPI_ERR_INTERN; } else if (count < 0) { rc = MPI_ERR_COUNT; -#if 0 } else if (type == MPI_DATATYPE_NULL) { rc = MPI_ERR_TYPE; -#endif } else if (tag < 0 || tag > MPI_TAG_UB_VALUE) { rc = MPI_ERR_TAG; } else if (lam_comm_invalid(comm)) { @@ -44,10 +43,10 @@ int MPI_Recv(void *buf, int count, MPI_Datatype type, int source, } else if (source != MPI_ANY_SOURCE && lam_comm_peer_invalid(comm, source)) { rc = MPI_ERR_RANK; } - if (rc != MPI_SUCCESS) { - return rc; - } + LAM_ERRHANDLER_CHECK(rc, comm, rc, "MPI_Recv"); } - return mca_pml.pml_recv(buf, count, type, source, tag, comm, status); + rc = mca_pml.pml_recv(buf, count, type, source, tag, comm, status); + LAM_ERRHANDLER_RETURN(rc, comm, rc, "MPI_Recv"); } + diff --git a/src/mpi/c/recv_init.c b/src/mpi/c/recv_init.c index 40676d097b..b0384b04f7 100644 --- a/src/mpi/c/recv_init.c +++ b/src/mpi/c/recv_init.c @@ -17,31 +17,32 @@ int MPI_Recv_init(void *buf, int count, MPI_Datatype type, int source, int tag, MPI_Comm comm, MPI_Request *request) { + int rc; if (source == MPI_PROC_NULL) { return mca_pml.pml_null(request); } - + if ( MPI_PARAM_CHECK ) { - int rc = MPI_SUCCESS; - if (lam_mpi_finalized) { + rc = MPI_SUCCESS; + if ( LAM_MPI_INVALID_STATE ) { rc = MPI_ERR_INTERN; } else if (count < 0) { rc = MPI_ERR_COUNT; -#if 0 } else if (type == MPI_DATATYPE_NULL) { rc = MPI_ERR_TYPE; -#endif } else if (tag < 0 || tag > MPI_TAG_UB_VALUE) { rc = MPI_ERR_TAG; } else if (lam_comm_invalid(comm)) { rc = MPI_ERR_COMM; - } else if (source != MPI_ANY_SOURCE && source != MPI_PROC_NULL && lam_comm_peer_invalid(comm, source)) { + } else if (source != MPI_ANY_SOURCE && + source != MPI_PROC_NULL && + lam_comm_peer_invalid(comm, source)) { rc = MPI_ERR_RANK; } - if (rc != MPI_SUCCESS) { - return rc; - } + LAM_ERRHANDLER_CHECK(rc, comm, rc, "MPI_Recv_init"); } - return mca_pml.pml_irecv_init(buf,count,type,source,tag,comm,request); + + rc = mca_pml.pml_irecv_init(buf,count,type,source,tag,comm,request); + LAM_ERRHANDLER_RETURN(rc, comm, rc, "MPI_Recv_init"); } diff --git a/src/mpi/c/rsend.c b/src/mpi/c/rsend.c index fa549b0ab8..8915c67a16 100644 --- a/src/mpi/c/rsend.c +++ b/src/mpi/c/rsend.c @@ -16,20 +16,19 @@ int MPI_Rsend(void *buf, int count, MPI_Datatype type, int dest, int tag, MPI_Comm comm) { + int rc; if (dest == MPI_PROC_NULL) { return MPI_SUCCESS; } if ( MPI_PARAM_CHECK ) { - int rc = MPI_SUCCESS; - if (lam_mpi_finalized) { + rc = MPI_SUCCESS; + if ( LAM_MPI_INVALID_STATE ) { rc = MPI_ERR_INTERN; } else if (count < 0) { rc = MPI_ERR_COUNT; -#if 0 } else if (type == MPI_DATATYPE_NULL) { rc = MPI_ERR_TYPE; -#endif } else if (tag < 0 || tag > MPI_TAG_UB_VALUE) { rc = MPI_ERR_TAG; } else if (lam_comm_invalid(comm)) { @@ -37,10 +36,10 @@ int MPI_Rsend(void *buf, int count, MPI_Datatype type, int dest, int tag, MPI_Co } else if (lam_comm_peer_invalid(comm, dest)) { rc = MPI_ERR_RANK; } - if (rc != MPI_SUCCESS) { - return rc; - } + LAM_ERRHANDLER_CHECK(rc, comm, rc, "MPI_Rsend"); } - return mca_pml.pml_send(buf, count, type, dest, tag, MCA_PML_BASE_SEND_READY, comm); + rc = mca_pml.pml_send(buf, count, type, dest, tag, MCA_PML_BASE_SEND_READY, comm); + LAM_ERRHANDLER_RETURN(rc, comm, rc, "MPI_Rsend"); } + diff --git a/src/mpi/c/rsend_init.c b/src/mpi/c/rsend_init.c index 59fb86fc4c..db863b1c42 100644 --- a/src/mpi/c/rsend_init.c +++ b/src/mpi/c/rsend_init.c @@ -18,20 +18,19 @@ int MPI_Rsend_init(void *buf, int count, MPI_Datatype type, int dest, int tag, MPI_Comm comm, MPI_Request *request) { + int rc; if (dest == MPI_PROC_NULL) { return MPI_SUCCESS; } if ( MPI_PARAM_CHECK ) { - int rc = MPI_SUCCESS; - if (lam_mpi_finalized) { + rc = MPI_SUCCESS; + if ( LAM_MPI_INVALID_STATE ) { rc = MPI_ERR_INTERN; } else if (count < 0) { rc = MPI_ERR_COUNT; -#if 0 } else if (type == MPI_DATATYPE_NULL) { rc = MPI_ERR_TYPE; -#endif } else if (tag < 0 || tag > MPI_TAG_UB_VALUE) { rc = MPI_ERR_TAG; } else if (lam_comm_invalid(comm)) { @@ -41,11 +40,10 @@ int MPI_Rsend_init(void *buf, int count, MPI_Datatype type, } else if (request == NULL) { rc = MPI_ERR_REQUEST; } - if (rc != MPI_SUCCESS) { - return rc; - } + LAM_ERRHANDLER_CHECK(rc, comm, rc, "MPI_Rsend_init"); } - return mca_pml.pml_isend_init(buf, count, type, dest, tag, MCA_PML_BASE_SEND_READY, comm, request);; + rc = mca_pml.pml_isend_init(buf,count,type,dest,tag,MCA_PML_BASE_SEND_READY,comm,request); + LAM_ERRHANDLER_RETURN(rc, comm, rc, "MPI_Rsend_init"); } diff --git a/src/mpi/c/send.c b/src/mpi/c/send.c index 0aa887d4c6..448cd35386 100644 --- a/src/mpi/c/send.c +++ b/src/mpi/c/send.c @@ -17,20 +17,19 @@ int MPI_Send(void *buf, int count, MPI_Datatype type, int dest, int tag, MPI_Comm comm) { + int rc; if (dest == MPI_PROC_NULL) { return MPI_SUCCESS; } if ( MPI_PARAM_CHECK ) { int rc = MPI_SUCCESS; - if (lam_mpi_finalized) { + if ( LAM_MPI_INVALID_STATE ) { rc = MPI_ERR_INTERN; } else if (count < 0) { rc = MPI_ERR_COUNT; -#if 0 } else if (type == MPI_DATATYPE_NULL) { rc = MPI_ERR_TYPE; -#endif } else if (tag < 0 || tag > MPI_TAG_UB_VALUE) { rc = MPI_ERR_TAG; } else if (lam_comm_invalid(comm)) { @@ -38,11 +37,10 @@ int MPI_Send(void *buf, int count, MPI_Datatype type, int dest, } else if (lam_comm_peer_invalid(comm, dest)) { rc = MPI_ERR_RANK; } - if (rc != MPI_SUCCESS) { - return rc; - } + LAM_ERRHANDLER_CHECK(rc, comm, rc, "MPI_Send"); } - return mca_pml.pml_send(buf, count, type, dest, tag, MCA_PML_BASE_SEND_STANDARD, comm); + rc = mca_pml.pml_send(buf, count, type, dest, tag, MCA_PML_BASE_SEND_STANDARD, comm); + LAM_ERRHANDLER_RETURN(rc, comm, rc, "MPI_Send"); } diff --git a/src/mpi/c/send_init.c b/src/mpi/c/send_init.c index 62bb700cc9..09818d66c9 100644 --- a/src/mpi/c/send_init.c +++ b/src/mpi/c/send_init.c @@ -18,20 +18,19 @@ int MPI_Send_init(void *buf, int count, MPI_Datatype type, int dest, int tag, MPI_Comm comm, MPI_Request *request) { + int rc; if (dest == MPI_PROC_NULL) { return MPI_SUCCESS; } if ( MPI_PARAM_CHECK ) { - int rc = MPI_SUCCESS; - if (lam_mpi_finalized) { + rc = MPI_SUCCESS; + if ( LAM_MPI_INVALID_STATE ) { rc = MPI_ERR_INTERN; } else if (count < 0) { rc = MPI_ERR_COUNT; -#if 0 } else if (type == MPI_DATATYPE_NULL) { rc = MPI_ERR_TYPE; -#endif } else if (tag < 0 || tag > MPI_TAG_UB_VALUE) { rc = MPI_ERR_TAG; } else if (lam_comm_invalid(comm)) { @@ -41,11 +40,10 @@ int MPI_Send_init(void *buf, int count, MPI_Datatype type, } else if (request == NULL) { rc = MPI_ERR_REQUEST; } - if (rc != MPI_SUCCESS) { - return rc; - } + LAM_ERRHANDLER_CHECK(rc, comm, rc, "MPI_Send_init"); } - return mca_pml.pml_isend_init(buf, count, type, dest, tag, MCA_PML_BASE_SEND_STANDARD, comm, request);; + rc = mca_pml.pml_isend_init(buf,count,type,dest,tag,MCA_PML_BASE_SEND_STANDARD,comm,request); + LAM_ERRHANDLER_RETURN(rc, comm, rc, "MPI_Send_init"); } diff --git a/src/mpi/c/sendrecv.c b/src/mpi/c/sendrecv.c index 3b6b4d5160..798a24600e 100644 --- a/src/mpi/c/sendrecv.c +++ b/src/mpi/c/sendrecv.c @@ -5,7 +5,10 @@ #include #include "mpi.h" +#include "runtime/runtime.h" #include "mpi/c/bindings.h" +#include "mca/pml/pml.h" + #if LAM_HAVE_WEAK_SYMBOLS && LAM_PROFILING_DEFINES #pragma weak MPI_Sendrecv = PMPI_Sendrecv @@ -14,6 +17,60 @@ int MPI_Sendrecv(void *sendbuf, int sendcount, MPI_Datatype recvtype, int dest, int sendtag, void *recvbuf, int recvcount, MPI_Datatype sendtype, int source, int recvtag, - MPI_Comm comm, MPI_Status *status) { - return MPI_SUCCESS; + MPI_Comm comm, MPI_Status *status) +{ + lam_request_t* req; + int rc; + + if ( MPI_PARAM_CHECK ) { + rc = MPI_SUCCESS; + if ( LAM_MPI_INVALID_STATE ) { + rc = MPI_ERR_INTERN; + } else if (lam_comm_invalid(comm)) { + rc = MPI_ERR_COMM; + } else if (sendcount < 0) { + rc = MPI_ERR_COUNT; + } else if (sendtype == MPI_DATATYPE_NULL) { + rc = MPI_ERR_TYPE; + } else if (lam_comm_peer_invalid(comm, dest)) { + rc = MPI_ERR_RANK; + } else if (sendtag < 0 || sendtag > MPI_TAG_UB_VALUE) { + rc = MPI_ERR_TAG; + } else if (recvcount < 0) { + rc = MPI_ERR_COUNT; + } else if (recvtype == MPI_DATATYPE_NULL) { + rc = MPI_ERR_TYPE; + } else if (source != MPI_PROC_NULL && source != MPI_ANY_SOURCE && lam_comm_peer_invalid(comm, source)) { + rc = MPI_ERR_RANK; + } else if (recvtag < MPI_ANY_TAG || recvtag > MPI_TAG_UB_VALUE) { + rc = MPI_ERR_TAG; + } + LAM_ERRHANDLER_CHECK(rc, comm, rc, "MPI_Sendrecv"); + } + + if (source != MPI_PROC_NULL) { /* post recv */ + rc = mca_pml.pml_irecv(recvbuf, recvcount, recvtype, + source, recvtag, comm, &req); + LAM_ERRHANDLER_CHECK(rc, comm, rc, "MPI_Sendrecv"); + } + + if (dest != MPI_PROC_NULL) { /* send */ + rc = mca_pml.pml_send(sendbuf, sendcount, sendtype, dest, + sendtag, MCA_PML_BASE_SEND_STANDARD, comm); + LAM_ERRHANDLER_CHECK(rc, comm, rc, "MPI_Sendrecv"); + } + + if (source != MPI_PROC_NULL) { /* wait for recv */ + + rc = mca_pml.pml_wait(1, &req, NULL, status); + + } else { + + status->MPI_ERROR = MPI_SUCCESS; + status->MPI_SOURCE = MPI_PROC_NULL; + status->MPI_TAG = MPI_ANY_TAG; + status->_count = 0; + rc = MPI_SUCCESS; + } + LAM_ERRHANDLER_RETURN(rc, comm, rc, "MPI_Sendrecv"); } diff --git a/src/mpi/c/sendrecv_replace.c b/src/mpi/c/sendrecv_replace.c index e3268788ad..f9895c4d28 100644 --- a/src/mpi/c/sendrecv_replace.c +++ b/src/mpi/c/sendrecv_replace.c @@ -13,6 +13,7 @@ int MPI_Sendrecv_replace(void * buf, int count, MPI_Datatype datatype, int dest, int sendtag, int source, int recvtag, - MPI_Comm comm, MPI_Status *status) { + MPI_Comm comm, MPI_Status *status) +{ return MPI_SUCCESS; } diff --git a/src/mpi/c/ssend.c b/src/mpi/c/ssend.c index d3cd42f575..29dbda4e06 100644 --- a/src/mpi/c/ssend.c +++ b/src/mpi/c/ssend.c @@ -16,20 +16,19 @@ int MPI_Ssend(void *buf, int count, MPI_Datatype type, int dest, int tag, MPI_Comm comm) { + int rc; if (dest == MPI_PROC_NULL) { return MPI_SUCCESS; } if ( MPI_PARAM_CHECK ) { - int rc = MPI_SUCCESS; - if (lam_mpi_finalized) { + rc = MPI_SUCCESS; + if ( LAM_MPI_INVALID_STATE ) { rc = MPI_ERR_INTERN; } else if (count < 0) { rc = MPI_ERR_COUNT; -#if 0 } else if (type == MPI_DATATYPE_NULL) { rc = MPI_ERR_TYPE; -#endif } else if (tag < 0 || tag > MPI_TAG_UB_VALUE) { rc = MPI_ERR_TAG; } else if (lam_comm_invalid(comm)) { @@ -37,11 +36,10 @@ int MPI_Ssend(void *buf, int count, MPI_Datatype type, int dest, int tag, MPI_Co } else if (lam_comm_peer_invalid(comm, dest)) { rc = MPI_ERR_RANK; } - if (rc != MPI_SUCCESS) { - return rc; - } + LAM_ERRHANDLER_CHECK(rc, comm, rc, "MPI_Ssend"); } - return mca_pml.pml_send(buf, count, type, dest, tag, MCA_PML_BASE_SEND_SYNCHRONOUS, comm); + rc = mca_pml.pml_send(buf, count, type, dest, tag, MCA_PML_BASE_SEND_SYNCHRONOUS, comm); + LAM_ERRHANDLER_RETURN(rc, comm, rc, "MPI_Ssend"); } diff --git a/src/mpi/c/ssend_init.c b/src/mpi/c/ssend_init.c index 08f2386a41..684a6bbc8a 100644 --- a/src/mpi/c/ssend_init.c +++ b/src/mpi/c/ssend_init.c @@ -18,20 +18,19 @@ int MPI_Ssend_init(void *buf, int count, MPI_Datatype type, int dest, int tag, MPI_Comm comm, MPI_Request *request) { + int rc; if (dest == MPI_PROC_NULL) { return MPI_SUCCESS; } if ( MPI_PARAM_CHECK ) { - int rc = MPI_SUCCESS; - if (lam_mpi_finalized) { + rc = MPI_SUCCESS; + if ( LAM_MPI_INVALID_STATE ) { rc = MPI_ERR_INTERN; } else if (count < 0) { rc = MPI_ERR_COUNT; -#if 0 } else if (type == MPI_DATATYPE_NULL) { rc = MPI_ERR_TYPE; -#endif } else if (tag < 0 || tag > MPI_TAG_UB_VALUE) { rc = MPI_ERR_TAG; } else if (lam_comm_invalid(comm)) { @@ -41,10 +40,10 @@ int MPI_Ssend_init(void *buf, int count, MPI_Datatype type, } else if (request == NULL) { rc = MPI_ERR_REQUEST; } - if (rc != MPI_SUCCESS) { - return rc; - } + LAM_ERRHANDLER_CHECK(rc, comm, rc, "MPI_Ssend_init"); } - return mca_pml.pml_isend_init(buf, count, type, dest, tag, MCA_PML_BASE_SEND_SYNCHRONOUS, comm, request);; + rc = mca_pml.pml_isend_init(buf,count,type,dest,tag,MCA_PML_BASE_SEND_SYNCHRONOUS,comm,request); + LAM_ERRHANDLER_RETURN(rc, comm, rc, "MPI_Ssend_init"); } + diff --git a/src/mpi/c/start.c b/src/mpi/c/start.c index 14ac3db83f..1b68185dce 100644 --- a/src/mpi/c/start.c +++ b/src/mpi/c/start.c @@ -27,6 +27,6 @@ int MPI_Start(MPI_Request *request) return rc; } } - return mca_pml.pml_start(request); + return mca_pml.pml_start(1, request); } diff --git a/src/mpi/c/startall.c b/src/mpi/c/startall.c index 4f06e2a06b..f3b646bff8 100644 --- a/src/mpi/c/startall.c +++ b/src/mpi/c/startall.c @@ -5,12 +5,29 @@ #include #include "mpi.h" +#include "runtime/runtime.h" #include "mpi/c/bindings.h" +#include "mca/pml/pml.h" + #if LAM_HAVE_WEAK_SYMBOLS && LAM_PROFILING_DEFINES #pragma weak MPI_Startall = PMPI_Startall #endif -int MPI_Startall(int count, MPI_Request *array_of_requests) { - return MPI_SUCCESS; + +int MPI_Startall(int count, MPI_Request *requests) +{ + if ( MPI_PARAM_CHECK ) { + int rc = MPI_SUCCESS; + if (lam_mpi_finalized) { + rc = MPI_ERR_INTERN; + } else if (requests == NULL) { + rc = MPI_ERR_REQUEST; + } + if (rc != MPI_SUCCESS) { + return rc; + } + } + return mca_pml.pml_start(count, requests); } + diff --git a/src/mpi/c/test.c b/src/mpi/c/test.c index 47a881b35b..30ed0609bf 100644 --- a/src/mpi/c/test.c +++ b/src/mpi/c/test.c @@ -16,18 +16,17 @@ int MPI_Test(MPI_Request *request, int *completed, MPI_Status *status) { + int rc, index; if ( MPI_PARAM_CHECK ) { - int rc = MPI_SUCCESS; - if ( lam_mpi_finalized ) { + rc = MPI_SUCCESS; + if ( LAM_MPI_INVALID_STATE ) { rc = MPI_ERR_INTERN; } else if (request == NULL) { rc = MPI_ERR_REQUEST; } else if (completed == NULL) { rc = MPI_ERR_ARG; } - if (rc != MPI_SUCCESS) { - return rc; - } + LAM_ERRHANDLER_CHECK(rc, (lam_communicator_t*)NULL, rc, "MPI_Test"); } if(*request == NULL) { @@ -38,6 +37,9 @@ int MPI_Test(MPI_Request *request, int *completed, MPI_Status *status) status->_count = 0; return MPI_SUCCESS; } - return mca_pml.pml_test(request,completed,status); + rc = mca_pml.pml_test(1, request, index, completed, status); + if(completed < 0) + completed = 0; + LAM_ERRHANDLER_RETURN(rc, (lam_communicator_t*)NULL, rc, "MPI_Test"); } diff --git a/src/mpi/c/testall.c b/src/mpi/c/testall.c index 536149432e..1c605d3a8e 100644 --- a/src/mpi/c/testall.c +++ b/src/mpi/c/testall.c @@ -12,7 +12,7 @@ #endif int MPI_Testall(int count, MPI_Request array_of_requests[], int *flag, - MPI_Status array_of_statuses[]) { - + MPI_Status array_of_statuses[]) +{ return MPI_SUCCESS; } diff --git a/src/mpi/c/testany.c b/src/mpi/c/testany.c index 1351491f55..43613bde66 100644 --- a/src/mpi/c/testany.c +++ b/src/mpi/c/testany.c @@ -5,13 +5,30 @@ #include #include "mpi.h" +#include "runtime/runtime.h" #include "mpi/c/bindings.h" +#include "mca/pml/pml.h" #if LAM_HAVE_WEAK_SYMBOLS && LAM_PROFILING_DEFINES #pragma weak MPI_Testany = PMPI_Testany #endif -int MPI_Testany(int count, MPI_Request array_of_requests[], int *index, - MPI_Status *status) { - return MPI_SUCCESS; +int MPI_Testany(int count, MPI_Request requests[], int *index, int *completed, MPI_Status *status) +{ + int rc; + if ( MPI_PARAM_CHECK ) { + int rc = MPI_SUCCESS; + if ( LAM_MPI_INVALID_STATE ) { + rc = MPI_ERR_INTERN; + } else if (NULL == requests) { + rc = MPI_ERR_REQUEST; + } else if (NULL == index) { + rc = MPI_ERR_ARG; + } + LAM_ERRHANDLER_CHECK(rc, (lam_communicator_t*)NULL, rc, "MPI_Testany"); + } + + rc = mca_pml.pml_test(count, requests, index, completed, status); + LAM_ERRHANDLER_RETURN(rc, (lam_communicator_t*)NULL, rc, "MPI_Testany"); } + diff --git a/src/mpi/c/testsome.c b/src/mpi/c/testsome.c index 90911368d9..b84cc232ae 100644 --- a/src/mpi/c/testsome.c +++ b/src/mpi/c/testsome.c @@ -5,14 +5,40 @@ #include #include "mpi.h" +#include "runtime/runtime.h" #include "mpi/c/bindings.h" +#include "mca/pml/pml.h" #if LAM_HAVE_WEAK_SYMBOLS && LAM_PROFILING_DEFINES #pragma weak MPI_Testsome = PMPI_Testsome #endif -int MPI_Testsome(int incount, MPI_Request array_of_requests[], - int *outcount, int array_of_indices, - MPI_Status array_of_statuses) { +int MPI_Testsome(int incount, MPI_Request requests[], + int *outcount, int indices[], + MPI_Status statuses[]) +{ + int rc, index, completed; + if ( MPI_PARAM_CHECK ) { + int rc = MPI_SUCCESS; + if ( LAM_MPI_INVALID_STATE ) { + rc = MPI_ERR_INTERN; + } else if (NULL == requests) { + rc = MPI_ERR_REQUEST; + } else if (NULL == indices) { + rc = MPI_ERR_ARG; + } + LAM_ERRHANDLER_CHECK(rc, (lam_communicator_t*)NULL, rc, "MPI_Testsome"); + } + + /* optimize this in the future */ + rc = mca_pml.pml_test(incount, requests, &index, &completed, statuses); + LAM_ERRHANDLER_CHECK(rc, (lam_communicator_t*)NULL, rc, "MPI_Testsome"); + if(completed) { + *outcount = 1; + indices[0] = index; + } else { + *outcount = 0; + } return MPI_SUCCESS; } + diff --git a/src/mpi/c/topo_test.c b/src/mpi/c/topo_test.c index 7f51ac47ed..623fc7430a 100644 --- a/src/mpi/c/topo_test.c +++ b/src/mpi/c/topo_test.c @@ -11,6 +11,7 @@ #pragma weak MPI_Topo_test = PMPI_Topo_test #endif -int MPI_Topo_test(MPI_Comm comm, int *status) { +int MPI_Topo_test(MPI_Comm comm, int *status) +{ return MPI_SUCCESS; } diff --git a/src/mpi/c/wait.c b/src/mpi/c/wait.c index bc1da906b5..4ded5841b0 100644 --- a/src/mpi/c/wait.c +++ b/src/mpi/c/wait.c @@ -15,26 +15,27 @@ int MPI_Wait(MPI_Request *request, MPI_Status *status) { - int index; + int index, rc; if ( MPI_PARAM_CHECK ) { - int rc = MPI_SUCCESS; - if (lam_mpi_finalized) { + rc = MPI_SUCCESS; + if ( LAM_MPI_INVALID_STATE ) { rc = MPI_ERR_INTERN; } else if (request == NULL) { rc = MPI_ERR_REQUEST; } - if (rc != MPI_SUCCESS) { - return rc; - } + LAM_ERRHANDLER_CHECK(rc, (lam_communicator_t*)NULL, rc, "MPI_Wait"); } - if (*request == NULL) { - status->MPI_SOURCE = MPI_PROC_NULL; - status->MPI_TAG = MPI_ANY_TAG; - status->MPI_ERROR = MPI_SUCCESS; - status->_count = 0; + if (NULL == *request) { + if (NULL != status) { + status->MPI_SOURCE = MPI_PROC_NULL; + status->MPI_TAG = MPI_ANY_TAG; + status->MPI_ERROR = MPI_SUCCESS; + status->_count = 0; + } return MPI_SUCCESS; } - return mca_pml.pml_wait(1, request, &index, status); + rc = mca_pml.pml_wait(1, request, &index, status); + LAM_ERRHANDLER_RETURN(rc, (lam_communicator_t*)NULL, rc, "MPI_Wait"); } diff --git a/src/mpi/c/waitall.c b/src/mpi/c/waitall.c index d14c7da31f..0e397109f6 100644 --- a/src/mpi/c/waitall.c +++ b/src/mpi/c/waitall.c @@ -16,17 +16,17 @@ int MPI_Waitall(int count, MPI_Request *requests, MPI_Status *statuses) { + int rc; if ( MPI_PARAM_CHECK ) { - int rc = MPI_SUCCESS; - if (lam_mpi_finalized) { + rc = MPI_SUCCESS; + if ( LAM_MPI_INVALID_STATE ) { rc = MPI_ERR_INTERN; } else if (requests == NULL) { rc = MPI_ERR_REQUEST; } - if (rc != MPI_SUCCESS) { - return rc; - } + LAM_ERRHANDLER_CHECK(rc, (lam_communicator_t*)NULL, rc, "MPI_Waitall"); } - return mca_pml.pml_wait_all(count, requests, statuses); + rc = mca_pml.pml_wait_all(count, requests, statuses); + LAM_ERRHANDLER_RETURN(rc, (lam_communicator_t*)NULL, rc, "MPI_Waitall"); } diff --git a/src/mpi/c/waitany.c b/src/mpi/c/waitany.c index f01473ea19..89d331dc5c 100644 --- a/src/mpi/c/waitany.c +++ b/src/mpi/c/waitany.c @@ -15,17 +15,17 @@ int MPI_Waitany(int count, MPI_Request *requests, int *index, MPI_Status *status) { + int rc; if ( MPI_PARAM_CHECK ) { - int rc = MPI_SUCCESS; - if (lam_mpi_finalized) { + rc = MPI_SUCCESS; + if ( LAM_MPI_INVALID_STATE ) { rc = MPI_ERR_INTERN; } else if (requests == NULL) { rc = MPI_ERR_REQUEST; } - if (rc != MPI_SUCCESS) { - return rc; - } + LAM_ERRHANDLER_CHECK(rc, (lam_communicator_t*)NULL, rc, "MPI_Waitany"); } - return mca_pml.pml_wait(count, requests, index, status); + rc = mca_pml.pml_wait(count, requests, index, status); + LAM_ERRHANDLER_RETURN(rc, (lam_communicator_t*)NULL, rc, "MPI_Waitany"); } diff --git a/src/mpi/c/waitsome.c b/src/mpi/c/waitsome.c index 4f745765ed..81cebdfbdc 100644 --- a/src/mpi/c/waitsome.c +++ b/src/mpi/c/waitsome.c @@ -29,14 +29,12 @@ int MPI_Waitsome(int incount, MPI_Request *requests, } else if (requests == NULL) { rc = MPI_ERR_REQUEST; } - if (rc != MPI_SUCCESS) { - return rc; - } + LAM_ERRHANDLER_CHECK(rc, (lam_communicator_t*)NULL, rc, "MPI_Waitsome"); } /* optimize this in the future */ - if((rc = mca_pml.pml_wait(incount, requests, &index, statuses)) != LAM_SUCCESS) - return rc; + rc = mca_pml.pml_wait(incount, requests, &index, statuses); + LAM_ERRHANDLER_CHECK(rc, (lam_communicator_t*)NULL, rc, "MPI_Waitsome"); *outcount = 1; indices[0] = index; return MPI_SUCCESS; diff --git a/src/proc/proc.c b/src/proc/proc.c index 243773eab6..8d65da25c6 100644 --- a/src/proc/proc.c +++ b/src/proc/proc.c @@ -32,6 +32,10 @@ void lam_proc_construct(lam_proc_t* proc) proc->proc_vpid = 0; proc->proc_pml = NULL; proc->proc_modex = NULL; + proc->proc_arch = 0; + + /* FIX - need to determine remote process architecture */ + proc->proc_convertor = lam_convertor_create(0, 0); THREAD_LOCK(&lam_proc_lock); lam_list_append(&lam_proc_list, (lam_list_item_t*)proc); diff --git a/src/proc/proc.h b/src/proc/proc.h index ccf28c9833..cc8ca89bf9 100644 --- a/src/proc/proc.h +++ b/src/proc/proc.h @@ -7,6 +7,7 @@ #include "include/types.h" #include "lfc/lam_list.h" +#include "datatype/datatype.h" extern lam_class_t lam_proc_t_class; @@ -18,6 +19,8 @@ struct lam_proc_t { uint32_t proc_vpid; /* process identifier w/in the job */ struct mca_pml_proc_t* proc_pml; /* PML specific proc data */ struct mca_base_modex_t* proc_modex; /* MCA module exchange data */ + int proc_arch; + lam_convertor_t* proc_convertor; /* JMS: need to have the following information: diff --git a/src/runtime/lam_mpi_init.c b/src/runtime/lam_mpi_init.c index ecae0c65d6..6aac602e91 100644 --- a/src/runtime/lam_mpi_init.c +++ b/src/runtime/lam_mpi_init.c @@ -142,5 +142,6 @@ int lam_mpi_init(int argc, char **argv, int requested, int *provided) /* All done */ lam_mpi_initialized = true; + lam_mpi_finalized = false; return MPI_SUCCESS; } diff --git a/src/runtime/runtime.h b/src/runtime/runtime.h index 2a4e237645..64fba01eb0 100644 --- a/src/runtime/runtime.h +++ b/src/runtime/runtime.h @@ -12,8 +12,11 @@ * Global variables and symbols for the MPI layer */ +#define LAM_MPI_INVALID_STATE (!lam_mpi_initialized || lam_mpi_finalized) + extern bool lam_mpi_initialized; extern bool lam_mpi_finalized; +extern bool lam_mpi_invalid_state; extern bool lam_mpi_thread_multiple; extern int lam_mpi_thread_requested;