
- initial integration with datatypes

- p2p MPI interface functions
- adding Doxygen comments

This commit was SVN r976.
This commit is contained in:
Tim Woodall 2004-03-26 14:15:20 +00:00
parent 7c19f705eb
commit c1ee4fec23
79 changed files with 1779 additions and 809 deletions

View file

@ -1271,13 +1271,12 @@ extern "C" {
int count);
int PMPI_Testall(int count, MPI_Request array_of_requests[], int *flag,
MPI_Status array_of_statuses[]);
int PMPI_Testany(int count, MPI_Request array_of_requests[], int *index,
MPI_Status *status);
int PMPI_Testany(int count, MPI_Request array_of_requests[], int *index, int *flag, MPI_Status *status);
int PMPI_Test(MPI_Request *request, int *flag, MPI_Status *status);
int PMPI_Test_cancelled(MPI_Status *status, int *flag);
int PMPI_Testsome(int incount, MPI_Request array_of_requests[],
int *outcount, int array_of_indices,
MPI_Status array_of_statuses);
int *outcount, int array_of_indices[],
MPI_Status array_of_statuses[]);
int PMPI_Topo_test(MPI_Comm comm, int *status);
MPI_Fint PMPI_Type_c2f(MPI_Datatype datatype);
int PMPI_Type_commit(MPI_Datatype *type);

View file

@ -110,7 +110,7 @@ extern lam_pointer_array_t *lam_errhandler_f_to_c_table;
* parallel invocation to LAM_ERRHANDLER_CHECK() and LAM_ERRHANDLER_RETURN().
*/
#define LAM_ERRHANDLER_INVOKE(mpi_object, err_code, message) \
lam_errhandler_invoke((mpi_object)->error_handler, (mpi_object), \
lam_errhandler_invoke((mpi_object) != NULL ? (mpi_object)->error_handler : NULL, (mpi_object), \
(err_code), (message));
/**
@ -129,7 +129,7 @@ extern lam_pointer_array_t *lam_errhandler_f_to_c_table;
*/
#define LAM_ERRHANDLER_CHECK(rc, mpi_object, err_code, message) \
if (rc != LAM_SUCCESS) { \
lam_errhandler_invoke((mpi_object)->error_handler, (mpi_object), \
lam_errhandler_invoke((mpi_object) != NULL ? (mpi_object)->error_handler : NULL, (mpi_object), \
(err_code), (message)); \
return (err_code); \
}
@ -152,7 +152,7 @@ extern lam_pointer_array_t *lam_errhandler_f_to_c_table;
*/
#define LAM_ERRHANDLER_RETURN(rc, mpi_object, err_code, message) \
if (rc != LAM_SUCCESS) { \
lam_errhandler_invoke((mpi_object)->error_handler, (mpi_object), \
lam_errhandler_invoke((mpi_object != NULL) ? (mpi_object)->error_handler : NULL, (mpi_object), \
(err_code), (message)); \
return (err_code); \
} else { \
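For illustration, a standalone sketch of the invoke-and-return pattern these macros implement (simplified stand-ins for the LAM types and lam_errhandler_invoke(); not the actual implementation):

#include <stdio.h>

#define EX_SUCCESS 0

/* simplified stand-in for lam_errhandler_invoke() */
static void ex_errhandler_invoke(void *object, int err_code, const char *message)
{
    fprintf(stderr, "error %d on object %p: %s\n", err_code, object, message);
}

/* same shape as LAM_ERRHANDLER_RETURN: on failure, invoke the object's
 * error handler and return the error code to the caller */
#define EX_ERRHANDLER_RETURN(rc, object, err_code, message) \
    do { \
        if ((rc) != EX_SUCCESS) { \
            ex_errhandler_invoke((object), (err_code), (message)); \
            return (err_code); \
        } \
    } while (0)

static int do_work(void) { return 1; /* simulate a failure */ }

int example_mpi_call(void *comm)
{
    int rc = do_work();
    EX_ERRHANDLER_RETURN(rc, comm, rc, "example_mpi_call");
    return EX_SUCCESS; /* only reached on success */
}

int main(void)
{
    return example_mpi_call(NULL) == EX_SUCCESS ? 0 : 1;
}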

View file

@ -8,7 +8,9 @@ noinst_LTLIBRARIES = libmca_pml_base.la
# For VPATH builds, have to specify where static-modules.h will be found
AM_CPPFLAGS = -I$(top_builddir)/src
AM_CPPFLAGS = \
-pedantic \
-I$(top_builddir)/src
# Source code files

View file

@ -5,6 +5,14 @@
#include "mca/pml/base/pml_base_request.h"
static void mca_pml_base_request_construct(mca_pml_base_request_t* req)
{
}
static void mca_pml_base_request_destruct(mca_pml_base_request_t* req)
{
}
lam_class_t mca_pml_base_request_t_class = {
"mca_pml_base_request_t",
OBJ_CLASS(lam_request_t),
@ -12,11 +20,4 @@ lam_class_t mca_pml_base_request_t_class = {
(lam_destruct_t) mca_pml_base_request_destruct
};
void mca_pml_base_request_construct(mca_pml_base_request_t* req)
{
}
void mca_pml_base_request_destruct(mca_pml_base_request_t* req)
{
}

View file

@ -1,8 +1,9 @@
/*
* $HEADER$
*/
/*%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%*/
/**
* @file
*/
#ifndef MCA_PML_BASE_REQUEST_H
#define MCA_PML_BASE_REQUEST_H
@ -13,7 +14,9 @@
extern lam_class_t mca_pml_base_request_t_class;
/* request type */
/**
* Type of request.
*/
typedef enum {
MCA_PML_REQUEST_NULL,
MCA_PML_REQUEST_SEND,
@ -21,40 +24,27 @@ typedef enum {
} mca_pml_base_request_type_t;
/* MPI pml (point-to-point) request */
/**
* Base type for PML P2P requests
*/
struct mca_pml_base_request_t {
/* base request */
lam_request_t super;
/* pointer to application buffer */
void *req_addr;
/* length of application buffer */
size_t req_length;
/* peer process - rank w/in this communicator */
int32_t req_peer;
/* user defined tag */
int32_t req_tag;
/* communicator pointer */
lam_communicator_t *req_comm;
/* pointer to data type */
lam_datatype_t *req_datatype;
/* MPI request type - used for test */
mca_pml_base_request_type_t req_type;
/* completion status */
lam_status_public_t req_status;
/* flag indicating if the this is a persistent request */
bool req_persistent;
/* flag indicating if MPI is done with this request */
volatile bool req_mpi_done;
/* flag indicating if the pt-2-pt layer is done with this request */
volatile bool req_pml_done;
/* flag indicating if the user has freed this request */
volatile bool req_free_called;
lam_request_t super; /**< base request */
void *req_addr; /**< pointer to application buffer */
size_t req_count; /**< count of user datatype elements */
int32_t req_peer; /**< peer process - rank w/in this communicator */
int32_t req_tag; /**< user defined tag */
lam_communicator_t *req_comm; /**< communicator pointer */
lam_proc_t* req_proc; /**< peer process */
lam_datatype_t *req_datatype; /**< pointer to data type */
mca_pml_base_request_type_t req_type; /**< MPI request type - used for test */
lam_status_public_t req_status; /**< completion status */
bool req_persistent; /**< flag indicating if this is a persistent request */
volatile bool req_mpi_done; /**< flag indicating if MPI is done with this request */
volatile bool req_pml_done; /**< flag indicating if the pt-2-pt layer is done with this request */
volatile bool req_free_called; /**< flag indicating if the user has freed this request */
};
typedef struct mca_pml_base_request_t mca_pml_base_request_t;
void mca_pml_base_request_construct(mca_pml_base_request_t*);
void mca_pml_base_request_destruct(mca_pml_base_request_t*);
#endif

View file

@ -1,7 +1,48 @@
/*
* $HEADER$
*/
/**
* @file
*
* P2P Management Layer (PML)
*
* An MCA component type that provides the P2P interface functionality required
* by the MPI layer. The PML is a relatively thin layer that primarily provides
* for the fragmentation and scheduling of messages over multiple transports
* (instances of the P2P Transport Layer (PTL) MCA component type) as depicted below:
*
* ------------------------------------
* | MPI |
* ------------------------------------
* | PML |
* ------------------------------------
* | PTL (TCP) | PTL (SM) | PTL (...) |
* ------------------------------------
*
* A single PML module is selected by the MCA framework during library
* initialization. Initially, all available PMLs are loaded (potentially
* as shared libraries) and their module open and init functions called.
* The MCA framework selects the module returning the highest priority and
* closes/unloads any other PML modules that may have been opened.
*
* After the PML is selected, the MCA framework loads and initializes
* all available PTLs. The PML is notified of the selected PTLs via
* the mca_pml_base_add_ptls_fn_t downcall from the MCA.
*
* After all of the MCA components are initialized, the MPI/RTE will make
* downcalls into the PML to provide the initial list of processes
* (lam_proc_t instances), and notification of changes (add/delete).
*
* The PML module must select the set of PTL modules that are to be used
* to reach a given destination. These should be cached on a PML specific
* data structure that is hung off the lam_proc_t.
*
* The PML should then apply a scheduling algorithm (round-robin,
* weighted distribution, etc.) to schedule the delivery of messages
* over the available PTLs.
*
*/
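As a rough illustration of the scheduling described above, a minimal standalone sketch (hypothetical simplified types, not the real mca_ptl_t interface): each PTL is assigned a share of the message proportional to its weight, and the last PTL, or any tail below a PTL's minimum fragment size, takes whatever remains.

#include <stdio.h>
#include <stddef.h>

/* hypothetical stand-in for a PTL with scheduling attributes */
struct ptl_ex {
    const char *name;
    double weight;          /* fraction of traffic assigned to this PTL */
    size_t min_frag_size;   /* smallest fragment worth sending */
};

/* split msg_size bytes across the available PTLs */
static void schedule_ex(const struct ptl_ex *ptls, size_t nptls, size_t msg_size)
{
    size_t remaining = msg_size;
    for (size_t i = 0; i < nptls && remaining > 0; i++) {
        size_t bytes;
        /* last PTL, or a tail below the minimum fragment size:
         * hand over everything that is left */
        if (i == nptls - 1 || remaining < ptls[i].min_frag_size) {
            bytes = remaining;
        } else {
            bytes = (size_t)(ptls[i].weight * msg_size);
            if (bytes > remaining)
                bytes = remaining;
        }
        printf("%s gets %zu bytes\n", ptls[i].name, bytes);
        remaining -= bytes;
    }
}

int main(void)
{
    const struct ptl_ex ptls[] = {
        { "tcp0", 0.75, 4096 },
        { "sm0",  0.25, 4096 },
    };
    schedule_ex(ptls, 2, 1u << 20);  /* 1 MB message */
    return 0;
}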
#ifndef MCA_PML_H
#define MCA_PML_H
@ -68,27 +109,114 @@ typedef struct mca_pml_base_module_1_0_0_t mca_pml_base_module_1_0_0_t;
typedef mca_pml_base_module_1_0_0_t mca_pml_base_module_t;
/*
* PML instance interface functions and datatype
/**
* MCA management functions.
*/
typedef int (*mca_pml_base_add_comm_fn_t)(struct lam_communicator_t*);
typedef int (*mca_pml_base_del_comm_fn_t)(struct lam_communicator_t*);
typedef int (*mca_pml_base_add_procs_fn_t)(struct lam_proc_t **procs, size_t nprocs);
typedef int (*mca_pml_base_del_procs_fn_t)(struct lam_proc_t **procs, size_t nprocs);
typedef int (*mca_pml_base_add_ptls_fn_t)(lam_list_t *ptls);
typedef int (*mca_pml_base_progress_fn_t)(void);
/**
* Downcall from MPI/RTE layer when new processes are created.
*
* @param procs Array of new processes
* @param nprocs Size of process array
* @return LAM_SUCCESS or failure status.
*
* Provides a notification to the PML that new processes have been
* created, and provides the PML the opportunity to cache data
* (e.g. list of PTLs to use) on the lam_proc_t data structure.
*/
typedef int (*mca_pml_base_add_procs_fn_t)(struct lam_proc_t **procs, size_t nprocs);
/**
* Downcall from MPI/RTE layer when processes are terminated.
*
* @param procs Array of processes
* @param nprocs Size of process array
* @return LAM_SUCCESS or failure status.
*
* Provides a notification to the PML that processes have
* gone away, and provides the PML the opportunity to cleanup
* any data cached on the lam_proc_t data structure.
*/
typedef int (*mca_pml_base_del_procs_fn_t)(struct lam_proc_t **procs, size_t nprocs);
/**
* Downcall from MCA layer after all PTLs have been loaded/selected.
*
* @param ptls List of selected PTLs
* @return LAM_SUCCESS or failure status.
*
* Provides the PML with the list of PTLs selected by the MCA
* framework, and provides the PML the opportunity to cache
* any PTL-specific data it needs for scheduling.
*/
typedef int (*mca_pml_base_add_ptls_fn_t)(lam_list_t *ptls);
/**
* MPI Interface Functions
*/
/**
* Downcall from MPI layer when a new communicator is created.
*
* @param comm Communicator
* @return LAM_SUCCESS or failure status.
*
* Provides the PML the opportunity to initialize/cache a data structure
* on the communicator.
*/
typedef int (*mca_pml_base_add_comm_fn_t)(struct lam_communicator_t* comm);
/**
* Downcall from MPI layer when a communicator is destroyed.
*
* @param comm Communicator
* @return LAM_SUCCESS or failure status.
*
* Provides the PML the opportunity to clean up any data structures
* associated with the communicator.
*/
typedef int (*mca_pml_base_del_comm_fn_t)(struct lam_communicator_t* comm);
/**
* Initialize a persistent receive request.
*
* @param buf (IN) User buffer.
* @param count (IN) Number of elements of the specified datatype.
* @param datatype (IN) User defined datatype.
* @param src (IN) Source rank w/in communicator.
* @param tag (IN) User defined tag.
* @param comm (IN) Communicator.
* @param request (OUT) Request handle.
* @return LAM_SUCCESS or failure status.
*/
typedef int (*mca_pml_base_irecv_init_fn_t)(
void *buf,
size_t count,
lam_datatype_t *datatype,
int src,
int tag,
struct lam_communicator_t* comm,
struct lam_request_t **request
);
/**
* Post a receive request.
*
* @param buf (IN) User buffer.
* @param count (IN) Number of elements of the specified datatype.
* @param datatype (IN) User defined datatype.
* @param src (IN) Source rank w/in communicator.
* @param tag (IN) User defined tag.
* @param comm (IN) Communicator.
* @param request (OUT) Request handle.
* @return LAM_SUCCESS or failure status.
*/
typedef int (*mca_pml_base_irecv_fn_t)(
void *buf,
size_t count,
@ -99,6 +227,18 @@ typedef int (*mca_pml_base_irecv_fn_t)(
struct lam_request_t **request
);
/**
* Post a receive and wait for completion.
*
* @param buf (IN) User buffer
* @param count (IN) Number of elements of the specified datatype
* @param datatype (IN) User defined datatype
* @param src (IN) Source rank w/in communicator
* @param tag (IN) User defined tag
* @param comm (IN) Communicator
* @param status (OUT) Completion status
* @return LAM_SUCCESS or failure status.
*/
typedef int (*mca_pml_base_recv_fn_t)(
void *buf,
size_t count,
@ -109,6 +249,19 @@ typedef int (*mca_pml_base_recv_fn_t)(
lam_status_public_t* status
);
/**
* Initialize a persistent send request.
*
* @param buf (IN) User buffer.
* @param count (IN) Number of elements of the specified datatype.
* @param datatype (IN) User defined datatype.
* @param dst (IN) Peer rank w/in communicator.
* @param tag (IN) User defined tag.
* @param mode (IN) Send mode (STANDARD,BUFFERED,SYNCHRONOUS,READY)
* @param comm (IN) Communicator.
* @param request (OUT) Request handle.
* @return LAM_SUCCESS or failure status.
*/
typedef int (*mca_pml_base_isend_init_fn_t)(
void *buf,
size_t count,
@ -120,6 +273,20 @@ typedef int (*mca_pml_base_isend_init_fn_t)(
struct lam_request_t **request
);
/**
* Post a send request.
*
* @param buf (IN) User buffer.
* @param count (IN) Number of elements of the specified datatype.
* @param datatype (IN) User defined datatype.
* @param dst (IN) Peer rank w/in communicator.
* @param tag (IN) User defined tag.
* @param mode (IN) Send mode (STANDARD,BUFFERED,SYNCHRONOUS,READY)
* @param comm (IN) Communicator.
* @param request (OUT) Request handle.
* @return LAM_SUCCESS or failure status.
*/
typedef int (*mca_pml_base_isend_fn_t)(
void *buf,
size_t count,
@ -131,6 +298,19 @@ typedef int (*mca_pml_base_isend_fn_t)(
struct lam_request_t **request
);
/**
* Post a send request and wait for completion.
*
* @param buf (IN) User buffer.
* @param count (IN) Number of elements of the specified datatype.
* @param datatype (IN) User defined datatype.
* @param dst (IN) Peer rank w/in communicator.
* @param tag (IN) User defined tag.
* @param mode (IN) Send mode (STANDARD,BUFFERED,SYNCHRONOUS,READY)
* @param comm (IN) Communicator.
* @return LAM_SUCCESS or failure status.
*/
typedef int (*mca_pml_base_send_fn_t)(
void *buf,
size_t count,
@ -141,16 +321,72 @@ typedef int (*mca_pml_base_send_fn_t)(
struct lam_communicator_t* comm
);
/**
* Initiate one or more persistent requests.
*
* @param count Number of requests
* @param requests Array of persistent requests
* @return LAM_SUCCESS or failure status.
*/
typedef int (*mca_pml_base_start_fn_t)(
lam_request_t** request
size_t count,
lam_request_t** requests
);
/**
* Non-blocking test for request completion.
*
* @param count (IN) Number of requests
* @param requests (IN) Array of requests
* @param index (OUT) Index of first completed request.
* @param completed (OUT) Flag indicating if index is valid (a request completed).
* @param status (OUT) Status of completed request.
* @return LAM_SUCCESS or failure status.
*
* Note that upon completion, the request is freed, and the
* request handle at that index is set to NULL.
*/
typedef int (*mca_pml_base_test_fn_t)(
lam_request_t** request,
size_t count,
lam_request_t** requests,
int *index,
int *completed,
lam_status_public_t* status
);
/**
* Non-blocking test for request completion.
*
* @param count (IN) Number of requests
* @param requests (IN) Array of requests
* @param completed (OUT) Flag indicating whether all requests completed.
* @param statuses (OUT) Array of completion statuses.
* @return LAM_SUCCESS or failure status.
*
* This routine returns completed==true if all requests have completed.
* The statuses parameter is only updated if all requests completed. Likewise,
* the requests array is not modified (no requests freed), unless all requests
* have completed.
*/
typedef int (*mca_pml_base_test_all_fn_t)(
size_t count,
lam_request_t** requests,
int *completed,
lam_status_public_t* statuses
);
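A standalone sketch of the all-or-nothing semantics described above (hypothetical simplified request type; the TEG implementation appears in pml_teg_test.c later in this diff):

#include <stddef.h>
#include <stdio.h>

/* hypothetical simplified request, for illustration only */
struct req_ex { int done; int status; };

/* report success and fill statuses only when every request has completed;
 * otherwise leave requests and statuses untouched */
static int testall_example(struct req_ex **reqs, size_t count, int *statuses)
{
    size_t i, num_done = 0;
    for (i = 0; i < count; i++)
        if (reqs[i] == NULL || reqs[i]->done)
            num_done++;
    if (num_done != count)
        return 0;
    for (i = 0; i < count; i++)
        statuses[i] = reqs[i] ? reqs[i]->status : 0;
    return 1;
}

int main(void)
{
    struct req_ex a = { 1, 42 }, b = { 0, 7 };
    struct req_ex *reqs[2] = { &a, &b };
    int statuses[2];
    printf("%d\n", testall_example(reqs, 2, statuses)); /* 0: b still pending */
    b.done = 1;
    if (testall_example(reqs, 2, statuses))
        printf("%d %d\n", statuses[0], statuses[1]);    /* 42 7 */
    return 0;
}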
/**
* Wait (blocking-mode) for one of N requests to complete.
*
* @param count (IN) Number of requests
* @param requests (IN) Array of requests
* @param index (OUT) Index into request array of completed request.
* @param status (OUT) Status of completed request.
* @return LAM_SUCCESS or failure status.
*
*/
typedef int (*mca_pml_base_wait_fn_t)(
size_t count,
lam_request_t** request,
@ -158,37 +394,61 @@ typedef int (*mca_pml_base_wait_fn_t)(
lam_status_public_t* status
);
/**
* Wait (blocking-mode) for all of N requests to complete.
*
* @param count (IN) Number of requests
* @param requests (IN) Array of requests
* @param statuses (OUT) Array of completion statuses.
* @return LAM_SUCCESS or failure status.
*
*/
typedef int (*mca_pml_base_wait_all_fn_t)(
size_t count, /* count of request/status arrays */
lam_request_t** request, /* array of requests */
lam_status_public_t *status /* array of statuses */
size_t count,
lam_request_t** request,
lam_status_public_t *status
);
/**
* Release resources held by a persistent mode request.
*
* @param request (IN) Request
* @return LAM_SUCCESS or failure status.
*
*/
typedef int (*mca_pml_base_free_fn_t)(
lam_request_t** request
);
/**
* Obtain the special NULL request handle.
*
* @param request (OUT) Request
* @return LAM_SUCCESS or failure status.
*
*/
typedef int (*mca_pml_base_null_fn_t)(
lam_request_t** request
);
/**
* PML instance interface functions.
* PML instance.
*/
struct mca_pml_1_0_0_t {
/* downcalls from MCA to PML */
mca_pml_base_add_comm_fn_t pml_add_comm;
mca_pml_base_del_comm_fn_t pml_del_comm;
mca_pml_base_add_procs_fn_t pml_add_procs;
mca_pml_base_del_procs_fn_t pml_del_procs;
mca_pml_base_add_ptls_fn_t pml_add_ptls;
mca_pml_base_progress_fn_t pml_progress;
/* downcalls from MPI to PML */
mca_pml_base_add_comm_fn_t pml_add_comm;
mca_pml_base_del_comm_fn_t pml_del_comm;
mca_pml_base_irecv_init_fn_t pml_irecv_init;
mca_pml_base_irecv_fn_t pml_irecv;
mca_pml_base_recv_fn_t pml_recv;
@ -197,6 +457,7 @@ struct mca_pml_1_0_0_t {
mca_pml_base_send_fn_t pml_send;
mca_pml_base_start_fn_t pml_start;
mca_pml_base_test_fn_t pml_test;
mca_pml_base_test_all_fn_t pml_test_all;
mca_pml_base_wait_fn_t pml_wait;
mca_pml_base_wait_all_fn_t pml_wait_all;
mca_pml_base_free_fn_t pml_free;
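The MPI layer reaches the selected module only through this table. A standalone sketch of the dispatch pattern (hypothetical pared-down types; the real table is mca_pml_1_0_0_t above, filled in by e.g. mca_pml_teg below):

#include <stdio.h>
#include <stddef.h>

/* hypothetical pared-down version of the PML function table */
struct pml_ex {
    int (*pml_send)(const void *buf, size_t count, int dst, int tag);
};

static int teg_send_ex(const void *buf, size_t count, int dst, int tag)
{
    (void)buf;
    printf("sending %zu elements to rank %d (tag %d)\n", count, dst, tag);
    return 0;
}

/* the framework selects one module at init time and publishes its table */
static struct pml_ex pml_selected = { teg_send_ex };

int main(void)
{
    char buf[16] = { 0 };
    /* an MPI_Send entry point would funnel through the selected module */
    return pml_selected.pml_send(buf, sizeof buf, 1, 99);
}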

View file

@ -1,7 +1,9 @@
/*
* $HEADER$
*/
/**
* @file
*/
#ifndef LAM_PTL_ARRAY_H
#define LAM_PTL_ARRAY_H
@ -10,24 +12,40 @@
extern lam_class_t mca_pml_teg_ptl_array_t_class;
/**
* A data structure associated with a lam_proc_t that caches
* addressing/scheduling attributes for a specific PTL instance
* that can be used to reach the process.
*/
struct mca_ptl_proc_t {
double ptl_weight; /* PTL weight for scheduling */
struct mca_ptl_base_peer_t* ptl_peer; /* PTL addressing info */
mca_ptl_t *ptl; /* PTL implementation */
double ptl_weight; /**< PTL weight for scheduling */
struct mca_ptl_base_peer_t* ptl_peer; /**< PTL addressing info */
mca_ptl_t *ptl; /**< PTL implementation */
};
typedef struct mca_ptl_proc_t mca_ptl_proc_t;
/**
* A dynamically growable array of mca_ptl_proc_t instances.
* Maintains an index into the array that is used for round-robin
* scheduling across contents.
*/
struct mca_ptl_array_t {
lam_object_t super;
mca_ptl_proc_t* ptl_procs; /* array of ptl procs */
size_t ptl_size; /* number available */
size_t ptl_reserve;
size_t ptl_index; /* last used index*/
mca_ptl_proc_t* ptl_procs; /**< array of ptl procs */
size_t ptl_size; /**< number available */
size_t ptl_reserve; /**< size of allocated ptl_proc array */
size_t ptl_index; /**< last used index */
};
typedef struct mca_ptl_array_t mca_ptl_array_t;
typedef struct mca_ptl_array_t mca_pml_teg_ptl_array_t;
/**
* If required, reallocate (grow) the array to the indicated size.
*
* @param array (IN)
* @param size (IN)
*/
int mca_ptl_array_reserve(mca_ptl_array_t*, size_t);
static inline size_t mca_ptl_array_get_size(mca_ptl_array_t* array)
@ -35,6 +53,12 @@ static inline size_t mca_ptl_array_get_size(mca_ptl_array_t* array)
return array->ptl_size;
}
/**
* Grow the array if required, and set the size.
*
* @param array (IN)
* @param size (IN)
*/
static inline void mca_ptl_array_set_size(mca_ptl_array_t* array, size_t size)
{
if(size > array->ptl_reserve)
@ -42,6 +66,11 @@ static inline void mca_ptl_array_set_size(mca_ptl_array_t* array, size_t size)
array->ptl_size = size;
}
/**
* Grow the array size by one and return the item at that index.
*
* @param array (IN)
*/
static inline mca_ptl_proc_t* mca_ptl_array_insert(mca_ptl_array_t* array)
{
#if LAM_ENABLE_DEBUG
@ -54,6 +83,12 @@ static inline mca_ptl_proc_t* mca_ptl_array_insert(mca_ptl_array_t* array)
return &array->ptl_procs[array->ptl_size++];
}
/**
* Return an array item at the specified index.
*
* @param array (IN)
* @param index (IN)
*/
static inline mca_ptl_proc_t* mca_ptl_array_get_index(mca_ptl_array_t* array, size_t index)
{
#if LAM_ENABLE_DEBUG
@ -66,6 +101,12 @@ static inline mca_ptl_proc_t* mca_ptl_array_get_index(mca_ptl_array_t* array, si
return &array->ptl_procs[index];
}
/**
* Return the next item in the array (round-robin order).
*
* @param array (IN)
*/
static inline mca_ptl_proc_t* mca_ptl_array_get_next(mca_ptl_array_t* array)
{
mca_ptl_proc_t* ptl_proc;
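A standalone sketch of the round-robin cursor this array maintains (simplified stand-in types; the real logic lives in the inline accessors above):

#include <stdio.h>
#include <stddef.h>

/* simplified stand-in for mca_ptl_array_t's round-robin state */
struct ptl_array_ex {
    int items[3];
    size_t size;
    size_t index;   /* last used index */
};

/* mirrors mca_ptl_array_get_next(): return current item, advance, wrap */
static int *get_next_ex(struct ptl_array_ex *a)
{
    int *item = &a->items[a->index];
    if (++a->index == a->size)
        a->index = 0;
    return item;
}

int main(void)
{
    struct ptl_array_ex a = { { 10, 20, 30 }, 3, 0 };
    for (int i = 0; i < 5; i++)
        printf("%d ", *get_next_ex(&a));  /* 10 20 30 10 20 */
    printf("\n");
    return 0;
}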

View file

@ -19,12 +19,11 @@
mca_pml_teg_t mca_pml_teg = {
{
mca_pml_teg_add_comm,
mca_pml_teg_del_comm,
mca_pml_teg_add_procs,
mca_pml_teg_del_procs,
mca_pml_teg_add_ptls,
mca_pml_teg_progress,
mca_pml_teg_add_comm,
mca_pml_teg_del_comm,
mca_pml_teg_irecv_init,
mca_pml_teg_irecv,
mca_pml_teg_recv,
@ -33,6 +32,7 @@ mca_pml_teg_t mca_pml_teg = {
mca_pml_teg_send,
mca_pml_teg_start,
mca_pml_teg_test,
mca_pml_teg_test_all,
mca_pml_teg_wait,
mca_pml_teg_wait_all,
mca_pml_teg_free,
@ -279,7 +279,7 @@ int mca_pml_teg_module_fini(void)
int mca_pml_teg_null(lam_request_t** request)
{
*request = (lam_request_t*)&mca_pml_teg.teg_null;
*request = (lam_request_t*)&mca_pml_teg.teg_request_null;
return LAM_SUCCESS;
}

View file

@ -1,11 +1,9 @@
/** @file
*
*
*/
/*
* $HEADER$
*/
/**
* @file
*/
#ifndef MCA_PML_TEG_H
#define MCA_PML_TEG_H
@ -51,9 +49,7 @@ struct mca_pml_teg_t {
lam_mutex_t teg_request_lock;
lam_condition_t teg_request_cond;
volatile int teg_request_waiting;
/* null request */
mca_pml_base_request_t teg_null;
mca_pml_base_request_t teg_request_null;
};
typedef struct mca_pml_teg_t mca_pml_teg_t;
@ -108,7 +104,7 @@ extern int mca_pml_teg_add_ptls(
extern int mca_pml_teg_isend_init(
void *buf,
size_t size,
size_t count,
lam_datatype_t *datatype,
int dst,
int tag,
@ -119,7 +115,7 @@ extern int mca_pml_teg_isend_init(
extern int mca_pml_teg_isend(
void *buf,
size_t size,
size_t count,
lam_datatype_t *datatype,
int dst,
int tag,
@ -130,7 +126,7 @@ extern int mca_pml_teg_isend(
extern int mca_pml_teg_send(
void *buf,
size_t size,
size_t count,
lam_datatype_t *datatype,
int dst,
int tag,
@ -140,7 +136,7 @@ extern int mca_pml_teg_send(
extern int mca_pml_teg_irecv_init(
void *buf,
size_t size,
size_t count,
lam_datatype_t *datatype,
int src,
int tag,
@ -150,7 +146,7 @@ extern int mca_pml_teg_irecv_init(
extern int mca_pml_teg_irecv(
void *buf,
size_t size,
size_t count,
lam_datatype_t *datatype,
int src,
int tag,
@ -160,7 +156,7 @@ extern int mca_pml_teg_irecv(
extern int mca_pml_teg_recv(
void *buf,
size_t size,
size_t count,
lam_datatype_t *datatype,
int src,
int tag,
@ -171,10 +167,20 @@ extern int mca_pml_teg_recv(
extern int mca_pml_teg_progress(void);
extern int mca_pml_teg_start(
lam_request_t** request
size_t count,
lam_request_t** requests
);
extern int mca_pml_teg_test(
size_t count,
lam_request_t** request,
int *index,
int *completed,
lam_status_public_t* status
);
extern int mca_pml_teg_test_all(
size_t count,
lam_request_t** request,
int *completed,
lam_status_public_t* status

View file

@ -3,7 +3,7 @@
int mca_pml_teg_irecv_init(
void *addr,
size_t length,
size_t count,
lam_datatype_t *datatype,
int src,
int tag,
@ -18,7 +18,7 @@ int mca_pml_teg_irecv_init(
mca_ptl_base_recv_request_init(
recvreq,
addr,
length,
count,
datatype,
src,
tag,
@ -31,7 +31,7 @@ int mca_pml_teg_irecv_init(
int mca_pml_teg_irecv(
void *addr,
size_t length,
size_t count,
lam_datatype_t *datatype,
int src,
int tag,
@ -40,10 +40,6 @@ int mca_pml_teg_irecv(
{
int rc;
#if 0
lam_output(0, "mca_pml_teg_irecv: src=%d tag=%d comm=%d\n", src, tag, comm->c_contextid);
#endif
mca_ptl_base_recv_request_t *recvreq = mca_pml_teg_recv_request_alloc(&rc);
if(NULL == recvreq)
return rc;
@ -51,7 +47,7 @@ int mca_pml_teg_irecv(
mca_ptl_base_recv_request_init(
recvreq,
addr,
length,
count,
datatype,
src,
tag,
@ -69,16 +65,14 @@ int mca_pml_teg_irecv(
int mca_pml_teg_recv(
void *addr,
size_t length,
size_t count,
lam_datatype_t *datatype,
int src,
int tag,
struct lam_communicator_t* comm,
lam_status_public_t* status)
{
int rc;
int index;
int rc, index;
mca_ptl_base_recv_request_t *recvreq = mca_pml_teg_recv_request_alloc(&rc);
if(NULL == recvreq)
return rc;
@ -86,7 +80,7 @@ int mca_pml_teg_recv(
mca_ptl_base_recv_request_init(
recvreq,
addr,
length,
count,
datatype,
src,
tag,

View file

@ -9,7 +9,7 @@
int mca_pml_teg_isend_init(
void *buf,
size_t size,
size_t count,
lam_datatype_t *datatype,
int dst,
int tag,
@ -26,7 +26,7 @@ int mca_pml_teg_isend_init(
mca_ptl_base_send_request_init(
sendreq,
buf,
size,
count,
datatype,
dst,
tag,
@ -42,7 +42,7 @@ int mca_pml_teg_isend_init(
int mca_pml_teg_isend(
void *buf,
size_t size,
size_t count,
lam_datatype_t *datatype,
int dst,
int tag,
@ -58,7 +58,7 @@ int mca_pml_teg_isend(
mca_ptl_base_send_request_init(
sendreq,
buf,
size,
count,
datatype,
dst,
tag,
@ -76,16 +76,14 @@ int mca_pml_teg_isend(
int mca_pml_teg_send(
void *buf,
size_t size,
size_t count,
lam_datatype_t *datatype,
int dst,
int tag,
mca_pml_base_send_mode_t sendmode,
lam_communicator_t* comm)
{
int rc;
int index;
int rc, index;
mca_ptl_base_send_request_t* sendreq = mca_pml_teg_send_request_alloc(comm,dst,&rc);
if(rc != LAM_SUCCESS)
return rc;
@ -93,7 +91,7 @@ int mca_pml_teg_send(
mca_ptl_base_send_request_init(
sendreq,
buf,
size,
count,
datatype,
dst,
tag,

View file

@ -59,7 +59,7 @@ static inline int mca_pml_teg_param_register_int(
int mca_pml_teg_module_open(void)
{
mca_pml_base_request_t* teg_null = &mca_pml_teg.teg_null;
mca_pml_base_request_t* teg_null = &mca_pml_teg.teg_request_null;
OBJ_CONSTRUCT(&mca_pml_teg.teg_lock, lam_mutex_t);
OBJ_CONSTRUCT(&mca_pml_teg.teg_recv_requests, lam_free_list_t);
OBJ_CONSTRUCT(&mca_pml_teg.teg_procs, lam_list_t);
@ -96,13 +96,6 @@ int mca_pml_teg_module_close(void)
}
static void* mca_pml_teg_thread(lam_object_t* thread)
{
lam_event_dispatch();
return NULL;
}
mca_pml_t* mca_pml_teg_module_init(int* priority,
bool *allow_multi_user_threads,
bool *have_hidden_threads)

View file

@ -1,7 +1,9 @@
/*
* $HEADER$
*/
/**
* @file
*/
#ifndef MCA_PML_PROC_H
#define MCA_PML_PROC_H
@ -11,17 +13,16 @@
#include "proc/proc.h"
#include "pml_ptl_array.h"
/*
/**
* Structure associated w/ lam_proc_t that contains data specific
* to the PML. Note that this name is not PML specific.
*/
struct mca_pml_proc_t {
lam_list_item_t super;
lam_proc_t *proc_lam;
lam_mutex_t proc_lock;
mca_ptl_array_t proc_ptl_first;
mca_ptl_array_t proc_ptl_next;
lam_proc_t *proc_lam; /**< back-pointer to lam_proc_t */
lam_mutex_t proc_lock; /**< lock to protect against concurrent access */
mca_ptl_array_t proc_ptl_first; /**< array of ptls to use for first fragments */
mca_ptl_array_t proc_ptl_next; /**< array of ptls to use for remaining fragments */
};
typedef struct mca_pml_proc_t mca_pml_proc_t;
@ -29,6 +30,13 @@ typedef struct mca_pml_proc_t mca_pml_proc_t;
extern lam_class_t mca_pml_teg_proc_t_class;
typedef struct mca_pml_proc_t mca_pml_teg_proc_t;
/**
* Return the mca_pml_proc_t instance cached in the communicator's local group.
*
* @param comm Communicator
* @param rank Peer rank
* @return mca_pml_proc_t instance
*/
static inline mca_pml_proc_t* mca_pml_teg_proc_lookup_local(lam_communicator_t* comm, int rank)
{
@ -36,6 +44,14 @@ static inline mca_pml_proc_t* mca_pml_teg_proc_lookup_local(lam_communicator_t*
return proc->proc_pml;
}
/**
* Return the mca_pml_proc_t instance cached on the communicator's remote group.
*
* @param comm Communicator
* @param rank Peer rank
* @return mca_pml_proc_t instance
*/
static inline mca_pml_proc_t* mca_pml_teg_proc_lookup_remote(lam_communicator_t* comm, int rank)
{
lam_proc_t* proc = comm->c_remote_group->grp_proc_pointers[rank];

View file

@ -4,6 +4,7 @@
int mca_pml_teg_progress(void)
{
#if 0
mca_ptl_base_tstamp_t tstamp;
size_t i;
@ -12,6 +13,7 @@ int mca_pml_teg_progress(void)
*/
for(i=0; i<mca_pml_teg.teg_num_ptl_modules; i++)
mca_pml_teg.teg_ptl_modules[i]->ptlm_progress(tstamp);
#endif
return LAM_SUCCESS;
}

View file

@ -1,7 +1,9 @@
/*
* $HEADER$
*/
/**
* @file
*/
#ifndef LAM_PML_TEG_RECV_REQUEST_H
#define LAM_PML_TEG_RECV_REQUEST_H
@ -11,38 +13,56 @@
#include "mca/ptl/base/ptl_base_recvfrag.h"
/*
* Allocate a recv request.
/**
* Allocate a recv request from the module's free list.
*
* @param rc (OUT) LAM_SUCCESS or error status on failure.
* @return Receive request.
*/
static inline mca_ptl_base_recv_request_t* mca_pml_teg_recv_request_alloc(int *rc)
{
return (mca_ptl_base_recv_request_t*)lam_free_list_get(&mca_pml_teg.teg_recv_requests, rc);
}
/**
* Return a recv request to the module's free list.
*
* @param request (IN) Receive request.
*/
static inline void mca_pml_teg_recv_request_return(mca_ptl_base_recv_request_t* request)
{
lam_free_list_return(&mca_pml_teg.teg_recv_requests, (lam_list_item_t*)request);
}
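The alloc/return pair above is a free-list recycling pattern. A minimal standalone sketch of the idea (hypothetical types; not lam_free_list_t's actual implementation):

#include <stdio.h>

/* hypothetical minimal free list: a stack of recycled descriptors */
struct item_ex { struct item_ex *next; };
struct free_list_ex { struct item_ex *head; };

static struct item_ex *free_list_get_ex(struct free_list_ex *fl)
{
    struct item_ex *it = fl->head;
    if (it != NULL)
        fl->head = it->next;  /* pop a recycled item */
    return it;                /* NULL: caller must allocate/grow */
}

static void free_list_return_ex(struct free_list_ex *fl, struct item_ex *it)
{
    it->next = fl->head;      /* push back for reuse */
    fl->head = it;
}

int main(void)
{
    struct item_ex a = { NULL };
    struct free_list_ex fl = { NULL };
    free_list_return_ex(&fl, &a);
    printf("%s\n", free_list_get_ex(&fl) == &a ? "reused" : "fresh");
    return 0;
}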
/*
* Progress an initialized request.
/**
* Start an initialized request.
*
* @param request Receive request.
* @return LAM_SUCCESS or error status on failure.
*/
static inline int mca_pml_teg_recv_request_start(mca_ptl_base_recv_request_t* req)
static inline int mca_pml_teg_recv_request_start(mca_ptl_base_recv_request_t* request)
{
THREAD_SCOPED_LOCK(&mca_pml_teg.teg_lock,
(req->req_sequence = mca_pml_teg.teg_recv_sequence++));
(request->req_sequence = mca_pml_teg.teg_recv_sequence++));
if(req->super.req_peer == LAM_ANY_SOURCE) {
mca_ptl_base_recv_request_match_wild(req);
if(request->super.req_peer == LAM_ANY_SOURCE) {
mca_ptl_base_recv_request_match_wild(request);
} else {
mca_ptl_base_recv_request_match_specific(req);
mca_ptl_base_recv_request_match_specific(request);
}
return LAM_SUCCESS;
}
/**
* Update status of a recv request based on the completion status of
* the receive fragment.
*
* @param request (IN) Receive request.
* @param frag (IN) Receive fragment.
*/
void mca_pml_teg_recv_request_progress(
mca_ptl_base_recv_request_t* recv_request,
mca_ptl_base_recv_frag_t* recv_frag
mca_ptl_base_recv_request_t* request,
mca_ptl_base_recv_frag_t* frag
);

View file

@ -25,19 +25,19 @@ void mca_pml_teg_send_request_schedule(mca_ptl_base_send_request_t* req)
mca_pml_proc_t* proc_pml = proc->proc_pml;
/* allocate remaining bytes to PTLs */
size_t bytes_remaining = req->super.req_length - req->req_offset;
size_t bytes_remaining = req->req_packed_size - req->req_offset;
size_t num_ptl_avail = proc_pml->proc_ptl_next.ptl_size;
size_t num_ptl = 0;
while(bytes_remaining > 0 && num_ptl++ < num_ptl_avail) {
mca_ptl_proc_t* ptl_proc = mca_ptl_array_get_next(&proc_pml->proc_ptl_next);
mca_ptl_t* ptl = ptl_proc->ptl;
int rc;
/* if this is the last PTL that is available to use, or the number of
* bytes remaining in the message is less than the PTL's minimum fragment
* size, then go ahead and give the rest of the message to this PTL.
*/
size_t bytes_to_frag;
int rc;
if(num_ptl == num_ptl_avail || bytes_remaining < ptl->ptl_min_frag_size)
bytes_to_frag = bytes_remaining;
@ -47,14 +47,14 @@ void mca_pml_teg_send_request_schedule(mca_ptl_base_send_request_t* req)
* previously assigned)
*/
else {
bytes_to_frag = ptl_proc->ptl_weight * req->super.req_length;
bytes_to_frag = ptl_proc->ptl_weight * req->req_packed_size;
if(bytes_to_frag > bytes_remaining)
bytes_to_frag = bytes_remaining;
}
rc = ptl->ptl_send(ptl, ptl_proc->ptl_peer, req, bytes_to_frag, 0);
if(rc == LAM_SUCCESS)
bytes_remaining = req->super.req_length - req->req_offset;
bytes_remaining = req->req_packed_size - req->req_offset;
}
/* unable to complete send - signal request failed */
@ -76,7 +76,7 @@ void mca_pml_teg_send_request_progress(
bool complete = false;
lam_mutex_lock(&mca_pml_teg.teg_request_lock);
req->req_bytes_sent += frag->super.frag_size;
if (req->req_bytes_sent >= req->super.req_length) {
if (req->req_bytes_sent >= req->req_packed_size) {
req->super.req_mpi_done = true;
req->super.req_pml_done = true;
if(mca_pml_teg.teg_request_waiting) {

View file

@ -1,7 +1,9 @@
/*
* $HEADER$
*/
/**
* @file
*/
#ifndef LAM_PML_TEG_SEND_REQUEST_H
#define LAM_PML_TEG_SEND_REQUEST_H
@ -47,8 +49,8 @@ static inline int mca_pml_teg_send_request_start(
int flags, rc;
/* start the first fragment */
if(req->super.req_length <= first_fragment_size) {
first_fragment_size = req->super.req_length;
if(req->req_packed_size <= first_fragment_size) {
first_fragment_size = req->req_packed_size;
flags = (req->req_send_mode == MCA_PML_BASE_SEND_SYNCHRONOUS) ? MCA_PTL_FLAGS_ACK_MATCHED : 0;
} else {
/* require match for first fragment of a multi-fragment message or if synchronous send */

View file

@ -3,16 +3,30 @@
#include "pml_teg_sendreq.h"
int mca_pml_teg_start(lam_request_t** request)
int mca_pml_teg_start(size_t count, lam_request_t** requests)
{
mca_pml_base_request_t *pml_request = *(mca_pml_base_request_t**)request;
switch(pml_request->req_type) {
case MCA_PML_REQUEST_SEND:
return mca_pml_teg_send_request_start((mca_ptl_base_send_request_t*)pml_request);
case MCA_PML_REQUEST_RECV:
return mca_pml_teg_recv_request_start((mca_ptl_base_recv_request_t*)pml_request);
default:
return LAM_ERROR;
int rc;
size_t i;
for(i=0; i<count; i++) {
mca_pml_base_request_t *pml_request = (mca_pml_base_request_t*)requests[i];
if(NULL == pml_request)
continue;
switch(pml_request->req_type) {
case MCA_PML_REQUEST_SEND:
if((rc = mca_pml_teg_send_request_start((mca_ptl_base_send_request_t*)pml_request))
!= LAM_SUCCESS)
return rc;
break;
case MCA_PML_REQUEST_RECV:
if((rc = mca_pml_teg_recv_request_start((mca_ptl_base_recv_request_t*)pml_request))
!= LAM_SUCCESS)
return rc;
break;
default:
return LAM_ERROR;
}
}
return LAM_SUCCESS;
}

View file

@ -2,18 +2,74 @@
int mca_pml_teg_test(
lam_request_t** request,
size_t count,
lam_request_t** requests,
int *index,
int *completed,
lam_status_public_t* status)
{
mca_pml_base_request_t* pml_request = *(mca_pml_base_request_t**)request;
if(pml_request->req_mpi_done) {
*completed = true;
mca_pml_teg_free(request);
if (status != NULL)
*status = pml_request->req_status;
} else {
size_t i;
for(i=0; i<count; i++) {
mca_pml_base_request_t* pml_request = (mca_pml_base_request_t*)requests[i];
if(pml_request == NULL)
continue;
if(pml_request->req_mpi_done) {
*index = i;
*completed = true;
if (NULL != status)
*status = pml_request->req_status;
if(false == pml_request->req_persistent)
mca_pml_teg_free(requests+i);
return LAM_SUCCESS;
}
}
*index = MPI_UNDEFINED;
*completed = false;
if(NULL != status)
*status = mca_pml_teg.teg_request_null.req_status;
return LAM_SUCCESS;
}
int mca_pml_teg_test_all(
size_t count,
lam_request_t** requests,
int *completed,
lam_status_public_t* statuses)
{
size_t i;
size_t num_completed = 0;
for(i=0; i<count; i++) {
mca_pml_base_request_t* pml_request = (mca_pml_base_request_t*)requests[i];
if(pml_request == NULL || pml_request->req_mpi_done)
num_completed++;
}
if(num_completed != count) {
*completed = false;
return LAM_SUCCESS;
}
*completed = true;
if(NULL != statuses) {
/* fill out completion status and free request if required */
for(i=0; i<count; i++) {
mca_pml_base_request_t* pml_request = (mca_pml_base_request_t*)requests[i];
if(NULL == pml_request) {
statuses[i] = mca_pml_teg.teg_request_null.req_status;
} else {
statuses[i] = pml_request->req_status;
if(false == pml_request->req_persistent)
mca_pml_teg_free(requests+i);
}
}
} else {
/* free request if required */
for(i=0; i<count; i++) {
mca_pml_base_request_t* pml_request = (mca_pml_base_request_t*)requests[i];
if(NULL != pml_request && false == pml_request->req_persistent)
mca_pml_teg_free(requests+i);
}
}
return LAM_SUCCESS;
}

View file

@ -46,15 +46,13 @@ int mca_pml_teg_wait(
}
/* return request to pool */
if(pml_request->req_persistent == false) {
if(false == pml_request->req_persistent) {
mca_pml_teg_free(request);
}
if (status != NULL) {
if (NULL != status) {
*status = pml_request->req_status;
}
if (index != NULL) {
*index = completed;
}
*index = completed;
return LAM_SUCCESS;
}
@ -64,46 +62,58 @@ int mca_pml_teg_wait_all(
lam_request_t** requests,
lam_status_public_t* statuses)
{
int completed, i;
/*
* acquire lock and test for completion - if all requests are not completed
* pend on condition variable until a request completes
*/
lam_mutex_lock(&mca_pml_teg.teg_request_lock);
mca_pml_teg.teg_request_waiting++;
do {
completed = 0;
for(i=0; i<count; i++) {
mca_pml_base_request_t* pml_request = (mca_pml_base_request_t*)requests[i];
if(pml_request == NULL || pml_request->req_mpi_done == true) {
completed++;
continue;
}
}
if(completed != count)
lam_condition_wait(&mca_pml_teg.teg_request_cond, &mca_pml_teg.teg_request_lock);
} while (completed != count);
mca_pml_teg.teg_request_waiting--;
lam_mutex_unlock(&mca_pml_teg.teg_request_lock);
/*
* fill out completion status and free request if required
*/
int completed = 0, i;
for(i=0; i<count; i++) {
mca_pml_base_request_t* pml_request = (mca_pml_base_request_t*)requests[i];
if (NULL == pml_request) {
if(NULL != statuses)
statuses[i] = mca_pml_teg.teg_null.req_status;
continue;
}
if(pml_request == NULL || pml_request->req_mpi_done == true) {
completed++;
}
}
if (NULL != statuses) {
statuses[i] = pml_request->req_status;
/* if all requests have not completed -- defer acquiring the lock unless required */
if(completed != count) {
/*
* acquire lock and test for completion - if all requests are not completed
* pend on condition variable until a request completes
*/
lam_mutex_lock(&mca_pml_teg.teg_request_lock);
mca_pml_teg.teg_request_waiting++;
do {
completed = 0;
for(i=0; i<count; i++) {
mca_pml_base_request_t* pml_request = (mca_pml_base_request_t*)requests[i];
if(pml_request == NULL || pml_request->req_mpi_done == true) {
completed++;
continue;
}
}
if(completed != count)
lam_condition_wait(&mca_pml_teg.teg_request_cond, &mca_pml_teg.teg_request_lock);
} while (completed != count);
mca_pml_teg.teg_request_waiting--;
lam_mutex_unlock(&mca_pml_teg.teg_request_lock);
}
if(NULL != statuses) {
/* fill out status and free request if required */
for(i=0; i<count; i++) {
mca_pml_base_request_t* pml_request = (mca_pml_base_request_t*)requests[i];
if (NULL == pml_request) {
statuses[i] = mca_pml_teg.teg_request_null.req_status;
} else {
statuses[i] = pml_request->req_status;
if (false == pml_request->req_persistent) {
mca_pml_teg_free(&requests[i]);
}
}
}
if (false == pml_request->req_persistent) {
/* return request to pool */
mca_pml_teg_free(&requests[i]);
} else {
/* free request if required */
for(i=0; i<count; i++) {
mca_pml_base_request_t* pml_request = (mca_pml_base_request_t*)requests[i];
if (NULL != pml_request && false == pml_request->req_persistent) {
mca_pml_teg_free(&requests[i]);
}
}
}
return LAM_SUCCESS;

View file

@ -1,7 +1,9 @@
/*
* $HEADER$
*/
/**
* @file
*/
#ifndef MCA_PTL_BASE_H
#define MCA_PTL_BASE_H

View file

@ -23,6 +23,7 @@ static void mca_pml_ptl_comm_construct(mca_pml_ptl_comm_t* comm)
OBJ_CONSTRUCT(&comm->c_wild_lock, lam_mutex_t);
}
static void mca_pml_ptl_comm_destruct(mca_pml_ptl_comm_t* comm)
{
free(comm->c_msg_seq);

View file

@ -1,3 +1,9 @@
/*
* $HEADER$
*/
/**
* @file
*/
#ifndef MCA_PML_COMM_H
#define MCA_PML_COMM_H
@ -6,49 +12,44 @@
#include "mca/ptl/ptl.h"
#include "lfc/lam_list.h"
/*
* Structure associated w/ lam_communicator_t that contains data
* specific to the PML.
*/
extern lam_class_t mca_pml_ptl_comm_t_class;
/**
* Cached on lam_communicator_t to hold queues/state
* used by the PML<->PTL interface for matching logic.
*/
struct mca_pml_comm_t {
lam_object_t super;
/* send message sequence-number support - sender side */
mca_ptl_base_sequence_t *c_msg_seq;
/* send message sequence-number support - receiver side */
mca_ptl_base_sequence_t *c_next_msg_seq;
/* matching lock */
lam_mutex_t *c_matching_lock;
/* unexpected fragments queues */
lam_list_t *c_unexpected_frags;
/* these locks are needed to avoid a probe interfering with a match */
lam_mutex_t *c_unexpected_frags_lock;
/* out-of-order fragments queues */
lam_list_t *c_frags_cant_match;
/* queues of unmatched specific (source process specified) receives
* sorted by source process */
lam_list_t *c_specific_receives;
/* queue of unmatched wild (source process not specified) receives
* */
lam_list_t c_wild_receives;
/* protect access to wild receives */
lam_mutex_t c_wild_lock;
mca_ptl_base_sequence_t *c_msg_seq; /**< send message sequence number - sender side */
mca_ptl_base_sequence_t *c_next_msg_seq; /**< send message sequence number - receiver side */
lam_mutex_t *c_matching_lock; /**< matching lock */
lam_list_t *c_unexpected_frags; /**< unexpected fragment queues */
lam_mutex_t *c_unexpected_frags_lock; /**< unexpected fragment locks */
lam_list_t *c_frags_cant_match; /**< out-of-order fragment queues */
lam_list_t *c_specific_receives; /**< queues of unmatched specific (source process specified) receives */
lam_list_t c_wild_receives; /**< queue of unmatched wild (source process not specified) receives */
lam_mutex_t c_wild_lock; /**< lock to protect access to wild receives */
};
typedef struct mca_pml_comm_t mca_pml_ptl_comm_t;
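A standalone sketch of the per-peer send-sequence counters that c_msg_seq holds (simplified: a fixed-size counter array with one lock, standing in for the real types; the actual accessor is mca_pml_ptl_comm_send_sequence() below):

#include <pthread.h>
#include <stdint.h>
#include <stdio.h>

#define NPEERS_EX 4

static uint64_t msg_seq_ex[NPEERS_EX];   /* stands in for c_msg_seq */
static pthread_mutex_t seq_lock_ex = PTHREAD_MUTEX_INITIALIZER;

/* fetch-and-increment the next MPI message sequence number for a peer */
static uint64_t send_sequence_ex(int dst)
{
    pthread_mutex_lock(&seq_lock_ex);
    uint64_t seq = msg_seq_ex[dst]++;
    pthread_mutex_unlock(&seq_lock_ex);
    return seq;
}

int main(void)
{
    printf("%llu\n", (unsigned long long)send_sequence_ex(1));  /* 0 */
    printf("%llu\n", (unsigned long long)send_sequence_ex(1));  /* 1 */
    printf("%llu\n", (unsigned long long)send_sequence_ex(2));  /* 0 */
    return 0;
}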
extern int mca_pml_ptl_comm_init_size(mca_pml_ptl_comm_t*, size_t);
/**
* Initialize an instance of mca_pml_ptl_comm_t based on the communicator size.
*
* @param comm Instance of mca_pml_ptl_comm_t
* @param size Size of communicator
* @return LAM_SUCCESS or error status on failure.
*/
extern int mca_pml_ptl_comm_init_size(mca_pml_ptl_comm_t* comm, size_t size);
/**
* Obtain the next sequence number (MPI) for a given destination rank.
*
* @param comm Instance of mca_pml_ptl_comm_t
* @param dst Rank of destination.
* @return Next available sequence number.
*/
static inline mca_ptl_base_sequence_t mca_pml_ptl_comm_send_sequence(mca_pml_ptl_comm_t* comm, int dst)
{

View file

@ -1,25 +1,30 @@
/*
* $HEADER$
*/
/*%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%*/
/**
* @file
*/
#ifndef MCA_PML_BASE_FRAGMENT_H
#define MCA_PML_BASE_FRAGMENT_H
#include "lfc/lam_list.h"
#include "mca/ptl/ptl.h"
#include "datatype/datatype.h"
#include "mca/ptl/base/ptl_base_header.h"
extern lam_class_t mca_ptl_base_frag_t_class;
/**
* Base type for fragment descriptors.
*/
struct mca_ptl_base_frag_t {
lam_list_item_t super;
mca_ptl_base_header_t frag_header;
lam_list_item_t super; /**< allow the fragment to be placed on a list */
mca_ptl_base_header_t frag_header; /**< header used for fragment matching */
struct mca_ptl_t* frag_owner; /**< PTL that allocated this fragment */
struct mca_ptl_base_peer_t* frag_peer; /**< PTL specific addressing info */
void *frag_addr; /* pointer into request buffer at fragment offset */
size_t frag_size; /* number of bytes available in request buffer */
void *frag_addr; /**< pointer into request buffer at fragment offset */
size_t frag_size; /**< number of bytes available in request buffer */
lam_convertor_t frag_convertor; /**< datatype convertor for fragment packing/unpacking */
};
typedef struct mca_ptl_base_frag_t mca_ptl_base_frag_t;

View file

@ -1,7 +1,9 @@
/*
* $HEADER$
*/
/**
* @file
*/
#ifndef MCA_PTL_BASE_HEADER_H
#define MCA_PTL_BASE_HEADER_H
@ -17,80 +19,60 @@
#define MCA_PTL_FLAGS_ACK_AGGREGATE 2
/* Defines the common header attributes - must be first element in each header type */
/**
* Common header attributes - must be first element in each header type
*/
struct mca_ptl_base_common_header_t {
/* type of envelope */
uint8_t hdr_type;
/* flags indicating how fragment should be processed */
uint8_t hdr_flags;
/* size of header - allow for variable length */
uint16_t hdr_size;
uint8_t hdr_type; /**< type of envelope */
uint8_t hdr_flags; /**< flags indicating how fragment should be processed */
uint16_t hdr_size; /**< size of header - allow for variable length */
};
typedef struct mca_ptl_base_common_header_t mca_ptl_base_common_header_t;
/*
* Common header definition for all fragments.
/**
* Basic header for all fragments.
*/
struct mca_ptl_base_frag_header_t {
/* common header */
mca_ptl_base_common_header_t hdr_common;
/* fragment length */
uint32_t hdr_frag_length;
/* offset into message */
uint32_t hdr_frag_offset;
/* fragment sequence number */
mca_ptl_base_sequence_t hdr_frag_seq;
/* pointer to source fragment */
lam_ptr_t hdr_src_ptr;
/* pointer to matched receive */
lam_ptr_t hdr_dst_ptr;
mca_ptl_base_common_header_t hdr_common; /**< common attributes */
uint32_t hdr_frag_length; /**< fragment length */
uint32_t hdr_frag_offset; /**< offset into message */
mca_ptl_base_sequence_t hdr_frag_seq; /**< fragment sequence number */
lam_ptr_t hdr_src_ptr; /**< pointer to source fragment */
lam_ptr_t hdr_dst_ptr; /**< pointer to matched receive */
};
typedef struct mca_ptl_base_frag_header_t mca_ptl_base_frag_header_t;
/*
/**
* Header definition for the first fragment, contains the additional
* attributes required to match the corresponding posted receive.
*/
struct mca_ptl_base_match_header_t {
/* fragment info */
mca_ptl_base_frag_header_t hdr_frag;
/* communicator index */
uint32_t hdr_contextid;
/* source rank */
int32_t hdr_src;
/* destination rank */
int32_t hdr_dst;
/* user tag */
int32_t hdr_tag;
/* message length */
uint32_t hdr_msg_length;
/* message sequence number */
mca_ptl_base_sequence_t hdr_msg_seq;
mca_ptl_base_frag_header_t hdr_frag; /**< fragment attributes */
uint32_t hdr_contextid; /**< communicator index */
int32_t hdr_src; /**< source rank */
int32_t hdr_dst; /**< destination rank */
int32_t hdr_tag; /**< user tag */
uint32_t hdr_msg_length; /**< message length */
mca_ptl_base_sequence_t hdr_msg_seq; /**< message sequence number */
};
typedef struct mca_ptl_base_match_header_t mca_ptl_base_match_header_t;
/*
/**
* Header used to acknowledge outstanding fragment(s).
*/
struct mca_ptl_base_ack_header_t {
/* common header */
mca_ptl_base_common_header_t hdr_common;
/* source fragment */
lam_ptr_t hdr_src_ptr;
/* matched receive request */
lam_ptr_t hdr_dst_ptr;
/* sequence range */
mca_ptl_base_common_header_t hdr_common; /**< common attributes */
lam_ptr_t hdr_src_ptr; /**< source fragment */
lam_ptr_t hdr_dst_ptr; /**< matched receive request */
/* sequence range? */
};
typedef struct mca_ptl_base_ack_header_t mca_ptl_base_ack_header_t;
/*
/**
* Union of defined header types.
*/
union mca_ptl_base_header_t {

View file

@ -535,7 +535,7 @@ static void mca_ptl_base_check_cantmatch_for_match(lam_list_t *additional_matche
{
/* local parameters */
int match_found;
mca_ptl_base_sequence_t next_msg_seq_expected, frag_seqber;
mca_ptl_base_sequence_t next_msg_seq_expected, frag_seq;
mca_ptl_base_recv_frag_t *frag_desc;
mca_ptl_base_recv_request_t *matched_receive;
@ -567,8 +567,8 @@ static void mca_ptl_base_check_cantmatch_for_match(lam_list_t *additional_matche
/*
* If the message has the next expected seq from that proc...
*/
frag_seqber=frag_desc->super.frag_header.hdr_match.hdr_msg_seq;
if (frag_seqber == next_msg_seq_expected) {
frag_seq=frag_desc->super.frag_header.hdr_match.hdr_msg_seq;
if (frag_seq == next_msg_seq_expected) {
/* We're now expecting the next sequence number. */
(pml_comm->c_next_msg_seq[frag_src])++;
@ -617,7 +617,7 @@ static void mca_ptl_base_check_cantmatch_for_match(lam_list_t *additional_matche
* and re-start search for next sequence number */
break;
} /* end if (frag_seqber == next_msg_seq_expected) */
} /* end if (frag_seq == next_msg_seq_expected) */
} /* end for (frag_desc) loop */

View file

@ -1,13 +1,23 @@
/*
* $HEADER$
*/
/**
* @file
*/
#ifndef MCA_PTL_BASE_MATCH_H
#define MCA_PTL_BASE_MATCH_H
struct mca_ptl_base_recv_frag_t;
/**
* Match incoming fragments against posted receives.
*
* @param frag_header (IN) Header of received fragment.
* @param frag_desc (IN) Received fragment descriptor.
* @param match_made (OUT) Flag indicating whether a match was made.
* @param additional_matches (OUT) List of additional matches if a match was made.
* @return LAM_SUCCESS or error status on failure.
*/
int mca_ptl_base_match(mca_ptl_base_match_header_t *frag_header,
struct mca_ptl_base_recv_frag_t *frag_desc, bool *match_made,
lam_list_t *additional_matches);
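A compact sketch of the ordering rule the matcher enforces (see the frag_seq / next_msg_seq_expected hunk above): a fragment is matched only when its message sequence number is the next one expected from its source; earlier arrivals wait on the cant-match list.

#include <stdint.h>
#include <stdio.h>

/* process frag_seq only when it equals next_expected, then advance;
 * otherwise the caller leaves the fragment on the cant-match list */
static int try_match_ex(uint64_t frag_seq, uint64_t *next_expected)
{
    if (frag_seq != *next_expected)
        return 0;           /* out of order */
    (*next_expected)++;     /* in order: matched, expect the next one */
    return 1;
}

int main(void)
{
    uint64_t next = 0;
    printf("%d", try_match_ex(1, &next));   /* 0: seq 1 arrived early */
    printf("%d", try_match_ex(0, &next));   /* 1: seq 0 is expected */
    printf("%d\n", try_match_ex(1, &next)); /* 1: queued seq 1 now matches */
    return 0;
}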

View file

@ -1,8 +1,9 @@
/*
* $HEADER$
*/
/*%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%*/
/**
* @file
*/
#ifndef MCA_PTL_BASE_RECVFRAG_H
#define MCA_PTL_BASE_RECVFRAG_H
@ -13,43 +14,85 @@
extern lam_class_t mca_ptl_base_recv_frag_t_class;
/**
* Base type for receive fragment descriptors.
*/
struct mca_ptl_base_recv_frag_t {
mca_ptl_base_frag_t super;
mca_ptl_base_recv_request_t *frag_request; /* matched posted receive */
mca_ptl_base_frag_t super; /**< base fragment descriptor */
mca_ptl_base_recv_request_t *frag_request; /**< matched posted receive */
bool frag_is_buffered; /**< does the fragment need to be unpacked into the user's buffer */
};
typedef struct mca_ptl_base_recv_frag_t mca_ptl_base_recv_frag_t;
/**
* Initialize the receive fragment after a match has been made.
*
* @param frag (IN) Receive fragment descriptor.
*
* If a buffer has not already been allocated, determine the
* offset into the user's buffer (if contiguous data), or allocate
* a buffer for the non-contiguous case.
*
* TODO: may need to pass in an allocator....
*/
static inline void mca_ptl_base_recv_frag_init(mca_ptl_base_recv_frag_t* frag)
{
mca_ptl_base_recv_request_t* request = frag->frag_request;
mca_ptl_base_frag_header_t* header = &frag->super.frag_header.hdr_frag;
mca_ptl_base_match_header_t* header = &frag->super.frag_header.hdr_match;
/* determine the offset and size of posted buffer */
if (request->super.req_length < header->hdr_frag_offset) {
/* initialize status */
request->super.req_status.MPI_SOURCE = header->hdr_src;
request->super.req_status.MPI_TAG = header->hdr_tag;
request->super.req_status.MPI_ERROR = LAM_SUCCESS;
request->super.req_status._count = header->hdr_msg_length;
/* user buffer is too small - discard entire fragment */
frag->super.frag_addr = 0;
frag->super.frag_size = 0;
if(header->hdr_frag.hdr_frag_length > 0) {
} else if (request->super.req_length < header->hdr_frag_offset + header->hdr_frag_length) {
/* initialize receive convertor */
lam_proc_t *proc =
lam_comm_peer_lookup(request->super.req_comm, request->super.req_peer);
lam_convertor_copy(proc->proc_convertor, &frag->super.frag_convertor);
lam_convertor_init_for_recv(
&frag->super.frag_convertor, /* convertor */
0, /* flags */
request->super.req_datatype, /* datatype */
request->super.req_count, /* count elements */
request->super.req_addr, /* users buffer */
header->hdr_frag.hdr_frag_offset); /* offset in bytes into packed buffer */
/* user buffer is too small - discard part of fragment */
frag->super.frag_addr = ((unsigned char*)request->super.req_addr + header->hdr_frag_offset);
frag->super.frag_size = request->super.req_length - header->hdr_frag_offset;
} else {
/* user buffer is large enough for this fragment */
frag->super.frag_addr = ((unsigned char*)request->super.req_addr + header->hdr_frag_offset);
frag->super.frag_size = header->hdr_frag_length;
/* if a buffer has not already been allocated for an eager
* send - go ahead and figure out the offset into the user's
* buffer (for contiguous data) - or allocate a buffer
* for the receive if required.
*/
if(NULL == frag->super.frag_addr) {
struct iovec iov;
iov.iov_base = NULL;
iov.iov_len = header->hdr_frag.hdr_frag_length;
lam_convertor_unpack(&frag->super.frag_convertor, &iov, 1);
/* non-contiguous - allocate buffer for receive */
if(NULL == iov.iov_base) {
frag->super.frag_addr = malloc(iov.iov_len);
frag->frag_is_buffered = true;
/* we now have correct offset into users buffer */
} else {
frag->super.frag_addr = iov.iov_base;
frag->frag_is_buffered = false;
}
frag->super.frag_size = header->hdr_frag.hdr_frag_length;
}
}
}
/**
* Called by the PTL to attempt a match for new fragments.
*
* @param frag (IN) Receive fragment descriptor.
* @param header (IN) Header corresponding to the receive fragment.
* @return LAM_SUCCESS or error status on failure.
*/
static inline int mca_ptl_base_recv_frag_match(
mca_ptl_base_recv_frag_t* frag,
mca_ptl_base_match_header_t* header)
@ -70,7 +113,7 @@ static inline int mca_ptl_base_recv_frag_match(
mca_ptl_base_recv_frag_init(frag);
/* notify ptl of match */
ptl->ptl_recv(ptl, frag, &frag->frag_request->super.req_status);
ptl->ptl_recv(ptl, frag);
/* process any additional fragments that arrived out of order */
frag = (mca_ptl_base_recv_frag_t*)lam_list_remove_first(&matched_frags);

View file

@ -51,7 +51,7 @@ void mca_ptl_base_recv_request_match_specific(mca_ptl_base_recv_request_t* reque
mca_ptl_t* ptl = frag->super.frag_owner;
THREAD_UNLOCK(pml_comm->c_matching_lock+req_peer);
mca_ptl_base_recv_frag_init(frag);
ptl->ptl_recv(ptl, frag, &request->super.req_status);
ptl->ptl_recv(ptl, frag);
return; /* match found */
}
@ -95,8 +95,8 @@ void mca_ptl_base_recv_request_match_wild(mca_ptl_base_recv_request_t* request)
if ((frag = mca_ptl_base_recv_request_match_specific_proc(request, proc)) != NULL) {
mca_ptl_t* ptl = frag->super.frag_owner;
THREAD_UNLOCK(pml_comm->c_matching_lock+proc);
mca_ptl_base_recv_frag_init(frag);
ptl->ptl_recv(ptl, frag, &request->super.req_status);
mca_ptl_base_recv_frag_init(frag);
ptl->ptl_recv(ptl, frag);
return; /* match found */
}
THREAD_UNLOCK(pml_comm->c_matching_lock+proc);

View file

@ -1,7 +1,9 @@
/*
* $HEADER$
*/
/**
* @file
*/
#ifndef MCA_PML_BASE_RECV_REQUEST_H
#define MCA_PML_BASE_RECV_REQUEST_H
@ -11,25 +13,33 @@
extern lam_class_t mca_ptl_base_recv_request_t_class;
struct mca_ptl_base_recv_frag_t;
/**
* Base type for receive requests.
*/
struct mca_ptl_base_recv_request_t {
mca_pml_base_request_t super;
/* request sequence number */
mca_ptl_base_sequence_t req_sequence;
/* number of bytes delivered */
size_t req_bytes_recvd;
mca_pml_base_request_t super; /**< base request */
mca_ptl_base_sequence_t req_sequence; /**< request sequence number */
size_t req_bytes_recvd; /**< number of bytes delivered to user */
};
typedef struct mca_ptl_base_recv_request_t mca_ptl_base_recv_request_t;
void mca_ptl_base_recv_request_match_wild(mca_ptl_base_recv_request_t*);
void mca_ptl_base_recv_request_match_specific(mca_ptl_base_recv_request_t*);
/**
* Initialize a receive request with call parameters.
*
* @param request (IN) Receive request.
* @param addr (IN) User buffer.
* @param count (IN) Number of elements of indicated datatype.
* @param datatype (IN) User defined datatype.
* @param src (IN) Source rank w/in the communicator.
* @param tag (IN) User defined tag.
* @param comm (IN) Communicator.
* @param persistent (IN) Is this a persistent request.
*/
static inline void mca_ptl_base_recv_request_init(
mca_ptl_base_recv_request_t *request,
void *addr,
size_t length,
size_t count,
lam_datatype_t* datatype,
int src,
int tag,
@ -39,11 +49,12 @@ static inline void mca_ptl_base_recv_request_init(
request->req_sequence = 0;
request->req_bytes_recvd = 0;
request->super.req_addr = addr;
request->super.req_length = length;
request->super.req_count = count;
request->super.req_datatype = datatype;
request->super.req_peer = src;
request->super.req_tag = tag;
request->super.req_comm = comm;
request->super.req_proc = NULL;
request->super.req_type = MCA_PML_REQUEST_RECV;
request->super.req_persistent = persistent;
request->super.req_mpi_done = false;
@ -52,5 +63,21 @@ static inline void mca_ptl_base_recv_request_init(
request->super.super.req_type = LAM_REQUEST_PML;
}
/**
* Attempt to match the request against the unexpected fragment list
* for all source ranks w/in the communicator.
*
* @param request (IN) Request to match.
*/
void mca_ptl_base_recv_request_match_wild(mca_ptl_base_recv_request_t* request);
/**
* Attempt to match the request against the unexpected fragment list
* for a specific source rank.
*
* @param request (IN) Request to match.
*/
void mca_ptl_base_recv_request_match_specific(mca_ptl_base_recv_request_t* request);
#endif

View file

@ -1,6 +1,9 @@
/*
* $HEADER$
*/
/**
* @file
*/
#ifndef MCA_PTL_BASE_SEND_FRAG_H
#define MCA_PTL_BASE_SEND_FRAG_H
@ -9,10 +12,12 @@
extern lam_class_t mca_ptl_base_send_frag_t_class;
/**
* Base type for send fragment descriptors
*/
struct mca_ptl_base_send_frag_t {
mca_ptl_base_frag_t super;
struct mca_ptl_base_send_request_t *frag_request;
mca_ptl_base_frag_t super; /**< base fragment descriptor */
struct mca_ptl_base_send_request_t *frag_request; /**< pointer to send request */
};
typedef struct mca_ptl_base_send_frag_t mca_ptl_base_send_frag_t;

View file

@ -1,6 +1,7 @@
/*
* $HEADER$
*/
#include <string.h>
#include "mca/ptl/base/ptl_base_sendreq.h"
#include "mca/ptl/base/ptl_base_sendfrag.h"
@ -18,11 +19,9 @@ lam_class_t mca_ptl_base_send_request_t_class = {
static void mca_ptl_base_send_request_construct(mca_ptl_base_send_request_t* req)
{
OBJ_CONSTRUCT(&req->req_unacked_frags, lam_list_t);
}
static void mca_ptl_base_send_request_destruct(mca_ptl_base_send_request_t* req)
{
OBJ_DESTRUCT(&req->req_unacked_frags);
}

View file

@ -1,12 +1,14 @@
/*
* $HEADER$
*/
/**
* @file
*/
#ifndef MCA_PML_BASE_SEND_REQUEST_H
#define MCA_PML_BASE_SEND_REQUEST_H
#include "lam_config.h"
#include "datatype/datatype.h"
#include "mca/ptl/ptl.h"
#include "mca/pml/base/pml_base_request.h"
#include "mca/ptl/base/ptl_base_comm.h"
@ -16,72 +18,100 @@ extern lam_class_t mca_ptl_base_send_request_t_class;
struct mca_ptl_base_send_frag_t;
/**
* Base type for send requests
*/
struct mca_ptl_base_send_request_t {
/* request object - common data structure for use by wait/test */
mca_pml_base_request_t super;
/* number of bytes that have already been assigned to a fragment */
size_t req_offset;
/* number of fragments that have been allocated */
size_t req_frags;
/* number of bytes that have been sent */
size_t req_bytes_sent;
/* number of bytes that have been acked */
size_t req_bytes_acked;
/* type of send */
mca_pml_base_send_mode_t req_send_mode;
/* sequence number for MPI pt-2-pt ordering */
mca_ptl_base_sequence_t req_msg_seq;
/* queue of fragments that are waiting to be acknowledged */
mca_ptl_base_queue_t req_unacked_frags;
/* PTL that allocated this descriptor */
struct mca_ptl_t* req_owner;
/* PTL peer instance that will be used for first fragment */
struct mca_ptl_base_peer_t* req_peer;
/* peer matched receive */
lam_ptr_t req_peer_request;
    mca_pml_base_request_t super; /**< base request type - common data structure for use by wait/test */
size_t req_offset; /**< number of bytes that have already been assigned to a fragment */
size_t req_frags; /**< number of fragments that have been allocated */
size_t req_bytes_sent; /**< number of bytes that have been sent */
mca_pml_base_send_mode_t req_send_mode; /**< type of send */
mca_ptl_base_sequence_t req_msg_seq; /**< sequence number for MPI pt-2-pt ordering */
struct mca_ptl_t* req_owner; /**< PTL that allocated this descriptor */
struct mca_ptl_base_peer_t* req_peer; /**< PTL peer instance that will be used for first fragment */
lam_ptr_t req_peer_request; /**< matched receive at peer */
lam_convertor_t req_convertor; /**< convertor that describes this datatype */
size_t req_packed_size; /**< packed size of a message given the datatype and count */
};
typedef struct mca_ptl_base_send_request_t mca_ptl_base_send_request_t;
static inline bool mca_ptl_base_send_request_matched(
mca_ptl_base_send_request_t* request)
{
return (NULL != request->req_peer_request.pval);
}
/**
* Initialize a send request with call parameters.
*
* @param request (IN) Send request
* @param addr (IN) User buffer
* @param count (IN) Number of elements of indicated datatype.
* @param datatype (IN) User defined datatype
* @param peer (IN) Destination rank
* @param tag (IN) User defined tag
* @param comm (IN) Communicator
* @param mode (IN) Send mode (STANDARD,BUFFERED,SYNCHRONOUS,READY)
* @param persistent (IN) Is request persistent.
*/
static inline void mca_ptl_base_send_request_init(
mca_ptl_base_send_request_t *request,
void *addr,
size_t length,
size_t count,
lam_datatype_t* datatype,
int peer,
int tag,
lam_communicator_t* comm,
mca_pml_base_send_mode_t sendmode,
mca_pml_base_send_mode_t mode,
bool persistent)
{
request->req_offset = 0;
request->req_frags = 0;
request->req_bytes_sent = 0;
request->req_bytes_acked = 0;
request->req_send_mode = sendmode;
request->req_send_mode = mode;
request->req_peer_request.lval = 0;
request->req_msg_seq = mca_pml_ptl_comm_send_sequence(comm->c_pml_comm, peer);
request->super.req_addr = addr;
request->super.req_length = length;
request->super.req_count = count;
request->super.req_datatype = datatype;
request->super.req_peer = peer;
request->super.req_tag = tag;
request->super.req_comm = comm;
request->super.req_proc = lam_comm_peer_lookup(comm,peer);
request->super.req_type = MCA_PML_REQUEST_SEND;
request->super.req_persistent = persistent;
request->super.req_mpi_done = false;
request->super.req_pml_done = false;
request->super.req_free_called = false;
request->super.super.req_type = LAM_REQUEST_PML;
/* initialize datatype convertor for this request */
if(count > 0) {
int packed_size;
lam_convertor_copy(request->super.req_proc->proc_convertor, &request->req_convertor);
lam_convertor_init_for_send(
&request->req_convertor,
0,
request->super.req_datatype,
request->super.req_count,
request->super.req_addr,
0);
lam_convertor_get_packed_size(&request->req_convertor, &packed_size);
request->req_packed_size = packed_size;
} else {
request->req_packed_size = 0;
}
}
/**
 * Test whether an acknowledgment (match) has been received from the peer.
 *
 * @param request (IN) Send request.
 * @return TRUE if an ack/match has been received from the peer.
*/
static inline bool mca_ptl_base_send_request_matched(
mca_ptl_base_send_request_t* request)
{
return (NULL != request->req_peer_request.pval);
}
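/*
 * Sketch of how the PML is expected to drive the send request interface
 * above (illustrative only -- error handling is omitted and the local
 * variable names are invented):
 *
 *   mca_ptl_base_send_request_t* sendreq;
 *   ptl->ptl_request_alloc(ptl, &sendreq);    // PTL returns a derived type
 *   mca_ptl_base_send_request_init(sendreq, buf, length, count, datatype,
 *       dest, tag, comm, MCA_PML_BASE_SEND_STANDARD, false);
 *   ptl->ptl_send(ptl, peer, sendreq, ptl->ptl_first_frag_size, 0);
 *
 *   // any bytes beyond the first fragment are scheduled only after the
 *   // peer acknowledges the match:
 *   if (mca_ptl_base_send_request_matched(sendreq)) {
 *       // schedule bytes [req_offset, req_packed_size) across the PTLs
 *   }
 */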
#endif

View file

@ -1,9 +1,127 @@
/*
* $HEADER$
*/
/*
/**
* @file
*
* P2P Transport Layer (PTL)
*
* An MCA component type that allows the PML (mca_pml_t) to support a variety of
* network transports concurrently. The PTL layer is responsible for the reliable
* delivery of message fragments, while the assignment and scheduling of fragments
* to PTLs is handled by the upper layer.
*
* PTL Initialization:
*
* During library initialization, all available PTL modules are loaded and opened
* via their mca_base_open_module_fn_t function. If possible, the module should
 * load and open regardless of whether the transport is available. This allows
 * parameters used by the module to be reported by tools such as XXXinfo.
*
* The mca_ptl_base_module_init_fn_t() is then called for each of the modules that
 * are successfully opened. The module init function may return one of:
*
* (1) a NULL list of PTL instances if the transport is not available,
* (2) a list containing a single PTL instance, where the PTL provides
* a layer of abstraction over multiple physical devices (e.g. NICs),
* (3) a list containing multiple PTL instances where each PTL instance
* corresponds to a single physical device.
*
* If multiple network devices are available for a given transport, the preferred
* approach is (3) above. In this case, the PML layer will handle scheduling
* across the available resources, and fail-over in the event of a PTL failure.
* If the second approach is used, and a single PTL instance abstracts multiple
* physical devices, the PTL assumes all responsibility for scheduling/failover
* within those devices.
*
* During module initialization, the module should post any addressing information
* required by its peers. An example would be the TCP listen port opened by the
* TCP module for incoming connection requests. This information is published
* to peers via the mca_base_modex_send() interface. Note that peer information
 * will not be available via mca_base_modex_recv() during the module's init
* function. However, it is guaranteed to be available during PTL selection.
*
* PTL Selection:
*
* The PML maintains a list of available PTL instances, sorted by their exclusivity
* ranking. This is a relative ranking that is used to select the set of PTLs that
* may be used to reach a given destination. The PTL modules are queried via their
* mca_ptl_base_add_proc_fn_t() to determine if they are able to reach a given destination.
* The first PTL module that returns success is selected. Subsequent PTL modules are
* queried only if they are at the same exclusivity ranking.
*
* An example of how this might be used:
*
* PTL Exclusivity Comments
* -------- ----------- ------------------
* LO 100 Selected exclusively for local process
* SM 50 Selected exclusively for other processes on host
* IB 0 Selected based on network reachability
* IB 0 Selected based on network reachability
* TCP 0 Selected based on network reachability
* TCP 0 Selected based on network reachability
*
 * When a PTL module is selected, it may optionally return a pointer to an
 * mca_ptl_base_peer_t data structure to the PML. This pointer is cached by the PML
* and returned to the PTL on subsequent data transfer calls to this specific process.
* The actual contents of the data structure are defined on a per PTL basis, and are
* typically used to cache addressing or connection information, such as the TCP socket
* used by the TCP PTL.
*
* Send Path:
*
* When multiple PTLs are available to reach a given destination, a single request
* (that is large enough) will be split across the available PTLs. The PML scheduler
* will determine the PTL to use for the first fragment based on the relative latency
* ranking exported by the PTLs.
*
 * To minimize latency, and allow for derived types of the mca_ptl_base_send_request_t,
 * the PML will call the selected PTL's ptl_request_alloc() method to allocate the send
 * request descriptor. The PTL should return a derived type of mca_ptl_base_send_request_t,
* that contains space for both the base send request and initial fragment descriptor
* (mca_ptl_base_send_frag_t or derived type). This approach allows all of the descriptors
* required to initiate the send to be allocated from a free list in a single operation.
*
* When the request is started, the PML will call the selected PTL's ptl_send() method
* with up to ptl_first_frag_size bytes of the request. The PTL should attempt to deliver
* up to the requested number of bytes. The number of bytes actually fragmented and queued
* for delivery must be updated on the send request to reflect the current offset into the
* send buffer.
*
* If the request is larger than ptl_first_frag_size, the remainder of the request
* will be scheduled across potentially multiple PTLs, upon an acknowledgment from
* the peer that the request has been matched on the receive side. The PTL must defer
 * calling ptl_send_progress() on the initial fragment until an acknowledgment is received,
* as this signals to the PML that the remaining fragments may be scheduled. For further
* detail on the scheduling algorithm, refer to the PML (mca_pml_t) documentation.
*
* As subsequent fragments are completed by the PTLs, the ptl_send_progress() method should
* be called to update the status of the send request. Note that each PTL is responsible
* for managing the resources associated with send fragments and their allocation from and
* return to internal caches/free lists.
*
* Recv Path:
*
* The first fragment of a message is sent with a header type of mca_ptl_base_match_header_t.
* When a header of this type is received, to minimize latency the PTL should call the
 * ptl_match() method as soon as the entire header is available, potentially prior to receiving
 * any data associated with the first fragment. If a match is made, the PML will call
 * the ptl_recv() method of the fragment's PTL.
*
* The ptl_recv() method should generate, if required, an ack to the source process. An
* ack is required if the MCA_PTL_FLAGS_ACK_MATCHED bit is set by the source in the initial
* message header. The ack should contain a pointer to the matched request, along
 * with the pointer to the original send fragment contained in the initial message header.
*
* On receipt of the ack, the source will schedule any remaining fragments. The selected PTLs
* should generate the remaining fragments with an mca_ptl_base_frag_header_t, which contains
* a placeholder for a pointer to the matched receive request. This allows the receiver to
* avoid calling the matching logic for subsequent fragments. As fragments are completed,
 * each PTL calls its ptl_recv_progress() method to update the PML with the request
* status.
*
*/
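/*
 * Illustrative sketch of the send-path scheduling described above. This is
 * not the actual PML implementation -- ptl_array, nptls, next and peer_of()
 * are invented names used only to show the shape of the scheduling loop:
 *
 *   while (sendreq->req_offset < sendreq->req_packed_size) {
 *       mca_ptl_t* ptl = ptl_array[next++ % nptls];
 *       size_t bytes = sendreq->req_packed_size - sendreq->req_offset;
 *       // the PTL may queue fewer bytes than requested; it updates
 *       // sendreq->req_offset to reflect what was actually fragmented
 *       ptl->ptl_send(ptl, peer_of(ptl), sendreq, bytes, 0);
 *   }
 */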
#ifndef MCA_PTL_H
#define MCA_PTL_H
@ -13,7 +131,6 @@
#include "proc/proc.h"
#include "mca/pml/pml.h"
/*
* PTL types
*/
@ -36,9 +153,10 @@ typedef lam_list_t mca_ptl_base_queue_t;
*/
/**
 * MCA->PTL Initializes the PTL module and creates specific PTL instance(s).
 * MCA->PTL Initializes the PTL module and creates specific PTL instance(s).
*
* @param num_ptls (OUT) Returns the number of ptl instances created.
* @param num_ptls (OUT) Returns the number of ptl instances created, or 0
* if the transport is not available.
*
* @param allow_multi_user_threads (OUT) Whether this module can run
* at MPI_THREAD_MULTIPLE or not.
@ -46,7 +164,14 @@ typedef lam_list_t mca_ptl_base_queue_t;
* @param have_hidden_threads (OUT) Whether this module may use
* hidden threads (e.g., progress threads) or not.
*
* @return Array of pointers to PTL instances.
* @return Array of pointers to PTL instances, or NULL if the transport
* is not available.
*
* During module initialization, the PTL module should discover the physical
 * devices that are available for the given transport, and create a PTL
 * instance to represent each available device. Any addressing information
* required by peers to reach the available devices should be published during
* the module init via the mca_base_modex_send() interface.
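 *
 * For example (sketch only -- the address structure and the exact modex
 * call signature are assumptions, not part of this header):
 *
 *   struct my_ptl_addr_t addrs[MY_NUM_PTLS];  // e.g. IP/port per device
 *   // ... fill in one entry per PTL instance ...
 *   mca_base_modex_send(&my_module.super.ptlm_version,
 *       addrs, sizeof(addrs));  // peers fetch this via mca_base_modex_recv()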
*/
typedef struct mca_ptl_t** (*mca_ptl_base_module_init_fn_t)(
int *num_ptls,
@ -56,53 +181,71 @@ typedef struct mca_ptl_t** (*mca_ptl_base_module_init_fn_t)(
/**
* PML->PTL Progresses outstanding requests in each PTL module.
*
 * @param timestamp (IN) The current time - used for retransmission timers.
*/
typedef void (*mca_ptl_base_module_progress_fn_t)(
mca_ptl_base_tstamp_t timestamp
);
/**
* PTL module version and interface functions.
* PTL module descriptor. Contains module version information
* and module open/close/init functions.
*/
struct mca_ptl_base_module_1_0_0_t {
mca_base_module_t ptlm_version;
mca_base_module_data_1_0_0_t ptlm_data;
mca_ptl_base_module_init_fn_t ptlm_init;
mca_ptl_base_module_progress_fn_t ptlm_progress;
};
typedef struct mca_ptl_base_module_1_0_0_t mca_ptl_base_module_1_0_0_t;
typedef struct mca_ptl_base_module_1_0_0_t mca_ptl_base_module_t;
/*
* PTL instance interface functions and datatype.
* PTL instance interface functions and datatype.
*/
/**
* PML->PTL notification of change in the process list.
* MCA->PTL Clean up any resources held by PTL instance before the
* module is unloaded.
*
* @param ptl (IN) PTL instance.
*
* @param ptl (IN)
* @param procs (IN)
* @param nprocs (IN)
* @return
* Prior to unloading a PTL module, the MCA framework will call the PTL
* finalize method for each PTL instance.
*
*/
typedef int (*mca_ptl_base_finalize_fn_t)(
struct mca_ptl_t* ptl
);
/**
* PML->PTL notification of change in the process list.
*
* @param ptl (IN) PTL instance
* @param proc (IN) Peer process
* @param peer (OUT) Peer addressing information.
 * @return Status indicating whether the PTL is reachable.
*
* The mca_ptl_base_add_proc_fn_t() is called by the PML to determine
* the set of PTLs that should be used to reach the specified process.
* A return value of LAM_SUCCESS indicates the PTL should be added to the
* set used to reach the proc. The peers addressing information may be
* obtained by the PTL via the mca_base_modex_recv() function if required.
* The PTL may optionally return a pointer to a mca_ptl_base_peer_t data
* structure, to cache peer addressing or connection information.
*/
typedef int (*mca_ptl_base_add_proc_fn_t)(
struct mca_ptl_t* ptl,
struct lam_proc_t* proc,
struct mca_ptl_base_peer_t**
struct mca_ptl_base_peer_t** peer
);
/**
* PML->PTL notification of change in the process list.
* PML->PTL notification of change in the process list.
*
* @param ptl (IN)
* @param procs (IN)
* @param nprocs (IN)
* @return
* @param ptl (IN) PTL instance
* @param proc (IN) Peer process
* @param peer (IN) Peer addressing information.
* @return Status indicating if cleanup was successful
*
* If the process list shrinks, the PML will notify the PTL of the
* change. Peer addressing information cached by the PML is provided
* for cleanup by the PTL.
*/
typedef int (*mca_ptl_base_del_proc_fn_t)(
struct mca_ptl_t* ptl,
@ -111,29 +254,59 @@ typedef int (*mca_ptl_base_del_proc_fn_t)(
);
/**
* MCA->PTL Clean up any resources held by PTL instance before the module is unloaded.
*
* @param ptl (IN) The PTL module instance that is being unloaded.
 * PML->PTL Allocate a send request from the PTL module's free list.
*
* @param ptl (IN) PTL instance
* @param request (OUT) Pointer to allocated request.
* @return Status indicating if allocation was successful.
*
* To reduce latency (number of required allocations), a derived
* type of mca_ptl_base_send_request_t is obtained from the PTL that
* is selected to send the first fragment. The derived type should contain
* space for the base request structure, the PTL first fragment,
* and any other required buffer space.
*/
typedef int (*mca_ptl_base_finalize_fn_t)(
struct mca_ptl_t* ptl
);
typedef int (*mca_ptl_base_request_alloc_fn_t)(
struct mca_ptl_t* ptl,
struct mca_ptl_base_send_request_t** request
);
/**
 * PML->PTL Return a send request to the PTL module's free list.
*
* @param ptl (IN) PTL instance
* @param request (IN) Pointer to allocated request.
*
* Called when the request has been completed at both the MPI
* and PML layers.
*/
typedef void (*mca_ptl_base_request_return_fn_t)(
struct mca_ptl_t* ptl,
struct mca_ptl_base_send_request_t* request
);
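/**
 * PML->PTL Return a recv fragment to the PTL module's free list.
 *
 * @param ptl (IN)  PTL instance
 * @param frag (IN) Receive fragment being returned.
 */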
typedef void (*mca_ptl_base_frag_return_fn_t)(
struct mca_ptl_t* ptl,
struct mca_ptl_base_recv_frag_t* frag
);
/**
* PML->PTL Initiate a send of the specified size.
*
* @param ptl (IN) PTL instance
* @param ptl_base_peer (IN) PTL peer addressing
* @param send_request (IN/OUT) Send request (allocated by PML via mca_ptl_base_request_alloc_fn_t)
* @param size (IN) Number of bytes PML is requesting PTL to deliver
* @param flags (IN) Flags that should be passed to the peer via the message header.
 * @return LAM_SUCCESS if the PTL was able to queue one or more fragments.
*
* When multiple PTLs are available, a single request (that is large enough)
* will be split across the available PTLs. The PML scheduler will determine
* the percentage given to a PTL based on the bandwidth provided by the transport
* and its current resource usage. The size parameter to the send function indicates
* the number of bytes the PML is requesting the PTL to send. The PTL may choose
* to send 0->size bytes based on available resources.
*
* The current offset into the users buffer is passed into the send function
* via the req_offset member of the send request parameter. The send function
* must update req_offset with the actual number of bytes the PTL is able to
* fragment for delivery.
*/
typedef int (*mca_ptl_base_send_fn_t)(
struct mca_ptl_t* ptl,
struct mca_ptl_base_peer_t* ptl_base_peer,
@ -142,36 +315,77 @@ typedef int (*mca_ptl_base_send_fn_t)(
int flags
);
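/*
 * Skeleton of a conforming ptl_send() implementation (illustrative only;
 * the header/fragment setup in the middle is elided):
 *
 *   int example_ptl_send(struct mca_ptl_t* ptl,
 *                        struct mca_ptl_base_peer_t* peer,
 *                        struct mca_ptl_base_send_request_t* req,
 *                        size_t size, int flags)
 *   {
 *       // never fragment past the end of the packed data
 *       if (req->req_offset + size > req->req_packed_size)
 *           size = req->req_packed_size - req->req_offset;
 *       // ... build the header and pack up to size bytes ...
 *       req->req_offset += size;   // bytes actually queued for delivery
 *       return LAM_SUCCESS;
 *   }
 */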
/**
* PML->PTL Notification that a receive fragment has been matched.
*
* @param ptl (IN) PTL instance
* @param recv_frag (IN) Receive fragment
*
* A fragment may be matched either when a new receive is posted,
* or on receipt of a fragment from the network. In either case,
* the PML will downcall into the PTL to provide a notification
* that the match was made.
*/
typedef void (*mca_ptl_base_recv_fn_t)(
struct mca_ptl_t* ptl,
struct mca_ptl_base_recv_frag_t* recv_frag,
struct lam_status_public_t* recv_status
struct mca_ptl_base_recv_frag_t* recv_frag
);
/**
* PTL->PML Notification from the PTL to the PML that a new fragment
* has arrived and can be matched against posted receives.
*
* @param ptl (IN) PTL instance
 * @param recv_frag (IN) Receive fragment
* @param header (IN) Message header
*
 * A fragment may be matched either when a new receive is posted,
 * or on receipt of a fragment from the network. The PTL upcalls
 * into the PML via this method when a fragment header arrives; if
 * the match cannot be made immediately, it will be made later as
 * receives are posted.
*
* The message header used for matching is not required to be
* contained within the receive fragment. However, if the match is
* not made, the matching code will copy the supplied header into the
* recv fragment so that the match can be made when the receive is posted.
*/
typedef int (*mca_ptl_base_match_fn_t)(
struct mca_ptl_base_recv_frag_t* frag,
struct mca_ptl_base_recv_frag_t* recv_frag,
struct mca_ptl_base_match_header_t* header
);
/**
* PTL->PML Notification from the PTL to the PML that a fragment
 * has completed (e.g. been successfully delivered into the user's buffer)
*
* @param recv_request (IN) Receive Request
* @param recv_frag (IN) Receive Fragment
*/
typedef void (*mca_ptl_base_recv_progress_fn_t)(
struct mca_ptl_base_recv_request_t* recv_request,
struct mca_ptl_base_recv_frag_t* recv_frag
);
/**
* PTL->PML Notification from the PTL to the PML that a fragment
 * has completed (e.g. been successfully delivered to the peer)
*
* @param send_request (IN) Send Request
* @param send_frag (IN) Send Fragment
*/
typedef void (*mca_ptl_base_send_progress_fn_t)(
struct mca_ptl_base_send_request_t* send_request,
struct mca_ptl_base_send_frag_t* send_frag
);
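/*
 * Sketch of the completion path (illustrative only): once a fragment has
 * been delivered, the PTL upcalls into the PML through the function table
 * that the PML fills in at init:
 *
 *   frag->frag_owner->ptl_send_progress(sendreq, sendfrag);
 *   // the PML accounts for the bytes (e.g. req_bytes_sent) and marks the
 *   // MPI request complete once the entire message is delivered
 */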
/**
* PTL instance interface functions and common state.
* PTL instance interface functions and attributes.
*/
struct mca_ptl_t {
/* PTL common attributes */
mca_ptl_base_module_t* ptl_module;
size_t ptl_first_frag_size; /**< maximum size of first fragment */
mca_ptl_base_module_t* ptl_module; /**< pointer back to the PTL module structure */
size_t ptl_first_frag_size; /**< maximum size of first fragment -- eager send */
size_t ptl_min_frag_size; /**< threshold below which the PTL will not fragment */
size_t ptl_max_frag_size; /**< maximum fragment size supported by the PTL */
uint32_t ptl_exclusivity; /**< indicates this PTL should be used exclusively */
@ -186,7 +400,6 @@ struct mca_ptl_t {
mca_ptl_base_recv_fn_t ptl_recv;
mca_ptl_base_request_alloc_fn_t ptl_request_alloc;
mca_ptl_base_request_return_fn_t ptl_request_return;
mca_ptl_base_frag_return_fn_t ptl_frag_return;
/* PTL->PML function table - filled in by PML at init */
mca_ptl_base_match_fn_t ptl_match;

View file

@ -2,6 +2,7 @@
* $HEADER$
*/
#include <string.h>
#include "util/output.h"
#include "util/if.h"
#include "mca/pml/pml.h"
@ -35,28 +36,11 @@ mca_ptl_tcp_t mca_ptl_tcp = {
mca_ptl_tcp_send,
mca_ptl_tcp_recv,
mca_ptl_tcp_request_alloc,
mca_ptl_tcp_request_return,
(mca_ptl_base_frag_return_fn_t)mca_ptl_tcp_recv_frag_return
mca_ptl_tcp_request_return
}
};
int mca_ptl_tcp_create(int if_index)
{
mca_ptl_tcp_t* ptl = malloc(sizeof(mca_ptl_tcp_t));
if(NULL == ptl)
return LAM_ERR_OUT_OF_RESOURCE;
memcpy(ptl, &mca_ptl_tcp, sizeof(mca_ptl_tcp));
mca_ptl_tcp_module.tcp_ptls[mca_ptl_tcp_module.tcp_num_ptls++] = ptl;
/* initialize the ptl */
ptl->ptl_ifindex = if_index;
lam_ifindextoaddr(if_index, (struct sockaddr*)&ptl->ptl_ifaddr, sizeof(ptl->ptl_ifaddr));
lam_ifindextomask(if_index, (struct sockaddr*)&ptl->ptl_ifmask, sizeof(ptl->ptl_ifmask));
return LAM_SUCCESS;
}
int mca_ptl_tcp_add_proc(struct mca_ptl_t* ptl, struct lam_proc_t *lam_proc, struct mca_ptl_base_peer_t** peer_ret)
{
mca_ptl_tcp_proc_t* ptl_proc = mca_ptl_tcp_proc_create(lam_proc);
@ -130,8 +114,7 @@ void mca_ptl_tcp_request_return(struct mca_ptl_t* ptl, struct mca_ptl_base_send_
void mca_ptl_tcp_recv_frag_return(struct mca_ptl_t* ptl, struct mca_ptl_tcp_recv_frag_t* frag)
{
if(frag->frag_buff != NULL && frag->frag_buff != frag->super.super.frag_addr)
free(frag->frag_buff);
/* FIX - need to cleanup convertor */
lam_free_list_return(&mca_ptl_tcp_module.tcp_recv_frags, (lam_list_item_t*)frag);
}
@ -139,8 +122,9 @@ void mca_ptl_tcp_recv_frag_return(struct mca_ptl_t* ptl, struct mca_ptl_tcp_recv
void mca_ptl_tcp_send_frag_return(struct mca_ptl_t* ptl, struct mca_ptl_tcp_send_frag_t* frag)
{
if(lam_list_get_size(&mca_ptl_tcp_module.tcp_pending_acks)) {
mca_ptl_tcp_recv_frag_t* pending = (mca_ptl_tcp_recv_frag_t*)
lam_list_remove_first(&mca_ptl_tcp_module.tcp_pending_acks);
mca_ptl_tcp_recv_frag_t* pending;
THREAD_LOCK(&mca_ptl_tcp_module.tcp_lock);
pending = (mca_ptl_tcp_recv_frag_t*)lam_list_remove_first(&mca_ptl_tcp_module.tcp_pending_acks);
if(NULL == pending) {
THREAD_UNLOCK(&mca_ptl_tcp_module.tcp_lock);
@ -156,6 +140,12 @@ void mca_ptl_tcp_send_frag_return(struct mca_ptl_t* ptl, struct mca_ptl_tcp_send
}
}
/*
* Initiate a send. If this is the first fragment, use the fragment
 * descriptor allocated with the send request; otherwise obtain
 * one from the free list. Initialize the fragment and forward
 * it on to the peer.
*/
int mca_ptl_tcp_send(
struct mca_ptl_t* ptl,
@ -178,20 +168,17 @@ int mca_ptl_tcp_send(
}
/*
* A posted receive has been matched - if required send an
* ack back to the peer and process the fragment.
*/
void mca_ptl_tcp_recv(
mca_ptl_t* ptl,
mca_ptl_base_recv_frag_t* frag,
lam_status_public_t* status)
mca_ptl_base_recv_frag_t* frag)
{
/* send ack back to peer? */
mca_ptl_base_header_t* header = &frag->super.frag_header;
/* fill in match */
status->MPI_SOURCE = header->hdr_match.hdr_src;
status->MPI_TAG = header->hdr_match.hdr_tag;
status->MPI_ERROR = LAM_SUCCESS;
status->_count = header->hdr_match.hdr_msg_length;
/* send ack back to peer */
if(header->hdr_common.hdr_flags & MCA_PTL_FLAGS_ACK_MATCHED) {
int rc;
mca_ptl_tcp_send_frag_t* ack = mca_ptl_tcp_send_frag_alloc(&rc);

View file

@ -1,8 +1,9 @@
/* @file
*
/*
* $HEADER$
*/
/**
* @file
*/
#ifndef MCA_PTL_TCP_H
#define MCA_PTL_TCP_H
@ -15,31 +16,30 @@
#include "mca/ptl/ptl.h"
/*
/**
* TCP PTL module.
*/
struct mca_ptl_tcp_module_1_0_0_t {
mca_ptl_base_module_1_0_0_t super;
struct mca_ptl_tcp_t** tcp_ptls;
size_t tcp_num_ptls; /**< number of ptls actually used */
size_t tcp_max_ptls; /**< maximum number of ptls - available kernel ifs */
int tcp_listen_sd;
unsigned short tcp_listen_port;
    char* tcp_if_include; /**< comma separated list of interfaces to include */
    char* tcp_if_exclude; /**< comma separated list of interfaces to exclude */
int tcp_free_list_num; /**< initial size of free lists */
int tcp_free_list_max; /**< maximum size of free lists */
int tcp_free_list_inc; /**< number of elements to alloc when growing free lists */
lam_free_list_t tcp_send_requests;
lam_free_list_t tcp_send_frags;
lam_free_list_t tcp_recv_frags;
lam_list_t tcp_procs;
lam_list_t tcp_pending_acks;
struct mca_ptl_tcp_proc_t* tcp_local;
lam_event_t tcp_send_event;
lam_event_t tcp_recv_event;
lam_mutex_t tcp_lock;
mca_ptl_base_module_1_0_0_t super; /**< base PTL module */
struct mca_ptl_tcp_t** tcp_ptls; /**< array of available PTLs */
size_t tcp_num_ptls; /**< number of ptls actually used */
size_t tcp_max_ptls; /**< maximum number of ptls - available kernel ifs */
int tcp_listen_sd; /**< listen socket for incoming connection requests */
unsigned short tcp_listen_port; /**< listen port */
    char* tcp_if_include;          /**< comma separated list of interfaces to include */
    char* tcp_if_exclude;          /**< comma separated list of interfaces to exclude */
int tcp_free_list_num; /**< initial size of free lists */
int tcp_free_list_max; /**< maximum size of free lists */
int tcp_free_list_inc; /**< number of elements to alloc when growing free lists */
lam_free_list_t tcp_send_requests; /**< free list of tcp send requests -- sendreq + sendfrag */
lam_free_list_t tcp_send_frags; /**< free list of tcp send fragments */
lam_free_list_t tcp_recv_frags; /**< free list of tcp recv fragments */
lam_list_t tcp_procs; /**< list of tcp proc structures */
lam_list_t tcp_pending_acks; /**< list of pending acks - retry as sends complete */
struct mca_ptl_tcp_proc_t* tcp_local; /**< the tcp proc instance corresponding to the local process */
lam_event_t tcp_send_event; /**< event structure for sends */
lam_event_t tcp_recv_event; /**< event structure for recvs */
lam_mutex_t tcp_lock; /**< lock for accessing module state */
};
typedef struct mca_ptl_tcp_module_1_0_0_t mca_ptl_tcp_module_1_0_0_t;
typedef struct mca_ptl_tcp_module_1_0_0_t mca_ptl_tcp_module_t;
@ -48,76 +48,140 @@ struct mca_ptl_tcp_send_frag_t;
extern mca_ptl_tcp_module_1_0_0_t mca_ptl_tcp_module;
/**
* Register TCP module parameters with the MCA framework
*/
extern int mca_ptl_tcp_module_open(void);
/**
* Any final cleanup before being unloaded.
*/
extern int mca_ptl_tcp_module_close(void);
/**
* TCP module initialization.
*
* @param num_ptls (OUT) Number of PTLs returned in PTL array.
 * @param allow_multi_user_threads (OUT) Flag indicating whether PTL supports user threads (TRUE)
 * @param have_hidden_threads (OUT) Flag indicating whether PTL uses threads (TRUE)
*
* (1) read interface list from kernel and compare against module parameters
* then create a PTL instance for selected interfaces
* (2) setup TCP listen socket for incoming connection attempts
* (3) publish PTL addressing info
*
*/
extern mca_ptl_t** mca_ptl_tcp_module_init(
int *num_ptls,
bool *allow_multi_user_threads,
bool *have_hidden_threads
);
extern void mca_ptl_tcp_module_progress(
mca_ptl_base_tstamp_t tstamp
);
/**
* TCP PTL Interface
*
*/
struct mca_ptl_tcp_t {
mca_ptl_t super;
int ptl_ifindex;
struct sockaddr_in ptl_ifaddr;
struct sockaddr_in ptl_ifmask;
mca_ptl_t super; /**< base PTL interface */
int ptl_ifindex; /**< PTL interface index */
struct sockaddr_in ptl_ifaddr; /**< PTL interface address */
struct sockaddr_in ptl_ifmask; /**< PTL interface netmask */
};
typedef struct mca_ptl_tcp_t mca_ptl_tcp_t;
extern mca_ptl_tcp_t mca_ptl_tcp;
extern int mca_ptl_tcp_create(
int if_index
);
/**
* Cleanup any resources held by the PTL.
*
 * @param ptl (IN) PTL instance.
* @return LAM_SUCCESS or error status on failure.
*/
extern int mca_ptl_tcp_finalize(
struct mca_ptl_t* ptl
);
/**
* PML->PTL notification of change in the process list.
*
 * @param ptl (IN) PTL instance
 * @param proc (IN) Peer process
 * @param peer (OUT) Peer addressing information
* @return LAM_SUCCESS or error status on failure.
*
*/
extern int mca_ptl_tcp_add_proc(
struct mca_ptl_t* ptl,
struct lam_proc_t *procs,
struct mca_ptl_base_peer_t** addr
struct lam_proc_t *proc,
struct mca_ptl_base_peer_t** peer
);
/**
* PML->PTL notification of change in the process list.
*
* @param ptl (IN) PTL instance
* @param proc (IN) Peer process
* @param peer (IN) Peer addressing information.
* @return Status indicating if cleanup was successful
*
*/
extern int mca_ptl_tcp_del_proc(
struct mca_ptl_t* ptl,
struct lam_proc_t *procs,
struct mca_ptl_base_peer_t* addr
);
/**
 * PML->PTL Allocate a send request from the PTL module's free list.
*
* @param ptl (IN) PTL instance
* @param request (OUT) Pointer to allocated request.
* @return Status indicating if allocation was successful.
*
*/
extern int mca_ptl_tcp_request_alloc(
struct mca_ptl_t* ptl,
struct mca_ptl_base_send_request_t**
);
/**
 * PML->PTL Return a send request to the PTL module's free list.
*
* @param ptl (IN) PTL instance
* @param request (IN) Pointer to allocated request.
*
*/
extern void mca_ptl_tcp_request_return(
struct mca_ptl_t* ptl,
struct mca_ptl_base_send_request_t*
);
extern void mca_ptl_tcp_recv_frag_return(
/**
* PML->PTL Notification that a receive fragment has been matched.
*
* @param ptl (IN) PTL instance
* @param recv_frag (IN) Receive fragment
*
*/
extern void mca_ptl_tcp_recv(
struct mca_ptl_t* ptl,
struct mca_ptl_tcp_recv_frag_t*
struct mca_ptl_base_recv_frag_t* frag
);
extern void mca_ptl_tcp_send_frag_return(
struct mca_ptl_t* ptl,
struct mca_ptl_tcp_send_frag_t*
);
/**
* PML->PTL Initiate a send of the specified size.
*
* @param ptl (IN) PTL instance
* @param ptl_base_peer (IN) PTL peer addressing
* @param send_request (IN/OUT) Send request (allocated by PML via mca_ptl_base_request_alloc_fn_t)
* @param size (IN) Number of bytes PML is requesting PTL to deliver
* @param flags (IN) Flags that should be passed to the peer via the message header.
 * @return LAM_SUCCESS if the PTL was able to queue one or more fragments.
*/
extern int mca_ptl_tcp_send(
struct mca_ptl_t* ptl,
struct mca_ptl_base_peer_t* ptl_peer,
@ -125,13 +189,32 @@ extern int mca_ptl_tcp_send(
size_t size,
int flags
);
extern void mca_ptl_tcp_recv(
/**
 * Return a recv fragment to the module's free list.
*
* @param ptl (IN) PTL instance
* @param frag (IN) TCP receive fragment
*
*/
extern void mca_ptl_tcp_recv_frag_return(
struct mca_ptl_t* ptl,
struct mca_ptl_base_recv_frag_t* frag,
struct lam_status_public_t* status
struct mca_ptl_tcp_recv_frag_t* frag
);
/**
 * Return a send fragment to the module's free list.
*
* @param ptl (IN) PTL instance
* @param frag (IN) TCP send fragment
*
*/
extern void mca_ptl_tcp_send_frag_return(
struct mca_ptl_t* ptl,
struct mca_ptl_tcp_send_frag_t*
);
#endif

View file

@ -1,8 +1,9 @@
/* @file
*
/*
* $HEADER$
*/
/**
* @file
*/
#ifndef MCA_PTL_TCP_ADDR_H
#define MCA_PTL_TCP_ADDR_H
@ -11,10 +12,13 @@
#include <netinet/in.h>
/**
* Structure used to publish TCP connection information to peers.
*/
struct mca_ptl_tcp_addr_t {
struct in_addr addr_inet;
in_port_t addr_port;
unsigned short addr_inuse;
struct in_addr addr_inet; /**< IPv4 address in network byte order */
in_port_t addr_port; /**< listen port */
unsigned short addr_inuse; /**< local meaning only */
};
typedef struct mca_ptl_tcp_addr_t mca_ptl_tcp_addr_t;

View file

@ -3,6 +3,7 @@
*/
#include <errno.h>
#include <unistd.h>
#include <string.h>
#include <fcntl.h>
#include <sys/types.h>
#include <sys/socket.h>
@ -54,8 +55,7 @@ mca_ptl_tcp_module_1_0_0_t mca_ptl_tcp_module = {
false
},
mca_ptl_tcp_module_init, /* module init */
mca_ptl_tcp_module_progress /* module progress */
mca_ptl_tcp_module_init /* module init */
}
};
@ -124,7 +124,7 @@ int mca_ptl_tcp_module_open(void)
mca_ptl_tcp.super.ptl_exclusivity =
mca_ptl_tcp_param_register_int("exclusivity", 0);
mca_ptl_tcp.super.ptl_first_frag_size =
mca_ptl_tcp_param_register_int("first_frag_size", 16*1024);
mca_ptl_tcp_param_register_int("first_frag_size", 64*1024);
mca_ptl_tcp.super.ptl_min_frag_size =
mca_ptl_tcp_param_register_int("min_frag_size", 64*1024);
mca_ptl_tcp.super.ptl_max_frag_size =
@ -149,6 +149,27 @@ int mca_ptl_tcp_module_close(void)
}
/*
* Create a ptl instance and add to modules list.
*/
static int mca_ptl_tcp_create(int if_index)
{
mca_ptl_tcp_t* ptl = malloc(sizeof(mca_ptl_tcp_t));
if(NULL == ptl)
return LAM_ERR_OUT_OF_RESOURCE;
memcpy(ptl, &mca_ptl_tcp, sizeof(mca_ptl_tcp));
mca_ptl_tcp_module.tcp_ptls[mca_ptl_tcp_module.tcp_num_ptls++] = ptl;
/* initialize the ptl */
ptl->ptl_ifindex = if_index;
lam_ifindextoaddr(if_index, (struct sockaddr*)&ptl->ptl_ifaddr, sizeof(ptl->ptl_ifaddr));
lam_ifindextomask(if_index, (struct sockaddr*)&ptl->ptl_ifmask, sizeof(ptl->ptl_ifmask));
return LAM_SUCCESS;
}
/*
* Create a TCP PTL instance for either:
* (1) all interfaces specified by the user
@ -173,7 +194,7 @@ static int mca_ptl_tcp_module_create_instances(void)
if(NULL == mca_ptl_tcp_module.tcp_ptls)
return LAM_ERR_OUT_OF_RESOURCE;
/* if the user specified an interface list - use these only */
/* if the user specified an interface list - use these exclusively */
    argv = include = lam_argv_split(mca_ptl_tcp_module.tcp_if_include,',');
while(argv && *argv) {
char* if_name = *argv;
@ -217,8 +238,8 @@ static int mca_ptl_tcp_module_create_instances(void)
static int mca_ptl_tcp_module_create_listen(void)
{
int flags;
struct sockaddr_in inaddr;
lam_socklen_t addrlen = sizeof(struct sockaddr_in);
struct sockaddr_in inaddr;
lam_socklen_t addrlen;
/* create a listen socket for incoming connections */
mca_ptl_tcp_module.tcp_listen_sd = socket(AF_INET, SOCK_STREAM, 0);
@ -239,6 +260,7 @@ static int mca_ptl_tcp_module_create_listen(void)
}
/* resolve system assigned port */
addrlen = sizeof(struct sockaddr_in);
if(getsockname(mca_ptl_tcp_module.tcp_listen_sd, (struct sockaddr*)&inaddr, &addrlen) < 0) {
lam_output(0, "mca_ptl_tcp_module_init: getsockname() failed with errno=%d", errno);
return LAM_ERROR;
@ -281,7 +303,8 @@ static int mca_ptl_tcp_module_create_listen(void)
static int mca_ptl_tcp_module_exchange(void)
{
size_t i, rc;
int rc;
size_t i;
size_t size = mca_ptl_tcp_module.tcp_num_ptls * sizeof(mca_ptl_tcp_addr_t);
mca_ptl_tcp_addr_t *addrs = malloc(size);
for(i=0; i<mca_ptl_tcp_module.tcp_num_ptls; i++) {
@ -351,7 +374,7 @@ mca_ptl_t** mca_ptl_tcp_module_init(int *num_ptls,
if(mca_ptl_tcp_module_create_listen() != LAM_SUCCESS)
return 0;
/* register TCP parameters with the MCA framework */
/* publish TCP parameters with the MCA framework */
if(mca_ptl_tcp_module_exchange() != LAM_SUCCESS)
return 0;
@ -364,19 +387,6 @@ mca_ptl_t** mca_ptl_tcp_module_init(int *num_ptls,
return ptls;
}
/*
* All TCP progress is handled via an event loop based on select. Events
* are dispatched to the appropriate callbacks as file descriptors become
* available for read/write.
*/
void mca_ptl_tcp_module_progress(mca_ptl_base_tstamp_t tstamp)
{
lam_event_loop(LAM_EVLOOP_NONBLOCK);
}
/*
* Called by mca_ptl_tcp_module_recv() when the TCP listen
* socket has pending connection requests. Accept incoming
@ -388,8 +398,8 @@ static void mca_ptl_tcp_module_accept(void)
while(true) {
lam_socklen_t addrlen = sizeof(struct sockaddr_in);
struct sockaddr_in addr;
int sd = accept(mca_ptl_tcp_module.tcp_listen_sd, (struct sockaddr*)&addr, &addrlen);
lam_event_t* event;
int sd = accept(mca_ptl_tcp_module.tcp_listen_sd, (struct sockaddr*)&addr, &addrlen);
if(sd < 0) {
if(errno == EINTR)
continue;
@ -415,9 +425,9 @@ static void mca_ptl_tcp_module_recv_handler(int sd, short flags, void* user)
void* guid;
uint32_t size;
struct sockaddr_in addr;
lam_socklen_t addr_len = sizeof(addr);
int retval;
mca_ptl_tcp_proc_t* ptl_proc;
lam_socklen_t addr_len = sizeof(addr);
/* accept new connections on the listen socket */
if(mca_ptl_tcp_module.tcp_listen_sd == sd) {

View file

@ -2,6 +2,7 @@
* $HEADER$
*/
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include <sys/errno.h>
#include <sys/types.h>
@ -331,9 +332,9 @@ static int mca_ptl_tcp_peer_recv_connect_ack(mca_ptl_base_peer_t* ptl_peer)
static int mca_ptl_tcp_peer_start_connect(mca_ptl_base_peer_t* ptl_peer)
{
int rc;
int flags;
int rc,flags;
struct sockaddr_in peer_addr;
ptl_peer->peer_sd = socket(AF_INET, SOCK_STREAM, 0);
if (ptl_peer->peer_sd < 0) {
ptl_peer->peer_retries++;

View file

@ -1,8 +1,9 @@
/* @file
*
/*
* $HEADER$
*/
/**
* @file
*/
#ifndef MCA_PTL_TCP_PEER_H
#define MCA_PTL_TCP_PEER_H
@ -26,7 +27,6 @@ typedef enum {
} mca_ptl_tcp_state_t;
/**
* An abstraction that represents a connection to a peer process.
* An instance of mca_ptl_base_peer_t is associated w/ each process
@ -36,18 +36,18 @@ typedef enum {
struct mca_ptl_base_peer_t {
lam_list_item_t super;
struct mca_ptl_tcp_t* peer_ptl;
struct mca_ptl_tcp_proc_t* peer_proc;
struct mca_ptl_tcp_addr_t* peer_addr;
int peer_sd;
mca_ptl_tcp_send_frag_t* peer_send_frag;
mca_ptl_tcp_recv_frag_t* peer_recv_frag;
mca_ptl_tcp_state_t peer_state;
size_t peer_retries;
lam_list_t peer_frags; /* list of pending frags to send */
lam_mutex_t peer_lock;
lam_event_t peer_send_event;
lam_event_t peer_recv_event;
struct mca_ptl_tcp_t* peer_ptl; /**< PTL instance that created this connection */
struct mca_ptl_tcp_proc_t* peer_proc; /**< proc structure corresponding to peer */
struct mca_ptl_tcp_addr_t* peer_addr; /**< address of peer */
int peer_sd; /**< socket connection to peer */
mca_ptl_tcp_send_frag_t* peer_send_frag; /**< current send frag being processed */
mca_ptl_tcp_recv_frag_t* peer_recv_frag; /**< current recv frag being processed */
mca_ptl_tcp_state_t peer_state; /**< current state of the connection */
size_t peer_retries; /**< number of connection retries attempted */
lam_list_t peer_frags; /**< list of pending frags to send */
lam_mutex_t peer_lock; /**< lock for concurrent access to peer state */
lam_event_t peer_send_event; /**< event for async processing of send frags */
lam_event_t peer_recv_event; /**< event for async processing of recv frags */
};
typedef struct mca_ptl_base_peer_t mca_ptl_base_peer_t;

View file

@ -1,6 +1,7 @@
/*
* $HEADER$
*/
#include <string.h>
#include "atomic.h"
#include "lfc/lam_hash_table.h"
#include "mca/base/mca_base_module_exchange.h"
@ -81,7 +82,7 @@ mca_ptl_tcp_proc_t* mca_ptl_tcp_proc_create(lam_proc_t* lam_proc)
return 0;
}
memcpy(ptl_proc->proc_guid, lam_proc->proc_job, size);
memcpy(((char*) ptl_proc->proc_guid) + size, &vpid, sizeof(uint32_t));
memcpy(((unsigned char*)ptl_proc->proc_guid)+size, &vpid, sizeof(uint32_t));
/* lookup tcp parameters exported by this proc */
rc = mca_base_modex_recv(

View file

@ -1,8 +1,9 @@
/* @file
*
/*
* $HEADER$
*/
/**
* @file
*/
#ifndef MCA_PTL_TCP_PROC_H
#define MCA_PTL_TCP_PROC_H
@ -19,19 +20,19 @@ extern lam_class_t mca_ptl_tcp_proc_t_class;
/**
* Represents the state of a remote process and the set of addresses
* that it exports. Also cache an instance or mca_ptl_base_peer_t for each
* that it exports. Also cache an instance of mca_ptl_base_peer_t for each
* PTL instance that attempts to open a connection to the process.
*/
struct mca_ptl_tcp_proc_t {
lam_list_item_t super;
lam_proc_t *proc_lam;
void* proc_guid;
size_t proc_guid_size;
struct mca_ptl_tcp_addr_t *proc_addrs;
size_t proc_addr_count;
struct mca_ptl_base_peer_t **proc_peers;
size_t proc_peer_count;
lam_mutex_t proc_lock;
lam_list_item_t super; /**< allow proc to be placed on a list */
lam_proc_t *proc_lam; /**< pointer to corresponding lam_proc_t */
void* proc_guid; /**< globally unique identifier for the process */
size_t proc_guid_size; /**< size of the guid */
struct mca_ptl_tcp_addr_t *proc_addrs; /**< array of addresses published by peer */
size_t proc_addr_count; /**< number of addresses published by peer */
struct mca_ptl_base_peer_t **proc_peers; /**< array of peers that have been created to access this proc */
size_t proc_peer_count; /**< number of peers */
lam_mutex_t proc_lock; /**< lock to protect against concurrent access to proc state */
};
typedef struct mca_ptl_tcp_proc_t mca_ptl_tcp_proc_t;

View file

@ -48,8 +48,10 @@ void mca_ptl_tcp_recv_frag_init(mca_ptl_tcp_recv_frag_t* frag, mca_ptl_base_peer
{
frag->frag_owner = &peer->peer_ptl->super;
frag->super.frag_request = 0;
frag->super.super.frag_addr = NULL;
frag->super.super.frag_size = 0;
frag->super.frag_is_buffered = false;
frag->frag_peer = peer;
frag->frag_buff = NULL;
frag->frag_hdr_cnt = 0;
frag->frag_msg_cnt = 0;
frag->frag_ack_pending = false;
@ -140,15 +142,11 @@ static bool mca_ptl_tcp_recv_frag_match(mca_ptl_tcp_recv_frag_t* frag, int sd)
/* match was not made - so allocate buffer for eager send */
if (NULL == frag->super.frag_request) {
if(frag->frag_header.hdr_frag.hdr_frag_length > 0) {
frag->frag_buff = malloc(frag->frag_header.hdr_frag.hdr_frag_length);
frag->super.super.frag_addr = malloc(frag->frag_header.hdr_frag.hdr_frag_length);
frag->super.super.frag_size = frag->frag_header.hdr_frag.hdr_frag_length;
frag->super.frag_is_buffered = true;
}
/* match was made - use application buffer */
} else {
frag->frag_buff = (unsigned char*)frag->super.super.frag_addr;
}
}
@ -178,7 +176,6 @@ static bool mca_ptl_tcp_recv_frag_frag(mca_ptl_tcp_recv_frag_t* frag, int sd)
if(frag->frag_msg_cnt == 0) {
frag->super.frag_request = frag->frag_header.hdr_frag.hdr_dst_ptr.pval;
mca_ptl_base_recv_frag_init(&frag->super);
frag->frag_buff = frag->super.super.frag_addr;
}
/* continue to receive user data */
@ -207,7 +204,7 @@ static bool mca_ptl_tcp_recv_frag_data(mca_ptl_tcp_recv_frag_t* frag, int sd)
{
int cnt = -1;
while(cnt < 0) {
cnt = recv(sd, (unsigned char*)frag->frag_buff+frag->frag_msg_cnt,
cnt = recv(sd, (unsigned char*)frag->super.super.frag_addr+frag->frag_msg_cnt,
frag->super.super.frag_size-frag->frag_msg_cnt, 0);
if(cnt == 0) {
mca_ptl_tcp_peer_close(frag->frag_peer);

View file

@ -1,7 +1,9 @@
/* @file
*
/*
* $HEADER$
*/
/**
* @file
*/
#ifndef MCA_PTL_TCP_RECV_FRAG_H
#define MCA_PTL_TCP_RECV_FRAG_H
@ -18,13 +20,14 @@
extern lam_class_t mca_ptl_tcp_recv_frag_t_class;
/**
* TCP received fragment derived type.
*/
struct mca_ptl_tcp_recv_frag_t {
mca_ptl_base_recv_frag_t super;
mca_ptl_base_ack_header_t frag_ack;
unsigned char* frag_buff;
size_t frag_hdr_cnt;
size_t frag_msg_cnt;
bool frag_ack_pending;
mca_ptl_base_recv_frag_t super; /**< base receive fragment descriptor */
size_t frag_hdr_cnt; /**< number of header bytes received */
size_t frag_msg_cnt; /**< number of msg bytes received */
bool frag_ack_pending; /**< is an ack pending for this fragment */
};
typedef struct mca_ptl_tcp_recv_frag_t mca_ptl_tcp_recv_frag_t;
@ -42,8 +45,11 @@ bool mca_ptl_tcp_recv_frag_send_ack(mca_ptl_tcp_recv_frag_t* frag);
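/**
 * Check if the fragment has been completely received, and if so unpack
 * the data (when it was buffered by the PTL) via the convertor and
 * report completion of the fragment to the PML.
 */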
static inline void mca_ptl_tcp_recv_frag_progress(mca_ptl_tcp_recv_frag_t* frag)
{
if(frag->frag_msg_cnt >= frag->super.super.frag_header.hdr_frag.hdr_frag_length) {
if(frag->frag_buff != frag->super.super.frag_addr) {
memcpy(frag->super.super.frag_addr, frag->frag_buff, frag->super.super.frag_size);
if(frag->super.frag_is_buffered) {
struct iovec iov;
iov.iov_base = frag->super.super.frag_addr;
iov.iov_len = frag->super.super.frag_size;
lam_convertor_unpack(&frag->super.super.frag_convertor, &iov, 1);
}
frag->super.super.frag_owner->ptl_recv_progress(frag->super.frag_request, &frag->super);
if(frag->frag_ack_pending == false) {

View file

@ -5,14 +5,17 @@
#include <sys/types.h>
#include <sys/errno.h>
#include "types.h"
#include "datatype/datatype.h"
#include "mca/ptl/base/ptl_base_sendreq.h"
#include "ptl_tcp.h"
#include "ptl_tcp_peer.h"
#include "ptl_tcp_proc.h"
#include "ptl_tcp_sendfrag.h"
#define frag_header super.super.frag_header
#define frag_owner super.super.frag_owner
#define frag_peer super.super.frag_peer
#define frag_header super.super.frag_header
#define frag_owner super.super.frag_owner
#define frag_peer super.super.frag_peer
#define frag_convertor super.super.frag_convertor
static void mca_ptl_tcp_send_frag_construct(mca_ptl_tcp_send_frag_t* frag);
@ -42,7 +45,7 @@ static void mca_ptl_tcp_send_frag_destruct(mca_ptl_tcp_send_frag_t* frag)
* data buffer, and the indicated size.
*/
void mca_ptl_tcp_send_frag_init(
int mca_ptl_tcp_send_frag_init(
mca_ptl_tcp_send_frag_t* sendfrag,
mca_ptl_base_peer_t* ptl_peer,
mca_ptl_base_send_request_t* sendreq,
@ -63,7 +66,7 @@ void mca_ptl_tcp_send_frag_init(
hdr->hdr_match.hdr_src = sendreq->super.req_comm->c_my_rank;
hdr->hdr_match.hdr_dst = sendreq->super.req_peer;
hdr->hdr_match.hdr_tag = sendreq->super.req_tag;
hdr->hdr_match.hdr_msg_length = sendreq->super.req_length;
hdr->hdr_match.hdr_msg_length = sendreq->req_packed_size;
hdr->hdr_match.hdr_msg_seq = sendreq->req_msg_seq;
} else {
hdr->hdr_common.hdr_type = MCA_PTL_HDR_TYPE_FRAG;
@ -75,29 +78,61 @@ void mca_ptl_tcp_send_frag_init(
hdr->hdr_frag.hdr_dst_ptr = sendreq->req_peer_request;
}
/* update request */
if(sendreq->req_offset + size > sendreq->super.req_length)
size = sendreq->super.req_length - sendreq->req_offset;
/* initialize convertor */
if(size > 0) {
lam_convertor_t *convertor;
int rc;
        /* the first fragment (eager send) and the first fragment of the long protocol
         * can use the convertor initialized on the request; remaining fragments
         * must copy/reinit the convertor, as the transfers could proceed in parallel.
*/
if(sendreq->req_frags < 2) {
convertor = &sendreq->req_convertor;
} else {
convertor = &sendfrag->frag_convertor;
if((rc = lam_convertor_copy(&sendreq->req_convertor, convertor)) != LAM_SUCCESS)
return rc;
if((rc = lam_convertor_init_for_send(
convertor,
0,
sendreq->super.req_datatype,
sendreq->super.req_count,
sendreq->super.req_addr,
sendreq->req_offset)) != LAM_SUCCESS)
return rc;
}
        /* if the data is contiguous the convertor will return an offset
         * into the user's buffer - otherwise it will return an allocated
         * buffer that holds the packed data
*/
sendfrag->frag_vec[1].iov_base = NULL;
sendfrag->frag_vec[1].iov_len = size;
if((rc = lam_convertor_pack(convertor, &sendfrag->frag_vec[1], 1)) != LAM_SUCCESS)
return rc;
/* adjust size and request offset to reflect actual number of bytes packed by convertor */
size = sendfrag->frag_vec[1].iov_len;
sendreq->req_offset += size;
}
hdr->hdr_frag.hdr_frag_length = size;
sendreq->req_offset += size;
sendreq->req_frags++;
/* fragment state */
sendfrag->frag_owner = &ptl_peer->peer_ptl->super;
sendfrag->super.frag_request = sendreq;
sendfrag->super.super.frag_addr = ((char*) sendreq->super.req_addr) + hdr->hdr_frag.hdr_frag_offset;
sendfrag->super.super.frag_addr = sendfrag->frag_vec[1].iov_base;
sendfrag->super.super.frag_size = size;
sendfrag->frag_peer = ptl_peer;
sendfrag->frag_vec_ptr = sendfrag->frag_vec;
sendfrag->frag_vec_cnt = (size == 0) ? 1 : 2;
sendfrag->frag_vec[0].iov_base = (lam_iov_base_ptr_t)hdr;
sendfrag->frag_vec[0].iov_len = sizeof(mca_ptl_base_header_t);
sendfrag->frag_vec_cnt = 1;
if(size > 0) {
sendfrag->frag_vec[1].iov_base = (lam_iov_base_ptr_t)sendfrag->super.super.frag_addr;
sendfrag->frag_vec[1].iov_len = sendfrag->super.super.frag_size;
sendfrag->frag_vec_cnt++;
}
return LAM_SUCCESS;
}

View file

@ -1,8 +1,9 @@
/* @file
*
/*
* $HEADER$
*/
/**
* @file
*/
#ifndef MCA_PTL_TCP_SEND_FRAG_H
#define MCA_PTL_TCP_SEND_FRAG_H
@ -19,11 +20,14 @@ extern lam_class_t mca_ptl_tcp_send_frag_t_class;
struct mca_ptl_base_peer_t;
/**
* TCP send fragment derived type.
*/
struct mca_ptl_tcp_send_frag_t {
mca_ptl_base_send_frag_t super;
struct iovec *frag_vec_ptr;
size_t frag_vec_cnt;
struct iovec frag_vec[2];
mca_ptl_base_send_frag_t super; /**< base send fragment descriptor */
struct iovec *frag_vec_ptr; /**< pointer into iovec array */
size_t frag_vec_cnt; /**< number of iovec structs left to process */
struct iovec frag_vec[2]; /**< array of iovecs for send */
};
typedef struct mca_ptl_tcp_send_frag_t mca_ptl_tcp_send_frag_t;
@ -36,7 +40,8 @@ static inline mca_ptl_tcp_send_frag_t* mca_ptl_tcp_send_frag_alloc(int* rc)
bool mca_ptl_tcp_send_frag_handler(mca_ptl_tcp_send_frag_t*, int sd);
void mca_ptl_tcp_send_frag_init(
int mca_ptl_tcp_send_frag_init(
mca_ptl_tcp_send_frag_t*,
struct mca_ptl_base_peer_t*,
struct mca_ptl_base_send_request_t*,

View file

@ -1,7 +1,9 @@
/* @file
*
/*
* $HEADER$
*/
/**
* @file
*/
#ifndef MCA_PTL_TCP_SEND_REQUEST_H
#define MCA_PTL_TCP_SEND_REQUEST_H
@ -13,9 +15,14 @@
#include "mca/ptl/base/ptl_base_sendreq.h"
#include "ptl_tcp_sendfrag.h"
extern lam_class_t mca_ptl_tcp_send_request_t_class;
/**
* TCP send request derived type. The send request contains both the
* base send request, and space for the first TCP send fragment descriptor.
* This avoids the overhead of a second allocation for the initial send
* fragment on every send request.
*/
struct mca_ptl_tcp_send_request_t {
mca_ptl_base_send_request_t super;
mca_ptl_tcp_send_frag_t req_frag; /* first fragment */

View file

@ -17,20 +17,19 @@
int MPI_Bsend(void *buf, int count, MPI_Datatype type, int dest, int tag, MPI_Comm comm)
{
int rc;
if (dest == MPI_PROC_NULL) {
return MPI_SUCCESS;
}
if ( MPI_PARAM_CHECK ) {
int rc = MPI_SUCCESS;
if (lam_mpi_finalized) {
rc = MPI_SUCCESS;
if ( LAM_MPI_INVALID_STATE ) {
rc = MPI_ERR_INTERN;
} else if (count < 0) {
rc = MPI_ERR_COUNT;
#if 0
} else if (type == MPI_DATATYPE_NULL) {
rc = MPI_ERR_TYPE;
#endif
} else if (tag < 0 || tag > MPI_TAG_UB_VALUE) {
rc = MPI_ERR_TAG;
} else if (lam_comm_invalid(comm)) {
@ -38,11 +37,10 @@ int MPI_Bsend(void *buf, int count, MPI_Datatype type, int dest, int tag, MPI_Co
} else if (lam_comm_peer_invalid(comm, dest)) {
rc = MPI_ERR_RANK;
}
if (rc != MPI_SUCCESS) {
return rc;
}
LAM_ERRHANDLER_CHECK(rc, comm, rc, "MPI_Bsend");
}
return mca_pml.pml_send(buf, count, type, dest, tag, MCA_PML_BASE_SEND_BUFFERED, comm);
rc = mca_pml.pml_send(buf, count, type, dest, tag, MCA_PML_BASE_SEND_BUFFERED, comm);
LAM_ERRHANDLER_RETURN(rc, comm, rc, "MPI_Bsend");
}

View file

@ -16,20 +16,19 @@
int MPI_Bsend_init(void *buf, int count, MPI_Datatype type,
int dest, int tag, MPI_Comm comm, MPI_Request *request)
{
int rc;
if (dest == MPI_PROC_NULL) {
return MPI_SUCCESS;
}
if ( MPI_PARAM_CHECK ) {
int rc = MPI_SUCCESS;
if (lam_mpi_finalized) {
rc = MPI_SUCCESS;
if ( LAM_MPI_INVALID_STATE ) {
rc = MPI_ERR_INTERN;
} else if (count < 0) {
rc = MPI_ERR_COUNT;
#if 0
} else if (type == MPI_DATATYPE_NULL) {
rc = MPI_ERR_TYPE;
#endif
} else if (tag < 0 || tag > MPI_TAG_UB_VALUE) {
rc = MPI_ERR_TAG;
} else if (lam_comm_invalid(comm)) {
@ -39,11 +38,10 @@ int MPI_Bsend_init(void *buf, int count, MPI_Datatype type,
} else if (request == NULL) {
rc = MPI_ERR_REQUEST;
}
if (rc != MPI_SUCCESS) {
return rc;
}
LAM_ERRHANDLER_CHECK(rc, comm, rc, "MPI_Bsend_init");
}
return mca_pml.pml_isend_init(buf, count, type, dest, tag, MCA_PML_BASE_SEND_BUFFERED, comm, request);;
rc = mca_pml.pml_isend_init(buf,count,type,dest,tag,MCA_PML_BASE_SEND_BUFFERED,comm,request);
LAM_ERRHANDLER_RETURN(rc, comm, rc, "MPI_Bsend_init");
}

View file

@ -17,20 +17,19 @@
int MPI_Ibsend(void *buf, int count, MPI_Datatype type, int dest,
int tag, MPI_Comm comm, MPI_Request *request)
{
int rc;
if (dest == MPI_PROC_NULL) {
return MPI_SUCCESS;
}
if ( MPI_PARAM_CHECK ) {
int rc = MPI_SUCCESS;
if (lam_mpi_finalized) {
rc = MPI_SUCCESS;
if ( LAM_MPI_INVALID_STATE ) {
rc = MPI_ERR_INTERN;
} else if (count < 0) {
rc = MPI_ERR_COUNT;
#if 0
} else if (type == MPI_DATATYPE_NULL) {
rc = MPI_ERR_TYPE;
#endif
} else if (tag < 0 || tag > MPI_TAG_UB_VALUE) {
rc = MPI_ERR_TAG;
} else if (lam_comm_invalid(comm)) {
@ -40,11 +39,10 @@ int MPI_Ibsend(void *buf, int count, MPI_Datatype type, int dest,
} else if (request == NULL) {
rc = MPI_ERR_REQUEST;
}
if (rc != MPI_SUCCESS) {
return rc;
}
LAM_ERRHANDLER_CHECK(rc, comm, rc, "MPI_Ibsend");
}
return mca_pml.pml_isend(buf,count,type,dest,tag,MCA_PML_BASE_SEND_BUFFERED,comm,request);
rc = mca_pml.pml_isend(buf,count,type,dest,tag,MCA_PML_BASE_SEND_BUFFERED,comm,request);
LAM_ERRHANDLER_RETURN(rc, comm, rc, "MPI_Ibsend");
}

View file

@ -17,32 +17,32 @@
int MPI_Irecv(void *buf, int count, MPI_Datatype type, int source,
int tag, MPI_Comm comm, MPI_Request *request)
{
int rc;
if (source == MPI_PROC_NULL) {
return mca_pml.pml_null(request);
}
if ( MPI_PARAM_CHECK ) {
int rc = MPI_SUCCESS;
if (lam_mpi_finalized) {
rc = MPI_SUCCESS;
if ( LAM_MPI_INVALID_STATE ) {
rc = MPI_ERR_INTERN;
} else if (count < 0) {
rc = MPI_ERR_COUNT;
#if 0
} else if (type == MPI_DATATYPE_NULL) {
rc = MPI_ERR_TYPE;
#endif
} else if (tag < 0 || tag > MPI_TAG_UB_VALUE) {
rc = MPI_ERR_TAG;
} else if (lam_comm_invalid(comm)) {
rc = MPI_ERR_COMM;
} else if (source != MPI_ANY_SOURCE && source != MPI_PROC_NULL && lam_comm_peer_invalid(comm, source)) {
} else if (source != MPI_ANY_SOURCE &&
source != MPI_PROC_NULL &&
lam_comm_peer_invalid(comm, source)) {
rc = MPI_ERR_RANK;
}
if (rc != MPI_SUCCESS) {
return rc;
}
LAM_ERRHANDLER_CHECK(rc, comm, rc, "MPI_Irecv");
}
return mca_pml.pml_irecv(buf,count,type,source,tag,comm,request);
rc = mca_pml.pml_irecv(buf,count,type,source,tag,comm,request);
LAM_ERRHANDLER_RETURN(rc, comm, rc, "MPI_Irecv");
}

View file

@ -17,20 +17,19 @@
int MPI_Irsend(void *buf, int count, MPI_Datatype type, int dest,
int tag, MPI_Comm comm, MPI_Request *request)
{
int rc;
if (dest == MPI_PROC_NULL) {
return MPI_SUCCESS;
}
if ( MPI_PARAM_CHECK ) {
int rc = MPI_SUCCESS;
if (lam_mpi_finalized) {
rc = MPI_SUCCESS;
if ( LAM_MPI_INVALID_STATE ) {
rc = MPI_ERR_INTERN;
} else if (count < 0) {
rc = MPI_ERR_COUNT;
#if 0
} else if (type == MPI_DATATYPE_NULL) {
rc = MPI_ERR_TYPE;
#endif
} else if (tag < 0 || tag > MPI_TAG_UB_VALUE) {
rc = MPI_ERR_TAG;
} else if (lam_comm_invalid(comm)) {
@ -40,11 +39,10 @@ int MPI_Irsend(void *buf, int count, MPI_Datatype type, int dest,
} else if (request == NULL) {
rc = MPI_ERR_REQUEST;
}
if (rc != MPI_SUCCESS) {
return rc;
}
LAM_ERRHANDLER_CHECK(rc, comm, rc, "MPI_Irsend");
}
return mca_pml.pml_isend(buf,count,type,dest,tag,MCA_PML_BASE_SEND_READY,comm,request);
rc = mca_pml.pml_isend(buf,count,type,dest,tag,MCA_PML_BASE_SEND_READY,comm,request);
LAM_ERRHANDLER_RETURN(rc, comm, rc, "MPI_Irsend");
}

View file

@ -17,20 +17,19 @@
int MPI_Isend(void *buf, int count, MPI_Datatype type, int dest,
int tag, MPI_Comm comm, MPI_Request *request)
{
int rc;
if (dest == MPI_PROC_NULL) {
return MPI_SUCCESS;
}
if ( MPI_PARAM_CHECK ) {
int rc = MPI_SUCCESS;
if (lam_mpi_finalized) {
rc = MPI_SUCCESS;
if ( LAM_MPI_INVALID_STATE ) {
rc = MPI_ERR_INTERN;
} else if (count < 0) {
rc = MPI_ERR_COUNT;
#if 0
} else if (type == MPI_DATATYPE_NULL) {
rc = MPI_ERR_TYPE;
#endif
} else if (tag < 0 || tag > MPI_TAG_UB_VALUE) {
rc = MPI_ERR_TAG;
} else if (lam_comm_invalid(comm)) {
@ -40,11 +39,10 @@ int MPI_Isend(void *buf, int count, MPI_Datatype type, int dest,
} else if (request == NULL) {
rc = MPI_ERR_REQUEST;
}
if (rc != MPI_SUCCESS) {
return rc;
}
LAM_ERRHANDLER_CHECK(rc, comm, rc, "MPI_Isend");
}
return mca_pml.pml_isend(buf,count,type,dest,tag,MCA_PML_BASE_SEND_STANDARD,comm,request);
rc = mca_pml.pml_isend(buf,count,type,dest,tag,MCA_PML_BASE_SEND_STANDARD,comm,request);
LAM_ERRHANDLER_RETURN(rc, comm, rc, "MPI_Isend");
}

View file

@ -5,6 +5,7 @@
#include "lam_config.h"
#include "mpi.h"
#include "errhandler/errhandler.h"
#include "runtime/runtime.h"
#include "mpi/c/bindings.h"
#include "mca/pml/pml.h"
@ -17,20 +18,19 @@
int MPI_Issend(void *buf, int count, MPI_Datatype type, int dest,
int tag, MPI_Comm comm, MPI_Request *request)
{
int rc;
if (dest == MPI_PROC_NULL) {
return MPI_SUCCESS;
}
if ( MPI_PARAM_CHECK ) {
int rc = MPI_SUCCESS;
if (lam_mpi_finalized) {
rc = MPI_SUCCESS;
if ( LAM_MPI_INVALID_STATE ) {
rc = MPI_ERR_INTERN;
} else if (count < 0) {
rc = MPI_ERR_COUNT;
#if 0
} else if (type == MPI_DATATYPE_NULL) {
rc = MPI_ERR_TYPE;
#endif
} else if (tag < 0 || tag > MPI_TAG_UB_VALUE) {
rc = MPI_ERR_TAG;
} else if (lam_comm_invalid(comm)) {
@@ -40,11 +40,10 @@ int MPI_Issend(void *buf, int count, MPI_Datatype type, int dest,
} else if (request == NULL) {
rc = MPI_ERR_REQUEST;
}
if (rc != MPI_SUCCESS) {
return rc;
}
LAM_ERRHANDLER_CHECK(rc, comm, rc, "MPI_Issend");
}
return mca_pml.pml_isend(buf,count,type,dest,tag,MCA_PML_BASE_SEND_SYNCHRONOUS,comm,request);
rc = mca_pml.pml_isend(buf,count,type,dest,tag,MCA_PML_BASE_SEND_SYNCHRONOUS,comm,request);
LAM_ERRHANDLER_RETURN(rc, comm, rc, "MPI_Issend");
}

View file

@@ -17,6 +17,7 @@
int MPI_Recv(void *buf, int count, MPI_Datatype type, int source,
int tag, MPI_Comm comm, MPI_Status *status)
{
int rc;
if (source == MPI_PROC_NULL) {
if (status) {
status->MPI_SOURCE = MPI_PROC_NULL;
@@ -28,15 +29,13 @@ int MPI_Recv(void *buf, int count, MPI_Datatype type, int source,
}
if ( MPI_PARAM_CHECK ) {
int rc = MPI_SUCCESS;
if (lam_mpi_finalized) {
rc = MPI_SUCCESS;
if ( LAM_MPI_INVALID_STATE ) {
rc = MPI_ERR_INTERN;
} else if (count < 0) {
rc = MPI_ERR_COUNT;
#if 0
} else if (type == MPI_DATATYPE_NULL) {
rc = MPI_ERR_TYPE;
#endif
} else if (tag < 0 || tag > MPI_TAG_UB_VALUE) {
rc = MPI_ERR_TAG;
} else if (lam_comm_invalid(comm)) {
@@ -44,10 +43,10 @@ int MPI_Recv(void *buf, int count, MPI_Datatype type, int source,
} else if (source != MPI_ANY_SOURCE && lam_comm_peer_invalid(comm, source)) {
rc = MPI_ERR_RANK;
}
if (rc != MPI_SUCCESS) {
return rc;
}
LAM_ERRHANDLER_CHECK(rc, comm, rc, "MPI_Recv");
}
return mca_pml.pml_recv(buf, count, type, source, tag, comm, status);
rc = mca_pml.pml_recv(buf, count, type, source, tag, comm, status);
LAM_ERRHANDLER_RETURN(rc, comm, rc, "MPI_Recv");
}

View file

@@ -17,31 +17,32 @@
int MPI_Recv_init(void *buf, int count, MPI_Datatype type, int source,
int tag, MPI_Comm comm, MPI_Request *request)
{
int rc;
if (source == MPI_PROC_NULL) {
return mca_pml.pml_null(request);
}
if ( MPI_PARAM_CHECK ) {
int rc = MPI_SUCCESS;
if (lam_mpi_finalized) {
rc = MPI_SUCCESS;
if ( LAM_MPI_INVALID_STATE ) {
rc = MPI_ERR_INTERN;
} else if (count < 0) {
rc = MPI_ERR_COUNT;
#if 0
} else if (type == MPI_DATATYPE_NULL) {
rc = MPI_ERR_TYPE;
#endif
} else if (tag < 0 || tag > MPI_TAG_UB_VALUE) {
rc = MPI_ERR_TAG;
} else if (lam_comm_invalid(comm)) {
rc = MPI_ERR_COMM;
} else if (source != MPI_ANY_SOURCE && source != MPI_PROC_NULL && lam_comm_peer_invalid(comm, source)) {
} else if (source != MPI_ANY_SOURCE &&
source != MPI_PROC_NULL &&
lam_comm_peer_invalid(comm, source)) {
rc = MPI_ERR_RANK;
}
if (rc != MPI_SUCCESS) {
return rc;
}
LAM_ERRHANDLER_CHECK(rc, comm, rc, "MPI_Recv_init");
}
return mca_pml.pml_irecv_init(buf,count,type,source,tag,comm,request);
rc = mca_pml.pml_irecv_init(buf,count,type,source,tag,comm,request);
LAM_ERRHANDLER_RETURN(rc, comm, rc, "MPI_Recv_init");
}

View file

@@ -16,20 +16,19 @@
int MPI_Rsend(void *buf, int count, MPI_Datatype type, int dest, int tag, MPI_Comm comm)
{
int rc;
if (dest == MPI_PROC_NULL) {
return MPI_SUCCESS;
}
if ( MPI_PARAM_CHECK ) {
int rc = MPI_SUCCESS;
if (lam_mpi_finalized) {
rc = MPI_SUCCESS;
if ( LAM_MPI_INVALID_STATE ) {
rc = MPI_ERR_INTERN;
} else if (count < 0) {
rc = MPI_ERR_COUNT;
#if 0
} else if (type == MPI_DATATYPE_NULL) {
rc = MPI_ERR_TYPE;
#endif
} else if (tag < 0 || tag > MPI_TAG_UB_VALUE) {
rc = MPI_ERR_TAG;
} else if (lam_comm_invalid(comm)) {
@@ -37,10 +36,10 @@ int MPI_Rsend(void *buf, int count, MPI_Datatype type, int dest, int tag, MPI_Co
} else if (lam_comm_peer_invalid(comm, dest)) {
rc = MPI_ERR_RANK;
}
if (rc != MPI_SUCCESS) {
return rc;
}
LAM_ERRHANDLER_CHECK(rc, comm, rc, "MPI_Rsend");
}
return mca_pml.pml_send(buf, count, type, dest, tag, MCA_PML_BASE_SEND_READY, comm);
rc = mca_pml.pml_send(buf, count, type, dest, tag, MCA_PML_BASE_SEND_READY, comm);
LAM_ERRHANDLER_RETURN(rc, comm, rc, "MPI_Rsend");
}

View file

@@ -18,20 +18,19 @@ int MPI_Rsend_init(void *buf, int count, MPI_Datatype type,
int dest, int tag, MPI_Comm comm,
MPI_Request *request)
{
int rc;
if (dest == MPI_PROC_NULL) {
return MPI_SUCCESS;
}
if ( MPI_PARAM_CHECK ) {
int rc = MPI_SUCCESS;
if (lam_mpi_finalized) {
rc = MPI_SUCCESS;
if ( LAM_MPI_INVALID_STATE ) {
rc = MPI_ERR_INTERN;
} else if (count < 0) {
rc = MPI_ERR_COUNT;
#if 0
} else if (type == MPI_DATATYPE_NULL) {
rc = MPI_ERR_TYPE;
#endif
} else if (tag < 0 || tag > MPI_TAG_UB_VALUE) {
rc = MPI_ERR_TAG;
} else if (lam_comm_invalid(comm)) {
@@ -41,11 +40,10 @@ int MPI_Rsend_init(void *buf, int count, MPI_Datatype type,
} else if (request == NULL) {
rc = MPI_ERR_REQUEST;
}
if (rc != MPI_SUCCESS) {
return rc;
}
LAM_ERRHANDLER_CHECK(rc, comm, rc, "MPI_Rsend_init");
}
return mca_pml.pml_isend_init(buf, count, type, dest, tag, MCA_PML_BASE_SEND_READY, comm, request);;
rc = mca_pml.pml_isend_init(buf,count,type,dest,tag,MCA_PML_BASE_SEND_READY,comm,request);
LAM_ERRHANDLER_RETURN(rc, comm, rc, "MPI_Rsend_init");
}

View file

@@ -17,20 +17,19 @@
int MPI_Send(void *buf, int count, MPI_Datatype type, int dest,
int tag, MPI_Comm comm)
{
int rc;
if (dest == MPI_PROC_NULL) {
return MPI_SUCCESS;
}
if ( MPI_PARAM_CHECK ) {
int rc = MPI_SUCCESS;
if (lam_mpi_finalized) {
rc = MPI_SUCCESS;
if ( LAM_MPI_INVALID_STATE ) {
rc = MPI_ERR_INTERN;
} else if (count < 0) {
rc = MPI_ERR_COUNT;
#if 0
} else if (type == MPI_DATATYPE_NULL) {
rc = MPI_ERR_TYPE;
#endif
} else if (tag < 0 || tag > MPI_TAG_UB_VALUE) {
rc = MPI_ERR_TAG;
} else if (lam_comm_invalid(comm)) {
@@ -38,11 +37,10 @@ int MPI_Send(void *buf, int count, MPI_Datatype type, int dest,
} else if (lam_comm_peer_invalid(comm, dest)) {
rc = MPI_ERR_RANK;
}
if (rc != MPI_SUCCESS) {
return rc;
}
LAM_ERRHANDLER_CHECK(rc, comm, rc, "MPI_Send");
}
return mca_pml.pml_send(buf, count, type, dest, tag, MCA_PML_BASE_SEND_STANDARD, comm);
rc = mca_pml.pml_send(buf, count, type, dest, tag, MCA_PML_BASE_SEND_STANDARD, comm);
LAM_ERRHANDLER_RETURN(rc, comm, rc, "MPI_Send");
}

View file

@@ -18,20 +18,19 @@ int MPI_Send_init(void *buf, int count, MPI_Datatype type,
int dest, int tag, MPI_Comm comm,
MPI_Request *request)
{
int rc;
if (dest == MPI_PROC_NULL) {
return MPI_SUCCESS;
}
if ( MPI_PARAM_CHECK ) {
int rc = MPI_SUCCESS;
if (lam_mpi_finalized) {
rc = MPI_SUCCESS;
if ( LAM_MPI_INVALID_STATE ) {
rc = MPI_ERR_INTERN;
} else if (count < 0) {
rc = MPI_ERR_COUNT;
#if 0
} else if (type == MPI_DATATYPE_NULL) {
rc = MPI_ERR_TYPE;
#endif
} else if (tag < 0 || tag > MPI_TAG_UB_VALUE) {
rc = MPI_ERR_TAG;
} else if (lam_comm_invalid(comm)) {
@@ -41,11 +40,10 @@ int MPI_Send_init(void *buf, int count, MPI_Datatype type,
} else if (request == NULL) {
rc = MPI_ERR_REQUEST;
}
if (rc != MPI_SUCCESS) {
return rc;
}
LAM_ERRHANDLER_CHECK(rc, comm, rc, "MPI_Send_init");
}
return mca_pml.pml_isend_init(buf, count, type, dest, tag, MCA_PML_BASE_SEND_STANDARD, comm, request);;
rc = mca_pml.pml_isend_init(buf,count,type,dest,tag,MCA_PML_BASE_SEND_STANDARD,comm,request);
LAM_ERRHANDLER_RETURN(rc, comm, rc, "MPI_Send_init");
}

View file

@@ -5,7 +5,10 @@
#include <stdio.h>
#include "mpi.h"
#include "runtime/runtime.h"
#include "mpi/c/bindings.h"
#include "mca/pml/pml.h"
#if LAM_HAVE_WEAK_SYMBOLS && LAM_PROFILING_DEFINES
#pragma weak MPI_Sendrecv = PMPI_Sendrecv
@@ -14,6 +17,60 @@
int MPI_Sendrecv(void *sendbuf, int sendcount, MPI_Datatype sendtype,
int dest, int sendtag, void *recvbuf, int recvcount,
MPI_Datatype recvtype, int source, int recvtag,
MPI_Comm comm, MPI_Status *status) {
return MPI_SUCCESS;
MPI_Comm comm, MPI_Status *status)
{
lam_request_t* req;
int rc;
if ( MPI_PARAM_CHECK ) {
rc = MPI_SUCCESS;
if ( LAM_MPI_INVALID_STATE ) {
rc = MPI_ERR_INTERN;
} else if (lam_comm_invalid(comm)) {
rc = MPI_ERR_COMM;
} else if (sendcount < 0) {
rc = MPI_ERR_COUNT;
} else if (sendtype == MPI_DATATYPE_NULL) {
rc = MPI_ERR_TYPE;
} else if (lam_comm_peer_invalid(comm, dest)) {
rc = MPI_ERR_RANK;
} else if (sendtag < 0 || sendtag > MPI_TAG_UB_VALUE) {
rc = MPI_ERR_TAG;
} else if (recvcount < 0) {
rc = MPI_ERR_COUNT;
} else if (recvtype == MPI_DATATYPE_NULL) {
rc = MPI_ERR_TYPE;
} else if (source != MPI_PROC_NULL && source != MPI_ANY_SOURCE && lam_comm_peer_invalid(comm, source)) {
rc = MPI_ERR_RANK;
} else if (recvtag < MPI_ANY_TAG || recvtag > MPI_TAG_UB_VALUE) {
rc = MPI_ERR_TAG;
}
LAM_ERRHANDLER_CHECK(rc, comm, rc, "MPI_Sendrecv");
}
if (source != MPI_PROC_NULL) { /* post recv */
rc = mca_pml.pml_irecv(recvbuf, recvcount, recvtype,
source, recvtag, comm, &req);
LAM_ERRHANDLER_CHECK(rc, comm, rc, "MPI_Sendrecv");
}
if (dest != MPI_PROC_NULL) { /* send */
rc = mca_pml.pml_send(sendbuf, sendcount, sendtype, dest,
sendtag, MCA_PML_BASE_SEND_STANDARD, comm);
LAM_ERRHANDLER_CHECK(rc, comm, rc, "MPI_Sendrecv");
}
if (source != MPI_PROC_NULL) { /* wait for recv */
rc = mca_pml.pml_wait(1, &req, NULL, status);
} else {
if (status != NULL) {
status->MPI_ERROR = MPI_SUCCESS;
status->MPI_SOURCE = MPI_PROC_NULL;
status->MPI_TAG = MPI_ANY_TAG;
status->_count = 0;
}
rc = MPI_SUCCESS;
}
LAM_ERRHANDLER_RETURN(rc, comm, rc, "MPI_Sendrecv");
}
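
The rewritten body above posts the receive before issuing the blocking send, which is what makes the combined call safe for pairwise exchanges. A hypothetical caller (user code, not part of the commit) shifting a value around a ring:

/* Hypothetical caller: each rank passes a value one step around a ring. */
int ring_shift(int value, MPI_Comm comm)
{
    int rank, size, incoming;
    MPI_Status status;

    MPI_Comm_rank(comm, &rank);
    MPI_Comm_size(comm, &size);
    /* The receive is posted internally before the send, so
     * neighbouring ranks cannot deadlock on each other. */
    MPI_Sendrecv(&value, 1, MPI_INT, (rank + 1) % size, 0,
                 &incoming, 1, MPI_INT, (rank + size - 1) % size, 0,
                 comm, &status);
    return incoming;
}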

View file

@@ -13,6 +13,7 @@
int MPI_Sendrecv_replace(void * buf, int count, MPI_Datatype datatype,
int dest, int sendtag, int source, int recvtag,
MPI_Comm comm, MPI_Status *status) {
MPI_Comm comm, MPI_Status *status)
{
return MPI_SUCCESS;
}

View file

@@ -16,20 +16,19 @@
int MPI_Ssend(void *buf, int count, MPI_Datatype type, int dest, int tag, MPI_Comm comm)
{
int rc;
if (dest == MPI_PROC_NULL) {
return MPI_SUCCESS;
}
if ( MPI_PARAM_CHECK ) {
int rc = MPI_SUCCESS;
if (lam_mpi_finalized) {
rc = MPI_SUCCESS;
if ( LAM_MPI_INVALID_STATE ) {
rc = MPI_ERR_INTERN;
} else if (count < 0) {
rc = MPI_ERR_COUNT;
#if 0
} else if (type == MPI_DATATYPE_NULL) {
rc = MPI_ERR_TYPE;
#endif
} else if (tag < 0 || tag > MPI_TAG_UB_VALUE) {
rc = MPI_ERR_TAG;
} else if (lam_comm_invalid(comm)) {
@@ -37,11 +36,10 @@ int MPI_Ssend(void *buf, int count, MPI_Datatype type, int dest, int tag, MPI_Co
} else if (lam_comm_peer_invalid(comm, dest)) {
rc = MPI_ERR_RANK;
}
if (rc != MPI_SUCCESS) {
return rc;
}
LAM_ERRHANDLER_CHECK(rc, comm, rc, "MPI_Ssend");
}
return mca_pml.pml_send(buf, count, type, dest, tag, MCA_PML_BASE_SEND_SYNCHRONOUS, comm);
rc = mca_pml.pml_send(buf, count, type, dest, tag, MCA_PML_BASE_SEND_SYNCHRONOUS, comm);
LAM_ERRHANDLER_RETURN(rc, comm, rc, "MPI_Ssend");
}

View file

@@ -18,20 +18,19 @@ int MPI_Ssend_init(void *buf, int count, MPI_Datatype type,
int dest, int tag, MPI_Comm comm,
MPI_Request *request)
{
int rc;
if (dest == MPI_PROC_NULL) {
return MPI_SUCCESS;
}
if ( MPI_PARAM_CHECK ) {
int rc = MPI_SUCCESS;
if (lam_mpi_finalized) {
rc = MPI_SUCCESS;
if ( LAM_MPI_INVALID_STATE ) {
rc = MPI_ERR_INTERN;
} else if (count < 0) {
rc = MPI_ERR_COUNT;
#if 0
} else if (type == MPI_DATATYPE_NULL) {
rc = MPI_ERR_TYPE;
#endif
} else if (tag < 0 || tag > MPI_TAG_UB_VALUE) {
rc = MPI_ERR_TAG;
} else if (lam_comm_invalid(comm)) {
@@ -41,10 +40,10 @@ int MPI_Ssend_init(void *buf, int count, MPI_Datatype type,
} else if (request == NULL) {
rc = MPI_ERR_REQUEST;
}
if (rc != MPI_SUCCESS) {
return rc;
}
LAM_ERRHANDLER_CHECK(rc, comm, rc, "MPI_Ssend_init");
}
return mca_pml.pml_isend_init(buf, count, type, dest, tag, MCA_PML_BASE_SEND_SYNCHRONOUS, comm, request);;
rc = mca_pml.pml_isend_init(buf,count,type,dest,tag,MCA_PML_BASE_SEND_SYNCHRONOUS,comm,request);
LAM_ERRHANDLER_RETURN(rc, comm, rc, "MPI_Ssend_init");
}

View file

@@ -27,6 +27,6 @@ int MPI_Start(MPI_Request *request)
return rc;
}
}
return mca_pml.pml_start(request);
return mca_pml.pml_start(1, request);
}

View file

@@ -5,12 +5,29 @@
#include <stdio.h>
#include "mpi.h"
#include "runtime/runtime.h"
#include "mpi/c/bindings.h"
#include "mca/pml/pml.h"
#if LAM_HAVE_WEAK_SYMBOLS && LAM_PROFILING_DEFINES
#pragma weak MPI_Startall = PMPI_Startall
#endif
int MPI_Startall(int count, MPI_Request *array_of_requests) {
return MPI_SUCCESS;
int MPI_Startall(int count, MPI_Request *requests)
{
if ( MPI_PARAM_CHECK ) {
int rc = MPI_SUCCESS;
if (lam_mpi_finalized) {
rc = MPI_ERR_INTERN;
} else if (requests == NULL) {
rc = MPI_ERR_REQUEST;
}
if (rc != MPI_SUCCESS) {
return rc;
}
}
return mca_pml.pml_start(count, requests);
}
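
MPI_Startall now forwards the count and the request array straight to pml_start(). A hypothetical caller showing the persistent-request pattern this serves (user code, not part of the commit):

/* Hypothetical caller: set up persistent send/recv once, then
 * restart both each iteration with MPI_Startall. */
void exchange_loop(void *sbuf, void *rbuf, int n, int peer, MPI_Comm comm)
{
    MPI_Request reqs[2];
    MPI_Status stats[2];
    int i;

    MPI_Send_init(sbuf, n, MPI_DOUBLE, peer, 0, comm, &reqs[0]);
    MPI_Recv_init(rbuf, n, MPI_DOUBLE, peer, 0, comm, &reqs[1]);
    for (i = 0; i < 100; ++i) {
        MPI_Startall(2, reqs);          /* dispatches to pml_start(2, reqs) */
        MPI_Waitall(2, reqs, stats);
    }
    MPI_Request_free(&reqs[0]);
    MPI_Request_free(&reqs[1]);
}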

View file

@@ -16,18 +16,17 @@
int MPI_Test(MPI_Request *request, int *completed, MPI_Status *status)
{
int rc, index;
if ( MPI_PARAM_CHECK ) {
int rc = MPI_SUCCESS;
if ( lam_mpi_finalized ) {
rc = MPI_SUCCESS;
if ( LAM_MPI_INVALID_STATE ) {
rc = MPI_ERR_INTERN;
} else if (request == NULL) {
rc = MPI_ERR_REQUEST;
} else if (completed == NULL) {
rc = MPI_ERR_ARG;
}
if (rc != MPI_SUCCESS) {
return rc;
}
LAM_ERRHANDLER_CHECK(rc, (lam_communicator_t*)NULL, rc, "MPI_Test");
}
if(*request == NULL) {
@@ -38,6 +37,9 @@ int MPI_Test(MPI_Request *request, int *completed, MPI_Status *status)
status->_count = 0;
return MPI_SUCCESS;
}
return mca_pml.pml_test(request,completed,status);
rc = mca_pml.pml_test(1, request, &index, completed, status);
if (*completed < 0)
*completed = 0;
LAM_ERRHANDLER_RETURN(rc, (lam_communicator_t*)NULL, rc, "MPI_Test");
}
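
MPI_Test is now layered on the generalized pml_test(count, ...) entry point. A hypothetical caller that polls a request while doing other work (user code, not part of the commit):

/* Hypothetical caller: poll a request without blocking. */
int poll_until_done(MPI_Request *req, MPI_Status *status)
{
    int done = 0;
    while (!done) {
        MPI_Test(req, &done, status);   /* sets done nonzero on completion */
        if (!done) {
            /* overlap useful work between polls */
        }
    }
    return MPI_SUCCESS;
}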

View file

@@ -12,7 +12,7 @@
#endif
int MPI_Testall(int count, MPI_Request array_of_requests[], int *flag,
MPI_Status array_of_statuses[]) {
MPI_Status array_of_statuses[])
{
return MPI_SUCCESS;
}

View file

@@ -5,13 +5,30 @@
#include <stdio.h>
#include "mpi.h"
#include "runtime/runtime.h"
#include "mpi/c/bindings.h"
#include "mca/pml/pml.h"
#if LAM_HAVE_WEAK_SYMBOLS && LAM_PROFILING_DEFINES
#pragma weak MPI_Testany = PMPI_Testany
#endif
int MPI_Testany(int count, MPI_Request array_of_requests[], int *index,
MPI_Status *status) {
return MPI_SUCCESS;
int MPI_Testany(int count, MPI_Request requests[], int *index, int *completed, MPI_Status *status)
{
int rc;
if ( MPI_PARAM_CHECK ) {
rc = MPI_SUCCESS;
if ( LAM_MPI_INVALID_STATE ) {
rc = MPI_ERR_INTERN;
} else if (NULL == requests) {
rc = MPI_ERR_REQUEST;
} else if (NULL == index) {
rc = MPI_ERR_ARG;
}
LAM_ERRHANDLER_CHECK(rc, (lam_communicator_t*)NULL, rc, "MPI_Testany");
}
rc = mca_pml.pml_test(count, requests, index, completed, status);
LAM_ERRHANDLER_RETURN(rc, (lam_communicator_t*)NULL, rc, "MPI_Testany");
}

View file

@@ -5,14 +5,40 @@
#include <stdio.h>
#include "mpi.h"
#include "runtime/runtime.h"
#include "mpi/c/bindings.h"
#include "mca/pml/pml.h"
#if LAM_HAVE_WEAK_SYMBOLS && LAM_PROFILING_DEFINES
#pragma weak MPI_Testsome = PMPI_Testsome
#endif
int MPI_Testsome(int incount, MPI_Request array_of_requests[],
int *outcount, int array_of_indices,
MPI_Status array_of_statuses) {
int MPI_Testsome(int incount, MPI_Request requests[],
int *outcount, int indices[],
MPI_Status statuses[])
{
int rc, index, completed;
if ( MPI_PARAM_CHECK ) {
rc = MPI_SUCCESS;
if ( LAM_MPI_INVALID_STATE ) {
rc = MPI_ERR_INTERN;
} else if (NULL == requests) {
rc = MPI_ERR_REQUEST;
} else if (NULL == indices) {
rc = MPI_ERR_ARG;
}
LAM_ERRHANDLER_CHECK(rc, (lam_communicator_t*)NULL, rc, "MPI_Testsome");
}
/* optimize this in the future */
rc = mca_pml.pml_test(incount, requests, &index, &completed, statuses);
LAM_ERRHANDLER_CHECK(rc, (lam_communicator_t*)NULL, rc, "MPI_Testsome");
if(completed) {
*outcount = 1;
indices[0] = index;
} else {
*outcount = 0;
}
return MPI_SUCCESS;
}
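
As the in-code comment notes, this interim MPI_Testsome sits on a single pml_test() call and so reports at most one completion per invocation; callers that loop until everything finishes still behave correctly, just with more iterations. A hypothetical draining loop (user code, not part of the commit; indices must hold at least n entries):

/* Hypothetical caller: keep polling until all n requests complete. */
void drain_all(int n, MPI_Request reqs[], int indices[], MPI_Status stats[])
{
    int done = 0, completed;
    while (done < n) {
        /* Completed requests become MPI_REQUEST_NULL and are
         * ignored by subsequent calls. */
        MPI_Testsome(n, reqs, &completed, indices, stats);
        done += completed;
    }
}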

View file

@@ -11,6 +11,7 @@
#pragma weak MPI_Topo_test = PMPI_Topo_test
#endif
int MPI_Topo_test(MPI_Comm comm, int *status) {
int MPI_Topo_test(MPI_Comm comm, int *status)
{
return MPI_SUCCESS;
}

View file

@@ -15,26 +15,27 @@
int MPI_Wait(MPI_Request *request, MPI_Status *status)
{
int index;
int index, rc;
if ( MPI_PARAM_CHECK ) {
int rc = MPI_SUCCESS;
if (lam_mpi_finalized) {
rc = MPI_SUCCESS;
if ( LAM_MPI_INVALID_STATE ) {
rc = MPI_ERR_INTERN;
} else if (request == NULL) {
rc = MPI_ERR_REQUEST;
}
if (rc != MPI_SUCCESS) {
return rc;
}
LAM_ERRHANDLER_CHECK(rc, (lam_communicator_t*)NULL, rc, "MPI_Wait");
}
if (*request == NULL) {
status->MPI_SOURCE = MPI_PROC_NULL;
status->MPI_TAG = MPI_ANY_TAG;
status->MPI_ERROR = MPI_SUCCESS;
status->_count = 0;
if (NULL == *request) {
if (NULL != status) {
status->MPI_SOURCE = MPI_PROC_NULL;
status->MPI_TAG = MPI_ANY_TAG;
status->MPI_ERROR = MPI_SUCCESS;
status->_count = 0;
}
return MPI_SUCCESS;
}
return mca_pml.pml_wait(1, request, &index, status);
rc = mca_pml.pml_wait(1, request, &index, status);
LAM_ERRHANDLER_RETURN(rc, (lam_communicator_t*)NULL, rc, "MPI_Wait");
}
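
For context, a hypothetical end-to-end caller of the nonblocking path these hunks complete (user code, not part of the commit), assuming a communicator with at least two ranks:

/* Hypothetical caller: rank 0 posts a nonblocking receive, rank 1
 * sends; the request is completed with MPI_Wait. */
int exchange(MPI_Comm comm)
{
    int rank, value = 42;
    MPI_Request req;
    MPI_Status status;

    MPI_Comm_rank(comm, &rank);
    if (rank == 0) {
        MPI_Irecv(&value, 1, MPI_INT, 1, 0, comm, &req);
        /* overlap computation here */
        MPI_Wait(&req, &status);        /* completes via pml_wait(1, ...) */
    } else if (rank == 1) {
        MPI_Send(&value, 1, MPI_INT, 0, 0, comm);
    }
    return MPI_SUCCESS;
}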

View file

@@ -16,17 +16,17 @@
int MPI_Waitall(int count, MPI_Request *requests, MPI_Status *statuses)
{
int rc;
if ( MPI_PARAM_CHECK ) {
int rc = MPI_SUCCESS;
if (lam_mpi_finalized) {
rc = MPI_SUCCESS;
if ( LAM_MPI_INVALID_STATE ) {
rc = MPI_ERR_INTERN;
} else if (requests == NULL) {
rc = MPI_ERR_REQUEST;
}
if (rc != MPI_SUCCESS) {
return rc;
}
LAM_ERRHANDLER_CHECK(rc, (lam_communicator_t*)NULL, rc, "MPI_Waitall");
}
return mca_pml.pml_wait_all(count, requests, statuses);
rc = mca_pml.pml_wait_all(count, requests, statuses);
LAM_ERRHANDLER_RETURN(rc, (lam_communicator_t*)NULL, rc, "MPI_Waitall");
}

View file

@@ -15,17 +15,17 @@
int MPI_Waitany(int count, MPI_Request *requests, int *index, MPI_Status *status)
{
int rc;
if ( MPI_PARAM_CHECK ) {
int rc = MPI_SUCCESS;
if (lam_mpi_finalized) {
rc = MPI_SUCCESS;
if ( LAM_MPI_INVALID_STATE ) {
rc = MPI_ERR_INTERN;
} else if (requests == NULL) {
rc = MPI_ERR_REQUEST;
}
if (rc != MPI_SUCCESS) {
return rc;
}
LAM_ERRHANDLER_CHECK(rc, (lam_communicator_t*)NULL, rc, "MPI_Waitany");
}
return mca_pml.pml_wait(count, requests, index, status);
rc = mca_pml.pml_wait(count, requests, index, status);
LAM_ERRHANDLER_RETURN(rc, (lam_communicator_t*)NULL, rc, "MPI_Waitany");
}

View file

@@ -29,14 +29,12 @@ int MPI_Waitsome(int incount, MPI_Request *requests,
} else if (requests == NULL) {
rc = MPI_ERR_REQUEST;
}
if (rc != MPI_SUCCESS) {
return rc;
}
LAM_ERRHANDLER_CHECK(rc, (lam_communicator_t*)NULL, rc, "MPI_Waitsome");
}
/* optimize this in the future */
if((rc = mca_pml.pml_wait(incount, requests, &index, statuses)) != LAM_SUCCESS)
return rc;
rc = mca_pml.pml_wait(incount, requests, &index, statuses);
LAM_ERRHANDLER_CHECK(rc, (lam_communicator_t*)NULL, rc, "MPI_Waitsome");
*outcount = 1;
indices[0] = index;
return MPI_SUCCESS;

View file

@@ -32,6 +32,10 @@ void lam_proc_construct(lam_proc_t* proc)
proc->proc_vpid = 0;
proc->proc_pml = NULL;
proc->proc_modex = NULL;
proc->proc_arch = 0;
/* FIX - need to determine remote process architecture */
proc->proc_convertor = lam_convertor_create(0, 0);
THREAD_LOCK(&lam_proc_lock);
lam_list_append(&lam_proc_list, (lam_list_item_t*)proc);

View file

@@ -7,6 +7,7 @@
#include "include/types.h"
#include "lfc/lam_list.h"
#include "datatype/datatype.h"
extern lam_class_t lam_proc_t_class;
@@ -18,6 +19,8 @@ struct lam_proc_t {
uint32_t proc_vpid; /* process identifier w/in the job */
struct mca_pml_proc_t* proc_pml; /* PML specific proc data */
struct mca_base_modex_t* proc_modex; /* MCA module exchange data */
int proc_arch;
lam_convertor_t* proc_convertor;
/* JMS: need to have the following information:

View file

@@ -142,5 +142,6 @@ int lam_mpi_init(int argc, char **argv, int requested, int *provided)
/* All done */
lam_mpi_initialized = true;
lam_mpi_finalized = false;
return MPI_SUCCESS;
}

View file

@@ -12,8 +12,11 @@
* Global variables and symbols for the MPI layer
*/
#define LAM_MPI_INVALID_STATE (!lam_mpi_initialized || lam_mpi_finalized)
extern bool lam_mpi_initialized;
extern bool lam_mpi_finalized;
extern bool lam_mpi_invalid_state;
extern bool lam_mpi_thread_multiple;
extern int lam_mpi_thread_requested;