1
1

First crack at adding atomic operation support

Этот коммит содержится в:
Nathan Hjelm 2014-11-02 13:56:14 -07:00
родитель 249e5e009f
Коммит 2a70238f4d
2 изменённых файлов: 185 добавлений и 35 удалений

Просмотреть файл

@ -46,13 +46,15 @@ int mca_btl_base_param_register(mca_base_component_t *version,
MCA_BASE_VAR_SCOPE_READONLY,
&module->btl_exclusivity);
asprintf(&msg, "BTL bit flags (general flags: SEND=%d, PUT=%d, GET=%d, SEND_INPLACE=%d, RDMA_MATCHED=%d, HETEROGENEOUS_RDMA=%d; flags only used by the \"dr\" PML (ignored by others): ACK=%d, CHECKSUM=%d, RDMA_COMPLETION=%d; flags only used by the \"bfo\" PML (ignored by others): FAILOVER_SUPPORT=%d)",
asprintf(&msg, "BTL bit flags (general flags: SEND=%d, PUT=%d, GET=%d, SEND_INPLACE=%d, HETEROGENEOUS_RDMA=%d, "
"ATOMIC_OPS=%d; flags only used by the \"dr\" PML (ignored by others): ACK=%d, CHECKSUM=%d, "
"RDMA_COMPLETION=%d; flags only used by the \"bfo\" PML (ignored by others): FAILOVER_SUPPORT=%d)",
MCA_BTL_FLAGS_SEND,
MCA_BTL_FLAGS_PUT,
MCA_BTL_FLAGS_GET,
MCA_BTL_FLAGS_SEND_INPLACE,
MCA_BTL_FLAGS_RDMA_MATCHED,
MCA_BTL_FLAGS_HETEROGENEOUS_RDMA,
MCA_BTL_FLAGS_ATOMIC_OPS,
MCA_BTL_FLAGS_NEED_ACK,
MCA_BTL_FLAGS_NEED_CSUM,
MCA_BTL_FLAGS_RDMA_COMPLETION,
@ -64,6 +66,14 @@ int mca_btl_base_param_register(mca_base_component_t *version,
&module->btl_flags);
free(msg);
/* Register the "atomic_flags" variable, bound to module->btl_atomic_flags.
 * The help text lists the numeric value of each atomic-support bit it names;
 * the parenthesised list is now properly closed. */
asprintf (&msg, "BTL atomic bit flags (general flags: ADD=%d, AND=%d, OR=%d, XOR=%d)",
          MCA_BTL_ATOMIC_SUPPORTS_ADD, MCA_BTL_ATOMIC_SUPPORTS_AND, MCA_BTL_ATOMIC_SUPPORTS_OR,
          MCA_BTL_ATOMIC_SUPPORTS_XOR);
(void) mca_base_component_var_register(version, "atomic_flags", msg, MCA_BASE_VAR_TYPE_UNSIGNED_INT,
                                       NULL, 0, MCA_BASE_VAR_FLAG_DEFAULT_ONLY, OPAL_INFO_LVL_5,
                                       MCA_BASE_VAR_SCOPE_CONSTANT, &module->btl_atomic_flags);
/* msg was allocated by asprintf and is released here right after registration */
free(msg);
(void) mca_base_component_var_register(version, "rndv_eager_limit", "Size (in bytes, including header) of \"phase 1\" fragment sent for all large messages (must be >= 0 and <= eager_limit)",
MCA_BASE_VAR_TYPE_SIZE_T, NULL, 0, 0,
OPAL_INFO_LVL_4,
@ -178,6 +188,10 @@ int mca_btl_base_param_verify(mca_btl_base_module_t *module)
module->btl_flags &= ~MCA_BTL_FLAGS_GET;
}
if (0 == module->btl_atomic_flags) {
module->btl_flags &= ~MCA_BTL_FLAGS_ATOMIC_OPS;
}
if (0 == module->btl_get_limit) {
module->btl_get_limit = SIZE_MAX;
}

Просмотреть файл

@ -226,6 +226,10 @@ typedef uint8_t mca_btl_base_tag_t;
*/
#define MCA_BTL_FLAGS_SIGNALED 0x4000
/** The BTL supports network atomic operations */
#define MCA_BTL_FLAGS_ATOMIC_OPS 0x8000
/* Default exclusivity levels */
#define MCA_BTL_EXCLUSIVITY_HIGH (64*1024) /* internal loopback */
#define MCA_BTL_EXCLUSIVITY_DEFAULT 1024 /* GM/IB/etc. */
@ -264,6 +268,32 @@ enum {
#endif
};
/**
 * Supported atomic operations.
 *
 * Each enumerator is a single bit, so the values can be OR-ed together into
 * a flags word (see the btl_atomic_flags member of the btl module). Every
 * value is kept within the range of int: ISO C requires enumeration
 * constants to be representable as int, so bit 31 (0x80000000) must not be
 * used here.
 */
enum {
    /** The btl supports atomic add */
    MCA_BTL_ATOMIC_SUPPORTS_ADD   = 0x00000001,
    /** The btl supports atomic bitwise and */
    MCA_BTL_ATOMIC_SUPPORTS_AND   = 0x00000200,
    /** The btl supports atomic bitwise or */
    MCA_BTL_ATOMIC_SUPPORTS_OR    = 0x00000400,
    /** The btl supports atomic bitwise exclusive or */
    MCA_BTL_ATOMIC_SUPPORTS_XOR   = 0x00000800,
    /** The btl supports atomic compare-and-swap */
    MCA_BTL_ATOMIC_SUPPORTS_CSWAP = 0x10000000,
};
/**
 * Identifies which atomic operation to apply to the remote value.
 * In each case the result replaces the value stored at the remote address.
 */
typedef enum mca_btl_base_atomic_op_t {
    /** Add: (*remote_address) += operand */
    MCA_BTL_ATOMIC_ADD = 0x01,
    /** Bitwise and: (*remote_address) &= operand */
    MCA_BTL_ATOMIC_AND = 0x11,
    /** Bitwise or: (*remote_address) |= operand */
    MCA_BTL_ATOMIC_OR  = 0x12,
    /** Bitwise exclusive or: (*remote_address) ^= operand */
    MCA_BTL_ATOMIC_XOR = 0x14
} mca_btl_base_atomic_op_t;
/**
* Asynchronous callback function on completion of an operation.
* Completion Semantics: The descriptor can be reused or returned to the
@ -840,12 +870,6 @@ typedef int (*mca_btl_base_module_sendi_fn_t)(
* until all outstanding operations on that handle
* have been completed.
*
* BTLs that do not have the MCA_BTL_FLAGS_RDMA_MATCHED flag set
* allow multiple concurrent put operations on the same descriptor.
* BTLs that do have the MCA_BTL_FLAGS_RDMA_MATCHED flag set require
* a corresponding prepare_src/dst call for each put operation and
* therefore prohibit multiple concurrent put operations.
*
* @param btl (IN) BTL module
* @param endpoint (IN) BTL addressing information
* @param local_address (IN) Local address to put from (registered)
@ -874,27 +898,6 @@ typedef int (*mca_btl_base_module_put_fn_t) (struct mca_btl_base_module_t *btl,
struct mca_btl_base_registration_handle_t *remote_handle, size_t size, int flags,
int order, mca_btl_base_rdma_completion_fn_t cbfunc, void *cbcontext, void *cbdata);
/**
* Initiate an asynchronous get.
*
* Completion Semantics: the descriptor has been queued for a get operation
* the BTL now controls the descriptor until local
* completion callback is made on the descriptor
*
* BTLs that do not have the MCA_BTL_FLAGS_RDMA_MATCHED flag set
* allow multiple concurrent get operations on the same descriptor.
* BTLs that do have the MCA_BTL_FLAGS_RDMA_MATCHED flag set require
* a corresponding prepare_src/dst call for each get operation and
* therefore prohibit multiple concurrent get operations.
*
* @param btl (IN) BTL module
* @param endpoint (IN) BTL addressing information
* @param descriptor (IN) Description of the data to be transferred
*
* @retval OPAL_SUCCESS The descriptor was successfully queued for a get
* @retval OPAL_ERROR The descriptor was NOT successfully queued for a get
*
*/
/**
* Initiate an asynchronous get.
* Completion Semantics: if this function returns a 1 then the operation
@ -904,12 +907,6 @@ typedef int (*mca_btl_base_module_put_fn_t) (struct mca_btl_base_module_t *btl,
* until all outstanding operations on that handle
* have been completed.
*
* BTLs that do not have the MCA_BTL_FLAGS_RDMA_MATCHED flag set
* allow multiple concurrent put operations on the same descriptor.
* BTLs that do have the MCA_BTL_FLAGS_RDMA_MATCHED flag set require
* a corresponding prepare_src/dst call for each put operation and
* therefore prohibit multiple concurrent put operations.
*
* @param btl (IN) BTL module
* @param endpoint (IN) BTL addressing information
* @param local_address (IN) Local address to get into (registered)
@ -938,6 +935,139 @@ typedef int (*mca_btl_base_module_get_fn_t) (struct mca_btl_base_module_t *btl,
struct mca_btl_base_registration_handle_t *remote_handle, size_t size, int flags,
int order, mca_btl_base_rdma_completion_fn_t cbfunc, void *cbcontext, void *cbdata);
/**
 * Start an asynchronous 64-bit atomic operation on remote memory.
 *
 * Completion semantics: a return value of 1 means the operation has already
 * completed. A return value of OPAL_SUCCESS means the atomic operation has
 * been queued with the network.
 *
 * @param btl (IN) BTL module
 * @param endpoint (IN) BTL addressing information
 * @param remote_address (IN) Remote address to perform the operation on
 *                            (registered remotely)
 * @param remote_handle (IN) Remote registration handle for the region containing
 *                           (remote_address, remote_address + 8)
 * @param op (IN) Operation to perform
 * @param operand (IN) Operand for the operation
 * @param flags (IN) Flags for this atomic operation
 * @param order (IN) Ordering
 * @param cbfunc (IN) Function to call on completion (if queued)
 * @param cbcontext (IN) Context for the callback
 * @param cbdata (IN) Data for callback
 *
 * @retval OPAL_SUCCESS The operation was successfully queued
 * @retval 1 The operation is complete
 * @retval OPAL_ERROR The operation was NOT successfully queued
 * @retval OPAL_ERR_OUT_OF_RESOURCE Insufficient resources to queue the atomic
 *                                  operation. Try again later
 * @retval OPAL_ERR_NOT_AVAILABLE The atomic operation can not be performed due to
 *                                alignment restrictions, or the operation {op} is
 *                                not supported by the hardware.
 *
 * Once the operation is complete, the remote location identified by
 * {remote_address} and {remote_handle} holds
 * (*remote_address) = (*remote_address) op operand.
 * The btl guarantees consistency among atomic operations performed via the btl.
 * Note, however, that not all btls provide consistency between btl atomic
 * operations and cpu atomics.
 */
typedef int (*mca_btl_base_module_atomic_op64_fn_t) (
    struct mca_btl_base_module_t *btl,
    struct mca_btl_base_endpoint_t *endpoint,
    uint64_t remote_address,
    struct mca_btl_base_registration_handle_t *remote_handle,
    mca_btl_base_atomic_op_t op,
    uint64_t operand,
    int flags,
    int order,
    mca_btl_base_rdma_completion_fn_t cbfunc,
    void *cbcontext,
    void *cbdata);
/**
 * Start an asynchronous 64-bit fetching atomic operation on remote memory.
 *
 * Completion semantics: a return value of 1 means the operation has already
 * completed. A return value of OPAL_SUCCESS means the atomic operation has
 * been queued with the network.
 *
 * @param btl (IN) BTL module
 * @param endpoint (IN) BTL addressing information
 * @param local_address (OUT) Local address to store the result in
 * @param remote_address (IN) Remote address to perform the operation on
 *                            (registered remotely)
 * @param local_handle (IN) Local registration handle for the region containing
 *                          (local_address, local_address + 8)
 * @param remote_handle (IN) Remote registration handle for the region containing
 *                           (remote_address, remote_address + 8)
 * @param op (IN) Operation to perform
 * @param operand (IN) Operand for the operation
 * @param flags (IN) Flags for this atomic operation
 * @param order (IN) Ordering
 * @param cbfunc (IN) Function to call on completion (if queued)
 * @param cbcontext (IN) Context for the callback
 * @param cbdata (IN) Data for callback
 *
 * @retval OPAL_SUCCESS The operation was successfully queued
 * @retval 1 The operation is complete
 * @retval OPAL_ERROR The operation was NOT successfully queued
 * @retval OPAL_ERR_OUT_OF_RESOURCE Insufficient resources to queue the atomic
 *                                  operation. Try again later
 * @retval OPAL_ERR_NOT_AVAILABLE The atomic operation can not be performed due to
 *                                alignment restrictions, or the operation {op} is
 *                                not supported by the hardware.
 *
 * Once the operation is complete, the remote location identified by
 * {remote_address} and {remote_handle} holds
 * (*remote_address) = (*remote_address) op operand, and {local_address} holds
 * the value that was previously stored at {remote_address}.
 * The btl guarantees consistency among atomic operations performed via the btl.
 * Note, however, that not all btls provide consistency between btl atomic
 * operations and cpu atomics.
 */
typedef int (*mca_btl_base_module_atomic_fop64_fn_t) (
    struct mca_btl_base_module_t *btl,
    struct mca_btl_base_endpoint_t *endpoint,
    void *local_address,
    uint64_t remote_address,
    struct mca_btl_base_registration_handle_t *local_handle,
    struct mca_btl_base_registration_handle_t *remote_handle,
    mca_btl_base_atomic_op_t op,
    uint64_t operand,
    int flags,
    int order,
    mca_btl_base_rdma_completion_fn_t cbfunc,
    void *cbcontext,
    void *cbdata);
/**
 * Start an asynchronous 64-bit compare-and-swap operation on remote memory.
 *
 * Completion semantics: a return value of 1 means the operation has already
 * completed. A return value of OPAL_SUCCESS means the atomic operation has
 * been queued with the network.
 *
 * @param btl (IN) BTL module
 * @param endpoint (IN) BTL addressing information
 * @param local_address (OUT) Local address to store the result in
 * @param remote_address (IN) Remote address to perform the operation on
 *                            (registered remotely)
 * @param local_handle (IN) Local registration handle for the region containing
 *                          (local_address, local_address + 8)
 * @param remote_handle (IN) Remote registration handle for the region containing
 *                           (remote_address, remote_address + 8)
 * @param compare (IN) Value the remote contents are compared against
 * @param value (IN) Value to store on success
 * @param flags (IN) Flags for this atomic operation
 * @param order (IN) Ordering
 * @param cbfunc (IN) Function to call on completion (if queued)
 * @param cbcontext (IN) Context for the callback
 * @param cbdata (IN) Data for callback
 *
 * @retval OPAL_SUCCESS The operation was successfully queued
 * @retval 1 The operation is complete
 * @retval OPAL_ERROR The operation was NOT successfully queued
 * @retval OPAL_ERR_OUT_OF_RESOURCE Insufficient resources to queue the atomic
 *                                  operation. Try again later
 * @retval OPAL_ERR_NOT_AVAILABLE The atomic operation can not be performed due to
 *                                alignment restrictions, or compare-and-swap is
 *                                not supported by the hardware.
 *
 * Once the operation is complete, the remote location identified by
 * {remote_address} and {remote_handle} holds {value} if
 * *remote_address == compare, and {local_address} holds the value that was
 * previously stored at {remote_address}.
 * The btl guarantees consistency among atomic operations performed via the btl.
 * Note, however, that not all btls provide consistency between btl atomic
 * operations and cpu atomics.
 */
typedef int (*mca_btl_base_module_atomic_cswap_fn_t) (
    struct mca_btl_base_module_t *btl,
    struct mca_btl_base_endpoint_t *endpoint,
    void *local_address,
    uint64_t remote_address,
    struct mca_btl_base_registration_handle_t *local_handle,
    struct mca_btl_base_registration_handle_t *remote_handle,
    uint64_t compare,
    uint64_t value,
    int flags,
    int order,
    mca_btl_base_rdma_completion_fn_t cbfunc,
    void *cbcontext,
    void *cbdata);
/**
* Diagnostic dump of btl state.
*
@ -976,6 +1106,7 @@ struct mca_btl_base_module_t {
uint32_t btl_latency; /**< relative ranking of latency used to prioritize btls */
uint32_t btl_bandwidth; /**< bandwidth (Mbytes/sec) supported by each endpoint */
uint32_t btl_flags; /**< flags (put/get...) */
uint32_t btl_atomic_flags; /**< atomic operations supported (add, and, xor, etc) */
size_t btl_registration_handle_size; /**< size of the BTLs registration handles */
/* One-sided limitations (0 for no alignment, SIZE_MAX for no limit ) */
@ -999,6 +1130,11 @@ struct mca_btl_base_module_t {
mca_btl_base_module_get_fn_t btl_get;
mca_btl_base_module_dump_fn_t btl_dump;
/* atomic operations */
mca_btl_base_module_atomic_op64_fn_t btl_atomic_op;
mca_btl_base_module_atomic_fop64_fn_t btl_atomic_fop;
mca_btl_base_module_atomic_cswap_fn_t btl_atomic_cswap;
/* new memory registration functions */
mca_btl_base_module_register_mem_fn_t btl_register_mem; /**< memory registration function (NULL if not needed) */
mca_btl_base_module_deregister_mem_fn_t btl_deregister_mem; /**< memory deregistration function (NULL if not needed) */