diff --git a/opal/mca/btl/base/btl_base_mca.c b/opal/mca/btl/base/btl_base_mca.c index 3a53b67727..5c14a32aaf 100644 --- a/opal/mca/btl/base/btl_base_mca.c +++ b/opal/mca/btl/base/btl_base_mca.c @@ -46,13 +46,15 @@ int mca_btl_base_param_register(mca_base_component_t *version, MCA_BASE_VAR_SCOPE_READONLY, &module->btl_exclusivity); - asprintf(&msg, "BTL bit flags (general flags: SEND=%d, PUT=%d, GET=%d, SEND_INPLACE=%d, RDMA_MATCHED=%d, HETEROGENEOUS_RDMA=%d; flags only used by the \"dr\" PML (ignored by others): ACK=%d, CHECKSUM=%d, RDMA_COMPLETION=%d; flags only used by the \"bfo\" PML (ignored by others): FAILOVER_SUPPORT=%d)", + asprintf(&msg, "BTL bit flags (general flags: SEND=%d, PUT=%d, GET=%d, SEND_INPLACE=%d, HETEROGENEOUS_RDMA=%d, " + "ATOMIC_OPS=%d; flags only used by the \"dr\" PML (ignored by others): ACK=%d, CHECKSUM=%d, " + "RDMA_COMPLETION=%d; flags only used by the \"bfo\" PML (ignored by others): FAILOVER_SUPPORT=%d)", MCA_BTL_FLAGS_SEND, MCA_BTL_FLAGS_PUT, MCA_BTL_FLAGS_GET, MCA_BTL_FLAGS_SEND_INPLACE, - MCA_BTL_FLAGS_RDMA_MATCHED, MCA_BTL_FLAGS_HETEROGENEOUS_RDMA, + MCA_BTL_FLAGS_ATOMIC_OPS, MCA_BTL_FLAGS_NEED_ACK, MCA_BTL_FLAGS_NEED_CSUM, MCA_BTL_FLAGS_RDMA_COMPLETION, @@ -64,6 +66,14 @@ int mca_btl_base_param_register(mca_base_component_t *version, &module->btl_flags); free(msg); + asprintf (&msg, "BTL atomic bit flags (general flags: ADD=%d, AND=%d, OR=%d, XOR=%d)", + MCA_BTL_ATOMIC_SUPPORTS_ADD, MCA_BTL_ATOMIC_SUPPORTS_AND, MCA_BTL_ATOMIC_SUPPORTS_OR, + MCA_BTL_ATOMIC_SUPPORTS_XOR); + (void) mca_base_component_var_register(version, "atomic_flags", msg, MCA_BASE_VAR_TYPE_UNSIGNED_INT, + NULL, 0, MCA_BASE_VAR_FLAG_DEFAULT_ONLY, OPAL_INFO_LVL_5, + MCA_BASE_VAR_SCOPE_CONSTANT, &module->btl_atomic_flags); + free(msg); + (void) mca_base_component_var_register(version, "rndv_eager_limit", "Size (in bytes, including header) of \"phase 1\" fragment sent for all large messages (must be >= 0 and <= eager_limit)", MCA_BASE_VAR_TYPE_SIZE_T, NULL, 0, 0, 
OPAL_INFO_LVL_4, @@ -183,6 +193,10 @@ int mca_btl_base_param_verify(mca_btl_base_module_t *module) module->btl_flags &= ~MCA_BTL_FLAGS_GET; } + if (0 == module->btl_atomic_flags) { + module->btl_flags &= ~(MCA_BTL_FLAGS_ATOMIC_OPS | MCA_BTL_FLAGS_ATOMIC_FOPS); /* no atomics advertised: clear both atomic capability bits */ + } + if (0 == module->btl_get_limit) { module->btl_get_limit = SIZE_MAX; } diff --git a/opal/mca/btl/btl.h b/opal/mca/btl/btl.h index f932b08958..0607b46149 100644 --- a/opal/mca/btl/btl.h +++ b/opal/mca/btl/btl.h @@ -225,6 +225,12 @@ typedef uint8_t mca_btl_base_tag_t; */ #define MCA_BTL_FLAGS_SIGNALED 0x4000 + +/** The BTL supports network atomic operations */ +#define MCA_BTL_FLAGS_ATOMIC_OPS 0x08000 +/** The BTL supports fetching network atomic operations */ +#define MCA_BTL_FLAGS_ATOMIC_FOPS 0x10000 + /* Default exclusivity levels */ #define MCA_BTL_EXCLUSIVITY_HIGH (64*1024) /* internal loopback */ #define MCA_BTL_EXCLUSIVITY_DEFAULT 1024 /* GM/IB/etc. */ @@ -263,6 +269,34 @@ enum { #endif }; +/** supported atomic operations */ +enum { + /** The btl supports atomic add */ + MCA_BTL_ATOMIC_SUPPORTS_ADD = 0x00000001, + /** The btl supports atomic bitwise and */ + MCA_BTL_ATOMIC_SUPPORTS_AND = 0x00000200, + /** The btl supports atomic bitwise or */ + MCA_BTL_ATOMIC_SUPPORTS_OR = 0x00000400, + /** The btl supports atomic bitwise exclusive or */ + MCA_BTL_ATOMIC_SUPPORTS_XOR = 0x00000800, + /** The btl supports atomic compare-and-swap */ + MCA_BTL_ATOMIC_SUPPORTS_CSWAP = 0x10000000, + /** The btl guarantees global atomicity (can mix btl atomics with cpu atomics) */ + MCA_BTL_ATOMIC_SUPPORTS_GLOB = 0x20000000, +}; + +enum mca_btl_base_atomic_op_t { + /** Atomic add: (*remote_address) = (*remote_address) + operand */ + MCA_BTL_ATOMIC_ADD = 0x0001, + /** Atomic and: (*remote_address) = (*remote_address) & operand */ + MCA_BTL_ATOMIC_AND = 0x0011, + /** Atomic or: (*remote_address) = (*remote_address) | operand */ + MCA_BTL_ATOMIC_OR = 0x0012, + /** Atomic xor: (*remote_address) = (*remote_address) ^ operand */ + MCA_BTL_ATOMIC_XOR = 
0x0014, +}; +typedef enum mca_btl_base_atomic_op_t mca_btl_base_atomic_op_t; + /** * Asynchronous callback function on completion of an operation. * Completion Semantics: The descriptor can be reused or returned to the @@ -825,12 +859,6 @@ typedef int (*mca_btl_base_module_sendi_fn_t)( * until all outstanding operations on that handle * have been completed. * - * BTLs that do not have the MCA_BTL_FLAGS_RDMA_MATCHED flag set - * allow multiple concurrent put operations on the same descriptor. - * BTLs that do have the MCA_BTL_FLAGS_RDMA_MATCHED flag set require - * a corresponding prepare_src/dst call for each put operation and - * therefore prohibit multiple concurrent put operations. - * * @param btl (IN) BTL module * @param endpoint (IN) BTL addressing information * @param local_address (IN) Local address to put from (registered) @@ -859,27 +887,6 @@ typedef int (*mca_btl_base_module_put_fn_t) (struct mca_btl_base_module_t *btl, struct mca_btl_base_registration_handle_t *remote_handle, size_t size, int flags, int order, mca_btl_base_rdma_completion_fn_t cbfunc, void *cbcontext, void *cbdata); -/** - * Initiate an asynchronous get. - * - * Completion Semantics: the descriptor has been queued for a get operation - * the BTL now controls the descriptor until local - * completion callback is made on the descriptor - * - * BTLs that do not have the MCA_BTL_FLAGS_RDMA_MATCHED flag set - * allow multiple concurrent get operations on the same descriptor. - * BTLs that do have the MCA_BTL_FLAGS_RDMA_MATCHED flag set require - * a corresponding prepare_src/dst call for each get operation and - * therefore prohibit multiple concurrent get operations. 
- * - * @param btl (IN) BTL module - * @param endpoint (IN) BTL addressing information - * @param descriptor (IN) Description of the data to be transferred - * - * @retval OPAL_SUCCESS The descriptor was successfully queued for a get - * @retval OPAL_ERROR The descriptor was NOT successfully queued for a get - * - */ /** * Initiate an asynchronous get. * Completion Semantics: if this function returns a 1 then the operation @@ -889,12 +896,6 @@ typedef int (*mca_btl_base_module_put_fn_t) (struct mca_btl_base_module_t *btl, * until all outstanding operations on that handle * have been completed. * - * BTLs that do not have the MCA_BTL_FLAGS_RDMA_MATCHED flag set - * allow multiple concurrent put operations on the same descriptor. - * BTLs that do have the MCA_BTL_FLAGS_RDMA_MATCHED flag set require - * a corresponding prepare_src/dst call for each put operation and - * therefore prohibit multiple concurrent put operations. - * * @param btl (IN) BTL module * @param endpoint (IN) BTL addressing information * @param local_address (IN) Local address to put from (registered) @@ -923,6 +924,139 @@ typedef int (*mca_btl_base_module_get_fn_t) (struct mca_btl_base_module_t *btl, struct mca_btl_base_registration_handle_t *remote_handle, size_t size, int flags, int order, mca_btl_base_rdma_completion_fn_t cbfunc, void *cbcontext, void *cbdata); +/** + * Initiate an asynchronous atomic operation. + * Completion Semantics: if this function returns a 1 then the operation + * is complete. a return of OPAL_SUCCESS indicates + * the atomic operation has been queued with the + * network. 
+ * + * @param btl (IN) BTL module + * @param endpoint (IN) BTL addressing information + * @param remote_address (IN) Remote address to perform the operation on (registered remotely) + * @param remote_handle (IN) Remote registration handle for region containing + * (remote_address, remote_address + 8) + * @param op (IN) Operation to perform + * @param operand (IN) Operand for the operation + * @param flags (IN) Flags for this atomic operation + * @param order (IN) Ordering + * @param cbfunc (IN) Function to call on completion (if queued) + * @param cbcontext (IN) Context for the callback + * @param cbdata (IN) Data for callback + * + * @retval OPAL_SUCCESS The operation was successfully queued + * @retval 1 The operation is complete + * @retval OPAL_ERROR The operation was NOT successfully queued + * @retval OPAL_ERR_OUT_OF_RESOURCE Insufficient resources to queue the atomic + * operation. Try again later + * @retval OPAL_ERR_NOT_AVAILABLE Atomic operation can not be performed due to + * alignment restrictions or the operation {op} is not supported + * by the hardware. + * + * After the operation is complete the remote address specified by {remote_address} and + * {remote_handle} will be updated with (*remote_address) = (*remote_address) op operand. + * The btl will guarantee consistency of atomic operations performed via the btl. Note, + * however, that not all btls will provide consistency between btl atomic operations and + * cpu or other btl atomics. + */ +typedef int (*mca_btl_base_module_atomic_op64_fn_t) (struct mca_btl_base_module_t *btl, + struct mca_btl_base_endpoint_t *endpoint, uint64_t remote_address, + struct mca_btl_base_registration_handle_t *remote_handle, mca_btl_base_atomic_op_t op, + uint64_t operand, int flags, int order, mca_btl_base_rdma_completion_fn_t cbfunc, + void *cbcontext, void *cbdata); + +/** + * Initiate an asynchronous fetching atomic operation. + * Completion Semantics: if this function returns a 1 then the operation + * is complete. 
a return of OPAL_SUCCESS indicates + * the atomic operation has been queued with the + * network. + * + * @param btl (IN) BTL module + * @param endpoint (IN) BTL addressing information + * @param local_address (OUT) Local address to store the result in + * @param remote_address (IN) Remote address to perform the operation on (registered remotely) + * @param local_handle (IN) Local registration handle for region containing + * (local_address, local_address + 8) + * @param remote_handle (IN) Remote registration handle for region containing + * (remote_address, remote_address + 8) + * @param op (IN) Operation to perform + * @param operand (IN) Operand for the operation + * @param flags (IN) Flags for this atomic operation + * @param order (IN) Ordering + * @param cbfunc (IN) Function to call on completion (if queued) + * @param cbcontext (IN) Context for the callback + * @param cbdata (IN) Data for callback + * + * @retval OPAL_SUCCESS The operation was successfully queued + * @retval 1 The operation is complete + * @retval OPAL_ERROR The operation was NOT successfully queued + * @retval OPAL_ERR_OUT_OF_RESOURCE Insufficient resources to queue the atomic + * operation. Try again later + * @retval OPAL_ERR_NOT_AVAILABLE Atomic operation can not be performed due to + * alignment restrictions or the operation {op} is not supported + * by the hardware. + * + * After the operation is complete the remote address specified by {remote_address} and + * {remote_handle} will be updated with (*remote_address) = (*remote_address) op operand. + * {local_address} will be updated with the previous value stored in {remote_address}. + * The btl will guarantee consistency of atomic operations performed via the btl. Note, + * however, that not all btls will provide consistency between btl atomic operations and + * cpu or other btl atomics. 
+ */ +typedef int (*mca_btl_base_module_atomic_fop64_fn_t) (struct mca_btl_base_module_t *btl, + struct mca_btl_base_endpoint_t *endpoint, void *local_address, uint64_t remote_address, + struct mca_btl_base_registration_handle_t *local_handle, + struct mca_btl_base_registration_handle_t *remote_handle, mca_btl_base_atomic_op_t op, + uint64_t operand, int flags, int order, mca_btl_base_rdma_completion_fn_t cbfunc, + void *cbcontext, void *cbdata); + +/** + * Initiate an asynchronous compare and swap operation. + * Completion Semantics: if this function returns a 1 then the operation + * is complete. a return of OPAL_SUCCESS indicates + * the atomic operation has been queued with the + * network. + * + * @param btl (IN) BTL module + * @param endpoint (IN) BTL addressing information + * @param local_address (OUT) Local address to store the result in + * @param remote_address (IN) Remote address to perform the operation on (registered remotely) + * @param local_handle (IN) Local registration handle for region containing + * (local_address, local_address + 8) + * @param remote_handle (IN) Remote registration handle for region containing + * (remote_address, remote_address + 8) + * @param compare (IN) Value to compare with the contents of remote_address + * @param value (IN) Value to store on success + * @param flags (IN) Flags for this atomic operation + * @param order (IN) Ordering + * @param cbfunc (IN) Function to call on completion (if queued) + * @param cbcontext (IN) Context for the callback + * @param cbdata (IN) Data for callback + * + * @retval OPAL_SUCCESS The operation was successfully queued + * @retval 1 The operation is complete + * @retval OPAL_ERROR The operation was NOT successfully queued + * @retval OPAL_ERR_OUT_OF_RESOURCE Insufficient resources to queue the atomic + * operation. Try again later + * @retval OPAL_ERR_NOT_AVAILABLE Atomic operation can not be performed due to + * alignment restrictions or the operation is not supported + * by the hardware. 
+ * + * After the operation is complete the remote address specified by {remote_address} and + * {remote_handle} will be updated with {value} if *remote_address == compare. + * {local_address} will be updated with the previous value stored in {remote_address}. + * The btl will guarantee consistency of atomic operations performed via the btl. Note, + * however, that not all btls will provide consistency between btl atomic operations and + * cpu atomics. + */ +typedef int (*mca_btl_base_module_atomic_cswap64_fn_t) (struct mca_btl_base_module_t *btl, + struct mca_btl_base_endpoint_t *endpoint, void *local_address, uint64_t remote_address, + struct mca_btl_base_registration_handle_t *local_handle, + struct mca_btl_base_registration_handle_t *remote_handle, uint64_t compare, + uint64_t value, int flags, int order, mca_btl_base_rdma_completion_fn_t cbfunc, + void *cbcontext, void *cbdata); + /** * Diagnostic dump of btl state. * @@ -961,6 +1095,7 @@ struct mca_btl_base_module_t { uint32_t btl_latency; /**< relative ranking of latency used to prioritize btls */ uint32_t btl_bandwidth; /**< bandwidth (Mbytes/sec) supported by each endpoint */ uint32_t btl_flags; /**< flags (put/get...) 
*/ + uint32_t btl_atomic_flags; /**< atomic operations supported (add, and, xor, etc) */ size_t btl_registration_handle_size; /**< size of the BTLs registration handles */ /* One-sided limitations (0 for no alignment, SIZE_MAX for no limit ) */ @@ -984,6 +1119,11 @@ struct mca_btl_base_module_t { mca_btl_base_module_get_fn_t btl_get; mca_btl_base_module_dump_fn_t btl_dump; + /* atomic operations */ + mca_btl_base_module_atomic_op64_fn_t btl_atomic_op; + mca_btl_base_module_atomic_fop64_fn_t btl_atomic_fop; + mca_btl_base_module_atomic_cswap64_fn_t btl_atomic_cswap; + /* new memory registration functions */ mca_btl_base_module_register_mem_fn_t btl_register_mem; /**< memory registration function (NULL if not needed) */ mca_btl_base_module_deregister_mem_fn_t btl_deregister_mem; /**< memory deregistration function (NULL if not needed) */