Update smcuda to match recent changes in sm BTL.
This commit was SVN r27803.
This commit is contained in:
parent
34d1f0a585
commit
a07a4bb3f7
@@ -34,14 +34,22 @@
#include <sys/mman.h>
#endif /* HAVE_SYS_MMAN_H */

#ifdef OMPI_BTL_SM_CMA_NEED_SYSCALL_DEFS
#include "opal/sys/cma.h"
#endif /* OMPI_BTL_SM_CMA_NEED_SYSCALL_DEFS */

#include "opal/sys/atomic.h"
#include "opal/class/opal_bitmap.h"
#include "opal/util/output.h"
#include "opal/util/printf.h"
#include "opal/mca/hwloc/base/base.h"
#include "opal/mca/shmem/base/base.h"
#include "opal/mca/shmem/shmem.h"
#include "orte/util/proc_info.h"
#include "orte/util/show_help.h"
#include "opal/datatype/opal_convertor.h"
#include "ompi/class/ompi_free_list.h"
#include "ompi/runtime/ompi_module_exchange.h"
#include "ompi/mca/btl/btl.h"
#if OMPI_CUDA_SUPPORT
#include "ompi/mca/common/cuda/common_cuda.h"
@@ -83,7 +91,7 @@ mca_btl_smcuda_t mca_btl_smcuda = {
        mca_btl_smcuda_alloc,
        mca_btl_smcuda_free,
        mca_btl_smcuda_prepare_src,
#if OMPI_CUDA_SUPPORT
#if OMPI_CUDA_SUPPORT || OMPI_BTL_SM_HAVE_KNEM || OMPI_BTL_SM_HAVE_CMA
        mca_btl_smcuda_prepare_dst,
#else
        NULL,
@@ -92,7 +100,7 @@ mca_btl_smcuda_t mca_btl_smcuda = {
        mca_btl_smcuda_sendi,
        NULL, /* put */
        NULL, /* get -- optionally filled during initialization */
        mca_btl_base_dump,
        mca_btl_smcuda_dump,
        NULL, /* mpool */
        mca_btl_smcuda_register_error_cb, /* register error */
        mca_btl_smcuda_ft_event
@@ -110,7 +118,6 @@ mca_btl_smcuda_t mca_btl_smcuda = {
 */
#define OFFSET2ADDR(OFFSET, BASE) ((ptrdiff_t)(OFFSET) + (char*)(BASE))


static void *mpool_calloc(size_t nmemb, size_t size)
{
    void *buf;
@@ -126,16 +133,104 @@ static void *mpool_calloc(size_t nmemb, size_t size)
    return buf;
}


static int smcuda_btl_first_time_init(mca_btl_smcuda_t *smcuda_btl, int n)
static int
setup_mpool_base_resources(mca_btl_smcuda_component_t *comp_ptr,
                           mca_mpool_base_resources_t *out_res)
{
    size_t size, length, length_payload;
    char *sm_ctl_file;
    int rc = OMPI_SUCCESS;
    int fd = -1;
    ssize_t bread = 0;

    if (-1 == (fd = open(comp_ptr->sm_mpool_rndv_file_name, O_RDONLY))) {
        int err = errno;
        orte_show_help("help-mpi-btl-smcuda.txt", "sys call fail", true,
                       "open(2)", strerror(err), err);
        rc = OMPI_ERR_IN_ERRNO;
        goto out;
    }
    if ((ssize_t)sizeof(opal_shmem_ds_t) != (bread =
        read(fd, &out_res->bs_meta_buf, sizeof(opal_shmem_ds_t)))) {
        opal_output(0, "setup_mpool_base_resources: "
                    "Read inconsistency -- read: %lu, but expected: %lu!\n",
                    (unsigned long)bread,
                    (unsigned long)sizeof(opal_shmem_ds_t));
        rc = OMPI_ERROR;
        goto out;
    }
    if ((ssize_t)sizeof(out_res->size) != (bread =
        read(fd, &out_res->size, sizeof(size_t)))) {
        opal_output(0, "setup_mpool_base_resources: "
                    "Read inconsistency -- read: %lu, but expected: %lu!\n",
                    (unsigned long)bread,
                    (unsigned long)sizeof(opal_shmem_ds_t));
        rc = OMPI_ERROR;
        goto out;
    }

out:
    if (-1 != fd) {
        (void)close(fd);
    }
    return rc;
}

static int
sm_segment_attach(mca_btl_smcuda_component_t *comp_ptr)
{
    int rc = OMPI_SUCCESS;
    int fd = -1;
    ssize_t bread = 0;
    opal_shmem_ds_t *tmp_shmem_ds = calloc(1, sizeof(*tmp_shmem_ds));

    if (NULL == tmp_shmem_ds) {
        return OMPI_ERR_OUT_OF_RESOURCE;
    }
    if (-1 == (fd = open(comp_ptr->sm_rndv_file_name, O_RDONLY))) {
        int err = errno;
        orte_show_help("help-mpi-btl-sm.txt", "sys call fail", true,
                       "open(2)", strerror(err), err);
        rc = OMPI_ERR_IN_ERRNO;
        goto out;
    }
    if ((ssize_t)sizeof(opal_shmem_ds_t) != (bread =
        read(fd, tmp_shmem_ds, sizeof(opal_shmem_ds_t)))) {
        opal_output(0, "sm_segment_attach: "
                    "Read inconsistency -- read: %lu, but expected: %lu!\n",
                    (unsigned long)bread,
                    (unsigned long)sizeof(opal_shmem_ds_t));
        rc = OMPI_ERROR;
        goto out;
    }
    if (NULL == (comp_ptr->sm_seg =
                 mca_common_sm_module_attach(tmp_shmem_ds,
                                             sizeof(mca_common_sm_seg_header_t),
                                             opal_cache_line_size))) {
        /* don't have to detach here, because module_attach cleans up after
         * itself on failure. */
        opal_output(0, "sm_segment_attach: "
                    "mca_common_sm_module_attach failure!\n");
        return OMPI_ERROR;
    }

out:
    if (-1 != fd) {
        (void)close(fd);
    }
    if (tmp_shmem_ds) {
        free(tmp_shmem_ds);
    }
    return rc;
}

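For reference, a minimal sketch of the rendezvous-file layout that the two readers above assume; the diagram is editorial illustration, not part of the commit:

/* Rendezvous-file layout consumed by the readers above (illustrative):
 *
 *   sm_mpool_rndv_file_name           sm_rndv_file_name
 *   +---------------------------+     +---------------------------+
 *   | opal_shmem_ds_t           |     | opal_shmem_ds_t           |
 *   +---------------------------+     +---------------------------+
 *   | size_t (mpool size)       |
 *   +---------------------------+
 *
 * create_rndv_file() in btl_smcuda_component.c writes exactly these bytes,
 * so its write order and the read order here must stay in sync. */
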
static int
smcuda_btl_first_time_init(mca_btl_smcuda_t *smcuda_btl,
                           int32_t my_smp_rank,
                           int n)
{
    size_t length, length_payload;
    sm_fifo_t *my_fifos;
    int my_mem_node, num_mem_nodes, i;
    ompi_proc_t **procs;
    size_t num_procs;
    mca_mpool_base_resources_t res;
    int my_mem_node, num_mem_nodes, i, rc;
    mca_mpool_base_resources_t *res = NULL;
    mca_btl_smcuda_component_t* m = &mca_btl_smcuda_component;

    /* Assume we don't have hwloc support and fill in dummy info */
@@ -189,67 +284,34 @@ static int smcuda_btl_first_time_init(mca_btl_smcuda_t *smcuda_btl, int n)
    }
#endif

    /* lookup shared memory pool */
    mca_btl_smcuda_component.sm_mpools = (mca_mpool_base_module_t **) calloc(num_mem_nodes,
                                                                             sizeof(mca_mpool_base_module_t*));

    /* Create one mpool.  Per discussion with George and a UTK Euro
       MPI 2010 paper, it may be beneficial to create multiple mpools.
       Leaving that for a future optimization, however. */
    /* Disable memory binding, because each MPI process will claim
       pages in the mpool for their local NUMA node */
    res.mem_node = -1;

    /* determine how much memory to create */
    /*
     * This heuristic formula mostly says that we request memory for:
     * - nfifos FIFOs, each comprising:
     *   . a sm_fifo_t structure
     *   . many pointers (fifo_size of them per FIFO)
     * - eager fragments (2*n of them, allocated in sm_free_list_inc chunks)
     * - max fragments (sm_free_list_num of them)
     *
     * On top of all that, we sprinkle in some number of
     * "opal_cache_line_size" additions to account for some
     * padding and edge effects that may lie in the allocator.
     */
    res.size =
        FIFO_MAP_NUM(n) * ( sizeof(sm_fifo_t) + sizeof(void *) * m->fifo_size + 4 * opal_cache_line_size )
        + ( 2 * n + m->sm_free_list_inc ) * ( m->eager_limit + 2 * opal_cache_line_size )
        + m->sm_free_list_num * ( m->max_frag_size + 2 * opal_cache_line_size );

    /* before we multiply by n, make sure the result won't overflow */
    /* Stick that little pad in, particularly since we'll eventually
     * need a little extra space.  E.g., in mca_mpool_sm_init() in
     * mpool_sm_component.c when sizeof(mca_common_sm_module_t) is
     * added.
     */
    if ( ((double) res.size) * n > LONG_MAX - 4096 ) {
    if (NULL == (res = calloc(1, sizeof(*res)))) {
        return OMPI_ERR_OUT_OF_RESOURCE;
    }
    res.size *= n;

    /* now, create it */

    /* lookup shared memory pool */
    mca_btl_smcuda_component.sm_mpools =
        (mca_mpool_base_module_t **)calloc(num_mem_nodes,
                                           sizeof(mca_mpool_base_module_t *));

    /* Disable memory binding, because each MPI process will claim pages in the
     * mpool for their local NUMA node */
    res->mem_node = -1;

    if (OMPI_SUCCESS != (rc = setup_mpool_base_resources(m, res))) {
        free(res);
        return rc;
    }
    /* now that res is fully populated, create the thing */
    mca_btl_smcuda_component.sm_mpools[0] =
        mca_mpool_base_module_create(mca_btl_smcuda_component.sm_mpool_name,
                                     smcuda_btl, &res);
                                     smcuda_btl, res);
    /* Sanity check to ensure that we found it */
    if (NULL == mca_btl_smcuda_component.sm_mpools[0]) {
        return OMPI_ERR_OUT_OF_RESOURCE;
        free(res);
        return OMPI_ERR_OUT_OF_RESOURCE;
    }

    mca_btl_smcuda_component.sm_mpool = mca_btl_smcuda_component.sm_mpools[0];
#if OMPI_CUDA_SUPPORT
    /* Create a local memory pool that sends handles to the remote
     * side.  Note that the res argument is not really used, but
     * needed to satisfy function signature. */
    smcuda_btl->super.btl_mpool = mca_mpool_base_module_create("gpusm",
                                                               smcuda_btl,
                                                               &res);
    if (NULL == smcuda_btl->super.btl_mpool) {
        return OMPI_ERR_OUT_OF_RESOURCE;
    }
#endif /* OMPI_CUDA_SUPPORT */

    mca_btl_smcuda_component.sm_mpool_base =
        mca_btl_smcuda_component.sm_mpools[0]->mpool_base(mca_btl_smcuda_component.sm_mpools[0]);
@@ -258,37 +320,30 @@ static int smcuda_btl_first_time_init(mca_btl_smcuda_t *smcuda_btl, int n)
    mca_btl_smcuda_component.sm_peers = (struct mca_btl_base_endpoint_t**)
        calloc(n, sizeof(struct mca_btl_base_endpoint_t*));
    if (NULL == mca_btl_smcuda_component.sm_peers) {
        free(res);
        return OMPI_ERR_OUT_OF_RESOURCE;
    }

    /* Allocate Shared Memory BTL process coordination
     * data structure.  This will reside in shared memory */

    /* set file name */
    if (asprintf(&sm_ctl_file, "%s"OPAL_PATH_SEP"shared_mem_btl_module.%s",
                 orte_process_info.job_session_dir,
                 orte_process_info.nodename) < 0) {
    /* remember that node rank zero is already attached */
    if (0 != my_smp_rank) {
        if (OMPI_SUCCESS != (rc = sm_segment_attach(m))) {
            free(res);
            return rc;
        }
    }
#if OMPI_CUDA_SUPPORT
    /* Create a local memory pool that sends handles to the remote
     * side.  Note that the res argument is not really used, but
     * needed to satisfy function signature. */
    smcuda_btl->super.btl_mpool = mca_mpool_base_module_create("gpusm",
                                                               smcuda_btl,
                                                               res);
    if (NULL == smcuda_btl->super.btl_mpool) {
        return OMPI_ERR_OUT_OF_RESOURCE;
    }
#endif /* OMPI_CUDA_SUPPORT */

    /* Pass in a data segment alignment of 0 to get no data
       segment (only the shared control structure) */
    size = sizeof(mca_common_sm_seg_header_t) +
        n * (sizeof(sm_fifo_t*) + sizeof(char *) + sizeof(uint16_t)) + opal_cache_line_size;
    procs = ompi_proc_world(&num_procs);
    if (!(mca_btl_smcuda_component.sm_seg =
          mca_common_sm_init(procs, num_procs, size, sm_ctl_file,
                             sizeof(mca_common_sm_seg_header_t),
                             opal_cache_line_size))) {
        opal_output(0, "mca_btl_smcuda_add_procs: unable to create shared memory "
"BTL coordinating strucure :: size %lu \n",
|
||||
                    (unsigned long)size);
        free(procs);
        free(sm_ctl_file);
        return OMPI_ERROR;
    }
    free(procs);
    free(sm_ctl_file);
    /* it is now safe to free the mpool resources */
    free(res);

    /* check to make sure number of local procs is within the
     * specified limits */
@@ -387,6 +442,7 @@ static struct mca_btl_base_endpoint_t *
create_sm_endpoint(int local_proc, struct ompi_proc_t *proc)
{
    struct mca_btl_base_endpoint_t *ep;

#if OMPI_ENABLE_PROGRESS_THREADS == 1
    char path[PATH_MAX];
#endif
@@ -426,22 +482,6 @@ create_sm_endpoint(int local_proc, struct ompi_proc_t *proc)
    return ep;
}

static void calc_sm_max_procs(int n)
{
    /* see if need to allocate space for extra procs */
    if(0 > mca_btl_smcuda_component.sm_max_procs) {
        /* no limit */
        if(0 <= mca_btl_smcuda_component.sm_extra_procs) {
            /* limit */
            mca_btl_smcuda_component.sm_max_procs =
                n + mca_btl_smcuda_component.sm_extra_procs;
        } else {
            /* no limit */
            mca_btl_smcuda_component.sm_max_procs = 2 * n;
        }
    }
}

int mca_btl_smcuda_add_procs(
    struct mca_btl_base_module_t* btl,
    size_t nprocs,
@@ -455,6 +495,9 @@ int mca_btl_smcuda_add_procs(
    mca_btl_smcuda_t *smcuda_btl;
    bool have_connected_peer = false;
    char **bases;
    /* for easy access to the mpool_sm_module */
    mca_mpool_sm_module_t *sm_mpool_modp = NULL;

    /* initialization */

    smcuda_btl = (mca_btl_smcuda_t *)btl;
@@ -467,7 +510,7 @@ int mca_btl_smcuda_add_procs(
     * and identify procs that are on this host.  Add procs on this
     * host to shared memory reachability list.  Also, get number
     * of local procs in the procs list. */
    for(proc = 0; proc < (int32_t)nprocs; proc++) {
    for (proc = 0; proc < (int32_t)nprocs; proc++) {
        /* check to see if this proc can be reached via shmem (i.e.,
           if they're on my local host and in my job) */
        if (procs[proc]->proc_name.jobid != my_proc->proc_name.jobid ||
@@ -502,18 +545,18 @@ int mca_btl_smcuda_add_procs(
        goto CLEANUP;

    /* make sure that my_smp_rank has been defined */
    if(-1 == my_smp_rank) {
    if (-1 == my_smp_rank) {
        return_code = OMPI_ERROR;
        goto CLEANUP;
    }

    calc_sm_max_procs(n_local_procs);

    if (!smcuda_btl->btl_inited) {
        return_code =
            smcuda_btl_first_time_init(smcuda_btl, mca_btl_smcuda_component.sm_max_procs);
        if(return_code != OMPI_SUCCESS)
            smcuda_btl_first_time_init(smcuda_btl, my_smp_rank,
                                       mca_btl_smcuda_component.sm_max_procs);
        if (return_code != OMPI_SUCCESS) {
            goto CLEANUP;
        }
    }

    /* set local proc's smp rank in the peers structure for
@@ -526,6 +569,7 @@ int mca_btl_smcuda_add_procs(
    }

    bases = mca_btl_smcuda_component.shm_bases;
    sm_mpool_modp = (mca_mpool_sm_module_t *)mca_btl_smcuda_component.sm_mpool;

    /* initialize own FIFOs */
    /*
@@ -549,13 +593,48 @@ int mca_btl_smcuda_add_procs(
    /* Sync with other local procs. Force the FIFO initialization to always
     * happen before the readers access it.
     */
    opal_atomic_add_32( &mca_btl_smcuda_component.sm_seg->module_seg->seg_inited, 1);
    opal_atomic_add_32(&mca_btl_smcuda_component.sm_seg->module_seg->seg_inited, 1);
    while( n_local_procs >
           mca_btl_smcuda_component.sm_seg->module_seg->seg_inited) {
        opal_progress();
        opal_atomic_rmb();
    }

    /* it is now safe to unlink the shared memory segment. only one process
     * needs to do this, so just let smp rank zero take care of it. */
    if (0 == my_smp_rank) {
        if (OMPI_SUCCESS !=
            mca_common_sm_module_unlink(mca_btl_smcuda_component.sm_seg)) {
            /* it is "okay" if this fails at this point. we have gone this far,
             * so just warn about the failure and continue. this is probably
             * only triggered by a programming error. */
            opal_output(0, "WARNING: common_sm_module_unlink failed.\n");
        }
        /* SKG - another abstraction violation here, but I don't want to add
         * extra code in the sm mpool for further synchronization. */

        /* at this point, all processes have attached to the mpool segment. so
         * it is safe to unlink it here. */
        if (OMPI_SUCCESS !=
            mca_common_sm_module_unlink(sm_mpool_modp->sm_common_module)) {
            opal_output(0, "WARNING: common_sm_module_unlink failed.\n");
        }
        if (-1 == unlink(mca_btl_smcuda_component.sm_mpool_rndv_file_name)) {
            opal_output(0, "WARNING: %s unlink failed.\n",
                        mca_btl_smcuda_component.sm_mpool_rndv_file_name);
        }
        if (-1 == unlink(mca_btl_smcuda_component.sm_rndv_file_name)) {
            opal_output(0, "WARNING: %s unlink failed.\n",
                        mca_btl_smcuda_component.sm_rndv_file_name);
        }
    }

    /* free up some space used by the name buffers */
    free(mca_btl_smcuda_component.sm_mpool_ctl_file_name);
    free(mca_btl_smcuda_component.sm_mpool_rndv_file_name);
    free(mca_btl_smcuda_component.sm_ctl_file_name);
    free(mca_btl_smcuda_component.sm_rndv_file_name);

    /* coordinate with other processes */
    for(j = mca_btl_smcuda_component.num_smp_procs;
        j < mca_btl_smcuda_component.num_smp_procs + n_local_procs; j++) {
@@ -699,6 +778,7 @@ struct mca_btl_base_descriptor_t* mca_btl_smcuda_prepare_src(
    uint32_t iov_count = 1;
    size_t max_data = *size;
    int rc;

#if OMPI_CUDA_SUPPORT
    if (0 != reserve) {
#endif /* OMPI_CUDA_SUPPORT */
@@ -767,7 +847,7 @@ struct mca_btl_base_descriptor_t* mca_btl_smcuda_prepare_src(
}

#if 0
#define MCA_BTL_SMCUDA_TOUCH_DATA_TILL_CACHELINE_BOUNDARY(sm_frag) \
#define MCA_BTL_SMCUDA_TOUCH_DATA_TILL_CACHELINE_BOUNDARY(sm_frag) \
do { \
    char* _memory = (char*)(sm_frag)->segment.base.seg_addr.pval + \
        (sm_frag)->segment.base.seg_len; \
@@ -1054,6 +1134,32 @@ int mca_btl_smcuda_get_cuda(struct mca_btl_base_module_t* btl,
}
#endif /* OMPI_CUDA_SUPPORT */

/**
 *
 */
void mca_btl_smcuda_dump(struct mca_btl_base_module_t* btl,
                         struct mca_btl_base_endpoint_t* endpoint,
                         int verbose)
{
    opal_list_item_t *item;
    mca_btl_smcuda_frag_t* frag;

    mca_btl_base_err("BTL SM %p endpoint %p [smp_rank %d] [peer_rank %d]\n",
                     (void*) btl, (void*) endpoint,
                     endpoint->my_smp_rank, endpoint->peer_smp_rank);
    if( NULL != endpoint ) {
        for(item = opal_list_get_first(&endpoint->pending_sends);
            item != opal_list_get_end(&endpoint->pending_sends);
            item = opal_list_get_next(item)) {
            frag = (mca_btl_smcuda_frag_t*)item;
            mca_btl_base_err(" | frag %p size %lu (hdr frag %p len %lu rank %d tag %d)\n",
                             (void*) frag, frag->size, (void*) frag->hdr->frag,
                             frag->hdr->len, frag->hdr->my_smp_rank,
                             frag->hdr->tag);
        }
    }
}

#if OPAL_ENABLE_FT_CR == 0
int mca_btl_smcuda_ft_event(int state) {
    return OMPI_SUCCESS;
@@ -39,6 +39,7 @@

#include "opal/util/bit_ops.h"
#include "opal/class/opal_free_list.h"

#include "ompi/mca/btl/btl.h"
#include "ompi/mca/common/sm/common_sm.h"

@@ -191,6 +192,16 @@ struct mca_btl_smcuda_component_t {
    /** If we want DMA and DMA is supported, this will be loaded with
        KNEM_FLAG_DMA.  Otherwise, it'll be 0. */
    int knem_dma_flag;

    /** MCA: should we be using CMA or not?
        0 = no, 1 = yes */
    int use_cma;

    /* /// well-known file names for sm and sm mpool init /// */
    char *sm_mpool_ctl_file_name;
    char *sm_mpool_rndv_file_name;
    char *sm_ctl_file_name;
    char *sm_rndv_file_name;
};
typedef struct mca_btl_smcuda_component_t mca_btl_smcuda_component_t;
OMPI_MODULE_DECLSPEC extern mca_btl_smcuda_component_t mca_btl_smcuda_component;
@@ -207,10 +218,6 @@ struct mca_btl_smcuda_t {
typedef struct mca_btl_smcuda_t mca_btl_smcuda_t;
OMPI_MODULE_DECLSPEC extern mca_btl_smcuda_t mca_btl_smcuda;





struct btl_smcuda_pending_send_item_t
{
    opal_free_list_item_t super;
@@ -484,6 +491,11 @@ extern struct mca_btl_base_descriptor_t* mca_btl_smcuda_prepare_dst(
    uint32_t flags);
#endif /* OMPI_CUDA_SUPPORT */


extern void mca_btl_smcuda_dump(struct mca_btl_base_module_t* btl,
                                struct mca_btl_base_endpoint_t* endpoint,
                                int verbose);

/**
 * Fault Tolerance Event Notification Function
 * @param state Checkpoint State
@@ -41,22 +41,25 @@
#include <sys/stat.h>  /* for mkfifo */
#endif /* HAVE_SYS_STAT_H */

#include "ompi/constants.h"
#include "opal/mca/event/event.h"
#include "opal/mca/base/mca_base_param.h"
#include "opal/mca/shmem/base/base.h"
#include "opal/mca/shmem/shmem.h"
#include "opal/util/bit_ops.h"
#include "opal/util/output.h"
#include "orte/util/proc_info.h"

#include "orte/util/show_help.h"
#include "orte/runtime/orte_globals.h"
#include "orte/util/proc_info.h"

#include "opal/mca/base/mca_base_param.h"
#include "ompi/constants.h"
#include "ompi/runtime/ompi_module_exchange.h"
#include "ompi/mca/mpool/base/base.h"
#include "ompi/mca/common/sm/common_sm.h"
#include "ompi/mca/btl/base/btl_base_error.h"
#if OMPI_CUDA_SUPPORT
#include "ompi/runtime/params.h"
#include "ompi/mca/common/cuda/common_cuda.h"
#endif /* OMPI_CUDA_SUPPORT */
#include "ompi/mca/common/sm/common_sm.h"
#include "ompi/mca/btl/base/btl_base_error.h"

#if OPAL_ENABLE_FT_CR == 1
#include "opal/runtime/opal_cr.h"
@@ -75,6 +78,10 @@ static mca_btl_base_module_t** mca_btl_smcuda_component_init(
    bool enable_mpi_threads
);

typedef enum {
    MCA_BTL_SM_RNDV_MOD_SM = 0,
    MCA_BTL_SM_RNDV_MOD_MPOOL
} mca_btl_sm_rndv_module_type_t;

/*
 * Shared Memory (SM) component instance.
@@ -290,49 +297,417 @@ CLEANUP:
    return return_value;
}

/*
 * Returns the number of processes on the node.
 */
static inline int
get_num_local_procs(void)
{
    /* num_local_peers does not include us in
     * its calculation, so adjust for that */
    return (int)(1 + orte_process_info.num_local_peers);
}

static void
calc_sm_max_procs(int n)
{
    /* see if need to allocate space for extra procs */
    if (0 > mca_btl_smcuda_component.sm_max_procs) {
        /* no limit */
        if (0 <= mca_btl_smcuda_component.sm_extra_procs) {
            /* limit */
            mca_btl_smcuda_component.sm_max_procs =
                n + mca_btl_smcuda_component.sm_extra_procs;
        } else {
            /* no limit */
            mca_btl_smcuda_component.sm_max_procs = 2 * n;
        }
    }
}
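A quick worked example of the sizing policy above (the starting values are assumed for illustration, not taken from the commit):

/* Example: with sm_max_procs < 0 and sm_extra_procs < 0, n = 16 local
 * procs yields sm_max_procs = 2 * 16 = 32; with sm_extra_procs = 4 it
 * would instead yield 16 + 4 = 20. */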

static int
create_and_attach(mca_btl_smcuda_component_t *comp_ptr,
                  size_t size,
                  char *file_name,
                  size_t size_ctl_structure,
                  size_t data_seg_alignment,
                  mca_common_sm_module_t **out_modp)

{
    if (NULL == (*out_modp =
                 mca_common_sm_module_create_and_attach(size, file_name,
                                                        size_ctl_structure,
                                                        data_seg_alignment))) {
        opal_output(0, "create_and_attach: unable to create shared memory "
"BTL coordinating strucure :: size %lu \n",
|
||||
                    (unsigned long)size);
        return OMPI_ERROR;
    }
    return OMPI_SUCCESS;
}

/*
 * SKG - I'm not happy with this, but I can't figure out a better way of
 * finding the sm mpool's minimum size 8-|. The way I see it, this BTL only
 * uses the sm mpool, so maybe this isn't so bad...
 *
 * The problem is that we need to size the mpool resources at sm BTL component
 * init. That means we need to know the mpool's minimum size at create.
 */
static int
get_min_mpool_size(mca_btl_smcuda_component_t *comp_ptr,
                   size_t *out_size)
{
    char *type_name = "mpool";
    char *param_name = "min_size";
    char *min_size = NULL;
    int id = 0;
    size_t default_min = 67108864;
    size_t size = 0;
    long tmp_size = 0;

    if (0 > (id = mca_base_param_find(type_name, comp_ptr->sm_mpool_name,
                                      param_name))) {
        opal_output(0, "mca_base_param_find: failure looking for %s_%s_%s\n",
                    type_name, comp_ptr->sm_mpool_name, param_name);
        return OMPI_ERR_NOT_FOUND;
    }
    if (OPAL_ERROR == mca_base_param_lookup_string(id, &min_size)) {
        opal_output(0, "mca_base_param_lookup_string failure\n");
        return OMPI_ERROR;
    }
    errno = 0;
    tmp_size = strtol(min_size, (char **)NULL, 10);
    if (ERANGE == errno || EINVAL == errno || tmp_size <= 0) {
        opal_output(0, "mca_btl_sm::get_min_mpool_size: "
                    "Unusable %s_%s_min_size provided. "
                    "Continuing with %lu.", type_name,
                    comp_ptr->sm_mpool_name,
                    (unsigned long)default_min);

        size = default_min;
    }
    else {
        size = (size_t)tmp_size;
    }
    free(min_size);
    *out_size = size;
    return OMPI_SUCCESS;
}
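For instance, if comp_ptr->sm_mpool_name is "sm" (a presumed default; the pool name is configurable), the lookup above resolves the MCA parameter mpool_sm_min_size:

/* Example (assuming sm_mpool_name == "sm"):
 *
 *   mpirun --mca mpool_sm_min_size 134217728 ...
 *
 * would raise the minimum to 128 MB; an unusable value falls back to
 * default_min (64 MB). */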

static int
get_mpool_res_size(int32_t max_procs,
                   size_t *out_res_size)
{
    size_t size = 0;

    *out_res_size = 0;
    /* determine how much memory to create */
    /*
     * This heuristic formula mostly says that we request memory for:
     * - nfifos FIFOs, each comprising:
     *   . a sm_fifo_t structure
     *   . many pointers (fifo_size of them per FIFO)
     * - eager fragments (2*n of them, allocated in sm_free_list_inc chunks)
     * - max fragments (sm_free_list_num of them)
     *
     * On top of all that, we sprinkle in some number of
     * "opal_cache_line_size" additions to account for some
     * padding and edge effects that may lie in the allocator.
     */
    size = FIFO_MAP_NUM(max_procs) *
           (sizeof(sm_fifo_t) + sizeof(void *) *
            mca_btl_smcuda_component.fifo_size + 4 * opal_cache_line_size) +
           (2 * max_procs + mca_btl_smcuda_component.sm_free_list_inc) *
           (mca_btl_smcuda_component.eager_limit + 2 * opal_cache_line_size) +
           mca_btl_smcuda_component.sm_free_list_num *
           (mca_btl_smcuda_component.max_frag_size + 2 * opal_cache_line_size);

    /* add something for the control structure */
    size += sizeof(mca_common_sm_module_t);

    /* before we multiply by max_procs, make sure the result won't overflow */
    /* Stick that little pad in, particularly since we'll eventually
     * need a little extra space.  E.g., in mca_mpool_sm_init() in
     * mpool_sm_component.c when sizeof(mca_common_sm_module_t) is
     * added.
     */
    if (((double)size) * max_procs > LONG_MAX - 4096) {
        return OMPI_ERR_VALUE_OUT_OF_BOUNDS;
    }
    size *= (size_t)max_procs;
    *out_res_size = size;
    return OMPI_SUCCESS;
}
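As a rough worked example of the heuristic above, with hypothetical parameter values (none of these numbers come from the commit itself):

/* Hypothetical sizing: max_procs = 4, FIFO_MAP_NUM(4) = 4, fifo_size = 4096,
 * sm_free_list_inc = 64, sm_free_list_num = 8, eager_limit = 4096,
 * max_frag_size = 32768, opal_cache_line_size = 128, sizeof(sm_fifo_t) = 64:
 *
 *   FIFOs : 4 * (64 + 8*4096 + 4*128)   = 133376 bytes
 *   eager : (2*4 + 64) * (4096 + 2*128) = 313344 bytes
 *   max   : 8 * (32768 + 2*128)         = 264192 bytes
 *
 * i.e. ~711 KB before the control structure is added and the total is
 * multiplied by max_procs -- and the result may still be raised to the
 * mpool minimum (default_min above is 64 MB). */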


/* Generates all the unique paths for the shared-memory segments that this BTL
 * needs along with other file paths used to share "connection information". */
static int
set_uniq_paths_for_init_rndv(mca_btl_smcuda_component_t *comp_ptr)
{
    int rc = OMPI_ERR_OUT_OF_RESOURCE;

    /* NOTE: don't forget to free these after init */
    comp_ptr->sm_mpool_ctl_file_name = NULL;
    comp_ptr->sm_mpool_rndv_file_name = NULL;
    comp_ptr->sm_ctl_file_name = NULL;
    comp_ptr->sm_rndv_file_name = NULL;

    if (asprintf(&comp_ptr->sm_mpool_ctl_file_name,
                 "%s"OPAL_PATH_SEP"shared_mem_cuda_pool.%s",
                 orte_process_info.job_session_dir,
                 orte_process_info.nodename) < 0) {
        /* rc set */
        goto out;
    }
    if (asprintf(&comp_ptr->sm_mpool_rndv_file_name,
                 "%s"OPAL_PATH_SEP"shared_mem_cuda_pool_rndv.%s",
                 orte_process_info.job_session_dir,
                 orte_process_info.nodename) < 0) {
        /* rc set */
        goto out;
    }
    if (asprintf(&comp_ptr->sm_ctl_file_name,
                 "%s"OPAL_PATH_SEP"shared_mem_cuda_btl_module.%s",
                 orte_process_info.job_session_dir,
                 orte_process_info.nodename) < 0) {
        /* rc set */
        goto out;
    }
    if (asprintf(&comp_ptr->sm_rndv_file_name,
                 "%s"OPAL_PATH_SEP"shared_mem_cuda_btl_rndv.%s",
                 orte_process_info.job_session_dir,
                 orte_process_info.nodename) < 0) {
        /* rc set */
        goto out;
    }
    /* all is well */
    rc = OMPI_SUCCESS;

out:
    if (OMPI_SUCCESS != rc) {
        if (comp_ptr->sm_mpool_ctl_file_name) {
            free(comp_ptr->sm_mpool_ctl_file_name);
        }
        if (comp_ptr->sm_mpool_rndv_file_name) {
            free(comp_ptr->sm_mpool_rndv_file_name);
        }
        if (comp_ptr->sm_ctl_file_name) {
            free(comp_ptr->sm_ctl_file_name);
        }
        if (comp_ptr->sm_rndv_file_name) {
            free(comp_ptr->sm_rndv_file_name);
        }
    }
    return rc;
}

static int
create_rndv_file(mca_btl_smcuda_component_t *comp_ptr,
                 mca_btl_sm_rndv_module_type_t type)
{
    size_t size = 0;
    int rc = OMPI_SUCCESS;
    int fd = -1;
    char *fname = NULL;
    /* used as a temporary store so we can extract shmem_ds info */
    mca_common_sm_module_t *tmp_modp = NULL;

    if (MCA_BTL_SM_RNDV_MOD_MPOOL == type) {
        size_t min_size = 0;
        /* get the segment size for the sm mpool. */
        if (OMPI_SUCCESS != (rc = get_mpool_res_size(comp_ptr->sm_max_procs,
                                                     &size))) {
            /* rc is already set */
            goto out;
        }
        /* do we need to update the size based on the sm mpool's min size? */
        if (OMPI_SUCCESS != (rc = get_min_mpool_size(comp_ptr, &min_size))) {
            goto out;
        }
        /* update size if less than required minimum */
        if (size < min_size) {
            size = min_size;
        }
        /* we only need the shmem_ds info at this point. initialization will be
         * completed in the mpool module code. the idea is that we just need this
         * info so we can populate the rndv file (or modex when we have it). */
        if (OMPI_SUCCESS != (rc =
            create_and_attach(comp_ptr, size, comp_ptr->sm_mpool_ctl_file_name,
                              sizeof(mca_common_sm_module_t), 8, &tmp_modp))) {
            /* rc is set */
            goto out;
        }
        fname = comp_ptr->sm_mpool_rndv_file_name;
    }
    else if (MCA_BTL_SM_RNDV_MOD_SM == type) {
        /* calculate the segment size. */
        size = sizeof(mca_common_sm_seg_header_t) +
               comp_ptr->sm_max_procs *
               (sizeof(sm_fifo_t *) +
                sizeof(char *) + sizeof(uint16_t)) +
               opal_cache_line_size;

        if (OMPI_SUCCESS != (rc =
            create_and_attach(comp_ptr, size, comp_ptr->sm_ctl_file_name,
                              sizeof(mca_common_sm_seg_header_t),
                              opal_cache_line_size, &comp_ptr->sm_seg))) {
            /* rc is set */
            goto out;
        }
        fname = comp_ptr->sm_rndv_file_name;
        tmp_modp = comp_ptr->sm_seg;
    }
    else {
        return OMPI_ERR_BAD_PARAM;
    }

    /* at this point, we have all the info we need to populate the rendezvous
     * file containing all the meta info required for attach. */

    /* now just write the contents of tmp_modp->shmem_ds to the full
     * sizeof(opal_shmem_ds_t), so we know where the mpool_res_size starts. */
    if (-1 == (fd = open(fname, O_CREAT | O_RDWR, 0600))) {
        int err = errno;
        orte_show_help("help-mpi-btl-sm.txt", "sys call fail", true,
                       "open(2)", strerror(err), err);
        rc = OMPI_ERR_IN_ERRNO;
        goto out;
    }
    if ((ssize_t)sizeof(opal_shmem_ds_t) != write(fd, &(tmp_modp->shmem_ds),
                                                  sizeof(opal_shmem_ds_t))) {
        int err = errno;
        orte_show_help("help-mpi-btl-sm.txt", "sys call fail", true,
                       "write(2)", strerror(err), err);
        rc = OMPI_ERR_IN_ERRNO;
        goto out;
    }
    if (MCA_BTL_SM_RNDV_MOD_MPOOL == type) {
        if ((ssize_t)sizeof(size) != write(fd, &size, sizeof(size))) {
            int err = errno;
            orte_show_help("help-mpi-btl-sm.txt", "sys call fail", true,
                           "write(2)", strerror(err), err);
            rc = OMPI_ERR_IN_ERRNO;
            goto out;
        }
        /* only do this for the mpool case */
        OBJ_RELEASE(tmp_modp);
    }

out:
    if (-1 != fd) {
        (void)close(fd);
    }
    return rc;
}

/*
 * Creates information required for the sm modex and modex sends it.
 */
static int
backing_store_init(mca_btl_smcuda_component_t *comp_ptr,
                   orte_node_rank_t node_rank)
{
    int rc = OMPI_SUCCESS;

    if (OMPI_SUCCESS != (rc = set_uniq_paths_for_init_rndv(comp_ptr))) {
        goto out;
    }
    if (0 == node_rank) {
        /* === sm mpool === */
        if (OMPI_SUCCESS != (rc =
            create_rndv_file(comp_ptr, MCA_BTL_SM_RNDV_MOD_MPOOL))) {
            goto out;
        }
        /* === sm === */
        if (OMPI_SUCCESS != (rc =
            create_rndv_file(comp_ptr, MCA_BTL_SM_RNDV_MOD_SM))) {
            goto out;
        }
    }

out:
    return rc;
}
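Putting the pieces together, a sketch of the init-time rendezvous protocol implemented above and consumed in btl_smcuda.c (an editorial summary of the flow, not new code):

/* Init-time rendezvous flow (sketch):
 *
 *   mca_btl_smcuda_component_init(), all local procs:
 *     set_uniq_paths_for_init_rndv()  -- everyone stashes the file paths
 *     node rank 0 only, via backing_store_init():
 *       create_rndv_file(MPOOL)       -- writes shmem_ds + mpool size
 *       create_rndv_file(SM)          -- writes shmem_ds
 *
 *   add_procs() -> smcuda_btl_first_time_init(), all local procs:
 *     setup_mpool_base_resources()    -- read the mpool rndv file
 *     sm_segment_attach()             -- non-zero smp ranks attach
 *     smp rank 0 unlinks both rndv files once seg_inited shows that
 *     every local proc has finished attaching. */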

/*
 * SM component initialization
 */
static mca_btl_base_module_t** mca_btl_smcuda_component_init(
    int *num_btls,
    bool enable_progress_threads,
    bool enable_mpi_threads)
static mca_btl_base_module_t **
mca_btl_smcuda_component_init(int *num_btls,
                              bool enable_progress_threads,
                              bool enable_mpi_threads)
{
    int num_local_procs = 0;
    mca_btl_base_module_t **btls = NULL;
    orte_node_rank_t my_node_rank = ORTE_NODE_RANK_INVALID;

    *num_btls = 0;

    /* if no session directory was created, then we cannot be used */
    if (!orte_create_session_dirs) {
        return NULL;
    }

    /* lookup/create shared memory pool only when used */
    mca_btl_smcuda_component.sm_mpool = NULL;
    mca_btl_smcuda_component.sm_mpool_base = NULL;

#if OMPI_ENABLE_PROGRESS_THREADS == 1
    /* create a named pipe to receive events */
    sprintf( mca_btl_smcuda_component.sm_fifo_path,
             "%s"OPAL_PATH_SEP"sm_fifo.%lu", orte_process_info.job_session_dir,
             (unsigned long)ORTE_PROC_MY_NAME->vpid );
    if(mkfifo(mca_btl_smcuda_component.sm_fifo_path, 0660) < 0) {
        opal_output(0, "mca_btl_smcuda_component_init: mkfifo failed with errno=%d\n",errno);
    /* if no session directory was created, then we cannot be used */
    /* SKG - this isn't true anymore. Some backing facilities don't require a
     * file-backed store. Extend shmem to provide this info one day. Especially
     * when we use a proper modex for init. */
    if (!orte_create_session_dirs) {
        return NULL;
    }
    mca_btl_smcuda_component.sm_fifo_fd = open(mca_btl_smcuda_component.sm_fifo_path, O_RDWR);
    /* if we don't have locality information, then we cannot be used because we
     * need to know the respective node ranks for initialization. */
    if (ORTE_NODE_RANK_INVALID ==
        (my_node_rank = orte_process_info.my_node_rank)) {
        orte_show_help("help-mpi-btl-sm.txt", "no locality", true);
        return NULL;
    }
    /* no use trying to use sm with less than two procs, so just bail. */
    if ((num_local_procs = get_num_local_procs()) < 2) {
        return NULL;
    }
    /* calculate max procs so we can figure out how large to make the
     * shared-memory segment. this routine sets component sm_max_procs. */
    calc_sm_max_procs(num_local_procs);

    /* This is where the modex will live some day. For now, just have local rank
     * 0 create a rendezvous file containing the backing store info, so the
     * other local procs can read from it during add_procs. The rest will just
     * stash the known paths for use later in init. */
    if (OMPI_SUCCESS != backing_store_init(&mca_btl_smcuda_component,
                                           my_node_rank)) {
        return NULL;
    }

#if OMPI_ENABLE_PROGRESS_THREADS == 1
    /* create a named pipe to receive events */
    sprintf(mca_btl_smcuda_component.sm_fifo_path,
            "%s"OPAL_PATH_SEP"sm_fifo.%lu",
            orte_process_info.job_session_dir,
            (unsigned long)ORTE_PROC_MY_NAME->vpid);
    if (mkfifo(mca_btl_smcuda_component.sm_fifo_path, 0660) < 0) {
        opal_output(0, "mca_btl_smcuda_component_init: "
                    "mkfifo failed with errno=%d\n",errno);
        return NULL;
    }
    mca_btl_smcuda_component.sm_fifo_fd = open(mca_btl_smcuda_component.sm_fifo_path,
                                               O_RDWR);
    if(mca_btl_smcuda_component.sm_fifo_fd < 0) {
        opal_output(0, "mca_btl_smcuda_component_init: open(%s) failed with errno=%d\n",
        opal_output(0, "mca_btl_smcuda_component_init: "
                    "open(%s) failed with errno=%d\n",
                    mca_btl_smcuda_component.sm_fifo_path, errno);
        return NULL;
    }

    OBJ_CONSTRUCT(&mca_btl_smcuda_component.sm_fifo_thread, opal_thread_t);
    mca_btl_smcuda_component.sm_fifo_thread.t_run = (opal_thread_fn_t) mca_btl_smcuda_component_event_thread;
    mca_btl_smcuda_component.sm_fifo_thread.t_run =
        (opal_thread_fn_t)mca_btl_smcuda_component_event_thread;
    opal_thread_start(&mca_btl_smcuda_component.sm_fifo_thread);
#endif

    mca_btl_smcuda_component.sm_btls = (mca_btl_smcuda_t **) malloc( mca_btl_smcuda_component.sm_max_btls * sizeof (mca_btl_smcuda_t *));
    mca_btl_smcuda_component.sm_btls =
        (mca_btl_smcuda_t **)malloc(mca_btl_smcuda_component.sm_max_btls *
                                    sizeof(mca_btl_smcuda_t *));
    if (NULL == mca_btl_smcuda_component.sm_btls) {
        return NULL;
    }
@@ -361,6 +736,7 @@ static mca_btl_base_module_t** mca_btl_smcuda_component_init(
    mca_btl_smcuda.super.btl_get = mca_btl_smcuda_get_cuda;
#endif /* OMPI_CUDA_SUPPORT */


    return btls;

}
@@ -482,8 +858,8 @@ int mca_btl_smcuda_component_progress(void)
#endif
        /* recv upcall */
        reg = mca_btl_base_active_message_trigger + hdr->tag;
        seg.seg_addr.pval = ((char*)hdr) + sizeof(mca_btl_smcuda_hdr_t);
        seg.seg_len = hdr->len;
        seg.seg_addr.pval = ((char *)hdr) + sizeof(mca_btl_smcuda_hdr_t);
        seg.seg_len = hdr->len;
        Frag.base.des_dst_cnt = 1;
        Frag.base.des_dst = &seg;
        reg->cbfunc(&mca_btl_smcuda.super, hdr->tag, &(Frag.base),
@@ -43,7 +43,7 @@ struct mca_btl_base_endpoint_t {
    opal_list_t pending_sends;  /**< pending data to send */

    /** lock for concurrent access to endpoint state */
    opal_mutex_t endpoint_lock;
    opal_mutex_t endpoint_lock;

};

@@ -54,7 +54,7 @@ struct mca_btl_smcuda_segment_t {
    ompi_ptr_t memh_seg_addr;
    /** Length in bytes of entire memory handle */
    uint32_t memh_seg_len;
#endif
#endif /* OMPI_CUDA_SUPPORT */
};
typedef struct mca_btl_smcuda_segment_t mca_btl_smcuda_segment_t;