From 08dce4f161c69a8a48ca85e8e9062beb58ff464f Mon Sep 17 00:00:00 2001 From: Howard Pritchard Date: Fri, 18 Nov 2016 19:26:15 -0700 Subject: [PATCH] pmix/cray: whitespace cleanup Get rid of tabs. This is anti-ompi style. Signed-off-by: Howard Pritchard --- opal/mca/pmix/cray/pmix_cray.c | 494 ++++++++++----------- opal/mca/pmix/cray/pmix_cray.h | 2 + opal/mca/pmix/cray/pmix_cray_component.c | 70 +-- opal/mca/pmix/cray/pmix_cray_pmap_parser.c | 106 ++--- 4 files changed, 338 insertions(+), 334 deletions(-) diff --git a/opal/mca/pmix/cray/pmix_cray.c b/opal/mca/pmix/cray/pmix_cray.c index f53b73692a..e17697f74c 100644 --- a/opal/mca/pmix/cray/pmix_cray.c +++ b/opal/mca/pmix/cray/pmix_cray.c @@ -42,42 +42,42 @@ static int cray_init(void); static int cray_fini(void); static int cray_initialized(void); static int cray_abort(int flat, const char *msg, - opal_list_t *procs); + opal_list_t *procs); static int cray_spawn(opal_list_t *jobinfo, opal_list_t *apps, opal_jobid_t *jobid); static int cray_spawn_nb(opal_list_t *jobinfo, opal_list_t *apps, - opal_pmix_spawn_cbfunc_t cbfunc, - void *cbdata); + opal_pmix_spawn_cbfunc_t cbfunc, + void *cbdata); static int cray_job_connect(opal_list_t *procs); static int cray_job_disconnect(opal_list_t *procs); static int cray_job_disconnect_nb(opal_list_t *procs, - opal_pmix_op_cbfunc_t cbfunc, - void *cbdata); + opal_pmix_op_cbfunc_t cbfunc, + void *cbdata); static int cray_resolve_peers(const char *nodename, - opal_jobid_t jobid, - opal_list_t *procs); + opal_jobid_t jobid, + opal_list_t *procs); static int cray_resolve_nodes(opal_jobid_t jobid, char **nodelist); static int cray_put(opal_pmix_scope_t scope, opal_value_t *kv); static int cray_fencenb(opal_list_t *procs, int collect_data, - opal_pmix_op_cbfunc_t cbfunc, void *cbdata); + opal_pmix_op_cbfunc_t cbfunc, void *cbdata); static int cray_commit(void); static int cray_get(const opal_process_name_t *id, - const char *key, opal_list_t *info, - opal_value_t **kv); + const char *key, opal_list_t *info, + opal_value_t **kv); static int cray_get_nb(const opal_process_name_t *id, const char *key, - opal_list_t *info, - opal_pmix_value_cbfunc_t cbfunc, void *cbdata); + opal_list_t *info, + opal_pmix_value_cbfunc_t cbfunc, void *cbdata); static int cray_publish(opal_list_t *info); static int cray_publish_nb(opal_list_t *info, - opal_pmix_op_cbfunc_t cbfunc, void *cbdata); + opal_pmix_op_cbfunc_t cbfunc, void *cbdata); static int cray_lookup(opal_list_t *data, opal_list_t *info); static int cray_lookup_nb(char **keys, opal_list_t *info, - opal_pmix_lookup_cbfunc_t cbfunc, void *cbdata); + opal_pmix_lookup_cbfunc_t cbfunc, void *cbdata); static int cray_unpublish(char **keys, opal_list_t *info); static int cray_unpublish_nb(char **keys, opal_list_t *info, - opal_pmix_op_cbfunc_t cbfunc, void *cbdata); + opal_pmix_op_cbfunc_t cbfunc, void *cbdata); static const char *cray_get_version(void); static int cray_store_local(const opal_process_name_t *proc, - opal_value_t *val); + opal_value_t *val); static const char *cray_get_nspace(opal_jobid_t jobid); static void cray_register_jobid(opal_jobid_t jobid, const char *nspace); @@ -128,8 +128,8 @@ typedef struct { void *cbdata; } pmi_opcaddy_t; static OBJ_CLASS_INSTANCE(pmi_opcaddy_t, - opal_object_t, - NULL, NULL); + opal_object_t, + NULL, NULL); // PMI constant values: static int pmix_kvslen_max = 0; @@ -153,9 +153,9 @@ static uint32_t pmix_jobid = -1; static char* pmix_error(int pmix_err); #define OPAL_PMI_ERROR(pmi_err, pmi_func) \ do { \ - opal_output(0, "%s [%s:%d:%s]: %s\n", \ - pmi_func, __FILE__, __LINE__, __func__, \ - pmix_error(pmi_err)); \ + opal_output(0, "%s [%s:%d:%s]: %s\n", \ + pmi_func, __FILE__, __LINE__, __func__, \ + pmix_error(pmi_err)); \ } while(0); static int cray_init(void) @@ -177,21 +177,21 @@ static int cray_init(void) /* if we can't startup PMI, we can't be used */ if ( PMI2_Initialized () ) { - opal_output_verbose(10, opal_pmix_base_framework.framework_output, - "%s pmix:cray: pmi already initialized", - OPAL_NAME_PRINT(pmix_pname)); - return OPAL_SUCCESS; + opal_output_verbose(10, opal_pmix_base_framework.framework_output, + "%s pmix:cray: pmi already initialized", + OPAL_NAME_PRINT(pmix_pname)); + return OPAL_SUCCESS; } size = -1; rank = -1; appnum = -1; if (PMI_SUCCESS != (rc = PMI2_Init(&spawned, &size, &rank, &appnum))) { - opal_show_help("help-pmix-base.txt", "pmix2-init-failed", true, rc); - return OPAL_ERROR; + opal_show_help("help-pmix-base.txt", "pmix2-init-failed", true, rc); + return OPAL_ERROR; } if( size < 0 || rank < 0 ){ - opal_show_help("help-pmix-base.txt", "pmix2-init-returned-bad-values", true); - goto err_exit; + opal_show_help("help-pmix-base.txt", "pmix2-init-returned-bad-values", true); + goto err_exit; } pmix_size = size; @@ -209,32 +209,32 @@ static int cray_init(void) */ if (PMI_SUCCESS != PMI_Get_version_info(&major,&minor,&revision)) { - return OPAL_ERROR; + return OPAL_ERROR; } snprintf(cray_pmi_version, sizeof(cray_pmi_version), - "%d.%d.%d", major, minor, revision); + "%d.%d.%d", major, minor, revision); pmix_kvs_name = (char*)malloc(pmix_kvslen_max); if( pmix_kvs_name == NULL ){ - PMI2_Finalize(); - ret = OPAL_ERR_OUT_OF_RESOURCE; - goto err_exit; + PMI2_Finalize(); + ret = OPAL_ERR_OUT_OF_RESOURCE; + goto err_exit; } rc = PMI2_Job_GetId(pmix_kvs_name, pmix_kvslen_max); if( PMI_SUCCESS != rc ) { - OPAL_PMI_ERROR(rc, "PMI2_Job_GetId"); - goto err_exit; + OPAL_PMI_ERROR(rc, "PMI2_Job_GetId"); + goto err_exit; } rc = sscanf(pmix_kvs_name,"kvs_%u",&jobfam); if (rc != 1) { - opal_output_verbose(10, opal_pmix_base_framework.framework_output, - "%s pmix:cray: pmix_kvs_name %s", - OPAL_NAME_PRINT(pmix_pname), pmix_kvs_name); - rc = OPAL_ERROR; - goto err_exit; + opal_output_verbose(10, opal_pmix_base_framework.framework_output, + "%s pmix:cray: pmix_kvs_name %s", + OPAL_NAME_PRINT(pmix_pname), pmix_kvs_name); + rc = OPAL_ERROR; + goto err_exit; } pmix_jobid = jobfam << 16; @@ -247,27 +247,27 @@ static int cray_init(void) pmix_pname.vpid = pmix_rank; opal_proc_set_name(&pmix_pname); opal_output_verbose(10, opal_pmix_base_framework.framework_output, - "%s pmix:cray: assigned tmp name %d %d pmix_kvs_name %s", - OPAL_NAME_PRINT(pmix_pname),pmix_pname.jobid,pmix_pname.vpid,pmix_kvs_name); + "%s pmix:cray: assigned tmp name %d %d pmix_kvs_name %s", + OPAL_NAME_PRINT(pmix_pname),pmix_pname.jobid,pmix_pname.vpid,pmix_kvs_name); pmapping = (char*)malloc(PMI2_MAX_VALLEN); if( pmapping == NULL ){ - rc = OPAL_ERR_OUT_OF_RESOURCE; - OPAL_ERROR_LOG(rc); - return rc; + rc = OPAL_ERR_OUT_OF_RESOURCE; + OPAL_ERROR_LOG(rc); + return rc; } rc = PMI2_Info_GetJobAttr("PMI_process_mapping", pmapping, PMI2_MAX_VALLEN, &found); if( !found || PMI_SUCCESS != rc ) { - OPAL_PMI_ERROR(rc,"PMI2_Info_GetJobAttr"); - return OPAL_ERROR; + OPAL_PMI_ERROR(rc,"PMI2_Info_GetJobAttr"); + return OPAL_ERROR; } pmix_lranks = pmix_cray_parse_pmap(pmapping, pmix_rank, &my_node, &pmix_nlranks); if (NULL == pmix_lranks) { - rc = OPAL_ERR_OUT_OF_RESOURCE; - OPAL_ERROR_LOG(rc); - return rc; + rc = OPAL_ERR_OUT_OF_RESOURCE; + OPAL_ERROR_LOG(rc); + return rc; } free(pmapping); @@ -285,9 +285,9 @@ static int cray_init(void) kv.type = OPAL_UINT32; kv.data.uint32 = pmix_size; if (OPAL_SUCCESS != (rc = opal_pmix_base_store(&name, &kv))) { - OPAL_ERROR_LOG(rc); - OBJ_DESTRUCT(&kv); - goto err_exit; + OPAL_ERROR_LOG(rc); + OBJ_DESTRUCT(&kv); + goto err_exit; } OBJ_DESTRUCT(&kv); @@ -297,16 +297,16 @@ static int cray_init(void) kv.type = OPAL_UINT32; kv.data.uint32 = pmix_appnum; if (OPAL_SUCCESS != (ret = opal_pmix_base_store(&OPAL_PROC_MY_NAME, &kv))) { - OPAL_ERROR_LOG(ret); - OBJ_DESTRUCT(&kv); - goto err_exit; + OPAL_ERROR_LOG(ret); + OBJ_DESTRUCT(&kv); + goto err_exit; } OBJ_DESTRUCT(&kv); rc = PMI2_Info_GetJobAttr("universeSize", buf, 16, &found); if( PMI_SUCCESS != rc ) { - OPAL_PMI_ERROR(rc, "PMI_Get_universe_size"); - goto err_exit; + OPAL_PMI_ERROR(rc, "PMI_Get_universe_size"); + goto err_exit; } pmix_usize = atoi(buf); @@ -316,9 +316,9 @@ static int cray_init(void) kv.type = OPAL_UINT32; kv.data.uint32 = pmix_usize; if (OPAL_SUCCESS != (rc = opal_pmix_base_store(&OPAL_PROC_MY_NAME, &kv))) { - OPAL_ERROR_LOG(rc); - OBJ_DESTRUCT(&kv); - goto err_exit; + OPAL_ERROR_LOG(rc); + OBJ_DESTRUCT(&kv); + goto err_exit; } OBJ_DESTRUCT(&kv); @@ -328,9 +328,9 @@ static int cray_init(void) kv.type = OPAL_UINT32; kv.data.uint32 = pmix_usize; if (OPAL_SUCCESS != (ret = opal_pmix_base_store(&name, &kv))) { - OPAL_ERROR_LOG(ret); - OBJ_DESTRUCT(&kv); - goto err_exit; + OPAL_ERROR_LOG(ret); + OBJ_DESTRUCT(&kv); + goto err_exit; } OBJ_DESTRUCT(&kv); @@ -339,9 +339,9 @@ static int cray_init(void) kv.type = OPAL_UINT32; kv.data.uint32 = pmix_jobid; if (OPAL_SUCCESS != (ret = opal_pmix_base_store(&name, &kv))) { - OPAL_ERROR_LOG(ret); - OBJ_DESTRUCT(&kv); - goto err_exit; + OPAL_ERROR_LOG(ret); + OBJ_DESTRUCT(&kv); + goto err_exit; } OBJ_DESTRUCT(&kv); @@ -351,9 +351,9 @@ static int cray_init(void) kv.type = OPAL_UINT32; kv.data.uint32 = pmix_nlranks; if (OPAL_SUCCESS != (rc = opal_pmix_base_store(&name, &kv))) { - OPAL_ERROR_LOG(rc); - OBJ_DESTRUCT(&kv); - goto err_exit; + OPAL_ERROR_LOG(rc); + OBJ_DESTRUCT(&kv); + goto err_exit; } OBJ_DESTRUCT(&kv); @@ -363,12 +363,12 @@ static int cray_init(void) /* find ourselves and build up a string for local peer info */ memset(nmtmp, 0, 64); for (i=0; i < pmix_nlranks; i++) { - ret = snprintf(nmtmp, 64, "%d", pmix_lranks[i]); - opal_argv_append_nosize(&localranks, nmtmp); - if (pmix_rank == pmix_lranks[i]) { - pmix_lrank = i; - pmix_nrank = i; - } + ret = snprintf(nmtmp, 64, "%d", pmix_lranks[i]); + opal_argv_append_nosize(&localranks, nmtmp); + if (pmix_rank == pmix_lranks[i]) { + pmix_lrank = i; + pmix_nrank = i; + } } str = opal_argv_join(localranks, ','); @@ -379,9 +379,9 @@ static int cray_init(void) kv.type = OPAL_STRING; kv.data.string = str; if (OPAL_SUCCESS != (ret = opal_pmix_base_store(&OPAL_PROC_MY_NAME, &kv))) { - OPAL_ERROR_LOG(ret); - OBJ_DESTRUCT(&kv); - goto err_exit; + OPAL_ERROR_LOG(ret); + OBJ_DESTRUCT(&kv); + goto err_exit; } OBJ_DESTRUCT(&kv); @@ -391,9 +391,9 @@ static int cray_init(void) kv.type = OPAL_UINT64; kv.data.uint64 = *(uint64_t*)&ldr; if (OPAL_SUCCESS != (ret = opal_pmix_base_store(&OPAL_PROC_MY_NAME, &kv))) { - OPAL_ERROR_LOG(ret); - OBJ_DESTRUCT(&kv); - goto err_exit; + OPAL_ERROR_LOG(ret); + OBJ_DESTRUCT(&kv); + goto err_exit; } /* save our local rank */ @@ -402,9 +402,9 @@ static int cray_init(void) kv.type = OPAL_UINT16; kv.data.uint16 = pmix_lrank; if (OPAL_SUCCESS != (ret = opal_pmix_base_store(&OPAL_PROC_MY_NAME, &kv))) { - OPAL_ERROR_LOG(ret); - OBJ_DESTRUCT(&kv); - goto err_exit; + OPAL_ERROR_LOG(ret); + OBJ_DESTRUCT(&kv); + goto err_exit; } /* and our node rank */ @@ -413,9 +413,9 @@ static int cray_init(void) kv.type = OPAL_UINT16; kv.data.uint16 = pmix_nrank; if (OPAL_SUCCESS != (ret = opal_pmix_base_store(&OPAL_PROC_MY_NAME, &kv))) { - OPAL_ERROR_LOG(ret); - OBJ_DESTRUCT(&kv); - goto err_exit; + OPAL_ERROR_LOG(ret); + OBJ_DESTRUCT(&kv); + goto err_exit; } OBJ_DESTRUCT(&kv); @@ -428,21 +428,21 @@ err_exit: static int cray_fini(void) { if (0 == pmix_init_count) { - return OPAL_SUCCESS; + return OPAL_SUCCESS; } if (0 == --pmix_init_count) { - PMI2_Finalize(); + PMI2_Finalize(); } if (NULL != pmix_kvs_name) { - free(pmix_kvs_name); - pmix_kvs_name = NULL; + free(pmix_kvs_name); + pmix_kvs_name = NULL; } if (NULL != pmix_lranks) { - free(pmix_lranks); - pmix_lranks = NULL; + free(pmix_lranks); + pmix_lranks = NULL; } return OPAL_SUCCESS; @@ -451,13 +451,13 @@ static int cray_fini(void) { static int cray_initialized(void) { if (0 < pmix_init_count) { - return 1; + return 1; } return 0; } static int cray_abort(int flag, const char *msg, - opal_list_t *procs) + opal_list_t *procs) { PMI2_Abort(flag, msg); return OPAL_SUCCESS; @@ -469,8 +469,8 @@ static int cray_spawn(opal_list_t *jobinfo, opal_list_t *apps, opal_jobid_t *job } static int cray_spawn_nb(opal_list_t *jobinfo, opal_list_t *apps, - opal_pmix_spawn_cbfunc_t cbfunc, - void *cbdata) + opal_pmix_spawn_cbfunc_t cbfunc, + void *cbdata) { return OPAL_ERR_NOT_SUPPORTED; } @@ -486,15 +486,15 @@ static int cray_job_disconnect(opal_list_t *procs) } static int cray_job_disconnect_nb(opal_list_t *procs, - opal_pmix_op_cbfunc_t cbfunc, - void *cbdata) + opal_pmix_op_cbfunc_t cbfunc, + void *cbdata) { return OPAL_ERR_NOT_SUPPORTED; } static int cray_resolve_peers(const char *nodename, - opal_jobid_t jobid, - opal_list_t *procs) + opal_jobid_t jobid, + opal_list_t *procs) { return OPAL_ERR_NOT_IMPLEMENTED; } @@ -505,16 +505,16 @@ static int cray_resolve_nodes(opal_jobid_t jobid, char **nodelist) } static int cray_put(opal_pmix_scope_t scope, - opal_value_t *kv) + opal_value_t *kv) { int rc; opal_output_verbose(10, opal_pmix_base_framework.framework_output, - "%s pmix:cray cray_put key %s scope %d\n", - OPAL_NAME_PRINT(OPAL_PROC_MY_NAME), kv->key, scope); + "%s pmix:cray cray_put key %s scope %d\n", + OPAL_NAME_PRINT(OPAL_PROC_MY_NAME), kv->key, scope); if (!pmix_init_count) { - return OPAL_ERROR; + return OPAL_ERROR; } /* @@ -522,15 +522,15 @@ static int cray_put(opal_pmix_scope_t scope, */ if (NULL == mca_pmix_cray_component.cache_global) { - mca_pmix_cray_component.cache_global = OBJ_NEW(opal_buffer_t); + mca_pmix_cray_component.cache_global = OBJ_NEW(opal_buffer_t); } opal_output_verbose(20, opal_pmix_base_framework.framework_output, - "%s pmix:cray put global data for key %s type %d", - OPAL_NAME_PRINT(OPAL_PROC_MY_NAME), kv->key, kv->type); + "%s pmix:cray put global data for key %s type %d", + OPAL_NAME_PRINT(OPAL_PROC_MY_NAME), kv->key, kv->type); if (OPAL_SUCCESS != (rc = opal_dss.pack(mca_pmix_cray_component.cache_global, &kv, 1, OPAL_VALUE))) { - OPAL_PMI_ERROR(rc,"pmix:cray opal_dss.pack returned error"); - OPAL_ERROR_LOG(rc); + OPAL_PMI_ERROR(rc,"pmix:cray opal_dss.pack returned error"); + OPAL_ERROR_LOG(rc); } return rc; @@ -554,9 +554,9 @@ static void fencenb(int sd, short args, void *cbdata) char *cptr, *rcv_buff = NULL; opal_process_name_t id; typedef struct { - uint32_t pmix_rank; - opal_process_name_t name; - int32_t nbytes; + uint32_t pmix_rank; + opal_process_name_t name; + int32_t nbytes; } bytes_and_rank_t; int32_t rcv_nbytes_tot; bytes_and_rank_t s_bytes_and_rank; @@ -566,10 +566,10 @@ static void fencenb(int sd, short args, void *cbdata) char *cpuset = NULL; opal_output_verbose(2, opal_pmix_base_framework.framework_output, - "%s pmix:cray executing fence cache_global %p cache_local %p", - OPAL_NAME_PRINT(OPAL_PROC_MY_NAME), - (void *)mca_pmix_cray_component.cache_global, - (void *)mca_pmix_cray_component.cache_local); + "%s pmix:cray executing fence cache_global %p cache_local %p", + OPAL_NAME_PRINT(OPAL_PROC_MY_NAME), + (void *)mca_pmix_cray_component.cache_global, + (void *)mca_pmix_cray_component.cache_local); /* @@ -580,8 +580,8 @@ static void fencenb(int sd, short args, void *cbdata) send_buffer = OBJ_NEW(opal_buffer_t); if (NULL == send_buffer) { - rc = OPAL_ERR_OUT_OF_RESOURCE; - goto fn_exit; + rc = OPAL_ERR_OUT_OF_RESOURCE; + goto fn_exit; } opal_dss.copy_payload(send_buffer, mca_pmix_cray_component.cache_global); @@ -591,8 +591,8 @@ static void fencenb(int sd, short args, void *cbdata) r_bytes_and_ranks = (bytes_and_rank_t *)malloc(pmix_size * sizeof(bytes_and_rank_t)); if (NULL == r_bytes_and_ranks) { - rc = OPAL_ERR_OUT_OF_RESOURCE; - goto fn_exit; + rc = OPAL_ERR_OUT_OF_RESOURCE; + goto fn_exit; } /* @@ -603,39 +603,39 @@ static void fencenb(int sd, short args, void *cbdata) */ if (PMI_SUCCESS != (rc = PMI_Allgather(&s_bytes_and_rank,r_bytes_and_ranks,sizeof(bytes_and_rank_t)))) { - OPAL_PMI_ERROR(rc,"PMI_Allgather"); - rc = OPAL_ERR_COMM_FAILURE; - goto fn_exit; + OPAL_PMI_ERROR(rc,"PMI_Allgather"); + rc = OPAL_ERR_COMM_FAILURE; + goto fn_exit; } for (rcv_nbytes_tot=0,i=0; i < pmix_size; i++) { - rcv_nbytes_tot += r_bytes_and_ranks[i].nbytes; + rcv_nbytes_tot += r_bytes_and_ranks[i].nbytes; } opal_output_verbose(20, opal_pmix_base_framework.framework_output, - "%s pmix:cray total number of bytes to receive %d", - OPAL_NAME_PRINT(OPAL_PROC_MY_NAME), rcv_nbytes_tot); + "%s pmix:cray total number of bytes to receive %d", + OPAL_NAME_PRINT(OPAL_PROC_MY_NAME), rcv_nbytes_tot); rcv_buff = (char *) malloc(rcv_nbytes_tot * sizeof(char)); if (NULL == rcv_buff) { - rc = OPAL_ERR_OUT_OF_RESOURCE; - goto fn_exit; + rc = OPAL_ERR_OUT_OF_RESOURCE; + goto fn_exit; } all_lens = (int *)malloc(sizeof(int) * pmix_size); if (NULL == all_lens) { - rc = OPAL_ERR_OUT_OF_RESOURCE; - goto fn_exit; + rc = OPAL_ERR_OUT_OF_RESOURCE; + goto fn_exit; } for (i=0; i< pmix_size; i++) { - all_lens[r_bytes_and_ranks[i].pmix_rank] = r_bytes_and_ranks[i].nbytes; + all_lens[r_bytes_and_ranks[i].pmix_rank] = r_bytes_and_ranks[i].nbytes; } if (PMI_SUCCESS != (rc = PMI_Allgatherv(sbuf_ptr,s_bytes_and_rank.nbytes,rcv_buff,all_lens))) { - OPAL_PMI_ERROR(rc,"PMI_Allgatherv"); - rc = OPAL_ERR_COMM_FAILURE; - goto fn_exit; + OPAL_PMI_ERROR(rc,"PMI_Allgatherv"); + rc = OPAL_ERR_COMM_FAILURE; + goto fn_exit; } OBJ_RELEASE(send_buffer); @@ -643,36 +643,36 @@ static void fencenb(int sd, short args, void *cbdata) buf = OBJ_NEW(opal_buffer_t); if (buf == NULL) { - rc = OPAL_ERR_OUT_OF_RESOURCE; - goto fn_exit; + rc = OPAL_ERR_OUT_OF_RESOURCE; + goto fn_exit; } for (cptr = rcv_buff, i=0; i < pmix_size; i++) { - id = r_bytes_and_ranks[i].name; + id = r_bytes_and_ranks[i].name; - buf->base_ptr = NULL; /* TODO: ugh */ - if (OPAL_SUCCESS != (rc = opal_dss.load(buf, (void *)cptr, r_bytes_and_ranks[i].nbytes))) { - OPAL_PMI_ERROR(rc,"pmix:cray opal_dss.load failed"); - goto fn_exit; - } + buf->base_ptr = NULL; /* TODO: ugh */ + if (OPAL_SUCCESS != (rc = opal_dss.load(buf, (void *)cptr, r_bytes_and_ranks[i].nbytes))) { + OPAL_PMI_ERROR(rc,"pmix:cray opal_dss.load failed"); + goto fn_exit; + } - /* unpack and stuff in to the dstore */ + /* unpack and stuff in to the dstore */ - cnt = 1; - while (OPAL_SUCCESS == (rc = opal_dss.unpack(buf, &kp, &cnt, OPAL_VALUE))) { - opal_output_verbose(20, opal_pmix_base_framework.framework_output, - "%s pmix:cray unpacked kp with key %s type(%d) for id %s", - OPAL_NAME_PRINT(OPAL_PROC_MY_NAME), kp->key, kp->type, OPAL_NAME_PRINT(id)); - if (OPAL_SUCCESS != (rc = opal_pmix_base_store(&id, kp))) { - OPAL_ERROR_LOG(rc); - goto fn_exit; - } - OBJ_RELEASE(kp); - cnt = 1; - } + cnt = 1; + while (OPAL_SUCCESS == (rc = opal_dss.unpack(buf, &kp, &cnt, OPAL_VALUE))) { + opal_output_verbose(20, opal_pmix_base_framework.framework_output, + "%s pmix:cray unpacked kp with key %s type(%d) for id %s", + OPAL_NAME_PRINT(OPAL_PROC_MY_NAME), kp->key, kp->type, OPAL_NAME_PRINT(id)); + if (OPAL_SUCCESS != (rc = opal_pmix_base_store(&id, kp))) { + OPAL_ERROR_LOG(rc); + goto fn_exit; + } + OBJ_RELEASE(kp); + cnt = 1; + } - cptr += r_bytes_and_ranks[i].nbytes; + cptr += r_bytes_and_ranks[i].nbytes; } @@ -680,17 +680,17 @@ static void fencenb(int sd, short args, void *cbdata) OBJ_RELEASE(buf); opal_output_verbose(2, opal_pmix_base_framework.framework_output, - "%s pmix:cray kvs_fence complete", - OPAL_NAME_PRINT(OPAL_PROC_MY_NAME)); + "%s pmix:cray kvs_fence complete", + OPAL_NAME_PRINT(OPAL_PROC_MY_NAME)); /* fetch my cpuset */ OBJ_CONSTRUCT(&vals, opal_list_t); if (OPAL_SUCCESS == (rc = opal_pmix_base_fetch(&pmix_pname, - OPAL_PMIX_CPUSET, &vals))) { - kp = (opal_value_t*)opal_list_get_first(&vals); - cpuset = strdup(kp->data.string); + OPAL_PMIX_CPUSET, &vals))) { + kp = (opal_value_t*)opal_list_get_first(&vals); + cpuset = strdup(kp->data.string); } else { - cpuset = NULL; + cpuset = NULL; } OPAL_LIST_DESTRUCT(&vals); @@ -703,75 +703,75 @@ static void fencenb(int sd, short args, void *cbdata) */ for (i=0; i < pmix_nlranks; i++) { - id.vpid = pmix_lranks[i]; - id.jobid = pmix_jobid; - opal_output_verbose(2, opal_pmix_base_framework.framework_output, - "%s checking out if %s is local to me", - OPAL_NAME_PRINT(OPAL_PROC_MY_NAME), - OPAL_NAME_PRINT(id)); - /* fetch cpuset for this vpid */ - OBJ_CONSTRUCT(&vals, opal_list_t); - if (OPAL_SUCCESS != (rc = opal_pmix_base_fetch(&id, - OPAL_PMIX_CPUSET, &vals))) { - opal_output_verbose(2, opal_pmix_base_framework.framework_output, - "%s cpuset for local proc %s not found", - OPAL_NAME_PRINT(OPAL_PROC_MY_NAME), - OPAL_NAME_PRINT(id)); - OPAL_LIST_DESTRUCT(&vals); - /* even though the cpuset wasn't found, we at least know it is - * on the same node with us */ - locality = OPAL_PROC_ON_CLUSTER | OPAL_PROC_ON_CU | OPAL_PROC_ON_NODE; - } else { - kp = (opal_value_t*)opal_list_get_first(&vals); - if (NULL == kp->data.string) { - /* if we share a node, but we don't know anything more, then - * mark us as on the node as this is all we know - */ - locality = OPAL_PROC_ON_CLUSTER | OPAL_PROC_ON_CU | OPAL_PROC_ON_NODE; - } else { - /* determine relative location on our node */ - locality = opal_hwloc_base_get_relative_locality(opal_hwloc_topology, - cpuset, - kp->data.string); - } - OPAL_LIST_DESTRUCT(&vals); - } - OPAL_OUTPUT_VERBOSE((1, opal_pmix_base_framework.framework_output, - "%s pmix:cray proc %s locality %s", - OPAL_NAME_PRINT(OPAL_PROC_MY_NAME), - OPAL_NAME_PRINT(id), - opal_hwloc_base_print_locality(locality))); + id.vpid = pmix_lranks[i]; + id.jobid = pmix_jobid; + opal_output_verbose(2, opal_pmix_base_framework.framework_output, + "%s checking out if %s is local to me", + OPAL_NAME_PRINT(OPAL_PROC_MY_NAME), + OPAL_NAME_PRINT(id)); + /* fetch cpuset for this vpid */ + OBJ_CONSTRUCT(&vals, opal_list_t); + if (OPAL_SUCCESS != (rc = opal_pmix_base_fetch(&id, + OPAL_PMIX_CPUSET, &vals))) { + opal_output_verbose(2, opal_pmix_base_framework.framework_output, + "%s cpuset for local proc %s not found", + OPAL_NAME_PRINT(OPAL_PROC_MY_NAME), + OPAL_NAME_PRINT(id)); + OPAL_LIST_DESTRUCT(&vals); + /* even though the cpuset wasn't found, we at least know it is + * on the same node with us */ + locality = OPAL_PROC_ON_CLUSTER | OPAL_PROC_ON_CU | OPAL_PROC_ON_NODE; + } else { + kp = (opal_value_t*)opal_list_get_first(&vals); + if (NULL == kp->data.string) { + /* if we share a node, but we don't know anything more, then + * mark us as on the node as this is all we know + */ + locality = OPAL_PROC_ON_CLUSTER | OPAL_PROC_ON_CU | OPAL_PROC_ON_NODE; + } else { + /* determine relative location on our node */ + locality = opal_hwloc_base_get_relative_locality(opal_hwloc_topology, + cpuset, + kp->data.string); + } + OPAL_LIST_DESTRUCT(&vals); + } + OPAL_OUTPUT_VERBOSE((1, opal_pmix_base_framework.framework_output, + "%s pmix:cray proc %s locality %s", + OPAL_NAME_PRINT(OPAL_PROC_MY_NAME), + OPAL_NAME_PRINT(id), + opal_hwloc_base_print_locality(locality))); - OBJ_CONSTRUCT(&kvn, opal_value_t); - kvn.key = strdup(OPAL_PMIX_LOCALITY); - kvn.type = OPAL_UINT16; - kvn.data.uint16 = locality; - opal_pmix_base_store(&id, &kvn); - OBJ_DESTRUCT(&kvn); + OBJ_CONSTRUCT(&kvn, opal_value_t); + kvn.key = strdup(OPAL_PMIX_LOCALITY); + kvn.type = OPAL_UINT16; + kvn.data.uint16 = locality; + opal_pmix_base_store(&id, &kvn); + OBJ_DESTRUCT(&kvn); } fn_exit: if (NULL != cpuset) { - free(cpuset); + free(cpuset); } if (all_lens != NULL) { - free(all_lens); + free(all_lens); } if (rcv_buff != NULL) { - free(rcv_buff); + free(rcv_buff); } if (r_bytes_and_ranks != NULL) { - free(r_bytes_and_ranks); + free(r_bytes_and_ranks); } if (NULL != op->opcbfunc) { - op->opcbfunc(rc, op->cbdata); + op->opcbfunc(rc, op->cbdata); } OBJ_RELEASE(op); return; } static int cray_fencenb(opal_list_t *procs, int collect_data, - opal_pmix_op_cbfunc_t cbfunc, void *cbdata) + opal_pmix_op_cbfunc_t cbfunc, void *cbdata) { pmi_opcaddy_t *op; @@ -780,7 +780,7 @@ static int cray_fencenb(opal_list_t *procs, int collect_data, op->opcbfunc = cbfunc; op->cbdata = cbdata; event_assign(&op->ev, opal_pmix_base.evbase, -1, - EV_WRITE, fencenb, op); + EV_WRITE, fencenb, op); event_active(&op->ev, EV_WRITE, 1); return OPAL_SUCCESS; @@ -792,19 +792,19 @@ static int cray_get(const opal_process_name_t *id, const char *key, opal_list_t opal_list_t vals; opal_output_verbose(2, opal_pmix_base_framework.framework_output, - "%s pmix:cray getting value for proc %s key %s", - OPAL_NAME_PRINT(OPAL_PROC_MY_NAME), - OPAL_NAME_PRINT(*id), key); + "%s pmix:cray getting value for proc %s key %s", + OPAL_NAME_PRINT(OPAL_PROC_MY_NAME), + OPAL_NAME_PRINT(*id), key); OBJ_CONSTRUCT(&vals, opal_list_t); rc = opal_pmix_base_fetch(id, key, &vals); if (OPAL_SUCCESS == rc) { - *kv = (opal_value_t*)opal_list_remove_first(&vals); - return OPAL_SUCCESS; + *kv = (opal_value_t*)opal_list_remove_first(&vals); + return OPAL_SUCCESS; } else { - opal_output_verbose(2, opal_pmix_base_framework.framework_output, - "%s pmix:cray fetch from dstore failed: %d", - OPAL_NAME_PRINT(OPAL_PROC_MY_NAME), rc); + opal_output_verbose(2, opal_pmix_base_framework.framework_output, + "%s pmix:cray fetch from dstore failed: %d", + OPAL_NAME_PRINT(OPAL_PROC_MY_NAME), rc); } OPAL_LIST_DESTRUCT(&vals); @@ -813,7 +813,7 @@ static int cray_get(const opal_process_name_t *id, const char *key, opal_list_t } static int cray_get_nb(const opal_process_name_t *id, const char *key, - opal_list_t *info, opal_pmix_value_cbfunc_t cbfunc, void *cbdata) + opal_list_t *info, opal_pmix_value_cbfunc_t cbfunc, void *cbdata) { return OPAL_ERR_NOT_IMPLEMENTED; } @@ -824,7 +824,7 @@ static int cray_publish(opal_list_t *info) } static int cray_publish_nb(opal_list_t *info, - opal_pmix_op_cbfunc_t cbfunc, void *cbdata) + opal_pmix_op_cbfunc_t cbfunc, void *cbdata) { return OPAL_ERR_NOT_SUPPORTED; } @@ -835,7 +835,7 @@ static int cray_lookup(opal_list_t *data, opal_list_t *info) } static int cray_lookup_nb(char **keys, opal_list_t *info, - opal_pmix_lookup_cbfunc_t cbfunc, void *cbdata) + opal_pmix_lookup_cbfunc_t cbfunc, void *cbdata) { return OPAL_ERR_NOT_SUPPORTED; } @@ -846,7 +846,7 @@ static int cray_unpublish(char **keys, opal_list_t *info) } static int cray_unpublish_nb(char **keys, opal_list_t *info, - opal_pmix_op_cbfunc_t cbfunc, void *cbdata) + opal_pmix_op_cbfunc_t cbfunc, void *cbdata) { return OPAL_ERR_NOT_SUPPORTED; } @@ -857,7 +857,7 @@ static const char *cray_get_version(void) } static int cray_store_local(const opal_process_name_t *proc, - opal_value_t *val) + opal_value_t *val) { opal_pmix_base_store(proc, val); @@ -879,26 +879,26 @@ static char* pmix_error(int pmix_err) char * err_msg; switch(pmix_err) { - case PMI_FAIL: err_msg = "Operation failed"; break; - case PMI_ERR_INIT: err_msg = "PMI is not initialized"; break; - case PMI_ERR_NOMEM: err_msg = "Input buffer not large enough"; break; - case PMI_ERR_INVALID_ARG: err_msg = "Invalid argument"; break; - case PMI_ERR_INVALID_KEY: err_msg = "Invalid key argument"; break; - case PMI_ERR_INVALID_KEY_LENGTH: err_msg = "Invalid key length argument"; break; - case PMI_ERR_INVALID_VAL: err_msg = "Invalid value argument"; break; - case PMI_ERR_INVALID_VAL_LENGTH: err_msg = "Invalid value length argument"; break; - case PMI_ERR_INVALID_LENGTH: err_msg = "Invalid length argument"; break; - case PMI_ERR_INVALID_NUM_ARGS: err_msg = "Invalid number of arguments"; break; - case PMI_ERR_INVALID_ARGS: err_msg = "Invalid args argument"; break; - case PMI_ERR_INVALID_NUM_PARSED: err_msg = "Invalid num_parsed length argument"; break; - case PMI_ERR_INVALID_KEYVALP: err_msg = "Invalid keyvalp argument"; break; - case PMI_ERR_INVALID_SIZE: err_msg = "Invalid size argument"; break; + case PMI_FAIL: err_msg = "Operation failed"; break; + case PMI_ERR_INIT: err_msg = "PMI is not initialized"; break; + case PMI_ERR_NOMEM: err_msg = "Input buffer not large enough"; break; + case PMI_ERR_INVALID_ARG: err_msg = "Invalid argument"; break; + case PMI_ERR_INVALID_KEY: err_msg = "Invalid key argument"; break; + case PMI_ERR_INVALID_KEY_LENGTH: err_msg = "Invalid key length argument"; break; + case PMI_ERR_INVALID_VAL: err_msg = "Invalid value argument"; break; + case PMI_ERR_INVALID_VAL_LENGTH: err_msg = "Invalid value length argument"; break; + case PMI_ERR_INVALID_LENGTH: err_msg = "Invalid length argument"; break; + case PMI_ERR_INVALID_NUM_ARGS: err_msg = "Invalid number of arguments"; break; + case PMI_ERR_INVALID_ARGS: err_msg = "Invalid args argument"; break; + case PMI_ERR_INVALID_NUM_PARSED: err_msg = "Invalid num_parsed length argument"; break; + case PMI_ERR_INVALID_KEYVALP: err_msg = "Invalid keyvalp argument"; break; + case PMI_ERR_INVALID_SIZE: err_msg = "Invalid size argument"; break; #if defined(PMI_ERR_INVALID_KVS) - /* pmi.h calls this a valid return code but mpich doesn't define it (slurm does). */ - case PMI_ERR_INVALID_KVS: err_msg = "Invalid kvs argument"; break; + /* pmi.h calls this a valid return code but mpich doesn't define it (slurm does). */ + case PMI_ERR_INVALID_KVS: err_msg = "Invalid kvs argument"; break; #endif - case PMI_SUCCESS: err_msg = "Success"; break; - default: err_msg = "Unkown error"; + case PMI_SUCCESS: err_msg = "Success"; break; + default: err_msg = "Unkown error"; } return err_msg; } diff --git a/opal/mca/pmix/cray/pmix_cray.h b/opal/mca/pmix/cray/pmix_cray.h index 58af6a23b8..f311a66edd 100644 --- a/opal/mca/pmix/cray/pmix_cray.h +++ b/opal/mca/pmix/cray/pmix_cray.h @@ -1,5 +1,7 @@ /* * Copyright (c) 2014 Intel, Inc. All rights reserved. + * Copyright (c) 2016 Los Alamos National Security, LLC. All rights + * reserved. * $COPYRIGHT$ * * Additional copyrights may follow diff --git a/opal/mca/pmix/cray/pmix_cray_component.c b/opal/mca/pmix/cray/pmix_cray_component.c index 43eb9fc604..1858ddb32b 100644 --- a/opal/mca/pmix/cray/pmix_cray_component.c +++ b/opal/mca/pmix/cray/pmix_cray_component.c @@ -1,7 +1,7 @@ /* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ /* * Copyright (c) 2014 Intel, Inc. All rights reserved. - * Copyright (c) 2015 Los Alamos National Security, LLC. All rights + * Copyright (c) 2015-2016 Los Alamos National Security, LLC. All rights * reserved. * Copyright (c) 2016 Cisco Systems, Inc. All rights reserved. * $COPYRIGHT$ @@ -49,29 +49,29 @@ opal_pmix_cray_component_t mca_pmix_cray_component = { /* First, the mca_component_t struct containing meta information about the component itself */ - .base_version = { - /* Indicate that we are a pmix v1.1.0 component (which also - implies a specific MCA version) */ + .base_version = { + /* Indicate that we are a pmix v1.1.0 component (which also + implies a specific MCA version) */ - OPAL_PMIX_BASE_VERSION_2_0_0, + OPAL_PMIX_BASE_VERSION_2_0_0, - /* Component name and version */ + /* Component name and version */ - .mca_component_name = "cray", - MCA_BASE_MAKE_VERSION(component, OPAL_MAJOR_VERSION, OPAL_MINOR_VERSION, - OPAL_RELEASE_VERSION), + .mca_component_name = "cray", + MCA_BASE_MAKE_VERSION(component, OPAL_MAJOR_VERSION, OPAL_MINOR_VERSION, + OPAL_RELEASE_VERSION), - /* Component open and close functions */ + /* Component open and close functions */ - .mca_open_component = pmix_cray_component_open, - .mca_close_component = pmix_cray_component_close, - .mca_query_component = pmix_cray_component_query, - }, - /* Next the MCA v1.0.0 component meta data */ - .base_data = { - /* The component is checkpoint ready */ - MCA_BASE_METADATA_PARAM_CHECKPOINT - } + .mca_open_component = pmix_cray_component_open, + .mca_close_component = pmix_cray_component_close, + .mca_query_component = pmix_cray_component_query, + }, + /* Next the MCA v1.0.0 component meta data */ + .base_data = { + /* The component is checkpoint ready */ + MCA_BASE_METADATA_PARAM_CHECKPOINT + } }, .cache_local = NULL, .cache_global = NULL, @@ -93,23 +93,23 @@ static int pmix_cray_component_query(mca_base_module_t **module, int *priority) were launched by the orte/mpirun launcher */ fd = fopen(proc_job_file, "r"); if ((fd == NULL) || (getenv("OMPI_NO_USE_CRAY_PMI") != NULL)) { - *priority = 0; - *module = NULL; - rc = OPAL_ERROR; + *priority = 0; + *module = NULL; + rc = OPAL_ERROR; } else { - snprintf(task_is_app_fname,sizeof(task_is_app_fname), - "/proc/self/task/%ld/task_is_app",syscall(SYS_gettid)); - fd_task_is_app = fopen(task_is_app_fname, "r"); - if (fd_task_is_app != NULL) { /* okay we're in a PAGG container, - and we are an app task (not just a process - running on a mom node, for example), - so we should give cray pmi a shot. */ - *priority = 90; - *module = (mca_base_module_t *)&opal_pmix_cray_module; - fclose(fd_task_is_app); - rc = OPAL_SUCCESS; - } - fclose(fd); + snprintf(task_is_app_fname,sizeof(task_is_app_fname), + "/proc/self/task/%ld/task_is_app",syscall(SYS_gettid)); + fd_task_is_app = fopen(task_is_app_fname, "r"); + if (fd_task_is_app != NULL) { /* okay we're in a PAGG container, + and we are an app task (not just a process + running on a mom node, for example), + so we should give cray pmi a shot. */ + *priority = 90; + *module = (mca_base_module_t *)&opal_pmix_cray_module; + fclose(fd_task_is_app); + rc = OPAL_SUCCESS; + } + fclose(fd); } return rc; diff --git a/opal/mca/pmix/cray/pmix_cray_pmap_parser.c b/opal/mca/pmix/cray/pmix_cray_pmap_parser.c index e7fc726a1a..63a811a6dd 100644 --- a/opal/mca/pmix/cray/pmix_cray_pmap_parser.c +++ b/opal/mca/pmix/cray/pmix_cray_pmap_parser.c @@ -5,6 +5,8 @@ * All rights reserved. * Copyright (c) 2014 Intel, Inc. All rights reserved. * Copyright (c) 2016 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2016 Los Alamos National Security, LLC. All rights + * reserved. * $COPYRIGHT$ * Additional copyrights may follow * @@ -32,14 +34,14 @@ slurm block distro of 4 ranks over 2 nodes: Tuple can be visualized as a rectangle on two dimensional (Hosts, Local Ranks) plane: - ------------------------------------ Hosts -> - | H - | +--------+ - |<- base -->| | - | | | L - | +--------+ - Local Ranks - V + ------------------------------------ Hosts -> + | H + | +--------+ + |<- base -->| | + | | | L + | +--------+ + Local Ranks + V Note that ranks increase by column. Tuple (0,2,3) looks like: 0 3 @@ -59,14 +61,14 @@ static int find_my_node(char *map, int me) p = map; abs_rank = 0; while (NULL != (p = strstr(p+1, ",("))) { - if (3 != sscanf(p, ",(%d,%d,%d)", &base, &H, &L)) { - return -1; - } - if (me >= abs_rank && me < abs_rank + H*L) { - /* found my rectangle, compute node */ - return base + (me - abs_rank)/L; - } - abs_rank += H*L; + if (3 != sscanf(p, ",(%d,%d,%d)", &base, &H, &L)) { + return -1; + } + if (me >= abs_rank && me < abs_rank + H*L) { + /* found my rectangle, compute node */ + return base + (me - abs_rank)/L; + } + abs_rank += H*L; } return -1; } @@ -86,34 +88,34 @@ static int *find_lrs(char *map, int my_node, int *nlrs) max_lr = 16; lrs = malloc(max_lr * sizeof(int)); while (NULL != (p = strstr(p+1, ",("))) { - if (3 != sscanf(p, ",(%d,%d,%d)", &base, &H, &L)) { - free(lrs); - return NULL; - } - if (base <= my_node && my_node < base + H) { - if (*nlrs + L >= max_lr) { - lrs = realloc(lrs, (max_lr + L) * sizeof(int)); - if (NULL == lrs) { - *nlrs = 0; - free(lrs); - return NULL; - } - max_lr += L; - } - /* skip (my_node - base) columns of L elems, - * numbers in my column are local to me - */ - for (i = 0; i < L; i++) { - lrs[*nlrs] = (my_node - base) * L + i + abs_rank; - (*nlrs) ++; - } - } - abs_rank += H*L; + if (3 != sscanf(p, ",(%d,%d,%d)", &base, &H, &L)) { + free(lrs); + return NULL; + } + if (base <= my_node && my_node < base + H) { + if (*nlrs + L >= max_lr) { + lrs = realloc(lrs, (max_lr + L) * sizeof(int)); + if (NULL == lrs) { + *nlrs = 0; + free(lrs); + return NULL; + } + max_lr += L; + } + /* skip (my_node - base) columns of L elems, + * numbers in my column are local to me + */ + for (i = 0; i < L; i++) { + lrs[*nlrs] = (my_node - base) * L + i + abs_rank; + (*nlrs) ++; + } + } + abs_rank += H*L; } if (0 == *nlrs) { - free(lrs); - lrs = 0; + free(lrs); + lrs = 0; } return lrs; } @@ -129,13 +131,13 @@ static int *find_lrs(char *map, int my_node, int *nlrs) * on failure. Array must be freed by the caller. */ int *pmix_cray_parse_pmap(char *pmap, int my_rank, - int *node, int *nlrs) + int *node, int *nlrs) { char *p; p = strstr(pmap, "(vector"); if (NULL == p) { - return NULL; + return NULL; } *node = find_my_node(p, my_rank); @@ -155,7 +157,7 @@ static void dump_lrs(int *lrs, int me, int node, int n) printf("Total %d ranks/node, node %d me %d\n", n, node, me); for (i = 0; i < n; i++) { - printf("%d ", lrs[i]); + printf("%d ", lrs[i]); } printf("\n"); free(lrs); @@ -175,14 +177,14 @@ int main(int argc, char **argv) if (argc == 3) { - me = atoi(argv[1]); - lrs = orte_grpcomm_pmi2_parse_pmap(argv[2], me, &node, &n); - if (NULL == lrs) { - printf("can not parse pmap\n"); - exit(1); - } - dump_lrs(lrs, me, node, n); - exit(0); + me = atoi(argv[1]); + lrs = orte_grpcomm_pmi2_parse_pmap(argv[2], me, &node, &n); + if (NULL == lrs) { + printf("can not parse pmap\n"); + exit(1); + } + dump_lrs(lrs, me, node, n); + exit(0); } /* built in cases */