Nathan had requested this before, as he needs to know the number of procs in the job to optimize the UGNI btl. Add the fetch for that data: the native pmix component already provides it, but ensure the Slurm PMI-1 support does too. If the value is not found, fall back to the non-optimized number.
This commit was SVN r32648.
Этот коммит содержится в:
родитель
cb0739dfd4
Коммит
9ac75451ff
@ -3,6 +3,7 @@
|
|||||||
* Copyright (c) 2011-2013 Los Alamos National Security, LLC. All rights
|
* Copyright (c) 2011-2013 Los Alamos National Security, LLC. All rights
|
||||||
* reserved.
|
* reserved.
|
||||||
* Copyright (c) 2011 UT-Battelle, LLC. All rights reserved.
|
* Copyright (c) 2011 UT-Battelle, LLC. All rights reserved.
|
||||||
|
* Copyright (c) 2014 Intel, Inc. All rights reserved.
|
||||||
* $COPYRIGHT$
|
* $COPYRIGHT$
|
||||||
*
|
*
|
||||||
* Additional copyrights may follow
|
* Additional copyrights may follow
|
||||||
@ -17,6 +18,8 @@
|
|||||||
#include "btl_ugni_smsg.h"
|
#include "btl_ugni_smsg.h"
|
||||||
|
|
||||||
#include "opal/include/opal/align.h"
|
#include "opal/include/opal/align.h"
|
||||||
|
#include "opal/mca/dstore/dstore.h"
|
||||||
|
|
||||||
#define INITIAL_GNI_EPS 10000
|
#define INITIAL_GNI_EPS 10000
|
||||||
|
|
||||||
static int
|
static int
|
||||||
@ -226,6 +229,8 @@ mca_btl_ugni_setup_mpools (mca_btl_ugni_module_t *ugni_module)
|
|||||||
unsigned int mbox_increment, nprocs;
|
unsigned int mbox_increment, nprocs;
|
||||||
const char *mpool_name;
|
const char *mpool_name;
|
||||||
int rc;
|
int rc;
|
||||||
|
opal_list_t vals;
|
||||||
|
opal_value_t *kv;
|
||||||
|
|
||||||
rc = opal_pointer_array_init (&ugni_module->pending_smsg_frags_bb, 0,
|
rc = opal_pointer_array_init (&ugni_module->pending_smsg_frags_bb, 0,
|
||||||
1 << 30, 32768);
|
1 << 30, 32768);
|
||||||
@ -234,8 +239,16 @@ mca_btl_ugni_setup_mpools (mca_btl_ugni_module_t *ugni_module)
|
|||||||
}
|
}
|
||||||
|
|
||||||
/* determine how many procs are in the job (might want to check universe size here) */
|
/* determine how many procs are in the job (might want to check universe size here) */
|
||||||
/* TODO: need to fix this with something else now that btl is in opal */
|
OBJ_CONSTRUCT(&vals, opal_list_t);
|
||||||
|
if (OPAL_SUCCESS == opal_dstore.fetch(opal_dstore_internal, &OPAL_PROC_MY_NAME,
|
||||||
|
OPAL_DSTORE_UNIV_SIZE, &vals)) {
|
||||||
|
/* the number of procs in the job is in the uint32 field */
|
||||||
|
kv = (opal_value_t*)opal_list_get_first(&vals);
|
||||||
|
nprocs = kv->data.uint32;
|
||||||
|
} else {
|
||||||
nprocs = 512;
|
nprocs = 512;
|
||||||
|
}
|
||||||
|
OPAL_LIST_DESTRUCT(&vals);
|
||||||
|
|
||||||
rc = mca_btl_ugni_smsg_setup (nprocs);
|
rc = mca_btl_ugni_smsg_setup (nprocs);
|
||||||
if (OPAL_UNLIKELY(OPAL_SUCCESS != rc)) {
|
if (OPAL_UNLIKELY(OPAL_SUCCESS != rc)) {
|
||||||
|
@ -142,6 +142,7 @@ static int s1_init(void)
|
|||||||
int i;
|
int i;
|
||||||
char *pmix_id, *tmp;
|
char *pmix_id, *tmp;
|
||||||
uint32_t jobfam, stepid;
|
uint32_t jobfam, stepid;
|
||||||
|
opal_value_t kv;
|
||||||
|
|
||||||
if (PMI_SUCCESS != (rc = PMI_Initialized(&initialized))) {
|
if (PMI_SUCCESS != (rc = PMI_Initialized(&initialized))) {
|
||||||
OPAL_PMI_ERROR(rc, "PMI_Initialized");
|
OPAL_PMI_ERROR(rc, "PMI_Initialized");
|
||||||
@ -263,6 +264,17 @@ static int s1_init(void)
|
|||||||
OPAL_PMI_ERROR(ret, "PMI_Get_universe_size");
|
OPAL_PMI_ERROR(ret, "PMI_Get_universe_size");
|
||||||
goto err_exit;
|
goto err_exit;
|
||||||
}
|
}
|
||||||
|
/* push this into the dstore for subsequent fetches */
|
||||||
|
OBJ_CONSTRUCT(&kv, opal_value_t);
|
||||||
|
kv.key = strdup(OPAL_DSTORE_UNIV_SIZE);
|
||||||
|
kv.type = OPAL_UINT32;
|
||||||
|
kv.data.uint32 = s1_usize;
|
||||||
|
if (OPAL_SUCCESS != (ret = opal_dstore.store(opal_dstore_internal, &OPAL_PROC_MY_NAME, &kv))) {
|
||||||
|
OPAL_ERROR_LOG(ret);
|
||||||
|
OBJ_DESTRUCT(&kv);
|
||||||
|
goto err_exit;
|
||||||
|
}
|
||||||
|
OBJ_DESTRUCT(&kv);
|
||||||
|
|
||||||
/* get job size */
|
/* get job size */
|
||||||
ret = PMI_Get_size(&s1_jsize);
|
ret = PMI_Get_size(&s1_jsize);
|
||||||
|
Загрузка…
x
Ссылка в новой задаче
Block a user