1
1

Nathan previously requested this because he needs to know the number of procs in the job to optimize the UGNI BTL. Add the fetch for that data: the native pmix component already provides it, but ensure the Slurm PMI-1 support does too. If the value is not found, fall back to the non-optimized default (512).

This commit was SVN r32648.
Этот коммит содержится в:
Ralph Castain 2014-08-29 22:53:35 +00:00
родитель cb0739dfd4
Коммит 9ac75451ff
2 изменённых файла: 27 добавлений и 2 удаления

Просмотреть файл

@ -3,6 +3,7 @@
* Copyright (c) 2011-2013 Los Alamos National Security, LLC. All rights * Copyright (c) 2011-2013 Los Alamos National Security, LLC. All rights
* reserved. * reserved.
* Copyright (c) 2011 UT-Battelle, LLC. All rights reserved. * Copyright (c) 2011 UT-Battelle, LLC. All rights reserved.
* Copyright (c) 2014 Intel, Inc. All rights reserved.
* $COPYRIGHT$ * $COPYRIGHT$
* *
* Additional copyrights may follow * Additional copyrights may follow
@ -17,6 +18,8 @@
#include "btl_ugni_smsg.h" #include "btl_ugni_smsg.h"
#include "opal/include/opal/align.h" #include "opal/include/opal/align.h"
#include "opal/mca/dstore/dstore.h"
#define INITIAL_GNI_EPS 10000 #define INITIAL_GNI_EPS 10000
static int static int
@ -226,6 +229,8 @@ mca_btl_ugni_setup_mpools (mca_btl_ugni_module_t *ugni_module)
unsigned int mbox_increment, nprocs; unsigned int mbox_increment, nprocs;
const char *mpool_name; const char *mpool_name;
int rc; int rc;
opal_list_t vals;
opal_value_t *kv;
rc = opal_pointer_array_init (&ugni_module->pending_smsg_frags_bb, 0, rc = opal_pointer_array_init (&ugni_module->pending_smsg_frags_bb, 0,
1 << 30, 32768); 1 << 30, 32768);
@ -234,8 +239,16 @@ mca_btl_ugni_setup_mpools (mca_btl_ugni_module_t *ugni_module)
} }
/* determine how many procs are in the job (might want to check universe size here) */ /* determine how many procs are in the job (might want to check universe size here) */
/* TODO: need to fix this with something else now that btl is in opal */ OBJ_CONSTRUCT(&vals, opal_list_t);
if (OPAL_SUCCESS == opal_dstore.fetch(opal_dstore_internal, &OPAL_PROC_MY_NAME,
OPAL_DSTORE_UNIV_SIZE, &vals)) {
/* the number of procs in the job is in the uint32 field */
kv = (opal_value_t*)opal_list_get_first(&vals);
nprocs = kv->data.uint32;
} else {
nprocs = 512; nprocs = 512;
}
OPAL_LIST_DESTRUCT(&vals);
rc = mca_btl_ugni_smsg_setup (nprocs); rc = mca_btl_ugni_smsg_setup (nprocs);
if (OPAL_UNLIKELY(OPAL_SUCCESS != rc)) { if (OPAL_UNLIKELY(OPAL_SUCCESS != rc)) {

Просмотреть файл

@ -142,6 +142,7 @@ static int s1_init(void)
int i; int i;
char *pmix_id, *tmp; char *pmix_id, *tmp;
uint32_t jobfam, stepid; uint32_t jobfam, stepid;
opal_value_t kv;
if (PMI_SUCCESS != (rc = PMI_Initialized(&initialized))) { if (PMI_SUCCESS != (rc = PMI_Initialized(&initialized))) {
OPAL_PMI_ERROR(rc, "PMI_Initialized"); OPAL_PMI_ERROR(rc, "PMI_Initialized");
@ -263,6 +264,17 @@ static int s1_init(void)
OPAL_PMI_ERROR(ret, "PMI_Get_universe_size"); OPAL_PMI_ERROR(ret, "PMI_Get_universe_size");
goto err_exit; goto err_exit;
} }
/* push this into the dstore for subsequent fetches */
OBJ_CONSTRUCT(&kv, opal_value_t);
kv.key = strdup(OPAL_DSTORE_UNIV_SIZE);
kv.type = OPAL_UINT32;
kv.data.uint32 = s1_usize;
if (OPAL_SUCCESS != (ret = opal_dstore.store(opal_dstore_internal, &OPAL_PROC_MY_NAME, &kv))) {
OPAL_ERROR_LOG(ret);
OBJ_DESTRUCT(&kv);
goto err_exit;
}
OBJ_DESTRUCT(&kv);
/* get job size */ /* get job size */
ret = PMI_Get_size(&s1_jsize); ret = PMI_Get_size(&s1_jsize);