1
1

The fork pls now checks the total number of processes to be launched

against the total number of processors.  If not oversubscribing, emit
the MCA environment variable mpi_paffinity_processor with the
processor number to bind the process to.  This parameter is picked up
during MPI_Init (i.e., ompi_mpi_init()) and used to bind the process,
but currently iif the MCA param mpi_paffinity_alone is set to a
nonzero value (i.e., the user asks for it).

This commit was SVN r6906.
Этот коммит содержится в:
Jeff Squyres 2005-08-16 16:23:20 +00:00
родитель 409b9e73b2
Коммит 5e5fd5a8f2

Просмотреть файл

@ -37,26 +37,27 @@
#include "opal/event/event.h"
#include "opal/util/argv.h"
#include "opal/util/output.h"
#include "util/sys_info.h"
#include "util/univ_info.h"
#include "opal/mca/paffinity/base/base.h"
#include "opal/util/sys_info.h"
#include "orte/util/univ_info.h"
#include "opal/util/opal_environ.h"
#include "util/session_dir.h"
#include "runtime/orte_wait.h"
#include "mca/errmgr/errmgr.h"
#include "mca/iof/iof.h"
#include "mca/iof/base/iof_base_setup.h"
#include "mca/base/mca_base_param.h"
#include "mca/ns/ns.h"
#include "orte/util/session_dir.h"
#include "orte/runtime/orte_wait.h"
#include "orte/mca/errmgr/errmgr.h"
#include "orte/mca/iof/iof.h"
#include "orte/mca/iof/base/iof_base_setup.h"
#include "opal/mca/base/mca_base_param.h"
#include "orte/mca/ns/ns.h"
#include "orte/mca/sds/base/base.h"
#include "mca/pls/pls.h"
#include "mca/pls/base/base.h"
#include "mca/rml/rml.h"
#include "mca/gpr/gpr.h"
#include "mca/rmaps/base/base.h"
#include "mca/rmaps/base/rmaps_base_map.h"
#include "mca/soh/soh.h"
#include "mca/soh/base/base.h"
#include "mca/pls/fork/pls_fork.h"
#include "orte/mca/pls/pls.h"
#include "orte/mca/pls/base/base.h"
#include "orte/mca/rml/rml.h"
#include "orte/mca/gpr/gpr.h"
#include "orte/mca/rmaps/base/base.h"
#include "orte/mca/rmaps/base/rmaps_base_map.h"
#include "orte/mca/soh/soh.h"
#include "orte/mca/soh/base/base.h"
#include "orte/mca/pls/fork/pls_fork.h"
extern char **environ;
@ -127,7 +128,9 @@ static int orte_pls_fork_proc(
orte_app_context_t* context,
orte_rmaps_base_proc_t* proc,
orte_vpid_t vpid_start,
orte_vpid_t vpid_range)
orte_vpid_t vpid_range,
bool want_processor,
size_t processor)
{
pid_t pid;
orte_iof_base_io_conf_t opts;
@ -201,7 +204,7 @@ static int orte_pls_fork_proc(
}
if(pid == 0) {
char* param;
char *param, *param2;
char *uri;
char **environ_copy;
@ -233,6 +236,18 @@ static int orte_pls_fork_proc(
}
param = mca_base_param_environ_variable("rmgr","bootproxy","jobid");
opal_unsetenv(param, &environ_copy);
free(param);
/* Set the relative vpid */
if (want_processor) {
param = mca_base_param_environ_variable("mpi", NULL,
"paffinity_processor");
asprintf(&param2, "%lu", processor);
opal_setenv(param, param2, true, &environ_copy);
free(param);
free(param2);
}
/* setup universe info */
if (NULL != orte_universe_info.name) {
@ -357,6 +372,7 @@ int orte_pls_fork_launch(orte_jobid_t jobid)
orte_vpid_t vpid_start;
orte_vpid_t vpid_range;
int rc;
size_t num_processors, num_processes;
/* query the allocation for this node */
OBJ_CONSTRUCT(&map, opal_list_t);
@ -373,6 +389,16 @@ int orte_pls_fork_launch(orte_jobid_t jobid)
goto cleanup;
}
/* are we oversubscribing? */
opal_paffinity_base_get_num_processors(&rc);
num_processors = (size_t) rc;
for (num_processes = 0, item = opal_list_get_first(&map);
item != opal_list_get_end(&map);
item = opal_list_get_next(item)) {
orte_rmaps_base_map_t* map = (orte_rmaps_base_map_t*)item;
num_processes += map->num_procs;
}
/* attempt to launch each of the apps */
for (item = opal_list_get_first(&map);
item != opal_list_get_end(&map);
@ -380,7 +406,10 @@ int orte_pls_fork_launch(orte_jobid_t jobid)
orte_rmaps_base_map_t* map = (orte_rmaps_base_map_t*)item;
size_t i;
for (i=0; i<map->num_procs; i++) {
rc = orte_pls_fork_proc(map->app, map->procs[i], vpid_start, vpid_range);
rc = orte_pls_fork_proc(map->app, map->procs[i], vpid_start,
vpid_range,
(num_processes > num_processors) ?
false : true, i);
if (ORTE_SUCCESS != rc) {
ORTE_ERROR_LOG(rc);
goto cleanup;