From 6db641c86da8f90941e75d2f5c38ba1a8dc2e0cb Mon Sep 17 00:00:00 2001 From: Ralph Castain Date: Thu, 19 Feb 2009 20:45:07 +0000 Subject: [PATCH] Pass the number of nodes in a job to the process This commit was SVN r20595. --- orte/mca/odls/base/odls_base_default_fns.c | 21 +++++++++++++++++++++ orte/mca/odls/odls_types.h | 1 + orte/util/proc_info.c | 8 ++++++++ orte/util/proc_info.h | 1 + 4 files changed, 31 insertions(+) diff --git a/orte/mca/odls/base/odls_base_default_fns.c b/orte/mca/odls/base/odls_base_default_fns.c index 53403b40f1..a2d0c49215 100644 --- a/orte/mca/odls/base/odls_base_default_fns.c +++ b/orte/mca/odls/base/odls_base_default_fns.c @@ -208,6 +208,12 @@ int orte_odls_base_default_get_add_procs_data(opal_buffer_t *data, return rc; } + /* pack the number of nodes involved in this job */ + if (ORTE_SUCCESS != (rc = opal_dss.pack(data, &map->num_nodes, 1, ORTE_STD_CNTR))) { + ORTE_ERROR_LOG(rc); + return rc; + } + /* pack the number of procs in this launch */ if (ORTE_SUCCESS != (rc = opal_dss.pack(data, &jdata->num_procs, 1, ORTE_VPID))) { ORTE_ERROR_LOG(rc); @@ -485,6 +491,12 @@ int orte_odls_base_default_construct_child_list(opal_buffer_t *data, } /* UNPACK JOB-SPECIFIC DATA */ + /* unpack the number of nodes involved in this job */ + cnt=1; + if (ORTE_SUCCESS != (rc = opal_dss.unpack(data, &jobdat->num_nodes, &cnt, ORTE_STD_CNTR))) { + ORTE_ERROR_LOG(rc); + goto REPORT_ERROR; + } /* unpack the number of procs in this launch */ cnt=1; if (ORTE_SUCCESS != (rc = opal_dss.unpack(data, &jobdat->num_procs, &cnt, ORTE_VPID))) { @@ -682,6 +694,7 @@ static int odls_base_default_setup_fork(orte_app_context_t *context, int32_t num_local_procs, orte_vpid_t vpid_range, orte_std_cntr_t total_slots_alloc, + int num_nodes, bool oversubscribed, char ***environ_copy) { int i; @@ -780,6 +793,13 @@ static int odls_base_default_setup_fork(orte_app_context_t *context, opal_setenv("OMPI_UNIVERSE_SIZE", param2, true, environ_copy); free(param2); + /* pass the number 
of nodes involved in this job */ + param = mca_base_param_environ_variable("orte","num","nodes"); + asprintf(&param2, "%ld", (long)num_nodes); + opal_setenv(param, param2, true, environ_copy); + free(param); + free(param2); + /* push data into environment - don't push any single proc * info, though. We are setting the environment up on a * per-context basis, and will add the individual proc @@ -1003,6 +1023,7 @@ int orte_odls_base_default_launch_local(orte_jobid_t job, jobdat->num_local_procs, jobdat->num_procs, jobdat->total_slots_alloc, + jobdat->num_nodes, oversubscribed, &app->env))) { diff --git a/orte/mca/odls/odls_types.h b/orte/mca/odls/odls_types.h index 1b2bc074c4..ab4db8aec7 100644 --- a/orte/mca/odls/odls_types.h +++ b/orte/mca/odls/odls_types.h @@ -108,6 +108,7 @@ typedef struct orte_odls_job_t { orte_job_controls_t controls; /* control flags for job */ orte_vpid_t stdin_target; /* where stdin is to go */ orte_std_cntr_t total_slots_alloc; + orte_std_cntr_t num_nodes; /* number of nodes involved in the job */ orte_vpid_t num_procs; int32_t num_local_procs; opal_byte_object_t *pmap; /* local copy of pidmap byte object */ diff --git a/orte/util/proc_info.c b/orte/util/proc_info.c index 99b7c489ce..8c233c4167 100644 --- a/orte/util/proc_info.c +++ b/orte/util/proc_info.c @@ -46,6 +46,7 @@ ORTE_DECLSPEC orte_proc_info_t orte_process_info = { /* .hnp_pid = */ 0, /* .app_num = */ -1, /* .num_procs = */ 1, + /* .num_nodes = */ 1, /* .nodename = */ NULL, /* .arch = */ 0, /* .pid = */ 0, @@ -136,6 +137,13 @@ int orte_proc_info(void) return ORTE_ERROR; } + /* get the number of nodes in the job */ + mca_base_param_reg_int_name("orte", "num_nodes", + "Number of nodes in the job", + true, false, + orte_process_info.num_nodes, &tmp); + orte_process_info.num_nodes = tmp; + /* setup the sync buffer */ orte_process_info.sync_buf = OBJ_NEW(opal_buffer_t); diff --git a/orte/util/proc_info.h b/orte/util/proc_info.h index 90a411d3c9..c8e61ef87b 100644 --- 
a/orte/util/proc_info.h +++ b/orte/util/proc_info.h @@ -57,6 +57,7 @@ struct orte_proc_info_t { pid_t hnp_pid; /**< hnp pid - used if singleton */ orte_std_cntr_t app_num; /**< our index into the app_context array */ orte_vpid_t num_procs; /**< number of processes in this job */ + int num_nodes; /**< number of nodes in the job */ char *nodename; /**< string name for this node */ uint32_t arch; /**< arch for this node */ pid_t pid; /**< Local process ID for this process */