Clean up the --without-hwloc build: there is no need to process coprocessors when hwloc is unavailable, since we can't detect them anyway. Also clean up some unused variables in the ppr mapper.
This commit was SVN r29476.
Этот коммит содержится в:
родитель
5bf6555604
Коммит
960a255e7f
@ -333,10 +333,6 @@ void orte_plm_base_complete_setup(int fd, short args, void *cbdata)
|
||||
{
|
||||
orte_job_t *jdata, *jdatorted;
|
||||
orte_state_caddy_t *caddy = (orte_state_caddy_t*)cbdata;
|
||||
int i, rc;
|
||||
orte_node_t *node;
|
||||
uint32_t h;
|
||||
orte_vpid_t *vptr;
|
||||
|
||||
/* if we don't want to launch the apps, now is the time to leave */
|
||||
if (orte_do_not_launch) {
|
||||
@ -410,40 +406,49 @@ void orte_plm_base_complete_setup(int fd, short args, void *cbdata)
|
||||
}
|
||||
#endif
|
||||
|
||||
/* if coprocessors were detected, now is the time to
|
||||
* identify who is attached to what host - this info
|
||||
* will be shipped to the daemons in the nidmap. Someday,
|
||||
* there may be a direct way for daemons on coprocessors
|
||||
* to detect their hosts - but not today.
|
||||
*/
|
||||
if (orte_coprocessors_detected) {
|
||||
/* cycle thru the nodes looking for coprocessors */
|
||||
for (i=0; i < orte_node_pool->size; i++) {
|
||||
if (NULL == (node = (orte_node_t*)opal_pointer_array_get_item(orte_node_pool, i))) {
|
||||
continue;
|
||||
#if OPAL_HAVE_HWLOC
|
||||
{
|
||||
orte_node_t *node;
|
||||
uint32_t h;
|
||||
orte_vpid_t *vptr;
|
||||
int i, rc;
|
||||
|
||||
/* if coprocessors were detected, now is the time to
|
||||
* identify who is attached to what host - this info
|
||||
* will be shipped to the daemons in the nidmap. Someday,
|
||||
* there may be a direct way for daemons on coprocessors
|
||||
* to detect their hosts - but not today.
|
||||
*/
|
||||
if (orte_coprocessors_detected) {
|
||||
/* cycle thru the nodes looking for coprocessors */
|
||||
for (i=0; i < orte_node_pool->size; i++) {
|
||||
if (NULL == (node = (orte_node_t*)opal_pointer_array_get_item(orte_node_pool, i))) {
|
||||
continue;
|
||||
}
|
||||
/* if we don't have a serial number, then we are not a coprocessor */
|
||||
if (NULL == node->serial_number) {
|
||||
/* set our hostid to our own daemon vpid */
|
||||
node->hostid = node->daemon->name.vpid;
|
||||
continue;
|
||||
}
|
||||
/* if we have a serial number, then we are a coprocessor - so
|
||||
* compute our hash and lookup our hostid
|
||||
*/
|
||||
OPAL_HASH_STR(node->serial_number, h);
|
||||
if (OPAL_SUCCESS != (rc = opal_hash_table_get_value_uint32(orte_coprocessors, h,
|
||||
(void**)&vptr))) {
|
||||
ORTE_ERROR_LOG(rc);
|
||||
break;
|
||||
}
|
||||
node->hostid = (*vptr);
|
||||
}
|
||||
/* if we don't have a serial number, then we are not a coprocessor */
|
||||
if (NULL == node->serial_number) {
|
||||
/* set our hostid to our own daemon vpid */
|
||||
node->hostid = node->daemon->name.vpid;
|
||||
continue;
|
||||
}
|
||||
/* if we have a serial number, then we are a coprocessor - so
|
||||
* compute our hash and lookup our hostid
|
||||
*/
|
||||
OPAL_HASH_STR(node->serial_number, h);
|
||||
if (OPAL_SUCCESS != (rc = opal_hash_table_get_value_uint32(orte_coprocessors, h,
|
||||
(void**)&vptr))) {
|
||||
ORTE_ERROR_LOG(rc);
|
||||
break;
|
||||
}
|
||||
node->hostid = (*vptr);
|
||||
}
|
||||
/* done with the coprocessor mapping at this time */
|
||||
if (NULL != orte_coprocessors) {
|
||||
OBJ_RELEASE(orte_coprocessors);
|
||||
}
|
||||
}
|
||||
/* done with the coprocessor mapping at this time */
|
||||
if (NULL != orte_coprocessors) {
|
||||
OBJ_RELEASE(orte_coprocessors);
|
||||
}
|
||||
#endif
|
||||
|
||||
/* set the job state to the next position */
|
||||
ORTE_ACTIVATE_JOB_STATE(jdata, ORTE_JOB_STATE_LAUNCH_APPS);
|
||||
@ -666,8 +671,6 @@ void orte_plm_base_daemon_callback(int status, orte_process_name_t* sender,
|
||||
orte_job_t *jdata;
|
||||
orte_process_name_t dname;
|
||||
opal_buffer_t *relay;
|
||||
char *coprocessors, **sns;
|
||||
uint32_t h;
|
||||
|
||||
/* get the daemon job, if necessary */
|
||||
if (NULL == jdatorted) {
|
||||
@ -814,12 +817,15 @@ void orte_plm_base_daemon_callback(int status, orte_process_name_t* sender,
|
||||
|
||||
#if OPAL_HAVE_HWLOC
|
||||
{
|
||||
char *coprocessors, **sns;
|
||||
uint32_t h;
|
||||
hwloc_topology_t topo, t;
|
||||
int i;
|
||||
bool found;
|
||||
|
||||
/* store the local resources for that node */
|
||||
if (1 == dname.vpid || orte_hetero_nodes) {
|
||||
hwloc_topology_t topo, t;
|
||||
int i;
|
||||
bool found;
|
||||
|
||||
|
||||
idx=1;
|
||||
if (OPAL_SUCCESS != (rc = opal_dss.unpack(buffer, &topo, &idx, OPAL_HWLOC_TOPO))) {
|
||||
ORTE_ERROR_LOG(rc);
|
||||
@ -859,56 +865,56 @@ void orte_plm_base_daemon_callback(int status, orte_process_name_t* sender,
|
||||
node->topology = topo;
|
||||
}
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
/* unpack any coprocessors */
|
||||
idx=1;
|
||||
if (OPAL_SUCCESS != (rc = opal_dss.unpack(buffer, &coprocessors, &idx, OPAL_STRING))) {
|
||||
ORTE_ERROR_LOG(rc);
|
||||
orted_failed_launch = true;
|
||||
goto CLEANUP;
|
||||
}
|
||||
if (NULL != coprocessors) {
|
||||
/* init the hash table, if necessary */
|
||||
if (NULL == orte_coprocessors) {
|
||||
orte_coprocessors = OBJ_NEW(opal_hash_table_t);
|
||||
opal_hash_table_init(orte_coprocessors, orte_process_info.num_procs);
|
||||
}
|
||||
/* separate the serial numbers of the coprocessors
|
||||
* on this host
|
||||
*/
|
||||
sns = opal_argv_split(coprocessors, ',');
|
||||
for (idx=0; NULL != sns[idx]; idx++) {
|
||||
/* compute the hash */
|
||||
OPAL_HASH_STR(sns[idx], h);
|
||||
/* mark that this coprocessor is hosted by this node */
|
||||
opal_hash_table_set_value_uint32(orte_coprocessors, h, (void*)&node->daemon->name.vpid);
|
||||
}
|
||||
opal_argv_free(sns);
|
||||
free(coprocessors);
|
||||
orte_coprocessors_detected = true;
|
||||
}
|
||||
/* see if this daemon is on a coprocessor */
|
||||
idx=1;
|
||||
if (OPAL_SUCCESS != (rc = opal_dss.unpack(buffer, &coprocessors, &idx, OPAL_STRING))) {
|
||||
ORTE_ERROR_LOG(rc);
|
||||
orted_failed_launch = true;
|
||||
goto CLEANUP;
|
||||
}
|
||||
if (NULL != coprocessors) {
|
||||
if (NULL != node->serial_number) {
|
||||
/* this is not allowed - a coprocessor cannot be host
|
||||
* to another coprocessor at this time
|
||||
*/
|
||||
ORTE_ERROR_LOG(ORTE_ERR_NOT_SUPPORTED);
|
||||
/* unpack any coprocessors */
|
||||
idx=1;
|
||||
if (OPAL_SUCCESS != (rc = opal_dss.unpack(buffer, &coprocessors, &idx, OPAL_STRING))) {
|
||||
ORTE_ERROR_LOG(rc);
|
||||
orted_failed_launch = true;
|
||||
free(coprocessors);
|
||||
goto CLEANUP;
|
||||
}
|
||||
node->serial_number = coprocessors;
|
||||
orte_coprocessors_detected = true;
|
||||
if (NULL != coprocessors) {
|
||||
/* init the hash table, if necessary */
|
||||
if (NULL == orte_coprocessors) {
|
||||
orte_coprocessors = OBJ_NEW(opal_hash_table_t);
|
||||
opal_hash_table_init(orte_coprocessors, orte_process_info.num_procs);
|
||||
}
|
||||
/* separate the serial numbers of the coprocessors
|
||||
* on this host
|
||||
*/
|
||||
sns = opal_argv_split(coprocessors, ',');
|
||||
for (idx=0; NULL != sns[idx]; idx++) {
|
||||
/* compute the hash */
|
||||
OPAL_HASH_STR(sns[idx], h);
|
||||
/* mark that this coprocessor is hosted by this node */
|
||||
opal_hash_table_set_value_uint32(orte_coprocessors, h, (void*)&node->daemon->name.vpid);
|
||||
}
|
||||
opal_argv_free(sns);
|
||||
free(coprocessors);
|
||||
orte_coprocessors_detected = true;
|
||||
}
|
||||
/* see if this daemon is on a coprocessor */
|
||||
idx=1;
|
||||
if (OPAL_SUCCESS != (rc = opal_dss.unpack(buffer, &coprocessors, &idx, OPAL_STRING))) {
|
||||
ORTE_ERROR_LOG(rc);
|
||||
orted_failed_launch = true;
|
||||
goto CLEANUP;
|
||||
}
|
||||
if (NULL != coprocessors) {
|
||||
if (NULL != node->serial_number) {
|
||||
/* this is not allowed - a coprocessor cannot be host
|
||||
* to another coprocessor at this time
|
||||
*/
|
||||
ORTE_ERROR_LOG(ORTE_ERR_NOT_SUPPORTED);
|
||||
orted_failed_launch = true;
|
||||
free(coprocessors);
|
||||
goto CLEANUP;
|
||||
}
|
||||
node->serial_number = coprocessors;
|
||||
orte_coprocessors_detected = true;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
CLEANUP:
|
||||
OPAL_OUTPUT_VERBOSE((5, orte_plm_base_framework.framework_output,
|
||||
|
@ -75,12 +75,14 @@ static int ppr_mapper(orte_job_t *jdata)
|
||||
orte_proc_t *proc;
|
||||
orte_app_context_t *app;
|
||||
orte_vpid_t total_procs, nprocs_mapped;
|
||||
opal_hwloc_level_t level, start=OPAL_HWLOC_NODE_LEVEL;
|
||||
opal_hwloc_level_t start=OPAL_HWLOC_NODE_LEVEL;
|
||||
#if OPAL_HAVE_HWLOC
|
||||
hwloc_obj_t obj;
|
||||
hwloc_obj_type_t lowest;
|
||||
unsigned cache_level=0;
|
||||
unsigned int nobjs, i;
|
||||
bool pruning_reqd = false;
|
||||
opal_hwloc_level_t level;
|
||||
#endif
|
||||
opal_list_t node_list;
|
||||
opal_list_item_t *item;
|
||||
@ -88,7 +90,6 @@ static int ppr_mapper(orte_job_t *jdata)
|
||||
orte_app_idx_t idx;
|
||||
char **ppr_req, **ck;
|
||||
size_t len;
|
||||
bool pruning_reqd = false;
|
||||
bool initial_map=true;
|
||||
|
||||
/* only handle initial launch of loadbalanced
|
||||
@ -218,19 +219,21 @@ static int ppr_mapper(orte_job_t *jdata)
|
||||
opal_output(0, "NOTHING GIVEN");
|
||||
return ORTE_ERR_SILENT;
|
||||
}
|
||||
#if OPAL_HAVE_HWLOC
|
||||
/* if more than one level was specified, then pruning will be reqd */
|
||||
if (1 < n) {
|
||||
pruning_reqd = true;
|
||||
}
|
||||
#endif
|
||||
|
||||
opal_output_verbose(5, orte_rmaps_base_framework.framework_output,
|
||||
"mca:rmaps:ppr: job %s assigned policy %s",
|
||||
ORTE_JOBID_PRINT(jdata->jobid),
|
||||
orte_rmaps_base_print_mapping(jdata->map->mapping));
|
||||
|
||||
#if OPAL_HAVE_HWLOC
|
||||
/* convenience */
|
||||
level = start;
|
||||
#if OPAL_HAVE_HWLOC
|
||||
lowest = opal_hwloc_levels[start];
|
||||
#endif
|
||||
|
||||
|
Загрузка…
x
Ссылка в новой задаче
Block a user