
Do some cleanup of the --without-hwloc build - no need to work on coprocessors since we can't detect them anyway - and clean up some unused variables in the ppr mapper

This commit was SVN r29476.
This commit is contained in:
Ralph Castain 2013-10-23 01:45:21 +00:00
parent 5bf6555604
commit 960a255e7f
2 changed files with 98 additions and 89 deletions
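
The change itself is the standard conditional-compilation idiom: hwloc-dependent logic, together with the variables only it uses, moves inside an #if OPAL_HAVE_HWLOC block, so a --without-hwloc build compiles none of it and gets no unused-variable warnings. A minimal standalone sketch of the idiom (the macro value is hard-coded here for illustration; in Open MPI it is set by configure):

#include <stdio.h>

/* Stand-in for the configure-generated setting; 0 mimics --without-hwloc */
#define OPAL_HAVE_HWLOC 0

static void complete_setup(void)
{
    /* ... setup that is always needed ... */

#if OPAL_HAVE_HWLOC
    {
        /* variables used only by the hwloc-dependent code are declared
         * inside the guarded block, so the hwloc-free build never sees
         * them and the compiler has nothing to flag as unused */
        int i;

        for (i = 0; i < 4; i++) {
            printf("inspecting node %d for coprocessors\n", i);
        }
    }
#endif

    puts("setup complete");
}

int main(void)
{
    complete_setup();
    return 0;
}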

View file

@@ -333,10 +333,6 @@ void orte_plm_base_complete_setup(int fd, short args, void *cbdata)
 {
     orte_job_t *jdata, *jdatorted;
     orte_state_caddy_t *caddy = (orte_state_caddy_t*)cbdata;
-    int i, rc;
-    orte_node_t *node;
-    uint32_t h;
-    orte_vpid_t *vptr;
 
     /* if we don't want to launch the apps, now is the time to leave */
     if (orte_do_not_launch) {
@@ -410,40 +406,49 @@ void orte_plm_base_complete_setup(int fd, short args, void *cbdata)
     }
 #endif
 
-    /* if coprocessors were detected, now is the time to
-     * identify who is attached to what host - this info
-     * will be shipped to the daemons in the nidmap. Someday,
-     * there may be a direct way for daemons on coprocessors
-     * to detect their hosts - but not today.
-     */
-    if (orte_coprocessors_detected) {
-        /* cycle thru the nodes looking for coprocessors */
-        for (i=0; i < orte_node_pool->size; i++) {
-            if (NULL == (node = (orte_node_t*)opal_pointer_array_get_item(orte_node_pool, i))) {
-                continue;
-            }
-            /* if we don't have a serial number, then we are not a coprocessor */
-            if (NULL == node->serial_number) {
-                /* set our hostid to our own daemon vpid */
-                node->hostid = node->daemon->name.vpid;
-                continue;
-            }
-            /* if we have a serial number, then we are a coprocessor - so
-             * compute our hash and lookup our hostid
-             */
-            OPAL_HASH_STR(node->serial_number, h);
-            if (OPAL_SUCCESS != (rc = opal_hash_table_get_value_uint32(orte_coprocessors, h,
-                                                                       (void**)&vptr))) {
-                ORTE_ERROR_LOG(rc);
-                break;
-            }
-            node->hostid = (*vptr);
-        }
-    }
-    /* done with the coprocessor mapping at this time */
-    if (NULL != orte_coprocessors) {
-        OBJ_RELEASE(orte_coprocessors);
-    }
+#if OPAL_HAVE_HWLOC
+    {
+        orte_node_t *node;
+        uint32_t h;
+        orte_vpid_t *vptr;
+        int i, rc;
+
+        /* if coprocessors were detected, now is the time to
+         * identify who is attached to what host - this info
+         * will be shipped to the daemons in the nidmap. Someday,
+         * there may be a direct way for daemons on coprocessors
+         * to detect their hosts - but not today.
+         */
+        if (orte_coprocessors_detected) {
+            /* cycle thru the nodes looking for coprocessors */
+            for (i=0; i < orte_node_pool->size; i++) {
+                if (NULL == (node = (orte_node_t*)opal_pointer_array_get_item(orte_node_pool, i))) {
+                    continue;
+                }
+                /* if we don't have a serial number, then we are not a coprocessor */
+                if (NULL == node->serial_number) {
+                    /* set our hostid to our own daemon vpid */
+                    node->hostid = node->daemon->name.vpid;
+                    continue;
+                }
+                /* if we have a serial number, then we are a coprocessor - so
+                 * compute our hash and lookup our hostid
+                 */
+                OPAL_HASH_STR(node->serial_number, h);
+                if (OPAL_SUCCESS != (rc = opal_hash_table_get_value_uint32(orte_coprocessors, h,
+                                                                           (void**)&vptr))) {
+                    ORTE_ERROR_LOG(rc);
+                    break;
+                }
+                node->hostid = (*vptr);
+            }
+        }
+        /* done with the coprocessor mapping at this time */
+        if (NULL != orte_coprocessors) {
+            OBJ_RELEASE(orte_coprocessors);
+        }
+    }
+#endif
 
     /* set the job state to the next position */
     ORTE_ACTIVATE_JOB_STATE(jdata, ORTE_JOB_STATE_LAUNCH_APPS);
@@ -666,8 +671,6 @@ void orte_plm_base_daemon_callback(int status, orte_process_name_t* sender,
     orte_job_t *jdata;
     orte_process_name_t dname;
     opal_buffer_t *relay;
-    char *coprocessors, **sns;
-    uint32_t h;
 
     /* get the daemon job, if necessary */
     if (NULL == jdatorted) {
@@ -814,12 +817,15 @@ void orte_plm_base_daemon_callback(int status, orte_process_name_t* sender,
 
 #if OPAL_HAVE_HWLOC
     {
+        char *coprocessors, **sns;
+        uint32_t h;
+        hwloc_topology_t topo, t;
+        int i;
+        bool found;
+
        /* store the local resources for that node */
        if (1 == dname.vpid || orte_hetero_nodes) {
-            hwloc_topology_t topo, t;
-            int i;
-            bool found;
            idx=1;
            if (OPAL_SUCCESS != (rc = opal_dss.unpack(buffer, &topo, &idx, OPAL_HWLOC_TOPO))) {
                ORTE_ERROR_LOG(rc);
@@ -859,56 +865,56 @@ void orte_plm_base_daemon_callback(int status, orte_process_name_t* sender,
                 node->topology = topo;
             }
         }
-    }
-#endif
 
-    /* unpack any coprocessors */
-    idx=1;
-    if (OPAL_SUCCESS != (rc = opal_dss.unpack(buffer, &coprocessors, &idx, OPAL_STRING))) {
-        ORTE_ERROR_LOG(rc);
-        orted_failed_launch = true;
-        goto CLEANUP;
-    }
-    if (NULL != coprocessors) {
-        /* init the hash table, if necessary */
-        if (NULL == orte_coprocessors) {
-            orte_coprocessors = OBJ_NEW(opal_hash_table_t);
-            opal_hash_table_init(orte_coprocessors, orte_process_info.num_procs);
-        }
-        /* separate the serial numbers of the coprocessors
-         * on this host
-         */
-        sns = opal_argv_split(coprocessors, ',');
-        for (idx=0; NULL != sns[idx]; idx++) {
-            /* compute the hash */
-            OPAL_HASH_STR(sns[idx], h);
-            /* mark that this coprocessor is hosted by this node */
-            opal_hash_table_set_value_uint32(orte_coprocessors, h, (void*)&node->daemon->name.vpid);
-        }
-        opal_argv_free(sns);
-        free(coprocessors);
-        orte_coprocessors_detected = true;
-    }
-    /* see if this daemon is on a coprocessor */
-    idx=1;
-    if (OPAL_SUCCESS != (rc = opal_dss.unpack(buffer, &coprocessors, &idx, OPAL_STRING))) {
-        ORTE_ERROR_LOG(rc);
-        orted_failed_launch = true;
-        goto CLEANUP;
-    }
-    if (NULL != coprocessors) {
-        if (NULL != node->serial_number) {
-            /* this is not allowed - a coprocessor cannot be host
-             * to another coprocessor at this time
-             */
-            ORTE_ERROR_LOG(ORTE_ERR_NOT_SUPPORTED);
-            orted_failed_launch = true;
-            free(coprocessors);
-            goto CLEANUP;
-        }
-        node->serial_number = coprocessors;
-        orte_coprocessors_detected = true;
-    }
+        /* unpack any coprocessors */
+        idx=1;
+        if (OPAL_SUCCESS != (rc = opal_dss.unpack(buffer, &coprocessors, &idx, OPAL_STRING))) {
+            ORTE_ERROR_LOG(rc);
+            orted_failed_launch = true;
+            goto CLEANUP;
+        }
+        if (NULL != coprocessors) {
+            /* init the hash table, if necessary */
+            if (NULL == orte_coprocessors) {
+                orte_coprocessors = OBJ_NEW(opal_hash_table_t);
+                opal_hash_table_init(orte_coprocessors, orte_process_info.num_procs);
+            }
+            /* separate the serial numbers of the coprocessors
+             * on this host
+             */
+            sns = opal_argv_split(coprocessors, ',');
+            for (idx=0; NULL != sns[idx]; idx++) {
+                /* compute the hash */
+                OPAL_HASH_STR(sns[idx], h);
+                /* mark that this coprocessor is hosted by this node */
+                opal_hash_table_set_value_uint32(orte_coprocessors, h, (void*)&node->daemon->name.vpid);
+            }
+            opal_argv_free(sns);
+            free(coprocessors);
+            orte_coprocessors_detected = true;
+        }
+        /* see if this daemon is on a coprocessor */
+        idx=1;
+        if (OPAL_SUCCESS != (rc = opal_dss.unpack(buffer, &coprocessors, &idx, OPAL_STRING))) {
+            ORTE_ERROR_LOG(rc);
+            orted_failed_launch = true;
+            goto CLEANUP;
+        }
+        if (NULL != coprocessors) {
+            if (NULL != node->serial_number) {
+                /* this is not allowed - a coprocessor cannot be host
+                 * to another coprocessor at this time
+                 */
+                ORTE_ERROR_LOG(ORTE_ERR_NOT_SUPPORTED);
+                orted_failed_launch = true;
+                free(coprocessors);
+                goto CLEANUP;
+            }
+            node->serial_number = coprocessors;
+            orte_coprocessors_detected = true;
+        }
+    }
+#endif
 
 CLEANUP:
     OPAL_OUTPUT_VERBOSE((5, orte_plm_base_framework.framework_output,
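
The coprocessor bookkeeping above rests on a single structure: each reported serial number is hashed with OPAL_HASH_STR, the 32-bit hash keys an entry in the orte_coprocessors table, and the value stored is the vpid of the hosting daemon; a daemon that later reports the same serial resolves its hostid through the same hash. A self-contained sketch of the scheme, using a djb2-style hash and a fixed-size linear-probing table as stand-ins for OPAL_HASH_STR and opal_hash_table_t (names, sizes, and serial strings are illustrative only):

#include <stdint.h>
#include <stdio.h>

#define TABLE_SIZE 64   /* illustrative capacity */

/* djb2-style string hash, standing in for OPAL_HASH_STR */
static uint32_t hash_str(const char *s)
{
    uint32_t h = 5381;
    while (*s) {
        h = (h << 5) + h + (unsigned char)*s++;
    }
    return h;
}

/* tiny linear-probing table mapping hash -> hosting daemon vpid,
 * standing in for opal_hash_table_t keyed on uint32 */
struct entry { uint32_t key; uint32_t vpid; int used; };
static struct entry table[TABLE_SIZE];

static void record_coprocessor(const char *serial, uint32_t host_vpid)
{
    uint32_t h = hash_str(serial);
    uint32_t slot = h % TABLE_SIZE;
    while (table[slot].used && table[slot].key != h) {
        slot = (slot + 1) % TABLE_SIZE;   /* probe past collisions */
    }
    table[slot].key = h;
    table[slot].vpid = host_vpid;
    table[slot].used = 1;
}

static int lookup_host(const char *serial, uint32_t *host_vpid)
{
    uint32_t h = hash_str(serial);
    uint32_t slot = h % TABLE_SIZE;
    while (table[slot].used) {
        if (table[slot].key == h) {
            *host_vpid = table[slot].vpid;
            return 0;
        }
        slot = (slot + 1) % TABLE_SIZE;
    }
    return -1;   /* unknown serial */
}

int main(void)
{
    uint32_t host;

    /* the host daemon (vpid 3) reports its attached coprocessors,
     * as in the "unpack any coprocessors" path */
    record_coprocessor("MIC-0001", 3);
    record_coprocessor("MIC-0002", 3);

    /* a daemon running on coprocessor MIC-0002 later resolves its hostid,
     * as in the complete_setup pass over the node pool */
    if (0 == lookup_host("MIC-0002", &host)) {
        printf("coprocessor MIC-0002 is hosted by daemon vpid %u\n", host);
    }
    return 0;
}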

View file

@@ -75,12 +75,14 @@ static int ppr_mapper(orte_job_t *jdata)
     orte_proc_t *proc;
     orte_app_context_t *app;
     orte_vpid_t total_procs, nprocs_mapped;
-    opal_hwloc_level_t level, start=OPAL_HWLOC_NODE_LEVEL;
+    opal_hwloc_level_t start=OPAL_HWLOC_NODE_LEVEL;
 #if OPAL_HAVE_HWLOC
     hwloc_obj_t obj;
     hwloc_obj_type_t lowest;
     unsigned cache_level=0;
     unsigned int nobjs, i;
+    bool pruning_reqd = false;
+    opal_hwloc_level_t level;
 #endif
     opal_list_t node_list;
     opal_list_item_t *item;
@@ -88,7 +90,6 @@ static int ppr_mapper(orte_job_t *jdata)
     orte_app_idx_t idx;
     char **ppr_req, **ck;
     size_t len;
-    bool pruning_reqd = false;
     bool initial_map=true;
 
     /* only handle initial launch of loadbalanced
@@ -218,19 +219,21 @@ static int ppr_mapper(orte_job_t *jdata)
         opal_output(0, "NOTHING GIVEN");
         return ORTE_ERR_SILENT;
     }
 
+#if OPAL_HAVE_HWLOC
     /* if more than one level was specified, then pruning will be reqd */
     if (1 < n) {
         pruning_reqd = true;
     }
+#endif
 
     opal_output_verbose(5, orte_rmaps_base_framework.framework_output,
                         "mca:rmaps:ppr: job %s assigned policy %s",
                         ORTE_JOBID_PRINT(jdata->jobid),
                         orte_rmaps_base_print_mapping(jdata->map->mapping));
 
+#if OPAL_HAVE_HWLOC
     /* convenience */
     level = start;
-#if OPAL_HAVE_HWLOC
     lowest = opal_hwloc_levels[start];
 #endif
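
On the ppr side, the pruning_reqd flag now lives with the rest of the hwloc-only state: a ppr spec can name several levels (for example "1:socket,2:node"), and whenever more than one level is given the mapper must later prune processes at the deeper levels, which only makes sense when hwloc can describe those levels. A simplified sketch of that decision, with the spec string and comma counting standing in for the mapper's opal_argv_split()-based parsing:

#include <stdbool.h>
#include <stdio.h>

/* count comma-separated entries, standing in for opal_argv_split() */
static int count_levels(const char *spec)
{
    int n = 1;
    for (const char *p = spec; *p; p++) {
        if (',' == *p) {
            n++;
        }
    }
    return n;
}

int main(void)
{
    const char *spec = "1:socket,2:node";   /* hypothetical ppr value */
    bool pruning_reqd = false;
    int n = count_levels(spec);

    /* if more than one level was specified, then pruning will be reqd */
    if (1 < n) {
        pruning_reqd = true;
    }
    printf("%d level(s) given, pruning %srequired\n",
           n, pruning_reqd ? "" : "not ");
    return 0;
}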