1
1

Back out some prior commits. These commits fixed bproc so it would run, but broke several other things (singleton comm_spawn and hostfile operations have been identified so far). Since bproc is the culprit here, let's leave bproc broken for now - I'll work on a fix for that environment that doesn't impact everything else.

This commit was SVN r12648.
Этот коммит содержится в:
Ralph Castain 2006-11-22 13:30:21 +00:00
родитель 20d5c35f43
Коммит 6fca1431f3
2 изменённых файлов: 28 добавлений и 6 удалений

Просмотреть файл

@ -27,7 +27,6 @@
#include "orte/dss/dss.h"
#include "orte/mca/errmgr/errmgr.h"
#include "orte/mca/rmgr/rmgr.h"
#include "orte/mca/gpr/gpr.h"
#include "orte/mca/ras/base/proxy/ras_base_proxy.h"
#include "orte/mca/ras/base/ras_private.h"
@ -129,11 +128,23 @@ int orte_ras_base_allocate(orte_jobid_t jobid, opal_list_t *attributes)
* want to allocate new nodes. Otherwise allocate all the existing nodes to
* our job */
OBJ_CONSTRUCT(&nodes, opal_list_t);
/* See if there are any nodes already on the registry. Most of the time
* these would have been put there by the RDS reading the hostfile. */
if (ORTE_SUCCESS != (ret = orte_ras_base_node_query(&nodes))) {
OBJ_DESTRUCT(&nodes);
return ret;
}
/* If there are any nodes at all, allocate them all to this job */
if (!opal_list_is_empty(&nodes)) {
opal_output(orte_ras_base.ras_output,
"orte:ras:base:allocate: reallocating nodes that are already on registry");
ret = orte_ras_base_allocate_nodes(jobid, &nodes);
OBJ_DESTRUCT(&nodes);
return ret;
}
/* Run the RAS components from highest to lowest priority (they are already sorted).
* Stop when the node segment is no longer empty. This ensures we go through the
* allocator components at least once
*/
/* there were no nodes already on the registry, so get them from the
* RAS components */
/* If no components are available, then return an error */
if (opal_list_is_empty(&orte_ras_base.ras_available)) {

Просмотреть файл

@ -434,6 +434,7 @@ int orte_init_stage1(bool infrastructure)
orte_rds_cell_desc_t *rds_item;
orte_rds_cell_attr_t *new_attr;
orte_ras_node_t *ras_item;
opal_list_t attrs;
OBJ_CONSTRUCT(&single_host, opal_list_t);
OBJ_CONSTRUCT(&rds_single_host, opal_list_t);
@ -523,11 +524,21 @@ int orte_init_stage1(bool infrastructure)
goto error;;
}
if (ORTE_SUCCESS != (ret = orte_ras_base_allocate_nodes(my_jobid, &single_host))) {
/* JMS: Same as above -- fix this after 1.0: force a
selection so that orte_ras has initialized pointers in
case anywhere else tries to use it. This may end up
putting a bunch more nodes on the node segment - e.g.,
if you're in a SLURM allocation and you "./a.out",
you'll end up with the localhost *and* all the other
nodes in your allocation on the node segment -- which
is probably fine */
OBJ_CONSTRUCT(&attrs, opal_list_t);
if (ORTE_SUCCESS != (ret = orte_ras.allocate_job(my_jobid, &attrs))) {
ORTE_ERROR_LOG(ret);
error = "allocate for a singleton";
goto error;
}
OBJ_DESTRUCT(&attrs);
OBJ_DESTRUCT(&single_host);
OBJ_DESTRUCT(&rds_single_host);