
Enable restart of individual processes on command via the errmgr callback function. An external application is required to drive this capability, so normal operations should not be affected.

MPI applications are not yet supported. More work is coming to update the daemon accounting when procs are moved across nodes.

This commit was SVN r21545.
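For context, here is a minimal sketch (not part of this commit) of how an external tool might hook the errmgr callback to drive restarts. The callback signature follows the orte_err_cb_fn_t typedef shown in the orte_globals.h hunk below; the monitor logic and the restart request are hypothetical placeholders.

#include "orte/runtime/orte_globals.h"

/* Invoked by the errmgr when a job changes state; signature matches the
 * orte_err_cb_fn_t typedef shown in the orte_globals.h hunk below. */
static void proc_state_cb(orte_jobid_t job, orte_job_state_t state, void *cbdata)
{
    if (ORTE_JOB_STATE_ABORTED == state) {
        /* hypothetical: an external monitoring application decides here
         * whether to ask the HNP/errmgr to restart the failed procs */
        /* my_monitor_request_restart(job); */
    }
}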
This commit is contained in:
Ralph Castain 2009-06-26 20:54:58 +00:00
parent e5496fcc8a
commit b96a71b62e
11 changed files with 279 additions and 124 deletions

View file

@@ -636,18 +636,20 @@ static void rte_abort(int status, bool report)
static uint8_t proc_get_locality(orte_process_name_t *proc)
{
orte_node_t **nodes;
orte_proc_t **procs;
orte_vpid_t i;
orte_node_t *node;
orte_proc_t *myproc;
int i;
/* the HNP is always on node=0 of the node array */
nodes = (orte_node_t**)orte_node_pool->addr;
procs = (orte_proc_t**)nodes[0]->procs->addr;
node = (orte_node_t*)opal_pointer_array_get_item(orte_node_pool, 0);
/* cycle through the array of local procs */
for (i=0; i < nodes[0]->num_procs; i++) {
if (procs[i]->name.jobid == proc->jobid &&
procs[i]->name.vpid == proc->vpid) {
for (i=0; i < node->procs->size; i++) {
if (NULL == (myproc = (orte_proc_t*)opal_pointer_array_get_item(node->procs, i))) {
continue;
}
if (myproc->name.jobid == proc->jobid &&
myproc->name.vpid == proc->vpid) {
OPAL_OUTPUT_VERBOSE((2, orte_ess_base_output,
"%s ess:hnp: proc %s is LOCAL",
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
@@ -668,18 +670,12 @@ static uint8_t proc_get_locality(orte_process_name_t *proc)
static orte_proc_t* find_proc(orte_process_name_t *proc)
{
orte_job_t *jdata;
orte_proc_t **procs;
if (NULL == (jdata = orte_get_job_data_object(proc->jobid))) {
return NULL;
}
procs = (orte_proc_t**)jdata->procs->addr;
if (jdata->num_procs < proc->vpid) {
return NULL;
}
return procs[proc->vpid];
return (orte_proc_t*)opal_pointer_array_get_item(jdata->procs, proc->vpid);
}
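The two changes above show the access pattern adopted throughout this commit: direct indexing into a pointer array's ->addr storage is replaced by opal_pointer_array_get_item() with a NULL check, since slots can now be emptied when a proc is moved or restarted. As a rough illustration of that idiom (the helper name and its purpose are made up for this example):

#include "opal/class/opal_pointer_array.h"
#include "orte/runtime/orte_globals.h"

/* Hypothetical helper: count the live procs of a given job on a node,
 * skipping slots that were emptied when a proc was moved or restarted -
 * entries are no longer guaranteed to be left-justified. */
static int count_live_procs(orte_node_t *node, orte_jobid_t jobid)
{
    orte_proc_t *p;
    int i, count = 0;

    for (i = 0; i < node->procs->size; i++) {
        if (NULL == (p = (orte_proc_t*)opal_pointer_array_get_item(node->procs, i))) {
            continue;   /* slot was emptied - skip it */
        }
        if (p->name.jobid == jobid) {
            count++;
        }
    }
    return count;
}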

View file

@@ -302,6 +302,12 @@ int orte_odls_base_default_get_add_procs_data(opal_buffer_t *data,
return rc;
}
/* pack the job state so it can be extracted later */
if (ORTE_SUCCESS != (rc = opal_dss.pack(data, &jdata->state, 1, ORTE_JOB_STATE))) {
ORTE_ERROR_LOG(rc);
return rc;
}
/* pack the number of nodes involved in this job */
if (ORTE_SUCCESS != (rc = opal_dss.pack(data, &map->num_nodes, 1, ORTE_STD_CNTR))) {
ORTE_ERROR_LOG(rc);
@@ -551,6 +557,7 @@ int orte_odls_base_default_construct_child_list(opal_buffer_t *data,
int8_t *app_idx=NULL;
char **slot_str=NULL;
orte_jobid_t debugger;
bool add_child;
OPAL_OUTPUT_VERBOSE((5, orte_odls_globals.output,
"%s odls:constructing child list",
@@ -712,6 +719,14 @@ int orte_odls_base_default_construct_child_list(opal_buffer_t *data,
/* UNPACK JOB-SPECIFIC DATA */
/* unpack the job state so we can know if this is a restart vs initial launch */
cnt=1;
if (ORTE_SUCCESS != (rc = opal_dss.unpack(data, &jobdat->state, &cnt, ORTE_JOB_STATE))) {
*job = ORTE_JOBID_INVALID;
ORTE_ERROR_LOG(rc);
goto REPORT_ERROR;
}
/* unpack the number of nodes involved in this job */
cnt=1;
if (ORTE_SUCCESS != (rc = opal_dss.unpack(data, &jobdat->num_nodes, &cnt, ORTE_STD_CNTR))) {
@@ -755,6 +770,9 @@ int orte_odls_base_default_construct_child_list(opal_buffer_t *data,
/* allocate space and unpack the app_contexts for this job - the HNP checked
* that there must be at least one, so don't bother checking here again
*/
if (NULL != jobdat->apps) {
free(jobdat->apps);
}
jobdat->apps = (orte_app_context_t**)malloc(jobdat->num_apps * sizeof(orte_app_context_t*));
if (NULL == jobdat->apps) {
ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE);
@@ -772,6 +790,10 @@ int orte_odls_base_default_construct_child_list(opal_buffer_t *data,
goto REPORT_ERROR;
}
/* retain a copy for downloading to child processes */
if (NULL != jobdat->pmap && NULL != jobdat->pmap->bytes) {
free(jobdat->pmap->bytes);
free(jobdat->pmap);
}
opal_dss.copy((void**)&jobdat->pmap, bo, OPAL_BYTE_OBJECT);
/* decode the pidmap - this will also free the bytes in bo */
if (ORTE_SUCCESS != (rc = orte_ess.update_pidmap(bo))) {
@@ -831,26 +853,56 @@ find_my_procs:
"%s odls:constructing child list - found proc %s for me!",
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), ORTE_VPID_PRINT(j)));
/* keep tabs of the number of local procs */
jobdat->num_local_procs++;
/* add this proc to our child list */
child = OBJ_NEW(orte_odls_child_t);
/* copy the name to preserve it */
if (ORTE_SUCCESS != (rc = opal_dss.copy((void**)&child->name, &proc, ORTE_NAME))) {
ORTE_ERROR_LOG(rc);
goto REPORT_ERROR;
add_child = true;
/* if this job is restarting procs, then we need to treat things
* a little differently. We may be adding a proc to our local
* children (if the proc moved here from somewhere else), or we
* may simply be restarting someone already here.
*/
if (ORTE_JOB_STATE_RESTART == jobdat->state) {
/* look for this job on our current list of children */
for (item = opal_list_get_first(&orte_local_children);
item != opal_list_get_end(&orte_local_children);
item = opal_list_get_next(item)) {
child = (orte_odls_child_t*)item;
if (child->name->jobid == proc.jobid &&
child->name->vpid == proc.vpid) {
/* do not duplicate this child on the list! */
OPAL_OUTPUT_VERBOSE((5, orte_odls_globals.output,
"proc %s is on list and is %s",
ORTE_NAME_PRINT(&proc),
(child->alive) ? "ALIVE" : "DEAD"));
add_child = false;
/* mark that this app_context is being used on this node */
jobdat->apps[app_idx[j]]->used_on_node = true;
break;
}
}
}
child->app_idx = app_idx[j]; /* save the index into the app_context objects */
if (NULL != slot_str && NULL != slot_str[j]) {
child->slot_list = strdup(slot_str[j]);
/* if we need to add the child, do so */
if (add_child) {
/* keep tabs of the number of local procs */
jobdat->num_local_procs++;
/* add this proc to our child list */
child = OBJ_NEW(orte_odls_child_t);
/* copy the name to preserve it */
if (ORTE_SUCCESS != (rc = opal_dss.copy((void**)&child->name, &proc, ORTE_NAME))) {
ORTE_ERROR_LOG(rc);
goto REPORT_ERROR;
}
child->app_idx = app_idx[j]; /* save the index into the app_context objects */
if (NULL != slot_str && NULL != slot_str[j]) {
child->slot_list = strdup(slot_str[j]);
}
/* mark that this app_context is being used on this node */
jobdat->apps[app_idx[j]]->used_on_node = true;
/* protect operation on the global list of children */
OPAL_THREAD_LOCK(&orte_odls_globals.mutex);
opal_list_append(&orte_local_children, &child->super);
opal_condition_signal(&orte_odls_globals.cond);
OPAL_THREAD_UNLOCK(&orte_odls_globals.mutex);
}
/* mark that this app_context is being used on this node */
jobdat->apps[app_idx[j]]->used_on_node = true;
/* protect operation on the global list of children */
OPAL_THREAD_LOCK(&orte_odls_globals.mutex);
opal_list_append(&orte_local_children, &child->super);
opal_condition_signal(&orte_odls_globals.cond);
OPAL_THREAD_UNLOCK(&orte_odls_globals.mutex);
}
}
@@ -1217,7 +1269,7 @@ int orte_odls_base_default_launch_local(orte_jobid_t job,
}
}
if (ORTE_SUCCESS != opal_paffinity_base_get_processor_info(&num_processors)) {
if (ORTE_SUCCESS != (rc = opal_paffinity_base_get_processor_info(&num_processors))) {
/* if we cannot find the number of local processors, we have no choice
* but to default to conservative settings
*/
@@ -1238,7 +1290,7 @@ int orte_odls_base_default_launch_local(orte_jobid_t job,
OPAL_OUTPUT_VERBOSE((5, orte_odls_globals.output,
"%s odls:launch found %d processors for %d children and set oversubscribed to %s",
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
num_processors, (int)opal_list_get_size(&orte_local_children),
(ORTE_SUCCESS == rc) ? num_processors: -1, (int)opal_list_get_size(&orte_local_children),
oversubscribed ? "true" : "false"));
/* setup to report the proc state to the HNP */
@@ -1457,6 +1509,20 @@ int orte_odls_base_default_launch_local(orte_jobid_t job,
continue;
}
/* ensure we clear any prior info regarding state or exit status in
* case this is a restart
*/
child->state = ORTE_PROC_STATE_FAILED_TO_START;
child->exit_code = 0;
child->waitpid_recvd = false;
child->iof_complete = false;
child->coll_recvd = false;
child->pid = 0;
if (NULL != child->rml_uri) {
free(child->rml_uri);
child->rml_uri = NULL;
}
/* check to see if we have enough available file descriptors
* to launch another child - if not, then let's wait a little
* while to see if some come free. This can happen if we are
@@ -1628,6 +1694,13 @@ int orte_odls_base_default_launch_local(orte_jobid_t job,
free(param);
free(value);
/* if the proc isn't going to forward IO, then we need to flag that
* it has "completed" iof termination as otherwise it will never fire
*/
if (!(ORTE_JOB_CONTROL_FORWARD_OUTPUT & jobdat->controls)) {
child->iof_complete = true;
}
/* if we are timing things, record when we are going to launch this proc */
if (orte_timing) {
gettimeofday(&child->starttime, NULL);

View file

@@ -103,6 +103,7 @@ OBJ_CLASS_INSTANCE(orte_odls_child_t,
static void orte_odls_job_constructor(orte_odls_job_t *ptr)
{
ptr->jobid = ORTE_JOBID_INVALID;
ptr->state = ORTE_JOB_STATE_UNDEF;
ptr->launch_msg_processed = false;
ptr->apps = NULL;
ptr->num_apps = 0;

View file

@@ -29,6 +29,8 @@
#include "opal/class/opal_list.h"
#include "opal/dss/dss_types.h"
#include "orte/mca/plm/plm_types.h"
#include "orte/mca/grpcomm/grpcomm_types.h"
#include "orte/runtime/orte_globals.h"
@@ -100,6 +102,7 @@ ORTE_DECLSPEC OBJ_CLASS_DECLARATION(orte_odls_child_t);
*/
typedef struct orte_odls_job_t {
opal_list_item_t super; /* required to place this on a list */
orte_job_state_t state; /* state of the job */
orte_jobid_t jobid; /* jobid for this data */
bool launch_msg_processed; /* launch msg has been fully processed */
orte_app_context_t **apps; /* app_contexts for this job */

View file

@@ -1411,6 +1411,13 @@ void orte_plm_base_check_job_completed(orte_job_t *jdata)
ORTE_JOBID_PRINT(jdata->jobid)));
CHECK_ALL_JOBS:
/* if this job is a continuously operating one, then don't do
* anything further - just return here
*/
if (NULL != jdata && ORTE_JOB_CONTROL_CONTINUOUS_OP & jdata->controls) {
return;
}
/* if the job that is being checked is the HNP, then we are
* trying to terminate the orteds. In that situation, we
* do -not- check all jobs - we simply notify the HNP
@@ -1445,6 +1452,10 @@ CHECK_ALL_JOBS:
if (NULL == (node = (orte_node_t*)opal_pointer_array_get_item(map->nodes, index))) {
continue;
}
OPAL_OUTPUT_VERBOSE((5, orte_plm_globals.output,
"%s releasing procs from node %s",
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
node->name));
for( i = 0; i < node->procs->size; i++ ) {
if (NULL == (proc = (orte_proc_t*)opal_pointer_array_get_item(node->procs, i))) {
continue;
@@ -1456,9 +1467,9 @@ CHECK_ALL_JOBS:
node->slots_inuse--;
node->num_procs--;
OPAL_OUTPUT_VERBOSE((5, orte_plm_globals.output,
"%s releasing proc %s",
"%s releasing proc %s from node %s",
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
ORTE_NAME_PRINT(&proc->name)));
ORTE_NAME_PRINT(&proc->name), node->name));
/* set the entry in the node array to NULL */
opal_pointer_array_set_item(node->procs, i, NULL);
/* release the proc once for the map entry */

View file

@@ -106,12 +106,12 @@ void orte_plm_base_receive_process_msg(int fd, short event, void *data)
orte_job_t *jdata, *parent;
opal_buffer_t answer;
orte_vpid_t vpid;
orte_proc_t **procs;
orte_proc_t *proc;
orte_proc_state_t state;
orte_exit_code_t exit_code;
int rc, ret;
struct timeval beat;
orte_app_context_t **apps, **child_apps;
orte_app_context_t *app, *child_app;
/* setup a default response */
OBJ_CONSTRUCT(&answer, opal_buffer_t);
@@ -166,21 +166,22 @@ void orte_plm_base_receive_process_msg(int fd, short event, void *data)
* need to check that here. However, be sure not to overwrite
* the prefix if the user already provide it!
*/
apps = (orte_app_context_t**)parent->apps->addr;
child_apps = (orte_app_context_t**)jdata->apps->addr;
if (NULL != apps[0]->prefix_dir &&
NULL == child_apps[0]->prefix_dir) {
child_apps[0]->prefix_dir = strdup(apps[0]->prefix_dir);
app = (orte_app_context_t*)opal_pointer_array_get_item(parent->apps, 0);
child_app = (orte_app_context_t*)opal_pointer_array_get_item(jdata->apps, 0);
if (NULL != app->prefix_dir &&
NULL == child_app->prefix_dir) {
child_app->prefix_dir = strdup(app->prefix_dir);
}
/* find the sender's node in the job map */
procs = (orte_proc_t**)parent->procs->addr;
/* set the bookmark so the child starts from that place - this means
* that the first child process could be co-located with the proc
* that called comm_spawn, assuming slots remain on that node. Otherwise,
* the procs will start on the next available node
*/
jdata->bookmark = procs[mev->sender.vpid]->node;
if (NULL != (proc = (orte_proc_t*)opal_pointer_array_get_item(parent->procs, mev->sender.vpid))) {
/* set the bookmark so the child starts from that place - this means
* that the first child process could be co-located with the proc
* that called comm_spawn, assuming slots remain on that node. Otherwise,
* the procs will start on the next available node
*/
jdata->bookmark = proc->node;
}
/* launch it */
if (ORTE_SUCCESS != (rc = orte_plm.spawn(jdata))) {
@@ -232,7 +233,6 @@ void orte_plm_base_receive_process_msg(int fd, short event, void *data)
*/
goto CLEANUP;
}
procs = (orte_proc_t**)jdata->procs->addr;
count = 1;
while (ORTE_SUCCESS == (rc = opal_dss.unpack(mev->buffer, &vpid, &count, ORTE_VPID))) {
if (ORTE_VPID_INVALID == vpid) {
@@ -257,16 +257,25 @@ void orte_plm_base_receive_process_msg(int fd, short event, void *data)
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
(unsigned long)vpid, (unsigned int)state, (int)exit_code));
/* retrieve the proc object */
if (NULL == (proc = (orte_proc_t*)opal_pointer_array_get_item(jdata->procs, vpid))) {
/* this proc is no longer in table - skip it */
OPAL_OUTPUT_VERBOSE((5, orte_plm_globals.output,
"%s plm:base:receive proc %s is not in proc table",
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
ORTE_VPID_PRINT(vpid)));
continue;
}
/* update the termination counter IFF the state is changing to something
* indicating terminated
*/
if (ORTE_PROC_STATE_UNTERMINATED < state &&
ORTE_PROC_STATE_UNTERMINATED > procs[vpid]->state) {
ORTE_PROC_STATE_UNTERMINATED > proc->state) {
++jdata->num_terminated;
}
/* update the data */
procs[vpid]->state = state;
procs[vpid]->exit_code = exit_code;
proc->state = state;
proc->exit_code = exit_code;
/* update orte's exit status if it is non-zero */
ORTE_UPDATE_EXIT_STATUS(exit_code);
@@ -300,9 +309,16 @@ void orte_plm_base_receive_process_msg(int fd, short event, void *data)
ORTE_ERROR_LOG(ORTE_ERR_NOT_FOUND);
goto CLEANUP;
}
procs = (orte_proc_t**)jdata->procs->addr;
gettimeofday(&beat, NULL);
procs[mev->sender.vpid]->beat = beat.tv_sec;
if (NULL == (proc = (orte_proc_t*)opal_pointer_array_get_item(jdata->procs, mev->sender.vpid))) {
/* this proc is no longer in table - skip it */
OPAL_OUTPUT_VERBOSE((5, orte_plm_globals.output,
"%s plm:base:receive daemon %s is not in proc table",
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
ORTE_VPID_PRINT(mev->sender.vpid)));
break;
}
proc->beat = beat.tv_sec;
break;
default:

View file

@@ -1339,8 +1339,9 @@ int orte_plm_base_append_bootproxy_args(orte_app_context_t *app, char ***argv,
void orte_plm_base_reset_job(orte_job_t *jdata)
{
int n;
orte_proc_t *proc;
int n, i, j;
orte_proc_t *proc, *proc_from_node;
orte_node_t *node_from_map, *node;
/* set the state to restart */
jdata->state = ORTE_JOB_STATE_RESTART;
@@ -1353,6 +1354,39 @@ void orte_plm_base_reset_job(orte_job_t *jdata)
/* this proc abnormally terminated */
proc->state = ORTE_PROC_STATE_RESTART;
proc->pid = 0;
/* remove the proc from the node upon which it was mapped */
node = proc->node;
for (i=0; i < node->procs->size; i++) {
if (NULL == (proc_from_node = (orte_proc_t*)opal_pointer_array_get_item(node->procs, i))) {
continue;
}
if (proc_from_node->name.jobid == proc->name.jobid &&
proc_from_node->name.vpid == proc->name.vpid) {
/* got it! */
OBJ_RELEASE(proc); /* keep accounting straight */
opal_pointer_array_set_item(node->procs, i, NULL);
OPAL_OUTPUT_VERBOSE((5, orte_plm_globals.output,
"removing proc %s from node %s at index %d",
ORTE_NAME_PRINT(&proc->name), node->name, i));
node->num_procs--;
node->slots_inuse--;
if (0 == node->num_procs) {
/* this node has been emptied - remove it from map */
for (j=0; j < jdata->map->nodes->size; j++) {
if (NULL == (node_from_map = (orte_node_t*)opal_pointer_array_get_item(jdata->map->nodes, j))) {
continue;
}
if (node_from_map->index == node->index) {
/* got it! */
OBJ_RELEASE(node); /* keep accounting straight*/
opal_pointer_array_set_item(jdata->map->nodes, j, NULL);
break;
}
}
}
break;
}
}
/* adjust job accounting */
jdata->num_terminated--;
jdata->num_launched--;

View file

@@ -44,7 +44,7 @@ int orte_rmaps_base_get_target_nodes(opal_list_t *allocated_nodes, orte_std_cntr
orte_app_context_t *app, uint8_t policy)
{
opal_list_item_t *item, *next;
orte_node_t *node, **nodes;
orte_node_t *node;
orte_std_cntr_t num_slots;
orte_std_cntr_t i;
int rc;
@@ -52,24 +52,22 @@ int orte_rmaps_base_get_target_nodes(opal_list_t *allocated_nodes, orte_std_cntr
/** set default answer */
*total_num_slots = 0;
/* create a working list of nodes */
nodes = (orte_node_t**)orte_node_pool->addr;
/* if the hnp was allocated, include it */
if (orte_hnp_is_allocated) {
OBJ_RETAIN(nodes[0]);
opal_list_append(allocated_nodes, &nodes[0]->super);
node = (orte_node_t*)opal_pointer_array_get_item(orte_node_pool, 0);
OBJ_RETAIN(node);
opal_list_append(allocated_nodes, &node->super);
}
/* add everything in the node pool */
for (i=1; i < orte_node_pool->size; i++) {
if (NULL == nodes[i]) {
break; /* nodes are left aligned, so stop when we hit a null */
if (NULL != (node = (orte_node_t*)opal_pointer_array_get_item(orte_node_pool, i))) {
/* retain a copy for our use in case the item gets
* destructed along the way
*/
OBJ_RETAIN(node);
opal_list_append(allocated_nodes, &node->super);
}
/* retain a copy for our use in case the item gets
* destructed along the way
*/
OBJ_RETAIN(nodes[i]);
opal_list_append(allocated_nodes, &nodes[i]->super);
}
/** check that anything is here */
@@ -201,15 +199,17 @@ int orte_rmaps_base_add_proc_to_map(orte_job_map_t *map, orte_node_t *node,
bool oversubscribed, orte_proc_t *proc)
{
orte_std_cntr_t i;
orte_node_t **nodes;
orte_node_t *node_from_map;
int rc;
/* see if this node has already been assigned to the map - if
* not, then add the pointer to the pointer array
*/
nodes = (orte_node_t**)map->nodes->addr;
for (i=0; i < map->num_nodes; i++) {
if (nodes[i]->index == node->index) {
for (i=0; i < map->nodes->size; i++) {
if (NULL == (node_from_map = (orte_node_t*)opal_pointer_array_get_item(map->nodes, i))) {
continue;
}
if (node_from_map->index == node->index) {
/* we have this node in the array */
goto PROCESS;
}
@@ -262,21 +262,51 @@ int orte_rmaps_base_claim_slot(orte_job_t *jdata,
bool oversubscribe,
bool remove_from_list)
{
orte_proc_t *proc;
orte_proc_t *proc, *proc_from_job;
bool oversub;
int rc;
int n;
/* create mapped_proc object */
proc = OBJ_NEW(orte_proc_t);
/* does this proc already exist within the job? */
proc = NULL;
for (n=0; n < jdata->procs->size; n++) {
if (NULL == (proc_from_job = (orte_proc_t*)opal_pointer_array_get_item(jdata->procs, n))) {
continue;
}
if (proc_from_job->name.vpid == vpid) {
/* already have it! */
proc = proc_from_job;
if (NULL != proc->slot_list) {
/* cleanout stale info */
free(proc->slot_list);
}
break;
}
}
if (NULL == proc) {
ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE);
return ORTE_ERR_OUT_OF_RESOURCE;
/* need to create mapped_proc object */
proc = OBJ_NEW(orte_proc_t);
if (NULL == proc) {
ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE);
return ORTE_ERR_OUT_OF_RESOURCE;
}
/* create the process name */
proc->name.jobid = jdata->jobid;
proc->name.vpid = vpid;
proc->app_idx = app_idx;
/* add this proc to the job's data - we don't have to worry here
* about keeping the array left-justified as all vpids
* from 0 to num_procs will be filled
*/
if (ORTE_SUCCESS != (rc = opal_pointer_array_set_item(jdata->procs,
(int)vpid,
(void*)proc))) {
ORTE_ERROR_LOG(rc);
OBJ_RELEASE(proc);
return rc;
}
}
/* create the process name */
proc->name.jobid = jdata->jobid;
proc->name.vpid = vpid;
proc->app_idx = app_idx;
OBJ_RETAIN(current_node); /* maintain accounting on object */
if ( NULL != slot_list) {
@@ -285,21 +315,10 @@ int orte_rmaps_base_claim_slot(orte_job_t *jdata,
proc->node = current_node;
proc->nodename = current_node->name;
/* add this proc to the job's data - we don't have to worry here
* about keeping the array left-justified as all vpids
* from 0 to num_procs will be filled
*/
OPAL_OUTPUT_VERBOSE((5, orte_rmaps_base.rmaps_output,
"%s rmaps:base:claim_slot mapping rank %d to job %s",
"%s rmaps:base:claim_slot mapping rank %d in job %s to node %s",
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
vpid, ORTE_JOBID_PRINT(jdata->jobid)));
if (ORTE_SUCCESS != (rc = opal_pointer_array_set_item(jdata->procs,
(int)vpid,
(void*)proc))) {
ORTE_ERROR_LOG(rc);
OBJ_RELEASE(proc);
return rc;
}
vpid, ORTE_JOBID_PRINT(jdata->jobid), current_node->name));
/* Be sure to demarcate this slot as claimed for the node */
current_node->slots_inuse++;

View file

@@ -185,7 +185,7 @@ static int orte_rmaps_resilient_map(orte_job_t *jdata)
if (proc->state != ORTE_PROC_STATE_RESTART) {
continue;
}
opal_output(0, "proc %s is to be restarted", ORTE_NAME_PRINT(&proc->name));
/* it is to be restarted - remove the proc from its current node */
oldnode = proc->node;
oldnode->num_procs--;
@@ -261,9 +261,10 @@ static int orte_rmaps_resilient_map(orte_job_t *jdata)
}
}
/* put proc on the found node */
OBJ_RETAIN(nd); /* required to maintain bookeeping */
OBJ_RETAIN(nd); /* required to maintain bookkeeping */
proc->node = nd;
opal_pointer_array_add(nd->procs, (void*)proc);
OBJ_RETAIN(proc); /* required to maintain bookkeeping */
nd->num_procs++;
/* flag the proc state as non-launched so we'll know to launch it */
proc->state = ORTE_PROC_STATE_INIT;

View file

@@ -258,6 +258,7 @@ typedef uint8_t orte_job_controls_t;
#define ORTE_JOB_CONTROL_FORWARD_OUTPUT 0x08
#define ORTE_JOB_CONTROL_DO_NOT_MONITOR 0x10
#define ORTE_JOB_CONTROL_FORWARD_COMM 0x20
#define ORTE_JOB_CONTROL_CONTINUOUS_OP 0x40
/* error manager callback function */
typedef void (*orte_err_cb_fn_t)(orte_jobid_t job, orte_job_state_t state, void *cbdata);
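As a rough usage sketch (not code from this commit), the new ORTE_JOB_CONTROL_CONTINUOUS_OP bit is set on a job's controls field by whatever tool marks the job as continuously operating, and orte_plm_base_check_job_completed (see the plm hunk above) then returns early instead of tearing the job down when procs terminate. The helper below is hypothetical:

#include "orte/runtime/orte_globals.h"

/* Hypothetical helper: true if the job should be kept alive even though
 * procs have terminated, mirroring the early return added to
 * orte_plm_base_check_job_completed in this commit. */
static bool job_is_continuous(orte_job_t *jdata)
{
    /* a driver would have set: jdata->controls |= ORTE_JOB_CONTROL_CONTINUOUS_OP; */
    return (NULL != jdata &&
            (ORTE_JOB_CONTROL_CONTINUOUS_OP & jdata->controls));
}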

View file

@@ -722,12 +722,11 @@ int orte_util_encode_pidmap(opal_byte_object_t *boptr)
{
int32_t *nodes;
orte_proc_t *proc;
orte_vpid_t i;
int i, j;
opal_buffer_t buf;
orte_local_rank_t *lrank;
orte_node_rank_t *nrank;
orte_job_t *jdata;
int j;
int rc;
/* setup the working buffer */
@@ -756,12 +755,11 @@ int orte_util_encode_pidmap(opal_byte_object_t *boptr)
nodes = (int32_t*)malloc(jdata->num_procs * 4);
/* transfer and pack the node info in one pack */
for (i=0; i < jdata->num_procs; i++) {
for (i=0, j=0; i < jdata->procs->size; i++) {
if (NULL == (proc = (orte_proc_t *) opal_pointer_array_get_item(jdata->procs, i))) {
nodes[i] = ORTE_STD_CNTR_INVALID;
continue;
}
nodes[i] = proc->node->index;
nodes[j++] = proc->node->index;
}
if (ORTE_SUCCESS != (rc = opal_dss.pack(&buf, nodes, jdata->num_procs, OPAL_INT32))) {
ORTE_ERROR_LOG(rc);
@@ -772,12 +770,11 @@ int orte_util_encode_pidmap(opal_byte_object_t *boptr)
/* transfer and pack the local_ranks in one pack */
lrank = (orte_local_rank_t*)malloc(jdata->num_procs*sizeof(orte_local_rank_t));
for (i=0; i < jdata->num_procs; i++) {
for (i=0, j=0; i < jdata->procs->size; i++) {
if (NULL == (proc = (orte_proc_t *) opal_pointer_array_get_item(jdata->procs, i))) {
lrank[i] = ORTE_LOCAL_RANK_INVALID;
continue;
}
lrank[i] = proc->local_rank;
lrank[j++] = proc->local_rank;
}
if (ORTE_SUCCESS != (rc = opal_dss.pack(&buf, lrank, jdata->num_procs, ORTE_LOCAL_RANK))) {
ORTE_ERROR_LOG(rc);
@@ -787,12 +784,11 @@ int orte_util_encode_pidmap(opal_byte_object_t *boptr)
/* transfer and pack the node ranks in one pack */
nrank = (orte_node_rank_t*)malloc(jdata->num_procs*sizeof(orte_node_rank_t));
for (i=0; i < jdata->num_procs; i++) {
for (i=0, j=0; i < jdata->procs->size; i++) {
if (NULL == (proc = (orte_proc_t *) opal_pointer_array_get_item(jdata->procs, i))) {
nrank[i] = ORTE_NODE_RANK_INVALID;
continue;
}
nrank[i] = proc->node_rank;
nrank[j++] = proc->node_rank;
}
if (ORTE_SUCCESS != (rc = opal_dss.pack(&buf, nrank, jdata->num_procs, ORTE_NODE_RANK))) {
ORTE_ERROR_LOG(rc);
@@ -891,20 +887,24 @@ int orte_util_decode_pidmap(opal_byte_object_t *bo)
* other than where it previously was
*/
if (already_present) {
/* we already have the jmap object, so let's cycle through
* its pidmap and see if anything is different
/* we already have the jmap object, so let's refresh its pidmap
* using the new data - start by cleaning out the old array
*/
for (j=0; j < jmap->pmap.size; j++) {
if (NULL == (pmap = (orte_pmap_t*)opal_pointer_array_get_item(&jmap->pmap, j))) {
continue;
}
OBJ_RELEASE(pmap);
}
/* add in the updated array */
for (i=0; i < num_procs; i++) {
if (NULL == (pmap = (orte_pmap_t*)opal_pointer_array_get_item(&jmap->pmap, i))) {
/* this proc is new! better add it */
pmap = OBJ_NEW(orte_pmap_t);
/* add the pidmap entry at the specific site corresponding
* to the proc's vpid
*/
if (ORTE_SUCCESS != (rc = opal_pointer_array_set_item(&jmap->pmap, i, pmap))) {
ORTE_ERROR_LOG(rc);
goto cleanup;
}
pmap = OBJ_NEW(orte_pmap_t);
/* add the pidmap entry at the specific site corresponding
* to the proc's vpid
*/
if (ORTE_SUCCESS != (rc = opal_pointer_array_set_item(&jmap->pmap, i, pmap))) {
ORTE_ERROR_LOG(rc);
goto cleanup;
}
/* add/update the data */
pmap->node = nodes[i];