1
1

Well, it certainly helps triggers to fire if the routines responsible for them actually adjust the counters!

The INIT counter is supposed to be adjusted when the processes are mapped - this is now done correctly.

The LAUNCHED counter is supposed to be adjusted when the PLS stores the process pid info in the registry and changes the state to LAUNCHED. This could probably be changed so that the function uses the set_proc_soh API instead, but this fixes the problem for now.

Thanks to Brian for finding that the triggers were not being fired.

This commit was SVN r8948.
Этот коммит содержится в:
Ralph Castain 2006-02-09 15:39:06 +00:00
родитель 4767843235
Коммит 1abe8ef368
2 изменённых файлов: 51 добавлений и 7 удалений

Просмотреть файл

@ -58,33 +58,58 @@ int orte_pls_base_set_proc_pid(const orte_process_name_t *name, pid_t pid)
free(segment);
return ORTE_ERR_OUT_OF_RESOURCE;
}
free(segment);
if(ORTE_SUCCESS != (rc = orte_schema.get_proc_tokens(&(values[0]->tokens), &(values[0]->num_tokens), (orte_process_name_t*)name))) {
ORTE_ERROR_LOG(rc);
OBJ_RELEASE(values[0]);
free(segment);
return rc;
}
if (ORTE_SUCCESS != (rc = orte_gpr.create_keyval(&(values[0]->keyvals[0]), ORTE_PROC_PID_KEY, ORTE_PID, &pid))) {
ORTE_ERROR_LOG(rc);
OBJ_RELEASE(values[0]);
free(segment);
return rc;
}
if (ORTE_SUCCESS != (rc = orte_gpr.create_keyval(&(values[0]->keyvals[1]), ORTE_PROC_STATE_KEY, ORTE_PROC_STATE, &proc_state))) {
ORTE_ERROR_LOG(rc);
OBJ_RELEASE(values[0]);
free(segment);
return rc;
}
rc = orte_gpr.put(1, values);
if(ORTE_SUCCESS != rc) {
ORTE_ERROR_LOG(rc);
OBJ_RELEASE(values[0]);
free(segment);
return rc;
}
OBJ_RELEASE(values[0]);
/** now increment the LAUNCHED counter so that the LAUNCHED trigger can fire! */
if (ORTE_SUCCESS != (rc = orte_gpr.create_value(&values[0],
ORTE_GPR_OVERWRITE, segment, 1, 1))) {
ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE);
free(segment);
return ORTE_ERR_OUT_OF_RESOURCE;
}
free(segment); /** done with this now */
if (ORTE_SUCCESS != (rc = orte_gpr.create_keyval(&(values[0]->keyvals[0]), ORTE_PROC_NUM_LAUNCHED, ORTE_UNDEF, NULL))) {
ORTE_ERROR_LOG(rc);
OBJ_RELEASE(values[0]);
return rc;
}
values[0]->tokens[0] = strdup(ORTE_JOB_GLOBALS); /* counter is in the job's globals container */
if (ORTE_SUCCESS != (rc = orte_gpr.increment_value(values[0]))) {
ORTE_ERROR_LOG(rc);
}
OBJ_RELEASE(values[0]);
return rc;
}
@ -255,7 +280,7 @@ int orte_pls_base_set_node_pid(orte_cellid_t cellid, char* node_name, orte_jobid
asprintf(&key, "%s-%s", ORTE_PROC_PID_KEY, jobid_string);
free(jobid_string);
if (ORTE_SUCCESS != (rc = orte_gpr.create_keyval(&(values[0]->keyvals[0]), key, ORTE_PID, &pid))) {
ORTE_ERROR_LOG(rc);
free(key);

Просмотреть файл

@ -757,8 +757,12 @@ int orte_rmaps_base_set_map(orte_jobid_t jobid, opal_list_t* mapping_list)
return ORTE_ERR_BAD_PARAM;
}
/* allocate value array */
values = (orte_gpr_value_t**)malloc(num_procs * sizeof(orte_gpr_value_t*));
/**
* allocate value array. We need to reserve one extra spot so we can set the counter
* for the process INIT state to indicate that all procs are at that state. This will
* allow the INIT trigger to fire.
*/
values = (orte_gpr_value_t**)malloc((1+num_procs) * sizeof(orte_gpr_value_t*));
if(NULL == values) {
ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE);
return ORTE_ERR_OUT_OF_RESOURCE;
@ -769,6 +773,22 @@ int orte_rmaps_base_set_map(orte_jobid_t jobid, opal_list_t* mapping_list)
return rc;
}
/** setup the last value in the array to update the INIT counter */
if (ORTE_SUCCESS != (rc = orte_gpr.create_value(&(values[num_procs]),
ORTE_GPR_OVERWRITE|ORTE_GPR_TOKENS_AND,
segment, 1, 1))) {
ORTE_ERROR_LOG(rc);
free(values);
free(segment);
return rc;
}
if (ORTE_SUCCESS != (rc = orte_gpr.create_keyval(&(values[num_procs]->keyvals[0]), ORTE_PROC_NUM_AT_INIT, ORTE_SIZE, &num_procs))) {
ORTE_ERROR_LOG(rc);
goto cleanup;
}
values[num_procs]->tokens[0] = strdup(ORTE_JOB_GLOBALS); /* counter is in the job's globals container */
for(i=0; i<num_procs; i++) {
if (ORTE_SUCCESS != (rc = orte_gpr.create_value(&(values[i]),
ORTE_GPR_OVERWRITE|ORTE_GPR_TOKENS_AND,
@ -783,7 +803,6 @@ int orte_rmaps_base_set_map(orte_jobid_t jobid, opal_list_t* mapping_list)
}
}
/* iterate through all processes and initialize value array */
for(item = opal_list_get_first(mapping_list);
item != opal_list_get_end(mapping_list);
@ -840,7 +859,7 @@ int orte_rmaps_base_set_map(orte_jobid_t jobid, opal_list_t* mapping_list)
}
/* insert all values in one call */
if (ORTE_SUCCESS != (rc = orte_gpr.put(num_procs, values))) {
if (ORTE_SUCCESS != (rc = orte_gpr.put((1+num_procs), values))) {
ORTE_ERROR_LOG(rc);
}