Merge pull request #2600 from rhc54/topic/dbg
Transfer debugger support changes
Этот коммит содержится в:
Коммит
c1b8538216
@ -455,8 +455,8 @@ static void _notify_client_event(int sd, short args, void *cbdata)
|
|||||||
bool matched;
|
bool matched;
|
||||||
|
|
||||||
pmix_output_verbose(2, pmix_globals.debug_output,
|
pmix_output_verbose(2, pmix_globals.debug_output,
|
||||||
"pmix_server: _notify_error notifying clients of error %d",
|
"pmix_server: _notify_error notifying clients of error %s",
|
||||||
cd->status);
|
PMIx_Error_string(cd->status));
|
||||||
|
|
||||||
/* we cannot know if everyone who wants this notice has had a chance
|
/* we cannot know if everyone who wants this notice has had a chance
|
||||||
* to register for it - the notice may be coming too early. So cache
|
* to register for it - the notice may be coming too early. So cache
|
||||||
|
@ -32,7 +32,7 @@
|
|||||||
#endif
|
#endif
|
||||||
|
|
||||||
#include <pmix_common.h>
|
#include <pmix_common.h>
|
||||||
|
#include "src/include/pmix_globals.h"
|
||||||
#include "src/util/error.h"
|
#include "src/util/error.h"
|
||||||
|
|
||||||
const char* PMIx_Error_string(pmix_status_t errnum)
|
const char* PMIx_Error_string(pmix_status_t errnum)
|
||||||
@ -151,8 +151,12 @@ const char* PMIx_Error_string(pmix_status_t errnum)
|
|||||||
return "PMIX_ERR_FILE_READ_FAILURE";
|
return "PMIX_ERR_FILE_READ_FAILURE";
|
||||||
case PMIX_ERR_PERM:
|
case PMIX_ERR_PERM:
|
||||||
return "PMIX_ERR_PERM";
|
return "PMIX_ERR_PERM";
|
||||||
|
case PMIX_ERR_JOB_TERMINATED:
|
||||||
|
return "PMIX_ERR_JOB_TERMINATED";
|
||||||
case PMIX_SUCCESS:
|
case PMIX_SUCCESS:
|
||||||
return "SUCCESS";
|
return "SUCCESS";
|
||||||
|
case PMIX_MAX_ERR_CONSTANT:
|
||||||
|
return "PMIX_ERR_WILDCARD";
|
||||||
default:
|
default:
|
||||||
return "ERROR STRING NOT FOUND";
|
return "ERROR STRING NOT FOUND";
|
||||||
}
|
}
|
||||||
|
@ -518,16 +518,74 @@ static void _send_notification(int status, orte_process_name_t *proc)
|
|||||||
OBJ_DESTRUCT(&buf);
|
OBJ_DESTRUCT(&buf);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*
 * Build a notification message and send it to the daemon that hosts
 * the given process.
 *
 * The buffer is packed, in order, with:
 *   1. the status code (OPAL_INT)
 *   2. the source of the notification, ORTE_PROC_MY_NAME (ORTE_NAME)
 *   3. a count of 1 (OPAL_INT)
 *   4. one opal_value_t keyed OPAL_PMIX_EVENT_CUSTOM_RANGE whose
 *      OPAL_NAME payload carries proc->jobid / proc->vpid
 *
 * The message is sent non-blocking on ORTE_RML_TAG_NOTIFICATION to the
 * daemon whose vpid is returned by orte_get_proc_daemon_vpid(proc).
 * On any pack or send failure the error is logged via ORTE_ERROR_LOG,
 * the buffer is released, and the function returns without reporting
 * the failure to the caller (return type is void).
 *
 * @param status  status code to deliver
 * @param proc    name of the process to be notified; dereferenced
 *                unconditionally, so it must not be NULL
 */
static void _send_direct_notify(int status, orte_process_name_t *proc)
|
||||||
|
{
|
||||||
|
opal_buffer_t *buf;
|
||||||
|
int rc;
|
||||||
|
opal_value_t kv, *kvptr;
|
||||||
|
orte_process_name_t daemon;
|
||||||
|
|
||||||
|
buf = OBJ_NEW(opal_buffer_t);
|
||||||
|
|
||||||
|
/* pack the status */
|
||||||
|
if (ORTE_SUCCESS != (rc = opal_dss.pack(buf, &status, 1, OPAL_INT))) {
|
||||||
|
ORTE_ERROR_LOG(rc);
|
||||||
|
OBJ_RELEASE(buf);
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* the source is me */
|
||||||
|
if (ORTE_SUCCESS != (rc = opal_dss.pack(buf, ORTE_PROC_MY_NAME, 1, ORTE_NAME))) {
|
||||||
|
ORTE_ERROR_LOG(rc);
|
||||||
|
OBJ_RELEASE(buf);
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* pass along the proc to be notified (one opal_value_t) */
|
||||||
|
/* NOTE(review): rc is used both as the value being packed (the count, 1)
 * and as the receiver of pack's return code in the same statement below.
 * Consider a dedicated count variable to make the intent explicit. */
rc = 1;
|
||||||
|
if (ORTE_SUCCESS != (rc = opal_dss.pack(buf, &rc, 1, OPAL_INT))) {
|
||||||
|
ORTE_ERROR_LOG(rc);
|
||||||
|
OBJ_RELEASE(buf);
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
OBJ_CONSTRUCT(&kv, opal_value_t);
|
||||||
|
/* key is heap-allocated here; presumably freed by OBJ_DESTRUCT(&kv) - TODO confirm */
kv.key = strdup(OPAL_PMIX_EVENT_CUSTOM_RANGE);
|
||||||
|
kv.type = OPAL_NAME;
|
||||||
|
kv.data.name.jobid = proc->jobid;
|
||||||
|
kv.data.name.vpid = proc->vpid;
|
||||||
|
/* pack is handed the address of a pointer to the value, not the value itself */
kvptr = &kv;
|
||||||
|
if (ORTE_SUCCESS != (rc = opal_dss.pack(buf, &kvptr, 1, OPAL_VALUE))) {
|
||||||
|
ORTE_ERROR_LOG(rc);
|
||||||
|
OBJ_DESTRUCT(&kv);
|
||||||
|
OBJ_RELEASE(buf);
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
/* kv is destructed right after packing; assumes pack copied its contents - TODO confirm */
OBJ_DESTRUCT(&kv);
|
||||||
|
|
||||||
|
|
||||||
|
/* get the daemon hosting the proc to be notified */
|
||||||
|
/* the target daemon is addressed using this process's own jobid */
daemon.jobid = ORTE_PROC_MY_NAME->jobid;
|
||||||
|
/* NOTE(review): the result is used without validation, and a caller in this
 * file passes ORTE_NAME_WILDCARD as proc - confirm what
 * orte_get_proc_daemon_vpid returns for a wildcard/unknown name. */
daemon.vpid = orte_get_proc_daemon_vpid(proc);
|
||||||
|
/* send the notification to that daemon */
|
||||||
|
/* buf is released here only on failure; presumably orte_rml_send_callback
 * releases it after a successful send - TODO confirm */
if (ORTE_SUCCESS != (rc = orte_rml.send_buffer_nb(orte_mgmt_conduit,
|
||||||
|
&daemon, buf,
|
||||||
|
ORTE_RML_TAG_NOTIFICATION,
|
||||||
|
orte_rml_send_callback, NULL))) {
|
||||||
|
ORTE_ERROR_LOG(rc);
|
||||||
|
OBJ_RELEASE(buf);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
void orte_state_base_track_procs(int fd, short argc, void *cbdata)
|
void orte_state_base_track_procs(int fd, short argc, void *cbdata)
|
||||||
{
|
{
|
||||||
orte_state_caddy_t *caddy = (orte_state_caddy_t*)cbdata;
|
orte_state_caddy_t *caddy = (orte_state_caddy_t*)cbdata;
|
||||||
orte_process_name_t *proc = &caddy->name;
|
orte_process_name_t *proc = &caddy->name;
|
||||||
orte_process_name_t wildcard_rank;
|
|
||||||
orte_proc_state_t state = caddy->proc_state;
|
orte_proc_state_t state = caddy->proc_state;
|
||||||
orte_job_t *jdata;
|
orte_job_t *jdata;
|
||||||
orte_proc_t *pdata;
|
orte_proc_t *pdata;
|
||||||
int i;
|
int i;
|
||||||
char *rtmod;
|
char *rtmod;
|
||||||
|
orte_process_name_t parent, *npptr;
|
||||||
|
|
||||||
opal_output_verbose(5, orte_state_base_framework.framework_output,
|
opal_output_verbose(5, orte_state_base_framework.framework_output,
|
||||||
"%s state:base:track_procs called for proc %s state %s",
|
"%s state:base:track_procs called for proc %s state %s",
|
||||||
@ -636,9 +694,15 @@ void orte_state_base_track_procs(int fd, short argc, void *cbdata)
|
|||||||
ORTE_ACTIVATE_JOB_STATE(jdata, ORTE_JOB_STATE_TERMINATED);
|
ORTE_ACTIVATE_JOB_STATE(jdata, ORTE_JOB_STATE_TERMINATED);
|
||||||
/* if they requested notification upon completion, provide it */
|
/* if they requested notification upon completion, provide it */
|
||||||
if (orte_get_attribute(&jdata->attributes, ORTE_JOB_NOTIFY_COMPLETION, NULL, OPAL_BOOL)) {
|
if (orte_get_attribute(&jdata->attributes, ORTE_JOB_NOTIFY_COMPLETION, NULL, OPAL_BOOL)) {
|
||||||
wildcard_rank.jobid = jdata->jobid;
|
/* notify_completion => notify the parent of the termination
|
||||||
wildcard_rank.vpid = ORTE_VPID_WILDCARD;
|
* of this child job. So get the parent jobid info */
|
||||||
_send_notification(OPAL_ERR_JOB_TERMINATED, &wildcard_rank);
|
npptr = &parent;
|
||||||
|
if (!orte_get_attribute(&jdata->attributes, ORTE_JOB_LAUNCH_PROXY, (void**)&npptr, OPAL_NAME)) {
|
||||||
|
/* notify everyone who asked for it */
|
||||||
|
_send_direct_notify(OPAL_ERR_JOB_TERMINATED, ORTE_NAME_WILDCARD);
|
||||||
|
} else {
|
||||||
|
_send_direct_notify(OPAL_ERR_JOB_TERMINATED, &parent);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
} else if (ORTE_PROC_STATE_TERMINATED < pdata->state &&
|
} else if (ORTE_PROC_STATE_TERMINATED < pdata->state &&
|
||||||
!orte_job_term_ordered) {
|
!orte_job_term_ordered) {
|
||||||
|
@ -245,7 +245,7 @@ int pmix_server_spawn_fn(opal_process_name_t *requestor,
|
|||||||
} else if (0 == strcmp(info->key, OPAL_PMIX_NOTIFY_COMPLETION)) {
|
} else if (0 == strcmp(info->key, OPAL_PMIX_NOTIFY_COMPLETION)) {
|
||||||
if (OPAL_UNDEF == info->type || info->data.flag) {
|
if (OPAL_UNDEF == info->type || info->data.flag) {
|
||||||
orte_set_attribute(&jdata->attributes, ORTE_JOB_NOTIFY_COMPLETION,
|
orte_set_attribute(&jdata->attributes, ORTE_JOB_NOTIFY_COMPLETION,
|
||||||
ORTE_ATTR_LOCAL, NULL, OPAL_BOOL);
|
ORTE_ATTR_GLOBAL, NULL, OPAL_BOOL);
|
||||||
}
|
}
|
||||||
} else if (0 == strcmp(info->key, OPAL_PMIX_DEBUG_STOP_ON_EXEC)) {
|
} else if (0 == strcmp(info->key, OPAL_PMIX_DEBUG_STOP_ON_EXEC)) {
|
||||||
/* we don't know how to do this */
|
/* we don't know how to do this */
|
||||||
|
Загрузка…
x
Ссылка в новой задаче
Block a user