1
1
openmpi/orte/util/attr.c

562 строки
16 KiB
C
Исходник Обычный вид История

/*
Refactor the ORTE DVM code so that external codes can submit multiple jobs using only a single connection to the HNP. * Clean up the DVM so it continues to run even when applications error out and we would ordinarily abort the daemons. * Create a new errmgr component for the DVM to handle the differences. * Cleanup the DVM state component. * Add ORTE bindings directory and brief README * Pass a local tool index around to match jobs. * Pass the jobid on job completion. * Fix initialization logic. * Add framework for python wrapper. * Fix terminate-with-non-zero-exit behavior so it properly terminates only the indicated procs, notifies orte-submit, and orte-dvm continues executing. * Add some missing options to orte-dvm * Fix a bug in -host processing that caused us to ignore the #slots designator. Add a new attribute to indicate "do not expand the DVM" when submitting job spawn requests. * It actually makes no sense that we treat the termination of all children differently than terminating the children of a specific job - it only creates confusion over the difference in behavior. So terminate children the same way regardless. Extend the cmd_line utility to easily allow layering of command line definitions Catch up with ORTE interface change and make build more generic. Disable "fixed dvm" logic for now. Add another cmd_line function to merge a table of cmd line options with another one, reporting as errors any duplicate entries. Use this to allow orterun to reuse the orted_submit code Fix the "fixed_dvm" logic by ensuring we reset num_new_daemons to zero. Also ensure that the nidmap is sent with the first job so the downstream daemons get the node info. Remove a duplicate cmd line entry in orterun. Revise the DVM startup procedure to pass the nidmap only once, at the startup of the DVM. This reduces the overhead on each job launch and ensures that the nidmap doesn't get overwritten. Add new commands to get_orted_comm_cmd_str(). Move ORTE command line options to orte_globals.[ch]. Catch up with extra orte_submit_init parameter. Add example code. Add documentation. Bump version. The nidmap and routing data must be updated prior to propagating the xcast or else the xcast will fail. Fix the return code so it is something more expected when an error occurs. Ensure we get an error returned to us when we fail to launch for some reason. In this case, we will always get a launch_cb as we did indeed attempt to spawn it. The error code will be returned in the complete_cb. Fix the return code from orte_submit_job - it was returning the tracker index instead of "success". Take advantage of ORTE's pretty-print capabilities to provide a nice error output explaining why we failed to launch. Ensure we always get a launch_cb when we fail to launch, but no complete_cb as the job never launched. Extend the error reporting capability to job completion as well. Add index parameter to orte_submit_job(). Add orte_job_cancel and implement ORTE_DAEMON_TERMINATE_JOB_CMD. Factor out dvm termination. Parse the terminate option at tool level. Add error string for ORTE_ERR_JOB_CANCELLED. Add some safeguards. Cleanup and/of comments. Enable the return. Properly ORTE_DECLSPEC orte_submit_halt. Add orte_submit_halt and orte_submit_cancel to interface. Use the plm interface to terminate the job
2015-12-17 02:30:40 +03:00
* Copyright (c) 2014-2016 Intel, Inc. All rights reserved
* Copyright (c) 2014 Research Organization for Information Science
* and Technology (RIST). All rights reserved.
* $COPYRIGHT$
2015-06-24 06:59:57 +03:00
*
* Additional copyrights may follow
2015-06-24 06:59:57 +03:00
*
* $HEADER$
*/
#include "orte_config.h"
#include "orte/types.h"
#include "orte/constants.h"
#include "opal/dss/dss.h"
#include "orte/mca/errmgr/errmgr.h"
#include "orte/util/attr.h"
#define MAX_CONVERTERS 5
#define MAX_CONVERTER_PROJECT_LEN 10
typedef struct {
int init;
char project[MAX_CONVERTER_PROJECT_LEN];
orte_attribute_key_t key_base;
orte_attribute_key_t key_max;
orte_attr2str_fn_t converter;
} orte_attr_converter_t;
/* all default to NULL */
static orte_attr_converter_t converters[MAX_CONVERTERS];
static int orte_attr_unload(orte_attribute_t *kv,
void **data, opal_data_type_t type);
static int orte_attr_load(orte_attribute_t *kv,
void *data, opal_data_type_t type);
bool orte_get_attribute(opal_list_t *attributes,
orte_attribute_key_t key,
void **data, opal_data_type_t type)
{
orte_attribute_t *kv;
int rc;
OPAL_LIST_FOREACH(kv, attributes, orte_attribute_t) {
if (key == kv->key) {
if (kv->type != type) {
ORTE_ERROR_LOG(ORTE_ERR_TYPE_MISMATCH);
return false;
}
if (NULL != data) {
if (ORTE_SUCCESS != (rc = orte_attr_unload(kv, data, type))) {
ORTE_ERROR_LOG(rc);
}
}
return true;
}
}
/* not found */
return false;
}
int orte_set_attribute(opal_list_t *attributes,
orte_attribute_key_t key, bool local,
void *data, opal_data_type_t type)
{
orte_attribute_t *kv;
int rc;
OPAL_LIST_FOREACH(kv, attributes, orte_attribute_t) {
if (key == kv->key) {
if (kv->type != type) {
return ORTE_ERR_TYPE_MISMATCH;
}
if (ORTE_SUCCESS != (rc = orte_attr_load(kv, data, type))) {
ORTE_ERROR_LOG(rc);
}
return rc;
}
}
/* not found - add it */
kv = OBJ_NEW(orte_attribute_t);
kv->key = key;
kv->local = local;
if (OPAL_SUCCESS != (rc = orte_attr_load(kv, data, type))) {
OBJ_RELEASE(kv);
return rc;
}
opal_list_append(attributes, &kv->super);
return ORTE_SUCCESS;
}
void orte_remove_attribute(opal_list_t *attributes, orte_attribute_key_t key)
{
orte_attribute_t *kv;
OPAL_LIST_FOREACH(kv, attributes, orte_attribute_t) {
if (key == kv->key) {
opal_list_remove_item(attributes, &kv->super);
OBJ_RELEASE(kv);
return;
}
}
}
int orte_attr_register(const char *project,
orte_attribute_key_t key_base,
orte_attribute_key_t key_max,
orte_attr2str_fn_t converter)
{
int i;
for (i = 0 ; i < MAX_CONVERTERS ; ++i) {
if (0 == converters[i].init) {
converters[i].init = 1;
strncpy(converters[i].project, project, MAX_CONVERTER_PROJECT_LEN);
converters[i].project[MAX_CONVERTER_PROJECT_LEN-1] = '\0';
converters[i].key_base = key_base;
converters[i].key_max = key_max;
converters[i].converter = converter;
return ORTE_SUCCESS;
}
}
return ORTE_ERR_OUT_OF_RESOURCE;
}
const char *orte_attr_key_to_str(orte_attribute_key_t key)
{
int i;
2015-06-24 06:59:57 +03:00
if (ORTE_ATTR_KEY_BASE < key &&
key < ORTE_ATTR_KEY_MAX) {
/* belongs to ORTE, so we handle it */
switch(key) {
case ORTE_APP_HOSTFILE:
return "APP-HOSTFILE";
case ORTE_APP_ADD_HOSTFILE:
return "APP-ADD-HOSTFILE";
case ORTE_APP_DASH_HOST:
return "APP-DASH-HOST";
case ORTE_APP_ADD_HOST:
return "APP-ADD-HOST";
case ORTE_APP_USER_CWD:
return "APP-USER-CWD";
case ORTE_APP_SSNDIR_CWD:
return "APP-USE-SESSION-DIR-AS-CWD";
case ORTE_APP_PRELOAD_BIN:
return "APP-PRELOAD-BIN";
case ORTE_APP_PRELOAD_FILES:
return "APP-PRELOAD-FILES";
case ORTE_APP_SSTORE_LOAD:
return "APP-SSTORE-LOAD";
case ORTE_APP_RECOV_DEF:
return "APP-RECOVERY-DEFINED";
case ORTE_APP_MAX_RESTARTS:
return "APP-MAX-RESTARTS";
case ORTE_APP_MIN_NODES:
return "APP-MIN-NODES";
case ORTE_APP_MANDATORY:
return "APP-NODES-MANDATORY";
case ORTE_APP_MAX_PPN:
return "APP-MAX-PPN";
case ORTE_APP_PREFIX_DIR:
return "APP-PREFIX-DIR";
case ORTE_NODE_USERNAME:
return "NODE-USERNAME";
case ORTE_NODE_LAUNCH_ID:
return "NODE-LAUNCHID";
case ORTE_NODE_HOSTID:
return "NODE-HOSTID";
case ORTE_NODE_ALIAS:
return "NODE-ALIAS";
case ORTE_NODE_SERIAL_NUMBER:
return "NODE-SERIAL-NUM";
case ORTE_JOB_LAUNCH_MSG_SENT:
return "JOB-LAUNCH-MSG-SENT";
case ORTE_JOB_LAUNCH_MSG_RECVD:
return "JOB-LAUNCH-MSG-RECVD";
case ORTE_JOB_MAX_LAUNCH_MSG_RECVD:
return "JOB-MAX-LAUNCH-MSG-RECVD";
case ORTE_JOB_FILE_MAPS:
return "JOB-FILE-MAPS";
case ORTE_JOB_CKPT_STATE:
return "JOB-CKPT-STATE";
case ORTE_JOB_SNAPSHOT_REF:
return "JOB-SNAPSHOT-REF";
case ORTE_JOB_SNAPSHOT_LOC:
return "JOB-SNAPSHOT-LOC";
case ORTE_JOB_SNAPC_INIT_BAR:
return "JOB-SNAPC-INIT-BARRIER-ID";
case ORTE_JOB_SNAPC_FINI_BAR:
return "JOB-SNAPC-FINI-BARRIER-ID";
case ORTE_JOB_NUM_NONZERO_EXIT:
return "JOB-NUM-NONZERO-EXIT";
case ORTE_JOB_FAILURE_TIMER_EVENT:
return "JOB-FAILURE-TIMER-EVENT";
case ORTE_JOB_ABORTED_PROC:
return "JOB-ABORTED-PROC";
case ORTE_JOB_MAPPER:
return "JOB-MAPPER";
case ORTE_JOB_REDUCER:
return "JOB-REDUCER";
case ORTE_JOB_COMBINER:
return "JOB-COMBINER";
case ORTE_JOB_INDEX_ARGV:
return "JOB-INDEX-ARGV";
case ORTE_JOB_NO_VM:
return "JOB-NO-VM";
case ORTE_JOB_SPIN_FOR_DEBUG:
return "JOB-SPIN-FOR-DEBUG";
case ORTE_JOB_CONTINUOUS_OP:
return "JOB-CONTINUOUS-OP";
case ORTE_JOB_RECOVER_DEFINED:
return "JOB-RECOVERY-DEFINED";
case ORTE_JOB_ENABLE_RECOVERY:
return "JOB-ENABLE-RECOVERY";
case ORTE_JOB_NON_ORTE_JOB:
return "JOB-NON-ORTE-JOB";
case ORTE_JOB_STDOUT_TARGET:
return "JOB-STDOUT-TARGET";
case ORTE_JOB_POWER:
return "JOB-POWER";
case ORTE_JOB_MAX_FREQ:
return "JOB-MAX_FREQ";
case ORTE_JOB_MIN_FREQ:
return "JOB-MIN_FREQ";
case ORTE_JOB_GOVERNOR:
return "JOB-FREQ-GOVERNOR";
case ORTE_JOB_FAIL_NOTIFIED:
return "JOB-FAIL-NOTIFIED";
case ORTE_JOB_TERM_NOTIFIED:
return "JOB-TERM-NOTIFIED";
case ORTE_JOB_PEER_MODX_ID:
return "JOB-PEER-MODX-ID";
case ORTE_JOB_INIT_BAR_ID:
return "JOB-INIT-BAR-ID";
case ORTE_JOB_FINI_BAR_ID:
return "JOB-FINI-BAR-ID";
case ORTE_JOB_FWDIO_TO_TOOL:
return "JOB-FWD-IO-TO-TOOL";
case ORTE_JOB_PHYSICAL_CPUIDS:
return "JOB-PHYSICAL-CPUIDS";
case ORTE_JOB_LAUNCHED_DAEMONS:
return "JOB-LAUNCHED-DAEMONS";
case ORTE_JOB_REPORT_BINDINGS:
return "JOB-REPORT-BINDINGS";
case ORTE_JOB_SLOT_LIST:
return "JOB-SLOT-LIST";
case ORTE_JOB_NOTIFICATIONS:
return "JOB-NOTIFICATIONS";
case ORTE_JOB_ROOM_NUM:
return "JOB-ROOM-NUM";
case ORTE_JOB_LAUNCH_PROXY:
return "JOB-LAUNCH-PROXY";
case ORTE_JOB_NSPACE_REGISTERED:
return "JOB-NSPACE-REGISTERED";
Refactor the ORTE DVM code so that external codes can submit multiple jobs using only a single connection to the HNP. * Clean up the DVM so it continues to run even when applications error out and we would ordinarily abort the daemons. * Create a new errmgr component for the DVM to handle the differences. * Cleanup the DVM state component. * Add ORTE bindings directory and brief README * Pass a local tool index around to match jobs. * Pass the jobid on job completion. * Fix initialization logic. * Add framework for python wrapper. * Fix terminate-with-non-zero-exit behavior so it properly terminates only the indicated procs, notifies orte-submit, and orte-dvm continues executing. * Add some missing options to orte-dvm * Fix a bug in -host processing that caused us to ignore the #slots designator. Add a new attribute to indicate "do not expand the DVM" when submitting job spawn requests. * It actually makes no sense that we treat the termination of all children differently than terminating the children of a specific job - it only creates confusion over the difference in behavior. So terminate children the same way regardless. Extend the cmd_line utility to easily allow layering of command line definitions Catch up with ORTE interface change and make build more generic. Disable "fixed dvm" logic for now. Add another cmd_line function to merge a table of cmd line options with another one, reporting as errors any duplicate entries. Use this to allow orterun to reuse the orted_submit code Fix the "fixed_dvm" logic by ensuring we reset num_new_daemons to zero. Also ensure that the nidmap is sent with the first job so the downstream daemons get the node info. Remove a duplicate cmd line entry in orterun. Revise the DVM startup procedure to pass the nidmap only once, at the startup of the DVM. This reduces the overhead on each job launch and ensures that the nidmap doesn't get overwritten. Add new commands to get_orted_comm_cmd_str(). Move ORTE command line options to orte_globals.[ch]. Catch up with extra orte_submit_init parameter. Add example code. Add documentation. Bump version. The nidmap and routing data must be updated prior to propagating the xcast or else the xcast will fail. Fix the return code so it is something more expected when an error occurs. Ensure we get an error returned to us when we fail to launch for some reason. In this case, we will always get a launch_cb as we did indeed attempt to spawn it. The error code will be returned in the complete_cb. Fix the return code from orte_submit_job - it was returning the tracker index instead of "success". Take advantage of ORTE's pretty-print capabilities to provide a nice error output explaining why we failed to launch. Ensure we always get a launch_cb when we fail to launch, but no complete_cb as the job never launched. Extend the error reporting capability to job completion as well. Add index parameter to orte_submit_job(). Add orte_job_cancel and implement ORTE_DAEMON_TERMINATE_JOB_CMD. Factor out dvm termination. Parse the terminate option at tool level. Add error string for ORTE_ERR_JOB_CANCELLED. Add some safeguards. Cleanup and/of comments. Enable the return. Properly ORTE_DECLSPEC orte_submit_halt. Add orte_submit_halt and orte_submit_cancel to interface. Use the plm interface to terminate the job
2015-12-17 02:30:40 +03:00
case ORTE_JOB_FIXED_DVM:
return "ORTE-JOB-FIXED-DVM";
case ORTE_JOB_DVM_JOB:
return "ORTE-JOB-DVM-JOB";
case ORTE_JOB_CANCELLED:
return "ORTE-JOB-CANCELLED";
case ORTE_JOB_OUTPUT_TO_FILE:
return "ORTE-JOB-OUTPUT-TO-FILE";
case ORTE_JOB_MERGE_STDERR_STDOUT:
return "ORTE-JOB-MERGE-STDERR-STDOUT";
case ORTE_JOB_TAG_OUTPUT:
return "ORTE-JOB-TAG-OUTPUT";
case ORTE_JOB_TIMESTAMP_OUTPUT:
return "ORTE-JOB-TIMESTAMP-OUTPUT";
2015-06-24 06:59:57 +03:00
case ORTE_PROC_NOBARRIER:
return "PROC-NOBARRIER";
case ORTE_PROC_CPU_BITMAP:
return "PROC-CPU-BITMAP";
case ORTE_PROC_HWLOC_LOCALE:
return "PROC-HWLOC-LOCALE";
case ORTE_PROC_HWLOC_BOUND:
return "PROC-HWLOC-BOUND";
case ORTE_PROC_PRIOR_NODE:
return "PROC-PRIOR-NODE";
case ORTE_PROC_NRESTARTS:
return "PROC-NUM-RESTARTS";
case ORTE_PROC_RESTART_TIME:
return "PROC-RESTART-TIME";
case ORTE_PROC_FAST_FAILS:
return "PROC-FAST-FAILS";
case ORTE_PROC_CKPT_STATE:
return "PROC-CKPT-STATE";
case ORTE_PROC_SNAPSHOT_REF:
return "PROC-SNAPHOT-REF";
case ORTE_PROC_SNAPSHOT_LOC:
return "PROC-SNAPSHOT-LOC";
case ORTE_PROC_NODENAME:
return "PROC-NODENAME";
case ORTE_PROC_CGROUP:
return "PROC-CGROUP";
case ORTE_PROC_NBEATS:
return "PROC-NBEATS";
default:
return "UNKNOWN-KEY";
}
}
/* see if one of the converters can handle it */
for (i = 0 ; i < MAX_CONVERTERS ; ++i) {
if (0 != converters[i].init) {
2015-06-24 06:59:57 +03:00
if (converters[i].key_base < key &&
key < converters[i].key_max) {
return converters[i].converter(key);
}
}
}
/* get here if nobody know what to do */
return "UNKNOWN-KEY";
}
static int orte_attr_load(orte_attribute_t *kv,
void *data, opal_data_type_t type)
{
opal_byte_object_t *boptr;
struct timeval *tv;
kv->type = type;
if (NULL == data && OPAL_STRING != type && OPAL_BYTE_OBJECT != type) {
/* just set the fields to zero */
memset(&kv->data, 0, sizeof(kv->data));
return OPAL_SUCCESS;
}
switch (type) {
case OPAL_BOOL:
kv->data.flag = *(bool*)(data);
break;
case OPAL_BYTE:
kv->data.byte = *(uint8_t*)(data);
break;
case OPAL_STRING:
if (NULL != kv->data.string) {
free(kv->data.string);
}
if (NULL != data) {
kv->data.string = strdup( (const char *) data);
} else {
kv->data.string = NULL;
}
break;
case OPAL_SIZE:
kv->data.size = *(size_t*)(data);
break;
case OPAL_PID:
kv->data.pid = *(pid_t*)(data);
break;
case OPAL_INT:
kv->data.integer = *(int*)(data);
break;
case OPAL_INT8:
kv->data.int8 = *(int8_t*)(data);
break;
case OPAL_INT16:
kv->data.int16 = *(int16_t*)(data);
break;
case OPAL_INT32:
kv->data.int32 = *(int32_t*)(data);
break;
case OPAL_INT64:
kv->data.int64 = *(int64_t*)(data);
break;
case OPAL_UINT:
kv->data.uint = *(unsigned int*)(data);
break;
case OPAL_UINT8:
kv->data.uint8 = *(uint8_t*)(data);
break;
case OPAL_UINT16:
kv->data.uint16 = *(uint16_t*)(data);
break;
case OPAL_UINT32:
kv->data.uint32 = *(uint32_t*)data;
break;
case OPAL_UINT64:
kv->data.uint64 = *(uint64_t*)(data);
break;
case OPAL_BYTE_OBJECT:
if (NULL != kv->data.bo.bytes) {
free(kv->data.bo.bytes);
}
boptr = (opal_byte_object_t*)data;
if (NULL != boptr && NULL != boptr->bytes && 0 < boptr->size) {
kv->data.bo.bytes = (uint8_t *) malloc(boptr->size);
memcpy(kv->data.bo.bytes, boptr->bytes, boptr->size);
kv->data.bo.size = boptr->size;
} else {
kv->data.bo.bytes = NULL;
kv->data.bo.size = 0;
}
break;
case OPAL_FLOAT:
kv->data.fval = *(float*)(data);
break;
case OPAL_TIMEVAL:
tv = (struct timeval*)data;
kv->data.tv.tv_sec = tv->tv_sec;
kv->data.tv.tv_usec = tv->tv_usec;
break;
case OPAL_PTR:
kv->data.ptr = data;
break;
case OPAL_VPID:
kv->data.vpid = *(orte_vpid_t *)data;
break;
case OPAL_JOBID:
kv->data.jobid = *(orte_jobid_t *)data;
break;
case OPAL_NAME:
kv->data.name = *(opal_process_name_t *)data;
break;
default:
OPAL_ERROR_LOG(OPAL_ERR_NOT_SUPPORTED);
return OPAL_ERR_NOT_SUPPORTED;
}
return OPAL_SUCCESS;
}
static int orte_attr_unload(orte_attribute_t *kv,
void **data, opal_data_type_t type)
{
opal_byte_object_t *boptr;
if (type != kv->type) {
return OPAL_ERR_TYPE_MISMATCH;
}
2015-02-18 01:21:23 +03:00
if (NULL == data ||
(NULL == *data && OPAL_STRING != type && OPAL_BYTE_OBJECT != type &&
OPAL_BUFFER != type && OPAL_PTR != type)) {
assert(0);
OPAL_ERROR_LOG(OPAL_ERR_BAD_PARAM);
return OPAL_ERR_BAD_PARAM;
}
switch (type) {
case OPAL_BOOL:
memcpy(*data, &kv->data.flag, sizeof(bool));
break;
case OPAL_BYTE:
memcpy(*data, &kv->data.byte, sizeof(uint8_t));
break;
case OPAL_STRING:
if (NULL != kv->data.string) {
*data = strdup(kv->data.string);
} else {
*data = NULL;
}
break;
case OPAL_SIZE:
memcpy(*data, &kv->data.size, sizeof(size_t));
break;
case OPAL_PID:
memcpy(*data, &kv->data.pid, sizeof(pid_t));
break;
case OPAL_INT:
memcpy(*data, &kv->data.integer, sizeof(int));
break;
case OPAL_INT8:
memcpy(*data, &kv->data.int8, sizeof(int8_t));
break;
case OPAL_INT16:
memcpy(*data, &kv->data.int16, sizeof(int16_t));
break;
case OPAL_INT32:
memcpy(*data, &kv->data.int32, sizeof(int32_t));
break;
case OPAL_INT64:
memcpy(*data, &kv->data.int64, sizeof(int64_t));
break;
case OPAL_UINT:
memcpy(*data, &kv->data.uint, sizeof(unsigned int));
break;
case OPAL_UINT8:
memcpy(*data, &kv->data.uint8, 1);
break;
case OPAL_UINT16:
memcpy(*data, &kv->data.uint16, 2);
break;
case OPAL_UINT32:
memcpy(*data, &kv->data.uint32, 4);
break;
case OPAL_UINT64:
memcpy(*data, &kv->data.uint64, 8);
break;
case OPAL_BYTE_OBJECT:
boptr = (opal_byte_object_t*)malloc(sizeof(opal_byte_object_t));
if (NULL != kv->data.bo.bytes && 0 < kv->data.bo.size) {
boptr->bytes = (uint8_t *) malloc(kv->data.bo.size);
memcpy(boptr->bytes, kv->data.bo.bytes, kv->data.bo.size);
boptr->size = kv->data.bo.size;
} else {
boptr->bytes = NULL;
boptr->size = 0;
}
*data = boptr;
break;
case OPAL_BUFFER:
*data = OBJ_NEW(opal_buffer_t);
opal_dss.copy_payload(*data, &kv->data.buf);
break;
case OPAL_FLOAT:
memcpy(*data, &kv->data.fval, sizeof(float));
break;
case OPAL_TIMEVAL:
memcpy(*data, &kv->data.tv, sizeof(struct timeval));
break;
case OPAL_PTR:
*data = kv->data.ptr;
break;
case OPAL_VPID:
memcpy(*data, &kv->data.vpid, sizeof(orte_vpid_t));
break;
case OPAL_JOBID:
memcpy(*data, &kv->data.jobid, sizeof(orte_jobid_t));
break;
case OPAL_NAME:
memcpy(*data, &kv->data.name, sizeof(orte_process_name_t));
break;
default:
OPAL_ERROR_LOG(OPAL_ERR_NOT_SUPPORTED);
return OPAL_ERR_NOT_SUPPORTED;
}
return OPAL_SUCCESS;
}