1
1

Merge pull request #5287 from rhc54/topic/tools

Correct accounting for tools
Этот коммит содержится в:
Ralph Castain 2018-06-18 14:57:17 -07:00 коммит произвёл GitHub
родитель bb1522472f 081a0d98eb
Коммит d6a81ecf41
Не найден ключ, соответствующий данной подписи
Идентификатор ключа GPG: 4AEE18F83AFDEB23
8 изменённых файлов: 179 добавлений и 94 удалений

Просмотреть файл

@@ -64,3 +64,4 @@ mca_base_component_show_load_errors = 1
orte_abort_timeout = 10
hwloc_base_mem_bind_failure_action = silent
btl_tcp_if_include=10.10.10.0/24
oob=^ud

Просмотреть файл

@@ -1074,6 +1074,8 @@ static void server_tool_connection(pmix_info_t *info, size_t ninfo,
opal_value_t *oinfo;
int rc;
pmix_status_t err;
opal_pmix3x_jobid_trkr_t *job;
bool found;
/* setup the caddy */
opalcaddy = OBJ_NEW(pmix3x_opalcaddy_t);
@@ -1085,12 +1087,36 @@ static void server_tool_connection(pmix_info_t *info, size_t ninfo,
oinfo = OBJ_NEW(opal_value_t);
opal_list_append(&opalcaddy->info, &oinfo->super);
oinfo->key = strdup(info[n].key);
if (OPAL_SUCCESS != (rc = pmix3x_value_unload(oinfo, &info[n].value))) {
if (0 == strncmp(oinfo->key, PMIX_NSPACE, PMIX_MAX_KEYLEN)) {
/* will pass it up as a jobid */
oinfo->type = OPAL_JOBID;
/* see if this job is in our list of known nspaces */
found = false;
OPAL_LIST_FOREACH(job, &mca_pmix_pmix3x_component.jobids, opal_pmix3x_jobid_trkr_t) {
if (0 == strncmp(job->nspace, info[n].value.data.proc->nspace, PMIX_MAX_NSLEN)) {
oinfo->data.name.jobid = job->jobid;
found = true;
break;
}
}
if (!found) {
if (OPAL_SUCCESS != (rc = opal_convert_string_to_jobid(&oinfo->data.name.jobid, info[n].value.data.proc->nspace))) {
OPAL_ERROR_LOG(rc);
OBJ_RELEASE(opalcaddy);
err = pmix3x_convert_opalrc(rc);
if (NULL != cbfunc) {
cbfunc(err, NULL, cbdata);
}
return;
}
}
} else if (OPAL_SUCCESS != (rc = pmix3x_value_unload(oinfo, &info[n].value))) {
OBJ_RELEASE(opalcaddy);
err = pmix3x_convert_opalrc(rc);
if (NULL != cbfunc) {
cbfunc(err, NULL, cbdata);
}
return;
}
}

Просмотреть файл

@@ -217,6 +217,10 @@ int orte_rmaps_base_get_target_nodes(opal_list_t *allocated_nodes, orte_std_cntr
if (NULL == (node = (orte_node_t*)opal_pointer_array_get_item(orte_node_pool, i))) {
continue;
}
/* ignore nodes that are non-usable */
if (ORTE_FLAG_TEST(node, ORTE_NODE_NON_USABLE)) {
continue;
}
OPAL_LIST_FOREACH_SAFE(nptr, next, &nodes, orte_node_t) {
if (0 != strcmp(node->name, nptr->name)) {
OPAL_OUTPUT_VERBOSE((10, orte_rmaps_base_framework.framework_output,
@@ -320,6 +324,10 @@ int orte_rmaps_base_get_target_nodes(opal_list_t *allocated_nodes, orte_std_cntr
}
for (i=1; i < orte_node_pool->size; i++) {
if (NULL != (node = (orte_node_t*)opal_pointer_array_get_item(orte_node_pool, i))) {
/* ignore nodes that are non-usable */
if (ORTE_FLAG_TEST(node, ORTE_NODE_NON_USABLE)) {
continue;
}
/* ignore nodes that are marked as do-not-use for this mapping */
if (ORTE_NODE_STATE_DO_NOT_USE == node->state) {
OPAL_OUTPUT_VERBOSE((10, orte_rmaps_base_framework.framework_output,

Просмотреть файл

@@ -1,6 +1,6 @@
/*
* Copyright (c) 2011-2012 Los Alamos National Security, LLC.
* Copyright (c) 2014-2017 Intel, Inc. All rights reserved.
* Copyright (c) 2014-2018 Intel, Inc. All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
@@ -405,8 +405,10 @@ static void cleanup_node(orte_proc_t *proc)
if (NULL == (node = proc->node)) {
return;
}
node->num_procs--;
node->slots_inuse--;
if (!ORTE_FLAG_TEST(proc, ORTE_PROC_FLAG_TOOL)) {
node->num_procs--;
node->slots_inuse--;
}
for (i=0; i < node->procs->size; i++) {
if (NULL == (p = (orte_proc_t*)opal_pointer_array_get_item(node->procs, i))) {
continue;

Просмотреть файл

@@ -511,8 +511,11 @@ static void check_complete(int fd, short args, void *cbdata)
/* skip procs from another job */
continue;
}
node->slots_inuse--;
node->num_procs--;
if (!ORTE_FLAG_TEST(proc, ORTE_PROC_FLAG_TOOL)) {
node->slots_inuse--;
node->num_procs--;
}
OPAL_OUTPUT_VERBOSE((2, orte_state_base_framework.framework_output,
"%s state:dvm releasing proc %s from node %s",
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),

Просмотреть файл

@@ -1,7 +1,7 @@
/*
* Copyright (c) 2011-2017 Los Alamos National Security, LLC.
* All rights reserved.
* Copyright (c) 2014-2017 Intel, Inc. All rights reserved.
* Copyright (c) 2014-2018 Intel, Inc. All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
@@ -480,8 +480,10 @@ static void track_procs(int fd, short argc, void *cbdata)
/* skip procs from another job */
continue;
}
node->slots_inuse--;
node->num_procs--;
if (!ORTE_FLAG_TEST(pptr, ORTE_PROC_FLAG_TOOL)) {
node->slots_inuse--;
node->num_procs--;
}
OPAL_OUTPUT_VERBOSE((2, orte_state_base_framework.framework_output,
"%s state:orted releasing proc %s from node %s",
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),

Просмотреть файл

@@ -832,11 +832,12 @@ static void _toolconn(int sd, short args, void *cbdata)
orte_job_t *jdata;
orte_app_context_t *app;
orte_proc_t *proc;
orte_node_t *node;
orte_process_name_t tool;
int rc;
orte_node_t *node, *nptr;
char *hostname = NULL;
orte_process_name_t tool = {ORTE_JOBID_INVALID, ORTE_VPID_INVALID};
int rc, i;
opal_value_t *val;
bool flag;
bool flag = false, flag_given = false;
ORTE_ACQUIRE_OBJECT(cd);
@@ -844,109 +845,149 @@ static void _toolconn(int sd, short args, void *cbdata)
"%s TOOL CONNECTION PROCESSING",
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME));
/* if we are the HNP, we can directly assign the jobid */
if (ORTE_PROC_IS_HNP || ORTE_PROC_IS_MASTER) {
jdata = OBJ_NEW(orte_job_t);
rc = orte_plm_base_create_jobid(jdata);
if (ORTE_SUCCESS != rc) {
tool.jobid = ORTE_JOBID_INVALID;
/* check for directives */
if (NULL != cd->info) {
OPAL_LIST_FOREACH(val, cd->info, opal_value_t) {
if (0 == strcmp(val->key, OPAL_PMIX_EVENT_SILENT_TERMINATION)) {
if (OPAL_UNDEF == val->type || val->data.flag) {
flag = true;
flag_given = true;
}
} else if (0 == strcmp(val->key, OPAL_PMIX_NSPACE)) {
tool.jobid = val->data.name.jobid;
} else if (0 == strcmp(val->key, OPAL_PMIX_RANK)) {
tool.vpid = val->data.name.vpid;
} else if (0 == strcmp(val->key, OPAL_PMIX_HOSTNAME)) {
hostname = strdup(val->data.string);
}
}
}
/* if we are not the HNP or master, and the tool doesn't
* already have a name (i.e., we didn't spawn it), then
* there is nothing we can currently do.
* Eventually, when we switch to nspace instead of an
* integer jobid, we'll just locally assign this value */
if (ORTE_JOBID_INVALID == tool.jobid ||
ORTE_VPID_INVALID == tool.vpid) {
/* if we are the HNP, we can directly assign the jobid */
if (ORTE_PROC_IS_HNP || ORTE_PROC_IS_MASTER) {
jdata = OBJ_NEW(orte_job_t);
rc = orte_plm_base_create_jobid(jdata);
if (ORTE_SUCCESS != rc) {
OBJ_RELEASE(jdata);
if (NULL != cd->toolcbfunc) {
cd->toolcbfunc(ORTE_ERROR, tool, cd->cbdata);
}
OBJ_RELEASE(cd);
return;
}
tool.jobid = jdata->jobid;
tool.vpid = 0;
} else {
/* we currently do not support connections to non-HNP/master
* daemons from tools that were not spawned by a daemon */
if (NULL != cd->toolcbfunc) {
cd->toolcbfunc(rc, tool, cd->cbdata);
cd->toolcbfunc(ORTE_ERR_NOT_SUPPORTED, tool, cd->cbdata);
}
OBJ_RELEASE(cd);
return;
}
opal_hash_table_set_value_uint32(orte_job_data, jdata->jobid, jdata);
/* setup some required job-level fields in case this
* tool calls spawn, or uses some other functions that
* need them */
/* must create a map for it (even though it has no
* info in it) so that the job info will be picked
* up in subsequent pidmaps or other daemons won't
* know how to route
*/
jdata->map = OBJ_NEW(orte_job_map_t);
} else {
jdata = OBJ_NEW(orte_job_t);
jdata->jobid = tool.jobid;
}
/* setup an app_context for the singleton */
app = OBJ_NEW(orte_app_context_t);
app->app = strdup("tool");
app->num_procs = 1;
opal_pointer_array_add(jdata->apps, app);
jdata->num_apps = 1;
opal_hash_table_set_value_uint32(orte_job_data, jdata->jobid, jdata);
/* setup some required job-level fields in case this
* tool calls spawn, or uses some other functions that
* need them */
/* must create a map for it (even though it has no
* info in it) so that the job info will be picked
* up in subsequent pidmaps or other daemons won't
* know how to route
*/
jdata->map = OBJ_NEW(orte_job_map_t);
/* setup a proc object for the singleton - since we
* -must- be the HNP, and therefore we stored our
* node on the global node pool, and since the singleton
* -must- be on the same node as us, indicate that
*/
proc = OBJ_NEW(orte_proc_t);
proc->name.jobid = jdata->jobid;
proc->name.vpid = 0;
proc->parent = ORTE_PROC_MY_NAME->vpid;
ORTE_FLAG_SET(proc, ORTE_PROC_FLAG_ALIVE);
proc->state = ORTE_PROC_STATE_RUNNING;
proc->app_idx = 0;
/* obviously, it is on my node */
/* setup an app_context for the singleton */
app = OBJ_NEW(orte_app_context_t);
app->app = strdup("tool");
app->num_procs = 1;
opal_pointer_array_add(jdata->apps, app);
jdata->num_apps = 1;
/* setup a proc object for the singleton - since we
* -must- be the HNP, and therefore we stored our
* node on the global node pool, and since the singleton
* -must- be on the same node as us, indicate that
*/
proc = OBJ_NEW(orte_proc_t);
proc->name.jobid = jdata->jobid;
proc->name.vpid = tool.vpid;
proc->parent = ORTE_PROC_MY_NAME->vpid;
ORTE_FLAG_SET(proc, ORTE_PROC_FLAG_ALIVE);
ORTE_FLAG_SET(proc, ORTE_PROC_FLAG_TOOL);
proc->state = ORTE_PROC_STATE_RUNNING;
/* set the trivial */
proc->local_rank = 0;
proc->node_rank = 0;
proc->app_rank = 0;
proc->app_idx = 0;
if (NULL == hostname) {
/* it is on my node */
node = (orte_node_t*)opal_pointer_array_get_item(orte_node_pool, 0);
proc->node = node;
OBJ_RETAIN(node); /* keep accounting straight */
opal_pointer_array_add(jdata->procs, proc);
jdata->num_procs = 1;
/* add the node to the job map */
OBJ_RETAIN(node);
opal_pointer_array_add(jdata->map->nodes, node);
jdata->map->num_nodes++;
/* and it obviously is on the node - note that
* we do _not_ increment the #procs on the node
* as the tool doesn't count against the slot
* allocation */
OBJ_RETAIN(proc);
opal_pointer_array_add(node->procs, proc);
/* set the trivial */
proc->local_rank = 0;
proc->node_rank = 0;
proc->app_rank = 0;
proc->state = ORTE_PROC_STATE_RUNNING;
proc->app_idx = 0;
ORTE_FLAG_SET(proc, ORTE_PROC_FLAG_LOCAL);
/* check for directives */
if (NULL != cd->info) {
OPAL_LIST_FOREACH(val, cd->info, opal_value_t) {
if (0 == strcmp(val->key, OPAL_PMIX_EVENT_SILENT_TERMINATION)) {
if (OPAL_UNDEF == val->type || val->data.flag) {
flag = true;
orte_set_attribute(&jdata->attributes, ORTE_JOB_SILENT_TERMINATION,
ORTE_ATTR_GLOBAL, &flag, OPAL_BOOL);
}
}
} else {
/* we need to locate it */
node = NULL;
for (i=0; i < orte_node_pool->size; i++) {
if (NULL == (nptr = (orte_node_t*)opal_pointer_array_get_item(orte_node_pool, i))) {
continue;
}
if (0 == strcmp(hostname, nptr->name)) {
node = nptr;
break;
}
}
if (NULL == node) {
/* not in our allocation - which is still okay */
node = OBJ_NEW(orte_node_t);
node->name = strdup(hostname);
ORTE_FLAG_SET(node, ORTE_NODE_NON_USABLE);
opal_pointer_array_add(orte_node_pool, node);
}
}
proc->node = node;
OBJ_RETAIN(node); /* keep accounting straight */
opal_pointer_array_add(jdata->procs, proc);
jdata->num_procs = 1;
/* add the node to the job map */
OBJ_RETAIN(node);
opal_pointer_array_add(jdata->map->nodes, node);
jdata->map->num_nodes++;
/* and it obviously is on the node - note that
* we do _not_ increment the #procs on the node
* as the tool doesn't count against the slot
* allocation */
OBJ_RETAIN(proc);
opal_pointer_array_add(node->procs, proc);
/* if they indicated a preference for termination, set it */
if (flag_given) {
orte_set_attribute(&jdata->attributes, ORTE_JOB_SILENT_TERMINATION,
ORTE_ATTR_GLOBAL, &flag, OPAL_BOOL);
} else {
/* we default to silence */
flag = true;
orte_set_attribute(&jdata->attributes, ORTE_JOB_SILENT_TERMINATION,
ORTE_ATTR_GLOBAL, &flag, OPAL_BOOL);
/* pass back the assigned jobid */
tool.jobid = jdata->jobid;
tool.vpid = 0;
if (NULL != cd->toolcbfunc) {
cd->toolcbfunc(rc, tool, cd->cbdata);
}
OBJ_RELEASE(cd);
return;
}
/* otherwise, we have to send the request to the HNP.
* Eventually, when we switch to nspace instead of an
* integer jobid, we'll just locally assign this value */
tool.jobid = ORTE_JOBID_INVALID;
tool.vpid = ORTE_VPID_INVALID;
if (NULL != cd->toolcbfunc) {
cd->toolcbfunc(ORTE_ERR_NOT_SUPPORTED, tool, cd->cbdata);
}
OBJ_RELEASE(cd);
}
void pmix_tool_connected_fn(opal_list_t *info,
opal_pmix_tool_connection_cbfunc_t cbfunc,
void *cbdata)

Просмотреть файл

@@ -64,6 +64,7 @@ typedef uint8_t orte_node_flags_t;
#define ORTE_NODE_FLAG_OVERSUBSCRIBED 0x04 // whether or not this node is oversubscribed
#define ORTE_NODE_FLAG_MAPPED 0x08 // whether we have been added to the current map
#define ORTE_NODE_FLAG_SLOTS_GIVEN 0x10 // the number of slots was specified - used only in non-managed environments
#define ORTE_NODE_NON_USABLE 0x20 // the node is hosting a tool and is NOT to be used for jobs
/*** NODE ATTRIBUTE KEYS - never sent anywhere ***/
@@ -177,6 +178,7 @@ typedef uint16_t orte_proc_flags_t;
#define ORTE_PROC_FLAG_DATA_IN_SM 0x0800 // modex data has been stored in the local shared memory region
#define ORTE_PROC_FLAG_DATA_RECVD 0x1000 // modex data for this proc has been received
#define ORTE_PROC_FLAG_SM_ACCESS 0x2000 // indicate if process can read modex data from shared memory region
#define ORTE_PROC_FLAG_TOOL 0x4000 // proc is a tool and doesn't count against allocations
/*** PROCESS ATTRIBUTE KEYS ***/
#define ORTE_PROC_START_KEY ORTE_JOB_MAX_KEY