1
1

Merge pull request #3679 from rhc54/topic/spawn

Fix the backend mapper algorithm for comm_spawn. The front and back e…
Этот коммит содержится в:
Ralph Castain 2017-06-08 10:23:07 -07:00 коммит произвёл GitHub
родитель bdc7206230 7b39f19f60
Коммит a9005d6f72
6 изменённых файлов: 61 добавлений и 59 удалений

Просмотреть файл

@@ -378,26 +378,18 @@ void orte_rmaps_base_map_job(int fd, short args, void *cbdata)
*/
if (ORTE_ERR_TAKE_NEXT_OPTION != rc) {
ORTE_ERROR_LOG(rc);
OBJ_RELEASE(caddy);
ORTE_ACTIVATE_JOB_STATE(jdata, ORTE_JOB_STATE_MAP_FAILED);
return;
goto cleanup;
}
}
/* reset any node map flags we used so the next job will start clean */
for (i=0; i < jdata->map->nodes->size; i++) {
if (NULL != (node = (orte_node_t*)opal_pointer_array_get_item(jdata->map->nodes, i))) {
ORTE_FLAG_UNSET(node, ORTE_NODE_FLAG_MAPPED);
}
}
if (did_map && ORTE_ERR_RESOURCE_BUSY == rc) {
/* the map was done but nothing could be mapped
* for launch as all the resources were busy
*/
orte_show_help("help-orte-rmaps-base.txt", "cannot-launch", true);
OBJ_RELEASE(caddy);
ORTE_ACTIVATE_JOB_STATE(jdata, ORTE_JOB_STATE_MAP_FAILED);
return;
goto cleanup;
}
/* if we get here without doing the map, or with zero procs in
@@ -407,9 +399,8 @@ void orte_rmaps_base_map_job(int fd, short args, void *cbdata)
orte_show_help("help-orte-rmaps-base.txt", "failed-map", true,
did_map ? "mapped" : "unmapped",
jdata->num_procs, jdata->map->num_nodes);
OBJ_RELEASE(caddy);
ORTE_ACTIVATE_JOB_STATE(jdata, ORTE_JOB_STATE_MAP_FAILED);
return;
goto cleanup;
}
/* if any node is oversubscribed, then check to see if a binding
@@ -423,28 +414,38 @@ void orte_rmaps_base_map_job(int fd, short args, void *cbdata)
}
if (!orte_get_attribute(&jdata->attributes, ORTE_JOB_FULLY_DESCRIBED, NULL, OPAL_BOOL)) {
/* we didn't add the nodes to the node map as it would cause them to
* be in a different order than on the backend if this is a dynamic
* spawn (which means we may have started somewhere other than at
* the beginning of the allocation) */
for (i=0; i < orte_node_pool->size; i++) {
if (NULL == (node = (orte_node_t*)opal_pointer_array_get_item(orte_node_pool, i))) {
continue;
}
if (ORTE_FLAG_TEST(node, ORTE_NODE_FLAG_MAPPED)) {
OBJ_RETAIN(node);
opal_pointer_array_add(jdata->map->nodes, node);
}
}
/* compute and save location assignments */
if (ORTE_SUCCESS != (rc = orte_rmaps_base_assign_locations(jdata))) {
ORTE_ERROR_LOG(rc);
OBJ_RELEASE(caddy);
ORTE_ACTIVATE_JOB_STATE(jdata, ORTE_JOB_STATE_MAP_FAILED);
return;
goto cleanup;
}
} else {
/* compute and save local ranks */
if (ORTE_SUCCESS != (rc = orte_rmaps_base_compute_local_ranks(jdata))) {
ORTE_ERROR_LOG(rc);
OBJ_RELEASE(caddy);
ORTE_ACTIVATE_JOB_STATE(jdata, ORTE_JOB_STATE_MAP_FAILED);
return;
goto cleanup;
}
/* compute and save bindings */
if (ORTE_SUCCESS != (rc = orte_rmaps_base_compute_bindings(jdata))) {
ORTE_ERROR_LOG(rc);
OBJ_RELEASE(caddy);
ORTE_ACTIVATE_JOB_STATE(jdata, ORTE_JOB_STATE_MAP_FAILED);
return;
goto cleanup;
}
}
@@ -465,6 +466,14 @@ void orte_rmaps_base_map_job(int fd, short args, void *cbdata)
/* set the job state to the next position */
ORTE_ACTIVATE_JOB_STATE(jdata, ORTE_JOB_STATE_MAP_COMPLETE);
cleanup:
/* reset any node map flags we used so the next job will start clean */
for (i=0; i < jdata->map->nodes->size; i++) {
if (NULL != (node = (orte_node_t*)opal_pointer_array_get_item(jdata->map->nodes, i))) {
ORTE_FLAG_UNSET(node, ORTE_NODE_FLAG_MAPPED);
}
}
/* cleanup */
OBJ_RELEASE(caddy);
}

Просмотреть файл

@@ -275,12 +275,7 @@ static int ppr_mapper(orte_job_t *jdata)
}
/* add the node to the map, if needed */
if (!ORTE_FLAG_TEST(node, ORTE_NODE_FLAG_MAPPED)) {
if (ORTE_SUCCESS > (rc = opal_pointer_array_add(jdata->map->nodes, (void*)node))) {
ORTE_ERROR_LOG(rc);
goto error;
}
ORTE_FLAG_SET(node, ORTE_NODE_FLAG_MAPPED);
OBJ_RETAIN(node); /* maintain accounting on object */
jdata->map->num_nodes++;
}
/* if we are mapping solely at the node level, just put
@@ -407,7 +402,7 @@ static int ppr_mapper(orte_job_t *jdata)
}
return ORTE_SUCCESS;
error:
error:
while (NULL != (item = opal_list_remove_first(&node_list))) {
OBJ_RELEASE(item);
}

Просмотреть файл

@@ -43,7 +43,7 @@ int orte_rmaps_rr_byslot(orte_job_t *jdata,
orte_std_cntr_t num_slots,
orte_vpid_t num_procs)
{
int rc, i, nprocs_mapped;
int i, nprocs_mapped;
orte_node_t *node;
orte_proc_t *proc;
int num_procs_to_assign, extra_procs_to_assign=0, nxtra_nodes=0;
@@ -94,12 +94,7 @@ int orte_rmaps_rr_byslot(orte_job_t *jdata,
for (i=0; i < num_procs_to_assign && nprocs_mapped < app->num_procs; i++) {
/* add this node to the map - do it only once */
if (!ORTE_FLAG_TEST(node, ORTE_NODE_FLAG_MAPPED)) {
if (ORTE_SUCCESS > (rc = opal_pointer_array_add(jdata->map->nodes, (void*)node))) {
ORTE_ERROR_LOG(rc);
return rc;
}
ORTE_FLAG_SET(node, ORTE_NODE_FLAG_MAPPED);
OBJ_RETAIN(node); /* maintain accounting on object */
++(jdata->map->num_nodes);
}
if (NULL == (proc = orte_rmaps_base_setup_proc(jdata, node, app->idx))) {
@@ -149,12 +144,7 @@ int orte_rmaps_rr_byslot(orte_job_t *jdata,
/* add this node to the map - do it only once */
if (!ORTE_FLAG_TEST(node, ORTE_NODE_FLAG_MAPPED)) {
if (ORTE_SUCCESS > (rc = opal_pointer_array_add(jdata->map->nodes, (void*)node))) {
ORTE_ERROR_LOG(rc);
return rc;
}
ORTE_FLAG_SET(node, ORTE_NODE_FLAG_MAPPED);
OBJ_RETAIN(node); /* maintain accounting on object */
++(jdata->map->num_nodes);
}
if (add_one) {
@@ -221,7 +211,7 @@ int orte_rmaps_rr_bynode(orte_job_t *jdata,
int j, nprocs_mapped, nnodes;
orte_node_t *node;
orte_proc_t *proc;
int num_procs_to_assign, navg, idx;
int num_procs_to_assign, navg;
int extra_procs_to_assign=0, nxtra_nodes=0;
hwloc_obj_t obj=NULL;
float balance;
@@ -293,12 +283,7 @@ int orte_rmaps_rr_bynode(orte_job_t *jdata,
}
/* add this node to the map, but only do so once */
if (!ORTE_FLAG_TEST(node, ORTE_NODE_FLAG_MAPPED)) {
if (ORTE_SUCCESS > (idx = opal_pointer_array_add(jdata->map->nodes, (void*)node))) {
ORTE_ERROR_LOG(idx);
return idx;
}
ORTE_FLAG_SET(node, ORTE_NODE_FLAG_MAPPED);
OBJ_RETAIN(node); /* maintain accounting on object */
++(jdata->map->num_nodes);
}
if (oversubscribed) {
@@ -456,7 +441,6 @@ int orte_rmaps_rr_byobj(orte_job_t *jdata,
orte_node_t *node;
orte_proc_t *proc;
int nprocs, start;
int idx;
hwloc_obj_t obj=NULL;
unsigned int nobjs;
bool add_one;
@@ -547,12 +531,7 @@ int orte_rmaps_rr_byobj(orte_job_t *jdata,
}
/* add this node to the map, if reqd */
if (!ORTE_FLAG_TEST(node, ORTE_NODE_FLAG_MAPPED)) {
if (ORTE_SUCCESS > (idx = opal_pointer_array_add(jdata->map->nodes, (void*)node))) {
ORTE_ERROR_LOG(idx);
return idx;
}
ORTE_FLAG_SET(node, ORTE_NODE_FLAG_MAPPED);
OBJ_RETAIN(node); /* maintain accounting on object */
++(jdata->map->num_nodes);
}
nmapped = 0;
@@ -638,7 +617,6 @@ static int byobj_span(orte_job_t *jdata,
orte_node_t *node;
orte_proc_t *proc;
int nprocs, nxtra_objs;
int idx;
hwloc_obj_t obj=NULL;
unsigned int nobjs;
@@ -699,12 +677,7 @@ static int byobj_span(orte_job_t *jdata,
OPAL_LIST_FOREACH(node, node_list, orte_node_t) {
/* add this node to the map, if reqd */
if (!ORTE_FLAG_TEST(node, ORTE_NODE_FLAG_MAPPED)) {
if (ORTE_SUCCESS > (idx = opal_pointer_array_add(jdata->map->nodes, (void*)node))) {
ORTE_ERROR_LOG(idx);
return idx;
}
ORTE_FLAG_SET(node, ORTE_NODE_FLAG_MAPPED);
OBJ_RETAIN(node); /* maintain accounting on object */
++(jdata->map->num_nodes);
}
/* get the number of objects of this type on this node */

Просмотреть файл

@@ -64,7 +64,7 @@ int orte_dt_pack_job(opal_buffer_t *buffer, const void *src,
int32_t num_vals, opal_data_type_t type)
{
int rc;
int32_t i, j, count;
int32_t i, j, count, bookmark;
orte_job_t **jobs;
orte_app_context_t *app;
orte_proc_t *proc;
@@ -241,7 +241,16 @@ int orte_dt_pack_job(opal_buffer_t *buffer, const void *src,
}
}
/* do not pack the bookmark or oversubscribe_override flags */
/* pack the bookmark */
if (NULL == jobs[i]->bookmark) {
bookmark = -1;
} else {
bookmark = jobs[i]->bookmark->index;
}
if (ORTE_SUCCESS != (rc = opal_dss_pack_buffer(buffer, &bookmark, 1, OPAL_INT32))) {
ORTE_ERROR_LOG(rc);
return rc;
}
/* pack the job state */
if (ORTE_SUCCESS != (rc = opal_dss_pack_buffer(buffer,

Просмотреть файл

@@ -61,7 +61,7 @@ int orte_dt_unpack_job(opal_buffer_t *buffer, void *dest,
int32_t *num_vals, opal_data_type_t type)
{
int rc;
int32_t i, k, n, count;
int32_t i, k, n, count, bookmark;
orte_job_t **jobs;
orte_app_idx_t j;
orte_attribute_t *kv;
@@ -237,7 +237,17 @@ int orte_dt_unpack_job(opal_buffer_t *buffer, void *dest,
}
}
/* no bookmark or oversubscribe_override flags to unpack */
/* unpack the bookmark */
n = 1;
if (ORTE_SUCCESS != (rc = opal_dss_unpack_buffer(buffer,
&bookmark, &n, OPAL_INT32))) {
ORTE_ERROR_LOG(rc);
return rc;
}
if (0 <= bookmark) {
/* retrieve it */
jobs[i]->bookmark = (orte_node_t*)opal_pointer_array_get_item(orte_node_pool, bookmark);
}
/* unpack the job state */
n = 1;

Просмотреть файл

@@ -1,4 +1,5 @@
#include <stdio.h>
#include <stdlib.h>
#include <sys/types.h>
#include <unistd.h>
#include <sys/param.h>
@@ -12,9 +13,15 @@ int main(int argc, char* argv[])
int rank, size;
char hostname[MAXHOSTNAMELEN];
pid_t pid;
char *env_rank,*env_nspace;
env_rank = getenv("PMIX_RANK");
env_nspace = getenv("PMIX_NAMESPACE");
pid = getpid();
printf("[pid %ld] starting up!\n", (long)pid);
gethostname(hostname, sizeof(hostname));
printf("[%s:%s pid %ld] starting up on node %s!\n", env_nspace, env_rank, (long)pid, hostname);
MPI_Init(NULL, NULL);
MPI_Comm_rank(MPI_COMM_WORLD, &rank);
printf("%d completed MPI_Init\n", rank);
@@ -42,7 +49,6 @@ int main(int argc, char* argv[])
else {
MPI_Comm_rank(MPI_COMM_WORLD, &rank);
MPI_Comm_size(MPI_COMM_WORLD, &size);
gethostname(hostname, sizeof(hostname));
pid = getpid();
printf("Hello from the child %d of %d on host %s pid %ld\n", rank, 3, hostname, (long)pid);
if (0 == rank) {