Add debug verbosity to the orte data server and pmix pub/lookup functions
Start updating the various mappers to the new procedure. Remove the stale lama component as it is now very out-of-date. Bring round_robin and PPR online, and modify the mindist component (but cannot test/debug it).
Remove unneeded test.
Fix memory corruption by re-initializing variable to NULL in loop.
Resolve the race condition identified by @ggouaillardet by resetting the mapped flag within the same event where it was set. There is no need to retain the flag beyond that point as it isn't used again.
Add a new job attribute ORTE_JOB_FULLY_DESCRIBED to indicate that all the job information (including locations and binding) is included in the launch message. Thus, the backend daemons do not need to do any map computation for the job. Use this for the seq, rankfile, and mindist mappers until someone decides to update them. Note that this will maintain functionality, but means that users of those three mappers will see large launch messages and less performant scaling than those using the other mappers.
Have the mindist module add procs to the job's proc array as it is a fully described module.
Protect the hnp-not-in-allocation case.
Per patch suggested by Gilles - protect the HNP node when it gets added in the absence of any other allocation or hostfile.
Signed-off-by: Ralph Castain <rhc@open-mpi.org>
Этот коммит содержится в:
родитель
02c288c853
Коммит
657e701c65
1
.gitignore
поставляемый
1
.gitignore
поставляемый
@ -415,6 +415,7 @@ orte/test/mpi/memcached-dummy
|
||||
orte/test/mpi/coll_test
|
||||
orte/test/mpi/badcoll
|
||||
orte/test/mpi/iof
|
||||
orte/test/mpi/no-disconnect
|
||||
|
||||
orte/test/system/radix
|
||||
orte/test/system/sigusr_trap
|
||||
|
@ -2,7 +2,7 @@
|
||||
/*
|
||||
* Copyright (c) 2012-2015 Los Alamos National Security, LLC. All rights
|
||||
* reserved.
|
||||
* Copyright (c) 2014-2015 Intel, Inc. All rights reserved.
|
||||
* Copyright (c) 2014-2017 Intel, Inc. All rights reserved.
|
||||
* Copyright (c) 2014-2017 Research Organization for Information Science
|
||||
* and Technology (RIST). All rights reserved.
|
||||
* Copyright (c) 2016 Mellanox Technologies, Inc.
|
||||
@ -118,6 +118,12 @@ static void lookup_cbfunc(int status, opal_list_t *data, void *cbdata)
|
||||
cd->active = false;
|
||||
}
|
||||
|
||||
static void opcbfunc(int status, void *cbdata)
|
||||
{
|
||||
struct lookup_caddy_t *cd = (struct lookup_caddy_t*)cbdata;
|
||||
cd->active = false;
|
||||
}
|
||||
|
||||
int opal_pmix_base_exchange(opal_value_t *indat,
|
||||
opal_pmix_pdata_t *outdat,
|
||||
int timeout)
|
||||
@ -141,12 +147,30 @@ int opal_pmix_base_exchange(opal_value_t *indat,
|
||||
opal_list_append(&ilist, &info->super);
|
||||
|
||||
/* publish it with "session" scope */
|
||||
if (NULL == opal_pmix.publish_nb) {
|
||||
rc = opal_pmix.publish(&ilist);
|
||||
OPAL_LIST_DESTRUCT(&ilist);
|
||||
if (OPAL_SUCCESS != rc) {
|
||||
OPAL_ERROR_LOG(rc);
|
||||
return rc;
|
||||
}
|
||||
} else {
|
||||
caddy.active = true;
|
||||
rc = opal_pmix.publish_nb(&ilist, opcbfunc, &caddy);
|
||||
if (OPAL_SUCCESS != rc) {
|
||||
OPAL_ERROR_LOG(rc);
|
||||
OPAL_LIST_DESTRUCT(&ilist);
|
||||
return rc;
|
||||
}
|
||||
while (caddy.active) {
|
||||
usleep(10);
|
||||
}
|
||||
OPAL_LIST_DESTRUCT(&ilist);
|
||||
if (OPAL_SUCCESS != caddy.status) {
|
||||
OPAL_ERROR_LOG(caddy.status);
|
||||
return caddy.status;
|
||||
}
|
||||
}
|
||||
|
||||
/* lookup the other side's info - if a non-blocking form
|
||||
* of lookup isn't available, then we use the blocking
|
||||
|
@ -131,7 +131,7 @@ int orte_odls_base_default_get_add_procs_data(opal_buffer_t *buffer,
|
||||
/* if we couldn't provide the allocation regex on the orted
|
||||
* cmd line, then we need to provide all the info here */
|
||||
if (!orte_nidmap_communicated) {
|
||||
if (ORTE_SUCCESS != (rc = orte_util_nidmap_create(&nidmap))) {
|
||||
if (ORTE_SUCCESS != (rc = orte_util_nidmap_create(orte_node_pool, &nidmap))) {
|
||||
ORTE_ERROR_LOG(rc);
|
||||
return rc;
|
||||
}
|
||||
@ -246,6 +246,22 @@ int orte_odls_base_default_get_add_procs_data(opal_buffer_t *buffer,
|
||||
return rc;
|
||||
}
|
||||
|
||||
if (!orte_get_attribute(&jdata->attributes, ORTE_JOB_FULLY_DESCRIBED, NULL, OPAL_BOOL)) {
|
||||
/* compute and pack the ppn regex */
|
||||
if (ORTE_SUCCESS != (rc = orte_util_nidmap_generate_ppn(jdata, &nidmap))) {
|
||||
ORTE_ERROR_LOG(rc);
|
||||
return rc;
|
||||
}
|
||||
if (ORTE_SUCCESS != (rc = opal_dss.pack(buffer, &nidmap, 1, OPAL_STRING))) {
|
||||
ORTE_ERROR_LOG(rc);
|
||||
free(nidmap);
|
||||
return rc;
|
||||
}
|
||||
free(nidmap);
|
||||
}
|
||||
|
||||
/* compute and pack the regex of ppn */
|
||||
|
||||
return ORTE_SUCCESS;
|
||||
}
|
||||
|
||||
@ -262,13 +278,12 @@ int orte_odls_base_default_construct_child_list(opal_buffer_t *buffer,
|
||||
int rc;
|
||||
orte_std_cntr_t cnt;
|
||||
orte_job_t *jdata=NULL, *daemons;
|
||||
int32_t n, k, m;
|
||||
int32_t n, k;
|
||||
opal_buffer_t *bptr;
|
||||
orte_node_t *node;
|
||||
orte_proc_t *pptr, *dmn;
|
||||
orte_app_context_t *app;
|
||||
bool newmap = false;
|
||||
int8_t flag;
|
||||
char *ppn;
|
||||
|
||||
OPAL_OUTPUT_VERBOSE((5, orte_odls_base_framework.framework_output,
|
||||
"%s odls:constructing child list",
|
||||
@ -356,7 +371,7 @@ int orte_odls_base_default_construct_child_list(opal_buffer_t *buffer,
|
||||
* the storage */
|
||||
jdata->jobid = ORTE_JOBID_INVALID;
|
||||
OBJ_RELEASE(jdata);
|
||||
/* get the correct job object */
|
||||
/* get the correct job object - it will be completely filled out */
|
||||
if (NULL == (jdata = orte_get_job_data_object(*job))) {
|
||||
ORTE_ERROR_LOG(ORTE_ERR_NOT_FOUND);
|
||||
rc = ORTE_ERR_NOT_FOUND;
|
||||
@ -364,17 +379,54 @@ int orte_odls_base_default_construct_child_list(opal_buffer_t *buffer,
|
||||
}
|
||||
} else {
|
||||
opal_hash_table_set_value_uint32(orte_job_data, jdata->jobid, jdata);
|
||||
}
|
||||
|
||||
/* ensure the map object is present */
|
||||
if (NULL == jdata->map) {
|
||||
jdata->map = OBJ_NEW(orte_job_map_t);
|
||||
newmap = true;
|
||||
}
|
||||
}
|
||||
|
||||
if (orte_no_vm) {
|
||||
/* if we are operating novm, then mpirun will have sent us
|
||||
* the complete array of procs - process it */
|
||||
/* if the job is fully described, then mpirun will have computed
|
||||
* and sent us the complete array of procs in the orte_job_t, so we
|
||||
* don't need to do anything more here */
|
||||
if (!orte_get_attribute(&jdata->attributes, ORTE_JOB_FULLY_DESCRIBED, NULL, OPAL_BOOL)) {
|
||||
if (!ORTE_PROC_IS_HNP) {
|
||||
/* extract the ppn regex */
|
||||
cnt = 1;
|
||||
if (OPAL_SUCCESS != (rc = opal_dss.unpack(buffer, &ppn, &cnt, OPAL_STRING))) {
|
||||
ORTE_ERROR_LOG(rc);
|
||||
goto REPORT_ERROR;
|
||||
}
|
||||
/* populate the node array of the job map and the proc array of
|
||||
* the job object so we know how many procs are on each node */
|
||||
if (ORTE_SUCCESS != (rc = orte_util_nidmap_parse_ppn(jdata, ppn))) {
|
||||
ORTE_ERROR_LOG(rc);
|
||||
free(ppn);
|
||||
goto REPORT_ERROR;
|
||||
}
|
||||
free(ppn);
|
||||
/* now assign locations to the procs */
|
||||
if (ORTE_SUCCESS != (rc = orte_rmaps_base_assign_locations(jdata))) {
|
||||
ORTE_ERROR_LOG(rc);
|
||||
goto REPORT_ERROR;
|
||||
}
|
||||
}
|
||||
/* compute the ranks and add the proc objects
|
||||
* to the jdata->procs array */
|
||||
if (ORTE_SUCCESS != (rc = orte_rmaps_base_compute_vpids(jdata))) {
|
||||
ORTE_ERROR_LOG(rc);
|
||||
goto REPORT_ERROR;
|
||||
}
|
||||
/* and finally, compute the local and node ranks */
|
||||
if (ORTE_SUCCESS != (rc = orte_rmaps_base_compute_local_ranks(jdata))) {
|
||||
ORTE_ERROR_LOG(rc);
|
||||
goto REPORT_ERROR;
|
||||
}
|
||||
}
|
||||
|
||||
/* now that the node array in the job map and jdata are completely filled out,
|
||||
* we need to "wireup" the procs to their nodes so other utilities can
|
||||
* locate them */
|
||||
for (n=0; n < jdata->procs->size; n++) {
|
||||
if (NULL == (pptr = (orte_proc_t*)opal_pointer_array_get_item(jdata->procs, n))) {
|
||||
continue;
|
||||
@ -383,6 +435,9 @@ int orte_odls_base_default_construct_child_list(opal_buffer_t *buffer,
|
||||
/* not ready for use yet */
|
||||
continue;
|
||||
}
|
||||
if (!orte_get_attribute(&jdata->attributes, ORTE_JOB_FULLY_DESCRIBED, NULL, OPAL_BOOL)) {
|
||||
/* the parser will have already made the connection, but the fully described
|
||||
* case won't have done it, so connect the proc to its node here */
|
||||
opal_output_verbose(5, orte_odls_base_framework.framework_output,
|
||||
"%s GETTING DAEMON FOR PROC %s WITH PARENT %s",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
@ -401,22 +456,7 @@ int orte_odls_base_default_construct_child_list(opal_buffer_t *buffer,
|
||||
}
|
||||
OBJ_RETAIN(dmn->node);
|
||||
pptr->node = dmn->node;
|
||||
/* add proc to node - note that num_procs for the
|
||||
* node was already correctly unpacked, so don't
|
||||
* increment it here */
|
||||
OBJ_RETAIN(pptr);
|
||||
opal_pointer_array_add(dmn->node->procs, pptr);
|
||||
|
||||
/* add the node to the map, if not already there */
|
||||
if (!ORTE_FLAG_TEST(dmn->node, ORTE_NODE_FLAG_MAPPED)) {
|
||||
OBJ_RETAIN(dmn->node);
|
||||
ORTE_FLAG_SET(dmn->node, ORTE_NODE_FLAG_MAPPED);
|
||||
opal_pointer_array_add(jdata->map->nodes, dmn->node);
|
||||
if (newmap) {
|
||||
jdata->map->num_nodes++;
|
||||
}
|
||||
}
|
||||
|
||||
/* see if it belongs to us */
|
||||
if (pptr->parent == ORTE_PROC_MY_NAME->vpid) {
|
||||
/* is this child on our current list of children */
|
||||
@ -444,43 +484,9 @@ int orte_odls_base_default_construct_child_list(opal_buffer_t *buffer,
|
||||
ORTE_FLAG_SET(app, ORTE_APP_FLAG_USED_ON_NODE);
|
||||
}
|
||||
}
|
||||
} else {
|
||||
/* create the map - will already have been done for the novm case */
|
||||
if (ORTE_SUCCESS != (rc = orte_rmaps_base_map_job(jdata))) {
|
||||
ORTE_ERROR_LOG(rc);
|
||||
goto REPORT_ERROR;
|
||||
}
|
||||
/* find our local procs */
|
||||
for (n=0; n < jdata->map->nodes->size; n++) {
|
||||
if (NULL == (node = (orte_node_t*)opal_pointer_array_get_item(jdata->map->nodes, n))) {
|
||||
continue;
|
||||
}
|
||||
if (node->index != (int)ORTE_PROC_MY_NAME->vpid) {
|
||||
continue;
|
||||
}
|
||||
for (m=0; m < node->procs->size; m++) {
|
||||
if (NULL == (pptr = (orte_proc_t*)opal_pointer_array_get_item(node->procs, m))) {
|
||||
continue;
|
||||
}
|
||||
if (!ORTE_FLAG_TEST(pptr, ORTE_PROC_FLAG_LOCAL)) {
|
||||
/* not on the local list */
|
||||
OPAL_OUTPUT_VERBOSE((5, orte_odls_base_framework.framework_output,
|
||||
"%s[%s:%d] adding proc %s to my local list",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
__FILE__, __LINE__,
|
||||
ORTE_NAME_PRINT(&pptr->name)));
|
||||
/* keep tabs of the number of local procs */
|
||||
jdata->num_local_procs++;
|
||||
/* add this proc to our child list */
|
||||
OBJ_RETAIN(pptr);
|
||||
ORTE_FLAG_SET(pptr, ORTE_PROC_FLAG_LOCAL);
|
||||
opal_pointer_array_add(orte_local_children, pptr);
|
||||
/* mark that this app_context is being used on this node */
|
||||
app = (orte_app_context_t*)opal_pointer_array_get_item(jdata->apps, pptr->app_idx);
|
||||
ORTE_FLAG_SET(app, ORTE_APP_FLAG_USED_ON_NODE);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (!ORTE_PROC_IS_HNP &&
|
||||
!orte_get_attribute(&jdata->attributes, ORTE_JOB_FULLY_DESCRIBED, NULL, OPAL_BOOL)) {
|
||||
/* compute and save bindings of local children */
|
||||
if (ORTE_SUCCESS != (rc = orte_rmaps_base_compute_bindings(jdata))) {
|
||||
ORTE_ERROR_LOG(rc);
|
||||
@ -488,13 +494,6 @@ int orte_odls_base_default_construct_child_list(opal_buffer_t *buffer,
|
||||
}
|
||||
}
|
||||
|
||||
/* reset any node map flags we used so the next job will start clean */
|
||||
for (n=0; n < jdata->map->nodes->size; n++) {
|
||||
if (NULL != (node = (orte_node_t*)opal_pointer_array_get_item(jdata->map->nodes, n))) {
|
||||
ORTE_FLAG_UNSET(node, ORTE_NODE_FLAG_MAPPED);
|
||||
}
|
||||
}
|
||||
|
||||
/* if we wanted to see the map, now is the time to display it */
|
||||
if (jdata->map->display_map) {
|
||||
orte_rmaps_base_display_map(jdata);
|
||||
|
@ -209,7 +209,7 @@ static void files_ready(int status, void *cbdata)
|
||||
if (ORTE_SUCCESS != status) {
|
||||
ORTE_FORCED_TERMINATE(status);
|
||||
} else {
|
||||
ORTE_ACTIVATE_JOB_STATE(jdata, ORTE_JOB_STATE_SYSTEM_PREP);
|
||||
ORTE_ACTIVATE_JOB_STATE(jdata, ORTE_JOB_STATE_MAP);
|
||||
}
|
||||
}
|
||||
|
||||
@ -1497,7 +1497,7 @@ int orte_plm_base_orted_append_basic_args(int *argc, char ***argv,
|
||||
|
||||
/* convert the nodes with daemons to a regex */
|
||||
param = NULL;
|
||||
if (ORTE_SUCCESS != (rc = orte_util_nidmap_create(&param))) {
|
||||
if (ORTE_SUCCESS != (rc = orte_util_nidmap_create(orte_node_pool, &param))) {
|
||||
ORTE_ERROR_LOG(rc);
|
||||
return rc;
|
||||
}
|
||||
|
@ -12,7 +12,7 @@
|
||||
# Copyright (c) 2009 Cisco Systems, Inc. All rights reserved.
|
||||
# Copyright (c) 2011 Los Alamos National Security, LLC.
|
||||
# All rights reserved.
|
||||
# Copyright (c) 2015 Intel, Inc. All rights reserved.
|
||||
# Copyright (c) 2015-2017 Intel, Inc. All rights reserved.
|
||||
# $COPYRIGHT$
|
||||
#
|
||||
# Additional copyrights may follow
|
||||
@ -31,7 +31,8 @@ libmca_rmaps_la_SOURCES += \
|
||||
base/rmaps_base_support_fns.c \
|
||||
base/rmaps_base_ranking.c \
|
||||
base/rmaps_base_print_fns.c \
|
||||
base/rmaps_base_binding.c
|
||||
base/rmaps_base_binding.c \
|
||||
base/rmaps_base_assign_locations.c
|
||||
|
||||
|
||||
dist_ortedata_DATA = base/help-orte-rmaps-base.txt
|
||||
|
@ -99,7 +99,8 @@ OBJ_CLASS_DECLARATION(orte_rmaps_base_selected_module_t);
|
||||
/*
|
||||
* Map a job
|
||||
*/
|
||||
ORTE_DECLSPEC int orte_rmaps_base_map_job(orte_job_t *jdata);
|
||||
ORTE_DECLSPEC void orte_rmaps_base_map_job(int sd, short args, void *cbdata);
|
||||
ORTE_DECLSPEC int orte_rmaps_base_assign_locations(orte_job_t *jdata);
|
||||
|
||||
/**
|
||||
* Utility routines to get/set vpid mapping for the job
|
||||
|
@ -13,7 +13,7 @@
|
||||
# Copyright (c) 2011-2015 Cisco Systems, Inc. All rights reserved.
|
||||
# Copyright (c) 2011 Los Alamos National Security, LLC.
|
||||
# All rights reserved.
|
||||
# Copyright (c) 2014 Intel, Inc. All rights reserved.
|
||||
# Copyright (c) 2014-2017 Intel, Inc. All rights reserved.
|
||||
# $COPYRIGHT$
|
||||
#
|
||||
# Additional copyrights may follow
|
||||
@ -410,3 +410,13 @@ Either the -host or -hostfile options were given, but the number
|
||||
of processes to start was omitted. This combination is not supported.
|
||||
|
||||
Please specify the number of processes to run and try again.
|
||||
#
|
||||
[failed-assignments]
|
||||
The attempt to assign hardware locations to processes on a
|
||||
compute node failed:
|
||||
|
||||
Node: %s
|
||||
Policy: %s
|
||||
|
||||
We cannot continue - please check that the policy is in
|
||||
accordance with the actual available hardware.
|
||||
|
80
orte/mca/rmaps/base/rmaps_base_assign_locations.c
Обычный файл
80
orte/mca/rmaps/base/rmaps_base_assign_locations.c
Обычный файл
@ -0,0 +1,80 @@
|
||||
/*
|
||||
* Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
|
||||
* University Research and Technology
|
||||
* Corporation. All rights reserved.
|
||||
* Copyright (c) 2004-2005 The University of Tennessee and The University
|
||||
* of Tennessee Research Foundation. All rights
|
||||
* reserved.
|
||||
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
|
||||
* University of Stuttgart. All rights reserved.
|
||||
* Copyright (c) 2004-2005 The Regents of the University of California.
|
||||
* All rights reserved.
|
||||
* Copyright (c) 2011-2012 Cisco Systems, Inc. All rights reserved.
|
||||
* Copyright (c) 2011-2012 Los Alamos National Security, LLC.
|
||||
* All rights reserved.
|
||||
* Copyright (c) 2014-2017 Intel, Inc. All rights reserved.
|
||||
* Copyright (c) 2016 Research Organization for Information Science
|
||||
* and Technology (RIST). All rights reserved.
|
||||
* $COPYRIGHT$
|
||||
*
|
||||
* Additional copyrights may follow
|
||||
*
|
||||
* $HEADER$
|
||||
*/
|
||||
|
||||
#include "orte_config.h"
|
||||
#include "orte/constants.h"
|
||||
|
||||
#include <string.h>
|
||||
|
||||
#include "orte/mca/mca.h"
|
||||
#include "opal/util/output.h"
|
||||
#include "opal/mca/base/base.h"
|
||||
|
||||
#include "orte/runtime/orte_globals.h"
|
||||
#include "orte/util/show_help.h"
|
||||
#include "orte/mca/errmgr/errmgr.h"
|
||||
|
||||
#include "orte/mca/rmaps/base/base.h"
|
||||
#include "orte/mca/rmaps/base/rmaps_private.h"
|
||||
|
||||
|
||||
/*
 * Assign locations to the procs of a job.
 *
 * Offers the job to each selected rmaps module that provides an
 * assign_locations entry point, in list order:
 *   - ORTE_SUCCESS ends the search and is returned;
 *   - ORTE_ERR_TAKE_NEXT_OPTION means the module did not attempt the job,
 *     so the next module is tried;
 *   - any other code is logged and returned as a true error.
 * When exactly one module is selected, its component name is recorded in
 * jdata->map->req_mapper before the search begins.
 *
 * @param jdata  job to process; jdata->map is dereferenced
 * @return ORTE_SUCCESS, the error code of the failing module, or ORTE_ERROR
 *         (after showing the "failed-assignments" help message) when no
 *         module performed the assignment.
 */
int orte_rmaps_base_assign_locations(orte_job_t *jdata)
{
    orte_rmaps_base_selected_module_t *mod;
    int rc;

    opal_output_verbose(5, orte_rmaps_base_framework.framework_output,
                        "mca:rmaps: assigning locations for job %s",
                        ORTE_JOBID_PRINT(jdata->jobid));

    /* a single selected module is a forced selection - note its name */
    if (1 == opal_list_get_size(&orte_rmaps_base.selected_modules)) {
        mod = (orte_rmaps_base_selected_module_t*)
                  opal_list_get_first(&orte_rmaps_base.selected_modules);
        jdata->map->req_mapper = strdup(mod->component->mca_component_name);
    }

    /* offer the job to each mapper in turn until one accepts it */
    OPAL_LIST_FOREACH(mod, &orte_rmaps_base.selected_modules, orte_rmaps_base_selected_module_t) {
        if (NULL != mod->module->assign_locations) {
            rc = mod->module->assign_locations(jdata);
            if (ORTE_SUCCESS == rc) {
                return ORTE_SUCCESS;
            }
            /* "take next option" just means this mapper passed on the
             * job; every other code is a genuine failure
             */
            if (ORTE_ERR_TAKE_NEXT_OPTION != rc) {
                ORTE_ERROR_LOG(rc);
                return rc;
            }
        }
    }

    /* nobody did the assignments - tell the user and report the error */
    orte_show_help("help-orte-rmaps-base.txt", "failed-assignments", true,
                   orte_process_info.nodename,
                   orte_rmaps_base_print_mapping(jdata->map->mapping));
    return ORTE_ERROR;
}
|
@ -42,8 +42,10 @@
|
||||
#include "orte/mca/rmaps/base/rmaps_private.h"
|
||||
|
||||
|
||||
int orte_rmaps_base_map_job(orte_job_t *jdata)
|
||||
void orte_rmaps_base_map_job(int fd, short args, void *cbdata)
|
||||
{
|
||||
orte_state_caddy_t *caddy = (orte_state_caddy_t*)cbdata;
|
||||
orte_job_t *jdata = caddy->jdata;
|
||||
orte_node_t *node;
|
||||
int rc, i, ppx = 0;
|
||||
bool did_map, given, pernode = false;
|
||||
@ -116,7 +118,9 @@ int orte_rmaps_base_map_job(orte_job_t *jdata)
|
||||
/* inform the user of the error */
|
||||
orte_show_help("help-orte-rmaps-base.txt", "num-procs-not-specified", true);
|
||||
OPAL_LIST_DESTRUCT(&nodes);
|
||||
return ORTE_ERR_BAD_PARAM;
|
||||
OBJ_RELEASE(caddy);
|
||||
ORTE_ACTIVATE_JOB_STATE(jdata, ORTE_JOB_STATE_MAP_FAILED);
|
||||
return;
|
||||
}
|
||||
}
|
||||
nprocs += slots;
|
||||
@ -335,7 +339,9 @@ int orte_rmaps_base_map_job(orte_job_t *jdata)
|
||||
int i;
|
||||
if (NULL == (node = (orte_node_t*)opal_pointer_array_get_item(orte_node_pool, 0))) {
|
||||
ORTE_ERROR_LOG(ORTE_ERR_NOT_FOUND);
|
||||
return ORTE_ERR_NOT_FOUND;
|
||||
OBJ_RELEASE(caddy);
|
||||
ORTE_ACTIVATE_JOB_STATE(jdata, ORTE_JOB_STATE_MAP_FAILED);
|
||||
return;
|
||||
}
|
||||
t0 = node->topology;
|
||||
for (i=1; i < orte_node_pool->size; i++) {
|
||||
@ -368,15 +374,26 @@ int orte_rmaps_base_map_job(orte_job_t *jdata)
|
||||
*/
|
||||
if (ORTE_ERR_TAKE_NEXT_OPTION != rc) {
|
||||
ORTE_ERROR_LOG(rc);
|
||||
return rc;
|
||||
OBJ_RELEASE(caddy);
|
||||
ORTE_ACTIVATE_JOB_STATE(jdata, ORTE_JOB_STATE_MAP_FAILED);
|
||||
return;
|
||||
}
|
||||
}
|
||||
/* reset any node map flags we used so the next job will start clean */
|
||||
for (i=0; i < jdata->map->nodes->size; i++) {
|
||||
if (NULL != (node = (orte_node_t*)opal_pointer_array_get_item(jdata->map->nodes, i))) {
|
||||
ORTE_FLAG_UNSET(node, ORTE_NODE_FLAG_MAPPED);
|
||||
}
|
||||
}
|
||||
|
||||
if (did_map && ORTE_ERR_RESOURCE_BUSY == rc) {
|
||||
/* the map was done but nothing could be mapped
|
||||
* for launch as all the resources were busy
|
||||
*/
|
||||
orte_show_help("help-orte-rmaps-base.txt", "cannot-launch", true);
|
||||
return rc;
|
||||
OBJ_RELEASE(caddy);
|
||||
ORTE_ACTIVATE_JOB_STATE(jdata, ORTE_JOB_STATE_MAP_FAILED);
|
||||
return;
|
||||
}
|
||||
|
||||
/* if we get here without doing the map, or with zero procs in
|
||||
@ -386,7 +403,9 @@ int orte_rmaps_base_map_job(orte_job_t *jdata)
|
||||
orte_show_help("help-orte-rmaps-base.txt", "failed-map", true,
|
||||
did_map ? "mapped" : "unmapped",
|
||||
jdata->num_procs, jdata->map->num_nodes);
|
||||
return ORTE_ERR_INVALID_NUM_PROCS;
|
||||
OBJ_RELEASE(caddy);
|
||||
ORTE_ACTIVATE_JOB_STATE(jdata, ORTE_JOB_STATE_MAP_FAILED);
|
||||
return;
|
||||
}
|
||||
|
||||
/* if any node is oversubscribed, then check to see if a binding
|
||||
@ -399,17 +418,29 @@ int orte_rmaps_base_map_job(orte_job_t *jdata)
|
||||
}
|
||||
}
|
||||
|
||||
if (!orte_get_attribute(&jdata->attributes, ORTE_JOB_FULLY_DESCRIBED, NULL, OPAL_BOOL)) {
|
||||
/* compute and save location assignments */
|
||||
if (ORTE_SUCCESS != (rc = orte_rmaps_base_assign_locations(jdata))) {
|
||||
ORTE_ERROR_LOG(rc);
|
||||
OBJ_RELEASE(caddy);
|
||||
ORTE_ACTIVATE_JOB_STATE(jdata, ORTE_JOB_STATE_MAP_FAILED);
|
||||
return;
|
||||
}
|
||||
} else {
|
||||
/* compute and save local ranks */
|
||||
if (ORTE_SUCCESS != (rc = orte_rmaps_base_compute_local_ranks(jdata))) {
|
||||
ORTE_ERROR_LOG(rc);
|
||||
return rc;
|
||||
OBJ_RELEASE(caddy);
|
||||
ORTE_ACTIVATE_JOB_STATE(jdata, ORTE_JOB_STATE_MAP_FAILED);
|
||||
return;
|
||||
}
|
||||
|
||||
if (orte_no_vm) {
|
||||
/* compute and save bindings */
|
||||
if (ORTE_SUCCESS != (rc = orte_rmaps_base_compute_bindings(jdata))) {
|
||||
ORTE_ERROR_LOG(rc);
|
||||
return rc;
|
||||
OBJ_RELEASE(caddy);
|
||||
ORTE_ACTIVATE_JOB_STATE(jdata, ORTE_JOB_STATE_MAP_FAILED);
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
@ -427,7 +458,11 @@ int orte_rmaps_base_map_job(orte_job_t *jdata)
|
||||
}
|
||||
}
|
||||
|
||||
return ORTE_SUCCESS;
|
||||
/* set the job state to the next position */
|
||||
ORTE_ACTIVATE_JOB_STATE(jdata, ORTE_JOB_STATE_MAP_COMPLETE);
|
||||
|
||||
/* cleanup */
|
||||
OBJ_RELEASE(caddy);
|
||||
}
|
||||
|
||||
void orte_rmaps_base_display_map(orte_job_t *jdata)
|
||||
|
@ -49,19 +49,17 @@
|
||||
#include "orte/mca/rmaps/base/base.h"
|
||||
|
||||
static int rank_span(orte_job_t *jdata,
|
||||
orte_app_context_t *app,
|
||||
opal_list_t *nodes,
|
||||
hwloc_obj_type_t target,
|
||||
unsigned cache_level)
|
||||
{
|
||||
orte_app_context_t *app;
|
||||
hwloc_obj_t obj;
|
||||
int num_objs, i, j, rc;
|
||||
int num_objs, i, j, m, n, rc;
|
||||
orte_vpid_t num_ranked=0;
|
||||
orte_node_t *node;
|
||||
orte_proc_t *proc;
|
||||
orte_proc_t *proc, *pptr;
|
||||
orte_vpid_t vpid;
|
||||
int cnt;
|
||||
opal_list_item_t *item;
|
||||
hwloc_obj_t locale;
|
||||
|
||||
opal_output_verbose(5, orte_rmaps_base_framework.framework_output,
|
||||
@ -85,13 +83,18 @@ static int rank_span(orte_job_t *jdata,
|
||||
* are mapped
|
||||
*/
|
||||
|
||||
vpid = jdata->num_procs;
|
||||
vpid = 0;
|
||||
for (n=0; n < jdata->apps->size; n++) {
|
||||
if (NULL == (app = (orte_app_context_t*)opal_pointer_array_get_item(jdata->apps, n))) {
|
||||
continue;
|
||||
}
|
||||
|
||||
cnt = 0;
|
||||
while (cnt < app->num_procs) {
|
||||
for (item = opal_list_get_first(nodes);
|
||||
item != opal_list_get_end(nodes);
|
||||
item = opal_list_get_next(item)) {
|
||||
node = (orte_node_t*)item;
|
||||
for (m=0; m < jdata->map->nodes->size; m++) {
|
||||
if (NULL == (node = (orte_node_t*)opal_pointer_array_get_item(jdata->map->nodes, m))) {
|
||||
continue;
|
||||
}
|
||||
/* get the number of objects - only consider those we can actually use */
|
||||
num_objs = opal_hwloc_base_get_nbobjs_by_type(node->topology->topo, target,
|
||||
cache_level, OPAL_HWLOC_AVAILABLE);
|
||||
@ -151,7 +154,11 @@ static int rank_span(orte_job_t *jdata,
|
||||
}
|
||||
cnt++;
|
||||
|
||||
/* insert the proc into the jdata array - no harm if already there */
|
||||
/* insert the proc into the jdata array */
|
||||
if (NULL != (pptr = (orte_proc_t*)opal_pointer_array_get_item(jdata->procs, proc->name.vpid))) {
|
||||
OBJ_RELEASE(pptr);
|
||||
}
|
||||
OBJ_RETAIN(proc);
|
||||
if (ORTE_SUCCESS != (rc = opal_pointer_array_set_item(jdata->procs, proc->name.vpid, proc))) {
|
||||
ORTE_ERROR_LOG(rc);
|
||||
return rc;
|
||||
@ -166,24 +173,23 @@ static int rank_span(orte_job_t *jdata,
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return ORTE_SUCCESS;
|
||||
}
|
||||
|
||||
static int rank_fill(orte_job_t *jdata,
|
||||
orte_app_context_t *app,
|
||||
opal_list_t *nodes,
|
||||
hwloc_obj_type_t target,
|
||||
unsigned cache_level)
|
||||
{
|
||||
orte_app_context_t *app;
|
||||
hwloc_obj_t obj;
|
||||
int num_objs, i, j, rc;
|
||||
int num_objs, i, j, m, n, rc;
|
||||
orte_vpid_t num_ranked=0;
|
||||
orte_node_t *node;
|
||||
orte_proc_t *proc;
|
||||
orte_proc_t *proc, *pptr;
|
||||
orte_vpid_t vpid;
|
||||
int cnt;
|
||||
opal_list_item_t *item;
|
||||
hwloc_obj_t locale;
|
||||
|
||||
opal_output_verbose(5, orte_rmaps_base_framework.framework_output,
|
||||
@ -199,12 +205,17 @@ static int rank_fill(orte_job_t *jdata,
|
||||
* 2 3 6 7 10 11 14 15
|
||||
*/
|
||||
|
||||
vpid = jdata->num_procs;
|
||||
vpid = 0;
|
||||
for (n=0; n < jdata->apps->size; n++) {
|
||||
if (NULL == (app = (orte_app_context_t*)opal_pointer_array_get_item(jdata->apps, n))) {
|
||||
continue;
|
||||
}
|
||||
|
||||
cnt = 0;
|
||||
for (item = opal_list_get_first(nodes);
|
||||
item != opal_list_get_end(nodes);
|
||||
item = opal_list_get_next(item)) {
|
||||
node = (orte_node_t*)item;
|
||||
for (m=0; m < jdata->map->nodes->size; m++) {
|
||||
if (NULL == (node = (orte_node_t*)opal_pointer_array_get_item(jdata->map->nodes, m))) {
|
||||
continue;
|
||||
}
|
||||
/* get the number of objects - only consider those we can actually use */
|
||||
num_objs = opal_hwloc_base_get_nbobjs_by_type(node->topology->topo, target,
|
||||
cache_level, OPAL_HWLOC_AVAILABLE);
|
||||
@ -264,7 +275,11 @@ static int rank_fill(orte_job_t *jdata,
|
||||
}
|
||||
cnt++;
|
||||
|
||||
/* insert the proc into the jdata array - no harm if already there */
|
||||
/* insert the proc into the jdata array */
|
||||
if (NULL != (pptr = (orte_proc_t*)opal_pointer_array_get_item(jdata->procs, proc->name.vpid))) {
|
||||
OBJ_RELEASE(pptr);
|
||||
}
|
||||
OBJ_RETAIN(proc);
|
||||
if (ORTE_SUCCESS != (rc = opal_pointer_array_set_item(jdata->procs, proc->name.vpid, proc))) {
|
||||
ORTE_ERROR_LOG(rc);
|
||||
return rc;
|
||||
@ -276,32 +291,31 @@ static int rank_fill(orte_job_t *jdata,
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return ORTE_SUCCESS;
|
||||
}
|
||||
|
||||
static int rank_by(orte_job_t *jdata,
|
||||
orte_app_context_t *app,
|
||||
opal_list_t *nodes,
|
||||
hwloc_obj_type_t target,
|
||||
unsigned cache_level)
|
||||
{
|
||||
orte_app_context_t *app;
|
||||
hwloc_obj_t obj;
|
||||
int num_objs, i, j, rc;
|
||||
int num_objs, i, j, m, n, rc;
|
||||
orte_vpid_t num_ranked=0;
|
||||
orte_node_t *node;
|
||||
orte_proc_t *proc;
|
||||
orte_proc_t *proc, *pptr;
|
||||
orte_vpid_t vpid;
|
||||
int cnt;
|
||||
opal_pointer_array_t objs;
|
||||
bool all_done;
|
||||
opal_list_item_t *item;
|
||||
hwloc_obj_t locale;
|
||||
|
||||
if (ORTE_RANKING_SPAN & ORTE_GET_RANKING_DIRECTIVE(jdata->map->ranking)) {
|
||||
return rank_span(jdata, app, nodes, target, cache_level);
|
||||
return rank_span(jdata, target, cache_level);
|
||||
} else if (ORTE_RANKING_FILL & ORTE_GET_RANKING_DIRECTIVE(jdata->map->ranking)) {
|
||||
return rank_fill(jdata, app, nodes, target, cache_level);
|
||||
return rank_fill(jdata, target, cache_level);
|
||||
}
|
||||
|
||||
/* if ranking is not spanned or filled, then we
|
||||
@ -316,16 +330,22 @@ static int rank_by(orte_job_t *jdata,
|
||||
* 4 6 5 7 12 14 13 15
|
||||
*/
|
||||
|
||||
vpid = 0;
|
||||
for (n=0; n < jdata->apps->size; n++) {
|
||||
if (NULL == (app = (orte_app_context_t*)opal_pointer_array_get_item(jdata->apps, n))) {
|
||||
continue;
|
||||
}
|
||||
|
||||
/* setup the pointer array */
|
||||
OBJ_CONSTRUCT(&objs, opal_pointer_array_t);
|
||||
opal_pointer_array_init(&objs, 2, INT_MAX, 2);
|
||||
|
||||
vpid = jdata->num_procs;
|
||||
cnt = 0;
|
||||
for (item = opal_list_get_first(nodes);
|
||||
item != opal_list_get_end(nodes);
|
||||
item = opal_list_get_next(item)) {
|
||||
node = (orte_node_t*)item;
|
||||
for (m=0; m < jdata->map->nodes->size; m++) {
|
||||
if (NULL == (node = (orte_node_t*)opal_pointer_array_get_item(jdata->map->nodes, m))) {
|
||||
continue;
|
||||
}
|
||||
|
||||
/* get the number of objects - only consider those we can actually use */
|
||||
num_objs = opal_hwloc_base_get_nbobjs_by_type(node->topology->topo, target,
|
||||
cache_level, OPAL_HWLOC_AVAILABLE);
|
||||
@ -333,6 +353,7 @@ static int rank_by(orte_job_t *jdata,
|
||||
"mca:rmaps:rank_by: found %d objects on node %s with %d procs",
|
||||
num_objs, node->name, (int)node->num_procs);
|
||||
if (0 == num_objs) {
|
||||
OBJ_DESTRUCT(&objs);
|
||||
return ORTE_ERR_NOT_SUPPORTED;
|
||||
}
|
||||
/* collect all the objects */
|
||||
@ -357,10 +378,9 @@ static int rank_by(orte_job_t *jdata,
|
||||
while (!all_done && cnt < app->num_procs) {
|
||||
all_done = true;
|
||||
/* cycle across the objects */
|
||||
for (i=0; i < num_objs && cnt < app->num_procs; i++) {
|
||||
for (i=0; i < num_objs && cnt < app->num_procs && all_done; i++) {
|
||||
obj = (hwloc_obj_t)opal_pointer_array_get_item(&objs, i);
|
||||
|
||||
/* find the next proc on this object */
|
||||
/* find the next proc for this job and app_context */
|
||||
for (j=0; j < node->procs->size && cnt < app->num_procs; j++) {
|
||||
if (NULL == (proc = (orte_proc_t*)opal_pointer_array_get_item(node->procs, j))) {
|
||||
continue;
|
||||
@ -374,22 +394,32 @@ static int rank_by(orte_job_t *jdata,
|
||||
}
|
||||
/* ignore procs that are already ranked */
|
||||
if (ORTE_VPID_INVALID != proc->name.vpid) {
|
||||
opal_output_verbose(5, orte_rmaps_base_framework.framework_output,
|
||||
"mca:rmaps:rank_by skipping proc %s - already ranked, num_ranked %d",
|
||||
ORTE_NAME_PRINT(&proc->name), num_ranked);
|
||||
continue;
|
||||
}
|
||||
/* ignore procs from other apps */
|
||||
if (proc->app_idx != app->idx) {
|
||||
opal_output_verbose(5, orte_rmaps_base_framework.framework_output,
|
||||
"mca:rmaps:rank_by skipping proc %s - from another app, num_ranked %d",
|
||||
ORTE_NAME_PRINT(&proc->name), num_ranked);
|
||||
continue;
|
||||
}
|
||||
/* protect against bozo case */
|
||||
locale = NULL;
|
||||
if (!orte_get_attribute(&proc->attributes, ORTE_PROC_HWLOC_LOCALE, (void**)&locale, OPAL_PTR)) {
|
||||
continue;
|
||||
ORTE_ERROR_LOG(ORTE_ERROR);
|
||||
return ORTE_ERROR;
|
||||
}
|
||||
/* ignore procs on other objects */
|
||||
/* ignore procs not on this object */
|
||||
if (!hwloc_bitmap_intersects(obj->cpuset, locale->cpuset)) {
|
||||
opal_output_verbose(5, orte_rmaps_base_framework.framework_output,
|
||||
"mca:rmaps:rank_by: proc at position %d is not on object %d",
|
||||
j, i);
|
||||
continue;
|
||||
}
|
||||
/* assign the vpid */
|
||||
proc->name.vpid = vpid++;
|
||||
if (0 == cnt) {
|
||||
app->first_rank = proc->name.vpid;
|
||||
@ -397,9 +427,14 @@ static int rank_by(orte_job_t *jdata,
|
||||
cnt++;
|
||||
opal_output_verbose(5, orte_rmaps_base_framework.framework_output,
|
||||
"mca:rmaps:rank_by: assigned rank %s", ORTE_VPID_PRINT(proc->name.vpid));
|
||||
/* insert the proc into the jdata array - no harm if already there */
|
||||
/* insert the proc into the jdata array */
|
||||
if (NULL != (pptr = (orte_proc_t*)opal_pointer_array_get_item(jdata->procs, proc->name.vpid))) {
|
||||
OBJ_RELEASE(pptr);
|
||||
}
|
||||
OBJ_RETAIN(proc);
|
||||
if (ORTE_SUCCESS != (rc = opal_pointer_array_set_item(jdata->procs, proc->name.vpid, proc))) {
|
||||
ORTE_ERROR_LOG(rc);
|
||||
OBJ_DESTRUCT(&objs);
|
||||
return rc;
|
||||
}
|
||||
/* flag that one was mapped */
|
||||
@ -414,24 +449,21 @@ static int rank_by(orte_job_t *jdata,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/* cleanup */
|
||||
OBJ_DESTRUCT(&objs);
|
||||
|
||||
}
|
||||
return ORTE_SUCCESS;
|
||||
}
|
||||
|
||||
int orte_rmaps_base_compute_vpids(orte_job_t *jdata,
|
||||
orte_app_context_t *app,
|
||||
opal_list_t *nodes)
|
||||
int orte_rmaps_base_compute_vpids(orte_job_t *jdata)
|
||||
{
|
||||
orte_job_map_t *map;
|
||||
orte_app_context_t *app;
|
||||
orte_vpid_t vpid;
|
||||
int j, cnt;
|
||||
int j, m, n, cnt;
|
||||
orte_node_t *node;
|
||||
orte_proc_t *proc;
|
||||
orte_proc_t *proc, *pptr;
|
||||
int rc;
|
||||
opal_list_item_t *item;
|
||||
bool one_found;
|
||||
|
||||
map = jdata->map;
|
||||
@ -445,7 +477,7 @@ int orte_rmaps_base_compute_vpids(orte_job_t *jdata,
|
||||
opal_output_verbose(5, orte_rmaps_base_framework.framework_output,
|
||||
"mca:rmaps: computing ranks by NUMA for job %s",
|
||||
ORTE_JOBID_PRINT(jdata->jobid));
|
||||
if (ORTE_SUCCESS != (rc = rank_by(jdata, app, nodes, HWLOC_OBJ_NODE, 0))) {
|
||||
if (ORTE_SUCCESS != (rc = rank_by(jdata, HWLOC_OBJ_NODE, 0))) {
|
||||
if (ORTE_ERR_NOT_SUPPORTED == rc &&
|
||||
!(ORTE_RANKING_GIVEN & ORTE_GET_RANKING_DIRECTIVE(map->ranking))) {
|
||||
ORTE_SET_RANKING_POLICY(map->ranking, ORTE_RANK_BY_SLOT);
|
||||
@ -460,7 +492,7 @@ int orte_rmaps_base_compute_vpids(orte_job_t *jdata,
|
||||
opal_output_verbose(5, orte_rmaps_base_framework.framework_output,
|
||||
"mca:rmaps: computing ranks by socket for job %s",
|
||||
ORTE_JOBID_PRINT(jdata->jobid));
|
||||
if (ORTE_SUCCESS != (rc = rank_by(jdata, app, nodes, HWLOC_OBJ_SOCKET, 0))) {
|
||||
if (ORTE_SUCCESS != (rc = rank_by(jdata, HWLOC_OBJ_SOCKET, 0))) {
|
||||
if (ORTE_ERR_NOT_SUPPORTED == rc &&
|
||||
!(ORTE_RANKING_GIVEN & ORTE_GET_RANKING_DIRECTIVE(map->ranking))) {
|
||||
ORTE_SET_RANKING_POLICY(map->ranking, ORTE_RANK_BY_SLOT);
|
||||
@ -475,7 +507,7 @@ int orte_rmaps_base_compute_vpids(orte_job_t *jdata,
|
||||
opal_output_verbose(5, orte_rmaps_base_framework.framework_output,
|
||||
"mca:rmaps: computing ranks by L3cache for job %s",
|
||||
ORTE_JOBID_PRINT(jdata->jobid));
|
||||
if (ORTE_SUCCESS != (rc = rank_by(jdata, app, nodes, HWLOC_OBJ_CACHE, 3))) {
|
||||
if (ORTE_SUCCESS != (rc = rank_by(jdata, HWLOC_OBJ_CACHE, 3))) {
|
||||
if (ORTE_ERR_NOT_SUPPORTED == rc &&
|
||||
!(ORTE_RANKING_GIVEN & ORTE_GET_RANKING_DIRECTIVE(map->ranking))) {
|
||||
ORTE_SET_RANKING_POLICY(map->ranking, ORTE_RANK_BY_SLOT);
|
||||
@ -490,7 +522,7 @@ int orte_rmaps_base_compute_vpids(orte_job_t *jdata,
|
||||
opal_output_verbose(5, orte_rmaps_base_framework.framework_output,
|
||||
"mca:rmaps: computing ranks by L2cache for job %s",
|
||||
ORTE_JOBID_PRINT(jdata->jobid));
|
||||
if (ORTE_SUCCESS != (rc = rank_by(jdata, app, nodes, HWLOC_OBJ_CACHE, 2))) {
|
||||
if (ORTE_SUCCESS != (rc = rank_by(jdata, HWLOC_OBJ_CACHE, 2))) {
|
||||
if (ORTE_ERR_NOT_SUPPORTED == rc &&
|
||||
!(ORTE_RANKING_GIVEN & ORTE_GET_RANKING_DIRECTIVE(map->ranking))) {
|
||||
ORTE_SET_RANKING_POLICY(map->ranking, ORTE_RANK_BY_SLOT);
|
||||
@ -505,7 +537,7 @@ int orte_rmaps_base_compute_vpids(orte_job_t *jdata,
|
||||
opal_output_verbose(5, orte_rmaps_base_framework.framework_output,
|
||||
"mca:rmaps: computing ranks by L1cache for job %s",
|
||||
ORTE_JOBID_PRINT(jdata->jobid));
|
||||
if (ORTE_SUCCESS != (rc = rank_by(jdata, app, nodes, HWLOC_OBJ_CACHE, 1))) {
|
||||
if (ORTE_SUCCESS != (rc = rank_by(jdata, HWLOC_OBJ_CACHE, 1))) {
|
||||
if (ORTE_ERR_NOT_SUPPORTED == rc &&
|
||||
!(ORTE_RANKING_GIVEN & ORTE_GET_RANKING_DIRECTIVE(map->ranking))) {
|
||||
ORTE_SET_RANKING_POLICY(map->ranking, ORTE_RANK_BY_SLOT);
|
||||
@ -520,7 +552,7 @@ int orte_rmaps_base_compute_vpids(orte_job_t *jdata,
|
||||
opal_output_verbose(5, orte_rmaps_base_framework.framework_output,
|
||||
"mca:rmaps: computing ranks by core for job %s",
|
||||
ORTE_JOBID_PRINT(jdata->jobid));
|
||||
if (ORTE_SUCCESS != (rc = rank_by(jdata, app, nodes, HWLOC_OBJ_CORE, 0))) {
|
||||
if (ORTE_SUCCESS != (rc = rank_by(jdata, HWLOC_OBJ_CORE, 0))) {
|
||||
if (ORTE_ERR_NOT_SUPPORTED == rc &&
|
||||
!(ORTE_RANKING_GIVEN & ORTE_GET_RANKING_DIRECTIVE(map->ranking))) {
|
||||
ORTE_SET_RANKING_POLICY(map->ranking, ORTE_RANK_BY_SLOT);
|
||||
@ -528,6 +560,7 @@ int orte_rmaps_base_compute_vpids(orte_job_t *jdata,
|
||||
}
|
||||
ORTE_ERROR_LOG(rc);
|
||||
}
|
||||
opal_output(0, "DONE");
|
||||
return rc;
|
||||
}
|
||||
|
||||
@ -535,7 +568,7 @@ int orte_rmaps_base_compute_vpids(orte_job_t *jdata,
|
||||
opal_output_verbose(5, orte_rmaps_base_framework.framework_output,
|
||||
"mca:rmaps: computing ranks by hwthread for job %s",
|
||||
ORTE_JOBID_PRINT(jdata->jobid));
|
||||
if (ORTE_SUCCESS != (rc = rank_by(jdata, app, nodes, HWLOC_OBJ_PU, 0))) {
|
||||
if (ORTE_SUCCESS != (rc = rank_by(jdata, HWLOC_OBJ_PU, 0))) {
|
||||
if (ORTE_ERR_NOT_SUPPORTED == rc &&
|
||||
!(ORTE_RANKING_GIVEN & ORTE_GET_RANKING_DIRECTIVE(map->ranking))) {
|
||||
ORTE_SET_RANKING_POLICY(map->ranking, ORTE_RANK_BY_SLOT);
|
||||
@ -549,26 +582,24 @@ int orte_rmaps_base_compute_vpids(orte_job_t *jdata,
|
||||
if (ORTE_RANK_BY_NODE == ORTE_GET_RANKING_POLICY(map->ranking) ||
|
||||
ORTE_RANK_BY_BOARD == ORTE_GET_RANKING_POLICY(map->ranking)) {
|
||||
opal_output_verbose(5, orte_rmaps_base_framework.framework_output,
|
||||
"mca:rmaps:base: computing vpids by node for job %s app %d on %d nodes",
|
||||
ORTE_JOBID_PRINT(jdata->jobid), (int)app->idx,
|
||||
(int)opal_list_get_size(nodes));
|
||||
/* bozo check */
|
||||
if (0 == opal_list_get_size(nodes)) {
|
||||
ORTE_ERROR_LOG(ORTE_ERR_BAD_PARAM);
|
||||
return ORTE_ERR_BAD_PARAM;
|
||||
}
|
||||
"mca:rmaps:base: computing vpids by node for job %s",
|
||||
ORTE_JOBID_PRINT(jdata->jobid));
|
||||
/* assign the ranks round-robin across nodes - only one board/node
|
||||
* at this time, so they are equivalent
|
||||
*/
|
||||
vpid=0;
|
||||
for (n=0; n < jdata->apps->size; n++) {
|
||||
if (NULL == (app = (orte_app_context_t*)opal_pointer_array_get_item(jdata->apps, n))) {
|
||||
continue;
|
||||
}
|
||||
cnt=0;
|
||||
vpid=jdata->num_procs;
|
||||
one_found = true;
|
||||
while (cnt < app->num_procs && one_found) {
|
||||
one_found = false;
|
||||
for (item = opal_list_get_first(nodes);
|
||||
item != opal_list_get_end(nodes);
|
||||
item = opal_list_get_next(item)) {
|
||||
node = (orte_node_t*)item;
|
||||
for (m=0; m < jdata->map->nodes->size; m++) {
|
||||
if (NULL == (node = (orte_node_t*)opal_pointer_array_get_item(jdata->map->nodes, m))) {
|
||||
continue;
|
||||
}
|
||||
for (j=0; j < node->procs->size; j++) {
|
||||
if (NULL == (proc = (orte_proc_t*)opal_pointer_array_get_item(node->procs, j))) {
|
||||
continue;
|
||||
@ -585,7 +616,11 @@ int orte_rmaps_base_compute_vpids(orte_job_t *jdata,
|
||||
continue;
|
||||
}
|
||||
proc->name.vpid = vpid++;
|
||||
/* insert the proc into the jdata array - no harm if already there */
|
||||
/* insert the proc into the jdata array */
|
||||
if (NULL != (pptr = (orte_proc_t*)opal_pointer_array_get_item(jdata->procs, proc->name.vpid))) {
|
||||
OBJ_RELEASE(pptr);
|
||||
}
|
||||
OBJ_RETAIN(proc);
|
||||
if (ORTE_SUCCESS != (rc = opal_pointer_array_set_item(jdata->procs, proc->name.vpid, proc))) {
|
||||
ORTE_ERROR_LOG(rc);
|
||||
return rc;
|
||||
@ -604,6 +639,7 @@ int orte_rmaps_base_compute_vpids(orte_job_t *jdata,
|
||||
ORTE_ERROR_LOG(ORTE_ERR_FATAL);
|
||||
return ORTE_ERR_FATAL;
|
||||
}
|
||||
}
|
||||
return ORTE_SUCCESS;
|
||||
}
|
||||
|
||||
@ -613,11 +649,15 @@ int orte_rmaps_base_compute_vpids(orte_job_t *jdata,
|
||||
opal_output_verbose(5, orte_rmaps_base_framework.framework_output,
|
||||
"mca:rmaps:base: computing vpids by slot for job %s",
|
||||
ORTE_JOBID_PRINT(jdata->jobid));
|
||||
vpid = jdata->num_procs;
|
||||
for (item = opal_list_get_first(nodes);
|
||||
item != opal_list_get_end(nodes);
|
||||
item = opal_list_get_next(item)) {
|
||||
node = (orte_node_t*)item;
|
||||
vpid = 0;
|
||||
for (n=0; n < jdata->apps->size; n++) {
|
||||
if (NULL == (app = (orte_app_context_t*)opal_pointer_array_get_item(jdata->apps, n))) {
|
||||
continue;
|
||||
}
|
||||
for (m=0; m < jdata->map->nodes->size; m++) {
|
||||
if (NULL == (node = (orte_node_t*)opal_pointer_array_get_item(jdata->map->nodes, m))) {
|
||||
continue;
|
||||
}
|
||||
|
||||
for (j=0; j < node->procs->size; j++) {
|
||||
if (NULL == (proc = (orte_proc_t*)opal_pointer_array_get_item(node->procs, j))) {
|
||||
@ -641,13 +681,18 @@ int orte_rmaps_base_compute_vpids(orte_job_t *jdata,
|
||||
*/
|
||||
jdata->bookmark = node;
|
||||
}
|
||||
/* insert the proc into the jdata array - no harm if already there */
|
||||
/* insert the proc into the jdata array */
|
||||
if (NULL != (pptr = (orte_proc_t*)opal_pointer_array_get_item(jdata->procs, proc->name.vpid))) {
|
||||
OBJ_RELEASE(pptr);
|
||||
}
|
||||
OBJ_RETAIN(proc);
|
||||
if (ORTE_SUCCESS != (rc = opal_pointer_array_set_item(jdata->procs, proc->name.vpid, proc))) {
|
||||
ORTE_ERROR_LOG(rc);
|
||||
return rc;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
return ORTE_SUCCESS;
|
||||
}
|
||||
|
||||
|
@ -351,6 +351,7 @@ int orte_rmaps_base_get_target_nodes(opal_list_t *allocated_nodes, orte_std_cntr
|
||||
/* the list is empty - if the HNP is allocated, then add it */
|
||||
if (orte_hnp_is_allocated) {
|
||||
nd = (orte_node_t*)opal_pointer_array_get_item(orte_node_pool, 0);
|
||||
OBJ_RETAIN(nd);
|
||||
opal_list_append(allocated_nodes, &nd->super);
|
||||
} else {
|
||||
nd = NULL;
|
||||
@ -476,8 +477,7 @@ int orte_rmaps_base_get_target_nodes(opal_list_t *allocated_nodes, orte_std_cntr
|
||||
/* if the hnp was not allocated, or flagged not to be used,
|
||||
* then remove it here */
|
||||
if (!orte_hnp_is_allocated || (ORTE_GET_MAPPING_DIRECTIVE(policy) & ORTE_MAPPING_NO_USE_LOCAL)) {
|
||||
node = (orte_node_t*)opal_pointer_array_get_item(orte_node_pool, 0);
|
||||
if (node == (orte_node_t*)item) {
|
||||
if (0 == node->index) {
|
||||
opal_list_remove_item(allocated_nodes, item);
|
||||
OBJ_RELEASE(item); /* "un-retain" it */
|
||||
item = next;
|
||||
|
@ -12,6 +12,7 @@
|
||||
* Copyright (c) 2011 Cisco Systems, Inc. All rights reserved.
|
||||
* Copyright (c) 2011-2012 Los Alamos National Security, LLC.
|
||||
* All rights reserved.
|
||||
* Copyright (c) 2017 Intel, Inc. All rights reserved.
|
||||
* $COPYRIGHT$
|
||||
*
|
||||
* Additional copyrights may follow
|
||||
@ -55,9 +56,7 @@ ORTE_DECLSPEC orte_proc_t* orte_rmaps_base_setup_proc(orte_job_t *jdata,
|
||||
ORTE_DECLSPEC orte_node_t* orte_rmaps_base_get_starting_point(opal_list_t *node_list,
|
||||
orte_job_t *jdata);
|
||||
|
||||
ORTE_DECLSPEC int orte_rmaps_base_compute_vpids(orte_job_t *jdata,
|
||||
orte_app_context_t *app,
|
||||
opal_list_t *nodes);
|
||||
ORTE_DECLSPEC int orte_rmaps_base_compute_vpids(orte_job_t *jdata);
|
||||
|
||||
ORTE_DECLSPEC int orte_rmaps_base_compute_local_ranks(orte_job_t *jdata);
|
||||
|
||||
|
@ -1,40 +0,0 @@
|
||||
#
|
||||
# Copyright (c) 2011 Oak Ridge National Labs. All rights reserved.
|
||||
#
|
||||
# Copyright (c) 2012 Cisco Systems, Inc. All rights reserved.
|
||||
# $COPYRIGHT$
|
||||
#
|
||||
# Additional copyrights may follow
|
||||
#
|
||||
# $HEADER$
|
||||
#
|
||||
|
||||
dist_ortedata_DATA = help-orte-rmaps-lama.txt
|
||||
|
||||
sources = \
|
||||
rmaps_lama_module.c \
|
||||
rmaps_lama_max_tree.c \
|
||||
rmaps_lama_params.c \
|
||||
rmaps_lama.h \
|
||||
rmaps_lama_component.c
|
||||
|
||||
# Make the output library in this directory, and name it either
|
||||
# mca_<type>_<name>.la (for DSO builds) or libmca_<type>_<name>.la
|
||||
# (for static builds).
|
||||
|
||||
if MCA_BUILD_orte_rmaps_lama_DSO
|
||||
component_noinst =
|
||||
component_install = mca_rmaps_lama.la
|
||||
else
|
||||
component_noinst = libmca_rmaps_lama.la
|
||||
component_install =
|
||||
endif
|
||||
|
||||
mcacomponentdir = $(ortelibdir)
|
||||
mcacomponent_LTLIBRARIES = $(component_install)
|
||||
mca_rmaps_lama_la_SOURCES = $(sources)
|
||||
mca_rmaps_lama_la_LDFLAGS = -module -avoid-version
|
||||
|
||||
noinst_LTLIBRARIES = $(component_noinst)
|
||||
libmca_rmaps_lama_la_SOURCES =$(sources)
|
||||
libmca_rmaps_lama_la_LDFLAGS = -module -avoid-version
|
@ -1,173 +0,0 @@
|
||||
# -*- text -*-
|
||||
#
|
||||
# Copyright (c) 2011 Oak Ridge National Labs. All rights reserved.
|
||||
# Copyright (c) 2013 Cisco Systems, Inc. All rights reserved.
|
||||
#
|
||||
# $COPYRIGHT$
|
||||
#
|
||||
# Additional copyrights may follow
|
||||
#
|
||||
# $HEADER$
|
||||
#
|
||||
# This is the US/English general help file for LAMA Mapper.
|
||||
#
|
||||
[orte-rmaps-lama:multi-apps-and-zero-np]
|
||||
RMAPS found multiple applications to be launched, with at least one that failed
|
||||
to specify the number of processes to execute. When specifying multiple
|
||||
applications, you must specify how many processes of each to launch via the
|
||||
-np argument.
|
||||
#
|
||||
[orte-rmaps-lama:oversubscribe]
|
||||
RMaps LAMA detected oversubscription after mapping %d of %d processes.
|
||||
Since you have asked not to oversubscribe the resources the job will not
|
||||
be launched. If you would instead like to oversubscribe the resources
|
||||
try using the --oversubscribe option to mpirun.
|
||||
#
|
||||
[orte-rmaps-lama:no-resources-available]
|
||||
RMaps LAMA detected that there are not enough resources to map the
|
||||
remainder of the job. Check the command line options, and the number of
|
||||
nodes allocated to this job.
|
||||
Application Context : %d
|
||||
# of Processes Successfully Mapped: %d
|
||||
# of Processes Requested : %d
|
||||
Mapping : %s
|
||||
Binding : %s
|
||||
MPPR : %s
|
||||
Ordering : %s
|
||||
#
|
||||
[orte-rmaps-lama:merge-conflict-bad-prune-src]
|
||||
RMaps LAMA detected that it needed to prune a level of the hierarchy that
|
||||
was necessary for one of the command line parameters. Check your allocation
|
||||
and the options below to make sure they are correct.
|
||||
Conflicting Level Description: %s
|
||||
Mapping : %s
|
||||
Binding : %s
|
||||
MPPR : %s
|
||||
Ordering : %s
|
||||
#
|
||||
[invalid mapping option]
|
||||
The specified mapping option is not supported with the LAMA rmaps
|
||||
mapper:
|
||||
|
||||
Specified mapping option: %s
|
||||
Reason it is invalid: %s
|
||||
|
||||
LAMA supports the following options to the mpirun --map-by option:
|
||||
|
||||
node, numa, socket, l1cache, l2cache, l3cache, core, hwthread, slot
|
||||
|
||||
Alternatively, LAMA supports specifying a sequence of letters in the
|
||||
rmaps_lama_map MCA parameter; each letter indicates a "direction" for
|
||||
mapping. The rmaps_lama_map MCA parameter is richer/more flexible
|
||||
than the --map-by CLI option. If rmaps_lama_map is specified, the
|
||||
following letters must be specified:
|
||||
|
||||
h: hardware thread
|
||||
c: processor core
|
||||
s: processor socket
|
||||
n: node (server)
|
||||
|
||||
The following may also optionally be included in the mapping string:
|
||||
|
||||
N: NUMA node
|
||||
L1: L1 cache
|
||||
L2: L2 cache
|
||||
L3: L3 cache
|
||||
|
||||
For example, the two commands below are equivalent:
|
||||
|
||||
mpirun --mca rmaps lama --mca rmaps_lama_map csNh ...
|
||||
mpirun --mca rmaps lama --map-by core ...
|
||||
#
|
||||
[invalid binding option]
|
||||
The specified binding option is not supported with the LAMA rmaps
|
||||
mapper:
|
||||
|
||||
Specified binding option: %s
|
||||
Reason it is invalid: %s
|
||||
|
||||
LAMA binding options can be specified via the mpirun --bind-to command
|
||||
line option or rmaps_lama_bind MCA param:
|
||||
|
||||
--bind-to rmaps_lama_binding
|
||||
Locality option option
|
||||
---------------- --------- ------------------
|
||||
Hardware thread hwthread h
|
||||
Processor core core c
|
||||
Processor socket socket s
|
||||
NUMA node numa N
|
||||
L1 cache l1cache L1
|
||||
L2 cache l2cache L2
|
||||
L3 cache l3cache L3
|
||||
Node (server) node n
|
||||
|
||||
The --bind-to option assumes a single locality (e.g., bind each MPI
|
||||
process to a single core, socket, etc.). The rmaps_lama_bind MCA
|
||||
param requires an integer specifying how many localities to which to
|
||||
bind. For example, the following two command lines are equivalent,
|
||||
and bind each MPI process to a single core:
|
||||
|
||||
mpirun --mca rmaps lama --mca rmaps_lama_bind 1c ...
|
||||
mpirun --mca rmaps lama --bind-to core ...
|
||||
|
||||
The rmaps_lama_bind MCA parameter is more flexible than the --bind-to
|
||||
CLI option, because it allows binding to multiple resources. For
|
||||
example, specifying an rmaps_lama_bind value of "2c" binds each MPI
|
||||
process to two cores.
|
||||
#
|
||||
[invalid ordering option]
|
||||
The specified ordering option is not supported.
|
||||
|
||||
Specified ordering option: %s
|
||||
|
||||
The LAMA ordering can be specified via the rmaps_lama_ordering MCA
|
||||
parameter.
|
||||
|
||||
Two options are supported for ordering ranks in MPI_COMM_WORLD (MCW):
|
||||
|
||||
s: Sequential. MCW rank ordering is sequential by hardware thread
|
||||
across all nodes. E.g., MCW rank 0 is the first process on node
|
||||
0; MCW rank 1 is the second process on node 0, and so on.
|
||||
n: Natural. MCW rank ordering follows the "natural" mapping layout.
|
||||
For example, in a by-socket layout, MCW rank 0 is the first
|
||||
process on the 1st socket on node 0. MCW rank 1 is then the
|
||||
first process on the 2nd socket on node 0. And so on.
|
||||
#
|
||||
[invalid mppr option]
|
||||
The specified Max Processes Per Resource (MPPR) value is invalid (in
|
||||
the rmaps_lama_mppr MCA parameter):
|
||||
|
||||
Specified MPPR: %s
|
||||
Reason it is invalid: %s
|
||||
|
||||
The MPPR is a comma-delimited list of specifications indicating how
|
||||
many processes are allowed on a given type of resource before an MPI
|
||||
job is considered to have oversubscribed that resource. Each
|
||||
specification is a token in the format of "NUMBER:RESOURCE". For
|
||||
example, the default MPPR of "1:c" means that Open MPI will map one
|
||||
process per processor core before considering cores to be
|
||||
oversubscribed.
|
||||
|
||||
Multiple specifications may be useful; for example "1:c,2:s" maintains
|
||||
the default one-process-per-core limitation, but places an additional
|
||||
limitation of only two processes per processor socket (assuming that
|
||||
there are more than two cores per socket).
|
||||
|
||||
The LAMA MPPR specifications are set via the rmaps_lama_mppr MCA
|
||||
parameter. The following resources can be specified:
|
||||
|
||||
Hardware thread h
|
||||
Processor core c
|
||||
Processor socket s
|
||||
NUMA node N
|
||||
L1 cache L1
|
||||
L2 cache L2
|
||||
L3 cache L3
|
||||
Node (server) n
|
||||
#
|
||||
[internal error]
|
||||
An unexpected internal error occurred in the LAMA mapper; your job
|
||||
will now fail. Sorry.
|
||||
|
||||
File: %s
|
||||
Message: %s
|
@ -1,7 +0,0 @@
|
||||
#
|
||||
# owner/status file
|
||||
# owner: institution that is responsible for this package
|
||||
# status: e.g. active, maintenance, unmaintained
|
||||
#
|
||||
owner: CISCO
|
||||
status: maintenance
|
@ -1,177 +0,0 @@
|
||||
/*
|
||||
* Copyright (c) 2011 Oak Ridge National Labs. All rights reserved.
|
||||
*
|
||||
* Copyright (c) 2013-2017 Cisco Systems, Inc. All rights reserved
|
||||
* $COPYRIGHT$
|
||||
*
|
||||
* Additional copyrights may follow
|
||||
*
|
||||
* $HEADER$
|
||||
*/
|
||||
/**
|
||||
* @file
|
||||
*
|
||||
* Resource Mapping
|
||||
*/
|
||||
#ifndef ORTE_RMAPS_LAMA_H
|
||||
#define ORTE_RMAPS_LAMA_H
|
||||
|
||||
#include "orte_config.h"
|
||||
|
||||
#include "opal/class/opal_tree.h"
|
||||
|
||||
#include "orte/mca/rmaps/rmaps.h"
|
||||
|
||||
BEGIN_C_DECLS
|
||||
|
||||
ORTE_MODULE_DECLSPEC extern orte_rmaps_base_component_t mca_rmaps_lama_component;
|
||||
|
||||
extern orte_rmaps_base_module_t orte_rmaps_lama_module;
|
||||
|
||||
|
||||
/*********************************
|
||||
* Structures & Defines
|
||||
*********************************/
|
||||
/*
|
||||
* JJH: Can we reuse the opal_hwloc_level_t data structure in
|
||||
* opal/mca/hwloc/hwloc-internal.h
|
||||
*/
|
||||
typedef enum {
|
||||
LAMA_LEVEL_MACHINE = 0,
|
||||
LAMA_LEVEL_BOARD = 1,
|
||||
LAMA_LEVEL_NUMA = 2,
|
||||
LAMA_LEVEL_SOCKET = 3,
|
||||
LAMA_LEVEL_CACHE_L3 = 4,
|
||||
LAMA_LEVEL_CACHE_L2 = 5,
|
||||
LAMA_LEVEL_CACHE_L1 = 6,
|
||||
LAMA_LEVEL_CORE = 7,
|
||||
LAMA_LEVEL_PU = 8,
|
||||
LAMA_LEVEL_UNKNOWN = 9
|
||||
} rmaps_lama_level_type_t;
|
||||
|
||||
typedef enum {
|
||||
LAMA_ORDER_NATURAL = 0,
|
||||
LAMA_ORDER_SEQ = 1
|
||||
} rmaps_lama_order_type_t;
|
||||
|
||||
struct rmaps_lama_level_info_t {
|
||||
rmaps_lama_level_type_t type;
|
||||
int max_resources;
|
||||
};
|
||||
typedef struct rmaps_lama_level_info_t rmaps_lama_level_info_t;
|
||||
|
||||
/*
|
||||
* Structure to attach to the hwloc tree
|
||||
* Accounting for mppr
|
||||
*/
|
||||
struct rmaps_lama_hwloc_user_t {
|
||||
opal_object_t super;
|
||||
|
||||
opal_pointer_array_t *node_mppr;
|
||||
};
|
||||
typedef struct rmaps_lama_hwloc_user_t rmaps_lama_hwloc_user_t;
|
||||
OBJ_CLASS_DECLARATION(rmaps_lama_hwloc_user_t);
|
||||
|
||||
struct rmaps_lama_node_mppr_t {
|
||||
int max;
|
||||
int cur;
|
||||
};
|
||||
typedef struct rmaps_lama_node_mppr_t rmaps_lama_node_mppr_t;
|
||||
|
||||
rmaps_lama_level_type_t lama_type_str_to_enum(char *param);
|
||||
char * lama_type_enum_to_str(rmaps_lama_level_type_t param);
|
||||
|
||||
|
||||
/*********************************
|
||||
* Command Line Interface Parsing
|
||||
*********************************/
|
||||
/*
|
||||
* User defined command line interface (CLI) arguments
|
||||
*/
|
||||
extern char * rmaps_lama_cmd_map;
|
||||
extern char * rmaps_lama_cmd_bind;
|
||||
extern char * rmaps_lama_cmd_mppr;
|
||||
extern char * rmaps_lama_cmd_ordering;
|
||||
extern bool rmaps_lama_timing_enabled;
|
||||
extern bool rmaps_lama_can_oversubscribe;
|
||||
extern bool rmaps_lama_am_oversubscribing;
|
||||
|
||||
/*
|
||||
* Internal representations of command line arguments
|
||||
*/
|
||||
extern int lama_mapping_num_layouts;
|
||||
extern rmaps_lama_level_type_t *lama_mapping_layout;
|
||||
|
||||
extern rmaps_lama_level_type_t lama_binding_level;
|
||||
|
||||
extern rmaps_lama_level_info_t *lama_mppr_levels;
|
||||
extern int lama_mppr_num_levels;
|
||||
|
||||
/*
|
||||
* Homogeneous system optimization
|
||||
*/
|
||||
extern bool lama_mppr_max_tree_homogeneous_system;
|
||||
|
||||
/*
|
||||
* Maximum length of digits in CLI
|
||||
*/
|
||||
#define MAX_BIND_DIGIT_LEN 4
|
||||
|
||||
int rmaps_lama_process_alias_params(orte_job_t *jdata);
|
||||
|
||||
int rmaps_lama_parse_mapping(char *layout,
|
||||
rmaps_lama_level_type_t **layout_types,
|
||||
rmaps_lama_level_type_t **layout_types_sorted,
|
||||
int *num_types);
|
||||
int rmaps_lama_parse_binding(char *layout,
|
||||
rmaps_lama_level_type_t *binding_level,
|
||||
int *num_types);
|
||||
int rmaps_lama_parse_mppr(char *layout,
|
||||
rmaps_lama_level_info_t **mppr_levels,
|
||||
int *num_types);
|
||||
int rmaps_lama_parse_ordering(char *layout,
|
||||
rmaps_lama_order_type_t *order);
|
||||
|
||||
bool rmaps_lama_ok_to_prune_level(rmaps_lama_level_type_t level);
|
||||
|
||||
/*********************************
|
||||
* Max Tree Structure
|
||||
*********************************/
|
||||
struct rmaps_lama_max_tree_item_t {
|
||||
opal_tree_item_t tree_element;
|
||||
|
||||
rmaps_lama_level_type_t type;
|
||||
};
|
||||
typedef struct rmaps_lama_max_tree_item_t rmaps_lama_max_tree_item_t;
|
||||
|
||||
|
||||
/*
|
||||
* Union all topologies into the max tree
|
||||
*/
|
||||
int rmaps_lama_build_max_tree(orte_job_t *jdata, opal_list_t *node_list,
|
||||
opal_tree_t * max_tree, bool *is_homogeneous);
|
||||
|
||||
/*
|
||||
* Find a matching subtree
|
||||
*/
|
||||
hwloc_obj_t * rmaps_lama_find_nth_subtree_match(hwloc_topology_t hwloc_topo,
|
||||
hwloc_obj_t parent_obj,
|
||||
int nth,
|
||||
rmaps_lama_level_type_t lama_key);
|
||||
hwloc_obj_t * rmaps_lama_find_parent(hwloc_topology_t hwloc_topo,
|
||||
hwloc_obj_t *child_obj,
|
||||
rmaps_lama_level_type_t lama_key);
|
||||
|
||||
/*
|
||||
* Create Empty Tree
|
||||
*/
|
||||
opal_tree_t * rmaps_lama_create_empty_max_tree(void);
|
||||
|
||||
/*
|
||||
* Pretty Print
|
||||
*/
|
||||
void rmaps_lama_max_tree_pretty_print_tree(opal_tree_t *tree);
|
||||
|
||||
END_C_DECLS
|
||||
|
||||
#endif /* ORTE_RMAPS_LAMA_H */
|
@ -1,136 +0,0 @@
|
||||
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
|
||||
/*
|
||||
* Copyright (c) 2011 Oak Ridge National Labs. All rights reserved.
|
||||
*
|
||||
* Copyright (c) 2012 Cisco Systems, Inc. All rights reserved.
|
||||
* Copyright (c) 2015 Los Alamos National Security, LLC. All rights
|
||||
* reserved.
|
||||
* $COPYRIGHT$
|
||||
*
|
||||
* Additional copyrights may follow
|
||||
*
|
||||
* $HEADER$
|
||||
*/
|
||||
|
||||
#include "orte_config.h"
|
||||
#include "orte/constants.h"
|
||||
|
||||
#include "opal/mca/base/base.h"
|
||||
|
||||
#include "orte/mca/rmaps/base/rmaps_private.h"
|
||||
#include "orte/mca/rmaps/base/base.h"
|
||||
|
||||
#include "rmaps_lama.h"
|
||||
|
||||
/*
|
||||
* Local functions
|
||||
*/
|
||||
|
||||
static int orte_rmaps_lama_register(void);
|
||||
static int orte_rmaps_lama_query(mca_base_module_t **module, int *priority);
|
||||
|
||||
static int module_priority;
|
||||
|
||||
char * rmaps_lama_cmd_map = NULL;
|
||||
char * rmaps_lama_cmd_bind = NULL;
|
||||
char * rmaps_lama_cmd_mppr = NULL;
|
||||
char * rmaps_lama_cmd_ordering = NULL;
|
||||
bool rmaps_lama_timing_enabled = false;
|
||||
bool rmaps_lama_can_oversubscribe = false;
|
||||
bool rmaps_lama_am_oversubscribing = false;
|
||||
|
||||
orte_rmaps_base_component_t mca_rmaps_lama_component = {
|
||||
.base_version = {
|
||||
ORTE_RMAPS_BASE_VERSION_2_0_0,
|
||||
|
||||
.mca_component_name = "lama",
|
||||
MCA_BASE_MAKE_VERSION(component, ORTE_MAJOR_VERSION, ORTE_MINOR_VERSION,
|
||||
ORTE_RELEASE_VERSION),
|
||||
.mca_query_component = orte_rmaps_lama_query,
|
||||
.mca_register_component_params = orte_rmaps_lama_register,
|
||||
},
|
||||
.base_data = {
|
||||
/* The component is checkpoint ready */
|
||||
MCA_BASE_METADATA_PARAM_CHECKPOINT
|
||||
},
|
||||
};
|
||||
|
||||
|
||||
static int orte_rmaps_lama_register(void)
|
||||
{
|
||||
mca_base_component_t *c = &mca_rmaps_lama_component.base_version;
|
||||
|
||||
/* JMS Artificially low for now */
|
||||
module_priority = 0;
|
||||
(void) mca_base_component_var_register (c, "priority", "Priority of the LAMA rmaps component",
|
||||
MCA_BASE_VAR_TYPE_INT, NULL, 0, 0,
|
||||
OPAL_INFO_LVL_9,
|
||||
MCA_BASE_VAR_SCOPE_READONLY,
|
||||
&module_priority);
|
||||
|
||||
rmaps_lama_timing_enabled = false;
|
||||
(void) mca_base_component_var_register (c, "timing",
|
||||
"Enable timing information. [Default = disabled]",
|
||||
MCA_BASE_VAR_TYPE_BOOL, NULL, 0, 0,
|
||||
OPAL_INFO_LVL_9,
|
||||
MCA_BASE_VAR_SCOPE_READONLY,
|
||||
&rmaps_lama_timing_enabled);
|
||||
|
||||
rmaps_lama_cmd_map = NULL;
|
||||
(void) mca_base_component_var_register (c, "map", "LAMA Map: Process layout iteration ordering (See documentation)",
|
||||
MCA_BASE_VAR_TYPE_STRING, NULL, 0, 0,
|
||||
OPAL_INFO_LVL_5,
|
||||
MCA_BASE_VAR_SCOPE_READONLY,
|
||||
&rmaps_lama_cmd_map);
|
||||
|
||||
rmaps_lama_cmd_bind = NULL;
|
||||
(void) mca_base_component_var_register (c, "bind", "LAMA Bind: Bind to the specified number of resources (See documentation)",
|
||||
MCA_BASE_VAR_TYPE_STRING, NULL, 0, 0,
|
||||
OPAL_INFO_LVL_5,
|
||||
MCA_BASE_VAR_SCOPE_READONLY,
|
||||
&rmaps_lama_cmd_bind);
|
||||
|
||||
rmaps_lama_cmd_mppr = NULL;
|
||||
(void) mca_base_component_var_register (c, "mppr", "LAMA MPPR: Maximum number of the specified resources available (See documentation)",
|
||||
MCA_BASE_VAR_TYPE_STRING, NULL, 0, 0,
|
||||
OPAL_INFO_LVL_5,
|
||||
MCA_BASE_VAR_SCOPE_READONLY,
|
||||
&rmaps_lama_cmd_mppr);
|
||||
|
||||
rmaps_lama_cmd_ordering = NULL;
|
||||
(void) mca_base_component_var_register (c, "ordering", "LAMA Ordering: Ordering (s) sequential, (n) natural - Default: n (See documentation)",
|
||||
MCA_BASE_VAR_TYPE_STRING, NULL, 0, 0,
|
||||
OPAL_INFO_LVL_5,
|
||||
MCA_BASE_VAR_SCOPE_READONLY,
|
||||
&rmaps_lama_cmd_ordering);
|
||||
|
||||
opal_output_verbose(5, orte_rmaps_base_framework.framework_output,
|
||||
"mca:rmaps:lama: Priority %3d",
|
||||
module_priority);
|
||||
|
||||
opal_output_verbose(5, orte_rmaps_base_framework.framework_output,
|
||||
"mca:rmaps:lama: Map : %s",
|
||||
(NULL == rmaps_lama_cmd_map) ? "NULL" : rmaps_lama_cmd_map);
|
||||
opal_output_verbose(5, orte_rmaps_base_framework.framework_output,
|
||||
"mca:rmaps:lama: Bind : %s",
|
||||
(NULL == rmaps_lama_cmd_bind) ? "NULL" : rmaps_lama_cmd_bind);
|
||||
opal_output_verbose(5, orte_rmaps_base_framework.framework_output,
|
||||
"mca:rmaps:lama: MPPR : %s",
|
||||
(NULL == rmaps_lama_cmd_mppr) ? "NULL" : rmaps_lama_cmd_mppr);
|
||||
opal_output_verbose(5, orte_rmaps_base_framework.framework_output,
|
||||
"mca:rmaps:lama: Order : %s",
|
||||
(NULL == rmaps_lama_cmd_ordering) ? "NULL" : rmaps_lama_cmd_ordering);
|
||||
|
||||
return ORTE_SUCCESS;
|
||||
}
|
||||
|
||||
|
||||
static int orte_rmaps_lama_query(mca_base_module_t **module, int *priority)
|
||||
{
|
||||
/* Only run on the HNP */
|
||||
|
||||
*priority = module_priority;
|
||||
*module = (mca_base_module_t *)&orte_rmaps_lama_module;
|
||||
|
||||
return ORTE_SUCCESS;
|
||||
}
|
Разница между файлами не показана из-за своего большого размера
Загрузить разницу
Разница между файлами не показана из-за своего большого размера
Загрузить разницу
@ -1,878 +0,0 @@
|
||||
/*
|
||||
* Copyright (c) 2011 Oak Ridge National Labs. All rights reserved.
|
||||
* Copyright (c) 2013 Cisco Systems, Inc. All rights reserved.
|
||||
* $COPYRIGHT$
|
||||
*
|
||||
* Additional copyrights may follow
|
||||
*
|
||||
* $HEADER$
|
||||
*/
|
||||
/**
|
||||
* Processing for command line interface options
|
||||
*
|
||||
*/
|
||||
#include "rmaps_lama.h"
|
||||
|
||||
#include "opal/util/argv.h"
|
||||
|
||||
#include "orte/mca/rmaps/base/rmaps_private.h"
|
||||
#include "orte/mca/rmaps/base/base.h"
|
||||
#include "orte/util/show_help.h"
|
||||
|
||||
#include <ctype.h>
|
||||
|
||||
/*********************************
|
||||
* Local Functions
|
||||
*********************************/
|
||||
/*
|
||||
* QSort: Integer comparison
|
||||
*/
|
||||
static int lama_parse_int_sort(const void *a, const void *b);
|
||||
|
||||
/*
|
||||
* Convert the '-ppr' syntax from the 'ppr' component to the 'lama' '-mppr' syntax.
|
||||
*/
|
||||
static char * rmaps_lama_covert_ppr(char * given_ppr);
|
||||
|
||||
/*********************************
|
||||
* Parsing Functions
|
||||
*********************************/
|
||||
/*
 * Fill in every LAMA command-line parameter that was not given explicitly,
 * deriving it from the generic mapping, binding, ranking and ppr settings
 * stored in jdata->map, and record whether oversubscription is allowed.
 *
 * @param jdata  Job whose map (mapping, binding, ranking, ppr) is consulted.
 *
 * @return ORTE_SUCCESS, or ORTE_ERR_NOT_SUPPORTED (after a show_help
 *         message) when the job asks for a policy with no LAMA equivalent.
 */
int rmaps_lama_process_alias_params(orte_job_t *jdata)
{
    /*
     * Mapping options
     * Note: L1, L2, L3 are not exposed in orterun to the user, so
     *       there is no need to specify them here.
     */
    if( NULL == rmaps_lama_cmd_map ) {
        /* orte_rmaps_base.mapping */
        switch( ORTE_GET_MAPPING_POLICY(jdata->map->mapping) ) {
        case ORTE_MAPPING_BYNODE:
            rmaps_lama_cmd_map = strdup("nbsch");
            break;
        case ORTE_MAPPING_BYBOARD:
            orte_show_help("help-orte-rmaps-lama.txt",
                           "invalid mapping option",
                           true,
                           "by board", "mapping by board not supported by LAMA");
            return ORTE_ERR_NOT_SUPPORTED;
        case ORTE_MAPPING_BYNUMA:
            rmaps_lama_cmd_map = strdup("Nbnsch");
            break;
        case ORTE_MAPPING_BYSOCKET:
            rmaps_lama_cmd_map = strdup("sbnch");
            break;
        case ORTE_MAPPING_BYL3CACHE:
            rmaps_lama_cmd_map = strdup("L3sNbnL2L1ch");
            break;
        case ORTE_MAPPING_BYL2CACHE:
            rmaps_lama_cmd_map = strdup("L2sNbnL1ch");
            break;
        case ORTE_MAPPING_BYL1CACHE:
            rmaps_lama_cmd_map = strdup("L1sNbnch");
            break;
        case ORTE_MAPPING_BYCORE:
        case ORTE_MAPPING_BYSLOT:
            rmaps_lama_cmd_map = strdup("csbnh");
            break;
        case ORTE_MAPPING_BYHWTHREAD:
            rmaps_lama_cmd_map = strdup("hcsbn");
            break;
        case ORTE_MAPPING_RR:
            orte_show_help("help-orte-rmaps-lama.txt",
                           "invalid mapping option",
                           true,
                           "round robin", "mapping by round robin not supported by LAMA");
            return ORTE_ERR_NOT_SUPPORTED;
        case ORTE_MAPPING_SEQ:
            orte_show_help("help-orte-rmaps-lama.txt",
                           "invalid mapping option",
                           true,
                           "sequential", "mapping by sequential not supported by LAMA");
            return ORTE_ERR_NOT_SUPPORTED;
        case ORTE_MAPPING_BYUSER:
            orte_show_help("help-orte-rmaps-lama.txt",
                           "invalid mapping option",
                           true,
                           "by user", "mapping by user not supported by LAMA");
            return ORTE_ERR_NOT_SUPPORTED;
        default:
            /*
             * Default is map-by core
             */
            rmaps_lama_cmd_map = strdup("cL1L2L3sNbnh");
            break;
        }
    }

    /*
     * Binding Options
     */
    if( NULL == rmaps_lama_cmd_bind ) {
        /*
         * No binding specified, use default.  The pointer is already NULL
         * inside this branch, so the assignment below cannot change it; the
         * switch that follows is what actually selects the binding.
         */
        if( !OPAL_BINDING_POLICY_IS_SET(jdata->map->binding) ||
            !OPAL_BINDING_REQUIRED(opal_hwloc_binding_policy) ||
            OPAL_BIND_TO_NONE == OPAL_GET_BINDING_POLICY(jdata->map->binding) ) {
            rmaps_lama_cmd_bind = NULL;
        }

        switch( OPAL_GET_BINDING_POLICY(jdata->map->binding) ) {
        case OPAL_BIND_TO_BOARD:
            orte_show_help("help-orte-rmaps-lama.txt",
                           "invalid binding option",
                           true,
                           "by board", "binding to board not supported by LAMA");
            return ORTE_ERR_NOT_SUPPORTED;
        case OPAL_BIND_TO_NUMA:
            rmaps_lama_cmd_bind = strdup("1N");
            break;
        case OPAL_BIND_TO_SOCKET:
            rmaps_lama_cmd_bind = strdup("1s");
            break;
        case OPAL_BIND_TO_L3CACHE:
            rmaps_lama_cmd_bind = strdup("1L3");
            break;
        case OPAL_BIND_TO_L2CACHE:
            rmaps_lama_cmd_bind = strdup("1L2");
            break;
        case OPAL_BIND_TO_L1CACHE:
            rmaps_lama_cmd_bind = strdup("1L1");
            break;
        case OPAL_BIND_TO_CORE:
            rmaps_lama_cmd_bind = strdup("1c");
            break;
        case OPAL_BIND_TO_HWTHREAD:
            rmaps_lama_cmd_bind = strdup("1h");
            break;
        case OPAL_BIND_TO_CPUSET:
            orte_show_help("help-orte-rmaps-lama.txt",
                           "invalid binding option",
                           true,
                           "by CPU set", "binding to CPU set not supported by LAMA");
            return ORTE_ERR_NOT_SUPPORTED;
        default:
            rmaps_lama_cmd_bind = NULL;
            break;
        }
    }

    /*
     * Ordering (a.k.a. Ranking) Options
     */
    if( NULL == rmaps_lama_cmd_ordering ) {
        /* orte_rmaps_base.ranking */
        switch( ORTE_GET_RANKING_POLICY(jdata->map->ranking) ) {
        case ORTE_RANK_BY_SLOT:
            rmaps_lama_cmd_ordering = strdup("s");
            break;
        case ORTE_RANK_BY_NODE:
        case ORTE_RANK_BY_NUMA:
        case ORTE_RANK_BY_SOCKET:
        case ORTE_RANK_BY_L3CACHE:
        case ORTE_RANK_BY_L2CACHE:
        case ORTE_RANK_BY_L1CACHE:
        case ORTE_RANK_BY_CORE:
        case ORTE_RANK_BY_HWTHREAD:
            rmaps_lama_cmd_ordering = strdup("n");
            break;
        case ORTE_RANK_BY_BOARD:
            orte_show_help("help-orte-rmaps-lama.txt",
                           "invalid ordering option",
                           true,
                           "by board", "ordering by board not supported by LAMA");
            return ORTE_ERR_NOT_SUPPORTED;
        default:
            rmaps_lama_cmd_ordering = strdup("n");
            break;
        }
    }

    /*
     * MPPR: the ppr is given in the map
     */
    if( NULL == rmaps_lama_cmd_mppr && NULL != jdata->map->ppr ) {
        rmaps_lama_cmd_mppr = rmaps_lama_covert_ppr(jdata->map->ppr);
    }

    /*
     * Oversubscription
     */
    if( ORTE_MAPPING_NO_OVERSUBSCRIBE & ORTE_GET_MAPPING_DIRECTIVE(jdata->map->mapping) ) {
        rmaps_lama_can_oversubscribe = false;
    }
    else {
        rmaps_lama_can_oversubscribe = true;
    }

    /*
     * Display revised values.  Bind and MPPR are legitimately NULL when
     * nothing was requested (and any strdup above may have failed), so
     * never hand a NULL pointer to a "%s" conversion.
     */
    opal_output_verbose(5, orte_rmaps_base_framework.framework_output,
                        "mca:rmaps:lama: Revised Parameters -----");
    opal_output_verbose(5, orte_rmaps_base_framework.framework_output,
                        "mca:rmaps:lama: Map   : %s",
                        (NULL == rmaps_lama_cmd_map) ? "NULL" : rmaps_lama_cmd_map);
    opal_output_verbose(5, orte_rmaps_base_framework.framework_output,
                        "mca:rmaps:lama: Bind  : %s",
                        (NULL == rmaps_lama_cmd_bind) ? "NULL" : rmaps_lama_cmd_bind);
    opal_output_verbose(5, orte_rmaps_base_framework.framework_output,
                        "mca:rmaps:lama: MPPR  : %s",
                        (NULL == rmaps_lama_cmd_mppr) ? "NULL" : rmaps_lama_cmd_mppr);
    opal_output_verbose(5, orte_rmaps_base_framework.framework_output,
                        "mca:rmaps:lama: Order : %s",
                        (NULL == rmaps_lama_cmd_ordering) ? "NULL" : rmaps_lama_cmd_ordering);

    return ORTE_SUCCESS;
}
|
||||
|
||||
/*
 * Convert a '-ppr' specification into the LAMA '-mppr' form.  The string
 * is passed through unchanged: the result is a strdup() copy of the input.
 */
static char * rmaps_lama_covert_ppr(char * given_ppr)
{
    char *mppr = strdup(given_ppr);

    return mppr;
}
|
||||
|
||||
int rmaps_lama_parse_mapping(char *layout,
|
||||
rmaps_lama_level_type_t **layout_types,
|
||||
rmaps_lama_level_type_t **layout_types_sorted,
|
||||
int *num_types)
|
||||
{
|
||||
int exit_status = ORTE_SUCCESS;
|
||||
char param[3];
|
||||
int i, j, len;
|
||||
bool found_req_param_n = false;
|
||||
bool found_req_param_h = false;
|
||||
bool found_req_param_bind = false;
|
||||
|
||||
/*
|
||||
* Sanity Check:
|
||||
* There is no default layout, so if we get here and nothing is specified
|
||||
* then this is an error.
|
||||
*/
|
||||
if( NULL == layout ) {
|
||||
orte_show_help("help-orte-rmaps-lama.txt",
|
||||
"internal error",
|
||||
true,
|
||||
"rmaps_lama_parse_mapping",
|
||||
"internal error 1");
|
||||
return ORTE_ERROR;
|
||||
}
|
||||
|
||||
*num_types = 0;
|
||||
|
||||
/*
|
||||
* Extract and convert all the keys
|
||||
*/
|
||||
len = strlen(layout);
|
||||
for(i = 0; i < len; ++i) {
|
||||
/*
|
||||
* L1 : L1 Cache
|
||||
* L2 : L2 Cache
|
||||
* L3 : L3 Cache
|
||||
*/
|
||||
if( layout[i] == 'L' ) {
|
||||
param[0] = layout[i];
|
||||
++i;
|
||||
/*
|
||||
* Check for 2 characters
|
||||
*/
|
||||
if( i >= len ) {
|
||||
orte_show_help("help-orte-rmaps-lama.txt",
|
||||
"invalid mapping option",
|
||||
true,
|
||||
layout, "cache level missing number");
|
||||
exit_status = ORTE_ERROR;
|
||||
goto cleanup;
|
||||
}
|
||||
param[1] = layout[i];
|
||||
param[2] = '\0';
|
||||
}
|
||||
/*
|
||||
* n : Machine
|
||||
* b : Board
|
||||
* s : Socket
|
||||
* c : Core
|
||||
* h : Hardware Thread
|
||||
* N : NUMA Node
|
||||
*/
|
||||
else {
|
||||
param[0] = layout[i];
|
||||
param[1] = '\0';
|
||||
}
|
||||
|
||||
/*
|
||||
* Append level
|
||||
*/
|
||||
*num_types += 1;
|
||||
*layout_types = (rmaps_lama_level_type_t*)realloc(*layout_types, sizeof(rmaps_lama_level_type_t) * (*num_types));
|
||||
(*layout_types)[(*num_types)-1] = lama_type_str_to_enum(param);
|
||||
}
|
||||
|
||||
/*
|
||||
* Check for duplicates and unknowns
|
||||
* Copy to sorted list
|
||||
*/
|
||||
*layout_types_sorted = (rmaps_lama_level_type_t*)malloc(sizeof(rmaps_lama_level_type_t) * (*num_types));
|
||||
for( i = 0; i < *num_types; ++i ) {
|
||||
/*
|
||||
* Copy for later sorting
|
||||
*/
|
||||
(*layout_types_sorted)[i] = (*layout_types)[i];
|
||||
|
||||
/*
|
||||
* Look for unknown and unsupported options
|
||||
*/
|
||||
if( LAMA_LEVEL_UNKNOWN <= (*layout_types)[i] ) {
|
||||
char *msg;
|
||||
asprintf(&msg, "unknown mapping level at position %d", i + 1);
|
||||
orte_show_help("help-orte-rmaps-lama.txt",
|
||||
"invalid mapping option",
|
||||
true,
|
||||
layout, msg);
|
||||
free(msg);
|
||||
exit_status = ORTE_ERROR;
|
||||
goto cleanup;
|
||||
}
|
||||
|
||||
if( LAMA_LEVEL_MACHINE == (*layout_types)[i] ) {
|
||||
found_req_param_n = true;
|
||||
}
|
||||
|
||||
if( LAMA_LEVEL_PU == (*layout_types)[i] ) {
|
||||
found_req_param_h = true;
|
||||
}
|
||||
|
||||
if( lama_binding_level == (*layout_types)[i] ) {
|
||||
found_req_param_bind = true;
|
||||
}
|
||||
|
||||
/*
|
||||
* Look for duplicates
|
||||
*/
|
||||
for( j = i+1; j < *num_types; ++j ) {
|
||||
if( (*layout_types)[i] == (*layout_types)[j] ) {
|
||||
char *msg;
|
||||
asprintf(&msg, "duplicate mapping levels at position %d and %d",
|
||||
i + 1, j + 1);
|
||||
orte_show_help("help-orte-rmaps-lama.txt",
|
||||
"invalid mapping option",
|
||||
true,
|
||||
layout, msg);
|
||||
free(msg);
|
||||
exit_status = ORTE_ERROR;
|
||||
goto cleanup;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* The user is required to specify at least the:
|
||||
* - machine
|
||||
* - hardware thread (needed for lower bound binding) JJH: We should be able to lift this...
|
||||
* - binding layer (need it to stride the mapping)
|
||||
* Only print the error message once, for brevity.
|
||||
*/
|
||||
if( !found_req_param_n ) {
|
||||
char *msg;
|
||||
asprintf(&msg, "missing required 'n' mapping token");
|
||||
orte_show_help("help-orte-rmaps-lama.txt",
|
||||
"invalid mapping option",
|
||||
true,
|
||||
layout, msg);
|
||||
free(msg);
|
||||
exit_status = ORTE_ERROR;
|
||||
goto cleanup;
|
||||
}
|
||||
else if(!found_req_param_h) {
|
||||
char *msg;
|
||||
asprintf(&msg, "missing required 'h' mapping token");
|
||||
orte_show_help("help-orte-rmaps-lama.txt",
|
||||
"invalid mapping option",
|
||||
true,
|
||||
layout, msg);
|
||||
free(msg);
|
||||
exit_status = ORTE_ERROR;
|
||||
goto cleanup;
|
||||
} else if (!found_req_param_bind) {
|
||||
char *msg;
|
||||
asprintf(&msg, "missing required mapping token for the current binding level");
|
||||
orte_show_help("help-orte-rmaps-lama.txt",
|
||||
"invalid mapping option",
|
||||
true,
|
||||
layout, msg);
|
||||
free(msg);
|
||||
exit_status = ORTE_ERROR;
|
||||
goto cleanup;
|
||||
}
|
||||
|
||||
/*
|
||||
* Sort the items
|
||||
*/
|
||||
qsort((*layout_types_sorted ), (*num_types), sizeof(int), lama_parse_int_sort);
|
||||
|
||||
cleanup:
|
||||
return exit_status;
|
||||
}
|
||||
|
||||
/*
 * Parse a LAMA binding specification of the form "<digits><level>", for
 * example "1c" or "2L3".
 *
 * @param layout         Binding string; NULL selects the default of one
 *                       machine-level binding.
 * @param binding_level  Out: level type named by the token.
 * @param num_types      Out: the leading number (converted with atoi()).
 *
 * @return ORTE_SUCCESS, or ORTE_ERROR (after a show_help message) when the
 *         string is malformed or names an unknown level.
 */
int rmaps_lama_parse_binding(char *layout, rmaps_lama_level_type_t *binding_level, int *num_types)
{
    int exit_status = ORTE_SUCCESS;
    char param[3];
    char num[MAX_BIND_DIGIT_LEN];
    int i, n, p, len;

    /*
     * Default: If nothing specified
     * - Bind to machine
     */
    if( NULL == layout ) {
        *binding_level = LAMA_LEVEL_MACHINE;
        *num_types = 1;
        return ORTE_SUCCESS;
    }

    *num_types = 0;

    /*
     * Extract and convert all the keys.
     * n counts digits collected so far, p the length of the level token.
     */
    len = strlen(layout);
    n = 0;
    p = 0;
    for(i = 0; i < len; ++i) {
        /*
         * Must start with a digit.  The cast keeps isdigit() defined for
         * characters with a negative plain-char value.
         */
        if( isdigit((unsigned char)layout[i]) ) {
            /*
             * Check: Digits must come first
             */
            if( p != 0 ) {
                orte_show_help("help-orte-rmaps-lama.txt",
                               "invalid binding option",
                               true,
                               layout, "missing digit(s) before binding level token");
                exit_status = ORTE_ERROR;
                goto cleanup;
            }

            num[n] = layout[i];
            ++n;
            /*
             * Check: Exceed bound of number of digits (one slot is kept
             * for the terminating NUL written after the loop)
             */
            if( n >= MAX_BIND_DIGIT_LEN ) {
                orte_show_help("help-orte-rmaps-lama.txt",
                               "invalid binding option",
                               true,
                               layout, "too many digits");
                exit_status = ORTE_ERROR;
                goto cleanup;
            }
        }
        /*
         * Extract the level
         */
        else {
            /*
             * Check: Digits must come first
             */
            if( n == 0 ) {
                orte_show_help("help-orte-rmaps-lama.txt",
                               "invalid binding option",
                               true,
                               layout, "missing digit(s) before binding level token");
                exit_status = ORTE_ERROR;
                goto cleanup;
            }
            /*
             * Check: Only one level allowed
             */
            if( p != 0 ) {
                orte_show_help("help-orte-rmaps-lama.txt",
                               "invalid binding option",
                               true,
                               layout, "only one binding level may be specified");
                exit_status = ORTE_ERROR;
                goto cleanup;
            }

            if( layout[i] == 'L' ) {
                /*
                 * L1 : L1 Cache
                 * L2 : L2 Cache
                 * L3 : L3 Cache
                 */
                param[0] = layout[i];
                ++i;
                /*
                 * A cache token needs a second character; report that,
                 * matching the wording used by the mapping and mppr parsers.
                 */
                if( i >= len ) {
                    orte_show_help("help-orte-rmaps-lama.txt",
                                   "invalid binding option",
                                   true,
                                   layout, "cache level missing number");
                    exit_status = ORTE_ERROR;
                    goto cleanup;
                }
                param[1] = layout[i];
                p = 2;
            }
            else {
                /*
                 * n : Machine
                 * b : Board
                 * s : Socket
                 * c : Core
                 * h : Hardware Thread
                 * N : NUMA Node
                 */
                param[0] = layout[i];
                p = 1;
            }
            param[p] = '\0';
        }
    }

    /*
     * Check that the level was specified
     */
    if( p == 0 ) {
        orte_show_help("help-orte-rmaps-lama.txt",
                       "invalid binding option",
                       true,
                       layout, "binding specification is empty");
        exit_status = ORTE_ERROR;
        goto cleanup;
    }
    num[n] = '\0';

    *binding_level = lama_type_str_to_enum(param);
    *num_types = atoi(num);

    /*
     * Check for unknown level
     */
    if( LAMA_LEVEL_UNKNOWN <= *binding_level ) {
        orte_show_help("help-orte-rmaps-lama.txt",
                       "invalid binding option",
                       true,
                       layout, "unknown binding level");
        exit_status = ORTE_ERROR;
        goto cleanup;
    }

 cleanup:
    return exit_status;
}
|
||||
|
||||
/*
 * Parse a LAMA MPPR specification: a comma separated list of
 * "<digits>:<level>" entries.  Within an entry, ':' and blanks are ignored.
 *
 * @param layout       MPPR string; NULL means unrestricted allocation.
 * @param mppr_levels  In/out: array grown with realloc(), one entry per
 *                     parsed specification (set to NULL when layout is NULL).
 * @param num_types    Out: number of entries in mppr_levels.
 *
 * @return ORTE_SUCCESS; ORTE_ERROR (after a show_help message) for a
 *         malformed, unknown or duplicated entry; ORTE_ERR_OUT_OF_RESOURCE
 *         if the array cannot be grown.
 */
int rmaps_lama_parse_mppr(char *layout, rmaps_lama_level_info_t **mppr_levels, int *num_types)
{
    int exit_status = ORTE_SUCCESS;
    char param[3];
    char num[MAX_BIND_DIGIT_LEN];
    char msg[128];
    char **argv = NULL;
    rmaps_lama_level_info_t *grown = NULL;
    int argc = 0;
    int i, j, len;
    int p, n;

    /*
     * Default: Unrestricted allocation
     * 'oversubscribe' flag accounted for elsewhere
     */
    if( NULL == layout ) {
        *mppr_levels = NULL;
        *num_types = 0;
        return ORTE_SUCCESS;
    }

    *num_types = 0;

    /*
     * Split by ','
     * <#:level>,<#:level>,...
     */
    argv = opal_argv_split(layout, ',');
    argc = opal_argv_count(argv);
    for(j = 0; j < argc; ++j) {
        /*
         * Parse <#:level>
         * n counts digits collected so far, p the length of the level token.
         */
        len = strlen(argv[j]);
        n = 0;
        p = 0;
        for(i = 0; i < len; ++i) {
            /* ctype functions require an unsigned char value */
            const unsigned char ch = (unsigned char)argv[j][i];

            /*
             * Skip the ':' separator and whitespace
             */
            if( ch == ':' || isblank(ch) ) {
                continue;
            }
            /*
             * Must start with a digit
             */
            else if( isdigit(ch) ) {
                /*
                 * Check: Digits must come first
                 */
                if( p != 0 ) {
                    orte_show_help("help-orte-rmaps-lama.txt",
                                   "invalid mppr option",
                                   true,
                                   layout, "missing digit(s) before resource specification");
                    exit_status = ORTE_ERROR;
                    goto cleanup;
                }

                num[n] = argv[j][i];
                ++n;
                /*
                 * Check: Exceed bound of number of digits (one slot is
                 * kept for the terminating NUL)
                 */
                if( n >= MAX_BIND_DIGIT_LEN ) {
                    orte_show_help("help-orte-rmaps-lama.txt",
                                   "invalid mppr option",
                                   true,
                                   layout, "too many digits");
                    exit_status = ORTE_ERROR;
                    goto cleanup;
                }
            }
            /*
             * Extract the level
             */
            else {
                /*
                 * Check: Digits must come first
                 */
                if( n == 0 ) {
                    orte_show_help("help-orte-rmaps-lama.txt",
                                   "invalid mppr option",
                                   true,
                                   layout, "missing digit(s) before resource specification");
                    exit_status = ORTE_ERROR;
                    goto cleanup;
                }
                /*
                 * Check: Only one level allowed
                 */
                if( p != 0 ) {
                    orte_show_help("help-orte-rmaps-lama.txt",
                                   "invalid mppr option",
                                   true,
                                   layout, "only one resource type may be listed per specification");
                    exit_status = ORTE_ERROR;
                    goto cleanup;
                }

                if( argv[j][i] == 'L' ) {
                    /*
                     * L1 : L1 Cache
                     * L2 : L2 Cache
                     * L3 : L3 Cache
                     */
                    param[0] = argv[j][i];
                    ++i;
                    /* A cache token needs a second character */
                    if( i >= len ) {
                        orte_show_help("help-orte-rmaps-lama.txt",
                                       "invalid mppr option",
                                       true,
                                       layout, "cache level missing number");
                        exit_status = ORTE_ERROR;
                        goto cleanup;
                    }
                    param[1] = argv[j][i];
                    p = 2;
                }
                else {
                    /*
                     * n : Machine
                     * b : Board
                     * s : Socket
                     * c : Core
                     * h : Hardware Thread
                     * N : NUMA Node
                     */
                    param[0] = argv[j][i];
                    p = 1;
                }
                param[p] = '\0';
            }
        }

        /*
         * Whitespace, just skip
         */
        if( n == 0 && p == 0 ) {
            continue;
        }

        /*
         * Check that the level was specified
         */
        if( p == 0 ) {
            orte_show_help("help-orte-rmaps-lama.txt",
                           "invalid mppr option",
                           true,
                           layout, "resource type not specified");
            exit_status = ORTE_ERROR;
            goto cleanup;
        }
        num[n] = '\0';

        /*
         * Append level.  Grow through a temporary so the caller's array is
         * neither leaked nor dereferenced as NULL if realloc() fails.
         */
        grown = (rmaps_lama_level_info_t*)realloc(*mppr_levels,
                                                  sizeof(**mppr_levels) * (*num_types + 1));
        if( NULL == grown ) {
            exit_status = ORTE_ERR_OUT_OF_RESOURCE;
            goto cleanup;
        }
        *mppr_levels = grown;
        grown[*num_types].type          = lama_type_str_to_enum(param);
        grown[*num_types].max_resources = atoi(num);
        *num_types += 1;
    }

    /*
     * Check for duplicates and unknowns
     */
    for( i = 0; i < *num_types; ++i ) {
        /*
         * Look for unknown and unsupported options
         */
        if( LAMA_LEVEL_UNKNOWN <= (*mppr_levels)[i].type ) {
            snprintf(msg, sizeof(msg), "unknown resource type at position %d", i + 1);
            orte_show_help("help-orte-rmaps-lama.txt",
                           "invalid mppr option",
                           true,
                           layout, msg);
            exit_status = ORTE_ERROR;
            goto cleanup;
        }

        /*
         * Look for duplicates
         */
        for( j = i+1; j < *num_types; ++j ) {
            if( (*mppr_levels)[i].type == (*mppr_levels)[j].type ) {
                snprintf(msg, sizeof(msg), "duplicate resource types at position %d and %d",
                         i + 1, j + 1);
                orte_show_help("help-orte-rmaps-lama.txt",
                               "invalid mppr option",
                               true,
                               layout, msg);
                exit_status = ORTE_ERROR;
                goto cleanup;
            }
        }
    }

 cleanup:
    if( NULL != argv ) {
        opal_argv_free(argv);
        argv = NULL;
    }

    return exit_status;
}
|
||||
|
||||
int rmaps_lama_parse_ordering(char *layout,
|
||||
rmaps_lama_order_type_t *order)
|
||||
{
|
||||
/*
|
||||
* Default: Natural ordering
|
||||
*/
|
||||
if( NULL == layout ) {
|
||||
*order = LAMA_ORDER_NATURAL;
|
||||
return ORTE_SUCCESS;
|
||||
}
|
||||
|
||||
/*
|
||||
* Sequential Ordering
|
||||
*/
|
||||
if( 0 == strncmp(layout, "s", strlen("s")) ||
|
||||
0 == strncmp(layout, "S", strlen("S")) ) {
|
||||
*order = LAMA_ORDER_SEQ;
|
||||
}
|
||||
/*
|
||||
* Natural Ordering
|
||||
*/
|
||||
else if( 0 == strncmp(layout, "n", strlen("n")) ||
|
||||
0 == strncmp(layout, "N", strlen("N")) ) {
|
||||
*order = LAMA_ORDER_NATURAL;
|
||||
}
|
||||
/*
|
||||
* Check for unknown options
|
||||
*/
|
||||
else {
|
||||
orte_show_help("help-orte-rmaps-lama.txt",
|
||||
"invalid ordering option",
|
||||
true,
|
||||
"unsupported ordering option", layout);
|
||||
return ORTE_ERROR;
|
||||
}
|
||||
|
||||
return ORTE_SUCCESS;
|
||||
}
|
||||
|
||||
/*
 * Report whether a level may be pruned: true when the level does not
 * appear among the first lama_mapping_num_layouts entries of
 * lama_mapping_layout, false as soon as a match is found.
 */
bool rmaps_lama_ok_to_prune_level(rmaps_lama_level_type_t level)
{
    int idx = 0;
    bool in_layout = false;

    while( !in_layout && idx < lama_mapping_num_layouts ) {
        in_layout = (level == lama_mapping_layout[idx]);
        ++idx;
    }

    return !in_layout;
}
|
||||
|
||||
/*********************************
|
||||
* Support Functions
|
||||
*********************************/
|
||||
/*
 * qsort() comparator for int-sized elements, ascending order.
 * Yields -1, 0 or 1 for less-than, equal and greater-than respectively.
 */
static int lama_parse_int_sort(const void *a, const void *b) {
    const int lhs = *((const int*)a);
    const int rhs = *((const int*)b);

    /* Each comparison is 0 or 1, so the difference is exactly -1, 0 or 1 */
    return (lhs > rhs) - (lhs < rhs);
}
|
@ -45,7 +45,7 @@
|
||||
static int mindist_map(orte_job_t *jdata);
|
||||
|
||||
orte_rmaps_base_module_t orte_rmaps_mindist_module = {
|
||||
mindist_map
|
||||
.map_job = mindist_map
|
||||
};
|
||||
|
||||
/*
|
||||
@ -391,15 +391,6 @@ static int mindist_map(orte_job_t *jdata)
|
||||
}
|
||||
}
|
||||
|
||||
/* compute vpids and add proc objects to the job - do this after
|
||||
* each app_context so that the ranks within each context are
|
||||
* contiguous
|
||||
*/
|
||||
if (ORTE_SUCCESS != (rc = orte_rmaps_base_compute_vpids(jdata, app, &node_list))) {
|
||||
ORTE_ERROR_LOG(rc);
|
||||
return rc;
|
||||
}
|
||||
|
||||
/* track the total number of processes we mapped - must update
|
||||
* this value AFTER we compute vpids so that computation
|
||||
* is done correctly
|
||||
@ -415,6 +406,17 @@ static int mindist_map(orte_job_t *jdata)
|
||||
OBJ_DESTRUCT(&node_list);
|
||||
}
|
||||
free(orte_rmaps_base.device);
|
||||
/* compute vpids and add proc objects to the job - do this after
|
||||
* each app_context so that the ranks within each context are
|
||||
* contiguous
|
||||
*/
|
||||
if (ORTE_SUCCESS != (rc = orte_rmaps_base_compute_vpids(jdata))) {
|
||||
ORTE_ERROR_LOG(rc);
|
||||
return rc;
|
||||
}
|
||||
|
||||
/* mark the job as fully described */
|
||||
orte_set_attribute(&jdata->attributes, ORTE_JOB_FULLY_DESCRIBED, ORTE_ATTR_GLOBAL, NULL, OPAL_BOOL);
|
||||
return ORTE_SUCCESS;
|
||||
|
||||
error:
|
||||
@ -425,3 +427,96 @@ error:
|
||||
|
||||
return rc;
|
||||
}
|
||||
|
||||
#if 0
|
||||
/*
 * Record an hwloc locale for the procs of a job mapped by mindist.
 *
 * For every app context and every node in the job map, the NUMA list
 * returned by opal_hwloc_get_sorted_numa_list() for orte_rmaps_base.device
 * is walked in order; procs of this job found on the node are tagged with
 * the current NUMA object (ORTE_PROC_HWLOC_LOCALE) until that object's PU
 * count is used up, then the next NUMA object is taken.
 *
 * Returns ORTE_ERR_TAKE_NEXT_OPTION when the job was not mapped by this
 * component or the device lookup is ambiguous / fails, ORTE_ERR_SILENT
 * when a node lacks a topology, ORTE_ERR_NOT_FOUND when a NUMA object
 * cannot be retrieved, and ORTE_SUCCESS otherwise.
 */
static int assign_locations(orte_job_t *jdata)
{
    mca_base_component_t *self = &mca_rmaps_mindist_component.base_version;
    opal_list_t numa_list;
    opal_rmaps_numa_node_t *numa;
    orte_app_context_t *app;
    orte_node_t *node;
    orte_proc_t *proc;
    hwloc_obj_t obj = NULL;
    int app_idx, node_idx, proc_idx;
    int assigned, free_pus, ndevices;
    bool mine;

    mine = (NULL != jdata->map->last_mapper) &&
           (0 == strcasecmp(jdata->map->last_mapper, self->mca_component_name));
    if (!mine) {
        /* the mapper should have been set to me */
        opal_output_verbose(5, orte_rmaps_base_framework.framework_output,
                            "mca:rmaps:mindist: job %s not using mindist mapper",
                            ORTE_JOBID_PRINT(jdata->jobid));
        return ORTE_ERR_TAKE_NEXT_OPTION;
    }

    opal_output_verbose(5, orte_rmaps_base_framework.framework_output,
                        "mca:rmaps:mindist: assign locations for job %s",
                        ORTE_JOBID_PRINT(jdata->jobid));

    /* walk every app context and every mapped node, filling each NUMA
     * object with procs before moving on to the next one
     */
    for (app_idx = 0; app_idx < jdata->apps->size; app_idx++) {
        app = (orte_app_context_t*)opal_pointer_array_get_item(jdata->apps, app_idx);
        if (NULL == app) {
            continue;
        }
        for (node_idx = 0; node_idx < jdata->map->nodes->size; node_idx++) {
            node = (orte_node_t*)opal_pointer_array_get_item(jdata->map->nodes, node_idx);
            if (NULL == node) {
                continue;
            }
            if (NULL == node->topology || NULL == node->topology->topo) {
                orte_show_help("help-orte-rmaps-ppr.txt", "ppr-topo-missing",
                               true, node->name);
                return ORTE_ERR_SILENT;
            }

            /* first we need to fill summary object for root with information about nodes
             * so we call opal_hwloc_base_get_nbobjs_by_type */
            opal_hwloc_base_get_nbobjs_by_type(node->topology->topo, HWLOC_OBJ_NODE, 0, OPAL_HWLOC_AVAILABLE);
            OBJ_CONSTRUCT(&numa_list, opal_list_t);
            ndevices = opal_hwloc_get_sorted_numa_list(node->topology->topo, orte_rmaps_base.device, &numa_list);
            if (ndevices > 1 || ndevices < 0) {
                /* either several matching devices or none: fall back to byslot */
                if (ndevices > 1) {
                    orte_show_help("help-orte-rmaps-md.txt", "orte-rmaps-mindist:several-devices",
                                   true, orte_rmaps_base.device, ndevices, node->name);
                } else {
                    orte_show_help("help-orte-rmaps-md.txt", "orte-rmaps-mindist:device-not-found",
                                   true, orte_rmaps_base.device, node->name);
                }
                ORTE_SET_MAPPING_POLICY(jdata->map->mapping, ORTE_MAPPING_BYSLOT);
                OPAL_LIST_DESTRUCT(&numa_list);
                return ORTE_ERR_TAKE_NEXT_OPTION;
            }

            assigned = 0;
            OPAL_LIST_FOREACH(numa, &numa_list, opal_rmaps_numa_node_t) {
                /* get the hwloc object for this numa */
                obj = opal_hwloc_base_get_obj_by_type(node->topology->topo, HWLOC_OBJ_NODE, 0, numa->index, OPAL_HWLOC_AVAILABLE);
                if (NULL == obj) {
                    ORTE_ERROR_LOG(ORTE_ERR_NOT_FOUND);
                    OPAL_LIST_DESTRUCT(&numa_list);
                    return ORTE_ERR_NOT_FOUND;
                }
                free_pus = opal_hwloc_base_get_npus(node->topology->topo, obj);
                /* fill the numa region with procs from this job until we either
                 * have assigned everyone or the region is full */
                for (proc_idx = assigned; proc_idx < node->procs->size && 0 < free_pus; proc_idx++) {
                    proc = (orte_proc_t*)opal_pointer_array_get_item(node->procs, proc_idx);
                    if (NULL == proc) {
                        continue;
                    }
                    if (proc->name.jobid != jdata->jobid) {
                        continue;
                    }
                    orte_set_attribute(&proc->attributes, ORTE_PROC_HWLOC_LOCALE, ORTE_ATTR_LOCAL, obj, OPAL_PTR);
                    ++assigned;
                    --free_pus;
                }
            }
            OPAL_LIST_DESTRUCT(&numa_list);
        }
    }

    return ORTE_SUCCESS;
}
|
||||
#endif
|
||||
|
@ -33,9 +33,11 @@
|
||||
#include "rmaps_ppr.h"
|
||||
|
||||
static int ppr_mapper(orte_job_t *jdata);
|
||||
static int assign_locations(orte_job_t *jdata);
|
||||
|
||||
orte_rmaps_base_module_t orte_rmaps_ppr_module = {
|
||||
ppr_mapper
|
||||
.map_job = ppr_mapper,
|
||||
.assign_locations = assign_locations
|
||||
};
|
||||
|
||||
/* RHC: will eventually remove this
|
||||
@ -391,11 +393,6 @@ static int ppr_mapper(orte_job_t *jdata)
|
||||
rc = ORTE_ERR_SILENT;
|
||||
goto error;
|
||||
}
|
||||
/* compute vpids and add proc objects to the job */
|
||||
if (ORTE_SUCCESS != (rc = orte_rmaps_base_compute_vpids(jdata, app, &node_list))) {
|
||||
ORTE_ERROR_LOG(rc);
|
||||
goto error;
|
||||
}
|
||||
|
||||
/* track the total number of processes we mapped - must update
|
||||
* this AFTER we compute vpids so that computation is done
|
||||
@ -623,3 +620,122 @@ static void prune(orte_jobid_t jobid,
|
||||
error:
|
||||
opal_output(0, "INFINITE LOOP");
|
||||
}
|
||||
|
||||
/*
 * Record an hwloc locale (ORTE_PROC_HWLOC_LOCALE) for the procs of a job
 * that was mapped by the ppr component.
 *
 * The object level is taken from the job's mapping policy and the
 * procs-per-resource count from the leading number of the first entry in
 * jdata->map->ppr.
 *
 * @param jdata  Job whose procs are to be given locales.
 *
 * @return ORTE_SUCCESS on completion;
 *         ORTE_ERR_TAKE_NEXT_OPTION when the job was not mapped by ppr or
 *         uses a mapping policy with no matching level;
 *         ORTE_ERR_BAD_PARAM when the ppr string is missing or has no
 *         parsable first entry;
 *         ORTE_ERR_SILENT when a node has no topology.
 */
static int assign_locations(orte_job_t *jdata)
{
    int i, j, m, n;
    mca_base_component_t *c=&mca_rmaps_ppr_component.base_version;
    orte_node_t *node;
    orte_proc_t *proc;
    orte_app_context_t *app;
    opal_hwloc_level_t level;
    hwloc_obj_t obj;
    unsigned int cache_level=0;
    int ppr, cnt, nobjs, nprocs_mapped;
    char **ppr_req, **ck;

    if (NULL == jdata->map->last_mapper ||
        0 != strcasecmp(jdata->map->last_mapper, c->mca_component_name)) {
        /* a mapper has been specified, and it isn't me */
        opal_output_verbose(5, orte_rmaps_base_framework.framework_output,
                            "mca:rmaps:ppr: job %s not using ppr assign: %s",
                            ORTE_JOBID_PRINT(jdata->jobid),
                            (NULL == jdata->map->last_mapper) ? "NULL" : jdata->map->last_mapper);
        return ORTE_ERR_TAKE_NEXT_OPTION;
    }

    /* guard the ppr string: a NULL must not reach a "%s" conversion */
    opal_output_verbose(5, orte_rmaps_base_framework.framework_output,
                        "mca:rmaps:ppr: assigning locations for job %s with ppr %s policy %s",
                        ORTE_JOBID_PRINT(jdata->jobid),
                        (NULL == jdata->map->ppr) ? "NULL" : jdata->map->ppr,
                        orte_rmaps_base_print_mapping(jdata->map->mapping));

    /* pickup the object level */
    switch (ORTE_GET_MAPPING_POLICY(jdata->map->mapping)) {
    case ORTE_MAPPING_BYNODE:
        level = OPAL_HWLOC_NODE_LEVEL;
        break;
    case ORTE_MAPPING_BYHWTHREAD:
        level = OPAL_HWLOC_HWTHREAD_LEVEL;
        break;
    case ORTE_MAPPING_BYCORE:
        level = OPAL_HWLOC_CORE_LEVEL;
        break;
    case ORTE_MAPPING_BYSOCKET:
        level = OPAL_HWLOC_SOCKET_LEVEL;
        break;
    case ORTE_MAPPING_BYL1CACHE:
        level = OPAL_HWLOC_L1CACHE_LEVEL;
        cache_level = 1;
        break;
    case ORTE_MAPPING_BYL2CACHE:
        level = OPAL_HWLOC_L2CACHE_LEVEL;
        cache_level = 2;
        break;
    case ORTE_MAPPING_BYL3CACHE:
        level = OPAL_HWLOC_L3CACHE_LEVEL;
        cache_level = 3;
        break;
    case ORTE_MAPPING_BYNUMA:
        level = OPAL_HWLOC_NUMA_LEVEL;
        break;
    default:
        ORTE_ERROR_LOG(ORTE_ERR_NOT_FOUND);
        return ORTE_ERR_TAKE_NEXT_OPTION;
    }

    /* get the ppr value - make sure each split actually produced a
     * first token before indexing into the result
     */
    ppr_req = opal_argv_split(jdata->map->ppr, ',');
    if (NULL == ppr_req || NULL == ppr_req[0]) {
        ORTE_ERROR_LOG(ORTE_ERR_BAD_PARAM);
        if (NULL != ppr_req) {
            opal_argv_free(ppr_req);
        }
        return ORTE_ERR_BAD_PARAM;
    }
    ck = opal_argv_split(ppr_req[0], ':');
    if (NULL == ck || NULL == ck[0]) {
        ORTE_ERROR_LOG(ORTE_ERR_BAD_PARAM);
        if (NULL != ck) {
            opal_argv_free(ck);
        }
        opal_argv_free(ppr_req);
        return ORTE_ERR_BAD_PARAM;
    }
    ppr = strtol(ck[0], NULL, 10);
    opal_argv_free(ck);
    opal_argv_free(ppr_req);

    /* start assigning procs to objects, filling each object as we go until
     * all procs are assigned. */
    for (n=0; n < jdata->apps->size; n++) {
        if (NULL == (app = (orte_app_context_t*)opal_pointer_array_get_item(jdata->apps, n))) {
            continue;
        }
        nprocs_mapped = 0;
        for (m=0; m < jdata->map->nodes->size; m++) {
            if (NULL == (node = (orte_node_t*)opal_pointer_array_get_item(jdata->map->nodes, m))) {
                continue;
            }
            if (NULL == node->topology || NULL == node->topology->topo) {
                orte_show_help("help-orte-rmaps-ppr.txt", "ppr-topo-missing",
                               true, node->name);
                return ORTE_ERR_SILENT;
            }
            if (OPAL_HWLOC_NODE_LEVEL == level) {
                /* node level: every proc of this job is located at the root object */
                obj = hwloc_get_root_obj(node->topology->topo);
                for (j=0; j < node->procs->size; j++) {
                    if (NULL == (proc = (orte_proc_t*)opal_pointer_array_get_item(node->procs, j))) {
                        continue;
                    }
                    if (proc->name.jobid != jdata->jobid) {
                        continue;
                    }
                    orte_set_attribute(&proc->attributes, ORTE_PROC_HWLOC_LOCALE, ORTE_ATTR_LOCAL, obj, OPAL_PTR);
                }
            } else {
                /* get the number of resources on this node at this level */
                nobjs = opal_hwloc_base_get_nbobjs_by_type(node->topology->topo,
                                                           level, cache_level,
                                                           OPAL_HWLOC_AVAILABLE);

                /* map the specified number of procs to each such resource on this node,
                 * recording the locale of each proc so we know its cpuset
                 *
                 * NOTE(review): cnt is reset once per node rather than once
                 * per object, and the proc scan restarts at index 0 for each
                 * object - confirm this yields the intended distribution of
                 * ppr procs per object.
                 */
                cnt = 0;
                for (i=0; i < nobjs; i++) {
                    obj = opal_hwloc_base_get_obj_by_type(node->topology->topo,
                                                          level, cache_level,
                                                          i, OPAL_HWLOC_AVAILABLE);
                    for (j=0; j < node->procs->size && cnt < ppr && nprocs_mapped < app->num_procs; j++) {
                        if (NULL == (proc = (orte_proc_t*)opal_pointer_array_get_item(node->procs, j))) {
                            continue;
                        }
                        if (proc->name.jobid != jdata->jobid) {
                            continue;
                        }
                        nprocs_mapped++;
                        cnt++;
                        orte_set_attribute(&proc->attributes, ORTE_PROC_HWLOC_LOCALE, ORTE_ATTR_LOCAL, obj, OPAL_PTR);
                    }
                }
            }
        }
    }
    return ORTE_SUCCESS;
}
|
||||
|
@ -51,6 +51,13 @@
|
||||
#include "orte/mca/rmaps/rank_file/rmaps_rank_file_lex.h"
|
||||
#include "orte/runtime/orte_globals.h"
|
||||
|
||||
static int orte_rmaps_rf_map(orte_job_t *jdata);
|
||||
|
||||
orte_rmaps_base_module_t orte_rmaps_rank_file_module = {
|
||||
.map_job = orte_rmaps_rf_map
|
||||
};
|
||||
|
||||
|
||||
static int orte_rmaps_rank_file_parse(const char *);
|
||||
static char *orte_rmaps_rank_file_parse_string_or_int(void);
|
||||
static const char *orte_rmaps_rank_file_name_cur = NULL;
|
||||
@ -363,6 +370,9 @@ static int orte_rmaps_rf_map(orte_job_t *jdata)
|
||||
}
|
||||
}
|
||||
OBJ_DESTRUCT(&rankmap);
|
||||
/* mark the job as fully described */
|
||||
orte_set_attribute(&jdata->attributes, ORTE_JOB_FULLY_DESCRIBED, ORTE_ATTR_GLOBAL, NULL, OPAL_BOOL);
|
||||
|
||||
return rc;
|
||||
|
||||
error:
|
||||
@ -371,11 +381,6 @@ static int orte_rmaps_rf_map(orte_job_t *jdata)
|
||||
return rc;
|
||||
}
|
||||
|
||||
orte_rmaps_base_module_t orte_rmaps_rank_file_module = {
|
||||
orte_rmaps_rf_map
|
||||
};
|
||||
|
||||
|
||||
static int orte_rmaps_rank_file_parse(const char *rankfile)
|
||||
{
|
||||
int token;
|
||||
|
@ -5,7 +5,7 @@
|
||||
* Corporation. All rights reserved.
|
||||
* Copyright (c) 2011-2012 Los Alamos National Security, LLC.
|
||||
* All rights reserved.
|
||||
* Copyright (c) 2014-2016 Intel, Inc. All rights reserved.
|
||||
* Copyright (c) 2014-2017 Intel, Inc. All rights reserved.
|
||||
*
|
||||
* $COPYRIGHT$
|
||||
*
|
||||
@ -36,6 +36,14 @@
|
||||
#include "orte/mca/rmaps/base/base.h"
|
||||
#include "rmaps_resilient.h"
|
||||
|
||||
static int orte_rmaps_resilient_map(orte_job_t *jdata);
|
||||
static int resilient_assign(orte_job_t *jdata);
|
||||
|
||||
orte_rmaps_base_module_t orte_rmaps_resilient_module = {
|
||||
.map_job = orte_rmaps_resilient_map,
|
||||
.assign_locations = resilient_assign
|
||||
};
|
||||
|
||||
|
||||
/*
|
||||
* Local variable
|
||||
@ -270,9 +278,22 @@ static int orte_rmaps_resilient_map(orte_job_t *jdata)
|
||||
return rc;
|
||||
}
|
||||
|
||||
orte_rmaps_base_module_t orte_rmaps_resilient_module = {
|
||||
orte_rmaps_resilient_map
|
||||
};
|
||||
/*
 * Location-assignment hook for the resilient mapper.
 *
 * Declines with ORTE_ERR_TAKE_NEXT_OPTION unless the job map records this
 * component as the last mapper used. When it does, the function reports
 * ORTE_ERR_NOT_IMPLEMENTED, as no assignment logic is provided here.
 */
static int resilient_assign(orte_job_t *jdata)
{
    const char *prior = jdata->map->last_mapper;
    mca_base_component_t *self = &mca_rmaps_resilient_component.super.base_version;

    if (NULL != prior && 0 == strcasecmp(prior, self->mca_component_name)) {
        /* the job names us as its mapper, but we have no way to
         * assign locations */
        return ORTE_ERR_NOT_IMPLEMENTED;
    }

    /* either no mapper was recorded, or it was some other component */
    opal_output_verbose(5, orte_rmaps_base_framework.framework_output,
                        "mca:rmaps:resilient: job %s not using resilient assign: %s",
                        ORTE_JOBID_PRINT(jdata->jobid),
                        (NULL == prior) ? "NULL" : prior);
    return ORTE_ERR_TAKE_NEXT_OPTION;
}
|
||||
|
||||
static char *orte_getline(FILE *fp)
|
||||
{
|
||||
@ -855,15 +876,6 @@ static int map_to_ftgrps(orte_job_t *jdata)
|
||||
/* track number of procs */
|
||||
jdata->num_procs += app->num_procs;
|
||||
|
||||
/* compute vpids and add proc objects to the job - this has to be
|
||||
* done after each app_context is mapped in order to keep the
|
||||
* vpids contiguous within an app_context
|
||||
*/
|
||||
if (ORTE_SUCCESS != (rc = orte_rmaps_base_compute_vpids(jdata, app, &node_list))) {
|
||||
ORTE_ERROR_LOG(rc);
|
||||
return rc;
|
||||
}
|
||||
|
||||
/* cleanup the node list - it can differ from one app_context
|
||||
* to another, so we have to get it every time
|
||||
*/
|
||||
@ -873,11 +885,5 @@ static int map_to_ftgrps(orte_job_t *jdata)
|
||||
OBJ_DESTRUCT(&node_list);
|
||||
}
|
||||
|
||||
/* compute and save local ranks */
|
||||
if (ORTE_SUCCESS != (rc = orte_rmaps_base_compute_local_ranks(jdata))) {
|
||||
ORTE_ERROR_LOG(rc);
|
||||
return rc;
|
||||
}
|
||||
|
||||
return ORTE_SUCCESS;
|
||||
}
|
||||
|
@ -60,23 +60,30 @@ BEGIN_C_DECLS
|
||||
* rmaps module functions
|
||||
*/
|
||||
|
||||
/* mapping event - the event one activates to schedule mapping
|
||||
* of procs to nodes for pending jobs
|
||||
*/
|
||||
ORTE_DECLSPEC extern opal_event_t orte_mapping_event;
|
||||
|
||||
/**
|
||||
* RMAPS module functions - these are not accessible to the outside world,
|
||||
* but are defined here by convention
|
||||
*/
|
||||
|
||||
/* map a job - used by the HNP to compute the #procs on each node.
|
||||
* This is passed to the backend daemons as a regex which they
|
||||
* use to create an orte_job_map_t for the job */
|
||||
typedef int (*orte_rmaps_base_module_map_fn_t)(orte_job_t *jdata);
|
||||
|
||||
/* assign a location to each process. Used by the backend daemons,
|
||||
* this function takes the orte_job_map_t created from the regex
|
||||
* and assigns each process to a specific location within the
|
||||
* hardware topology based on the --map-by directive */
|
||||
typedef int (*orte_rmaps_base_module_assign_loc_fn_t)(orte_job_t *jdata);
|
||||
|
||||
/*
|
||||
* rmaps module version 3.0.0
|
||||
*/
|
||||
/* Table of entry points that an rmaps module exposes to the framework */
struct orte_rmaps_base_module_3_0_0_t {
|
||||
/** Mapping function pointer - maps a job's procs onto nodes */
|
||||
orte_rmaps_base_module_map_fn_t map_job;
|
||||
/** Location-assignment function pointer - places each proc within the
 *  hardware topology of its node. Some modules initialize only map_job
 *  and leave this entry unset. */
|
||||
orte_rmaps_base_module_assign_loc_fn_t assign_locations;
|
||||
};
|
||||
/** Convenience typedef */
|
||||
typedef struct orte_rmaps_base_module_3_0_0_t orte_rmaps_base_module_3_0_0_t;
|
||||
|
@ -10,6 +10,7 @@
|
||||
# Copyright (c) 2004-2005 The Regents of the University of California.
|
||||
# All rights reserved.
|
||||
# Copyright (c) 2010 Cisco Systems, Inc. All rights reserved.
|
||||
# Copyright (c) 2017 Intel, Inc. All rights reserved.
|
||||
# $COPYRIGHT$
|
||||
#
|
||||
# Additional copyrights may follow
|
||||
@ -23,7 +24,8 @@ sources = \
|
||||
rmaps_rr.c \
|
||||
rmaps_rr.h \
|
||||
rmaps_rr_component.c \
|
||||
rmaps_rr_mappers.c
|
||||
rmaps_rr_mappers.c \
|
||||
rmaps_rr_assign.c
|
||||
|
||||
# Make the output library in this directory, and name it either
|
||||
# mca_<type>_<name>.la (for DSO builds) or libmca_<type>_<name>.la
|
||||
|
@ -243,15 +243,6 @@ static int orte_rmaps_rr_map(orte_job_t *jdata)
|
||||
goto error;
|
||||
}
|
||||
|
||||
/* compute vpids and add proc objects to the job - do this after
|
||||
* each app_context so that the ranks within each context are
|
||||
* contiguous
|
||||
*/
|
||||
if (ORTE_SUCCESS != (rc = orte_rmaps_base_compute_vpids(jdata, app, &node_list))) {
|
||||
ORTE_ERROR_LOG(rc);
|
||||
return rc;
|
||||
}
|
||||
|
||||
/* track the total number of processes we mapped - must update
|
||||
* this value AFTER we compute vpids so that computation
|
||||
* is done correctly
|
||||
@ -278,6 +269,113 @@ static int orte_rmaps_rr_map(orte_job_t *jdata)
|
||||
return rc;
|
||||
}
|
||||
|
||||
/*
 * Assign an hwloc locale to every proc of a job that was mapped by the
 * round-robin component.
 *
 * Returns ORTE_ERR_TAKE_NEXT_OPTION when the job map does not name this
 * component as its last mapper. For bynode/byslot policies the procs are
 * assigned to the root object of their node. For object-level policies the
 * procs are assigned by object; if that reports ORTE_ERR_NOT_FOUND, the
 * job's mapping policy is switched to byslot and root-level assignment is
 * used instead. An unrecognized policy shows a help message and returns
 * ORTE_ERR_SILENT.
 */
static int orte_rmaps_rr_assign_locations(orte_job_t *jdata)
{
    mca_base_component_t *self = &mca_rmaps_round_robin_component.base_version;
    const char *prior = jdata->map->last_mapper;
    hwloc_obj_type_t target;
    unsigned cache_level = 0;
    int policy;
    int rc;

    if (NULL == prior || 0 != strcasecmp(prior, self->mca_component_name)) {
        /* a mapper has been specified, and it isn't me */
        opal_output_verbose(5, orte_rmaps_base_framework.framework_output,
                            "mca:rmaps:rr: job %s not using rr mapper",
                            ORTE_JOBID_PRINT(jdata->jobid));
        return ORTE_ERR_TAKE_NEXT_OPTION;
    }

    opal_output_verbose(5, orte_rmaps_base_framework.framework_output,
                        "mca:rmaps:rr: assign locations for job %s",
                        ORTE_JOBID_PRINT(jdata->jobid));

    /* read the policy once - the fallback below rewrites it, but nothing
     * consults it again after that point */
    policy = ORTE_GET_MAPPING_POLICY(jdata->map->mapping);

    /* bynode and byslot directives assign at the root object level */
    if (ORTE_MAPPING_BYNODE == policy || ORTE_MAPPING_BYSLOT == policy) {
        return orte_rmaps_rr_assign_root_level(jdata);
    }

    /* otherwise, translate the directive into the object type (and cache
     * level, where relevant) to assign by */
    if (ORTE_MAPPING_BYHWTHREAD == policy) {
        target = HWLOC_OBJ_PU;
    } else if (ORTE_MAPPING_BYCORE == policy) {
        target = HWLOC_OBJ_CORE;
    } else if (ORTE_MAPPING_BYL1CACHE == policy) {
        target = HWLOC_OBJ_CACHE;
        cache_level = 1;
    } else if (ORTE_MAPPING_BYL2CACHE == policy) {
        target = HWLOC_OBJ_CACHE;
        cache_level = 2;
    } else if (ORTE_MAPPING_BYL3CACHE == policy) {
        target = HWLOC_OBJ_CACHE;
        cache_level = 3;
    } else if (ORTE_MAPPING_BYSOCKET == policy) {
        target = HWLOC_OBJ_SOCKET;
    } else if (ORTE_MAPPING_BYNUMA == policy) {
        target = HWLOC_OBJ_NODE;
    } else {
        /* unrecognized mapping directive */
        orte_show_help("help-orte-rmaps-base.txt", "unrecognized-policy",
                       true, "mapping",
                       orte_rmaps_base_print_mapping(jdata->map->mapping));
        return ORTE_ERR_SILENT;
    }

    rc = orte_rmaps_rr_assign_byobj(jdata, target, cache_level);
    if (ORTE_ERR_NOT_FOUND == rc) {
        /* we couldn't assign by this object because it isn't
         * available, but the error allows us to try byslot,
         * so do so */
        ORTE_SET_MAPPING_POLICY(jdata->map->mapping, ORTE_MAPPING_BYSLOT);
        rc = orte_rmaps_rr_assign_root_level(jdata);
    }
    return rc;
}
|
||||
|
||||
orte_rmaps_base_module_t orte_rmaps_round_robin_module = {
|
||||
orte_rmaps_rr_map
|
||||
.map_job = orte_rmaps_rr_map,
|
||||
.assign_locations = orte_rmaps_rr_assign_locations
|
||||
};
|
||||
|
@ -9,7 +9,7 @@
|
||||
* University of Stuttgart. All rights reserved.
|
||||
* Copyright (c) 2004-2005 The Regents of the University of California.
|
||||
* All rights reserved.
|
||||
* Copyright (c) 2015 Intel, Inc. All rights reserved.
|
||||
* Copyright (c) 2015-2017 Intel, Inc. All rights reserved.
|
||||
* Copyright (c) 2017 Cisco Systems, Inc. All rights reserved
|
||||
* $COPYRIGHT$
|
||||
*
|
||||
@ -54,6 +54,13 @@ ORTE_MODULE_DECLSPEC int orte_rmaps_rr_byobj(orte_job_t *jdata, orte_app_context
|
||||
orte_vpid_t num_procs,
|
||||
hwloc_obj_type_t target, unsigned cache_level);
|
||||
|
||||
ORTE_MODULE_DECLSPEC int orte_rmaps_rr_assign_root_level(orte_job_t *jdata);
|
||||
|
||||
ORTE_MODULE_DECLSPEC int orte_rmaps_rr_assign_byobj(orte_job_t *jdata,
|
||||
hwloc_obj_type_t target,
|
||||
unsigned cache_level);
|
||||
|
||||
|
||||
END_C_DECLS
|
||||
|
||||
#endif
|
||||
|
171
orte/mca/rmaps/round_robin/rmaps_rr_assign.c
Обычный файл
171
orte/mca/rmaps/round_robin/rmaps_rr_assign.c
Обычный файл
@ -0,0 +1,171 @@
|
||||
/*
|
||||
* Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
|
||||
* University Research and Technology
|
||||
* Corporation. All rights reserved.
|
||||
* Copyright (c) 2004-2011 The University of Tennessee and The University
|
||||
* of Tennessee Research Foundation. All rights
|
||||
* reserved.
|
||||
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
|
||||
* University of Stuttgart. All rights reserved.
|
||||
* Copyright (c) 2004-2005 The Regents of the University of California.
|
||||
* All rights reserved.
|
||||
* Copyright (c) 2009-2013 Cisco Systems, Inc. All rights reserved.
|
||||
* Copyright (c) 2013-2017 Intel, Inc. All rights reserved.
|
||||
* Copyright (c) 2015 Research Organization for Information Science
|
||||
* and Technology (RIST). All rights reserved.
|
||||
* $COPYRIGHT$
|
||||
*
|
||||
* Additional copyrights may follow
|
||||
*
|
||||
* $HEADER$
|
||||
*/
|
||||
|
||||
#include "orte_config.h"
|
||||
#include "orte/constants.h"
|
||||
|
||||
#include <string.h>
|
||||
|
||||
#include "opal/util/output.h"
|
||||
#include "opal/mca/hwloc/base/base.h"
|
||||
|
||||
#include "orte/util/show_help.h"
|
||||
#include "orte/util/name_fns.h"
|
||||
#include "orte/runtime/orte_globals.h"
|
||||
#include "orte/mca/errmgr/errmgr.h"
|
||||
|
||||
#include "orte/mca/rmaps/base/rmaps_private.h"
|
||||
#include "orte/mca/rmaps/base/base.h"
|
||||
#include "rmaps_rr.h"
|
||||
|
||||
/*
 * Give every proc of the job the root object of its node's topology as
 * its hwloc locale.
 *
 * Nodes in the job map that have no topology are skipped, and procs on a
 * node that belong to a different job are left untouched.
 *
 * @param jdata  job whose procs are to be assigned
 * @return ORTE_SUCCESS in all cases
 */
int orte_rmaps_rr_assign_root_level(orte_job_t *jdata)
{
    orte_node_t *nd;
    orte_proc_t *pr;
    hwloc_obj_t root;
    int nidx, pidx;

    opal_output_verbose(2, orte_rmaps_base_framework.framework_output,
                        "mca:rmaps:rr: assigning procs to root level for job %s",
                        ORTE_JOBID_PRINT(jdata->jobid));

    for (nidx = 0; nidx < jdata->map->nodes->size; nidx++) {
        nd = (orte_node_t*)opal_pointer_array_get_item(jdata->map->nodes, nidx);
        if (NULL == nd) {
            continue;
        }
        opal_output_verbose(2, orte_rmaps_base_framework.framework_output,
                            "mca:rmaps:rr:slot working node %s",
                            nd->name);

        /* without a topology there is no root object to hand out,
         * so there is nothing we can do for this node */
        if (NULL == nd->topology || NULL == nd->topology->topo) {
            continue;
        }

        /* the locale is only resolved to the node level here */
        root = hwloc_get_root_obj(nd->topology->topo);

        for (pidx = 0; pidx < nd->procs->size; pidx++) {
            pr = (orte_proc_t*)opal_pointer_array_get_item(nd->procs, pidx);
            if (NULL == pr) {
                continue;
            }
            if (pr->name.jobid != jdata->jobid) {
                /* this proc belongs to some other job */
                opal_output_verbose(5, orte_rmaps_base_framework.framework_output,
                                    "mca:rmaps:rr:assign skipping proc %s - from another job",
                                    ORTE_NAME_PRINT(&pr->name));
                continue;
            }
            orte_set_attribute(&pr->attributes, ORTE_PROC_HWLOC_LOCALE, ORTE_ATTR_LOCAL, root, OPAL_PTR);
        }
    }

    return ORTE_SUCCESS;
}
|
||||
|
||||
/* mapping by hwloc object looks a lot like mapping by node,
|
||||
* but has the added complication of possibly having different
|
||||
* numbers of objects on each node
|
||||
*/
|
||||
int orte_rmaps_rr_assign_byobj(orte_job_t *jdata,
|
||||
hwloc_obj_type_t target,
|
||||
unsigned cache_level)
|
||||
{
|
||||
int start, j, m, n;
|
||||
orte_app_context_t *app;
|
||||
orte_node_t *node;
|
||||
orte_proc_t *proc;
|
||||
hwloc_obj_t obj=NULL;
|
||||
unsigned int nobjs;
|
||||
|
||||
opal_output_verbose(2, orte_rmaps_base_framework.framework_output,
|
||||
"mca:rmaps:rr: assigning locations by %s for job %s",
|
||||
hwloc_obj_type_string(target),
|
||||
ORTE_JOBID_PRINT(jdata->jobid));
|
||||
|
||||
|
||||
/* start mapping procs onto objects, filling each object as we go until
|
||||
* all procs are mapped. If one pass doesn't catch all the required procs,
|
||||
* then loop thru the list again to handle the oversubscription
|
||||
*/
|
||||
for (n=0; n < jdata->apps->size; n++) {
|
||||
if (NULL == (app = (orte_app_context_t*)opal_pointer_array_get_item(jdata->apps, n))) {
|
||||
continue;
|
||||
}
|
||||
for (m=0; m < jdata->map->nodes->size; m++) {
|
||||
if (NULL == (node = (orte_node_t*)opal_pointer_array_get_item(jdata->map->nodes, m))) {
|
||||
continue;
|
||||
}
|
||||
if (NULL == node->topology || NULL == node->topology->topo) {
|
||||
orte_show_help("help-orte-rmaps-ppr.txt", "ppr-topo-missing",
|
||||
true, node->name);
|
||||
return ORTE_ERR_SILENT;
|
||||
}
|
||||
/* get the number of objects of this type on this node */
|
||||
nobjs = opal_hwloc_base_get_nbobjs_by_type(node->topology->topo, target, cache_level, OPAL_HWLOC_AVAILABLE);
|
||||
if (0 == nobjs) {
|
||||
continue;
|
||||
}
|
||||
opal_output_verbose(2, orte_rmaps_base_framework.framework_output,
|
||||
"mca:rmaps:rr: found %u %s objects on node %s",
|
||||
nobjs, hwloc_obj_type_string(target), node->name);
|
||||
|
||||
/* if this is a comm_spawn situation, start with the object
|
||||
* where the parent left off and increment */
|
||||
if (ORTE_JOBID_INVALID != jdata->originator.jobid) {
|
||||
start = (jdata->bkmark_obj + 1) % nobjs;
|
||||
} else {
|
||||
start = 0;
|
||||
}
|
||||
/* loop over the procs on this node */
|
||||
for (j=0; j < node->procs->size; j++) {
|
||||
if (NULL == (proc = (orte_proc_t*)opal_pointer_array_get_item(node->procs, j))) {
|
||||
continue;
|
||||
}
|
||||
/* ignore procs from other jobs */
|
||||
if (proc->name.jobid != jdata->jobid) {
|
||||
opal_output_verbose(5, orte_rmaps_base_framework.framework_output,
|
||||
"mca:rmaps:rr:assign skipping proc %s - from another job",
|
||||
ORTE_NAME_PRINT(&proc->name));
|
||||
continue;
|
||||
}
|
||||
/* ignore procs from other apps */
|
||||
if (proc->app_idx != app->idx) {
|
||||
continue;
|
||||
}
|
||||
opal_output_verbose(20, orte_rmaps_base_framework.framework_output,
|
||||
"mca:rmaps:rr: assigning proc to object %d", (j + start) % nobjs);
|
||||
/* get the hwloc object */
|
||||
if (NULL == (obj = opal_hwloc_base_get_obj_by_type(node->topology->topo, target, cache_level, (j + start) % nobjs, OPAL_HWLOC_AVAILABLE))) {
|
||||
ORTE_ERROR_LOG(ORTE_ERR_NOT_FOUND);
|
||||
return ORTE_ERR_NOT_FOUND;
|
||||
}
|
||||
if (orte_rmaps_base.cpus_per_rank > (int)opal_hwloc_base_get_npus(node->topology->topo, obj)) {
|
||||
orte_show_help("help-orte-rmaps-base.txt", "mapping-too-low", true,
|
||||
orte_rmaps_base.cpus_per_rank, opal_hwloc_base_get_npus(node->topology->topo, obj),
|
||||
orte_rmaps_base_print_mapping(orte_rmaps_base.mapping));
|
||||
return ORTE_ERR_SILENT;
|
||||
}
|
||||
orte_set_attribute(&proc->attributes, ORTE_PROC_HWLOC_LOCALE, ORTE_ATTR_LOCAL, obj, OPAL_PTR);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return ORTE_SUCCESS;
|
||||
}
|
@ -54,7 +54,7 @@ static int orte_rmaps_seq_map(orte_job_t *jdata);
|
||||
|
||||
/* define the module */
|
||||
orte_rmaps_base_module_t orte_rmaps_seq_module = {
|
||||
orte_rmaps_seq_map
|
||||
.map_job = orte_rmaps_seq_map
|
||||
};
|
||||
|
||||
/* local object for tracking rank locations */
|
||||
@ -517,6 +517,10 @@ static int orte_rmaps_seq_map(orte_job_t *jdata)
|
||||
}
|
||||
}
|
||||
|
||||
/* mark that this job is to be fully
|
||||
* described in the launch msg */
|
||||
orte_set_attribute(&jdata->attributes, ORTE_JOB_FULLY_DESCRIBED, ORTE_ATTR_GLOBAL, NULL, OPAL_BOOL);
|
||||
|
||||
return ORTE_SUCCESS;
|
||||
|
||||
error:
|
||||
|
@ -899,8 +899,6 @@ void orte_state_base_check_all_complete(int fd, short args, void *cbdata)
|
||||
opal_pointer_array_set_item(map->nodes, index, NULL);
|
||||
/* maintain accounting */
|
||||
OBJ_RELEASE(node);
|
||||
/* flag that the node is no longer in a map */
|
||||
ORTE_FLAG_UNSET(node, ORTE_NODE_FLAG_MAPPED);
|
||||
}
|
||||
OBJ_RELEASE(map);
|
||||
jdata->map = NULL;
|
||||
|
@ -255,7 +255,7 @@ static void vm_ready(int fd, short args, void *cbdata)
|
||||
/* if we couldn't provide the allocation regex on the orted
|
||||
* cmd line, then we need to provide all the info here */
|
||||
if (!orte_nidmap_communicated) {
|
||||
if (ORTE_SUCCESS != (rc = orte_util_nidmap_create(&nidmap))) {
|
||||
if (ORTE_SUCCESS != (rc = orte_util_nidmap_create(orte_node_pool, &nidmap))) {
|
||||
ORTE_ERROR_LOG(rc);
|
||||
OBJ_RELEASE(buf);
|
||||
return;
|
||||
|
@ -73,6 +73,8 @@ static orte_job_state_t launch_states[] = {
|
||||
ORTE_JOB_STATE_DAEMONS_LAUNCHED,
|
||||
ORTE_JOB_STATE_DAEMONS_REPORTED,
|
||||
ORTE_JOB_STATE_VM_READY,
|
||||
ORTE_JOB_STATE_MAP,
|
||||
ORTE_JOB_STATE_MAP_COMPLETE,
|
||||
ORTE_JOB_STATE_SYSTEM_PREP,
|
||||
ORTE_JOB_STATE_LAUNCH_APPS,
|
||||
ORTE_JOB_STATE_LOCAL_LAUNCH_COMPLETE,
|
||||
@ -91,6 +93,8 @@ static orte_state_cbfunc_t launch_callbacks[] = {
|
||||
orte_plm_base_daemons_launched,
|
||||
orte_plm_base_daemons_reported,
|
||||
orte_plm_base_vm_ready,
|
||||
orte_rmaps_base_map_job,
|
||||
orte_plm_base_mapping_complete,
|
||||
orte_plm_base_complete_setup,
|
||||
orte_plm_base_launch_apps,
|
||||
orte_state_base_local_launch_complete,
|
||||
|
@ -61,6 +61,7 @@ orte_state_base_module_t orte_state_novm_module = {
|
||||
};
|
||||
|
||||
static void allocation_complete(int fd, short args, void *cbdata);
|
||||
static void map_complete(int fd, short args, void *cbdata);
|
||||
static void vm_ready(int fd, short args, void *cbdata);
|
||||
|
||||
/* defined state machine sequence for no VM - individual
|
||||
@ -74,6 +75,8 @@ static orte_job_state_t launch_states[] = {
|
||||
ORTE_JOB_STATE_DAEMONS_LAUNCHED,
|
||||
ORTE_JOB_STATE_DAEMONS_REPORTED,
|
||||
ORTE_JOB_STATE_VM_READY,
|
||||
ORTE_JOB_STATE_MAP,
|
||||
ORTE_JOB_STATE_MAP_COMPLETE,
|
||||
ORTE_JOB_STATE_SYSTEM_PREP,
|
||||
ORTE_JOB_STATE_LAUNCH_APPS,
|
||||
ORTE_JOB_STATE_LOCAL_LAUNCH_COMPLETE,
|
||||
@ -93,6 +96,8 @@ static orte_state_cbfunc_t launch_callbacks[] = {
|
||||
orte_plm_base_daemons_launched,
|
||||
orte_plm_base_daemons_reported,
|
||||
vm_ready,
|
||||
orte_rmaps_base_map_job,
|
||||
map_complete,
|
||||
orte_plm_base_complete_setup,
|
||||
orte_plm_base_launch_apps,
|
||||
orte_state_base_local_launch_complete,
|
||||
@ -195,7 +200,7 @@ static void allocation_complete(int fd, short args, void *cbdata)
|
||||
orte_job_t *daemons;
|
||||
orte_topology_t *t;
|
||||
orte_node_t *node;
|
||||
int i, rc;
|
||||
int i;
|
||||
|
||||
jdata->state = ORTE_JOB_STATE_ALLOCATION_COMPLETE;
|
||||
|
||||
@ -235,21 +240,27 @@ static void allocation_complete(int fd, short args, void *cbdata)
|
||||
}
|
||||
}
|
||||
|
||||
/* perform the map */
|
||||
if (ORTE_SUCCESS != (rc = orte_rmaps_base_map_job(jdata))) {
|
||||
ORTE_ERROR_LOG(rc);
|
||||
ORTE_FORCED_TERMINATE(ORTE_ERROR_DEFAULT_EXIT_CODE);
|
||||
goto done;
|
||||
}
|
||||
|
||||
/* after we map, we are ready to launch the daemons */
|
||||
ORTE_ACTIVATE_JOB_STATE(jdata, ORTE_JOB_STATE_LAUNCH_DAEMONS);
|
||||
/* move to the map stage */
|
||||
ORTE_ACTIVATE_JOB_STATE(jdata, ORTE_JOB_STATE_MAP);
|
||||
|
||||
done:
|
||||
/* cleanup */
|
||||
OBJ_RELEASE(state);
|
||||
}
|
||||
|
||||
/* after we map, we are ready to launch the daemons */
|
||||
static void map_complete(int fd, short args, void *cbdata)
|
||||
{
|
||||
orte_state_caddy_t *state = (orte_state_caddy_t*)cbdata;
|
||||
orte_job_t *jdata = state->jdata;
|
||||
|
||||
jdata->state = ORTE_JOB_STATE_MAP_COMPLETE;
|
||||
/* move to the map stage */
|
||||
ORTE_ACTIVATE_JOB_STATE(jdata, ORTE_JOB_STATE_LAUNCH_DAEMONS);
|
||||
|
||||
/* cleanup */
|
||||
OBJ_RELEASE(state);
|
||||
}
|
||||
|
||||
static void vm_ready(int fd, short args, void *cbdata)
|
||||
{
|
||||
|
@ -99,6 +99,10 @@ int pmix_server_publish_fn(opal_process_name_t *proc,
|
||||
opal_pmix_persistence_t persist = OPAL_PMIX_PERSIST_APP;
|
||||
bool rset, pset;
|
||||
|
||||
opal_output_verbose(1, orte_pmix_server_globals.output,
|
||||
"%s orted:pmix:server PUBLISH",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME));
|
||||
|
||||
/* create the caddy */
|
||||
req = OBJ_NEW(pmix_server_req_t);
|
||||
(void)asprintf(&req->operation, "PUBLISH: %s:%d", __FILE__, __LINE__);
|
||||
@ -259,6 +263,10 @@ int pmix_server_lookup_fn(opal_process_name_t *proc, char **keys,
|
||||
|
||||
/* pack the keys too */
|
||||
for (i=0; i < nkeys; i++) {
|
||||
opal_output_verbose(5, orte_pmix_server_globals.output,
|
||||
"%s lookup data %s for proc %s",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), keys[i],
|
||||
ORTE_NAME_PRINT(proc));
|
||||
if (OPAL_SUCCESS != (rc = opal_dss.pack(&req->msg, &keys[i], 1, OPAL_STRING))) {
|
||||
ORTE_ERROR_LOG(rc);
|
||||
OBJ_RELEASE(req);
|
||||
|
@ -89,6 +89,53 @@ int orte_dt_pack_job(opal_buffer_t *buffer, const void *src,
|
||||
return rc;
|
||||
}
|
||||
|
||||
/* pack the attributes that need to be sent */
|
||||
count = 0;
|
||||
OPAL_LIST_FOREACH(kv, &jobs[i]->attributes, orte_attribute_t) {
|
||||
if (ORTE_ATTR_GLOBAL == kv->local) {
|
||||
++count;
|
||||
}
|
||||
}
|
||||
if (ORTE_SUCCESS != (rc = opal_dss_pack_buffer(buffer, (void*)(&count), 1, ORTE_STD_CNTR))) {
|
||||
ORTE_ERROR_LOG(rc);
|
||||
return rc;
|
||||
}
|
||||
OPAL_LIST_FOREACH(kv, &jobs[i]->attributes, orte_attribute_t) {
|
||||
if (ORTE_ATTR_GLOBAL == kv->local) {
|
||||
if (ORTE_SUCCESS != (rc = opal_dss_pack_buffer(buffer, (void*)&kv, 1, ORTE_ATTRIBUTE))) {
|
||||
ORTE_ERROR_LOG(rc);
|
||||
return rc;
|
||||
}
|
||||
}
|
||||
}
|
||||
/* check for job info attribute */
|
||||
cache = NULL;
|
||||
if (orte_get_attribute(&jobs[i]->attributes, ORTE_JOB_INFO_CACHE, (void**)&cache, OPAL_PTR) &&
|
||||
NULL != cache) {
|
||||
/* we need to pack these as well, but they are composed
|
||||
* of opal_value_t's on a list. So first pack the number
|
||||
* of list elements */
|
||||
count = opal_list_get_size(cache);
|
||||
if (ORTE_SUCCESS != (rc = opal_dss_pack_buffer(buffer, (void*)(&count), 1, ORTE_STD_CNTR))) {
|
||||
ORTE_ERROR_LOG(rc);
|
||||
return rc;
|
||||
}
|
||||
/* now pack each element on the list */
|
||||
OPAL_LIST_FOREACH(val, cache, opal_value_t) {
|
||||
if (ORTE_SUCCESS != (rc = opal_dss_pack_buffer(buffer, (void*)&val, 1, OPAL_VALUE))) {
|
||||
ORTE_ERROR_LOG(rc);
|
||||
return rc;
|
||||
}
|
||||
}
|
||||
} else {
|
||||
/* pack a zero to indicate no job info is being passed */
|
||||
count = 0;
|
||||
if (ORTE_SUCCESS != (rc = opal_dss_pack_buffer(buffer, (void*)(&count), 1, ORTE_STD_CNTR))) {
|
||||
ORTE_ERROR_LOG(rc);
|
||||
return rc;
|
||||
}
|
||||
}
|
||||
|
||||
/* pack the personality */
|
||||
count = opal_argv_count(jobs[i]->personality);
|
||||
if (ORTE_SUCCESS != (rc = opal_dss_pack_buffer(buffer, &count, 1, OPAL_INT32))) {
|
||||
@ -134,7 +181,10 @@ int orte_dt_pack_job(opal_buffer_t *buffer, const void *src,
|
||||
return rc;
|
||||
}
|
||||
|
||||
if (orte_no_vm && 0 < jobs[i]->num_procs) {
|
||||
if (0 < jobs[i]->num_procs) {
|
||||
/* check attributes to see if this job is to be fully
|
||||
* described in the launch msg */
|
||||
if (orte_get_attribute(&jobs[i]->attributes, ORTE_JOB_FULLY_DESCRIBED, NULL, OPAL_BOOL)) {
|
||||
for (j=0; j < jobs[i]->procs->size; j++) {
|
||||
if (NULL == (proc = (orte_proc_t*)opal_pointer_array_get_item(jobs[i]->procs, j))) {
|
||||
continue;
|
||||
@ -145,6 +195,7 @@ int orte_dt_pack_job(opal_buffer_t *buffer, const void *src,
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/* pack the stdin target */
|
||||
if (ORTE_SUCCESS != (rc = opal_dss_pack_buffer(buffer,
|
||||
@ -198,53 +249,6 @@ int orte_dt_pack_job(opal_buffer_t *buffer, const void *src,
|
||||
ORTE_ERROR_LOG(rc);
|
||||
return rc;
|
||||
}
|
||||
|
||||
/* pack the attributes that need to be sent */
|
||||
count = 0;
|
||||
OPAL_LIST_FOREACH(kv, &jobs[i]->attributes, orte_attribute_t) {
|
||||
if (ORTE_ATTR_GLOBAL == kv->local) {
|
||||
++count;
|
||||
}
|
||||
}
|
||||
if (ORTE_SUCCESS != (rc = opal_dss_pack_buffer(buffer, (void*)(&count), 1, ORTE_STD_CNTR))) {
|
||||
ORTE_ERROR_LOG(rc);
|
||||
return rc;
|
||||
}
|
||||
OPAL_LIST_FOREACH(kv, &jobs[i]->attributes, orte_attribute_t) {
|
||||
if (ORTE_ATTR_GLOBAL == kv->local) {
|
||||
if (ORTE_SUCCESS != (rc = opal_dss_pack_buffer(buffer, (void*)&kv, 1, ORTE_ATTRIBUTE))) {
|
||||
ORTE_ERROR_LOG(rc);
|
||||
return rc;
|
||||
}
|
||||
}
|
||||
}
|
||||
/* check for job info attribute */
|
||||
cache = NULL;
|
||||
if (orte_get_attribute(&jobs[i]->attributes, ORTE_JOB_INFO_CACHE, (void**)&cache, OPAL_PTR) &&
|
||||
NULL != cache) {
|
||||
/* we need to pack these as well, but they are composed
|
||||
* of opal_value_t's on a list. So first pack the number
|
||||
* of list elements */
|
||||
count = opal_list_get_size(cache);
|
||||
if (ORTE_SUCCESS != (rc = opal_dss_pack_buffer(buffer, (void*)(&count), 1, ORTE_STD_CNTR))) {
|
||||
ORTE_ERROR_LOG(rc);
|
||||
return rc;
|
||||
}
|
||||
/* now pack each element on the list */
|
||||
OPAL_LIST_FOREACH(val, cache, opal_value_t) {
|
||||
if (ORTE_SUCCESS != (rc = opal_dss_pack_buffer(buffer, (void*)&val, 1, OPAL_VALUE))) {
|
||||
ORTE_ERROR_LOG(rc);
|
||||
return rc;
|
||||
}
|
||||
}
|
||||
} else {
|
||||
/* pack a zero to indicate no job info is being passed */
|
||||
count = 0;
|
||||
if (ORTE_SUCCESS != (rc = opal_dss_pack_buffer(buffer, (void*)(&count), 1, ORTE_STD_CNTR))) {
|
||||
ORTE_ERROR_LOG(rc);
|
||||
return rc;
|
||||
}
|
||||
}
|
||||
}
|
||||
return ORTE_SUCCESS;
|
||||
}
|
||||
@ -594,7 +598,11 @@ int orte_dt_pack_map(opal_buffer_t *buffer, const void *src,
|
||||
ORTE_ERROR_LOG(rc);
|
||||
return rc;
|
||||
}
|
||||
|
||||
/* pack the last mapper */
|
||||
if (ORTE_SUCCESS != (rc = opal_dss_pack_buffer(buffer, &(maps[i]->last_mapper), 1, OPAL_STRING))) {
|
||||
ORTE_ERROR_LOG(rc);
|
||||
return rc;
|
||||
}
|
||||
/* pack the policies */
|
||||
if (ORTE_SUCCESS != (rc = opal_dss_pack_buffer(buffer, &(maps[i]->mapping), 1, ORTE_MAPPING_POLICY))) {
|
||||
ORTE_ERROR_LOG(rc);
|
||||
|
@ -95,6 +95,44 @@ int orte_dt_unpack_job(opal_buffer_t *buffer, void *dest,
|
||||
return rc;
|
||||
}
|
||||
|
||||
/* unpack the attributes */
|
||||
n=1;
|
||||
if (ORTE_SUCCESS != (rc = opal_dss_unpack_buffer(buffer, &count,
|
||||
&n, ORTE_STD_CNTR))) {
|
||||
ORTE_ERROR_LOG(rc);
|
||||
return rc;
|
||||
}
|
||||
for (k=0; k < count; k++) {
|
||||
n=1;
|
||||
if (ORTE_SUCCESS != (rc = opal_dss_unpack_buffer(buffer, &kv,
|
||||
&n, ORTE_ATTRIBUTE))) {
|
||||
ORTE_ERROR_LOG(rc);
|
||||
return rc;
|
||||
}
|
||||
kv->local = ORTE_ATTR_GLOBAL; // obviously not a local value
|
||||
opal_list_append(&jobs[i]->attributes, &kv->super);
|
||||
}
|
||||
/* unpack any job info */
|
||||
n=1;
|
||||
if (ORTE_SUCCESS != (rc = opal_dss_unpack_buffer(buffer, &count,
|
||||
&n, ORTE_STD_CNTR))) {
|
||||
ORTE_ERROR_LOG(rc);
|
||||
return rc;
|
||||
}
|
||||
if (0 < count){
|
||||
cache = OBJ_NEW(opal_list_t);
|
||||
orte_set_attribute(&jobs[i]->attributes, ORTE_JOB_INFO_CACHE, ORTE_ATTR_LOCAL, (void*)cache, OPAL_PTR);
|
||||
for (k=0; k < count; k++) {
|
||||
n=1;
|
||||
if (ORTE_SUCCESS != (rc = opal_dss_unpack_buffer(buffer, &val,
|
||||
&n, OPAL_VALUE))) {
|
||||
ORTE_ERROR_LOG(rc);
|
||||
return rc;
|
||||
}
|
||||
opal_list_append(cache, &val->super);
|
||||
}
|
||||
}
|
||||
|
||||
/* unpack the personality */
|
||||
n=1;
|
||||
if (ORTE_SUCCESS != (rc = opal_dss_unpack_buffer(buffer, &count, &n, OPAL_INT32))) {
|
||||
@ -147,7 +185,10 @@ int orte_dt_unpack_job(opal_buffer_t *buffer, void *dest,
|
||||
return rc;
|
||||
}
|
||||
|
||||
if (orte_no_vm && 0 < jobs[i]->num_procs) {
|
||||
if (0 < jobs[i]->num_procs) {
|
||||
/* check attributes to see if this job was fully
|
||||
* described in the launch msg */
|
||||
if (orte_get_attribute(&jobs[i]->attributes, ORTE_JOB_FULLY_DESCRIBED, NULL, OPAL_BOOL)) {
|
||||
orte_proc_t *proc;
|
||||
for (j=0; j < jobs[i]->num_procs; j++) {
|
||||
n = 1;
|
||||
@ -159,6 +200,7 @@ int orte_dt_unpack_job(opal_buffer_t *buffer, void *dest,
|
||||
opal_pointer_array_add(jobs[i]->procs, proc);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/* unpack stdin target */
|
||||
n = 1;
|
||||
@ -204,44 +246,6 @@ int orte_dt_unpack_job(opal_buffer_t *buffer, void *dest,
|
||||
ORTE_ERROR_LOG(rc);
|
||||
return rc;
|
||||
}
|
||||
|
||||
/* unpack the attributes */
|
||||
n=1;
|
||||
if (ORTE_SUCCESS != (rc = opal_dss_unpack_buffer(buffer, &count,
|
||||
&n, ORTE_STD_CNTR))) {
|
||||
ORTE_ERROR_LOG(rc);
|
||||
return rc;
|
||||
}
|
||||
for (k=0; k < count; k++) {
|
||||
n=1;
|
||||
if (ORTE_SUCCESS != (rc = opal_dss_unpack_buffer(buffer, &kv,
|
||||
&n, ORTE_ATTRIBUTE))) {
|
||||
ORTE_ERROR_LOG(rc);
|
||||
return rc;
|
||||
}
|
||||
kv->local = ORTE_ATTR_GLOBAL; // obviously not a local value
|
||||
opal_list_append(&jobs[i]->attributes, &kv->super);
|
||||
}
|
||||
/* unpack any job info */
|
||||
n=1;
|
||||
if (ORTE_SUCCESS != (rc = opal_dss_unpack_buffer(buffer, &count,
|
||||
&n, ORTE_STD_CNTR))) {
|
||||
ORTE_ERROR_LOG(rc);
|
||||
return rc;
|
||||
}
|
||||
if (0 < count){
|
||||
cache = OBJ_NEW(opal_list_t);
|
||||
orte_set_attribute(&jobs[i]->attributes, ORTE_JOB_INFO_CACHE, ORTE_ATTR_LOCAL, (void*)cache, OPAL_PTR);
|
||||
for (k=0; k < count; k++) {
|
||||
n=1;
|
||||
if (ORTE_SUCCESS != (rc = opal_dss_unpack_buffer(buffer, &val,
|
||||
&n, OPAL_VALUE))) {
|
||||
ORTE_ERROR_LOG(rc);
|
||||
return rc;
|
||||
}
|
||||
opal_list_append(cache, &val->super);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return ORTE_SUCCESS;
|
||||
@ -655,6 +659,14 @@ int orte_dt_unpack_map(opal_buffer_t *buffer, void *dest,
|
||||
return rc;
|
||||
}
|
||||
|
||||
/* unpack the last mapper */
|
||||
n = 1;
|
||||
if (ORTE_SUCCESS != (rc = opal_dss_unpack_buffer(buffer,
|
||||
&(maps[i]->last_mapper), &n, OPAL_STRING))) {
|
||||
ORTE_ERROR_LOG(rc);
|
||||
return rc;
|
||||
}
|
||||
|
||||
/* unpack the policies */
|
||||
n = 1;
|
||||
if (ORTE_SUCCESS != (rc = opal_dss_unpack_buffer(buffer,
|
||||
|
@ -12,7 +12,7 @@
|
||||
* Copyright (c) 2007 Cisco Systems, Inc. All rights reserved.
|
||||
* Copyright (c) 2012-2016 Los Alamos National Security, LLC.
|
||||
* All rights reserved
|
||||
* Copyright (c) 2015-2016 Intel, Inc. All rights reserved.
|
||||
* Copyright (c) 2015-2017 Intel, Inc. All rights reserved.
|
||||
* Copyright (c) 2017 Research Organization for Information Science
|
||||
* and Technology (RIST). All rights reserved.
|
||||
* $COPYRIGHT$
|
||||
@ -111,6 +111,8 @@ OBJ_CLASS_INSTANCE(orte_data_req_t,
|
||||
static opal_pointer_array_t orte_data_server_store;
|
||||
static opal_list_t pending;
|
||||
static bool initialized = false;
|
||||
static int orte_data_server_output = -1;
|
||||
static int orte_data_server_verbosity = -1;
|
||||
|
||||
int orte_data_server_init(void)
|
||||
{
|
||||
@ -121,6 +123,19 @@ int orte_data_server_init(void)
|
||||
}
|
||||
initialized = true;
|
||||
|
||||
/* register a verbosity */
|
||||
orte_data_server_verbosity = -1;
|
||||
(void) mca_base_var_register ("orte", "orte", "data", "server_verbose",
|
||||
"Debug verbosity for ORTE data server",
|
||||
MCA_BASE_VAR_TYPE_INT, NULL, 0, 0,
|
||||
OPAL_INFO_LVL_9, MCA_BASE_VAR_SCOPE_ALL,
|
||||
&orte_data_server_verbosity);
|
||||
if (0 <= orte_data_server_verbosity) {
|
||||
orte_data_server_output = opal_output_open(NULL);
|
||||
opal_output_set_verbosity(orte_data_server_output,
|
||||
orte_data_server_verbosity);
|
||||
}
|
||||
|
||||
OBJ_CONSTRUCT(&orte_data_server_store, opal_pointer_array_t);
|
||||
if (ORTE_SUCCESS != (rc = opal_pointer_array_init(&orte_data_server_store,
|
||||
1,
|
||||
@ -180,7 +195,7 @@ void orte_data_server(int status, orte_process_name_t* sender,
|
||||
orte_data_req_t *req, *rqnext;
|
||||
orte_jobid_t jobid = ORTE_JOBID_INVALID;
|
||||
|
||||
OPAL_OUTPUT_VERBOSE((1, orte_debug_output,
|
||||
OPAL_OUTPUT_VERBOSE((1, orte_data_server_output,
|
||||
"%s data server got message from %s",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
ORTE_NAME_PRINT(sender)));
|
||||
@ -218,7 +233,7 @@ void orte_data_server(int status, orte_process_name_t* sender,
|
||||
goto SEND_ERROR;
|
||||
}
|
||||
|
||||
OPAL_OUTPUT_VERBOSE((1, orte_debug_output,
|
||||
OPAL_OUTPUT_VERBOSE((1, orte_data_server_output,
|
||||
"%s data server: publishing data from %s",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
ORTE_NAME_PRINT(&data->owner)));
|
||||
@ -245,7 +260,7 @@ void orte_data_server(int status, orte_process_name_t* sender,
|
||||
data->uid = iptr->data.uint32;
|
||||
OBJ_RELEASE(iptr);
|
||||
} else {
|
||||
OPAL_OUTPUT_VERBOSE((1, orte_debug_output,
|
||||
OPAL_OUTPUT_VERBOSE((10, orte_data_server_output,
|
||||
"%s data server: adding %s to data from %s",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), iptr->key,
|
||||
ORTE_NAME_PRINT(&data->owner)));
|
||||
@ -255,7 +270,7 @@ void orte_data_server(int status, orte_process_name_t* sender,
|
||||
|
||||
data->index = opal_pointer_array_add(&orte_data_server_store, data);
|
||||
|
||||
OPAL_OUTPUT_VERBOSE((1, orte_debug_output,
|
||||
OPAL_OUTPUT_VERBOSE((1, orte_data_server_output,
|
||||
"%s data server: checking for pending requests",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)));
|
||||
|
||||
@ -276,7 +291,14 @@ void orte_data_server(int status, orte_process_name_t* sender,
|
||||
for (i=0; NULL != req->keys[i]; i++) {
|
||||
/* cycle thru the data keys for matches */
|
||||
OPAL_LIST_FOREACH(iptr, &data->values, opal_value_t) {
|
||||
OPAL_OUTPUT_VERBOSE((10, orte_data_server_output,
|
||||
"%s\tCHECKING %s TO %s",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
iptr->key, req->keys[i]));
|
||||
if (0 == strcmp(iptr->key, req->keys[i])) {
|
||||
OPAL_OUTPUT_VERBOSE((10, orte_data_server_output,
|
||||
"%s data server: packaging return",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)));
|
||||
/* found it - package it for return */
|
||||
if (NULL == reply) {
|
||||
reply = OBJ_NEW(opal_buffer_t);
|
||||
@ -296,7 +318,7 @@ void orte_data_server(int status, orte_process_name_t* sender,
|
||||
ORTE_ERROR_LOG(rc);
|
||||
break;
|
||||
}
|
||||
OPAL_OUTPUT_VERBOSE((1, orte_debug_output,
|
||||
OPAL_OUTPUT_VERBOSE((10, orte_data_server_output,
|
||||
"%s data server: adding %s data from %s to response",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), iptr->key,
|
||||
ORTE_NAME_PRINT(&data->owner)));
|
||||
@ -309,7 +331,7 @@ void orte_data_server(int status, orte_process_name_t* sender,
|
||||
}
|
||||
if (NULL != reply) {
|
||||
/* send it back to the requestor */
|
||||
OPAL_OUTPUT_VERBOSE((1, orte_debug_output,
|
||||
OPAL_OUTPUT_VERBOSE((1, orte_data_server_output,
|
||||
"%s data server: returning data to %s",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
ORTE_NAME_PRINT(&req->requestor)));
|
||||
@ -326,11 +348,11 @@ void orte_data_server(int status, orte_process_name_t* sender,
|
||||
reply = NULL;
|
||||
/* if the persistence is "first_read", then delete this data */
|
||||
if (OPAL_PMIX_PERSIST_FIRST_READ == data->persistence) {
|
||||
OPAL_OUTPUT_VERBOSE((1, orte_debug_output,
|
||||
OPAL_OUTPUT_VERBOSE((1, orte_data_server_output,
|
||||
"%s NOT STORING DATA FROM %s AT INDEX %d",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
ORTE_NAME_PRINT(&data->owner), data->index));
|
||||
opal_pointer_array_set_item(&orte_data_server_store, data->index, NULL);
|
||||
ORTE_NAME_PRINT(&data->owner), data->index);
|
||||
opal_pointer_array_set_item(&orte_data_server_store, data->index, NULL));
|
||||
OBJ_RELEASE(data);
|
||||
goto release;
|
||||
}
|
||||
@ -349,7 +371,7 @@ void orte_data_server(int status, orte_process_name_t* sender,
|
||||
break;
|
||||
|
||||
case ORTE_PMIX_LOOKUP_CMD:
|
||||
OPAL_OUTPUT_VERBOSE((1, orte_debug_output,
|
||||
OPAL_OUTPUT_VERBOSE((1, orte_data_server_output,
|
||||
"%s data server: lookup data from %s",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
ORTE_NAME_PRINT(sender)));
|
||||
@ -416,7 +438,7 @@ void orte_data_server(int status, orte_process_name_t* sender,
|
||||
/* cycle across the provided keys */
|
||||
ret_packed = false;
|
||||
for (i=0; NULL != keys[i]; i++) {
|
||||
OPAL_OUTPUT_VERBOSE((1, orte_debug_output,
|
||||
OPAL_OUTPUT_VERBOSE((10, orte_data_server_output,
|
||||
"%s data server: looking for %s",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), keys[i]));
|
||||
/* cycle across the stored data, looking for a match */
|
||||
@ -428,6 +450,10 @@ void orte_data_server(int status, orte_process_name_t* sender,
|
||||
}
|
||||
/* for security reasons, can only access data posted by the same user id */
|
||||
if (uid != data->uid) {
|
||||
OPAL_OUTPUT_VERBOSE((10, orte_data_server_output,
|
||||
"%s\tMISMATCH UID %u %u",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
(unsigned)uid, (unsigned)data->uid));
|
||||
continue;
|
||||
}
|
||||
/* if the published range is constrained to namespace, then only
|
||||
@ -435,12 +461,17 @@ void orte_data_server(int status, orte_process_name_t* sender,
|
||||
* in the same namespace as the requestor */
|
||||
if (OPAL_PMIX_RANGE_NAMESPACE == data->range) {
|
||||
if (jobid != data->owner.jobid) {
|
||||
OPAL_OUTPUT_VERBOSE((10, orte_data_server_output,
|
||||
"%s\tMISMATCH JOBID %s %s",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
ORTE_JOBID_PRINT(jobid),
|
||||
ORTE_JOBID_PRINT(data->owner.jobid)));
|
||||
continue;
|
||||
}
|
||||
}
|
||||
/* see if we have this key */
|
||||
OPAL_LIST_FOREACH(iptr, &data->values, opal_value_t) {
|
||||
OPAL_OUTPUT_VERBOSE((1, orte_debug_output,
|
||||
OPAL_OUTPUT_VERBOSE((10, orte_data_server_output,
|
||||
"%s COMPARING %s %s",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
keys[i], iptr->key));
|
||||
@ -461,7 +492,7 @@ void orte_data_server(int status, orte_process_name_t* sender,
|
||||
opal_argv_free(keys);
|
||||
goto SEND_ERROR;
|
||||
}
|
||||
OPAL_OUTPUT_VERBOSE((1, orte_debug_output,
|
||||
OPAL_OUTPUT_VERBOSE((1, orte_data_server_output,
|
||||
"%s data server: adding %s to data from %s",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), iptr->key,
|
||||
ORTE_NAME_PRINT(&data->owner)));
|
||||
@ -473,7 +504,7 @@ void orte_data_server(int status, orte_process_name_t* sender,
|
||||
}
|
||||
}
|
||||
if (data_added && OPAL_PMIX_PERSIST_FIRST_READ == data->persistence) {
|
||||
OPAL_OUTPUT_VERBOSE((1, orte_debug_output,
|
||||
OPAL_OUTPUT_VERBOSE((1, orte_data_server_output,
|
||||
"%s REMOVING DATA FROM %s AT INDEX %d",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
ORTE_NAME_PRINT(&data->owner), data->index));
|
||||
@ -483,14 +514,14 @@ void orte_data_server(int status, orte_process_name_t* sender,
|
||||
}
|
||||
}
|
||||
if (!ret_packed) {
|
||||
OPAL_OUTPUT_VERBOSE((1, orte_debug_output,
|
||||
OPAL_OUTPUT_VERBOSE((1, orte_data_server_output,
|
||||
"%s data server:lookup: data not found",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)));
|
||||
|
||||
/* if we were told to wait for the data, then queue this up
|
||||
* for later processing */
|
||||
if (wait) {
|
||||
OPAL_OUTPUT_VERBOSE((1, orte_debug_output,
|
||||
OPAL_OUTPUT_VERBOSE((1, orte_data_server_output,
|
||||
"%s data server:lookup: pushing request to wait",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)));
|
||||
OBJ_RELEASE(answer);
|
||||
@ -510,7 +541,7 @@ void orte_data_server(int status, orte_process_name_t* sender,
|
||||
}
|
||||
|
||||
opal_argv_free(keys);
|
||||
OPAL_OUTPUT_VERBOSE((1, orte_debug_output,
|
||||
OPAL_OUTPUT_VERBOSE((1, orte_data_server_output,
|
||||
"%s data server:lookup: data found",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)));
|
||||
goto SEND_ANSWER;
|
||||
@ -524,7 +555,7 @@ void orte_data_server(int status, orte_process_name_t* sender,
|
||||
goto SEND_ERROR;
|
||||
}
|
||||
|
||||
OPAL_OUTPUT_VERBOSE((1, orte_debug_output,
|
||||
OPAL_OUTPUT_VERBOSE((1, orte_data_server_output,
|
||||
"%s data server: unpublish data from %s",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
ORTE_NAME_PRINT(&requestor)));
|
||||
@ -629,7 +660,7 @@ void orte_data_server(int status, orte_process_name_t* sender,
|
||||
}
|
||||
|
||||
SEND_ERROR:
|
||||
OPAL_OUTPUT_VERBOSE((1, orte_debug_output,
|
||||
OPAL_OUTPUT_VERBOSE((1, orte_data_server_output,
|
||||
"%s data server: sending error %s",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
ORTE_ERROR_NAME(rc)));
|
||||
@ -646,5 +677,3 @@ void orte_data_server(int status, orte_process_name_t* sender,
|
||||
OBJ_RELEASE(answer);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
@ -1,4 +1,11 @@
|
||||
PROGS = mpi_no_op mpi_barrier hello hello_nodename abort multi_abort simple_spawn concurrent_spawn spawn_multiple mpi_spin delayed_abort loop_spawn loop_child bad_exit pubsub hello_barrier segv accept connect hello_output hello_show_help crisscross read_write ziatest slave reduce-hang ziaprobe ziatest bcast_loop parallel_w8 parallel_w64 parallel_r8 parallel_r64 sio sendrecv_blaster early_abort debugger singleton_client_server intercomm_create spawn_tree init-exit77 mpi_info info_spawn server client paccept pconnect ring hello.sapp binding badcoll attach xlib
|
||||
PROGS = mpi_no_op mpi_barrier hello hello_nodename abort multi_abort simple_spawn \
|
||||
concurrent_spawn spawn_multiple mpi_spin delayed_abort loop_spawn loop_child \
|
||||
bad_exit pubsub hello_barrier segv accept connect hello_output hello_show_help \
|
||||
crisscross read_write ziatest slave reduce-hang ziaprobe ziatest bcast_loop \
|
||||
parallel_w8 parallel_w64 parallel_r8 parallel_r64 sio sendrecv_blaster early_abort \
|
||||
debugger singleton_client_server intercomm_create spawn_tree init-exit77 mpi_info \
|
||||
info_spawn server client paccept pconnect ring hello.sapp binding badcoll attach xlib \
|
||||
no-disconnect
|
||||
|
||||
all: $(PROGS)
|
||||
|
||||
|
210
orte/test/mpi/no-disconnect.c
Обычный файл
210
orte/test/mpi/no-disconnect.c
Обычный файл
@ -0,0 +1,210 @@
|
||||
/* Contributed by Marcia Cristina Cera
|
||||
<marcia.cristina.cera@gmail.com>,
|
||||
http://www.open-mpi.org/community/lists/users/2009/12/11540.php */
|
||||
|
||||
/* It was decided that the issue highlighted by this test will NOT be
|
||||
fixed in the 1.3/1.4 series. It is already fixed in the 1.5
|
||||
series. Hence, if we detect Open MPI < v1.5, return 77/skip. */
|
||||
/* Turns out the hnp cannot handle concurrent MPI_Comm_spawns
|
||||
as of Open MPI 1.7. However, we hope this feature will
|
||||
work in 2.0. with the new state machine based orte. */
|
||||
|
||||
#include <unistd.h>
|
||||
#include <stdlib.h>
|
||||
#include <stdio.h>
|
||||
#include <string.h>
|
||||
#include <time.h>
|
||||
#include <sys/utsname.h>
|
||||
|
||||
#include <mpi.h>
|
||||
|
||||
#define NCHARS 30
|
||||
const int max_depth = 4;
|
||||
|
||||
/*
|
||||
* Here are some replacements for standard, blocking MPI
|
||||
* functions. These replacements are "nice" and yield the
|
||||
* CPU instead of spinning hard. The interfaces are the same.
|
||||
* Just replace:
|
||||
* MPI_Recv with nice_recv
|
||||
* MPI_Send with nice_send
|
||||
* MPI_Barrier with nice_barrier
|
||||
*/
|
||||
|
||||
|
||||
/*
 * "Nice" counterpart of MPI_Send with the same signature and return value.
 * No polling is needed on the send side: the messages used by this test
 * are short, so a standard blocking send is assumed to complete eagerly.
 */
static int nice_send(void *buf, int count, MPI_Datatype datatype, int dest, int tag, MPI_Comm comm) {
    int rc;

    rc = MPI_Send(buf, count, datatype, dest, tag, comm);
    return rc;
}
|
||||
|
||||
|
||||
/*
 * "Nice" replacement for MPI_Recv: posts a non-blocking receive and then
 * polls it with MPI_Test, sleeping between polls so that the CPU is
 * yielded instead of being spun on.
 *
 * Parameters are those of MPI_Recv.
 *
 * Returns MPI_SUCCESS once the message has arrived, or the error code
 * reported by MPI_Irecv / MPI_Test if one of them fails (only observable
 * when the communicator's error handler returns instead of aborting).
 */
static int nice_recv(void *buf, int count, MPI_Datatype datatype, int source, int tag, MPI_Comm comm, MPI_Status *status) {
    MPI_Request req;
    int flag = 0;
    int rc;
    struct timespec dt;

    /*
     * We're only interested in modest levels of oversubscription
     * -- e.g., 2-4x more processes than physical processors.
     * So, the sleep time only needs to be about 2-4x longer than
     * a futile MPI_Test call.  For a wide range of processors,
     * something less than a millisecond should be sufficient.
     * Excessive sleep times (e.g., 1 second) would degrade performance.
     */
    dt.tv_sec = 0;
    dt.tv_nsec = 100000;    /* 0.1 millisecond */

    rc = MPI_Irecv(buf, count, datatype, source, tag, comm, &req);
    if (MPI_SUCCESS != rc) {
        return rc;
    }

    /* test first, sleep only if the message has not arrived yet */
    for (;;) {
        rc = MPI_Test(&req, &flag, status);
        if (MPI_SUCCESS != rc) {
            return rc;
        }
        if (flag) {
            break;
        }
        nanosleep(&dt, NULL);
    }

    return MPI_SUCCESS;
}
|
||||
|
||||
|
||||
/*
 * "Nice" replacement for MPI_Barrier built on nice_send / nice_recv.
 *
 * Implemented as a binomial-tree fan-in (tag 343) towards rank 0 followed
 * by a fan-out (tag 344) back down the same tree.  The payload is a dummy
 * integer; only the arrival of the messages matters.
 */
static void nice_barrier(MPI_Comm comm) {
    int rank, size;
    int token = -1;
    int stride = 1;

    MPI_Comm_rank(comm, &rank);
    MPI_Comm_size(comm, &size);

    /* fan in: collect from the children, then report to the parent.
     * A non-zero rank leaves this loop with stride equal to its lowest
     * set bit, i.e. the distance to its parent in the tree. */
    while (stride < size) {
        if (0 != (rank & stride)) {
            nice_send(&token, 1, MPI_INT, rank - stride, 343, comm);
            break;
        }
        if (rank + stride < size) {
            nice_recv(&token, 1, MPI_INT, rank + stride, 343, comm, MPI_STATUS_IGNORE);
        }
        stride <<= 1;
    }

    /* fan out: wait for the release from the parent (rank 0 has none) ... */
    if (rank != 0) {
        nice_recv(&token, 1, MPI_INT, rank - stride, 344, comm, MPI_STATUS_IGNORE);
    }

    /* ... then release the children, largest subtree first */
    for (stride >>= 1; stride > 0; stride >>= 1) {
        if (rank + stride < size) {
            nice_send(&token, 1, MPI_INT, rank + stride, 344, comm);
        }
    }
}
|
||||
|
||||
|
||||
int main (int argc, char **argv)
|
||||
{
|
||||
char bufs [NCHARS]; /* send buffer */
|
||||
char bufr[2][NCHARS]; /* recv buffers */
|
||||
MPI_Comm parent;
|
||||
int level = 0, participate = 1;
|
||||
struct utsname buf;
|
||||
|
||||
/* If this is prior to OMPI v2.0, return 77/skip */
|
||||
#if defined(OPEN_MPI)
|
||||
if (OMPI_MAJOR_VERSION < 2) {
|
||||
printf("Skipping, because the orte cannot handle concurrent MPI_Comm_spawns\n");
|
||||
return 77;
|
||||
} else {
|
||||
printf("Verify that this test is truly working because conncurrent MPI_Comm_spawns"
|
||||
" has not worked before.\n");
|
||||
}
|
||||
#endif
|
||||
|
||||
uname(&buf);
|
||||
printf("I AM pid %d with level %d on %s\n", getpid(), (argc < 2)?0:atoi(argv[1]), buf.nodename);
|
||||
|
||||
MPI_Init(&argc, &argv);
|
||||
MPI_Comm_get_parent(&parent);
|
||||
|
||||
if (MPI_COMM_NULL != parent) {
|
||||
/* spawned processes get stuff from parent */
|
||||
level = atoi(argv[1]);
|
||||
MPI_Recv(&bufr[0], sizeof(char)*NCHARS, MPI_CHAR, MPI_ANY_SOURCE,
|
||||
MPI_ANY_TAG, parent, MPI_STATUS_IGNORE);
|
||||
printf("Parent sent: %s\n", bufr[0]);
|
||||
} else {
|
||||
|
||||
/* original processes have to decide whether to participate */
|
||||
|
||||
/* In this test, each process launched by "mpirun -n <np>" spawns a
|
||||
* binary tree of processes. You end up with <np> * ( 1 << max_depth )
|
||||
* processes altogether. For max_depth=4, this means 16*<np>. There
|
||||
* is potential here for heavy oversubscription, especially if in
|
||||
* testing we launch tests with <np> set to the number of available
|
||||
* processors. This test tolerates oversubscription somewhat since
|
||||
* it entails little inter-process synchronization. Nevertheless,
|
||||
* we try to idle all but <np>/4 of the original processes, using a
|
||||
* minimum of at least two processes
|
||||
*/
|
||||
|
||||
int me, np;
|
||||
|
||||
MPI_Comm_size(MPI_COMM_WORLD,&np);
|
||||
MPI_Comm_rank(MPI_COMM_WORLD,&me);
|
||||
|
||||
if ( np > 4 ) {
|
||||
/* turn off all but every 4th process */
|
||||
if ( ( me & 3 ) != 0 ) participate = 0;
|
||||
} else
|
||||
if ( np > 2 ) {
|
||||
/* turn off all but every 2nd process */
|
||||
if ( ( me & 1 ) != 0 ) participate = 0;
|
||||
}
|
||||
}
|
||||
|
||||
/* all spawned processes and selected "root" processes participate */
|
||||
if ( participate ) {
|
||||
printf("level = %d\n", level);
|
||||
|
||||
/* prepare send buffer */
|
||||
sprintf(bufs,"level %d (pid:%d)", level, getpid());
|
||||
|
||||
/* spawn */
|
||||
if (level < max_depth) {
|
||||
int i, nspawn = 2, errcodes[1];
|
||||
MPI_Request req[2];
|
||||
MPI_Comm comm[2];
|
||||
char argv1[NCHARS];
|
||||
char *args[2];
|
||||
|
||||
/* level 0 spawns only one process to mimic the original test */
|
||||
if ( level == 0 ) nspawn = 1;
|
||||
|
||||
/* prepare command line arguments */
|
||||
snprintf(argv1, sizeof(argv1), "%d", level+1);
|
||||
args[0] = argv1;
|
||||
args[1] = NULL;
|
||||
|
||||
/* spawn, with a message sent to and received from each child */
|
||||
for ( i = 0; i < nspawn; i++ ) {
|
||||
MPI_Comm_spawn(argv[0], args, 1, MPI_INFO_NULL, 0, MPI_COMM_SELF,
|
||||
&comm[i], errcodes);
|
||||
MPI_Send(&bufs, sizeof(char)*NCHARS, MPI_CHAR, 0, 100, comm[i]);
|
||||
MPI_Irecv(&bufr[i], sizeof(char)*NCHARS, MPI_CHAR, MPI_ANY_SOURCE,
|
||||
MPI_ANY_TAG, comm[i], &req[i]);
|
||||
}
|
||||
|
||||
/* wait for messages from children and print them */
|
||||
MPI_Waitall(nspawn, req, MPI_STATUSES_IGNORE);
|
||||
for ( i = 0; i < nspawn; i++ )
|
||||
printf("Child %d sent: %s\n", i, bufr[i]);
|
||||
}
|
||||
|
||||
/* send message back to parent */
|
||||
if (MPI_COMM_NULL != parent) {
|
||||
MPI_Send(&bufs, sizeof(char)*NCHARS, MPI_CHAR, 0, 100, parent);
|
||||
}
|
||||
}
|
||||
|
||||
/* non-participating processes wait at this barrier for their peers */
|
||||
/* (This barrier won't cost that many CPU cycles.) */
|
||||
if (MPI_COMM_NULL == parent) {
|
||||
nice_barrier(MPI_COMM_WORLD);
|
||||
}
|
||||
|
||||
MPI_Finalize();
|
||||
return 0;
|
||||
}
|
@ -286,6 +286,8 @@ const char *orte_attr_key_to_str(orte_attribute_key_t key)
|
||||
return "ORTE_JOB_TRANSPORT_KEY";
|
||||
case ORTE_JOB_INFO_CACHE:
|
||||
return "ORTE_JOB_INFO_CACHE";
|
||||
case ORTE_JOB_FULLY_DESCRIBED:
|
||||
return "ORTE_JOB_FULLY_DESCRIBED";
|
||||
|
||||
case ORTE_PROC_NOBARRIER:
|
||||
return "PROC-NOBARRIER";
|
||||
|
@ -143,6 +143,7 @@ typedef uint16_t orte_job_flags_t;
|
||||
#define ORTE_JOB_NOTIFY_COMPLETION (ORTE_JOB_START_KEY + 50) // bool - notify parent proc when spawned job terminates
|
||||
#define ORTE_JOB_TRANSPORT_KEY (ORTE_JOB_START_KEY + 51) // string - transport keys assigned to this job
|
||||
#define ORTE_JOB_INFO_CACHE (ORTE_JOB_START_KEY + 52) // opal_list_t - list of opal_value_t to be included in job_info
|
||||
#define ORTE_JOB_FULLY_DESCRIBED (ORTE_JOB_START_KEY + 53) // bool - job is fully described in launch msg
|
||||
|
||||
#define ORTE_JOB_MAX_KEY 300
|
||||
|
||||
|
@ -198,7 +198,7 @@ int orte_util_build_daemon_nidmap(void)
|
||||
return rc;
|
||||
}
|
||||
|
||||
int orte_util_nidmap_create(char **regex)
|
||||
int orte_util_nidmap_create(opal_pointer_array_t *pool, char **regex)
|
||||
{
|
||||
char *node;
|
||||
char prefix[ORTE_MAX_NODE_PREFIX];
|
||||
@ -217,8 +217,8 @@ int orte_util_nidmap_create(char **regex)
|
||||
OBJ_CONSTRUCT(&dvpids, opal_list_t);
|
||||
|
||||
rng = NULL;
|
||||
for (n=0; n < orte_node_pool->size; n++) {
|
||||
if (NULL == (nptr = (orte_node_t*)opal_pointer_array_get_item(orte_node_pool, n))) {
|
||||
for (n=0; n < pool->size; n++) {
|
||||
if (NULL == (nptr = (orte_node_t*)opal_pointer_array_get_item(pool, n))) {
|
||||
continue;
|
||||
}
|
||||
/* if no daemon has been assigned, then this node is not being used */
|
||||
@ -1180,3 +1180,217 @@ int orte_util_decode_daemon_nodemap(opal_buffer_t *buffer)
|
||||
OPAL_LIST_DESTRUCT(&flgs);
|
||||
return rc;
|
||||
}
|
||||
|
||||
typedef struct {
|
||||
opal_list_item_t super;
|
||||
int ctx;
|
||||
int nprocs;
|
||||
int cnt;
|
||||
} orte_nidmap_regex_t;
|
||||
static void nrcon(orte_nidmap_regex_t *p)
|
||||
{
|
||||
p->ctx = 0;
|
||||
p->nprocs = -1;
|
||||
p->cnt = 0;
|
||||
}
|
||||
static OBJ_CLASS_INSTANCE(orte_nidmap_regex_t,
|
||||
opal_list_item_t,
|
||||
nrcon, NULL);
|
||||
|
||||
/* since not every node is involved in a job, we have to create a
|
||||
* regex that indicates the ppn for every node, marking those that
|
||||
* are not involved. Since each daemon knows the entire
|
||||
* node pool, we simply provide a ppn for every daemon, with a -1
|
||||
* to indicate that the node is empty for that job */
|
||||
int orte_util_nidmap_generate_ppn(orte_job_t *jdata, char **ppn)
|
||||
{
|
||||
orte_nidmap_regex_t *prng, **actives;
|
||||
opal_list_t *prk;
|
||||
orte_node_t *nptr;
|
||||
orte_proc_t *proc;
|
||||
size_t n;
|
||||
int *cnt, i, k;
|
||||
char *tmp2, *ptmp, **cache = NULL;
|
||||
|
||||
/* create an array of lists to handle the number of app_contexts in this job */
|
||||
prk = (opal_list_t*)malloc(jdata->num_apps * sizeof(opal_list_t));
|
||||
cnt = (int*)malloc(jdata->num_apps * sizeof(int));
|
||||
actives = (orte_nidmap_regex_t**)malloc(jdata->num_apps * sizeof(orte_nidmap_regex_t*));
|
||||
for (n=0; n < jdata->num_apps; n++) {
|
||||
OBJ_CONSTRUCT(&prk[n], opal_list_t);
|
||||
actives[n] = NULL;
|
||||
}
|
||||
|
||||
/* we provide a complete map in the regex, with an entry for every
|
||||
* node in the pool */
|
||||
for (i=0; i < orte_node_pool->size; i++) {
|
||||
if (NULL == (nptr = (orte_node_t*)opal_pointer_array_get_item(orte_node_pool, i))) {
|
||||
continue;
|
||||
}
|
||||
/* if a daemon has been assigned, then count how many procs
|
||||
* for each app_context from the specified job are assigned to this node */
|
||||
memset(cnt, 0, jdata->num_apps * sizeof(int));
|
||||
if (NULL != nptr->daemon) {
|
||||
for (k=0; k < nptr->procs->size; k++) {
|
||||
if (NULL != (proc = (orte_proc_t*)opal_pointer_array_get_item(nptr->procs, k))) {
|
||||
if (proc->name.jobid == jdata->jobid) {
|
||||
++cnt[proc->app_idx];
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
/* track the #procs on this node */
|
||||
for (n=0; n < jdata->num_apps; n++) {
|
||||
if (NULL == actives[n]) {
|
||||
/* just starting */
|
||||
actives[n] = OBJ_NEW(orte_nidmap_regex_t);
|
||||
actives[n]->nprocs = cnt[n];
|
||||
actives[n]->cnt = 1;
|
||||
opal_list_append(&prk[n], &actives[n]->super);
|
||||
} else {
|
||||
/* is this the next in line */
|
||||
if (cnt[n] == actives[n]->nprocs) {
|
||||
actives[n]->cnt++;
|
||||
} else {
|
||||
/* need to start another range */
|
||||
actives[n] = OBJ_NEW(orte_nidmap_regex_t);
|
||||
actives[n]->nprocs = cnt[n];
|
||||
actives[n]->cnt = 1;
|
||||
opal_list_append(&prk[n], &actives[n]->super);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/* construct the regex from the found ranges for each app_context */
|
||||
ptmp = NULL;
|
||||
for (n=0; n < jdata->num_apps; n++) {
|
||||
OPAL_LIST_FOREACH(prng, &prk[n], orte_nidmap_regex_t) {
|
||||
if (1 < prng->cnt) {
|
||||
if (NULL == ptmp) {
|
||||
asprintf(&ptmp, "%u(%u)", prng->nprocs, prng->cnt);
|
||||
} else {
|
||||
asprintf(&tmp2, "%s,%u(%u)", ptmp, prng->nprocs, prng->cnt);
|
||||
free(ptmp);
|
||||
ptmp = tmp2;
|
||||
}
|
||||
} else {
|
||||
if (NULL == ptmp) {
|
||||
asprintf(&ptmp, "%u", prng->nprocs);
|
||||
} else {
|
||||
asprintf(&tmp2, "%s,%u", ptmp, prng->nprocs);
|
||||
free(ptmp);
|
||||
ptmp = tmp2;
|
||||
}
|
||||
}
|
||||
}
|
||||
OPAL_LIST_DESTRUCT(&prk[n]); // releases all the actives objects
|
||||
opal_argv_append_nosize(&cache, ptmp);
|
||||
free(ptmp);
|
||||
ptmp = NULL;
|
||||
}
|
||||
free(prk);
|
||||
free(cnt);
|
||||
free(actives);
|
||||
|
||||
*ppn = opal_argv_join(cache, '@');
|
||||
opal_argv_free(cache);
|
||||
|
||||
return ORTE_SUCCESS;
|
||||
}
|
||||
|
||||
/* Decode a ppn description and create the corresponding procs.
 *
 * The string holds one '@'-delimited section per app_context; each
 * section is a comma-delimited list of "N" or "N(C)" entries meaning
 * N procs on each of C consecutive nodes (C defaults to 1). The entries
 * are applied, in order, to the non-NULL entries of orte_node_pool.
 *
 * For every node with a positive ppn the node is added to the job map
 * (once) and one orte_proc_t per proc is created and stored on the node.
 * The procs are given the jobid and app_idx but no vpid, and are not
 * added to the job's own proc array here.
 *
 * @param jdata  job being described; jdata->map must already exist
 * @param regex  ppn description to decode
 * @return ORTE_SUCCESS, or an ORTE error code if the description covers
 *         fewer nodes than the pool holds, if a node that is assigned
 *         procs has no daemon, or on allocation failure.
 */
int orte_util_nidmap_parse_ppn(orte_job_t *jdata, char *regex)
{
    orte_node_t *node;
    orte_proc_t *proc;
    int n, k, m, cnt;
    char **tmp, *ptr, **ppn;
    orte_nidmap_regex_t *rng;
    opal_list_t trk;
    int rc = ORTE_SUCCESS;

    /* split the regex by app_context - the result is NULL when there is
     * nothing to split, in which case no app_context is processed */
    tmp = opal_argv_split(regex, '@');

    /* for each app_context, set the ppn */
    for (n=0; NULL != tmp && NULL != tmp[n]; n++) {
        ppn = opal_argv_split(tmp[n], ',');
        /* decompress the ppn */
        OBJ_CONSTRUCT(&trk, opal_list_t);
        for (m=0; NULL != ppn && NULL != ppn[m]; m++) {
            rng = OBJ_NEW(orte_nidmap_regex_t);
            if (NULL == rng) {
                ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE);
                rc = ORTE_ERR_OUT_OF_RESOURCE;
                break;
            }
            opal_list_append(&trk, &rng->super);
            /* check for a count */
            if (NULL != (ptr = strchr(ppn[m], '('))) {
                ppn[m][strlen(ppn[m])-1] = '\0';  // remove trailing paren
                *ptr = '\0';
                ++ptr;
                rng->cnt = strtoul(ptr, NULL, 10);
            } else {
                rng->cnt = 1;
            }
            /* convert the number */
            rng->nprocs = strtoul(ppn[m], NULL, 10);
        }
        opal_argv_free(ppn);

        /* cycle thru our node pool and add the indicated number of procs
         * to each node */
        rng = (orte_nidmap_regex_t*)opal_list_get_first(&trk);
        cnt = 0;
        for (m=0; ORTE_SUCCESS == rc && m < orte_node_pool->size; m++) {
            if (NULL == (node = (orte_node_t*)opal_pointer_array_get_item(orte_node_pool, m))) {
                continue;
            }
            /* Walking off the end of an opal_list_t yields the list's
             * sentinel, not NULL, so that is what must be tested before
             * the entry is used. */
            if ((opal_list_item_t*)rng == opal_list_get_end(&trk)) {
                /* the description covers fewer nodes than we have */
                ORTE_ERROR_LOG(ORTE_ERR_NOT_FOUND);
                rc = ORTE_ERR_NOT_FOUND;
                break;
            }
            /* see if it has any procs for this job and app_context */
            if (0 < rng->nprocs) {
                if (NULL == node->daemon) {
                    /* the procs need the daemon's vpid as their parent */
                    ORTE_ERROR_LOG(ORTE_ERR_NOT_FOUND);
                    rc = ORTE_ERR_NOT_FOUND;
                    break;
                }
                /* add this node to the job map if it isn't already there */
                if (!ORTE_FLAG_TEST(node, ORTE_NODE_FLAG_MAPPED)) {
                    OBJ_RETAIN(node);
                    ORTE_FLAG_SET(node, ORTE_NODE_FLAG_MAPPED);
                    opal_pointer_array_add(jdata->map->nodes, node);
                }
                /* create a proc object for each one */
                for (k=0; k < rng->nprocs; k++) {
                    proc = OBJ_NEW(orte_proc_t);
                    if (NULL == proc) {
                        ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE);
                        rc = ORTE_ERR_OUT_OF_RESOURCE;
                        break;
                    }
                    proc->name.jobid = jdata->jobid;
                    /* leave the vpid undefined as this will be determined
                     * later when we do the overall ranking */
                    proc->app_idx = n;
                    proc->parent = node->daemon->name.vpid;
                    OBJ_RETAIN(node);
                    proc->node = node;
                    /* flag the proc as ready for launch */
                    proc->state = ORTE_PROC_STATE_INIT;
                    opal_pointer_array_add(node->procs, proc);
                    /* we will add the proc to the jdata array when we
                     * compute its rank */
                    node->num_procs++;
                }
                if (ORTE_SUCCESS != rc) {
                    break;
                }
            }
            /* move to the next entry once this one has covered its nodes */
            ++cnt;
            if (rng->cnt <= cnt) {
                rng = (orte_nidmap_regex_t*)opal_list_get_next(&rng->super);
                cnt = 0;
            }
        }
        /* release the entries on success and failure alike */
        OPAL_LIST_DESTRUCT(&trk);
        if (ORTE_SUCCESS != rc) {
            break;
        }
    }
    opal_argv_free(tmp);

    /* reset any node map flags we used so the next job will start clean */
    for (n=0; n < jdata->map->nodes->size; n++) {
        if (NULL != (node = (orte_node_t*)opal_pointer_array_get_item(jdata->map->nodes, n))) {
            ORTE_FLAG_UNSET(node, ORTE_NODE_FLAG_MAPPED);
        }
    }

    return rc;
}
|
||||
|
@ -46,7 +46,7 @@ BEGIN_C_DECLS
|
||||
|
||||
ORTE_DECLSPEC void orte_util_nidmap_init(void);
|
||||
|
||||
ORTE_DECLSPEC int orte_util_nidmap_create(char **regex);
|
||||
ORTE_DECLSPEC int orte_util_nidmap_create(opal_pointer_array_t *pool, char **regex);
|
||||
ORTE_DECLSPEC int orte_util_nidmap_parse(char *regex);
|
||||
|
||||
/* create a regular expression describing the nodes in the
|
||||
@ -59,6 +59,12 @@ ORTE_DECLSPEC int orte_util_decode_daemon_nodemap(opal_buffer_t *buffer);
|
||||
|
||||
ORTE_DECLSPEC int orte_util_build_daemon_nidmap(void);
|
||||
|
||||
/* create a regular expression describing the ppn for a job */
|
||||
ORTE_DECLSPEC int orte_util_nidmap_generate_ppn(orte_job_t *jdata, char **ppn);
|
||||
|
||||
/* decode the ppn */
|
||||
ORTE_DECLSPEC int orte_util_nidmap_parse_ppn(orte_job_t *jdata, char *ppn);
|
||||
|
||||
END_C_DECLS
|
||||
|
||||
#endif
|
||||
|
Загрузка…
Ссылка в новой задаче
Block a user