From 40e2299fa740aa97fd9cde53ef0a1bd5a383ac04 Mon Sep 17 00:00:00 2001 From: Ralph Castain <rhc@open-mpi.org> Date: Fri, 9 Oct 2009 15:26:23 +0000 Subject: [PATCH] Test to ensure that num_procs was provided for the resilient mapper - it cannot be used with options like npernode. Clean up the show_help text file. This commit was SVN r22082. --- .../resilient/help-orte-rmaps-resilient.txt | 44 +++++-------------- orte/mca/rmaps/resilient/rmaps_resilient.c | 11 ++++- 2 files changed, 20 insertions(+), 35 deletions(-) diff --git a/orte/mca/rmaps/resilient/help-orte-rmaps-resilient.txt b/orte/mca/rmaps/resilient/help-orte-rmaps-resilient.txt index b91d5f375d..5b512d162e 100644 --- a/orte/mca/rmaps/resilient/help-orte-rmaps-resilient.txt +++ b/orte/mca/rmaps/resilient/help-orte-rmaps-resilient.txt @@ -10,43 +10,19 @@ # # This is the US/English general help file for the resilient mapper. # -[orte-rmaps-resilient:alloc-error] -There are not enough slots available in the system to satisfy the %d slots -that were requested by the application: - %s - -Either request fewer slots for your application, or make more slots available -for use. -[orte-rmaps-resilient:multi-apps-and-zero-np] -RMAPS found multiple applications to be launched, with -at least one that failed to specify the number of processes to execute. -When specifying multiple applications, you must specify how many processes -of each to launch via the -np argument. - -[orte-rmaps-resilient:per-node-and-too-many-procs] -There are not enough nodes in your allocation to satisfy your request to launch -%d processes on a per-node basis - only %d nodes were available. - -Either request fewer processes, or obtain a larger allocation. -[orte-rmaps-resilient:n-per-node-and-too-many-procs] -There are not enough nodes in your allocation to satisfy your request to launch -%d processes on a %d per-node basis - only %d nodes with a total of %d slots were available. - -Either request fewer processes, or obtain a larger allocation. 
-[orte-rmaps-resilient:n-per-node-and-not-enough-slots] -There are not enough slots on the nodes in your allocation to satisfy your request to launch on a %d process-per-node basis - only %d slots/node were available. - -Either request fewer processes/node, or obtain a larger allocation. - -[orte-rmaps-resilient:no-np-and-user-map] -You have specified a rank-to-node/slot mapping, but failed to provide -the number of processes to be executed. For some reason, this information -could not be obtained from the mapping you provided, so we cannot continue -with executing the specified application. -# [orte-rmaps-resilient:file-not-found] The specified file that describes the fault groups for this system: FILE: %s was not found. Please verify the file name and location. +# +[orte-rmaps-resilient:num-procs] +The resilient mapper requires that you specify the number of processes +to be launched for each application. Please provide the required information +and try again. + +Alternatively, if you truly wish to take advantage of the -perxxx options +or to simply launch one process on every available slot, do not specify the +resilient mapper. Mpirun will automatically select the appropriate mapper +to support your request. 
diff --git a/orte/mca/rmaps/resilient/rmaps_resilient.c b/orte/mca/rmaps/resilient/rmaps_resilient.c index fe5c06482c..06dd411dcb 100644 --- a/orte/mca/rmaps/resilient/rmaps_resilient.c +++ b/orte/mca/rmaps/resilient/rmaps_resilient.c @@ -136,7 +136,7 @@ static int orte_rmaps_resilient_map(orte_job_t *jdata) if (NULL == fp) { /* not found */ orte_show_help("help-orte-rmaps-resilient.txt", "orte-rmaps-resilient:file-not-found", true, mca_rmaps_resilient_component.fault_group_file); - return ORTE_ERROR; + return ORTE_ERR_SILENT; } /* build list of fault groups */ grp = 0; @@ -356,6 +356,15 @@ static int orte_rmaps_resilient_map(orte_job_t *jdata) if (NULL == (app = (orte_app_context_t*)opal_pointer_array_get_item(jdata->apps, i))) { continue; } + /* you cannot use this mapper unless you specify the number of procs to + * launch for each app + */ + if (0 == app->num_procs) { + orte_show_help("help-orte-rmaps-resilient.txt", + "orte-rmaps-resilient:num-procs", + true); + return ORTE_ERR_SILENT; + } num_assigned = 0; /* for each app_context, we have to get the list of nodes that it can * use since that can now be modified with a hostfile and/or -host