1
1

Merge pull request #6321 from hppritcha/topic/fix_6236_for_v4.x

Topic/fix 6236 for v4.x
Этот коммит содержится в:
Howard Pritchard 2019-01-31 19:50:05 -06:00 коммит произвёл GitHub
родитель fb39c7f7e6 dae71d3a75
Коммит 4dfb9384cb
Не найден ключ, соответствующий данной подписи
Идентификатор ключа GPG: 4AEE18F83AFDEB23
5 изменённых файлов: 138 добавлений и 128 удалений

Просмотреть файл

@ -123,7 +123,8 @@ ORTE_DECLSPEC int orte_rmaps_base_filter_nodes(orte_app_context_t *app,
opal_list_t *nodes,
bool remove);
ORTE_DECLSPEC int orte_rmaps_base_set_mapping_policy(orte_mapping_policy_t *policy,
ORTE_DECLSPEC int orte_rmaps_base_set_mapping_policy(orte_job_t *jdata,
orte_mapping_policy_t *policy,
char **device, char *spec);
ORTE_DECLSPEC int orte_rmaps_base_set_ranking_policy(orte_ranking_policy_t *policy,
orte_mapping_policy_t mapping,

Просмотреть файл

@ -12,7 +12,7 @@
* Copyright (c) 2006-2015 Cisco Systems, Inc. All rights reserved.
* Copyright (c) 2011-2013 Los Alamos National Security, LLC.
* All rights reserved.
* Copyright (c) 2014-2018 Intel, Inc. All rights reserved.
* Copyright (c) 2014-2019 Intel, Inc. All rights reserved.
* Copyright (c) 2014-2015 Research Organization for Information Science
* and Technology (RIST). All rights reserved.
* $COPYRIGHT$
@ -296,7 +296,7 @@ static int orte_rmaps_base_open(mca_base_open_flag_t flags)
"rmaps_base_cpus_per_proc", "rmaps_base_mapping_policy=<obj>:PE=N, default <obj>=NUMA");
}
if (ORTE_SUCCESS != (rc = orte_rmaps_base_set_mapping_policy(&orte_rmaps_base.mapping,
if (ORTE_SUCCESS != (rc = orte_rmaps_base_set_mapping_policy(NULL, &orte_rmaps_base.mapping,
&orte_rmaps_base.device,
rmaps_base_mapping_policy))) {
return rc;
@ -593,7 +593,8 @@ static int check_modifiers(char *ck, orte_mapping_policy_t *tmp)
return ORTE_ERR_TAKE_NEXT_OPTION;
}
int orte_rmaps_base_set_mapping_policy(orte_mapping_policy_t *policy,
int orte_rmaps_base_set_mapping_policy(orte_job_t *jdata,
orte_mapping_policy_t *policy,
char **device, char *inspec)
{
char *ck;
@ -618,136 +619,144 @@ int orte_rmaps_base_set_mapping_policy(orte_mapping_policy_t *policy,
if (NULL == inspec) {
ORTE_SET_MAPPING_POLICY(tmp, ORTE_MAPPING_BYSOCKET);
} else {
spec = strdup(inspec); // protect the input string
/* see if a colon was included - if so, then we have a policy + modifier */
ck = strchr(spec, ':');
if (NULL != ck) {
/* if the colon is the first character of the string, then we
* just have modifiers on the default mapping policy */
if (ck == spec) {
ck++;
opal_output_verbose(5, orte_rmaps_base_framework.framework_output,
"%s rmaps:base only modifiers %s provided - assuming bysocket mapping",
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), ck);
ORTE_SET_MAPPING_POLICY(tmp, ORTE_MAPPING_BYSOCKET);
if (ORTE_ERR_SILENT == (rc = check_modifiers(ck, &tmp)) &&
goto setpolicy;
}
spec = strdup(inspec); // protect the input string
/* see if a colon was included - if so, then we have a policy + modifier */
ck = strchr(spec, ':');
if (NULL != ck) {
/* if the colon is the first character of the string, then we
* just have modifiers on the default mapping policy */
if (ck == spec) {
ck++; // step over the colon
opal_output_verbose(5, orte_rmaps_base_framework.framework_output,
"%s rmaps:base only modifiers %s provided - assuming bysocket mapping",
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), ck);
ORTE_SET_MAPPING_POLICY(tmp, ORTE_MAPPING_BYSOCKET);
if (ORTE_ERR_SILENT == (rc = check_modifiers(ck, &tmp)) &&
ORTE_ERR_BAD_PARAM != rc) {
free(spec);
return ORTE_ERR_SILENT;
}
free(spec);
goto setpolicy;
}
*ck = '\0'; // terminate spec where the colon was
ck++; // step past the colon
opal_output_verbose(5, orte_rmaps_base_framework.framework_output,
"%s rmaps:base policy %s modifiers %s provided",
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), spec, ck);
if (0 == strncasecmp(spec, "ppr", strlen(spec))) {
/* at this point, ck points to a string that contains at least
* two fields (specifying the #procs/obj and the object we are
* to map by). we have to allow additional modifiers here - e.g.,
* specifying #pe's/proc or oversubscribe - so check for modifiers. if
* they are present, ck will look like "N:obj:mod1,mod2,mod3"
*/
if (NULL == (ptr = strchr(ck, ':'))) {
/* this is an error - there had to be at least one
* colon to delimit the number from the object type
*/
orte_show_help("help-orte-rmaps-base.txt", "invalid-pattern", true, inspec);
free(spec);
return ORTE_ERR_SILENT;
}
ptr++; // move past the colon
/* at this point, ptr is pointing to the beginning of the string that describes
* the object plus any modifiers (i.e., "obj:mod1,mod2"). We first check to see if there
* is another colon indicating that there are modifiers to the request */
if (NULL != (cptr = strchr(ptr, ':'))) {
/* there are modifiers, so we terminate the object string
* at the location of the colon */
*cptr = '\0';
/* step over that colon */
cptr++;
/* now check for modifiers - may be none, so
* don't emit an error message if the modifier
* isn't recognized */
if (ORTE_ERR_SILENT == (rc = check_modifiers(cptr, &tmp)) &&
ORTE_ERR_BAD_PARAM != rc) {
free(spec);
return ORTE_ERR_SILENT;
}
free(spec);
goto setpolicy;
}
/* split the string */
*ck = '\0';
ck++;
opal_output_verbose(5, orte_rmaps_base_framework.framework_output,
"%s rmaps:base policy %s modifiers %s provided",
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), spec, ck);
/* if the policy is "dist", then we set the policy to that value
* and save the second argument as the device
*/
if (0 == strncasecmp(spec, "ppr", strlen(spec))) {
/* we have to allow additional modifiers here - e.g., specifying
* #pe's/proc or oversubscribe - so check for modifiers
*/
if (NULL == (ptr = strrchr(ck, ':'))) {
/* this is an error - there had to be at least one
* colon to delimit the number from the object type
*/
orte_show_help("help-orte-rmaps-base.txt", "invalid-pattern", true, inspec);
free(spec);
return ORTE_ERR_SILENT;
}
ptr++; // move past the colon
/* at this point, ck is pointing to the number of procs/object
* and ptr is pointing to the beginning of the string that describes
* the object plus any modifiers. We first check to see if there
* is a comma indicating that there are modifiers to the request */
if (NULL != (cptr = strchr(ptr, ','))) {
/* there are modifiers, so we terminate the object string
* at the location of the first comma */
*cptr = '\0';
/* step over that comma */
cptr++;
/* now check for modifiers - may be none, so
* don't emit an error message if the modifier
* isn't recognized */
if (ORTE_ERR_SILENT == (rc = check_modifiers(cptr, &tmp)) &&
ORTE_ERR_BAD_PARAM != rc) {
free(spec);
return ORTE_ERR_SILENT;
}
}
/* now save the pattern */
/* now save the pattern */
if (NULL == jdata || NULL == jdata->map) {
orte_rmaps_base.ppr = strdup(ck);
ORTE_SET_MAPPING_POLICY(tmp, ORTE_MAPPING_PPR);
ORTE_SET_MAPPING_DIRECTIVE(tmp, ORTE_MAPPING_GIVEN);
free(spec);
goto setpolicy;
}
if (ORTE_SUCCESS != (rc = check_modifiers(ck, &tmp)) &&
ORTE_ERR_TAKE_NEXT_OPTION != rc) {
if (ORTE_ERR_BAD_PARAM == rc) {
orte_show_help("help-orte-rmaps-base.txt", "unrecognized-modifier", true, inspec);
}
free(spec);
return rc;
}
}
len = strlen(spec);
if (0 == strncasecmp(spec, "slot", len)) {
ORTE_SET_MAPPING_POLICY(tmp, ORTE_MAPPING_BYSLOT);
} else if (0 == strncasecmp(spec, "node", len)) {
ORTE_SET_MAPPING_POLICY(tmp, ORTE_MAPPING_BYNODE);
} else if (0 == strncasecmp(spec, "seq", len)) {
ORTE_SET_MAPPING_POLICY(tmp, ORTE_MAPPING_SEQ);
} else if (0 == strncasecmp(spec, "core", len)) {
ORTE_SET_MAPPING_POLICY(tmp, ORTE_MAPPING_BYCORE);
} else if (0 == strncasecmp(spec, "l1cache", len)) {
ORTE_SET_MAPPING_POLICY(tmp, ORTE_MAPPING_BYL1CACHE);
} else if (0 == strncasecmp(spec, "l2cache", len)) {
ORTE_SET_MAPPING_POLICY(tmp, ORTE_MAPPING_BYL2CACHE);
} else if (0 == strncasecmp(spec, "l3cache", len)) {
ORTE_SET_MAPPING_POLICY(tmp, ORTE_MAPPING_BYL3CACHE);
} else if (0 == strncasecmp(spec, "socket", len)) {
ORTE_SET_MAPPING_POLICY(tmp, ORTE_MAPPING_BYSOCKET);
} else if (0 == strncasecmp(spec, "numa", len)) {
ORTE_SET_MAPPING_POLICY(tmp, ORTE_MAPPING_BYNUMA);
} else if (0 == strncasecmp(spec, "board", len)) {
ORTE_SET_MAPPING_POLICY(tmp, ORTE_MAPPING_BYBOARD);
} else if (0 == strncasecmp(spec, "hwthread", len)) {
ORTE_SET_MAPPING_POLICY(tmp, ORTE_MAPPING_BYHWTHREAD);
/* if we are mapping processes to individual hwthreads, then
* we need to treat those hwthreads as separate cpus
*/
opal_hwloc_use_hwthreads_as_cpus = true;
} else if (0 == strncasecmp(spec, "dist", len)) {
if (NULL != rmaps_dist_device) {
if (NULL != (pch = strchr(rmaps_dist_device, ':'))) {
*pch = '\0';
}
if (NULL != device) {
*device = strdup(rmaps_dist_device);
}
ORTE_SET_MAPPING_POLICY(tmp, ORTE_MAPPING_BYDIST);
} else {
orte_show_help("help-orte-rmaps-base.txt", "device-not-specified", true);
free(spec);
return ORTE_ERR_SILENT;
jdata->map->ppr = strdup(ck);
}
ORTE_SET_MAPPING_POLICY(tmp, ORTE_MAPPING_PPR);
ORTE_SET_MAPPING_DIRECTIVE(tmp, ORTE_MAPPING_GIVEN);
free(spec);
goto setpolicy;
}
if (ORTE_SUCCESS != (rc = check_modifiers(ck, &tmp)) &&
ORTE_ERR_TAKE_NEXT_OPTION != rc) {
if (ORTE_ERR_BAD_PARAM == rc) {
orte_show_help("help-orte-rmaps-base.txt", "unrecognized-modifier", true, inspec);
}
free(spec);
return rc;
}
}
len = strlen(spec);
if (0 == strncasecmp(spec, "slot", len)) {
ORTE_SET_MAPPING_POLICY(tmp, ORTE_MAPPING_BYSLOT);
} else if (0 == strncasecmp(spec, "node", len)) {
ORTE_SET_MAPPING_POLICY(tmp, ORTE_MAPPING_BYNODE);
} else if (0 == strncasecmp(spec, "seq", len)) {
ORTE_SET_MAPPING_POLICY(tmp, ORTE_MAPPING_SEQ);
} else if (0 == strncasecmp(spec, "core", len)) {
ORTE_SET_MAPPING_POLICY(tmp, ORTE_MAPPING_BYCORE);
} else if (0 == strncasecmp(spec, "l1cache", len)) {
ORTE_SET_MAPPING_POLICY(tmp, ORTE_MAPPING_BYL1CACHE);
} else if (0 == strncasecmp(spec, "l2cache", len)) {
ORTE_SET_MAPPING_POLICY(tmp, ORTE_MAPPING_BYL2CACHE);
} else if (0 == strncasecmp(spec, "l3cache", len)) {
ORTE_SET_MAPPING_POLICY(tmp, ORTE_MAPPING_BYL3CACHE);
} else if (0 == strncasecmp(spec, "socket", len)) {
ORTE_SET_MAPPING_POLICY(tmp, ORTE_MAPPING_BYSOCKET);
} else if (0 == strncasecmp(spec, "numa", len)) {
ORTE_SET_MAPPING_POLICY(tmp, ORTE_MAPPING_BYNUMA);
} else if (0 == strncasecmp(spec, "board", len)) {
ORTE_SET_MAPPING_POLICY(tmp, ORTE_MAPPING_BYBOARD);
} else if (0 == strncasecmp(spec, "hwthread", len)) {
ORTE_SET_MAPPING_POLICY(tmp, ORTE_MAPPING_BYHWTHREAD);
/* if we are mapping processes to individual hwthreads, then
* we need to treat those hwthreads as separate cpus
*/
opal_hwloc_use_hwthreads_as_cpus = true;
} else if (0 == strncasecmp(spec, "dist", len)) {
if (NULL != rmaps_dist_device) {
if (NULL != (pch = strchr(rmaps_dist_device, ':'))) {
*pch = '\0';
}
if (NULL != device) {
*device = strdup(rmaps_dist_device);
}
ORTE_SET_MAPPING_POLICY(tmp, ORTE_MAPPING_BYDIST);
} else {
orte_show_help("help-orte-rmaps-base.txt", "unrecognized-policy", true, "mapping", spec);
orte_show_help("help-orte-rmaps-base.txt", "device-not-specified", true);
free(spec);
return ORTE_ERR_SILENT;
}
} else {
orte_show_help("help-orte-rmaps-base.txt", "unrecognized-policy", true, "mapping", spec);
free(spec);
ORTE_SET_MAPPING_DIRECTIVE(tmp, ORTE_MAPPING_GIVEN);
return ORTE_ERR_SILENT;
}
free(spec);
ORTE_SET_MAPPING_DIRECTIVE(tmp, ORTE_MAPPING_GIVEN);
setpolicy:
*policy = tmp;
if (NULL == jdata || NULL == jdata->map) {
*policy = tmp;
} else {
jdata->map->mapping = tmp;
}
return ORTE_SUCCESS;
}

Просмотреть файл

@ -876,7 +876,7 @@ int orte_submit_job(char *argv[], int *index,
jdata->map = OBJ_NEW(orte_job_map_t);
if (NULL != orte_cmd_options.mapping_policy) {
if (ORTE_SUCCESS != (rc = orte_rmaps_base_set_mapping_policy(&jdata->map->mapping, NULL, orte_cmd_options.mapping_policy))) {
if (ORTE_SUCCESS != (rc = orte_rmaps_base_set_mapping_policy(jdata, &jdata->map->mapping, NULL, orte_cmd_options.mapping_policy))) {
ORTE_ERROR_LOG(rc);
return rc;
}

Просмотреть файл

@ -308,7 +308,7 @@ int pmix_server_spawn_fn(opal_process_name_t *requestor,
orte_rmaps_base_print_mapping(orte_rmaps_base.mapping));
return ORTE_ERR_BAD_PARAM;
}
rc = orte_rmaps_base_set_mapping_policy(&jdata->map->mapping,
rc = orte_rmaps_base_set_mapping_policy(jdata, &jdata->map->mapping,
NULL, info->data.string);
if (ORTE_SUCCESS != rc) {
return rc;

Просмотреть файл

@ -489,7 +489,7 @@ static void _query(int sd, short args, void *cbdata)
orte_job_t *jdata;
orte_proc_t *proct;
orte_app_context_t *app;
int rc, i, k, num_replies;
int rc = ORTE_SUCCESS, i, k, num_replies;
opal_list_t *results, targets, *array;
size_t n;
uint32_t key;
@ -716,7 +716,7 @@ static void _query(int sd, short args, void *cbdata)
}
}
if (ORTE_JOBID_INVALID == jobid) {
rc = ORTE_ERR_BAD_PARAM;
rc = ORTE_ERR_NOT_FOUND;
goto done;
}
/* construct a list of values with opal_proc_info_t
@ -810,12 +810,12 @@ static void _query(int sd, short args, void *cbdata)
}
done:
if (0 == opal_list_get_size(results)) {
rc = ORTE_ERR_NOT_FOUND;
} else if (opal_list_get_size(results) < opal_list_get_size(cd->info)) {
rc = ORTE_ERR_PARTIAL_SUCCESS;
} else {
rc = ORTE_SUCCESS;
if (ORTE_SUCCESS == rc) {
if (0 == opal_list_get_size(results)) {
rc = ORTE_ERR_NOT_FOUND;
} else if (opal_list_get_size(results) < opal_list_get_size(cd->info)) {
rc = ORTE_ERR_PARTIAL_SUCCESS;
}
}
cd->infocbfunc(rc, results, cd->cbdata, qrel, results);
}