From 6fb81f20e4ed104de76ba7d9e02cb9e8e1d15d4d Mon Sep 17 00:00:00 2001 From: Nathan Hjelm Date: Fri, 7 Jul 2017 09:18:55 -0600 Subject: [PATCH] mtl/psm2: create mca variables to shadow PSM2 environment variables This commit enables MCA support for the following PSM2 environment variables: PSM2_DEVICES, PSM2_MEMORY, PSM2_MQ_SENDREQS_MAX, PSM2_MQ_RECVREQS_MAX, PSM2_MQ_RNDV_HFI_THRESH, PSM2_MQ_RNDV_SHM_THRESH, PSM2_RCVTHREAD, PSM2_SHAREDCONTEXTS, PSM2_SHAREDCONTEXTS_MAX, and PSM2_TRACEMASK. These variables can be set by MCA if they are not already set in the environment. Signed-off-by: Nathan Hjelm --- ompi/mca/mtl/psm2/mtl_psm2_component.c | 169 +++++++++++++++++++++---- ompi/mca/mtl/psm2/mtl_psm2_types.h | 15 ++- 2 files changed, 155 insertions(+), 29 deletions(-) diff --git a/ompi/mca/mtl/psm2/mtl_psm2_component.c b/ompi/mca/mtl/psm2/mtl_psm2_component.c index c16acb6e3c..e899dde4f6 100644 --- a/ompi/mca/mtl/psm2/mtl_psm2_component.c +++ b/ompi/mca/mtl/psm2/mtl_psm2_component.c @@ -11,8 +11,8 @@ * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2006-2010 QLogic Corporation. All rights reserved. - * Copyright (c) 2012-2015 Los Alamos National Security, LLC. - * All rights reserved. + * Copyright (c) 2012-2017 Los Alamos National Security, LLC. All rights + * reserved. * Copyright (c) 2013-2015 Intel, Inc. All rights reserved * Copyright (c) 2017 Research Organization for Information Science * and Technology (RIST). All rights reserved. 
@@ -77,9 +77,129 @@ mca_mtl_psm2_component_t mca_mtl_psm2_component = { } }; +struct ompi_mtl_psm2_shadow_variable { + int variable_type; + void *storage; + mca_base_var_storage_t default_value; + const char *env_name; + mca_base_var_info_lvl_t info_level; + const char *mca_name; + const char *description; +}; + +struct ompi_mtl_psm2_shadow_variable ompi_mtl_psm2_shadow_variables[] = { + {MCA_BASE_VAR_TYPE_STRING, &ompi_mtl_psm2.psm2_devices, {.stringval = "self,shm,hfi"}, "PSM2_DEVICES", OPAL_INFO_LVL_3, + "devices", "Comma-delimited list of PSM2 devices. Valid values: self, shm, hfi (default: self,shm,hfi)"}, + {MCA_BASE_VAR_TYPE_STRING, &ompi_mtl_psm2.psm2_memory, {.stringval = "normal"}, "PSM2_MEMORY", OPAL_INFO_LVL_9, + "memory_model", "PSM2 memory usage mode (default: normal)"}, + {MCA_BASE_VAR_TYPE_UNSIGNED_LONG, &ompi_mtl_psm2.psm2_mq_sendreqs_max, {.ulval = 1048576}, "PSM2_MQ_SENDREQS_MAX", OPAL_INFO_LVL_3, + "mq_sendreqs_max", "PSM2 maximum number of isend requests in flight (default: 1M)"}, + {MCA_BASE_VAR_TYPE_UNSIGNED_LONG, &ompi_mtl_psm2.psm2_mq_recvreqs_max, {.ulval = 1048576}, "PSM2_MQ_RECVREQS_MAX", OPAL_INFO_LVL_3, + "mq_recvreqs_max", "PSM2 maximum number of irecv requests in flight (default: 1M)"}, + {MCA_BASE_VAR_TYPE_UNSIGNED_LONG, &ompi_mtl_psm2.psm2_mq_rndv_hfi_threshold, {.ulval = 64000}, "PSM2_MQ_RNDV_HFI_THRESH", OPAL_INFO_LVL_3, + "hfi_eager_limit", "PSM2 eager to rendezvous threshold (default: 64000)"}, + {MCA_BASE_VAR_TYPE_UNSIGNED_LONG, &ompi_mtl_psm2.psm2_mq_rndv_shm_threshold, {.ulval = 16000}, "PSM2_MQ_RNDV_SHM_THRESH", OPAL_INFO_LVL_3, + "shm_eager_limit", "PSM2 shared memory eager to rendezvous threshold (default: 16000)"}, + {MCA_BASE_VAR_TYPE_BOOL, &ompi_mtl_psm2.psm2_recvthread, {.boolval = true}, "PSM2_RCVTHREAD", OPAL_INFO_LVL_3, + "use_receive_thread", "Use PSM2 progress thread (default: true)"}, + {MCA_BASE_VAR_TYPE_BOOL, &ompi_mtl_psm2.psm2_shared_contexts, {.boolval = true}, "PSM2_SHAREDCONTEXTS", OPAL_INFO_LVL_6, + 
"use_shared_contexts", "Share PSM contexts between MPI processes (default: true)"}, + {MCA_BASE_VAR_TYPE_UNSIGNED_LONG, &ompi_mtl_psm2.psm2_shared_contexts_max, {.ulval = 8}, "PSM2_SHAREDCONTEXTS_MAX", OPAL_INFO_LVL_9, + "max_shared_contexts", "Maximum number of contexts available on a node (default: 8, max: 8)"}, + {MCA_BASE_VAR_TYPE_UNSIGNED_LONG, &ompi_mtl_psm2.psm2_tracemask, {.ulval = 1}, "PSM2_TRACEMASK", OPAL_INFO_LVL_9, + "trace_mask", "PSM2 tracemask value. See PSM2 documentation for accepted values (default: 1)"}, + {-1}, +}; + +static void ompi_mtl_psm2_set_shadow_env (struct ompi_mtl_psm2_shadow_variable *variable) +{ + mca_base_var_storage_t *storage = variable->storage; + char *env_value; + int ret; + + switch (variable->variable_type) { + case MCA_BASE_VAR_TYPE_BOOL: + ret = asprintf (&env_value, "%s=%s", variable->env_name, storage->boolval ? "YES" : "NO"); + break; + case MCA_BASE_VAR_TYPE_UNSIGNED_LONG: + if (0 == strcmp (variable->env_name, "PSM2_TRACEMASK")) { + /* PSM2 documentation shows the tracemask as a hexidecimal number. to be consitent + * use hexidecimal here. 
*/ + ret = asprintf (&env_value, "%s=0x%lx", variable->env_name, storage->ulval); + } else { + ret = asprintf (&env_value, "%s=%lu", variable->env_name, storage->ulval); + } + break; + case MCA_BASE_VAR_TYPE_STRING: + ret = asprintf (&env_value, "%s=%s", variable->env_name, storage->stringval); + break; + } + + if (0 > ret) { + fprintf (stderr, "ERROR setting PSM2 environment variable: %s\n", variable->env_name); + } else { + putenv (env_value); + } +} + +static void ompi_mtl_psm2_register_shadow_env (struct ompi_mtl_psm2_shadow_variable *variable) +{ + mca_base_var_storage_t *storage = variable->storage; + char *env_value; + + env_value = getenv (variable->env_name); + switch (variable->variable_type) { + case MCA_BASE_VAR_TYPE_BOOL: + if (env_value) { + int tmp; + (void) mca_base_var_enum_bool.value_from_string (&mca_base_var_enum_bool, env_value, &tmp); + storage->boolval = !!tmp; + } else { + storage->boolval = variable->default_value.boolval; + } + break; + case MCA_BASE_VAR_TYPE_UNSIGNED_LONG: + if (env_value) { + storage->ulval = strtol (env_value, NULL, 0); + } else { + storage->ulval = variable->default_value.ulval; + } + break; + case MCA_BASE_VAR_TYPE_STRING: + if (env_value) { + storage->stringval = env_value; + } else { + storage->stringval = variable->default_value.stringval; + } + break; + } + + (void) mca_base_component_var_register (&mca_mtl_psm2_component.super.mtl_version, variable->mca_name, variable->description, + variable->variable_type, NULL, 0, 0, variable->info_level, MCA_BASE_VAR_SCOPE_READONLY, + variable->storage); +} + +static int +get_num_total_procs(int *out_ntp) +{ + *out_ntp = (int)ompi_process_info.num_procs; + return OMPI_SUCCESS; +} + +static int +get_num_local_procs(int *out_nlp) +{ + /* num_local_peers does not include us in + * its calculation, so adjust for that */ + *out_nlp = (int)(1 + ompi_process_info.num_local_peers); + return OMPI_SUCCESS; +} + static int ompi_mtl_psm2_component_register(void) { + int num_local_procs, 
num_total_procs; + ompi_mtl_psm2.connect_timeout = 180; (void) mca_base_component_var_register(&mca_mtl_psm2_component.super.mtl_version, "connect_timeout", @@ -89,8 +209,20 @@ ompi_mtl_psm2_component_register(void) MCA_BASE_VAR_SCOPE_READONLY, &ompi_mtl_psm2.connect_timeout); + + (void) get_num_local_procs(&num_local_procs); + (void) get_num_total_procs(&num_total_procs); + /* set priority high enough to beat ob1's default (also set higher than psm) */ - param_priority = 40; + if (num_local_procs == num_total_procs) { + /* disable hfi if all processes are local */ + setenv("PSM2_DEVICES", "self,shm", 0); + /* ob1 is much faster than psm2 with shared memory */ + param_priority = 10; + } else { + param_priority = 40; + } + (void) mca_base_component_var_register (&mca_mtl_psm2_component.super.mtl_version, "priority", "Priority of the PSM2 MTL component", MCA_BASE_VAR_TYPE_INT, NULL, 0, 0, @@ -98,6 +230,11 @@ ompi_mtl_psm2_component_register(void) MCA_BASE_VAR_SCOPE_READONLY, &param_priority); + + for (int i = 0 ; ompi_mtl_psm2_shadow_variables[i].variable_type >= 0 ; ++i) { + ompi_mtl_psm2_register_shadow_env (ompi_mtl_psm2_shadow_variables + i); + } + return OMPI_SUCCESS; } @@ -172,22 +309,6 @@ ompi_mtl_psm2_component_close(void) return OMPI_SUCCESS; } -static int -get_num_total_procs(int *out_ntp) -{ - *out_ntp = (int)ompi_process_info.num_procs; - return OMPI_SUCCESS; -} - -static int -get_num_local_procs(int *out_nlp) -{ - /* num_local_peers does not include us in - * its calculation, so adjust for that */ - *out_nlp = (int)(1 + ompi_process_info.num_local_peers); - return OMPI_SUCCESS; -} - static int get_local_rank(int *out_rank) { @@ -211,7 +332,6 @@ ompi_mtl_psm2_component_init(bool enable_progress_threads, int verno_major = PSM2_VERNO_MAJOR; int verno_minor = PSM2_VERNO_MINOR; int local_rank = -1, num_local_procs = 0; - int num_total_procs = 0; /* Compute the total number of processes on this host and our local rank * on that node. 
We need to provide PSM2 with these values so it can @@ -226,11 +346,6 @@ ompi_mtl_psm2_component_init(bool enable_progress_threads, opal_output(0, "Cannot determine local rank. Cannot continue.\n"); return NULL; } - if (OMPI_SUCCESS != get_num_total_procs(&num_total_procs)) { - opal_output(0, "Cannot determine total number of processes. " - "Cannot continue.\n"); - return NULL; - } err = psm2_error_register_handler(NULL /* no ep */, PSM2_ERRHANDLER_NOP); @@ -240,8 +355,8 @@ ompi_mtl_psm2_component_init(bool enable_progress_threads, return NULL; } - if (num_local_procs == num_total_procs) { - setenv("PSM2_DEVICES", "self,shm", 0); + for (int i = 0 ; ompi_mtl_psm2_shadow_variables[i].variable_type >= 0 ; ++i) { + ompi_mtl_psm2_set_shadow_env (ompi_mtl_psm2_shadow_variables + i); } err = psm2_init(&verno_major, &verno_minor); diff --git a/ompi/mca/mtl/psm2/mtl_psm2_types.h b/ompi/mca/mtl/psm2/mtl_psm2_types.h index 31f0deb7ca..806447fefa 100644 --- a/ompi/mca/mtl/psm2/mtl_psm2_types.h +++ b/ompi/mca/mtl/psm2/mtl_psm2_types.h @@ -1,3 +1,4 @@ +/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ /* * Copyright (c) 2004-2006 The Trustees of Indiana University and Indiana * University Research and Technology @@ -10,8 +11,8 @@ * Copyright (c) 2004-2006 The Regents of the University of California. * All rights reserved. * Copyright (c) 2006 QLogic Corporation. All rights reserved. - * Copyright (c) 2011 Los Alamos National Security, LLC. - * All rights reserved. + * Copyright (c) 2011-2017 Los Alamos National Security, LLC. All rights + * reserved. * Copyright (c) 2013-2015 Intel, Inc. 
All rights reserved * $COPYRIGHT$ * @@ -49,6 +50,16 @@ struct mca_mtl_psm2_module_t { psm2_mq_t mq; psm2_epid_t epid; psm2_epaddr_t epaddr; + char *psm2_devices; + char *psm2_memory; + unsigned long psm2_mq_sendreqs_max; + unsigned long psm2_mq_recvreqs_max; + unsigned long psm2_mq_rndv_hfi_threshold; + unsigned long psm2_mq_rndv_shm_threshold; + unsigned long psm2_shared_contexts_max; + unsigned long psm2_tracemask; + bool psm2_recvthread; + bool psm2_shared_contexts; }; typedef struct mca_mtl_psm2_module_t mca_mtl_psm2_module_t;