From a4557d4ed206b7b44abc80fc0ca90758689585dd Mon Sep 17 00:00:00 2001 From: Ralph Castain Date: Sat, 27 Jun 2015 17:17:31 -0700 Subject: [PATCH] Add new component to support OpenMP envars per request from IBM and LLNL --- orte/mca/rtc/omp/Makefile.am | 34 +++++ orte/mca/rtc/omp/owner.txt | 7 + orte/mca/rtc/omp/rtc_omp.c | 141 ++++++++++++++++++ orte/mca/rtc/omp/rtc_omp.h | 28 ++++ orte/mca/rtc/omp/rtc_omp_component.c | 70 +++++++++ .../data_type_support/orte_dt_packing_fns.c | 6 +- .../data_type_support/orte_dt_unpacking_fns.c | 8 +- 7 files changed, 292 insertions(+), 2 deletions(-) create mode 100644 orte/mca/rtc/omp/Makefile.am create mode 100644 orte/mca/rtc/omp/owner.txt create mode 100644 orte/mca/rtc/omp/rtc_omp.c create mode 100644 orte/mca/rtc/omp/rtc_omp.h create mode 100644 orte/mca/rtc/omp/rtc_omp_component.c diff --git a/orte/mca/rtc/omp/Makefile.am b/orte/mca/rtc/omp/Makefile.am new file mode 100644 index 0000000000..b698f7bb7f --- /dev/null +++ b/orte/mca/rtc/omp/Makefile.am @@ -0,0 +1,34 @@ +# +# Copyright (c) 2015 Intel, Inc. All rights reserved +# $COPYRIGHT$ +# +# Additional copyrights may follow +# +# $HEADER$ +# + +sources = \ + rtc_omp.c \ + rtc_omp.h \ + rtc_omp_component.c + +# Make the output library in this directory, and name it either +# mca_<type>_<name>.la (for DSO builds) or libmca_<type>_<name>.la +# (for static builds). 
+
+if MCA_BUILD_orte_rtc_omp_DSO
+component_noinst =
+component_install = mca_rtc_omp.la
+else
+component_noinst = libmca_rtc_omp.la
+component_install =
+endif
+
+mcacomponentdir = $(ortelibdir)
+mcacomponent_LTLIBRARIES = $(component_install)
+mca_rtc_omp_la_SOURCES = $(sources)
+mca_rtc_omp_la_LDFLAGS = -module -avoid-version
+
+noinst_LTLIBRARIES = $(component_noinst)
+libmca_rtc_omp_la_SOURCES =$(sources)
+libmca_rtc_omp_la_LDFLAGS = -module -avoid-version
diff --git a/orte/mca/rtc/omp/owner.txt b/orte/mca/rtc/omp/owner.txt
new file mode 100644
index 0000000000..85b4416d20
--- /dev/null
+++ b/orte/mca/rtc/omp/owner.txt
@@ -0,0 +1,7 @@
+#
+# owner/status file
+# owner: institution that is responsible for this package
+# status: e.g. active, maintenance, unmaintained
+#
+owner: INTEL
+status: active
diff --git a/orte/mca/rtc/omp/rtc_omp.c b/orte/mca/rtc/omp/rtc_omp.c
new file mode 100644
index 0000000000..9fae86d00c
--- /dev/null
+++ b/orte/mca/rtc/omp/rtc_omp.c
@@ -0,0 +1,152 @@
+/*
+ * Copyright (c) 2015 Intel, Inc. All rights reserved
+ * $COPYRIGHT$
+ *
+ * Additional copyrights may follow
+ *
+ * $HEADER$
+ */
+
+#include "orte_config.h"
+#include "orte/constants.h"
+#include "orte/types.h"
+
+#include <stdio.h>
+#ifdef HAVE_UNISTD_H
+#include <unistd.h>
+#endif /* HAVE_UNISTD_H */
+#ifdef HAVE_STRING_H
+#include <string.h>
+#endif /* HAVE_STRING_H */
+
+#include "opal/mca/hwloc/hwloc.h"
+#include "opal/util/argv.h"
+#include "opal/util/opal_environ.h"
+
+#include "orte/util/show_help.h"
+#include "orte/util/error_strings.h"
+#include "orte/runtime/orte_globals.h"
+#include "orte/mca/errmgr/errmgr.h"
+#include "orte/mca/rmaps/rmaps_types.h"
+
+#include "orte/mca/rtc/base/base.h"
+#include "rtc_omp.h"
+
+static int init(void);
+static void finalize(void);
+static void set(orte_job_t *jobdat,
+                orte_proc_t *child,
+                char ***environ_copy,
+                int write_fd);
+
+orte_rtc_base_module_t orte_rtc_omp_module = {
+    init,
+    finalize,
+    NULL,
+    set,
+    NULL
+};
+
+/* Module init hook: there is no state to set up. */
+static int init(void)
+{
+    return ORTE_SUCCESS;
+}
+
+/* Module finalize hook: there is nothing to release. */
+static void finalize(void)
+{
+}
+
+/* Turn the pending single cpus in *singles into one "{a,b,...}" place appended
+ * to *places, then free and reset *singles. No-op when nothing is pending. */
+static void flush_singles(char ***singles, char ***places)
+{
+    char *list, *place;
+
+    if (NULL == *singles) {
+        return;
+    }
+    list = opal_argv_join(*singles, ',');
+    /* asprintf leaves its output pointer undefined on failure, so only use
+     * the result when the call succeeded */
+    if (NULL != list && 0 <= asprintf(&place, "{%s}", list)) {
+        opal_argv_append_nosize(places, place);
+        free(place);
+    }
+    free(list);
+    opal_argv_free(*singles);
+    *singles = NULL;
+}
+
+/* Export the OpenMP binding envars for one child into *environ_copy.
+ * Reads the child's ORTE_PROC_CPU_BITMAP string attribute: if it is absent or
+ * empty, OMP_PROC_BIND=false; otherwise OMP_PROC_BIND=true and OMP_PLACES is
+ * built from its comma-separated entries ("a-b" becomes "{a:len}", each run of
+ * single entries becomes one "{x,y,...}"). Does nothing when jobdat or child
+ * is NULL. write_fd is not used by this component. */
+static void set(orte_job_t *jobdat,
+                orte_proc_t *child,
+                char ***environ_copy,
+                int write_fd)
+{
+    char *cpu_bitmap = NULL, *dash, *place, *places;
+    char **entries, **singles = NULL, **results = NULL;
+    long first, last;
+    int i;
+
+    opal_output_verbose(2, orte_rtc_base_framework.framework_output,
+                        "%s omp:set on child %s",
+                        ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
+                        (NULL == child) ? "NULL" : ORTE_NAME_PRINT(&child->name));
+
+    if (NULL == jobdat || NULL == child) {
+        /* nothing for us to do */
+        opal_output_verbose(2, orte_rtc_base_framework.framework_output,
+                            "%s omp:set jobdat %s child %s - nothing to do",
+                            ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
+                            (NULL == jobdat) ? "NULL" : ORTE_JOBID_PRINT(jobdat->jobid),
+                            (NULL == child) ? "NULL" : ORTE_NAME_PRINT(&child->name));
+        return;
+    }
+
+    /* See if we are bound. OpenMP accepts true/false (or a policy list) for
+     * OMP_PROC_BIND, not 0/1, so the value is spelled out. */
+    if (!orte_get_attribute(&child->attributes, ORTE_PROC_CPU_BITMAP, (void**)&cpu_bitmap, OPAL_STRING) ||
+        NULL == cpu_bitmap || 0 == strlen(cpu_bitmap)) {
+        opal_setenv("OMP_PROC_BIND", "false", true, environ_copy);
+        free(cpu_bitmap);
+        return;
+    }
+    opal_setenv("OMP_PROC_BIND", "true", true, environ_copy);
+
+    /* compose OMP_PLACES to indicate where we are bound - sadly, the OMP folks
+     * use a different syntax than HWLOC, and so we can't just provide the bitmap
+     * string. So we will traverse the bitmap and convert as required */
+    entries = opal_argv_split(cpu_bitmap, ',');
+    for (i = 0; NULL != entries && NULL != entries[i]; i++) {
+        if (NULL == (dash = strchr(entries[i], '-'))) {
+            opal_argv_append_nosize(&singles, entries[i]);
+            continue;
+        }
+        /* a range ends the current run of single cpus */
+        flush_singles(&singles, &results);
+        *dash = '\0';
+        first = strtol(entries[i], NULL, 10);
+        last = strtol(dash + 1, NULL, 10);
+        if (0 <= asprintf(&place, "{%ld:%ld}", first, last - first + 1)) {
+            opal_argv_append_nosize(&results, place);
+            free(place);
+        }
+    }
+    flush_singles(&singles, &results);
+    opal_argv_free(entries);
+
+    places = opal_argv_join(results, ',');
+    opal_argv_free(results);
+    if (NULL != places) {
+        opal_setenv("OMP_PLACES", places, true, environ_copy);
+        free(places);
+    }
+    free(cpu_bitmap);
+}
diff --git a/orte/mca/rtc/omp/rtc_omp.h b/orte/mca/rtc/omp/rtc_omp.h
new file mode 
100644 index 0000000000..6ee42c869c --- /dev/null +++ b/orte/mca/rtc/omp/rtc_omp.h @@ -0,0 +1,28 @@ +/* + * Copyright (c) 2015 Intel, Inc. All rights reserved + * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ +/** + * @file + */ +#ifndef ORTE_RTC_OMP_H +#define ORTE_RTC_OMP_H + +#include "orte_config.h" + +#include "orte/mca/rtc/rtc.h" + +BEGIN_C_DECLS + +ORTE_MODULE_DECLSPEC extern orte_rtc_base_component_t mca_rtc_omp_component; + +extern orte_rtc_base_module_t orte_rtc_omp_module; + + +END_C_DECLS + +#endif /* ORTE_RTC_OMP_H */ diff --git a/orte/mca/rtc/omp/rtc_omp_component.c b/orte/mca/rtc/omp/rtc_omp_component.c new file mode 100644 index 0000000000..8f2c766a02 --- /dev/null +++ b/orte/mca/rtc/omp/rtc_omp_component.c @@ -0,0 +1,70 @@ +/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ +/* + * Copyright (c) 2015 Intel, Inc. All rights reserved + * Copyright (c) 2015 Los Alamos National Security, LLC. All rights + * reserved. + * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ + +#include "orte_config.h" +#include "orte/constants.h" + +#include "opal/mca/base/base.h" + +#include "rtc_omp.h" + +/* + * Local functions + */ + +static int rtc_omp_query(mca_base_module_t **module, int *priority); +static int rtc_omp_register(void); + +static int my_priority; + +orte_rtc_base_component_t mca_rtc_omp_component = { + .base_version = { + ORTE_RTC_BASE_VERSION_1_0_0, + + .mca_component_name = "omp", + MCA_BASE_MAKE_VERSION(component, ORTE_MAJOR_VERSION, ORTE_MINOR_VERSION, + ORTE_RELEASE_VERSION), + .mca_query_component = rtc_omp_query, + .mca_register_component_params = rtc_omp_register, + }, + .base_data = { + /* The component is checkpoint ready */ + MCA_BASE_METADATA_PARAM_CHECKPOINT + }, +}; + + +static int rtc_omp_register(void) +{ + mca_base_component_t *c = &mca_rtc_omp_component.base_version; + + /* starting value of the "priority" variable registered below; meant to sit below the default (presumably that of the other rtc components - TODO confirm) */ + my_priority = 50; + (void) mca_base_component_var_register (c, 
"priority", "Priority of the OMP rtc component", + MCA_BASE_VAR_TYPE_INT, NULL, 0, 0, + OPAL_INFO_LVL_9, + MCA_BASE_VAR_SCOPE_READONLY, + &my_priority); + + return ORTE_SUCCESS; +} + + +static int rtc_omp_query(mca_base_module_t **module, int *priority) +{ + /* NOTE: no HNP check is made here - the module is always offered, at the registered priority */ + + *priority = my_priority; + *module = (mca_base_module_t *)&orte_rtc_omp_module; + + return ORTE_SUCCESS; +} diff --git a/orte/runtime/data_type_support/orte_dt_packing_fns.c b/orte/runtime/data_type_support/orte_dt_packing_fns.c index 025b670d41..79a941d195 100644 --- a/orte/runtime/data_type_support/orte_dt_packing_fns.c +++ b/orte/runtime/data_type_support/orte_dt_packing_fns.c @@ -570,7 +570,11 @@ int orte_dt_pack_map(opal_buffer_t *buffer, const void *src, ORTE_ERROR_LOG(rc); return rc; } - + /* pack the cpus/rank as OPAL_INT16, just ahead of the display map flag */ + if (ORTE_SUCCESS != (rc = opal_dss_pack_buffer(buffer, &(maps[i]->cpus_per_rank), 1, OPAL_INT16))) { + ORTE_ERROR_LOG(rc); + return rc; + } /* pack the display map flag */ if (ORTE_SUCCESS != (rc = opal_dss_pack_buffer(buffer, &(maps[i]->display_map), 1, OPAL_BOOL))) { ORTE_ERROR_LOG(rc); diff --git a/orte/runtime/data_type_support/orte_dt_unpacking_fns.c b/orte/runtime/data_type_support/orte_dt_unpacking_fns.c index f0c54fe444..069416bb2d 100644 --- a/orte/runtime/data_type_support/orte_dt_unpacking_fns.c +++ b/orte/runtime/data_type_support/orte_dt_unpacking_fns.c @@ -641,7 +641,13 @@ int orte_dt_unpack_map(opal_buffer_t *buffer, void *dest, ORTE_ERROR_LOG(rc); return rc; } - + /* unpack the cpus/rank as OPAL_INT16, just ahead of the display map flag */ + n = 1; + if (ORTE_SUCCESS != (rc = opal_dss_unpack_buffer(buffer, + &(maps[i]->cpus_per_rank), &n, OPAL_INT16))) { + ORTE_ERROR_LOG(rc); + return rc; + } /* unpack the display map flag */ n = 1; if (ORTE_SUCCESS != (rc = opal_dss_unpack_buffer(buffer,