Minor improvements to the usnic BTL:
1. Fix ompi_info memory leak in usnic BTL: do not allocate memory in the component register function, because ompi_info only calls the component register function and then dlclose's the component -- it does not call component finalize. Instead, defer parsing the MCA param (and alloc'ing memory) until the component init function so that any allocated memory can be freed in the component close function. 2. Also add a new check to ensure that we actually have some part numbers to check. Add a show_help message if we don't find any vendor part IDs to check. 3. Add a verbose output if usnic disqualifies itself from selection because THREAD_MULTIPLE was specified. cmr=v1.7.5:reviewer=dgoodell This commit was SVN r30073.
Этот коммит содержится в:
родитель
365ce2cd03
Коммит
6003702a51
@ -122,6 +122,7 @@ typedef struct ompi_btl_usnic_component_t {
|
||||
|
||||
char *if_include;
|
||||
char *if_exclude;
|
||||
char *vendor_part_ids_string;
|
||||
uint32_t *vendor_part_ids;
|
||||
|
||||
/* Cached hashed version of my RTE proc name (to stuff in
|
||||
|
@ -200,10 +200,6 @@ static int usnic_component_close(void)
|
||||
in their destructor */
|
||||
OBJ_DESTRUCT(&mca_btl_usnic_component.usnic_procs);
|
||||
|
||||
if (NULL != mca_btl_usnic_component.vendor_part_ids) {
|
||||
free(mca_btl_usnic_component.vendor_part_ids);
|
||||
}
|
||||
|
||||
if (usnic_clock_timer_event_set) {
|
||||
opal_event_del(&usnic_clock_timer_event);
|
||||
usnic_clock_timer_event_set = false;
|
||||
@ -299,6 +295,68 @@ static int check_reg_mem_basics(void)
|
||||
#endif
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
* Parse the value returned in the btl_usnic_vendor_part_ids MCA param
|
||||
*/
|
||||
static int parse_vendor_part_ids(void)
|
||||
{
|
||||
int i, ret = OMPI_SUCCESS;
|
||||
char *str = mca_btl_usnic_component.vendor_part_ids_string;
|
||||
char **parts = NULL;
|
||||
|
||||
/* Defensive programming; this should never actually happen */
|
||||
if (NULL == mca_btl_usnic_component.vendor_part_ids_string) {
|
||||
ret = OMPI_ERR_BAD_PARAM;
|
||||
mca_btl_usnic_component.vendor_part_ids_string = strdup("<empty>");
|
||||
goto out;
|
||||
}
|
||||
|
||||
/* Make sure the string starts with (optional whitespace and) a
|
||||
number: stop at the first non-whitespace and see if it's a
|
||||
digit. */
|
||||
for (i = 0; !isspace(str[i]) && '\0' != str[i]; ++i) {
|
||||
continue;
|
||||
}
|
||||
if (!isdigit(str[i])) {
|
||||
ret = OMPI_ERR_BAD_PARAM;
|
||||
goto out;
|
||||
}
|
||||
|
||||
/* Ok, we have at least one number */
|
||||
parts =
|
||||
opal_argv_split(mca_btl_usnic_component.vendor_part_ids_string, ',');
|
||||
mca_btl_usnic_component.vendor_part_ids =
|
||||
calloc(sizeof(uint32_t), opal_argv_count(parts) + 1);
|
||||
if (NULL == mca_btl_usnic_component.vendor_part_ids) {
|
||||
ret = OMPI_ERR_OUT_OF_RESOURCE;
|
||||
goto out;
|
||||
}
|
||||
for (i = 0, str = parts[0]; NULL != str; str = parts[++i]) {
|
||||
mca_btl_usnic_component.vendor_part_ids[i] = (uint32_t) atoi(str);
|
||||
}
|
||||
|
||||
out:
|
||||
if (NULL != parts) {
|
||||
opal_argv_free(parts);
|
||||
}
|
||||
|
||||
/* If the parameter was bad, show a help message */
|
||||
if (OMPI_ERR_BAD_PARAM == ret) {
|
||||
opal_show_help("help-mpi-btl-usnic.txt",
|
||||
"bad value for btl_usnic_vendor_part_ids",
|
||||
true,
|
||||
mca_btl_usnic_component.vendor_part_ids_string);
|
||||
}
|
||||
|
||||
/* Free the value that we got back from the MCA param (the MCA var
|
||||
system maintains its own interncal copy) */
|
||||
free(mca_btl_usnic_component.vendor_part_ids_string);
|
||||
mca_btl_usnic_component.vendor_part_ids_string = NULL;
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
static int read_device_sysfs(ompi_btl_usnic_module_t *module, const char *name)
|
||||
{
|
||||
int ret, fd;
|
||||
@ -421,6 +479,8 @@ static mca_btl_base_module_t** usnic_component_init(int* num_btl_modules,
|
||||
|
||||
/* Currently refuse to run if MPI_THREAD_MULTIPLE is enabled */
|
||||
if (ompi_mpi_thread_multiple && !mca_btl_base_thread_multiple_override) {
|
||||
opal_output_verbose(5, USNIC_OUT,
|
||||
"btl:usnic: MPI_THREAD_MULTIPLE not supported; skipping this component");
|
||||
return NULL;
|
||||
}
|
||||
|
||||
@ -439,6 +499,12 @@ static mca_btl_base_module_t** usnic_component_init(int* num_btl_modules,
|
||||
return NULL;
|
||||
}
|
||||
|
||||
/* Parse the vendor part IDs returned in
|
||||
btl_usnic_vendor_part_ids */
|
||||
if (OMPI_SUCCESS != parse_vendor_part_ids()) {
|
||||
return NULL;
|
||||
}
|
||||
|
||||
/************************************************************************
|
||||
* Below this line, we assume that usnic is loaded on all procs,
|
||||
* and therefore we will guarantee to do the modex send, even if
|
||||
|
@ -128,8 +128,7 @@ static int reg_int(const char* param_name,
|
||||
|
||||
int ompi_btl_usnic_component_register(void)
|
||||
{
|
||||
int i, tmp, ret = 0;
|
||||
char *str, **parts;
|
||||
int tmp, ret = 0;
|
||||
static int max_modules;
|
||||
static int stats_relative;
|
||||
static int want_numa_device_assignment;
|
||||
@ -142,7 +141,6 @@ int ompi_btl_usnic_component_register(void)
|
||||
static int eager_limit;
|
||||
static int rndv_eager_limit;
|
||||
static int pack_lazy_threshold;
|
||||
static char *vendor_part_ids;
|
||||
|
||||
#define CHECK(expr) do {\
|
||||
tmp = (expr); \
|
||||
@ -166,20 +164,18 @@ int ompi_btl_usnic_component_register(void)
|
||||
REGSTR_EMPTY_OK, OPAL_INFO_LVL_1));
|
||||
|
||||
/* Cisco Sereno-based VICs are part ID 207 */
|
||||
vendor_part_ids = NULL;
|
||||
CHECK(reg_string("vendor_part_ids",
|
||||
"Comma-delimited list verbs vendor part IDs to search for/use",
|
||||
"207", &vendor_part_ids, 0, OPAL_INFO_LVL_5));
|
||||
parts = opal_argv_split(vendor_part_ids, ',');
|
||||
mca_btl_usnic_component.vendor_part_ids =
|
||||
calloc(sizeof(uint32_t), opal_argv_count(parts) + 1);
|
||||
if (NULL == mca_btl_usnic_component.vendor_part_ids) {
|
||||
return OPAL_ERR_OUT_OF_RESOURCE;
|
||||
}
|
||||
for (i = 0, str = parts[0]; NULL != str; str = parts[++i]) {
|
||||
mca_btl_usnic_component.vendor_part_ids[i] = (uint32_t) atoi(str);
|
||||
}
|
||||
opal_argv_free(parts);
|
||||
"207",
|
||||
&mca_btl_usnic_component.vendor_part_ids_string,
|
||||
0, OPAL_INFO_LVL_5));
|
||||
/* Initialize the array to NULL here so that it can be checked as
|
||||
a sentinel value later. The value will be analyzed in
|
||||
component_init because it requires allocating memory. This is
|
||||
not a problem for MPI processes, but ompi_info only calls this
|
||||
register function without calling component finalize (meaning:
|
||||
the allocated memory would never be freed, i.e., it would be leaked). */
|
||||
mca_btl_usnic_component.vendor_part_ids = NULL;
|
||||
|
||||
CHECK(reg_int("stats",
|
||||
"A non-negative integer specifying the frequency at which each USNIC BTL will output statistics (default: 0 seconds, meaning that statistics are disabled)",
|
||||
|
@ -184,3 +184,11 @@ remote host.
|
||||
Local MTU: %d
|
||||
Remote host: %s
|
||||
Remote MTU: %d
|
||||
#
|
||||
[bad value for btl_usnic_vendor_part_ids]
|
||||
A non-numeric value was specified for the btl_usnic_vendor_part_ids
|
||||
MCA parameter. This parameter is supposed to be a comma-delimited
|
||||
list of decimal verbs vendor part IDs. This usnic BTL will be ignored
|
||||
for this job.
|
||||
|
||||
btl_usnic_vendor_part_ids value: %s
|
||||
|
Загрузка…
x
Ссылка в новой задаче
Block a user