1
1

Mostly another hack-around for PML selection: allows CM to select itself if an

MTL is available; if not, OB1 is used. Still prevents DR and OB1 from stomping
on each other, though.

This commit was SVN r13481.
Этот коммит содержится в:
Galen Shipman 2007-02-03 02:01:18 +00:00
родитель 4e506e69e5
Коммит a94101fa62
8 изменённых файлов: 89 добавлений и 47 удалений

Просмотреть файл

@ -377,8 +377,10 @@ mca_btl_base_module_t** mca_btl_mx_component_init(int *num_btl_modules,
/* First check if MX is available ... */
if( MX_SUCCESS != (status = mx_init()) ) {
opal_output( 0, "mca_btl_mx_component_init: mx_init() failed with status = %d (%s)\n",
status, mx_strerror(status) );
if(MX_ALREADY_INITIALIZED != status) {
opal_output( 0, "mca_btl_mx_component_init: mx_init() failed with status = %d (%s)\n",
status, mx_strerror(status) );
}
mca_pml_base_modex_send(&mca_btl_mx_component.super.btl_version,
NULL, 0);
return NULL;

Просмотреть файл

@ -126,11 +126,14 @@ ompi_mtl_mx_component_init(bool enable_progress_threads,
/* initialize the mx library */
mx_return = mx_init();
if(mx_return!=MX_SUCCESS) {
opal_output(ompi_mtl_base_output,
"Error in mx_init (error %s)\n",
mx_strerror(mx_return));
return NULL;
if(MX_SUCCESS != mx_return){
if(MX_ALREADY_INITIALIZED != mx_return) {
opal_output(ompi_mtl_base_output,
"Error in mx_init (error %s)\n",
mx_strerror(mx_return));
} else {
return NULL;
}
}
ret = ompi_mtl_mx_module_init();

Просмотреть файл

@ -23,7 +23,7 @@
#include "opal/mca/mca.h"
#include "ompi/mca/pml/pml.h"
#include "ompi/class/ompi_pointer_array.h"
/*
* Global functions for the PML
@ -46,7 +46,7 @@ OMPI_DECLSPEC extern int mca_pml_base_output;
OMPI_DECLSPEC extern opal_list_t mca_pml_base_components_available;
OMPI_DECLSPEC extern mca_pml_base_component_t mca_pml_base_selected_component;
OMPI_DECLSPEC extern mca_pml_base_module_t mca_pml;
OMPI_DECLSPEC extern char* mca_pml_base_pml;
OMPI_DECLSPEC extern ompi_pointer_array_t mca_pml_base_pml;
#if defined(c_plusplus) || defined(__cplusplus)
}

Просмотреть файл

@ -69,7 +69,7 @@ mca_pml_base_module_t mca_pml = {
opal_list_t mca_pml_base_components_available;
mca_pml_base_component_t mca_pml_base_selected_component;
char *mca_pml_base_pml;
ompi_pointer_array_t mca_pml_base_pml;
/**
* Function for finding and opening either all MCA components, or the one
@ -77,7 +77,7 @@ char *mca_pml_base_pml;
*/
int mca_pml_base_open(void)
{
char* default_pml;
char* default_pml = NULL;
/* Open up all available components */
@ -95,26 +95,32 @@ int mca_pml_base_open(void)
/**
* Right now our selection of BTLs is completely broken. If we have
* multiple PMLs we will open all BTLs several times, leading to
multiple PMLs that use BTLs then we will open all BTLs several times, leading to
* undefined behaviors. The simplest solution, at least until we
* figure out the correct way to do it, is to force a default value
* in the mca_pml_base_pml global.
* figure out the correct way to do it, is to force a default PML that
* uses BTLs and any other PMLs that do not in the mca_pml_base_pml array.
*/
OBJ_CONSTRUCT(&mca_pml_base_pml, ompi_pointer_array_t);
#if MCA_pml_DIRECT_CALL
default_pml = stringify(MCA_pml_DIRECT_CALL_COMPONENT);
ompi_pointer_array_add(&mca_pml_base_pml,
stringify(MCA_pml_DIRECT_CALL_COMPONENT));
#else
default_pml = "ob1";
mca_base_param_reg_string_name("pml", NULL,
"Specify a specific PML to use",
false, false, "", &default_pml);
if(0 == strlen(default_pml)){
ompi_pointer_array_add(&mca_pml_base_pml, strdup("ob1"));
ompi_pointer_array_add(&mca_pml_base_pml, strdup("cm"));
} else {
ompi_pointer_array_add(&mca_pml_base_pml, strdup(default_pml));
}
#endif
mca_base_param_lookup_string(
mca_base_param_register_string("pml",
NULL,
NULL,
NULL,
default_pml),
&mca_pml_base_pml);
if( NULL == mca_pml_base_pml )
mca_pml_base_pml = default_pml;
return OMPI_SUCCESS;
}

Просмотреть файл

@ -47,14 +47,15 @@ typedef struct opened_component_t {
int mca_pml_base_select(bool enable_progress_threads,
bool enable_mpi_threads)
{
int priority = 0, best_priority = 0;
int i, priority = 0, best_priority = 0;
bool skip_pml = false;
opal_list_item_t *item = NULL;
mca_base_component_list_item_t *cli = NULL;
mca_pml_base_component_t *component = NULL, *best_component = NULL;
mca_pml_base_module_t *module = NULL, *best_module = NULL;
opal_list_t opened;
opened_component_t *om = NULL;
/* Traverse the list of available components; call their init
functions. */
@ -67,16 +68,22 @@ int mca_pml_base_select(bool enable_progress_threads,
item = opal_list_get_next(item) ) {
cli = (mca_base_component_list_item_t *) item;
component = (mca_pml_base_component_t *) cli->cli_component;
skip_pml = false;
/* if there is an include list - item must be in the list to be included */
if( (NULL != mca_pml_base_pml) &&
(strcmp(component->pmlm_version.mca_component_name, mca_pml_base_pml) != 0) ) {
opal_output_verbose( 10, mca_pml_base_output,
"select: component %s not in the include list",
component->pmlm_version.mca_component_name );
for( i = 0; i < ompi_pointer_array_get_size(&mca_pml_base_pml); i++) {
if((strcmp(component->pmlm_version.mca_component_name,
(char *) ompi_pointer_array_get_item(&mca_pml_base_pml, i)) != 0)) {
opal_output_verbose( 10, mca_pml_base_output,
"select: component %s not in the include list",
component->pmlm_version.mca_component_name );
skip_pml = true;
} else {
skip_pml = false;
}
}
if(skip_pml) {
continue;
}
if (NULL == component->pmlm_init) {
opal_output_verbose( 10, mca_pml_base_output,
"select: no init function; ignoring component %s",
@ -87,6 +94,7 @@ int mca_pml_base_select(bool enable_progress_threads,
"select: initializing %s component %s",
component->pmlm_version.mca_type_name,
component->pmlm_version.mca_component_name );
priority = best_priority;
module = component->pmlm_init(&priority, enable_progress_threads,
enable_mpi_threads);
if (NULL == module) {
@ -113,16 +121,21 @@ int mca_pml_base_select(bool enable_progress_threads,
}
/* Finished querying all components. Check for the bozo case. */
if( NULL == best_component ) {
opal_show_help("help-mca-base.txt", "find-available:none-found", true, "pml");
if( NULL != mca_pml_base_pml ) {
orte_errmgr.error_detected(1, "PML %s cannot be selected", mca_pml_base_pml, NULL);
} else {
for( i = 0; i < ompi_pointer_array_get_size(&mca_pml_base_pml); i++) {
orte_errmgr.error_detected(1, "PML %s cannot be selected", (char*) ompi_pointer_array_get_item(&mca_pml_base_pml, i), NULL);
}
if(0 == i) {
orte_errmgr.error_detected(2, "No pml component available. This shouldn't happen.", NULL);
}
}
opal_output_verbose( 10, mca_pml_base_output,
"selected %s best priority %d\n",
best_component->pmlm_version.mca_component_name, best_priority);
/* Finalize all non-selected components */
for (item = opal_list_remove_first(&opened);

Просмотреть файл

@ -61,7 +61,7 @@ mca_pml_base_component_1_0_0_t mca_pml_cm_component = {
static int free_list_num = 0;
static int free_list_max = 0;
static int free_list_inc = 0;
static int default_priority = 0;
static int default_priority = 2;
static int
mca_pml_cm_component_open(void)
@ -100,7 +100,7 @@ mca_pml_cm_component_open(void)
"CM PML selection priority",
false,
false,
1,
2,
&default_priority);
return OMPI_SUCCESS;
@ -120,13 +120,20 @@ mca_pml_cm_component_init(int* priority,
bool enable_mpi_threads)
{
int ret;
if((*priority) > default_priority) {
*priority = default_priority;
return NULL;
}
*priority = default_priority;
opal_output_verbose( 10, 0,
"in cm pml priority is %d\n", *priority);
/* find a useable MTL */
ret = ompi_mtl_base_select(enable_progress_threads, enable_mpi_threads);
if (OMPI_SUCCESS != ret) return NULL;
if (OMPI_SUCCESS != ret) {
*priority = -1;
return NULL;
}
/* update our tag / context id max values based on MTL
information */
ompi_pml_cm.super.pml_max_contextid = ompi_mtl->mtl_max_contextid;

Просмотреть файл

@ -192,8 +192,12 @@ mca_pml_base_module_t* mca_pml_dr_component_init(int* priority,
bool enable_progress_threads,
bool enable_mpi_threads)
{
if((*priority) > mca_pml_dr.priority) {
*priority = mca_pml_dr.priority;
return NULL;
}
*priority = mca_pml_dr.priority;
/* buffered send */
if(OMPI_SUCCESS != mca_pml_base_bsend_init(enable_mpi_threads)) {
opal_output(0, "mca_pml_dr_component_init: mca_pml_bsend_init failed\n");

Просмотреть файл

@ -219,6 +219,13 @@ mca_pml_base_module_t* mca_pml_ob1_component_init(int* priority,
bool enable_progress_threads,
bool enable_mpi_threads)
{
opal_output_verbose( 10, 0,
"in ob1, my priority is %d\n", mca_pml_ob1.priority);
if((*priority) > mca_pml_ob1.priority) {
*priority = mca_pml_ob1.priority;
return NULL;
}
*priority = mca_pml_ob1.priority;
/* buffered send */