1
1
* fix up lookup of supported ops by name:
        in OMPI 1.5.x the op string representation was changed from MPI_XXX to MPI_OP_XXX (relative to OMPI 1.4.x)
		* keep compatibility between different versions of FCA
		* better error handling (return an error if a symbol is not found)
		* register with opal_progress and call the fca_progress API

This commit was SVN r23597.
Этот коммит содержится в:
Mike Dubman 2010-08-12 08:15:55 +00:00
родитель 5715a5b421
Коммит ba5bc9b674
3 изменённых файлов: 120 добавлений и 62 удалений

Просмотреть файл

@ -48,9 +48,10 @@ BEGIN_C_DECLS
*/ */
struct mca_coll_fca_fca_ops_t { struct mca_coll_fca_fca_ops_t {
/* Initialization / cleanup */ /* FCA Context operations */
int (*init)(fca_init_spec_t *spec, fca_t **context); int (*init)(fca_init_spec_t *spec, fca_t **context);
void (*cleanup)(fca_t *context); void (*cleanup)(fca_t *context);
void (*progress)(fca_t *context);
/* Fabric communicator creation */ /* Fabric communicator creation */
int (*comm_new)(fca_t *context, fca_comm_new_spec_t *spec, fca_comm_desc_t *comm_desc); int (*comm_new)(fca_t *context, fca_comm_new_spec_t *spec, fca_comm_desc_t *comm_desc);
@ -71,6 +72,7 @@ struct mca_coll_fca_fca_ops_t {
int (*do_barrier)(fca_comm_t *comm); int (*do_barrier)(fca_comm_t *comm);
/* Helper functions */ /* Helper functions */
unsigned long (*get_version)(void);
int (*maddr_ib_pton)(const char *mlid_str, const char *mgid_str, fca_mcast_addr_t *dst); int (*maddr_ib_pton)(const char *mlid_str, const char *mgid_str, fca_mcast_addr_t *dst);
int (*maddr_inet_pton)(int af, const char *src, fca_mcast_addr_t *dst); int (*maddr_inet_pton)(int af, const char *src, fca_mcast_addr_t *dst);
fca_init_spec_t *(*parse_spec_file)(char* spec_ini_file); fca_init_spec_t *(*parse_spec_file)(char* spec_ini_file);

Просмотреть файл

@ -61,81 +61,41 @@ mca_coll_fca_component_t mca_coll_fca_component = {
} }
}; };
#define FCA_MINOR_BIT (16UL)
#define FCA_MAJOR_BIT (24UL)
static int fca_open(void) #define FCA_API_CLEAR_MICRO(__x) ((__x>>FCA_MINOR_BIT)<<FCA_MINOR_BIT)
{ #define FCA_API_VER(__major,__minor) (__major<<FCA_MAJOR_BIT | __minor<<FCA_MINOR_BIT)
FCA_VERBOSE(2, "==>");
const mca_base_component_t *c = &mca_coll_fca_component.super.collm_version;
mca_base_param_reg_int(c, "priority",
"Priority of the fca coll component",
false, false,
80,
&mca_coll_fca_component.fca_priority);
mca_base_param_reg_int(c, "verbose",
"Verbose level of the fca coll component",
false, false,
0,
&mca_coll_fca_component.fca_verbose);
mca_base_param_reg_int(c, "enable",
"[1|0|] Enable/Disable Fabric Collective Accelerator",
false, false,
1,
&mca_coll_fca_component.fca_enable);
mca_base_param_reg_string(c, "spec_file",
"Path to the FCA configuration file fca_mpi_spec.ini",
false, false,
""COLL_FCA_HOME"/etc/fca_mpi_spec.ini",
&mca_coll_fca_component.fca_spec_file);
mca_base_param_reg_string(c, "library_path",
"FCA /path/to/libfca.so",
false, false,
""COLL_FCA_HOME"/lib/libfca.so",
&mca_coll_fca_component.fca_lib_path);
mca_base_param_reg_int(c, "np",
"[integer] Minimal allowed job's NP to activate FCA",
false, false,
64,
&mca_coll_fca_component.fca_np);
mca_coll_fca_output = opal_output_open(NULL);
opal_output_set_verbosity(mca_coll_fca_output, mca_coll_fca_component.fca_verbose);
mca_coll_fca_component.fca_lib_handle = NULL;
mca_coll_fca_component.fca_context = NULL;
return OMPI_SUCCESS;
}
static int fca_close(void)
{
FCA_VERBOSE(2, "==>");
if (!mca_coll_fca_component.fca_lib_handle || !mca_coll_fca_component.fca_context)
return OMPI_SUCCESS;
mca_coll_fca_component.fca_ops.cleanup(mca_coll_fca_component.fca_context);
dlclose(mca_coll_fca_component.fca_lib_handle);
return OMPI_SUCCESS;
}
#define GET_FCA_SYM(__name) \ #define GET_FCA_SYM(__name) \
{ \ { \
mca_coll_fca_component.fca_ops.__name = dlsym(mca_coll_fca_component.fca_lib_handle, "fca_" #__name);\ mca_coll_fca_component.fca_ops.__name = dlsym(mca_coll_fca_component.fca_lib_handle, "fca_" #__name);\
if (!mca_coll_fca_component.fca_ops.__name) { \ if (!mca_coll_fca_component.fca_ops.__name) { \
FCA_ERROR("Symbol %s not found", "fca_" #__name); \ FCA_ERROR("Symbol %s not found", "fca_" #__name); \
return OMPI_ERROR; \
} \ } \
} }
/**
* Called from FCA blocking functions to progress MPI
*/
static void mca_coll_fca_progress_cb(void *arg) static void mca_coll_fca_progress_cb(void *arg)
{ {
opal_progress(); opal_progress();
} }
/**
 * Progress hook invoked from MPI blocking functions so that FCA can advance.
 *
 * Forwards to the FCA progress entry point when an FCA context exists;
 * does nothing otherwise. Always returns 0.
 */
static int mca_coll_fca_mpi_progress_cb(void)
{
    if (NULL != mca_coll_fca_component.fca_context) {
        /* A context is present: let the FCA library make progress on it. */
        mca_coll_fca_component.fca_ops.progress(mca_coll_fca_component.fca_context);
    }

    return 0;
}
/** /**
* Initialize translation tables for FCA datatypes and operations * Initialize translation tables for FCA datatypes and operations
*/ */
@ -159,6 +119,7 @@ int mca_coll_fca_get_fca_lib(struct ompi_communicator_t *comm)
{ {
struct fca_init_spec *spec; struct fca_init_spec *spec;
int ret; int ret;
unsigned long fca_ver;
if (mca_coll_fca_component.fca_lib_handle) if (mca_coll_fca_component.fca_lib_handle)
return OMPI_SUCCESS; return OMPI_SUCCESS;
@ -169,7 +130,12 @@ int mca_coll_fca_get_fca_lib(struct ompi_communicator_t *comm)
return OMPI_ERROR; return OMPI_ERROR;
} }
memset(&mca_coll_fca_component.fca_ops, 0, sizeof(mca_coll_fca_component.fca_ops));
FCA_VERBOSE(1, "FCA Loaded from: %s", mca_coll_fca_component.fca_lib_path); FCA_VERBOSE(1, "FCA Loaded from: %s", mca_coll_fca_component.fca_lib_path);
GET_FCA_SYM(get_version);
fca_ver = FCA_API_CLEAR_MICRO(mca_coll_fca_component.fca_ops.get_version());
GET_FCA_SYM(init); GET_FCA_SYM(init);
GET_FCA_SYM(cleanup); GET_FCA_SYM(cleanup);
GET_FCA_SYM(comm_new); GET_FCA_SYM(comm_new);
@ -210,5 +176,81 @@ int mca_coll_fca_get_fca_lib(struct ompi_communicator_t *comm)
mca_coll_fca_component.fca_ops.free_init_spec(spec); mca_coll_fca_component.fca_ops.free_init_spec(spec);
mca_coll_fca_init_fca_translations(); mca_coll_fca_init_fca_translations();
if (fca_ver > FCA_API_VER(1,2)) {
GET_FCA_SYM(progress);
opal_progress_register(mca_coll_fca_mpi_progress_cb);
}
return OMPI_SUCCESS;
}
/**
 * Release the FCA context and unload the FCA shared library.
 *
 * Steps, in order: unregister the MPI-side progress callback (only when the
 * FCA progress entry point is set), run the FCA cleanup routine on the
 * context, close the library handle, and reset both the context and the
 * handle to NULL.
 *
 * Neither the context nor the handle is validated here.
 */
static void mca_coll_fca_close_fca_lib(void)
{
    /* Stop the progress engine from calling into FCA before tearing it down. */
    if (mca_coll_fca_component.fca_ops.progress != NULL) {
        opal_progress_unregister(mca_coll_fca_mpi_progress_cb);
    }

    /* Destroy the FCA context while the library is still mapped. */
    mca_coll_fca_component.fca_ops.cleanup(mca_coll_fca_component.fca_context);
    mca_coll_fca_component.fca_context = NULL;

    /* Finally drop the shared library itself. */
    dlclose(mca_coll_fca_component.fca_lib_handle);
    mca_coll_fca_component.fca_lib_handle = NULL;
}
/**
 * Component open hook: registers the fca coll component's MCA parameters,
 * opens its output stream and resets the FCA library state.
 *
 * The return values of the mca_base_param_reg_* calls are not checked.
 *
 * @return OMPI_SUCCESS in all cases.
 */
static int fca_open(void)
{
/* NOTE(review): this trace runs before mca_coll_fca_output is opened and
 * before the "verbose" parameter is registered below - confirm that
 * FCA_VERBOSE is safe to call this early. */
FCA_VERBOSE(2, "==>");
const mca_base_component_t *c = &mca_coll_fca_component.super.collm_version;
/* Integer parameter "priority", default 80. */
mca_base_param_reg_int(c, "priority",
"Priority of the fca coll component",
false, false,
80,
&mca_coll_fca_component.fca_priority);
/* Integer parameter "verbose", default 0; applied to the output stream below. */
mca_base_param_reg_int(c, "verbose",
"Verbose level of the fca coll component",
false, false,
0,
&mca_coll_fca_component.fca_verbose);
/* Integer parameter "enable", default 1. */
mca_base_param_reg_int(c, "enable",
"[1|0|] Enable/Disable Fabric Collective Accelerator",
false, false,
1,
&mca_coll_fca_component.fca_enable);
/* String parameter "spec_file"; the default is built from COLL_FCA_HOME. */
mca_base_param_reg_string(c, "spec_file",
"Path to the FCA configuration file fca_mpi_spec.ini",
false, false,
""COLL_FCA_HOME"/etc/fca_mpi_spec.ini",
&mca_coll_fca_component.fca_spec_file);
/* String parameter "library_path"; the default is built from COLL_FCA_HOME. */
mca_base_param_reg_string(c, "library_path",
"FCA /path/to/libfca.so",
false, false,
""COLL_FCA_HOME"/lib/libfca.so",
&mca_coll_fca_component.fca_lib_path);
/* Integer parameter "np", default 64. */
mca_base_param_reg_int(c, "np",
"[integer] Minimal allowed job's NP to activate FCA",
false, false,
64,
&mca_coll_fca_component.fca_np);
/* Open the component's output stream and give it the registered verbosity. */
mca_coll_fca_output = opal_output_open(NULL);
opal_output_set_verbosity(mca_coll_fca_output, mca_coll_fca_component.fca_verbose);
/* No library handle or FCA context yet. */
mca_coll_fca_component.fca_lib_handle = NULL;
mca_coll_fca_component.fca_context = NULL;
return OMPI_SUCCESS;
}
static int fca_close(void)
{
FCA_VERBOSE(2, "==>");
if (!mca_coll_fca_component.fca_lib_handle || !mca_coll_fca_component.fca_context)
return OMPI_SUCCESS;
mca_coll_fca_close_fca_lib();
return OMPI_SUCCESS; return OMPI_SUCCESS;
} }

Просмотреть файл

@ -57,10 +57,23 @@ static mca_coll_fca_dtype_info_t* mca_coll_fca_get_dtype(ompi_datatype_t *dtype)
return dtype_info; return dtype_info;
} }
/**
 * Build the operation name string that is passed to FCA for op lookup.
 *
 * When op->o_name begins with "MPI_OP_", that prefix is replaced by "MPI_"
 * (for example "MPI_OP_SUM" becomes "MPI_SUM"). Any other name is copied
 * unchanged.
 *
 * @param op      operation whose o_name is translated
 * @param name    destination buffer, at least maxlen bytes long
 * @param maxlen  size of the destination buffer in bytes
 *
 * The result is always NUL-terminated within maxlen bytes and is truncated
 * if it does not fit. Nothing is written when maxlen is not positive.
 */
static void mca_coll_fca_get_op_name(ompi_op_t *op, char *name, int maxlen)
{
    static const char ompi_op_prefix[] = "MPI_OP_";
    static const char fca_op_prefix[] = "MPI_";
    const size_t ompi_prefix_len = sizeof(ompi_op_prefix) - 1;

    /* A non-positive length would turn into a huge size_t below. */
    if (maxlen <= 0) {
        return;
    }

    memset(name, 0, (size_t)maxlen);
    if (0 == strncmp(op->o_name, ompi_op_prefix, ompi_prefix_len)) {
        snprintf(name, (size_t)maxlen, "%s%s", fca_op_prefix,
                 op->o_name + ompi_prefix_len);
    } else {
        /* snprintf, unlike strncpy, terminates the string even when the
         * source is maxlen bytes or longer. */
        snprintf(name, (size_t)maxlen, "%s", op->o_name);
    }
}
static mca_coll_fca_op_info_t *mca_coll_fca_get_op(ompi_op_t *op) static mca_coll_fca_op_info_t *mca_coll_fca_get_op(ompi_op_t *op)
{ {
mca_coll_fca_op_info_t *op_info; mca_coll_fca_op_info_t *op_info;
int i, fca_op; int i, fca_op;
char opname[MPI_MAX_OBJECT_NAME + 1];
/* /*
* Find 'op' in the array by exhaustive search. We assume all valid ops are * Find 'op' in the array by exhaustive search. We assume all valid ops are
@ -72,7 +85,8 @@ static mca_coll_fca_op_info_t *mca_coll_fca_get_op(ompi_op_t *op)
if (op_info->mpi_op == op) { if (op_info->mpi_op == op) {
return op_info; return op_info;
} else if (op_info->mpi_op == MPI_OP_NULL) { } else if (op_info->mpi_op == MPI_OP_NULL) {
fca_op = mca_coll_fca_component.fca_ops.translate_mpi_op(op->o_name); mca_coll_fca_get_op_name(op, opname, MPI_MAX_OBJECT_NAME);
fca_op = mca_coll_fca_component.fca_ops.translate_mpi_op(opname);
if (fca_op < 0) if (fca_op < 0)
return NULL; return NULL;
op_info->mpi_op = op; op_info->mpi_op = op;