From ba5bc9b674ef3ff108c64a9697ad014480261315 Mon Sep 17 00:00:00 2001 From: Mike Dubman Date: Thu, 12 Aug 2010 08:15:55 +0000 Subject: [PATCH] fixes: * fixup lookup of supported ops by name: in ompi 1.5.x the op string representation were changed from MPI_XXX to MPI_OP_XXX (relative to OMPI 1.4.x) * keep compat between diff versions of FCA * better error handling (return error if symbol not found) * register to opal_progress and call fca_progress API This commit was SVN r23597. --- ompi/mca/coll/fca/coll_fca.h | 4 +- ompi/mca/coll/fca/coll_fca_component.c | 162 ++++++++++++++++--------- ompi/mca/coll/fca/coll_fca_ops.c | 16 ++- 3 files changed, 120 insertions(+), 62 deletions(-) diff --git a/ompi/mca/coll/fca/coll_fca.h b/ompi/mca/coll/fca/coll_fca.h index 5c5c87cfc8..1f4f09331d 100644 --- a/ompi/mca/coll/fca/coll_fca.h +++ b/ompi/mca/coll/fca/coll_fca.h @@ -48,9 +48,10 @@ BEGIN_C_DECLS */ struct mca_coll_fca_fca_ops_t { - /* Initialization / cleanup */ + /* FCA Context operations */ int (*init)(fca_init_spec_t *spec, fca_t **context); void (*cleanup)(fca_t *context); + void (*progress)(fca_t *context); /* Fabric communicator creation */ int (*comm_new)(fca_t *context, fca_comm_new_spec_t *spec, fca_comm_desc_t *comm_desc); @@ -71,6 +72,7 @@ struct mca_coll_fca_fca_ops_t { int (*do_barrier)(fca_comm_t *comm); /* Helper functions */ + unsigned long (*get_version)(void); int (*maddr_ib_pton)(const char *mlid_str, const char *mgid_str, fca_mcast_addr_t *dst); int (*maddr_inet_pton)(int af, const char *src, fca_mcast_addr_t *dst); fca_init_spec_t *(*parse_spec_file)(char* spec_ini_file); diff --git a/ompi/mca/coll/fca/coll_fca_component.c b/ompi/mca/coll/fca/coll_fca_component.c index aed7607c69..9761fe3e95 100644 --- a/ompi/mca/coll/fca/coll_fca_component.c +++ b/ompi/mca/coll/fca/coll_fca_component.c @@ -61,81 +61,41 @@ mca_coll_fca_component_t mca_coll_fca_component = { } }; +#define FCA_MINOR_BIT (16UL) +#define FCA_MAJOR_BIT (24UL) -static int fca_open(void) -{ - FCA_VERBOSE(2, "==>"); - - const mca_base_component_t *c = &mca_coll_fca_component.super.collm_version; - - mca_base_param_reg_int(c, "priority", - "Priority of the fca coll component", - false, false, - 80, - &mca_coll_fca_component.fca_priority); - - mca_base_param_reg_int(c, "verbose", - "Verbose level of the fca coll component", - false, false, - 0, - &mca_coll_fca_component.fca_verbose); - - mca_base_param_reg_int(c, "enable", - "[1|0|] Enable/Disable Fabric Collective Accelerator", - false, false, - 1, - &mca_coll_fca_component.fca_enable); - - mca_base_param_reg_string(c, "spec_file", - "Path to the FCA configuration file fca_mpi_spec.ini", - false, false, - ""COLL_FCA_HOME"/etc/fca_mpi_spec.ini", - &mca_coll_fca_component.fca_spec_file); - - mca_base_param_reg_string(c, "library_path", - "FCA /path/to/libfca.so", - false, false, - ""COLL_FCA_HOME"/lib/libfca.so", - &mca_coll_fca_component.fca_lib_path); - - mca_base_param_reg_int(c, "np", - "[integer] Minimal allowed job's NP to activate FCA", - false, false, - 64, - &mca_coll_fca_component.fca_np); - - mca_coll_fca_output = opal_output_open(NULL); - opal_output_set_verbosity(mca_coll_fca_output, mca_coll_fca_component.fca_verbose); - mca_coll_fca_component.fca_lib_handle = NULL; - mca_coll_fca_component.fca_context = NULL; - return OMPI_SUCCESS; -} - -static int fca_close(void) -{ - FCA_VERBOSE(2, "==>"); - - if (!mca_coll_fca_component.fca_lib_handle || !mca_coll_fca_component.fca_context) - return OMPI_SUCCESS; - - mca_coll_fca_component.fca_ops.cleanup(mca_coll_fca_component.fca_context); - dlclose(mca_coll_fca_component.fca_lib_handle); - return OMPI_SUCCESS; -} +#define FCA_API_CLEAR_MICRO(__x) ((__x>>FCA_MINOR_BIT)< FCA_API_VER(1,2)) { + GET_FCA_SYM(progress); + opal_progress_register(mca_coll_fca_mpi_progress_cb); + } + return OMPI_SUCCESS; +} + +static void mca_coll_fca_close_fca_lib(void) +{ + if (NULL != mca_coll_fca_component.fca_ops.progress) { + opal_progress_unregister(mca_coll_fca_mpi_progress_cb); + } + mca_coll_fca_component.fca_ops.cleanup(mca_coll_fca_component.fca_context); + mca_coll_fca_component.fca_context = NULL; + dlclose(mca_coll_fca_component.fca_lib_handle); + mca_coll_fca_component.fca_lib_handle = NULL; +} + +static int fca_open(void) +{ + FCA_VERBOSE(2, "==>"); + + const mca_base_component_t *c = &mca_coll_fca_component.super.collm_version; + + mca_base_param_reg_int(c, "priority", + "Priority of the fca coll component", + false, false, + 80, + &mca_coll_fca_component.fca_priority); + + mca_base_param_reg_int(c, "verbose", + "Verbose level of the fca coll component", + false, false, + 0, + &mca_coll_fca_component.fca_verbose); + + mca_base_param_reg_int(c, "enable", + "[1|0|] Enable/Disable Fabric Collective Accelerator", + false, false, + 1, + &mca_coll_fca_component.fca_enable); + + mca_base_param_reg_string(c, "spec_file", + "Path to the FCA configuration file fca_mpi_spec.ini", + false, false, + ""COLL_FCA_HOME"/etc/fca_mpi_spec.ini", + &mca_coll_fca_component.fca_spec_file); + + mca_base_param_reg_string(c, "library_path", + "FCA /path/to/libfca.so", + false, false, + ""COLL_FCA_HOME"/lib/libfca.so", + &mca_coll_fca_component.fca_lib_path); + + mca_base_param_reg_int(c, "np", + "[integer] Minimal allowed job's NP to activate FCA", + false, false, + 64, + &mca_coll_fca_component.fca_np); + + mca_coll_fca_output = opal_output_open(NULL); + opal_output_set_verbosity(mca_coll_fca_output, mca_coll_fca_component.fca_verbose); + mca_coll_fca_component.fca_lib_handle = NULL; + mca_coll_fca_component.fca_context = NULL; + return OMPI_SUCCESS; +} + +static int fca_close(void) +{ + FCA_VERBOSE(2, "==>"); + + if (!mca_coll_fca_component.fca_lib_handle || !mca_coll_fca_component.fca_context) + return OMPI_SUCCESS; + + mca_coll_fca_close_fca_lib(); return OMPI_SUCCESS; } diff --git a/ompi/mca/coll/fca/coll_fca_ops.c b/ompi/mca/coll/fca/coll_fca_ops.c index 6ab0b154c5..ad929325fd 100644 --- a/ompi/mca/coll/fca/coll_fca_ops.c +++ b/ompi/mca/coll/fca/coll_fca_ops.c @@ -57,10 +57,23 @@ static mca_coll_fca_dtype_info_t* mca_coll_fca_get_dtype(ompi_datatype_t *dtype) return dtype_info; } +static void mca_coll_fca_get_op_name(ompi_op_t *op, char *name, int maxlen) +{ + const char *ompi_op_prefix = "MPI_OP_"; + const char *fca_op_prefix = "MPI_"; + + memset(name, 0, maxlen); + if (!strncmp(op->o_name, ompi_op_prefix, strlen(ompi_op_prefix))) + snprintf(name, maxlen, "%s%s", fca_op_prefix, op->o_name + strlen(ompi_op_prefix)); + else + strncpy(name, op->o_name, maxlen); +} + static mca_coll_fca_op_info_t *mca_coll_fca_get_op(ompi_op_t *op) { mca_coll_fca_op_info_t *op_info; int i, fca_op; + char opname[MPI_MAX_OBJECT_NAME + 1]; /* * Find 'op' in the array by exhaustive search. We assume all valid ops are @@ -72,7 +85,8 @@ static mca_coll_fca_op_info_t *mca_coll_fca_get_op(ompi_op_t *op) if (op_info->mpi_op == op) { return op_info; } else if (op_info->mpi_op == MPI_OP_NULL) { - fca_op = mca_coll_fca_component.fca_ops.translate_mpi_op(op->o_name); + mca_coll_fca_get_op_name(op, opname, MPI_MAX_OBJECT_NAME); + fca_op = mca_coll_fca_component.fca_ops.translate_mpi_op(opname); if (fca_op < 0) return NULL; op_info->mpi_op = op;