1
1
openmpi/ompi/mca/pml/teg/pml_teg.c
George Bosilca 3b52a31e1f Make some compilers quiet. Otherwise they complain about uninitialized variables even if
the logic inside prevent an execution path where they can be used uninitialized.

This commit was SVN r6560.
2005-07-20 06:47:10 +00:00

467 строки
17 KiB
C

/*
* Copyright (c) 2004-2005 The Trustees of Indiana University.
* All rights reserved.
* Copyright (c) 2004-2005 The Trustees of the University of Tennessee.
* All rights reserved.
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
* University of Stuttgart. All rights reserved.
* Copyright (c) 2004-2005 The Regents of the University of California.
* All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
#include "ompi_config.h"
#include <stdlib.h>
#include <string.h>
#include "class/ompi_bitmap.h"
#include "mca/pml/pml.h"
#include "mca/ptl/ptl.h"
#include "mca/ptl/base/base.h"
#include "mca/ptl/base/ptl_base_comm.h"
#include "mca/ptl/base/ptl_base_header.h"
#include "mca/ptl/base/ptl_base_recvfrag.h"
#include "mca/ptl/base/ptl_base_sendfrag.h"
#include "pml_teg.h"
#include "pml_teg_component.h"
#include "pml_teg_proc.h"
#include "pml_teg_ptl.h"
#include "pml_teg_recvreq.h"
#include "pml_teg_sendreq.h"
#include "pml_teg_recvfrag.h"
mca_pml_teg_t mca_pml_teg = {
{
mca_pml_teg_add_procs,
mca_pml_teg_del_procs,
mca_pml_teg_enable,
mca_pml_teg_progress,
mca_pml_teg_add_comm,
mca_pml_teg_del_comm,
mca_pml_teg_irecv_init,
mca_pml_teg_irecv,
mca_pml_teg_recv,
mca_pml_teg_isend_init,
mca_pml_teg_isend,
mca_pml_teg_send,
mca_pml_teg_iprobe,
mca_pml_teg_probe,
mca_pml_teg_start
}
};
int mca_pml_teg_add_comm(ompi_communicator_t* comm)
{
/* allocate pml specific comm data */
mca_pml_ptl_comm_t* pml_comm = OBJ_NEW(mca_pml_ptl_comm_t);
if (NULL == pml_comm) {
return OMPI_ERR_OUT_OF_RESOURCE;
}
mca_pml_ptl_comm_init_size(pml_comm, comm->c_remote_group->grp_proc_count);
comm->c_pml_comm = pml_comm;
return OMPI_SUCCESS;
}
int mca_pml_teg_del_comm(ompi_communicator_t* comm)
{
OBJ_RELEASE(comm->c_pml_comm);
comm->c_pml_comm = NULL; /* make sure it's set to NULL */
return OMPI_SUCCESS;
}
static int ptl_exclusivity_compare(const void* arg1, const void* arg2)
{
mca_ptl_base_module_t* ptl1 = *(struct mca_ptl_base_module_t**)arg1;
mca_ptl_base_module_t* ptl2 = *(struct mca_ptl_base_module_t**)arg2;
if( ptl1->ptl_exclusivity > ptl2->ptl_exclusivity ) {
return -1;
} else if (ptl1->ptl_exclusivity == ptl2->ptl_exclusivity ) {
return 0;
} else {
return 1;
}
}
static int mca_pml_teg_add_ptls(void)
{
/* build an array of ptls and ptl modules */
mca_ptl_base_selected_module_t* selected_ptl;
size_t num_ptls = opal_list_get_size(&mca_ptl_base_modules_initialized);
size_t cache_bytes = 0;
mca_pml_teg.teg_num_ptl_modules = 0;
mca_pml_teg.teg_num_ptl_progress = 0;
mca_pml_teg.teg_num_ptl_components = 0;
mca_pml_teg.teg_ptl_modules = (mca_ptl_base_module_t **)malloc(sizeof(mca_ptl_base_module_t*) * num_ptls);
mca_pml_teg.teg_ptl_progress = (mca_ptl_base_component_progress_fn_t*)malloc(sizeof(mca_ptl_base_component_progress_fn_t) * num_ptls);
mca_pml_teg.teg_ptl_components = (mca_ptl_base_component_t **)malloc(sizeof(mca_ptl_base_component_t*) * num_ptls);
if (NULL == mca_pml_teg.teg_ptl_modules ||
NULL == mca_pml_teg.teg_ptl_progress ||
NULL == mca_pml_teg.teg_ptl_components) {
return OMPI_ERR_OUT_OF_RESOURCE;
}
for(selected_ptl = (mca_ptl_base_selected_module_t*)
opal_list_get_first(&mca_ptl_base_modules_initialized);
selected_ptl != (mca_ptl_base_selected_module_t*)
opal_list_get_end(&mca_ptl_base_modules_initialized);
selected_ptl = (mca_ptl_base_selected_module_t*)opal_list_get_next(selected_ptl)) {
mca_ptl_base_module_t *ptl = selected_ptl->pbsm_module;
size_t i;
mca_pml_teg.teg_ptl_modules[mca_pml_teg.teg_num_ptl_modules++] = ptl;
for(i=0; i<mca_pml_teg.teg_num_ptl_components; i++) {
if(mca_pml_teg.teg_ptl_components[i] == ptl->ptl_component) {
break;
}
}
if(i == mca_pml_teg.teg_num_ptl_components) {
mca_pml_teg.teg_ptl_components[mca_pml_teg.teg_num_ptl_components++] = ptl->ptl_component;
}
/*
*setup ptl
*/
/* set pointer to fragment matching logic routine, if this
* not already set by the ptl */
if( NULL == ptl->ptl_match)
ptl->ptl_match = mca_pml_teg_recv_frag_match;
ptl->ptl_send_progress = mca_pml_teg_send_request_progress;
ptl->ptl_recv_progress = mca_pml_teg_recv_request_progress;
ptl->ptl_stack = ptl;
ptl->ptl_base = NULL;
/* find maximum required size for cache */
if(ptl->ptl_cache_bytes > cache_bytes) {
cache_bytes = ptl->ptl_cache_bytes;
}
}
/* setup send fragments based on largest required send request */
ompi_free_list_init(
&mca_pml_teg.teg_send_requests,
sizeof(mca_pml_teg_send_request_t) + cache_bytes,
OBJ_CLASS(mca_pml_teg_send_request_t),
mca_pml_teg.teg_free_list_num,
mca_pml_teg.teg_free_list_max,
mca_pml_teg.teg_free_list_inc,
NULL);
/* sort ptl list by exclusivity */
qsort(mca_pml_teg.teg_ptl_modules, mca_pml_teg.teg_num_ptl_modules, sizeof(struct mca_ptl_t*), ptl_exclusivity_compare);
return OMPI_SUCCESS;
}
/*
* Called by the base PML in order to notify the PMLs about their selected status. After the init pass,
* the base module will choose one PML (depending on informations provided by the init function) and then
* it will call the pml_enable function with true (for the selected one) and with false for all the
* others. The selected one can then pass control information through to all PTL modules.
*/
int mca_pml_teg_enable(bool enable)
{
size_t i=0;
int value = enable, rc;
uint32_t proc_arch;
/* If I'm not selected then prepare for close */
if( false == enable ) return OMPI_SUCCESS;
/* recv requests */
ompi_free_list_init( &mca_pml_teg.teg_recv_requests,
sizeof(mca_pml_teg_recv_request_t),
OBJ_CLASS(mca_pml_teg_recv_request_t),
mca_pml_teg.teg_free_list_num,
mca_pml_teg.teg_free_list_max,
mca_pml_teg.teg_free_list_inc,
NULL );
/* I get selected. Publish my information */
proc_arch = ompi_proc_local()->proc_arch;
proc_arch = htonl(proc_arch);
rc = mca_base_modex_send(&mca_pml_teg_component.pmlm_version, &proc_arch, sizeof(proc_arch));
if(rc != OMPI_SUCCESS)
return rc;
/* Grab all the PTLs and prepare them */
mca_pml_teg_add_ptls();
/* and now notify them about the status */
for(i=0; i<mca_pml_teg.teg_num_ptl_components; i++) {
if(NULL != mca_pml_teg.teg_ptl_components[i]->ptlm_control) {
int rc = mca_pml_teg.teg_ptl_components[i]->ptlm_control(MCA_PTL_ENABLE,&value,sizeof(value));
if(rc != OMPI_SUCCESS)
return rc;
}
}
return OMPI_SUCCESS;
}
/*
* For each proc setup a datastructure that indicates the PTLs
* that can be used to reach the destination.
*
*/
int mca_pml_teg_add_procs(ompi_proc_t** procs, size_t nprocs)
{
size_t p;
ompi_bitmap_t reachable;
struct mca_ptl_base_peer_t** ptl_peers = NULL;
int rc;
size_t p_index;
if(nprocs == 0)
return OMPI_SUCCESS;
OBJ_CONSTRUCT(&reachable, ompi_bitmap_t);
rc = ompi_bitmap_init(&reachable, nprocs);
if(OMPI_SUCCESS != rc)
return rc;
/* iterate through each of the procs and set the peers architecture */
for(p=0; p<nprocs; p++) {
uint32_t* proc_arch;
size_t size = sizeof(uint32_t);
rc = mca_base_modex_recv(&mca_pml_teg_component.pmlm_version, procs[p],
(void**)&proc_arch, &size);
if(rc != OMPI_SUCCESS)
return rc;
if(size != sizeof(uint32_t))
return OMPI_ERROR;
procs[p]->proc_arch = ntohl(*proc_arch);
free(proc_arch);
}
/* attempt to add all procs to each ptl */
ptl_peers = (struct mca_ptl_base_peer_t **)malloc(nprocs * sizeof(struct mca_ptl_base_peer_t*));
for(p_index = 0; p_index < mca_pml_teg.teg_num_ptl_modules; p_index++) {
mca_ptl_base_module_t* ptl = mca_pml_teg.teg_ptl_modules[p_index];
int ptl_inuse = 0;
/* if the ptl can reach the destination proc it sets the
* corresponding bit (proc index) in the reachable bitmap
* and can return addressing information for each proc
* that is passed back to the ptl on data transfer calls
*/
ompi_bitmap_clear_all_bits(&reachable);
memset(ptl_peers, 0, nprocs * sizeof(struct mca_ptl_base_peer_t*));
rc = ptl->ptl_add_procs(ptl, nprocs, procs, ptl_peers, &reachable);
if(OMPI_SUCCESS != rc) {
free(ptl_peers);
return rc;
}
/* for each proc that is reachable - add the ptl to the procs array(s) */
for(p=0; p<nprocs; p++) {
if(ompi_bitmap_is_set_bit(&reachable, p)) {
ompi_proc_t *proc = procs[p];
mca_pml_proc_t* proc_pml = proc->proc_pml;
mca_ptl_proc_t* proc_ptl;
size_t size;
/* this ptl can be used */
ptl_inuse++;
/* initialize each proc */
if(NULL == proc_pml) {
/* allocate pml specific proc data */
proc_pml = OBJ_NEW(mca_pml_teg_proc_t);
if (NULL == proc_pml) {
opal_output(0, "mca_pml_teg_add_procs: unable to allocate resources");
free(ptl_peers);
return OMPI_ERR_OUT_OF_RESOURCE;
}
/* preallocate space in array for max number of ptls */
mca_ptl_array_reserve(&proc_pml->proc_ptl_first, mca_pml_teg.teg_num_ptl_modules);
mca_ptl_array_reserve(&proc_pml->proc_ptl_next, mca_pml_teg.teg_num_ptl_modules);
proc_pml->proc_ompi = proc;
proc->proc_pml = proc_pml;
}
/* dont allow an additional PTL with a lower exclusivity ranking */
size = mca_ptl_array_get_size(&proc_pml->proc_ptl_next);
if(size > 0) {
proc_ptl = mca_ptl_array_get_index(&proc_pml->proc_ptl_next, size-1);
/* skip this ptl if the exclusivity is less than the previous */
if(proc_ptl->ptl->ptl_exclusivity > ptl->ptl_exclusivity) {
if(ptl_peers[p] != NULL) {
ptl->ptl_del_procs(ptl, 1, &proc, &ptl_peers[p]);
}
continue;
}
}
/* cache the ptl on the proc */
proc_ptl = mca_ptl_array_insert(&proc_pml->proc_ptl_next);
proc_ptl->ptl = ptl;
proc_ptl->ptl_peer = ptl_peers[p];
proc_ptl->ptl_weight = 0;
proc_pml->proc_ptl_flags |= ptl->ptl_flags;
}
}
if(ptl_inuse > 0 && NULL != ptl->ptl_component->ptlm_progress) {
size_t p;
bool found = false;
for(p=0; p<mca_pml_teg.teg_num_ptl_progress; p++) {
if(mca_pml_teg.teg_ptl_progress[p] == ptl->ptl_component->ptlm_progress) {
found = true;
break;
}
}
if(found == false) {
mca_pml_teg.teg_ptl_progress[mca_pml_teg.teg_num_ptl_progress] =
ptl->ptl_component->ptlm_progress;
mca_pml_teg.teg_num_ptl_progress++;
}
}
}
free(ptl_peers);
/* iterate back through procs and compute metrics for registered ptls */
for(p=0; p<nprocs; p++) {
ompi_proc_t *proc = procs[p];
mca_pml_proc_t* proc_pml = proc->proc_pml;
double total_bandwidth = 0;
uint32_t latency = 0;
size_t n_index;
size_t n_size;
/* skip over procs w/ no ptls registered */
if(NULL == proc_pml)
continue;
/* (1) determine the total bandwidth available across all ptls
* note that we need to do this here, as we may already have ptls configured
* (2) determine the highest priority ranking for latency
*/
n_size = mca_ptl_array_get_size(&proc_pml->proc_ptl_next);
for(n_index = 0; n_index < n_size; n_index++) {
struct mca_ptl_proc_t* proc_ptl = mca_ptl_array_get_index(&proc_pml->proc_ptl_next, n_index);
struct mca_ptl_base_module_t* ptl = proc_ptl->ptl;
total_bandwidth += proc_ptl->ptl->ptl_bandwidth;
if(ptl->ptl_latency > latency)
latency = ptl->ptl_latency;
}
/* (1) set the weight of each ptl as a percentage of overall bandwidth
* (2) copy all ptl instances at the highest priority ranking into the
* list of ptls used for first fragments
*/
for(n_index = 0; n_index < n_size; n_index++) {
struct mca_ptl_proc_t* proc_ptl = mca_ptl_array_get_index(&proc_pml->proc_ptl_next, n_index);
struct mca_ptl_base_module_t *ptl = proc_ptl->ptl;
double weight;
/* compute weighting factor for this ptl */
if(ptl->ptl_bandwidth)
weight = proc_ptl->ptl->ptl_bandwidth / total_bandwidth;
else
weight = 1.0 / n_size;
proc_ptl->ptl_weight = (int)(weight * 100);
/*
* save/create ptl extension for use by pml
*/
proc_ptl->ptl_base = ptl->ptl_base;
if (NULL == proc_ptl->ptl_base &&
ptl->ptl_cache_bytes > 0 &&
NULL != ptl->ptl_request_init &&
NULL != ptl->ptl_request_fini) {
mca_pml_base_ptl_t* ptl_base = OBJ_NEW(mca_pml_base_ptl_t);
ptl_base->ptl = ptl;
ptl_base->ptl_cache_size = ptl->ptl_cache_size;
proc_ptl->ptl_base = ptl->ptl_base = ptl_base;
}
/* check to see if this ptl is already in the array of ptls used for first
* fragments - if not add it.
*/
if(ptl->ptl_latency == latency) {
struct mca_ptl_proc_t* proc_new = mca_ptl_array_insert(&proc_pml->proc_ptl_first);
*proc_new = *proc_ptl;
}
}
}
return OMPI_SUCCESS;
}
/*
* iterate through each proc and notify any PTLs associated
* with the proc that it is/has gone away
*/
int mca_pml_teg_del_procs(ompi_proc_t** procs, size_t nprocs)
{
size_t p;
int rc;
for(p = 0; p < nprocs; p++) {
ompi_proc_t *proc = procs[p];
mca_pml_proc_t* proc_pml = proc->proc_pml;
size_t f_index, f_size;
size_t n_index, n_size;
/* notify each ptl that the proc is going away */
f_size = mca_ptl_array_get_size(&proc_pml->proc_ptl_first);
for(f_index = 0; f_index < f_size; f_index++) {
mca_ptl_proc_t* ptl_proc = mca_ptl_array_get_index(&proc_pml->proc_ptl_first, f_index);
mca_ptl_base_module_t* ptl = ptl_proc->ptl;
rc = ptl->ptl_del_procs(ptl,1,&proc,&ptl_proc->ptl_peer);
if(OMPI_SUCCESS != rc) {
return rc;
}
/* remove this from next array so that we dont call it twice w/
* the same address pointer
*/
n_size = mca_ptl_array_get_size(&proc_pml->proc_ptl_first);
for(n_index = 0; n_index < n_size; n_index++) {
mca_ptl_proc_t* next_proc = mca_ptl_array_get_index(&proc_pml->proc_ptl_next, n_index);
if(next_proc->ptl == ptl) {
memset(next_proc, 0, sizeof(mca_ptl_proc_t));
break;
}
}
}
/* notify each ptl that was not in the array of ptls for first fragments */
n_size = mca_ptl_array_get_size(&proc_pml->proc_ptl_next);
for(n_index = 0; n_index < n_size; n_index++) {
mca_ptl_proc_t* ptl_proc = mca_ptl_array_get_index(&proc_pml->proc_ptl_first, n_index);
mca_ptl_base_module_t* ptl = ptl_proc->ptl;
if (ptl != 0) {
rc = ptl->ptl_del_procs(ptl,1,&proc,&ptl_proc->ptl_peer);
if(OMPI_SUCCESS != rc)
return rc;
}
}
/* do any required cleanup */
OBJ_RELEASE(proc_pml);
proc->proc_pml = NULL;
}
return OMPI_SUCCESS;
}
int mca_pml_teg_component_fini(void)
{
/* FIX */
return OMPI_SUCCESS;
}