call it what it is...
We are looking at subnet_id values and counting active ports per subnet. Move the subnet count out of the procs loop; there is no need to do it there. This commit was SVN r13105.
Этот коммит содержится в:
родитель
075161afa9
Коммит
df099a4731
@ -85,9 +85,19 @@ int mca_btl_openib_add_procs(
|
||||
{
|
||||
mca_btl_openib_module_t* openib_btl = (mca_btl_openib_module_t*)btl;
|
||||
int i,j, rc;
|
||||
int remote_subnets;
|
||||
int local_subnets;
|
||||
int rem_subnet_id_port_cnt;
|
||||
int lcl_subnet_id_port_cnt = 0;
|
||||
int btl_rank = 0;
|
||||
|
||||
for(j=0; j < mca_btl_openib_component.ib_num_btls; j++){
|
||||
if(mca_btl_openib_component.openib_btls[j].port_info.subnet_id
|
||||
== openib_btl->port_info.subnet_id) {
|
||||
lcl_subnet_id_port_cnt++;
|
||||
}
|
||||
if(openib_btl == &(mca_btl_openib_component.openib_btls[j])) {
|
||||
btl_rank = j;
|
||||
}
|
||||
}
|
||||
for(i = 0; i < (int) nprocs; i++) {
|
||||
|
||||
struct ompi_proc_t* ompi_proc = ompi_procs[i];
|
||||
@ -99,42 +109,33 @@ int mca_btl_openib_add_procs(
|
||||
return OMPI_ERR_OUT_OF_RESOURCE;
|
||||
}
|
||||
|
||||
remote_subnets = 0;
|
||||
rem_subnet_id_port_cnt = 0;
|
||||
/* check if the remote proc has a reachable subnet first */
|
||||
BTL_VERBOSE(("got %d port_infos \n", ib_proc->proc_port_count));
|
||||
for(j = 0; j < (int) ib_proc->proc_port_count; j++){
|
||||
BTL_VERBOSE(("got a subnet %016x\n",
|
||||
ib_proc->proc_ports[j].subnet));
|
||||
if(ib_proc->proc_ports[j].subnet ==
|
||||
openib_btl->port_info.subnet) {
|
||||
ib_proc->proc_ports[j].subnet_id));
|
||||
if(ib_proc->proc_ports[j].subnet_id ==
|
||||
openib_btl->port_info.subnet_id) {
|
||||
BTL_VERBOSE(("Got a matching subnet!\n"));
|
||||
remote_subnets++;
|
||||
rem_subnet_id_port_cnt ++;
|
||||
}
|
||||
}
|
||||
if(!remote_subnets) {
|
||||
if(!rem_subnet_id_port_cnt ) {
|
||||
/* no use trying to communicate with this endpointlater */
|
||||
BTL_VERBOSE(("No matching subnet was found, moving on.. \n"));
|
||||
BTL_VERBOSE(("No matching subnet id was found, moving on.. \n"));
|
||||
continue;
|
||||
}
|
||||
|
||||
local_subnets = 0;
|
||||
for(j=0; j < mca_btl_openib_component.ib_num_btls; j++){
|
||||
if(mca_btl_openib_component.openib_btls[j].port_info.subnet
|
||||
== openib_btl->port_info.subnet) {
|
||||
local_subnets++;
|
||||
}
|
||||
if(openib_btl == &(mca_btl_openib_component.openib_btls[j])) {
|
||||
btl_rank = j;
|
||||
}
|
||||
}
|
||||
|
||||
#if 0
|
||||
num_endpoints = remote_subnets / local_subnets +
|
||||
(btl_rank < (remote_subnets / local_subnets)) ? 1:0;
|
||||
num_endpoints = rem_subnet_id_port_cnt / lcl_subnet_id_port_cnt +
|
||||
(btl_rank < (rem_subnet_id_port_cnt / lcl_subnet_id_port_cnt)) ? 1:0;
|
||||
|
||||
#endif
|
||||
if(remote_subnets < local_subnets &&
|
||||
btl_rank >= remote_subnets) {
|
||||
BTL_VERBOSE(("Not enough remote subnets, moving on.. \n"));
|
||||
if(rem_subnet_id_port_cnt < lcl_subnet_id_port_cnt &&
|
||||
btl_rank >= rem_subnet_id_port_cnt ) {
|
||||
BTL_VERBOSE(("Not enough remote ports on this subnet id, moving on.. \n"));
|
||||
continue;
|
||||
|
||||
}
|
||||
@ -151,7 +152,7 @@ int mca_btl_openib_add_procs(
|
||||
}
|
||||
|
||||
endpoint->endpoint_btl = openib_btl;
|
||||
endpoint->subnet = openib_btl->port_info.subnet;
|
||||
endpoint->subnet_id = openib_btl->port_info.subnet_id;
|
||||
rc = mca_btl_openib_proc_insert(ib_proc, endpoint);
|
||||
if(rc != OMPI_SUCCESS) {
|
||||
OBJ_RELEASE(endpoint);
|
||||
|
@ -148,7 +148,7 @@ struct mca_btl_openib_port_info_t {
|
||||
#if OMPI_ENABLE_HETEROGENEOUS_SUPPORT
|
||||
uint8_t padding[4];
|
||||
#endif
|
||||
uint64_t subnet;
|
||||
uint64_t subnet_id;
|
||||
};
|
||||
typedef struct mca_btl_openib_port_info_t mca_btl_openib_port_info_t;
|
||||
|
||||
@ -178,7 +178,7 @@ struct mca_btl_openib_module_t {
|
||||
mca_btl_base_module_t super; /**< base PTL interface */
|
||||
bool btl_inited;
|
||||
mca_btl_openib_recv_reg_t ib_reg[256];
|
||||
mca_btl_openib_port_info_t port_info; /* contains only the subnet right now */
|
||||
mca_btl_openib_port_info_t port_info; /* contains only the subnet id right now */
|
||||
mca_btl_openib_hca_t *hca;
|
||||
uint8_t port_num; /**< ID of the PORT */
|
||||
struct ibv_cq *ib_cq[2];
|
||||
|
@ -277,14 +277,14 @@ static int init_one_port(opal_list_t *btl_list, mca_btl_openib_hca_t *hca,
|
||||
mca_btl_openib_module_t *openib_btl;
|
||||
mca_btl_base_selected_module_t *ib_selected;
|
||||
union ibv_gid gid;
|
||||
uint64_t subnet;
|
||||
uint64_t subnet_id;
|
||||
|
||||
ibv_query_gid(hca->ib_dev_context, port_num, 0, &gid);
|
||||
subnet = ntoh64(gid.global.subnet_prefix);
|
||||
BTL_VERBOSE(("my subnet is %016x\n", subnet));
|
||||
subnet_id = ntoh64(gid.global.subnet_prefix);
|
||||
BTL_VERBOSE(("my subnet_id is %016x\n", subnet_id));
|
||||
|
||||
if(mca_btl_openib_component.ib_num_btls > 0 &&
|
||||
IB_DEFAULT_GID_PREFIX == subnet &&
|
||||
IB_DEFAULT_GID_PREFIX == subnet_id &&
|
||||
mca_btl_openib_component.warn_default_gid_prefix) {
|
||||
opal_show_help("help-mpi-btl-openib.txt", "default subnet prefix",
|
||||
true, orte_system_info.nodename);
|
||||
@ -315,8 +315,8 @@ static int init_one_port(opal_list_t *btl_list, mca_btl_openib_hca_t *hca,
|
||||
openib_btl->port_num = (uint8_t) port_num;
|
||||
openib_btl->lid = lid;
|
||||
openib_btl->src_path_bits = lid - ib_port_attr->lid;
|
||||
/* store the subnet for multi-nic support */
|
||||
openib_btl->port_info.subnet = subnet;
|
||||
/* store the subnet_id for multi-nic support */
|
||||
openib_btl->port_info.subnet_id = subnet_id;
|
||||
openib_btl->port_info.mtu = hca->mtu;
|
||||
openib_btl->ib_reg[MCA_BTL_TAG_BTL].cbfunc = btl_openib_control;
|
||||
openib_btl->ib_reg[MCA_BTL_TAG_BTL].cbdata = NULL;
|
||||
|
@ -278,7 +278,7 @@ static void mca_btl_openib_endpoint_construct(mca_btl_base_endpoint_t* endpoint)
|
||||
endpoint->rem_info.rem_lid = 0;
|
||||
endpoint->rem_info.rem_psn_hp = 0;
|
||||
endpoint->rem_info.rem_psn_lp = 0;
|
||||
endpoint->rem_info.rem_subnet = 0;
|
||||
endpoint->rem_info.rem_subnet_id = 0;
|
||||
endpoint->rem_info.rem_mtu = 0;
|
||||
}
|
||||
|
||||
@ -349,7 +349,7 @@ static int mca_btl_openib_endpoint_send_connect_data(mca_btl_base_endpoint_t* en
|
||||
return rc;
|
||||
}
|
||||
|
||||
rc = orte_dss.pack(buffer, &endpoint->subnet, 1, ORTE_UINT64);
|
||||
rc = orte_dss.pack(buffer, &endpoint->subnet_id, 1, ORTE_UINT64);
|
||||
if(rc != ORTE_SUCCESS) {
|
||||
ORTE_ERROR_LOG(rc);
|
||||
return rc;
|
||||
@ -376,7 +376,7 @@ static int mca_btl_openib_endpoint_send_connect_data(mca_btl_base_endpoint_t* en
|
||||
endpoint->lcl_qp[BTL_OPENIB_HP_QP]->qp_num,
|
||||
endpoint->lcl_qp[BTL_OPENIB_LP_QP]->qp_num,
|
||||
endpoint->endpoint_btl->lid,
|
||||
endpoint->subnet));
|
||||
endpoint->subnet_id));
|
||||
|
||||
|
||||
|
||||
@ -617,7 +617,7 @@ static void mca_btl_openib_endpoint_recv(
|
||||
ORTE_ERROR_LOG(rc);
|
||||
return;
|
||||
}
|
||||
rc = orte_dss.unpack(buffer, &rem_info.rem_subnet, &cnt, ORTE_UINT64);
|
||||
rc = orte_dss.unpack(buffer, &rem_info.rem_subnet_id, &cnt, ORTE_UINT64);
|
||||
if(ORTE_SUCCESS != rc) {
|
||||
ORTE_ERROR_LOG(rc);
|
||||
return;
|
||||
@ -637,7 +637,7 @@ static void mca_btl_openib_endpoint_recv(
|
||||
rem_info.rem_qp_num_hp,
|
||||
rem_info.rem_qp_num_lp,
|
||||
rem_info.rem_lid,
|
||||
rem_info.rem_subnet));
|
||||
rem_info.rem_subnet_id));
|
||||
|
||||
for(ib_proc = (mca_btl_openib_proc_t*)
|
||||
opal_list_get_first(&mca_btl_openib_component.ib_procs);
|
||||
@ -669,7 +669,7 @@ static void mca_btl_openib_endpoint_recv(
|
||||
port_info = ib_proc->proc_ports[i];
|
||||
ib_endpoint = ib_proc->proc_endpoints[i];
|
||||
if(!ib_endpoint->rem_info.rem_lid &&
|
||||
ib_endpoint->subnet == rem_info.rem_subnet) {
|
||||
ib_endpoint->subnet_id == rem_info.rem_subnet_id) {
|
||||
/* found a match based on subnet! */
|
||||
found = true;
|
||||
break;
|
||||
|
@ -78,8 +78,8 @@ struct mca_btl_openib_rem_info_t {
|
||||
uint32_t rem_psn_lp;
|
||||
/* Remote processes port sequence number (Low and High) */
|
||||
|
||||
uint64_t rem_subnet;
|
||||
/* subnet of remote process */
|
||||
uint64_t rem_subnet_id;
|
||||
/* subnet id of remote process */
|
||||
|
||||
/* MTU of remote process */
|
||||
uint32_t rem_mtu;
|
||||
@ -146,7 +146,7 @@ struct mca_btl_base_endpoint_t {
|
||||
int32_t sd_credits[2]; /**< number of send wqe entries being used to return credits */
|
||||
int32_t sd_wqe[2]; /**< number of available send wqe entries */
|
||||
|
||||
uint64_t subnet; /**< subnet of this endpoint*/
|
||||
uint64_t subnet_id; /**< subnet id of this endpoint*/
|
||||
|
||||
int32_t eager_recv_count; /**< number of eager received */
|
||||
mca_btl_openib_eager_rdma_remote_t eager_rdma_remote;
|
||||
|
Загрузка…
x
Ссылка в новой задаче
Block a user