1
1

After fix 22669 was applied which allowed for more than 8 interfaces, it was discovered that the connection algorithm did not scale. Therefore, switch to a simpler algorithm in the extremely rare case when one has more than 8 interfaces. This commit fixes trac:2301.

This commit was SVN r22976.

The following Trac tickets were found above:
  Ticket 2301 --> https://svn.open-mpi.org/trac/ompi/ticket/2301
Этот коммит содержится в:
Rolf vandeVaart 2010-04-14 14:18:35 +00:00
родитель eeccf2f15c
Коммит 892091c77d
2 изменённых файлов: 55 добавлений и 18 удалений

Просмотреть файл

@ -9,7 +9,7 @@
* University of Stuttgart. All rights reserved.
* Copyright (c) 2004-2005 The Regents of the University of California.
* All rights reserved.
* Copyright (c) 2008 Sun Microsystems, Inc. All rights reserved.
* Copyright (c) 2008-2010 Oracle and/or its affiliates. All rights reserved
* $COPYRIGHT$
*
* Additional copyrights may follow
@ -580,24 +580,54 @@ int mca_btl_tcp_proc_insert( mca_btl_tcp_proc_t* btl_proc,
return OMPI_ERR_OUT_OF_RESOURCE;
}
memset(a, 0, perm_size * sizeof(int));
max_assignment_cardinality = -1;
max_assignment_weight = -1;
visit(0, -1, perm_size, a);
/* Can only find the best set of connections when the number of
* interfaces is not too big. When it gets larger, we fall back
* to a simpler and faster (and not as optimal) algorithm. */
if (perm_size <= MAX_PERMUTATION_INTERFACES) {
memset(a, 0, perm_size * sizeof(int));
max_assignment_cardinality = -1;
max_assignment_weight = -1;
visit(0, -1, perm_size, a);
rc = OMPI_ERR_UNREACH;
for(i = 0; i < perm_size; ++i) {
if(best_assignment[i] > num_peer_interfaces
|| weights[i][best_assignment[i]] == CQ_NO_CONNECTION
|| peer_interfaces[best_assignment[i]]->inuse
|| NULL == peer_interfaces[best_assignment[i]]) {
continue;
}
peer_interfaces[best_assignment[i]]->inuse++;
btl_endpoint->endpoint_addr = best_addr[i][best_assignment[i]];
btl_endpoint->endpoint_addr->addr_inuse++;
rc = OMPI_SUCCESS;
break;
rc = OMPI_ERR_UNREACH;
for(i = 0; i < perm_size; ++i) {
if(best_assignment[i] > num_peer_interfaces
|| weights[i][best_assignment[i]] == CQ_NO_CONNECTION
|| peer_interfaces[best_assignment[i]]->inuse
|| NULL == peer_interfaces[best_assignment[i]]) {
continue;
}
peer_interfaces[best_assignment[i]]->inuse++;
btl_endpoint->endpoint_addr = best_addr[i][best_assignment[i]];
btl_endpoint->endpoint_addr->addr_inuse++;
rc = OMPI_SUCCESS;
break;
}
} else {
enum mca_btl_tcp_connection_quality max;
int i_max, j_max;
/* Find the best connection that is not in use. Save away
* the indices of the best location. */
max = CQ_NO_CONNECTION;
for(i=0; i<num_local_interfaces; ++i) {
for(j=0; j<num_peer_interfaces; ++j) {
if (!peer_interfaces[j]->inuse) {
if (weights[i][j] > max) {
max = weights[i][j];
i_max = i;
j_max = j;
}
}
}
}
/* Now see if there is a some type of connection available. */
rc = OMPI_ERR_UNREACH;
if (CQ_NO_CONNECTION != max) {
peer_interfaces[j_max]->inuse++;
btl_endpoint->endpoint_addr = best_addr[i_max][j_max];
btl_endpoint->endpoint_addr->addr_inuse++;
rc = OMPI_SUCCESS;
}
}
for(i = 0; i < perm_size; ++i) {

Просмотреть файл

@ -9,6 +9,7 @@
* University of Stuttgart. All rights reserved.
* Copyright (c) 2004-2005 The Regents of the University of California.
* All rights reserved.
* Copyright (c) 2010 Oracle and/or its affiliates. All rights reserved
* $COPYRIGHT$
*
* Additional copyrights may follow
@ -65,6 +66,12 @@ OBJ_CLASS_DECLARATION(mca_btl_tcp_proc_t);
/* the maximum number of kernel interfaces we can handle */
#define MAX_KERNEL_INTERFACES 8
/* The maximum number of interfaces that we can have and use the
* recursion code for determining the best set of connections. When
* the number is greater than this, we switch to a simpler algorithm
* to speed things up. */
#define MAX_PERMUTATION_INTERFACES 8
/*
* FIXME: this should probably be part of an ompi list, so we need the
* appropriate definitions