openmpi/ompi/mca/btl/usnic/btl_usnic_graph.c
Dave Goodell db14c706ce usnic: add graph utility code
This code is intended to support usNIC interface matching functionality.
We currently view that problem as essentially the "Assignment Problem"
(http://en.wikipedia.org/wiki/Assignment_problem), for which there are
many possible solution approaches, including flow-network analysis.  In
the future, we might transition to a more nuanced view of the problem
which would likely also be flow-network based.

To this end, the current code focuses on providing one major algorithm
to the core usnic BTL: `ompi_btl_usnic_solve_bipartite_assignment`.  It
also exposes several typical and necessary functions for constructing,
manipulating, and querying weighted, directed graphs.
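
As a rough sketch of the intended usage (hypothetical values and names; the
costs, vertex handles, and helper function shown here are illustrative only
and are not part of this commit), a caller might build a small bipartite
graph and request a minimum-cost matching like this:

    /* assumes btl_usnic_graph.h, <stdio.h>, and <stdlib.h> have been
     * included; all error checks are omitted for brevity */
    static void example_matching(void)
    {
        ompi_btl_usnic_graph_t *g;
        int lhs[2], rhs[2], num_edges, *edges, i;

        /* no per-vertex or per-edge user data, so no cleanup callbacks */
        ompi_btl_usnic_gr_create(NULL, NULL, &g);
        for (i = 0; i < 2; ++i) {
            ompi_btl_usnic_gr_add_vertex(g, NULL, &lhs[i]); /* "left" side */
        }
        for (i = 0; i < 2; ++i) {
            ompi_btl_usnic_gr_add_vertex(g, NULL, &rhs[i]); /* "right" side */
        }
        /* edges run only from the left partition to the right partition */
        ompi_btl_usnic_gr_add_edge(g, lhs[0], rhs[0], /*cost=*/10, /*capacity=*/1, NULL);
        ompi_btl_usnic_gr_add_edge(g, lhs[0], rhs[1], /*cost=*/1, /*capacity=*/1, NULL);
        ompi_btl_usnic_gr_add_edge(g, lhs[1], rhs[0], /*cost=*/2, /*capacity=*/1, NULL);

        /* edges[2*i] and edges[2*i+1] are the endpoints of the i-th matched edge */
        ompi_btl_usnic_solve_bipartite_assignment(g, &num_edges, &edges);
        for (i = 0; i < num_edges; ++i) {
            printf("matched %d -> %d\n", edges[2*i], edges[2*i + 1]);
        }
        free(edges);
        ompi_btl_usnic_gr_free(g);
    }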

Reviewed-by: Jeff Squyres <jsquyres@cisco.com>

cmr=v1.7.5:ticket=trac:4253

This commit was SVN r30847.

The following Trac tickets were found above:
  Ticket 4253 --> https://svn.open-mpi.org/trac/ompi/ticket/4253
2014-02-26 07:49:54 +00:00


/*
* Copyright (c) 2014 Cisco Systems, Inc. All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
#include "ompi_config.h"
#include <stdlib.h>
#include <stdio.h>
#include <assert.h>
#include <limits.h>
#include "opal_stdint.h"
#include "opal/class/opal_pointer_array.h"
#include "ompi/constants.h"
/* mainly for BTL_ERROR */
#include "ompi/mca/btl/btl.h"
#include "ompi/mca/btl/base/base.h"
#include "ompi/mca/btl/base/btl_base_error.h"
#include "btl_usnic.h"
#include "btl_usnic_graph.h"
#include "btl_usnic_compat.h"
#define GRAPH_DEBUG 0
#if GRAPH_DEBUG
# define GRAPH_DEBUG_OUT(args) BTL_OUTPUT(args)
#else
# define GRAPH_DEBUG_OUT(args) do {} while(0)
#endif
#define MAX_COST INT64_MAX
struct ompi_btl_usnic_edge_t {
opal_object_t super;
opal_list_item_t outbound_li;
opal_list_item_t inbound_li;
/** source of this edge */
int source;
/** v_index of target of this edge */
int target;
/** cost (weight) of this edge */
int64_t cost;
/**
* (flow-network) capacity of this edge. Zero-capacity edges essentially do
* not exist and will be ignored by most of the algorithms implemented here.
*/
int capacity;
/** any other information associated with this edge */
void *e_data;
};
struct ompi_btl_usnic_vertex_t {
/** index in the graph's array of vertices */
int v_index;
/** any other information associated with the vertex */
void *v_data;
/** linked list of edges for which this vertex is a source */
opal_list_t out_edges;
/** linked list of edges for which this vertex is a target */
opal_list_t in_edges;
};
struct ompi_btl_usnic_graph_t {
/** number of vertices currently in this graph */
int num_vertices;
/** vertices in this graph (with number of set elements == num_vertices) */
opal_pointer_array_t vertices;
/** index of the source vertex, or -1 if not present */
int source_idx;
/** index of the sink vertex, or -1 if not present */
int sink_idx;
/** user callback to clean up the v_data */
ompi_btl_usnic_cleanup_fn_t v_data_cleanup_fn;
/** user callback to clean up the e_data */
ompi_btl_usnic_cleanup_fn_t e_data_cleanup_fn;
};
#ifndef MAX
# define MAX(a,b) ((a) > (b) ? (a) : (b))
#endif
#ifndef MIN
# define MIN(a,b) ((a) < (b) ? (a) : (b))
#endif
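/* f(i,j) indexes an n-by-n, row-major flow matrix; note that it deliberately
* picks up the local variables "flow" and "n" from the scope of whichever
* function uses it */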
#define f(i,j) flow[n*i + j]
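/* Iterate over an opal_list_t whose items are embedded within a larger
* structure at "member", yielding a pointer to the containing structure
* ("type") rather than to the embedded opal_list_item_t. This is needed
* because each edge is linked into two lists at once (the out_edges list of
* its source and the in_edges list of its target) via two different embedded
* list items. */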
#define LIST_FOREACH_CONTAINED(item, list, type, member) \
for (item = container_of( (list)->opal_list_sentinel.opal_list_next, type, member ); \
&item->member != &(list)->opal_list_sentinel; \
item = container_of( \
((opal_list_item_t *) (&item->member))->opal_list_next, type, member ))
#define LIST_FOREACH_SAFE_CONTAINED(item, next, list, type, member) \
for (item = container_of( (list)->opal_list_sentinel.opal_list_next, type, member ), \
next = container_of( \
((opal_list_item_t *) (&item->member))->opal_list_next, type, member ); \
&item->member != &(list)->opal_list_sentinel; \
item = next, \
next = container_of( \
((opal_list_item_t *) (&item->member))->opal_list_next, type, member ))
#define NUM_VERTICES(g) (g->num_vertices)
#define CHECK_VERTEX_RANGE(g,v) \
do { \
if ((v) < 0 || \
(v) >= NUM_VERTICES(g)) { \
return OMPI_ERR_BAD_PARAM; \
} \
} while (0)
/* cast away any constness of &g->vertices b/c the opal_pointer_array API is
* not const-correct */
#define V_ID_TO_PTR(g, v_id) \
((ompi_btl_usnic_vertex_t *) \
opal_pointer_array_get_item((opal_pointer_array_t *)&g->vertices, v_id))
#define FOREACH_OUT_EDGE(g,v_id,e_ptr) \
LIST_FOREACH_CONTAINED(e_ptr, \
&(V_ID_TO_PTR(g, v_id)->out_edges), \
ompi_btl_usnic_edge_t, \
outbound_li)
#define FOREACH_IN_EDGE(g,v_id,e_ptr) \
LIST_FOREACH_CONTAINED(e_ptr, \
&(V_ID_TO_PTR(g, v_id)->in_edges), \
ompi_btl_usnic_edge_t, \
inbound_li)
/* Iterate over (u,v) edge pairs along the given path, where path is defined
* by the predecessor array "pred". Stops when a -1 predecessor is
* encountered. Note: because it is a *predecessor* array, the traversal
* starts at the sink and progresses towards the source. */
#define FOREACH_UV_ON_PATH(pred, source, sink, u, v) \
for (u = pred[sink], v = sink; u != -1; v = u, u = pred[u])
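/* Illustrative example (hypothetical values): with sink=4 and
* pred = {[0]=-1, [1]=0, [2]=-1, [3]=1, [4]=3}, the macro above visits the
* pairs (3,4), (1,3), (0,1) in that order, i.e., it walks the path
* 0->1->3->4 backwards from the sink and stops once it reaches the vertex
* whose predecessor is -1 (the source). */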
/* assert that (a+b) neither overflows nor underflows the range of an int64_t */
static inline void check_add64_overflow(int64_t a, int64_t b)
{
assert(!((b > 0) && (a > (INT64_MAX - b))) &&
!((b < 0) && (a < (INT64_MIN - b))));
}
static void edge_constructor(ompi_btl_usnic_edge_t *e)
{
OBJ_CONSTRUCT(&e->outbound_li, opal_list_item_t);
OBJ_CONSTRUCT(&e->inbound_li, opal_list_item_t);
}
static void edge_destructor(ompi_btl_usnic_edge_t *e)
{
OBJ_DESTRUCT(&e->outbound_li);
OBJ_DESTRUCT(&e->inbound_li);
}
OBJ_CLASS_DECLARATION(ompi_btl_usnic_edge_t);
OBJ_CLASS_INSTANCE(ompi_btl_usnic_edge_t, opal_object_t,
edge_constructor, edge_destructor);
static void dump_vec(const char *name, int *vec, int n)
__opal_attribute_unused__;
static void dump_vec(const char *name, int *vec, int n)
{
int i;
fprintf(stderr, "%s={", name);
for (i = 0; i < n; ++i) {
fprintf(stderr, "[%d]=%2d, ", i, vec[i]);
}
fprintf(stderr, "}\n");
}
static void dump_vec64(const char *name, int64_t *vec, int n)
__opal_attribute_unused__;
static void dump_vec64(const char *name, int64_t *vec, int n)
{
int i;
fprintf(stderr, "%s={", name);
for (i = 0; i < n; ++i) {
fprintf(stderr, "[%d]=%2" PRIi64 ", ", i, vec[i]);
}
fprintf(stderr, "}\n");
}
static void dump_flow(int *flow, int n)
__opal_attribute_unused__;
static void dump_flow(int *flow, int n)
{
int u, v;
fprintf(stderr, "flow={\n");
for (u = 0; u < n; ++u) {
fprintf(stderr, "u=%d| ", u);
for (v = 0; v < n; ++v) {
fprintf(stderr, "%2d,", f(u,v));
}
fprintf(stderr, "\n");
}
fprintf(stderr, "}\n");
}
static int get_capacity(ompi_btl_usnic_graph_t *g, int source, int target)
{
ompi_btl_usnic_edge_t *e;
CHECK_VERTEX_RANGE(g, source);
CHECK_VERTEX_RANGE(g, target);
FOREACH_OUT_EDGE(g, source, e) {
assert(e->source == source);
if (e->target == target) {
return e->capacity;
}
}
return 0;
}
static int
set_capacity(ompi_btl_usnic_graph_t *g, int source, int target, int cap)
{
ompi_btl_usnic_edge_t *e;
CHECK_VERTEX_RANGE(g, source);
CHECK_VERTEX_RANGE(g, target);
FOREACH_OUT_EDGE(g, source, e) {
assert(e->source == source);
if (e->target == target) {
e->capacity = cap;
return OMPI_SUCCESS;
}
}
return OMPI_ERR_NOT_FOUND;
}
static void free_vertex(ompi_btl_usnic_graph_t *g,
ompi_btl_usnic_vertex_t *v)
{
if (NULL != v) {
if (NULL != g->v_data_cleanup_fn && NULL != v->v_data) {
g->v_data_cleanup_fn(v->v_data);
}
free(v);
}
}
int ompi_btl_usnic_gr_create(ompi_btl_usnic_cleanup_fn_t v_data_cleanup_fn,
ompi_btl_usnic_cleanup_fn_t e_data_cleanup_fn,
ompi_btl_usnic_graph_t **g_out)
{
int err;
ompi_btl_usnic_graph_t *g = NULL;
if (NULL == g_out) {
return OMPI_ERR_BAD_PARAM;
}
*g_out = NULL;
g = calloc(1, sizeof(*g));
if (NULL == g) {
OMPI_ERROR_LOG(OMPI_ERR_OUT_OF_RESOURCE);
err = OMPI_ERR_OUT_OF_RESOURCE;
goto out_free_g;
}
g->source_idx = -1;
g->sink_idx = -1;
g->v_data_cleanup_fn = v_data_cleanup_fn;
g->e_data_cleanup_fn = e_data_cleanup_fn;
/* now that we essentially have an empty graph, add vertices to it */
OBJ_CONSTRUCT(&g->vertices, opal_pointer_array_t);
err = opal_pointer_array_init(&g->vertices, 0, INT_MAX, 32);
if (OPAL_SUCCESS != err) {
goto out_free_g;
}
*g_out = g;
return OMPI_SUCCESS;
out_free_g:
free(g);
return err;
}
int ompi_btl_usnic_gr_free(ompi_btl_usnic_graph_t *g)
{
int i;
ompi_btl_usnic_edge_t *e, *next;
ompi_btl_usnic_vertex_t *v;
/* remove all edges from all out_edges lists */
for (i = 0; i < NUM_VERTICES(g); ++i) {
v = V_ID_TO_PTR(g, i);
LIST_FOREACH_SAFE_CONTAINED(e, next, &v->out_edges,
ompi_btl_usnic_edge_t, outbound_li) {
opal_list_remove_item(&v->out_edges, &e->outbound_li);
OBJ_RELEASE(e);
}
}
/* now remove from all in_edges lists and free the edge */
for (i = 0; i < NUM_VERTICES(g); ++i) {
v = V_ID_TO_PTR(g, i);
LIST_FOREACH_SAFE_CONTAINED(e, next, &v->in_edges,
ompi_btl_usnic_edge_t, inbound_li) {
opal_list_remove_item(&v->in_edges, &e->inbound_li);
if (NULL != g->e_data_cleanup_fn && NULL != e->e_data) {
g->e_data_cleanup_fn(e->e_data);
}
OBJ_RELEASE(e);
}
free_vertex(g, V_ID_TO_PTR(g, i));
opal_pointer_array_set_item(&g->vertices, i, NULL);
}
g->num_vertices = 0;
OBJ_DESTRUCT(&g->vertices);
free(g);
return OMPI_SUCCESS;
}
int ompi_btl_usnic_gr_clone(const ompi_btl_usnic_graph_t *g,
bool copy_user_data,
ompi_btl_usnic_graph_t **g_clone_out)
{
int err;
int i;
int index;
ompi_btl_usnic_graph_t *gx;
ompi_btl_usnic_edge_t *e;
if (NULL == g_clone_out) {
return OMPI_ERR_BAD_PARAM;
}
*g_clone_out = NULL;
if (copy_user_data) {
BTL_ERROR(("user data copy requested but not yet supported"));
abort();
return OMPI_ERR_FATAL;
}
gx = NULL;
err = ompi_btl_usnic_gr_create(NULL, NULL, &gx);
if (OMPI_SUCCESS != err) {
return err;
}
assert(NULL != gx);
/* reconstruct all vertices */
for (i = 0; i < NUM_VERTICES(g); ++i) {
err = ompi_btl_usnic_gr_add_vertex(gx, NULL, &index);
if (OMPI_SUCCESS != err) {
goto out_free_gx;
}
assert(index == i);
}
/* now reconstruct all the edges (iterate by source vertex only to avoid
* double-adding) */
for (i = 0; i < NUM_VERTICES(g); ++i) {
FOREACH_OUT_EDGE(g, i, e) {
assert(i == e->source);
err = ompi_btl_usnic_gr_add_edge(gx, e->source, e->target,
e->cost, e->capacity, NULL);
if (OMPI_SUCCESS != err) {
goto out_free_gx;
}
}
}
*g_clone_out = gx;
return OMPI_SUCCESS;
out_free_gx:
/* we don't reach in and manipulate gx's state directly, so it should be
* safe to use the standard free function */
ompi_btl_usnic_gr_free(gx);
return err;
}
int ompi_btl_usnic_gr_indegree(const ompi_btl_usnic_graph_t *g,
int vertex)
{
ompi_btl_usnic_vertex_t *v;
v = V_ID_TO_PTR(g, vertex);
return opal_list_get_size(&v->in_edges);
}
int ompi_btl_usnic_gr_outdegree(const ompi_btl_usnic_graph_t *g,
int vertex)
{
ompi_btl_usnic_vertex_t *v;
v = V_ID_TO_PTR(g, vertex);
return opal_list_get_size(&v->out_edges);
}
int ompi_btl_usnic_gr_add_edge(ompi_btl_usnic_graph_t *g,
int from,
int to,
int64_t cost,
int capacity,
void *e_data)
{
ompi_btl_usnic_edge_t *e;
ompi_btl_usnic_vertex_t *v_from, *v_to;
if (from < 0 || from >= NUM_VERTICES(g)) {
return OMPI_ERR_BAD_PARAM;
}
if (to < 0 || to >= NUM_VERTICES(g)) {
return OMPI_ERR_BAD_PARAM;
}
if (cost == MAX_COST) {
return OMPI_ERR_BAD_PARAM;
}
if (capacity < 0) {
/* negative cost is fine, but negative capacity is not currently
* handled appropriately */
return OMPI_ERR_BAD_PARAM;
}
FOREACH_OUT_EDGE(g, from, e) {
assert(e->source == from);
if (e->target == to) {
return OMPI_EXISTS;
}
}
/* this reference is owned by the out_edges list */
e = OBJ_NEW(ompi_btl_usnic_edge_t);
if (NULL == e) {
OMPI_ERROR_LOG(OMPI_ERR_OUT_OF_RESOURCE);
return OMPI_ERR_OUT_OF_RESOURCE;
}
e->source = from;
e->target = to;
e->cost = cost;
e->capacity = capacity;
e->e_data = e_data;
v_from = V_ID_TO_PTR(g, from);
opal_list_append(&v_from->out_edges, &e->outbound_li);
OBJ_RETAIN(e); /* ref owned by in_edges list */
v_to = V_ID_TO_PTR(g, to);
opal_list_append(&v_to->in_edges, &e->inbound_li);
return OMPI_SUCCESS;
}
int ompi_btl_usnic_gr_add_vertex(ompi_btl_usnic_graph_t *g,
void *v_data,
int *index_out)
{
ompi_btl_usnic_vertex_t *v;
v = calloc(1, sizeof(*v));
if (NULL == v) {
OMPI_ERROR_LOG(OMPI_ERR_OUT_OF_RESOURCE);
return OMPI_ERR_OUT_OF_RESOURCE;
}
/* add to the ptr array early to simplify cleanup in the incredibly rare
* chance that adding fails */
v->v_index = opal_pointer_array_add(&g->vertices, v);
if (-1 == v->v_index) {
free(v);
OMPI_ERROR_LOG(OMPI_ERR_OUT_OF_RESOURCE);
return OMPI_ERR_OUT_OF_RESOURCE;
}
assert(v->v_index == g->num_vertices);
++g->num_vertices;
v->v_data = v_data;
OBJ_CONSTRUCT(&v->out_edges, opal_list_t);
OBJ_CONSTRUCT(&v->in_edges, opal_list_t);
if (NULL != index_out) {
*index_out = v->v_index;
}
return OMPI_SUCCESS;
}
int ompi_btl_usnic_gr_order(const ompi_btl_usnic_graph_t *g)
{
return NUM_VERTICES(g);
}
/**
* shrink a flow matrix for old_n vertices to one that works for new_n
*
* Takes a matrix stored in a one-dimensional array of size (old_n*old_n) and
* "truncates" it into a dense array of size (new_n*new_n) that only contains
* the flow values for the first new_n vertices.  E.g., it turns this array
* (old_n=5, new_n=3):
*
*    1  2  3  4  5
*    6  7  8  9 10
*   11 12 13 14 15
*   16 17 18 19 20
*   21 22 23 24 25
*
* into this array:
*
*    1  2  3
*    6  7  8
*   11 12 13
*/
static void shrink_flow_matrix(int *flow, int old_n, int new_n)
{
int u, v;
assert(old_n > new_n);
for (u = 0; u < new_n; ++u) {
for (v = 0; v < new_n; ++v) {
flow[new_n*u + v] = flow[old_n*u + v];
}
}
}
/**
* Compute the so-called "bottleneck" capacity value for a path "pred" through
* graph "gx".
*/
static int
bottleneck_path(
ompi_btl_usnic_graph_t *gx,
int n,
int *pred)
{
int u, v;
int min;
min = INT_MAX;
FOREACH_UV_ON_PATH(pred, gx->source_idx, gx->sink_idx, u, v) {
int cap_f_uv = get_capacity(gx, u, v);
min = MIN(min, cap_f_uv);
}
return min;
}
/**
* This routine implements the Bellman-Ford shortest paths algorithm, slightly
* specialized for our formulation of flow networks:
* http://en.wikipedia.org/wiki/Bellman%E2%80%93Ford_algorithm
*
* Specifically, it attempts to find the shortest path from "source" to
* "target". It returns true if such a path was found, false otherwise. Any
* found path is returned in "pred" as a predecessor chain (i.e., pred[sink]
* is the vertex immediately before the sink, pred[pred[sink]] precedes that
* one, and so on back to the source).
*
* The contents of "pred" are only valid if this routine returns true.
*/
static bool bellman_ford(ompi_btl_usnic_graph_t *gx,
int source,
int target,
int *pred)
{
int64_t *dist;
int i;
int n;
int u, v;
bool found_target = false;
if (NULL == gx) {
OMPI_ERROR_LOG(OMPI_ERR_BAD_PARAM);
return false;
}
if (NULL == pred) {
OMPI_ERROR_LOG(OMPI_ERR_BAD_PARAM);
return false;
}
if (source < 0 || source >= NUM_VERTICES(gx)) {
OMPI_ERROR_LOG(OMPI_ERR_BAD_PARAM);
return false;
}
if (target < 0 || target >= NUM_VERTICES(gx)) {
OMPI_ERROR_LOG(OMPI_ERR_BAD_PARAM);
return false;
}
/* initialize */
n = ompi_btl_usnic_gr_order(gx);
dist = malloc(n * sizeof(*dist));
if (NULL == dist) {
OMPI_ERROR_LOG(OMPI_ERR_OUT_OF_RESOURCE);
goto out;
}
for (i = 0; i < n; ++i) {
dist[i] = MAX_COST;
pred[i] = -1;
}
dist[source] = 0;
/* relax repeatedly */
for (i = 1; i < NUM_VERTICES(gx); ++i) {
bool relaxed = false;
#if GRAPH_DEBUG
dump_vec("pred", pred, NUM_VERTICES(gx));
dump_vec64("dist", dist, NUM_VERTICES(gx));
#endif
for (u = 0; u < NUM_VERTICES(gx); ++u) {
ompi_btl_usnic_edge_t *e_ptr;
FOREACH_OUT_EDGE(gx, u, e_ptr) {
v = e_ptr->target;
/* make sure to only construct paths from edges that actually have
* non-zero capacity */
if (e_ptr->capacity > 0 &&
dist[u] != MAX_COST) { /* avoid signed overflow for "infinity" */
check_add64_overflow(dist[u], e_ptr->cost);
if ((dist[u] + e_ptr->cost) < dist[v]) {
dist[v] = dist[u] + e_ptr->cost;
pred[v] = u;
relaxed = true;
}
}
}
}
/* optimization: stop if an outer iteration did not succeed in
* changing any dist/pred values (already at optimum) */
if (!relaxed) {
GRAPH_DEBUG_OUT(("relaxed==false, breaking out"));
break;
}
}
/* check for negative-cost cycles */
for (u = 0; u < NUM_VERTICES(gx); ++u) {
ompi_btl_usnic_edge_t * e_ptr;
FOREACH_OUT_EDGE(gx, u, e_ptr) {
v = e_ptr->target;
if (e_ptr->capacity > 0 &&
dist[u] != MAX_COST && /* avoid signed overflow */
(dist[u] + e_ptr->cost) < dist[v]) {
BTL_ERROR(("negative-weight cycle detected"));
abort();
goto out;
}
}
}
if (dist[target] != MAX_COST) {
found_target = true;
}
out:
#if GRAPH_DEBUG
dump_vec("pred", pred, NUM_VERTICES(gx));
#endif
assert(pred[source] == -1);
free(dist);
GRAPH_DEBUG_OUT(("bellman_ford: found_target=%s", found_target ? "true" : "false"));
return found_target;
}
/**
* Transform the given connected, bipartite, acyclic digraph into a flow
* network (i.e., add a source and a sink, with the source connected to vertex
* set V1 and the sink connected to vertex set V2). This also creates
* residual edges suitable for augmenting-path algorithms. All "source" nodes
* in the original graph are considered to have an output of 1 and "sink"
* nodes can take an input of 1. The result is that "forward" edges are all
* created with capacity=1, "backward" (residual) edges are created with
* capacity=0.
*
* After this routine, all capacities are "residual capacities" ($c_f$ in the
* literature).
*
* Initial flow throughout the network is assumed to be 0 at all edges.
*
* The graph will be left in an undefined state if an error occurs (though
* freeing it should still be safe).
*/
static int bipartite_to_flow(ompi_btl_usnic_graph_t *g)
{
int err;
int order;
int u, v;
int num_left, num_right;
/* grab size before adding extra vertices */
order = ompi_btl_usnic_gr_order(g);
err = ompi_btl_usnic_gr_add_vertex(g, NULL, &g->source_idx);
if (OMPI_SUCCESS != err) {
return err;
}
err = ompi_btl_usnic_gr_add_vertex(g, NULL, &g->sink_idx);
if (OMPI_SUCCESS != err) {
return err;
}
/* The networks we are interested in are bipartite and have edges only
* from one partition to the other partition (none vice versa). We
* visualize this conventionally with all of the source vertices on the
* left-hand side of an imaginary rendering of the graph and the target
* vertices on the right-hand side of the rendering. The direction
* "forward" is considered to be moving from left to right.
*/
num_left = 0;
num_right = 0;
for (u = 0; u < order; ++u) {
int inbound = ompi_btl_usnic_gr_indegree(g, u);
int outbound = ompi_btl_usnic_gr_outdegree(g, u);
if (inbound > 0 && outbound > 0) {
BTL_ERROR(("graph is not (unidirectionally) bipartite"));
abort();
}
else if (inbound > 0) {
/* "right" side of the graph, create edges to the sink */
++num_right;
err = ompi_btl_usnic_gr_add_edge(g, u, g->sink_idx,
0, /* no cost */
/*capacity=*/1,
/*e_data=*/NULL);
if (OMPI_SUCCESS != err) {
GRAPH_DEBUG_OUT(("add_edge failed"));
return err;
}
}
else if (outbound > 0) {
/* "left" side of the graph, create edges to the source */
++num_left;
err = ompi_btl_usnic_gr_add_edge(g, g->source_idx, u,
0, /* no cost */
/*capacity=*/1,
/*e_data=*/NULL);
if (OMPI_SUCCESS != err) {
GRAPH_DEBUG_OUT(("add_edge failed"));
return err;
}
}
}
/* it doesn't make sense to extend this graph with a source and sink
* unless both partitions are non-empty (no flow, and hence no matching,
* is possible otherwise) */
if (num_right == 0 || num_left == 0) {
return OMPI_ERR_BAD_PARAM;
}
/* now run through and create "residual" edges as well (i.e., create edges
* in the reverse direction with 0 initial flow and a residual capacity of
* $c_f(u,v)=c(u,v)-f(u,v)$). Residual edges can exist where no edges
* exist in the original graph.
*/
order = ompi_btl_usnic_gr_order(g); /* need residuals for newly created
source/sink edges too */
for (u = 0; u < order; ++u) {
ompi_btl_usnic_edge_t * e_ptr;
FOREACH_OUT_EDGE(g, u, e_ptr) {
v = e_ptr->target;
/* (u,v) exists, add (v,u) if not already present. Cost is
* negative for these edges because "giving back" flow pays us
* back any cost already incurred. */
err = ompi_btl_usnic_gr_add_edge(g, v, u,
-e_ptr->cost,
/*capacity=*/0,
/*e_data=*/NULL);
if (OMPI_SUCCESS != err && OMPI_EXISTS != err) {
return err;
}
}
}
return OMPI_SUCCESS;
}
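/* Illustrative example (hypothetical vertex numbering): given a bipartite
* graph with edges (0,2) and (1,2), where 0 and 1 have only outbound edges
* ("left") and 2 has only inbound edges ("right"), the routine above adds a
* source s and a sink t, creates forward edges s->0 and s->1 (cost 0,
* capacity 1) and 2->t (cost 0, capacity 1), leaves 0->2 and 1->2 with their
* original costs and capacities, and finally adds residual reverse edges
* 0->s, 1->s, t->2, 2->0, and 2->1 with capacity 0 and negated costs. */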
/**
* Implements the "Successive Shortest Path" algorithm for computing the
* minimum cost flow problem. This is a generalized version of the
* Ford-Fulkerson algorithm. There are two major changes from F-F:
* 1. In addition to capacities and flows, this algorithm pays attention to
* costs for traversing an edge. This particular function leaves the
* caller's costs alone but sets its own capacities.
* 2. Shortest paths are computed using the cost metric.
*
* The algorithm's sketch looks like:
* 1 Transform network G by adding source and sink, create residual edges
* 2 Initial flow x is zero
* 3 while ( Gx contains a path from s to t ) do
* 4 Find any shortest path P from s to t
* 5 Augment current flow x along P
* 6 update Gx
*
* This function mutates the given graph (adding vertices and edges, changing
* capacities, etc.), so callers may wish to clone the graph before calling
* this routine.
*
* The result is an array of (u,v) vertex pairs, where (u,v) is an edge in the
* original graph which has non-zero flow.
*
* Returns OMPI error codes like OMPI_SUCCESS/OMPI_ERR_OUT_OF_RESOURCE.
*
* This version of the algorithm has a theoretical upper bound on its running
* time of O(|V|^2 * |E| * f), where f is essentially the maximum flow in the
* graph. In our case, f=min(|V1|,|V2|), where V1 and V2 are the two
* constituent sets of the bipartite graph.
*
* This algorithm's performance could probably be improved by modifying it to
* use vertex potentials and Dijkstra's Algorithm instead of Bellman-Ford.
* Normally vertex potentials are needed in order to use Dijkstra's safely,
* but our graphs are constrained enough that this may not be necessary.
* Switching to Dijkstra's implemented with a heap should yield a reduced
* upper bound of O(|V| * |E| * f * log(|V|)). Let's consider this a future
* enhancement for the time being, since it's not obvious at this point that
* the faster running time will be worth the additional implementation
* complexity.
*/
static int min_cost_flow_ssp(ompi_btl_usnic_graph_t *gx,
int **flow_out)
{
int err = OMPI_SUCCESS;
int n;
int *pred = NULL;
int *flow = NULL;
int u, v;
int c;
GRAPH_DEBUG_OUT(("begin min_cost_flow_ssp()"));
if (NULL == flow_out) {
return OMPI_ERR_BAD_PARAM;
}
*flow_out = NULL;
n = ompi_btl_usnic_gr_order(gx);
pred = malloc(n*sizeof(*pred));
if (NULL == pred) {
OMPI_ERROR_LOG(OMPI_ERR_OUT_OF_RESOURCE);
err = OMPI_ERR_OUT_OF_RESOURCE;
goto out_error;
}
/* "flow" is a 2d matrix of current flow values, all initialized to zero */
flow = calloc(n*n, sizeof(*flow));
if (NULL == flow) {
OMPI_ERROR_LOG(OMPI_ERR_OUT_OF_RESOURCE);
err = OMPI_ERR_OUT_OF_RESOURCE;
goto out_error;
}
/* loop as long as paths exist from source to sink */
while (bellman_ford(gx, gx->source_idx, gx->sink_idx, pred)) {
int cap_f_path;
/* find any shortest path P from s to t (already present in pred) */
GRAPH_DEBUG_OUT(("start outer iteration of SSP algorithm"));
#if GRAPH_DEBUG
dump_vec("pred", pred, NUM_VERTICES(gx));
dump_flow(flow, n);
#endif
cap_f_path = bottleneck_path(gx, n, pred);
/* augment current flow along P */
FOREACH_UV_ON_PATH(pred, gx->source_idx, gx->sink_idx, u, v) {
assert(u == pred[v]);
f(u,v) = f(u,v) + cap_f_path; /* "forward" edge */
f(v,u) = f(v,u) - cap_f_path; /* residual network edge */
assert(f(u,v) == -f(v,u)); /* skew symmetry invariant */
/* update Gx as we go along: decrease capacity by this new
* augmenting flow */
c = get_capacity(gx, u, v) - cap_f_path;
assert(c >= 0);
err = set_capacity(gx, u, v, c);
if (OMPI_SUCCESS != err) {
BTL_ERROR(("unable to set capacity, missing edge?"));
abort();
}
c = get_capacity(gx, v, u) + cap_f_path;
assert(c >= 0);
err = set_capacity(gx, v, u, c);
if (OMPI_SUCCESS != err) {
BTL_ERROR(("unable to set capacity, missing edge?"));
abort();
}
}
}
out:
*flow_out = flow;
free(pred);
return err;
out_error:
free(*flow_out);
GRAPH_DEBUG_OUT(("returning error %d", err));
goto out;
}
int ompi_btl_usnic_solve_bipartite_assignment(const ompi_btl_usnic_graph_t *g,
int *num_match_edges_out,
int **match_edges_out)
{
int err;
int i;
int u, v;
int n;
int *flow = NULL;
ompi_btl_usnic_graph_t *gx = NULL;
if (NULL == match_edges_out || NULL == num_match_edges_out) {
return OMPI_ERR_BAD_PARAM;
}
*num_match_edges_out = 0;
*match_edges_out = NULL;
/* don't perturb the caller's data structure */
err = ompi_btl_usnic_gr_clone(g, false, &gx);
if (OMPI_SUCCESS != err) {
GRAPH_DEBUG_OUT(("ompi_btl_usnic_gr_clone failed"));
goto out;
}
/* Transform gx into a residual flow network with capacities, a source, a
* sink, and residual edges. We track the actual flow separately in the
* "flow" matrix. Initial capacity for every forward edge is 1. Initial
* capacity for every backward (residual) edge is 0.
*
* For the remainder of this routine (and the ssp routine) the capacities
* refer to residual capacities ($c_f$) not capacities in the original
* graph. For convenience we adjust all residual capacities as we go
* along rather than recomputing them from the flow and capacities in the
* original graph. This allows many other graph operations to have no
* direct knowledge of the flow matrix.
*/
err = bipartite_to_flow(gx);
if (OMPI_SUCCESS != err) {
GRAPH_DEBUG_OUT(("bipartite_to_flow failed"));
OMPI_ERROR_LOG(err);
goto out; /* free gx instead of leaking it */
}
/* Use the SSP algorithm to compute the min-cost flow over this network.
* Edges with non-zero flow in the result should be part of the matching.
*
* Note that the flow array returned is sized for gx, not for g. Index
* accordingly later on.
*/
err = min_cost_flow_ssp(gx, &flow);
if (OMPI_SUCCESS != err) {
GRAPH_DEBUG_OUT(("min_cost_flow_ssp failed"));
goto out; /* free gx (and any flow matrix) instead of leaking them */
}
assert(NULL != flow);
/* don't care about new edges in gx, only old edges in g */
n = ompi_btl_usnic_gr_order(g);
#if GRAPH_DEBUG
dump_flow(flow, NUM_VERTICES(gx));
#endif
shrink_flow_matrix(flow, ompi_btl_usnic_gr_order(gx), n);
#if GRAPH_DEBUG
dump_flow(flow, n);
#endif
for (u = 0; u < n; ++u) {
for (v = 0; v < n; ++v) {
if (f(u,v) > 0) {
++(*num_match_edges_out);
}
}
}
if (0 == *num_match_edges_out) {
/* avoid attempting to allocate a zero-byte buffer */
goto out;
}
/* each matched edge occupies two ints in the output array: (u,v) */
*match_edges_out = malloc(*num_match_edges_out * 2 * sizeof(**match_edges_out));
if (NULL == *match_edges_out) {
*num_match_edges_out = 0;
OMPI_ERROR_LOG(OMPI_ERR_OUT_OF_RESOURCE);
err = OMPI_ERR_OUT_OF_RESOURCE;
goto out;
}
i = 0;
for (u = 0; u < n; ++u) {
for (v = 0; v < n; ++v) {
/* flow exists on this edge so include this edge in the matching */
if (f(u,v) > 0) {
(*match_edges_out)[i++] = u;
(*match_edges_out)[i++] = v;
}
}
}
out:
free(flow);
if (NULL != gx) {
ompi_btl_usnic_gr_free(gx);
}
return err;
}
#include "test/btl_usnic_graph_test.h"