1
1
openmpi/ompi/mca/btl/mx/btl_mx.h

314 строки
10 KiB
C
Исходник Обычный вид История

/*
* Copyright (c) 2004-2007 The Trustees of Indiana University and Indiana
* University Research and Technology
* Corporation. All rights reserved.
* Copyright (c) 2004-2007 The University of Tennessee and The University
* of Tennessee Research Foundation. All rights
* reserved.
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
* University of Stuttgart. All rights reserved.
* Copyright (c) 2004-2005 The Regents of the University of California.
* All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
/**
* @file
*/
First stable version of the MX BTL (at least we pass NetPipe). The perfs are not amazing but are not that bad either. On a 2 procs Intel(R) Xeon(TM) CPU 3.20GHz with MYRICOM Inc. Myrinet 2000 Scalable Cluster Interconnect (rev 04) I get: 0: 1 bytes 13096 times --> 1.10 Mbps in 6.94 usec 1: 2 bytes 14408 times --> 2.17 Mbps in 7.02 usec 2: 3 bytes 14243 times --> 3.24 Mbps in 7.07 usec 3: 4 bytes 9428 times --> 4.27 Mbps in 7.15 usec 4: 6 bytes 10493 times --> 6.26 Mbps in 7.32 usec 5: 8 bytes 6834 times --> 8.18 Mbps in 7.47 usec 6: 12 bytes 8371 times --> 11.89 Mbps in 7.70 usec 7: 13 bytes 5411 times --> 12.72 Mbps in 7.80 usec 8: 16 bytes 5919 times --> 15.35 Mbps in 7.95 usec 9: 19 bytes 7074 times --> 17.66 Mbps in 8.21 usec 10: 21 bytes 7696 times --> 19.00 Mbps in 8.43 usec 11: 24 bytes 7906 times --> 20.87 Mbps in 8.77 usec 12: 27 bytes 8073 times --> 23.05 Mbps in 8.94 usec 13: 29 bytes 4972 times --> 24.32 Mbps in 9.10 usec 14: 32 bytes 5307 times --> 26.29 Mbps in 9.29 usec 15: 35 bytes 5720 times --> 33.61 Mbps in 7.95 usec 16: 45 bytes 7191 times --> 39.50 Mbps in 8.69 usec 17: 48 bytes 7670 times --> 41.33 Mbps in 8.86 usec 18: 51 bytes 7759 times --> 42.80 Mbps in 9.09 usec 19: 61 bytes 4313 times --> 47.44 Mbps in 9.81 usec 20: 64 bytes 5012 times --> 57.61 Mbps in 8.48 usec 21: 67 bytes 6083 times --> 59.31 Mbps in 8.62 usec 22: 93 bytes 6234 times --> 68.08 Mbps in 10.42 usec 23: 96 bytes 6396 times --> 80.65 Mbps in 9.08 usec 24: 99 bytes 7455 times --> 81.56 Mbps in 9.26 usec 25: 125 bytes 3926 times --> 112.46 Mbps in 8.48 usec 26: 128 bytes 5848 times --> 116.87 Mbps in 8.36 usec 27: 131 bytes 6077 times --> 119.22 Mbps in 8.38 usec 28: 189 bytes 6192 times --> 163.79 Mbps in 8.80 usec 29: 192 bytes 7572 times --> 168.01 Mbps in 8.72 usec 30: 195 bytes 7705 times --> 171.13 Mbps in 8.69 usec 31: 253 bytes 4011 times --> 210.21 Mbps in 9.18 usec 32: 256 bytes 5423 times --> 214.55 Mbps in 9.10 usec 33: 259 bytes 5535 times --> 217.64 Mbps in 
9.08 usec 34: 381 bytes 5613 times --> 290.55 Mbps in 10.00 usec 35: 384 bytes 6663 times --> 296.11 Mbps in 9.89 usec 36: 387 bytes 6764 times --> 298.74 Mbps in 9.88 usec 37: 509 bytes 3451 times --> 353.78 Mbps in 10.98 usec 38: 512 bytes 4546 times --> 359.36 Mbps in 10.87 usec 39: 515 bytes 4617 times --> 361.53 Mbps in 10.87 usec 40: 765 bytes 4645 times --> 461.41 Mbps in 12.65 usec 41: 768 bytes 5270 times --> 468.59 Mbps in 12.50 usec 42: 771 bytes 5341 times --> 470.16 Mbps in 12.51 usec 43: 1021 bytes 2695 times --> 508.42 Mbps in 15.32 usec 44: 1024 bytes 3260 times --> 514.44 Mbps in 15.19 usec 45: 1027 bytes 3298 times --> 515.72 Mbps in 15.19 usec 46: 1533 bytes 3307 times --> 707.12 Mbps in 16.54 usec 47: 1536 bytes 4030 times --> 714.93 Mbps in 16.39 usec 48: 1539 bytes 4071 times --> 714.41 Mbps in 16.44 usec 49: 2045 bytes 2040 times --> 761.38 Mbps in 20.49 usec 50: 2048 bytes 2438 times --> 769.78 Mbps in 20.30 usec 51: 2051 bytes 2465 times --> 769.78 Mbps in 20.33 usec 52: 3069 bytes 2465 times --> 923.43 Mbps in 25.36 usec 53: 3072 bytes 2629 times --> 928.48 Mbps in 25.24 usec 54: 3075 bytes 2642 times --> 929.07 Mbps in 25.25 usec 55: 4093 bytes 1323 times --> 1012.38 Mbps in 30.85 usec 56: 4096 bytes 1620 times --> 1016.69 Mbps in 30.74 usec 57: 4099 bytes 1627 times --> 1015.16 Mbps in 30.81 usec 58: 6141 bytes 1625 times --> 1171.82 Mbps in 39.98 usec 59: 6144 bytes 1667 times --> 1173.85 Mbps in 39.93 usec 60: 6147 bytes 1669 times --> 1174.44 Mbps in 39.93 usec 61: 8189 bytes 835 times --> 1232.43 Mbps in 50.69 usec 62: 8192 bytes 986 times --> 1234.87 Mbps in 50.61 usec 63: 8195 bytes 988 times --> 1234.85 Mbps in 50.63 usec 64: 12285 bytes 988 times --> 1360.73 Mbps in 68.88 usec 65: 12288 bytes 967 times --> 1364.20 Mbps in 68.72 usec 66: 12291 bytes 970 times --> 1364.56 Mbps in 68.72 usec 67: 16381 bytes 485 times --> 1385.48 Mbps in 90.21 usec 68: 16384 bytes 554 times --> 1388.76 Mbps in 90.01 usec 69: 16387 bytes 555 times --> 
1388.41 Mbps in 90.05 usec 70: 24573 bytes 555 times --> 1499.72 Mbps in 125.01 usec 71: 24576 bytes 533 times --> 1499.36 Mbps in 125.05 usec 72: 24579 bytes 533 times --> 1500.44 Mbps in 124.98 usec 73: 32765 bytes 266 times --> 1499.31 Mbps in 166.73 usec 74: 32768 bytes 299 times --> 1497.10 Mbps in 166.99 usec 75: 32771 bytes 299 times --> 1495.29 Mbps in 167.21 usec 76: 49149 bytes 299 times --> 1528.78 Mbps in 245.28 usec 77: 49152 bytes 271 times --> 1527.97 Mbps in 245.42 usec 78: 49155 bytes 271 times --> 1529.35 Mbps in 245.22 usec 79: 65533 bytes 135 times --> 1586.19 Mbps in 315.21 usec 80: 65536 bytes 158 times --> 1591.11 Mbps in 314.25 usec 81: 65539 bytes 159 times --> 1586.50 Mbps in 315.17 usec 82: 98301 bytes 158 times --> 1668.05 Mbps in 449.61 usec 83: 98304 bytes 148 times --> 1667.40 Mbps in 449.80 usec 84: 98307 bytes 148 times --> 1667.29 Mbps in 449.84 usec 85: 131069 bytes 74 times --> 1709.11 Mbps in 585.09 usec 86: 131072 bytes 85 times --> 1711.09 Mbps in 584.42 usec 87: 131075 bytes 85 times --> 1710.92 Mbps in 584.49 usec 88: 196605 bytes 85 times --> 1727.93 Mbps in 868.08 usec 89: 196608 bytes 76 times --> 1726.28 Mbps in 868.92 usec 90: 196611 bytes 76 times --> 1727.06 Mbps in 868.54 usec 91: 262141 bytes 38 times --> 1757.65 Mbps in 1137.87 usec 92: 262144 bytes 43 times --> 1758.69 Mbps in 1137.21 usec 93: 262147 bytes 43 times --> 1759.38 Mbps in 1136.78 usec 94: 393213 bytes 43 times --> 1801.51 Mbps in 1665.25 usec 95: 393216 bytes 40 times --> 1803.26 Mbps in 1663.65 usec 96: 393219 bytes 40 times --> 1800.73 Mbps in 1666.00 usec 97: 524285 bytes 20 times --> 1805.33 Mbps in 2215.65 usec 98: 524288 bytes 22 times --> 1806.80 Mbps in 2213.86 usec 99: 524291 bytes 22 times --> 1805.77 Mbps in 2215.14 usec 100: 786429 bytes 22 times --> 1827.24 Mbps in 3283.64 usec 101: 786432 bytes 20 times --> 1827.03 Mbps in 3284.03 usec 102: 786435 bytes 20 times --> 1827.20 Mbps in 3283.73 usec 103: 1048573 bytes 10 times --> 1840.05 
Mbps in 4347.71 usec 104: 1048576 bytes 11 times --> 1839.68 Mbps in 4348.58 usec 105: 1048579 bytes 11 times --> 1840.13 Mbps in 4347.54 usec 106: 1572861 bytes 11 times --> 1853.99 Mbps in 6472.50 usec 107: 1572864 bytes 10 times --> 1854.11 Mbps in 6472.10 usec 108: 1572867 bytes 10 times --> 1854.12 Mbps in 6472.10 usec 109: 2097149 bytes 5 times --> 1861.41 Mbps in 8595.61 usec 110: 2097152 bytes 5 times --> 1861.25 Mbps in 8596.40 usec 111: 2097155 bytes 5 times --> 1860.99 Mbps in 8597.59 usec 112: 3145725 bytes 5 times --> 1868.34 Mbps in 12845.59 usec 113: 3145728 bytes 5 times --> 1868.30 Mbps in 12845.90 usec 114: 3145731 bytes 5 times --> 1868.59 Mbps in 12843.89 usec 115: 4194301 bytes 3 times --> 1872.16 Mbps in 17092.51 usec 116: 4194304 bytes 3 times --> 1872.31 Mbps in 17091.19 usec 117: 4194307 bytes 3 times --> 1872.13 Mbps in 17092.82 usec 118: 6291453 bytes 3 times --> 1875.88 Mbps in 25588.00 usec 119: 6291456 bytes 3 times --> 1875.98 Mbps in 25586.68 usec 120: 6291459 bytes 3 times --> 1875.93 Mbps in 25587.36 usec 121: 8388605 bytes 3 times --> 1877.79 Mbps in 34082.69 usec 122: 8388608 bytes 3 times --> 1877.72 Mbps in 34083.84 usec 123: 8388611 bytes 3 times --> 1877.66 Mbps in 34085.00 usec This commit was SVN r7180.
2005-09-04 22:08:13 +00:00
#ifndef MCA_PTL_MX_H
#define MCA_PTL_MX_H
/* Standard system includes */
#include <sys/types.h>
#include <string.h>
/* Open MPI includes */
#include "ompi/class/ompi_free_list.h"
#include "ompi/class/ompi_bitmap.h"
This commit represents a bunch of work on a Mercurial side branch. As such, the commit message back to the master SVN repository is fairly long. = ORTE Job-Level Output Messages = Add two new interfaces that should be used for all new code throughout the ORTE and OMPI layers (we already make the search-and-replace on the existing ORTE / OMPI layers): * orte_output(): (and corresponding friends ORTE_OUTPUT, orte_output_verbose, etc.) This function sends the output directly to the HNP for processing as part of a job-specific output channel. It supports all the same outputs as opal_output() (syslog, file, stdout, stderr), but for stdout/stderr, the output is sent to the HNP for processing and output. More on this below. * orte_show_help(): This function is a drop-in-replacement for opal_show_help(), with two differences in functionality: 1. the rendered text help message output is sent to the HNP for display (rather than outputting directly into the process' stderr stream) 1. the HNP detects duplicate help messages and does not display them (so that you don't see the same error message N times, once from each of your N MPI processes); instead, it counts "new" instances of the help message and displays a message every ~5 seconds when there are new ones ("I got X new copies of the help message...") opal_show_help and opal_output still exist, but they only output in the current process. The intent for the new orte_* functions is that they can apply job-level intelligence to the output. As such, we recommend that all new ORTE and OMPI code use the new orte_* functions, not the opal_* functions. === New code === For ORTE and OMPI programmers, here's what you need to do differently in new code: * Do not include opal/util/show_help.h or opal/util/output.h. Instead, include orte/util/output.h (this one header file has declarations for both the orte_output() series of functions and orte_show_help()). * Effectively s/opal_output/orte_output/gi throughout your code. 
Note that orte_output_open() takes a slightly different argument list (as a way to pass data to the filtering stream -- see below), so if you explicitly call opal_output_open(), you'll need to slightly adapt to the new signature of orte_output_open(). * Literally s/opal_show_help/orte_show_help/. The function signature is identical. === Notes === * orte_output'ing to stream 0 will do similar to what opal_output'ing did, so leaving a hard-coded "0" as the first argument is safe. * For systems that do not use ORTE's RML or the HNP, the effect of orte_output_* and orte_show_help will be identical to their opal counterparts (the additional information passed to orte_output_open() will be lost!). Indeed, the orte_* functions simply become trivial wrappers to their opal_* counterparts. Note that we have not tested this; the code is simple but it is quite possible that we mucked something up. = Filter Framework = Messages sent via the new orte_* functions described above and messages output via the IOF on the HNP will now optionally be passed through a new "filter" framework before being output to stdout/stderr. The "filter" OPAL MCA framework is intended to allow preprocessing of messages before they are sent to their final destinations. The first component that was written in the filter framework was to create an XML stream, segregating all the messages into different XML tags, etc. This will allow 3rd party tools to read the stdout/stderr from the HNP and be able to know exactly what each text message is (e.g., a help message, another OMPI infrastructure message, stdout from the user process, stderr from the user process, etc.). Filtering is not active by default. Filter components must be specifically requested, such as: {{{ $ mpirun --mca filter xml ... }}} There can only be one filter component active. 
= New MCA Parameters = The new functionality described above introduces two new MCA parameters: * '''orte_base_help_aggregate''': Defaults to 1 (true), meaning that help messages will be aggregated, as described above. If set to 0, all help messages will be displayed, even if they are duplicates (i.e., the original behavior). * '''orte_base_show_output_recursions''': An MCA parameter to help debug one of the known issues, described below. It is likely that this MCA parameter will disappear before v1.3 final. = Known Issues = * The XML filter component is not complete. The current output from this component is preliminary and not real XML. A bit more work needs to be done to configure.m4 search for an appropriate XML library/link it in/use it at run time. * There are possible recursion loops in the orte_output() and orte_show_help() functions -- e.g., if RML send calls orte_output() or orte_show_help(). We have some ideas how to fix these, but figured that it was ok to commit before feature freeze with known issues. The code currently contains sub-optimal workarounds so that this will not be a problem, but it would be good to actually solve the problem rather than have hackish workarounds before v1.3 final. This commit was SVN r18434.
2008-05-13 20:00:55 +00:00
#include "orte/util/output.h"
#include "opal/event/event.h"
#include "ompi/mca/pml/pml.h"
#include "ompi/mca/btl/btl.h"
#include "ompi/mca/btl/base/base.h"
#include "ompi/mca/mpool/mpool.h"
#include <myriexpress.h>
#ifdef HAVE_MX_EXTENSIONS_H
#include <mx_extensions.h>
#endif /* HAVE_MX_EXTENSIONS_H */
First stable version of the MX BTL (at least we pass NetPipe). The perfs are not amazing but are not that bad either. On a 2 procs Intel(R) Xeon(TM) CPU 3.20GHz with MYRICOM Inc. Myrinet 2000 Scalable Cluster Interconnect (rev 04) I get: 0: 1 bytes 13096 times --> 1.10 Mbps in 6.94 usec 1: 2 bytes 14408 times --> 2.17 Mbps in 7.02 usec 2: 3 bytes 14243 times --> 3.24 Mbps in 7.07 usec 3: 4 bytes 9428 times --> 4.27 Mbps in 7.15 usec 4: 6 bytes 10493 times --> 6.26 Mbps in 7.32 usec 5: 8 bytes 6834 times --> 8.18 Mbps in 7.47 usec 6: 12 bytes 8371 times --> 11.89 Mbps in 7.70 usec 7: 13 bytes 5411 times --> 12.72 Mbps in 7.80 usec 8: 16 bytes 5919 times --> 15.35 Mbps in 7.95 usec 9: 19 bytes 7074 times --> 17.66 Mbps in 8.21 usec 10: 21 bytes 7696 times --> 19.00 Mbps in 8.43 usec 11: 24 bytes 7906 times --> 20.87 Mbps in 8.77 usec 12: 27 bytes 8073 times --> 23.05 Mbps in 8.94 usec 13: 29 bytes 4972 times --> 24.32 Mbps in 9.10 usec 14: 32 bytes 5307 times --> 26.29 Mbps in 9.29 usec 15: 35 bytes 5720 times --> 33.61 Mbps in 7.95 usec 16: 45 bytes 7191 times --> 39.50 Mbps in 8.69 usec 17: 48 bytes 7670 times --> 41.33 Mbps in 8.86 usec 18: 51 bytes 7759 times --> 42.80 Mbps in 9.09 usec 19: 61 bytes 4313 times --> 47.44 Mbps in 9.81 usec 20: 64 bytes 5012 times --> 57.61 Mbps in 8.48 usec 21: 67 bytes 6083 times --> 59.31 Mbps in 8.62 usec 22: 93 bytes 6234 times --> 68.08 Mbps in 10.42 usec 23: 96 bytes 6396 times --> 80.65 Mbps in 9.08 usec 24: 99 bytes 7455 times --> 81.56 Mbps in 9.26 usec 25: 125 bytes 3926 times --> 112.46 Mbps in 8.48 usec 26: 128 bytes 5848 times --> 116.87 Mbps in 8.36 usec 27: 131 bytes 6077 times --> 119.22 Mbps in 8.38 usec 28: 189 bytes 6192 times --> 163.79 Mbps in 8.80 usec 29: 192 bytes 7572 times --> 168.01 Mbps in 8.72 usec 30: 195 bytes 7705 times --> 171.13 Mbps in 8.69 usec 31: 253 bytes 4011 times --> 210.21 Mbps in 9.18 usec 32: 256 bytes 5423 times --> 214.55 Mbps in 9.10 usec 33: 259 bytes 5535 times --> 217.64 Mbps in 
9.08 usec 34: 381 bytes 5613 times --> 290.55 Mbps in 10.00 usec 35: 384 bytes 6663 times --> 296.11 Mbps in 9.89 usec 36: 387 bytes 6764 times --> 298.74 Mbps in 9.88 usec 37: 509 bytes 3451 times --> 353.78 Mbps in 10.98 usec 38: 512 bytes 4546 times --> 359.36 Mbps in 10.87 usec 39: 515 bytes 4617 times --> 361.53 Mbps in 10.87 usec 40: 765 bytes 4645 times --> 461.41 Mbps in 12.65 usec 41: 768 bytes 5270 times --> 468.59 Mbps in 12.50 usec 42: 771 bytes 5341 times --> 470.16 Mbps in 12.51 usec 43: 1021 bytes 2695 times --> 508.42 Mbps in 15.32 usec 44: 1024 bytes 3260 times --> 514.44 Mbps in 15.19 usec 45: 1027 bytes 3298 times --> 515.72 Mbps in 15.19 usec 46: 1533 bytes 3307 times --> 707.12 Mbps in 16.54 usec 47: 1536 bytes 4030 times --> 714.93 Mbps in 16.39 usec 48: 1539 bytes 4071 times --> 714.41 Mbps in 16.44 usec 49: 2045 bytes 2040 times --> 761.38 Mbps in 20.49 usec 50: 2048 bytes 2438 times --> 769.78 Mbps in 20.30 usec 51: 2051 bytes 2465 times --> 769.78 Mbps in 20.33 usec 52: 3069 bytes 2465 times --> 923.43 Mbps in 25.36 usec 53: 3072 bytes 2629 times --> 928.48 Mbps in 25.24 usec 54: 3075 bytes 2642 times --> 929.07 Mbps in 25.25 usec 55: 4093 bytes 1323 times --> 1012.38 Mbps in 30.85 usec 56: 4096 bytes 1620 times --> 1016.69 Mbps in 30.74 usec 57: 4099 bytes 1627 times --> 1015.16 Mbps in 30.81 usec 58: 6141 bytes 1625 times --> 1171.82 Mbps in 39.98 usec 59: 6144 bytes 1667 times --> 1173.85 Mbps in 39.93 usec 60: 6147 bytes 1669 times --> 1174.44 Mbps in 39.93 usec 61: 8189 bytes 835 times --> 1232.43 Mbps in 50.69 usec 62: 8192 bytes 986 times --> 1234.87 Mbps in 50.61 usec 63: 8195 bytes 988 times --> 1234.85 Mbps in 50.63 usec 64: 12285 bytes 988 times --> 1360.73 Mbps in 68.88 usec 65: 12288 bytes 967 times --> 1364.20 Mbps in 68.72 usec 66: 12291 bytes 970 times --> 1364.56 Mbps in 68.72 usec 67: 16381 bytes 485 times --> 1385.48 Mbps in 90.21 usec 68: 16384 bytes 554 times --> 1388.76 Mbps in 90.01 usec 69: 16387 bytes 555 times --> 
1388.41 Mbps in 90.05 usec 70: 24573 bytes 555 times --> 1499.72 Mbps in 125.01 usec 71: 24576 bytes 533 times --> 1499.36 Mbps in 125.05 usec 72: 24579 bytes 533 times --> 1500.44 Mbps in 124.98 usec 73: 32765 bytes 266 times --> 1499.31 Mbps in 166.73 usec 74: 32768 bytes 299 times --> 1497.10 Mbps in 166.99 usec 75: 32771 bytes 299 times --> 1495.29 Mbps in 167.21 usec 76: 49149 bytes 299 times --> 1528.78 Mbps in 245.28 usec 77: 49152 bytes 271 times --> 1527.97 Mbps in 245.42 usec 78: 49155 bytes 271 times --> 1529.35 Mbps in 245.22 usec 79: 65533 bytes 135 times --> 1586.19 Mbps in 315.21 usec 80: 65536 bytes 158 times --> 1591.11 Mbps in 314.25 usec 81: 65539 bytes 159 times --> 1586.50 Mbps in 315.17 usec 82: 98301 bytes 158 times --> 1668.05 Mbps in 449.61 usec 83: 98304 bytes 148 times --> 1667.40 Mbps in 449.80 usec 84: 98307 bytes 148 times --> 1667.29 Mbps in 449.84 usec 85: 131069 bytes 74 times --> 1709.11 Mbps in 585.09 usec 86: 131072 bytes 85 times --> 1711.09 Mbps in 584.42 usec 87: 131075 bytes 85 times --> 1710.92 Mbps in 584.49 usec 88: 196605 bytes 85 times --> 1727.93 Mbps in 868.08 usec 89: 196608 bytes 76 times --> 1726.28 Mbps in 868.92 usec 90: 196611 bytes 76 times --> 1727.06 Mbps in 868.54 usec 91: 262141 bytes 38 times --> 1757.65 Mbps in 1137.87 usec 92: 262144 bytes 43 times --> 1758.69 Mbps in 1137.21 usec 93: 262147 bytes 43 times --> 1759.38 Mbps in 1136.78 usec 94: 393213 bytes 43 times --> 1801.51 Mbps in 1665.25 usec 95: 393216 bytes 40 times --> 1803.26 Mbps in 1663.65 usec 96: 393219 bytes 40 times --> 1800.73 Mbps in 1666.00 usec 97: 524285 bytes 20 times --> 1805.33 Mbps in 2215.65 usec 98: 524288 bytes 22 times --> 1806.80 Mbps in 2213.86 usec 99: 524291 bytes 22 times --> 1805.77 Mbps in 2215.14 usec 100: 786429 bytes 22 times --> 1827.24 Mbps in 3283.64 usec 101: 786432 bytes 20 times --> 1827.03 Mbps in 3284.03 usec 102: 786435 bytes 20 times --> 1827.20 Mbps in 3283.73 usec 103: 1048573 bytes 10 times --> 1840.05 
Mbps in 4347.71 usec 104: 1048576 bytes 11 times --> 1839.68 Mbps in 4348.58 usec 105: 1048579 bytes 11 times --> 1840.13 Mbps in 4347.54 usec 106: 1572861 bytes 11 times --> 1853.99 Mbps in 6472.50 usec 107: 1572864 bytes 10 times --> 1854.11 Mbps in 6472.10 usec 108: 1572867 bytes 10 times --> 1854.12 Mbps in 6472.10 usec 109: 2097149 bytes 5 times --> 1861.41 Mbps in 8595.61 usec 110: 2097152 bytes 5 times --> 1861.25 Mbps in 8596.40 usec 111: 2097155 bytes 5 times --> 1860.99 Mbps in 8597.59 usec 112: 3145725 bytes 5 times --> 1868.34 Mbps in 12845.59 usec 113: 3145728 bytes 5 times --> 1868.30 Mbps in 12845.90 usec 114: 3145731 bytes 5 times --> 1868.59 Mbps in 12843.89 usec 115: 4194301 bytes 3 times --> 1872.16 Mbps in 17092.51 usec 116: 4194304 bytes 3 times --> 1872.31 Mbps in 17091.19 usec 117: 4194307 bytes 3 times --> 1872.13 Mbps in 17092.82 usec 118: 6291453 bytes 3 times --> 1875.88 Mbps in 25588.00 usec 119: 6291456 bytes 3 times --> 1875.98 Mbps in 25586.68 usec 120: 6291459 bytes 3 times --> 1875.93 Mbps in 25587.36 usec 121: 8388605 bytes 3 times --> 1877.79 Mbps in 34082.69 usec 122: 8388608 bytes 3 times --> 1877.72 Mbps in 34083.84 usec 123: 8388611 bytes 3 times --> 1877.66 Mbps in 34085.00 usec This commit was SVN r7180.
2005-09-04 22:08:13 +00:00
BEGIN_C_DECLS
/**
 * Mask used for receive: only the low 8 bits (0xff) are set.
 *
 * NOTE(review): presumably handed to MX as the match mask when posting
 * receives -- confirm against the call sites.
 */
#define BTL_MX_RECV_MASK 0x00000000000000FFULL

/**
 * Mask used for the PUT protocol: all 64 bits are set.
 */
#define BTL_MX_PUT_MASK  0xFFFFFFFFFFFFFFFFULL
/**
* MX BTL component.
*/
struct mca_btl_mx_component_t {
mca_btl_base_component_1_0_1_t super; /**< base BTL component */
int32_t mx_num_btls;
int32_t mx_max_btls;
First stable version of the MX BTL (at least we pass NetPipe). The perfs are not amazing but are not that bad either. On a 2 procs Intel(R) Xeon(TM) CPU 3.20GHz with MYRICOM Inc. Myrinet 2000 Scalable Cluster Interconnect (rev 04) I get: 0: 1 bytes 13096 times --> 1.10 Mbps in 6.94 usec 1: 2 bytes 14408 times --> 2.17 Mbps in 7.02 usec 2: 3 bytes 14243 times --> 3.24 Mbps in 7.07 usec 3: 4 bytes 9428 times --> 4.27 Mbps in 7.15 usec 4: 6 bytes 10493 times --> 6.26 Mbps in 7.32 usec 5: 8 bytes 6834 times --> 8.18 Mbps in 7.47 usec 6: 12 bytes 8371 times --> 11.89 Mbps in 7.70 usec 7: 13 bytes 5411 times --> 12.72 Mbps in 7.80 usec 8: 16 bytes 5919 times --> 15.35 Mbps in 7.95 usec 9: 19 bytes 7074 times --> 17.66 Mbps in 8.21 usec 10: 21 bytes 7696 times --> 19.00 Mbps in 8.43 usec 11: 24 bytes 7906 times --> 20.87 Mbps in 8.77 usec 12: 27 bytes 8073 times --> 23.05 Mbps in 8.94 usec 13: 29 bytes 4972 times --> 24.32 Mbps in 9.10 usec 14: 32 bytes 5307 times --> 26.29 Mbps in 9.29 usec 15: 35 bytes 5720 times --> 33.61 Mbps in 7.95 usec 16: 45 bytes 7191 times --> 39.50 Mbps in 8.69 usec 17: 48 bytes 7670 times --> 41.33 Mbps in 8.86 usec 18: 51 bytes 7759 times --> 42.80 Mbps in 9.09 usec 19: 61 bytes 4313 times --> 47.44 Mbps in 9.81 usec 20: 64 bytes 5012 times --> 57.61 Mbps in 8.48 usec 21: 67 bytes 6083 times --> 59.31 Mbps in 8.62 usec 22: 93 bytes 6234 times --> 68.08 Mbps in 10.42 usec 23: 96 bytes 6396 times --> 80.65 Mbps in 9.08 usec 24: 99 bytes 7455 times --> 81.56 Mbps in 9.26 usec 25: 125 bytes 3926 times --> 112.46 Mbps in 8.48 usec 26: 128 bytes 5848 times --> 116.87 Mbps in 8.36 usec 27: 131 bytes 6077 times --> 119.22 Mbps in 8.38 usec 28: 189 bytes 6192 times --> 163.79 Mbps in 8.80 usec 29: 192 bytes 7572 times --> 168.01 Mbps in 8.72 usec 30: 195 bytes 7705 times --> 171.13 Mbps in 8.69 usec 31: 253 bytes 4011 times --> 210.21 Mbps in 9.18 usec 32: 256 bytes 5423 times --> 214.55 Mbps in 9.10 usec 33: 259 bytes 5535 times --> 217.64 Mbps in 
9.08 usec 34: 381 bytes 5613 times --> 290.55 Mbps in 10.00 usec 35: 384 bytes 6663 times --> 296.11 Mbps in 9.89 usec 36: 387 bytes 6764 times --> 298.74 Mbps in 9.88 usec 37: 509 bytes 3451 times --> 353.78 Mbps in 10.98 usec 38: 512 bytes 4546 times --> 359.36 Mbps in 10.87 usec 39: 515 bytes 4617 times --> 361.53 Mbps in 10.87 usec 40: 765 bytes 4645 times --> 461.41 Mbps in 12.65 usec 41: 768 bytes 5270 times --> 468.59 Mbps in 12.50 usec 42: 771 bytes 5341 times --> 470.16 Mbps in 12.51 usec 43: 1021 bytes 2695 times --> 508.42 Mbps in 15.32 usec 44: 1024 bytes 3260 times --> 514.44 Mbps in 15.19 usec 45: 1027 bytes 3298 times --> 515.72 Mbps in 15.19 usec 46: 1533 bytes 3307 times --> 707.12 Mbps in 16.54 usec 47: 1536 bytes 4030 times --> 714.93 Mbps in 16.39 usec 48: 1539 bytes 4071 times --> 714.41 Mbps in 16.44 usec 49: 2045 bytes 2040 times --> 761.38 Mbps in 20.49 usec 50: 2048 bytes 2438 times --> 769.78 Mbps in 20.30 usec 51: 2051 bytes 2465 times --> 769.78 Mbps in 20.33 usec 52: 3069 bytes 2465 times --> 923.43 Mbps in 25.36 usec 53: 3072 bytes 2629 times --> 928.48 Mbps in 25.24 usec 54: 3075 bytes 2642 times --> 929.07 Mbps in 25.25 usec 55: 4093 bytes 1323 times --> 1012.38 Mbps in 30.85 usec 56: 4096 bytes 1620 times --> 1016.69 Mbps in 30.74 usec 57: 4099 bytes 1627 times --> 1015.16 Mbps in 30.81 usec 58: 6141 bytes 1625 times --> 1171.82 Mbps in 39.98 usec 59: 6144 bytes 1667 times --> 1173.85 Mbps in 39.93 usec 60: 6147 bytes 1669 times --> 1174.44 Mbps in 39.93 usec 61: 8189 bytes 835 times --> 1232.43 Mbps in 50.69 usec 62: 8192 bytes 986 times --> 1234.87 Mbps in 50.61 usec 63: 8195 bytes 988 times --> 1234.85 Mbps in 50.63 usec 64: 12285 bytes 988 times --> 1360.73 Mbps in 68.88 usec 65: 12288 bytes 967 times --> 1364.20 Mbps in 68.72 usec 66: 12291 bytes 970 times --> 1364.56 Mbps in 68.72 usec 67: 16381 bytes 485 times --> 1385.48 Mbps in 90.21 usec 68: 16384 bytes 554 times --> 1388.76 Mbps in 90.01 usec 69: 16387 bytes 555 times --> 
1388.41 Mbps in 90.05 usec 70: 24573 bytes 555 times --> 1499.72 Mbps in 125.01 usec 71: 24576 bytes 533 times --> 1499.36 Mbps in 125.05 usec 72: 24579 bytes 533 times --> 1500.44 Mbps in 124.98 usec 73: 32765 bytes 266 times --> 1499.31 Mbps in 166.73 usec 74: 32768 bytes 299 times --> 1497.10 Mbps in 166.99 usec 75: 32771 bytes 299 times --> 1495.29 Mbps in 167.21 usec 76: 49149 bytes 299 times --> 1528.78 Mbps in 245.28 usec 77: 49152 bytes 271 times --> 1527.97 Mbps in 245.42 usec 78: 49155 bytes 271 times --> 1529.35 Mbps in 245.22 usec 79: 65533 bytes 135 times --> 1586.19 Mbps in 315.21 usec 80: 65536 bytes 158 times --> 1591.11 Mbps in 314.25 usec 81: 65539 bytes 159 times --> 1586.50 Mbps in 315.17 usec 82: 98301 bytes 158 times --> 1668.05 Mbps in 449.61 usec 83: 98304 bytes 148 times --> 1667.40 Mbps in 449.80 usec 84: 98307 bytes 148 times --> 1667.29 Mbps in 449.84 usec 85: 131069 bytes 74 times --> 1709.11 Mbps in 585.09 usec 86: 131072 bytes 85 times --> 1711.09 Mbps in 584.42 usec 87: 131075 bytes 85 times --> 1710.92 Mbps in 584.49 usec 88: 196605 bytes 85 times --> 1727.93 Mbps in 868.08 usec 89: 196608 bytes 76 times --> 1726.28 Mbps in 868.92 usec 90: 196611 bytes 76 times --> 1727.06 Mbps in 868.54 usec 91: 262141 bytes 38 times --> 1757.65 Mbps in 1137.87 usec 92: 262144 bytes 43 times --> 1758.69 Mbps in 1137.21 usec 93: 262147 bytes 43 times --> 1759.38 Mbps in 1136.78 usec 94: 393213 bytes 43 times --> 1801.51 Mbps in 1665.25 usec 95: 393216 bytes 40 times --> 1803.26 Mbps in 1663.65 usec 96: 393219 bytes 40 times --> 1800.73 Mbps in 1666.00 usec 97: 524285 bytes 20 times --> 1805.33 Mbps in 2215.65 usec 98: 524288 bytes 22 times --> 1806.80 Mbps in 2213.86 usec 99: 524291 bytes 22 times --> 1805.77 Mbps in 2215.14 usec 100: 786429 bytes 22 times --> 1827.24 Mbps in 3283.64 usec 101: 786432 bytes 20 times --> 1827.03 Mbps in 3284.03 usec 102: 786435 bytes 20 times --> 1827.20 Mbps in 3283.73 usec 103: 1048573 bytes 10 times --> 1840.05 
Mbps in 4347.71 usec 104: 1048576 bytes 11 times --> 1839.68 Mbps in 4348.58 usec 105: 1048579 bytes 11 times --> 1840.13 Mbps in 4347.54 usec 106: 1572861 bytes 11 times --> 1853.99 Mbps in 6472.50 usec 107: 1572864 bytes 10 times --> 1854.11 Mbps in 6472.10 usec 108: 1572867 bytes 10 times --> 1854.12 Mbps in 6472.10 usec 109: 2097149 bytes 5 times --> 1861.41 Mbps in 8595.61 usec 110: 2097152 bytes 5 times --> 1861.25 Mbps in 8596.40 usec 111: 2097155 bytes 5 times --> 1860.99 Mbps in 8597.59 usec 112: 3145725 bytes 5 times --> 1868.34 Mbps in 12845.59 usec 113: 3145728 bytes 5 times --> 1868.30 Mbps in 12845.90 usec 114: 3145731 bytes 5 times --> 1868.59 Mbps in 12843.89 usec 115: 4194301 bytes 3 times --> 1872.16 Mbps in 17092.51 usec 116: 4194304 bytes 3 times --> 1872.31 Mbps in 17091.19 usec 117: 4194307 bytes 3 times --> 1872.13 Mbps in 17092.82 usec 118: 6291453 bytes 3 times --> 1875.88 Mbps in 25588.00 usec 119: 6291456 bytes 3 times --> 1875.98 Mbps in 25586.68 usec 120: 6291459 bytes 3 times --> 1875.93 Mbps in 25587.36 usec 121: 8388605 bytes 3 times --> 1877.79 Mbps in 34082.69 usec 122: 8388608 bytes 3 times --> 1877.72 Mbps in 34083.84 usec 123: 8388611 bytes 3 times --> 1877.66 Mbps in 34085.00 usec This commit was SVN r7180.
2005-09-04 22:08:13 +00:00
/**< number of hcas available to the MX component */
struct mca_btl_mx_module_t** mx_btls;
/**< array of available BTL modules */
int32_t mx_free_list_num;
/**< initial size of free lists */
int32_t mx_free_list_max;
/**< maximum size of free lists */
int32_t mx_max_posted_recv;
First stable version of the MX BTL (at least we pass NetPipe). The perfs are not amazing but are not that bad either. On a 2 procs Intel(R) Xeon(TM) CPU 3.20GHz with MYRICOM Inc. Myrinet 2000 Scalable Cluster Interconnect (rev 04) I get: 0: 1 bytes 13096 times --> 1.10 Mbps in 6.94 usec 1: 2 bytes 14408 times --> 2.17 Mbps in 7.02 usec 2: 3 bytes 14243 times --> 3.24 Mbps in 7.07 usec 3: 4 bytes 9428 times --> 4.27 Mbps in 7.15 usec 4: 6 bytes 10493 times --> 6.26 Mbps in 7.32 usec 5: 8 bytes 6834 times --> 8.18 Mbps in 7.47 usec 6: 12 bytes 8371 times --> 11.89 Mbps in 7.70 usec 7: 13 bytes 5411 times --> 12.72 Mbps in 7.80 usec 8: 16 bytes 5919 times --> 15.35 Mbps in 7.95 usec 9: 19 bytes 7074 times --> 17.66 Mbps in 8.21 usec 10: 21 bytes 7696 times --> 19.00 Mbps in 8.43 usec 11: 24 bytes 7906 times --> 20.87 Mbps in 8.77 usec 12: 27 bytes 8073 times --> 23.05 Mbps in 8.94 usec 13: 29 bytes 4972 times --> 24.32 Mbps in 9.10 usec 14: 32 bytes 5307 times --> 26.29 Mbps in 9.29 usec 15: 35 bytes 5720 times --> 33.61 Mbps in 7.95 usec 16: 45 bytes 7191 times --> 39.50 Mbps in 8.69 usec 17: 48 bytes 7670 times --> 41.33 Mbps in 8.86 usec 18: 51 bytes 7759 times --> 42.80 Mbps in 9.09 usec 19: 61 bytes 4313 times --> 47.44 Mbps in 9.81 usec 20: 64 bytes 5012 times --> 57.61 Mbps in 8.48 usec 21: 67 bytes 6083 times --> 59.31 Mbps in 8.62 usec 22: 93 bytes 6234 times --> 68.08 Mbps in 10.42 usec 23: 96 bytes 6396 times --> 80.65 Mbps in 9.08 usec 24: 99 bytes 7455 times --> 81.56 Mbps in 9.26 usec 25: 125 bytes 3926 times --> 112.46 Mbps in 8.48 usec 26: 128 bytes 5848 times --> 116.87 Mbps in 8.36 usec 27: 131 bytes 6077 times --> 119.22 Mbps in 8.38 usec 28: 189 bytes 6192 times --> 163.79 Mbps in 8.80 usec 29: 192 bytes 7572 times --> 168.01 Mbps in 8.72 usec 30: 195 bytes 7705 times --> 171.13 Mbps in 8.69 usec 31: 253 bytes 4011 times --> 210.21 Mbps in 9.18 usec 32: 256 bytes 5423 times --> 214.55 Mbps in 9.10 usec 33: 259 bytes 5535 times --> 217.64 Mbps in 
9.08 usec 34: 381 bytes 5613 times --> 290.55 Mbps in 10.00 usec 35: 384 bytes 6663 times --> 296.11 Mbps in 9.89 usec 36: 387 bytes 6764 times --> 298.74 Mbps in 9.88 usec 37: 509 bytes 3451 times --> 353.78 Mbps in 10.98 usec 38: 512 bytes 4546 times --> 359.36 Mbps in 10.87 usec 39: 515 bytes 4617 times --> 361.53 Mbps in 10.87 usec 40: 765 bytes 4645 times --> 461.41 Mbps in 12.65 usec 41: 768 bytes 5270 times --> 468.59 Mbps in 12.50 usec 42: 771 bytes 5341 times --> 470.16 Mbps in 12.51 usec 43: 1021 bytes 2695 times --> 508.42 Mbps in 15.32 usec 44: 1024 bytes 3260 times --> 514.44 Mbps in 15.19 usec 45: 1027 bytes 3298 times --> 515.72 Mbps in 15.19 usec 46: 1533 bytes 3307 times --> 707.12 Mbps in 16.54 usec 47: 1536 bytes 4030 times --> 714.93 Mbps in 16.39 usec 48: 1539 bytes 4071 times --> 714.41 Mbps in 16.44 usec 49: 2045 bytes 2040 times --> 761.38 Mbps in 20.49 usec 50: 2048 bytes 2438 times --> 769.78 Mbps in 20.30 usec 51: 2051 bytes 2465 times --> 769.78 Mbps in 20.33 usec 52: 3069 bytes 2465 times --> 923.43 Mbps in 25.36 usec 53: 3072 bytes 2629 times --> 928.48 Mbps in 25.24 usec 54: 3075 bytes 2642 times --> 929.07 Mbps in 25.25 usec 55: 4093 bytes 1323 times --> 1012.38 Mbps in 30.85 usec 56: 4096 bytes 1620 times --> 1016.69 Mbps in 30.74 usec 57: 4099 bytes 1627 times --> 1015.16 Mbps in 30.81 usec 58: 6141 bytes 1625 times --> 1171.82 Mbps in 39.98 usec 59: 6144 bytes 1667 times --> 1173.85 Mbps in 39.93 usec 60: 6147 bytes 1669 times --> 1174.44 Mbps in 39.93 usec 61: 8189 bytes 835 times --> 1232.43 Mbps in 50.69 usec 62: 8192 bytes 986 times --> 1234.87 Mbps in 50.61 usec 63: 8195 bytes 988 times --> 1234.85 Mbps in 50.63 usec 64: 12285 bytes 988 times --> 1360.73 Mbps in 68.88 usec 65: 12288 bytes 967 times --> 1364.20 Mbps in 68.72 usec 66: 12291 bytes 970 times --> 1364.56 Mbps in 68.72 usec 67: 16381 bytes 485 times --> 1385.48 Mbps in 90.21 usec 68: 16384 bytes 554 times --> 1388.76 Mbps in 90.01 usec 69: 16387 bytes 555 times --> 
1388.41 Mbps in 90.05 usec 70: 24573 bytes 555 times --> 1499.72 Mbps in 125.01 usec 71: 24576 bytes 533 times --> 1499.36 Mbps in 125.05 usec 72: 24579 bytes 533 times --> 1500.44 Mbps in 124.98 usec 73: 32765 bytes 266 times --> 1499.31 Mbps in 166.73 usec 74: 32768 bytes 299 times --> 1497.10 Mbps in 166.99 usec 75: 32771 bytes 299 times --> 1495.29 Mbps in 167.21 usec 76: 49149 bytes 299 times --> 1528.78 Mbps in 245.28 usec 77: 49152 bytes 271 times --> 1527.97 Mbps in 245.42 usec 78: 49155 bytes 271 times --> 1529.35 Mbps in 245.22 usec 79: 65533 bytes 135 times --> 1586.19 Mbps in 315.21 usec 80: 65536 bytes 158 times --> 1591.11 Mbps in 314.25 usec 81: 65539 bytes 159 times --> 1586.50 Mbps in 315.17 usec 82: 98301 bytes 158 times --> 1668.05 Mbps in 449.61 usec 83: 98304 bytes 148 times --> 1667.40 Mbps in 449.80 usec 84: 98307 bytes 148 times --> 1667.29 Mbps in 449.84 usec 85: 131069 bytes 74 times --> 1709.11 Mbps in 585.09 usec 86: 131072 bytes 85 times --> 1711.09 Mbps in 584.42 usec 87: 131075 bytes 85 times --> 1710.92 Mbps in 584.49 usec 88: 196605 bytes 85 times --> 1727.93 Mbps in 868.08 usec 89: 196608 bytes 76 times --> 1726.28 Mbps in 868.92 usec 90: 196611 bytes 76 times --> 1727.06 Mbps in 868.54 usec 91: 262141 bytes 38 times --> 1757.65 Mbps in 1137.87 usec 92: 262144 bytes 43 times --> 1758.69 Mbps in 1137.21 usec 93: 262147 bytes 43 times --> 1759.38 Mbps in 1136.78 usec 94: 393213 bytes 43 times --> 1801.51 Mbps in 1665.25 usec 95: 393216 bytes 40 times --> 1803.26 Mbps in 1663.65 usec 96: 393219 bytes 40 times --> 1800.73 Mbps in 1666.00 usec 97: 524285 bytes 20 times --> 1805.33 Mbps in 2215.65 usec 98: 524288 bytes 22 times --> 1806.80 Mbps in 2213.86 usec 99: 524291 bytes 22 times --> 1805.77 Mbps in 2215.14 usec 100: 786429 bytes 22 times --> 1827.24 Mbps in 3283.64 usec 101: 786432 bytes 20 times --> 1827.03 Mbps in 3284.03 usec 102: 786435 bytes 20 times --> 1827.20 Mbps in 3283.73 usec 103: 1048573 bytes 10 times --> 1840.05 
Mbps in 4347.71 usec 104: 1048576 bytes 11 times --> 1839.68 Mbps in 4348.58 usec 105: 1048579 bytes 11 times --> 1840.13 Mbps in 4347.54 usec 106: 1572861 bytes 11 times --> 1853.99 Mbps in 6472.50 usec 107: 1572864 bytes 10 times --> 1854.11 Mbps in 6472.10 usec 108: 1572867 bytes 10 times --> 1854.12 Mbps in 6472.10 usec 109: 2097149 bytes 5 times --> 1861.41 Mbps in 8595.61 usec 110: 2097152 bytes 5 times --> 1861.25 Mbps in 8596.40 usec 111: 2097155 bytes 5 times --> 1860.99 Mbps in 8597.59 usec 112: 3145725 bytes 5 times --> 1868.34 Mbps in 12845.59 usec 113: 3145728 bytes 5 times --> 1868.30 Mbps in 12845.90 usec 114: 3145731 bytes 5 times --> 1868.59 Mbps in 12843.89 usec 115: 4194301 bytes 3 times --> 1872.16 Mbps in 17092.51 usec 116: 4194304 bytes 3 times --> 1872.31 Mbps in 17091.19 usec 117: 4194307 bytes 3 times --> 1872.13 Mbps in 17092.82 usec 118: 6291453 bytes 3 times --> 1875.88 Mbps in 25588.00 usec 119: 6291456 bytes 3 times --> 1875.98 Mbps in 25586.68 usec 120: 6291459 bytes 3 times --> 1875.93 Mbps in 25587.36 usec 121: 8388605 bytes 3 times --> 1877.79 Mbps in 34082.69 usec 122: 8388608 bytes 3 times --> 1877.72 Mbps in 34083.84 usec 123: 8388611 bytes 3 times --> 1877.66 Mbps in 34085.00 usec This commit was SVN r7180.
2005-09-04 22:08:13 +00:00
/**< number of posted receives on each NIC */
int32_t mx_free_list_inc;
/**< number of elements to alloc when growing free lists */
int32_t mx_support_sharedmem;
/**< true if we want to activate the MX support for shared memory */
int32_t mx_support_self;
/**< true if we want to activate the MX support for self communications */
int32_t mx_use_unexpected;
/**< true if Open MPI is allowed to register an unexpected handler with the MX library */
First stable version of the MX BTL (at least we pass NetPipe). The perfs are not amazing but are not that bad either. On a 2 procs Intel(R) Xeon(TM) CPU 3.20GHz with MYRICOM Inc. Myrinet 2000 Scalable Cluster Interconnect (rev 04) I get: 0: 1 bytes 13096 times --> 1.10 Mbps in 6.94 usec 1: 2 bytes 14408 times --> 2.17 Mbps in 7.02 usec 2: 3 bytes 14243 times --> 3.24 Mbps in 7.07 usec 3: 4 bytes 9428 times --> 4.27 Mbps in 7.15 usec 4: 6 bytes 10493 times --> 6.26 Mbps in 7.32 usec 5: 8 bytes 6834 times --> 8.18 Mbps in 7.47 usec 6: 12 bytes 8371 times --> 11.89 Mbps in 7.70 usec 7: 13 bytes 5411 times --> 12.72 Mbps in 7.80 usec 8: 16 bytes 5919 times --> 15.35 Mbps in 7.95 usec 9: 19 bytes 7074 times --> 17.66 Mbps in 8.21 usec 10: 21 bytes 7696 times --> 19.00 Mbps in 8.43 usec 11: 24 bytes 7906 times --> 20.87 Mbps in 8.77 usec 12: 27 bytes 8073 times --> 23.05 Mbps in 8.94 usec 13: 29 bytes 4972 times --> 24.32 Mbps in 9.10 usec 14: 32 bytes 5307 times --> 26.29 Mbps in 9.29 usec 15: 35 bytes 5720 times --> 33.61 Mbps in 7.95 usec 16: 45 bytes 7191 times --> 39.50 Mbps in 8.69 usec 17: 48 bytes 7670 times --> 41.33 Mbps in 8.86 usec 18: 51 bytes 7759 times --> 42.80 Mbps in 9.09 usec 19: 61 bytes 4313 times --> 47.44 Mbps in 9.81 usec 20: 64 bytes 5012 times --> 57.61 Mbps in 8.48 usec 21: 67 bytes 6083 times --> 59.31 Mbps in 8.62 usec 22: 93 bytes 6234 times --> 68.08 Mbps in 10.42 usec 23: 96 bytes 6396 times --> 80.65 Mbps in 9.08 usec 24: 99 bytes 7455 times --> 81.56 Mbps in 9.26 usec 25: 125 bytes 3926 times --> 112.46 Mbps in 8.48 usec 26: 128 bytes 5848 times --> 116.87 Mbps in 8.36 usec 27: 131 bytes 6077 times --> 119.22 Mbps in 8.38 usec 28: 189 bytes 6192 times --> 163.79 Mbps in 8.80 usec 29: 192 bytes 7572 times --> 168.01 Mbps in 8.72 usec 30: 195 bytes 7705 times --> 171.13 Mbps in 8.69 usec 31: 253 bytes 4011 times --> 210.21 Mbps in 9.18 usec 32: 256 bytes 5423 times --> 214.55 Mbps in 9.10 usec 33: 259 bytes 5535 times --> 217.64 Mbps in 
9.08 usec 34: 381 bytes 5613 times --> 290.55 Mbps in 10.00 usec 35: 384 bytes 6663 times --> 296.11 Mbps in 9.89 usec 36: 387 bytes 6764 times --> 298.74 Mbps in 9.88 usec 37: 509 bytes 3451 times --> 353.78 Mbps in 10.98 usec 38: 512 bytes 4546 times --> 359.36 Mbps in 10.87 usec 39: 515 bytes 4617 times --> 361.53 Mbps in 10.87 usec 40: 765 bytes 4645 times --> 461.41 Mbps in 12.65 usec 41: 768 bytes 5270 times --> 468.59 Mbps in 12.50 usec 42: 771 bytes 5341 times --> 470.16 Mbps in 12.51 usec 43: 1021 bytes 2695 times --> 508.42 Mbps in 15.32 usec 44: 1024 bytes 3260 times --> 514.44 Mbps in 15.19 usec 45: 1027 bytes 3298 times --> 515.72 Mbps in 15.19 usec 46: 1533 bytes 3307 times --> 707.12 Mbps in 16.54 usec 47: 1536 bytes 4030 times --> 714.93 Mbps in 16.39 usec 48: 1539 bytes 4071 times --> 714.41 Mbps in 16.44 usec 49: 2045 bytes 2040 times --> 761.38 Mbps in 20.49 usec 50: 2048 bytes 2438 times --> 769.78 Mbps in 20.30 usec 51: 2051 bytes 2465 times --> 769.78 Mbps in 20.33 usec 52: 3069 bytes 2465 times --> 923.43 Mbps in 25.36 usec 53: 3072 bytes 2629 times --> 928.48 Mbps in 25.24 usec 54: 3075 bytes 2642 times --> 929.07 Mbps in 25.25 usec 55: 4093 bytes 1323 times --> 1012.38 Mbps in 30.85 usec 56: 4096 bytes 1620 times --> 1016.69 Mbps in 30.74 usec 57: 4099 bytes 1627 times --> 1015.16 Mbps in 30.81 usec 58: 6141 bytes 1625 times --> 1171.82 Mbps in 39.98 usec 59: 6144 bytes 1667 times --> 1173.85 Mbps in 39.93 usec 60: 6147 bytes 1669 times --> 1174.44 Mbps in 39.93 usec 61: 8189 bytes 835 times --> 1232.43 Mbps in 50.69 usec 62: 8192 bytes 986 times --> 1234.87 Mbps in 50.61 usec 63: 8195 bytes 988 times --> 1234.85 Mbps in 50.63 usec 64: 12285 bytes 988 times --> 1360.73 Mbps in 68.88 usec 65: 12288 bytes 967 times --> 1364.20 Mbps in 68.72 usec 66: 12291 bytes 970 times --> 1364.56 Mbps in 68.72 usec 67: 16381 bytes 485 times --> 1385.48 Mbps in 90.21 usec 68: 16384 bytes 554 times --> 1388.76 Mbps in 90.01 usec 69: 16387 bytes 555 times --> 
1388.41 Mbps in 90.05 usec 70: 24573 bytes 555 times --> 1499.72 Mbps in 125.01 usec 71: 24576 bytes 533 times --> 1499.36 Mbps in 125.05 usec 72: 24579 bytes 533 times --> 1500.44 Mbps in 124.98 usec 73: 32765 bytes 266 times --> 1499.31 Mbps in 166.73 usec 74: 32768 bytes 299 times --> 1497.10 Mbps in 166.99 usec 75: 32771 bytes 299 times --> 1495.29 Mbps in 167.21 usec 76: 49149 bytes 299 times --> 1528.78 Mbps in 245.28 usec 77: 49152 bytes 271 times --> 1527.97 Mbps in 245.42 usec 78: 49155 bytes 271 times --> 1529.35 Mbps in 245.22 usec 79: 65533 bytes 135 times --> 1586.19 Mbps in 315.21 usec 80: 65536 bytes 158 times --> 1591.11 Mbps in 314.25 usec 81: 65539 bytes 159 times --> 1586.50 Mbps in 315.17 usec 82: 98301 bytes 158 times --> 1668.05 Mbps in 449.61 usec 83: 98304 bytes 148 times --> 1667.40 Mbps in 449.80 usec 84: 98307 bytes 148 times --> 1667.29 Mbps in 449.84 usec 85: 131069 bytes 74 times --> 1709.11 Mbps in 585.09 usec 86: 131072 bytes 85 times --> 1711.09 Mbps in 584.42 usec 87: 131075 bytes 85 times --> 1710.92 Mbps in 584.49 usec 88: 196605 bytes 85 times --> 1727.93 Mbps in 868.08 usec 89: 196608 bytes 76 times --> 1726.28 Mbps in 868.92 usec 90: 196611 bytes 76 times --> 1727.06 Mbps in 868.54 usec 91: 262141 bytes 38 times --> 1757.65 Mbps in 1137.87 usec 92: 262144 bytes 43 times --> 1758.69 Mbps in 1137.21 usec 93: 262147 bytes 43 times --> 1759.38 Mbps in 1136.78 usec 94: 393213 bytes 43 times --> 1801.51 Mbps in 1665.25 usec 95: 393216 bytes 40 times --> 1803.26 Mbps in 1663.65 usec 96: 393219 bytes 40 times --> 1800.73 Mbps in 1666.00 usec 97: 524285 bytes 20 times --> 1805.33 Mbps in 2215.65 usec 98: 524288 bytes 22 times --> 1806.80 Mbps in 2213.86 usec 99: 524291 bytes 22 times --> 1805.77 Mbps in 2215.14 usec 100: 786429 bytes 22 times --> 1827.24 Mbps in 3283.64 usec 101: 786432 bytes 20 times --> 1827.03 Mbps in 3284.03 usec 102: 786435 bytes 20 times --> 1827.20 Mbps in 3283.73 usec 103: 1048573 bytes 10 times --> 1840.05 
Mbps in 4347.71 usec 104: 1048576 bytes 11 times --> 1839.68 Mbps in 4348.58 usec 105: 1048579 bytes 11 times --> 1840.13 Mbps in 4347.54 usec 106: 1572861 bytes 11 times --> 1853.99 Mbps in 6472.50 usec 107: 1572864 bytes 10 times --> 1854.11 Mbps in 6472.10 usec 108: 1572867 bytes 10 times --> 1854.12 Mbps in 6472.10 usec 109: 2097149 bytes 5 times --> 1861.41 Mbps in 8595.61 usec 110: 2097152 bytes 5 times --> 1861.25 Mbps in 8596.40 usec 111: 2097155 bytes 5 times --> 1860.99 Mbps in 8597.59 usec 112: 3145725 bytes 5 times --> 1868.34 Mbps in 12845.59 usec 113: 3145728 bytes 5 times --> 1868.30 Mbps in 12845.90 usec 114: 3145731 bytes 5 times --> 1868.59 Mbps in 12843.89 usec 115: 4194301 bytes 3 times --> 1872.16 Mbps in 17092.51 usec 116: 4194304 bytes 3 times --> 1872.31 Mbps in 17091.19 usec 117: 4194307 bytes 3 times --> 1872.13 Mbps in 17092.82 usec 118: 6291453 bytes 3 times --> 1875.88 Mbps in 25588.00 usec 119: 6291456 bytes 3 times --> 1875.98 Mbps in 25586.68 usec 120: 6291459 bytes 3 times --> 1875.93 Mbps in 25587.36 usec 121: 8388605 bytes 3 times --> 1877.79 Mbps in 34082.69 usec 122: 8388608 bytes 3 times --> 1877.72 Mbps in 34083.84 usec 123: 8388611 bytes 3 times --> 1877.66 Mbps in 34085.00 usec This commit was SVN r7180.
2005-09-04 22:08:13 +00:00
opal_list_t mx_procs; /**< list of mx proc structures */
int32_t mx_filter;
int32_t mx_timeout;
int32_t mx_connection_retries;
First stable version of the MX BTL (at least we pass NetPipe). The perfs are not amazing but are not that bad either. On a 2 procs Intel(R) Xeon(TM) CPU 3.20GHz with MYRICOM Inc. Myrinet 2000 Scalable Cluster Interconnect (rev 04) I get: 0: 1 bytes 13096 times --> 1.10 Mbps in 6.94 usec 1: 2 bytes 14408 times --> 2.17 Mbps in 7.02 usec 2: 3 bytes 14243 times --> 3.24 Mbps in 7.07 usec 3: 4 bytes 9428 times --> 4.27 Mbps in 7.15 usec 4: 6 bytes 10493 times --> 6.26 Mbps in 7.32 usec 5: 8 bytes 6834 times --> 8.18 Mbps in 7.47 usec 6: 12 bytes 8371 times --> 11.89 Mbps in 7.70 usec 7: 13 bytes 5411 times --> 12.72 Mbps in 7.80 usec 8: 16 bytes 5919 times --> 15.35 Mbps in 7.95 usec 9: 19 bytes 7074 times --> 17.66 Mbps in 8.21 usec 10: 21 bytes 7696 times --> 19.00 Mbps in 8.43 usec 11: 24 bytes 7906 times --> 20.87 Mbps in 8.77 usec 12: 27 bytes 8073 times --> 23.05 Mbps in 8.94 usec 13: 29 bytes 4972 times --> 24.32 Mbps in 9.10 usec 14: 32 bytes 5307 times --> 26.29 Mbps in 9.29 usec 15: 35 bytes 5720 times --> 33.61 Mbps in 7.95 usec 16: 45 bytes 7191 times --> 39.50 Mbps in 8.69 usec 17: 48 bytes 7670 times --> 41.33 Mbps in 8.86 usec 18: 51 bytes 7759 times --> 42.80 Mbps in 9.09 usec 19: 61 bytes 4313 times --> 47.44 Mbps in 9.81 usec 20: 64 bytes 5012 times --> 57.61 Mbps in 8.48 usec 21: 67 bytes 6083 times --> 59.31 Mbps in 8.62 usec 22: 93 bytes 6234 times --> 68.08 Mbps in 10.42 usec 23: 96 bytes 6396 times --> 80.65 Mbps in 9.08 usec 24: 99 bytes 7455 times --> 81.56 Mbps in 9.26 usec 25: 125 bytes 3926 times --> 112.46 Mbps in 8.48 usec 26: 128 bytes 5848 times --> 116.87 Mbps in 8.36 usec 27: 131 bytes 6077 times --> 119.22 Mbps in 8.38 usec 28: 189 bytes 6192 times --> 163.79 Mbps in 8.80 usec 29: 192 bytes 7572 times --> 168.01 Mbps in 8.72 usec 30: 195 bytes 7705 times --> 171.13 Mbps in 8.69 usec 31: 253 bytes 4011 times --> 210.21 Mbps in 9.18 usec 32: 256 bytes 5423 times --> 214.55 Mbps in 9.10 usec 33: 259 bytes 5535 times --> 217.64 Mbps in 
9.08 usec 34: 381 bytes 5613 times --> 290.55 Mbps in 10.00 usec 35: 384 bytes 6663 times --> 296.11 Mbps in 9.89 usec 36: 387 bytes 6764 times --> 298.74 Mbps in 9.88 usec 37: 509 bytes 3451 times --> 353.78 Mbps in 10.98 usec 38: 512 bytes 4546 times --> 359.36 Mbps in 10.87 usec 39: 515 bytes 4617 times --> 361.53 Mbps in 10.87 usec 40: 765 bytes 4645 times --> 461.41 Mbps in 12.65 usec 41: 768 bytes 5270 times --> 468.59 Mbps in 12.50 usec 42: 771 bytes 5341 times --> 470.16 Mbps in 12.51 usec 43: 1021 bytes 2695 times --> 508.42 Mbps in 15.32 usec 44: 1024 bytes 3260 times --> 514.44 Mbps in 15.19 usec 45: 1027 bytes 3298 times --> 515.72 Mbps in 15.19 usec 46: 1533 bytes 3307 times --> 707.12 Mbps in 16.54 usec 47: 1536 bytes 4030 times --> 714.93 Mbps in 16.39 usec 48: 1539 bytes 4071 times --> 714.41 Mbps in 16.44 usec 49: 2045 bytes 2040 times --> 761.38 Mbps in 20.49 usec 50: 2048 bytes 2438 times --> 769.78 Mbps in 20.30 usec 51: 2051 bytes 2465 times --> 769.78 Mbps in 20.33 usec 52: 3069 bytes 2465 times --> 923.43 Mbps in 25.36 usec 53: 3072 bytes 2629 times --> 928.48 Mbps in 25.24 usec 54: 3075 bytes 2642 times --> 929.07 Mbps in 25.25 usec 55: 4093 bytes 1323 times --> 1012.38 Mbps in 30.85 usec 56: 4096 bytes 1620 times --> 1016.69 Mbps in 30.74 usec 57: 4099 bytes 1627 times --> 1015.16 Mbps in 30.81 usec 58: 6141 bytes 1625 times --> 1171.82 Mbps in 39.98 usec 59: 6144 bytes 1667 times --> 1173.85 Mbps in 39.93 usec 60: 6147 bytes 1669 times --> 1174.44 Mbps in 39.93 usec 61: 8189 bytes 835 times --> 1232.43 Mbps in 50.69 usec 62: 8192 bytes 986 times --> 1234.87 Mbps in 50.61 usec 63: 8195 bytes 988 times --> 1234.85 Mbps in 50.63 usec 64: 12285 bytes 988 times --> 1360.73 Mbps in 68.88 usec 65: 12288 bytes 967 times --> 1364.20 Mbps in 68.72 usec 66: 12291 bytes 970 times --> 1364.56 Mbps in 68.72 usec 67: 16381 bytes 485 times --> 1385.48 Mbps in 90.21 usec 68: 16384 bytes 554 times --> 1388.76 Mbps in 90.01 usec 69: 16387 bytes 555 times --> 
1388.41 Mbps in 90.05 usec 70: 24573 bytes 555 times --> 1499.72 Mbps in 125.01 usec 71: 24576 bytes 533 times --> 1499.36 Mbps in 125.05 usec 72: 24579 bytes 533 times --> 1500.44 Mbps in 124.98 usec 73: 32765 bytes 266 times --> 1499.31 Mbps in 166.73 usec 74: 32768 bytes 299 times --> 1497.10 Mbps in 166.99 usec 75: 32771 bytes 299 times --> 1495.29 Mbps in 167.21 usec 76: 49149 bytes 299 times --> 1528.78 Mbps in 245.28 usec 77: 49152 bytes 271 times --> 1527.97 Mbps in 245.42 usec 78: 49155 bytes 271 times --> 1529.35 Mbps in 245.22 usec 79: 65533 bytes 135 times --> 1586.19 Mbps in 315.21 usec 80: 65536 bytes 158 times --> 1591.11 Mbps in 314.25 usec 81: 65539 bytes 159 times --> 1586.50 Mbps in 315.17 usec 82: 98301 bytes 158 times --> 1668.05 Mbps in 449.61 usec 83: 98304 bytes 148 times --> 1667.40 Mbps in 449.80 usec 84: 98307 bytes 148 times --> 1667.29 Mbps in 449.84 usec 85: 131069 bytes 74 times --> 1709.11 Mbps in 585.09 usec 86: 131072 bytes 85 times --> 1711.09 Mbps in 584.42 usec 87: 131075 bytes 85 times --> 1710.92 Mbps in 584.49 usec 88: 196605 bytes 85 times --> 1727.93 Mbps in 868.08 usec 89: 196608 bytes 76 times --> 1726.28 Mbps in 868.92 usec 90: 196611 bytes 76 times --> 1727.06 Mbps in 868.54 usec 91: 262141 bytes 38 times --> 1757.65 Mbps in 1137.87 usec 92: 262144 bytes 43 times --> 1758.69 Mbps in 1137.21 usec 93: 262147 bytes 43 times --> 1759.38 Mbps in 1136.78 usec 94: 393213 bytes 43 times --> 1801.51 Mbps in 1665.25 usec 95: 393216 bytes 40 times --> 1803.26 Mbps in 1663.65 usec 96: 393219 bytes 40 times --> 1800.73 Mbps in 1666.00 usec 97: 524285 bytes 20 times --> 1805.33 Mbps in 2215.65 usec 98: 524288 bytes 22 times --> 1806.80 Mbps in 2213.86 usec 99: 524291 bytes 22 times --> 1805.77 Mbps in 2215.14 usec 100: 786429 bytes 22 times --> 1827.24 Mbps in 3283.64 usec 101: 786432 bytes 20 times --> 1827.03 Mbps in 3284.03 usec 102: 786435 bytes 20 times --> 1827.20 Mbps in 3283.73 usec 103: 1048573 bytes 10 times --> 1840.05 
Mbps in 4347.71 usec 104: 1048576 bytes 11 times --> 1839.68 Mbps in 4348.58 usec 105: 1048579 bytes 11 times --> 1840.13 Mbps in 4347.54 usec 106: 1572861 bytes 11 times --> 1853.99 Mbps in 6472.50 usec 107: 1572864 bytes 10 times --> 1854.11 Mbps in 6472.10 usec 108: 1572867 bytes 10 times --> 1854.12 Mbps in 6472.10 usec 109: 2097149 bytes 5 times --> 1861.41 Mbps in 8595.61 usec 110: 2097152 bytes 5 times --> 1861.25 Mbps in 8596.40 usec 111: 2097155 bytes 5 times --> 1860.99 Mbps in 8597.59 usec 112: 3145725 bytes 5 times --> 1868.34 Mbps in 12845.59 usec 113: 3145728 bytes 5 times --> 1868.30 Mbps in 12845.90 usec 114: 3145731 bytes 5 times --> 1868.59 Mbps in 12843.89 usec 115: 4194301 bytes 3 times --> 1872.16 Mbps in 17092.51 usec 116: 4194304 bytes 3 times --> 1872.31 Mbps in 17091.19 usec 117: 4194307 bytes 3 times --> 1872.13 Mbps in 17092.82 usec 118: 6291453 bytes 3 times --> 1875.88 Mbps in 25588.00 usec 119: 6291456 bytes 3 times --> 1875.98 Mbps in 25586.68 usec 120: 6291459 bytes 3 times --> 1875.93 Mbps in 25587.36 usec 121: 8388605 bytes 3 times --> 1877.79 Mbps in 34082.69 usec 122: 8388608 bytes 3 times --> 1877.72 Mbps in 34083.84 usec 123: 8388611 bytes 3 times --> 1877.66 Mbps in 34085.00 usec This commit was SVN r7180.
2005-09-04 22:08:13 +00:00
ompi_free_list_t mx_send_eager_frags; /**< free list of mx eager send fragments */
ompi_free_list_t mx_send_user_frags; /**< free list of mx user send fragments */
First stable version of the MX BTL (at least we pass NetPipe). The perfs are not amazing but are not that bad either. On a 2 procs Intel(R) Xeon(TM) CPU 3.20GHz with MYRICOM Inc. Myrinet 2000 Scalable Cluster Interconnect (rev 04) I get: 0: 1 bytes 13096 times --> 1.10 Mbps in 6.94 usec 1: 2 bytes 14408 times --> 2.17 Mbps in 7.02 usec 2: 3 bytes 14243 times --> 3.24 Mbps in 7.07 usec 3: 4 bytes 9428 times --> 4.27 Mbps in 7.15 usec 4: 6 bytes 10493 times --> 6.26 Mbps in 7.32 usec 5: 8 bytes 6834 times --> 8.18 Mbps in 7.47 usec 6: 12 bytes 8371 times --> 11.89 Mbps in 7.70 usec 7: 13 bytes 5411 times --> 12.72 Mbps in 7.80 usec 8: 16 bytes 5919 times --> 15.35 Mbps in 7.95 usec 9: 19 bytes 7074 times --> 17.66 Mbps in 8.21 usec 10: 21 bytes 7696 times --> 19.00 Mbps in 8.43 usec 11: 24 bytes 7906 times --> 20.87 Mbps in 8.77 usec 12: 27 bytes 8073 times --> 23.05 Mbps in 8.94 usec 13: 29 bytes 4972 times --> 24.32 Mbps in 9.10 usec 14: 32 bytes 5307 times --> 26.29 Mbps in 9.29 usec 15: 35 bytes 5720 times --> 33.61 Mbps in 7.95 usec 16: 45 bytes 7191 times --> 39.50 Mbps in 8.69 usec 17: 48 bytes 7670 times --> 41.33 Mbps in 8.86 usec 18: 51 bytes 7759 times --> 42.80 Mbps in 9.09 usec 19: 61 bytes 4313 times --> 47.44 Mbps in 9.81 usec 20: 64 bytes 5012 times --> 57.61 Mbps in 8.48 usec 21: 67 bytes 6083 times --> 59.31 Mbps in 8.62 usec 22: 93 bytes 6234 times --> 68.08 Mbps in 10.42 usec 23: 96 bytes 6396 times --> 80.65 Mbps in 9.08 usec 24: 99 bytes 7455 times --> 81.56 Mbps in 9.26 usec 25: 125 bytes 3926 times --> 112.46 Mbps in 8.48 usec 26: 128 bytes 5848 times --> 116.87 Mbps in 8.36 usec 27: 131 bytes 6077 times --> 119.22 Mbps in 8.38 usec 28: 189 bytes 6192 times --> 163.79 Mbps in 8.80 usec 29: 192 bytes 7572 times --> 168.01 Mbps in 8.72 usec 30: 195 bytes 7705 times --> 171.13 Mbps in 8.69 usec 31: 253 bytes 4011 times --> 210.21 Mbps in 9.18 usec 32: 256 bytes 5423 times --> 214.55 Mbps in 9.10 usec 33: 259 bytes 5535 times --> 217.64 Mbps in 
9.08 usec 34: 381 bytes 5613 times --> 290.55 Mbps in 10.00 usec 35: 384 bytes 6663 times --> 296.11 Mbps in 9.89 usec 36: 387 bytes 6764 times --> 298.74 Mbps in 9.88 usec 37: 509 bytes 3451 times --> 353.78 Mbps in 10.98 usec 38: 512 bytes 4546 times --> 359.36 Mbps in 10.87 usec 39: 515 bytes 4617 times --> 361.53 Mbps in 10.87 usec 40: 765 bytes 4645 times --> 461.41 Mbps in 12.65 usec 41: 768 bytes 5270 times --> 468.59 Mbps in 12.50 usec 42: 771 bytes 5341 times --> 470.16 Mbps in 12.51 usec 43: 1021 bytes 2695 times --> 508.42 Mbps in 15.32 usec 44: 1024 bytes 3260 times --> 514.44 Mbps in 15.19 usec 45: 1027 bytes 3298 times --> 515.72 Mbps in 15.19 usec 46: 1533 bytes 3307 times --> 707.12 Mbps in 16.54 usec 47: 1536 bytes 4030 times --> 714.93 Mbps in 16.39 usec 48: 1539 bytes 4071 times --> 714.41 Mbps in 16.44 usec 49: 2045 bytes 2040 times --> 761.38 Mbps in 20.49 usec 50: 2048 bytes 2438 times --> 769.78 Mbps in 20.30 usec 51: 2051 bytes 2465 times --> 769.78 Mbps in 20.33 usec 52: 3069 bytes 2465 times --> 923.43 Mbps in 25.36 usec 53: 3072 bytes 2629 times --> 928.48 Mbps in 25.24 usec 54: 3075 bytes 2642 times --> 929.07 Mbps in 25.25 usec 55: 4093 bytes 1323 times --> 1012.38 Mbps in 30.85 usec 56: 4096 bytes 1620 times --> 1016.69 Mbps in 30.74 usec 57: 4099 bytes 1627 times --> 1015.16 Mbps in 30.81 usec 58: 6141 bytes 1625 times --> 1171.82 Mbps in 39.98 usec 59: 6144 bytes 1667 times --> 1173.85 Mbps in 39.93 usec 60: 6147 bytes 1669 times --> 1174.44 Mbps in 39.93 usec 61: 8189 bytes 835 times --> 1232.43 Mbps in 50.69 usec 62: 8192 bytes 986 times --> 1234.87 Mbps in 50.61 usec 63: 8195 bytes 988 times --> 1234.85 Mbps in 50.63 usec 64: 12285 bytes 988 times --> 1360.73 Mbps in 68.88 usec 65: 12288 bytes 967 times --> 1364.20 Mbps in 68.72 usec 66: 12291 bytes 970 times --> 1364.56 Mbps in 68.72 usec 67: 16381 bytes 485 times --> 1385.48 Mbps in 90.21 usec 68: 16384 bytes 554 times --> 1388.76 Mbps in 90.01 usec 69: 16387 bytes 555 times --> 
1388.41 Mbps in 90.05 usec 70: 24573 bytes 555 times --> 1499.72 Mbps in 125.01 usec 71: 24576 bytes 533 times --> 1499.36 Mbps in 125.05 usec 72: 24579 bytes 533 times --> 1500.44 Mbps in 124.98 usec 73: 32765 bytes 266 times --> 1499.31 Mbps in 166.73 usec 74: 32768 bytes 299 times --> 1497.10 Mbps in 166.99 usec 75: 32771 bytes 299 times --> 1495.29 Mbps in 167.21 usec 76: 49149 bytes 299 times --> 1528.78 Mbps in 245.28 usec 77: 49152 bytes 271 times --> 1527.97 Mbps in 245.42 usec 78: 49155 bytes 271 times --> 1529.35 Mbps in 245.22 usec 79: 65533 bytes 135 times --> 1586.19 Mbps in 315.21 usec 80: 65536 bytes 158 times --> 1591.11 Mbps in 314.25 usec 81: 65539 bytes 159 times --> 1586.50 Mbps in 315.17 usec 82: 98301 bytes 158 times --> 1668.05 Mbps in 449.61 usec 83: 98304 bytes 148 times --> 1667.40 Mbps in 449.80 usec 84: 98307 bytes 148 times --> 1667.29 Mbps in 449.84 usec 85: 131069 bytes 74 times --> 1709.11 Mbps in 585.09 usec 86: 131072 bytes 85 times --> 1711.09 Mbps in 584.42 usec 87: 131075 bytes 85 times --> 1710.92 Mbps in 584.49 usec 88: 196605 bytes 85 times --> 1727.93 Mbps in 868.08 usec 89: 196608 bytes 76 times --> 1726.28 Mbps in 868.92 usec 90: 196611 bytes 76 times --> 1727.06 Mbps in 868.54 usec 91: 262141 bytes 38 times --> 1757.65 Mbps in 1137.87 usec 92: 262144 bytes 43 times --> 1758.69 Mbps in 1137.21 usec 93: 262147 bytes 43 times --> 1759.38 Mbps in 1136.78 usec 94: 393213 bytes 43 times --> 1801.51 Mbps in 1665.25 usec 95: 393216 bytes 40 times --> 1803.26 Mbps in 1663.65 usec 96: 393219 bytes 40 times --> 1800.73 Mbps in 1666.00 usec 97: 524285 bytes 20 times --> 1805.33 Mbps in 2215.65 usec 98: 524288 bytes 22 times --> 1806.80 Mbps in 2213.86 usec 99: 524291 bytes 22 times --> 1805.77 Mbps in 2215.14 usec 100: 786429 bytes 22 times --> 1827.24 Mbps in 3283.64 usec 101: 786432 bytes 20 times --> 1827.03 Mbps in 3284.03 usec 102: 786435 bytes 20 times --> 1827.20 Mbps in 3283.73 usec 103: 1048573 bytes 10 times --> 1840.05 
Mbps in 4347.71 usec 104: 1048576 bytes 11 times --> 1839.68 Mbps in 4348.58 usec 105: 1048579 bytes 11 times --> 1840.13 Mbps in 4347.54 usec 106: 1572861 bytes 11 times --> 1853.99 Mbps in 6472.50 usec 107: 1572864 bytes 10 times --> 1854.11 Mbps in 6472.10 usec 108: 1572867 bytes 10 times --> 1854.12 Mbps in 6472.10 usec 109: 2097149 bytes 5 times --> 1861.41 Mbps in 8595.61 usec 110: 2097152 bytes 5 times --> 1861.25 Mbps in 8596.40 usec 111: 2097155 bytes 5 times --> 1860.99 Mbps in 8597.59 usec 112: 3145725 bytes 5 times --> 1868.34 Mbps in 12845.59 usec 113: 3145728 bytes 5 times --> 1868.30 Mbps in 12845.90 usec 114: 3145731 bytes 5 times --> 1868.59 Mbps in 12843.89 usec 115: 4194301 bytes 3 times --> 1872.16 Mbps in 17092.51 usec 116: 4194304 bytes 3 times --> 1872.31 Mbps in 17091.19 usec 117: 4194307 bytes 3 times --> 1872.13 Mbps in 17092.82 usec 118: 6291453 bytes 3 times --> 1875.88 Mbps in 25588.00 usec 119: 6291456 bytes 3 times --> 1875.98 Mbps in 25586.68 usec 120: 6291459 bytes 3 times --> 1875.93 Mbps in 25587.36 usec 121: 8388605 bytes 3 times --> 1877.79 Mbps in 34082.69 usec 122: 8388608 bytes 3 times --> 1877.72 Mbps in 34083.84 usec 123: 8388611 bytes 3 times --> 1877.66 Mbps in 34085.00 usec This commit was SVN r7180.
2005-09-04 22:08:13 +00:00
opal_mutex_t mx_lock; /**< lock for accessing module state */
char* mx_if_include; /**< include the following NICs */
char* mx_if_exclude; /**< Exclude the following NICs. These
* values are based on the last 6
* digits in hexadecimal of the MAC
* address of the mapper.
*/
};
/** Shorthand type name for struct mca_btl_mx_component_t. */
typedef struct mca_btl_mx_component_t mca_btl_mx_component_t;
/**
* External declaration of the MX BTL component object.
* NOTE(review): OMPI_MODULE_DECLSPEC is presumably a symbol-visibility
* decoration; its definition is not visible here -- confirm.
*/
OMPI_MODULE_DECLSPEC extern mca_btl_mx_component_t mca_btl_mx_component;
/**
* BTL module interface.
*
* Each BTL corresponds to a high-level view of a network interface. The
* current version of the MX BTL is not able to handle striping of
* messages by itself; it relies on the PML layer for that.
*/
struct mca_btl_mx_module_t {
mca_btl_base_module_t super; /**< base BTL interface */
mx_endpoint_t mx_endpoint; /**< local MX endpoint */
mx_endpoint_addr_t mx_endpoint_addr; /**< local MX endpoint address */
uint32_t mx_unique_network_id; /**< unique identifier for this BTL,
* based on the MAC address of the
* mapper used to route messages.
*/
opal_list_t mx_peers; /**< list of peers */
int32_t mx_posted_request; /**< number of posted MX requests */
opal_mutex_t mx_lock; /**< lock for accessing module state */
};
/** Shorthand type name for struct mca_btl_mx_module_t. */
typedef struct mca_btl_mx_module_t mca_btl_mx_module_t;
/** External declaration of the MX BTL module object. */
extern mca_btl_mx_module_t mca_btl_mx_module;
/**
 * Register MX component parameters with the MCA framework.
 *
 * @return int status code. NOTE(review): presumably OMPI_SUCCESS or an
 *         error status, as documented for mca_btl_mx_finalize() -- confirm
 *         against the implementation.
 */
extern int mca_btl_mx_component_open(void);
/**
 * Perform any final cleanup before the component is unloaded.
 *
 * @return int status code. NOTE(review): presumably OMPI_SUCCESS or an
 *         error status -- confirm against the implementation.
 */
extern int mca_btl_mx_component_close(void);
/**
 * MX component initialization.
 *
 * @param num_btl_modules (OUT) Number of BTLs returned in the BTL array.
 * @param allow_multi_user_threads (IN) Flag relating to whether the BTL
 *        supports user threads (TRUE).
 * @param have_hidden_threads (IN) Flag relating to whether the BTL uses
 *        threads (TRUE).
 * @return Array of BTL modules; its length is stored in *num_btl_modules.
 *
 * NOTE(review): earlier documentation labelled both flags as (OUT), but they
 * are passed by value and therefore cannot carry results back to the caller.
 * Confirm their intended meaning against the implementation.
 */
extern mca_btl_base_module_t** mca_btl_mx_component_init(
int *num_btl_modules,
bool allow_multi_user_threads,
bool have_hidden_threads
);
/**
 * MX component progress.
 *
 * @return int; the meaning of the value is not documented in this header --
 *         TODO confirm against the implementation.
 */
extern int mca_btl_mx_component_progress(void);
/**
 * Clean up any resources held by the BTL.
 *
 * @param btl (IN) BTL instance.
 * @return OMPI_SUCCESS or error status on failure.
 */
extern int mca_btl_mx_finalize(
struct mca_btl_base_module_t* btl
);
/**
 * PML->BTL notification of a change in the process list (processes added).
 *
 * @param btl (IN) BTL instance
 * @param nprocs (IN) Number of processes
 * @param procs (IN) Set of processes
 * @param peers (OUT) Set of (optional) peer addressing info.
 * @param reachable (IN/OUT) Set of processes that are reachable via this BTL.
 * @return OMPI_SUCCESS or error status on failure.
 *
 */
extern int mca_btl_mx_add_procs(
struct mca_btl_base_module_t* btl,
size_t nprocs,
struct ompi_proc_t **procs,
struct mca_btl_base_endpoint_t** peers,
ompi_bitmap_t* reachable
);
/**
 * PML->BTL notification of a change in the process list (processes removed).
 *
 * @param btl (IN) BTL instance
 * @param nprocs (IN) Number of processes.
 * @param procs (IN) Set of processes.
 * @param peers (IN) Set of peer data structures.
 * @return Status indicating if cleanup was successful
 *
 */
extern int mca_btl_mx_del_procs(
struct mca_btl_base_module_t* btl,
size_t nprocs,
struct ompi_proc_t **procs,
struct mca_btl_base_endpoint_t** peers
);
/**
 * Initiate an asynchronous send.
 *
 * @param btl (IN) BTL module
 * @param btl_peer (IN) BTL addressing information
 * @param descriptor (IN) Description of the data to be transferred
 * @param tag (IN) The tag value used to notify the peer.
 * @return int status code. NOTE(review): presumably OMPI_SUCCESS or an
 *         error status -- confirm against the implementation.
 */
extern int mca_btl_mx_send(
struct mca_btl_base_module_t* btl,
struct mca_btl_base_endpoint_t* btl_peer,
struct mca_btl_base_descriptor_t* descriptor,
mca_btl_base_tag_t tag
);
/**
 * Register a callback function that is called on receipt
 * of a fragment.
 *
 * @param btl (IN) BTL module
 * @param tag (IN) Tag value. NOTE(review): presumably the tag whose incoming
 *        fragments trigger the callback -- confirm.
 * @param cbfunc (IN) Callback function to register.
 * @param cbdata (IN) NOTE(review): presumably opaque data handed back to
 *        cbfunc when it is invoked -- confirm.
 * @return Status indicating if registration was successful
 *
 */
extern int mca_btl_mx_register(
struct mca_btl_base_module_t* btl,
mca_btl_base_tag_t tag,
mca_btl_base_module_recv_cb_fn_t cbfunc,
void* cbdata);
/**
 * Allocate a descriptor with a segment of the requested size.
 * Note that the BTL layer may choose to return a smaller size
 * if it cannot support the request.
 *
 * @param btl (IN) BTL module
 * @param endpoint (IN) BTL endpoint. NOTE(review): presumably the peer the
 *        descriptor is intended for -- confirm.
 * @param order (IN) Not documented in this header -- TODO confirm meaning.
 * @param size (IN) Requested segment size.
 * @param flags (IN) Not documented in this header -- TODO confirm meaning.
 * @return The allocated descriptor. NOTE(review): the failure value
 *         (presumably NULL) is not stated here -- confirm.
 */
mca_btl_base_descriptor_t* mca_btl_mx_alloc( struct mca_btl_base_module_t* btl,
struct mca_btl_base_endpoint_t* endpoint,
uint8_t order,
size_t size,
uint32_t flags);
/**
 * Return a segment allocated by this BTL.
 *
 * @param btl (IN) BTL module
 * @param des (IN) Allocated descriptor.
 * @return int status code. NOTE(review): presumably OMPI_SUCCESS or an
 *         error status -- confirm against the implementation.
 */
int mca_btl_mx_free( struct mca_btl_base_module_t* btl,
mca_btl_base_descriptor_t* des );
/**
 * Prepare a descriptor for send/rdma using the supplied
 * convertor. If the convertor references data that is contiguous,
 * the descriptor may simply point to the user buffer. Otherwise,
 * this routine is responsible for allocating buffer space and
 * packing if required.
 *
 * @param btl (IN) BTL module
 * @param peer (IN) BTL peer addressing
 * @param (unnamed) (IN) Memory pool registration
 *        (struct mca_mpool_base_registration_t*); its use is not documented
 *        in this header -- TODO confirm.
 * @param convertor (IN) Data type convertor
 * @param order (IN) Not documented in this header -- TODO confirm meaning.
 * @param reserve (IN) Additional bytes requested by upper layer to precede user data
 * @param size (IN/OUT) Number of bytes to prepare (IN), number of bytes actually prepared (OUT)
 * @param flags (IN) Not documented in this header -- TODO confirm meaning.
 * @return The prepared descriptor. NOTE(review): the failure value
 *         (presumably NULL) is not stated here -- confirm.
 */
mca_btl_base_descriptor_t*
mca_btl_mx_prepare_src( struct mca_btl_base_module_t* btl,
struct mca_btl_base_endpoint_t* peer,
struct mca_mpool_base_registration_t*,
struct ompi_convertor_t* convertor,
uint8_t order,
size_t reserve,
size_t* size,
uint32_t flags);
/**
 * Prepare a descriptor; takes the same parameter list as
 * mca_btl_mx_prepare_src().
 * NOTE(review): judging by the name, this is presumably the
 * destination (receive-side) counterpart of mca_btl_mx_prepare_src() --
 * confirm against the implementation.
 *
 * @param btl (IN) BTL module
 * @param peer (IN) BTL peer addressing
 * @param (unnamed) (IN) Memory pool registration
 *        (struct mca_mpool_base_registration_t*); its use is not documented
 *        in this header -- TODO confirm.
 * @param convertor (IN) Data type convertor
 * @param order (IN) Not documented in this header -- TODO confirm meaning.
 * @param reserve (IN) NOTE(review): presumably additional bytes requested by
 *        the upper layer, as for mca_btl_mx_prepare_src() -- confirm.
 * @param size (IN/OUT) NOTE(review): presumably the number of bytes to
 *        prepare (IN) and actually prepared (OUT) -- confirm.
 * @param flags (IN) Not documented in this header -- TODO confirm meaning.
 * @return A descriptor. NOTE(review): the failure value (presumably NULL)
 *         is not stated here -- confirm.
 */
mca_btl_base_descriptor_t*
mca_btl_mx_prepare_dst( struct mca_btl_base_module_t* btl,
struct mca_btl_base_endpoint_t* peer,
struct mca_mpool_base_registration_t*,
struct ompi_convertor_t* convertor,
uint8_t order,
size_t reserve,
size_t* size,
uint32_t flags);
/**
 * Fault Tolerance Event Notification Function
 *
 * @param state (IN) Checkpoint state
 * @return OMPI_SUCCESS or failure status
 */
int mca_btl_mx_ft_event(int state);
END_C_DECLS
#endif