Multiply sparse matrices #132
Changes from 22 commits
@@ -235,7 +235,7 @@ class DistMatrix;
 #include <El/core/Matrix/decl.hpp>
 #include <El/core/Graph/decl.hpp>
 #include <El/core/DistMap/decl.hpp>
-#include <El/core/DistGraph/decl.hpp>
+#include <El/core/DistGraph.hpp>
 #include <El/core/SparseMatrix/decl.hpp>
 #include <El/core/DistSparseMatrix/decl.hpp>
 #include <El/core/DistMultiVec/decl.hpp>

Review comment: Why include DistGraph.hpp rather than DistGraph/decl.hpp?
Second file in the diff:

@@ -11,6 +11,206 @@
#ifndef EL_CORE_DISTGRAPH_HPP
#define EL_CORE_DISTGRAPH_HPP

#include <El/core/DistGraph/decl.hpp>
#include <set>
Review discussion on this change:

Review comment: Is there a reason for purposefully breaking the convention of splitting the declarations and implementations into separate decl.hpp and impl.hpp headers?

Reply: It's been a very long time since I initially did these changes. However, this convention is a bit odd; it is not as if the impl.hpp is a .cpp file. I propose that we (as much as possible) move to single header-only implementations. Simply moving the decl/impl into one header is asking the pre-processor to do less work.

Reply: In practice there is a lot of movement between the […]

Reply: @rhl- In general, you're asking to spend way more time compiling if you combine them. It is very common for header dependencies and transitive dependencies (header or source) to need only the declarations, which means that all those compilation units don't need to pull in the implementation and don't need to be recompiled any time the impl.hpp changes (usually much more common than decl.hpp changes). We saw a tangible improvement in PETSc compile times (especially incremental) when we split some headers.

Reply: Hi Jed, not sure this is accurate. This is a pure header-only implementation, with […] Right now the headers extern-template instantiate a zillion things; I'm trying to back out these changes for now.

Reply: Can you quantify that extra time to pull in both parts from separate files versus a unified file? Also, I really am referring to splitting headers into the part that only declares the types and the part that contains the API associated with that type. It is common for dependent headers to only need the declaration; only compilation units that actually manipulate that type need to include the rest of the header. You'll find many such examples in […]
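For readers following the thread, here is a minimal sketch of the decl/impl split being debated, collapsed into a single translation unit. The class name Widget and the file boundaries marked in comments are hypothetical illustrations, not Elemental's actual headers.

// Sketch of the decl.hpp/impl.hpp convention, compressed into one file.

// (Widget/decl.hpp) Declarations only. Headers that merely mention
// Widget<T> can include just this part and need not be recompiled when
// the member definitions change.
template<typename T>
class Widget
{
public:
    void Frob();
private:
    T value_ = T(0);
};

// (Widget/impl.hpp) The definitions. Only translation units that actually
// call Widget<T>'s members need this part.
template<typename T>
void Widget<T>::Frob() { value_ += T(1); }

// (Widget.hpp would simply include both parts; the unified-header style
// proposed in this thread collapses everything into that one file.)
int main()
{
    Widget<int> w;
    w.Frob();
    return 0;
}

With the split, a dependent header that only needs to name Widget<T> includes the decl part alone, which is the incremental-build argument made above; the unified-header style trades that for fewer files and less preprocessor bookkeeping. (The DistGraph diff resumes below.)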
namespace El {

struct DistGraphMultMeta
{
    bool ready;
    // NOTE: The 'send' and 'recv' roles reverse for adjoint multiplication
    Int numRecvInds;
    vector<int> sendSizes, sendOffs,
                recvSizes, recvOffs;
    vector<Int> sendInds, colOffs;

    DistGraphMultMeta() : ready(false), numRecvInds(0) { }

    void Clear()
    {
        ready = false;
        numRecvInds = 0;
        SwapClear( sendSizes );
        SwapClear( recvSizes );
        SwapClear( sendOffs );
        SwapClear( recvOffs );
        SwapClear( sendInds );
        SwapClear( colOffs );
    }

    const DistGraphMultMeta& operator=( const DistGraphMultMeta& meta )
    {
        ready = meta.ready;
        numRecvInds = meta.numRecvInds;
        sendSizes = meta.sendSizes;
        sendOffs = meta.sendOffs;
        recvSizes = meta.recvSizes;
        recvOffs = meta.recvOffs;
        sendInds = meta.sendInds;
        colOffs = meta.colOffs;
        return *this;
    }
};

using std::set;

// Forward declare ldl::DistFront
namespace ldl { template<typename F> struct DistFront; }

// Use a simple 1d distribution where each process owns a fixed number of
// sources:
//   if last process, numSources - (commSize-1)*floor(numSources/commSize)
//   otherwise,       floor(numSources/commSize)
class DistGraph
{
public:
    // Constructors and destructors
    // ============================
    DistGraph( mpi::Comm comm=mpi::COMM_WORLD );
    DistGraph( Int numSources, mpi::Comm comm=mpi::COMM_WORLD );
    DistGraph( Int numSources, Int numTargets, mpi::Comm comm=mpi::COMM_WORLD );
    DistGraph( const Graph& graph );
    // TODO: Move constructor
    DistGraph( const DistGraph& graph );
    ~DistGraph();

    // Assignment and reconfiguration
    // ==============================

    // Making a copy
    // -------------
    const DistGraph& operator=( const Graph& graph );
    const DistGraph& operator=( const DistGraph& graph );
    // TODO: Move assignment

    // Make a copy of a subgraph
    // -------------------------
    DistGraph operator()( Range<Int> I, Range<Int> J ) const;
    DistGraph operator()( Range<Int> I, const vector<Int>& J ) const;
    DistGraph operator()( const vector<Int>& I, Range<Int> J ) const;
    DistGraph operator()( const vector<Int>& I, const vector<Int>& J ) const;

    // Changing the graph size
    // -----------------------
    void Empty( bool freeMemory=true );
    void Resize( Int numVertices );
    void Resize( Int numSources, Int numTargets );

    // Changing the distribution
    // -------------------------
    void SetComm( mpi::Comm comm );

    // Assembly
    // --------
    void Reserve( Int numLocalEdges, Int numRemoteEdges=0 );

    // Safe edge insertion/removal procedure
    void Connect( Int source, Int target );
    void ConnectLocal( Int localSource, Int target );
    void Disconnect( Int source, Int target );
    void DisconnectLocal( Int localSource, Int target );

    void FreezeSparsity() EL_NO_EXCEPT;
    void UnfreezeSparsity() EL_NO_EXCEPT;
    bool FrozenSparsity() const EL_NO_EXCEPT;

    // For inserting/removing a sequence of edges and then forcing consistency
    void QueueConnection( Int source, Int target, bool passive=false )
    EL_NO_RELEASE_EXCEPT;
    void QueueLocalConnection( Int localSource, Int target )
    EL_NO_RELEASE_EXCEPT;
    void QueueDisconnection( Int source, Int target, bool passive=false )
    EL_NO_RELEASE_EXCEPT;
    void QueueLocalDisconnection( Int localSource, Int target )
    EL_NO_RELEASE_EXCEPT;
    void ProcessQueues();
    void ProcessLocalQueues();

    // For manually modifying/accessing buffers
    void ForceNumLocalEdges( Int numLocalEdges );
    void ForceConsistency( bool consistent=true ) EL_NO_EXCEPT;
    Int* SourceBuffer() EL_NO_EXCEPT;
    Int* TargetBuffer() EL_NO_EXCEPT;
    Int* OffsetBuffer() EL_NO_EXCEPT;
    const Int* LockedSourceBuffer() const EL_NO_EXCEPT;
    const Int* LockedTargetBuffer() const EL_NO_EXCEPT;
    const Int* LockedOffsetBuffer() const EL_NO_EXCEPT;
    void ComputeSourceOffsets();

    // Queries
    // =======

    // High-level data
    // ---------------
    Int NumSources() const EL_NO_EXCEPT;
    Int NumTargets() const EL_NO_EXCEPT;
    Int NumEdges() const EL_NO_EXCEPT;
    Int FirstLocalSource() const EL_NO_EXCEPT;
    Int NumLocalSources() const EL_NO_EXCEPT;
    Int NumLocalEdges() const EL_NO_EXCEPT;
    Int Capacity() const EL_NO_EXCEPT;
    bool LocallyConsistent() const EL_NO_EXCEPT;

    // Distribution information
    // ------------------------
    mpi::Comm Comm() const EL_NO_EXCEPT;
    Int Blocksize() const EL_NO_EXCEPT;
    int SourceOwner( Int s ) const EL_NO_RELEASE_EXCEPT;
    Int GlobalSource( Int sLoc ) const EL_NO_RELEASE_EXCEPT;
    Int LocalSource( Int s ) const EL_NO_RELEASE_EXCEPT;

    // Detailed local information
    // --------------------------
    Int Source( Int localEdge ) const EL_NO_RELEASE_EXCEPT;
    Int Target( Int localEdge ) const EL_NO_RELEASE_EXCEPT;
    Int SourceOffset( Int localSource ) const EL_NO_RELEASE_EXCEPT;
    Int Offset( Int localSource, Int target ) const EL_NO_RELEASE_EXCEPT;
    Int NumConnections( Int localSource ) const EL_NO_RELEASE_EXCEPT;

    // Return the ratio of the maximum number of local edges to the
    // total number of edges divided by the number of processes
    double Imbalance() const EL_NO_RELEASE_EXCEPT;

    mutable DistGraphMultMeta multMeta;
    DistGraphMultMeta InitializeMultMeta() const;

    void AssertConsistent() const;
    void AssertLocallyConsistent() const;

private:
    Int numSources_, numTargets_;
    mpi::Comm comm_;
    // Apparently calling MPI_Comm_size in an inner loop is a very bad idea...
    int commSize_;
    int commRank_;

    Int blocksize_;
    Int numLocalSources_;

    bool frozenSparsity_ = false;
    vector<Int> sources_, targets_;
    set<pair<Int,Int>> markedForRemoval_;

    vector<Int> remoteSources_, remoteTargets_;
    vector<pair<Int,Int>> remoteRemovals_;

    void InitializeLocalData();

    // Helpers for local indexing
    bool locallyConsistent_ = true;
    vector<Int> localSourceOffsets_;

    friend class Graph;
    friend void Copy( const Graph& A, DistGraph& B );
    friend void Copy( const DistGraph& A, Graph& B );
    friend void Copy( const DistGraph& A, DistGraph& B );

    template<typename F> friend class DistSparseMatrix;
};

} // namespace El

#endif // ifndef EL_CORE_DISTGRAPH_HPP
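As a side note for readers, here is a minimal usage sketch of the queued-update interface declared above. It is not part of the pull request; it only illustrates the intended Reserve / QueueConnection / ProcessQueues sequence, and it assumes a standard Elemental build in which <El.hpp>, El::Environment, mpi::Rank, and Output are available.

// Hedged usage sketch: build a distributed path graph with the queued API.
#include <El.hpp>
using namespace El;

int main( int argc, char* argv[] )
{
    Environment env( argc, argv );
    mpi::Comm comm = mpi::COMM_WORLD;

    // Distributed graph of a length-n path plus self-loops
    const Int n = 100;
    DistGraph graph( n, n, comm );

    // Each process queues only edges whose source it owns under the simple
    // 1-D source distribution described in the header above
    const Int firstLocal = graph.FirstLocalSource();
    const Int numLocal = graph.NumLocalSources();
    graph.Reserve( 2*numLocal );
    for( Int sLoc=0; sLoc<numLocal; ++sLoc )
    {
        const Int s = firstLocal + sLoc;
        graph.QueueConnection( s, s );
        if( s+1 < n )
            graph.QueueConnection( s, s+1 );
    }
    // Exchange any queued remote edges and restore global consistency
    graph.ProcessQueues();

    Output("rank ",mpi::Rank(comm),": local edges = ",graph.NumLocalEdges());
    return 0;
}

Queueing a batch of edges and then calling ProcessQueues once lets the class batch whatever communication is required, which is presumably why Reserve is exposed as a capacity hint.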
Review comment: Why remove this?

Reply: I think because a few lines earlier we issue
B.distGraph_ = A.distGraph_;
which assigns the multMeta objects.

Reply: Ah, agreed.
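A toy model of the reasoning in this thread, with hypothetical stand-in names (Meta, Graph, SparseMatrix) rather than Elemental's actual classes: if the outer copy assignment copies the graph member, and the graph member carries its cached multiplication metadata (as DistGraphMultMeta does above), then a separate explicit metadata copy is redundant.

#include <iostream>
#include <vector>

// Hypothetical stand-ins; not Elemental's actual classes.
struct Meta { std::vector<int> sendSizes; };

struct Graph
{
    Meta multMeta;  // cached multiply metadata travels with the graph;
                    // the implicitly generated copy assignment copies it too
};

struct SparseMatrix
{
    Graph distGraph_;
    const SparseMatrix& operator=( const SparseMatrix& A )
    {
        distGraph_ = A.distGraph_; // also copies distGraph_.multMeta,
                                   // so no separate metadata copy is needed
        return *this;
    }
};

int main()
{
    SparseMatrix A, B;
    A.distGraph_.multMeta.sendSizes = {1,2,3};
    B = A;
    std::cout << B.distGraph_.multMeta.sendSizes.size() << "\n"; // prints 3
    return 0;
}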