-
Notifications
You must be signed in to change notification settings - Fork 14
/
Copy pathmcxdump.azm
370 lines (317 loc) · 13.4 KB
/
mcxdump.azm
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
\import{mcx.zmm}
\begin{pud::man}{
{name}{mcxdump}
{html_title}{The mcxdump manual}
{author}{Stijn van Dongen}
{section}{1}
{synstyle}{long}
{defstyle}{long}
\man_share
}
\${html}{\"pud::man::maketoc"}
\sec{name}{NAME}
\NAME{mcxdump}{dump matrices, optionally map indices to labels}
\sec{synopsis}{SYNOPSIS}
\par{
\mcxdump
\synoptopt{-imx}{<fname>}{matrix file}
\synoptopt{-icl}{<fname>}{cluster file to be dumped line-wise}
\synoptopt{-tf}{<spec>}{apply unary transformations to input matrix}
\synoptopt{-imx-cat}{<fname>}{concatenation matrix file}
\synoptopt{-imx-tree}{<fname>}{concatenation cone file}
\synoptopt{--skeleton}{read empty matrix, honour domains}
\synoptopt{-o}{<fname>}{output file name ('-' for stdout)}
\synoptopt{-digits}{<num>}{output precision}
\synoptopt{-tab}{<fname>}{row/column tab (label) file}
\synoptopt{-tabc}{<fname>}{column tab file}
\synoptopt{-tabr}{<fname>}{row tab file}
\synoptopt{--lazy-tab}{allow tab/domain mismatch}
\synoptopt{--transpose}{work with the transpose}
\synoptopt{--no-values}{omit values}
\synoptopt{--omit-empty}{omit empty columns}
\synoptopt{--no-loops}{omit loops}
\synoptopt{--force-loops}{force loops}
\synoptopt{--dump-pairs}{emit pairs per line}
\synoptopt{--dump-table}{dump table format}
\synoptopt{-dump-sif}{<tag>}{dump sif format}
\synoptopt{-dump-sifx}{<tag>}{dump extended sif format with weights}
\synoptopt{--dump-lines}{emit rows per line}
\synoptopt{--dump-rlines}{omit leading identifier}
\synoptopt{--dump-vlines}{add leading identifier values}
\synoptopt{--dump-lead-off}{omit leading identifier}
\synoptopt{--dump-lower}{dump lower part excluding diagonal}
\synoptopt{--dump-loweri}{dump lower part including diagonal}
\synoptopt{--dump-upper}{dump upper part excluding diagonal}
\synoptopt{--dump-upperi}{dump upper part including diagonal}
\synoptopt{--write-tabc}{dump tab file on column domain}
\synoptopt{--write-tabr}{dump tab file on row domain}
\synoptopt{--dump-domc}{dump column domain}
\synoptopt{--dump-domr}{dump row domain}
\synoptopt{-table-nfields}{<num>}{output first <num> fields}
\synoptopt{-table-nlines}{<num>}{output first <num> lines}
\synoptopt{--newick}{output newick format}
\synoptopt{-newick}{[NBI]+}{exclude Number|Branch-length|Indent}
\synoptopt{--write-matrix}{(deconcatenate) write matrices}
\synoptopt{-split-stem}{<str>}{(deconcatenate) matrices file name stem}
\synoptopt{-cat-max}{<num>}{(deconcatenate) write first <num> matrices}
\synoptopt{-sep-value}{<str>}{node/value separator}
\synoptopt{-sep-field}{<str>}{field separator}
\synoptopt{-sep-lead}{<str>}{lead separator}
\synoptopt{-sep-cat}{<str>}{concatenation separator}
\synoptopt{-prefixc}{<str>}{prefix column indices with <str>}
\synoptopt{-sort}{size-{ascending,descending}}{vector sort mode}
\stdsynopt
}
\sec{description}{DESCRIPTION}
\par{
\mcxdump reads a data file satisfying the mcl input format
(refer to \mysib{mcxio}). It outputs a line-based format. The
\useopt{--dump-pairs} option yields a single matrix entry per line,
identified by the respective column and row identifiers (either index or
label) separated by the field separator.
The \useopt{--dump-lines} and \useopt{--dump-rlines} options result in the
joining of all row entries on a single line, separated by the field
separator. For both formats, the matrix value corresponding with
a particular entry is by default output as well.
}
\par{
\mcxdump can also act on files that contain concatenated
matrices. Refer to the group of options headed by
\genoptref{-imx-cat}{fname}.
}
\sec{options}{OPTIONS}
\begin{itemize}{\mcx_itemopts}
\item{\defopt{-imx}{<fname>}{matrix file}}
\car{
Input matrix.}
\item{\defopt{-icl}{<fname>}{cluster file}}
\car{
This specifies the input matrix, and sets up a cluster-wise line-based label dump.
This option is fully equivalent to the combination of
\genopt{-imx}{<fname>}, \genopt{--dump-rlines} and \genopt{--no-values}.
}
\item{\defopt{-tf}{<spec>}{apply unary transformations to input matrix}}
\car{
Applies the specified transformation to the matrix before it is output.
Refer to \mysib{mcxio} for a description of the transformation syntax.
}
\item{\defopt{--transpose}{work with the transpose}}
\car{
Work with the transpose of the input matrix.
}
\item{\defopt{--skeleton}{read empty matrix, honour domains}}
\car{
No entries are read, only domains.
}
\item{\defopt{-o}{<fname>}{output file name}}
\car{
Output stream. Use \v{-} for STDOUT.}
\item{\defopt{-digits}{<num>}{output precision}}
\car{
Specify the precision to use in native interchange format.}
\item{\defopt{-tab}{<fname>}{row/column tab (label) file}}
\car{
Substitute column indices and row indices by labels from the tab file.
Since the same tab file is used for both, this implies that the matrix
domains are identical.}
\item{\defopt{-tabc}{<fname>}{column tab file}}
\car{
Substitute column indices by labels from the tab file.}
\item{\defopt{-tabr}{<fname>}{row tab file}}
\car{
Substitute row indices by labels from the tab file.}
\item{\defopt{--lazy-tab}{allow tab/domain mismatch}}
\car{
If used, the tab file domain(s) do not necessarily need to match
the corresponding domain in the input matrix. Entries missing in
the tab files will be replaced by a question mark.}
\item{\defopt{--no-values}{omit values}}
\car{
Do not emit values.}
\item{\defopt{--omit-empty}{omit empty columns}}
\car{
Do not output line data (with \genopt{--dump-table} or
\genopt{--dump-lines} or related options) for those columns
that are empty.
}
\item{\defopt{--no-loops}{omit loops}}
\car{
Do not output entries for which the row index equals the column index,
if present.
Applies only to matrices for which column and row domains are equal.}
\item{\defopt{--force-loops}{force loops}}
\car{
For each column, force output of a row entry that matches the
column index.
Applies only to matrices for which column and row domains are equal.}
\items{
{\defopt{--dump-pairs}{emit pairs per line}}
{\defopt{-dump-sif}{<tag>}{dump sif format}}
{\defopt{-dump-sifx}{<tag>}{dump extended sif format with weights}}
{\defopt{--dump-lines}{emit rows per line}}
{\defopt{--dump-rlines}{omit leading column node}}
{\defopt{--dump-vlines}{add leading column values}}
{\defopt{--dump-lead-off}{do not dump leading identifiers}}
{\defopt{--dump-lower}{dump lower part excluding diagonal}}
{\defopt{--dump-loweri}{dump lower part including diagonal}}
{\defopt{--dump-upper}{dump upper part excluding diagonal}}
{\defopt{--dump-upperi}{dump upper part including diagonal}}
}
\car{
\genopt{--dump-pairs} is the default mode of output. Each matrix entry
is output as a single pair of column-identifier and row-identifier per line,
optionally followed by the value of the corresponding matrix entry.
All fields are separated by the field separator.
}
\par{
Use \genopt{-dump-sif}{<tag>} to dump SIF format.
The argument \genarg{<tag>} will be used as the edge type (the second
column in SIF format). The option \genopt{-dump-sifx}{<tag>}
is similar except that an extended format is produced where
the label is followed by the colon character and the edge weight.
}
\par{
With \genopt{--dump-lines}, each matrix column is output on a
single line, with row identifiers separated by the field separator
and values attached to the row identifier by the node/value separator.
In this format, the column identifier is output as the leading field.
}
\par{
\genopt{--dump-rlines} is as \genopt{--dump-lines},
except that the column identifier is not output.
Use \genopt{--dump-lead-off} to preclude the output of the leading
identifiers (for line-based outputs).
}
\par{
\genopt{--dump-vlines} is as \genopt{--dump-lines}, except that the
leading identifiers are followed by a value associated with
the entire column. This can be used to dump the output
given by \clmstable. The value provided is a measure
for the stability of the cluster that follows.
}
\par{
The options pertaining to \it{lower} and \it{upper} dumps currently
only work with \genopt{--dump-pairs}. They act to only output
the specified part of the matrix.
}
\items{
{\defopt{--dump-table}{dump table format}}
{\defopt{-table-nfields}{<num>}{field limit}}
{\defopt{-table-nlines}{<num>}{line/row limit}}
}
\car{
Output table format. In table format no indices are printed by default
and all values
are printed including zeroes. The options \genopt{-table-nfields} and \genopt{-table-nlines}
can be used to limit
the number of fields and lines to be printed. Note that fields correspond
to MCL matrix rows and that lines correspond to MCL matrix columns, as MCL
calls its primary indices column indices.
Use \genopt{--dump-lead-off} to preclude the output of the leading
identifiers (for line-based outputs).
}
\items{
{\defopt{--newick}{output newick format}}
{\defopt{-newick}{[NBI]+}{newick, exclude Number|Branch-length|Indent}}
}
\car{
Output a hierarchical clustering specified by \genopt{-imx-tree}
in Newick tree format.
}
\items{
{\defopt{--write-tabc}{dump tab file on column domain}}
{\defopt{--write-tabr}{dump tab file on row domain}}
{\defopt{--dump-domc}{dump column domain}}
{\defopt{--dump-domr}{dump row domain}}
}
\car{
These options work in conjunction with the \genoptref{-imx}{fname} option.
Only the domains from the input matrix are read as if \genoptref{--skeleton}
was specified.
\genopt{--write-tabc} assumes the input tab file envelops the matrix column
domain, and it outputs a new tab file restricted to that domain.
\genopt{--write-tabr} acts analogously for the row domain.
\genopt{--dump-domc} and \genopt{--dump-domr} respectively dump the column
or row domain as a regular dump, outputting labels in case a tab file is
specified.
}
\par{
These options are implemented as ensembles of other options.
For example, \genopt{--dump-domr} \genopt{-imx}{fname} corresponds with
\genopt{--dump-lines} \genopt{--transpose} \genopt{--skeleton}.
}
\items{
{\defopt{-imx-cat}{<fname>}{concatenation matrix file}}
{\defopt{-imx-tree}{<fname>}{concatenation cone file}}
{\defopt{--write-matrix}{(deconcatenate) write matrices}}
{\defopt{-split-stem}{<str>}{(deconcatenate) matrices file name stem}}
{\defopt{-cat-max}{<num>}{(deconcatenate) write first <num> matrices}}
}
\car{
\genopt{-imx-cat} is like \genopt{-imx} except that the input is assumed to
contain multiple concatenated matrices.
The matrices are dumped separated by the
\it{cat separator} (cf. \genopt{-sep-cat}).
Alternatively, the matrices can be written to different files
using the \genopt{-split-stem} option.
In this case it is possible to output each matrix in native format
rather than as a dump by specifying \genopt{--write-matrix}.
This makes mcxdump effectively act as a deconcatenator.
In all cases (respectively dumping and writing matrices
to either the same stream or multiple files) the number of
matrices to be dumped can be limited with \genopt{-cat-max}.
}
\par{
\genopt{-imx-tree} is like \genopt{-imx-cat} except that the input
is assumed to be in cone format (the format output by \mclcm).
This format encodes a tree as a concatenation of matrices with
nested domains. \mcxdump will project all levels of this tree
so that all row domains are the same as the bottom row domain.
This implies that a set of nested clusterings (on different node
sets, as the set of clusters of a given level is the node set
of the next level) is transformed
into a set of flattened clusterings, all on the same node set.
If you do not want this to happen, simply use \genopt{-imx-cat}.
}
\item{\defopt{-sep-value}{<str>}{node/value separator}}
\car{
Set the node/value separator for line-based row ensemble output.}
\item{\defopt{-sep-field}{<str>}{field separator}}
\car{
Set the field separator for different row indices in a given column.}
\item{\defopt{-sep-lead}{<str>}{lead separator}}
\car{
Set the lead separator. In the \genopt{--dump-lines} format it
separates the leading column index from the following ensemble of
row indices. It can be useful to make this different from the
field separator. One can for example grep for columns that have
more than one entry in a matrix mapping nodes to clusters. This
will find nodes in overlap.}
\item{\defopt{-sep-cat}{<str>}{concatenation separator}}
\car{
Set the separator that is used between matrix dumps when a concatenation of
matrices is dumped.}
\item{\defopt{-prefixc}{<str>}{prefix column indices with <str>}}
\car{
This can be useful when external row names cannot be numbers and
when a label dictionary is not available or not appropriate.}
\item{\defopt{-sort}{size-{ascending,descending}}{vector sort mode}}
\car{
Reorder the matrix columns prior to dumping, based on the number of
nonzero entries in each column.
Do not use this in conjunction with a tab file for the column domain.}
\stddefopt
\end{itemize}
\sec{author}{AUTHOR}
\par{
Stijn van Dongen.
}
\sec{seealso}{SEE ALSO}
\par{
\mysib{mcxload},
\mysib{mcl},
\mysib{mclfaq},
and \mysib{mclfamily} for an overview of all the documentation
and the utilities in the mcl family.
}
\end{pud::man}