This repository has been archived by the owner on May 15, 2018. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathfstarf.m
87 lines (78 loc) · 2.83 KB
/
fstarf.m
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
function [dmatricies,treeforest] = fstarf(treefile,outfile,StarLength)
%%
% # `[dmatricies,treeforest] = FSTARF(treefile,outfile,StarLength)`
%
% Accepts the path of a text file of Newick-formatted trees and returns a
% cell-array of the distance matrices for STAR. The matrices are also
% written to `outfile` as Maple-style assignments (`Tree1:=...:`, ...,
% followed by `DMatricies := Array([Tree1,...]):`).
% Does the same thing as MSTARF except creates a file.
%
% ##Arguments:
% * `treefile`: path to a text-file of Newick-format rooted binary trees,
%               separated by new lines (see `READFOREST`)
% * `outfile`: path to output file, if it does not exist it will be
%               created, if it exists it will be overwritten.
% * `StarLength`: (optional) lengths for internal edges of the gene-trees.
%               Only the first `ntaxa-1` entries are summed to obtain the
%               common root-to-leaf distance. When omitted, a vector of
%               ones is passed to `MAKEULTRA` and the root-to-leaf
%               distance is `ntaxa-1`.
%
% ##Return Values:
% * `dmatricies`: cell-array of the distance matrices (gene-trees), made
%               ultra-metric, these may then be averaged for STAR. Rows and
%               columns are ordered by sorted leaf name.
% * `treeforest`: cell-array of the gene-trees from `treefile`
%
% `dmatricies{ii}` corresponds to the tree `treeforest{ii}`
%
% ##Errors:
% * `fstarf:notEnoughInputs` if `treefile` or `outfile` is missing
% * `fstarf:emptyForest` if no trees were read from `treefile`
% * `fstarf:cannotOpenFile` if `outfile` cannot be opened for writing
%
% Depends on: `READFOREST`, `MAKEULTRA`, `MATRIXTOMAPLE`
%
% #TODO:
% * if we're missing options, select them with the ui
% * warning about overwriting files?
% * error handling, how do we know that the forest in the file is legit,
%   how do we know that it matches the probfile etc.
%%
%% validate inputs
if nargin < 2
    error('fstarf:notEnoughInputs', ...
        'FSTARF requires both a tree file and an output file.');
end
% StarLength is the THIRD argument, so it is only defined when nargin > 2.
hasLengths = nargin > 2;

%% make distance matrices
treeforest = ReadForest(treefile);
NumTrees = length(treeforest);
if NumTrees == 0
    error('fstarf:emptyForest','No trees were read from %s.',treefile);
end
dmatricies = cell(1,NumTrees);

% ensures every tree has the same root-to-leaf distance; the taxon count is
% taken from the first tree only.
% NOTE(review): assumes every tree in the forest has the same leaf set --
% confirm against READFOREST / callers.
ntaxa = get(treeforest{1},'NumLeaves');
if hasLengths
    edgelengths = StarLength;
    dleaftoroot = sum(StarLength(1:ntaxa-1));
else
    edgelengths = ones(1,ntaxa);
    dleaftoroot = ntaxa-1;
end

for ii = 1:NumTrees
    startree = makeultra(treeforest{ii},edgelengths,dleaftoroot);
    % makes dist matrices and sorts them in order of leaf names
    [names,ix] = sort(get(startree,'LeafNames'));
    DD = pdist(startree,'Nodes','Leaves','squareform',true);
    dmatricies{ii} = DD(ix,ix);
end

%% write distance matrices to outfile
outfid = fopen(outfile,'w');
if outfid == -1
    error('fstarf:cannotOpenFile','Could not open %s for writing.',outfile);
end
% Close the file when this function exits, including on error.
closeOutfile = onCleanup(@() fclose(outfid)); %#ok<NASGU>

% Leaf names (from the last tree processed) joined with single spaces;
% concatenating the cell directly would fail for names of unequal length
% and would run the names together otherwise.
namestr = sprintf('%s ',names{:});
namestr = namestr(1:end-1);
fprintf(outfid, '%s\n%s %s\n','# Matricies and probabilities for STAR','# Leaf names are, in order:',namestr);
fprintf(outfid, '%s\n', ['# Tree Topologies taken from file: ', treefile]);
if hasLengths
    LengthStr = sprintf('%i,',StarLength);
    LengthStr = ['# Internal branches have the following lengths, in order of level from root: ',LengthStr(1:end-1)];
    fprintf(outfid, '%s\n', LengthStr);
else
    fprintf(outfid, '%s\n', '# Each internal branch has length 1');
end

% One commented topology line and one matrix assignment per tree.
for ii = 1:NumTrees
    TopoStr = ['# Tree', int2str(ii), ' topology ', getnewickstr(treeforest{ii})];
    fprintf(outfid,'%s\n',TopoStr);
    MtrStr = ['Tree', int2str(ii), ':=',matrixtomaple(dmatricies{ii}),':'];
    fprintf(outfid,'%s\n',MtrStr);
end

% Collect all matrices into one array; the trailing comma produced by the
% format string is stripped so the list reads [Tree1,...,TreeN].
TreeList = sprintf('Tree%d,',1:NumTrees);
fprintf(outfid,'%s',['DMatricies := Array([',TreeList(1:end-1),']):']);
%%
end