#! /usr/bin/env python desc = """ Write a majority rule consensus tree from 1 or more Nexus or Newick formatted tree files. Each tree file can have a specified burnin and/or step count. The data file (in Phylip or Nexus format) from which the trees are derived must be supplied. e.g. $ makeConsensusTree.py -d dataFile.nex -t treeFile.run1.t treeFile.run2.t -b 10000 8000 -s 10 4 - where treeFile.run1.t has burnin 10000 and step 10, and where treeFile.run2.t has burnin 8000 and step 4 """ import os import argparse import textwrap import sys #Squash any local configuration that might cause circular imports: os.environ['P4_STARTUP'] = "" from p4 import * from p4.treefilelite import TreeFileLite try: from gram import TreeGram except ImportError: print "Can't import Gram - error" sys.exit() def main(datafile, treefiles, burnin_numbers, step_numbers, outfile_name, restore_names, names_dict_path, re_root, percentage, support_as_percentage, pdf, reportlab, scalebar, quiet): if not datafile: print "Error: No datafile specified." print "Aborting." sys.exit(1) else: if not os.path.exists(datafile): print "Error: cannot find datafile %s" % datafile print "Aborting." sys.exit(1) for treefile in treefiles: if not os.path.exists(treefile): print "Error: cannot find treefile %s" % treefile print "Aborting." sys.exit(1) if not burnin_numbers: burnin_numbers = [0]*len(treefile) else: if not len(burnin_numbers) == len(treefiles): print "Error: there are %i burnin numbers and %i treefiles." % \ (len(burnin_numbers),len(treefiles)) print "Aborting." sys.exit(1) if not step_numbers: step_numbers = [1]*len(treefile) else: if not len(step_numbers) == len(treefiles): print "Error: there are %i step_numbers and %i treefiles." % \ (len(step_numbers),len(treefiles)) print "Aborting." sys.exit(1) if os.path.exists(outfile_name): print "Error: Outfile name \"%s\" already exists." % outfile_name print "Refusing to over-write. Aborting." sys.exit(1) if percentage < 0.1 or percentage > 1.0: print "Error: percentage must be between 0.51 and 1.0 not %f" % \ percentage print "Aborting." #read alignment var.doCheckForAllGapColumns = False var.doCheckForDuplicateSequences = False read(datafile) a = var.alignments[0] #read trees all_trees = [] for treefile, burnin, step in zip(treefiles, burnin_numbers, step_numbers): ttl = TreeFileLite(treefile, verbose=0) alltreeNumbers = range(ttl.nSamples) treeNumbers = alltreeNumbers[burnin::step] if len(treeNumbers) == 0: print "\nError: No trees left after removal of burnin - " + \ "burnin is %i, total trees in \"%s\" is %i" % \ (burnin, treefile, len(alltreeNumbers)) print "Aborting." sys.exit(1) if not quiet: print "\nTrees in file \"%s\": %s" % (treefile, ttl.nSamples) if burnin: print "Number of trees burned-in: %s" % burnin if step: print "Trees sampled every %s trees" % step print "Trees read: %s" % len(treeNumbers) all_trees.extend([ttl.getTree(n) for n in treeNumbers]) var.trees = [] tt = Trees(all_trees, a.taxNames) #Cymon 27th July - p4 can have non-bifurcating trees/polytomies so dont #remove root #Not sure if the logic here is correct!!!!!!!!!!!!!!!!!!! #CAUTION!!!!!!!!!!!!!!! #fb = True #for t in tt.trees: # if not t.isFullyBifurcating(): # t.removeRoot() # fb = False #if not quiet: # if not fb: # print "Some trees had a bifurcating root which was removed." tp = TreePartitions(tt) t = tp.consensus(minimumProportion=percentage) if restore_names: if not os.path.exists(names_dict_path): print "Error: Cannont find taxon names dictionary %s" % \ names_dict_path print "Aborting." sys.exit(1) else: t.restoreNamesFromRenameForPhylip(names_dict_path) if support_as_percentage: for node in t.iterInternalsNoRoot(): node.name = "%i" % (100*node.br.support) else: for node in t.iterInternalsNoRoot(): node.name = "%.2f" % node.br.support if re_root: t.draw(showInternalNodeNames=1,showNodeNums=1) while 1: nodenum = raw_input("Node number to re-root with... ") try: nodenum = int(nodenum) except ValueError: print "Try a number this time..." continue break t.reRoot(nodenum) t.ladderize() t.draw(showInternalNodeNames=1,showNodeNums=1) t.writeNexus(outfile_name) if not quiet: print "Written consensus tree to file \"%s\"" % outfile_name if pdf: if not TreeGram: print "TreeGram is not installed, cannot write PDF." else: tg = TreeGram(t) #For some users (e.g Cymon) reportlab is True by default but not #others - why is that? if reportlab: tg.rl = True else: tg.rl = False if scalebar: tg.setScaleBar() tg.epdf() if not quiet: if reportlab: print "Written PDF to \"tg.pdf\" and \"cr_tg.pdf\"." else: print "Written PDF to Gram directory." if not quiet: print "Done." if __name__ == "__main__": parser = argparse.ArgumentParser( formatter_class=argparse.RawDescriptionHelpFormatter, description=textwrap.dedent(desc), ) parser.add_argument("-d", "--datafile", dest="datafile", help="Data matrix filename.", ) parser.add_argument("-t", "-treefiles", dest="treefiles", help="Input tree file(s) in Nexus or Newick format.", nargs='*') parser.add_argument("-b", "-burnin_numbers", dest="burnin_numbers", help="Burnin numbers for each treefile. " +\ "Default: 0 (all trees).", nargs="*", type=int) parser.add_argument("-s", "-step_numbers", dest="step_numbers", help="An interval or step for sampling each tree file. " +\ "Default: 1 (all trees)", nargs="*", type=int) parser.add_argument("-o", "--outfile_name", dest="outfile_name", help="Name of the output consensus tree file. " +\ "Default: consensus.tree", default="consensus.tree" ) parser.add_argument("-g", "--restore_names", dest="restore_names", help="Restore taxon names. " +\ "Default: False", default=False, action='store_true' ) parser.add_argument("-j", "--path_to_names_dict", dest="names_dict_path", help="Path to the names dictionary. " +\ "Default: ./p4_renameForPhylip_dict.py", default="./p4_renameForPhylip_dict.py" ) parser.add_argument("-p", "--percentage", dest="percentage", help="Majority-rule percentage, between 0.1-1.0. " +\ "Default: 0.5", default=0.5, type=float) parser.add_argument("-k", "--support_format", dest="support_as_percentage", help="Support values as percentages. " +\ "Default: formatted as probabilities (0.0->1.0)", default=False, action='store_true') parser.add_argument("-r", "--re_root", dest="re_root", help="Re-root tree before saving. " +\ "Default: False", default=False, action='store_true') parser.add_argument("-f", "--pdf", dest="pdf", help="Write a pdf of the tree. " +\ "Default: False", default=False, action="store_true") parser.add_argument("-l", "--use_reportlab", dest="reportlab", help="Write a pdf using reportlab. " +\ "Default: False (Use PGF/TikZ)", default=False, action="store_true") parser.add_argument("-c", "--scalebar", dest="scalebar", help="Include scale bar on pdf " +\ "Default: True", default=True, action="store_false") parser.add_argument("-q", "--quiet", dest="quiet", help="Quiet. Default: False", default=False, action='store_true') args = parser.parse_args() main(args.datafile, args.treefiles, args.burnin_numbers, args.step_numbers, args.outfile_name, args.restore_names, args.names_dict_path, args.re_root, args.percentage, args.support_as_percentage, args.pdf, args.reportlab, args.scalebar, args.quiet)