/* This file implements a phylogenetic tree structure. Copyright (C) 2004 The University of Texas at Austin This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 2 of the License, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program; if not, write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. Email: usman@cs.njit.edu Mail: Usman Roshan, Department of Computer Sciences, NJIT-CCS, Computer Science Department, GITC 4400, University Heights, Newark, NJ 07102 */ #include #include #include #include #include #include #include #include #include #include static void copy_subtree(phylogenetic_tree T, phylogenetic_tree base_tree, node * sub2tree); static void copy_subtree_rec(node u, node v, edge e, phylogenetic_tree T, phylogenetic_tree base_tree, node * sub2tree); static void print_tree_rec(phylogenetic_tree T, char **num_to_name, const node root, const edge e, char *tree_string, short do_wgts); static int read_tree_rec(phylogenetic_tree T, char **num_to_name, char **newick_tree, node root, hashtable name_to_num); static int read_leaf_name(phylogenetic_tree T, char **num_to_name, char **newick_tree, node n, hashtable name_to_num); static int restrict_tree_rec(phylogenetic_tree T, int_set taxa, list edge_list, list node_list, edge e, node v); static int set_subtree_sizes_rec(phylogenetic_tree T, node v, edge e); static double set_subtree_edgelens_rec(phylogenetic_tree T, node v, edge e); static const double DEFAULT_EDGE_LEN = 1.0; /****************/ /* New and delete */ /****************/ phylogenetic_tree new_phylogenetic_tree() { phylogenetic_tree T; T = (phylogenetic_tree) malloc(sizeof(struct phylogenetic_tree_struct)); T->G = new_graph(); T->num_leaves = 0; /* for(i = 0; i < MAX_LEAVES; i++) T->num_to_name[i] = '\0'; */ return T; } void del_phylogenetic_tree(phylogenetic_tree T) { assert(T); /* just delete graph */ del_graph(T->G); free(T); } /*****************/ void print_phylo_tree(phylogenetic_tree T, char **num_to_name) { node n; edge e; assert(T); assert(num_to_name); printf("Num of nodes = %d, num of edges = %d\n", T->G->n_nodes, T->G->n_edges); /* * this commented section is used for debugging where we output * actual pointer values to make sure everything is pointing to where * it should. */ /* printf("\nEdges\n"); for(e = T->G->first_edge; e; e = e->next){ printf("ptr: %d id: %.2f wgt: %.2f u_id: %d v_id: %d \nprev: %d next: %d\nprev_u: %d next_u: %d\nprev_v: %d next_v: %d\n\n", (unsigned int)e, e->id, ((edge_aux_vars)e->aux_vars)->wgt, e->u->id, e->v->id, (unsigned int)e->prev, (unsigned int)e->next, (unsigned int)e->u_prev, (unsigned int)e->u_next, (unsigned int)e->v_prev, (unsigned int)e->v_next); } */ for (n = T->G->first_node; n; n = n->next) { if (((node_aux_vars) n->aux_vars)->taxon_id != -1) { printf("(%-3d %d %-3s %d %d): ", n->id, n->degree, num_to_name[((node_aux_vars) n->aux_vars)->taxon_id], ((node_aux_vars) n->aux_vars)->taxon_id, ((node_aux_vars) n->aux_vars)->taxa_size); } else { printf("(%-3d %d %-3d %d %d): ", n->id, n->degree, ((node_aux_vars) n->aux_vars)->taxon_id, ((node_aux_vars) n->aux_vars)->taxon_id, ((node_aux_vars) n->aux_vars)->taxa_size); } for (e = n->first_edge; e;) { printf(" (%d-%d):%.3f ", e->u->id, e->v->id, ((edge_aux_vars) e->aux_vars)->wgt); if (e->u == n) e = e->u_next; else if (e->v == n) e = e->v_next; else assert(0); } printf("\n"); } } /***************/ /* I/O routines */ /***************/ void print_rooted_tree(phylogenetic_tree T, char **num_to_name, char *tree_string, short do_wgts) { node v; assert(T); assert(num_to_name); assert(num_degreetwo_nodes(T) == 1); /* find first node of degree 2 */ for (v = T->G->first_node; v; v = v->next) if (v->degree == 2) break; if (!v) printf("No degree 2 edge found! Exiting\n"); print_tree_rec(T, num_to_name, v, NULL, tree_string, do_wgts); strcat(tree_string, ";"); } void print_tree(phylogenetic_tree T, char **num_to_name, char *tree_string, short do_wgts) { node v, w; edge e; char buffer[50]; assert(T); assert(num_to_name); remove_2degreeinternal_tree(T); e = T->G->first_edge; /* v = e->u; w = e->v; */ w = e->u; v = e->v; strcat(tree_string, "("); print_tree_rec(T, num_to_name, v, e, tree_string, do_wgts); if (do_wgts) { strcat(tree_string, ":"); /* printing only three significant digits */ sprintf(buffer, "%.3f", ((edge_aux_vars) e->aux_vars)->wgt); strcat(tree_string, buffer); } strcat(tree_string, ","); print_tree_rec(T, num_to_name, w, e, tree_string, do_wgts); if (do_wgts) strcat(tree_string, ":0.0);"); else strcat(tree_string, ");"); } static void print_tree_rec(phylogenetic_tree T, char **num_to_name, const node root, const edge e, char *tree_string, short do_wgts) { int i, x; int_set A; char buffer[50]; short first, first_; edge f; node v; int *intset_entries; A = ((node_aux_vars) root->aux_vars)->taxaset; if (root->degree == 1) { assert(size_int_set(A)); first_ = 1; intset_entries = (int *) malloc(size_int_set(A) * sizeof(int)); get_entries_int_set(A, intset_entries); for (i = 0; i < size_int_set(A); i++) { x = intset_entries[i]; assert(num_to_name[x]); if (first_) sprintf(buffer, "%s", num_to_name[x]); else sprintf(buffer, ",%s", num_to_name[x]); first_ = 0; strcat(tree_string, buffer); } free(intset_entries); } else { if (size_int_set(A)) { first_ = 1; intset_entries = (int *) malloc(size_int_set(A) * sizeof(int)); get_entries_int_set(A, intset_entries); for (i = 0; i < size_int_set(A); i++) { x = intset_entries[i]; assert(num_to_name[x]); if (first_) sprintf(buffer, "%s", num_to_name[x]); else sprintf(buffer, ",%s", num_to_name[x]); first_ = 0; strcat(tree_string, buffer); } strcat(tree_string, ","); free(intset_entries); } first = 1; strcat(tree_string, "("); for (f = root->first_edge; f;) { if (f != e) { if (first) first = 0; else strcat(tree_string, ","); v = opposite(root, f); print_tree_rec(T, num_to_name, v, f, tree_string, do_wgts); if (do_wgts) { strcat(tree_string, ":"); /* * printing only three significant * digits */ sprintf(buffer, "%.3f", ((edge_aux_vars) f->aux_vars)->wgt); strcat(tree_string, buffer); } } if (f->u == root) f = f->u_next; else if (f->v == root) f = f->v_next; else assert(0); } strcat(tree_string, ")"); } } int read_rooted_tree(phylogenetic_tree T, char **num_to_name, char *newick_tree, hashtable name_to_num) { int tree_correctly_read; node root; node_aux_vars node_vars; assert(T); assert(newick_tree); assert(num_to_name); /* creating a new node_aux_vars to carry auxilliary info in node */ node_vars = new_node_aux_vars(); root = new_node(T->G, node_vars); tree_correctly_read = read_tree_rec(T, num_to_name, &newick_tree, root, name_to_num); if (!tree_correctly_read) return 0; /* print_phylo_tree(T); */ if (root->degree == 1 && ((node_aux_vars) root->aux_vars)->taxon_id == -1) { del_node(T->G, root); root = NULL; } /* print_phylo_tree(T); */ /* remove_2degreeinternal_tree(T); */ assert(num_degreetwo_nodes(T) == 1); return 1; } int read_tree(phylogenetic_tree T, char **num_to_name, char *newick_tree, hashtable name_to_num) { int tree_correctly_read; node root; node_aux_vars node_vars; assert(T); assert(newick_tree); assert(num_to_name); /* creating a new node_aux_vars to carry auxilliary info in node */ node_vars = new_node_aux_vars(); root = new_node(T->G, node_vars); tree_correctly_read = read_tree_rec(T, num_to_name, &newick_tree, root, name_to_num); if (!tree_correctly_read) return 0; /* print_phylo_tree(T); */ if (root->degree == 1 && ((node_aux_vars) root->aux_vars)->taxon_id == -1) { del_node(T->G, root); root = NULL; } /* print_phylo_tree(T); */ remove_2degreeinternal_tree(T); /* print_phylo_tree(T); */ return 1; } static int read_tree_rec(phylogenetic_tree T, char **num_to_name, char **newick_tree, node root, hashtable name_to_num) { node n; char *endptr; double x; node_aux_vars node_vars; edge_aux_vars edge_vars; /* creating a new node_aux_vars for carrying auxilliary node info */ node_vars = new_node_aux_vars(); n = new_node(T->G, node_vars); if (**newick_tree == '(') { /* recurse to read rest of tree */ do { (*newick_tree)++; if (!read_tree_rec(T, num_to_name, newick_tree, n, name_to_num)) return 0; } while (**newick_tree == ','); if (**newick_tree != ')') { printf("Error: expected: ')', found: '%c'\n", **newick_tree); return 0; } (*newick_tree)++; } else { /* found leaf; read in the name */ if (!read_leaf_name(T, num_to_name, newick_tree, n, name_to_num)) { printf("Error: expected taxon name before: %c\n", **newick_tree); return 0; /* exit(EXIT_FAILURE); */ } } if (**newick_tree == ':') { /* found weight; read in weight */ (*newick_tree)++; x = strtod(*newick_tree, &endptr); if (*newick_tree == endptr) { printf("Error: expected double, found %c\n", **newick_tree); return 0; /* exit(EXIT_FAILURE); */ } *newick_tree = endptr; edge_vars = new_edge_aux_vars(); /* convert negative edge weight to zero and print warning */ if (x < 0) { x = 0; /* * Warning disabled for now printf("Warning: negative * edge weight found, converting to zero\n"); */ } edge_vars->wgt = x; new_edge(T->G, root, n, edge_vars); ((node_aux_vars) root->aux_vars)->taxa_size += ((node_aux_vars) n->aux_vars)->taxa_size; } else { edge_vars = new_edge_aux_vars(); //Set default edge weights to 1 instead of 0-- - modified on June 22, 2006 edge_vars->wgt = DEFAULT_EDGE_LEN; //edge_vars->wgt = 0; new_edge(T->G, root, n, edge_vars); ((node_aux_vars) root->aux_vars)->taxa_size += ((node_aux_vars) n->aux_vars)->taxa_size; } return 1; } static int read_leaf_name(phylogenetic_tree T, char **num_to_name, char **newick_tree, node n, hashtable name_to_num) { char name[MAX_NAME_LEN]; int i = 0; int taxon_id; while (!strchr(":(),", **newick_tree)) { //TL added length check if (i > (MAX_NAME_LEN - 2)) { printf("Leaf name too long, max is %d\n", MAX_NAME_LEN); return 0; } name[i++] = **newick_tree; (*newick_tree)++; if (!*newick_tree) { printf("Error: unexpected EOF while reading leaf name\n"); return 0; } } name[i] = '\0'; if (!name_to_num) { num_to_name[T->num_leaves] = (char *) malloc(strlen(name) + 1); strcpy(num_to_name[T->num_leaves], name); ((node_aux_vars) n->aux_vars)->taxon_id = T->num_leaves; ((node_aux_vars) n->aux_vars)->taxa_size = 1; /* inserting taxon_id into taxaset */ insert_int_set(((node_aux_vars) n->aux_vars)->taxaset, T->num_leaves); } else { taxon_id = lookup_hashtable(name_to_num, name); assert(taxon_id >= 0); /* printf("found name=%s at position %d\n", name,taxon_id); */ assert(!strcmp(num_to_name[taxon_id], name)); ((node_aux_vars) n->aux_vars)->taxon_id = taxon_id; ((node_aux_vars) n->aux_vars)->taxa_size = 1; /* inserting taxon_id into taxaset */ insert_int_set(((node_aux_vars) n->aux_vars)->taxaset, taxon_id); } T->num_leaves++; return 1; } /***************/ /* End of I/O routines */ /***************/ /***************/ /* Miscellaneous routines */ /***************/ void set_rooted_subtree_edgelens(phylogenetic_tree T) { node v, root; int nroots = 0; /* first set all subtree edgelens to zero */ for (v = T->G->first_node; v; v = v->next) ((node_aux_vars) v->aux_vars)->sum_edge_lens = 0; /* assert there is a root and only one root */ for (v = T->G->first_node; v; v = v->next) if (v->degree == 2) { nroots++; root = v; } assert(nroots == 1); set_subtree_edgelens_rec(T, root, (edge) 0); } void set_subtree_edgelens(phylogenetic_tree T) { node v; /* first set all taxa sizes to zero */ for (v = T->G->first_node; v; v = v->next) ((node_aux_vars) v->aux_vars)->taxa_size = 0; for (v = T->G->first_node; v; v = v->next) if (v->degree > 1) { set_subtree_edgelens_rec(T, v, (edge) 0); break; } } static double set_subtree_edgelens_rec(phylogenetic_tree T, node v, edge e) { edge f; if (v->degree == 1) { if (((node_aux_vars) v->aux_vars)->taxon_id != -1) { ((node_aux_vars) v->aux_vars)->sum_edge_lens = 0; return 0; } else { /* * printf("node with degree 1 has taxon_id=-1\n"); * printf("%d\n", * size_int_set(((node_aux_vars)v->aux_vars)->taxaset) * ); */ assert(0); /* degree one nodes must be leaves * with a +ve taxon id */ } } for (f = v->first_edge; f;) { if (f != e) ((node_aux_vars) v->aux_vars)->sum_edge_lens += ((edge_aux_vars) f->aux_vars)->wgt + set_subtree_edgelens_rec(T, opposite(v, f), f); if (f->u == v) f = f->u_next; else if (f->v == v) f = f->v_next; else assert(0); } return ((node_aux_vars) v->aux_vars)->sum_edge_lens; } void set_rooted_subtree_sizes(phylogenetic_tree T) { node v, root; int nroots = 0; /* first set all taxa sizes to zero */ for (v = T->G->first_node; v; v = v->next) ((node_aux_vars) v->aux_vars)->taxa_size = 0; /* assert there is a root and only one root */ for (v = T->G->first_node; v; v = v->next) if (v->degree == 2) { nroots++; root = v; } assert(nroots == 1); set_subtree_sizes_rec(T, root, (edge) 0); } void set_subtree_sizes(phylogenetic_tree T) { node v; /* first set all taxa sizes to zero */ for (v = T->G->first_node; v; v = v->next) ((node_aux_vars) v->aux_vars)->taxa_size = 0; for (v = T->G->first_node; v; v = v->next) { if (v->degree > 1) { set_subtree_sizes_rec(T, v, (edge) 0); break; } } } static int set_subtree_sizes_rec(phylogenetic_tree T, node v, edge e) { edge f; if (v->degree == 1) { if (((node_aux_vars) v->aux_vars)->taxon_id != -1) { ((node_aux_vars) v->aux_vars)->taxa_size = 1; return 1; } else { /* * printf("node with degree 1 has taxon_id=-1\n"); * printf("%d\n", * size_int_set(((node_aux_vars)v->aux_vars)->taxaset) * ); */ assert(0); /* degree one nodes must be leaves * with a +ve taxon id */ } } for (f = v->first_edge; f;) { if (f != e) ((node_aux_vars) v->aux_vars)->taxa_size += set_subtree_sizes_rec(T, opposite(v, f), f); if (f->u == v) f = f->u_next; else if (f->v == v) f = f->v_next; else assert(0); } return ((node_aux_vars) v->aux_vars)->taxa_size; } void assign_tree(phylogenetic_tree dest, phylogenetic_tree src) { assert(dest && src); assign_graph(dest->G, src->G); dest->num_leaves = src->num_leaves; } void restrict_tree(phylogenetic_tree T, int_set taxa) { node u, v; list edge_list; list_node ln; list node_list; edge_list = new_list(); node_list = new_list(); for (v = T->G->first_node; v; v = v->next) { if (v->degree > 1) { /* * find edges to remove, i.e. those not part of the * induced tree */ restrict_tree_rec(T, taxa, edge_list, node_list, (edge) 0, v); break; } } /* remove selected edges */ for (ln = edge_list->head; ln; ln = ln->next) { del_edge(T->G, (edge) ln->data); ln->data = NULL; } /* remove selected nodes */ for (ln = node_list->head; ln; ln = ln->next) { /* updating num_leaves variable */ if (((node_aux_vars) ((node) ln->data)->aux_vars)->taxon_id != -1) T->num_leaves--; del_node(T->G, (node) ln->data); ln->data = NULL; } del_list(edge_list); del_list(node_list); /* removing internal 2-degree nodes as a result of deleting edges */ remove_2degreeinternal_tree(T); /* check for hanging nodes */ for (v = T->G->first_node; v;) { if (v->degree == 1 && ((node_aux_vars) v->aux_vars)->taxon_id == -1) { u = v->next; del_node(T->G, v); v = u; } else v = v->next; } /* * removing internal 2-degree nodes as a result of deleting edges. * this needs to be done again to assure the tree is still binary */ remove_2degreeinternal_tree(T); } static int restrict_tree_rec(phylogenetic_tree T, int_set taxa, list edge_list, list node_list, edge e, node v) { edge f; int found; found = 0; if (v->degree == 1) { if (member_int_set(taxa, ((node_aux_vars) v->aux_vars)->taxon_id)) return 1; /* need to delete this node */ push_list(node_list, (void *) v); return 0; } for (f = v->first_edge; f;) { if (f != e) { if (!restrict_tree_rec(T, taxa, edge_list, node_list, f, opposite(v, f))) push_list(edge_list, (void *) f); else found = 1; } if (f->u == v) f = f->u_next; else if (f->v == v) f = f->v_next; else assert(0); } /* need to delete this node as well */ if (found == 0) push_list(node_list, (void *) v); return found; } void remove_2degreeinternal_tree(phylogenetic_tree T) { node u, v, w, x; edge e1, e2; edge_aux_vars edge_vars; assert(T); u = T->G->first_node; while (u) { v = u->next; /* * if(u->degree == 2 && * size_int_set(((node_aux_vars)u->aux_vars)->taxaset)==0) { */ if (u->degree == 2) { if (((node_aux_vars) u->aux_vars)->taxon_id == -1) { e1 = u->first_edge; e2 = u->last_edge; if (e1->u != u) w = e1->u; else w = e1->v; if (e2->u != u) x = e2->u; else x = e2->v; edge_vars = new_edge_aux_vars(); if (e1->aux_vars && e2->aux_vars) edge_vars->wgt = ((edge_aux_vars) e1->aux_vars)->wgt + ((edge_aux_vars) e2->aux_vars)->wgt; else edge_vars->wgt = DEFAULT_EDGE_LEN; new_edge(T->G, w, x, edge_vars); del_node(T->G, u); u = NULL; } else assert(0); /* internal 2-degree node * should have -ve taxon id */ } u = v; } } void uniform_random_tree(int num_leaves, phylogenetic_tree base_tree) { node u, v, uv, w; edge *edges; edge f; int i, j, count, n_edges; phylogenetic_tree T; node_aux_vars node_vars; edge_aux_vars edge_vars; srand(time(NULL)); assert(base_tree->num_leaves == 0); T = new_phylogenetic_tree(); count = 0; node_vars = new_node_aux_vars(); node_vars->taxon_id = count++; u = new_node(T->G, node_vars); T->num_leaves++; node_vars = new_node_aux_vars(); node_vars->taxon_id = count++; v = new_node(T->G, node_vars); T->num_leaves++; edges = (edge *) malloc((2 * num_leaves - 3) * sizeof(edge)); for (i = 0; i < (2 * num_leaves - 3); i++) edges[i] = NULL; edge_vars = new_edge_aux_vars(); edge_vars->wgt = 1; edges[0] = new_edge(T->G, u, v, edge_vars); /* default edge wgt=1 */ n_edges = 1; for (i = 3; i <= num_leaves; i++) { j = (int) ((n_edges - 1) * ((double) rand() / (double) RAND_MAX)); assert(j >= 0); assert(j < (2 * num_leaves - 3)); f = edges[j]; u = f->u; v = f->v; assert(f); node_vars = new_node_aux_vars(); uv = new_node(T->G, node_vars); node_vars = new_node_aux_vars(); node_vars->taxon_id = count++; w = new_node(T->G, node_vars); T->num_leaves++; del_edge(T->G, f); edge_vars = new_edge_aux_vars(); edge_vars->wgt = 1; edges[j] = new_edge(T->G, w, uv, edge_vars); edge_vars = new_edge_aux_vars(); edge_vars->wgt = 1; edges[n_edges++] = new_edge(T->G, v, uv, edge_vars); edge_vars = new_edge_aux_vars(); edge_vars->wgt = 1; edges[n_edges++] = new_edge(T->G, u, uv, edge_vars); } assert(T->num_leaves == num_leaves); free(edges); assign_tree(base_tree, T); del_phylogenetic_tree(T); } void random_refine(phylogenetic_tree T) { node v; node *sub2tree; int i; list node_list; phylogenetic_tree base_tree; list_node ln; edge f; node_list = new_list(); for (v = T->G->first_node; v; v = v->next) if (v->degree > 3) push_list(node_list, (void *) v); for (ln = node_list->head; ln; ln = ln->next) { v = (node) ln->data; sub2tree = (node *) malloc(v->degree * sizeof(node)); i = 0; for (i = 0; i < v->degree; i++) sub2tree[i] = NULL; i = 0; for (f = v->first_edge; f;) { sub2tree[i++] = (void *) (opposite(v, f)); if (f->u == v) f = f->u_next; else f = f->v_next; } base_tree = new_phylogenetic_tree(); uniform_random_tree(v->degree, base_tree); copy_subtree(T, base_tree, sub2tree); del_phylogenetic_tree(base_tree); del_node(T->G, v); free(sub2tree); } for (ln = node_list->head; ln; ln = ln->next) ln->data = NULL; del_list(node_list); } void random_refine_edgelens(phylogenetic_tree T) { node v, w; node *sub2tree; int i; list node_list; phylogenetic_tree base_tree; list_node ln; edge f; node_list = new_list(); for (v = T->G->first_node; v; v = v->next) { if (v->degree > 3) push_list(node_list, (void *) v); } for (ln = node_list->head; ln; ln = ln->next) { v = (node) ln->data; sub2tree = (node *) malloc(v->degree * sizeof(node)); i = 0; for (i = 0; i < v->degree; i++) sub2tree[i] = NULL; i = 0; for (f = v->first_edge; f;) { sub2tree[i++] = (void *) (opposite(v, f)); if (f->u == v) f = f->u_next; else f = f->v_next; } base_tree = new_phylogenetic_tree(); uniform_random_tree(v->degree, base_tree); /* fixing edge lengths */ for (f = base_tree->G->first_edge; f; f = f->next) { if (f->u->degree == 1 || f->v->degree == 1) { if (f->u->degree == 1) w = f->u; else w = f->v; assert(((node_aux_vars) w->aux_vars)->taxon_id != -1); ((edge_aux_vars) f->aux_vars)->wgt = ((edge_aux_vars) sub2tree[((node_aux_vars) w->aux_vars)->taxon_id]->first_edge->aux_vars)->wgt; } else ((edge_aux_vars) f->aux_vars)->wgt = DEFAULT_EDGE_LEN; } copy_subtree(T, base_tree, sub2tree); del_phylogenetic_tree(base_tree); del_node(T->G, v); free(sub2tree); } for (ln = node_list->head; ln; ln = ln->next) ln->data = NULL; del_list(node_list); } static void copy_subtree(phylogenetic_tree T, phylogenetic_tree base_tree, node * sub2tree) { node s, t, v; for (v = base_tree->G->first_node; v; v = v->next) { if (v->degree == 1 && ((node_aux_vars) v->aux_vars)->taxon_id == 0) { s = v; break; } } assert(s); t = sub2tree[0]; assert(t); /* printf("base_tree has %d leaves\n", base_tree->num_leaves); */ copy_subtree_rec(s, t, (edge) 0, T, base_tree, sub2tree); } static void copy_subtree_rec(node u, node v, edge e, phylogenetic_tree T, phylogenetic_tree base_tree, node * sub2tree) { edge f; node_aux_vars nvars; edge_aux_vars evars; node n, opp; for (f = u->first_edge; f;) { if (f != e) { opp = opposite(u, f); if (((node_aux_vars) opp->aux_vars)->taxon_id == -1) { nvars = new_node_aux_vars(); n = new_node(T->G, nvars); evars = new_edge_aux_vars(); /* evars->wgt=1; */ evars->wgt = ((edge_aux_vars) f->aux_vars)->wgt; new_edge(T->G, v, n, evars); copy_subtree_rec(opp, n, f, T, base_tree, sub2tree); } else { evars = new_edge_aux_vars(); /* evars->wgt=1; */ evars->wgt = ((edge_aux_vars) f->aux_vars)->wgt; new_edge(T->G, v, sub2tree[((node_aux_vars) opp->aux_vars)->taxon_id], evars); } } if (f->u == u) { f = f->u_next; } else { f = f->v_next; } } } int num_degreetwo_nodes(phylogenetic_tree T) { node v; int count; count = 0; for (v = T->G->first_node; v; v = v->next) if (v->degree == 2) count++; return count; } void reroot_phylo_tree(phylogenetic_tree T, edge e) { node_aux_vars nvars; edge_aux_vars evars; node w; /* remove previous root and any 2-degree internal nodes */ remove_2degreeinternal_tree(T); /* subdivide e and create root */ nvars = new_node_aux_vars(); w = new_node(T->G, nvars); evars = new_edge_aux_vars(); evars->wgt = (((edge_aux_vars) e->aux_vars)->wgt) / 2; new_edge(T->G, w, e->u, evars); evars = new_edge_aux_vars(); evars->wgt = (((edge_aux_vars) e->aux_vars)->wgt) / 2; new_edge(T->G, w, e->v, evars); del_edge(T->G, e); return; } void contract_edgelens(phylogenetic_tree T, float l) { edge e, f, g; node todel; edge_aux_vars edge_vars; int stop; for (e = T->G->first_edge; e;) { if (((edge_aux_vars) e->aux_vars)->wgt <= l && e->u->degree > 1 && e->v->degree > 1) { todel = e->u; for (f = todel->first_edge; f;) { if (f != e) { edge_vars = new_edge_aux_vars(); edge_vars->wgt = ((edge_aux_vars) f->aux_vars)->wgt; new_edge(T->G, e->v, opposite(todel, f), edge_vars); } if (f->u == todel) { f = f->u_next; } else { f = f->v_next; } } /* * set next edge to a valid pointed before deleting * this one */ for (g = e; (g->u == todel || g->v == todel) && g; g = g->next); /* printf("u_id=%d v_id=%d\n", g->u->id, g->v->id); */ del_node(T->G, todel); e = g; } else { e = e->next; } } }