This vignette explains some basic functionality of the GeneralTree package.
In this section we will explain how to perform basic operations on a tree.
A General Tree is an R6 object. As such it can be created with the new
method. You should pass to new
the id
and data
of the root node. Note that there is currently no requirement that the data in the tree should be the same and/or unique.
# Create a root node with id = 0 and data = "root"
require(GeneralTree)
#> Loading required package: GeneralTree
#> Loading required package: R6
tree <- GeneralTree$new(id = 0, data = "root")
We can print the tree at any time to inspect its content,
print(tree)
#> 0 : root
Once the tree is created we can add child nodes to the tree. We can either call addNode
where we will have to specify the id
of the parent node or we can use searchNode
to find the parent node and add a child directly.
# Add a child by specifying the parent.
tree$addNode(parent = 0, id = 1, data = "child0.1")
print(tree)
#> 0 : root --> 1 : child0.1
# Add a child by searching its parent.
tree$searchNode(1)$addChild(id = 2, data = "child1.2")
print(tree)
#> 0 : root --> 1 : child0.1 --> 2 : child1.2
Siblings are created automatically when you add a new child to a node that already has a child, or when you explicitly call addSibling
.
# Add a sibling by specifying the parent.
tree$addNode(parent = 0, id = 3, data = "child0.3")
print(tree)
#> 0 : root --> 1 : child0.1 --> 2 : child1.2
#> \-> 3 : child0.3
# Add a sibling by searching its parent.
tree$searchNode(1)$addSibling(id = 4, data = "child0.4")
print(tree)
#> 0 : root --> 1 : child0.1 --> 2 : child1.2
#> |-> 3 : child0.3
#> \-> 4 : child0.4
There are two functions that help you retrieve data stored in the tree. Each method returns a different object,
searchNode
searches for a node.
searchData
searches for data in the tree.
# Let us create a mixed tree.
tree <- GeneralTree$new(0, "parent1")
tree$addNode(0, "a", "child.a")
tree$addNode(0, "b", "child.b")
tree$addNode("b", "c", "child.b.c")
tree$addNode("b", "d", "child.b.d")
tree$addNode("c", "e", "child.c.e")
tree$addNode("c", "f", "child.c.f")
tree$addNode("b", "g", "child.b.g")
tree$addNode("b", "h", "child.b.h")
tree$addNode("c", 1, "child.c.1")
Search the node with id f
,
print(tree$searchNode("f"))
#> |-> f : child.c.f
Search the data with id e
,
tree$searchData("e")
#> [1] "child.c.e"
If you want a graphical representation of the tree, you can create one easily,
plot(tree)
We can change what is plotted as well as the shapes of the diagram,
plot(tree, what = "data", color = "coral1", shape = "oval")
In the current version you can cast a tree to a data frame and vice versa, and you can also cast a parsed object to a tree.
We can use the generic casting functions to cast to a data frame,
as.data.frame(tree)
#> id data parent
#> 1 0 parent1 <NA>
#> 2 a child.a 0
#> 3 b child.b 0
#> 4 c child.b.c b
#> 5 e child.c.e c
#> 6 f child.c.f c
#> 7 1 child.c.1 c
#> 8 d child.b.d b
#> 9 g child.b.g b
#> 10 h child.b.h b
The other way around is also easy,
# Let us define a data frame,
test_tree_df <- data.frame(
ID = c("root", "child1", "child2", "child3"),
DATA = c("parent1", "data3.1", "data1.2", "data1.3"),
PARENT = c(NA, "child3", "root", "root"), stringsAsFactors = FALSE)
test_tree_df
#> ID DATA PARENT
#> 1 root parent1 <NA>
#> 2 child1 data3.1 child3
#> 3 child2 data1.2 root
#> 4 child3 data1.3 root
By default as.GeneralTree
will search for columns id
, data
and parent
where parent should have an NA
to identify the root node. In our case we have different column names. This can be passed to as.GeneralTree
as we see below,
as.GeneralTree(test_tree_df, id = "ID", data = "DATA", parent = "PARENT")
#> root : parent1 --> child3 : data1.3 --> child1 : data3.1
#> \-> child2 : data1.2
To inspect how R code is parsed you can cast parsed code to a GeneralTree. Note that GeneralTree will automatically create a dummy root to compensate for the fact that sometimes multiple expressions are at the root.
In this example all entries exist at the root,
p <- parse(text = "tree <- GeneralTree$new(1, \"parent1\")
tree$addNode(1, 2, \"child.1.2\")
tree$addNode(2, 3, \"child.2.3\")",
keep.source = TRUE)
print(as.GeneralTree(p), what = "data")
#> BaseEnvironment --> --> --> tree
#> | |-> <-
#> | \-> --> --> --> GeneralTree
#> | | |-> $
#> | | \-> new
#> | |-> (
#> | |-> --> 1
#> | |-> ,
#> | |-> --> "parent1"
#> | \-> )
#> |-> --> --> --> tree
#> | | |-> $
#> | | \-> addNode
#> | |-> (
#> | |-> --> 1
#> | |-> ,
#> | |-> --> 2
#> | |-> ,
#> | |-> --> "child.1.2"
#> | \-> )
#> \-> --> --> --> tree
#> | |-> $
#> | \-> addNode
#> |-> (
#> |-> --> 2
#> |-> ,
#> |-> --> 3
#> |-> ,
#> |-> --> "child.2.3"
#> \-> )
In this example all entries hang below test_that
,
p <- parse(text =
"test_that(\"test that the tree_walker with while loop\", {
tree <- GeneralTree$new(1, \"parent1\")
tree$addNode(1, 2, \"child.1.2\")
tree$addNode(2, 3, \"child.2.3\")
})",
keep.source = TRUE)
print(as.GeneralTree(p), what = "data")
#> BaseEnvironment --> --> --> test_that
#> |-> (
#> |-> --> "test that the tree_walker with while loop"
#> |-> ,
#> |-> --> {
#> | |-> --> --> tree
#> | | |-> <-
#> | | \-> --> --> --> GeneralTree
#> | | | |-> $
#> | | | \-> new
#> | | |-> (
#> | | |-> --> 1
#> | | |-> ,
#> | | |-> --> "parent1"
#> | | \-> )
#> | |-> --> --> --> tree
#> | | | |-> $
#> | | | \-> addNode
#> | | |-> (
#> | | |-> --> 1
#> | | |-> ,
#> | | |-> --> 2
#> | | |-> ,
#> | | |-> --> "child.1.2"
#> | | \-> )
#> | |-> --> --> --> tree
#> | | | |-> $
#> | | | \-> addNode
#> | | |-> (
#> | | |-> --> 2
#> | | |-> ,
#> | | |-> --> 3
#> | | |-> ,
#> | | |-> --> "child.2.3"
#> | | \-> )
#> | \-> }
#> \-> )
There are two approaches to iterate through the tree. The first is built in, whereas the second relies on the foreach
and iterators
packages.
You can iterate through the tree by first initializing an iterator and then calling nextElem on that iterator.
# Let us inspect the tree first,
print(tree, what = "data")
#> parent1 --> child.a
#> \-> child.b --> child.b.c --> child.c.e
#> | |-> child.c.f
#> | \-> child.c.1
#> |-> child.b.d
#> |-> child.b.g
#> \-> child.b.h
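# Keep a second reference to the tree. Note: R6 objects have reference
# semantics, so this is an alias of the same tree, not an independent copy,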
old_tree <- tree
i <- tree$iterator()
while (!is.null(i)) {
i$setData(paste("id:", i$id, "-data", i$data))
i <- tryCatch(i$nextElem(), error = function(e) NULL)
}
print(tree, what = "data")
#> id: 0 -data parent1 --> id: a -data child.a
#> \-> id: b -data child.b --> id: c -data child.b.c --> id: e -data child.c.e
#> | |-> id: f -data child.c.f
#> | \-> id: 1 -data child.c.1
#> |-> id: d -data child.b.d
#> |-> id: g -data child.b.g
#> \-> id: h -data child.b.h
With foreach you can quickly extract the id and data of the tree in a depth first fashion,
require(iterators)
#> Loading required package: iterators
require(foreach)
#> Loading required package: foreach
itx <- iter(old_tree, by = "id")
ids_in_tree <- foreach(i = itx, .combine = c) %do% c(i)
ids_in_tree
#> [1] "0" "a" "b" "c" "e" "f" "1" "d" "g" "h"
The main benefit of using iter
is that you can iterate through the tree in parallel. If you only need to perform operations on the id
or data
of a node we highly recommend to use the latter method to traverse the tree. See the following benchmark,
p <- parse(text = "
tree <- GeneralTree$new(1, \"parent1\")
tree$addNode(1, 2, \"child.1.2\")
tree$addNode(2, 3, \"child.2.3\")
tree$addNode(3, 4, \"child.3.4\")
tree$addNode(3, 5, \"child.3.5\")
tree$addNode(1, 7, \"child.1.7\")
tree$addNode(1, 8, \"child.1.8\")
tree$addNode(8, 9, \"child.8.9\")
tree$addNode(9, 10, \"child.9.10\")
tree$addNode(9, 11, \"child.9.11\")
tree$addNode(9, 12, \"child.9.12\")
tree$addNode(12, 13, \"child.12.13\")
tree$addNode(8, 14, \"child.8.14\")
tree$addNode(2, 6, \"child.2.6\")", keep.source = TRUE)
tree <- as.GeneralTree(p)
require(microbenchmark)
#> Loading required package: microbenchmark
microbenchmark({
i <- tree$iterator()
ids_in_tree <- c()
while (!is.null(i)) {
ids_in_tree <- c(ids_in_tree, i$id)
i <- tryCatch(i$nextElem(), error = function(e) NULL)
}
})
#> Warning in sample.int(length(x), size, replace, prob): '.Random.seed' is
#> not an integer vector but of type 'NULL', so ignored
#> Unit: milliseconds
#> expr
#> { i <- tree$iterator() ids_in_tree <- c() while (!is.null(i)) { ids_in_tree <- c(ids_in_tree, i$id) i <- tryCatch(i$nextElem(), error = function(e) NULL) } }
#> min lq mean median uq max neval
#> 43.52775 48.51905 52.16778 51.00981 54.46999 105.1711 100
require(foreach)
require(iterators)
require(doParallel)
#> Loading required package: doParallel
#> Loading required package: parallel
# Test below on your machine.
# nThreads <- detectCores(logical = TRUE)
# cl <- makeCluster(nThreads)
# registerDoParallel(cl)
# microbenchmark({
# itx <- iter(tree, by = "id")
# ids_in_tree <- foreach(i = itx, .combine = c) %dopar% c(i)
# })
Note that currently foreach
iteration is slower than native iteration.