This document follows the TileDB API usage examples. A shorter introductory vignette is also available.
Preliminaries
We will show two initial and basic examples for a dense and sparse array simply to create array data on disk to refer to later in examples that follow.
library(tiledb)

# All example arrays live below the session's temporary directory.
tdir <- tempdir()
uridense <- file.path(tdir, "dense")
uridensefix <- file.path(tdir, "densefix")
uridensevar <- file.path(tdir, "densevar")
uridensewkey <- file.path(tdir, "denseenc")

# Create an empty 4x4 dense array with one INT32 attribute "a".
#
# array_name: URI at which the array is created.
# Returns (invisibly) NULL if an array already exists at that URI, otherwise
# the value returned by tiledb_array_create().
create_array <- function(array_name) {
  # Check if the array already exists.
  if (tiledb_object_type(array_name) == "ARRAY") {
    message("Array already exists.")
    return(invisible(NULL))
  }

  # The array will be 4x4 with dimensions "rows" and "cols", with domain [1,4].
  dom <- tiledb_domain(dims = c(tiledb_dim("rows", c(1L, 4L), 4L, "INT32"),
                                tiledb_dim("cols", c(1L, 4L), 4L, "INT32")))

  # The array will be dense with a single attribute "a" so each (i,j) cell
  # can store an integer.
  schema <- tiledb_array_schema(dom, attrs = tiledb_attr("a", type = "INT32"))

  # Create the (empty) array on disk, and return the path invisibly
  invisible(tiledb_array_create(array_name, schema))
}
# Fill the 4x4 dense array at `array_name` with the integers 1..16.
#
# array_name: URI of an existing dense array.
write_array <- function(array_name) {
  # Four groups of four values, laid out column by column by array().
  data <- array(c(c(1L, 5L, 9L, 13L),
                  c(2L, 6L, 10L, 14L),
                  c(3L, 7L, 11L, 15L),
                  c(4L, 8L, 12L, 16L)), dim = c(4, 4))
  # Open the array and write to it.
  A <- tiledb_array(uri = array_name)
  A[] <- data
}
# Create and populate the dense example array.
create_array(uridense)
write_array(uridense)

# URI for the sparse example array.
urisparse <- file.path(tdir, "sparse")

# Create an empty 4x4 sparse array with one INT32 attribute "a".
#
# array_name: URI at which the array is created.
# Returns (invisibly) NULL if an array already exists at that URI, otherwise
# the value returned by tiledb_array_create().
create_array <- function(array_name) {
  # Check if the array already exists.
  if (tiledb_object_type(array_name) == "ARRAY") {
    message("Array already exists.")
    return(invisible(NULL))
  }

  # The array will be 4x4 with dimensions "rows" and "cols", with domain [1,4].
  dom <- tiledb_domain(dims = c(tiledb_dim("rows", c(1L, 4L), 4L, "INT32"),
                                tiledb_dim("cols", c(1L, 4L), 4L, "INT32")))

  # The array will be sparse with a single attribute "a" so each (i,j) cell
  # can store an integer.
  schema <- tiledb_array_schema(dom, attrs = tiledb_attr("a", type = "INT32"),
                                sparse = TRUE)

  # Create the (empty) array on disk, and return the path invisibly.
  invisible(tiledb_array_create(array_name, schema))
}
# Write three cells into the sparse array at `array_name`.
#
# array_name: URI of an existing sparse array.
write_array <- function(array_name) {
  # Coordinates (I[k], J[k]) receive data[k].
  I <- c(1, 2, 2)
  J <- c(1, 4, 3)
  data <- c(1L, 2L, 3L)
  # Open the array and write to it.
  A <- tiledb_array(uri = array_name)
  A[I, J] <- data
}
# Create the example array at `urisparse`, then write its cells.
create_array(urisparse)
write_array(urisparse)
# Close the array behind `arr@ptr` and open it again via the same pointer.
#
# arr: object with a `ptr` slot holding the array pointer.
# txt: passed through to libtiledb_array_open_with_ptr()
#      (presumably the query type, e.g. "READ" or "WRITE" -- TODO confirm).
# Returns (invisibly) the result of the reopen call.
close_and_reopen <- function(arr, txt) {
  res <- tiledb:::libtiledb_array_close(arr@ptr)
  res <- tiledb:::libtiledb_array_open_with_ptr(arr@ptr, txt)
}
API Usage
Creating Arrays
Creating Dimensions
library(tiledb)

# Create dimension
dim <- tiledb_dim("dim1", c(1L, 4L), 2L, "INT32")

# String dimensions: no values for domain and extent
strdim <- tiledb_dim("dim2", NULL, NULL, "ASCII")
Creating the Array Domain
library(tiledb)

# .. create dimensions `dim1`, `dim2`
dim1 <- tiledb_dim("dim1", c(1L, 4L), 2L, "INT32")
dim2 <- tiledb_dim("dim2", c(1L, 2L), 2L, "INT32")

# Create domain with two dimensions
# In C++: domain.add_dimensions(dim1).add_dimension(dim2)
dom <- tiledb_domain(dims = c(dim1, dim2))
Creating Attributes
# Create attribute
attr <- tiledb_attr("attr", type = "INT32")

# Create attribute
attr <- tiledb_attr("a1", type = "INT32")

# Access cell value via generic or functions
cell_val_num(attr)
## [1] 1
tiledb_attribute_get_cell_val_num(attr)
## [1] 1
## Attribute value counts can be set via a generic method and a direct method
cell_val_num(attr) <- 3
tiledb_attribute_set_cell_val_num(attr, 3)
## set char attribute to variable length which is encoded as a NA
cell_val_num(attr) <- NA
tiledb_attribute_set_cell_val_num(attr, NA)
Setting Fill Values
# ... create int attribute attr
attr <- tiledb_attr("a1", type = "INT32")
# set fill value to 42L
tiledb_attribute_set_fill_value(attr, 42L)

# ... create CHAR attribute attr holding three values per cell
attr <- tiledb_attr("attr", type = "CHAR")
tiledb_attribute_set_cell_val_num(attr, 3)
# set fill value to "..."
tiledb_attribute_set_fill_value(attr, "...")
Setting a Compressor
# Create a GZIP compression filter and set its compression level
comp <- tiledb_filter("GZIP")
tiledb_filter_set_option(comp, "COMPRESSION_LEVEL", 10)

# Create a filter list with the compressor
filter_list <- tiledb_filter_list(comp)

# Create attribute with the filter list
attr <- tiledb_attr("attr", "INT32", filter_list = filter_list)
Setting Other Filters
# Create filters
f1 <- tiledb_filter("BIT_WIDTH_REDUCTION")
f2 <- tiledb_filter("ZSTD")

# Create a filter list with the two filters
filter_list <- tiledb_filter_list(c(f1, f2))

# Create attribute with the filter list
attr <- tiledb_attr("attr", "INT32", filter_list = filter_list)
Creating the Array Schema
# ... create domain dom
attr1 <- tiledb_attr("attr1", "INT32", filter_list = filter_list)
attr2 <- tiledb_attr("attr2", "FLOAT64", filter_list = filter_list)

# Create a dense array
schema <- tiledb_array_schema(dom, c(attr1, attr2), sparse = FALSE)
# Or, create a sparse array
# schema <- tiledb_array_schema(dom, c(attr1, attr2), sparse = TRUE)
Setting the Tile and Cell Order
# ... create domain dom
# ... create attributes attr1, attr2
# The tile and cell order can be "COL_MAJOR" or "ROW_MAJOR"
schema <- tiledb_array_schema(dom, c(attr1, attr2),
                              cell_order = "COL_MAJOR",
                              tile_order = "COL_MAJOR")
Setting the Data Tile Capacity
# set capacity (generic replacement method, then the direct setter)
# NOTE(review): the direct setter runs last with 10000, which is the value
# reported by both getters below.
capacity(schema) <- 100000
tiledb_array_schema_set_capacity(schema, 10000)
# get capacity (generic accessor, then the direct getter)
capacity(schema)
## [1] 10000
tiledb_array_schema_get_capacity(schema)
## [1] 10000
Allowing Duplicates
# load the schema of the sparse example array
sch <- schema(urisparse)

# get 'duplicates allowed?' status
allows_dups(sch)
## [1] FALSE
tiledb_array_schema_get_allows_dups(sch)
## [1] FALSE
# set 'duplicates allowed?' status
allows_dups(sch) <- TRUE
tiledb_array_schema_set_allows_dups(sch, TRUE)
## <pointer: 0x55bf9b993940>
Checking Correctness
# validate the schema `sch` via the generic, then via the direct function
check(sch)
## [1] TRUE
tiledb_array_schema_check(sch)
## [1] TRUE
Setting Filters
Creating a Filter List
# create a "GZIP" compression filter
flt <- tiledb_filter("GZIP")
# set the option 'COMPRESSION_LEVEL' to 10
tiledb_filter_set_option(flt, "COMPRESSION_LEVEL", 10)
# create a filter list with this filter
fltlst <- tiledb_filter_list(flt)
Other Filters
# create a filter list object with both filters
# (flt1 and flt2 are previously created filter objects)
fltlst <- tiledb_filter_list(c(flt1, flt2))
Setting the Tile Chunk Size
# ... create filter list
# set the maximum chunk size via the short form, then via the direct setter
set_max_chunk_size(filter_list, 10000)
tiledb_filter_list_set_max_chunk_size(filter_list, 10000)
# read it back via the short form, then via the direct getter
max_chunk_size(filter_list)
## [1] 10000
tiledb_filter_list_get_max_chunk_size(filter_list)
## [1] 10000
Setting a Filter List for an Attribute
# create (or access) an attribute
attr <- tiledb_attr("a", "INT32")

# create a filter list
flt1 <- tiledb_filter("BIT_WIDTH_REDUCTION")
flt2 <- tiledb_filter("ZSTD")
fltlst <- tiledb_filter_list(c(flt1, flt2))

# set the filter list
filter_list(attr) <- fltlst
Setting a Filter List for a Dimension
# create (or access) a dimension
d <- tiledb_dim("d", c(1L, 10L), 1L, "INT32")

# create a filter list
flt1 <- tiledb_filter("BIT_WIDTH_REDUCTION")
flt2 <- tiledb_filter("ZSTD")
fltlst <- tiledb_filter_list(c(flt1, flt2))

# assign the filter list
filter_list(d) <- fltlst
Setting a Filter List for All Dimensions
# ... create (or retrieve) array schema sch
# ... create filter list fl
# assign filter list to schema
tiledb_array_schema_set_coords_filter_list(sch, fl)

# Alternatively create the schema and set the coordinates filter list
sch <- tiledb_array_schema(dom, c(attr1, attr2), coords_filter_list = fl)
Setting a Filter List for Variable-Sized Value Offsets
# ... create (or retrieve) array schema sch
# ... create filter list fl
# assign filter list to schema
tiledb_array_schema_set_offsets_filter_list(sch, fl)

# Create the schema setting the offsets filter list
sch <- tiledb_array_schema(dom, c(attr1, attr2), offsets_filter_list = fl)
Setting Coordinate and Offset Filters
# ... create domain dom
# ... create attributes attr1, attr2
# ... create filter lists fl1, fl2, similar to attributes
f1 <- tiledb_filter("BIT_WIDTH_REDUCTION")
f2 <- tiledb_filter("ZSTD")
fl1 <- tiledb_filter_list(c(f1))
fl2 <- tiledb_filter_list(c(f2))

# Create the schema setting the coordinates and offsets filter lists
schema <- tiledb_array_schema(dom, c(attr1, attr2),
                              coords_filter_list = fl1,
                              offsets_filter_list = fl2)
Creating the Array
# ... create array schema
# Create the array
# (`uridense` is the URI defined in the preliminaries; `schema` is the
# array schema object created beforehand)
tiledb_array_create(uridense, schema)
Creating Encrypted Arrays
# assume previously created schema 'sch'
# use encryption key
encryption_key <- "0123456789abcdeF0123456789abcdeF"

# create encrypted array at 'uridensewkey' with schema 'sch'
tiledb_array_create(uridensewkey, sch, encryption_key)
## [1] "/tmp/RtmpEYNXef/denseenc"
Writing Arrays
Writing in Dense Subarrays
## prepare a larger 5 x 5 to embed into
tmp <- tempfile()
d1 <- tiledb_dim("d1", domain = c(1L, 5L))
d2 <- tiledb_dim("d2", domain = c(1L, 5L))
dom <- tiledb_domain(c(d1, d2))
val <- tiledb_attr(name = "val", type = "INT32")
sch <- tiledb_array_schema(dom, c(val))
tiledb_array_create(tmp, sch)

## fill the whole 5 x 5 array with random integers first
dat <- matrix(as.integer(rnorm(25) * 100), 5, 5)
arr <- tiledb_dense(tmp, as.data.frame = FALSE)
arr[] <- dat

# Prepare a 2x3 dense array
# Contrary to Python, R by default stores arrays in col-major order
data <- array(c(1L, 4L, 2L, 5L, 3L, 6L), dim = c(2, 3))

# Prepare the [1,2] x [2,4] subarray to write to
I <- c(1:2)
J <- c(2:4)

# Open the array and write the data to it
A <- tiledb_dense(uri = tmp)
A[I, J] <- data

# remove the temporary array again
unlink(tmp, recursive = TRUE)
Basic Writing using Low-Level Code
# open the dense example array for writing
ctx <- tiledb_ctx()
arrptr <- tiledb:::libtiledb_array_open(ctx@ptr, uridense, "WRITE")

## data: simple (integer sequence) of 1:16 times 10
vec <- 1:16 * 10L
## subarray covering rows 1..4 and cols 1..4
subarr <- c(1L, 4L, 1L, 4L)

# build the write query step by step, then submit it
qryptr <- tiledb:::libtiledb_query(ctx@ptr, arrptr, "WRITE")
qryptr <- tiledb:::libtiledb_query_set_subarray(qryptr, subarr)
qryptr <- tiledb:::libtiledb_query_set_layout(qryptr, "COL_MAJOR")
qryptr <- tiledb:::libtiledb_query_set_buffer(qryptr, "a", vec)
qryptr <- tiledb:::libtiledb_query_submit(qryptr)
res <- tiledb:::libtiledb_array_close(arrptr)
Writing Sparse Cells
# start from a clean slate at the sparse example URI
tmp <- urisparse
unlink(tmp, recursive = TRUE)

d1 <- tiledb_dim("d1", domain = c(1L, 5L))
d2 <- tiledb_dim("d2", domain = c(1L, 5L))
dom <- tiledb_domain(c(d1, d2))
val <- tiledb_attr("val", type = "INT32")
sch <- tiledb_array_schema(dom, val, sparse = TRUE)
tiledb_array_create(tmp, sch)
## [1] "/tmp/RtmpEYNXef/sparse"

# Prepare some data
data <- c(3L, 4L, 1L, 2L)

# Coordinates: cell (I[k], J[k]) receives data[k]
I <- c(3, 4, 1, 2)
J <- c(3, 4, 2, 1)

# Open the array and write the data to it
A <- tiledb_array(uri = tmp)
A[I, J] <- data
Writing Encrypted Arrays
# open for writing with corresponding encryption key
A <- tiledb_array(uridensewkey, encryption_key = encryption_key)
# access array as usual
Fixed-length Attributes
# remove any array left over from a previous run
if (dir.exists(uridensefix)) unlink(uridensefix, recursive = TRUE)

d1 <- tiledb_dim("d1", domain = c(1L, 4L))
d2 <- tiledb_dim("d2", domain = c(1L, 4L))
dom <- tiledb_domain(c(d1, d2))

# 32 values: two per cell for the 4 x 4 array
vec <- 1:32 * 10L
attr <- tiledb_attr("a", type = r_to_tiledb_type(vec))

## set to two values per cell
tiledb:::libtiledb_attribute_set_cell_val_num(attr@ptr, 2)
sch <- tiledb_array_schema(dom, attr)
tiledb_array_create(uridensefix, sch)
## [1] "/tmp/RtmpEYNXef/densefix"

ctx <- tiledb_ctx()
arrptr <- tiledb:::libtiledb_array_open(ctx@ptr, uridensefix, "WRITE")
subarr <- c(1L, 4L, 1L, 4L)

qryptr <- tiledb:::libtiledb_query(ctx@ptr, arrptr, "WRITE")
qryptr <- tiledb:::libtiledb_query_set_subarray(qryptr, subarr)
qryptr <- tiledb:::libtiledb_query_set_layout(qryptr, "COL_MAJOR")
qryptr <- tiledb:::libtiledb_query_set_buffer(qryptr, "a", vec)
qryptr <- tiledb:::libtiledb_query_submit(qryptr)
res <- tiledb:::libtiledb_array_close(arrptr)

#TODO Higher-level R support
Var-length Attributes
# remove any array left over from a previous run
if (dir.exists(uridensevar)) unlink(uridensevar, recursive = TRUE)

## Define array
## The array will be 4x4 with dimensions "rows" and "cols", with domain [1,4].
dom <- tiledb_domain(dims = c(tiledb_dim("rows", c(1L, 4L), 4L, "INT32"),
                              tiledb_dim("cols", c(1L, 4L), 4L, "INT32")))

attr <- tiledb_attr("a1", type = "CHAR")
## set to variable length
tiledb:::libtiledb_attribute_set_cell_val_num(attr@ptr, NA)

## now set the schema
ctx <- tiledb_ctx()
schptr <- tiledb:::libtiledb_array_schema_create(ctx@ptr, "DENSE")
tiledb:::libtiledb_array_schema_set_domain(schptr, dom@ptr)
tiledb:::libtiledb_array_schema_set_cell_order(schptr, "COL_MAJOR")
tiledb:::libtiledb_array_schema_set_tile_order(schptr, "COL_MAJOR")
tiledb:::libtiledb_array_schema_add_attribute(schptr, attr@ptr)

## Create the (empty) array on disk.
tiledb:::libtiledb_array_create(uridensevar, schptr)
## [1] "/tmp/RtmpEYNXef/densevar"

## all 16 cell values concatenated, plus one start offset per cell
data <- "abbcccddeeefghhhijjjkklmnoop"
offsets <- c(0L, 1L, 3L, 6L, 8L, 11L, 12L, 13L, 16L, 17L, 20L, 22L, 23L, 24L,
             25L, 27L)

ctx <- tiledb_ctx()
arrptr <- tiledb:::libtiledb_array_open(ctx@ptr, uridensevar, "WRITE")
qryptr <- tiledb:::libtiledb_query(ctx@ptr, arrptr, "WRITE")
qryptr <- tiledb:::libtiledb_query_set_layout(qryptr, "COL_MAJOR")

bufptr <- tiledb:::libtiledb_query_buffer_var_char_create(offsets, data)
qryptr <- tiledb:::libtiledb_query_set_buffer_var_char(qryptr, "a1", bufptr)
qryptr <- tiledb:::libtiledb_query_submit(qryptr)
tiledb:::libtiledb_array_close(arrptr)
## <pointer: 0x55bf9b7c9b00>

#TODO Higher-level R support
Writing at a Timestamp
# 'at' uses Sys.time() from R in seconds, and shifts back 10 minutes
at <- Sys.time() - 10 * 60

# 'arr' is an already created array, could also be encrypted and carry key
arr <- tiledb_array_open_at(arr, "WRITE", at)

# arr is now open for writing, any suitable content can be written the usual way
Getting the Fragment Info
# continuing from previous example on dense variable length array
# (but this works of course with any array after a write is needed)
# `qry` is the query object of that write, `idx` a fragment index

# Number of fragments
numfrag <- tiledb_query_get_fragment_num(qry)

# URI of given fragment, with 0 <= idx < numfrag
uri <- tiledb_query_get_fragment_uri(qry, idx)

# Timestamp range of given fragment, with 0 <= idx < numfrag
tsrange <- tiledb_query_get_fragment_timestamp_range(qry, idx)
Reading Arrays
Reading the Array Schema
Inspecting the array schema
# get a schema directly from storage, uri holds a valid array URI
uri <- "<array_uri>"
sch <- schema(uri)

# get an encrypted schema directly from storage, enc_key is the AES-256 key
sch <- schema(uri, enc_key)

# get a schema from an already opened array
# using a sparse array example, works the same for dense arrays
array_name <- urisparse
A <- tiledb_array(uri = array_name, is.sparse = TRUE)
sch <- schema(A)

# one can also open encrypted arrays with key for AES-256 encryption
# and all other options (for sparse arrays, data.frame objects...)
key <- "0123456789abcdeF0123456789abcdeF"
A <- tiledb_array(uri = array_name, encryption_key = key)
sch <- schema(A)

# Get array schema, this shows the sparse accessor
# and it is similar for tiledb_dense()
A <- tiledb_array(uri = urisparse, is.sparse = TRUE)
schema <- schema(A)

# Get array type
sparse <- is.sparse(schema)

# Get tile capacity
t_capacity <- capacity(schema)

# Get tile order
t_order <- tile_order(schema)

# Get cell order
c_order <- cell_order(schema)

# Get coordinates and offset filter list
reslist <- filter_list(schema)

# Get the array domain
dom <- domain(schema)

# Get all attributes as list
attrs <- attrs(schema)

# Check if given attribute exists
has_attr <- has_attribute(schema, "attr")

# Get attribute from name
attr <- attrs(schema, "attr")

# Dump the array schema in ASCII format in the selected output
show(schema)
Inspecting Domain
# ... get array schema
# ... get domain from schema

# Get the domain datatype (i.e., the datatype of all dimensions)
type <- datatype(dom)

# Get number of dimensions
dim_num <- dim(dom)

# Get all dimensions
dims <- dimensions(dom)

# Get dimension by index (0 <= i < dim_num)
dim <- tiledb_domain_get_dimension_from_index(dom, 1)

# Get dimension by name
dim <- tiledb_domain_get_dimension_from_name(dom, "dimname")

# Check dimension for name
tiledb_domain_has_dimension(dom, "dimname")

# Dump the domain in ASCII format in the selected output
show(dom)
Inspecting Dimensions
# ... get array schema
# ... get domain
# ... get dimension by index or name

# Get dimension name
dim_name <- name(dim)

# Get dimension datatype
dim_type <- datatype(dim)

# Get dimension domain
domain <- domain(dim)

# Get tile extent
tile_extent <- tile(dim)

# Dump the dimension in ASCII format in the selected output
show(dim)
Inspecting Attributes
# ... get array schema
# ... get attribute by index or name

# Get attribute name
attr_name <- name(attr)

# Get attribute datatype
attr_type <- datatype(attr)

# Get filter list
filter_list <- filter_list(attr)

# Check if attribute is variable-length
is_var <- tiledb_attribute_is_variable_sized(attr)

# Get number of values per cell
num <- ncells(attr)

# Get cell size for this attribute
sz <- tiledb_attribute_get_cell_size(attr)

# Get the fill value (for both fixed and variable sized attributes)
tiledb_attribute_get_fill_value(attr)

# Dump the attribute in ASCII format in the selected output
show(attr)
Inspecting Filters
# dim holds a previously created or loaded Dimension object
fltrlst <- filter_list(dim)
# or fltrlst <- filter_list(attr) for some attribute `attr`

# get number of filters
nb <- nfilters(fltrlst)

# get max chunk size
mxsz <- max_chunk_size(fltrlst)

# get filter by index from filter list (0 <= idx < num_filters)
idx <- i
fltr <- fltrlst[idx]

# get option (that is filter-dependent) from filter
tiledb_filter_get_option(fltr, "COMPRESSION_LEVEL")

# set option (that is filter-dependent) for filter
tiledb_filter_set_option(fltr, "COMPRESSION_LEVEL", 9)

# get filter type
tiledb_filter_type(fltr)
Basic Reading
# Open a dense array
A <- tiledb_array(uri = uridense)

# Or, open a sparse array
# A <- tiledb_sparse(uri = "<array-uri>", ctx=ctx)

# Slice only rows 1, 2 and cols 2, 3, 4
data <- A[1:2, 2:4]
show(data)
## rows cols a
## 1: 1 2 50
## 2: 1 3 90
## 3: 1 4 130
## 4: 2 2 60
## 5: 2 3 100
## 6: 2 4 140
Basic Reading using Low-Level Code
# open the dense example array for reading
ctx <- tiledb_ctx()
arrptr <- tiledb:::libtiledb_array_open(ctx@ptr, uridense, "READ")
## subarray of rows 1,2 and cols 2,3,4
subarr <- c(1L, 2L, 2L, 4L)

qryptr <- tiledb:::libtiledb_query(ctx@ptr, arrptr, "READ")
qryptr <- tiledb:::libtiledb_query_set_subarray(qryptr, subarr)
qryptr <- tiledb:::libtiledb_query_set_layout(qryptr, "COL_MAJOR")
v <- integer(6) # reserve space
qryptr <- tiledb:::libtiledb_query_set_buffer(qryptr, "a", v)
qryptr <- tiledb:::libtiledb_query_submit(qryptr)
print(v) # unformed array, no coordinates
## [1] 50 60 90 100 130 140
res <- tiledb:::libtiledb_array_close(arrptr)
Variable-length Attributes
# open the variable-length example array for reading
ctx <- tiledb_ctx()
arrptr <- tiledb:::libtiledb_array_open(ctx@ptr, uridensevar, "READ")

# full 4 x 4 subarray and a result buffer for attribute "a1"
subarr <- c(1L, 4L, 1L, 4L)
bufptr <- tiledb:::libtiledb_query_buffer_var_char_alloc(arrptr, subarr,
                                                         "a1", 16, 100)

qryptr <- tiledb:::libtiledb_query(ctx@ptr, arrptr, "READ")
qryptr <- tiledb:::libtiledb_query_set_subarray(qryptr, subarr)
qryptr <- tiledb:::libtiledb_query_set_layout(qryptr, "COL_MAJOR")

qryptr <- tiledb:::libtiledb_query_set_buffer_var_char(qryptr, "a1", bufptr)
qryptr <- tiledb:::libtiledb_query_submit(qryptr)
tiledb:::libtiledb_array_close(arrptr)

# extract the result from the buffer and print it without quotes
mat <- tiledb:::libtiledb_query_get_buffer_var_char(bufptr)
print(mat, quote = FALSE)
Getting the Non-empty Domain
# example with one fixed- and one variable-sized dimension
dom <- tiledb_domain(dims = c(tiledb_dim("d1", c(1L, 4L), 4L, "INT32"),
                              tiledb_dim("d2", NULL, NULL, "ASCII")))

# ... add attribute(s), write content, ...
# ... arr is the array opened

# retrieve non-empty domain for fixed-sized dimension
tiledb_array_get_non_empty_domain_from_index(arr, 1)
tiledb_array_get_non_empty_domain_from_name(arr, "d1")

# retrieve non-empty domain for variable-sized dimension
tiledb_array_get_non_empty_domain_from_index(arr, 2)
tiledb_array_get_non_empty_domain_from_name(arr, "d2")
Reopening Arrays
# Arrays are reopened automatically for you based on
# read or write being performed. For direct pointer-based
# access you can also explicitly reopen
arr@ptr <- tiledb:::libtiledb_array_reopen(arr@ptr)
Reading Encrypted Arrays
# Open the array and read as a data.frame from it.
A <- tiledb_array(uri = array_name, as.data.frame = TRUE,
                  encryption_key = encryption_key)
# Slice rows 1 and 2, and cols 2, 3 and 4
A[1:2, 2:4]

# timestamps for TileDB are milliseconds since epoch, we use
# R Datetime object to pass the value
tstamp <- as.POSIXct(1577955845.678, origin = "1970-01-01")

# open the array for reading at the timestamp
A <- tiledb_array_open_at(A, "READ", tstamp)
Multi-range Subarrays
# create query, allocate result buffer, ...

# add two query ranges on the first dimension
qry <- tiledb_query_add_range(qry, schema, "d1", 2L, 4L)
qry <- tiledb_query_add_range(qry, schema, "d1", 6L, 8L)

# add a query range on the second dimension, using variable size
qry <- tiledb_query_add_range(qry, schema, "d2", "caaa", "gzzz")

# number of ranges given index
num <- tiledb_query_get_range_num(qry, idx)

# range start, end and stride for range i (1 <= i <= num)
rng <- tiledb_query_get_range(qry, idx, i)

# range start and end for variable-sized dimension for range i (1 <= i <= num)
strrng <- tiledb_query_get_range_var(qry, idx, i)
Incomplete Queries
# open the dense example array and set up a read over the full 4 x 4 domain
ctx <- tiledb_ctx()
arrptr <- tiledb:::libtiledb_array_open(ctx@ptr, uridense, "READ")
qryptr <- tiledb:::libtiledb_query(ctx@ptr, arrptr, "READ")
subarr <- c(1L, 4L, 1L, 4L)
qryptr <- tiledb:::libtiledb_query_set_subarray(qryptr, subarr)
vec <- integer(4) # reserve (insufficient) space
qryptr <- tiledb:::libtiledb_query_set_buffer(qryptr, "a", vec)

# resubmit until the query reports completion; each pass prints the buffer
finished <- FALSE
while (!finished) {
  qryptr <- tiledb:::libtiledb_query_submit(qryptr)
  print(vec)
  finished <- tiledb:::libtiledb_query_status(qryptr) == "COMPLETE"
}
## [1] 10 50 90 130
## [1] 20 60 100 140
## [1] 30 70 110 150
## [1] 40 80 120 160
res <- tiledb:::libtiledb_array_close(arrptr)
Result Estimation
# ...create query object

# estimated size of a fixed-length attribute in sparse array
sz <- tiledb_query_get_est_result_size(qry, "a")

# estimated size of a variable-length attribute in dense or sparse array
sz <- tiledb_query_get_est_result_size_var(qry, "b")
Time Traveling
# time traveling is currently only accessible via the lower-level API
# we use the R Datetime type; internally TileDB uses milliseconds since epoch
tstamp <- Sys.time() - 60 * 60 # one hour ago

ctx <- tiledb_ctx()
arrptr <- tiledb:::libtiledb_array_open_at(ctx@ptr, uridense, "READ", tstamp)
subarr <- c(1L, 2L, 2L, 4L)
qryptr <- tiledb:::libtiledb_query(ctx@ptr, arrptr, "READ")
qryptr <- tiledb:::libtiledb_query_set_subarray(qryptr, subarr)
qryptr <- tiledb:::libtiledb_query_set_layout(qryptr, "COL_MAJOR")
a <- integer(6) # reserve space
qryptr <- tiledb:::libtiledb_query_set_buffer(qryptr, "a", a)
qryptr <- tiledb:::libtiledb_query_submit(qryptr)
res <- tiledb:::libtiledb_array_close(arrptr)
a

# we can do the same with encrypted arrays
encryption_key <- "0123456789abcdeF0123456789abcdeF"
arrptr <- tiledb:::libtiledb_array_open_at_with_key(ctx@ptr, uridensewkey,
                                                    "READ", encryption_key,
                                                    tstamp)
Embedded SQL
Asynchronous Queries
# ... create read or write query
# (`qry` below is that query object)
# Instead of using tiledb_query_submit(), use tiledb_query_submit_async()
# There is an alternate form with a callback function which is not yet supported
tiledb_query_submit_async(qry)
Configuration
Basic Usage
# Create a configuration object
config <- tiledb_config()

# Set a configuration parameter
config["sm.tile_cache_size"] <- "5000"

# Get a configuration parameter
tile_cache_size <- config["sm.tile_cache_size"]

# Unset a configuration parameter
tiledb_config_unset(config, "sm.tile_cache_size")
## <pointer: 0x55bf9ab34ef0>
Save and Load to File
# Save to file
config <- tiledb_config()
config["sm.tile_cache_size"] <- 0
file <- tempfile(pattern = "tiledb_config", fileext = ".txt")
tiledb_config_save(config, file)
## [1] "/tmp/RtmpEYNXef/tiledb_confige46b641f93222.txt"

# Load from file
config_loaded <- tiledb_config_load(file)
tile_cache_size <- config_loaded["sm.tile_cache_size"]
Configuration Iterator
# R has no native iterator but one can loop over the config elements
# by retrieving the configuration as a vector
cfg <- as.vector(tiledb_config())

# print all non-empty config elements
for (n in names(cfg)) {
  if (cfg[n] != "") {
    cat(n, ":", cfg[n], "\n")
  }
}
## config.env_var_prefix : TILEDB_
## config.logging_format : DEFAULT
## config.logging_level : 0
## rest.http_compressor : any
## rest.retry_count : 25
## rest.retry_delay_factor : 1.25
## rest.retry_http_codes : 503
## rest.retry_initial_delay_ms : 500
## rest.server_address : https://api.tiledb.com
## rest.server_serialization_format : CAPNP
## sm.check_coord_dups : true
## sm.check_coord_oob : true
## sm.check_global_order : true
## sm.compute_concurrency_level : 12
## sm.consolidation.amplification : 1.0
## sm.consolidation.buffer_size : 50000000
## sm.consolidation.mode : fragments
## sm.consolidation.step_max_frags : 4294967295
## sm.consolidation.step_min_frags : 4294967295
## sm.consolidation.step_size_ratio : 0.0
## sm.consolidation.steps : 4294967295
## sm.consolidation.timestamp_end : 18446744073709551615
## sm.consolidation.timestamp_start : 0
## sm.dedup_coords : false
## sm.enable_signal_handlers : true
## sm.encryption_key : 0
## sm.encryption_type : NO_ENCRYPTION
## sm.io_concurrency_level : 12
## sm.max_tile_overlap_size : 314572800
## sm.mem.malloc_trim : true
## sm.mem.reader.sparse_global_order.ratio_array_data : 0.1
## sm.mem.reader.sparse_global_order.ratio_coords : 0.5
## sm.mem.reader.sparse_global_order.ratio_query_condition : 0.25
## sm.mem.reader.sparse_global_order.ratio_rcs : 0.05
## sm.mem.reader.sparse_global_order.ratio_result_tiles : 0.05
## sm.mem.reader.sparse_global_order.ratio_tile_ranges : 0.1
## sm.mem.reader.sparse_unordered_with_dups.ratio_array_data : 0.1
## sm.mem.reader.sparse_unordered_with_dups.ratio_coords : 0.5
## sm.mem.reader.sparse_unordered_with_dups.ratio_query_condition : 0.25
## sm.mem.reader.sparse_unordered_with_dups.ratio_rcs : 0.05
## sm.mem.reader.sparse_unordered_with_dups.ratio_result_tiles : 0.05
## sm.mem.reader.sparse_unordered_with_dups.ratio_tile_ranges : 0.1
## sm.mem.total_budget : 10737418240
## sm.memory_budget : 5368709120
## sm.memory_budget_var : 10737418240
## sm.query.dense.reader : legacy
## sm.query.sparse_global_order.reader : legacy
## sm.query.sparse_unordered_with_dups.reader : refactored
## sm.read_range_oob : warn
## sm.skip_checksum_validation : false
## sm.skip_est_size_partitioning : false
## sm.tile_cache_size : 10000000
## sm.vacuum.mode : fragments
## sm.vacuum.timestamp_end : 18446744073709551615
## sm.vacuum.timestamp_start : 0
## sm.var_offsets.bitsize : 64
## sm.var_offsets.extra_element : false
## sm.var_offsets.mode : bytes
## vfs.azure.block_list_block_size : 5242880
## vfs.azure.max_parallel_ops : 12
## vfs.azure.use_block_list_upload : true
## vfs.azure.use_https : true
## vfs.file.enable_filelocks : true
## vfs.file.max_parallel_ops : 12
## vfs.file.posix_directory_permissions : 755
## vfs.file.posix_file_permissions : 644
## vfs.gcs.max_parallel_ops : 12
## vfs.gcs.multi_part_size : 5242880
## vfs.gcs.request_timeout_ms : 3000
## vfs.gcs.use_multi_part_upload : true
## vfs.min_batch_gap : 512000
## vfs.min_batch_size : 20971520
## vfs.min_parallel_size : 10485760
## vfs.read_ahead_cache_size : 10485760
## vfs.read_ahead_size : 102400
## vfs.s3.bucket_canned_acl : NOT_SET
## vfs.s3.connect_max_tries : 5
## vfs.s3.connect_scale_factor : 25
## vfs.s3.connect_timeout_ms : 10800
## vfs.s3.logging_level : Off
## vfs.s3.max_parallel_ops : 12
## vfs.s3.multipart_part_size : 5242880
## vfs.s3.object_canned_acl : NOT_SET
## vfs.s3.proxy_port : 0
## vfs.s3.proxy_scheme : http
## vfs.s3.region : us-east-1
## vfs.s3.request_timeout_ms : 3000
## vfs.s3.requester_pays : false
## vfs.s3.scheme : https
## vfs.s3.skip_init : false
## vfs.s3.use_multipart_upload : true
## vfs.s3.use_virtual_addressing : true
## vfs.s3.verify_ssl : true
Array Metadata
Writing Array Metadata
# 'array' can be a URI, or an array opened for writing
# store an integer under key "aaa" and a numeric vector under key "bb"
tiledb_put_metadata(array, "aaa", 100L)
tiledb_put_metadata(array, "bb", c(1.1, 2.2))
Reading Array Metadata
One can read by key:
# 'array' can be a URI, or an array opened for reading
# look up the metadata entry with key "aaa"
tiledb_get_metadata(array, "aaa")
Or one can retrieve all metadata at once:
# 'array' can be a URI, or an array opened for reading
md <- tiledb_get_all_metadata(array)

# prints all keys and (formatted) values
print(md)
Deleting Array Metadata
# 'array' can be a URI, or an array opened for writing
# delete the metadata entry with key "aaa"
tiledb_delete_metadata(array, "aaa")
Consolidating And Vacuuming
Fragments
# An array URI
uri <- "<array_uri>"

# Consolidate with default configuration
array_consolidate(uri)

# Alternatively, create and pass a configuration object
cfg <- tiledb_config()
cfg["sm.consolidation.steps"] <- "3"
cfg["sm.consolidation.mode"] <- "fragments"
array_consolidate(uri, cfg)
Vacuuming
# An array URI
uri <- "<array_uri>"

# Vacuum with default configuration
array_vacuum(uri)

# Alternatively, create and pass a configuration object
cfg <- tiledb_config()
cfg["sm.vacuum.mode"] <- "fragments"
array_vacuum(uri, cfg)
Object Management
Creating TileDB Groups
# create a TileDB group at the given path
tiledb_group_create("/tmp/my_group")
Getting the Object Type
# query the type of the TileDB object found at the given path
type <- tiledb_object_type("<path>")
List the Object Hierarchy
# List arrays (defaults to default "PREORDER" traversal)
tiledb_object_ls(uri)

# Walk arrays (with "POSTORDER" traversal) returning a data.frame
res <- tiledb_object_walk("<uri>", "POSTORDER")

# Show the content
print(res)
Move / Remove Object
# rename (move) the group
tiledb_object_mv("/tmp/my_group", "/tmp/my_group_2")

# remove an object below the moved group; the package's removal function is
# tiledb_object_rm(), which takes the URI first and defaults to the current
# context
tiledb_object_rm("/tmp/my_group_2/dense_array")
Virtual Filesystem
Writing
# binary file to be written
uri <- tempfile(pattern = "tiledb_vfs", fileext = ".bin")
# open file
fhbuf <- tiledb_vfs_open(uri, "WRITE")

# create a binary payload from a serialized R object
payload <- as.integer(serialize(list(dbl = 153, string = "abcde"), NULL))
# write it and close file
tiledb_vfs_write(fhbuf, payload)
tiledb_vfs_close(fhbuf)

# write again overwriting previous write
fhbuf <- tiledb_vfs_open(uri, "WRITE")
payload <- as.integer(serialize(list(dbl = 153.1, string = "abcdef"), NULL))
tiledb_vfs_write(fhbuf, payload)
tiledb_vfs_close(fhbuf)

# append to existing file
fhbuf <- tiledb_vfs_open(uri, "APPEND")
payload <- as.integer(serialize(c(string = "ghijkl"), NULL))
tiledb_vfs_write(fhbuf, payload)
tiledb_vfs_close(fhbuf)
Reading
# open a binary file for reading
fhbuf <- tiledb_vfs_open(uri, "READ")
# read starting at offset 0; the second integer64 is 488
# (presumably the number of bytes to read -- TODO confirm)
vec <- tiledb_vfs_read(fhbuf, as.integer64(0), as.integer64(488))
tiledb_vfs_close(fhbuf)
Managing
# Creating a directory
if (!tiledb_vfs_is_dir("dir_A")) {
  tiledb_vfs_create_dir("dir_A")
  cat("Created 'dir_A'\n")
} else {
  cat("'dir_A' already exists\n")
}

# Creating an (empty) file
if (!tiledb_vfs_is_file("dir_A/file_A")) {
  tiledb_vfs_touch("dir_A/file_A")
  cat("Created empty file 'dir_A/file_A'\n")
} else {
  cat("File 'dir_A/file_A' already existed\n")
}

# Getting the file size
cat("Size of file 'dir_A/file_A': ",
    tiledb_vfs_file_size("dir_A/file_A"), "\n")

# Moving files (moving directories is similar)
tiledb_vfs_move_file("dir_A/file_A", "dir_A/file_B")

# Cleaning up
tiledb_vfs_remove_file("dir_A/file_B")
tiledb_vfs_remove_dir("dir_A")
S3
# create an S3 bucket, then remove it again
tiledb_vfs_create_bucket("s3://my_bucket")
tiledb_vfs_remove_bucket("s3://my_bucket")
Configuring VFS
# use the current context
ctx <- tiledb_get_context()

# create a configuration and set a VFS parameter on it
config <- tiledb_config()
config["vfs.file.max_parallel_ops"] <- 16

# create the VFS object from configuration and context
vfs <- tiledb_vfs(config, ctx)

# Or create the Config first and pass to the Ctx constructor
Using Performance Statistics
# Start collecting statistics
tiledb_stats_enable()

# ... create some query here
res <- A[1:4]

# Stop collecting statistics
tiledb_stats_disable()

# Show the statistics on the console
tiledb_stats_print()

# Save the statistics to a file
tiledb_stats_dump("my_file_name")

# You can also reset the stats as follows
tiledb_stats_reset()
Catching Errors
result <- tryCatch({
  # Create a group. The code below creates a group `my_group` and prints a
  # message because (normally) it will succeed.
  tiledb_group_create("/tmp/my_group")
  # Create the same group again. If we attempt to create the same group
  # `my_group` as shown below, TileDB will return an error.
  tiledb_group_create("/tmp/my_group")
}, warning = function(w) {
  # cat() cannot print a condition object; print its message text instead
  cat(conditionMessage(w), "\n")
}, error = function(e) {
  cat(conditionMessage(e), "\n")
}, finally = {})