Create Tidy Omic — create_tidy_omic

A tidy omics object contains a formatted dataset and a summary of the experimental design.

create_tidy_omic(
  df,
  feature_pk,
  feature_vars = NULL,
  sample_pk,
  sample_vars = NULL,
  omic_type_tag = "general",
  verbose = TRUE
)

Arguments

df: a data.frame (or tibble) containing some combination of feature, sample and observation-level variables
feature_pk: the name of the variable in df that uniquely identifies each feature
feature_vars: a character vector of additional feature-level variables (or NULL if there are no additional variables)
sample_pk: the name of the variable in df that uniquely identifies each sample
sample_vars: a character vector of additional sample-level variables (or NULL if there are no additional variables)
omic_type_tag: an optional subtype of omic data; one of "metabolomics", "lipidomics", "proteomics", "genomics", or "general" (the default)
verbose: if TRUE, print extra reporting messages

Value

An S3 tidy_omic/tomic object built on a list:

data

A tibble with one row per measurement (i.e., features x samples)

design

A list which organizes the dataset's metadata:

feature_pk: variable specifying a unique feature
sample_pk: variable specifying a unique sample
features: tibble of feature attributes
samples: tibble of sample attributes
measurements: tibble of measurement attributes

Examples


library(dplyr)

measurement_df <- tidyr::expand_grid(
  feature_id = 1:10,
  sample_id = LETTERS[1:5]
) %>%
  dplyr::mutate(value = rnorm(n()))

feature_df <- tibble(
  feature_id = 1:10,
  feature_group = rep(c("a", "b"), each = 5)
)
sample_df <- tibble(
  sample_id = LETTERS[1:5],
  sample_group = c("a", "a", "b", "b", "b")
)

triple_omic <- create_triple_omic(
  measurement_df, feature_df, sample_df,
  "feature_id", "sample_id"
)
raw_tidy_omic <- triple_to_tidy(triple_omic)$data

create_tidy_omic(raw_tidy_omic,
  feature_pk = "feature_id",
  feature_vars = "feature_group", sample_pk = "sample_id",
  sample_vars = "sample_group"
)
#> 1 measurement variables were defined as the
#> left overs from the specified feature and sample varaibles:
#> value
#> $data
#> # A tibble: 50 × 5
#>    feature_id feature_group sample_id sample_group   value
#>         <int> <chr>         <chr>     <chr>          <dbl>
#>  1          1 a             A         a             1.34  
#>  2          1 a             B         a             1.02  
#>  3          1 a             C         b             2.04  
#>  4          1 a             D         b            -0.156 
#>  5          1 a             E         b             0.702 
#>  6          2 a             A         a            -1.28  
#>  7          2 a             B         a            -0.0231
#>  8          2 a             C         b             0.343 
#>  9          2 a             D         b             0.757 
#> 10          2 a             E         b             0.910 
#> # ℹ 40 more rows
#> 
#> $design
#> $design$features
#> # A tibble: 2 × 2
#>   variable      type               
#>   <chr>         <chr>              
#> 1 feature_id    feature_primary_key
#> 2 feature_group character          
#> 
#> $design$samples
#> # A tibble: 2 × 2
#>   variable     type              
#>   <chr>        <chr>             
#> 1 sample_id    sample_primary_key
#> 2 sample_group character         
#> 
#> $design$measurements
#> # A tibble: 3 × 2
#>   variable   type               
#>   <chr>      <chr>              
#> 1 feature_id feature_primary_key
#> 2 sample_id  sample_primary_key 
#> 3 value      numeric            
#> 
#> $design$feature_pk
#> [1] "feature_id"
#> 
#> $design$sample_pk
#> [1] "sample_id"
#> 
#> 
#> attr(,"class")
#> [1] "tidy_omic" "tomic"     "general"