join_matching_standards.Rd
Find all entries in a standard dataset which match features in a query dataset based on the consistency of 1 or more variables.
join_matching_standards(
query_dataset,
standard_dataset,
variable_tolerances,
distance_method = "manhattan",
threshold = nrow(variable_tolerances)
)
query_dataset: a tibble containing candidate features with 1+ match variables.
standard_dataset: a tibble containing standards with 1+ variables which match query_dataset.
variable_tolerances: a three column tibble containing "variable", "tolerance" and "relative_or_absolute".
distance_method: distance measure between scaled variables (x/tol): e.g., manhattan, euclidean.
threshold: distance cutoff to report a match.
an inner_join tibble of the query_dataset and standard_dataset
Distance is calculated on variables that have been rescaled to their tolerances (variable/tolerance) so that deviations in each dimension are equivalent.
library(dplyr)
query_dataset <- tibble::tibble(query_id = 1:3, amu = 1:3)
standard_dataset <- clamr::isotope_summaries %>% dplyr::select(z, label, amu)
variable_tolerances <- tibble::tibble(variable = "amu", tolerance = 0.01, relative_or_absolute = "absolute")
join_matching_standards(query_dataset, standard_dataset, variable_tolerances, distance_method = "manhattan", threshold = 1)
#> Error in loadNamespace(x): there is no package called ‘fuzzyjoin’
query_dataset <- tibble::tibble(query_id = 1:3, amu = 1:3, rt = c(10, 15, 3))
standard_dataset <- clamr::isotope_summaries %>%
dplyr::select(z, label, amu) %>%
dplyr::mutate(rt = 1:n())
variable_tolerances <- tibble::tribble(
~variable, ~tolerance, ~relative_or_absolute,
"amu", 0.01, "absolute",
"rt", 5, "absolute"
)
join_matching_standards(query_dataset, standard_dataset, variable_tolerances, distance_method = "manhattan", threshold = 4)
#> Error in loadNamespace(x): there is no package called ‘fuzzyjoin’
# matching with ppm
query_dataset <- tibble::tibble(query_id = 1:4, mz = c(1, 10, 100, 1000))
standard_dataset <- tibble::tibble(library_id = 1:4, mz = c(1.001, 10.001, 100.001, 1000.001))
variable_tolerances <- tibble::tibble(variable = "mz", tolerance = 10e-6, relative_or_absolute = "relative")
join_matching_standards(query_dataset, standard_dataset, variable_tolerances, distance_method = "manhattan", threshold = 10)
#> Error in loadNamespace(x): there is no package called ‘fuzzyjoin’
query_dataset <- tibble::tibble(query_id = 1:4, mz = c(1, 10, 100, 1000), rt = 1:4)
standard_dataset <- tibble::tibble(library_id = 1:4, mz = c(1.001, 10.001, 100.001, 1000.001), rt = 1:4 + 0.5)
variable_tolerances <- tibble::tribble(
~variable, ~tolerance, ~relative_or_absolute,
"mz", 10e-6, "relative",
"rt", 1, "absolute"
)
join_matching_standards(query_dataset, standard_dataset, variable_tolerances, distance_method = "manhattan", threshold = 10)
#> Error in loadNamespace(x): there is no package called ‘fuzzyjoin’