olink_normalization_n {OlinkAnalyze}R Documentation

Bridge and/or subset normalization of all proteins among multiple NPX projects.

Description

This function normalizes pairs of NPX projects (data frames) using shared samples or subsets of samples.

Usage

olink_normalization_n(norm_schema)

Arguments

norm_schema

A tibble with more than 1 rows and (strictly) the following columns: "order", "name", "data", "samples", "normalization_type", "normalize_to". See "Details" for the structure of the data frame (required)

Details

This function is a wrapper of olink_normalization_bridge and olink_normalization_subset.

The input of this function is a tibble that contains all the necessary information to normalize multiple NPX projects. This tibble is called the normalization schema. The basic idea is that every row of the data frame is a separate project to be normalized. We assume that there is always one baseline project that does not normalize to any other. All other project normalize to one or more projects. The function handles projects that are normalized in a chain, for example:

The function can also handle a mixed schema of bridge and subset normalization.

Specifications of the normalization schema data frame:

Value

A "tibble" of NPX data in long format containing normalized NPX values, including adjustment factors and name of project.

Examples


#### Bridge normalization of two projects

# prepare datasets
npx_df1 <- npx_data1 |>
  dplyr::filter(!stringr::str_detect(SampleID, "CONTROL_")) |>
  dplyr::select(-Project) |>
  dplyr::mutate(Normalization = "Intensity")
npx_df2 <- npx_data2 |>
  dplyr::filter(!stringr::str_detect(SampleID, "CONTROL_")) |>
  dplyr::select(-Project) |>
  dplyr::mutate(Normalization = "Intensity")

# Find overlapping samples, but exclude Olink control
overlap_samples <- dplyr::intersect(unique(npx_df1$SampleID),
                                    unique(npx_df2$SampleID))
overlap_samples_list <- list("DF1" = overlap_samples,
                             "DF2" = overlap_samples)

# create tibble for input
norm_schema_bridge <- dplyr::tibble(
  order              = c(1, 2),
  name               = c("NPX_DF1", "NPX_DF2"),
  data               = list("NPX_DF1" = npx_df1,
                            "NPX_DF2" = npx_df2),
  samples            = list("NPX_DF1" = NA_character_,
                            "NPX_DF2" = overlap_samples_list),
  normalization_type = c(NA_character_, "Bridge"),
  normalize_to       = c(NA_character_, "1")
)

# normalize
olink_normalization_n(norm_schema = norm_schema_bridge)

#### Subset normalization of two projects

# datasets
npx_df1 <- npx_data1 |>
  dplyr::filter(!stringr::str_detect(SampleID, "CONTROL_")) |>
  dplyr::select(-Project) |>
  dplyr::mutate(Normalization = "Intensity")
npx_df2 <- npx_data2 |>
  dplyr::filter(!stringr::str_detect(SampleID, "CONTROL_")) |>
  dplyr::select(-Project) |>
  dplyr::mutate(Normalization = "Intensity")

# Find a suitable subset of samples from both projects, but exclude Olink
# controls and samples that fail QC.
df1_samples <- npx_df1 |>
  dplyr::filter(!stringr::str_detect(SampleID, "CONTROL_")) |>
  dplyr::group_by(SampleID) |>
  dplyr::filter(all(QC_Warning == 'Pass')) |>
  dplyr::pull(SampleID) |>
  unique() |>
  sample(size = 16, replace = FALSE)
df2_samples <- npx_df2 |>
  dplyr::filter(!stringr::str_detect(SampleID, "CONTROL_")) |>
  dplyr::group_by(SampleID) |>
  dplyr::filter(all(QC_Warning == 'Pass')) |>
  dplyr::pull(SampleID) |>
  unique() |>
  sample(size = 16, replace = FALSE)

# create named list
subset_samples_list <- list("DF1" = df1_samples,
                            "DF2" = df2_samples)

# create tibble for input
norm_schema_subset <- dplyr::tibble(
  order              = c(1, 2),
  name               = c("NPX_DF1", "NPX_DF2"),
  data               = list("NPX_DF1" = npx_df1,
                            "NPX_DF2" = npx_df2),
  samples            = list("NPX_DF1" = NA_character_,
                            "NPX_DF2" = subset_samples_list),
  normalization_type = c(NA_character_, "Subset"),
  normalize_to       = c(NA_character_, "1")
)

# Normalize
olink_normalization_n(norm_schema = norm_schema_subset)

#### Subset normalization  of two projects using all samples

# datasets
npx_df1 <- npx_data1 |>
  dplyr::filter(!stringr::str_detect(SampleID, "CONTROL_")) |>
  dplyr::select(-Project) |>
  dplyr::mutate(Normalization = "Intensity")
npx_df2 <- npx_data2 |>
  dplyr::filter(!stringr::str_detect(SampleID, "CONTROL_")) |>
  dplyr::select(-Project) |>
  dplyr::mutate(Normalization = "Intensity")

# Find a suitable subset of samples from both projects, but exclude Olink
# controls and samples that fail QC.
df1_samples_all <- npx_df1 |>
  dplyr::filter(!stringr::str_detect(SampleID, "CONTROL_")) |>
  dplyr::group_by(SampleID) |>
  dplyr::filter(all(QC_Warning == 'Pass')) |>
  dplyr::pull(SampleID) |>
  unique()
df2_samples_all <- npx_df2 |>
  dplyr::filter(!stringr::str_detect(SampleID, "CONTROL_")) |>
  dplyr::group_by(SampleID) |>
  dplyr::filter(all(QC_Warning == 'Pass')) |>
  dplyr::pull(SampleID) |>
  unique()

# create named list
subset_samples_all_list <- list("DF1" = df1_samples_all,
                                "DF2" = df2_samples_all)

# create tibble for input
norm_schema_subset_all <- dplyr::tibble(
  order              = c(1, 2),
  name               = c("NPX_DF1", "NPX_DF2"),
  data               = list("NPX_DF1" = npx_df1,
                            "NPX_DF2" = npx_df2),
  samples            = list("NPX_DF1" = NA_character_,
                            "NPX_DF2" = subset_samples_all_list),
 normalization_type = c(NA_character_, "Subset"),
 normalize_to       = c(NA_character_, "1")
)

# Normalize
olink_normalization_n(norm_schema = norm_schema_subset_all)

#### Multi-project normalization using bridge and subset samples

## NPX data frames to bridge
npx_df1 <- npx_data1 |>
  dplyr::filter(!stringr::str_detect(SampleID, "CONTROL_")) |>
  dplyr::select(-Project) |>
  dplyr::mutate(Normalization = "Intensity")

npx_df2 <- npx_data2 |>
  dplyr::filter(!stringr::str_detect(SampleID, "CONTROL_")) |>
  dplyr::select(-Project) |>
  dplyr::mutate(Normalization = "Intensity")

# manipulating the sample NPX datasets to create another two random ones
npx_df3 <- npx_data2 |>
  dplyr::mutate(SampleID = paste(SampleID, "_mod", sep = ""),
                PlateID = paste(PlateID, "_mod", sep = ""),
                NPX = sample(x = NPX, size = dplyr::n(), replace = FALSE)) |>
  dplyr::filter(!stringr::str_detect(SampleID, "CONTROL_")) |>
  dplyr::select(-Project) |>
  dplyr::mutate(Normalization = "Intensity")

npx_df4 <- npx_data1 |>
  dplyr::mutate(SampleID = paste(SampleID, "_mod2", sep = ""),
                PlateID = paste(PlateID, "_mod2", sep = ""),
                NPX = sample(x = NPX, size = dplyr::n(), replace = FALSE)) |>
  dplyr::filter(!stringr::str_detect(SampleID, "CONTROL_")) |>
  dplyr::select(-Project) |>
  dplyr::mutate(Normalization = "Intensity")

## samples to use for normalization
# Bridge samples with same identifiers between npx_df1 and npx_df2
overlap_samples <- dplyr::intersect(unique(npx_df1$SampleID),
                                    unique(npx_df2$SampleID))
overlap_samples_df1_df2 <- list("DF1" = overlap_samples,
                                "DF2" = overlap_samples)
rm(overlap_samples)

# Bridge samples with different identifiers between npx_df2 and npx_df3
overlap_samples_df2_df3 <- list("DF1" = sample(x = unique(npx_df2$SampleID),
                                               size = 10,
                                               replace = FALSE),
                                "DF2" = sample(x = unique(npx_df3$SampleID),
                                               size = 10,
                                               replace = FALSE))

# Samples to use for intensity normalization between npx_df4 and the
# normalized dataset of npx_df1 and npx_df2
overlap_samples_df12_df4 <- list("DF1" = sample(x = c(unique(npx_df1$SampleID),
                                                      unique(npx_df2$SampleID)),
                                                size = 100,
                                                replace = FALSE),
                                 "DF2" = sample(x = unique(npx_df4$SampleID),
                                                size = 40,
                                                replace = FALSE))

# create tibble for input
norm_schema_n <- dplyr::tibble(
  order              = c(1, 2, 3, 4),
  name               = c("NPX_DF1", "NPX_DF2", "NPX_DF3", "NPX_DF4"),
  data               = list("NPX_DF1" = npx_df1,
                            "NPX_DF2" = npx_df2,
                            "NPX_DF3" = npx_df3,
                            "NPX_DF4" = npx_df4),
  samples            = list("NPX_DF1" = NA_character_,
                            "NPX_DF2" = overlap_samples_df1_df2,
                            "NPX_DF3" = overlap_samples_df2_df3,
                            "NPX_DF4" = overlap_samples_df12_df4),
  normalization_type = c(NA_character_, "Bridge", "Bridge", "Subset"),
  normalize_to       = c(NA_character_, "1", "2", "1,2")
)

olink_normalization_n(norm_schema = norm_schema_n)




[Package OlinkAnalyze version 3.8.2 Index]