跳转至

Mutate

The key functions here are mutate (dplyr-compatible) / mutate_immundata and the functions from the downstream analysis tools. A mutation changes one or more existing columns, or adds new columns. This is the equivalent of with_columns in polars.

Run this code before running the examples below:

library(immundata)

# Glob pattern covering every `.csv.gz` file in the single-cell example
# folder of the installed immundata package.
inp_files <- paste0(
  system.file("extdata/single_cell", "", package = "immundata"),
  "/*.csv.gz"
)

# Metadata table from the same example folder.
md_file <- system.file(
  "extdata/single_cell", "metadata.tsv",
  package = "immundata"
)
md_table <- read_metadata(md_file)

# Cells table from the same example folder, read as a TSV.
cells_file <- system.file(
  "extdata/single_cell", "cells.tsv.gz",
  package = "immundata"
)
cells <- readr::read_tsv(cells_file)

# Receptor schema built from the `cdr3` and `v_call` features, TRB chain only.
schema <- make_receptor_schema(
  features = c("cdr3", "v_call"),
  chains = "TRB"
)

# Load the example repertoires into `idata`, the object used by every example
# below. The metadata argument is named `metadata` (no trailing dot): a
# misspelled argument name is not matched to the formal parameter, so the
# metadata table would never reach the reader.
idata <- read_repertoires(
  path              = inp_files,
  schema            = schema,
  metadata          = md_table,
  barcode_col       = "barcode",
  locus_col         = "locus",
  umi_col           = "umis",
  preprocess        = make_default_preprocessing("10x"),
  repertoire_schema = "Tissue"
)

Add or transform one or several annotation columns

# Add a column holding the same constant string in every row.
idata |>
  mutate(new_column = "value")

# Add a logical column that is TRUE where `umis` is at least 10.
idata |>
  mutate(big_chains = umis >= 10)

# DuckDB functions are reachable through the `dd$<function>` prefix; here
# `levenshtein` is applied to `cdr3` against a fixed pattern.
idata |>
  mutate(
    dist_to_pattern = dd$levenshtein(cdr3, "CASSSVSGNSPLHF")
  )

Add columns with sequence distance to patterns

# Sequences that `cdr3` is compared against.
patterns <- c(
  "CASSVHPQYF",
  "CAWSGQGWGGSTDTQYF",
  "CASSPRPGSTGELFF"
)

# Distance from `cdr3` to each pattern, using method "lev".
idata |>
  mutate(
    seq_options = make_seq_options(
      query_col = "cdr3",
      patterns = patterns,
      method = "lev"
    )
  )

# Same call with `name_type = "pattern"`.
# NOTE(review): presumably this names the output columns after the patterns;
# confirm against the make_seq_options documentation.
idata |>
  mutate(
    seq_options = make_seq_options(
      query_col = "cdr3",
      patterns = patterns,
      method = "lev",
      name_type = "pattern"
    )
  )

Modify a subset of column values

# Flag rows whose `cdr3` equals the given sequence: 1 on a match, 0 otherwise.
idata |>
  mutate(
    found_pattern = if_else(cdr3 == "CASSVHPQYF", 1, 0)
  )