Seurat workflow

Introduction

Please see the Seurat workflow for a more in-depth workflow.

https://satijalab.org/seurat/v3.1/pbmc3k_tutorial.html

This workflow uses a dataset of Peripheral Blood Mononuclear Cells (PBMC) freely available from 10X Genomics. There are 2,700 single cells that were sequenced on the Illumina NextSeq 500.

download data from https://s3-us-west-2.amazonaws.com/10x.files/samples/cell/pbmc3k/pbmc3k_filtered_gene_bc_matrices.tar.gz

library(plyr); library(dplyr)
library(Seurat)
library(patchwork)
library(SCHNAPPs)
library(SingleCellExperiment)
# Load the pbm dataset
pbm.data <- Read10X(data.dir = "~/Downloads/filtered_gene_bc_matrices/hg19/")
# Initialize the Seurat object with the raw (non-normalized data).
pbm <- CreateSeuratObject(counts = pbm.data, project = "pbm3k", min.cells = 3, min.features = 200)

pbm[["percent.mt"]] <- PercentageFeatureSet(pbm, pattern = "^MT-")

## in case of human samples:
# pbm <- CellCycleScoring(
# pbm,
#   g2m.features = cc.genes$g2m.genes,
#   s.features = cc.genes$s.genes
# )



scEx = as.SingleCellExperiment(pbm)

colnames(colData(scEx)) = c("sampleNames",   "nCount_RNA",   "nFeature_RNA", "percent.mt" , "ident"       )
colData(scEx)$barcode = rownames(colData(scEx))

rowData(scEx)$Description = ""
rowData(scEx)$id = rownames(rowData(scEx))
rowData(scEx)$symbol = rownames(rowData(scEx))

pbm

## An object of class Seurat 
## 13714 features across 2700 samples within 1 assay 
## Active assay: RNA (13714 features, 0 variable features)

pbm <- subset(pbm, subset = nFeature_RNA > 200 & nFeature_RNA < 2500 & percent.mt < 5)
pbm <- pbm[-which(rowSums(pbm)==0),]
pbm <- NormalizeData(pbm)

pbm <-FindVariableFeatures(pbm, selection.method = "vst", nfeatures = 2000)

top10 <- head(VariableFeatures(pbm), 10)
all.VariableFeatures = VariableFeatures(pbm)
all.genes <- rownames(pbm)
# Why do they scale on all features???
set.seed(1)
pbm <- ScaleData(pbm, features = all.VariableFeatures)

pbm <- RunPCA(pbm, features = all.VariableFeatures, rank = 50)
scranPCA = BiocSingular::runPCA(t(as.matrix((Seurat::Assays(pbm,slot = "RNA")@scale.data))[all.VariableFeatures,]) , rank = 50)

pbm <- FindNeighbors(pbm, dims = 1:10)
pbm <- FindClusters(pbm, resolution = 0.5, verbose = FALSE)

pbm <- RunUMAP(pbm, dims = 1:10, verbose = FALSE)

colData(scEx)$seurartCluster = -1
colData(scEx)[names(Idents(pbm)),"seurartCluster"] = Idents(pbm)
colData(scEx)$seurartCluster = as.factor(colData(scEx)$seurartCluster)

comparing BiocSingular and Seurat PCA calculations:

Seurat
BiocSingular

pbm

## An object of class Seurat 
## 13713 features across 2638 samples within 1 assay 
## Active assay: RNA (13713 features, 2000 variable features)
##  2 dimensional reductions calculated: pca, umap

# Assays(pbm)

We have now created a SingleCellExperiment object with all information from Seurat. We will now try to recreate these results with SCHNAPPs:

We have to save the object in a file that can be opened with the “load” command.

save(file = "seurat.pbm.RData", list = c("scEx"))

To reproduce the results the following parameters have to be set in SCHNAPPs:

Cell selection: ** Min # of UMIs = 1

Cell selection parameters

Just to be complete (we will do this later) here is the list of cells that has to be removed:

AAAGATCTGGGCAA-1, AAAGCAGAAGCCAT-1, AACGCCCTGCTTAG-1, AAGGTCTGGTATGC-1, AATGTAACGTTTGG-1, AATTACGAGTAGCT-1, ACACAGACACCTGA-1, ACATGGTGCGTTGA-1, ACCTGGCTGTCTTT-1, ACTTAAGACCACAA-1, ACTTGTACCCGAAT-1, ACTTTGTGCGATAC-1, AGAGGTCTACAGCT-1, ATCACGGATTGCTT-1, ATTACCTGGGCATT-1, CACGCTACTTGACG-1, CAGTGTGAACACGT-1, CCAATGGAACAGCT-1, CCAGTCTGCGGAGA-1, CGACCTTGGCAAGG-1, CGAGCCGACGACAT-1, CGGAATTGCACTAG-1, CGTAACGAATCAGC-1, CGTACCACGCTACA-1, CGTACCTGGACGAG-1, CTAGTTTGAGTACC-1, CTCAGCTGTTTCTG-1, CTCATTGATTGCTT-1, CTGGCACTGGACAG-1, CTTAACACGAGCTT-1, CTTAAGCTTCCTCG-1, GAAAGATGTTTGCT-1, GAACGTTGACGGAG-1, GAATGGCTAAGATG-1, GACCATGACTCTCG-1, GACTGAACAACCGT-1, GCCACTACCTACTT-1, GCGAAGGAGAGCTT-1, GCTACAGATCTTAC-1, GGCACGTGTGAGAA-1, GTCAACGATCAGGT-1, GTGAACACAGATCC-1, GTGTCAGAATGCTG-1, GTTAAAACTTCGCC-1, TAAGATACCCACAA-1, TACGCAGACGTCTC-1, TACGCGCTCTTCTA-1, TACGGCCTGTCCTC-1, TATCACTGACTGTG-1, TCCCGATGCTGTGA-1, TCGCACACCATCAG-1, TCGTGAGAACTGTG-1, TGAAGCTGAGACTC-1, TGAGACACTGTGCA-1, TGAGCTGAGCGAGA-1, TGGAGACTGAAACA-1, TGGATGTGATGTCG-1, TGGCAATGGAGGGT-1, TGGTCAGACCGTTC-1, TGTTAAGATTGGCA-1, TTACTCGAACGTTG-1, TTCAAGCTTCCAAG-1