Publication: Routy et al. (2023). Fecal microbiota transplantation plus anti-PD-1 immunotherapy in advanced melanoma: a phase I trial. Nature Medicine 29, 2121–2132. https://doi.org/10.1038/s41591-023-02453-x
The R code provided below was used to generate the following figures:
library(knitr)
library(dplyr)
library(sjmisc)
library(phyloseq)
library(microbiome)
library(stringr)
library(rstatix)
library(ggplot2)
library(ggrepel)
library(vegan)
library(ape)
library(cowplot)
library(reshape2)
sessionInfo()
## R version 4.1.0 (2021-05-18)
## Platform: x86_64-w64-mingw32/x64 (64-bit)
## Running under: Windows 10 x64 (build 19045)
##
## Matrix products: default
##
## locale:
## [1] LC_COLLATE=English_Canada.1252 LC_CTYPE=English_Canada.1252
## [3] LC_MONETARY=English_Canada.1252 LC_NUMERIC=C
## [5] LC_TIME=English_Canada.1252
##
## attached base packages:
## [1] stats graphics grDevices utils datasets methods base
##
## other attached packages:
## [1] reshape2_1.4.4 cowplot_1.1.1 ape_5.5 vegan_2.6-2
## [5] lattice_0.20-44 permute_0.9-5 ggrepel_0.9.3 rstatix_0.7.0
## [9] stringr_1.4.0 microbiome_1.14.0 ggplot2_3.3.5 phyloseq_1.36.0
## [13] sjmisc_2.8.9 dplyr_1.0.7 knitr_1.44
##
## loaded via a namespace (and not attached):
## [1] nlme_3.1-152 bitops_1.0-7 insight_0.17.1
## [4] GenomeInfoDb_1.28.1 tools_4.1.0 backports_1.2.1
## [7] bslib_0.3.1 utf8_1.2.1 R6_2.5.0
## [10] sjlabelled_1.1.8 DBI_1.1.1 BiocGenerics_0.38.0
## [13] mgcv_1.8-35 colorspace_2.0-2 rhdf5filters_1.4.0
## [16] ade4_1.7-17 withr_2.4.2 tidyselect_1.1.1
## [19] curl_4.3.2 compiler_4.1.0 Biobase_2.52.0
## [22] sass_0.4.0 scales_1.1.1 digest_0.6.27
## [25] foreign_0.8-81 rmarkdown_2.23 rio_0.5.27
## [28] XVector_0.32.0 pkgconfig_2.0.3 htmltools_0.5.2
## [31] fastmap_1.1.0 readxl_1.3.1 rlang_0.4.11
## [34] rstudioapi_0.13 jquerylib_0.1.4 generics_0.1.0
## [37] jsonlite_1.7.2 zip_2.2.0 car_3.0-11
## [40] RCurl_1.98-1.3 magrittr_2.0.1 GenomeInfoDbData_1.2.6
## [43] biomformat_1.20.0 Matrix_1.3-3 Rcpp_1.0.7
## [46] munsell_0.5.0 S4Vectors_0.30.0 Rhdf5lib_1.14.2
## [49] fansi_0.5.0 abind_1.4-5 lifecycle_1.0.1
## [52] stringi_1.6.2 yaml_2.2.1 carData_3.0-4
## [55] MASS_7.3-54 zlibbioc_1.38.0 rhdf5_2.36.0
## [58] Rtsne_0.15 plyr_1.8.6 grid_4.1.0
## [61] parallel_4.1.0 forcats_0.5.1 crayon_1.4.1
## [64] Biostrings_2.60.2 haven_2.4.1 splines_4.1.0
## [67] multtest_2.48.0 hms_1.1.0 pillar_1.7.0
## [70] igraph_1.2.6 codetools_0.2-18 stats4_4.1.0
## [73] glue_1.4.2 evaluate_0.21 data.table_1.14.0
## [76] vctrs_0.3.8 foreach_1.5.1 cellranger_1.1.0
## [79] gtable_0.3.0 purrr_0.3.4 tidyr_1.1.3
## [82] assertthat_0.2.1 openxlsx_4.2.4 xfun_0.39
## [85] broom_0.7.9 survival_3.2-11 tibble_3.1.2
## [88] iterators_1.0.13 IRanges_2.26.0 cluster_2.1.2
## [91] ellipsis_0.3.2
#Download files from github repository if needed
#download.file("https://raw.githubusercontent.com/bdaisley/MIMic-trial/main/data/MIMic_16S_asv_table.txt", "~/GITHUB/MIMic-trial/data/MIMic_16S_asv_table.txt", mode='wb', overwrite=TRUE)
#download.file("https://raw.githubusercontent.com/bdaisley/MIMic-trial/main/data/MIMic_metadata.txt", "~/GITHUB/MIMic-trial/data/MIMic_metadata.txt", mode='wb', overwrite=TRUE)
setwd("~/GITHUB/MIMic-trial")
d.b <- read.table("data/MIMic_16S_asv_table.txt",
sep="\t",
quote="",
check.names=F,
header=T,
row.names=1,
comment.char="")
rownames(d.b) <- paste(rownames(d.b), d.b$taxonomy, sep=";")
d.b.in <- d.b %>%
select(-taxonomy) %>%
sjmisc::rotate_df() %>%
select_if(colSums(.) != 0) %>%
sjmisc::rotate_df()
metadata<- read.table("data/MIMic_metadata.txt", header=T, sep='\t', comment.char = "") %>%
mutate(timepoint = case_when(
grepl("D1_|D2_|D5_", .$X.SampleID) ~ "0",
grepl("_S1", .$X.SampleID) ~ "1",
grepl("_S2", .$X.SampleID) ~ "2",
grepl("_S3", .$X.SampleID) ~ "3",
grepl("_S4", .$X.SampleID) ~ "4")) %>%
mutate(groups = case_when(
is.na(.$ORR) & grepl("D1_", .$X.SampleID) ~ "D1",
is.na(.$ORR) & grepl("D2_", .$X.SampleID) ~ "D2",
is.na(.$ORR) & grepl("D5_", .$X.SampleID) ~ "D5",
.$ORR == "NR" & !grepl("^D", .$X.SampleID) ~ "NR",
.$ORR == "R" & !grepl("^D", .$X.SampleID) ~ "R")) %>%
mutate(category = case_when(
grepl("_S1|_S2|_S3|_S4", .$X.SampleID) ~ "patient",
!grepl("_S1|_S2|_S3|_S4", .$X.SampleID) ~ "donor"))
#Assign donor-patient matching
donor.vector <- setNames((metadata %>%
filter(grepl("^D", .$description)) %>%
mutate(vec1= str_split_fixed(.$description, "_", 4)[,1]))$vec1,
(metadata %>%
filter(grepl("^D", .$description)) %>%
mutate(vec2= str_split_fixed(.$description, "_", 4)[,4]) %>%
mutate(vec2= gsub("P0","P", .$vec2)))$vec2)
metadata <- metadata %>% mutate(FMT_group= dplyr::recode(host_subject_id, !!!donor.vector))
#Donor - patient matches
print(donor.vector)
## L623_P4 L803_P1 L623_P5 L623_P1 L803_P4 L803_P3 L623_P3 L803_P8
## "D1" "D1" "D1" "D1" "D2" "D2" "D2" "D2"
## L803_P7 L803_P5 L802_P4 L803_P10 L623_P2 L803_P2 L802_P1 L802_P3
## "D2" "D2" "D2" "D2" "D2" "D5" "D5" "D5"
## L802_P2 L802_P5 L803_P11 L623_P8
## "D5" "D5" "D5" "D5"
PHYLO.OTU = otu_table(as.matrix(d.b.in), taxa_are_rows = TRUE)
tax.in <- cbind(str_split_fixed(rownames(d.b.in), ";", 8)[,2:8],
str_split_fixed(rownames(d.b.in), ";", 8)[,1])
colnames(tax.in) <- c("domain", "phylum", "class", "order", "family", "genus", "species", "asv")
rownames(tax.in)<-paste(rownames(d.b.in))
PHYLO.TAX <- tax_table(as.matrix(tax.in))
rownames(metadata) <- metadata$X.SampleID
PHYLO.META <- sample_data(data.frame(metadata))
ps.in <-phyloseq(PHYLO.OTU, PHYLO.TAX, PHYLO.META)
ps.clean <- ps.in %>% subset_taxa(family!= "Mitochondria" | is.na(family) & class!="Chloroplast" | is.na(class) )
Using phyloseq
and microbiome
packages to calculate alpha diversity metrics.
#Subset patient and donor matched samples
ps.clean.filt <- ps.clean %>%
subset_samples(
grepl("D1_01DEC|D1_28SEP20|D1_07OCT20|D1_06JAN20", X.SampleID) |
grepl("D2_02MAR21|D2_03FEB21|D2_06DEC19|D2_08JUL21|D2_12MAY21|D2_16FEB21|D2_22JUN21|D2_27JUN19", X.SampleID) |
grepl("D5_15DEC20|D5_23OCT20|D5_24MAR21|D5_25FEB21|D5_27JUL21|D5_29JUL21|D5_30JUN21", X.SampleID) |
grepl("L623_|L802_|L803_", X.SampleID)
) %>%
phyloseq::prune_taxa(taxa_sums(.) > 0, .)
print(ps.clean)
## phyloseq-class experiment-level object
## otu_table() OTU Table: [ 3262 taxa and 123 samples ]
## sample_data() Sample Data: [ 123 samples by 64 sample variables ]
## tax_table() Taxonomy Table: [ 3262 taxa by 8 taxonomic ranks ]
print(ps.clean.filt)
## phyloseq-class experiment-level object
## otu_table() OTU Table: [ 3058 taxa and 95 samples ]
## sample_data() Sample Data: [ 95 samples by 64 sample variables ]
## tax_table() Taxonomy Table: [ 3058 taxa by 8 taxonomic ranks ]
alpha.diversity.bac <- estimate_richness(ps.clean.filt, split = TRUE,
measures = c("Observed",
"Chao1",
"ACE",
"Shannon",
"Simpson",
"InvSimpson",
"Fisher"))
alpha.dominance.bac <- microbiome::dominance(ps.clean.filt,
index = "all",
relative = TRUE,
aggregate = FALSE)
alpha.dominance2.bac <- microbiome::diversity(ps.clean.filt,
index = "all",
zeroes=TRUE)
colnames(alpha.diversity.bac) <- paste(colnames(alpha.diversity.bac), "bdiv", sep="_")
colnames(alpha.dominance.bac) <- paste(colnames(alpha.dominance.bac), "bdom", sep="_")
colnames(alpha.dominance2.bac) <- paste(colnames(alpha.dominance2.bac), "bdom", sep="_")
alpha.metrics <- cbind(alpha.diversity.bac,alpha.dominance.bac, alpha.dominance2.bac)
alpha.metrics.merge <- merge(metadata,
alpha.metrics,
by.x="X.SampleID",
by.y="row.names",
all=F)
alpha.plot <- alpha.metrics.merge %>% `rownames<-`(alpha.metrics.merge$X.SampleID)
alpha.plot <- alpha.plot %>%
mutate(labels = case_when(
grepl("D1_|D2_|D5_", .$X.SampleID) ~ "Donors",
grepl("_S1", .$X.SampleID) ~ "S1",
grepl("_S2", .$X.SampleID) ~ "S2",
grepl("_S3", .$X.SampleID) ~ "S3",
grepl("_S4", .$X.SampleID) ~ "S4")) %>%
mutate(group_colours = case_when(
is.na(.$ORR) & grepl("D1_", .$X.SampleID) ~ "D1",
is.na(.$ORR) & grepl("D2_", .$X.SampleID) ~ "D2",
is.na(.$ORR) & grepl("D5_", .$X.SampleID) ~ "D5",
.$ORR == "NR" & !grepl("^D", .$X.SampleID) ~ "NR",
.$ORR == "R" & !grepl("^D", .$X.SampleID) ~ "R")) %>%
mutate(category = case_when(
grepl("_S1|_S2|_S3|_S4", .$X.SampleID) ~ "patient",
!grepl("_S1|_S2|_S3|_S4", .$X.SampleID) ~ "donor"))
Using rstatix
package to calculate
var1 = "core_abundance_bdom"
var2 = "labels"
f <- paste(var1, "~", var2)
res.kruskal1 <- alpha.plot %>% rstatix::kruskal_test(as.formula(f))
pwc1 <- alpha.plot %>% rstatix::wilcox_test(as.formula(f),
p.adjust.method = "BH",
detailed=TRUE) %>%
rstatix::add_xy_position(x = var2)
pwc1 <- pwc1 %>% filter(group1=="Donors" & group2=="S1" |
group1=="Donors" & group2=="S2" |
group1=="Donors" & group2=="S3" |
group1=="Donors" & group2=="S4" |
group1=="S1" & group2=="S2" |
group1=="S1" & group2=="S3" |
group1=="S1" & group2=="S4")
Using ggplot2
package to plot ‘core abundance’ alpha diversity as determined using the microbiome
package
print(alpha.plot$X.SampleID)
## [1] "D1_01DEC" "D1_06JAN20" "D1_07OCT20" "D1_28SEP20" "D2_02MAR21"
## [6] "D2_03FEB21" "D2_06DEC19" "D2_08JUL21" "D2_12MAY21" "D2_16FEB21"
## [11] "D2_22JUN21" "D2_27JUN19" "D5_15DEC20" "D5_23OCT20" "D5_24MAR21"
## [16] "D5_25FEB21" "D5_27JUL21" "D5_29JUL21" "D5_30JUN21" "L623_P1_S1"
## [21] "L623_P1_S2" "L623_P1_S3" "L623_P1_S4" "L623_P2_S1" "L623_P2_S2"
## [26] "L623_P2_S3" "L623_P2_S4" "L623_P3_S1" "L623_P3_S2" "L623_P3_S3"
## [31] "L623_P3_S4" "L623_P4_S1" "L623_P4_S2" "L623_P4_S3" "L623_P4_S4"
## [36] "L623_P5_S1" "L623_P5_S2" "L623_P5_S3" "L623_P5_S4" "L623_P8_S1"
## [41] "L623_P8_S2" "L623_P8_S3" "L623_P8_S4" "L802_P1_S1" "L802_P1_S2"
## [46] "L802_P1_S3" "L802_P1_S4" "L802_P2_S1" "L802_P2_S2" "L802_P2_S3"
## [51] "L802_P2_S4" "L802_P3_S1" "L802_P3_S2" "L802_P3_S3" "L802_P3_S4"
## [56] "L802_P4_S1" "L802_P4_S2" "L802_P4_S3" "L802_P4_S4" "L802_P5_S1"
## [61] "L802_P5_S2" "L802_P5_S3" "L802_P5_S4" "L803_P1_S1" "L803_P1_S2"
## [66] "L803_P1_S3" "L803_P1_S4" "L803_P11_S1" "L803_P11_S2" "L803_P11_S3"
## [71] "L803_P11_S4" "L803_P2_S1" "L803_P2_S2" "L803_P2_S3" "L803_P2_S4"
## [76] "L803_P3_S1" "L803_P3_S2" "L803_P3_S3" "L803_P3_S4" "L803_P4_S1"
## [81] "L803_P4_S2" "L803_P4_S3" "L803_P4_S4" "L803_P5_S1" "L803_P5_S2"
## [86] "L803_P5_S3" "L803_P5_S4" "L803_P7_S1" "L803_P7_S2" "L803_P7_S3"
## [91] "L803_P7_S4" "L803_P8_S1" "L803_P8_S2" "L803_P8_S3" "L803_P8_S4"
ggplot(alpha.plot, aes(x=get(var2), y=get(var1))) +
ggbeeswarm::geom_beeswarm(aes(colour=group_colours),
priority ="density",
dodge.width = 0.25,
cex=0.0001,
size=2,
alpha=1) +
scale_colour_manual(values=c(D1="#33B1CC",
D2="#FFB69C",
D5="#BD49FF",
NR="red",
R="blue")) +
scale_fill_manual(values=c(D1="#33B1CC",
D2="#FFB69C",
D5="#BD49FF",
NR="red",
R="blue")) +
ggpubr::stat_pvalue_manual(pwc1,
label="p.adj",
hide.ns=FALSE,
size=3,
bracket.size=0.1,
vjust=0.5) +
theme_set(theme_bw()) +
theme(panel.background = element_blank(),
panel.grid.major = element_blank(),
panel.border = element_rect(colour = "black", fill=NA, size=0.5),
axis.title.x.bottom = element_blank(),
panel.grid.minor = element_blank(),
axis.title.x = element_text(size=12),
axis.title.y = element_text(size=14),
axis.text.x = element_text(angle=0, size=12),
axis.text.y = element_text(angle=0, size=12)) + #legend.position = "none"
ylab("a-diversity (microbiome::core abundance") +
labs(caption = rstatix::get_pwc_label(pwc1)) +
stat_summary(aes(group = category),
fun.y = "mean",
geom = "line",
size= 0.1 ) +
stat_summary(fun.data = mean_cl_boot,
geom = "errorbar",
width = 0.3)
Subset an equal number of samples from each donor for comparisons
#Random subset of n=10 samples from each donor for consistency in pre-/post-FMT comparisons
donors.r10 <- c("D1_01OCT20","D1_06JAN20","D1_07OCT20","D1_09SEP20","D1_11AUG20",
"D1_14SEP20","D1_16JUN20","D1_16NOV21","D1_28JAN20","D1_28SEP20",
"D2_02MAR21","D2_22JUN21","D2_19JAN21","D2_09SEP21","D2_15JUN21",
"D2_16FEB21","D2_03FEB21",'D2_27JUN19',"D2_27JUL21","D2_08JUL21",
"D5_22SEP21","D5_30JUN21","D5_15JUN21","D5_18AUG21","D5_20OCT21",
"D5_22SEP20","D5_14SEP21","D5_24MAR21","D5_29JUL21","D5_23SEP21")
S1.pre <- d.b %>% filter(!grepl("Chloroplast|Mitochondria", .$taxonomy)) %>%
select(-taxonomy) %>%
sjmisc::rotate_df() %>%
mutate(X.SampleID=rownames(.)) %>%
filter(grepl(paste(donors.r10, collapse="|"), .$X.SampleID) | grepl("_S1", .$X.SampleID)) %>%
select(-X.SampleID) %>%
select_if(colSums(.) != 0) %>%
sjmisc::rotate_df()
S4.post <- d.b %>% filter(!grepl("Chloroplast|Mitochondria", .$taxonomy)) %>%
select(-taxonomy) %>%
sjmisc::rotate_df() %>%
mutate(X.SampleID=rownames(.)) %>%
filter(grepl(paste(donors.r10, collapse="|"), .$X.SampleID) | grepl("_S4", .$X.SampleID)) %>%
select(-X.SampleID) %>%
select_if(colSums(.) != 0) %>%
sjmisc::rotate_df()
Plot PCoA based on Aitchison distances between donors and S1 patient samples (Extended Figure 2A)
#CLR transform data
S1.pre.clr <- t(apply(t(S1.pre+0.5), 1, function(x){log2(x) - mean(log2(x))}))
#Calculate distance matrix
dist.pre <- vegan::vegdist(S1.pre.clr, method = "euclidean")
#Calculate PCoA from distances
PCOA.pre <- ape::pcoa(dist.pre)
#Calculate explained variance for each axis
PCo1.pre <- paste("PCo1 (", round(PCOA.pre$values[1,2]*100, 1), "%)", sep="")
PCo2.pre <- paste("PCo2 (", round(PCOA.pre$values[2,2]*100, 1), "%)", sep="")
#Make new dataframe for plotting the PCoA
ord.pre <- as.data.frame(PCOA.pre$vectors[,c(1,2)], stringsasfactors=FALSE) %>%
mutate(X.SampleID= rownames(.)) %>%
rename("PCo1" = "Axis.1", "PCo2" = "Axis.2") %>%
mutate(group_colours = case_when(
grepl("D1_", .$X.SampleID) ~ "D1",
grepl("D2_", .$X.SampleID) ~ "D2",
grepl("D5_", .$X.SampleID) ~ "D5",
grepl("L623_P1_|L623_P8_|L802_P5_|L803_P10_|L803_P2_|L803_P3_|L803_P4_", .$X.SampleID) ~ "NR",
grepl("L623_P2_|L623_P3_|L623_P4_|L623_P5_|L802_P1_|L802_P2_|L802_P3_|L802_P4_|L803_P1_|L803_P11_|L803_P5_|L803_P7_|L803_P8_", .$X.SampleID) ~ "R")) %>%
mutate(category = case_when(
grepl("_S1|_S2|_S3|_S4", .$X.SampleID) ~ "patient",
!grepl("_S1|_S2|_S3|_S4", .$X.SampleID) ~ "donor"))
#Plot PCoA plot for Pre-FMT
pcoa.pre <- ggplot(ord.pre,
aes(x=PCo1,
y=PCo2,
color=group_colours,
label=X.SampleID)) +
theme(panel.border = element_rect(color ="grey",
fill = NA,
size = 0.5,
linetype = 1),
axis.line = element_line(),
plot.title = element_text(hjust=0.5),
axis.title.x = element_text(size=12),
axis.title.y = element_text(size=12),
axis.text.x = element_text(angle=0, size=10),
axis.text.y = element_text(angle=0, size=10),
panel.background = element_blank(),
panel.grid.major = element_blank(),
panel.grid.minor = element_blank()) + #legend.position = "none"
scale_colour_manual(name = "Key",
values=c(D1="#33B1CC",
D2="#FFB69C",
D5="#BD49FF",
NR="red",
R="blue")) +
scale_fill_manual(name = "Key",
values=c(D1="#33B1CC",
D2="#FFB69C",
D5="#BD49FF",
NR="red",
R="blue")) +
geom_point(aes(),
size=2,
alpha=rep(0.85)) +
scale_x_continuous(limits=c(-50,52),
breaks=c(-50, -25, 0, 25, 50)) +
scale_y_continuous(limits=c(-51,45),
breaks=c(-40, -20, 0, 20, 40)) +
ggtitle("S1 ORR vs Donors (Pre-FMT)") +
ggrepel::geom_text_repel(data=ord.pre %>%
filter(category=="patient"),
aes(), size=2,
min.segment.length = Inf,
box.padding= 0.2) +
annotate(geom="text",
size=5,
x=-22,
y=40,
label="Donor 1",
color="#33B1CC") +
annotate(geom="text",
size=5,
x=-27,
y=-31,
label="Donor 2",
color="#FFB69C") +
annotate(geom="text",
size=5,
x= 42,
y= 27,
label="Donor 5",
color="#BD49FF") +
xlab(PCo1.pre) +
ylab(PCo2.pre) +
stat_ellipse(mapping = NULL, data = ord.pre %>% filter(category=="donor"),
geom = "path",
position = "identity",
type = "norm",
level = 0.68,
segments = 51,
na.rm = FALSE,
show.legend = NA,
inherit.aes = TRUE) +
geom_vline(xintercept = c(0),
linetype="solid",
color="grey50",
alpha=0.5) +
geom_hline(yintercept = c(0),
linetype="solid",
color="grey50",
alpha=0.5)
pcoa.pre
Plot PCoA based on Aitchison distances between donors and S4 patient samples
#CLR transform data
S4.post.clr <- t(apply(t(S4.post+0.5), 1, function(x){log2(x) - mean(log2(x))}))
#Calculate distance matrix
dist.post <- vegan::vegdist(S4.post.clr, method = "euclidean")
#Calculate PCoA from distances
PCOA.post <- ape::pcoa(dist.post)
#Calculate explained variance for each axis
PCo1.post <- paste("PCo1 (", round(PCOA.post$values[1,2]*100, 1), "%)", sep="")
PCo2.post <- paste("PCo2 (", round(PCOA.post$values[2,2]*100, 1), "%)", sep="")
#Make new dataframe for plotting the PCoA
ord.post <- as.data.frame(PCOA.post$vectors[,c(1,2)], stringsasfactors=FALSE) %>% mutate(X.SampleID= rownames(.)) %>%
rename("PCo1" = "Axis.1", "PCo2" = "Axis.2") %>% #mutate(PCo2 = PCo2 *-1) %>%
mutate(group_colours = case_when(
grepl("D1_", .$X.SampleID) ~ "D1",
grepl("D2_", .$X.SampleID) ~ "D2",
grepl("D5_", .$X.SampleID) ~ "D5",
grepl("L623_P1_|L623_P8_|L802_P5_|L803_P10_|L803_P2_|L803_P3_|L803_P4_", .$X.SampleID) ~ "NR",
grepl("L623_P2_|L623_P3_|L623_P4_|L623_P5_|L802_P1_|L802_P2_|L802_P3_|L802_P4_|L803_P1_|L803_P11_|L803_P5_|L803_P7_|L803_P8_", .$X.SampleID) ~ "R")) %>%
mutate(category = case_when(
grepl("_S1|_S2|_S3|_S4", .$X.SampleID) ~ "patient",
!grepl("_S1|_S2|_S3|_S4", .$X.SampleID) ~ "donor"))
#Plot PCoA plot for Post-FMT
pcoa.post <- ggplot(ord.post,
aes(x=PCo1,
y=PCo2,
color=group_colours,
label=X.SampleID)) +
theme(panel.border = element_rect(color ="grey",
fill = NA,
size = 0.5,
linetype = 1),
axis.line = element_line(),
plot.title = element_text(hjust=0.5),
axis.title.x = element_text(size=12),
axis.title.y = element_text(size=12),
axis.text.x = element_text(angle=0, size=10),
axis.text.y = element_text(angle=0, size=10),
panel.background = element_blank(),
panel.grid.major = element_blank(),
panel.grid.minor = element_blank()) + #legend.position = "none"
scale_colour_manual(name = "Key",
values=c(D1="#33B1CC",
D2="#FFB69C",
D5="#BD49FF",
NR="red",
R="blue")) +
scale_fill_manual(name = "Key",
values=c(D1="#33B1CC",
D2="#FFB69C",
D5="#BD49FF",
NR="red",
R="blue")) +
geom_point(aes(),
size=2,
alpha=rep(0.85)) +
scale_x_continuous(limits=c(-50,52),
breaks=c(-50, -25, 0, 25, 50)) +
scale_y_continuous(limits=c(-51,45),
breaks=c(-40, -20, 0, 20, 40)) +
ggtitle("S4 ORR vs Donors (Post-FMT)") +
ggrepel::geom_text_repel(data=ord.post %>%
filter(category=="patient"),
aes(), size=2,
min.segment.length = Inf,
box.padding= 0.2) +
annotate(geom="text",
size=5,
x=-22,
y=40,
label="Donor 1",
color="#33B1CC") +
annotate(geom="text",
size=5,
x=-27,
y=-31,
label="Donor 2",
color="#FFB69C") +
annotate(geom="text",
size=5,
x= 42,
y= 27,
label="Donor 5",
color="#BD49FF") +
xlab(PCo1.post) +
ylab(PCo2.post) +
stat_ellipse(mapping = NULL, data = ord.post %>% filter(category=="donor"),
geom = "path",
position = "identity",
type = "norm",
level = 0.68,
segments = 51,
na.rm = FALSE,
show.legend = NA,
inherit.aes = TRUE) +
geom_vline(xintercept = c(0),
linetype="solid",
color="grey50",
alpha=0.5) +
geom_hline(yintercept = c(0),
linetype="solid",
color="grey50",
alpha=0.5)
pcoa.post
Calculate Aitchison distance between matched donor-patient samples at S1-S4 timepoints
#Adjust zeroes and perform CLR transformation
ps.clr <- phyloseq::transform_sample_counts(ps.clean.filt, function(x) {x <- x + 0.5; x / sum(x); log2(x) - mean(log2(x))})
# Generate distance matrix
#------------------------------
iDist.list <- list()
iDist.list$CLR <- distance(ps.clr, method="euclidean", type="samples")
#iDist.list$bray <- distance(ps.clean.filt, method="bray", type="samples")
colnames(otu_table(ps.clr))
## [1] "D1_01DEC" "D1_06JAN20" "D1_07OCT20" "D1_28SEP20" "D2_02MAR21"
## [6] "D2_03FEB21" "D2_06DEC19" "D2_08JUL21" "D2_12MAY21" "D2_16FEB21"
## [11] "D2_22JUN21" "D2_27JUN19" "D5_15DEC20" "D5_23OCT20" "D5_24MAR21"
## [16] "D5_25FEB21" "D5_27JUL21" "D5_29JUL21" "D5_30JUN21" "L623_P1_S1"
## [21] "L623_P1_S2" "L623_P1_S3" "L623_P1_S4" "L623_P2_S1" "L623_P2_S2"
## [26] "L623_P2_S3" "L623_P2_S4" "L623_P3_S1" "L623_P3_S2" "L623_P3_S3"
## [31] "L623_P3_S4" "L623_P4_S1" "L623_P4_S2" "L623_P4_S3" "L623_P4_S4"
## [36] "L623_P5_S1" "L623_P5_S2" "L623_P5_S3" "L623_P5_S4" "L623_P8_S1"
## [41] "L623_P8_S2" "L623_P8_S3" "L623_P8_S4" "L802_P1_S1" "L802_P1_S2"
## [46] "L802_P1_S3" "L802_P1_S4" "L802_P2_S1" "L802_P2_S2" "L802_P2_S3"
## [51] "L802_P2_S4" "L802_P3_S1" "L802_P3_S2" "L802_P3_S3" "L802_P3_S4"
## [56] "L802_P4_S1" "L802_P4_S2" "L802_P4_S3" "L802_P4_S4" "L802_P5_S1"
## [61] "L802_P5_S2" "L802_P5_S3" "L802_P5_S4" "L803_P1_S1" "L803_P1_S2"
## [66] "L803_P1_S3" "L803_P1_S4" "L803_P11_S1" "L803_P11_S2" "L803_P11_S3"
## [71] "L803_P11_S4" "L803_P2_S1" "L803_P2_S2" "L803_P2_S3" "L803_P2_S4"
## [76] "L803_P3_S1" "L803_P3_S2" "L803_P3_S3" "L803_P3_S4" "L803_P4_S1"
## [81] "L803_P4_S2" "L803_P4_S3" "L803_P4_S4" "L803_P5_S1" "L803_P5_S2"
## [86] "L803_P5_S3" "L803_P5_S4" "L803_P7_S1" "L803_P7_S2" "L803_P7_S3"
## [91] "L803_P7_S4" "L803_P8_S1" "L803_P8_S2" "L803_P8_S3" "L803_P8_S4"
# Melt distance matrix
#------------------------------
dist.melt <- melt(as.matrix(iDist.list$CLR))
dist.melt.meta <- as.data.frame(as.matrix(sample_data(ps.clr)), stringsasfactors=FALSE)%>% mutate_if(is.factor,as.character)
#dist.melt.join <- left_join(dist.melt, dist.melt.meta, by = c("Var1" = "X.SampleID")) %>% left_join(., dist.melt.meta, by = c("Var2" = "X.SampleID")) %>% mutate(combined= paste(as.numeric(sample_no1.x), as.numeric(sample_no1.y), sep="_"))
################################ SELF vs DONOR COMPARISONS
dist.melt.join <- left_join(dist.melt, dist.melt.meta, by = c("Var1" = "X.SampleID")) %>%
left_join(., dist.melt.meta, by = c("Var2" = "X.SampleID")) %>%
mutate(combined= paste(as.numeric(timepoint.x), as.numeric(timepoint.y), sep="_"))
#against FMT matched donors <--- THIS is the one that was used for publication
dist.melt.join.sub <- rbind(dist.melt.join %>% filter(FMT_group.x==FMT_group.y & category.x=="patient" & category.y=="donor" & grepl("1_0$|2_0$|3_0$|4_0$", combined) & FMT_group.x=="D1"),
dist.melt.join %>% filter(FMT_group.x==FMT_group.y & category.x=="patient" & category.y=="donor" & grepl("1_0$|2_0$|3_0$|4_0$", combined) & FMT_group.x=="D2"),
dist.melt.join %>% filter(FMT_group.x==FMT_group.y & category.x=="patient" & category.y=="donor" & grepl("1_0$|2_0$|3_0$|4_0$", combined) & FMT_group.x=="D5"))
dist.melt.join.sub <- dist.melt.join.sub %>%
ungroup() %>% mutate_at(vars(value), as.numeric) %>%
group_by(Var1, FMT_group.x) %>%
mutate_at(vars(value), funs(mean(.))) %>%
ungroup() %>%
arrange(Var1) %>%
group_by(Var1) %>%
filter(Var2== first(Var2))
#STATS
pwc1 <- dist.melt.join.sub %>%
ungroup() %>%
group_by(ORR.x) %>%
rstatix::wilcox_test(value ~ combined,
ref="1_0",
p.adjust.method = "BH",
detailed=TRUE) %>%
rstatix::add_xy_position() %>% mutate(y.subtract = c(35,39,43,10,14,18)) %>% mutate(y.position= .$y.position-.$y.subtract)
dist.melt.join.sub.mean <- dist.melt.join.sub %>%
group_by(ORR.x, combined) %>% mutate(sem=plotrix::std.error(value)) %>% ungroup() %>%
group_by(ORR.x, combined) %>% mutate(mean_value=mean(value)) %>% ungroup() %>%
group_by(ORR.x, combined) %>% filter(value== first(value)) %>% ungroup() %>%
mutate(combined=gsub("1_0", "S1", .$combined)) %>%
mutate(combined=gsub("2_0", "S2", .$combined)) %>%
mutate(combined=gsub("3_0", "S3", .$combined)) %>%
mutate(combined=gsub("4_0", "S4", .$combined))
#Mean values
Plot PCoA of Aitchison distance for matched donor-patient samples at S1-S4 timepoints (Extended Figure 3B)
#LINE PLOT
ggplot(dist.melt.join.sub.mean,
aes(x=combined,
y=mean_value)) +
geom_line(aes(x=combined,
y=mean_value,
group=host_subject_id.x,
colour=ORR.x),
alpha=1) +
geom_errorbar(aes(ymin=mean_value-sem,
ymax=mean_value+sem,
group=ORR.x),
width=0.2) +
geom_point(aes(x=combined,
y=mean_value,
colour=ORR.x),
alpha=1,
size=3,
show_guide = FALSE) +
facet_grid(~ORR.x) + coord_cartesian(ylim = c(90,135)) +
ggpubr::stat_pvalue_manual(pwc1,
label="p.adj",
hide.ns=FALSE,
size=3,
bracket.size=0.1,
vjust=0.1) +
scale_colour_manual(name = "Key",
values=c(NR="red",
R="blue")) +
scale_fill_manual(name = "Key",
values=c(NR="red",
R="blue")) +
ylab("Distance to donor (Aitchinson CLR)") +
labs(caption = rstatix::get_pwc_label(pwc1)) +
theme_set(theme_bw()) +
theme(panel.background = element_blank(),
panel.grid.major = element_blank(),
axis.title.x.bottom = element_blank(),
panel.grid.minor = element_blank(),
legend.key.width = unit(1,"cm"),
axis.text.x = element_text(angle=0),
plot.title = element_text(size = 10)) +
guides(color = guide_legend(override.aes = list(size = 5)))