Publication: Routy et al. (2023). Fecal microbiota transplantation plus anti-PD-1 immunotherapy in advanced melanoma: a phase I trial. Nature Medicine 29, 2121–2132. https://doi.org/10.1038/s41591-023-02453-x

The R code provided below was used to generate the following figures:

  • Figure 2A
  • Extended Figure 2A
  • Extended Figure 3B

1. Load required packages

library(knitr)
library(dplyr)
library(sjmisc)
library(phyloseq)
library(microbiome)
library(stringr)
library(rstatix)
library(ggplot2)
library(ggrepel)
library(vegan)
library(ape)
library(cowplot)
library(reshape2)

sessionInfo()
## R version 4.1.0 (2021-05-18)
## Platform: x86_64-w64-mingw32/x64 (64-bit)
## Running under: Windows 10 x64 (build 19045)
## 
## Matrix products: default
## 
## locale:
## [1] LC_COLLATE=English_Canada.1252  LC_CTYPE=English_Canada.1252   
## [3] LC_MONETARY=English_Canada.1252 LC_NUMERIC=C                   
## [5] LC_TIME=English_Canada.1252    
## 
## attached base packages:
## [1] stats     graphics  grDevices utils     datasets  methods   base     
## 
## other attached packages:
##  [1] reshape2_1.4.4    cowplot_1.1.1     ape_5.5           vegan_2.6-2      
##  [5] lattice_0.20-44   permute_0.9-5     ggrepel_0.9.3     rstatix_0.7.0    
##  [9] stringr_1.4.0     microbiome_1.14.0 ggplot2_3.3.5     phyloseq_1.36.0  
## [13] sjmisc_2.8.9      dplyr_1.0.7       knitr_1.44       
## 
## loaded via a namespace (and not attached):
##  [1] nlme_3.1-152           bitops_1.0-7           insight_0.17.1        
##  [4] GenomeInfoDb_1.28.1    tools_4.1.0            backports_1.2.1       
##  [7] bslib_0.3.1            utf8_1.2.1             R6_2.5.0              
## [10] sjlabelled_1.1.8       DBI_1.1.1              BiocGenerics_0.38.0   
## [13] mgcv_1.8-35            colorspace_2.0-2       rhdf5filters_1.4.0    
## [16] ade4_1.7-17            withr_2.4.2            tidyselect_1.1.1      
## [19] curl_4.3.2             compiler_4.1.0         Biobase_2.52.0        
## [22] sass_0.4.0             scales_1.1.1           digest_0.6.27         
## [25] foreign_0.8-81         rmarkdown_2.23         rio_0.5.27            
## [28] XVector_0.32.0         pkgconfig_2.0.3        htmltools_0.5.2       
## [31] fastmap_1.1.0          readxl_1.3.1           rlang_0.4.11          
## [34] rstudioapi_0.13        jquerylib_0.1.4        generics_0.1.0        
## [37] jsonlite_1.7.2         zip_2.2.0              car_3.0-11            
## [40] RCurl_1.98-1.3         magrittr_2.0.1         GenomeInfoDbData_1.2.6
## [43] biomformat_1.20.0      Matrix_1.3-3           Rcpp_1.0.7            
## [46] munsell_0.5.0          S4Vectors_0.30.0       Rhdf5lib_1.14.2       
## [49] fansi_0.5.0            abind_1.4-5            lifecycle_1.0.1       
## [52] stringi_1.6.2          yaml_2.2.1             carData_3.0-4         
## [55] MASS_7.3-54            zlibbioc_1.38.0        rhdf5_2.36.0          
## [58] Rtsne_0.15             plyr_1.8.6             grid_4.1.0            
## [61] parallel_4.1.0         forcats_0.5.1          crayon_1.4.1          
## [64] Biostrings_2.60.2      haven_2.4.1            splines_4.1.0         
## [67] multtest_2.48.0        hms_1.1.0              pillar_1.7.0          
## [70] igraph_1.2.6           codetools_0.2-18       stats4_4.1.0          
## [73] glue_1.4.2             evaluate_0.21          data.table_1.14.0     
## [76] vctrs_0.3.8            foreach_1.5.1          cellranger_1.1.0      
## [79] gtable_0.3.0           purrr_0.3.4            tidyr_1.1.3           
## [82] assertthat_0.2.1       openxlsx_4.2.4         xfun_0.39             
## [85] broom_0.7.9            survival_3.2-11        tibble_3.1.2          
## [88] iterators_1.0.13       IRanges_2.26.0         cluster_2.1.2         
## [91] ellipsis_0.3.2

2. Import data files

2.1 Load in ASV counts + metadata

#Download files from github repository if needed
#download.file("https://raw.githubusercontent.com/bdaisley/MIMic-trial/main/data/MIMic_16S_asv_table.txt", "~/GITHUB/MIMic-trial/data/MIMic_16S_asv_table.txt", mode='wb', overwrite=TRUE)
#download.file("https://raw.githubusercontent.com/bdaisley/MIMic-trial/main/data/MIMic_metadata.txt", "~/GITHUB/MIMic-trial/data/MIMic_metadata.txt", mode='wb', overwrite=TRUE)

setwd("~/GITHUB/MIMic-trial")

d.b <- read.table("data/MIMic_16S_asv_table.txt", 
                  sep="\t", 
                  quote="", 
                  check.names=F, 
                  header=T, 
                  row.names=1, 
                  comment.char="")

rownames(d.b) <- paste(rownames(d.b), d.b$taxonomy, sep=";")
d.b.in <- d.b %>% 
  select(-taxonomy) %>% 
  sjmisc::rotate_df() %>% 
  select_if(colSums(.) != 0) %>% 
  sjmisc::rotate_df()


metadata<- read.table("data/MIMic_metadata.txt", header=T, sep='\t', comment.char = "") %>%
  mutate(timepoint = case_when(
    grepl("D1_|D2_|D5_", .$X.SampleID) ~ "0",
    grepl("_S1", .$X.SampleID) ~ "1",
    grepl("_S2", .$X.SampleID) ~ "2",
    grepl("_S3", .$X.SampleID) ~ "3",
    grepl("_S4", .$X.SampleID) ~ "4")) %>%
  mutate(groups = case_when(
    is.na(.$ORR) & grepl("D1_", .$X.SampleID) ~ "D1",
    is.na(.$ORR) & grepl("D2_", .$X.SampleID) ~ "D2",
    is.na(.$ORR) & grepl("D5_", .$X.SampleID) ~ "D5",
    .$ORR == "NR" & !grepl("^D", .$X.SampleID) ~ "NR",
    .$ORR == "R" & !grepl("^D", .$X.SampleID) ~ "R")) %>%
  mutate(category = case_when(
    grepl("_S1|_S2|_S3|_S4", .$X.SampleID) ~ "patient",
    !grepl("_S1|_S2|_S3|_S4", .$X.SampleID) ~ "donor"))

#Assign donor-patient matching
donor.vector <- setNames((metadata %>% 
                           filter(grepl("^D", .$description)) %>% 
                           mutate(vec1= str_split_fixed(.$description, "_", 4)[,1]))$vec1,
                         (metadata %>% 
                           filter(grepl("^D", .$description)) %>% 
                           mutate(vec2= str_split_fixed(.$description, "_", 4)[,4]) %>% 
                            mutate(vec2= gsub("P0","P", .$vec2)))$vec2)

metadata <- metadata %>% mutate(FMT_group= dplyr::recode(host_subject_id, !!!donor.vector))

#Donor - patient matches
print(donor.vector)
##  L623_P4  L803_P1  L623_P5  L623_P1  L803_P4  L803_P3  L623_P3  L803_P8 
##     "D1"     "D1"     "D1"     "D1"     "D2"     "D2"     "D2"     "D2" 
##  L803_P7  L803_P5  L802_P4 L803_P10  L623_P2  L803_P2  L802_P1  L802_P3 
##     "D2"     "D2"     "D2"     "D2"     "D2"     "D5"     "D5"     "D5" 
##  L802_P2  L802_P5 L803_P11  L623_P8 
##     "D5"     "D5"     "D5"     "D5"

2.2 Convert data to phyloseq format

PHYLO.OTU = otu_table(as.matrix(d.b.in), taxa_are_rows = TRUE)

tax.in <- cbind(str_split_fixed(rownames(d.b.in), ";", 8)[,2:8], 
                str_split_fixed(rownames(d.b.in), ";", 8)[,1])
colnames(tax.in) <- c("domain", "phylum", "class", "order", "family", "genus", "species", "asv")
rownames(tax.in)<-paste(rownames(d.b.in))
PHYLO.TAX <- tax_table(as.matrix(tax.in))

rownames(metadata) <- metadata$X.SampleID
PHYLO.META <- sample_data(data.frame(metadata))

ps.in <-phyloseq(PHYLO.OTU, PHYLO.TAX, PHYLO.META)
ps.clean <- ps.in %>% subset_taxa(family!= "Mitochondria" | is.na(family) & class!="Chloroplast" | is.na(class) ) 

3. Alpha diversity metrics

3.1 Calculate alpha diversity

Using phyloseq and microbiome packages to calculate alpha diversity metrics.

#Subset patient and donor matched samples

ps.clean.filt <- ps.clean %>% 
  subset_samples(
    grepl("D1_01DEC|D1_28SEP20|D1_07OCT20|D1_06JAN20", X.SampleID) | 
    grepl("D2_02MAR21|D2_03FEB21|D2_06DEC19|D2_08JUL21|D2_12MAY21|D2_16FEB21|D2_22JUN21|D2_27JUN19", X.SampleID) |
    grepl("D5_15DEC20|D5_23OCT20|D5_24MAR21|D5_25FEB21|D5_27JUL21|D5_29JUL21|D5_30JUN21", X.SampleID) |
    grepl("L623_|L802_|L803_", X.SampleID)
    ) %>% 
  phyloseq::prune_taxa(taxa_sums(.) > 0, .)

print(ps.clean)
## phyloseq-class experiment-level object
## otu_table()   OTU Table:         [ 3262 taxa and 123 samples ]
## sample_data() Sample Data:       [ 123 samples by 64 sample variables ]
## tax_table()   Taxonomy Table:    [ 3262 taxa by 8 taxonomic ranks ]
print(ps.clean.filt)
## phyloseq-class experiment-level object
## otu_table()   OTU Table:         [ 3058 taxa and 95 samples ]
## sample_data() Sample Data:       [ 95 samples by 64 sample variables ]
## tax_table()   Taxonomy Table:    [ 3058 taxa by 8 taxonomic ranks ]
alpha.diversity.bac <- estimate_richness(ps.clean.filt, split = TRUE, 
                                         measures = c("Observed", 
                                                      "Chao1", 
                                                      "ACE", 
                                                      "Shannon", 
                                                      "Simpson", 
                                                      "InvSimpson", 
                                                      "Fisher"))
alpha.dominance.bac <- microbiome::dominance(ps.clean.filt, 
                                             index = "all", 
                                             relative = TRUE, 
                                             aggregate = FALSE)
alpha.dominance2.bac <- microbiome::diversity(ps.clean.filt, 
                                              index = "all",
                                              zeroes=TRUE)

colnames(alpha.diversity.bac) <- paste(colnames(alpha.diversity.bac), "bdiv", sep="_")
colnames(alpha.dominance.bac) <- paste(colnames(alpha.dominance.bac), "bdom", sep="_")
colnames(alpha.dominance2.bac) <- paste(colnames(alpha.dominance2.bac), "bdom", sep="_")

alpha.metrics <- cbind(alpha.diversity.bac,alpha.dominance.bac, alpha.dominance2.bac)

3.2 Set donor and patient groups

alpha.metrics.merge <- merge(metadata, 
                             alpha.metrics, 
                             by.x="X.SampleID", 
                             by.y="row.names", 
                             all=F)

alpha.plot <- alpha.metrics.merge %>% `rownames<-`(alpha.metrics.merge$X.SampleID) 

alpha.plot <- alpha.plot %>%
  mutate(labels = case_when(
    grepl("D1_|D2_|D5_", .$X.SampleID) ~ "Donors",
    grepl("_S1", .$X.SampleID) ~ "S1",
    grepl("_S2", .$X.SampleID) ~ "S2",
    grepl("_S3", .$X.SampleID) ~ "S3",
    grepl("_S4", .$X.SampleID) ~ "S4")) %>%
  mutate(group_colours = case_when(
    is.na(.$ORR) & grepl("D1_", .$X.SampleID) ~ "D1",
    is.na(.$ORR) & grepl("D2_", .$X.SampleID) ~ "D2",
    is.na(.$ORR) & grepl("D5_", .$X.SampleID) ~ "D5",
    .$ORR == "NR" & !grepl("^D", .$X.SampleID) ~ "NR",
    .$ORR == "R" & !grepl("^D", .$X.SampleID) ~ "R")) %>%
  mutate(category = case_when(
    grepl("_S1|_S2|_S3|_S4", .$X.SampleID) ~ "patient",
    !grepl("_S1|_S2|_S3|_S4", .$X.SampleID) ~ "donor"))

3.3 Calculate pairwise comparisons

Using rstatix package to calculate

var1 = "core_abundance_bdom"
var2 = "labels"

f <- paste(var1, "~", var2)

res.kruskal1 <- alpha.plot %>% rstatix::kruskal_test(as.formula(f))


pwc1 <- alpha.plot %>% rstatix::wilcox_test(as.formula(f), 
                                            p.adjust.method = "BH", 
                                            detailed=TRUE) %>% 
  rstatix::add_xy_position(x = var2)

pwc1 <- pwc1 %>% filter(group1=="Donors" & group2=="S1" |
                          group1=="Donors" & group2=="S2" |
                          group1=="Donors" & group2=="S3" |
                          group1=="Donors" & group2=="S4" |
                          group1=="S1" & group2=="S2" |
                          group1=="S1" & group2=="S3" |
                          group1=="S1" & group2=="S4")

3.4 Plot alpha diversity (Fig. 2A)

Using ggplot2 package to plot ‘core abundance’ alpha diversity as determined using the microbiome package

print(alpha.plot$X.SampleID)
##  [1] "D1_01DEC"    "D1_06JAN20"  "D1_07OCT20"  "D1_28SEP20"  "D2_02MAR21" 
##  [6] "D2_03FEB21"  "D2_06DEC19"  "D2_08JUL21"  "D2_12MAY21"  "D2_16FEB21" 
## [11] "D2_22JUN21"  "D2_27JUN19"  "D5_15DEC20"  "D5_23OCT20"  "D5_24MAR21" 
## [16] "D5_25FEB21"  "D5_27JUL21"  "D5_29JUL21"  "D5_30JUN21"  "L623_P1_S1" 
## [21] "L623_P1_S2"  "L623_P1_S3"  "L623_P1_S4"  "L623_P2_S1"  "L623_P2_S2" 
## [26] "L623_P2_S3"  "L623_P2_S4"  "L623_P3_S1"  "L623_P3_S2"  "L623_P3_S3" 
## [31] "L623_P3_S4"  "L623_P4_S1"  "L623_P4_S2"  "L623_P4_S3"  "L623_P4_S4" 
## [36] "L623_P5_S1"  "L623_P5_S2"  "L623_P5_S3"  "L623_P5_S4"  "L623_P8_S1" 
## [41] "L623_P8_S2"  "L623_P8_S3"  "L623_P8_S4"  "L802_P1_S1"  "L802_P1_S2" 
## [46] "L802_P1_S3"  "L802_P1_S4"  "L802_P2_S1"  "L802_P2_S2"  "L802_P2_S3" 
## [51] "L802_P2_S4"  "L802_P3_S1"  "L802_P3_S2"  "L802_P3_S3"  "L802_P3_S4" 
## [56] "L802_P4_S1"  "L802_P4_S2"  "L802_P4_S3"  "L802_P4_S4"  "L802_P5_S1" 
## [61] "L802_P5_S2"  "L802_P5_S3"  "L802_P5_S4"  "L803_P1_S1"  "L803_P1_S2" 
## [66] "L803_P1_S3"  "L803_P1_S4"  "L803_P11_S1" "L803_P11_S2" "L803_P11_S3"
## [71] "L803_P11_S4" "L803_P2_S1"  "L803_P2_S2"  "L803_P2_S3"  "L803_P2_S4" 
## [76] "L803_P3_S1"  "L803_P3_S2"  "L803_P3_S3"  "L803_P3_S4"  "L803_P4_S1" 
## [81] "L803_P4_S2"  "L803_P4_S3"  "L803_P4_S4"  "L803_P5_S1"  "L803_P5_S2" 
## [86] "L803_P5_S3"  "L803_P5_S4"  "L803_P7_S1"  "L803_P7_S2"  "L803_P7_S3" 
## [91] "L803_P7_S4"  "L803_P8_S1"  "L803_P8_S2"  "L803_P8_S3"  "L803_P8_S4"
ggplot(alpha.plot, aes(x=get(var2), y=get(var1))) +
  ggbeeswarm::geom_beeswarm(aes(colour=group_colours), 
                            priority ="density", 
                            dodge.width = 0.25, 
                            cex=0.0001, 
                            size=2, 
                            alpha=1) +
  scale_colour_manual(values=c(D1="#33B1CC", 
                               D2="#FFB69C",
                               D5="#BD49FF",
                               NR="red",
                               R="blue")) +
  scale_fill_manual(values=c(D1="#33B1CC",
                             D2="#FFB69C",
                             D5="#BD49FF",
                             NR="red",
                             R="blue")) +
  ggpubr::stat_pvalue_manual(pwc1, 
                             label="p.adj", 
                             hide.ns=FALSE, 
                             size=3, 
                             bracket.size=0.1, 
                             vjust=0.5) + 
  theme_set(theme_bw()) + 
  theme(panel.background = element_blank(),
        panel.grid.major = element_blank(),
        panel.border = element_rect(colour = "black", fill=NA, size=0.5),
        axis.title.x.bottom = element_blank(),
        panel.grid.minor = element_blank(),
        axis.title.x = element_text(size=12),
        axis.title.y = element_text(size=14),
        axis.text.x = element_text(angle=0, size=12),
        axis.text.y = element_text(angle=0, size=12)) + #legend.position = "none"
  ylab("a-diversity (microbiome::core abundance") +
  labs(caption = rstatix::get_pwc_label(pwc1)) +
  stat_summary(aes(group = category),
               fun.y = "mean", 
               geom = "line", 
               size= 0.1 ) +
  stat_summary(fun.data = mean_cl_boot, 
               geom = "errorbar", 
               width = 0.3) 

4. Beta diversity metrics

4.1 Subset samples for comparisons

Subset an equal number of samples from each donor for comparisons

#Random subset of n=10 samples from each donor for consistency in pre-/post-FMT comparisons
donors.r10 <- c("D1_01OCT20","D1_06JAN20","D1_07OCT20","D1_09SEP20","D1_11AUG20",
                "D1_14SEP20","D1_16JUN20","D1_16NOV21","D1_28JAN20","D1_28SEP20",
                "D2_02MAR21","D2_22JUN21","D2_19JAN21","D2_09SEP21","D2_15JUN21",
                "D2_16FEB21","D2_03FEB21",'D2_27JUN19',"D2_27JUL21","D2_08JUL21",
                "D5_22SEP21","D5_30JUN21","D5_15JUN21","D5_18AUG21","D5_20OCT21",
                "D5_22SEP20","D5_14SEP21","D5_24MAR21","D5_29JUL21","D5_23SEP21")

S1.pre <- d.b %>% filter(!grepl("Chloroplast|Mitochondria", .$taxonomy)) %>%
  select(-taxonomy) %>% 
  sjmisc::rotate_df() %>% 
  mutate(X.SampleID=rownames(.)) %>% 
  filter(grepl(paste(donors.r10, collapse="|"), .$X.SampleID) | grepl("_S1", .$X.SampleID)) %>%
  select(-X.SampleID) %>% 
  select_if(colSums(.) != 0) %>% 
  sjmisc::rotate_df()

S4.post <- d.b %>% filter(!grepl("Chloroplast|Mitochondria", .$taxonomy)) %>%
  select(-taxonomy) %>% 
  sjmisc::rotate_df() %>% 
  mutate(X.SampleID=rownames(.)) %>% 
  filter(grepl(paste(donors.r10, collapse="|"), .$X.SampleID) | grepl("_S4", .$X.SampleID)) %>%
  select(-X.SampleID) %>% 
  select_if(colSums(.) != 0) %>% 
  sjmisc::rotate_df()

4.2 Donors vs S1 patients (Ex. Fig. 2A)

Plot PCoA based on Aitchison distances between donors and S1 patient samples (Extended Figure 2A)

#CLR transform data
S1.pre.clr <- t(apply(t(S1.pre+0.5), 1, function(x){log2(x) - mean(log2(x))}))

#Calculate distance matrix
dist.pre <- vegan::vegdist(S1.pre.clr,  method = "euclidean") 

#Calculate PCoA from distances
PCOA.pre <- ape::pcoa(dist.pre)
#Calculate explained variance for each axis
PCo1.pre <- paste("PCo1 (", round(PCOA.pre$values[1,2]*100, 1), "%)", sep="")
PCo2.pre <- paste("PCo2 (", round(PCOA.pre$values[2,2]*100, 1), "%)", sep="")

#Make new dataframe for plotting the PCoA
ord.pre <- as.data.frame(PCOA.pre$vectors[,c(1,2)], stringsasfactors=FALSE) %>% 
  mutate(X.SampleID= rownames(.)) %>%
  rename("PCo1" = "Axis.1", "PCo2" = "Axis.2") %>%
  mutate(group_colours = case_when(
    grepl("D1_", .$X.SampleID) ~ "D1",
    grepl("D2_", .$X.SampleID) ~ "D2",
    grepl("D5_", .$X.SampleID) ~ "D5",
    grepl("L623_P1_|L623_P8_|L802_P5_|L803_P10_|L803_P2_|L803_P3_|L803_P4_", .$X.SampleID) ~ "NR",
    grepl("L623_P2_|L623_P3_|L623_P4_|L623_P5_|L802_P1_|L802_P2_|L802_P3_|L802_P4_|L803_P1_|L803_P11_|L803_P5_|L803_P7_|L803_P8_", .$X.SampleID) ~ "R")) %>%
  mutate(category = case_when(
    grepl("_S1|_S2|_S3|_S4", .$X.SampleID) ~ "patient",
    !grepl("_S1|_S2|_S3|_S4", .$X.SampleID) ~ "donor"))

#Plot PCoA plot for Pre-FMT
pcoa.pre <- ggplot(ord.pre, 
                   aes(x=PCo1,
                       y=PCo2,
                       color=group_colours,
                       label=X.SampleID)) +
  theme(panel.border = element_rect(color ="grey", 
                                    fill = NA, 
                                    size = 0.5, 
                                    linetype = 1), 
        axis.line = element_line(),
        plot.title = element_text(hjust=0.5),
        axis.title.x = element_text(size=12),
        axis.title.y = element_text(size=12),
        axis.text.x = element_text(angle=0, size=10),
        axis.text.y = element_text(angle=0, size=10),
        panel.background = element_blank(),
        panel.grid.major = element_blank(), 
        panel.grid.minor = element_blank()) + #legend.position = "none"
  scale_colour_manual(name = "Key",
                      values=c(D1="#33B1CC", 
                               D2="#FFB69C",
                               D5="#BD49FF", 
                               NR="red", 
                               R="blue")) +
  scale_fill_manual(name = "Key",
                    values=c(D1="#33B1CC", 
                             D2="#FFB69C", 
                             D5="#BD49FF", 
                             NR="red", 
                             R="blue")) +
  geom_point(aes(),
             size=2, 
             alpha=rep(0.85)) +
  scale_x_continuous(limits=c(-50,52), 
                     breaks=c(-50, -25, 0, 25, 50)) +
  scale_y_continuous(limits=c(-51,45), 
                     breaks=c(-40, -20, 0, 20, 40)) +
  ggtitle("S1 ORR vs Donors (Pre-FMT)") +
  ggrepel::geom_text_repel(data=ord.pre %>% 
                             filter(category=="patient"), 
                           aes(), size=2, 
                           min.segment.length = Inf,
                           box.padding= 0.2) +
  annotate(geom="text",
           size=5,
           x=-22,
           y=40,
           label="Donor 1",
           color="#33B1CC") +
  annotate(geom="text",
           size=5,
           x=-27,
           y=-31,
           label="Donor 2",
           color="#FFB69C") +
  annotate(geom="text",
           size=5,
           x= 42,
           y= 27,
           label="Donor 5",
           color="#BD49FF") +
  xlab(PCo1.pre) + 
  ylab(PCo2.pre) +
  stat_ellipse(mapping = NULL, data = ord.pre %>% filter(category=="donor"), 
               geom = "path", 
               position = "identity", 
               type = "norm", 
               level = 0.68, 
               segments = 51, 
               na.rm = FALSE, 
               show.legend = NA, 
               inherit.aes = TRUE) +
  geom_vline(xintercept = c(0), 
             linetype="solid", 
             color="grey50", 
             alpha=0.5) + 
  geom_hline(yintercept = c(0), 
             linetype="solid", 
             color="grey50", 
             alpha=0.5)

pcoa.pre

4.3 Donors vs S4 patients

Plot PCoA based on Aitchison distances between donors and S4 patient samples

#CLR transform data
S4.post.clr <- t(apply(t(S4.post+0.5), 1, function(x){log2(x) - mean(log2(x))}))

#Calculate distance matrix
dist.post <- vegan::vegdist(S4.post.clr,  method = "euclidean") 
#Calculate PCoA from distances
PCOA.post <- ape::pcoa(dist.post)
#Calculate explained variance for each axis
PCo1.post <- paste("PCo1 (", round(PCOA.post$values[1,2]*100, 1), "%)", sep="")
PCo2.post <- paste("PCo2 (", round(PCOA.post$values[2,2]*100, 1), "%)", sep="")

#Make new dataframe for plotting the PCoA
ord.post <- as.data.frame(PCOA.post$vectors[,c(1,2)], stringsasfactors=FALSE) %>% mutate(X.SampleID= rownames(.)) %>%
  rename("PCo1" = "Axis.1", "PCo2" = "Axis.2") %>% #mutate(PCo2 = PCo2 *-1) %>%
  mutate(group_colours = case_when(
    grepl("D1_", .$X.SampleID) ~ "D1",
    grepl("D2_", .$X.SampleID) ~ "D2",
    grepl("D5_", .$X.SampleID) ~ "D5",
    grepl("L623_P1_|L623_P8_|L802_P5_|L803_P10_|L803_P2_|L803_P3_|L803_P4_", .$X.SampleID) ~ "NR",
    grepl("L623_P2_|L623_P3_|L623_P4_|L623_P5_|L802_P1_|L802_P2_|L802_P3_|L802_P4_|L803_P1_|L803_P11_|L803_P5_|L803_P7_|L803_P8_", .$X.SampleID) ~ "R")) %>%
  mutate(category = case_when(
    grepl("_S1|_S2|_S3|_S4", .$X.SampleID) ~ "patient",
    !grepl("_S1|_S2|_S3|_S4", .$X.SampleID) ~ "donor"))
#Plot PCoA plot for Post-FMT
pcoa.post <- ggplot(ord.post, 
                   aes(x=PCo1,
                       y=PCo2,
                       color=group_colours,
                       label=X.SampleID)) +
  theme(panel.border = element_rect(color ="grey", 
                                    fill = NA, 
                                    size = 0.5, 
                                    linetype = 1), 
        axis.line = element_line(),
        plot.title = element_text(hjust=0.5),
        axis.title.x = element_text(size=12),
        axis.title.y = element_text(size=12),
        axis.text.x = element_text(angle=0, size=10),
        axis.text.y = element_text(angle=0, size=10),
        panel.background = element_blank(),
        panel.grid.major = element_blank(), 
        panel.grid.minor = element_blank()) + #legend.position = "none"
  scale_colour_manual(name = "Key",
                      values=c(D1="#33B1CC", 
                               D2="#FFB69C",
                               D5="#BD49FF", 
                               NR="red", 
                               R="blue")) +
  scale_fill_manual(name = "Key",
                    values=c(D1="#33B1CC", 
                             D2="#FFB69C", 
                             D5="#BD49FF", 
                             NR="red", 
                             R="blue")) +
  geom_point(aes(),
             size=2, 
             alpha=rep(0.85)) +
  scale_x_continuous(limits=c(-50,52), 
                     breaks=c(-50, -25, 0, 25, 50)) +
  scale_y_continuous(limits=c(-51,45), 
                     breaks=c(-40, -20, 0, 20, 40)) +
  ggtitle("S4 ORR vs Donors (Post-FMT)") +
  ggrepel::geom_text_repel(data=ord.post %>% 
                             filter(category=="patient"), 
                           aes(), size=2, 
                           min.segment.length = Inf,
                           box.padding= 0.2) +
  annotate(geom="text",
           size=5,
           x=-22,
           y=40,
           label="Donor 1",
           color="#33B1CC") +
  annotate(geom="text",
           size=5,
           x=-27,
           y=-31,
           label="Donor 2",
           color="#FFB69C") +
  annotate(geom="text",
           size=5,
           x= 42,
           y= 27,
           label="Donor 5",
           color="#BD49FF") +
  xlab(PCo1.post) + 
  ylab(PCo2.post) +
  stat_ellipse(mapping = NULL, data = ord.post %>% filter(category=="donor"), 
               geom = "path", 
               position = "identity", 
               type = "norm", 
               level = 0.68, 
               segments = 51, 
               na.rm = FALSE, 
               show.legend = NA, 
               inherit.aes = TRUE) +
  geom_vline(xintercept = c(0), 
             linetype="solid", 
             color="grey50", 
             alpha=0.5) + 
  geom_hline(yintercept = c(0), 
             linetype="solid", 
             color="grey50", 
             alpha=0.5)

pcoa.post

4.4 Calculate donor-patient distances

Calculate Aitchison distance between matched donor-patient samples at S1-S4 timepoints

#Adjust zeroes and perform CLR transformation
ps.clr <- phyloseq::transform_sample_counts(ps.clean.filt, function(x) {x <- x + 0.5; x / sum(x); log2(x) - mean(log2(x))})


# Generate distance matrix
#------------------------------
iDist.list <- list()
iDist.list$CLR <- distance(ps.clr, method="euclidean", type="samples")
#iDist.list$bray <- distance(ps.clean.filt, method="bray", type="samples")

colnames(otu_table(ps.clr))
##  [1] "D1_01DEC"    "D1_06JAN20"  "D1_07OCT20"  "D1_28SEP20"  "D2_02MAR21" 
##  [6] "D2_03FEB21"  "D2_06DEC19"  "D2_08JUL21"  "D2_12MAY21"  "D2_16FEB21" 
## [11] "D2_22JUN21"  "D2_27JUN19"  "D5_15DEC20"  "D5_23OCT20"  "D5_24MAR21" 
## [16] "D5_25FEB21"  "D5_27JUL21"  "D5_29JUL21"  "D5_30JUN21"  "L623_P1_S1" 
## [21] "L623_P1_S2"  "L623_P1_S3"  "L623_P1_S4"  "L623_P2_S1"  "L623_P2_S2" 
## [26] "L623_P2_S3"  "L623_P2_S4"  "L623_P3_S1"  "L623_P3_S2"  "L623_P3_S3" 
## [31] "L623_P3_S4"  "L623_P4_S1"  "L623_P4_S2"  "L623_P4_S3"  "L623_P4_S4" 
## [36] "L623_P5_S1"  "L623_P5_S2"  "L623_P5_S3"  "L623_P5_S4"  "L623_P8_S1" 
## [41] "L623_P8_S2"  "L623_P8_S3"  "L623_P8_S4"  "L802_P1_S1"  "L802_P1_S2" 
## [46] "L802_P1_S3"  "L802_P1_S4"  "L802_P2_S1"  "L802_P2_S2"  "L802_P2_S3" 
## [51] "L802_P2_S4"  "L802_P3_S1"  "L802_P3_S2"  "L802_P3_S3"  "L802_P3_S4" 
## [56] "L802_P4_S1"  "L802_P4_S2"  "L802_P4_S3"  "L802_P4_S4"  "L802_P5_S1" 
## [61] "L802_P5_S2"  "L802_P5_S3"  "L802_P5_S4"  "L803_P1_S1"  "L803_P1_S2" 
## [66] "L803_P1_S3"  "L803_P1_S4"  "L803_P11_S1" "L803_P11_S2" "L803_P11_S3"
## [71] "L803_P11_S4" "L803_P2_S1"  "L803_P2_S2"  "L803_P2_S3"  "L803_P2_S4" 
## [76] "L803_P3_S1"  "L803_P3_S2"  "L803_P3_S3"  "L803_P3_S4"  "L803_P4_S1" 
## [81] "L803_P4_S2"  "L803_P4_S3"  "L803_P4_S4"  "L803_P5_S1"  "L803_P5_S2" 
## [86] "L803_P5_S3"  "L803_P5_S4"  "L803_P7_S1"  "L803_P7_S2"  "L803_P7_S3" 
## [91] "L803_P7_S4"  "L803_P8_S1"  "L803_P8_S2"  "L803_P8_S3"  "L803_P8_S4"
# Melt distance matrix
#------------------------------
dist.melt <- melt(as.matrix(iDist.list$CLR))
dist.melt.meta <- as.data.frame(as.matrix(sample_data(ps.clr)), stringsasfactors=FALSE)%>% mutate_if(is.factor,as.character)
#dist.melt.join <- left_join(dist.melt, dist.melt.meta, by = c("Var1" = "X.SampleID")) %>% left_join(., dist.melt.meta, by = c("Var2" = "X.SampleID")) %>% mutate(combined= paste(as.numeric(sample_no1.x), as.numeric(sample_no1.y), sep="_"))


################################ SELF vs DONOR COMPARISONS
dist.melt.join <- left_join(dist.melt, dist.melt.meta, by = c("Var1" = "X.SampleID")) %>% 
  left_join(., dist.melt.meta, by = c("Var2" = "X.SampleID")) %>% 
  mutate(combined= paste(as.numeric(timepoint.x), as.numeric(timepoint.y), sep="_"))

#against FMT matched donors <--- THIS is the one that was used for publication
dist.melt.join.sub <- rbind(dist.melt.join %>% filter(FMT_group.x==FMT_group.y & category.x=="patient" & category.y=="donor" & grepl("1_0$|2_0$|3_0$|4_0$", combined) & FMT_group.x=="D1"),
                            dist.melt.join %>% filter(FMT_group.x==FMT_group.y & category.x=="patient" & category.y=="donor" & grepl("1_0$|2_0$|3_0$|4_0$", combined) & FMT_group.x=="D2"),
                            dist.melt.join %>% filter(FMT_group.x==FMT_group.y & category.x=="patient" & category.y=="donor" & grepl("1_0$|2_0$|3_0$|4_0$", combined) & FMT_group.x=="D5"))


dist.melt.join.sub <- dist.melt.join.sub %>% 
  ungroup() %>% mutate_at(vars(value), as.numeric) %>%
  group_by(Var1, FMT_group.x) %>% 
  mutate_at(vars(value), funs(mean(.))) %>%
  ungroup() %>% 
  arrange(Var1) %>% 
  group_by(Var1) %>% 
  filter(Var2== first(Var2))

#STATS
pwc1 <- dist.melt.join.sub %>% 
  ungroup() %>% 
  group_by(ORR.x) %>% 
  rstatix::wilcox_test(value ~ combined, 
                       ref="1_0", 
                       p.adjust.method = "BH",
                       detailed=TRUE) %>% 
  rstatix::add_xy_position()  %>% mutate(y.subtract = c(35,39,43,10,14,18)) %>% mutate(y.position= .$y.position-.$y.subtract)

dist.melt.join.sub.mean <- dist.melt.join.sub %>%
  group_by(ORR.x, combined) %>% mutate(sem=plotrix::std.error(value)) %>% ungroup() %>%
  group_by(ORR.x, combined) %>% mutate(mean_value=mean(value)) %>% ungroup() %>%
  group_by(ORR.x, combined) %>% filter(value== first(value)) %>% ungroup() %>%
  mutate(combined=gsub("1_0", "S1", .$combined)) %>%
  mutate(combined=gsub("2_0", "S2", .$combined)) %>%
  mutate(combined=gsub("3_0", "S3", .$combined)) %>%
  mutate(combined=gsub("4_0", "S4", .$combined))

#Mean values

4.5 Plot distance to donor (Ex. Fig. 3B)

Plot PCoA of Aitchison distance for matched donor-patient samples at S1-S4 timepoints (Extended Figure 3B)

#LINE PLOT
ggplot(dist.melt.join.sub.mean, 
       aes(x=combined, 
           y=mean_value)) +
  geom_line(aes(x=combined, 
                y=mean_value, 
                group=host_subject_id.x, 
                colour=ORR.x), 
            alpha=1) +
  geom_errorbar(aes(ymin=mean_value-sem, 
                    ymax=mean_value+sem, 
                    group=ORR.x), 
                width=0.2) +
  geom_point(aes(x=combined, 
                 y=mean_value, 
                 colour=ORR.x),
             alpha=1, 
             size=3, 
             show_guide = FALSE) +
  facet_grid(~ORR.x) + coord_cartesian(ylim = c(90,135)) +
  ggpubr::stat_pvalue_manual(pwc1, 
                             label="p.adj",
                             hide.ns=FALSE,
                             size=3, 
                             bracket.size=0.1, 
                             vjust=0.1) +
  scale_colour_manual(name = "Key",
                      values=c(NR="red", 
                               R="blue")) +
  scale_fill_manual(name = "Key",
                    values=c(NR="red", 
                             R="blue")) +
  ylab("Distance to donor (Aitchinson CLR)") +
  labs(caption = rstatix::get_pwc_label(pwc1)) +
  theme_set(theme_bw()) + 
  theme(panel.background = element_blank(),
        panel.grid.major = element_blank(),
        axis.title.x.bottom = element_blank(),
        panel.grid.minor = element_blank(),
        legend.key.width = unit(1,"cm"),
        axis.text.x = element_text(angle=0),
        plot.title = element_text(size = 10)) + 
  guides(color = guide_legend(override.aes = list(size = 5)))