diff --git a/html_docs/Tobacco exposure associated with oral microbiome anaerobiosis in the New York City Health and Nutrition Examination Study (NYC HANES) II.html b/html_docs/Tobacco exposure associated with oral microbiome anaerobiosis in the New York City Health and Nutrition Examination Study (NYC HANES) II.html index 21e068d..a645bf1 100644 --- a/html_docs/Tobacco exposure associated with oral microbiome anaerobiosis in the New York City Health and Nutrition Examination Study (NYC HANES) II.html +++ b/html_docs/Tobacco exposure associated with oral microbiome anaerobiosis in the New York City Health and Nutrition Examination Study (NYC HANES) II.html @@ -1,5 +1,5 @@ - + @@ -8,7 +8,7 @@ - + Tobacco exposure associated with oral microbiome anaerobiosis in the New York City Health and Nutrition Examination Study (NYC HANES) II @@ -797,40 +797,40 @@

Tobacco exposure associated with oral microbiome an

Francesco Beghini1*

1Laboratory of Computational Metagenomics - Centre for Integrative Biology - University of Trento - Italy

*francesco.beghini@unitn.it

-

3 May 2023

+

5 May 2023

Contents

@@ -1187,43 +1187,64 @@

2 Microbial composition (8 most a

3 Alpha diversity

+
aovSMOKINGSTATUS <- aov(Shannon ~ smokingstatus, alphadiv)
+summary(aovSMOKINGSTATUS)
##                Df Sum Sq Mean Sq F value Pr(>F)
 ## smokingstatus   4   0.09 0.02162   0.168  0.954
 ## Residuals     254  32.62 0.12841
+
aovSMOKINGSTATUS <- aov(Observed ~ smokingstatus, alphadiv)
+summary(aovSMOKINGSTATUS)
##                Df  Sum Sq Mean Sq F value Pr(>F)  
 ## smokingstatus   4   34597    8649    2.12 0.0788 .
 ## Residuals     254 1036314    4080                 
 ## ---
 ## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
+
aovSMOKINGSTATUS <- aov(Chao1 ~ smokingstatus, alphadiv)
+summary(aovSMOKINGSTATUS)
##                Df  Sum Sq Mean Sq F value Pr(>F)
 ## smokingstatus   4   74507   18627    1.33  0.259
 ## Residuals     254 3556471   14002

4 Serum cotinine vs Smoking status

+
ggplot(metadata, aes(smokingstatus, COTININE, fill = smokingstatus)) + 
+  stat_boxplot() +
+  scale_fill_manual(values = scale_palette) + 
+  theme_bw() +
+  scale_x_discrete(labels = c("Never\nsmokers","Cigarette\nsmokers", "Former\nsmokers","Alternative\nsmokers","Secondhand\nsmoke")) +
+  xlab('Reported smoking status') +
+  ylab('Serum Cotinine (ng/ml)') +
+  guides(fill = FALSE) +
+  theme(strip.background = element_rect(colour = "transparent", fill = "transparent"),
+        strip.text.y = element_text(angle = 180),
+        axis.text = element_text(size = 12),
+        axis.title = element_text(size = 12),
+        legend.text = element_text(size=12),
+        legend.title = element_text(size=12),
+        legend.position = 'bottom')

5 Beta Diversity

5.1 Cigarette smokers vs Never smokers

-

+

5.2 Cigarette smokers + Never smokers + Former smokers

-

+

5.3 Cigarette smokers + Never smokers + Former smokers + Secondhand smokers

-

+

5.4 All smoking statuses

-

+

5.5 Discrete serum blood Cotinine levels

-

+

5.5.1 Test group with PERMANOVA (adonis, vegan package)

@@ -1237,53 +1258,53 @@

5.5.1 Test group with PERMANOVA (

- + - + - - - + + + - - - + + + - - + + - - + + - - + + - - - + + + - - - + + + - - + +
Never smoker vs Cigarette0.05090710.0505876 0.001
Cigarette vs Former smoker0.04286460.0424802 0.001
Cigarette vs Alternative smoker0.03089230.001Cigarette vs Secondhand0.03492170.004
Cigarette vs Secondhand0.03420120.010Cigarette vs Alternative smoker0.02905500.008
Never smoker vs Secondhand0.02552690.0790.02304140.104
Never smoker vs Former smoker0.01743120.2140.01868270.160
Former smoker vs Alternative smoker0.01436940.2670.01434340.279
Alternative smoker vs Secondhand0.01180040.361Former smoker vs Secondhand0.01296410.327
Former smoker vs Secondhand0.01317220.375Alternative smoker vs Secondhand0.00911590.477
Never smoker vs Alternative smoker0.00881710.5440.00894630.510
@@ -1293,10 +1314,10 @@

5.5.1 Test group with PERMANOVA ( ## Number of permutations: 999 ## ## vegan::adonis2(formula = distwu_2cat ~ COTININE, data = metadata_2cat) -## Df SumOfSqs R2 F Pr(>F) -## COTININE 1 0.05422 0.04807 6.4136 0.001 *** -## Residual 127 1.07356 0.95193 -## Total 128 1.12777 1.00000 +## Df SumOfSqs R2 F Pr(>F) +## COTININE 1 0.09755 0.04867 6.4971 0.002 ** +## Residual 127 1.90674 0.95133 +## Total 128 2.00429 1.00000 ## --- ## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
@@ -1308,15 +1329,15 @@

5.5.1.1 Three categories

## ## vegan::adonis2(formula = distwu_3cat ~ smokingstatus + RACE + GENDER + AGEGRP4C + SR_ACTIVE + EDU3CAT + DBTS_NEW, data = metadata_3cat) ## Df SumOfSqs R2 F Pr(>F) -## smokingstatus 2 0.09210 0.05263 4.8153 0.001 *** -## RACE 4 0.05776 0.03300 1.5098 0.104 -## GENDER 1 0.01392 0.00796 1.4560 0.201 -## AGEGRP4C 1 0.02326 0.01329 2.4319 0.052 . -## SR_ACTIVE 2 0.01812 0.01036 0.9476 0.458 -## EDU3CAT 2 0.02582 0.01476 1.3501 0.193 -## DBTS_NEW 1 0.00807 0.00461 0.8439 0.475 -## Residual 158 1.51100 0.86340 -## Total 171 1.75005 1.00000 +## smokingstatus 2 0.13485 0.05649 5.1964 0.001 *** +## RACE 4 0.07660 0.03209 1.4759 0.110 +## GENDER 1 0.02369 0.00993 1.8259 0.094 . +## AGEGRP4C 1 0.03184 0.01334 2.4535 0.052 . +## SR_ACTIVE 2 0.02715 0.01138 1.0464 0.354 +## EDU3CAT 2 0.03124 0.01309 1.2037 0.256 +## DBTS_NEW 1 0.01153 0.00483 0.8884 0.469 +## Residual 158 2.05013 0.85886 +## Total 171 2.38703 1.00000 ## --- ## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
@@ -1329,9 +1350,9 @@

5.5.1.2 All categories

## ## vegan::adonis2(formula = distwu ~ smokingstatus, data = metadata) ## Df SumOfSqs R2 F Pr(>F) -## smokingstatus 4 0.4139 0.04867 3.2487 0.001 *** -## Residual 254 8.0905 0.95133 -## Total 258 8.5044 1.00000 +## smokingstatus 4 0.1768 0.04838 3.2285 0.001 *** +## Residual 254 3.4775 0.95162 +## Total 258 3.6543 1.00000 ## --- ## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
@@ -1394,39 +1415,149 @@

6.3.2 Plot crude vs adjusted beta

6.3.3 Secondhand vs Cigarette/Never: only significant OTUs

A function for merging smoking and cotinine edgeR results:

+
mergeResults <- function(obj.smoking, obj.cotinine) {
+  intersect.filters <-
+    intersect(rownames(obj.smoking), rownames(obj.cotinine))
+  intersect.filters <- sort(intersect.filters)
+  obj.smoking <- obj.smoking[intersect.filters, ]
+  obj.cotinine <- obj.cotinine[intersect.filters, ]
+  stopifnot(all.equal(rownames(obj.smoking), rownames(obj.cotinine)))
+  ##
+  res <-
+    data.frame(
+      logFC_smoking = obj.smoking$table$logFC,
+      logFC_cotinine = obj.cotinine$table$logFC,
+      FDR_smoking = obj.smoking$table$FDR,
+      FDR_cotinine = obj.cotinine$table$FDR,
+      OTU = rownames(obj.smoking$table)
+    )
+  res <- cbind(res, obj.smoking[, c("Phylum", "Class", "Order", "Family", "Genus", "Species")])
+  return(res)
+}

And a plotting function:

+
plotSmokingcotinineComparison <-
+  function(obj.smoking,
+           obj.cotinine,
+           labtext = "Crude",
+           FDRcutoff = 1) {
+    dataset <- mergeResults(obj.smoking, obj.cotinine)
+    dataset <- dataset[dataset$FDR_smoking < FDRcutoff,]
+    corr.result <- with(dataset,
+                        cor.test(logFC_smoking, logFC_cotinine))
+    p <- ggplot(dataset) +
+      theme_bw() +
+      xlab(paste(labtext, "Smokers vs. Non-smokers")) +
+      ylab(paste(labtext, "Cotinine Levels (ng/ml)")) +
+      geom_text(aes(1,-0.3, label = sprintf(
+        "p-value: %s\ncor: %s \nFDR<0.05: %s",
+        signif(corr.result$p.value, 2),
+        signif(corr.result$estimate, 2),
+        sum(dataset$FDR_smoking < 0.05)
+      )), size = 5) +
+      geom_point(aes(logFC_smoking, logFC_cotinine, color = FDR_smoking <= 0.05),
+                 size = 3) +
+      scale_colour_manual(values = setNames(c('black', 'grey'), c(TRUE, FALSE)), guide =
+                            FALSE) +
+      geom_smooth(
+        aes(logFC_smoking, logFC_cotinine),
+        method = lm,
+        se = FALSE,
+        color = "red"
+      ) +
+      theme(
+        axis.text = element_text(
+          hjust = 0,
+          vjust = 0.5,
+          size = 13
+        ),
+        strip.text = element_text(size = 13),
+        axis.title = element_text(size = 15)
+      )
+    return(p)
+  }

Two thresholds for serum cotinine:

+
NYC_HANES.secondhand.original <- subset_samples(NYC_HANES, smokingstatus %in% c("Secondhand"))
+NYC_HANES.secondhand.strict <- subset_samples(NYC_HANES.secondhand.original, COTININE < 10)
+sort(sample_data(NYC_HANES.secondhand.original)$COTININE)
##  [1]  1.0022  1.0394  1.0418  1.0715  1.0915  1.2015  1.2282  1.2718  1.3822
 ## [10]  1.3822  1.4094  1.5315  1.6918  2.0094  2.5994  2.6759  2.8020  2.9394
 ## [19]  2.9720  3.0459  3.1082  3.1722  3.4715  3.8394  4.3418  4.5294  4.5394
 ## [28]  4.7522  5.7220  6.3118  7.1315  7.7382  9.1118  9.6415 10.2922 10.6920
 ## [37] 10.7915 13.7894
+
summary(sample_data(NYC_HANES.secondhand.original)$COTININE > 10)
##    Mode   FALSE    TRUE 
 ## logical      34       4

Number of OTUs with FDR < 0.05 and total that passed non-specific screens in both analysis:

+
summary(edger.smoker_secondhand_crude$FDR_smoking < 0.05)
##    Mode   FALSE    TRUE 
 ## logical      93      28
+
# note that the different total number for strict cotinine cutoff is 
+# due to non-specific filtering in edgeR
+summary(edger.smoker_secondhand_crude.strict$FDR_smoking < 0.05) 
##    Mode   FALSE    TRUE 
 ## logical      87      27
+
corarray <- array(dim=c(2, 2, 2), dimnames=list(secondhand=c("cot14", "cot10"),
+                                                counfounding=c("crude", "adjusted"),
+                                                FDRcutoff=c("no", "yes")))
+parray <- array(dim=c(2, 2, 2), dimnames=list(secondhand=c("cot14", "cot10"),
+                                                counfounding=c("crude", "adjusted"),
+                                                FDRcutoff=c("no", "yes")))

Correlation between crude smoking and cotinine coefficients, all OTUs:

+
corr <- with(edger.smoker_secondhand_crude, 
+     cor.test(logFC_smoking, logFC_cotinine))
+parray["cot14", "crude", "no"] <- corr$p.value
+corarray["cot14", "crude", "no"] <- corr$estimate

Strict definition of second-hand smoke:

+
corr <- with(edger.smoker_secondhand_crude.strict, 
+     cor.test(logFC_smoking, logFC_cotinine))
+parray["cot10", "crude", "no"] <- corr$p.value
+corarray["cot10", "crude", "no"] <- corr$estimate

Correlation between crude smoking and cotinine coefficients, FDR < 0.05 in smoking:

-

All OTUs, black for FDR < 0.05 on smoking coefficients, grey for FDR >= 0.05 -

-

Repeat, using the strict definition of second-hand smokers (cotinine < 10) -

+
corr <- with(filter(edger.smoker_secondhand_crude, FDR_smoking < 0.05),
+  cor.test(logFC_smoking, logFC_cotinine))
+corarray["cot10", "crude", "yes"] <- corr$p.value
+parray["cot10", "crude", "yes"] <- corr$estimate
+

All OTUs, black for FDR < 0.05 on smoking coefficients, grey for FDR >= 0.05

+
p <-
+  plotSmokingcotinineComparison(dasmoking.crude,
+                                dacot.crude,
+                                labtext = "Crude",
+                                FDR = 1)
+p
+

+

Repeat, using the strict definition of second-hand smokers (cotinine < 10)

+
p <-
+  plotSmokingcotinineComparison(dasmoking.crude,
+                                dacot.crude.strict,
+                                labtext = "Crude",
+                                FDR = 1)
+p
+

6.3.4 Adjusted Secondhand vs Adjusted Cigarette/Never: only significant OTUs

Number of differentially abundant OTUs, and total

+
nrow(edger.smoker_secondhand_adjusted)
## [1] 121
+
summary(edger.smoker_secondhand_adjusted$logFC_smoking < 0.05)
##    Mode   FALSE    TRUE 
 ## logical      63      58
+
summary(edger.smoker_secondhand_adjusted$logFC_cotinine < 0.05)
##    Mode   FALSE    TRUE 
 ## logical      38      83

Correlation between adjusted smoking and cotinine coefficients, all OTUs:

+
corr <- with(edger.smoker_secondhand_adjusted, 
+     cor.test(logFC_smoking, logFC_cotinine))
+parray["cot14", "adjusted", "no"] <- corr$p.value
+corarray["cot14", "adjusted", "no"] <- corr$estimate

As above, using strict serum cotinine cutoff (<10)

+
corr <- with(edger.smoker_secondhand_adjusted.strict, 
+     cor.test(logFC_smoking, logFC_cotinine))
+parray["cot10", "adjusted", "no"] <- corr$p.value
+corarray["cot10", "adjusted", "no"] <- corr$estimate

Correlation between adjusted smoking and cotinine coefficients, FDR < 0.05 in smoking:

+
with(filter(edger.smoker_secondhand_adjusted, FDR_smoking < 0.05),
+  cor.test(logFC_smoking, logFC_cotinine))
## 
 ##  Pearson's product-moment correlation
 ## 
@@ -1438,7 +1569,10 @@ 

6.3.4 Adjusted Secondhand vs Adju ## sample estimates: ## cor ## 0.6789709

+
parray["cot14", "adjusted", "yes"] <- corr$p.value
+corarray["cot14", "adjusted", "yes"] <- corr$estimate

Sensitivity analysis:

+
round(corarray, 2)
## , , FDRcutoff = no
 ## 
 ##           counfounding
@@ -1452,6 +1586,7 @@ 

6.3.4 Adjusted Secondhand vs Adju ## secondhand crude adjusted ## cot14 NA 0.34 ## cot10 0 NA

+
signif(parray, 1)
## , , FDRcutoff = no
 ## 
 ##           counfounding
@@ -1465,12 +1600,36 @@ 

6.3.4 Adjusted Secondhand vs Adju ## secondhand crude adjusted ## cot14 NA 3e-04 ## cot10 0.6 NA

+
p <-
+  plotSmokingcotinineComparison(
+    obj.smoking = dasmoking.adjusted,
+    obj.cotinine = dacot.adjusted,
+    labtext = "Adjusted",
+    FDRcutoff = 1
+  )
+p

-

Repeat, using the strict definition of second-hand smokers (cotinine < 10) -

+

Repeat, using the strict definition of second-hand smokers (cotinine < 10)

+
p <-
+  plotSmokingcotinineComparison(
+    obj.smoking = dasmoking.adjusted,
+    obj.cotinine = dacot.adjusted.strict,
+    labtext = "Adjusted",
+    FDRcutoff = 1
+  )
+p
+

6.3.5 Write supplemental file

+
stopifnot(all.equal(rownames(edger.smoker_secondhand_adjusted), 
+                    rownames(edger.smoker_secondhand_crude)))
+crude <- edger.smoker_secondhand_crude[, 1:4]
+adjusted <- edger.smoker_secondhand_adjusted[, 1:4]
+colnames(crude) <- paste0(colnames(crude), "_crude")
+colnames(adjusted) <- paste0(colnames(adjusted), "_adjusted")
+sfile <- cbind(edger.smoker_secondhand_crude[, -1:-4], crude, adjusted)
+readr::write_excel_csv(sfile, path="SupplementalFile1.csv")

6.3.6 Alternative smokers vs Never smoker: crude

@@ -1701,6 +1860,7 @@

7.7.2 ORA

## <character> <numeric> <numeric> <numeric> ## 1 fana 10 1 0.323 ## 2 anae 21 0 1.000 +
sessionInfo()
## R version 4.3.0 (2023-04-21)
 ## Platform: x86_64-pc-linux-gnu (64-bit)
 ## Running under: Pop!_OS 22.04 LTS
diff --git a/html_docs/Tobacco exposure associated with oral microbiome anaerobiosis in the New York City Health and Nutrition Examination Study (NYC HANES) II.pdf b/html_docs/Tobacco exposure associated with oral microbiome anaerobiosis in the New York City Health and Nutrition Examination Study (NYC HANES) II.pdf
index bdbf1f7..55774af 100644
Binary files a/html_docs/Tobacco exposure associated with oral microbiome anaerobiosis in the New York City Health and Nutrition Examination Study (NYC HANES) II.pdf and b/html_docs/Tobacco exposure associated with oral microbiome anaerobiosis in the New York City Health and Nutrition Examination Study (NYC HANES) II.pdf differ