Using ggpicrust2

Introduction

ggpicrust2 provides a practical workflow for PICRUSt2 downstream analysis:

convert KO profiles to pathway-level abundance when needed
run differential abundance analysis with multiple methods
annotate and visualize pathway-level results
inspect taxa-level contribution using PICRUSt2 per-sequence outputs
perform GSEA when pathway-set analysis is more appropriate than single-feature testing

This vignette focuses on the general package workflow. For a deeper GSEA walkthrough, see the dedicated gsea_analysis vignette.

Installation

# install.packages("devtools")
devtools::install_github("cafferychen777/ggpicrust2")

library(ggpicrust2)
library(tibble)

One-command workflow

If you want a fast end-to-end run from abundance data to annotated differential abundance output, start with ggpicrust2():

data("ko_abundance")
data("metadata")

results <- ggpicrust2(
  data = ko_abundance,
  metadata = metadata,
  group = "Environment",
  pathway = "KO",
  daa_method = "LinDA",
  ko_to_kegg = TRUE,
  order = "pathway_class",
  p_values_bar = TRUE,
  x_lab = "pathway_name"
)

# Access the main outputs
results[[1]]$plot
head(results[[1]]$results)

Use this route when you want a fast default analysis. Use the stepwise workflow below when you need more control over data preparation or visualization.

Stepwise pathway workflow

Convert KO abundance to KEGG pathway abundance

kegg_pathway_abundance <- ko2kegg_abundance(data = ko_abundance)
head(kegg_pathway_abundance[, 1:3])

Run differential abundance analysis

daa_results <- pathway_daa(
  abundance = kegg_pathway_abundance,
  metadata = metadata,
  group = "Environment",
  daa_method = "ALDEx2"
)

head(daa_results)

Annotate pathway results

annotated_daa <- pathway_annotation(
  pathway = "KO",
  daa_results_df = daa_results,
  ko_to_kegg = TRUE
)

head(annotated_daa)

Visualize pathway-level results

pathway_errorbar(
  abundance = kegg_pathway_abundance,
  daa_results_df = annotated_daa,
  Group = "Environment"
)

sig_pathways <- annotated_daa$feature[annotated_daa$p_adjust < 0.05]

if (length(sig_pathways) > 0) {
  pathway_heatmap(
    abundance = kegg_pathway_abundance[sig_pathways, , drop = FALSE],
    metadata = metadata,
    group = "Environment"
  )
}

pathway_pca(
  abundance = kegg_pathway_abundance,
  metadata = metadata,
  group = "Environment"
)

Taxa contribution workflow

PICRUSt2 can output contribution files that explain which taxa are driving predicted functional shifts. ggpicrust2 supports both gene-family-level and pathway-level contribution workflows.

Read PICRUSt2 contribution files

# For pred_metagenome_contrib.tsv
contrib_data <- read_contrib_file("pred_metagenome_contrib.tsv")

# For path_abun_contrib.tsv.gz; PICRUSt2 pathway output commonly uses MetaCyc IDs.
path_contrib_data <- read_pathway_contrib_file("path_abun_contrib.tsv.gz")

# For pred_metagenome_strat.tsv
strat_data <- read_strat_file("pred_metagenome_strat.tsv")

Aggregate to a taxonomic level

taxa_contrib <- aggregate_taxa_contributions(
  contrib_data = contrib_data,
  taxonomy = your_taxonomy_table,
  tax_level = "Genus",
  top_n = 10,
  daa_results_df = daa_results
)

head(taxa_contrib)

aggregate_taxa_contributions() accepts either:

normalized contribution data from read_contrib_file()
pathway-level contribution data from read_pathway_contrib_file()
stratified abundance data from read_strat_file()

Use daa_results_df or pathway_ids when you want to focus only on pathways that were significant in your pathway-level analysis.

When you already have pathway-level contribution data and do not want to run differential abundance analysis, omit daa_results_df and aggregate directly. For PICRUSt2 pathway contribution output, use matching MetaCyc annotations rather than KEGG pathway annotations.

path_taxa_contrib <- aggregate_taxa_contributions(
  contrib_data = path_contrib_data,
  taxonomy = your_taxonomy_table,
  tax_level = "Genus",
  top_n = 10
)

pathway_annotation_df <- pathway_annotation(
  data = data.frame(function_id = unique(path_taxa_contrib$function_id)),
  pathway = "MetaCyc"
)

Visualize taxa-level drivers

taxa_contribution_bar(
  contrib_agg = taxa_contrib,
  metadata = metadata,
  group = "Environment",
  facet_by = "function"
)

taxa_contribution_heatmap(
  contrib_agg = taxa_contrib,
  n_functions = 20
)

This step is useful when pathway-level significance is not enough and you need to identify which taxa are contributing to the change.

GSEA workflow

Use GSEA when you want pathway-set level inference from KO or EC abundance rather than testing each pathway independently.

gsea_results <- pathway_gsea(
  abundance = ko_abundance %>% column_to_rownames("#NAME"),
  metadata = metadata,
  group = "Environment",
  pathway_type = "KEGG",
  method = "camera"
)

annotated_gsea <- gsea_pathway_annotation(
  gsea_results = gsea_results,
  pathway_type = "KEGG"
)

visualize_gsea(
  gsea_results = annotated_gsea,
  plot_type = "barplot",
  n_pathways = 15
)

For a method-by-method GSEA explanation, covariate adjustment, and comparison with DAA, see the gsea_analysis vignette.