Skip to contents

Runs a complete workflow in one call: searches the selected databases, deduplicates and analyzes the results, generates visualizations, exports the results, and generates reports.

Usage

complete_search_workflow(
  search_terms,
  databases = "pubmed",
  gold_standard = NULL,
  max_results = 100,
  date_range = NULL,
  output_dir = NULL
)

Arguments

search_terms

Character vector of search terms

databases

Character vector of databases to search (default: "pubmed")

gold_standard

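Optional vector of known relevant article IDs. When it is NULL (the default), the workflow warns that precision and recall cannot be calculated, and the precision/recall metrics are returned as NA.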

max_results

Maximum number of results to retrieve (default: 100)

date_range

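Optional date range for the search, given as a character vector of two dates (start, end), e.g. c("2022/01/01", "2023/12/31"). When NULL (the default), no date range is supplied.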

output_dir

Directory for reports. If NULL (the default), tempdir() is used.

Value

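List containing the search results, analysis, and report paths. As shown in the examples below, it includes a `summary` element describing the run and an `analysis` element whose `metrics` component has class "search_metrics".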

Examples

# \donttest{
# Complete workflow
results <- complete_search_workflow(
  search_terms = "diabetes treatment clinical trial",
  databases = "pubmed",
  max_results = 50,
  date_range = c("2022/01/01", "2023/12/31")
)
#> === Starting Complete Search and Analysis Workflow ===\n\nStep 1: Searching databases...\n\n=== Searching PUBMED ===\n[OK] Connected to PubMed successfully\nSearching PubMed with query: diabetes treatment clinical trial AND ("2022/01/01"[Date - Publication] : "2023/12/31"[Date - Publication]) \nFound 50 results\nRetrieving detailed information...\nRetrieving batch 1 of 1 \nSearch completed successfully!\n\n=== Search Summary ===\nTotal results: 10 \nDatabases searched: pubmed \n\nStep 2: Processing and deduplicating results...\nOriginal results: 10 \nAfter deduplication: 10 \n\nStep 3: Analyzing search performance...\n
#> Warning: No gold standard provided - cannot calculate precision/recall
#> \nStep 4: Generating visualizations...\n\nStep 5: Exporting results...\n\nStep 6: Generating reports...\n\n=== Workflow Complete ===\nResults exported to: /tmp/RtmphHt80i \nTotal articles found: 10 \nAfter deduplication: 10 \n

# View summary
print(results$summary)
#> $search_terms
#> [1] "diabetes treatment clinical trial"
#> 
#> $databases_searched
#> [1] "pubmed"
#> 
#> $total_found
#> [1] 10
#> 
#> $after_deduplication
#> [1] 10
#> 
#> $duplicates_removed
#> [1] 0
#> 
#> $date_range
#> [1] "2022/01/01" "2023/12/31"
#> 
#> $search_date
#> [1] "2025-11-03 15:38:10 UTC"
#> 
#> $has_gold_standard
#> [1] FALSE
#> 
#> $output_directory
#> [1] "/tmp/RtmphHt80i"
#> 

# Access detailed metrics
print(results$analysis$metrics)
#> $basic
#> $basic$total_records
#> [1] 10
#> 
#> $basic$unique_records
#> [1] 10
#> 
#> $basic$duplicates
#> [1] 0
#> 
#> $basic$date_range
#> [1] "2023-02-17" "2025-09-13"
#> 
#> $basic$sources
#> [1] 1
#> 
#> 
#> $precision_recall
#> $precision_recall$precision
#> [1] NA
#> 
#> $precision_recall$recall
#> [1] NA
#> 
#> $precision_recall$f1_score
#> [1] NA
#> 
#> $precision_recall$true_positives
#> [1] NA
#> 
#> $precision_recall$false_positives
#> [1] NA
#> 
#> $precision_recall$false_negatives
#> [1] NA
#> 
#> $precision_recall$number_needed_to_read
#> [1] NA
#> 
#> 
#> $efficiency
#> $efficiency$efficiency_score
#> [1] NA
#> 
#> 
#> $coverage
#> $coverage$total_coverage
#> [1] NA
#> 
#> 
#> $temporal
#> $temporal$date_range
#> [1] "2023-02-17" "2025-09-13"
#> 
#> $temporal$year_distribution
#> $temporal$year_distribution$`2023`
#> [1] 3
#> 
#> $temporal$year_distribution$`2024`
#> [1] 2
#> 
#> $temporal$year_distribution$`2025`
#> [1] 5
#> 
#> 
#> $temporal$temporal_span
#> Time difference of 939 days
#> 
#> 
#> attr(,"class")
#> [1] "search_metrics"
# }