Complete reference for RatCrawler's APIs
Detailed documentation for Python implementation classes and methods
EnhancedProductionCrawler
Main crawler class that combines web content crawling with backlink analysis.
comprehensive_crawl(seed_urls: List[str]) -> Dict
Execute comprehensive crawling and analysis using a priority queue frontier.
seed_urls
- List of starting URLs to crawl
crawl_page_content(url: str) -> Optional[Dict]
Crawl a single page and extract comprehensive content data.
url
- URL to crawl
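A minimal single-page sketch, assuming the same configuration keys used in the examples section below; the exact fields of the returned dictionary depend on the implementation:

from crawler import EnhancedProductionCrawler

config = {
    'delay': 1.5,
    'max_depth': 1,
    'max_pages': 1,
    'db_path': 'website_crawler.db'
}
crawler = EnhancedProductionCrawler(config)

# crawl_page_content returns None when the page cannot be fetched or parsed
page_data = crawler.crawl_page_content('https://example.com')
if page_data is not None:
    print(sorted(page_data.keys()))  # inspect whichever content fields were extracted
else:
    print('Page could not be crawled')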
export_results(results: Dict)
Export crawl results to JSON and CSV files.
results
- Results dictionary from comprehensive_crawl
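A short sketch of exporting a finished crawl, reusing the configuration from the examples section below; the output file names are chosen by the implementation and are not documented here:

from crawler import EnhancedProductionCrawler

config = {
    'delay': 1.5,
    'max_depth': 3,
    'max_pages': 100,
    'db_path': 'website_crawler.db'
}
crawler = EnhancedProductionCrawler(config)

# Run a crawl, then write the same results dictionary out as JSON and CSV
results = crawler.comprehensive_crawl(['https://example.com'])
crawler.export_results(results)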
BacklinkProcessor
Handles backlink discovery, analysis, and PageRank calculations.
crawl_backlinks(seed_urls: List[str], max_depth: int = 2)
Crawl and discover backlinks starting from seed URLs.
seed_urls
- Starting URLs for backlink discovery
max_depth
- Maximum depth for crawling (default: 2)
calculate_pagerank(damping_factor: float = 0.85) -> Dict
Calculate PageRank scores for all discovered pages.
damping_factor
- PageRank damping factor (default: 0.85)
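For context, the damping factor d is the probability that a random surfer follows a link instead of jumping to a random page. The sketch below shows one update step of the standard PageRank formula that the default d = 0.85 comes from; it illustrates the formula only, not RatCrawler's internal implementation:

def pagerank_step(scores, outlinks, damping_factor=0.85):
    # Standard update: PR(p) = (1 - d) / N + d * sum(PR(q) / outdegree(q))
    # over all pages q that link to p
    n = len(scores)
    return {
        page: (1 - damping_factor) / n + damping_factor * sum(
            scores[q] / len(targets)
            for q, targets in outlinks.items()
            if page in targets
        )
        for page in scores
    }

# Two pages linking only to each other keep equal scores
scores = {'a': 0.5, 'b': 0.5}
outlinks = {'a': ['b'], 'b': ['a']}
print(pagerank_step(scores, outlinks))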
calculate_domain_authority()
Calculate domain authority scores based on backlink profiles.
Detailed documentation for Rust implementation structs and methods
WebsiteCrawler
High-performance async web crawler with concurrent processing.
async fn crawl(&mut self, seed_urls: Vec<String>, database: &mut WebsiteCrawlerDatabase) -> Result<CrawlResult, CrawlError>
Execute comprehensive web crawling with priority-based frontier.
seed_urls
- Vector of starting URLs
database
- Mutable reference to database instance
async fn crawl_single_page(&self, url: &str, depth: usize) -> Result<CrawledPage, CrawlError>
Crawl a single page and extract structured data.
url
- URL to crawl
depth
- Current crawl depth
fn extract_urls(&self, html: &str, base_url: &str) -> Vec<String>
Extract all URLs from HTML content.
html
- HTML content to parse
base_url
- Base URL for resolving relative links
BacklinkAnalyzer
Async backlink analysis with parallel processing capabilities.
async fn analyze_backlinks(&self, url: &str) -> Result<BacklinkAnalysis, BacklinkError>
Analyze backlinks for a given URL with comprehensive metrics.
url
- Target URL for backlink analysis
SQLite database structure and table definitions
Tracks crawling sessions and their metadata
Stores crawled page content and metadata
Stores discovered backlink relationships
create_crawl_session()
store_crawled_page()
store_backlinks()
get_all_crawled_urls()
get_crawl_summary()
store_backlinks()
get_backlinks_for_url()
store_domain_scores()
get_domain_authority()
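The concrete table and column definitions live in the SQLite files the crawlers create. A quick inspection sketch, assuming the default 'website_crawler.db' path from the Python configuration example below:

import sqlite3

# Print every table and its CREATE statement from the crawler database
conn = sqlite3.connect('website_crawler.db')
for name, sql in conn.execute("SELECT name, sql FROM sqlite_master WHERE type = 'table'"):
    print(name)
    print(sql)
conn.close()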
Practical examples for using RatCrawler APIs
from crawler import EnhancedProductionCrawler
# Initialize crawler
config = {
    'delay': 1.5,
    'max_depth': 3,
    'max_pages': 100,
    'db_path': 'website_crawler.db'
}
crawler = EnhancedProductionCrawler(config)
# Start crawling
seed_urls = ['https://example.com']
results = crawler.comprehensive_crawl(seed_urls)
print(f"Crawled {results['pages_crawled']} pages")
print(f"Found {results['backlinks_found']} backlinks")
from backlinkprocessor import BacklinkProcessor
# Initialize processor
processor = BacklinkProcessor(delay=1.0)
# Discover backlinks
seed_urls = ['https://example.com']
processor.crawl_backlinks(seed_urls, max_depth=2)
# Calculate metrics
pagerank_scores = processor.calculate_pagerank()
processor.calculate_domain_authority()
print(f"Analyzed {len(processor.backlinks)} backlinks")
use ratcrawler::*;
#[tokio::main]
async fn main() -> Result<(), Box<dyn std::error::Error>> {
    let config = CrawlConfig {
        user_agent: "MyBot/1.0".to_string(),
        timeout_secs: 30,
        max_pages: 100,
        ..Default::default()
    };
    let mut crawler = WebsiteCrawler::new(&config);
    let mut database = WebsiteCrawlerDatabase::new("crawl.db")?;
    let seed_urls = vec!["https://example.com".to_string()];
    let result = crawler.crawl(seed_urls, &mut database).await?;
    println!("Crawled {} pages", result.pages_crawled);
    Ok(())
}
use ratcrawler::*;
#[tokio::main]
async fn main() -> Result<(), Box<dyn std::error::Error>> {
    let processor = BacklinkProcessor::new(
        "MyBot/1.0".to_string(),
        60, // timeout
        5 // max redirects
    );
    let database = BacklinkDatabase::new("backlinks.db")?;
    let mut analyzer = BacklinkAnalyzer::new(processor, database);
    let analysis = analyzer.analyze_backlinks("https://example.com").await?;
    println!("Found {} backlinks", analysis.total_backlinks);
    Ok(())
}