# robots.txt
#
# Format: one directive per line; "#" starts a comment that runs to the end
# of the line. A crawler obeys only the group whose User-agent line matches
# it most specifically, so bots named in their own group below do NOT also
# apply the "*" rules.
# Groups contain no blank lines on purpose: some older parsers treat a blank
# line as the end of a group.

# ---------------------------------------------------------------------------
# Default rules for every crawler that has no dedicated group
# ---------------------------------------------------------------------------
User-agent: *
# Crawl-delay is a non-standard extension (not in RFC 9309); crawlers that do
# not implement it simply ignore the line.
Crawl-delay: 60
#
# Block internal search parameters and faceted navigation.
# Paths must begin with "/"; matching is by prefix, so no trailing "*" is
# needed.
# NOTE(review): each pattern matches its text anywhere after the "?", so
# "s=" also catches e.g. "tags=" and "sort=" also catches "resort=". Tighten
# to "/*?s=" plus "/*&s=" if that is broader than intended.
Disallow: /*?*s=
Disallow: /*?*search=
Disallow: /*?*query=
Disallow: /*?*sort=
Disallow: /*?*filter=
Disallow: /*?*price=
Disallow: /*?*color=
Disallow: /*?*size=
#
# Block action URLs
Disallow: /*?*add=
Disallow: /*?*add_to_cart=
Disallow: /*?*add_to_wishlist=
#
# Block PDF files ("$" anchors the match to the end of the URL)
Disallow: /*.pdf$

# ---------------------------------------------------------------------------
# Block web-archive services
# ---------------------------------------------------------------------------
# NOTE: "Noindex" and "Archive-Control-Allow" are not defined by RFC 9309.
# Google stopped honoring "noindex" in robots.txt on 2019-09-01, and
# standards-compliant parsers skip lines they do not recognize. They are kept
# for any crawler that may still read them; "Disallow: /" is the rule that
# actually does the blocking.
User-agent: ia_archiver
User-agent: archive.org_bot
Disallow: /
Noindex: /
Archive-Control-Allow: never

# NOTE(review): RFC 9309 product tokens may contain only letters, "-" and
# "_". Names with spaces, dots or slashes (e.g. "Wayback Machine",
# "web.archive.org") may be truncated or never matched by strict parsers.
User-agent: archive.is_bot
User-agent: Wayback
User-agent: Wayback Machine
User-agent: waybackarchive.org
# Additional archive services
User-agent: Webarchive.NL
User-agent: mementoweb.org
User-agent: web.archive.org
Disallow: /
Noindex: /
Archive-Control-Allow: never
Crawl-delay: 120

# ---------------------------------------------------------------------------
# Block AI chatbots and training crawlers
# ---------------------------------------------------------------------------
User-agent: GPTBot
User-agent: Claude-Web
User-agent: ClaudeBot
User-agent: anthropic-ai
User-agent: cohere-ai
User-agent: Bytespider
User-agent: Google-Extended
User-agent: PerplexityBot
User-agent: Applebot-Extended
User-agent: Diffbot
Disallow: /
Noindex: /

# ---------------------------------------------------------------------------
# Block scrapers
# ---------------------------------------------------------------------------
User-agent: Scrapy
User-agent: magpie-crawler
Disallow: /
Noindex: /
Crawl-delay: 120
# ---------------------------------------------------------------------------
# Block scrapers and bulk crawlers
# ---------------------------------------------------------------------------
# One directive per line is required; several directives on one physical line
# are parsed as a single malformed record.
# NOTE: "Noindex" is not defined by RFC 9309 and is ignored by crawlers that
# do not recognize it; "Disallow: /" is the rule that does the blocking.
User-agent: CCBot
User-agent: omgili
User-agent: omgilibot
User-agent: Node/simplecrawler
Disallow: /
Noindex: /
Crawl-delay: 120

# ---------------------------------------------------------------------------
# SEO crawlers: allowed, but throttled and kept out of assets
# ---------------------------------------------------------------------------
# A crawler with its own group reads ONLY that group, so the protected
# directories are repeated here; otherwise these bots would not see them.
User-agent: AhrefsBot
User-agent: SemrushBot
Crawl-delay: 100
Disallow: /assets/
Disallow: /cgi-bin/
Disallow: /scripts/
Disallow: /tmp/
Disallow: /BK
Disallow: /backup_index

# ---------------------------------------------------------------------------
# Standard protected directories (all other crawlers)
# ---------------------------------------------------------------------------
# Rules must sit under a User-agent line to apply to anyone. RFC 9309 parsers
# merge this group with every other "User-agent: *" group in the file.
User-agent: *
Disallow: /cgi-bin/
Disallow: /scripts/
Disallow: /tmp/
# Prefix match: blocks /BK, /BK/ and anything else starting with "/BK".
Disallow: /BK
# Paths must start with "/"; a bare "backup_index" can never match a URL path.
# NOTE(review): assumes backup_index lives at the site root; use
# "/*backup_index" if it can appear in subdirectories.
Disallow: /backup_index