# robots.txt for http://www.GoAbroad.com/
#
# Format reminder: this file is line-oriented. Every directive and every
# comment must sit on its own line; a '#' comments out the rest of the line,
# so directives that share a line with a comment are silently ignored.

# XML Sitemaps Autodiscovery for Google, Yahoo, MSN & Ask.com
Sitemap: http://www.goabroad.com/goabroad_com_www_sitemap.xml.gz

############################################################################
# The blocked-robots list was adapted from http://www.WikiPedia.com/robots.txt.
# Ismael Angelo A. Casimpan Jr. - April 17, 2006
############################################################################

# Some bots are known to be trouble, particularly those designed to copy
# entire sites. Please obey robots.txt.
# Each bot below gets its own group and is denied the whole site.

User-agent: spider
Disallow: /

User-agent: bot-
Disallow: /

User-agent: bot/
Disallow: /

User-agent: LinkChecker
Disallow: /

User-agent: Microsoft URL Control
Disallow: /

User-agent: IRLbot
Disallow: /

User-agent: MJ12bot
Disallow: /

User-agent: Java
Disallow: /

User-agent: nicebot
Disallow: /

User-agent: Nutch
Disallow: /

User-agent: Python-urllib
Disallow: /

User-agent: Powermarks
Disallow: /

User-agent: Missigua_Locator
Disallow: /

User-agent: Web Downloader
Disallow: /

User-agent: lanshanbot
Disallow: /

User-agent: Custo
Disallow: /

User-agent: CFNetwork
Disallow: /

User-agent: HTTrack off-line browser
Disallow: /

User-agent: NutchCVS
Disallow: /

User-agent: T-H-U-N-D-E-R-S-T-O-N-E
Disallow: /

User-agent: Jakarta commons-httpclient
Disallow: /

User-agent: HTMLParser
Disallow: /

User-agent: crawl
Disallow: /

User-agent: sitecheck.internetseer.com
Disallow: /

User-agent: Zealbot
Disallow: /

User-agent: MSIECrawler
Disallow: /

User-agent: SiteSnagger
Disallow: /

User-agent: WebStripper
Disallow: /

User-agent: WebCopier
Disallow: /

User-agent: Fetch
Disallow: /

User-agent: Offline Explorer
Disallow: /

User-agent: Teleport
Disallow: /

User-agent: TeleportPro
Disallow: /

User-agent: WebZIP
Disallow: /

User-agent: linko
Disallow: /

User-agent: HTTrack
Disallow: /

User-agent: Microsoft.URL.Control
Disallow: /

User-agent: larbin
Disallow: /

User-agent: ZyBORG
Disallow: /

User-agent: Download Ninja
Disallow: /

#
# Doesn't follow robots.txt anyway, but...
#
User-agent: k2spider
Disallow: /

# A capture bot, downloads gazillions of pages with no public benefit
# http://www.webreaper.net/
User-agent: WebReaper
Disallow: /

#
# Hits many times per second, not acceptable
# http://www.nameprotect.com/botinfo.html
User-agent: NPBot
Disallow: /

########################################################################
# All other respectful bots use the block below:
# Ismael Angelo A. Casimpan Jr. - April 17, 2006
#
# NOTE: keep this group free of blank lines. Some parsers end a group at
# the first blank line, which would detach the rules that follow it
# (including the trap rule at the bottom) from "User-agent: *".
########################################################################
User-agent: *
Disallow: /mailform.cfm
Disallow: /tracker.cfm
Disallow: /fbounce.cfm
Disallow: /cbounce.cfm
Disallow: /sbounce.cfm
Disallow: /ctbounce.cfm
Disallow: /hitcounter.cfm
Disallow: /activedit/
Disallow: /linktracker.cfm
Disallow: /_bounce.cfm
Disallow: /bou_nce.cfm
Disallow: /bounce.cfm
Disallow: /bounce_.cfm
Disallow: /test_rss/
Disallow: /guides/
Disallow: /_bounce_.cfm
Disallow: /sendpagep3.cfm
Disallow: /cf_act_Newsletter.cfm
Disallow: /sendnewsletter.cfm
Disallow: /listingsp3.cfm
Disallow: /show-photo.cfm
# Currently disabled rules (kept for reference):
#Disallow: /y_key_93fe76e65261a629.html
# Disallow: /images/
Disallow: /client/
Disallow: /currency/
Disallow: /currency2.cfm
Disallow: /currency.cfm
Disallow: /inthenews/framebottom-frommers-review.html
Disallow: /inthenews/framebottom-getthatgig.html
Disallow: /abounce.cfm
Disallow: /sendmail.cfm
Disallow: /ads/
Disallow: /add_resume/
Disallow: /add_resume_login/
Disallow: /studentid/
# The trailing '?' limits this rule to URLs that carry a query string.
Disallow: /login/newaccount.cfm?
Disallow: /redirect.cfm
# Already covered by the "/guides/" prefix rule above; kept for explicitness.
Disallow: /guides/guidespage.cfm
Disallow: /cf_act_removeemail.cfm
Disallow: /sendconfirmation.cfm
Disallow: /frametop-advertise.html
Disallow: /login/edit.cfm
Disallow: /clientstatuspopup.cfm
Disallow: /captchainfo.cfm
Disallow: /newsletterlink.cfm
Disallow: /newsletterfeature.cfm
Disallow: /studyabroadprogram.cfm
Disallow: /volunteerabroadprogram.cfm
Disallow: /teachabroadprogram.cfm
Disallow: /jobsabroadprogram.cfm
Disallow: /otherprogram.cfm
########################################################################
# Trap for spiders (disrespectful bots/spiders) - icasimpan Apr 17,2006
# Well-behaved crawlers never request this path because it is disallowed
# here, so any client that does fetch it is ignoring robots.txt.
########################################################################
Disallow: /ADIS/