updated to support newforms-admin better, fixed some minor bugs, cleaned up the help strings

git-svn-id: https://django-robots.googlecode.com/svn/trunk@12 12edf5ea-513a-0410-8a8c-37067077e60f

committer: leidel <leidel@12edf5ea-513a-0410-8a8c-37067077e60f>

--HG--
extra : convert_revision : 78c5bb1
jezdez committed Jul 24, 2008
1 parent 719018c commit 590f4b6
Showing 6 changed files with 76 additions and 61 deletions.
LICENSE.txt: 2 changes (1 addition & 1 deletion)
@@ -1,4 +1,4 @@
Copyright (c) 2007, Jannis Leidel
Copyright (c) 2008, Jannis Leidel
All rights reserved.

Redistribution and use in source and binary forms, with or without
robots/__init__.py: 2 changes (1 addition & 1 deletion)
@@ -1 +1 @@
VERSION = (0, "3.2", None)
VERSION = (0, 4, None)
robots/admin.py: 17 changes (17 additions & 0 deletions)
@@ -0,0 +1,17 @@
from django.contrib import admin
from django.utils.translation import ugettext_lazy as _

from robots.models import Url, Rule

class RuleAdmin(admin.ModelAdmin):
fieldsets = (
(None, {'fields': ('robot', 'sites')}),
(_('URL patterns'), {'fields': ('allowed', 'disallowed')}),
(_('Advanced options'), {'classes': ('collapse',), 'fields': ('crawl_delay',)}),
)
list_filter = ('sites',)
list_display = ('robot', 'allowed_urls', 'disallowed_urls')
search_fields = ('robot','urls')

admin.site.register(Url)
admin.site.register(Rule, RuleAdmin)
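
Since the inline "class Admin" declarations are dropped from the models, the new robots/admin.py above only takes effect once the project imports it. Below is a minimal sketch of how a newforms-admin era project might pick it up; the project-level urls.py is hypothetical and not part of this commit.

# Hypothetical project urls.py (not part of this commit), newforms-admin / Django 1.0 style.
from django.conf.urls.defaults import *
from django.contrib import admin

admin.autodiscover()  # imports admin.py from every installed app, including robots.admin

urlpatterns = patterns('',
    (r'^admin/(.*)', admin.site.root),           # admin URL hook of that era
    (r'^robots\.txt$', include('robots.urls')),  # serves the generated robots.txt
)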
robots/models.py: 97 changes (46 additions & 51 deletions)
@@ -3,32 +3,27 @@
from django.contrib.sites.models import Site
from django.utils.translation import ugettext_lazy as _
from django.utils.text import get_text_list
from django.conf import settings
# for backward incompatible changes in 0.97/trunk
try:
from django.db.models import DecimalField as FloatField
except ImportError:
from django.db.models import FloatField

try:
from django.contrib.admin.views.doc import simplify_regex
except ImportError:
from django.contrib.admindocs.views import simplify_regex

class Url(models.Model):
"""
Defines a URL pattern for use with a robot exclusion rule. It's
case-sensitive and exact, e.g., "/admin" and "/admin/" are different URLs.
"""
pattern = models.CharField(_('pattern'), max_length=255, core=True, help_text=_("Case-sensitive. A missing trailing slash does also match to files which start with the name of the pattern, e.g., '/admin' matches /admin.html too. Some major search engines allow an asterisk (*) as a wildcard and a dollar sign ($) to match the end of the URL, e.g., '/*.jpg$'."))

pattern = models.CharField(_('pattern'), max_length=255, core=True,
help_text=_("Case-sensitive. A missing "
"trailing slash does also match "
"to files which start with the "
"name of the pattern, e.g., "
"'/admin' matches /admin.html "
"too. Some major search engines "
"allow an asterisk (*) as a "
"wildcard and a dollar sign ($) "
"to match the end of the URL, "
"e.g., '/*.jpg$'."))
class Meta:
verbose_name = _('url')
verbose_name_plural = _('url')

class Admin:
pass

def __unicode__(self):
return u"%s" % self.pattern

@@ -44,26 +39,47 @@ class Rule(models.Model):
disallows the robot identified by its user agent to access the given URLs.
The Site contrib app is used to enable multiple robots.txt per instance.
"""
robot = models.CharField(_('robot'), max_length=255, help_text=_("This should be a user agent string like 'Googlebot'. Enter an asterisk (*) for all user agents. For a full list look at the <a target=_blank href='http://www.robotstxt.org/wc/active/html/index.html'>database of Web Robots</a>."))
allowed = models.ManyToManyField(Url, blank=True, validator_list=[validators.RequiredIfOtherFieldNotGiven('disallowed')], related_name="allowed", help_text=_("These are URLs which are allowed to be accessed by web robots."))
disallowed = models.ManyToManyField(Url, blank=True, validator_list=[validators.RequiredIfOtherFieldNotGiven('allowed')], related_name="disallowed", help_text=_("These are URLs which are not allowed to be accessed by web robots."))
robot = models.CharField(_('robot'), max_length=255, help_text=_(
"This should be a user agent string like "
"'Googlebot'. Enter an asterisk (*) for all "
"user agents. For a full list look at the "
"<a target=_blank href='"
"http://www.robotstxt.org/db.html"
"'> database of Web Robots</a>."))

allowed = models.ManyToManyField(Url, blank=True, related_name="allowed",
validator_list=[validators.RequiredIfOtherFieldNotGiven('disallowed')],
help_text=_("The URLs which are allowed "
"to be accessed by bots."))

disallowed = models.ManyToManyField(Url, blank=True, related_name="disallowed",
validator_list=[validators.RequiredIfOtherFieldNotGiven('allowed')],
help_text=_("The URLs which are not "
"allowed to be accessed "
"by bots."))
sites = models.ManyToManyField(Site)
crawl_delay = FloatField(_('crawl delay'), blank=True, null=True, max_digits=3, decimal_places=1, help_text=("From 0.1 to 99.0. This field is supported by some search engines and defines the delay between successive crawler accesses in seconds. If the crawler rate is a problem for your server, you can set the delay up to 5 or 10 or a comfortable value for your server, but it's suggested to start with small values (0.5-1), and increase as needed to an acceptable value for your server. Larger delay values add more delay between successive crawl accesses and decrease the maximum crawl rate to your web server."))

crawl_delay = models.DecimalField(_('crawl delay'), blank=True, null=True,
max_digits=3, decimal_places=1, help_text=(
"Between 0.1 and 99.0. This field is "
"supported by some search engines and "
"defines the delay between successive "
"crawler accesses in seconds. If the "
"crawler rate is a problem for your "
"server, you can set the delay up to 5 "
"or 10 or a comfortable value for your "
"server, but it's suggested to start "
"with small values (0.5-1), and "
"increase as needed to an acceptable "
"value for your server. Larger delay "
"values add more delay between "
"successive crawl accesses and "
"decrease the maximum crawl rate to "
"your web server."))
class Meta:
verbose_name = _('rule')
verbose_name_plural = _('rules')

class Admin:
fields = (
(None, {'fields': ('robot', 'sites')}),
(_('URL patterns'), {'fields': ('allowed', 'disallowed')}),
(_('Advanced options'), {'classes': 'collapse', 'fields': ('crawl_delay',)}),
)
list_filter = ('sites',)
list_display = ('robot', 'allowed_urls', 'disallowed_urls')
search_fields = ('robot','urls')

def __unicode__(self):
return u"%s" % self.robot

@@ -74,24 +90,3 @@ def allowed_urls(self):
def disallowed_urls(self):
return get_text_list(list(self.disallowed.all()), _('and'))
disallowed_urls.short_description = _('disallowed')

try:
from django.contrib import admin

class UrlAdmin(admin.ModelAdmin):
pass

class RuleAdmin(admin.ModelAdmin):
fieldsets = (
(None, {'fields': ('robot', 'sites')}),
(_('URL patterns'), {'fields': ('allowed', 'disallowed')}),
(_('Advanced options'), {'classes': ('collapse',), 'fields': ('crawl_delay',)}),
)
list_filter = ('sites',)
list_display = ('robot', 'allowed_urls', 'disallowed_urls')
search_fields = ('robot','urls')

admin.site.register(Url, UrlAdmin)
admin.site.register(Rule, RuleAdmin)
except:
pass
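
For orientation, here is a short sketch of how the Rule and Url models above could be used from the ORM; it is illustrative only, not part of the diff, and relies solely on the fields defined in this file.

# Illustrative only (not part of this commit): block /admin/ for every robot on the current site.
from decimal import Decimal
from django.contrib.sites.models import Site
from robots.models import Rule, Url

admin_url = Url.objects.create(pattern="/admin/")
rule = Rule.objects.create(robot="*", crawl_delay=Decimal("0.5"))
rule.disallowed.add(admin_url)              # 'disallowed' is the ManyToManyField defined above
rule.sites.add(Site.objects.get_current())  # the view filters rules by the current site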
robots/urls.py: 5 changes (3 additions & 2 deletions)
@@ -1,5 +1,6 @@
from django.conf.urls.defaults import *
from robots.views import rules_list

urlpatterns = patterns('robots.views',
(r'^$', 'rules_list'),
urlpatterns = patterns('',
url(r'^$', rules_list, name='robots_rule_list'),
)
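
Because the pattern is now a named url() instead of a view-name string, it can be reversed by name; a small, hypothetical usage example (not part of the diff):

# Hypothetical usage (not part of this commit): resolve the robots.txt URL by its new name.
from django.core.urlresolvers import reverse

robots_txt_url = reverse('robots_rule_list')  # e.g. '/robots.txt' if mounted at r'^robots\.txt$'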
robots/views.py: 14 changes (8 additions & 6 deletions)
@@ -1,19 +1,22 @@
from django.contrib.sites.models import Site
from django.template import loader, RequestContext
from django.http import HttpResponse, Http404
from django.http import HttpResponse
from django.core.urlresolvers import reverse, NoReverseMatch
from django.conf import settings

from robots.models import Rule

CRAWL_DELAY = getattr(settings, 'ROBOTS_CRAWL_DELAY', False)
USE_SITEMAP = getattr(settings, 'ROBOTS_USE_SITEMAP', True)

def rules_list(request, template_name='robots/rule_list.html',
mimetype='text/plain', status_code=200):
"""
Returns a generated robots.txt file with correct mimetype (text/plain),
status code (200 or 404), sitemap url (automatically) and crawl delay
(if settings.ROBOTS_CRAWL_DELAY is given).
"""
protocol = request.is_secure() and 'https' or 'http'
scheme = request.is_secure() and 'https' or 'http'
current_site = Site.objects.get_current()
try:
sitemap_url = reverse('django.contrib.sitemaps.views.index')
@@ -22,16 +25,15 @@ def rules_list(request, template_name='robots/rule_list.html',
sitemap_url = reverse('django.contrib.sitemaps.views.sitemap')
except NoReverseMatch:
sitemap_url = None
use_sitemap = getattr(settings, 'ROBOTS_USE_SITEMAP', True)
if sitemap_url is not None and use_sitemap:
sitemap_url = "%s://%s%s" % (protocol, current_site.domain, sitemap_url)
if sitemap_url is not None and USE_SITEMAP:
sitemap_url = "%s://%s%s" % (scheme, current_site.domain, sitemap_url)
rules = Rule.objects.filter(sites=current_site)
if not rules.count():
status_code = 404
t = loader.get_template(template_name)
c = RequestContext(request, {
'rules': rules,
'sitemap_url': sitemap_url,
'crawl_delay': getattr(settings, 'ROBOTS_CRAWL_DELAY', False)
'crawl_delay': CRAWL_DELAY,
})
return HttpResponse(t.render(c), status=status_code, mimetype=mimetype)
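
With the two module-level constants above, the settings are read once at import time rather than on every request; changing either value therefore needs a process reload to take effect. A hypothetical settings.py excerpt (values are examples, not part of this commit) showing what the view module looks up:

# Hypothetical settings.py excerpt (not part of this commit); both settings are optional.
ROBOTS_USE_SITEMAP = True  # when True and a sitemap URL can be reversed, it is passed to the template
ROBOTS_CRAWL_DELAY = 10    # handed to the template as crawl_delay; leave unset to disable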
