1
1
from base64 import encodestring
2
2
from datetime import datetime
3
+ import logging
3
4
import mimetypes
4
5
import re
5
6
import urllib
37
38
38
39
# regex used to find links in an article
LINK_RE = re.compile(r'<a.*?href="(.*?)".*?>(.*?)</a>', re.I | re.M)
# regex used to pull the title out of a fetched page; tolerates attributes
# on the <title> tag
TITLE_RE = re.compile(r'<title.*?>(.*?)</title>', re.I | re.M)
# regex matching any character that is NOT allowed in a tag slug; the raw
# string avoids the invalid escape sequences ('\-', '\+', '\:', '\.') that
# a normal string literal would carry
TAG_RE = re.compile(r'[^a-z0-9\-_\+\:\.]?', re.I)

# NOTE(review): getLogger(__file__) names the logger after the file path;
# getLogger(__name__) is the usual convention -- confirm existing logging
# configuration does not rely on the current name before changing it.
log = logging.getLogger(__file__)
43
46
def get_name(user):
    """
    Returns a display name for ``user``: the full name when one has been
    entered, falling back to the username otherwise.

    The resolved name is cached for a day (86400 seconds) under a key
    derived from the user's id, so repeated lookups are cheap.
    """

    key = 'username_for_%s' % user.id

    log.debug('Looking for "%s" in cache (%s)' % (key, user))
    name = cache.get(key)
    if not name:
        log.debug('Name not found')

        # call get_full_name() once and test the stripped value directly,
        # so a whitespace-only name still falls back to the username
        full_name = user.get_full_name()
        if full_name.strip():
            log.debug('Using full name')
            name = full_name
        else:
            log.debug('Using username')
            name = user.username

        log.debug('Caching %s as "%s" for a while' % (key, name))
        cache.set(key, name, 86400)

    return name
@@ -70,11 +82,15 @@ def clean_tag(name):
70
82
71
83
name = name .replace (' ' , '-' ).encode ('ascii' , 'ignore' )
72
84
name = TAG_RE .sub ('' , name )
73
- return name .lower ().strip ()
85
+ clean = name .lower ().strip ()
86
+
87
+ log .debug ('Cleaned tag "%s" to "%s"' % (name , clean ))
88
+ return clean
74
89
75
90
def save(self, *args, **kwargs):
    """Ensures the tag has a URL-safe slug before persisting it."""

    log.debug('Ensuring that tag "%s" has a slug' % (self,))

    # derive the slug from the cleaned tag name, then save as usual
    slug = Tag.clean_tag(self.name)
    self.slug = slug
    super(Tag, self).save(*args, **kwargs)
80
96
@@ -96,6 +112,7 @@ class Meta:
96
112
ordering = ('name' ,)
97
113
98
114
class ArticleStatusManager (models .Manager ):
115
+
99
116
def default (self ):
100
117
default = self .all ()[:1 ]
101
118
@@ -122,10 +139,11 @@ def __unicode__(self):
122
139
return self .name
123
140
124
141
class ArticleManager (models .Manager ):
142
+
125
143
def active (self ):
126
144
"""
127
- Retrieves all active articles which have been published and have not yet
128
- expired.
145
+ Retrieves all active articles which have been published and have not
146
+ yet expired.
129
147
"""
130
148
now = datetime .now ()
131
149
return self .get_query_set ().filter (
@@ -185,9 +203,7 @@ class Article(models.Model):
185
203
objects = ArticleManager ()
186
204
187
205
def __init__ (self , * args , ** kwargs ):
188
- """
189
- Make sure that we have some rendered content to use.
190
- """
206
+ """Makes sure that we have some rendered content to use"""
191
207
192
208
super (Article , self ).__init__ (* args , ** kwargs )
193
209
@@ -208,9 +224,8 @@ def __unicode__(self):
208
224
return self .title
209
225
210
226
def save (self , * args , ** kwargs ):
211
- """
212
- Renders the article using the appropriate markup language.
213
- """
227
+ """Renders the article using the appropriate markup language."""
228
+
214
229
using = kwargs .get ('using' , DEFAULT_DB )
215
230
216
231
self .do_render_markup ()
def _get_article_links(self):
    """
    Locates all hyperlinks in this article's rendered content and pairs
    each URL with a title.

    Titles are served from the cache when possible.  On a cache miss the
    link's own text is used as a fallback; when LOOKUP_LINK_TITLE is
    enabled, the target page is fetched and the contents of its <title>
    tag (if any) are used instead.  Whatever title is determined is then
    cached for a week (604800 seconds).  Duplicate URLs are reported only
    once, in the order they first appear.

    Returns a tuple of (url, title) pairs.
    """

    links = []
    # URLs already collected; a set lookup avoids re-scanning the list
    # of links once per link found in the article
    seen = set()

    # find all links in the article
    log.debug('Locating links in article: %s' % (self,))
    for link in LINK_RE.finditer(self.rendered_content):
        url = link.group(1)
        log.debug('Do we have a title for "%s"?' % (url,))
        key = 'href_title_' + encodestring(url).strip()

        # look in the cache for the link target's title
        title = cache.get(key)
        if title is None:
            log.debug('Nope... Getting it and caching it.')

            # fall back to the text between the <a> and </a> tags
            title = link.group(2)

            if LOOKUP_LINK_TITLE:
                try:
                    log.debug('Looking up title for URL: %s' % (url,))

                    # open the URL; close it even if read() blows up
                    c = urllib.urlopen(url)
                    try:
                        html = c.read()
                    finally:
                        c.close()

                    # try to determine the title of the target
                    title_m = TITLE_RE.search(html)
                    if title_m:
                        title = title_m.group(1)
                        log.debug('Found title: %s' % (title,))
                except Exception:
                    # if anything goes wrong (ie IOError), use the link's
                    # text; narrow except so Ctrl-C etc. still propagate
                    log.warning('Failed to retrieve the title for "%s"; using link text "%s"' % (url, title))

            # cache the page title for a week
            log.debug('Using "%s" as title for "%s"' % (title, url))
            cache.set(key, title, 604800)

        # add it to the list of links and titles, skipping duplicates
        if url not in seen:
            seen.add(url)
            links.append((url, title))

    return tuple(links)
420
432
links = property (_get_article_links )
421
433
422
434
def _get_word_count (self ):
0 commit comments