import magicrequests as requests
import time
import string
import collections
import json
# Scrape the Warrior Forum "special offers" listing, following "next" links
# page by page, and tally per-poster thread counts and view counts. The two
# tallies are written to wafo-views.json and wafo-posts.json when the last
# page has been processed.

# First listing page; later pages are discovered via rel="next" links.
url = 'http://www.warriorforum.com/warrior-special-offers-forum/'

# poster name -> summed view counts of that poster's threads
views_counter = collections.Counter()
# poster name -> number of threads by that poster
posts_counter = collections.Counter()

VIEWS_XPATH = '//tr/td[5]/text()'
POSTERS_XPATH = '//tr/td[2]/div[2]/span/text()'
NEXT_XPATH = '//a[@rel="next"]/@href'

PAGE_DELAY_SECONDS = 5   # pause between successive listing pages
RETRY_DELAY_SECONDS = 5  # pause before re-requesting a page that failed


def parse_view_counts(cells):
    """Convert raw view-count cell texts to a list of ints.

    Cells containing any ASCII lowercase letter are skipped; thousands
    separators (commas) are removed from the rest before conversion.
    Raises ValueError if a remaining cell is still not an integer.
    """
    lowercase = set(string.ascii_lowercase)
    return [
        int(cell.replace(',', ''))
        for cell in cells
        if not lowercase.intersection(cell)
    ]


def tally_page(posters, counts, views, posts):
    """Add one page's posters and view counts to the running tallies.

    ``posters`` and ``counts`` are paired positionally. Every pair is
    summed into ``views``, so a poster with several threads on the same
    page gets all of them counted (building a dict from the pairs first
    would keep only the last one). ``posts`` is incremented once per
    entry in ``posters``.
    """
    # NOTE(review): the pairing assumes both XPath results line up row by
    # row; a skipped non-numeric cell would shift the pairs. Verify against
    # the actual page markup.
    for poster, count in zip(posters, counts):
        views[poster] += count
    posts.update(posters)


def crawl(start_url, views, posts):
    """Walk the listing from ``start_url`` until no "next" link remains."""
    page_url = start_url
    while True:
        print(page_url)
        try:
            page = requests.get(page_url)
        except Exception as exc:
            # Best-effort retry of the same page, but with a pause so a
            # persistent failure does not hammer the server, and without
            # swallowing KeyboardInterrupt/SystemExit.
            print('request failed (%s); retrying in %s seconds'
                  % (exc, RETRY_DELAY_SECONDS))
            time.sleep(RETRY_DELAY_SECONDS)
            continue

        counts = parse_view_counts(page.xpath(VIEWS_XPATH))
        posters = page.xpath(POSTERS_XPATH)
        tally_page(posters, counts, views, posts)

        next_links = page.xpath(NEXT_XPATH)
        if not next_links:
            break
        page_url = next_links[0]
        time.sleep(PAGE_DELAY_SECONDS)


def save_counter(counter, path):
    """Write ``counter`` to ``path`` as JSON, closing the file afterwards."""
    with open(path, 'w') as handle:
        json.dump(counter, handle)


def main():
    """Crawl the forum listing, report the top posters and save the tallies."""
    crawl(url, views_counter, posts_counter)
    print('most common %s' % (views_counter.most_common(10),))
    save_counter(views_counter, 'wafo-views.json')
    save_counter(posts_counter, 'wafo-posts.json')


if __name__ == '__main__':
    main()