第一个Python爬虫

2016/08/03

爬取快代理网站免费的IP代理地址

Python 代码(Python 2)

#coding=utf-8

import urllib2
import re #导入正则表达式模块

'''
http://www.kuaidaili.com/proxylist/1/
'''

def geturl():
    """Print the proxy IPs listed on pages 1-9 of the kuaidaili proxy list.

    For each page the URL is printed first, followed by the list of strings
    captured from every ``<td data-title="IP">...</td>`` cell in the page body.
    Nothing is returned. Any error raised by ``urllib2.urlopen`` or ``read``
    propagates to the caller and stops the crawl.
    """
    # Compiled once, outside the loop; the lazy group captures one cell's text.
    ip_cell = re.compile(r'<td data-title="IP">(.+?)</td>')

    for page in range(1, 10):
        target = r"http://www.kuaidaili.com/proxylist/%d/" % page
        print(target)
        # Open the target page.
        response = urllib2.urlopen(target)
        try:
            # Read the whole response body.
            html = response.read()
        finally:
            # Always release the connection, even if read() fails; try/finally
            # is used because the urllib2 response is not a context manager.
            response.close()
        # Collect every captured cell on this page.
        print(ip_cell.findall(html))

# Start the crawl as soon as this file is executed (or imported).
geturl()

爬取结果(运行输出)

wayne@Z-Beatles:~/python$ python crawler
http://www.kuaidaili.com/proxylist/1/
['122.96.59.106', '42.94.140.209', '115.217.0.15', '180.117.132.47', '117.57.21.179', '211.87.234.153', '114.107.16.62', '121.31.112.77', '180.97.74.199', '221.179.236.116']
http://www.kuaidaili.com/proxylist/2/
['183.57.82.71', '60.13.74.143', '183.218.54.58', '36.97.106.27', '183.203.167.45', '222.88.199.140', '110.73.7.157', '117.158.1.210', '222.246.232.55', '124.193.51.248']
http://www.kuaidaili.com/proxylist/3/
['49.73.31.22', '122.96.59.105', '116.23.100.206', '113.94.96.63', '116.24.92.76', '120.83.231.165', '14.119.43.155', '120.199.12.178', '42.196.254.169', '119.254.84.90']
http://www.kuaidaili.com/proxylist/4/
['221.13.139.105', '121.13.64.130', '113.120.111.201', '121.201.28.185', '27.46.38.140', '61.143.158.238', '119.29.177.197', '58.251.251.66', '180.121.145.28', '218.95.84.54']
http://www.kuaidaili.com/proxylist/5/
['218.18.91.22', '58.251.250.191', '115.151.198.205', '218.85.78.89', '117.82.13.129', '116.22.104.226', '124.239.236.238', '59.46.0.31', '183.246.69.39', '58.59.141.187']
http://www.kuaidaili.com/proxylist/6/
['120.52.72.21', '58.251.251.93', '183.140.86.129', '42.88.157.173', '121.31.112.75', '113.66.147.252', '223.243.11.194', '202.194.26.84', '123.120.21.229', '114.250.7.193']
http://www.kuaidaili.com/proxylist/7/
['221.7.206.140', '103.59.178.17', '61.185.137.126', '123.244.244.32', '59.37.206.231', '115.159.31.122', '1.207.62.194', '119.135.185.99', '116.226.69.91', '221.179.236.164']
http://www.kuaidaili.com/proxylist/8/
['222.87.84.135', '111.47.12.180', '183.54.240.235', '114.233.13.80', '125.124.136.17', '110.73.36.80', '113.66.62.169', '120.90.6.92', '61.141.171.48', '14.219.125.62']
http://www.kuaidaili.com/proxylist/9/
['110.73.0.86', '116.214.32.51', '221.237.154.57', '111.226.17.133', '61.144.194.139', '110.73.29.106', '210.51.2.203', '119.48.79.106', '218.17.252.34', '114.215.164.11']


kkkkkkoooooooook

Post Directory