2015年1月

使用 Python 抓取网页元素,并以 MSSQL 2008 作为后端存储数据

#!/usr/bin/env python
#coding=utf8
import _mssql
import requests
from bs4 import *
import sys
# Python 2-only workaround: reloading sys makes setdefaultencoding available
# again, so implicit str/unicode conversions use UTF-8 instead of ASCII.
reload(sys)
sys.setdefaultencoding("utf-8")
# First record id to fetch; the crawl loop at the bottom counts up from here.
i = 196075
# Module-level SQL Server connection that exe() uses for its INSERT statements.
# NOTE(review): server, user, password and database are hard-coded here.
cur = _mssql.connect(server='Localhost',
	user='PythonUser',
	password='123456',
	database='PythonDown')
def URLConnect(sqlid):
    """Fetch the user-info edit page for a single record id.

    Issues a GET request against the ``userinfo.do`` endpoint on
    ``LocalHost:8080`` using a fixed set of browser-like headers and a
    hard-coded ``JSESSIONID`` cookie.

    Args:
        sqlid: Record id; it is converted with ``str()`` and appended to
            the ``id`` query parameter of the URL.

    Returns:
        The response object returned by ``requests.get``.
    """
    base_url = "http://LocalHost:8080/user/userinfo.do?method=updateInput&id="
    request_headers = {
        "Host": "LocalHost:8080",
        "User-Agent": "Mozilla/5.0 (Windows NT 6.1; rv:17.0) Gecko/20100101 Firefox/17.0",
        "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8",
        "Accept-Language": "zh,zh-hk;q=0.8,en-us;q=0.5,en;q=0.3",
        "Accept-Encoding": "gzip, deflate",
        "Proxy-Connection": "keep-alive",
        "Cookie": "JSESSIONID=D452AF28012F7D;",
        "Cache-Control": "max-age=0",
    }
    return requests.get(base_url + str(sqlid), headers=request_headers)
def _input_value(cell):
    """Return the ``value`` attribute of the first ``<input>`` inside *cell*."""
    return cell.find('input')['value']


def exe(i):
    """Scrape one user-info page and insert its fields into ``wgetData``.

    Downloads the page for record ``i`` through ``URLConnect``, reads the
    ``value`` of the first ``<input>`` in eight specific ``<td>`` cells and
    writes them as one row using the module-level connection ``cur``.

    Args:
        i: Record id to fetch; it is also stored in the ``webid`` column.

    Returns:
        ``0`` when the page ``<title>`` contains ``'error'`` (nothing is
        inserted); otherwise ``None`` after the INSERT has been executed.

    Raises:
        IndexError, TypeError, KeyError: if the page does not contain the
            expected cells, a cell has no ``<input>``, or the input has no
            ``value`` attribute. These are not caught here.
    """
    page = URLConnect(i)
    soup = BeautifulSoup(page.content, from_encoding='utf8')

    # Error pages are recognised by their <title>; skip them without inserting.
    if 'error' in str(soup.title):
        return 0

    # Search the document once per cell kind instead of once per field:
    # every call to soup(...) walks the whole tree.
    name_cells = soup('td', width="29%")
    address_cells = soup('td', colspan="5")
    detail_cells = soup('td', colspan="3")

    row = {
        "Name": _input_value(name_cells[0]),
        "Phone": _input_value(detail_cells[4]),
        "CardId": _input_value(detail_cells[1]),
        "Address": _input_value(address_cells[1]),
        "FnAme": _input_value(name_cells[1]),
        "FpHone": _input_value(detail_cells[9]),
        "FcArdID": _input_value(detail_cells[6]),
        "FaDdress": _input_value(address_cells[3]),
        "webid": i,
    }

    # Values are passed as parameters, not interpolated into the SQL text.
    cur.execute_non_query("INSERT INTO PythonDown.dbo.wgetData\
			(Name,Phone,CardId,Address,FnAme,FpHone,FcArdID,FaDdress,webid) \
				VALUES (%(Name)s,\
					%(Phone)s,\
					%(CardId)s,\
					%(Address)s,\
					%(FnAme)s,\
					%(FpHone)s,\
					%(FcArdID)s,\
					%(FaDdress)s,\
					%(webid)s)",
        row)

# Crawl every record id from the starting value of ``i`` through 199000.
# The connection is closed in ``finally`` so it is released even when a
# request or INSERT raises part-way through the run.
try:
    while i <= 199000:
        print(i)  # progress indicator: id currently being fetched
        exe(i)
        i += 1
finally:
    cur.close()

后端采用 MSSQL Server 2008 搭建


依赖的 Python 库:bs4(BeautifulSoup 4)、requests、pymssql(务必安装,否则脚本无法运行)

逢考必过挂柯南

身为一个大学狗真心不容易啊,一天到晚不上课真心不是好习惯啊

考试结束后,我会在寒假期间更新文章,内容大概会偏向自动化/半自动化运维、智能家居、业余无线电等方向。

这是打算转型做架构的节奏了~~~