1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87
| import re import requests import bs4 import traceback from bs4 import BeautifulSoup
# Column headers. The same strings are used as the keys of each per-stock
# record, and key_jin / key_cheng are also compared against the <dt> label
# text scraped in getStockInfo.
key_code = '股票代码'
key_name = '股票名称'
key_jin = '今开'
key_cheng = '成交量'

# Fill character for the stock-name column. Names are CJK text, which
# monospaced terminals normally render at double width, so padding them with
# ASCII spaces leaves the following columns misaligned. The ideographic space
# (U+3000) pads with a character of matching width.
chinese_blank = '\u3000'

# Row template: code, name (padded with chinese_blank), opening price, volume.
tplt = "{:<12}\t{:" + chinese_blank + "<7}\t{:<8}\t{:<8}"
def getHTMLText(url, encoding="UTF-8"):
    """Fetch *url* over HTTP and return the response body as text.

    The request is sent with a browser-like User-Agent header and a 12 second
    timeout, and the body is decoded using *encoding*.

    Args:
        url: Address of the page to download.
        encoding: Character encoding applied to the response body.

    Returns:
        The decoded page text, or the sentinel string "产生HTTPError" when the
        request fails or the server answers with an error status.
    """
    kv = {'User-Agent': 'Mozilla/5.0 Chrome/63.0.3239.132'}
    try:
        response = requests.get(url, timeout=12, headers=kv)
        response.raise_for_status()
        response.encoding = encoding
        return response.text
    except requests.RequestException:
        # Only network/HTTP failures are turned into the sentinel; a bare
        # ``except`` would also swallow KeyboardInterrupt and real bugs.
        return "产生HTTPError"
def printStock(strockItemDict):
    """Print one stock record as a row laid out by the module-level ``tplt``.

    Args:
        strockItemDict: Mapping that contains the ``key_code``, ``key_name``,
            ``key_jin`` and ``key_cheng`` entries; they are printed in that
            order.
    """
    columns = (key_code, key_name, key_jin, key_cheng)
    row = tplt.format(*(strockItemDict[column] for column in columns))
    print(row)
def getStockList(stockList, stockURL):
    """Append the stock codes linked from the page at *stockURL* to *stockList*.

    The page is downloaded as gb2312 text and every ``<a target="_blank">``
    element is inspected. The first ``sh``/``sz`` + six digit code found in
    its ``href`` is appended; links without an ``href`` or without such a code
    are skipped.

    Args:
        stockList: List that receives the codes (modified in place).
        stockURL: Address of the listing page.
    """
    htmlText = getHTMLText(stockURL, "gb2312")
    document = BeautifulSoup(htmlText, "html.parser")
    pattern = re.compile(r's[hz]\d{6}')
    for anchor in document('a', attrs={"target": "_blank"}):
        href = anchor.attrs.get('href')
        if not isinstance(href, str):
            # No usable href on this anchor.
            continue
        match = pattern.search(href)
        if match is not None:
            stockList.append(match.group(0))
def _parseStockInfo(document, stockCode):
    """Build the record for *stockCode* from a parsed quote page.

    Returns None when the page has no ``stock-bets`` block (for example an
    error page, or the text returned after a failed download).
    """
    stockInfo = document.find('div', attrs={'class': 'stock-bets'})
    if stockInfo is None:
        return None

    betsNameDiv = stockInfo.find(attrs={'class': 'bets-name'})
    infoDict = {
        key_code: stockCode,
        # The first whitespace-separated token of the name block is the name.
        key_name: betsNameDiv.text.split()[0],
        key_jin: "--",
        key_cheng: "--",
    }
    # <dt> holds the label and the <dd> at the same position holds its value;
    # zip stops at the shorter list instead of indexing past the end.
    for keyTag, valueTag in zip(stockInfo.find_all('dt'),
                                stockInfo.find_all('dd')):
        key = keyTag.text
        if key in (key_jin, key_cheng):
            infoDict[key] = valueTag.text
    return infoDict


def getStockInfo(stockList, stockURL, filePath):
    """Print a header row, then one row for every stock whose page can be parsed.

    For each code the page ``stockURL + code + ".html"`` is downloaded and
    parsed. Pages without a ``stock-bets`` block are skipped silently; any
    other parsing or printing problem is reported with a traceback and the
    loop moves on to the next code.

    Args:
        stockList: Iterable of stock codes.
        stockURL: Base address the code and ".html" are appended to.
        filePath: Accepted for interface compatibility; not used by this
            function.
    """
    print(tplt.format(key_code, key_name, key_jin, key_cheng))
    for stockCode in stockList:
        htmlText = getHTMLText(stockURL + stockCode + ".html")
        if not htmlText:
            # Nothing to parse. The former ``document == ""`` test compared a
            # BeautifulSoup object with a str and could never be true.
            continue
        document = BeautifulSoup(htmlText, "html.parser")
        try:
            infoDict = _parseStockInfo(document, stockCode)
            if infoDict is not None:
                printStock(infoDict)
        except Exception:
            # Best effort: report the unexpected page layout and keep going.
            traceback.print_exc()
def main():
    """Collect stock codes from the listing page and print a quote row for each."""
    codes = []
    getStockList(codes, 'http://quote.eastmoney.com/stocklist.html')
    getStockInfo(codes, 'https://gupiao.baidu.com/stock/', 'D://likai.txt')
# Run the scraper only when executed as a script, so that importing this
# module does not start network requests.
if __name__ == "__main__":
    main()
|