先用爬虫调用百度糯米提供的 API，采集北京当天的团购信息，保存为 numi.html；再用下面的脚本解析该文件。
import xml.etree.ElementTree as ET
import os


class Nuomi:
    """Collect deal records from an XML dump of group-buy listings.

    The parsed records are accumulated in ``self.numi``; every call to
    :meth:`Parse` appends to that same list.
    """

    # Child tags of <display> that are copied verbatim as text.
    _TEXT_FIELDS = ('title', 'businessTitle', 'value')

    def __init__(self):
        # List of dicts, one per parsed deal.
        self.numi = []

    def Parse(self, filepath):
        """Parse the XML file at *filepath* and collect one dict per deal.

        Every ``<url>`` element that contains ``data/display`` yields one
        record. The record holds the text of the ``title``,
        ``businessTitle`` and ``value`` children, plus ``price`` converted
        to ``float``. A field whose element is missing (or, for ``price``,
        empty or non-numeric) is left out of the record and a short
        ``"No <field>"`` message is printed. ``<url>`` elements without
        ``data/display`` are skipped.

        Args:
            filepath: Path (or file object) accepted by
                ``xml.etree.ElementTree.parse``.

        Returns:
            ``self.numi`` -- the cumulative list of records, including
            those gathered by earlier calls on this instance.

        Raises:
            xml.etree.ElementTree.ParseError: If the file is not
                well-formed XML.
            OSError: If the file cannot be opened.
        """
        root = ET.parse(filepath).getroot()

        for url in root.iter('url'):
            data = url.find('data')
            if data is None:
                continue
            display = data.find('display')
            if display is None:
                continue

            record = {}

            for field in self._TEXT_FIELDS:
                node = display.find(field)
                if node is None:
                    print("No " + field)
                else:
                    # An empty element keeps its ``None`` text, exactly as
                    # ``Element.text`` reports it.
                    record[field] = node.text

            price_node = display.find('price')
            try:
                record['price'] = float(price_node.text)
            except (AttributeError, TypeError, ValueError):
                # AttributeError: <price> missing; TypeError: empty
                # element; ValueError: text is not a number.
                print("No price")

            self.numi.append(record)

        return self.numi


if __name__ == '__main__':
    nuomi = Nuomi()
    deals = nuomi.Parse('numi.html')
    print(len(deals))
转载于:https://www.cnblogs.com/leiziv5/p/5735235.html