import requests import time from lxml import etree
def get_edu_name(start_page=1, end_page=195, output_path=r'edu_name.txt', timeout=10):
    """Collect link texts from the SJTU SRC ranking pages and append them to a file.

    For every page number in ``start_page..end_page`` (inclusive) the page
    ``https://src.sjtu.edu.cn/rank/firm/0/?page=<n>`` is downloaded, the text
    of every ``<a>`` directly inside a ``<td class="am-text-center">`` cell
    is extracted, and the texts are appended to ``output_path`` one per line.
    The page number and the extracted list are also printed as progress
    output.

    The crawl is best-effort: a page that cannot be fetched, decoded, parsed
    or written is reported on stdout and skipped after a short pause; it
    never aborts the remaining pages.

    Args:
        start_page: First page number to fetch. Defaults to 1.
        end_page: Last page number to fetch, inclusive. Defaults to 195,
            the range the function originally hard-coded.
        output_path: File the names are appended to (UTF-8). It is opened
            in append mode, so repeated runs accumulate entries.
        timeout: Seconds to wait for the server before giving up on a page.

    Returns:
        None. Results are written to ``output_path``.
    """
    base_url = "https://src.sjtu.edu.cn/rank/firm/0/?page="

    for page in range(start_page, end_page + 1):
        url = base_url + str(page)
        try:
            # A timeout keeps one stalled connection from hanging the crawl.
            response = requests.get(url, timeout=timeout)
            # Do not parse HTTP error pages as if they were result pages.
            response.raise_for_status()

            html = response.content.decode("UTF-8")
            tree = etree.HTML(html)
            names = tree.xpath('//td[@class="am-text-center"]/a/text()')

            print('->' + str(page))
            print(names)

            # A page without matches would otherwise append a bare blank
            # line to the output file.
            if not names:
                continue

            with open(output_path, 'a+', encoding='utf-8') as f:
                f.write('\n'.join(names) + '\n')
        except Exception as e:
            # Deliberately broad: any single-page failure is skipped so the
            # crawl continues, but it is reported instead of being silently
            # swallowed.
            print('page ' + str(page) + ' failed: ' + repr(e))
            # Brief back-off before moving on to the next page.
            time.sleep(0.5)