isprogram
/
EasySpider


								# from lxml import etree


								# # Parse the HTML

								# html = """

								# <div>

								# 123

								#   <ul class="list">

								#     <li class="item-0">first item</li>

								#     <li class="item-1"><a href="link2.html">second item</a></li>

								#   </ul>

								#   456

								#   <div></div>

								#   789

								# </div>

								# """

								# html = etree.HTML(html)

								# element = html.xpath("*")

								# direct_text = "/html/body/" + html[0][0].tag + "/text()"

								# all_text = "/html/body/" + html[0][0].tag + "//text()"

								# # Select elements using XPath

								# results = html.xpath(direct_text)

								# # print(results)

								# # Join all text content, stripping surrounding whitespace from each piece

								# text = ' '.join(result.strip() for result in results if result.strip())


								# # Print the result

								# print(text)


								# results = html.xpath(all_text)

								# # print(results)

								# # Join all text content, stripping surrounding whitespace from each piece

								# text = ' '.join(result.strip() for result in results if result.strip())


								# # Print the result

								# print(text)


								import re


								def lowercase_xpath_tags(xpath):
								    """Lowercase all-upper-case element names in an XPath expression.

								    A name is rewritten when it consists of an upper-case ASCII letter
								    followed by upper-case letters or digits (e.g. ``DIV``, ``UL``, ``H1``)
								    and is immediately followed by ``[``, ``]``, ``/`` or the end of the
								    string. Quoted string literals (``"..."`` or ``'...'``) are copied
								    through unchanged, because attribute values and text comparisons are
								    case-sensitive.

								    Args:
								        xpath: The XPath expression to normalize.

								    Returns:
								        The expression with matching upper-case names lowercased.
								    """

								    def _lower_unless_quoted(match):
								        # Group 1 is only set when the match is a quoted literal; keep it
								        # verbatim so values such as "X/Y" are not corrupted.
								        if match.group(1) is not None:
								            return match.group(1)
								        return match.group(0).lower()

								    # The first alternative consumes whole quoted literals so the second
								    # alternative never sees their contents. Allowing digits after the
								    # first letter covers tags such as H1..H6.
								    return re.sub(
								        r"""("[^"]*"|'[^']*')|[A-Z][A-Z0-9]*(?=[\[\]/]|$)""",
								        _lower_unless_quoted,
								        xpath,
								    )


								# Demo output: the normalized sample XPath, a reference XPath printed
								# verbatim, and a trailing empty line. A single print with a newline
								# separator produces the same three lines.
								print(
								    lowercase_xpath_tags('//DIV[@id="J_recommendGoods"]/DIV[2]/UL'),
								    "//strong//span[contains(@class,'page-item_M4MDr')]/..//following-sibling::a[1]",
								    "",
								    sep="\n",
								)