Python 批量提取 txt 文档中所有的 a 链接
可以使用 Python 的正则表达式模块(re)从文本中提取所有以 http:// 或 https:// 开头的链接(包括 a 标签 href 属性中的链接)。
import re
# Characters that may appear in a URL per RFC 3986 (unreserved, gen-delims and
# sub-delims), plus "%" for percent-escapes. The apostrophe is deliberately
# left out so that a single-quoted href='...' attribute terminates the match,
# and the hyphen is escaped so it is a literal rather than a character range.
_URL_PATTERN = re.compile(r"https?://[A-Za-z0-9\-._~:/?#\[\]@!$&()*+,;=%]+")


def extract_links_from_text_file(file_path):
    """Return every http:// or https:// URL found in a UTF-8 text file.

    The whole file is read into memory and scanned once. A match ends at the
    first character outside the URL character set (whitespace, quotes, angle
    brackets, non-ASCII text, ...), so URLs embedded in HTML attributes or
    directly followed by CJK text are cut at the right place.

    Args:
        file_path: Path of the text file to scan; it is decoded as UTF-8.

    Returns:
        A list of URL strings in order of appearance. Duplicates are kept.
        The list is empty when the file contains no URL.

    Raises:
        OSError: Propagated from open() if the file cannot be opened.
        UnicodeDecodeError: Propagated from read() if the file is not UTF-8.
    """
    with open(file_path, 'r', encoding='utf-8') as file:
        content = file.read()
    # The pattern has no capturing groups, so findall yields full matches.
    return _URL_PATTERN.findall(content)
def save_links_to_file(links, output_file_path):
    """Write each link on its own line to a UTF-8 text file.

    Args:
        links: Iterable of link strings, written in iteration order.
        output_file_path: Destination path. The file is opened in 'w' mode,
            so any existing content is replaced.

    Raises:
        OSError: Propagated from open() if the file cannot be created.
    """
    with open(output_file_path, 'w', encoding='utf-8') as output_file:
        # One batched call instead of one write() per link; the output is the same.
        output_file.writelines(link + '\n' for link in links)
# Usage example
input_file_path = 'ttt.txt'  # replace with the path of your text file
output_file_path = 'output_links.txt'  # replace with where the links should be saved

# Only run the extraction when executed as a script, so that importing this
# module does not read or write any files.
if __name__ == '__main__':
    links = extract_links_from_text_file(input_file_path)
    save_links_to_file(links, output_file_path)
    print(f"链接已保存到 {output_file_path}")