2019-06-11-简书md文件下载图片到本地并且更新引用

2019-06-11-简书md文件下载图片到本地并且更新引用

作者:https://www.jianshu.com/u/f068003ac3b7

用途:在你下载了简书打包的文章后,markdown文件里面引用的图片链接是简书的在线源。本Python脚本的作用就是将包含子文件夹的文件夹内所有.md文件内简书源的图片下载到./img相对路径内,并且替换.md文件内图片链接为本地源。原创不易,转载请注明出处

用法:将脚本末尾 dirlist("G:/报告/简书/user-XXXXXXX", []) 中的 "G:/报告/简书/user-XXXXXXX" 替换为你的简书文章打包根目录。

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
# coding=utf8
import os
import re
import requests

def dlfile(path, file):
    """Download every Jianshu-hosted image referenced by a markdown file.

    The file is scanned line by line for markdown image links whose URL
    starts with ``https://upload-images``. Each image is saved under
    ``path + "/img"``, named after the URL segment that follows
    ``upload_images/`` with any query string removed.

    Args:
        path: Directory that receives the ``img`` sub-folder (created if
            it does not exist yet).
        file: Path of the UTF-8 encoded markdown file to scan.
    """
    img_dir = path + "/img"
    os.makedirs(img_dir, exist_ok=True)

    # Both patterns are loop invariants, so compile them once up front.
    link_pattern = re.compile(r'!\[.*?\]\(https://upload-images(.*?)\)')
    name_pattern = re.compile(
        r"https://upload-images\.jianshu\.io/upload_images/([^?]+)"
    )

    # Read everything first so the handle is closed before any download.
    with open(file, "r", encoding="utf-8") as fp:
        lines = fp.readlines()

    for line in lines:
        for tail in link_pattern.findall(line):
            if tail == "":
                continue
            url = "https://upload-images" + tail
            name_match = name_pattern.search(url)
            if name_match is None:
                # The link does not have the jianshu.io/upload_images/<name>
                # shape, so there is no file name to save it under; skip it
                # rather than crash on a missing match.
                continue
            name = name_match.group(1)
            print(name)
            response = requests.get(url)
            if not response.ok:
                # Do not store an HTTP error body as if it were an image.
                print("download failed (HTTP %s): %s"
                      % (response.status_code, url))
                continue
            with open(img_dir + "/" + name, "wb") as out:
                out.write(response.content)

def repline(file):
    """Rewrite Jianshu image URLs in a markdown file to local ``img/`` paths.

    Every ``https://upload-images.jianshu.io/upload_images/<name>`` URL,
    together with an optional ``?...`` query string, is replaced by
    ``img/<name>``. This is the relative location where ``dlfile`` stores
    the downloaded image. The file is rewritten in place.

    Args:
        file: Path of the UTF-8 encoded markdown file to update.
    """
    # <name> stops at the query string, the closing parenthesis of the
    # markdown link, or whitespace; the optional query part is dropped.
    url_pattern = re.compile(
        r"https://upload-images\.jianshu\.io/upload_images/"
        r"([^?)\s]+)(?:\?[^)\s]*)?"
    )

    with open(file, "r", encoding="utf-8") as src:
        lines = src.readlines()

    # A forward slash keeps the link valid on every platform and matches
    # the "/img/" separator used when the images are saved.
    rewritten = [url_pattern.sub(r"img/\1", line) for line in lines]

    with open(file, "w", encoding="utf-8") as dst:
        dst.writelines(rewritten)


def readjianshu(filepath):
    """Localise the images of one markdown file.

    Downloads the images referenced by ``filepath`` into an ``img`` folder
    next to it (via ``dlfile``) and then rewrites the links in the file
    (via ``repline``). Progress is printed to stdout.

    Args:
        filepath: Path of the markdown file, using ``/`` as separator.
    """
    print(filepath)
    # Everything before the last "/" is the containing directory. A bare
    # file name has no "/", so fall back to the current directory instead
    # of failing on a missing match.
    head, sep, _ = filepath.rpartition("/")
    path = head if sep else "."
    dlfile(path, filepath)
    repline(filepath)
    print("finish")


def dirlist(path, allfile=None):
    """Recursively process every ``.md`` file below ``path``.

    Each markdown file found is passed to ``readjianshu`` and its
    ``/``-separated path is appended to ``allfile``.

    Args:
        path: Root directory to walk.
        allfile: List that collects the processed file paths; a new list
            is created when omitted.

    Returns:
        The ``allfile`` list, extended with every processed file.
    """
    if allfile is None:
        allfile = []

    for filename in os.listdir(path):
        filepath = os.path.join(path, filename)
        if os.path.isdir(filepath):
            dirlist(filepath, allfile)
        elif filepath.endswith(".md"):
            # Match the ".md" extension, not any name that merely ends in
            # "md" (for example "run.cmd").
            # Use "/" throughout: readjianshu splits the path on "/".
            filepath = filepath.replace("\\", "/")
            allfile.append(filepath)
            readjianshu(filepath)
    return allfile

# Script entry point: walk the Jianshu export root given below.
# Replace the path with the root folder of your own exported articles.
dirlist("G:/报告/简书/user-XXXXXXX",[])