合并hexo dist到新的hexo项目

1. 背景

有一个很久以前的hexo blog，现在只能找到当时的编译结果，找不到markdown和其他资源文件。

2. 解决方法

I. 新建一个hexo project，编译
II. 将老项目的文件merge到新项目中

3. 具体步骤

I. 将旧的首页内容追加到新的首页内容后（下面只是函数定义，需在定义 path 之后调用 generate_index(path) 才会执行，path 的定义见第 II 步）

import re

def generate_index(path):
    '''
    Append the articles of the old home page to the new home page.

    Every ``<article ...>...</article>`` element found in
    ``old_archive/index.html`` is inserted into ``public/index.html``
    directly before the ``</div`` that follows the first closing
    ``article>`` tag, and ``public/index.html`` is overwritten with the
    result.

    :param path: parent dir of the new public (dist) and old archive;
        must support the ``/`` operator (e.g. a ``Path`` object)
    :raises ValueError: if the new home page contains no ``article>``
        followed by whitespace and ``</div``
    '''
    new_index = path / 'public' / 'index.html'
    with open(new_index, encoding='utf-8') as fp:
        new = fp.read()
    with open(path / 'old_archive' / 'index.html', encoding='utf-8') as fp:
        old = fp.read()

    # Locate the end of the article list: an ``article>`` closing tag that is
    # followed only by whitespace before the container's ``</div``.
    anchor = re.search(r'article>\s+?</div', new)
    if anchor is None:
        raise ValueError(
            'no "article>" followed by "</div" found in %s' % new_index)
    # Insert right before ``</div``.  The offset is derived from the match
    # itself, so it is correct whatever amount of whitespace separates the
    # two tags (a fixed offset would land inside or after the closing tag).
    cut = anchor.end() - len('</div')

    # DOTALL lets ``.`` span the line breaks inside each old article.
    old_articles = re.findall(r'<article.+?/article>', old, re.DOTALL)
    output = new[:cut] + '\n'.join(old_articles) + new[cut:]
    with open(new_index, 'w', encoding='utf-8') as fp:
        fp.write(output)

II. 将old_archive中的必要文件拷贝到新的dist文件夹（视具体情况修改），并将旧页面的css改回原来的css。

import shutil
from path import Path
import glob
# Parent directory that holds both 'public' (new build) and 'old_archive'.
path = Path('/PATH/TO/DIR')

old_root = path / 'old_archive'
new_root = path / 'public'
years = ('2016', '2017')

# Old posts live in per-year folders directly under the site root.
for year in years:
    shutil.copytree(old_root / year, new_root / year)
shutil.copytree(old_root / 'resource', new_root / 'resource')
# The old stylesheet is stored under a different name so that it does not
# clash with the css folder of the new build.
shutil.copytree(old_root / 'css', new_root / 'css_old')

# Per-year archive listings, followed by the remaining folders of the old site.
for year in years:
    shutil.copytree(old_root / 'archives' / year, new_root / 'archives' / year)
for name in ('vendors', 'js/src', 'about'):
    shutil.copytree(old_root / name, new_root / name)

# Point the old pages back at their original stylesheet (available as
# /css_old/), which fixes the broken rendering of those pages.
def _restore_old_css(page):
    """Rewrite *page* in place so that it links /css_old/main.css."""
    with open(page, encoding='utf-8') as src:
        html = src.read()
    html = html.replace('/css/main.css?v=5.0.1', '/css_old/main.css?v=5.0.1')
    with open(page, 'w', encoding='utf-8') as dst:
        dst.write(html)


# Old post pages: <year>/<*>/<*>/<*>/index.html
for post_page in glob.glob(path / 'public' / '201*/*/*/*/index.html'):
    _restore_old_css(post_page)

# Old archive listings, one and two levels below archives/<year>.
archive_root = path / 'public/archives'
archive_pages = (glob.glob(archive_root / '201*/index.html')
                 + glob.glob(archive_root / '201*/*/index.html'))
for archive_page in archive_pages:
    print(archive_page)
    _restore_old_css(archive_page)

III. 修改 Archive 文件夹下的内容，使日志分类下的内容显示正确

from path import Path
from bs4 import BeautifulSoup
# Parent directory that holds both 'public' (new build) and 'old_archive'.
path = Path('/PATH/TO/DIR')

# Parse the archive index page of the old site.
with open(path / 'old_archive' / 'archives' / 'index.html', encoding='utf-8') as fp:
    old = BeautifulSoup(fp)
# Parse the archive index page of the new build; it is modified below and
# written back at the end of this script.
with open(path / 'public/archives/index.html', encoding='utf-8') as fp:
    new = BeautifulSoup(fp)

# First 'posts-collapse' container of the new page; old entries are added to it.
art = new.find_all('div', {'class': 'posts-collapse'})[0]
# Second 'collection-title' div of the old page, reused as the heading for
# the old entries.
# NOTE(review): index 1 presumably skips a page-level title - confirm against
# the old markup.
hd = old.find_all('div', {'class': 'collection-title'})[1]
# Re-tag the heading: new class on the div, new class and tag name on its h2.
# NOTE(review): presumably these match the new theme's year headings - confirm.
hd['class'] = 'collection-year'
hd.h2['class'] = 'collection-header'
hd.h2.name = 'span'
# Replace the first child of the heading's first <span> (presumably the h2
# renamed on the previous line) with the new label text.
hd.span.contents[0].replaceWith('更早之前...')

# Put the heading after the container's current children.
base = len(list(art.children))
art.insert(base, hd)
base += 1

# Insert every matching old post entry after the heading, in the order found.
for i, x in enumerate(old.find_all('article', {'class': 'post post-type-normal'})):
    x.h1.name = 'div'
    # Keep only the first four children of the header, reordered as 0, 3, 2, 1.
    # NOTE(review): assumes the header has at least four children - confirm.
    x.header.contents = [x.header.contents[0], x.header.contents[3], x.header.contents[2], x.header.contents[1]]
    art.insert(base + i, x)

# Overwrite the new archive index with the merged document.
with open(path / 'public/archives/index.html', 'w', encoding='utf-8') as fp:
    fp.write(str(new))

IV. 最后

部署前执行以上脚本进行转换，可以修复文章、首页和归档页面的显示。