最近想从hexo转移到typecho,但是typecho是没有一键导入md文章,手动导入又很烦,怎么办呢。于是我就想用 Python 来写一个自动解析md导入typecho的脚本。

于是就开始编码了,首先是用正则表达式提取,hexo的md头部是YAML格式的,只要解析 title date tags categories 就行了,这里就是tags和category难解析,他们可能不止一个标签。但也可能没有标签。

对于上面的解析,我采用两个判断,以及捕获异常的方式解析,tags和category的方式有一些不同。实现如下:

        # Extract the post title from the front matter.
        # NOTE(review): `s` is presumably the full text of one Hexo .md file — confirm.
        title = re.search(r'title: (.*?)\n', s, re.S).group(1)
        # Convert the `date:` value into an integer Unix timestamp
        # (time.mktime interprets the parsed struct as local time).
        date = re.search(r'date: (.*?)\n', s, re.S).group(1)
        date = time.strptime(date, "%Y-%m-%d %H:%M:%S")
        date = int(time.mktime(date))
        # Tags: either an inline value on the `tags:` line (kept as a str) or a
        # YAML list of `- item` lines below it (collected into a list).
        try:
            # An empty inline value means the tags may be given in list form.
            if not re.search(r'tags:[ ]*(.*?)\n', s).group(1):
                # The list body is captured up to the next line starting with
                # "ca" — presumably `categories:` directly after the tags; confirm.
                if re.search(r'tags:[ ]*\n(.*?)\nca', s, re.S):
                    items = re.search(r'tags:[ ]*\n(.*?)\nca', s, re.S).group(1)
                    tags = re.findall(r'- (.*?)\n', items)
                else:
                    tags = ''
            else:
                tags = re.search(r'tags:[ ]*(.*?)\n', s).group(1)
        except AttributeError as e:
            # re.search returned None (no `tags:` line), so .group() failed.
            print(e)
            tags = ''

        # Categories: same inline-or-list handling as tags, but the list body is
        # captured up to the `---` line that follows it.
        try:
            if not re.search(r'categories:[ ]*(.*?)\n', s).group(1):
                if re.search(r'categories:[ ]*\n(.*?)\n---', s, re.S):
                    items = re.search(r'categories:[ ]*\n(.*?)\n---', s, re.S).group(1)
                    categories = re.findall(r'- (.*?)\n', items)
                else:
                    categories = ''
            else:
                categories = re.search(r'categories:[ ]*(.*?)\n', s).group(1)
        except AttributeError as e:
            # re.search returned None (no `categories:` line), so .group() failed.
            print(e)
            categories = ''
        # Extract the post body: everything after the first `---` line that is
        # followed by a blank line, up to the end of the text.
        post = re.search(r'---\n\n(.*?)$', s, re.S).group(1)

这里的踩坑点主要是正则中的 \s:它不仅匹配空格,也会匹配换行符等其他空白字符,所以我采用了 [ ]* 来只匹配多个空格。如果匹配不到,re.search 会返回 None,对它调用 .group() 就会抛出 AttributeError,捕获该异常后让 tags(categories)= '' 就行了。

然后是插表方面,连接之后,批量解析文件,将字段插入表中,这里需要插入三个表,分别是 typecho_metas typecho_contents typecho_relationships

在 typecho_contents 中插入文章内容,在 typecho_metas 中插入分类和标签,在 typecho_relationships 中建立文章与分类、标签之间的关系。

这里需要为 typecho_metas 表中的 name 和 type 建立联合唯一键(UNIQUE KEY),避免重复插入;在重复插入时改为执行更新操作,使得 count = count + 1。

 def insert_post(self, file):
        """Parse one Hexo Markdown file and insert it into ``typecho_contents``.

        The parsed tuple is also stored on ``self.data`` so that it can be
        read by later steps. Its layout is
        ``(title, date, tags, categories, '<!--markdown-->' + post)``.

        Args:
            file: File name (not a path) of a post located under ``_posts/``.

        On any error during the insert the exception is printed and the
        transaction is rolled back; nothing is raised to the caller.
        """
        data = self.parse_hexo_md(file)
        self.data = data
        db = self.db
        cur = self.cur
        # st_mtime is already seconds since the epoch; only truncation to
        # whole seconds is needed, no localtime/mktime round-trip.
        modified = int(os.stat('_posts/' + file).st_mtime)
        # Strip only a trailing ".md": splitting on '.md' would cut the slug
        # at the first occurrence anywhere in the name (e.g. "a.md.notes.md").
        slug = file[:-len('.md')] if file.endswith('.md') else file
        sql = '''
        INSERT INTO typecho_contents(title,slug, created,modified, text,type,status,allowComment,allowFeed,allowPing,authorId) VALUES (%s,%s,%s,%s,%s,'post','publish',1,1,1,1) 
        '''

        try:
            cur.execute(sql, (data[0], slug, data[1], modified, data[4]))
            db.commit()
        except Exception as e:
            # Best-effort import: report the failure and keep going.
            print(e)
            db.rollback()

    def insert_tags_category(self):
        """Upsert the current post's tags and categories into ``typecho_metas``.

        Reads ``self.data``, laid out as
        ``(title, date, tags, categories, '<!--markdown-->' + post)``, where
        tags/categories are each either a list of names or a single string
        (empty when the post has none).

        Every distinct, non-empty name is inserted with ``count = 1``; when
        the row already exists its ``count`` is incremented instead. This
        relies on a unique key over ``(name, type)``, e.g.
        ``ALTER TABLE typecho_metas ADD UNIQUE KEY(name,type)``.

        Database errors are printed and rolled back, not raised.
        """
        data = self.data
        cur = self.cur
        tag_sql = '''
        INSERT INTO typecho_metas(name,slug,type,count) VALUES (%s,%s,'tag',1) ON DUPLICATE KEY UPDATE count = count + 1
        '''
        category_sql = '''
                INSERT INTO typecho_metas(name,slug,type,count) VALUES (%s,%s,'category',1) ON DUPLICATE KEY UPDATE count = count + 1
              '''

        # Tags first, then categories; each group has its own error handling
        # so a failure in one does not prevent the other from being imported.
        for sql, value in ((tag_sql, data[2]), (category_sql, data[3])):
            names = value if isinstance(value, list) else [value]
            seen = set()
            try:
                for name in names:
                    # Skip empty names, and names repeated within this post:
                    # a repeat would bump `count` twice for a single post.
                    if not name or name in seen:
                        continue
                    seen.add(name)
                    cur.execute(sql, (name, name))
                    self.db.commit()
            except pymysql.DatabaseError as e:
                print(e)
                self.db.rollback()

    def relationships(self):
        """Link the current post to its tags and categories.

        Reads ``self.data``, laid out as
        ``(title, date, tags, categories, '<!--markdown-->' + post)``, looks
        up the post's ``cid`` by title and each meta's ``mid`` by name and
        type, and inserts one ``typecho_relationships`` row per pair.

        Tags and categories are handled independently: a post without tags
        still gets its category links, and a name that cannot be found is
        reported and skipped rather than aborting the rest. Each group is
        committed on success; database errors are printed and rolled back.
        """
        db = self.db
        cur = self.cur
        data = self.data
        print('tag = ', data[2], 'type = ', type(data[2]), 'cet = ', data[3])
        select_mid = '''
                SELECT mid FROM typecho_metas WHERE name = %s AND type = %s
            '''
        select_cid = '''
                        SELECT cid FROM typecho_contents WHERE title = %s
                    '''
        add_relationship = '''
                INSERT INTO typecho_relationships(cid,mid) VALUES (%s,%s)
        '''

        # Resolve the post id first; without it nothing can be linked.
        try:
            # Parameters must be a sequence, hence the one-element tuple.
            cur.execute(select_cid, (data[0],))
            posts = cur.fetchall()
        except pymysql.DatabaseError as e:
            print(e)
            db.rollback()
            return
        if not posts:
            print('不能建立关系', data[0])
            return
        # NOTE(review): posts sharing a title all match; the first row wins.
        cid = posts[0][0]

        for meta_type, value in (('tag', data[2]), ('category', data[3])):
            names = value if isinstance(value, list) else [value]
            seen = set()
            try:
                for name in names:
                    # Skip empty names (post has no tag/category) and repeats,
                    # which would insert the same (cid, mid) pair twice.
                    if not name or name in seen:
                        continue
                    seen.add(name)
                    cur.execute(select_mid, (name, meta_type))
                    metas = cur.fetchall()
                    if not metas:
                        print('不能建立关系', name)
                        continue
                    cur.execute(add_relationship, (cid, metas[0][0]))
                # Persist this group so a later failure cannot undo it.
                db.commit()
            except pymysql.DatabaseError as e:
                print(e)
                db.rollback()

全部源码见: https://github.com/Innei/move-hexo-to-typecho

欢迎交流