# Get the EPUB table of contents (获取epub目录)

import xml.etree.ElementTree as ET

def parse_epub_toc(toc_file):
    """Parse an EPUB ``toc.ncx`` file.

    Args:
        toc_file: File name or file object, passed straight to ``ET.parse``.

    Returns:
        A ``(book_title, nav_points, ns)`` tuple where ``book_title`` is the
        stripped docTitle text (or the placeholder "未知标题" when the title is
        missing or blank), ``nav_points`` is the list of top-level
        ``navPoint`` elements directly under ``navMap``, and ``ns`` is the
        namespace mapping used for the lookups.  If the file cannot be read
        or parsed, an error message is printed and ``(None, None, None)`` is
        returned.
    """
    # NCX namespace mapping used by every find/findall call.
    ns = {'ncx': 'http://www.daisy.org/z3986/2005/ncx/'}

    # Only the read/parse step is expected to fail; keep the try body minimal.
    # ValueError is included because expat raises it for some unsupported
    # (multi-byte) declared encodings.
    try:
        root = ET.parse(toc_file).getroot()
    except (OSError, ET.ParseError, ValueError) as e:
        print(f"解析文件出错: {e}")
        return None, None, None

    # Book title: an element may exist but carry no text (``<text/>``), in
    # which case ``.text`` is None -- fall back to the placeholder so callers
    # never mistake an untitled book for a parse failure.
    doc_title = root.find('.//ncx:docTitle/ncx:text', ns)
    title_text = doc_title.text if doc_title is not None else None
    book_title = (title_text or '').strip() or "未知标题"

    # Top-level entries only; nested navPoints are reached by recursing into
    # each element's own children.
    nav_points = root.findall('./ncx:navMap/ncx:navPoint', ns)

    return book_title, nav_points, ns

def build_toc_tree(nav_points, ns):
    """Build a nested table-of-contents structure from ``navPoint`` elements.

    Args:
        nav_points: Iterable of ``navPoint`` elements at one nesting level.
        ns: Namespace mapping containing the ``ncx`` prefix.

    Returns:
        A list of dicts, one per usable ``navPoint``, with the keys ``id``,
        ``play_order``, ``title``, ``src`` and ``children`` (a list of the
        same shape built from the element's direct ``navPoint`` children).
        A ``navPoint`` lacking either a label text element or a ``content``
        element is skipped together with its descendants.
    """
    toc = []

    for nav_point in nav_points:
        nav_label = nav_point.find('ncx:navLabel/ncx:text', ns)
        content = nav_point.find('ncx:content', ns)

        # Entries without both a label and a target are not usable chapters.
        if nav_label is None or content is None:
            continue

        toc.append({
            'id': nav_point.get('id', ''),
            'play_order': nav_point.get('playOrder', ''),
            # An empty <text/> element has text None; keep titles as strings
            # so consumers can concatenate them safely.
            'title': nav_label.text or '',
            'src': content.get('src', ''),
            # Recurse into direct children only; an empty result is [].
            'children': build_toc_tree(nav_point.findall('ncx:navPoint', ns), ns),
        })

    return toc


def print_tree(toc, prefix='', is_last=True):
    """Print the table of contents as an indented outline.

    Each chapter title is printed on its own line, indented by four spaces
    per nesting level.

    Args:
        toc: List of chapter dicts with a ``title`` key and an optional
            ``children`` list of the same shape.
        prefix: Indentation already accumulated by the enclosing levels.
        is_last: Accepted for backward compatibility only.  It previously
            selected between connector strings that were identical, so it
            has no effect on the output.
    """
    indent = '    '

    for chapter in toc:
        # A missing title (None) is printed as an empty label instead of
        # raising TypeError on string concatenation.
        print(prefix + indent + (chapter['title'] or ''))

        # Children are printed one level deeper.
        children = chapter.get('children')
        if children:
            print_tree(children, prefix + indent)


def main(toc_file='toc.ncx'):
    """Parse an NCX file and print its title and table of contents.

    Args:
        toc_file: Path of the NCX file to read; defaults to ``toc.ncx`` in
            the current working directory.
    """
    # Parse the file.  A failed parse is signalled by nav_points being None;
    # checking the title instead would also abort on books with no title.
    book_title, nav_points, ns = parse_epub_toc(toc_file)
    if nav_points is None:
        return

    # Same placeholder the parser uses when the title is missing.
    print(f"书籍标题: {book_title or '未知标题'}\n")
    print("目录结构:")

    # Build the nested structure, then print it as an indented outline.
    toc_tree = build_toc_tree(nav_points, ns)
    print_tree(toc_tree)

# Run main() only when the file is executed as a script, not when imported.
if __name__ == "__main__":
    main()