登录
OAmaster

Split a Markdown document into chunks whose display length (the chunk's lines joined by " | ") does not exceed maxChunkSize whenever possible. Lines must remain intact. When a new chunk starts inside a Markdown header section, prepend the active header path to that chunk. For example, if the current content is under # A and ## B, the new chunk should start with those header lines before adding the next content lines. Return each chunk as a display string where the lines inside that chunk are joined by " | " (space, pipe, space).

Constraints

Header lines are lines whose first non-space character is #. If a single non-header line plus its active headers is longer than maxChunkSize, keep that line in its own chunk with the active headers.

Example 1

Input:

markdown = "# A\nshort\nlonger line\n## B\nx\ny"
maxChunkSize = 20

Output:

["# A | short","# A | longer line","# A | ## B | x | y"]

Explanation: The second chunk repeats the active # A header before continuing content under that section.

Example 2

Input:

markdown = "# Guide\nalpha\nbeta"
maxChunkSize = 30

Output:

["# Guide | alpha | beta"]

Explanation: The whole document fits within one chunk.

解法

扫描每一行:若是 header 行,更新活跃 header 栈(按 # 数判定层级,弹掉同级及更深的,再 push 当前);否则视为内容行。维护当前 chunk 的行列表 cur(每个新 chunk 以当时的活跃 header 列表起头)。每加入新行前先计算"加入后 cur 各行用 ' | ' 拼接的长度",若 > maxChunkSize 且 cur 中除活跃 header 前缀外已有其他行,则把 cur 收尾、新 chunk 重新以活跃 header 起头。单行(含 header 前缀)若仍超长,仍保留为一个 chunk。时间:若增量维护当前 chunk 的拼接长度,为 O(输入 + 输出总字符);若每行都重新拼接 cur,则最坏为 O(行数 × chunk 长度)。空间 O(总字符)。

from typing import List

def markdown_header_chunks(markdown: str, max_chunk_size: int) -> List[str]:
    """Split a Markdown document into header-prefixed display chunks.

    Lines are never broken. Each chunk is rendered as its lines joined by
    ``" | "``, and a chunk is closed as soon as appending the next line would
    push that rendered length past ``max_chunk_size``. A chunk opened inside a
    header section starts with the active header path (one line per level).

    A chunk is only closed when it holds something besides the header prefix
    that the next chunk would repeat. Otherwise the line is appended anyway,
    so an over-long line still gets a chunk of its own and no header-only
    duplicate chunk is emitted.

    Args:
        markdown: Document text; lines are separated by ``"\\n"``.
        max_chunk_size: Soft upper bound on the rendered length of a chunk.

    Returns:
        The chunks in document order; an empty list for an empty document.
    """
    if not markdown:
        return []

    sep = " | "
    chunks: List[str] = []
    # Stack of (level, header line) describing the current section path.
    active: list = []
    # Lines of the chunk under construction.
    cur: list = []
    # Always equal to len(sep.join(cur)); kept incrementally so the chunk is
    # not re-joined for every input line.
    cur_len = 0

    for line in markdown.split("\n"):
        stripped = line.lstrip()
        is_header = stripped.startswith("#")
        if is_header:
            level = len(stripped) - len(stripped.lstrip("#"))
            # A new header closes every open section at the same or a deeper
            # level before becoming the innermost active header itself.
            while active and active[-1][0] >= level:
                active.pop()
            active.append((level, line))

        # Number of active headers a fresh chunk would start with *before*
        # `line` is appended. A header line is already the last entry of
        # `active`, so it must not be counted as part of its own prefix.
        prefix_count = len(active) - 1 if is_header else len(active)
        grown_len = cur_len + len(sep) + len(line) if cur else len(line)

        # Close the chunk only if it contains more than the bare prefix;
        # flushing a prefix-only chunk would just emit duplicated headers.
        if cur and grown_len > max_chunk_size and len(cur) > prefix_count:
            chunks.append(sep.join(cur))
            cur = [text for _, text in active[:prefix_count]]
            cur_len = len(sep.join(cur))
            grown_len = cur_len + len(sep) + len(line) if cur else len(line)

        cur.append(line)
        cur_len = grown_len

    if cur:
        chunks.append(sep.join(cur))
    return chunks
import java.util.*;

class Solution {
    public List<String> markdownHeaderChunks(String markdown, int maxChunkSize) {
        List<String> out = new ArrayList<>();
        if (markdown == null || markdown.isEmpty()) return out;
        String[] lines = markdown.split("\n", -1);
        List<int[]> activeLevels = new ArrayList<>();
        List<String> activeText = new ArrayList<>();
        List<String> cur = new ArrayList<>();
        for (String line : lines) {
            String trimmed = line.replaceAll("^\\s+", "");
            boolean isHeader = trimmed.startsWith("#");
            if (isHeader) {
                int level = 0;
                while (level < trimmed.length() && trimmed.charAt(level) == '#') level++;
                while (!activeLevels.isEmpty() && activeLevels.get(activeLevels.size() - 1)[0] >= level) {
                    activeLevels.remove(activeLevels.size() - 1);
                    activeText.remove(activeText.size() - 1);
                }
                activeLevels.add(new int[]{level});
                activeText.add(line);
            }
            String tentative = String.join(" | ", cur) + (cur.isEmpty() ? "" : " | ") + line;
            boolean hasContent = cur.size() > activeText.size() - (isHeader ? 1 : 0);
            if (!cur.isEmpty() && tentative.length() > maxChunkSize && hasContent) {
                out.add(String.join(" | ", cur));
                cur = new ArrayList<>(activeText.subList(0, isHeader ? activeText.size() - 1 : activeText.size()));
            }
            cur.add(line);
        }
        if (!cur.isEmpty()) out.add(String.join(" | ", cur));
        return out;
    }
}
#include <bits/stdc++.h>
using namespace std;

class Solution {
public:
    // Splits a Markdown document into display chunks that repeat the active
    // header path.
    //
    // Lines are kept whole. A chunk is closed when appending the next line
    // would make its display form (lines joined by " | ") longer than
    // maxChunkSize, provided the chunk holds more than the header prefix the
    // following chunk would start with; otherwise the line is appended anyway,
    // so an over-long line keeps a chunk of its own.
    //
    // markdown:     document text, lines separated by '\n'.
    // maxChunkSize: soft upper bound on the display length of a chunk.
    // Returns the chunks in document order; empty for an empty document.
    vector<string> markdownHeaderChunks(string markdown, int maxChunkSize) {
        vector<string> out;
        if (markdown.empty()) return out;

        // Split on '\n' by hand. getline() silently drops the empty line that
        // follows a trailing newline, which made this version disagree with
        // the Python split("\n") and Java split("\n", -1) implementations.
        vector<string> lines;
        for (size_t start = 0;;) {
            size_t nl = markdown.find('\n', start);
            if (nl == string::npos) {
                lines.push_back(markdown.substr(start));
                break;
            }
            lines.push_back(markdown.substr(start, nl - start));
            start = nl + 1;
        }

        vector<int> activeLevels;    // header levels of the current section path
        vector<string> activeText;   // header lines of the current section path
        vector<string> cur;          // lines of the chunk under construction
        auto join = [](const vector<string>& v) {
            string s;
            for (size_t i = 0; i < v.size(); i++) {
                if (i) s += " | ";
                s += v[i];
            }
            return s;
        };

        for (const string& line : lines) {
            // A header is a line whose first non-whitespace character is '#'.
            size_t p = 0;
            while (p < line.size() && isspace((unsigned char)line[p])) p++;
            bool isHeader = p < line.size() && line[p] == '#';
            if (isHeader) {
                int lvl = 0;
                while (p + lvl < line.size() && line[p + lvl] == '#') lvl++;
                // Close every open section at the same or a deeper level.
                while (!activeLevels.empty() && activeLevels.back() >= lvl) {
                    activeLevels.pop_back();
                    activeText.pop_back();
                }
                activeLevels.push_back(lvl);
                activeText.push_back(line);
            }

            // Headers a fresh chunk starts with before this line is appended;
            // a header line is already on the stack and is not its own prefix.
            const size_t prefixCount = activeText.size() - (isHeader ? 1 : 0);
            string tent = cur.empty() ? line : join(cur) + " | " + line;

            // Flush only if the chunk holds more than the bare header prefix.
            if (!cur.empty() && (int)tent.size() > maxChunkSize && cur.size() > prefixCount) {
                out.push_back(join(cur));
                cur.assign(activeText.begin(), activeText.begin() + prefixCount);
            }
            cur.push_back(line);
        }

        if (!cur.empty()) out.push_back(join(cur));
        return out;
    }
};