feat: add fast shallow headline parsing for search tools #1006

Closed
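
A minimal sketch of how a headline search tool might consume the new API, assuming a loaded `OrgFile` instance. The entry fields mirror the return shape documented in the diff below; the `collect_entries` helper, the `max_depth = 3` value, and the `file.filename` access are illustrative assumptions rather than part of this change.

```lua
-- Hypothetical picker-side consumer; `file` is assumed to be a loaded OrgFile
-- (e.g. one of the agenda files). Only shallow data is read here, so no
-- OrgHeadline objects are created.
local function collect_entries(file)
  local entries = {}
  for _, headline in ipairs(file:get_headlines_shallow({ max_depth = 3 })) do
    if not headline.is_archived then
      table.insert(entries, {
        display = string.rep('*', headline.level) .. ' ' .. headline.title,
        tags = table.concat(headline.all_tags, ':'),
        filename = file.filename, -- assumed OrgFile field, shown for illustration
        lnum = headline.line_number,
      })
    end
  end
  return entries
end
```
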
106 changes: 106 additions & 0 deletions lua/orgmode/files/file.lua
@@ -261,6 +261,112 @@ function OrgFile:find_headlines_by_title(title, exact)
end, self:get_headlines())
end

---Extract title from headline item node text (remove TODO keywords and priority)
---@param item_text string
---@return string title
function OrgFile:_parse_headline_title(item_text)
-- Remove TODO keywords
local todo_keywords = config:get_todo_keywords():all_values()
for _, keyword in ipairs(todo_keywords) do
local pattern = '^' .. vim.pesc(keyword) .. '%s+'
if item_text:match(pattern) then
item_text = item_text:gsub(pattern, '')
break
end
end

-- Remove priority - use dynamic priority range
local prio_range = config:get_priority_range()
local priority_pattern = '^%[#[' .. prio_range.highest .. '-' .. prio_range.lowest .. ']%]%s*'
item_text = item_text:gsub(priority_pattern, '')

return vim.trim(item_text)
end

---Extract shallow data from a single headline node for headline search.
---Extracts only title, level, tags, and position - no complex object creation.
---@param node TSNode
---@return { title: string, level: number, line_number: number, all_tags: string[], is_archived: boolean }
function OrgFile:_extract_shallow_headline_data(node)
if not node then
return { title = '', level = 0, line_number = 0, all_tags = {}, is_archived = false }
end

-- Extract level from stars
local stars_node = node:field('stars')[1]
local level = stars_node and select(2, stars_node:end_()) or 0

-- Extract title from item node
local item_node = node:field('item')[1]
local title = ''
if item_node then
local item_text = self:get_node_text(item_node) or ''
title = self:_parse_headline_title(item_text)
end

-- Extract tags from tags node using existing utils
local tags_node = node:field('tags')[1]
local all_tags = {}
local is_archived = false
if tags_node then
local tags_text = self:get_node_text(tags_node) or ''
all_tags = utils.parse_tags_string(tags_text)
-- Check for archive tag using consistent pattern
for _, tag in ipairs(all_tags) do
if tag:upper() == 'ARCHIVE' then
is_archived = true
break
end
end
end

-- Get line number
local start_row = node:start()
local line_number = start_row + 1

return {
title = title,
level = level,
line_number = line_number,
all_tags = all_tags,
is_archived = is_archived,
}
end

---Extract shallow headline data for fast headline search across agenda files.
---
---Why shallow extraction? Headline search tools (telescope, fzf.lua, snacks.picker, etc.)
---need title/tags/level for ALL headlines across ALL agenda files. Creating full OrgHeadline
---objects triggers expensive lazy loading that search tools immediately consume anyway.
---This pre-computes only the search-relevant data in a single efficient pass.
---
---@param opts? { archived: boolean, max_depth: number }
---@return { title: string, level: number, line_number: number, all_tags: string[], is_archived: boolean }[]
function OrgFile:get_headlines_shallow(opts)
if self:is_archive_file() and not (opts and opts.archived) then
return {}
end

self:parse()
if not self.root then
return {}
end

local matches = self:get_ts_captures('(section (headline) @headline)')
local results = vim.tbl_map(function(node)
return self:_extract_shallow_headline_data(node)
end, matches)

-- Apply max_depth filtering if specified
if opts and opts.max_depth then
results = vim.tbl_filter(function(headline)
return headline.level <= opts.max_depth
end, results)
end

return results
end

---@param title string
---@return OrgHeadline | nil
function OrgFile:find_headline_by_title(title)
128 changes: 128 additions & 0 deletions tests/plenary/files/file_spec.lua
@@ -831,6 +831,134 @@ describe('OrgFile', function()
end)
end)

describe('get_headlines_shallow', function()
it('should get all headlines with correct structure', function()
local file = load_file_sync({
'* TODO Headline 1',
'** [#A] Priority Headline :tag1:tag2:',
'*** DONE Archived Headline :ARCHIVE:',
'* Plain Headline',
})

local headlines = file:get_headlines_shallow()

assert.are.same(4, #headlines)

-- Verify data structure
local headline = headlines[1]
assert.is.not_nil(headline.title)
assert.is.not_nil(headline.level)
assert.is.not_nil(headline.line_number)
assert.is.not_nil(headline.all_tags)
assert.is.not_nil(headline.is_archived)

-- Verify specific values
assert.are.same('Headline 1', headlines[1].title)
assert.are.same(1, headlines[1].level)
assert.are.same(1, headlines[1].line_number)
assert.are.same({}, headlines[1].all_tags)
assert.is.False(headlines[1].is_archived)

assert.are.same('Priority Headline', headlines[2].title)
assert.are.same(2, headlines[2].level)
assert.are.same({ 'tag1', 'tag2' }, headlines[2].all_tags)

assert.are.same('Archived Headline', headlines[3].title)
assert.are.same({ 'ARCHIVE' }, headlines[3].all_tags)
assert.is.True(headlines[3].is_archived)
end)

it('should return same count as get_headlines for regular files', function()
local file = load_file_sync({
'* TODO Headline 1',
'* TODO Headline 2',
'** Headline 2.1',
'*** Headline 2.1.1',
'* DONE Headline 3',
})

local shallow = file:get_headlines_shallow()
local heavy = file:get_headlines()

assert.are.same(#heavy, #shallow)
end)

-- Parameterized archive tests
local archive_test_cases = {
{
name = 'regular file without archived option',
filename = nil, -- will use .org
opts = {},
content = { '* Headline', '* Archived :ARCHIVE:' },
expected_count = 2,
},
{
name = 'archive file without archived option',
filename = 'test.org_archive',
opts = {},
content = { '* Headline', '* Archived :ARCHIVE:' },
expected_count = 0,
},
{
name = 'archive file with archived=true',
filename = 'test.org_archive',
opts = { archived = true },
content = { '* Headline', '* Archived :ARCHIVE:' },
expected_count = 2,
},
}

for _, case in ipairs(archive_test_cases) do
it('should handle archives: ' .. case.name, function()
local filename = case.filename and (vim.fn.tempname() .. case.filename) or nil
local file = load_file_sync(case.content, filename)
local headlines = file:get_headlines_shallow(case.opts)
assert.are.same(case.expected_count, #headlines)
end)
end

it('should respect max_depth filtering', function()
local file = load_file_sync({
'* Level 1',
'** Level 2',
'*** Level 3',
'**** Level 4',
})

local all_headlines = file:get_headlines_shallow()
local depth_2 = file:get_headlines_shallow({ max_depth = 2 })

assert.are.same(4, #all_headlines)
assert.are.same(2, #depth_2)
end)

it('should match get_headlines filtering behavior', function()
local content = {
'* TODO Headline 1',
'** Headline 1.1',
'*** TODO Headline 1.1.1 :ARCHIVE:',
'**** Headline 1.1.1.1',
'* DONE Headline 2',
}
local file = load_file_sync(content)

-- Compare filtering with different options
local shallow_all = file:get_headlines_shallow()
local heavy_all = file:get_headlines()
assert.are.same(#heavy_all, #shallow_all)

local shallow_archived = file:get_headlines_shallow({ archived = true })
local heavy_archived = file:get_headlines_including_archived()
assert.are.same(#heavy_archived, #shallow_archived)
end)

it('should handle empty files gracefully', function()
local file = load_file_sync({})
local headlines = file:get_headlines_shallow()
assert.are.same(0, #headlines)
end)
end)

describe('get_todos', function()
local has_correct_type = function(todos)
assert.are.same('TODO', todos.todo_keywords[1].type)