word 搜索优化

This commit is contained in:
cwchen 2025-11-12 16:28:39 +08:00
parent 5be02d46fd
commit 6cef01b811
1 changed file with 222 additions and 82 deletions

View File

@@ -294,74 +294,28 @@ export default {
return tempDiv.innerHTML
},
// NOTE(review): this span looks like diff output whose +/- markers and
// indentation were stripped. The body of highlightTextInNode is interleaved
// with the body of collectTextNodes (which starts part-way down, before the
// first method is closed), so the lines do not parse as a single function.
// The comments below say which method each run of lines appears to belong
// to; confirm against the real file before relying on them.

/**
 * Highlights every match of `pattern` inside `node`.
 *
 * Text node: the node is replaced by a fragment made of plain text nodes and
 * <mark class="search-highlight"> elements, and one record per mark is pushed
 * onto `results`.
 * Element node: recurses into its children, except for mark/script/style/
 * noscript elements, which are left untouched.
 *
 * @param {Node} node - DOM node to process.
 * @param {RegExp} pattern - Matched repeatedly with exec(); presumably carries
 *   the `g` flag, otherwise the exec loop would never advance (confirm at the
 *   call site).
 * @param {Array} results - Receives { element, segmentIndex, markIndex, parentElement }.
 * @param {*} segmentIndex - Stored unchanged on each result record.
 * @param {*} parentElement - Stored unchanged on each result record.
 * @param {{value: number}} markIndexRef - Counter incremented once per created mark.
 */
highlightTextInNode(node, pattern, results, segmentIndex, parentElement, markIndexRef) {
if (node.nodeType === Node.TEXT_NODE) {
const text = node.textContent
if (!text) return
// Reset the regex's scan position so matching starts at the beginning of this text.
pattern.lastIndex = 0
// Collect all matches first; the DOM is only modified after scanning is done.
const textMatches = []
let match
while ((match = pattern.exec(text)) !== null) {
textMatches.push({
index: match.index,
length: match[0].length,
text: match[0]
})
}
if (textMatches.length === 0) return
const parent = node.parentNode
if (!parent) return
// lastIndex here is the local cursor into `text`, not pattern.lastIndex.
let lastIndex = 0
const fragment = document.createDocumentFragment()
textMatches.forEach((matchInfo) => {
// Emit the unmatched text between the previous match and this one.
if (lastIndex < matchInfo.index) {
const beforeText = text.substring(lastIndex, matchInfo.index)
if (beforeText) {
fragment.appendChild(document.createTextNode(beforeText))
// NOTE(review): collectTextNodes starts here, before the forEach callback
// above has been closed — presumably a separate method interleaved by the diff.
/**
 * Appends to `textNodes` every text node under `element` that has non-empty
 * textContent and whose parent element exists and is not a
 * mark/script/style/noscript element.
 *
 * @param {Node} element - Root for the TreeWalker; returns immediately if falsy.
 * @param {Array} textNodes - Output array, mutated in place via push().
 */
collectTextNodes(element, textNodes) {
if (!element) return
const walker = document.createTreeWalker(
element,
NodeFilter.SHOW_TEXT,
{
acceptNode: (node) => {
const parent = node.parentElement
if (!parent) return NodeFilter.FILTER_REJECT
const tagName = parent.tagName ? parent.tagName.toLowerCase() : ''
// Skip text inside existing highlights and inside non-rendered containers.
if (tagName === 'mark' || tagName === 'script' || tagName === 'style' || tagName === 'noscript') {
return NodeFilter.FILTER_REJECT
}
}
// (highlightTextInNode, presumably) Wrap the matched text in a <mark> and record it.
const mark = document.createElement('mark')
mark.className = 'search-highlight'
mark.textContent = matchInfo.text
fragment.appendChild(mark)
results.push({
element: mark,
segmentIndex: segmentIndex,
markIndex: markIndexRef.value,
parentElement: parentElement
})
markIndexRef.value++
// Advance the cursor past this match.
lastIndex = matchInfo.index + matchInfo.length
})
// Emit whatever text remains after the last match.
if (lastIndex < text.length) {
const afterText = text.substring(lastIndex)
if (afterText) {
fragment.appendChild(document.createTextNode(afterText))
// (collectTextNodes, presumably) Default outcome of acceptNode: keep the text node.
return NodeFilter.FILTER_ACCEPT
}
}
// (highlightTextInNode, presumably) Swap the original text node for the built fragment.
if (fragment.childNodes.length > 0) {
parent.replaceChild(fragment, node)
// (collectTextNodes, presumably) End of the createTreeWalker(...) call, then the walk itself.
)
let node
while ((node = walker.nextNode())) {
if (node.textContent) {
textNodes.push(node)
}
// (highlightTextInNode, presumably) Element branch: recurse into children.
} else if (node.nodeType === Node.ELEMENT_NODE) {
const tagName = node.tagName ? node.tagName.toLowerCase() : ''
// Do not descend into existing highlights or non-rendered containers.
if (tagName === 'mark' || tagName === 'script' || tagName === 'style' || tagName === 'noscript') {
return
}
// Snapshot childNodes first: the recursive call replaces text nodes, which changes the live list.
const children = Array.from(node.childNodes)
children.forEach(child => {
this.highlightTextInNode(child, pattern, results, segmentIndex, parentElement, markIndexRef)
})
}
},
@@ -382,7 +336,6 @@ export default {
const results = []
let totalTextMatches = 0
const segmentMatchCounts = []
this.searchSegments.forEach((el, segmentIndex) => {
const originalHtml = el.dataset.originalHtml
@@ -406,33 +359,220 @@ export default {
console.warn(`Segment ${segmentIndex} still has ${existingMarks.length} mark tags after cleaning`)
}
const textNodes = []
this.collectTextNodes(el, textNodes)
if (textNodes.length === 0) return
const markIndexRef = { value: 0 }
const beforeHighlightText = this.getSearchableTextContent(el)
const children = Array.from(el.childNodes)
children.forEach(child => {
this.highlightTextInNode(child, pattern, results, segmentIndex, el, markIndexRef)
const nodeMap = []
let fullText = ''
let currentOffset = 0
textNodes.forEach((textNode, nodeIndex) => {
const text = textNode.textContent || ''
const startOffset = currentOffset
const endOffset = currentOffset + text.length
nodeMap.push({
textNode,
text,
startOffset,
endOffset,
nodeIndex
})
fullText += text
currentOffset = endOffset
})
const createdMarks = el.querySelectorAll('mark.search-highlight')
const actualCount = createdMarks.length
const afterHighlightText = this.getSearchableTextContent(el)
if (!fullText) return
segmentMatchCounts.push({
segmentIndex,
expected: textMatchCount,
actual: actualCount,
searchableTextLength: searchableText.length,
beforeHighlightLength: beforeHighlightText.length,
afterHighlightLength: afterHighlightText.length
pattern.lastIndex = 0
const allMatches = []
let match
while ((match = pattern.exec(fullText)) !== null) {
allMatches.push({
start: match.index,
end: match.index + match[0].length,
text: match[0]
})
}
if (allMatches.length === 0) return
const nodeReplacements = new Map()
const processedMatches = new Set()
allMatches.forEach((matchInfo, matchIndex) => {
const matchStart = matchInfo.start
const matchEnd = matchInfo.end
const matchKey = `${matchStart}-${matchEnd}`
if (processedMatches.has(matchKey)) {
return
}
processedMatches.add(matchKey)
const affectedNodes = []
for (let i = 0; i < nodeMap.length; i++) {
const nodeInfo = nodeMap[i]
if (nodeInfo.endOffset > matchStart && nodeInfo.startOffset < matchEnd) {
affectedNodes.push({
...nodeInfo,
matchStartInNode: Math.max(0, matchStart - nodeInfo.startOffset),
matchEndInNode: Math.min(nodeInfo.text.length, matchEnd - nodeInfo.startOffset)
})
}
}
if (affectedNodes.length === 0) return
affectedNodes.forEach((nodeInfo, idx) => {
if (!nodeReplacements.has(nodeInfo.nodeIndex)) {
nodeReplacements.set(nodeInfo.nodeIndex, {
textNode: nodeInfo.textNode,
ranges: [],
matchIndices: new Set()
})
}
const replacement = nodeReplacements.get(nodeInfo.nodeIndex)
const isFirst = idx === 0
const isLast = idx === affectedNodes.length - 1
if (isFirst && isLast) {
const rangeKey = `${nodeInfo.matchStartInNode}-${nodeInfo.matchEndInNode}-${matchIndex}`
if (!replacement.matchIndices.has(rangeKey)) {
replacement.ranges.push({
start: nodeInfo.matchStartInNode,
end: nodeInfo.matchEndInNode,
isFullMatch: true,
matchIndex: matchIndex,
shouldAddToResults: true
})
replacement.matchIndices.add(rangeKey)
}
} else if (isFirst) {
const rangeKey = `${nodeInfo.matchStartInNode}-${nodeInfo.text.length}-${matchIndex}`
if (!replacement.matchIndices.has(rangeKey)) {
replacement.ranges.push({
start: nodeInfo.matchStartInNode,
end: nodeInfo.text.length,
isStart: true,
matchIndex: matchIndex,
shouldAddToResults: true
})
replacement.matchIndices.add(rangeKey)
}
} else if (isLast) {
const rangeKey = `0-${nodeInfo.matchEndInNode}-${matchIndex}`
if (!replacement.matchIndices.has(rangeKey)) {
replacement.ranges.push({
start: 0,
end: nodeInfo.matchEndInNode,
isEnd: true,
matchIndex: matchIndex,
shouldAddToResults: false
})
replacement.matchIndices.add(rangeKey)
}
} else {
const rangeKey = `0-${nodeInfo.text.length}-${matchIndex}`
if (!replacement.matchIndices.has(rangeKey)) {
replacement.ranges.push({
start: 0,
end: nodeInfo.text.length,
isMiddle: true,
matchIndex: matchIndex,
shouldAddToResults: false
})
replacement.matchIndices.add(rangeKey)
}
}
})
})
if (actualCount !== textMatchCount) {
console.warn(`Segment ${segmentIndex}: expected ${textMatchCount} marks, created ${actualCount}. Text: "${searchableText.substring(0, 100)}..."`)
for (let i = textNodes.length - 1; i >= 0; i--) {
const textNode = textNodes[i]
const parent = textNode.parentNode
if (!parent || !document.body.contains(textNode)) continue
if (nodeReplacements.has(i)) {
const replacement = nodeReplacements.get(i)
const originalText = replacement.textNode.textContent
let ranges = replacement.ranges
ranges.sort((a, b) => {
if (a.start !== b.start) return a.start - b.start
return a.end - b.end
})
const mergedRanges = []
ranges.forEach(range => {
if (mergedRanges.length === 0) {
mergedRanges.push({ ...range })
} else {
const lastRange = mergedRanges[mergedRanges.length - 1]
if (range.start <= lastRange.end) {
lastRange.end = Math.max(lastRange.end, range.end)
} else {
mergedRanges.push({ ...range })
}
}
})
const fragment = document.createDocumentFragment()
let lastIndex = 0
mergedRanges.forEach(range => {
if (lastIndex < range.start) {
const beforeText = originalText.substring(lastIndex, range.start)
if (beforeText) {
fragment.appendChild(document.createTextNode(beforeText))
}
}
const mark = document.createElement('mark')
mark.className = 'search-highlight'
mark.textContent = originalText.substring(range.start, range.end)
fragment.appendChild(mark)
if (range.shouldAddToResults !== false) {
results.push({
element: mark,
segmentIndex: segmentIndex,
markIndex: markIndexRef.value,
parentElement: el
})
}
markIndexRef.value++
lastIndex = range.end
})
if (lastIndex < originalText.length) {
const afterText = originalText.substring(lastIndex)
if (afterText) {
fragment.appendChild(document.createTextNode(afterText))
}
}
if (fragment.childNodes.length > 0) {
try {
parent.replaceChild(fragment, textNode)
} catch (e) {
console.warn(`Failed to replace text node in segment ${segmentIndex}:`, e)
}
}
}
}
})
if (results.length !== totalTextMatches) {
console.warn(`Total matches mismatch: expected ${totalTextMatches}, got ${results.length}`, segmentMatchCounts)
console.warn(`Total matches mismatch: expected ${totalTextMatches}, got ${results.length}`)
}
this.searchResults = results