fix(editor): support CRLF line segmentation

This commit is contained in:
Feror 2026-05-30 18:19:52 +02:00
parent 181ec6ccca
commit 3a5645464f
4 changed files with 287 additions and 54 deletions

View file

@ -0,0 +1,160 @@
import Foundation
/// The terminator that closes a physical line of text.
///
/// Raw values are the case names (`"lf"`, `"crlf"`, `"cr"`, `"none"`).
///
/// - Note: In an `Optional<LineEndingStrategy>` context a bare `.none` resolves to
///   `Optional.none`, not to this enum's case; spell out `LineEndingStrategy.none` there.
public enum LineEndingStrategy: String, Hashable, Sendable {
    case lf, crlf, cr, none

    /// Number of UTF-16 code units occupied by the terminator itself.
    public var utf16Length: Int {
        switch self {
        case .none:
            // No terminator at all, so it takes up no space.
            return 0
        case .lf:
            return 1
        case .cr:
            return 1
        case .crlf:
            // Carriage return followed by line feed.
            return 2
        }
    }
}
/// Describes one physical line: its position in the document, the range of its
/// content (terminator excluded), and the terminator that follows it.
public struct DocumentLineBoundary: Hashable, Sendable {
    /// Zero-based line number.
    public var index: Int

    /// Range of the line's text, not including its line ending.
    public var contentRange: NSRange

    /// The terminator that follows `contentRange`.
    public var lineEnding: LineEndingStrategy

    /// Creates a boundary from its three stored components.
    public init(index: Int, contentRange: NSRange, lineEnding: LineEndingStrategy) {
        self.lineEnding = lineEnding
        self.contentRange = contentRange
        self.index = index
    }

    /// Range covered by the terminator alone; it starts where the content stops and
    /// is empty when `lineEnding` has zero length.
    public var lineEndingRange: NSRange {
        let terminatorStart = contentRange.location + contentRange.length
        return NSRange(location: terminatorStart, length: lineEnding.utf16Length)
    }

    /// Offset immediately after the terminator, i.e. where the following line would begin.
    public var nextLineLocation: Int {
        let contentEnd = contentRange.location + contentRange.length
        return contentEnd + lineEnding.utf16Length
    }
}
/// A line-oriented index over a document's text.
///
/// The text is split into physical lines at LF, CRLF, and lone CR terminators. Every
/// line is recorded as a `DocumentLineBoundary` whose ranges are expressed in UTF-16
/// code units, matching `NSRange`/`NSString` conventions.
public struct DocumentLineIndex: Hashable, Sendable {
    /// The indexed text.
    ///
    /// Assigning a new value rescans it, so `boundaries` never describes stale text
    /// (stale ranges would make `editorLines(activeLineIndex:)` read out of bounds).
    public var source: String {
        didSet {
            boundaries = Self.scanBoundaries(in: source)
        }
    }

    /// One entry per physical line, in document order. The scanner always produces at
    /// least one entry; the last entry always has a `.none` line ending.
    public var boundaries: [DocumentLineBoundary]

    /// Builds the index by scanning `source` once.
    public init(source: String) {
        self.source = source
        self.boundaries = Self.scanBoundaries(in: source)
    }

    /// Returns the index of the line that owns the given UTF-16 offset.
    ///
    /// Offsets that fall on a line's terminator (including between the CR and LF of a
    /// CRLF pair) belong to that line. Offsets outside the document are clamped to
    /// `0...source.utf16.count`.
    ///
    /// - Parameter location: UTF-16 offset into `source`.
    /// - Returns: The owning line's `index`, or `0` when `boundaries` is empty.
    public func lineIndex(containing location: Int) -> Int {
        guard !boundaries.isEmpty else { return 0 }
        let clampedLocation = max(0, min(location, source.utf16.count))

        // Binary search for the first line whose successor starts after the location.
        // Relies on `boundaries` being in ascending document order, which is what the
        // scanner produces. The last line is the fallback, so it is never probed.
        var low = 0
        var high = boundaries.count - 1
        while low < high {
            let middle = low + (high - low) / 2
            if clampedLocation < boundaries[middle].nextLineLocation {
                high = middle
            } else {
                low = middle + 1
            }
        }
        return boundaries[low].index
    }

    /// Materializes every line as an `EditorLine`.
    ///
    /// - Parameter activeLineIndex: The line that gets `.source` mode; every other
    ///   line gets `.rendered`.
    /// - Returns: One `EditorLine` per boundary, carrying the line's text (terminator
    ///   excluded) and its content range.
    public func editorLines(activeLineIndex: Int) -> [EditorLine] {
        let nsSource = source as NSString
        return boundaries.map { boundary in
            EditorLine(
                index: boundary.index,
                source: nsSource.substring(with: boundary.contentRange),
                range: boundary.contentRange,
                mode: boundary.index == activeLineIndex ? .source : .rendered
            )
        }
    }

    /// Splits `source` into line boundaries.
    ///
    /// Each terminator closes the line before it. Whatever follows the last terminator
    /// (possibly nothing) becomes the final line, which covers three situations at
    /// once: an empty document, an unterminated last line, and the blank line after a
    /// trailing terminator.
    private static func scanBoundaries(in source: String) -> [DocumentLineBoundary] {
        let nsSource = source as NSString
        let sourceLength = nsSource.length

        var boundaries: [DocumentLineBoundary] = []
        var lineStart = 0
        var cursor = 0

        while cursor < sourceLength {
            guard isLineEndingStart(nsSource.character(at: cursor)) else {
                cursor += 1
                continue
            }

            let lineEnding = lineEndingStrategy(at: cursor, in: nsSource)
            boundaries.append(DocumentLineBoundary(
                index: boundaries.count,
                contentRange: NSRange(location: lineStart, length: cursor - lineStart),
                lineEnding: lineEnding
            ))

            // Step over the whole terminator so the LF of a CRLF is not seen twice.
            cursor += lineEnding.utf16Length
            lineStart = cursor
        }

        boundaries.append(DocumentLineBoundary(
            index: boundaries.count,
            contentRange: NSRange(location: lineStart, length: sourceLength - lineStart),
            lineEnding: .none
        ))
        return boundaries
    }

    /// Classifies the terminator that starts at `location`.
    ///
    /// The caller must pass the offset of a CR or LF code unit. A CR directly followed
    /// by LF is `.crlf`, any other CR is `.cr`, and everything else is reported as `.lf`.
    private static func lineEndingStrategy(at location: Int, in source: NSString) -> LineEndingStrategy {
        guard source.character(at: location) == carriageReturnUTF16 else {
            return .lf
        }

        let nextLocation = location + 1
        if nextLocation < source.length,
           source.character(at: nextLocation) == lineFeedUTF16 {
            return .crlf
        }
        return .cr
    }

    /// Whether the code unit can begin a line terminator (LF or CR).
    private static func isLineEndingStart(_ character: unichar) -> Bool {
        character == lineFeedUTF16 || character == carriageReturnUTF16
    }

    /// UTF-16 code unit for `\n`.
    private static let lineFeedUTF16: unichar = 10

    /// UTF-16 code unit for `\r`.
    private static let carriageReturnUTF16: unichar = 13
}
// NOTE(review): Foundation's Swift overlay may already expose an equivalent
// `NSRange.upperBound`; confirm before treating this helper as required.
private extension NSRange {
    /// The first offset past the end of the range (`location + length`).
    var upperBound: Int {
        let start = location
        return start + length
    }
}

View file

@ -2,57 +2,11 @@ import Foundation
public enum EditorActiveLineTracker { public enum EditorActiveLineTracker {
public static func lines(from source: String, activeLineIndex: Int) -> [EditorLine] { public static func lines(from source: String, activeLineIndex: Int) -> [EditorLine] {
var lines: [EditorLine] = [] DocumentLineIndex(source: source).editorLines(activeLineIndex: activeLineIndex)
var lineStart = source.startIndex
var utf16Location = 0
var index = 0
while lineStart < source.endIndex {
let lineEnd = source[lineStart...].firstIndex(of: "\n") ?? source.endIndex
let line = String(source[lineStart..<lineEnd])
let length = line.utf16.count
lines.append(EditorLine(
index: index,
source: line,
range: NSRange(location: utf16Location, length: length),
mode: index == activeLineIndex ? .source : .rendered
))
if lineEnd == source.endIndex {
lineStart = lineEnd
utf16Location += length
} else {
lineStart = source.index(after: lineEnd)
utf16Location += length + 1
}
index += 1
}
if source.isEmpty || source.hasSuffix("\n") {
lines.append(EditorLine(
index: index,
source: "",
range: NSRange(location: utf16Location, length: 0),
mode: index == activeLineIndex ? .source : .rendered
))
}
return lines
} }
public static func lineIndex(containing location: Int, in source: String) -> Int { public static func lineIndex(containing location: Int, in source: String) -> Int {
let clampedLocation = max(0, min(location, source.utf16.count)) DocumentLineIndex(source: source).lineIndex(containing: location)
var currentLocation = 0
for (index, line) in source.split(separator: "\n", omittingEmptySubsequences: false).enumerated() {
let length = line.utf16.count
if clampedLocation <= currentLocation + length {
return index
}
currentLocation += length + 1
}
return 0
} }
public static func clampedSelection(_ selection: EditorSelection, in source: String) -> EditorSelection { public static func clampedSelection(_ selection: EditorSelection, in source: String) -> EditorSelection {

View file

@ -256,9 +256,11 @@ public enum EditorBenchmarkProfiler {
} }
public static func documentProfile(fileName: String, source: String) -> EditorBenchmarkDocumentProfile { public static func documentProfile(fileName: String, source: String) -> EditorBenchmarkDocumentProfile {
let lines = source.components(separatedBy: "\n") let lineIndex = DocumentLineIndex(source: source)
let lineLengths = lines.map { $0.utf16.count } let nsSource = source as NSString
let lineCount = lines.count let lineSources = lineIndex.boundaries.map { nsSource.substring(with: $0.contentRange) }
let lineLengths = lineIndex.boundaries.map(\.contentRange.length)
let lineCount = lineIndex.boundaries.count
let maxLineLength = lineLengths.max() ?? 0 let maxLineLength = lineLengths.max() ?? 0
let totalLineLength = lineLengths.reduce(0, +) let totalLineLength = lineLengths.reduce(0, +)
@ -273,9 +275,9 @@ public enum EditorBenchmarkProfiler {
orderedListItemCount: countMatches("(?m)^\\d+\\.\\s", in: source), orderedListItemCount: countMatches("(?m)^\\d+\\.\\s", in: source),
blockquoteCount: countMatches("(?m)^>\\s", in: source), blockquoteCount: countMatches("(?m)^>\\s", in: source),
fencedCodeFenceCount: countMatches("(?m)^```", in: source), fencedCodeFenceCount: countMatches("(?m)^```", in: source),
inlineCodeLineCount: countMatchingLines("`[^`\\n]+`", in: lines), inlineCodeLineCount: countMatchingLines("`[^`\\n]+`", in: lineSources),
boldLineCount: countMatchingLines("\\*\\*[^*\\n]+\\*\\*", in: lines), boldLineCount: countMatchingLines("\\*\\*[^*\\n]+\\*\\*", in: lineSources),
italicLineCount: countMatchingLines("(?<!\\*)\\*[^*\\n]+\\*(?!\\*)", in: lines), italicLineCount: countMatchingLines("(?<!\\*)\\*[^*\\n]+\\*(?!\\*)", in: lineSources),
inlineLinkCount: countMatches("(?<!!)\\[[^\\]]+\\]\\([^\\)]+\\)", in: source), inlineLinkCount: countMatches("(?<!!)\\[[^\\]]+\\]\\([^\\)]+\\)", in: source),
referenceLinkLikeCount: countMatches("(?<!!)\\[[A-Za-z0-9_-]+\\]", in: source), referenceLinkLikeCount: countMatches("(?<!!)\\[[A-Za-z0-9_-]+\\]", in: source),
imageCount: countMatches("!\\[", in: source), imageCount: countMatches("!\\[", in: source),

View file

@ -0,0 +1,117 @@
import XCTest
@testable import SaplingEditor
/// Tests for `DocumentLineIndex` and for the `EditorActiveLineTracker` entry points
/// exercised here (`lines(from:activeLineIndex:)` and `lineIndex(containing:in:)`).
final class DocumentLineIndexTests: XCTestCase {
    // MARK: - Boundary scanning

    func testLFLineBoundaries() {
        assertLineIndex(
            source: "One\nTwo\nThree",
            expectedSources: ["One", "Two", "Three"],
            expectedRanges: [
                NSRange(location: 0, length: 3),
                NSRange(location: 4, length: 3),
                NSRange(location: 8, length: 5)
            ],
            expectedEndings: [.lf, .lf, .none]
        )
    }

    func testCRLFLineBoundaries() {
        assertLineIndex(
            source: "One\r\nTwo\r\nThree",
            expectedSources: ["One", "Two", "Three"],
            expectedRanges: [
                NSRange(location: 0, length: 3),
                NSRange(location: 5, length: 3),
                NSRange(location: 10, length: 5)
            ],
            expectedEndings: [.crlf, .crlf, .none]
        )
    }

    func testCRLineBoundaries() {
        assertLineIndex(
            source: "One\rTwo\rThree",
            expectedSources: ["One", "Two", "Three"],
            expectedRanges: [
                NSRange(location: 0, length: 3),
                NSRange(location: 4, length: 3),
                NSRange(location: 8, length: 5)
            ],
            expectedEndings: [.cr, .cr, .none]
        )
    }

    func testMixedLineBoundaries() {
        assertLineIndex(
            source: "One\nTwo\r\nThree\rFour",
            expectedSources: ["One", "Two", "Three", "Four"],
            expectedRanges: [
                NSRange(location: 0, length: 3),
                NSRange(location: 4, length: 3),
                NSRange(location: 9, length: 5),
                NSRange(location: 15, length: 4)
            ],
            expectedEndings: [.lf, .crlf, .cr, .none]
        )
    }

    /// An empty document is still one (empty, unterminated) line.
    func testEmptySourceProducesSingleEmptyLine() {
        assertLineIndex(
            source: "",
            expectedSources: [""],
            expectedRanges: [NSRange(location: 0, length: 0)],
            expectedEndings: [.none]
        )
    }

    func testTrailingBlankLineForEveryLineEnding() {
        XCTAssertEqual(DocumentLineIndex(source: "One\n").boundaries.map(\.contentRange.location), [0, 4])
        XCTAssertEqual(DocumentLineIndex(source: "One\r\n").boundaries.map(\.contentRange.location), [0, 5])
        XCTAssertEqual(DocumentLineIndex(source: "One\r").boundaries.map(\.contentRange.location), [0, 4])
    }

    // MARK: - EditorActiveLineTracker integration

    func testActiveLineDetectionAcrossCRLFBoundaries() {
        let source = "One\r\nTwo\r\nThree"
        XCTAssertEqual(EditorActiveLineTracker.lineIndex(containing: 0, in: source), 0)
        XCTAssertEqual(EditorActiveLineTracker.lineIndex(containing: 3, in: source), 0)
        // Offset 4 sits between the CR and the LF; it still belongs to the first line.
        XCTAssertEqual(EditorActiveLineTracker.lineIndex(containing: 4, in: source), 0)
        XCTAssertEqual(EditorActiveLineTracker.lineIndex(containing: 5, in: source), 1)
        XCTAssertEqual(EditorActiveLineTracker.lineIndex(containing: 8, in: source), 1)
        XCTAssertEqual(EditorActiveLineTracker.lineIndex(containing: 9, in: source), 1)
        XCTAssertEqual(EditorActiveLineTracker.lineIndex(containing: 10, in: source), 2)
    }

    func testEditorLinesPreserveCRLFSourceRangesAndModes() {
        let lines = EditorActiveLineTracker.lines(from: "One\r\nTwo\r\nThree", activeLineIndex: 1)
        XCTAssertEqual(lines.count, 3)
        XCTAssertEqual(lines.map(\.source), ["One", "Two", "Three"])
        XCTAssertEqual(lines.map(\.range), [
            NSRange(location: 0, length: 3),
            NSRange(location: 5, length: 3),
            NSRange(location: 10, length: 5)
        ])
        XCTAssertEqual(lines.map(\.mode), [.rendered, .source, .rendered])
    }

    // MARK: - Benchmark fixture

    /// Segments the 5 MB benchmark document. Skipped (not failed) when the fixture
    /// cannot be found, so the suite stays green on machines without it.
    func testBenchmarkDocumentSegmentsIntoPhysicalLines() throws {
        guard let url = Self.benchmarkDocumentURL() else {
            throw XCTSkip("Benchmark fixture Docs/Benchmarks/5mb.md is not available on this machine")
        }
        let source = try String(contentsOf: url, encoding: .utf8)
        let lines = EditorActiveLineTracker.lines(from: source, activeLineIndex: 0)
        XCTAssertEqual(lines.count, 51_482)
        XCTAssertEqual(lines[0].source, "# ExampleFile.com - Example Files")
        XCTAssertEqual(lines[1].source, "")
        XCTAssertEqual(lines[2].source, "- ✨ExampleFile ✨")
        XCTAssertEqual(lines.filter { $0.mode == .source }.count, 1)
        XCTAssertEqual(lines.filter { $0.mode == .rendered }.count, 51_481)
    }

    // MARK: - Helpers

    /// Locates the benchmark document without depending on one developer's home directory.
    ///
    /// Walks up from this test file looking for `Docs/Benchmarks/5mb.md` in each
    /// ancestor directory, then falls back to the path the test originally used.
    /// Returns `nil` when neither exists.
    private static func benchmarkDocumentURL() -> URL? {
        let fileManager = FileManager.default
        let relativePath = "Docs/Benchmarks/5mb.md"

        var directory = URL(fileURLWithPath: #filePath).deletingLastPathComponent()
        // Bounded by the starting depth so the walk always terminates at the root.
        for _ in directory.pathComponents {
            let candidate = directory.appendingPathComponent(relativePath)
            if fileManager.fileExists(atPath: candidate.path) {
                return candidate
            }
            directory.deleteLastPathComponent()
        }

        let originalLocation = URL(fileURLWithPath: "/Users/feror/Sapling/Docs/Benchmarks/5mb.md")
        return fileManager.fileExists(atPath: originalLocation.path) ? originalLocation : nil
    }

    /// Asserts the line texts, content ranges, and line endings produced for `source`.
    /// `file`/`line` are forwarded so failures are reported at the calling test.
    private func assertLineIndex(
        source: String,
        expectedSources: [String],
        expectedRanges: [NSRange],
        expectedEndings: [LineEndingStrategy],
        file: StaticString = #filePath,
        line: UInt = #line
    ) {
        let index = DocumentLineIndex(source: source)
        let lines = index.editorLines(activeLineIndex: 0)
        XCTAssertEqual(lines.map(\.source), expectedSources, file: file, line: line)
        XCTAssertEqual(index.boundaries.map(\.contentRange), expectedRanges, file: file, line: line)
        XCTAssertEqual(index.boundaries.map(\.lineEnding), expectedEndings, file: file, line: line)
    }
}