fix(editor): support CRLF line segmentation
This commit is contained in:
parent
181ec6ccca
commit
3a5645464f
4 changed files with 287 additions and 54 deletions
160
Sources/SaplingEditor/DocumentLineIndex.swift
Normal file
160
Sources/SaplingEditor/DocumentLineIndex.swift
Normal file
|
|
@ -0,0 +1,160 @@
|
|||
import Foundation
|
||||
|
||||
/// The kind of terminator that closes a line of text.
///
/// The raw values (`"lf"`, `"crlf"`, `"cr"`, `"none"`) are the case names.
public enum LineEndingStrategy: String, Hashable, Sendable {
    /// A single line feed (`\n`).
    case lf
    /// A carriage return immediately followed by a line feed (`\r\n`).
    case crlf
    /// A single carriage return (`\r`).
    case cr
    /// No terminator at all.
    ///
    /// Spell this case out as `LineEndingStrategy.none` when the surrounding
    /// type is optional, so it is not mistaken for `Optional.none`.
    case none

    /// Number of UTF-16 code units occupied by the terminator.
    public var utf16Length: Int {
        switch self {
        case .none:
            return 0
        case .lf:
            return 1
        case .cr:
            return 1
        case .crlf:
            return 2
        }
    }
}
|
||||
|
||||
/// Describes where one line sits inside a document, in UTF-16 code units.
public struct DocumentLineBoundary: Hashable, Sendable {
    /// Position of the line within the document's list of lines.
    public var index: Int
    /// Range of the line's text, excluding its terminator.
    public var contentRange: NSRange
    /// Terminator that follows `contentRange`.
    public var lineEnding: LineEndingStrategy

    /// Creates a boundary from its three stored components.
    ///
    /// - Parameters:
    ///   - index: Position of the line within the document's list of lines.
    ///   - contentRange: Range of the line's text, excluding its terminator.
    ///   - lineEnding: Terminator that follows `contentRange`.
    public init(index: Int, contentRange: NSRange, lineEnding: LineEndingStrategy) {
        self.index = index
        self.contentRange = contentRange
        self.lineEnding = lineEnding
    }

    /// Range covered by the terminator; it starts where the content ends and
    /// is empty when `lineEnding` has a UTF-16 length of zero.
    public var lineEndingRange: NSRange {
        let contentEnd = contentRange.location + contentRange.length
        return NSRange(location: contentEnd, length: lineEnding.utf16Length)
    }

    /// Location immediately after the terminator, i.e. the end of the content
    /// plus the terminator's UTF-16 length.
    public var nextLineLocation: Int {
        let terminator = lineEndingRange
        return terminator.location + terminator.length
    }
}
|
||||
|
||||
/// A line-by-line index over a source string, measured in UTF-16 code units.
///
/// Lines are terminated by LF (`\n`), CR (`\r`) or CRLF (`\r\n`); a CR that is
/// immediately followed by LF is treated as one two-unit terminator. Every
/// source, including the empty string, produces at least one boundary, and a
/// source that ends with a terminator produces a trailing empty line.
public struct DocumentLineIndex: Hashable, Sendable {
    /// The indexed text.
    ///
    /// Assigning a new value rescans `boundaries` so the two properties can
    /// never describe different documents.
    public var source: String {
        didSet {
            boundaries = Self.scanBoundaries(in: source)
        }
    }

    /// One entry per line of `source`, in document order.
    public var boundaries: [DocumentLineBoundary]

    /// Builds the index by scanning `source` once.
    ///
    /// - Parameter source: The text to split into lines.
    public init(source: String) {
        // Property observers do not run during initialisation, so the
        // boundaries are computed explicitly here.
        self.source = source
        self.boundaries = Self.scanBoundaries(in: source)
    }

    /// Returns the index of the line that contains a UTF-16 location.
    ///
    /// A location inside a line's terminator (for example between the `\r`
    /// and `\n` of a CRLF pair) belongs to the line that terminator closes.
    ///
    /// - Parameter location: UTF-16 offset into `source`; values outside
    ///   `0...source.utf16.count` are clamped into that range.
    /// - Returns: The `index` of the matching boundary, or `0` when
    ///   `boundaries` is empty.
    public func lineIndex(containing location: Int) -> Int {
        guard let lastBoundary = boundaries.last else { return 0 }

        let clampedLocation = max(0, min(location, source.utf16.count))

        // The last line is the fallback, so only the lines before it need an
        // explicit upper-bound check.
        let containingBoundary = boundaries.dropLast().first { boundary in
            clampedLocation < boundary.nextLineLocation
        }
        return (containingBoundary ?? lastBoundary).index
    }

    /// Materialises every boundary as an `EditorLine`.
    ///
    /// - Parameter activeLineIndex: The line whose mode is `.source`; every
    ///   other line gets `.rendered`.
    /// - Returns: One `EditorLine` per boundary, carrying the line's text
    ///   (without its terminator) and its content range.
    public func editorLines(activeLineIndex: Int) -> [EditorLine] {
        let nsSource = source as NSString
        return boundaries.map { boundary in
            EditorLine(
                index: boundary.index,
                source: nsSource.substring(with: boundary.contentRange),
                range: boundary.contentRange,
                mode: boundary.index == activeLineIndex ? .source : .rendered
            )
        }
    }

    /// Splits `source` into line boundaries.
    ///
    /// The loop always finishes by appending a line without a terminator.
    /// That single exit covers the empty document, a document whose last line
    /// is unterminated, and the empty line that follows a trailing terminator.
    private static func scanBoundaries(in source: String) -> [DocumentLineBoundary] {
        let nsSource = source as NSString
        let sourceLength = nsSource.length

        var boundaries: [DocumentLineBoundary] = []
        var lineStart = 0

        while true {
            // Advance to the next terminator or to the end of the text.
            var cursor = lineStart
            while cursor < sourceLength,
                  !isLineEndingStart(nsSource.character(at: cursor)) {
                cursor += 1
            }

            let contentRange = NSRange(location: lineStart, length: cursor - lineStart)

            guard cursor < sourceLength else {
                // Reached the end of the text: this is the final line.
                boundaries.append(DocumentLineBoundary(
                    index: boundaries.count,
                    contentRange: contentRange,
                    lineEnding: LineEndingStrategy.none
                ))
                return boundaries
            }

            let lineEnding = lineEndingStrategy(at: cursor, in: nsSource)
            boundaries.append(DocumentLineBoundary(
                index: boundaries.count,
                contentRange: contentRange,
                lineEnding: lineEnding
            ))
            lineStart = cursor + lineEnding.utf16Length
        }
    }

    /// Classifies the terminator that starts at `location`.
    ///
    /// The caller must pass the location of a CR or LF code unit; any code
    /// unit other than CR is reported as `.lf`.
    private static func lineEndingStrategy(at location: Int, in source: NSString) -> LineEndingStrategy {
        guard source.character(at: location) == carriageReturnUTF16 else {
            return .lf
        }

        // A CR directly followed by LF forms a single CRLF terminator.
        let nextLocation = location + 1
        if nextLocation < source.length,
           source.character(at: nextLocation) == lineFeedUTF16 {
            return .crlf
        }
        return .cr
    }

    /// Whether `character` can begin a line terminator (LF or CR).
    private static func isLineEndingStart(_ character: unichar) -> Bool {
        character == lineFeedUTF16 || character == carriageReturnUTF16
    }

    /// UTF-16 code unit of the line feed character (`\n`).
    private static let lineFeedUTF16: unichar = 10
    /// UTF-16 code unit of the carriage return character (`\r`).
    private static let carriageReturnUTF16: unichar = 13
}
|
||||
|
||||
// NOTE(review): Foundation appears to expose `NSRange.upperBound` already;
// confirm whether this file-private copy is still needed.
private extension NSRange {
    /// The first location past the end of the range (`location + length`).
    var upperBound: Int {
        return self.location + self.length
    }
}
|
||||
|
|
@ -2,57 +2,11 @@ import Foundation
|
|||
|
||||
public enum EditorActiveLineTracker {
|
||||
public static func lines(from source: String, activeLineIndex: Int) -> [EditorLine] {
|
||||
var lines: [EditorLine] = []
|
||||
var lineStart = source.startIndex
|
||||
var utf16Location = 0
|
||||
var index = 0
|
||||
|
||||
while lineStart < source.endIndex {
|
||||
let lineEnd = source[lineStart...].firstIndex(of: "\n") ?? source.endIndex
|
||||
let line = String(source[lineStart..<lineEnd])
|
||||
let length = line.utf16.count
|
||||
lines.append(EditorLine(
|
||||
index: index,
|
||||
source: line,
|
||||
range: NSRange(location: utf16Location, length: length),
|
||||
mode: index == activeLineIndex ? .source : .rendered
|
||||
))
|
||||
|
||||
if lineEnd == source.endIndex {
|
||||
lineStart = lineEnd
|
||||
utf16Location += length
|
||||
} else {
|
||||
lineStart = source.index(after: lineEnd)
|
||||
utf16Location += length + 1
|
||||
}
|
||||
index += 1
|
||||
}
|
||||
|
||||
if source.isEmpty || source.hasSuffix("\n") {
|
||||
lines.append(EditorLine(
|
||||
index: index,
|
||||
source: "",
|
||||
range: NSRange(location: utf16Location, length: 0),
|
||||
mode: index == activeLineIndex ? .source : .rendered
|
||||
))
|
||||
}
|
||||
|
||||
return lines
|
||||
DocumentLineIndex(source: source).editorLines(activeLineIndex: activeLineIndex)
|
||||
}
|
||||
|
||||
public static func lineIndex(containing location: Int, in source: String) -> Int {
|
||||
let clampedLocation = max(0, min(location, source.utf16.count))
|
||||
var currentLocation = 0
|
||||
|
||||
for (index, line) in source.split(separator: "\n", omittingEmptySubsequences: false).enumerated() {
|
||||
let length = line.utf16.count
|
||||
if clampedLocation <= currentLocation + length {
|
||||
return index
|
||||
}
|
||||
currentLocation += length + 1
|
||||
}
|
||||
|
||||
return 0
|
||||
DocumentLineIndex(source: source).lineIndex(containing: location)
|
||||
}
|
||||
|
||||
public static func clampedSelection(_ selection: EditorSelection, in source: String) -> EditorSelection {
|
||||
|
|
|
|||
|
|
@ -256,9 +256,11 @@ public enum EditorBenchmarkProfiler {
|
|||
}
|
||||
|
||||
public static func documentProfile(fileName: String, source: String) -> EditorBenchmarkDocumentProfile {
|
||||
let lines = source.components(separatedBy: "\n")
|
||||
let lineLengths = lines.map { $0.utf16.count }
|
||||
let lineCount = lines.count
|
||||
let lineIndex = DocumentLineIndex(source: source)
|
||||
let nsSource = source as NSString
|
||||
let lineSources = lineIndex.boundaries.map { nsSource.substring(with: $0.contentRange) }
|
||||
let lineLengths = lineIndex.boundaries.map(\.contentRange.length)
|
||||
let lineCount = lineIndex.boundaries.count
|
||||
let maxLineLength = lineLengths.max() ?? 0
|
||||
let totalLineLength = lineLengths.reduce(0, +)
|
||||
|
||||
|
|
@ -273,9 +275,9 @@ public enum EditorBenchmarkProfiler {
|
|||
orderedListItemCount: countMatches("(?m)^\\d+\\.\\s", in: source),
|
||||
blockquoteCount: countMatches("(?m)^>\\s", in: source),
|
||||
fencedCodeFenceCount: countMatches("(?m)^```", in: source),
|
||||
inlineCodeLineCount: countMatchingLines("`[^`\\n]+`", in: lines),
|
||||
boldLineCount: countMatchingLines("\\*\\*[^*\\n]+\\*\\*", in: lines),
|
||||
italicLineCount: countMatchingLines("(?<!\\*)\\*[^*\\n]+\\*(?!\\*)", in: lines),
|
||||
inlineCodeLineCount: countMatchingLines("`[^`\\n]+`", in: lineSources),
|
||||
boldLineCount: countMatchingLines("\\*\\*[^*\\n]+\\*\\*", in: lineSources),
|
||||
italicLineCount: countMatchingLines("(?<!\\*)\\*[^*\\n]+\\*(?!\\*)", in: lineSources),
|
||||
inlineLinkCount: countMatches("(?<!!)\\[[^\\]]+\\]\\([^\\)]+\\)", in: source),
|
||||
referenceLinkLikeCount: countMatches("(?<!!)\\[[A-Za-z0-9_-]+\\]", in: source),
|
||||
imageCount: countMatches("!\\[", in: source),
|
||||
|
|
|
|||
117
Tests/SaplingEditorTests/DocumentLineIndexTests.swift
Normal file
117
Tests/SaplingEditorTests/DocumentLineIndexTests.swift
Normal file
|
|
@ -0,0 +1,117 @@
|
|||
import XCTest
|
||||
@testable import SaplingEditor
|
||||
|
||||
/// Exercises `DocumentLineIndex` and the `EditorActiveLineTracker` entry
/// points that delegate to it, for LF, CRLF, CR and mixed line endings.
final class DocumentLineIndexTests: XCTestCase {
    func testLFLineBoundaries() {
        assertLineIndex(
            source: "One\nTwo\nThree",
            expectedSources: ["One", "Two", "Three"],
            expectedRanges: [
                NSRange(location: 0, length: 3),
                NSRange(location: 4, length: 3),
                NSRange(location: 8, length: 5)
            ],
            expectedEndings: [.lf, .lf, .none]
        )
    }

    func testCRLFLineBoundaries() {
        assertLineIndex(
            source: "One\r\nTwo\r\nThree",
            expectedSources: ["One", "Two", "Three"],
            expectedRanges: [
                NSRange(location: 0, length: 3),
                NSRange(location: 5, length: 3),
                NSRange(location: 10, length: 5)
            ],
            expectedEndings: [.crlf, .crlf, .none]
        )
    }

    func testCRLineBoundaries() {
        assertLineIndex(
            source: "One\rTwo\rThree",
            expectedSources: ["One", "Two", "Three"],
            expectedRanges: [
                NSRange(location: 0, length: 3),
                NSRange(location: 4, length: 3),
                NSRange(location: 8, length: 5)
            ],
            expectedEndings: [.cr, .cr, .none]
        )
    }

    func testMixedLineBoundaries() {
        assertLineIndex(
            source: "One\nTwo\r\nThree\rFour",
            expectedSources: ["One", "Two", "Three", "Four"],
            expectedRanges: [
                NSRange(location: 0, length: 3),
                NSRange(location: 4, length: 3),
                NSRange(location: 9, length: 5),
                NSRange(location: 15, length: 4)
            ],
            expectedEndings: [.lf, .crlf, .cr, .none]
        )
    }

    /// A document that ends with a terminator has one extra, empty line
    /// starting right after that terminator.
    func testTrailingBlankLineForEveryLineEnding() {
        XCTAssertEqual(DocumentLineIndex(source: "One\n").boundaries.map(\.contentRange.location), [0, 4])
        XCTAssertEqual(DocumentLineIndex(source: "One\r\n").boundaries.map(\.contentRange.location), [0, 5])
        XCTAssertEqual(DocumentLineIndex(source: "One\r").boundaries.map(\.contentRange.location), [0, 4])
    }

    /// Locations on or inside a CRLF pair still belong to the line it closes.
    func testActiveLineDetectionAcrossCRLFBoundaries() {
        let source = "One\r\nTwo\r\nThree"

        XCTAssertEqual(EditorActiveLineTracker.lineIndex(containing: 0, in: source), 0)
        XCTAssertEqual(EditorActiveLineTracker.lineIndex(containing: 3, in: source), 0)
        XCTAssertEqual(EditorActiveLineTracker.lineIndex(containing: 4, in: source), 0)
        XCTAssertEqual(EditorActiveLineTracker.lineIndex(containing: 5, in: source), 1)
        XCTAssertEqual(EditorActiveLineTracker.lineIndex(containing: 8, in: source), 1)
        XCTAssertEqual(EditorActiveLineTracker.lineIndex(containing: 9, in: source), 1)
        XCTAssertEqual(EditorActiveLineTracker.lineIndex(containing: 10, in: source), 2)
    }

    func testEditorLinesPreserveCRLFSourceRangesAndModes() {
        let lines = EditorActiveLineTracker.lines(from: "One\r\nTwo\r\nThree", activeLineIndex: 1)

        XCTAssertEqual(lines.count, 3)
        XCTAssertEqual(lines.map(\.source), ["One", "Two", "Three"])
        XCTAssertEqual(lines.map(\.range), [
            NSRange(location: 0, length: 3),
            NSRange(location: 5, length: 3),
            NSRange(location: 10, length: 5)
        ])
        XCTAssertEqual(lines.map(\.mode), [.rendered, .source, .rendered])
    }

    /// Segments the 5 MB benchmark document; skipped when the fixture is not
    /// present on the machine running the tests.
    func testBenchmarkDocumentSegmentsIntoPhysicalLines() throws {
        let url = try benchmarkDocumentURL()
        let source = try String(contentsOf: url, encoding: .utf8)
        let lines = EditorActiveLineTracker.lines(from: source, activeLineIndex: 0)

        XCTAssertEqual(lines.count, 51_482)
        XCTAssertEqual(lines[0].source, "# ExampleFile.com - Example Files")
        XCTAssertEqual(lines[1].source, "")
        XCTAssertEqual(lines[2].source, "- ✨ExampleFile ✨")
        XCTAssertEqual(lines.filter { $0.mode == .source }.count, 1)
        XCTAssertEqual(lines.filter { $0.mode == .rendered }.count, 51_481)
    }

    /// Locates the benchmark fixture without depending on one developer's
    /// home directory.
    ///
    /// - Returns: The first candidate location that exists on disk.
    /// - Throws: `XCTSkip` when no candidate exists, so the test is reported
    ///   as skipped instead of failing on machines without the fixture.
    private func benchmarkDocumentURL() throws -> URL {
        // NOTE(review): assumes this file lives at
        // <repository>/Tests/SaplingEditorTests/ and that the fixture is kept
        // at <repository>/Docs/Benchmarks/5mb.md — confirm against the
        // repository layout.
        let repositoryRoot = URL(fileURLWithPath: #filePath)
            .deletingLastPathComponent()
            .deletingLastPathComponent()
            .deletingLastPathComponent()

        let candidates = [
            repositoryRoot.appendingPathComponent("Docs/Benchmarks/5mb.md"),
            // Legacy absolute location, kept so the original setup still runs.
            URL(fileURLWithPath: "/Users/feror/Sapling/Docs/Benchmarks/5mb.md")
        ]

        guard let url = candidates.first(where: { FileManager.default.fileExists(atPath: $0.path) }) else {
            throw XCTSkip("Benchmark fixture Docs/Benchmarks/5mb.md is not available on this machine.")
        }
        return url
    }

    /// Asserts the line texts, content ranges and terminators that
    /// `DocumentLineIndex` produces for `source`.
    ///
    /// `file` and `line` are forwarded so failures point at the calling test.
    private func assertLineIndex(
        source: String,
        expectedSources: [String],
        expectedRanges: [NSRange],
        expectedEndings: [LineEndingStrategy],
        file: StaticString = #filePath,
        line: UInt = #line
    ) {
        let index = DocumentLineIndex(source: source)
        let lines = index.editorLines(activeLineIndex: 0)

        XCTAssertEqual(lines.map(\.source), expectedSources, file: file, line: line)
        XCTAssertEqual(index.boundaries.map(\.contentRange), expectedRanges, file: file, line: line)
        XCTAssertEqual(index.boundaries.map(\.lineEnding), expectedEndings, file: file, line: line)
    }
}
|
||||
Loading…
Add table
Reference in a new issue