fix(editor): support CRLF line segmentation
This commit is contained in:
parent
181ec6ccca
commit
3a5645464f
4 changed files with 287 additions and 54 deletions
160
Sources/SaplingEditor/DocumentLineIndex.swift
Normal file
160
Sources/SaplingEditor/DocumentLineIndex.swift
Normal file
|
|
@ -0,0 +1,160 @@
|
||||||
|
import Foundation
|
||||||
|
|
||||||
|
/// The kind of terminator found at the end of a line: line feed, carriage
/// return + line feed, lone carriage return, or no terminator at all.
public enum LineEndingStrategy: String, Hashable, Sendable {
    case lf, crlf, cr, none

    /// Number of UTF-16 code units the terminator occupies:
    /// zero for `.none`, one for `.lf` and `.cr`, two for `.crlf`.
    public var utf16Length: Int {
        switch self {
        case .none:
            return 0
        case .crlf:
            return 2
        case .cr, .lf:
            return 1
        }
    }
}
|
||||||
|
|
||||||
|
/// Describes one line of a document: its position in the line list, the
/// range of its content, and the terminator that follows the content.
///
/// Ranges and locations are expressed as `NSRange`/`Int` offsets.
public struct DocumentLineBoundary: Hashable, Sendable {
    /// Zero-based position of the line.
    public var index: Int
    /// Range covering the line's content, not including its terminator.
    public var contentRange: NSRange
    /// Terminator that follows the content (`.none` when there is none).
    public var lineEnding: LineEndingStrategy

    public init(index: Int, contentRange: NSRange, lineEnding: LineEndingStrategy) {
        self.index = index
        self.contentRange = contentRange
        self.lineEnding = lineEnding
    }

    /// Range occupied by the terminator; starts where the content stops and
    /// is empty when `lineEnding` has a zero length.
    public var lineEndingRange: NSRange {
        NSRange(location: contentEnd, length: lineEnding.utf16Length)
    }

    /// Offset just past the terminator, i.e. where a following line would begin.
    public var nextLineLocation: Int {
        contentEnd + lineEnding.utf16Length
    }

    /// Offset one past the last content unit.
    private var contentEnd: Int {
        contentRange.location + contentRange.length
    }
}
|
||||||
|
|
||||||
|
/// Splits a source string into lines, recognising LF, CRLF and lone CR as
/// terminators, and answers line-lookup queries against the result.
///
/// Offsets are UTF-16 based: the scanner walks the text through `NSString`
/// and records `NSRange` values.
public struct DocumentLineIndex: Hashable, Sendable {
    /// The text the boundaries were computed from.
    public var source: String
    /// One entry per line, in document order.
    public var boundaries: [DocumentLineBoundary]

    /// Builds the index by scanning `source` once for line terminators.
    public init(source: String) {
        self.source = source
        self.boundaries = Self.scanBoundaries(in: source)
    }

    /// Returns the index of the line that contains `location`.
    ///
    /// `location` is clamped to `0...source.utf16.count`. Offsets that fall on
    /// a line's terminator belong to that line; anything at or beyond the
    /// start of the last line maps to the last line. Returns 0 when
    /// `boundaries` is empty.
    public func lineIndex(containing location: Int) -> Int {
        guard let finalBoundary = boundaries.last else { return 0 }

        let target = min(max(location, 0), source.utf16.count)
        // Every line except the last owns the half-open span up to the start
        // of the next line; the last line takes whatever remains.
        let owner = boundaries.dropLast().first { target < $0.nextLineLocation }
        return (owner ?? finalBoundary).index
    }

    /// Produces one `EditorLine` per boundary, carrying the line's content
    /// (without terminator) and content range. The line whose index equals
    /// `activeLineIndex` is given `.source` mode; all others get `.rendered`.
    public func editorLines(activeLineIndex: Int) -> [EditorLine] {
        let text = source as NSString
        var lines: [EditorLine] = []
        lines.reserveCapacity(boundaries.count)

        for boundary in boundaries {
            lines.append(EditorLine(
                index: boundary.index,
                source: text.substring(with: boundary.contentRange),
                range: boundary.contentRange,
                mode: boundary.index == activeLineIndex ? .source : .rendered
            ))
        }

        return lines
    }

    /// Walks `source` and emits a boundary for every line.
    ///
    /// Each terminated line is followed by another line, so text ending in a
    /// terminator yields a trailing empty line, and an empty string yields a
    /// single empty line.
    private static func scanBoundaries(in source: String) -> [DocumentLineBoundary] {
        let text = source as NSString
        let length = text.length
        var result: [DocumentLineBoundary] = []
        var start = 0

        while true {
            // Advance to the next terminator, or to the end of the text.
            var end = start
            while end < length, !isLineEndingStart(text.character(at: end)) {
                end += 1
            }
            let content = NSRange(location: start, length: end - start)

            // Reaching the end of the text closes the final, unterminated line.
            guard end < length else {
                result.append(DocumentLineBoundary(
                    index: result.count,
                    contentRange: content,
                    lineEnding: .none
                ))
                return result
            }

            let ending = lineEndingStrategy(at: end, in: text)
            result.append(DocumentLineBoundary(
                index: result.count,
                contentRange: content,
                lineEnding: ending
            ))
            start = end + ending.utf16Length
        }
    }

    /// Classifies the terminator beginning at `location`, which the caller
    /// has already established to be a CR or an LF.
    private static func lineEndingStrategy(at location: Int, in source: NSString) -> LineEndingStrategy {
        guard source.character(at: location) == carriageReturnUTF16 else {
            return .lf
        }

        // A CR immediately followed by LF is a single two-unit terminator.
        let following = location + 1
        let isFollowedByLineFeed = following < source.length
            && source.character(at: following) == lineFeedUTF16
        return isFollowedByLineFeed ? .crlf : .cr
    }

    /// Whether `character` can begin a terminator (LF or CR).
    private static func isLineEndingStart(_ character: unichar) -> Bool {
        switch character {
        case lineFeedUTF16, carriageReturnUTF16:
            return true
        default:
            return false
        }
    }

    /// UTF-16 code unit for line feed (`\n`).
    private static let lineFeedUTF16: unichar = 0x0A
    /// UTF-16 code unit for carriage return (`\r`).
    private static let carriageReturnUTF16: unichar = 0x0D
}
|
||||||
|
|
||||||
|
private extension NSRange {
    /// The offset one past the end of the range (`location + length`).
    var upperBound: Int {
        return NSMaxRange(self)
    }
}
|
||||||
|
|
@ -2,57 +2,11 @@ import Foundation
|
||||||
|
|
||||||
public enum EditorActiveLineTracker {
|
public enum EditorActiveLineTracker {
|
||||||
public static func lines(from source: String, activeLineIndex: Int) -> [EditorLine] {
|
public static func lines(from source: String, activeLineIndex: Int) -> [EditorLine] {
|
||||||
var lines: [EditorLine] = []
|
DocumentLineIndex(source: source).editorLines(activeLineIndex: activeLineIndex)
|
||||||
var lineStart = source.startIndex
|
|
||||||
var utf16Location = 0
|
|
||||||
var index = 0
|
|
||||||
|
|
||||||
while lineStart < source.endIndex {
|
|
||||||
let lineEnd = source[lineStart...].firstIndex(of: "\n") ?? source.endIndex
|
|
||||||
let line = String(source[lineStart..<lineEnd])
|
|
||||||
let length = line.utf16.count
|
|
||||||
lines.append(EditorLine(
|
|
||||||
index: index,
|
|
||||||
source: line,
|
|
||||||
range: NSRange(location: utf16Location, length: length),
|
|
||||||
mode: index == activeLineIndex ? .source : .rendered
|
|
||||||
))
|
|
||||||
|
|
||||||
if lineEnd == source.endIndex {
|
|
||||||
lineStart = lineEnd
|
|
||||||
utf16Location += length
|
|
||||||
} else {
|
|
||||||
lineStart = source.index(after: lineEnd)
|
|
||||||
utf16Location += length + 1
|
|
||||||
}
|
|
||||||
index += 1
|
|
||||||
}
|
|
||||||
|
|
||||||
if source.isEmpty || source.hasSuffix("\n") {
|
|
||||||
lines.append(EditorLine(
|
|
||||||
index: index,
|
|
||||||
source: "",
|
|
||||||
range: NSRange(location: utf16Location, length: 0),
|
|
||||||
mode: index == activeLineIndex ? .source : .rendered
|
|
||||||
))
|
|
||||||
}
|
|
||||||
|
|
||||||
return lines
|
|
||||||
}
|
}
|
||||||
|
|
||||||
public static func lineIndex(containing location: Int, in source: String) -> Int {
|
public static func lineIndex(containing location: Int, in source: String) -> Int {
|
||||||
let clampedLocation = max(0, min(location, source.utf16.count))
|
DocumentLineIndex(source: source).lineIndex(containing: location)
|
||||||
var currentLocation = 0
|
|
||||||
|
|
||||||
for (index, line) in source.split(separator: "\n", omittingEmptySubsequences: false).enumerated() {
|
|
||||||
let length = line.utf16.count
|
|
||||||
if clampedLocation <= currentLocation + length {
|
|
||||||
return index
|
|
||||||
}
|
|
||||||
currentLocation += length + 1
|
|
||||||
}
|
|
||||||
|
|
||||||
return 0
|
|
||||||
}
|
}
|
||||||
|
|
||||||
public static func clampedSelection(_ selection: EditorSelection, in source: String) -> EditorSelection {
|
public static func clampedSelection(_ selection: EditorSelection, in source: String) -> EditorSelection {
|
||||||
|
|
|
||||||
|
|
@ -256,9 +256,11 @@ public enum EditorBenchmarkProfiler {
|
||||||
}
|
}
|
||||||
|
|
||||||
public static func documentProfile(fileName: String, source: String) -> EditorBenchmarkDocumentProfile {
|
public static func documentProfile(fileName: String, source: String) -> EditorBenchmarkDocumentProfile {
|
||||||
let lines = source.components(separatedBy: "\n")
|
let lineIndex = DocumentLineIndex(source: source)
|
||||||
let lineLengths = lines.map { $0.utf16.count }
|
let nsSource = source as NSString
|
||||||
let lineCount = lines.count
|
let lineSources = lineIndex.boundaries.map { nsSource.substring(with: $0.contentRange) }
|
||||||
|
let lineLengths = lineIndex.boundaries.map(\.contentRange.length)
|
||||||
|
let lineCount = lineIndex.boundaries.count
|
||||||
let maxLineLength = lineLengths.max() ?? 0
|
let maxLineLength = lineLengths.max() ?? 0
|
||||||
let totalLineLength = lineLengths.reduce(0, +)
|
let totalLineLength = lineLengths.reduce(0, +)
|
||||||
|
|
||||||
|
|
@ -273,9 +275,9 @@ public enum EditorBenchmarkProfiler {
|
||||||
orderedListItemCount: countMatches("(?m)^\\d+\\.\\s", in: source),
|
orderedListItemCount: countMatches("(?m)^\\d+\\.\\s", in: source),
|
||||||
blockquoteCount: countMatches("(?m)^>\\s", in: source),
|
blockquoteCount: countMatches("(?m)^>\\s", in: source),
|
||||||
fencedCodeFenceCount: countMatches("(?m)^```", in: source),
|
fencedCodeFenceCount: countMatches("(?m)^```", in: source),
|
||||||
inlineCodeLineCount: countMatchingLines("`[^`\\n]+`", in: lines),
|
inlineCodeLineCount: countMatchingLines("`[^`\\n]+`", in: lineSources),
|
||||||
boldLineCount: countMatchingLines("\\*\\*[^*\\n]+\\*\\*", in: lines),
|
boldLineCount: countMatchingLines("\\*\\*[^*\\n]+\\*\\*", in: lineSources),
|
||||||
italicLineCount: countMatchingLines("(?<!\\*)\\*[^*\\n]+\\*(?!\\*)", in: lines),
|
italicLineCount: countMatchingLines("(?<!\\*)\\*[^*\\n]+\\*(?!\\*)", in: lineSources),
|
||||||
inlineLinkCount: countMatches("(?<!!)\\[[^\\]]+\\]\\([^\\)]+\\)", in: source),
|
inlineLinkCount: countMatches("(?<!!)\\[[^\\]]+\\]\\([^\\)]+\\)", in: source),
|
||||||
referenceLinkLikeCount: countMatches("(?<!!)\\[[A-Za-z0-9_-]+\\]", in: source),
|
referenceLinkLikeCount: countMatches("(?<!!)\\[[A-Za-z0-9_-]+\\]", in: source),
|
||||||
imageCount: countMatches("!\\[", in: source),
|
imageCount: countMatches("!\\[", in: source),
|
||||||
|
|
|
||||||
117
Tests/SaplingEditorTests/DocumentLineIndexTests.swift
Normal file
117
Tests/SaplingEditorTests/DocumentLineIndexTests.swift
Normal file
|
|
@ -0,0 +1,117 @@
|
||||||
|
import XCTest
|
||||||
|
@testable import SaplingEditor
|
||||||
|
|
||||||
|
/// Tests for `DocumentLineIndex` and the `EditorActiveLineTracker` entry
/// points that delegate to it, covering LF, CRLF, CR and mixed terminators.
final class DocumentLineIndexTests: XCTestCase {
    func testLFLineBoundaries() {
        assertLineIndex(
            source: "One\nTwo\nThree",
            expectedSources: ["One", "Two", "Three"],
            expectedRanges: [
                NSRange(location: 0, length: 3),
                NSRange(location: 4, length: 3),
                NSRange(location: 8, length: 5)
            ],
            expectedEndings: [.lf, .lf, .none]
        )
    }

    func testCRLFLineBoundaries() {
        assertLineIndex(
            source: "One\r\nTwo\r\nThree",
            expectedSources: ["One", "Two", "Three"],
            expectedRanges: [
                NSRange(location: 0, length: 3),
                NSRange(location: 5, length: 3),
                NSRange(location: 10, length: 5)
            ],
            expectedEndings: [.crlf, .crlf, .none]
        )
    }

    func testCRLineBoundaries() {
        assertLineIndex(
            source: "One\rTwo\rThree",
            expectedSources: ["One", "Two", "Three"],
            expectedRanges: [
                NSRange(location: 0, length: 3),
                NSRange(location: 4, length: 3),
                NSRange(location: 8, length: 5)
            ],
            expectedEndings: [.cr, .cr, .none]
        )
    }

    func testMixedLineBoundaries() {
        assertLineIndex(
            source: "One\nTwo\r\nThree\rFour",
            expectedSources: ["One", "Two", "Three", "Four"],
            expectedRanges: [
                NSRange(location: 0, length: 3),
                NSRange(location: 4, length: 3),
                NSRange(location: 9, length: 5),
                NSRange(location: 15, length: 4)
            ],
            expectedEndings: [.lf, .crlf, .cr, .none]
        )
    }

    /// A terminator at the very end of the text must produce a trailing empty
    /// line that starts right after the terminator.
    func testTrailingBlankLineForEveryLineEnding() {
        XCTAssertEqual(DocumentLineIndex(source: "One\n").boundaries.map(\.contentRange.location), [0, 4])
        XCTAssertEqual(DocumentLineIndex(source: "One\r\n").boundaries.map(\.contentRange.location), [0, 5])
        XCTAssertEqual(DocumentLineIndex(source: "One\r").boundaries.map(\.contentRange.location), [0, 4])
    }

    /// Offsets that fall on either unit of a CRLF terminator belong to the
    /// line that the terminator closes.
    func testActiveLineDetectionAcrossCRLFBoundaries() {
        let source = "One\r\nTwo\r\nThree"

        XCTAssertEqual(EditorActiveLineTracker.lineIndex(containing: 0, in: source), 0)
        XCTAssertEqual(EditorActiveLineTracker.lineIndex(containing: 3, in: source), 0)
        XCTAssertEqual(EditorActiveLineTracker.lineIndex(containing: 4, in: source), 0)
        XCTAssertEqual(EditorActiveLineTracker.lineIndex(containing: 5, in: source), 1)
        XCTAssertEqual(EditorActiveLineTracker.lineIndex(containing: 8, in: source), 1)
        XCTAssertEqual(EditorActiveLineTracker.lineIndex(containing: 9, in: source), 1)
        XCTAssertEqual(EditorActiveLineTracker.lineIndex(containing: 10, in: source), 2)
    }

    func testEditorLinesPreserveCRLFSourceRangesAndModes() {
        let lines = EditorActiveLineTracker.lines(from: "One\r\nTwo\r\nThree", activeLineIndex: 1)

        XCTAssertEqual(lines.count, 3)
        XCTAssertEqual(lines.map(\.source), ["One", "Two", "Three"])
        XCTAssertEqual(lines.map(\.range), [
            NSRange(location: 0, length: 3),
            NSRange(location: 5, length: 3),
            NSRange(location: 10, length: 5)
        ])
        XCTAssertEqual(lines.map(\.mode), [.rendered, .source, .rendered])
    }

    /// Segments the 5 MB benchmark document and checks the line count and the
    /// first few lines.
    ///
    /// The fixture is looked up relative to this test file first, so the test
    /// works from any checkout location; the original absolute path is kept as
    /// a fallback. When the fixture is not present the test is skipped rather
    /// than failed, because the file is a local benchmark asset.
    func testBenchmarkDocumentSegmentsIntoPhysicalLines() throws {
        // Assumes this file sits at Tests/SaplingEditorTests/ below the
        // repository root -- TODO confirm if the test target is ever moved.
        let repositoryRoot = URL(fileURLWithPath: "\(#filePath)")
            .deletingLastPathComponent()
            .deletingLastPathComponent()
            .deletingLastPathComponent()
        let candidates = [
            repositoryRoot.appendingPathComponent("Docs/Benchmarks/5mb.md"),
            URL(fileURLWithPath: "/Users/feror/Sapling/Docs/Benchmarks/5mb.md")
        ]
        guard let url = candidates.first(where: { FileManager.default.fileExists(atPath: $0.path) }) else {
            throw XCTSkip("Benchmark fixture Docs/Benchmarks/5mb.md is not available on this machine")
        }

        let source = try String(contentsOf: url, encoding: .utf8)
        let lines = EditorActiveLineTracker.lines(from: source, activeLineIndex: 0)

        XCTAssertEqual(lines.count, 51_482)
        XCTAssertEqual(lines[0].source, "# ExampleFile.com - Example Files")
        XCTAssertEqual(lines[1].source, "")
        XCTAssertEqual(lines[2].source, "- ✨ExampleFile ✨")
        XCTAssertEqual(lines.filter { $0.mode == .source }.count, 1)
        XCTAssertEqual(lines.filter { $0.mode == .rendered }.count, 51_481)
    }

    /// Builds an index for `source` and compares the per-line content,
    /// content ranges and terminators against the expected values.
    /// `file` and `line` forward the caller's location so failures are
    /// reported at the call site.
    private func assertLineIndex(
        source: String,
        expectedSources: [String],
        expectedRanges: [NSRange],
        expectedEndings: [LineEndingStrategy],
        file: StaticString = #filePath,
        line: UInt = #line
    ) {
        let index = DocumentLineIndex(source: source)
        let lines = index.editorLines(activeLineIndex: 0)

        XCTAssertEqual(lines.map(\.source), expectedSources, file: file, line: line)
        XCTAssertEqual(index.boundaries.map(\.contentRange), expectedRanges, file: file, line: line)
        XCTAssertEqual(index.boundaries.map(\.lineEnding), expectedEndings, file: file, line: line)
    }
}
|
||||||
Loading…
Add table
Reference in a new issue