Skip to content

Commit 1472619

Browse files
Qtaxscriptcoded
authored and committed
perf: improved tokenizer
Faster and simpler tokenization. Refs: #133
1 parent ec30752 commit 1472619

File tree

2 files changed

+47
-85
lines changed

2 files changed

+47
-85
lines changed

lib/index.d.ts

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -21,4 +21,5 @@ declare module 'sql-highlight' {
2121

2222
export function getSegments(sqlString: string): Array<Segment>;
2323
export function highlight(sqlString: string, options?: HighlightOptions): string;
24+
export const DEFAULT_OPTIONS: HighlightOptions;
2425
}

lib/index.js

Lines changed: 46 additions & 85 deletions
Original file line number | Diff line number | Diff line change
@@ -18,106 +18,66 @@ const DEFAULT_OPTIONS = {
1818
}
1919
}
2020

21-
const SPLIT_CHARS = '[^a-zA-Z_]'
22-
2321
const DEFAULT_KEYWORD = 'default'
2422

2523
const highlighters = [
26-
{
27-
name: 'keyword',
28-
group: 1,
29-
regex: new RegExp(`(^|${SPLIT_CHARS})(${keywords.join('|')})(?=${SPLIT_CHARS}|$)`, 'gi')
30-
},
31-
{
32-
name: 'special',
33-
regex: /(=|!=|%|\/|\*|-|,|;|:|\+|<|>)/g
34-
},
35-
{
36-
name: 'function',
37-
regex: /(\w+?)\(/g,
38-
trimEnd: 1
39-
},
40-
{
41-
name: 'number',
42-
regex: /(\b\d+(?:\.\d+)?)/g
43-
},
44-
{
45-
name: 'string',
46-
regex: /(['](?:\\'|.)*?[']|["](?:\\"|.)*?["]|[`](?:\\`|.)*?[`])/g
47-
},
48-
{
49-
name: 'bracket',
50-
regex: /([()])/g
51-
}
52-
]
24+
/\b(?<number>\d+(?:\.\d+)?)\b/,
5325

54-
function getSegments (sqlString) {
55-
const matches = []
56-
57-
for (const hl of highlighters) {
58-
let match
59-
60-
// This is probably the one time when an assignment inside a condition makes sense
61-
// eslint-disable-next-line no-cond-assign
62-
while (match = hl.regex.exec(sqlString)) {
63-
let text = match[0]
64-
let boringLength = 0
65-
66-
// If a specific group is requested, use that group instead, and make sure
67-
// we offset the index by the length of the preceding groups
68-
if (hl.group) {
69-
text = match[hl.group + 1]
70-
for (let i = 1; i <= hl.group; i++) {
71-
boringLength += match[i].length
72-
}
73-
}
26+
// Note: Doubled-quote escapes like 'sql''server' also work, because they are matched as two adjacent string tokens
27+
/(?<string>'(?:[^'\\]|\\.)*'|"(?:[^"\\]|\\.)*"|`(?:[^`\\]|\\.)*`)/,
7428

75-
const trimmedText = hl.trimEnd
76-
? text.substring(0, text.length - hl.trimEnd)
77-
: text
78-
matches.push({
79-
name: hl.name,
80-
start: match.index + boringLength,
81-
length: trimmedText.length
82-
})
83-
}
84-
}
29+
/\b(?<function>\w+)(?=\s*\()/,
30+
31+
/(?<bracket>[()])/,
8532

86-
const sortedMatches = matches.slice().sort((a, b) => a.start - b.start)
33+
/(?<special>!=|[=%*/\-+,;:<>])/
34+
]
35+
36+
function getRegexString (regex) {
37+
const str = regex.toString()
38+
return str.replace(/^\/|\/\w*$/g, '')
39+
}
8740

88-
// filter/exclude nested matches (matches within the last match)
41+
// Regex of the shape /(.*?)((?<token1>...)|(?<token2>...)|...|$)/isy
42+
const tokenizer = new RegExp(
43+
'(.*?)(' +
44+
'\\b(?<keyword>' + keywords.join('|') + ')\\b|' +
45+
highlighters.map(getRegexString).join('|') +
46+
'|$)', // $ is needed to match trailing "default" text up to the end of the string
47+
'isy'
48+
)
49+
50+
function getSegments (sqlString) {
8951
const segments = []
90-
let upperBound = 0
91-
for (let i = 0; i < sortedMatches.length; i++) {
92-
if (sortedMatches[i].start < upperBound) { continue }
52+
let match
53+
54+
// Reset the starting position
55+
tokenizer.lastIndex = 0
9356

94-
// If no match, add a default segment
95-
if (sortedMatches[i].start > upperBound) {
57+
// This is probably the one time when an assignment inside a condition makes sense
58+
// eslint-disable-next-line no-cond-assign
59+
while (match = tokenizer.exec(sqlString)) {
60+
if (match[1]) {
9661
segments.push({
9762
name: DEFAULT_KEYWORD,
98-
content: sqlString.substring(upperBound, sortedMatches[i].start)
63+
content: match[1]
9964
})
10065
}
10166

102-
segments.push({
103-
name: sortedMatches[i].name,
104-
content: sqlString.substring(
105-
sortedMatches[i].start,
106-
sortedMatches[i].start + sortedMatches[i].length
107-
)
108-
})
109-
upperBound = sortedMatches[i].start + sortedMatches[i].length
110-
}
67+
if (match[2]) {
68+
const name = Object.keys(match.groups).find(key => match.groups[key])
69+
segments.push({
70+
name,
71+
content: match.groups[name]
72+
})
73+
}
11174

112-
if (upperBound < sqlString.length - 1) {
113-
segments.push({
114-
name: DEFAULT_KEYWORD,
115-
content: sqlString.substring(
116-
upperBound,
117-
upperBound + sqlString.length + 1
118-
)
119-
})
75+
// Stop at the end of string
76+
if (match.index + match[0].length >= sqlString.length) {
77+
break
78+
}
12079
}
80+
12181
return segments
12282
}
12383

@@ -140,5 +100,6 @@ function highlight (sqlString, options) {
140100

141101
module.exports = {
142102
getSegments,
143-
highlight
103+
highlight,
104+
DEFAULT_OPTIONS
144105
}

0 commit comments

Comments
 (0)