diff --git a/.github/workflows/build-tarball.yml b/.github/workflows/build-tarball.yml
index bdcb6ed67615df..9557ddd14e19c9 100644
--- a/.github/workflows/build-tarball.yml
+++ b/.github/workflows/build-tarball.yml
@@ -57,7 +57,7 @@ jobs:
mkdir tarballs
mv *.tar.gz tarballs
- name: Upload tarball artifact
- uses: actions/upload-artifact@a8a3f3ad30e3422c9c7b888a15615d19a852ae32 # v3.1.3
+ uses: actions/upload-artifact@65c4c4a1ddee5b72f698fdd19549f0f0fb45cf08 # v4.6.0
with:
name: tarballs
path: tarballs
@@ -75,7 +75,7 @@ jobs:
- name: Environment Information
run: npx envinfo
- name: Download tarball
- uses: actions/download-artifact@9bc31d5ccc31df68ecc42ccf4149144866c47d8a # v3.0.2
+ uses: actions/download-artifact@fa0a91b85d4f404e444e00e005971372dc801d16 # v4.1.8
with:
name: tarballs
path: tarballs
diff --git a/.github/workflows/daily-wpt-fyi.yml b/.github/workflows/daily-wpt-fyi.yml
index 0fa94da4b015fb..3742004df38e98 100644
--- a/.github/workflows/daily-wpt-fyi.yml
+++ b/.github/workflows/daily-wpt-fyi.yml
@@ -121,10 +121,10 @@ jobs:
run: cp wptreport.json wptreport-${{ steps.setup-node.outputs.node-version }}.json
- name: Upload GitHub Actions artifact
if: ${{ env.WPT_REPORT != '' }}
- uses: actions/upload-artifact@a8a3f3ad30e3422c9c7b888a15615d19a852ae32 # v3.1.3
+ uses: actions/upload-artifact@65c4c4a1ddee5b72f698fdd19549f0f0fb45cf08 # v4.6.0
with:
path: out/wpt/wptreport-*.json
- name: WPT Reports
+ name: WPT Report for ${{ steps.setup-node.outputs.node-version }}
if-no-files-found: error
- name: Upload WPT Report to wpt.fyi API
if: ${{ env.WPT_REPORT != '' }}
diff --git a/.github/workflows/doc.yml b/.github/workflows/doc.yml
index ea69074e1b41c2..893ff289da95ea 100644
--- a/.github/workflows/doc.yml
+++ b/.github/workflows/doc.yml
@@ -35,7 +35,7 @@ jobs:
run: npx envinfo
- name: Build
run: NODE=$(command -v node) make doc-only
- - uses: actions/upload-artifact@a8a3f3ad30e3422c9c7b888a15615d19a852ae32 # v3.1.3
+ - uses: actions/upload-artifact@65c4c4a1ddee5b72f698fdd19549f0f0fb45cf08 # v4.6.0
with:
name: docs
path: out/doc
diff --git a/.github/workflows/scorecard.yml b/.github/workflows/scorecard.yml
index 804b95cb64683c..e0d231af28e73c 100644
--- a/.github/workflows/scorecard.yml
+++ b/.github/workflows/scorecard.yml
@@ -65,7 +65,7 @@ jobs:
# Upload the results as artifacts (optional). Commenting out will disable uploads of run results in SARIF
# format to the repository Actions tab.
- name: Upload artifact
- uses: actions/upload-artifact@a8a3f3ad30e3422c9c7b888a15615d19a852ae32 # v3.1.3
+ uses: actions/upload-artifact@65c4c4a1ddee5b72f698fdd19549f0f0fb45cf08 # v4.6.0
with:
name: SARIF file
path: results.sarif
diff --git a/BUILDING.md b/BUILDING.md
index 76b6738097bdc1..c82bbb2730819e 100644
--- a/BUILDING.md
+++ b/BUILDING.md
@@ -163,8 +163,8 @@ Binaries at <https://nodejs.org/download/release/> are produced on:
| Binary package | Platform and Toolchain |
| ----------------------- | ------------------------------------------------------------------------------------------------------------- |
| aix-ppc64 | AIX 7.2 TL04 on PPC64BE with GCC 8 |
-| darwin-x64 | macOS 10.15, Xcode Command Line Tools 11 with -mmacosx-version-min=10.15 |
-| darwin-arm64 (and .pkg) | macOS 11 (arm64), Xcode Command Line Tools 12 with -mmacosx-version-min=10.15 |
+| darwin-x64 | macOS 13, Xcode 16 with -mmacosx-version-min=10.15 |
+| darwin-arm64 (and .pkg) | macOS 13 (arm64), Xcode 14 with -mmacosx-version-min=10.15 |
| linux-arm64 | RHEL 8 with GCC 8[^6] |
| linux-armv7l | Cross-compiled on Ubuntu 18.04 x64 with [custom GCC toolchain](https://github.com/rvagg/rpi-newer-crosstools) |
| linux-ppc64le | RHEL 8 with GCC 8[^6] |
@@ -456,6 +456,12 @@ make docopen
This will open a file URL to a one-page version of all the browsable HTML
documents using the default browser.
+```bash
+make docclean
+```
+
+This will remove the previously built documentation.
+
To test if Node.js was built correctly:
```bash
diff --git a/CHANGELOG.md b/CHANGELOG.md
index 16bbb5a88405e5..2484e8f6c90f91 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -32,7 +32,8 @@ release.
-18.20.6
+18.20.7
+18.20.6
18.20.5
18.20.4
18.20.3
diff --git a/Makefile b/Makefile
index c1457480f56828..3cb5c3e20bf73a 100644
--- a/Makefile
+++ b/Makefile
@@ -170,9 +170,9 @@ with-code-cache test-code-cache:
$(warning '$@' target is a noop)
out/Makefile: config.gypi common.gypi node.gyp \
- deps/uv/uv.gyp deps/llhttp/llhttp.gyp deps/zlib/zlib.gyp \
- deps/simdutf/simdutf.gyp deps/ada/ada.gyp \
- tools/v8_gypfiles/toolchain.gypi tools/v8_gypfiles/features.gypi \
+ deps/*/*.gyp \
+ tools/v8_gypfiles/toolchain.gypi \
+ tools/v8_gypfiles/features.gypi \
tools/v8_gypfiles/inspector.gypi tools/v8_gypfiles/v8.gyp
$(PYTHON) tools/gyp_node.py -f make
diff --git a/common.gypi b/common.gypi
index ec92c9df4c1ea2..1b45f13453aa0f 100644
--- a/common.gypi
+++ b/common.gypi
@@ -36,7 +36,7 @@
# Reset this number to 0 on major V8 upgrades.
# Increment by one for each non-official patch applied to deps/v8.
- 'v8_embedder_string': '-node.37',
+ 'v8_embedder_string': '-node.39',
##### V8 defaults for Node.js #####
diff --git a/configure b/configure
index 711a3014b9a67f..29ebe882b092cd 100755
--- a/configure
+++ b/configure
@@ -4,6 +4,7 @@
# Note that the mix of single and double quotes is intentional,
# as is the fact that the ] goes on a new line.
_=[ 'exec' '/bin/sh' '-c' '''
+command -v python3.13 >/dev/null && exec python3.13 "$0" "$@"
command -v python3.12 >/dev/null && exec python3.12 "$0" "$@"
command -v python3.11 >/dev/null && exec python3.11 "$0" "$@"
command -v python3.10 >/dev/null && exec python3.10 "$0" "$@"
@@ -24,7 +25,7 @@ except ImportError:
from distutils.spawn import find_executable as which
print('Node.js configure: Found Python {}.{}.{}...'.format(*sys.version_info))
-acceptable_pythons = ((3, 12), (3, 11), (3, 10), (3, 9), (3, 8), (3, 7), (3, 6))
+acceptable_pythons = ((3, 13), (3, 12), (3, 11), (3, 10), (3, 9), (3, 8), (3, 7), (3, 6))
if sys.version_info[:2] in acceptable_pythons:
import configure
else:
diff --git a/deps/acorn/acorn/CHANGELOG.md b/deps/acorn/acorn/CHANGELOG.md
index 1e090161fffa80..313718616b575a 100644
--- a/deps/acorn/acorn/CHANGELOG.md
+++ b/deps/acorn/acorn/CHANGELOG.md
@@ -1,3 +1,15 @@
+## 8.14.0 (2024-10-27)
+
+### New features
+
+Support ES2025 import attributes.
+
+Support ES2025 RegExp modifiers.
+
+### Bug fixes
+
+Support some missing Unicode properties.
+
## 8.13.0 (2024-10-16)
### New features
diff --git a/deps/acorn/acorn/dist/acorn.d.mts b/deps/acorn/acorn/dist/acorn.d.mts
index cd204b1c50db94..81f4e38fdbf4c9 100644
--- a/deps/acorn/acorn/dist/acorn.d.mts
+++ b/deps/acorn/acorn/dist/acorn.d.mts
@@ -403,6 +403,7 @@ export interface ImportDeclaration extends Node {
type: "ImportDeclaration"
specifiers: Array
source: Literal
+ attributes: Array
}
export interface ImportSpecifier extends Node {
@@ -421,11 +422,18 @@ export interface ImportNamespaceSpecifier extends Node {
local: Identifier
}
+export interface ImportAttribute extends Node {
+ type: "ImportAttribute"
+ key: Identifier | Literal
+ value: Literal
+}
+
export interface ExportNamedDeclaration extends Node {
type: "ExportNamedDeclaration"
declaration?: Declaration | null
specifiers: Array
source?: Literal | null
+ attributes: Array
}
export interface ExportSpecifier extends Node {
@@ -454,6 +462,7 @@ export interface ExportAllDeclaration extends Node {
type: "ExportAllDeclaration"
source: Literal
exported?: Identifier | Literal | null
+ attributes: Array
}
export interface AwaitExpression extends Node {
@@ -469,6 +478,7 @@ export interface ChainExpression extends Node {
export interface ImportExpression extends Node {
type: "ImportExpression"
source: Expression
+ options: Expression | null
}
export interface ParenthesizedExpression extends Node {
@@ -562,7 +572,7 @@ export type ModuleDeclaration =
| ExportDefaultDeclaration
| ExportAllDeclaration
-export type AnyNode = Statement | Expression | Declaration | ModuleDeclaration | Literal | Program | SwitchCase | CatchClause | Property | Super | SpreadElement | TemplateElement | AssignmentProperty | ObjectPattern | ArrayPattern | RestElement | AssignmentPattern | ClassBody | MethodDefinition | MetaProperty | ImportSpecifier | ImportDefaultSpecifier | ImportNamespaceSpecifier | ExportSpecifier | AnonymousFunctionDeclaration | AnonymousClassDeclaration | PropertyDefinition | PrivateIdentifier | StaticBlock | VariableDeclarator
+export type AnyNode = Statement | Expression | Declaration | ModuleDeclaration | Literal | Program | SwitchCase | CatchClause | Property | Super | SpreadElement | TemplateElement | AssignmentProperty | ObjectPattern | ArrayPattern | RestElement | AssignmentPattern | ClassBody | MethodDefinition | MetaProperty | ImportAttribute | ImportSpecifier | ImportDefaultSpecifier | ImportNamespaceSpecifier | ExportSpecifier | AnonymousFunctionDeclaration | AnonymousClassDeclaration | PropertyDefinition | PrivateIdentifier | StaticBlock | VariableDeclarator
export function parse(input: string, options: Options): Program
diff --git a/deps/acorn/acorn/dist/acorn.d.ts b/deps/acorn/acorn/dist/acorn.d.ts
index cd204b1c50db94..81f4e38fdbf4c9 100644
--- a/deps/acorn/acorn/dist/acorn.d.ts
+++ b/deps/acorn/acorn/dist/acorn.d.ts
@@ -403,6 +403,7 @@ export interface ImportDeclaration extends Node {
type: "ImportDeclaration"
specifiers: Array
source: Literal
+ attributes: Array
}
export interface ImportSpecifier extends Node {
@@ -421,11 +422,18 @@ export interface ImportNamespaceSpecifier extends Node {
local: Identifier
}
+export interface ImportAttribute extends Node {
+ type: "ImportAttribute"
+ key: Identifier | Literal
+ value: Literal
+}
+
export interface ExportNamedDeclaration extends Node {
type: "ExportNamedDeclaration"
declaration?: Declaration | null
specifiers: Array
source?: Literal | null
+ attributes: Array
}
export interface ExportSpecifier extends Node {
@@ -454,6 +462,7 @@ export interface ExportAllDeclaration extends Node {
type: "ExportAllDeclaration"
source: Literal
exported?: Identifier | Literal | null
+ attributes: Array
}
export interface AwaitExpression extends Node {
@@ -469,6 +478,7 @@ export interface ChainExpression extends Node {
export interface ImportExpression extends Node {
type: "ImportExpression"
source: Expression
+ options: Expression | null
}
export interface ParenthesizedExpression extends Node {
@@ -562,7 +572,7 @@ export type ModuleDeclaration =
| ExportDefaultDeclaration
| ExportAllDeclaration
-export type AnyNode = Statement | Expression | Declaration | ModuleDeclaration | Literal | Program | SwitchCase | CatchClause | Property | Super | SpreadElement | TemplateElement | AssignmentProperty | ObjectPattern | ArrayPattern | RestElement | AssignmentPattern | ClassBody | MethodDefinition | MetaProperty | ImportSpecifier | ImportDefaultSpecifier | ImportNamespaceSpecifier | ExportSpecifier | AnonymousFunctionDeclaration | AnonymousClassDeclaration | PropertyDefinition | PrivateIdentifier | StaticBlock | VariableDeclarator
+export type AnyNode = Statement | Expression | Declaration | ModuleDeclaration | Literal | Program | SwitchCase | CatchClause | Property | Super | SpreadElement | TemplateElement | AssignmentProperty | ObjectPattern | ArrayPattern | RestElement | AssignmentPattern | ClassBody | MethodDefinition | MetaProperty | ImportAttribute | ImportSpecifier | ImportDefaultSpecifier | ImportNamespaceSpecifier | ExportSpecifier | AnonymousFunctionDeclaration | AnonymousClassDeclaration | PropertyDefinition | PrivateIdentifier | StaticBlock | VariableDeclarator
export function parse(input: string, options: Options): Program
diff --git a/deps/acorn/acorn/dist/acorn.js b/deps/acorn/acorn/dist/acorn.js
index 7cd26fa36b5caa..2bfc15b5ef2204 100644
--- a/deps/acorn/acorn/dist/acorn.js
+++ b/deps/acorn/acorn/dist/acorn.js
@@ -1678,6 +1678,8 @@
this.expectContextual("from");
if (this.type !== types$1.string) { this.unexpected(); }
node.source = this.parseExprAtom();
+ if (this.options.ecmaVersion >= 16)
+ { node.attributes = this.parseWithClause(); }
this.semicolon();
return this.finishNode(node, "ExportAllDeclaration")
};
@@ -1708,6 +1710,8 @@
if (this.eatContextual("from")) {
if (this.type !== types$1.string) { this.unexpected(); }
node.source = this.parseExprAtom();
+ if (this.options.ecmaVersion >= 16)
+ { node.attributes = this.parseWithClause(); }
} else {
for (var i = 0, list = node.specifiers; i < list.length; i += 1) {
// check for keywords used as local names
@@ -1848,6 +1852,8 @@
this.expectContextual("from");
node.source = this.type === types$1.string ? this.parseExprAtom() : this.unexpected();
}
+ if (this.options.ecmaVersion >= 16)
+ { node.attributes = this.parseWithClause(); }
this.semicolon();
return this.finishNode(node, "ImportDeclaration")
};
@@ -1908,6 +1914,41 @@
return nodes
};
+ pp$8.parseWithClause = function() {
+ var nodes = [];
+ if (!this.eat(types$1._with)) {
+ return nodes
+ }
+ this.expect(types$1.braceL);
+ var attributeKeys = {};
+ var first = true;
+ while (!this.eat(types$1.braceR)) {
+ if (!first) {
+ this.expect(types$1.comma);
+ if (this.afterTrailingComma(types$1.braceR)) { break }
+ } else { first = false; }
+
+ var attr = this.parseImportAttribute();
+ var keyName = attr.key.type === "Identifier" ? attr.key.name : attr.key.value;
+ if (hasOwn(attributeKeys, keyName))
+ { this.raiseRecoverable(attr.key.start, "Duplicate attribute key '" + keyName + "'"); }
+ attributeKeys[keyName] = true;
+ nodes.push(attr);
+ }
+ return nodes
+ };
+
+ pp$8.parseImportAttribute = function() {
+ var node = this.startNode();
+ node.key = this.type === types$1.string ? this.parseExprAtom() : this.parseIdent(this.options.allowReserved !== "never");
+ this.expect(types$1.colon);
+ if (this.type !== types$1.string) {
+ this.unexpected();
+ }
+ node.value = this.parseExprAtom();
+ return this.finishNode(node, "ImportAttribute")
+ };
+
pp$8.parseModuleExportName = function() {
if (this.options.ecmaVersion >= 13 && this.type === types$1.string) {
var stringLiteral = this.parseLiteral(this.value);
@@ -2975,13 +3016,32 @@
// Parse node.source.
node.source = this.parseMaybeAssign();
- // Verify ending.
- if (!this.eat(types$1.parenR)) {
- var errorPos = this.start;
- if (this.eat(types$1.comma) && this.eat(types$1.parenR)) {
- this.raiseRecoverable(errorPos, "Trailing comma is not allowed in import()");
+ if (this.options.ecmaVersion >= 16) {
+ if (!this.eat(types$1.parenR)) {
+ this.expect(types$1.comma);
+ if (!this.afterTrailingComma(types$1.parenR)) {
+ node.options = this.parseMaybeAssign();
+ if (!this.eat(types$1.parenR)) {
+ this.expect(types$1.comma);
+ if (!this.afterTrailingComma(types$1.parenR)) {
+ this.unexpected();
+ }
+ }
+ } else {
+ node.options = null;
+ }
} else {
- this.unexpected(errorPos);
+ node.options = null;
+ }
+ } else {
+ // Verify ending.
+ if (!this.eat(types$1.parenR)) {
+ var errorPos = this.start;
+ if (this.eat(types$1.comma) && this.eat(types$1.parenR)) {
+ this.raiseRecoverable(errorPos, "Trailing comma is not allowed in import()");
+ } else {
+ this.unexpected(errorPos);
+ }
}
}
@@ -3741,6 +3801,9 @@
return newNode
};
+ // This file was generated by "bin/generate-unicode-script-values.js". Do not modify manually!
+ var scriptValuesAddedInUnicode = "Gara Garay Gukh Gurung_Khema Hrkt Katakana_Or_Hiragana Kawi Kirat_Rai Krai Nag_Mundari Nagm Ol_Onal Onao Sunu Sunuwar Todhri Todr Tulu_Tigalari Tutg Unknown Zzzz";
+
// This file contains Unicode properties extracted from the ECMAScript specification.
// The lists are extracted like so:
// $$('#table-binary-unicode-properties > figure > table > tbody > tr > td:nth-child(1) code').map(el => el.innerText)
@@ -3783,7 +3846,7 @@
var ecma11ScriptValues = ecma10ScriptValues + " Elymaic Elym Nandinagari Nand Nyiakeng_Puachue_Hmong Hmnp Wancho Wcho";
var ecma12ScriptValues = ecma11ScriptValues + " Chorasmian Chrs Diak Dives_Akuru Khitan_Small_Script Kits Yezi Yezidi";
var ecma13ScriptValues = ecma12ScriptValues + " Cypro_Minoan Cpmn Old_Uyghur Ougr Tangsa Tnsa Toto Vithkuqi Vith";
- var ecma14ScriptValues = ecma13ScriptValues + " Hrkt Katakana_Or_Hiragana Kawi Nag_Mundari Nagm Unknown Zzzz";
+ var ecma14ScriptValues = ecma13ScriptValues + " " + scriptValuesAddedInUnicode;
var unicodeScriptValues = {
9: ecma9ScriptValues,
@@ -4208,12 +4271,41 @@
pp$1.regexp_eatUncapturingGroup = function(state) {
var start = state.pos;
if (state.eat(0x28 /* ( */)) {
- if (state.eat(0x3F /* ? */) && state.eat(0x3A /* : */)) {
- this.regexp_disjunction(state);
- if (state.eat(0x29 /* ) */)) {
- return true
+ if (state.eat(0x3F /* ? */)) {
+ if (this.options.ecmaVersion >= 16) {
+ var addModifiers = this.regexp_eatModifiers(state);
+ var hasHyphen = state.eat(0x2D /* - */);
+ if (addModifiers || hasHyphen) {
+ for (var i = 0; i < addModifiers.length; i++) {
+ var modifier = addModifiers.charAt(i);
+ if (addModifiers.indexOf(modifier, i + 1) > -1) {
+ state.raise("Duplicate regular expression modifiers");
+ }
+ }
+ if (hasHyphen) {
+ var removeModifiers = this.regexp_eatModifiers(state);
+ if (!addModifiers && !removeModifiers && state.current() === 0x3A /* : */) {
+ state.raise("Invalid regular expression modifiers");
+ }
+ for (var i$1 = 0; i$1 < removeModifiers.length; i$1++) {
+ var modifier$1 = removeModifiers.charAt(i$1);
+ if (
+ removeModifiers.indexOf(modifier$1, i$1 + 1) > -1 ||
+ addModifiers.indexOf(modifier$1) > -1
+ ) {
+ state.raise("Duplicate regular expression modifiers");
+ }
+ }
+ }
+ }
+ }
+ if (state.eat(0x3A /* : */)) {
+ this.regexp_disjunction(state);
+ if (state.eat(0x29 /* ) */)) {
+ return true
+ }
+ state.raise("Unterminated group");
}
- state.raise("Unterminated group");
}
state.pos = start;
}
@@ -4235,6 +4327,23 @@
}
return false
};
+ // RegularExpressionModifiers ::
+ // [empty]
+ // RegularExpressionModifiers RegularExpressionModifier
+ pp$1.regexp_eatModifiers = function(state) {
+ var modifiers = "";
+ var ch = 0;
+ while ((ch = state.current()) !== -1 && isRegularExpressionModifier(ch)) {
+ modifiers += codePointToString(ch);
+ state.advance();
+ }
+ return modifiers
+ };
+ // RegularExpressionModifier :: one of
+ // `i` `m` `s`
+ function isRegularExpressionModifier(ch) {
+ return ch === 0x69 /* i */ || ch === 0x6d /* m */ || ch === 0x73 /* s */
+ }
// https://www.ecma-international.org/ecma-262/8.0/#prod-annexB-ExtendedAtom
pp$1.regexp_eatExtendedAtom = function(state) {
@@ -5990,7 +6099,7 @@
// [walk]: util/walk.js
- var version = "8.13.0";
+ var version = "8.14.0";
Parser.acorn = {
Parser: Parser,
diff --git a/deps/acorn/acorn/dist/acorn.mjs b/deps/acorn/acorn/dist/acorn.mjs
index 21b860f275a064..43e58efe7f03e1 100644
--- a/deps/acorn/acorn/dist/acorn.mjs
+++ b/deps/acorn/acorn/dist/acorn.mjs
@@ -1672,6 +1672,8 @@ pp$8.parseExportAllDeclaration = function(node, exports) {
this.expectContextual("from");
if (this.type !== types$1.string) { this.unexpected(); }
node.source = this.parseExprAtom();
+ if (this.options.ecmaVersion >= 16)
+ { node.attributes = this.parseWithClause(); }
this.semicolon();
return this.finishNode(node, "ExportAllDeclaration")
};
@@ -1702,6 +1704,8 @@ pp$8.parseExport = function(node, exports) {
if (this.eatContextual("from")) {
if (this.type !== types$1.string) { this.unexpected(); }
node.source = this.parseExprAtom();
+ if (this.options.ecmaVersion >= 16)
+ { node.attributes = this.parseWithClause(); }
} else {
for (var i = 0, list = node.specifiers; i < list.length; i += 1) {
// check for keywords used as local names
@@ -1842,6 +1846,8 @@ pp$8.parseImport = function(node) {
this.expectContextual("from");
node.source = this.type === types$1.string ? this.parseExprAtom() : this.unexpected();
}
+ if (this.options.ecmaVersion >= 16)
+ { node.attributes = this.parseWithClause(); }
this.semicolon();
return this.finishNode(node, "ImportDeclaration")
};
@@ -1902,6 +1908,41 @@ pp$8.parseImportSpecifiers = function() {
return nodes
};
+pp$8.parseWithClause = function() {
+ var nodes = [];
+ if (!this.eat(types$1._with)) {
+ return nodes
+ }
+ this.expect(types$1.braceL);
+ var attributeKeys = {};
+ var first = true;
+ while (!this.eat(types$1.braceR)) {
+ if (!first) {
+ this.expect(types$1.comma);
+ if (this.afterTrailingComma(types$1.braceR)) { break }
+ } else { first = false; }
+
+ var attr = this.parseImportAttribute();
+ var keyName = attr.key.type === "Identifier" ? attr.key.name : attr.key.value;
+ if (hasOwn(attributeKeys, keyName))
+ { this.raiseRecoverable(attr.key.start, "Duplicate attribute key '" + keyName + "'"); }
+ attributeKeys[keyName] = true;
+ nodes.push(attr);
+ }
+ return nodes
+};
+
+pp$8.parseImportAttribute = function() {
+ var node = this.startNode();
+ node.key = this.type === types$1.string ? this.parseExprAtom() : this.parseIdent(this.options.allowReserved !== "never");
+ this.expect(types$1.colon);
+ if (this.type !== types$1.string) {
+ this.unexpected();
+ }
+ node.value = this.parseExprAtom();
+ return this.finishNode(node, "ImportAttribute")
+};
+
pp$8.parseModuleExportName = function() {
if (this.options.ecmaVersion >= 13 && this.type === types$1.string) {
var stringLiteral = this.parseLiteral(this.value);
@@ -2969,13 +3010,32 @@ pp$5.parseDynamicImport = function(node) {
// Parse node.source.
node.source = this.parseMaybeAssign();
- // Verify ending.
- if (!this.eat(types$1.parenR)) {
- var errorPos = this.start;
- if (this.eat(types$1.comma) && this.eat(types$1.parenR)) {
- this.raiseRecoverable(errorPos, "Trailing comma is not allowed in import()");
+ if (this.options.ecmaVersion >= 16) {
+ if (!this.eat(types$1.parenR)) {
+ this.expect(types$1.comma);
+ if (!this.afterTrailingComma(types$1.parenR)) {
+ node.options = this.parseMaybeAssign();
+ if (!this.eat(types$1.parenR)) {
+ this.expect(types$1.comma);
+ if (!this.afterTrailingComma(types$1.parenR)) {
+ this.unexpected();
+ }
+ }
+ } else {
+ node.options = null;
+ }
} else {
- this.unexpected(errorPos);
+ node.options = null;
+ }
+ } else {
+ // Verify ending.
+ if (!this.eat(types$1.parenR)) {
+ var errorPos = this.start;
+ if (this.eat(types$1.comma) && this.eat(types$1.parenR)) {
+ this.raiseRecoverable(errorPos, "Trailing comma is not allowed in import()");
+ } else {
+ this.unexpected(errorPos);
+ }
}
}
@@ -3735,6 +3795,9 @@ pp$2.copyNode = function(node) {
return newNode
};
+// This file was generated by "bin/generate-unicode-script-values.js". Do not modify manually!
+var scriptValuesAddedInUnicode = "Gara Garay Gukh Gurung_Khema Hrkt Katakana_Or_Hiragana Kawi Kirat_Rai Krai Nag_Mundari Nagm Ol_Onal Onao Sunu Sunuwar Todhri Todr Tulu_Tigalari Tutg Unknown Zzzz";
+
// This file contains Unicode properties extracted from the ECMAScript specification.
// The lists are extracted like so:
// $$('#table-binary-unicode-properties > figure > table > tbody > tr > td:nth-child(1) code').map(el => el.innerText)
@@ -3777,7 +3840,7 @@ var ecma10ScriptValues = ecma9ScriptValues + " Dogra Dogr Gunjala_Gondi Gong Han
var ecma11ScriptValues = ecma10ScriptValues + " Elymaic Elym Nandinagari Nand Nyiakeng_Puachue_Hmong Hmnp Wancho Wcho";
var ecma12ScriptValues = ecma11ScriptValues + " Chorasmian Chrs Diak Dives_Akuru Khitan_Small_Script Kits Yezi Yezidi";
var ecma13ScriptValues = ecma12ScriptValues + " Cypro_Minoan Cpmn Old_Uyghur Ougr Tangsa Tnsa Toto Vithkuqi Vith";
-var ecma14ScriptValues = ecma13ScriptValues + " Hrkt Katakana_Or_Hiragana Kawi Nag_Mundari Nagm Unknown Zzzz";
+var ecma14ScriptValues = ecma13ScriptValues + " " + scriptValuesAddedInUnicode;
var unicodeScriptValues = {
9: ecma9ScriptValues,
@@ -4202,12 +4265,41 @@ pp$1.regexp_eatReverseSolidusAtomEscape = function(state) {
pp$1.regexp_eatUncapturingGroup = function(state) {
var start = state.pos;
if (state.eat(0x28 /* ( */)) {
- if (state.eat(0x3F /* ? */) && state.eat(0x3A /* : */)) {
- this.regexp_disjunction(state);
- if (state.eat(0x29 /* ) */)) {
- return true
+ if (state.eat(0x3F /* ? */)) {
+ if (this.options.ecmaVersion >= 16) {
+ var addModifiers = this.regexp_eatModifiers(state);
+ var hasHyphen = state.eat(0x2D /* - */);
+ if (addModifiers || hasHyphen) {
+ for (var i = 0; i < addModifiers.length; i++) {
+ var modifier = addModifiers.charAt(i);
+ if (addModifiers.indexOf(modifier, i + 1) > -1) {
+ state.raise("Duplicate regular expression modifiers");
+ }
+ }
+ if (hasHyphen) {
+ var removeModifiers = this.regexp_eatModifiers(state);
+ if (!addModifiers && !removeModifiers && state.current() === 0x3A /* : */) {
+ state.raise("Invalid regular expression modifiers");
+ }
+ for (var i$1 = 0; i$1 < removeModifiers.length; i$1++) {
+ var modifier$1 = removeModifiers.charAt(i$1);
+ if (
+ removeModifiers.indexOf(modifier$1, i$1 + 1) > -1 ||
+ addModifiers.indexOf(modifier$1) > -1
+ ) {
+ state.raise("Duplicate regular expression modifiers");
+ }
+ }
+ }
+ }
+ }
+ if (state.eat(0x3A /* : */)) {
+ this.regexp_disjunction(state);
+ if (state.eat(0x29 /* ) */)) {
+ return true
+ }
+ state.raise("Unterminated group");
}
- state.raise("Unterminated group");
}
state.pos = start;
}
@@ -4229,6 +4321,23 @@ pp$1.regexp_eatCapturingGroup = function(state) {
}
return false
};
+// RegularExpressionModifiers ::
+// [empty]
+// RegularExpressionModifiers RegularExpressionModifier
+pp$1.regexp_eatModifiers = function(state) {
+ var modifiers = "";
+ var ch = 0;
+ while ((ch = state.current()) !== -1 && isRegularExpressionModifier(ch)) {
+ modifiers += codePointToString(ch);
+ state.advance();
+ }
+ return modifiers
+};
+// RegularExpressionModifier :: one of
+// `i` `m` `s`
+function isRegularExpressionModifier(ch) {
+ return ch === 0x69 /* i */ || ch === 0x6d /* m */ || ch === 0x73 /* s */
+}
// https://www.ecma-international.org/ecma-262/8.0/#prod-annexB-ExtendedAtom
pp$1.regexp_eatExtendedAtom = function(state) {
@@ -5984,7 +6093,7 @@ pp.readWord = function() {
// [walk]: util/walk.js
-var version = "8.13.0";
+var version = "8.14.0";
Parser.acorn = {
Parser: Parser,
diff --git a/deps/acorn/acorn/package.json b/deps/acorn/acorn/package.json
index 3396013bbbf060..795cf83eff64d7 100644
--- a/deps/acorn/acorn/package.json
+++ b/deps/acorn/acorn/package.json
@@ -16,7 +16,7 @@
],
"./package.json": "./package.json"
},
- "version": "8.13.0",
+ "version": "8.14.0",
"engines": {
"node": ">=0.4.0"
},
diff --git a/deps/corepack/CHANGELOG.md b/deps/corepack/CHANGELOG.md
index 7de934c0d2c0db..88363683a9d5f6 100644
--- a/deps/corepack/CHANGELOG.md
+++ b/deps/corepack/CHANGELOG.md
@@ -1,5 +1,39 @@
# Changelog
+## [0.31.0](https://github.com/nodejs/corepack/compare/v0.30.0...v0.31.0) (2025-01-27)
+
+
+### ⚠ BREAKING CHANGES
+
+* drop support for Node.js 21.x ([#594](https://github.com/nodejs/corepack/issues/594))
+
+### Features
+
+* update package manager versions ([#595](https://github.com/nodejs/corepack/issues/595)) ([c7a9bde](https://github.com/nodejs/corepack/commit/c7a9bde16dcbbb7e6ef03fef740656cde7ade360))
+
+
+### Bug Fixes
+
+* only print message for `UsageError`s ([#602](https://github.com/nodejs/corepack/issues/602)) ([72a588c](https://github.com/nodejs/corepack/commit/72a588c2370c17e415b24fe389efdafb3c84e90b))
+* update npm registry keys ([#614](https://github.com/nodejs/corepack/issues/614)) ([8c90caa](https://github.com/nodejs/corepack/commit/8c90caab7f1c5c9b89f1de113bc1dfc441bf25d2))
+
+
+### Miscellaneous Chores
+
+* drop support for Node.js 21.x ([#594](https://github.com/nodejs/corepack/issues/594)) ([8bebc0c](https://github.com/nodejs/corepack/commit/8bebc0c0a5cbcdeec41673dcbaf581e6e1c1be11))
+
+## [0.30.0](https://github.com/nodejs/corepack/compare/v0.29.4...v0.30.0) (2024-11-23)
+
+
+### Features
+
+* update package manager versions ([#578](https://github.com/nodejs/corepack/issues/578)) ([a286c8f](https://github.com/nodejs/corepack/commit/a286c8f5537ea9ecf9b6ff53c7bc3e8da4e3c8bb))
+
+
+### Performance Improvements
+
+* prefer `module.enableCompileCache` over `v8-compile-cache` ([#574](https://github.com/nodejs/corepack/issues/574)) ([cba6905](https://github.com/nodejs/corepack/commit/cba690575bd606faeee54bd512ccb8797d49055f))
+
## [0.29.4](https://github.com/nodejs/corepack/compare/v0.29.3...v0.29.4) (2024-09-07)
diff --git a/deps/corepack/README.md b/deps/corepack/README.md
index d94614affc5353..66bfbc3fb6aae3 100644
--- a/deps/corepack/README.md
+++ b/deps/corepack/README.md
@@ -302,6 +302,8 @@ same major line. Should you need to upgrade to a new major, use an explicit
## Troubleshooting
+The environment variable `DEBUG` can be set to `corepack` to enable additional debug logging.
+
### Networking
There are a wide variety of networking issues that can occur while running
diff --git a/deps/corepack/dist/corepack.js b/deps/corepack/dist/corepack.js
index b1b22662466f86..6179b11c083cb5 100755
--- a/deps/corepack/dist/corepack.js
+++ b/deps/corepack/dist/corepack.js
@@ -1,3 +1,4 @@
#!/usr/bin/env node
process.env.COREPACK_ENABLE_DOWNLOAD_PROMPT??='0';
+require('module').enableCompileCache?.();
require('./lib/corepack.cjs').runMain(process.argv.slice(2));
\ No newline at end of file
diff --git a/deps/corepack/dist/lib/corepack.cjs b/deps/corepack/dist/lib/corepack.cjs
index 2978fc336232e0..7a92f3334f7687 100644
--- a/deps/corepack/dist/lib/corepack.cjs
+++ b/deps/corepack/dist/lib/corepack.cjs
@@ -21260,7 +21260,7 @@ function String2(descriptor, ...args) {
}
// package.json
-var version = "0.29.4";
+var version = "0.31.0";
// sources/Engine.ts
var import_fs9 = __toESM(require("fs"));
@@ -21274,7 +21274,7 @@ var import_valid3 = __toESM(require_valid2());
var config_default = {
definitions: {
npm: {
- default: "10.8.3+sha1.e6085b2864fcfd9b1aad7b602601b5a2fc116699",
+ default: "11.0.0+sha1.7bba7c80740ef1f5b2c5d4cecc55e94912faa5e6",
fetchLatestFrom: {
type: "npm",
package: "npm"
@@ -21311,7 +21311,7 @@ var config_default = {
}
},
pnpm: {
- default: "9.9.0+sha1.3edbe440f4e570aa8f049adbd06b9483d55cc2d2",
+ default: "9.15.4+sha1.ffa0b5c573381e8035b354028ccff97c8e452047",
fetchLatestFrom: {
type: "npm",
package: "pnpm"
@@ -21375,7 +21375,7 @@ var config_default = {
package: "yarn"
},
transparent: {
- default: "4.4.1+sha224.fd21d9eb5fba020083811af1d4953acc21eeb9f6ff97efd1b3f9d4de",
+ default: "4.6.0+sha224.acd0786f07ffc6c933940eb65fc1d627131ddf5455bddcc295dc90fd",
commands: [
[
"yarn",
@@ -21438,11 +21438,18 @@ var config_default = {
keys: {
npm: [
{
- expires: null,
+ expires: "2025-01-29T00:00:00.000Z",
keyid: "SHA256:jl3bwswu80PjjokCgh0o2w5c2U4LhQAE57gj9cz1kzA",
keytype: "ecdsa-sha2-nistp256",
scheme: "ecdsa-sha2-nistp256",
key: "MFkwEwYHKoZIzj0CAQYIKoZIzj0DAQcDQgAE1Olb3zMAFFxXKHiIkQO5cJ3Yhl5i6UPp+IhuteBJbuHcA5UogKo0EWtlWwW6KSaKoTNEYL7JlCQiVnkhBktUgg=="
+ },
+ {
+ expires: null,
+ keyid: "SHA256:DhQ8wR5APBvFHLF/+Tc+AYvPOdTpcIDqOhxsBHRwC7U",
+ keytype: "ecdsa-sha2-nistp256",
+ scheme: "ecdsa-sha2-nistp256",
+ key: "MFkwEwYHKoZIzj0CAQYIKoZIzj0DAQcDQgAEY6Ya7W++7aUPzvMTrezH6Ycx3c+HOKYCcNGybJZSCJq/fd7Qa8uuAKtdIkUQtQiEKERhAmE5lMMJhP8OkDOa2g=="
}
]
}
@@ -21965,8 +21972,11 @@ async function runVersion(locator, installSpec, binName, args) {
}
if (!binPath)
throw new Error(`Assertion failed: Unable to locate path for bin '${binName}'`);
- if (locator.name !== `npm` || (0, import_lt.default)(locator.reference, `9.7.0`))
- await Promise.resolve().then(() => __toESM(require_v8_compile_cache()));
+ if (!import_module.default.enableCompileCache) {
+ if (locator.name !== `npm` || (0, import_lt.default)(locator.reference, `9.7.0`)) {
+ await Promise.resolve().then(() => __toESM(require_v8_compile_cache()));
+ }
+ }
process.env.COREPACK_ROOT = import_path7.default.dirname(require.resolve("corepack/package.json"));
process.argv = [
process.execPath,
@@ -21976,6 +21986,9 @@ async function runVersion(locator, installSpec, binName, args) {
process.execArgv = [];
process.mainModule = void 0;
process.nextTick(import_module.default.runMain, binPath);
+ if (import_module.default.flushCompileCache) {
+ setImmediate(import_module.default.flushCompileCache);
+ }
}
function shouldSkipIntegrityCheck() {
return process.env.COREPACK_INTEGRITY_KEYS === `` || process.env.COREPACK_INTEGRITY_KEYS === `0`;
@@ -22553,7 +22566,7 @@ var EnableCommand = class extends Command {
[`enable`]
];
static usage = Command.Usage({
- description: `Add the Corepack shims to the install directories`,
+ description: `Add the Corepack shims to the install directory`,
details: `
When run, this command will check whether the shims for the specified package managers can be found with the correct values inside the install directory. If not, or if they don't exist, they will be created.
@@ -23093,10 +23106,18 @@ async function runMain(argv) {
process.exitCode ??= code2;
}
} else {
- await engine.executePackageManagerRequest(request, {
- cwd: process.cwd(),
- args: restArgs
- });
+ try {
+ await engine.executePackageManagerRequest(request, {
+ cwd: process.cwd(),
+ args: restArgs
+ });
+ } catch (error) {
+ if (error?.name === `UsageError`) {
+ console.error(error.message);
+ process.exit(1);
+ }
+ throw error;
+ }
}
}
// Annotate the CommonJS export names for ESM import in node:
diff --git a/deps/corepack/dist/npm.js b/deps/corepack/dist/npm.js
index 7d10ba5bdf36b2..75f68b058f2dd6 100755
--- a/deps/corepack/dist/npm.js
+++ b/deps/corepack/dist/npm.js
@@ -1,3 +1,4 @@
#!/usr/bin/env node
process.env.COREPACK_ENABLE_DOWNLOAD_PROMPT??='1'
+require('module').enableCompileCache?.();
require('./lib/corepack.cjs').runMain(['npm', ...process.argv.slice(2)]);
\ No newline at end of file
diff --git a/deps/corepack/dist/npx.js b/deps/corepack/dist/npx.js
index a8bd3e69014313..b1138bb48e1a82 100755
--- a/deps/corepack/dist/npx.js
+++ b/deps/corepack/dist/npx.js
@@ -1,3 +1,4 @@
#!/usr/bin/env node
process.env.COREPACK_ENABLE_DOWNLOAD_PROMPT??='1'
+require('module').enableCompileCache?.();
require('./lib/corepack.cjs').runMain(['npx', ...process.argv.slice(2)]);
\ No newline at end of file
diff --git a/deps/corepack/dist/pnpm.js b/deps/corepack/dist/pnpm.js
index a0a87263435562..56ba509405033d 100755
--- a/deps/corepack/dist/pnpm.js
+++ b/deps/corepack/dist/pnpm.js
@@ -1,3 +1,4 @@
#!/usr/bin/env node
process.env.COREPACK_ENABLE_DOWNLOAD_PROMPT??='1'
+require('module').enableCompileCache?.();
require('./lib/corepack.cjs').runMain(['pnpm', ...process.argv.slice(2)]);
\ No newline at end of file
diff --git a/deps/corepack/dist/pnpx.js b/deps/corepack/dist/pnpx.js
index 57ad4842631cd7..ee36be2e99c686 100755
--- a/deps/corepack/dist/pnpx.js
+++ b/deps/corepack/dist/pnpx.js
@@ -1,3 +1,4 @@
#!/usr/bin/env node
process.env.COREPACK_ENABLE_DOWNLOAD_PROMPT??='1'
+require('module').enableCompileCache?.();
require('./lib/corepack.cjs').runMain(['pnpx', ...process.argv.slice(2)]);
\ No newline at end of file
diff --git a/deps/corepack/dist/yarn.js b/deps/corepack/dist/yarn.js
index eaed8596eabaa3..ce628c82b6a782 100755
--- a/deps/corepack/dist/yarn.js
+++ b/deps/corepack/dist/yarn.js
@@ -1,3 +1,4 @@
#!/usr/bin/env node
process.env.COREPACK_ENABLE_DOWNLOAD_PROMPT??='1'
+require('module').enableCompileCache?.();
require('./lib/corepack.cjs').runMain(['yarn', ...process.argv.slice(2)]);
\ No newline at end of file
diff --git a/deps/corepack/dist/yarnpkg.js b/deps/corepack/dist/yarnpkg.js
index aada6032fa67ff..9541ed726aaa3b 100755
--- a/deps/corepack/dist/yarnpkg.js
+++ b/deps/corepack/dist/yarnpkg.js
@@ -1,3 +1,4 @@
#!/usr/bin/env node
process.env.COREPACK_ENABLE_DOWNLOAD_PROMPT??='1'
+require('module').enableCompileCache?.();
require('./lib/corepack.cjs').runMain(['yarnpkg', ...process.argv.slice(2)]);
\ No newline at end of file
diff --git a/deps/corepack/package.json b/deps/corepack/package.json
index 571c359407e07a..91b95f31d77b54 100644
--- a/deps/corepack/package.json
+++ b/deps/corepack/package.json
@@ -1,6 +1,6 @@
{
"name": "corepack",
- "version": "0.29.4",
+ "version": "0.31.0",
"homepage": "https://github.com/nodejs/corepack#readme",
"bugs": {
"url": "https://github.com/nodejs/corepack/issues"
@@ -10,7 +10,7 @@
"url": "https://github.com/nodejs/corepack.git"
},
"engines": {
- "node": "^18.17.1 || >=20.10.0"
+ "node": "^18.17.1 || ^20.10.0 || >=22.11.0"
},
"exports": {
"./package.json": "./package.json"
@@ -26,7 +26,7 @@
"@yarnpkg/eslint-config": "^2.0.0",
"@yarnpkg/fslib": "^3.0.0-rc.48",
"@zkochan/cmd-shim": "^6.0.0",
- "better-sqlite3": "^10.0.0",
+ "better-sqlite3": "^11.7.2",
"clipanion": "patch:clipanion@npm%3A3.2.1#~/.yarn/patches/clipanion-npm-3.2.1-fc9187f56c.patch",
"debug": "^4.1.1",
"esbuild": "^0.21.0",
diff --git a/deps/simdutf/simdutf.cpp b/deps/simdutf/simdutf.cpp
index e6b25c7ce27c16..eb3e4598407374 100644
--- a/deps/simdutf/simdutf.cpp
+++ b/deps/simdutf/simdutf.cpp
@@ -1,4 +1,4 @@
-/* auto-generated on 2024-10-11 12:35:29 -0400. Do not edit! */
+/* auto-generated on 2024-12-10 14:54:53 -0500. Do not edit! */
/* begin file src/simdutf.cpp */
#include "simdutf.h"
// We include base64_tables once.
@@ -937,6 +937,10 @@ class implementation final : public simdutf::implementation {
const char *input, size_t length, char *output, base64_options options,
last_chunk_handling_options last_chunk_options =
last_chunk_handling_options::loose) const noexcept;
+ simdutf_warn_unused full_result base64_to_binary_details(
+ const char *input, size_t length, char *output, base64_options options,
+ last_chunk_handling_options last_chunk_options =
+ last_chunk_handling_options::loose) const noexcept;
simdutf_warn_unused size_t maximal_binary_length_from_base64(
const char16_t *input, size_t length) const noexcept;
simdutf_warn_unused result
@@ -944,6 +948,11 @@ class implementation final : public simdutf::implementation {
base64_options options,
last_chunk_handling_options last_chunk_options =
last_chunk_handling_options::loose) const noexcept;
+ simdutf_warn_unused full_result base64_to_binary_details(
+ const char16_t *input, size_t length, char *output,
+ base64_options options,
+ last_chunk_handling_options last_chunk_options =
+ last_chunk_handling_options::loose) const noexcept;
simdutf_warn_unused size_t base64_length_from_binary(
size_t length, base64_options options) const noexcept;
size_t binary_to_base64(const char *input, size_t length, char *output,
@@ -2547,6 +2556,10 @@ class implementation final : public simdutf::implementation {
const char *input, size_t length, char *output, base64_options options,
last_chunk_handling_options last_chunk_options =
last_chunk_handling_options::loose) const noexcept;
+ simdutf_warn_unused full_result base64_to_binary_details(
+ const char *input, size_t length, char *output, base64_options options,
+ last_chunk_handling_options last_chunk_options =
+ last_chunk_handling_options::loose) const noexcept;
simdutf_warn_unused size_t maximal_binary_length_from_base64(
const char16_t *input, size_t length) const noexcept;
simdutf_warn_unused result
@@ -2554,6 +2567,11 @@ class implementation final : public simdutf::implementation {
base64_options options,
last_chunk_handling_options last_chunk_options =
last_chunk_handling_options::loose) const noexcept;
+ simdutf_warn_unused full_result base64_to_binary_details(
+ const char16_t *input, size_t length, char *output,
+ base64_options options,
+ last_chunk_handling_options last_chunk_options =
+ last_chunk_handling_options::loose) const noexcept;
simdutf_warn_unused size_t base64_length_from_binary(
size_t length, base64_options options) const noexcept;
size_t binary_to_base64(const char *input, size_t length, char *output,
@@ -2885,6 +2903,10 @@ class implementation final : public simdutf::implementation {
base64_options options,
last_chunk_handling_options last_chunk_options =
last_chunk_handling_options::loose) const noexcept;
+ simdutf_warn_unused virtual full_result base64_to_binary_details(
+ const char *input, size_t length, char *output, base64_options options,
+ last_chunk_handling_options last_chunk_options =
+ last_chunk_handling_options::loose) const noexcept;
simdutf_warn_unused virtual size_t
maximal_binary_length_from_base64(const char16_t *input,
size_t length) const noexcept;
@@ -2893,6 +2915,11 @@ class implementation final : public simdutf::implementation {
base64_options options,
last_chunk_handling_options last_chunk_options =
last_chunk_handling_options::loose) const noexcept;
+ simdutf_warn_unused virtual full_result base64_to_binary_details(
+ const char16_t *input, size_t length, char *output,
+ base64_options options,
+ last_chunk_handling_options last_chunk_options =
+ last_chunk_handling_options::loose) const noexcept;
simdutf_warn_unused virtual size_t
base64_length_from_binary(size_t length,
base64_options options) const noexcept;
@@ -4142,6 +4169,10 @@ class implementation final : public simdutf::implementation {
const char *input, size_t length, char *output, base64_options options,
last_chunk_handling_options last_chunk_options =
last_chunk_handling_options::loose) const noexcept;
+ simdutf_warn_unused full_result base64_to_binary_details(
+ const char *input, size_t length, char *output, base64_options options,
+ last_chunk_handling_options last_chunk_options =
+ last_chunk_handling_options::loose) const noexcept;
simdutf_warn_unused size_t maximal_binary_length_from_base64(
const char16_t *input, size_t length) const noexcept;
simdutf_warn_unused result
@@ -4149,6 +4180,11 @@ class implementation final : public simdutf::implementation {
base64_options options,
last_chunk_handling_options last_chunk_options =
last_chunk_handling_options::loose) const noexcept;
+ simdutf_warn_unused full_result base64_to_binary_details(
+ const char16_t *input, size_t length, char *output,
+ base64_options options,
+ last_chunk_handling_options last_chunk_options =
+ last_chunk_handling_options::loose) const noexcept;
simdutf_warn_unused size_t base64_length_from_binary(
size_t length, base64_options options) const noexcept;
size_t binary_to_base64(const char *input, size_t length, char *output,
@@ -5386,6 +5422,10 @@ class implementation final : public simdutf::implementation {
const char *input, size_t length, char *output, base64_options options,
last_chunk_handling_options last_chunk_options =
last_chunk_handling_options::loose) const noexcept;
+ simdutf_warn_unused full_result base64_to_binary_details(
+ const char *input, size_t length, char *output, base64_options options,
+ last_chunk_handling_options last_chunk_options =
+ last_chunk_handling_options::loose) const noexcept;
simdutf_warn_unused size_t maximal_binary_length_from_base64(
const char16_t *input, size_t length) const noexcept;
simdutf_warn_unused result
@@ -5393,6 +5433,11 @@ class implementation final : public simdutf::implementation {
base64_options options,
last_chunk_handling_options last_chunk_options =
last_chunk_handling_options::loose) const noexcept;
+ simdutf_warn_unused full_result base64_to_binary_details(
+ const char16_t *input, size_t length, char *output,
+ base64_options options,
+ last_chunk_handling_options last_chunk_options =
+ last_chunk_handling_options::loose) const noexcept;
simdutf_warn_unused size_t base64_length_from_binary(
size_t length, base64_options options) const noexcept;
size_t binary_to_base64(const char *input, size_t length, char *output,
@@ -6172,6 +6217,10 @@ class implementation final : public simdutf::implementation {
const char *input, size_t length, char *output, base64_options options,
last_chunk_handling_options last_chunk_options =
last_chunk_handling_options::loose) const noexcept;
+ simdutf_warn_unused full_result base64_to_binary_details(
+ const char *input, size_t length, char *output, base64_options options,
+ last_chunk_handling_options last_chunk_options =
+ last_chunk_handling_options::loose) const noexcept;
simdutf_warn_unused size_t maximal_binary_length_from_base64(
const char16_t *input, size_t length) const noexcept;
simdutf_warn_unused result
@@ -6179,6 +6228,11 @@ class implementation final : public simdutf::implementation {
base64_options options,
last_chunk_handling_options last_chunk_options =
last_chunk_handling_options::loose) const noexcept;
+ simdutf_warn_unused full_result base64_to_binary_details(
+ const char16_t *input, size_t length, char *output,
+ base64_options options,
+ last_chunk_handling_options last_chunk_options =
+ last_chunk_handling_options::loose) const noexcept;
simdutf_warn_unused size_t base64_length_from_binary(
size_t length, base64_options options) const noexcept;
size_t binary_to_base64(const char *input, size_t length, char *output,
@@ -6356,43 +6410,42 @@ SIMDUTF_UNTARGET_REGION
#endif // SIMDUTF_RVV_H
/* end file src/simdutf/rvv.h */
-/* begin file src/simdutf/fallback.h */
-#ifndef SIMDUTF_FALLBACK_H
-#define SIMDUTF_FALLBACK_H
+/* begin file src/simdutf/lsx.h */
+#ifndef SIMDUTF_LSX_H
+#define SIMDUTF_LSX_H
+#ifdef SIMDUTF_FALLBACK_H
+ #error "lsx.h must be included before fallback.h"
+#endif
-// Note that fallback.h is always imported last.
-// Default Fallback to on unless a builtin implementation has already been
-// selected.
-#ifndef SIMDUTF_IMPLEMENTATION_FALLBACK
- #if SIMDUTF_CAN_ALWAYS_RUN_ARM64 || SIMDUTF_CAN_ALWAYS_RUN_ICELAKE || \
- SIMDUTF_CAN_ALWAYS_RUN_HASWELL || SIMDUTF_CAN_ALWAYS_RUN_WESTMERE || \
- SIMDUTF_CAN_ALWAYS_RUN_PPC64 || SIMDUTF_CAN_ALWAYS_RUN_RVV
- #define SIMDUTF_IMPLEMENTATION_FALLBACK 0
- #else
- #define SIMDUTF_IMPLEMENTATION_FALLBACK 1
- #endif
+#ifndef SIMDUTF_IMPLEMENTATION_LSX
+ #define SIMDUTF_IMPLEMENTATION_LSX (SIMDUTF_IS_LSX)
+#endif
+#if SIMDUTF_IMPLEMENTATION_LSX && SIMDUTF_IS_LSX
+ #define SIMDUTF_CAN_ALWAYS_RUN_LSX 1
+#else
+ #define SIMDUTF_CAN_ALWAYS_RUN_LSX 0
#endif
#define SIMDUTF_CAN_ALWAYS_RUN_FALLBACK (SIMDUTF_IMPLEMENTATION_FALLBACK)
-#if SIMDUTF_IMPLEMENTATION_FALLBACK
+#if SIMDUTF_IMPLEMENTATION_LSX
namespace simdutf {
/**
- * Fallback implementation (runs on any machine).
+ * Implementation for LoongArch SX.
*/
-namespace fallback {} // namespace fallback
+namespace lsx {} // namespace lsx
} // namespace simdutf
-/* begin file src/simdutf/fallback/implementation.h */
-#ifndef SIMDUTF_FALLBACK_IMPLEMENTATION_H
-#define SIMDUTF_FALLBACK_IMPLEMENTATION_H
+/* begin file src/simdutf/lsx/implementation.h */
+#ifndef SIMDUTF_LSX_IMPLEMENTATION_H
+#define SIMDUTF_LSX_IMPLEMENTATION_H
namespace simdutf {
-namespace fallback {
+namespace lsx {
namespace {
using namespace simdutf;
@@ -6401,8 +6454,8 @@ using namespace simdutf;
class implementation final : public simdutf::implementation {
public:
simdutf_really_inline implementation()
- : simdutf::implementation("fallback", "Generic fallback implementation",
- 0) {}
+ : simdutf::implementation("lsx", "LOONGARCH SX",
+ internal::instruction_set::LSX) {}
simdutf_warn_unused int detect_encodings(const char *input,
size_t length) const noexcept final;
simdutf_warn_unused bool validate_utf8(const char *buf,
@@ -6487,12 +6540,6 @@ class implementation final : public simdutf::implementation {
const char16_t *buf, size_t len, char *utf8_buffer) const noexcept final;
simdutf_warn_unused size_t convert_valid_utf16be_to_utf8(
const char16_t *buf, size_t len, char *utf8_buffer) const noexcept final;
- simdutf_warn_unused size_t convert_utf32_to_utf8(
- const char32_t *buf, size_t len, char *utf8_buffer) const noexcept final;
- simdutf_warn_unused result convert_utf32_to_utf8_with_errors(
- const char32_t *buf, size_t len, char *utf8_buffer) const noexcept final;
- simdutf_warn_unused size_t convert_valid_utf32_to_utf8(
- const char32_t *buf, size_t len, char *utf8_buffer) const noexcept final;
simdutf_warn_unused size_t
convert_utf32_to_latin1(const char32_t *buf, size_t len,
char *latin1_output) const noexcept final;
@@ -6502,6 +6549,12 @@ class implementation final : public simdutf::implementation {
simdutf_warn_unused size_t
convert_valid_utf32_to_latin1(const char32_t *buf, size_t len,
char *latin1_output) const noexcept final;
+ simdutf_warn_unused size_t convert_utf32_to_utf8(
+ const char32_t *buf, size_t len, char *utf8_buffer) const noexcept final;
+ simdutf_warn_unused result convert_utf32_to_utf8_with_errors(
+ const char32_t *buf, size_t len, char *utf8_buffer) const noexcept final;
+ simdutf_warn_unused size_t convert_valid_utf32_to_utf8(
+ const char32_t *buf, size_t len, char *utf8_buffer) const noexcept final;
simdutf_warn_unused size_t
convert_utf32_to_utf16le(const char32_t *buf, size_t len,
char16_t *utf16_buffer) const noexcept final;
@@ -6576,3080 +6629,5954 @@ class implementation final : public simdutf::implementation {
utf8_length_from_latin1(const char *input, size_t length) const noexcept;
simdutf_warn_unused size_t maximal_binary_length_from_base64(
const char *input, size_t length) const noexcept;
- simdutf_warn_unused result base64_to_binary(
- const char *input, size_t length, char *output, base64_options options,
- last_chunk_handling_options last_chunk_options) const noexcept;
+ simdutf_warn_unused result
+ base64_to_binary(const char *input, size_t length, char *output,
+ base64_options options) const noexcept;
simdutf_warn_unused size_t maximal_binary_length_from_base64(
const char16_t *input, size_t length) const noexcept;
- simdutf_warn_unused result base64_to_binary(
- const char16_t *input, size_t length, char *output,
- base64_options options,
- last_chunk_handling_options last_chunk_options) const noexcept;
+ simdutf_warn_unused result
+ base64_to_binary(const char16_t *input, size_t length, char *output,
+ base64_options options) const noexcept;
simdutf_warn_unused size_t base64_length_from_binary(
size_t length, base64_options options) const noexcept;
size_t binary_to_base64(const char *input, size_t length, char *output,
base64_options options) const noexcept;
+
+ simdutf_warn_unused virtual result
+ base64_to_binary(const char *input, size_t length, char *output,
+ base64_options options,
+ last_chunk_handling_options last_chunk_options =
+ last_chunk_handling_options::loose) const noexcept;
+ simdutf_warn_unused virtual full_result base64_to_binary_details(
+ const char *input, size_t length, char *output, base64_options options,
+ last_chunk_handling_options last_chunk_options =
+ last_chunk_handling_options::loose) const noexcept;
+ simdutf_warn_unused virtual result
+ base64_to_binary(const char16_t *input, size_t length, char *output,
+ base64_options options,
+ last_chunk_handling_options last_chunk_options =
+ last_chunk_handling_options::loose) const noexcept;
+ simdutf_warn_unused virtual full_result base64_to_binary_details(
+ const char16_t *input, size_t length, char *output,
+ base64_options options,
+ last_chunk_handling_options last_chunk_options =
+ last_chunk_handling_options::loose) const noexcept;
};
-} // namespace fallback
+
+} // namespace lsx
} // namespace simdutf
-#endif // SIMDUTF_FALLBACK_IMPLEMENTATION_H
-/* end file src/simdutf/fallback/implementation.h */
+#endif // SIMDUTF_LSX_IMPLEMENTATION_H
+/* end file src/simdutf/lsx/implementation.h */
-/* begin file src/simdutf/fallback/begin.h */
-// redefining SIMDUTF_IMPLEMENTATION to "fallback"
-// #define SIMDUTF_IMPLEMENTATION fallback
-/* end file src/simdutf/fallback/begin.h */
+/* begin file src/simdutf/lsx/begin.h */
+// redefining SIMDUTF_IMPLEMENTATION to "lsx"
+// #define SIMDUTF_IMPLEMENTATION lsx
+/* end file src/simdutf/lsx/begin.h */
// Declarations
-/* begin file src/simdutf/fallback/bitmanipulation.h */
-#ifndef SIMDUTF_FALLBACK_BITMANIPULATION_H
-#define SIMDUTF_FALLBACK_BITMANIPULATION_H
-
-#include
-
-namespace simdutf {
-namespace fallback {
-namespace {} // unnamed namespace
-} // namespace fallback
-} // namespace simdutf
+/* begin file src/simdutf/lsx/intrinsics.h */
+#ifndef SIMDUTF_LSX_INTRINSICS_H
+#define SIMDUTF_LSX_INTRINSICS_H
-#endif // SIMDUTF_FALLBACK_BITMANIPULATION_H
-/* end file src/simdutf/fallback/bitmanipulation.h */
-/* begin file src/simdutf/fallback/end.h */
-/* end file src/simdutf/fallback/end.h */
+// This should be the correct header whether
+// you use visual studio or other compilers.
+#include
-#endif // SIMDUTF_IMPLEMENTATION_FALLBACK
-#endif // SIMDUTF_FALLBACK_H
-/* end file src/simdutf/fallback.h */
+#endif // SIMDUTF_LSX_INTRINSICS_H
+/* end file src/simdutf/lsx/intrinsics.h */
+/* begin file src/simdutf/lsx/bitmanipulation.h */
+#ifndef SIMDUTF_LSX_BITMANIPULATION_H
+#define SIMDUTF_LSX_BITMANIPULATION_H
-/* begin file src/scalar/utf8.h */
-#ifndef SIMDUTF_UTF8_H
-#define SIMDUTF_UTF8_H
+#include
namespace simdutf {
-namespace scalar {
+namespace lsx {
namespace {
-namespace utf8 {
-#if SIMDUTF_IMPLEMENTATION_FALLBACK || SIMDUTF_IMPLEMENTATION_RVV
-// only used by the fallback kernel.
-// credit: based on code from Google Fuchsia (Apache Licensed)
-inline simdutf_warn_unused bool validate(const char *buf, size_t len) noexcept {
- const uint8_t *data = reinterpret_cast(buf);
- uint64_t pos = 0;
- uint32_t code_point = 0;
- while (pos < len) {
- // check of the next 16 bytes are ascii.
- uint64_t next_pos = pos + 16;
- if (next_pos <=
- len) { // if it is safe to read 16 more bytes, check that they are ascii
- uint64_t v1;
- std::memcpy(&v1, data + pos, sizeof(uint64_t));
- uint64_t v2;
- std::memcpy(&v2, data + pos + sizeof(uint64_t), sizeof(uint64_t));
- uint64_t v{v1 | v2};
- if ((v & 0x8080808080808080) == 0) {
- pos = next_pos;
- continue;
- }
- }
- unsigned char byte = data[pos];
- while (byte < 0b10000000) {
- if (++pos == len) {
- return true;
- }
- byte = data[pos];
- }
+simdutf_really_inline int count_ones(uint64_t input_num) {
+ return __lsx_vpickve2gr_w(__lsx_vpcnt_d(__lsx_vreplgr2vr_d(input_num)), 0);
+}
- if ((byte & 0b11100000) == 0b11000000) {
- next_pos = pos + 2;
- if (next_pos > len) {
- return false;
- }
- if ((data[pos + 1] & 0b11000000) != 0b10000000) {
- return false;
- }
- // range check
- code_point = (byte & 0b00011111) << 6 | (data[pos + 1] & 0b00111111);
- if ((code_point < 0x80) || (0x7ff < code_point)) {
- return false;
- }
- } else if ((byte & 0b11110000) == 0b11100000) {
- next_pos = pos + 3;
- if (next_pos > len) {
- return false;
- }
- if ((data[pos + 1] & 0b11000000) != 0b10000000) {
- return false;
- }
- if ((data[pos + 2] & 0b11000000) != 0b10000000) {
- return false;
- }
- // range check
- code_point = (byte & 0b00001111) << 12 |
- (data[pos + 1] & 0b00111111) << 6 |
- (data[pos + 2] & 0b00111111);
- if ((code_point < 0x800) || (0xffff < code_point) ||
- (0xd7ff < code_point && code_point < 0xe000)) {
- return false;
- }
- } else if ((byte & 0b11111000) == 0b11110000) { // 0b11110000
- next_pos = pos + 4;
- if (next_pos > len) {
- return false;
- }
- if ((data[pos + 1] & 0b11000000) != 0b10000000) {
- return false;
- }
- if ((data[pos + 2] & 0b11000000) != 0b10000000) {
- return false;
- }
- if ((data[pos + 3] & 0b11000000) != 0b10000000) {
- return false;
- }
- // range check
- code_point =
- (byte & 0b00000111) << 18 | (data[pos + 1] & 0b00111111) << 12 |
- (data[pos + 2] & 0b00111111) << 6 | (data[pos + 3] & 0b00111111);
- if (code_point <= 0xffff || 0x10ffff < code_point) {
- return false;
- }
- } else {
- // we may have a continuation
- return false;
- }
- pos = next_pos;
- }
- return true;
+#if SIMDUTF_NEED_TRAILING_ZEROES
+simdutf_really_inline int trailing_zeroes(uint64_t input_num) {
+ return __builtin_ctzll(input_num);
}
#endif
-inline simdutf_warn_unused result validate_with_errors(const char *buf,
- size_t len) noexcept {
- const uint8_t *data = reinterpret_cast(buf);
- size_t pos = 0;
- uint32_t code_point = 0;
- while (pos < len) {
- // check of the next 16 bytes are ascii.
- size_t next_pos = pos + 16;
- if (next_pos <=
- len) { // if it is safe to read 16 more bytes, check that they are ascii
- uint64_t v1;
- std::memcpy(&v1, data + pos, sizeof(uint64_t));
- uint64_t v2;
- std::memcpy(&v2, data + pos + sizeof(uint64_t), sizeof(uint64_t));
- uint64_t v{v1 | v2};
- if ((v & 0x8080808080808080) == 0) {
- pos = next_pos;
- continue;
- }
- }
- unsigned char byte = data[pos];
+} // unnamed namespace
+} // namespace lsx
+} // namespace simdutf
- while (byte < 0b10000000) {
- if (++pos == len) {
- return result(error_code::SUCCESS, len);
- }
- byte = data[pos];
- }
+#endif // SIMDUTF_LSX_BITMANIPULATION_H
+/* end file src/simdutf/lsx/bitmanipulation.h */
+/* begin file src/simdutf/lsx/simd.h */
+#ifndef SIMDUTF_LSX_SIMD_H
+#define SIMDUTF_LSX_SIMD_H
- if ((byte & 0b11100000) == 0b11000000) {
- next_pos = pos + 2;
- if (next_pos > len) {
- return result(error_code::TOO_SHORT, pos);
- }
- if ((data[pos + 1] & 0b11000000) != 0b10000000) {
- return result(error_code::TOO_SHORT, pos);
- }
- // range check
- code_point = (byte & 0b00011111) << 6 | (data[pos + 1] & 0b00111111);
- if ((code_point < 0x80) || (0x7ff < code_point)) {
- return result(error_code::OVERLONG, pos);
- }
- } else if ((byte & 0b11110000) == 0b11100000) {
- next_pos = pos + 3;
- if (next_pos > len) {
- return result(error_code::TOO_SHORT, pos);
- }
- if ((data[pos + 1] & 0b11000000) != 0b10000000) {
- return result(error_code::TOO_SHORT, pos);
- }
- if ((data[pos + 2] & 0b11000000) != 0b10000000) {
- return result(error_code::TOO_SHORT, pos);
- }
- // range check
- code_point = (byte & 0b00001111) << 12 |
- (data[pos + 1] & 0b00111111) << 6 |
- (data[pos + 2] & 0b00111111);
- if ((code_point < 0x800) || (0xffff < code_point)) {
- return result(error_code::OVERLONG, pos);
- }
- if (0xd7ff < code_point && code_point < 0xe000) {
- return result(error_code::SURROGATE, pos);
- }
- } else if ((byte & 0b11111000) == 0b11110000) { // 0b11110000
- next_pos = pos + 4;
- if (next_pos > len) {
- return result(error_code::TOO_SHORT, pos);
- }
- if ((data[pos + 1] & 0b11000000) != 0b10000000) {
- return result(error_code::TOO_SHORT, pos);
- }
- if ((data[pos + 2] & 0b11000000) != 0b10000000) {
- return result(error_code::TOO_SHORT, pos);
- }
- if ((data[pos + 3] & 0b11000000) != 0b10000000) {
- return result(error_code::TOO_SHORT, pos);
- }
- // range check
- code_point =
- (byte & 0b00000111) << 18 | (data[pos + 1] & 0b00111111) << 12 |
- (data[pos + 2] & 0b00111111) << 6 | (data[pos + 3] & 0b00111111);
- if (code_point <= 0xffff) {
- return result(error_code::OVERLONG, pos);
- }
- if (0x10ffff < code_point) {
- return result(error_code::TOO_LARGE, pos);
- }
- } else {
- // we either have too many continuation bytes or an invalid leading byte
- if ((byte & 0b11000000) == 0b10000000) {
- return result(error_code::TOO_LONG, pos);
- } else {
- return result(error_code::HEADER_BITS, pos);
- }
- }
- pos = next_pos;
+#include
+
+namespace simdutf {
+namespace lsx {
+namespace {
+namespace simd {
+
+template struct simd8;
+
+//
+// Base class of simd8 and simd8, both of which use __m128i
+// internally.
+//
+template > struct base_u8 {
+ __m128i value;
+ static const int SIZE = sizeof(value);
+
+ // Conversion from/to SIMD register
+ simdutf_really_inline base_u8(const __m128i _value) : value(_value) {}
+ simdutf_really_inline operator const __m128i &() const { return this->value; }
+ simdutf_really_inline operator __m128i &() { return this->value; }
+ simdutf_really_inline T first() const {
+ return __lsx_vpickve2gr_bu(this->value, 0);
+ }
+ simdutf_really_inline T last() const {
+ return __lsx_vpickve2gr_bu(this->value, 15);
}
- return result(error_code::SUCCESS, len);
-}
-// Finds the previous leading byte starting backward from buf and validates with
-// errors from there Used to pinpoint the location of an error when an invalid
-// chunk is detected We assume that the stream starts with a leading byte, and
-// to check that it is the case, we ask that you pass a pointer to the start of
-// the stream (start).
-inline simdutf_warn_unused result rewind_and_validate_with_errors(
- const char *start, const char *buf, size_t len) noexcept {
- // First check that we start with a leading byte
- if ((*start & 0b11000000) == 0b10000000) {
- return result(error_code::TOO_LONG, 0);
+ // Bit operations
+ simdutf_really_inline simd8 operator|(const simd8 other) const {
+ return __lsx_vor_v(this->value, other);
}
- size_t extra_len{0};
- // A leading byte cannot be further than 4 bytes away
- for (int i = 0; i < 5; i++) {
- unsigned char byte = *buf;
- if ((byte & 0b11000000) != 0b10000000) {
- break;
- } else {
- buf--;
- extra_len++;
- }
+ simdutf_really_inline simd8 operator&(const simd8 other) const {
+ return __lsx_vand_v(this->value, other);
+ }
+ simdutf_really_inline simd8 operator^(const simd8 other) const {
+ return __lsx_vxor_v(this->value, other);
+ }
+ simdutf_really_inline simd8 bit_andnot(const simd8 other) const {
+ return __lsx_vandn_v(this->value, other);
+ }
+ simdutf_really_inline simd8 operator~() const { return *this ^ 0xFFu; }
+ simdutf_really_inline simd8 &operator|=(const simd8 other) {
+ auto this_cast = static_cast *>(this);
+ *this_cast = *this_cast | other;
+ return *this_cast;
+ }
+ simdutf_really_inline simd8 &operator&=(const simd8 other) {
+ auto this_cast = static_cast *>(this);
+ *this_cast = *this_cast & other;
+ return *this_cast;
+ }
+ simdutf_really_inline simd8 &operator^=(const simd8 other) {
+ auto this_cast = static_cast *>(this);
+ *this_cast = *this_cast ^ other;
+ return *this_cast;
}
- result res = validate_with_errors(buf, len + extra_len);
- res.count -= extra_len;
- return res;
-}
+ friend simdutf_really_inline Mask operator==(const simd8 lhs,
+ const simd8 rhs) {
+ return __lsx_vseq_b(lhs, rhs);
+ }
-inline size_t count_code_points(const char *buf, size_t len) {
- const int8_t *p = reinterpret_cast(buf);
- size_t counter{0};
- for (size_t i = 0; i < len; i++) {
- // -65 is 0b10111111, anything larger in two-complement's should start a new
- // code point.
- if (p[i] > -65) {
- counter++;
- }
+ template
+ simdutf_really_inline simd8 prev(const simd8 prev_chunk) const {
+ return __lsx_vor_v(__lsx_vbsll_v(this->value, N),
+ __lsx_vbsrl_v(prev_chunk.value, 16 - N));
}
- return counter;
-}
+};
-inline size_t utf16_length_from_utf8(const char *buf, size_t len) {
- const int8_t *p = reinterpret_cast(buf);
- size_t counter{0};
- for (size_t i = 0; i < len; i++) {
- if (p[i] > -65) {
- counter++;
- }
- if (uint8_t(p[i]) >= 240) {
- counter++;
- }
+// SIMD byte mask type (returned by things like eq and gt)
+template <> struct simd8 : base_u8 {
+ typedef uint16_t bitmask_t;
+ typedef uint32_t bitmask2_t;
+
+ static simdutf_really_inline simd8 splat(bool _value) {
+ return __lsx_vreplgr2vr_b(uint8_t(-(!!_value)));
}
- return counter;
-}
-simdutf_warn_unused inline size_t trim_partial_utf8(const char *input,
- size_t length) {
- if (length < 3) {
- switch (length) {
- case 2:
- if (uint8_t(input[length - 1]) >= 0xc0) {
- return length - 1;
- } // 2-, 3- and 4-byte characters with only 1 byte left
- if (uint8_t(input[length - 2]) >= 0xe0) {
- return length - 2;
- } // 3- and 4-byte characters with only 2 bytes left
- return length;
- case 1:
- if (uint8_t(input[length - 1]) >= 0xc0) {
- return length - 1;
- } // 2-, 3- and 4-byte characters with only 1 byte left
- return length;
- case 0:
- return length;
- }
+ simdutf_really_inline simd8(const __m128i _value) : base_u8(_value) {}
+ // False constructor
+ simdutf_really_inline simd8() : simd8(__lsx_vldi(0)) {}
+ // Splat constructor
+ simdutf_really_inline simd8(bool _value) : simd8(splat(_value)) {}
+ simdutf_really_inline void store(uint8_t dst[16]) const {
+ return __lsx_vst(this->value, dst, 0);
}
- if (uint8_t(input[length - 1]) >= 0xc0) {
- return length - 1;
- } // 2-, 3- and 4-byte characters with only 1 byte left
- if (uint8_t(input[length - 2]) >= 0xe0) {
- return length - 2;
- } // 3- and 4-byte characters with only 1 byte left
- if (uint8_t(input[length - 3]) >= 0xf0) {
- return length - 3;
- } // 4-byte characters with only 3 bytes left
- return length;
-}
-} // namespace utf8
-} // unnamed namespace
-} // namespace scalar
-} // namespace simdutf
+ simdutf_really_inline uint32_t to_bitmask() const {
+ return __lsx_vpickve2gr_wu(__lsx_vmsknz_b(*this), 0);
+ }
-#endif
-/* end file src/scalar/utf8.h */
-/* begin file src/scalar/utf16.h */
-#ifndef SIMDUTF_UTF16_H
-#define SIMDUTF_UTF16_H
+ simdutf_really_inline bool any() const {
+ return __lsx_vpickve2gr_hu(__lsx_vmsknz_b(*this), 0) != 0;
+ }
+ simdutf_really_inline bool none() const {
+ return __lsx_vpickve2gr_hu(__lsx_vmsknz_b(*this), 0) == 0;
+ }
+ simdutf_really_inline bool all() const { // true when all 16 lanes are nonzero
+ return __lsx_vpickve2gr_hu(__lsx_vmsknz_b(*this), 0) == 0xFFFF; // all 16 mask bits set
+ }
+};
-namespace simdutf {
-namespace scalar {
-namespace {
-namespace utf16 {
+// Unsigned bytes
+template <> struct simd8 : base_u8 {
+ static simdutf_really_inline simd8 splat(uint8_t _value) { // broadcast _value to every byte lane
+ return __lsx_vreplgr2vr_b(_value);
+ }
+ static simdutf_really_inline simd8 zero() { return __lsx_vldi(0); } // vector with every lane equal to 0
+ static simdutf_really_inline simd8 load(const uint8_t *values) { // read one vector starting at values
+ return __lsx_vld(values, 0); // second argument is the offset from values, here 0
+ }
+ simdutf_really_inline simd8(const __m128i _value)
+ : base_u8(_value) {} // wrap a raw LSX register
+ // Zero constructor: delegates to zero(), so every lane is 0
+ simdutf_really_inline simd8() : simd8(zero()) {}
+ // Array constructor: delegates to load(), reading one vector from values
+ simdutf_really_inline simd8(const uint8_t values[16]) : simd8(load(values)) {}
+ // Splat constructor: delegates to splat(), so every lane holds _value
+ simdutf_really_inline simd8(uint8_t _value) : simd8(splat(_value)) {}
+ // Member-by-member initialization
-inline simdutf_warn_unused uint16_t swap_bytes(const uint16_t word) {
- return uint16_t((word >> 8) | (word << 8));
-}
+ simdutf_really_inline // lane-by-lane constructor: arguments fill the vector in order, v0 first
+ simd8(uint8_t v0, uint8_t v1, uint8_t v2, uint8_t v3, uint8_t v4, uint8_t v5,
+ uint8_t v6, uint8_t v7, uint8_t v8, uint8_t v9, uint8_t v10,
+ uint8_t v11, uint8_t v12, uint8_t v13, uint8_t v14, uint8_t v15)
+ : simd8((__m128i)v16u8{v0, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11,
+ v12, v13, v14, v15}) {} // the v16u8 aggregate is cast to __m128i and passed to the register constructor
-template
-inline simdutf_warn_unused bool validate(const char16_t *buf,
- size_t len) noexcept {
- const uint16_t *data = reinterpret_cast(buf);
- uint64_t pos = 0;
- while (pos < len) {
- uint16_t word =
- !match_system(big_endian) ? swap_bytes(data[pos]) : data[pos];
- if ((word & 0xF800) == 0xD800) {
- if (pos + 1 >= len) {
- return false;
- }
- uint16_t diff = uint16_t(word - 0xD800);
- if (diff > 0x3FF) {
- return false;
- }
- uint16_t next_word =
- !match_system(big_endian) ? swap_bytes(data[pos + 1]) : data[pos + 1];
- uint16_t diff2 = uint16_t(next_word - 0xDC00);
- if (diff2 > 0x3FF) {
- return false;
- }
- pos += 2;
- } else {
- pos++;
- }
+ // Repeat 16 values as many times as necessary (usually for lookup tables); here it forwards once to the 16-argument constructor
+ simdutf_really_inline static simd8
+ repeat_16(uint8_t v0, uint8_t v1, uint8_t v2, uint8_t v3, uint8_t v4,
+ uint8_t v5, uint8_t v6, uint8_t v7, uint8_t v8, uint8_t v9,
+ uint8_t v10, uint8_t v11, uint8_t v12, uint8_t v13, uint8_t v14,
+ uint8_t v15) {
+ return simd8(v0, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12,
+ v13, v14, v15);
 }
- return true;
-}
-template
-inline simdutf_warn_unused result validate_with_errors(const char16_t *buf,
- size_t len) noexcept {
- const uint16_t *data = reinterpret_cast(buf);
- size_t pos = 0;
- while (pos < len) {
- uint16_t word =
- !match_system(big_endian) ? swap_bytes(data[pos]) : data[pos];
- if ((word & 0xF800) == 0xD800) {
- if (pos + 1 >= len) {
- return result(error_code::SURROGATE, pos);
- }
- uint16_t diff = uint16_t(word - 0xD800);
- if (diff > 0x3FF) {
- return result(error_code::SURROGATE, pos);
- }
- uint16_t next_word =
- !match_system(big_endian) ? swap_bytes(data[pos + 1]) : data[pos + 1];
- uint16_t diff2 = uint16_t(next_word - 0xDC00);
- if (diff2 > 0x3FF) {
- return result(error_code::SURROGATE, pos);
- }
- pos += 2;
- } else {
- pos++;
- }
+ // Store to array: writes the whole vector to dst
+ simdutf_really_inline void store(uint8_t dst[16]) const {
+ return __lsx_vst(this->value, dst, 0); // third argument is the offset from dst, here 0
 }
- return result(error_code::SUCCESS, pos);
-}
-template
-inline size_t count_code_points(const char16_t *buf, size_t len) {
- // We are not BOM aware.
- const uint16_t *p = reinterpret_cast(buf);
- size_t counter{0};
- for (size_t i = 0; i < len; i++) {
- uint16_t word = !match_system(big_endian) ? swap_bytes(p[i]) : p[i];
- counter += ((word & 0xFC00) != 0xDC00);
+ // Saturated math: results clamp to the unsigned byte range instead of wrapping
+ simdutf_really_inline simd8
+ saturating_add(const simd8 other) const {
+ return __lsx_vsadd_bu(this->value, other); // per-lane unsigned add, clamped at 255
+ }
+ simdutf_really_inline simd8 // per-lane unsigned subtraction that saturates instead of wrapping
+ saturating_sub(const simd8 other) const {
+ return __lsx_vssub_bu(this->value, other); // this - other per lane, clamped at 0
 }
- return counter;
-}
-template
-inline size_t utf8_length_from_utf16(const char16_t *buf, size_t len) {
- // We are not BOM aware.
- const uint16_t *p = reinterpret_cast(buf);
- size_t counter{0};
- for (size_t i = 0; i < len; i++) {
- uint16_t word = !match_system(big_endian) ? swap_bytes(p[i]) : p[i];
- counter++; // ASCII
- counter += static_cast(
- word >
- 0x7F); // non-ASCII is at least 2 bytes, surrogates are 2*2 == 4 bytes
- counter += static_cast((word > 0x7FF && word <= 0xD7FF) ||
- (word >= 0xE000)); // three-byte
+ // Addition/subtraction are the same for signed and unsigned: the byte-wise intrinsic is shared
+ simdutf_really_inline simd8
+ operator+(const simd8 other) const {
+ return __lsx_vadd_b(this->value, other); // per-lane 8-bit add; overflow wraps
+ }
+ simdutf_really_inline simd8 // per-lane difference this - other
+ operator-(const simd8 other) const {
+ return __lsx_vsub_b(this->value, other); // per-lane 8-bit subtract; underflow wraps
+ }
+ simdutf_really_inline simd8 &operator+=(const simd8 other) { // in-place form of operator+
+ *this = *this + other;
+ return *this; // returning *this allows chaining
+ }
+ simdutf_really_inline simd8 &operator-=(const simd8 other) { // in-place form of operator-
+ *this = *this - other;
+ return *this; // returning *this allows chaining
 }
- return counter;
-}
-template
-inline size_t utf32_length_from_utf16(const char16_t *buf, size_t len) {
- // We are not BOM aware.
- const uint16_t *p = reinterpret_cast(buf);
- size_t counter{0};
- for (size_t i = 0; i < len; i++) {
- uint16_t word = !match_system(big_endian) ? swap_bytes(p[i]) : p[i];
- counter += ((word & 0xFC00) != 0xDC00);
+ // Order-specific operations: lanes are compared as unsigned bytes
+ simdutf_really_inline simd8
+ max_val(const simd8 other) const {
+ return __lsx_vmax_bu(*this, other); // per-lane unsigned maximum
+ }
+ simdutf_really_inline simd8 // smaller of the two values in each lane
+ min_val(const simd8 other) const {
+ return __lsx_vmin_bu(*this, other); // per-lane unsigned minimum
+ }
+ simdutf_really_inline simd8 // lane mask of this <= other
+ operator<=(const simd8 other) const {
+ return __lsx_vsle_bu(*this, other); // unsigned less-or-equal, evaluated per lane
+ }
+ simdutf_really_inline simd8 // lane mask of this >= other
+ operator>=(const simd8 other) const {
+ return __lsx_vsle_bu(other, *this); // operands swapped: other <= this, unsigned
+ }
+ simdutf_really_inline simd8 // lane mask of this < other
+ operator<(const simd8 other) const {
+ return __lsx_vslt_bu(*this, other); // unsigned less-than, evaluated per lane
+ }
+ simdutf_really_inline simd8 // lane mask of this > other
+ operator>(const simd8 other) const {
+ return __lsx_vslt_bu(other, *this); // operands swapped: other < this, unsigned
+ }
+ // Same as >, but with a weaker contract: false = 0 and true = nonzero, with no promise
+ // that a true lane is all ones. This implementation simply forwards to operator>.
+ simdutf_really_inline simd8
+ gt_bits(const simd8 other) const {
+ return simd8(*this > other);
+ }
+ // Same as <, but with a weaker contract: false = 0 and true = nonzero, with no promise
+ // that a true lane is all ones. This implementation simply forwards to operator<.
+ simdutf_really_inline simd8
+ lt_bits(const simd8 other) const {
+ return simd8(*this < other);
 }
- return counter;
-}
-inline size_t latin1_length_from_utf16(size_t len) { return len; }
+ // Bit-specific operations: any_bits_set reports, per lane, whether any bit selected by bits is set
+ simdutf_really_inline simd8 any_bits_set(simd8 bits) const {
+ return __lsx_vslt_bu(__lsx_vldi(0), __lsx_vand_v(this->value, bits)); // 0 < (value & bits), unsigned: the masked lane is nonzero
+ }
+ simdutf_really_inline bool is_ascii() const { // true when no lane has its top bit set
+ return __lsx_vpickve2gr_hu(__lsx_vmskgez_b(this->value), 0) == 0xFFFF; // vmskgez_b sets one mask bit per lane that is >= 0 as a signed byte; all 16 must be set
+ }
-simdutf_really_inline void change_endianness_utf16(const char16_t *in,
- size_t size, char16_t *out) {
- const uint16_t *input = reinterpret_cast(in);
- uint16_t *output = reinterpret_cast(out);
- for (size_t i = 0; i < size; i++) {
- *output++ = uint16_t(input[i] >> 8 | input[i] << 8);
+ simdutf_really_inline bool any_bits_set_anywhere() const { // true when at least one lane is nonzero
+ return __lsx_vpickve2gr_hu(__lsx_vmsknz_b(this->value), 0) > 0; // 16-bit nonzero-lane mask is not empty
+ }
+ simdutf_really_inline bool any_bits_set_anywhere(simd8 bits) const { // true when any bit selected by bits is set in any lane
+ return (*this & bits).any_bits_set_anywhere(); // mask first, then reuse the no-argument overload
+ }
+ template simdutf_really_inline simd8 shr() const { // shift every byte lane right by the compile-time count N
+ return __lsx_vsrli_b(this->value, N); // logical shift: zeros enter from the top of each lane
+ }
+ template simdutf_really_inline simd8 shl() const { // shift every byte lane left by the compile-time count N
+ return __lsx_vslli_b(this->value, N); // bits shifted past the top of a lane are dropped
 }
-}
-template
-simdutf_warn_unused inline size_t trim_partial_utf16(const char16_t *input,
- size_t length) {
- if (length <= 1) {
- return length;
+ // Perform a lookup using each lane of *this as an index into the 16-entry lookup_table,
+ // assuming every index is between 0 and 15 (undefined behavior for out of range values)
+ template
+ simdutf_really_inline simd8 lookup_16(simd8 lookup_table) const {
+ return lookup_table.apply_lookup_16_to(*this); // the table object performs the shuffle
 }
- uint16_t last_word = uint16_t(input[length - 1]);
- last_word = !match_system(big_endian) ? swap_bytes(last_word) : last_word;
- length -= ((last_word & 0xFC00) == 0xD800);
- return length;
-}
-} // namespace utf16
-} // unnamed namespace
-} // namespace scalar
-} // namespace simdutf
+ template
+ simdutf_really_inline simd8 // convenience overload taking the 16 table entries as scalars
+ lookup_16(L replace0, L replace1, L replace2, L replace3, L replace4,
+ L replace5, L replace6, L replace7, L replace8, L replace9,
+ L replace10, L replace11, L replace12, L replace13, L replace14,
+ L replace15) const {
+ return lookup_16(simd8::repeat_16( // pack the entries with repeat_16, then use the table overload
+ replace0, replace1, replace2, replace3, replace4, replace5, replace6,
+ replace7, replace8, replace9, replace10, replace11, replace12,
+ replace13, replace14, replace15));
+ }
-#endif
-/* end file src/scalar/utf16.h */
-/* begin file src/scalar/utf32.h */
-#ifndef SIMDUTF_UTF32_H
-#define SIMDUTF_UTF32_H
+ template
+ simdutf_really_inline simd8 // treats *this as the table and original as the per-lane indices
+ apply_lookup_16_to(const simd8 original) const {
+ __m128i original_tmp = __lsx_vand_v(original, __lsx_vldi(0x1f)); // keep the low 5 bits of every index
+ return __lsx_vshuf_b(__lsx_vldi(0), *this, simd8(original_tmp)); // byte shuffle over (zero vector, *this); presumably indices 0..15 pick from *this and 16..31 from the zero vector - confirm against the vshuf.b reference
+ }
+};
-namespace simdutf {
-namespace scalar {
-namespace {
-namespace utf32 {
+// Signed bytes
+template <> struct simd8 {
+ __m128i value;
-inline simdutf_warn_unused bool validate(const char32_t *buf,
- size_t len) noexcept {
- const uint32_t *data = reinterpret_cast(buf);
- uint64_t pos = 0;
- for (; pos < len; pos++) {
- uint32_t word = data[pos];
- if (word > 0x10FFFF || (word >= 0xD800 && word <= 0xDFFF)) {
- return false;
- }
+ static simdutf_really_inline simd8 splat(int8_t _value) { // broadcast _value to every byte lane
+ return __lsx_vreplgr2vr_b(_value);
 }
- return true;
-}
-
-inline simdutf_warn_unused result validate_with_errors(const char32_t *buf,
- size_t len) noexcept {
- const uint32_t *data = reinterpret_cast(buf);
- size_t pos = 0;
- for (; pos < len; pos++) {
- uint32_t word = data[pos];
- if (word > 0x10FFFF) {
- return result(error_code::TOO_LARGE, pos);
- }
- if (word >= 0xD800 && word <= 0xDFFF) {
- return result(error_code::SURROGATE, pos);
- }
+ static simdutf_really_inline simd8 zero() { return __lsx_vldi(0); } // vector with every lane equal to 0
+ static simdutf_really_inline simd8 load(const int8_t values[16]) { // read one vector starting at values
+ return __lsx_vld(values, 0); // second argument is the offset from values, here 0
 }
- return result(error_code::SUCCESS, pos);
-}
-inline size_t utf8_length_from_utf32(const char32_t *buf, size_t len) {
- // We are not BOM aware.
- const uint32_t *p = reinterpret_cast(buf);
- size_t counter{0};
- for (size_t i = 0; i < len; i++) {
- // credit: @ttsugriy for the vectorizable approach
- counter++; // ASCII
- counter += static_cast(p[i] > 0x7F); // two-byte
- counter += static_cast(p[i] > 0x7FF); // three-byte
- counter += static_cast(p[i] > 0xFFFF); // four-bytes
+ template
+ simdutf_really_inline void store_ascii_as_utf16(char16_t *p) const { // widen the 16 bytes to 16 UTF-16 code units written at p
+ __m128i zero = __lsx_vldi(0); // the padding byte paired with every input byte
+ if (match_system(big_endian)) { // presumably true when the requested byte order is the host order - verify against match_system
+ __lsx_vst(__lsx_vilvl_b(zero, (__m128i)this->value), // interleave the low 8 input bytes with zero bytes
+ reinterpret_cast(p), 0);
+ __lsx_vst(__lsx_vilvh_b(zero, (__m128i)this->value), // same for the high 8 input bytes
+ reinterpret_cast(p + 8), 0);
+ } else { // same interleave with the operands swapped, which swaps the two bytes of every code unit
+ __lsx_vst(__lsx_vilvl_b((__m128i)this->value, zero),
+ reinterpret_cast(p), 0);
+ __lsx_vst(__lsx_vilvh_b((__m128i)this->value, zero),
+ reinterpret_cast(p + 8), 0);
+ }
 }
- return counter;
-}
-inline size_t utf16_length_from_utf32(const char32_t *buf, size_t len) {
- // We are not BOM aware.
- const uint32_t *p = reinterpret_cast(buf);
- size_t counter{0};
- for (size_t i = 0; i < len; i++) {
- counter++; // non-surrogate word
- counter += static_cast(p[i] > 0xFFFF); // surrogate pair
+ simdutf_really_inline void store_ascii_as_utf32(char32_t *p) const { // widen the 16 bytes to 16 UTF-32 code units written at p
+ __m128i zero = __lsx_vldi(0); // padding used by both widening steps
+ __m128i in16low = __lsx_vilvl_b(zero, (__m128i)this->value); // low 8 bytes, each paired with a zero byte (8 -> 16 bits)
+ __m128i in16high = __lsx_vilvh_b(zero, (__m128i)this->value); // high 8 bytes, widened the same way
+ __m128i in32_0 = __lsx_vilvl_h(zero, in16low); // second step: pair each 16-bit unit with a zero unit (16 -> 32 bits)
+ __m128i in32_1 = __lsx_vilvh_h(zero, in16low);
+ __m128i in32_2 = __lsx_vilvl_h(zero, in16high);
+ __m128i in32_3 = __lsx_vilvh_h(zero, in16high);
+ __lsx_vst(in32_0, reinterpret_cast(p), 0); // four code units per store: p[0..3]
+ __lsx_vst(in32_1, reinterpret_cast(p + 4), 0); // p[4..7]
+ __lsx_vst(in32_2, reinterpret_cast(p + 8), 0); // p[8..11]
+ __lsx_vst(in32_3, reinterpret_cast(p + 12), 0); // p[12..15]
 }
- return counter;
-}
-inline size_t latin1_length_from_utf32(size_t len) {
- // We are not BOM aware.
- return len; // a utf32 codepoint will always represent 1 latin1 character
-}
+ // In places where the table can be reused, which is most uses in simdutf, it
+ // is worth it to do 4 table lookups, as there is no direct zero extension
+ // from u8 to u32. Each lookup below produces four of the sixteen output code units.
+ simdutf_really_inline void store_ascii_as_utf32_tbl(char32_t *p) const {
+ const simd8 tb1{0, 255, 255, 255, 1, 255, 255, 255, // index pattern: one input byte position, then three lanes of 255
+ 2, 255, 255, 255, 3, 255, 255, 255}; // 255 is presumably meant to yield a zero lane after the lookup - confirm against apply_lookup_16_to
+ const simd8 tb2{4, 255, 255, 255, 5, 255, 255, 255,
+ 6, 255, 255, 255, 7, 255, 255, 255};
+ const simd8 tb3{8, 255, 255, 255, 9, 255, 255, 255,
+ 10, 255, 255, 255, 11, 255, 255, 255};
+ const simd8 tb4{12, 255, 255, 255, 13, 255, 255, 255,
+ 14, 255, 255, 255, 15, 255, 255, 255};
-inline simdutf_warn_unused uint32_t swap_bytes(const uint32_t word) {
- return ((word >> 24) & 0xff) | // move byte 3 to byte 0
- ((word << 8) & 0xff0000) | // move byte 1 to byte 2
- ((word >> 8) & 0xff00) | // move byte 2 to byte 1
- ((word << 24) & 0xff000000); // byte 0 to byte 3
-}
+ // encourage store pairing and interleaving: two lookups, then their two stores
+ const auto shuf1 = this->apply_lookup_16_to(tb1); // *this acts as the table, tb1 supplies the indices
+ const auto shuf2 = this->apply_lookup_16_to(tb2);
+ shuf1.store(reinterpret_cast(p)); // code units p[0..3]
+ shuf2.store(reinterpret_cast(p + 4)); // code units p[4..7]
-} // namespace utf32
-} // unnamed namespace
-} // namespace scalar
-} // namespace simdutf
+ const auto shuf3 = this->apply_lookup_16_to(tb3);
+ const auto shuf4 = this->apply_lookup_16_to(tb4);
+ shuf3.store(reinterpret_cast(p + 8)); // code units p[8..11]
+ shuf4.store(reinterpret_cast(p + 12)); // code units p[12..15]
+ }
+ // Conversion from/to SIMD register: wrap a raw __m128i, or expose the wrapped one
+ simdutf_really_inline simd8(const __m128i _value) : value(_value) {}
+ simdutf_really_inline operator const __m128i &() const { return this->value; } // read-only reference to the wrapped register
-#endif
-/* end file src/scalar/utf32.h */
-/* begin file src/scalar/base64.h */
-#ifndef SIMDUTF_BASE64_H
-#define SIMDUTF_BASE64_H
+ simdutf_really_inline operator const __m128i() const { return this->value; } // by-value copy of the wrapped register
-#include
-#include
-#include
-#include
+ simdutf_really_inline operator __m128i &() { return this->value; } // mutable reference to the wrapped register
-namespace simdutf {
-namespace scalar {
-namespace {
-namespace base64 {
+ // Zero constructor: delegates to zero(), so every lane is 0
+ simdutf_really_inline simd8() : simd8(zero()) {}
+ // Splat constructor: delegates to splat(), so every lane holds _value
+ simdutf_really_inline simd8(int8_t _value) : simd8(splat(_value)) {}
+ // Array constructor: delegates to load(), reading one vector from values
+ simdutf_really_inline simd8(const int8_t *values) : simd8(load(values)) {}
+ // Member-by-member initialization
-// This function is not expected to be fast. Do not use in long loops.
-template bool is_ascii_white_space(char_type c) {
- return c == ' ' || c == '\t' || c == '\n' || c == '\r' || c == '\f';
-}
+ simdutf_really_inline simd8(int8_t v0, int8_t v1, int8_t v2, int8_t v3, // lane-by-lane constructor: arguments fill the vector in order, v0 first
+ int8_t v4, int8_t v5, int8_t v6, int8_t v7,
+ int8_t v8, int8_t v9, int8_t v10, int8_t v11,
+ int8_t v12, int8_t v13, int8_t v14, int8_t v15)
+ : simd8((__m128i)v16i8{v0, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11,
+ v12, v13, v14, v15}) {} // the v16i8 aggregate is cast to __m128i and passed to the register constructor
-template bool is_eight_byte(char_type c) {
- if (sizeof(char_type) == 1) {
- return true;
+ // Repeat 16 values as many times as necessary (usually for lookup tables); here it forwards once to the 16-argument constructor
+ simdutf_really_inline static simd8
+ repeat_16(int8_t v0, int8_t v1, int8_t v2, int8_t v3, int8_t v4, int8_t v5,
+ int8_t v6, int8_t v7, int8_t v8, int8_t v9, int8_t v10, int8_t v11,
+ int8_t v12, int8_t v13, int8_t v14, int8_t v15) {
+ return simd8(v0, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12,
+ v13, v14, v15);
 }
- return uint8_t(c) == c;
-}
-// Returns true upon success. The destination buffer must be large enough.
-// This functions assumes that the padding (=) has been removed.
-template
-result
-base64_tail_decode(char *dst, const char_type *src, size_t length,
- size_t padded_characters, // number of padding characters
- // '=', typically 0, 1, 2.
- base64_options options,
- last_chunk_handling_options last_chunk_options) {
- // This looks like 5 branches, but we expect the compiler to resolve this to a
- // single branch:
- const uint8_t *to_base64 = (options & base64_url)
- ? tables::base64::to_base64_url_value
- : tables::base64::to_base64_value;
- const uint32_t *d0 = (options & base64_url)
- ? tables::base64::base64_url::d0
- : tables::base64::base64_default::d0;
- const uint32_t *d1 = (options & base64_url)
- ? tables::base64::base64_url::d1
- : tables::base64::base64_default::d1;
- const uint32_t *d2 = (options & base64_url)
- ? tables::base64::base64_url::d2
- : tables::base64::base64_default::d2;
- const uint32_t *d3 = (options & base64_url)
- ? tables::base64::base64_url::d3
- : tables::base64::base64_default::d3;
+ // Store to array: writes the whole vector to dst
+ simdutf_really_inline void store(int8_t dst[16]) const {
+ return __lsx_vst(value, dst, 0); // third argument is the offset from dst, here 0
+ }
- const char_type *srcend = src + length;
- const char_type *srcinit = src;
- const char *dstinit = dst;
+ simdutf_really_inline operator simd8() const { // conversion that hands over the same register contents
+ return ((__m128i)this->value); // no lane is modified: the bits are reinterpreted, not converted
+ }
- uint32_t x;
- size_t idx;
- uint8_t buffer[4];
- while (true) {
- while (src + 4 <= srcend && is_eight_byte(src[0]) &&
- is_eight_byte(src[1]) && is_eight_byte(src[2]) &&
- is_eight_byte(src[3]) &&
- (x = d0[uint8_t(src[0])] | d1[uint8_t(src[1])] |
- d2[uint8_t(src[2])] | d3[uint8_t(src[3])]) < 0x01FFFFFF) {
- if (match_system(endianness::BIG)) {
- x = scalar::utf32::swap_bytes(x);
- }
- std::memcpy(dst, &x, 3); // optimization opportunity: copy 4 bytes
- dst += 3;
- src += 4;
- }
- idx = 0;
- // we need at least four characters.
- while (idx < 4 && src < srcend) {
- char_type c = *src;
- uint8_t code = to_base64[uint8_t(c)];
- buffer[idx] = uint8_t(code);
- if (is_eight_byte(c) && code <= 63) {
- idx++;
- } else if (code > 64 || !scalar::base64::is_eight_byte(c)) {
- return {INVALID_BASE64_CHARACTER, size_t(src - srcinit)};
- } else {
- // We have a space or a newline. We ignore it.
- }
- src++;
- }
- if (idx != 4) {
- if (last_chunk_options == last_chunk_handling_options::strict &&
- (idx != 1) && ((idx + padded_characters) & 3) != 0) {
- // The partial chunk was at src - idx
- return {BASE64_INPUT_REMAINDER, size_t(dst - dstinit)};
- } else if (last_chunk_options ==
- last_chunk_handling_options::stop_before_partial &&
- (idx != 1) && ((idx + padded_characters) & 3) != 0) {
- // Rewind src to before partial chunk
- src -= idx;
- return {SUCCESS, size_t(dst - dstinit)};
- } else {
- if (idx == 2) {
- uint32_t triple =
- (uint32_t(buffer[0]) << 3 * 6) + (uint32_t(buffer[1]) << 2 * 6);
- if (match_system(endianness::BIG)) {
- triple <<= 8;
- std::memcpy(dst, &triple, 1);
- } else {
- triple = scalar::utf32::swap_bytes(triple);
- triple >>= 8;
- std::memcpy(dst, &triple, 1);
- }
- dst += 1;
- } else if (idx == 3) {
- uint32_t triple = (uint32_t(buffer[0]) << 3 * 6) +
- (uint32_t(buffer[1]) << 2 * 6) +
- (uint32_t(buffer[2]) << 1 * 6);
- if (match_system(endianness::BIG)) {
- triple <<= 8;
- std::memcpy(dst, &triple, 2);
- } else {
- triple = scalar::utf32::swap_bytes(triple);
- triple >>= 8;
- std::memcpy(dst, &triple, 2);
- }
- dst += 2;
- } else if (idx == 1) {
- return {BASE64_INPUT_REMAINDER, size_t(dst - dstinit)};
- }
- return {SUCCESS, size_t(dst - dstinit)};
- }
- }
+ simdutf_really_inline simd8 // bitwise OR of the two vectors
+ operator|(const simd8 other) const {
+ return __lsx_vor_v((__m128i)value, (__m128i)other.value);
+ }
+ simdutf_really_inline simd8 // bitwise AND of the two vectors
+ operator&(const simd8 other) const {
+ return __lsx_vand_v((__m128i)value, (__m128i)other.value);
+ }
+ simdutf_really_inline simd8 // bitwise XOR of the two vectors
+ operator^(const simd8 other) const {
+ return __lsx_vxor_v((__m128i)value, (__m128i)other.value);
+ }
+ simdutf_really_inline simd8 // clears in this vector every bit that is set in other
+ bit_andnot(const simd8 other) const {
+ return __lsx_vandn_v((__m128i)other.value, (__m128i)value); // vandn_v complements its first operand, giving value & ~other
+ }
- uint32_t triple =
- (uint32_t(buffer[0]) << 3 * 6) + (uint32_t(buffer[1]) << 2 * 6) +
- (uint32_t(buffer[2]) << 1 * 6) + (uint32_t(buffer[3]) << 0 * 6);
- if (match_system(endianness::BIG)) {
- triple <<= 8;
- std::memcpy(dst, &triple, 3);
- } else {
- triple = scalar::utf32::swap_bytes(triple);
- triple >>= 8;
- std::memcpy(dst, &triple, 3);
- }
- dst += 3;
+ // Math: byte-wise arithmetic on the 16 lanes
+ simdutf_really_inline simd8
+ operator+(const simd8 other) const {
+ return __lsx_vadd_b((__m128i)value, (__m128i)other.value); // per-lane 8-bit add; overflow wraps
+ }
+ simdutf_really_inline simd8 // per-lane difference this - other
+ operator-(const simd8 other) const {
+ return __lsx_vsub_b((__m128i)value, (__m128i)other.value); // per-lane 8-bit subtract; overflow wraps
+ }
+ simdutf_really_inline simd8 &operator+=(const simd8 other) { // in-place form of operator+
+ *this = *this + other;
+ return *this; // returning *this allows chaining
+ }
+ simdutf_really_inline simd8 &operator-=(const simd8 other) { // in-place form of operator-
+ *this = *this - other;
+ return *this; // returning *this allows chaining
 }
-}
-// like base64_tail_decode, but it will not write past the end of the output
-// buffer. outlen is modified to reflect the number of bytes written. This
-// functions assumes that the padding (=) has been removed.
-// like base64_tail_decode, but it will not write past the end of the output
-// buffer. outlen is modified to reflect the number of bytes written. This
-// functions assumes that the padding (=) has been removed.
-template
-result base64_tail_decode_safe(
- char *dst, size_t &outlen, const char_type *src, size_t length,
- size_t padded_characters, // number of padding characters '=', typically 0,
- // 1, 2.
- base64_options options, last_chunk_handling_options last_chunk_options) {
- if (length == 0) {
- outlen = 0;
- return {SUCCESS, 0};
+ simdutf_really_inline bool is_ascii() const { // true when every lane is >= 0 as a signed byte
+ return (__lsx_vpickve2gr_hu(__lsx_vmskgez_b((__m128i)this->value), 0) == // vmskgez_b sets one mask bit per non-negative lane
+ 0xffff); // all 16 mask bits must be set
 }
- // This looks like 5 branches, but we expect the compiler to resolve this to a
- // single branch:
- const uint8_t *to_base64 = (options & base64_url)
- ? tables::base64::to_base64_url_value
- : tables::base64::to_base64_value;
- const uint32_t *d0 = (options & base64_url)
- ? tables::base64::base64_url::d0
- : tables::base64::base64_default::d0;
- const uint32_t *d1 = (options & base64_url)
- ? tables::base64::base64_url::d1
- : tables::base64::base64_default::d1;
- const uint32_t *d2 = (options & base64_url)
- ? tables::base64::base64_url::d2
- : tables::base64::base64_default::d2;
- const uint32_t *d3 = (options & base64_url)
- ? tables::base64::base64_url::d3
- : tables::base64::base64_default::d3;
-
- const char_type *srcend = src + length;
- const char_type *srcinit = src;
- const char *dstinit = dst;
- const char *dstend = dst + outlen;
-
- uint32_t x;
- size_t idx;
- uint8_t buffer[4];
- while (true) {
- while (src + 4 <= srcend && is_eight_byte(src[0]) &&
- is_eight_byte(src[1]) && is_eight_byte(src[2]) &&
- is_eight_byte(src[3]) &&
- (x = d0[uint8_t(src[0])] | d1[uint8_t(src[1])] |
- d2[uint8_t(src[2])] | d3[uint8_t(src[3])]) < 0x01FFFFFF) {
- if (dstend - dst < 3) {
- outlen = size_t(dst - dstinit);
- return {OUTPUT_BUFFER_TOO_SMALL, size_t(src - srcinit)};
- }
- if (match_system(endianness::BIG)) {
- x = scalar::utf32::swap_bytes(x);
- }
- std::memcpy(dst, &x, 3); // optimization opportunity: copy 4 bytes
- dst += 3;
- src += 4;
- }
- idx = 0;
- const char_type *srccur = src;
- // We need at least four characters.
- while (idx < 4 && src < srcend) {
- char_type c = *src;
- uint8_t code = to_base64[uint8_t(c)];
- buffer[idx] = uint8_t(code);
- if (is_eight_byte(c) && code <= 63) {
- idx++;
- } else if (code > 64 || !scalar::base64::is_eight_byte(c)) {
- outlen = size_t(dst - dstinit);
- return {INVALID_BASE64_CHARACTER, size_t(src - srcinit)};
- } else {
- // We have a space or a newline. We ignore it.
- }
- src++;
- }
- if (idx != 4) {
- if (last_chunk_options == last_chunk_handling_options::strict &&
- ((idx + padded_characters) & 3) != 0) {
- outlen = size_t(dst - dstinit);
- return {BASE64_INPUT_REMAINDER, size_t(src - srcinit)};
- } else if (last_chunk_options ==
- last_chunk_handling_options::stop_before_partial &&
- ((idx + padded_characters) & 3) != 0) {
- // Rewind src to before partial chunk
- src = srccur;
- outlen = size_t(dst - dstinit);
- return {SUCCESS, size_t(dst - dstinit)};
- } else { // loose mode
- if (idx == 0) {
- // No data left; return success
- outlen = size_t(dst - dstinit);
- return {SUCCESS, size_t(dst - dstinit)};
- } else if (idx == 1) {
- // Error: Incomplete chunk of length 1 is invalid in loose mode
- outlen = size_t(dst - dstinit);
- return {BASE64_INPUT_REMAINDER, size_t(src - srcinit)};
- } else if (idx == 2 || idx == 3) {
- // Check if there's enough space in the destination buffer
- size_t required_space = (idx == 2) ? 1 : 2;
- if (size_t(dstend - dst) < required_space) {
- outlen = size_t(dst - dstinit);
- return {OUTPUT_BUFFER_TOO_SMALL, size_t(srccur - srcinit)};
- }
- uint32_t triple = 0;
- if (idx == 2) {
- triple = (uint32_t(buffer[0]) << 18) + (uint32_t(buffer[1]) << 12);
- // Extract the first byte
- triple >>= 16;
- dst[0] = static_cast(triple & 0xFF);
- dst += 1;
- } else if (idx == 3) {
- triple = (uint32_t(buffer[0]) << 18) + (uint32_t(buffer[1]) << 12) +
- (uint32_t(buffer[2]) << 6);
- // Extract the first two bytes
- triple >>= 8;
- dst[0] = static_cast((triple >> 8) & 0xFF);
- dst[1] = static_cast(triple & 0xFF);
- dst += 2;
- }
- outlen = size_t(dst - dstinit);
- return {SUCCESS, size_t(dst - dstinit)};
- }
- }
- }
+ // Order-sensitive comparisons: lanes are compared as signed bytes
+ simdutf_really_inline simd8 max_val(const simd8 other) const {
+ return __lsx_vmax_b((__m128i)value, (__m128i)other.value); // per-lane signed maximum
+ }
+ simdutf_really_inline simd8 min_val(const simd8 other) const { // smaller of the two values in each lane
+ return __lsx_vmin_b((__m128i)value, (__m128i)other.value); // per-lane signed minimum
+ }
+ simdutf_really_inline simd8 operator>(const simd8 other) const { // lane mask of this > other
+ return __lsx_vslt_b((__m128i)other.value, (__m128i)value); // operands swapped: other < this, signed
+ }
+ simdutf_really_inline simd8 operator<(const simd8 other) const { // lane mask of this < other
+ return __lsx_vslt_b((__m128i)value, (__m128i)other.value); // signed less-than, evaluated per lane
+ }
+ simdutf_really_inline simd8 // lane mask of this == other
+ operator==(const simd8 other) const {
+ return __lsx_vseq_b((__m128i)value, (__m128i)other.value); // byte equality, evaluated per lane
+ }
- if (dstend - dst < 3) {
- outlen = size_t(dst - dstinit);
- return {OUTPUT_BUFFER_TOO_SMALL, size_t(srccur - srcinit)};
- }
- uint32_t triple = (uint32_t(buffer[0]) << 18) +
- (uint32_t(buffer[1]) << 12) + (uint32_t(buffer[2]) << 6) +
- (uint32_t(buffer[3]));
- if (match_system(endianness::BIG)) {
- triple <<= 8;
- std::memcpy(dst, &triple, 3);
- } else {
- triple = scalar::utf32::swap_bytes(triple);
- triple >>= 8;
- std::memcpy(dst, &triple, 3);
- }
- dst += 3;
+ template
+ simdutf_really_inline simd8 // combines this chunk with the tail of prev_chunk, offset by N bytes
+ prev(const simd8 prev_chunk) const {
+ return __lsx_vor_v(__lsx_vbsll_v(this->value, N), // this chunk byte-shifted left by N
+ __lsx_vbsrl_v(prev_chunk.value, 16 - N)); // prev_chunk byte-shifted right by 16 - N, presumably leaving its last N bytes in the low lanes - confirm against the vbsrl.v reference
 }
-}
-// Returns the number of bytes written. The destination buffer must be large
-// enough. It will add padding (=) if needed.
-size_t tail_encode_base64(char *dst, const char *src, size_t srclen,
- base64_options options) {
- // By default, we use padding if we are not using the URL variant.
- // This is check with ((options & base64_url) == 0) which returns true if we
- // are not using the URL variant. However, we also allow 'inversion' of the
- // convention with the base64_reverse_padding option. If the
- // base64_reverse_padding option is set, we use padding if we are using the
- // URL variant, and we omit it if we are not using the URL variant. This is
- // checked with
- // ((options & base64_reverse_padding) == base64_reverse_padding).
- bool use_padding =
- ((options & base64_url) == 0) ^
- ((options & base64_reverse_padding) == base64_reverse_padding);
- // This looks like 3 branches, but we expect the compiler to resolve this to
- // a single branch:
- const char *e0 = (options & base64_url) ? tables::base64::base64_url::e0
- : tables::base64::base64_default::e0;
- const char *e1 = (options & base64_url) ? tables::base64::base64_url::e1
- : tables::base64::base64_default::e1;
- const char *e2 = (options & base64_url) ? tables::base64::base64_url::e2
- : tables::base64::base64_default::e2;
- char *out = dst;
- size_t i = 0;
- uint8_t t1, t2, t3;
- for (; i + 2 < srclen; i += 3) {
- t1 = uint8_t(src[i]);
- t2 = uint8_t(src[i + 1]);
- t3 = uint8_t(src[i + 2]);
- *out++ = e0[t1];
- *out++ = e1[((t1 & 0x03) << 4) | ((t2 >> 4) & 0x0F)];
- *out++ = e1[((t2 & 0x0F) << 2) | ((t3 >> 6) & 0x03)];
- *out++ = e2[t3];
+ // Perform a lookup using each lane of *this as an index into the 16-entry lookup_table, assuming no index is larger than 15
+ template
+ simdutf_really_inline simd8 lookup_16(simd8 lookup_table) const {
+ return lookup_table.apply_lookup_16_to(*this); // the table object performs the shuffle
 }
- switch (srclen - i) {
- case 0:
- break;
- case 1:
- t1 = uint8_t(src[i]);
- *out++ = e0[t1];
- *out++ = e1[(t1 & 0x03) << 4];
- if (use_padding) {
- *out++ = '=';
- *out++ = '=';
- }
- break;
- default: /* case 2 */
- t1 = uint8_t(src[i]);
- t2 = uint8_t(src[i + 1]);
- *out++ = e0[t1];
- *out++ = e1[((t1 & 0x03) << 4) | ((t2 >> 4) & 0x0F)];
- *out++ = e2[(t2 & 0x0F) << 2];
- if (use_padding) {
- *out++ = '=';
- }
+ template
+ simdutf_really_inline simd8 // convenience overload taking the 16 table entries as scalars
+ lookup_16(L replace0, L replace1, L replace2, L replace3, L replace4,
+ L replace5, L replace6, L replace7, L replace8, L replace9,
+ L replace10, L replace11, L replace12, L replace13, L replace14,
+ L replace15) const {
+ return lookup_16(simd8::repeat_16( // pack the entries with repeat_16, then use the table overload
+ replace0, replace1, replace2, replace3, replace4, replace5, replace6,
+ replace7, replace8, replace9, replace10, replace11, replace12,
+ replace13, replace14, replace15));
 }
- return (size_t)(out - dst);
-}
-template
-simdutf_warn_unused size_t maximal_binary_length_from_base64(
- const char_type *input, size_t length) noexcept {
- // We follow https://infra.spec.whatwg.org/#forgiving-base64-decode
- size_t padding = 0;
- if (length > 0) {
- if (input[length - 1] == '=') {
- padding++;
- if (length > 1 && input[length - 2] == '=') {
- padding++;
- }
- }
- }
- size_t actual_length = length - padding;
- if (actual_length % 4 <= 1) {
- return actual_length / 4 * 3;
+ template
+ simdutf_really_inline simd8 // treats *this as the table and original as the per-lane indices
+ apply_lookup_16_to(const simd8 original) const {
+ __m128i original_tmp = __lsx_vand_v(original, __lsx_vldi(0x1f)); // keep the low 5 bits of every index
+ return __lsx_vshuf_b(__lsx_vldi(0), (__m128i)this->value, // byte shuffle over (zero vector, table); presumably indices 0..15 pick from the table and 16..31 from the zero vector - confirm against the vshuf.b reference
+ simd8(original_tmp));
 }
- // if we have a valid input, then the remainder must be 2 or 3 adding one or
- // two extra bytes.
- return actual_length / 4 * 3 + (actual_length % 4) - 1;
-}
+};
-simdutf_warn_unused size_t
-base64_length_from_binary(size_t length, base64_options options) noexcept {
- // By default, we use padding if we are not using the URL variant.
- // This is check with ((options & base64_url) == 0) which returns true if we
- // are not using the URL variant. However, we also allow 'inversion' of the
- // convention with the base64_reverse_padding option. If the
- // base64_reverse_padding option is set, we use padding if we are using the
- // URL variant, and we omit it if we are not using the URL variant. This is
- // checked with
- // ((options & base64_reverse_padding) == base64_reverse_padding).
- bool use_padding =
- ((options & base64_url) == 0) ^
- ((options & base64_reverse_padding) == base64_reverse_padding);
- if (!use_padding) {
- return length / 3 * 4 + ((length % 3) ? (length % 3) + 1 : 0);
+template struct simd8x64 {
+ static constexpr int NUM_CHUNKS = 64 / sizeof(simd8); // how many simd8 vectors cover one 64-byte block
+ static_assert(
+ NUM_CHUNKS == 4,
+ "LoongArch kernel should use four registers per 64-byte block.");
+ simd8 chunks[NUM_CHUNKS]; // the block itself, one vector per chunk
+
+ simd8x64(const simd8x64 &o) = delete; // no copy allowed
+ simd8x64 &
+ operator=(const simd8 other) = delete; // no assignment allowed; NOTE(review): the parameter is a single simd8, not a simd8x64 - confirm this is the intended overload
+ simd8x64() = delete; // no default constructor allowed
+
+ simdutf_really_inline simd8x64(const simd8 chunk0, const simd8 chunk1, // build a block from four already-loaded vectors
+ const simd8 chunk2, const simd8 chunk3)
+ : chunks{chunk0, chunk1, chunk2, chunk3} {} // stored in argument order
+ simdutf_really_inline simd8x64(const T *ptr) // load four consecutive vectors starting at ptr
+ : chunks{simd8::load(ptr),
+ simd8::load(ptr + sizeof(simd8) / sizeof(T)), // offsets are counted in elements of T: one vector further
+ simd8::load(ptr + 2 * sizeof(simd8) / sizeof(T)),
+ simd8::load(ptr + 3 * sizeof(simd8) / sizeof(T))} {}
+
+ simdutf_really_inline void store(T *ptr) const { // write the four chunks back to back starting at ptr
+ this->chunks[0].store(ptr + sizeof(simd8) * 0 / sizeof(T)); // chunk k goes to element offset k * sizeof(simd8) / sizeof(T)
+ this->chunks[1].store(ptr + sizeof(simd8) * 1 / sizeof(T));
+ this->chunks[2].store(ptr + sizeof(simd8) * 2 / sizeof(T));
+ this->chunks[3].store(ptr + sizeof(simd8) * 3 / sizeof(T));
 }
- return (length + 2) / 3 *
- 4; // We use padding to make the length a multiple of 4.
-}
-} // namespace base64
-} // unnamed namespace
-} // namespace scalar
-} // namespace simdutf
+ simdutf_really_inline simd8x64 &operator|=(const simd8x64 &other) {
+ this->chunks[0] |= other.chunks[0];
+ this->chunks[1] |= other.chunks[1];
+ this->chunks[2] |= other.chunks[2];
+ this->chunks[3] |= other.chunks[3];
+ return *this;
+ }
-#endif
-/* end file src/scalar/base64.h */
-/* begin file src/scalar/latin1_to_utf8/latin1_to_utf8.h */
-#ifndef SIMDUTF_LATIN1_TO_UTF8_H
-#define SIMDUTF_LATIN1_TO_UTF8_H
+ simdutf_really_inline simd8 reduce_or() const {
+ return (this->chunks[0] | this->chunks[1]) |
+ (this->chunks[2] | this->chunks[3]);
+ }
-namespace simdutf {
-namespace scalar {
-namespace {
-namespace latin1_to_utf8 {
+ simdutf_really_inline bool is_ascii() const { return reduce_or().is_ascii(); }
-inline size_t convert(const char *buf, size_t len, char *utf8_output) {
- const unsigned char *data = reinterpret_cast(buf);
- size_t pos = 0;
- size_t utf8_pos = 0;
- while (pos < len) {
- // try to convert the next block of 16 ASCII bytes
- if (pos + 16 <=
- len) { // if it is safe to read 16 more bytes, check that they are ascii
- uint64_t v1;
- ::memcpy(&v1, data + pos, sizeof(uint64_t));
- uint64_t v2;
- ::memcpy(&v2, data + pos + sizeof(uint64_t), sizeof(uint64_t));
- uint64_t v{v1 |
- v2}; // We are only interested in these bits: 1000 1000 1000
- // 1000, so it makes sense to concatenate everything
- if ((v & 0x8080808080808080) ==
- 0) { // if NONE of these are set, e.g. all of them are zero, then
- // everything is ASCII
- size_t final_pos = pos + 16;
- while (pos < final_pos) {
- utf8_output[utf8_pos++] = char(buf[pos]);
- pos++;
- }
- continue;
- }
- }
+ template
+ simdutf_really_inline void store_ascii_as_utf16(char16_t *ptr) const {
+ this->chunks[0].template store_ascii_as_utf16(ptr +
+ sizeof(simd8) * 0);
+ this->chunks[1].template store_ascii_as_utf16(ptr +
+ sizeof(simd8) * 1);
+ this->chunks[2].template store_ascii_as_utf16(ptr +
+ sizeof(simd8) * 2);
+ this->chunks[3].template store_ascii_as_utf16(ptr +
+ sizeof(simd8) * 3);
+ }
- unsigned char byte = data[pos];
- if ((byte & 0x80) == 0) { // if ASCII
- // will generate one UTF-8 bytes
- utf8_output[utf8_pos++] = char(byte);
- pos++;
- } else {
- // will generate two UTF-8 bytes
- utf8_output[utf8_pos++] = char((byte >> 6) | 0b11000000);
- utf8_output[utf8_pos++] = char((byte & 0b111111) | 0b10000000);
- pos++;
- }
+ simdutf_really_inline void store_ascii_as_utf32(char32_t *ptr) const {
+ this->chunks[0].store_ascii_as_utf32_tbl(ptr + sizeof(simd8) * 0);
+ this->chunks[1].store_ascii_as_utf32_tbl(ptr + sizeof(simd8) * 1);
+ this->chunks[2].store_ascii_as_utf32_tbl(ptr + sizeof(simd8) * 2);
+ this->chunks[3].store_ascii_as_utf32_tbl(ptr + sizeof(simd8) * 3);
}
- return utf8_pos;
-}
-inline size_t convert_safe(const char *buf, size_t len, char *utf8_output,
- size_t utf8_len) {
- const unsigned char *data = reinterpret_cast(buf);
- size_t pos = 0;
- size_t skip_pos = 0;
- size_t utf8_pos = 0;
- while (pos < len && utf8_pos < utf8_len) {
- // try to convert the next block of 16 ASCII bytes
- if (pos >= skip_pos && pos + 16 <= len &&
- utf8_pos + 16 <= utf8_len) { // if it is safe to read 16 more bytes,
- // check that they are ascii
- uint64_t v1;
- ::memcpy(&v1, data + pos, sizeof(uint64_t));
- uint64_t v2;
- ::memcpy(&v2, data + pos + sizeof(uint64_t), sizeof(uint64_t));
- uint64_t v{v1 |
- v2}; // We are only interested in these bits: 1000 1000 1000
- // 1000, so it makes sense to concatenate everything
- if ((v & 0x8080808080808080) ==
- 0) { // if NONE of these are set, e.g. all of them are zero, then
- // everything is ASCII
- ::memcpy(utf8_output + utf8_pos, buf + pos, 16);
- utf8_pos += 16;
- pos += 16;
- } else {
- // At least one of the next 16 bytes are not ASCII, we will process them
- // one by one
- skip_pos = pos + 16;
- }
- } else {
- const auto byte = data[pos];
- if ((byte & 0x80) == 0) { // if ASCII
- // will generate one UTF-8 bytes
- utf8_output[utf8_pos++] = char(byte);
- pos++;
- } else if (utf8_pos + 2 <= utf8_len) {
- // will generate two UTF-8 bytes
- utf8_output[utf8_pos++] = char((byte >> 6) | 0b11000000);
- utf8_output[utf8_pos++] = char((byte & 0b111111) | 0b10000000);
- pos++;
- } else {
- break;
- }
- }
+ simdutf_really_inline uint64_t to_bitmask() const {
+ __m128i mask = __lsx_vbsll_v(__lsx_vmsknz_b(this->chunks[3]), 6);
+ mask = __lsx_vor_v(mask, __lsx_vbsll_v(__lsx_vmsknz_b(this->chunks[2]), 4));
+ mask = __lsx_vor_v(mask, __lsx_vbsll_v(__lsx_vmsknz_b(this->chunks[1]), 2));
+ mask = __lsx_vor_v(mask, __lsx_vmsknz_b(this->chunks[0]));
+ return __lsx_vpickve2gr_du(mask, 0);
}
- return utf8_pos;
-}
-} // namespace latin1_to_utf8
-} // unnamed namespace
-} // namespace scalar
-} // namespace simdutf
+ simdutf_really_inline uint64_t eq(const T m) const {
+ const simd8 mask = simd8::splat(m);
+ return simd8x64(this->chunks[0] == mask, this->chunks[1] == mask,
+ this->chunks[2] == mask, this->chunks[3] == mask)
+ .to_bitmask();
+ }
-#endif
-/* end file src/scalar/latin1_to_utf8/latin1_to_utf8.h */
+ simdutf_really_inline uint64_t lteq(const T m) const {
+ const simd8 mask = simd8::splat(m);
+ return simd8x64(this->chunks[0] <= mask, this->chunks[1] <= mask,
+ this->chunks[2] <= mask, this->chunks[3] <= mask)
+ .to_bitmask();
+ }
-namespace simdutf {
-bool implementation::supported_by_runtime_system() const {
- uint32_t required_instruction_sets = this->required_instruction_sets();
- uint32_t supported_instruction_sets =
- internal::detect_supported_architectures();
- return ((supported_instruction_sets & required_instruction_sets) ==
- required_instruction_sets);
-}
+ simdutf_really_inline uint64_t in_range(const T low, const T high) const {
+ const simd8 mask_low = simd8::splat(low);
+ const simd8 mask_high = simd8::splat(high);
-simdutf_warn_unused encoding_type implementation::autodetect_encoding(
- const char *input, size_t length) const noexcept {
- // If there is a BOM, then we trust it.
- auto bom_encoding = simdutf::BOM::check_bom(input, length);
- if (bom_encoding != encoding_type::unspecified) {
- return bom_encoding;
+ return simd8x64(
+ (this->chunks[0] <= mask_high) & (this->chunks[0] >= mask_low),
+ (this->chunks[1] <= mask_high) & (this->chunks[1] >= mask_low),
+ (this->chunks[2] <= mask_high) & (this->chunks[2] >= mask_low),
+ (this->chunks[3] <= mask_high) & (this->chunks[3] >= mask_low))
+ .to_bitmask();
}
- // UTF8 is common, it includes ASCII, and is commonly represented
- // without a BOM, so if it fits, go with that. Note that it is still
- // possible to get it wrong, we are only 'guessing'. If some has UTF-16
- // data without a BOM, it could pass as UTF-8.
- //
- // An interesting twist might be to check for UTF-16 ASCII first (every
- // other byte is zero).
- if (validate_utf8(input, length)) {
- return encoding_type::UTF8;
+ simdutf_really_inline uint64_t not_in_range(const T low, const T high) const {
+ const simd8 |