268 lines
8.4 KiB
TypeScript
268 lines
8.4 KiB
TypeScript
import { C, INTERNAL, Internal, assertInternal, SIZE_OF_INT, SIZE_OF_SHORT } from './constants';
|
|
import { LookaheadIterator } from './lookahead_iterator';
|
|
import { unmarshalLanguageMetadata } from './marshal';
|
|
import { TRANSFER_BUFFER } from './parser';
|
|
|
|
// Matches symbol names made of the `tree_sitter_` prefix followed by one or
// more word characters. `Language.load` tests a module's exported keys against
// this pattern to find the language function.
const LANGUAGE_FUNCTION_REGEX = /^tree_sitter_\w+$/;
|
|
|
|
/**
 * Version information attached to a language, as returned by the
 * `metadata` getter of {@link Language}.
 */
export interface LanguageMetadata {
  /** Major component of the language's version. */
  readonly major_version: number;

  /** Minor component of the language's version. */
  readonly minor_version: number;

  /** Patch component of the language's version. */
  readonly patch_version: number;
}
|
|
|
|
/**
 * Decodes the list of symbol ids that a `*_wasm` helper left in the transfer
 * buffer: an `i32` element count followed by an `i32` address of the ids,
 * which are stored `SIZE_OF_SHORT` bytes apart.
 *
 * Each id is masked to 16 bits because `getValue(..., 'i16')` reads a signed
 * value, which would turn ids of 0x8000 and above into negative numbers.
 */
function readTransferredSymbolIds(): number[] {
  const count = C.getValue(TRANSFER_BUFFER, 'i32');
  const buffer = C.getValue(TRANSFER_BUFFER + SIZE_OF_INT, 'i32');
  const result = new Array<number>(count);

  for (let i = 0, address = buffer; i < count; i++, address += SIZE_OF_SHORT) {
    result[i] = C.getValue(address, 'i16') & 0xffff;
  }

  return result;
}

/**
 * An opaque object that defines how to parse a particular language.
 * The code for each `Language` is generated by the Tree-sitter CLI.
 */
export class Language {
  /** @internal */
  private [0] = 0; // Internal handle for Wasm

  /**
   * A list of all node types in the language. The index of each type in this
   * array is its node type id.
   */
  types: string[];

  /**
   * A list of all field names in the language. The index of each field name in
   * this array is its field id.
   */
  fields: (string | null)[];

  /** @internal */
  constructor(internal: Internal, address: number) {
    assertInternal(internal);
    this[0] = address;

    // Only symbols whose symbol type is below 2 get a name here; every other
    // index is left as a hole in the array.
    this.types = new Array<string>(C._ts_language_symbol_count(this[0]));
    for (let i = 0, n = this.types.length; i < n; i++) {
      if (C._ts_language_symbol_type(this[0], i) < 2) {
        this.types[i] = C.UTF8ToString(C._ts_language_symbol_name(this[0], i));
      }
    }

    // One slot more than the reported field count, so that ids 0..count are
    // all covered. Ids whose name pointer is 0 are stored as `null`.
    this.fields = new Array<string>(C._ts_language_field_count(this[0]) + 1);
    for (let i = 0, n = this.fields.length; i < n; i++) {
      const fieldName = C._ts_language_field_name_for_id(this[0], i);
      this.fields[i] = fieldName !== 0 ? C.UTF8ToString(fieldName) : null;
    }
  }

  /**
   * Gets the name of the language.
   *
   * @returns The name, or `null` when the native call returns a null pointer.
   */
  get name(): string | null {
    const ptr = C._ts_language_name(this[0]);
    if (ptr === 0) return null;
    return C.UTF8ToString(ptr);
  }

  /**
   * Gets the ABI version of the language.
   */
  get abiVersion(): number {
    return C._ts_language_abi_version(this[0]);
  }

  /**
   * Get the metadata for this language. This information is generated by the
   * CLI, and relies on the language author providing the correct metadata in
   * the language's `tree-sitter.json` file.
   *
   * @returns The metadata, or `null` when the length written to the transfer
   * buffer is 0.
   */
  get metadata(): LanguageMetadata | null {
    C._ts_language_metadata_wasm(this[0]);
    const length = C.getValue(TRANSFER_BUFFER, 'i32');
    if (length === 0) return null;
    // The payload starts right after the leading i32 length.
    return unmarshalLanguageMetadata(TRANSFER_BUFFER + SIZE_OF_INT);
  }

  /**
   * Gets the number of fields in the language.
   */
  get fieldCount(): number {
    // `fields` was allocated with one slot more than the field count.
    return this.fields.length - 1;
  }

  /**
   * Gets the number of states in the language.
   */
  get stateCount(): number {
    return C._ts_language_state_count(this[0]);
  }

  /**
   * Get the field id for a field name.
   *
   * @returns The index of `fieldName` in {@link Language.fields}, or `null`
   * when it is not present.
   */
  fieldIdForName(fieldName: string): number | null {
    const result = this.fields.indexOf(fieldName);
    return result !== -1 ? result : null;
  }

  /**
   * Get the field name for a field id.
   *
   * @returns The name stored at `fieldId`, or `null` when there is none.
   */
  fieldNameForId(fieldId: number): string | null {
    return this.fields[fieldId] ?? null;
  }

  /**
   * Get the node type id for a node type name.
   *
   * @param type - The node type name to look up.
   * @param named - Whether to look up a named node type.
   * @returns The id, or `null` when the native lookup returns 0.
   */
  idForNodeType(type: string, named: boolean): number | null {
    const typeLength = C.lengthBytesUTF8(type);
    const typeAddress = C._malloc(typeLength + 1);
    try {
      C.stringToUTF8(type, typeAddress, typeLength + 1);
      const result = C._ts_language_symbol_for_name(this[0], typeAddress, typeLength, named ? 1 : 0);
      return result || null;
    } finally {
      // Release the temporary string even if the copy or lookup throws.
      C._free(typeAddress);
    }
  }

  /**
   * Gets the number of node types in the language.
   */
  get nodeTypeCount(): number {
    return C._ts_language_symbol_count(this[0]);
  }

  /**
   * Get the node type name for a node type id.
   *
   * @returns The name, or `null` when the native call returns a null pointer.
   */
  nodeTypeForId(typeId: number): string | null {
    const name = C._ts_language_symbol_name(this[0], typeId);
    return name ? C.UTF8ToString(name) : null;
  }

  /**
   * Check if a node type is named.
   *
   * @see {@link https://tree-sitter.github.io/tree-sitter/using-parsers/2-basic-parsing.html#named-vs-anonymous-nodes}
   */
  nodeTypeIsNamed(typeId: number): boolean {
    return Boolean(C._ts_language_type_is_named_wasm(this[0], typeId));
  }

  /**
   * Check if a node type is visible.
   */
  nodeTypeIsVisible(typeId: number): boolean {
    return Boolean(C._ts_language_type_is_visible_wasm(this[0], typeId));
  }

  /**
   * Get the supertypes ids of this language.
   *
   * @see {@link https://tree-sitter.github.io/tree-sitter/using-parsers/6-static-node-types.html?highlight=supertype#supertype-nodes}
   */
  get supertypes(): number[] {
    C._ts_language_supertypes_wasm(this[0]);
    return readTransferredSymbolIds();
  }

  /**
   * Get the subtype ids for a given supertype node id.
   */
  subtypes(supertype: number): number[] {
    C._ts_language_subtypes_wasm(this[0], supertype);
    return readTransferredSymbolIds();
  }

  /**
   * Get the next state id for a given state id and node type id.
   */
  nextState(stateId: number, typeId: number): number {
    return C._ts_language_next_state(this[0], stateId, typeId);
  }

  /**
   * Create a new lookahead iterator for this language and parse state.
   *
   * This returns `null` if state is invalid for this language.
   *
   * Iterating {@link LookaheadIterator} will yield valid symbols in the given
   * parse state. Newly created lookahead iterators will return the `ERROR`
   * symbol from {@link LookaheadIterator#currentType}.
   *
   * Lookahead iterators can be useful for generating suggestions and improving
   * syntax error diagnostics. To get symbols valid in an `ERROR` node, use the
   * lookahead iterator on its first leaf node state. For `MISSING` nodes, a
   * lookahead iterator created on the previous non-extra leaf node may be
   * appropriate.
   */
  lookaheadIterator(stateId: number): LookaheadIterator | null {
    const address = C._ts_lookahead_iterator_new(this[0], stateId);
    if (address) return new LookaheadIterator(INTERNAL, address, this);
    return null;
  }

  /**
   * Load a language from a WebAssembly module.
   * The module can be provided as a path to a file or as a buffer.
   *
   * A `Uint8Array` is used as-is. A string is read with `fs/promises` when
   * `process.versions.node` is set, and fetched otherwise.
   *
   * @throws Error when the fetch response is not OK, or when the loaded module
   * exposes no matching language function.
   */
  static async load(input: string | Uint8Array): Promise<Language> {
    let binary: Uint8Array | WebAssembly.Module;
    if (input instanceof Uint8Array) {
      binary = input;
    // `versions` is also optional-chained so that a `process` shim without
    // `versions` falls through to the fetch branch instead of throwing.
    // eslint-disable-next-line @typescript-eslint/no-unnecessary-condition
    } else if (globalThis.process?.versions?.node) {
      const fs: typeof import('fs/promises') = await import('fs/promises');
      binary = await fs.readFile(input);
    } else {
      const response = await fetch(input);

      if (!response.ok) {
        const body = await response.text();
        throw new Error(`Language.load failed with status ${response.status}.\n\n${body}`);
      }

      // Clone before streaming: the body can only be consumed once, and the
      // fallback below needs to read it again.
      const retryResp = response.clone();
      try {
        binary = await WebAssembly.compileStreaming(response);
      } catch (reason) {
        console.error('wasm streaming compile failed:', reason);
        console.error('falling back to ArrayBuffer instantiation');
        // fallback, probably because of bad MIME type
        binary = new Uint8Array(await retryResp.arrayBuffer());
      }
    }

    const mod = await C.loadWebAssemblyModule(binary, { loadAsync: true });
    const symbolNames = Object.keys(mod);
    // Skip names that match the pattern but contain `external_scanner_`.
    const functionName = symbolNames.find(
      (key) => LANGUAGE_FUNCTION_REGEX.test(key) && !key.includes('external_scanner_'),
    );
    if (!functionName) {
      console.log(`Couldn't find language function in Wasm file. Symbols:\n${JSON.stringify(symbolNames, null, 2)}`);
      throw new Error('Language.load failed: no language function found in Wasm file');
    }
    const languageAddress = mod[functionName]();
    return new Language(INTERNAL, languageAddress);
  }
}
|