build: remove unicode files and script, move cliff.toml
This commit is contained in:
parent
c7d6fd7fa5
commit
42dd32d184
9 changed files with 35 additions and 303 deletions
0
script/cliff.toml → .github/cliff.toml
vendored
0
script/cliff.toml → .github/cliff.toml
vendored
68
Cargo.lock
generated
68
Cargo.lock
generated
|
|
@ -25,9 +25,9 @@ dependencies = [
|
|||
|
||||
[[package]]
|
||||
name = "anstream"
|
||||
version = "0.6.15"
|
||||
version = "0.6.17"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "64e15c1ab1f89faffbf04a634d5e1962e9074f2741eef6d97f3c4e322426d526"
|
||||
checksum = "23a1e53f0f5d86382dafe1cf314783b2044280f406e7e1506368220ad11b1338"
|
||||
dependencies = [
|
||||
"anstyle",
|
||||
"anstyle-parse",
|
||||
|
|
@ -40,43 +40,43 @@ dependencies = [
|
|||
|
||||
[[package]]
|
||||
name = "anstyle"
|
||||
version = "1.0.8"
|
||||
version = "1.0.9"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "1bec1de6f59aedf83baf9ff929c98f2ad654b97c9510f4e70cf6f661d49fd5b1"
|
||||
checksum = "8365de52b16c035ff4fcafe0092ba9390540e3e352870ac09933bebcaa2c8c56"
|
||||
|
||||
[[package]]
|
||||
name = "anstyle-parse"
|
||||
version = "0.2.5"
|
||||
version = "0.2.6"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "eb47de1e80c2b463c735db5b217a0ddc39d612e7ac9e2e96a5aed1f57616c1cb"
|
||||
checksum = "3b2d16507662817a6a20a9ea92df6652ee4f94f914589377d69f3b21bc5798a9"
|
||||
dependencies = [
|
||||
"utf8parse",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "anstyle-query"
|
||||
version = "1.1.1"
|
||||
version = "1.1.2"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "6d36fc52c7f6c869915e99412912f22093507da8d9e942ceaf66fe4b7c14422a"
|
||||
checksum = "79947af37f4177cfead1110013d678905c37501914fba0efea834c3fe9a8d60c"
|
||||
dependencies = [
|
||||
"windows-sys 0.52.0",
|
||||
"windows-sys 0.59.0",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "anstyle-wincon"
|
||||
version = "3.0.4"
|
||||
version = "3.0.6"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "5bf74e1b6e971609db8ca7a9ce79fd5768ab6ae46441c572e46cf596f59e57f8"
|
||||
checksum = "2109dbce0e72be3ec00bed26e6a7479ca384ad226efdd66db8fa2e3a38c83125"
|
||||
dependencies = [
|
||||
"anstyle",
|
||||
"windows-sys 0.52.0",
|
||||
"windows-sys 0.59.0",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "anyhow"
|
||||
version = "1.0.90"
|
||||
version = "1.0.91"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "37bf3594c4c988a53154954629820791dde498571819ae4ca50ca811e060cc95"
|
||||
checksum = "c042108f3ed77fd83760a5fd79b53be043192bb3b9dba91d8c574c0ada7850c8"
|
||||
|
||||
[[package]]
|
||||
name = "arbitrary"
|
||||
|
|
@ -233,9 +233,9 @@ dependencies = [
|
|||
|
||||
[[package]]
|
||||
name = "clap_complete"
|
||||
version = "4.5.33"
|
||||
version = "4.5.35"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "9646e2e245bf62f45d39a0f3f36f1171ad1ea0d6967fd114bca72cb02a8fcdfb"
|
||||
checksum = "07a13ab5b8cb13dbe35e68b83f6c12f9293b2f601797b71bc9f23befdb329feb"
|
||||
dependencies = [
|
||||
"clap",
|
||||
]
|
||||
|
|
@ -266,9 +266,9 @@ checksum = "67ba02a97a2bd10f4b59b25c7973101c79642302776489e030cd13cdab09ed15"
|
|||
|
||||
[[package]]
|
||||
name = "colorchoice"
|
||||
version = "1.0.2"
|
||||
version = "1.0.3"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "d3fd119d74b830634cea2a0f58bbd0d54540518a14397557951e79340abc28c0"
|
||||
checksum = "5b63caa9aa9397e2d9480a9b13673856c78d8ac123288526c37d7839f2a86990"
|
||||
|
||||
[[package]]
|
||||
name = "combine"
|
||||
|
|
@ -1029,9 +1029,9 @@ checksum = "e3148f5046208a5d56bcfc03053e3ca6334e51da8dfb19b6cdc8b306fae3283e"
|
|||
|
||||
[[package]]
|
||||
name = "pin-project-lite"
|
||||
version = "0.2.14"
|
||||
version = "0.2.15"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "bda66fc9667c18cb2758a2ac84d1167245054bcf85d5d1aaa6923f45801bdd02"
|
||||
checksum = "915a1e146535de9163f3987b8944ed8cf49a18bb0056bcebcdcece385cece4ff"
|
||||
|
||||
[[package]]
|
||||
name = "pkg-config"
|
||||
|
|
@ -1072,9 +1072,9 @@ dependencies = [
|
|||
|
||||
[[package]]
|
||||
name = "prettyplease"
|
||||
version = "0.2.24"
|
||||
version = "0.2.25"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "910d41a655dac3b764f1ade94821093d3610248694320cd072303a8eedcf221d"
|
||||
checksum = "64d1ec885c64d0457d564db4ec299b2dae3f9c02808b8ad9c3a089c591b18033"
|
||||
dependencies = [
|
||||
"proc-macro2",
|
||||
"syn",
|
||||
|
|
@ -1183,9 +1183,9 @@ dependencies = [
|
|||
|
||||
[[package]]
|
||||
name = "regex"
|
||||
version = "1.11.0"
|
||||
version = "1.11.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "38200e5ee88914975b69f657f0801b6f6dccafd44fd9326302a4aaeecfacb1d8"
|
||||
checksum = "b544ef1b4eac5dc2db33ea63606ae9ffcfac26c1416a2806ae0bf5f56b201191"
|
||||
dependencies = [
|
||||
"aho-corasick",
|
||||
"memchr",
|
||||
|
|
@ -1261,18 +1261,18 @@ dependencies = [
|
|||
|
||||
[[package]]
|
||||
name = "serde"
|
||||
version = "1.0.212"
|
||||
version = "1.0.213"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "ccd4055b7e3937a5c2595e974f5bf1715a23919a595a04b5ad959bdbbb61ab04"
|
||||
checksum = "3ea7893ff5e2466df8d720bb615088341b295f849602c6956047f8f80f0e9bc1"
|
||||
dependencies = [
|
||||
"serde_derive",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "serde_derive"
|
||||
version = "1.0.212"
|
||||
version = "1.0.213"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "726adf8349784fb68a42e6466f49362ae039d9c5333cc6eb131f4d6f94bb9126"
|
||||
checksum = "7e85ad2009c50b58e87caa8cd6dac16bdf511bbfb7af6c33df902396aa480fa5"
|
||||
dependencies = [
|
||||
"proc-macro2",
|
||||
"quote",
|
||||
|
|
@ -1366,9 +1366,9 @@ checksum = "7da8b5736845d9f2fcb837ea5d9e2628564b3b043a70948a3f0b778838c5fb4f"
|
|||
|
||||
[[package]]
|
||||
name = "syn"
|
||||
version = "2.0.82"
|
||||
version = "2.0.85"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "83540f837a8afc019423a8edb95b52a8effe46957ee402287f4292fae35be021"
|
||||
checksum = "5023162dfcd14ef8f32034d8bcd4cc5ddc61ef7a247c024a33e24e1f24d21b56"
|
||||
dependencies = [
|
||||
"proc-macro2",
|
||||
"quote",
|
||||
|
|
@ -1405,18 +1405,18 @@ dependencies = [
|
|||
|
||||
[[package]]
|
||||
name = "thiserror"
|
||||
version = "1.0.64"
|
||||
version = "1.0.65"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "d50af8abc119fb8bb6dbabcfa89656f46f84aa0ac7688088608076ad2b459a84"
|
||||
checksum = "5d11abd9594d9b38965ef50805c5e469ca9cc6f197f883f717e0269a3057b3d5"
|
||||
dependencies = [
|
||||
"thiserror-impl",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "thiserror-impl"
|
||||
version = "1.0.64"
|
||||
version = "1.0.65"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "08904e7672f5eb876eaaf87e0ce17857500934f4981c4a0ab2b4aa98baac7fc3"
|
||||
checksum = "ae71770322cbd277e69d762a16c444af02aa0575ac0d174f0b9562d3b37f8602"
|
||||
dependencies = [
|
||||
"proc-macro2",
|
||||
"quote",
|
||||
|
|
|
|||
2
Makefile
2
Makefile
|
|
@ -113,6 +113,6 @@ format:
|
|||
cargo +nightly fmt --all
|
||||
|
||||
changelog:
|
||||
@git-cliff --config script/cliff.toml --prepend CHANGELOG.md --latest --github-token $(shell gh auth token)
|
||||
@git-cliff --config .github/cliff.toml --prepend CHANGELOG.md --latest --github-token $(shell gh auth token)
|
||||
|
||||
.PHONY: test test_wasm lint format changelog
|
||||
|
|
|
|||
|
|
@ -1,7 +1,4 @@
|
|||
use std::collections::HashMap;
|
||||
|
||||
use anyhow::{anyhow, Context, Result};
|
||||
use lazy_static::lazy_static;
|
||||
use regex_syntax::{
|
||||
hir::{Class, Hir, HirKind},
|
||||
ParserBuilder,
|
||||
|
|
@ -14,22 +11,6 @@ use crate::{
|
|||
rules::{Precedence, Rule},
|
||||
};
|
||||
|
||||
lazy_static! {
|
||||
static ref UNICODE_CATEGORIES: HashMap<&'static str, Vec<u32>> =
|
||||
serde_json::from_str(UNICODE_CATEGORIES_JSON).unwrap();
|
||||
static ref UNICODE_PROPERTIES: HashMap<&'static str, Vec<u32>> =
|
||||
serde_json::from_str(UNICODE_PROPERTIES_JSON).unwrap();
|
||||
static ref UNICODE_CATEGORY_ALIASES: HashMap<&'static str, String> =
|
||||
serde_json::from_str(UNICODE_CATEGORY_ALIASES_JSON).unwrap();
|
||||
static ref UNICODE_PROPERTY_ALIASES: HashMap<&'static str, String> =
|
||||
serde_json::from_str(UNICODE_PROPERTY_ALIASES_JSON).unwrap();
|
||||
}
|
||||
|
||||
const UNICODE_CATEGORIES_JSON: &str = include_str!("./unicode-categories.json");
|
||||
const UNICODE_PROPERTIES_JSON: &str = include_str!("./unicode-properties.json");
|
||||
const UNICODE_CATEGORY_ALIASES_JSON: &str = include_str!("./unicode-category-aliases.json");
|
||||
const UNICODE_PROPERTY_ALIASES_JSON: &str = include_str!("./unicode-property-aliases.json");
|
||||
|
||||
struct NfaBuilder {
|
||||
nfa: Nfa,
|
||||
is_sep: bool,
|
||||
|
|
|
|||
File diff suppressed because one or more lines are too long
|
|
@ -1 +0,0 @@
|
|||
{"Other":"C","Control":"Cc","cntrl":"Cc","Format":"Cf","Unassigned":"Cn","Private_Use":"Co","Surrogate":"Cs","Letter":"L","Cased_Letter":"LC","Lowercase_Letter":"Ll","Modifier_Letter":"Lm","Other_Letter":"Lo","Titlecase_Letter":"Lt","Uppercase_Letter":"Lu","Mark":"M","Combining_Mark":"M","Spacing_Mark":"Mc","Enclosing_Mark":"Me","Nonspacing_Mark":"Mn","Number":"N","Decimal_Number":"Nd","digit":"Nd","Letter_Number":"Nl","Other_Number":"No","Punctuation":"P","punct":"P","Connector_Punctuation":"Pc","Dash_Punctuation":"Pd","Close_Punctuation":"Pe","Final_Punctuation":"Pf","Initial_Punctuation":"Pi","Other_Punctuation":"Po","Open_Punctuation":"Ps","Symbol":"S","Currency_Symbol":"Sc","Modifier_Symbol":"Sk","Math_Symbol":"Sm","Other_Symbol":"So","Separator":"Z","Line_Separator":"Zl","Paragraph_Separator":"Zp","Space_Separator":"Zs"}
|
||||
File diff suppressed because one or more lines are too long
|
|
@ -1 +0,0 @@
|
|||
{"cjkAccountingNumeric":"kAccountingNumeric","cjkOtherNumeric":"kOtherNumeric","cjkPrimaryNumeric":"kPrimaryNumeric","nv":"Numeric_Value","bmg":"Bidi_Mirroring_Glyph","bpb":"Bidi_Paired_Bracket","cf":"Case_Folding","cjkCompatibilityVariant":"kCompatibilityVariant","dm":"Decomposition_Mapping","EqUIdeo":"Equivalent_Unified_Ideograph","FC_NFKC":"FC_NFKC_Closure","lc":"Lowercase_Mapping","NFKC_CF":"NFKC_Casefold","NFKC_SCF":"NFKC_Simple_Casefold","scf":"Simple_Case_Folding","sfc":"Simple_Case_Folding","slc":"Simple_Lowercase_Mapping","stc":"Simple_Titlecase_Mapping","suc":"Simple_Uppercase_Mapping","tc":"Titlecase_Mapping","uc":"Uppercase_Mapping","cjkIICore":"kIICore","cjkIRG_GSource":"kIRG_GSource","cjkIRG_HSource":"kIRG_HSource","cjkIRG_JSource":"kIRG_JSource","cjkIRG_KPSource":"kIRG_KPSource","cjkIRG_KSource":"kIRG_KSource","cjkIRG_MSource":"kIRG_MSource","cjkIRG_SSource":"kIRG_SSource","cjkIRG_TSource":"kIRG_TSource","cjkIRG_UKSource":"kIRG_UKSource","cjkIRG_USource":"kIRG_USource","cjkIRG_VSource":"kIRG_VSource","cjkRSUnicode":"kRSUnicode","Unicode_Radical_Stroke":"kRSUnicode","URS":"kRSUnicode","isc":"ISO_Comment","JSN":"Jamo_Short_Name","na":"Name","na1":"Unicode_1_Name","Name_Alias":"Name_Alias","scx":"Script_Extensions","age":"Age","blk":"Block","sc":"Script","bc":"Bidi_Class","bpt":"Bidi_Paired_Bracket_Type","ccc":"Canonical_Combining_Class","dt":"Decomposition_Type","ea":"East_Asian_Width","gc":"General_Category","GCB":"Grapheme_Cluster_Break","hst":"Hangul_Syllable_Type","InCB":"Indic_Conjunct_Break","InPC":"Indic_Positional_Category","InSC":"Indic_Syllabic_Category","jg":"Joining_Group","jt":"Joining_Type","lb":"Line_Break","NFC_QC":"NFC_Quick_Check","NFD_QC":"NFD_Quick_Check","NFKC_QC":"NFKC_Quick_Check","NFKD_QC":"NFKD_Quick_Check","nt":"Numeric_Type","SB":"Sentence_Break","vo":"Vertical_Orientation","WB":"Word_Break","AHex":"ASCII_Hex_Digit","Alpha":"Alphabetic","Bidi_C":"Bidi_Control","Bidi_M":"Bidi_Mirrored","Cased":"Cased","CE":"Composition_Exclusion","CI":"Case_Ignorable","Comp_Ex":"Full_Composition_Exclusion","CWCF":"Changes_When_Casefolded","CWCM":"Changes_When_Casemapped","CWKCF":"Changes_When_NFKC_Casefolded","CWL":"Changes_When_Lowercased","CWT":"Changes_When_Titlecased","CWU":"Changes_When_Uppercased","Dash":"Dash","Dep":"Deprecated","DI":"Default_Ignorable_Code_Point","Dia":"Diacritic","EBase":"Emoji_Modifier_Base","EComp":"Emoji_Component","EMod":"Emoji_Modifier","Emoji":"Emoji","EPres":"Emoji_Presentation","Ext":"Extender","ExtPict":"Extended_Pictographic","Gr_Base":"Grapheme_Base","Gr_Ext":"Grapheme_Extend","Gr_Link":"Grapheme_Link","Hex":"Hex_Digit","Hyphen":"Hyphen","ID_Compat_Math_Continue":"ID_Compat_Math_Continue","ID_Compat_Math_Start":"ID_Compat_Math_Start","IDC":"ID_Continue","Ideo":"Ideographic","IDS":"ID_Start","IDSB":"IDS_Binary_Operator","IDST":"IDS_Trinary_Operator","IDSU":"IDS_Unary_Operator","Join_C":"Join_Control","LOE":"Logical_Order_Exception","Lower":"Lowercase","Math":"Math","NChar":"Noncharacter_Code_Point","OAlpha":"Other_Alphabetic","ODI":"Other_Default_Ignorable_Code_Point","OGr_Ext":"Other_Grapheme_Extend","OIDC":"Other_ID_Continue","OIDS":"Other_ID_Start","OLower":"Other_Lowercase","OMath":"Other_Math","OUpper":"Other_Uppercase","Pat_Syn":"Pattern_Syntax","Pat_WS":"Pattern_White_Space","PCM":"Prepended_Concatenation_Mark","QMark":"Quotation_Mark","Radical":"Radical","RI":"Regional_Indicator","SD":"Soft_Dotted","STerm":"Sentence_Terminal","Term":"Terminal_Punctuation","UIdeo":"Unified_Ideograph","Upper":"Uppercase","VS":"Variation_Selector","WSpace":"White_Space","space":"White_Space","XIDC":"XID_Continue","XIDS":"XID_Start","XO_NFC":"Expands_On_NFC","XO_NFD":"Expands_On_NFD","XO_NFKC":"Expands_On_NFKC","XO_NFKD":"Expands_On_NFKD"}
|
||||
|
|
@ -1,245 +0,0 @@
|
|||
#!/usr/bin/env node
|
||||
|
||||
// This script generates a JSON file that is used by the CLI to handle unicode property escapes.
|
||||
|
||||
const CATEGORY_OUTPUT_PATH = './cli/generate/src/prepare_grammar/unicode-categories.json'
|
||||
const PROPERTY_OUTPUT_PATH = './cli/generate/src/prepare_grammar/unicode-properties.json'
|
||||
const CATEGORY_ALIAS_OUTPUT_PATH = './cli/generate/src/prepare_grammar/unicode-category-aliases.json'
|
||||
const PROPERTY_ALIAS_OUTPUT_PATH = './cli/generate/src/prepare_grammar/unicode-property-aliases.json'
|
||||
|
||||
const UNICODE_STANDARD_VERSION = '15.1.0';
|
||||
const CATEGORY_URL = `https://unicode.org/Public/${UNICODE_STANDARD_VERSION}/ucd/UnicodeData.txt`
|
||||
const PROPERTY_URL = `https://unicode.org/Public/${UNICODE_STANDARD_VERSION}/ucd/PropList.txt`
|
||||
const DERIVED_PROPERTY_URL = `https://unicode.org/Public/${UNICODE_STANDARD_VERSION}/ucd/DerivedCoreProperties.txt`
|
||||
const CATEGORY_ALIAS_URL = `https://unicode.org/Public/${UNICODE_STANDARD_VERSION}/ucd/PropertyValueAliases.txt`
|
||||
const PROPERTY_ALIAS_URL = `https://unicode.org/Public/${UNICODE_STANDARD_VERSION}/ucd/PropertyAliases.txt`
|
||||
const EMOJI_DATA_URL = `https://unicode.org/Public/${UNICODE_STANDARD_VERSION}/ucd/emoji/emoji-data.txt`
|
||||
|
||||
const fs = require('fs');
|
||||
const path = require('path');
|
||||
const { spawnSync } = require('child_process');
|
||||
|
||||
// Download the unicode data files, caching them inside the 'target' directory.
|
||||
const categoryData = cachedDownload(CATEGORY_URL);
|
||||
const propertyData = cachedDownload(PROPERTY_URL);
|
||||
const derivedPropertyData = cachedDownload(DERIVED_PROPERTY_URL);
|
||||
const categoryAliasData = cachedDownload(CATEGORY_ALIAS_URL);
|
||||
const propertyAliasData = cachedDownload(PROPERTY_ALIAS_URL);
|
||||
const emojiData = cachedDownload(EMOJI_DATA_URL);
|
||||
function cachedDownload(url) {
|
||||
console.log(`Downloading ${url}`);
|
||||
let downloadPath = path.join('.', 'target', path.basename(url) + `.${UNICODE_STANDARD_VERSION}`)
|
||||
if (fs.existsSync(downloadPath)) {
|
||||
return fs.readFileSync(downloadPath, 'utf8');
|
||||
} else {
|
||||
const data = spawnSync('curl', [url], { encoding: 'utf8' }).stdout;
|
||||
fs.writeFileSync(downloadPath, data, 'utf8');
|
||||
return data;
|
||||
}
|
||||
}
|
||||
|
||||
const categories = {};
|
||||
const properties = {};
|
||||
const categoryAliases = {};
|
||||
const propertyAliases = {}
|
||||
let data, row, lineStart, lineEnd;
|
||||
|
||||
// Parse the properties
|
||||
data = propertyData + derivedPropertyData + emojiData;
|
||||
row = 0;
|
||||
lineStart = 0;
|
||||
lineEnd = -1;
|
||||
const CODE_POINT = /[0-9A-Fa-f]/
|
||||
while (lineStart < data.length) {
|
||||
row++;
|
||||
lineStart = lineEnd + 1;
|
||||
lineEnd = data.indexOf('\n', lineStart);
|
||||
if (lineEnd === -1) break;
|
||||
|
||||
// Skip over blank and comment lines
|
||||
if (!CODE_POINT.test(data[lineStart])) continue;
|
||||
|
||||
// Parse the first two semicolon fields:
|
||||
// * code point or code point range
|
||||
// * property
|
||||
const codePointEnd = data.indexOf(';', lineStart);
|
||||
const propertyStart = codePointEnd + 1;
|
||||
const propertyEnd = data.indexOf('#', propertyStart);
|
||||
|
||||
if (
|
||||
codePointEnd === -1 ||
|
||||
propertyEnd === -1
|
||||
) {
|
||||
throw new Error(`Unexpected format on line ${row}`);
|
||||
}
|
||||
|
||||
// Process ranges (separated by '..)
|
||||
const codePoints = data.slice(lineStart, codePointEnd).trim()
|
||||
.split('..')
|
||||
.map(p => parseInt(p, 16));
|
||||
if (codePoints.length === 1) {
|
||||
codePoints.push(codePoints[0]);
|
||||
}
|
||||
|
||||
const property = data.slice(propertyStart, propertyEnd).trim();
|
||||
|
||||
console.log("Property:", codePoints, property);
|
||||
|
||||
|
||||
for (let c = codePoints[0]; c <= codePoints[1]; c++) {
|
||||
if (!properties[property]) {
|
||||
properties[property] = [];
|
||||
}
|
||||
properties[property].push(c);
|
||||
}
|
||||
}
|
||||
|
||||
// Parse the categories.
|
||||
// Each line represents a code point.
|
||||
data = categoryData;
|
||||
row = 0;
|
||||
lineStart = 0;
|
||||
lineEnd = -1;
|
||||
while (lineStart < data.length) {
|
||||
row++;
|
||||
lineStart = lineEnd + 1;
|
||||
lineEnd = data.indexOf('\n', lineStart);
|
||||
if (lineEnd === -1) break;
|
||||
|
||||
// Parse the first three semicolon-separated fields:
|
||||
// * code point (hexadecimal)
|
||||
// * name
|
||||
// * category
|
||||
const codePointEnd = data.indexOf(';', lineStart);
|
||||
const nameStart = codePointEnd + 1;
|
||||
const nameEnd = data.indexOf(';', nameStart);
|
||||
const categoryStart = nameEnd + 1;
|
||||
const categoryEnd = data.indexOf(';', categoryStart)
|
||||
if (
|
||||
nameStart === 0 ||
|
||||
categoryStart == 0 ||
|
||||
categoryEnd === -1
|
||||
) {
|
||||
throw new Error(`Unexpected format on line ${row}`);
|
||||
}
|
||||
|
||||
const codePoint = parseInt(data.slice(lineStart, codePointEnd), 16);
|
||||
const name = data.slice(nameStart, nameEnd);
|
||||
const category = data.slice(categoryStart, categoryEnd);
|
||||
|
||||
console.log("Category:", codePoint, category, name);
|
||||
|
||||
// Group the code points by their category.
|
||||
if (!categories[category]) {
|
||||
categories[category] = [];
|
||||
}
|
||||
categories[category].push(codePoint);
|
||||
}
|
||||
|
||||
// Parse the category aliases
|
||||
data = categoryAliasData;
|
||||
row = 0;
|
||||
lineStart = 0;
|
||||
lineEnd = -1;
|
||||
const IGNORE = /[#\s]/
|
||||
while (lineStart < data.length) {
|
||||
row++;
|
||||
lineStart = lineEnd + 1;
|
||||
lineEnd = data.indexOf('\n', lineStart);
|
||||
if (lineEnd === -1) break;
|
||||
|
||||
// Skip over blank and comment lines
|
||||
if (IGNORE.test(data[lineStart])) continue;
|
||||
|
||||
// Parse the first three semicolon-separated fields:
|
||||
// * property value type
|
||||
// * short name
|
||||
// * long name
|
||||
// Other aliases may be listed in additional fields
|
||||
const propertyValueTypeEnd = data.indexOf(';', lineStart);
|
||||
const shortNameStart = propertyValueTypeEnd + 1;
|
||||
const shortNameEnd = data.indexOf(';', shortNameStart);
|
||||
const longNameStart = shortNameEnd + 1;
|
||||
if (
|
||||
shortNameStart === 0 ||
|
||||
longNameStart === 0
|
||||
) {
|
||||
throw new Error(`Unexpected format on line ${row}`);
|
||||
}
|
||||
|
||||
const propertyValueType = data.slice(lineStart, propertyValueTypeEnd).trim();
|
||||
const shortName = data.slice(shortNameStart, shortNameEnd).trim();
|
||||
|
||||
// Filter for General_Category lines
|
||||
if (propertyValueType !== 'gc') continue;
|
||||
|
||||
let aliasStart = longNameStart;
|
||||
let lineDone = false;
|
||||
do {
|
||||
let aliasEnd = data.indexOf(';', aliasStart);
|
||||
if (aliasEnd === -1 || aliasEnd > lineEnd) {
|
||||
aliasEnd = data.indexOf('#', aliasStart);
|
||||
if (aliasEnd === -1 || aliasEnd > lineEnd) {
|
||||
aliasEnd = lineEnd;
|
||||
}
|
||||
lineDone = true;
|
||||
}
|
||||
const alias = data.slice(aliasStart, aliasEnd).trim();
|
||||
console.log("Category alias:", alias, shortName);
|
||||
categoryAliases[alias] = shortName;
|
||||
aliasStart = aliasEnd + 1;
|
||||
} while (!lineDone);
|
||||
}
|
||||
|
||||
// Parse the property aliases
|
||||
data = propertyAliasData;
|
||||
row = 0;
|
||||
lineStart = 0;
|
||||
lineEnd = -1;
|
||||
while (lineStart < data.length) {
|
||||
row++;
|
||||
lineStart = lineEnd + 1;
|
||||
lineEnd = data.indexOf('\n', lineStart);
|
||||
if (lineEnd === -1) break;
|
||||
|
||||
// Skip over blank and comment lines
|
||||
if (IGNORE.test(data[lineStart])) continue;
|
||||
|
||||
// Parse the first two semicolon fields:
|
||||
// * short name
|
||||
// * long name
|
||||
const shortNameEnd = data.indexOf(';', lineStart);
|
||||
const longNameStart = shortNameEnd + 1;
|
||||
|
||||
if (longNameStart == 0) {
|
||||
throw new Error(`Unexpected format on line ${row}`);
|
||||
}
|
||||
|
||||
let alias = data.slice(lineStart, shortNameEnd).trim();
|
||||
let longName = null;
|
||||
let nameStart = longNameStart;
|
||||
let lineDone = false;
|
||||
do {
|
||||
let nameEnd = data.indexOf(';', nameStart);
|
||||
if (nameEnd === -1 || nameEnd > lineEnd) {
|
||||
nameEnd = data.indexOf('#', nameStart);
|
||||
if (nameEnd === -1 || nameEnd > lineEnd) {
|
||||
nameEnd = lineEnd;
|
||||
}
|
||||
lineDone = true;
|
||||
}
|
||||
if (longName == null) {
|
||||
longName = data.slice(nameStart, nameEnd).trim();
|
||||
} else {
|
||||
alias = data.slice(nameStart, nameEnd).trim();
|
||||
}
|
||||
console.log("Property alias:", alias, longName);
|
||||
propertyAliases[alias] = longName;
|
||||
nameStart = nameEnd + 1;
|
||||
} while (!lineDone);
|
||||
}
|
||||
|
||||
fs.writeFileSync(CATEGORY_OUTPUT_PATH, JSON.stringify(categories), 'utf8');
|
||||
fs.writeFileSync(PROPERTY_OUTPUT_PATH, JSON.stringify(properties), 'utf8');
|
||||
fs.writeFileSync(CATEGORY_ALIAS_OUTPUT_PATH, JSON.stringify(categoryAliases), 'utf8');
|
||||
fs.writeFileSync(PROPERTY_ALIAS_OUTPUT_PATH, JSON.stringify(propertyAliases), 'utf8');
|
||||
Loading…
Add table
Add a link
Reference in a new issue