// Utilisateur:Yopyop456/Brouillon/bct
// wojmepwam27+

window.dict = {}
dict._path = '/tmp2/cnchar2/scripts_and_data/'

function get(x, val){
  // let idx = this.key2[x] || this.key.indexOf(x)
  let idx = this.key.indexOf(x)

  if(idx < 0 && x?.includes('儿')){
    return get.call(this, x.replace('儿', ''), val)
  }

  if(val) {
    if(typeof val == 'object') Object.assign(this.val[idx], val)
    else this.val[idx] = val
  }
  else return this.val[idx]
}
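
// Usage sketch (assumes dict.get = get.bind(dict) and dict.key/dict.val filled,
// as done in new_merge_hsk below; the words are illustrative):
// dict.get('学习')                  // -> the entry object, or undefined if absent
// dict.get('一点儿')                // retries without 儿 when the full form is missing
// dict.get('学习', {pin: 'xuéxí'})  // object value: merged into the entry
// dict.get('学习', 'raw')           // non-object value: replaces the slot wholesale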

function get_si_tr(si){
  let tr = ''
  let entry = dict._cedict[si]
  if(entry){
    tr = entry[0]
  }
  else {
    si.split('').forEach(a=>tr+=dict._cedict[a]?.at(0)||'')
  }
  return tr
}
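
// e.g. get_si_tr('学习') -> '學習' (field 0 of the CEDICT entry is the traditional
// form); words missing as a whole fall back to a character-by-character lookup.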

function get_cc_def(si, lng = 'e'){
  let def, i, out
  def = dict['_c'+lng+'dict'][si]

  if(!def) return ''
  else def = def[3].split('/')

  def = def.slice(1, -1)
  out = []
  for(i=0; i<def.length; i++){
    if(def[i].includes(';')) {
      out.push(def[i])
      break
    }

    if(def[i].match(/[A-Z\[]/)) continue
    out.push(def[i])
  }

  if(!out.length) out.push(def[0])

  return out.slice(0, 3).join('; ')
}
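
// Illustrative walk-through (entry layout as produced by load_ccdict below):
// dict._cedict['好'] = ['好', '好', '[hao3]', '/good/well/proper/']
// get_cc_def('好')      -> 'good; well; proper'
// Definitions containing capitals or '[' cross-references are skipped; one that
// already contains ';' ends the scan; at most three are joined with '; '.
// get_cc_def('好', 'f') -> the same lookup against the French CFDICT (dict._cfdict)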

function get_zdic_ci(si){
  let out, more, i
  more = dict._zdic[si[0]].more
  more = more.replaceAll(/\s+\n/g, '\n').split('\n')
  out = []
  i = more.indexOf(si)
  if(i<0) return

  out.push(more[i], more[i+1])
  for(i+=2; i<more.length; i++){
    if(!(more[i][0] == '[' || more[i][0] == '〖') && more[i].match(/[a-z]/)) {
      out.pop()
      break
    }
    else if(more[i] == ''){
      break
    }
    out.push(more[i])
  }
  
  return out
}

function get_decomp(si){
  si = si.split('')
  let i, tmp, out = {decomp: [], phon: [], up:[]}
  si.forEach(char=>{
    tmp = dict._decomp[char].decomposition
    if(dict._decomp[char].etymology?.phonetic) tmp = tmp.replace(dict._decomp[char].etymology.phonetic, dict._decomp[char].etymology.phonetic + 'p')
    if(dict._decomp[char].radical) tmp = tmp.replace(dict._decomp[char].radical, dict._decomp[char].radical + 'r')
    if(char == dict._decomp[char].radical) tmp = 'r' + tmp
    out.decomp.push(tmp)
  })

  out.decomp.forEach(char=>{
    out.phon.push([])
    if(char.includes('p')){
      tmp = char.match(/(.)p/)[1]
      for(i in dict._decomp){
        if(dict._decomp[i].etymology?.phonetic == tmp){
          out.phon.at(-1).push(dict._decomp[i].character)
        }
      }
    }
  })

  si.forEach(char=>{
    out.up.push([])
    for(i in dict._decomp){
      if(dict._decomp[i].decomposition.includes(char)){
        out.up.at(-1).push(dict._decomp[i].character)
      }
    }

  })

  return out
}
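
// Sketch of the returned shape (makemeahanzi data; 妈 = 女 radical + 马 phonetic,
// list contents illustrative):
// get_decomp('妈') -> {
//   decomp: ['⿰女r马p'], // 'r' tags the radical, 'p' the phonetic component
//   phon:   [[...]],      // all characters sharing the phonetic 马
//   up:     [[...]]       // all characters whose decomposition contains 妈
// }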

function get_level(si){
  let c, entry, le0, le1, n
  le0 = le1 = n = 0
  for(c of si){
    entry = dict.get(c)
    if(!entry) continue
    c = entry.le.match(/char-(\d)/)?.at(1) // NB: reads the legacy 'le' field (merge_hsk); new_merge_hsk stores 'lev'
    if(le1 < c){
      le0 = le1
      le1 = c
      n = 1
    }
    else n++
  }

  return +le1
}

async function all_load(){
  await load_ccdict('cfdict/cfdict.u8')
  await load_ccdict('cedict/cedict_1_0_ts_utf-8_mdbg.txt')
  await load_hsk3off()
  await load_hsk3elk()
  await load_hsk3yar()
  await check_hsk3off_elk()
  // await check_hsk3off_elk_zi()

  await load_hsk2()
  await load_hsk2uni()
  //await loadhsk3def()

  await load_gtrans()
  await load_xiandai()
  await load_zdic()
  await load_decomp()

  await load_hydcd()
  await load_coloc()
  await load_bct()
  await load_anki()
  await load_wiktio()

  // await merge_hsk()

  await new_merge_hsk()

  // await load_wfreq()
  // await merge_hsk2()
  // return await mergeHSK()
}
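
// Rough pipeline: each load_* fetches one raw source into its own dict._xxx
// field, then new_merge_hsk() folds them into the parallel arrays dict.key /
// dict.val behind dict.get(). The loaders therefore have to run before the merge.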

async function load_ccdict(s){
  let txt, out
  out = {}
  txt = await fetch(dict._path + s).then(x=>x.text())
  txt = txt.replaceAll('\r', '').split('\n')
  txt.forEach((a, b)=>{
    let w, x, y, z
    x = a.indexOf(' ')
    y = a.indexOf(' ', x+1)
    z = a.indexOf(']', y+1)+1

    w = a.slice(x+1, y)
    if(!out[w]) out[w] = [a.slice(0, x), a.slice(x+1, y), a.slice(y+1, z), a.slice(z+1)]
    else {
      out[w][2] += ', ' + a.slice(y+1, z)
      out[w][3] += a.slice(z+2)
    }
  })

  dict['_'+s.slice(0, 6)] = out
}
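
// CC-CEDICT/CFDICT lines read 'TRAD SIMP [pin1 yin1] /def 1/def 2/'.
// Illustrative slicing of one line:
//   '中國 中国 [Zhong1 guo2] /China/'
//   -> out['中国'] = ['中國', '中国', '[Zhong1 guo2]', '/China/']
// A repeated simplified form gets its pinyin and definitions appended instead.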

async function load_hsk3off(){
  let txt

  txt = await fetch(dict._path + 'shawkynasr_HSK-official-Query-System/词汇 2022.csv').then(x=>x.text())
  txt = txt.trim().replaceAll('\r','').split('\n')
  txt.shift()
  txt.forEach((a, b)=>{
    txt[b]=a.split(',')
    txt[b].key = txt[b][2]
    txt[b][2] = txt[b][2]
      .replace(/[\|\|\∣].+/, '')
      .replace(/[\(\(].+[\)\)]/, '')
      .replaceAll(/[¹²∙…\|\(\)12\|\(\)\∣、]/g, '')
      // .replaceAll(/(.+?)[形名动介副量数连助代叹]+/g, '$1')

    txt[b][3] = txt[b][3].replaceAll('∥', '')
  })

  dict._hsk3off = txt

  txt = await fetch(dict._path + 'shawkynasr_HSK-official-Query-System/汉字.csv').then(x=>x.text())
  txt = txt.trim().replaceAll('\r','').split('\n')
  txt.shift()
  txt.forEach((a, b)=>txt[b]=a.split(','))

  dict._hsk3off_zi = txt
}

async function load_hsk3elk(){
  let txt, line, n, m, out, ww, char1, char2

  dict._hsk3elk = []
  txt = await fetch(dict._path + 'elkmovie_hsk30/wordlist.txt').then(x=>x.text())
  txt = txt.split('\n')
  m = 0

  for(line of txt){
    n = line.match(/^\d+/)
    if(n == 1) m++
    else if(!n) continue

    char2 = line.replace(n + ' ', '')
    char1 = char2
    .replace(/[\|\|\∣].+/, '')
    .replace(/[\(\(].+[\)\)]/, '')
    .replaceAll(/[¹²∙…\|\(\)12\|\(\)\∣、]/g, '')
    // .replaceAll(/(.+?)[形名动介副量数连助代叹]+/g, '$1')

    dict._hsk3elk.push({
      simplified: char1,
      simplified2: char2,
      level : 'hsk-' + m
    })
  }

  ww = ''
  out = []
  txt = await fetch(dict._path + 'elkmovie_hsk30/charlist.txt').then(x=>x.text())
  txt = txt.split('\n')
  m = 0
  for(line of txt){
    n = line.match(/^\d+/)
    if(n == 1) m++
    else if(!n) continue
    if(m>7) {
      ww += line.slice(-1)
      continue
    }

    out.push({
      simplified: line.replace(n + '\t', ''),
      pinyin : '',
      level: m,
    })
  }

  for(line of out){
    if(ww.includes(line.simplified)){
      line.level += 'w'
    }
  }

  dict._hsk3elk_zi = out
}

async function check_hsk3off_elk(){
  dict._hsk3off.sort((a,b)=>{
    let lvl = '一二三四五六高'
    if(lvl.indexOf(a[1][0]) > lvl.indexOf(b[1][0])){
      return 1
    }
    else if(lvl.indexOf(a[1][0]) < lvl.indexOf(b[1][0])){
      return -1
    }
    else return 0
  })

  dict._hsk3off_zi.sort((a,b)=>{
    let lvl = '一二三四五六高'
    if(lvl.indexOf(a[1][0]) > lvl.indexOf(b[1][0])){
      return 1
    }
    else if(lvl.indexOf(a[1][0]) < lvl.indexOf(b[1][0])){
      return -1
    }
    else return 0
  })

  let key1 = []
  let key2 = []
  let n = dict._hsk3elk.length
  for(let i=0; i<n; i++){
    key1.push(dict._hsk3elk[i].simplified)
    key2.push(dict._hsk3off[i][2])
  }

  for(let i=0; i<n; i++){
    let j = key1.indexOf(key2[i])
    if(j >= 0){
      key1[j] = ''
      key2[i] = ''
      dict._hsk3off[i].push(j) // index 5
    }
    else{
      console.warn(key2[i])
    }
  }

  dict._hsk3off.sort((a,b)=>{
    if(a[5] > b[5]){
      return 1
    }
    if(a[5] < b[5]){
      return -1
    }
    else return 0
  })

  let tmp = [...dict._hsk3elk]
  for(let i=0; i<dict._hsk3off.length; i++){
    if(dict._hsk3off[i][2].includes('儿')){
      if(tmp[i].simplified.includes('儿')){
        // keep 儿 only when the official pinyin actually carries the ér syllable
        if(!dict._hsk3off[i][3].includes('ér')){
          tmp[i].simplified = tmp[i].simplified.replace('儿', '')
        }
      }
      else{
        console.error(tmp[i])
      }
    }
  }

}


async function check_hsk3off_elk_zi(){
  let i, j, keys = ['', '', '', '', '', '', '', ''], keys2 = ['', '', '', '', '', '', '', '']
  for(i of dict._hsk3elk_zi) {
    keys[i.level] += i.simplified
  }
  for(i of dict.val) {
    if(!i.le.includes('new-')) continue
    keys2[i.le[4]] += i.si
  }

  for(i in keys2) {
    keys2[i] = [...(new Set(keys2[i].split('')))].join('')

    let re = new RegExp('['+keys2[i]+']', 'g')
    for(j=+i+1; j<keys2.length; j++){
      keys2[j] = keys2[j].replaceAll(re, '')
    }
  }

  for(i in keys2){
    for(j of keys2[i]){
      if(!keys[i].includes(j)){
        console.log(i, j)
      }
    }
  }
  
  // scrape = keys
  // scrape2 = keys2
}

async function load_hsk2(){
  let out = []

  let txt = await fetch(dict._path + 'glxxyz_hskhsk.com/hskhsk.txt').then(x=>x.text())
  txt = txt.split('\n')
  let n = 0

  for(let i=0; i<txt.length; i++){
    if(txt[i].includes('--HSK')) {
      ++n
      continue
    }
    txt[i] = txt[i].split('\t')
    let char2 = txt[i][0]
    let char1 = char2
      .replace(/[\|\|\∣].+/, '')
      .replace(/[\(\(].+[\)\)]/, '')
      .replaceAll(/[¹²∙…\|\(\)12\|\(\)\∣、]/g, '')
    
    out.push({
      level: 'hsk-' + n,
      simplified: char1,
      simplified2: char2,
      definition_en: txt[i][4],
      pinyin: txt[i][3],
    })
  }

  dict._hsk2 = out
}

async function load_hsk3yar(){
  let txt = await fetch(dict._path + 'cultureyard/cultureyard.json').then(x=>x.json())

  for(let i in txt){
    txt[i].simplified2 = txt[i].simplified
    txt[i].simplified =  txt[i].simplified
      .replace(/[\|\|\∣].+/, '')
      .replace(/[\(\(].+[\)\)]/, '')
      .replaceAll(/[¹²∙…\|\(\)12\|\(\)\∣、]/g, '')
      // .replaceAll(/(.+?)[形名动介副量数连助代叹]+/g, '$1')
      // .replace(/(.+)儿$/, '$1')

  }

  dict._hsk3yar = txt
}

async function load_hsk2uni(){
  let txt = await fetch(dict._path + 'unige/unige.json').then(x=>x.json())
  dict._hsk2uni = txt.value
  for(let i in dict._hsk2uni){
    dict._hsk2uni[i].simplified2 = dict._hsk2uni[i].simplified
    if(dict._hsk2uni[i].simplified.includes('…')){
      dict._hsk2uni[i].simplified = dict._hsk2uni[i].simplified.slice(0, 2)
      continue
    }

    dict._hsk2uni[i].simplified = dict._hsk2uni[i].simplified
      .replace(/[\|\|\∣].+/, '')
      .replace(/[\(\(].+[\)\)]/, '')
      .replaceAll(/[¹²∙…\|\(\)12\|\(\)\∣、]/g, '')
      // .replaceAll(/(.+?)[形名动介副量数连助代叹]+/g, '$1')
      // .replace(/(.+)儿$/, '$1')
  }
}

async function load_gtrans(){
  let out, txt = await fetch(dict._path + 'gtranslate/gtranslate.txt').then(x=>x.text())
  out = {}
  txt = txt.replaceAll('\r', '').split('\n')
  for(let i=0; i<txt.length; i++){
    txt[i] = txt[i].split('\t')
    out[txt[i][0]] = txt[i].slice(1)
  }
  dict._gtrans = out
}
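
// gtranslate.txt rows are tab-separated, keyed by the word; judging from the
// indexing used in the merge passes ([0] French, [1] English), a row looks like:
//   词<TAB>mot<TAB>word   ->   dict._gtrans['词'] = ['mot', 'word']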

async function load_xiandai(){
  let i, out, txt
  txt = await fetch(dict._path + 'CNMAN_XDHYCD7th/XDHYCD7th.txt').then(x=>x.text())
  txt = txt.replaceAll('\r', '').split('\n')
  out = {}
  for(i=0; i<txt.length; i++){
    txt[i] = txt[i].slice(1).split('】')
    if(out[txt[i][0]]){
      out[txt[i][0]] += '\n+ ' + txt[i][1]
    }
    else out[txt[i][0]] = '+ ' + txt[i][1]
  }
  dict._xiandai = out
}

async function load_zdic(){
  let i, out, txt, ci

  txt = await fetch(dict._path + 'pwxcoo_chinese-xinhua/data/ci.json').then(x=>x.json())
  out = {}
  for(i=0; i<txt.length; i++){
    ci = txt[i].ci.replace(/\(.+\)/, '')
    if(out[ci]){
      // console.warn(ci)
      out[ci] += '\n+ ' + txt[i].explanation
    }
    else out[ci] = '+ ' + txt[i].explanation
  }
  dict._zdic2 = out

  txt = await fetch(dict._path + 'pwxcoo_chinese-xinhua/data/word.json').then(x=>x.json())
  out = {}
  for(i=0; i<txt.length; i++){
    if(out[txt[i].word]){
      // console.warn(txt[i].word)
      out[txt[i].word].explanation += '\n--\n' + txt[i].pinyin + '\n' + txt[i].explanation
      out[txt[i].word].more += '\n--\n' + txt[i].pinyin + '\n' + txt[i].explanation
    }
    else out[txt[i].word] = txt[i]
  }
  dict._zdic = out
}

async function load_decomp(){
  let i, out, txt = await fetch(dict._path + 'skishore_makemeahanzi/dictionary.txt').then(x=>x.text())
  txt = '[' + txt.trim().replaceAll('\n', ',\n') + ']'
  txt = JSON.parse(txt)
  out = {}
  for(i=0; i<txt.length; i++) {
    if(out[txt[i].character]) console.error(out[txt[i].character])
    out[txt[i].character] = txt[i]
  }
  dict._decomp = out
}
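
// dictionary.txt (makemeahanzi) holds one JSON object per line, e.g. (abridged):
//   {"character":"妈","decomposition":"⿰女马","radical":"女",
//    "etymology":{"type":"pictophonetic","phonetic":"马","semantic":"女"}, ...}
// The lines are joined into one JSON array, then keyed by character.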

async function load_hydcd(){
  let i, out, txt
  txt = await fetch(dict._path + 'lxs602_Chinese-Mandarin-Dictionaries/Hànyǔ Dà Cídiǎn - dāncí biǎo - word list.tab').then(x=>x.text())
  txt = txt.replaceAll(/\t.+$/gm, '').replaceAll(/^.+\|/gm, '').split('\n')
  dict._hydcd = txt
}

async function load_coloc(){
  let i, out, txt
  txt = await fetch(dict._path + 'final/coloc-final.json').then(x=>x.json())
  dict._coloc = txt
}

async function load_bct(){
  if(!dict._bct){
    let bct = await fetch(dict._path + 'bct/BCT.txt').then(x=>x.text())
    bct = bct.replaceAll('\r','').replaceAll(/"([^"]+?)\n([^"]+?)"/g, '$1 $2').replaceAll(/\t+/g, '\t').replaceAll('🎧', '出')
      .trim().split('\n')
    
    let out = []
    
    for(let line of bct){
      line = line.split('\t')
      if(line[0].match(/\d/)) {
        line[2] = line[2]
          .replace(/\(.+\)/, '').replace(/<.+>/, '').replace(/[a-zA-Z]+/, '').replace(/[\/…].+/, '').replaceAll(' ','').replace('儿', '')
        out.push(line)
      }
    }

    bct = await fetch(dict._path + 'gtranslate/gtranslate_bct.txt').then(x=>x.text())
    bct = bct.trim().replaceAll('\r', '').split('\n')

    for(let line of bct){
      line = line.split('\t')
      out[+line[0]][1] = line[2]
      // debugger
    }

    dict._bct = out
  }

  let out2 = []
  let dict_entry, line

  if(dict.val){
    for(let i in dict._bct){
      line = dict._bct[i]
      dict_entry = dict.get(line[2])
      if(!dict_entry || dict_entry.lev.startsWith('x-BCT')) out2.push(i)
    }
  }

  return out2
}

async function load_anki(z=1){
  let i, j, out, out2, txt, entry

  if(!dict._anki1){
    txt = await fetch(dict._path + 'anki/Taiwan_TBCL_wordlist_Traditional.csv').then(x=>x.text())
    txt = txt.split('\r\n')
    txt.shift()
    dict._anki1 = txt

    txt = await fetch(dict._path + 'anki/Taiwan_TOCFL_2023_wordlist_with_audio_Traditional.csv').then(x=>x.text())
    txt = txt.split('\r\n')
    txt.shift()
    dict._anki2 = txt

    return
  }

  out = {tra: [], sim: [], all: []}
  out2 = {tra: [], sim: [], all: []}
  for(i of dict._anki1){
    i = i.split('�') // fields arrive separated by U+FFFD after text() decoding
    j = i[2].replace(/\(.+\)/, '').split('/')
    entry = null
    entry ??= dict.get(j[0])
    entry ??= dict.get(j[1])
    entry ??= dict.get(j[2])
    if(entry && !entry.lev.startsWith('x-TW1')) continue
    if(out.sim.includes(j[0])) continue
    out.tra.push(i[1])
    out.sim.push(j[0])
    out.all.push(i)
  }

  for(i of dict._anki2){
    i = i.split('�')
    j = i[2].replace(/\(.+\)/, '').split('/')
    entry = null
    entry ??= dict.get(j[0])
    entry ??= dict.get(j[1])
    entry ??= dict.get(j[2])
    if(entry && !entry.lev.startsWith('x-TW2')) continue
    if(out.sim.includes(j[0])) continue
    out2.tra.push(i[1])
    out2.sim.push(j[0])
    out2.all.push(i)
  }

  if(z == 1) return out
  else return out2
}

async function load_wfreq(){
  let i, j, out, txt, entry
  txt = await fetch(dict._path + 'anki/loach_word_order.json').then(x=>x.json())
  out = []
  for(i of txt){
    if(!dict.get(i) && i.length > 1 && i.charCodeAt(0) < 40000) out.push(i)
  }

  dict._wfreq = out
}

async function load_wiktio(x, online=1){
  let i, j, out, txt, entry, char, char2
  if(!x){
    txt = await fetch(dict._path + 'gtranslate/wiktionary.json').then(x=>x.json())

    dict._wiktio = txt
    return
  }

  if(typeof scrape != 'object') scrape = {}
  char = x

  if(char in dict._wiktio){
    char2 = dict._wiktio[char]
  }
  else {
    if(!online) return
    char2 = await fetch('https://en.wiktionary.org/api/rest_v1/page/definition/'+char).then(x=>x.text())
    scrape[char] = char2
  }

  if(!char2.includes('No definition found') && !char2.includes('html')){
    char2 = JSON.parse(char2)
    if(char2.zh){
      char2 = char2.zh[0].definitions.at(0).definition + (char2.zh[0].definitions.at(1) ? '; ' + char2.zh[0].definitions.at(1).definition : '')
      entry = '' + char2.replaceAll(/<.+?>/g, '')
    }
  }

  if(!entry) {
    if(!(char in dict._wiktio)){
      char2 = await fetch('https://en.wiktionary.org/api/rest_v1/page/mobile-html/'+char).then(x=>x.text())
      scrape[char] = char2
    }
    char2 = char2.match(/\(“.+?”\)/g)
    if(char2) entry = '' + char2[0].replaceAll(/<.+?>/g, '').slice(2,-2)
    // else console.log(char)
  }

  return entry
}


/*
11437
out2 = [...(new Set(out))]
11441
for(i=0; i<out2.length; i++) if(!dict.key.includes(out2[i])) console.warn(out2[i])
*/

async function merge_hsk2(){
  let char, char2, char3, i, entry, tmp

  // GET ID SIMPLIFIED LEVEL ANKI
  tmp = dict._anki[1]
  for(i=0; i<tmp.length; i++){
    char = tmp[i].replace(/\/.+/, '')
    entry = dict.get(char)
    if(entry) continue
    char2 = get_cc_def(char)
    char3 = get_cc_def(char, 'f')
    dict.key.push(char)
    dict.val.push({
      id: dict.val.length,
      si: char,
      le: 'x-TW',
      tr: get_si_tr(char),
      pi: '',
      po: '',
      en: char2 ? '<ce>' + char2 : '',
      fr: char3 ? '<cf>' + char3 : '',
    })
  }

  // GET ID SIMPLIFIED LEVEL WFREQ
  tmp = dict._wfreq
  for(i=0; i<tmp.length; i++){
    char = tmp[i]
    entry = dict.get(char)
    if(entry) continue
    char2 = get_cc_def(char)
    char3 = get_cc_def(char, 'f')
    dict.key.push(char)
    dict.val.push({
      id: dict.val.length,
      si: char,
      le: 'x-FQ',
      tr: get_si_tr(char),
      pi: '',
      po: '',
      en: char2 ? '<ce>' + char2 : '',
      fr: char3 ? '<cf>' + char3 : '',
    })
  }

  // ADD MISSING TRANSLATE EN

  scrape = []
  scrape2 = []
  tmp = dict.val
  for(i=0; i<tmp.length; i++){
    if(!tmp[i].en) {
      tmp[i].en = get_cc_def(tmp[i].si)
      tmp[i].en = tmp[i].en ? '<ce>' + tmp[i].en : ''
      if(tmp[i].en) console.warn(tmp[i].si)
    }
    if(!tmp[i].fr) {
      tmp[i].fr = get_cc_def(tmp[i].si, 'f')
      tmp[i].fr = tmp[i].fr ? '<cf>' + tmp[i].fr : ''
      if(tmp[i].fr) console.warn(tmp[i].si)
    }
    if(!tmp[i].en && dict._gtrans[tmp[i].si]) tmp[i].en = '<go>' + dict._gtrans[tmp[i].si][1] 
    if(!tmp[i].fr && dict._gtrans[tmp[i].si]) tmp[i].fr = '<go>' + dict._gtrans[tmp[i].si][0]

    if(!tmp[i].en) {
      scrape.push([tmp[i].si, tmp[i].fr])
    }
    if(!tmp[i].fr) {
      scrape2.push([tmp[i].si, tmp[i].en])
    }
  }

}

async function merge_hsk(){
  dict.key = []
  dict.key2 = {}
  dict.val = []
  dict.get = get.bind(dict)

  let char, char2, i, entry, tmp, out

  // GET ID SIMPLIFIED LEVEL HSK3

  tmp = dict._hsk3elk
  for(i=0; i<tmp.length; i++){
    char = tmp[i].simplified

    char = char
    .replace(/[\|\|\∣].+/, '')
    .replace(/[\(\(].+[\)\)]/, '')
    .replaceAll(/[¹²∙…\|\(\)12\|\(\)\∣、]/g, '')
    // .replaceAll(/(.+?)[形名动介副量数连助代叹]+/g, '$1')
    // .replace(/(.+)儿$/, '$1')

    if(!dict.key.includes(char)){
      dict.key.push(char)
      dict.val.push({
        id: dict.val.length,
        si: char,
        le: 'new-' + tmp[i].level.slice(-1)
      })
    }
  }

  // GET ID SIMPLIFIED LEVEL HSK3 ZI

  tmp = dict._hsk3elk_zi
  for(i=0; i<tmp.length; i++){
    char = tmp[i].simplified
    entry = dict.get(char)
    if(!entry){
      dict.key.push(char)
      dict.val.push({
        id: dict.val.length,
        si: char,
        le: 'char-' + tmp[i].level
      })
    }
    else {
      entry.le += ' | char-' + tmp[i].level
    }
  }

  // GET ID SIMPLIFIED LEVEL HSK2

  for(i=0; i<dict._hsk2.length; i++){
    char = dict._hsk2[i].simplified

    char = char
    .replace(/[\|\|\∣].+/, '')
    .replace(/[\(\(].+[\)\)]/, '')
    .replaceAll(/[¹²∙…\|\(\)12\|\(\)\∣、]/g, '')
    // .replaceAll(/(.+?)[形名动介副量数连助代叹]+/g, '$1')
    
    if(char.includes('儿')){
      if(!dict.key.includes(char)){
        char = char.replace(/(.+)儿$/, '$1')
      }
      if(!dict.key.includes(char)){
        console.warn(char)
      }
      
    }

    if(!dict.key.includes(char)){
      dict.key.push(char)
      dict.val.push({
        id: dict.val.length,
        si: char,
        le: 'old-' + dict._hsk2[i].level.slice(-1)
      })
    }
    else {
      if(!dict.get(char).le.includes('old')){
        dict.get(char).le += ' | old-' + dict._hsk2[i].level.slice(-1)
      }
    }
  }

  // GET ID SIMPLIFIED LEVEL BCT
  tmp = await load_bct()
  for(i=0; i<tmp.length; i++){
    entry = dict._bct[+tmp[i]]
    char2 = get_cc_def(entry[2])
    dict.key.push(entry[2])
    dict.val.push({
      id: dict.val.length,
      si: entry[2],
      le: 'x-BCT',
      tr: '',
      pi: entry[1],
      po: '',
      en: char2 ? '<ce>' + char2 : '',
    })
  }

  // GET TRADITIONAL

  for(i=0; i<dict.val.length; i++){
    dict.val[i].tr = get_si_tr(dict.val[i].si)
  }

  // GET PINYIN AND PART OF SPEECH HSK3
  for(i=0; i<dict._hsk3off.length; i++){
    char = dict._hsk3off[i][2]

    char = char
    .replace(/[\|\|\∣].+/, '')
    .replace(/[\(\(].+[\)\)]/, '')
    .replaceAll(/[¹²∙…\|\(\)12\|\(\)\∣、]/g, '')
    // .replaceAll(/(.+?)[形名动介副量数连助代叹]+/g, '$1')
    // .replace(/(.+)儿$/, '$1')
    
    entry = dict.get(char)
    if(!entry) entry = dict.get(char.replace('儿', ''))

    if(!entry.pi){
      entry.pi = dict._hsk3off[i][3]
      entry.po = dict._hsk3off[i][4]
    }
    else {
      entry.pi += ' | ' + dict._hsk3off[i][3]
      entry.po += ' | ' + dict._hsk3off[i][4]
    }
  }

  // GET PINYIN ENGLISH HSK3 ZI
  tmp = dict._hsk3off_zi
  for(i=0; i<tmp.length; i++){
    entry = dict.get(tmp[i][2])
    if(!entry.le.startsWith('char')) continue

    if(!entry.pi){
      entry.pi = tmp[i][3]
      entry.po = ''
      entry.en = '<ce>' + get_cc_def(entry.si)
    }
    else {
      entry.pi += ' | ' + tmp[i][3]
    }

    if(entry.en.includes('undefined')) debugger
  }

  // CLEAN UP PINYIN

  tmp = dict.val
  for(i=0; i<tmp.length; i++){
    if(!tmp[i].pi) continue

    if(tmp[i].pi.match(/\∣[^ ]+/)){
      // baba|ba
      tmp[i].pi = tmp[i].pi.replace(/\∣([^ ]+)/, '($1)')
    }
    else if(!tmp[i].pi.includes(' | ') && tmp[i].pi.match(/[^é]r$/)){
      // ér hua
      tmp[i].pi = tmp[i].pi.slice(0, -1) + ' | ' + tmp[i].pi
    }
  }
  // GET ENGLISH HSK3

  tmp = dict._hsk3yar
  for(i=0; i<tmp.length; i++){
    char = tmp[i].simplified

    char = char
    .replace(/[\|\|\∣].+/, '')
    .replace(/[\(\(].+[\)\)]/, '')
    .replaceAll(/[¹²∙…\|\(\)12\|\(\)\∣、]/g, '')
    // .replaceAll(/(.+?)[形名动介副量数连助代叹]+/g, '$1')
    // .replace(/(.+)儿$/, '$1')
    
    entry = dict.get(char)
    if(!entry) entry = dict.get(char.replace('儿', ''))

    if(!entry.en){
      entry.en = '<yd>' + tmp[i].definition_en
    }
    else {
      entry.en += ' | ' + tmp[i].definition_en
    }
  }

  // GET ENGLISH PINYIN POS HSK2

  tmp = dict._hsk2
  for(i=0; i<tmp.length; i++){
    char = tmp[i].simplified

    char = char
    .replace(/[\|\|\∣].+/, '')
    .replace(/[\(\(].+[\)\)]/, '')
    .replaceAll(/[¹²∙…\|\(\)12\|\(\)\∣、]/g, '')
    // .replaceAll(/(.+?)[形名动介副量数连助代叹]+/g, '$1')
    // .replace(/(.+)儿$/, '$1')
    
    entry = dict.get(char)
    if(!entry) entry = dict.get(char.replace('儿', ''))

    if(entry.le.includes('new')) continue

    if(!entry.en){
      entry.pi = tmp[i].pinyin
      entry.po = ''
      entry.en = '<py>' + tmp[i].definition_en
    }
    else {
      entry.pi += ' | ' + tmp[i].pinyin
      entry.po = ''
      entry.en += ' | ' + tmp[i].definition_en
    }
  }

  // GET FRENCH HSK2

  tmp = dict._hsk2uni
  for(i=0; i<tmp.length; i++){
    char = tmp[i].simplified

    char = char
    .replace(/[\|\|\∣].+/, '')
    .replace(/[\(\(].+[\)\)]/, '')
    .replaceAll(/[¹²∙…\|\(\)12\|\(\)\∣、]/g, '')
    // .replaceAll(/(.+?)[形名动介副量数连助代叹]+/g, '$1')
    // .replace(/(.+)儿$/, '$1')

    if(tmp[i].level.includes('C')) continue

    entry = dict.get(char)
    if(!entry) entry = dict.get(char.replace('儿', ''))

    if(!entry && tmp[i].level.length == 5 && tmp[i].level.startsWith('hsk-')){
      char2 = char.slice(2)
      char = char.slice(0, 2)
      tmp[i].definition_fr = tmp[i].definition_fr + ' (' + char + '……' + char2 + '……)'

      entry = dict.get(char2)
      if(!entry.fr){
        entry.fr = tmp[i].definition_fr
      }
      else {
        entry.fr += ' | ' + tmp[i].definition_fr
      }
      entry = dict.get(char)
    }

    if(!entry){
      // console.log(tmp[i].simplified, get_cc_def(tmp[i].simplified))
      char2 = get_cc_def(tmp[i].simplified)
      dict.key.push(tmp[i].simplified)
      dict.val.push({
        id: dict.val.length,
        si: tmp[i].simplified,
        le: tmp[i].level.replace('A', '+').replace('B', '+').replace('hsk', 'old'),
        tr: get_si_tr(tmp[i].simplified),
        pi: tmp[i].pinyin,
        po: '',
        en: char2 ? '<ce>' + char2 : '',
        fr: '<ge>' + tmp[i].definition_fr,
      })
    }
    else if(!entry.fr){
      entry.fr = '<ge>' + tmp[i].definition_fr
    }
    else {
      entry.fr += ' | ' + tmp[i].definition_fr
    }
  }

  // ADD EXAMPLE FR
  
  tmp = dict._hsk2uni
  for(i=0; i<tmp.length; i++){
    if(!tmp[i].example) continue
    entry = dict.get(tmp[i].simplified)
    if(!entry) entry = dict.get(tmp[i].simplified.replace('儿',''))
    entry.fr += '; ' + tmp[i].example
  }

  // ADD ALL FRENCH

  tmp = dict.val
  for(i=0; i<tmp.length; i++){
    if(tmp[i].fr) continue
    char2 = get_cc_def(tmp[i].si, 'f')
    tmp[i].fr = char2 ? '<cf>' + char2 : ''
    // console.log(tmp[i].si, get_cc_def(tmp[i].si, 'f'))
  }

  // ADD ZH DEF

  out = []
  tmp = dict.val
  for(i=0; i<tmp.length; i++){
    entry = dict._xiandai[tmp[i].si]
    //entry ??= get_zdic_ci(tmp[i].si)
    //entry ??= dict._zdic2[tmp[i].si]

    if(!entry) {
      // console.log(tmp[i].si)
      tmp[i].zh = ''
      out.push(tmp[i].si)
      continue
    }
    tmp[i].zh = entry
  }

  scrape = out

  return // NOTE: the passes below are currently disabled by this early return

  // ADD COLLOCATION

  entry = []
  tmp = dict.val
  for(i=0; i<tmp.length; i++){
    tmp[i].co = dict._coloc[tmp[i].si]
    if(!tmp[i].co) entry.push(tmp[i].si)
  }
  window.discard = entry

  return

  // CHECK ENTRIES HYDCD

  tmp = dict.key
  entry = []
  for(i=0; 0 && i<tmp.length; i++){
    if(!dict._hydcd.includes(tmp[i]))
      entry.push(tmp[i])
  }
  dict.discard = entry

}

async function new_merge_hsk(){
  let key = dict.key = []
  let val = dict.val = []
  dict.get = get.bind(dict)

  let char, char2, i, entry, z, cur, eng
  
  z = {
    sim: '', // simplified
    lev: '', // level
    cha: '', // character compound
    col: '', // collocation
    dec: '', // decomposition
    eng: '', // english
    fra: '', // French
    hom: '', // homonym
    idx: '', // idx
    mor: '', // more example
    num: '', // numeric
    pin: '', // pinyin
    pos: '', // pos
    ran: '', // rank
    syn: '', // synonym
    tra: '', // traditional
    wor: '', // word compound
    zho: '', // zhongwen
  }

  // GET HSK V3 CI

  cur = dict._hsk3elk
  for(i=0; i<cur.length; i++){
    char = cur[i].simplified

    entry = dict.get(char)
    if(!entry){
      key.push(char)
      z.idx = val.length
      z.sim = char
      z.lev = 'new-' + cur[i].level.slice(-1)
      val.push(structuredClone(z))
    }
    else {
      // console.log(char)
    }
  }

  // GET HSK V3 ZI

  cur = dict._hsk3elk_zi
  for(i=0; i<cur.length; i++){
    char = cur[i].simplified
    entry = dict.get(char)

    if(!entry){
      key.push(char)
      z.idx = dict.val.length
      z.sim = char
      z.lev = 'char-' + cur[i].level
      val.push(structuredClone(z))
    }
    else {
      entry.lev += ' | char-' + cur[i].level
    }
  }

  // GET HSK V2 CI

  cur = dict._hsk2
  for(i=0; i<cur.length; i++){
    char = cur[i].simplified
    
    if(char.includes('儿')){
      if(!dict.key.includes(char)){
        char = char.replace(/(.+)儿$/, '$1')
      }
      if(!dict.key.includes(char)){
        console.warn(char)
      }
      
    }

    entry = dict.get(char)
    if(!entry){
      key.push(char)
      z.idx = val.length
      z.sim = char
      z.lev = 'old-' + cur[i].level.slice(-1)
      val.push(structuredClone(z))
    }
    else {
      if(!entry.lev.includes('old')){
        dict.get(char).lev += ' | old-' + cur[i].level.slice(-1)
      }
    }
  }
  
  // GET BCT

  cur = await load_bct()
  for(i=0; i<cur.length; i++){
    entry = dict._bct[+cur[i]]

    if(dict.get(entry[2])) {
      // console.log('bct', entry[2])
      continue
    }

    key.push(entry[2])
    z.idx = val.length
    z.sim = entry[2]
    z.lev = 'x-BCT'
    val.push(structuredClone(z))

  }

  // GET HSK2 UNIGE

  cur = dict._hsk2uni
  for(i=0; i<cur.length; i++){
    char = cur[i].simplified
    if(dict.get(char)) continue
    if(cur[i].level.at(-1) == 'C') continue

    key.push(char)
    z.idx = val.length
    z.sim = char
    z.lev = 'x-GE'
    val.push(structuredClone(z))

  }

  // GET TBCL AND TOCFL

  cur = await load_anki(1)
  for(i=0; i<cur.sim.length; i++){
    entry = cur.sim[i]

    if(dict.get(entry)) {
      console.log(entry)
      continue
    }

    key.push(entry)
    z.idx = val.length
    z.sim = entry
    z.lev = 'x-TW1'
    val.push(structuredClone(z))

  }

  cur = await load_anki(2)
  for(i=0; i<cur.sim.length; i++){
    entry = cur.sim[i]

    key.push(entry)
    z.idx = val.length
    z.sim = entry
    z.lev = 'x-TW2'
    val.push(structuredClone(z))

  }

  // GET TRADITIONAL

  cur = dict.val
  for(i=0; i<cur.length; i++){
    cur[i].tra = get_si_tr(cur[i].sim)
  }

  // GET DEF ENGLISH HSK V3

  cur = dict._hsk3yar
  for(i=0; i<cur.length; i++){
    char = cur[i].simplified
    entry = dict.get(char)

    if(!entry.eng){
      entry.eng = '<yd>' + cur[i].definition_en
    }
    else {
      entry.eng += ' | ' + cur[i].definition_en
    }
  }

  // GET DEF ENGLISH HSK V2

  cur = dict._hsk2
  for(i=0; i<cur.length; i++){
    char = cur[i].simplified
    entry = dict.get(char)

    if(entry.lev.includes('new')) continue

    if(!entry.eng){
      entry.eng = '<py>' + cur[i].definition_en
    }
    else {
      entry.eng += ' | ' + cur[i].definition_en
    }
  }

  // GET ALL ENGLISH

  cur = dict.val
  for(i=0; i<cur.length; i++){
    if(cur[i].eng) continue

    char = cur[i].sim
    entry = get_cc_def(char)
    if(entry) {
      cur[i].eng = '<ce>' + entry
    }
    entry = await load_wiktio(char, 0)
    if(entry) {
      cur[i].eng = '<wi>' + entry
    }
  }

  // GET FRENCH HSK V2

  cur = dict._hsk2uni
  for(i=0; i<cur.length; i++){
    char = cur[i].simplified
    char2 = cur[i].simplified2

    if(cur[i].level.includes('C')) continue

    if(char2.includes('…')){
      dict.get(char).fra = '<ge>' + cur[i].definition_fr + ' (' + char2 + ')'
      dict.get(char2.replaceAll('…', '').slice(2, 4)).fra = '<ge>' + cur[i].definition_fr + ' (' + char2 + ')'
      continue
    }

    entry = dict.get(char)

    if(!entry.fra) {
      entry.fra = '<ge>' + cur[i].definition_fr
    }
    else {
      entry.fra += ' | ' + cur[i].definition_fr
    }
  }

  // GET ALL FRENCH

  cur = dict.val
  for(i=0; i<cur.length; i++){
    if(cur[i].fra) continue

    char = cur[i].sim
    entry = get_cc_def(char, 'f')
    if(entry) {
      cur[i].fra = '<cf>' + entry
    }
    else if(cur[i].eng) {
      cur[i].fra = '<en>' + cur[i].eng.replace(/<.+?>/,'')
    }
  }

  // GET PINYIN HSK V3

  cur = dict._hsk3off
  for(i=0; i<cur.length; i++){
    char = cur[i][2]
    // if(char == '地方') debugger

    entry = dict.get(char)
    if(!entry.pin){
      entry.pin = cur[i][3]
    }
    else {
      if(entry.pin.includes(cur[i][3])) continue
      entry.pin += ' | ' + cur[i][3]
    }
  }

  cur = dict._hsk3off_zi
  for(i=0; i<cur.length; i++){
    char = cur[i][2]

    entry = dict.get(char)
    if(!entry.lev.startsWith('char')) continue

    if(!entry.pin){
      entry.pin = cur[i][3]
    }
    else {
      entry.pin += ' | ' + cur[i][3]
    }
  }

  cur = dict._hsk2uni
  for(i=0; i<cur.length; i++){
    char = cur[i].simplified

    entry = dict.get(char)
    if(!( entry && (entry.lev.startsWith('old') || entry.lev.startsWith('x-GE'))  )) continue

    if(!entry.pin){
      entry.pin = cur[i].pinyin
    }
    else {
      entry.pin += ' | ' + cur[i].pinyin
    }
  }

  cur = dict._hsk2
  for(i=0; i<cur.length; i++){
    char = cur[i].simplified
    if(dict.get(char).pin) continue
    // console.log('hsk2', char)
    dict.get(char).pin = cur[i].pinyin
  }

  cur = await load_bct()
  for(i=0; i<cur.length; i++){
    char = dict._bct[+cur[i]][2]

    entry = dict.get(char)
    if(!( entry && entry.lev.startsWith('x-BCT')  )) continue

    // if(dict._bct[+cur[i]][3]=='shoddy') debugger

    if(!entry.pin){
      entry.pin = dict._bct[+cur[i]][1]
    }
    else {
      // entry.pin += ' | ' + dict._bct[+cur[i]][1]
    }
  }

  cur = await load_anki(1)
  for(i=0; i<cur.sim.length; i++){
    char = cur.sim[i]

    entry = dict.get(char)
    if(! entry?.lev.startsWith('x-TW1') ) continue

    if(!entry.pin){
      entry.pin = cur.all[i][3]
    }
    else {
      entry.pin += ' | ' + cur.all[i][3]
    }
  }

  cur = await load_anki(2)
  for(i=0; i<cur.sim.length; i++){
    char = cur.sim[i]

    entry = dict.get(char)
    if(! entry?.lev.startsWith('x-TW2') ) continue

    if(!entry.pin){
      entry.pin = cur.all[i][3]
    }
    else {
      entry.pin += ' | ' + cur.all[i][3]
    }
  }

  // for(let a of dict.val) if(!a.pin)console.log(a)
  // for(let a of dict.val) if(a.sim.length > 1 && a.pin.includes('|'))console.log(a)

  // GET NUMBERED PINYIN

  cur = dict.val
  for(i=0; i<cur.length; i++){
    entry = cur[i]
    entry.num = shapeSpell(entry.pin)
  }

}
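
// Illustrative shape of one merged entry (values hypothetical):
// dict.get('妈妈') -> {
//   sim: '妈妈', lev: 'new-1', tra: '媽媽', pin: 'māma',
//   num: 'ma1 ma',                    // numbered pinyin from shapeSpell(pin)
//   eng: '<yd>mom', fra: '<ge>maman', // '<..>' prefix marks the source list
//   cha: '', col: '', dec: '', ...    // untouched template fields stay ''
// }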


/*********/
// Generated by CoffeeScript 1.9.2

/*
PinyinConverter by David Chanin and Jen Liu @quizlet 2013
Inspired by http://stackoverflow.com/questions/1598856/convert-numbered-to-accentuated-pinyin/5607888#5607888

Authors: David Chanin and Jen Liu
Github: chanind
email: dchanin@quizlet.com
 */

/*
function pinyin_addaccents($string) {
    # Find words with a number behind them, and replace with callback fn.
    return preg_replace_callback(
        '~([a-zA-ZüÜ]+)(\d)~',
        'pinyin_addaccents_cb',
        $string);
}

# Helper callback
function pinyin_addaccents_cb($match) {
    static $accentmap = null;

    if( $accentmap === null ) {
        # Where to place the accent marks
        $stars =
            'a* e* i* o* u* ü* '.
            'A* E* I* O* U* Ü* '.
            'a*i a*o e*i ia* ia*o ie* io* iu* '.
            'A*I A*O E*I IA* IA*O IE* IO* IU* '.
            'o*u ua* ua*i ue* ui* uo* üe* '.
            'O*U UA* UA*I UE* UI* UO* ÜE*';
        $nostars = str_replace('*', '', $stars);

        # Build an array like Array('a' => 'a*') and store statically
        $accentmap = array_combine(explode(' ',$nostars), explode(' ', $stars));
        unset($stars, $nostars);
    }

    static $vowels =
        Array('a*','e*','i*','o*','u*','ü*','A*','E*','I*','O*','U*','Ü*');

    static $pinyin = Array(
        1 => Array('ā','ē','ī','ō','ū','ǖ','Ā','Ē','Ī','Ō','Ū','Ǖ'),
        2 => Array('á','é','í','ó','ú','ǘ','Á','É','Í','Ó','Ú','Ǘ'),
        3 => Array('ǎ','ě','ǐ','ǒ','ǔ','ǚ','Ǎ','Ě','Ǐ','Ǒ','Ǔ','Ǚ'),
        4 => Array('à','è','ì','ò','ù','ǜ','À','È','Ì','Ò','Ù','Ǜ'),
        5 => Array('a','e','i','o','u','ü','A','E','I','O','U','Ü')
    );

    list(,$word,$tone) = $match;
    # Add star to vowelcluster
    $word = strtr($word, $accentmap);
    # Replace starred letter with accented 
    $word = str_replace($vowels, $pinyin[$tone], $word);
    return $word;
}
*/

(function() {
  var PinyinConverter;

  PinyinConverter = {
    pinyinRegex: /(shuang|chuang|zhuang|xiang|qiong|shuai|niang|guang|sheng|kuang|shang|jiong|huang|jiang|shuan|xiong|zhang|zheng|zhong|zhuai|zhuan|qiang|chang|liang|chuan|cheng|chong|chuai|hang|peng|chuo|piao|pian|chua|ping|yang|pang|chui|chun|chen|chan|chou|chao|chai|zhun|mang|meng|weng|shai|shei|miao|zhui|mian|yong|ming|wang|zhuo|zhua|shao|yuan|bing|zhen|fang|feng|zhan|zhou|zhao|zhei|zhai|rang|suan|reng|song|seng|dang|deng|dong|xuan|sang|rong|duan|cuan|cong|ceng|cang|diao|ruan|dian|ding|shou|xing|zuan|jiao|zong|zeng|zang|jian|tang|teng|tong|bian|biao|shan|tuan|huan|xian|huai|tiao|tian|hong|xiao|heng|ying|jing|shen|beng|kuan|kuai|nang|neng|nong|juan|kong|nuan|keng|kang|shua|niao|guan|nian|ting|shuo|guai|ning|quan|qiao|shui|gong|geng|gang|qian|bang|lang|leng|long|qing|ling|luan|shun|lian|liao|zhi|lia|liu|qin|lun|lin|luo|lan|lou|qiu|gai|gei|gao|gou|gan|gen|lao|lei|lai|que|gua|guo|nin|gui|niu|nie|gun|qie|qia|jun|kai|kei|kao|kou|kan|ken|qun|nun|nuo|xia|kua|kuo|nen|kui|nan|nou|kun|jue|nao|nei|hai|hei|hao|hou|han|hen|nai|rou|xiu|jin|hua|huo|tie|hui|tun|tui|hun|tuo|tan|jiu|zai|zei|zao|zou|zan|zen|eng|tou|tao|tei|tai|zuo|zui|xin|zun|jie|jia|run|diu|cai|cao|cou|can|cen|die|dia|xue|rui|cuo|cui|dun|cun|cin|ruo|rua|dui|sai|sao|sou|san|sen|duo|den|dan|dou|suo|sui|dao|sun|dei|zha|zhe|dai|xun|ang|ong|wai|fen|fan|fou|fei|zhu|wei|wan|min|miu|mie|wen|men|lie|chi|cha|che|man|mou|mao|mei|mai|yao|you|yan|chu|pin|pie|yin|pen|pan|pou|pao|shi|sha|she|pei|pai|yue|bin|bie|yun|nüe|lve|shu|ben|ban|bao|bei|bai|lüe|nve|ren|ran|rao|xie|re|ri|si|su|se|ru|sa|cu|ce|ca|ji|ci|zi|zu|ze|za|hu|he|ha|ju|ku|ke|qi|ka|gu|ge|ga|li|lu|le|qu|la|ni|xi|nu|ne|na|ti|tu|te|ta|xu|di|du|de|bo|lv|ba|ai|ei|ao|ou|an|en|er|da|wu|wa|wo|fu|fo|fa|nv|mi|mu|yi|ya|ye|me|mo|ma|pi|pu|po|yu|pa|bi|nü|bu|lü|e|o|a)r?[1-5]?/gi,
    vowels: {
      'a*': '0',
      'e*': '1',
      'i*': '2',
      'o*': '3',
      'u*': '4',
      'ü*': '5',
      'A*': '6',
      'E*': '7',
      'I*': '8',
      'O*': '9',
      'U*': '10',
      'Ü*': '11'
    },
    pinyin: {
      1: ['ā', 'ē', 'ī', 'ō', 'ū', 'ǖ', 'Ā', 'Ē', 'Ī', 'Ō', 'Ū', 'Ǖ'],
      2: ['á', 'é', 'í', 'ó', 'ú', 'ǘ', 'Á', 'É', 'Í', 'Ó', 'Ú', 'Ǘ'],
      3: ['ǎ', 'ě', 'ǐ', 'ǒ', 'ǔ', 'ǚ', 'Ǎ', 'Ě', 'Ǐ', 'Ǒ', 'Ǔ', 'Ǚ'],
      4: ['à', 'è', 'ì', 'ò', 'ù', 'ǜ', 'À', 'È', 'Ì', 'Ò', 'Ù', 'Ǜ'],
      5: ['a', 'e', 'i', 'o', 'u', 'ü', 'A', 'E', 'I', 'O', 'U', 'Ü']
    },
    convert: function(string) {
      var j, len, match, matches, replacement;
      matches = string.match(this.pinyinRegex);
      if (!matches) {
        return string;
      }
      for (j = 0, len = matches.length; j < len; j++) {
        match = matches[j];
        replacement = this.getReplacement(match);
        string = string.replace(match, replacement);
      }
      return string;
    },
    getReplacement: function(match) {
      var accentMap, accentedVowelChar, base, replacedWord, tone, vowel, vowelChar, vowelNum, word;
      accentMap = this.getAccentMap();
      tone = match.slice(-1);
      word = match.slice(0, -1).replace('v', 'ü').replace('V', 'Ü');
      for (base in accentMap) {
        vowel = accentMap[base];
        if (word.indexOf(base) >= 0) {
          vowelChar = vowel.match(/.\*/)[0];
          vowelNum = this.vowels[vowelChar];
          accentedVowelChar = this.pinyin[tone.toString()][vowelNum];
          replacedWord = word.replace(base, vowel).replace(vowelChar, accentedVowelChar);
          return replacedWord;
        }
      }
      return match;
    },
    getAccentMap: function() {
      var base, i, j, len, nostars, ref, stars, starsArray;
      if (!this.accentMap) {
        stars = 'a*i a*o e*i ia* ia*o ie* io* iu* ' + 'A*I A*O E*I IA* IA*O IE* IO* IU* ' + 'o*u ua* ua*i ue* ui* uo* üe* ' + 'O*U UA* UA*I UE* UI* UO* ÜE* ' + 'A* E* I* O* U* Ü* ' + 'a* e* i* o* u* ü*';
        nostars = stars.replace(/\*/g, '');
        starsArray = stars.split(' ');
        this.accentMap = {};
        ref = nostars.split(' ');
        for (i = j = 0, len = ref.length; j < len; i = ++j) {
          base = ref[i];
          this.accentMap[base] = starsArray[i];
        }
      }
      return this.accentMap;
    }
  };

  (function(root, factory) {
    if (typeof define === 'function' && define.amd) {
      return define(factory);
    } else if (typeof exports === 'object') {
      return module.exports = factory();
    } else {
      return root.PinyinConverter = factory();
    }
  })(this, function() {
    return PinyinConverter;
  });

}).call(this);
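
// The converter turns numbered pinyin into accented pinyin, e.g.:
// PinyinConverter.convert('ni3 hao3') // -> 'nǐ hǎo'
// PinyinConverter.convert('lv4')      // -> 'lǜ' ('v' is read as 'ü')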

function shapeSpell(txt){
  if(!txt) {
    return ''
  }

  let tmp, i, j, out, re

  try{
    // tmp = cnchar.shapeSpell(txt, true)
    for(i=1; i<=txt.length; i++){
      out = txt.substring(0, i)
      if(!out.match(/[^a-zA-Z]/)) continue
      for(j=1; j<=5; j++){
        re = new RegExp('(['+PinyinConverter.pinyin[j].join('')+'])')
        if(re.test(out)){
          tmp = txt.substring(0, i-1)
          if(tmp.match(PinyinConverter.pinyinRegex)){
            i = 200
            break
          }

          out = out.replace(re, x=>{
            let pos = PinyinConverter.pinyin[j].indexOf(x)
            pos = Object.keys(PinyinConverter.vowels)[pos][0]
            return pos
          })
          tmp = out + txt.substring(i) + j
          i = 100

          break
        }
      }
    }
  }
  catch(e){
    console.warn(txt)
    return ''
  }

  if(i == 201) {
    let aa = tmp.match(PinyinConverter.pinyinRegex)
    if(aa) return aa[0] + ' ' + shapeSpell(txt.replace(aa[0], ''))
    else return txt
  }
  if(!tmp) return txt

  tmp = tmp.replace(/^\W+/, '')

  re = PinyinConverter.pinyinRegex.toString().replace('/','').replace('/gi','')
  re = new RegExp('^'+re+'$', 'gi')

  for(i=1; i<tmp.length; i++){
    if(!(tmp.slice(0, i+1)+'1').match(re)) {
      if(!(tmp.slice(0, i+2)+'1').match(re)) {
        if(!(tmp.slice(0, i+3)+'1').match(re)) {
          break
        }
      }
    }
  }

  if(tmp[i-1].match(/[^AEIOUaeiou0-9]/) && tmp[i].match(/[AEIOUaeiou0-9\u0080-\u0F00]/) && i+1 != tmp.length){
    // if(cnchar.dict.spell[tmp.slice(0, i-1)]) i--
    if(!(tmp.slice(0, i-1)+'1').match(PinyinConverter.pinyinRegex)) i--
  }

  j = tmp.slice(0, i).replace(/\d/, '') + tmp.slice(-1)
  if(j.length == 2) {
    debugger
    console.warn(j)
    return ''
  }
  out = j
  tmp = tmp.slice(i, -1).trim()
  out += (tmp.at(0) == '|' ? ' | ' : ' ') + shapeSpell(tmp)
  return out
}
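
// shapeSpell goes the other way, accented -> numbered pinyin; roughly:
// shapeSpell('nǐ hǎo') // -> 'ni3 hao3' (modulo trailing whitespace)
// new_merge_hsk() uses it in its last pass to derive num from pin.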

/*********/

(async ()=>{
  window._ ??= await import('/js/0utils/utils3.mjs')
})()

async function scrape_sketchengine(){

  let out, i, list
  list = await fetch(dict._path + 'final/char.txt').then(x=>x.text())
  list = list.split('\n')
  out = {}
  for(i=0; i<list.length; i++){
    out['' + list[i]] = ''
  }

  out = {}
  out = await fetch(dict._path + 'final/thesaurus.json').then(x=>x.json())
  for(i in out) if(!out[i].includes('"status":"success"')) {
    out[i] = ''
  }

  _.fetchAll(out, {max: 8, urlmodif: x=>{debugger
    return {
      url: 'https://server.chinesezerotohero.com/sketch-engine-proxy.php?https://app.sketchengine.eu/bonito/run.cgi/wsketch?corpname=preloaded/zhtenten17_simplified_stf2&lemma=' + x
    }}
  })

  window.scrape = out
}

async function process_sketchengine_col(){

  let out, i, j, k, list, tmp
  list = await fetch(dict._path + 'final/coloc-5000.json').then(x=>x.json())

  for(i in list){
    list[i] = list[i].replaceAll(/\\u[a-f0-9]{4}/g, (...p)=>{
      return JSON.parse('"'+p[0]+'"')
    })
    list[i] = JSON.parse(list[i])

    if(list[i].status != 'success') {
      debugger
      console.warn(list[i].status)
    }
  }

  out = {}
  for(i in list){
    if(!list[i].data.Gramrels) continue
    tmp = []
    for(j of list[i].data.Gramrels){
      for(k of j.Words){
        tmp.push(k)
      }
    }

    tmp.sort((a,b)=>a.count>b.count?-1:a.count<b.count?1:0)
    j = []
    for(k of tmp){
      if(j.includes(k.cm)) continue
      j.push(k.cm)
      if(j.length == 10) break
    }

    out[i] = j.join('、')
  }

  window.scrape2 = list
  window.scrape3 = out
}


async function process_sketchengine_thes(){

  let out, i, j, k, list, list2, tmp
  list = await fetch(dict._path + 'final/thesaurus.json').then(x=>x.json())
  list2 = await fetch(dict._path + 'final/thesaurus2.json').then(x=>x.json())
  
  tmp = await fetch(dict._path + 'final/thesaurus3.json').then(x=>x.json())
  list2.push(...tmp)

  for(i in list2){
    list2[i] = list2[i].replaceAll(/\\u[a-f0-9]{4}/g, (...p)=>{
      return JSON.parse('"'+p[0]+'"')
    })
    if(list2[i].includes('429 Too Many')) continue

    list2[i] = JSON.parse(list2[i])
    list[list2[i].request.lemma] = list2[i]
  }

  tmp = []
  for(i in list){
    if(typeof list[i] != 'string') continue

    list[i] = list[i].replaceAll(/\\u[a-f0-9]{4}/g, (...p)=>{
      return JSON.parse('"'+p[0]+'"')
    })
    list[i] = JSON.parse(list[i])

    if(list[i].status != 'success') {
      debugger
      tmp.push(i)
    }
  }

  out = {}
  for(i in list){
    if(!list[i].data){
      list[i].data = list[i]
    }
    if(!list[i].data.Words) continue
    
    tmp = []
    for(j of list[i].data.Words){
      tmp.push(j.word)
    }

    out[i] = tmp
  }

  window.scrape2 = list
  window.scrape3 = out

  window.discard = tmp
}

/*

http://wap.51bc.net/xhy/page/xhy539.html

http://xh.51bc.net/html3/22525.html

http://xh.51bc.net/html4/16.html
{"derivation": "清·赵翼《论诗》诗矮人看戏何曾见,都是随人说短长。”", "example": "无", "explanation": "比喻只知道附和别人,自己没有主见。也比喻见识不广。", "pinyin": "ǎi rén kàn xì", "word": "矮人看戏", "abbreviation": "arkx"}
http://xh.51bc.net/html4/31250.html

{"ci":"", "explanation":""}
http://xh.51bc.net/html5/372021.html
*/

async function process_xinhua_ci(x, out){
  let txt = await fetch(dict._path + 'final/xinhua-'+x+'.json').then(x=>x.json())
  if(typeof out == 'undefined') out = []

  for(let i in txt){
    txt[i] = txt[i].replaceAll('<br>','')
    let kk = txt[i].match(/center>([^<]+)<\/td>/s)?.at(1)
    if(!kk) continue
    let vv = txt[i].match(/'1'>(.+?)</s).at(1).trim()
    // out[i] = {ci: kk, explanation: vv}
    out.push({ci: kk, explanation: vv})
  }

  scrape = txt
  scrape2 = out

  return out

  // aze = []; [0, 50_000, 100_000, 150_000, '150000b', 200_000, 250_000, 300_000, 350_000].forEach(async (x)=>{await process_xinhua_ci(x, aze)})
  // aze[0] = []; for(let i of aze) if(i?.ci) aze[0].push(i.ci)
  // aze.final = []; [0, 50_000, 100_000, 150_000, 200_000, 250_000, 300_000, 350_000].forEach(async (x)=>aze.final.push(...aze[x]))
}

async function process_xinhua_chengyu(x){
  let txt

  if(!x) txt = await fetch(dict._path + 'final/chengyu-0.json').then(x=>x.json())
  else txt = await fetch(dict._path + x).then(x=>x.json())

  let out = []

  for(let i in txt){
    txt[i] = txt[i].replaceAll('\r', '')
    .replaceAll('〗', ':</b>')
    .replaceAll('】', ':</b>')
    .replaceAll('〖', '<b>')
    .replaceAll('【', '<b>')

    let word = 
    txt[i].match(/3><b>([^<]+?)</s)?.at(1)?.trim() || 
    txt[i].match(/title1>([^<]+?)</s)?.at(1)?.trim() || ''
    let pinyin =
    txt[i].match(/拼音.+?'40%'>([^<]+?)</s)?.at(1)?.trim() || 
    txt[i].match(/拼音:<\/b>([^<]+?)</s)?.at(1)?.trim() || ''
    let explanation =
    txt[i].match(/解释.+?'5'>([^<]+?)</s)?.at(1)?.trim() ||
    txt[i].match(/解释:<\/b>([^<]+?)</s)?.at(1)?.trim() || ''
    let example =
    txt[i].match(/例子.+?'5'>([^<]+?)</s)?.at(1)?.trim() ||
    txt[i].match(/例子:<\/b>([^<]+?)</s)?.at(1)?.trim() || ''
    let abbreviation = ''
    let derivation =
    txt[i].match(/出处.+?'5'>([^<]+?)</s)?.at(1)?.trim() ||
    txt[i].match(/出处:<\/b>([^<]+?)</s)?.at(1)?.trim() || ''
    debugger
    out.push({
      derivation,
      example,
      explanation,
      pinyin,
      word,
      abbreviation
    })
  }

  scrape = txt
  scrape2 = out

}

async function process_xinhua_pageciyu0(x){
  let txt = await fetch(dict._path + 'final/words-0.json').then(x=>x.json())
  let out = {}

  for(let i in txt){
    let ciyu = txt[i].match(/ciyu\/(.+\.html)>更多有关(.*?)的词语/s)
    if(ciyu) out[ciyu.at(1)] = ciyu.at(2)
    //out.push(ciyu)
  }

  out.links = []
  for(let i in txt){
    let ciyu
    ciyu = txt[i].match(/ciyu\/(.+\.html)>更多有关(.*?)的词语/s)
    if(ciyu) continue
    ciyu = txt[i].match(/html5\/(z[\w\d]+\.html)/s)
    if(ciyu) out.links.push(ciyu.at(1))
    //out.push(ciyu)
  }

  scrape = txt
  scrape2 = out

}

async function process_xinhua_pageciyu0A(x){
  let txt = await fetch(dict._path + 'final/pageciyu-0A.json').then(x=>x.json())
  let out = []

  for(let i in txt){
    let ciyu = txt[i].match(/z[a-zA-Z0-9].+?\.html/g) || []
    //if(ciyu) out[ciyu.at(1)] = ciyu.at(2)
    out.push(...ciyu)
  }

  scrape = txt
  scrape2 = out

}

async function process_xinhua_pageciyu0B(x){
  // word

  let final

  final = [];
  for(x of [0, 50_000, 100_000, 150_000, '150000b', 200_000, 250_000, 300_000, 350_000]){
    await process_xinhua_ci(x, final)
  }
  console.log(final.length)
  let txt = await fetch(dict._path + 'final/pageciyu-0B.json').then(x=>x.json())
  let out = await fetch(dict._path + 'final/pageciyu-0C.json').then(x=>x.json())
  Object.assign(txt, out)
  out = []

  for(let i in txt){
    let n = i.match(/(\d+)\./)?.at(1)
    let ciyu = txt[i].match(/title1>([^<]+?)</s)?.at(1)
    let def = txt[i].match(/body4'><br>(.+?)<\/div>/s)?.at(1).replaceAll('<br>','').trim()
    //if(ciyu) out[ciyu.at(1)] = ciyu.at(2)
    //out[n] = {ci: ciyu, explanation: def}
    if(ciyu === undefined || ciyu.length == 1){
      console.log(txt[i])
      continue
    }
    out.push({ci: ciyu, explanation: def})
  }
  out.final = final
  final.push(...out)

  for(let i in final){
    final[i].explanation = final[i].explanation.replaceAll('\r\n', '\n').replaceAll(/<[^>]+>/g, '').replaceAll(/&[#\w\d]+;/g, ' ').trim()
    if(final[i].explanation.includes('&')) {debugger; console.log(final[i])}
  }

  scrape = txt
  scrape2 = out

}

async function process_xinhua_pageciyu1(x){
  let txt = await fetch(dict._path + 'final/words-0.json').then(x=>x.json())
  let out = {}

  for(let i in txt){
    let ciyu = txt[i].match(/cy\/(.+\.html)>更多相关成语/s)
    if(ciyu) {debugger
      out[ciyu.at(1)] = i.ci
    }
    //out.push(ciyu)

  }

  out.links = []
  for(let i in txt){
    let ciyu
    ciyu = txt[i].match(/cy\/(.+\.html)>更多相关成语/s)
    if(ciyu) continue
    ciyu = txt[i].match(/html4\/(z[\w\d]+\.html)/s)
    if(ciyu) out.links.push(ciyu.at(1))
    //out.push(ciyu)
  }

  scrape = txt
  scrape2 = out

}

async function process_xinhua_pageciyu1A(x){
  let txt = await fetch(dict._path + 'final/pageciyu-1A.json').then(x=>x.json())
  let out = []

  for(let i in txt){
    let ciyu = txt[i].match(/z[a-zA-Z0-9].+?\.html/g) || []
    //if(ciyu) out[ciyu.at(1)] = ciyu.at(2)
    out.push(...ciyu)
  }

  scrape = txt
  scrape2 = out

}


async function process_xinhua_pageciyu1B(x){

  await process_xinhua_chengyu()
  let aze = scrape2
  await process_xinhua_chengyu('final/pageciyu-1B.json')
  aze.push(...scrape2)

  scrape2 = aze
}

async function process_xinhua_xhy(){
  let txt = await fetch(dict._path + 'final/xhy-0.json').then(x=>x.json())
  let out = []

  for(let i in txt){
    let ciyu = txt[i].matchAll(/([^<>]+)? +—+ +([^<>]+)/g)
    ciyu = [...ciyu]
    debugger
    if(ciyu.length) {
      for(let j of ciyu){
        out.push({
          riddle: j[1],
          answer: j[2],
        })
      }
    }
  }

  scrape = txt
  scrape2 = out
}

async function process_xinhua_words(){
  let txt = await fetch(dict._path + 'final/words-0.json').then(x=>x.json())
  let out = []

  for(let i in txt){
    let word = txt[i].match(/(.)字的解释-/s)?.at(1) || ''
    let oldword = ''
    let strokes = txt[i].match(/笔划:[^#]+?>(\d+)</s)?.at(1) || ''
    let pinyin = txt[i].match(/拼音:[^#]+?><\/td>([^#]+?)<\/td>/s)?.at(1)
      .replaceAll('\t', '').replaceAll(/<[^>]+>/g, '').replaceAll(/ xhziplay\("[^"]+"\);/g, '').trim() || ''
    let radicals = txt[i].match(/部首:[^#]+?<\/td>[^#]+?>([^#]+?)<\/td>/s)?.at(1) || ''
    let explanation = txt[i].match(/基本解释(.+?)详细解释/s)?.at(1)
      .replaceAll('<br>', '\n').replaceAll(/<[^>]+>/g, '').replaceAll('&nbsp;', ' ').substring(1).trim() || ''
    let more = txt[i].match(/详细解释(.+?table4>)/s)?.at(1)
      .replaceAll('<br>', '\n').replaceAll(/<[^>]+>/g, '').substring(1).trim() || ''

    if(!word) continue
    if(word) debugger
    out.push({
      word,
      oldword,
      strokes,
      pinyin,
      radicals,
      explanation,
      more
    })
  }

  scrape = txt
  scrape2 = out
}

async function scrape_xinhua_ci(x=0){
  _ = window

  let out, i, list

  if(typeof scrape != 'undefined') out = scrape
  else {
    out = {}
    for(i=x*50000+1; i<=539 && i<(x+1)*50000+1; i++){
      out['' + i] = ''
    }
  }

  _.fetchAll(out, {
    max: 8,
    responsetype: 'arrayBuffer',
    // urlmodif: x=>'http://xh.5156edu.com/html5/'+x+'.html',
    urlmodif: x=>'http://wap.51bc.net/xhy/page/xhy'+x+'.html',
    valmodif: x=>{
      let z = (new TextDecoder("gb2312")).decode(x)
      x = z.includes('utf') ? (new TextDecoder("utf-8")).decode(x) : z
      // x = x.match(/<table border=.+?<hr.+?<hr/s)?.at(0)
      // debugger
      return x
    },
  })

  window.scrape = out

  // for(i in scrape) if(!scrape[i]){console.log(i); scrape[i] = ''}
  // download(JSON.stringify(scrape), 'xinhua-100000.txt')
}

async function load_baidu(x=''){
  let out = []
  if(window.scrape) out = scrape

  let txt = await fetch(dict._path + 'final/baidu-'+x+'.json').then(x=>x.json())
  for(let i in txt){
    let sub = txt[i].match(/synonym.+antonym/s)?.at(0)
    if(!sub) continue
    let mm = sub.matchAll(/zici">([^<]+)/g)
    let vv
    let tt = txt[i].match(/<title>([^_]+)/s)?.at(1)
    for(vv=mm.next(); !vv.done; vv=mm.next()){
      // skip pairs already recorded in either order
      if(out.includes(tt + '\t' + vv.value[1]) || out.includes(vv.value[1] + '\t' + tt)) {
        continue
      }
      out.push(tt + '\t' + vv.value[1])
    }
    // break
  }
  return out
}

async function scrape_baidu(){

  let out, i, list, list2
  list = await fetch(dict._path + 'final/char.txt').then(x=>x.text())
  list = list.split('\n')
  out = []

  list2 = await fetch(dict._path + 'final/baidu-final.json').then(x=>x.json())
  list2 = list2.join('\t').split('\t')

  for(i of list2) if(!list.includes(i) && i.length == 2) out.push(i)
  console.log(list2.length, out.length)

  list2 = await fetch(dict._path + 'Keson96_SynoCN/syno_from_baidu_hanyu.txt').then(x=>x.text())
  list2 = list2.replaceAll('\n', '\t').split('\t')
  for(i of list2) if(!list.includes(i) && i.length == 2) out.push(i)
  console.log(list2.length, out.length)

  window.scrape = out
}

async function load_zdic3(){
  let i, out, txt

  txt = await fetch(dict._path + 'final/xinhua-final.json').then(x=>x.json())
  out = {}
  for(i=0; i<txt.length; i++){
    if(txt[i].ci == undefined) continue

    if(out[txt[i].ci]){
      console.warn(txt[i].ci)
      out[txt[i].ci] += '\n+ ' + txt[i].explanation
    }
    else out[txt[i].ci] = '+ ' + txt[i].explanation
  }
  dict._zdic3 = out

}

async function merge_zdic(){
  let i, out = [], out2 = []

  for(i in dict._zdic3) {
    if(!(i in dict._zdic2))
      out.push(i)
    else if(dict._zdic2[i].match(/^\+ /mg)?.length != dict._zdic3[i].match(/^\+ /mg)?.length)
      out2.push(i)
  }
  
  window.scrape = out
  window.scrape2 = out2
}

async function merge_zdic_chengyu(){
  dict._chengyu = await fetch(dict._path + 'pwxcoo_chinese-xinhua/data/idiom.json').then(x=>x.json())
  dict._chengyu2 = await fetch(dict._path + 'final/chengyu-final.json').then(x=>x.json())

  dict._chengyu.keys = {}
  dict._chengyu2.keys = {}

  for(let i in dict._chengyu) {
    dict._chengyu.keys[dict._chengyu[i].word] = ''
  }
  for(let i in dict._chengyu2) {
    dict._chengyu2.keys[dict._chengyu2[i].word] = ''
  }

  let out = []; 

  for(let i in dict._chengyu.keys) {
    if(!(i in dict._chengyu2.keys))
      out.push(i); 
  }
  
  window.scrape = out

}

async function merge_zdic_xhy(){
  dict._xhy = await fetch(dict._path + 'pwxcoo_chinese-xinhua/data/xiehouyu.json').then(x=>x.json())
  dict._xhy2 = await fetch(dict._path + 'final/xhy-final.json').then(x=>x.json())

  dict._xhy.keys = {}
  dict._xhy2.keys = {}

  for(let i in dict._xhy) {
    dict._xhy.keys[dict._xhy[i].riddle?.trim()] = ''
  }
  for(let i in dict._xhy2) {
    dict._xhy2.keys[dict._xhy2[i].riddle?.trim()] = ''
  }

  let out = []; 

  for(let i in dict._xhy.keys) {
    if(!(i in dict._xhy2.keys))
      out.push(i); 
  }
  
  window.scrape = out

}


async function merge_zdic_zi(){
  if(!dict._zdic) await load_zdic()
  
  dict._zdic_zi = await fetch(dict._path + 'final/words-final.json').then(x=>x.json())
  
  dict._zdic_zi.keys = {}

  for(let i in dict._zdic_zi) {
    dict._zdic_zi.keys[dict._zdic_zi[i].word?.trim()] = dict._zdic_zi[i]
  }

  let out = []
  for(let i in dict._zdic) {
    if(!(i in dict._zdic_zi.keys)) out.push(i)
  }

  scrape = out
}