-- Module:BaseConvert
--
-- Une page de Wikipédia, l'encyclopédie libre.
--
-- Documentation [créer] [purger]
-- Convert to a string and normalize to uppercase ASCII.
function _normalizeNumber(s)
    s = '' .. s -- convert to string
    -- Check if there are non-ASCII characters
    if not s:find('[\128-\255]') then -- string is ASCII only (fast path for lower memory use)
        -- Remove whitespaces, and normalize ASCII letters to uppercase digits.
        return s:gsub('%s', ''):upper()
    end
    -- Handle non-ASCII strings, assumed to be encoded with UTF-8 as used by module mw.ustring:
    -- Basic Latin variants.
    s = mw.ustring.gsub(s, '[!-~]', function(s) return mw.ustring.char(mw.ustring.codepoint(s, 1) - 0xFF01 + 0x21) end) -- Fullwidth (also punctuation, symbols and letters)
    -- TODO: variants in supplementary planes.
    --s = mw.ustring.gsub(mw.ustring.gsub(s, '[-]', function(s) return mw.ustring.char(mw.ustring.codepoint(s, 1) - 0x1D7CE + 0x30) end) -- Mathematical bold
    --s = mw.ustring.gsub(mw.ustring.gsub(s, '[-]', function(s) return mw.ustring.char(mw.ustring.codepoint(s, 1) - 0x1D7D8 + 0x30) end) -- Mathematical double-struck
    --s = mw.ustring.gsub(mw.ustring.gsub(s, '[-]', function(s) return mw.ustring.char(mw.ustring.codepoint(s, 1) - 0x1D7E2 + 0x30) end) -- Mathematical sans-serif
    --s = mw.ustring.gsub(mw.ustring.gsub(s, '[-]', function(s) return mw.ustring.char(mw.ustring.codepoint(s, 1) - 0x1D7EC + 0x30) end) -- Mathematical sans-serif bold
    --s = mw.ustring.gsub(mw.ustring.gsub(s, '[-]', function(s) return mw.ustring.char(mw.ustring.codepoint(s, 1) - 0x1D7F6 + 0x30) end) -- Mathematical monospace
    -- Decimal digits and signs variants.
    s = mw.ustring.gsub(s, '[⁺₊]', '+') -- superscript/subscript plus sign
    s = mw.ustring.gsub(s, '[⁻₋−]', '-') -- superscript/subscript minus sign
    s = mw.ustring.gsub(s, '⁰', '0') -- superscript digit 0
    s = mw.ustring.gsub(s, '¹', '1') -- superscript digit 1
    s = mw.ustring.gsub(s, '²', '2') -- superscript digit 2
    s = mw.ustring.gsub(s, '³', '3') -- superscript digit 3
    s = mw.ustring.gsub(s, '[⁴-⁹]', function(s) return mw.ustring.char(mw.ustring.codepoint(s, 1) - 0x2074 + 0x34) end) -- superscript digits 4-9
    s = mw.ustring.gsub(s, '[₀-₉]', function(s) return mw.ustring.char(mw.ustring.codepoint(s, 1) - 0x2080 + 0x30) end) -- subscript digits
    -- Alternate decimal digits from various Unicode scripts.
    s = mw.ustring.gsub(s, '[٠-٩]', function(s) return mw.ustring.char(mw.ustring.codepoint(s, 1) - 0x660 + 0x30) end) -- Arabic
    s = mw.ustring.gsub(s, '[۰-۹]', function(s) return mw.ustring.char(mw.ustring.codepoint(s, 1) - 0x6F0 + 0x30) end) -- Arabic-Indic
    s = mw.ustring.gsub(s, '[߀-߉]', function(s) return mw.ustring.char(mw.ustring.codepoint(s, 1) - 0x7C0 + 0x30) end) -- N'ko
    s = mw.ustring.gsub(s, '[०-९]', function(s) return mw.ustring.char(mw.ustring.codepoint(s, 1) - 0x966 + 0x30) end) -- Devanagari
    s = mw.ustring.gsub(s, '[০-৭]', function(s) return mw.ustring.char(mw.ustring.codepoint(s, 1) - 0x9E6 + 0x30) end) -- Bengali
    s = mw.ustring.gsub(s, '[੦-੯]', function(s) return mw.ustring.char(mw.ustring.codepoint(s, 1) - 0xA66 + 0x30) end) -- Gurmukhi
    s = mw.ustring.gsub(s, '[૦-૯]', function(s) return mw.ustring.char(mw.ustring.codepoint(s, 1) - 0xAE6 + 0x30) end) -- Gujarati
    s = mw.ustring.gsub(s, '[୦-୯]', function(s) return mw.ustring.char(mw.ustring.codepoint(s, 1) - 0xB66 + 0x30) end) -- Oriya
    s = mw.ustring.gsub(s, '[௦-௯]', function(s) return mw.ustring.char(mw.ustring.codepoint(s, 1) - 0xBE6 + 0x30) end) -- Tamil
    s = mw.ustring.gsub(s, '[౦-౯]', function(s) return mw.ustring.char(mw.ustring.codepoint(s, 1) - 0xC66 + 0x30) end) -- Telugu
    s = mw.ustring.gsub(s, '[೦-೯]', function(s) return mw.ustring.char(mw.ustring.codepoint(s, 1) - 0xCE6 + 0x30) end) -- Kannada
    s = mw.ustring.gsub(s, '[൦-൯]', function(s) return mw.ustring.char(mw.ustring.codepoint(s, 1) - 0xD66 + 0x30) end) -- Malayalam
    s = mw.ustring.gsub(s, '[෦-෯]', function(s) return mw.ustring.char(mw.ustring.codepoint(s, 1) - 0xDE6 + 0x30) end) -- Sinhala
    s = mw.ustring.gsub(s, '[๐-๙]', function(s) return mw.ustring.char(mw.ustring.codepoint(s, 1) - 0xE50 + 0x30) end) -- Thai
    s = mw.ustring.gsub(s, '[໐-໙]', function(s) return mw.ustring.char(mw.ustring.codepoint(s, 1) - 0xED0 + 0x30) end) -- Lao
    s = mw.ustring.gsub(s, '[༠-༩]', function(s) return mw.ustring.char(mw.ustring.codepoint(s, 1) - 0xF20 + 0x30) end) -- Tibetan
    s = mw.ustring.gsub(s, '[၀-၉]', function(s) return mw.ustring.char(mw.ustring.codepoint(s, 1) - 0x1040 + 0x30) end) -- Myanmar
    s = mw.ustring.gsub(s, '[႐-႙]', function(s) return mw.ustring.char(mw.ustring.codepoint(s, 1) - 0x1090 + 0x30) end) -- Myanmar Shan
    s = mw.ustring.gsub(s, '[០-៩]', function(s) return mw.ustring.char(mw.ustring.codepoint(s, 1) - 0x17E0 + 0x30) end) -- Khmer
    s = mw.ustring.gsub(s, '[᠐-᠙]', function(s) return mw.ustring.char(mw.ustring.codepoint(s, 1) - 0x1810 + 0x30) end) -- Mongolian
    s = mw.ustring.gsub(s, '[᥆-᥏]', function(s) return mw.ustring.char(mw.ustring.codepoint(s, 1) - 0x1946 + 0x30) end) -- Limbu
    s = mw.ustring.gsub(s, '[᧐-᧙]', function(s) return mw.ustring.char(mw.ustring.codepoint(s, 1) - 0x19D0 + 0x30) end) -- New Tai Lue
    s = mw.ustring.gsub(s, '[᪀-᪉]', function(s) return mw.ustring.char(mw.ustring.codepoint(s, 1) - 0x1A80 + 0x30) end) -- Tai Tham Hora
    s = mw.ustring.gsub(s, '[᪐-᪙]', function(s) return mw.ustring.char(mw.ustring.codepoint(s, 1) - 0x1A90 + 0x30) end) -- Tai Tham Tham
    s = mw.ustring.gsub(s, '[᭐-᭙]', function(s) return mw.ustring.char(mw.ustring.codepoint(s, 1) - 0x1B50 + 0x30) end) -- Balinese
    s = mw.ustring.gsub(s, '[᮰-᮹]', function(s) return mw.ustring.char(mw.ustring.codepoint(s, 1) - 0x1BB0 + 0x30) end) -- Sundanese
    s = mw.ustring.gsub(s, '[᱀-᱉]', function(s) return mw.ustring.char(mw.ustring.codepoint(s, 1) - 0x1C40 + 0x30) end) -- Lepcha
    s = mw.ustring.gsub(s, '[᱐-᱙]', function(s) return mw.ustring.char(mw.ustring.codepoint(s, 1) - 0x1C50 + 0x30) end) -- Ol Chiki
    s = mw.ustring.gsub(s, '[꘠-꘩]', function(s) return mw.ustring.char(mw.ustring.codepoint(s, 1) - 0xA620 + 0x30) end) -- Vai
    s = mw.ustring.gsub(s, '[꣐-꣙]', function(s) return mw.ustring.char(mw.ustring.codepoint(s, 1) - 0xA8D0 + 0x30) end) -- Saurashtra
    s = mw.ustring.gsub(s, '[꤀-꤉]', function(s) return mw.ustring.char(mw.ustring.codepoint(s, 1) - 0xA900 + 0x30) end) -- Kayah Li
    s = mw.ustring.gsub(s, '[꧐-꧙]', function(s) return mw.ustring.char(mw.ustring.codepoint(s, 1) - 0xA9D0 + 0x30) end) -- Javanese
    s = mw.ustring.gsub(s, '[꧰-꧹]', function(s) return mw.ustring.char(mw.ustring.codepoint(s, 1) - 0xA9F0 + 0x30) end) -- Myanmar Tai Laing
    s = mw.ustring.gsub(s, '[꩐-꩙]', function(s) return mw.ustring.char(mw.ustring.codepoint(s, 1) - 0xAA50 + 0x30) end) -- Cham
    s = mw.ustring.gsub(s, '[꯰-꯹]', function(s) return mw.ustring.char(mw.ustring.codepoint(s, 1) - 0xABF0 + 0x30) end) -- Meetei Mayek
    -- TODO: decimal digits in supplementary planes:
    --s = mw.ustring.gsub(mw.ustring.gsub(s, '[-]', function(s) return mw.ustring.char(mw.ustring.codepoint(s, 1) - 0x104A0 + 0x30) end) -- Osmanya
    --s = mw.ustring.gsub(mw.ustring.gsub(s, '[-]', function(s) return mw.ustring.char(mw.ustring.codepoint(s, 1) - 0x10D30 + 0x30) end) -- Hanifi Rohingya
    --s = mw.ustring.gsub(mw.ustring.gsub(s, '[-]', function(s) return mw.ustring.char(mw.ustring.codepoint(s, 1) - 0x11066 + 0x30) end) -- Brahmi
    --s = mw.ustring.gsub(mw.ustring.gsub(s, '[-]', function(s) return mw.ustring.char(mw.ustring.codepoint(s, 1) - 0x110F0 + 0x30) end) -- Sora Sompeng
    --s = mw.ustring.gsub(mw.ustring.gsub(s, '[-]', function(s) return mw.ustring.char(mw.ustring.codepoint(s, 1) - 0x11136 + 0x30) end) -- Chakma
    --s = mw.ustring.gsub(mw.ustring.gsub(s, '[-]', function(s) return mw.ustring.char(mw.ustring.codepoint(s, 1) - 0x111D0 + 0x30) end) -- Sharada
    --s = mw.ustring.gsub(mw.ustring.gsub(s, '[-]', function(s) return mw.ustring.char(mw.ustring.codepoint(s, 1) - 0x112F0 + 0x30) end) -- Khudawadi
    --s = mw.ustring.gsub(mw.ustring.gsub(s, '[-]', function(s) return mw.ustring.char(mw.ustring.codepoint(s, 1) - 0x11450 + 0x30) end) -- Newa
    --s = mw.ustring.gsub(mw.ustring.gsub(s, '[-]', function(s) return mw.ustring.char(mw.ustring.codepoint(s, 1) - 0x114D0 + 0x30) end) -- Tirhuta
    --s = mw.ustring.gsub(mw.ustring.gsub(s, '[-]', function(s) return mw.ustring.char(mw.ustring.codepoint(s, 1) - 0x11650 + 0x30) end) -- Modi
    --s = mw.ustring.gsub(mw.ustring.gsub(s, '[-]', function(s) return mw.ustring.char(mw.ustring.codepoint(s, 1) - 0x116C0 + 0x30) end) -- Takri
    --s = mw.ustring.gsub(mw.ustring.gsub(s, '[-]', function(s) return mw.ustring.char(mw.ustring.codepoint(s, 1) - 0x11730 + 0x30) end) -- Ahom
    --s = mw.ustring.gsub(mw.ustring.gsub(s, '[-]', function(s) return mw.ustring.char(mw.ustring.codepoint(s, 1) - 0x118E0 + 0x30) end) -- Warang Citi
    --s = mw.ustring.gsub(mw.ustring.gsub(s, '[-]', function(s) return mw.ustring.char(mw.ustring.codepoint(s, 1) - 0x11C50 + 0x30) end) -- Bhaiksuki
    --s = mw.ustring.gsub(mw.ustring.gsub(s, '[-]', function(s) return mw.ustring.char(mw.ustring.codepoint(s, 1) - 0x11D50 + 0x30) end) -- Masaram Gondi
    --s = mw.ustring.gsub(mw.ustring.gsub(s, '[-]', function(s) return mw.ustring.char(mw.ustring.codepoint(s, 1) - 0x11DA0 + 0x30) end) -- Gunjala
    --s = mw.ustring.gsub(mw.ustring.gsub(s, '[-]', function(s) return mw.ustring.char(mw.ustring.codepoint(s, 1) - 0x16A60 + 0x30) end) -- Mro
    --s = mw.ustring.gsub(mw.ustring.gsub(s, '[-]', function(s) return mw.ustring.char(mw.ustring.codepoint(s, 1) - 0x16B50 + 0x30) end) -- Pahawh Hmong
    --s = mw.ustring.gsub(mw.ustring.gsub(s, '[-]', function(s) return mw.ustring.char(mw.ustring.codepoint(s, 1) - 0x1E950 + 0x30) end) -- Adlam
    -- Remove Unicode whitespaces, and normalize ASCII letters to uppercase digits.
    return mw.ustring.gsub(s, '%s', ''):upper()
end

function _convert(n, from, base, default, fmt)
    from = tonumber(from) -- optional
    base = tonumber(base) -- if nil, use base from, adjusted below
    default = default or n -- optional, same as input if unspecified
	fmt = fmt or {} -- optional read-only table
	local prefix = fmt.prefix or ''
    local minus = fmt.minus or '-'
    local plus = fmt.plus or ''
	local infix = fmt.infix or ''
    local digits = fmt.digits or '0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ'
    local padzero = mw.ustring.sub(fmt.padzero or digits, 1, 1) or '0'
    local groupsep = fmt.groupsep or ' ' -- thin non-breaking space
    local groupby = tonumber(fmt.groupby) or 3 -- high groups, 2 in India, 4 in CJK
    local grouplo = tonumber(fmt.grouplo) or groupby -- low groups width, 3 in India
    local width = tonumber(fmt.width) or 0 -- minimum, may be larger
    local decimalsep = fmt.decimalsep or '.'
    local precision = tonumber(fmt.precision) -- minimum, may be nil
	local suffix = fmt.suffix or ''
    local num

    -- Capitalize all ASCII lowercase letters and strip all whitespaces, then
    -- check for a leading sign. Do this while the input is still in string form,
    -- because tonumber doesn't support signed numbers in non-10 bases.
    n, num = _normalizeNumber(n):gsub('^+', ''):gsub('^-', '')
    local sign = num > 0 and minus or plus
--[[TODO:
    -- Handle sign format in prefix+suffix pairs (e.g. parentheses vs. spaces)
--]]

    -- Strip off any leading '0[XOB]' (unless they are valid digits) or '#'
--[[TODO:
    -- Check number of hex digits (1, 2, 3, 4, 6 or 8) for RGB or RGBA, as in
    -- CSS, to properly return a normalized RGBA color with 8 digits, i.e.:
    -- * GRAY '#1'      is the same as RGBA '#111111FF' (non-standard),
    -- * GRAY '#12'     is the same as RGBA '#121212FF' (non-standard),
    -- * RGB  '#123'    is the same as RGBA '#112233FF',
    -- * RGBA '#1234'   is the same as RGBA '#11223344', and
    -- * RGB  '#123456' is the same as RGBA '#123456FF';
    -- * other lengths are invalid/ambiguous input.
    -- Need a special conversion of hex digits (in lengths 3 and 4) to multiply
    -- them by 0x11 and treat them in input base 256. Then when formatting the
    -- output in base 16, use smallest format if there's no width specified,
    -- or use width == 1, 3, 4, 6 or 8 for a fixed format in that base.
    -- For formatting in base 10, 100 or 256, use commas between channels, and
    -- treat output base 100 specially (using renormalized percentages).
    if not from or from == 256 then
        n, num = n:gsub('^#', '')
        if num > 0 then
            from = 256
        end
    end
--]]
    if not from or from == 16 then
        n, num = n:gsub('^0X', '') -- 'X' may be valid digit 33 in base from
        if num > 0 then
            from = 16
        end
    end
    if not from or from == 8 then
        n, num = n:gsub('^0O', '') -- 'O' may be valid digit 24 in base from
        if num > 0 then
            from = 8
        end
    end
    if not from or from == 2 then
        n, num = n:gsub('^0B', '') -- 'B' may be valid digit 11 in base from
        if num > 0 then
            from = 2
        end
    end
    from = from or 10 -- default input base if no prefix was matched

    -- Parse the input n in base from, compute a double in num.
--[[TODO: Need to extract exponent and evaluate the number in relevant base.
    if from == 10 then
        -- Handle scientific decimal notations '5.2e3' or '5200000e-3'.
        n = n:gsub('E[-+]?[0-9]+$', '') -- note: already capitalized above
    elseif from == 16 then -- prefixed by '0x' (detected and stripped above)
        -- Handle scientific hexadecimal notations '0xA.B01p8' or '0xAB01p-8'.
        n = n:gsub('P[-+]?[0-9]+$', '') -- note: already capitalized above
    end
]]
    num = tonumber(n, from)
    if not num then
        return default
    end
--[[TODO: Adjust with the extracted exponent.]]

    -- Handle parameters for output format.
--[[TODO:
   -- Handle special bases 100 and 256 for CSS colors.
--]]
    base = base and base >= 2 and base <= 36 and base
        or from -- Default base for output is the same base as for input.

    -- Decompose the number in parts.
--[[TODO:
    -- Decompose the exponent for the relevant output base if needed.
    -- Round number to the correct precision (using IEEE even rounding mode).
--]]
    -- Decompose the mantissa into integral and fractional parts.
    local i, f = math.modf(num)

    -- Format the integral part.
    local intPart = ''
    repeat
        num, i = i % base, math.floor(i / base)
        intPart = digits:sub(num + 1, num + 1) .. intPart
    until i == 0
    while #intPart < width do
        intPart = padzero .. intPart
    end

    -- Format the fractional part.
    fracPart = ''
    while f > 0 and #fracPart < (precision or 10) do
        num, f = math.modf(f * base)
        fracPart = fracPart .. mw.ustring.gsub(digits, num + 1, num + 1)
    end
    -- Add trailing zeros if needed, remove them otherwise.
    if precision then
        for num = 1, precision - #fracPart do
            fracPart = fracPart .. padzero
        end
    else
        fracPart = mw.ustring.gsub(fracPart, padzero .. '*$', '')
    end
    -- Add the radix point if needed.
    if #fracPart > 0 then
--[[TODO: Use a localizable fractional separator.]]
        fracPart = separator .. fracPart
    end

--[[TODO:
    -- Group digits in integral and fractional parts using a group separator.
    -- Format the exponent part.
    -- Adjust the sign notation in mantissa and exponent parts.
    -- Determine the relative position of each part (notably the sign).
--]]
    -- Return the composition of all parts.
    return prefix .. sign .. infix .. intPart .. fracPart .. suffix
end

function convert(frame)
    -- Allow for invocation via #invoke or directly from another module
    local args
    if frame == mw.getCurrentFrame() then
        args = frame.args
    else
        args = frame
    end
    return _convert(args.n, args.from, args.base, args.default, {
        prefix = args.prefix,
        minus = args.minus,
        plus = args.plus,
        infix = args.infix,
        digits = args.digits,
        padzero = args.padzero,
        groupby = args.groupby,
        grouplo = args.grouplo,
        width = args.width,
        decimalsep = args.decimalsep,
        precision = args.precision,
        suffix = args.suffix,
    })
end

-- Exports from this module.
return {
    -- For use in Mediawiki with #invoke:
    convert = convert,
    -- For use from Lua only, or local debugging and tests:
    _normalizeNumber = _normalizeNumber,
    _convert = _convert,
}