dotfiles/talon/community/community-cursorless-0.4.0/code/numbers.py
2024-11-16 20:27:38 -07:00

186 lines
7.4 KiB
Python
Raw Blame History

This file contains invisible Unicode characters

This file contains invisible Unicode characters that are indistinguishable to humans but may be processed differently by a computer. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

from talon import Context, Module, actions
from typing import List, Optional, Union, Iterator
# Talon Module instance; its `capture` decorator is applied to `number_string`
# further down in this file.
mod = Module()
# Talon Context instance; its `capture` decorator is applied to the remaining
# capture functions (digit_string, digits, number, number_signed, number_small).
ctx = Context()
# Spoken vocabulary, grouped by kind. Position in each list determines the
# numeric value assigned in the lookup tables below.
digits = "zero one two three four five six seven eight nine".split()
teens = (
    "eleven twelve thirteen fourteen fifteen sixteen seventeen eighteen nineteen"
).split()
tens = "ten twenty thirty forty fifty sixty seventy eighty ninety".split()
scales = (
    "hundred thousand million billion trillion quadrillion quintillion "
    "sextillion septillion octillion nonillion decillion"
).split()

# Word -> value lookup tables. Key insertion order is kept exactly as before so
# that strings built by joining these keys come out unchanged.
digits_map = dict(zip(digits, range(len(digits))))
digits_map["oh"] = 0  # spoken alias for zero
teens_map = dict(zip(teens, range(11, 11 + len(teens))))
tens_map = dict(zip(tens, range(10, 10 * (len(tens) + 1), 10)))
# Every scale after "hundred" is a successive power of one thousand;
# "hundred" itself is added last as the odd one out.
scales_map = {word: 1000**power for power, word in enumerate(scales[1:], start=1)}
scales_map["hundred"] = 100

# Union of all the tables above: digits, then teens, then tens, then scales.
numbers_map = {**digits_map, **teens_map, **tens_map, **scales_map}
def parse_number(l: List[str]) -> str:
    """Turn a list of spoken number words into a string of digits.

    The words are first reduced to small numbers and scale words by
    `scan_small_numbers`; each scale in `scales` is then folded in, in the
    order `scales` lists them, by `parse_scale`. Whatever items remain are
    stringified and concatenated.
    """
    tokens = list(scan_small_numbers(l))
    for scale_word in scales:
        tokens = parse_scale(scale_word, tokens)
    return "".join(map(str, tokens))
def scan_small_numbers(l: List[str]) -> Iterator[Union[str, int]]:
    """
    Walk a list of number words left to right, yielding ints and scale words.

    - Every occurrence of "and" is discarded.
    - Scale words ("hundred", "thousand", "million", ...) are yielded unchanged.
    - Any other word is replaced by its value from `numbers_map`.
    - A tens word directly followed by a non-zero digit word is fused with it,
      eg. ["twenty", "one"] -> [21]. "ten" never fuses and a following
      "zero"/"oh" is never fused:
          ["ten", "three"]  -> [10, 3]
          ["fifty", "zero"] -> [50, 0]

    A word that is in none of the lookup tables raises KeyError.
    """
    words = [word for word in l if word != "and"]
    total = len(words)
    pos = 0
    while pos < total:
        word = words[pos]
        pos += 1

        # scale words are left for parse_scale to deal with
        if word in scales_map:
            yield word
            continue

        # a tens word (other than "ten") followed by a non-zero digit fuses
        fuses_with_next = (
            word in tens_map
            and word != "ten"
            and pos < total
            and digits_map.get(words[pos], 0) != 0
        )
        if fuses_with_next:
            yield numbers_map[word] + numbers_map[words[pos]]
            pos += 1
        else:
            yield numbers_map[word]
def parse_scale(scale: str, l: List[Union[str, int]]) -> List[Union[str, int]]:
    """Fold every occurrence of one scale word into the numbers around it.

    Looks through a list of mixed numbers & strings for the pattern:

        <multiplier> <scale> <remainder>

    where <scale> is a scale word like "hundred", "thousand", "million", etc
    and multiplier and remainder are numbers of the appropriate size. For
    example:

        parse_scale("hundred", [1, "hundred", 2]) -> [102]
        parse_scale("thousand", [12, "thousand", 3, 45]) -> [12345]

    All scales of lower magnitude are assumed to have been parsed already;
    don't call parse_scale("thousand") until you've called
    parse_scale("hundred").

    Args:
        scale: the scale word to fold; must be a key of `scales_map`.
        l: the mixed list of ints and not-yet-parsed scale words. It is not
            modified; work happens on the slices produced by `split_list`.

    Returns:
        A new list in which each occurrence of `scale` has been replaced by a
        single int combining its multiplier and absorbed remainder.

    Raises:
        KeyError: if `scale` is not a key of `scales_map`.
    """
    scale_value = scales_map[scale]
    scale_digits = len(str(scale_value))

    # Split the list on the desired scale word, then parse from left to right.
    left, *splits = split_list(scale, l)
    for right in splits:
        # (1) Figure out the multiplier by looking to the left of the scale
        # word. We ignore non-integers because they are scale words that we
        # haven't processed yet; this strategy means that "thousand hundred"
        # gets parsed as 1,100 instead of 100,000, but "hundred thousand" is
        # parsed correctly as 100,000.
        before = 1  # default multiplier
        if left and isinstance(left[-1], int) and left[-1] != 0:
            before = left.pop()

        # (2) Absorb numbers to the right, eg. in [1, "thousand", 1, 26], "1
        # thousand" absorbs [1, 26] to make 1,126. Leading ints are consumed
        # for as long as their concatenated digits stay shorter than the
        # scale value itself.
        after = ""
        absorbed = 0
        for item in right:
            if not isinstance(item, int):
                break
            candidate = after + str(item)
            if len(candidate) >= scale_digits:
                break
            after = candidate
            absorbed += 1
        remainder = int(after) if after else 0

        # (3) Push the parsed number into place, append whatever was left
        # unparsed, and continue.
        left.append(before * scale_value + remainder)
        left.extend(right[absorbed:])
    return left
def split_list(value, l: list) -> Iterator[list]:
    """Split a list into the pieces that lie between occurrences of a value.

    Args:
        value: the separator; matched the way `list.index` matches.
        l: the list to split. It is not modified; every piece is a new slice.

    Yields:
        One list per piece, in order. There is always exactly one more piece
        than there are occurrences of `value`, so leading, trailing and
        adjacent separators produce empty lists, and an input without the
        separator yields a single copy of the input.
    """
    start = 0
    while True:
        try:
            i = l.index(value, start)
        except ValueError:
            # no further separator: what remains is the final piece
            break
        yield l[start:i]
        start = i + 1
    yield l[start:]
# # ---------- TESTS (uncomment to run) ----------
# def test_number(expected, string):
# print('testing:', string)
# l = list(scan_small_numbers(string.split()))
# print(" scan --->", l)
# for scale in scales:
# old = l
# l = parse_scale(scale, l)
# if scale in old: print(" parse -->", l)
# else: assert old == l, "parse_scale should do nothing if the scale does not occur in the list"
# result = "".join(str(n) for n in l)
# assert result == parse_number(string.split())
# assert str(expected) == result, f"parsing {string!r}, expected {expected}, got {result}"
# test_number(105000, "one hundred and five thousand")
# test_number(1000000, "one thousand thousand")
# test_number(1501000, "one million five hundred one thousand")
# test_number(1501106, "one million five hundred and one thousand one hundred and six")
# test_number(123, "one two three")
# test_number(123, "one twenty three")
# test_number(104, "ten four") # borderline, but valid in some dialects
# test_number(1066, "ten sixty six") # a common way of saying years
# test_number(1906, "nineteen oh six") # year
# test_number(2001, "twenty oh one") # year
# test_number(2020, "twenty twenty")
# test_number(1001, "one thousand one")
# test_number(1010, "one thousand ten")
# test_number(123456, "one hundred and twenty three thousand and four hundred and fifty six")
# test_number(123456, "one twenty three thousand four fifty six")
# ## failing (and somewhat debatable) tests from old numbers.py
# #test_number(10000011, "one million one one")
# #test_number(100001010, "one million ten ten")
# #test_number(1050006000, "one hundred thousand and five thousand and six thousand")
# ---------- CAPTURES ----------
# Parenthesised "a|b|c" alternations of the known words, in lookup-table key
# order, for interpolation into the capture rules.
alt_digits = f"({'|'.join(digits_map)})"
alt_teens = f"({'|'.join(teens_map)})"
alt_tens = f"({'|'.join(tens_map)})"
alt_scales = f"({'|'.join(scales_map)})"
number_word = f"({'|'.join(numbers_map)})"
# TODO: allow things like "double eight" for 88
@ctx.capture("digit_string", rule=f"({alt_digits} | {alt_teens} | {alt_tens})+")
def digit_string(m) -> str:
    """Parses a run of digit, teen and tens words, returning it as a digit string."""
    spoken = list(m)
    return parse_number(spoken)
@ctx.capture("digits", rule="<digit_string>")
def digits(m) -> int:
    """Parses a phrase representing a digit sequence and returns its integer value."""
    # NOTE: this def rebinds the module-level name `digits`, which up to this
    # point referred to the list of digit words; that list has already been
    # consumed to build digits_map by the time this line runs.
    digit_text = m.digit_string
    return int(digit_text)
@mod.capture(rule=f"{number_word}+ (and {number_word}+)*")
def number_string(m) -> str:
    """Parses a spoken number phrase and returns the number as a string."""
    spoken = list(m)
    return parse_number(spoken)
@ctx.capture("number", rule="<user.number_string>")
def number(m) -> int:
    """Parses a spoken number phrase and returns its integer value."""
    number_text = m.number_string
    return int(number_text)
@ctx.capture("number_signed", rule=f"[negative|minus] <number>")
def number_signed(m):
    """Returns the captured number, negated when the phrase starts with "negative" or "minus"."""
    value = m[-1]  # the <number> part is always the last item
    if m[0] in ("negative", "minus"):
        return -value
    return value
@ctx.capture(
    "number_small", rule=f"({alt_digits} | {alt_teens} | {alt_tens} [{alt_digits}])"
)
def number_small(m):
    """Parses a single digit, teen, or tens word with an optional digit, returning an integer."""
    digit_text = parse_number(list(m))
    return int(digit_text)