I was looking for a way to convert spelled-out numeric values (like twenty-five, or one hundred) into real numbers (25, 100) and came up with the solution below.
At the moment, the code recognizes spelled-out numbers of up to 4 words, with values up to the "hundreds of thousands".
In the future I will try to increase the number of words supported and add larger values such as millions and billions.
Here is the Excel VBA code, with comments explaining what is happening where.
'Words to Numbers converter
'Limitations exist. Converts spelled-out numbers of up to 4 words into digits
'
'30/04/2020
'Author: Umidjon Rakhmonberdiev (c)
'Not for distribution/sale/sharing
Function get_number(mstr)
    'Converts the spelled-out number found at the END of mstr into a numeric value.
    '
    'Parameters:
    '   mstr - text to analyse, e.g. "converting following number - three thousand sixty-five".
    '          Only the trailing words (at most four) are examined.
    '
    'Returns:
    '   The parsed number (Integer/Long/Double inside a Variant), or -1 when the last
    '   word is not a recognised number word (or the text is empty).
    '
    'What is recognised (reading the words from last to first):
    '   two (2), twenty (20), twenty two / twenty-two (22),
    '   two hundred (200), two hundred twelve (212), two hundred twenty two (222),
    '   two thousand (2000), two thousand two (2002), two thousand twenty two (2022),
    '   hundred thousand (100000), two hundred thousand (200000),
    '   a (1), half (0.5), "two and a half" (2.5), "25 and a half" (25.5).
    '
    'Known limitations:
    '   - No more than four words are read, so "two thousand two hundred twenty two"
    '     yields 222 (only the last four words are used).
    '   - In the fourth position only ONES are understood, so
    '     "twenty thousand twenty five" yields 1025.
    '   - Millions and billions are not parsed.
    Dim cleaned As String
    Dim words() As String
    Dim parsed_value As Variant
    Dim i As Long

    get_number = -1

    'Matching is done on lower-case text so "Twenty Five" works like "twenty five"
    cleaned = LCase$(CStr(mstr))

    'Turn a hyphen that joins two letters into a space ("sixty-five" -> "sixty five").
    'Hyphens next to digits or spaces are left alone so "-25" or " - " are not altered.
    For i = 2 To Len(cleaned) - 1
        If Mid$(cleaned, i, 1) = "-" Then
            If Mid$(cleaned, i - 1, 1) Like "[a-z]" And Mid$(cleaned, i + 1, 1) Like "[a-z]" Then
                Mid$(cleaned, i, 1) = " "
            End If
        End If
    Next i

    'Remove leading/trailing blanks and collapse runs of blanks so Split yields no empty words
    cleaned = Trim$(cleaned)
    Do While InStr(cleaned, "  ") > 0
        cleaned = Replace(cleaned, "  ", " ")
    Loop

    'Nothing to parse: Split("") would return an array with UBound = -1
    If Len(cleaned) = 0 Then Exit Function

    words = Split(cleaned, " ")
    parsed_value = gn_parse_words(words)

    'Only positive results are reported; everything else stays -1
    If parsed_value > 0 Then get_number = parsed_value
End Function

'Looks word up in names and returns the entry of values at the same position, or -1 if absent.
Private Function gn_word_value(ByVal word As String, names As Variant, values As Variant) As Variant
    Dim i As Long

    gn_word_value = -1
    For i = LBound(names) To UBound(names)
        If names(i) = word Then
            gn_word_value = values(i)
            Exit Function
        End If
    Next i
End Function

'Parses up to four trailing number words (already lower-cased, no empty entries).
'Returns the number, or -1 when the last word is not a number word.
Private Function gn_parse_words(words() As String) As Variant
    Dim ones As Variant, ones_num As Variant
    Dim teens As Variant, teens_num As Variant
    Dim tys As Variant, tys_num As Variant
    Dim n As Long
    Dim total As Variant
    Dim hit As Variant
    Dim matched As Boolean

    ones = Array("one", "two", "three", "four", "five", "six", "seven", "eight", "nine")
    ones_num = Array(1, 2, 3, 4, 5, 6, 7, 8, 9)
    '"ten" combines exactly like the teens (one hundred ten, ten thousand)
    teens = Array("ten", "eleven", "twelve", "thirteen", "fourteen", "fifteen", "sixteen", "seventeen", "eighteen", "nineteen")
    teens_num = Array(10, 11, 12, 13, 14, 15, 16, 17, 18, 19)
    '"forty" is the correct spelling; the misspelling "fourty" is still accepted
    tys = Array("twenty", "thirty", "forty", "fourty", "fifty", "sixty", "seventy", "eighty", "ninety")
    tys_num = Array(20, 30, 40, 40, 50, 60, 70, 80, 90)

    n = UBound(words) 'index of the last word; parsing runs from last to first
    gn_parse_words = -1

    '---------------- 1st word (the last word of the text) ----------------
    'Stand-alone words that are never combined with a preceding number word
    If words(n) = "a" Then
        gn_parse_words = 1
        Exit Function
    End If

    If words(n) = "half" Then
        total = 0.5
        '"<x> and a half" needs three more words in front of "half".
        'The index check must come first: VBA's And does not short-circuit.
        If n >= 3 Then
            If words(n - 1) = "a" And words(n - 2) = "and" Then
                If IsNumeric(words(n - 3)) Then
                    'written like "25 and a half"
                    total = CDbl(words(n - 3)) + 0.5
                Else
                    'written like "two and a half"
                    hit = gn_word_value(words(n - 3), ones, ones_num)
                    If hit > 0 Then total = hit + 0.5
                End If
            End If
        End If
        gn_parse_words = total
        Exit Function
    End If

    'ONES, then TEENS, then TYS, then the scale words
    total = gn_word_value(words(n), ones, ones_num)
    If total < 0 Then total = gn_word_value(words(n), teens, teens_num)
    If total < 0 Then total = gn_word_value(words(n), tys, tys_num)
    If total < 0 Then
        If words(n) = "hundred" Then
            total = 100
        ElseIf words(n) = "thousand" Then
            total = 1000
        Else
            Exit Function 'last word is not a number word: result stays -1
        End If
    End If

    gn_parse_words = total
    If n = 0 Then Exit Function 'there is no second word

    '---------------- 2nd word ----------------
    matched = False

    '1st word was ONES: a preceding TYS gives e.g. FIFTY FIVE (55)
    If total < 10 Then
        hit = gn_word_value(words(n - 1), tys, tys_num)
        If hit > 0 Then
            total = total + hit
            matched = True
        End If
    End If

    If Not matched Then
        If words(n - 1) = "hundred" Then
            If total = 1000 Then
                total = 100000 'HUNDRED THOUSAND
            Else
                total = total + 100 'HUNDRED FIVE (105), HUNDRED FIFTEEN (115), HUNDRED FIFTY (150)
            End If
            matched = True
        ElseIf words(n - 1) = "thousand" Then
            total = total + 1000 'THOUSAND FIVE (1005), THOUSAND FIFTY (1050)
            matched = True
        ElseIf total = 100 Or total = 1000 Then
            '1st word was HUNDRED or THOUSAND: the 2nd word is its multiplier
            '(FIVE HUNDRED, FIFTEEN HUNDRED, TWENTY THOUSAND)
            hit = gn_word_value(words(n - 1), ones, ones_num)
            If hit < 0 Then hit = gn_word_value(words(n - 1), teens, teens_num)
            If hit < 0 Then hit = gn_word_value(words(n - 1), tys, tys_num)
            If hit > 0 Then
                total = CLng(total) * hit 'CLng: 1000 * 90 does not fit an Integer
                matched = True
            End If
        End If
    End If

    gn_parse_words = total
    If Not matched Then Exit Function 'second word is not part of the number
    If n = 1 Then Exit Function 'there is no third word

    '---------------- 3rd word ----------------
    If total > 20 And total < 100 Then
        'So far "twenty five": the 3rd word can only be HUNDRED or THOUSAND
        If words(n - 2) = "hundred" Then
            total = total + 100
        ElseIf words(n - 2) = "thousand" Then
            total = total + 1000
        Else
            Exit Function 'third word is not part of the number
        End If

        gn_parse_words = total
        If n = 2 Then Exit Function 'there is no fourth word

        '---------------- 4th word ----------------
        'So far "hundred twenty five" / "thousand twenty five": a leading ONES is the
        'multiplier of that hundred/thousand (TWO hundred twenty five = 225)
        hit = gn_word_value(words(n - 3), ones, ones_num)
        If hit > 0 Then
            If total < 1000 Then
                total = hit * 100 + (total - 100)
            Else
                total = hit * 1000& + (total - 1000)
            End If
        End If
        gn_parse_words = total
        Exit Function
    End If

    'So far "hundred one", "thousand eleven" or "hundred thousand":
    'a leading ONES replaces the implicit "one" in front of the scale word
    hit = gn_word_value(words(n - 2), ones, ones_num)
    If hit > 0 Then
        If total = 100000 Then
            'only "hundred thousand" produces exactly 100000: TWO hundred thousand = 200000
            total = hit * 100000
        ElseIf total > 100 And total < 1000 Then
            total = hit * 100 + (total - 100)
        ElseIf total > 1000 Then
            total = hit * 1000& + (total - 1000)
        End If
    End If
    gn_parse_words = total
End Function
Feel free to ask me any questions via the contacts page.