samcarter
> This is part of the Summer of Code 2022 series, see https://topanswers.xyz/tex?q=2059 for more information
If the numbers 1 to 5 are written out in words: one, two, three, four, five, then there are 3 + 3 + 5 + 4 + 4 = 19 letters used in total.
If all the numbers from 1 to 1000 (one thousand) inclusive were written out in words, how many letters would be used?
*NOTE: Do not count spaces or hyphens. For example, 342 (three hundred and forty-two) contains 23 letters and 115 (one hundred and fifteen) contains 20 letters. The use of "and" when writing out numbers is in compliance with British usage.*
(this programming puzzle is taken from https://projecteuler.net/problem=17, licensed under CC BY-NC-SA 4.0)
![SoC.png](/image?hash=a73a61fd46bb89c13b84226efeea828f80458ccbd78b223a1c6b626f2d4f4765)
Top Answer
frougon
The following code uses:
* [fmtcount](https://ctan.org/pkg/fmtcount) to obtain the representation of integers using English words (British setup as asked in the question);
* `\regex_count:nnN` (actually, its variant `\regex_count:nVN`) to count the number of letters in the outputs from `fmtcount`.
# Code without comments
If you prefer reading code with a lot of comments, see below. :-)
```
\documentclass{article}
\usepackage[british]{fmtcount}
\usepackage{siunitx}
\ExplSyntaxOn
\cs_new_protected:Nn \sociii_set_to_repr_in_words:Nn
  {
    \storenumberstringnum { sociii@tmp@label } {#2} \scan_stop:
    \tl_set:Nv #1 { @fcs@sociii@tmp@label }
  }
\cs_generate_variant:Nn \regex_count:nnN { nV }
\tl_new:N \l__sociii_stnli_text_tl
\cs_new_protected:Nn \sociii_set_to_number_of_letters_in:Nn
  {
    \sociii_set_to_repr_in_words:Nn \l__sociii_stnli_text_tl {#2}
    \regex_count:nVN { [a-z] } \l__sociii_stnli_text_tl #1
  }
\int_new:N \l__sociii_stnolir_int
\cs_new_protected:Nn \sociii_set_to_number_of_letters_in_range:Nnn
  {
    \int_zero:N \l__sociii_stnolir_int
    \int_step_inline:nnn {#2} {#3}
      {
        \sociii_set_to_number_of_letters_in:Nn \l_tmpa_int {##1}
        \int_add:Nn \l__sociii_stnolir_int { \l_tmpa_int }
      }
    \tl_set:Nx #1 { \int_use:N \l__sociii_stnolir_int }
  }
\NewDocumentCommand \countLettersInRange { m m }
  {
    \sociii_set_to_number_of_letters_in_range:Nnn \l_tmpa_tl {#1} {#2}
    \tl_use:N \l_tmpa_tl
  }
\cs_new_eq:NN \setToNumberOfLettersOfAllIntsInRange
  \sociii_set_to_number_of_letters_in_range:Nnn
\ExplSyntaxOff
\begin{document}
\countLettersInRange{1}{5}\par
\medskip
\setToNumberOfLettersOfAllIntsInRange\myTmp{1}{1000}%
There are \num{\myTmp}~letters in the representation in words of all integers
between 1 and~1000 (both inclusive).
\end{document}
```
![image.png](/image?hash=10028364b563e674a6ee88159ce0c32f6390b2832cbe7d9bb3b2997ee8c0ad82)
# Same code with a bunch of comments
```
\documentclass{article}
\usepackage[british]{fmtcount}
\usepackage{siunitx}
\ExplSyntaxOn
% \sociii_set_to_repr_in_words:Nn  tl-var  {integer denotation}
%
% Stores in the tl var (macro) #1 the spelled-out form of the integer #2,
% following whatever language setup is currently active for fmtcount.sty.
% #2 has to be an integer denotation, i.e. the digits given as explicit
% character tokens.
\cs_new_protected:Nn \sociii_set_to_repr_in_words:Nn
  {
    % NOTE(review): the \scan_stop: presumably ends fmtcount's look-ahead for
    % a trailing optional argument -- confirm against the fmtcount manual.
    \storenumberstringnum { sociii@tmp@label } {#2} \scan_stop:
    % fmtcount.sty does not seem to offer a way to store the result in a
    % macro of our choosing, so fetch it from \@fcs@sociii@tmp@label, which
    % is how \FMCuse retrieves it.
    \tl_set:Nv #1 { @fcs@sociii@tmp@label }
  }
% The V-type variant lets the text to scan be handed over as a tl var.
\cs_generate_variant:Nn \regex_count:nnN { nV }
% Holds the spelled-out form of the integer currently being examined.
\tl_new:N \l__sociii_stnli_text_tl
% \sociii_set_to_number_of_letters_in:Nn  int-var  {integer denotation}
%
% Sets the int var #1 to the number of letters used when the integer #2 is
% written out in words.
\cs_new_protected:Nn \sociii_set_to_number_of_letters_in:Nn
  {
    \sociii_set_to_repr_in_words:Nn \l__sociii_stnli_text_tl {#2}
    % Count the matches of [a-z]; anything else in the text is not counted.
    \regex_count:nVN { [a-z] } \l__sociii_stnli_text_tl #1
  }
% Running total of letters for the range being processed.
\int_new:N \l__sociii_stnolir_int
% \sociii_set_to_number_of_letters_in_range:Nnn  tl-var  {first}  {last}
%
% Sets the tl var #1 to the total number of letters needed to write out in
% words every integer of the interval [#2, #3].
%
% #1: tl var receiving the result
% #2: first integer of the range (an integer expression)
% #3: last integer of the range (an integer expression)
\cs_new_protected:Nn \sociii_set_to_number_of_letters_in_range:Nnn
  {
    \int_zero:N \l__sociii_stnolir_int
    \int_step_inline:nnn {#2} {#3}
      {
        % \l_tmpa_int receives the letter count of the current integer ##1.
        \sociii_set_to_number_of_letters_in:Nn \l_tmpa_int {##1}
        \int_add:Nn \l__sociii_stnolir_int { \l_tmpa_int }
      }
    % Hand the total back as its decimal digits, stored in macro #1.
    \tl_set:Nx #1 { \int_use:N \l__sociii_stnolir_int }
  }
% \countLettersInRange{first}{last}
%
% Typesets the number of letters needed to write out in words all integers
% from #1 to #2; \l_tmpa_tl is used as scratch storage for the result.
\NewDocumentCommand \countLettersInRange { m m }
  {
    \sociii_set_to_number_of_letters_in_range:Nnn \l_tmpa_tl {#1} {#2}
    \tl_use:N \l_tmpa_tl
  }
% Document-level name for the code-level function: same three arguments
% (macro receiving the result, first integer, last integer).
\cs_new_eq:NN \setToNumberOfLettersOfAllIntsInRange
  \sociii_set_to_number_of_letters_in_range:Nnn
\ExplSyntaxOff
\begin{document}
% Small range first (the puzzle statement gives 19 letters for 1 to 5).
\countLettersInRange{1}{5}\par
\medskip
% Full range: keep the total in \myTmp so that siunitx can format it.
\setToNumberOfLettersOfAllIntsInRange\myTmp{1}{1000}%
There are \num{\myTmp}~letters in the representation in words of all integers
between 1 and~1000 (both inclusive).
\end{document}
```
# Faster implementation
As Skillmon [noted](https://topanswers.xyz/transcript?room=2126&id=142189#c142189), instead of:
```
\regex_count:nVN { [a-z] } \l__sociii_stnli_text_tl #1
```
in the replacement text of `\sociii_set_to_number_of_letters_in:Nn`, one can use:
```
% Strip every token that is not a letter from the stored text ...
% ... control spaces,
\tl_remove_all:Nn \l__sociii_stnli_text_tl { \ }
% ... hyphens,
\tl_remove_all:Nn \l__sociii_stnli_text_tl { - }
% ... and \relax tokens,
\tl_remove_all:Nn \l__sociii_stnli_text_tl { \relax }
% then store the number of remaining items in the int var #1.
\int_set:Nn #1 { \tl_count:N \l__sociii_stnli_text_tl }
```
which is much faster. This counts all items in the `tl` var except those we removed, namely control spaces, hyphens and `\relax` tokens. We need to remove `\relax` tokens in this approach because `fmtcount` is cheeky and puts a `\relax` token at the start of the text it stores, as this test document shows:
```
\documentclass{article}
\usepackage[british]{fmtcount} % same language setup as in the answer's code
\begin{document}
\storenumberstringnum{test}{10} % store the words for 10 under the label "test"
\expandafter\show\csname @fcs@test\endcsname
\end{document}
```
which prints:
```
> \@fcs@test=macro:
->\relax ten.
<recently read> \@fcs@test
l.6 \expandafter\show\csname @fcs@test\endcsname
```
# Remark on `\tl_set:Nv`
The `\tl_set:Nv #1 { @fcs@sociii@tmp@label }` could also be done with `\tl_set_eq:NN #1 \@fcs@sociii@tmp@label`, however this would require us to use `\makeatletter`, and more importantly would cause the `tl` var given as `#1` to inherit attributes like `\protected` or `\long` from `\@fcs@sociii@tmp@label` that might be set by the `fmtcount` package. With `\tl_set:Nv`, this can't happen:
```
\documentclass{article}
\ExplSyntaxOn
% \protected: compare what each way of copying \zzz leaves in \l_tmpa_tl
\protected\def\zzz{abc}
\tl_set_eq:NN \l_tmpa_tl \zzz % copy keeps the \protected prefix
\cs_show:N \l_tmpa_tl
\tl_set:Nv \l_tmpa_tl { zzz } % only the replacement text is stored
\cs_show:N \l_tmpa_tl
% \long: same comparison, this time with \ZZZ and \l_tmpb_tl
\long\def\ZZZ{def}
\tl_set_eq:NN \l_tmpb_tl \ZZZ % copy keeps the \long prefix
\cs_show:N \l_tmpb_tl
\tl_set:Nv \l_tmpb_tl { ZZZ } % only the replacement text is stored
\cs_show:N \l_tmpb_tl
\ExplSyntaxOff
\begin{document}
\end{document}
```
which prints to the terminal:
```
> \l_tmpa_tl=\protected macro:->abc.
<recently read> }
l.8 \cs_show:N \l_tmpa_tl
> \l_tmpa_tl=macro:->abc.
<recently read> }
l.11 \cs_show:N \l_tmpa_tl
> \l_tmpb_tl=\long macro:->def.
<recently read> }
l.17 \cs_show:N \l_tmpb_tl
> \l_tmpb_tl=macro:->def.
<recently read> }
l.20 \cs_show:N \l_tmpb_tl
```
Answer #2
samcarter
*No spoiler*
I learned about property lists and used them to actually print all the numbers as words.
Then I tried to add a counter for the number of characters. It is a bit clumsy because it repeats tons of code, but in the end it gave a result...
```
\documentclass{article}
\pagestyle{empty}
\begin{document}
If the numbers 1 to 5 are written out in words: one, two, three, four, five, then there are 3 + 3 + 5 + 4 + 4 = 19 letters used in total.
If all the numbers from 1 to 1000 (one thousand) inclusive were written out in words, how many letters would be used?
NOTE: Do not count spaces or hyphens. For example, 342 (three hundred and forty-two) contains 23 letters and 115 (one hundred and fifteen) contains 20 letters. The use of “and” when writing out numbers is in compliance with British usage.
\ExplSyntaxOn
% Lookup table: key = a number, value = the English word for it.
% Spaces are ignored under \ExplSyntaxOn, so the entries can be laid out
% freely.
\prop_new:N \l_sam_names_prop
\prop_set_from_keyval:Nn \l_sam_names_prop
  {
    % 0 to 9
    0 = zero ,
    1 = one ,
    2 = two ,
    3 = three ,
    4 = four ,
    5 = five ,
    6 = six ,
    7 = seven ,
    8 = eight ,
    9 = nine ,
    % 10 to 19
    10 = ten ,
    11 = eleven ,
    12 = twelve ,
    13 = thirteen ,
    14 = fourteen ,
    15 = fifteen ,
    16 = sixteen ,
    17 = seventeen ,
    18 = eighteen ,
    19 = nineteen ,
    % multiples of ten
    20 = twenty ,
    30 = thirty ,
    40 = forty ,
    50 = fifty ,
    60 = sixty ,
    70 = seventy ,
    80 = eighty ,
    90 = ninety ,
    % powers of ten
    100 = hundred ,
    1000 = thousand ,
  }
% Look up a word with the key taken from the value of a variable.
\cs_generate_variant:Nn \prop_item:Nn { NV }
% The number currently being spelled out (or what is left of it).
\int_new:N \l_sam_loop_int
% Pieces of that number: thousands digit, hundreds digit, tens part, and
% the final part below 20.
\int_new:N \l_sam_thodigit_int
\int_new:N \l_sam_hundigit_int
\int_new:N \l_sam_tens_int
\int_new:N \l_sam_onedigit_int
% Running total of letters, and its value before the current number.
\int_new:N \l_sam_sum_int
\int_new:N \l_sam_lastsum_int
% The word whose letters are being counted.
\tl_new:N \l_sam_string_tl
% \sam_print_and_count:n {key}
%
% Typesets the word stored under key #1 in \l_sam_names_prop and adds the
% number of its letters to \l_sam_sum_int. The V variant takes the key from
% the value of a variable.
\cs_new_protected:Npn \sam_print_and_count:n #1
  {
    \tl_set:Nx \l_sam_string_tl { \prop_item:Nn \l_sam_names_prop {#1} }
    \tl_use:N \l_sam_string_tl
    \int_add:Nn \l_sam_sum_int { \tl_count:N \l_sam_string_tl }
  }
\cs_generate_variant:Nn \sam_print_and_count:n { V }

% End whatever paragraph is still open so that the list starts on its own
% line.
\par
% One line per number: "number : words count", where count is the number
% of letters used for this number alone.
\int_step_inline:nn { 1000 }
  {
    \int_set:Nn \l_sam_loop_int {#1}
    % Remember the total so far; the difference at the end of this
    % iteration is the letter count of the current number.
    \int_set_eq:NN \l_sam_lastsum_int \l_sam_sum_int
    \int_use:N \l_sam_loop_int
    \c_space_tl : \c_space_tl
    \int_compare:nNnTF { \l_sam_loop_int } > { 999 }
      {
        % thousands: leading digit followed by "thousand"
        \int_set:Nn \l_sam_thodigit_int
          { \int_div_truncate:nn { \l_sam_loop_int } { 1000 } }
        \sam_print_and_count:V \l_sam_thodigit_int
        \c_space_tl
        \sam_print_and_count:n { 1000 }
        \c_space_tl
      }
      {
        \int_compare:nNnT { \l_sam_loop_int } > { 99 }
          {
            % hundreds: leading digit followed by "hundred"
            \int_set:Nn \l_sam_hundigit_int
              { \int_div_truncate:nn { \l_sam_loop_int } { 100 } }
            \sam_print_and_count:V \l_sam_hundigit_int
            \c_space_tl
            \sam_print_and_count:n { 100 }
            \c_space_tl
            % keep only the tens and ones for the rest of the iteration
            \int_set:Nn \l_sam_loop_int { #1 - \l_sam_hundigit_int * 100 }
            \int_compare:nNnT { \l_sam_loop_int } > { 0 }
              {
                and \c_space_tl
                % the three letters of "and"
                \int_add:Nn \l_sam_sum_int { 3 }
              }
          }
        \int_compare:nNnTF { \l_sam_loop_int } > { 19 }
          {
            % 20 and above: multiple of ten first, ones digit afterwards
            \int_set:Nn \l_sam_onedigit_int
              { \int_mod:nn { \l_sam_loop_int } { 10 } }
            \int_set:Nn \l_sam_tens_int
              { \l_sam_loop_int - \l_sam_onedigit_int }
            \sam_print_and_count:V \l_sam_tens_int
            \c_space_tl
          }
          {
            % below 20 the table has a single word for the whole remainder
            \int_set_eq:NN \l_sam_onedigit_int \l_sam_loop_int
          }
        % nothing more to say when the remainder is zero (e.g. 30 or 200)
        \int_compare:nNnT { \l_sam_onedigit_int } > { 0 }
          { \sam_print_and_count:V \l_sam_onedigit_int }
      }
    % letters used by this number alone, then end the line
    \c_space_tl
    \int_eval:n { \l_sam_sum_int - \l_sam_lastsum_int } \par
  }
% total number of letters for 1 to 1000
\int_use:N \l_sam_sum_int
\ExplSyntaxOff
\end{document}
```