> This is part of the Summer of Code 2022 series, see https://topanswers.xyz/tex?q=2059 for more information

If the numbers 1 to 5 are written out in words: one, two, three, four, five, then there are 3 + 3 + 5 + 4 + 4 = 19 letters used in total.

If all the numbers from 1 to 1000 (one thousand) inclusive were written out in words, how many letters would be used?

*NOTE: Do not count spaces or hyphens. For example, 342 (three hundred and forty-two) contains 23 letters and 115 (one hundred and fifteen) contains 20 letters. The use of "and" when writing out numbers is in compliance with British usage.*

(this programming puzzle is taken from https://projecteuler.net/problem=17, licensed under CC BY-NC-SA 4.0)

![SoC.png](/image?hash=a73a61fd46bb89c13b84226efeea828f80458ccbd78b223a1c6b626f2d4f4765)
The following code uses:

* [fmtcount](https://ctan.org/pkg/fmtcount) to obtain the representation of integers using English words (British setup as asked in the question);

* \regex_count:nnN (actually, its variant \regex_count:nVN) to count the number of letters in the outputs from fmtcount.

If you prefer reading code with a lot of comments, see below. :-)


\documentclass{article}
\usepackage[british]{fmtcount}
\usepackage{siunitx}

\ExplSyntaxOn

% Store in tl var #1 the representation in words of integer #2, as produced
% by fmtcount.
\cs_new_protected:Npn \sociii_set_to_repr_in_words:Nn #1#2
  {
    \storenumberstringnum { sociii@tmp@label } {#2} \scan_stop:
    \tl_set:Nv #1 { @fcs@sociii@tmp@label }
  }

\tl_new:N \l__sociii_stnli_text_tl
\cs_generate_variant:Nn \regex_count:nnN { nV }

% Set int var #1 to the number of matches of [a-z] in the representation in
% words of integer #2.
\cs_new_protected:Npn \sociii_set_to_number_of_letters_in:Nn #1#2
  {
    \sociii_set_to_repr_in_words:Nn \l__sociii_stnli_text_tl {#2}
    \regex_count:nVN { [a-z] } \l__sociii_stnli_text_tl #1
  }

\int_new:N \l__sociii_stnolir_int

% Store in tl var #1 the total number of letters in the representation in
% words of all integers from #2 to #3 (both inclusive).
\cs_new_protected:Npn \sociii_set_to_number_of_letters_in_range:Nnn #1#2#3
  {
    \int_zero:N \l__sociii_stnolir_int

    \int_step_inline:nnn {#2} {#3}
      {
        \sociii_set_to_number_of_letters_in:Nn \l_tmpa_int {##1}
        % Accumulate the count for this integer; without this step the
        % running total would stay at zero.
        \int_add:Nn \l__sociii_stnolir_int { \l_tmpa_int }
      }

    \tl_set:NV #1 \l__sociii_stnolir_int
  }

\cs_new_eq:NN \setToNumberOfLettersOfAllIntsInRange
\sociii_set_to_number_of_letters_in_range:Nnn

% Document-level command: compute the letter count for the range #1..#2 into
% the scratch variable \l_tmpa_tl, then typeset it.
\NewDocumentCommand \countLettersInRange { m m }
  {
    \sociii_set_to_number_of_letters_in_range:Nnn \l_tmpa_tl {#1} {#2}
    \tl_use:N \l_tmpa_tl
  }

\ExplSyntaxOff

\begin{document}

\countLettersInRange{1}{5}\par\medskip

\setToNumberOfLettersOfAllIntsInRange{\myTmp}{1}{1000}%
There are \num{\myTmp}~letters in the representation in words of all integers
between 1 and~1000 (both inclusive).

\end{document}


# Same code with a bunch of comments


\documentclass{article}
\usepackage[british]{fmtcount}
\usepackage{siunitx}

\ExplSyntaxOn

% Store in #1 the representation in words of integer #2, using whatever
% language setup is currently active for fmtcount.sty.
%
% #1: tl var (macro) receiving the result
% #2: an integer denotation (i.e., an integer written as a sequence of
%     explicit character tokens representing the digits)
\cs_new_protected:Npn \sociii_set_to_repr_in_words:Nn #1#2
  {
    % Let fmtcount compute the words and keep them under a private label.
    \storenumberstringnum { sociii@tmp@label } {#2} \scan_stop:
    % fmtcount.sty doesn't seem to provide anything to store the result in a
    % user-chosen macro, so fetch it from the internal macro the same way
    % \FMCuse does.
    \tl_set:Nv #1 { @fcs@sociii@tmp@label }
  }

\tl_new:N \l__sociii_stnli_text_tl
\cs_generate_variant:Nn \regex_count:nnN { nV }

% Count the letters used when integer #2 is written in words, and store that
% count in #1.
%
% #1: int var receiving the result
% #2: an integer denotation
\cs_new_protected:Npn \sociii_set_to_number_of_letters_in:Nn #1#2
  {
    % First obtain the words...
    \sociii_set_to_repr_in_words:Nn \l__sociii_stnli_text_tl {#2}
    % ...then count the matches of [a-z] in them.
    \regex_count:nVN { [a-z] } \l__sociii_stnli_text_tl #1
  }

\int_new:N \l__sociii_stnolir_int

% Set #1 to the number of letters in the representation in words of all
% integers in the interval [#2, #3].
%
% #1: tl var where to store the result
% #2: first element of the range (an ⟨integer expression⟩)
% #3: last element of the range (an ⟨integer expression⟩)
\cs_new_protected:Npn \sociii_set_to_number_of_letters_in_range:Nnn #1#2#3
  {
    % Running total over the whole range.
    \int_zero:N \l__sociii_stnolir_int

    \int_step_inline:nnn {#2} {#3}
      {
        % Number of letters for the current integer, stored in the scratch
        % variable \l_tmpa_int...
        \sociii_set_to_number_of_letters_in:Nn \l_tmpa_int {##1}
        % ...which is then added to the running total. Without this step
        % the total would remain zero.
        \int_add:Nn \l__sociii_stnolir_int { \l_tmpa_int }
      }

    % Store the standard decimal representation of \l__sociii_stnolir_int in
    % macro #1.
    \tl_set:NV #1 \l__sociii_stnolir_int
  }

\cs_new_eq:NN \setToNumberOfLettersOfAllIntsInRange
\sociii_set_to_number_of_letters_in_range:Nnn

% Document-level command: typeset the number of letters in the representation
% in words of all integers from #1 to #2.
%
% #1: first element of the range
% #2: last element of the range
\NewDocumentCommand \countLettersInRange { m m }
  {
    % Compute into the scratch variable \l_tmpa_tl, then typeset it.
    \sociii_set_to_number_of_letters_in_range:Nnn \l_tmpa_tl {#1} {#2}
    \tl_use:N \l_tmpa_tl
  }

\ExplSyntaxOff

\begin{document}

\countLettersInRange{1}{5}\par\medskip

\setToNumberOfLettersOfAllIntsInRange{\myTmp}{1}{1000}%
There are \num{\myTmp}~letters in the representation in words of all integers
between 1 and~1000 (both inclusive).

\end{document}


# Faster implementation


\regex_count:nVN { [a-z] } \l__sociii_stnli_text_tl #1


Instead of the line above, in the replacement text of \sociii_set_to_number_of_letters_in:Nn, one can use:


\tl_remove_all:Nn \l__sociii_stnli_text_tl { \ }
\tl_remove_all:Nn \l__sociii_stnli_text_tl { - }
\tl_remove_all:Nn \l__sociii_stnli_text_tl { \relax }
\int_set:Nn #1 { \tl_count:N \l__sociii_stnli_text_tl }


which is much faster. This counts all items in the tl var except those we removed, namely control spaces, hyphens and \relax tokens. We need to remove \relax tokens in this approach because fmtcount is cheeky:


\documentclass{article}
\usepackage[british]{fmtcount}

\begin{document}
\storenumberstringnum{test}{10}
\expandafter\show\csname @fcs@test\endcsname
\end{document}


which prints:


> \@fcs@test=macro:
->\relax ten.

l.6 \expandafter\show\csname @fcs@test\endcsname


# Remark on \tl_set:Nv

The \tl_set:Nv #1 { @fcs@sociii@tmp@label } could also be done with \tl_set_eq:NN #1 \@fcs@sociii@tmp@label, however this would require us to use \makeatletter, and more importantly would cause the tl var given as #1 to inherit attributes like \protected or \long from \@fcs@sociii@tmp@label that might be set by the fmtcount package. With \tl_set:Nv, this can't happen:


\documentclass{article}

\ExplSyntaxOn
% \protected
\protected\def\zzz{abc}

\tl_set_eq:NN \l_tmpa_tl \zzz
\cs_show:N \l_tmpa_tl

\tl_set:Nv \l_tmpa_tl { zzz }
\cs_show:N \l_tmpa_tl

% \long
\long\def\ZZZ{def}

\tl_set_eq:NN \l_tmpb_tl \ZZZ
\cs_show:N \l_tmpb_tl

\tl_set:Nv \l_tmpb_tl { ZZZ }
\cs_show:N \l_tmpb_tl
\ExplSyntaxOff

\begin{document}
\end{document}


which prints to the terminal:


> \l_tmpa_tl=\protected macro:->abc.

l.8 \cs_show:N \l_tmpa_tl

> \l_tmpa_tl=macro:->abc.

l.11 \cs_show:N \l_tmpa_tl

> \l_tmpb_tl=\long macro:->def.

l.17 \cs_show:N \l_tmpb_tl

> \l_tmpb_tl=macro:->def.

l.20 \cs_show:N \l_tmpb_tl

*No spoiler*

I learned about property lists and used them to actually print all the numbers as words.

Then I tried to add a counter for the number of characters. It is a bit clumsy because it repeats tons of code, but in the end, it gave a result...

\documentclass{article}

\pagestyle{empty}

\begin{document}

If the numbers 1 to 5 are written out in words: one, two, three, four, five, then there are 3 + 3 + 5 + 4 + 4 = 19 letters used in total.

If all the numbers from 1 to 1000 (one thousand) inclusive were written out in words, how many letters would be used?

NOTE: Do not count spaces or hyphens. For example, 342 (three hundred and forty-two) contains 23 letters and 115 (one hundred and fifteen) contains 20 letters. The use of “and” when writing out numbers is in compliance with British usage.

\ExplSyntaxOn

% Lookup table mapping an integer to its English word. Only the values that
% have a word of their own are listed; other numbers are assembled from
% these entries.
\prop_new:N \l_sam_names_prop
\prop_set_from_keyval:Nn \l_sam_names_prop
  {
    % units
    0 = zero,  1 = one,  2 = two,    3 = three, 4 = four,
    5 = five,  6 = six,  7 = seven,  8 = eight, 9 = nine,
    % ten to nineteen
    10 = ten,      11 = eleven,  12 = twelve,    13 = thirteen,
    14 = fourteen, 15 = fifteen, 16 = sixteen,   17 = seventeen,
    18 = eighteen, 19 = nineteen,
    % multiples of ten
    20 = twenty, 30 = thirty,  40 = forty,  50 = fifty,
    60 = sixty,  70 = seventy, 80 = eighty, 90 = ninety,
    % scale words
    100 = hundred,
    1000 = thousand,
  }

\cs_generate_variant:Nn \prop_item:Nn { NV }

\int_new:N \l_sam_onedigit_int
\int_new:N \l_sam_hundigit_int
\int_new:N \l_sam_thodigit_int
\int_new:N \l_sam_tens_int
\int_new:N \l_sam_sum_int
\int_new:N \l_sam_lastsum_int
\tl_new:N \l_sam_string_tl

\int_new:N \l_sam_loop_int

% Typeset every integer from 1 to 1000 in words and accumulate the number of
% letters used in \l_sam_sum_int. Spaces are typeset but never counted.
\int_step_inline:nn { 1000 }
  {
    \int_set:Nn \l_sam_loop_int { #1 }

    % Finish the previous line with the number of letters contributed by the
    % previous integer (0 on the first pass), then start the line for the
    % current integer as "<number> : ".
    \c_space_tl
    \int_eval:n { \l_sam_sum_int - \l_sam_lastsum_int } \par
    \int_use:N \l_sam_loop_int
    \c_space_tl : \c_space_tl

    % Remember the total so far, so that the next pass can print the
    % difference.
    \int_set:Nn \l_sam_lastsum_int { \l_sam_sum_int }

    % if 1000 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
    \int_compare:nNnTF { \l_sam_loop_int } > { 999 }
      {
        % get first digit of thousand
        \int_set:Nn \l_sam_thodigit_int
          { \int_div_truncate:nn { \l_sam_loop_int } { 1000 } }

        % typeset the digit word, then count its letters
        \prop_item:NV \l_sam_names_prop \l_sam_thodigit_int
        \tl_set:Nx \l_sam_string_tl
          { \prop_item:NV \l_sam_names_prop \l_sam_thodigit_int }
        \int_add:Nn \l_sam_sum_int { \tl_count:N \l_sam_string_tl }

        % typeset "thousand", then count its letters
        \c_space_tl
        \prop_item:Nn \l_sam_names_prop { 1000 }
        \tl_set:Nx \l_sam_string_tl
          { \prop_item:Nn \l_sam_names_prop { 1000 } }
        \int_add:Nn \l_sam_sum_int { \tl_count:N \l_sam_string_tl }

        \c_space_tl
      }
      {
        % if >= 100 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
        \int_compare:nNnT { \l_sam_loop_int } > { 99 }
          {
            % get first digit of hundreds
            \int_set:Nn \l_sam_hundigit_int
              { \int_div_truncate:nn { \l_sam_loop_int } { 100 } }

            % typeset the digit word, then count its letters
            \prop_item:NV \l_sam_names_prop \l_sam_hundigit_int
            \tl_set:Nx \l_sam_string_tl
              { \prop_item:NV \l_sam_names_prop \l_sam_hundigit_int }
            \int_add:Nn \l_sam_sum_int { \tl_count:N \l_sam_string_tl }

            % typeset "hundred", then count its letters
            \c_space_tl
            \prop_item:Nn \l_sam_names_prop { 100 }
            \tl_set:Nx \l_sam_string_tl
              { \prop_item:Nn \l_sam_names_prop { 100 } }
            \int_add:Nn \l_sam_sum_int { \tl_count:N \l_sam_string_tl }

            \c_space_tl

            % getting just tens and ones part of the loop variable
            \int_set:Nn \l_sam_loop_int { #1 - \l_sam_hundigit_int * 100 }
            \int_compare:nNnT { \l_sam_loop_int } > { 0 }
              {
                and\c_space_tl
                % The letters of "and" belong to the total as well (British
                % usage, e.g. 342 = "three hundred and forty-two" has 23
                % letters), so count them like every other word.
                \tl_set:Nn \l_sam_string_tl { and }
                \int_add:Nn \l_sam_sum_int { \tl_count:N \l_sam_string_tl }
              }
          } % if >= 100

        % tens and one
        \int_compare:nNnTF { \l_sam_loop_int } > { 19 }
          { % >= 20: split into a multiple of ten and a unit digit
            \int_set:Nn \l_sam_onedigit_int
              { \int_mod:nn { \l_sam_loop_int } { 10 } }
            \int_set:Nn \l_sam_tens_int
              { \l_sam_loop_int - \l_sam_onedigit_int }

            % typeset the tens word, then count its letters
            \prop_item:NV \l_sam_names_prop \l_sam_tens_int
            \tl_set:Nx \l_sam_string_tl
              { \prop_item:NV \l_sam_names_prop \l_sam_tens_int }
            \int_add:Nn \l_sam_sum_int { \tl_count:N \l_sam_string_tl }
            \c_space_tl
          }
          { % special cases for < 20: the table has a word for each of them
            \int_set:Nn \l_sam_onedigit_int { \l_sam_loop_int }
          }

        % A remainder of 0 (e.g. 100 or 120) adds no further word.
        \int_compare:nNnT { \l_sam_onedigit_int } > { 0 }
          {
            % typeset the remaining word, then count its letters
            \prop_item:NV \l_sam_names_prop \l_sam_onedigit_int
            \tl_set:Nx \l_sam_string_tl
              { \prop_item:NV \l_sam_names_prop \l_sam_onedigit_int }
            \int_add:Nn \l_sam_sum_int { \tl_count:N \l_sam_string_tl }
          }
      } % if 1000
  }

\par
\int_use:N \l_sam_sum_int

\ExplSyntaxOff

\end{document}


Enter question or answer id or url (and optionally further answer ids/urls from the same question) from

Separate each id/url with a space. No need to list your own answers; they will be imported automatically.