SoC Day 3: Number letter counts - TeX

TopAnswers TeX

Top Answer

frougon

The following code uses:

* [fmtcount](https://ctan.org/pkg/fmtcount) to obtain the representation of integers using English words (British setup as asked in the question);

* `\regex_count:nnN` (actually, its variant `\regex_count:nVN`) to count the number of letters in the outputs from `fmtcount`.

# Code without comments

If you prefer reading code with a lot of comments, see below. :-)

```
% Comment-free listing (the annotated copy of the same code is given further
% down).  It defines three expl3 functions in the "sociii" module, two
% document-level commands built on them, and typesets the letter totals for
% the ranges 1-5 and 1-1000.
\documentclass{article}
\usepackage[british]{fmtcount}
\usepackage{siunitx}

\ExplSyntaxOn

\cs_new_protected:Npn \sociii_set_to_repr_in_words:Nn #1#2
  {
    \storenumberstringnum { sociii@tmp@label } {#2} \scan_stop:
    \tl_set:Nv #1 { @fcs@sociii@tmp@label }
  }

\tl_new:N \l__sociii_stnli_text_tl
\cs_generate_variant:Nn \regex_count:nnN { nV }

\cs_new_protected:Npn \sociii_set_to_number_of_letters_in:Nn #1#2
  {
    \sociii_set_to_repr_in_words:Nn \l__sociii_stnli_text_tl {#2}
    \regex_count:nVN { [a-z] } \l__sociii_stnli_text_tl #1
  }

\int_new:N \l__sociii_stnolir_int

\cs_new_protected:Npn \sociii_set_to_number_of_letters_in_range:Nnn #1#2#3
  {
    \int_zero:N \l__sociii_stnolir_int

    \int_step_inline:nnn {#2} {#3}
      {
        \sociii_set_to_number_of_letters_in:Nn \l_tmpa_int {##1}
        \int_add:Nn \l__sociii_stnolir_int { \l_tmpa_int }
      }

    \tl_set:NV #1 \l__sociii_stnolir_int
  }

\cs_new_eq:NN \setToNumberOfLettersOfAllIntsInRange
              \sociii_set_to_number_of_letters_in_range:Nnn

\NewDocumentCommand \countLettersInRange { m m }
  {
    \sociii_set_to_number_of_letters_in_range:Nnn \l_tmpa_tl {#1} {#2}
    \tl_use:N \l_tmpa_tl
  }

\ExplSyntaxOff

\begin{document}

\countLettersInRange{1}{5}\par\medskip

\setToNumberOfLettersOfAllIntsInRange{\myTmp}{1}{1000}%
There are \num{\myTmp}~letters in the representation in words of all integers
between 1 and~1000 (both inclusive).

\end{document}
```

![image.png](/image?hash=10028364b563e674a6ee88159ce0c32f6390b2832cbe7d9bb3b2997ee8c0ad82)

# Same code with a bunch of comments

```
\documentclass{article}
\usepackage[british]{fmtcount}
\usepackage{siunitx}

\ExplSyntaxOn

% Set #1 to the representation of integer #2 in words (according to the
% language setup currently active for fmtcount.sty).
%
% #1: tl var (macro) where to store the result
% #2: an integer denotation (i.e., an integer written as a sequence of
%     explicit character tokens representing the digits).
%
% The assignment to #1 is local (\tl_set:Nv).  The same fmtcount label,
% sociii@tmp@label, is used on every call.
\cs_new_protected:Npn \sociii_set_to_repr_in_words:Nn #1#2
  {
    % NOTE(review): the trailing \scan_stop: presumably keeps
    % \storenumberstringnum from scanning further ahead for an optional
    % argument -- confirm against the fmtcount documentation.
    \storenumberstringnum { sociii@tmp@label } {#2} \scan_stop:
    % fmtcount.sty doesn't seem to provide anything to store the result in a
    % user-chosen macro, so let's retrieve the result as \FMCuse does.
    % The v-type argument builds the macro name from the given characters and
    % passes the contents of that macro, so no \makeatletter is needed here.
    \tl_set:Nv #1 { @fcs@sociii@tmp@label }
  }

% Private tl var holding the words obtained for the integer being measured.
\tl_new:N \l__sociii_stnli_text_tl
% Request the nVN variant of \regex_count:nnN: the regex is given literally,
% the text to search is taken from the contents of a variable.
\cs_generate_variant:Nn \regex_count:nnN { nV }

% Set #1 to the number of letters in the representation of integer #2 using
% words.
%
% #1: int var where to store the result
% #2: an integer denotation
\cs_new_protected:Npn \sociii_set_to_number_of_letters_in:Nn #1#2
  {
    % First obtain the words, then count the matches of [a-z] in them: only
    % lowercase ASCII letters match, so whatever else the text contains
    % (spaces, hyphens, ...) does not contribute to the count.
    \sociii_set_to_repr_in_words:Nn \l__sociii_stnli_text_tl {#2}
    \regex_count:nVN { [a-z] } \l__sociii_stnli_text_tl #1
  }

% Private int var accumulating the total number of letters over a range.
\int_new:N \l__sociii_stnolir_int

% Set #1 to the number of letters in the representation in words of all
% integers in the interval [#2, #3].
%
% #1: tl var where to store the result
% #2: first element of the range (an ⟨integer expression⟩)
% #3: last element of the range (an ⟨integer expression⟩)
%
% The scratch variable \l_tmpa_int is overwritten by this function.
\cs_new_protected:Npn \sociii_set_to_number_of_letters_in_range:Nnn #1#2#3
  {
    % Start from zero on every call.
    \int_zero:N \l__sociii_stnolir_int

    % Visit every integer from #2 to #3; ##1 is the integer being visited.
    \int_step_inline:nnn {#2} {#3}
      {
        % Letter count of the current integer, then add it to the total.
        \sociii_set_to_number_of_letters_in:Nn \l_tmpa_int {##1}
        \int_add:Nn \l__sociii_stnolir_int { \l_tmpa_int }
      }

    % Store the standard decimal representation of \l__sociii_stnolir_int in
    % macro #1.
    \tl_set:NV #1 \l__sociii_stnolir_int
  }

% Document-level name for the range function above.  Being a plain copy, it
% takes the same three arguments: the macro receiving the result, then the
% first and the last integer of the range.
\cs_new_eq:NN \setToNumberOfLettersOfAllIntsInRange
              \sociii_set_to_number_of_letters_in_range:Nnn

% \countLettersInRange{<first>}{<last>}
%
% Compute the total number of letters for the range and typeset it right
% away.  The scratch variable \l_tmpa_tl receives the result.
\NewDocumentCommand \countLettersInRange { m m }
  {
    \sociii_set_to_number_of_letters_in_range:Nnn \l_tmpa_tl {#1} {#2}
    \tl_use:N \l_tmpa_tl
  }

\ExplSyntaxOff

\begin{document}

% Typeset the total directly, here for the integers 1 to 5.
\countLettersInRange{1}{5}\par\medskip

% Store the total for 1 to 1000 in \myTmp first, so that it can be handed to
% siunitx's \num for formatting.
\setToNumberOfLettersOfAllIntsInRange{\myTmp}{1}{1000}%
There are \num{\myTmp}~letters in the representation in words of all integers
between 1 and~1000 (both inclusive).

\end{document}
```

# Faster implementation

As Skillmon [noted](https://topanswers.xyz/transcript?room=2126&id=142189#c142189), instead of:

```
\regex_count:nVN { [a-z] } \l__sociii_stnli_text_tl #1
```

in the replacement text of `\sociii_set_to_number_of_letters_in:Nn`, one can use:

```
% Remove every item that is not a letter: control spaces, hyphens and \relax.
\tl_remove_all:Nn \l__sociii_stnli_text_tl { \ }
\tl_remove_all:Nn \l__sociii_stnli_text_tl { - }
\tl_remove_all:Nn \l__sociii_stnli_text_tl { \relax }
% What is left is counted item by item and stored in the int var #1.
\int_set:Nn #1 { \tl_count:N \l__sociii_stnli_text_tl }
```

which is much faster. This counts all items in the `tl` var except those we removed, namely control spaces, hyphens and `\relax` tokens. We need to remove `\relax` tokens in this approach because `fmtcount` is cheeky:

```
\documentclass{article}
\usepackage[british]{fmtcount}

\begin{document}
\storenumberstringnum{test}{10} % words for 10 go under label "test"; the next line shows the macro holding them
\expandafter\show\csname @fcs@test\endcsname
\end{document}
```

which prints:

```
> \@fcs@test=macro:
->\relax ten.
<recently read> \@fcs@test 
                           
l.6 \expandafter\show\csname @fcs@test\endcsname
```

# Remark on `\tl_set:Nv`

The `\tl_set:Nv #1 { @fcs@sociii@tmp@label }` could also be done with `\tl_set_eq:NN #1 \@fcs@sociii@tmp@label`, however this would require us to use `\makeatletter`, and more importantly would cause the `tl` var given as `#1` to inherit attributes like `\protected` or `\long` from `\@fcs@sociii@tmp@label` that might be set by the `fmtcount` package. With `\tl_set:Nv`, this can't happen:

```
\documentclass{article}

\ExplSyntaxOn
% \protected: \tl_set_eq:NN carries the prefix over to \l_tmpa_tl, \tl_set:Nv does not
\protected\def\zzz{abc}

\tl_set_eq:NN \l_tmpa_tl \zzz
\cs_show:N \l_tmpa_tl

\tl_set:Nv \l_tmpa_tl { zzz }
\cs_show:N \l_tmpa_tl

% \long: same comparison, this time with \l_tmpb_tl and a \long macro
\long\def\ZZZ{def}

\tl_set_eq:NN \l_tmpb_tl \ZZZ
\cs_show:N \l_tmpb_tl

\tl_set:Nv \l_tmpb_tl { ZZZ }
\cs_show:N \l_tmpb_tl
\ExplSyntaxOff

\begin{document}
\end{document}
```

which prints to the terminal:

```
> \l_tmpa_tl=\protected macro:->abc.
<recently read> }
                 
l.8 \cs_show:N \l_tmpa_tl
                         
> \l_tmpa_tl=macro:->abc.
<recently read> }
                 
l.11 \cs_show:N \l_tmpa_tl
                          
> \l_tmpb_tl=\long macro:->def.
<recently read> }
                 
l.17 \cs_show:N \l_tmpb_tl
                          
> \l_tmpb_tl=macro:->def.
<recently read> }
                 
l.20 \cs_show:N \l_tmpb_tl
```

Answer #2

samcarter

*No spoiler*

I learned about property lists and used them to actually print all the numbers as words.

Then I tried to add a counter for the number of characters. It is a bit clumsy because it repeats a lot of code, but in the end it gave a result...
```
% Document that first typesets the problem statement and then, in the expl3
% code that follows, every number from 1 to 1000 written out in words.
\documentclass{article}

% No headers or footers: the output is a plain list.
\pagestyle{empty}

\begin{document}

% The sum is an arithmetic expression, so it is set in math mode to get
% proper spacing around the plus and equals signs.
If the numbers 1 to 5 are written out in words: one, two, three, four, five, then there are \(3 + 3 + 5 + 4 + 4 = 19\) letters used in total.

If all the numbers from 1 to 1000 (one thousand) inclusive were written out in words, how many letters would be used?

NOTE: Do not count spaces or hyphens. For example, 342 (three hundred and forty-two) contains 23 letters and 115 (one hundred and fifteen) contains 20 letters. The use of “and” when writing out numbers is in compliance with British usage.

\ExplSyntaxOn

% Lookup table from a number (the key) to its English name (the value).
% It holds 0 to 19, the multiples of ten from 20 to 90, and the two scale
% words stored under the keys 100 and 1000.
\prop_new:N \l_sam_names_prop
\prop_set_from_keyval:Nn \l_sam_names_prop
  {
    % single digits
    0=zero, 1=one, 2=two, 3=three, 4=four,
    5=five, 6=six, 7=seven, 8=eight, 9=nine,
    % ten to nineteen
    10=ten, 11=eleven, 12=twelve, 13=thirteen, 14=fourteen,
    15=fifteen, 16=sixteen, 17=seventeen, 18=eighteen, 19=nineteen,
    % multiples of ten
    20=twenty, 30=thirty, 40=forty, 50=fifty,
    60=sixty, 70=seventy, 80=eighty, 90=ninety,
    % scale words
    100=hundred,
    1000=thousand,
  }

% Number currently being written out; once the hundreds have been dealt with
% it is reduced to the part below one hundred.
\int_new:N \l_sam_loop_int

% Components of that number.
\int_new:N \l_sam_thodigit_int  % thousands digit
\int_new:N \l_sam_hundigit_int  % hundreds digit
\int_new:N \l_sam_tens_int      % tens part as a multiple of ten (20, 30, ...)
\int_new:N \l_sam_onedigit_int  % units digit, or the whole remainder when it is below 20

% Letter counting.
\int_new:N \l_sam_sum_int       % running total of letters
\int_new:N \l_sam_lastsum_int   % the running total as it was before the current number
\tl_new:N  \l_sam_string_tl     % name whose letters are being counted

% Variant of \prop_item:Nn taking the key from the value of a variable.
\cs_generate_variant:Nn \prop_item:Nn { NV }
% Typeset the name stored under key #1 in \l_sam_names_prop and add the number
% of letters of that name to \l_sam_sum_int.  The name is looked up once and
% kept in \l_sam_string_tl.
%
% #1: a key of \l_sam_names_prop (the V variant takes it from an int var)
\cs_new_protected:Npn \sam_print_and_count:n #1
  {
    \tl_set:Nx \l_sam_string_tl { \prop_item:Nn \l_sam_names_prop {#1} }
    \tl_use:N \l_sam_string_tl
    \int_add:Nn \l_sam_sum_int { \tl_count:N \l_sam_string_tl }
  }
\cs_generate_variant:Nn \sam_print_and_count:n { V }

% Write out every number from 1 to 1000 in words, one per line, in the form
% "<number> : <words> <letters>".  The letter count of a line is typeset at
% the start of the next iteration, so the last line gets none; the grand total
% accumulates in \l_sam_sum_int.
\int_step_inline:nn { 1000 }
  {
    \int_set:Nn \l_sam_loop_int {#1}

    % Close the previous line with the number of letters it used (0 before
    % the first number), then open the new line with "<number> : ".
    \c_space_tl
    \int_eval:n { \l_sam_sum_int - \l_sam_lastsum_int } \par
    \int_use:N \l_sam_loop_int
    \c_space_tl : \c_space_tl

    % Remember the total reached so far; the difference with the new total is
    % the letter count of the current number.
    \int_set_eq:NN \l_sam_lastsum_int \l_sam_sum_int

    \int_compare:nNnTF { \l_sam_loop_int } > { 999 }
      {
        % "<digit> thousand".  Nothing below the thousands is written in this
        % branch, which is all that is needed for 1000 itself.
        \int_set:Nn \l_sam_thodigit_int
          { \int_div_truncate:nn { \l_sam_loop_int } { 1000 } }
        \sam_print_and_count:V \l_sam_thodigit_int
        \c_space_tl
        \sam_print_and_count:n { 1000 }
        \c_space_tl
      }
      {
        % Hundreds: "<digit> hundred", followed by "and" when something is
        % left below one hundred.
        \int_compare:nNnT { \l_sam_loop_int } > { 99 }
          {
            \int_set:Nn \l_sam_hundigit_int
              { \int_div_truncate:nn { \l_sam_loop_int } { 100 } }
            \sam_print_and_count:V \l_sam_hundigit_int
            \c_space_tl
            \sam_print_and_count:n { 100 }
            \c_space_tl

            % From here on only the part below one hundred is of interest.
            \int_set:Nn \l_sam_loop_int { #1 - \l_sam_hundigit_int * 100 }
            \int_compare:nNnT { \l_sam_loop_int } > { 0 }
              {
                and \c_space_tl
                % "and" has three letters.
                \int_add:Nn \l_sam_sum_int { 3 }
              }
          }

        % Tens: from 20 upwards the multiple of ten has a name of its own and
        % the units digit is handled separately; below 20 the whole remainder
        % is looked up in one go.
        \int_compare:nNnTF { \l_sam_loop_int } > { 19 }
          {
            \int_set:Nn \l_sam_onedigit_int
              { \int_mod:nn { \l_sam_loop_int } { 10 } }
            \int_set:Nn \l_sam_tens_int
              { \l_sam_loop_int - \l_sam_onedigit_int }
            \sam_print_and_count:V \l_sam_tens_int
            \c_space_tl
          }
          { \int_set_eq:NN \l_sam_onedigit_int \l_sam_loop_int }

        % Units (or 1 to 19); nothing is written for a remainder of zero.
        \int_compare:nNnT { \l_sam_onedigit_int } > { 0 }
          { \sam_print_and_count:V \l_sam_onedigit_int }
      }
  }

% End the line of the last number, then typeset the overall number of letters
% on a line of its own.
\par
\int_eval:n { \l_sam_sum_int }

\ExplSyntaxOff

\end{document}
```

2 Answers