#!perl
use 5.32.0;
use strict;
use warnings;

# Which perl release first shipped which version of Unicode, per the
# perldelta documents:
#
# perl5120delta - 5.12.0 == Unicode 5.2  - V5_2
# perl5140delta - 5.14.0 == Unicode 6    - V6_0
# perl5160delta - 5.16.0 == Unicode 6.1  - V6_1
# perl5180delta - 5.18.0 == Unicode 6.2  - V6_2
# perl5200delta - 5.20.0 == Unicode 6.3  - V6_3
# perl5220delta - 5.22.0 == Unicode 7    - V7_0
# perl5240delta - 5.24.0 == Unicode 8    - V8_0
# perl5260delta - 5.26.0 == Unicode 9    - V9_0
# perl5280delta - 5.28.0 == Unicode 10   - V10_0
# perl5300delta - 5.30.0 == Unicode 12.1 - V12_1
# perl5320delta - 5.32.0 == Unicode 13   - V13_0
# perl5360delta - 5.36.0 == Unicode 14   - V14_0
# perl5380delta - 5.38.0 == Unicode 15   - V15_0
# perl5420delta - 5.42.0 == Unicode 16   - V16_0
#
# Keys are the Unicode version with the punctuation removed (major * 10 +
# minor, so 12.1 is 121); values are the lowest $] that supports it.  Unicode
# 11 and 12 never had a perl release of their own in the list above, so they
# map to 5.30, the first release that covers them.
#
# The largest key is also the newest age the generator accepts; anything newer
# is a fatal error, so this table needs a new row whenever the upstream data
# starts using a newer version.
my %min_perl = (
  52  => '5.012',
  60  => '5.014',
  61  => '5.016',
  62  => '5.018',
  63  => '5.020',
  70  => '5.022',
  80  => '5.024',
  90  => '5.026',
  100 => '5.028',
  110 => '5.030',
  120 => '5.030',
  121 => '5.030',
  130 => '5.032',
  140 => '5.036',
  150 => '5.038',
  160 => '5.042',
);

use HTTP::Tiny;
use Cpanel::JSON::XS;
use List::Util qw(min max);
use Unicode::UCD 'charprop';

# The oldest and newest Unicode ages present in %min_perl.  The per-emoji loop
# raises anything older up to $MIN_AGE and refuses anything newer than
# $MAX_AGE.
my ($MIN_AGE, $MAX_AGE) = (min(keys %min_perl), max(keys %min_perl));

# Download the emoji catalogue and decode it into $data, a reference to an
# array with one hash per emoji.
my $EMOJI_JSON_URL
  = "https://raw.githubusercontent.com/iamcal/emoji-data/master/emoji.json";

my $res = HTTP::Tiny->new->get($EMOJI_JSON_URL);

unless ($res->{success}) {
  # HTTP::Tiny reports its own failures (DNS, TLS, timeouts, ...) as a
  # pseudo-response with status 599 and the message in {content}.  For a real
  # HTTP error the status line says more than the response body does.
  my $why = $res->{status} == 599
          ? $res->{content}
          : "$res->{status} $res->{reason}";

  die "error: $why";
}

# HTTP::Tiny hands the body back as raw bytes.  Rather than assume those bytes
# are plain ASCII, tell the decoder that its input is UTF-8; for pure ASCII
# input the result is the same.
my $json = $res->{content};

my $data = Cpanel::JSON::XS->new->utf8->decode($json);

# %by_perl:     minimum perl version => { short name => escaped string }
# %extra_emoji: short name => escaped string, for emoji whose computed age has
#               no entry in %min_perl
#
# The "escaped string" is Perl source text such as \x{1F3F3}\x{FE0F}, ready to
# be dropped between double quotes in the generated code.
my %by_perl;
my %extra_emoji;

for my $char (sort { $a->{short_name} cmp $b->{short_name} } @$data) {
  my @points  = split /-/, $char->{unified};

  # These strings come straight off the network and end up inside \x{...} in
  # generated Perl source, so insist that they are nothing but hex digits.
  for my $point (@points) {
    die "bad code point '$point' for $char->{short_name}\n"
      unless $point =~ /\A[0-9A-Fa-f]+\z/;
  }

  # Consider :transgender_flag: here.  It's:
  #   1F3F3-FE0F-200D-26A7-FE0F
  #
  # All those codepoints were known for 5.22, but this sequence didn't have
  # meaning until Unicode 13.  If we don't consider the $char->{added_in},
  # we'll end up saying it "works" on 5.22.  Now, what does "works" mean?  It's
  # hazy, innit, but I think it should mean "sequence is known name", and that
  # means we should check both.
  #
  # Isn't it enough to just check $char->{added_in} and not each code point?
  # Yeah, it probably is… but I'm leaving the code as-is.
  # -- rjbs, 2022-03-23
  my $max_age = max(
    # Round instead of truncating, so that a product which lands a hair under
    # the intended integer in floating point is not knocked down by one.
    sprintf('%.0f', $char->{added_in} * 10),

    # Age looks like "V12_1"; keep only the digits.  If nothing is left
    # (presumably a code point the running perl does not know -- confirm
    # against Unicode::UCD), count it as 0 instead of feeding an empty string
    # to a numeric comparison.
    map {; (charprop( hex($_), "Age") =~ s/[^0-9]+//gr) || 0 } @points
  );

  if ($max_age < $MIN_AGE) { $max_age = $MIN_AGE }
  if ($max_age > $MAX_AGE) { die "can't handle unicode age $max_age\n" }

  my $perl = $min_perl{ $max_age };
  my $str  = join q{}, map {; "\\x{$_}" } @points;

  # An age inside the supported range but without its own row in %min_perl
  # cannot be tied to a perl release, so it goes into the catch-all bucket.
  my $target = $perl
             ? ($by_perl{$perl} //= {})
             : \%extra_emoji;

  for my $name (@{ $char->{short_names} }) {
    $target->{$name} = $str;
  }
}

# Build the generated Perl source and print it.  The generated code fills
# %hash with short name => emoji string and returns a reference to it.  Each
# group of emoji is wrapped in a check of $] against the perl version that
# first knew them; the check is bypassed unless SLACKEMOJI_STRICT is set.

# Make a short name safe to place between double quotes in the generated
# code: backslash-escape the characters that are special there, and spell
# anything outside printable ASCII as \x{...}.  Names made only of ordinary
# printable characters come back unchanged.
sub _dq_escape {
  my ($text) = @_;

  return $text =~ s/([\\"\$\@])/\\$1/gr
               =~ s/([^\x20-\x7E])/sprintf('\\x{%X}', ord $1)/ger;
}

my $output = "my %hash;\n";

$output .= qq{  \$hash{simple_smile} = "\\x{1F642}";\n};

for my $perl (sort keys %by_perl) {
  my $emoji_for = $by_perl{$perl};

  $output .= "if (\$] >= $perl || ! \$ENV{SLACKEMOJI_STRICT}) {\n";

  for my $name (sort keys %$emoji_for) {
    my $key = _dq_escape($name);
    $output .= qq{  \$hash{"$key"} = "$emoji_for->{$name}";\n};
  }

  $output .= "}\n";
}

{
  # Emoji that could not be tied to a perl release are only offered when the
  # caller has not asked for strict behavior.
  $output .= "if (! \$ENV{SLACKEMOJI_STRICT}) {\n";
  for my $name (sort keys %extra_emoji) {
    my $key = _dq_escape($name);
    $output .= qq{  \$hash{"$key"} = "$extra_emoji{$name}";\n};
  }
  $output .= "}\n";
}

$output .= "return \\%hash;\n";

print $output;
