#!/usr/bin/env perl

my $copyright = <<'COPYRIGHT';
# Copyright 2021 by Christian Jaeger <ch@christianjaeger.ch>
# Published under the same terms as perl itself
COPYRIGHT

use strict;
use utf8;
use warnings;
use warnings FATAL => 'uninitialized';
use experimental 'signatures';

my ($email_full) = $copyright =~ / by ([^\n]*)/s;

my ($mydir, $myname);

BEGIN {
    $0 =~ /(.*?)([^\/]+)\z/s or die "?";
    ($mydir, $myname) = ($1, $2);
}

use lib "$mydir/../lib";

use Getopt::Long;
use URI;
use Chj::xperlfunc qw(xslurp xprintln);
use FP::Combinators qw(compose);
use FP::Ops qw(the_method real_cmp string_cmp);
use FP::Array_sort qw(on);
use FP::PureArray;
use FP::Docstring;
use Chj::TEST;

# ^ XX on and *_cmp should be somewhere else!
#use FP::Repl::Trap;

sub usage {
    print STDERR map {"$_\n"} @_ if @_;
    print "$myname [files..]

  Extract all (non-playlist) Youtube URLs

  --test   run test suite

  ($email_full)
";
    exit(@_ ? 1 : 0);
}

our $verbose = 0;
our $opt_test;
GetOptions(
    "verbose" => \$verbose,
    "help"    => sub {usage},
    "test"    => \$opt_test,
) or exit 1;

sub is_youtube_id ($str) {
    $str =~ m{^[\w-]{11}\z}
}
TEST {
    purearray(qw(7DP6hRt9xvw a_sAH2QGotE I26lP56-UeA I26lP56-UeAa I26lP56/UeA))
        ->map(\&is_youtube_id)
}
purearray(1, 1, 1, '', '');

package YoutubeURL {
    use URI;
    *is_youtube_id = *main::is_youtube_id;
    use FP::Struct ["url"];

    sub length ($self) {
        length $self->url
    }

    sub uri ($self) {
        $self->{_uri} //= URI->new($self->url)
    }

    sub is_youtube ($self) {
        $self->uri->host =~ /yout/
    }

    sub maybe_id ($self) {
        my $u = $self->uri;
        my $q = { $u->query_form };

        my $case1 = sub ($fail) {
            if (my $id = $q->{v}) {
                is_youtube_id($id) or die "invalid v param in: '$u'";
                $id
            } else {
                $fail->()
            }
        };
        my $case2 = sub ($fail) {
            if ((not %$q or defined $q->{t}) and (my $p = $u->path)) {
                $p =~ s{^/}{} or die "?";
                is_youtube_id($p) ? $p : $fail->()
            } else {
                $fail->()
            }
        };
        my $case3 = sub ($fail) {
            if ($q->{list}) {
                undef
            } else {
                $fail->()
            }
        };
        $case1->(
            sub {
                $case2->(
                    sub {
                        $case3->(
                            sub {

                                warn "missing id or list in: '$u'"
                                    unless $u->path =~ m{^/c/};
                                undef
                            }
                        )
                    }
                )
            }
        )
    }

    sub maybe_list ($self) {
        my $u = $self->uri;
        my $q = { $u->query_form };
        $q->{list}
    }

    _END_
}
YoutubeURL::constructors->import;

TEST {
    YoutubeURL('https://youtu.be/lOCMRk8Nex4?t=272')->maybe_id
}
"lOCMRk8Nex4";

use FP::PureArray qw(array_to_purearray);

sub multicollection_on ($get_key) {
    sub {
        my %mc;
        for (@_) {
            if (defined(my $key = $get_key->($_))) {
                push @{ $mc{$key} }, $_
            }
        }
        array_to_purearray $_ for values %mc;
        \%mc
    }
}

sub multicollection_max_to_collection ($cmp) {

    sub ($mc) {
        my %c;
        for my $key (keys %$mc) {
            $c{$key} = $mc->{$key}->max($cmp);
        }
        \%c
    }
}

sub extract ($file) {
    __ "YoutubeURL objects from file, including duplicates";
    my $s = xslurp $file;
    grep { $_->is_youtube } map {
        s/&amp;/&/sg;    # Hack
        YoutubeURL($_)
        } $s =~ m{(https?://[^\s"'<>]*)}g
}

sub paths_extract($paths) {
    __ "YoutubeURL objects from all files, sorted, excluding duplicates";
    my $links_mc = multicollection_on(the_method "maybe_id")
        ->(map { extract $_ } @$paths);
    my $links_c
        = multicollection_max_to_collection(on the_method("length"), \&real_cmp)
        ->($links_mc);
    purearray(values %$links_c)->sort(on the_method("maybe_id"), \&string_cmp)
}

if ($opt_test) {
    Chj::TEST::run_tests "main"
} else {

    # XX: handle '-' for stdin.
    paths_extract(\@ARGV)->for_each(compose \&xprintln, the_method "url");
}

