#!/usr/bin/env perl

# Copyright (c) 2019-2020 Christian Jaeger, copying@christianjaeger.ch
# This is free software. See the file COPYING.md that came bundled
# with this file.

use strict;
use warnings;
use warnings FATAL => 'uninitialized';
use utf8;
binmode *STDOUT{IO}, ":encoding(UTF-8)";
use experimental "signatures";

#use Sub::Call::Tail;

# find modules from functional-perl working directory (not installed)
use Cwd 'abs_path';
our ($mydir, $myname);

# Work out, at compile time, the directory and base name of this
# script so that the `use lib` line below can locate the bundled
# modules. A symlinked script is resolved to its target first; a single
# trailing underscore in the script name is not made part of $myname.
BEGIN {
    my $script = $0;
    $script = abs_path($script) if -l $script;
    my ($dir, $name) = $script =~ /(.*?)([^\/]+?)_?\z/s
        or die "?";
    ($mydir, $myname) = ($dir, $name);
}
use lib "$mydir/../lib";

use Getopt::Long;
use Chj::ruse;
use FP::Repl::Trap;
use FP::Repl;
use FP::List ":all";
use FP::Stream ":all";
use FP::IOStream ":all";
use FP::Path;
use FP::Array ":all";
use FP::Array_sort ":all";
use FP::Predicates ":all";
use FP::Ops ":all";
use FP::Combinators ":all";
use FP::Show;
use Chj::xperlfunc ":all";
use FP::PureArray ":all";
use Chj::TEST ":all";
use FP::Carp;
use Chj::xIO qw(with_output_to_file);

# Print the help text (to the currently selected output handle) and
# terminate the program with exit status 1. This is used both for the
# `--help` option and when the wrong number of arguments is given.
sub usage {
    print "usage: $myname path/to/hiring-without-whiteboards/README.md

  Starts an \`FP::Repl\` with access to a \`cs\` function which returns a
  stream (\`FP::Stream\`) of \`Company\` objects representing the contents
  of the given file from a check-out of
  https://github.com/poteto/hiring-without-whiteboards

  You can then enter 'queries' like:

     cs->filter(sub(\$r) { \$r->locations->any(sub(\$l) { \$l =~ /\\bUK\\b/ }) })->show_items

  The 'show_items' method shows the object structure. If you instead
  use the 'print' method, it will serialize back to markdown, although
  without the grouping. \`print_grouped\` will re-sort and re-group and
  show the group subtitles the same way as in the original markdown.

  You can use tab completion to learn about the available methods on a
  variable (don't use \`my\` for making variables, or they won't persist
  to your next entry). Also see the docs on \`functional-perl.org\`.

  To redirect printing to a file, use

     with_output_to_file \$filename, sub { ... }

  See https://news.ycombinator.com/item?id=19290044 for a
  discussion.

";
    exit 1;
}

# Command line handling: `--verbose` sets the flag, `--help` shows the
# usage text; an unknown option ends the program with status 1.
our $verbose = 0;
GetOptions(
    "verbose" => \$verbose,
    "help"    => sub { usage() },
) or exit 1;

# The single expected positional argument: path to the README.md file.
my ($file) = @ARGV;

# XX lib
# Group the argument list into consecutive triples and return them as
# a purearray of 3-element array references. Reports an arity error
# ("n*3") if the number of arguments is not a multiple of three.
sub triples {
    my @triples;
    while (@_) {
        my $first = shift @_;
        fp_croak_arity "n*3" unless @_;
        my $second = shift @_;
        fp_croak_arity "n*3" unless @_;
        my $third = shift @_;
        push @triples, [$first, $second, $third];
    }
    array_to_purearray(\@triples)
}

# Common base class for everything that can appear in the "country"
# position of a location. Subclasses have to provide a `string`
# method, which is also what string interpolation of the object uses.
package CountryOrRemote {
    use FP::Struct [], 'FP::Struct::Show';
    use overload ('""' => sub { $_[0]->string });
    _END_
}

# The pseudo-location "Remote": not a country, always remote, and
# serialized as the literal word "Remote".
package Remote {
    use FP::Struct [], 'CountryOrRemote';

    sub is_USA($self) {
        return undef
    }

    sub is_remote($self) {
        return 1
    }

    sub string($self) {
        return "Remote"
    }

    _END_
}
import Remote::constructors;

# A single shared instance, handed out by the location parser.
my $Remote = Remote();

# Base class for real (non-remote) countries / postal regions.
package Country {
    use FP::Struct [], 'CountryOrRemote';

    sub is_remote($self) {
        return 0
    }

    _END_
}

# USPS two-letter codes, taken from
# https://en.wikipedia.org/wiki/List_of_U.S._state_abbreviations
#
# Both tables are plain text with one entry per line in the form
# "Name   CODE" (name first, then the USPS code, separated by
# whitespace). They are turned into `USPSCode` objects by
# `parse_USPS_segment` further down.

# The 50 states.
my $states = "
 Alabama        AL
 Alaska         AK
 Arizona        AZ
 Arkansas       AR
 California     CA
 Colorado       CO
 Connecticut    CT
 Delaware       DE
 Florida        FL
 Georgia        GA
 Hawaii         HI
 Idaho          ID
 Illinois       IL
 Indiana        IN
 Iowa           IA
 Kansas         KS
 Kentucky       KY
 Louisiana      LA
 Maine          ME
 Maryland       MD
 Massachusetts  MA
 Michigan       MI
 Minnesota      MN
 Mississippi    MS
 Missouri       MO
 Montana        MT
 Nebraska       NE
 Nevada         NV
 New Hampshire  NH
 New Jersey     NJ
 New Mexico     NM
 New York       NY
 North Carolina NC
 North Dakota   ND
 Ohio           OH
 Oklahoma       OK
 Oregon         OR
 Pennsylvania   PA
 Rhode Island   RI
 South Carolina SC
 South Dakota   SD
 Tennessee      TN
 Texas          TX
 Utah           UT
 Vermont        VT
 Virginia       VA
 Washington     WA
 West Virginia  WV
 Wisconsin      WI
 Wyoming        WY ";

# Codes that are in use by the USPS but do not denote a state.
my $nonstates = "
 American Samoa                 AS
 Guam                           GU
 Northern Mariana Islands       MP
 Puerto Rico                    PR
 U.S. Virgin Islands            VI
 Micronesia                     FM
 Marshall Islands               MH
 Palau                          PW
 U.S. Armed Forces – Americas   AA
 U.S. Armed Forces – Europe     AE
 U.S. Armed Forces – Pacific    AP";

# A USPS region (state or other coded territory). It always belongs
# to the USA and is serialized as its two-letter code.
package USPSCode {
    use FP::Struct ["name", "code", "is_state"], 'Country';

    sub is_USA($self) {
        return 1
    }

    sub country_name($self) {
        return "USA"
    }

    sub string($self) {
        return $self->{code}
    }

    _END_
}
import USPSCode::constructors;

# Parse one of the "Name CODE" text tables into a purearray of
# `USPSCode` objects.
#
#   $str      - the table; leading whitespace is ignored, one entry
#               per line
#   $is_state - stored in every resulting object's `is_state` field
#
# Dies with "no parse: ..." on a line that does not end in a
# two-character code preceded by a name.
sub parse_USPS_segment ($str, $is_state) {
    $str =~ s/^\s+//;

    my $parse_entry = sub($line) {

        # strip surrounding whitespace before matching
        $line =~ s/^\s*(.*?)\s*\z/$1/s;
        my ($name, $code) = $line =~ /^(\S.*?\S)\s+(\w{2})\z/
            or die "no parse: '$line'";
        USPSCode($name, $code, $is_state)
    };

    my @lines = split /\n/, $str;
    purearray(@lines)->map($parse_entry)
}

# All known USPS codes (states first, then the non-state codes) ...
my $USPSCodes = parse_USPS_segment($states, 1)
    ->append(parse_USPS_segment($nonstates, 0));

# ... and a lookup table from two-letter code to its object.
my %USfromCode;
$USfromCode{ $_->code } = $_ for $USPSCodes->values;

# Any country that was not recognized as a USPS code; it is kept and
# serialized under the name found in the input.
package NonUSCountry {
    use FP::Struct ["country_name"], 'Country';

    sub is_USA($self) {
        return 0
    }

    sub string($self) {
        return $self->{country_name}
    }

    _END_
}
import NonUSCountry::constructors;

# A city together with the country (or USPS region) it lies in.
# Serializes as "city, country".
package City {
    use FP::Struct ["city", "country"], 'FP::Struct::Show';
    use overload ('""' => sub { $_[0]->string });

    sub string($self) {
        my $city    = $self->city;
        my $country = $self->country;
        "$city, $country"
    }

    _END_
}
import City::constructors;

# Derive the key that is used for ordering company names.
#
# The name is lower-cased, accented letters are folded to their base
# letter, `|` is treated like the digit `0`, and `:` is dropped.
# Spaces, parentheses and dashes are deliberately kept.
#
#   $str - the (character) string to derive the key from
#
# Returns the key as a string.
sub sortstring($str) {
    my $s = lc($str);

    # I'm sure there might be a library for this? Anyway, the
    # rules used by this project may be too specific.
    $s =~ s/[ä]/a/sg;       # or ae ?
    $s =~ s/[éế]/e/sg;
    $s =~ s/[ï]/i/sg;
    $s =~ s/[ōöô]/o/sg;
    $s =~ s/[üû]/u/sg;      # variants of u fold to u (was wrongly 'o')
    $s =~ s/[ç]/c/sg;
    $s =~ s/[|]/0/sg;        # hacky
    $s =~ s/[:]//sg;         # do *not* strip space, (), -
    $s
}

# One entry of the company list.
#
# Fields: `name`, `URL`, `locations` (a sequence of location objects)
# and `maybe_process` (the free-text description of the hiring
# process, or undef when the entry has none).
package Company {
    use FP::Ops qw(the_method);

    use FP::Struct ["name", "URL", "locations", "maybe_process"],
        'FP::Struct::Show';
    use overload (

        # Stringification yields the markdown line again.
        '""' => sub { $_[0]->string },

        # Ordering by `sortstring`, with one exception, see below.
        cmp => sub {
            my ($left, $right, $swapped) = @_;
            die if $swapped;    ##
            my ($lkey, $rkey) = map { $_->sortstring } ($left, $right);
            my $order = $lkey cmp $rkey;
            return 0 unless $order;

            # Order numeric entries between X and Y (since that's what
            # the current hand sorted document does): when the smaller
            # key starts with a digit and the larger one with a
            # letter up to 'x', the plain string order is flipped.
            my ($smaller, $larger)
                = $order < 0 ? ($lkey, $rkey) : ($rkey, $lkey);
            ($smaller =~ /^[0-9]/ and $larger =~ /^[a-x]/)
                ? -$order
                : $order
        }
    );

    # The key used for comparing companies.
    sub sortstring($self) {
        main::sortstring($self->name)
    }

    # Serialize back to a markdown list item.
    sub string($self) {
        my $link      = "- [" . $self->name . "](" . $self->URL . ")";
        my $locations = $self->locations->map(the_method("string"))->join("; ");
        my $process   = $self->maybe_process;
        my $suffix    = defined($process) ? " | $process" : "";
        $link . " | " . $locations . $suffix
    }

    # True if any of the locations is remote. (Not called is_remote
    # since has_remote might be more appropriate; kept short instead.)
    sub remote($self) {
        my $is_remote = the_method("is_remote");
        $self->locations->any($is_remote)
    }

    _END_
}
import Company::constructors;

# ==================================================================
# For re-grouping&sorting (I'm crazy to have considered writing that)

# Oh, don't have ranges yet? Just code one up for us here, hacky
# anyway.
# A range of (single character) strings, with both ends included.
#
# Fields: `from` and `to`. Ranges stringify as "from - to" and are
# ordered by `from` first, then by `to`.
package InclusiveRange {
    use FP::Struct ["from", "to"], 'FP::Struct::Show';
    use overload (
        '""' => sub { shift->string },
        cmp  => sub {
            my ($a, $b) = @_;

            # Compare the lower bounds first, fall back to the upper
            # bounds when those are equal. (Both operands are
            # expected to be ranges.)
            ($a->from cmp $b->from or $a->to cmp $b->to)
        }
    );

    # "from - to", as used in the group headings.
    sub string($self) {
        $self->from . " - " . $self->to
    }

    # Whether the string $v lies within the range (string comparison,
    # both bounds inclusive).
    sub contains_item ($self, $v) {

        # super HACK to accommodate for the '#' thing (should use a
        # maybe type instead for ranges which are open above--except
        # cmp is wrong for digits, too, so, dunno):
        return 1 if $self->to eq '#' and $v =~ /^[0-9]$/;
        (($self->from cmp $v) <= 0 and ($v cmp $self->to) <= 0)
    }

    _END_
}
import InclusiveRange::constructors;

# A range that accepts every item.
#
# A bit of a hack, to catch numbers and whatever in the last group
# (the one ending in '#'): the character '#' does not actually cover
# the digits, so InclusiveRange would otherwise need to be hacked.
package CatchallRange {
    use FP::Struct [], 'InclusiveRange';

    sub contains_item ($self, $v) {
        return 1
    }

    _END_
}
import CatchallRange::constructors;

# One section of the company list: a range (which must be an
# `InclusiveRange` instance, which includes its subclasses) plus the
# items that were put under it.
package Group {
    use FP::Predicates qw(instance_of);
    use FP::Struct [[instance_of("InclusiveRange"), "range"], "items"],
        'FP::Struct::Show';
    _END_
}
import Group::constructors;

# The section ranges: the letters A..Z followed by '#', taken three at
# a time; each triple yields a range from its first to its third
# element. The last one becomes a CatchallRange so that it takes
# whatever is left over.
my $ranges = do {
    my $make_range = sub ($islast, $triple) {
        my ($from, undef, $to) = @$triple;
        my $constructor = $islast ? \&CatchallRange : \&InclusiveRange;
        $constructor->($from, $to)
    };
    triples('A' .. 'Z', '#')->map_with_islast($make_range)->list
};

# Split an already sorted sequence of companies into groups.
#
#   $l      - the sorted companies
#   $ranges - the ranges to distribute them over, in order
#
# Returns a list of `Group` objects: each range in turn takes the
# leading run of companies whose upper-cased first name character it
# contains. Stops as soon as no companies are left.
sub group_companies ($l, $ranges) {
    if ($l->is_null) {
        return null;
    }

    my $current  = $ranges->first;
    my $in_range = sub($company) {
        $current->contains_item(uc substr($company->name, 0, 1))
    };
    my ($members, $remaining) = $l->take_while_and_rest($in_range);

    no warnings "recursion";
    cons(
        Group($current, $members),
        group_companies($remaining, $ranges->rest)
    )
}

# ==================================================================
# markdown line parsing

# True if the line is a markdown list item, i.e. starts with a dash
# that is followed by whitespace.
sub is_item($s) {
    return $s =~ /^-\s+/;
}

# Build a predicate on lines: the returned function is true for
# markdown headings (optional whitespace, one or more '#') whose text
# after the '#' marks and following whitespace satisfies $pred; for
# all other lines it returns 0.
sub is_heading_of($pred) {
    sub($s) {
        my ($txt) = $s =~ /^\s*#+\s*(.*)/
            or return 0;
        $pred->($txt)
    }
}

# Any heading at all.
*is_heading = is_heading_of(sub($s) {1});

# The "Also see" heading.
*is_AlsoSee = is_heading_of(sub($s) { $s =~ /also *see/i });

# True if the line is a horizontal rule: exactly three dashes,
# optionally followed by whitespace.
sub is_hr($s) {
    return $s =~ /^---\s*$/;
}

# True if the line is empty or consists of whitespace only.
sub is_empty($s) {
    return $s =~ /^\s*$/;
}

# Turn a country string into an object: the `USPSCode` object if the
# string is a known USPS code, a fresh `NonUSCountry` otherwise.
sub parse_country($str) {
    my $known = $USfromCode{$str};
    defined($known) ? $known : NonUSCountry($str)
}

# Parse a single location as found in a company entry.
#
#   "Remote" (any case)   -> the shared Remote object
#   "Country"             -> a country object (see parse_country)
#   "City, Country"       -> a City in that country
#   "City, State, USA"    -> a City in the given USPS region
#
# Dies if the location is empty, if it has more than three
# comma-separated parts, or if a three-part location is not of the
# "City, State, USA" form with a known USPS code as state.
sub parse_location($str) {
    return $Remote if $str =~ /^remote$/i;

    my @s = split /\s*,\s*/, $str;

    # split yields no parts at all for an empty string (or one that
    # consists of commas only); report that as what it is instead of
    # as "more than two commas".
    @s or die "empty location in: '$str'";

    if (@s == 1) {
        return parse_country($s[0]);
    }

    if (@s == 2) {
        my ($city, $country) = @s;
        return City($city, parse_country($country));
    }

    @s == 3 or die "more than two commas in: '$str'";

    my ($city, $state, $country) = @s;
    $country eq "USA"
        or die
        "don't know how to deal with presumed state '$state' in country '$country': don't know that country";
    my $s = parse_country($state);
    $s->is_USA
        or die "presumed state '$state' is not a state in the USA";
    City($city, $s)
}

# Parse one markdown item line of the form
#
#   - [Name](URL) | location / location ... | process description
#
# into a `Company` object; the process description is optional.
# Dies if the line is not an item, if the link part is malformed, or
# if the remainder does not split into 2 or 3 '|'-separated parts
# (the part in front of the first '|' is counted, too, and ignored).
sub parse_line($line) {
    my $s = $line;
    $s =~ s/^-\s*// or die "line is not an item";

    my ($name, $url, $rest) = $s =~ /^\[(.*?)\] *\((.*?)\)\s*(.*)$/
        or die "missing link formatting in: '$s'";

    my @p = split /\s*\|\s*/, $rest;
    die "rest does not contain 2 or 3 parts: '$rest'"
        unless (@p == 2 or @p == 3);
    my (undef, $locations, $maybe_process) = @p;

    # /, ; and & used inconsistently:
    my @parsed_locations
        = map { parse_location($_) } split m%\s*[/;&]\s*%, $locations;

    Company($name, $url, list(@parsed_locations), $maybe_process)
}

# Test case (Chj::TEST syntax: a block followed by the expected
# value): parsing a complete sample line that has three locations,
# separated by '/', and a process description which itself contains
# a '/'.
TEST {
    parse_line
        "- [Accredible](https://www.accredible.com/careers) | Cambridge, UK / San Francisco, CA / Remote | Take home project, then a pair-programming and discussion onsite / Skype round."
}
Company(
    "Accredible",
    "https://www.accredible.com/careers",
    list(
        City('Cambridge',     NonUSCountry('UK')),
        City('San Francisco', USPSCode('California', 'CA', 1)),
        Remote()
    ),
    "Take home project, then a pair-programming and discussion onsite / Skype round."
);

# XX move?; name?
# Sequence method: skip everything up to the first element that
# satisfies $pred, then also skip the run of elements satisfying
# $pred that starts there; returns what follows.
sub FP::Abstract::Sequence::drop_over ($l, $pred) {
    my $not_pred = complement $pred;
    $l->drop_while($not_pred)->drop_while($pred)
}

# The relevant lines of the input file (read as UTF-8, without line
# endings): those after the first horizontal rule and before the
# "Also see" heading, with empty lines removed.
sub datalines () {
    my $not_also_see = complement \&is_AlsoSee;
    my $not_empty    = complement \&is_empty;
    xfile_lines_chomp("$file", "UTF-8")
        ->drop_over(\&is_hr)
        ->take_while($not_also_see)
        ->filter($not_empty)
}

# All companies of the input file as `Company` objects, in file
# order. The grouping headings are simply ignored.
sub companies () {
    my $not_heading = complement \&is_heading;
    datalines()->filter($not_heading)->map(\&parse_line)
}

# Parse a group heading like "## A - C" or "## Y - \#" into an
# `InclusiveRange`; a backslash in front of the '#' is dropped.
# Dies with "not a heading: ..." if the line has a different form.
sub parse_heading($str) {
    $str =~ /^#+\s+(\w)\s*-\s*(\w|\\?#)\s*$/
        or die "not a heading: '$str'";
    my ($from, $to) = ($1, $2);
    $to =~ s/^\\//;
    return InclusiveRange($from, $to);
}

# Parse the data lines into a list of `Group` objects while keeping
# the grouping of the input.
#
# Capture the groupings as well, as the original file is badly
# grouping them, so to keep a diff minimal we first have to
# maintain the wrong grouping before re-grouping automatically.
#
#   $datalines - sequence of lines; must be empty or start with a
#                heading line
#
# Dies with "expecting a header, ..." if the first line is not a
# heading.
sub grouped_companies_from($datalines) {
    return null if $datalines->is_null;

    my ($heading, $after_heading) = $datalines->first_and_rest;
    is_heading($heading)
        or die "expecting a header, got: '$heading'";

    # everything up to the next heading belongs to this group
    my ($groupitems, $rest)
        = $after_heading->take_while_and_rest(complement \&is_heading);

    # The recursive call needs its parentheses: this sub is not
    # declared yet while its own body is being compiled, and without
    # them `name $var` is parsed as the method call `$var->name`.
    cons(
        Group(parse_heading($heading), $groupitems->map(\&parse_line)),
        grouped_companies_from($rest)
    )
}

# The groups as they are found in the input file.
sub grouped_companies () {
    grouped_companies_from(datalines())
}

# Print a sequence of groups as markdown: for each group a "## from -
# to" heading (with the first '#' of the range text escaped by a
# backslash), then one line per company, then an empty line.
sub print_groups($s) {
    my $print_company = sub($company) {
        xprintln $company;
    };
    my $print_group = sub($group) {
        my $title = $group->range->string;
        $title =~ s/#/\\#/;
        xprintln "## $title";
        $group->items->for_each($print_company);
        xprintln;
    };
    $s->for_each($print_group)
}

# HACK: should really just use the repl printer (show) directly to
# show the whole results list, but show currently doesn't do
# multi-line pretty-printing; so:
# Print the `show` representation of $v, followed by a newline.
sub print_showln($v) {
    xprintln(show($v))
}

# XX see above, and move?
# Sequence method: print every item in its `show` representation,
# one item per line.
sub FP::Abstract::Sequence::show_items($l) {
    my $printer = \&print_showln;
    $l->for_each($printer)
}

# Sequence method: print every item with `xprintln`, i.e. one item
# per line in its string form.
sub FP::Abstract::Sequence::print($l) {
    my $printer = \&xprintln;
    $l->for_each($printer)
}

# Sequence method: distribute the companies over the standard ranges
# and print the result as markdown with group headings.
#
#   $please_sort - sort the sequence first (default: true); pass a
#                  false value if it is sorted already
sub FP::Abstract::Sequence::print_grouped ($l, $please_sort = 1) {
    if ($please_sort) {
        $l = $l->sort;
    }
    print_groups(group_companies($l, $ranges));
}

# ==================================================================
# main

# Short alias for `companies`, meant for interactive use.
sub cs () {
    return companies();
}

# Either run the test suite (when perhaps_run_tests decides so and
# returns true), or require exactly one file argument and open the
# interactive session.
unless (perhaps_run_tests("main")) {
    usage unless @ARGV == 1;

    # let the user play with the data
    repl;
}
