#!/usr/bin/perl
#
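# Transforms a CHANGES.txt file (Lucene Java's CHANGES.txt layout) into
# Changes.html; the generated page carries the Hadoop logo and links
# HADOOP-nnn issue keys to JIRA
#
# Input is read from the files named on the command line, or from STDIN when
# none are given; output is written to STDOUT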
#
#
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements.  See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License.  You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

use strict;
use warnings;

# Base URL that issue keys found in the change items are linked against.
my $jira_url_prefix = 'http://issues.apache.org/jira/browse/';

# Parser state.  Everything starts out undefined and is filled in while the
# input is scanned:
#   $title    - first non-blank input line
#   $release  - heading line of the release currently being read
#   $sections - array ref of [ heading, items ] pairs for that release
#   $items    - array ref for the current section (element 0 is the list type)
my ($title, $release, $sections, $items);

# Ids of the first and second release headings encountered.
my ($first_relid, $second_relid);

# One [ release heading, sections ] pair per release, in input order.
my @releases;

# Slurp the whole input up front so the parser can index into it.
my @lines = <>;

#
# Parse input and build hierarchical release structure in @releases
#
# Resulting shape:
#
#   @releases = ( [ release heading line, \@sections ], ... )
#   @sections = ( [ section heading line or undef, \@items ], ... )
#   @items    = ( list type, item text, item text, ... )
#
# The list type in $items[0] is decided by get_list_type() from the first
# item of each section and is one of 'numbered', 'paragraph', or the bullet
# character itself.
#
for (my $line_num = 0 ; $line_num <= $#lines ; ++$line_num) {
  my $line = $lines[$line_num];
  next unless ($line =~ /\S/);         # Skip blank lines

  # The first non-blank line of the input is the document title
  unless (defined $title) {
    $title = $line;
    $title =~ s/^\s+//;                # Trim leading whitespace
    $title =~ s/\s+$//;                # Trim trailing whitespace
    next;
  }

  # Release headings.  The alternation is grouped so that the anchor applies
  # to both words; ungrouped, any line merely containing "Trunk" matched.
  if ($line =~ /^(?:Release|Trunk)/) {
    $release = $line;
    $sections = [];
    push @releases, [ $release, $sections ];
    ($first_relid = lc($release)) =~ s/\s+/_/g   if ($#releases == 0);
    ($second_relid = lc($release)) =~ s/\s+/_/g  if ($#releases == 1);
    $items = undef;
    next;
  }

  # Section heading: 2 leading spaces, words all capitalized
  if ($line =~ /^  [A-Z]+/) {
    $items = [];
    push @$sections, [ $line, $items ];
    next;
  }

  # Handle earlier releases without sections - create a headless section
  unless ($items) {
    $items = [];
    push @$sections, [ undef, $items ];
  }

  my $type;
  if (@$items) { # A list item has been encountered in this section before
    $type = $items->[0];  # 0th position of items array is list type
  } else {
    $type = get_list_type($line);
    push @$items, $type;
  }

  # Per list type: the prefix to strip from an item's first line, and the
  # pattern that marks the first line NOT belonging to the current item.
  my ($prefix_re, $boundary_re);
  if ($type eq 'numbered') {           # The modern items list style
    # List item boundary is another numbered item or an unindented line
    $prefix_re   = qr/^(\s{0,2}\d+\.\s*)/;
    $boundary_re = qr/^(?:\s{0,2}\d+\.\s*\S|\S)/;
  } elsif ($type eq 'paragraph') {
    # List item boundary is a blank line
    $prefix_re   = qr/^(\s+)/;
    $boundary_re = qr/^\s*\z/;
  } else {                             # $type is one of the bulleted types
    # List item boundary is another bullet or a blank line.  The bullet is
    # quoted because '.' would otherwise match any character.
    my $bullet   = quotemeta($type);
    $prefix_re   = qr/^(\s*$bullet\s*)/;
    $boundary_re = qr/^\s*(?:$bullet|\Z)/;
  }

  my $item = $line;
  # Only trust $1 when the prefix actually matched; otherwise it would hold
  # a stale capture from an earlier match (or be undefined).
  my $leading_ws_width = ($item =~ s/$prefix_re//) ? length($1) : 0;
  $item =~ s/\s+$//;                   # Trim trailing whitespace
  $item .= "\n";

  # Peek at the following line and consume it only if it continues this
  # item.  A boundary line is left for the outer loop, so an item that sits
  # on the very last input line is no longer lost.
  while ($line_num < $#lines and $lines[$line_num + 1] !~ $boundary_re) {
    my $continuation = $lines[++$line_num];
    $continuation =~ s/^\s{$leading_ws_width}//; # Trim leading whitespace
    $continuation =~ s/\s+$//;                   # Trim trailing whitespace
    $item .= "$continuation\n";
  }

  # Numbered items may swallow blank separator lines; drop the trailing ones
  $item =~ s/\n+\Z/\n/ if ($type eq 'numbered');
  push @$items, $item;
}

#
# Print HTML-ified version to STDOUT
#
# The page header interpolates the document title (twice) and the ids of the
# first two releases, which the collapse() script leaves expanded.  Each of
# these is undefined when the input has no title or fewer than two release
# headings, so they are defaulted to '' here to avoid "uninitialized value"
# warnings.  The title comes from the plain-text input, so its HTML
# metacharacters are escaped before it is embedded in the page.
#
my $html_title = defined $title ? $title : '';
$html_title =~ s:&:&amp;:g;
$html_title =~ s:<:&lt;:g;
$html_title =~ s:>:&gt;:g;
my $expanded_relid_1 = defined $first_relid  ? $first_relid  : '';
my $expanded_relid_2 = defined $second_relid ? $second_relid : '';

print <<"__HTML_HEADER__";
<!--
**********************************************************
** WARNING: This file is generated from CHANGES.txt by the 
**          Perl script 'changes2html.pl'.
**          Do *not* edit this file!
**********************************************************
          
****************************************************************************
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements.  See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License.  You may obtain a copy of the License at
*
*     http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
****************************************************************************
-->
<html>
<head>
  <title>${html_title}</title>
  <link rel="stylesheet" href="ChangesFancyStyle.css" title="Fancy">
  <link rel="alternate stylesheet" href="ChangesSimpleStyle.css" title="Simple">
  <META http-equiv="Content-Type" content="text/html; charset=UTF-8"/>
  <SCRIPT>
    function toggleList(e) {
      element = document.getElementById(e).style;
      element.display == 'none' ? element.display = 'block' : element.display='none';
    }
    function collapse() {
      for (var i = 0; i < document.getElementsByTagName("ul").length; i++) {
        var list = document.getElementsByTagName("ul")[i];
        if (list.id != '${expanded_relid_1}' && list.id != '${expanded_relid_2}') {
          list.style.display = "none";
        }
      }
      for (var i = 0; i < document.getElementsByTagName("ol").length; i++) {
        document.getElementsByTagName("ol")[i].style.display = "none"; 
      }
    }
    window.onload = collapse;
  </SCRIPT>
</head>
<body>

<a href="http://hadoop.apache.org/core/"><img class="logoImage" alt="Hadoop" src="images/hadoop-logo.jpg" title="Scalable Computing Platform"></a>
<h1>${html_title}</h1>

__HTML_HEADER__

# Emit one toggleable heading per release, followed by its sections and
# items.  The first two releases are printed at <h2> level; from the third
# release on, everything is wrapped in a single "Older Releases" list and
# release headings drop to <h3>.
my $relcnt = 0;
my $header = 'h2';
# The URL prefix contains regex metacharacters ('.'), so quote it before
# using it inside a pattern.
my $jira_url_re = quotemeta($jira_url_prefix);
for my $rel (@releases) {
  if (++$relcnt == 3) {
    $header = 'h3';
    print "<h2><a href=\"javascript:toggleList('older')\">";
    print "Older Releases";
    print "</a></h2>\n";
    print "<ul id=\"older\">\n";
  }

  my ($rel_heading, $rel_sections) = @$rel;

  # The first section heading is undefined for the older sectionless releases.
  # The emptiness check keeps a release without any content from having a
  # bogus empty section autovivified by the nested lookup.
  my $has_release_sections = @$rel_sections ? $rel_sections->[0][0] : undef;

  (my $relid = lc($rel_heading)) =~ s/\s+/_/g;
  print "<$header><a href=\"javascript:toggleList('$relid')\">";
  print $rel_heading;
  print "</a></$header>\n";
  print "<ul id=\"$relid\">\n"
    if ($has_release_sections);

  for my $section (@$rel_sections) {
    my ($heading, $section_items) = @$section;

    # Headless sections carry an undefined heading; treat it as empty so
    # lc() does not warn and the list falls back to the release id below.
    (my $sectid = lc(defined $heading ? $heading : '')) =~ s/\s+/_/g;

    # Element 0 of the items array is the list type, so the last index is
    # also the number of real items.
    my $item_count = $#{$section_items};
    my $numItemsStr = $item_count > 0 ? "($item_count)" : "(none)";

    print "  <li><a href=\"javascript:toggleList('$relid.$sectid')\">",
          ($heading || ''), "</a>&nbsp;&nbsp;&nbsp;$numItemsStr\n"
      if ($has_release_sections);

    my $list_type = $section_items->[0] || '';
    my $list = ($has_release_sections || $list_type eq 'numbered' ? 'ol' : 'ul');
    my $listid = $sectid ? "$relid.$sectid" : $relid;
    print "    <$list id=\"$listid\">\n";

    for my $itemnum (1..$item_count) {
      my $item = $section_items->[$itemnum];
      $item =~ s:&:&amp;:g;                            # Escape HTML metachars
      $item =~ s:<:&lt;:g;
      $item =~ s:>:&gt;:g;

      $item =~ s:\s*(\([^)"]+?\))\s*$:<br />$1:;       # Separate attribution
      $item =~ s:\n{2,}:\n<p/>\n:g;                    # Keep paragraph breaks
      $item =~ s{(?:$jira_url_re)?(HADOOP-\d+)}        # Link to JIRA
                {<a href="${jira_url_prefix}$1">$1</a>}g;
      print "      <li>$item</li>\n";
    }
    print "    </$list>\n";
    print "  </li>\n" if ($has_release_sections);
  }
  print "</ul>\n" if ($has_release_sections);
}
# The "older" list is opened when the third release is reached, so it must be
# closed whenever there were at least three releases (not more than three).
print "</ul>\n" if ($relcnt >= 3);
print "</body>\n</html>\n";


#
# Subroutine: get_list_type
#
# Classifies a section's list style from the first line of its first item.
#
# Takes one parameter:
#
#    - The first line of a sub-section/point
#
# Returns one scalar:
#
#    - 'numbered' when the line starts (after at most two whitespace
#      characters) with digits, a period, whitespace and then text;
#    - the bullet character itself, '-' or '.', when the line starts (after
#      any whitespace) with that bullet, whitespace and then text;
#    - 'paragraph' in every other case.
#
sub get_list_type {
  my ($candidate) = @_;

  return 'numbered' if ($candidate =~ /^\s{0,2}\d+\.\s+\S+/);
  return $1         if ($candidate =~ /^\s*([-.])\s+\S+/);
  return 'paragraph';     # Neither numbered nor bulleted
}

1;