#!/usr/bin/perl
#
# Transforms Lucene Java's CHANGES.txt into Changes.html
#
# Input is on STDIN, output is to STDOUT
#
#
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements.  See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License.  You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

use strict;
use warnings;

my $jira_url_prefix = 'http://issues.apache.org/jira/browse/';
my $title = undef;
my $release = undef;
my $sections = undef;
my $items = undef;
my $first_relid = undef;
my $second_relid = undef;
my @releases = ();

my @lines = <>;                        # Get all input at once

#
# Parse input and build hierarchical release structure in @releases
#
# Resulting shape:
#
#   @releases = ( [ $release_heading, \@sections ], ... )
#   @sections = ( [ $section_heading_or_undef, \@items ], ... )
#   @items    = ( $list_type, $item_text, $item_text, ... )
#
# The C-style loop is deliberate: item parsing consumes continuation lines
# by advancing $line_num from inside _read_list_item().
#
for (my $line_num = 0 ; $line_num <= $#lines ; ++$line_num) {
    my $current = $lines[$line_num];
    next unless ($current =~ /\S/);    # Skip blank lines

    # The first non-blank line of the input is the document title
    unless (defined $title) {
        $current =~ s/^\s+//;          # Trim leading whitespace
        $current =~ s/\s+$//;          # Trim trailing whitespace
        $title = $current;
        next;
    }

    # Release headings.  The alternation is grouped so that the ^ anchor
    # applies to both words; ungrouped, any line merely containing "Trunk"
    # would be treated as a release heading.
    if ($current =~ /^(?:Release|Trunk)/) {
        $release = $current;
        $sections = [];
        push @releases, [ $release, $sections ];
        ($first_relid = lc($release)) =~ s/\s+/_/g if ($#releases == 0);
        ($second_relid = lc($release)) =~ s/\s+/_/g if ($#releases == 1);
        $items = undef;
        next;
    }

    # Section heading: leading indentation followed by capital letters.
    # NOTE(review): the pattern below contains a single literal space, while
    # the original comment described two leading spaces -- confirm which
    # indentation CHANGES.txt actually uses.
    if ($current =~ /^ ([A-Z]+)\s*/) {
        $items = [];
        push @$sections, [ $current, $items ];
        next;
    }

    # Handle earlier releases without sections - create a headless section
    unless ($items) {
        $items = [];
        push @$sections, [ undef, $items ];
    }

    my $type;
    if (@$items) { # A list item has been encountered in this section before
        $type = $items->[0];  # 0th position of items array is list type
    }
    else {
        $type = get_list_type($current);
        push @$items, $type;
    }

    # Each list style differs only in what is stripped from the front of
    # the first line and in what marks the start of the next item.
    my ($prefix_re, $boundary_re);
    if ($type eq 'numbered') { # The modern items list style
        # List item boundary is another numbered item or an unindented line
        $prefix_re   = qr/^(\s{0,2}\d+\.\s*)/;
        $boundary_re = qr/^(?:\s{0,2}\d+\.\s*\S|\S)/;
    }
    elsif ($type eq 'paragraph') {
        # List item boundary is a blank line
        $prefix_re   = qr/^(\s+)/;
        $boundary_re = qr/\A\s*\z/;
    }
    else { # $type is one of the bulleted types
        # List item boundary is another bullet or a blank line.  The bullet
        # is quoted because '.' is a regex metacharacter.
        $prefix_re   = qr/^(\s*\Q$type\E\s*)/;
        $boundary_re = qr/^\s*(?:\Q$type\E|\Z)/;
    }

    push @$items,
        _read_list_item(\@lines, \$line_num, $prefix_re, $boundary_re);
}

#
# Subroutine: _read_list_item
#
# Collects one list item: the line at the current index plus every
# following line up to (not including) the next boundary line.
#
# Takes four parameters:
#
#    - Reference to the array of input lines
#    - Reference to the current line index; on return it points at the last
#      line consumed, so the caller's loop increment lands on the boundary
#    - Compiled regex whose first capture group is the item prefix (number,
#      bullet or indentation) to strip from the first line
#    - Compiled regex that matches a line starting the next item
#
# Returns one scalar:
#
#    - The item text, one trimmed line per "\n", without trailing blank lines
#
sub _read_list_item {
    my ($lines, $line_num_ref, $prefix_re, $boundary_re) = @_;

    my $item = $lines->[$$line_num_ref];

    # Only trust $1 when the substitution actually matched; otherwise it
    # would be undefined or left over from an unrelated match.
    my $leading_ws_width = ($item =~ s/$prefix_re//) ? length($1) : 0;
    $item =~ s/\s+$//;                 # Trim trailing whitespace
    $item .= "\n";

    # Peek at the next line instead of consuming it, so that a boundary on
    # the very last input line is still handed back to the caller.
    while ($$line_num_ref < $#{$lines}
           and $lines->[$$line_num_ref + 1] !~ $boundary_re) {
        my $line = $lines->[++$$line_num_ref];
        $line =~ s/^\s{$leading_ws_width}//; # Trim leading whitespace
        $line =~ s/\s+$//;                   # Trim trailing whitespace
        $item .= "$line\n";
    }

    $item =~ s/\n+\Z/\n/;              # Trim trailing blank lines
    return $item;
}

#
# Print HTML-ified version to STDOUT
#
print<<"__HTML_HEADER__";
$title Hadoop

$title

__HTML_HEADER__ my $heading; my $relcnt = 0; my $header = 'h2'; for my $rel (@releases) { if (++$relcnt == 3) { $header = 'h3'; print "

"; print "Older Releases"; print "

\n"; print "\n" if ($relcnt > 3); print "\n\n";

#
# Subroutine: get_list_type
#
# Classifies the list style used by a section, judging from the first line
# of its first item.
#
# Takes one parameter:
#
#    - The first line of a sub-section/point
#
# Returns one scalar:
#
#    - The list type: 'numbered' when the line starts with at most two
#      whitespace characters, digits and a period; the bullet character
#      itself ('-' or '.') when the line starts with that bullet followed
#      by whitespace and text; otherwise 'paragraph'.
#
sub get_list_type {
    my ($candidate) = @_;

    # Numbered style is tested first, then the bulleted styles
    return 'numbered' if ($candidate =~ /^\s{0,2}\d+\.\s+\S+/);
    return $1         if ($candidate =~ /^\s*([-.])\s+\S+/);

    # Anything else is free-form paragraph text
    return 'paragraph';
}

1;