mjs | b85f64e | 2005-06-17 07:49:31 +0000 | [diff] [blame] | 1 | #!/usr/bin/perl -w |
| 2 | # -*- Mode: perl; indent-tabs-mode: nil; c-basic-offset: 2 -*- |
| 3 | |
| 4 | # |
| 5 | # Copyright (C) 2000, 2001 Eazel, Inc. |
| 6 | # Copyright (C) 2002, 2003 Apple Computer, Inc. |
| 7 | # |
| 8 | # prepare-ChangeLog is free software; you can redistribute it and/or |
| 9 | # modify it under the terms of the GNU General Public |
| 10 | # License as published by the Free Software Foundation; either |
| 11 | # version 2 of the License, or (at your option) any later version. |
| 12 | # |
| 13 | # prepare-ChangeLog is distributed in the hope that it will be useful, |
| 14 | # but WITHOUT ANY WARRANTY; without even the implied warranty of |
| 15 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
| 16 | # General Public License for more details. |
| 17 | # |
| 18 | # You should have received a copy of the GNU General Public |
| 19 | # License along with this program; if not, write to the Free |
| 20 | # Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. |
| 21 | # |
| 22 | |
| 23 | |
| 24 | # Perl script to create a ChangeLog entry with names of files |
| 25 | # and functions from a cvs diff. |
| 26 | # |
| 27 | # Darin Adler <darin@bentspoon.com>, started 20 April 2000 |
| 28 | # Java support added by Maciej Stachowiak <mjs@eazel.com> |
| 29 | # Objective-C, C++ and Objective-C++ support added by Maciej Stachowiak <mjs@apple.com> |
| 30 | |
| 31 | |
| 32 | # |
| 33 | # TODO: |
| 34 | # List functions that have been removed too. |
| 35 | # Decide what a good logical order is for the changed files |
| 36 | # other than a normal text "sort" (top level first?) |
| 37 | # (group directories?) (.h before .c?) |
| 38 | # Handle yacc source files too (other languages?). |
| 39 | # Help merge when there are ChangeLog conflicts or if there's |
| 40 | # already a partly written ChangeLog entry. |
| 41 | # Add command line option to put the ChangeLog into a separate |
| 42 | # file or just spew it out stdout. |
| 43 | # Figure out how to allow -z options from .cvsrc to work without |
| 44 | # letting other bad options work. Currently the -f disables |
| 45 | # everything from the .cvsrc. |
| 46 | # Add CVS version numbers for each file too (can't do that until |
| 47 | # the changes are checked in, though). |
| 48 | # Work around diff stupidity where deleting a function that starts |
| 49 | # with a comment makes diff think that the following function |
| 50 | # has been changed (if the following function starts with a comment |
| 51 | # with the same first line, such as /**) |
| 52 | # Work around diff stupidity where deleting an entire function and |
| 53 | # the blank lines before it makes diff think you've changed the |
| 54 | # previous function. |
| 55 | |
| 56 | use strict; |
| 57 | |
| 58 | use Getopt::Long; |
| 59 | |
| 60 | my $spewDiff = $ENV{"PREPARE_CHANGELOG_DIFF"}; |
| 61 | my $openChangeLogs = 0; |
| 62 | GetOptions("diff|d!" => \$spewDiff, |
| 63 | "open|o!" => \$openChangeLogs); |
| 64 | |
| 65 | # Find the list of modified files |
| 66 | my @changed_files; |
| 67 | my $changed_files_string; |
| 68 | my %changed_line_ranges; |
| 69 | my %function_lists; |
| 70 | my @conflict_files; |
| 71 | |
| 72 | my $CVS = "cvs"; |
| 73 | |
| 74 | print STDERR " Running cvs -n update to find changed, added, or removed files.\n"; |
| 75 | open UPDATE, "$CVS -qn update 2> /dev/stdout |" or die "The cvs update failed: $!.\n"; |
| 76 | while (<UPDATE>) |
| 77 | { |
| 78 | if (/^[MA] (.+)$/) |
| 79 | { |
| 80 | my $file = $1; |
| 81 | $function_lists{$file} = ""; |
sullivan | 5cdae8f | 2005-12-06 21:54:55 +0000 | [diff] [blame] | 82 | push @changed_files, $file if $file ne "ChangeLog"; |
mjs | b85f64e | 2005-06-17 07:49:31 +0000 | [diff] [blame] | 83 | } |
| 84 | |
| 85 | push @conflict_files, $1 if /^[C] (.+)$/; |
| 86 | |
| 87 | $function_lists{$1} = " Added." if /^A (.+)$/; |
| 88 | $function_lists{$1} = " Removed." if /^R (.+)$/; |
| 89 | |
| 90 | print unless /^[A-Z] ./ || /^cvs server: New directory/; |
| 91 | } |
| 92 | close UPDATE; |
| 93 | |
| 94 | if (@conflict_files) |
| 95 | { |
| 96 | print STDERR " The following files have conflicts. Run prepare-ChangeLog again after fixing the conflicts:\n"; |
| 97 | print STDERR join("\n", @conflict_files), "\n"; |
| 98 | exit 1; |
| 99 | } |
| 100 | |
| 101 | if (@changed_files) |
| 102 | { |
| 103 | $changed_files_string = "'" . join ("' '", @changed_files) . "'"; |
| 104 | |
| 105 | # For each file, build a list of modified lines. |
| 106 | # Use line numbers from the "after" side of each diff. |
| 107 | print STDERR " Running cvs diff to determine which lines changed.\n"; |
| 108 | my $file; |
| 109 | open DIFF, "$CVS -fq diff -N $changed_files_string |" or die "The cvs diff failed: $!.\n"; |
| 110 | while (<DIFF>) |
| 111 | { |
| 112 | $file = $1 if /^Index: (\S+)$/; |
| 113 | if (defined $file) { |
| 114 | if (/^\d+(,\d+)?[acd](\d+)(,(\d+))?/) |
| 115 | { push @{$changed_line_ranges{$file}}, [ $2, $4 || $2 ]; } |
| 116 | elsif (/DO_NOT_COMMIT/) { |
| 117 | print STDERR "WARNING: file $file contains the string DO_NOT_COMMIT, line $.\n"; |
| 118 | } |
| 119 | } |
| 120 | } |
| 121 | close DIFF; |
| 122 | } |
| 123 | |
| 124 | # For each source file, convert line range to function list. |
| 125 | if (%changed_line_ranges) |
| 126 | { |
| 127 | print STDERR " Extracting affected function names from source files.\n"; |
| 128 | foreach my $file (keys %changed_line_ranges) |
| 129 | { |
| 130 | # Only look for function names in .c files. |
| 131 | next unless $file =~ /\.(c|cpp|m|mm|h|java)/; |
| 132 | |
| 133 | # Find all the functions in the file. |
| 134 | open SOURCE, $file or next; |
| 135 | my @function_ranges = get_function_line_ranges(\*SOURCE, $file); |
| 136 | close SOURCE; |
| 137 | |
| 138 | # Find all the modified functions. |
| 139 | my @functions; |
| 140 | my %saw_function; |
| 141 | my @change_ranges = (@{$changed_line_ranges{$file}}, []); |
| 142 | my @change_range = (0, 0); |
| 143 | FUNCTION: foreach my $function_range_ref (@function_ranges) |
| 144 | { |
| 145 | my @function_range = @$function_range_ref; |
| 146 | |
| 147 | # Advance to successive change ranges. |
| 148 | for (;; @change_range = @{shift @change_ranges}) |
| 149 | { |
| 150 | last FUNCTION unless @change_range; |
| 151 | |
| 152 | # If past this function, move on to the next one. |
| 153 | next FUNCTION if $change_range[0] > $function_range[1]; |
| 154 | |
| 155 | # If an overlap with this function range, record the function name. |
| 156 | if ($change_range[1] >= $function_range[0] |
| 157 | and $change_range[0] <= $function_range[1]) |
| 158 | { |
| 159 | if (!$saw_function{$function_range[2]}) |
| 160 | { |
| 161 | $saw_function{$function_range[2]} = 1; |
| 162 | push @functions, $function_range[2]; |
| 163 | } |
| 164 | next FUNCTION; |
| 165 | } |
| 166 | } |
| 167 | } |
| 168 | |
| 169 | # Format the list of functions now. |
| 170 | |
| 171 | if (@functions) { |
| 172 | $function_lists{$file} = "" if !defined $function_lists{$file}; |
| 173 | $function_lists{$file} .= "\n (" . join("):\n (", @functions) . "):"; |
| 174 | } |
| 175 | } |
| 176 | } |
| 177 | |
| 178 | if (!%function_lists) |
| 179 | { |
| 180 | print STDERR " No changes found.\n"; |
| 181 | exit 1; |
| 182 | } |
| 183 | |
| 184 | # Get some parameters for the ChangeLog we are about to write. |
| 185 | my $date = sprintf "%d-%02d-%02d", |
| 186 | 1900 + (localtime $^T)[5], # year |
| 187 | 1 + (localtime $^T)[4], # month |
| 188 | (localtime $^T)[3]; # day within month |
| 189 | my $name = $ENV{CHANGE_LOG_NAME} |
| 190 | || $ENV{REAL_NAME} |
| 191 | || (getpwuid $<)[6] |
| 192 | || "set REAL_NAME environment variable"; |
| 193 | my $email_address = $ENV{CHANGE_LOG_EMAIL_ADDRESS} |
| 194 | || $ENV{EMAIL_ADDRESS} |
| 195 | || "set EMAIL_ADDRESS environment variable"; |
| 196 | |
| 197 | # Remove trailing parenthesized notes from user name (bit of hack). |
| 198 | $name =~ s/\(.*?\)\s*$//g; |
| 199 | |
| 200 | # Find the change logs. |
| 201 | my %has_log; |
| 202 | my %files; |
| 203 | foreach my $file (sort keys %function_lists) |
| 204 | { |
| 205 | my $prefix = $file; |
| 206 | my $has_log = 0; |
| 207 | while ($prefix) |
| 208 | { |
| 209 | $prefix =~ s-/[^/]+/?$-/- or $prefix = ""; |
| 210 | $has_log = $has_log{$prefix}; |
| 211 | if (!defined $has_log) |
| 212 | { |
| 213 | $has_log = -f "${prefix}ChangeLog"; |
| 214 | $has_log{$prefix} = $has_log; |
| 215 | } |
| 216 | last if $has_log; |
| 217 | } |
| 218 | if (!$has_log) |
| 219 | { |
| 220 | print STDERR "No ChangeLog found for $file.\n"; |
| 221 | } |
| 222 | else |
| 223 | { |
| 224 | push @{$files{$prefix}}, $file; |
| 225 | } |
| 226 | } |
| 227 | |
| 228 | # Get the latest ChangeLog files from cvs. |
| 229 | my $logs = ""; |
| 230 | foreach my $prefix (sort keys %files) |
| 231 | { |
| 232 | $logs .= " ${prefix}ChangeLog"; |
| 233 | } |
| 234 | if ($logs) |
| 235 | { |
| 236 | print STDERR " Running cvs update to update ChangeLog files.\n"; |
| 237 | open ERRORS, "$CVS update$logs |" or die "The cvs update of ChangeLog files failed: $!.\n"; |
| 238 | print STDERR " $_" while <ERRORS>; |
| 239 | close ERRORS; |
| 240 | } |
| 241 | |
| 242 | # Write out a new ChangeLog file. |
| 243 | foreach my $prefix (sort keys %files) |
| 244 | { |
| 245 | print STDERR " Editing the ${prefix}ChangeLog file.\n"; |
| 246 | open OLD_CHANGE_LOG, "${prefix}ChangeLog" or die "Could not open ${prefix}ChangeLog file: $!.\n"; |
| 247 | # It's less efficient to read the whole thing into memory than it would be |
| 248 | # to read it while we prepend to it later, but I like doing this part first. |
| 249 | my @old_change_log = <OLD_CHANGE_LOG>; |
| 250 | close OLD_CHANGE_LOG; |
| 251 | open CHANGE_LOG, "> ${prefix}ChangeLog" or die "Could not write ${prefix}ChangeLog\n."; |
| 252 | print CHANGE_LOG "$date $name <$email_address>\n\n"; |
| 253 | print CHANGE_LOG " Reviewed by NOBODY (OOPS!).\n\n"; |
| 254 | if ($prefix =~ m/WebCore/ || `pwd` =~ m/WebCore/) { |
eseidel | 6c309252 | 2005-09-30 21:44:42 +0000 | [diff] [blame] | 255 | my $testsDir = "../LayoutTests"; |
| 256 | $testsDir = "$prefix/$testsDir" if length($prefix); |
eseidel | c59c5ec | 2005-11-21 09:36:52 +0000 | [diff] [blame] | 257 | my $haveNewTests = ((-x "$testsDir/../LayoutTests") && (system("find \"$testsDir/../LayoutTests\" -path \"*/CVS/Entries\" | xargs grep -q \"/0/dummy timestamp//\"") == 0)); |
eseidel | 6c309252 | 2005-09-30 21:44:42 +0000 | [diff] [blame] | 258 | print CHANGE_LOG " WARNING: NO TEST CASES ADDED\n\n" unless $haveNewTests; |
mjs | b85f64e | 2005-06-17 07:49:31 +0000 | [diff] [blame] | 259 | } |
| 260 | |
| 261 | foreach my $file (sort @{$files{$prefix}}) |
| 262 | { |
| 263 | my $file_stem = substr $file, length $prefix; |
| 264 | print CHANGE_LOG " * $file_stem:$function_lists{$file}\n"; |
| 265 | } |
| 266 | print CHANGE_LOG "\n", @old_change_log; |
| 267 | close CHANGE_LOG; |
| 268 | } |
| 269 | |
| 270 | # Write out another diff. |
| 271 | if ($spewDiff && @changed_files) |
| 272 | { |
| 273 | print STDERR " Running cvs diff -u to help you write the ChangeLog entries.\n"; |
| 274 | open DIFF, "$CVS -q diff -uN $changed_files_string |" or die "The cvs diff failed: $!.\n"; |
| 275 | while (<DIFF>) { print; } |
| 276 | close DIFF; |
| 277 | } |
| 278 | |
| 279 | # Open ChangeLogs. |
| 280 | if ($openChangeLogs && $logs) |
| 281 | { |
| 282 | print STDERR " Opening the edited ChangeLog files.\n"; |
| 283 | my $editor = $ENV{"CHANGE_LOG_EDIT_APPLICATION"}; |
| 284 | if ($editor) { |
| 285 | system "open -a '$editor'$logs"; |
| 286 | } else { |
| 287 | system "open -e$logs"; |
| 288 | } |
| 289 | } |
| 290 | |
| 291 | # Done. |
| 292 | exit; |
| 293 | |
| 294 | sub get_function_line_ranges |
| 295 | { |
| 296 | my ($file_handle, $file_name) = @_; |
| 297 | |
| 298 | if ($file_name =~ /\.(c|cpp|m|mm|h)$/) { |
| 299 | return get_function_line_ranges_for_c ($file_handle, $file_name); |
| 300 | } elsif ($file_name =~ /\.java$/) { |
| 301 | return get_function_line_ranges_for_java ($file_handle, $file_name); |
| 302 | } |
| 303 | return (); |
| 304 | } |
| 305 | |
| 306 | |
| 307 | sub method_decl_to_selector |
| 308 | { |
| 309 | (my $method_decl) = @_; |
| 310 | |
| 311 | $_ = $method_decl; |
| 312 | |
| 313 | if ((my $comment_stripped) = m-([^/]*)(//|/*).*-) |
| 314 | { |
| 315 | $_ = $comment_stripped; |
| 316 | } |
| 317 | |
| 318 | s/,\s*...//; |
| 319 | |
| 320 | if (/:/) |
| 321 | { |
| 322 | my @components = split /:/; |
| 323 | pop @components if (scalar @components > 1); |
| 324 | $_ = (join ':', map {s/.*[^[:word:]]//; scalar $_;} @components) . ':'; |
| 325 | } else { |
| 326 | s/\s*$//; |
| 327 | s/.*[^[:word:]]//; |
| 328 | } |
| 329 | |
| 330 | return $_; |
| 331 | } |
| 332 | |
| 333 | |
| 334 | |
| 335 | # Read a file and get all the line ranges of the things that look like C functions. |
| 336 | # A function name is the last word before an open parenthesis before the outer |
| 337 | # level open brace. A function starts at the first character after the last close |
| 338 | # brace or semicolon before the function name and ends at the close brace. |
| 339 | # Comment handling is simple-minded but will work for all but pathological cases. |
| 340 | # |
| 341 | # Result is a list of triples: [ start_line, end_line, function_name ]. |
| 342 | |
| 343 | sub get_function_line_ranges_for_c |
| 344 | { |
| 345 | my ($file_handle, $file_name) = @_; |
| 346 | |
| 347 | my @ranges; |
| 348 | |
| 349 | my $in_comment = 0; |
| 350 | my $in_macro = 0; |
| 351 | my $in_method_declaration = 0; |
| 352 | my $in_parentheses = 0; |
| 353 | my $in_braces = 0; |
| 354 | my $brace_start = 0; |
| 355 | my $brace_end = 0; |
| 356 | my $skip_til_brace_or_semicolon = 0; |
| 357 | |
| 358 | my $word = ""; |
| 359 | my $interface_name = ""; |
| 360 | |
| 361 | my $potential_method_char = ""; |
| 362 | my $potential_method_spec = ""; |
| 363 | |
| 364 | my $potential_start = 0; |
| 365 | my $potential_name = ""; |
| 366 | |
| 367 | my $start = 0; |
| 368 | my $name = ""; |
| 369 | |
| 370 | my $next_word_could_be_namespace = 0; |
| 371 | my $potential_namespace = ""; |
| 372 | my @namespaces; |
| 373 | |
| 374 | while (<$file_handle>) |
| 375 | { |
| 376 | # Handle continued multi-line comment. |
| 377 | if ($in_comment) |
| 378 | { |
| 379 | next unless s-.*\*/--; |
| 380 | $in_comment = 0; |
| 381 | } |
| 382 | |
| 383 | # Handle continued macro. |
| 384 | if ($in_macro) |
| 385 | { |
| 386 | $in_macro = 0 unless /\\$/; |
| 387 | next; |
| 388 | } |
| 389 | |
| 390 | # Handle start of macro (or any preprocessor directive). |
| 391 | if (/^\s*\#/) |
| 392 | { |
| 393 | $in_macro = 1 if /^([^\\]|\\.)*\\$/; |
| 394 | next; |
| 395 | } |
| 396 | |
| 397 | # Handle comments and quoted text. |
| 398 | while (m-(/\*|//|\'|\")-) # \' and \" keep emacs perl mode happy |
| 399 | { |
| 400 | my $match = $1; |
| 401 | if ($match eq "/*") |
| 402 | { |
| 403 | if (!s-/\*.*?\*/--) |
| 404 | { |
| 405 | s-/\*.*--; |
| 406 | $in_comment = 1; |
| 407 | } |
| 408 | } |
| 409 | elsif ($match eq "//") |
| 410 | { |
| 411 | s-//.*--; |
| 412 | } |
| 413 | else # ' or " |
| 414 | { |
| 415 | if (!s-$match([^\\]|\\.)*?$match--) |
| 416 | { |
| 417 | warn "mismatched quotes at line $. in $file_name\n"; |
| 418 | s-$match.*--; |
| 419 | } |
| 420 | } |
| 421 | } |
| 422 | |
| 423 | |
| 424 | # continued method declaration |
| 425 | if ($in_method_declaration) |
| 426 | { |
| 427 | my $original = $_; |
| 428 | my $method_cont = $_; |
| 429 | |
| 430 | chomp $method_cont; |
| 431 | $method_cont =~ s/[;\{].*//; |
| 432 | $potential_method_spec = "${potential_method_spec} ${method_cont}"; |
| 433 | |
| 434 | $_ = $original; |
| 435 | if (/;/) |
| 436 | { |
| 437 | $potential_start = 0; |
| 438 | $potential_method_spec = ""; |
| 439 | $potential_method_char = ""; |
| 440 | $in_method_declaration = 0; |
| 441 | s/^[^;\{]*//; |
| 442 | } elsif (/{/) { |
| 443 | my $selector = method_decl_to_selector ($potential_method_spec); |
| 444 | $potential_name = "${potential_method_char}\[${interface_name} ${selector}\]"; |
| 445 | |
| 446 | $potential_method_spec = ""; |
| 447 | $potential_method_char = ""; |
| 448 | $in_method_declaration = 0; |
| 449 | |
| 450 | $_ = $original; |
| 451 | s/^[^;{]*//; |
| 452 | } else { |
| 453 | next; |
| 454 | } |
| 455 | } |
| 456 | |
| 457 | |
| 458 | # start of method declaration |
tomernic | dd79518 | 2005-09-01 18:51:02 +0000 | [diff] [blame] | 459 | if ((my $method_char, my $method_spec) = m&^([-+])([^0-9;][^;]*);?$&) |
mjs | b85f64e | 2005-06-17 07:49:31 +0000 | [diff] [blame] | 460 | { |
| 461 | my $original = $_; |
| 462 | |
| 463 | if ($interface_name) |
| 464 | { |
| 465 | chomp $method_spec; |
| 466 | $method_spec =~ s/\{.*//; |
| 467 | |
| 468 | $potential_method_char = $method_char; |
| 469 | $potential_method_spec = $method_spec; |
| 470 | $potential_start = $.; |
| 471 | $in_method_declaration = 1; |
| 472 | } else { |
| 473 | warn "declaring a method but don't have interface on line $. in $file_name\n"; |
| 474 | } |
| 475 | $_ = $original; |
| 476 | if (/\{/) { |
| 477 | my $selector = method_decl_to_selector ($potential_method_spec); |
| 478 | $potential_name = "${potential_method_char}\[${interface_name} ${selector}\]"; |
| 479 | |
| 480 | $potential_method_spec = ""; |
| 481 | $potential_method_char = ""; |
| 482 | $in_method_declaration = 0; |
| 483 | $_ = $original; |
| 484 | s/^[^{]*//; |
| 485 | } else { |
| 486 | next; |
| 487 | } |
| 488 | } |
| 489 | |
| 490 | |
| 491 | # Find function, interface and method names. |
adele | b15de0a | 2005-09-16 23:04:13 +0000 | [diff] [blame] | 492 | while (m&((?:[[:word:]]+::)*operator(?:[ \t]*\(\)|[^()]*)|[[:word:]:~]+|[(){}:;])|\@(?:implementation|interface|protocol)\s+(\w+)[^{]*&g) |
mjs | b85f64e | 2005-06-17 07:49:31 +0000 | [diff] [blame] | 493 | { |
| 494 | # interface name |
| 495 | if ($2) |
| 496 | { |
| 497 | $interface_name = $2; |
| 498 | next; |
| 499 | } |
| 500 | |
| 501 | # Open parenthesis. |
| 502 | if ($1 eq "(") |
| 503 | { |
| 504 | $potential_name = $word unless $in_parentheses || $skip_til_brace_or_semicolon; |
| 505 | $in_parentheses++; |
| 506 | next; |
| 507 | } |
| 508 | |
| 509 | # Close parenthesis. |
| 510 | if ($1 eq ")") |
| 511 | { |
| 512 | $in_parentheses--; |
| 513 | next; |
| 514 | } |
| 515 | |
| 516 | # C++ constructor initializers |
| 517 | if ($1 eq ":") |
| 518 | { |
| 519 | $skip_til_brace_or_semicolon = 1 unless ($in_parentheses || $in_braces); |
| 520 | } |
| 521 | |
| 522 | # Open brace. |
| 523 | if ($1 eq "{") |
| 524 | { |
| 525 | $skip_til_brace_or_semicolon = 0; |
| 526 | |
| 527 | if ($potential_namespace) { |
| 528 | push @namespaces, $potential_namespace; |
| 529 | $potential_namespace = ""; |
| 530 | next; |
| 531 | } |
| 532 | |
| 533 | # Promote potential name to real function name at the |
| 534 | # start of the outer level set of braces (function body?). |
| 535 | if (!$in_braces and $potential_start) |
| 536 | { |
| 537 | $start = $potential_start; |
| 538 | $name = $potential_name; |
| 539 | if (@namespaces && (length($name) < 2 || substr($name,1,1) ne "[")) { |
| 540 | $name = join ('::', @namespaces, $name); |
| 541 | } |
| 542 | } |
| 543 | |
| 544 | $in_method_declaration = 0; |
| 545 | |
| 546 | $brace_start = $. if (!$in_braces); |
| 547 | $in_braces++; |
| 548 | next; |
| 549 | } |
| 550 | |
| 551 | # Close brace. |
| 552 | if ($1 eq "}") |
| 553 | { |
| 554 | if (!$in_braces && @namespaces) { |
| 555 | pop @namespaces; |
| 556 | next; |
| 557 | } |
| 558 | |
| 559 | $in_braces--; |
| 560 | $brace_end = $. if (!$in_braces); |
| 561 | |
| 562 | # End of an outer level set of braces. |
| 563 | # This could be a function body. |
| 564 | if (!$in_braces and $name) |
| 565 | { |
| 566 | push @ranges, [ $start, $., $name ]; |
| 567 | $name = ""; |
| 568 | } |
| 569 | |
| 570 | $potential_start = 0; |
| 571 | $potential_name = ""; |
| 572 | next; |
| 573 | } |
| 574 | |
| 575 | # Semicolon. |
| 576 | if ($1 eq ";") |
| 577 | { |
| 578 | $skip_til_brace_or_semicolon = 0; |
| 579 | $potential_start = 0; |
| 580 | $potential_name = ""; |
| 581 | $in_method_declaration = 0; |
| 582 | next; |
| 583 | } |
| 584 | |
| 585 | # Ignore "const" method qualifier. |
| 586 | if ($1 eq "const") { |
| 587 | next; |
| 588 | } |
| 589 | |
| 590 | if ($1 eq "namespace" || $1 eq "class" || $1 eq "struct") { |
| 591 | $next_word_could_be_namespace = 1; |
| 592 | next; |
| 593 | } |
| 594 | |
| 595 | # Word. |
| 596 | $word = $1; |
| 597 | if (!$skip_til_brace_or_semicolon) { |
| 598 | if ($next_word_could_be_namespace) { |
| 599 | $potential_namespace = $word; |
| 600 | $next_word_could_be_namespace = 0; |
| 601 | } elsif ($potential_namespace) { |
| 602 | $potential_namespace = ""; |
| 603 | } |
| 604 | |
| 605 | if (!$in_parentheses) { |
| 606 | $potential_start = 0; |
| 607 | $potential_name = ""; |
| 608 | } |
| 609 | if (!$potential_start) { |
| 610 | $potential_start = $.; |
| 611 | $potential_name = ""; |
| 612 | } |
| 613 | } |
| 614 | } |
| 615 | } |
| 616 | |
| 617 | warn "missing close braces in $file_name (probable start at $brace_start)\n" if ($in_braces > 0); |
| 618 | warn "too many close braces in $file_name (probable start at $brace_end)\n" if ($in_braces < 0); |
| 619 | |
| 620 | warn "mismatched parentheses in $file_name\n" if $in_parentheses; |
| 621 | |
| 622 | return @ranges; |
| 623 | } |
| 624 | |
| 625 | |
| 626 | |
| 627 | # Read a file and get all the line ranges of the things that look like Java |
| 628 | # classes, interfaces and methods. |
| 629 | # |
| 630 | # A class or interface name is the word that immediately follows |
| 631 | # `class' or `interface' when followed by an open curly brace and not |
| 632 | # a semicolon. It can appear at the top level, or inside another class |
| 633 | # or interface block, but not inside a function block |
| 634 | # |
| 635 | # A class or interface starts at the first character after the first close |
| 636 | # brace or after the function name and ends at the close brace. |
| 637 | # |
| 638 | # A function name is the last word before an open parenthesis before |
| 639 | # an open brace rather than a semicolon. It can appear at top level or |
| 640 | # inside a class or interface block, but not inside a function block. |
| 641 | # |
| 642 | # A function starts at the first character after the first close |
| 643 | # brace or after the function name and ends at the close brace. |
| 644 | # |
| 645 | # Comment handling is simple-minded but will work for all but pathological cases. |
| 646 | # |
| 647 | # Result is a list of triples: [ start_line, end_line, function_name ]. |
| 648 | |
| 649 | sub get_function_line_ranges_for_java |
| 650 | { |
| 651 | my ($file_handle, $file_name) = @_; |
| 652 | |
| 653 | my @current_scopes; |
| 654 | |
| 655 | my @ranges; |
| 656 | |
| 657 | my $in_comment = 0; |
| 658 | my $in_macro = 0; |
| 659 | my $in_parentheses = 0; |
| 660 | my $in_braces = 0; |
| 661 | my $in_non_block_braces = 0; |
| 662 | my $class_or_interface_just_seen = 0; |
| 663 | |
| 664 | my $word = ""; |
| 665 | |
| 666 | my $potential_start = 0; |
| 667 | my $potential_name = ""; |
| 668 | my $potential_name_is_class_or_interface = 0; |
| 669 | |
| 670 | my $start = 0; |
| 671 | my $name = ""; |
| 672 | my $current_name_is_class_or_interface = 0; |
| 673 | |
| 674 | while (<$file_handle>) |
| 675 | { |
| 676 | # Handle continued multi-line comment. |
| 677 | if ($in_comment) |
| 678 | { |
| 679 | next unless s-.*\*/--; |
| 680 | $in_comment = 0; |
| 681 | } |
| 682 | |
| 683 | # Handle continued macro. |
| 684 | if ($in_macro) |
| 685 | { |
| 686 | $in_macro = 0 unless /\\$/; |
| 687 | next; |
| 688 | } |
| 689 | |
| 690 | # Handle start of macro (or any preprocessor directive). |
| 691 | if (/^\s*\#/) |
| 692 | { |
| 693 | $in_macro = 1 if /^([^\\]|\\.)*\\$/; |
| 694 | next; |
| 695 | } |
| 696 | |
| 697 | # Handle comments and quoted text. |
| 698 | while (m-(/\*|//|\'|\")-) # \' and \" keep emacs perl mode happy |
| 699 | { |
| 700 | my $match = $1; |
| 701 | if ($match eq "/*") |
| 702 | { |
| 703 | if (!s-/\*.*?\*/--) |
| 704 | { |
| 705 | s-/\*.*--; |
| 706 | $in_comment = 1; |
| 707 | } |
| 708 | } |
| 709 | elsif ($match eq "//") |
| 710 | { |
| 711 | s-//.*--; |
| 712 | } |
| 713 | else # ' or " |
| 714 | { |
| 715 | if (!s-$match([^\\]|\\.)*?$match--) |
| 716 | { |
| 717 | warn "mismatched quotes at line $. in $file_name\n"; |
| 718 | s-$match.*--; |
| 719 | } |
| 720 | } |
| 721 | } |
| 722 | |
| 723 | # Find function names. |
| 724 | while (m-(\w+|[(){};])-g) |
| 725 | { |
| 726 | # Open parenthesis. |
| 727 | if ($1 eq "(") |
| 728 | { |
| 729 | if (!$in_parentheses) { |
| 730 | $potential_name = $word; |
| 731 | $potential_name_is_class_or_interface = 0; |
| 732 | } |
| 733 | $in_parentheses++; |
| 734 | next; |
| 735 | } |
| 736 | |
| 737 | # Close parenthesis. |
| 738 | if ($1 eq ")") |
| 739 | { |
| 740 | $in_parentheses--; |
| 741 | next; |
| 742 | } |
| 743 | |
| 744 | # Open brace. |
| 745 | if ($1 eq "{") |
| 746 | { |
| 747 | # Promote potential name to real function name at the |
| 748 | # start of the outer level set of braces (function/class/interface body?). |
| 749 | if (!$in_non_block_braces |
| 750 | and (!$in_braces or $current_name_is_class_or_interface) |
| 751 | and $potential_start) |
| 752 | { |
| 753 | if ($name) |
| 754 | { |
| 755 | push @ranges, [ $start, ($. - 1), |
| 756 | join ('.', @current_scopes) ]; |
| 757 | } |
| 758 | |
| 759 | |
| 760 | $current_name_is_class_or_interface = $potential_name_is_class_or_interface; |
| 761 | |
| 762 | $start = $potential_start; |
| 763 | $name = $potential_name; |
| 764 | |
| 765 | push (@current_scopes, $name); |
| 766 | } else { |
| 767 | $in_non_block_braces++; |
| 768 | } |
| 769 | |
| 770 | $potential_name = ""; |
| 771 | $potential_start = 0; |
| 772 | |
| 773 | $in_braces++; |
| 774 | next; |
| 775 | } |
| 776 | |
| 777 | # Close brace. |
| 778 | if ($1 eq "}") |
| 779 | { |
| 780 | $in_braces--; |
| 781 | |
| 782 | # End of an outer level set of braces. |
| 783 | # This could be a function body. |
| 784 | if (!$in_non_block_braces) |
| 785 | { |
| 786 | if ($name) |
| 787 | { |
| 788 | push @ranges, [ $start, $., |
| 789 | join ('.', @current_scopes) ]; |
| 790 | |
| 791 | pop (@current_scopes); |
| 792 | |
| 793 | if (@current_scopes) |
| 794 | { |
| 795 | $current_name_is_class_or_interface = 1; |
| 796 | |
| 797 | $start = $. + 1; |
| 798 | $name = $current_scopes[$#current_scopes-1]; |
| 799 | } |
| 800 | else |
| 801 | { |
| 802 | $current_name_is_class_or_interface = 0; |
| 803 | $start = 0; |
| 804 | $name = ""; |
| 805 | } |
| 806 | } |
| 807 | } |
| 808 | else |
| 809 | { |
| 810 | $in_non_block_braces-- if $in_non_block_braces; |
| 811 | } |
| 812 | |
| 813 | $potential_start = 0; |
| 814 | $potential_name = ""; |
| 815 | next; |
| 816 | } |
| 817 | |
| 818 | # Semicolon. |
| 819 | if ($1 eq ";") |
| 820 | { |
| 821 | $potential_start = 0; |
| 822 | $potential_name = ""; |
| 823 | next; |
| 824 | } |
| 825 | |
| 826 | if ($1 eq "class" or $1 eq "interface") |
| 827 | { |
| 828 | $class_or_interface_just_seen = 1; |
| 829 | next; |
| 830 | } |
| 831 | |
| 832 | # Word. |
| 833 | $word = $1; |
| 834 | if (!$in_parentheses) |
| 835 | { |
| 836 | if ($class_or_interface_just_seen) { |
| 837 | $potential_name = $word; |
| 838 | $potential_start = $.; |
| 839 | $class_or_interface_just_seen = 0; |
| 840 | $potential_name_is_class_or_interface = 1; |
| 841 | next; |
| 842 | } |
| 843 | } |
| 844 | if (!$potential_start) |
| 845 | { |
| 846 | $potential_start = $.; |
| 847 | $potential_name = ""; |
| 848 | } |
| 849 | $class_or_interface_just_seen = 0; |
| 850 | } |
| 851 | } |
| 852 | |
| 853 | warn "mismatched braces in $file_name\n" if $in_braces; |
| 854 | warn "mismatched parentheses in $file_name\n" if $in_parentheses; |
| 855 | |
| 856 | return @ranges; |
| 857 | } |