| # Copyright (C) 2007, 2008, 2009 Apple Inc. All rights reserved. |
| # Copyright (C) 2009, 2010 Chris Jerdonek (chris.jerdonek@gmail.com) |
| # Copyright (C) 2010, 2011 Research In Motion Limited. All rights reserved. |
| # |
| # Redistribution and use in source and binary forms, with or without |
| # modification, are permitted provided that the following conditions |
| # are met: |
| # |
| # 1. Redistributions of source code must retain the above copyright |
| # notice, this list of conditions and the following disclaimer. |
| # 2. Redistributions in binary form must reproduce the above copyright |
| # notice, this list of conditions and the following disclaimer in the |
| # documentation and/or other materials provided with the distribution. |
| # 3. Neither the name of Apple Computer, Inc. ("Apple") nor the names of |
| # its contributors may be used to endorse or promote products derived |
| # from this software without specific prior written permission. |
| # |
| # THIS SOFTWARE IS PROVIDED BY APPLE AND ITS CONTRIBUTORS "AS IS" AND ANY |
| # EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED |
| # WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE |
| # DISCLAIMED. IN NO EVENT SHALL APPLE OR ITS CONTRIBUTORS BE LIABLE FOR ANY |
| # DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES |
| # (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; |
| # LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND |
| # ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT |
| # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF |
| # THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
| |
| # Module to share code to work with various version control systems. |
| package VCSUtils; |
| |
| use strict; |
| use warnings; |
| |
| use Cwd qw(); # "qw()" prevents warnings about redefining getcwd() with "use POSIX;" |
| use English; # for $POSTMATCH, etc. |
| use File::Basename; |
| use File::Spec; |
| use POSIX; |
| use Term::ANSIColor qw(colored); |
| |
| BEGIN { |
| use Exporter (); |
| our ($VERSION, @ISA, @EXPORT, @EXPORT_OK, %EXPORT_TAGS); |
| $VERSION = 1.00; |
| @ISA = qw(Exporter); |
| @EXPORT = qw( |
| &applyGitBinaryPatchDelta |
| &callSilently |
| &canonicalizePath |
| &changeLogEmailAddress |
| &changeLogName |
| &chdirReturningRelativePath |
| &decodeGitBinaryChunk |
| &decodeGitBinaryPatch |
| &determineSVNRoot |
| &determineVCSRoot |
| &exitStatus |
| &fixChangeLogPatch |
| &gitBranch |
| &gitdiff2svndiff |
| &isGit |
| &isGitSVN |
| &isGitBranchBuild |
| &isGitDirectory |
| &isSVN |
| &isSVNDirectory |
| &isSVNVersion16OrNewer |
| &makeFilePathRelative |
| &mergeChangeLogs |
| &normalizePath |
| &parseChunkRange |
| &parseFirstEOL |
| &parsePatch |
| &pathRelativeToSVNRepositoryRootForPath |
| &possiblyColored |
| &prepareParsedPatch |
| &removeEOL |
| &runPatchCommand |
| &scmMoveOrRenameFile |
| &scmToggleExecutableBit |
| &setChangeLogDateAndReviewer |
| &svnRevisionForDirectory |
| &svnStatus |
| &toWindowsLineEndings |
| ); |
| %EXPORT_TAGS = ( ); |
| @EXPORT_OK = (); |
| } |
| |
| our @EXPORT_OK; |
| |
| my $gitBranch; |
| my $gitRoot; |
| my $isGit; |
| my $isGitSVN; |
| my $isGitBranchBuild; |
| my $isSVN; |
| my $svnVersion; |
| |
| # Project time zone for Cupertino, CA, US |
| my $changeLogTimeZone = "PST8PDT"; |
| |
| my $gitDiffStartRegEx = qr#^diff --git (\w/)?(.+) (\w/)?([^\r\n]+)#; |
| my $svnDiffStartRegEx = qr#^Index: ([^\r\n]+)#; |
| my $svnPropertiesStartRegEx = qr#^Property changes on: ([^\r\n]+)#; # $1 is normally the same as the index path. |
| my $svnPropertyStartRegEx = qr#^(Modified|Name|Added|Deleted): ([^\r\n]+)#; # $2 is the name of the property. |
| my $svnPropertyValueStartRegEx = qr#^ (\+|-|Merged|Reverse-merged) ([^\r\n]+)#; # $2 is the start of the property's value (which may span multiple lines). |
| |
| # This method is for portability. Return the system-appropriate exit |
| # status of a child process. |
| # |
| # Args: pass the child error status returned by the last pipe close, |
| # for example "$?". |
| sub exitStatus($) |
| { |
| my ($returnvalue) = @_; |
| if ($^O eq "MSWin32") { |
| return $returnvalue >> 8; |
| } |
| return WEXITSTATUS($returnvalue); |
| } |
| |
| # Call a function while suppressing STDERR, and return the return values |
| # as an array. |
| sub callSilently($@) { |
| my ($func, @args) = @_; |
| |
| # The following pattern was taken from here: |
| # http://www.sdsc.edu/~moreland/courses/IntroPerl/docs/manual/pod/perlfunc/open.html |
| # |
| # Also see this Perl documentation (search for "open OLDERR"): |
| # http://perldoc.perl.org/functions/open.html |
| open(OLDERR, ">&STDERR"); |
| close(STDERR); |
| my @returnValue = &$func(@args); |
| open(STDERR, ">&OLDERR"); |
| close(OLDERR); |
| |
| return @returnValue; |
| } |
| |
| sub toWindowsLineEndings |
| { |
| my ($text) = @_; |
| $text =~ s/\n/\r\n/g; |
| return $text; |
| } |
| |
| # Note, this method will not error if the file corresponding to the $source path does not exist. |
| sub scmMoveOrRenameFile |
| { |
| my ($source, $destination) = @_; |
| return if ! -e $source; |
| if (isSVN()) { |
| system("svn", "move", $source, $destination); |
| } elsif (isGit()) { |
| system("git", "mv", $source, $destination); |
| } |
| } |
| |
| # Note, this method will not error if the file corresponding to the path does not exist. |
| sub scmToggleExecutableBit |
| { |
| my ($path, $executableBitDelta) = @_; |
| return if ! -e $path; |
| if ($executableBitDelta == 1) { |
| scmAddExecutableBit($path); |
| } elsif ($executableBitDelta == -1) { |
| scmRemoveExecutableBit($path); |
| } |
| } |
| |
| sub scmAddExecutableBit($) |
| { |
| my ($path) = @_; |
| |
| if (isSVN()) { |
| system("svn", "propset", "svn:executable", "on", $path) == 0 or die "Failed to run 'svn propset svn:executable on $path'."; |
| } elsif (isGit()) { |
| chmod(0755, $path); |
| } |
| } |
| |
| sub scmRemoveExecutableBit($) |
| { |
| my ($path) = @_; |
| |
| if (isSVN()) { |
| system("svn", "propdel", "svn:executable", $path) == 0 or die "Failed to run 'svn propdel svn:executable $path'."; |
| } elsif (isGit()) { |
| chmod(0664, $path); |
| } |
| } |
| |
| sub isGitDirectory($) |
| { |
| my ($dir) = @_; |
| return system("cd $dir && git rev-parse > " . File::Spec->devnull() . " 2>&1") == 0; |
| } |
| |
| sub isGit() |
| { |
| return $isGit if defined $isGit; |
| |
| $isGit = isGitDirectory("."); |
| return $isGit; |
| } |
| |
| sub isGitSVN() |
| { |
| return $isGitSVN if defined $isGitSVN; |
| |
| # There doesn't seem to be an officially documented way to determine |
| # if you're in a git-svn checkout. The best suggestions seen so far |
| # all use something like the following: |
| my $output = `git config --get svn-remote.svn.fetch 2>& 1`; |
| $isGitSVN = $output ne ''; |
| return $isGitSVN; |
| } |
| |
| sub gitBranch() |
| { |
| unless (defined $gitBranch) { |
| chomp($gitBranch = `git symbolic-ref -q HEAD`); |
| $gitBranch = "" if exitStatus($?); |
| $gitBranch =~ s#^refs/heads/##; |
| $gitBranch = "" if $gitBranch eq "master"; |
| } |
| |
| return $gitBranch; |
| } |
| |
| sub isGitBranchBuild() |
| { |
| my $branch = gitBranch(); |
| chomp(my $override = `git config --bool branch.$branch.webKitBranchBuild`); |
| return 1 if $override eq "true"; |
| return 0 if $override eq "false"; |
| |
| unless (defined $isGitBranchBuild) { |
| chomp(my $gitBranchBuild = `git config --bool core.webKitBranchBuild`); |
| $isGitBranchBuild = $gitBranchBuild eq "true"; |
| } |
| |
| return $isGitBranchBuild; |
| } |
| |
| sub isSVNDirectory($) |
| { |
| my ($dir) = @_; |
| return system("cd $dir && svn info > " . File::Spec->devnull() . " 2>&1") == 0; |
| } |
| |
| sub isSVN() |
| { |
| return $isSVN if defined $isSVN; |
| |
| $isSVN = isSVNDirectory("."); |
| return $isSVN; |
| } |
| |
| sub svnVersion() |
| { |
| return $svnVersion if defined $svnVersion; |
| |
| if (!isSVN()) { |
| $svnVersion = 0; |
| } else { |
| chomp($svnVersion = `svn --version --quiet`); |
| } |
| return $svnVersion; |
| } |
| |
| sub isSVNVersion16OrNewer() |
| { |
| my $version = svnVersion(); |
| return eval "v$version" ge v1.6; |
| } |
| |
| sub chdirReturningRelativePath($) |
| { |
| my ($directory) = @_; |
| my $previousDirectory = Cwd::getcwd(); |
| chdir $directory; |
| my $newDirectory = Cwd::getcwd(); |
| return "." if $newDirectory eq $previousDirectory; |
| return File::Spec->abs2rel($previousDirectory, $newDirectory); |
| } |
| |
| sub determineGitRoot() |
| { |
| chomp(my $gitDir = `git rev-parse --git-dir`); |
| return dirname($gitDir); |
| } |
| |
| sub determineSVNRoot() |
| { |
| my $last = ''; |
| my $path = '.'; |
| my $parent = '..'; |
| my $repositoryRoot; |
| my $repositoryUUID; |
| while (1) { |
| my $thisRoot; |
| my $thisUUID; |
| # Ignore error messages in case we've run past the root of the checkout. |
| open INFO, "svn info '$path' 2> " . File::Spec->devnull() . " |" or die; |
| while (<INFO>) { |
| if (/^Repository Root: (.+)/) { |
| $thisRoot = $1; |
| } |
| if (/^Repository UUID: (.+)/) { |
| $thisUUID = $1; |
| } |
| if ($thisRoot && $thisUUID) { |
| local $/ = undef; |
| <INFO>; # Consume the rest of the input. |
| } |
| } |
| close INFO; |
| |
| # It's possible (e.g. for developers of some ports) to have a WebKit |
| # checkout in a subdirectory of another checkout. So abort if the |
| # repository root or the repository UUID suddenly changes. |
| last if !$thisUUID; |
| $repositoryUUID = $thisUUID if !$repositoryUUID; |
| last if $thisUUID ne $repositoryUUID; |
| |
| last if !$thisRoot; |
| $repositoryRoot = $thisRoot if !$repositoryRoot; |
| last if $thisRoot ne $repositoryRoot; |
| |
| $last = $path; |
| $path = File::Spec->catdir($parent, $path); |
| } |
| |
| return File::Spec->rel2abs($last); |
| } |
| |
| sub determineVCSRoot() |
| { |
| if (isGit()) { |
| return determineGitRoot(); |
| } |
| |
| if (!isSVN()) { |
| # Some users have a workflow where svn-create-patch, svn-apply and |
| # svn-unapply are used outside of multiple svn working directores, |
| # so warn the user and assume Subversion is being used in this case. |
| warn "Unable to determine VCS root for '" . Cwd::getcwd() . "'; assuming Subversion"; |
| $isSVN = 1; |
| } |
| |
| return determineSVNRoot(); |
| } |
| |
| sub svnRevisionForDirectory($) |
| { |
| my ($dir) = @_; |
| my $revision; |
| |
| if (isSVNDirectory($dir)) { |
| my $svnInfo = `LC_ALL=C svn info $dir | grep Revision:`; |
| ($revision) = ($svnInfo =~ m/Revision: (\d+).*/g); |
| } elsif (isGitDirectory($dir)) { |
| my $gitLog = `cd $dir && LC_ALL=C git log --grep='git-svn-id: ' -n 1 | grep git-svn-id:`; |
| ($revision) = ($gitLog =~ m/ +git-svn-id: .+@(\d+) /g); |
| } |
| die "Unable to determine current SVN revision in $dir" unless (defined $revision); |
| return $revision; |
| } |
| |
| sub pathRelativeToSVNRepositoryRootForPath($) |
| { |
| my ($file) = @_; |
| my $relativePath = File::Spec->abs2rel($file); |
| |
| my $svnInfo; |
| if (isSVN()) { |
| $svnInfo = `LC_ALL=C svn info $relativePath`; |
| } elsif (isGit()) { |
| $svnInfo = `LC_ALL=C git svn info $relativePath`; |
| } |
| |
| $svnInfo =~ /.*^URL: (.*?)$/m; |
| my $svnURL = $1; |
| |
| $svnInfo =~ /.*^Repository Root: (.*?)$/m; |
| my $repositoryRoot = $1; |
| |
| $svnURL =~ s/$repositoryRoot\///; |
| return $svnURL; |
| } |
| |
| sub makeFilePathRelative($) |
| { |
| my ($path) = @_; |
| return $path unless isGit(); |
| |
| unless (defined $gitRoot) { |
| chomp($gitRoot = `git rev-parse --show-cdup`); |
| } |
| return $gitRoot . $path; |
| } |
| |
| sub normalizePath($) |
| { |
| my ($path) = @_; |
| $path =~ s/\\/\//g; |
| return $path; |
| } |
| |
| sub possiblyColored($$) |
| { |
| my ($colors, $string) = @_; |
| |
| if (-t STDOUT) { |
| return colored([$colors], $string); |
| } else { |
| return $string; |
| } |
| } |
| |
| sub canonicalizePath($) |
| { |
| my ($file) = @_; |
| |
| # Remove extra slashes and '.' directories in path |
| $file = File::Spec->canonpath($file); |
| |
| # Remove '..' directories in path |
| my @dirs = (); |
| foreach my $dir (File::Spec->splitdir($file)) { |
| if ($dir eq '..' && $#dirs >= 0 && $dirs[$#dirs] ne '..') { |
| pop(@dirs); |
| } else { |
| push(@dirs, $dir); |
| } |
| } |
| return ($#dirs >= 0) ? File::Spec->catdir(@dirs) : "."; |
| } |
| |
| sub removeEOL($) |
| { |
| my ($line) = @_; |
| return "" unless $line; |
| |
| $line =~ s/[\r\n]+$//g; |
| return $line; |
| } |
| |
| sub parseFirstEOL($) |
| { |
| my ($fileHandle) = @_; |
| |
| # Make input record separator the new-line character to simplify regex matching below. |
| my $savedInputRecordSeparator = $INPUT_RECORD_SEPARATOR; |
| $INPUT_RECORD_SEPARATOR = "\n"; |
| my $firstLine = <$fileHandle>; |
| $INPUT_RECORD_SEPARATOR = $savedInputRecordSeparator; |
| |
| return unless defined($firstLine); |
| |
| my $eol; |
| if ($firstLine =~ /\r\n/) { |
| $eol = "\r\n"; |
| } elsif ($firstLine =~ /\r/) { |
| $eol = "\r"; |
| } elsif ($firstLine =~ /\n/) { |
| $eol = "\n"; |
| } |
| return $eol; |
| } |
| |
| sub firstEOLInFile($) |
| { |
| my ($file) = @_; |
| my $eol; |
| if (open(FILE, $file)) { |
| $eol = parseFirstEOL(*FILE); |
| close(FILE); |
| } |
| return $eol; |
| } |
| |
| # Parses a chunk range line into its components. |
| # |
| # A chunk range line has the form: @@ -L_1,N_1 +L_2,N_2 @@, where the pairs (L_1, N_1), |
| # (L_2, N_2) are ranges that represent the starting line number and line count in the |
| # original file and new file, respectively. |
| # |
| # Note, some versions of GNU diff may omit the comma and trailing line count (e.g. N_1), |
| # in which case the omitted line count defaults to 1. For example, GNU diff may output |
| # @@ -1 +1 @@, which is equivalent to @@ -1,1 +1,1 @@. |
| # |
| # This subroutine returns undef if given an invalid or malformed chunk range. |
| # |
| # Args: |
| # $line: the line to parse. |
| # |
| # Returns $chunkRangeHashRef |
| # $chunkRangeHashRef: a hash reference representing the parts of a chunk range, as follows-- |
| # startingLine: the starting line in the original file. |
| # lineCount: the line count in the original file. |
| # newStartingLine: the new starting line in the new file. |
| # newLineCount: the new line count in the new file. |
| sub parseChunkRange($) |
| { |
| my ($line) = @_; |
| my $chunkRangeRegEx = qr#^\@\@ -(\d+)(,(\d+))? \+(\d+)(,(\d+))? \@\@#; |
| if ($line !~ /$chunkRangeRegEx/) { |
| return; |
| } |
| my %chunkRange; |
| $chunkRange{startingLine} = $1; |
| $chunkRange{lineCount} = defined($2) ? $3 : 1; |
| $chunkRange{newStartingLine} = $4; |
| $chunkRange{newLineCount} = defined($5) ? $6 : 1; |
| return \%chunkRange; |
| } |
| |
| sub svnStatus($) |
| { |
| my ($fullPath) = @_; |
| my $svnStatus; |
| open SVN, "svn status --non-interactive --non-recursive '$fullPath' |" or die; |
| if (-d $fullPath) { |
| # When running "svn stat" on a directory, we can't assume that only one |
| # status will be returned (since any files with a status below the |
| # directory will be returned), and we can't assume that the directory will |
| # be first (since any files with unknown status will be listed first). |
| my $normalizedFullPath = File::Spec->catdir(File::Spec->splitdir($fullPath)); |
| while (<SVN>) { |
| # Input may use a different EOL sequence than $/, so avoid chomp. |
| $_ = removeEOL($_); |
| my $normalizedStatPath = File::Spec->catdir(File::Spec->splitdir(substr($_, 7))); |
| if ($normalizedFullPath eq $normalizedStatPath) { |
| $svnStatus = "$_\n"; |
| last; |
| } |
| } |
| # Read the rest of the svn command output to avoid a broken pipe warning. |
| local $/ = undef; |
| <SVN>; |
| } |
| else { |
| # Files will have only one status returned. |
| $svnStatus = removeEOL(<SVN>) . "\n"; |
| } |
| close SVN; |
| return $svnStatus; |
| } |
| |
| # Return whether the given file mode is executable in the source control |
| # sense. We make this determination based on whether the executable bit |
| # is set for "others" rather than the stronger condition that it be set |
| # for the user, group, and others. This is sufficient for distinguishing |
| # the default behavior in Git and SVN. |
| # |
| # Args: |
| # $fileMode: A number or string representing a file mode in octal notation. |
| sub isExecutable($) |
| { |
| my $fileMode = shift; |
| |
| return $fileMode % 2; |
| } |
| |
| # Parse the next Git diff header from the given file handle, and advance |
| # the handle so the last line read is the first line after the header. |
| # |
| # This subroutine dies if given leading junk. |
| # |
| # Args: |
| # $fileHandle: advanced so the last line read from the handle is the first |
| # line of the header to parse. This should be a line |
| # beginning with "diff --git". |
| # $line: the line last read from $fileHandle |
| # |
| # Returns ($headerHashRef, $lastReadLine): |
| # $headerHashRef: a hash reference representing a diff header, as follows-- |
| # copiedFromPath: the path from which the file was copied or moved if |
| # the diff is a copy or move. |
| # executableBitDelta: the value 1 or -1 if the executable bit was added or |
| # removed, respectively. New and deleted files have |
| # this value only if the file is executable, in which |
| # case the value is 1 and -1, respectively. |
| # indexPath: the path of the target file. |
| # isBinary: the value 1 if the diff is for a binary file. |
| # isDeletion: the value 1 if the diff is a file deletion. |
| # isCopyWithChanges: the value 1 if the file was copied or moved and |
| # the target file was changed in some way after being |
| # copied or moved (e.g. if its contents or executable |
| # bit were changed). |
| # isNew: the value 1 if the diff is for a new file. |
| # shouldDeleteSource: the value 1 if the file was copied or moved and |
| # the source file was deleted -- i.e. if the copy |
| # was actually a move. |
| # svnConvertedText: the header text with some lines converted to SVN |
| # format. Git-specific lines are preserved. |
| # $lastReadLine: the line last read from $fileHandle. |
| sub parseGitDiffHeader($$) |
| { |
| my ($fileHandle, $line) = @_; |
| |
| $_ = $line; |
| |
| my $indexPath; |
| if (/$gitDiffStartRegEx/) { |
| # The first and second paths can differ in the case of copies |
| # and renames. We use the second file path because it is the |
| # destination path. |
| $indexPath = $4; |
| # Use $POSTMATCH to preserve the end-of-line character. |
| $_ = "Index: $indexPath$POSTMATCH"; # Convert to SVN format. |
| } else { |
| die("Could not parse leading \"diff --git\" line: \"$line\"."); |
| } |
| |
| my $copiedFromPath; |
| my $foundHeaderEnding; |
| my $isBinary; |
| my $isDeletion; |
| my $isNew; |
| my $newExecutableBit = 0; |
| my $oldExecutableBit = 0; |
| my $shouldDeleteSource = 0; |
| my $similarityIndex = 0; |
| my $svnConvertedText; |
| while (1) { |
| # Temporarily strip off any end-of-line characters to simplify |
| # regex matching below. |
| s/([\n\r]+)$//; |
| my $eol = $1; |
| |
| if (/^(deleted file|old) mode (\d+)/) { |
| $oldExecutableBit = (isExecutable($2) ? 1 : 0); |
| $isDeletion = 1 if $1 eq "deleted file"; |
| } elsif (/^new( file)? mode (\d+)/) { |
| $newExecutableBit = (isExecutable($2) ? 1 : 0); |
| $isNew = 1 if $1; |
| } elsif (/^similarity index (\d+)%/) { |
| $similarityIndex = $1; |
| } elsif (/^copy from (\S+)/) { |
| $copiedFromPath = $1; |
| } elsif (/^rename from (\S+)/) { |
| # FIXME: Record this as a move rather than as a copy-and-delete. |
| # This will simplify adding rename support to svn-unapply. |
| # Otherwise, the hash for a deletion would have to know |
| # everything about the file being deleted in order to |
| # support undoing itself. Recording as a move will also |
| # permit us to use "svn move" and "git move". |
| $copiedFromPath = $1; |
| $shouldDeleteSource = 1; |
| } elsif (/^--- \S+/) { |
| $_ = "--- $indexPath"; # Convert to SVN format. |
| } elsif (/^\+\+\+ \S+/) { |
| $_ = "+++ $indexPath"; # Convert to SVN format. |
| $foundHeaderEnding = 1; |
| } elsif (/^GIT binary patch$/ ) { |
| $isBinary = 1; |
| $foundHeaderEnding = 1; |
| # The "git diff" command includes a line of the form "Binary files |
| # <path1> and <path2> differ" if the --binary flag is not used. |
| } elsif (/^Binary files / ) { |
| die("Error: the Git diff contains a binary file without the binary data in ". |
| "line: \"$_\". Be sure to use the --binary flag when invoking \"git diff\" ". |
| "with diffs containing binary files."); |
| } |
| |
| $svnConvertedText .= "$_$eol"; # Also restore end-of-line characters. |
| |
| $_ = <$fileHandle>; # Not defined if end-of-file reached. |
| |
| last if (!defined($_) || /$gitDiffStartRegEx/ || $foundHeaderEnding); |
| } |
| |
| my $executableBitDelta = $newExecutableBit - $oldExecutableBit; |
| |
| my %header; |
| |
| $header{copiedFromPath} = $copiedFromPath if $copiedFromPath; |
| $header{executableBitDelta} = $executableBitDelta if $executableBitDelta; |
| $header{indexPath} = $indexPath; |
| $header{isBinary} = $isBinary if $isBinary; |
| $header{isCopyWithChanges} = 1 if ($copiedFromPath && ($similarityIndex != 100 || $executableBitDelta)); |
| $header{isDeletion} = $isDeletion if $isDeletion; |
| $header{isNew} = $isNew if $isNew; |
| $header{shouldDeleteSource} = $shouldDeleteSource if $shouldDeleteSource; |
| $header{svnConvertedText} = $svnConvertedText; |
| |
| return (\%header, $_); |
| } |
| |
| # Parse the next SVN diff header from the given file handle, and advance |
| # the handle so the last line read is the first line after the header. |
| # |
| # This subroutine dies if given leading junk or if it could not detect |
| # the end of the header block. |
| # |
| # Args: |
| # $fileHandle: advanced so the last line read from the handle is the first |
| # line of the header to parse. This should be a line |
| # beginning with "Index:". |
| # $line: the line last read from $fileHandle |
| # |
| # Returns ($headerHashRef, $lastReadLine): |
| # $headerHashRef: a hash reference representing a diff header, as follows-- |
| # copiedFromPath: the path from which the file was copied if the diff |
| # is a copy. |
| # indexPath: the path of the target file, which is the path found in |
| # the "Index:" line. |
| # isBinary: the value 1 if the diff is for a binary file. |
| # isNew: the value 1 if the diff is for a new file. |
| # sourceRevision: the revision number of the source, if it exists. This |
| # is the same as the revision number the file was copied |
| # from, in the case of a file copy. |
| # svnConvertedText: the header text converted to a header with the paths |
| # in some lines corrected. |
| # $lastReadLine: the line last read from $fileHandle. |
| sub parseSvnDiffHeader($$) |
| { |
| my ($fileHandle, $line) = @_; |
| |
| $_ = $line; |
| |
| my $indexPath; |
| if (/$svnDiffStartRegEx/) { |
| $indexPath = $1; |
| } else { |
| die("First line of SVN diff does not begin with \"Index \": \"$_\""); |
| } |
| |
| my $copiedFromPath; |
| my $foundHeaderEnding; |
| my $isBinary; |
| my $isNew; |
| my $sourceRevision; |
| my $svnConvertedText; |
| while (1) { |
| # Temporarily strip off any end-of-line characters to simplify |
| # regex matching below. |
| s/([\n\r]+)$//; |
| my $eol = $1; |
| |
| # Fix paths on ""---" and "+++" lines to match the leading |
| # index line. |
| if (s/^--- \S+/--- $indexPath/) { |
| # --- |
| if (/^--- .+\(revision (\d+)\)/) { |
| $sourceRevision = $1; |
| $isNew = 1 if !$sourceRevision; # if revision 0. |
| if (/\(from (\S+):(\d+)\)$/) { |
| # The "from" clause is created by svn-create-patch, in |
| # which case there is always also a "revision" clause. |
| $copiedFromPath = $1; |
| die("Revision number \"$2\" in \"from\" clause does not match " . |
| "source revision number \"$sourceRevision\".") if ($2 != $sourceRevision); |
| } |
| } |
| } elsif (s/^\+\+\+ \S+/+++ $indexPath/) { |
| $foundHeaderEnding = 1; |
| } elsif (/^Cannot display: file marked as a binary type.$/) { |
| $isBinary = 1; |
| $foundHeaderEnding = 1; |
| } |
| |
| $svnConvertedText .= "$_$eol"; # Also restore end-of-line characters. |
| |
| $_ = <$fileHandle>; # Not defined if end-of-file reached. |
| |
| last if (!defined($_) || /$svnDiffStartRegEx/ || $foundHeaderEnding); |
| } |
| |
| if (!$foundHeaderEnding) { |
| die("Did not find end of header block corresponding to index path \"$indexPath\"."); |
| } |
| |
| my %header; |
| |
| $header{copiedFromPath} = $copiedFromPath if $copiedFromPath; |
| $header{indexPath} = $indexPath; |
| $header{isBinary} = $isBinary if $isBinary; |
| $header{isNew} = $isNew if $isNew; |
| $header{sourceRevision} = $sourceRevision if $sourceRevision; |
| $header{svnConvertedText} = $svnConvertedText; |
| |
| return (\%header, $_); |
| } |
| |
| # Parse the next diff header from the given file handle, and advance |
| # the handle so the last line read is the first line after the header. |
| # |
| # This subroutine dies if given leading junk or if it could not detect |
| # the end of the header block. |
| # |
| # Args: |
| # $fileHandle: advanced so the last line read from the handle is the first |
| # line of the header to parse. For SVN-formatted diffs, this |
| # is a line beginning with "Index:". For Git, this is a line |
| # beginning with "diff --git". |
| # $line: the line last read from $fileHandle |
| # |
| # Returns ($headerHashRef, $lastReadLine): |
| # $headerHashRef: a hash reference representing a diff header |
| # copiedFromPath: the path from which the file was copied if the diff |
| # is a copy. |
| # executableBitDelta: the value 1 or -1 if the executable bit was added or |
| # removed, respectively. New and deleted files have |
| # this value only if the file is executable, in which |
| # case the value is 1 and -1, respectively. |
| # indexPath: the path of the target file. |
| # isBinary: the value 1 if the diff is for a binary file. |
| # isGit: the value 1 if the diff is Git-formatted. |
| # isSvn: the value 1 if the diff is SVN-formatted. |
| # sourceRevision: the revision number of the source, if it exists. This |
| # is the same as the revision number the file was copied |
| # from, in the case of a file copy. |
| # svnConvertedText: the header text with some lines converted to SVN |
| # format. Git-specific lines are preserved. |
| # $lastReadLine: the line last read from $fileHandle. |
| sub parseDiffHeader($$) |
| { |
| my ($fileHandle, $line) = @_; |
| |
| my $header; # This is a hash ref. |
| my $isGit; |
| my $isSvn; |
| my $lastReadLine; |
| |
| if ($line =~ $svnDiffStartRegEx) { |
| $isSvn = 1; |
| ($header, $lastReadLine) = parseSvnDiffHeader($fileHandle, $line); |
| } elsif ($line =~ $gitDiffStartRegEx) { |
| $isGit = 1; |
| ($header, $lastReadLine) = parseGitDiffHeader($fileHandle, $line); |
| } else { |
| die("First line of diff does not begin with \"Index:\" or \"diff --git\": \"$line\""); |
| } |
| |
| $header->{isGit} = $isGit if $isGit; |
| $header->{isSvn} = $isSvn if $isSvn; |
| |
| return ($header, $lastReadLine); |
| } |
| |
| # FIXME: The %diffHash "object" should not have an svnConvertedText property. |
| # Instead, the hash object should store its information in a |
| # structured way as properties. This should be done in a way so |
| # that, if necessary, the text of an SVN or Git patch can be |
| # reconstructed from the information in those hash properties. |
| # |
| # A %diffHash is a hash representing a source control diff of a single |
| # file operation (e.g. a file modification, copy, or delete). |
| # |
| # These hashes appear, for example, in the parseDiff(), parsePatch(), |
| # and prepareParsedPatch() subroutines of this package. |
| # |
| # The corresponding values are-- |
| # |
| # copiedFromPath: the path from which the file was copied if the diff |
| # is a copy. |
| # executableBitDelta: the value 1 or -1 if the executable bit was added or |
| # removed from the target file, respectively. |
| # indexPath: the path of the target file. For SVN-formatted diffs, |
| # this is the same as the path in the "Index:" line. |
| # isBinary: the value 1 if the diff is for a binary file. |
| # isDeletion: the value 1 if the diff is known from the header to be a deletion. |
| # isGit: the value 1 if the diff is Git-formatted. |
| # isNew: the value 1 if the dif is known from the header to be a new file. |
| # isSvn: the value 1 if the diff is SVN-formatted. |
| # sourceRevision: the revision number of the source, if it exists. This |
| # is the same as the revision number the file was copied |
| # from, in the case of a file copy. |
| # svnConvertedText: the diff with some lines converted to SVN format. |
| # Git-specific lines are preserved. |
| |
| # Parse one diff from a patch file created by svn-create-patch, and |
| # advance the file handle so the last line read is the first line |
| # of the next header block. |
| # |
| # This subroutine preserves any leading junk encountered before the header. |
| # |
| # Composition of an SVN diff |
| # |
| # There are three parts to an SVN diff: the header, the property change, and |
| # the binary contents, in that order. Either the header or the property change |
| # may be ommitted, but not both. If there are binary changes, then you always |
| # have all three. |
| # |
| # Args: |
| # $fileHandle: a file handle advanced to the first line of the next |
| # header block. Leading junk is okay. |
| # $line: the line last read from $fileHandle. |
| # $optionsHashRef: a hash reference representing optional options to use |
| # when processing a diff. |
| # shouldNotUseIndexPathEOL: whether to use the line endings in the diff instead |
| # instead of the line endings in the target file; the |
| # value of 1 if svnConvertedText should use the line |
| # endings in the diff. |
| # |
| # Returns ($diffHashRefs, $lastReadLine): |
| # $diffHashRefs: A reference to an array of references to %diffHash hashes. |
| # See the %diffHash documentation above. |
| # $lastReadLine: the line last read from $fileHandle |
| sub parseDiff($$;$) |
| { |
| # FIXME: Adjust this method so that it dies if the first line does not |
| # match the start of a diff. This will require a change to |
| # parsePatch() so that parsePatch() skips over leading junk. |
| my ($fileHandle, $line, $optionsHashRef) = @_; |
| |
| my $headerStartRegEx = $svnDiffStartRegEx; # SVN-style header for the default |
| |
| my $headerHashRef; # Last header found, as returned by parseDiffHeader(). |
| my $svnPropertiesHashRef; # Last SVN properties diff found, as returned by parseSvnDiffProperties(). |
| my $svnText; |
| my $indexPathEOL; |
| my $numTextChunks = 0; |
| while (defined($line)) { |
| if (!$headerHashRef && ($line =~ $gitDiffStartRegEx)) { |
| # Then assume all diffs in the patch are Git-formatted. This |
| # block was made to be enterable at most once since we assume |
| # all diffs in the patch are formatted the same (SVN or Git). |
| $headerStartRegEx = $gitDiffStartRegEx; |
| } |
| |
| if ($line =~ $svnPropertiesStartRegEx) { |
| my $propertyPath = $1; |
| if ($svnPropertiesHashRef || $headerHashRef && ($propertyPath ne $headerHashRef->{indexPath})) { |
| # This is the start of the second diff in the while loop, which happens to |
| # be a property diff. If $svnPropertiesHasRef is defined, then this is the |
| # second consecutive property diff, otherwise it's the start of a property |
| # diff for a file that only has property changes. |
| last; |
| } |
| ($svnPropertiesHashRef, $line) = parseSvnDiffProperties($fileHandle, $line); |
| next; |
| } |
| if ($line !~ $headerStartRegEx) { |
| # Then we are in the body of the diff. |
| my $isChunkRange = defined(parseChunkRange($line)); |
| $numTextChunks += 1 if $isChunkRange; |
| if ($indexPathEOL && !$isChunkRange) { |
| # The chunk range is part of the body of the diff, but its line endings should't be |
| # modified or patch(1) will complain. So, we only modify non-chunk range lines. |
| $line =~ s/\r\n|\r|\n/$indexPathEOL/g; |
| } |
| $svnText .= $line; |
| $line = <$fileHandle>; |
| next; |
| } # Otherwise, we found a diff header. |
| |
| if ($svnPropertiesHashRef || $headerHashRef) { |
| # Then either we just processed an SVN property change or this |
| # is the start of the second diff header of this while loop. |
| last; |
| } |
| |
| ($headerHashRef, $line) = parseDiffHeader($fileHandle, $line); |
| if (!$optionsHashRef || !$optionsHashRef->{shouldNotUseIndexPathEOL}) { |
| $indexPathEOL = firstEOLInFile($headerHashRef->{indexPath}) if !$headerHashRef->{isNew} && !$headerHashRef->{isBinary}; |
| } |
| |
| $svnText .= $headerHashRef->{svnConvertedText}; |
| } |
| |
| my @diffHashRefs; |
| |
| if ($headerHashRef->{shouldDeleteSource}) { |
| my %deletionHash; |
| $deletionHash{indexPath} = $headerHashRef->{copiedFromPath}; |
| $deletionHash{isDeletion} = 1; |
| push @diffHashRefs, \%deletionHash; |
| } |
| if ($headerHashRef->{copiedFromPath}) { |
| my %copyHash; |
| $copyHash{copiedFromPath} = $headerHashRef->{copiedFromPath}; |
| $copyHash{indexPath} = $headerHashRef->{indexPath}; |
| $copyHash{sourceRevision} = $headerHashRef->{sourceRevision} if $headerHashRef->{sourceRevision}; |
| if ($headerHashRef->{isSvn}) { |
| $copyHash{executableBitDelta} = $svnPropertiesHashRef->{executableBitDelta} if $svnPropertiesHashRef->{executableBitDelta}; |
| } |
| push @diffHashRefs, \%copyHash; |
| } |
| |
| # Note, the order of evaluation for the following if conditional has been explicitly chosen so that |
| # it evaluates to false when there is no headerHashRef (e.g. a property change diff for a file that |
| # only has property changes). |
| if ($headerHashRef->{isCopyWithChanges} || (%$headerHashRef && !$headerHashRef->{copiedFromPath})) { |
| # Then add the usual file modification. |
| my %diffHash; |
| # FIXME: We should expand this code to support other properties. In the future, |
| # parseSvnDiffProperties may return a hash whose keys are the properties. |
| if ($headerHashRef->{isSvn}) { |
| # SVN records the change to the executable bit in a separate property change diff |
| # that follows the contents of the diff, except for binary diffs. For binary |
| # diffs, the property change diff follows the diff header. |
| $diffHash{executableBitDelta} = $svnPropertiesHashRef->{executableBitDelta} if $svnPropertiesHashRef->{executableBitDelta}; |
| } elsif ($headerHashRef->{isGit}) { |
| # Git records the change to the executable bit in the header of a diff. |
| $diffHash{executableBitDelta} = $headerHashRef->{executableBitDelta} if $headerHashRef->{executableBitDelta}; |
| } |
| $diffHash{indexPath} = $headerHashRef->{indexPath}; |
| $diffHash{isBinary} = $headerHashRef->{isBinary} if $headerHashRef->{isBinary}; |
| $diffHash{isDeletion} = $headerHashRef->{isDeletion} if $headerHashRef->{isDeletion}; |
| $diffHash{isGit} = $headerHashRef->{isGit} if $headerHashRef->{isGit}; |
| $diffHash{isNew} = $headerHashRef->{isNew} if $headerHashRef->{isNew}; |
| $diffHash{isSvn} = $headerHashRef->{isSvn} if $headerHashRef->{isSvn}; |
| if (!$headerHashRef->{copiedFromPath}) { |
| # If the file was copied, then we have already incorporated the |
| # sourceRevision information into the change. |
| $diffHash{sourceRevision} = $headerHashRef->{sourceRevision} if $headerHashRef->{sourceRevision}; |
| } |
| # FIXME: Remove the need for svnConvertedText. See the %diffHash |
| # code comments above for more information. |
| # |
| # Note, we may not always have SVN converted text since we intend |
| # to deprecate it in the future. For example, a property change |
| # diff for a file that only has property changes will not return |
| # any SVN converted text. |
| $diffHash{svnConvertedText} = $svnText if $svnText; |
| $diffHash{numTextChunks} = $numTextChunks if $svnText && !$headerHashRef->{isBinary}; |
| push @diffHashRefs, \%diffHash; |
| } |
| |
| if (!%$headerHashRef && $svnPropertiesHashRef) { |
| # A property change diff for a file that only has property changes. |
| my %propertyChangeHash; |
| $propertyChangeHash{executableBitDelta} = $svnPropertiesHashRef->{executableBitDelta} if $svnPropertiesHashRef->{executableBitDelta}; |
| $propertyChangeHash{indexPath} = $svnPropertiesHashRef->{propertyPath}; |
| $propertyChangeHash{isSvn} = 1; |
| push @diffHashRefs, \%propertyChangeHash; |
| } |
| |
| return (\@diffHashRefs, $line); |
| } |
| |
| # Parse an SVN property change diff from the given file handle, and advance |
| # the handle so the last line read is the first line after this diff. |
| # |
| # For the case of an SVN binary diff, the binary contents will follow the |
| # the property changes. |
| # |
| # This subroutine dies if the first line does not begin with "Property changes on" |
| # or if the separator line that follows this line is missing. |
| # |
| # Args: |
| # $fileHandle: advanced so the last line read from the handle is the first |
| # line of the footer to parse. This line begins with |
| # "Property changes on". |
| # $line: the line last read from $fileHandle. |
| # |
| # Returns ($propertyHashRef, $lastReadLine): |
| # $propertyHashRef: a hash reference representing an SVN diff footer. |
| # propertyPath: the path of the target file. |
| # executableBitDelta: the value 1 or -1 if the executable bit was added or |
| # removed from the target file, respectively. |
| # $lastReadLine: the line last read from $fileHandle. |
| sub parseSvnDiffProperties($$) |
| { |
| my ($fileHandle, $line) = @_; |
| |
| $_ = $line; |
| |
| my %footer; |
| if (/$svnPropertiesStartRegEx/) { |
| $footer{propertyPath} = $1; |
| } else { |
| die("Failed to find start of SVN property change, \"Property changes on \": \"$_\""); |
| } |
| |
| # We advance $fileHandle two lines so that the next line that |
| # we process is $svnPropertyStartRegEx in a well-formed footer. |
| # A well-formed footer has the form: |
| # Property changes on: FileA |
| # ___________________________________________________________________ |
| # Added: svn:executable |
| # + * |
| $_ = <$fileHandle>; # Not defined if end-of-file reached. |
| my $separator = "_" x 67; |
| if (defined($_) && /^$separator[\r\n]+$/) { |
| $_ = <$fileHandle>; |
| } else { |
| die("Failed to find separator line: \"$_\"."); |
| } |
| |
| # FIXME: We should expand this to support other SVN properties |
| # (e.g. return a hash of property key-values that represents |
| # all properties). |
| # |
| # Notice, we keep processing until we hit end-of-file or some |
| # line that does not resemble $svnPropertyStartRegEx, such as |
| # the empty line that precedes the start of the binary contents |
| # of a patch, or the start of the next diff (e.g. "Index:"). |
| my $propertyHashRef; |
| while (defined($_) && /$svnPropertyStartRegEx/) { |
| ($propertyHashRef, $_) = parseSvnProperty($fileHandle, $_); |
| if ($propertyHashRef->{name} eq "svn:executable") { |
| # Notice, for SVN properties, propertyChangeDelta is always non-zero |
| # because a property can only be added or removed. |
| $footer{executableBitDelta} = $propertyHashRef->{propertyChangeDelta}; |
| } |
| } |
| |
| return(\%footer, $_); |
| } |
| |
| # Parse the next SVN property from the given file handle, and advance the handle so the last |
| # line read is the first line after the property. |
| # |
| # This subroutine dies if the first line is not a valid start of an SVN property, |
| # or the property is missing a value, or the property change type (e.g. "Added") |
| # does not correspond to the property value type (e.g. "+"). |
| # |
| # Args: |
| # $fileHandle: advanced so the last line read from the handle is the first |
| # line of the property to parse. This should be a line |
| # that matches $svnPropertyStartRegEx. |
| # $line: the line last read from $fileHandle. |
| # |
| # Returns ($propertyHashRef, $lastReadLine): |
| # $propertyHashRef: a hash reference representing a SVN property. |
| # name: the name of the property. |
| # value: the last property value. For instance, suppose the property is "Modified". |
| # Then it has both a '-' and '+' property value in that order. Therefore, |
| # the value of this key is the value of the '+' property by ordering (since |
| # it is the last value). |
| # propertyChangeDelta: the value 1 or -1 if the property was added or |
| # removed, respectively. |
| # $lastReadLine: the line last read from $fileHandle. |
| sub parseSvnProperty($$) |
| { |
| my ($fileHandle, $line) = @_; |
| |
| $_ = $line; |
| |
| my $propertyName; |
| my $propertyChangeType; |
| if (/$svnPropertyStartRegEx/) { |
| $propertyChangeType = $1; |
| $propertyName = $2; |
| } else { |
| die("Failed to find SVN property: \"$_\"."); |
| } |
| |
| $_ = <$fileHandle>; # Not defined if end-of-file reached. |
| |
| # The "svn diff" command neither inserts newline characters between property values |
| # nor between successive properties. |
| # |
| # FIXME: We do not support property values that contain tailing newline characters |
| # as it is difficult to disambiguate these trailing newlines from the empty |
| # line that precedes the contents of a binary patch. |
| my $propertyValue; |
| my $propertyValueType; |
| while (defined($_) && /$svnPropertyValueStartRegEx/) { |
| # Note, a '-' property may be followed by a '+' property in the case of a "Modified" |
| # or "Name" property. We only care about the ending value (i.e. the '+' property) |
| # in such circumstances. So, we take the property value for the property to be its |
| # last parsed property value. |
| # |
| # FIXME: We may want to consider strictly enforcing a '-', '+' property ordering or |
| # add error checking to prevent '+', '+', ..., '+' and other invalid combinations. |
| $propertyValueType = $1; |
| ($propertyValue, $_) = parseSvnPropertyValue($fileHandle, $_); |
| } |
| |
| if (!$propertyValue) { |
| die("Failed to find the property value for the SVN property \"$propertyName\": \"$_\"."); |
| } |
| |
| my $propertyChangeDelta; |
| if ($propertyValueType eq "+" || $propertyValueType eq "Merged") { |
| $propertyChangeDelta = 1; |
| } elsif ($propertyValueType eq "-" || $propertyValueType eq "Reverse-merged") { |
| $propertyChangeDelta = -1; |
| } else { |
| die("Not reached."); |
| } |
| |
| # We perform a simple validation that an "Added" or "Deleted" property |
| # change type corresponds with a "+" and "-" value type, respectively. |
| my $expectedChangeDelta; |
| if ($propertyChangeType eq "Added") { |
| $expectedChangeDelta = 1; |
| } elsif ($propertyChangeType eq "Deleted") { |
| $expectedChangeDelta = -1; |
| } |
| |
| if ($expectedChangeDelta && $propertyChangeDelta != $expectedChangeDelta) { |
| die("The final property value type found \"$propertyValueType\" does not " . |
| "correspond to the property change type found \"$propertyChangeType\"."); |
| } |
| |
| my %propertyHash; |
| $propertyHash{name} = $propertyName; |
| $propertyHash{propertyChangeDelta} = $propertyChangeDelta; |
| $propertyHash{value} = $propertyValue; |
| return (\%propertyHash, $_); |
| } |
| |
| # Parse the value of an SVN property from the given file handle, and advance |
| # the handle so the last line read is the first line after the property value. |
| # |
| # This subroutine dies if the first line is an invalid SVN property value line |
| # (i.e. a line that does not begin with " +" or " -"). |
| # |
| # Args: |
| # $fileHandle: advanced so the last line read from the handle is the first |
| # line of the property value to parse. This should be a line |
| # beginning with " +" or " -". |
| # $line: the line last read from $fileHandle. |
| # |
| # Returns ($propertyValue, $lastReadLine): |
| # $propertyValue: the value of the property. |
| # $lastReadLine: the line last read from $fileHandle. |
| sub parseSvnPropertyValue($$) |
| { |
| my ($fileHandle, $line) = @_; |
| |
| $_ = $line; |
| |
| my $propertyValue; |
| my $eol; |
| if (/$svnPropertyValueStartRegEx/) { |
| $propertyValue = $2; # Does not include the end-of-line character(s). |
| $eol = $POSTMATCH; |
| } else { |
| die("Failed to find property value beginning with '+', '-', 'Merged', or 'Reverse-merged': \"$_\"."); |
| } |
| |
| while (<$fileHandle>) { |
| if (/^[\r\n]+$/ || /$svnPropertyValueStartRegEx/ || /$svnPropertyStartRegEx/) { |
| # Note, we may encounter an empty line before the contents of a binary patch. |
| # Also, we check for $svnPropertyValueStartRegEx because a '-' property may be |
| # followed by a '+' property in the case of a "Modified" or "Name" property. |
| # We check for $svnPropertyStartRegEx because it indicates the start of the |
| # next property to parse. |
| last; |
| } |
| |
| # Temporarily strip off any end-of-line characters. We add the end-of-line characters |
| # from the previously processed line to the start of this line so that the last line |
| # of the property value does not end in end-of-line characters. |
| s/([\n\r]+)$//; |
| $propertyValue .= "$eol$_"; |
| $eol = $1; |
| } |
| |
| return ($propertyValue, $_); |
| } |
| |
| # Parse a patch file created by svn-create-patch. |
| # |
| # Args: |
| # $fileHandle: A file handle to the patch file that has not yet been |
| # read from. |
| # $optionsHashRef: a hash reference representing optional options to use |
| # when processing a diff. |
| # shouldNotUseIndexPathEOL: whether to use the line endings in the diff instead |
| # instead of the line endings in the target file; the |
| # value of 1 if svnConvertedText should use the line |
| # endings in the diff. |
| # |
| # Returns: |
| # @diffHashRefs: an array of diff hash references. |
| # See the %diffHash documentation above. |
| sub parsePatch($;$) |
| { |
| my ($fileHandle, $optionsHashRef) = @_; |
| |
| my $newDiffHashRefs; |
| my @diffHashRefs; # return value |
| |
| my $line = <$fileHandle>; |
| |
| while (defined($line)) { # Otherwise, at EOF. |
| |
| ($newDiffHashRefs, $line) = parseDiff($fileHandle, $line, $optionsHashRef); |
| |
| push @diffHashRefs, @$newDiffHashRefs; |
| } |
| |
| return @diffHashRefs; |
| } |
| |
| # Prepare the results of parsePatch() for use in svn-apply and svn-unapply. |
| # |
| # Args: |
| # $shouldForce: Whether to continue processing if an unexpected |
| # state occurs. |
| # @diffHashRefs: An array of references to %diffHashes. |
| # See the %diffHash documentation above. |
| # |
| # Returns $preparedPatchHashRef: |
| # copyDiffHashRefs: A reference to an array of the $diffHashRefs in |
| # @diffHashRefs that represent file copies. The original |
| # ordering is preserved. |
| # nonCopyDiffHashRefs: A reference to an array of the $diffHashRefs in |
| # @diffHashRefs that do not represent file copies. |
| # The original ordering is preserved. |
| # sourceRevisionHash: A reference to a hash of source path to source |
| # revision number. |
| sub prepareParsedPatch($@) |
| { |
| my ($shouldForce, @diffHashRefs) = @_; |
| |
| my %copiedFiles; |
| |
| # Return values |
| my @copyDiffHashRefs = (); |
| my @nonCopyDiffHashRefs = (); |
| my %sourceRevisionHash = (); |
| for my $diffHashRef (@diffHashRefs) { |
| my $copiedFromPath = $diffHashRef->{copiedFromPath}; |
| my $indexPath = $diffHashRef->{indexPath}; |
| my $sourceRevision = $diffHashRef->{sourceRevision}; |
| my $sourcePath; |
| |
| if (defined($copiedFromPath)) { |
| # Then the diff is a copy operation. |
| $sourcePath = $copiedFromPath; |
| |
| # FIXME: Consider printing a warning or exiting if |
| # exists($copiedFiles{$indexPath}) is true -- i.e. if |
| # $indexPath appears twice as a copy target. |
| $copiedFiles{$indexPath} = $sourcePath; |
| |
| push @copyDiffHashRefs, $diffHashRef; |
| } else { |
| # Then the diff is not a copy operation. |
| $sourcePath = $indexPath; |
| |
| push @nonCopyDiffHashRefs, $diffHashRef; |
| } |
| |
| if (defined($sourceRevision)) { |
| if (exists($sourceRevisionHash{$sourcePath}) && |
| ($sourceRevisionHash{$sourcePath} != $sourceRevision)) { |
| if (!$shouldForce) { |
| die "Two revisions of the same file required as a source:\n". |
| " $sourcePath:$sourceRevisionHash{$sourcePath}\n". |
| " $sourcePath:$sourceRevision"; |
| } |
| } |
| $sourceRevisionHash{$sourcePath} = $sourceRevision; |
| } |
| } |
| |
| my %preparedPatchHash; |
| |
| $preparedPatchHash{copyDiffHashRefs} = \@copyDiffHashRefs; |
| $preparedPatchHash{nonCopyDiffHashRefs} = \@nonCopyDiffHashRefs; |
| $preparedPatchHash{sourceRevisionHash} = \%sourceRevisionHash; |
| |
| return \%preparedPatchHash; |
| } |
| |
| # Return localtime() for the project's time zone, given an integer time as |
| # returned by Perl's time() function. |
| sub localTimeInProjectTimeZone($) |
| { |
| my $epochTime = shift; |
| |
| # Change the time zone temporarily for the localtime() call. |
| my $savedTimeZone = $ENV{'TZ'}; |
| $ENV{'TZ'} = $changeLogTimeZone; |
| my @localTime = localtime($epochTime); |
| if (defined $savedTimeZone) { |
| $ENV{'TZ'} = $savedTimeZone; |
| } else { |
| delete $ENV{'TZ'}; |
| } |
| |
| return @localTime; |
| } |
| |
| # Set the reviewer and date in a ChangeLog patch, and return the new patch. |
| # |
| # Args: |
| # $patch: a ChangeLog patch as a string. |
| # $reviewer: the name of the reviewer, or undef if the reviewer should not be set. |
| # $epochTime: an integer time as returned by Perl's time() function. |
| sub setChangeLogDateAndReviewer($$$) |
| { |
| my ($patch, $reviewer, $epochTime) = @_; |
| |
| my @localTime = localTimeInProjectTimeZone($epochTime); |
| my $newDate = strftime("%Y-%m-%d", @localTime); |
| |
| my $firstChangeLogLineRegEx = qr#(\n\+)\d{4}-[^-]{2}-[^-]{2}( )#; |
| $patch =~ s/$firstChangeLogLineRegEx/$1$newDate$2/; |
| |
| if (defined($reviewer)) { |
| # We include a leading plus ("+") in the regular expression to make |
| # the regular expression less likely to match text in the leading junk |
| # for the patch, if the patch has leading junk. |
| $patch =~ s/(\n\+.*)NOBODY \(OOPS!\)/$1$reviewer/; |
| } |
| |
| return $patch; |
| } |
| |
| # If possible, returns a ChangeLog patch equivalent to the given one, |
| # but with the newest ChangeLog entry inserted at the top of the |
| # file -- i.e. no leading context and all lines starting with "+". |
| # |
| # If given a patch string not representable as a patch with the above |
| # properties, it returns the input back unchanged. |
| # |
| # WARNING: This subroutine can return an inequivalent patch string if |
| # both the beginning of the new ChangeLog file matches the beginning |
| # of the source ChangeLog, and the source beginning was modified. |
| # Otherwise, it is guaranteed to return an equivalent patch string, |
| # if it returns. |
| # |
| # Applying this subroutine to ChangeLog patches allows svn-apply to |
| # insert new ChangeLog entries at the top of the ChangeLog file. |
| # svn-apply uses patch with --fuzz=3 to do this. We need to apply |
| # this subroutine because the diff(1) command is greedy when matching |
| # lines. A new ChangeLog entry with the same date and author as the |
| # previous will match and cause the diff to have lines of starting |
| # context. |
| # |
| # This subroutine has unit tests in VCSUtils_unittest.pl. |
| # |
| # Returns $changeLogHashRef: |
| # $changeLogHashRef: a hash reference representing a change log patch. |
| # patch: a ChangeLog patch equivalent to the given one, but with the |
| # newest ChangeLog entry inserted at the top of the file, if possible. |
| sub fixChangeLogPatch($) |
| { |
| my $patch = shift; # $patch will only contain patch fragments for ChangeLog. |
| |
| $patch =~ /(\r?\n)/; |
| my $lineEnding = $1; |
| my @lines = split(/$lineEnding/, $patch); |
| |
| my $i = 0; # We reuse the same index throughout. |
| |
| # Skip to beginning of first chunk. |
| for (; $i < @lines; ++$i) { |
| if (substr($lines[$i], 0, 1) eq "@") { |
| last; |
| } |
| } |
| my $chunkStartIndex = ++$i; |
| my %changeLogHashRef; |
| |
| # Optimization: do not process if new lines already begin the chunk. |
| if (substr($lines[$i], 0, 1) eq "+") { |
| $changeLogHashRef{patch} = $patch; |
| return \%changeLogHashRef; |
| } |
| |
| # Skip to first line of newly added ChangeLog entry. |
| # For example, +2009-06-03 Eric Seidel <eric@webkit.org> |
| my $dateStartRegEx = '^\+(\d{4}-\d{2}-\d{2})' # leading "+" and date |
| . '\s+(.+)\s+' # name |
| . '<([^<>]+)>$'; # e-mail address |
| |
| for (; $i < @lines; ++$i) { |
| my $line = $lines[$i]; |
| my $firstChar = substr($line, 0, 1); |
| if ($line =~ /$dateStartRegEx/) { |
| last; |
| } elsif ($firstChar eq " " or $firstChar eq "+") { |
| next; |
| } |
| $changeLogHashRef{patch} = $patch; # Do not change if, for example, "-" or "@" found. |
| return \%changeLogHashRef; |
| } |
| if ($i >= @lines) { |
| $changeLogHashRef{patch} = $patch; # Do not change if date not found. |
| return \%changeLogHashRef; |
| } |
| my $dateStartIndex = $i; |
| |
| # Rewrite overlapping lines to lead with " ". |
| my @overlappingLines = (); # These will include a leading "+". |
| for (; $i < @lines; ++$i) { |
| my $line = $lines[$i]; |
| if (substr($line, 0, 1) ne "+") { |
| last; |
| } |
| push(@overlappingLines, $line); |
| $lines[$i] = " " . substr($line, 1); |
| } |
| |
| # Remove excess ending context, if necessary. |
| my $shouldTrimContext = 1; |
| for (; $i < @lines; ++$i) { |
| my $firstChar = substr($lines[$i], 0, 1); |
| if ($firstChar eq " ") { |
| next; |
| } elsif ($firstChar eq "@") { |
| last; |
| } |
| $shouldTrimContext = 0; # For example, if "+" or "-" encountered. |
| last; |
| } |
| my $deletedLineCount = 0; |
| if ($shouldTrimContext) { # Also occurs if end of file reached. |
| splice(@lines, $i - @overlappingLines, @overlappingLines); |
| $deletedLineCount = @overlappingLines; |
| } |
| |
| # Work backwards, shifting overlapping lines towards front |
| # while checking that patch stays equivalent. |
| for ($i = $dateStartIndex - 1; @overlappingLines && $i >= $chunkStartIndex; --$i) { |
| my $line = $lines[$i]; |
| if (substr($line, 0, 1) ne " ") { |
| next; |
| } |
| my $text = substr($line, 1); |
| my $newLine = pop(@overlappingLines); |
| if ($text ne substr($newLine, 1)) { |
| $changeLogHashRef{patch} = $patch; # Unexpected difference. |
| return \%changeLogHashRef; |
| } |
| $lines[$i] = "+$text"; |
| } |
| |
| # If @overlappingLines > 0, this is where we make use of the |
| # assumption that the beginning of the source file was not modified. |
| splice(@lines, $chunkStartIndex, 0, @overlappingLines); |
| |
| # Update the date start index as it may have changed after shifting |
| # the overlapping lines towards the front. |
| for ($i = $chunkStartIndex; $i < $dateStartIndex; ++$i) { |
| $dateStartIndex = $i if $lines[$i] =~ /$dateStartRegEx/; |
| } |
| splice(@lines, $chunkStartIndex, $dateStartIndex - $chunkStartIndex); # Remove context of later entry. |
| $deletedLineCount += $dateStartIndex - $chunkStartIndex; |
| |
| # Update the initial chunk range. |
| my $chunkRangeHashRef = parseChunkRange($lines[$chunkStartIndex - 1]); |
| if (!$chunkRangeHashRef) { |
| # FIXME: Handle errors differently from ChangeLog files that |
| # are okay but should not be altered. That way we can find out |
| # if improvements to the script ever become necessary. |
| $changeLogHashRef{patch} = $patch; # Error: unexpected patch string format. |
| return \%changeLogHashRef; |
| } |
| my $oldSourceLineCount = $chunkRangeHashRef->{lineCount}; |
| my $oldTargetLineCount = $chunkRangeHashRef->{newLineCount}; |
| |
| my $sourceLineCount = $oldSourceLineCount + @overlappingLines - $deletedLineCount; |
| my $targetLineCount = $oldTargetLineCount + @overlappingLines - $deletedLineCount; |
| $lines[$chunkStartIndex - 1] = "@@ -1,$sourceLineCount +1,$targetLineCount @@"; |
| |
| $changeLogHashRef{patch} = join($lineEnding, @lines) . "\n"; # patch(1) expects an extra trailing newline. |
| return \%changeLogHashRef; |
| } |
| |
| # This is a supporting method for runPatchCommand. |
| # |
| # Arg: the optional $args parameter passed to runPatchCommand (can be undefined). |
| # |
| # Returns ($patchCommand, $isForcing). |
| # |
| # This subroutine has unit tests in VCSUtils_unittest.pl. |
| sub generatePatchCommand($) |
| { |
| my ($passedArgsHashRef) = @_; |
| |
| my $argsHashRef = { # Defaults |
| ensureForce => 0, |
| shouldReverse => 0, |
| options => [] |
| }; |
| |
| # Merges hash references. It's okay here if passed hash reference is undefined. |
| @{$argsHashRef}{keys %{$passedArgsHashRef}} = values %{$passedArgsHashRef}; |
| |
| my $ensureForce = $argsHashRef->{ensureForce}; |
| my $shouldReverse = $argsHashRef->{shouldReverse}; |
| my $options = $argsHashRef->{options}; |
| |
| if (! $options) { |
| $options = []; |
| } else { |
| $options = [@{$options}]; # Copy to avoid side effects. |
| } |
| |
| my $isForcing = 0; |
| if (grep /^--force$/, @{$options}) { |
| $isForcing = 1; |
| } elsif ($ensureForce) { |
| push @{$options}, "--force"; |
| $isForcing = 1; |
| } |
| |
| if ($shouldReverse) { # No check: --reverse should never be passed explicitly. |
| push @{$options}, "--reverse"; |
| } |
| |
| @{$options} = sort(@{$options}); # For easier testing. |
| |
| my $patchCommand = join(" ", "patch -p0", @{$options}); |
| |
| return ($patchCommand, $isForcing); |
| } |
| |
| # Apply the given patch using the patch(1) command. |
| # |
| # On success, return the resulting exit status. Otherwise, exit with the |
| # exit status. If "--force" is passed as an option, however, then never |
| # exit and always return the exit status. |
| # |
| # Args: |
| # $patch: a patch string. |
| # $repositoryRootPath: an absolute path to the repository root. |
| # $pathRelativeToRoot: the path of the file to be patched, relative to the |
| # repository root. This should normally be the path |
| # found in the patch's "Index:" line. It is passed |
| # explicitly rather than reparsed from the patch |
| # string for optimization purposes. |
| # This is used only for error reporting. The |
| # patch command gleans the actual file to patch |
| # from the patch string. |
| # $args: a reference to a hash of optional arguments. The possible |
| # keys are -- |
| # ensureForce: whether to ensure --force is passed (defaults to 0). |
| # shouldReverse: whether to pass --reverse (defaults to 0). |
| # options: a reference to an array of options to pass to the |
| # patch command. The subroutine passes the -p0 option |
| # no matter what. This should not include --reverse. |
| # |
| # This subroutine has unit tests in VCSUtils_unittest.pl. |
| sub runPatchCommand($$$;$) |
| { |
| my ($patch, $repositoryRootPath, $pathRelativeToRoot, $args) = @_; |
| |
| my ($patchCommand, $isForcing) = generatePatchCommand($args); |
| |
| # Temporarily change the working directory since the path found |
| # in the patch's "Index:" line is relative to the repository root |
| # (i.e. the same as $pathRelativeToRoot). |
| my $cwd = Cwd::getcwd(); |
| chdir $repositoryRootPath; |
| |
| open PATCH, "| $patchCommand" or die "Could not call \"$patchCommand\" for file \"$pathRelativeToRoot\": $!"; |
| print PATCH $patch; |
| close PATCH; |
| my $exitStatus = exitStatus($?); |
| |
| chdir $cwd; |
| |
| if ($exitStatus && !$isForcing) { |
| print "Calling \"$patchCommand\" for file \"$pathRelativeToRoot\" returned " . |
| "status $exitStatus. Pass --force to ignore patch failures.\n"; |
| exit $exitStatus; |
| } |
| |
| return $exitStatus; |
| } |
| |
| # Merge ChangeLog patches using a three-file approach. |
| # |
| # This is used by resolve-ChangeLogs when it's operated as a merge driver |
| # and when it's used to merge conflicts after a patch is applied or after |
| # an svn update. |
| # |
| # It's also used for traditional rejected patches. |
| # |
| # Args: |
| # $fileMine: The merged version of the file. Also known in git as the |
| # other branch's version (%B) or "ours". |
| # For traditional patch rejects, this is the *.rej file. |
| # $fileOlder: The base version of the file. Also known in git as the |
| # ancestor version (%O) or "base". |
| # For traditional patch rejects, this is the *.orig file. |
| # $fileNewer: The current version of the file. Also known in git as the |
| # current version (%A) or "theirs". |
| # For traditional patch rejects, this is the original-named |
| # file. |
| # |
| # Returns 1 if merge was successful, else 0. |
| sub mergeChangeLogs($$$) |
| { |
| my ($fileMine, $fileOlder, $fileNewer) = @_; |
| |
| my $traditionalReject = $fileMine =~ /\.rej$/ ? 1 : 0; |
| |
| local $/ = undef; |
| |
| my $patch; |
| if ($traditionalReject) { |
| open(DIFF, "<", $fileMine) or die $!; |
| $patch = <DIFF>; |
| close(DIFF); |
| rename($fileMine, "$fileMine.save"); |
| rename($fileOlder, "$fileOlder.save"); |
| } else { |
| open(DIFF, "diff -u -a --binary \"$fileOlder\" \"$fileMine\" |") or die $!; |
| $patch = <DIFF>; |
| close(DIFF); |
| } |
| |
| unlink("${fileNewer}.orig"); |
| unlink("${fileNewer}.rej"); |
| |
| open(PATCH, "| patch --force --fuzz=3 --binary \"$fileNewer\" > " . File::Spec->devnull()) or die $!; |
| if ($traditionalReject) { |
| print PATCH $patch; |
| } else { |
| my $changeLogHash = fixChangeLogPatch($patch); |
| print PATCH $changeLogHash->{patch}; |
| } |
| close(PATCH); |
| |
| my $result = !exitStatus($?); |
| |
| # Refuse to merge the patch if it did not apply cleanly |
| if (-e "${fileNewer}.rej") { |
| unlink("${fileNewer}.rej"); |
| if (-f "${fileNewer}.orig") { |
| unlink($fileNewer); |
| rename("${fileNewer}.orig", $fileNewer); |
| } |
| } else { |
| unlink("${fileNewer}.orig"); |
| } |
| |
| if ($traditionalReject) { |
| rename("$fileMine.save", $fileMine); |
| rename("$fileOlder.save", $fileOlder); |
| } |
| |
| return $result; |
| } |
| |
| sub gitConfig($) |
| { |
| return unless $isGit; |
| |
| my ($config) = @_; |
| |
| my $result = `git config $config`; |
| if (($? >> 8)) { |
| $result = `git repo-config $config`; |
| } |
| chomp $result; |
| return $result; |
| } |
| |
| sub changeLogNameError($) |
| { |
| my ($message) = @_; |
| print STDERR "$message\nEither:\n"; |
| print STDERR " set CHANGE_LOG_NAME in your environment\n"; |
| print STDERR " OR pass --name= on the command line\n"; |
| print STDERR " OR set REAL_NAME in your environment"; |
| print STDERR " OR git users can set 'git config user.name'\n"; |
| exit(1); |
| } |
| |
| sub changeLogName() |
| { |
| my $name = $ENV{CHANGE_LOG_NAME} || $ENV{REAL_NAME} || gitConfig("user.name") || (split /\s*,\s*/, (getpwuid $<)[6])[0]; |
| |
| changeLogNameError("Failed to determine ChangeLog name.") unless $name; |
| # getpwuid seems to always succeed on windows, returning the username instead of the full name. This check will catch that case. |
| changeLogNameError("'$name' does not contain a space! ChangeLogs should contain your full name.") unless ($name =~ /\S\s\S/); |
| |
| return $name; |
| } |
| |
| sub changeLogEmailAddressError($) |
| { |
| my ($message) = @_; |
| print STDERR "$message\nEither:\n"; |
| print STDERR " set CHANGE_LOG_EMAIL_ADDRESS in your environment\n"; |
| print STDERR " OR pass --email= on the command line\n"; |
| print STDERR " OR set EMAIL_ADDRESS in your environment\n"; |
| print STDERR " OR git users can set 'git config user.email'\n"; |
| exit(1); |
| } |
| |
| sub changeLogEmailAddress() |
| { |
| my $emailAddress = $ENV{CHANGE_LOG_EMAIL_ADDRESS} || $ENV{EMAIL_ADDRESS} || gitConfig("user.email"); |
| |
| changeLogEmailAddressError("Failed to determine email address for ChangeLog.") unless $emailAddress; |
| changeLogEmailAddressError("Email address '$emailAddress' does not contain '\@' and is likely invalid.") unless ($emailAddress =~ /\@/); |
| |
| return $emailAddress; |
| } |
| |
| # http://tools.ietf.org/html/rfc1924 |
| sub decodeBase85($) |
| { |
| my ($encoded) = @_; |
| my %table; |
| my @characters = ('0'..'9', 'A'..'Z', 'a'..'z', '!', '#', '$', '%', '&', '(', ')', '*', '+', '-', ';', '<', '=', '>', '?', '@', '^', '_', '`', '{', '|', '}', '~'); |
| for (my $i = 0; $i < 85; $i++) { |
| $table{$characters[$i]} = $i; |
| } |
| |
| my $decoded = ''; |
| my @encodedChars = $encoded =~ /./g; |
| |
| for (my $encodedIter = 0; defined($encodedChars[$encodedIter]);) { |
| my $digit = 0; |
| for (my $i = 0; $i < 5; $i++) { |
| $digit *= 85; |
| my $char = $encodedChars[$encodedIter]; |
| $digit += $table{$char}; |
| $encodedIter++; |
| } |
| |
| for (my $i = 0; $i < 4; $i++) { |
| $decoded .= chr(($digit >> (3 - $i) * 8) & 255); |
| } |
| } |
| |
| return $decoded; |
| } |
| |
| sub decodeGitBinaryChunk($$) |
| { |
| my ($contents, $fullPath) = @_; |
| |
| # Load this module lazily in case the user don't have this module |
| # and won't handle git binary patches. |
| require Compress::Zlib; |
| |
| my $encoded = ""; |
| my $compressedSize = 0; |
| while ($contents =~ /^([A-Za-z])(.*)$/gm) { |
| my $line = $2; |
| next if $line eq ""; |
| die "$fullPath: unexpected size of a line: $&" if length($2) % 5 != 0; |
| my $actualSize = length($2) / 5 * 4; |
| my $encodedExpectedSize = ord($1); |
| my $expectedSize = $encodedExpectedSize <= ord("Z") ? $encodedExpectedSize - ord("A") + 1 : $encodedExpectedSize - ord("a") + 27; |
| |
| die "$fullPath: unexpected size of a line: $&" if int(($expectedSize + 3) / 4) * 4 != $actualSize; |
| $compressedSize += $expectedSize; |
| $encoded .= $line; |
| } |
| |
| my $compressed = decodeBase85($encoded); |
| $compressed = substr($compressed, 0, $compressedSize); |
| return Compress::Zlib::uncompress($compressed); |
| } |
| |
| sub decodeGitBinaryPatch($$) |
| { |
| my ($contents, $fullPath) = @_; |
| |
| # Git binary patch has two chunks. One is for the normal patching |
| # and another is for the reverse patching. |
| # |
| # Each chunk a line which starts from either "literal" or "delta", |
| # followed by a number which specifies decoded size of the chunk. |
| # |
| # Then, content of the chunk comes. To decode the content, we |
| # need decode it with base85 first, and then zlib. |
| my $gitPatchRegExp = '(literal|delta) ([0-9]+)\n([A-Za-z0-9!#$%&()*+-;<=>?@^_`{|}~\\n]*?)\n\n'; |
| if ($contents !~ m"\nGIT binary patch\n$gitPatchRegExp$gitPatchRegExp\Z") { |
| die "$fullPath: unknown git binary patch format" |
| } |
| |
| my $binaryChunkType = $1; |
| my $binaryChunkExpectedSize = $2; |
| my $encodedChunk = $3; |
| my $reverseBinaryChunkType = $4; |
| my $reverseBinaryChunkExpectedSize = $5; |
| my $encodedReverseChunk = $6; |
| |
| my $binaryChunk = decodeGitBinaryChunk($encodedChunk, $fullPath); |
| my $binaryChunkActualSize = length($binaryChunk); |
| my $reverseBinaryChunk = decodeGitBinaryChunk($encodedReverseChunk, $fullPath); |
| my $reverseBinaryChunkActualSize = length($reverseBinaryChunk); |
| |
| die "$fullPath: unexpected size of the first chunk (expected $binaryChunkExpectedSize but was $binaryChunkActualSize" if ($binaryChunkType eq "literal" and $binaryChunkExpectedSize != $binaryChunkActualSize); |
| die "$fullPath: unexpected size of the second chunk (expected $reverseBinaryChunkExpectedSize but was $reverseBinaryChunkActualSize" if ($reverseBinaryChunkType eq "literal" and $reverseBinaryChunkExpectedSize != $reverseBinaryChunkActualSize); |
| |
| return ($binaryChunkType, $binaryChunk, $reverseBinaryChunkType, $reverseBinaryChunk); |
| } |
| |
| sub readByte($$) |
| { |
| my ($data, $location) = @_; |
| |
| # Return the byte at $location in $data as a numeric value. |
| return ord(substr($data, $location, 1)); |
| } |
| |
| # The git binary delta format is undocumented, except in code: |
| # - https://github.com/git/git/blob/master/delta.h:get_delta_hdr_size is the source |
| # of the algorithm in decodeGitBinaryPatchDeltaSize. |
| # - https://github.com/git/git/blob/master/patch-delta.c:patch_delta is the source |
| # of the algorithm in applyGitBinaryPatchDelta. |
| sub decodeGitBinaryPatchDeltaSize($) |
| { |
| my ($binaryChunk) = @_; |
| |
| # Source and destination buffer sizes are stored in 7-bit chunks at the |
| # start of the binary delta patch data. The highest bit in each byte |
| # except the last is set; the remaining 7 bits provide the next |
| # chunk of the size. The chunks are stored in ascending significance |
| # order. |
| my $cmd; |
| my $size = 0; |
| my $shift = 0; |
| for (my $i = 0; $i < length($binaryChunk);) { |
| $cmd = readByte($binaryChunk, $i++); |
| $size |= ($cmd & 0x7f) << $shift; |
| $shift += 7; |
| if (!($cmd & 0x80)) { |
| return ($size, $i); |
| } |
| } |
| } |
| |
| sub applyGitBinaryPatchDelta($$) |
| { |
| my ($binaryChunk, $originalContents) = @_; |
| |
| # Git delta format consists of two headers indicating source buffer size |
| # and result size, then a series of commands. Each command is either |
| # a copy-from-old-version (the 0x80 bit is set) or a copy-from-delta |
| # command. Commands are applied sequentially to generate the result. |
| # |
| # A copy-from-old-version command encodes an offset and size to copy |
| # from in subsequent bits, while a copy-from-delta command consists only |
| # of the number of bytes to copy from the delta. |
| |
| # We don't use these values, but we need to know how big they are so that |
| # we can skip to the diff data. |
| my ($size, $bytesUsed) = decodeGitBinaryPatchDeltaSize($binaryChunk); |
| $binaryChunk = substr($binaryChunk, $bytesUsed); |
| ($size, $bytesUsed) = decodeGitBinaryPatchDeltaSize($binaryChunk); |
| $binaryChunk = substr($binaryChunk, $bytesUsed); |
| |
| my $out = ""; |
| for (my $i = 0; $i < length($binaryChunk); ) { |
| my $cmd = ord(substr($binaryChunk, $i++, 1)); |
| if ($cmd & 0x80) { |
| # Extract an offset and size from the delta data, then copy |
| # $size bytes from $offset in the original data into the output. |
| my $offset = 0; |
| my $size = 0; |
| if ($cmd & 0x01) { $offset = readByte($binaryChunk, $i++); } |
| if ($cmd & 0x02) { $offset |= readByte($binaryChunk, $i++) << 8; } |
| if ($cmd & 0x04) { $offset |= readByte($binaryChunk, $i++) << 16; } |
| if ($cmd & 0x08) { $offset |= readByte($binaryChunk, $i++) << 24; } |
| if ($cmd & 0x10) { $size = readByte($binaryChunk, $i++); } |
| if ($cmd & 0x20) { $size |= readByte($binaryChunk, $i++) << 8; } |
| if ($cmd & 0x40) { $size |= readByte($binaryChunk, $i++) << 16; } |
| if ($size == 0) { $size = 0x10000; } |
| $out .= substr($originalContents, $offset, $size); |
| } elsif ($cmd) { |
| # Copy $cmd bytes from the delta data into the output. |
| $out .= substr($binaryChunk, $i, $cmd); |
| $i += $cmd; |
| } else { |
| die "unexpected delta opcode 0"; |
| } |
| } |
| |
| return $out; |
| } |
| |
| 1; |