| # This Source Code Form is subject to the terms of the Mozilla Public |
| # License, v. 2.0. If a copy of the MPL was not distributed with this |
| # file, You can obtain one at http://mozilla.org/MPL/2.0/. |
| # |
| # This Source Code Form is "Incompatible With Secondary Licenses", as |
| # defined by the Mozilla Public License, v. 2.0. |
| |
| package Bugzilla::Migrate::Gnats; |
| |
| use 5.10.1; |
| use strict; |
| use warnings; |
| |
| use parent qw(Bugzilla::Migrate); |
| |
| use Bugzilla::Constants; |
| use Bugzilla::Install::Util qw(indicate_progress); |
| use Bugzilla::Util qw(format_time trim generate_random_password); |
| |
| use Email::Address; |
| use Email::MIME; |
| use File::Basename; |
| use IO::File; |
| use List::MoreUtils qw(firstidx); |
| use List::Util qw(first); |
| |
| use constant REQUIRED_MODULES => [ |
| { |
| package => 'Email-Simple-FromHandle', |
| module => 'Email::Simple::FromHandle', |
| # This version added seekable handles. |
| version => 0.050, |
| }, |
| ]; |
| |
| use constant FIELD_MAP => { |
| 'Number' => 'bug_id', |
| 'Category' => 'product', |
| 'Synopsis' => 'short_desc', |
| 'Responsible' => 'assigned_to', |
| 'State' => 'bug_status', |
| 'Class' => 'cf_type', |
| 'Classification' => '', |
| 'Originator' => 'reporter', |
| 'Arrival-Date' => 'creation_ts', |
| 'Last-Modified' => 'delta_ts', |
| 'Release' => 'version', |
| 'Severity' => 'bug_severity', |
| 'Description' => 'comment', |
| }; |
| |
| use constant VALUE_MAP => { |
| bug_severity => { |
| 'serious' => 'major', |
| 'cosmetic' => 'trivial', |
| 'new-feature' => 'enhancement', |
| 'non-critical' => 'normal', |
| }, |
| bug_status => { |
| 'open' => 'CONFIRMED', |
| 'analyzed' => 'IN_PROGRESS', |
| 'suspended' => 'RESOLVED', |
| 'feedback' => 'RESOLVED', |
| 'released' => 'VERIFIED', |
| }, |
| bug_status_resolution => { |
| 'feedback' => 'FIXED', |
| 'released' => 'FIXED', |
| 'closed' => 'FIXED', |
| 'suspended' => 'LATER', |
| }, |
| priority => { |
| 'medium' => 'Normal', |
| }, |
| }; |
| |
| use constant GNATS_CONFIG_VARS => ( |
| { |
| name => 'gnats_path', |
| default => '/var/lib/gnats', |
| desc => <<END, |
| # The path to the directory that contains the GNATS database. |
| END |
| }, |
| { |
| name => 'default_email_domain', |
| default => 'example.com', |
| desc => <<'END', |
| # Some GNATS users do not have full email addresses, but Bugzilla requires |
| # every user to have an email address. What domain should be appended to |
| # usernames that don't have emails, to make them into email addresses? |
| # (For example, if you leave this at the default, "unknown" would become |
| # "unknown@example.com".) |
| END |
| }, |
| { |
| name => 'component_name', |
| default => 'General', |
| desc => <<'END', |
| # GNATS has only "Category" to classify bugs. However, Bugzilla has a |
| # multi-level system of Products that contain Components. When importing |
| # GNATS categories, they become a Product with one Component. What should |
| # the name of that Component be? |
| END |
| }, |
| { |
| name => 'version_regex', |
| default => '', |
| desc => <<'END', |
| # In GNATS, the "version" field can contain almost anything. However, in |
| # Bugzilla, it's a drop-down, so you don't want too many choices in there. |
| # If you specify a regular expression here, versions will be tested against |
| # this regular expression, and if they match, the first match (the first set |
| # of parentheses in the regular expression, also called "$1") will be used |
| # as the version value for the bug instead of the full version value specified |
| # in GNATS. |
| END |
| }, |
| { |
| name => 'default_originator', |
| default => 'gnats-admin', |
| desc => <<'END', |
| # Sometimes, a PR has no valid Originator, so we fall back to the From |
| # header of the email. If the From header also isn't a valid username |
| # (is just a name with spaces in it--we can't convert that to an email |
| # address) then this username (which can either be a GNATS username or an |
| # email address) will be considered to be the Originator of the PR. |
| END |
| } |
| ); |
| |
| sub CONFIG_VARS { |
| my $self = shift; |
| my @vars = (GNATS_CONFIG_VARS, $self->SUPER::CONFIG_VARS); |
| my $field_map = first { $_->{name} eq 'translate_fields' } @vars; |
| $field_map->{default} = FIELD_MAP; |
| my $value_map = first { $_->{name} eq 'translate_values' } @vars; |
| $value_map->{default} = VALUE_MAP; |
| return @vars; |
| } |
| |
| # Directories that aren't projects, or that we shouldn't be parsing |
| use constant SKIP_DIRECTORIES => qw( |
| gnats-adm |
| gnats-queue |
| pending |
| ); |
| |
| use constant NON_COMMENT_FIELDS => qw( |
| Audit-Trail |
| Closed-Date |
| Confidential |
| Unformatted |
| attachments |
| ); |
| |
| # Certain fields can contain things that look like fields in them, |
| # because they might contain quoted emails. To avoid mis-parsing, |
| # we list out here the exact order of fields at the end of a PR |
| # and wait for the next field to consider that we actually have |
| # a field to parse. |
| use constant END_FIELD_ORDER => qw( |
| Description |
| How-To-Repeat |
| Fix |
| Release-Note |
| Audit-Trail |
| Unformatted |
| ); |
| |
| use constant CUSTOM_FIELDS => { |
| cf_type => { |
| type => FIELD_TYPE_SINGLE_SELECT, |
| description => 'Type', |
| }, |
| }; |
| |
| use constant FIELD_REGEX => qr/^>(\S+):\s*(.*)$/; |
| |
| # Used for bugs that have no Synopsis. |
| use constant NO_SUBJECT => "(no subject)"; |
| |
| # This is the divider that GNATS uses between attachments in its database |
| # files. It's missign two hyphens at the beginning because MIME Emails use |
| # -- to start boundaries. |
| use constant GNATS_BOUNDARY => '----gnatsweb-attachment----'; |
| |
| use constant LONG_VERSION_LENGTH => 32; |
| |
| ######### |
| # Hooks # |
| ######### |
| |
| sub before_insert { |
| my $self = shift; |
| |
| # gnats_id isn't a valid User::create field, and we don't need it |
| # anymore now. |
| delete $_->{gnats_id} foreach @{ $self->users }; |
| |
| # Grab a version out of a bug for each product, so that there is a |
| # valid "version" argument for Bugzilla::Product->create. |
| foreach my $product (@{ $self->products }) { |
| my $bug = first { $_->{product} eq $product->{name} and $_->{version} } |
| @{ $self->bugs }; |
| if (defined $bug) { |
| $product->{version} = $bug->{version}; |
| } |
| else { |
| $product->{version} = 'unspecified'; |
| } |
| } |
| } |
| |
| ######### |
| # Users # |
| ######### |
| |
| sub _read_users { |
| my $self = shift; |
| my $path = $self->config('gnats_path'); |
| my $file = "$path/gnats-adm/responsible"; |
| $self->debug("Reading users from $file"); |
| my $default_domain = $self->config('default_email_domain'); |
| open(my $users_fh, '<', $file) || die "$file: $!"; |
| my @users; |
| foreach my $line (<$users_fh>) { |
| $line = trim($line); |
| next if $line =~ /^#/; |
| my ($id, $name, $email) = split(':', $line, 3); |
| $email ||= "$id\@$default_domain"; |
| # We can't call our own translate_value, because that depends on |
| # the existence of user_map, which doesn't exist until after |
| # this method. However, we still want to translate any users found. |
| $email = $self->SUPER::translate_value('user', $email); |
| push(@users, { realname => $name, login_name => $email, |
| gnats_id => $id }); |
| } |
| close($users_fh); |
| return \@users; |
| } |
| |
| sub user_map { |
| my $self = shift; |
| $self->{user_map} ||= { map { $_->{gnats_id} => $_->{login_name} } |
| @{ $self->users } }; |
| return $self->{user_map}; |
| } |
| |
| sub add_user { |
| my ($self, $id, $email) = @_; |
| return if defined $self->user_map->{$id}; |
| $self->user_map->{$id} = $email; |
| push(@{ $self->users }, { login_name => $email, gnats_id => $id }); |
| } |
| |
| sub user_to_email { |
| my ($self, $value) = @_; |
| if (defined $self->user_map->{$value}) { |
| $value = $self->user_map->{$value}; |
| } |
| elsif ($value !~ /@/) { |
| my $domain = $self->config('default_email_domain'); |
| $value = "$value\@$domain"; |
| } |
| return $value; |
| } |
| |
| ############ |
| # Products # |
| ############ |
| |
| sub _read_products { |
| my $self = shift; |
| my $path = $self->config('gnats_path'); |
| my $file = "$path/gnats-adm/categories"; |
| $self->debug("Reading categories from $file"); |
| |
| open(my $categories_fh, '<', $file) || die "$file: $!"; |
| my @products; |
| foreach my $line (<$categories_fh>) { |
| $line = trim($line); |
| next if $line =~ /^#/; |
| my ($name, $description, $assigned_to, $cc) = split(':', $line, 4); |
| my %product = ( name => $name, description => $description ); |
| |
| my @initial_cc = split(',', $cc); |
| @initial_cc = @{ $self->translate_value('user', \@initial_cc) }; |
| $assigned_to = $self->translate_value('user', $assigned_to); |
| my %component = ( name => $self->config('component_name'), |
| description => $description, |
| initialowner => $assigned_to, |
| initial_cc => \@initial_cc ); |
| $product{components} = [\%component]; |
| push(@products, \%product); |
| } |
| close($categories_fh); |
| return \@products; |
| } |
| |
| ################ |
| # Reading Bugs # |
| ################ |
| |
| sub _read_bugs { |
| my $self = shift; |
| my $path = $self->config('gnats_path'); |
| my @directories = glob("$path/*"); |
| my @bugs; |
| foreach my $directory (@directories) { |
| next if !-d $directory; |
| my $name = basename($directory); |
| next if grep($_ eq $name, SKIP_DIRECTORIES); |
| push(@bugs, @{ $self->_parse_project($directory) }); |
| } |
| @bugs = sort { $a->{Number} <=> $b->{Number} } @bugs; |
| return \@bugs; |
| } |
| |
| sub _parse_project { |
| my ($self, $directory) = @_; |
| my @files = glob("$directory/*"); |
| |
| $self->debug("Reading Project: $directory"); |
| # Sometimes other files get into gnats directories. |
| @files = grep { basename($_) =~ /^\d+$/ } @files; |
| my @bugs; |
| my $count = 1; |
| my $total = scalar @files; |
| print basename($directory) . ":\n"; |
| foreach my $file (@files) { |
| push(@bugs, $self->_parse_bug_file($file)); |
| if (!$self->verbose) { |
| indicate_progress({ current => $count++, every => 5, |
| total => $total }); |
| } |
| } |
| return \@bugs; |
| } |
| |
| sub _parse_bug_file { |
| my ($self, $file) = @_; |
| $self->debug("Reading $file"); |
| open(my $fh, "<", $file) || die "$file: $!"; |
| my $email = Email::Simple::FromHandle->new($fh); |
| my $fields = $self->_get_gnats_field_data($email); |
| # We parse attachments here instead of during translate_bug, |
| # because otherwise we'd be taking up huge amounts of memory storing |
| # all the raw attachment data in memory. |
| $fields->{attachments} = $self->_parse_attachments($fields); |
| close($fh); |
| return $fields; |
| } |
| |
| sub _get_gnats_field_data { |
| my ($self, $email) = @_; |
| my ($current_field, @value_lines, %fields); |
| $email->reset_handle(); |
| my $handle = $email->handle; |
| foreach my $line (<$handle>) { |
| # If this line starts a field name |
| if ($line =~ FIELD_REGEX) { |
| my ($new_field, $rest_of_line) = ($1, $2); |
| |
| # If this is one of the last few PR fields, then make sure |
| # that we're getting our fields in the right order. |
| my $new_field_valid = 1; |
| my $search_for = $current_field || ''; |
| my $current_field_pos = firstidx { $_ eq $search_for } |
| END_FIELD_ORDER; |
| if ($current_field_pos > -1) { |
| my $new_field_pos = firstidx { $_ eq $new_field } |
| END_FIELD_ORDER; |
| # We accept any field, as long as it's later than this one. |
| $new_field_valid = $new_field_pos > $current_field_pos ? 1 : 0; |
| } |
| |
| if ($new_field_valid) { |
| if ($current_field) { |
| $fields{$current_field} = _handle_lines(\@value_lines); |
| @value_lines = (); |
| } |
| $current_field = $new_field; |
| $line = $rest_of_line; |
| } |
| } |
| push(@value_lines, $line) if defined $line; |
| } |
| $fields{$current_field} = _handle_lines(\@value_lines); |
| $fields{cc} = [$email->header('Cc')] if $email->header('Cc'); |
| |
| # If the Originator is invalid and we don't have a translation for it, |
| # use the From header instead. |
| my $originator = $self->translate_value('reporter', $fields{Originator}, |
| { check_only => 1 }); |
| if ($originator !~ Bugzilla->params->{emailregexp}) { |
| # We use the raw header sometimes, because it looks like "From: user" |
| # which Email::Address won't parse but we can still use. |
| my $address = $email->header('From'); |
| my ($parsed) = Email::Address->parse($address); |
| if ($parsed) { |
| $address = $parsed->address; |
| } |
| if ($address) { |
| $self->debug( |
| "PR $fields{Number} had an Originator that was not a valid" |
| . " user ($fields{Originator}). Using From ($address)" |
| . " instead.\n"); |
| my $address_email = $self->translate_value('reporter', $address, |
| { check_only => 1 }); |
| if ($address_email !~ Bugzilla->params->{emailregexp}) { |
| $self->debug(" From was also invalid, using default_originator.\n"); |
| $address = $self->config('default_originator'); |
| } |
| $fields{Originator} = $address; |
| } |
| } |
| |
| $self->debug(\%fields, 3); |
| return \%fields; |
| } |
| |
| sub _handle_lines { |
| my ($lines) = @_; |
| my $value = join('', @$lines); |
| $value =~ s/\s+$//; |
| return $value; |
| } |
| |
| #################### |
| # Translating Bugs # |
| #################### |
| |
| sub translate_bug { |
| my ($self, $fields) = @_; |
| |
| my ($bug, $other_fields) = $self->SUPER::translate_bug($fields); |
| |
| $bug->{attachments} = delete $other_fields->{attachments}; |
| |
| if (defined $other_fields->{_add_to_comment}) { |
| $bug->{comment} .= delete $other_fields->{_add_to_comment}; |
| } |
| |
| my ($changes, $extra_comment) = |
| $self->_parse_audit_trail($bug, $other_fields->{'Audit-Trail'}); |
| |
| my @comments; |
| foreach my $change (@$changes) { |
| if (exists $change->{comment}) { |
| push(@comments, { |
| thetext => $change->{comment}, |
| who => $change->{who}, |
| bug_when => $change->{bug_when} }); |
| delete $change->{comment}; |
| } |
| } |
| $bug->{history} = $changes; |
| |
| if (trim($extra_comment)) { |
| push(@comments, { thetext => $extra_comment, who => $bug->{reporter}, |
| bug_when => $bug->{delta_ts} || $bug->{creation_ts} }); |
| } |
| $bug->{comments} = \@comments; |
| |
| $bug->{component} = $self->config('component_name'); |
| if (!$bug->{short_desc}) { |
| $bug->{short_desc} = NO_SUBJECT; |
| } |
| |
| foreach my $attachment (@{ $bug->{attachments} || [] }) { |
| $attachment->{submitter} = $bug->{reporter}; |
| $attachment->{creation_ts} = $bug->{creation_ts}; |
| } |
| |
| $self->debug($bug, 3); |
| return $bug; |
| } |
| |
| sub _parse_audit_trail { |
| my ($self, $bug, $audit_trail) = @_; |
| return [] if !trim($audit_trail); |
| $self->debug(" Parsing audit trail...", 2); |
| |
| if ($audit_trail !~ /^\S+-Changed-\S+:/ms) { |
| # This is just a comment from the bug's creator. |
| $self->debug(" Audit trail is just a comment.", 2); |
| return ([], $audit_trail); |
| } |
| |
| my (@changes, %current_data, $current_column, $on_why); |
| my $extra_comment = ''; |
| my $current_field; |
| my @all_lines = split("\n", $audit_trail); |
| foreach my $line (@all_lines) { |
| # GNATS history looks like: |
| # Status-Changed-From-To: open->closed |
| # Status-Changed-By: jack |
| # Status-Changed-When: Mon May 12 14:46:59 2003 |
| # Status-Changed-Why: |
| # This is some comment here about the change. |
| if ($line =~ /^(\S+)-Changed-(\S+):(.*)/) { |
| my ($field, $column, $value) = ($1, $2, $3); |
| my $bz_field = $self->translate_field($field); |
| # If it's not a field we're importing, we don't care about |
| # its history. |
| next if !$bz_field; |
| # GNATS doesn't track values for description changes, |
| # unfortunately, and that's the only information we'd be able to |
| # use in Bugzilla for the audit trail on that field. |
| next if $bz_field eq 'comment'; |
| $current_field = $bz_field if !$current_field; |
| if ($bz_field ne $current_field) { |
| $self->_store_audit_change( |
| \@changes, $current_field, \%current_data); |
| %current_data = (); |
| $current_field = $bz_field; |
| } |
| $value = trim($value); |
| $self->debug(" $bz_field $column: $value", 3); |
| if ($column eq 'From-To') { |
| my ($from, $to) = split('->', $value, 2); |
| # Sometimes there's just a - instead of a -> between the values. |
| if (!defined($to)) { |
| ($from, $to) = split('-', $value, 2); |
| } |
| $current_data{added} = $to; |
| $current_data{removed} = $from; |
| } |
| elsif ($column eq 'By') { |
| my $email = $self->translate_value('user', $value); |
| # Sometimes we hit users in the audit trail that we haven't |
| # seen anywhere else. |
| $current_data{who} = $email; |
| } |
| elsif ($column eq 'When') { |
| $current_data{bug_when} = $self->parse_date($value); |
| } |
| if ($column eq 'Why') { |
| $value = '' if !defined $value; |
| $current_data{comment} = $value; |
| $on_why = 1; |
| } |
| else { |
| $on_why = 0; |
| } |
| } |
| elsif ($on_why) { |
| # "Why" lines are indented four characters. |
| $line =~ s/^\s{4}//; |
| $current_data{comment} .= "$line\n"; |
| } |
| else { |
| $self->debug( |
| "Extra Audit-Trail line on $bug->{product} $bug->{bug_id}:" |
| . " $line\n", 2); |
| $extra_comment .= "$line\n"; |
| } |
| } |
| $self->_store_audit_change(\@changes, $current_field, \%current_data); |
| return (\@changes, $extra_comment); |
| } |
| |
| sub _store_audit_change { |
| my ($self, $changes, $old_field, $current_data) = @_; |
| |
| $current_data->{field} = $old_field; |
| $current_data->{removed} = |
| $self->translate_value($old_field, $current_data->{removed}); |
| $current_data->{added} = |
| $self->translate_value($old_field, $current_data->{added}); |
| push(@$changes, { %$current_data }); |
| } |
| |
| sub _parse_attachments { |
| my ($self, $fields) = @_; |
| my $unformatted = delete $fields->{'Unformatted'}; |
| my $gnats_boundary = GNATS_BOUNDARY; |
| # A sanity checker to make sure that we're parsing attachments right. |
| my $num_attachments = 0; |
| $num_attachments++ while ($unformatted =~ /\Q$gnats_boundary\E/g); |
| # Sometimes there's a GNATS_BOUNDARY that is on the same line as other data. |
| $unformatted =~ s/(\S\s*)\Q$gnats_boundary\E$/$1\n$gnats_boundary/mg; |
| # Often the "Unformatted" section starts with stuff before |
| # ----gnatsweb-attachment---- that isn't necessary. |
| $unformatted =~ s/^\s*From:.+?Reply-to:[^\n]+//s; |
| $unformatted = trim($unformatted); |
| return [] if !$unformatted; |
| $self->debug('Reading attachments...', 2); |
| my $boundary = generate_random_password(48); |
| $unformatted =~ s/\Q$gnats_boundary\E/--$boundary/g; |
| # Sometimes the whole Unformatted section is indented by exactly |
| # one space, and needs to be fixed. |
| if ($unformatted =~ /--\Q$boundary\E\n /) { |
| $unformatted =~ s/^ //mg; |
| } |
| $unformatted = <<END; |
| From: nobody |
| MIME-Version: 1.0 |
| Content-Type: multipart/mixed; boundary="$boundary" |
| |
| This is a multi-part message in MIME format. |
| --$boundary |
| Content-Type: text/plain; charset=UTF-8 |
| Content-Transfer-Encoding: 7bit |
| |
| $unformatted |
| --$boundary-- |
| END |
| my $email = new Email::MIME(\$unformatted); |
| my @parts = $email->parts; |
| # Remove the fake body. |
| my $part1 = shift @parts; |
| if ($part1->body) { |
| $self->debug(" Additional Unformatted data found on " |
| . $fields->{Category} . " bug " . $fields->{Number}); |
| $self->debug($part1->body, 3); |
| $fields->{_add_comment} .= "\n\nUnformatted:\n" . $part1->body; |
| } |
| |
| my @attachments; |
| foreach my $part (@parts) { |
| $self->debug(' Parsing attachment: ' . $part->filename); |
| my $temp_fh = IO::File->new_tmpfile or die ("Can't create tempfile: $!"); |
| $temp_fh->binmode; |
| print $temp_fh $part->body; |
| my $content_type = $part->content_type; |
| $content_type =~ s/; name=.+$//; |
| my $attachment = { filename => $part->filename, |
| description => $part->filename, |
| mimetype => $content_type, |
| data => $temp_fh }; |
| $self->debug($attachment, 3); |
| push(@attachments, $attachment); |
| } |
| |
| if (scalar(@attachments) ne $num_attachments) { |
| warn "WARNING: Expected $num_attachments attachments but got " |
| . scalar(@attachments) . "\n" ; |
| $self->debug($unformatted, 3); |
| } |
| return \@attachments; |
| } |
| |
| sub translate_value { |
| my $self = shift; |
| my ($field, $value, $options) = @_; |
| my $original_value = $value; |
| $options ||= {}; |
| |
| if (!ref($value) and grep($_ eq $field, $self->USER_FIELDS)) { |
| if ($value =~ /(\S+\@\S+)/) { |
| $value = $1; |
| $value =~ s/^<//; |
| $value =~ s/>$//; |
| } |
| else { |
| # Sometimes names have extra stuff on the end like "(Somebody's Name)" |
| $value =~ s/\s+\(.+\)$//; |
| # Sometimes user fields look like "(user)" instead of just "user". |
| $value =~ s/^\((.+)\)$/$1/; |
| $value = trim($value); |
| } |
| } |
| |
| if ($field eq 'version' and $value ne '') { |
| my $version_re = $self->config('version_regex'); |
| if ($version_re and $value =~ $version_re) { |
| $value = $1; |
| } |
| # In the GNATS that I tested this with, there were many extremely long |
| # values for "version" that caused some import problems (they were |
| # longer than the max allowed version value). So if the version value |
| # is longer than 32 characters, pull out the first thing that looks |
| # like a version number. |
| elsif (length($value) > LONG_VERSION_LENGTH) { |
| $value =~ s/^.+?\b(\d[\w\.]+)\b.+$/$1/; |
| } |
| } |
| |
| my @args = @_; |
| $args[1] = $value; |
| |
| $value = $self->SUPER::translate_value(@args); |
| return $value if ref $value; |
| |
| if (grep($_ eq $field, $self->USER_FIELDS)) { |
| my $from_value = $value; |
| $value = $self->user_to_email($value); |
| $args[1] = $value; |
| # If we got something new from user_to_email, do any necessary |
| # translation of it. |
| $value = $self->SUPER::translate_value(@args); |
| if (!$options->{check_only}) { |
| $self->add_user($from_value, $value); |
| } |
| } |
| |
| return $value; |
| } |
| |
| 1; |
| |
| =head1 B<Methods in need of POD> |
| |
| =over |
| |
| =item user_map |
| |
| =item user_to_email |
| |
| =item add_user |
| |
| =item translate_value |
| |
| =item before_insert |
| |
| =item translate_bug |
| |
| =item CONFIG_VARS |
| |
| =back |