Websites/bugs.webkit.org/Bugzilla/Migrate/Gnats.pm - WebKit - Git at Google

 # This Source Code Form is subject to the terms of the Mozilla Public
 # License, v. 2.0. If a copy of the MPL was not distributed with this
 # file, You can obtain one at http://mozilla.org/MPL/2.0/.
 #
 # This Source Code Form is "Incompatible With Secondary Licenses", as
 # defined by the Mozilla Public License, v. 2.0.

 package Bugzilla::Migrate::Gnats;

 use 5.10.1;
 use strict;
 use warnings;

 use parent qw(Bugzilla::Migrate);

 use Bugzilla::Constants;
 use Bugzilla::Install::Util qw(indicate_progress);
 use Bugzilla::Util qw(format_time trim generate_random_password);

 use Email::Address;
 use Email::MIME;
 use File::Basename;
 use IO::File;
 use List::MoreUtils qw(firstidx);
 use List::Util qw(first);

 use constant REQUIRED_MODULES => [
     {
         package => 'Email-Simple-FromHandle',
         module  => 'Email::Simple::FromHandle',
         # This version added seekable handles.
         version => 0.050,
     },
 ];

 use constant FIELD_MAP => {
     'Number'         => 'bug_id',
     'Category'       => 'product',
     'Synopsis'       => 'short_desc',
     'Responsible'    => 'assigned_to',
     'State'          => 'bug_status',
     'Class'          => 'cf_type',
     'Classification' => '',
     'Originator'     => 'reporter',
     'Arrival-Date'   => 'creation_ts',
     'Last-Modified'  => 'delta_ts',
     'Release'        => 'version',
     'Severity'       => 'bug_severity',
     'Description'    => 'comment',
 };

 use constant VALUE_MAP => {
     bug_severity => {
         'serious'      => 'major',
         'cosmetic'     => 'trivial',
         'new-feature'  => 'enhancement',
         'non-critical' => 'normal',
     },
     bug_status => {
         'open'      => 'CONFIRMED',
         'analyzed'  => 'IN_PROGRESS',
         'suspended' => 'RESOLVED',
         'feedback'  => 'RESOLVED',
         'released'  => 'VERIFIED',
     },
     bug_status_resolution => {
         'feedback'  => 'FIXED',
         'released'  => 'FIXED',
         'closed'    => 'FIXED',
         'suspended' => 'LATER',
     },
     priority => {
         'medium' => 'Normal',
     },
 };

 use constant GNATS_CONFIG_VARS => (
     {
         name    => 'gnats_path',
         default => '/var/lib/gnats',
         desc    => <<END,
 # The path to the directory that contains the GNATS database.
 END
     },
     {
         name    => 'default_email_domain',
         default => 'example.com',
         desc    => <<'END',
 # Some GNATS users do not have full email addresses, but Bugzilla requires
 # every user to have an email address. What domain should be appended to
 # usernames that don't have emails, to make them into email addresses?
 # (For example, if you leave this at the default, "unknown" would become
 # "unknown@example.com".)
 END
     },
     {
         name    => 'component_name',
         default => 'General',
         desc    => <<'END',
 # GNATS has only "Category" to classify bugs. However, Bugzilla has a
 # multi-level system of Products that contain Components. When importing
 # GNATS categories, they become a Product with one Component. What should
 # the name of that Component be?
 END
     },
     {
         name    => 'version_regex',
         default => '',
         desc    => <<'END',
 # In GNATS, the "version" field can contain almost anything. However, in
 # Bugzilla, it's a drop-down, so you don't want too many choices in there.
 # If you specify a regular expression here, versions will be tested against
 # this regular expression, and if they match, the first match (the first set
 # of parentheses in the regular expression, also called "$1") will be used
 # as the version value for the bug instead of the full version value specified
 # in GNATS.
 END
     },
     {
         name    => 'default_originator',
         default => 'gnats-admin',
         desc    => <<'END',
 # Sometimes, a PR has no valid Originator, so we fall back to the From
 # header of the email. If the From header also isn't a valid username
 # (is just a name with spaces in it--we can't convert that to an email
 # address) then this username (which can either be a GNATS username or an
 # email address) will be considered to be the Originator of the PR.
 END
     }
 );

 sub CONFIG_VARS {
     my $self = shift;
     my @vars = (GNATS_CONFIG_VARS, $self->SUPER::CONFIG_VARS);
     my $field_map = first { $_->{name} eq 'translate_fields' } @vars;
     $field_map->{default} = FIELD_MAP;
     my $value_map = first { $_->{name} eq 'translate_values' } @vars;
     $value_map->{default} = VALUE_MAP;
     return @vars;
 }

 # Directories that aren't projects, or that we shouldn't be parsing
 use constant SKIP_DIRECTORIES => qw(
     gnats-adm
     gnats-queue
     pending
 );

 use constant NON_COMMENT_FIELDS => qw(
     Audit-Trail
     Closed-Date
     Confidential
     Unformatted
     attachments
 );

 # Certain fields can contain things that look like fields in them,
 # because they might contain quoted emails. To avoid mis-parsing,
 # we list out here the exact order of fields at the end of a PR
 # and wait for the next field to consider that we actually have
 # a field to parse.
 use constant END_FIELD_ORDER => qw(
     Description
     How-To-Repeat
     Fix
     Release-Note
     Audit-Trail
     Unformatted
 );

 use constant CUSTOM_FIELDS => {
     cf_type => {
         type        => FIELD_TYPE_SINGLE_SELECT,
         description => 'Type',
     },
 };

 use constant FIELD_REGEX => qr/^>(\S+):\s*(.*)$/;

 # Used for bugs that have no Synopsis.
 use constant NO_SUBJECT => "(no subject)";

 # This is the divider that GNATS uses between attachments in its database
 # files. It's missign two hyphens at the beginning because MIME Emails use
 # -- to start boundaries.
 use constant GNATS_BOUNDARY => '----gnatsweb-attachment----';

 use constant LONG_VERSION_LENGTH => 32;

 #########
 # Hooks #
 #########

 sub before_insert {
     my $self = shift;

     # gnats_id isn't a valid User::create field, and we don't need it
     # anymore now.
     delete $_->{gnats_id} foreach @{ $self->users };

     # Grab a version out of a bug for each product, so that there is a
     # valid "version" argument for Bugzilla::Product->create.
     foreach my $product (@{ $self->products }) {
         my $bug = first { $_->{product} eq $product->{name} and $_->{version} }
                         @{ $self->bugs };
         if (defined $bug) {
             $product->{version} = $bug->{version};
         }
         else {
             $product->{version} = 'unspecified';
         }
     }
 }

 #########
 # Users #
 #########

 sub _read_users {
     my $self = shift;
     my $path = $self->config('gnats_path');
     my $file =  "$path/gnats-adm/responsible";
     $self->debug("Reading users from $file");
     my $default_domain = $self->config('default_email_domain');
     open(my $users_fh, '<', $file) || die "$file: $!";
     my @users;
     foreach my $line (<$users_fh>) {
         $line = trim($line);
         next if $line =~ /^#/;
         my ($id, $name, $email) = split(':', $line, 3);
         $email ||= "$id\@$default_domain";
         # We can't call our own translate_value, because that depends on
         # the existence of user_map, which doesn't exist until after
         # this method. However, we still want to translate any users found.
         $email = $self->SUPER::translate_value('user', $email);
         push(@users, { realname => $name, login_name => $email,
                        gnats_id => $id });
     }
     close($users_fh);
     return \@users;
 }

 sub user_map {
     my $self = shift;
     $self->{user_map} ||= { map { $_->{gnats_id} => $_->{login_name} }
                                 @{ $self->users } };
     return $self->{user_map};
 }

 sub add_user {
     my ($self, $id, $email) = @_;
     return if defined $self->user_map->{$id};
     $self->user_map->{$id} = $email;
     push(@{ $self->users }, { login_name => $email, gnats_id => $id });
 }

 sub user_to_email {
     my ($self, $value) = @_;
     if (defined $self->user_map->{$value}) {
         $value = $self->user_map->{$value};
     }
     elsif ($value !~ /@/) {
         my $domain = $self->config('default_email_domain');
         $value = "$value\@$domain";
     }
     return $value;
 }

 ############
 # Products #
 ############

 sub _read_products {
     my $self = shift;
     my $path = $self->config('gnats_path');
     my $file =  "$path/gnats-adm/categories";
     $self->debug("Reading categories from $file");

     open(my $categories_fh, '<', $file) || die "$file: $!";
     my @products;
     foreach my $line (<$categories_fh>) {
         $line = trim($line);
         next if $line =~ /^#/;
         my ($name, $description, $assigned_to, $cc) = split(':', $line, 4);
         my %product = ( name => $name, description => $description );

         my @initial_cc = split(',', $cc);
         @initial_cc = @{ $self->translate_value('user', \@initial_cc) };
         $assigned_to = $self->translate_value('user', $assigned_to);
         my %component = ( name         => $self->config('component_name'),
                           description  => $description,
                           initialowner => $assigned_to,
                           initial_cc   => \@initial_cc );
         $product{components} = [\%component];
         push(@products, \%product);
     }
     close($categories_fh);
     return \@products;
 }

 ################
 # Reading Bugs #
 ################

 sub _read_bugs {
     my $self = shift;
     my $path = $self->config('gnats_path');
     my @directories = glob("$path/*");
     my @bugs;
     foreach my $directory (@directories) {
         next if !-d $directory;
         my $name = basename($directory);
         next if grep($_ eq $name, SKIP_DIRECTORIES);
         push(@bugs, @{ $self->_parse_project($directory) });
     }
     @bugs = sort { $a->{Number} <=> $b->{Number} } @bugs;
     return \@bugs;
 }

 sub _parse_project {
     my ($self, $directory) = @_;
     my @files = glob("$directory/*");

     $self->debug("Reading Project: $directory");
     # Sometimes other files get into gnats directories.
     @files = grep { basename($_) =~ /^\d+$/ } @files;
     my @bugs;
     my $count = 1;
     my $total = scalar @files;
     print basename($directory) . ":\n";
     foreach my $file (@files) {
         push(@bugs, $self->_parse_bug_file($file));
         if (!$self->verbose) {
             indicate_progress({ current => $count++, every => 5,
                                 total => $total });
         }
     }
     return \@bugs;
 }

 sub _parse_bug_file {
     my ($self, $file) = @_;
     $self->debug("Reading $file");
     open(my $fh, "<", $file) || die "$file: $!";
     my $email = Email::Simple::FromHandle->new($fh);
     my $fields = $self->_get_gnats_field_data($email);
     # We parse attachments here instead of during translate_bug,
     # because otherwise we'd be taking up huge amounts of memory storing
     # all the raw attachment data in memory.
     $fields->{attachments} = $self->_parse_attachments($fields);
     close($fh);
     return $fields;
 }

 sub _get_gnats_field_data {
     my ($self, $email) = @_;
     my ($current_field, @value_lines, %fields);
     $email->reset_handle();
     my $handle = $email->handle;
     foreach my $line (<$handle>) {
         # If this line starts a field name
         if ($line =~ FIELD_REGEX) {
             my ($new_field, $rest_of_line) = ($1, $2);

             # If this is one of the last few PR fields, then make sure
             # that we're getting our fields in the right order.
             my $new_field_valid = 1;
             my $search_for = $current_field || '';
             my $current_field_pos = firstidx { $_ eq $search_for }
                                              END_FIELD_ORDER;
             if ($current_field_pos > -1) {
                 my $new_field_pos = firstidx { $_ eq $new_field }
                                              END_FIELD_ORDER;
                 # We accept any field, as long as it's later than this one.
                 $new_field_valid = $new_field_pos > $current_field_pos ? 1 : 0;
             }

             if ($new_field_valid) {
                 if ($current_field) {
                     $fields{$current_field} = _handle_lines(\@value_lines);
                     @value_lines = ();
                 }
                 $current_field = $new_field;
                 $line = $rest_of_line;
             }
         }
         push(@value_lines, $line) if defined $line;
     }
     $fields{$current_field} = _handle_lines(\@value_lines);
     $fields{cc} = [$email->header('Cc')] if $email->header('Cc');

     # If the Originator is invalid and we don't have a translation for it,
     # use the From header instead.
     my $originator = $self->translate_value('reporter', $fields{Originator},
                                             { check_only => 1 });
     if ($originator !~ Bugzilla->params->{emailregexp}) {
         # We use the raw header sometimes, because it looks like "From: user"
         # which Email::Address won't parse but we can still use.
         my $address = $email->header('From');
         my ($parsed) = Email::Address->parse($address);
         if ($parsed) {
             $address = $parsed->address;
         }
         if ($address) {
             $self->debug(
                 "PR $fields{Number} had an Originator that was not a valid"
                 . " user ($fields{Originator}). Using From ($address)"
                 . " instead.\n");
             my $address_email = $self->translate_value('reporter', $address,
                                                        { check_only => 1 });
             if ($address_email !~ Bugzilla->params->{emailregexp}) {
                 $self->debug(" From was also invalid, using default_originator.\n");
                 $address = $self->config('default_originator');
             }
             $fields{Originator} = $address;
         }
     }

     $self->debug(\%fields, 3);
     return \%fields;
 }

 sub _handle_lines {
     my ($lines) = @_;
     my $value = join('', @$lines);
     $value =~ s/\s+$//;
     return $value;
 }

 ####################
 # Translating Bugs #
 ####################

 sub translate_bug {
     my ($self, $fields) = @_;

     my ($bug, $other_fields) = $self->SUPER::translate_bug($fields);

     $bug->{attachments} = delete $other_fields->{attachments};

     if (defined $other_fields->{_add_to_comment}) {
         $bug->{comment} .= delete $other_fields->{_add_to_comment};
     }

     my ($changes, $extra_comment) =
         $self->_parse_audit_trail($bug, $other_fields->{'Audit-Trail'});

     my @comments;
     foreach my $change (@$changes) {
         if (exists $change->{comment}) {
             push(@comments, {
                 thetext  => $change->{comment},
                 who      => $change->{who},
                 bug_when => $change->{bug_when} });
             delete $change->{comment};
         }
     }
     $bug->{history}  = $changes;

     if (trim($extra_comment)) {
         push(@comments, { thetext => $extra_comment, who => $bug->{reporter},
                           bug_when => $bug->{delta_ts} || $bug->{creation_ts} });
     }
     $bug->{comments} = \@comments;

     $bug->{component} = $self->config('component_name');
     if (!$bug->{short_desc}) {
         $bug->{short_desc} = NO_SUBJECT;
     }

     foreach my $attachment (@{ $bug->{attachments} || [] }) {
         $attachment->{submitter} = $bug->{reporter};
         $attachment->{creation_ts} = $bug->{creation_ts};
     }

     $self->debug($bug, 3);
     return $bug;
 }

 sub _parse_audit_trail {
     my ($self, $bug, $audit_trail) = @_;
     return [] if !trim($audit_trail);
     $self->debug(" Parsing audit trail...", 2);

     if ($audit_trail !~ /^\S+-Changed-\S+:/ms) {
         # This is just a comment from the bug's creator.
         $self->debug("  Audit trail is just a comment.", 2);
         return ([], $audit_trail);
     }

     my (@changes, %current_data, $current_column, $on_why);
     my $extra_comment = '';
     my $current_field;
     my @all_lines = split("\n", $audit_trail);
     foreach my $line (@all_lines) {
         # GNATS history looks like:
         # Status-Changed-From-To: open->closed
         # Status-Changed-By: jack
         # Status-Changed-When: Mon May 12 14:46:59 2003
         # Status-Changed-Why:
         #     This is some comment here about the change.
         if ($line =~ /^(\S+)-Changed-(\S+):(.*)/) {
             my ($field, $column, $value) = ($1, $2, $3);
             my $bz_field = $self->translate_field($field);
             # If it's not a field we're importing, we don't care about
             # its history.
             next if !$bz_field;
             # GNATS doesn't track values for description changes,
             # unfortunately, and that's the only information we'd be able to
             # use in Bugzilla for the audit trail on that field.
             next if $bz_field eq 'comment';
             $current_field = $bz_field if !$current_field;
             if ($bz_field ne $current_field) {
                 $self->_store_audit_change(
                     \@changes, $current_field, \%current_data);
                 %current_data = ();
                 $current_field = $bz_field;
             }
             $value = trim($value);
             $self->debug("  $bz_field $column: $value", 3);
             if ($column eq 'From-To') {
                 my ($from, $to) = split('->', $value, 2);
                 # Sometimes there's just a - instead of a -> between the values.
                 if (!defined($to)) {
                     ($from, $to) = split('-', $value, 2);
                 }
                 $current_data{added} = $to;
                 $current_data{removed} = $from;
             }
             elsif ($column eq 'By') {
                 my $email = $self->translate_value('user', $value);
                 # Sometimes we hit users in the audit trail that we haven't
                 # seen anywhere else.
                 $current_data{who} = $email;
             }
             elsif ($column eq 'When') {
                 $current_data{bug_when} = $self->parse_date($value);
             }
             if ($column eq 'Why') {
                 $value = '' if !defined $value;
                 $current_data{comment} = $value;
                 $on_why = 1;
             }
             else {
                 $on_why = 0;
             }
         }
         elsif ($on_why) {
             # "Why" lines are indented four characters.
             $line =~ s/^\s{4}//;
             $current_data{comment} .= "$line\n";
         }
         else {
             $self->debug(
                 "Extra Audit-Trail line on $bug->{product} $bug->{bug_id}:"
                  . " $line\n", 2);
             $extra_comment .= "$line\n";
         }
     }
     $self->_store_audit_change(\@changes, $current_field, \%current_data);
     return (\@changes, $extra_comment);
 }

 sub _store_audit_change {
     my ($self, $changes, $old_field, $current_data) = @_;

     $current_data->{field} = $old_field;
     $current_data->{removed} =
         $self->translate_value($old_field, $current_data->{removed});
     $current_data->{added} =
         $self->translate_value($old_field, $current_data->{added});
     push(@$changes, { %$current_data });
 }

 sub _parse_attachments {
     my ($self, $fields) = @_;
     my $unformatted = delete $fields->{'Unformatted'};
     my $gnats_boundary = GNATS_BOUNDARY;
     # A sanity checker to make sure that we're parsing attachments right.
     my $num_attachments = 0;
     $num_attachments++ while ($unformatted =~ /\Q$gnats_boundary\E/g);
     # Sometimes there's a GNATS_BOUNDARY that is on the same line as other data.
     $unformatted =~ s/(\S\s*)\Q$gnats_boundary\E$/$1\n$gnats_boundary/mg;
     # Often the "Unformatted" section starts with stuff before
     # ----gnatsweb-attachment---- that isn't necessary.
     $unformatted =~ s/^\s*From:.+?Reply-to:[^\n]+//s;
     $unformatted = trim($unformatted);
     return [] if !$unformatted;
     $self->debug('Reading attachments...', 2);
     my $boundary = generate_random_password(48);
     $unformatted =~ s/\Q$gnats_boundary\E/--$boundary/g;
     # Sometimes the whole Unformatted section is indented by exactly
     # one space, and needs to be fixed.
     if ($unformatted =~ /--\Q$boundary\E\n /) {
         $unformatted =~ s/^ //mg;
     }
     $unformatted = <<END;
 From: nobody
 MIME-Version: 1.0
 Content-Type: multipart/mixed; boundary="$boundary"

 This is a multi-part message in MIME format.
 --$boundary
 Content-Type: text/plain; charset=UTF-8
 Content-Transfer-Encoding: 7bit

 $unformatted
 --$boundary--
 END
     my $email = new Email::MIME(\$unformatted);
     my @parts = $email->parts;
     # Remove the fake body.
     my $part1 = shift @parts;
     if ($part1->body) {
         $self->debug(" Additional Unformatted data found on "
                      . $fields->{Category} . " bug " . $fields->{Number});
         $self->debug($part1->body, 3);
         $fields->{_add_comment} .= "\n\nUnformatted:\n" . $part1->body;
     }

     my @attachments;
     foreach my $part (@parts) {
         $self->debug('  Parsing attachment: ' . $part->filename);
         my $temp_fh = IO::File->new_tmpfile or die ("Can't create tempfile: $!");
         $temp_fh->binmode;
         print $temp_fh $part->body;
         my $content_type = $part->content_type;
         $content_type =~ s/; name=.+$//;
         my $attachment = { filename    => $part->filename,
                            description => $part->filename,
                            mimetype    => $content_type,
                            data        => $temp_fh };
         $self->debug($attachment, 3);
         push(@attachments, $attachment);
     }

     if (scalar(@attachments) ne $num_attachments) {
         warn "WARNING: Expected $num_attachments attachments but got "
              . scalar(@attachments) . "\n" ;
         $self->debug($unformatted, 3);
     }
     return \@attachments;
 }

 sub translate_value {
     my $self = shift;
     my ($field, $value, $options) = @_;
     my $original_value = $value;
     $options ||= {};

     if (!ref($value) and grep($_ eq $field, $self->USER_FIELDS)) {
         if ($value =~ /(\S+\@\S+)/) {
             $value = $1;
             $value =~ s/^<//;
             $value =~ s/>$//;
         }
         else {
             # Sometimes names have extra stuff on the end like "(Somebody's Name)"
             $value =~ s/\s+\(.+\)$//;
             # Sometimes user fields look like "(user)" instead of just "user".
             $value =~ s/^\((.+)\)$/$1/;
             $value = trim($value);
         }
     }

     if ($field eq 'version' and $value ne '') {
         my $version_re = $self->config('version_regex');
         if ($version_re and $value =~ $version_re) {
             $value = $1;
         }
         # In the GNATS that I tested this with, there were many extremely long
         # values for "version" that caused some import problems (they were
         # longer than the max allowed version value). So if the version value
         # is longer than 32 characters, pull out the first thing that looks
         # like a version number.
         elsif (length($value) > LONG_VERSION_LENGTH) {
             $value =~ s/^.+?\b(\d[\w\.]+)\b.+$/$1/;
         }
     }

     my @args = @_;
     $args[1] = $value;

     $value = $self->SUPER::translate_value(@args);
     return $value if ref $value;

     if (grep($_ eq $field, $self->USER_FIELDS)) {
         my $from_value = $value;
         $value = $self->user_to_email($value);
         $args[1] = $value;
         # If we got something new from user_to_email, do any necessary
         # translation of it.
         $value = $self->SUPER::translate_value(@args);
         if (!$options->{check_only}) {
             $self->add_user($from_value, $value);
         }
     }

     return $value;
 }

 1;

 =head1 B<Methods in need of POD>

 =over

 =item user_map

 =item user_to_email

 =item add_user

 =item translate_value

 =item before_insert

 =item translate_bug

 =item CONFIG_VARS

 =back
	# This Source Code Form is subject to the terms of the Mozilla Public
	# License, v. 2.0. If a copy of the MPL was not distributed with this
	# file, You can obtain one at http://mozilla.org/MPL/2.0/.
	#
	# This Source Code Form is "Incompatible With Secondary Licenses", as
	# defined by the Mozilla Public License, v. 2.0.

	package Bugzilla::Migrate::Gnats;

	use 5.10.1;
	use strict;
	use warnings;

	use parent qw(Bugzilla::Migrate);

	use Bugzilla::Constants;
	use Bugzilla::Install::Util qw(indicate_progress);
	use Bugzilla::Util qw(format_time trim generate_random_password);

	use Email::Address;
	use Email::MIME;
	use File::Basename;
	use IO::File;
	use List::MoreUtils qw(firstidx);
	use List::Util qw(first);

	use constant REQUIRED_MODULES => [
	{
	package => 'Email-Simple-FromHandle',
	module => 'Email::Simple::FromHandle',
	# This version added seekable handles.
	version => 0.050,
	},
	];

	use constant FIELD_MAP => {
	'Number' => 'bug_id',
	'Category' => 'product',
	'Synopsis' => 'short_desc',
	'Responsible' => 'assigned_to',
	'State' => 'bug_status',
	'Class' => 'cf_type',
	'Classification' => '',
	'Originator' => 'reporter',
	'Arrival-Date' => 'creation_ts',
	'Last-Modified' => 'delta_ts',
	'Release' => 'version',
	'Severity' => 'bug_severity',
	'Description' => 'comment',
	};

	use constant VALUE_MAP => {
	bug_severity => {
	'serious' => 'major',
	'cosmetic' => 'trivial',
	'new-feature' => 'enhancement',
	'non-critical' => 'normal',
	},
	bug_status => {
	'open' => 'CONFIRMED',
	'analyzed' => 'IN_PROGRESS',
	'suspended' => 'RESOLVED',
	'feedback' => 'RESOLVED',
	'released' => 'VERIFIED',
	},
	bug_status_resolution => {
	'feedback' => 'FIXED',
	'released' => 'FIXED',
	'closed' => 'FIXED',
	'suspended' => 'LATER',
	},
	priority => {
	'medium' => 'Normal',
	},
	};

	use constant GNATS_CONFIG_VARS => (
	{
	name => 'gnats_path',
	default => '/var/lib/gnats',
	desc => <<END,
	# The path to the directory that contains the GNATS database.
	END
	},
	{
	name => 'default_email_domain',
	default => 'example.com',
	desc => <<'END',
	# Some GNATS users do not have full email addresses, but Bugzilla requires
	# every user to have an email address. What domain should be appended to
	# usernames that don't have emails, to make them into email addresses?
	# (For example, if you leave this at the default, "unknown" would become
	# "unknown@example.com".)
	END
	},
	{
	name => 'component_name',
	default => 'General',
	desc => <<'END',
	# GNATS has only "Category" to classify bugs. However, Bugzilla has a
	# multi-level system of Products that contain Components. When importing
	# GNATS categories, they become a Product with one Component. What should
	# the name of that Component be?
	END
	},
	{
	name => 'version_regex',
	default => '',
	desc => <<'END',
	# In GNATS, the "version" field can contain almost anything. However, in
	# Bugzilla, it's a drop-down, so you don't want too many choices in there.
	# If you specify a regular expression here, versions will be tested against
	# this regular expression, and if they match, the first match (the first set
	# of parentheses in the regular expression, also called "$1") will be used
	# as the version value for the bug instead of the full version value specified
	# in GNATS.
	END
	},
	{
	name => 'default_originator',
	default => 'gnats-admin',
	desc => <<'END',
	# Sometimes, a PR has no valid Originator, so we fall back to the From
	# header of the email. If the From header also isn't a valid username
	# (is just a name with spaces in it--we can't convert that to an email
	# address) then this username (which can either be a GNATS username or an
	# email address) will be considered to be the Originator of the PR.
	END
	}
	);

	sub CONFIG_VARS {
	my $self = shift;
	my @vars = (GNATS_CONFIG_VARS, $self->SUPER::CONFIG_VARS);
	my $field_map = first { $_->{name} eq 'translate_fields' } @vars;
	$field_map->{default} = FIELD_MAP;
	my $value_map = first { $_->{name} eq 'translate_values' } @vars;
	$value_map->{default} = VALUE_MAP;
	return @vars;
	}

	# Directories that aren't projects, or that we shouldn't be parsing
	use constant SKIP_DIRECTORIES => qw(
	gnats-adm
	gnats-queue
	pending
	);

	use constant NON_COMMENT_FIELDS => qw(
	Audit-Trail
	Closed-Date
	Confidential
	Unformatted
	attachments
	);

	# Certain fields can contain things that look like fields in them,
	# because they might contain quoted emails. To avoid mis-parsing,
	# we list out here the exact order of fields at the end of a PR
	# and wait for the next field to consider that we actually have
	# a field to parse.
	use constant END_FIELD_ORDER => qw(
	Description
	How-To-Repeat
	Fix
	Release-Note
	Audit-Trail
	Unformatted
	);

	use constant CUSTOM_FIELDS => {
	cf_type => {
	type => FIELD_TYPE_SINGLE_SELECT,
	description => 'Type',
	},
	};

	use constant FIELD_REGEX => qr/^>(\S+):\s(.)$/;

	# Used for bugs that have no Synopsis.
	use constant NO_SUBJECT => "(no subject)";

	# This is the divider that GNATS uses between attachments in its database
	# files. It's missign two hyphens at the beginning because MIME Emails use
	# -- to start boundaries.
	use constant GNATS_BOUNDARY => '----gnatsweb-attachment----';

	use constant LONG_VERSION_LENGTH => 32;

	#########
	# Hooks #
	#########

	sub before_insert {
	my $self = shift;

	# gnats_id isn't a valid User::create field, and we don't need it
	# anymore now.
	delete $_->{gnats_id} foreach @{ $self->users };

	# Grab a version out of a bug for each product, so that there is a
	# valid "version" argument for Bugzilla::Product->create.
	foreach my $product (@{ $self->products }) {
	my $bug = first { $_->{product} eq $product->{name} and $_->{version} }
	@{ $self->bugs };
	if (defined $bug) {
	$product->{version} = $bug->{version};
	}
	else {
	$product->{version} = 'unspecified';
	}
	}
	}

	#########
	# Users #
	#########

	sub _read_users {
	my $self = shift;
	my $path = $self->config('gnats_path');
	my $file = "$path/gnats-adm/responsible";
	$self->debug("Reading users from $file");
	my $default_domain = $self->config('default_email_domain');
	open(my $users_fh, '<', $file) \|\| die "$file: $!";
	my @users;
	foreach my $line (<$users_fh>) {
	$line = trim($line);
	next if $line =~ /^#/;
	my ($id, $name, $email) = split(':', $line, 3);
	$email \|\|= "$id\@$default_domain";
	# We can't call our own translate_value, because that depends on
	# the existence of user_map, which doesn't exist until after
	# this method. However, we still want to translate any users found.
	$email = $self->SUPER::translate_value('user', $email);
	push(@users, { realname => $name, login_name => $email,
	gnats_id => $id });
	}
	close($users_fh);
	return \@users;
	}

	sub user_map {
	my $self = shift;
	$self->{user_map} \|\|= { map { $_->{gnats_id} => $_->{login_name} }
	@{ $self->users } };
	return $self->{user_map};
	}

	sub add_user {
	my ($self, $id, $email) = @_;
	return if defined $self->user_map->{$id};
	$self->user_map->{$id} = $email;
	push(@{ $self->users }, { login_name => $email, gnats_id => $id });
	}

	sub user_to_email {
	my ($self, $value) = @_;
	if (defined $self->user_map->{$value}) {
	$value = $self->user_map->{$value};
	}
	elsif ($value !~ /@/) {
	my $domain = $self->config('default_email_domain');
	$value = "$value\@$domain";
	}
	return $value;
	}

	############
	# Products #
	############

	sub _read_products {
	my $self = shift;
	my $path = $self->config('gnats_path');
	my $file = "$path/gnats-adm/categories";
	$self->debug("Reading categories from $file");

	open(my $categories_fh, '<', $file) \|\| die "$file: $!";
	my @products;
	foreach my $line (<$categories_fh>) {
	$line = trim($line);
	next if $line =~ /^#/;
	my ($name, $description, $assigned_to, $cc) = split(':', $line, 4);
	my %product = ( name => $name, description => $description );

	my @initial_cc = split(',', $cc);
	@initial_cc = @{ $self->translate_value('user', \@initial_cc) };
	$assigned_to = $self->translate_value('user', $assigned_to);
	my %component = ( name => $self->config('component_name'),
	description => $description,
	initialowner => $assigned_to,
	initial_cc => \@initial_cc );
	$product{components} = [\%component];
	push(@products, \%product);
	}
	close($categories_fh);
	return \@products;
	}

	################
	# Reading Bugs #
	################

	sub _read_bugs {
	my $self = shift;
	my $path = $self->config('gnats_path');
	my @directories = glob("$path/*");
	my @bugs;
	foreach my $directory (@directories) {
	next if !-d $directory;
	my $name = basename($directory);
	next if grep($_ eq $name, SKIP_DIRECTORIES);
	push(@bugs, @{ $self->_parse_project($directory) });
	}
	@bugs = sort { $a->{Number} <=> $b->{Number} } @bugs;
	return \@bugs;
	}

	sub _parse_project {
	my ($self, $directory) = @_;
	my @files = glob("$directory/*");

	$self->debug("Reading Project: $directory");
	# Sometimes other files get into gnats directories.
	@files = grep { basename($_) =~ /^\d+$/ } @files;
	my @bugs;
	my $count = 1;
	my $total = scalar @files;
	print basename($directory) . ":\n";
	foreach my $file (@files) {
	push(@bugs, $self->_parse_bug_file($file));
	if (!$self->verbose) {
	indicate_progress({ current => $count++, every => 5,
	total => $total });
	}
	}
	return \@bugs;
	}

	sub _parse_bug_file {
	my ($self, $file) = @_;
	$self->debug("Reading $file");
	open(my $fh, "<", $file) \|\| die "$file: $!";
	my $email = Email::Simple::FromHandle->new($fh);
	my $fields = $self->_get_gnats_field_data($email);
	# We parse attachments here instead of during translate_bug,
	# because otherwise we'd be taking up huge amounts of memory storing
	# all the raw attachment data in memory.
	$fields->{attachments} = $self->_parse_attachments($fields);
	close($fh);
	return $fields;
	}

	sub _get_gnats_field_data {
	my ($self, $email) = @_;
	my ($current_field, @value_lines, %fields);
	$email->reset_handle();
	my $handle = $email->handle;
	foreach my $line (<$handle>) {
	# If this line starts a field name
	if ($line =~ FIELD_REGEX) {
	my ($new_field, $rest_of_line) = ($1, $2);

	# If this is one of the last few PR fields, then make sure
	# that we're getting our fields in the right order.
	my $new_field_valid = 1;
	my $search_for = $current_field \|\| '';
	my $current_field_pos = firstidx { $_ eq $search_for }
	END_FIELD_ORDER;
	if ($current_field_pos > -1) {
	my $new_field_pos = firstidx { $_ eq $new_field }
	END_FIELD_ORDER;
	# We accept any field, as long as it's later than this one.
	$new_field_valid = $new_field_pos > $current_field_pos ? 1 : 0;
	}

	if ($new_field_valid) {
	if ($current_field) {
	$fields{$current_field} = _handle_lines(\@value_lines);
	@value_lines = ();
	}
	$current_field = $new_field;
	$line = $rest_of_line;
	}
	}
	push(@value_lines, $line) if defined $line;
	}
	$fields{$current_field} = _handle_lines(\@value_lines);
	$fields{cc} = [$email->header('Cc')] if $email->header('Cc');

	# If the Originator is invalid and we don't have a translation for it,
	# use the From header instead.
	my $originator = $self->translate_value('reporter', $fields{Originator},
	{ check_only => 1 });
	if ($originator !~ Bugzilla->params->{emailregexp}) {
	# We use the raw header sometimes, because it looks like "From: user"
	# which Email::Address won't parse but we can still use.
	my $address = $email->header('From');
	my ($parsed) = Email::Address->parse($address);
	if ($parsed) {
	$address = $parsed->address;
	}
	if ($address) {
	$self->debug(
	"PR $fields{Number} had an Originator that was not a valid"
	. " user ($fields{Originator}). Using From ($address)"
	. " instead.\n");
	my $address_email = $self->translate_value('reporter', $address,
	{ check_only => 1 });
	if ($address_email !~ Bugzilla->params->{emailregexp}) {
	$self->debug(" From was also invalid, using default_originator.\n");
	$address = $self->config('default_originator');
	}
	$fields{Originator} = $address;
	}
	}

	$self->debug(\%fields, 3);
	return \%fields;
	}

	sub _handle_lines {
	my ($lines) = @_;
	my $value = join('', @$lines);
	$value =~ s/\s+$//;
	return $value;
	}

	####################
	# Translating Bugs #
	####################

	sub translate_bug {
	my ($self, $fields) = @_;

	my ($bug, $other_fields) = $self->SUPER::translate_bug($fields);

	$bug->{attachments} = delete $other_fields->{attachments};

	if (defined $other_fields->{_add_to_comment}) {
	$bug->{comment} .= delete $other_fields->{_add_to_comment};
	}

	my ($changes, $extra_comment) =
	$self->_parse_audit_trail($bug, $other_fields->{'Audit-Trail'});

	my @comments;
	foreach my $change (@$changes) {
	if (exists $change->{comment}) {
	push(@comments, {
	thetext => $change->{comment},
	who => $change->{who},
	bug_when => $change->{bug_when} });
	delete $change->{comment};
	}
	}
	$bug->{history} = $changes;

	if (trim($extra_comment)) {
	push(@comments, { thetext => $extra_comment, who => $bug->{reporter},
	bug_when => $bug->{delta_ts} \|\| $bug->{creation_ts} });
	}
	$bug->{comments} = \@comments;

	$bug->{component} = $self->config('component_name');
	if (!$bug->{short_desc}) {
	$bug->{short_desc} = NO_SUBJECT;
	}

	foreach my $attachment (@{ $bug->{attachments} \|\| [] }) {
	$attachment->{submitter} = $bug->{reporter};
	$attachment->{creation_ts} = $bug->{creation_ts};
	}

	$self->debug($bug, 3);
	return $bug;
	}

	sub _parse_audit_trail {
	my ($self, $bug, $audit_trail) = @_;
	return [] if !trim($audit_trail);
	$self->debug(" Parsing audit trail...", 2);

	if ($audit_trail !~ /^\S+-Changed-\S+:/ms) {
	# This is just a comment from the bug's creator.
	$self->debug(" Audit trail is just a comment.", 2);
	return ([], $audit_trail);
	}

	my (@changes, %current_data, $current_column, $on_why);
	my $extra_comment = '';
	my $current_field;
	my @all_lines = split("\n", $audit_trail);
	foreach my $line (@all_lines) {
	# GNATS history looks like:
	# Status-Changed-From-To: open->closed
	# Status-Changed-By: jack
	# Status-Changed-When: Mon May 12 14:46:59 2003
	# Status-Changed-Why:
	# This is some comment here about the change.
	if ($line =~ /^(\S+)-Changed-(\S+):(.*)/) {
	my ($field, $column, $value) = ($1, $2, $3);
	my $bz_field = $self->translate_field($field);
	# If it's not a field we're importing, we don't care about
	# its history.
	next if !$bz_field;
	# GNATS doesn't track values for description changes,
	# unfortunately, and that's the only information we'd be able to
	# use in Bugzilla for the audit trail on that field.
	next if $bz_field eq 'comment';
	$current_field = $bz_field if !$current_field;
	if ($bz_field ne $current_field) {
	$self->_store_audit_change(
	\@changes, $current_field, \%current_data);
	%current_data = ();
	$current_field = $bz_field;
	}
	$value = trim($value);
	$self->debug(" $bz_field $column: $value", 3);
	if ($column eq 'From-To') {
	my ($from, $to) = split('->', $value, 2);
	# Sometimes there's just a - instead of a -> between the values.
	if (!defined($to)) {
	($from, $to) = split('-', $value, 2);
	}
	$current_data{added} = $to;
	$current_data{removed} = $from;
	}
	elsif ($column eq 'By') {
	my $email = $self->translate_value('user', $value);
	# Sometimes we hit users in the audit trail that we haven't
	# seen anywhere else.
	$current_data{who} = $email;
	}
	elsif ($column eq 'When') {
	$current_data{bug_when} = $self->parse_date($value);
	}
	if ($column eq 'Why') {
	$value = '' if !defined $value;
	$current_data{comment} = $value;
	$on_why = 1;
	}
	else {
	$on_why = 0;
	}
	}
	elsif ($on_why) {
	# "Why" lines are indented four characters.
	$line =~ s/^\s{4}//;
	$current_data{comment} .= "$line\n";
	}
	else {
	$self->debug(
	"Extra Audit-Trail line on $bug->{product} $bug->{bug_id}:"
	. " $line\n", 2);
	$extra_comment .= "$line\n";
	}
	}
	$self->_store_audit_change(\@changes, $current_field, \%current_data);
	return (\@changes, $extra_comment);
	}

	sub _store_audit_change {
	my ($self, $changes, $old_field, $current_data) = @_;

	$current_data->{field} = $old_field;
	$current_data->{removed} =
	$self->translate_value($old_field, $current_data->{removed});
	$current_data->{added} =
	$self->translate_value($old_field, $current_data->{added});
	push(@$changes, { %$current_data });
	}

	sub _parse_attachments {
	my ($self, $fields) = @_;
	my $unformatted = delete $fields->{'Unformatted'};
	my $gnats_boundary = GNATS_BOUNDARY;
	# A sanity checker to make sure that we're parsing attachments right.
	my $num_attachments = 0;
	$num_attachments++ while ($unformatted =~ /\Q$gnats_boundary\E/g);
	# Sometimes there's a GNATS_BOUNDARY that is on the same line as other data.
	$unformatted =~ s/(\S\s*)\Q$gnats_boundary\E$/$1\n$gnats_boundary/mg;
	# Often the "Unformatted" section starts with stuff before
	# ----gnatsweb-attachment---- that isn't necessary.
	$unformatted =~ s/^\s*From:.+?Reply-to:[^\n]+//s;
	$unformatted = trim($unformatted);
	return [] if !$unformatted;
	$self->debug('Reading attachments...', 2);
	my $boundary = generate_random_password(48);
	$unformatted =~ s/\Q$gnats_boundary\E/--$boundary/g;
	# Sometimes the whole Unformatted section is indented by exactly
	# one space, and needs to be fixed.
	if ($unformatted =~ /--\Q$boundary\E\n /) {
	$unformatted =~ s/^ //mg;
	}
	$unformatted = <<END;
	From: nobody
	MIME-Version: 1.0
	Content-Type: multipart/mixed; boundary="$boundary"

	This is a multi-part message in MIME format.
	--$boundary
	Content-Type: text/plain; charset=UTF-8
	Content-Transfer-Encoding: 7bit

	$unformatted
	--$boundary--
	END
	my $email = new Email::MIME(\$unformatted);
	my @parts = $email->parts;
	# Remove the fake body.
	my $part1 = shift @parts;
	if ($part1->body) {
	$self->debug(" Additional Unformatted data found on "
	. $fields->{Category} . " bug " . $fields->{Number});
	$self->debug($part1->body, 3);
	$fields->{_add_comment} .= "\n\nUnformatted:\n" . $part1->body;
	}

	my @attachments;
	foreach my $part (@parts) {
	$self->debug(' Parsing attachment: ' . $part->filename);
	my $temp_fh = IO::File->new_tmpfile or die ("Can't create tempfile: $!");
	$temp_fh->binmode;
	print $temp_fh $part->body;
	my $content_type = $part->content_type;
	$content_type =~ s/; name=.+$//;
	my $attachment = { filename => $part->filename,
	description => $part->filename,
	mimetype => $content_type,
	data => $temp_fh };
	$self->debug($attachment, 3);
	push(@attachments, $attachment);
	}

	if (scalar(@attachments) ne $num_attachments) {
	warn "WARNING: Expected $num_attachments attachments but got "
	. scalar(@attachments) . "\n" ;
	$self->debug($unformatted, 3);
	}
	return \@attachments;
	}

	sub translate_value {
	my $self = shift;
	my ($field, $value, $options) = @_;
	my $original_value = $value;
	$options \|\|= {};

	if (!ref($value) and grep($_ eq $field, $self->USER_FIELDS)) {
	if ($value =~ /(\S+\@\S+)/) {
	$value = $1;
	$value =~ s/^<//;
	$value =~ s/>$//;
	}
	else {
	# Sometimes names have extra stuff on the end like "(Somebody's Name)"
	$value =~ s/\s+\(.+\)$//;
	# Sometimes user fields look like "(user)" instead of just "user".
	$value =~ s/^\((.+)\)$/$1/;
	$value = trim($value);
	}
	}

	if ($field eq 'version' and $value ne '') {
	my $version_re = $self->config('version_regex');
	if ($version_re and $value =~ $version_re) {
	$value = $1;
	}
	# In the GNATS that I tested this with, there were many extremely long
	# values for "version" that caused some import problems (they were
	# longer than the max allowed version value). So if the version value
	# is longer than 32 characters, pull out the first thing that looks
	# like a version number.
	elsif (length($value) > LONG_VERSION_LENGTH) {
	$value =~ s/^.+?\b(\d[\w\.]+)\b.+$/$1/;
	}
	}

	my @args = @_;
	$args[1] = $value;

	$value = $self->SUPER::translate_value(@args);
	return $value if ref $value;

	if (grep($_ eq $field, $self->USER_FIELDS)) {
	my $from_value = $value;
	$value = $self->user_to_email($value);
	$args[1] = $value;
	# If we got something new from user_to_email, do any necessary
	# translation of it.
	$value = $self->SUPER::translate_value(@args);
	if (!$options->{check_only}) {
	$self->add_user($from_value, $value);
	}
	}

	return $value;
	}

	1;

	=head1 B<Methods in need of POD>

	=over

	=item user_map

	=item user_to_email

	=item add_user

	=item translate_value

	=item before_insert

	=item translate_bug

	=item CONFIG_VARS

	=back