blob: 830177f8b513bc80a271356702587870608d9a7e [file] [log] [blame]
# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this
# file, You can obtain one at http://mozilla.org/MPL/2.0/.
#
# This Source Code Form is "Incompatible With Secondary Licenses", as
# defined by the Mozilla Public License, v. 2.0.
package Bugzilla::Search::Quicksearch;
use 5.10.1;
use strict;
use warnings;
use Bugzilla::Error;
use Bugzilla::Constants;
use Bugzilla::Keyword;
use Bugzilla::Status;
use Bugzilla::Field;
use Bugzilla::Util;
use List::Util qw(min max);
use List::MoreUtils qw(firstidx);
use Text::ParseWords qw(parse_line);
use parent qw(Exporter);
@Bugzilla::Search::Quicksearch::EXPORT = qw(quicksearch);
# Custom mappings for some fields.
use constant MAPPINGS => {
# Status, Resolution, Platform, OS, Priority, Severity
"status" => "bug_status",
"platform" => "rep_platform",
"os" => "op_sys",
"severity" => "bug_severity",
# People: AssignedTo, Reporter, QA Contact, CC, etc.
"assignee" => "assigned_to",
"owner" => "assigned_to",
# Product, Version, Component, Target Milestone
"milestone" => "target_milestone",
# Summary, Description, URL, Status whiteboard, Keywords
"summary" => "short_desc",
"description" => "longdesc",
"comment" => "longdesc",
"url" => "bug_file_loc",
"whiteboard" => "status_whiteboard",
"sw" => "status_whiteboard",
"kw" => "keywords",
"group" => "bug_group",
# Flags
"flag" => "flagtypes.name",
"requestee" => "requestees.login_name",
"setter" => "setters.login_name",
# Attachments
"attachment" => "attachments.description",
"attachmentdesc" => "attachments.description",
"attachdesc" => "attachments.description",
"attachmentdata" => "attach_data.thedata",
"attachdata" => "attach_data.thedata",
"attachmentmimetype" => "attachments.mimetype",
"attachmimetype" => "attachments.mimetype"
};
sub FIELD_MAP {
my $cache = Bugzilla->request_cache;
return $cache->{quicksearch_fields} if $cache->{quicksearch_fields};
# Get all the fields whose names don't contain periods. (Fields that
# contain periods are always handled in MAPPINGS.)
my @db_fields = grep { $_->name !~ /\./ }
@{ Bugzilla->fields({ obsolete => 0 }) };
my %full_map = (%{ MAPPINGS() }, map { $_->name => $_->name } @db_fields);
# Eliminate the fields that start with bug_ or rep_, because those are
# handled by the MAPPINGS instead, and we don't want too many names
# for them. (Also, otherwise "rep" doesn't match "reporter".)
#
# Remove "status_whiteboard" because we have "whiteboard" for it in
# the mappings, and otherwise "stat" can't match "status".
#
# Also, don't allow searching the _accessible stuff via quicksearch
# (both because it's unnecessary and because otherwise
# "reporter_accessible" and "reporter" both match "rep".
delete @full_map{qw(rep_platform bug_status bug_file_loc bug_group
bug_severity bug_status
status_whiteboard
cclist_accessible reporter_accessible)};
Bugzilla::Hook::process('quicksearch_map', {'map' => \%full_map} );
$cache->{quicksearch_fields} = \%full_map;
return $cache->{quicksearch_fields};
}
# Certain fields, when specified like "field:value" get an operator other
# than "substring"
use constant FIELD_OPERATOR => {
content => 'matches',
owner_idle_time => 'greaterthan',
};
# Mappings for operators symbols to support operators other than "substring"
use constant OPERATOR_SYMBOLS => {
':' => 'substring',
'=' => 'equals',
'!=' => 'notequals',
'>=' => 'greaterthaneq',
'<=' => 'lessthaneq',
'>' => 'greaterthan',
'<' => 'lessthan',
};
# We might want to put this into localconfig or somewhere
use constant PRODUCT_EXCEPTIONS => (
'row', # [Browser]
# ^^^
'new', # [MailNews]
# ^^^
);
use constant COMPONENT_EXCEPTIONS => (
'hang' # [Bugzilla: Component/Keyword Changes]
# ^^^^
);
# Quicksearch-wide globals for boolean charts.
our ($chart, $and, $or, $fulltext, $bug_status_set);
sub quicksearch {
my ($searchstring) = (@_);
my $cgi = Bugzilla->cgi;
$chart = 0;
$and = 0;
$or = 0;
# Remove leading and trailing commas and whitespace.
$searchstring =~ s/(^[\s,]+|[\s,]+$)//g;
ThrowUserError('buglist_parameters_required') unless ($searchstring);
if ($searchstring =~ m/^[0-9,\s]*$/) {
_bug_numbers_only($searchstring);
}
else {
_handle_alias($searchstring);
# Retain backslashes and quotes, to know which strings are quoted,
# and which ones are not.
my @words = _parse_line('\s+', 1, $searchstring);
# If parse_line() returns no data, this means strings are badly quoted.
# Rather than trying to guess what the user wanted to do, we throw an error.
scalar(@words)
|| ThrowUserError('quicksearch_unbalanced_quotes', {string => $searchstring});
# A query cannot start with AND or OR, nor can it end with AND, OR or NOT.
ThrowUserError('quicksearch_invalid_query')
if ($words[0] =~ /^(?:AND|OR)$/ || $words[$#words] =~ /^(?:AND|OR|NOT)$/);
my (@qswords, @or_group);
while (scalar @words) {
my $word = shift @words;
# AND is the default word separator, similar to a whitespace,
# but |a AND OR b| is not a valid combination.
if ($word eq 'AND') {
ThrowUserError('quicksearch_invalid_query', {operators => ['AND', 'OR']})
if $words[0] eq 'OR';
}
# |a OR AND b| is not a valid combination.
# |a OR OR b| is equivalent to |a OR b| and so is harmless.
elsif ($word eq 'OR') {
ThrowUserError('quicksearch_invalid_query', {operators => ['OR', 'AND']})
if $words[0] eq 'AND';
}
# NOT negates the following word.
# |NOT AND| and |NOT OR| are not valid combinations.
# |NOT NOT| is fine but has no effect as they cancel themselves.
elsif ($word eq 'NOT') {
$word = shift @words;
next if $word eq 'NOT';
if ($word eq 'AND' || $word eq 'OR') {
ThrowUserError('quicksearch_invalid_query', {operators => ['NOT', $word]});
}
unshift(@words, "-$word");
}
else {
# OR groups words together, as OR has higher precedence than AND.
push(@or_group, $word);
# If the next word is not OR, then we are not in a OR group,
# or we are leaving it.
if (!defined $words[0] || $words[0] ne 'OR') {
push(@qswords, join('|', @or_group));
@or_group = ();
}
}
}
_handle_status_and_resolution($qswords[0]);
shift(@qswords) if $bug_status_set;
my (@unknownFields, %ambiguous_fields);
$fulltext = Bugzilla->user->setting('quicksearch_fulltext') eq 'on' ? 1 : 0;
# Loop over all main-level QuickSearch words.
foreach my $qsword (@qswords) {
my @or_operand = _parse_line('\|', 1, $qsword);
foreach my $term (@or_operand) {
next unless defined $term;
my $negate = substr($term, 0, 1) eq '-';
if ($negate) {
$term = substr($term, 1);
}
next if _handle_special_first_chars($term, $negate);
next if _handle_field_names($term, $negate, \@unknownFields,
\%ambiguous_fields);
# Having ruled out the special cases, we may now split
# by comma, which is another legal boolean OR indicator.
# Remove quotes from quoted words, if any.
@words = _parse_line(',', 0, $term);
foreach my $word (@words) {
if (!_special_field_syntax($word, $negate)) {
_default_quicksearch_word($word, $negate);
}
_handle_urls($word, $negate);
}
}
$chart++;
$and = 0;
$or = 0;
}
# If there is no mention of a bug status, we restrict the query
# to open bugs by default.
unless ($bug_status_set) {
$cgi->param('bug_status', BUG_STATE_OPEN);
}
# Inform user about any unknown fields
if (scalar(@unknownFields) || scalar(keys %ambiguous_fields)) {
ThrowUserError("quicksearch_unknown_field",
{ unknown => \@unknownFields,
ambiguous => \%ambiguous_fields });
}
# Make sure we have some query terms left
scalar($cgi->param())>0 || ThrowUserError("buglist_parameters_required");
}
# List of quicksearch-specific CGI parameters to get rid of.
my @params_to_strip = ('quicksearch', 'load', 'run');
my $modified_query_string = $cgi->canonicalise_query(@params_to_strip);
if ($cgi->param('load')) {
my $urlbase = correct_urlbase();
# Param 'load' asks us to display the query in the advanced search form.
print $cgi->redirect(-uri => "${urlbase}query.cgi?format=advanced&amp;"
. $modified_query_string);
}
# Otherwise, pass the modified query string to the caller.
# We modified $cgi->params, so the caller can choose to look at that, too,
# and disregard the return value.
$cgi->delete(@params_to_strip);
return $modified_query_string;
}
##########################
# Parts of quicksearch() #
##########################
sub _parse_line {
my ($delim, $keep, $line) = @_;
return () unless defined $line;
# parse_line always treats ' as a quote character, making it impossible
# to sanely search for contractions. As this behavour isn't
# configurable, we replace ' with a placeholder to hide it from the
# parser.
# only treat ' at the start or end of words as quotes
# it's easier to do this in reverse with regexes
$line =~ s/(^|\s|:)'/$1\001/g;
$line =~ s/'($|\s)/\001$1/g;
$line =~ s/\\?'/\000/g;
$line =~ tr/\001/'/;
my @words = parse_line($delim, $keep, $line);
foreach my $word (@words) {
$word =~ tr/\000/'/ if defined $word;
}
return @words;
}
sub _bug_numbers_only {
my $searchstring = shift;
my $cgi = Bugzilla->cgi;
# Allow separation by comma or whitespace.
$searchstring =~ s/[,\s]+/,/g;
if ($searchstring !~ /,/ && !i_am_webservice()) {
# Single bug number; shortcut to show_bug.cgi.
print $cgi->redirect(
-uri => correct_urlbase() . "show_bug.cgi?id=$searchstring");
exit;
}
else {
# List of bug numbers.
$cgi->param('bug_id', $searchstring);
$cgi->param('order', 'bugs.bug_id');
$cgi->param('bug_id_type', 'anyexact');
}
}
sub _handle_alias {
my $searchstring = shift;
if ($searchstring =~ /^([^,\s]+)$/) {
my $alias = $1;
# We use this direct SQL because we want quicksearch to be VERY fast.
my $bug_id = Bugzilla->dbh->selectrow_array(
q{SELECT bug_id FROM bugs_aliases WHERE alias = ?}, undef, $alias);
# If the user cannot see the bug or if we are using a webservice,
# do not resolve its alias.
if ($bug_id && Bugzilla->user->can_see_bug($bug_id) && !i_am_webservice()) {
$alias = url_quote($alias);
print Bugzilla->cgi->redirect(
-uri => correct_urlbase() . "show_bug.cgi?id=$alias");
exit;
}
}
}
sub _handle_status_and_resolution {
my $word = shift;
my $legal_statuses = get_legal_field_values('bug_status');
my (%states, %resolutions);
$bug_status_set = 1;
if ($word eq 'OPEN') {
$states{$_} = 1 foreach BUG_STATE_OPEN;
}
# If we want all bugs, then there is nothing to do.
elsif ($word ne 'ALL'
&& !matchPrefixes(\%states, \%resolutions, $word, $legal_statuses))
{
$bug_status_set = 0;
}
# If we have wanted resolutions, allow closed states
if (keys(%resolutions)) {
foreach my $status (@$legal_statuses) {
$states{$status} = 1 unless is_open_state($status);
}
}
Bugzilla->cgi->param('bug_status', keys(%states));
Bugzilla->cgi->param('resolution', keys(%resolutions));
}
sub _handle_special_first_chars {
my ($qsword, $negate) = @_;
return 0 if !defined $qsword || length($qsword) <= 1;
my $firstChar = substr($qsword, 0, 1);
my $baseWord = substr($qsword, 1);
my @subWords = split(/,/, $baseWord);
if ($firstChar eq '#') {
addChart('short_desc', 'substring', $baseWord, $negate);
addChart('content', 'matches', _matches_phrase($baseWord), $negate) if $fulltext;
return 1;
}
if ($firstChar eq ':') {
foreach (@subWords) {
addChart('product', 'substring', $_, $negate);
addChart('component', 'substring', $_, $negate);
}
return 1;
}
if ($firstChar eq '@') {
addChart('assigned_to', 'substring', $_, $negate) foreach (@subWords);
return 1;
}
if ($firstChar eq '[') {
addChart('short_desc', 'substring', $baseWord, $negate);
addChart('status_whiteboard', 'substring', $baseWord, $negate);
return 1;
}
if ($firstChar eq '!') {
addChart('keywords', 'anywords', $baseWord, $negate);
return 1;
}
return 0;
}
sub _handle_field_names {
my ($or_operand, $negate, $unknownFields, $ambiguous_fields) = @_;
# Generic field1,field2,field3:value1,value2 notation.
# We have to correctly ignore commas and colons in quotes.
# Longer operators must be tested first as we don't want single character
# operators such as <, > and = to be tested before <=, >= and !=.
my @operators = sort { length($b) <=> length($a) } keys %{ OPERATOR_SYMBOLS() };
foreach my $symbol (@operators) {
my @field_values = _parse_line($symbol, 1, $or_operand);
next unless scalar @field_values == 2;
my @fields = _parse_line(',', 1, $field_values[0]);
my @values = _parse_line(',', 1, $field_values[1]);
foreach my $field (@fields) {
my $translated = _translate_field_name($field);
# Skip and record any unknown fields
if (!defined $translated) {
push(@$unknownFields, $field);
}
# If we got back an array, that means the substring is
# ambiguous and could match more than field name
elsif (ref $translated) {
$ambiguous_fields->{$field} = $translated;
}
else {
if ($translated eq 'bug_status' || $translated eq 'resolution') {
$bug_status_set = 1;
}
foreach my $value (@values) {
my $operator = FIELD_OPERATOR->{$translated}
|| OPERATOR_SYMBOLS->{$symbol}
|| 'substring';
# If the string was quoted to protect some special
# characters such as commas and colons, we need
# to remove quotes.
if ($value =~ /^(["'])(.+)\1$/) {
$value = $2;
$value =~ s/\\(["'])/$1/g;
}
# If a requestee is set, we need to handle it separately.
if ($translated eq 'flagtypes.name' && $value =~ /^([^\?]+\?)([^\?]+)$/) {
_handle_flags($1, $2, $negate);
next;
}
addChart($translated, $operator, $value, $negate);
}
}
}
return 1;
}
# Do not look inside quoted strings.
return 0 if ($or_operand =~ /^(["']).*\1$/);
# Flag and requestee shortcut.
if ($or_operand =~ /^([^\?]+\?)([^\?]*)$/) {
_handle_flags($1, $2, $negate);
return 1;
}
return 0;
}
sub _handle_flags {
my ($flag, $requestee, $negate) = @_;
addChart('flagtypes.name', 'substring', $flag, $negate);
if ($requestee) {
# FIXME - Every time a requestee is involved and you use OR somewhere
# in your quick search, the logic will be wrong because boolean charts
# are unable to run queries of the form (a AND b) OR c. In our case:
# (flag name is foo AND requestee is bar) OR (any other criteria).
# But this has never been possible, so this is not a regression. If one
# needs to run such queries, they must use the Custom Search section of
# the Advanced Search page.
$chart++;
$and = $or = 0;
addChart('requestees.login_name', 'substring', $requestee, $negate);
}
}
sub _translate_field_name {
my $field = shift;
$field = lc($field);
my $field_map = FIELD_MAP;
# If the field exactly matches a mapping, just return right now.
return $field_map->{$field} if exists $field_map->{$field};
# Check if we match, as a starting substring, exactly one field.
my @field_names = keys %$field_map;
my @matches = grep { $_ =~ /^\Q$field\E/ } @field_names;
# Eliminate duplicates that are actually the same field
# (otherwise "assi" matches both "assignee" and "assigned_to", and
# the lines below fail when they shouldn't.)
my %match_unique = map { $field_map->{$_} => $_ } @matches;
@matches = values %match_unique;
if (scalar(@matches) == 1) {
return $field_map->{$matches[0]};
}
elsif (scalar(@matches) > 1) {
return \@matches;
}
# Check if we match exactly one custom field, ignoring the cf_ on the
# custom fields (to allow people to type things like "build" for
# "cf_build").
my %cfless;
foreach my $name (@field_names) {
my $no_cf = $name;
if ($no_cf =~ s/^cf_//) {
if ($field eq $no_cf) {
return $field_map->{$name};
}
$cfless{$no_cf} = $name;
}
}
# See if we match exactly one substring of any of the cf_-less fields.
my @cfless_matches = grep { $_ =~ /^\Q$field\E/ } (keys %cfless);
if (scalar(@cfless_matches) == 1) {
my $match = $cfless_matches[0];
my $actual_field = $cfless{$match};
return $field_map->{$actual_field};
}
elsif (scalar(@matches) > 1) {
return \@matches;
}
return undef;
}
sub _special_field_syntax {
my ($word, $negate) = @_;
# P1-5 Syntax
if ($word =~ m/^P(\d+)(?:-(\d+))?$/i) {
my ($p_start, $p_end) = ($1, $2);
my $legal_priorities = get_legal_field_values('priority');
# If Pn exists explicitly, use it.
my $start = firstidx { $_ eq "P$p_start" } @$legal_priorities;
my $end;
$end = firstidx { $_ eq "P$p_end" } @$legal_priorities if defined $p_end;
# If Pn doesn't exist explicitly, then we mean the nth priority.
if ($start == -1) {
$start = max(0, $p_start - 1);
}
my $prios = $legal_priorities->[$start];
if (defined $end) {
# If Pn doesn't exist explicitly, then we mean the nth priority.
if ($end == -1) {
$end = min(scalar(@$legal_priorities), $p_end) - 1;
$end = max(0, $end); # Just in case the user typed P0.
}
($start, $end) = ($end, $start) if $end < $start;
$prios = join(',', @$legal_priorities[$start..$end])
}
addChart('priority', 'anyexact', $prios, $negate);
return 1;
}
return 0;
}
sub _default_quicksearch_word {
my ($word, $negate) = @_;
if (!grep { lc($word) eq $_ } PRODUCT_EXCEPTIONS and length($word) > 2) {
addChart('product', 'substring', $word, $negate);
}
if (!grep { lc($word) eq $_ } COMPONENT_EXCEPTIONS and length($word) > 2) {
addChart('component', 'substring', $word, $negate);
}
my @legal_keywords = map($_->name, Bugzilla::Keyword->get_all);
if (grep { lc($word) eq lc($_) } @legal_keywords) {
addChart('keywords', 'substring', $word, $negate);
}
addChart('alias', 'substring', $word, $negate);
addChart('short_desc', 'substring', $word, $negate);
addChart('status_whiteboard', 'substring', $word, $negate);
addChart('content', 'matches', _matches_phrase($word), $negate) if $fulltext;
}
sub _handle_urls {
my ($word, $negate) = @_;
# URL field (for IP addrs, host.names,
# scheme://urls)
if ($word =~ m/[0-9]+\.[0-9]+\.[0-9]+\.[0-9]+/
|| $word =~ /^[A-Za-z]+(\.[A-Za-z]+)+/
|| $word =~ /:[\\\/][\\\/]/
|| $word =~ /localhost/
|| $word =~ /mailto[:]?/)
# || $word =~ /[A-Za-z]+[:][0-9]+/ #host:port
{
addChart('bug_file_loc', 'substring', $word, $negate);
}
}
###########################################################################
# Helpers
###########################################################################
# Quote and escape a phrase appropriately for a "content matches" search.
sub _matches_phrase {
my ($phrase) = @_;
$phrase =~ s/"/\\"/g;
return "\"$phrase\"";
}
# Expand found prefixes to states or resolutions
sub matchPrefixes {
my ($hr_states, $hr_resolutions, $word, $ar_check_states) = @_;
return unless $word =~ /^[A-Z_]+(,[A-Z_]+)*$/;
my @ar_prefixes = split(/,/, $word);
my $ar_check_resolutions = get_legal_field_values('resolution');
my $foundMatch = 0;
foreach my $prefix (@ar_prefixes) {
foreach (@$ar_check_states) {
if (/^$prefix/) {
$$hr_states{$_} = 1;
$foundMatch = 1;
}
}
foreach (@$ar_check_resolutions) {
if (/^$prefix/) {
$$hr_resolutions{$_} = 1;
$foundMatch = 1;
}
}
}
return $foundMatch;
}
# Negate comparison type
sub negateComparisonType {
my $comparisonType = shift;
if ($comparisonType eq 'anywords') {
return 'nowords';
}
return "not$comparisonType";
}
# Add a boolean chart
sub addChart {
my ($field, $comparisonType, $value, $negate) = @_;
$negate && ($comparisonType = negateComparisonType($comparisonType));
makeChart("$chart-$and-$or", $field, $comparisonType, $value);
if ($negate) {
$and++;
$or = 0;
}
else {
$or++;
}
}
# Create the CGI parameters for a boolean chart
sub makeChart {
my ($expr, $field, $type, $value) = @_;
my $cgi = Bugzilla->cgi;
$cgi->param("field$expr", $field);
$cgi->param("type$expr", $type);
$cgi->param("value$expr", $value);
}
1;
=head1 B<Methods in need of POD>
=over
=item FIELD_MAP
=item quicksearch
=item negateComparisonType
=item makeChart
=item addChart
=item matchPrefixes
=back