blob: 3db2488d60ec41b081f7a917d0e261b27e624a27 [file] [log] [blame]
/*
* Copyright (C) 2005, 2006, 2007 Apple Inc. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
*
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. Neither the name of Apple Inc. ("Apple") nor the names of
* its contributors may be used to endorse or promote products derived
* from this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY APPLE AND ITS CONTRIBUTORS "AS IS" AND ANY
* EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* DISCLAIMED. IN NO EVENT SHALL APPLE OR ITS CONTRIBUTORS BE LIABLE FOR ANY
* DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
* THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#import "WebHTMLRepresentation.h"
#import "DOMElementInternal.h"
#import "DOMNodeInternal.h"
#import "DOMRangeInternal.h"
#import "WebArchive.h"
#import "WebBasePluginPackage.h"
#import "WebDataSourceInternal.h"
#import "WebDocumentPrivate.h"
#import "WebFrameInternal.h"
#import "WebKitNSStringExtras.h"
#import "WebKitStatisticsPrivate.h"
#import "WebNSObjectExtras.h"
#import "WebView.h"
#import <Foundation/NSURLResponse.h>
#import <JavaScriptCore/RegularExpression.h>
#import <WebCore/Document.h>
#import <WebCore/DocumentLoader.h>
#import <WebCore/Editor.h>
#import <WebCore/ElementInlines.h>
#import <WebCore/Frame.h>
#import <WebCore/FrameLoader.h>
#import <WebCore/FrameLoaderClient.h>
#import <WebCore/HTMLConverter.h>
#import <WebCore/HTMLFormControlElement.h>
#import <WebCore/HTMLFormElement.h>
#import <WebCore/HTMLInputElement.h>
#import <WebCore/HTMLNames.h>
#import <WebCore/HTMLTableCellElement.h>
#import <WebCore/MIMETypeRegistry.h>
#import <WebCore/NodeTraversal.h>
#import <WebCore/Range.h>
#import <WebCore/RenderElement.h>
#import <WebCore/ScriptDisallowedScope.h>
#import <WebCore/TextResourceDecoder.h>
#import <WebKitLegacy/DOMHTMLInputElement.h>
#import <wtf/Assertions.h>
#import <wtf/FixedVector.h>
#import <wtf/NeverDestroyed.h>
#import <wtf/StdLibExtras.h>
#import <wtf/cocoa/VectorCocoa.h>
#import <wtf/text/StringBuilder.h>
using namespace WebCore;
using namespace HTMLNames;
using JSC::Yarr::RegularExpression;
// Holds the mutable state of WebHTMLRepresentation; the representation reaches it
// through its _private ivar and accesses these @public ivars directly.
@interface WebHTMLRepresentationPrivate : NSObject {
@public
// Data source being represented. Assigned in -setDataSource: without a retain.
WebDataSource *dataSource;
// Set to YES after the response has been forwarded to manualLoader in -receivedData:withDataSource:.
BOOL hasSentResponseToPlugin;
// YES once this representation has been added to WebHTMLRepresentationCount; checked in -dealloc to undo the count.
BOOL includedInWebKitStatistics;
// Loader and plug-in view installed by -_redirectDataToManualLoader:forPluginView:. Both are assigned without a retain.
id <WebPluginManualLoader> manualLoader;
NSView *pluginView;
}
@end
// Intentionally empty: the class declares no methods, only the @public ivars used by WebHTMLRepresentation.
@implementation WebHTMLRepresentationPrivate
@end
@implementation WebHTMLRepresentation
// Converts a case-insensitive set of WTF strings into an NSArray of NSString objects.
static RetainPtr<NSArray> createNSArray(const HashSet<String, ASCIICaseInsensitiveHash>& set)
{
    auto strings = copyToVectorOf<NSString *>(set);
    NSArray *array = [[NSArray alloc] initWithObjects:strings.data() count:strings.size()];
    // adoptNS takes over the +1 reference returned by alloc/init.
    return adoptNS(array);
}
// Returns the concatenation of the non-image, image and media MIME type lists.
// The combined array is computed once and cached for the lifetime of the process.
+ (NSArray *)supportedMIMETypes
{
    static NeverDestroyed<RetainPtr<NSArray>> allSupportedTypes = [&] {
        NSArray *nonImageAndImageTypes = [[self supportedNonImageMIMETypes] arrayByAddingObjectsFromArray:[self supportedImageMIMETypes]];
        return [nonImageAndImageTypes arrayByAddingObjectsFromArray:[self supportedMediaMIMETypes]];
    }();
    return allSupportedTypes.get().get();
}
// Returns the media MIME types reported by MIMETypeRegistry, converted once and cached.
+ (NSArray *)supportedMediaMIMETypes
{
    static NeverDestroyed<RetainPtr<NSArray>> mediaTypes { createNSArray(MIMETypeRegistry::supportedMediaMIMETypes()) };
    NSArray *types = mediaTypes.get().get();
    return types;
}
// Returns the non-image MIME types reported by MIMETypeRegistry, converted once and cached.
+ (NSArray *)supportedNonImageMIMETypes
{
    static NeverDestroyed<RetainPtr<NSArray>> nonImageTypes { createNSArray(MIMETypeRegistry::supportedNonImageMIMETypes()) };
    NSArray *types = nonImageTypes.get().get();
    return types;
}
// Returns the image MIME types reported by MIMETypeRegistry, converted once and cached.
+ (NSArray *)supportedImageMIMETypes
{
    static NeverDestroyed<RetainPtr<NSArray>> imageTypes { createNSArray(MIMETypeRegistry::supportedImageMIMETypes()) };
    NSArray *types = imageTypes.get().get();
    return types;
}
// Returns the text MIME types that MIMETypeRegistry reports as unsupported, converted once and cached.
+ (NSArray *)unsupportedTextMIMETypes
{
    static NeverDestroyed<RetainPtr<NSArray>> unsupportedTextTypes { createNSArray(MIMETypeRegistry::unsupportedTextMIMETypes()) };
    NSArray *types = unsupportedTextTypes.get().get();
    return types;
}
// Creates the representation together with its private state object.
// Returns nil when the superclass initializer fails.
- (id)init
{
    if ((self = [super init]))
        _private = [[WebHTMLRepresentationPrivate alloc] init];
    return self;
}
// Undoes the statistics count taken in -setDataSource: (if any) and releases the private state.
- (void)dealloc
{
    BOOL wasCounted = _private && _private->includedInWebKitStatistics;
    if (wasCounted)
        --WebHTMLRepresentationCount;

    [_private release];
    [super dealloc];
}
// Records the manual loader and plug-in view that should receive this representation's
// response, data, error and completion callbacks. Neither object is retained here.
- (void)_redirectDataToManualLoader:(id<WebPluginManualLoader>)manualLoader forPluginView:(NSView *)pluginView
{
    WebHTMLRepresentationPrivate *state = _private;
    state->manualLoader = manualLoader;
    state->pluginView = pluginView;
}
// Stores the data source (without retaining it) and, the first time the data source's
// frame is included in WebKit statistics, counts this representation.
- (void)setDataSource:(WebDataSource *)dataSource
{
    _private->dataSource = dataSource;

    // Already counted: nothing more to do.
    if (_private->includedInWebKitStatistics)
        return;

    if (![[dataSource webFrame] _isIncludedInWebKitStatistics])
        return;

    _private->includedInWebKitStatistics = YES;
    ++WebHTMLRepresentationCount;
}
// Returns YES when the data source's response MIME type is the web archive type,
// compared case-insensitively.
- (BOOL)_isDisplayingWebArchive
{
    NSString *responseMIMEType = [_private->dataSource _responseMIMEType];
    return [responseMIMEType _webkit_isCaseInsensitiveEqualToString:@"application/x-webarchive"];
}
// Handles a chunk of main-resource data.
//
// Without a plug-in view the data is committed to the frame. With a plug-in view the data
// is forwarded to the manual loader instead, preceded by the response the first time.
// In both cases, if the frame's document is a stand-alone media document, the main
// resource load is cancelled.
- (void)receivedData:(NSData *)data withDataSource:(WebDataSource *)dataSource
{
    // Keep self alive for the duration of the call.
    // NOTE(review): presumably needed because committing data can release this object — confirm.
    auto protectedSelf = retainPtr(self);

    WebFrame *webFrame = [dataSource webFrame];
    if (!webFrame)
        return;

    if (!_private->pluginView)
        [webFrame _commitData:data];

    // If the document is a stand-alone media document, now is the right time to cancel the WebKit load.
    // The core frame and its document are looked up after the commit and null-checked, matching
    // -documentSource, which treats both as nullable.
    if (Frame* coreFrame = core(webFrame)) {
        auto* document = coreFrame->document();
        auto* documentLoader = coreFrame->loader().documentLoader();
        if (document && document->isMediaDocument() && documentLoader)
            documentLoader->cancelMainResourceLoad(coreFrame->loader().client().pluginWillHandleLoadError(documentLoader->response()));
    }

    if (_private->pluginView) {
        if (!_private->hasSentResponseToPlugin) {
            // The plug-in must see the response exactly once, before any data.
            [_private->manualLoader pluginView:_private->pluginView receivedResponse:[dataSource response]];
            _private->hasSentResponseToPlugin = YES;
        }
        [_private->manualLoader pluginView:_private->pluginView receivedData:data];
    }
}
// Forwards a load error to the manual loader when a plug-in view is installed; otherwise does nothing.
- (void)receivedError:(NSError *)error withDataSource:(WebDataSource *)dataSource
{
    NSView *pluginView = _private->pluginView;
    if (!pluginView)
        return;

    [_private->manualLoader pluginView:pluginView receivedError:error];
}
// Called when the main resource has finished loading.
// With a plug-in view installed, only notifies the manual loader. Otherwise, if the frame is
// the main frame of an editable web view, applies the editing style to the body element.
- (void)finishedLoadingWithDataSource:(WebDataSource *)dataSource
{
    WebFrame *frame = [dataSource webFrame];

    if (NSView *pluginView = _private->pluginView) {
        [_private->manualLoader pluginViewFinishedLoading:pluginView];
        return;
    }

    if (!frame)
        return;

    WebView *webView = [frame webView];
    BOOL isEditableMainFrame = [webView mainFrame] == frame && [webView isEditable];
    if (isEditableMainFrame)
        core(frame)->editor().applyEditingStyleToBodyElement();
}
// Asks the data source's frame whether document source can be provided.
- (BOOL)canProvideDocumentSource
{
    WebFrame *frame = [_private->dataSource webFrame];
    return [frame _canProvideDocumentSource];
}
// Asks the data source's frame whether it can be saved as a web archive.
- (BOOL)canSaveAsWebArchive
{
    WebFrame *frame = [_private->dataSource webFrame];
    return [frame _canSaveAsWebArchive];
}
// Returns the document's source text.
//
// For a web archive, the parsed archive data is interpreted as UTF-8. Otherwise the data
// source's raw bytes are decoded with the encoding of the document's text decoder.
// Returns nil when the frame, document, decoder or data is unavailable.
- (NSString *)documentSource
{
    if ([self _isDisplayingWebArchive]) {
        auto *archiveData = [_private->dataSource _documentLoader]->parsedArchiveData();
        // Hold the NSData in a RetainPtr so it stays alive while the string is created.
        RetainPtr<NSData> archiveNSData;
        if (archiveData)
            archiveNSData = archiveData->createNSData();
        return adoptNS([[NSString alloc] initWithData:archiveNSData.get() encoding:NSUTF8StringEncoding]).autorelease();
    }

    Frame* frame = core([_private->dataSource webFrame]);
    Document* document = frame ? frame->document() : nullptr;
    TextResourceDecoder* decoder = document ? document->decoder() : nullptr;
    if (!decoder)
        return nil;

    NSData *rawData = [_private->dataSource data];
    if (!rawData)
        return nil;

    return decoder->encoding().decode(reinterpret_cast<const char*>([rawData bytes]), [rawData length]);
}
// Returns the document loader's title, or nil when the title is empty or when there is
// no document loader (for example because no data source has been set).
- (NSString *)title
{
    // Messaging a nil data source yields a null loader; guard before dereferencing it.
    auto* documentLoader = [_private->dataSource _documentLoader];
    if (!documentLoader)
        return nil;
    return nsStringNilIfEmpty(documentLoader->title().string);
}
// Returns the DOM document of the data source's frame.
- (DOMDocument *)DOMDocument
{
    WebFrame *frame = [_private->dataSource webFrame];
    return [frame DOMDocument];
}
#if PLATFORM(MAC)
// Always returns nil; use -attributedStringFrom:startOffset:to:endOffset: to obtain
// attributed text for a specific range.
- (NSAttributedString *)attributedText
{
return nil;
}
// Returns the attributed string for the range between the two DOM boundary points.
// If either node is nil, an empty attributed string is returned instead.
- (NSAttributedString *)attributedStringFrom:(DOMNode *)startNode startOffset:(int)startOffset to:(DOMNode *)endNode endOffset:(int)endOffset
{
    if (!(startNode && endNode))
        return adoptNS([[NSAttributedString alloc] init]).autorelease();

    // Offsets arrive as int from the Objective-C API but boundary points use unsigned offsets.
    auto rangeStartOffset = static_cast<unsigned>(startOffset);
    auto rangeEndOffset = static_cast<unsigned>(endOffset);
    SimpleRange range { { *core(startNode), rangeStartOffset }, { *core(endNode), rangeEndOffset } };
    return editingAttributedString(range).string.autorelease();
}
#endif
// Returns the HTMLFormElement wrapped by `element`, or nullptr when `element` is nil or
// is not a form element. Uses the same checked downcast as inputElementFromDOMElement
// rather than a tag comparison followed by an unchecked static_cast.
static HTMLFormElement* formElementFromDOMElement(DOMElement *element)
{
    Element* node = core(element);
    return dynamicDowncast<HTMLFormElement>(node);
}
// Returns the first element associated with `form` whose form-associated name equals
// `name`, or nil when `form` is not a form element or no associated element matches.
- (DOMElement *)elementWithName:(NSString *)name inForm:(DOMElement *)form
{
    HTMLFormElement* coreForm = formElementFromDOMElement(form);
    if (!coreForm)
        return nil;

    // Script must not run while the unsafe associated-elements list is being walked.
    ScriptDisallowedScope::InMainThread scriptDisallowedScope;
    AtomString targetName = name;
    for (auto& weakElement : coreForm->unsafeAssociatedElements()) {
        RefPtr candidate { weakElement.get() };
        // Skip entries whose weak reference is gone or whose name differs.
        if (!candidate || !(candidate->asFormAssociatedElement()->name() == targetName))
            continue;
        return kit(candidate.get());
    }
    return nil;
}
// Returns the HTMLInputElement wrapped by `element`, or nullptr when it is not an input element.
static HTMLInputElement* inputElementFromDOMElement(DOMElement* element)
{
    return dynamicDowncast<HTMLInputElement>(core(element));
}
// Returns YES only for input elements that are text fields, are not password fields,
// and report that they should autocomplete.
- (BOOL)elementDoesAutoComplete:(DOMElement *)element
{
    HTMLInputElement* input = inputElementFromDOMElement(element);
    if (!input)
        return NO;
    if (!input->isTextField() || input->isPasswordField())
        return NO;
    return input->shouldAutocomplete();
}
// Returns YES when `element` is an input element that is a password field.
- (BOOL)elementIsPassword:(DOMElement *)element
{
    if (HTMLInputElement* input = inputElementFromDOMElement(element))
        return input->isPasswordField();
    return NO;
}
// Returns the form that owns `element`, or nil when `element` is not an input element.
- (DOMElement *)formForElement:(DOMElement *)element
{
    auto input = inputElementFromDOMElement(element);
    if (!input)
        return nil;
    return kit(input->form());
}
// Returns the form reported by the frame selection's currentForm(), or nil when the
// data source has no core frame.
- (DOMElement *)currentForm
{
    // core() can return null (see -documentSource), so guard before dereferencing.
    Frame* coreFrame = core([_private->dataSource webFrame]);
    if (!coreFrame)
        return nil;
    return kit(coreFrame->selection().currentForm());
}
// Returns the enumeratable elements associated with `form`.
// Returns nil when `form` is not a form element or when no such element exists.
- (NSArray *)controlsInForm:(DOMElement *)form
{
    auto coreForm = formElementFromDOMElement(form);
    if (!coreForm)
        return nil;

    // Script must not run while the unsafe associated-elements list is being walked.
    ScriptDisallowedScope::InMainThread scriptDisallowedScope;
    auto controls = createNSArray(coreForm->unsafeAssociatedElements(), [] (auto& weakElement) -> DOMElement * {
        RefPtr control { weakElement.get() };
        // Skip option elements, other duds
        bool isEnumeratable = control && control->asFormAssociatedElement()->isEnumeratable();
        return isEnumeratable ? kit(control.get()) : nil;
    });

    if (![controls count])
        return nil;
    return controls.autorelease();
}
// Either get cached regexp or build one that matches any of the labels.
// The regexp we build is of the form: (STR1|STR2|STRN)
//
// The returned pointer is owned by the cache below and is deleted when its entry is
// evicted, so callers should use it immediately and not hold it across later calls.
static RegularExpression* regExpForLabels(NSArray *labels)
{
    // All the ObjC calls in this method are simple array and string
    // calls which we can assume do not raise exceptions

    // Parallel arrays that we use to cache regExps. In practice the number of expressions
    // that the app will use is equal to the number of locales used in searching.
    static const unsigned int regExpCacheSize = 4;
    static NeverDestroyed<RetainPtr<NSMutableArray>> regExpLabels = adoptNS([[NSMutableArray alloc] initWithCapacity:regExpCacheSize]);
    static NeverDestroyed<Vector<RegularExpression*>> regExps;
    static NeverDestroyed<RegularExpression> wordRegExp("\\w");

    RegularExpression* result;
    CFIndex cacheHit = [regExpLabels.get() indexOfObject:labels];
    if (cacheHit != NSNotFound)
        result = regExps.get().at(cacheHit);
    else {
        StringBuilder pattern;
        pattern.append('(');
        unsigned numLabels = [labels count];
        unsigned i;
        for (i = 0; i < numLabels; i++) {
            String label = [labels objectAtIndex:i];

            bool startsWithWordCharacter = false;
            bool endsWithWordCharacter = false;
            if (label.length()) {
                startsWithWordCharacter = wordRegExp.get().match(label.substring(0, 1)) >= 0;
                endsWithWordCharacter = wordRegExp.get().match(label.substring(label.length() - 1, 1)) >= 0;
            }

            // Search for word boundaries only if label starts/ends with "word characters".
            // If we always searched for word boundaries, this wouldn't work for languages such as Japanese.
            pattern.append(i ? "|" : "", startsWithWordCharacter ? "\\b" : "", label, endsWithWordCharacter ? "\\b" : "");
        }
        pattern.append(')');
        result = new RegularExpression(pattern.toString(), JSC::Yarr::TextCaseInsensitive);
    }

    // add regexp to the cache, making sure it is at the front for LRU ordering
    if (cacheHit != 0) {
        if (cacheHit != NSNotFound) {
            // remove from old spot
            [regExpLabels.get() removeObjectAtIndex:cacheHit];
            regExps.get().remove(cacheHit);
        }
        // add to start
        // Store a copy of the key: if the caller passed a mutable array and mutates it later,
        // a shared key would change with it and a stale regexp would be returned for the
        // new contents. Copying an immutable array is cheap.
        [regExpLabels.get() insertObject:adoptNS([labels copy]).get() atIndex:0];
        regExps.get().insert(0, result);
        // trim if too big
        if ([regExpLabels.get() count] > regExpCacheSize) {
            [regExpLabels.get() removeObjectAtIndex:regExpCacheSize];
            RegularExpression* last = regExps.get().last();
            regExps.get().removeLast();
            delete last;
        }
    }
    return result;
}
// Walks backwards through the nodes that precede `element`, looking in visible text nodes
// for a match of any of `labels`, and returns the matched text or nil.
//
// frame: used only to search the table cell above the starting cell.
// labels: label strings; turned into a single regexp by regExpForLabels().
// element: starting point of the backwards walk; must not be null (it is dereferenced).
// resultDistance: optional out-parameter. Initialized to notFound; on a text-node match it
//     receives the number of characters searched before the matching node. It is also
//     passed through to searchForLabelsAboveCell().
// resultIsInCellAbove: optional out-parameter. Initialized to false; set to true when the
//     result came from searchForLabelsAboveCell().
// FIXME: This should take an Element&.
static NSString* searchForLabelsBeforeElement(Frame* frame, NSArray* labels, Element* element, size_t* resultDistance, bool* resultIsInCellAbove)
{
// NOTE(review): ASSERT is presumably compiled out in release builds, so this does not
// protect the dereference of element below — confirm.
ASSERT(element);
RegularExpression* regExp = regExpForLabels(labels);
// We stop searching after we've seen this many chars
constexpr unsigned charsSearchedThreshold = 500;
// This is the absolute max we search. We allow a little more slop than
// charsSearchedThreshold, to make it more likely that we'll search whole nodes.
constexpr unsigned maxCharsSearched = 600;
// If the starting element is within a table, the cell that contains it
HTMLTableCellElement* startingTableCell = nullptr;
// Tracks whether the row above has already been searched inside the loop, so the
// fallback after the loop does not search it a second time.
bool searchedCellAbove = false;
if (resultDistance)
*resultDistance = notFound;
if (resultIsInCellAbove)
*resultIsInCellAbove = false;
// walk backwards in the node tree, until another element, or form, or end of tree
unsigned lengthSearched = 0;
Node* n;
for (n = NodeTraversal::previous(*element); n && lengthSearched < charsSearchedThreshold; n = NodeTraversal::previous(*n)) {
if (is<HTMLFormElement>(*n) || is<HTMLFormControlElement>(*n)) {
// We hit another form element or the start of the form - bail out
break;
}
if (n->hasTagName(tdTag) && !startingTableCell) {
// The first td encountered while walking backwards is taken as the starting cell.
startingTableCell = static_cast<HTMLTableCellElement*>(n);
} else if (n->hasTagName(trTag) && startingTableCell) {
// Reached the row of the starting cell: look for a label in the cell above it.
NSString *result = frame->searchForLabelsAboveCell(*regExp, startingTableCell, resultDistance);
if ([result length]) {
if (resultIsInCellAbove)
*resultIsInCellAbove = true;
return result;
}
searchedCellAbove = true;
} else if (n->isTextNode() && n->renderer() && n->renderer()->style().visibility() == Visibility::Visible) {
// For each text chunk, run the regexp
String nodeString = n->nodeValue();
// add 100 for slop, to make it more likely that we'll search whole nodes
// When even the slop is exceeded, only the trailing characters that still fit
// under charsSearchedThreshold are searched.
if (lengthSearched + nodeString.length() > maxCharsSearched)
nodeString = nodeString.right(charsSearchedThreshold - lengthSearched);
// Search from the end of the text, since we are walking backwards from the element.
int pos = regExp->searchRev(nodeString);
if (pos >= 0) {
if (resultDistance)
*resultDistance = lengthSearched;
return nodeString.substring(pos, regExp->matchedLength());
}
lengthSearched += nodeString.length();
}
}
// If we started in a cell, but bailed because we found the start of the form or the
// previous element, we still might need to search the row above us for a label.
if (startingTableCell && !searchedCellAbove) {
NSString *result = frame->searchForLabelsAboveCell(*regExp, startingTableCell, resultDistance);
if ([result length]) {
if (resultIsInCellAbove)
*resultIsInCellAbove = true;
return result;
}
}
return nil;
}
// Returns the longest substring of `stringToMatch` that matches any of `labels`, or nil
// when the string is empty or nothing matches. Among matches of equal length the last
// one found wins. Digits and underscores are replaced by spaces before matching.
static NSString *matchLabelsAgainstString(NSArray *labels, const String& stringToMatch)
{
    if (stringToMatch.isEmpty())
        return nil;

    String searchableString = stringToMatch;

    // Make numbers and _'s in field names behave like word boundaries, e.g., "address2"
    replace(searchableString, RegularExpression("\\d"), " ");
    searchableString.replace('_', ' ');

    RegularExpression* labelsRegExp = regExpForLabels(labels);

    // Use the largest match we can find in the whole string
    int bestPos = -1;
    int bestLength = -1;
    int searchStart = 0;
    for (;;) {
        int matchPos = labelsRegExp->match(searchableString, searchStart);
        if (matchPos == -1)
            break;

        int matchLength = labelsRegExp->matchedLength();
        if (matchLength >= bestLength) {
            bestPos = matchPos;
            bestLength = matchLength;
        }
        // Resume one character past the start of this match so overlapping matches are seen.
        searchStart = matchPos + 1;
    }

    if (bestPos == -1)
        return nil;
    return searchableString.substring(bestPos, bestLength);
}
// Matches `labels` against the element's name attribute and, if that gives no non-empty
// result, against its id attribute. Returns nil for a null element.
static NSString *matchLabelsAgainstElement(NSArray *labels, Element* element)
{
    if (!element)
        return nil;

    // Match against the name element, then against the id element if no match is found for the name element.
    // See 7538330 for one popular site that benefits from the id element check.
    NSString *nameMatch = matchLabelsAgainstString(labels, element->attributeWithoutSynchronization(nameAttr));
    if ([nameMatch length])
        return nameMatch;

    return matchLabelsAgainstString(labels, element->attributeWithoutSynchronization(idAttr));
}
// Convenience form of -searchForLabels:beforeElement:resultDistance:resultIsInCellAbove:
// for callers that do not need either out-parameter.
- (NSString *)searchForLabels:(NSArray *)labels beforeElement:(DOMElement *)element
{
    NSUInteger *noDistance = nullptr;
    BOOL *noCellAboveFlag = nullptr;
    return [self searchForLabels:labels beforeElement:element resultDistance:noDistance resultIsInCellAbove:noCellAboveFlag];
}
// Searches the content preceding `element` for one of `labels` and returns the matched
// text, or nil when nothing matches.
//
// outDistance: optional; receives the distance reported by the search, or NSNotFound.
// outIsInCellAbove: optional; receives whether the match came from the table cell above.
//
// When there is no core frame or `element` is nil, nothing is searched: nil is returned
// and the out-parameters report "not found".
- (NSString *)searchForLabels:(NSArray *)labels beforeElement:(DOMElement *)element resultDistance:(NSUInteger*)outDistance resultIsInCellAbove:(BOOL*)outIsInCellAbove
{
    Frame* coreFrame = core([_private->dataSource webFrame]);
    Element* coreElement = core(element);

    // The search helper dereferences both the element and (for table cells) the frame,
    // so bail out here instead of passing null pointers down.
    if (!coreFrame || !coreElement) {
        if (outDistance)
            *outDistance = NSNotFound;
        if (outIsInCellAbove)
            *outIsInCellAbove = NO;
        return nil;
    }

    size_t distance = notFound;
    bool isInCellAbove = false;
    NSString *result = searchForLabelsBeforeElement(coreFrame, labels, coreElement, &distance, &isInCellAbove);

    if (outDistance) {
        // Translate WTF's notFound sentinel into Foundation's NSNotFound.
        if (distance == notFound)
            *outDistance = NSNotFound;
        else
            *outDistance = distance;
    }
    if (outIsInCellAbove)
        *outIsInCellAbove = isInCellAbove;
    return result;
}
// Matches `labels` against the name and id attributes of `element`; see matchLabelsAgainstElement().
- (NSString *)matchLabels:(NSArray *)labels againstElement:(DOMElement *)element
{
    Element* coreElement = core(element);
    return matchLabelsAgainstElement(labels, coreElement);
}
@end