results.webkit.org: Handle duplicate archives
https://bugs.webkit.org/show_bug.cgi?id=204860
Reviewed by Stephanie Lewis.
* resultsdbpy/resultsdbpy/controller/archive_controller.py:
(ArchiveController): Pass test time to ArchiveContext, de-duplicate any
identical archives.
* resultsdbpy/resultsdbpy/model/archive_context.py:
(ArchiveContext): Only unpack identical archives once, pass digest to caller.
git-svn-id: http://svn.webkit.org/repository/webkit/trunk@253132 268f45cc-cd09-0410-ab3c-d52691b4dbfc
diff --git a/Tools/ChangeLog b/Tools/ChangeLog
index f322aa3..ace151c 100644
--- a/Tools/ChangeLog
+++ b/Tools/ChangeLog
@@ -1,5 +1,18 @@
2019-12-04 Jonathan Bedard <jbedard@apple.com>
+ results.webkit.org: Handle duplicate archives
+ https://bugs.webkit.org/show_bug.cgi?id=204860
+
+ Reviewed by Stephanie Lewis.
+
+ * resultsdbpy/resultsdbpy/controller/archive_controller.py:
+ (ArchiveController): Pass test time to ArchiveContext, de-duplicate any
+ identical archives.
+ * resultsdbpy/resultsdbpy/model/archive_context.py:
+ (ArchiveContext): Only unpack identical archives once, pass digest to caller.
+
+2019-12-04 Jonathan Bedard <jbedard@apple.com>
+
Python 3: Add support in webkitpy.benchmark_runner
https://bugs.webkit.org/show_bug.cgi?id=204784
diff --git a/Tools/resultsdbpy/resultsdbpy/controller/archive_controller.py b/Tools/resultsdbpy/resultsdbpy/controller/archive_controller.py
index 04e0451..93f5b9f 100644
--- a/Tools/resultsdbpy/resultsdbpy/controller/archive_controller.py
+++ b/Tools/resultsdbpy/resultsdbpy/controller/archive_controller.py
@@ -20,6 +20,7 @@
# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+import hashlib
import io
import json
import time
@@ -66,19 +67,22 @@
suites = set(suite)
result = None
- filename = 'archive.zip'
+ filename = None
+ digest = None
with self.archive_context, self.upload_context:
for suite in suites:
for configuration, archives in self.archive_context.find_archive(
configurations=configurations, suite=suite, branch=branch[0],
begin=begin, end=end, recent=recent, limit=2,
+ begin_query_time=begin_query_time, end_query_time=end_query_time,
).items():
for archive in archives:
- if archive.get('archive'):
- if result:
+ if archive.get('archive') and archive.get('digest'):
+ if digest and digest != archive.get('digest'):
abort(400, description='Multiple archives matching the specified criteria')
result = archive.get('archive')
filename = f'{configuration}@{archive["uuid"]}'.replace(' ', '_').replace('.', '-')
+ digest = archive.get('digest')
if not result:
abort(404, description='No archives matching the specified criteria')
diff --git a/Tools/resultsdbpy/resultsdbpy/model/archive_context.py b/Tools/resultsdbpy/resultsdbpy/model/archive_context.py
index 5874de4..e726caf 100644
--- a/Tools/resultsdbpy/resultsdbpy/model/archive_context.py
+++ b/Tools/resultsdbpy/resultsdbpy/model/archive_context.py
@@ -177,35 +177,40 @@
if memory_used > self.MEMORY_LIMIT:
raise RuntimeError('Hit soft-memory cap when fetching archives, aborting')
+ archive_by_digest = {}
result = {}
for config, values in metadata_by_config.items():
for value in values:
if not value.get('digest'):
continue
- rows = self.cassandra.select_from_table(
- self.ArchiveChunks.__table_name__,
- digest=value.get('digest'),
- limit=1 + int(value.get('size', 0) / self.CHUNK_SIZE),
- )
- if len(rows) == 0:
- continue
+ if not archive_by_digest.get(value.get('digest')):
+ rows = self.cassandra.select_from_table(
+ self.ArchiveChunks.__table_name__,
+ digest=value.get('digest'),
+ limit=1 + int(value.get('size', 0) / self.CHUNK_SIZE),
+ )
+ if len(rows) == 0:
+ continue
- digest = hashlib.md5()
- archive = io.BytesIO()
- archive_size = 0
- for row in rows:
- archive_size += len(row.chunk)
- digest.update(row.chunk)
- archive.write(row.chunk)
+ digest = hashlib.md5()
+ archive = io.BytesIO()
+ archive_size = 0
+ for row in rows:
+ archive_size += len(row.chunk)
+ digest.update(row.chunk)
+ archive.write(row.chunk)
- if archive_size != value.get('size', 0) or value.get('digest', '') != digest.hexdigest():
- raise RuntimeError('Failed to reconstruct archive from chunks')
+ if archive_size != value.get('size', 0) or value.get('digest', '') != digest.hexdigest():
+ raise RuntimeError('Failed to reconstruct archive from chunks')
- archive.seek(0)
+ archive_by_digest[value.get('digest')] = archive
+
+ archive_by_digest.get(value.get('digest')).seek(0)
result.setdefault(config, [])
result[config].append(dict(
- archive=archive,
+ archive=archive_by_digest.get(value.get('digest')),
+ digest=digest.hexdigest(),
uuid=value['uuid'],
start_time=value['start_time'],
))