[Search] Generic Worker Performance Tweaks

The generic search worker now does indexing work during the index operation,
ensuring that queries do not have to do extraneous or repeat calculations.

Change the return format slightly and fixed a bug in the GenericSearchProvider
which caused more objects than intended to be returned from the provider.
This commit is contained in:
Pete Richards 2015-10-16 12:39:41 -07:00
parent 9ad860babd
commit b5505f372f
2 changed files with 123 additions and 121 deletions

View File

@ -96,36 +96,33 @@ define(
// Handles responses from the web worker. Namely, the results of
// a search request.
function handleResponse(event) {
var ids = [],
id;
if (event.data.request !== 'search') {
return; // no idea how to handle anything else.
}
// If we have the results from a search
if (event.data.request === 'search') {
// Convert the ids given from the web worker into domain objects
for (id in event.data.results) {
ids.push(id);
}
objectService.getObjects(ids).then(function (objects) {
var searchResults = [],
id;
var workerResults = event.data.results,
ids = Object.keys(workerResults);
// Create searchResult objects
for (id in objects) {
searchResults.push({
object: objects[id],
id: id,
score: event.data.results[id]
objectService
.getObjects(ids)
.then(function (objects) {
var searchResults = Object
.keys(objects)
.map(function (id) {
return {
object: objects[id],
id: id,
score: workerResults[id].matchCount
};
});
}
// Resove the promise corresponding to this
pendingQueries[event.data.timestamp].resolve({
hits: searchResults,
total: event.data.total,
total: searchResults.length,
timedOut: event.data.timedOut
});
});
}
}
function requestAndIndex(id) {
@ -212,7 +209,7 @@ define(
var message = {
request: 'search',
input: searchInput,
maxNumber: maxResults,
maxResults: maxResults,
timestamp: timestamp,
timeout: timeout
};

View File

@ -26,78 +26,55 @@
*/
(function () {
"use strict";
// An array of objects composed of domain object IDs and models
// {id: domainObject's ID, model: domainObject's model}
var indexedItems = [];
// Helper function for serach()
function convertToTerms(input) {
var terms = input;
// Shave any spaces off of the ends of the input
while (terms.substr(0, 1) === ' ') {
terms = terms.substring(1, terms.length);
}
while (terms.substr(terms.length - 1, 1) === ' ') {
terms = terms.substring(0, terms.length - 1);
}
// Then split it at spaces and asterisks
terms = terms.split(/ |\*/);
// Remove any empty strings from the terms
while (terms.indexOf('') !== -1) {
terms.splice(terms.indexOf(''), 1);
}
return terms;
var indexedItems = [],
TERM_SPLITTER = /[ _\*]/;
function indexItem(id, model) {
var vector = {
name: model.name
};
vector.cleanName = model.name.trim();
vector.lowerCaseName = vector.cleanName.toLocaleLowerCase();
vector.terms = vector.lowerCaseName.split(TERM_SPLITTER);
indexedItems.push({
id: id,
vector: vector,
model: model
});
}
// Helper function for search()
function scoreItem(item, input, terms) {
var name = item.model.name.toLocaleLowerCase(),
weight = 0.65,
score = 0.0,
i;
// Make the score really big if the item name and
// the original search input are the same
if (name === input) {
score = 42;
}
for (i = 0; i < terms.length; i += 1) {
// Increase the score if the term is in the item name
if (name.indexOf(terms[i]) !== -1) {
score += 1;
// Add extra to the score if the search term exists
// as its own term within the items
if (name.split(' ').indexOf(terms[i]) !== -1) {
score += 0.5;
}
}
}
return score * weight;
function convertToTerms(input) {
var query = {
exactInput: input
};
query.inputClean = input.trim();
query.inputLowerCase = query.inputClean.toLocaleLowerCase();
query.terms = query.inputLowerCase.split(TERM_SPLITTER);
query.exactTerms = query.inputClean.split(TERM_SPLITTER);
return query;
}
/**
/**
* Gets search results from the indexedItems based on provided search
* input. Returns matching results from indexedItems, as well as the
* timestamp that was passed to it.
*
*
* @param data An object which contains:
* * input: The original string which we are searching with
* * maxNumber: The maximum number of search results desired
* * maxResults: The maximum number of search results desired
* * timestamp: The time identifier from when the query was made
*/
function search(data) {
// This results dictionary will have domain object ID keys which
// point to the value the domain object's score.
var results = {},
input = data.input.toLocaleLowerCase(),
terms = convertToTerms(input),
// This results dictionary will have domain object ID keys which
// point to the value the domain object's score.
var results,
input = data.input,
query = convertToTerms(input),
message = {
request: 'search',
results: {},
@ -105,54 +82,82 @@
timestamp: data.timestamp,
timedOut: false
},
score,
i,
id;
// If the user input is empty, we want to have no search results.
if (input !== '') {
for (i = 0; i < indexedItems.length; i += 1) {
// If this is taking too long, then stop
if (Date.now() > data.timestamp + data.timeout) {
message.timedOut = true;
break;
}
// Score and add items
score = scoreItem(indexedItems[i], input, terms);
if (score > 0) {
results[indexedItems[i].id] = score;
message.total += 1;
}
}
matches = {};
if (!query.inputClean) {
// No search terms, no results;
return message;
}
// Truncate results if there are more than maxResults
if (message.total > data.maxResults) {
i = 0;
for (id in results) {
message.results[id] = results[id];
i += 1;
if (i >= data.maxResults) {
break;
// Two phases: find matches, then score matches.
// Idea being that match finding should be fast, so that future scoring
// operations process fewer objects.
query.terms.forEach(function findMatchingItems(term) {
indexedItems
.filter(function matchesItem(item) {
return item.vector.lowerCaseName.indexOf(term) !== -1;
})
.forEach(function trackMatch(matchedItem) {
if (!matches[matchedItem.id]) {
matches[matchedItem.id] = {
matchCount: 0,
item: matchedItem
};
}
matches[matchedItem.id].matchCount += 1;
});
});
// Then, score matching items.
results = Object
.keys(matches)
.map(function asMatches(matchId) {
return matches[matchId];
})
.map(function prioritizeExactMatches(match) {
if (match.item.vector.name === query.exactInput) {
match.matchCount += 100;
} else if (match.item.vector.lowerCaseName ===
query.inputLowerCase) {
match.matchCount += 50;
}
}
// TODO: This seems inefficient.
} else {
message.results = results;
}
return match;
})
.map(function prioritizeCompleteTermMatches(match) {
match.item.vector.terms.forEach(function (term) {
if (query.terms.indexOf(term) !== -1) {
match.matchCount += 0.5;
}
});
return match;
})
.sort(function compare(a, b) {
if (a.matchCount > b.matchCount) {
return -1;
}
if (a.matchCount < b.matchCount) {
return 1;
}
return 0;
});
message.total = results.length;
message.results = results
.slice(0, data.maxResults)
.reduce(function arrayToObject(obj, match) {
obj[match.item.id] = match;
return obj;
}, {});
return message;
}
self.onmessage = function (event) {
if (event.data.request === 'index') {
indexedItems.push({
id: event.data.id,
model: event.data.model
});
indexItem(event.data.id, event.data.model);
} else if (event.data.request === 'search') {
self.postMessage(search(event.data));
}
};
}());
}());