[Search] Generic Worker Performance Tweaks

The generic search worker now does indexing work during the index operation,
ensuring that queries do not have to do extraneous or repeated calculations.

Changed the return format slightly and fixed a bug in the GenericSearchProvider
that caused more objects than intended to be returned from the provider.
This commit is contained in:
Pete Richards 2015-10-16 12:39:41 -07:00
parent 9ad860babd
commit b5505f372f
2 changed files with 123 additions and 121 deletions

View File

@ -96,36 +96,33 @@ define(
// Handles responses from the web worker. Namely, the results of // Handles responses from the web worker. Namely, the results of
// a search request. // a search request.
function handleResponse(event) { function handleResponse(event) {
var ids = [], if (event.data.request !== 'search') {
id; return; // no idea how to handle anything else.
}
// If we have the results from a search var workerResults = event.data.results,
if (event.data.request === 'search') { ids = Object.keys(workerResults);
// Convert the ids given from the web worker into domain objects
for (id in event.data.results) {
ids.push(id);
}
objectService.getObjects(ids).then(function (objects) {
var searchResults = [],
id;
// Create searchResult objects objectService
for (id in objects) { .getObjects(ids)
searchResults.push({ .then(function (objects) {
object: objects[id], var searchResults = Object
id: id, .keys(objects)
score: event.data.results[id] .map(function (id) {
return {
object: objects[id],
id: id,
score: workerResults[id].matchCount
};
}); });
}
// Resolve the promise corresponding to this // Resolve the promise corresponding to this
pendingQueries[event.data.timestamp].resolve({ pendingQueries[event.data.timestamp].resolve({
hits: searchResults, hits: searchResults,
total: event.data.total, total: searchResults.length,
timedOut: event.data.timedOut timedOut: event.data.timedOut
}); });
}); });
}
} }
function requestAndIndex(id) { function requestAndIndex(id) {
@ -212,7 +209,7 @@ define(
var message = { var message = {
request: 'search', request: 'search',
input: searchInput, input: searchInput,
maxNumber: maxResults, maxResults: maxResults,
timestamp: timestamp, timestamp: timestamp,
timeout: timeout timeout: timeout
}; };

View File

@ -26,78 +26,55 @@
*/ */
(function () { (function () {
"use strict"; "use strict";
// An array of objects composed of domain object IDs and models // An array of objects composed of domain object IDs and models
// {id: domainObject's ID, model: domainObject's model} // {id: domainObject's ID, model: domainObject's model}
var indexedItems = []; var indexedItems = [],
TERM_SPLITTER = /[ _\*]/;
// Helper function for search()
function convertToTerms(input) { function indexItem(id, model) {
var terms = input; var vector = {
// Shave any spaces off of the ends of the input name: model.name
while (terms.substr(0, 1) === ' ') { };
terms = terms.substring(1, terms.length); vector.cleanName = model.name.trim();
} vector.lowerCaseName = vector.cleanName.toLocaleLowerCase();
while (terms.substr(terms.length - 1, 1) === ' ') { vector.terms = vector.lowerCaseName.split(TERM_SPLITTER);
terms = terms.substring(0, terms.length - 1);
} indexedItems.push({
id: id,
// Then split it at spaces and asterisks vector: vector,
terms = terms.split(/ |\*/); model: model
});
// Remove any empty strings from the terms
while (terms.indexOf('') !== -1) {
terms.splice(terms.indexOf(''), 1);
}
return terms;
} }
// Helper function for search() // Helper function for search()
function scoreItem(item, input, terms) { function convertToTerms(input) {
var name = item.model.name.toLocaleLowerCase(), var query = {
weight = 0.65, exactInput: input
score = 0.0, };
i; query.inputClean = input.trim();
query.inputLowerCase = query.inputClean.toLocaleLowerCase();
// Make the score really big if the item name and query.terms = query.inputLowerCase.split(TERM_SPLITTER);
// the original search input are the same query.exactTerms = query.inputClean.split(TERM_SPLITTER);
if (name === input) { return query;
score = 42;
}
for (i = 0; i < terms.length; i += 1) {
// Increase the score if the term is in the item name
if (name.indexOf(terms[i]) !== -1) {
score += 1;
// Add extra to the score if the search term exists
// as its own term within the items
if (name.split(' ').indexOf(terms[i]) !== -1) {
score += 0.5;
}
}
}
return score * weight;
} }
/** /**
* Gets search results from the indexedItems based on provided search * Gets search results from the indexedItems based on provided search
* input. Returns matching results from indexedItems, as well as the * input. Returns matching results from indexedItems, as well as the
* timestamp that was passed to it. * timestamp that was passed to it.
* *
* @param data An object which contains: * @param data An object which contains:
* * input: The original string which we are searching with * * input: The original string which we are searching with
* * maxNumber: The maximum number of search results desired * * maxResults: The maximum number of search results desired
* * timestamp: The time identifier from when the query was made * * timestamp: The time identifier from when the query was made
*/ */
function search(data) { function search(data) {
// This results dictionary will have domain object ID keys which // This results dictionary will have domain object ID keys which
// point to the value the domain object's score. // point to the value the domain object's score.
var results = {}, var results,
input = data.input.toLocaleLowerCase(), input = data.input,
terms = convertToTerms(input), query = convertToTerms(input),
message = { message = {
request: 'search', request: 'search',
results: {}, results: {},
@ -105,54 +82,82 @@
timestamp: data.timestamp, timestamp: data.timestamp,
timedOut: false timedOut: false
}, },
score, matches = {};
i,
id; if (!query.inputClean) {
// No search terms, no results;
// If the user input is empty, we want to have no search results. return message;
if (input !== '') {
for (i = 0; i < indexedItems.length; i += 1) {
// If this is taking too long, then stop
if (Date.now() > data.timestamp + data.timeout) {
message.timedOut = true;
break;
}
// Score and add items
score = scoreItem(indexedItems[i], input, terms);
if (score > 0) {
results[indexedItems[i].id] = score;
message.total += 1;
}
}
} }
// Truncate results if there are more than maxResults // Two phases: find matches, then score matches.
if (message.total > data.maxResults) { // Idea being that match finding should be fast, so that future scoring
i = 0; // operations process fewer objects.
for (id in results) {
message.results[id] = results[id]; query.terms.forEach(function findMatchingItems(term) {
i += 1; indexedItems
if (i >= data.maxResults) { .filter(function matchesItem(item) {
break; return item.vector.lowerCaseName.indexOf(term) !== -1;
})
.forEach(function trackMatch(matchedItem) {
if (!matches[matchedItem.id]) {
matches[matchedItem.id] = {
matchCount: 0,
item: matchedItem
};
}
matches[matchedItem.id].matchCount += 1;
});
});
// Then, score matching items.
results = Object
.keys(matches)
.map(function asMatches(matchId) {
return matches[matchId];
})
.map(function prioritizeExactMatches(match) {
if (match.item.vector.name === query.exactInput) {
match.matchCount += 100;
} else if (match.item.vector.lowerCaseName ===
query.inputLowerCase) {
match.matchCount += 50;
} }
} return match;
// TODO: This seems inefficient. })
} else { .map(function prioritizeCompleteTermMatches(match) {
message.results = results; match.item.vector.terms.forEach(function (term) {
} if (query.terms.indexOf(term) !== -1) {
match.matchCount += 0.5;
}
});
return match;
})
.sort(function compare(a, b) {
if (a.matchCount > b.matchCount) {
return -1;
}
if (a.matchCount < b.matchCount) {
return 1;
}
return 0;
});
message.total = results.length;
message.results = results
.slice(0, data.maxResults)
.reduce(function arrayToObject(obj, match) {
obj[match.item.id] = match;
return obj;
}, {});
return message; return message;
} }
self.onmessage = function (event) { self.onmessage = function (event) {
if (event.data.request === 'index') { if (event.data.request === 'index') {
indexedItems.push({ indexItem(event.data.id, event.data.model);
id: event.data.id,
model: event.data.model
});
} else if (event.data.request === 'search') { } else if (event.data.request === 'search') {
self.postMessage(search(event.data)); self.postMessage(search(event.data));
} }
}; };
}()); }());