Author: jonesde
Date: Mon Dec 11 00:57:02 2006 New Revision: 485561 URL: http://svn.apache.org/viewvc?view=rev&rev=485561 Log: Refactored KeywordSearch class to move some more generic search code and properties to the common component Added: incubator/ofbiz/trunk/framework/common/config/keywordsearch.properties (with props) incubator/ofbiz/trunk/framework/common/src/org/ofbiz/common/KeywordSearchUtil.java (with props) Modified: incubator/ofbiz/trunk/applications/product/config/prodsearch.properties incubator/ofbiz/trunk/applications/product/src/org/ofbiz/product/product/KeywordIndex.java incubator/ofbiz/trunk/applications/product/src/org/ofbiz/product/product/KeywordSearch.java incubator/ofbiz/trunk/applications/product/src/org/ofbiz/product/product/ProductSearch.java Modified: incubator/ofbiz/trunk/applications/product/config/prodsearch.properties URL: http://svn.apache.org/viewvc/incubator/ofbiz/trunk/applications/product/config/prodsearch.properties?view=diff&rev=485561&r1=485560&r2=485561 ============================================================================== --- incubator/ofbiz/trunk/applications/product/config/prodsearch.properties (original) +++ incubator/ofbiz/trunk/applications/product/config/prodsearch.properties Mon Dec 11 00:57:02 2006 @@ -1,5 +1,4 @@ ##################################################################### -# # Copyright 2001-2006 The Apache Software Foundation # # Licensed under the Apache License, Version 2.0 (the "License"); you may not @@ -15,23 +14,8 @@ # under the License. ##################################################################### #### -# OFBiz Search Settings +# OFBiz Product Search Settings #### - -# The stop word bags contain words to be removed from search keyword list -# These should be colon separated and the list should start and end with colons -# The words should all be lower case -# The .or is for OR searches and the .and for AND searches -stop.word.bag.or=:the:and:or:not:if:is:it:of:to:a:as:at:in:into:on:onto:so:but:me:you:your:yes:no:this:that:there:their:because:for:while:with:without:get:put:have:has:do:does:same:different:use:using: -stop.word.bag.and=:the:and:or:not:if:is:it:of:to:a:as:at:in:into:on:onto:so:but:me:you:your:yes:no:this:that:there:their:because:for:while:with:without:get:put:have:has:do:does:same:different:use:using: - -# The stem bag is used to remove suffixes from words passed in the search string and found while indexing -# IF the remove.stems properties is true -remove.stems=true -stem.bag=:s:ies:y: - -# Characters that should be used as token separators when pulling out keywords -index.keyword.separators=;: ,.!?\t\"\'\r\n\\/()[]{}*%<>-+_ # Assign a weight to each product keyword source during indexing/keywork inuduction index.weight.Product.productId=1 Modified: incubator/ofbiz/trunk/applications/product/src/org/ofbiz/product/product/KeywordIndex.java URL: http://svn.apache.org/viewvc/incubator/ofbiz/trunk/applications/product/src/org/ofbiz/product/product/KeywordIndex.java?view=diff&rev=485561&r1=485560&r2=485561 ============================================================================== --- incubator/ofbiz/trunk/applications/product/src/org/ofbiz/product/product/KeywordIndex.java (original) +++ incubator/ofbiz/trunk/applications/product/src/org/ofbiz/product/product/KeywordIndex.java Mon Dec 11 00:57:02 2006 @@ -32,6 +32,7 @@ import org.ofbiz.base.util.UtilDateTime; import org.ofbiz.base.util.UtilMisc; import org.ofbiz.base.util.UtilProperties; +import org.ofbiz.common.KeywordSearchUtil; import org.ofbiz.content.data.DataResourceWorker; import org.ofbiz.entity.GenericDelegator; import org.ofbiz.entity.GenericEntityException; @@ -68,11 +69,11 @@ String productId = product.getString("productId"); // get these in advance just once since they will be used many times for the multiple strings to index - String separators = KeywordSearch.getSeparators(); - String stopWordBagOr = KeywordSearch.getStopWordBagOr(); - String stopWordBagAnd = KeywordSearch.getStopWordBagAnd(); - boolean removeStems = KeywordSearch.getRemoveStems(); - Set stemSet = KeywordSearch.getStemSet(); + String separators = KeywordSearchUtil.getSeparators(); + String stopWordBagOr = KeywordSearchUtil.getStopWordBagOr(); + String stopWordBagAnd = KeywordSearchUtil.getStopWordBagAnd(); + boolean removeStems = KeywordSearchUtil.getRemoveStems(); + Set stemSet = KeywordSearchUtil.getStemSet(); Map keywords = new TreeMap(); List strings = new ArrayList(50); @@ -176,7 +177,7 @@ while (strIter.hasNext()) { String str = (String) strIter.next(); // call process keywords method here - KeywordSearch.processKeywordsForIndex(str, keywords, separators, stopWordBagAnd, stopWordBagOr, removeStems, stemSet); + KeywordSearchUtil.processKeywordsForIndex(str, keywords, separators, stopWordBagAnd, stopWordBagOr, removeStems, stemSet); } List toBeStored = new LinkedList(); Modified: incubator/ofbiz/trunk/applications/product/src/org/ofbiz/product/product/KeywordSearch.java URL: http://svn.apache.org/viewvc/incubator/ofbiz/trunk/applications/product/src/org/ofbiz/product/product/KeywordSearch.java?view=diff&rev=485561&r1=485560&r2=485561 ============================================================================== --- incubator/ofbiz/trunk/applications/product/src/org/ofbiz/product/product/KeywordSearch.java (original) +++ incubator/ofbiz/trunk/applications/product/src/org/ofbiz/product/product/KeywordSearch.java Mon Dec 11 00:57:02 2006 @@ -16,227 +16,15 @@ */ package org.ofbiz.product.product; -import java.util.HashMap; -import java.util.HashSet; -import java.util.Iterator; -import java.util.List; -import java.util.Map; -import java.util.Set; -import java.util.StringTokenizer; -import java.util.TreeSet; - -import org.ofbiz.base.util.Debug; -import org.ofbiz.base.util.UtilMisc; -import org.ofbiz.base.util.UtilProperties; -import org.ofbiz.base.util.UtilValidate; -import org.ofbiz.entity.GenericDelegator; import org.ofbiz.entity.GenericEntityException; import org.ofbiz.entity.GenericValue; /** - * Does a product search by keyword using the PRODUCT_KEYWORD table. - * <br/>Special thanks to Glen Thorne and the Weblogic Commerce Server for ideas. + * These are left over utlity methods from the product search code, just calling over to KeywordIndex now; can probably remove soon. */ public class KeywordSearch { public static final String module = KeywordSearch.class.getName(); - - public static Set thesaurusRelsToInclude = new HashSet(); - public static Set thesaurusRelsForReplace = new HashSet(); - - static { - thesaurusRelsToInclude.add("KWTR_UF"); - thesaurusRelsToInclude.add("KWTR_USE"); - thesaurusRelsToInclude.add("KWTR_CS"); - thesaurusRelsToInclude.add("KWTR_NT"); - thesaurusRelsToInclude.add("KWTR_BT"); - thesaurusRelsToInclude.add("KWTR_RT"); - - thesaurusRelsForReplace.add("KWTR_USE"); - thesaurusRelsForReplace.add("KWTR_CS"); - } - - public static String getSeparators() { - // String separators = ";: ,.!?\t\"\'\r\n\\/()[]{}*%<>-+_"; - String seps = UtilProperties.getPropertyValue("prodsearch", "index.keyword.separators", ";: ,.!?\t\"\'\r\n\\/()[]{}*%<>-+_"); - return seps; - } - - public static String getStopWordBagOr() { - return UtilProperties.getPropertyValue("prodsearch", "stop.word.bag.or"); - } - public static String getStopWordBagAnd() { - return UtilProperties.getPropertyValue("prodsearch", "stop.word.bag.and"); - } - - public static boolean getRemoveStems() { - String removeStemsStr = UtilProperties.getPropertyValue("prodsearch", "remove.stems"); - return "true".equals(removeStemsStr); - } - public static Set getStemSet() { - String stemBag = UtilProperties.getPropertyValue("prodsearch", "stem.bag"); - Set stemSet = new TreeSet(); - if (UtilValidate.isNotEmpty(stemBag)) { - String curToken; - StringTokenizer tokenizer = new StringTokenizer(stemBag, ": "); - while (tokenizer.hasMoreTokens()) { - curToken = tokenizer.nextToken(); - stemSet.add(curToken); - } - } - return stemSet; - } - - public static void processForKeywords(String str, Map keywords, boolean forSearch, boolean anyPrefix, boolean anySuffix, boolean isAnd) { - String separators = getSeparators(); - String stopWordBagOr = getStopWordBagOr(); - String stopWordBagAnd = getStopWordBagAnd(); - - boolean removeStems = getRemoveStems(); - Set stemSet = getStemSet(); - - processForKeywords(str, keywords, separators, stopWordBagAnd, stopWordBagOr, removeStems, stemSet, forSearch, anyPrefix, anySuffix, isAnd); - } - - public static void processKeywordsForIndex(String str, Map keywords, String separators, String stopWordBagAnd, String stopWordBagOr, boolean removeStems, Set stemSet) { - processForKeywords(str, keywords, separators, stopWordBagAnd, stopWordBagOr, removeStems, stemSet, false, false, false, false); - } - - public static void processForKeywords(String str, Map keywords, String separators, String stopWordBagAnd, String stopWordBagOr, boolean removeStems, Set stemSet, boolean forSearch, boolean anyPrefix, boolean anySuffix, boolean isAnd) { - Set keywordSet = makeKeywordSet(str, separators, forSearch); - fixupKeywordSet(keywordSet, keywords, stopWordBagAnd, stopWordBagOr, removeStems, stemSet, forSearch, anyPrefix, anySuffix, isAnd); - } - - public static void fixupKeywordSet(Set keywordSet, Map keywords, String stopWordBagAnd, String stopWordBagOr, boolean removeStems, Set stemSet, boolean forSearch, boolean anyPrefix, boolean anySuffix, boolean isAnd) { - if (keywordSet == null) { - return; - } - - Iterator keywordIter = keywordSet.iterator(); - while (keywordIter.hasNext()) { - String token = (String) keywordIter.next(); - - // when cleaning up the tokens the ordering is inportant: check stop words, remove stems, then get rid of 1 character tokens (1 digit okay) - - // check stop words - String colonToken = ":" + token + ":"; - if (forSearch) { - if ((isAnd && stopWordBagAnd.indexOf(colonToken) >= 0) || (!isAnd && stopWordBagOr.indexOf(colonToken) >= 0)) { - continue; - } - } else { - if (stopWordBagOr.indexOf(colonToken) >= 0 && stopWordBagAnd.indexOf(colonToken) >= 0) { - continue; - } - } - - // remove stems - if (removeStems) { - Iterator stemIter = stemSet.iterator(); - while (stemIter.hasNext()) { - String stem = (String) stemIter.next(); - if (token.endsWith(stem)) { - token = token.substring(0, token.length() - stem.length()); - } - } - } - - // get rid of all length 0 tokens now - if (token.length() == 0) { - continue; - } - - // get rid of all length 1 character only tokens, pretty much useless - if (token.length() == 1 && Character.isLetter(token.charAt(0))) { - continue; - } - - if (forSearch) { - StringBuffer strSb = new StringBuffer(); - if (anyPrefix) strSb.append('%'); - strSb.append(token); - if (anySuffix) strSb.append('%'); - // replace all %% with % - int dblPercIdx = -1; - while ((dblPercIdx = strSb.indexOf("%%")) >= 0) { - //Debug.logInfo("before strSb: " + strSb, module); - strSb.replace(dblPercIdx, dblPercIdx+2, "%"); - //Debug.logInfo("after strSb: " + strSb, module); - } - token = strSb.toString(); - } - - // group by word, add up weight - Long curWeight = (Long) keywords.get(token); - if (curWeight == null) { - keywords.put(token, new Long(1)); - } else { - keywords.put(token, new Long(curWeight.longValue() + 1)); - } - } - } - - public static Set makeKeywordSet(String str, String separators, boolean forSearch) { - if (separators == null) separators = getSeparators(); - - Set keywords = new TreeSet(); - if (str.length() > 0) { - if (forSearch) { - // remove %_*? from separators if is for a search - StringBuffer sb = new StringBuffer(separators); - if (sb.indexOf("%") >= 0) sb.deleteCharAt(sb.indexOf("%")); - if (sb.indexOf("_") >= 0) sb.deleteCharAt(sb.indexOf("_")); - if (sb.indexOf("*") >= 0) sb.deleteCharAt(sb.indexOf("*")); - if (sb.indexOf("?") >= 0) sb.deleteCharAt(sb.indexOf("?")); - separators = sb.toString(); - } - - StringTokenizer tokener = new StringTokenizer(str, separators, false); - while (tokener.hasMoreTokens()) { - // make sure it is lower case before doing anything else - String token = tokener.nextToken().toLowerCase(); - - if (forSearch) { - // these characters will only be present if it is for a search, ie not for indexing - token = token.replace('*', '%'); - token = token.replace('?', '_'); - } - - keywords.add(token); - } - } - return keywords; - } - - public static Set fixKeywordsForSearch(Set keywordSet, boolean anyPrefix, boolean anySuffix, boolean removeStems, boolean isAnd) { - Map keywords = new HashMap(); - fixupKeywordSet(keywordSet, keywords, getStopWordBagAnd(), getStopWordBagOr(), removeStems, getStemSet(), true, anyPrefix, anySuffix, isAnd); - return keywords.keySet(); - } - - public static boolean expandKeywordForSearch(String enteredKeyword, Set addToSet, GenericDelegator delegator) { - boolean replaceEnteredKeyword = false; - - try { - List thesaurusList = delegator.findByAndCache("KeywordThesaurus", UtilMisc.toMap("enteredKeyword", enteredKeyword)); - Iterator thesaurusIter = thesaurusList.iterator(); - while (thesaurusIter.hasNext()) { - GenericValue keywordThesaurus = (GenericValue) thesaurusIter.next(); - String relationshipEnumId = (String) keywordThesaurus.get("relationshipEnumId"); - if (thesaurusRelsToInclude.contains(relationshipEnumId)) { - addToSet.addAll(makeKeywordSet(keywordThesaurus.getString("alternateKeyword"), null, true)); - if (thesaurusRelsForReplace.contains(relationshipEnumId)) { - replaceEnteredKeyword = true; - } - } - } - } catch (GenericEntityException e) { - Debug.logError(e, "Error expanding entered keyword", module); - } - - Debug.logInfo("Expanded keyword [" + enteredKeyword + "], got set: " + addToSet, module); - return replaceEnteredKeyword; - } public static void induceKeywords(GenericValue product) throws GenericEntityException { if (product == null) return; Modified: incubator/ofbiz/trunk/applications/product/src/org/ofbiz/product/product/ProductSearch.java URL: http://svn.apache.org/viewvc/incubator/ofbiz/trunk/applications/product/src/org/ofbiz/product/product/ProductSearch.java?view=diff&rev=485561&r1=485560&r2=485561 ============================================================================== --- incubator/ofbiz/trunk/applications/product/src/org/ofbiz/product/product/ProductSearch.java (original) +++ incubator/ofbiz/trunk/applications/product/src/org/ofbiz/product/product/ProductSearch.java Mon Dec 11 00:57:02 2006 @@ -33,6 +33,7 @@ import org.ofbiz.base.util.UtilMisc; import org.ofbiz.base.util.UtilProperties; import org.ofbiz.base.util.UtilValidate; +import org.ofbiz.common.KeywordSearchUtil; import org.ofbiz.entity.GenericDelegator; import org.ofbiz.entity.GenericEntityException; import org.ofbiz.entity.GenericValue; @@ -840,7 +841,7 @@ } public Set makeFullKeywordSet(GenericDelegator delegator) { - Set keywordSet = KeywordSearch.makeKeywordSet(this.keywordsString, null, true); + Set keywordSet = KeywordSearchUtil.makeKeywordSet(this.keywordsString, null, true); Set fullKeywordSet = new TreeSet(); // expand the keyword list according to the thesaurus and create a new set of keywords @@ -848,7 +849,7 @@ while (keywordIter.hasNext()) { String keyword = (String) keywordIter.next(); Set expandedSet = new TreeSet(); - boolean replaceEntered = KeywordSearch.expandKeywordForSearch(keyword, expandedSet, delegator); + boolean replaceEntered = KeywordSearchUtil.expandKeywordForSearch(keyword, expandedSet, delegator); fullKeywordSet.addAll(expandedSet); if (!replaceEntered) { fullKeywordSet.add(keyword); @@ -867,18 +868,18 @@ //but then the sets should be and'ed to produce the overall expression; create the SQL for this //needs some work as the current method only support a list of and'ed words and a list of or'ed words, not //a list of or'ed sets to be and'ed together - Set keywordSet = KeywordSearch.makeKeywordSet(this.keywordsString, null, true); + Set keywordSet = KeywordSearchUtil.makeKeywordSet(this.keywordsString, null, true); // expand the keyword list according to the thesaurus and create a new set of keywords Iterator keywordIter = keywordSet.iterator(); while (keywordIter.hasNext()) { String keyword = (String) keywordIter.next(); Set expandedSet = new TreeSet(); - boolean replaceEntered = KeywordSearch.expandKeywordForSearch(keyword, expandedSet, productSearchContext.getDelegator()); + boolean replaceEntered = KeywordSearchUtil.expandKeywordForSearch(keyword, expandedSet, productSearchContext.getDelegator()); if (!replaceEntered) { expandedSet.add(keyword); } - Set fixedSet = KeywordSearch.fixKeywordsForSearch(expandedSet, anyPrefix, anySuffix, removeStems, isAnd); + Set fixedSet = KeywordSearchUtil.fixKeywordsForSearch(expandedSet, anyPrefix, anySuffix, removeStems, isAnd); Set fixedKeywordSet = new HashSet(); fixedKeywordSet.addAll(fixedSet); productSearchContext.keywordFixedOrSetAndList.add(fixedKeywordSet); @@ -886,7 +887,7 @@ } else { // when isAnd is false, just add all of the new entries to the big list Set keywordFirstPass = makeFullKeywordSet(productSearchContext.getDelegator()); // includes keyword expansion, etc - Set keywordSet = KeywordSearch.fixKeywordsForSearch(keywordFirstPass, anyPrefix, anySuffix, removeStems, isAnd); + Set keywordSet = KeywordSearchUtil.fixKeywordsForSearch(keywordFirstPass, anyPrefix, anySuffix, removeStems, isAnd); productSearchContext.orKeywordFixedSet.addAll(keywordSet); } Added: incubator/ofbiz/trunk/framework/common/config/keywordsearch.properties URL: http://svn.apache.org/viewvc/incubator/ofbiz/trunk/framework/common/config/keywordsearch.properties?view=auto&rev=485561 ============================================================================== --- incubator/ofbiz/trunk/framework/common/config/keywordsearch.properties (added) +++ incubator/ofbiz/trunk/framework/common/config/keywordsearch.properties Mon Dec 11 00:57:02 2006 @@ -0,0 +1,33 @@ +##################################################################### +# Copyright 2001-2006 The Apache Software Foundation +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may not +# use this file except in compliance with the License. You may obtain a copy of +# the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. +##################################################################### +#### +# OFBiz General Keyword Search Settings +#### + +# The stop word bags contain words to be removed from search keyword list +# These should be colon separated and the list should start and end with colons +# The words should all be lower case +# The .or is for OR searches and the .and for AND searches +stop.word.bag.or=:the:and:or:not:if:is:it:of:to:a:as:at:in:into:on:onto:so:but:me:you:your:yes:no:this:that:there:their:because:for:while:with:without:get:put:have:has:do:does:same:different:use:using: +stop.word.bag.and=:the:and:or:not:if:is:it:of:to:a:as:at:in:into:on:onto:so:but:me:you:your:yes:no:this:that:there:their:because:for:while:with:without:get:put:have:has:do:does:same:different:use:using: + +# The stem bag is used to remove suffixes from words passed in the search string and found while indexing +# IF the remove.stems properties is true +remove.stems=true +stem.bag=:s:ies:y: + +# Characters that should be used as token separators when pulling out keywords +index.keyword.separators=;: ,.!?\t\"\'\r\n\\/()[]{}*%<>-+_ Propchange: incubator/ofbiz/trunk/framework/common/config/keywordsearch.properties ------------------------------------------------------------------------------ svn:eol-style = native Propchange: incubator/ofbiz/trunk/framework/common/config/keywordsearch.properties ------------------------------------------------------------------------------ svn:keywords = "Date Rev Author URL Id" Propchange: incubator/ofbiz/trunk/framework/common/config/keywordsearch.properties ------------------------------------------------------------------------------ svn:mime-type = text/plain Added: incubator/ofbiz/trunk/framework/common/src/org/ofbiz/common/KeywordSearchUtil.java URL: http://svn.apache.org/viewvc/incubator/ofbiz/trunk/framework/common/src/org/ofbiz/common/KeywordSearchUtil.java?view=auto&rev=485561 ============================================================================== --- incubator/ofbiz/trunk/framework/common/src/org/ofbiz/common/KeywordSearchUtil.java (added) +++ incubator/ofbiz/trunk/framework/common/src/org/ofbiz/common/KeywordSearchUtil.java Mon Dec 11 00:57:02 2006 @@ -0,0 +1,239 @@ +/* + * + * Copyright 2001-2006 The Apache Software Foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. You may obtain a copy of + * the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations + * under the License. + */ +package org.ofbiz.common; + +import java.util.HashMap; +import java.util.HashSet; +import java.util.Iterator; +import java.util.List; +import java.util.Map; +import java.util.Set; +import java.util.StringTokenizer; +import java.util.TreeSet; + +import org.ofbiz.base.util.Debug; +import org.ofbiz.base.util.UtilMisc; +import org.ofbiz.base.util.UtilProperties; +import org.ofbiz.base.util.UtilValidate; +import org.ofbiz.entity.GenericDelegator; +import org.ofbiz.entity.GenericEntityException; +import org.ofbiz.entity.GenericValue; + +/** + * A few utility methods related to Keyword Search. + */ +public class KeywordSearchUtil { + + public static final String module = KeywordSearchUtil.class.getName(); + + public static Set thesaurusRelsToInclude = new HashSet(); + public static Set thesaurusRelsForReplace = new HashSet(); + + static { + thesaurusRelsToInclude.add("KWTR_UF"); + thesaurusRelsToInclude.add("KWTR_USE"); + thesaurusRelsToInclude.add("KWTR_CS"); + thesaurusRelsToInclude.add("KWTR_NT"); + thesaurusRelsToInclude.add("KWTR_BT"); + thesaurusRelsToInclude.add("KWTR_RT"); + + thesaurusRelsForReplace.add("KWTR_USE"); + thesaurusRelsForReplace.add("KWTR_CS"); + } + + public static String getSeparators() { + // String separators = ";: ,.!?\t\"\'\r\n\\/()[]{}*%<>-+_"; + String seps = UtilProperties.getPropertyValue("keywordsearch", "index.keyword.separators", ";: ,.!?\t\"\'\r\n\\/()[]{}*%<>-+_"); + return seps; + } + + public static String getStopWordBagOr() { + return UtilProperties.getPropertyValue("keywordsearch", "stop.word.bag.or"); + } + public static String getStopWordBagAnd() { + return UtilProperties.getPropertyValue("keywordsearch", "stop.word.bag.and"); + } + + public static boolean getRemoveStems() { + String removeStemsStr = UtilProperties.getPropertyValue("keywordsearch", "remove.stems"); + return "true".equals(removeStemsStr); + } + public static Set getStemSet() { + String stemBag = UtilProperties.getPropertyValue("keywordsearch", "stem.bag"); + Set stemSet = new TreeSet(); + if (UtilValidate.isNotEmpty(stemBag)) { + String curToken; + StringTokenizer tokenizer = new StringTokenizer(stemBag, ": "); + while (tokenizer.hasMoreTokens()) { + curToken = tokenizer.nextToken(); + stemSet.add(curToken); + } + } + return stemSet; + } + + public static void processForKeywords(String str, Map keywords, boolean forSearch, boolean anyPrefix, boolean anySuffix, boolean isAnd) { + String separators = getSeparators(); + String stopWordBagOr = getStopWordBagOr(); + String stopWordBagAnd = getStopWordBagAnd(); + + boolean removeStems = getRemoveStems(); + Set stemSet = getStemSet(); + + processForKeywords(str, keywords, separators, stopWordBagAnd, stopWordBagOr, removeStems, stemSet, forSearch, anyPrefix, anySuffix, isAnd); + } + + public static void processKeywordsForIndex(String str, Map keywords, String separators, String stopWordBagAnd, String stopWordBagOr, boolean removeStems, Set stemSet) { + processForKeywords(str, keywords, separators, stopWordBagAnd, stopWordBagOr, removeStems, stemSet, false, false, false, false); + } + + public static void processForKeywords(String str, Map keywords, String separators, String stopWordBagAnd, String stopWordBagOr, boolean removeStems, Set stemSet, boolean forSearch, boolean anyPrefix, boolean anySuffix, boolean isAnd) { + Set keywordSet = makeKeywordSet(str, separators, forSearch); + fixupKeywordSet(keywordSet, keywords, stopWordBagAnd, stopWordBagOr, removeStems, stemSet, forSearch, anyPrefix, anySuffix, isAnd); + } + + public static void fixupKeywordSet(Set keywordSet, Map keywords, String stopWordBagAnd, String stopWordBagOr, boolean removeStems, Set stemSet, boolean forSearch, boolean anyPrefix, boolean anySuffix, boolean isAnd) { + if (keywordSet == null) { + return; + } + + Iterator keywordIter = keywordSet.iterator(); + while (keywordIter.hasNext()) { + String token = (String) keywordIter.next(); + + // when cleaning up the tokens the ordering is inportant: check stop words, remove stems, then get rid of 1 character tokens (1 digit okay) + + // check stop words + String colonToken = ":" + token + ":"; + if (forSearch) { + if ((isAnd && stopWordBagAnd.indexOf(colonToken) >= 0) || (!isAnd && stopWordBagOr.indexOf(colonToken) >= 0)) { + continue; + } + } else { + if (stopWordBagOr.indexOf(colonToken) >= 0 && stopWordBagAnd.indexOf(colonToken) >= 0) { + continue; + } + } + + // remove stems + if (removeStems) { + Iterator stemIter = stemSet.iterator(); + while (stemIter.hasNext()) { + String stem = (String) stemIter.next(); + if (token.endsWith(stem)) { + token = token.substring(0, token.length() - stem.length()); + } + } + } + + // get rid of all length 0 tokens now + if (token.length() == 0) { + continue; + } + + // get rid of all length 1 character only tokens, pretty much useless + if (token.length() == 1 && Character.isLetter(token.charAt(0))) { + continue; + } + + if (forSearch) { + StringBuffer strSb = new StringBuffer(); + if (anyPrefix) strSb.append('%'); + strSb.append(token); + if (anySuffix) strSb.append('%'); + // replace all %% with % + int dblPercIdx = -1; + while ((dblPercIdx = strSb.indexOf("%%")) >= 0) { + //Debug.logInfo("before strSb: " + strSb, module); + strSb.replace(dblPercIdx, dblPercIdx+2, "%"); + //Debug.logInfo("after strSb: " + strSb, module); + } + token = strSb.toString(); + } + + // group by word, add up weight + Long curWeight = (Long) keywords.get(token); + if (curWeight == null) { + keywords.put(token, new Long(1)); + } else { + keywords.put(token, new Long(curWeight.longValue() + 1)); + } + } + } + + public static Set makeKeywordSet(String str, String separators, boolean forSearch) { + if (separators == null) separators = getSeparators(); + + Set keywords = new TreeSet(); + if (str.length() > 0) { + if (forSearch) { + // remove %_*? from separators if is for a search + StringBuffer sb = new StringBuffer(separators); + if (sb.indexOf("%") >= 0) sb.deleteCharAt(sb.indexOf("%")); + if (sb.indexOf("_") >= 0) sb.deleteCharAt(sb.indexOf("_")); + if (sb.indexOf("*") >= 0) sb.deleteCharAt(sb.indexOf("*")); + if (sb.indexOf("?") >= 0) sb.deleteCharAt(sb.indexOf("?")); + separators = sb.toString(); + } + + StringTokenizer tokener = new StringTokenizer(str, separators, false); + while (tokener.hasMoreTokens()) { + // make sure it is lower case before doing anything else + String token = tokener.nextToken().toLowerCase(); + + if (forSearch) { + // these characters will only be present if it is for a search, ie not for indexing + token = token.replace('*', '%'); + token = token.replace('?', '_'); + } + + keywords.add(token); + } + } + return keywords; + } + + public static Set fixKeywordsForSearch(Set keywordSet, boolean anyPrefix, boolean anySuffix, boolean removeStems, boolean isAnd) { + Map keywords = new HashMap(); + fixupKeywordSet(keywordSet, keywords, getStopWordBagAnd(), getStopWordBagOr(), removeStems, getStemSet(), true, anyPrefix, anySuffix, isAnd); + return keywords.keySet(); + } + + public static boolean expandKeywordForSearch(String enteredKeyword, Set addToSet, GenericDelegator delegator) { + boolean replaceEnteredKeyword = false; + + try { + List thesaurusList = delegator.findByAndCache("KeywordThesaurus", UtilMisc.toMap("enteredKeyword", enteredKeyword)); + Iterator thesaurusIter = thesaurusList.iterator(); + while (thesaurusIter.hasNext()) { + GenericValue keywordThesaurus = (GenericValue) thesaurusIter.next(); + String relationshipEnumId = (String) keywordThesaurus.get("relationshipEnumId"); + if (thesaurusRelsToInclude.contains(relationshipEnumId)) { + addToSet.addAll(makeKeywordSet(keywordThesaurus.getString("alternateKeyword"), null, true)); + if (thesaurusRelsForReplace.contains(relationshipEnumId)) { + replaceEnteredKeyword = true; + } + } + } + } catch (GenericEntityException e) { + Debug.logError(e, "Error expanding entered keyword", module); + } + + Debug.logInfo("Expanded keyword [" + enteredKeyword + "], got set: " + addToSet, module); + return replaceEnteredKeyword; + } +} Propchange: incubator/ofbiz/trunk/framework/common/src/org/ofbiz/common/KeywordSearchUtil.java ------------------------------------------------------------------------------ svn:eol-style = native Propchange: incubator/ofbiz/trunk/framework/common/src/org/ofbiz/common/KeywordSearchUtil.java ------------------------------------------------------------------------------ svn:keywords = "Date Rev Author URL Id" Propchange: incubator/ofbiz/trunk/framework/common/src/org/ofbiz/common/KeywordSearchUtil.java ------------------------------------------------------------------------------ svn:mime-type = text/plain |
Free forum by Nabble | Edit this page |