svn commit: r485561 - in /incubator/ofbiz/trunk: applications/product/config/ applications/product/src/org/ofbiz/product/product/ framework/common/config/ framework/common/src/org/ofbiz/common/

Previous Topic Next Topic
 
classic Classic list List threaded Threaded
1 message Options
Reply | Threaded
Open this post in threaded view
|

svn commit: r485561 - in /incubator/ofbiz/trunk: applications/product/config/ applications/product/src/org/ofbiz/product/product/ framework/common/config/ framework/common/src/org/ofbiz/common/

jonesde
Author: jonesde
Date: Mon Dec 11 00:57:02 2006
New Revision: 485561

URL: http://svn.apache.org/viewvc?view=rev&rev=485561
Log:
Refactored KeywordSearch class to move some more generic search code and properties to the common component

Added:
    incubator/ofbiz/trunk/framework/common/config/keywordsearch.properties   (with props)
    incubator/ofbiz/trunk/framework/common/src/org/ofbiz/common/KeywordSearchUtil.java   (with props)
Modified:
    incubator/ofbiz/trunk/applications/product/config/prodsearch.properties
    incubator/ofbiz/trunk/applications/product/src/org/ofbiz/product/product/KeywordIndex.java
    incubator/ofbiz/trunk/applications/product/src/org/ofbiz/product/product/KeywordSearch.java
    incubator/ofbiz/trunk/applications/product/src/org/ofbiz/product/product/ProductSearch.java

Modified: incubator/ofbiz/trunk/applications/product/config/prodsearch.properties
URL: http://svn.apache.org/viewvc/incubator/ofbiz/trunk/applications/product/config/prodsearch.properties?view=diff&rev=485561&r1=485560&r2=485561
==============================================================================
--- incubator/ofbiz/trunk/applications/product/config/prodsearch.properties (original)
+++ incubator/ofbiz/trunk/applications/product/config/prodsearch.properties Mon Dec 11 00:57:02 2006
@@ -1,5 +1,4 @@
 #####################################################################
-#
 # Copyright 2001-2006 The Apache Software Foundation
 #
 # Licensed under the Apache License, Version 2.0 (the "License"); you may not
@@ -15,23 +14,8 @@
 # under the License.
 #####################################################################
 ####
-# OFBiz Search Settings
+# OFBiz Product Search Settings
 ####
-
-# The stop word bags contain words to be removed from search keyword list
-# These should be colon separated and the list should start and end with colons
-# The words should all be lower case
-# The .or is for OR searches and the .and for AND searches
-stop.word.bag.or=:the:and:or:not:if:is:it:of:to:a:as:at:in:into:on:onto:so:but:me:you:your:yes:no:this:that:there:their:because:for:while:with:without:get:put:have:has:do:does:same:different:use:using:
-stop.word.bag.and=:the:and:or:not:if:is:it:of:to:a:as:at:in:into:on:onto:so:but:me:you:your:yes:no:this:that:there:their:because:for:while:with:without:get:put:have:has:do:does:same:different:use:using:
-
-# The stem bag is used to remove suffixes from words passed in the search string and found while indexing
-# IF the remove.stems properties is true
-remove.stems=true
-stem.bag=:s:ies:y:
-
-# Characters that should be used as token separators when pulling out keywords
-index.keyword.separators=;: ,.!?\t\"\'\r\n\\/()[]{}*%<>-+_
 
 # Assign a weight to each product keyword source during indexing/keyword induction
 index.weight.Product.productId=1

Modified: incubator/ofbiz/trunk/applications/product/src/org/ofbiz/product/product/KeywordIndex.java
URL: http://svn.apache.org/viewvc/incubator/ofbiz/trunk/applications/product/src/org/ofbiz/product/product/KeywordIndex.java?view=diff&rev=485561&r1=485560&r2=485561
==============================================================================
--- incubator/ofbiz/trunk/applications/product/src/org/ofbiz/product/product/KeywordIndex.java (original)
+++ incubator/ofbiz/trunk/applications/product/src/org/ofbiz/product/product/KeywordIndex.java Mon Dec 11 00:57:02 2006
@@ -32,6 +32,7 @@
 import org.ofbiz.base.util.UtilDateTime;
 import org.ofbiz.base.util.UtilMisc;
 import org.ofbiz.base.util.UtilProperties;
+import org.ofbiz.common.KeywordSearchUtil;
 import org.ofbiz.content.data.DataResourceWorker;
 import org.ofbiz.entity.GenericDelegator;
 import org.ofbiz.entity.GenericEntityException;
@@ -68,11 +69,11 @@
         String productId = product.getString("productId");
 
         // get these in advance just once since they will be used many times for the multiple strings to index
-        String separators = KeywordSearch.getSeparators();
-        String stopWordBagOr = KeywordSearch.getStopWordBagOr();
-        String stopWordBagAnd = KeywordSearch.getStopWordBagAnd();
-        boolean removeStems = KeywordSearch.getRemoveStems();
-        Set stemSet = KeywordSearch.getStemSet();
+        String separators = KeywordSearchUtil.getSeparators();
+        String stopWordBagOr = KeywordSearchUtil.getStopWordBagOr();
+        String stopWordBagAnd = KeywordSearchUtil.getStopWordBagAnd();
+        boolean removeStems = KeywordSearchUtil.getRemoveStems();
+        Set stemSet = KeywordSearchUtil.getStemSet();
         
         Map keywords = new TreeMap();
         List strings = new ArrayList(50);
@@ -176,7 +177,7 @@
         while (strIter.hasNext()) {
             String str = (String) strIter.next();
             // call process keywords method here
-            KeywordSearch.processKeywordsForIndex(str, keywords, separators, stopWordBagAnd, stopWordBagOr, removeStems, stemSet);
+            KeywordSearchUtil.processKeywordsForIndex(str, keywords, separators, stopWordBagAnd, stopWordBagOr, removeStems, stemSet);
         }
 
         List toBeStored = new LinkedList();

Modified: incubator/ofbiz/trunk/applications/product/src/org/ofbiz/product/product/KeywordSearch.java
URL: http://svn.apache.org/viewvc/incubator/ofbiz/trunk/applications/product/src/org/ofbiz/product/product/KeywordSearch.java?view=diff&rev=485561&r1=485560&r2=485561
==============================================================================
--- incubator/ofbiz/trunk/applications/product/src/org/ofbiz/product/product/KeywordSearch.java (original)
+++ incubator/ofbiz/trunk/applications/product/src/org/ofbiz/product/product/KeywordSearch.java Mon Dec 11 00:57:02 2006
@@ -16,227 +16,15 @@
  */
 package org.ofbiz.product.product;
 
-import java.util.HashMap;
-import java.util.HashSet;
-import java.util.Iterator;
-import java.util.List;
-import java.util.Map;
-import java.util.Set;
-import java.util.StringTokenizer;
-import java.util.TreeSet;
-
-import org.ofbiz.base.util.Debug;
-import org.ofbiz.base.util.UtilMisc;
-import org.ofbiz.base.util.UtilProperties;
-import org.ofbiz.base.util.UtilValidate;
-import org.ofbiz.entity.GenericDelegator;
 import org.ofbiz.entity.GenericEntityException;
 import org.ofbiz.entity.GenericValue;
 
 /**
- *  Does a product search by keyword using the PRODUCT_KEYWORD table.
- *  <br/>Special thanks to Glen Thorne and the Weblogic Commerce Server for ideas.
+ * These are leftover utility methods from the product search code, just calling over to KeywordIndex now; can probably be removed soon.
  */
 public class KeywordSearch {
 
     public static final String module = KeywordSearch.class.getName();
-
-    public static Set thesaurusRelsToInclude = new HashSet();
-    public static Set thesaurusRelsForReplace = new HashSet();
-
-    static {
-        thesaurusRelsToInclude.add("KWTR_UF");
-        thesaurusRelsToInclude.add("KWTR_USE");
-        thesaurusRelsToInclude.add("KWTR_CS");
-        thesaurusRelsToInclude.add("KWTR_NT");
-        thesaurusRelsToInclude.add("KWTR_BT");
-        thesaurusRelsToInclude.add("KWTR_RT");
-
-        thesaurusRelsForReplace.add("KWTR_USE");
-        thesaurusRelsForReplace.add("KWTR_CS");
-    }
-
-    public static String getSeparators() {
-        // String separators = ";: ,.!?\t\"\'\r\n\\/()[]{}*%<>-+_";
-        String seps = UtilProperties.getPropertyValue("prodsearch", "index.keyword.separators", ";: ,.!?\t\"\'\r\n\\/()[]{}*%<>-+_");
-        return seps;
-    }
-    
-    public static String getStopWordBagOr() {
-        return UtilProperties.getPropertyValue("prodsearch", "stop.word.bag.or");
-    }
-    public static String getStopWordBagAnd() {
-        return UtilProperties.getPropertyValue("prodsearch", "stop.word.bag.and");
-    }
-    
-    public static boolean getRemoveStems() {
-        String removeStemsStr = UtilProperties.getPropertyValue("prodsearch", "remove.stems");
-        return "true".equals(removeStemsStr);
-    }
-    public static Set getStemSet() {
-        String stemBag = UtilProperties.getPropertyValue("prodsearch", "stem.bag");
-        Set stemSet = new TreeSet();
-        if (UtilValidate.isNotEmpty(stemBag)) {
-            String curToken;
-            StringTokenizer tokenizer = new StringTokenizer(stemBag, ": ");
-            while (tokenizer.hasMoreTokens()) {
-                curToken = tokenizer.nextToken();
-                stemSet.add(curToken);
-            }
-        }
-        return stemSet;
-    }
-    
-    public static void processForKeywords(String str, Map keywords, boolean forSearch, boolean anyPrefix, boolean anySuffix, boolean isAnd) {
-        String separators = getSeparators();
-        String stopWordBagOr = getStopWordBagOr();
-        String stopWordBagAnd = getStopWordBagAnd();
-
-        boolean removeStems = getRemoveStems();
-        Set stemSet = getStemSet();
-        
-        processForKeywords(str, keywords, separators, stopWordBagAnd, stopWordBagOr, removeStems, stemSet, forSearch, anyPrefix, anySuffix, isAnd);
-    }
-    
-    public static void processKeywordsForIndex(String str, Map keywords, String separators, String stopWordBagAnd, String stopWordBagOr, boolean removeStems, Set stemSet) {
-        processForKeywords(str, keywords, separators, stopWordBagAnd, stopWordBagOr, removeStems, stemSet, false, false, false, false);
-    }
-
-    public static void processForKeywords(String str, Map keywords, String separators, String stopWordBagAnd, String stopWordBagOr, boolean removeStems, Set stemSet, boolean forSearch, boolean anyPrefix, boolean anySuffix, boolean isAnd) {
-        Set keywordSet = makeKeywordSet(str, separators, forSearch);
-        fixupKeywordSet(keywordSet, keywords, stopWordBagAnd, stopWordBagOr, removeStems, stemSet, forSearch, anyPrefix, anySuffix, isAnd);
-    }
-    
-    public static void fixupKeywordSet(Set keywordSet, Map keywords, String stopWordBagAnd, String stopWordBagOr, boolean removeStems, Set stemSet, boolean forSearch, boolean anyPrefix, boolean anySuffix, boolean isAnd) {
-        if (keywordSet == null) {
-            return;
-        }
-        
-        Iterator keywordIter = keywordSet.iterator();
-        while (keywordIter.hasNext()) {
-            String token = (String) keywordIter.next();
-            
-            // when cleaning up the tokens the ordering is inportant: check stop words, remove stems, then get rid of 1 character tokens (1 digit okay)
-            
-            // check stop words
-            String colonToken = ":" + token + ":";
-            if (forSearch) {
-                if ((isAnd && stopWordBagAnd.indexOf(colonToken) >= 0) || (!isAnd && stopWordBagOr.indexOf(colonToken) >= 0)) {
-                    continue;
-                }
-            } else {
-                if (stopWordBagOr.indexOf(colonToken) >= 0 && stopWordBagAnd.indexOf(colonToken) >= 0) {
-                    continue;
-                }
-            }
-            
-            // remove stems
-            if (removeStems) {
-                Iterator stemIter = stemSet.iterator();
-                while (stemIter.hasNext()) {
-                    String stem = (String) stemIter.next();
-                    if (token.endsWith(stem)) {
-                        token = token.substring(0, token.length() - stem.length());
-                    }
-                }
-            }
-            
-            // get rid of all length 0 tokens now
-            if (token.length() == 0) {
-                continue;
-            }
-            
-            // get rid of all length 1 character only tokens, pretty much useless
-            if (token.length() == 1 && Character.isLetter(token.charAt(0))) {
-                continue;
-            }
-
-            if (forSearch) {
-                StringBuffer strSb = new StringBuffer();
-                if (anyPrefix) strSb.append('%');
-                strSb.append(token);
-                if (anySuffix) strSb.append('%');
-                // replace all %% with %
-                int dblPercIdx = -1;
-                while ((dblPercIdx = strSb.indexOf("%%")) >= 0) {
-                    //Debug.logInfo("before strSb: " + strSb, module);
-                    strSb.replace(dblPercIdx, dblPercIdx+2, "%");
-                    //Debug.logInfo("after strSb: " + strSb, module);
-                }
-                token = strSb.toString();
-            }
-            
-            // group by word, add up weight
-            Long curWeight = (Long) keywords.get(token);
-            if (curWeight == null) {
-                keywords.put(token, new Long(1));
-            } else {
-                keywords.put(token, new Long(curWeight.longValue() + 1));
-            }
-        }
-    }
-
-    public static Set makeKeywordSet(String str, String separators, boolean forSearch) {
-        if (separators == null) separators = getSeparators();
-        
-        Set keywords = new TreeSet();
-        if (str.length() > 0) {
-            if (forSearch) {
-                // remove %_*? from separators if is for a search
-                StringBuffer sb = new StringBuffer(separators);
-                if (sb.indexOf("%") >= 0) sb.deleteCharAt(sb.indexOf("%"));
-                if (sb.indexOf("_") >= 0) sb.deleteCharAt(sb.indexOf("_"));
-                if (sb.indexOf("*") >= 0) sb.deleteCharAt(sb.indexOf("*"));
-                if (sb.indexOf("?") >= 0) sb.deleteCharAt(sb.indexOf("?"));
-                separators = sb.toString();
-            }
-            
-            StringTokenizer tokener = new StringTokenizer(str, separators, false);
-            while (tokener.hasMoreTokens()) {
-                // make sure it is lower case before doing anything else
-                String token = tokener.nextToken().toLowerCase();
-
-                if (forSearch) {
-                    // these characters will only be present if it is for a search, ie not for indexing
-                    token = token.replace('*', '%');
-                    token = token.replace('?', '_');
-                }
-                
-                keywords.add(token);
-            }
-        }
-        return keywords;
-    }
-    
-    public static Set fixKeywordsForSearch(Set keywordSet, boolean anyPrefix, boolean anySuffix, boolean removeStems, boolean isAnd) {
-        Map keywords = new HashMap();
-        fixupKeywordSet(keywordSet, keywords, getStopWordBagAnd(), getStopWordBagOr(), removeStems, getStemSet(), true, anyPrefix, anySuffix, isAnd);
-        return keywords.keySet();
-    }
-
-    public static boolean expandKeywordForSearch(String enteredKeyword, Set addToSet, GenericDelegator delegator) {
-        boolean replaceEnteredKeyword = false;
-
-        try {
-            List thesaurusList = delegator.findByAndCache("KeywordThesaurus", UtilMisc.toMap("enteredKeyword", enteredKeyword));
-            Iterator thesaurusIter = thesaurusList.iterator();
-            while (thesaurusIter.hasNext()) {
-                GenericValue keywordThesaurus = (GenericValue) thesaurusIter.next();
-                String relationshipEnumId = (String) keywordThesaurus.get("relationshipEnumId");
-                if (thesaurusRelsToInclude.contains(relationshipEnumId)) {
-                    addToSet.addAll(makeKeywordSet(keywordThesaurus.getString("alternateKeyword"), null, true));
-                    if (thesaurusRelsForReplace.contains(relationshipEnumId)) {
-                        replaceEnteredKeyword = true;
-                    }
-                }
-            }
-        } catch (GenericEntityException e) {
-            Debug.logError(e, "Error expanding entered keyword", module);
-        }
-
-        Debug.logInfo("Expanded keyword [" + enteredKeyword + "], got set: " + addToSet, module);
-        return replaceEnteredKeyword;
-    }
 
     public static void induceKeywords(GenericValue product) throws GenericEntityException {
         if (product == null) return;

Modified: incubator/ofbiz/trunk/applications/product/src/org/ofbiz/product/product/ProductSearch.java
URL: http://svn.apache.org/viewvc/incubator/ofbiz/trunk/applications/product/src/org/ofbiz/product/product/ProductSearch.java?view=diff&rev=485561&r1=485560&r2=485561
==============================================================================
--- incubator/ofbiz/trunk/applications/product/src/org/ofbiz/product/product/ProductSearch.java (original)
+++ incubator/ofbiz/trunk/applications/product/src/org/ofbiz/product/product/ProductSearch.java Mon Dec 11 00:57:02 2006
@@ -33,6 +33,7 @@
 import org.ofbiz.base.util.UtilMisc;
 import org.ofbiz.base.util.UtilProperties;
 import org.ofbiz.base.util.UtilValidate;
+import org.ofbiz.common.KeywordSearchUtil;
 import org.ofbiz.entity.GenericDelegator;
 import org.ofbiz.entity.GenericEntityException;
 import org.ofbiz.entity.GenericValue;
@@ -840,7 +841,7 @@
         }
 
         public Set makeFullKeywordSet(GenericDelegator delegator) {
-            Set keywordSet = KeywordSearch.makeKeywordSet(this.keywordsString, null, true);
+            Set keywordSet = KeywordSearchUtil.makeKeywordSet(this.keywordsString, null, true);
             Set fullKeywordSet = new TreeSet();
 
             // expand the keyword list according to the thesaurus and create a new set of keywords
@@ -848,7 +849,7 @@
             while (keywordIter.hasNext()) {
                 String keyword = (String) keywordIter.next();
                 Set expandedSet = new TreeSet();
-                boolean replaceEntered = KeywordSearch.expandKeywordForSearch(keyword, expandedSet, delegator);
+                boolean replaceEntered = KeywordSearchUtil.expandKeywordForSearch(keyword, expandedSet, delegator);
                 fullKeywordSet.addAll(expandedSet);
                 if (!replaceEntered) {
                     fullKeywordSet.add(keyword);
@@ -867,18 +868,18 @@
                 //but then the sets should be and'ed to produce the overall expression; create the SQL for this
                 //needs some work as the current method only support a list of and'ed words and a list of or'ed words, not
                 //a list of or'ed sets to be and'ed together
-                Set keywordSet = KeywordSearch.makeKeywordSet(this.keywordsString, null, true);
+                Set keywordSet = KeywordSearchUtil.makeKeywordSet(this.keywordsString, null, true);
 
                 // expand the keyword list according to the thesaurus and create a new set of keywords
                 Iterator keywordIter = keywordSet.iterator();
                 while (keywordIter.hasNext()) {
                     String keyword = (String) keywordIter.next();
                     Set expandedSet = new TreeSet();
-                    boolean replaceEntered = KeywordSearch.expandKeywordForSearch(keyword, expandedSet, productSearchContext.getDelegator());
+                    boolean replaceEntered = KeywordSearchUtil.expandKeywordForSearch(keyword, expandedSet, productSearchContext.getDelegator());
                     if (!replaceEntered) {
                         expandedSet.add(keyword);
                     }
-                    Set fixedSet = KeywordSearch.fixKeywordsForSearch(expandedSet, anyPrefix, anySuffix, removeStems, isAnd);
+                    Set fixedSet = KeywordSearchUtil.fixKeywordsForSearch(expandedSet, anyPrefix, anySuffix, removeStems, isAnd);
                     Set fixedKeywordSet = new HashSet();
                     fixedKeywordSet.addAll(fixedSet);
                     productSearchContext.keywordFixedOrSetAndList.add(fixedKeywordSet);
@@ -886,7 +887,7 @@
             } else {
                 // when isAnd is false, just add all of the new entries to the big list
                 Set keywordFirstPass = makeFullKeywordSet(productSearchContext.getDelegator()); // includes keyword expansion, etc
-                Set keywordSet = KeywordSearch.fixKeywordsForSearch(keywordFirstPass, anyPrefix, anySuffix, removeStems, isAnd);
+                Set keywordSet = KeywordSearchUtil.fixKeywordsForSearch(keywordFirstPass, anyPrefix, anySuffix, removeStems, isAnd);
                 productSearchContext.orKeywordFixedSet.addAll(keywordSet);
             }
 

Added: incubator/ofbiz/trunk/framework/common/config/keywordsearch.properties
URL: http://svn.apache.org/viewvc/incubator/ofbiz/trunk/framework/common/config/keywordsearch.properties?view=auto&rev=485561
==============================================================================
--- incubator/ofbiz/trunk/framework/common/config/keywordsearch.properties (added)
+++ incubator/ofbiz/trunk/framework/common/config/keywordsearch.properties Mon Dec 11 00:57:02 2006
@@ -0,0 +1,33 @@
+#####################################################################
+# Copyright 2001-2006 The Apache Software Foundation
+#
+# Licensed under the Apache License, Version 2.0 (the "License"); you may not
+# use this file except in compliance with the License. You may obtain a copy of
+# the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+# License for the specific language governing permissions and limitations
+# under the License.
+#####################################################################
+####
+# OFBiz General Keyword Search Settings
+####
+
+# The stop word bags contain words to be removed from search keyword list
+# These should be colon separated and the list should start and end with colons
+# The words should all be lower case
+# The .or is for OR searches and the .and for AND searches
+stop.word.bag.or=:the:and:or:not:if:is:it:of:to:a:as:at:in:into:on:onto:so:but:me:you:your:yes:no:this:that:there:their:because:for:while:with:without:get:put:have:has:do:does:same:different:use:using:
+stop.word.bag.and=:the:and:or:not:if:is:it:of:to:a:as:at:in:into:on:onto:so:but:me:you:your:yes:no:this:that:there:their:because:for:while:with:without:get:put:have:has:do:does:same:different:use:using:
+
+# The stem bag is used to remove suffixes from words passed in the search string and found while indexing
+# IF the remove.stems property is true
+remove.stems=true
+stem.bag=:s:ies:y:
+
+# Characters that should be used as token separators when pulling out keywords
+index.keyword.separators=;: ,.!?\t\"\'\r\n\\/()[]{}*%<>-+_

Propchange: incubator/ofbiz/trunk/framework/common/config/keywordsearch.properties
------------------------------------------------------------------------------
    svn:eol-style = native

Propchange: incubator/ofbiz/trunk/framework/common/config/keywordsearch.properties
------------------------------------------------------------------------------
    svn:keywords = "Date Rev Author URL Id"

Propchange: incubator/ofbiz/trunk/framework/common/config/keywordsearch.properties
------------------------------------------------------------------------------
    svn:mime-type = text/plain

Added: incubator/ofbiz/trunk/framework/common/src/org/ofbiz/common/KeywordSearchUtil.java
URL: http://svn.apache.org/viewvc/incubator/ofbiz/trunk/framework/common/src/org/ofbiz/common/KeywordSearchUtil.java?view=auto&rev=485561
==============================================================================
--- incubator/ofbiz/trunk/framework/common/src/org/ofbiz/common/KeywordSearchUtil.java (added)
+++ incubator/ofbiz/trunk/framework/common/src/org/ofbiz/common/KeywordSearchUtil.java Mon Dec 11 00:57:02 2006
@@ -0,0 +1,239 @@
+/*
+ *
+ * Copyright 2001-2006 The Apache Software Foundation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License"); you may not
+ * use this file except in compliance with the License. You may obtain a copy of
+ * the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+ * License for the specific language governing permissions and limitations
+ * under the License.
+ */
+package org.ofbiz.common;
+
+import java.util.HashMap;
+import java.util.HashSet;
+import java.util.Iterator;
+import java.util.List;
+import java.util.Map;
+import java.util.Set;
+import java.util.StringTokenizer;
+import java.util.TreeSet;
+
+import org.ofbiz.base.util.Debug;
+import org.ofbiz.base.util.UtilMisc;
+import org.ofbiz.base.util.UtilProperties;
+import org.ofbiz.base.util.UtilValidate;
+import org.ofbiz.entity.GenericDelegator;
+import org.ofbiz.entity.GenericEntityException;
+import org.ofbiz.entity.GenericValue;
+
+/**
+ * A few utility methods related to Keyword Search.
+ */
+public class KeywordSearchUtil {
+
+    public static final String module = KeywordSearchUtil.class.getName();
+
+    public static Set thesaurusRelsToInclude = new HashSet();
+    public static Set thesaurusRelsForReplace = new HashSet();
+
+    static {
+        thesaurusRelsToInclude.add("KWTR_UF");
+        thesaurusRelsToInclude.add("KWTR_USE");
+        thesaurusRelsToInclude.add("KWTR_CS");
+        thesaurusRelsToInclude.add("KWTR_NT");
+        thesaurusRelsToInclude.add("KWTR_BT");
+        thesaurusRelsToInclude.add("KWTR_RT");
+
+        thesaurusRelsForReplace.add("KWTR_USE");
+        thesaurusRelsForReplace.add("KWTR_CS");
+    }
+
+    public static String getSeparators() {
+        // String separators = ";: ,.!?\t\"\'\r\n\\/()[]{}*%<>-+_";
+        String seps = UtilProperties.getPropertyValue("keywordsearch", "index.keyword.separators", ";: ,.!?\t\"\'\r\n\\/()[]{}*%<>-+_");
+        return seps;
+    }
+    
+    public static String getStopWordBagOr() {
+        return UtilProperties.getPropertyValue("keywordsearch", "stop.word.bag.or");
+    }
+    public static String getStopWordBagAnd() {
+        return UtilProperties.getPropertyValue("keywordsearch", "stop.word.bag.and");
+    }
+    
+    public static boolean getRemoveStems() {
+        String removeStemsStr = UtilProperties.getPropertyValue("keywordsearch", "remove.stems");
+        return "true".equals(removeStemsStr);
+    }
+    public static Set getStemSet() {
+        String stemBag = UtilProperties.getPropertyValue("keywordsearch", "stem.bag");
+        Set stemSet = new TreeSet();
+        if (UtilValidate.isNotEmpty(stemBag)) {
+            String curToken;
+            StringTokenizer tokenizer = new StringTokenizer(stemBag, ": ");
+            while (tokenizer.hasMoreTokens()) {
+                curToken = tokenizer.nextToken();
+                stemSet.add(curToken);
+            }
+        }
+        return stemSet;
+    }
+    
+    public static void processForKeywords(String str, Map keywords, boolean forSearch, boolean anyPrefix, boolean anySuffix, boolean isAnd) {
+        String separators = getSeparators();
+        String stopWordBagOr = getStopWordBagOr();
+        String stopWordBagAnd = getStopWordBagAnd();
+
+        boolean removeStems = getRemoveStems();
+        Set stemSet = getStemSet();
+        
+        processForKeywords(str, keywords, separators, stopWordBagAnd, stopWordBagOr, removeStems, stemSet, forSearch, anyPrefix, anySuffix, isAnd);
+    }
+    
+    public static void processKeywordsForIndex(String str, Map keywords, String separators, String stopWordBagAnd, String stopWordBagOr, boolean removeStems, Set stemSet) {
+        processForKeywords(str, keywords, separators, stopWordBagAnd, stopWordBagOr, removeStems, stemSet, false, false, false, false);
+    }
+
+    public static void processForKeywords(String str, Map keywords, String separators, String stopWordBagAnd, String stopWordBagOr, boolean removeStems, Set stemSet, boolean forSearch, boolean anyPrefix, boolean anySuffix, boolean isAnd) {
+        Set keywordSet = makeKeywordSet(str, separators, forSearch);
+        fixupKeywordSet(keywordSet, keywords, stopWordBagAnd, stopWordBagOr, removeStems, stemSet, forSearch, anyPrefix, anySuffix, isAnd);
+    }
+    
+    public static void fixupKeywordSet(Set keywordSet, Map keywords, String stopWordBagAnd, String stopWordBagOr, boolean removeStems, Set stemSet, boolean forSearch, boolean anyPrefix, boolean anySuffix, boolean isAnd) {
+        if (keywordSet == null) {
+            return;
+        }
+        
+        Iterator keywordIter = keywordSet.iterator();
+        while (keywordIter.hasNext()) {
+            String token = (String) keywordIter.next();
+            
+            // when cleaning up the tokens the ordering is important: check stop words, remove stems, then get rid of 1 character tokens (1 digit okay)
+            
+            // check stop words
+            String colonToken = ":" + token + ":";
+            if (forSearch) {
+                if ((isAnd && stopWordBagAnd.indexOf(colonToken) >= 0) || (!isAnd && stopWordBagOr.indexOf(colonToken) >= 0)) {
+                    continue;
+                }
+            } else {
+                if (stopWordBagOr.indexOf(colonToken) >= 0 && stopWordBagAnd.indexOf(colonToken) >= 0) {
+                    continue;
+                }
+            }
+            
+            // remove stems
+            if (removeStems) {
+                Iterator stemIter = stemSet.iterator();
+                while (stemIter.hasNext()) {
+                    String stem = (String) stemIter.next();
+                    if (token.endsWith(stem)) {
+                        token = token.substring(0, token.length() - stem.length());
+                    }
+                }
+            }
+            
+            // get rid of all length 0 tokens now
+            if (token.length() == 0) {
+                continue;
+            }
+            
+            // get rid of all length 1 character only tokens, pretty much useless
+            if (token.length() == 1 && Character.isLetter(token.charAt(0))) {
+                continue;
+            }
+
+            if (forSearch) {
+                StringBuffer strSb = new StringBuffer();
+                if (anyPrefix) strSb.append('%');
+                strSb.append(token);
+                if (anySuffix) strSb.append('%');
+                // replace all %% with %
+                int dblPercIdx = -1;
+                while ((dblPercIdx = strSb.indexOf("%%")) >= 0) {
+                    //Debug.logInfo("before strSb: " + strSb, module);
+                    strSb.replace(dblPercIdx, dblPercIdx+2, "%");
+                    //Debug.logInfo("after strSb: " + strSb, module);
+                }
+                token = strSb.toString();
+            }
+            
+            // group by word, add up weight
+            Long curWeight = (Long) keywords.get(token);
+            if (curWeight == null) {
+                keywords.put(token, new Long(1));
+            } else {
+                keywords.put(token, new Long(curWeight.longValue() + 1));
+            }
+        }
+    }
+
+    public static Set makeKeywordSet(String str, String separators, boolean forSearch) {
+        if (separators == null) separators = getSeparators();
+        
+        Set keywords = new TreeSet();
+        if (str.length() > 0) {
+            if (forSearch) {
+                // remove %_*? from separators if it is for a search
+                StringBuffer sb = new StringBuffer(separators);
+                if (sb.indexOf("%") >= 0) sb.deleteCharAt(sb.indexOf("%"));
+                if (sb.indexOf("_") >= 0) sb.deleteCharAt(sb.indexOf("_"));
+                if (sb.indexOf("*") >= 0) sb.deleteCharAt(sb.indexOf("*"));
+                if (sb.indexOf("?") >= 0) sb.deleteCharAt(sb.indexOf("?"));
+                separators = sb.toString();
+            }
+            
+            StringTokenizer tokener = new StringTokenizer(str, separators, false);
+            while (tokener.hasMoreTokens()) {
+                // make sure it is lower case before doing anything else
+                String token = tokener.nextToken().toLowerCase();
+
+                if (forSearch) {
+                    // these characters will only be present if it is for a search, ie not for indexing
+                    token = token.replace('*', '%');
+                    token = token.replace('?', '_');
+                }
+                
+                keywords.add(token);
+            }
+        }
+        return keywords;
+    }
+    
+    public static Set fixKeywordsForSearch(Set keywordSet, boolean anyPrefix, boolean anySuffix, boolean removeStems, boolean isAnd) {
+        Map keywords = new HashMap();
+        fixupKeywordSet(keywordSet, keywords, getStopWordBagAnd(), getStopWordBagOr(), removeStems, getStemSet(), true, anyPrefix, anySuffix, isAnd);
+        return keywords.keySet();
+    }
+
+    public static boolean expandKeywordForSearch(String enteredKeyword, Set addToSet, GenericDelegator delegator) {
+        boolean replaceEnteredKeyword = false;
+
+        try {
+            List thesaurusList = delegator.findByAndCache("KeywordThesaurus", UtilMisc.toMap("enteredKeyword", enteredKeyword));
+            Iterator thesaurusIter = thesaurusList.iterator();
+            while (thesaurusIter.hasNext()) {
+                GenericValue keywordThesaurus = (GenericValue) thesaurusIter.next();
+                String relationshipEnumId = (String) keywordThesaurus.get("relationshipEnumId");
+                if (thesaurusRelsToInclude.contains(relationshipEnumId)) {
+                    addToSet.addAll(makeKeywordSet(keywordThesaurus.getString("alternateKeyword"), null, true));
+                    if (thesaurusRelsForReplace.contains(relationshipEnumId)) {
+                        replaceEnteredKeyword = true;
+                    }
+                }
+            }
+        } catch (GenericEntityException e) {
+            Debug.logError(e, "Error expanding entered keyword", module);
+        }
+
+        Debug.logInfo("Expanded keyword [" + enteredKeyword + "], got set: " + addToSet, module);
+        return replaceEnteredKeyword;
+    }
+}

Propchange: incubator/ofbiz/trunk/framework/common/src/org/ofbiz/common/KeywordSearchUtil.java
------------------------------------------------------------------------------
    svn:eol-style = native

Propchange: incubator/ofbiz/trunk/framework/common/src/org/ofbiz/common/KeywordSearchUtil.java
------------------------------------------------------------------------------
    svn:keywords = "Date Rev Author URL Id"

Propchange: incubator/ofbiz/trunk/framework/common/src/org/ofbiz/common/KeywordSearchUtil.java
------------------------------------------------------------------------------
    svn:mime-type = text/plain