//  home   //  advanced search   //  news   //  categories   //  sql build chart   //  downloads   //  statistics
 ASP FAQ 
Home
ASP FAQ Tutorials

   8000XXXX Errors
   Alerts
   ASP.NET 2.0
   Classic ASP 1.0
   Databases
      Access DB & ADO
      General SQL Server & Access Articles
      MySQL
      Other Articles
      Schema Tutorials
      Sql Server 2000
      Sql Server 2005
   General Concepts
   Search Engine Optimization (SEO)

Contact Us
Site Map

Search

Web
aspfaq.com
tutorials.aspfaq.com
databases.aspfaq.com

ASP FAQ Tutorials :: Databases :: Other Articles :: How do I ignore common words in a search?


How do I ignore common words in a search?

Words like the, is, that, etc. can lead to "false positives" and produce too much noise. Words like these are often referred to as "noise words" and, for better results, should be left out of a query. 
 
My preferred method is to reduce the chatter by eliminating noise words before constructing my search query and passing it to the database. I can do this by maintaining an array in my ASP page containing the noise words I believe should be ignored. Whether I'm using an ad hoc query or a stored procedure, there's no sense wasting bandwidth and forcing the database to parse through words I know I don't want to be included in the search. 
 

Ad Hoc Query (Access / SQL Server) 
So, if I were using an ad hoc query, my ASP code might look like this: 
 
<% 
    ' Demonstrates stripping "noise words" from a search phrase before
    ' building an ad hoc LIKE query.
    '
    ' Output (via Response.Write):
    '   - the list of words that were ignored, if any
    '   - the WHERE clause that would be sent to the database, or a
    '     message when no usable words remain.

    ' Words that are dropped from the search phrase.
    ignoredWords = array(_ 
        "a",_ 
        "all",_ 
        "am",_ 
        "an",_ 
        "and",_ 
        "any",_ 
        "are",_ 
        "as",_ 
        "at",_ 
        "be",_ 
        "but",_ 
        "can",_ 
        "did",_ 
        "do",_ 
        "does",_ 
        "for",_ 
        "from",_ 
        "had",_ 
        "has",_ 
        "have",_ 
        "here",_ 
        "how",_ 
        "i",_ 
        "if",_ 
        "in",_ 
        "is",_ 
        "it",_ 
        "no",_ 
        "not",_ 
        "of",_ 
        "on",_ 
        "or",_ 
        "so",_ 
        "that",_ 
        "the",_ 
        "then",_ 
        "there",_ 
        "this",_ 
        "to",_ 
        "too",_ 
        "up",_ 
        "use",_ 
        "what",_ 
        "when",_ 
        "where",_ 
        "who",_ 
        "why",_ 
        "you"_ 
    ) 
 
    ' Delimit every word with ";" on both sides so that a whole-word
    ' lookup can be done with a single InStr call.
    iList = ";" & join(ignoredWords, ";") & ";" 
 
    'srch = Request.Form("srch") 
    srch = "foo is the best" 
    words = split(srch) 
    for i = 0 to ubound(words) 
        w = lcase(trim(words(i))) 
        ' Repeated / leading / trailing spaces yield empty tokens; an
        ' empty token would become LIKE '%%' and match every row.
        if w <> "" then 
            ' A token that itself contains ";" could span two list
            ' entries (e.g. "a;all"), so it is never treated as noise.
            if instr(w, ";") = 0 and instr(iList, ";" & w & ";") > 0 then 
                iReturn = iReturn & w & ", " 
            else 
                ' Double any single quote so the word cannot terminate
                ' the SQL string literal.
                q = q & "(column LIKE '%" & replace(w, "'", "''") & "%') AND " 
            end if 
        end if 
    next 
    if iReturn > "" then 
        response.write "The following words were ignored: " 
        ' Strip the trailing ", " separator.
        response.write left(iReturn, len(iReturn)-2) & "<p>" 
    end if 
 
    if q > "" then 
        response.write "My query would be <p>" 
        ' Strip the trailing " AND " and HTML-encode, because the text
        ' contains user-supplied words.
        response.write Server.HTMLEncode("SELECT columns FROM table WHERE " & _ 
            left(q, len(q)-5)) 
    else 
        response.write "Either srch was empty, or consisted" & _ 
            " solely of ignored words." 
    end if 
%>
 

Noise Words Table (SQL Server) 
 
Sometimes a different approach makes sense; you can store the list of noise words in a SQL Server table, and construct the query dynamically in the database. This can be useful if the list of noise words changes often, or if the search is accessed from several different pages / sites. 
 
So, first we need a noise words table: 
 
-- Lookup table of words that are removed from a search phrase.
-- One row per word; the clustered primary key keeps words unique.
-- VARCHAR(5) fits the longest seeded words ('there', 'where');
-- widen the column before adding longer noise words.
CREATE TABLE NoiseWords 
( 
    w VARCHAR(5) NOT NULL 
        PRIMARY KEY CLUSTERED 
) 
GO 
 
-- Seed the noise-word list. Each INSERT names its target column so the
-- script keeps working if columns are later added to NoiseWords.
SET NOCOUNT ON; 
INSERT INTO NoiseWords (w) VALUES ('a'); 
INSERT INTO NoiseWords (w) VALUES ('all'); 
INSERT INTO NoiseWords (w) VALUES ('am'); 
INSERT INTO NoiseWords (w) VALUES ('an'); 
INSERT INTO NoiseWords (w) VALUES ('and'); 
INSERT INTO NoiseWords (w) VALUES ('any'); 
INSERT INTO NoiseWords (w) VALUES ('are'); 
INSERT INTO NoiseWords (w) VALUES ('as'); 
INSERT INTO NoiseWords (w) VALUES ('at'); 
INSERT INTO NoiseWords (w) VALUES ('be'); 
INSERT INTO NoiseWords (w) VALUES ('but'); 
INSERT INTO NoiseWords (w) VALUES ('can'); 
INSERT INTO NoiseWords (w) VALUES ('did'); 
INSERT INTO NoiseWords (w) VALUES ('do'); 
INSERT INTO NoiseWords (w) VALUES ('does'); 
INSERT INTO NoiseWords (w) VALUES ('for'); 
INSERT INTO NoiseWords (w) VALUES ('from'); 
INSERT INTO NoiseWords (w) VALUES ('had'); 
INSERT INTO NoiseWords (w) VALUES ('has'); 
INSERT INTO NoiseWords (w) VALUES ('have'); 
INSERT INTO NoiseWords (w) VALUES ('here'); 
INSERT INTO NoiseWords (w) VALUES ('how'); 
INSERT INTO NoiseWords (w) VALUES ('i'); 
INSERT INTO NoiseWords (w) VALUES ('if'); 
INSERT INTO NoiseWords (w) VALUES ('in'); 
INSERT INTO NoiseWords (w) VALUES ('is'); 
INSERT INTO NoiseWords (w) VALUES ('it'); 
INSERT INTO NoiseWords (w) VALUES ('no'); 
INSERT INTO NoiseWords (w) VALUES ('not'); 
INSERT INTO NoiseWords (w) VALUES ('of'); 
INSERT INTO NoiseWords (w) VALUES ('on'); 
INSERT INTO NoiseWords (w) VALUES ('or'); 
INSERT INTO NoiseWords (w) VALUES ('so'); 
INSERT INTO NoiseWords (w) VALUES ('that'); 
INSERT INTO NoiseWords (w) VALUES ('the'); 
INSERT INTO NoiseWords (w) VALUES ('then'); 
INSERT INTO NoiseWords (w) VALUES ('there'); 
INSERT INTO NoiseWords (w) VALUES ('this'); 
INSERT INTO NoiseWords (w) VALUES ('to'); 
INSERT INTO NoiseWords (w) VALUES ('too'); 
INSERT INTO NoiseWords (w) VALUES ('up'); 
INSERT INTO NoiseWords (w) VALUES ('use'); 
INSERT INTO NoiseWords (w) VALUES ('what'); 
INSERT INTO NoiseWords (w) VALUES ('when'); 
INSERT INTO NoiseWords (w) VALUES ('where'); 
INSERT INTO NoiseWords (w) VALUES ('who'); 
INSERT INTO NoiseWords (w) VALUES ('why'); 
INSERT INTO NoiseWords (w) VALUES ('you'); 
GO
 
(Feel free to edit the list of noise words, of course.) 
 
Now, let's create a table to search: 
 
-- Sample table to search: an auto-numbered id plus the text to match.
CREATE TABLE txt 
( 
    txtID INT IDENTITY(1,1), 
    body VARCHAR(255) 
) 
GO 
 
-- Sample rows for the search demo; txtID is generated by IDENTITY.
SET NOCOUNT ON; 
INSERT INTO txt (body) VALUES ('foo is a bunch of fun'); 
INSERT INTO txt (body) VALUES ('blat is always there'); 
INSERT INTO txt (body) VALUES ('where has my foobar gone?'); 
INSERT INTO txt (body) VALUES ('the best foo is cool'); 
GO
 
Before we start, we're going to need a stored procedure to break down our search phrase into individual words, and put them into table form. So, we'll use a "List-To-Table" stored procedure, based on Article #2248:
 
-- Splits a space-delimited list into words and inserts one row per
-- word into the named table.
--
-- @cslist    : space-delimited list of words. Leading, trailing and
--              repeated spaces are ignored, so no empty words are
--              inserted. NULL or blank input inserts nothing.
-- @tablename : target table; each word is inserted with
--              INSERT INTO <table> VALUES(<word>). A #temp table
--              created by the caller can be used.
--
-- NOTE(review): @tablename is concatenated into dynamic SQL because an
-- identifier cannot be passed as a parameter. Pass only trusted,
-- hard-coded table names, never user input.
CREATE PROCEDURE dbo.VarcharListToTable 
    @cslist VARCHAR(8000),  
    @tablename SYSNAME 
AS  
BEGIN      
    SET NOCOUNT ON 
 
    DECLARE 
        @spot SMALLINT, 
        @str VARCHAR(8000), 
        @sql NVARCHAR(4000)  
 
    -- The statement text is the same for every word, so build it once.
    -- The word is bound as a parameter, so quotes inside it cannot
    -- break or alter the statement.
    SET @sql = N'INSERT INTO ' + @tablename + N' VALUES(@word)' 
 
    -- Trim up front: LEN() ignores trailing spaces, so an untrimmed
    -- trailing delimiter would make the remainder length negative.
    SET @cslist = LTRIM(RTRIM(@cslist)) 
 
    WHILE @cslist <> ''  
    BEGIN  
        SET @spot = CHARINDEX(' ', @cslist)  
        IF @spot > 0  
        BEGIN  
            SET @str = LEFT(@cslist, @spot-1) 
            -- Keep everything after the delimiter; LTRIM swallows any
            -- run of additional spaces before the next word.
            SET @cslist = LTRIM(SUBSTRING(@cslist, @spot+1, 8000))  
        END  
        ELSE  
        BEGIN  
            -- Last word in the list.
            SET @str = @cslist 
            SET @cslist = ''  
        END  
        EXEC sp_executesql @sql, N'@word VARCHAR(8000)', @word = @str  
    END 
END 
GO
 
Now, in our actual search stored procedure, we can just pass in our search phrase, and return rows where the "body" column contains any of the *valid* search words entered: 
 
-- Searches txt.body for any non-noise word in a search phrase.
--
-- @srch : space-delimited search phrase.
--
-- Returns two result sets:
--   1. [Ignored Words] - words from @srch found in NoiseWords.
--   2. txtID, body     - rows of txt whose body contains at least one
--                        remaining word as a substring (so 'foo' also
--                        matches 'foobar').
CREATE PROCEDURE dbo.searchTxt 
    @srch VARCHAR(255) 
AS 
BEGIN 
    SET NOCOUNT ON 
 
    -- create a holding table for the 
    -- words in our search phrase 
 
    CREATE TABLE #w (w VARCHAR(255))  
 
    -- populate the table with our proc 
 
    EXEC dbo.VarcharListToTable @srch,'#w'  
 
    -- return words that will be ignored 
    -- so you can tell the user 
 
    SELECT [Ignored Words] = w 
    FROM #w WHERE w IN  
    ( 
        SELECT w FROM NoiseWords 
    ) ORDER BY w 
 
    -- now remove the ignored words 
 
    DELETE #w 
    WHERE w IN 
    ( 
        SELECT w FROM NoiseWords 
    ) 
 
    -- now run a join query using CHARINDEX, which matches the word 
    -- literally; a pattern match would treat %, _ and [ typed by the 
    -- user as wildcards (a word of '_' would match every row) 
    SELECT txtID,body 
        FROM txt  
        INNER JOIN #w w 
        ON CHARINDEX(w.w, txt.body) > 0 
        -- may be more than one match, so group 
        GROUP BY txtID,body 
 
    DROP TABLE #w 
END 
GO
 
Now when we enter our sample phrase: 
 
-- Run the search procedure with the sample phrase.
EXEC dbo.searchTxt 'foo is the best'
 
We can see the following results: 
 
Ignored Words 
------------------------- 
is 
the 
 
txtID body 
----- ------------------------- 
1     foo is a bunch of fun 
3     where has my foobar gone? 
4     the best foo is cool
 
You might want to do optional things to the phrase, such as removing punctuation and other non-alphanumeric characters, or offer the ability to match all of the valid words, or an exact phrase. I'll leave those enhancements as an exercise to the reader. 
 

Full Text Search 
 
Of course, if you're using SQL Server and you want real searching capabilities, you might consider using Full Text Search. Here are some articles that should help you get started: 
 
    SQL Team: Part I | Part II | Part III 
 
    KB #323739 INF: SQL Server 2000 Full-Text Search Deployment White Paper 
 
    SQL Mag: Part I | Part II

Related Articles

How do I build a query with optional parameters?
How do I calculate the median in a table?
How do I create a store locator feature?
How do I deal with MEMO, TEXT, HYPERLINK, and CURRENCY columns?
How do I deal with multiple resultsets from a stored procedure?
How do I debug my SQL statements?
How do I determine if a column exists in a given table?
How do I enable or disable connection pooling?
How do I enumerate through the DSNs on a machine?
How do I find a stored procedure containing <text>?
How do I get a list of Access tables and their row counts?
How do I get the latest version of the JET OLEDB drivers?
How do I handle alphabetic paging?
How do I handle BIT / BOOLEAN columns?
How do I handle error checking in a stored procedure?
How do I page through a recordset?
How do I present one-to-many relationships in my ASP page?
How do I prevent duplicates in a table?
How do I prevent my ASP pages from waiting for backend activity?
How do I prevent NULLs in my database from mucking up my HTML?
How do I protect my Access database (MDB file)?
How do I protect my stored procedure code?
How do I protect myself against the W32.Slammer worm?
How do I remove duplicates from a table?
How do I rename a column?
How do I retrieve a random record?
How do I return row numbers with my query?
How do I send a database query to a text file?
How do I simulate an array inside a stored procedure?
How do I solve 'Could not find installable ISAM' errors?
How do I solve 'Operation must use an updateable query' errors?
How do I temporarily disable a trigger?
How do I use a SELECT list alias in the WHERE or GROUP BY clause?
How do I use a variable in an ORDER BY clause?
Should I index my database table(s), and if so, how?
Should I store images in the database or the filesystem?
Should I use a #temp table or a @table variable?
Should I use a view, a stored procedure, or a user-defined function?
Should I use recordset iteration, or GetRows(), or GetString()?
What are all these dt_ stored procedures, and can I remove them?
What are the limitations of MS Access?
What are the limitations of MSDE?
What are the valid styles for converting datetime to string?
What datatype should I use for my character-based database columns?
What datatype should I use for numeric columns?
What does "ambiguous column name" mean?
What is this 'Multiple-step OLE DB' error?
What is wrong with 'SELECT *'?
What naming convention should I use in my database?
What should I choose for my primary key?
What should my connection string look like?
When should I use CreateObject to create my recordset objects?
Where can I get this 'Books Online' documentation?
Where do I get MSDE?
Which database platform should I use for my ASP application?
Which tool should I use: Enterprise Manager or Query Analyzer?
Why are there gaps in my IDENTITY / AUTOINCREMENT column?
Why can I not 'open a database created with a previous version...'?
Why can't I access a database or text file on another server?
Why can't I use the TOP keyword?
Why do I get 'Argument data type text is invalid for argument [...]'?
Why do I get 'Not enough space on temporary disk' errors?
Why does ASP give me ActiveX errors when connecting to a database?
Should I use COALESCE() or ISNULL()?
Where can I get basic info about using stored procedures?

 

 


Created: 12/7/2003 | Last Updated: 1/19/2004 | broken links | helpful | not helpful | statistics
© Copyright 2006, UBR, Inc. All Rights Reserved. (208)

 

Copyright 1999-2006, All rights reserved.
Finding content
Finding content.  An error has occurred...