Added a stop-list to reduce the size of the full text search index. Fred,

populate the "stop_list" triple-quoted string with your favorite handful
of stop words.
This commit is contained in:
Tim Peters 2002-04-19 18:41:46 +00:00
parent e6b63e685b
commit 4f109c1cf9

View file

@ -1,4 +1,4 @@
'''
"""
Makes the necessary files to convert from plain html of
Python 1.5 and 1.5.x Documentation to
Microsoft HTML Help format version 1.1
@ -13,7 +13,7 @@
project, 19-Apr-2002 by Tim Peters. Assorted modifications by Tim
and Fred Drake. Obtained from Robin Dunn's .chm packaging of the
Python 2.2 docs, at <http://alldunn.com/python/>.
'''
"""
import sys
import os
@ -38,12 +38,12 @@ Usage: make_chm.py [-c] [-k] [-p] [-v 1.5[.x]] filename
# user-visible features (visible buttons, tabs, etc).
project_template = '''
[OPTIONS]
Compatibility=1.1
Compiled file=%(arch)s.chm
Contents file=%(arch)s.hhc
Default Window=%(arch)s
Default topic=index.html
Display compile progress=No
Full text search stop list file=%(arch)s.stp
Full-text search=Yes
Index file=%(arch)s.hhk
Language=0x409
@ -80,6 +80,23 @@ object_sitemap = '''
</OBJECT>
'''
# List of words the full text search facility shouldn't index. This
# becomes file ARCH.stp. Note that this list must be pretty small!
# Different versions of the MS docs claim the file has a maximum size of
# 256 or 512 bytes (including \r\n at the end of each line).
# Note that "and", "or", "not" and "near" are operators in the search
# language, so there's no point indexing them even if we wanted to.
# Words may be separated by any whitespace (several per line is fine):
# the string is split on whitespace and sorted before being written
# out one word per line.
stop_list = '''
a an and
is
near
not
of
or
the
'''
# Library Doc list of tuples:
# each 'book' : ( Dir, Title, First page, Content page, Index page)
#
@ -335,6 +352,15 @@ def do_it(args = None) :
library = supported_libraries[ version ]
if not (('-p','') in optlist) :
fname = arch + '.stp'
f = openfile(fname)
print "Building stoplist", fname, "..."
words = stop_list.split()
words.sort()
for word in words:
print >> f, word
f.close()
f = openfile(arch + '.hhp')
print "Building Project..."
do_project(library, f, arch, version)