blender-archive/intern/python/modules/TextTools/TextTools.py

""" mxTextTools - A tools package for fast text processing.

    (c) Copyright Marc-Andre Lemburg; All Rights Reserved.
    See the documentation for further information on copyrights,
    or contact the author (mal@lemburg.com).
"""
import string,types

#
# import the C module and the version number
#
from mxTextTools import *
from mxTextTools import __version__

#
# import the symbols needed to write tag tables
#
from Constants.TagTables import *

#
# import the some handy character sets
#
from Constants.Sets import *

#
# format and print tables, taglists and joinlists:
#
def format_entry(table,i,

		 TupleType=types.TupleType):

    """ Returns a pp-formatted tag table entry as string 
    """
    e = table[i]
    jne = 0
    je = 1
    t,c,m = e[:3]
    if len(e)>3: jne = e[3]
    if len(e)>4: je = e[4]
    flags,cmd = divmod(c,256)
    c = id2cmd[cmd]
    if type(m) == TupleType and c in ('Table','SubTable'):
	m = '<table>'
    elif m == None:
	m = 'Here/To'
    else:
	m = repr(m)
	if len(m) > 17:
	    m = m[:17]+'...'
    return '%-15.15s : %-30s : jne=%+i : je=%+i' % \
	   (repr(t),'%-.15s : %s'%(c,m),jne,je)

def format_table(table,i=-1):
    
    """ Returns a pp-formatted version of the tag table as string """

    l = []
    for j in range(len(table)):
	if i == j:
	    l.append('--> '+format_entry(table,j))
	else:
	    l.append('    '+format_entry(table,j))
    return string.join(l,'\n')+'\n'

def print_tagtable(table):

    """ Print the tag table 
    """
    print format_table(table)

def print_tags(text,tags,indent=0):

    """ Print the taglist tags for text using the given indent level 
    """
    for tag,l,r,subtags in tags:
	tagname = repr(tag)
	if len(tagname) > 20:
	    tagname = tagname[:20] + '...'
	target = repr(text[l:r])
	if len(target) > 60:
	    target = target[:60] + '...'
	if subtags == None:
	    print ' '+indent*' |',tagname,': ',target,(l,r)
	else:
	    print ' '+indent*' |',tagname,': ',target,(l,r)
	    print_tags(text,subtags,indent+1)

def print_joinlist(joins,indent=0,
		   
		   StringType=types.StringType):

    """ Print the joinlist joins using the given indent level 
    """
    for j in joins:
	if type(j) == StringType:
	    text = repr(j)
	    if len(text) > 40:
		text = text[:40] + '...'
	    print ' '+indent*' |',text,' (len = %i)' % len(j)
	else:
	    text = j[0]
	    l,r = j[1:3]
	    text = repr(text[l:r])
	    if len(text) > 40:
		text = text[:40] + '...'
	    print ' '+indent*' |',text,' (len = %i)' % (r-l),(l,r)

def normlist(jlist,
		   
             StringType=types.StringType):

    """ Return a normalized joinlist.

        All tuples in the joinlist are turned into real strings.  The
        resulting list is a equivalent copy of the joinlist only
        consisting of strings.
        
    """
    l = [''] * len(jlist)
    for i in range(len(jlist)):
        entry = jlist[i]
	if type(entry) == StringType:
	    l[i] = entry
	else:
            l[i] = entry[0][entry[1]:entry[2]]
    return l

#
# aid for matching from a list of words
#
def _lookup_dict(l,index=0):
    
    d = {}
    for w in l:
	c = w[index]
	if d.has_key(c):
	    d[c].append(w)
	else:
	    d[c] = [w]
    return d

def word_in_list(l):

    """ Creates a lookup table that matches the words in l 
    """
    t = []
    d = _lookup_dict(l)
    keys = d.keys()
    if len(keys) < 18: # somewhat arbitrary bound
	# fast hint for small sets
	t.append((None,IsIn,string.join(d.keys(),'')))
	t.append((None,Skip,-1))
    # test groups
    for c, group in d.items():
	t.append(None) # hint will be filled in later
	i = len(t)-1
	for w in group:
	    t.append((None,Word,w[1:],+1,MatchOk))
	t.append((None,Fail,Here))
	# add hint
	t[i] = (None,Is,c,len(t)-i)
    t.append((None,Fail,Here))
    return tuple(t)

#
# Extra stuff useful in combination with the C functions
#

def replace(text,what,with,start=0,stop=None,

	    SearchObject=BMS,join=join,joinlist=joinlist,tag=tag,
	    string_replace=string.replace,type=type,
	    StringType=types.StringType):

    """A fast replacement for string.replace.
    
       what can be given as string or search object.

       This function is a good example for the AppendTagobj-flag usage
       (the taglist can be used directly as joinlist).
       
    """
    if type(what) == StringType:
	so = SearchObject(what)
    else:
	so = what
	what = so.match
    if stop is None:
	if start == 0 and len(what) < 2:
	    return string_replace(text,what,with)
	stop = len(text)
    t = ((text,sWordStart,so,+2),
	 # Found something, replace and continue searching
	 (with,Skip+AppendTagobj,len(what),-1,-1),
	 # Rest of text
	 (text,Move,ToEOF)
	 )
    found,taglist,last = tag(text,t,start,stop)
    if not found:
	return text
    return join(taglist)

# Alternative (usually slower) versions using different techniques:

def _replace2(text,what,with,start=0,stop=None,

	      join=join,joinlist=joinlist,tag=tag,
	      StringType=types.StringType,BMS=BMS):

    """Analogon to string.replace; returns a string with all occurences
       of what in text[start:stop] replaced by with
       - uses a one entry tag-table and a Boyer-Moore-Search-object
       - what can be a string or a BMS/FS search object
       - it's faster than string.replace in those cases, where
	 the what-string gets long and/or many replacements are found;
	 faster meaning from a few percent up to many times as fast
       - start and stop define the slice of text to work in
       - stop defaults to len(text)
    """
    if stop is None:
	stop = len(text)
    if type(what) == StringType:
	what=BMS(what)
    t = ((with,sFindWord,what,+1,+0),)
    found,taglist,last = tag(text,t,start,stop)
    if not found: 
	return text
    return join(joinlist(text,taglist))

def _replace3(text,what,with,

	      join=string.join,FS=FS,
	      StringType=types.StringType):

    if type(what) == StringType:
	what=FS(what)
    slices = what.findall(text)
    if not slices:
	return text
    l = []
    x = 0
    for left,right in slices:
	l.append(text[x:left] + with)
	x = right
    l.append(text[x:])
    return join(l,'')

def _replace4(text,what,with,

	      join=join,joinlist=joinlist,tag=tag,FS=FS,
	      StringType=types.StringType):

    if type(what) == StringType:
	what=FS(what)
    slices = what.findall(text)
    if not slices:
	return text
    repl = [None]*len(slices)
    for i in range(len(slices)):
	repl[i] = (with,)+slices[i]
    return join(joinlist(text,repl))


def find(text,what,start=0,stop=None,

	 SearchObject=FS):

    """ A faster replacement for string.find().

        Uses a search object for the task. Returns the position of the
        first occurance of what in text[start:stop]. stop defaults to
        len(text).  Returns -1 in case no occurance was found.
        
    """
    if stop:
	return SearchObject(what).find(text,start,stop)
    else:
	return SearchObject(what).find(text,start)

def findall(text,what,start=0,stop=None,

	    SearchObject=FS):

    """ Find all occurances of what in text.

        Uses a search object for the task. Returns a list of slice
        tuples (l,r) marking the all occurances in
        text[start:stop]. stop defaults to len(text).  Returns an
        empty list in case no occurance was found.
        
    """
    if stop:
	return SearchObject(what).findall(text,start,stop)
    else:
	return SearchObject(what).findall(text,start)

def split(text,sep,start=0,stop=None,translate=None,

	  SearchObject=FS):

    """ A faster replacement for string.split().

        Uses a search object for the task. Returns the result of
        cutting the text[start:stop] string into snippets at every sep
        occurance in form of a list of substrings. translate is passed
        to the search object as translation string.

	XXX convert to a C function... or even better, add as method
	to search objects.

    """
    if translate:
	so = SearchObject(sep,translate)
    else:
	so = SearchObject(sep)
    if stop:
	cuts = so.findall(text,start,stop)
    else:
	cuts = so.findall(text,start)
    l = 0
    list = []
    append = list.append
    for left,right in cuts:
	append(text[l:left])
	l = right
    append(text[l:])
    return list

# helper for tagdict
def _tagdict(text,dict,prefix,taglist):

    for o,l,r,s in taglist:
	pfx = prefix + str(o)
	dict[pfx] = text[l:r]
	if s:
	    _tagdict(text,dict,pfx+'.',s)

def tagdict(text,*args):

    """ Tag a text just like the function tag() and then convert
        its output into a dictionary where the tagobjects reference
	their respective strings
	- this function emulates the interface of tag()
	- in contrast to tag() this funtion *does* make copies
	  of the found stings
        - returns a tuple (rc,tagdict,next) with the same meaning
	  of rc and next as tag(); tagdict is the new dictionary - 
	  None in case rc is 0
    """
    rc,taglist,next = apply(tag,(text,)+args)
    if not rc:
	return (rc,None,next)
    d = {}
    tagdict = _tagdict
    for o,l,r,s in taglist:
	pfx = str(o)
	d[pfx] = text[l:r]
	if s:
	    tagdict(text,dict,pfx+'.',s)
    return (rc,d,next)

def invset(chars):
    
    """ Return a set with all characters *except* the ones in chars.
    """
    return set(chars,0)

def is_whitespace(text,start=0,stop=None,

		  nonwhitespace=nonwhitespace_set,setfind=setfind):

    """ Return 1 iff text[start:stop] only contains whitespace
        characters (as defined in Constants/Sets.py), 0 otherwise.
    """
    if stop is None:
	stop = len(text)
    i = setfind(text,nonwhitespace,start,stop)
    return (i < 0)

def collapse(text,seperator=' ',

	     join=join,setsplit=setsplit,collapse_set=set(newline+whitespace)):

    """ Eliminates newline characters and compresses whitespace
        characters into one space.

        The result is a one line text string. Tim Peters will like
        this function called with '-' seperator ;-)
        
    """
    return join(setsplit(text,collapse_set),seperator)

_linesplit_table = (
    (None,Is,'\r',+1),
    (None,Is,'\n',+1),
    ('line',AllInSet+AppendMatch,set('\r\n',0),+1,-2),
    (None,EOF,Here,+1,MatchOk),
    ('empty line',Skip+AppendMatch,0,0,-4),
    )

def splitlines(text,

               tag=tag,linesplit_table=_linesplit_table):

    """ Split text into a list of single lines.

        The following combinations are considered to be line-ends:
        '\r', '\r\n', '\n'; they may be used in any combination.  The
        line-end indicators are removed from the strings prior to
        adding them to the list.

        This function allows dealing with text files from Macs, PCs
        and Unix origins in a portable way.
        
    """
    return tag(text,linesplit_table)[1]

_linecount_table = (
    (None,Is,'\r',+1),
    (None,Is,'\n',+1),
    ('line',AllInSet+AppendTagobj,set('\r\n',0),+1,-2),
    (None,EOF,Here,+1,MatchOk),
    ('empty line',Skip+AppendTagobj,0,0,-4),
    )

def countlines(text,

               linecount_table=_linecount_table):

    """ Returns the number of lines in text.

        Line ends are treated just like for splitlines() in a
        portable way.
    """
    return len(tag(text,linecount_table)[1])

_wordsplit_table = (
    (None,AllInSet,whitespace_set,+1),
    ('word',AllInSet+AppendMatch,nonwhitespace_set,+1,-1),
    (None,EOF,Here,+1,MatchOk),
    )

def splitwords(text,

               setsplit=setsplit,whitespace_set=whitespace_set):

    """ Split text into a list of single words.

        Words are separated by whitespace. The whitespace is stripped
	before adding the words to the list.
        
    """
    return setsplit(text,whitespace_set)

#
# Testing and benchmarking
#

# Taken from my hack.py module:
import time
class _timer:

    """ timer class with a quite obvious interface
	- .start() starts a fairly accurate CPU-time timer plus an
	  absolute timer
	- .stop() stops the timer and returns a tuple: the CPU-time in seconds
	  and the absolute time elapsed since .start() was called
    """

    utime = 0
    atime = 0

    def start(self,
	      clock=time.clock,time=time.time):
	self.atime = time()
	self.utime = clock()

    def stop(self,
	     clock=time.clock,time=time.time):
	self.utime = clock() - self.utime
	self.atime = time() - self.atime
	return self.utime,self.atime

    def usertime(self,
		 clock=time.clock,time=time.time):
	self.utime = clock() - self.utime
	self.atime = time() - self.atime
	return self.utime

    def abstime(self,
		clock=time.clock,time=time.time):
	self.utime = clock() - self.utime
	self.atime = time() - self.atime
	return self.utime

    def __str__(self):

	return '%0.2fu %0.2fa sec.' % (self.utime,self.atime)

def _bench(file='mxTextTools/mxTextTools.c'):

    def mismatch(orig,new):
	print
	for i in range(len(orig)):
	    if orig[i] != new[i]:
		break
	else:
	    print 'Length mismatch: orig=%i new=%i' % (len(orig),len(new))
	    if len(orig) > len(new):
		print 'Missing chars:'+repr(orig[len(new):])
	    else:
		print 'Excess chars:'+repr(new[len(orig):])
	    print
	    return
	print 'Mismatch at offset %i:' % i
	print (orig[i-100:i] 
	       + '<- %s != %s ->' % (repr(orig[i]),repr(new[i]))
	       + orig[i+1:i+100])
	print
	
    text = open(file).read()
    import string

    t = _timer()
    print 'Working on a %i byte string' % len(text)

    if 0:
	print
	print 'Replacing strings'
	print '-'*72
	print
	for what,with in (('m','M'),('mx','MX'),('mxText','MXTEXT'),
			  ('hmm','HMM'),('hmmm','HMM'),('hmhmm','HMM')):
	    print 'Replace "%s" with "%s"' % (what,with)
	    t.start()
	    for i in range(100):
		rtext = string.replace(text,what,with)
	    print 'with string.replace:',t.stop(),'sec.'
	    t.start()
	    for i in range(100):
		ttext = replace(text,what,with)
	    print 'with tag.replace:',t.stop(),'sec.'
	    if ttext != rtext:
		print 'results are NOT ok !'
		print '-'*72
		mismatch(rtext,ttext)
	    t.start()
	    for i in range(100):
		ttext = _replace2(text,what,with)
	    print 'with tag._replace2:',t.stop(),'sec.'
	    if ttext != rtext:
		print 'results are NOT ok !'
		print '-'*72
		print rtext
	    t.start()
	    for i in range(100):
		ttext = _replace3(text,what,with)
	    print 'with tag._replace3:',t.stop(),'sec.'
	    if ttext != rtext:
		print 'results are NOT ok !'
		print '-'*72
		print rtext
	    t.start()
	    for i in range(100):
		ttext = _replace4(text,what,with)
	    print 'with tag._replace4:',t.stop(),'sec.'
	    if ttext != rtext:
		print 'results are NOT ok !'
		print '-'*72
		print rtext
	    print

    if 0:
	print
	print 'String lower/upper'
	print '-'*72
	print

	op = string.lower
	t.start()
	for i in range(1000):
	    op(text)
	t.stop()
	print ' string.lower:',t

	op = string.upper
	t.start()
	for i in range(1000):
	    op(text)
	t.stop()
	print ' string.upper:',t

	op = upper
	t.start()
	for i in range(1000):
	    op(text)
	t.stop()
	print ' TextTools.upper:',t

	op = lower
	t.start()
	for i in range(1000):
	    op(text)
	t.stop()
	print ' TextTools.lower:',t

	print 'Testing...',
	ltext = string.lower(text)
	assert ltext == lower(text)
	utext = string.upper(text)
	assert utext == upper(text)
	print 'ok.'

    if 0:
	print
	print 'Joining lists'
	print '-'*72
	print

	l = setsplit(text,whitespace_set)

	op = string.join
	t.start()
	for i in range(1000):
	    op(l)
	t.stop()
	print ' string.join:',t

	op = join
	t.start()
	for i in range(1000):
	    op(l)
	t.stop()
	print ' TextTools.join:',t

	op = string.join
	t.start()
	for i in range(1000):
	    op(l,' ')
	t.stop()
	print ' string.join with seperator:',t

	op = join
	t.start()
	for i in range(1000):
	    op(l,' ')
	t.stop()
	print ' TextTools.join with seperator:',t

    if 0:
	print
	print 'Creating join lists'
	print '-'*72
	print

	repl = []
	for i in range(0,len(text),10):
	    repl.append(str(i),i,i+1)

	op = joinlist
	t.start()
	for i in range(1000):
	    op(text,repl)
	t.stop()
	print ' TextTools.joinlist:',t

    if 0:
	print
	print 'Splitting text'
	print '-'*72
	print

	op = string.split
	t.start()
	for i in range(100):
	    op(text)
	t.stop()
	print ' string.split whitespace:',t,'(',len(op(text)),'snippets )'

	op = setsplit
	ws = whitespace_set
	t.start()
	for i in range(100):
	    op(text,ws)
	t.stop()
	print ' TextTools.setsplit whitespace:',t,'(',len(op(text,ws)),'snippets )'

	assert string.split(text) == setsplit(text,ws)

	op = string.split
	sep = 'a'
	t.start()
	for i in range(100):
	    op(text,sep)
	t.stop()
	print ' string.split at "a":',t,'(',len(op(text,sep)),'snippets )'

	op = split
	sep = 'a'
	t.start()
	for i in range(100):
	    op(text,sep)
	t.stop()
	print ' TextTools.split at "a":',t,'(',len(op(text,sep)),'snippets )'

	op = charsplit
	sep = 'a'
	t.start()
	for i in range(100):
	    op(text,sep)
	t.stop()
	print ' TextTools.charsplit at "a":',t,'(',len(op(text,sep)),'snippets )'

	op = setsplit
        sep = set('a')
	t.start()
	for i in range(100):
	    op(text,sep)
	t.stop()
	print ' TextTools.setsplit at "a":',t,'(',len(op(text,sep)),'snippets )'

	# Note: string.split and setsplit don't work identically !

	op = string.split
	sep = 'int'
	t.start()
	for i in range(100):
	    op(text,sep)
	t.stop()
	print ' string.split at "int":',t,'(',len(op(text,sep)),'snippets )'

	op = split
	sep = 'int'
	t.start()
	for i in range(100):
	    op(text,sep)
	t.stop()
	print ' TextTools.split at "int":',t,'(',len(op(text,sep)),'snippets )'

	op = setsplit
        sep = set('int')
	t.start()
	for i in range(100):
	    op(text,sep)
	t.stop()
	print ' TextTools.setsplit at "i", "n", "t":',t,'(',len(op(text,sep)),'snippets )'

	op = string.split
	sep = 'register'
	t.start()
	for i in range(100):
	    op(text,sep)
	t.stop()
	print ' string.split at "register":',t,'(',len(op(text,sep)),'snippets )'

	op = split
	sep = 'register'
	t.start()
	for i in range(100):
	    op(text,sep)
	t.stop()
	print ' TextTools.split at "register":',t,'(',len(op(text,sep)),'snippets )'

if __name__=='__main__':
    _bench()
Initial revision 2002-10-12 11:37:38 +00:00			`""" mxTextTools - A tools package for fast text processing.`

			`(c) Copyright Marc-Andre Lemburg; All Rights Reserved.`
			`See the documentation for further information on copyrights,`
			`or contact the author (mal@lemburg.com).`
			`"""`
			`import string,types`

			`#`
			`# import the C module and the version number`
			`#`
			`from mxTextTools import *`
Revert to the FUTURE_PYTHON_API, this means: - switching the define on in buildsystems (NaN, auto and msvc are done) - again import _Blender, which is the C module, from the Python modules 2003-02-06 03:30:25 +00:00			`from mxTextTools import __version__`
Initial revision 2002-10-12 11:37:38 +00:00
			`#`
			`# import the symbols needed to write tag tables`
			`#`
			`from Constants.TagTables import *`

			`#`
			`# import the some handy character sets`
			`#`
			`from Constants.Sets import *`

			`#`
			`# format and print tables, taglists and joinlists:`
			`#`
			`def format_entry(table,i,`

			`TupleType=types.TupleType):`

			`""" Returns a pp-formatted tag table entry as string`
			`"""`
			`e = table[i]`
			`jne = 0`
			`je = 1`
			`t,c,m = e[:3]`
			`if len(e)>3: jne = e[3]`
			`if len(e)>4: je = e[4]`
			`flags,cmd = divmod(c,256)`
			`c = id2cmd[cmd]`
			`if type(m) == TupleType and c in ('Table','SubTable'):`
			`m = '<table>'`
			`elif m == None:`
			`m = 'Here/To'`
			`else:`
			`m = repr(m)`
			`if len(m) > 17:`
			`m = m[:17]+'...'`
			`return '%-15.15s : %-30s : jne=%+i : je=%+i' % \`
			`(repr(t),'%-.15s : %s'%(c,m),jne,je)`

			`def format_table(table,i=-1):`

			`""" Returns a pp-formatted version of the tag table as string """`

			`l = []`
			`for j in range(len(table)):`
			`if i == j:`
			`l.append('--> '+format_entry(table,j))`
			`else:`
			`l.append(' '+format_entry(table,j))`
			`return string.join(l,'\n')+'\n'`

			`def print_tagtable(table):`

			`""" Print the tag table`
			`"""`
			`print format_table(table)`

			`def print_tags(text,tags,indent=0):`

			`""" Print the taglist tags for text using the given indent level`
			`"""`
			`for tag,l,r,subtags in tags:`
			`tagname = repr(tag)`
			`if len(tagname) > 20:`
			`tagname = tagname[:20] + '...'`
			`target = repr(text[l:r])`
			`if len(target) > 60:`
			`target = target[:60] + '...'`
			`if subtags == None:`
			`print ' '+indent*' \|',tagname,': ',target,(l,r)`
			`else:`
			`print ' '+indent*' \|',tagname,': ',target,(l,r)`
			`print_tags(text,subtags,indent+1)`

			`def print_joinlist(joins,indent=0,`

			`StringType=types.StringType):`

			`""" Print the joinlist joins using the given indent level`
			`"""`
			`for j in joins:`
			`if type(j) == StringType:`
			`text = repr(j)`
			`if len(text) > 40:`
			`text = text[:40] + '...'`
			`print ' '+indent*' \|',text,' (len = %i)' % len(j)`
			`else:`
			`text = j[0]`
			`l,r = j[1:3]`
			`text = repr(text[l:r])`
			`if len(text) > 40:`
			`text = text[:40] + '...'`
			`print ' '+indent*' \|',text,' (len = %i)' % (r-l),(l,r)`

			`def normlist(jlist,`

			`StringType=types.StringType):`

			`""" Return a normalized joinlist.`

			`All tuples in the joinlist are turned into real strings. The`
			`resulting list is a equivalent copy of the joinlist only`
			`consisting of strings.`

			`"""`
			`l = [''] * len(jlist)`
			`for i in range(len(jlist)):`
			`entry = jlist[i]`
			`if type(entry) == StringType:`
			`l[i] = entry`
			`else:`
			`l[i] = entry[0][entry[1]:entry[2]]`
			`return l`

			`#`
			`# aid for matching from a list of words`
			`#`
			`def _lookup_dict(l,index=0):`

			`d = {}`
			`for w in l:`
			`c = w[index]`
			`if d.has_key(c):`
			`d[c].append(w)`
			`else:`
			`d[c] = [w]`
			`return d`

			`def word_in_list(l):`

			`""" Creates a lookup table that matches the words in l`
			`"""`
			`t = []`
			`d = _lookup_dict(l)`
			`keys = d.keys()`
			`if len(keys) < 18: # somewhat arbitrary bound`
			`# fast hint for small sets`
			`t.append((None,IsIn,string.join(d.keys(),'')))`
			`t.append((None,Skip,-1))`
			`# test groups`
			`for c, group in d.items():`
			`t.append(None) # hint will be filled in later`
			`i = len(t)-1`
			`for w in group:`
			`t.append((None,Word,w[1:],+1,MatchOk))`
			`t.append((None,Fail,Here))`
			`# add hint`
			`t[i] = (None,Is,c,len(t)-i)`
			`t.append((None,Fail,Here))`
			`return tuple(t)`

			`#`
			`# Extra stuff useful in combination with the C functions`
			`#`

			`def replace(text,what,with,start=0,stop=None,`

			`SearchObject=BMS,join=join,joinlist=joinlist,tag=tag,`
			`string_replace=string.replace,type=type,`
			`StringType=types.StringType):`

			`"""A fast replacement for string.replace.`

			`what can be given as string or search object.`

			`This function is a good example for the AppendTagobj-flag usage`
			`(the taglist can be used directly as joinlist).`

			`"""`
			`if type(what) == StringType:`
			`so = SearchObject(what)`
			`else:`
			`so = what`
			`what = so.match`
			`if stop is None:`
			`if start == 0 and len(what) < 2:`
			`return string_replace(text,what,with)`
			`stop = len(text)`
			`t = ((text,sWordStart,so,+2),`
			`# Found something, replace and continue searching`
			`(with,Skip+AppendTagobj,len(what),-1,-1),`
			`# Rest of text`
			`(text,Move,ToEOF)`
			`)`
			`found,taglist,last = tag(text,t,start,stop)`
			`if not found:`
			`return text`
			`return join(taglist)`

			`# Alternative (usually slower) versions using different techniques:`

			`def _replace2(text,what,with,start=0,stop=None,`

			`join=join,joinlist=joinlist,tag=tag,`
			`StringType=types.StringType,BMS=BMS):`

			`"""Analogon to string.replace; returns a string with all occurences`
			`of what in text[start:stop] replaced by with`
			`- uses a one entry tag-table and a Boyer-Moore-Search-object`
			`- what can be a string or a BMS/FS search object`
			`- it's faster than string.replace in those cases, where`
			`the what-string gets long and/or many replacements are found;`
			`faster meaning from a few percent up to many times as fast`
			`- start and stop define the slice of text to work in`
			`- stop defaults to len(text)`
			`"""`
			`if stop is None:`
			`stop = len(text)`
			`if type(what) == StringType:`
			`what=BMS(what)`
			`t = ((with,sFindWord,what,+1,+0),)`
			`found,taglist,last = tag(text,t,start,stop)`
			`if not found:`
			`return text`
			`return join(joinlist(text,taglist))`

			`def _replace3(text,what,with,`

			`join=string.join,FS=FS,`
			`StringType=types.StringType):`

			`if type(what) == StringType:`
			`what=FS(what)`
			`slices = what.findall(text)`
			`if not slices:`
			`return text`
			`l = []`
			`x = 0`
			`for left,right in slices:`
			`l.append(text[x:left] + with)`
			`x = right`
			`l.append(text[x:])`
			`return join(l,'')`

			`def _replace4(text,what,with,`

			`join=join,joinlist=joinlist,tag=tag,FS=FS,`
			`StringType=types.StringType):`

			`if type(what) == StringType:`
			`what=FS(what)`
			`slices = what.findall(text)`
			`if not slices:`
			`return text`
			`repl = [None]*len(slices)`
			`for i in range(len(slices)):`
			`repl[i] = (with,)+slices[i]`
			`return join(joinlist(text,repl))`


			`def find(text,what,start=0,stop=None,`

			`SearchObject=FS):`

			`""" A faster replacement for string.find().`

			`Uses a search object for the task. Returns the position of the`
			`first occurance of what in text[start:stop]. stop defaults to`
			`len(text). Returns -1 in case no occurance was found.`

			`"""`
			`if stop:`
			`return SearchObject(what).find(text,start,stop)`
			`else:`
			`return SearchObject(what).find(text,start)`

			`def findall(text,what,start=0,stop=None,`

			`SearchObject=FS):`

			`""" Find all occurances of what in text.`

			`Uses a search object for the task. Returns a list of slice`
			`tuples (l,r) marking the all occurances in`
			`text[start:stop]. stop defaults to len(text). Returns an`
			`empty list in case no occurance was found.`

			`"""`
			`if stop:`
			`return SearchObject(what).findall(text,start,stop)`
			`else:`
			`return SearchObject(what).findall(text,start)`

			`def split(text,sep,start=0,stop=None,translate=None,`

			`SearchObject=FS):`

			`""" A faster replacement for string.split().`

			`Uses a search object for the task. Returns the result of`
			`cutting the text[start:stop] string into snippets at every sep`
			`occurance in form of a list of substrings. translate is passed`
			`to the search object as translation string.`

			`XXX convert to a C function... or even better, add as method`
			`to search objects.`

			`"""`
			`if translate:`
			`so = SearchObject(sep,translate)`
			`else:`
			`so = SearchObject(sep)`
			`if stop:`
			`cuts = so.findall(text,start,stop)`
			`else:`
			`cuts = so.findall(text,start)`
			`l = 0`
			`list = []`
			`append = list.append`
			`for left,right in cuts:`
			`append(text[l:left])`
			`l = right`
			`append(text[l:])`
			`return list`

			`# helper for tagdict`
			`def _tagdict(text,dict,prefix,taglist):`

			`for o,l,r,s in taglist:`
			`pfx = prefix + str(o)`
			`dict[pfx] = text[l:r]`
			`if s:`
			`_tagdict(text,dict,pfx+'.',s)`

			`def tagdict(text,*args):`

			`""" Tag a text just like the function tag() and then convert`
			`its output into a dictionary where the tagobjects reference`
			`their respective strings`
			`- this function emulates the interface of tag()`
			`- in contrast to tag() this funtion does make copies`
			`of the found stings`
			`- returns a tuple (rc,tagdict,next) with the same meaning`
			`of rc and next as tag(); tagdict is the new dictionary -`
			`None in case rc is 0`
			`"""`
			`rc,taglist,next = apply(tag,(text,)+args)`
			`if not rc:`
			`return (rc,None,next)`
			`d = {}`
			`tagdict = _tagdict`
			`for o,l,r,s in taglist:`
			`pfx = str(o)`
			`d[pfx] = text[l:r]`
			`if s:`
			`tagdict(text,dict,pfx+'.',s)`
			`return (rc,d,next)`

			`def invset(chars):`

			`""" Return a set with all characters except the ones in chars.`
			`"""`
			`return set(chars,0)`

			`def is_whitespace(text,start=0,stop=None,`

			`nonwhitespace=nonwhitespace_set,setfind=setfind):`

			`""" Return 1 iff text[start:stop] only contains whitespace`
			`characters (as defined in Constants/Sets.py), 0 otherwise.`
			`"""`
			`if stop is None:`
			`stop = len(text)`
			`i = setfind(text,nonwhitespace,start,stop)`
			`return (i < 0)`

			`def collapse(text,seperator=' ',`

			`join=join,setsplit=setsplit,collapse_set=set(newline+whitespace)):`

			`""" Eliminates newline characters and compresses whitespace`
			`characters into one space.`

			`The result is a one line text string. Tim Peters will like`
			`this function called with '-' seperator ;-)`

			`"""`
			`return join(setsplit(text,collapse_set),seperator)`

			`_linesplit_table = (`
			`(None,Is,'\r',+1),`
			`(None,Is,'\n',+1),`
			`('line',AllInSet+AppendMatch,set('\r\n',0),+1,-2),`
			`(None,EOF,Here,+1,MatchOk),`
			`('empty line',Skip+AppendMatch,0,0,-4),`
			`)`

			`def splitlines(text,`

			`tag=tag,linesplit_table=_linesplit_table):`

			`""" Split text into a list of single lines.`

			`The following combinations are considered to be line-ends:`
			`'\r', '\r\n', '\n'; they may be used in any combination. The`
			`line-end indicators are removed from the strings prior to`
			`adding them to the list.`

			`This function allows dealing with text files from Macs, PCs`
			`and Unix origins in a portable way.`

			`"""`
			`return tag(text,linesplit_table)[1]`

			`_linecount_table = (`
			`(None,Is,'\r',+1),`
			`(None,Is,'\n',+1),`
			`('line',AllInSet+AppendTagobj,set('\r\n',0),+1,-2),`
			`(None,EOF,Here,+1,MatchOk),`
			`('empty line',Skip+AppendTagobj,0,0,-4),`
			`)`

			`def countlines(text,`

			`linecount_table=_linecount_table):`

			`""" Returns the number of lines in text.`

			`Line ends are treated just like for splitlines() in a`
			`portable way.`
			`"""`
			`return len(tag(text,linecount_table)[1])`

			`_wordsplit_table = (`
			`(None,AllInSet,whitespace_set,+1),`
			`('word',AllInSet+AppendMatch,nonwhitespace_set,+1,-1),`
			`(None,EOF,Here,+1,MatchOk),`
			`)`

			`def splitwords(text,`

			`setsplit=setsplit,whitespace_set=whitespace_set):`

			`""" Split text into a list of single words.`

			`Words are separated by whitespace. The whitespace is stripped`
			`before adding the words to the list.`

			`"""`
			`return setsplit(text,whitespace_set)`

			`#`
			`# Testing and benchmarking`
			`#`

			`# Taken from my hack.py module:`
			`import time`
			`class _timer:`

			`""" timer class with a quite obvious interface`
			`- .start() starts a fairly accurate CPU-time timer plus an`
			`absolute timer`
			`- .stop() stops the timer and returns a tuple: the CPU-time in seconds`
			`and the absolute time elapsed since .start() was called`
			`"""`

			`utime = 0`
			`atime = 0`

			`def start(self,`
			`clock=time.clock,time=time.time):`
			`self.atime = time()`
			`self.utime = clock()`

			`def stop(self,`
			`clock=time.clock,time=time.time):`
			`self.utime = clock() - self.utime`
			`self.atime = time() - self.atime`
			`return self.utime,self.atime`

			`def usertime(self,`
			`clock=time.clock,time=time.time):`
			`self.utime = clock() - self.utime`
			`self.atime = time() - self.atime`
			`return self.utime`

			`def abstime(self,`
			`clock=time.clock,time=time.time):`
			`self.utime = clock() - self.utime`
			`self.atime = time() - self.atime`
			`return self.utime`

			`def __str__(self):`

			`return '%0.2fu %0.2fa sec.' % (self.utime,self.atime)`

			`def _bench(file='mxTextTools/mxTextTools.c'):`

			`def mismatch(orig,new):`
			`print`
			`for i in range(len(orig)):`
			`if orig[i] != new[i]:`
			`break`
			`else:`
			`print 'Length mismatch: orig=%i new=%i' % (len(orig),len(new))`
			`if len(orig) > len(new):`
			`print 'Missing chars:'+repr(orig[len(new):])`
			`else:`
			`print 'Excess chars:'+repr(new[len(orig):])`
			`print`
			`return`
			`print 'Mismatch at offset %i:' % i`
			`print (orig[i-100:i]`
			`+ '<- %s != %s ->' % (repr(orig[i]),repr(new[i]))`
			`+ orig[i+1:i+100])`
			`print`

			`text = open(file).read()`
			`import string`

			`t = _timer()`
			`print 'Working on a %i byte string' % len(text)`

			`if 0:`
			`print`
			`print 'Replacing strings'`
			`print '-'*72`
			`print`
			`for what,with in (('m','M'),('mx','MX'),('mxText','MXTEXT'),`
			`('hmm','HMM'),('hmmm','HMM'),('hmhmm','HMM')):`
			`print 'Replace "%s" with "%s"' % (what,with)`
			`t.start()`
			`for i in range(100):`
			`rtext = string.replace(text,what,with)`
			`print 'with string.replace:',t.stop(),'sec.'`
			`t.start()`
			`for i in range(100):`
			`ttext = replace(text,what,with)`
			`print 'with tag.replace:',t.stop(),'sec.'`
			`if ttext != rtext:`
			`print 'results are NOT ok !'`
			`print '-'*72`
			`mismatch(rtext,ttext)`
			`t.start()`
			`for i in range(100):`
			`ttext = _replace2(text,what,with)`
			`print 'with tag._replace2:',t.stop(),'sec.'`
			`if ttext != rtext:`
			`print 'results are NOT ok !'`
			`print '-'*72`
			`print rtext`
			`t.start()`
			`for i in range(100):`
			`ttext = _replace3(text,what,with)`
			`print 'with tag._replace3:',t.stop(),'sec.'`
			`if ttext != rtext:`
			`print 'results are NOT ok !'`
			`print '-'*72`
			`print rtext`
			`t.start()`
			`for i in range(100):`
			`ttext = _replace4(text,what,with)`
			`print 'with tag._replace4:',t.stop(),'sec.'`
			`if ttext != rtext:`
			`print 'results are NOT ok !'`
			`print '-'*72`
			`print rtext`
			`print`

			`if 0:`
			`print`
			`print 'String lower/upper'`
			`print '-'*72`
			`print`

			`op = string.lower`
			`t.start()`
			`for i in range(1000):`
			`op(text)`
			`t.stop()`
			`print ' string.lower:',t`

			`op = string.upper`
			`t.start()`
			`for i in range(1000):`
			`op(text)`
			`t.stop()`
			`print ' string.upper:',t`

			`op = upper`
			`t.start()`
			`for i in range(1000):`
			`op(text)`
			`t.stop()`
			`print ' TextTools.upper:',t`

			`op = lower`
			`t.start()`
			`for i in range(1000):`
			`op(text)`
			`t.stop()`
			`print ' TextTools.lower:',t`

			`print 'Testing...',`
			`ltext = string.lower(text)`
			`assert ltext == lower(text)`
			`utext = string.upper(text)`
			`assert utext == upper(text)`
			`print 'ok.'`

			`if 0:`
			`print`
			`print 'Joining lists'`
			`print '-'*72`
			`print`

			`l = setsplit(text,whitespace_set)`

			`op = string.join`
			`t.start()`
			`for i in range(1000):`
			`op(l)`
			`t.stop()`
			`print ' string.join:',t`

			`op = join`
			`t.start()`
			`for i in range(1000):`
			`op(l)`
			`t.stop()`
			`print ' TextTools.join:',t`

			`op = string.join`
			`t.start()`
			`for i in range(1000):`
			`op(l,' ')`
			`t.stop()`
			`print ' string.join with seperator:',t`

			`op = join`
			`t.start()`
			`for i in range(1000):`
			`op(l,' ')`
			`t.stop()`
			`print ' TextTools.join with seperator:',t`

			`if 0:`
			`print`
			`print 'Creating join lists'`
			`print '-'*72`
			`print`

			`repl = []`
			`for i in range(0,len(text),10):`
			`repl.append(str(i),i,i+1)`

			`op = joinlist`
			`t.start()`
			`for i in range(1000):`
			`op(text,repl)`
			`t.stop()`
			`print ' TextTools.joinlist:',t`

			`if 0:`
			`print`
			`print 'Splitting text'`
			`print '-'*72`
			`print`

			`op = string.split`
			`t.start()`
			`for i in range(100):`
			`op(text)`
			`t.stop()`
			`print ' string.split whitespace:',t,'(',len(op(text)),'snippets )'`

			`op = setsplit`
			`ws = whitespace_set`
			`t.start()`
			`for i in range(100):`
			`op(text,ws)`
			`t.stop()`
			`print ' TextTools.setsplit whitespace:',t,'(',len(op(text,ws)),'snippets )'`

			`assert string.split(text) == setsplit(text,ws)`

			`op = string.split`
			`sep = 'a'`
			`t.start()`
			`for i in range(100):`
			`op(text,sep)`
			`t.stop()`
			`print ' string.split at "a":',t,'(',len(op(text,sep)),'snippets )'`

			`op = split`
			`sep = 'a'`
			`t.start()`
			`for i in range(100):`
			`op(text,sep)`
			`t.stop()`
			`print ' TextTools.split at "a":',t,'(',len(op(text,sep)),'snippets )'`

			`op = charsplit`
			`sep = 'a'`
			`t.start()`
			`for i in range(100):`
			`op(text,sep)`
			`t.stop()`
			`print ' TextTools.charsplit at "a":',t,'(',len(op(text,sep)),'snippets )'`

			`op = setsplit`
			`sep = set('a')`
			`t.start()`
			`for i in range(100):`
			`op(text,sep)`
			`t.stop()`
			`print ' TextTools.setsplit at "a":',t,'(',len(op(text,sep)),'snippets )'`

			`# Note: string.split and setsplit don't work identically !`

			`op = string.split`
			`sep = 'int'`
			`t.start()`
			`for i in range(100):`
			`op(text,sep)`
			`t.stop()`
			`print ' string.split at "int":',t,'(',len(op(text,sep)),'snippets )'`

			`op = split`
			`sep = 'int'`
			`t.start()`
			`for i in range(100):`
			`op(text,sep)`
			`t.stop()`
			`print ' TextTools.split at "int":',t,'(',len(op(text,sep)),'snippets )'`

			`op = setsplit`
			`sep = set('int')`
			`t.start()`
			`for i in range(100):`
			`op(text,sep)`
			`t.stop()`
			`print ' TextTools.setsplit at "i", "n", "t":',t,'(',len(op(text,sep)),'snippets )'`

			`op = string.split`
			`sep = 'register'`
			`t.start()`
			`for i in range(100):`
			`op(text,sep)`
			`t.stop()`
			`print ' string.split at "register":',t,'(',len(op(text,sep)),'snippets )'`

			`op = split`
			`sep = 'register'`
			`t.start()`
			`for i in range(100):`
			`op(text,sep)`
			`t.stop()`
			`print ' TextTools.split at "register":',t,'(',len(op(text,sep)),'snippets )'`

			`if __name__=='__main__':`
			`_bench()`