eentzel · chadpaulson · Aug 13, 2013 · Aug 15, 2013 · Sep 12, 2013 · Sep 12, 2013
diff --git a/README.md b/README.md
@@ -1,12 +1,21 @@
-[![Build Status](https://travis-ci.org/eentzel/htmltruncate.py.png)](https://travis-ci.org/eentzel/htmltruncate.py)
+## htmltruncate
 
-A module to truncate strings containing HTML.
+[![Build Status](https://travis-ci.org/chadpaulson/htmltruncate.py.png)](https://travis-ci.org/chadpaulson/htmltruncate.py)
+
+Returns a truncated copy of a string while preserving HTML markup (markup does not count towards the length). Any tags left open by truncation are closed at the end of the returned string.
+
+**Example**:
+
+```python
+>>> import htmltruncate
+>>> str = "<p>You're not gonna lose the house, <b>everybody</b> has three mortgages nowadays.</p>"
+>>> htmltruncate.truncate(str, 33)
+"<p>You're not gonna lose the house, </p>"
+```
+
+**Options**:
 
 ```python
-htmltruncate.truncate(str, target_len, ellipsis='')
+>>> htmltruncate.truncate(str, 33, full_word=True, ellipsis="...")
+"<p>You're not gonna lose the house, <b>everybody</b></p>..."
 ```
-    Returns a copy of str truncated to target_len characters,
-    preserving HTML markup (which does not count towards the length).
-    Any tags that would be left open by truncation will be closed at
-    the end of the returned string.  Optionally append ellipsis if
-    the string was truncated.
diff --git a/htmltruncate.py b/htmltruncate.py
@@ -1,6 +1,5 @@
 #!/usr/bin/env python
 
-from __future__ import print_function
 import sys
 
 
@@ -16,14 +15,14 @@ def __init__(self, tag, rest=''):
 
     def as_string(self):
         return '<' + self.tag + self.rest + '>'
-        
+
 class CloseTag(OpenTag):
     def as_string(self):
         return '</' + self.tag + '>'
 
 class SelfClosingTag(OpenTag):
     pass
-    
+
 class Tokenizer:
     def __init__(self, input):
         self.input = input
@@ -32,7 +31,7 @@ def __init__(self, input):
     def __next_char(self):
         self.counter += 1
         return self.input[self.counter]
-        
+
     def next_token(self):
         try:
             char = self.input[self.counter]
@@ -62,7 +61,7 @@ def __entity(self):
         entity.append(';')
         self.counter += 1
         return ''.join(entity)
-        
+
     def __open_tag(self):
         """Return an open/close tag token.
         Precondition: self.counter points at the first character of the tag name
@@ -97,7 +96,7 @@ def __close_tag(self):
         self.counter += 1
         return CloseTag( ''.join(tag) )
 
-def truncate(str, target_len, ellipsis = ''):
+def truncate(str, target_len, full_word=False, ellipsis = ''):
     """Returns a copy of str truncated to target_len characters,
     preserving HTML markup (which does not count towards the length).
     Any tags that would be left open by truncation will be closed at
@@ -108,7 +107,7 @@ def truncate(str, target_len, ellipsis = ''):
     length = 0   # number of characters (not counting markup) placed in retval so far
     tokens = Tokenizer(str)
     tok = tokens.next_token()
-    while tok != END and length < target_len:
+    while tok != END:
         if tok.__class__.__name__ == 'OpenTag':
             stack.append(tok)
             retval.append( tok.as_string() )
@@ -124,13 +123,18 @@ def truncate(str, target_len, ellipsis = ''):
             retval.append(tok)
             length += 1
         tok = tokens.next_token()
+        if length == target_len and not full_word:
+            break
+        elif length >= target_len and full_word and tok == " ":
+            break
+
     while len(stack) > 0:
         tok = CloseTag( stack.pop().tag )
         retval.append( tok.as_string() )
-    if length == target_len:
+    if len(str) > length:
         return ''.join(retval) + ellipsis
     else:
-        return ''.join(retval)        
+        return ''.join(retval)
 
 if __name__ == "__main__":
     try:

diff --git a/setup.py b/setup.py
@@ -0,0 +1,12 @@
+#!/usr/bin/env python
+# -*- coding: UTF-8 -*-
+
+from setuptools import setup
+
+setup(name='htmltruncate',
+      version='1.0',
+      description='To truncate html content meaningfull',
+      author='Eric Entzel',
+      url='https://github.com/eentzel/htmltruncate.py',
+      py_modules = ['htmltruncate']
+      )
diff --git a/tests.py b/tests.py
@@ -40,7 +40,10 @@ def testSelfClosing(self):
         self.assertEqual( htmltruncate.truncate( "I need<br /> a break", 11 ), "I need<br /> a br" )
 
     def testEllipsis(self):
-        self.assertEqual( htmltruncate.truncate('this <b>word</b> is bolded', 10, '...' ), "this <b>word</b> ...")
+        self.assertEqual( htmltruncate.truncate('this <b>word</b> is bolded', 10, ellipsis='...' ), "this <b>word</b> ...")
+
+    def testFullWord(self):
+        self.assertEqual( htmltruncate.truncate( "I need<br /> a break", 11, full_word=True ), "I need<br /> a break" )
 
 if __name__ == "__main__":
     unittest.main()