Module: Awestruct::ContextHelper
- Defined in:
- lib/awestruct/context_helper.rb
Instance Method Summary
- - (Object) clean_html(str)
- - (Object) close_tags(s)
- - (Object) fix_url(base_url, url)
- - (Object) fully_qualify_urls(base_url, text)
- - (Object) html_to_text(str)
- - (Object) summarize(text, numwords = 20, ellipsis = '...')
- - (Object) without_images(str)
Instance Method Details
- (Object) clean_html(str)
10 11 12 |
# File 'lib/awestruct/context_helper.rb', line 10
#
# Replaces every HTML non-breaking-space entity (&nbsp;) in +str+ with a
# plain space. All other markup is left untouched.
#
# @param str [String] HTML fragment
# @return [String] a copy of +str+ with each "&nbsp;" replaced by " "
def clean_html(str)
  str.gsub('&nbsp;', ' ')
end
- (Object) close_tags(s)
18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 |
# File 'lib/awestruct/context_helper.rb', line 18
#
# Appends a closing tag for every element that is still open at the end of
# +s+, innermost element first. Useful after truncating an HTML fragment.
#
# Tags are identified by the first run of word characters in the tag, so
# attributes are ignored. Comments, doctype declarations, processing
# instructions, self-closing tags (<br/>) and HTML void elements (<br>,
# <img>, ...) never need a closing tag and are skipped.
#
# @param s [String] HTML fragment, possibly with unclosed elements
# @return [String] +s+ itself when nothing is open, otherwise a new string
#   with the missing closing tags appended
# @raise [RuntimeError] when a closing tag does not match the innermost
#   open element
def close_tags(s)
  void_elements = %w[area base br col embed hr img input link meta param source track wbr]
  open_tags = []

  s.scan(/<\/?[^>]+>/).each do |tag|
    next if tag.start_with?('<!', '<?') # comment, doctype or processing instruction
    next if tag.end_with?('/>')         # self-closing: nothing left open

    name = tag[/\w+/]
    next if name.nil? || void_elements.include?(name.downcase)

    if tag.start_with?('</')
      expected = open_tags.last
      unless expected == name
        raise "Malformed HTML expected #{expected} but got #{name} '#{s}'"
      end
      open_tags.pop
    else
      open_tags.push(name)
    end
  end

  return s if open_tags.empty?

  # The most recently opened element must be closed first.
  s + open_tags.reverse.map { |name| "</#{name}>" }.join
end
- (Object) fix_url(base_url, url)
62 63 64 65 |
# File 'lib/awestruct/context_helper.rb', line 62
#
# Prefixes a site-root-relative URL (one starting with a single "/") with
# +base_url+. Anything else is returned unchanged: absolute URLs,
# document-relative paths, protocol-relative URLs ("//host/path") and nil.
#
# @param base_url [String] prefix to prepend, e.g. "http://example.org"
# @param url [String, nil] URL taken from an href/src attribute
# @return [String, nil] the qualified URL, or +url+ itself when it does not
#   start with a single "/"
def fix_url(base_url, url)
  path = url.to_s
  # Check the very start of the string; a "/" that merely begins a later
  # line must not count.
  return url unless path.start_with?('/')
  # "//host/path" already names its host, so prefixing would corrupt it.
  return url if path.start_with?('//')

  "#{base_url}#{url}"
end
- (Object) fully_qualify_urls(base_url, text)
40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 |
# File 'lib/awestruct/context_helper.rb', line 40
#
# Parses +text+ with Hpricot, passes the +href+ of every <a> and <link> and
# the +src+ of every <img> through fix_url, and serializes the document
# again.
#
# @param base_url [String] prefix handed to fix_url for each attribute
# @param text [String] HTML markup to process
# @return [String] the serialized document; on Rubies other than 1.8 it is
#   tagged with the encoding of +text+
def fully_qualify_urls(base_url, text)
  doc = Hpricot(text)

  [['//a', 'href'], ['//link', 'href'], ['//img', 'src']].each do |xpath, attribute|
    doc.search(xpath).each do |element|
      current = element[attribute]
      # Skip elements that lack the attribute (e.g. a named anchor);
      # assigning a nil value would store an attribute that was not there.
      next if current.nil?

      element[attribute] = fix_url(base_url, current)
    end
  end

  html = doc.to_s
  # Hpricot::Doc#to_s output encoding is not necessarily the same as the encoding of text
  return html if RUBY_VERSION.start_with?('1.8')

  html.force_encoding(text.encoding) if html.encoding != text.encoding
  html
end
- (Object) html_to_text(str)
6 7 8 |
# File 'lib/awestruct/context_helper.rb', line 6
#
# Reduces an HTML fragment to its text: removes everything that looks like
# a tag (a "<", one or more non-">" characters, then ">") and replaces each
# non-breaking-space entity (&nbsp;) with a plain space. Other entities are
# left as they are.
#
# @param str [String] HTML fragment
# @return [String] the fragment with tags removed and "&nbsp;" turned into " "
def html_to_text(str)
  without_tags = str.gsub(/<[^>]+>/, '')
  without_tags.gsub('&nbsp;', ' ')
end
- (Object) summarize(text, numwords = 20, ellipsis = '...')
36 37 38 |
# File 'lib/awestruct/context_helper.rb', line 36
#
# Returns the first +numwords+ words of +text+, where words are the pieces
# obtained by splitting on single space characters. +ellipsis+ is appended
# only when words were actually cut off, so a text that already fits is
# returned without a misleading trailing marker.
#
# @param text [String] text to shorten
# @param numwords [Integer] maximum number of words to keep
# @param ellipsis [String] marker appended when the text was truncated
# @return [String] the (possibly shortened) text
def summarize(text, numwords = 20, ellipsis = '...')
  words = text.split(/ /)
  summary = words[0, numwords].join(' ')
  words.length > numwords ? summary + ellipsis : summary
end
- (Object) without_images(str)
14 15 16 |
# File 'lib/awestruct/context_helper.rb', line 14
#
# Strips <img ...> tags from +str+ and then unwraps every <a ...>...</a>
# whose content contains no further tag, keeping only the link text.
# Because images are removed first, a link that wrapped nothing but an
# image disappears entirely.
#
# @param str [String] HTML fragment
# @return [String] the fragment without images and without simple links
def without_images(str)
  images_removed = str.gsub(/<img[^>]+>/, '')
  # '\1' keeps the anchor's text; [^<]* means anchors that still contain
  # nested markup are left alone.
  images_removed.gsub(/<a[^>]+>([^<]*)<\/a>/, '\1')
end