Module: Awestruct::ContextHelper

Defined in:
lib/awestruct/context_helper.rb

Instance Method Summary (collapse)

Instance Method Details

- (Object) clean_html(str)



10
11
12
# File 'lib/awestruct/context_helper.rb', line 10

# Replaces HTML non-breaking-space entities with ordinary spaces.
#
# The previous pattern matched a plain space and substituted a plain space,
# which left the input unchanged; this uses the same +&nbsp;+ pattern that
# html_to_text applies.
# NOTE(review): the original pattern may have been garbled in transit -
# confirm that +&nbsp;+ -> ' ' is the intended substitution.
#
# @param str [String] HTML fragment
# @return [String] a copy of +str+ with every +&nbsp;+ replaced by a space
def clean_html(str)
  str.gsub(/&nbsp;/, ' ')
end

- (Object) close_tags(s)



18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
# File 'lib/awestruct/context_helper.rb', line 18

# Appends closing tags for every element left open in an HTML fragment.
#
# Tags are found with a simple regex scan (no real HTML parsing). Opening
# tags are pushed on a stack and closing tags must match the most recently
# opened element. Void elements (+<br>+, +<img>+, ...), self-closing tags
# (+<foo/>+), comments, doctypes and processing instructions never need a
# closing tag and are ignored.
#
# @param s [String] HTML fragment, possibly truncated
# @return [String] +s+ followed by the missing closing tags, innermost first
#   (+s+ itself when nothing is left open)
# @raise [RuntimeError] when a closing tag does not match the innermost open
#   element
def close_tags(s)
  void_elements = %w[area base br col embed hr img input link meta param source track wbr]
  open_tags = []

  s.scan(/<\/?[^>]+>/).each do |tag|
    # Comments, doctypes and processing instructions are not elements.
    next if tag.start_with?('<!') || tag.start_with?('<?')

    # start_with? rather than tag[1] so the check also works on Ruby 1.8,
    # where String#[] with an Integer returns a character code.
    if tag.start_with?('</')
      name = tag[2..-1].scan(/\w+/).first
      unless open_tags.first == name
        # Report the element that was expected to close, not a character of
        # the tag that was actually found.
        raise "Malformed HTML expected #{open_tags.first} but got #{name} '#{s}'"
      end
      open_tags.shift
    else
      name = tag[1..-1].scan(/\w+/).first
      next if name.nil?
      # These never take a closing tag, so they must not be tracked as open.
      next if tag.end_with?('/>') || void_elements.include?(name.downcase)
      open_tags.unshift(name)
    end
  end

  # open_tags is ordered innermost-first, which is the order to close in.
  open_tags.inject(s) { |memo, name| memo + "</#{name}>" }
end

- (Object) fix_url(base_url, url)



62
63
64
65
# File 'lib/awestruct/context_helper.rb', line 62

# Prefixes a site-root-relative URL with the site's base URL.
#
# Only URLs that begin with a single "/" are rewritten. Everything else is
# returned untouched: absolute URLs, document-relative URLs, +nil+, and
# protocol-relative URLs ("//host/path"), which already name their host and
# would be corrupted by a prefix.
#
# @param base_url [String] site base URL, e.g. "http://example.com"
# @param url [String, nil] URL taken from an attribute
# @return [String, nil] "#{base_url}#{url}" for root-relative URLs, otherwise
#   +url+ unchanged
def fix_url(base_url, url)
  # \A anchors at the start of the string (^ would also match after an
  # embedded newline); (?!/) excludes protocol-relative URLs.
  return url unless url =~ %r{\A/(?!/)}
  "#{base_url}#{url}"
end

- (Object) fully_qualify_urls(base_url, text)



40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
# File 'lib/awestruct/context_helper.rb', line 40

# Parses +text+ with Hpricot and passes the URL attribute of every +a+,
# +link+ and +img+ element through fix_url with +base_url+, then serializes
# the document back to a string.
#
# @param base_url [String] base URL handed to fix_url
# @param text [String] HTML markup to rewrite
# @return [String] the serialized document; on Rubies other than 1.8 it is
#   tagged with the encoding of +text+
def fully_qualify_urls(base_url, text)
  doc = Hpricot( text )

  # Element name paired with the attribute that carries its URL.
  [ [ 'a', 'href' ], [ 'link', 'href' ], [ 'img', 'src' ] ].each do |element, attribute|
    doc.search( "//#{element}" ).each do |node|
      node[attribute] = fix_url( base_url, node[attribute] )
    end
  end

  html = doc.to_s
  # Ruby 1.8 strings carry no encoding, so there is nothing to align.
  return html if RUBY_VERSION.start_with? '1.8'

  # Hpricot::Doc#to_s output encoding is not necessarily the same as the encoding of text
  html.force_encoding(text.encoding) unless html.encoding == text.encoding
  html
end

- (Object) html_to_text(str)



6
7
8
# File 'lib/awestruct/context_helper.rb', line 6

# Reduces an HTML fragment to plain text: removes everything that looks like
# a tag, then turns +&nbsp;+ entities into ordinary spaces.
#
# @param str [String] HTML fragment
# @return [String] the text with tags removed and +&nbsp;+ replaced by spaces
def html_to_text(str)
  tagless = str.gsub(/<[^>]+>/, '')
  # Runs after tag removal, so an entity that only forms once a tag is
  # stripped is converted as well.
  tagless.gsub(/&nbsp;/, ' ')
end

- (Object) summarize(text, numwords = 20, ellipsis = '...')



36
37
38
# File 'lib/awestruct/context_helper.rb', line 36

# Builds a short excerpt: keeps at most +numwords+ space-separated words of
# +text+, appends +ellipsis+, and passes the result through close_tags.
#
# The ellipsis is appended even when +text+ has fewer than +numwords+ words.
#
# @param text [String] source text, may contain HTML
# @param numwords [Integer] maximum number of words to keep
# @param ellipsis [String] suffix appended to the excerpt
# @return [String] whatever close_tags returns for the excerpt
def summarize(text, numwords=20, ellipsis='...')
  leading_words = text.split(/ /).slice(0, numwords)
  excerpt = leading_words.join(' ') + ellipsis
  close_tags(excerpt)
end

- (Object) without_images(str)



14
15
16
# File 'lib/awestruct/context_helper.rb', line 14

# Removes +<img ...>+ tags from an HTML fragment, then replaces every link
# whose content contains no "<" with that content.
#
# @param str [String] HTML fragment
# @return [String] the fragment without images and with such links unwrapped
def without_images(str)
  imageless = str.gsub(/<img[^>]+>/,'')
  # Images are removed first, so a link that wrapped only an image is now
  # empty and is dropped by this substitution.
  imageless.gsub(/<a[^>]+>([^<]*)<\/a>/, '\1')
end