module Jekyll
  # Liquid filter module that reduces a text to a lower-case, space-separated
  # list of words (stop words, numbers and single characters removed), meant to
  # be fed into a lunr.js search index.
  module AssetFilter
    # Stop-word lists (one word per line), resolved relative to the current
    # working directory.
    STOPWORD_FILES = %w[stopwords-en.txt stopwords-de.txt].freeze

    # Loads the stop-word lists and compiles them into a regular expression.
    #
    # The files are read only once per filter object; subsequent calls return
    # immediately. After the first call, @stopwords holds the '|'-joined,
    # regex-escaped stop words and @stopword_pattern the compiled expression.
    #
    # Raises Errno::ENOENT if one of the stop-word files does not exist.
    def setup
      return if @stopword_pattern

      terms = STOPWORD_FILES.flat_map { |path| read_stopwords(path) }.uniq

      # Regexp.union escapes regex metacharacters in every term and yields a
      # never-matching pattern for an empty list, so odd or empty stop-word
      # files cannot break or distort the expression.
      @stopwords = Regexp.union(terms).source
      @stopword_pattern = /\b(?:#{@stopwords})\b/i
    end

    # Returns +input+ reduced to its index-worthy words.
    #
    # input - the text to process (expected to have passed strip_html and
    #         strip_newlines already); nil is treated as an empty string.
    #
    # The argument is never modified and the result is always a String. Words
    # are separated by single spaces; a leading and/or trailing space may
    # remain where removed content used to be.
    def words(input)
      setup

      # downcase because lunr.js is not able to act case insensitive!
      text = input.to_s.downcase

      # Replace everything that is not a word character. [[:word:]] is
      # Unicode-aware, whereas \w only covers ASCII and would tear words with
      # umlauts or sharp s apart.
      text = text.gsub(/[^[:word:]]/, ' ')

      # remove stop words
      text = text.gsub(@stopword_pattern, ' ')

      # remove numbers
      text = text.gsub(/\d+/, ' ')

      # remove single chars (like I or a)
      text = text.gsub(/\b[[:word:]]\b/, ' ')

      # Only plain spaces are left as separators at this point; collapse runs
      # of them into one.
      text.squeeze(' ')
    end

    private

    # Reads one stop-word file and returns its non-blank lines, stripped of
    # surrounding whitespace and downcased.
    def read_stopwords(path)
      File.readlines(path, encoding: 'UTF-8')
          .map { |line| line.strip.downcase }
          .reject(&:empty?)
    end
  end
end

# Register the filter module with Liquid so that templates can use its methods
# as filters.
Liquid::Template.register_filter(Jekyll::AssetFilter)
