Skip to content

Commit

Permalink
Initial commit
Browse files Browse the repository at this point in the history
  • Loading branch information
arttu committed Nov 23, 2011
0 parents commit 519cdcb
Show file tree
Hide file tree
Showing 16 changed files with 1,552 additions and 0 deletions.
2 changes: 2 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
.rvmrc
Gemfile.lock
7 changes: 7 additions & 0 deletions Gemfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
# Fetch gems from the official index over HTTPS. The `:rubygems` symbol
# shorthand is deprecated by Bundler in favour of an explicit source URL.
source 'https://rubygems.org'

# Runtime dependency: XML parsing of the feeds.
gem 'nokogiri'

# Only needed to run the spec suite.
group :test do
  gem 'rspec'
end
19 changes: 19 additions & 0 deletions LICENSE
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
Copyright (c) 2011 Arttu Tervo

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
43 changes: 43 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
# FeedParser

RSS and Atom feed parser built on top of Nokogiri, with support for custom sanitizers.

## Install

Add this line to your Gemfile:

gem "feed_parser"

## Usage

```ruby
# the most basic use case
fp = FeedParser.new(:url => "http://example.com/feed/")
# with sanitizer
fp = FeedParser.new(:url => "http://example.com/feed/", :sanitizer => MyBestestSanitizer.new)
# sanitizing custom field set
fp = FeedParser.new(:url => "http://example.com/feed/", :sanitizer => MyBestestSanitizer.new, :fields_to_sanitize => [:title, :content])
# parse the feed
feed = fp.parse
# using parsed feed in your code
feed.as_json
# => {:title => "Feed title", :url => "http://example.com/feed/", :items => [{:guid => , :title => , :author => ...}]}
feed.items.each do |feed_item|
pp feed_item
end
```

## Running tests

Install dependencies by running `bundle install`.

Run rspec tests:

$ bundle exec rspec

## Contributing

Fork, hack, push, create a pull request.
26 changes: 26 additions & 0 deletions feed_parser.gemspec
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
# feed_parser.gemspec
# -*- encoding: utf-8 -*-

# Put the lib/ directory that sits next to this gemspec on the load path so
# the version file required below can be found.
lib_dir = File.expand_path("../lib", __FILE__)
$LOAD_PATH.push(lib_dir)
require 'feed_parser/version'

Gem::Specification.new do |spec|
  # Identity of the gem.
  spec.name     = 'feed_parser'
  spec.version  = FeedParser::VERSION
  spec.platform = Gem::Platform::RUBY

  # Author and project information.
  spec.authors     = ['Arttu Tervo']
  spec.email       = ['arttu.tervo@gmail.com']
  spec.homepage    = 'http://github.com/arttu/feed_parser'
  spec.summary     = 'Rss and Atom feed parser'
  spec.description = 'Rss and Atom feed parser with sanitizer support built on top of Nokogiri.'

  # Runtime and development dependencies.
  spec.add_dependency 'nokogiri'
  spec.add_development_dependency 'rspec-rails', '~> 2.6'

  # Packaging: file lists come from whatever git currently tracks.
  spec.extra_rdoc_files = ['README.md', 'Changelog.md']
  spec.require_paths    = ['lib']
  spec.files            = `git ls-files`.split("\n")
  spec.test_files       = `git ls-files -- spec/*`.split("\n")
end
31 changes: 31 additions & 0 deletions lib/feed_parser.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
require 'open-uri'
require 'nokogiri'

# Entry point of the library: remembers where a feed lives, records the
# sanitizing options and builds the parsed Feed on demand.
#
# NOTE: the sanitizer and the list of fields to sanitize are stored at class
# level and exposed through FeedParser.sanitizer / FeedParser.fields_to_sanitize.
# They are therefore shared by every parser in the process: the most recently
# constructed FeedParser decides which sanitizer is in effect.
class FeedParser

  VERSION = "0.1.0"

  # Declared up front so the class-level readers below never hit an
  # uninitialized class variable, even before the first FeedParser.new.
  # The real defaults are filled in lazily because SelfSanitizer is loaded
  # after this class body has been evaluated.
  @@sanitizer = nil
  @@fields_to_sanitize = nil

  # opts - Hash of options (all optional):
  #        :url                - location of the feed, handed to Feed.new by #parse
  #        :sanitizer          - object responding to #sanitize
  #                              (default: a new SelfSanitizer)
  #        :fields_to_sanitize - Array of field names to run through the
  #                              sanitizer (default: [:content])
  #
  # Constructing a parser always overwrites the class-level settings, falling
  # back to the defaults for options that are not given.
  def initialize(opts = {})
    @url = opts[:url]
    @@sanitizer = opts[:sanitizer] || SelfSanitizer.new
    @@fields_to_sanitize = opts[:fields_to_sanitize] || [:content]
  end

  # Returns the sanitizer currently in effect; a SelfSanitizer when none has
  # been configured yet.
  def self.sanitizer
    @@sanitizer ||= SelfSanitizer.new
  end

  # Returns the field names currently being sanitized; [:content] when none
  # have been configured yet.
  def self.fields_to_sanitize
    @@fields_to_sanitize ||= [:content]
  end

  # Builds the Feed for the configured URL on first use and returns the same
  # object on every later call.
  def parse
    @feed ||= Feed.new(@url)
  end
end

require 'feed_parser/dsl'
require 'feed_parser/feed'
require 'feed_parser/feed_item'
require 'feed_parser/self_sanitizer'
33 changes: 33 additions & 0 deletions lib/feed_parser/dsl.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
class FeedParser
  # Per-format lookup tables mapping feed and item fields to the XPath
  # expressions used to extract them from a parsed document.
  class Dsl
    class << self
      # Returns the table for +type+ by invoking the class method of that
      # name (e.g. Dsl[:rss] is the same as Dsl.rss).
      def [](type)
        send(type)
      end

      # Paths for RSS documents. A new Hash is built on every call.
      def rss
        channel_paths = {
          :title => "/rss/channel/title",
          :url   => "/rss/channel/link",
          :item  => "/rss/channel/item"
        }
        # Item paths are relative to a single item node.
        item_paths = {
          :item_guid       => "guid",
          :item_link       => "link",
          :item_title      => "title",
          :item_categories => "category",
          :item_author     => "creator",
          :item_content    => "encoded"
        }
        channel_paths.merge(item_paths)
      end

      # Paths for Atom documents. A new Hash is built on every call.
      def atom
        feed_paths = {
          :title => "/feed/title",
          :url   => "/feed/link[@rel='self']",
          :item  => "/feed/entry"
        }
        # Entry paths are relative to a single entry node.
        entry_paths = {
          :item_guid       => "id",
          :item_link       => "link",
          :item_title      => "title",
          :item_categories => "category",
          :item_author     => "author/name",
          :item_content    => "content"
        }
        feed_paths.merge(entry_paths)
      end
    end
  end
end
37 changes: 37 additions & 0 deletions lib/feed_parser/feed.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
class FeedParser
  # A fetched and parsed RSS or Atom document.
  class Feed
    # Strings that start with a URI scheme ("http://", "ftp://", ...) are
    # treated as remote locations; any other string is a local file path.
    REMOTE_SOURCE = %r{\A[A-Za-z][A-Za-z0-9+\-.]*://}

    # :rss when the document contains an <rss> element, :atom otherwise.
    attr_reader :type

    # feed_url - where to read the feed from: a URL string, a local file path,
    #            or any object with a public #open (e.g. a URI or Pathname).
    #
    # Raises whatever the underlying open call raises when the source cannot
    # be read.
    def initialize(feed_url)
      @feed = read_source(feed_url) { |io| Nokogiri::XML(io) }
      # Namespaces are dropped so the plain element names in Dsl match.
      @feed.remove_namespaces!
      @type = @feed.search('rss')[0] ? :rss : :atom
    end

    # Returns the feed title as a String.
    def title
      @title ||= @feed.xpath(Dsl[@type][:title]).text
    end

    # Returns the feed's own URL: the text of the matched element when it has
    # any, otherwise the "href" attribute of the first matched element.
    # Returns nil when neither is present.
    def url
      nodes = @feed.xpath(Dsl[@type][:url])
      text = nodes.text
      return @url = text unless text.nil? || text.empty?

      link = nodes.first
      href = link && link.attribute("href")
      @url = href && href.text
    end

    # Returns the feed entries as an Array of RssItem or AtomItem objects,
    # built once and then reused.
    def items
      @items ||= begin
        klass = @type == :rss ? RssItem : AtomItem
        @feed.xpath(Dsl[@type][:item]).map { |item| klass.new(item) }
      end
    end

    # Returns a Hash with the feed title, URL and the items' own as_json output.
    def as_json
      {
        :title => title,
        :url => url,
        :items => items.map(&:as_json)
      }
    end

    private

    # Opens +source+, yields the IO to the block, closes it afterwards and
    # returns the block's value.
    #
    # Kernel#open is deliberately avoided: given a string starting with "|" it
    # spawns a subprocess, and since Ruby 3.0 it no longer opens URLs.
    def read_source(source, &block)
      return source.open(&block) if source.respond_to?(:open)

      if source.respond_to?(:to_str) && REMOTE_SOURCE =~ source
        uri = URI.parse(source)
        # Only schemes that open-uri knows how to fetch respond to #open.
        return uri.open(&block) if uri.respond_to?(:open)
      end

      File.open(source, &block)
    end
  end
end
54 changes: 54 additions & 0 deletions lib/feed_parser/feed_item.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,54 @@
class FeedParser
  # Behaviour shared by RSS and Atom entries.
  #
  # Not meant to be instantiated directly: subclasses set @type before
  # calling super so the matching Dsl table is used, and assign @link and
  # @categories themselves.
  #
  # Every instance variable is readable through a method of the same name
  # (item.guid, item.title, ...). Readers whose name is listed in
  # FeedParser.fields_to_sanitize return the value passed through
  # FeedParser.sanitizer.
  class FeedItem
    attr_reader :type

    # item - the node of a single entry; must respond to #xpath.
    def initialize(item)
      paths = Dsl[@type]
      @guid = item.xpath(paths[:item_guid]).text
      @title = item.xpath(paths[:item_title]).text
      @author = item.xpath(paths[:item_author]).text
      @content = item.xpath(paths[:item_content]).text
    end

    # Serves the dynamic field readers described on the class. Anything that
    # is not an argument-less call for an existing instance variable falls
    # through to the default handling and ends in NoMethodError.
    def method_missing(method_id, *args, &block)
      return super unless args.empty? && field?(method_id)

      value = instance_variable_get("@#{method_id}")
      if FeedParser.fields_to_sanitize.include?(method_id)
        FeedParser.sanitizer.sanitize(value)
      else
        value
      end
    end

    # Keeps respond_to? in agreement with the readers method_missing serves.
    def respond_to_missing?(method_id, include_private = false)
      field?(method_id) || super
    end

    # Returns a Hash of the item's fields, read through the (possibly
    # sanitizing) readers.
    def as_json
      {
        :guid => guid,
        :link => link,
        :title => title,
        :categories => categories,
        :author => author,
        :content => content
      }
    end

    private

    # True when an instance variable called @<name> has been assigned.
    # Compared by symbol so names that are not valid variable names
    # (e.g. "foo?") simply do not match instead of raising.
    def field?(name)
      instance_variables.map(&:to_sym).include?(:"@#{name}")
    end
  end

  # Entry of an RSS feed: link and categories are element texts.
  class RssItem < FeedItem
    def initialize(item)
      @type = :rss
      super
      @link = item.xpath(Dsl[@type][:item_link]).text
      @categories = item.xpath(Dsl[@type][:item_categories]).map { |cat| cat.text }
    end
  end

  # Entry of an Atom feed: the link is the "href" attribute of the first
  # link element and the categories are the "term" attributes.
  class AtomItem < FeedItem
    def initialize(item)
      @type = :atom
      super
      # An entry without a link, or a link without href, yields nil.
      first_link = item.xpath(Dsl[@type][:item_link]).first
      href = first_link && first_link.attribute("href")
      @link = href && href.text
      # Categories lacking a "term" attribute are skipped.
      @categories = item.xpath(Dsl[@type][:item_categories]).map do |cat|
        term = cat.attribute("term")
        term && term.text
      end.compact
    end
  end
end
7 changes: 7 additions & 0 deletions lib/feed_parser/self_sanitizer.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
class FeedParser
  # Pass-through sanitizer: the simplest object that satisfies the
  # sanitizer interface (a single #sanitize method).
  class SelfSanitizer
    # Returns +str+ itself, unmodified.
    def sanitize(str)
      return str
    end
  end
end
Loading

0 comments on commit 519cdcb

Please sign in to comment.