|
| 1 | +#--- |
| 2 | +# Excerpted from "Seven Databases in Seven Weeks", |
| 3 | +# published by The Pragmatic Bookshelf. |
| 4 | +# Copyrights apply to this code. It may not be used to create training material, |
| 5 | +# courses, books, articles, and the like. Contact us if you are in doubt. |
| 6 | +# We make no guarantees that this code is fit for any purpose. |
| 7 | +# Visit http://www.pragmaticprogrammer.com/titles/pwrdata for more book information. |
| 8 | +#--- |
| 9 | +require 'libxml' |
| 10 | +require 'couchrest' |
| 11 | + |
| 12 | +include LibXML |
| 13 | + |
# SAX callback handler that loads a Jamendo XML dump into CouchDB.
#
# Every <artist> element becomes one document in the "music" database. The
# artist's <album> elements are nested under :albums, each album's <track>
# elements under :tracks, and each track's <tag> elements under :tags. Any
# other (leaf) element is stored as a string on the innermost open record,
# keyed by the element name.
class JamendoCallbacks
  include XML::SaxParser::Callbacks

  # max - maximum number of artist documents to insert before stopping;
  #       pass nil to import everything. Defaults to 10000.
  #
  # Raises KeyError when the DB_HOST environment variable is not set.
  def initialize(max = 10000)
    # Fail with an explicit message instead of a NoMethodError on nil when
    # DB_HOST is missing from the environment.
    db_host = ENV.fetch("DB_HOST") do
      raise KeyError, "DB_HOST environment variable must be set to the CouchDB server URL"
    end
    @db = CouchRest.database!(db_host + "/music")
    @count = 0
    @max = max # maximum number to insert
    @stack = []   # records currently open, innermost last
    @artist = nil
    @album = nil
    @track = nil
    @tag = nil
    @buffer = nil # text fragments of the leaf element being read, or nil
  end

  # Opens a new record for artist/album/track/tag elements and links it into
  # its parent; for any other non-wrapper element starts collecting text.
  # The attributes argument is not used.
  def on_start_element(element, attributes)
    case element
    when 'artist'
      @artist = { :albums => [] }
      @stack.push @artist
    when 'album'
      @album = { :tracks => [] }
      @artist[:albums].push @album
      @stack.push @album
    when 'track'
      @track = { :tags => [] }
      @album[:tracks].push @track
      @stack.push @track
    when 'tag'
      @tag = {}
      @track[:tags].push @tag
      @stack.push @tag
    when 'Artists', 'Albums', 'Tracks', 'Tags'
      # ignore: pure grouping elements
    else
      @buffer = []
    end
  end

  # Collects character data, but only while a leaf element is being read.
  def on_characters(chars)
    @buffer << chars if @buffer
  end

  # Closes the record or leaf element that just ended. A finished artist is
  # written to the database; a finished leaf is stored on the innermost open
  # record.
  def on_end_element(element)
    case element
    when 'artist'
      @stack.pop
      store_artist
    when 'album', 'track', 'tag'
      finished = @stack.pop
      finished[:random] = rand
    when 'Artists', 'Albums', 'Tracks', 'Tags'
      # ignore: pure grouping elements
    else
      if @stack.last && @buffer
        @stack.last[element] = @buffer.join.force_encoding('utf-8')
      end
      # Always stop collecting, even when no record is open, so stray
      # characters between elements are not accumulated.
      @buffer = nil
    end
  end

  # Prints the final tally and terminates the process. Also invoked directly
  # once the insert limit has been reached.
  def on_end_document
    puts "TOTAL: #{@count} records inserted"
    exit(0)
  end

  private

  # Saves the current artist as one document and updates the progress count.
  def store_artist
    @artist['_id'] = @artist['id'] # reuse Jamendo's artist id for doc _id
    @artist[:random] = rand
    # Positional flags as in the original call: second is false, third is true
    # (bulk and batch respectively in CouchRest's save_doc signature).
    @db.save_doc(@artist, false, true)
    @count += 1
    on_end_document if @max && @count >= @max
    puts " #{@count} records inserted" if @count % 500 == 0
  end
end
| 89 | + |
# Build a SAX parser over ARGF, attach the Jamendo callback handler, and run
# the parse.
XML::SaxParser.io(ARGF).tap { |sax| sax.callbacks = JamendoCallbacks.new }.parse
0 commit comments