#!/usr/bin/env ruby

# ffbackup 2015.04.02
# scrapes feed pages for entry IDs and downloads entries via the API
# will also download all associated images

# 2015.03.20 - initial attempt
# 2015.04.02 - check for existing entry file before making API request

require 'nokogiri'
require 'open-uri'
require 'fileutils'

# Base URL of the FriendFeed website; feed index pages are scraped from here.
ffbaseurl = "http://friendfeed.com"
# Base URL of the FriendFeed v2 API; feeds and entries are requested from it as XML.
apiurl = "https://friendfeed-api.com/v2"
# Directory (relative to the working directory) that receives downloaded images.
mediadir = "media"
# mkdir_p is a no-op when the directory already exists, so no existence check
# is needed (File.exists? is gone in Ruby 3.2+).
FileUtils.mkdir_p(mediadir)


# URL prefixes of the image hosts whose files are mirrored locally.
MEDIA_HOSTS = ['http://m.friendfeed-media.com/', 'http://i.friendfeed.com/'].freeze

# Returns +url+ with the FriendFeed media host prefix removed, or nil when
# +url+ does not reference one of MEDIA_HOSTS.
def media_basename(url)
  prefix = MEDIA_HOSTS.find { |host| url.include?(host) }
  prefix && url.gsub(prefix, '')
end

# Builds the path written back into an entry for a mirrored image:
# "./", then one "../" per +subdirdepth+, then +mediadir+/+imgbasename+.
def relative_media_path(mediadir, subdirdepth, imgbasename)
  "./#{'../' * subdirdepth}#{mediadir}/#{imgbasename}"
end

# Returns "<name>text</name>" with &, < and > in +text+ escaped so the
# generated index stays well-formed.
def xml_element(name, text)
  "<#{name}>#{text.encode(xml: :text)}</#{name}>"
end

# Returns a self-closing xi:include element pointing at +entrylocalpath+,
# prefixed with "./" and one "../" per +subdirdepth+.
def xinclude_tag(subdirdepth, entrylocalpath)
  href = "./#{'../' * subdirdepth}#{entrylocalpath}"
  # encode(xml: :attr) escapes the value and wraps it in double quotes.
  "<xi:include href=#{href.encode(xml: :attr)}/>"
end

# Returns the relative URL of the next index page, or nil when the pager
# offers none. A first pager link labelled "Older items »" wins; otherwise
# the second pager link (if any) is used.
def next_index_path(index)
  pagerlinks = index.xpath('//div[@class="pager bottom"]/a')
  firstlink = pagerlinks[0]
  secondlink = pagerlinks[1]
  return firstlink['href'] if firstlink && firstlink.content == "Older items »"

  secondlink && secondlink['href']
end

# If +imgtag+ holds a FriendFeed media URL, downloads the image into
# +mediadir+ (skipping files already on disk) and rewrites the node's text
# to the relative local path. Other URLs are left untouched.
def localize_media(imgtag, mediadir, subdirdepth)
  imgurl = imgtag.content
  imgbasename = media_basename(imgurl)
  return unless imgbasename

  # NOTE(review): imgbasename comes from remote data and is used as a file
  # path; consider rejecting ".." segments if the source is not trusted.
  imgfn = "#{mediadir}/#{imgbasename}"
  puts "imgurl: " + imgurl
  puts "imgfn: " + imgfn
  if File.exist?(imgfn)
    puts "skipping " + imgfn
  else
    puts "downloading " + imgurl
    puts "saving " + imgfn
    # The basename may contain sub-directories; make sure they exist.
    FileUtils.mkdir_p(File.dirname(imgfn))
    URI.open(imgurl) { |remote| File.binwrite(imgfn, remote.read) }
  end
  imgtag.content = relative_media_path(mediadir, subdirdepth, imgbasename)
  puts "imgtag.content: " + imgtag.content
end

# Fetches entry +eid+ from the API at +apiurl+, mirrors its thumbnail images
# into +mediadir+, and writes the rewritten entry XML to +entrylocalpath+.
def save_entry(eid, entrylocalpath, apiurl, mediadir, subdirdepth)
  entry = URI.open("#{apiurl}/entry/e/#{eid}?format=xml") { |io| Nokogiri::XML(io) }
  %w[url link].each do |tag|
    entry.xpath("/entry/thumbnail/#{tag}").each do |imgtag|
      localize_media(imgtag, mediadir, subdirdepth)
    end
  end
  File.open(entrylocalpath, "w") { |entryfile| entryfile.puts entry.to_xml }
end

feedname = ARGV[0]
if feedname
  subdirdepth = feedname.split('/').count
  # Fetch the feed metadata before touching the local index, so a failed
  # request does not leave an empty index.xml behind.
  remotefeed = URI.open("#{apiurl}/feed/#{feedname}?format=xml") { |io| Nokogiri::XML(io) }

  FileUtils.mkdir_p(feedname)
  File.open("#{feedname}/index.xml", "w") do |localfeedfile|
    localfeedfile.puts "<?xml version=\"1.0\" encoding=\"utf-8\"?>"
    localfeedfile.puts "<feed xmlns:xi=\"http://www.w3.org/2001/XInclude\">"
    %w[sup_id type id name].each do |field|
      node = remotefeed.at_xpath("/feed/#{field}")
      localfeedfile.puts xml_element(field, node.content) if node
    end

    currentindexurl = [ffbaseurl, feedname].join('/')

    loop do
      index = URI.open(currentindexurl) { |io| Nokogiri::HTML(io) }
      nextindexrelpath = next_index_path(index)
      puts "nextindexrelpath: " + nextindexrelpath.to_s

      index.xpath('//div[@eid]').each do |eidnode|
        permalink = eidnode.at_xpath('div[@class="body"]/div[@class="info"]/a[@class="date"]/@href')
        unless permalink
          warn "no permalink found for entry " + eidnode['eid'].to_s + ", skipping"
          next
        end

        # NOTE(review): the local path is taken from the scraped href as-is;
        # consider rejecting ".." segments if the source is not trusted.
        entrylocalpath = ".#{permalink.content}"
        FileUtils.mkdir_p(File.dirname(entrylocalpath))

        if File.exist?(entrylocalpath)
          puts "skipping " + entrylocalpath
        else
          puts "saving " + entrylocalpath
          save_entry(eidnode['eid'], entrylocalpath, apiurl, mediadir, subdirdepth)
        end
        localfeedfile.puts xinclude_tag(subdirdepth, entrylocalpath)
      end

      # Stop when the pager offers no further page to follow.
      break unless nextindexrelpath

      currentindexurl = [ffbaseurl, nextindexrelpath].join
    end
    localfeedfile.puts "</feed>"
  end
else
  puts "Please specify a feed name: ffbackup [feed name]"
  puts " "
  puts "e.g. ffbackup aswang           # download associated individual entries and images of user aswang"
  puts "     ffbackup aswang/comments  # download all entries that user aswang has commented on as well as individual entries and images"
end
    
    
	
	