#!/usr/bin/env ruby

# fix-indices 2015.04.09
#
# Fix index URLs in index files already downloaded by scrape-ff 2015.03.17 to 2015.04.08.
# (scrape-ff 2015.04.09 should address this issue directly.)
# It should not cause any harm if you run it on index files downloaded by other versions of scrape-ff
# (but back up your files just in case!).

require 'nokogiri'

# Pattern selecting the index file name inside a pager href. Only the matched
# part of the href is kept, so any text before "index" is dropped.
INDEX_BASENAME_PATTERN = /index.*\.html/

# XPath expressions selecting the href attribute of the first and second
# anchor in the top and bottom pager divs.
PAGER_HREF_XPATHS = [
  '(//div[@class="pager top"]/a)[1]/@href',
  '(//div[@class="pager top"]/a)[2]/@href',
  '(//div[@class="pager bottom"]/a)[1]/@href',
  '(//div[@class="pager bottom"]/a)[2]/@href'
].freeze

# Returns the part of +href+ that matches INDEX_BASENAME_PATTERN,
# or nil when the href does not contain such a part.
def index_basename(href)
  href[INDEX_BASENAME_PATTERN]
end

# Rewrites, in place, each pager href in the parsed document +index+ to the
# part matched by INDEX_BASENAME_PATTERN. Pager links that are absent, or
# whose href does not match the pattern, are left untouched.
def fix_pager_links(index)
  PAGER_HREF_XPATHS.each do |xpath|
    # Look the attribute up once instead of re-running the query for every step.
    href = index.xpath(xpath).first
    next unless href

    basename = index_basename(href.content)
    href.content = basename if basename
  end
end

# Parses the index file +indexfn+, fixes its pager links and writes the
# document back to the same path.
def fix_index_file(indexfn)
  # Block-form File.open closes the handle before the file is rewritten below;
  # it also avoids Kernel#open, which can treat a name starting with "|" as a
  # command to run.
  index = File.open(indexfn) { |file| Nokogiri::HTML(file) }
  fix_pager_links(index)
  # Serialisation stays as to_xml so the written output format is unchanged.
  File.write(indexfn, index.to_xml)
end

# Entry point: the first command-line argument names the directory that
# holds the index*.html files to fix.
feedname = ARGV[0]
if feedname
  Dir["#{feedname}/index*.html"].each do |indexfn|
    puts indexfn
    fix_index_file(indexfn)
  end
else
  puts "Please specify a directory containing index files: fix-indices [directory name]"
end