ulvis.paste.net

Paste Search Dynamic
Recent pastes
Extract contents of a .webarchive file
  1. #!/usr/bin/env ruby
  2. #
  3. # A Mac OS X webarchive is a plist file stored in binary format. You can extract the contents manually:
  4. #  1. convert the plist file into XML with "plutil -convert xml1 file.webarchive"
  5. #  2. parse the resulting XML file with an XML parser
  6. #  3. decode "WebResourceData" with Base64.decode64(data) for each resource
  7. #  4. save the decoded content into the file named by "WebResourceURL"
  8. # Thankfully, the plist library can take care of annoying steps 2 and 3.
  9. #
  10. # Preparation:
  11. #  % gem install plist
  12. #
  13. # Usage:
  14. #  % unwebarchive.rb filename.webarchive
  15. #
  16. # Result:
  17. #  You'll find the extracted contents under the 'filename/' directory.
  18. #
  19.  
  20. require 'rubygems'
  21. require 'fileutils'
  22. require 'plist'
  23.  
  24. webarchive = ARGV.shift
  25. exportdir = File.basename(webarchive, ".webarchive")
  26.  
  27. class UnWebarchive
  28.  
  29.   def initialize(webarchive, exportdir)
  30.     @file = webarchive
  31.     @dir  = exportdir
  32.  
  33.     prepare_exportdir
  34.     parse_webarchive
  35.   end
  36.  
  37.   def prepare_exportdir
  38.     if File.exists?(@dir)
  39.       print "Override existing export directory '#{@dir}' [Yes/No]? "
  40.       exit 1 unless gets.chomp[/^y(es)?$/i]
  41.     end
  42.     FileUtils.mkdir_p(@dir)
  43.     FileUtils.cp(@file, @dir)
  44.   end
  45.  
  46.   def parse_webarchive
  47.     FileUtils.cd(@dir) do
  48.       system("plutil -convert xml1 #{@file}")
  49.       plist = Plist.parse_xml(File.read(@file))
  50.       file = plist["WebMainResource"]["WebResourceURL"]
  51.       data = plist["WebMainResource"]["WebResourceData"].read
  52.       data.gsub!(/file:\/\/\//, './')
  53.       export(file, data)
  54.       plist["WebSubresources"].each do |res|
  55.         file = res["WebResourceURL"]
  56.         data = res["WebResourceData"].read
  57.         export(file, data)
  58.       end
  59.     end
  60.   end
  61.  
  62.   def export(resource_uri, resource_data)
  63.     if resource_uri[/^file:/]
  64.       name = resource_uri.sub('file:///', '')
  65.       puts "Writing '#{@dir}/#{name}' ..."
  66.       File.open(name, "w") do |file|
  67.         file.print resource_data
  68.       end
  69.     end
  70.   end
  71. end
  72.  
  73. UnWebarchive.new(webarchive, exportdir)
  74.  
  75.  
  76.  
Parsed in 0.014 seconds