From e839884fb67cb02ab80619bf3f6ef4554d8f3ef6 Mon Sep 17 00:00:00 2001 From: Pieter van de Bruggen Date: Wed, 15 Jun 2011 12:11:26 -0700 Subject: [PATCH] (#7938) Delayed import from file, not YAML string. When we import a report, pushing the entire content through the database using DelayedJob is fairly inefficient - we were shipping megabytes of YAML around, with some inefficient encoding, to get the data imported. Writing the report to a file, then importing by filename, increases performance substantially, at the cost of tying us to workers on the same node. Right now, that seems a worthwhile trade-off. Reviewed-By: Matt Robinson Based on commit 1d00feb7a66c3f8001eec87d22c1cef823c5b945 by Pieter van de Bruggen --- app/controllers/reports_controller.rb | 18 ++++++++++++-- app/models/report.rb | 10 +++++--- config/initializers/delayed_job.rb | 11 ++++++--- lib/tasks/import_reports.rake | 7 +++--- spec/controllers/reports_controller_spec.rb | 27 +++++++++++++-------- spool/.gitignore | 3 +++ 6 files changed, 52 insertions(+), 24 deletions(-) create mode 100644 spool/.gitignore diff --git a/app/controllers/reports_controller.rb b/app/controllers/reports_controller.rb index f8752d3c8..5952c738a 100644 --- a/app/controllers/reports_controller.rb +++ b/app/controllers/reports_controller.rb @@ -27,8 +27,22 @@ def create def upload begin - Report.create_from_yaml(params[:report][:report]) - render :text => "Report successfully uploaded" + yaml = params[:report][:report] + md5 = Digest::MD5.hexdigest(yaml) + file = Rails.root + 'spool' + "#{md5}.yaml" + n = 0 + + begin + fd = File.new(file, File::CREAT|File::EXCL|File::RDWR, 0600) + fd.print yaml + fd.close + + Report.delay.create_from_yaml_file(file.to_s, :delete => true) + render :text => "Report queued for import as #{md5}" + rescue Errno::EEXIST + file = Rails.root + 'spool' + "#{md5}-#{n += 1}.yaml" + retry + end rescue => e error_text = "ERROR! ReportsController#upload failed:" Rails.logger.debug error_text diff --git a/app/models/report.rb b/app/models/report.rb index 3fc20e34d..5012b04f9 100644 --- a/app/models/report.rb +++ b/app/models/report.rb @@ -79,6 +79,12 @@ def self.attribute_hash_from(report_hash) attribute_hash end + def self.create_from_yaml_file(report_file, options = {}) + report = create_from_yaml(File.read(report_file)) + File.unlink(report_file) if options[:delete] + return report + end + def self.create_from_yaml(report_yaml) raw_report = YAML.load(report_yaml) @@ -96,10 +102,6 @@ def self.create_from_yaml(report_yaml) report end - class << self - handle_asynchronously :create_from_yaml - end - def assign_to_node self.node = Node.find_or_create_by_name(self.host) end diff --git a/config/initializers/delayed_job.rb b/config/initializers/delayed_job.rb index 29183bdb7..d8f54e223 100644 --- a/config/initializers/delayed_job.rb +++ b/config/initializers/delayed_job.rb @@ -1,11 +1,12 @@ -DELAYED_JOB_PID_PATH = "#{Rails.root}/tmp/pids/delayed_job.pid" +DELAYED_JOB_PID_PATH = Pathname.new "#{Rails.root}/tmp/pids/delayed_job_#{Rails.env}" +DELAYED_JOB_PID_PATH.mkpath Delayed::Worker.destroy_failed_jobs = false Delayed::Worker.max_attempts = 3 def start_delayed_job Thread.new do - `#{Rails.root}/script/delayed_job -p dashboard -m start` + `#{Rails.root}/script/delayed_job --pid-dir=#{DELAYED_JOB_PID_PATH} -p dashboard -m start` end end @@ -19,6 +20,8 @@ def process_is_dead? end end -if !File.exist?(DELAYED_JOB_PID_PATH) && process_is_dead? - start_delayed_job +unless Rails.env == 'test' + if !File.exist?(DELAYED_JOB_PID_PATH + 'delayed_job.pid') && process_is_dead? + start_delayed_job + end end diff --git a/lib/tasks/import_reports.rake b/lib/tasks/import_reports.rake index 247fe4947..46b6db733 100644 --- a/lib/tasks/import_reports.rake +++ b/lib/tasks/import_reports.rake @@ -9,14 +9,13 @@ namespace :reports do plural = lambda{|str, count| str + (count != 1 ? 's' : '')} reports = FileList[File.join(report_dir, '**', '*.yaml')] - STDOUT.puts "Importing #{reports.size} #{plural['report', reports.size]} from #{report_dir}" + STDOUT.puts "Importing #{reports.size} #{plural['report', reports.size]} from #{report_dir} in the background" skipped = 0 pbar = ProgressBar.new("Importing:", reports.size, STDOUT) reports.each do |report| - data = File.read(report) success = begin - Report.create_from_yaml(data) + Report.delay.create_from_yaml_file(report) rescue => e puts e false @@ -28,7 +27,7 @@ namespace :reports do successes = reports.size - skipped - STDOUT.puts "#{successes} of #{reports.size} #{plural['report', successes]} imported" + STDOUT.puts "#{successes} of #{reports.size} #{plural['report', successes]} queued" STDOUT.puts "#{skipped} #{plural['report', skipped]} skipped" if skipped > 0 end end diff --git a/spec/controllers/reports_controller_spec.rb b/spec/controllers/reports_controller_spec.rb index 667d307bb..d7945efb1 100644 --- a/spec/controllers/reports_controller_spec.rb +++ b/spec/controllers/reports_controller_spec.rb @@ -15,12 +15,15 @@ def model; Report end describe "correctly formatted POST", :shared => true do it { should_not raise_error } it { should change(Report, :count).by(1) } - it { should change{ Node.find_by_name("sample_node")}.from(nil) } + it { should change { Node.find_by_name("sample_node") }.from(nil) } end describe "with a POST from Puppet 2.6.x" do subject do - lambda { post_with_body('upload', @yaml, :content_type => 'application/x-yaml') } + lambda { + post_with_body('upload', @yaml, :content_type => 'application/x-yaml') + Delayed::Worker.new.work_off + } end it_should_behave_like "correctly formatted POST" @@ -28,7 +31,10 @@ def model; Report end describe "with a POST from Puppet 0.25.x" do subject do - lambda { post('upload', :report => @yaml) } + lambda { + post('upload', :report => @yaml) + Delayed::Worker.new.work_off + } end it_should_behave_like "correctly formatted POST" @@ -36,7 +42,10 @@ def model; Report end describe "with a POST with a report inside the report parameter" do subject do - lambda { post('upload', :report => { :report => @yaml }) } + lambda { + post('upload', :report => { :report => @yaml }) + Delayed::Worker.new.work_off + } end it_should_behave_like "correctly formatted POST" @@ -47,9 +56,8 @@ def model; Report end post('upload', :report => "" ) end - it "should return a 406 response and the error text" do - response.code.should == '406' - response.body.should == "ERROR! ReportsController#upload failed: The supplied report is in invalid format 'FalseClass', expected 'Puppet::Transaction::Report'" + it "should be 200, because we queued the job" do + response.code.should == '200' end end @@ -58,9 +66,8 @@ def model; Report end post('upload', :report => "foo bar baz bad data invalid") end - it "should return a 406 response and the error text" do - response.code.should == '406' - response.body.should == "ERROR! ReportsController#upload failed: The supplied report is in invalid format 'String', expected 'Puppet::Transaction::Report'" + it "should be 200, because we queued the job" do + response.code.should == '200' end end end diff --git a/spool/.gitignore b/spool/.gitignore new file mode 100644 index 000000000..69c95830b --- /dev/null +++ b/spool/.gitignore @@ -0,0 +1,3 @@ +# This directory is used to spool YAML reports for background importing. +# None of the content lives beyond the time to load the report. +*