Harbor Documentation

Harbor::Contrib::Stats::PageViewReconciler

Parent

Methods

Attributes

  • logger [RW] (Not documented)

Public Instance Methods

run()

      # File lib/harbor/contrib/stats/reconciliation/page_view_reconciler.rb, line 8
 # Reconciles stored page views against the imported apache requests.
 #
 # Builds a RequestQueue, then reads the page_views rows whose created_at is
 # at or before a cutoff (the earliest created_at plus 1/60000.0) in batches
 # of +block_size+ rows. Every row is wrapped in a PageViewRequest and looked
 # up with queue.expanded_search; on a hit the page view is updated with the
 # match's remote_ip and request_date and the match is marked as processed.
 # Running totals are logged before each batch and once more at the end.
 #
 # Returns early, before any page_views query, when the queue is empty.
 # Otherwise the method finishes by calling exit!(0), so it does not return
 # to its caller.
 def run
   # setup our queue of apache requests
   logger.info "Filling the queue with apache requests (this could take a while)"
   queue = RequestQueue.new

   # The former one-liner `logger.info "..." && break unless ...` parsed as
   # logger.info("..." && break) and used `break` outside of any loop, so the
   # hint was never logged. Log first, then leave the method explicitly.
   unless queue.size > 0
     logger.info "No apache requests found, you may need to run the apache_importer before reconciling page views."
     return
   end

   # NOTE(review): if page_views is empty, `.first` is presumably nil and the
   # addition below raises -- confirm whether that case needs a guard.
   invalid_date = repository.adapter.query('select distinct created_at from page_views order by created_at asc limit 1').first + 1/60000.0

   # Pull the table in blocks of block_size rows and work on one block at a
   # time, better than trying to do it all at once
   page_view_size = repository.adapter.query('select count(*) from page_views where created_at <= ?', invalid_date).first
   block_size = 10000
   loops = (page_view_size / block_size) + 1

   # NOTE(review): the query has no offset, so every batch reads the first
   # block_size rows at or before the cutoff. This presumably relies on
   # update! moving matched rows past the cutoff -- confirm, since rows that
   # stay unmatched would be fetched again by the next batch.
   query = "select *,ctid from page_views\nwhere created_at <= ?\norder by ctid asc\nlimit ?\n"

   non_matches = 0
   matches = 0

   # Logs the running totals. Reports 0.0 instead of NaN while nothing has
   # been counted yet (0 / 0.0 is NaN).
   log_totals = lambda do
     total = matches + non_matches
     percent = total.zero? ? 0.0 : matches / total.to_f * 100
     logger.info "Matches: #{matches}"
     logger.info "NonMatches: #{non_matches}"
     logger.info "Match %: #{percent}%"
   end

   loops.times do |i|
     logger.info "Batch #{i}:"
     log_totals.call
     page_views = repository.adapter.query(query, invalid_date, block_size)

     # ctid, remote_ip, request_method, uri, referrer, date
     # A referrer of "/" is replaced by "-" before the queue is searched.
     requests = page_views.map do |v|
       PageViewRequest.new(v.ctid, nil, nil, v.uri, v.referrer == "/" ? "-" : v.referrer, v.created_at, v.session_id)
     end

     requests.each_with_index do |page_view, j|
       match = queue.expanded_search(page_view)
       if match
         logger.info "\tMatch found for page view #{i}-#{j} with CTID #{page_view.id} -- #{match.id} -- QueueSize: #{queue.size}"
         page_view.update!(match.remote_ip, match.request_date)
         match.mark_as_processed!
         matches += 1
       else
         logger.info "\tNo match found for page view #{i}-#{j} with CTID #{page_view.id}"
         non_matches += 1
       end
     end
   end

   log_totals.call

   # exit! terminates the process immediately, skipping at_exit handlers.
   exit!(0)
 end