ProjectSource

Fingerprint::Scanner

class Fingerprint::Scanner

The scanner class can scan a set of directories and produce an index.

Definitions

def initialize(roots, pwd: Dir.pwd, **options)

Initialize the scanner to scan a given set of directories in order. [+options[:excludes]+] An array of regular expressions; paths matching any of them are excluded from indexing. [+options[:output]+] An +IO+ where the results will be written.

Implementation

# Prepare a scanner for the given roots.
#
# roots   - paths to scan; each one is expanded relative to +pwd+.
# pwd     - base directory used for that expansion (defaults to Dir.pwd).
# options - kept as @options. Read here: :excludes (patterns, defaults to an
#           empty list) and :checksums (names looked up in CHECKSUMS).
def initialize(roots, pwd: Dir.pwd, **options)
	@options = options
	@roots = roots.map { |root| File.expand_path(root, pwd) }
	@excludes = @options[:excludes] || []

	# Both start out unset; @progress is installed by #scan when progress output is requested.
	@progress = nil
	@callback = nil

	# A missing or empty checksum list falls back to the defaults.
	requested = @options[:checksums]
	if !requested || requested.size <= 0
		@options[:checksums] = DEFAULT_CHECKSUMS
	end

	# One digest object per checksum name, created by calling the matching CHECKSUMS entry.
	@digests = {}
	@options[:checksums].each { |name| @digests[name] = CHECKSUMS[name].call }
end

def header_for(root)

Builds the header record for a given root path; it mainly carries configuration and version information.

Implementation

# Build the :configuration record that introduces the output for +root+.
#
# The record is keyed by the expanded root path and carries the extended flag,
# the checksum names, the start time and the Fingerprint version.
def header_for(root)
	absolute_root = File.expand_path(root)

	# Only a literal +true+ counts as extended; any other value is reported as false.
	extended = (@options[:extended] == true)
	checksum_names = @options[:checksums].join(', ')

	details = {
		'options.extended' => extended,
		'options.checksums' => checksum_names,
		'summary.time.start' => Time.now,
		'fingerprint.version' => Fingerprint::VERSION
	}

	Record.new(:configuration, absolute_root, details)
end

def digests_for(path)

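Computes the configured digests for the file at the given path. This code won't handle multiple threads: the digest objects are shared by the scanner and are reset and reused on every call.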

Implementation

# Compute every configured digest for the file at +path+.
#
# Returns a hash mapping "key.<name>" to the hex digest for each entry in
# @digests. When @progress is set it is called with the running byte count
# after each chunk is read. The digest objects are shared instance state, so
# this must not be called concurrently.
def digests_for(path)
	chunk_size = 1024 * 1024 * 10
	bytes_read = 0

	# Clear whatever the previous file left in the shared digest objects.
	@digests.each_value(&:reset)

	File.open(path, "rb") do |file|
		chunk = ""

		# IO#read refills the same buffer on every pass and returns nil at end of file.
		while file.read(chunk_size, chunk)
			bytes_read += chunk.bytesize

			@progress.call(bytes_read) if @progress

			@digests.each_value { |digest| digest << chunk }
		end
	end

	@digests.each_with_object({}) do |(name, digest), metadata|
		metadata["key." + name] = digest.hexdigest
	end
end

def directory_record_for(path)

Output a directory header.

Implementation

# Build the :directory record for +path+, keyed by its relative path and
# carrying whatever metadata_for returns for a directory.
def directory_record_for(path)
	relative = path.relative_path
	details = metadata_for(:directory, path)

	Record.new(:directory, relative, details)
end

def file_record_for(path)

Output a file and associated metadata.

Implementation

# Build the :file record for +path+, keyed by its relative path and carrying
# whatever metadata_for returns for a file.
def file_record_for(path)
	details = metadata_for(:file, path)

	# Open question kept from the original author: should the digests be merged
	# here rather than in metadata_for?
	# details.merge!(digests_for(path))

	relative = path.relative_path

	Record.new(:file, relative, details)
end

def excluded_record_for(path)

Add information about excluded paths.

Implementation

# Build the :excluded record for +path+; it carries only the relative path,
# with no metadata argument.
def excluded_record_for(path)
	relative = path.relative_path

	Record.new(:excluded, relative)
end

def excluded?(path)

Returns true if the given path should be excluded.

Implementation

# Returns true when +path+ matches at least one of the patterns in @excludes,
# and false otherwise (including when there are no patterns).
def excluded?(path)
	@excludes.any? { |pattern| path.match(pattern) }
end

def scan(recordset)

Run the scanning process.

Implementation

# Walk every root and append the resulting records to +recordset+.
#
# For each root a header record is written, followed by one record per
# directory, symlink and file found beneath it. Excluded paths (and anything
# that is none of those kinds) are counted, and are only recorded when
# @options[:verbose] is set. A summary record is appended last.
#
# With @options[:progress] set, a first pass over the roots estimates the
# total number of entries and bytes, and progress lines are written to $stderr.
#
# Returns +recordset+.
def scan(recordset)
	excluded_count = 0
	processed_count = 0
	processed_size = 0
	directory_count = 0

	total_count = 0
	total_size = 0

	if @options[:progress]
		# First pass: estimate the number of files and the amount of data to process.
		@roots.each do |root|
			Find.find(root) do |path|
				# Some special files fail here, and skipping them was the simplest fix.
				Find.prune unless File.exist?(path)

				$stderr.puts "# Scanning: #{path}"

				if excluded?(path)
					Find.prune if path.directory?
				elsif path.symlink?
					total_count += 1
				elsif path.file?
					total_count += 1
					total_size += File.size(path)
				end
			end
		end

		# The reporter closes over the counters above, so it always prints their current values.
		@progress = ->(read_size) do
			$stderr.puts "# Progress: File #{processed_count} / #{total_count}; Byte #{processed_size + read_size} / #{total_size} = #{sprintf('%0.3f%%', (processed_size + read_size).to_f / total_size.to_f * 100.0)} (#{read_size}, #{processed_size}, #{total_size})"
		end
	end

	@roots.each do |root|
		recordset << header_for(root)

		Find.find(root) do |path|
			# Some special files fail here, and skipping them was the simplest fix.
			Find.prune unless File.exist?(path)

			$stderr.puts "# Path: #{path.relative_path}" if @options[:progress]

			if excluded?(path)
				excluded_count += 1

				recordset << excluded_record_for(path) if @options[:verbose]

				# Do not descend into an excluded directory.
				Find.prune if path.directory?
			elsif path.directory?
				directory_count += 1

				recordset << directory_record_for(path)
			elsif path.symlink?
				recordset << link_record_for(path)

				processed_count += 1
			elsif path.file?
				recordset << file_record_for(path)

				processed_count += 1
				processed_size += File.size(path)
			else
				# Neither directory, symlink nor regular file: count it as excluded.
				excluded_count += 1

				recordset << excluded_record_for(path) if @options[:verbose]
			end

			# Print out a progress summary if requested.
			@progress.call(0) if @progress
		end
	end

	summary = {
		'summary.directories' => directory_count,
		'summary.files' => processed_count,
		'summary.size' => processed_size,
		'summary.excluded' => excluded_count,
		'summary.time.end' => Time.now
	}

	recordset << Record.new(:summary, "#{processed_count} files processed.", summary)

	recordset
end

def self.scan_paths(paths, **options)

A helper function to scan a set of directories.

Implementation

# A helper function to scan a set of directories.
#
# paths   - the roots handed to Scanner.new.
# options - forwarded to Scanner.new. Read here:
#           :recordset - where records are collected; a new RecordSet is used
#                        when none is supplied.
#           :output    - when given, the record set is wrapped in a
#                        RecordSetPrinter constructed with this output.
#
# Returns the record set that was scanned into (the RecordSetPrinter wrapper
# when :output was given).
def self.scan_paths(paths, **options)
	# Always have something to scan into. Previously, passing neither :recordset
	# nor :output left this nil, so the scan failed on its first record.
	recordset = options[:recordset] || RecordSet.new

	if options[:output]
		recordset = RecordSetPrinter.new(recordset, options[:output])
	end

	# Keep the scanner's options in step with what is actually scanned into.
	options[:recordset] = recordset

	scanner = Scanner.new(paths, **options)
	scanner.scan(recordset)

	recordset
end