Fingerprint::Scanner
class Fingerprint::Scanner
The scanner class can scan a set of directories and produce an index.
Definitions
def initialize(roots, pwd: Dir.pwd, **options)
Initialize the scanner to scan the given set of directories in order. [+options[:excludes]+] An array of regular expressions matching paths that should not be indexed. [+options[:output]+] An +IO+ to which the results will be written.
Implementation
# Set up a scanner over the given roots.
#
# roots   - paths to scan; each one is expanded against +pwd+.
# pwd     - base directory used to expand relative roots (defaults to Dir.pwd).
# options - remaining keyword options, kept in @options. This method reads
#           :excludes (stored in @excludes, defaulting to an empty array) and
#           :checksums (names looked up in CHECKSUMS); when :checksums is
#           missing or empty, DEFAULT_CHECKSUMS is used instead.
def initialize(roots, pwd: Dir.pwd, **options)
  @roots = roots.map { |root| File.expand_path(root, pwd) }
  @excludes = options[:excludes] || []
  @options = options

  # No progress reporter is installed until a scan asks for one.
  @progress = nil

  requested = @options[:checksums]
  @options[:checksums] = DEFAULT_CHECKSUMS unless requested && requested.size > 0

  # One digest object per checksum name, created by calling its CHECKSUMS entry.
  @digests = {}
  @options[:checksums].each { |name| @digests[name] = CHECKSUMS[name].call }

  @callback = nil
end
def header_for(root)
Builds the header record for a given root path; it mainly carries configuration and version information.
Implementation
# Build the :configuration record for +root+: the expanded root path together
# with the effective options, the current time and the Fingerprint version.
def header_for(root)
  absolute_root = File.expand_path(root)

  metadata = {
    'options.extended' => @options[:extended] == true,
    'options.checksums' => @options[:checksums].join(', '),
    'summary.time.start' => Time.now,
    'fingerprint.version' => Fingerprint::VERSION
  }

  Record.new(:configuration, absolute_root, metadata)
end
def digests_for(path)
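Computes every configured digest for the file at the given path. This method is not thread-safe: it resets and reuses the digest objects shared in @digests.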
Implementation
# Compute every configured digest for the file at +path+.
#
# The file is opened in binary mode and read in chunks of up to 10 MiB; each
# chunk is fed to every digest in @digests. If @progress is set, it is called
# after each chunk with the running number of bytes read so far.
#
# Returns a Hash mapping "key.<name>" to the hex digest, one entry per digest
# in @digests.
#
# Not thread-safe: the digest objects in @digests are shared, and are reset
# and reused on every call.
def digests_for(path)
  chunk_size = 1024 * 1024 * 10
  total = 0

  # Discard whatever state the previous file left behind.
  @digests.each_value(&:reset)

  File.open(path, "rb") do |file|
    # IO#read writes into this buffer, so it must be mutable. A "" literal is
    # frozen under `frozen_string_literal: true` (and chilled on Ruby 3.4+),
    # hence the explicit String.new.
    buffer = String.new

    # IO#read with a length returns nil at end of file, which ends the loop.
    while file.read(chunk_size, buffer)
      total += buffer.bytesize

      @progress.call(total) if @progress

      @digests.each_value { |digest| digest << buffer }
    end
  end

  # Interpolation (rather than String#+) so non-String names such as symbols
  # also produce a valid key.
  @digests.each_with_object({}) do |(name, digest), metadata|
    metadata["key.#{name}"] = digest.hexdigest
  end
end
def directory_record_for(path)
Output a directory header.
Implementation
# Build the :directory record for +path+, identified by its relative path and
# carrying the metadata returned by metadata_for.
def directory_record_for(path)
  relative = path.relative_path
  details = metadata_for(:directory, path)

  Record.new(:directory, relative, details)
end
def file_record_for(path)
Output a file and associated metadata.
Implementation
# Build the :file record for +path+ from its relative path and the metadata
# returned by metadata_for.
def file_record_for(path)
  # Digests are not merged in here. It is an open question whether that
  # belongs in this method or in metadata_for:
  #   metadata.merge!(digests_for(path))
  details = metadata_for(:file, path)

  Record.new(:file, path.relative_path, details)
end
def excluded_record_for(path)
Add information about excluded paths.
Implementation
# Build an :excluded record for +path+; it carries only the relative path.
def excluded_record_for(path)
  relative = path.relative_path

  Record.new(:excluded, relative)
end
def excluded?(path)
Returns true if the given path should be excluded.
Implementation
# Report whether +path+ matches any of the configured exclusion patterns.
#
# Returns true if at least one entry of @excludes matches +path+, and false
# otherwise (including when @excludes is empty).
def excluded?(path)
  @excludes.any? { |pattern| path.match(pattern) }
end
def scan(recordset)
Run the scanning process.
Implementation
# Walk every root and append the resulting records to +recordset+.
#
# For each root a header record is appended first, followed by one record per
# directory, symlink and regular file found. Excluded paths, and paths that
# are none of those three kinds, are counted and only recorded when
# @options[:verbose] is set. A :summary record with the final counters is
# appended last.
#
# When @options[:progress] is set, the roots are first walked once to estimate
# the total number of files and bytes, and progress lines are written to
# $stderr during both passes.
#
# Returns +recordset+.
def scan(recordset)
  # Counters reported in the summary record.
  directory_count = 0
  processed_count = 0
  processed_size = 0
  excluded_count = 0

  # Totals gathered by the estimation pass; only used in progress output.
  total_count = 0
  total_size = 0

  # Shared bookkeeping for paths that are skipped, whether by an exclusion
  # pattern or because they are not a directory, symlink or regular file.
  record_exclusion = lambda do |skipped|
    excluded_count += 1
    recordset << excluded_record_for(skipped) if @options[:verbose]
  end

  if @options[:progress]
    # Estimate the number of files and amount of data to process.
    @roots.each do |root|
      Find.find(root) do |path|
        # Some special files fail here, and pruning them was the simplest fix.
        Find.prune unless File.exist?(path)

        $stderr.puts "# Scanning: #{path}"

        if excluded?(path)
          Find.prune if path.directory?
        elsif path.symlink?
          total_count += 1
        elsif path.file?
          total_count += 1
          total_size += File.size(path)
        end
      end
    end

    # Progress reporter stored in @progress; read_size is the number of bytes
    # read so far from the file currently being processed.
    @progress = ->(read_size) {
      $stderr.puts "# Progress: File #{processed_count} / #{total_count}; Byte #{processed_size + read_size} / #{total_size} = #{sprintf('%0.3f%%', (processed_size + read_size).to_f / total_size.to_f * 100.0)} (#{read_size}, #{processed_size}, #{total_size})"
    }
  end

  @roots.each do |root|
    recordset << header_for(root)

    Find.find(root) do |path|
      # Some special files fail here, and pruning them was the simplest fix.
      Find.prune unless File.exist?(path)

      $stderr.puts "# Path: #{path.relative_path}" if @options[:progress]

      if excluded?(path)
        record_exclusion.call(path)

        # Do not descend into an excluded directory.
        Find.prune if path.directory?
      elsif path.directory?
        directory_count += 1
        recordset << directory_record_for(path)
      elsif path.symlink?
        recordset << link_record_for(path)
        processed_count += 1
      elsif path.file?
        recordset << file_record_for(path)
        processed_count += 1
        processed_size += File.size(path)
      else
        record_exclusion.call(path)
      end

      # Print out a progress summary if requested.
      @progress.call(0) if @progress
    end
  end

  # Output summary.
  recordset << Record.new(:summary, "#{processed_count} files processed.", {
    'summary.directories' => directory_count,
    'summary.files' => processed_count,
    'summary.size' => processed_size,
    'summary.excluded' => excluded_count,
    'summary.time.end' => Time.now
  })

  recordset
end
def self.scan_paths(paths, **options)
A helper function to scan a set of directories.
Implementation
# Scan +paths+ and return the record set holding the results.
#
# All options are forwarded to Scanner.new. Two of them are also interpreted
# here:
# [+:recordset+] The record set to append to; a new RecordSet is created when
#                none is given.
# [+:output+]    When set, the record set is wrapped in a RecordSetPrinter
#                constructed with this value.
#
# Returns the record set that was scanned into (the RecordSetPrinter wrapper
# when :output was given).
def self.scan_paths(paths, **options)
  # Always have something to scan into. Previously, omitting both :output and
  # :recordset passed nil to Scanner#scan, which failed on the first append.
  recordset = options[:recordset] || RecordSet.new

  recordset = RecordSetPrinter.new(recordset, options[:output]) if options[:output]

  # The scanner receives the same record set through its options, as before.
  options[:recordset] = recordset

  Scanner.new(paths, **options).scan(recordset)

  recordset
end