# -*- perl -*- Lintian::Index::FileInfo
#
# Copyright © 2020 Felix Lechner
#
# This program is free software; you can redistribute it and/or modify it
# under the terms of the GNU General Public License as published by the Free
# Software Foundation; either version 2 of the License, or (at your option)
# any later version.
#
# This program is distributed in the hope that it will be useful, but WITHOUT
# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
# FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
# more details.
#
# You should have received a copy of the GNU General Public License along with
# this program. If not, see .
package Lintian::Index::FileInfo;
use v5.20;
use warnings;
use utf8;
use Const::Fast;
use Cwd;
use Unicode::UTF8 qw(encode_utf8 decode_utf8);
use Lintian::IPC::Run3 qw(xargs);
use Moo::Role;
use namespace::clean;
const my $EMPTY => q{};
const my $SPACE => q{ };
const my $COMMA => q{,};
const my $NEWLINE => qq{\n};
const my $KEEP_EMPTY_FIELDS => -1;
const my $GZIP_MAGIC_SIZE => 9;
const my $GZIP_MAGIC_BYTES => 0x1f8b;
=head1 NAME
Lintian::Index::FileInfo - determine file type via magic.
=head1 SYNOPSIS
use Lintian::Index;
=head1 DESCRIPTION
Lintian::Index::FileInfo determine file type via magic.
=head1 INSTANCE METHODS
=over 4
=item add_fileinfo
=cut
sub add_fileinfo {
my ($self) = @_;
my $savedir = getcwd;
chdir($self->basedir)
or die encode_utf8(
$self->identifier . ': Cannot change to directory ' . $self->basedir);
my $errors = $EMPTY;
my @files = grep { $_->is_file } @{$self->sorted_list};
my @names = map { $_->name } @files;
my @command = qw(file --no-pad --print0 --print0 --);
my %fileinfo;
xargs(
\@command,
\@names,
sub {
my ($stdout, $stderr, $status, @partial) = @_;
# ignore failures if possible; file returns non-zero and
# "ERROR" on parse errors but output is still usable
# undecoded split allows names with non UTF-8 bytes
$stdout =~ s/\0$//;
my @lines = split(/\0/, $stdout, $KEEP_EMPTY_FIELDS);
unless (@lines % 2 == 0) {
$errors
.= 'Did not get an even number lines from file command.'
. $NEWLINE;
return;
}
while(defined(my $path = shift @lines)) {
my $type = shift @lines;
unless (length $path && length $type) {
$errors
.= "syntax error in file-info output: '$path' '$type'"
. $NEWLINE;
next;
}
# drop relative prefix, if present
$path =~ s{^\./}{};
$fileinfo{$path} = $type;
}
return;
});
$_->file_info($fileinfo{$_->name}) for @files;
# some files need to be corrected
my @probably_compressed
= grep { $_->name =~ /\.gz$/i && $_->file_info !~ /compressed/ } @files;
for my $file (@probably_compressed) {
my $buffer = $file->magic($GZIP_MAGIC_SIZE);
next
unless length $buffer;
# translation of the unpack
# nn nn , NN NN NN NN, nn nn, cc - bytes read
# $magic, __ __ __ __, __ __, $comp - variables
my ($magic, undef, undef, $compression) = unpack('nNnc', $buffer);
# gzip file magic
next
unless $magic == $GZIP_MAGIC_BYTES;
my $text = 'gzip compressed data';
# 2 for max compression; RFC1952 suggests this is a
# flag and not a value, hence bit operation
$text .= $COMMA . $SPACE . 'max compression'
if $compression & 2;
my $new_type = $file->file_info . $COMMA . $SPACE . $text;
$file->file_info($new_type);
}
# some TFMs are categorized as gzip, see Bug#963589
my @not_gzip
= grep { $_->name =~ /\.tfm$/i && $_->file_info =~ /gzip compressed data/ }
@files;
$_->file_info('data') for @not_gzip;
chdir($savedir)
or die encode_utf8(
$self->identifier . ": Cannot change to directory $savedir");
return $errors;
}
=back
=head1 AUTHOR
Originally written by Felix Lechner for
Lintian.
=head1 SEE ALSO
lintian(1)
=cut
1;
# Local Variables:
# indent-tabs-mode: nil
# cperl-indent-level: 4
# End:
# vim: syntax=perl sw=4 sts=4 sr et