CPAN::Forum
PDF-API2 - Re: Garbled Text
| Posted on Thu Mar 2 21:05:07 2006 by gnurob in response to 1892 (See the whole thread of 3) |
| Re: Garbled Text |
|
# The script, in brief...
#
# Fetches each PDF listed in @url_list and prints its document-information
# fields (Title, Author, Subject, Creator, Producer), one field per line.
#
# NOTE(review): do_get() is not part of this excerpt. It is assumed to take
# a URL plus header pairs and to return the document body as its first
# value and a status as its second -- confirm against its definition.
use PDF::API2;
use LWP;
# $ua and @url_list are left as package variables because code outside this
# excerpt (presumably do_get) may rely on them -- verify before making them
# lexical.
$ua = LWP::UserAgent->new;
$ua->agent('PDFInspector/0.2 (email@host.com)');
binmode STDOUT, ":utf8";
@url_list = qw(
http://www.gs.gov.nl.ca/ohs/pdf/ann-rep-whsi.pdf
http://www.gs.gov.nl.ca/cca/cr/pdf/coop/coop21-art-dis.pdf
http://www.gs.gov.nl.ca/misc/data/gazette/wk/2006-01-13.pdf
);
# ann-rep-whsi.pdf contains wide characters
# coop21-art-dis.pdf
# 2006-01-13.pdf is encrypted

# The loop variable must be named: a bare "foreach (@url_list)" only sets
# $_, which left $pdf_url undefined for both do_get() and the "URL:" line.
foreach my $pdf_url (@url_list) {
    my ($pdf_doc, $pdf_status) = do_get($pdf_url, "Accept-Language" => "en");

    # Without a document body there is nothing to parse; skip this URL
    # instead of letting openScalar() die and abort the remaining URLs.
    unless (defined $pdf_doc && length $pdf_doc) {
        warn "Skipping $pdf_url: no document content returned\n";
        next;
    }

    my $pdf      = PDF::API2->openScalar($pdf_doc);
    my %pdf_info = $pdf->info();

    # Each field ends with a newline so the output is one field per line.
    print "URL: $pdf_url\n";
    foreach my $field (qw(Title Author Subject Creator Producer)) {
        # A field that is absent from the PDF prints as an empty value.
        my $value = defined $pdf_info{$field} ? $pdf_info{$field} : '';
        print "$field: $value\n";
    }
}
# Results look like...
URL: http://offline.gs.gov.nl.ca/ohs/pdf/ann-rep-whsi.pdf
Title: *Garbled*
Author: *Garbled*
Subject:
Creator: *Garbled*
Producer: Acrobat PDFWriter 4.0 for Windows
URL: http://offline.gs.gov.nl.ca/cca/cr/pdf/coop/coop01-art-inc.pdf
Title: The Co-Operatives Act (Form 1)
Author: Commercial Registrations
Subject: Articles of Incorporation (Section 8)
Creator:
Producer: Acrobat Distiller 4.05 for Windows
URL: http://offline.gs.gov.nl.ca/misc/data/gazette/wk/2006-01-13.pdf
Title: *Garbled*
Author: *Garbled*
Subject:
Creator: *Garbled*
Producer: *Garbled*
|
| Direct Responses: 2204 | Write a response |
(11)
]