#!/usr/bin/perl # Program Name: PDF Metadata Editor # Program URL: http://www.arilabs.com/software/pdfmeta/pdfmeta.pl # Filename: pdfmeta.pl # Version: 1.3 # Last Modified: 2005-06-02 # Author: Brian High # Copyright: Analytical Resources, Inc. (2005) # License: GNU GPL version 2 or greater. See LICENSE below. # Requires: pdftk version 1.12 or greater, Perl/Tk # pdftk must be in your environment's PATH # Tested under: Mandrake 10.1.0 (KDE 3.2.3-99, Perl 5.8.5-3, Perl-Tk # 804.027-2), Debian unstable (Kanotix 2005-01, KDE 3.3.2-1, # Perl 5.8.4-6, Perl-Tk 800.025-2), Win2K Pro, and # WinXP Pro (SP2) with ActivePerl 5.8.6.811. # Known Issues: (1) Under Windows XP Pro SP2 and ActivePerl 5.8.6.811, # Windows will not let me drop files onto Perl script icons, # so you can use the drag-and-drop built into the script # to select the PDF file (by dropping the file onto the # listbox widget) or you can simply run the script # from the command line. This issue is a function of # how Windows works and may be addressed by some sort # or "registry hack"[1] or "power toy". Anyway, if you # really want to be able to drop PDFs right onto your # script icon, you can simply create a DOS batch file # or WSH/VBS script which will run pdfmeta.pl. Example: # Type this one-liner into a file : # @perl pdfmeta.pl %1 # And save the file in the same folder as pdftk.exe and # pdfmeta.pl. Then make a shortcut to pdfmeta.bat and # place that shortcut on your desktop. Modify the shortcut # properties so that the console window it brings up will # be "minimized". # Right-Click -> Properties -> Run: Minimized # Rename the shortcut: PDF Metadata Editor # (2) While the drag-and-drop (XDND, remote) works fine # under Mandrake's KDE, it does not under Debian's (Kanotix) # KDE. This may have something to do with window manager # settings. I just get a cursor of a circle with a line # through it. I will have to look into this... # # [1] For a discussion of your registry hacking options, see: # http://www.perlmonks.org/?node=122205 # This program provides a simple and limited graphical interface for # pdftk. It only provides the functionality to modify PDF metadata # (document properties) fields. # Usage: [ perl ] pdfmeta.pl [ ] # # You can either run from the command line, with or without a filename, # or you can drag a file onto the script icon, if your desktop supports it, # or you can execute the script by clicking the icon and you will be # offered a drag and drop interface. # NOTE: You must have write permissions for the directory which # contains the original PDF file, as well as for the original # PDF file itself. # NOTE: Modify the @fieldnames array to use a different set of document # properties (metadata fields). Use only standard PDF field names: # Title, Author, Subject, Keywords, Creator, Producer, CreationDate, # ModDate, and Trapped. # PDFTK can be found here: http://www.accesspdf.com/pdftk/ # See also: http://hacks.oreilly.com/pub/h/2422 # Here is the pdftk man page section on the update_info feature: # # update_info # Changes the metadata stored in a single PDF's # Info dictionary to match the input data file. # The input data file uses the same syntax as the # output from dump_data. This does not change the # metadata stored in the PDF's XMP stream, if it # has one. For example: # # pdftk in.pdf update_info in.info output out.pdf # Also, the author of pdftk (Sid Steward) has this to say about the # XMP stream: # # PDFs store this metadata is two places: the Info dictionary and # the XMP (RDF/XML) stream. Pdftk updates only the Info dictionary, # but newer versions of Acrobat/Reader defer to the XMP stream. # # I am currently working on new features for updating both the Info # dictionary and the XMP stream. # # One workaround might be to remove the PDF's XMP stream. You can do # this using pdftk, but it also removes bookmarks and other PDF # features. Run: # # pdftk mydoc.xmp.pdf cat output mydoc.no_xmp.pdf # # to burn of the XMP stream. Then maybe the viewer will fall back to # the Info dictionary with your updated data. # # ( From: http://www.accesspdf.com/comment.php?mode=view&cid=153 ) # ==================================================================== # LICENSE: GNU GPL v2 or greater: http://www.gnu.org/licenses/gpl.txt # ==================================================================== # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 2 of the License, or # (at your option) any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. use strict; use warnings; use File::Basename; use File::Spec::Functions; use File::Copy; use POSIX; use Tk; use Tk::DropSite; use constant SUCCESS => 1; use constant FAILURE => 0; use constant TRUE => 1; use constant FALSE => 0; use constant PROG_NAME => "PDF Metadata Editor"; use constant PROG_VERS => "1.3"; use constant FILE_SUFF => "-meta"; my (%fields, $err_msg, $main); my ($input_fn, $output_fn, $meta_fn); #---------------------------------------------------------------------- # Configuration #---------------------------------------------------------------------- # PDFTK command to use. Include explicit path if necessary. my $pdftk = "pdftk"; # You may modify the @fieldnames array to use a different set of document # properties (metadata fields). Use only standard PDF field names: # Title, Author, Subject, Keywords, Creator, Producer, CreationDate, # ModDate, and Trapped. The order of field names in this array is the # same as the order of the fields as listed on the user interface. my @fieldnames = qw( Title Subject Author Keywords ); #---------------------------------------------------------------------- # Main Routine #---------------------------------------------------------------------- &create_main_window(); &set_fonts(); &create_field_hash(); &get_input_file(); &MainLoop(); #---------------------------------------------------------------------- # Subroutines #---------------------------------------------------------------------- sub create_main_window { $main = MainWindow->new(); $main->title( PROG_NAME . " " . PROG_VERS ); } sub set_fonts { my $font_family = 'Helvetica'; my $large_size = 12; my $small_size = 10; $main->fontCreate( 'title', -size => $large_size, -weight => 'bold', -family => $font_family, ); $main->fontCreate( 'header', -size => $large_size, -family => $font_family, ); $main->fontCreate( 'label', -size => $small_size, -weight => 'bold', -family => $font_family, ); $main->fontCreate( 'button', -size => $small_size, -family => $font_family, ); $main->fontCreate( 'input', -size => $small_size, -family => $font_family, ); } sub create_field_hash { # Create hash to store metadata fields and values %fields = (); foreach my $field ( @fieldnames ) { $fields{$field} = ''; } } sub get_input_file { # If a filename was given as an argument, use it if ( $ARGV[0] ) { $input_fn = $ARGV[0]; &get_metadata() && &complete_gui(); } else { # Otherwise offer drag and drop interface &create_drop_widgets(); } } sub create_drop_widgets { my $drop_label = $main->Label ( -text => "Drag your PDF file into the box below:", -font => 'title', ) ->pack ( -ipadx => 12, -ipady => 4, -padx => 8, -pady => 8, ); # Define a DropSite (source side) for Drag and Drop functionality my $drop = $main->Scrolled ( 'Listbox', -scrollbars => "osoe", -height => 1, ) ->pack ( -pady => 8, ); # Tell Tk that $drop should accept drops. # When dropping occurs, execute the accept_drop callback. $drop->DropSite ( -dropcommand => [\&accept_drop, $drop], -droptypes => ( $^O eq 'MSWin32' ? 'Win32' : 'XDND' ) ); } sub accept_drop { my( $widget, $selection ) = @_; eval { $input_fn = $widget->SelectionGet ( -selection => $selection, 'STRING' ); $input_fn =~ s/^file:(.*)/$1/; }; if ( defined $input_fn ) { $widget->insert( 0, $input_fn ); } # After the file is dropped, hide the widgets, and present new widgets &clean_gui(); &get_metadata() && &complete_gui(); } sub clean_gui { # Remove (hide) widgets on the form, if any my @w = $main->packSlaves; foreach (@w) { $_->packForget; } } sub complete_gui { &clean_gui(); # Finish defining MainWindow attributes and add widgets $main->Label ( -justify => 'left', -text => "Filename: \n$input_fn", -font => 'label', ) ->pack ( -anchor => 'w', -padx => 8, -pady => 8, ); foreach my $field ( @fieldnames ) { $main->Label ( -justify => 'left', -text => "$field: ", -font => 'label', ) ->pack ( -anchor => 'w', -padx => 8, ); $main->Entry ( -textvariable => \$fields{$field}, -font => 'input', ) ->pack ( -fill => 'x', -padx => 8, ); } $main->Button ( -text => "Save Changes and Exit", -font => 'button', -command => sub { &save_and_exit(); }, ) ->pack ( -side => 'left', -ipadx => 12, -ipady => 4, -pady => 8, -expand => TRUE, ); $main->Button ( -text => "Close", -font => 'button', -command => sub { exit }, ) ->pack ( -side => 'right', -ipadx => 12, -ipady => 4, -pady => 8, -expand => TRUE, ); } sub get_metadata { my ( $input_fn_base, $input_fn_path, $input_fn_type ); $err_msg = "Input file must be a (single) PDF. Please try again."; # If more than one file is selected, then show abort error if ( defined( $output_fn ) ) { undef $output_fn; &show_msg() && &create_drop_widgets() && return FAILURE; } # Parse the file path, abort if not a PDF, create new file names ($input_fn_base, $input_fn_path, $input_fn_type) = fileparse($input_fn, qr{\.pdf}i); $input_fn_type =~ /\.pdf/i || &abort_me() && return FAILURE; $output_fn = $input_fn_path . $input_fn_base . FILE_SUFF . $input_fn_type; $meta_fn = $input_fn_path . $input_fn_base . '.mta'; # Check to make sure we can read/write to files and directories $err_msg = "Cannot read from and/or write to input file!"; (-r $input_fn && -w $input_fn) || &abort_me() && return FAILURE; $err_msg = "Cannot write to $input_fn_path directory!\n\n" . "You must place the original file in a writable directory\n" . "before running this program."; -w $input_fn_path || &abort_me() && return FAILURE; # Dump the PDF's metadata to an ASCII text file and check for errors &dump_meta_data() || &abort_me() && return FAILURE; &check_for_dict() || &abort_me() && return FAILURE; # Read metadata text file into %fields hash $err_msg = "Error opening temporary metadata file for reading!"; open ( METADATA, "<", $meta_fn ) || &abort_me() && return FAILURE; while ( ) { foreach my $field ( @fieldnames ) { if ( /^InfoKey: $field$/ ) { $_ = ; chomp; s/^InfoValue: (.*)$/$1/; $fields{$field} = $_; } } } &close_and_delete_metadata_file(); } sub check_for_dict { my $no_dict = FALSE; # Read metadata text file to check for "no dictionary" error $err_msg = "Error opening temporary metadata file for reading!"; open ( METADATA, "<", $meta_fn ) || return FAILURE; while ( ) { if ( /no info dictionary found/ ) { $no_dict = TRUE; last; } } close METADATA; # If there was no info dictionary, then create one and save metadata if ( $no_dict ) { # Delete metadata file unlink $meta_fn; # Use PDFTK to add a new info dictionary using the 'cat' feature &cat_pdf() || return FAILURE; # Move repaired file to orig. file, get metadata, then store in file my $cmd = "$pdftk \"$input_fn\" dump_data > \"$meta_fn\" 2>&1"; $err_msg = "Error running pdftk dump_data command!"; move ( $output_fn, $input_fn ) && &dump_meta_data() || return FAILURE; } return SUCCESS; } sub dump_meta_data { my $cmd = "$pdftk \"$input_fn\" dump_data > \"$meta_fn\" 2>&1"; $err_msg = "Error running dump_data command!\n\n"; system ( $cmd ) == 0 || ( &report_metadata_errors() && return FAILURE ); return SUCCESS } sub report_metadata_errors { open ( METADATA, "<", $meta_fn ) || &abort_me() && return FAILURE; local $/ = undef; $err_msg .= ; &close_and_delete_metadata_file(); } sub close_and_delete_metadata_file { close METADATA; unlink $meta_fn; } sub cat_pdf { # Get current metadata info from input file and store in text file $err_msg = "Error running pdftk cat command!"; system ( "$pdftk \"$input_fn\" cat output \"$output_fn\" " . "dont_ask 2>&1" ) == 0 || return FAILURE; return SUCCESS; } sub abort_me { &clean_gui(); # Show an error message and an exit button $main->Label ( -text => $err_msg, -font => 'header', ) ->pack ( -ipadx => 12, -ipady => 4, -padx => 8, -pady => 8, ); $main->Button ( -text => "Exit", -command => sub { exit }, -font => 'button', ) ->pack ( -ipadx => 12, -ipady => 4, -padx => 8, -pady => 8, ); } sub show_msg { &clean_gui(); # Show an error message and an exit button $main->Label ( -text => $err_msg, -font => 'header', ) ->pack ( -ipadx => 12, -ipady => 4, -padx => 8, -pady => 8, ); } sub save_and_exit { # Save new metadata to metadata file $err_msg = "Error opening temporary metadata file for writing!"; open ( METADATA, ">", $meta_fn ) || &abort_me() && return FAILURE; foreach my $field ( @fieldnames ) { print METADATA "InfoKey: $field\n"; print METADATA "InfoValue: $fields{$field}\n"; } # Write new metadata into new PDF file system ( "$pdftk \"$input_fn\" " . "update_info \"$meta_fn\" " . "output \"$output_fn\" dont_ask" ); # Replace orig. PDF with new one, so that the old one has new metadata $err_msg = "Error updating PDF! Changes are in:\n" . $output_fn; move ( $output_fn, $input_fn ) || &abort_me() && return FAILURE; # Delete metadata file. (The delete does not work without opening first.) $err_msg = "Error opening temporary metadata file for reading!"; open ( METADATA, "<", $meta_fn ) || &abort_me() && return FAILURE; &close_and_delete_metadata_file(); exit; } __END__