// MultiPagePDF.jsx
// An Illustrator CSX JavaScript
// Created on: March 3rd 2010
// Last updated on: March 15th 2010
//
// Copyright (C) 2010 Bruno Nadeau. Obx Labs. (www.obxlabs.net)
//
// This program is a modified version of the PDFPlacer script for loading
// multipage PDFs in InDesign made by Scott Zanelli.
// Lonelytree Software. (www.lonelytreesw.com). 2008.
// http://indesignsecrets.com/placing-all-the-pages-of-a-pdf-inside-indesign.php
//
// This program is free software; you can redistribute it and/or
// modify it under the terms of the GNU General Public License
// as published by the Free Software Foundation; either version 2
// of the License, or (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with this program; if not, write to the Free Software
// Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
//
// ToDo
// 1- Detect if PDF file uses compression and show warning if so, or decompress.
// Tested with Windows XP:
//        PDF 1.3  PDF 1.4  PDF 1.5  PDF 1.6  PDF 1.7
// CS
// CS2
// CS3
// CS4    yes      yes      yes      yes      yes
//
// Tested with Windows 7:
//        PDF 1.3  PDF 1.4  PDF 1.5  PDF 1.6  PDF 1.7
// CS
// CS2
// CS3    yes      yes      yes      yes      yes
// CS4    yes      yes      yes      yes      yes
//
// Tested with OSX:
//        PDF 1.3  PDF 1.4  PDF 1.5  PDF 1.6  PDF 1.7
// CS
// CS2
// CS3
// CS4

// NOTE: this script runs in Adobe ExtendScript (an ES3-level engine), so it
// deliberately sticks to `var` and other ES3-compatible constructs.

// Variables
var pdfFile = null;            // reference of the pdf file to import
var pdfFileName = "";          // name of the pdf file to import (without extension)
var copiedFile = null;         // reference of a copy of the pdf file
var copiedFileName = "";       // name of a copied pdf file
var copiedFolder = null;       // reference to the folder where tmp pdf files are copied
var startPage = 1;             // start page
var pdfOptions = null;         // pdf open options
var docRef = null;             // opened pdf page document reference
var docLayer = null;           // opened pdf page layer
var theDoc = null;             // main document
var oldInteractionPref = null; // saved interaction preferences

// Open a Multipage PDF.
// The call is skipped when the Illustrator host objects are missing, so the
// PDF parsing helpers below can be loaded on their own in a plain JS engine.
if (typeof app !== "undefined" && typeof File !== "undefined") {
    openMultipagePDF();
}

/************************/

// Entry point: asks the user for a PDF, parses its page count and first page
// size, creates a new RGB document of that size and imports every page from
// startPage onward into its own layer of that document.
// Nothing is returned; errors found while parsing are reported via throwError.
function openMultipagePDF() {
    // Ask for the file to import
    pdfFile = File.openDialog("Open Multi-page PDF", "*.pdf", false);

    // Check name ends with .pdf (any letter case)
    if (pdfFile == null || !/\.pdf$/i.test(pdfFile.name)) {
        throwError("A PDF file must be chosen. Quitting...", false, 1, null);
        return;
    }

    // Save the original file name, without its extension
    pdfFileName = pdfFile.name.replace(/\.pdf$/i, "");

    // Parse the PDF file and extract needed info. This is done before anything
    // is created on disk so a failed parse leaves no tmp folder behind.
    var pdfInfo = getPDFInfo(pdfFile, true);
    if (!pdfInfo || pdfInfo.pgSize == null) {
        // The parser has already reported the problem to the user
        return;
    }
    var pdfPgCount = pdfInfo.pgCount;
    if (isNaN(pdfPgCount) || pdfPgCount < 1) {
        throwError("Cannot determine page count.", true, 99, null);
        return;
    }

    // Create a temp folder to place the copied pdfs in
    copiedFolder = new Folder(pdfFile.path + "/tmp");
    if (!copiedFolder.exists) copiedFolder.create();

    // Set the PDF file to open with specified options
    pdfOptions = app.preferences.PDFFileOptions;
    pdfOptions.pDFCropToBox = PDFBoxType.PDFARTBOX;

    // Save the old interaction level state
    //XXX might need to check of app version here
    oldInteractionPref = app.userInteractionLevel;

    // Turn off user interaction
    app.userInteractionLevel = UserInteractionLevel.DONTDISPLAYALERTS;

    try {
        // Save the reference, this is now our main document
        theDoc = app.documents.add(DocumentColorSpace.RGB, pdfInfo.pgSize.width, pdfInfo.pgSize.height);

        // Open all pages
        for (var page = startPage; page <= pdfPgCount; page++) {
            importPdfPage(page);
        }
    } finally {
        // Always give the user their interaction level back, even on failure
        restoreDefaults();

        // Remove tmp folder, if it's empty
        if (copiedFolder.getFiles("*").length == 0) copiedFolder.remove();
    }
}

// Helper for openMultipagePDF: imports one page of the chosen PDF into its
// own layer of the main document (theDoc).
// page: 1-based number of the page to import.
// Works on a per-page temporary copy of the PDF, as the original script did,
// and always closes/removes that copy, even when opening the page fails.
function importPdfPage(page) {
    // The name of the new file
    var copiedFullName = copiedFolder.fullName + "/" + pdfFileName + page + ".pdf";

    // Make a copy of the original pdf
    if (!pdfFile.copy(copiedFullName)) {
        throw new Error("Unable to copy the PDF file to " + copiedFullName);
    }
    copiedFile = new File(copiedFullName);

    // Set the PDFOptions page
    pdfOptions.pageToOpen = page;

    docRef = null;
    try {
        // Open the pdf page
        docRef = app.open(copiedFile, DocumentColorSpace.RGB);

        // If we have any layers; we should have one
        if (docRef.layers.length > 0) {
            // Use the auto-created layer for the first imported page,
            // otherwise create a new layer and push it behind the others
            if (page == startPage) {
                docLayer = theDoc.layers[0];
            } else {
                docLayer = theDoc.layers.add();
                docLayer.zOrder(ZOrderMethod.SENDTOBACK);
            }

            // Duplicate the page layer to the new layer in the main document
            duplicateLayer(docRef.layers[0], docLayer);
        }
    } finally {
        // Close the pdf page document without saving, then clean up the copy
        if (docRef != null) docRef.close(SaveOptions.DONOTSAVECHANGES);
        copiedFile.remove();
    }
}

/************************/

// Restore the user interaction level saved by openMultipagePDF.
// Does nothing if no level was saved yet.
function restoreDefaults() {
    if (oldInteractionPref != null) {
        app.userInteractionLevel = oldInteractionPref;
    }
}

// Duplicate every page item of fromLayer into toLayer.
// NOTE(review): items are duplicated front to back with the default insertion
// location; confirm the resulting stacking order matches the source page.
function duplicateLayer(fromLayer, toLayer) {
    var itemCount = fromLayer.pageItems.length;
    for (var i = 0; i < itemCount; i++) {
        fromLayer.pageItems[i].duplicate(toLayer);
    }
}

// Extract info from the PDF file.
// pdfFile is the File to parse; it is opened and closed by this function.
// getSize is a boolean that will also determine page size and rotation of first page
// *** File position changes in this function. ***
// Results are as follows:
//   pdf version = ret.version
//   page count  = ret.pgCount
//   page width  = ret.pgSize.width
//   page height = ret.pgSize.height
// ret.pgSize is null when getSize is false or when no size could be found.
// Returns null (after reporting through throwError) when the file cannot be
// parsed at all.
function getPDFInfo(pdfFile, getSize){
    var flag = 0; // used to keep track if the %EOF line was encountered

    // The object to hold return values
    var retArray = {};
    retArray["version"] = -1;
    retArray["pgCount"] = -1;
    retArray["pgSize"] = null;

    // Open the PDF file for reading
    if (!pdfFile.open("r")) {
        throwError("Unable to open the PDF file for reading.", false, 2, null);
        return null;
    }

    // Get the pdf version
    var sVersion = pdfFile.readln();

    // Make sure this is a PDF
    if (sVersion.substr(0, 4) != "%PDF") {
        throwError("A PDF file must be chosen. Quitting...", false, 1, pdfFile);
        return null;
    }

    // Save the version number
    retArray["version"] = parseFloat(sVersion.substr(5));

    // Search for %EOF line, walking backward from the end of the file.
    // This skips any garbage at the end of the file.
    // If FOE% is encountered (%EOF read backwards), flag will be 15.
    // The walk is bounded by the file length so a file without the marker
    // cannot loop forever.
    var fileLength = pdfFile.length;
    for (var back = 0; flag != 15 && back <= fileLength; back++) {
        pdfFile.seek(back, 2);
        switch (pdfFile.readch()) {
            case "F": flag |= 1; break;
            case "O": flag |= 2; break;
            case "E": flag |= 4; break;
            case "%": flag |= 8; break;
            default: flag = 0; break;
        }
    }
    if (flag != 15) {
        throwError("Cannot determine page count.", true, 99, pdfFile);
        return null;
    }

    // Jump back a small distance to allow going forward more easily
    pdfFile.seek(Math.max(0, pdfFile.tell() - 100));

    // Read until startxref section is reached (or the file ends)
    var foundStartXref = false;
    do {
        if (pdfFile.readln() == "startxref") {
            foundStartXref = true;
            break;
        }
    } while (!pdfFile.eof);
    if (!foundStartXref) {
        throwError("Cannot determine page count.", true, 99, pdfFile);
        return null;
    }

    // Set the position of the first xref section
    var xrefPos = parseInt(pdfFile.readln(), 10);

    // The array for all the xref sections, newest first
    var xrefArray = [];
    var visitedXref = {}; // guards against a /Prev chain that loops

    // Get all the xref sections
    while (xrefPos != -1) {
        if (isNaN(xrefPos) || visitedXref[xrefPos]) {
            throwError("Cannot determine page count.", true, 99, pdfFile);
            return null;
        }
        visitedXref[xrefPos] = true;

        // Go to next section
        pdfFile.seek(xrefPos);

        // Make sure it's an xref line we went to, otherwise PDF is no good
        var xrefLine = pdfFile.readln();
        if (xrefLine != "xref") {
            throwError("Cannot determine page count.", true, 99, pdfFile);
            return null;
        }

        // Add the current xref section into the main array
        xrefArray[xrefArray.length] = makeXrefEntry(pdfFile);

        // See if there are any more xref sections
        xrefPos = xrefArray[xrefArray.length - 1].prevXref;
    }

    // Go get the location of the /Catalog section (the /Root obj)
    var objRef = -1;
    for (var s = 0; s < xrefArray.length && objRef == -1; s++) {
        objRef = xrefArray[s].rootObj;
    }

    // Double check root obj was found
    if (objRef == -1 || isNaN(objRef)) {
        throwError("Unable to find Root object.", true, 98, pdfFile);
        return null;
    }

    // Get the offset of the root section and set file position to it
    var theOffset = getByteOffset(objRef, xrefArray, pdfFile);
    if (!(theOffset >= 0)) {
        throwError("Unable to find Root object.", true, 98, pdfFile);
        return null;
    }
    pdfFile.seek(theOffset);

    // Determine the obj where the root page tree node is located
    objRef = getRootPageNode(pdfFile);

    // Get the offset where the root page node is located and set the file position to it
    theOffset = isNaN(objRef) ? -1 : getByteOffset(objRef, xrefArray, pdfFile);
    if (!(theOffset >= 0)) {
        throwError("Cannot determine page count.", true, 99, pdfFile);
        return null;
    }
    pdfFile.seek(theOffset);

    // Get the page count info from the root page tree node section
    retArray.pgCount = readPageCount(pdfFile);

    // Does user need size also? If so, get size info
    if (getSize) {
        var pageOffset = findFirstPageOffset(pdfFile, theOffset, xrefArray);
        if (pageOffset >= 0) {
            // Make sure file position is correct if finally at a leaf
            pdfFile.seek(pageOffset);

            // Go get the page sizes (null, already reported, when not found)
            retArray.pgSize = getPageSize(pdfFile);
        } else {
            throwError("Unable to determine PDF page size.", true, 96, pdfFile);
        }
    }

    // Close the PDF file, finally all done!
    pdfFile.close();
    return retArray;
}

// Helper for getPDFInfo: walks down the page tree from the root page tree
// node, always following the first /Kids entry, until it reaches an object
// that carries a /MediaBox or has no /Kids entry.
// pdfFile:    open File being parsed
// rootOffset: byte offset of the root page tree node
// xrefArray:  xref sections built by makeXrefEntry
// Returns the byte offset of the object to read the size from, or -1 when
// the tree cannot be followed.
// *** File position changes in this function. ***
function findFirstPageOffset(pdfFile, rootOffset, xrefArray){
    var MAX_HOPS = 100;   // guards against a page tree that loops
    var hops = 0;
    var theOffset = rootOffset;
    var rootFlag = false; // Flag to tell if page tree root was visited already
    var getOut;
    var tempLine;

    // Go back to root page tree node
    pdfFile.seek(theOffset);

    // Loop until an actual page obj is found (page tree leaf)
    do {
        getOut = true;

        if (rootFlag) {
            // Try to find the line with the /Kids entry
            // Also stop at the end of the dictionary
            do {
                tempLine = pdfFile.readln();
            } while (tempLine.indexOf("/Kids") == -1 && tempLine.indexOf(">>") == -1 && !pdfFile.eof);
        } else {
            // Try to first find the line with the /MediaBox entry
            rootFlag = true; // Indicate root page tree was visited
            getOut = false;  // Force loop if /MediaBox isn't found here
            do {
                tempLine = pdfFile.readln();
                if (tempLine.indexOf("/MediaBox") != -1) {
                    getOut = true;
                    break;
                }
            } while (tempLine.indexOf(">>") == -1 && !pdfFile.eof);

            if (!getOut) {
                // Reset the file pointer to the beginning of the root obj again
                pdfFile.seek(theOffset);
            }
        }

        // If /Kids entry was found, still at an internal page tree node
        if (tempLine.indexOf("/Kids") != -1) {
            if (++hops > MAX_HOPS) return -1;

            // Grab the obj ref for the first page: it follows the opening
            // bracket, which may be on this line or on the next one
            var kidsTail = tempLine.substring(tempLine.indexOf("/Kids") + 5);
            var bracketAt = kidsTail.indexOf("[");
            var objRef = (bracketAt != -1) ? parseInt(kidsTail.substring(bracketAt + 1), 10) : NaN;
            if (isNaN(objRef)) {
                // Go down one line
                tempLine = pdfFile.readln();
                bracketAt = tempLine.indexOf("[");
                objRef = parseInt((bracketAt != -1) ? tempLine.substring(bracketAt + 1) : tempLine, 10);
            }
            if (isNaN(objRef)) return -1;

            // Get the file offset for the page obj and set file pos to it
            theOffset = getByteOffset(objRef, xrefArray, pdfFile);
            if (!(theOffset >= 0)) return -1;
            pdfFile.seek(theOffset);
            getOut = false;
        }
    } while (!getOut);

    return theOffset;
}

// Function to create an object of xref info for one xref section
// File position must be set to second line of xref section
// (the line right after the "xref" keyword).
// Returns an object with:
//   theSects = array of subsections (see makeXrefSection), one per
//              "firstObj count" header found before the trailer
//   rootObj  = object number of /Root from the trailer, or -1
//   prevXref = byte offset of the previous xref section (/Prev), or -1
// *** File position changes in this function. ***
function makeXrefEntry(pdfFile){
    var newEntry = {};
    newEntry["theSects"] = [];
    newEntry["rootObj"] = -1;
    newEntry["prevXref"] = -1;

    // A subsection header is exactly two integers; entry lines have a third
    // field (n or f), so they never match this pattern
    var subsectionHeader = /^\s*\d+\s+\d+\s*$/;

    // Record every subsection until the trailer line is reached
    var tempLine = pdfFile.readln();
    var trailerAt = tempLine.indexOf("trailer");
    while (trailerAt == -1) {
        if (subsectionHeader.test(tempLine)) {
            // Entries of this subsection start right after its header line
            newEntry.theSects.push(makeXrefSection(tempLine, pdfFile.tell()));
        }
        if (pdfFile.eof) {
            // No trailer in this file: return what was gathered
            return newEntry;
        }
        tempLine = pdfFile.readln();
        trailerAt = tempLine.indexOf("trailer");
    }

    // Read the trailer dict, starting with whatever follows the keyword on
    // the trailer line itself. Need to get /Root object ref and /Prev offset.
    var temp = tempLine.substring(trailerAt + 7);
    while (true) {
        if (temp.indexOf("/Root") != -1) {
            // Extract the obj location where the root of the page tree is located:
            newEntry.rootObj = parseInt(temp.substring(temp.indexOf("/Root") + 5), 10);
        }
        if (temp.indexOf("/Prev") != -1) {
            newEntry.prevXref = parseInt(temp.substring(temp.indexOf("/Prev") + 5), 10);
        }
        if (temp.indexOf(">>") != -1 || pdfFile.eof) {
            break;
        }
        temp = pdfFile.readln();
    }

    return newEntry;
}

// Function to save xref subsection info
// theLine: subsection header line, "firstObj count"
// thePos:  file position of the first entry of the subsection
// Returns an object with startObj, numObjs and refPos.
function makeXrefSection(theLine, thePos){
    // Trim and collapse whitespace so extra blanks do not shift the fields
    var temp = theLine.replace(/^\s+|\s+$/g, "").replace(/\s+/g, " ").split(" ");
    var tempArray = {};
    tempArray["startObj"] = parseInt(temp[0], 10);
    tempArray["numObjs"] = parseInt(temp[1], 10);
    tempArray["refPos"] = thePos;
    return tempArray;
}

// Helper for getPageSize: counts non-overlapping occurrences of token in text.
function countToken(text, token){
    var count = 0;
    var at = text.indexOf(token);
    while (at != -1) {
        count++;
        at = text.indexOf(token, at + token.length);
    }
    return count;
}

// Helper for getPageSize: reads the numbers of a box array ("[x1 y1 x2 y2]").
// theLine:   line holding the box key
// fromIndex: index in theLine just after the box key
// The array is looked for after the key on theLine and, if missing there, on
// the next line of pdfFile.
// Returns { nums: array of number strings or null, line: the line the array
// was read from, consumed: true when an extra line was read }.
function readBoxArray(pdfFile, theLine, fromIndex){
    var result = { nums: null, line: theLine, consumed: false };
    var openAt = theLine.indexOf("[", fromIndex);

    if (openAt == -1) {
        // Need to go down one line to find the array
        theLine = pdfFile.readln();
        result.line = theLine;
        result.consumed = true;
        openAt = theLine.indexOf("[");
        if (openAt == -1) {
            return result;
        }
    }

    var closeAt = theLine.indexOf("]", openAt);
    var inner = (closeAt == -1) ? theLine.substring(openAt + 1) : theLine.substring(openAt + 1, closeAt);

    // Take care of leading, trailing and repeated spaces
    inner = inner.replace(/^\s+|\s+$/g, "").replace(/\s+/g, " ");
    if (inner != "") {
        result.nums = inner.split(" ");
    }
    return result;
}

// Function to extract the sizes from a page reference section
// File position must be at the start of the page object
// A /TrimBox, when present, takes priority over the /MediaBox.
// Returns an object with width and height (swapped when the page is rotated
// by 90 or 270 degrees), or null (after reporting through throwError) when
// no usable size was found.
// *** File position changes in this function. ***
function getPageSize(pdfFile){
    var hasTrimBox = false; // Prevent MediaBox from overwriting TrimBox info
    var charOffset = -1;
    var isRotated = false;  // Page rotated 90 or 270 degrees?
    var theNums = null;     // Values of the size array (x1, y1, x2, y2)
    var outCount = 0;       // keeps track of << >> nesting
    var gotIn = false;      // Was the opening << of the object seen?
    var theLine;
    var box;

    do {
        theLine = pdfFile.readln();
        outCount += countToken(theLine, "<<") - countToken(theLine, ">>");
        if (theLine.indexOf("<<") != -1) {
            gotIn = true;
        }

        if (!hasTrimBox && (charOffset = theLine.indexOf("/MediaBox")) != -1) {
            box = readBoxArray(pdfFile, theLine, charOffset + 9);
            if (box.consumed) {
                // Keep scanning from the extra line that was read
                theLine = box.line;
                outCount += countToken(theLine, "<<") - countToken(theLine, ">>");
                if (theLine.indexOf("<<") != -1) gotIn = true;
            }
            if (box.nums != null) {
                theNums = box.nums;
            }
        }

        if ((charOffset = theLine.indexOf("/TrimBox")) != -1) {
            box = readBoxArray(pdfFile, theLine, charOffset + 8);
            if (box.consumed) {
                theLine = box.line;
                outCount += countToken(theLine, "<<") - countToken(theLine, ">>");
                if (theLine.indexOf("<<") != -1) gotIn = true;
            }
            if (box.nums != null) {
                theNums = box.nums;
                // Prevent MediaBox overwriting TrimBox values
                hasTrimBox = true;
            }
        }

        if ((charOffset = theLine.indexOf("/Rotate")) != -1) {
            var rotVal = parseInt(theLine.substring(charOffset + 7), 10);
            // Normalise so that e.g. -90 is treated like 270
            rotVal = ((rotVal % 360) + 360) % 360;
            if (rotVal == 90 || rotVal == 270) {
                isRotated = true;
            }
        }
    } while ((!gotIn || outCount != 0) && !pdfFile.eof);

    // Check if a size array wasn't found
    if (theNums == null) {
        throwError("Unable to determine PDF page size.", true, 96, pdfFile);
        return null;
    }

    // Do the math; the two corners may be given in either order
    var xSize = Math.abs(parseFloat(theNums[2]) - parseFloat(theNums[0]));
    var ySize = Math.abs(parseFloat(theNums[3]) - parseFloat(theNums[1]));

    // One last check that sizes are actually numbers
    if (isNaN(xSize) || isNaN(ySize)) {
        throwError("One or both page dimensions could not be calculated.", true, 95, pdfFile);
        return null;
    }

    // Use rotation to determine orientation of pages
    var ret = {};
    ret["width"] = isRotated ? ySize : xSize;
    ret["height"] = isRotated ? xSize : ySize;
    return ret;
}

// Function that determines the byte offset of an object number
// Searches the built array of xref sections and reads the offset for theObj
// theObj:    object number to look up
// xrefArray: xref sections built by makeXrefEntry, newest first
// srcFile:   (optional) open File to read the xref entries from; defaults to
//            the global pdfFile, which is what earlier callers relied on
// Returns the byte offset, or -1 when no live entry exists for theObj.
// *** File position changes in this function. ***
function getByteOffset(theObj, xrefArray, srcFile){
    var xrefFile = srcFile || pdfFile;

    // Look for the theObj in all sections found previously
    for (var i = 0; i < xrefArray.length; i++) {
        var sections = xrefArray[i].theSects;
        for (var j = 0; j < sections.length; j++) {
            var section = sections[j];

            // See if theObj falls within this section
            if (section.startObj <= theObj && theObj <= section.startObj + section.numObjs - 1) {
                xrefFile.seek(section.refPos);

                // Skip the entries of the objects that come before theObj
                for (var k = 0; k < (theObj - section.startObj); k++) {
                    xrefFile.readln();
                }

                // Get the location of the obj
                var tempLine = xrefFile.readln();

                // Check if this is an old obj, if so ignore it
                // An xref entry with n is live, with f is not
                if (tempLine.indexOf("n") != -1) {
                    return parseInt(tempLine, 10);
                }
            }
        }
    }

    return -1;
}

// Function to extract the root page node object from a section
// File position must be at the start of the catalog (/Root) object
// Returns the object number that follows /Pages, or NaN when the file ends
// before a /Pages entry is found.
// *** File position changes in this function. ***
function getRootPageNode(pdfFile){
    var tempLine = pdfFile.readln();

    // Go to line with /Pages token in it
    while (tempLine.indexOf("/Pages") == -1) {
        if (pdfFile.eof) {
            return NaN;
        }
        tempLine = pdfFile.readln();
    }

    // Extract the root page obj number
    return parseInt(tempLine.substring(tempLine.indexOf("/Pages") + 6), 10);
}

// Function that gets the page count from a root page section
// File position must be at the start of the root page tree node
// Returns the number that follows /Count, or NaN when the file ends before
// a /Count entry is found.
// *** File position changes in this function. ***
function readPageCount(pdfFile){
    // Read in first line of section
    var theLine = pdfFile.readln();

    // Locate the line containing the /Count entry
    while (theLine.indexOf("/Count") == -1) {
        if (pdfFile.eof) {
            return NaN;
        }
        theLine = pdfFile.readln();
    }

    // Extract the page count
    return parseInt(theLine.substring(theLine.indexOf("/Count") + 6), 10);
}

// Error function
// msg:           text shown to the user
// includeResave: when true, appends the "Save As..." advice to the message
// idNum:         numeric id of the error; accepted but not used here
// fileToClose:   File to close before returning, or null
function throwError(msg, includeResave, idNum, fileToClose){
    var RESAVE = "\n\nPlease open the PDF file in Acrobat, do a \"Save As...\" and try again.";
    alert("ERROR: " + msg + ((includeResave) ? RESAVE : ""), "Multi-page PDF Script Error");
    if (fileToClose != null) {
        fileToClose.close();
    }
}