If you have a document or book manuscript, you may need to extract the inline images from it and attach figure references to them. If you want to give them names associated with the chapter in which they appear, this can end up being more complicated than you expected. In this example, I have a large book manuscript with several hundred images that I want to extract to Drive, with names that reflect which chapter they are in, and then index in a spreadsheet. The paragraphs that start each chapter are identified as DocumentApp.ParagraphHeading.HEADING1.

The approach

Using the technique described in Sorting bookmarks in a document, I can identify where all the images and all the paragraphs appear in a document. From this it can easily be deduced which chapter a given image is extracted from.

Code Walkthrough

Setup - it's a container-bound script for the active document.

// where to put the images
var IMAGE_PATH = '/books/going gas/admin/draft3/images';
// work on the current document
var doc = DocumentApp.getActiveDocument();
var body = doc.getBody();

Identify the paragraphs that are the chapter headers and figure out their chapter numbers and position in the document.
// get all the chapter heading level paragraphs
var chapters = body.getParagraphs().filter(function(d) {
  return d.getHeading() === DocumentApp.ParagraphHeading.HEADING1;
})
// and filter out those with no chapter heading text
.filter(function(d) {
  var cs = d.getText().split(" ");
  return cs.length && cs[0] === 'Chapter';
})
// and attach path in document & chapter number
.map (function(d) {
  var cs = d.getText().split(" ");
  if (cs.length < 2 ) throw 'no chapter number at ' + d.getText().slice(0,20);
  return {element:d , path:pathInDocument (d), chapter:cs[1]};
},[]);

Get all the images and figure out which chapter each one is in by comparing its document position with that of each chapter.

// now we can use the path in document to decide whether this image is in this chapter
var images = body.getImages().map(function(d) {
  var path = pathInDocument (d);
  // path of image lies between start of this chapter and beginning of next
  var ch = chapters.filter(function (c,i,a) {
    return path > c.path && (i === a.length-1 || path < a[i+1].path);
  });
  // there should be exactly 1 result
  if (ch.length !== 1) {
    throw 'no chapter found for image at path' + path;
  }
  // attach chapter object
  return { element:d, path:path , paragraph: ch[0] };
});

Index each image by its relative position within its chapter - this will be used as the figure number and filename.

// and add the filename (index by 1 for each chapter)
images.forEach (function(d,i,a) {
  d.name = d.paragraph.chapter.toString() + '.' + a.slice(0,i).reduce(function (p,c) {
    return c.paragraph.chapter === d.paragraph.chapter ? p+1 : p;
  },1).toString();
});

Write them all out, converting them to jpg - this could be preceded by deleting all the files in the receiving folder, but I've taken that out for safety in case you use this code as is.
// now write them all out
var folder = getDriveFolderFromPath(IMAGE_PATH);
images.forEach(function(d) {
  var blob = d.element.getBlob().copyBlob().setName (d.name+".jpg").setContentTypeFromExtension();
  folder.createFile (blob);
});

Record the results to a sheet, along with some control information.

// index to a sheet
var sheet = SpreadsheetApp.openById('14xFQAdVKzlkujVZDwk27z-632y4FViBKwz-6Iv-UF3U').getSheetByName('figs');
// clear the sheet
sheet.getDataRange().offset(1,0).clearContent();
// write the new values
sheet.getRange(2,1,images.length, 4).setValues(images.map(function(d) {
  return [d.name,d.name+".jpg","Screenshot",d.paragraph.element.getText().slice(0,10)];
}));

Utility function for figuring out the position of an element in the document:

function pathInDocument(element,path) {
  path = path || '';
  var parent = element.getParent();
  if (parent) {
    path = pathInDocument( parent , Utilities.formatString ( '%04d.%s', parent.getChildIndex(element),path ));
  }
  return path;
}

Utility function for figuring out the Drive folder from a folder path name:

function getDriveFolderFromPath (path) {
  return (path || "/").split("/").reduce ( function(prev,current) {
    if (prev && current) {
      var fldrs = prev.getFoldersByName(current);
      return fldrs.hasNext() ? fldrs.next() : null;
    }
    else {
      return current ? null : prev;
    }
  },DriveApp.getRootFolder());
}

The output
|
Services > Desktop Liberation - the definitive resource for Google Apps Script and Microsoft Office automation > Google Docs >