This is now deprecated since the demise of Gadgets on Sites.

This was also part of each evening's scheduled run. The site was serialized and each analytics entry was matched up to a page on the site. This whole project was heavily recursive, right from the site organization through to the final output, where rankings were shown both for the page and for the topic to which the page belongs.

Here’s how the pages were fetched:

  // look up the site to work on, using the domain and site name held in options
  var site = SitesApp.getSite(options.domain, options.site);
  
  // build the tree of all the pages on the site (getPages recurses through every level of children)
  var root = getPages(site);

Again I’m using exponential backoff to avoid those tiresome rate limit errors.

/**
 * Serialize a site into a tree of PageTreeObject nodes.
 *
 * The node returned is a synthetic root created with a null parent. The site
 * itself is attached as that root's child, and every page reachable through
 * getChildren() hangs below it, one PageTreeObject per page.
 *
 * @param {object} site the site to walk; it must offer getUrl() and getChildren({start: n})
 * @return {PageTreeObject} the synthetic root of the page tree
 * @throws {string} 'site has no root page' when the root does not end up with exactly one child
 */
function getPages(site) {

  var tree = new PageTreeObject (null, site);
  attachPage (tree, site);

  // assume the site has a top level page
  if (tree.children.length == 1) {
    return tree;
  }
  throw 'site has no root page';

  /**
   * Fetch one batch of direct children of a page, beginning at the given offset.
   * The call goes through the exponential backoff helper to ride out rate limit errors;
   * the extra arguments are handed to it exactly as given here.
   */
  function fetchChildren (page, offset) {
    return cUseful.rateLimitExpBackoff(function () {
      return page.getChildren({
        start: offset
      });
    }, undefined, undefined, undefined, true);
  }

  /**
   * Create the node for a page, hook it onto its parent, then do the same
   * for each of the page's children, recursively.
   * @return {PageTreeObject} the node created for this page
   */
  function attachPage (parent, page) {
    var node = new PageTreeObject (parent, page);
    parent.children.push (node);

    // getChildren can return a partial list, so keep asking from where the
    // previous batch stopped until an empty batch comes back
    var kids = [];
    var batch;
    do {
      Logger.log("working on " + page.getUrl());
      batch = fetchChildren (page, kids.length);
      Array.prototype.push.apply (kids, batch);
    } while (!batch || batch.length);

    // kids now holds every direct child of this page - descend into each one
    for (var i = 0; i < kids.length; i++) {
      // when debugging, only the first 9 children at each level are followed
      if (DEBUGGING && i >= 9) {
        continue;
      }
      attachPage (node, kids[i]);
    }

    return node;
  }

}
Now I have a nice tree of all the pages on my site, ready to be matched up with the analytics entries.