// Reference Source
//
// Reconstructive/reconstructive.js

/**
 * [Reconstructive](https://github.com/oduwsdl/Reconstructive) is a [ServiceWorker](https://www.w3.org/TR/service-workers/) module for client-side reconstruction of composite mementos.
 * It reroutes embedded resource requests to their appropriate archival version without any URL rewriting.
 * It also provides functionality to add custom archival banners or rewrite hyperlinks on the client-side.
 * Use it in a ServiceWorker as illustrated below:
 *
 * ```js
 * importScripts('reconstructive.js');
 * const rc = new Reconstructive();
 * self.addEventListener('fetch', event => rc.reroute(event));
 * ```
 *
 * @overview  Reconstructive is a module to be used in a ServiceWorker of an archival replay.
 * @author    Sawood Alam <ibnesayeed@gmail.com>
 * @license   MIT
 * @copyright ODU Web Science / Digital Libraries Research Group 2017
 */
class Reconstructive {
  /**
   * Creates a new Reconstructive instance with optional configurations.
   * Every own enumerable property of the supplied config object is copied onto the instance, overwriting the defaults assigned before it.
   *
   * @param {{id: string, urimPattern: string, bannerElementLocation: string, bannerLogoLocation: string, bannerLogoHref: string, showBanner: boolean, debug: boolean}} [config] - Configuration options
   */
  constructor(config) {
    /**
     * Name of the module.
     * Treated as a constant.
     *
     * @type {string}
     */
    this.NAME = 'Reconstructive';

    /**
     * Version of the module.
     * Treated as a constant.
     *
     * @type {string}
     */
    this.VERSION = '0.7.1';

    /**
     * Identifier of the module, sent to the server as X-ServiceWorker header.
     * Defaults to the name and version of the module.
     *
     * @type {string}
     */
    this.id = `${this.NAME}:${this.VERSION}`;

    /**
     * The format of URI-Ms (e.g., http://example.com/archive/<datetime>/<urir>).
     * Treated as a literal string: only the <datetime> and <urir> placeholders have a special meaning.
     *
     * @type {string}
     */
    this.urimPattern = `${self.location.origin}/memento/<datetime>/<urir>`;

    /**
     * The URL or absolute path of the JS file that defines custom banner element.
     * Only necessary if showBanner is set to true.
     *
     * @type {string}
     */
    this.bannerElementLocation = `${self.location.origin}/reconstructive-banner.js`;

    /**
     * The URL or absolute path of the logo image to appear in the banner.
     * An empty value will render the default Reconstructive logo as inline SVG.
     * Only necessary if showBanner is set to true.
     *
     * @type {string}
     */
    this.bannerLogoLocation = '';

    /**
     * The URL or absolute path to link from the logo image in the banner.
     * This should generally be set to the address of the homepage.
     * Only necessary if showBanner is set to true.
     *
     * @type {string}
     */
    this.bannerLogoHref = '/';

    /**
     * Whether or not to show an archival banner.
     * Defaults to false.
     *
     * @type {boolean}
     */
    this.showBanner = false;

    /**
     * Whether or not to show debug messages in the console.
     * Defaults to false.
     *
     * @type {boolean}
     */
    this.debug = false;

    // Iterate over the supplied configuration object to overwrite defaults and add new properties
    if (config instanceof Object) {
      for (const [k, v] of Object.entries(config)) {
        /** @ignore **/
        this[k] = v;
      }
    }

    // Escape RegExp metacharacters of the literal pattern first ("<" and ">" are not metacharacters),
    // then substitute the placeholders with capturing groups.
    const urimSource = this._escapeRegExp(this.urimPattern)
      .replace('<datetime>', '(\\d{14})')
      .replace('<urir>', '(.*)');

    /**
     * A private object with various RegExp properties (possibly derived from other properties) for internal use.
     *
     * @private
     * @type    {{urimPattern: RegExp, absoluteReference: RegExp, bodyEnd: RegExp}}
     */
    this._regexps = {
      urimPattern: new RegExp(`^${urimSource}$`),
      absoluteReference: new RegExp(`(<(iframe|a|meta).*?\\s+(src|href|content\\s*=\\s*["']?\\s*\\d+\\s*;\\s*url)\\s*=\\s*["']?)(https?:\/\/[^'"\\s]+)(.*?>)`, 'ig'),
      bodyEnd: new RegExp('<\/(body|html)>', 'i')
    };

    /**
     * An object of functions to check whether the request should be excluded from being rerouted.
     * Add more members to the object to add more exclusions or modify/delete existing ones.
     * The property name can be anything descriptive of the particular exclusion, which will be shown in debug logs.
     * Each member function is called with the fetch event as parameters.
     * If any member returns true, the fetch event is excluded from being rerouted.
     *
     * @type {{notGet: function(event: FetchEvent): boolean, bannerElement: function(event: FetchEvent): boolean, bannerLogo: function(event: FetchEvent): boolean, localResource: function(event: FetchEvent): boolean}}
     */
    this.exclusions = {
      notGet: event => event.request.method !== 'GET',
      bannerElement: event => this.showBanner && event.request.url.endsWith(this.bannerElementLocation),
      bannerLogo: event => this.showBanner && this.bannerLogoLocation && event.request.url.endsWith(this.bannerLogoLocation),
      localResource: event => !(this._regexps.urimPattern.test(event.request.url) || this._regexps.urimPattern.test(event.request.referrer))
    };

    this.debug && console.log(`${this.NAME}:${this.VERSION} initialized:`, this);

    // Both methods are handed over as bare callbacks (an event listener and a promise handler),
    // so they are bound to keep `this` pointing at the instance.
    this.fetchFailure = this.fetchFailure.bind(this);
    this.reroute = this.reroute.bind(this);
  }

  /**
   * Escapes RegExp metacharacters so that the text matches itself literally when embedded in a RegExp source.
   *
   * @private
   * @param  {string} text - Literal text
   * @return {string}      - The text with every RegExp metacharacter backslash-escaped
   */
  _escapeRegExp(text) {
    return text.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
  }

  /**
   * Escapes a value for safe inclusion in a double-quoted HTML attribute.
   *
   * @private
   * @param  {*}      value - The value to be placed in an attribute (converted to a string)
   * @return {string}       - The value with &, ", <, and > replaced by character references
   */
  _escapeAttribute(value) {
    return String(value)
      .replace(/&/g, '&amp;')
      .replace(/"/g, '&quot;')
      .replace(/</g, '&lt;')
      .replace(/>/g, '&gt;');
  }

  /**
   * Fills the <datetime> and <urir> placeholders of the urimPattern.
   * Function replacers are used so that "$" sequences in the inserted values are never interpreted as replacement patterns.
   *
   * @private
   * @param  {string} datetime - A 14 digit datetime
   * @param  {string} urir     - A URI-R
   * @return {string}          - The resulting URI-M
   */
  _fillUrimPattern(datetime, urir) {
    return this.urimPattern.replace('<datetime>', () => datetime).replace('<urir>', () => urir);
  }

  /**
   * Iterates over all the members of the exclusions object and returns true if any of the members return true, otherwise returns false.
   * Logs the first matching exclusion for debugging, if any.
   *
   * @param  {FetchEvent} event - The fetch event
   * @return {boolean}          - Should the request be excluded from rerouting?
   */
  shouldExclude(event) {
    return Object.entries(this.exclusions).some(([exclusionName, exclusionFunc]) => {
      if (exclusionFunc(event)) {
        this.debug && console.log('Exclusion found:', exclusionName, event.request.url);
        return true;
      }
      return false;
    });
  }

  /**
   * Creates a potential URI-M based on the requested URL and the referrer URL for request rerouting.
   *
   * @param  {FetchEvent} event - The fetch event
   * @return {string}           - A potential URI-M
   * @throws {TypeError}        - If the referrer is not a URI-M
   */
  createUrim(event) {
    // Extract datetime and the URI-R of the referrer.
    const [datetime, refUrir] = this.extractDatetimeUrir(event.request.referrer);
    const requested = new URL(event.request.url);
    let urir = requested.href;
    // This condition will match when the request was initiated from an absolute path and fail if it was an absolute URL.
    if (requested.origin === self.location.origin) {
      // If it was an absolute path then referrer's origin was used.
      // We need to replace it with the origin of the referrer's URI-R instead.
      // The RegExp used will capture the origin with the protocol, if any (http, https, or BLANK).
      const refOrigin = refUrir.match(/^(https?:\/\/)?[^\/]+/)[0];
      urir = refOrigin + requested.pathname + requested.search;
    }
    return this._fillUrimPattern(datetime, urir);
  }

  /**
   * Extracts datetime and URI-R from a URI-M.
   *
   * @param  {string}   urim - A URI-M
   * @return {string[]}      - An array of datetime and URI-R
   * @throws {TypeError}     - If the supplied string does not match the urimPattern
   */
  extractDatetimeUrir(urim) {
    const match = urim.match(this._regexps.urimPattern);
    if (!match) {
      throw new TypeError(`Not a URI-M of the form "${this.urimPattern}": ${urim}`);
    }
    const [, datetime, urir] = match;
    // Swap the two extracted values if the datetime pattern appeared after the URI-R.
    // This is not a common practice, but possible if an archive uses query parameters instead of paths.
    if (Number.isNaN(Number(datetime))) {
      return [urir, datetime];
    }
    return [datetime, urir];
  }

  /**
   * Creates a new request based on the original.
   * Copies all the headers from the original request.
   * Adds X-ServiceWorker header with the id of the module.
   * Sets the redirect mode to manual to ensure proper origin boundaries.
   *
   * @param  {FetchEvent} event - The fetch event
   * @return {Request}          - A new request object
   */
  createRequest(event) {
    const headers = this.cloneHeaders(event.request.headers);
    headers.set('X-ServiceWorker', this.id);
    return new Request(event.request.url, { headers: headers, redirect: 'manual' });
  }

  /**
   * Clones provided request or response headers.
   *
   * @param  {Headers} original - Original request or response headers
   * @return {Headers}          - A clone of the supplied headers
   */
  cloneHeaders(original) {
    const headers = new Headers();
    for (const [k, v] of original.entries()) {
      headers.append(k, v);
    }
    return headers;
  }

  /**
   * Redirects a non-URI-M request to its potentially URI-M locally.
   * The potential URI-M is generated using createUrim().
   * This function only returns a synthetic redirection response.
   *
   * @param  {string}            urim - A potential URI-M
   * @return {Promise<Response>}      - A 302 redirection response to the potential URI-M
   */
  localRedirect(urim) {
    this.debug && console.log('Locally redirecting to:', urim);
    return Promise.resolve(new Response(`<h1>Locally Redirecting</h1><p>${urim}</p>`, {
      status: 302,
      statusText: 'Found',
      headers: new Headers({
        'Location': urim,
        'Access-Control-Allow-Origin': '*',
        'Content-Type': 'text/html'
      })
    }));
  }

  /**
   * The callback function on a successful fetch from the server.
   * Calls the rewrite() function if the response code is 2xx.
   * Logs the response for debugging.
   * Resolves to a potentially modified response.
   *
   * @param  {Response}          response - Original response object
   * @param  {FetchEvent}        event    - The fetch event
   * @return {Promise<Response>}          - Potentially modified response
   */
  fetchSuccess(response, event) {
    this.debug && console.log('Fetched from server:', response);
    // Perform a potential rewrite only if the response code is 2xx.
    if (response.ok) {
      return this.rewrite(response, event);
    }
    return Promise.resolve(response);
  }

  /**
   * The callback function on network failure of the server fetch.
   * Logs the failure reason for debugging.
   * Returns a synthetic 503 Service Unavailable response.
   *
   * @param  {Error}    error - The exception raised on fetching from the server
   * @return {Response}       - A 503 Service Unavailable response
   */
  fetchFailure(error) {
    this.debug && console.log(error);
    return new Response('<h1>Service Unavailable</h1>', {
      status: 503,
      statusText: 'Service Unavailable',
      headers: new Headers({
        'Content-Type': 'text/html'
      })
    });
  }

  /**
   * Rewrites the fetched response when necessary.
   * Potential uses are to fix certain replay issues, adding an archival banner, or modifying hyperlinks.
   * When the showBanner config is set to true, it tries to add a banner in navigational HTML pages.
   * Resolves to a potentially modified response.
   *
   * @param  {Response}          response - Original response object
   * @param  {FetchEvent}        event    - The fetch event
   * @return {Promise<Response>}          - Potentially modified response
   */
  rewrite(response, event) {
    // TODO: Make necessary changes in the response
    // Only HTML responses are rewritten, everything else is passed through untouched.
    if (!/text\/html/i.test(response.headers.get('Content-Type'))) {
      return Promise.resolve(response);
    }
    const init = {
      status: response.status,
      statusText: response.statusText,
      headers: this.cloneHeaders(response.headers)
    };
    return response.text().then(body => {
      const [datetime] = this.extractDatetimeUrir(response.url);
      // Replace all absolute URLs matched by the absoluteReference RegExp (in <iframe>, <a>, and <meta> elements) with corresponding URI-Ms to avoid replay and navigation issues.
      body = body.replace(this._regexps.absoluteReference, (match, prefix, tag, attr, url, suffix) => `${prefix}${this._fillUrimPattern(datetime, url)}${suffix}`);
      // Inject a banner only on navigational HTML pages when showBanner config is set to true.
      if (this.showBanner && event.request.mode === 'navigate') {
        const banner = this.createBanner(response, event);
        // Try to inject the banner markup before closing </body> tag, fallback to </html>.
        // If none of the two closing tags are found, append it to the body.
        if (this._regexps.bodyEnd.test(body)) {
          // A function replacer keeps "$" sequences of the banner (e.g., from the URI-R) literal.
          body = body.replace(this._regexps.bodyEnd, (match, tag) => `${banner}</${tag}>`);
        } else {
          body += banner;
        }
      }
      return new Response(body, init);
    });
  }

  /**
   * Creates a string representing an HTML block to be injected in the response's HTML body.
   * The memento datetime is taken from the Memento-Datetime header, falling back to the datetime of the URI-M.
   * Navigational relations (first, last, prev, and next) are taken from the Link header, if present.
   * All the attribute values are escaped before being placed in the markup.
   *
   * @param  {Response}   response - Original response object
   * @param  {FetchEvent} event    - The fetch event
   * @return {string}              - The banner markup
   */
  createBanner(response, event) {
    let mementoDatetime = response.headers.get('Memento-Datetime') || '';
    const [datetime, urir] = this.extractDatetimeUrir(response.url);
    if (!mementoDatetime) {
      mementoDatetime = new Date(`${datetime.slice(0, 4)}-${datetime.slice(4, 6)}-${datetime.slice(6, 8)}T${datetime.slice(8, 10)}:${datetime.slice(10, 12)}:${datetime.slice(12, 14)}Z`).toUTCString();
    }
    // TODO: Extract link parser in a method
    const rels = {};
    const links = response.headers.get('Link');
    if (links) {
      links.replace(/[\r\n]+/g, ' ').replace(/^\W+|\W+$/g, '').split(/\W+</).forEach(l => {
        const segs = l.split(/[>\s'"]*;\W*/);
        const href = segs.shift();
        const attributes = {};
        segs.forEach(s => {
          const [k, v] = s.split(/\W*=\W*/);
          attributes[k] = v;
        });
        // A link without any relation cannot contribute to the banner, skip it.
        if (!attributes['rel']) {
          return;
        }
        attributes['rel'].split(/\s+/).forEach(r => {
          rels[r] = { href: href, datetime: attributes['datetime'] };
        });
      });
    }
    const attr = value => this._escapeAttribute(value);
    // Escaped value of a property of a relation, or an empty string when it is not available.
    const rel = (name, key) => attr(rels[name] && rels[name][key] || '');
    return `
      <script src="${attr(this.bannerElementLocation)}"></script>
      <reconstructive-banner logo-src="${attr(this.bannerLogoLocation)}"
                             home-href="${attr(this.bannerLogoHref)}"
                             urir="${attr(urir)}"
                             memento-datetime="${attr(mementoDatetime)}"
                             first-urim="${rel('first', 'href')}"
                             first-datetime="${rel('first', 'datetime')}"
                             last-urim="${rel('last', 'href')}"
                             last-datetime="${rel('last', 'datetime')}"
                             prev-urim="${rel('prev', 'href')}"
                             prev-datetime="${rel('prev', 'datetime')}"
                             next-urim="${rel('next', 'href')}"
                             next-datetime="${rel('next', 'datetime')}">
      </reconstructive-banner>
    `;
  }

  /**
   * The callback function on the fetch event.
   * Bound to the instance in the constructor, so it can be registered directly as an event listener.
   * Logs the fetch event for debugging.
   * Checks for any rerouting exclusions.
   * If the request URL is a URI-M then creates a new request with certain modifications in the original request and fetches it from the server.
   * Otherwise, responds with a redirect to the potential URI-M.
   * Both success and failure responses are dealt with appropriately.
   *
   * @param {FetchEvent} event - The fetch event
   */
  reroute(event) {
    this.debug && console.log('Rerouting requested', event);
    // Let the browser deal with the requests if it matches a rerouting exclusion.
    if (this.shouldExclude(event)) return;
    // This condition will match if the request URL is a URI-M.
    if (this._regexps.urimPattern.test(event.request.url)) {
      const request = this.createRequest(event);
      event.respondWith(fetch(request).then(response => this.fetchSuccess(response, event)).catch(this.fetchFailure));
    } else {
      const urim = this.createUrim(event);
      event.respondWith(this.localRedirect(urim));
    }
  }

}

// Expose the class through CommonJS when a `module.exports` object is available (used for testing);
// the assignment is skipped wherever `module` is not defined.
if (typeof module !== 'undefined' && typeof module.exports !== 'undefined') {
  module.exports = Reconstructive;
}