Element collector #94

Draft · wants to merge 1 commit into main
97 changes: 97 additions & 0 deletions collectors/ElementCollector.js
@@ -0,0 +1,97 @@
const fs = require('fs').promises;
const BaseCollector = require('./BaseCollector');

class ElementCollector extends BaseCollector {

    id() {
        return 'elements';
    }

    /**
     * @param {import('./BaseCollector').CollectorInitOptions} options
     */
    init(options) {
        this.context = options.context;
        this.log = options.log;
        /**
         * @type {import('puppeteer').Frame[]}
         */
        this.frames = [];
        // start loading the element-hiding rules; the resulting promise is awaited once the page has loaded
        this.cookieSelectors = this._loadCookieMonsterRules();
    }

    async _loadCookieMonsterRules() {
        const contents = String(await fs.readFile('./fanboy-cookiemonster.txt', {encoding: 'utf-8'}));
        // keep only generic element-hiding rules ("##<selector>") and strip the "##" prefix
        return contents.split('\n').filter(line => line.startsWith('##')).map(line => line.slice(2));
    }

    /**
     * @param {{cdpClient: import('puppeteer').CDPSession, url: string, type: import('./TargetCollector').TargetType}} targetInfo
     */
    // eslint-disable-next-line no-unused-vars
    addTarget(targetInfo) {
        if (targetInfo.type === 'page') {
            this.context.pages().then(pages => {
                this.page = pages[0];
                this.check = new Promise(resolve => {
                    this.page.on('load', async () => {
                        // check which cookie-banner selectors match on the loaded page
                        const selectors = await this.cookieSelectors;
                        const isMatched = await Promise.all(selectors.map(async selector => (await this.page.$(selector)) !== null));
                        const present = selectors.filter((_, i) => isMatched[i]);
                        const visible = await this.page.evaluate(testSelectors => {
                            /**
                             * @param {HTMLElement} elem
                             */
                            function isElementVisible(elem) {
                                if (!elem) {
                                    return false;
                                }
                                if (elem.offsetParent !== null) {
                                    return true;
                                }
                                // eslint-disable-next-line no-undef
                                const css = window.getComputedStyle(elem);
                                if (css.position === 'fixed' && css.display !== 'none') { // position:fixed elements have no offsetParent but may still be visible
                                    return true;
                                }
                                return false;
                            }
                            return testSelectors.filter((/** @type {string} */ s) => {
                                // eslint-disable-next-line no-undef
                                const elem = document.querySelectorAll(s);
                                const results = new Array(elem.length);
                                elem.forEach((e, i) => {
                                    // record visibility and highlight visible matches with a red dashed border
                                    // @ts-ignore
                                    results[i] = isElementVisible(e);
                                    if (results[i]) {
                                        e.setAttribute('style', 'border: 4px dashed red;');
                                    }
                                });
                                return results.some(r => r);
                            });
                        }, present);
                        resolve({
                            present,
                            visible,
                        });
                    });
                });
            }, () => this.log('Unable to get pages'));
        }
    }

    /**
     * Called after the crawl to retrieve the data. Can be async, can throw errors.
     *
     * @param {{finalUrl: string, urlFilter?: function(string):boolean}} options
     * @returns {Promise<Object>|Object}
     */
    // eslint-disable-next-line no-unused-vars
    getData(options) {
        return this.check;
    }
}

module.exports = ElementCollector;
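
For context, fanboy-cookiemonster.txt uses EasyList syntax: generic element-hiding rules start with "##" followed by a CSS selector, which is exactly what _loadCookieMonsterRules keeps. Below is a minimal standalone Node.js sketch of that parsing step; the sample rules are illustrative, not taken from the real list.

// Illustrative sample; the collector reads the real rules from ./fanboy-cookiemonster.txt
const sample = [
    '! Title: sample header (comment line, ignored)',
    '##.cookie-banner',
    '##div[id^="cookie-consent"]',
    'example.com###cookie-notice' // domain-scoped rule, not kept by startsWith('##')
].join('\n');

const selectors = sample
    .split('\n')
    .filter(line => line.startsWith('##'))
    .map(line => line.slice(2));

console.log(selectors); // ['.cookie-banner', 'div[id^="cookie-consent"]']

Comment lines starting with "!" and domain-scoped rules prefixed with a hostname do not begin with "##", so only the generic rules that can be probed on any page are kept.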
2 changes: 2 additions & 0 deletions main.js
@@ -8,6 +8,7 @@ const TargetCollector = require('./collectors/TargetCollector');
const TraceCollector = require('./collectors/TraceCollector');
const ScreenshotCollector = require('./collectors/ScreenshotCollector');
const CMPCollector = require('./collectors/CMPCollector');
const ElementCollector = require('./collectors/ElementCollector');

// reexport main pieces of code so that they can be easily imported when this project is used as a dependency
// e.g. `const {crawlerConductor} = require('3p-crawler');`
@@ -22,4 +23,5 @@ module.exports = {
    TraceCollector,
    ScreenshotCollector,
    CMPCollector,
    ElementCollector,
};
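
As a usage note, here is a hypothetical sketch of plugging the new collector into a crawl when this project is consumed as a dependency, as the re-export comment in main.js suggests. The crawler() call signature and the shape of the returned data are assumptions based on how the other bundled collectors are wired up; check crawler.js for the exact API.

const {crawler, ElementCollector} = require('3p-crawler');

(async () => {
    // Assumed signature: crawler(url, options) with a `collectors` array; verify against crawler.js.
    const result = await crawler(new URL('https://example.com'), {
        collectors: [new ElementCollector()],
        log: (...msg) => console.log(...msg)
    });
    // getData() resolves to {present, visible}; it should appear under the collector's id(), i.e. 'elements'.
    console.log(result.data.elements);
})();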