WhiteCollar
Usage guidance
Background Images for Mosaic
Select the node that carries the background image, just as you would for normal images:
media:articleNode.querySelectorAll('div.backgroundImg'),
Columns
If the site lays out its articles in a fixed number of columns, define that number in whiteCollar's main.js, under document.whiteCollar:
document.whiteCollar = {
numColumns: 3,
InnerText / TextContent in the WhiteCollar
If the text of the title (or of any other field) is not picked up, use the .textContent property instead of .innerText.
Limit Extracted Articles
Normally no more than 60 articles per section are allowed, because ad revenue drops the lower an article is positioned. Other criteria might apply.
//www.example.com/index/src/whiteCollar/main.js
// Excerpt: caps the number of extracted items using two counters stored on document.
// NOTE(review): presumably the two assignments run once at script level while the
// check below lives at the end of getItem (it ends with `return item`) — confirm.
// Maximum number of items that may be accepted.
document.LIMIT_NEWS = 60;
// Number of items accepted so far.
document.extractedNews = 0;
// While under the limit, count the item; otherwise replace it with null.
if(document.extractedNews < document.LIMIT_NEWS) {
document.extractedNews += 1;
} else {
item = null;
}
return item;
OR Selector
Use OR ( || ) to fall back to a second selector should the first one return null:
title:articleNode.querySelector('h1')|| articleNode.querySelector('a'),
uri:articleNode.querySelector('.link')|| articleNode.querySelector('a')
Then, after the var item declaration, read the needed attributes from the selected nodes:
// Replace the selected title node with its innerText; skipped when item.title is falsy.
if(item.title) {
item.title = item.title.innerText;
}
// Replace the selected link node with its href; skipped when item.uri is falsy.
if(item.uri) {
item.uri = item.uri.href;
}
Selectors for a Specific Tag
Target a specific HTML tag, class or id and apply its own extraction rules to it:
/**
 * Extraction rules for a page that mixes regular article boxes (".box")
 * with video teasers (".videokiker"); each kind gets its own selectors.
 */
document.whiteCollar = {
  /**
   * Collects every article container on the page.
   * @returns {NodeList} all nodes matching ".box" or ".videokiker".
   */
  getItems: function () {
    return document.querySelectorAll(".box, .videokiker");
  },
  /**
   * Builds the item for one article container.
   * @param {Element} articleNode - one of the nodes returned by getItems.
   * @returns {Object} item with `title` (text), `uri` (href) and `media`
   *   (NodeList of <img>); `title` and `uri` stay null when nothing matched.
   */
  getItem: function (articleNode) {
    var item = {};
    if (articleNode.className.indexOf('videokiker') >= 0) {
      // Video teasers use the same anchor for both title and link. Store the
      // node itself: the shared conversion below reads .innerText / .href, so
      // storing the href string here would turn the uri into undefined.
      var videoLink = articleNode.querySelector('div + a');
      item = {
        title: videoLink,
        uri: videoLink,
        media: articleNode.querySelectorAll('img')
      };
    } else {
      item = {
        title: articleNode.querySelector('h1') || articleNode.querySelector('a'),
        uri: articleNode.querySelector('h1 > a') || articleNode.querySelector('a'),
        media: articleNode.querySelectorAll('img')
      };
    }
    // Convert the selected nodes into plain values, skipping missing ones.
    if (item.title) {
      item.title = item.title.innerText;
    }
    if (item.uri) {
      item.uri = item.uri.href;
    }
    return item;
  }
};
Uri Returns Null
Skip items whose uri contains a given pattern:
// Return null instead of the item when its uri contains the substring 'pattern'.
// NOTE(review): assumes item.uri is already a string at this point — confirm.
if(item.uri.indexOf('pattern') >= 0) {
return null;
}
whiteCollar Template
Basic template with all of the article options. It can be further customized.
/**
 * Template with every supported article field. Fill in the empty selectors
 * and remove the fields (and their conditionals) that are not needed.
 */
document.whiteCollar = {
  /**
   * Collects the article containers.
   * @returns {NodeList} nodes matching the (to be filled in) selector.
   */
  getItems: function () {
    return document.querySelectorAll("");
  },
  /**
   * Builds the item for one article container.
   * @param {Element} articleNode - one of the nodes returned by getItems.
   * @returns {Object} item whose fields hold plain values; a field stays
   *   null when its selector matched nothing.
   */
  getItem: function (articleNode) {
    // First select nodes only; values are read below once each node is
    // known to exist, so a non-matching selector cannot throw.
    var item = {
      title: articleNode.querySelector(''),
      uri: articleNode.querySelector('a'),
      media: articleNode.querySelectorAll('img'),
      subtitle: articleNode.querySelector(''),
      pocket: {
        categories: articleNode.querySelectorAll('')
      },
      author: articleNode.querySelector(''),
      date: articleNode.querySelector('.date'),
      excerpt: articleNode.querySelector('')
    };
    if (item.title) {
      item.title = item.title.innerText;
    }
    if (item.uri) {
      item.uri = item.uri.href;
    }
    if (item.subtitle) {
      item.subtitle = item.subtitle.innerText;
    }
    // Replace the list of category nodes with a plain array of their texts.
    var categoriesNode = item.pocket.categories;
    if (categoriesNode) {
      var categories = [];
      for (var i = 0; i < categoriesNode.length; i++) {
        categories.push(categoriesNode[i].innerText);
      }
      item.pocket.categories = categories;
    }
    if (item.author) {
      item.author = {
        name: item.author.innerText
      };
    }
    if (item.date) {
      item.date = item.date.innerHTML;
    }
    if (item.excerpt) {
      item.excerpt = item.excerpt.innerText;
    }
    return item;
  }
};
Usage
getItems: all selectors for the articles from tenant's Mosaic view.
getItem: selects the fields (title, uri, ...) from each article returned by getItems. The minimum required fields are title and uri. Unused fields must be removed, along with their conditionals (if(item.field...)). Adapt this file to your needs.
Example from thediplomat.com:
/**
 * Extraction rules for thediplomat.com: the lead post (".firstPost > a")
 * and the regular rows (".rowWrapper > .row").
 */
document.whiteCollar = {
  /**
   * Collects the article containers.
   * @returns {NodeList} the lead post link and every row.
   */
  getItems: function () {
    return document.querySelectorAll(".firstPost > a, .rowWrapper > .row");
  },
  /**
   * Builds the item for one article container.
   * @param {Element} articleNode - one of the nodes returned by getItems.
   * @returns {Object} item with title, uri, author ({name}), date and
   *   excerpt; a field stays null when its selector matched nothing.
   */
  getItem: function (articleNode) {
    var titleNode = articleNode.querySelector('.postTitle, .postPrevTitle');

    // Prefer the href of the preview image's parent; fall back to the row
    // link. Each node is checked first so that a missing preview image
    // reaches the fallback instead of throwing.
    var imageNode = articleNode.querySelector('.postPrevImage');
    var uri = imageNode && imageNode.parentNode ? imageNode.parentNode.href : null;
    if (!uri) {
      var rowLink = articleNode.querySelector('a.row_link');
      uri = rowLink ? rowLink.href : null;
    }

    var item = {
      title: titleNode ? titleNode.innerText : null,
      uri: uri,
      author: articleNode.querySelector('.postAuthor'),
      date: articleNode.querySelector('.postDate'),
      excerpt: articleNode.querySelector('.postTeaser, .postPrevTeaser')
    };
    if (item.author) {
      item.author = {
        name: item.author.innerText
      };
    }
    if (item.date) {
      item.date = item.date.innerHTML;
    }
    if (item.excerpt) {
      item.excerpt = item.excerpt.innerText;
    }
    return item;
  }
};