// Usage
// ============
// 1. Using Chrome go to https://www.swirl.de/de/Staubsaugerbeutel-Finder-Staubsaugen-mit-Swirl-648.html
// 2. Open dev console
// 3. Copy-paste and run this script in the console
// 4. Save the returned JS object using JSON.stringify in the console (e.g. call JSON.stringify on it first, then manually copy the resulting string to a file)
// Apologies for mixing English and German: the identifiers "marke" (brand) and "modell" (model)
// are kept because they correspond directly to the names used on the website we scrape.
/**
 * Returns the options of a <select> element as a real array.
 *
 * @param {string} selectId DOM id of the <select> element to read.
 * @param {number} [skip=0] Number of leading options to leave out.
 * @returns {Array} The element's options, starting at index `skip`.
 */
function getOptions(selectId, skip = 0) {
  const selectElement = document.getElementById(selectId);
  // `options` is a live collection, so copy it into an array before slicing.
  const allOptions = Array.from(selectElement.options);
  return allOptions.slice(skip);
}
/**
 * Lazily extracts the <option> entries from an HTML fragment.
 *
 * Every matching <option> yields the regex's named groups, i.e. an object with
 * `value` (the value attribute) and `innerText` (the text between the tags).
 * The very first match is discarded: per the original author's note it holds
 * the <select> "header" text rather than a real entry.
 *
 * @param {string} htmlResponseText HTML text to scan.
 * @yields {{value: string, innerText: string}} One entry per remaining <option>.
 */
function* extractAllModelle(htmlResponseText) {
  const optionPattern = /<option[^>]*? value="(?<value>.*?)"[^>]*?>(?<innerText>[^<]*?)<\/option>/gmi;

  // Consume the header option up front; with no match at all there is nothing to yield.
  if (optionPattern.exec(htmlResponseText) === null) {
    return;
  }

  // The /g flag makes each exec() call resume after the previous match.
  for (;;) {
    const found = optionPattern.exec(htmlResponseText);
    if (found === null) {
      return;
    }
    yield found.groups;
  }
}
/**
 * Requests the model list for one brand ("marke") and parses the HTML answer.
 *
 * The request imitates the XHR the site itself fires when a brand is selected
 * in the UI; its options were captured from that original request via the
 * Chrome dev tools.
 *
 * @param {number|string} marke Brand id, sent as the `marke` form field.
 * @returns {Promise<Array>} Resolves to the entries extractAllModelle yields
 *   for the response HTML.
 * @throws {Error} (as a rejection) when the server answers with a non-2xx
 *   status, so an error page is never mistaken for an empty model list.
 */
async function getModelleForMarke(marke) {
  const url = 'https://www.swirl.de/common/ajax.php?bereich=portal&modul_id=102&klasse=staubfilterbeutelsuche&com=laden_typen';
  const response = await fetch(url, {
    headers: {
      'accept': '*/*',
      'accept-language': 'en,de;q=0.9,en-US;q=0.8',
      'content-type': 'application/x-www-form-urlencoded;charset=UTF-8',
      'sec-fetch-dest': 'empty',
      'sec-fetch-mode': 'cors',
      'sec-fetch-site': 'same-origin',
      'x-requested-with': 'XMLHttpRequest',
    },
    referrer: 'https://www.swirl.de/de/Staubsaugerbeutel-Finder-Staubsaugen-mit-Swirl-648.html',
    referrerPolicy: 'no-referrer-when-downgrade',
    // The body is form-urlencoded, so the brand id must be percent-encoded.
    body: `marke=${encodeURIComponent(marke)}&sprache=de&texte=staubfilterbeutelsuche%2Ftexte_modul_staubfilterbeutelsuche`,
    method: 'POST',
    mode: 'cors',
    credentials: 'include',
  });

  if (!response.ok) {
    throw new Error(`Request for marke ${marke} failed with HTTP ${response.status}`);
  }

  const html = await response.text();
  return [...extractAllModelle(html)];
}
/**
 * Fetches the model lists of several brands at once.
 *
 * One getModelleForMarke request is started per brand; the results are
 * collected into a dictionary once all of them have resolved.
 *
 * @param {Array} marken Brand ids to look up.
 * @returns {Promise<Object>} Resolves to an object mapping each brand id to
 *   the value getModelleForMarke resolved with for it. Rejects as soon as any
 *   single lookup rejects.
 */
function getModelleForMultipleMarken(marken) {
  const pendingEntries = marken.map(async (marke) => {
    const modelle = await getModelleForMarke(marke);
    return {marke, modelle};
  });

  return Promise.all(pendingEntries).then((entries) => {
    // Turn the list of {marke, modelle} pairs back into a dictionary.
    const modelleByMarke = {};
    for (const entry of entries) {
      modelleByMarke[entry.marke] = entry.modelle;
    }
    return modelleByMarke;
  });
}
/**
 * Lazily groups the values of an iterable into arrays of `batchSize` items.
 *
 * Each batch is yielded as soon as it is full; the last batch may be shorter.
 * An empty iterable yields nothing.
 *
 * @param {Iterable} iterable Source of values.
 * @param {number} batchSize Maximum batch length: a positive integer, or
 *   Infinity to get everything in a single batch.
 * @yields {Array} The next batch of values, in source order.
 * @throws {RangeError} On the first iteration step when `batchSize` is not a
 *   positive integer or Infinity. Such sizes would never produce correctly
 *   sized batches.
 */
function* batchIterable(iterable, batchSize) {
  const isValidSize =
    batchSize === Infinity || (Number.isInteger(batchSize) && batchSize >= 1);
  if (!isValidSize) {
    throw new RangeError(`batchSize must be a positive integer, got ${String(batchSize)}`);
  }

  let curBatch = [];
  for (const value of iterable) {
    curBatch.push(value);
    if (curBatch.length === batchSize) {
      yield curBatch;
      curBatch = [];
    }
  }

  // Flush the trailing, partially filled batch.
  if (curBatch.length > 0) {
    yield curBatch;
  }
}
/**
 * promisingReduce<S, T>
 *
 * Left fold over a sequence of promises: each item is awaited in order and
 * then merged into the running result. The next item is only requested from
 * the sequence after the previous one has been awaited and folded in.
 *
 * @param array An array of Promise<T> (plain T values are accepted too)
 * @param f A function (acc: S, cur: T) => S
 * @param defaultValue An S, used as the initial accumulator
 *
 * @return A Promise<S>; rejects if awaiting any item rejects or f throws
 */
async function promisingReduce(array, f, defaultValue) {
  let accumulated = defaultValue;
  for (const pending of array) {
    const settled = await pending;
    accumulated = f(accumulated, settled);
  }
  return accumulated;
}
/**
 * Splits `array` into batches, runs `f` on one batch at a time and folds the
 * batch results together with `combine`.
 *
 * Batches are processed strictly one after another: `f` is not called for the
 * next batch until the previous batch's promise has resolved and been
 * combined. This bounds the amount of work in flight to a single batch.
 *
 * @param {Iterable} array Items to process.
 * @param {Function} f Called with one batch (an array of items); returns a
 *   result or a promise for it.
 * @param {Function} combine (acc, batchResult) => acc
 * @param {*} combineDefault Initial accumulator; also the result for empty input.
 * @param {number} [batchSize=50] Maximum number of items per batch.
 * @returns {Promise} Resolves to the combined result of all batches.
 */
function batchProcess(array, f, combine, combineDefault, batchSize = 50) {
  // A generator is lazy: f runs for a batch only when promisingReduce pulls
  // the next item, which happens after it has awaited the previous one.
  function* pendingBatchResults() {
    for (const batch of batchIterable(array, batchSize)) {
      yield f(batch);
    }
  }
  return promisingReduce(pendingBatchResults(), combine, combineDefault);
}
// Read the brand ids from the brand <select> on the page. The first option is
// skipped (presumably the placeholder entry - verify against the page markup)
// and each remaining option's value is parsed as a base-10 integer.
const marken = getOptions('sfbs-select-seite-marken', 1)
  .map(({value}) => Number.parseInt(value, 10));
console.log(`Gathered ${marken.length} number of marken`);
/**
 * Merges two dictionaries into a fresh object without modifying either input.
 *
 * @param {Object} dict1 First dictionary.
 * @param {Object} dict2 Second dictionary; its entries win on duplicate keys.
 * @returns {Object} A new object holding the entries of both inputs.
 */
function mergeDict(dict1, dict2) {
  const merged = {};
  Object.assign(merged, dict1);
  Object.assign(merged, dict2);
  return merged;
}
// Start the scrape: the brand ids in `marken` are handed to
// getModelleForMultipleMarken in batches, and the per-batch dictionaries are
// merged with mergeDict, starting from an empty object.
// The expression evaluates to a Promise for the merged dictionary. When this
// is the last statement run in the dev console, that Promise is the value the
// console displays.
// NOTE(review): no rejection handler is attached here, so a failed request
// surfaces only as a rejected Promise.
batchProcess(marken, getModelleForMultipleMarken, mergeDict, {})