diff --git a/experiments/skin-database/cli.js b/experiments/skin-database/cli.js index 2371c99a..b732dd8f 100755 --- a/experiments/skin-database/cli.js +++ b/experiments/skin-database/cli.js @@ -2,6 +2,7 @@ const argv = require("yargs").argv; const findTweetableSkin = require("./tasks/findTweetableSkins"); const fetchInternetArchiveMetadata = require("./tasks/fetchInternetArchiveMetadata"); +const ensureInternetArchiveItemsIndexByMd5 = require("./tasks/ensureInternetArchiveItemsIndexByMd5"); const path = require("path"); const logger = require("./logger"); const Skins = require("./data/skins"); @@ -52,8 +53,10 @@ async function main() { case "fetch-metadata": console.log("Going to download metadata from the Internet Archive"); await fetchInternetArchiveMetadata(); - console.log("Done"); break; + + case "ensure-md5s": + await ensureInternetArchiveItemsIndexByMd5(); case "metadata": { const hash = argv._[1]; console.log(await Skins.getInternetArchiveUrl(hash)); @@ -67,7 +70,7 @@ async function main() { default: console.log(`Unknown command ${argv._[0]}`); } - // await db.close(); + await db.close(); } main(); diff --git a/experiments/skin-database/data/skins.js b/experiments/skin-database/data/skins.js index 397b6a94..fc2f810a 100644 --- a/experiments/skin-database/data/skins.js +++ b/experiments/skin-database/data/skins.js @@ -14,7 +14,6 @@ function getSkinRecord(skin) { readmeText, filePaths, } = skin; - console.log(skin); const fileNames = filePaths.map(p => path.basename(p)); const skinUrl = `https://s3.amazonaws.com/webamp-uploaded-skins/skins/${md5}.wsz`; return { @@ -38,18 +37,41 @@ async function getProp(md5, prop) { return value == null ? null : value; } +const IA_URL = /^(https:\/\/)?archive.org\/details\/([^\/]+)\/?/; +const MD5 = /([a-fA-F0-9]{32})/; + +async function getMd5ByAnything(anything) { + const md5Match = anything.match(MD5); + if (md5Match != null) { + const md5 = md5Match[1]; + return md5; + } + const itemMatchResult = anything.match(IA_URL); + if (itemMatchResult != null) { + const itemName = itemMatchResult[2]; + const md5 = await getMd5FromInternetArchvieItemName(itemName); + if (md5 != null) { + return md5; + } + } + const md5 = await getMd5FromInternetArchvieItemName(anything); + if (md5 != null) { + return md5; + } +} + async function getSkinByMd5(md5) { const skin = await skins.findOne({ md5, type: "CLASSIC" }); if (skin == null) { return null; } - const internetArchiveItemName = await getInternetArchiveItemName(md5); - const internetArchiveUrl = await getInternetArchiveUrl(md5); + const internetArchiveItem = await getInternetArchiveItem(md5); + const itemName = internetArchiveItem.identifier; const tweetStatus = await getTweetStatus(md5); return { ...getSkinRecord(skin), - internetArchiveUrl, - internetArchiveItemName, + internetArchiveUrl: getInternetArchiveUrl(itemName), + internetArchiveItemName: itemName, tweetStatus, }; } @@ -74,23 +96,16 @@ async function getInternetArchiveItemName(md5) { return item.identifier; } async function getInternetArchiveItem(md5) { - return iaItems.findOne( - { "metadata.files.md5": md5 }, - { - fields: { - metadata: 1, - identifier: 1, - }, - } - ); + return iaItems.findOne({ md5: md5 }); } -async function getInternetArchiveUrl(md5) { - const itemName = await getInternetArchiveItemName(md5); - if (itemName == null) { - return null; - } - return `https://archive.org/details/${itemName}`; +async function getMd5FromInternetArchvieItemName(itemName) { + const item = await iaItems.findOne({ identifier: itemName }, { md5: 1 }); + return item == null ? null : item.md5; +} + +function getInternetArchiveUrl(itemName) { + return itemName == null ? null : `https://archive.org/details/${itemName}`; } async function getTweetStatus(md5) { @@ -98,6 +113,7 @@ async function getTweetStatus(md5) { } module.exports = { + getMd5ByAnything, getReadme, getScreenshotUrl, getSkinUrl, diff --git a/experiments/skin-database/discord-bot/commands/skin.js b/experiments/skin-database/discord-bot/commands/skin.js index 2cfc6458..dd45435a 100644 --- a/experiments/skin-database/discord-bot/commands/skin.js +++ b/experiments/skin-database/discord-bot/commands/skin.js @@ -1,6 +1,8 @@ const Utils = require("../utils"); +const Skins = require("../../data/skins"); async function handler(message, args) { - const [md5] = args; + const [anything] = args; + const md5 = await Skins.getMd5ByAnything(anything); await Utils.postSkin({ md5, dest: message.channel diff --git a/experiments/skin-database/tasks/ensureInternetArchiveItemsIndexByMd5.js b/experiments/skin-database/tasks/ensureInternetArchiveItemsIndexByMd5.js new file mode 100644 index 00000000..35a24fd0 --- /dev/null +++ b/experiments/skin-database/tasks/ensureInternetArchiveItemsIndexByMd5.js @@ -0,0 +1,35 @@ +const fetch = require("node-fetch"); +const db = require("../db"); +const iaItems = db.get("internetArchiveItems"); + +module.exports = async function main() { + const items = await iaItems.find( + { "metadata.metadata.skintype": { $eq: "wsz" }, md5: { $eq: null } }, + { + fields: { + identifier: 1, + metadata: 1, + }, + } + ); + + for (const item of items) { + const skinFiles = item.metadata.files.filter(file => { + return file.name.endsWith(".wsz"); + }); + if (skinFiles.length != 1) { + console.warn( + `Found a skin item with ${skinFiles.length} skin files. Identifier: ${ + item.identifier + }` + ); + continue; + } + + const { md5, name } = skinFiles[0]; + await iaItems.update( + { _id: { $eq: item._id } }, + { $set: { md5, skinFileName: name } } + ); + } +}; diff --git a/experiments/skin-database/tasks/fetchInternetArchiveMetadata.js b/experiments/skin-database/tasks/fetchInternetArchiveMetadata.js index 7d278c2f..c3fad57e 100644 --- a/experiments/skin-database/tasks/fetchInternetArchiveMetadata.js +++ b/experiments/skin-database/tasks/fetchInternetArchiveMetadata.js @@ -33,22 +33,29 @@ async function fetchAllMetadata(limit) { // TODO: Refetch collections from: // https://archive.org/advancedsearch.php?q=collection%3Awinampskins&fl%5B%5D=identifier&rows=100000&page=1&output=json module.exports = async function main() { - let delay = 60000; - async function go() { - console.log("Gonna fetch some more"); - try { - const count = await fetchAllMetadata(500); - if (count < 1) { - console.log("Done."); - return; + return new Promise((resolve, reject) => { + let delay = 60000; + let timeout = null; + async function go() { + console.log("Gonna fetch some more"); + try { + const count = await fetchAllMetadata(500); + if (count < 1) { + if (timeout != null) { + // I don't think this can ever happen + clearTimeout(timeout); + } + console.log("Done."); + resolve(); + return; + } + } catch (e) { + console.error(e); + delay += 60000; } - } catch (e) { - console.error(e); - delay += 60000; + timeout = setTimeout(go, delay); } - console.log("Scheduling another", delay / 1000); - setTimeout(go, delay); - } - go(); + go(); + }); };